自制简历匹配

2022-09-26 16:52:11 +08:00 · 2022-09-26 16:52:11 +08:00 · 2d959ead6b
commit 2d959ead6b
parent 73539753df
2 changed files with 346 additions and 237 deletions
--- a/api/api_v1/endpoints/interview.py
+++ b/api/api_v1/endpoints/interview.py
@@ -436,187 +436,228 @@ async def file_to_hw(

    try:
        fn = path_data + '/' + filename
-        if fn.endswith('pdf'):  # pdf正常上传到华为云
-            res = obsClient.putFile('legu-cdn-source', 'hrms/' + filename, path_data + '/' + filename)
-        else:  # doc/docx则转化为pdf上传到华为云
-            new_fn, fil = doc2pdf(fn, path_data, filename)
+        if not fn.endswith('pdf'):  # pdf正常上传到华为云
+            # res = obsClient.putFile('legu-cdn-source', 'hrms/' + filename, path_data + '/' + filename)
+            # doc/docx则转化为pdf上传到华为云
+            fn, fil = doc2pdf(fn, path_data, filename)
            filename = fil
-            res = obsClient.putFile('legu-cdn-source', 'hrms/' + filename, new_fn)
+
+        # 简历初始文档
+        data_mode = {
+            "interview_name": "",
+            "interview_type": 1,
+            "interview_sign": 0,
+            "hope_money": "",
+            "feedback": 0,
+            "interview_round": 0,
+            "event_time": datetime.now(),
+            "name": "",
+            "phone": "",
+            "job_name": "",
+            "hr_name": "",
+            "work_exp": 0,
+            "interview_stage": 1,
+            "owner_name": 2,
+            "education": 1,
+            "work_undergo": [],
+            "project_undergo": [],
+            "work_list": [],
+            "school": "",
+            "at_school": "",
+            "specialty": "",
+            "specialty_do": [],
+            "mmended_state": 0,
+            "mail": "",
+            "account": "",
+            "id_card": "",
+            "gender": "",
+            "age": 0,
+            "gam": "",
+            "interview_state": 1,
+            "counts": 1,
+            "nation": "汉",
+            "review": "",
+            "upgrade": [],
+            "come_time": "",
+            "now_money": "",
+            "men_state": 1,
+            "teacher_state": 1,
+            "teacher_back": 1,
+            "offer_state": 1,
+            "offer_exam_state": 1,
+            "notice_state": 1,
+            "pass_why": 0,
+            "pass_text": "",
+            "now_address": "",
+            "language": [],
+            "remembrance": [],
+            "file_url": '',
+            "hr_manner": 2,
+        }
+        uid = get_uid()
+        data_mode['uid'] = uid
+        # 存数据
+        chk_txt = getText_pdf(path_data + '/' + filename)
+        data = fmt_txt(chk_txt)
+        education = data['education']
+        # 学历int转化
+        education_int = {
+            '大专': 1,
+            '本科': 2,
+            '研究生': 3,
+            '博士': 4,
+            '硕士': 5,
+        }
+        if education and isinstance(education, str):
+            data['education'] = education_int.get(education, 1)
+        age = data['age']
+        if not age:
+            data['age'] = 20
+        # 年龄int转化
+        if age and isinstance(age, str):
+            true_age = re.search(r"\d+\.?\d*", age)
+            if len(true_age.group()) > 2:
+                data['age'] = 20
+            else:
+                data['age'] = int(true_age.group())
+        work_exp = data['work_exp']
+        if not work_exp:
+            data['work_exp'] = 0
+        # 工作经验float转化
+        if work_exp and isinstance(work_exp, str):
+            true_work_exp = re.search(r"\d+\.?\d*", work_exp)
+            if len(true_work_exp.group()) > 3:
+                data['work_exp'] = 0
+            else:
+                data['work_exp'] = float(true_work_exp.group())
+
+        data_mode.update(data)
+        # 转json字符串
+        if 'remembrance_list' in data_mode:
+            remembrance = data_mode.pop('remembrance_list')
+            data_mode['remembrance'] = remembrance
+        if 'language_list' in data_mode:
+            language = data_mode.pop('language_list')
+            data_mode['language'] = language
+        if 'project_undergo' in data_mode:
+            if data_mode.get('project_undergo', []):
+                data_mode['project_undergo'] = [json.dumps(i) for i in data_mode['project_undergo']]
+            else:
+                data_mode['project_undergo'] = []
+        if 'work_list' in data_mode:
+            if data_mode.get('work_list', []):
+                data_mode['work_list'] = [json.dumps(i) for i in data_mode['work_list']]
+            else:
+                data_mode['work_list'] = []
+        if 'language' in data_mode:
+            if data_mode.get('language', []):
+                data_mode['language'] = [json.dumps(i) for i in data_mode['language']]
+            else:
+                data_mode['language'] = []
+        if 'remembrance' in data_mode:
+            if data_mode.get('remembrance', []):
+                data_mode['remembrance'] = [json.dumps(i) for i in data_mode['remembrance']]
+            else:
+                data_mode['remembrance'] = []
+
+        # 字符串转datetime
+        if data_mode.get('in_time', ''):
+            chk_in_time = data_mode['in_time'].replace('-', '/').replace('.', '/')
+            if len(chk_in_time.split('/')) == 2:
+                data_mode['in_time'] = str(datetime.strptime(chk_in_time, "%Y/%m").date())
+            if len(chk_in_time.split('/')) == 3:
+                data_mode['in_time'] = str(datetime.strptime(chk_in_time, "%Y/%m/%d").date())
+
+        if data_mode.get('out_time', ''):
+            chk_out_time = data_mode['out_time'].replace('-', '/').replace('.', '/')
+            if len(chk_out_time.split('/')) == 2:
+                data_mode['out_time'] = str(datetime.strptime(chk_out_time, "%Y/%m").date())
+            if len(chk_out_time.split('/')) == 3:
+                data_mode['out_time'] = str(datetime.strptime(chk_out_time, "%Y/%m/%d").date())
+
+        if data_mode.get('birthday', ''):
+            chk_birthday = data_mode['birthday'].replace('-', '/').replace('.', '/')
+            if len(chk_birthday.split('/')) == 2:
+                data_mode['birthday'] = str(datetime.strptime(chk_birthday, "%Y/%m").date())
+            if len(chk_birthday.split('/')) == 3:
+                data_mode['birthday'] = str(datetime.strptime(chk_birthday, "%Y/%m/%d").date())
+
+        if data_mode.get('star_time', ''):
+            chk_star_time = data_mode['star_time'].replace('-', '/').replace('.', '/')
+            if len(chk_star_time.split('/')) == 2:
+                data_mode['star_time'] = str(datetime.strptime(chk_star_time, "%Y/%m").date())
+            if len(chk_star_time.split('/')) == 3:
+                data_mode['star_time'] = str(datetime.strptime(chk_star_time, "%Y/%m/%d").date())
+
+        if data_mode.get('end_time', ''):
+            chk_end_time = data_mode['end_time'].replace('-', '/').replace('.', '/')
+            if len(chk_end_time.split('/')) == 2:
+                data_mode['end_time'] = str(datetime.strptime(chk_end_time, "%Y/%m").date())
+            if len(chk_end_time.split('/')) == 3:
+                data_mode['end_time'] = str(datetime.strptime(chk_end_time, "%Y/%m/%d").date())
+
+        if data_mode.get('graduate_time', ''):
+            chk_graduate = data_mode['graduate_time'].replace('-', '/').replace('.', '/')
+            if len(chk_graduate.split('/')) == 2:
+                data_mode['graduate_time'] = str(datetime.strptime(chk_graduate, "%Y/%m").date())
+            if len(chk_graduate.split('/')) == 3:
+                data_mode['graduate_time'] = str(datetime.strptime(chk_graduate, "%Y/%m/%d").date())
+
+        work_list = data['work_list']
+        language = data['language']
+        project_undergo = data['project_undergo']
+        remembrance = data['remembrance']
+
+        # 简历查重,姓名,手机号,性别name,phone,gender
+        find_name = data['name']
+        find_phone = data['phone']
+        find_gender = data['gender']
+        where = {}
+        if find_name:
+            where.update({
+                'name': find_name
+            })
+        if find_phone:
+            where.update({
+                'phone': find_phone
+            })
+        if find_gender:
+            where.update({
+                'gender': find_gender
+            })
+        whereStr = ''
+        for key, value in where.items():
+            if isinstance(value, str):
+                if not value.strip():
+                    continue
+                if whereStr:
+                    whereStr += 'and ' + str(key) + ' = ' + "'" + value + "'" + ' '
+                else:
+                    whereStr += str(key) + ' = ' + "'" + value + "'" + ' '
+                continue
+            if whereStr:
+                whereStr += 'and ' + str(key) + ' = ' + str(value) + ' '
+            else:
+                whereStr += str(key) + ' = ' + str(value) + ' '
+        whereStr = whereStr.strip()
+        sql = f"select uid from HR.resumes where {whereStr}"
+        is_in_data = await db.execute(sql)
+        if is_in_data:
+            return schemas.Msg(code=-9, msg='简历已存在', data=[])
+        if not find_phone:
+            return schemas.Msg(code=-9, msg='电话号码不存在', data=[])
+        # os.rename(path_data + '/' + filename, path_data + '/' + find_phone + '.pdf')
+        res = obsClient.putFile('legu-cdn-source', 'hrms/' + find_phone + '.pdf', fn)
        if res.status < 300:
            # 地址
            url = res.body.objectUrl
-            # 简历初始文档
-            data_mode = {
-                "interview_name": "",
-                "interview_type": 1,
-                "interview_sign": 0,
-                "hope_money": "",
-                "feedback": 0,
-                "interview_round": 0,
-                "event_time": datetime.now(),
-                "name": "",
-                "phone": "",
-                "job_name": "",
-                "hr_name": "",
-                "work_exp": 0,
-                "interview_stage": 1,
-                "owner_name": 2,
-                "education": 1,
-                "work_undergo": [],
-                "project_undergo": [],
-                "work_list": [],
-                "school": "",
-                "at_school": "",
-                "specialty": "",
-                "specialty_do": [],
-                "mmended_state": 0,
-                "mail": "",
-                "account": "",
-                "id_card": "",
-                "gender": "",
-                "age": 0,
-                "gam": "",
-                "interview_state": 1,
-                "counts": 1,
-                "nation": "汉",
-                "review": "",
-                "upgrade": [],
-                "come_time": "",
-                "now_money": "",
-                "men_state": 1,
-                "teacher_state": 1,
-                "teacher_back": 1,
-                "offer_state": 1,
-                "offer_exam_state": 1,
-                "notice_state": 1,
-                "pass_why": 0,
-                "pass_text": "",
-                "now_address": "",
-                "language": [],
-                "remembrance": [],
-                "file_url": url,
-                "hr_manner": 2,
-            }
-            uid = get_uid()
-            data_mode['uid'] = uid
+            data_mode['file_url'] = url
            sql = f"insert into HR.resumes(interview_name, interview_type, interview_sign, hope_money, feedback," \
                  f" interview_round, event_time, uid, name, phone, job_name, hr_name, work_exp, interview_stage, owner_name," \
                  f" education, work_undergo, project_undergo, work_list, school, at_school, specialty, specialty_do, " \
                  f"mmended_state, mail, account, id_card, gender, age, gam, interview_state, counts, nation, come_time," \
                  f" review, upgrade, now_money, men_state, teacher_state, teacher_back, offer_state, offer_exam_state," \
                  f" notice_state, pass_why, pass_text, now_address,language,remembrance, file_url, hr_manner) values"
-            # 存数据
-            chk_txt = getText_pdf(path_data + '/' + filename)
-            data = fmt_txt(chk_txt)
-            education = data['education']
-            # 学历int转化
-            education_int = {
-                '大专': 1,
-                '本科': 2,
-                '研究生': 3,
-                '博士': 4,
-                '硕士': 5,
-            }
-            if education and isinstance(education, str):
-                data['education'] = education_int.get(education, 1)
-            age = data['age']
-            if not age:
-                data['age'] = 20
-            # 年龄int转化
-            if age and isinstance(age, str):
-                true_age = re.search(r"\d+\.?\d*", age)
-                if len(true_age.group()) > 2:
-                    data['age'] = 20
-                else:
-                    data['age'] = int(true_age.group())
-            work_exp = data['work_exp']
-            if not work_exp:
-                data['work_exp'] = 0
-            # 工作经验float转化
-            if work_exp and isinstance(work_exp, str):
-                true_work_exp = re.search(r"\d+\.?\d*", work_exp)
-                if len(true_work_exp.group()) > 3:
-                    data['work_exp'] = 0
-                else:
-                    data['work_exp'] = float(true_work_exp.group())
-
-            data_mode.update(data)
-            # 转json字符串
-            if 'remembrance_list' in data_mode:
-                remembrance = data_mode.pop('remembrance_list')
-                data_mode['remembrance'] = remembrance
-            if 'language_list' in data_mode:
-                language = data_mode.pop('language_list')
-                data_mode['language'] = language
-            if 'project_undergo' in data_mode:
-                if data_mode.get('project_undergo', []):
-                    data_mode['project_undergo'] = [json.dumps(i) for i in data_mode['project_undergo']]
-                else:
-                    data_mode['project_undergo'] = []
-            if 'work_list' in data_mode:
-                if data_mode.get('work_list', []):
-                    data_mode['work_list'] = [json.dumps(i) for i in data_mode['work_list']]
-                else:
-                    data_mode['work_list'] = []
-            if 'language' in data_mode:
-                if data_mode.get('language', []):
-                    data_mode['language'] = [json.dumps(i) for i in data_mode['language']]
-                else:
-                    data_mode['language'] = []
-            if 'remembrance' in data_mode:
-                if data_mode.get('remembrance', []):
-                    data_mode['remembrance'] = [json.dumps(i) for i in data_mode['remembrance']]
-                else:
-                    data_mode['remembrance'] = []
-
-            # 字符串转datetime
-            if data_mode.get('in_time', ''):
-                chk_in_time = data_mode['in_time'].replace('-', '/').replace('.', '/')
-                if len(chk_in_time.split('/')) == 2:
-                    data_mode['in_time'] = str(datetime.strptime(chk_in_time, "%Y/%m").date())
-                if len(chk_in_time.split('/')) == 3:
-                    data_mode['in_time'] = str(datetime.strptime(chk_in_time, "%Y/%m/%d").date())
-
-            if data_mode.get('out_time', ''):
-                chk_out_time = data_mode['out_time'].replace('-', '/').replace('.', '/')
-                if len(chk_out_time.split('/')) == 2:
-                    data_mode['out_time'] = str(datetime.strptime(chk_out_time, "%Y/%m").date())
-                if len(chk_out_time.split('/')) == 3:
-                    data_mode['out_time'] = str(datetime.strptime(chk_out_time, "%Y/%m/%d").date())
-
-            if data_mode.get('birthday', ''):
-                chk_birthday = data_mode['birthday'].replace('-', '/').replace('.', '/')
-                if len(chk_birthday.split('/')) == 2:
-                    data_mode['birthday'] = str(datetime.strptime(chk_birthday, "%Y/%m").date())
-                if len(chk_birthday.split('/')) == 3:
-                    data_mode['birthday'] = str(datetime.strptime(chk_birthday, "%Y/%m/%d").date())
-
-            if data_mode.get('star_time', ''):
-                chk_star_time = data_mode['star_time'].replace('-', '/').replace('.', '/')
-                if len(chk_star_time.split('/')) == 2:
-                    data_mode['star_time'] = str(datetime.strptime(chk_star_time, "%Y/%m").date())
-                if len(chk_star_time.split('/')) == 3:
-                    data_mode['star_time'] = str(datetime.strptime(chk_star_time, "%Y/%m/%d").date())
-
-            if data_mode.get('end_time', ''):
-                chk_end_time = data_mode['end_time'].replace('-', '/').replace('.', '/')
-                if len(chk_end_time.split('/')) == 2:
-                    data_mode['end_time'] = str(datetime.strptime(chk_end_time, "%Y/%m").date())
-                if len(chk_end_time.split('/')) == 3:
-                    data_mode['end_time'] = str(datetime.strptime(chk_end_time, "%Y/%m/%d").date())
-
-            if data_mode.get('graduate_time', ''):
-                chk_graduate = data_mode['graduate_time'].replace('-', '/').replace('.', '/')
-                if len(chk_graduate.split('/')) == 2:
-                    data_mode['graduate_time'] = str(datetime.strptime(chk_graduate, "%Y/%m").date())
-                if len(chk_graduate.split('/')) == 3:
-                    data_mode['graduate_time'] = str(datetime.strptime(chk_graduate, "%Y/%m/%d").date())
-
-            work_list = data['work_list']
-            language = data['language']
-            project_undergo = data['project_undergo']
-            remembrance = data['remembrance']
-
            res_data = {
                'data': data,
                'file_url': url,
--- a/utils/re_to_jianli.py
+++ b/utils/re_to_jianli.py
@@ -145,25 +145,27 @@ def fmt_txt(chk_str):
        if work_exp:
            work_exp_str = work_exp[0].replace('\n', '')
            if '：' in work_exp_str:
-                work_exp = work_exp_str.split('：')[1].strip().split('年')[0]
-                dict_int_year = {
-                    '一': 1,
-                    '二': 2,
-                    '三': 3,
-                    '四': 4,
-                    '五': 5,
-                    '六': 6,
-                    '七': 7,
-                    '八': 8,
-                    '九': 9
-                }
-                if work_exp in ['一', '二', '三', '四', '五', '六', '七', '八', '九']:
-                    work_exp1 = dict_int_year.get(work_exp)
-                else:
-                    if '.' in work_exp:
-                        work_exp = work_exp.split('.')[0]
-                    work_exp1 = int(work_exp)
-                dict_chk['work_exp'] = work_exp1
+                for i in work_exp_str.split('：'):
+                    if i.strip()[0].isdigit():
+                        work_exp = i.strip().split('年')[0]
+                        dict_int_year = {
+                            '一': 1,
+                            '二': 2,
+                            '三': 3,
+                            '四': 4,
+                            '五': 5,
+                            '六': 6,
+                            '七': 7,
+                            '八': 8,
+                            '九': 9
+                        }
+                        if work_exp in ['一', '二', '三', '四', '五', '六', '七', '八', '九']:
+                            work_exp1 = dict_int_year.get(work_exp)
+                        else:
+                            if '.' in work_exp:
+                                work_exp = work_exp.split('.')[0]
+                            work_exp1 = int(work_exp)
+                        dict_chk['work_exp'] = work_exp1
        else:
            dict_chk['work_exp'] = 0

@@ -198,8 +200,17 @@ def fmt_txt(chk_str):
        if new_chk_str and '教育经历' in true_chkStr:
            bold = re.compile(r'\n')
            true_chk = bold.sub(' ', new_chk_str, count=3)
-            school_str1 = re.findall(r'.*?学院.*?\n', true_chk, re.M)[0].replace('\n', '')
-            school_list = school_str1.split(' ')
+            n_true = 1
+            if '\n' not in true_chk:
+                n_true = 0
+            if n_true:
+                school_chk_list = re.findall(r'.*?学院.*?\n', true_chk, re.M)
+                if not school_chk_list:
+                    school_chk_list = re.findall(r'.*?大学.*?\n', true_chk, re.M)
+            else:
+                school_chk_list = [true_chk]
+            school_str1 = school_chk_list[0].replace('\n', '')
+            school_list = [i for i in school_str1.split(' ') if i != '']

    if school_list:
        time_index = -1
@@ -284,6 +295,10 @@ def fmt_txt(chk_str):
            dict_chk['age'] = int(age_str.split('：')[-1])
        else:
            dict_chk['age'] = int("".join(re.findall("\d+", age_str)))
+            age = dict_chk['age']
+            if age >= 100:
+                from builtins import str
+                dict_chk['age'] = int(str(age)[0:2])
    else:
        age1 = re.findall(r'[0-9]{2}.*?岁', true_chkStr, re.M)
        if age1:
@@ -302,10 +317,13 @@ def fmt_txt(chk_str):
            dict_chk['nation'] = nation_str.split('：')[-1]
    else:
        dict_chk['nation'] = '汉'
-    if '工作经历' not in true_chkStr:
+    if '工作经历' not in true_chkStr and '⼯作经历' not in true_chkStr:
        work_undergo_str = true_chkStr.split('工作经验')[-1]
    else:
-        work_undergo_str = true_chkStr.split('工作经历')[-1]
+        if '⼯作经历' in true_chkStr:
+            work_undergo_str = true_chkStr.split('⼯作经历')[-1]
+        else:
+            work_undergo_str = true_chkStr.split('工作经历')[-1]
    work_undergo_str1 = work_undergo_str.split('教育经历')[0]
    work_str2 = work_undergo_str1.strip().strip('\n')
    project_undergo = ''
@@ -320,6 +338,12 @@ def fmt_txt(chk_str):
            if '项目简介' in work_str2:
                project_undergo = ''.join(i for i in work_str2.split('项目简介')[:])
                # work_str2 = work_str2.split('项目简介')[0]
+            elif '项⽬经验' in work_str2:
+                project_undergo = ''.join(i for i in work_str2.split('项⽬经验')[1:])
+                work_str2 = work_str2.split('项⽬经验')[0]
+            elif '项目经历' in true_chkStr:
+                project_undergo = true_chkStr.split('项目经历')[-1].split('自我评价')[0]
+
    project_undergo2 = project_undergo.strip().strip('\n')
    str_2 = re.split('([0-9]{4}[/|.][0-9]{1,2})', work_str2)
    if len(str_2) <= 1:
@@ -330,6 +354,7 @@ def fmt_txt(chk_str):
        #     new_str = new_i_list[0] + ' 年' + new_i_list[1].replace(' ', '')
        #     work_str2 = work_str2.replace(i, new_str)
    str_2 = re.split('([0-9]{4}[/|.][0-9]{1,2})', work_str2)
+    project_undergo2 = project_undergo2.replace('年', '/').replace('月', '')
    project_list = re.split('([0-9]{4}[/|.][0-9]{1,2})', project_undergo2)
    if len(project_list) <= 1:
        if '：' in project_undergo2:
@@ -349,12 +374,12 @@ def fmt_txt(chk_str):
                if not i:
                    continue
                chk_i = i.strip()
-                if chk_i in ['-', '–', '―']:
+                if chk_i in ['-', '–', '―', '—']:
                    index2 = index + 2
                    new_str = project_list[index - 1] + i + project_list[index + 1] + project_list[index + 2]
                    new_str_list1.append(new_str)
                    continue
-                if chk_i not in ['-', '–', '―'] and ('-' in chk_i or '–' in chk_i or '―' in chk_i):
+                if chk_i not in ['-', '–', '―', '—'] and ('-' in chk_i or '–' in chk_i or '―' in chk_i or '—' in chk_i):
                    index2 = index
                    new_str = project_list[index - 1] + chk_i
                    new_str_list1.append(new_str)
@@ -444,7 +469,7 @@ def fmt_txt(chk_str):
                    new_str_list1 = []
                    index2 = -1
                    for index, i in enumerate(project_list):
-                        if not i[0].isdigit() and i not in ['-', '–', '―']:
+                        if not i[0].isdigit() and i not in ['-', '–', '―', '—']:
                            if index <= index2:
                                continue
                            index2 = index + 3
@@ -537,7 +562,7 @@ def fmt_txt(chk_str):
                            continue
                        if not i:
                            continue
-                        if i.strip() not in ['-', '–', '―']:
+                        if i.strip() in ['-', '–', '―', '—']:
                            index2 = index + 2
                            if not name2:
                                name3 = project_list[index + 2].split('\n')[-1]
@@ -556,7 +581,7 @@ def fmt_txt(chk_str):
                                name2 = name3
                            new_str_list1.append(new_str)
                            continue
-                        if i.strip() not in ['-', '–', '―'] and ('-' in i or '–' in i or '―' in i):
+                        if i.strip() not in ['-', '–', '―', '—'] and ('-' in i or '–' in i or '―' in i or '—' in i):
                            index2 = index
                            if not name2:
                                name3 = i.split('\n')[-1]
@@ -594,7 +619,8 @@ def fmt_txt(chk_str):
                                                    p_str + project_name_time_str2[index + 1] + project_name_time_str2[
                                                index + 2]).replace('.', '/')
                                            dict_project['name'] = project_name_time_str2[index - 1]
-
+                                            if index + 3 <= len(project_name_time_str2):
+                                                dict_project['work'] = project_name_time_str2[index + 3]
                                            break
                            project_chk_str2 = project_str.split(project_name_time_str)[-1]
                            project_chk_str2_list = re.split('(:|：)', project_chk_str2)
@@ -620,9 +646,9 @@ def fmt_txt(chk_str):
                                if new_p_chk_list:
                                    for p_str_true in new_p_chk_list:
                                        if '开发环境' in p_str_true or '开发工具' in p_str_true or '开发技术' in p_str_true or '模块' in p_str_true:
-                                            dict_project['work'] += re.split('[:|：]', p_str_true)[-1]
+                                            dict_project['comment'] += re.split('[:|：]', p_str_true)[-1]
                                            continue
-                                        if '项目描述' in p_str_true or '功能介绍' in p_str_true:
+                                        if '项目描述' in p_str_true or '功能介绍' in p_str_true or '项⽬描述' in p_str_true:
                                            dict_project['comment'] += re.split('[:|：]', p_str_true)[-1]
                                            continue
                                        if '职责' in p_str_true:
@@ -674,7 +700,7 @@ def fmt_txt(chk_str):
                                        chk_key = 'duty'
                                        continue
                                project_undergo_list.append(dict_project1)
-    dict_chk['project_undergo'] = project_undergo_list
+    dict_chk['project_undergo'] = [i for i in project_undergo_list if i != {'name': '', 'time': '', 'comment': '', 'work': '', 'duty': ''}]

    # 数字开头
    if work_str2[0].isdigit():
@@ -685,12 +711,12 @@ def fmt_txt(chk_str):
                continue
            if not i:
                continue
-            if i.strip() in ['-', '–', '―']:
+            if i.strip() in ['-', '–', '―', '—']:
                index1 = index + 2
                new_str = str_2[index - 1] + i + str_2[index + 1] + str_2[index + 2]
                new_str_list.append(new_str)
                continue
-            if i.strip() not in ['-', '–', '―'] and ('-' in i or '–' in i or '―' in i):
+            if i.strip() not in ['-', '–', '―', '—'] and ('-' in i or '–' in i or '―' in i or '—' in i):
                index1 = index
                new_str = str_2[index - 1] + i
                new_str_list.append(new_str)
@@ -742,23 +768,27 @@ def fmt_txt(chk_str):
                continue
            if not i:
                continue
-            if i.strip() in ['-', '–', '―']:
+            if i.strip() in ['-', '–', '―', '—']:
                index1 = index + 2
                if not name:
-                    name1 = str_2[index + 2].split('\n')[-1]
+                    name1 = str_2[index + 2].strip().strip('\n').split('\n')[-1]
+                    if str_2[index + 2].strip().endswith('：') or str_2[index + 2].strip().endswith(':'):
+                        name1 = str_2[index + 2].split('\n')[-2]
                    new_str = str_2[index - 2] + str_2[index - 1] + i + str_2[index + 1] + \
                              str_2[index + 2].split(name1)[0]
                    name = name1
                else:
-                    name1 = str_2[index + 2].split('\n')[-1]
-                    if name1:
+                    name1 = str_2[index + 2].strip().strip('\n').split('\n')[-1]
+                    if str_2[index + 2].strip().endswith('：') or str_2[index + 2].strip().endswith(':'):
+                        name1 = str_2[index + 2].split('\n')[-2]
+                    if name1 and '公司' in name1:
                        new_str = name + str_2[index - 1] + i + str_2[index + 1] + str_2[index + 2].split(name1)[0]
                    else:
                        new_str = name + str_2[index - 1] + i + str_2[index + 1] + str_2[index + 2]
                    name = name1
                new_str_list.append(new_str)
                continue
-            if i.strip() not in ['-', '–', '―'] and ('-' in i or '–' in i or '―' in i):
+            if i.strip() not in ['-', '–', '―', '—'] and ('-' in i or '–' in i or '―' in i or '—' in i):
                index1 = index
                if not name:
                    name1 = i.split('\n')[-1]
@ -784,38 +814,73 @@ def fmt_txt(chk_str):
                    new_str_list2 = new_str_list1[0].split('（', 1)
                else:
                    new_str_list2 = new_str_list1[0].split(' ', 1)
-                work_dict['company_name'] = new_str_list2[0]
-                if ':' in new_str_list2[1]:
-                    work_dict['time'] = new_str_list2[1].split(':')[-1].replace('.', '/').strip()
-                elif '：' in new_str_list2[1]:
-                    work_dict['time'] = new_str_list2[1].split('：')[-1].replace('.', '/').strip()
-                elif '）' in new_str_list2[1]:
-                    date_list = re.findall('[0-9]{4}[/|.][0-9]{1,2}', new_str_list2[1])
-                    work_dict['time'] = date_list[0] + '-' + date_list[1]
+                company_name_str = new_str_list2[0]
+                if ':' in company_name_str or '：' in company_name_str:
+                    if ':' in company_name_str:
+                        company_name_str = company_name_str.split(':')[-1]
+                    if '：' in company_name_str:
+                        company_name_str = company_name_str.split('：')[-1]
+                    if company_name_str.strip() == '':
+                        company_name_str = new_str_list2[1]
+                date_time_list = re.findall('[0-9]{4}[/|.][0-9]{1,2}', company_name_str)
+                if len(date_time_list) >= 2:
+                    work_dict['company_name'] = company_name_str.split(date_time_list[0])[0]
+                    work_dict['time'] = date_time_list[0] + '-' + date_time_list[1]
                else:
-                    work_dict['time'] = new_str_list2[1].replace('.', '/').strip()
-                if len(new_str_list1) > 1:
-                    if '：' in new_str_list1[1]:
-                        work_dict['position_name'] = new_str_list1[1].split('：')[-1]
-                    if work_dict['position_name']:
-                        work_duty = new_str_list1[3:]
-                        duty1 = new_str_list1[2].split('职责')[-1]
-                        duty = duty1.join((x for x in work_duty))
-                        work_dict['duty'] = duty
-                    if '负责' in new_str_list1[1]:
-                        duty1 = new_str_list1[1].split('负责')[-1]
-                        duty = duty1.join((x for x in new_str_list1[2:]))
-                        work_dict['duty'] = duty
+                    work_dict['company_name'] = company_name_str
+                if len(new_str_list2) > 1:
+                    if ':' in new_str_list2[1]:
+                        work_dict['time'] = new_str_list2[1].split(':')[-1].replace('.', '/').strip()
+                    elif '：' in new_str_list2[1]:
+                        work_dict['time'] = new_str_list2[1].split('：')[-1].replace('.', '/').strip()
+                    elif '）' in new_str_list2[1]:
+                        date_list = re.findall('[0-9]{4}[/|.][0-9]{1,2}', new_str_list2[1])
+                        work_dict['time'] = date_list[0] + '-' + date_list[1]
+                    else:
+                        work_dict['time'] = new_str_list2[1].replace('.', '/').strip()
+                        if work_dict['time'] == work_dict['company_name']:
+                            work_dict['time'] = new_str_list1[1].split('：')[-1].replace('.', '/').strip()
+                        if ' ' in work_dict['time']:
+                            chk_time_str = work_dict['time']
+                            work_dict['time'] = chk_time_str.split(' ')[0]
+                            work_dict['position_name'] = chk_time_str.split(' ')[-1]
+                    if len(new_str_list1) > 1:
+                        if '：' in new_str_list1[1]:
+                            work_dict['position_name'] = new_str_list1[1].split('：')[-1]
+                            if work_dict['position_name'].replace('.', '/').strip() == work_dict['time']:
+                                work_dict['position_name'] = ''
+                        if work_dict['position_name']:
+                            work_duty = new_str_list1[3:]
+                            duty1 = new_str_list1[2].split('职责')[-1]
+                            duty = duty1.join((x for x in work_duty))
+                            work_dict['duty'] = duty
+                        if '负责' in new_str_list1[1]:
+                            duty1 = new_str_list1[1].split('负责')[-1]
+                            duty = duty1.join((x for x in new_str_list1[2:]))
+                            work_dict['duty'] = duty
+                        else:
+                            if '工作描述' in work_str and '专业技能' in work_str:
+                                work_dict['duty'] = work_str.split('工作描述')[-1].split('专业技能')[0]
+                                # work_dict['duty'] = work_str.split('工作描述')[-1].split('专业技能')[-1]
+                else:
+                    for i in new_str_list1:
+                        if '时间' in i:
+                            work_dict['time'] = i.split('：')[-1].strip()
+                            continue
+                        if '职 位' in i:
+                            work_dict['position_name'] = i.split('：')[-1].strip()
+                            continue
                work_list.append(work_dict)
    dict_chk['work_list'] = work_list
    review = ''
    upgrade = true_chkStr.split('教育经历')[-1]
-    if '自我评价' in upgrade or '自我描述' in upgrade:
+    if '自我评价' in upgrade or '自我描述' in upgrade or '⾃我评价' in upgrade:
        if '自我评价' in upgrade:
            review = upgrade.split('自我评价')[-1].split('技能特长')[0]
-        else:
-            if '自我描述' in upgrade:
-                review = upgrade.split('自我描述')[-1].split('技能特长')[0]
+        elif '自我描述' in upgrade:
+            review = upgrade.split('自我描述')[-1].split('技能特长')[0]
+        elif '⾃我评价' in upgrade:
+            review = upgrade.split('⾃我评价')[-1].split('技能特长')[0]
    else:
        review_chk = true_chkStr.split('工作经历')[0]
        if '自我评价' in review_chk:
@ -946,7 +1011,10 @@ def fmt_txt(chk_str):
            specialty_do_str = upgrade.split('专业技能')[-1].split('工作经验')[0]
            specialty_do = [i for i in specialty_do_str.split('\n') if i.strip() != '']
        if '技能' in upgrade:
-            specialty_do_str = upgrade.split('技能')[-1].split('项目经验')[0]
+            if '项目经验' in upgrade:
+                specialty_do_str = upgrade.split('技能')[-1].split('项目经验')[0]
+            else:
+                specialty_do_str = upgrade.split('技能')[-1].split('项目经历')[0]
            specialty_do = [i.strip('') for i in specialty_do_str.split('\n') if i.strip() not in ['', '']]
    dict_chk['remembrance'] = remembrance
    dict_chk['specialty_do'] = specialty_do
@ -2379,4 +2447,4 @@ egreat，海尔，MeleA20，MeleA31，LG1154，极米，杰科，亿典等机顶

    """

-    fmt_txt(chk_str10)
+    fmt_txt(chk_str14)