自制简历匹配

2022-09-22 18:45:39 +08:00 · 2022-09-22 18:45:39 +08:00 · 4de04a83ab
commit 4de04a83ab
parent 9b943ec14b
1 changed file with 185 additions and 115 deletions
--- a/utils/re_to_jianli.py
+++ b/utils/re_to_jianli.py
@@ -322,6 +322,14 @@ def fmt_txt(chk_str):
                # work_str2 = work_str2.split('项目简介')[0]
    project_undergo2 = project_undergo.strip().strip('\n')
    str_2 = re.split('([0-9]{4}[/|.][0-9]{1,2})', work_str2)
+    if len(str_2) <= 1:
+        work_str2 = work_str2.replace(' 年', '/').replace('年', '/').replace('月', '').replace(' 月', '')
+        # find_str2_list = re.findall('[0-9]{4}年[0-9]{1,2} 月', work_str2)
+        # for i in find_str2_list:
+        #     new_i_list = i.split('年')
+        #     new_str = new_i_list[0] + ' 年' + new_i_list[1].replace(' ', '')
+        #     work_str2 = work_str2.replace(i, new_str)
+    str_2 = re.split('([0-9]{4}[/|.][0-9]{1,2})', work_str2)
    project_list = re.split('([0-9]{4}[/|.][0-9]{1,2})', project_undergo2)
    if len(project_list) <= 1:
        if '：' in project_undergo2:
@@ -367,7 +375,8 @@ def fmt_txt(chk_str):
                            if not p_str:
                                continue
                            if p_str[0].isdigit():
-                                dict_project['time'] = (p_str + project_name_time_str2[index + 1] + project_name_time_str2[
+                                dict_project['time'] = (
+                                        p_str + project_name_time_str2[index + 1] + project_name_time_str2[
                                    index + 2]).replace('.', '/')
                                name_str = project_name_time_str2[index + 3].strip()
                                if '  ' in name_str:
@@ -442,7 +451,8 @@ def fmt_txt(chk_str):
                            if len(project_list) < index2 + 1:
                                break
                            else:
-                                new_str = i + project_list[index + 1] + project_list[index + 2] + project_list[index + 3]
+                                new_str = i + project_list[index + 1] + project_list[index + 2] + project_list[
+                                    index + 3]
                                new_str_list1.append(new_str)
                    if new_str_list1:
                        for project_chk_str2 in new_str_list1:
@@ -485,36 +495,40 @@ def fmt_txt(chk_str):
                            project_str_list = project_str_i.split('\n')
                            if project_str_list:
                                dict_project1 = copy.deepcopy(dict_project)
-                                dict_project1['name'] = project_str_list[0].split('：')[-1]
+                                if '：' in project_str_list[0]:
+                                    dict_project1['name'] = project_str_list[0].split('：')[-1].strip()
+                                else:
+                                    dict_project1['name'] = project_str_list[0].split(':')[-1].strip()
                                chk_key = ''
-                                for index, i in enumerate(project_str_list[1:]):
+                                for i in project_str_list[1:]:
                                    if not i or i.isdigit():
                                        continue
-                                    if '：' not in i and chk_key:
+                                    if '：' not in i and ':' not in i and chk_key:
                                        dict_project1[chk_key] += i
                                        continue
-                                    if i.startswith('开发周期'):
+                                    if '开发周期' in i and ('：' in i or ':' in i):
+                                        if '：' in i:
                                            dict_project1['time'] = i.split('：')[-1]
+                                        else:
+                                            dict_project1['time'] = i.split(':')[-1]
                                        continue
-                                    if i.startswith('开发环境'):
+                                    if ('开发环境' in i or '项目描述' in i) and ('：' in i or ':' in i):
+                                        if '：' in i:
                                            dict_project1['comment'] += i.split('：')[-1]
+                                        else:
+                                            dict_project1['comment'] += i.split(':')[-1]
                                        chk_key = 'comment'
                                        continue
-                                    if i.startswith('功能模块'):
+                                    if ('模块' in i or '框架' in i or '技术要点' in i or '职责' in i) and ('：' in i or ':' in i):
+                                        if '：' in i:
                                            dict_project1['duty'] = i.split('：')[-1]
-                                        chk_key = 'duty'
-                                        continue
-                                    if i.startswith('项目描述'):
-                                        dict_project1['comment'] += i.split('：')[-1]
-                                        chk_key = 'comment'
-                                        continue
-                                    if i.startswith('技术要点'):
-                                        dict_project1['duty'] += i.split('：')[-1]
+                                        else:
+                                            dict_project1['duty'] = i.split(':')[-1]
                                        chk_key = 'duty'
                                        continue
                                project_undergo_list.append(dict_project1)
            else:
-                # if re.findall('([0-9]{4}[/|.][0-9]{1,2})', project_undergo2):
+                if re.findall('([0-9]{4}[/|.][0-9]{1,2})', project_undergo2):
                    index2 = -1
                    name2 = ''
                    new_str_list1 = []
@@ -527,7 +541,8 @@ def fmt_txt(chk_str):
                            index2 = index + 2
                            if not name2:
                                name3 = project_list[index + 2].split('\n')[-1]
-                            new_str = project_list[index - 2] + project_list[index - 1] + i + project_list[index + 1] + \
+                                new_str = project_list[index - 2] + project_list[index - 1] + i + project_list[
+                                    index + 1] + \
                                          project_list[index + 2].split(name3)[0]
                                name2 = name3
                            else:
@@ -614,6 +629,51 @@ def fmt_txt(chk_str):
                                            dict_project['duty'] += re.split('[:|：]', p_str_true)[-1]
                                            continue
                            project_undergo_list.append(dict_project)
+                else:
+                    dict_project = {
+                        'name': '',
+                        'time': '',
+                        'comment': '',
+                        'work': '',
+                        'duty': '',
+                    }
+                    for project_str_i in project_list[1:]:
+                        if project_str_i:
+                            project_str_list = project_str_i.split('\n')
+                            if project_str_list:
+                                dict_project1 = copy.deepcopy(dict_project)
+                                if '：' in project_str_list[0]:
+                                    dict_project1['name'] = project_str_list[0].split('：')[-1].strip()
+                                else:
+                                    dict_project1['name'] = project_str_list[0].split(':')[-1].strip()
+                                chk_key = ''
+                                for i in project_str_list[1:]:
+                                    if not i or i.isdigit():
+                                        continue
+                                    if '：' not in i and ':' not in i and chk_key:
+                                        dict_project1[chk_key] += i
+                                        continue
+                                    if '开发周期' in i and ('：' in i or ':' in i):
+                                        if '：' in i:
+                                            dict_project1['time'] = i.split('：')[-1]
+                                        else:
+                                            dict_project1['time'] = i.split(':')[-1]
+                                        continue
+                                    if ('开发环境' in i or '项目描述' in i) and ('：' in i or ':' in i):
+                                        if '：' in i:
+                                            dict_project1['comment'] += i.split('：')[-1]
+                                        else:
+                                            dict_project1['comment'] += i.split(':')[-1]
+                                        chk_key = 'comment'
+                                        continue
+                                    if ('模块' in i or '框架' in i or '技术要点' in i or '职责' in i) and ('：' in i or ':' in i):
+                                        if '：' in i:
+                                            dict_project1['duty'] = i.split('：')[-1]
+                                        else:
+                                            dict_project1['duty'] = i.split(':')[-1]
+                                        chk_key = 'duty'
+                                        continue
+                                project_undergo_list.append(dict_project1)
    dict_chk['project_undergo'] = project_undergo_list

    # 数字开头
@@ -720,12 +780,18 @@ def fmt_txt(chk_str):
                    'position_name': '',
                    'duty': '',
                }
+                if '（' in new_str_list1[0]:
+                    new_str_list2 = new_str_list1[0].split('（', 1)
+                else:
                    new_str_list2 = new_str_list1[0].split(' ', 1)
                work_dict['company_name'] = new_str_list2[0]
                if ':' in new_str_list2[1]:
                    work_dict['time'] = new_str_list2[1].split(':')[-1].replace('.', '/').strip()
                elif '：' in new_str_list2[1]:
                    work_dict['time'] = new_str_list2[1].split('：')[-1].replace('.', '/').strip()
+                elif '）' in new_str_list2[1]:
+                    date_list = re.findall('[0-9]{4}[/|.][0-9]{1,2}', new_str_list2[1])
+                    work_dict['time'] = date_list[0] + '-' + date_list[1]
                else:
                    work_dict['time'] = new_str_list2[1].replace('.', '/').strip()
                if len(new_str_list1) > 1:
@@ -736,6 +802,10 @@ def fmt_txt(chk_str):
                        duty1 = new_str_list1[2].split('职责')[-1]
                        duty = duty1.join((x for x in work_duty))
                        work_dict['duty'] = duty
+                    if '负责' in new_str_list1[1]:
+                        duty1 = new_str_list1[1].split('负责')[-1]
+                        duty = duty1.join((x for x in new_str_list1[2:]))
+                        work_dict['duty'] = duty
                work_list.append(work_dict)
    dict_chk['work_list'] = work_list
    review = ''
@@ -2309,4 +2379,4 @@ egreat，海尔，MeleA20，MeleA31，LG1154，极米，杰科，亿典等机顶

    """

-    fmt_txt(chk_str9)
+    fmt_txt(chk_str10)