数据详情

2022-09-13 17:57:13 +08:00 · 2022-09-13 17:57:13 +08:00 · bad2f4acd7
commit bad2f4acd7
parent 7755381cd6
3 changed files with 50 additions and 89 deletions
--- a/api/api_v1/endpoints/interview.py
+++ b/api/api_v1/endpoints/interview.py
@@ -462,72 +462,11 @@ async def interview_file_insert(
                    data['work_exp'] = float(true_work_exp.group())

            print(data)
-            # 项目切割
-            # if data['project_undergo']:
-            #     string = data['project_undergo'][0]
-            #     project_u = string.split('项目名')
-            #     for chkstr in project_u:
-            #         if '：' not in chkstr:
-            #             continue
-            #         chkdict = {}
-            #         chklist = chkstr.split('\n\n')
-            #         for turestr in chklist:
-            #             turestr.strip()
-            #             if not turestr:
-            #                 continue
-            #             if turestr.startswith('称'):
-            #                 turestr1 = '项目名' + turestr
-            #                 chklist1 = turestr1.split('\n')
-            #                 chklist += chklist1
-            #                 continue
-            #             true_d_list = turestr.split('：')
-            #             if len(true_d_list) == 2:
-            #                 chk_list = deepcopy(true_d_list)
-            #                 chkdict[chk_list[0]] = chk_list[1]
-            #         dp_dict = {
-            #             'time': '',
-            #             'name': '',
-            #             'work': '',
-            #             'comment': '',
-            #             'duty': '',
-            #         }
-            #
-            #         true_dict = deepcopy(dp_dict)
-            #         for key, v in chkdict.items():
-            #             if key == '项目名称':
-            #                 true_dict['name'] = v
-            #                 continue
-            #             if key == '开发周期':
-            #                 true_dict['time'] = v
-            #                 continue
-            #             if key == '项目描述':
-            #                 true_dict['comment'] = v
-            #                 continue
-            #             if key in ['技术要点', '功能模块']:
-            #                 true_dict['work'] += v
-            #                 continue
-            #             if key == '职务':
-            #                 true_dict['duty'] = v
-            #                 continue
-            #         # true_dict = deepcopy(chkdict)
-            #         true_upgrade.append(true_dict)

            work_list = data['work_list']
            language = data['language']
            project_undergo = data['project_undergo']
            remembrance = data['remembrance']
-            # true_work = []
-            # language_list = []
-            # remembrance_list = []
-            # true_upgrade = []
-            # if work_list:
-            #     true_work = json.loads(work_list)
-            # if language:
-            #     language_list = json.loads(language)
-            # if project_undergo:
-            #     true_upgrade = json.loads(project_undergo)
-            # if remembrance:
-            #     remembrance_list = json.loads(remembrance)

            res_data = {
                'data': data,
--- a/utils/func.py
+++ b/utils/func.py
@@ -360,7 +360,7 @@ def doc2pdf(fn, path_data, filename):

 if __name__ == '__main__':
    pass
-    # fn=r'C:\Users\Administrator\Desktop\面试简历\智联招聘_张双琪_Web开发工程师_中文.doc'
-    # path_data=r'C:\Users\Administrator\Desktop\面试简历\\'
+    # fn=r'C:\Users\Administrator\Desktop\面试简历1\智联招聘_张双琪_Web开发工程师_中文.doc'
+    # path_data=r'C:\Users\Administrator\Desktop\面试简历1\\'
    # filename='智联招聘_张双琪_Web开发工程师_中文.doc'
    # doc2pdf(fn, path_data, filename)
--- a/utils/re_to_jianli.py
+++ b/utils/re_to_jianli.py
@@ -152,8 +152,15 @@ def fmt_txt(chk_str):
        school_str = school[0].replace('\n', '').strip()
        if ' ' in school_str and '：' not in school_str:
            school_list = school_str.split(' ')
+
        else:
-            school_list = [school_str]
+            if '：' in school_str:
+                school_list1 = school_str.split('：')
+                for index, i in enumerate(school_list1):
+                    if i.endswith('院校'):
+                        school_list = [school_list1[index + 1], school_list1[index + 1]]
+            else:
+                school_list = [school_str]
    else:
        school = re.findall(r'.*?大学.*?\n', true_chkStr, re.M)
        if school:
@@ -291,7 +298,7 @@ def fmt_txt(chk_str):
    if project_list[0] == '':
        project_list = project_list[1:]
    # 数字开头
-    if project_list[0][0].isdigit():
+    if project_list[0][0].isdigit() and project_list[0][1] != '、':
        index2 = -1
        new_str_list1 = []
        for index, i in enumerate(project_list):
@@ -473,11 +480,18 @@ def fmt_txt(chk_str):
                            if not p_str:
                                continue
                            if p_str[0].isdigit():
-                                dict_project['time'] = (
-                                            p_str + project_name_time_str2[index + 1] + project_name_time_str2[
-                                        index + 2]).replace('.', '/')
-                                dict_project['name'] = project_name_time_str2[index - 1]
-                                break
+                                if u'\u4e00' <= project_name_time_str2[index + 1].strip()[0] <= u'\u9fff':
+                                    dict_project['time'] = p_str.replace('.', '/')
+                                    dict_project['name'] = project_name_time_str2[index + 1].split(' ')[-2]
+                                    dict_project['work'] = project_name_time_str2[index + 1].split(' ')[-1]
+                                    break
+                                else:
+                                    dict_project['time'] = (
+                                                p_str + project_name_time_str2[index + 1] + project_name_time_str2[
+                                            index + 2]).replace('.', '/')
+                                    dict_project['name'] = project_name_time_str2[index - 1]
+
+                                    break
                    project_chk_str2 = project_str.split(project_name_time_str)[-1]
                    project_chk_str2_list = re.split('(:|：)', project_chk_str2)
                    if project_chk_str2_list:
@@ -608,7 +622,7 @@ def fmt_txt(chk_str):
                work_dict['position_name'] = new_str_list1[1].split('：')[-1]
                work_duty = new_str_list1[3:]
                duty1 = new_str_list1[2].split('职责')[-1]
-                duty = duty1.join((str(x) for x in work_duty))
+                duty = duty1.join((x for x in work_duty))
                work_dict['duty'] = duty
                work_list.append(work_dict)
    dict_chk['work_list'] = work_list
@@ -627,22 +641,31 @@ def fmt_txt(chk_str):
        if '自我描述' in review_chk:
            review = review_chk.split('自我描述')[-1]
    dict_chk['review'] = review
-    upgrade = upgrade.strip().strip('\n')
-    upgrade_chk_list = upgrade.split('\n')
-    upgrade_list = []
-    if upgrade_chk_list:
-        for index, i in enumerate(upgrade_chk_list):
-            if index == 0:
-                if sum([1 if u'\u4e00' <= x <= u'\u9fff' else 0 for x in i]) <= 0:
-                    upgrade_list.append(i + upgrade_chk_list[index + 1] + upgrade_chk_list[index + 2])
-                    break
-                else:
-                    upgrade_list.append(i)
-                    break
-    dict_chk['upgrade'] = upgrade_list
-    specialty_do = []
+    if '教育经历' in true_chkStr:
+        upgrade = true_chkStr.split('教育经历')[-1]
+        upgrade = upgrade.strip().strip('\n')
+        upgrade_chk_list = upgrade.split('\n')
+        upgrade_list = []
+        if upgrade_chk_list:
+            for index, i in enumerate(upgrade_chk_list):
+                if index == 0:
+                    if sum([1 if u'\u4e00' <= x <= u'\u9fff' else 0 for x in i]) <= 0:
+                        upgrade_list.append(i + upgrade_chk_list[index + 1] + upgrade_chk_list[index + 2])
+                        break
+                    else:
+                        upgrade_list.append(i)
+                        break
+        dict_chk['upgrade'] = upgrade_list
+
+    else:
+        dict_chk['upgrade'] = []
+    if '教育经历' in true_chkStr:
+        upgrade = true_chkStr.split('教育经历')[-1]
+    else:
+        upgrade = true_chkStr
    language = []
    remembrance = []
+    specialty_do = []
    if '证书' in upgrade:
        chk_upgrade_str1 = upgrade.split('证书')[-1]
        if '专业技能' in chk_upgrade_str1:
@@ -742,7 +765,7 @@ def fmt_txt(chk_str):


 if __name__ == '__main__':
-    # txt = getText_pdf('D:\wokerplay\面试简历\吴操.pdf')
+    # txt = getText_pdf('D:\wokerplay\面试简历1\Android高级开发工程师-方明洋-拉勾招聘.pdf')
    # 拉勾
    chk_str1 = """
      
@@ -914,7 +937,6 @@ ListView和GridView视图切换的效果。 3.在分类信息中，每个分类
 教育经历 
 汉口学院   本科 / 电子信息工程  2011 - 2015 
 
-
    """
    # 前程无忧
    chk_str2 = """
@@ -1335,4 +1357,4 @@ vms视频和grafana监控系统；

    """

-    fmt_txt(chk_str6)
+    fmt_txt(chk_str1)