数据详情

2022-09-13 17:57:13 +08:00 · 2022-09-13 17:57:13 +08:00 · bad2f4acd7
commit bad2f4acd7
parent 7755381cd6
3 changed files with 50 additions and 89 deletions
--- a/api/api_v1/endpoints/interview.py
+++ b/api/api_v1/endpoints/interview.py
@@ -462,72 +462,11 @@ async def interview_file_insert(
                    data['work_exp'] = float(true_work_exp.group())
            print(data)
            # 项目切割
            # if data['project_undergo']:
            #     string = data['project_undergo'][0]
            #     project_u = string.split('项目名')
            #     for chkstr in project_u:
            #         if '：' not in chkstr:
            #             continue
            #         chkdict = {}
            #         chklist = chkstr.split('\n\n')
            #         for turestr in chklist:
            #             turestr.strip()
            #             if not turestr:
            #                 continue
            #             if turestr.startswith('称'):
            #                 turestr1 = '项目名' + turestr
            #                 chklist1 = turestr1.split('\n')
            #                 chklist += chklist1
            #                 continue
            #             true_d_list = turestr.split('：')
            #             if len(true_d_list) == 2:
            #                 chk_list = deepcopy(true_d_list)
            #                 chkdict[chk_list[0]] = chk_list[1]
            #         dp_dict = {
            #             'time': '',
            #             'name': '',
            #             'work': '',
            #             'comment': '',
            #             'duty': '',
            #         }
            #
            #         true_dict = deepcopy(dp_dict)
            #         for key, v in chkdict.items():
            #             if key == '项目名称':
            #                 true_dict['name'] = v
            #                 continue
            #             if key == '开发周期':
            #                 true_dict['time'] = v
            #                 continue
            #             if key == '项目描述':
            #                 true_dict['comment'] = v
            #                 continue
            #             if key in ['技术要点', '功能模块']:
            #                 true_dict['work'] += v
            #                 continue
            #             if key == '职务':
            #                 true_dict['duty'] = v
            #                 continue
            #         # true_dict = deepcopy(chkdict)
            #         true_upgrade.append(true_dict)
            work_list = data['work_list']
            language = data['language']
            project_undergo = data['project_undergo']
            remembrance = data['remembrance']
            # true_work = []
            # language_list = []
            # remembrance_list = []
            # true_upgrade = []
            # if work_list:
            #     true_work = json.loads(work_list)
            # if language:
            #     language_list = json.loads(language)
            # if project_undergo:
            #     true_upgrade = json.loads(project_undergo)
            # if remembrance:
            #     remembrance_list = json.loads(remembrance)
            res_data = {
                'data': data,
--- a/utils/func.py
+++ b/utils/func.py
@@ -360,7 +360,7 @@ def doc2pdf(fn, path_data, filename):
 if __name__ == '__main__':
    pass
-    # fn=r'C:\Users\Administrator\Desktop\面试简历\智联招聘_张双琪_Web开发工程师_中文.doc'
+    # fn=r'C:\Users\Administrator\Desktop\面试简历1\智联招聘_张双琪_Web开发工程师_中文.doc'
-    # path_data=r'C:\Users\Administrator\Desktop\面试简历\\'
+    # path_data=r'C:\Users\Administrator\Desktop\面试简历1\\'
    # filename='智联招聘_张双琪_Web开发工程师_中文.doc'
    # doc2pdf(fn, path_data, filename)
--- a/utils/re_to_jianli.py
+++ b/utils/re_to_jianli.py
@@ -152,8 +152,15 @@ def fmt_txt(chk_str):
        school_str = school[0].replace('\n', '').strip()
        if ' ' in school_str and '：' not in school_str:
            school_list = school_str.split(' ')
        else:
-            school_list = [school_str]
+            if '：' in school_str:
                school_list1 = school_str.split('：')
                for index, i in enumerate(school_list1):
                    if i.endswith('院校'):
                        school_list = [school_list1[index + 1], school_list1[index + 1]]
            else:
                school_list = [school_str]
    else:
        school = re.findall(r'.*?大学.*?\n', true_chkStr, re.M)
        if school:
@@ -291,7 +298,7 @@ def fmt_txt(chk_str):
    if project_list[0] == '':
        project_list = project_list[1:]
    # 数字开头
-    if project_list[0][0].isdigit():
+    if project_list[0][0].isdigit() and project_list[0][1] != '、':
        index2 = -1
        new_str_list1 = []
        for index, i in enumerate(project_list):
@@ -473,11 +480,18 @@ def fmt_txt(chk_str):
                            if not p_str:
                                continue
                            if p_str[0].isdigit():
-                                dict_project['time'] = (
+                                if u'\u4e00' <= project_name_time_str2[index + 1].strip()[0] <= u'\u9fff':
-                                            p_str + project_name_time_str2[index + 1] + project_name_time_str2[
+                                    dict_project['time'] = p_str.replace('.', '/')
-                                        index + 2]).replace('.', '/')
+                                    dict_project['name'] = project_name_time_str2[index + 1].split(' ')[-2]
-                                dict_project['name'] = project_name_time_str2[index - 1]
+                                    dict_project['work'] = project_name_time_str2[index + 1].split(' ')[-1]
-                                break
+                                    break
                                else:
                                    dict_project['time'] = (
                                                p_str + project_name_time_str2[index + 1] + project_name_time_str2[
                                            index + 2]).replace('.', '/')
                                    dict_project['name'] = project_name_time_str2[index - 1]
                                    break
                    project_chk_str2 = project_str.split(project_name_time_str)[-1]
                    project_chk_str2_list = re.split('(:|：)', project_chk_str2)
                    if project_chk_str2_list:
@@ -608,7 +622,7 @@ def fmt_txt(chk_str):
                work_dict['position_name'] = new_str_list1[1].split('：')[-1]
                work_duty = new_str_list1[3:]
                duty1 = new_str_list1[2].split('职责')[-1]
-                duty = duty1.join((str(x) for x in work_duty))
+                duty = duty1.join((x for x in work_duty))
                work_dict['duty'] = duty
                work_list.append(work_dict)
    dict_chk['work_list'] = work_list
@@ -627,22 +641,31 @@ def fmt_txt(chk_str):
        if '自我描述' in review_chk:
            review = review_chk.split('自我描述')[-1]
    dict_chk['review'] = review
-    upgrade = upgrade.strip().strip('\n')
+    if '教育经历' in true_chkStr:
-    upgrade_chk_list = upgrade.split('\n')
+        upgrade = true_chkStr.split('教育经历')[-1]
-    upgrade_list = []
+        upgrade = upgrade.strip().strip('\n')
-    if upgrade_chk_list:
+        upgrade_chk_list = upgrade.split('\n')
-        for index, i in enumerate(upgrade_chk_list):
+        upgrade_list = []
-            if index == 0:
+        if upgrade_chk_list:
-                if sum([1 if u'\u4e00' <= x <= u'\u9fff' else 0 for x in i]) <= 0:
+            for index, i in enumerate(upgrade_chk_list):
-                    upgrade_list.append(i + upgrade_chk_list[index + 1] + upgrade_chk_list[index + 2])
+                if index == 0:
-                    break
+                    if sum([1 if u'\u4e00' <= x <= u'\u9fff' else 0 for x in i]) <= 0:
-                else:
+                        upgrade_list.append(i + upgrade_chk_list[index + 1] + upgrade_chk_list[index + 2])
-                    upgrade_list.append(i)
+                        break
-                    break
+                    else:
-    dict_chk['upgrade'] = upgrade_list
+                        upgrade_list.append(i)
-    specialty_do = []
+                        break
        dict_chk['upgrade'] = upgrade_list
    else:
        dict_chk['upgrade'] = []
    if '教育经历' in true_chkStr:
        upgrade = true_chkStr.split('教育经历')[-1]
    else:
        upgrade = true_chkStr
    language = []
    remembrance = []
    specialty_do = []
    if '证书' in upgrade:
        chk_upgrade_str1 = upgrade.split('证书')[-1]
        if '专业技能' in chk_upgrade_str1:
@@ -742,10 +765,10 @@ def fmt_txt(chk_str):
 if __name__ == '__main__':
-    # txt = getText_pdf('D:\wokerplay\面试简历\吴操.pdf')
+    # txt = getText_pdf('D:\wokerplay\面试简历1\Android高级开发工程师-方明洋-拉勾招聘.pdf')
    # 拉勾
    chk_str1 = """
-     
+      
 方明洋 
 5年工作经验 | 本科 | 27岁 | 男  
 期望职位： Android /  期望薪资：17k-18k 
@@ -914,7 +937,6 @@ ListView和GridView视图切换的效果。 3.在分类信息中，每个分类
 教育经历 
 汉口学院   本科 / 电子信息工程  2011 - 2015 
    """
    # 前程无忧
    chk_str2 = """
@@ -1335,4 +1357,4 @@ vms视频和grafana监控系统；
    """
-    fmt_txt(chk_str6)
+    fmt_txt(chk_str1)