From bad2f4acd77177df7d7327ea9525425b75794dc1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=8E=E5=AE=97=E6=8C=AF?= <lizz556@163.com>
Date: Tue, 13 Sep 2022 17:57:13 +0800
Subject: [PATCH] =?UTF-8?q?=E6=95=B0=E6=8D=AE=E8=AF=A6=E6=83=85?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 api/api_v1/endpoints/interview.py | 61 -------------------------
 utils/func.py                     |  4 +-
 utils/re_to_jianli.py             | 74 ++++++++++++++++++++-----------
 3 files changed, 50 insertions(+), 89 deletions(-)

diff --git a/api/api_v1/endpoints/interview.py b/api/api_v1/endpoints/interview.py
index e8b8b98..9c5d981 100644
--- a/api/api_v1/endpoints/interview.py
+++ b/api/api_v1/endpoints/interview.py
@@ -462,72 +462,11 @@ async def interview_file_insert(
                     data['work_exp'] = float(true_work_exp.group())
 
             print(data)
-            # 项目切割
-            # if data['project_undergo']:
-            #     string = data['project_undergo'][0]
-            #     project_u = string.split('项目名')
-            #     for chkstr in project_u:
-            #         if '：' not in chkstr:
-            #             continue
-            #         chkdict = {}
-            #         chklist = chkstr.split('\n\n')
-            #         for turestr in chklist:
-            #             turestr.strip()
-            #             if not turestr:
-            #                 continue
-            #             if turestr.startswith('称'):
-            #                 turestr1 = '项目名' + turestr
-            #                 chklist1 = turestr1.split('\n')
-            #                 chklist += chklist1
-            #                 continue
-            #             true_d_list = turestr.split('：')
-            #             if len(true_d_list) == 2:
-            #                 chk_list = deepcopy(true_d_list)
-            #                 chkdict[chk_list[0]] = chk_list[1]
-            #         dp_dict = {
-            #             'time': '',
-            #             'name': '',
-            #             'work': '',
-            #             'comment': '',
-            #             'duty': '',
-            #         }
-            #
-            #         true_dict = deepcopy(dp_dict)
-            #         for key, v in chkdict.items():
-            #             if key == '项目名称':
-            #                 true_dict['name'] = v
-            #                 continue
-            #             if key == '开发周期':
-            #                 true_dict['time'] = v
-            #                 continue
-            #             if key == '项目描述':
-            #                 true_dict['comment'] = v
-            #                 continue
-            #             if key in ['技术要点', '功能模块']:
-            #                 true_dict['work'] += v
-            #                 continue
-            #             if key == '职务':
-            #                 true_dict['duty'] = v
-            #                 continue
-            #         # true_dict = deepcopy(chkdict)
-            #         true_upgrade.append(true_dict)
 
             work_list = data['work_list']
             language = data['language']
             project_undergo = data['project_undergo']
             remembrance = data['remembrance']
-            # true_work = []
-            # language_list = []
-            # remembrance_list = []
-            # true_upgrade = []
-            # if work_list:
-            #     true_work = json.loads(work_list)
-            # if language:
-            #     language_list = json.loads(language)
-            # if project_undergo:
-            #     true_upgrade = json.loads(project_undergo)
-            # if remembrance:
-            #     remembrance_list = json.loads(remembrance)
 
             res_data = {
                 'data': data,
diff --git a/utils/func.py b/utils/func.py
index ec9828e..aee74d6 100644
--- a/utils/func.py
+++ b/utils/func.py
@@ -360,7 +360,7 @@ def doc2pdf(fn, path_data, filename):
 
 if __name__ == '__main__':
     pass
-    # fn=r'C:\Users\Administrator\Desktop\面试简历\智联招聘_张双琪_Web开发工程师_中文.doc'
-    # path_data=r'C:\Users\Administrator\Desktop\面试简历\\'
+    # fn=r'C:\Users\Administrator\Desktop\面试简历1\智联招聘_张双琪_Web开发工程师_中文.doc'
+    # path_data=r'C:\Users\Administrator\Desktop\面试简历1\\'
     # filename='智联招聘_张双琪_Web开发工程师_中文.doc'
     # doc2pdf(fn, path_data, filename)
diff --git a/utils/re_to_jianli.py b/utils/re_to_jianli.py
index cd713d0..f23ffbf 100644
--- a/utils/re_to_jianli.py
+++ b/utils/re_to_jianli.py
@@ -152,8 +152,15 @@ def fmt_txt(chk_str):
         school_str = school[0].replace('\n', '').strip()
         if ' ' in school_str and '：' not in school_str:
             school_list = school_str.split(' ')
+
         else:
-            school_list = [school_str]
+            if '：' in school_str:
+                school_list1 = school_str.split('：')
+                for index, i in enumerate(school_list1):
+                    if i.endswith('院校'):
+                        school_list = [school_list1[index + 1], school_list1[index + 1]]
+            else:
+                school_list = [school_str]
     else:
         school = re.findall(r'.*?大学.*?\n', true_chkStr, re.M)
         if school:
@@ -291,7 +298,7 @@ def fmt_txt(chk_str):
     if project_list[0] == '':
         project_list = project_list[1:]
     # 数字开头
-    if project_list[0][0].isdigit():
+    if project_list[0][0].isdigit() and project_list[0][1] != '、':
         index2 = -1
         new_str_list1 = []
         for index, i in enumerate(project_list):
@@ -473,11 +480,18 @@ def fmt_txt(chk_str):
                             if not p_str:
                                 continue
                             if p_str[0].isdigit():
-                                dict_project['time'] = (
-                                            p_str + project_name_time_str2[index + 1] + project_name_time_str2[
-                                        index + 2]).replace('.', '/')
-                                dict_project['name'] = project_name_time_str2[index - 1]
-                                break
+                                if u'\u4e00' <= project_name_time_str2[index + 1].strip()[0] <= u'\u9fff':
+                                    dict_project['time'] = p_str.replace('.', '/')
+                                    dict_project['name'] = project_name_time_str2[index + 1].split(' ')[-2]
+                                    dict_project['work'] = project_name_time_str2[index + 1].split(' ')[-1]
+                                    break
+                                else:
+                                    dict_project['time'] = (
+                                                p_str + project_name_time_str2[index + 1] + project_name_time_str2[
+                                            index + 2]).replace('.', '/')
+                                    dict_project['name'] = project_name_time_str2[index - 1]
+
+                                    break
                     project_chk_str2 = project_str.split(project_name_time_str)[-1]
                     project_chk_str2_list = re.split('(:|：)', project_chk_str2)
                     if project_chk_str2_list:
@@ -608,7 +622,7 @@ def fmt_txt(chk_str):
                 work_dict['position_name'] = new_str_list1[1].split('：')[-1]
                 work_duty = new_str_list1[3:]
                 duty1 = new_str_list1[2].split('职责')[-1]
-                duty = duty1.join((str(x) for x in work_duty))
+                duty = duty1.join((x for x in work_duty))
                 work_dict['duty'] = duty
                 work_list.append(work_dict)
     dict_chk['work_list'] = work_list
@@ -627,22 +641,31 @@ def fmt_txt(chk_str):
         if '自我描述' in review_chk:
             review = review_chk.split('自我描述')[-1]
     dict_chk['review'] = review
-    upgrade = upgrade.strip().strip('\n')
-    upgrade_chk_list = upgrade.split('\n')
-    upgrade_list = []
-    if upgrade_chk_list:
-        for index, i in enumerate(upgrade_chk_list):
-            if index == 0:
-                if sum([1 if u'\u4e00' <= x <= u'\u9fff' else 0 for x in i]) <= 0:
-                    upgrade_list.append(i + upgrade_chk_list[index + 1] + upgrade_chk_list[index + 2])
-                    break
-                else:
-                    upgrade_list.append(i)
-                    break
-    dict_chk['upgrade'] = upgrade_list
-    specialty_do = []
+    if '教育经历' in true_chkStr:
+        upgrade = true_chkStr.split('教育经历')[-1]
+        upgrade = upgrade.strip().strip('\n')
+        upgrade_chk_list = upgrade.split('\n')
+        upgrade_list = []
+        if upgrade_chk_list:
+            for index, i in enumerate(upgrade_chk_list):
+                if index == 0:
+                    if sum([1 if u'\u4e00' <= x <= u'\u9fff' else 0 for x in i]) <= 0:
+                        upgrade_list.append(i + upgrade_chk_list[index + 1] + upgrade_chk_list[index + 2])
+                        break
+                    else:
+                        upgrade_list.append(i)
+                        break
+        dict_chk['upgrade'] = upgrade_list
+
+    else:
+        dict_chk['upgrade'] = []
+    if '教育经历' in true_chkStr:
+        upgrade = true_chkStr.split('教育经历')[-1]
+    else:
+        upgrade = true_chkStr
     language = []
     remembrance = []
+    specialty_do = []
     if '证书' in upgrade:
         chk_upgrade_str1 = upgrade.split('证书')[-1]
         if '专业技能' in chk_upgrade_str1:
@@ -742,10 +765,10 @@ def fmt_txt(chk_str):
 
 
 if __name__ == '__main__':
-    # txt = getText_pdf('D:\wokerplay\面试简历\吴操.pdf')
+    # txt = getText_pdf('D:\wokerplay\面试简历1\Android高级开发工程师-方明洋-拉勾招聘.pdf')
     # 拉勾
     chk_str1 = """
-     
+      
 方明洋 
 5年工作经验 | 本科 | 27岁 | 男  
 期望职位： Android /  期望薪资：17k-18k 
@@ -914,7 +937,6 @@ ListView和GridView视图切换的效果。 3.在分类信息中，每个分类
 教育经历 
 汉口学院   本科 / 电子信息工程  2011 - 2015 
  
-
     """
     # 前程无忧
     chk_str2 = """
@@ -1335,4 +1357,4 @@ vms视频和grafana监控系统；
 
     """
 
-    fmt_txt(chk_str6)
+    fmt_txt(chk_str1)