From bad2f4acd77177df7d7327ea9525425b75794dc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=80=C3=AE=C3=97=C3=9A=C3=95=C3=B1?= Date: Tue, 13 Sep 2022 17:57:13 +0800 Subject: [PATCH] =?UTF-8?q?=E6=95=B0=E6=8D=AE=E8=AF=A6=E6=83=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/api_v1/endpoints/interview.py | 61 ------------------------- utils/func.py | 4 +- utils/re_to_jianli.py | 74 ++++++++++++++++++++----------- 3 files changed, 50 insertions(+), 89 deletions(-) diff --git a/api/api_v1/endpoints/interview.py b/api/api_v1/endpoints/interview.py index e8b8b98..9c5d981 100644 --- a/api/api_v1/endpoints/interview.py +++ b/api/api_v1/endpoints/interview.py @@ -462,72 +462,11 @@ async def interview_file_insert( data['work_exp'] = float(true_work_exp.group()) print(data) - # 项目切割 - # if data['project_undergo']: - # string = data['project_undergo'][0] - # project_u = string.split('项目名') - # for chkstr in project_u: - # if ':' not in chkstr: - # continue - # chkdict = {} - # chklist = chkstr.split('\n\n') - # for turestr in chklist: - # turestr.strip() - # if not turestr: - # continue - # if turestr.startswith('称'): - # turestr1 = '项目名' + turestr - # chklist1 = turestr1.split('\n') - # chklist += chklist1 - # continue - # true_d_list = turestr.split(':') - # if len(true_d_list) == 2: - # chk_list = deepcopy(true_d_list) - # chkdict[chk_list[0]] = chk_list[1] - # dp_dict = { - # 'time': '', - # 'name': '', - # 'work': '', - # 'comment': '', - # 'duty': '', - # } - # - # true_dict = deepcopy(dp_dict) - # for key, v in chkdict.items(): - # if key == '项目名称': - # true_dict['name'] = v - # continue - # if key == '开发周期': - # true_dict['time'] = v - # continue - # if key == '项目描述': - # true_dict['comment'] = v - # continue - # if key in ['技术要点', '功能模块']: - # true_dict['work'] += v - # continue - # if key == '职务': - # true_dict['duty'] = v - # continue - # # true_dict = deepcopy(chkdict) - # true_upgrade.append(true_dict) work_list = data['work_list'] language = data['language'] project_undergo = data['project_undergo'] remembrance = data['remembrance'] - # true_work = [] - # language_list = [] - # remembrance_list = [] - # true_upgrade = [] - # if work_list: - # true_work = json.loads(work_list) - # if language: - # language_list = json.loads(language) - # if project_undergo: - # true_upgrade = json.loads(project_undergo) - # if remembrance: - # remembrance_list = json.loads(remembrance) res_data = { 'data': data, diff --git a/utils/func.py b/utils/func.py index ec9828e..aee74d6 100644 --- a/utils/func.py +++ b/utils/func.py @@ -360,7 +360,7 @@ def doc2pdf(fn, path_data, filename): if __name__ == '__main__': pass - # fn=r'C:\Users\Administrator\Desktop\面试简历\智联招聘_张双琪_Web开发工程师_中文.doc' - # path_data=r'C:\Users\Administrator\Desktop\面试简历\\' + # fn=r'C:\Users\Administrator\Desktop\面试简历1\智联招聘_张双琪_Web开发工程师_中文.doc' + # path_data=r'C:\Users\Administrator\Desktop\面试简历1\\' # filename='智联招聘_张双琪_Web开发工程师_中文.doc' # doc2pdf(fn, path_data, filename) diff --git a/utils/re_to_jianli.py b/utils/re_to_jianli.py index cd713d0..f23ffbf 100644 --- a/utils/re_to_jianli.py +++ b/utils/re_to_jianli.py @@ -152,8 +152,15 @@ def fmt_txt(chk_str): school_str = school[0].replace('\n', '').strip() if ' ' in school_str and ':' not in school_str: school_list = school_str.split(' ') + else: - school_list = [school_str] + if ':' in school_str: + school_list1 = school_str.split(':') + for index, i in enumerate(school_list1): + if i.endswith('院校'): + school_list = [school_list1[index + 1], school_list1[index + 1]] + else: + school_list = [school_str] else: school = re.findall(r'.*?大学.*?\n', true_chkStr, re.M) if school: @@ -291,7 +298,7 @@ def fmt_txt(chk_str): if project_list[0] == '': project_list = project_list[1:] # 数字开头 - if project_list[0][0].isdigit(): + if project_list[0][0].isdigit() and project_list[0][1] != '、': index2 = -1 new_str_list1 = [] for index, i in enumerate(project_list): @@ -473,11 +480,18 @@ def fmt_txt(chk_str): if not p_str: continue if p_str[0].isdigit(): - dict_project['time'] = ( - p_str + project_name_time_str2[index + 1] + project_name_time_str2[ - index + 2]).replace('.', '/') - dict_project['name'] = project_name_time_str2[index - 1] - break + if u'\u4e00' <= project_name_time_str2[index + 1].strip()[0] <= u'\u9fff': + dict_project['time'] = p_str.replace('.', '/') + dict_project['name'] = project_name_time_str2[index + 1].split(' ')[-2] + dict_project['work'] = project_name_time_str2[index + 1].split(' ')[-1] + break + else: + dict_project['time'] = ( + p_str + project_name_time_str2[index + 1] + project_name_time_str2[ + index + 2]).replace('.', '/') + dict_project['name'] = project_name_time_str2[index - 1] + + break project_chk_str2 = project_str.split(project_name_time_str)[-1] project_chk_str2_list = re.split('(:|:)', project_chk_str2) if project_chk_str2_list: @@ -608,7 +622,7 @@ def fmt_txt(chk_str): work_dict['position_name'] = new_str_list1[1].split(':')[-1] work_duty = new_str_list1[3:] duty1 = new_str_list1[2].split('职责')[-1] - duty = duty1.join((str(x) for x in work_duty)) + duty = duty1.join((x for x in work_duty)) work_dict['duty'] = duty work_list.append(work_dict) dict_chk['work_list'] = work_list @@ -627,22 +641,31 @@ def fmt_txt(chk_str): if '自我描述' in review_chk: review = review_chk.split('自我描述')[-1] dict_chk['review'] = review - upgrade = upgrade.strip().strip('\n') - upgrade_chk_list = upgrade.split('\n') - upgrade_list = [] - if upgrade_chk_list: - for index, i in enumerate(upgrade_chk_list): - if index == 0: - if sum([1 if u'\u4e00' <= x <= u'\u9fff' else 0 for x in i]) <= 0: - upgrade_list.append(i + upgrade_chk_list[index + 1] + upgrade_chk_list[index + 2]) - break - else: - upgrade_list.append(i) - break - dict_chk['upgrade'] = upgrade_list - specialty_do = [] + if '教育经历' in true_chkStr: + upgrade = true_chkStr.split('教育经历')[-1] + upgrade = upgrade.strip().strip('\n') + upgrade_chk_list = upgrade.split('\n') + upgrade_list = [] + if upgrade_chk_list: + for index, i in enumerate(upgrade_chk_list): + if index == 0: + if sum([1 if u'\u4e00' <= x <= u'\u9fff' else 0 for x in i]) <= 0: + upgrade_list.append(i + upgrade_chk_list[index + 1] + upgrade_chk_list[index + 2]) + break + else: + upgrade_list.append(i) + break + dict_chk['upgrade'] = upgrade_list + + else: + dict_chk['upgrade'] = [] + if '教育经历' in true_chkStr: + upgrade = true_chkStr.split('教育经历')[-1] + else: + upgrade = true_chkStr language = [] remembrance = [] + specialty_do = [] if '证书' in upgrade: chk_upgrade_str1 = upgrade.split('证书')[-1] if '专业技能' in chk_upgrade_str1: @@ -742,10 +765,10 @@ def fmt_txt(chk_str): if __name__ == '__main__': - # txt = getText_pdf('D:\wokerplay\面试简历\吴操.pdf') + # txt = getText_pdf('D:\wokerplay\面试简历1\Android高级开发工程师-方明洋-拉勾招聘.pdf') # 拉勾 chk_str1 = """ - + 方明洋 5年工作经验 | 本科 | 27岁 | 男 期望职位: Android / 期望薪资:17k-18k @@ -914,7 +937,6 @@ ListView和GridView视图切换的效果。 3.在分类信息中,每个分类 教育经历 汉口学院 本科 / 电子信息工程 2011 - 2015 - """ # 前程无忧 chk_str2 = """ @@ -1335,4 +1357,4 @@ vms视频和grafana监控系统; """ - fmt_txt(chk_str6) + fmt_txt(chk_str1)