简历解析兼容

This commit is contained in:
李宗振 2022-10-08 15:42:34 +08:00
parent 2f9ef29bf8
commit b2ccffe0d8

View File

@ -385,76 +385,136 @@ def fmt_txt(chk_str):
new_str_list1.append(new_str) new_str_list1.append(new_str)
continue continue
if new_str_list1: if new_str_list1:
for project_str in new_str_list1: if '项目:' in new_str_list1[0]:
project_name_time_str = project_str.split('\n')[0] for project_str in new_str_list1:
dict_project = { dict_project = {
'name': '', 'name': '',
'time': '', 'time': '',
'comment': '', 'comment': '',
'work': '', 'work': '',
'duty': '', 'duty': '',
} }
project_name_time_str2 = re.split('([0-9]{4}[/|.][0-9]{1,2})', project_name_time_str) time_str = ''
if project_name_time_str2: company_name1 = ''
for index, p_str in enumerate(project_name_time_str2): job_name = ''
if not p_str: chk_project_list = project_str.split('\n')
chk_key = ''
for index, str_project in enumerate(chk_project_list):
# 解析时间以及工作经历
if index == 0:
time_name_list = str_project.split(' ')
time_str = time_name_list[0] + time_name_list[1]
for project_str1 in time_name_list[2:]:
if '公司' in project_str1:
company_name1 = project_str1
if '/' in company_name1:
new_chk_company = company_name1.split('/')
company_name1 = new_chk_company[0]
job_name = new_chk_company[1]
continue
job_name = project_str1
continue continue
if p_str[0].isdigit(): if '项目:' in str_project:
dict_project['time'] = ( dict_project['name'] = str_project.split('')[-1]
p_str + project_name_time_str2[index + 1] + project_name_time_str2[ continue
index + 2]).replace('.', '/') if '开发环境' in str_project or '开发工具' in str_project or '开发技术' in str_project:
name_str = project_name_time_str2[index + 3].strip() dict_project['duty'] += re.split('[:|]', str_project)[-1]
if ' ' in name_str: chk_key = 'duty'
name = name_str.split(' ')[0] continue
else: if '项目描述' in str_project or '功能介绍' in str_project:
name = name_str dict_project['comment'] += re.split('[:|]', str_project)[-1]
dict_project['name'] = name chk_key = 'comment'
break continue
project_chk_str2 = project_str.split(project_name_time_str)[-1] if '职责' in str_project or '负责' in str_project:
if project_chk_str2.replace('\n', '').replace('', '').strip().startswith(dict_project['name']): dict_project['duty'] += re.split('[:|]', str_project)[-1]
new_chk_project = project_chk_str2.replace('\n', '').replace('', '').strip() chk_key = 'duty'
dict_project['comment'] += new_chk_project.split('职责')[0] continue
dict_project['duty'] += new_chk_project.split('职责')[1].split('相关技术')[0] if chk_key:
dict_project['duty'] += new_chk_project.split('职责')[1].split('相关技术')[-1] dict_project[chk_key] += str_project
else: continue
project_chk_str2_list = re.split('(:|)', project_chk_str2) dict_project['time'] = time_str
if project_chk_str2_list: dict_project['work'] = job_name
index3 = -1 project_undergo_list.append(dict_project)
start_name = '' # 存在工作公司经历:
new_p_chk_list = [] if company_name1:
for index, p_str3 in enumerate(project_chk_str2_list): work_dict = {
if index <= index3: 'company_name': company_name1,
'time': time_str,
'position_name': job_name,
'duty': dict_project['duty'],
}
work_list.append(work_dict)
else:
for project_str in new_str_list1:
project_name_time_str = project_str.split('\n')[0]
dict_project = {
'name': '',
'time': '',
'comment': '',
'work': '',
'duty': '',
}
project_name_time_str2 = re.split('([0-9]{4}[/|.][0-9]{1,2})', project_name_time_str)
if project_name_time_str2:
for index, p_str in enumerate(project_name_time_str2):
if not p_str:
continue continue
if dict_project['name'] in p_str3: if p_str[0].isdigit():
dict_project['comment'] += p_str3.split('\n\n')[0] dict_project['time'] = (
if p_str3 in [':', '']: p_str + project_name_time_str2[index + 1] + project_name_time_str2[
if not re.split('[\n|\t]', project_chk_str2_list[index + 1])[0].strip() and len( index + 2]).replace('.', '/')
project_chk_str2_list[index + 1]) <= 5: name_str = project_name_time_str2[index + 3].strip()
continue if ' ' in name_str:
start_name = project_chk_str2_list[index + 1].split('\n')[-1] name = name_str.split(' ')[0]
if start_name:
new_p_str = project_chk_str2_list[index - 1].split('\n')[-1] + p_str3 + \
project_chk_str2_list[index + 1].split(start_name)[0]
else: else:
new_p_str = project_chk_str2_list[index - 1].split('\n')[-1] + p_str3 + \ name = name_str
project_chk_str2_list[index + 1] dict_project['name'] = name
new_p_chk_list.append(new_p_str) break
if new_p_chk_list: project_chk_str2 = project_str.split(project_name_time_str)[-1]
for p_str_true in new_p_chk_list: if project_chk_str2.replace('\n', '').replace('', '').strip().startswith(dict_project['name']):
if '开发环境' in p_str_true or '开发工具' in p_str_true or '开发技术' in p_str_true or '模块' in p_str_true: new_chk_project = project_chk_str2.replace('\n', '').replace('', '').strip()
if '职责' in p_str_true: dict_project['comment'] += new_chk_project.split('职责')[0]
dict_project['duty'] += p_str_true.split('职责')[-1].split('相关技术')[0] dict_project['duty'] += new_chk_project.split('职责')[1].split('相关技术')[0]
dict_project['work'] += p_str_true.split('职责')[-1].split('相关技术')[1] dict_project['duty'] += new_chk_project.split('职责')[1].split('相关技术')[-1]
else:
project_chk_str2_list = re.split('(:|)', project_chk_str2)
if project_chk_str2_list:
index3 = -1
start_name = ''
new_p_chk_list = []
for index, p_str3 in enumerate(project_chk_str2_list):
if index <= index3:
continue
if dict_project['name'] in p_str3:
dict_project['comment'] += p_str3.split('\n\n')[0]
if p_str3 in [':', '']:
if not re.split('[\n|\t]', project_chk_str2_list[index + 1])[0].strip() and len(
project_chk_str2_list[index + 1]) <= 5:
continue
start_name = project_chk_str2_list[index + 1].split('\n')[-1]
if start_name:
new_p_str = project_chk_str2_list[index - 1].split('\n')[-1] + p_str3 + \
project_chk_str2_list[index + 1].split(start_name)[0]
else: else:
new_p_str = project_chk_str2_list[index - 1].split('\n')[-1] + p_str3 + \
project_chk_str2_list[index + 1]
new_p_chk_list.append(new_p_str)
if new_p_chk_list:
for p_str_true in new_p_chk_list:
if '开发环境' in p_str_true or '开发工具' in p_str_true or '开发技术' in p_str_true or '模块' in p_str_true:
if '职责' in p_str_true:
dict_project['duty'] += p_str_true.split('职责')[-1].split('相关技术')[0]
dict_project['work'] += p_str_true.split('职责')[-1].split('相关技术')[1]
else:
dict_project['duty'] += re.split('[:|]', p_str_true)[-1]
continue
if '项目描述' in p_str_true or '功能介绍' in p_str_true:
dict_project['comment'] += re.split('[:|]', p_str_true)[-1]
continue
if '职责' in p_str_true:
dict_project['duty'] += re.split('[:|]', p_str_true)[-1] dict_project['duty'] += re.split('[:|]', p_str_true)[-1]
continue continue
if '项目描述' in p_str_true or '功能介绍' in p_str_true: project_undergo_list.append(dict_project)
dict_project['comment'] += re.split('[:|]', p_str_true)[-1]
continue
if '职责' in p_str_true:
dict_project['duty'] += re.split('[:|]', p_str_true)[-1]
continue
project_undergo_list.append(dict_project)
# 项目名开头 # 项目名开头
else: else:
if project_undergo2.startswith(':') or project_undergo2.startswith(''): if project_undergo2.startswith(':') or project_undergo2.startswith(''):
@ -700,7 +760,8 @@ def fmt_txt(chk_str):
chk_key = 'duty' chk_key = 'duty'
continue continue
project_undergo_list.append(dict_project1) project_undergo_list.append(dict_project1)
dict_chk['project_undergo'] = [i for i in project_undergo_list if i != {'name': '', 'time': '', 'comment': '', 'work': '', 'duty': ''}] dict_chk['project_undergo'] = [i for i in project_undergo_list if
i != {'name': '', 'time': '', 'comment': '', 'work': '', 'duty': ''}]
# 数字开头 # 数字开头
if work_str2[0].isdigit(): if work_str2[0].isdigit():
@ -2447,4 +2508,4 @@ egreat海尔MeleA20MeleA31LG1154极米杰科亿典等机顶
""" """
fmt_txt(chk_str14) fmt_txt(chk_str3)