自制简历匹配

This commit is contained in:
李宗振 2022-09-22 18:45:39 +08:00
parent 9b943ec14b
commit 4de04a83ab

View File

@@ -283,7 +283,7 @@ def fmt_txt(chk_str):
if age_str.split('')[-1].isdigit():
dict_chk['age'] = int(age_str.split('')[-1])
else:
dict_chk['age'] = int("".join(re.findall("\d+",age_str)))
dict_chk['age'] = int("".join(re.findall("\d+", age_str)))
else:
age1 = re.findall(r'[0-9]{2}.*?岁', true_chkStr, re.M)
if age1:
@@ -322,6 +322,14 @@ def fmt_txt(chk_str):
# work_str2 = work_str2.split('项目简介')[0]
project_undergo2 = project_undergo.strip().strip('\n')
str_2 = re.split('([0-9]{4}[/|.][0-9]{1,2})', work_str2)
if len(str_2) <= 1:
work_str2 = work_str2.replace('', '/').replace('', '/').replace('', '').replace('', '')
# find_str2_list = re.findall('[0-9]{4}年[0-9]{1,2} 月', work_str2)
# for i in find_str2_list:
# new_i_list = i.split('年')
# new_str = new_i_list[0] + ' 年' + new_i_list[1].replace(' ', '')
# work_str2 = work_str2.replace(i, new_str)
str_2 = re.split('([0-9]{4}[/|.][0-9]{1,2})', work_str2)
project_list = re.split('([0-9]{4}[/|.][0-9]{1,2})', project_undergo2)
if len(project_list) <= 1:
if '' in project_undergo2:
@@ -367,7 +375,8 @@ def fmt_txt(chk_str):
if not p_str:
continue
if p_str[0].isdigit():
dict_project['time'] = (p_str + project_name_time_str2[index + 1] + project_name_time_str2[
dict_project['time'] = (
p_str + project_name_time_str2[index + 1] + project_name_time_str2[
index + 2]).replace('.', '/')
name_str = project_name_time_str2[index + 3].strip()
if ' ' in name_str:
@@ -442,7 +451,8 @@ def fmt_txt(chk_str):
if len(project_list) < index2 + 1:
break
else:
new_str = i + project_list[index + 1] + project_list[index + 2] + project_list[index + 3]
new_str = i + project_list[index + 1] + project_list[index + 2] + project_list[
index + 3]
new_str_list1.append(new_str)
if new_str_list1:
for project_chk_str2 in new_str_list1:
@@ -485,135 +495,185 @@ def fmt_txt(chk_str):
project_str_list = project_str_i.split('\n')
if project_str_list:
dict_project1 = copy.deepcopy(dict_project)
dict_project1['name'] = project_str_list[0].split('')[-1]
if '' in project_str_list[0]:
dict_project1['name'] = project_str_list[0].split('')[-1].strip()
else:
dict_project1['name'] = project_str_list[0].split(':')[-1].strip()
chk_key = ''
for index, i in enumerate(project_str_list[1:]):
for i in project_str_list[1:]:
if not i or i.isdigit():
continue
if '' not in i and chk_key:
if '' not in i and ':' not in i and chk_key:
dict_project1[chk_key] += i
continue
if i.startswith('开发周期'):
dict_project1['time'] = i.split('')[-1]
if '开发周期' in i and ('' in i or ':' in i):
if '' in i:
dict_project1['time'] = i.split('')[-1]
else:
dict_project1['time'] = i.split(':')[-1]
continue
if i.startswith('开发环境'):
dict_project1['comment'] += i.split('')[-1]
if ('开发环境' in i or '项目描述' in i) and ('' in i or ':' in i):
if '' in i:
dict_project1['comment'] += i.split('')[-1]
else:
dict_project1['comment'] += i.split(':')[-1]
chk_key = 'comment'
continue
if i.startswith('功能模块'):
dict_project1['duty'] = i.split('')[-1]
chk_key = 'duty'
continue
if i.startswith('项目描述'):
dict_project1['comment'] += i.split('')[-1]
chk_key = 'comment'
continue
if i.startswith('技术要点'):
dict_project1['duty'] += i.split('')[-1]
if ('模块' in i or '框架' in i or '技术要点' in i or '职责' in i) and ('' in i or ':' in i):
if '' in i:
dict_project1['duty'] = i.split('')[-1]
else:
dict_project1['duty'] = i.split(':')[-1]
chk_key = 'duty'
continue
project_undergo_list.append(dict_project1)
else:
# if re.findall('([0-9]{4}[/|.][0-9]{1,2})', project_undergo2):
index2 = -1
name2 = ''
new_str_list1 = []
for index, i in enumerate(project_list):
if index <= index2:
continue
if not i:
continue
if i.strip() not in ['-', '', '']:
index2 = index + 2
if not name2:
name3 = project_list[index + 2].split('\n')[-1]
new_str = project_list[index - 2] + project_list[index - 1] + i + project_list[index + 1] + \
project_list[index + 2].split(name3)[0]
name2 = name3
else:
name3 = project_list[index + 2].split('\n')[-1]
if name2:
new_str = name2 + project_list[index - 1] + i + project_list[index + 1] + \
if re.findall('([0-9]{4}[/|.][0-9]{1,2})', project_undergo2):
index2 = -1
name2 = ''
new_str_list1 = []
for index, i in enumerate(project_list):
if index <= index2:
continue
if not i:
continue
if i.strip() not in ['-', '', '']:
index2 = index + 2
if not name2:
name3 = project_list[index + 2].split('\n')[-1]
new_str = project_list[index - 2] + project_list[index - 1] + i + project_list[
index + 1] + \
project_list[index + 2].split(name3)[0]
name2 = name3
else:
new_str = name2 + project_list[index - 1] + i + project_list[index + 1] + \
project_list[index + 2]
name2 = name3
new_str_list1.append(new_str)
continue
if i.strip() not in ['-', '', ''] and ('-' in i or '' in i or '' in i):
index2 = index
if not name2:
name3 = i.split('\n')[-1]
new_str = str_2[index - 2] + str_2[index - 1] + i.split(name3)[0]
name2 = name3
else:
name3 = i.split('\n')[-1]
new_str = str_2[index - 2] + str_2[index - 1] + i.split(name3)[0]
name2 = name3
new_str_list1.append(new_str)
continue
if new_str_list1:
for project_str in new_str_list1:
project_name_time_str = project_str.split('\n')[0]
dict_project = {
'name': '',
'time': '',
'comment': '',
'work': '',
'duty': '',
}
project_name_time_str2 = re.split('([0-9]{4}[/|.][0-9]{1,2})', project_name_time_str)
if project_name_time_str2:
for index, p_str in enumerate(project_name_time_str2):
if not p_str:
continue
if p_str[0].isdigit():
if u'\u4e00' <= project_name_time_str2[index + 1].strip()[0] <= u'\u9fff':
dict_project['time'] = p_str.replace('.', '/')
dict_project['name'] = project_name_time_str2[index + 1].split(' ')[-2]
dict_project['work'] = project_name_time_str2[index + 1].split(' ')[-1]
break
else:
dict_project['time'] = (
p_str + project_name_time_str2[index + 1] + project_name_time_str2[
index + 2]).replace('.', '/')
dict_project['name'] = project_name_time_str2[index - 1]
name3 = project_list[index + 2].split('\n')[-1]
if name2:
new_str = name2 + project_list[index - 1] + i + project_list[index + 1] + \
project_list[index + 2].split(name3)[0]
else:
new_str = name2 + project_list[index - 1] + i + project_list[index + 1] + \
project_list[index + 2]
name2 = name3
new_str_list1.append(new_str)
continue
if i.strip() not in ['-', '', ''] and ('-' in i or '' in i or '' in i):
index2 = index
if not name2:
name3 = i.split('\n')[-1]
new_str = str_2[index - 2] + str_2[index - 1] + i.split(name3)[0]
name2 = name3
else:
name3 = i.split('\n')[-1]
new_str = str_2[index - 2] + str_2[index - 1] + i.split(name3)[0]
name2 = name3
new_str_list1.append(new_str)
continue
if new_str_list1:
for project_str in new_str_list1:
project_name_time_str = project_str.split('\n')[0]
dict_project = {
'name': '',
'time': '',
'comment': '',
'work': '',
'duty': '',
}
project_name_time_str2 = re.split('([0-9]{4}[/|.][0-9]{1,2})', project_name_time_str)
if project_name_time_str2:
for index, p_str in enumerate(project_name_time_str2):
if not p_str:
continue
if p_str[0].isdigit():
if u'\u4e00' <= project_name_time_str2[index + 1].strip()[0] <= u'\u9fff':
dict_project['time'] = p_str.replace('.', '/')
dict_project['name'] = project_name_time_str2[index + 1].split(' ')[-2]
dict_project['work'] = project_name_time_str2[index + 1].split(' ')[-1]
break
else:
dict_project['time'] = (
p_str + project_name_time_str2[index + 1] + project_name_time_str2[
index + 2]).replace('.', '/')
dict_project['name'] = project_name_time_str2[index - 1]
break
project_chk_str2 = project_str.split(project_name_time_str)[-1]
project_chk_str2_list = re.split('(:|)', project_chk_str2)
if project_chk_str2_list:
index3 = -1
start_name = ''
new_p_chk_list = []
for index, p_str3 in enumerate(project_chk_str2_list):
if index <= index3:
continue
if p_str3 in [':', '']:
if not re.split('[\n|\t]', project_chk_str2_list[index + 1])[0].strip() and len(
project_chk_str2_list[index + 1]) <= 10:
break
project_chk_str2 = project_str.split(project_name_time_str)[-1]
project_chk_str2_list = re.split('(:|)', project_chk_str2)
if project_chk_str2_list:
index3 = -1
start_name = ''
new_p_chk_list = []
for index, p_str3 in enumerate(project_chk_str2_list):
if index <= index3:
continue
start_name = project_chk_str2_list[index + 1].split('\n')[-1]
if start_name:
new_p_str = project_chk_str2_list[index - 1].split('\n')[-1] + p_str3 + \
project_chk_str2_list[index + 1].split(start_name)[0]
else:
new_p_str = project_chk_str2_list[index - 1].split('\n')[-1] + p_str3 + \
project_chk_str2_list[index + 1]
new_p_chk_list.append(new_p_str)
if new_p_chk_list:
for p_str_true in new_p_chk_list:
if '开发环境' in p_str_true or '开发工具' in p_str_true or '开发技术' in p_str_true or '模块' in p_str_true:
dict_project['work'] += re.split('[:|]', p_str_true)[-1]
if p_str3 in [':', '']:
if not re.split('[\n|\t]', project_chk_str2_list[index + 1])[0].strip() and len(
project_chk_str2_list[index + 1]) <= 10:
continue
start_name = project_chk_str2_list[index + 1].split('\n')[-1]
if start_name:
new_p_str = project_chk_str2_list[index - 1].split('\n')[-1] + p_str3 + \
project_chk_str2_list[index + 1].split(start_name)[0]
else:
new_p_str = project_chk_str2_list[index - 1].split('\n')[-1] + p_str3 + \
project_chk_str2_list[index + 1]
new_p_chk_list.append(new_p_str)
if new_p_chk_list:
for p_str_true in new_p_chk_list:
if '开发环境' in p_str_true or '开发工具' in p_str_true or '开发技术' in p_str_true or '模块' in p_str_true:
dict_project['work'] += re.split('[:|]', p_str_true)[-1]
continue
if '项目描述' in p_str_true or '功能介绍' in p_str_true:
dict_project['comment'] += re.split('[:|]', p_str_true)[-1]
continue
if '职责' in p_str_true:
dict_project['duty'] += re.split('[:|]', p_str_true)[-1]
continue
project_undergo_list.append(dict_project)
else:
dict_project = {
'name': '',
'time': '',
'comment': '',
'work': '',
'duty': '',
}
for project_str_i in project_list[1:]:
if project_str_i:
project_str_list = project_str_i.split('\n')
if project_str_list:
dict_project1 = copy.deepcopy(dict_project)
if '' in project_str_list[0]:
dict_project1['name'] = project_str_list[0].split('')[-1].strip()
else:
dict_project1['name'] = project_str_list[0].split(':')[-1].strip()
chk_key = ''
for i in project_str_list[1:]:
if not i or i.isdigit():
continue
if '项目描述' in p_str_true or '功能介绍' in p_str_true:
dict_project['comment'] += re.split('[:|]', p_str_true)[-1]
if '' not in i and ':' not in i and chk_key:
dict_project1[chk_key] += i
continue
if '职责' in p_str_true:
dict_project['duty'] += re.split('[:|]', p_str_true)[-1]
if '开发周期' in i and ('' in i or ':' in i):
if '' in i:
dict_project1['time'] = i.split('')[-1]
else:
dict_project1['time'] = i.split(':')[-1]
continue
project_undergo_list.append(dict_project)
if ('开发环境' in i or '项目描述' in i) and ('' in i or ':' in i):
if '' in i:
dict_project1['comment'] += i.split('')[-1]
else:
dict_project1['comment'] += i.split(':')[-1]
chk_key = 'comment'
continue
if ('模块' in i or '框架' in i or '技术要点' in i or '职责' in i) and ('' in i or ':' in i):
if '' in i:
dict_project1['duty'] = i.split('')[-1]
else:
dict_project1['duty'] = i.split(':')[-1]
chk_key = 'duty'
continue
project_undergo_list.append(dict_project1)
dict_chk['project_undergo'] = project_undergo_list
# 数字开头
@@ -720,12 +780,18 @@ def fmt_txt(chk_str):
'position_name': '',
'duty': '',
}
new_str_list2 = new_str_list1[0].split(' ', 1)
if '' in new_str_list1[0]:
new_str_list2 = new_str_list1[0].split('', 1)
else:
new_str_list2 = new_str_list1[0].split(' ', 1)
work_dict['company_name'] = new_str_list2[0]
if ':' in new_str_list2[1]:
work_dict['time'] = new_str_list2[1].split(':')[-1].replace('.', '/').strip()
elif '' in new_str_list2[1]:
work_dict['time'] = new_str_list2[1].split('')[-1].replace('.', '/').strip()
elif '' in new_str_list2[1]:
date_list = re.findall('[0-9]{4}[/|.][0-9]{1,2}', new_str_list2[1])
work_dict['time'] = date_list[0] + '-' + date_list[1]
else:
work_dict['time'] = new_str_list2[1].replace('.', '/').strip()
if len(new_str_list1) > 1:
@@ -736,6 +802,10 @@ def fmt_txt(chk_str):
duty1 = new_str_list1[2].split('职责')[-1]
duty = duty1.join((x for x in work_duty))
work_dict['duty'] = duty
if '负责' in new_str_list1[1]:
duty1 = new_str_list1[1].split('负责')[-1]
duty = duty1.join((x for x in new_str_list1[2:]))
work_dict['duty'] = duty
work_list.append(work_dict)
dict_chk['work_list'] = work_list
review = ''
@@ -2309,4 +2379,4 @@ egreat海尔MeleA20MeleA31LG1154极米杰科亿典等机顶
"""
fmt_txt(chk_str9)
fmt_txt(chk_str10)