自制简历匹配
This commit is contained in:
parent
9b943ec14b
commit
4de04a83ab
@ -283,7 +283,7 @@ def fmt_txt(chk_str):
|
||||
if age_str.split(':')[-1].isdigit():
|
||||
dict_chk['age'] = int(age_str.split(':')[-1])
|
||||
else:
|
||||
dict_chk['age'] = int("".join(re.findall("\d+",age_str)))
|
||||
dict_chk['age'] = int("".join(re.findall("\d+", age_str)))
|
||||
else:
|
||||
age1 = re.findall(r'[0-9]{2}.*?岁', true_chkStr, re.M)
|
||||
if age1:
|
||||
@ -322,6 +322,14 @@ def fmt_txt(chk_str):
|
||||
# work_str2 = work_str2.split('项目简介')[0]
|
||||
project_undergo2 = project_undergo.strip().strip('\n')
|
||||
str_2 = re.split('([0-9]{4}[/|.][0-9]{1,2})', work_str2)
|
||||
if len(str_2) <= 1:
|
||||
work_str2 = work_str2.replace(' 年', '/').replace('年', '/').replace('月', '').replace(' 月', '')
|
||||
# find_str2_list = re.findall('[0-9]{4}年[0-9]{1,2} 月', work_str2)
|
||||
# for i in find_str2_list:
|
||||
# new_i_list = i.split('年')
|
||||
# new_str = new_i_list[0] + ' 年' + new_i_list[1].replace(' ', '')
|
||||
# work_str2 = work_str2.replace(i, new_str)
|
||||
str_2 = re.split('([0-9]{4}[/|.][0-9]{1,2})', work_str2)
|
||||
project_list = re.split('([0-9]{4}[/|.][0-9]{1,2})', project_undergo2)
|
||||
if len(project_list) <= 1:
|
||||
if ':' in project_undergo2:
|
||||
@ -367,7 +375,8 @@ def fmt_txt(chk_str):
|
||||
if not p_str:
|
||||
continue
|
||||
if p_str[0].isdigit():
|
||||
dict_project['time'] = (p_str + project_name_time_str2[index + 1] + project_name_time_str2[
|
||||
dict_project['time'] = (
|
||||
p_str + project_name_time_str2[index + 1] + project_name_time_str2[
|
||||
index + 2]).replace('.', '/')
|
||||
name_str = project_name_time_str2[index + 3].strip()
|
||||
if ' ' in name_str:
|
||||
@ -442,7 +451,8 @@ def fmt_txt(chk_str):
|
||||
if len(project_list) < index2 + 1:
|
||||
break
|
||||
else:
|
||||
new_str = i + project_list[index + 1] + project_list[index + 2] + project_list[index + 3]
|
||||
new_str = i + project_list[index + 1] + project_list[index + 2] + project_list[
|
||||
index + 3]
|
||||
new_str_list1.append(new_str)
|
||||
if new_str_list1:
|
||||
for project_chk_str2 in new_str_list1:
|
||||
@ -485,135 +495,185 @@ def fmt_txt(chk_str):
|
||||
project_str_list = project_str_i.split('\n')
|
||||
if project_str_list:
|
||||
dict_project1 = copy.deepcopy(dict_project)
|
||||
dict_project1['name'] = project_str_list[0].split(':')[-1]
|
||||
if ':' in project_str_list[0]:
|
||||
dict_project1['name'] = project_str_list[0].split(':')[-1].strip()
|
||||
else:
|
||||
dict_project1['name'] = project_str_list[0].split(':')[-1].strip()
|
||||
chk_key = ''
|
||||
for index, i in enumerate(project_str_list[1:]):
|
||||
for i in project_str_list[1:]:
|
||||
if not i or i.isdigit():
|
||||
continue
|
||||
if ':' not in i and chk_key:
|
||||
if ':' not in i and ':' not in i and chk_key:
|
||||
dict_project1[chk_key] += i
|
||||
continue
|
||||
if i.startswith('开发周期'):
|
||||
dict_project1['time'] = i.split(':')[-1]
|
||||
if '开发周期' in i and (':' in i or ':' in i):
|
||||
if ':' in i:
|
||||
dict_project1['time'] = i.split(':')[-1]
|
||||
else:
|
||||
dict_project1['time'] = i.split(':')[-1]
|
||||
continue
|
||||
if i.startswith('开发环境'):
|
||||
dict_project1['comment'] += i.split(':')[-1]
|
||||
if ('开发环境' in i or '项目描述' in i) and (':' in i or ':' in i):
|
||||
if ':' in i:
|
||||
dict_project1['comment'] += i.split(':')[-1]
|
||||
else:
|
||||
dict_project1['comment'] += i.split(':')[-1]
|
||||
chk_key = 'comment'
|
||||
continue
|
||||
if i.startswith('功能模块'):
|
||||
dict_project1['duty'] = i.split(':')[-1]
|
||||
chk_key = 'duty'
|
||||
continue
|
||||
if i.startswith('项目描述'):
|
||||
dict_project1['comment'] += i.split(':')[-1]
|
||||
chk_key = 'comment'
|
||||
continue
|
||||
if i.startswith('技术要点'):
|
||||
dict_project1['duty'] += i.split(':')[-1]
|
||||
if ('模块' in i or '框架' in i or '技术要点' in i or '职责' in i) and (':' in i or ':' in i):
|
||||
if ':' in i:
|
||||
dict_project1['duty'] = i.split(':')[-1]
|
||||
else:
|
||||
dict_project1['duty'] = i.split(':')[-1]
|
||||
chk_key = 'duty'
|
||||
continue
|
||||
project_undergo_list.append(dict_project1)
|
||||
else:
|
||||
# if re.findall('([0-9]{4}[/|.][0-9]{1,2})', project_undergo2):
|
||||
index2 = -1
|
||||
name2 = ''
|
||||
new_str_list1 = []
|
||||
for index, i in enumerate(project_list):
|
||||
if index <= index2:
|
||||
continue
|
||||
if not i:
|
||||
continue
|
||||
if i.strip() not in ['-', '–', '―']:
|
||||
index2 = index + 2
|
||||
if not name2:
|
||||
name3 = project_list[index + 2].split('\n')[-1]
|
||||
new_str = project_list[index - 2] + project_list[index - 1] + i + project_list[index + 1] + \
|
||||
project_list[index + 2].split(name3)[0]
|
||||
name2 = name3
|
||||
else:
|
||||
name3 = project_list[index + 2].split('\n')[-1]
|
||||
if name2:
|
||||
new_str = name2 + project_list[index - 1] + i + project_list[index + 1] + \
|
||||
if re.findall('([0-9]{4}[/|.][0-9]{1,2})', project_undergo2):
|
||||
index2 = -1
|
||||
name2 = ''
|
||||
new_str_list1 = []
|
||||
for index, i in enumerate(project_list):
|
||||
if index <= index2:
|
||||
continue
|
||||
if not i:
|
||||
continue
|
||||
if i.strip() not in ['-', '–', '―']:
|
||||
index2 = index + 2
|
||||
if not name2:
|
||||
name3 = project_list[index + 2].split('\n')[-1]
|
||||
new_str = project_list[index - 2] + project_list[index - 1] + i + project_list[
|
||||
index + 1] + \
|
||||
project_list[index + 2].split(name3)[0]
|
||||
name2 = name3
|
||||
else:
|
||||
new_str = name2 + project_list[index - 1] + i + project_list[index + 1] + \
|
||||
project_list[index + 2]
|
||||
name2 = name3
|
||||
new_str_list1.append(new_str)
|
||||
continue
|
||||
if i.strip() not in ['-', '–', '―'] and ('-' in i or '–' in i or '―' in i):
|
||||
index2 = index
|
||||
if not name2:
|
||||
name3 = i.split('\n')[-1]
|
||||
new_str = str_2[index - 2] + str_2[index - 1] + i.split(name3)[0]
|
||||
name2 = name3
|
||||
else:
|
||||
name3 = i.split('\n')[-1]
|
||||
new_str = str_2[index - 2] + str_2[index - 1] + i.split(name3)[0]
|
||||
name2 = name3
|
||||
new_str_list1.append(new_str)
|
||||
continue
|
||||
if new_str_list1:
|
||||
for project_str in new_str_list1:
|
||||
project_name_time_str = project_str.split('\n')[0]
|
||||
dict_project = {
|
||||
'name': '',
|
||||
'time': '',
|
||||
'comment': '',
|
||||
'work': '',
|
||||
'duty': '',
|
||||
}
|
||||
project_name_time_str2 = re.split('([0-9]{4}[/|.][0-9]{1,2})', project_name_time_str)
|
||||
if project_name_time_str2:
|
||||
for index, p_str in enumerate(project_name_time_str2):
|
||||
if not p_str:
|
||||
continue
|
||||
if p_str[0].isdigit():
|
||||
if u'\u4e00' <= project_name_time_str2[index + 1].strip()[0] <= u'\u9fff':
|
||||
dict_project['time'] = p_str.replace('.', '/')
|
||||
dict_project['name'] = project_name_time_str2[index + 1].split(' ')[-2]
|
||||
dict_project['work'] = project_name_time_str2[index + 1].split(' ')[-1]
|
||||
break
|
||||
else:
|
||||
dict_project['time'] = (
|
||||
p_str + project_name_time_str2[index + 1] + project_name_time_str2[
|
||||
index + 2]).replace('.', '/')
|
||||
dict_project['name'] = project_name_time_str2[index - 1]
|
||||
name3 = project_list[index + 2].split('\n')[-1]
|
||||
if name2:
|
||||
new_str = name2 + project_list[index - 1] + i + project_list[index + 1] + \
|
||||
project_list[index + 2].split(name3)[0]
|
||||
else:
|
||||
new_str = name2 + project_list[index - 1] + i + project_list[index + 1] + \
|
||||
project_list[index + 2]
|
||||
name2 = name3
|
||||
new_str_list1.append(new_str)
|
||||
continue
|
||||
if i.strip() not in ['-', '–', '―'] and ('-' in i or '–' in i or '―' in i):
|
||||
index2 = index
|
||||
if not name2:
|
||||
name3 = i.split('\n')[-1]
|
||||
new_str = str_2[index - 2] + str_2[index - 1] + i.split(name3)[0]
|
||||
name2 = name3
|
||||
else:
|
||||
name3 = i.split('\n')[-1]
|
||||
new_str = str_2[index - 2] + str_2[index - 1] + i.split(name3)[0]
|
||||
name2 = name3
|
||||
new_str_list1.append(new_str)
|
||||
continue
|
||||
if new_str_list1:
|
||||
for project_str in new_str_list1:
|
||||
project_name_time_str = project_str.split('\n')[0]
|
||||
dict_project = {
|
||||
'name': '',
|
||||
'time': '',
|
||||
'comment': '',
|
||||
'work': '',
|
||||
'duty': '',
|
||||
}
|
||||
project_name_time_str2 = re.split('([0-9]{4}[/|.][0-9]{1,2})', project_name_time_str)
|
||||
if project_name_time_str2:
|
||||
for index, p_str in enumerate(project_name_time_str2):
|
||||
if not p_str:
|
||||
continue
|
||||
if p_str[0].isdigit():
|
||||
if u'\u4e00' <= project_name_time_str2[index + 1].strip()[0] <= u'\u9fff':
|
||||
dict_project['time'] = p_str.replace('.', '/')
|
||||
dict_project['name'] = project_name_time_str2[index + 1].split(' ')[-2]
|
||||
dict_project['work'] = project_name_time_str2[index + 1].split(' ')[-1]
|
||||
break
|
||||
else:
|
||||
dict_project['time'] = (
|
||||
p_str + project_name_time_str2[index + 1] + project_name_time_str2[
|
||||
index + 2]).replace('.', '/')
|
||||
dict_project['name'] = project_name_time_str2[index - 1]
|
||||
|
||||
break
|
||||
project_chk_str2 = project_str.split(project_name_time_str)[-1]
|
||||
project_chk_str2_list = re.split('(:|:)', project_chk_str2)
|
||||
if project_chk_str2_list:
|
||||
index3 = -1
|
||||
start_name = ''
|
||||
new_p_chk_list = []
|
||||
for index, p_str3 in enumerate(project_chk_str2_list):
|
||||
if index <= index3:
|
||||
continue
|
||||
if p_str3 in [':', ':']:
|
||||
if not re.split('[\n|\t]', project_chk_str2_list[index + 1])[0].strip() and len(
|
||||
project_chk_str2_list[index + 1]) <= 10:
|
||||
break
|
||||
project_chk_str2 = project_str.split(project_name_time_str)[-1]
|
||||
project_chk_str2_list = re.split('(:|:)', project_chk_str2)
|
||||
if project_chk_str2_list:
|
||||
index3 = -1
|
||||
start_name = ''
|
||||
new_p_chk_list = []
|
||||
for index, p_str3 in enumerate(project_chk_str2_list):
|
||||
if index <= index3:
|
||||
continue
|
||||
start_name = project_chk_str2_list[index + 1].split('\n')[-1]
|
||||
if start_name:
|
||||
new_p_str = project_chk_str2_list[index - 1].split('\n')[-1] + p_str3 + \
|
||||
project_chk_str2_list[index + 1].split(start_name)[0]
|
||||
else:
|
||||
new_p_str = project_chk_str2_list[index - 1].split('\n')[-1] + p_str3 + \
|
||||
project_chk_str2_list[index + 1]
|
||||
new_p_chk_list.append(new_p_str)
|
||||
if new_p_chk_list:
|
||||
for p_str_true in new_p_chk_list:
|
||||
if '开发环境' in p_str_true or '开发工具' in p_str_true or '开发技术' in p_str_true or '模块' in p_str_true:
|
||||
dict_project['work'] += re.split('[:|:]', p_str_true)[-1]
|
||||
if p_str3 in [':', ':']:
|
||||
if not re.split('[\n|\t]', project_chk_str2_list[index + 1])[0].strip() and len(
|
||||
project_chk_str2_list[index + 1]) <= 10:
|
||||
continue
|
||||
start_name = project_chk_str2_list[index + 1].split('\n')[-1]
|
||||
if start_name:
|
||||
new_p_str = project_chk_str2_list[index - 1].split('\n')[-1] + p_str3 + \
|
||||
project_chk_str2_list[index + 1].split(start_name)[0]
|
||||
else:
|
||||
new_p_str = project_chk_str2_list[index - 1].split('\n')[-1] + p_str3 + \
|
||||
project_chk_str2_list[index + 1]
|
||||
new_p_chk_list.append(new_p_str)
|
||||
if new_p_chk_list:
|
||||
for p_str_true in new_p_chk_list:
|
||||
if '开发环境' in p_str_true or '开发工具' in p_str_true or '开发技术' in p_str_true or '模块' in p_str_true:
|
||||
dict_project['work'] += re.split('[:|:]', p_str_true)[-1]
|
||||
continue
|
||||
if '项目描述' in p_str_true or '功能介绍' in p_str_true:
|
||||
dict_project['comment'] += re.split('[:|:]', p_str_true)[-1]
|
||||
continue
|
||||
if '职责' in p_str_true:
|
||||
dict_project['duty'] += re.split('[:|:]', p_str_true)[-1]
|
||||
continue
|
||||
project_undergo_list.append(dict_project)
|
||||
else:
|
||||
dict_project = {
|
||||
'name': '',
|
||||
'time': '',
|
||||
'comment': '',
|
||||
'work': '',
|
||||
'duty': '',
|
||||
}
|
||||
for project_str_i in project_list[1:]:
|
||||
if project_str_i:
|
||||
project_str_list = project_str_i.split('\n')
|
||||
if project_str_list:
|
||||
dict_project1 = copy.deepcopy(dict_project)
|
||||
if ':' in project_str_list[0]:
|
||||
dict_project1['name'] = project_str_list[0].split(':')[-1].strip()
|
||||
else:
|
||||
dict_project1['name'] = project_str_list[0].split(':')[-1].strip()
|
||||
chk_key = ''
|
||||
for i in project_str_list[1:]:
|
||||
if not i or i.isdigit():
|
||||
continue
|
||||
if '项目描述' in p_str_true or '功能介绍' in p_str_true:
|
||||
dict_project['comment'] += re.split('[:|:]', p_str_true)[-1]
|
||||
if ':' not in i and ':' not in i and chk_key:
|
||||
dict_project1[chk_key] += i
|
||||
continue
|
||||
if '职责' in p_str_true:
|
||||
dict_project['duty'] += re.split('[:|:]', p_str_true)[-1]
|
||||
if '开发周期' in i and (':' in i or ':' in i):
|
||||
if ':' in i:
|
||||
dict_project1['time'] = i.split(':')[-1]
|
||||
else:
|
||||
dict_project1['time'] = i.split(':')[-1]
|
||||
continue
|
||||
project_undergo_list.append(dict_project)
|
||||
if ('开发环境' in i or '项目描述' in i) and (':' in i or ':' in i):
|
||||
if ':' in i:
|
||||
dict_project1['comment'] += i.split(':')[-1]
|
||||
else:
|
||||
dict_project1['comment'] += i.split(':')[-1]
|
||||
chk_key = 'comment'
|
||||
continue
|
||||
if ('模块' in i or '框架' in i or '技术要点' in i or '职责' in i) and (':' in i or ':' in i):
|
||||
if ':' in i:
|
||||
dict_project1['duty'] = i.split(':')[-1]
|
||||
else:
|
||||
dict_project1['duty'] = i.split(':')[-1]
|
||||
chk_key = 'duty'
|
||||
continue
|
||||
project_undergo_list.append(dict_project1)
|
||||
dict_chk['project_undergo'] = project_undergo_list
|
||||
|
||||
# 数字开头
|
||||
@ -720,12 +780,18 @@ def fmt_txt(chk_str):
|
||||
'position_name': '',
|
||||
'duty': '',
|
||||
}
|
||||
new_str_list2 = new_str_list1[0].split(' ', 1)
|
||||
if '(' in new_str_list1[0]:
|
||||
new_str_list2 = new_str_list1[0].split('(', 1)
|
||||
else:
|
||||
new_str_list2 = new_str_list1[0].split(' ', 1)
|
||||
work_dict['company_name'] = new_str_list2[0]
|
||||
if ':' in new_str_list2[1]:
|
||||
work_dict['time'] = new_str_list2[1].split(':')[-1].replace('.', '/').strip()
|
||||
elif ':' in new_str_list2[1]:
|
||||
work_dict['time'] = new_str_list2[1].split(':')[-1].replace('.', '/').strip()
|
||||
elif ')' in new_str_list2[1]:
|
||||
date_list = re.findall('[0-9]{4}[/|.][0-9]{1,2}', new_str_list2[1])
|
||||
work_dict['time'] = date_list[0] + '-' + date_list[1]
|
||||
else:
|
||||
work_dict['time'] = new_str_list2[1].replace('.', '/').strip()
|
||||
if len(new_str_list1) > 1:
|
||||
@ -736,6 +802,10 @@ def fmt_txt(chk_str):
|
||||
duty1 = new_str_list1[2].split('职责')[-1]
|
||||
duty = duty1.join((x for x in work_duty))
|
||||
work_dict['duty'] = duty
|
||||
if '负责' in new_str_list1[1]:
|
||||
duty1 = new_str_list1[1].split('负责')[-1]
|
||||
duty = duty1.join((x for x in new_str_list1[2:]))
|
||||
work_dict['duty'] = duty
|
||||
work_list.append(work_dict)
|
||||
dict_chk['work_list'] = work_list
|
||||
review = ''
|
||||
@ -2309,4 +2379,4 @@ egreat,海尔,MeleA20,MeleA31,LG1154,极米,杰科,亿典等机顶
|
||||
|
||||
"""
|
||||
|
||||
fmt_txt(chk_str9)
|
||||
fmt_txt(chk_str10)
|
||||
|
Loading…
Reference in New Issue
Block a user