diff --git a/utils/re_to_jianli.py b/utils/re_to_jianli.py index 7234589..0022e4e 100644 --- a/utils/re_to_jianli.py +++ b/utils/re_to_jianli.py @@ -385,76 +385,136 @@ def fmt_txt(chk_str): new_str_list1.append(new_str) continue if new_str_list1: - for project_str in new_str_list1: - project_name_time_str = project_str.split('\n')[0] - dict_project = { - 'name': '', - 'time': '', - 'comment': '', - 'work': '', - 'duty': '', - } - project_name_time_str2 = re.split('([0-9]{4}[/|.][0-9]{1,2})', project_name_time_str) - if project_name_time_str2: - for index, p_str in enumerate(project_name_time_str2): - if not p_str: + if '项目:' in new_str_list1[0]: + for project_str in new_str_list1: + dict_project = { + 'name': '', + 'time': '', + 'comment': '', + 'work': '', + 'duty': '', + } + time_str = '' + company_name1 = '' + job_name = '' + chk_project_list = project_str.split('\n') + chk_key = '' + for index, str_project in enumerate(chk_project_list): + # 解析时间以及工作经历 + if index == 0: + time_name_list = str_project.split(' ') + time_str = time_name_list[0] + time_name_list[1] + for project_str1 in time_name_list[2:]: + if '公司' in project_str1: + company_name1 = project_str1 + if '/' in company_name1: + new_chk_company = company_name1.split('/') + company_name1 = new_chk_company[0] + job_name = new_chk_company[1] + continue + job_name = project_str1 continue - if p_str[0].isdigit(): - dict_project['time'] = ( - p_str + project_name_time_str2[index + 1] + project_name_time_str2[ - index + 2]).replace('.', '/') - name_str = project_name_time_str2[index + 3].strip() - if ' ' in name_str: - name = name_str.split(' ')[0] - else: - name = name_str - dict_project['name'] = name - break - project_chk_str2 = project_str.split(project_name_time_str)[-1] - if project_chk_str2.replace('\n', '').replace('■', '').strip().startswith(dict_project['name']): - new_chk_project = project_chk_str2.replace('\n', '').replace('■', '').strip() - dict_project['comment'] += new_chk_project.split('职责')[0] - dict_project['duty'] += new_chk_project.split('职责')[1].split('相关技术')[0] - dict_project['duty'] += new_chk_project.split('职责')[1].split('相关技术')[-1] - else: - project_chk_str2_list = re.split('(:|:)', project_chk_str2) - if project_chk_str2_list: - index3 = -1 - start_name = '' - new_p_chk_list = [] - for index, p_str3 in enumerate(project_chk_str2_list): - if index <= index3: + if '项目:' in str_project: + dict_project['name'] = str_project.split(':')[-1] + continue + if '开发环境' in str_project or '开发工具' in str_project or '开发技术' in str_project: + dict_project['duty'] += re.split('[:|:]', str_project)[-1] + chk_key = 'duty' + continue + if '项目描述' in str_project or '功能介绍' in str_project: + dict_project['comment'] += re.split('[:|:]', str_project)[-1] + chk_key = 'comment' + continue + if '职责' in str_project or '负责' in str_project: + dict_project['duty'] += re.split('[:|:]', str_project)[-1] + chk_key = 'duty' + continue + if chk_key: + dict_project[chk_key] += str_project + continue + dict_project['time'] = time_str + dict_project['work'] = job_name + project_undergo_list.append(dict_project) + # 存在工作公司经历: + if company_name1: + work_dict = { + 'company_name': company_name1, + 'time': time_str, + 'position_name': job_name, + 'duty': dict_project['duty'], + } + work_list.append(work_dict) + else: + for project_str in new_str_list1: + project_name_time_str = project_str.split('\n')[0] + dict_project = { + 'name': '', + 'time': '', + 'comment': '', + 'work': '', + 'duty': '', + } + project_name_time_str2 = re.split('([0-9]{4}[/|.][0-9]{1,2})', project_name_time_str) + if project_name_time_str2: + for index, p_str in enumerate(project_name_time_str2): + if not p_str: continue - if dict_project['name'] in p_str3: - dict_project['comment'] += p_str3.split('\n\n')[0] - if p_str3 in [':', ':']: - if not re.split('[\n|\t]', project_chk_str2_list[index + 1])[0].strip() and len( - project_chk_str2_list[index + 1]) <= 5: - continue - start_name = project_chk_str2_list[index + 1].split('\n')[-1] - if start_name: - new_p_str = project_chk_str2_list[index - 1].split('\n')[-1] + p_str3 + \ - project_chk_str2_list[index + 1].split(start_name)[0] + if p_str[0].isdigit(): + dict_project['time'] = ( + p_str + project_name_time_str2[index + 1] + project_name_time_str2[ + index + 2]).replace('.', '/') + name_str = project_name_time_str2[index + 3].strip() + if ' ' in name_str: + name = name_str.split(' ')[0] else: - new_p_str = project_chk_str2_list[index - 1].split('\n')[-1] + p_str3 + \ - project_chk_str2_list[index + 1] - new_p_chk_list.append(new_p_str) - if new_p_chk_list: - for p_str_true in new_p_chk_list: - if '开发环境' in p_str_true or '开发工具' in p_str_true or '开发技术' in p_str_true or '模块' in p_str_true: - if '职责' in p_str_true: - dict_project['duty'] += p_str_true.split('职责')[-1].split('相关技术')[0] - dict_project['work'] += p_str_true.split('职责')[-1].split('相关技术')[1] + name = name_str + dict_project['name'] = name + break + project_chk_str2 = project_str.split(project_name_time_str)[-1] + if project_chk_str2.replace('\n', '').replace('■', '').strip().startswith(dict_project['name']): + new_chk_project = project_chk_str2.replace('\n', '').replace('■', '').strip() + dict_project['comment'] += new_chk_project.split('职责')[0] + dict_project['duty'] += new_chk_project.split('职责')[1].split('相关技术')[0] + dict_project['duty'] += new_chk_project.split('职责')[1].split('相关技术')[-1] + else: + project_chk_str2_list = re.split('(:|:)', project_chk_str2) + if project_chk_str2_list: + index3 = -1 + start_name = '' + new_p_chk_list = [] + for index, p_str3 in enumerate(project_chk_str2_list): + if index <= index3: + continue + if dict_project['name'] in p_str3: + dict_project['comment'] += p_str3.split('\n\n')[0] + if p_str3 in [':', ':']: + if not re.split('[\n|\t]', project_chk_str2_list[index + 1])[0].strip() and len( + project_chk_str2_list[index + 1]) <= 5: + continue + start_name = project_chk_str2_list[index + 1].split('\n')[-1] + if start_name: + new_p_str = project_chk_str2_list[index - 1].split('\n')[-1] + p_str3 + \ + project_chk_str2_list[index + 1].split(start_name)[0] else: + new_p_str = project_chk_str2_list[index - 1].split('\n')[-1] + p_str3 + \ + project_chk_str2_list[index + 1] + new_p_chk_list.append(new_p_str) + if new_p_chk_list: + for p_str_true in new_p_chk_list: + if '开发环境' in p_str_true or '开发工具' in p_str_true or '开发技术' in p_str_true or '模块' in p_str_true: + if '职责' in p_str_true: + dict_project['duty'] += p_str_true.split('职责')[-1].split('相关技术')[0] + dict_project['work'] += p_str_true.split('职责')[-1].split('相关技术')[1] + else: + dict_project['duty'] += re.split('[:|:]', p_str_true)[-1] + continue + if '项目描述' in p_str_true or '功能介绍' in p_str_true: + dict_project['comment'] += re.split('[:|:]', p_str_true)[-1] + continue + if '职责' in p_str_true: dict_project['duty'] += re.split('[:|:]', p_str_true)[-1] - continue - if '项目描述' in p_str_true or '功能介绍' in p_str_true: - dict_project['comment'] += re.split('[:|:]', p_str_true)[-1] - continue - if '职责' in p_str_true: - dict_project['duty'] += re.split('[:|:]', p_str_true)[-1] - continue - project_undergo_list.append(dict_project) + continue + project_undergo_list.append(dict_project) # 项目名开头 else: if project_undergo2.startswith(':') or project_undergo2.startswith(':'): @@ -700,7 +760,8 @@ def fmt_txt(chk_str): chk_key = 'duty' continue project_undergo_list.append(dict_project1) - dict_chk['project_undergo'] = [i for i in project_undergo_list if i != {'name': '', 'time': '', 'comment': '', 'work': '', 'duty': ''}] + dict_chk['project_undergo'] = [i for i in project_undergo_list if + i != {'name': '', 'time': '', 'comment': '', 'work': '', 'duty': ''}] # 数字开头 if work_str2[0].isdigit(): @@ -2447,4 +2508,4 @@ egreat,海尔,MeleA20,MeleA31,LG1154,极米,杰科,亿典等机顶 """ - fmt_txt(chk_str14) + fmt_txt(chk_str3)