diff --git a/api/api_v1/endpoints/interview.py b/api/api_v1/endpoints/interview.py index b806d73..c4cb48e 100644 --- a/api/api_v1/endpoints/interview.py +++ b/api/api_v1/endpoints/interview.py @@ -436,187 +436,228 @@ async def file_to_hw( try: fn = path_data + '/' + filename - if fn.endswith('pdf'): # pdf正常上传到华为云 - res = obsClient.putFile('legu-cdn-source', 'hrms/' + filename, path_data + '/' + filename) - else: # doc/docx则转化为pdf上传到华为云 - new_fn, fil = doc2pdf(fn, path_data, filename) + if not fn.endswith('pdf'): # pdf正常上传到华为云 + # res = obsClient.putFile('legu-cdn-source', 'hrms/' + filename, path_data + '/' + filename) + # doc/docx则转化为pdf上传到华为云 + fn, fil = doc2pdf(fn, path_data, filename) filename = fil - res = obsClient.putFile('legu-cdn-source', 'hrms/' + filename, new_fn) + + # 简历初始文档 + data_mode = { + "interview_name": "", + "interview_type": 1, + "interview_sign": 0, + "hope_money": "", + "feedback": 0, + "interview_round": 0, + "event_time": datetime.now(), + "name": "", + "phone": "", + "job_name": "", + "hr_name": "", + "work_exp": 0, + "interview_stage": 1, + "owner_name": 2, + "education": 1, + "work_undergo": [], + "project_undergo": [], + "work_list": [], + "school": "", + "at_school": "", + "specialty": "", + "specialty_do": [], + "mmended_state": 0, + "mail": "", + "account": "", + "id_card": "", + "gender": "", + "age": 0, + "gam": "", + "interview_state": 1, + "counts": 1, + "nation": "汉", + "review": "", + "upgrade": [], + "come_time": "", + "now_money": "", + "men_state": 1, + "teacher_state": 1, + "teacher_back": 1, + "offer_state": 1, + "offer_exam_state": 1, + "notice_state": 1, + "pass_why": 0, + "pass_text": "", + "now_address": "", + "language": [], + "remembrance": [], + "file_url": '', + "hr_manner": 2, + } + uid = get_uid() + data_mode['uid'] = uid + # 存数据 + chk_txt = getText_pdf(path_data + '/' + filename) + data = fmt_txt(chk_txt) + education = data['education'] + # 学历int转化 + education_int = { + '大专': 1, + '本科': 2, + '研究生': 3, + '博士': 4, + '硕士': 5, + } + if education and isinstance(education, str): + data['education'] = education_int.get(education, 1) + age = data['age'] + if not age: + data['age'] = 20 + # 年龄int转化 + if age and isinstance(age, str): + true_age = re.search(r"\d+\.?\d*", age) + if len(true_age.group()) > 2: + data['age'] = 20 + else: + data['age'] = int(true_age.group()) + work_exp = data['work_exp'] + if not work_exp: + data['work_exp'] = 0 + # 工作经验float转化 + if work_exp and isinstance(work_exp, str): + true_work_exp = re.search(r"\d+\.?\d*", work_exp) + if len(true_work_exp.group()) > 3: + data['work_exp'] = 0 + else: + data['work_exp'] = float(true_work_exp.group()) + + data_mode.update(data) + # 转json字符串 + if 'remembrance_list' in data_mode: + remembrance = data_mode.pop('remembrance_list') + data_mode['remembrance'] = remembrance + if 'language_list' in data_mode: + language = data_mode.pop('language_list') + data_mode['language'] = language + if 'project_undergo' in data_mode: + if data_mode.get('project_undergo', []): + data_mode['project_undergo'] = [json.dumps(i) for i in data_mode['project_undergo']] + else: + data_mode['project_undergo'] = [] + if 'work_list' in data_mode: + if data_mode.get('work_list', []): + data_mode['work_list'] = [json.dumps(i) for i in data_mode['work_list']] + else: + data_mode['work_list'] = [] + if 'language' in data_mode: + if data_mode.get('language', []): + data_mode['language'] = [json.dumps(i) for i in data_mode['language']] + else: + data_mode['language'] = [] + if 'remembrance' in data_mode: + if data_mode.get('remembrance', []): + data_mode['remembrance'] = [json.dumps(i) for i in data_mode['remembrance']] + else: + data_mode['remembrance'] = [] + + # 字符串转datetime + if data_mode.get('in_time', ''): + chk_in_time = data_mode['in_time'].replace('-', '/').replace('.', '/') + if len(chk_in_time.split('/')) == 2: + data_mode['in_time'] = str(datetime.strptime(chk_in_time, "%Y/%m").date()) + if len(chk_in_time.split('/')) == 3: + data_mode['in_time'] = str(datetime.strptime(chk_in_time, "%Y/%m/%d").date()) + + if data_mode.get('out_time', ''): + chk_out_time = data_mode['out_time'].replace('-', '/').replace('.', '/') + if len(chk_out_time.split('/')) == 2: + data_mode['out_time'] = str(datetime.strptime(chk_out_time, "%Y/%m").date()) + if len(chk_out_time.split('/')) == 3: + data_mode['out_time'] = str(datetime.strptime(chk_out_time, "%Y/%m/%d").date()) + + if data_mode.get('birthday', ''): + chk_birthday = data_mode['birthday'].replace('-', '/').replace('.', '/') + if len(chk_birthday.split('/')) == 2: + data_mode['birthday'] = str(datetime.strptime(chk_birthday, "%Y/%m").date()) + if len(chk_birthday.split('/')) == 3: + data_mode['birthday'] = str(datetime.strptime(chk_birthday, "%Y/%m/%d").date()) + + if data_mode.get('star_time', ''): + chk_star_time = data_mode['star_time'].replace('-', '/').replace('.', '/') + if len(chk_star_time.split('/')) == 2: + data_mode['star_time'] = str(datetime.strptime(chk_star_time, "%Y/%m").date()) + if len(chk_star_time.split('/')) == 3: + data_mode['star_time'] = str(datetime.strptime(chk_star_time, "%Y/%m/%d").date()) + + if data_mode.get('end_time', ''): + chk_end_time = data_mode['end_time'].replace('-', '/').replace('.', '/') + if len(chk_end_time.split('/')) == 2: + data_mode['end_time'] = str(datetime.strptime(chk_end_time, "%Y/%m").date()) + if len(chk_end_time.split('/')) == 3: + data_mode['end_time'] = str(datetime.strptime(chk_end_time, "%Y/%m/%d").date()) + + if data_mode.get('graduate_time', ''): + chk_graduate = data_mode['graduate_time'].replace('-', '/').replace('.', '/') + if len(chk_graduate.split('/')) == 2: + data_mode['graduate_time'] = str(datetime.strptime(chk_graduate, "%Y/%m").date()) + if len(chk_graduate.split('/')) == 3: + data_mode['graduate_time'] = str(datetime.strptime(chk_graduate, "%Y/%m/%d").date()) + + work_list = data['work_list'] + language = data['language'] + project_undergo = data['project_undergo'] + remembrance = data['remembrance'] + + # 简历查重,姓名,手机号,性别name,phone,gender + find_name = data['name'] + find_phone = data['phone'] + find_gender = data['gender'] + where = {} + if find_name: + where.update({ + 'name': find_name + }) + if find_phone: + where.update({ + 'phone': find_phone + }) + if find_gender: + where.update({ + 'gender': find_gender + }) + whereStr = '' + for key, value in where.items(): + if isinstance(value, str): + if not value.strip(): + continue + if whereStr: + whereStr += 'and ' + str(key) + ' = ' + "'" + value + "'" + ' ' + else: + whereStr += str(key) + ' = ' + "'" + value + "'" + ' ' + continue + if whereStr: + whereStr += 'and ' + str(key) + ' = ' + str(value) + ' ' + else: + whereStr += str(key) + ' = ' + str(value) + ' ' + whereStr = whereStr.strip() + sql = f"select uid from HR.resumes where {whereStr}" + is_in_data = await db.execute(sql) + if is_in_data: + return schemas.Msg(code=-9, msg='简历已存在', data=[]) + if not find_phone: + return schemas.Msg(code=-9, msg='电话号码不存在', data=[]) + # os.rename(path_data + '/' + filename, path_data + '/' + find_phone + '.pdf') + res = obsClient.putFile('legu-cdn-source', 'hrms/' + find_phone + '.pdf', fn) if res.status < 300: # 地址 url = res.body.objectUrl - # 简历初始文档 - data_mode = { - "interview_name": "", - "interview_type": 1, - "interview_sign": 0, - "hope_money": "", - "feedback": 0, - "interview_round": 0, - "event_time": datetime.now(), - "name": "", - "phone": "", - "job_name": "", - "hr_name": "", - "work_exp": 0, - "interview_stage": 1, - "owner_name": 2, - "education": 1, - "work_undergo": [], - "project_undergo": [], - "work_list": [], - "school": "", - "at_school": "", - "specialty": "", - "specialty_do": [], - "mmended_state": 0, - "mail": "", - "account": "", - "id_card": "", - "gender": "", - "age": 0, - "gam": "", - "interview_state": 1, - "counts": 1, - "nation": "汉", - "review": "", - "upgrade": [], - "come_time": "", - "now_money": "", - "men_state": 1, - "teacher_state": 1, - "teacher_back": 1, - "offer_state": 1, - "offer_exam_state": 1, - "notice_state": 1, - "pass_why": 0, - "pass_text": "", - "now_address": "", - "language": [], - "remembrance": [], - "file_url": url, - "hr_manner": 2, - } - uid = get_uid() - data_mode['uid'] = uid + data_mode['file_url'] = url sql = f"insert into HR.resumes(interview_name, interview_type, interview_sign, hope_money, feedback," \ f" interview_round, event_time, uid, name, phone, job_name, hr_name, work_exp, interview_stage, owner_name," \ f" education, work_undergo, project_undergo, work_list, school, at_school, specialty, specialty_do, " \ f"mmended_state, mail, account, id_card, gender, age, gam, interview_state, counts, nation, come_time," \ f" review, upgrade, now_money, men_state, teacher_state, teacher_back, offer_state, offer_exam_state," \ f" notice_state, pass_why, pass_text, now_address,language,remembrance, file_url, hr_manner) values" - # 存数据 - chk_txt = getText_pdf(path_data + '/' + filename) - data = fmt_txt(chk_txt) - education = data['education'] - # 学历int转化 - education_int = { - '大专': 1, - '本科': 2, - '研究生': 3, - '博士': 4, - '硕士': 5, - } - if education and isinstance(education, str): - data['education'] = education_int.get(education, 1) - age = data['age'] - if not age: - data['age'] = 20 - # 年龄int转化 - if age and isinstance(age, str): - true_age = re.search(r"\d+\.?\d*", age) - if len(true_age.group()) > 2: - data['age'] = 20 - else: - data['age'] = int(true_age.group()) - work_exp = data['work_exp'] - if not work_exp: - data['work_exp'] = 0 - # 工作经验float转化 - if work_exp and isinstance(work_exp, str): - true_work_exp = re.search(r"\d+\.?\d*", work_exp) - if len(true_work_exp.group()) > 3: - data['work_exp'] = 0 - else: - data['work_exp'] = float(true_work_exp.group()) - - data_mode.update(data) - # 转json字符串 - if 'remembrance_list' in data_mode: - remembrance = data_mode.pop('remembrance_list') - data_mode['remembrance'] = remembrance - if 'language_list' in data_mode: - language = data_mode.pop('language_list') - data_mode['language'] = language - if 'project_undergo' in data_mode: - if data_mode.get('project_undergo', []): - data_mode['project_undergo'] = [json.dumps(i) for i in data_mode['project_undergo']] - else: - data_mode['project_undergo'] = [] - if 'work_list' in data_mode: - if data_mode.get('work_list', []): - data_mode['work_list'] = [json.dumps(i) for i in data_mode['work_list']] - else: - data_mode['work_list'] = [] - if 'language' in data_mode: - if data_mode.get('language', []): - data_mode['language'] = [json.dumps(i) for i in data_mode['language']] - else: - data_mode['language'] = [] - if 'remembrance' in data_mode: - if data_mode.get('remembrance', []): - data_mode['remembrance'] = [json.dumps(i) for i in data_mode['remembrance']] - else: - data_mode['remembrance'] = [] - - # 字符串转datetime - if data_mode.get('in_time', ''): - chk_in_time = data_mode['in_time'].replace('-', '/').replace('.', '/') - if len(chk_in_time.split('/')) == 2: - data_mode['in_time'] = str(datetime.strptime(chk_in_time, "%Y/%m").date()) - if len(chk_in_time.split('/')) == 3: - data_mode['in_time'] = str(datetime.strptime(chk_in_time, "%Y/%m/%d").date()) - - if data_mode.get('out_time', ''): - chk_out_time = data_mode['out_time'].replace('-', '/').replace('.', '/') - if len(chk_out_time.split('/')) == 2: - data_mode['out_time'] = str(datetime.strptime(chk_out_time, "%Y/%m").date()) - if len(chk_out_time.split('/')) == 3: - data_mode['out_time'] = str(datetime.strptime(chk_out_time, "%Y/%m/%d").date()) - - if data_mode.get('birthday', ''): - chk_birthday = data_mode['birthday'].replace('-', '/').replace('.', '/') - if len(chk_birthday.split('/')) == 2: - data_mode['birthday'] = str(datetime.strptime(chk_birthday, "%Y/%m").date()) - if len(chk_birthday.split('/')) == 3: - data_mode['birthday'] = str(datetime.strptime(chk_birthday, "%Y/%m/%d").date()) - - if data_mode.get('star_time', ''): - chk_star_time = data_mode['star_time'].replace('-', '/').replace('.', '/') - if len(chk_star_time.split('/')) == 2: - data_mode['star_time'] = str(datetime.strptime(chk_star_time, "%Y/%m").date()) - if len(chk_star_time.split('/')) == 3: - data_mode['star_time'] = str(datetime.strptime(chk_star_time, "%Y/%m/%d").date()) - - if data_mode.get('end_time', ''): - chk_end_time = data_mode['end_time'].replace('-', '/').replace('.', '/') - if len(chk_end_time.split('/')) == 2: - data_mode['end_time'] = str(datetime.strptime(chk_end_time, "%Y/%m").date()) - if len(chk_end_time.split('/')) == 3: - data_mode['end_time'] = str(datetime.strptime(chk_end_time, "%Y/%m/%d").date()) - - if data_mode.get('graduate_time', ''): - chk_graduate = data_mode['graduate_time'].replace('-', '/').replace('.', '/') - if len(chk_graduate.split('/')) == 2: - data_mode['graduate_time'] = str(datetime.strptime(chk_graduate, "%Y/%m").date()) - if len(chk_graduate.split('/')) == 3: - data_mode['graduate_time'] = str(datetime.strptime(chk_graduate, "%Y/%m/%d").date()) - - work_list = data['work_list'] - language = data['language'] - project_undergo = data['project_undergo'] - remembrance = data['remembrance'] - res_data = { 'data': data, 'file_url': url, diff --git a/utils/re_to_jianli.py b/utils/re_to_jianli.py index ef4a52b..3088f30 100644 --- a/utils/re_to_jianli.py +++ b/utils/re_to_jianli.py @@ -145,25 +145,27 @@ def fmt_txt(chk_str): if work_exp: work_exp_str = work_exp[0].replace('\n', '') if ':' in work_exp_str: - work_exp = work_exp_str.split(':')[1].strip().split('年')[0] - dict_int_year = { - '一': 1, - '二': 2, - '三': 3, - '四': 4, - '五': 5, - '六': 6, - '七': 7, - '八': 8, - '九': 9 - } - if work_exp in ['一', '二', '三', '四', '五', '六', '七', '八', '九']: - work_exp1 = dict_int_year.get(work_exp) - else: - if '.' in work_exp: - work_exp = work_exp.split('.')[0] - work_exp1 = int(work_exp) - dict_chk['work_exp'] = work_exp1 + for i in work_exp_str.split(':'): + if i.strip()[0].isdigit(): + work_exp = i.strip().split('年')[0] + dict_int_year = { + '一': 1, + '二': 2, + '三': 3, + '四': 4, + '五': 5, + '六': 6, + '七': 7, + '八': 8, + '九': 9 + } + if work_exp in ['一', '二', '三', '四', '五', '六', '七', '八', '九']: + work_exp1 = dict_int_year.get(work_exp) + else: + if '.' in work_exp: + work_exp = work_exp.split('.')[0] + work_exp1 = int(work_exp) + dict_chk['work_exp'] = work_exp1 else: dict_chk['work_exp'] = 0 @@ -198,8 +200,17 @@ def fmt_txt(chk_str): if new_chk_str and '教育经历' in true_chkStr: bold = re.compile(r'\n') true_chk = bold.sub(' ', new_chk_str, count=3) - school_str1 = re.findall(r'.*?学院.*?\n', true_chk, re.M)[0].replace('\n', '') - school_list = school_str1.split(' ') + n_true = 1 + if '\n' not in true_chk: + n_true = 0 + if n_true: + school_chk_list = re.findall(r'.*?学院.*?\n', true_chk, re.M) + if not school_chk_list: + school_chk_list = re.findall(r'.*?大学.*?\n', true_chk, re.M) + else: + school_chk_list = [true_chk] + school_str1 = school_chk_list[0].replace('\n', '') + school_list = [i for i in school_str1.split(' ') if i != ''] if school_list: time_index = -1 @@ -284,6 +295,10 @@ def fmt_txt(chk_str): dict_chk['age'] = int(age_str.split(':')[-1]) else: dict_chk['age'] = int("".join(re.findall("\d+", age_str))) + age = dict_chk['age'] + if age >= 100: + from builtins import str + dict_chk['age'] = int(str(age)[0:2]) else: age1 = re.findall(r'[0-9]{2}.*?岁', true_chkStr, re.M) if age1: @@ -302,10 +317,13 @@ def fmt_txt(chk_str): dict_chk['nation'] = nation_str.split(':')[-1] else: dict_chk['nation'] = '汉' - if '工作经历' not in true_chkStr: + if '工作经历' not in true_chkStr and '⼯作经历' not in true_chkStr: work_undergo_str = true_chkStr.split('工作经验')[-1] else: - work_undergo_str = true_chkStr.split('工作经历')[-1] + if '⼯作经历' in true_chkStr: + work_undergo_str = true_chkStr.split('⼯作经历')[-1] + else: + work_undergo_str = true_chkStr.split('工作经历')[-1] work_undergo_str1 = work_undergo_str.split('教育经历')[0] work_str2 = work_undergo_str1.strip().strip('\n') project_undergo = '' @@ -320,6 +338,12 @@ def fmt_txt(chk_str): if '项目简介' in work_str2: project_undergo = ''.join(i for i in work_str2.split('项目简介')[:]) # work_str2 = work_str2.split('项目简介')[0] + elif '项⽬经验' in work_str2: + project_undergo = ''.join(i for i in work_str2.split('项⽬经验')[1:]) + work_str2 = work_str2.split('项⽬经验')[0] + elif '项目经历' in true_chkStr: + project_undergo = true_chkStr.split('项目经历')[-1].split('自我评价')[0] + project_undergo2 = project_undergo.strip().strip('\n') str_2 = re.split('([0-9]{4}[/|.][0-9]{1,2})', work_str2) if len(str_2) <= 1: @@ -330,6 +354,7 @@ def fmt_txt(chk_str): # new_str = new_i_list[0] + ' 年' + new_i_list[1].replace(' ', '') # work_str2 = work_str2.replace(i, new_str) str_2 = re.split('([0-9]{4}[/|.][0-9]{1,2})', work_str2) + project_undergo2 = project_undergo2.replace('年', '/').replace('月', '') project_list = re.split('([0-9]{4}[/|.][0-9]{1,2})', project_undergo2) if len(project_list) <= 1: if ':' in project_undergo2: @@ -349,12 +374,12 @@ def fmt_txt(chk_str): if not i: continue chk_i = i.strip() - if chk_i in ['-', '–', '―']: + if chk_i in ['-', '–', '―', '—']: index2 = index + 2 new_str = project_list[index - 1] + i + project_list[index + 1] + project_list[index + 2] new_str_list1.append(new_str) continue - if chk_i not in ['-', '–', '―'] and ('-' in chk_i or '–' in chk_i or '―' in chk_i): + if chk_i not in ['-', '–', '―', '—'] and ('-' in chk_i or '–' in chk_i or '―' in chk_i or '—' in chk_i): index2 = index new_str = project_list[index - 1] + chk_i new_str_list1.append(new_str) @@ -444,7 +469,7 @@ def fmt_txt(chk_str): new_str_list1 = [] index2 = -1 for index, i in enumerate(project_list): - if not i[0].isdigit() and i not in ['-', '–', '―']: + if not i[0].isdigit() and i not in ['-', '–', '―', '—']: if index <= index2: continue index2 = index + 3 @@ -537,7 +562,7 @@ def fmt_txt(chk_str): continue if not i: continue - if i.strip() not in ['-', '–', '―']: + if i.strip() in ['-', '–', '―', '—']: index2 = index + 2 if not name2: name3 = project_list[index + 2].split('\n')[-1] @@ -556,7 +581,7 @@ def fmt_txt(chk_str): name2 = name3 new_str_list1.append(new_str) continue - if i.strip() not in ['-', '–', '―'] and ('-' in i or '–' in i or '―' in i): + if i.strip() not in ['-', '–', '―', '—'] and ('-' in i or '–' in i or '―' in i or '—' in i): index2 = index if not name2: name3 = i.split('\n')[-1] @@ -594,7 +619,8 @@ def fmt_txt(chk_str): p_str + project_name_time_str2[index + 1] + project_name_time_str2[ index + 2]).replace('.', '/') dict_project['name'] = project_name_time_str2[index - 1] - + if index + 3 <= len(project_name_time_str2): + dict_project['work'] = project_name_time_str2[index + 3] break project_chk_str2 = project_str.split(project_name_time_str)[-1] project_chk_str2_list = re.split('(:|:)', project_chk_str2) @@ -620,9 +646,9 @@ def fmt_txt(chk_str): if new_p_chk_list: for p_str_true in new_p_chk_list: if '开发环境' in p_str_true or '开发工具' in p_str_true or '开发技术' in p_str_true or '模块' in p_str_true: - dict_project['work'] += re.split('[:|:]', p_str_true)[-1] + dict_project['comment'] += re.split('[:|:]', p_str_true)[-1] continue - if '项目描述' in p_str_true or '功能介绍' in p_str_true: + if '项目描述' in p_str_true or '功能介绍' in p_str_true or '项⽬描述' in p_str_true: dict_project['comment'] += re.split('[:|:]', p_str_true)[-1] continue if '职责' in p_str_true: @@ -674,7 +700,7 @@ def fmt_txt(chk_str): chk_key = 'duty' continue project_undergo_list.append(dict_project1) - dict_chk['project_undergo'] = project_undergo_list + dict_chk['project_undergo'] = [i for i in project_undergo_list if i != {'name': '', 'time': '', 'comment': '', 'work': '', 'duty': ''}] # 数字开头 if work_str2[0].isdigit(): @@ -685,12 +711,12 @@ def fmt_txt(chk_str): continue if not i: continue - if i.strip() in ['-', '–', '―']: + if i.strip() in ['-', '–', '―', '—']: index1 = index + 2 new_str = str_2[index - 1] + i + str_2[index + 1] + str_2[index + 2] new_str_list.append(new_str) continue - if i.strip() not in ['-', '–', '―'] and ('-' in i or '–' in i or '―' in i): + if i.strip() not in ['-', '–', '―', '—'] and ('-' in i or '–' in i or '―' in i or '—' in i): index1 = index new_str = str_2[index - 1] + i new_str_list.append(new_str) @@ -742,23 +768,27 @@ def fmt_txt(chk_str): continue if not i: continue - if i.strip() in ['-', '–', '―']: + if i.strip() in ['-', '–', '―', '—']: index1 = index + 2 if not name: - name1 = str_2[index + 2].split('\n')[-1] + name1 = str_2[index + 2].strip().strip('\n').split('\n')[-1] + if str_2[index + 2].strip().endswith(':') or str_2[index + 2].strip().endswith(':'): + name1 = str_2[index + 2].split('\n')[-2] new_str = str_2[index - 2] + str_2[index - 1] + i + str_2[index + 1] + \ str_2[index + 2].split(name1)[0] name = name1 else: - name1 = str_2[index + 2].split('\n')[-1] - if name1: + name1 = str_2[index + 2].strip().strip('\n').split('\n')[-1] + if str_2[index + 2].strip().endswith(':') or str_2[index + 2].strip().endswith(':'): + name1 = str_2[index + 2].split('\n')[-2] + if name1 and '公司' in name1: new_str = name + str_2[index - 1] + i + str_2[index + 1] + str_2[index + 2].split(name1)[0] else: new_str = name + str_2[index - 1] + i + str_2[index + 1] + str_2[index + 2] name = name1 new_str_list.append(new_str) continue - if i.strip() not in ['-', '–', '―'] and ('-' in i or '–' in i or '―' in i): + if i.strip() not in ['-', '–', '―', '—'] and ('-' in i or '–' in i or '―' in i or '—' in i): index1 = index if not name: name1 = i.split('\n')[-1] @@ -784,38 +814,73 @@ def fmt_txt(chk_str): new_str_list2 = new_str_list1[0].split('(', 1) else: new_str_list2 = new_str_list1[0].split(' ', 1) - work_dict['company_name'] = new_str_list2[0] - if ':' in new_str_list2[1]: - work_dict['time'] = new_str_list2[1].split(':')[-1].replace('.', '/').strip() - elif ':' in new_str_list2[1]: - work_dict['time'] = new_str_list2[1].split(':')[-1].replace('.', '/').strip() - elif ')' in new_str_list2[1]: - date_list = re.findall('[0-9]{4}[/|.][0-9]{1,2}', new_str_list2[1]) - work_dict['time'] = date_list[0] + '-' + date_list[1] + company_name_str = new_str_list2[0] + if ':' in company_name_str or ':' in company_name_str: + if ':' in company_name_str: + company_name_str = company_name_str.split(':')[-1] + if ':' in company_name_str: + company_name_str = company_name_str.split(':')[-1] + if company_name_str.strip() == '': + company_name_str = new_str_list2[1] + date_time_list = re.findall('[0-9]{4}[/|.][0-9]{1,2}', company_name_str) + if len(date_time_list) >= 2: + work_dict['company_name'] = company_name_str.split(date_time_list[0])[0] + work_dict['time'] = date_time_list[0] + '-' + date_time_list[1] else: - work_dict['time'] = new_str_list2[1].replace('.', '/').strip() - if len(new_str_list1) > 1: - if ':' in new_str_list1[1]: - work_dict['position_name'] = new_str_list1[1].split(':')[-1] - if work_dict['position_name']: - work_duty = new_str_list1[3:] - duty1 = new_str_list1[2].split('职责')[-1] - duty = duty1.join((x for x in work_duty)) - work_dict['duty'] = duty - if '负责' in new_str_list1[1]: - duty1 = new_str_list1[1].split('负责')[-1] - duty = duty1.join((x for x in new_str_list1[2:])) - work_dict['duty'] = duty + work_dict['company_name'] = company_name_str + if len(new_str_list2) > 1: + if ':' in new_str_list2[1]: + work_dict['time'] = new_str_list2[1].split(':')[-1].replace('.', '/').strip() + elif ':' in new_str_list2[1]: + work_dict['time'] = new_str_list2[1].split(':')[-1].replace('.', '/').strip() + elif ')' in new_str_list2[1]: + date_list = re.findall('[0-9]{4}[/|.][0-9]{1,2}', new_str_list2[1]) + work_dict['time'] = date_list[0] + '-' + date_list[1] + else: + work_dict['time'] = new_str_list2[1].replace('.', '/').strip() + if work_dict['time'] == work_dict['company_name']: + work_dict['time'] = new_str_list1[1].split(':')[-1].replace('.', '/').strip() + if ' ' in work_dict['time']: + chk_time_str = work_dict['time'] + work_dict['time'] = chk_time_str.split(' ')[0] + work_dict['position_name'] = chk_time_str.split(' ')[-1] + if len(new_str_list1) > 1: + if ':' in new_str_list1[1]: + work_dict['position_name'] = new_str_list1[1].split(':')[-1] + if work_dict['position_name'].replace('.', '/').strip() == work_dict['time']: + work_dict['position_name'] = '' + if work_dict['position_name']: + work_duty = new_str_list1[3:] + duty1 = new_str_list1[2].split('职责')[-1] + duty = duty1.join((x for x in work_duty)) + work_dict['duty'] = duty + if '负责' in new_str_list1[1]: + duty1 = new_str_list1[1].split('负责')[-1] + duty = duty1.join((x for x in new_str_list1[2:])) + work_dict['duty'] = duty + else: + if '工作描述' in work_str and '专业技能' in work_str: + work_dict['duty'] = work_str.split('工作描述')[-1].split('专业技能')[0] + # work_dict['duty'] = work_str.split('工作描述')[-1].split('专业技能')[-1] + else: + for i in new_str_list1: + if '时间' in i: + work_dict['time'] = i.split(':')[-1].strip() + continue + if '职 位' in i: + work_dict['position_name'] = i.split(':')[-1].strip() + continue work_list.append(work_dict) dict_chk['work_list'] = work_list review = '' upgrade = true_chkStr.split('教育经历')[-1] - if '自我评价' in upgrade or '自我描述' in upgrade: + if '自我评价' in upgrade or '自我描述' in upgrade or '⾃我评价' in upgrade: if '自我评价' in upgrade: review = upgrade.split('自我评价')[-1].split('技能特长')[0] - else: - if '自我描述' in upgrade: - review = upgrade.split('自我描述')[-1].split('技能特长')[0] + elif '自我描述' in upgrade: + review = upgrade.split('自我描述')[-1].split('技能特长')[0] + elif '⾃我评价' in upgrade: + review = upgrade.split('⾃我评价')[-1].split('技能特长')[0] else: review_chk = true_chkStr.split('工作经历')[0] if '自我评价' in review_chk: @@ -946,7 +1011,10 @@ def fmt_txt(chk_str): specialty_do_str = upgrade.split('专业技能')[-1].split('工作经验')[0] specialty_do = [i for i in specialty_do_str.split('\n') if i.strip() != ''] if '技能' in upgrade: - specialty_do_str = upgrade.split('技能')[-1].split('项目经验')[0] + if '项目经验' in upgrade: + specialty_do_str = upgrade.split('技能')[-1].split('项目经验')[0] + else: + specialty_do_str = upgrade.split('技能')[-1].split('项目经历')[0] specialty_do = [i.strip('') for i in specialty_do_str.split('\n') if i.strip() not in ['', '']] dict_chk['remembrance'] = remembrance dict_chk['specialty_do'] = specialty_do @@ -2379,4 +2447,4 @@ egreat,海尔,MeleA20,MeleA31,LG1154,极米,杰科,亿典等机顶 """ - fmt_txt(chk_str10) + fmt_txt(chk_str14)