From 232c7e850d5f2d43faa3bc2e0e68fbb781ffd8a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=80=C3=AE=C3=97=C3=9A=C3=95=C3=B1?= Date: Mon, 22 Aug 2022 09:47:34 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E5=AD=97=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/api_v1/endpoints/interview.py | 47 ++++++- models/interview_zsgc.py | 7 +- utils/jianli.py | 212 +++++++++++++++++++++++++++++- 3 files changed, 257 insertions(+), 9 deletions(-) diff --git a/api/api_v1/endpoints/interview.py b/api/api_v1/endpoints/interview.py index 613fb00..17bae10 100644 --- a/api/api_v1/endpoints/interview.py +++ b/api/api_v1/endpoints/interview.py @@ -419,19 +419,62 @@ async def interview_file_insert( if len(true_d_list) == 2: chk_list = deepcopy(true_d_list) chkdict[chk_list[0]] = chk_list[1] - true_dict = deepcopy(chkdict) + dp_dict = { + 'time': '', + 'name': '', + 'work': '', + 'comment': '', + 'duty': '', + } + + true_dict = deepcopy(dp_dict) + for key, v in chkdict.items(): + if key == '项目名称': + true_dict['name'] = v + continue + if key == '开发周期': + true_dict['time'] = v + continue + if key == '项目描述': + true_dict['comment'] = v + continue + if key in ['技术要点', '功能模块']: + true_dict['work'] += v + continue + if key == '职务': + true_dict['duty'] = v + continue + # true_dict = deepcopy(chkdict) true_upgrade.append(true_dict) + work_list = data['work_list'] + language = data['language'] + remembrance = data['remembrance'] true_work = [] + language_list = [] + remembrance_list = [] if work_list: for dstr in work_list: data1 = eval(dstr) dictdata = deepcopy(data1) true_work.append(dictdata) + if language: + for dstr in work_list: + data1 = eval(dstr) + dictdata = deepcopy(data1) + language_list.append(dictdata) + if remembrance: + for dstr in work_list: + data1 = eval(dstr) + dictdata = deepcopy(data1) + remembrance_list.append(dictdata) + res_data = { 'data': data, 'project_undergo': true_upgrade, - 'work_list': true_work + 'work_list': true_work, + 'language_list': language_list, + 'remembrance_list': remembrance_list } return schemas.Msg(code=200, msg='ok', data=res_data) else: diff --git a/models/interview_zsgc.py b/models/interview_zsgc.py index 422b756..bf913df 100644 --- a/models/interview_zsgc.py +++ b/models/interview_zsgc.py @@ -112,7 +112,10 @@ class InterviewDo: "notice_state": 1, "pass_why": 0, "pass_text": [], - "file_url": '', + "language": [], + "remembrance": [], + "birthday": '', + "file_url": '' } insert_data = [] @@ -127,7 +130,7 @@ class InterviewDo: f" education, work_undergo, project_undergo, work_list, school, at_school, specialty, specialty_do, " \ f"mmended_state, mail, account, id_card, gender, age, gam, interview_state, counts, nation, come_time," \ f" review, upgrade, now_money, men_state, teacher_state, teacher_back, offer_state, offer_exam_state," \ - f" notice_state, pass_why, pass_text, file_url) values" + f" notice_state, pass_why, pass_text,language,remembrance,birthday, file_url) values" now_time = datetime.datetime.now() for data in insert_data: s1 = data['event_time'] diff --git a/utils/jianli.py b/utils/jianli.py index 1ed79d6..7dbe506 100644 --- a/utils/jianli.py +++ b/utils/jianli.py @@ -103,9 +103,121 @@ def chkworlkandtime(listdata): ress = [] if res != {}: for i in range(len(res['公司名'])): + company_name = '' + position_name = '' + duty = '' + if '公司名' in res: + company_name = res['公司名'][i]['text'] + if '职责' in res: + position_name = res['职责'][i]['text'] + if '工作内容' in res: + duty = res['工作内容'][i]['text'] date = { - 'name': res['公司名'][i]['text'], - 'time': res['时间'][i]['text'] + 'company_name ': company_name, + 'position_name': position_name, + 'duty': duty + } + ress.append(str(date)) + return ress + + +def chkworlkandtime1(listdata): + """ + 获取语言能力中语言类型和掌握程度,听说,读写 + :param dictdata: + :return:返回列表格式 + """ + res = {} + for i in listdata: + for key, datalist in i.items(): + trueDict = {} + for data in datalist: + if data['text'] in trueDict: + if data['probability'] <= trueDict[data['text']]['probability']: + continue + trueDict.update({ + data['text']: { + 'end': data['end'], + 'probability': data['probability'], + 'start': data['start'], + } + }) + trueList = [] + for key1, value1 in trueDict.items(): + value1.update({ + 'text': key1 + }) + trueDict1 = copy.deepcopy(value1) + trueList.append(trueDict1) + trueList.sort(key=lambda item: item['start']) + res.update({key: trueList}) + ress = [] + if res != {}: + for i in range(len(res['语言'])): + language_name = '' + has_sleep = '' + reading = '' + writing = '' + if '语言' in res: + language_name = res['语言'][i]['text'] + if '掌握程度' in res: + has_sleep = res['掌握程度'][i]['text'] + if '听说' in res: + reading = res['听说'][i]['text'] + if '读写' in res: + writing = res['读写'][i]['text'] + date = { + 'language_name ': language_name, + 'has_sleep ': has_sleep, + 'reading': reading, + 'writing': writing + } + ress.append(str(date)) + return ress + + +def chkworlkandtime2(listdata): + """ + 获取获奖经历中奖项名称和获奖时间 + :param dictdata: + :return:返回列表格式 + """ + res = {} + for i in listdata: + for key, datalist in i.items(): + trueDict = {} + for data in datalist: + if data['text'] in trueDict: + if data['probability'] <= trueDict[data['text']]['probability']: + continue + trueDict.update({ + data['text']: { + 'end': data['end'], + 'probability': data['probability'], + 'start': data['start'], + } + }) + trueList = [] + for key1, value1 in trueDict.items(): + value1.update({ + 'text': key1 + }) + trueDict1 = copy.deepcopy(value1) + trueList.append(trueDict1) + trueList.sort(key=lambda item: item['start']) + res.update({key: trueList}) + ress = [] + if res != {}: + for i in range(len(res['奖项名'])): + prize_name = '' + prize_time = '' + if '奖项名' in res: + prize_name = res['公司名'][i]['text'] + if '时间' in res: + prize_time = res['时间'][i]['text'] + date = { + 'prize_name ': prize_name, + 'prize_time': prize_time, } ress.append(str(date)) return ress @@ -224,6 +336,16 @@ def get_date(schema, dates, schema_dict): clash(date, 'phone', 'tels') work_exp = date['work_exp'] + if not work_exp: + date['work_exp'] = 0 + # 工作经验float转化 + if work_exp and isinstance(work_exp, str): + true_work_exp = re.search(r"\d+\.?\d*", work_exp) + if len(true_work_exp.group()) > 3: + work_exp = 0 + else: + work_exp = float(true_work_exp.group()) + if 1 <= work_exp < 3: date['work_exp'] = 1 if 3 <= work_exp < 5: @@ -236,7 +358,7 @@ def get_date(schema, dates, schema_dict): def fmtTxt(txt, istable=0): # 所有关键字 chkStr = ['自我评价', '自我描述', '个人优势', '项目经历', '项目经验', '项目描述', '教育经历', '学习经历', '工作经历', '工作经验', '实习经历', - '技能特长', '技能', '特长', '专长', '技能专长', '专业技能', '职业技能', '个人评价'] + '技能特长', '技能', '特长', '专长', '技能专长', '专业技能', '职业技能', '个人评价', '语言', '获奖', '证书', '获奖记录', '获奖经历'] # 自我描述 chkList1 = ['自我评价', '自我描述', '个人优势', '个人评价'] # 项目经验 @@ -247,6 +369,10 @@ def fmtTxt(txt, istable=0): chkList4 = ['工作经历', '工作经验', '实习经历'] # 个人技能 chkList5 = ['技能特长', '技能', '特长', '专长', '技能专长', '专业技能', '职业技能'] + # 语言能力 + chkList6 = ['语言'] + # 获奖经历 + chkList7 = ['获奖', '证书', '获奖记录', '获奖经历'] fmtList = [] # 返回拼接好的字符串列表 trueIndex = 0 fmtStr = '' @@ -309,9 +435,35 @@ def fmtTxt(txt, istable=0): continue # 个人技能 for i in chkList5: + # 判断是不是以关键字开头 + if not text.startswith(i, 0): + continue + else: + if i in text: + fmtStr = text + nowChkList = [chk for chk in chkStr if chk not in chkList5] + stop_int = 1 + break + if fmtStr: + continue + # 语言能力 + for i in chkList6: + # 判断是不是以关键字开头 + if not text.startswith(i, 0): + continue + else: + if i in text: + fmtStr = text + nowChkList = [chk for chk in chkStr if chk not in chkList6] + stop_int = 1 + break + if fmtStr: + continue + # 获奖经历 + for i in chkList7: if i in text: fmtStr = text - nowChkList = [chk for chk in chkStr if chk not in chkList5] + nowChkList = [chk for chk in chkStr if chk not in chkList7] stop_int = 1 break continue @@ -349,6 +501,8 @@ def fmtList(txtlist, dates): chkList3 = ['教育经历', '学习经历'] chkList4 = ['工作经历', '工作经验', '实习经历'] chkList5 = ['技能特长', '技能', '特长', '专长', '技能专长', '专业技能', '职业技能'] + chkList6 = ['语言'] + chkList7 = ['获奖', '证书', '获奖记录', '获奖经历'] # 自我评价 review = [] # 项目经验 @@ -359,6 +513,10 @@ def fmtList(txtlist, dates): upgrade = [] # 技能特长 specialty = [] + # 语言能力 + language = [] + # 获奖经历 + remembrance = [] for text in txtlist: ischk = 0 # 自我评价 @@ -401,16 +559,58 @@ def fmtList(txtlist, dates): break if ischk: continue + # 语言能力 + for i in chkList6: + if i in text: + language.append(text) + ischk = 1 + break + if ischk: + continue + # 获奖经历 + for i in chkList7: + if i in text: + remembrance.append(text) + ischk = 1 + break + if ischk: + continue # 取出工作经验里面的公司名和时间 work_list = [] if len(work) > 0: works = '' for i in work: works += i - schema = ['公司名', '时间'] + schema = ['公司名', '职责', '工作内容'] ie = Taskflow('information_extraction', schema=schema) text_lists = ie(works) work_list = chkworlkandtime(text_lists) + else: + work_list = ["{'company_name ': '','position_name': '','duty': ''}"] + # 取出获奖经历里面的公司名和时间 + remembrance_list = [] + if len(remembrance) > 0: + remembrances = '' + for i in remembrance: + remembrances += i + schema = ['奖项名', '时间'] + ie = Taskflow('information_extraction', schema=schema) + text_lists = ie(remembrances) + remembrance_list = chkworlkandtime2(text_lists) + else: + remembrance_list = ["{'prize_name ': '', 'prize_time': '', }"] + # 取出语言能力里面的语言,掌握程度,听说,读写 + language_list = [] + if len(language) > 0: + works = '' + for i in work: + works += i + schema = ['语言', '掌握程度', '听说', '读写'] + ie = Taskflow('information_extraction', schema=schema) + text_lists = ie(works) + language_list = chkworlkandtime1(text_lists) + else: + language_list = ["{'language_name ': '', 'has_sleep ': '', 'reading': '', 'writing': ''}"] # review自我评价, project项目经验,work工作经验,work具体工作的公司和时间,upgrade教育经历,specialty技能特长 dates.update({ 'review': review, @@ -419,6 +619,8 @@ def fmtList(txtlist, dates): 'work_list': work_list, 'upgrade': upgrade, 'specialty_do': specialty, + 'language': language_list, + 'remembrance': remembrance_list, }) return dates