简历批量导入人才库

This commit is contained in:
李宗振 2022-10-14 17:32:40 +08:00
parent 023ba9f856
commit b5307ed0b6
3 changed files with 296 additions and 26 deletions

View File

@ -2704,8 +2704,8 @@ async def find_column_update(
current_user: schemas.UserDB = Depends(deps.get_current_user)
) -> schemas.Msg:
""" 修改一条查询字段对照表数据 """
data = await crud.api_find_column.update_columns(db, update_data, where={'column_type': column_type})
return schemas.Msg(code=200, msg='ok', data=data)
await crud.api_find_column.update_columns(db, update_data, where={'column_type': column_type})
return schemas.Msg(code=200, msg='ok', data='')
# 报表查询字段对照列表
@ -2773,6 +2773,7 @@ async def currency_interview_form(
}
return schemas.Msg(code=200, msg='ok', data=res_data)
# 通用入职人员分析报表
@router.post("/currency_worker_form")
async def currency_worker_form(
@ -2822,6 +2823,7 @@ async def currency_worker_form(
}
return schemas.Msg(code=200, msg='ok', data=res_data)
# 读取邮箱简历方法
def email_user(dir_name, mail_dict):
file_list = [] # 简历文件

View File

@ -30,7 +30,7 @@ from db import get_database
from db.ckdb import get_ck_db, CKDrive
from datetime import timedelta
from models.interview_zsgc import InterviewDo
from utils import get_time, qujian_time, Download_xlsx, send_str_mail, doc2pdf
from utils import get_time, qujian_time, Download_xlsx, send_str_mail, doc2pdf, png2pdf
router = APIRouter()
@ -709,6 +709,258 @@ async def file_to_hw(
return schemas.Msg(code=400, msg='上传华为云失败或者解析失败', data=None)
# Degree name -> integer code written to the `education` column.
_EDUCATION_CODES = {
    '大专': 1,
    '本科': 2,
    '研究生': 3,
    '博士': 4,
    '硕士': 5,
}

# Parsed fields that may hold "YYYY-MM", "YYYY.MM.DD", ... style dates.
_RESUME_DATE_FIELDS = (
    'in_time', 'out_time', 'birthday', 'star_time', 'end_time', 'graduate_time',
)

# List fields whose items are stored as JSON strings.
_RESUME_JSON_LIST_FIELDS = ('project_undergo', 'work_list', 'language', 'remembrance')


def _sql_quote(value):
    """Return `value` as a single-quoted SQL string literal with `\\` and `'` escaped."""
    return "'" + value.replace('\\', '\\\\').replace("'", "\\'") + "'"


def _first_number(text):
    """Return the first integer/decimal substring of `text`, or None when there is none."""
    match = re.search(r"\d+\.?\d*", text)
    return match.group() if match else None


def _normalize_date_fields(record):
    """Rewrite the date-like fields of `record` in place as ISO date strings.

    '-' and '.' separators are treated like '/'. Two-part values are parsed as
    year/month, three-part values as year/month/day; anything else is left as is.
    Raises ValueError (from strptime) when a value does not match its format.
    """
    for field in _RESUME_DATE_FIELDS:
        raw = record.get(field, '')
        if not raw:
            continue
        normalized = raw.replace('-', '/').replace('.', '/')
        parts = normalized.split('/')
        if len(parts) == 2:
            record[field] = str(datetime.strptime(normalized, "%Y/%m").date())
        elif len(parts) == 3:
            record[field] = str(datetime.strptime(normalized, "%Y/%m/%d").date())


def _duplicate_where(data):
    """Build the WHERE clause used to look for an already-stored resume.

    Uses whichever of name / phone / gender are non-empty in `data`.
    Returns '' when none of them is usable.
    """
    clauses = []
    for column in ('name', 'phone', 'gender'):
        value = data[column]
        if not value:
            continue
        if isinstance(value, str):
            if not value.strip():
                continue
            # Values come from parsed resume text, so they must be escaped
            # before being embedded in the statement.
            clauses.append(column + ' = ' + _sql_quote(value))
        else:
            clauses.append(column + ' = ' + str(value))
    return ' and '.join(clauses)


# Bulk-import resumes
@router.post("/files_to_hw")
async def files_to_hw(
        request: Request,
        file_path: str,
        db: CKDrive = Depends(get_ck_db),
        current_user: schemas.UserDB = Depends(deps.get_current_user)
) -> schemas.Msg:
    """Bulk-import every resume file found in the directory `file_path`.

    For each pdf/doc/docx/word/png/jpg/jpeg file: convert it to PDF when needed,
    extract and normalise its fields, look for an existing row in HR.resumes with
    the same name/phone/gender, upload the PDF to object storage (reusing the
    existing uid for a duplicate), and queue non-duplicates for insertion.
    Files that fail are collected and skipped so one bad file does not abort
    the batch.

    :param request: incoming request (unused here)
    :param file_path: directory containing the resume files
    :param db: database handle used for the duplicate check and the insert
    :param current_user: authenticated user (dependency only)
    :return: schemas.Msg with code=0 and msg='ok'
    """
    # NOTE(review): `file_path` is caller-supplied and passed straight to
    # os.listdir — confirm it is restricted to a trusted directory.
    # NOTE(review): sibling endpoints return code=200 on success; this one
    # returns code=0 — confirm which one clients expect.
    insert_list = []  # rows to insert
    error_files = []  # files that failed while being parsed/processed
    error_to_fw_files = []  # files whose upload failed
    # Default resume document; parsed fields are layered on top of a copy.
    data_mode = {
        "interview_name": "",
        "interview_type": 1,
        "interview_sign": 0,
        "hope_money": "",
        "feedback": 0,
        "interview_round": 0,
        "event_time": datetime.now(),
        "name": "",
        "phone": "",
        "job_name": "",
        "hr_name": "",
        "work_exp": 0,
        "interview_stage": -1,
        "owner_name": 2,
        "education": 1,
        "work_undergo": [],
        "project_undergo": [],
        "work_list": [],
        "school": "",
        "at_school": "",
        "specialty": "",
        "specialty_do": [],
        "mmended_state": 0,
        "mail": "",
        "account": "",
        "id_card": "",
        "gender": "",
        "age": 0,
        "gam": "",
        "interview_state": 1,
        "counts": 1,
        "nation": "",
        "review": "",
        "upgrade": [],
        "come_time": "",
        "now_money": "",
        "men_state": 1,
        "teacher_state": 1,
        "teacher_back": 1,
        "offer_state": 1,
        "offer_exam_state": 1,
        "notice_state": 1,
        "pass_why": 0,
        "pass_text": "",
        "now_address": "",
        "language": [],
        "remembrance": [],
        "file_url": '',
        "hr_manner": 2,
    }
    for file in os.listdir(file_path):
        try:
            fn = file_path + '/' + file
            end_str = file.split('.')[-1].lower()  # file type
            # Only these file types are supported.
            if end_str not in ['pdf', 'doc', 'docx', 'png', 'jpg', 'jpeg', 'word']:
                continue
            if end_str in ['doc', 'docx', 'word']:  # Word documents -> pdf
                fn, file = doc2pdf(fn, file_path, file)
            if end_str in ['png', 'jpg', 'jpeg']:  # images -> pdf
                fn, file = png2pdf(file_path, file)
            data_mode1 = deepcopy(data_mode)
            uid = get_uid()
            data_mode1['uid'] = uid
            # Extract the text and parse it into resume fields.
            chk_txt = getText_pdf(file_path + '/' + file)
            data = fmt_txt(chk_txt)

            # Education: degree name -> int code (unknown names map to 1).
            education = data['education']
            if education and isinstance(education, str):
                data['education'] = _EDUCATION_CODES.get(education, 1)

            # Age: default 20 when missing, without digits, or longer than 2 chars.
            age = data['age']
            if not age:
                data['age'] = 20
            elif isinstance(age, str):
                number = _first_number(age)
                if number is None or len(number) > 2:
                    data['age'] = 20
                else:
                    # int(float(..)) because the pattern can match "2." .
                    data['age'] = int(float(number))

            # Work experience: default 0 when missing, without digits, or longer than 3 chars.
            work_exp = data['work_exp']
            if not work_exp:
                data['work_exp'] = 0
            elif isinstance(work_exp, str):
                number = _first_number(work_exp)
                if number is None or len(number) > 3:
                    data['work_exp'] = 0
                else:
                    data['work_exp'] = float(number)

            data_mode1.update(data)
            # The parser's *_list keys are stored under the column names.
            if 'remembrance_list' in data_mode1:
                data_mode1['remembrance'] = data_mode1.pop('remembrance_list')
            if 'language_list' in data_mode1:
                data_mode1['language'] = data_mode1.pop('language_list')
            # Serialise each item of the list fields to a JSON string.
            for field in _RESUME_JSON_LIST_FIELDS:
                if field in data_mode1:
                    items = data_mode1.get(field, [])
                    data_mode1[field] = [json.dumps(i) for i in items] if items else []
            # Date strings -> ISO date strings.
            _normalize_date_fields(data_mode1)

            # Duplicate check on name, phone and gender.
            whereStr = _duplicate_where(data)
            if not whereStr:
                # Nothing to identify the candidate by: record the file as
                # failed instead of sending a malformed "where" query.
                error_files.append(file)
                continue
            sql = f"select uid from HR.resumes where {whereStr}"
            is_in_data = await db.execute(sql)
            exist = 0
            if is_in_data:
                exist = 1
                # Reuse the stored uid so the upload overwrites the old file.
                uid = list(is_in_data.values())[0]['uid']
            res = obsClient.putFile('legu-cdn-source', 'hrms/' + uid + '.pdf', fn)
            if res.status < 300:
                data_mode1['file_url'] = res.body.objectUrl
                if exist:
                    # Already in the table: the file was refreshed, no new row.
                    continue
                insert_list.append(data_mode1)
            else:
                error_to_fw_files.append(file)
                continue
        except Exception:
            # Best effort per file. `Exception` (not a bare except) so that task
            # cancellation and interpreter exit are not swallowed.
            error_files.append(file)
            continue
    if insert_list:
        # Skip the statement entirely when every file was a duplicate or failed.
        sql = f"insert into HR.resumes(interview_name, interview_type, interview_sign, hope_money, feedback," \
              f" interview_round, event_time, uid, name, phone, job_name, hr_name, work_exp, interview_stage, owner_name," \
              f" education, work_undergo, project_undergo, work_list, school, at_school, specialty, specialty_do, " \
              f"mmended_state, mail, account, id_card, gender, age, gam, interview_state, counts, nation, come_time," \
              f" review, upgrade, now_money, men_state, teacher_state, teacher_back, offer_state, offer_exam_state," \
              f" notice_state, pass_why, pass_text, now_address,language,remembrance, file_url, hr_manner) values"
        await db.execute_dict(sql, insert_list)
    return schemas.Msg(code=0, msg='ok', data='')
# 导入面试数据
@router.post("/interview_file_insert")
async def interview_file_insert(
@ -2312,9 +2564,6 @@ async def interview_find(
return schemas.Msg(code=200, msg='ok', data=res_data)
@router.post("/interview")
async def interview(
request: Request,
@ -2428,6 +2677,7 @@ async def interviews(
return schemas.Msg(code=200, msg='ok', data=datas)
@router.post("/interview_teacher")
async def interview_teacher(
request: Request,
@ -2503,4 +2753,3 @@ async def hint(
res.append(i)
data = sorted(res, key=operator.itemgetter('times'))
return schemas.Msg(code=200, msg='ok', data=data)

View File

@ -1,6 +1,6 @@
#!/usr/bin/python
# coding:utf-8
import os
import random
import time
import datetime
@ -9,14 +9,13 @@ import pandas as pd
import smtplib
from email.mime.text import MIMEText
from email.utils import formataddr
from fitz import fitz
from datetime import timedelta
from datetime import datetime as p1
import calendar
from core.config import Settings
def get_uid():
    """Return a lowercase hex id built from the current time plus a random offset.

    The time part is the epoch time scaled by 10 ** 7 and truncated to an int;
    the offset is a random integer in [0, 10000].
    """
    ticks = int(time.time() * 10 ** 7)
    jitter = random.randint(0, 10000)
    # format(..., 'x') yields the hex digits without the '0x' prefix.
    return format(ticks + jitter, 'x')
@ -358,6 +357,26 @@ def doc2pdf(fn, path_data, filename):
return path_data + '/' + new_filename + '.pdf', new_filename + '.pdf'
# Convert an image to pdf
def png2pdf(dir_path, filename):
    """
    Convert an image file to a PDF saved in the same directory.

    :param dir_path: directory containing the image
    :param filename: image file name inside dir_path
    :return: tuple (path of the generated PDF, file name of the generated PDF)
    """
    img_path = dir_path + '/' + filename
    # Swap only the extension; a plain str.replace would also rewrite any
    # occurrence of the extension text inside the name (e.g. "png_scan.png").
    stem, _ext = os.path.splitext(os.path.basename(img_path))
    new_filename = stem + '.pdf'
    res_path = dir_path + '/' + new_filename
    doc = fitz.open()
    try:
        img_doc = fitz.open(img_path)
        try:
            pdf_bytes = img_doc.convert_to_pdf()
        finally:
            img_doc.close()
        img_pdf = fitz.open('pdf', pdf_bytes)
        try:
            doc.insert_pdf(img_pdf)
        finally:
            img_pdf.close()
        doc.save(res_path)
    finally:
        # Close the documents even on failure so file handles are released.
        doc.close()
    return res_path, new_filename
if __name__ == '__main__':
pass
# fn=r'C:\Users\Administrator\Desktop\面试简历1\智联招聘_张双琪_Web开发工程师_中文.doc'