导入候选人

This commit is contained in:
李宗振 2022-07-11 18:52:07 +08:00
parent fa32450898
commit 4ab0064258
4 changed files with 108 additions and 68 deletions

View File

@ -2,16 +2,17 @@ import datetime
import mimetypes import mimetypes
from collections import defaultdict from collections import defaultdict
import time import time
import os
from urllib.parse import quote from urllib.parse import quote
import re import re
from clickhouse_driver import Client from clickhouse_driver import Client
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from fastapi import APIRouter, Depends, Request from fastapi import APIRouter, Depends, Request, File, UploadFile
from motor.motor_asyncio import AsyncIOMotorDatabase from motor.motor_asyncio import AsyncIOMotorDatabase
from pandas import DataFrame from pandas import DataFrame
from starlette.responses import StreamingResponse from starlette.responses import StreamingResponse
from utils.jianli import get_resume
import crud, schemas import crud, schemas
from common import * from common import *
@ -79,6 +80,33 @@ async def interview_insert(
return schemas.Msg(code=200, msg='ok', data=data) return schemas.Msg(code=200, msg='ok', data=data)
# Write interview data parsed from an uploaded resume file
@router.post("/interview_file_insert")
async def interview_insert(
        request: Request,
        file: UploadFile = File(...),
        db: CKDrive = Depends(get_ck_db),
) -> schemas.Msg:
    """Save an uploaded resume, parse it and insert the result into HR.resumes.

    The upload is stored in the ``jianli`` folder under the process working
    directory, handed to ``get_resume`` for parsing, and the parsed rows are
    inserted through ``db.execute_dict``.

    Args:
        request: Incoming request (not used by the handler body).
        file: The uploaded resume file.
        db: Database driver used to run the insert.

    Returns:
        ``schemas.Msg`` with code 200 and the value returned by
        ``db.execute_dict`` on success, or code 400 when the upload has no
        usable file name or cannot be written to disk.
    """
    # NOTE(review): the diff context suggests another handler in this module is
    # also named interview_insert; routing still works because registration
    # happens in the decorator, but a distinct name would be clearer - confirm.

    # Resumes live in "<working directory>/jianli" (os.getcwd(), not the
    # directory of this source file).
    path_data = os.getcwd() + '/jianli'
    contents = await file.read()

    # The file name is client-controlled: keep only its last path component so
    # a name such as "../../x" cannot escape the upload folder.
    filename = os.path.basename((file.filename or '').replace('\\', '/'))
    if not filename:
        return schemas.Msg(code=400, msg='上传文件有误', data=None)

    try:
        os.makedirs(path_data, exist_ok=True)
        # Join with a separator so the file lands inside path_data, which is
        # where get_resume looks for it (path_data + "/" + file name).
        with open(os.path.join(path_data, filename), "wb") as f:
            f.write(contents)
    except OSError:
        return schemas.Msg(code=400, msg='上传文件有误', data=None)

    # presumably returns the rows to insert; verify against utils.jianli.get_resume
    insert_data = get_resume(filename, path_data)
    sql = "insert into HR.resumes(interview_name, interview_type, interview_sign, feedback, interview_round, star_time, end_time, event_time, uid, name, phone, job_name, hr_name, work_exp, interview_stage, owner_name, education, work_undergo, school, specialty, mmended_state, mail, account, id_card, gender, interview_state, graduate_time, counts) values"
    data = await db.execute_dict(sql, insert_data)
    return schemas.Msg(code=200, msg='ok', data=data)
@app.post("/file_upload")
async def file_upload(file: UploadFile = File(...)):
# @router.post("/interview_insert") # @router.post("/interview_insert")
# async def interview_insert( # async def interview_insert(
# request: Request, # request: Request,

View File

@ -9,7 +9,6 @@ from win32com import client as wc
from pdf2docx import Converter from pdf2docx import Converter
# 文件路径 # 文件路径
PATH_DATA = os.path.abspath("C:/Users/Administrator/Desktop/面试简历")
schema = ['姓名', '所在地', '户口所在地', '籍贯', '婚姻状况', '民族', '身高', '电话', 'tel', '应聘职位', '到岗时间', '学历', '毕业学校', '专业', schema = ['姓名', '所在地', '户口所在地', '籍贯', '婚姻状况', '民族', '身高', '电话', 'tel', '应聘职位', '到岗时间', '学历', '毕业学校', '专业',
'期望薪资', '在校时间', '电子邮箱', '工作经验', 'Email', '性别', '年龄' '期望薪资', '在校时间', '电子邮箱', '工作经验', 'Email', '性别', '年龄'
] ]
@ -363,26 +362,27 @@ def fmtList(txtlist, dates):
return dates return dates
def get_resume(): def get_resume(file, path_data):
for root, dirs, files in os.walk(PATH_DATA): # for root, dirs, files in os.walk(PATH_DATA):
for file in files: # 一个file就是一份简历 # for file in files: # 一个file就是一份简历
url = PATH_DATA + f"/{file}"
url = path_data + f"/{file}"
if os.path.splitext(file)[1] == '.pdf': if os.path.splitext(file)[1] == '.pdf':
pdf_docx(PATH_DATA, file) # 转为docx pdf_docx(path_data, file) # 转为docx
name = file.split('.')[0] name = file.split('.')[0]
open_txt = docx.Document(PATH_DATA + f"/{name}.docx") # 打开docx open_txt = docx.Document(path_data + f"/{name}.docx") # 打开docx
os.remove(PATH_DATA + f"/{name}.docx") # 删除生成的文件 os.remove(path_data + f"/{name}.docx") # 删除生成的文件
txt = getText_pdf(url) # 打开pdf格式文件转txt txt = getText_pdf(url) # 打开pdf格式文件转txt
# txt = getText_docx(PATH_DATA + f"\{name}.docx") # txt = getText_docx(PATH_DATA + f"\{name}.docx")
elif os.path.splitext(file)[1] == '.docx': elif os.path.splitext(file)[1] == '.docx':
open_txt = docx.Document(url) # 打开docx将用来读取每一段的内容 open_txt = docx.Document(url) # 打开docx将用来读取每一段的内容
txt = getText_docx(url) # 打开docx格式文件转txt txt = getText_docx(url) # 打开docx格式文件转txt
elif os.path.splitext(file)[1] == '.doc': elif os.path.splitext(file)[1] == '.doc':
doc_docx(PATH_DATA, file) # 转为docx doc_docx(path_data, file) # 转为docx
name = file.split('.')[0] name = file.split('.')[0]
open_txt = docx.Document(PATH_DATA + f"/{name}.docx") # 打开docx open_txt = docx.Document(path_data + f"/{name}.docx") # 打开docx
txt = getText_docx(PATH_DATA + f"/{name}.docx") # 打开docx格式文件转txt txt = getText_docx(path_data + f"/{name}.docx") # 打开docx格式文件转txt
os.remove(PATH_DATA + f"/{name}.docx") # 删除生成的文件 os.remove(path_data + f"/{name}.docx") # 删除生成的文件
ie = Taskflow('information_extraction', schema=schema) # 花费时间会安装文件 ie = Taskflow('information_extraction', schema=schema) # 花费时间会安装文件
# pprint(ie(txt)) # 姓名,电话,电子邮箱,民族,毕业院校,专业,工作经验,婚姻状况 # pprint(ie(txt)) # 姓名,电话,电子邮箱,民族,毕业院校,专业,工作经验,婚姻状况
# 获取的基础数据 # 获取的基础数据
@ -431,4 +431,4 @@ def get_resume():
if __name__ == '__main__': if __name__ == '__main__':
get_resume() get_resume(file, path_data)

View File

@ -0,0 +1,12 @@
路由: /api/v1/itr/interview_file_insert
参数:
文件
返回值:
{
"code": 200,
"msg": "ok",
"data": 1 # 成功添加的数据条数
}