导入候选人

This commit is contained in:
李宗振 2022-07-11 18:52:07 +08:00
parent fa32450898
commit 4ab0064258
4 changed files with 108 additions and 68 deletions

View File

@ -2,16 +2,17 @@ import datetime
import mimetypes
from collections import defaultdict
import time
import os
from urllib.parse import quote
import re
from clickhouse_driver import Client
import pandas as pd
import numpy as np
from fastapi import APIRouter, Depends, Request
from fastapi import APIRouter, Depends, Request, File, UploadFile
from motor.motor_asyncio import AsyncIOMotorDatabase
from pandas import DataFrame
from starlette.responses import StreamingResponse
from utils.jianli import get_resume
import crud, schemas
from common import *
@ -79,6 +80,33 @@ async def interview_insert(
return schemas.Msg(code=200, msg='ok', data=data)
# 写入面试数据
@router.post("/interview_file_insert")
async def interview_insert(
        request: Request,
        file: UploadFile = File(...),
        db: CKDrive = Depends(get_ck_db),
) -> schemas.Msg:
    """Save an uploaded resume file, parse it and insert the row into HR.resumes.

    The upload is written into the ``jianli`` directory under the process
    working directory, handed to ``get_resume`` for extraction, and the
    extracted data is inserted through ``db.execute_dict``.

    Args:
        request: The incoming request (unused here, kept for the route signature).
        file: The uploaded resume file.
        db: Database driver injected by ``get_ck_db``.

    Returns:
        ``schemas.Msg`` with code 200 and the result of the insert on success,
        or code 400 when the upload has no usable filename or cannot be
        written to disk.
    """
    # NOTE(review): this function name is also used by the "/interview_insert"
    # route earlier in the module; both routes register, but the module-level
    # name is rebound -- consider renaming once callers are confirmed.

    # Staging directory: "jianli" under the current working directory
    # (os.getcwd() is the process cwd, not the directory of this file).
    path_data = os.getcwd() + '/jianli'

    # The filename comes from the client: keep only its last path component so
    # the upload cannot escape the staging directory ("../", absolute paths).
    raw_name = file.filename or ''
    filename = os.path.basename(raw_name.replace('\\', '/'))
    if not filename or filename in ('.', '..'):
        return schemas.Msg(code=400, msg='上传文件有误', data=None)

    contents = await file.read()
    try:
        os.makedirs(path_data, exist_ok=True)
        # get_resume reads path_data + "/" + file, so the file must be written
        # inside path_data; concatenating without a separator wrote it beside
        # the directory instead.
        with open(os.path.join(path_data, filename), "wb") as f:
            f.write(contents)
    except OSError:
        return schemas.Msg(code=400, msg='上传文件有误', data=None)

    insert_data = get_resume(filename, path_data)
    sql = "insert into HR.resumes(interview_name, interview_type, interview_sign, feedback, interview_round, star_time, end_time, event_time, uid, name, phone, job_name, hr_name, work_exp, interview_stage, owner_name, education, work_undergo, school, specialty, mmended_state, mail, account, id_card, gender, interview_state, graduate_time, counts) values"
    data = await db.execute_dict(sql, insert_data)
    return schemas.Msg(code=200, msg='ok', data=data)
@app.post("/file_upload")
async def file_upload(file: UploadFile = File(...)):
# @router.post("/interview_insert")
# async def interview_insert(
# request: Request,

View File

@ -9,7 +9,6 @@ from win32com import client as wc
from pdf2docx import Converter
# 文件路径
PATH_DATA = os.path.abspath("C:/Users/Administrator/Desktop/面试简历")
schema = ['姓名', '所在地', '户口所在地', '籍贯', '婚姻状况', '民族', '身高', '电话', 'tel', '应聘职位', '到岗时间', '学历', '毕业学校', '专业',
'期望薪资', '在校时间', '电子邮箱', '工作经验', 'Email', '性别', '年龄'
]
@ -363,26 +362,27 @@ def fmtList(txtlist, dates):
return dates
def get_resume():
for root, dirs, files in os.walk(PATH_DATA):
for file in files: # 一个file就是一份简历
url = PATH_DATA + f"/{file}"
def get_resume(file, path_data):
# for root, dirs, files in os.walk(PATH_DATA):
# for file in files: # 一个file就是一份简历
url = path_data + f"/{file}"
if os.path.splitext(file)[1] == '.pdf':
pdf_docx(PATH_DATA, file) # 转为docx
pdf_docx(path_data, file) # 转为docx
name = file.split('.')[0]
open_txt = docx.Document(PATH_DATA + f"/{name}.docx") # 打开docx
os.remove(PATH_DATA + f"/{name}.docx") # 删除生成的文件
open_txt = docx.Document(path_data + f"/{name}.docx") # 打开docx
os.remove(path_data + f"/{name}.docx") # 删除生成的文件
txt = getText_pdf(url) # 打开pdf格式文件转txt
# txt = getText_docx(PATH_DATA + f"\{name}.docx")
elif os.path.splitext(file)[1] == '.docx':
open_txt = docx.Document(url) # 打开docx将用来读取每一段的内容
txt = getText_docx(url) # 打开docx格式文件转txt
elif os.path.splitext(file)[1] == '.doc':
doc_docx(PATH_DATA, file) # 转为docx
doc_docx(path_data, file) # 转为docx
name = file.split('.')[0]
open_txt = docx.Document(PATH_DATA + f"/{name}.docx") # 打开docx
txt = getText_docx(PATH_DATA + f"/{name}.docx") # 打开docx格式文件转txt
os.remove(PATH_DATA + f"/{name}.docx") # 删除生成的文件
open_txt = docx.Document(path_data + f"/{name}.docx") # 打开docx
txt = getText_docx(path_data + f"/{name}.docx") # 打开docx格式文件转txt
os.remove(path_data + f"/{name}.docx") # 删除生成的文件
ie = Taskflow('information_extraction', schema=schema) # 花费时间会安装文件
# pprint(ie(txt)) # 姓名,电话,电子邮箱,民族,毕业院校,专业,工作经验,婚姻状况
# 获取的基础数据
@ -431,4 +431,4 @@ def get_resume():
if __name__ == '__main__':
get_resume()
get_resume(file, path_data)

View File

@ -0,0 +1,12 @@
路由: /api/v1/itr/interview_file_insert
参数:
文件
返回值:
{
"code": 200,
"msg": "ok",
"data": 1 # 成功添加的数据条数
}