diff --git a/flask_app/routes/偏离表main.py b/flask_app/routes/偏离表main.py index 1a56446..5f2dc14 100644 --- a/flask_app/routes/偏离表main.py +++ b/flask_app/routes/偏离表main.py @@ -5,7 +5,8 @@ import string import time from flask_app.general.doubao import doubao_model -from flask_app.general.format_change import pdf2docx, docx2pdf +from flask_app.general.format_change import pdf2docx, docx2pdf,doc2docx +from flask_app.general.insert_del_pagemark import insert_mark from flask_app.general.json_utils import clean_json_string from flask_app.general.通义千问long import upload_file from flask_app.货物标.截取pdf货物标版 import truncate_pdf_specific_goods @@ -353,10 +354,14 @@ def get_tech_and_business_deviation(file_path,file_type,unique_id,output_folder) global logger logger = get_global_logger(unique_id) if file_type == 1: # docx + docx_path=file_path pdf_path = docx2pdf(file_path) # 将docx转换为pdf以供后续处理 elif file_type == 2: # pdf + # docx_path=pdf2docx(file_path) + docx_path="" pdf_path = file_path elif file_type == 3: # doc + docx_path=doc2docx(file_path) pdf_path = docx2pdf(file_path) else: logger.error("Unsupported file type provided. 
Preprocessing halted.") @@ -366,14 +371,17 @@ def get_tech_and_business_deviation(file_path,file_type,unique_id,output_folder) notice_path=files[0] qualification_file=files[1] procurement_file=files[2] + # invalid_path=docx_path + invalid_path=docx_path if docx_path != "" else pdf_path #可能是pdf docx if not procurement_file: - procurement_file=pdf_path #直接传整份文件 + procurement_file=invalid_path + tech_deviation={} with concurrent.futures.ThreadPoolExecutor() as executor: # 提交任务到线程池 - future_procurement = executor.submit(fetch_procurement_reqs, procurement_file, pdf_path) + future_procurement = executor.submit(fetch_procurement_reqs, procurement_file, invalid_path) time.sleep(1) - future_review = executor.submit(combine_qualification_review, pdf_path, qualification_file, notice_path) + future_review = executor.submit(combine_qualification_review, invalid_path, qualification_file, notice_path) try: # 获取函数执行结果 diff --git a/flask_app/routes/小解析main.py b/flask_app/routes/小解析main.py index 1d6dc49..4106b0b 100644 --- a/flask_app/routes/小解析main.py +++ b/flask_app/routes/小解析main.py @@ -77,8 +77,7 @@ def little_parse_engineering(output_folder, pdf_path): # 假设最后一个文件是需要处理的基础信息文件 baseinfo_file_path = files[-1] if not baseinfo_file_path: - baseinfo_file_path=truncate_pdf_main(pdf_path,output_folder,5)[0] - # baseinfo_file_path=pdf_path #截取失败就传整份文件 + baseinfo_file_path=truncate_pdf_main(pdf_path,output_folder,5)[0] #invalid_path # 上传文件并获取文件 ID file_id = upload_file(baseinfo_file_path) # 注意:以下路径被硬编码,确保该路径存在并且正确 diff --git a/flask_app/货物标/商务服务其他要求提取.py b/flask_app/货物标/商务服务其他要求提取.py index 0a86322..5a9abe9 100644 --- a/flask_app/货物标/商务服务其他要求提取.py +++ b/flask_app/货物标/商务服务其他要求提取.py @@ -7,6 +7,7 @@ from flask_app.general.doubao import read_txt_to_string, pdf2txt from flask_app.general.json_utils import combine_json_results, clean_json_string from flask_app.general.通义千问long import upload_file, qianwen_long_stream from flask_app.货物标.截取pdf货物标版 import extract_common_header, clean_page_content 
+from flask_app.general.format_change import docx2pdf import concurrent.futures from flask_app.general.doubao import doubao_model @@ -256,7 +257,10 @@ def generate_template(required_keys,full_text, type=1): def get_business_requirements(procurement_path,processed_filepath): required_keys = ["技\s*术\s*要\s*求", "商\s*务\s*要\s*求", "服\s*务\s*要\s*求", "其\s*他\s*要\s*求","总\s*体\s*要\s*求","建\s*设\s*要\s*求","进\s*度\s*要\s*求","工\s*期\s*要\s*求","质\s*保\s*要\s*求","培\s*训\s*要\s*求","售\s*后\s*要\s*求"] - contained_keys = find_exists(procurement_path, required_keys) + procurement_pdf_path=procurement_path + if procurement_path.lower().endswith(('.doc', '.docx')): + procurement_pdf_path=docx2pdf(procurement_path) + contained_keys = find_exists(procurement_pdf_path, required_keys) print(contained_keys) if not contained_keys: return {} diff --git a/flask_app/货物标/技术参数要求提取.py b/flask_app/货物标/技术参数要求提取.py index f283f29..05e6797 100644 --- a/flask_app/货物标/技术参数要求提取.py +++ b/flask_app/货物标/技术参数要求提取.py @@ -373,6 +373,8 @@ def get_technical_requirements(invalid_path,processed_filepath): if '否' in judge_res: model_type = 0 # 使用qianwen-long+invalid_path print("no!调用invalid_path") + if invalid_path.lower().endswith('.pdf'): #确保上传的是docx + invalid_path = pdf2docx(invalid_path) file_id=upload_file(invalid_path) user_query = generate_prompt(judge_res) model_res=qianwen_long(file_id,user_query) diff --git a/flask_app/货物标/提取采购需求main.py b/flask_app/货物标/提取采购需求main.py index 7ed9e66..ad024a2 100644 --- a/flask_app/货物标/提取采购需求main.py +++ b/flask_app/货物标/提取采购需求main.py @@ -12,6 +12,7 @@ from flask_app.货物标.商务服务其他要求提取 import get_business_requ # 获取采购清单 def fetch_procurement_reqs(procurement_path, invalid_path): + #procurement_path可能是pdf\docx # 定义默认的 procurement_reqs 字典 DEFAULT_PROCUREMENT_REQS = { "采购需求": "",