diff --git a/flask_app/货物标/商务服务其他要求提取.py b/flask_app/货物标/商务服务其他要求提取.py index c935778..f7bac2b 100644 --- a/flask_app/货物标/商务服务其他要求提取.py +++ b/flask_app/货物标/商务服务其他要求提取.py @@ -7,7 +7,7 @@ from flask_app.general.doubao import read_txt_to_string, pdf2txt from flask_app.general.json_utils import combine_json_results, clean_json_string from flask_app.general.通义千问long import upload_file, qianwen_long_stream from flask_app.货物标.截取pdf货物标版 import extract_common_header, clean_page_content -from flask_app.general.format_change import docx2pdf +from flask_app.general.format_change import docx2pdf, pdf2docx import concurrent.futures from flask_app.general.doubao import doubao_model @@ -255,26 +255,60 @@ def generate_template(required_keys,full_text, type=1): user_query_template += f"\n\n文件内容:{full_text}" return user_query_template -def get_business_requirements(procurement_path,processed_filepath): - required_keys = ["技\s*术\s*要\s*求", "商\s*务\s*要\s*求", "服\s*务\s*要\s*求", "其\s*他\s*要\s*求","总\s*体\s*要\s*求","建\s*设\s*要\s*求","进\s*度\s*要\s*求","工\s*期\s*要\s*求","质\s*保\s*要\s*求","培\s*训\s*要\s*求","售\s*后\s*要\s*求"] - procurement_pdf_path=procurement_path + +def get_business_requirements(procurement_path, processed_filepath, model_type): + required_keys = ["技\s*术\s*要\s*求", "商\s*务\s*要\s*求", "服\s*务\s*要\s*求", "其\s*他\s*要\s*求", + "总\s*体\s*要\s*求", "建\s*设\s*要\s*求", "进\s*度\s*要\s*求", "工\s*期\s*要\s*求", + "质\s*保\s*要\s*求", "培\s*训\s*要\s*求", "售\s*后\s*要\s*求"] + + # 将 doc/docx 转换为 pdf + procurement_pdf_path = procurement_path if procurement_path.lower().endswith(('.doc', '.docx')): procurement_pdf_path = docx2pdf(procurement_path) + + # 查找包含的关键词 contained_keys = find_exists(procurement_pdf_path, required_keys) print(contained_keys) if not contained_keys: return {} - # queries = generate_queries(truncate_file, contained_keys) + + # 读取文件全文 full_text = read_txt_to_string(processed_filepath) + + # 生成业务查询和技术查询 busi_user_query = generate_template(contained_keys, full_text, 1) tech_user_query = generate_template(contained_keys, full_text, 2) - final_res={} + + # 初始化结果存储 + final_res = {} + + # 如果是非模型调用,需要提前上传文件并获取 file_id + file_id = None + if not model_type: + procurement_docx_path=procurement_path + if procurement_path.lower().endswith('.pdf'): + procurement_docx_path = pdf2docx(procurement_path) + file_id = upload_file(procurement_docx_path) # 只上传一次文件,避免冗余调用 + + # 并行处理业务和技术查询 with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: futures = [] if busi_user_query: - futures.append(executor.submit(doubao_model, busi_user_query)) + if model_type: + # 如果是模型调用,直接使用 doubao_model + futures.append(executor.submit(doubao_model, busi_user_query)) + else: + # 使用 qianwen_long_stream 并传入 file_id + futures.append(executor.submit(qianwen_long_stream, file_id, busi_user_query, 2, 1)) + if tech_user_query: - futures.append(executor.submit(doubao_model, tech_user_query)) + if model_type: + # 如果是模型调用,直接使用 doubao_model + futures.append(executor.submit(doubao_model, tech_user_query)) + else: + # 使用 qianwen_long_stream 并传入 file_id + futures.append(executor.submit(qianwen_long_stream, file_id, tech_user_query, 2, 1)) + # 获取结果 for future in concurrent.futures.as_completed(futures): try: @@ -283,6 +317,7 @@ def get_business_requirements(procurement_path,processed_filepath): final_res.update(clean_json_string(result)) except Exception as e: print(f"An error occurred: {e}") + return final_res diff --git a/flask_app/货物标/提取采购需求main.py b/flask_app/货物标/提取采购需求main.py index 0cec0a2..037477d 100644 --- a/flask_app/货物标/提取采购需求main.py +++ b/flask_app/货物标/提取采购需求main.py @@ -47,7 +47,7 @@ def fetch_procurement_reqs(procurement_path, invalid_path): # 提交任务给线程池 future_technical = executor.submit(get_technical_requirements, invalid_path, processed_filepath,model_type) time.sleep(0.5) # 保持原有的延时 - future_business = executor.submit(get_business_requirements, procurement_path, processed_filepath) + future_business = executor.submit(get_business_requirements, procurement_path, processed_filepath,model_type) # 获取并行任务的结果 technical_requirements = future_technical.result() business_requirements = future_business.result()