11.16

2024-11-16 16:14:53 +08:00 · 2024-11-16 16:14:53 +08:00 · ca9e995d2e
commit ca9e995d2e
parent 22a8b5676a
5 changed files with 77 additions and 80 deletions
--- a/flask_app/general/doubao.py
+++ b/flask_app/general/doubao.py
@ -46,6 +46,7 @@ def read_txt_to_string(file_path):
    except Exception as e:
        return f"错误：读取文件时发生错误。详细信息：{e}"
 def doubao_model(full_user_query):
+    print("call doubao...")
    # 相关参数
    url = "https://ark.cn-beijing.volces.com/api/v3/chat/completions"
    api_key = "ad0c363f-1f23-4b13-aba3-698a4f8c3eb8"
@ -64,7 +65,7 @@ def doubao_model(full_user_query):
                "content": full_user_query
            }
        ],
-        "temperature":0.5
+        "temperature":0.3
    }
    response = requests.post(url, headers=headers, json=data)
--- a/flask_app/货物标/基础信息解析main.py
+++ b/flask_app/货物标/基础信息解析main.py
@ -176,7 +176,7 @@ def get_base_info(merged_baseinfo_path,clause_path):
    #         baseinfo_list.append(clean_json_string(response))
    return baseinfo_list
-def combine_basic_info(merged_baseinfo_path, procurement_path,procurement_docpath,clause_path,invalid_path):
+def combine_basic_info(merged_baseinfo_path, procurement_path,clause_path,invalid_path):
    baseinfo_list = []
    temp_list = []
    procurement_reqs = {}
@ -187,7 +187,7 @@ def combine_basic_info(merged_baseinfo_path, procurement_path,procurement_docpat
    # 定义一个线程函数来获取采购需求
    def fetch_procurement_reqs_thread():
        nonlocal procurement_reqs
-        procurement_reqs = fetch_procurement_reqs(procurement_path,procurement_docpath,invalid_path)
+        procurement_reqs = fetch_procurement_reqs(procurement_path,invalid_path)
    # 创建并启动获取基础信息的线程
    thread1 = threading.Thread(target=get_base_info_thread)
    thread1.start()
--- a/flask_app/货物标/技术参数要求提取.py
+++ b/flask_app/货物标/技术参数要求提取.py
@ -202,7 +202,8 @@ def get_technical_requirements(file_path,invalid_path):
                    6.最后一级键内值留空或填'未知'（如数量较多或未知内容）。
                    特殊情况处理：
-                        同一层级下同名但采购要求不同的货物，以'货物名-编号'区分，编号从1递增。
+                        1.同一层级下同名但采购要求不同的货物，以'货物名-编号'区分，编号从1递增。
                        2.对于工程施工、建设相关的采购需求，你无需提取，提取的范围仅限软件、硬件，。
                    示例输出结构：
                    {{
@ -276,73 +277,74 @@ def get_technical_requirements(file_path,invalid_path):
        file_id=upload_file(invalid_path)
        print("调用invalid_path")
        model_res=qianwen_long(file_id,prompt_template1)
        print(model_res)
    else:
        user_query=generate_full_user_query(file_path,prompt_template2)
        model_res=doubao_model(user_query)
        # model_res = qianwen_long(file_id,prompt_template1)
        print(model_res)
    # res = qianwen_long(file_id, user_query1)
-    cleaned_res = clean_json_string(model_res)     #转字典
+#     cleaned_res = clean_json_string(model_res)     #转字典
-    keys_list,good_list,grouped_paths,no_keys_added= generate_key_paths(cleaned_res['采购需求'])  # 提取需要采购的货物清单 key_list：交通监控视频子系统.高清视频抓拍像机 ...
+#     keys_list,good_list,grouped_paths,no_keys_added= generate_key_paths(cleaned_res['采购需求'])  # 提取需要采购的货物清单 key_list：交通监控视频子系统.高清视频抓拍像机 ...
-    if no_keys_added:
+#     if no_keys_added:
-        final_res = postprocess(cleaned_res)
+#         final_res = postprocess(cleaned_res)
-    else:
+#     else:
-        # user_query_template = "请你根据该货物标中采购要求部分的内容，请你给出\"{}\"的技术参数（或采购要求），请以json格式返回结果，外层键名为\"{}\", 键值对中的键是你对该要求的总结，而值需要完全与原文保持一致，不可擅自总结删减。"
+#         # user_query_template = "请你根据该货物标中采购要求部分的内容，请你给出\"{}\"的技术参数（或采购要求），请以json格式返回结果，外层键名为\"{}\", 键值对中的键是你对该要求的总结，而值需要完全与原文保持一致，不可擅自总结删减。"
-        user_query_template = """
+#         user_query_template = """
-请你根据该货物标中采购要求部分的内容，请你给出\"{}\"的技术参数（或采购要求），请以json格式返回结果，键名为\"{}\", 键值为一个列表，列表中包含若干描述\"{}\"的技术参数（或采购要求）的字符串，需与原文完全一致，即若技术参数前存在序号也要保留，但你不可擅自增添或删减。以下为需要考虑的特殊情况：如果该货物没有相关采购要求或技术参数要求，键值为空列表。示例输出格式如下：
+# 请你根据该货物标中采购要求部分的内容，请你给出\"{}\"的技术参数（或采购要求），请以json格式返回结果，键名为\"{}\", 键值为一个列表，列表中包含若干描述\"{}\"的技术参数（或采购要求）的字符串，需与原文完全一致，即若技术参数前存在序号也要保留，但你不可擅自增添或删减。以下为需要考虑的特殊情况：如果该货物没有相关采购要求或技术参数要求，键值为空列表。示例输出格式如下：
-{{
+# {{
-    "摄像机控制键盘": [
+#     "摄像机控制键盘": [
-        "1、支持串行 RS232/RS422 和 IP 混合控制，允许在一个控制器上使用 RS232/RS422/IP 控制单个系统中的摄像机；",
+#         "1、支持串行 RS232/RS422 和 IP 混合控制，允许在一个控制器上使用 RS232/RS422/IP 控制单个系统中的摄像机；",
-        "2、支持 2 组 RS422 串口 VISCA 协议菊花链控制 2x7 台摄像机。"
+#         "2、支持 2 组 RS422 串口 VISCA 协议菊花链控制 2x7 台摄像机。"
-    ]
+#     ]
-}}
+# }}
-            """
+#             """
-        user_query_template_two="""
+#         user_query_template_two="""
-请你根据该货物标中采购要求部分的内容，请你给出\"{}\"的技术参数（或采购要求），由于该货物存在多种不同的采购要求或技术参数，请你请逐一列出，请以json格式返回结果，请你以'货物名-编号'区分多种型号，编号为从 1 开始的自然数，依次递增，即第一个键名为\"{}-1\", 键值为一个列表，列表中包含若干描述\"{}\"的技术参数（或采购要求）的字符串，需与原文完全一致，即若技术参数前存在序号也要保留，但你不可擅自增添或删减。示例输出格式如下：
+# 请你根据该货物标中采购要求部分的内容，请你给出\"{}\"的技术参数（或采购要求），由于该货物存在多种不同的采购要求或技术参数，请你请逐一列出，请以json格式返回结果，请你以'货物名-编号'区分多种型号，编号为从 1 开始的自然数，依次递增，即第一个键名为\"{}-1\", 键值为一个列表，列表中包含若干描述\"{}\"的技术参数（或采购要求）的字符串，需与原文完全一致，即若技术参数前存在序号也要保留，但你不可擅自增添或删减。示例输出格式如下：
-{{
+# {{
-    "交换机-1": [
+#     "交换机-1": [
-        "1、支持固化千兆电口≥8 个，固化千兆光口≥2 个，桌面型设备;",
+#         "1、支持固化千兆电口≥8 个，固化千兆光口≥2 个，桌面型设备;",
-        "2、支持静态链路聚合"
+#         "2、支持静态链路聚合"
-    ]
+#     ]
-    "交换机-2":[
+#     "交换机-2":[
-        "1、交换容量≥52Gbps，包转发率≥38.69Mpps，",
+#         "1、交换容量≥52Gbps，包转发率≥38.69Mpps，",
-        "2、提供国家强制性产品认证证书及测试报告（3C）"
+#         "2、提供国家强制性产品认证证书及测试报告（3C）"
-    ]
+#     ]
-}}
+# }}
-        """
+#         """
-        queries = []
+#         queries = []
-        for key in keys_list:
+#         for key in keys_list:
-            # 将键中的 '.' 替换为 '下的'
+#             # 将键中的 '.' 替换为 '下的'
-            modified_key = key.replace('.', '下的')
+#             modified_key = key.replace('.', '下的')
-            # 使用修改后的键填充第一个占位符，原始键填充第二个占位符
+#             # 使用修改后的键填充第一个占位符，原始键填充第二个占位符
-            new_query = user_query_template.format(modified_key, key, modified_key)
+#             new_query = user_query_template.format(modified_key, key, modified_key)
-            queries.append(new_query)
+#             queries.append(new_query)
-
+#
-            # 处理 grouped_paths 中的项，应用 user_query_template_two
+#             # 处理 grouped_paths 中的项，应用 user_query_template_two
-        for grouped_key in grouped_paths:
+#         for grouped_key in grouped_paths:
-            # 将键中的 '.' 替换为 '下的'
+#             # 将键中的 '.' 替换为 '下的'
-            modified_grouped_key = grouped_key.replace('.', '下的')
+#             modified_grouped_key = grouped_key.replace('.', '下的')
-            # 使用修改后的键填充第一个占位符，原始键填充第二个占位符
+#             # 使用修改后的键填充第一个占位符，原始键填充第二个占位符
-            new_query = user_query_template_two.format(modified_grouped_key, grouped_key, modified_grouped_key)
+#             new_query = user_query_template_two.format(modified_grouped_key, grouped_key, modified_grouped_key)
-            queries.append(new_query)
+#             queries.append(new_query)
-        results = multi_threading(queries, "", file_id, 2)
+#         results = multi_threading(queries, "", file_id, 2)
-        technical_requirements = []
+#         technical_requirements = []
-        if not results:
+#         if not results:
-            print("errror!未获得大模型的回答！")
+#             print("errror!未获得大模型的回答！")
-        else:
+#         else:
-            # 打印结果
+#             # 打印结果
-            for question, response in results:
+#             for question, response in results:
-                technical_requirements.append(response)
+#                 technical_requirements.append(response)
-        technical_requirements_combined_res = combine_json_results(technical_requirements)
+#         technical_requirements_combined_res = combine_json_results(technical_requirements)
-
+#
-        """根据所有键是否已添加处理技术要求"""
+#         """根据所有键是否已添加处理技术要求"""
-        # 更新原始采购需求字典
+#         # 更新原始采购需求字典
-        final_res=combine_and_update_results(cleaned_res['采购需求'], technical_requirements_combined_res)
+#         final_res=combine_and_update_results(cleaned_res['采购需求'], technical_requirements_combined_res)
-        # final_res = postprocess(cleaned_res)
+#         # final_res = postprocess(cleaned_res)
-        final_res["货物列表"] = good_list
+#         final_res["货物列表"] = good_list
-
+#
-    # 输出最终的 JSON 字符串
+#     # 输出最终的 JSON 字符串
-    return {"采购需求": final_res}
+#     return {"采购需求": final_res}
 def test_all_files_in_folder(input_folder, output_folder):
    # 确保输出文件夹存在
--- a/flask_app/货物标/提取采购需求main.py
+++ b/flask_app/货物标/提取采购需求main.py
@ -7,7 +7,8 @@ from flask_app.货物标.商务服务其他要求提取 import get_business_requ
 #获取采购清单
-def fetch_procurement_reqs(procurement_path, procurement_docpath, invalid_path):
+def fetch_procurement_reqs(procurement_path, invalid_path):
    # procurement_docpath = pdf2docx(procurement_path)  # 采购需求docx
    # 定义默认的 procurement_reqs 字典
    DEFAULT_PROCUREMENT_REQS = {
        "采购需求": "",
@ -22,8 +23,6 @@ def fetch_procurement_reqs(procurement_path, procurement_docpath, invalid_path):
        return DEFAULT_PROCUREMENT_REQS.copy()
    try:
        # 上传文件并获取 file_id
        # file_id = upload_file(procurement_docpath)
        # 使用 ThreadPoolExecutor 并行处理 get_technical_requirements 和 get_business_requirements
        with concurrent.futures.ThreadPoolExecutor() as executor:
@ -63,7 +62,7 @@ if __name__ == "__main__":
    procurement_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\a110ed59-00e8-47ec-873a-bd4579a6e628\\ztbfile_procurement.pdf"
    procurement_docpath="C:\\Users\\Administrator\\Desktop\\fsdownload\\a110ed59-00e8-47ec-873a-bd4579a6e628\\ztbfile_procurement.docx"
    invalid_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\a110ed59-00e8-47ec-873a-bd4579a6e628\\ztbfile.pdf"
-    res=fetch_procurement_reqs(procurement_path,procurement_docpath,invalid_path)
+    res=fetch_procurement_reqs(procurement_path,invalid_path)
    print(json.dumps(res, ensure_ascii=False, indent=4))
    end_time=time.time()
    print("耗时:"+str(end_time-start_time))
--- a/flask_app/货物标/货物标解析main.py
+++ b/flask_app/货物标/货物标解析main.py
@ -54,7 +54,6 @@ def preprocess_files(output_folder, file_path, file_type,logger):
    invalid_path=pdf_path
    invalid_docpath = docx_path  # docx截取无效标部分
    procurement_path = truncate_files[5]  # 采购需求
-    procurement_docpath=pdf2docx(procurement_path) # 采购需求docx
    evaluation_method_path = truncate_files[1]  # 评标办法
    qualification_path = truncate_files[2]  # 资格审查
    tobidders_notice_path = truncate_files[4]  # 投标人须知正文
@ -70,7 +69,6 @@ def preprocess_files(output_folder, file_path, file_type,logger):
        'invalid_path': invalid_path,
        'output_folder': output_folder,
        'procurement_path': procurement_path,
-        'procurement_docpath':procurement_docpath,
        'evaluation_method_path': evaluation_method_path,
        'qualification_path': qualification_path,
        'notice_path': notice_path,
@ -80,16 +78,14 @@ def preprocess_files(output_folder, file_path, file_type,logger):
        'merged_baseinfo_path': merged_baseinfo_path
    }
-def fetch_project_basic_info(invalid_path,invalid_docpath, merged_baseinfo_path, procurement_path,procurement_docpath, clause_path,logger):
+def fetch_project_basic_info(invalid_path,invalid_docpath, merged_baseinfo_path, procurement_path, clause_path,logger):
    logger.info("starting 基础信息...")
    start_time = time.time()
    if not merged_baseinfo_path:
        merged_baseinfo_path = invalid_path
    if not procurement_path:
        procurement_path=invalid_path
-    if not procurement_docpath:
-        procurement_docpath=invalid_docpath
-    basic_res = combine_basic_info(merged_baseinfo_path, procurement_path,procurement_docpath, clause_path,invalid_path)
+    basic_res = combine_basic_info(merged_baseinfo_path, procurement_path,clause_path,invalid_path)
    base_info, good_list = post_process_baseinfo(basic_res,logger)
    end_time = time.time()
    logger.info(f"基础信息 done，耗时：{end_time - start_time:.2f} 秒")
@ -207,7 +203,7 @@ def goods_bid_main(output_folder, file_path, file_type, unique_id):
                                                              processed_data['clause_path'],logger),
            'opening_bid': executor.submit(fetch_bid_opening, processed_data['invalid_path'],processed_data['merged_baseinfo_path'],processed_data['clause_path'],logger),
            'base_info': executor.submit(fetch_project_basic_info, processed_data['invalid_path'],processed_data['invalid_docpath'],processed_data['merged_baseinfo_path'],
-                                         processed_data['procurement_path'],processed_data['procurement_docpath'],processed_data['clause_path'],logger),
+                                         processed_data['procurement_path'],processed_data['clause_path'],logger),
            'qualification_review': executor.submit(fetch_qualification_review, processed_data['invalid_path'],
                                                    processed_data['qualification_path'],
                                                    processed_data['notice_path'],logger),
@ -242,8 +238,7 @@ def goods_bid_main(output_folder, file_path, file_type, unique_id):
 #TODO:把所有未知都删掉。
-#TODO:流式输出
+#TODO:考虑把解析失败的调用豆包，全文上传。
 #TODO:医院优化提示词
 #商务标这里改为列表最里层
 #good_list 金额  截取上下文
 if __name__ == "__main__":