From b97ae7c03481388f3879f47c61aa807646794517 Mon Sep 17 00:00:00 2001
From: zy123 <646228430@qq.com>
Date: Wed, 20 Nov 2024 19:35:22 +0800
Subject: [PATCH] =?UTF-8?q?11.20=20=E4=BF=AE=E6=94=B9bug?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 flask_app/general/post_processing.py    |   4 +-
 flask_app/general/多线程提问.py         | 102 ++++++++++++++----------
 flask_app/general/纯技术参数要求提取.py |   2 +-
 flask_app/货物标/技术参数要求提取.py    |  69 ++++++++--------
 flask_app/货物标/货物标解析main.py      |   2 +
 flask_app/货物标/资格审查main.py        |   6 +-
 6 files changed, 102 insertions(+), 83 deletions(-)

diff --git a/flask_app/general/post_processing.py b/flask_app/general/post_processing.py
index f109edb..330848f 100644
--- a/flask_app/general/post_processing.py
+++ b/flask_app/general/post_processing.py
@@ -106,7 +106,7 @@ def extract_business_requirements(data):
     model_res1 = doubao_model(user_query1)
     # print(model_res)
     business_req_deviation = clean_json_string(model_res1)
-    prompt_template2 = """以下文本是项目采购需求的商务要求部分，请你帮我从键值列表中各字符串中提取带星★或带三角▲的要求项，你的返回格式同输入文本格式，外键名为'商务要求（带星）'，键值为字符串列表，其中每个字符串为带星★或带三角▲的要求项。
+    prompt_template2 = """以下文本是项目采购需求的商务要求部分，请你帮我从键值列表中各字符串中提取带星★或带三角▲的要求项，你的返回格式同输入文本格式，外键名为'商务要求带星'，键值为字符串列表，其中每个字符串为带星★或带三角▲的要求项。
     要求与指南：
     1. 每个星★或三角▲要求占据一个字符串。
     2. 若没有带星★或带三角▲的要求项，键值为空列表，即[]
@@ -121,7 +121,7 @@ def extract_business_requirements(data):
     }}
     ### 对应的输出如下：
     {{
-        "商务要求（带星）": [
+        "商务要求带星": [
             "★交货期（工期）：合同签订之日起 15个日历天内完成，并通过项目验收。",
             "▲本项目报价须为固定总价，包含但不限于：采购、实施、调试、试运行、验收、运维等所有完成本项目相关的一切费用。"
         ]
diff --git a/flask_app/general/多线程提问.py b/flask_app/general/多线程提问.py
index 45e3bcc..0528f4a 100644
--- a/flask_app/general/多线程提问.py
+++ b/flask_app/general/多线程提问.py
@@ -6,10 +6,12 @@ import re
 import queue
 import concurrent.futures
 import time
+from datetime import datetime
+
 import requests
 from dashscope import Assistants, Messages, Runs, Threads
 from llama_index.indices.managed.dashscope import DashScopeCloudRetriever
-from flask_app.general.通义千问long import qianwen_long
+from flask_app.general.通义千问long import qianwen_long, upload_file
 
 prompt = """
 # 角色
@@ -241,6 +243,10 @@ def llm_call(question, knowledge_name,file_id, result_queue, ans_index, llm_type
         # assistant=create_assistant(knowledge_name)
     elif llm_type==2:
         print(f"qianwen_long! question:{question}")
+        # 获取当前时间
+        current_time = datetime.now()
+        # 输出时分秒
+        print(current_time.strftime("%H:%M:%S.%f")[:-3])
         # qianwen_res,usage = qianwen_long(file_id,question)   #有bug
         qianwen_res = qianwen_long(file_id, question)
         result_queue.put((ans_index,(question,qianwen_res)))
@@ -257,68 +263,72 @@ def multi_threading(queries, knowledge_name="", file_id="", llm_type=1):
     print("多线程提问：starting multi_threading...")
     result_queue = queue.Queue()
     max_retries = 2  # 设置最大重试次数
-    # 使用 ThreadPoolExecutor 管理线程
+    retry_counts = {}  # 跟踪每个查询的重试次数
+
     with concurrent.futures.ThreadPoolExecutor(max_workers=30) as executor:
-        # 逐个提交任务，每提交一个任务后休眠1秒
         future_to_query = {}
         for index, query in enumerate(queries):
+            time.sleep(0.5)  # 每次提交任务前等待0.5秒
             future = executor.submit(llm_call, query, knowledge_name, file_id, result_queue, index, llm_type)
             future_to_query[future] = index
-            time.sleep(0.5)  # 每提交一个任务后等待1秒
+            retry_counts[index] = 0  # 初始化重试次数
 
-        # 收集每个线程的结果
-        for future in concurrent.futures.as_completed(future_to_query):
-            index = future_to_query[future]
-            retries = 0
-            try:
-                future.result()  # 捕获异常或确认任务完成
-            except Exception as exc:
-                # print(f"Query {index} generated an exception: {exc}")
-                retries += 1  # 增加重试计数
-                # 确保在异常情况下也向 result_queue 添加占位符
-                result_queue.put((index, None))
-                if retries < max_retries:
-                    print(f"Retrying query {index} (attempt {retries + 1})...")
-                    print("重试的问题："+queries[index])
-                    # 重新提交任务
-                    future = executor.submit(llm_call, queries[index], knowledge_name, file_id, result_queue, index, llm_type)      #可能遇到阿里服务器挂壁的情况，重试一下
-                    future_to_query[future] = index
-                else:
-                    print(f"Query {index} failed after {max_retries} attempts.")
-                    break  # 超过最大重试次数，退出循环
+        while future_to_query:
+            done, _ = concurrent.futures.wait(
+                future_to_query.keys(),
+                return_when=concurrent.futures.FIRST_COMPLETED
+            )
+            for future in done:
+                index = future_to_query[future]
+                del future_to_query[future]
+                try:
+                    future.result()  # 捕获异常或确认任务完成
+                except Exception as exc:
+                    print(f"Query {index} generated an exception: {exc}")
+                    retry_counts[index] += 1  # 增加重试计数
+                    if retry_counts[index] <= max_retries:
+                        print(f"Retrying query {index} (attempt {retry_counts[index]})...")
+                        print("重试的问题：" + queries[index])
+                        # 重新提交任务
+                        new_future = executor.submit(llm_call, queries[index], knowledge_name, file_id, result_queue, index, llm_type)
+                        future_to_query[new_future] = index
+                    else:
+                        print(f"Query {index} failed after {max_retries} attempts.")
+                        result_queue.put((index, None))  # 添加占位符
 
     # 从队列中获取所有结果并按索引排序
     results = [None] * len(queries)
     while not result_queue.empty():
         index, result = result_queue.get()
         results[index] = result
+
     # 检查是否所有结果都是 None
     if all(result is None for result in results):
         return []
-        # 过滤掉None值
+
+    # 过滤掉None值
     results = [r for r in results if r is not None]
-    # 返回一个保证是列表的结构
     return results
 
 if __name__ == "__main__":
     start_time=time.time()
-    # # 读取问题列表
-    baseinfo_file_path = '/flask_app/static/提示词/基本信息工程标.txt'
-    questions =read_questions_from_file(baseinfo_file_path)
-    knowledge_name = "招标解析5word"
-    llm_type=1
-    results = multi_threading(questions, knowledge_name)
-    end_time = time.time()
-    if not results:
-        print("errror!")
-    else:
-        print("elapsed time:"+str(end_time-start_time))
-        # 打印结果
-        for question, response in results:
-            print(f"Response: {response}")
+    # # # 读取问题列表
+    # baseinfo_file_path = '/flask_app/static/提示词/基本信息工程标.txt'
+    # questions =read_questions_from_file(baseinfo_file_path)
+    # knowledge_name = "招标解析5word"
+    # llm_type=1
+    # results = multi_threading(questions, knowledge_name)
+    # end_time = time.time()
+    # if not results:
+    #     print("error!")
+    # else:
+    #     print("elapsed time:"+str(end_time-start_time))
+    #     # 打印结果
+    #     for question, response in results:
+    #         print(f"Response: {response}")
 
-    # file_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\6.2定版视频会议磋商文件(1)\\6.2定版视频会议磋商文件_1-21.pdf"
-    # file_id = upload_file(file_path)
+    file_path = r"C:\Users\Administrator\Desktop\fsdownload\39b0c3b4-1807-456c-8330-c5c7d1b7a2ca\ztbfile_procurement\ztbfile_procurement_1.pdf"
+    file_id = upload_file(file_path)
     # questions=["该招标文件的项目名称是？项目编号（或招标编号）是？采购人（或招标人）是？采购代理机构（或招标代理机构）是？请按json格式给我提供信息，键名分别是'项目名称','项目编号','采购人','采购代理机构'，若存在未知信息，在对应的键值中填'未知'。","该招标文件的项目概况是？项目基本情况是？请按json格式给我提供信息，键名分别为'项目概况','项目基本情况'，若存在嵌套信息，嵌套内容键名以文件中对应字段命名，而嵌套键值必须与原文保持一致，若存在未知信息，在对应的键值中填'未知'。"]
     # results=multi_threading(questions,"",file_id,2)  #1代表使用百炼rag 2代表使用qianwen-long
     # if not results:
@@ -338,4 +348,10 @@ if __name__ == "__main__":
     #     # 打印结果
     #     for question, response in results:
     #         print(f"Question: {question}")
-    #         print(f"Response: {response}")
\ No newline at end of file
+    #         print(f"Response: {response}")
+    query=[]
+    for i in range(1,50):
+        query.append("请返回这个数字："+str(i))
+    res=multi_threading(query,"",file_id,2)
+    for _,response in res:
+        print(response)
diff --git a/flask_app/general/纯技术参数要求提取.py b/flask_app/general/纯技术参数要求提取.py
index b975582..33f4c68 100644
--- a/flask_app/general/纯技术参数要求提取.py
+++ b/flask_app/general/纯技术参数要求提取.py
@@ -47,7 +47,7 @@ def get_technical_requirements_main(file_path,file_type,unique_id,output_folder)
     else:
         return final_res
 if __name__ == "__main__":
-    file_path=r"C:\Users\Administrator\Desktop\货物标\zbfiles\2-招标文件（广水市教育局封闭管理）.pdf"
+    file_path=r"C:\Users\Administrator\Desktop\fsdownload\39b0c3b4-1807-456c-8330-c5c7d1b7a2ca\ztbfile.pdf"
     file_type=2
     output_folder = r"C:\Users\Administrator\Desktop\fsdownload\39b0c3b4-1807-456c-8330-c5c7d1b7a2ca\tmp"
     res=get_technical_requirements_main(file_path,file_type,"123",output_folder)
diff --git a/flask_app/货物标/技术参数要求提取.py b/flask_app/货物标/技术参数要求提取.py
index 8c988a5..2e2aeba 100644
--- a/flask_app/货物标/技术参数要求提取.py
+++ b/flask_app/货物标/技术参数要求提取.py
@@ -5,6 +5,7 @@ import re
 import time
 
 from flask_app.general.file2markdown import convert_pdf_to_markdown
+from flask_app.general.format_change import pdf2docx
 from flask_app.general.多线程提问 import multi_threading
 from flask_app.general.通义千问long import qianwen_long, upload_file
 from flask_app.general.json_utils import clean_json_string, combine_json_results
@@ -209,10 +210,11 @@ def combine_and_update_results(original_data, updates):
 
 #文件内容以markdown格式组织，其中表格部分（若有）以html语法组织，
 def get_technical_requirements(file_path,invalid_path):
-    file_id=upload_file(file_path)
-    first_query_template="该文件是否说明了采购需求,即需要采购哪些货物?如果有,请回答'是',否则,回答'否'"
-    # first_query=generate_full_user_query(file_path,first_query_template)
-    # judge_res=doubao_model(first_query)
+    docx_file_path=pdf2docx(file_path)
+    print(docx_file_path)
+    file_id=upload_file(docx_file_path)
+    # file_id='file-fe-v6T6MGCW83b0m5uxHyP8IAoh'
+    first_query_template="该文件是否说明了采购需求,即需要采购哪些货物?如果有,请回答'是',否则,回答'否'"   #防止截取失败
     judge_res=qianwen_long(file_id,first_query_template)
     prompt_template1 = '''
                     任务：解析采购文件，提取采购需求，并以JSON格式返回。
@@ -230,26 +232,25 @@ def get_technical_requirements(file_path,invalid_path):
                         1.JSON格式，最外层键名为'采购需求'。
                         2.层次关系用嵌套键值对表示。
                         3.嵌套键名为系统或货物或模块名称，与原文保持一致。
-                        4.最内层键值应为空对象（{{}}）。
+                        4.最内层键值应为空列表[]。
                         5.不包含'说明'、'规格'、'技术参数'等列内容，仅返回采购的货物或系统或模块名称。
 
                         特殊情况处理：
                         同一层级（如同一系统中）下同名但采购要求不同的货物，以'货物名-编号'区分，编号从1递增。
 
-                        示例输出1，普通系统、货物类采购：
                         {{
                             "采购需求": {{
-                                "交换机-1": {{}},
-                                "交换机-2": {{}},
+                                "交换机-1": [],
+                                "交换机-2": [],
                                 "门禁管理系统": {{
-                                    "系统功能":{{}}
+                                    "系统功能":[]
                                 }},
                                 "交通监控视频子系统": {{
-                                    "系统功能": {{}},
-                                    "高清视频抓拍像机": {{}},
-                                    "补光灯": {{}}
+                                    "系统功能": [],
+                                    "高清视频抓拍像机": [],
+                                    "补光灯": []
                                 }},
-                                "LED全彩显示屏": {{}}
+                                "LED全彩显示屏": []
                                 // 其他系统和货物
                             }}
                         }}
@@ -257,15 +258,15 @@ def get_technical_requirements(file_path,invalid_path):
                         {{
                             "采购需求": {{
                                 "信息管理系统": {{
-                                    "通用模块":{{}},
-                                    "用户管理":{{}}
+                                    "通用模块":[],
+                                    "用户管理":[]
                                 }},
                                 "信息检索系统": {{
-                                    "系统功能":{{}},
-                                    "权限管理模块":{{}}
+                                    "系统功能":[],
+                                    "权限管理模块":[]
                                 }},
-                                "XX小程序":{{}},
-                                "数据分析中心":{{}}
+                                "XX小程序":[],
+                                "数据分析中心":[]
                             }}
                         }}
 
@@ -289,7 +290,7 @@ def get_technical_requirements(file_path,invalid_path):
                         1.JSON格式，最外层键名为'采购需求'。
                         2.层次关系用嵌套键值对表示。
                         3.嵌套键名为系统或货物或模块名称，与原文保持一致。
-                        4.最内层键值应为空对象（{{}}）。
+                        4.最内层键值应为空列表[]。
                         5.不包含'说明'、'规格'、'技术参数'等列内容，仅返回采购的货物或系统或模块名称。
 
                         特殊情况处理：
@@ -298,17 +299,17 @@ def get_technical_requirements(file_path,invalid_path):
                         示例输出1，普通系统、货物类采购：
                         {{
                             "采购需求": {{
-                                "交换机-1": {{}},
-                                "交换机-2": {{}},
+                                "交换机-1": [],
+                                "交换机-2": [],
                                 "门禁管理系统": {{
-                                    "系统功能":{{}}
+                                    "系统功能":[]
                                 }},
                                 "交通监控视频子系统": {{
-                                    "系统功能": {{}},
-                                    "高清视频抓拍像机": {{}},
-                                    "补光灯": {{}}
+                                    "系统功能": [],
+                                    "高清视频抓拍像机": [],
+                                    "补光灯": []
                                 }},
-                                "LED全彩显示屏": {{}}
+                                "LED全彩显示屏": []
                                 // 其他系统和货物
                             }}
                         }}
@@ -316,15 +317,15 @@ def get_technical_requirements(file_path,invalid_path):
                         {{
                             "采购需求": {{
                                 "信息管理系统": {{
-                                    "通用模块":{{}},
-                                    "用户管理":{{}}
+                                    "通用模块":[],
+                                    "用户管理":[]
                                 }},
                                 "信息检索系统": {{
-                                    "系统功能":{{}},
-                                    "权限管理模块":{{}}
+                                    "系统功能":[],
+                                    "权限管理模块":[]
                                 }},
-                                "XX小程序":{{}},
-                                "数据分析中心":{{}}
+                                "XX小程序":[],
+                                "数据分析中心":[]
                             }}
                         }}
 
@@ -432,7 +433,7 @@ def get_technical_requirements(file_path,invalid_path):
             # 打印结果
             for question, response in results:
                 technical_requirements.append(response)
-                # print(response)
+                print(response)
         technical_requirements_combined_res = combine_json_results(technical_requirements)
 
         """根据所有键是否已添加处理技术要求"""
diff --git a/flask_app/货物标/货物标解析main.py b/flask_app/货物标/货物标解析main.py
index 1e51767..3a02e4b 100644
--- a/flask_app/货物标/货物标解析main.py
+++ b/flask_app/货物标/货物标解析main.py
@@ -240,6 +240,8 @@ def goods_bid_main(output_folder, file_path, file_type, unique_id):
 #TODO:把所有未知都删掉。
 #TODO:考虑把解析失败的调用豆包，全文上传。
 #TODO:写个脚本确保技术参数没有嵌套
+
+#TODO:C:\Users\Administrator\Desktop\fsdownload\16fd6b4e-3975-4c83-8ba6-1bc9263a6a5b 符合性审查未找到
 #商务标这里改为列表最里层
 #good_list 金额  截取上下文
 if __name__ == "__main__":
diff --git a/flask_app/货物标/资格审查main.py b/flask_app/货物标/资格审查main.py
index c9ae926..a7d2a35 100644
--- a/flask_app/货物标/资格审查main.py
+++ b/flask_app/货物标/资格审查main.py
@@ -655,14 +655,14 @@ if __name__ == "__main__":
     output_folder=r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3"
     # qualification_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\52e54b20-c975-4cf3-a06b-6f146aaa93f5\\ztbfile_qualification1.pdf"
     # qualification_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\6558a50a-13ea-4279-a5db-684935481c39\\ztbfile_qualification2.pdf"
-    qualification_path=r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3\ztbfile_qualification.pdf"
+    qualification_path=r"C:\Users\Administrator\Desktop\fsdownload\16fd6b4e-3975-4c83-8ba6-1bc9263a6a5b\ztbfile_qualification1.pdf"
     # notice_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\6558a50a-13ea-4279-a5db-684935481c39\\ztbfile_notice.pdf"
     # notice_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\52e54b20-c975-4cf3-a06b-6f146aaa93f5\\ztbfile_notice.pdf"
-    notice_path=r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3\ztbfile_notice.pdf"
+    notice_path=r"C:\Users\Administrator\Desktop\fsdownload\16fd6b4e-3975-4c83-8ba6-1bc9263a6a5b\ztbfile_notice.pdf"
     # knowledge_name = "6.2视频会议docx"
     # invalid_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile.pdf"
     # invalid_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\52e54b20-c975-4cf3-a06b-6f146aaa93f5\\ztbfile.pdf"
-    invalid_path=r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3\ztbfile_invalid.pdf"
+    invalid_path=r"C:\Users\Administrator\Desktop\fsdownload\16fd6b4e-3975-4c83-8ba6-1bc9263a6a5b\ztbfile_invalid.pdf"
     res = combine_qualification_review(invalid_path, qualification_path, notice_path)
     print(json.dumps(res, ensure_ascii=False, indent=4))
     end_time=time.time()