11.20 修改bug

This commit is contained in:
zy123 2024-11-20 19:35:22 +08:00
parent 6dd1e02290
commit b97ae7c034
6 changed files with 102 additions and 83 deletions

View File

@ -106,7 +106,7 @@ def extract_business_requirements(data):
model_res1 = doubao_model(user_query1) model_res1 = doubao_model(user_query1)
# print(model_res) # print(model_res)
business_req_deviation = clean_json_string(model_res1) business_req_deviation = clean_json_string(model_res1)
prompt_template2 = """以下文本是项目采购需求的商务要求部分,请你帮我从键值列表中各字符串中提取带星★或带三角▲的要求项,你的返回格式同输入文本格式,外键名为'商务要求带星',键值为字符串列表,其中每个字符串为带星★或带三角▲的要求项。 prompt_template2 = """以下文本是项目采购需求的商务要求部分,请你帮我从键值列表中各字符串中提取带星★或带三角▲的要求项,你的返回格式同输入文本格式,外键名为'商务要求带星',键值为字符串列表,其中每个字符串为带星★或带三角▲的要求项。
要求与指南 要求与指南
1. 每个星或三角要求占据一个字符串 1. 每个星或三角要求占据一个字符串
2. 若没有带星或带三角的要求项键值为空列表[] 2. 若没有带星或带三角的要求项键值为空列表[]
@ -121,7 +121,7 @@ def extract_business_requirements(data):
}} }}
### 对应的输出如下: ### 对应的输出如下:
{{ {{
"商务要求带星": [ "商务要求带星": [
"★交货期(工期):合同签订之日起 15个日历天内完成并通过项目验收。", "★交货期(工期):合同签订之日起 15个日历天内完成并通过项目验收。",
"▲本项目报价须为固定总价,包含但不限于:采购、实施、调试、试运行、验收、运维等所有完成本项目相关的一切费用。" "▲本项目报价须为固定总价,包含但不限于:采购、实施、调试、试运行、验收、运维等所有完成本项目相关的一切费用。"
] ]

View File

@ -6,10 +6,12 @@ import re
import queue import queue
import concurrent.futures import concurrent.futures
import time import time
from datetime import datetime
import requests import requests
from dashscope import Assistants, Messages, Runs, Threads from dashscope import Assistants, Messages, Runs, Threads
from llama_index.indices.managed.dashscope import DashScopeCloudRetriever from llama_index.indices.managed.dashscope import DashScopeCloudRetriever
from flask_app.general.通义千问long import qianwen_long from flask_app.general.通义千问long import qianwen_long, upload_file
prompt = """ prompt = """
# 角色 # 角色
@ -241,6 +243,10 @@ def llm_call(question, knowledge_name,file_id, result_queue, ans_index, llm_type
# assistant=create_assistant(knowledge_name) # assistant=create_assistant(knowledge_name)
elif llm_type==2: elif llm_type==2:
print(f"qianwen_long! question:{question}") print(f"qianwen_long! question:{question}")
# 获取当前时间
current_time = datetime.now()
# 输出时分秒
print(current_time.strftime("%H:%M:%S.%f")[:-3])
# qianwen_res,usage = qianwen_long(file_id,question) #有bug # qianwen_res,usage = qianwen_long(file_id,question) #有bug
qianwen_res = qianwen_long(file_id, question) qianwen_res = qianwen_long(file_id, question)
result_queue.put((ans_index,(question,qianwen_res))) result_queue.put((ans_index,(question,qianwen_res)))
@ -257,68 +263,72 @@ def multi_threading(queries, knowledge_name="", file_id="", llm_type=1):
print("多线程提问starting multi_threading...") print("多线程提问starting multi_threading...")
result_queue = queue.Queue() result_queue = queue.Queue()
max_retries = 2 # 设置最大重试次数 max_retries = 2 # 设置最大重试次数
# 使用 ThreadPoolExecutor 管理线程 retry_counts = {} # 跟踪每个查询的重试次数
with concurrent.futures.ThreadPoolExecutor(max_workers=30) as executor: with concurrent.futures.ThreadPoolExecutor(max_workers=30) as executor:
# 逐个提交任务,每提交一个任务后休眠0.5秒
future_to_query = {} future_to_query = {}
for index, query in enumerate(queries): for index, query in enumerate(queries):
time.sleep(0.5) # 每提交一个任务后等待0.5秒
future = executor.submit(llm_call, query, knowledge_name, file_id, result_queue, index, llm_type) future = executor.submit(llm_call, query, knowledge_name, file_id, result_queue, index, llm_type)
future_to_query[future] = index future_to_query[future] = index
time.sleep(0.5) # 每提交一个任务后等待1秒 retry_counts[index] = 0 # 初始化重试次数
# 收集每个线程的结果 while future_to_query:
for future in concurrent.futures.as_completed(future_to_query): done, _ = concurrent.futures.wait(
future_to_query.keys(),
return_when=concurrent.futures.FIRST_COMPLETED
)
for future in done:
index = future_to_query[future] index = future_to_query[future]
retries = 0 del future_to_query[future]
try: try:
future.result() # 捕获异常或确认任务完成 future.result() # 捕获异常或确认任务完成
except Exception as exc: except Exception as exc:
# print(f"Query {index} generated an exception: {exc}") print(f"Query {index} generated an exception: {exc}")
retries += 1 # 增加重试计数 retry_counts[index] += 1 # 增加重试计数
# 确保在异常情况下也向 result_queue 添加占位符 if retry_counts[index] <= max_retries:
result_queue.put((index, None)) print(f"Retrying query {index} (attempt {retry_counts[index]})...")
if retries < max_retries:
print(f"Retrying query {index} (attempt {retries + 1})...")
print("重试的问题:" + queries[index]) print("重试的问题:" + queries[index])
# 重新提交任务 # 重新提交任务
future = executor.submit(llm_call, queries[index], knowledge_name, file_id, result_queue, index, llm_type) #可能遇到阿里服务器挂壁的情况,重试一下 new_future = executor.submit(llm_call, queries[index], knowledge_name, file_id, result_queue, index, llm_type)
future_to_query[future] = index future_to_query[new_future] = index
else: else:
print(f"Query {index} failed after {max_retries} attempts.") print(f"Query {index} failed after {max_retries} attempts.")
break # 超过最大重试次数,退出循环 result_queue.put((index, None)) # 添加占位符
# 从队列中获取所有结果并按索引排序 # 从队列中获取所有结果并按索引排序
results = [None] * len(queries) results = [None] * len(queries)
while not result_queue.empty(): while not result_queue.empty():
index, result = result_queue.get() index, result = result_queue.get()
results[index] = result results[index] = result
# 检查是否所有结果都是 None # 检查是否所有结果都是 None
if all(result is None for result in results): if all(result is None for result in results):
return [] return []
# 过滤掉None值 # 过滤掉None值
results = [r for r in results if r is not None] results = [r for r in results if r is not None]
# 返回一个保证是列表的结构
return results return results
if __name__ == "__main__": if __name__ == "__main__":
start_time=time.time() start_time=time.time()
# # 读取问题列表 # # # 读取问题列表
baseinfo_file_path = '/flask_app/static/提示词/基本信息工程标.txt' # baseinfo_file_path = '/flask_app/static/提示词/基本信息工程标.txt'
questions =read_questions_from_file(baseinfo_file_path) # questions =read_questions_from_file(baseinfo_file_path)
knowledge_name = "招标解析5word" # knowledge_name = "招标解析5word"
llm_type=1 # llm_type=1
results = multi_threading(questions, knowledge_name) # results = multi_threading(questions, knowledge_name)
end_time = time.time() # end_time = time.time()
if not results: # if not results:
print("errror!") # print("errror!")
else: # else:
print("elapsed time:"+str(end_time-start_time)) # print("elapsed time:"+str(end_time-start_time))
# 打印结果 # # 打印结果
for question, response in results: # for question, response in results:
print(f"Response: {response}") # print(f"Response: {response}")
# file_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\6.2定版视频会议磋商文件(1)\\6.2定版视频会议磋商文件_1-21.pdf" file_path = r"C:\Users\Administrator\Desktop\fsdownload\39b0c3b4-1807-456c-8330-c5c7d1b7a2ca\ztbfile_procurement\ztbfile_procurement_1.pdf"
# file_id = upload_file(file_path) file_id = upload_file(file_path)
# questions=["该招标文件的项目名称是项目编号或招标编号采购人或招标人采购代理机构或招标代理机构请按json格式给我提供信息键名分别是'项目名称','项目编号','采购人','采购代理机构',若存在未知信息,在对应的键值中填'未知'。","该招标文件的项目概况是项目基本情况是请按json格式给我提供信息键名分别为'项目概况','项目基本情况',若存在嵌套信息,嵌套内容键名以文件中对应字段命名,而嵌套键值必须与原文保持一致,若存在未知信息,在对应的键值中填'未知'。"] # questions=["该招标文件的项目名称是项目编号或招标编号采购人或招标人采购代理机构或招标代理机构请按json格式给我提供信息键名分别是'项目名称','项目编号','采购人','采购代理机构',若存在未知信息,在对应的键值中填'未知'。","该招标文件的项目概况是项目基本情况是请按json格式给我提供信息键名分别为'项目概况','项目基本情况',若存在嵌套信息,嵌套内容键名以文件中对应字段命名,而嵌套键值必须与原文保持一致,若存在未知信息,在对应的键值中填'未知'。"]
# results=multi_threading(questions,"",file_id,2) #1代表使用百炼rag 2代表使用qianwen-long # results=multi_threading(questions,"",file_id,2) #1代表使用百炼rag 2代表使用qianwen-long
# if not results: # if not results:
@ -339,3 +349,9 @@ if __name__ == "__main__":
# for question, response in results: # for question, response in results:
# print(f"Question: {question}") # print(f"Question: {question}")
# print(f"Response: {response}") # print(f"Response: {response}")
query=[]
for i in range(1,50):
query.append("请返回这个数字:"+str(i))
res=multi_threading(query,"",file_id,2)
for _,response in res:
print(response)

View File

@ -47,7 +47,7 @@ def get_technical_requirements_main(file_path,file_type,unique_id,output_folder)
else: else:
return final_res return final_res
if __name__ == "__main__": if __name__ == "__main__":
file_path=r"C:\Users\Administrator\Desktop\货物标\zbfiles\2-招标文件(广水市教育局封闭管理).pdf" file_path=r"C:\Users\Administrator\Desktop\fsdownload\39b0c3b4-1807-456c-8330-c5c7d1b7a2ca\ztbfile.pdf"
file_type=2 file_type=2
output_folder = r"C:\Users\Administrator\Desktop\fsdownload\39b0c3b4-1807-456c-8330-c5c7d1b7a2ca\tmp" output_folder = r"C:\Users\Administrator\Desktop\fsdownload\39b0c3b4-1807-456c-8330-c5c7d1b7a2ca\tmp"
res=get_technical_requirements_main(file_path,file_type,"123",output_folder) res=get_technical_requirements_main(file_path,file_type,"123",output_folder)

View File

@ -5,6 +5,7 @@ import re
import time import time
from flask_app.general.file2markdown import convert_pdf_to_markdown from flask_app.general.file2markdown import convert_pdf_to_markdown
from flask_app.general.format_change import pdf2docx
from flask_app.general.多线程提问 import multi_threading from flask_app.general.多线程提问 import multi_threading
from flask_app.general.通义千问long import qianwen_long, upload_file from flask_app.general.通义千问long import qianwen_long, upload_file
from flask_app.general.json_utils import clean_json_string, combine_json_results from flask_app.general.json_utils import clean_json_string, combine_json_results
@ -209,10 +210,11 @@ def combine_and_update_results(original_data, updates):
#文件内容以markdown格式组织其中表格部分若有以html语法组织 #文件内容以markdown格式组织其中表格部分若有以html语法组织
def get_technical_requirements(file_path,invalid_path): def get_technical_requirements(file_path,invalid_path):
file_id=upload_file(file_path) docx_file_path=pdf2docx(file_path)
first_query_template="该文件是否说明了采购需求,即需要采购哪些货物?如果有,请回答'',否则,回答''" print(docx_file_path)
# first_query=generate_full_user_query(file_path,first_query_template) file_id=upload_file(docx_file_path)
# judge_res=doubao_model(first_query) # file_id='file-fe-v6T6MGCW83b0m5uxHyP8IAoh'
first_query_template="该文件是否说明了采购需求,即需要采购哪些货物?如果有,请回答'',否则,回答''" #防止截取失败
judge_res=qianwen_long(file_id,first_query_template) judge_res=qianwen_long(file_id,first_query_template)
prompt_template1 = ''' prompt_template1 = '''
任务解析采购文件提取采购需求并以JSON格式返回 任务解析采购文件提取采购需求并以JSON格式返回
@ -230,26 +232,25 @@ def get_technical_requirements(file_path,invalid_path):
1.JSON格式最外层键名为'采购需求' 1.JSON格式最外层键名为'采购需求'
2.层次关系用嵌套键值对表示 2.层次关系用嵌套键值对表示
3.嵌套键名为系统或货物或模块名称与原文保持一致 3.嵌套键名为系统或货物或模块名称与原文保持一致
4.最内层键值应为空对象{{}} 4.最内层键值应为空列表[]
5.不包含'说明''规格''技术参数'等列内容仅返回采购的货物或系统或模块名称 5.不包含'说明''规格''技术参数'等列内容仅返回采购的货物或系统或模块名称
特殊情况处理 特殊情况处理
同一层级如同一系统中下同名但采购要求不同的货物'货物名-编号'区分编号从1递增 同一层级如同一系统中下同名但采购要求不同的货物'货物名-编号'区分编号从1递增
示例输出1普通系统货物类采购
{{ {{
"采购需求": {{ "采购需求": {{
"交换机-1": {{}}, "交换机-1": [],
"交换机-2": {{}}, "交换机-2": [],
"门禁管理系统": {{ "门禁管理系统": {{
"系统功能":{{}} "系统功能":[]
}}, }},
"交通监控视频子系统": {{ "交通监控视频子系统": {{
"系统功能": {{}}, "系统功能": [],
"高清视频抓拍像机": {{}}, "高清视频抓拍像机": [],
"补光灯": {{}} "补光灯": []
}}, }},
"LED全彩显示屏": {{}} "LED全彩显示屏": []
// 其他系统和货物 // 其他系统和货物
}} }}
}} }}
@ -257,15 +258,15 @@ def get_technical_requirements(file_path,invalid_path):
{{ {{
"采购需求": {{ "采购需求": {{
"信息管理系统": {{ "信息管理系统": {{
"通用模块":{{}}, "通用模块":[],
"用户管理":{{}} "用户管理":[]
}}, }},
"信息检索系统": {{ "信息检索系统": {{
"系统功能":{{}}, "系统功能":[],
"权限管理模块":{{}} "权限管理模块":[]
}}, }},
"XX小程序":{{}}, "XX小程序":[],
"数据分析中心":{{}} "数据分析中心":[]
}} }}
}} }}
@ -289,7 +290,7 @@ def get_technical_requirements(file_path,invalid_path):
1.JSON格式最外层键名为'采购需求' 1.JSON格式最外层键名为'采购需求'
2.层次关系用嵌套键值对表示 2.层次关系用嵌套键值对表示
3.嵌套键名为系统或货物或模块名称与原文保持一致 3.嵌套键名为系统或货物或模块名称与原文保持一致
4.最内层键值应为空对象{{}} 4.最内层键值应为空列表[]
5.不包含'说明''规格''技术参数'等列内容仅返回采购的货物或系统或模块名称 5.不包含'说明''规格''技术参数'等列内容仅返回采购的货物或系统或模块名称
特殊情况处理 特殊情况处理
@ -298,17 +299,17 @@ def get_technical_requirements(file_path,invalid_path):
示例输出1普通系统货物类采购 示例输出1普通系统货物类采购
{{ {{
"采购需求": {{ "采购需求": {{
"交换机-1": {{}}, "交换机-1": [],
"交换机-2": {{}}, "交换机-2": [],
"门禁管理系统": {{ "门禁管理系统": {{
"系统功能":{{}} "系统功能":[]
}}, }},
"交通监控视频子系统": {{ "交通监控视频子系统": {{
"系统功能": {{}}, "系统功能": [],
"高清视频抓拍像机": {{}}, "高清视频抓拍像机": [],
"补光灯": {{}} "补光灯": []
}}, }},
"LED全彩显示屏": {{}} "LED全彩显示屏": []
// 其他系统和货物 // 其他系统和货物
}} }}
}} }}
@ -316,15 +317,15 @@ def get_technical_requirements(file_path,invalid_path):
{{ {{
"采购需求": {{ "采购需求": {{
"信息管理系统": {{ "信息管理系统": {{
"通用模块":{{}}, "通用模块":[],
"用户管理":{{}} "用户管理":[]
}}, }},
"信息检索系统": {{ "信息检索系统": {{
"系统功能":{{}}, "系统功能":[],
"权限管理模块":{{}} "权限管理模块":[]
}}, }},
"XX小程序":{{}}, "XX小程序":[],
"数据分析中心":{{}} "数据分析中心":[]
}} }}
}} }}
@ -432,7 +433,7 @@ def get_technical_requirements(file_path,invalid_path):
# 打印结果 # 打印结果
for question, response in results: for question, response in results:
technical_requirements.append(response) technical_requirements.append(response)
# print(response) print(response)
technical_requirements_combined_res = combine_json_results(technical_requirements) technical_requirements_combined_res = combine_json_results(technical_requirements)
"""根据所有键是否已添加处理技术要求""" """根据所有键是否已添加处理技术要求"""

View File

@ -240,6 +240,8 @@ def goods_bid_main(output_folder, file_path, file_type, unique_id):
#TODO:把所有未知都删掉。 #TODO:把所有未知都删掉。
#TODO:考虑把解析失败的调用豆包,全文上传。 #TODO:考虑把解析失败的调用豆包,全文上传。
#TODO:写个脚本确保技术参数没有嵌套 #TODO:写个脚本确保技术参数没有嵌套
#TODO:C:\Users\Administrator\Desktop\fsdownload\16fd6b4e-3975-4c83-8ba6-1bc9263a6a5b 符合性审查未找到
#商务标这里改为列表最里层 #商务标这里改为列表最里层
#good_list 金额 截取上下文 #good_list 金额 截取上下文
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -655,14 +655,14 @@ if __name__ == "__main__":
output_folder=r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3" output_folder=r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3"
# qualification_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\52e54b20-c975-4cf3-a06b-6f146aaa93f5\\ztbfile_qualification1.pdf" # qualification_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\52e54b20-c975-4cf3-a06b-6f146aaa93f5\\ztbfile_qualification1.pdf"
# qualification_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\6558a50a-13ea-4279-a5db-684935481c39\\ztbfile_qualification2.pdf" # qualification_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\6558a50a-13ea-4279-a5db-684935481c39\\ztbfile_qualification2.pdf"
qualification_path=r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3\ztbfile_qualification.pdf" qualification_path=r"C:\Users\Administrator\Desktop\fsdownload\16fd6b4e-3975-4c83-8ba6-1bc9263a6a5b\ztbfile_qualification1.pdf"
# notice_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\6558a50a-13ea-4279-a5db-684935481c39\\ztbfile_notice.pdf" # notice_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\6558a50a-13ea-4279-a5db-684935481c39\\ztbfile_notice.pdf"
# notice_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\52e54b20-c975-4cf3-a06b-6f146aaa93f5\\ztbfile_notice.pdf" # notice_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\52e54b20-c975-4cf3-a06b-6f146aaa93f5\\ztbfile_notice.pdf"
notice_path=r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3\ztbfile_notice.pdf" notice_path=r"C:\Users\Administrator\Desktop\fsdownload\16fd6b4e-3975-4c83-8ba6-1bc9263a6a5b\ztbfile_notice.pdf"
# knowledge_name = "6.2视频会议docx" # knowledge_name = "6.2视频会议docx"
# invalid_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile.pdf" # invalid_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile.pdf"
# invalid_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\52e54b20-c975-4cf3-a06b-6f146aaa93f5\\ztbfile.pdf" # invalid_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\52e54b20-c975-4cf3-a06b-6f146aaa93f5\\ztbfile.pdf"
invalid_path=r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3\ztbfile_invalid.pdf" invalid_path=r"C:\Users\Administrator\Desktop\fsdownload\16fd6b4e-3975-4c83-8ba6-1bc9263a6a5b\ztbfile_invalid.pdf"
res = combine_qualification_review(invalid_path, qualification_path, notice_path) res = combine_qualification_review(invalid_path, qualification_path, notice_path)
print(json.dumps(res, ensure_ascii=False, indent=4)) print(json.dumps(res, ensure_ascii=False, indent=4))
end_time=time.time() end_time=time.time()