11.20 修改bug
This commit is contained in:
parent
6dd1e02290
commit
b97ae7c034
@ -106,7 +106,7 @@ def extract_business_requirements(data):
|
||||
model_res1 = doubao_model(user_query1)
|
||||
# print(model_res)
|
||||
business_req_deviation = clean_json_string(model_res1)
|
||||
prompt_template2 = """以下文本是项目采购需求的商务要求部分,请你帮我从键值列表中各字符串中提取带星★或带三角▲的要求项,你的返回格式同输入文本格式,外键名为'商务要求(带星)',键值为字符串列表,其中每个字符串为带星★或带三角▲的要求项。
|
||||
prompt_template2 = """以下文本是项目采购需求的商务要求部分,请你帮我从键值列表中各字符串中提取带星★或带三角▲的要求项,你的返回格式同输入文本格式,外键名为'商务要求带星',键值为字符串列表,其中每个字符串为带星★或带三角▲的要求项。
|
||||
要求与指南:
|
||||
1. 每个星★或三角▲要求占据一个字符串。
|
||||
2. 若没有带星★或带三角▲的要求项,键值为空列表,即[]
|
||||
@ -121,7 +121,7 @@ def extract_business_requirements(data):
|
||||
}}
|
||||
### 对应的输出如下:
|
||||
{{
|
||||
"商务要求(带星)": [
|
||||
"商务要求带星": [
|
||||
"★交货期(工期):合同签订之日起 15个日历天内完成,并通过项目验收。",
|
||||
"▲本项目报价须为固定总价,包含但不限于:采购、实施、调试、试运行、验收、运维等所有完成本项目相关的一切费用。"
|
||||
]
|
||||
|
@ -6,10 +6,12 @@ import re
|
||||
import queue
|
||||
import concurrent.futures
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
import requests
|
||||
from dashscope import Assistants, Messages, Runs, Threads
|
||||
from llama_index.indices.managed.dashscope import DashScopeCloudRetriever
|
||||
from flask_app.general.通义千问long import qianwen_long
|
||||
from flask_app.general.通义千问long import qianwen_long, upload_file
|
||||
|
||||
prompt = """
|
||||
# 角色
|
||||
@ -241,6 +243,10 @@ def llm_call(question, knowledge_name,file_id, result_queue, ans_index, llm_type
|
||||
# assistant=create_assistant(knowledge_name)
|
||||
elif llm_type==2:
|
||||
print(f"qianwen_long! question:{question}")
|
||||
# 获取当前时间
|
||||
current_time = datetime.now()
|
||||
# 输出时分秒
|
||||
print(current_time.strftime("%H:%M:%S.%f")[:-3])
|
||||
# qianwen_res,usage = qianwen_long(file_id,question) #有bug
|
||||
qianwen_res = qianwen_long(file_id, question)
|
||||
result_queue.put((ans_index,(question,qianwen_res)))
|
||||
@ -257,68 +263,72 @@ def multi_threading(queries, knowledge_name="", file_id="", llm_type=1):
|
||||
print("多线程提问:starting multi_threading...")
|
||||
result_queue = queue.Queue()
|
||||
max_retries = 2 # 设置最大重试次数
|
||||
# 使用 ThreadPoolExecutor 管理线程
|
||||
retry_counts = {} # 跟踪每个查询的重试次数
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=30) as executor:
|
||||
# 逐个提交任务,每提交一个任务后休眠0.5秒
|
||||
future_to_query = {}
|
||||
for index, query in enumerate(queries):
|
||||
time.sleep(0.5) # 每提交一个任务后等待0.5秒
|
||||
future = executor.submit(llm_call, query, knowledge_name, file_id, result_queue, index, llm_type)
|
||||
future_to_query[future] = index
|
||||
time.sleep(0.5) # 每提交一个任务后等待1秒
|
||||
retry_counts[index] = 0 # 初始化重试次数
|
||||
|
||||
# 收集每个线程的结果
|
||||
for future in concurrent.futures.as_completed(future_to_query):
|
||||
while future_to_query:
|
||||
done, _ = concurrent.futures.wait(
|
||||
future_to_query.keys(),
|
||||
return_when=concurrent.futures.FIRST_COMPLETED
|
||||
)
|
||||
for future in done:
|
||||
index = future_to_query[future]
|
||||
retries = 0
|
||||
del future_to_query[future]
|
||||
try:
|
||||
future.result() # 捕获异常或确认任务完成
|
||||
except Exception as exc:
|
||||
# print(f"Query {index} generated an exception: {exc}")
|
||||
retries += 1 # 增加重试计数
|
||||
# 确保在异常情况下也向 result_queue 添加占位符
|
||||
result_queue.put((index, None))
|
||||
if retries < max_retries:
|
||||
print(f"Retrying query {index} (attempt {retries + 1})...")
|
||||
print(f"Query {index} generated an exception: {exc}")
|
||||
retry_counts[index] += 1 # 增加重试计数
|
||||
if retry_counts[index] <= max_retries:
|
||||
print(f"Retrying query {index} (attempt {retry_counts[index]})...")
|
||||
print("重试的问题:" + queries[index])
|
||||
# 重新提交任务
|
||||
future = executor.submit(llm_call, queries[index], knowledge_name, file_id, result_queue, index, llm_type) #可能遇到阿里服务器挂壁的情况,重试一下
|
||||
future_to_query[future] = index
|
||||
new_future = executor.submit(llm_call, queries[index], knowledge_name, file_id, result_queue, index, llm_type)
|
||||
future_to_query[new_future] = index
|
||||
else:
|
||||
print(f"Query {index} failed after {max_retries} attempts.")
|
||||
break # 超过最大重试次数,退出循环
|
||||
result_queue.put((index, None)) # 添加占位符
|
||||
|
||||
# 从队列中获取所有结果并按索引排序
|
||||
results = [None] * len(queries)
|
||||
while not result_queue.empty():
|
||||
index, result = result_queue.get()
|
||||
results[index] = result
|
||||
|
||||
# 检查是否所有结果都是 None
|
||||
if all(result is None for result in results):
|
||||
return []
|
||||
|
||||
# 过滤掉None值
|
||||
results = [r for r in results if r is not None]
|
||||
# 返回一个保证是列表的结构
|
||||
return results
|
||||
|
||||
if __name__ == "__main__":
|
||||
start_time=time.time()
|
||||
# # 读取问题列表
|
||||
baseinfo_file_path = '/flask_app/static/提示词/基本信息工程标.txt'
|
||||
questions =read_questions_from_file(baseinfo_file_path)
|
||||
knowledge_name = "招标解析5word"
|
||||
llm_type=1
|
||||
results = multi_threading(questions, knowledge_name)
|
||||
end_time = time.time()
|
||||
if not results:
|
||||
print("errror!")
|
||||
else:
|
||||
print("elapsed time:"+str(end_time-start_time))
|
||||
# 打印结果
|
||||
for question, response in results:
|
||||
print(f"Response: {response}")
|
||||
# # # 读取问题列表
|
||||
# baseinfo_file_path = '/flask_app/static/提示词/基本信息工程标.txt'
|
||||
# questions =read_questions_from_file(baseinfo_file_path)
|
||||
# knowledge_name = "招标解析5word"
|
||||
# llm_type=1
|
||||
# results = multi_threading(questions, knowledge_name)
|
||||
# end_time = time.time()
|
||||
# if not results:
|
||||
# print("errror!")
|
||||
# else:
|
||||
# print("elapsed time:"+str(end_time-start_time))
|
||||
# # 打印结果
|
||||
# for question, response in results:
|
||||
# print(f"Response: {response}")
|
||||
|
||||
# file_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\6.2定版视频会议磋商文件(1)\\6.2定版视频会议磋商文件_1-21.pdf"
|
||||
# file_id = upload_file(file_path)
|
||||
file_path = r"C:\Users\Administrator\Desktop\fsdownload\39b0c3b4-1807-456c-8330-c5c7d1b7a2ca\ztbfile_procurement\ztbfile_procurement_1.pdf"
|
||||
file_id = upload_file(file_path)
|
||||
# questions=["该招标文件的项目名称是?项目编号(或招标编号)是?采购人(或招标人)是?采购代理机构(或招标代理机构)是?请按json格式给我提供信息,键名分别是'项目名称','项目编号','采购人','采购代理机构',若存在未知信息,在对应的键值中填'未知'。","该招标文件的项目概况是?项目基本情况是?请按json格式给我提供信息,键名分别为'项目概况','项目基本情况',若存在嵌套信息,嵌套内容键名以文件中对应字段命名,而嵌套键值必须与原文保持一致,若存在未知信息,在对应的键值中填'未知'。"]
|
||||
# results=multi_threading(questions,"",file_id,2) #1代表使用百炼rag 2代表使用qianwen-long
|
||||
# if not results:
|
||||
@ -339,3 +349,9 @@ if __name__ == "__main__":
|
||||
# for question, response in results:
|
||||
# print(f"Question: {question}")
|
||||
# print(f"Response: {response}")
|
||||
query=[]
|
||||
for i in range(1,50):
|
||||
query.append("请返回这个数字:"+str(i))
|
||||
res=multi_threading(query,"",file_id,2)
|
||||
for _,response in res:
|
||||
print(response)
|
||||
|
@ -47,7 +47,7 @@ def get_technical_requirements_main(file_path,file_type,unique_id,output_folder)
|
||||
else:
|
||||
return final_res
|
||||
if __name__ == "__main__":
|
||||
file_path=r"C:\Users\Administrator\Desktop\货物标\zbfiles\2-招标文件(广水市教育局封闭管理).pdf"
|
||||
file_path=r"C:\Users\Administrator\Desktop\fsdownload\39b0c3b4-1807-456c-8330-c5c7d1b7a2ca\ztbfile.pdf"
|
||||
file_type=2
|
||||
output_folder = r"C:\Users\Administrator\Desktop\fsdownload\39b0c3b4-1807-456c-8330-c5c7d1b7a2ca\tmp"
|
||||
res=get_technical_requirements_main(file_path,file_type,"123",output_folder)
|
||||
|
@ -5,6 +5,7 @@ import re
|
||||
import time
|
||||
|
||||
from flask_app.general.file2markdown import convert_pdf_to_markdown
|
||||
from flask_app.general.format_change import pdf2docx
|
||||
from flask_app.general.多线程提问 import multi_threading
|
||||
from flask_app.general.通义千问long import qianwen_long, upload_file
|
||||
from flask_app.general.json_utils import clean_json_string, combine_json_results
|
||||
@ -209,10 +210,11 @@ def combine_and_update_results(original_data, updates):
|
||||
|
||||
#文件内容以markdown格式组织,其中表格部分(若有)以html语法组织,
|
||||
def get_technical_requirements(file_path,invalid_path):
|
||||
file_id=upload_file(file_path)
|
||||
first_query_template="该文件是否说明了采购需求,即需要采购哪些货物?如果有,请回答'是',否则,回答'否'"
|
||||
# first_query=generate_full_user_query(file_path,first_query_template)
|
||||
# judge_res=doubao_model(first_query)
|
||||
docx_file_path=pdf2docx(file_path)
|
||||
print(docx_file_path)
|
||||
file_id=upload_file(docx_file_path)
|
||||
# file_id='file-fe-v6T6MGCW83b0m5uxHyP8IAoh'
|
||||
first_query_template="该文件是否说明了采购需求,即需要采购哪些货物?如果有,请回答'是',否则,回答'否'" #防止截取失败
|
||||
judge_res=qianwen_long(file_id,first_query_template)
|
||||
prompt_template1 = '''
|
||||
任务:解析采购文件,提取采购需求,并以JSON格式返回。
|
||||
@ -230,26 +232,25 @@ def get_technical_requirements(file_path,invalid_path):
|
||||
1.JSON格式,最外层键名为'采购需求'。
|
||||
2.层次关系用嵌套键值对表示。
|
||||
3.嵌套键名为系统或货物或模块名称,与原文保持一致。
|
||||
4.最内层键值应为空对象({{}})。
|
||||
4.最内层键值应为空列表[]。
|
||||
5.不包含'说明'、'规格'、'技术参数'等列内容,仅返回采购的货物或系统或模块名称。
|
||||
|
||||
特殊情况处理:
|
||||
同一层级(如同一系统中)下同名但采购要求不同的货物,以'货物名-编号'区分,编号从1递增。
|
||||
|
||||
示例输出1,普通系统、货物类采购:
|
||||
{{
|
||||
"采购需求": {{
|
||||
"交换机-1": {{}},
|
||||
"交换机-2": {{}},
|
||||
"交换机-1": [],
|
||||
"交换机-2": [],
|
||||
"门禁管理系统": {{
|
||||
"系统功能":{{}}
|
||||
"系统功能":[]
|
||||
}},
|
||||
"交通监控视频子系统": {{
|
||||
"系统功能": {{}},
|
||||
"高清视频抓拍像机": {{}},
|
||||
"补光灯": {{}}
|
||||
"系统功能": [],
|
||||
"高清视频抓拍像机": [],
|
||||
"补光灯": []
|
||||
}},
|
||||
"LED全彩显示屏": {{}}
|
||||
"LED全彩显示屏": []
|
||||
// 其他系统和货物
|
||||
}}
|
||||
}}
|
||||
@ -257,15 +258,15 @@ def get_technical_requirements(file_path,invalid_path):
|
||||
{{
|
||||
"采购需求": {{
|
||||
"信息管理系统": {{
|
||||
"通用模块":{{}},
|
||||
"用户管理":{{}}
|
||||
"通用模块":[],
|
||||
"用户管理":[]
|
||||
}},
|
||||
"信息检索系统": {{
|
||||
"系统功能":{{}},
|
||||
"权限管理模块":{{}}
|
||||
"系统功能":[],
|
||||
"权限管理模块":[]
|
||||
}},
|
||||
"XX小程序":{{}},
|
||||
"数据分析中心":{{}}
|
||||
"XX小程序":[],
|
||||
"数据分析中心":[]
|
||||
}}
|
||||
}}
|
||||
|
||||
@ -289,7 +290,7 @@ def get_technical_requirements(file_path,invalid_path):
|
||||
1.JSON格式,最外层键名为'采购需求'。
|
||||
2.层次关系用嵌套键值对表示。
|
||||
3.嵌套键名为系统或货物或模块名称,与原文保持一致。
|
||||
4.最内层键值应为空对象({{}})。
|
||||
4.最内层键值应为空列表[]。
|
||||
5.不包含'说明'、'规格'、'技术参数'等列内容,仅返回采购的货物或系统或模块名称。
|
||||
|
||||
特殊情况处理:
|
||||
@ -298,17 +299,17 @@ def get_technical_requirements(file_path,invalid_path):
|
||||
示例输出1,普通系统、货物类采购:
|
||||
{{
|
||||
"采购需求": {{
|
||||
"交换机-1": {{}},
|
||||
"交换机-2": {{}},
|
||||
"交换机-1": [],
|
||||
"交换机-2": [],
|
||||
"门禁管理系统": {{
|
||||
"系统功能":{{}}
|
||||
"系统功能":[]
|
||||
}},
|
||||
"交通监控视频子系统": {{
|
||||
"系统功能": {{}},
|
||||
"高清视频抓拍像机": {{}},
|
||||
"补光灯": {{}}
|
||||
"系统功能": [],
|
||||
"高清视频抓拍像机": [],
|
||||
"补光灯": []
|
||||
}},
|
||||
"LED全彩显示屏": {{}}
|
||||
"LED全彩显示屏": []
|
||||
// 其他系统和货物
|
||||
}}
|
||||
}}
|
||||
@ -316,15 +317,15 @@ def get_technical_requirements(file_path,invalid_path):
|
||||
{{
|
||||
"采购需求": {{
|
||||
"信息管理系统": {{
|
||||
"通用模块":{{}},
|
||||
"用户管理":{{}}
|
||||
"通用模块":[],
|
||||
"用户管理":[]
|
||||
}},
|
||||
"信息检索系统": {{
|
||||
"系统功能":{{}},
|
||||
"权限管理模块":{{}}
|
||||
"系统功能":[],
|
||||
"权限管理模块":[]
|
||||
}},
|
||||
"XX小程序":{{}},
|
||||
"数据分析中心":{{}}
|
||||
"XX小程序":[],
|
||||
"数据分析中心":[]
|
||||
}}
|
||||
}}
|
||||
|
||||
@ -432,7 +433,7 @@ def get_technical_requirements(file_path,invalid_path):
|
||||
# 打印结果
|
||||
for question, response in results:
|
||||
technical_requirements.append(response)
|
||||
# print(response)
|
||||
print(response)
|
||||
technical_requirements_combined_res = combine_json_results(technical_requirements)
|
||||
|
||||
"""根据所有键是否已添加处理技术要求"""
|
||||
|
@ -240,6 +240,8 @@ def goods_bid_main(output_folder, file_path, file_type, unique_id):
|
||||
#TODO:把所有未知都删掉。
|
||||
#TODO:考虑把解析失败的调用豆包,全文上传。
|
||||
#TODO:写个脚本确保技术参数没有嵌套
|
||||
|
||||
#TODO:C:\Users\Administrator\Desktop\fsdownload\16fd6b4e-3975-4c83-8ba6-1bc9263a6a5b 符合性审查未找到
|
||||
#商务标这里改为列表最里层
|
||||
#good_list 金额 截取上下文
|
||||
if __name__ == "__main__":
|
||||
|
@ -655,14 +655,14 @@ if __name__ == "__main__":
|
||||
output_folder=r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3"
|
||||
# qualification_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\52e54b20-c975-4cf3-a06b-6f146aaa93f5\\ztbfile_qualification1.pdf"
|
||||
# qualification_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\6558a50a-13ea-4279-a5db-684935481c39\\ztbfile_qualification2.pdf"
|
||||
qualification_path=r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3\ztbfile_qualification.pdf"
|
||||
qualification_path=r"C:\Users\Administrator\Desktop\fsdownload\16fd6b4e-3975-4c83-8ba6-1bc9263a6a5b\ztbfile_qualification1.pdf"
|
||||
# notice_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\6558a50a-13ea-4279-a5db-684935481c39\\ztbfile_notice.pdf"
|
||||
# notice_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\52e54b20-c975-4cf3-a06b-6f146aaa93f5\\ztbfile_notice.pdf"
|
||||
notice_path=r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3\ztbfile_notice.pdf"
|
||||
notice_path=r"C:\Users\Administrator\Desktop\fsdownload\16fd6b4e-3975-4c83-8ba6-1bc9263a6a5b\ztbfile_notice.pdf"
|
||||
# knowledge_name = "6.2视频会议docx"
|
||||
# invalid_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile.pdf"
|
||||
# invalid_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\52e54b20-c975-4cf3-a06b-6f146aaa93f5\\ztbfile.pdf"
|
||||
invalid_path=r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3\ztbfile_invalid.pdf"
|
||||
invalid_path=r"C:\Users\Administrator\Desktop\fsdownload\16fd6b4e-3975-4c83-8ba6-1bc9263a6a5b\ztbfile_invalid.pdf"
|
||||
res = combine_qualification_review(invalid_path, qualification_path, notice_path)
|
||||
print(json.dumps(res, ensure_ascii=False, indent=4))
|
||||
end_time=time.time()
|
||||
|
Loading…
x
Reference in New Issue
Block a user