11.6修复bug

This commit is contained in:
zy123 2024-11-06 12:20:24 +08:00
parent d4d1a14c06
commit 3bd548ea81
8 changed files with 113 additions and 66 deletions

View File

@ -1,8 +1,19 @@
from flask_app.general.format_change import pdf2docx
import json
import logging
from flask_app.general.format_change import pdf2docx, docx2pdf
from flask_app.general.通义千问long import upload_file
from flask_app.货物标.截取pdf货物标版 import truncate_pdf_main
from flask_app.货物标.技术参数要求提取 import get_technical_requirements
def get_global_logger(unique_id):
    """Return the logger associated with a request id.

    Args:
        unique_id: Name of the per-request logger, or ``None``.

    Returns:
        ``logging.getLogger()`` (the default/root logger) when ``unique_id``
        is ``None``; otherwise the logger named ``unique_id``.
    """
    if unique_id is None:
        return logging.getLogger()  # fall back to the default logger
    return logging.getLogger(unique_id)


# Module-level logger handle; rebound by get_technical_requirements_main()
# on every call via ``global logger``.
logger = None
def extract_matching_keys(data_dict, good_list):
"""
@ -31,23 +42,38 @@ def extract_matching_keys(data_dict, good_list):
recurse(data_dict)
return result
def get_technical_requirements_main(file_path, file_type, unique_id, output_folder):
    """Extract the technical requirements from a tender document.

    The input is normalised to PDF, truncated with ``truncate_pdf_main``
    (selection ``5``), converted to docx, uploaded, and then handed to
    ``get_technical_requirements`` together with the full PDF path.

    Args:
        file_path: Path of the downloaded tender file.
        file_type: ``1`` for docx, ``2`` for pdf, ``3`` for doc.
        unique_id: Request id used to pick the logger.
        output_folder: Folder passed to ``truncate_pdf_main`` for its output.

    Returns:
        The result of ``extract_matching_keys`` when the model response is a
        dict whose ``'技术要求'`` entry is a dict; otherwise the raw response.
        ``None`` when the file type is unsupported or no PDF path could be
        obtained.
    """
    # NOTE(review): this rebinds a module-level name on every call, so
    # concurrent requests share (and overwrite) the same logger handle.
    global logger
    logger = get_global_logger(unique_id)

    if file_type in (1, 3):  # docx / doc: convert to pdf for further processing
        pdf_path = docx2pdf(file_path)
    elif file_type == 2:  # already a pdf
        pdf_path = file_path
    else:
        logger.error("Unsupported file type provided. Preprocessing halted.")
        return None

    # presumably docx2pdf yields a falsy value when conversion fails -- TODO confirm
    if not pdf_path:
        logger.error("Failed to obtain a PDF path for %s. Preprocessing halted.", file_path)
        return None

    # Guard the [0] lookup: an empty or None result must fall through to the
    # whole-document fallback instead of raising IndexError/TypeError.
    truncated_files = truncate_pdf_main(pdf_path, output_folder, 5)
    truncate_file = truncated_files[0] if truncated_files else None
    if not truncate_file:
        truncate_file = pdf_path  # send the whole document instead

    truncate_file_docx = pdf2docx(truncate_file)
    file_id = upload_file(truncate_file_docx)
    final_res = get_technical_requirements(file_id, pdf_path)

    # Only unwrap '技术要求' when it is present and really is a dict.
    if not (isinstance(final_res, dict) and isinstance(final_res.get('技术要求'), dict)):
        return final_res

    technical_requirements = final_res['技术要求']
    good_list = technical_requirements.pop('货物列表', [])  # [] when '货物列表' is absent
    logger.info("Collected good_list from the processing function: %s", good_list)
    return extract_matching_keys(technical_requirements, good_list)
if __name__ == "__main__":
    # Manual run against a local sample tender document (developer-machine paths).
    file_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\包头市公安支队机动车查验监管系统招标文201907.pdf"
    file_type = 2  # 2 = pdf
    output_folder = "C:\\Users\\Administrator\\Desktop\\fsdownload\\45f650ce-e519-457b-9ad6-5840e2ede539\\tmp"
    res = get_technical_requirements_main(file_path, file_type, "123", output_folder)
    print(json.dumps(res, ensure_ascii=False, indent=4))

View File

@ -12,8 +12,10 @@ from flask_app.general.post_processing import outer_post_processing
from flask_app.main.工程标解析main import engineering_bid_main
from flask_app.货物标.货物标解析main import goods_bid_main
from flask_app.general.纯技术参数要求提取 import get_technical_requirements_main
app = Flask(__name__)
class CSTFormatter(logging.Formatter):
"""自定义的 Formatter将日志的时间戳调整为中国标准时间UTC+8"""
@ -156,11 +158,13 @@ def validate_request(default_zb_type=1):
return jsonify({'error': 'Invalid zb_type provided'}), 400
return file_url, zb_type
#提取采购需求
# 提取采购需求
@app.route('/procurement_reqs', methods=['POST'])
def get_procurement_reqs():
logger = g.logger
output_folder=g.output_folder
output_folder = g.output_folder
unique_id=g.unique_id
file_url, zb_type = validate_request()
if isinstance(file_url, tuple): # Check if the returned value is an error response
return file_url
@ -173,8 +177,8 @@ def get_procurement_reqs():
'message': 'This endpoint only supports zb_type 2 (procurement requirements)'
}), 400
else:
final_res_path=os.path.join(output_folder,'final_result.json')
response = download_and_process_file_for_procurement(file_url)
final_res_path = os.path.join(output_folder, 'final_result.json')
response = download_and_process_file_for_procurement(file_url,unique_id)
try:
with open(final_res_path, 'w', encoding='utf-8') as json_file:
json.dump(response, json_file, ensure_ascii=False, indent=4)
@ -190,8 +194,9 @@ def get_procurement_reqs():
logger.error('Exception occurred: ' + str(e)) # 使用全局 logger 记录
return jsonify({'error': str(e)}), 500
#提取采购需求
def download_and_process_file_for_procurement(file_url):
# 提取采购需求
def download_and_process_file_for_procurement(file_url,unique_id):
"""
下载并处理采购需求文件
@ -211,18 +216,19 @@ def download_and_process_file_for_procurement(file_url):
logger.error("Unsupported file type or failed to download file")
return None
logger.info("Local file path: " + downloaded_filepath)
res = get_technical_requirements_main(downloaded_filepath, output_folder)
res = get_technical_requirements_main(downloaded_filepath, file_type, unique_id,output_folder)
return res
@app.route('/little_zbparse',methods=['POST'])
@app.route('/little_zbparse', methods=['POST'])
def little_zbparse():
logger=g.logger
file_url,zb_type = validate_request()
logger = g.logger
file_url, zb_type = validate_request()
if isinstance(file_url, tuple): # Check if the returned value is an error response
return file_url
try:
logger.info("starting parsing url:" + file_url)
final_json_path= download_and_process_file(file_url,zb_type)
final_json_path = download_and_process_file(file_url, zb_type)
if not final_json_path:
return jsonify({'error': 'File processing failed'}), 500
response = generate_response(final_json_path) # 先获取响应内容
@ -232,6 +238,7 @@ def little_zbparse():
logger.error('Exception occurred: ' + str(e)) # 使用全局 logger 记录
return jsonify({'error': str(e)}), 500
def download_and_process_file(file_url, zb_type):
"""
下载并处理文件根据zb_type选择处理函数
@ -259,6 +266,7 @@ def download_and_process_file(file_url, zb_type):
processed_file_path = little_parse_main(output_folder, downloaded_filepath, file_type, zb_type, g.unique_id)
return processed_file_path
def generate_response(final_json_path):
logger = g.logger
# 检查final_json_path是否为空或None
@ -287,16 +295,17 @@ def zbparse():
# 获取并显示接收到的 JSON 数据
received_data = request.get_json()
logger.info("Received JSON data: " + str(received_data))
file_url,zb_type = validate_request()
file_url, zb_type = validate_request()
if isinstance(file_url, tuple): # Check if the returned value is an error response
return file_url
try:
logger.info("starting parsing url:" + file_url)
return Response(stream_with_context(process_and_stream(file_url,zb_type)), content_type='text/event-stream')
return Response(stream_with_context(process_and_stream(file_url, zb_type)), content_type='text/event-stream')
except Exception as e:
logger.error('Exception occurred: ' + str(e))
return jsonify({'error': str(e)}), 500
# 分段返回
def process_and_stream(file_url, zb_type):
"""
@ -352,7 +361,7 @@ def process_and_stream(file_url, zb_type):
1: engineering_bid_main,
2: goods_bid_main
}
processing_func = processing_functions.get(zb_type, engineering_bid_main) #默认按工程标解析
processing_func = processing_functions.get(zb_type, engineering_bid_main) # 默认按工程标解析
# 从 processing_func 获取数据
for data in processing_func(output_folder, downloaded_filepath, file_type, unique_id):
@ -386,17 +395,18 @@ def process_and_stream(file_url, zb_type):
}
yield f"data: {json.dumps(response, ensure_ascii=False)}\n\n"
base_end_time=time.time()
base_end_time = time.time()
logger.info(f"分段解析完成,耗时:{base_end_time - start_time:.2f}")
# **保存 combined_data 到 output_folder 下的 'final_result.json'**
output_json_path = os.path.join(output_folder, 'final_result.json')
extracted_info_path=os.path.join(output_folder, 'extracted_result.json')
extracted_info_path = os.path.join(output_folder, 'extracted_result.json')
includes = ["基础信息", "资格审查", "商务评分", "技术评分", "无效标与废标项", "投标文件要求", "开评定标流程"]
final_result, extracted_info,procurement_reqs = outer_post_processing(combined_data, includes,good_list)
final_result, extracted_info, procurement_reqs = outer_post_processing(combined_data, includes, good_list)
logger.info(f"Procurement requirements extracted: {json.dumps(procurement_reqs, ensure_ascii=False, indent=4)}") # 添加日志记录
#采购需求
procurement_reqs_response={
logger.info(
f"Procurement requirements extracted: {json.dumps(procurement_reqs, ensure_ascii=False, indent=4)}") # 添加日志记录
# 采购需求
procurement_reqs_response = {
'message': 'procurement_reqs',
'filename': os.path.basename(downloaded_filepath),
'data': json.dumps(procurement_reqs, ensure_ascii=False)
@ -417,7 +427,7 @@ def process_and_stream(file_url, zb_type):
except IOError as e:
logger.error(f"保存JSON文件时出错: {e}")
#提取的数据
# 提取的数据
extracted_info_response = {
'message': 'extracted_info',
'filename': os.path.basename(downloaded_filepath),

View File

@ -347,7 +347,7 @@ def truncate_pdf_main(input_path, output_folder, selection):
r'第[一二三四五六七八九十1-9]+(?:章|部分)\s*[\u4e00-\u9fff、()]*?(?=.*(?:磋商|谈判|评标|评定|评审))(?=.*(?:办法|方法))[\u4e00-\u9fff、()]*\s*$|\s*评标(办法|方法)前附表\s*$',
re.MULTILINE
),
re.compile(r'第[一二三四五六七八九十1-9]+(?:章|部分)\s*[\u4e00-\u9fff]+\s*$', re.MULTILINE)
re.compile(r'第[一二三四五六七八九十1-9]+(?:章|部分)\s*[\u4e00-\u9fff、()]+\s*$', re.MULTILINE)
)
]
output_suffix = "evaluation_method"
@ -364,11 +364,11 @@ def truncate_pdf_main(input_path, output_folder, selection):
# ),
(
re.compile(
r'^(?:附录(?:[一1])?[:]|附件(?:[一1])?[:]|附表(?:[一1])?[:]).*(?:资质|能力|信誉).*$|第[一二三四五六七八九1-9]+(?:章|部分)\s*[\u4e00-\u9fff]*资格[\u4e00-\u9fff]*\s*$',
r'^(?:附录(?:[一1])?[:]|附件(?:[一1])?[:]|附表(?:[一1])?[:]).*(?:资质|能力|信誉).*$|第[一二三四五六七八九1-9]+(?:章|部分)\s*[\u4e00-\u9fff、()]*资格[\u4e00-\u9fff、()]*\s*$',
re.MULTILINE),
re.compile(
r'^(?:附录[一二三四五六七八九1-9]*[:]|附件[一二三四五六七八九1-9]*[:]|附表[一二三四五六七八九1-9]*[:])(?!.*(?:资质|能力|信誉)).*|'
r'第[一二三四五六七八九1-9]+(?:章|部分)\s*[\u4e00-\u9fff、]+\s*$', re.MULTILINE)
r'第[一二三四五六七八九1-9]+(?:章|部分)\s*[\u4e00-\u9fff、()]+\s*$', re.MULTILINE)
)
]
output_suffix = "qualification"

View File

@ -158,7 +158,7 @@ def get_base_info(merged_baseinfo_path,clause_path):
# baseinfo_list.append(clean_json_string(response))
return baseinfo_list
def combine_basic_info(merged_baseinfo_path, procurement_path,procurement_docpath,clause_path):
def combine_basic_info(merged_baseinfo_path, procurement_path,procurement_docpath,clause_path,invalid_path):
baseinfo_list = []
temp_list = []
procurement_reqs = {}
@ -169,7 +169,7 @@ def combine_basic_info(merged_baseinfo_path, procurement_path,procurement_docpat
# 定义一个线程函数来获取采购需求
def fetch_procurement_reqs_thread():
nonlocal procurement_reqs
procurement_reqs = fetch_procurement_reqs(procurement_path,procurement_docpath)
procurement_reqs = fetch_procurement_reqs(procurement_path,procurement_docpath,invalid_path)
# 创建并启动获取基础信息的线程
thread1 = threading.Thread(target=get_base_info_thread)
thread1.start()

View File

@ -150,13 +150,17 @@ def extract_pages(pdf_path, output_folder, begin_pattern, begin_page, end_patter
def get_patterns_for_procurement():
    """Build the regex pair that delimits the procurement-requirements chapter.

    Returns:
        A ``(begin_pattern, end_pattern)`` tuple of compiled ``re.MULTILINE``
        patterns. ``begin_pattern`` matches a "第X章/部分" heading line whose
        title either contains 服务/项目/商务/技术 followed later by 要求, or
        contains 采购/需求. ``end_pattern`` matches any "第X章/部分" heading
        line whose title consists only of the allowed title characters.
    """
    begin_pattern = re.compile(
        r'第[一二三四五六七八九十1-9]+(?:章|部分)\s*'  # "第X章" or "第X部分"
        r'[\u4e00-\u9fff、()]*?'  # optional leading title characters
        r'(?:(?:服务|项目|商务|技术)[\u4e00-\u9fff、()]*?要求[\u4e00-\u9fff、()]*?\s*$|'  # 服务/项目/商务/技术 ... 要求
        r'(?:采购|需求)[\u4e00-\u9fff、()]*?)\s*$',  # or a title containing 采购/需求
        re.MULTILINE
    )
    # The numeral class includes 十 (as in begin_pattern) so that a following
    # "第十章 ..." heading can terminate the section as well.
    end_pattern = re.compile(
        r'第[一二三四五六七八九十1-9]+(?:章|部分)\s*[\u4e00-\u9fff、()]+\s*$',
        re.MULTILINE
    )
    return begin_pattern, end_pattern
@ -167,14 +171,14 @@ def get_patterns_for_evaluation_method():
# )
begin_pattern = re.compile(
r'第[一二三四五六七八九1-9]+(?:章|部分)\s*' # 匹配“第X章”或“第X部分”
r'(?:[\u4e00-\u9fff、()]*?)' # 匹配允许的字符(中文、顿号、括号)
r'(?=.*(?:磋商|谈判|评标|评定|评审))' # 确保包含“磋商”、“谈判”、“评标”、“评定”或“评审”
r'(?:[\u4e00-\u9fff、()]*?)' # 匹配允许的字符(中文、顿号、括号)
r'(?=.*(?:磋商|谈判|评标|评定|评审))' # 确保包含“磋商”、“谈判”、“评标”、“评定”或“评审” 注意这里的'.*'是允许这些关键词出现在任意位置,但主体匹配部分仍然受到字符集的限制。
r'(?=.*(?:办法|方法))' # 确保包含“办法”或“方法”
r'[\u4e00-\u9fff、()]*\s*$', # 继续匹配允许的字符直到行尾
re.MULTILINE
)
end_pattern = re.compile(
r'第[一二三四五六七八九1-9]+(?:章|部分)\s*[\u4e00-\u9fff]+\s*$', re.MULTILINE)
r'第[一二三四五六七八九1-9]+(?:章|部分)\s*[\u4e00-\u9fff、()]+\s*$', re.MULTILINE)
return begin_pattern, end_pattern
def get_patterns_for_qualification():
@ -381,7 +385,7 @@ def extract_pages_tobidders_notice(pdf_path, begin_pattern, begin_page, common_h
re.MULTILINE
)
new_end_pattern = re.compile(
r'第[一二三四五六七八九十百千]+(?:章|部分)\s*[\u4e00-\u9fff、]+\s*$',
r'第[一二三四五六七八九十百千]+(?:章|部分)\s*[\u4e00-\u9fff、()]+\s*$',
re.MULTILINE
)
print("第三次尝试 tobidders_notice! ")
@ -446,10 +450,10 @@ def extract_pages_tobidders_notice(pdf_path, begin_pattern, begin_page, common_h
def extract_pages_twice_tobidders_notice(pdf_document, common_header,begin_page):
begin_pattern = re.compile(
r'^第[一二三四五六七八九十百千]+(?:章|部分)\s*(?:(?:投标人?|磋商|供应商|谈判供应商|磋商供应商)须知)+'
r'^第[一二三四五六七八九十百千]+(?:章|部分)\s*(?:(?:投标人?|磋商|供应商|谈判供应商|磋商供应商)须知)+',re.MULTILINE
)
end_pattern = re.compile(
r'^第[一二三四五六七八九十百千]+(?:章|部分)\s*([\u4e00-\u9fff]+)' # 捕获中文部分
r'^第[一二三四五六七八九十百千]+(?:章|部分)\s*([\u4e00-\u9fff]+)',re.MULTILINE # 捕获中文部分
)
exclusion_words = ["合同", "评标", "开标","评审","采购","资格"] # 在这里添加需要排除的关键词
@ -654,7 +658,7 @@ def process_input(input_path, output_folder, selection, output_suffix):
# 根据选择设置对应的模式和结束模式
if selection == 1:
begin_pattern = re.compile(r'.*(?:招标公告|磋商公告|谈判公告|邀请书|邀请函|投标邀请|磋商邀请|谈判邀请)[\)]?\s*$', re.MULTILINE)
end_pattern = re.compile(r'第[一二三四五六七八九1-9]+(?:章|部分)\s*[\u4e00-\u9fff、()]+\s*$', re.MULTILINE)
end_pattern = re.compile(r'第[一二三四五六七八九1-9]+(?:章|部分)\s*[\u4e00-\u9fff、()]+\s*$', re.MULTILINE)
local_output_suffix = "notice"
elif selection == 2:
begin_pattern = re.compile(
@ -673,7 +677,7 @@ def process_input(input_path, output_folder, selection, output_suffix):
local_output_suffix = "tobidders_notice"
elif selection == 5:
begin_pattern = re.compile(
r'^第[一二三四五六七八九十百千]+(?:章|部分).*?(?:服务|项目|商务|技术).*?要求|^第[一二三四五六七八九十百千]+(?:章|部分).*(?:采购|需求).*')
r'^第[一二三四五六七八九十百千]+(?:章|部分).*?(?:服务|项目|商务|技术).*?要求|^第[一二三四五六七八九十百千]+(?:章|部分)(?!.*说明).*(?:采购内容|采购要求|需求).*') #包头中有一章'采购相关说明'
end_pattern = re.compile(r'^第[一二三四五六七八九十百千]+(?:章|部分)\s*[\u4e00-\u9fff]+')
local_output_suffix = "procurement"
@ -799,7 +803,7 @@ def truncate_pdf_specific_goods(pdf_path, output_folder, selections,unique_id="1
#ztbfile.pdf少资格评审 包头少符合性评审
if __name__ == "__main__":
input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\ztbfile.pdf"
input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles"
# input_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\f8b793b5-aa60-42d3-ae59-a3f474e06610\\ztbfile.pdf"
# input_path="C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\zbtest4_evaluation_method.pdf"
# input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\output1\\2-招标文件_procurement.pdf"

View File

@ -89,9 +89,14 @@ def postprocess(data):
# 递归处理顶层数据
return {key: convert_dict(val) if isinstance(val, dict) else val for key, val in data.items()}
def get_technical_requirements(file_id):
def get_technical_requirements(file_id,invalid_path):
first_query="该文档中是否说明了采购需求,即需要采购哪些货物?如果有,请回答'',否则,回答''"
judge_res=qianwen_long(file_id,first_query)
print(judge_res)
if '' in judge_res:
file_id=upload_file(invalid_path)
user_query1 = """
这是一份货物标中采购要求部分的内容请告诉我需要采购的货物如果有采购清单请直接根据清单上的货物或系统名称给出结果若没有采购清单你要从表格中或文中摘取需要采购的系统或货物采购需求中可能包含层次关系例如采购的某大系统中可能包含几种货物那么你需要用嵌套键值对表示这种关系且不要遗漏该系统中包含的货物你的输出请以json格式返回最外层键名为'采购需求'嵌套键名为对应的系统名称或货物名称需与原文保持一致无需给出采购数量和单位如有未知内容在对应键值处填'未知'以下为示例输出
请你首先定位该采购文件中的采购清单或采购需求部分请告诉我需要采购的货物如果有采购清单请直接根据清单上的货物或系统名称给出结果若没有采购清单你要从表格中或文中摘取需要采购的系统或货物采购需求中可能包含层次关系例如采购的某大系统中可能包含几种货物那么你需要用嵌套键值对表示这种关系且不要遗漏该系统中包含的货物你的输出请以json格式返回最外层键名为'采购需求'嵌套键名为对应的系统名称或货物名称需与原文保持一致无需给出采购数量和单位如有未知内容在对应键值处填'未知'以下为示例输出
{
"采购需求": {
"门禁管理系统": {},
@ -105,9 +110,8 @@ def get_technical_requirements(file_id):
}
"""
res = qianwen_long(file_id, user_query1)
# print(res)
print(res)
cleaned_res = clean_json_string(res) #转字典
# print(res)
keys_list,good_list,no_keys_added= generate_key_paths(cleaned_res['采购需求']) # 提取需要采购的货物清单
if '采购需求' in cleaned_res:
cleaned_res['技术要求'] = cleaned_res.pop('采购需求')
@ -172,14 +176,17 @@ def test_all_files_in_folder(input_folder, output_folder):
print(f"处理文件 {file_path} 时出错: {e}")
if __name__ == "__main__":
# truncate_file="D:\\flask_project\\flask_app\\static\\output\\output1\\bf225a5e-16d0-45c8-8c19-54a1a94cf3e2\\ztbfile_procurement.docx"
# truncate_file="C:\\Users\\Administrator\\Desktop\\货物标\\zbfilesdocx\\招标文件(107国道).docx"
truncate_file="D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile_procurement.docx"
output_folder="C:\\Users\\Administrator\\Desktop\\货物标\\output1\\tmp"
file_id = upload_file(truncate_file)
res=get_technical_requirements(file_id)
# # truncate_file="D:\\flask_project\\flask_app\\static\\output\\output1\\bf225a5e-16d0-45c8-8c19-54a1a94cf3e2\\ztbfile_procurement.docx"
# # truncate_file="C:\\Users\\Administrator\\Desktop\\货物标\\zbfilesdocx\\招标文件(107国道).docx"
# invalid_path="D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile.pdf"
# truncate_file="D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile_procurement.docx"
# output_folder="C:\\Users\\Administrator\\Desktop\\货物标\\output1\\tmp"
# file_id = upload_file(truncate_file)
invalid_path="C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\包头市公安支队机动车查验监管系统招标文201907.pdf"
file_id="file-fe-FcOjv4FiOGjHRG1pKaFrIBeG"
res=get_technical_requirements(file_id,invalid_path)
json_string = json.dumps(res, ensure_ascii=False, indent=4)
print(json_string)
# input_folder = "C:\\Users\\Administrator\\Desktop\\货物标\\output1"
# output_folder = "C:\\Users\\Administrator\\Desktop\\货物标\\output3"
# test_all_files_in_folder(input_folder, output_folder)
# # input_folder = "C:\\Users\\Administrator\\Desktop\\货物标\\output1"
# # output_folder = "C:\\Users\\Administrator\\Desktop\\货物标\\output3"
# # test_all_files_in_folder(input_folder, output_folder)

View File

@ -7,7 +7,7 @@ from flask_app.货物标.商务服务其他要求提取 import get_business_requ
#获取采购清单
def fetch_procurement_reqs(procurement_path,procurement_docpath):
def fetch_procurement_reqs(procurement_path,procurement_docpath,invalid_path):
# 定义默认的 procurement_reqs 字典
DEFAULT_PROCUREMENT_REQS = {
"技术要求": "",
@ -25,8 +25,8 @@ def fetch_procurement_reqs(procurement_path,procurement_docpath):
# 使用 ThreadPoolExecutor 并行处理 get_technical_requirements 和 get_business_requirements
with concurrent.futures.ThreadPoolExecutor() as executor:
# 提交任务给线程池
future_technical = executor.submit(get_technical_requirements, file_id)
time.sleep(1) # 如果需要延迟,可以保留,否则建议移除以提高效率
future_technical = executor.submit(get_technical_requirements, file_id,invalid_path)
time.sleep(0.5)
future_business = executor.submit(get_business_requirements, procurement_path, file_id)
# 获取并行任务的结果

View File

@ -90,7 +90,7 @@ def fetch_project_basic_info(invalid_path,invalid_docpath, merged_baseinfo_path,
merged_baseinfo_path = invalid_path
if not procurement_docpath:
procurement_docpath=invalid_docpath
basic_res = combine_basic_info(merged_baseinfo_path, procurement_path,procurement_docpath, clause_path)
basic_res = combine_basic_info(merged_baseinfo_path, procurement_path,procurement_docpath, clause_path,invalid_path)
base_info, good_list = post_process_baseinfo(basic_res)
end_time = time.time()
logger.info(f"基础信息 done耗时{end_time - start_time:.2f}")