11.6修复bug
This commit is contained in:
parent
d4d1a14c06
commit
3bd548ea81
@ -1,8 +1,19 @@
|
||||
from flask_app.general.format_change import pdf2docx
|
||||
import json
|
||||
import logging
|
||||
|
||||
from flask_app.general.format_change import pdf2docx, docx2pdf
|
||||
from flask_app.general.通义千问long import upload_file
|
||||
from flask_app.货物标.截取pdf货物标版 import truncate_pdf_main
|
||||
from flask_app.货物标.技术参数要求提取 import get_technical_requirements
|
||||
|
||||
def get_global_logger(unique_id):
    """Return the logger to use for a given request identifier.

    Args:
        unique_id: Name of the logger to fetch, or ``None``.

    Returns:
        logging.Logger: the logger named ``unique_id``; when ``unique_id`` is
        ``None`` the default logger returned by ``logging.getLogger()``.
    """
    if unique_id is None:
        # No identifier available: fall back to the default logger.
        return logging.getLogger()
    return logging.getLogger(unique_id)
|
||||
|
||||
|
||||
logger = None
|
||||
|
||||
def extract_matching_keys(data_dict, good_list):
|
||||
"""
|
||||
@ -31,23 +42,38 @@ def extract_matching_keys(data_dict, good_list):
|
||||
recurse(data_dict)
|
||||
return result
|
||||
|
||||
def get_technical_requirements_main(file_path, file_type, unique_id, output_folder):
    """Extract the technical requirements of a goods tender document.

    The input is normalised to PDF, the procurement section is cut out of it,
    converted to docx, uploaded, and queried for technical requirements.

    Args:
        file_path: Path of the downloaded tender file.
        file_type: 1 for docx, 2 for pdf, 3 for doc.
        unique_id: Identifier used to select the logger via
            ``get_global_logger``.
        output_folder: Folder passed to ``truncate_pdf_main`` for its output.

    Returns:
        ``None`` when ``file_type`` is unsupported. Otherwise, when the model
        result is a dict whose '技术要求' entry is a dict, the result of
        ``extract_matching_keys`` on that entry (with '货物列表' removed);
        in every other case the raw result of ``get_technical_requirements``.

    Side effects:
        Rebinds the module-level ``logger``.
    """
    global logger
    logger = get_global_logger(unique_id)

    if file_type in (1, 3):  # docx / doc: convert to pdf for the later steps
        pdf_path = docx2pdf(file_path)
    elif file_type == 2:  # pdf
        pdf_path = file_path
    else:
        logger.error("Unsupported file type provided. Preprocessing halted.")
        return None

    # Selection 5 is the procurement section. Guard the indexing so that an
    # empty result falls back to the whole document instead of raising.
    truncated_files = truncate_pdf_main(pdf_path, output_folder, 5)
    truncate_file = truncated_files[0] if truncated_files else None
    if not truncate_file:
        truncate_file = pdf_path  # pass the whole document instead

    truncate_file_docx = pdf2docx(truncate_file)
    file_id = upload_file(truncate_file_docx)
    final_res = get_technical_requirements(file_id, pdf_path)

    # Safely pull out the dict nested under "技术要求".
    if (
        isinstance(final_res, dict)
        and isinstance(final_res.get('技术要求'), dict)
    ):
        technical_requirements = final_res['技术要求']
        # Defaults to [] when '货物列表' is absent.
        good_list = technical_requirements.pop('货物列表', [])
        logger.info("Collected good_list from the processing function: %s", good_list)
        return extract_matching_keys(technical_requirements, good_list)
    return final_res
|
||||
if __name__ == "__main__":
    # Manual run against a local sample tender; all paths are machine-specific.
    # Alternative sample:
    # file_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\45f650ce-e519-457b-9ad6-5840e2ede539\\ztbfile.pdf"
    file_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\包头市公安支队机动车查验监管系统招标文201907.pdf"
    file_type = 2  # the sample above is a pdf
    output_folder = "C:\\Users\\Administrator\\Desktop\\fsdownload\\45f650ce-e519-457b-9ad6-5840e2ede539\\tmp"
    # The entry point takes (file_path, file_type, unique_id, output_folder).
    res = get_technical_requirements_main(file_path, file_type, "123", output_folder)
    print(json.dumps(res, ensure_ascii=False, indent=4))
|
||||
|
@ -12,8 +12,10 @@ from flask_app.general.post_processing import outer_post_processing
|
||||
from flask_app.main.工程标解析main import engineering_bid_main
|
||||
from flask_app.货物标.货物标解析main import goods_bid_main
|
||||
from flask_app.general.纯技术参数要求提取 import get_technical_requirements_main
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
class CSTFormatter(logging.Formatter):
|
||||
"""自定义的 Formatter,将日志的时间戳调整为中国标准时间(UTC+8)"""
|
||||
|
||||
@ -156,11 +158,13 @@ def validate_request(default_zb_type=1):
|
||||
return jsonify({'error': 'Invalid zb_type provided'}), 400
|
||||
return file_url, zb_type
|
||||
|
||||
#提取采购需求
|
||||
|
||||
# 提取采购需求
|
||||
@app.route('/procurement_reqs', methods=['POST'])
|
||||
def get_procurement_reqs():
|
||||
logger = g.logger
|
||||
output_folder=g.output_folder
|
||||
output_folder = g.output_folder
|
||||
unique_id=g.unique_id
|
||||
file_url, zb_type = validate_request()
|
||||
if isinstance(file_url, tuple): # Check if the returned value is an error response
|
||||
return file_url
|
||||
@ -173,8 +177,8 @@ def get_procurement_reqs():
|
||||
'message': 'This endpoint only supports zb_type 2 (procurement requirements)'
|
||||
}), 400
|
||||
else:
|
||||
final_res_path=os.path.join(output_folder,'final_result.json')
|
||||
response = download_and_process_file_for_procurement(file_url)
|
||||
final_res_path = os.path.join(output_folder, 'final_result.json')
|
||||
response = download_and_process_file_for_procurement(file_url,unique_id)
|
||||
try:
|
||||
with open(final_res_path, 'w', encoding='utf-8') as json_file:
|
||||
json.dump(response, json_file, ensure_ascii=False, indent=4)
|
||||
@ -190,8 +194,9 @@ def get_procurement_reqs():
|
||||
logger.error('Exception occurred: ' + str(e)) # 使用全局 logger 记录
|
||||
return jsonify({'error': str(e)}), 500
|
||||
|
||||
#提取采购需求
|
||||
def download_and_process_file_for_procurement(file_url):
|
||||
|
||||
# 提取采购需求
|
||||
def download_and_process_file_for_procurement(file_url,unique_id):
|
||||
"""
|
||||
下载并处理采购需求文件。
|
||||
|
||||
@ -211,18 +216,19 @@ def download_and_process_file_for_procurement(file_url):
|
||||
logger.error("Unsupported file type or failed to download file")
|
||||
return None
|
||||
logger.info("Local file path: " + downloaded_filepath)
|
||||
res = get_technical_requirements_main(downloaded_filepath, output_folder)
|
||||
res = get_technical_requirements_main(downloaded_filepath, file_type, unique_id,output_folder)
|
||||
return res
|
||||
|
||||
@app.route('/little_zbparse',methods=['POST'])
|
||||
|
||||
@app.route('/little_zbparse', methods=['POST'])
|
||||
def little_zbparse():
|
||||
logger=g.logger
|
||||
file_url,zb_type = validate_request()
|
||||
logger = g.logger
|
||||
file_url, zb_type = validate_request()
|
||||
if isinstance(file_url, tuple): # Check if the returned value is an error response
|
||||
return file_url
|
||||
try:
|
||||
logger.info("starting parsing url:" + file_url)
|
||||
final_json_path= download_and_process_file(file_url,zb_type)
|
||||
final_json_path = download_and_process_file(file_url, zb_type)
|
||||
if not final_json_path:
|
||||
return jsonify({'error': 'File processing failed'}), 500
|
||||
response = generate_response(final_json_path) # 先获取响应内容
|
||||
@ -232,6 +238,7 @@ def little_zbparse():
|
||||
logger.error('Exception occurred: ' + str(e)) # 使用全局 logger 记录
|
||||
return jsonify({'error': str(e)}), 500
|
||||
|
||||
|
||||
def download_and_process_file(file_url, zb_type):
|
||||
"""
|
||||
下载并处理文件,根据zb_type选择处理函数。
|
||||
@ -259,6 +266,7 @@ def download_and_process_file(file_url, zb_type):
|
||||
processed_file_path = little_parse_main(output_folder, downloaded_filepath, file_type, zb_type, g.unique_id)
|
||||
return processed_file_path
|
||||
|
||||
|
||||
def generate_response(final_json_path):
|
||||
logger = g.logger
|
||||
# 检查final_json_path是否为空或None
|
||||
@ -287,16 +295,17 @@ def zbparse():
|
||||
# 获取并显示接收到的 JSON 数据
|
||||
received_data = request.get_json()
|
||||
logger.info("Received JSON data: " + str(received_data))
|
||||
file_url,zb_type = validate_request()
|
||||
file_url, zb_type = validate_request()
|
||||
if isinstance(file_url, tuple): # Check if the returned value is an error response
|
||||
return file_url
|
||||
try:
|
||||
logger.info("starting parsing url:" + file_url)
|
||||
return Response(stream_with_context(process_and_stream(file_url,zb_type)), content_type='text/event-stream')
|
||||
return Response(stream_with_context(process_and_stream(file_url, zb_type)), content_type='text/event-stream')
|
||||
except Exception as e:
|
||||
logger.error('Exception occurred: ' + str(e))
|
||||
return jsonify({'error': str(e)}), 500
|
||||
|
||||
|
||||
# 分段返回
|
||||
def process_and_stream(file_url, zb_type):
|
||||
"""
|
||||
@ -352,7 +361,7 @@ def process_and_stream(file_url, zb_type):
|
||||
1: engineering_bid_main,
|
||||
2: goods_bid_main
|
||||
}
|
||||
processing_func = processing_functions.get(zb_type, engineering_bid_main) #默认按工程标解析
|
||||
processing_func = processing_functions.get(zb_type, engineering_bid_main) # 默认按工程标解析
|
||||
|
||||
# 从 processing_func 获取数据
|
||||
for data in processing_func(output_folder, downloaded_filepath, file_type, unique_id):
|
||||
@ -386,17 +395,18 @@ def process_and_stream(file_url, zb_type):
|
||||
}
|
||||
yield f"data: {json.dumps(response, ensure_ascii=False)}\n\n"
|
||||
|
||||
base_end_time=time.time()
|
||||
base_end_time = time.time()
|
||||
logger.info(f"分段解析完成,耗时:{base_end_time - start_time:.2f} 秒")
|
||||
# **保存 combined_data 到 output_folder 下的 'final_result.json'**
|
||||
output_json_path = os.path.join(output_folder, 'final_result.json')
|
||||
extracted_info_path=os.path.join(output_folder, 'extracted_result.json')
|
||||
extracted_info_path = os.path.join(output_folder, 'extracted_result.json')
|
||||
includes = ["基础信息", "资格审查", "商务评分", "技术评分", "无效标与废标项", "投标文件要求", "开评定标流程"]
|
||||
final_result, extracted_info,procurement_reqs = outer_post_processing(combined_data, includes,good_list)
|
||||
final_result, extracted_info, procurement_reqs = outer_post_processing(combined_data, includes, good_list)
|
||||
|
||||
logger.info(f"Procurement requirements extracted: {json.dumps(procurement_reqs, ensure_ascii=False, indent=4)}") # 添加日志记录
|
||||
#采购需求
|
||||
procurement_reqs_response={
|
||||
logger.info(
|
||||
f"Procurement requirements extracted: {json.dumps(procurement_reqs, ensure_ascii=False, indent=4)}") # 添加日志记录
|
||||
# 采购需求
|
||||
procurement_reqs_response = {
|
||||
'message': 'procurement_reqs',
|
||||
'filename': os.path.basename(downloaded_filepath),
|
||||
'data': json.dumps(procurement_reqs, ensure_ascii=False)
|
||||
@ -417,7 +427,7 @@ def process_and_stream(file_url, zb_type):
|
||||
except IOError as e:
|
||||
logger.error(f"保存JSON文件时出错: {e}")
|
||||
|
||||
#提取的数据
|
||||
# 提取的数据
|
||||
extracted_info_response = {
|
||||
'message': 'extracted_info',
|
||||
'filename': os.path.basename(downloaded_filepath),
|
||||
|
@ -347,7 +347,7 @@ def truncate_pdf_main(input_path, output_folder, selection):
|
||||
r'第[一二三四五六七八九十1-9]+(?:章|部分)\s*[\u4e00-\u9fff、()()]*?(?=.*(?:磋商|谈判|评标|评定|评审))(?=.*(?:办法|方法))[\u4e00-\u9fff、()()]*\s*$|\s*评标(办法|方法)前附表\s*$',
|
||||
re.MULTILINE
|
||||
),
|
||||
re.compile(r'第[一二三四五六七八九十1-9]+(?:章|部分)\s*[\u4e00-\u9fff]+\s*$', re.MULTILINE)
|
||||
re.compile(r'第[一二三四五六七八九十1-9]+(?:章|部分)\s*[\u4e00-\u9fff、()()]+\s*$', re.MULTILINE)
|
||||
)
|
||||
]
|
||||
output_suffix = "evaluation_method"
|
||||
@ -364,11 +364,11 @@ def truncate_pdf_main(input_path, output_folder, selection):
|
||||
# ),
|
||||
(
|
||||
re.compile(
|
||||
r'^(?:附录(?:[一1])?[::]|附件(?:[一1])?[::]|附表(?:[一1])?[::]).*(?:资质|能力|信誉).*$|第[一二三四五六七八九1-9]+(?:章|部分)\s*[\u4e00-\u9fff]*资格[\u4e00-\u9fff]*\s*$',
|
||||
r'^(?:附录(?:[一1])?[::]|附件(?:[一1])?[::]|附表(?:[一1])?[::]).*(?:资质|能力|信誉).*$|第[一二三四五六七八九1-9]+(?:章|部分)\s*[\u4e00-\u9fff、()()]*资格[\u4e00-\u9fff、()()]*\s*$',
|
||||
re.MULTILINE),
|
||||
re.compile(
|
||||
r'^(?:附录[一二三四五六七八九1-9]*[::]|附件[一二三四五六七八九1-9]*[::]|附表[一二三四五六七八九1-9]*[::])(?!.*(?:资质|能力|信誉)).*|'
|
||||
r'第[一二三四五六七八九1-9]+(?:章|部分)\s*[\u4e00-\u9fff、]+\s*$', re.MULTILINE)
|
||||
r'第[一二三四五六七八九1-9]+(?:章|部分)\s*[\u4e00-\u9fff、()()]+\s*$', re.MULTILINE)
|
||||
)
|
||||
]
|
||||
output_suffix = "qualification"
|
||||
|
@ -158,7 +158,7 @@ def get_base_info(merged_baseinfo_path,clause_path):
|
||||
# baseinfo_list.append(clean_json_string(response))
|
||||
return baseinfo_list
|
||||
|
||||
def combine_basic_info(merged_baseinfo_path, procurement_path,procurement_docpath,clause_path):
|
||||
def combine_basic_info(merged_baseinfo_path, procurement_path,procurement_docpath,clause_path,invalid_path):
|
||||
baseinfo_list = []
|
||||
temp_list = []
|
||||
procurement_reqs = {}
|
||||
@ -169,7 +169,7 @@ def combine_basic_info(merged_baseinfo_path, procurement_path,procurement_docpat
|
||||
# 定义一个线程函数来获取采购需求
|
||||
def fetch_procurement_reqs_thread():
|
||||
nonlocal procurement_reqs
|
||||
procurement_reqs = fetch_procurement_reqs(procurement_path,procurement_docpath)
|
||||
procurement_reqs = fetch_procurement_reqs(procurement_path,procurement_docpath,invalid_path)
|
||||
# 创建并启动获取基础信息的线程
|
||||
thread1 = threading.Thread(target=get_base_info_thread)
|
||||
thread1.start()
|
||||
|
@ -150,13 +150,17 @@ def extract_pages(pdf_path, output_folder, begin_pattern, begin_page, end_patter
|
||||
|
||||
|
||||
def get_patterns_for_procurement():
    """Build the regexes that delimit the procurement-requirements chapter.

    Returns:
        tuple: ``(begin_pattern, end_pattern)``, both compiled with
        ``re.MULTILINE``.

        * ``begin_pattern`` matches a heading "第X章" / "第X部分" whose title,
          made only of CJK characters, "、" and parentheses, either contains
          one of 服务/项目/商务/技术 followed later by 要求, or contains
          采购 or 需求, and then runs to the end of the line.
        * ``end_pattern`` matches any "第X章" / "第X部分" heading whose title
          is made of the same characters and runs to the end of the line.
    """
    # Previous, looser begin pattern kept for reference:
    # begin_pattern = re.compile(
    #     r'^第[一二三四五六七八九十百千]+(?:章|部分).*?(?:服务|项目|商务|技术).*?要求|^第[一二三四五六七八九十1-9]+(?:章|部分).*(?:采购|需求).*',
    #     re.MULTILINE)
    #
    # Parentheses are written as ASCII plus explicit \uff08\uff09 escapes so
    # that both ASCII and full-width brackets in headings are accepted.
    begin_pattern = re.compile(
        r'第[一二三四五六七八九十1-9]+(?:章|部分)\s*'  # "第X章" or "第X部分"
        r'[\u4e00-\u9fff、()\uff08\uff09]*?'  # allowed title characters
        # 服务/项目/商务/技术 followed by 要求 ...
        r'(?:(?:服务|项目|商务|技术)[\u4e00-\u9fff、()\uff08\uff09]*?要求[\u4e00-\u9fff、()\uff08\uff09]*?\s*$|'
        # ... or a title containing 采购 or 需求
        r'(?:采购|需求)[\u4e00-\u9fff、()\uff08\uff09]*?)\s*$',
        re.MULTILINE
    )
    # 十 is included so that a "第十章" heading also terminates the section,
    # in line with the numerals accepted by begin_pattern.
    end_pattern = re.compile(
        r'第[一二三四五六七八九十1-9]+(?:章|部分)\s*[\u4e00-\u9fff、()\uff08\uff09]+\s*$',
        re.MULTILINE
    )
    return begin_pattern, end_pattern
|
||||
|
||||
|
||||
@ -167,14 +171,14 @@ def get_patterns_for_evaluation_method():
|
||||
# )
|
||||
begin_pattern = re.compile(
|
||||
r'第[一二三四五六七八九1-9]+(?:章|部分)\s*' # 匹配“第X章”或“第X部分”
|
||||
r'(?:[\u4e00-\u9fff、()()]*?)' # 匹配允许的字符(中文、顿号、括号)
|
||||
r'(?=.*(?:磋商|谈判|评标|评定|评审))' # 确保包含“磋商”、“谈判”、“评标”、“评定”或“评审”
|
||||
r'(?:[\u4e00-\u9fff、()()]*?)' # 匹配允许的字符(中文、顿号、括号)
|
||||
r'(?=.*(?:磋商|谈判|评标|评定|评审))' # 确保包含“磋商”、“谈判”、“评标”、“评定”或“评审” 注意这里的'.*'是允许这些关键词出现在任意位置,但主体匹配部分仍然受到字符集的限制。
|
||||
r'(?=.*(?:办法|方法))' # 确保包含“办法”或“方法”
|
||||
r'[\u4e00-\u9fff、()()]*\s*$', # 继续匹配允许的字符直到行尾
|
||||
re.MULTILINE
|
||||
)
|
||||
end_pattern = re.compile(
|
||||
r'第[一二三四五六七八九1-9]+(?:章|部分)\s*[\u4e00-\u9fff]+\s*$', re.MULTILINE)
|
||||
r'第[一二三四五六七八九1-9]+(?:章|部分)\s*[\u4e00-\u9fff、()()]+\s*$', re.MULTILINE)
|
||||
return begin_pattern, end_pattern
|
||||
|
||||
def get_patterns_for_qualification():
|
||||
@ -381,7 +385,7 @@ def extract_pages_tobidders_notice(pdf_path, begin_pattern, begin_page, common_h
|
||||
re.MULTILINE
|
||||
)
|
||||
new_end_pattern = re.compile(
|
||||
r'第[一二三四五六七八九十百千]+(?:章|部分)\s*[\u4e00-\u9fff、]+\s*$',
|
||||
r'第[一二三四五六七八九十百千]+(?:章|部分)\s*[\u4e00-\u9fff、()()]+\s*$',
|
||||
re.MULTILINE
|
||||
)
|
||||
print("第三次尝试 tobidders_notice! ")
|
||||
@ -446,10 +450,10 @@ def extract_pages_tobidders_notice(pdf_path, begin_pattern, begin_page, common_h
|
||||
|
||||
def extract_pages_twice_tobidders_notice(pdf_document, common_header,begin_page):
|
||||
begin_pattern = re.compile(
|
||||
r'^第[一二三四五六七八九十百千]+(?:章|部分)\s*(?:(?:投标人?|磋商|供应商|谈判供应商|磋商供应商)须知)+'
|
||||
r'^第[一二三四五六七八九十百千]+(?:章|部分)\s*(?:(?:投标人?|磋商|供应商|谈判供应商|磋商供应商)须知)+',re.MULTILINE
|
||||
)
|
||||
end_pattern = re.compile(
|
||||
r'^第[一二三四五六七八九十百千]+(?:章|部分)\s*([\u4e00-\u9fff]+)' # 捕获中文部分
|
||||
r'^第[一二三四五六七八九十百千]+(?:章|部分)\s*([\u4e00-\u9fff]+)',re.MULTILINE # 捕获中文部分
|
||||
)
|
||||
exclusion_words = ["合同", "评标", "开标","评审","采购","资格"] # 在这里添加需要排除的关键词
|
||||
|
||||
@ -654,7 +658,7 @@ def process_input(input_path, output_folder, selection, output_suffix):
|
||||
# 根据选择设置对应的模式和结束模式
|
||||
if selection == 1:
|
||||
begin_pattern = re.compile(r'.*(?:招标公告|磋商公告|谈判公告|邀请书|邀请函|投标邀请|磋商邀请|谈判邀请)[\))]?\s*$', re.MULTILINE)
|
||||
end_pattern = re.compile(r'第[一二三四五六七八九1-9]+(?:章|部分)\s*[\u4e00-\u9fff、()]+\s*$', re.MULTILINE)
|
||||
end_pattern = re.compile(r'第[一二三四五六七八九1-9]+(?:章|部分)\s*[\u4e00-\u9fff、()()]+\s*$', re.MULTILINE)
|
||||
local_output_suffix = "notice"
|
||||
elif selection == 2:
|
||||
begin_pattern = re.compile(
|
||||
@ -673,7 +677,7 @@ def process_input(input_path, output_folder, selection, output_suffix):
|
||||
local_output_suffix = "tobidders_notice"
|
||||
elif selection == 5:
|
||||
begin_pattern = re.compile(
|
||||
r'^第[一二三四五六七八九十百千]+(?:章|部分).*?(?:服务|项目|商务|技术).*?要求|^第[一二三四五六七八九十百千]+(?:章|部分).*(?:采购|需求).*')
|
||||
r'^第[一二三四五六七八九十百千]+(?:章|部分).*?(?:服务|项目|商务|技术).*?要求|^第[一二三四五六七八九十百千]+(?:章|部分)(?!.*说明).*(?:采购内容|采购要求|需求).*') #包头中有一章'采购相关说明'
|
||||
end_pattern = re.compile(r'^第[一二三四五六七八九十百千]+(?:章|部分)\s*[\u4e00-\u9fff]+')
|
||||
local_output_suffix = "procurement"
|
||||
|
||||
@ -799,7 +803,7 @@ def truncate_pdf_specific_goods(pdf_path, output_folder, selections,unique_id="1
|
||||
|
||||
#ztbfile.pdf少资格评审 包头少符合性评审
|
||||
if __name__ == "__main__":
|
||||
input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\ztbfile.pdf"
|
||||
input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles"
|
||||
# input_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\f8b793b5-aa60-42d3-ae59-a3f474e06610\\ztbfile.pdf"
|
||||
# input_path="C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\zbtest4_evaluation_method.pdf"
|
||||
# input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\output1\\2-招标文件_procurement.pdf"
|
||||
|
@ -89,9 +89,14 @@ def postprocess(data):
|
||||
# 递归处理顶层数据
|
||||
return {key: convert_dict(val) if isinstance(val, dict) else val for key, val in data.items()}
|
||||
|
||||
def get_technical_requirements(file_id):
|
||||
def get_technical_requirements(file_id,invalid_path):
|
||||
first_query="该文档中是否说明了采购需求,即需要采购哪些货物?如果有,请回答'是',否则,回答'否'"
|
||||
judge_res=qianwen_long(file_id,first_query)
|
||||
print(judge_res)
|
||||
if '否' in judge_res:
|
||||
file_id=upload_file(invalid_path)
|
||||
user_query1 = """
|
||||
这是一份货物标中采购要求部分的内容,请告诉我需要采购的货物,如果有采购清单,请直接根据清单上的货物(或系统)名称给出结果,若没有采购清单,你要从表格中或文中摘取需要采购的系统(或货物),采购需求中可能包含层次关系,例如采购的某大系统中可能包含几种货物,那么你需要用嵌套键值对表示这种关系,且不要遗漏该系统中包含的货物,你的输出请以json格式返回,最外层键名为'采购需求',嵌套键名为对应的系统名称或货物名称,需与原文保持一致,无需给出采购数量和单位,如有未知内容,在对应键值处填'未知'。以下为示例输出:
|
||||
请你首先定位该采购文件中的采购清单或采购需求部分,请告诉我需要采购的货物,如果有采购清单,请直接根据清单上的货物(或系统)名称给出结果,若没有采购清单,你要从表格中或文中摘取需要采购的系统(或货物),采购需求中可能包含层次关系,例如采购的某大系统中可能包含几种货物,那么你需要用嵌套键值对表示这种关系,且不要遗漏该系统中包含的货物,你的输出请以json格式返回,最外层键名为'采购需求',嵌套键名为对应的系统名称或货物名称,需与原文保持一致,无需给出采购数量和单位,如有未知内容,在对应键值处填'未知'。以下为示例输出:
|
||||
{
|
||||
"采购需求": {
|
||||
"门禁管理系统": {},
|
||||
@ -105,9 +110,8 @@ def get_technical_requirements(file_id):
|
||||
}
|
||||
"""
|
||||
res = qianwen_long(file_id, user_query1)
|
||||
# print(res)
|
||||
print(res)
|
||||
cleaned_res = clean_json_string(res) #转字典
|
||||
# print(res)
|
||||
keys_list,good_list,no_keys_added= generate_key_paths(cleaned_res['采购需求']) # 提取需要采购的货物清单
|
||||
if '采购需求' in cleaned_res:
|
||||
cleaned_res['技术要求'] = cleaned_res.pop('采购需求')
|
||||
@ -172,14 +176,17 @@ def test_all_files_in_folder(input_folder, output_folder):
|
||||
print(f"处理文件 {file_path} 时出错: {e}")
|
||||
|
||||
if __name__ == "__main__":
    # Manual run; all paths and the file id are machine/account specific.
    #
    # Alternative setup that uploads a truncated procurement docx first:
    # invalid_path="D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile.pdf"
    # truncate_file="D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile_procurement.docx"
    # output_folder="C:\\Users\\Administrator\\Desktop\\货物标\\output1\\tmp"
    # file_id = upload_file(truncate_file)
    invalid_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\包头市公安支队机动车查验监管系统招标文201907.pdf"
    file_id = "file-fe-FcOjv4FiOGjHRG1pKaFrIBeG"  # id of a previously uploaded file
    # get_technical_requirements takes (file_id, invalid_path).
    res = get_technical_requirements(file_id, invalid_path)
    json_string = json.dumps(res, ensure_ascii=False, indent=4)
    print(json_string)
    # Batch alternative over a folder:
    # input_folder = "C:\\Users\\Administrator\\Desktop\\货物标\\output1"
    # output_folder = "C:\\Users\\Administrator\\Desktop\\货物标\\output3"
    # test_all_files_in_folder(input_folder, output_folder)
|
@ -7,7 +7,7 @@ from flask_app.货物标.商务服务其他要求提取 import get_business_requ
|
||||
|
||||
|
||||
#获取采购清单
|
||||
def fetch_procurement_reqs(procurement_path,procurement_docpath):
|
||||
def fetch_procurement_reqs(procurement_path,procurement_docpath,invalid_path):
|
||||
# 定义默认的 procurement_reqs 字典
|
||||
DEFAULT_PROCUREMENT_REQS = {
|
||||
"技术要求": "",
|
||||
@ -25,8 +25,8 @@ def fetch_procurement_reqs(procurement_path,procurement_docpath):
|
||||
# 使用 ThreadPoolExecutor 并行处理 get_technical_requirements 和 get_business_requirements
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
# 提交任务给线程池
|
||||
future_technical = executor.submit(get_technical_requirements, file_id)
|
||||
time.sleep(1) # 如果需要延迟,可以保留,否则建议移除以提高效率
|
||||
future_technical = executor.submit(get_technical_requirements, file_id,invalid_path)
|
||||
time.sleep(0.5)
|
||||
future_business = executor.submit(get_business_requirements, procurement_path, file_id)
|
||||
|
||||
# 获取并行任务的结果
|
||||
|
@ -90,7 +90,7 @@ def fetch_project_basic_info(invalid_path,invalid_docpath, merged_baseinfo_path,
|
||||
merged_baseinfo_path = invalid_path
|
||||
if not procurement_docpath:
|
||||
procurement_docpath=invalid_docpath
|
||||
basic_res = combine_basic_info(merged_baseinfo_path, procurement_path,procurement_docpath, clause_path)
|
||||
basic_res = combine_basic_info(merged_baseinfo_path, procurement_path,procurement_docpath, clause_path,invalid_path)
|
||||
base_info, good_list = post_process_baseinfo(basic_res)
|
||||
end_time = time.time()
|
||||
logger.info(f"基础信息 done,耗时:{end_time - start_time:.2f} 秒")
|
||||
|
Loading…
x
Reference in New Issue
Block a user