# zbparse/flask_app/main/资格审查模块.py

2024-10-21 17:31:48 +08:00
import json
2024-11-11 17:12:38 +08:00
import os.path
2024-10-21 17:31:48 +08:00
import time
2024-08-29 16:37:09 +08:00
2024-11-11 17:12:38 +08:00
from flask_app.general.format_change import pdf2docx
from flask_app.general.json_utils import extract_content_from_json, clean_json_string
from flask_app.general.table_content_extraction import extract_tables_main
2024-08-29 17:30:49 +08:00
from flask_app.main.形式响应评审 import process_reviews
2024-10-21 17:31:48 +08:00
from flask_app.main.资格评审 import process_qualification
2024-10-22 10:06:22 +08:00
from flask_app.general.通义千问long import upload_file, qianwen_long
from concurrent.futures import ThreadPoolExecutor
2024-11-11 17:12:38 +08:00
from flask_app.货物标.资格审查main import combine_qualification_review
from flask_app.general.merge_pdfs import merge_pdfs
def process_notice(notice_path):
    """Extract the applicant qualification requirements from a notice file.

    The file is uploaded with ``upload_file``; a fixed prompt is then sent to
    ``qianwen_long`` and the reply is parsed with ``clean_json_string``.

    Args:
        notice_path: Path of the notice file to upload.

    Returns:
        dict: A single-key mapping ``{"申请人资格要求": value}``. ``value`` is
        whatever the parsed reply stores under that key, the placeholder
        ``"未找到相关内容"`` when the key is absent, or ``"处理失败"`` when any
        step raises.
    """
    print("call notice_path")
    try:
        # Upload the notice file and keep the returned file id for the query.
        file_id1 = upload_file(notice_path)

        # Prompt asking for the applicant qualification requirements as JSON.
        # The literal is kept flush-left so its content is not altered by
        # the surrounding code indentation.
        user_query1 = """
第一章招标公告投标邀请书中说明的申请人资格要求是怎样的请以json格式给出回答外键为'申请人资格要求'键值为字符串列表其中每个字符串对应原文中的一条要求你的回答与原文内容一致不要擅自总结删减输出格式示例如下
{
"申请人资格要求":[
"1.满足《中华人民共和国政府采购法》第二十二条规定;",
"1.1 法人或者其他组织的营业执照等证明文件,如供应商是自然人的提供身份证明材料;",
"2.未被列入“信用中国”网站(www.creditchina.gov.cn)信用服务栏失信被执行人、重大税收违法案件当事人名单;"
]
}
"""
        # Run the query and parse the reply.
        res1 = clean_json_string(qianwen_long(file_id1, user_query1))
        # Fall back to a placeholder when the expected key is missing.
        requirements = res1.get("申请人资格要求", "未找到相关内容")
        return {"申请人资格要求": requirements}
    except Exception as e:
        # Best-effort by design: any failure (upload, query, parsing) is
        # reported on stdout and turned into a fixed failure marker so the
        # caller can still assemble the remaining results.
        print(f"处理申请人资格要求时出错: {e}")
        return {"申请人资格要求": "处理失败"}
# TODO: known issue - when evaluation_method contains neither a 符合性审查
# (compliance review) section nor a 形式评审 (formal review) section, the
# function below does not handle it correctly.
def combine_review_standards(evaluation_method, qualification_path, output_folder, tobidders_notice_table, clause_path,
                             invalid_path, merged_baseinfo_path, notice_path):
    """Assemble the qualification-review section from the tender documents.

    The evaluation-method file is uploaded and the model is first asked
    whether the document describes a "符合性审查" standard. Depending on the
    answer one of two paths is taken:

    * the qualification file and the evaluation-method file are merged into
      ``merged_qualification.pdf`` inside ``output_folder`` and handed to
      ``combine_qualification_review``; or
    * the formal / responsiveness / qualification review standards are
      requested from the model and three tasks (``process_qualification``,
      ``process_reviews``, ``process_notice``) run concurrently, their
      results being merged in a fixed order.

    Args:
        evaluation_method (str): Path of the evaluation-method file.
        qualification_path (str): Path of the qualification file.
        output_folder (str): Folder that receives generated files.
        tobidders_notice_table (str): Path of the "instructions to bidders"
            table PDF; it is converted with ``pdf2docx`` and then passed to
            ``extract_tables_main``.
        clause_path (str): Clause file path, forwarded to ``process_reviews``.
        invalid_path (str): Path forwarded to the qualification processing.
        merged_baseinfo_path (str): Path forwarded to the qualification
            processing.
        notice_path (str): Path of the notice file, forwarded to
            ``process_notice``.

    Returns:
        Either the value returned by ``combine_qualification_review`` (first
        path) or ``{"资格审查": combined_results}`` (second path).
    """
    # Upload the evaluation-method file; the id is reused for both queries.
    file_id = upload_file(evaluation_method)
    # NOTE(review): the quoted answer options in this prompt and the value
    # compared against below are empty strings as found; the intended answer
    # tokens may have been lost - confirm against the upstream file.
    first_query = """
该文档的评标办法章节中是否说明了符合性审查标准说明了就回答''否则回答''请以json格式给我返回结果键名分别是'符合性审查'键值仅限于''''注意它与形式响应性评审是对立的也就是说只要文档中描述了形式响应性评审那么符合性审查的键值一定是''以下为输出示例
{
"符合性审查":""
}
"""
    first_res = clean_json_string(qianwen_long(file_id, first_query))
    if first_res.get("符合性审查") == "":
        print("new 资格审查")
        # Merge the qualification file and the evaluation-method file, then
        # delegate the whole review to combine_qualification_review.
        paths = [qualification_path, evaluation_method]
        output_path = os.path.join(output_folder, "merged_qualification.pdf")
        merge_pdfs(paths, output_path)
        final_result = combine_qualification_review(invalid_path, output_path, merged_baseinfo_path)
    else:
        # Instructions-to-bidders table: PDF -> docx -> JSON.
        tobidders_notice_table_docx = pdf2docx(tobidders_notice_table)
        truncate_jsonpath = extract_tables_main(tobidders_notice_table_docx, output_folder)
        # Ask for the formal, responsiveness and qualification review standards.
        user_query_1 = """
根据该文档中的评标办法前附表请你列出该文件中的形式评审标准和响应性评审标准和资格评审标准请以json格式返回外层键名为'形式评审标准''响应性评审标准''资格评审标准',嵌套键名为'评审因素'中的内容相应的键值为对应'评审标准'中的内容
"""
        results = qianwen_long(file_id, user_query_1)
        original_dict_data = extract_content_from_json(results)
        # Split off the qualification part; what remains in
        # original_dict_data goes to process_reviews.
        qualification_review = original_dict_data.pop('资格评审标准', {})
        # Three independent tasks, one worker each.
        with ThreadPoolExecutor(max_workers=3) as executor:
            futures = {
                "资格审查": executor.submit(
                    process_qualification,
                    qualification_review,
                    qualification_path,
                    invalid_path,
                    merged_baseinfo_path
                ),
                "形式及响应性审查": executor.submit(
                    process_reviews,
                    original_dict_data,
                    output_folder,
                    truncate_jsonpath,
                    clause_path
                ),
                "申请人资格要求": executor.submit(
                    process_notice,
                    notice_path
                )
            }
            # Output order of the merged sections (differs from submit order).
            desired_order = ["申请人资格要求", "资格审查", "形式及响应性审查"]
            combined_results = {}
            for key in desired_order:
                # Every key in desired_order was submitted above, so a plain
                # lookup is sufficient.
                future = futures[key]
                try:
                    result = future.result()
                    if isinstance(result, dict):
                        # Dict results carry their own keys; merge them flat.
                        combined_results.update(result)
                    else:
                        combined_results[key] = result
                except Exception as e:
                    # One failed task must not discard the other sections.
                    print(f"处理 '{key}' 时出错: {e}")
                    combined_results[key] = "处理失败"
        # Wrap the merged sections under the top-level key.
        final_result = {"资格审查": combined_results}
    return final_result
2024-08-29 16:37:09 +08:00
if __name__ == "__main__":
    # Manual smoke run against sample files on a local Windows machine;
    # prints the combined result as JSON followed by the elapsed time.
    start_time = time.time()
    evaluation_method = r"C:\Users\Administrator\Desktop\fsdownload\bb61d137-794c-4760-8da7-ebc10cdc2782\ztbfile_evaluation_method.pdf"
    qualification_path = r"C:\Users\Administrator\Desktop\fsdownload\bb61d137-794c-4760-8da7-ebc10cdc2782\ztbfile_qualification.pdf"
    output_folder = r"C:\Users\Administrator\Desktop\fsdownload\bb61d137-794c-4760-8da7-ebc10cdc2782"
    notice_path = r'C:\Users\Administrator\Desktop\fsdownload\bb61d137-794c-4760-8da7-ebc10cdc2782\ztbfile_notice.pdf'
    # knowledge_name="zbtest20"
    clause_path = r"C:\Users\Administrator\Desktop\fsdownload\bb61d137-794c-4760-8da7-ebc10cdc2782\clause1.json"
    tobidders_notice_table = r"C:\Users\Administrator\Desktop\fsdownload\bb61d137-794c-4760-8da7-ebc10cdc2782\ztbfile_tobidders_notice_table.pdf"
    invalid_path = r"C:\Users\Administrator\Desktop\fsdownload\bb61d137-794c-4760-8da7-ebc10cdc2782\ztbfile_invalid.pdf"
    merged_baseinfo_path = r"C:\Users\Administrator\Desktop\fsdownload\bb61d137-794c-4760-8da7-ebc10cdc2782\ztbfile_merged_baseinfo.pdf"
    res = combine_review_standards(evaluation_method, qualification_path, output_folder, tobidders_notice_table, clause_path,
                                   invalid_path, merged_baseinfo_path, notice_path)
    # ensure_ascii=False keeps the Chinese keys and values readable.
    print(json.dumps(res, ensure_ascii=False, indent=4))
    end_time = time.time()
    print("elapsed time:" + str(end_time - start_time))