zbparse/flask_app/货物标/基础信息解析main.py
2024-10-14 17:13:11 +08:00

123 lines
5.1 KiB
Python

import json
from flask_app.main.json_utils import clean_json_string, nest_json_under_key,rename_outer_key, combine_json_results
from flask_app.main.多线程提问 import read_questions_from_file, multi_threading
from flask_app.main.通义千问long import upload_file
def dynamic_key_handling(key_groups, detected_keys):
    """Route detected keys into their key group, mutating ``key_groups`` in place.

    A detected key is matched by substring against an ordered rule table and
    appended to the group of the first rule that matches. Keys matching no
    rule are ignored.

    Args:
        key_groups (dict): Maps a group name to the list of keys in that group.
            A group that a rule targets but that is absent is created.
        detected_keys (iterable): Key names (strings) to classify; they are
            processed in iteration order.

    Returns:
        None. ``key_groups`` is updated in place.
    """
    # Ordered (substrings, target group) rules; the first matching rule wins,
    # which keeps the precedence of the original if/elif chain.
    routing_rules = (
        (("投标保证金", "履约保证金"), "保证金相关"),
        (("是否接受联合体", "联合体投标要求", "分包"), "项目信息"),
        (("踏勘现场", "投标预备会", "偏离"), "其他信息"),
    )
    for key in detected_keys:
        for substrings, group_name in routing_rules:
            if any(fragment in key for fragment in substrings):
                group = key_groups.setdefault(group_name, [])
                # A detected key may already be one of the group's defaults
                # (e.g. "退还投标保证金" contains "投标保证金"); do not add it twice.
                if key not in group:
                    group.append(key)
                break
def aggregate_basic_info(baseinfo_list):
    """Merge a list of basic-info results and bucket them into named groups.

    Every item is passed through ``clean_json_string`` and merged into one
    dict with ``dict.update``, so a later item overwrites an earlier one on
    the same key. The merged keys are then offered to
    ``dynamic_key_handling`` so that optional keys join the matching group.
    Finally each group is materialised and wrapped by ``nest_json_under_key``.

    Args:
        baseinfo_list (list): Items accepted by ``clean_json_string``
            (presumably raw model answers -- confirm against the caller).

    Returns:
        list: One entry per group, in group-definition order; each entry is
        the result of ``nest_json_under_key(group_data, group_name)``. A key
        listed in a group but absent from the merged data gets "未提供".
    """
    key_groups = {
        "招标人/代理信息": ["招标人", "招标人联系方式", "招标代理机构", "招标代理机构联系方式"],
        "项目信息": ["工程名称", "招标编号", "工程概况", "招标范围", "招标控制价", "投标竞争下浮率"],
        "关键时间/内容": [
            "投标文件递交截止日期",
            "递交方式",
            "投标人要求澄清招标文件的截止时间",
            "投标有效期",
            "评标结果公示媒介",
        ],
        "保证金相关": ["质量保证金", "退还投标保证金"],
        "其他信息": [
            "重新招标、不再招标和终止招标",
            "是否退还投标文件",
            "费用承担",
        ],
    }

    # Merge all basic-info fragments into a single mapping.
    combined_data = {}
    for baseinfo in baseinfo_list:
        combined_data.update(clean_json_string(baseinfo))

    # Extend the groups with the optional keys that were actually detected.
    # The merged dict holds exactly the detected keys and, unlike a set,
    # iterates in insertion order, so the key order inside each group is
    # reproducible from run to run.
    dynamic_key_handling(key_groups, list(combined_data))

    # Build each group (defaulting missing keys) and nest it under its name.
    return [
        nest_json_under_key(
            {key: combined_data.get(key, "未提供") for key in keys},
            group_name,
        )
        for group_name, keys in key_groups.items()
    ]
def combine_basic_info(knowledge_name, output_folder, clause_path):
    """Return the basic-info payload wrapped under the "基础信息" key.

    The three parameters are accepted but not read: the function currently
    returns a fixed sample payload that contains only the
    "招标人/代理信息" group.

    Args:
        knowledge_name: Unused.
        output_folder: Unused.
        clause_path: Unused.

    Returns:
        dict: ``{"基础信息": {"招标人/代理信息": {...}}}`` with fixed values.
    """
    # NOTE: the model-backed pipeline is disabled; its outline was:
    #   1. file_id = upload_file(<path to the tender PDF>)
    #   2. questions = read_questions_from_file('flask_app/static/提示词/前两章提问总结.txt')
    #   3. results = multi_threading(questions, "", file_id, 2)
    #      (1 = Bailian RAG, 2 = qianwen-long)
    #   4. print every (question, response) pair, or an error if results is empty
    unknown = "未知"

    def contact_block(name, address, contact_person, phone):
        # Contact record; the fields not passed in are filled with `unknown`.
        return {
            "名称": name,
            "地址": address,
            "联系人": contact_person,
            "电话": phone,
            "传真": unknown,
            "电子邮件": unknown,
            "网址": unknown,
            "开户银行": unknown,
            "账号": unknown,
        }

    tenderee = "黄石临空建设管理有限公司"
    agency = "湖北民成工程项目管理有限公司"

    party_info = {
        "招标人": tenderee,
        "招标人联系方式": contact_block(tenderee, "大冶市还地桥镇", "王先生", "13545510946"),
        "招标代理机构": agency,
        "招标代理机构联系方式": contact_block(
            agency, "大冶市港湖还建楼 20栋二单元 102室", "尹工", "18327823905"
        ),
    }
    baseinfo_combined_res = {"招标人/代理信息": party_info}
    return {"基础信息": baseinfo_combined_res}
if __name__ == "__main__":
    # Manual smoke run: build the basic-info payload and pretty-print it as JSON.
    knowledge_name = "ztb"
    # Raw strings keep the Windows paths intact. The previous literals mixed
    # escaped and bare backslashes ("\A", "\D"), which are invalid escape
    # sequences; the resulting path values are unchanged.
    output_folder = r"C:\Users\Administrator\Desktop\fsdownload\3424b7cb-1f85-44b4-a432-44539b870405"
    # Not used by the call below; kept for parity with the other entry scripts.
    truncate0 = output_folder + r"\ztbfile_tobidders_notice_table.pdf"
    clause_path = output_folder + r"\clause1.json"
    res = combine_basic_info(knowledge_name, output_folder, clause_path)
    # ensure_ascii=False keeps the Chinese keys and values readable in the output.
    print(json.dumps(res, ensure_ascii=False, indent=4))