10.18小解析
This commit is contained in:
parent
c50884f9ec
commit
d06d6a145f
@ -46,8 +46,8 @@ def little_parse_goods(output_folder, file_path):
|
|||||||
# 上传文件并获取文件 ID
|
# 上传文件并获取文件 ID
|
||||||
file_id = upload_file(baseinfo_file_path)
|
file_id = upload_file(baseinfo_file_path)
|
||||||
# 注意:以下路径被硬编码,确保该路径存在并且正确
|
# 注意:以下路径被硬编码,确保该路径存在并且正确
|
||||||
# baseinfo_prompt_file_path='flask_app/static/提示词/小解析基本信息货物标.txt'
|
baseinfo_prompt_file_path='flask_app/static/提示词/小解析基本信息货物标.txt'
|
||||||
baseinfo_prompt_file_path = 'D:\\flask_project\\flask_app\\static\\提示词\\小解析基本信息货物标.txt'
|
# baseinfo_prompt_file_path = 'D:\\flask_project\\flask_app\\static\\提示词\\小解析基本信息货物标.txt'
|
||||||
# 从提示词文件中读取问题
|
# 从提示词文件中读取问题
|
||||||
questions = read_questions_from_file(baseinfo_prompt_file_path)
|
questions = read_questions_from_file(baseinfo_prompt_file_path)
|
||||||
# 多线程处理问题,使用指定的处理模式(2 代表使用 qianwen-long)
|
# 多线程处理问题,使用指定的处理模式(2 代表使用 qianwen-long)
|
||||||
|
172
flask_app/main/基础信息整合快速版.py
Normal file
172
flask_app/main/基础信息整合快速版.py
Normal file
@ -0,0 +1,172 @@
|
|||||||
|
import json
|
||||||
|
|
||||||
|
from flask_app.main.json_utils import clean_json_string, nest_json_under_key,rename_outer_key
|
||||||
|
from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
|
||||||
|
from flask_app.main.判断是否分包等 import judge_whether_main, read_questions_from_judge
|
||||||
|
from flask_app.main.多线程提问 import read_questions_from_file, multi_threading
|
||||||
|
from flask_app.main.通义千问long import upload_file
|
||||||
|
|
||||||
|
|
||||||
|
def aggregate_basic_info_engineering(baseinfo_list):
    """
    Merge a list of basic-info dictionaries and nest their keys under named groups.

    Args:
        baseinfo_list (list): Dictionaries to merge; later entries override
            earlier ones when they share a key.

    Returns:
        dict: The merged dictionary. Every key that belongs to a group is moved
        into that group's sub-dictionary (missing keys are filled with "未提供");
        keys that belong to no group stay at the top level.
    """
    key_groups = {
        "招标人/代理信息": [
            "招标人",
            "招标人联系方式",
            "招标代理机构",
            "招标代理机构联系方式",
        ],
        "项目信息": [
            "项目名称",
            "招标编号",
            "项目概况",
            "招标范围",
            "招标控制价",
            "投标竞争下浮率",
        ],
        "关键时间/内容": [
            "投标文件递交截止日期",
            "投标文件递交方式",
            "开标时间",
            "开标地点",
            "投标人要求澄清招标文件的截止时间",
            "投标有效期",
            "评标结果公示媒介",
        ],
        "保证金相关": ["质量保证金", "退还投标保证金"],
        "其他信息": [
            "重新招标、不再招标和终止招标",
            "投标费用承担",
            "招标代理服务费",
            "是否退还投标文件",
        ],
    }

    merged = {}
    seen_keys = set()

    # Flatten every entry into one dictionary and remember which keys showed up.
    for info in baseinfo_list:
        merged.update(info)
        seen_keys.update(info.keys())

    # Let the detected keys extend the static group layout.
    dynamic_key_handling(key_groups, seen_keys)

    # Build each group from the flat data, then drop the flat copies so a value
    # appears only once in the result.
    for group_name, member_keys in key_groups.items():
        section = {}
        for member in member_keys:
            section[member] = merged.get(member, "未提供")
        merged[group_name] = section
        for member in member_keys:
            merged.pop(member, None)

    return merged
|
||||||
|
|
||||||
|
def dynamic_key_handling(key_groups, detected_keys):
    """
    Extend the key groups in place with detected keys that match known keywords.

    A key containing "保证金" goes into the "保证金相关" group, placed directly
    before "质量保证金" when that anchor is present and appended otherwise.
    Keys containing "联合体" or "分包" are appended to "项目信息"; keys
    containing "踏勘现场", "投标预备会" or "偏离" are appended to "其他信息".
    Only the first matching rule applies, and a key is never added to a group
    that already contains it.

    Args:
        key_groups (dict): Maps a group name to its list of keys; mutated in place.
        detected_keys (iterable): Keys found in the collected data.

    Raises:
        KeyError: If a key matches a rule whose target group is absent from
            ``key_groups``.
    """
    # (keyword, target group) pairs, checked in order after the "保证金" rule.
    append_rules = (
        ("联合体", "项目信息"),
        ("分包", "项目信息"),
        ("踏勘现场", "其他信息"),
        ("投标预备会", "其他信息"),
        ("偏离", "其他信息"),
    )
    anchor = "质量保证金"

    for key in detected_keys:
        if "保证金" in key:
            group = key_groups["保证金相关"]
            if key in group:
                continue  # already listed; avoid a duplicate entry
            if anchor in group:
                group.insert(group.index(anchor), key)
            else:
                # No anchor to insert before, so fall back to the end.
                group.append(key)
            continue

        for keyword, group_name in append_rules:
            if keyword in key:
                group = key_groups[group_name]
                if key not in group:  # avoid a duplicate entry
                    group.append(key)
                break  # first matching rule wins
|
||||||
|
|
||||||
|
|
||||||
|
def judge_consortium_bidding(baseinfo_list):
    """
    Report whether any entry accepts consortium bidding, stripping the flag as it goes.

    The key "是否接受联合体投标" is removed from every dictionary in
    ``baseinfo_list`` (the dictionaries and the list are modified in place).

    Args:
        baseinfo_list (list): Dictionaries that may carry the flag key.

    Returns:
        bool: True if at least one entry had the flag set to "是".
    """
    flag_key = "是否接受联合体投标"
    accepted = False
    stripped = []

    for entry in baseinfo_list:
        # pop() both reads and removes the flag; absent keys yield None.
        if entry.pop(flag_key, None) == "是":
            accepted = True
        stripped.append(entry)

    # Write the processed entries back into the caller's list object.
    baseinfo_list[:] = stripped
    return accepted
|
||||||
|
def combine_basic_info(merged_baseinfo_path, truncate0, output_folder, clause_path):
    """
    Collect the basic information of a tender document and return it grouped.

    Args:
        merged_baseinfo_path (str): Path of the merged base-info file; it is
            uploaded and queried with the questions from the basic-info prompt file.
        truncate0 (str): Path of the file passed to ``judge_whether_main`` and
            uploaded for the follow-up judgement questions.
        output_folder (str): Output folder passed to ``judge_whether_main``.
        clause_path (str): Path passed to ``extract_from_notice``.

    Returns:
        dict: ``{"基础信息": <grouped basic-info dictionary>}``.
    """
    baseinfo_list = []
    baseinfo_file_path = 'flask_app/static/提示词/基本信息工程标.txt'
    questions = read_questions_from_file(baseinfo_file_path)

    # The previous body passed an undefined `knowledge_name` here (NameError) and
    # never used `merged_baseinfo_path`. Query the uploaded merged file instead,
    # using the same qianwen-long mode (2) as the judgement questions below.
    baseinfo_file_id = upload_file(merged_baseinfo_path)
    res1 = multi_threading(questions, "", baseinfo_file_id, 2)

    if not res1:
        print("基础信息整合: multi_threading error!")
    else:
        # NOTE(review): assumes mode-2 results are (question, response-text)
        # pairs, as the res2 loop below already relies on - confirm against
        # multi_threading.
        for index, response in res1:
            try:
                if response:
                    baseinfo_list.append(clean_json_string(response))
                else:
                    print(f"基础信息整合: Warning: Missing or incomplete response data for query index {index}.")
            except Exception as e:
                # Best effort: one bad answer must not abort the whole aggregation.
                print(f"基础信息整合: Error processing response for query index {index}: {e}")

    # Decide which yes/no topics apply (subcontracting, bid bond, ...).
    chosen_numbers, merged = judge_whether_main(truncate0, output_folder)
    baseinfo_list.append(merged)

    judge_file_path = 'flask_app/static/提示词/是否相关问题.txt'
    judge_questions = read_questions_from_judge(judge_file_path, chosen_numbers)

    # The consortium flag comes from the answers collected so far; it is
    # stripped from baseinfo_list by this call.
    judge_consortium = judge_consortium_bidding(baseinfo_list)

    if judge_consortium:
        judge_consortium_question = (
            "该招标文件对于联合体投标的要求是怎样的,请按json格式给我提供信息,"
            "外层键名为'联合体投标要求',其中有一个嵌套键值对为:\"是否接受联合体投标\":\"是\""
        )
        judge_questions.append(judge_consortium_question)

    file_id = upload_file(truncate0)
    res2 = multi_threading(judge_questions, "", file_id, 2)  # qianwen-long mode

    if not res2:
        print("基础信息整合: multi_threading error!")
    else:
        for question, response in res2:
            baseinfo_list.append(clean_json_string(response))

    # The re-tender / no-tender / termination clause is taken from the body of
    # the instructions to bidders rather than asked as a question.
    rebidding_situation = extract_from_notice(clause_path, 3)
    update_json = rename_outer_key(rebidding_situation, "重新招标、不再招标和终止招标")
    baseinfo_list.append(update_json)

    aggregated_baseinfo = aggregate_basic_info_engineering(baseinfo_list)
    return {"基础信息": aggregated_baseinfo}
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual run against local sample files. Raw strings keep the Windows
    # backslashes literal; the previous literals mixed "\\" with the invalid
    # escapes "\A" and "\D", which newer Python versions warn about.
    merged_baseinfo_path = r"C:\Users\Administrator\Desktop\招标文件\special_output\zbtest2_merged_baseinfo.pdf"
    output_folder = r"C:\Users\Administrator\Desktop\fsdownload\3424b7cb-1f85-44b4-a432-44539b870405"
    truncate0 = r"C:\Users\Administrator\Desktop\fsdownload\3424b7cb-1f85-44b4-a432-44539b870405\ztbfile_tobidders_notice_table.pdf"
    clause_path = r"C:\Users\Administrator\Desktop\fsdownload\3424b7cb-1f85-44b4-a432-44539b870405\clause1.json"
    res = combine_basic_info(merged_baseinfo_path, truncate0, output_folder, clause_path)
    print(json.dumps(res, ensure_ascii=False, indent=4))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -384,7 +384,7 @@ def truncate_pdf_specific_engineering(pdf_path, output_folder):
|
|||||||
truncate_files.append(files)
|
truncate_files.append(files)
|
||||||
|
|
||||||
if truncate_files:
|
if truncate_files:
|
||||||
merged_output_path = os.path.join(output_folder, f"{base_file_name}_merged_specific.pdf")
|
merged_output_path = os.path.join(output_folder, f"{base_file_name}_merged_baseinfo.pdf")
|
||||||
merge_selected_pdfs(output_folder, truncate_files, merged_output_path, base_file_name)
|
merge_selected_pdfs(output_folder, truncate_files, merged_output_path, base_file_name)
|
||||||
truncate_files.append(merged_output_path)
|
truncate_files.append(merged_output_path)
|
||||||
print(f"已生成合并文件: {merged_output_path}")
|
print(f"已生成合并文件: {merged_output_path}")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user