From d06d6a145f749819dd92a41e3c1838a9f6fb024d Mon Sep 17 00:00:00 2001
From: zy123 <646228430@qq.com>
Date: Fri, 18 Oct 2024 16:05:18 +0800
Subject: [PATCH] =?UTF-8?q?10.18=E5=B0=8F=E8=A7=A3=E6=9E=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 flask_app/general/little_zbparse.py  |   4 +-
 flask_app/main/基础信息整合快速版.py | 194 +++++++++++++++++++++++++++
 flask_app/main/截取pdf.py            |   2 +-
 3 files changed, 197 insertions(+), 3 deletions(-)
 create mode 100644 flask_app/main/基础信息整合快速版.py

diff --git a/flask_app/general/little_zbparse.py b/flask_app/general/little_zbparse.py
index 3513a47..c32fe64 100644
--- a/flask_app/general/little_zbparse.py
+++ b/flask_app/general/little_zbparse.py
@@ -46,8 +46,8 @@ def little_parse_goods(output_folder, file_path):
     # 上传文件并获取文件 ID
     file_id = upload_file(baseinfo_file_path)
     # 注意:以下路径被硬编码,确保该路径存在并且正确
-    # baseinfo_prompt_file_path='flask_app/static/提示词/小解析基本信息货物标.txt'
-    baseinfo_prompt_file_path = 'D:\\flask_project\\flask_app\\static\\提示词\\小解析基本信息货物标.txt'
+    baseinfo_prompt_file_path='flask_app/static/提示词/小解析基本信息货物标.txt'
+    # baseinfo_prompt_file_path = 'D:\\flask_project\\flask_app\\static\\提示词\\小解析基本信息货物标.txt'
     # 从提示词文件中读取问题
     questions = read_questions_from_file(baseinfo_prompt_file_path)
     # 多线程处理问题,使用指定的处理模式(2 代表使用 qianwen-long)
diff --git a/flask_app/main/基础信息整合快速版.py b/flask_app/main/基础信息整合快速版.py
new file mode 100644
index 0000000..d510725
--- /dev/null
+++ b/flask_app/main/基础信息整合快速版.py
@@ -0,0 +1,194 @@
+import json
+
+from flask_app.main.json_utils import clean_json_string, nest_json_under_key,rename_outer_key
+from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
+from flask_app.main.判断是否分包等 import judge_whether_main, read_questions_from_judge
+from flask_app.main.多线程提问 import read_questions_from_file, multi_threading
+from flask_app.main.通义千问long import upload_file
+
+
+def aggregate_basic_info_engineering(baseinfo_list):
+    """
+    Merge a list of basic-info dicts and nest the values by category.
+
+    Args:
+        baseinfo_list (list): Dicts that each hold part of the basic info.
+
+    Returns:
+        dict: The merged data. Keys that belong to a group are nested under
+            the group name (missing ones become "未提供"); keys that belong
+            to no group stay at the top level.
+    """
+    key_groups = {
+        "招标人/代理信息": ["招标人", "招标人联系方式", "招标代理机构", "招标代理机构联系方式"],
+        "项目信息": ["项目名称", "招标编号", "项目概况", "招标范围", "招标控制价", "投标竞争下浮率"],
+        "关键时间/内容": [
+            "投标文件递交截止日期",
+            "投标文件递交方式",
+            "开标时间",
+            "开标地点",
+            "投标人要求澄清招标文件的截止时间",
+            "投标有效期",
+            "评标结果公示媒介"
+        ],
+        "保证金相关": ["质量保证金", "退还投标保证金"],
+        "其他信息": [
+            "重新招标、不再招标和终止招标",
+            "投标费用承担",
+            "招标代理服务费",
+            "是否退还投标文件",
+        ]
+    }
+
+    combined_data = {}
+    relevant_keys_detected = set()
+
+    # Merge every dict (later entries win) and record which keys appeared.
+    for baseinfo in baseinfo_list:
+        combined_data.update(baseinfo)
+        relevant_keys_detected.update(baseinfo.keys())
+
+    # Extend the groups with optional keys found in this document.
+    dynamic_key_handling(key_groups, relevant_keys_detected)
+
+    # Move each group's values under the group name. Popping before the
+    # assignment keeps a flat key that equals a group name from deleting
+    # the freshly built group.
+    for group_name, keys in key_groups.items():
+        group_data = {key: combined_data.pop(key, "未提供") for key in keys}
+        combined_data[group_name] = group_data
+
+    return combined_data
+
+
+def dynamic_key_handling(key_groups, detected_keys):
+    """
+    Add optional keys found in the responses to the matching group, in place.
+
+    Args:
+        key_groups (dict): Group name -> list of keys; mutated by this call.
+        detected_keys (iterable): Keys collected from the parsed responses.
+    """
+    project_markers = ("联合体", "分包")
+    other_markers = ("踏勘现场", "投标预备会", "偏离")
+
+    for key in detected_keys:
+        if "保证金" in key:
+            group = key_groups["保证金相关"]
+            if key in group:  # already listed, avoid duplicates
+                continue
+            # Deposit-related keys go right before "质量保证金" when present.
+            insert_before = "质量保证金"
+            if insert_before in group:
+                group.insert(group.index(insert_before), key)
+            else:
+                group.append(key)
+        elif any(marker in key for marker in project_markers):
+            if key not in key_groups["项目信息"]:
+                key_groups["项目信息"].append(key)
+        elif any(marker in key for marker in other_markers):
+            if key not in key_groups["其他信息"]:
+                key_groups["其他信息"].append(key)
+
+
+def judge_consortium_bidding(baseinfo_list):
+    """
+    Tell whether any entry states that consortium bidding is accepted.
+
+    The "是否接受联合体投标" key is removed from every dict in place so it
+    does not show up in the aggregated result.
+
+    Args:
+        baseinfo_list (list): Basic-info dicts; mutated by this call.
+
+    Returns:
+        bool: True if at least one dict had "是否接受联合体投标" set to "是".
+    """
+    accept_bidding = False
+    for baseinfo in baseinfo_list:
+        if baseinfo.pop("是否接受联合体投标", None) == "是":
+            accept_bidding = True
+    return accept_bidding
+
+
+def combine_basic_info(merged_baseinfo_path,truncate0, output_folder, clause_path):
+    """
+    Collect, merge and categorise the basic information of a tender file.
+
+    Args:
+        merged_baseinfo_path (str): Path of the merged basic-info PDF that
+            the general questions are asked against.
+        truncate0 (str): Path of the PDF used for the yes/no questions.
+        output_folder (str): Output folder handed to judge_whether_main.
+        clause_path (str): Clause file handed to extract_from_notice.
+
+    Returns:
+        dict: The aggregated basic-info dict stored under the "基础信息" key.
+    """
+    baseinfo_list = []
+    baseinfo_file_path = 'flask_app/static/提示词/基本信息工程标.txt'
+    # baseinfo_file_path = 'D:\\flask_project\\flask_app\\static\\提示词\\基本信息工程标.txt'
+    questions = read_questions_from_file(baseinfo_file_path)
+    # Ask the general questions against the uploaded merged PDF, using the
+    # same file-based mode 2 (qianwen-long) call as the yes/no questions below.
+    # NOTE(review): confirm mode 2 is the intended backend for this step.
+    merged_file_id = upload_file(merged_baseinfo_path)
+    res1 = multi_threading(questions, "", merged_file_id, 2)
+
+    if not res1:
+        print("基础信息整合: multi_threading error!")
+    for index, response in res1 or []:
+        try:
+            # A plain string is the answer itself; otherwise the answer is
+            # expected as the second item of the response.
+            if isinstance(response, str) and response:
+                baseinfo_list.append(clean_json_string(response))
+            elif response and len(response) > 1:
+                baseinfo_list.append(clean_json_string(response[1]))
+            else:
+                print(f"基础信息整合: Warning: Missing or incomplete response data for query index {index}.")
+        except Exception as e:
+            print(f"基础信息整合: Error processing response for query index {index}: {e}")
+
+    # Decide whether subcontracting applies, whether a bid bond is needed, etc.
+    chosen_numbers, merged = judge_whether_main(truncate0, output_folder)
+    baseinfo_list.append(merged)
+
+    judge_file_path = 'flask_app/static/提示词/是否相关问题.txt'
+    # judge_file_path = 'D:\\flask_project\\flask_app\\static\\提示词\\是否相关问题.txt'
+    judge_questions = read_questions_from_judge(judge_file_path, chosen_numbers)
+    # Also strips the "是否接受联合体投标" key from the collected dicts.
+    judge_consortium = judge_consortium_bidding(baseinfo_list)
+
+    if judge_consortium:
+        judge_consortium_question = (
+            "该招标文件对于联合体投标的要求是怎样的,请按json格式给我提供信息,"
+            "外层键名为'联合体投标要求',其中有一个嵌套键值对为:\"是否接受联合体投标\":\"是\""
+        )
+        judge_questions.append(judge_consortium_question)
+
+    file_id = upload_file(truncate0)
+    res2 = multi_threading(judge_questions, "", file_id, 2)  # mode 2: qianwen-long
+
+    if not res2:
+        print("基础信息整合: multi_threading error!")
+    else:
+        for question, response in res2:
+            baseinfo_list.append(clean_json_string(response))
+
+    # "重新招标、不再招标和终止招标" comes from the instructions-to-bidders text.
+    rebidding_situation = extract_from_notice(clause_path, 3)
+    update_json = rename_outer_key(rebidding_situation, "重新招标、不再招标和终止招标")
+    baseinfo_list.append(update_json)
+    aggregated_baseinfo = aggregate_basic_info_engineering(baseinfo_list)
+    return {"基础信息": aggregated_baseinfo}
+
+
+if __name__ == "__main__":
+    # Raw strings keep the Windows backslashes literal.
+    merged_baseinfo_path = r"C:\Users\Administrator\Desktop\招标文件\special_output\zbtest2_merged_baseinfo.pdf"
+    output_folder = r"C:\Users\Administrator\Desktop\fsdownload\3424b7cb-1f85-44b4-a432-44539b870405"
+    truncate0 = r"C:\Users\Administrator\Desktop\fsdownload\3424b7cb-1f85-44b4-a432-44539b870405\ztbfile_tobidders_notice_table.pdf"
+    clause_path = r"C:\Users\Administrator\Desktop\fsdownload\3424b7cb-1f85-44b4-a432-44539b870405\clause1.json"
+    res = combine_basic_info(merged_baseinfo_path, truncate0, output_folder, clause_path)
+    print(json.dumps(res, ensure_ascii=False, indent=4))
diff --git a/flask_app/main/截取pdf.py b/flask_app/main/截取pdf.py
index a61c532..f691c81 100644
--- a/flask_app/main/截取pdf.py
+++ b/flask_app/main/截取pdf.py
@@ -384,7 +384,7 @@ def truncate_pdf_specific_engineering(pdf_path, output_folder):
         truncate_files.append(files)
 
     if truncate_files:
-        merged_output_path = os.path.join(output_folder, f"{base_file_name}_merged_specific.pdf")
+        merged_output_path = os.path.join(output_folder, f"{base_file_name}_merged_baseinfo.pdf")
         merge_selected_pdfs(output_folder, truncate_files, merged_output_path, base_file_name)
         truncate_files.append(merged_output_path)
         print(f"已生成合并文件: {merged_output_path}")