zbparse/flask_app/old_version/判断是否分包等_old.py

235 lines
11 KiB
Python
Raw Normal View History

2024-08-29 16:37:09 +08:00
# -*- encoding:utf-8 -*-
import json
import os.path
import re
from flask_app.general.json_utils import extract_content_from_json # 可以选择性地导入特定的函数
from flask_app.old_version.提取打勾符号_old import read_pdf_and_judge_main
from flask_app.general.llm.多线程提问 import multi_threading
from flask_app.general.llm.通义千问long import qianwen_long,upload_file
2024-08-29 16:37:09 +08:00
#调用qianwen-ask之后组织提示词问百炼。
def construct_judge_questions(json_data):
    """Build one follow-up question for every key whose value is '未知'.

    Args:
        json_data (str or dict): A JSON string or an already-parsed dict
            mapping key names to answers.

    Returns:
        list: One prompt string per key whose value equals '未知', in the
        mapping's iteration order. Empty when the input cannot be parsed,
        is not a JSON object / dict, or contains no '未知' values.
    """
    if isinstance(json_data, str):
        try:
            parsed_data = json.loads(json_data)
        except json.JSONDecodeError:
            print("输入的 json_data 不是有效的 JSON 字符串。")
            return []
    else:
        parsed_data = json_data

    # Valid JSON can still be a list, number, etc.; only a mapping has keys
    # to ask about, so anything else is rejected instead of failing on .items().
    if not isinstance(parsed_data, dict):
        print("输入的 json_data 必须是 JSON 字符串或字典。")
        return []

    # The allowed answers are spelled out ('是','否','未知') so the model is not
    # told to choose between two empty strings, and the backslash is escaped so
    # it actually reaches the prompt text.
    # NOTE(review): the fill value for '/' or '\' content is assumed to be '否'
    # (it must be one of the allowed answers) - confirm against the prompt spec.
    return [
        (
            f"请你根据投标人须知前附表中的信息回答,本项目{key}"
            f"请按json格式给我提供信息键名为'{key}',键值仅限于'是','否','未知',如果原文中对应内容处为'/'或者'\\',键值填'否'"
        )
        for key, value in parsed_data.items()
        if value == '未知'
    ]
2024-08-29 16:37:09 +08:00
2025-01-22 09:45:25 +08:00
def _settle_yes_no(merged, question_key, chosen_numbers, numbers, result_key, negative_value):
    """Resolve one yes/no answer in place; any other value is left untouched."""
    answer = merged.get(question_key)
    if answer == '是':
        chosen_numbers.extend(numbers)
    elif answer == '否':
        merged[result_key] = negative_value
    else:
        return
    merged.pop(question_key, None)


def merge_json_to_list(merged):
    """Turn yes/no answers into a list of chosen numbers plus a cleaned dict.

    For each handled '是否…' key in ``merged``:
      * '是' appends the key's number(s) to the result list and removes the key;
      * '否' removes the key and stores a fixed negative statement under a
        shorter key (e.g. '分包' -> '不允许');
      * any other value leaves the key in place.

    The two deposit keys ('是否递交投标保证金', '是否递交磋商保证金') are treated
    as alternatives, as are the two meeting keys ('是否召开投标预备会',
    '是否召开投标答疑会'): only the first key present is evaluated and a later
    one is dropped.

    Args:
        merged (dict): Answers keyed by question name. Modified in place.

    Returns:
        tuple: ``(chosen_numbers, merged)`` where ``chosen_numbers`` always
        starts with 9 and ``merged`` is the same dict object that was passed in.
    """
    # 9 is selected unconditionally.
    chosen_numbers = [9]

    # Subcontracting.
    _settle_yes_no(merged, '是否允许分包', chosen_numbers, [1], '分包', '不允许')

    # Bid / negotiation deposit: only the first key present is evaluated.
    guarantee_processed = False
    for guarantee_key in ('是否递交投标保证金', '是否递交磋商保证金'):
        if guarantee_key not in merged:
            continue
        if guarantee_processed:
            merged.pop(guarantee_key, None)
            continue
        guarantee_type = '投标' if '投标' in guarantee_key else '磋商'
        answer = merged[guarantee_key]
        if answer == '是':
            chosen_numbers.extend([2, 3])
            merged.pop(guarantee_key, None)
        elif answer == '否':
            merged[f'{guarantee_type}保证金'] = '不提交'
            merged.pop(guarantee_key, None)
            # No deposit is submitted, so its refund entry is set to '/'.
            merged[f'退还{guarantee_type}保证金'] = '/'
        else:
            # Neither '是' nor '否': the refund question is recorded as unknown.
            merged[f'是否退还{guarantee_type}保证金'] = '未知'
        guarantee_processed = True

    # Performance bond.
    _settle_yes_no(merged, '是否提交履约保证金', chosen_numbers, [4], '履约保证金', '不提交')

    # Tendering agency service fee.
    # NOTE(review): the negative value is an empty string in the reviewed
    # source; it may have been lost in extraction - confirm the intended text.
    _settle_yes_no(merged, '是否有招标代理服务费', chosen_numbers, [5], '招标代理服务费', '')

    # Site visit.
    _settle_yes_no(merged, '是否组织踏勘现场', chosen_numbers, [6], '踏勘现场', '不组织')

    # Pre-bid meeting / Q&A meeting: only the first key present is evaluated.
    meeting_processed = False
    for preparation_key in ('是否召开投标预备会', '是否召开投标答疑会'):
        if preparation_key not in merged:
            continue
        if meeting_processed:
            merged.pop(preparation_key, None)
            continue
        meeting_type = '预备会' if '预备会' in preparation_key else '答疑会'
        answer = merged[preparation_key]
        if answer == '是':
            chosen_numbers.append(7)
            merged.pop(preparation_key, None)
        elif answer == '否':
            merged[f'投标{meeting_type}'] = '不召开'
            merged.pop(preparation_key, None)
        else:
            # Neither '是' nor '否': normalise the stored answer to '未知'.
            merged[f'是否召开投标{meeting_type}'] = '未知'
        meeting_processed = True

    # Quality deposit.
    _settle_yes_no(merged, '是否有质量保证金', chosen_numbers, [8], '质量保证金', '不提交')

    # NOTE: a follow-up step that re-asked every remaining '未知' key against
    # the body of the bidder notice was disabled on 11-12 (the body text was
    # considered low-value and its key names hard to define), so '未知'
    # entries are returned as-is.
    return chosen_numbers, merged
2024-08-29 16:37:09 +08:00
def read_questions_from_judge(file_path, indices):
    """Return the numbered questions from a text file whose numbers are in ``indices``.

    The file is read as UTF-8 and scanned for items of the form
    ``<number>.<text>``. An item either runs up to the literal marker
    ``#pdf提取之后的提示词`` or, failing that, up to the next ``<number>.``
    or the end of the text.

    Args:
        file_path: Path of the question file.
        indices: Container of integer question numbers to keep.

    Returns:
        list: Stripped question texts, in the order they appear in the file.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        text = handle.read()

    numbered_item = r'(\d+)\.(.*?)#pdf提取之后的提示词|(\d+)\.(.*?)(?=\d+\.|$)'

    selected = []
    # Each findall tuple carries both alternatives; the unused one is ''.
    for marked_no, marked_body, plain_no, plain_body in re.findall(numbered_item, text, re.DOTALL):
        number = marked_no if marked_no else plain_no
        body = marked_body.strip()
        if not body:
            body = plain_body.strip()
        if int(number) in indices:
            selected.append(body)
    return selected
def judge_whether_main(file_path, output_folder):
    """Ask the model a fixed set of yes/no questions about a bidder-notice table.

    Steps:
      1. ``read_pdf_and_judge_main`` writes its check-mark extraction of
         ``file_path`` to ``judge_exist.txt`` inside ``output_folder``.
      2. That text file is uploaded and queried with one combined prompt.
      3. Every key answered '未知' is asked again, individually, against the
         uploaded original file, and those answers overwrite the first ones.
      4. The combined answers are passed to ``merge_json_to_list``.

    Args:
        file_path: Path of the tender document's '投标人须知前附表' file.
        output_folder: Directory that receives ``judge_exist.txt``.

    Returns:
        The result of ``merge_json_to_list``: ``(chosen_numbers, merged)``.
    """
    # The allowed answers are spelled out ('是','否','未知') and every key name
    # is fully quoted, so the model gets a well-formed specification.
    user_query1 = (
        "请你依据以上信息,回答以下问题:是否组织踏勘现场?是否召开投标预备会?是否允许偏离?是否退还投标文件?是否允许分包? "
        "是否需要递交投标保证金是否需要提交履约保证金履约担保是否有招标代理服务费请按json格式给我提供信息键名分别为"
        "'是否组织踏勘现场','是否召开投标预备会','是否允许偏离','是否退还投标文件','是否允许分包',"
        "'是否递交投标保证金','是否提交履约保证金','是否有招标代理服务费',"
        "键值仅限于'是','否','未知',若存在矛盾信息,请回答'未知'"
    )
    output_txt_path = os.path.join(output_folder, 'judge_exist.txt')
    read_pdf_and_judge_main(file_path, output_txt_path)  # extract check-mark symbols
    file_id = upload_file(output_txt_path)
    qianwen_answer = qianwen_long(file_id, user_query1)

    # Parse the first answer once and reuse it for both outcomes.
    first_pass = extract_content_from_json(qianwen_answer)
    user_querys = construct_judge_questions(first_pass)  # keys answered '未知'

    if not user_querys:
        print("正常现象,没有'未知',无需调用qianwen-long")
        return merge_json_to_list(first_pass)

    # Re-ask the unknown keys against the whole original file.
    file_id = upload_file(file_path)
    # Mode argument per the original note: 1 = Bailian RAG, 2 = qianwen-long.
    qianwen_results = multi_threading(user_querys, "", file_id, 2)
    qianwen_list = (
        [extract_content_from_json(res) for _, res in qianwen_results]
        if qianwen_results
        else []
    )
    return process_judge_content(qianwen_answer, qianwen_list)
2024-10-18 13:37:54 +08:00
def process_judge_content(original_json, qianwen_list):
    """Overlay follow-up answers on the first answer, then post-process it.

    Args:
        original_json: Raw first-round answer; parsed with
            ``extract_content_from_json``.
        qianwen_list: Iterable of update mappings, applied in order so later
            entries win over earlier ones and over the first answer.

    Returns:
        The result of ``merge_json_to_list`` on the combined answers.
    """
    combined = extract_content_from_json(original_json)
    for follow_up in qianwen_list:
        combined.update(follow_up)
    return merge_json_to_list(combined)
if __name__ == "__main__":
    # Ad-hoc manual run against a locally downloaded bidder-notice table.
    # Raw strings keep the Windows paths free of invalid escapes such as "\A";
    # the resulting path values are unchanged.
    file_path = r"C:\Users\Administrator\Desktop\fsdownload\ee2d8828-bae0-465a-9171-7b2dd7453251\ztbfile_tobidders_notice_table.pdf"
    output_dir = r"C:\Users\Administrator\Desktop\fsdownload\ee2d8828-bae0-465a-9171-7b2dd7453251\tmp"
    chosen_numbers, merged = judge_whether_main(file_path, output_dir)
    print(chosen_numbers)
    print(merged)