2024-08-29 16:37:09 +08:00
|
|
|
|
# -*- encoding:utf-8 -*-
|
|
|
|
|
import json
|
|
|
|
|
import os.path
|
|
|
|
|
import re
|
2024-08-30 10:34:23 +08:00
|
|
|
|
from flask_app.main.json_utils import extract_content_from_json # 可以选择性地导入特定的函数
|
2024-08-29 17:30:49 +08:00
|
|
|
|
from flask_app.main.提取打勾符号 import read_pdf_and_judge_main
|
|
|
|
|
from flask_app.main.通义千问 import qianwen_ask
|
|
|
|
|
from flask_app.main.通义千问long import qianwen_long,upload_file
|
2024-08-29 16:37:09 +08:00
|
|
|
|
# After calling qianwen-ask, assemble the follow-up prompt that is sent to Bailian.
|
|
|
|
|
|
|
|
|
|
def construct_judge_questions(json_data):
    """Build a follow-up prompt covering every item still answered '未知'.

    ``json_data`` is passed to ``extract_content_from_json``; every key of the
    parsed result whose value equals '未知' is listed in the generated prompt.

    Returns:
        The prompt string, or "" when nothing could be parsed, when no value
        is '未知', or when the unquoted key list turns out to be empty.
    """
    answers = extract_content_from_json(json_data)
    if not answers:
        return ""

    # Keys are wrapped in single quotes for the "key names" part of the prompt.
    quoted_keys = [
        f"'{name}'" for name, answer in answers.items() if answer == '未知'
    ]
    if not quoted_keys:
        return ""

    # The question part of the prompt uses the same names with the quotes stripped.
    plain_names = ', '.join(quoted.strip("'") for quoted in quoted_keys)
    if not plain_names:
        return ""

    keys_str = ",".join(quoted_keys)
    return f"请你依据文档中的信息回答,{plain_names}?请按json格式给我提供信息,键名分别为{keys_str},键值仅限于'是','否','未知'。"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def merge_json_to_list(merged):
    """Turn the yes/no flag entries of ``merged`` into a number list plus fixed entries.

    Each flag key is looked up in turn. A value of '是' contributes its
    number(s) to the returned list; a value of '否' writes the fixed
    replacement entries into ``merged``. In both cases the flag key is then
    removed. Any other value (for example '未知') leaves the flag untouched.

    Note that ``merged`` is modified in place.

    Returns:
        A tuple ``(chosen_numbers, merged_json)`` where ``merged_json`` is
        ``merged`` serialised with ``json.dumps(..., ensure_ascii=False)``.
    """
    # The deposit flag comes in a bid ('投标') and a negotiation ('磋商') variant.
    if '是否递交投标保证金' in merged:
        deposit_flag, deposit_kind = '是否递交投标保证金', '投标'
    else:
        deposit_flag, deposit_kind = '是否递交磋商保证金', '磋商'

    # Likewise the meeting flag is either the preparatory or the Q&A meeting.
    if '是否召开投标预备会' in merged:
        meeting_flag, meeting_kind = '是否召开投标预备会', '预备会'
    else:
        meeting_flag, meeting_kind = '是否召开投标答疑会', '答疑会'

    # (flag key, numbers chosen on '是', entries written on '否'), in processing order.
    rules = (
        ('是否允许分包', (1,), (('分包', '不允许'),)),
        (
            deposit_flag,
            (2, 3),
            (
                (f'{deposit_kind}保证金', '不提交'),
                (f'退还{deposit_kind}保证金', '/'),
            ),
        ),
        ('是否提交履约保证金', (4,), (('履约保证金', '不提交'),)),
        ('是否有招标代理服务费', (5,), (('招标代理服务费', '无'),)),
        ('是否组织踏勘现场', (6,), (('踏勘现场', '不组织'),)),
        (meeting_flag, (7,), ((f'投标{meeting_kind}', '不召开'),)),
        ('是否允许偏离', (8,), (('偏离', '不允许'),)),
    )

    selected = []
    for flag, numbers, replacements in rules:
        answer = merged.get(flag)
        if answer == '是':
            selected.extend(numbers)
        elif answer == '否':
            merged.update(replacements)
        else:
            # Anything else (missing, '未知', ...) keeps the flag as it is.
            continue
        merged.pop(flag, None)

    return selected, json.dumps(merged, ensure_ascii=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def read_questions_from_judge(file_path, indices):
    """Return the numbered questions in ``file_path`` whose number is in ``indices``.

    The file is read as UTF-8 text. An item has the form ``<number>.<text>``;
    its text ends either at the marker ``#pdf提取之后的提示词`` or just before
    the next ``<number>.`` (or at the end of the file).

    Returns:
        The stripped texts of the selected items, in file order.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        text = handle.read()

    # Groups 1/2 belong to the marker-terminated form, groups 3/4 to the plain form.
    item_pattern = re.compile(
        r'(\d+)\.(.*?)#pdf提取之后的提示词|(\d+)\.(.*?)(?=\d+\.|$)',
        re.DOTALL,
    )

    return [
        marked_text.strip() or plain_text.strip()
        for marked_num, marked_text, plain_num, plain_text in item_pattern.findall(text)
        if int(marked_num or plain_num) in indices
    ]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def judge_whether_main(file_path, output_folder):
    """Answer the yes/no questions about a tender file and post-process the answers.

    Per the original inline note, ``file_path`` is the "投标人须知前附表"
    section extracted from the tender document.

    Steps:
      1. ``read_pdf_and_judge_main`` writes its result for ``file_path`` to
         ``<output_folder>/judge_exist.json``.
      2. ``qianwen_ask`` is queried with that JSON file and the fixed prompt.
      3. If any answer is '未知', the file is uploaded and ``qianwen_long`` is
         asked only about those keys; its answers are merged over the first
         ones by ``process_judge_content``.
      4. Otherwise the first answers go straight to ``merge_json_to_list``.

    Returns:
        The ``(chosen_numbers, merged_json)`` tuple produced by
        ``merge_json_to_list``.
    """
    # Fixed: the key '是否允许分包' was missing its opening quote in the key list,
    # which made the requested JSON key names malformed.
    user_query1 = "请你依据以上信息,回答以下问题:是否组织踏勘现场?是否召开投标预备会?是否允许偏离?是否退还投标文件?是否允许分包? 是否需要递交投标保证金?是否需要提交履约保证金(履约担保)?是否有招标代理服务费?请按json格式给我提供信息,键名分别为'是否组织踏勘现场','是否召开投标预备会','是否允许偏离','是否退还投标文件','是否允许分包','是否递交投标保证金','是否提交履约保证金','是否有招标代理服务费',键值仅限于'是','否','未知',若存在矛盾信息,请回答'未知'。"

    output_json_path = os.path.join(output_folder, 'judge_exist.json')
    read_pdf_and_judge_main(file_path, output_json_path)  # extract the check-mark symbols

    # First pass: the regular Qianwen model answers yes / no / unknown.
    qianwen_answer = qianwen_ask(output_json_path, user_query1)

    # Second-pass prompt covering only the keys answered '未知' ("" if there are none).
    user_query2 = construct_judge_questions(qianwen_answer)

    if user_query2:
        file_id = upload_file(file_path)
        # Send the whole uploaded file to qianwen-long together with the follow-up prompt.
        res = qianwen_long(file_id, user_query2)
        return process_judge_content(qianwen_answer, res)

    print("正常现象,没有'未知',无需调用qianwen-long")
    original = extract_content_from_json(qianwen_answer)
    return merge_json_to_list(original)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def process_judge_content(original_json, update_json):
    """Overlay the newer answers on the older ones and post-process the result.

    Both arguments are parsed with ``extract_content_from_json``. Entries from
    ``update_json`` overwrite entries with the same key from ``original_json``,
    and the combined mapping is handed to ``merge_json_to_list``.

    Returns:
        Whatever ``merge_json_to_list`` returns for the combined mapping.
    """
    combined = extract_content_from_json(original_json)
    newer = extract_content_from_json(update_json)
    # Newer values win on key collisions.
    combined.update(newer)
    return merge_json_to_list(combined)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual run against a sample file on the developer's machine.
    sample_pdf = "C:\\Users\\Administrator\\Desktop\\fsdownload\\temp8\\a95a9a0a-f2dd-4007-b849-6b0a1a4c2b91\\ztbfile_tobidders_notice_table.pdf"
    sample_output_dir = "C:\\Users\\Administrator\\Desktop\\fsdownload\\temp8\\a95a9a0a-f2dd-4007-b849-6b0a1a4c2b91\\output"

    numbers, merged_json = judge_whether_main(sample_pdf, sample_output_dir)
    print(numbers)
    print(merged_json)
|
|
|
|
|
|