9.5 资格审查部分返回更能对应原文
This commit is contained in:
parent
f826a3f7a1
commit
cac8ea4136
@ -1,18 +1,15 @@
|
||||
import re
|
||||
|
||||
|
||||
test_text = """
|
||||
xxxxxxxxxxxxxxxxx
|
||||
附录: 投标人资质条件、能力和信誉(资格审查标准)
|
||||
xxxxxx
|
||||
附表一:招标文件澄清申请函
|
||||
xxxxxxxxxxxxxxx
|
||||
content="递交投标文件截止之日后120日内"
|
||||
def extract_content_after_special_chars(content):
|
||||
"""
|
||||
pattern = r'^(?:附录(?:一)?[::]|附件(?:一)?[::]|附表(?:一)?[::]).*(?:资质|能力|信誉).*$'
|
||||
|
||||
match = re.search(pattern, test_text, re.MULTILINE)
|
||||
|
||||
提取特定符号后的内容,直到遇到结束符号。
|
||||
"""
|
||||
pattern = r'[\x01\x02☑√团]([^□]+)'
|
||||
match = re.search(pattern, content)
|
||||
if match:
|
||||
print("匹配到的行:", match.group())
|
||||
else:
|
||||
print("没有匹配到符合条件的行")
|
||||
return match.group(1).strip() # 提取匹配的内容,并去除多余空格
|
||||
return content # 如果没有找到匹配,返回原内容
|
||||
|
||||
res=extract_content_after_special_chars(content)
|
||||
print(res)
|
@ -1,3 +1,4 @@
|
||||
# -*- encoding:utf-8 -*-
|
||||
import re
|
||||
import json
|
||||
import time
|
||||
@ -27,6 +28,38 @@ prompt = """
|
||||
"""
|
||||
|
||||
|
||||
def update_json_data(original_data, updates, second_response_list):
    """Overwrite values in ``original_data`` using dot-separated keys.

    A key such as ``"a.b.c"`` addresses ``original_data["a"]["b"]["c"]``;
    missing intermediate dictionaries are created on the way down.

    Args:
        original_data: dict holding the original JSON data. It is modified
            in place.
        updates: dict of ``{dotted_key: value}`` pairs to apply first.
            ``None`` is treated as an empty dict.
        second_response_list: list of dicts, each holding further
            ``{dotted_key: value}`` pairs, applied in order after
            ``updates``. ``None`` is treated as an empty list.

    Returns:
        The same ``original_data`` object after all updates were applied.
    """

    def recursive_update(data, key, value):
        # Walk down the dot-separated path, creating nested dicts as needed.
        keys = key.split('.')
        for k in keys[:-1]:
            child = data.get(k)
            if not isinstance(child, dict):
                # A missing or non-dict intermediate (e.g. a plain string)
                # cannot hold nested keys, so it is replaced by a new dict
                # instead of failing with AttributeError on ``.setdefault``.
                child = {}
                data[k] = child
            data = child

        leaf = keys[-1]
        existing = data.get(leaf)
        if isinstance(value, dict) and isinstance(existing, dict):
            # Both sides are dicts: shallow-merge, new values take priority.
            data[leaf] = {**existing, **value}
        else:
            data[leaf] = value

    # Merge ``updates`` into ``original_data``.
    for key, value in (updates or {}).items():
        recursive_update(original_data, key, value)

    # Merge every dict of ``second_response_list`` into ``original_data``.
    for response_dict in (second_response_list or []):
        for key, value in response_dict.items():
            recursive_update(original_data, key, value)

    return original_data
|
||||
|
||||
|
||||
def extract_matching_keys(json_data):
|
||||
# 函数首先检查输入 json_data 是否为字符串类型。如果是,它会使用 json.loads() 将字符串解析为字典。
|
||||
if isinstance(json_data, str):
|
||||
@ -74,8 +107,11 @@ def extract_matching_keys(json_data):
|
||||
|
||||
return final_matching
|
||||
|
||||
|
||||
#TODO:如果要引用到招标公告中的内容,考虑提取 或者qianwen-long
|
||||
def reformat_questions(match_keys):
|
||||
"""
|
||||
[{'形式评审标准.多标段投标': '符合第一章“招标公告”第 3.3款规定'}, {'形式评审标准.投标文件的签署': '符合第二章“投标人须知”第 3.6.3(5)目规定'}, {'形式评审标准.投标保证金': '符合第二章“投标人须知”第 3.4.1项规定'}, {'形式评审标准.工程分包(如有)': '符合第二章“投标人须知”第 1.11款规定'}, {'响应性评审标准.投标内容': '符合第二章“投标人须知”第 1.3款的规定'}, {'响应性评审标准.监理服务阶段': '符合第二章“投标人须知”第 1.3款的规定'}, {'响应性评审标准.监理工作范围': '符合第二章“投标人须知”第 1.3款的规定'}, {'响应性评审标准.监理服务期': '符合第二章“投标人须知”第 1.3款的规定'}, {'响应性评审标准.投标有效期': '符合第二章“投标人须知”第 3.3.1项的规定'}, {'响应性评审标准.投标保证金': '符合第二章“投标人须知”第 3.4.1项的规定'}, {'响应性评审标准.重大偏差': '见第二章“投标人须知”第 1.12款规定'}]
|
||||
"""
|
||||
"""
|
||||
根据是否包含特定序号格式(如3.7.4或3.7.4(5)或3.7.4(5)),重新格式化匹配到的评审条目。
|
||||
若包含序号,则提取出来;若不包含,则生成格式化的问题字符串。
|
||||
@ -84,10 +120,15 @@ def reformat_questions(match_keys):
|
||||
formatted_questions = []
|
||||
|
||||
# 正则表达式,同时匹配全角和半角括号
|
||||
pattern = re.compile(r'(\d+(?:\.\d+)+)(?:[\(\(](\d+)[\)\)])?')
|
||||
pattern = re.compile(r'(\d+(?:\.\d+)+)(?:[\(\(](\d+)[\)\)])?') #识别包含数字序列的特定格式 eg:3.7.4(5) 3.4.1
|
||||
|
||||
for entry in match_keys:
|
||||
key, value = next(iter(entry.items()))
|
||||
if '招标公告' in value or '第一章' in value:
|
||||
formatted_entry = f"关于‘{key}’,{value.replace('符合', '')}的内容是怎样的?请按json格式给我提供信息,键名为'{key}',如果存在未知信息,请在对应键值处填'未知'。"
|
||||
formatted_questions.append(formatted_entry)
|
||||
continue # 继续处理下一个条目
|
||||
|
||||
match = pattern.search(value)
|
||||
if match:
|
||||
# 如果存在序号,保存序号与对应的键值对,包括括号内的数字(如果存在)
|
||||
@ -98,46 +139,12 @@ def reformat_questions(match_keys):
|
||||
revised_standard = re.sub(r'符合', '', value)
|
||||
formatted_entry = f"关于‘{key}’,{revised_standard}的内容是怎样的?请按json格式给我提供信息,键名为'{key}',如果存在未知信息,请在对应键值处填'未知'。"
|
||||
formatted_questions.append(formatted_entry)
|
||||
|
||||
print(formatted_questions)
|
||||
return entries_with_numbers, formatted_questions
|
||||
|
||||
|
||||
|
||||
def update_json_data(original_data, updates, second_response_list):
    """Overwrite values in ``original_data`` using dot-separated keys.

    A key such as ``"a.b.c"`` addresses ``original_data["a"]["b"]["c"]``;
    missing intermediate dictionaries are created on the way down.

    Args:
        original_data: dict holding the original JSON data. It is modified
            in place.
        updates: dict of ``{dotted_key: value}`` pairs to apply first.
            ``None`` is treated as an empty dict.
        second_response_list: list of dicts, each holding further
            ``{dotted_key: value}`` pairs, applied in order after
            ``updates``. ``None`` is treated as an empty list.

    Returns:
        The same ``original_data`` object after all updates were applied.
    """

    def recursive_update(data, key, value):
        # Walk down the dot-separated path, creating nested dicts as needed.
        keys = key.split('.')
        for k in keys[:-1]:
            child = data.get(k)
            if not isinstance(child, dict):
                # A missing or non-dict intermediate (e.g. a plain string)
                # cannot hold nested keys, so it is replaced by a new dict
                # instead of failing with AttributeError on ``.setdefault``.
                child = {}
                data[k] = child
            data = child

        leaf = keys[-1]
        existing = data.get(leaf)
        if isinstance(value, dict) and isinstance(existing, dict):
            # Both sides are dicts: shallow-merge, new values take priority.
            data[leaf] = {**existing, **value}
        else:
            data[leaf] = value

    # Merge ``updates`` into ``original_data``.
    for key, value in (updates or {}).items():
        recursive_update(original_data, key, value)

    # Merge every dict of ``second_response_list`` into ``original_data``.
    for response_dict in (second_response_list or []):
        for key, value in response_dict.items():
            recursive_update(original_data, key, value)

    return original_data
|
||||
|
||||
|
||||
|
||||
def process_reviews(original_dict_data,knowledge_name, truncate0_jsonpath,clause_json_path):
|
||||
matched_keys = extract_matching_keys(original_dict_data) #[{'形式评审标准.投标文件签字盖章': '符合第二章“投标人须知”第 3.7.3(4)目 规定'}, {'形式评审标准.多标段投标': '符合第二章“投标人须知”第 10.1款规定'}]
|
||||
matched_keys = extract_matching_keys(original_dict_data) #[{'形式评审标准.投标文件签字盖章': '符合第二章“投标人须知”第 3.7.3(4)目 规定'}, {'形式评审标准.多标段投标': '符合第二章“投标人须知”第 10.1款规定'}] 提取值中包含"符合"的字典
|
||||
entries_with_numbers, formatted_questions = reformat_questions(matched_keys)
|
||||
results_2 = multi_threading(formatted_questions, knowledge_name, True) #无序号的直接问大模型
|
||||
second_response_list = []
|
||||
@ -152,21 +159,23 @@ def process_reviews(original_dict_data,knowledge_name, truncate0_jsonpath,clause
|
||||
print(f"Error processing response for query index {_}: {e}")
|
||||
|
||||
# Assume JSON file paths are defined or configured correctly
|
||||
combined_results = process_and_merge_entries(entries_with_numbers, truncate0_jsonpath, clause_json_path) #脚本提取的要求 [{'xxx': '3.7.4(5)'}]
|
||||
# print(entries_with_numbers) #[{'形式评审标准.多标段投标': '3.7.4(5)'}]
|
||||
combined_results = process_and_merge_entries(entries_with_numbers, truncate0_jsonpath, clause_json_path) #调用根据条款号整合json.py
|
||||
updated_json = update_json_data(original_dict_data, combined_results, second_response_list)
|
||||
return updated_json
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
start_time=time.time()
|
||||
knowledge_name="zbfile"
|
||||
truncate_tobidders_table_json_path="C:\\Users\\Administrator\\Desktop\\招标文件\\output\\truncate_output.json"
|
||||
clause_path="C:\\Users\\Administrator\\Desktop\\招标文件\\output\\clause.json"
|
||||
original_dict_data={'营业执照': '具备有效的营业执照', '资质条件': '符合第二章“投标人须知”第 1.4.1项规定', '财务状况': '符合第二章“投标人须知”第 1.4.1项规定', '类似业绩': '符合第二章“投标人须知”第 1.4.1项规定', '信誉': '符合第二章“投标人须知”第 1.4.1项规定', '项目经理资格': '符合第二章“投标人须知”第 1.4.1项规定', '设计负责人资格': '符合第二章“投标人须知”第 1.4.1项规定', '施工负责人资格': '符合第二章“投标人须知”第 1.4.1项规定', '施工机械设备': '符合第二章“投标人须知”第 1.4.1项规定', '项目管理机构及人员': '符合第二章“投标人须知”第 1.4.1项规定', '其他要求': '符合第二章“投标人须知”第 1.4.1项规定', '联合体投投人 (如有)': '符合第二章“投标人须知”第 1.4.2项规定', '不存在禁止投标的情形': '不存在第二章“投标人须知”第 1.4.3项规 定的任何一种情形'}
|
||||
knowledge_name="zbtest20"
|
||||
truncate_tobidders_table_json_path="C:\\Users\\Administrator\\Desktop\\招标文件\\招标test文件夹\\truncate_output.json"
|
||||
clause_path="C:\\Users\\Administrator\\Desktop\\招标文件\\招标test文件夹\\clause.json"
|
||||
original_dict_data={'形式评审标准': {'多标段投标': '符合第一章“招标公告”第 3.3款规定', '投标文件': '投标文件能正常打开', '投标人名称': '与营业执照、资质证书一致', '投标文件的签署': '符合第二章“投标人须知”第 3.6.3(5)目规定', '投标保证金': '符合第二章“投标人须知”规定', '投标文件的格式、内容': '符合第八章“投标文件格式”的格式规定、实质性内容齐全,关键字迹清晰可辨', '投标报价': '①一份投标文件应只有一个投标报价,未提交选择性报价; ②投标函中报价与监理服务费报价汇总表中的报价保持一致; ③投标人未提交调价函。', '联合体投标人(如有)': '提交联合体协议书,并明确联合体牵头人', '工程分包(如有)': '符合第二章“投标人须知”第 1.11款规定'}, '响应性评审标准': {'投标内容': '符合第二章“投标人须知”第 1.3款的规定', '监理服务阶段': '符合第二章“投标人须知”第 1.3款的规定', '监理工作范围': '符合第二章“投标人须知”第 1.3款的规定', '投标报价': '①投标报价不高于最高投标限价(如果有); ②投标报价不低于成本价。', '监理服务期': '符合第二章“投标人须知”第 1.3款的规定', '投标有效期': '符合第二章“投标人须知”第 3.3.1项的规定', '投标保证金': '符合第二章“投标人须知”第 3.4.1项的规定', '权利义务': '符合或优于第四章“合同条款及格式”规定的权利义务', '技术标准': '符合第七章“技术标准”规定', '招标人不能接受的条件': '未附有招标人不能接受的条件', '重大偏差': '见第二章“投标人须知”第 1.12款规定'}}
|
||||
formal_json = process_reviews(original_dict_data,knowledge_name, truncate_tobidders_table_json_path, clause_path)
|
||||
data = json.loads(formal_json)
|
||||
data = json.dumps(formal_json, ensure_ascii=False, indent=4)
|
||||
end_time=time.time()
|
||||
elapsed_time = end_time - start_time
|
||||
print(f"Function execution took {elapsed_time} seconds.")
|
||||
print(data)
|
||||
|
||||
#关于'技术暗标',第二章“投标人须知”规定的内容是怎样的?请按json格式给我提供信息,键名为'技术暗标',请你忠于原文,回答要求完整准确,不要擅自总结、删减,且不要回答诸如'见投标人须知前附表'或'见第x.x项规定'这类无实质性内容的回答。
|
@ -38,45 +38,6 @@ def convert_dict_to_str(d):
|
||||
return "\n".join(f"{k}: {v}" for k, v in d.items())
|
||||
return str(d)
|
||||
|
||||
|
||||
def find_entries_in_jsons(entries, json_primary, json_secondary):
    """Resolve each entry's clause reference against two JSON documents.

    Args:
        entries: iterable of dicts; only the first ``key: value`` pair of
            each dict is used, where ``value`` is the clause reference.
        json_primary: dict searched first.
        json_secondary: dict searched only when the primary lookup reports
            no match.

    Returns:
        dict mapping each entry key to the collected text fragments joined
        by newlines. Entries with no collected text are omitted.
    """
    located = {}
    for single_pair in entries:
        label, clause_ref = next(iter(single_pair.items()))
        fragments = []
        # Fall back to the secondary document only if the primary had no hit.
        if not process_json_with_subentries(json_primary, clause_ref, fragments):
            process_json_with_subentries(json_secondary, clause_ref, fragments)
        if not fragments:
            continue
        located[label] = "\n".join(fragments)
    return located
|
||||
|
||||
def process_json_with_subentries(json_data, value, combined_value):
    """Look up clause reference ``value`` in ``json_data`` and collect its text.

    Brackets in ``value`` are first normalised with ``standardize_brackets``
    (e.g. full-width ``1.11(1)`` becomes ``1.11(1)``). A bracketed reference
    is then resolved in two steps:

    1. the full reference is tried as a key of ``json_data``;
    2. otherwise the part before the first "(" is used as the key and, when
       its content is a string, ``extract_specific_subentry`` is asked for
       the bracketed sub-item.

    References without brackets are delegated to ``extarct_normal``.

    Args:
        json_data: dict keyed by clause numbers.
        value: clause reference such as ``"3.4.1"`` or ``"3.6.3(5)"``.
        combined_value: list that receives the found text (mutated in place).

    Returns:
        For bracketed references, ``True`` when text was appended and
        ``False`` otherwise; for plain references, whatever
        ``extarct_normal`` returns.
    """
    value = standardize_brackets(value)
    if "(" not in value or ")" not in value:
        return extarct_normal(json_data, value, combined_value)

    # Step 1: the complete reference may itself be a key.
    first_content = get_values_only(json_data.get(value))
    if first_content:
        combined_value.append(first_content)
        return True

    # Step 2: split at the FIRST "(" only, so a reference with several
    # bracket groups no longer breaks the two-name unpacking.
    base_key, _, remainder = value.partition("(")
    subentry_key = "(" + remainder  # e.g. base_key "1.11", subentry_key "(1)"
    content = json_data.get(base_key.strip())
    if content and isinstance(content, str):
        extracted_content = extract_specific_subentry(content, subentry_key)
        if extracted_content:
            combined_value.append(extracted_content)
            return True

    # Every miss returns an explicit False instead of an implicit None.
    return False
|
||||
|
||||
|
||||
def extarct_normal(json_data, value, combined_value):
|
||||
found_subentries = check_and_collect_subentries(json_data, value, combined_value)
|
||||
if not found_subentries: #若无子条目,直接查找
|
||||
@ -116,15 +77,9 @@ def extract_specific_subentry(content, subentry_key):
|
||||
return ""
|
||||
return ""
|
||||
|
||||
def get_values_only(content):
    """Flatten ``content`` to text and pass it on for marker extraction.

    A dict is reduced to its values joined by " / "; anything else is
    passed on unchanged. The result is whatever
    ``extract_content_after_special_chars`` returns for that text.
    """
    if not isinstance(content, dict):
        return extract_content_after_special_chars(content)
    # Dict content: keep only the values, separated by " / ".
    flattened = " / ".join(content.values())
    return extract_content_after_special_chars(flattened)
|
||||
|
||||
def extract_content_after_special_chars(content):
|
||||
if not content:
|
||||
return content
|
||||
"""
|
||||
提取特定符号后的内容,直到遇到结束符号或内容末尾。
|
||||
"""
|
||||
@ -136,12 +91,55 @@ def extract_content_after_special_chars(content):
|
||||
return match.group(1).strip() # 去除多余空白字符
|
||||
# 如果没有找到特殊字符,返回原始内容
|
||||
return content
|
||||
|
||||
def get_values_only(content):
    """Flatten ``content`` to text and pass it on for marker extraction.

    A dict is reduced to its values joined by " / "; anything else is
    passed on unchanged. The result is whatever
    ``extract_content_after_special_chars`` returns for that text.
    """
    if not isinstance(content, dict):
        return extract_content_after_special_chars(content)
    # Dict content: keep only the values, separated by " / ".
    flattened = " / ".join(content.values())
    return extract_content_after_special_chars(flattened)
|
||||
def standardize_brackets(value):
    """Convert full-width brackets in ``value`` to half-width (ASCII) ones.

    "(" becomes "(" and ")" becomes ")"; all other characters are kept,
    e.g. ``"1.11(1)"`` -> ``"1.11(1)"``.

    Args:
        value: the string to normalise.

    Returns:
        A new string with every full-width bracket replaced.
    """
    # One translation pass instead of two chained ``replace`` calls.
    return value.translate(str.maketrans('()', '()'))
|
||||
|
||||
def process_json_with_subentries(json_data, value, combined_value):
    """Look up clause reference ``value`` in ``json_data`` and collect its text.

    Brackets in ``value`` are first normalised with ``standardize_brackets``
    (e.g. full-width ``1.11(1)`` becomes ``1.11(1)``). A bracketed reference
    is then resolved in two steps:

    1. the full reference is tried as a key of ``json_data``;
    2. otherwise the part before the first "(" is used as the key and, when
       its content is a string, ``extract_specific_subentry`` is asked for
       the bracketed sub-item.

    References without brackets are delegated to ``extarct_normal``.

    Args:
        json_data: dict keyed by clause numbers.
        value: clause reference such as ``"3.4.1"`` or ``"3.6.3(5)"``.
        combined_value: list that receives the found text (mutated in place).

    Returns:
        For bracketed references, ``True`` when text was appended and
        ``False`` otherwise; for plain references, whatever
        ``extarct_normal`` returns.
    """
    value = standardize_brackets(value)
    if "(" not in value or ")" not in value:
        return extarct_normal(json_data, value, combined_value)

    # Step 1: the complete reference may itself be a key.
    first_content = get_values_only(json_data.get(value))
    if first_content:
        combined_value.append(first_content)
        return True

    # Step 2: split at the FIRST "(" only, so a reference with several
    # bracket groups no longer breaks the two-name unpacking.
    base_key, _, remainder = value.partition("(")
    subentry_key = "(" + remainder  # e.g. base_key "1.11", subentry_key "(1)"
    content = json_data.get(base_key.strip())
    if content and isinstance(content, str):
        extracted_content = extract_specific_subentry(content, subentry_key)
        if extracted_content:
            combined_value.append(extracted_content)
            return True

    # Every miss returns an explicit False instead of an implicit None.
    return False
|
||||
|
||||
def find_entries_in_jsons(entries, json_primary, json_secondary):
    """Resolve each entry's clause reference against two JSON documents.

    Args:
        entries: iterable of dicts; only the first ``key: value`` pair of
            each dict is used, where ``value`` is the clause reference.
        json_primary: dict searched first.
        json_secondary: dict searched only when the primary lookup reports
            no match.

    Returns:
        dict mapping each entry key to the collected text fragments joined
        by newlines. Entries with no collected text are omitted.
    """
    located = {}
    for single_pair in entries:
        label, clause_ref = next(iter(single_pair.items()))
        fragments = []
        # Fall back to the secondary document only if the primary had no hit.
        if not process_json_with_subentries(json_primary, clause_ref, fragments):
            process_json_with_subentries(json_secondary, clause_ref, fragments)
        if not fragments:
            continue
        located[label] = "\n".join(fragments)
    return located
|
||||
def process_and_merge_entries(entries_with_numbers, primary_json_path, secondary_json_path):
|
||||
primary_json_data = load_json(primary_json_path)
|
||||
secondary_json_data = load_json(secondary_json_path)
|
||||
@ -151,9 +149,9 @@ def process_and_merge_entries(entries_with_numbers, primary_json_path, secondary
|
||||
if __name__ == "__main__":
|
||||
# Hypothetical entries and file paths for testing
|
||||
# entries_with_numbers = [{'形式评审标准.投标文件签字盖章': '3.7.3(3)'}, {'形式评审标准.多标段投标': '10.1'}, {'形式评审标准.“技术暗标”': '3.7.4(5)'}, {'响应性评审标准.投标内容': '1.3.1'}, {'响应性评审标准.工期': '1.3.2'}, {'响应性评审标准.工程质量': '1.3.3'}, {'响应性评审标准.投标有效期': '3.3.1'}, {'响应性评审标准.投标保证金': '3.4.1'}, {'响应性评审标准.分包计划': '1.11'}]
|
||||
entries_with_numbers=[{'xxx': '3.4.1'}]
|
||||
primary_json_path = 'C:\\Users\\Administrator\\Desktop\\fsdownload\\temp8\\3abb6e16-19db-42ad-9504-53bf1072dfe7\\truncate_output.json'
|
||||
secondary_json_path = 'C:\\Users\\Administrator\\Desktop\\fsdownload\\temp8\\3abb6e16-19db-42ad-9504-53bf1072dfe7\\clause.json'
|
||||
entries_with_numbers=[{'xxx': '3.6.3(5)'}]
|
||||
primary_json_path = 'C:\\Users\\Administrator\\Desktop\\招标文件\\招标test文件夹\\truncate_output.json'
|
||||
secondary_json_path = 'C:\\Users\\Administrator\\Desktop\\招标文件\\招标test文件夹\\clause.json'
|
||||
|
||||
# Since this is just a test block, make sure these paths point to actual JSON files with the appropriate structure
|
||||
try:
|
||||
|
@ -5,6 +5,7 @@ from flask_app.main.json_utils import nest_json_under_key, extract_content_from_
|
||||
from flask_app.main.形式响应评审 import process_reviews
|
||||
from flask_app.main.资格评审 import process_qualification
|
||||
from flask_app.main.通义千问long import upload_file, qianwen_long
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
|
||||
def combine_review_standards(truncate1,truncate3,knowledge_name,truncate0_jsonpath,clause_path): #评标办法前附表
|
||||
@ -15,8 +16,16 @@ def combine_review_standards(truncate1,truncate3,knowledge_name,truncate0_jsonpa
|
||||
results = qianwen_long(file_id, user_query_1)
|
||||
original_dict_data = extract_content_from_json(results)
|
||||
qualification_review = original_dict_data.pop('资格评审标准', '默认值或None') #qianwen-long有关资格评审的内容
|
||||
final_qualify_json=process_qualification(qualification_review,truncate3,knowledge_name)
|
||||
form_response_dict=process_reviews(original_dict_data, knowledge_name, truncate0_jsonpath, clause_path)
|
||||
print(original_dict_data)
|
||||
with ThreadPoolExecutor() as executor:
|
||||
# 创建Future对象
|
||||
future_qualification = executor.submit(process_qualification, qualification_review, truncate3, knowledge_name)
|
||||
future_form_response = executor.submit(process_reviews, original_dict_data, knowledge_name, truncate0_jsonpath,
|
||||
clause_path)
|
||||
|
||||
# 等待执行结果
|
||||
final_qualify_json = future_qualification.result()
|
||||
form_response_dict = future_form_response.result()
|
||||
print("形式响应评审done")
|
||||
form_response_dict.update(final_qualify_json)
|
||||
return nest_json_under_key(form_response_dict,"资格审查")
|
||||
@ -24,11 +33,12 @@ def combine_review_standards(truncate1,truncate3,knowledge_name,truncate0_jsonpa
|
||||
|
||||
if __name__ == "__main__":
|
||||
output_folder = "C:\\Users\\Administrator\\Desktop\\招标文件\\招标test文件夹"
|
||||
truncate0 = os.path.join(output_folder, "zbtest20_tobidders_notice_table.pdf")
|
||||
# truncate0 = os.path.join(output_folder, "zbtest20_tobidders_notice_table.pdf")
|
||||
truncate2=os.path.join(output_folder,"zbtest20_tobidders_notice.pdf")
|
||||
knowledge_name="zbtest20"
|
||||
truncate1=os.path.join(output_folder,"zbtest20_evaluation_method.pdf")
|
||||
truncate3=os.path.join(output_folder,"zbtest20_qualification.pdf")
|
||||
clause_path = convert_clause_to_json(truncate1, output_folder)
|
||||
truncate1_jsonpath = os.path.join(output_folder, "truncate_output.json")
|
||||
res=combine_review_standards(truncate1,truncate3, knowledge_name,truncate1_jsonpath,clause_path)
|
||||
clause_path = convert_clause_to_json(truncate2, output_folder)
|
||||
truncate0_jsonpath = os.path.join(output_folder, "truncate_output.json")
|
||||
res=combine_review_standards(truncate1,truncate3, knowledge_name,truncate0_jsonpath,clause_path)
|
||||
print(res)
|
Loading…
x
Reference in New Issue
Block a user