zbparse/flask_app/货物标/评分标准提取.py

90 lines
3.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- encoding:utf-8 -*-
import json
from flask_app.main.通义千问long import upload_file, qianwen_long
from flask_app.main.json_utils import clean_json_string
def combine_technical_and_business(data, target_values):
    """Split scoring entries into technical entries and a grouped '商务标' entry.

    ``data`` may be a dict, or a list (nested to any depth) containing dicts.
    Every key of every dict reached this way is classified:

    * if the key contains any substring from ``target_values`` it is treated
      as technical and copied to the root of the result under its own name;
    * otherwise it is stored under ``result['商务标'][key]``.

    Values are stored as-is; dict values are never descended into. (The
    previous implementation carried a recursion branch for that, but it could
    never run because both classification branches always skipped to the next
    key, so it has been removed without changing the output.)

    Args:
        data: Parsed scoring data (dict / list of dicts). Any other type
            contributes nothing.
        target_values: Iterable of substrings that mark a key as technical.

    Returns:
        dict: The classified entries. If no technical key was seen,
        ``'技术标'`` is set to ``''``; if no other key was seen, ``'商务标'``
        is set to ``''``.
    """
    extracted_data = {}  # every extracted entry lives at the root level
    technical_found = False
    business_found = False

    def collect(node):
        nonlocal technical_found, business_found
        if isinstance(node, dict):
            for key, value in node.items():
                if any(target in key for target in target_values):
                    # Technical entries keep their own key at the root.
                    extracted_data[key] = value
                    technical_found = True
                else:
                    # Everything else is grouped under the business section.
                    extracted_data.setdefault('商务标', {})[key] = value
                    business_found = True
        elif isinstance(node, list):
            # Lists are only containers: look for dicts inside them.
            for item in node:
                collect(item)

    collect(data)

    # Guarantee both placeholder keys exist when nothing was classified.
    if not technical_found:
        extracted_data['技术标'] = ''
    if not business_found:
        extracted_data['商务标'] = ''
    return extracted_data
# If the only outer key is a wrapper such as '评分因素', the function below unwraps it and returns the content inside.
def process_data_based_on_key(data, word):
    """Unwrap a single-key dict whose key contains ``word``.

    Args:
        data: Parsed data, normally a dict. Non-dict values (for example a
            list or an empty string) are returned unchanged instead of
            raising ``AttributeError`` on ``.keys()``.
        word: Substring looked for in the sole key.

    Returns:
        The value stored under the sole key when ``data`` is a dict with
        exactly one key and that key contains ``word``; otherwise ``data``
        itself.
    """
    if isinstance(data, dict) and len(data) == 1:
        # Exactly one key: unpack it without building a key list.
        (only_key,) = data
        if word in only_key:
            return data[only_key]
    # Condition not met: hand the input back untouched.
    return data
def get_evaluation_standards(truncate_file):
    """Query the model for a file's scoring standards and return them as JSON text.

    The file is passed to ``upload_file`` and the returned id, together with a
    fixed Chinese prompt, to ``qianwen_long``. The answer goes through
    ``clean_json_string`` (presumably yielding a parsed dict; confirm against
    that helper) and ``process_data_based_on_key`` with the word '评分'.

    If any outer key contains one of the package markers ('一包' ... '五包'),
    only those keys are kept and each one's value is classified separately by
    ``combine_technical_and_business``. Otherwise the whole result is
    classified in one call.

    Args:
        truncate_file: Value handed to ``upload_file``; the script entry
            point passes a PDF path.

    Returns:
        str: The classified result serialised with ``json.dumps`` using
        ``ensure_ascii=False`` and ``indent=4``.
    """
    package_markers = ['一包', '二包', '三包', '四包', '五包']
    target_values = ['技术', '设计', '实施']
    user_query = "根据该文档中的评标办法前附表或者评分标准表,请你列出该文件的技术标,商务标,投标报价评审标准以及它们对应的具体评分要求,外层键名分别为'技术标','商务标','投标报价'。如果评分内容不是这3个则返回文档中给定的评分内容以及它的评分要求都以json的格式返回结果如果该采购活动有多个包则最外层键名为对应的包名。请不要回答有关资格审查的内容"

    file_id = upload_file(truncate_file)
    raw_answer = qianwen_long(file_id, user_query)
    result_data = process_data_based_on_key(clean_json_string(raw_answer), '评分')

    # Outer keys that name a package, in their original order.
    package_keys = [
        key for key in result_data
        if any(marker in key for marker in package_markers)
    ]

    if package_keys:
        # Per-package layout: classify each package's content on its own.
        updated_jsons = {
            key: combine_technical_and_business(result_data[key], target_values)
            for key in package_keys
        }
    else:
        # No package layer: classify the whole dictionary at once.
        updated_jsons = combine_technical_and_business(result_data, target_values)

    return json.dumps(updated_jsons, ensure_ascii=False, indent=4)
if __name__ == "__main__":
    # Manual run against a locally stored evaluation-method PDF.
    sample_pdf = "C:\\Users\\Administrator\\Desktop\\货物标\\output2\\竞争性谈判文件(3)_evaluation_method.pdf"
    print(get_evaluation_standards(sample_pdf))