import json

from flask_app.main.通义千问long import upload_file, qianwen_long
from flask_app.main.json_utils import clean_json_string


def combine_technical_and_business(data, target_values1, target_values2):
    """Regroup a nested evaluation structure into technical and business parts.

    The structure is walked depth-first. A dict key is classified by
    substring (fuzzy) matching against the two target lists:

    * a key containing any entry of ``target_values1`` is treated as
      technical content and stored under its own key at the root of the
      result;
    * otherwise, a key containing any entry of ``target_values2`` is treated
      as business content and stored under that key inside the ``'商务标'``
      sub-dict of the result.

    A matched branch is stored as-is and is not searched any further.
    Unmatched dict/list values are searched recursively; unmatched scalar
    values are ignored.

    Args:
        data: Nested dict/list structure to search.
        target_values1: Substrings identifying technical keys.
        target_values2: Substrings identifying business keys.

    Returns:
        A dict with the collected entries. If no technical key was found,
        ``'技术标'`` is set to ``''``; if no business key was found,
        ``'商务标'`` is set to ``''``.
    """
    extracted_data = {}  # everything is collected at the root of this dict
    technical_found = False
    business_found = False

    def extract_nested(node):
        nonlocal technical_found, business_found

        if isinstance(node, dict):
            for key, value in node.items():
                if any(target in key for target in target_values1):
                    # Technical content: keep it directly at the root and do
                    # not descend into this branch.
                    extracted_data[key] = value
                    technical_found = True
                elif any(target in key for target in target_values2):
                    # Business content: group it under '商务标' and do not
                    # descend into this branch.
                    extracted_data.setdefault('商务标', {})[key] = value
                    business_found = True
                elif isinstance(value, (dict, list)):
                    # Unclassified container: keep searching inside it.
                    extract_nested(value)
        elif isinstance(node, list):
            for item in node:
                extract_nested(item)

    # Start the recursive search from the top level.
    extract_nested(data)

    # Placeholders for categories that were never matched.
    if not technical_found:
        extracted_data['技术标'] = ''
    if not business_found:
        extracted_data['商务标'] = ''

    return extracted_data


def get_evaluation_standards(truncate_file):
    """Extract per-package evaluation standards from a document as JSON text.

    The file is uploaded with ``upload_file``, queried through
    ``qianwen_long`` and the answer is passed through ``clean_json_string``.
    The cleaned result is used as a dict here (presumably the parsed JSON
    answer of the model -- confirm against ``clean_json_string``).

    Only top-level keys containing one of the package markers
    ('一包' ... '五包') are processed; each such entry is regrouped with
    ``combine_technical_and_business``.

    NOTE: top-level keys without a package marker are dropped, so a result
    that has no package keys at all yields ``"{}"``.

    Args:
        truncate_file: Path of the file handed to ``upload_file``.

    Returns:
        A JSON string (non-ASCII preserved, 4-space indent) mapping each
        matched package key to its regrouped evaluation data.
    """
    file_id = upload_file(truncate_file)
    user_query = "根据该文档中的评标办法前附表,请你列出该文件的技术标,商务标,投标报价评审标准以及它们对应的具体评分要求,若对应内容中存在其他信息,在键名如'技术标'中新增子键名'备注'存放该信息。如果评分内容不是这3个,则返回文档中给定的评分内容以及它的评分要求,都以json的格式返回结果。请不要回答有关形式、资格、响应性评审标准的内容"
    evaluation_res = qianwen_long(file_id, user_query)
    cleaned_evaluation_res = clean_json_string(evaluation_res)

    # Substrings that mark a top-level key as a package section.
    include = ['一包', '二包', '三包', '四包', '五包']
    # Substrings that classify a key as technical / business content.
    target_values1 = ['技术标', '技术部分', '设计', '实施', '方案']
    target_values2 = ['投标报价', '商务标', '商务部分', '报价部分', '业绩', '信誉', '分值', '计算公式', '信用', '人员', '资格', '奖项', '认证', '荣誉']

    # One regrouped entry per package, keyed by the original package key.
    updated_jsons = {
        key: combine_technical_and_business(inner_dict, target_values1, target_values2)
        for key, inner_dict in cleaned_evaluation_res.items()
        if any(item in key for item in include)
    }

    return json.dumps(updated_jsons, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    truncate_file = "C:\\Users\\Administrator\\Desktop\\货物标\\output2\\招标文件(107国道)_evaluation_method.pdf"
    res = get_evaluation_standards(truncate_file)
    # NOTE(review): the lookup key is an empty string and the result is never
    # used; this looks like an unfinished debugging line -- confirm intent.
    cleaned_res = clean_json_string(res).get("")