# zbparse/flask_app/货物标/评分标准提取.py

# -*- encoding:utf-8 -*-
import json

from flask_app.main.通义千问long import upload_file, qianwen_long
from flask_app.main.json_utils import clean_json_string
def combine_technical_and_business(data, target_values):
    """Regroup evaluation criteria into technical entries and a business bucket.

    Every dict reachable from ``data`` through (possibly nested) lists has its
    own keys classified; the values are stored as-is and are never descended
    into:

    * a key containing any string from ``target_values`` is treated as
      technical and copied to the top level of the result under its own name;
    * every other key is collected under the single ``'商务标'`` entry.

    Args:
        data: A dict, or a list (possibly nested) containing dicts. Items that
            are neither dicts nor lists are ignored.
        target_values: Substrings that mark a key as technical content.

    Returns:
        A dict with the technical entries at the top level and the remaining
        entries under ``'商务标'``. If no technical key was seen, ``'技术标'`` is
        set to ``''``; if no other key was seen, ``'商务标'`` is set to ``''``.
    """
    merged = {}  # everything is stored at the root level of this dict
    seen_technical = False
    seen_business = False

    def iter_dicts(node):
        # Yield the dicts found in `node`, walking nested lists in order.
        if isinstance(node, dict):
            yield node
        elif isinstance(node, list):
            for child in node:
                yield from iter_dicts(child)

    for mapping in iter_dicts(data):
        for name, content in mapping.items():
            if any(target in name for target in target_values):
                # Technical content keeps its original key.
                merged[name] = content
                seen_technical = True
            else:
                # Anything else is treated as business content by default.
                merged.setdefault('商务标', {})[name] = content
                seen_business = True

    # Guarantee both sections exist so callers can rely on the keys.
    if not seen_technical:
        merged['技术标'] = ''
    if not seen_business:
        merged['商务标'] = ''

    return merged

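# If the only outer key is something like '评分因素' (i.e. it contains the given word), this function extracts the content nested under it.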
def process_data_based_on_key(data, word):
    """Unwrap ``data`` when it consists of a single wrapper key containing ``word``.

    Args:
        data: The parsed structure to inspect. Normally a dict; anything
            without a ``keys()`` method (a list, ``None``, a string, ...) is
            returned unchanged instead of raising ``AttributeError``.
        word: Substring that the sole key must contain for unwrapping.

    Returns:
        ``data[key]`` when ``data`` has exactly one key and that key contains
        ``word``; otherwise ``data`` itself.
    """
    try:
        keys = list(data.keys())
    except AttributeError:
        # Not a mapping, so there is no wrapper key to strip.
        return data
    # Unwrap only when there is exactly one key and it contains the word.
    if len(keys) == 1 and word in keys[0]:
        return data[keys[0]]
    # Otherwise hand back the original structure untouched.
    return data

def get_evaluation_standards(truncate_file):
    """Query the model for evaluation criteria and return them regrouped as JSON.

    The file is passed to ``upload_file`` and the resulting id, together with
    a fixed prompt, to ``qianwen_long``; the answer is run through
    ``clean_json_string`` and a single outer key containing '评分' is stripped.
    If any outer key names a package ('一包' .. '五包'), only those keys are
    kept and each one is regrouped separately; otherwise the whole structure
    is regrouped at once.

    Args:
        truncate_file: Passed straight to ``upload_file``; presumably the path
            of the PDF holding the evaluation-method section (confirm against
            that helper).

    Returns:
        A JSON string (non-ASCII characters kept, 4-space indent) with the
        regrouped evaluation criteria.
    """
    package_markers = ['一包', '二包', '三包', '四包', '五包']
    target_values = ['技术', '设计', '实施']

    file_id = upload_file(truncate_file)
    user_query = "根据该文档中的评标办法前附表或者评分标准表，请你列出该文件的技术标，商务标，投标报价评审标准以及它们对应的具体评分要求，外层键名分别为'技术标','商务标','投标报价'。如果评分内容不是这3个，则返回文档中给定的评分内容以及它的评分要求，都以json的格式返回结果，如果该采购活动有多个包，则最外层键名为对应的包名。请不要回答有关资格审查的内容"
    raw_answer = qianwen_long(file_id, user_query)
    parsed_answer = clean_json_string(raw_answer)
    result_data = process_data_based_on_key(parsed_answer, '评分')

    # Outer keys that name a package (e.g. '一包').
    package_keys = [
        key for key in result_data
        if any(marker in key for marker in package_markers)
    ]

    if package_keys:
        # Multi-package answer: regroup each package on its own.
        updated_jsons = {
            key: combine_technical_and_business(result_data[key], target_values)
            for key in package_keys
        }
    else:
        # No package keys: regroup the whole structure in one go.
        updated_jsons = combine_technical_and_business(result_data, target_values)

    # Serialise to JSON, keeping the Chinese text readable.
    return json.dumps(updated_jsons, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    # Manual run against a local sample file; the path is machine-specific.
    truncate_file = "C:\\Users\\Administrator\\Desktop\\货物标\\output2\\竞争性谈判文件(3)_evaluation_method.pdf"
    print(get_evaluation_standards(truncate_file))