zbparse/flask_app/货物标/资格审查main.py

# -*- encoding:utf-8 -*-
import json
import os
import re
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

from flask_app.general.通义千问long import upload_file, qianwen_long
from flask_app.general.多线程提问 import multi_threading
from flask_app.general.json_utils import clean_json_string
from flask_app.货物标.提取json货物标版 import convert_clause_to_json
import copy
import concurrent.futures
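# Design note: the input dict may be nested. Decisions are made on key names, not on values:
# when a level has more than one key and those keys are pure ordinals (digits / dots), the
# ordinal keys are dropped and the sibling values are kept in a list instead.
# More generally, when sibling keys are uniform ordinals, discard the keys and keep the values as a list.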

def is_numeric_key(key):
    """Tell whether ``key`` is an ordinal label instead of a real field name.

    Recognised shapes: digits and dots (``1``, ``1.2.3``), a number wrapped in
    ASCII or full-width parentheses (``(1)``, ``（1）``), one ASCII letter
    (``a``), a letter followed by digits (``a1``), digits followed by one
    letter (``1a``), and one letter followed by a dot (``a.``).
    Letter-digit-letter mixes such as ``a1b`` are rejected.

    Args:
        key (str): Dictionary key to classify.

    Returns:
        bool: True if the whole key matches one of the ordinal shapes.
    """
    ordinal_re = re.compile(
        r'^[\d.]+$|^\(\d+\)$|^（\d+）$|^[a-zA-Z]$|^[a-zA-Z]\d+$|^\d+[a-zA-Z]$|^[a-zA-Z]\.$'
    )
    return bool(ordinal_re.match(key))


def contains_number_or_index(key, value):
    """Decide whether a key/value pair is only a serial-number entry.

    A pair counts as a serial-number entry (truthy result) when the value is
    numeric, when the value is a string containing digits but no Chinese
    characters, or when the key contains "序号". A string value that mixes
    digits with Chinese characters is treated as real content and always
    yields False, even if the key contains "序号".

    Args:
        key (str): Dictionary key.
        value: Dictionary value of any type.

    Returns:
        A truthy value (True or a regex match object) when the pair should be
        dropped, otherwise a falsy value (False or None).
    """
    value_is_text = isinstance(value, str)
    key_mentions_index = '序号' in key
    numeric_value = isinstance(value, (int, float)) or (value_is_text and value.isdigit())
    digit_hit = value_is_text and re.search(r'\d+', value)
    chinese_hit = value_is_text and re.search(r'[\u4e00-\u9fff]', value)

    # Digits embedded in Chinese text are content, not a serial number.
    if digit_hit and chinese_hit:
        return False

    return numeric_value or key_mentions_index or digit_hit


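# Within one dict, when every value is "" or "/" (or an empty list), the values are discarded
# and the key names are kept as a list of strings.
# Entries whose key contains "序号" or whose value is purely numeric are treated as serial numbers and removed.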
def preprocess_dict(data):
    """Recursively strip placeholder values and serial-number entries.

    Rules applied at each level:
      * list: every element is processed recursively;
      * dict with at most one entry: only the value is processed;
      * dict with several entries whose values are all "", "/" or an empty
        list: replaced by the list of its keys;
      * any other dict: entries flagged by ``contains_number_or_index`` are
        dropped, the rest are processed recursively, and entries whose
        processed value is "" are dropped;
      * anything else is returned unchanged.

    Args:
        data: Arbitrary JSON-like structure.

    Returns:
        The cleaned structure (dict, list, or the original scalar).
    """
    if isinstance(data, dict):
        if len(data) <= 1:
            return {name: preprocess_dict(child) for name, child in data.items()}

        def is_placeholder(candidate):
            return candidate == "" or candidate == "/" or (isinstance(candidate, list) and not candidate)

        # Nothing but placeholders: the key names themselves are the content.
        if all(is_placeholder(child) for child in data.values()):
            return list(data.keys())

        kept = {}
        for name, child in data.items():
            if contains_number_or_index(name, child):
                continue
            cleaned = preprocess_dict(child)
            if cleaned != "":  # keep only non-empty values
                kept[name] = cleaned
        return kept

    if isinstance(data, list):
        return [preprocess_dict(element) for element in data]

    return data


def process_dict(data):
    """
    Recursively reshape a dict by collapsing ordinal keys into ordered lists.

    Keys recognised by ``is_numeric_key`` are removed; their (recursively
    processed) values are collected into an ``'items'`` list ordered by the
    numeric parts of the key. For the remaining keys, list values are
    processed element by element, single-entry dict elements are replaced by
    their value, and one-element lists are flattened to the element itself.

    Special cases of the return value:
      * non-dict input is returned unchanged;
      * if ``'items'`` is the only resulting key, the list itself is returned;
      * if every resulting value is an empty list (including the case of an
        empty input dict), the list of resulting keys is returned.

    Note: a real, non-ordinal key named ``'items'`` overwrites the collected
    ordinal list, exactly as before.

    Args:
        data (dict): Input dictionary (other types are passed through).

    Returns:
        dict | list | original type: The reshaped structure.
    """
    if not isinstance(data, dict):
        return data

    result = {}
    numeric_keys = []
    non_numeric_keys = {}

    # Split the keys into ordinal keys and real field names.
    for key, value in data.items():
        if is_numeric_key(key):
            numeric_keys.append((key, value))
        else:
            non_numeric_keys[key] = value

    # Ordinal keys: drop the key, keep the processed values in ordinal order.
    if numeric_keys:
        def sort_key(item):
            # Always compare ordinals as tuples of ints. Parsing "1.10" as the
            # float 1.1 would put it before "1.2", and would put "1.1" after
            # "1.1.1" because (1.1,) > (1, 1, 1). Keys without a plain digit
            # part (e.g. "a", "(1)") map to () and keep their insertion order,
            # since sorted() is stable.
            try:
                return tuple(int(part) for part in item[0].split('.') if part.isdigit())
            except ValueError:
                # isdigit() accepts a few characters int() rejects.
                return ()

        numeric_keys_sorted = sorted(numeric_keys, key=sort_key)
        result['items'] = [process_dict(item[1]) for item in numeric_keys_sorted]

    # Real field names.
    for key, value in non_numeric_keys.items():
        if isinstance(value, list):
            processed_list = []
            for item in value:
                if isinstance(item, dict) and len(item) == 1:
                    # A single-entry dict only wraps its value.
                    processed_item = process_dict(next(iter(item.values())))
                else:
                    processed_item = process_dict(item)

                # Flatten a one-element list produced by the recursion.
                if isinstance(processed_item, list) and len(processed_item) == 1:
                    processed_item = processed_item[0]

                processed_list.append(processed_item)

            # A list with a single element is flattened to that element.
            if len(processed_list) == 1:
                result[key] = processed_list[0]
            else:
                result[key] = processed_list
        else:
            # Non-list values are simply processed recursively.
            result[key] = process_dict(value)

    # Only ordinal children: return the list directly.
    if len(result) == 1 and 'items' in result:
        return result['items']

    # Every value is an empty list: the key names are the actual content.
    if all(isinstance(v, list) and not v for v in result.values()):
        return list(result.keys())

    return result


# 查找引用的序号
def find_chapter_clause_references(data, parent_key=""):
    """Collect string values that point to chapter/clause one or to the notice.

    The structure is walked recursively. A string value is reported when it
    matches ``第[一1]+[章款]|公告|邀请`` and also contains one of "符合",
    "满足" or "详见". Entries whose dotted key path or string value contains
    "格式要求" are skipped entirely. Only dict elements of lists are
    descended into; their path gets an ``[index]`` suffix.

    Args:
        data: Structure to scan; anything but a dict yields an empty list.
        parent_key (str): Dotted path of the enclosing keys.

    Returns:
        list[dict]: One ``{dotted_key_path: value}`` dict per reference found.
    """
    if not isinstance(data, dict):
        return []

    exclude_list = ["格式要求"]
    reference_re = re.compile(r'第[一1]+[章款]|公告|邀请')
    trigger_words = ("符合", "满足", "详见")

    def is_excluded(text):
        return any(term in text for term in exclude_list)

    found = []
    for key, value in data.items():
        # Dotted path of the current entry.
        full_key = f"{parent_key}.{key}" if parent_key else key

        # Skip format-requirement entries, whether flagged by key or by value.
        if is_excluded(full_key) or (isinstance(value, str) and is_excluded(value)):
            continue

        if isinstance(value, dict):
            found += find_chapter_clause_references(value, full_key)
        elif isinstance(value, list):
            for index, item in enumerate(value):
                if isinstance(item, dict):
                    found += find_chapter_clause_references(item, f"{full_key}[{index}]")
        elif isinstance(value, str):
            if reference_re.search(value) and any(word in value for word in trigger_words):
                found.append({full_key: value})

    return found


def preprocess_value(value):
    """Reduce a sentence to the fragment that names the referenced chapter/clause.

    When ``value`` contains "第…章" or "第…款", it is split on Chinese/ASCII
    commas, full stops, question marks and exclamation marks, but only at
    positions where both the ASCII and the full-width parentheses seen so far
    are balanced. The first fragment containing "章" or "款" is returned with
    a leading "符合", "应满足" or "详见" removed. Otherwise ``value`` is
    returned unchanged.

    Args:
        value (str): Text of a review item.

    Returns:
        str: The reference fragment, or the original text.
    """
    if not (re.search(r'第(.+?)章', value) or re.search(r'第(.+?)款', value)):
        return value

    separator_re = re.compile(r'[，。？！,\?!]')
    fragments = []
    buffer = []
    ascii_balance = 0  # count of '(' minus count of ')'
    wide_balance = 0   # count of '（' minus count of '）'

    for ch in value:
        if ch == '(':
            ascii_balance += 1
        elif ch == ')':
            ascii_balance -= 1
        elif ch == '（':
            wide_balance += 1
        elif ch == '）':
            wide_balance -= 1

        # Punctuation inside an open parenthesis does not end a fragment.
        if ascii_balance == 0 and wide_balance == 0 and separator_re.match(ch):
            fragments.append("".join(buffer).strip())
            buffer = []
        else:
            buffer.append(ch)

    if buffer:
        fragments.append("".join(buffer).strip())

    for fragment in fragments:
        if '章' in fragment or '款' in fragment:
            # Drop the leading verb so only the reference itself remains.
            return re.sub(r'^(符合|应满足|详见)\s*', '', fragment.strip())

    return value

#[{'资格性审查.资格要求': '符合本采购文件第一章第二款要求，并提供合格有效的证明材料。'}]
def generate_questions(input_list):
    """Build one follow-up question per referenced review item.

    Each dict in ``input_list`` maps a dotted key path to the text containing
    the reference, e.g.
    ``{'资格性审查.资格要求': '符合本采购文件第一章第二款要求，并提供合格有效的证明材料。'}``.
    In the question text the first '.' of the key is replaced with "中的",
    and the value is shortened by ``preprocess_value``; the untouched key is
    requested as the JSON key of the answer.

    Args:
        input_list (list[dict]): Items as returned by
            ``find_chapter_clause_references``.

    Returns:
        list[str]: Questions in input order.
    """
    template = (
        "关于{modified_key},{value}的内容是怎样的？请按json格式给我提供信息，"
        "键名为'{original_key}'，而键值需要完全与原文保持一致，不要擅自总结、删减，"
        "如果存在未知信息，请在对应键值处填'未知'。"
    )

    def build(original_key, value):
        # Only the first '.' becomes "中的"; keys without a dot stay as they are.
        modified_key = original_key.replace('.', '中的', 1) if '.' in original_key else original_key
        return template.format(
            modified_key=modified_key,
            original_key=original_key,
            value=preprocess_value(value),
        )

    questions = []
    for input_dict in input_list:
        questions.extend(build(original_key, value) for original_key, value in input_dict.items())
    return questions


"""
eg:
response_list = [
    {
        "person.name": "Bob",
        "person.address.city": "Los Angeles"
    },
    {
        "company.location": "Austin",
        "person.age": 35
    }
]
"""


# 用新数据更新原有字典
def update_json_data(original_data, response_list):
    """Write dotted-key updates into a nested dict, in place.

    Every key of every dict in ``response_list`` is split on '.'; missing
    intermediate levels are created as empty dicts. At the final level, a
    dict value is shallow-merged into an existing dict (new entries win),
    while any other combination overwrites the existing value.

    Args:
        original_data (dict): Nested dict to update (mutated).
        response_list (list[dict]): Dicts mapping dotted key paths to values.

    Returns:
        dict: ``original_data`` itself, after the updates.
    """
    for response_dict in response_list:
        for dotted_key, new_value in response_dict.items():
            *parents, leaf = dotted_key.split('.')

            # Walk (and create where needed) the intermediate levels.
            node = original_data
            for name in parents:
                node = node.setdefault(name, {})

            if isinstance(new_value, dict) and isinstance(node.get(leaf, None), dict):
                node[leaf] = {**node.get(leaf, {}), **new_value}
            else:
                node[leaf] = new_value
    return original_data


def process_match_keys(match_keys, clause_path_file):
    """
    Append the text of referenced clauses to each entry of ``match_keys``.

    For every string value, the Arabic numbers 1-10 are first rewritten as
    Chinese numerals. Each Chinese numeral that is not directly followed by
    "章" or "部分" is then looked up in the clause JSON: every clause whose
    key contains the numeral is appended (once) to the value, separated by
    newlines. Entries without any matching clause are left untouched, so the
    caller can detect "nothing found" by comparing with the original list.

    Args:
        match_keys (list[dict]): Dicts mapping a key path to reference text.
            The dicts are updated in place.
        clause_path_file (str): Path of the clause JSON file (expected to
            hold an object mapping clause keys to clause text).

    Returns:
        list[dict]: ``match_keys`` after the updates. It is returned unchanged
        when the file is missing, is not valid JSON, or does not contain a
        JSON object.
    """
    # Arabic number -> Chinese numeral, for 1..10.
    digit_map = {'1': '一', '2': '二', '3': '三', '4': '四', '5': '五', '6': '六', '7': '七', '8': '八', '9': '九',
                 '10': '十'}
    # Numerals that may be used for the clause lookup.
    chinese_numerals = ['一', '二', '三', '四', '五', '六', '七', '八', '九', '十']

    # '10' must be tried before the single digits: replacing '1' first turned
    # "10" into "一0", so "十" could never be produced.
    digit_pattern = re.compile(r'10|[1-9]')

    # A run of Chinese numerals not followed by '章' or '部分'; the character
    # after the run is captured as well but not used.
    pattern = re.compile(r'([一二三四五六七八九十]+)(?!章|部分)(.)')

    # Load the clause file.
    try:
        with open(clause_path_file, 'r', encoding='utf-8') as file:
            clause_path = json.load(file)
    except FileNotFoundError:
        print(f"文件未找到: {clause_path_file}")
        return match_keys
    except json.JSONDecodeError:
        print(f"文件内容不是有效的JSON格式: {clause_path_file}")
        return match_keys

    if not isinstance(clause_path, dict):
        # Without key -> text pairs there is nothing to look up.
        print(f"文件内容不是有效的JSON格式: {clause_path_file}")
        return match_keys

    for item in match_keys:
        # Iterate over a snapshot because values of ``item`` are reassigned below.
        for key, value in list(item.items()):
            if not isinstance(value, str):
                continue  # only text can contain a clause reference

            value = digit_pattern.sub(lambda m: digit_map[m.group()], value)

            # Clause texts to append, without duplicates and in lookup order.
            clauses_to_append = []
            for numeral, _following in pattern.findall(value):
                if numeral not in chinese_numerals:
                    continue
                for clause_key, clause_value in clause_path.items():
                    if numeral in clause_key and clause_value not in clauses_to_append:
                        clauses_to_append.append(clause_value)

            if clauses_to_append:
                item[key] = value + '\n' + '\n'.join(clauses_to_append)
    return match_keys


# 处理如'符合本采购文件第一章第二款要求'的情况，跳转到指定地方摘取内容
def process_additional_queries(combined_res, match_keys, output_folder, notice_path, invalid_path):
    """
    Resolve chapter/clause references and merge the resolved text into the result.

    The clause JSON produced by ``convert_clause_to_json`` from ``notice_path``
    is searched first. If that lookup changes nothing, one question per
    reference is sent to the model against the file uploaded from
    ``invalid_path`` and the answers are merged instead.

    Args:
        combined_res (dict): Review result collected so far (updated in place
            by ``update_json_data``).
        match_keys (list[dict]): References to resolve, e.g.
            [{'资格性审查.资格要求': '符合本采购文件第一章第二款要求，并提供合格有效的证明材料。'}]
        output_folder (str): Folder handed to ``convert_clause_to_json``.
        notice_path (str): Path of the notice file.
        invalid_path (str): Path of the file queried when the notice lookup
            finds nothing.

    Returns:
        dict: ``{"资格审查": <updated combined_res>}``.
    """
    clause2_path = convert_clause_to_json(notice_path, output_folder, 2)

    # Work on a copy so the outcome can be compared with the untouched input.
    updated_match_keys = process_match_keys(copy.deepcopy(match_keys), clause2_path)

    if updated_match_keys == match_keys:
        # Nothing found in the notice: ask the model instead.
        ques = generate_questions(match_keys)
        file_id = upload_file(invalid_path)
        # Last argument: 1 = Bailian RAG, 2 = qianwen-long.
        qianwen_results = multi_threading(ques, "", file_id, 2)
        updated_match_keys = [clean_json_string(res) for _, res in qianwen_results] if qianwen_results else []

    form_response_dict = update_json_data(combined_res, updated_match_keys)
    return {"资格审查": form_response_dict}


def combine_qualification_review(invalid_path, qualification_path, notice_path):
    """Collect applicant requirements plus qualification/compliance review criteria.

    Steps, as implemented below:
      1. If ``qualification_path`` is truthy, upload it and ask ``first_query``
         whether it contains qualification-review content, compliance-review
         content, and whether the two cannot be told apart. For each review
         type not answered '是', the file uploaded from ``invalid_path`` is
         used instead. If ``qualification_path`` is falsy, both review types
         use the ``invalid_path`` upload.
      2. Build the second-stage prompts: one combined prompt when all three
         answers are '是', otherwise one prompt per review type.
      3. Run the second-stage prompts and the applicant-requirement prompt
         (against ``notice_path``, or ``invalid_path`` when ``notice_path`` is
         falsy) in a thread pool with three workers.
      4. Re-order the answers and pass them through
         ``process_dict(preprocess_dict(...))``.

    Args:
        invalid_path: Path of the fallback document.
        qualification_path: Path of the qualification document; may be falsy.
        notice_path: Path of the notice document; may be falsy.

    Returns:
        dict: ``{"资格审查": processed_data}``.

    NOTE(review): ``upload_file`` presumably returns a file ID accepted by
    ``qianwen_long`` and ``clean_json_string`` presumably returns a dict -
    both are defined outside this file; confirm there.
    """
    detailed_res = {}
    together_ask = False
    # File ID of the invalid_path upload; stays None until an upload is needed
    invalid_file_id = None
    first_res = {}

    if qualification_path:
        # Upload the qualification file and keep its file ID
        qualification_file_id = upload_file(qualification_path)
        # First query: checks whether qualification review and compliance review content exist
        first_query = """
            该文档中是否有关于资格性审查标准的具体内容,是否有关于符合性审查标准的具体内容?若两者都有，你还需要判断资格性审查和符合性审查的内容是否在同一张表中且没有明确的条款名称区分二者？请以json格式给出回答,外键分别为'资格性审查'和'符合性审查'和'无法区分',键值仅限于'是','否'。

            要求与指南：
            "无法区分"的键值默认是'否'，仅当该文档同时满足以下条件时，它的键值才是'是'。
            1. 既有资格性审查内容，也有符合性审查内容时
            2. 它们的内容在同一张表格中
            3. 表中没有两个合并单元格内容为'资格性审查'和'符合性审查'类似的表述，只有'资格性审查和符合性审查'的合并表述。
            
            以下为示例表格1，
            | 序号 | 资格性检查和符合性检查内容 |
            | -- | -- |
            | 1 | 供应商应具备《政府采购法》第二十二条规定的条件，提供相关材料。 |
            | 1 | 1）法人或者其他组织的营业执照等证明文件，自然人的身份证明； |
            | 1 | 2）财务状况报告，依法缴纳税收和社会保障资金的声明函； |
            | 1 | 3）具备履行合同所必需的设备和专业技术能力的证明材料； |
            | 2 | 供应商应提供经营场所标识标牌、经营场地及经营设备、产品等相关图片； |
            | 3 | 按优惠率进行报价，其优惠率应不低于市场价格5％； |
            对应输出示例如下:
            {
                "资格性审查":"是",
                "符合性审查":"是",
                "无法区分":"是"
            }
            
            以下为示例表格2，
            | 条款 | 评审因素 | 评审标准 | 备注 |
            | -- | -- | -- | -- |
            | 资格评审标准 | 满足《中华人民共和国政府采购法》第二十二条规定 | 1、具有独立承担民事责任的能力；2、具有良好的商业信誉和健全的财务会计制度；3、具有履行合同所必需的设备和专业技术能力；4、有依法缴纳税收和社会保障资金的良好记录；5、参加政府采购活动前三年内，在经营活动中没有重大违法记录；6、法律、行政法规规定的其他条件。 |  |
            | 资格评审标准 | 单位负责人 | 提交“未与单位负责人为同一人或者存在直接控股、管理关系的它投标人，参加同一合同项下的政府采购活动声明函”。 |  |
            | 资格评审标准 | 提供服务 | 提交“未为本采购项目提供整体设计、规范编制或者项目管理、理、检测等服务的声明函”。 |  |
            | 符合评审标准 | 递交的响应文件 | 符合竞争性磋商文件第一部分竞争性磋商公告“六、其它补充事宜”第1、2 条要求。 |  |
            | 符合评审标准 | 文件签章 | 响应性文件签字和盖章齐全。 |  |
            | 符合评审标准 | 磋商有效期 | 满足磋商有效期。 |  |
            对应输出示例如下：
            {
                "资格性审查":"是",
                "符合性审查":"是",
                "无法区分":"否"
            }
            """

        # Run the first query and clean the returned JSON string
        print("call first_query")
        first_res = clean_json_string(qianwen_long(qualification_file_id, first_query))

        # Use the qualification file only for the review types it actually contains
        zige_file_id = qualification_file_id if first_res.get("资格性审查") == "是" else None
        fuhe_file_id = qualification_file_id if first_res.get("符合性审查") == "是" else None
        if first_res.get("资格性审查") == '是' and first_res.get("符合性审查") == '是' and first_res.get(
                "无法区分") == "是":
            together_ask = True  # both review types must be asked in one combined query

        # Upload the fallback file only if at least one review type needs it
        if zige_file_id is None or fuhe_file_id is None:
            if invalid_file_id is None:
                invalid_file_id = upload_file(invalid_path)
            if zige_file_id is None:
                zige_file_id = invalid_file_id
            if fuhe_file_id is None:
                fuhe_file_id = invalid_file_id

    else:
        # No qualification file: use the invalid_path upload for both review types
        zige_file_id = fuhe_file_id = upload_file(invalid_path)

    # together_ask decides which second-stage prompts are used
    if together_ask:
        second_query = [
            {
                "key": "资格性和符合性审查",
                "query": '''
                        问题：该招标文件中规定的资格性和符合性审查标准是什么的？
                        输出要求：
                            1.请以json格式给出，外层为'资格性和符合性审查'，最内层的值需要用列表包裹。
                            2.一层嵌套内的键需要总结分类为某类评审因素或是直接使用原文中的评审因素字段、标题。
                            3.你的回答要与原文完全一致，若审查标准在表格中，那么单元格内的内容基本都要涵盖，不要遗漏，作为键值中的字符串列表项。
                            4.最大细分为二层嵌套即可。
                        输出示例：
                            {
                                "资格性和符合性审查": { #一层嵌套
                                    "某类评审因素": [ #二层嵌套
                                        "因素1",
                                        "因素2"
                                    ]
                                    ...
                                }
                            }
                    '''
            }
        ]
    else:
        second_query = [
            {
                "key": "资格性审查",
                "query": '''
                    问题：该招标文件中规定的资格性审查标准是什么的？
                    输出要求：
                        1.请以json格式给出，外层为'资格性审查'，最内层的值需要用列表包裹。
                        2.一层嵌套内的键需要总结分类为某类评审因素。
                        3.你的回答要与原文完全一致，不要回答有关符合性审查的内容。
                        4.仔细检查你所选取的标准，若发现这些标准实际上是在描述不允许出现的资格性审查情况，则将外键替换为'资格性审查(以下情况不得出现)'，并将这些标准写入其中。
                        5.最大细分为二层嵌套即可。
                    输出示例1：
                        {
                            "资格性审查": { #一层嵌套
                                "某类评审因素": [ #二层嵌套
                                    "因素1",
                                    "因素2"
                                ]
                                ...
                            }
                        }
                    输出示例2：
                        {
                            "资格性审查(以下情况不得出现)": { #若发现文中出现均为反向标准，用像该示例一样的处理
                                "某类不允许的评审因素": [ #二层嵌套
                                    "因素1",
                                    "因素2"
                                ]
                                ...
                            }
                        }
                '''
            },
            {
                "key": "符合性审查",
                "query": '''
                    问题：该招标文件中规定的符合性审查标准是什么的？
                    输出要求：
                        1.请以json格式给出，外层为'符合性审查'，最内层的值需要用列表包裹。
                        2.一层嵌套内的键需要总结分类为某类评审因素或是直接使用原文中的评审因素字段、标题。
                        3.你的回答要与原文完全一致，也不要回答有关资格性审查的内容。
                        4.仔细检查你所选取的标准，若发现这些标准实际上是在描述不允许出现的符合性审查情况，则将外键替换为'符合性审查(以下情况不得出现)'，并将这些标准写入其中。
                        5.最大细分为二层嵌套即可。
                    输出示例1：
                        {
                            "符合性审查": { #一层嵌套
                                "某类评审因素": [ #二层嵌套
                                    "因素1",
                                    "因素2"
                                ]
                                ...
                            }
                        }
                    输出示例2：
                        {
                            "符合性审查(以下情况不得出现)": { #若发现文中出现均为反向标准，用像该示例一样的处理
                                "某类不允许的评审因素": [ #二层嵌套
                                    "因素1",
                                    "因素2"
                                ]
                                ...
                            }
                        }
                '''
            }
        ]

    # Worker for one second-stage prompt
    def process_second_query(key, query, file_id):
        """Run one review prompt and return ``(outer_key, result)``.

        The answer is looked up under ``key`` first, then under
        ``key + "(以下情况不得出现)"``. If neither is present the result is
        "未找到相关内容"; if anything raises, ``(key, "查询失败")`` is returned.
        """
        print(f"call {key}")
        try:
            res = qianwen_long(file_id, query)
            cleaned_res = clean_json_string(res)

            # Result and the outer key it will be stored under
            result = None
            selected_key = key  # outer key defaults to the requested key

            if key in cleaned_res:
                result = cleaned_res[key]
            else:
                # Try the variant carrying the "(以下情况不得出现)" suffix
                alternate_key = f"{key}(以下情况不得出现)"
                if alternate_key in cleaned_res:
                    result = cleaned_res[alternate_key]
                    selected_key = alternate_key  # store the result under the suffixed key

            # Neither key was present: fall back to a placeholder text
            if result is None:
                result = "未找到相关内容"

            # Return the outer key that matched together with its result
            return selected_key, result
        except Exception as e:
            print(f"执行查询 '{key}' 时出错: {e}")
            return key, "查询失败"

    def process_notice(notice_path):
        """Ask for the applicant requirements and return ``("申请人资格要求", result)``.

        The result is the value under "申请人资格要求" in the cleaned answer,
        "未找到相关内容" when that key is missing, or "处理失败" when
        anything raises.
        """
        print("call notice_path")
        try:
            # Upload the notice file and keep its file ID
            file_id1 = upload_file(notice_path)

            # Prompt that extracts the applicant qualification requirements
            user_query1 = """
                第一章招标公告（投标邀请书）中说明的申请人资格要求是怎样的？请以json格式给出回答，外键为'申请人资格要求'，键值为字符串列表，其中每个字符串对应原文中的一条要求，你的回答与原文内容一致，不要擅自总结删减。输出格式示例如下：
                {
                    "申请人资格要求":[
                        "1.满足《中华人民共和国政府采购法》第二十二条规定；",
                        "1.1 法人或者其他组织的营业执照等证明文件，如供应商是自然人的提供身份证明材料；",
                        "2.未被列入“信用中国”网站(www.creditchina.gov.cn)信用服务栏失信被执行人、重大税收违法案件当事人名单；"
                    ]
                }
                """
            # Run the query and clean the answer
            res1 = clean_json_string(qianwen_long(file_id1, user_query1))
            # Pull out the applicant qualification requirements
            requirements = res1.get("申请人资格要求", "未找到相关内容")
            return "申请人资格要求", requirements
        except Exception as e:
            print(f"处理申请人资格要求时出错: {e}")
            return "申请人资格要求", "处理失败"

    # Run all prompts concurrently
    with ThreadPoolExecutor(max_workers=3) as executor:
        future_to_key = {}

        # Submit the second-stage prompts
        for query_info in second_query:
            key = query_info["key"]
            query = query_info["query"]
            if key == "资格性和符合性审查":
                current_file_id = qualification_file_id  # the combined query always uses the qualification file
            elif key == "资格性审查":
                current_file_id = zige_file_id
            elif key == "符合性审查":
                current_file_id = fuhe_file_id
            else:
                current_file_id = zige_file_id  # fallback: file chosen for the qualification review
            # print(f"Submitting query for key: {key}, file_id: {current_file_id}")
            future = executor.submit(process_second_query, key, query, current_file_id)
            future_to_key[future] = key

        # Applicant requirements come from the notice file, or from invalid_path when there is none
        if notice_path:
            future = executor.submit(process_notice, notice_path)
            future_to_key[future] = "申请人资格要求"
        else:
            future = executor.submit(process_notice, invalid_path)
            future_to_key[future] = "申请人资格要求"

        # Collect the results in completion order
        for future in as_completed(future_to_key):
            key, result = future.result()
            detailed_res[key] = result

    # Desired key order of the final result
    if together_ask:
        desired_order = [
            "申请人资格要求",
            "资格性和符合性审查"
        ]
    else:
        desired_order = [
            "申请人资格要求",
            ["资格性审查", "资格性审查(以下情况不得出现)"],
            ["符合性审查", "符合性审查(以下情况不得出现)"]
        ]

    # Rebuild the dict in the desired order
    ordered_res = {}
    for item in desired_order:
        if isinstance(item, list):
            for key in item:
                if key in detailed_res:
                    ordered_res[key] = detailed_res[key]
                    break  # the alternatives are mutually exclusive: keep the first one present
        else:
            if item in detailed_res:
                ordered_res[item] = detailed_res[item]

    # Clean and reshape the ordered result
    processed_data = process_dict(preprocess_dict(ordered_res))

    # Wrap the processed data under the top-level key
    return {"资格审查": processed_data}


# def combine_qualification_review(invalid_path, output_folder, qualification_path, notice_path):
#     DEFAULT_QUALIFICATION_REVIEW = {
#         "资格审查": {
#             "资格审查": "",
#             "符合性审查": ""
#         }
#     }
#
#     def process_file(file_path, invalid_path):
#         file_id = upload_file(file_path)
#         first_query = """
#         该文档中是否有关于资格性审查标准的具体内容,是否有关于符合性审查标准的具体内容?请以json格式给出回答,外键分别为'资格性审查'和'符合性审查',键值仅限于'是','否',输出格式示例如下:
#         {
#             "资格性审查":"是",
#             "符合性审查":"是"
#         }
#         """
#         qianwen_ans = clean_json_string(qianwen_long(file_id, first_query))
#         user_queries = [
#             {
#                 "key": "资格性审查",
#                 "query": "该招标文件中规定的资格性审查标准是怎样的？请以json格式给出，外层为'资格性审查'，你的回答要与原文完全一致，不可擅自总结删减，也不要回答有关符合性性审查的内容。"
#             },
#             {
#                 "key": "符合性审查",
#                 "query": "该招标文件中规定的符合性审查标准是怎样的？请以json格式给出，外层为'符合性审查'，你的回答要与原文完全一致，不可擅自总结删减，也不要回答有关资格性审查的内容。"
#             }
#         ]
#         combined_res = {}
#         file_id2 = None  # 延迟上传 invalid_path
#         def process_single_query(query_info):
#             nonlocal file_id2
#             key = query_info["key"]
#             query = query_info["query"]
#             # 根据键值决定使用哪个 file_id
#             if qianwen_ans.get(key) == "否":
#                 print("no")
#                 if not file_id2:
#                     file_id2 = upload_file(invalid_path)
#                 current_file_id = file_id2
#             else:
#                 current_file_id = file_id
#
#             # 调用大模型获取回答
#             ans = qianwen_long(current_file_id, query)
#             cleaned_data = clean_json_string(ans)
#             processed = process_dict(preprocess_dict(cleaned_data))
#             return processed
#
#         # 使用线程池并行处理查询
#         with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
#             futures = [executor.submit(process_single_query, q) for q in user_queries]
#             for future in concurrent.futures.as_completed(futures):
#                 result = future.result()
#                 combined_res.update(result)
#         return combined_res
#
#     try:
#         if not qualification_path:
#             file_to_process = invalid_path
#         else:
#             file_to_process = qualification_path
#
#         combined_res = process_file(file_to_process,invalid_path)
#         match_keys = find_chapter_clause_references(combined_res)
#
#         if not match_keys:
#             return {"资格审查": combined_res}
#
#         return process_additional_queries(combined_res, match_keys, output_folder, notice_path,invalid_path)   #还要跳转到第一章
#
#     except Exception as e:
#         print(f"Error in combine_qualification_review: {e}")
#         return DEFAULT_QUALIFICATION_REVIEW.copy()


# 整合基础信息核心代码
# [{'资格性审查.资格要求': '符合本采购文件第一章第二款要求，并提供合格有效的证明材料'}, {'资格性审查.没有重大违法记录的书面声明': '是否提交参加政府采购活动前三年内在经营活动中没有重大违法记录的书面承诺或声明（格式要求详见本项目采购文件第六章相关格式要求）'}]
if __name__ == "__main__":
    # Manual smoke run against sample files stored on the developer machine.
    # The paths below are machine-specific; adjust them before running.
    start_time = time.time()

    # Not used by combine_qualification_review; kept for the clause-lookup flow.
    output_folder = r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3"
    qualification_path = r"C:\Users\Administrator\Desktop\fsdownload\16fd6b4e-3975-4c83-8ba6-1bc9263a6a5b\ztbfile_qualification1.pdf"
    notice_path = r"C:\Users\Administrator\Desktop\fsdownload\16fd6b4e-3975-4c83-8ba6-1bc9263a6a5b\ztbfile_notice.pdf"
    invalid_path = r"C:\Users\Administrator\Desktop\fsdownload\16fd6b4e-3975-4c83-8ba6-1bc9263a6a5b\ztbfile_invalid.pdf"

    res = combine_qualification_review(invalid_path, qualification_path, notice_path)
    print(json.dumps(res, ensure_ascii=False, indent=4))

    # Report the elapsed wall-clock time in seconds.
    end_time = time.time()
    elapsed = end_time - start_time
    print(f"耗时：{elapsed}")
-.23测试分段

											
										
										
											2024-09-23 17:44:34 +08:00
+								# -*- encoding:utf-8 -*-
-.23测试分段

											
										
										
											2024-09-23 15:49:30 +08:00
+								import json
-.22代码结构优化

											
										
										
											2024-10-22 21:02:54 +08:00
+								import os
-.23测试分段

											
										
										
											2024-09-23 17:44:34 +08:00
+								import re
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
+								import time
 								from concurrent.futures import ThreadPoolExecutor, as_completed
-.5货物标截取优化

											
										
										
											2024-11-05 16:29:32 +08:00
+								from flask_app.general.通义千问long import upload_file, qianwen_long
-.22代码结构优化

											
										
										
											2024-10-22 10:06:22 +08:00
+								from flask_app.general.多线程提问 import multi_threading
 								from flask_app.general.json_utils import clean_json_string
-.8 评标修改 技术参数修改

											
										
										
											2024-11-11 17:12:38 +08:00
+								from flask_app.货物标.提取json货物标版 import convert_clause_to_json
-.1适配货物标

											
										
										
											2024-11-01 17:55:26 +08:00
+								import copy
-.5货物标截取优化

											
										
										
											2024-11-05 16:29:32 +08:00
+								import concurrent.futures
-.12

											
										
										
											2024-10-12 18:01:59 +08:00
+								# 这个字典可能有嵌套，你需要遍历里面的键名，对键名作判断，而不是键值，具体是这样的：如果处于同一层级的键的数量>1并且键名全由数字或点号组成。那么就将这些序号键名全部删除，重新组织成一个字典格式的数据，你可以考虑用字符串列表来保持部分平级的数据
 								# 对于同级的键，如果数量>1且键名都统一，那么将键名去掉，用列表保持它们的键值
-.23测试分段

											
										
										
											2024-09-23 17:44:34 +08:00
 								def is_numeric_key(key):
 								    # 这个正则表达式匹配由数字、点、括号中的数字或单个字母（小写或大写）组成的字符串，
 								    # 字母后跟数字，或数字后跟字母，单个字母后跟点，但不能是字母-数字-字母的组合
 								    pattern = r'^[\d.]+$|^\(\d+\)$|^（\d+）$|^[a-zA-Z]$|^[a-zA-Z]\d+$|^\d+[a-zA-Z]$|^[a-zA-Z]\.$'
 								    return re.match(pattern, key) is not None
-.12

											
										
										
											2024-10-12 18:01:59 +08:00
-.30健壮性优化

											
										
										
											2024-10-30 18:08:46 +08:00
-.23测试分段

											
										
										
											2024-09-23 17:44:34 +08:00
+								def contains_number_or_index(key, value):
 								    # 判断值是否是数字或数字字符串
 								    is_number = isinstance(value, (int, float)) or (isinstance(value, str) and value.isdigit())
 								    # 判断键是否包含 "序号"
 								    contains_index = '序号' in key
 								    # 判断值中是否包含数字
 								    contains_digit = isinstance(value, str) and re.search(r'\d+', value)
 								    # 判断值中是否包含中文字符
 								    contains_chinese = isinstance(value, str) and re.search(r'[\u4e00-\u9fff]', value)
 								    # 如果值中包含数字但也有中文字符，则保留（返回 False）
 								    if contains_digit and contains_chinese:
 								        return False
 								    # 如果值是数字或包含数字，且不包含中文字符，或者键包含 "序号"，返回 True
 								    return is_number or contains_index or contains_digit
-.12

											
										
										
											2024-10-12 18:01:59 +08:00
 								# 对于同一个字典中，可能存在若干键值对，若它们的键值都是""或者"/" 你就将它们的键值删去，它们的键名用字符串列表保存
 								# 如果键名是"序号"或者键值中全是数字，删去序号
-.23测试分段

											
										
										
											2024-09-23 17:44:34 +08:00
def preprocess_dict(data):
    """
    Recursively clean a nested dict/list structure.

    Rules for a dict with more than one entry:
      * if every value is "", "/" or an empty list, the dict collapses into
        the list of its key names;
      * otherwise entries flagged by ``contains_number_or_index`` are removed,
        the remaining values are cleaned recursively, and values that clean
        down to "" are dropped.
    A dict with zero or one entry only has its values cleaned recursively.
    Lists are cleaned element by element; any other value is returned as is.
    """
    if isinstance(data, list):
        return [preprocess_dict(element) for element in data]
    if not isinstance(data, dict):
        return data
    if len(data) <= 1:
        return {name: preprocess_dict(child) for name, child in data.items()}

    def _is_blank(candidate):
        # Placeholder cell content: empty string, "/" or an empty list.
        return candidate == "" or candidate == "/" or (isinstance(candidate, list) and not candidate)

    if all(_is_blank(child) for child in data.values()):
        return list(data.keys())

    kept = {}
    for name, child in data.items():
        if contains_number_or_index(name, child):
            continue
        cleaned = preprocess_dict(child)
        if cleaned != "":  # only keep non-empty values
            kept[name] = cleaned
    return kept
-.29投标人须知提取指定内容，结构化处理

											
										
										
											2024-09-29 18:01:55 +08:00
-.12

											
										
										
											2024-10-12 18:01:59 +08:00
-.23测试分段

											
										
										
											2024-09-23 17:44:34 +08:00
def process_dict(data):
    """
    Recursively restructure a dict whose keys may be outline numbers.

    Keys accepted by ``is_numeric_key`` are discarded; their values are
    processed recursively and collected, in outline order, into an 'items'
    list. Every other key is kept: a list value is processed element by
    element (a one-entry dict element is replaced by its processed value, a
    one-element list result is flattened), and a list that ends up with a
    single element is stored as that bare element.

    Args:
        data: Any value. Anything that is not a dict is returned unchanged.

    Returns:
        * the 'items' list itself when the dict held only numeric keys;
        * the list of key names when every resulting value is an empty list
          (an empty input dict therefore yields ``[]``);
        * otherwise the restructured dict.
    """
    if not isinstance(data, dict):
        return data
    result = {}
    numeric_keys = []
    non_numeric_keys = {}
    # Split the entries into outline-number keys and ordinary keys.
    for key, value in data.items():
        if is_numeric_key(key):
            numeric_keys.append((key, value))
        else:
            non_numeric_keys[key] = value

    if numeric_keys:
        def sort_key(item):
            # Compare outline numbers component-wise as integers so that
            # "1.10" sorts after "1.2" and "1.2" sorts before "1.2.3".
            # Parsing "1.10" as a float would give 1.1 and break both orders.
            # Keys without any pure-digit component (e.g. "(1)", "a") map to ().
            key = item[0]
            try:
                return tuple(int(part) for part in key.split('.') if part.isdigit())
            except ValueError:
                # isdigit() accepts characters (e.g. superscripts) int() rejects.
                return ()

        # sorted() is stable, so keys with equal sort keys keep input order.
        numeric_keys_sorted = sorted(numeric_keys, key=sort_key)
        result['items'] = [process_dict(item[1]) for item in numeric_keys_sorted]

    # NOTE: an ordinary key literally named 'items' overwrites the list above.
    for key, value in non_numeric_keys.items():
        if isinstance(value, list):
            processed_list = []
            for item in value:
                if isinstance(item, dict) and len(item) == 1:
                    # A one-entry dict is reduced to its (processed) value.
                    processed_item = process_dict(list(item.values())[0])
                else:
                    processed_item = process_dict(item)
                # Flatten a result that is a one-element list.
                if isinstance(processed_item, list) and len(processed_item) == 1:
                    processed_item = processed_item[0]
                processed_list.append(processed_item)
            # A list with a single element is stored as that element.
            if len(processed_list) == 1:
                result[key] = processed_list[0]
            else:
                result[key] = processed_list
        else:
            # Not a list: process recursively.
            result[key] = process_dict(value)

    # Only numeric keys were present: return the list directly.
    if len(result) == 1 and 'items' in result:
        return result['items']
    # Every value is an empty list: the key names themselves are the content.
    if all(isinstance(v, list) and not v for v in result.values()):
        return list(result.keys())
    return result
-.26 分段解析完全版

											
										
										
											2024-09-26 13:43:47 +08:00
-.12

											
										
										
											2024-10-12 18:01:59 +08:00
 								# 查找引用的序号
-.26 分段解析完全版

											
										
										
											2024-09-26 13:43:47 +08:00
def find_chapter_clause_references(data, parent_key=""):
    """
    Collect string values that point to another part of the tender document.

    A string value is reported when it matches the reference pattern
    (chapter/clause one, "公告" or "邀请") and also contains one of the cue
    words "符合", "满足" or "详见". Entries whose dotted path or string value
    contains "格式要求" are ignored. Nested dicts are searched recursively;
    inside lists only dict elements are searched.

    Args:
        data: Structure to search; anything that is not a dict yields [].
        parent_key: Dotted path of ``data`` within the outer structure.

    Returns:
        list: One ``{dotted_path: value}`` dict per reference found; list
        elements contribute a ``[index]`` suffix to the path.
    """
    found = []
    if not isinstance(data, dict):
        return found

    excluded_terms = ["格式要求"]
    reference_re = re.compile(r'第[一1]+[章款]|公告|邀请')

    def _mentions_excluded(text):
        return any(term in text for term in excluded_terms)

    for name, node in data.items():
        path = f"{parent_key}.{name}" if parent_key else name
        if _mentions_excluded(path):
            continue
        if isinstance(node, str):
            if _mentions_excluded(node):
                continue
            if reference_re.search(node):
                if "符合" in node or "满足" in node or "详见" in node:
                    found.append({path: node})
        elif isinstance(node, dict):
            found.extend(find_chapter_clause_references(node, path))
        elif isinstance(node, list):
            for position, element in enumerate(node):
                if isinstance(element, dict):
                    # Keep the list index in the path of nested entries.
                    found.extend(find_chapter_clause_references(element, f"{path}[{position}]"))
    return found
-.26 分段解析完全版

											
										
										
											2024-09-26 14:08:34 +08:00
-.12

											
										
										
											2024-10-12 18:01:59 +08:00
-.26 分段解析完全版

											
										
										
											2024-09-26 18:06:23 +08:00
def preprocess_value(value):
    """
    Reduce a sentence to the fragment that names the referenced chapter/clause.

    When ``value`` contains "第…章" or "第…款", it is split on Chinese/ASCII
    commas, full stops, question marks and exclamation marks, but only where
    all round brackets seen so far are balanced. The first fragment containing
    "章" or "款" is returned with a leading "符合", "应满足" or "详见" removed.
    In every other case ``value`` is returned unchanged.
    """
    has_reference = re.search(r'第(.+?)章', value) or re.search(r'第(.+?)款', value)
    if not has_reference:
        return value

    separator_chars = r'[，。？！,\?!]'
    bracket_tally = {'(': 0, ')': 0, '（': 0, '）': 0}
    segments = []
    buffer = []
    for char in value:
        if char in bracket_tally:
            bracket_tally[char] += 1
        brackets_closed = (bracket_tally['('] == bracket_tally[')']
                           and bracket_tally['（'] == bracket_tally['）'])
        # Punctuation inside an open bracket does not end a fragment.
        if brackets_closed and re.match(separator_chars, char):
            segments.append("".join(buffer).strip())
            buffer = []
        else:
            buffer.append(char)
    if buffer:
        segments.append("".join(buffer).strip())

    for segment in segments:
        if '章' in segment or '款' in segment:
            # Drop the leading verb so only the location remains.
            return re.sub(r'^(符合|应满足|详见)\s*', '', segment.strip())
    # No fragment names a chapter or clause.
    return value
-.30健壮性优化

											
										
										
											2024-10-30 18:08:46 +08:00
+								#[{'资格性审查.资格要求': '符合本采购文件第一章第二款要求，并提供合格有效的证明材料。'}]
-.26 分段解析完全版

											
										
										
											2024-09-26 18:06:23 +08:00
def generate_questions(input_list):
    """
    Build one LLM question per reference entry.

    Args:
        input_list: List of ``{dotted_key: sentence}`` dicts, e.g.
            [{'资格性审查.资格要求': '符合本采购文件第一章第二款要求，并提供合格有效的证明材料。'}]

    Returns:
        list[str]: One question per key/value pair, in input order. In the
        question text the first '.' of the key is written as '中的', the
        sentence is shortened by ``preprocess_value``, and the untouched key
        is requested as the JSON key of the answer.
    """
    prompt_template = (
        "关于{modified_key},{value}的内容是怎样的？请按json格式给我提供信息，"
        "键名为'{original_key}'，而键值需要完全与原文保持一致，不要擅自总结、删减，"
        "如果存在未知信息，请在对应键值处填'未知'。"
    )
    questions = []
    for entry in input_list:
        for original_key, value in entry.items():
            # str.replace leaves keys without '.' unchanged.
            readable_key = original_key.replace('.', '中的', 1)
            location = preprocess_value(value)
            questions.append(
                prompt_template.format(
                    modified_key=readable_key,
                    original_key=original_key,
                    value=location,
                )
            )
    return questions
-.12

											
										
										
											2024-10-12 18:01:59 +08:00
 								"""
 								eg:
 								response_list = [
 								    {
 								        "person.name": "Bob",
 								        "person.address.city": "Los Angeles"
 								    },
 								    {
 								        "company.location": "Austin",
 								        "person.age": 35
 								    }
 								]
 								"""
 								# 用新数据更新原有字典
 								def update_json_data(original_data, response_list):
-.26 分段解析完全版

											
										
										
											2024-09-26 18:06:23 +08:00
+								    def recursive_update(data, key, value):
 								        # 处理点分隔的键，递归定位并更新嵌套字典
 								        keys = key.split('.')
 								        for k in keys[:-1]:
 								            data = data.setdefault(k, {})
 								        if isinstance(value, dict) and isinstance(data.get(keys[-1], None), dict):
 								            data[keys[-1]] = {**data.get(keys[-1], {}), **value}
 								        else:
 								            data[keys[-1]] = value
-.12

											
										
										
											2024-10-12 18:01:59 +08:00
-.26 分段解析完全版

											
										
										
											2024-09-26 18:06:23 +08:00
+								    for response_dict in response_list:
 								        for key, value in response_dict.items():
 								            recursive_update(original_data, key, value)
 								    return original_data
-.26 分段解析完全版

											
										
										
											2024-09-26 14:08:34 +08:00
-.12

											
										
										
											2024-10-12 18:01:59 +08:00
 								def process_match_keys(match_keys, clause_path_file):
 								    """
 								    处理 match_keys，根据其中的数字或中文数字提取相应的条款内容，并附加到原始值后面。
 								    参数：
 								    - match_keys (list): 包含键值对的列表。
 								    - clause_path_file (str): clause_path的JSON文件路径。
 								    返回：
 								    - list: 更新后的match_keys列表。
 								    """
 								    # 定义数字到中文数字的映射，扩展到'十'
-.30健壮性优化

											
										
										
											2024-10-30 18:08:46 +08:00
+								    digit_map = {'1': '一', '2': '二', '3': '三', '4': '四', '5': '五', '6': '六', '7': '七', '8': '八', '9': '九',
 								                 '10': '十'}
-.12

											
										
										
											2024-10-12 18:01:59 +08:00
+								    # 定义中文数字列表
 								    chinese_numerals = ['一', '二', '三', '四', '五', '六', '七', '八', '九', '十']
 								    # 编译一个正则表达式，用于查找中文数字后面跟着的不是'章'或'部分'的字符
 								    # 这个模式会捕获中文数字和紧随其后的一个字符
 								    pattern = re.compile(r'([一二三四五六七八九十]+)(?!章|部分)(.)')
 								    # 读取clause_path的内容
 								    try:
 								        with open(clause_path_file, 'r', encoding='utf-8') as file:
 								            clause_path = json.load(file)
 								    except FileNotFoundError:
 								        print(f"文件未找到: {clause_path_file}")
 								        return match_keys
 								    except json.JSONDecodeError:
 								        print(f"文件内容不是有效的JSON格式: {clause_path_file}")
 								        return match_keys
 								    for item in match_keys:
 								        for key, value in item.items():
 								            # 将match_keys中的数字1-10转换为对应的中文数字
 								            for digit, chinese in digit_map.items():
 								                value = re.sub(r'{}'.format(digit), chinese, value)
 								            # 查找值中所有匹配的中文数字
 								            matches = pattern.findall(value)
 								            # 存储需要附加的条款内容，避免重复
 								            clauses_to_append = []
 								            for match in matches:
 								                numeral = match[0]
 								                # 检查提取的中文数字是否在定义的列表中
 								                if numeral in chinese_numerals:
 								                    # 在clause_path的键中查找包含该中文数字的键
 								                    for clause_key in clause_path.keys():
 								                        if numeral in clause_key:
 								                            clause_value = clause_path[clause_key]
 								                            if clause_value not in clauses_to_append:
 								                                clauses_to_append.append(clause_value)
 								            if clauses_to_append:
 								                # 将找到的条款内容用换行符连接
 								                appended_text = '\n'.join(clauses_to_append)
 								                # 更新当前项的值，添加换行和附加内容
 								                item[key] = value + '\n' + appended_text
 								    return match_keys
-.30健壮性优化

											
										
										
											2024-10-30 18:08:46 +08:00
+								# 处理如'符合本采购文件第一章第二款要求'的情况，跳转到指定地方摘取内容
 								def process_additional_queries(combined_res, match_keys, output_folder, notice_path, invalid_path):
-.1适配货物标

											
										
										
											2024-11-01 17:55:26 +08:00
+								    # print(match_keys)
-.12

											
										
										
											2024-10-12 18:01:59 +08:00
+								    """
 								    处理额外的查询并更新结果。
 								    Args:
 								        combined_res: 初始的组合结果。
 								        match_keys: 匹配的章节或条款引用。 [{'资格性审查.资格要求': '符合本采购文件第一章第二款要求，并提供合格有效的证明材料。'}]
-.14货物标解析

											
										
										
											2024-10-14 10:52:31 +08:00
+								        output_folder: 输出文件夹路径。
 								        notice_path: 通知文件路径。
-.12

											
										
										
											2024-10-12 18:01:59 +08:00
+								        knowledge_name: 知识库的名称。
 								    Returns:
 								        dict: 更新后的最终结果。
 								    """
-.30健壮性优化

											
										
										
											2024-10-30 18:08:46 +08:00
+								    # 对于空的notice_path的情况，此处做了异常处理
-.14货物标解析

											
										
										
											2024-10-14 10:52:31 +08:00
+								    clause2_path = convert_clause_to_json(notice_path, output_folder, 2)
-.1适配货物标

											
										
										
											2024-11-01 17:55:26 +08:00
+								    new_match_keys = copy.deepcopy(match_keys)
 								    updated_match_keys = process_match_keys(new_match_keys, clause2_path)
-.14货物标解析

											
										
										
											2024-10-14 10:52:31 +08:00
+								    if updated_match_keys != match_keys:
 								        form_response_dict = update_json_data(combined_res, updated_match_keys)
 								    else:
-.15货物标完整版

											
										
										
											2024-10-15 20:57:58 +08:00
+								        # 招标公告没找到内容，继续问大模型
-.14货物标解析

											
										
										
											2024-10-14 10:52:31 +08:00
+								        ques = generate_questions(match_keys)
-.30健壮性优化

											
										
										
											2024-10-30 18:08:46 +08:00
+								        file_id = upload_file(invalid_path)
-.15货物标完整版

											
										
										
											2024-10-15 20:57:58 +08:00
+								        qianwen_results = multi_threading(ques, "", file_id, 2)  # 1代表使用百炼rag 2代表使用qianwen-long
 								        updated_match_keys = [clean_json_string(res) for _, res in qianwen_results] if qianwen_results else []
-.14货物标解析

											
										
										
											2024-10-14 10:52:31 +08:00
+								        form_response_dict = update_json_data(combined_res, updated_match_keys)
 								    # 添加额外的处理步骤
 								    final_result = {"资格审查": form_response_dict}
 								    return final_result
-.12

											
										
										
											2024-10-12 18:01:59 +08:00
-.30健壮性优化

											
										
										
											2024-10-30 18:08:46 +08:00
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
+								def combine_qualification_review(invalid_path, qualification_path, notice_path):
 								    detailed_res = {}
-.21 考虑了资格审查和符合性审查一起的情况

											
										
										
											2024-11-21 10:50:06 +08:00
+								    together_ask = False
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
+								    # 初始化无效文件ID
 								    invalid_file_id = None
 								    first_res = {}
-.21 考虑了资格审查和符合性审查一起的情况

											
										
										
											2024-11-21 10:50:06 +08:00
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
+								    if qualification_path:
 								        # 上传资格文件并获取文件ID
 								        qualification_file_id = upload_file(qualification_path)
 								        # 定义第一个查询，用于检查资格性审查和符合性审查是否存在
-.5货物标截取优化

											
										
										
											2024-11-05 16:29:32 +08:00
+								        first_query = """
-.21 考虑了资格审查和符合性审查一起的情况

											
										
										
											2024-11-21 10:50:06 +08:00
+								            该文档中是否有关于资格性审查标准的具体内容,是否有关于符合性审查标准的具体内容?若两者都有，你还需要判断资格性审查和符合性审查的内容是否在同一张表中且没有明确的条款名称区分二者？请以json格式给出回答,外键分别为'资格性审查'和'符合性审查'和'无法区分',键值仅限于'是','否'。
 								            要求与指南：
 								            "无法区分"的键值默认是'否'，仅当该文档同时满足以下条件时，它的键值才是'是'。
 . 既有资格性审查内容，也有符合性审查内容时
 . 它们的内容在同一张表格中
 . 表中没有两个合并单元格内容为'资格性审查'和'符合性审查'类似的表述，只有'资格性审查和符合性审查'的合并表述。
 								            以下为示例表格1，
 								            | 序号 | 资格性检查和符合性检查内容 |
 								            | -- | -- |
 								            | 1 | 供应商应具备《政府采购法》第二十二条规定的条件，提供相关材料。 |
 								            | 1 | 1）法人或者其他组织的营业执照等证明文件，自然人的身份证明； |
 								            | 1 | 2）财务状况报告，依法缴纳税收和社会保障资金的声明函； |
 								            | 1 | 3）具备履行合同所必需的设备和专业技术能力的证明材料； |
 								            | 2 | 供应商应提供经营场所标识标牌、经营场地及经营设备、产品等相关图片； |
 								            | 3 | 按优惠率进行报价，其优惠率应不低于市场价格5％； |
 								            对应输出示例如下:
-.5货物标截取优化

											
										
										
											2024-11-05 16:29:32 +08:00
+								            {
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
+								                "资格性审查":"是",
-.21 考虑了资格审查和符合性审查一起的情况

											
										
										
											2024-11-21 10:50:06 +08:00
+								                "符合性审查":"是",
 								                "无法区分":"是"
 								            }
 								            以下为示例表格2，
 								            | 条款 | 评审因素 | 评审标准 | 备注 |
 								            | -- | -- | -- | -- |
 								            | 资格评审标准 | 满足《中华人民共和国政府采购法》第二十二条规定 | 1、具有独立承担民事责任的能力；2、具有良好的商业信誉和健全的财务会计制度；3、具有履行合同所必需的设备和专业技术能力；4、有依法缴纳税收和社会保障资金的良好记录；5、参加政府采购活动前三年内，在经营活动中没有重大违法记录；6、法律、行政法规规定的其他条件。 |  |
 								            | 资格评审标准 | 单位负责人 | 提交“未与单位负责人为同一人或者存在直接控股、管理关系的它投标人，参加同一合同项下的政府采购活动声明函”。 |  |
 								            | 资格评审标准 | 提供服务 | 提交“未为本采购项目提供整体设计、规范编制或者项目管理、理、检测等服务的声明函”。 |  |
 								            | 符合评审标准 | 递交的响应文件 | 符合竞争性磋商文件第一部分竞争性磋商公告“六、其它补充事宜”第1、2 条要求。 |  |
 								            | 符合评审标准 | 文件签章 | 响应性文件签字和盖章齐全。 |  |
 								            | 符合评审标准 | 磋商有效期 | 满足磋商有效期。 |  |
 								            对应输出示例如下：
 								            {
 								                "资格性审查":"是",
 								                "符合性审查":"是",
 								                "无法区分":"否"
-.5货物标截取优化

											
										
										
											2024-11-05 16:29:32 +08:00
+								            }
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
+								            """
-.22代码结构优化

											
										
										
											2024-10-22 21:02:54 +08:00
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
+								        # 执行第一个查询并清洗返回的JSON字符串
 								        print("call first_query")
 								        first_res = clean_json_string(qianwen_long(qualification_file_id, first_query))
-.17 小解析货物标

											
										
										
											2024-10-17 15:33:58 +08:00
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
+								        # 判断是否存在资格性和符合性审查
 								        zige_file_id = qualification_file_id if first_res.get("资格性审查") == "是" else None
 								        fuhe_file_id = qualification_file_id if first_res.get("符合性审查") == "是" else None
-.21 考虑了资格审查和符合性审查一起的情况

											
										
										
											2024-11-21 10:50:06 +08:00
+								        if first_res.get("资格性审查") == '是' and first_res.get("符合性审查") == '是' and first_res.get(
 								                "无法区分") == "是":
 								            together_ask = True  # 设置是否需要联合查询
-.17 小解析货物标

											
										
										
											2024-10-17 15:33:58 +08:00
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
+								        # 如果需要，上传无效文件
 								        if zige_file_id is None or fuhe_file_id is None:
 								            if invalid_file_id is None:
 								                invalid_file_id = upload_file(invalid_path)
 								            if zige_file_id is None:
 								                zige_file_id = invalid_file_id
 								            if fuhe_file_id is None:
 								                fuhe_file_id = invalid_file_id
-.12

											
										
										
											2024-10-12 18:01:59 +08:00
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
+								    else:
 								        # 如果 qualification_path 为空，直接使用无效文件
 								        zige_file_id = fuhe_file_id = upload_file(invalid_path)
-.12

											
										
										
											2024-10-12 18:01:59 +08:00
-.21 考虑了资格审查和符合性审查一起的情况

											
										
										
											2024-11-21 10:50:06 +08:00
+								    # 根据 together_ask 决定第二组查询的内容
 								    if together_ask:
 								        second_query = [
 								            {
 								                "key": "资格性和符合性审查",
 								                "query": '''
 								                        问题：该招标文件中规定的资格性和符合性审查标准是什么的？
 								                        输出要求：
 .请以json格式给出，外层为'资格性和符合性审查'，最内层的值需要用列表包裹。
 .一层嵌套内的键需要总结分类为某类评审因素或是直接使用原文中的评审因素字段、标题。
-.9 截取pdf逻辑优化

											
										
										
											2024-12-10 17:32:08 +08:00
+.你的回答要与原文完全一致，若审查标准在表格中，那么单元格内的内容基本都要涵盖，不要遗漏，作为键值中的字符串列表项。
-.21 考虑了资格审查和符合性审查一起的情况

											
										
										
											2024-11-21 10:50:06 +08:00
+.最大细分为二层嵌套即可。
 								                        输出示例：
 								                            {
 								                                "资格性和符合性审查": { #一层嵌套
 								                                    "某类评审因素": [ #二层嵌套
 								                                        "因素1",
 								                                        "因素2"
 								                                    ]
 								                                    ...
 								                                }
 								                            }
 								                    '''
 								            }
 								        ]
 								    else:
 								        second_query = [
 								            {
 								                "key": "资格性审查",
 								                "query": '''
 								                    问题：该招标文件中规定的资格性审查标准是什么的？
 								                    输出要求：
 .请以json格式给出，外层为'资格性审查'，最内层的值需要用列表包裹。
 .一层嵌套内的键需要总结分类为某类评审因素。
 .你的回答要与原文完全一致，不要回答有关符合性审查的内容。
 .仔细检查你所选取的标准，若发现这些标准实际上是在描述不允许出现的资格性审查情况，则将外键替换为'资格性审查(以下情况不得出现)'，并将这些标准写入其中。
 .最大细分为二层嵌套即可。
 								                    输出示例1：
 								                        {
 								                            "资格性审查": { #一层嵌套
 								                                "某类评审因素": [ #二层嵌套
 								                                    "因素1",
 								                                    "因素2"
 								                                ]
 								                                ...
 								                            }
-												更改second_query提示词

											
										
										
											2024-11-19 16:11:00 +08:00
+								                        }
-.21 考虑了资格审查和符合性审查一起的情况

											
										
										
											2024-11-21 10:50:06 +08:00
+								                    输出示例2：
 								                        {
 								                            "资格性审查(以下情况不得出现)": { #若发现文中出现均为反向标准，用像该示例一样的处理
 								                                "某类不允许的评审因素": [ #二层嵌套
 								                                    "因素1",
 								                                    "因素2"
 								                                ]
 								                                ...
 								                            }
-												更改second_query提示词

											
										
										
											2024-11-19 16:11:00 +08:00
+								                        }
-.21 考虑了资格审查和符合性审查一起的情况

											
										
										
											2024-11-21 10:50:06 +08:00
+								                '''
 								            },
 								            {
 								                "key": "符合性审查",
 								                "query": '''
 								                    问题：该招标文件中规定的符合性审查标准是什么的？
 								                    输出要求：
 .请以json格式给出，外层为'符合性审查'，最内层的值需要用列表包裹。
 .一层嵌套内的键需要总结分类为某类评审因素或是直接使用原文中的评审因素字段、标题。
 .你的回答要与原文完全一致，也不要回答有关资格性审查的内容。
 .仔细检查你所选取的标准，若发现这些标准实际上是在描述不允许出现的符合性审查情况，则将外键替换为'符合性审查(以下情况不得出现)'，并将这些标准写入其中。
 .最大细分为二层嵌套即可。
 								                    输出示例1：
 								                        {
 								                            "符合性审查": { #一层嵌套
 								                                "某类评审因素": [ #二层嵌套
 								                                    "因素1",
 								                                    "因素2"
 								                                ]
 								                                ...
 								                            }
-												更改second_query提示词

											
										
										
											2024-11-19 16:11:00 +08:00
+								                        }
-.21 考虑了资格审查和符合性审查一起的情况

											
										
										
											2024-11-21 10:50:06 +08:00
+								                    输出示例2：
 								                        {
 								                            "符合性审查(以下情况不得出现)": { #若发现文中出现均为反向标准，用像该示例一样的处理
 								                                "某类不允许的评审因素": [ #二层嵌套
 								                                    "因素1",
 								                                    "因素2"
 								                                ]
 								                                ...
 								                            }
-												更改second_query提示词

											
										
										
											2024-11-19 16:11:00 +08:00
+								                        }
-.21 考虑了资格审查和符合性审查一起的情况

											
										
										
											2024-11-21 10:50:06 +08:00
+								                '''
 								            }
 								        ]
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
 								    # 定义任务函数
 								    def process_second_query(key, query, file_id):
-.21 考虑了资格审查和符合性审查一起的情况

											
										
										
											2024-11-21 10:50:06 +08:00
+								        print(f"call {key}")
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
+								        try:
 								            res = qianwen_long(file_id, query)
 								            cleaned_res = clean_json_string(res)
-.21 考虑了资格审查和符合性审查一起的情况

											
										
										
											2024-11-21 10:50:06 +08:00
 								            # 初始化结果和外键
 								            result = None
 								            selected_key = key  # 默认外键为 key
 								            if key in cleaned_res:
 								                result = cleaned_res[key]
 								            else:
 								                # 尝试匹配带 "(以下情况不得出现)" 后缀的键
 								                alternate_key = f"{key}(以下情况不得出现)"
 								                if alternate_key in cleaned_res:
 								                    result = cleaned_res[alternate_key]
 								                    selected_key = alternate_key  # 外键需要切换为 alternate_key
 								            # 如果结果仍为空，返回默认值
 								            if result is None:
 								                result = "未找到相关内容"
 								            # 返回匹配到的外键以及对应的结果
 								            return selected_key, result
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
+								        except Exception as e:
 								            print(f"执行查询 '{key}' 时出错: {e}")
 								            return key, "查询失败"
 								    def process_notice(notice_path):
 								        """Query one uploaded file for the applicant qualification requirements.

 								        The file is uploaded with ``upload_file``, a fixed prompt is sent through
 								        ``qianwen_long`` and the raw reply is passed to ``clean_json_string``.

 								        Args:
 								            notice_path: Path of the file to upload and query.

 								        Returns:
 								            A ``(key, value)`` tuple whose key is always "申请人资格要求". The value
 								            is whatever the cleaned reply stores under that key, "未找到相关内容"
 								            when the key is absent, or "处理失败" when any step raises.
 								        """
 								        print("call notice_path")
 								        try:
 								            # Upload the file and keep the returned id for the query.
 								            file_id1 = upload_file(notice_path)
 								            # Prompt: ask for the requirements verbatim, as a JSON list of strings.
 								            user_query1 = """
 								                第一章招标公告（投标邀请书）中说明的申请人资格要求是怎样的？请以json格式给出回答，外键为'申请人资格要求'，键值为字符串列表，其中每个字符串对应原文中的一条要求，你的回答与原文内容一致，不要擅自总结删减。输出格式示例如下：
 								                {
 								                    "申请人资格要求":[
 								                        "1.满足《中华人民共和国政府采购法》第二十二条规定；",
 								                        "1.1 法人或者其他组织的营业执照等证明文件，如供应商是自然人的提供身份证明材料；",
 								                        "2.未被列入“信用中国”网站(www.creditchina.gov.cn)信用服务栏失信被执行人、重大税收违法案件当事人名单；"
 								                    ]
 								                }
 								                """
 								            # Run the query and clean the raw reply.
 								            # NOTE(review): assumes the cleaned reply is dict-like (it is used
 								            # with ``.get``); anything else ends up in the except branch.
 								            res1 = clean_json_string(qianwen_long(file_id1, user_query1))
 								            # Fall back to a placeholder when the expected outer key is missing.
 								            requirements = res1.get("申请人资格要求", "未找到相关内容")
 								            return "申请人资格要求", requirements
 								        except Exception as e:
 								            # The caller unpacks a (key, result) pair from every future, so a
 								            # failure is reported as a value instead of being raised.
 								            print(f"处理申请人资格要求时出错: {e}")
 								            return "申请人资格要求", "处理失败"
 								    # 初始化 ThreadPoolExecutor
 								    with ThreadPoolExecutor(max_workers=3) as executor:
 								        future_to_key = {}
 								        # 提交第二组查询
 								        for query_info in second_query:
 								            key = query_info["key"]
 								            query = query_info["query"]
-.21 考虑了资格审查和符合性审查一起的情况

											
										
										
											2024-11-21 10:50:06 +08:00
+								            if key == "资格性和符合性审查":
 								                current_file_id = qualification_file_id  # 联合查询使用资格文件ID
 								            elif key == "资格性审查":
 								                current_file_id = zige_file_id
 								            elif key == "符合性审查":
 								                current_file_id = fuhe_file_id
 								            else:
 								                current_file_id = zige_file_id  # 默认使用资格文件ID
 								            # print(f"Submitting query for key: {key}, file_id: {current_file_id}")
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
+								            future = executor.submit(process_second_query, key, query, current_file_id)
 								            future_to_key[future] = key
 								        # 有条件地提交通知处理
-.8 评标修改 技术参数修改

											
										
										
											2024-11-11 17:12:38 +08:00
+								        if notice_path:
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
+								            future = executor.submit(process_notice, notice_path)
 								            future_to_key[future] = "申请人资格要求"
-.8 评标修改 技术参数修改

											
										
										
											2024-11-11 17:12:38 +08:00
+								        else:
-.21 考虑了资格审查和符合性审查一起的情况

											
										
										
											2024-11-21 10:50:06 +08:00
+								            future = executor.submit(process_notice, invalid_path)
-.8 评标修改 技术参数修改

											
										
										
											2024-11-11 17:12:38 +08:00
+								            future_to_key[future] = "申请人资格要求"
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
 								        # 收集结果（按完成顺序）
 								        for future in as_completed(future_to_key):
 								            key, result = future.result()
 								            detailed_res[key] = result
 								    # 定义所需的顺序
-.21 考虑了资格审查和符合性审查一起的情况

											
										
										
											2024-11-21 10:50:06 +08:00
+								    if together_ask:
 								        desired_order = [
 								            "申请人资格要求",
 								            "资格性和符合性审查"
 								        ]
 								    else:
 								        desired_order = [
 								            "申请人资格要求",
 								            ["资格性审查", "资格性审查(以下情况不得出现)"],
 								            ["符合性审查", "符合性审查(以下情况不得出现)"]
 								        ]
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
+								    # 创建一个新的有序字典
 								    ordered_res = {}
-.19 五个偏离表

											
										
										
											2024-11-19 17:29:12 +08:00
+								    for item in desired_order:
 								        if isinstance(item, list):
 								            for key in item:
 								                if key in detailed_res:
 								                    ordered_res[key] = detailed_res[key]
-.21 考虑了资格审查和符合性审查一起的情况

											
										
										
											2024-11-21 10:50:06 +08:00
+								                    break  # 只添加第一个匹配的键，互斥
-.19 五个偏离表

											
										
										
											2024-11-19 17:29:12 +08:00
+								        else:
 								            if item in detailed_res:
 								                ordered_res[item] = detailed_res[item]
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
 								    # 将重新排序后的字典传递给处理函数
 								    processed_data = process_dict(preprocess_dict(ordered_res))
 								    # 最终处理结果，例如打印或保存
-.7 资格审查货物标修改

											
										
										
											2024-11-07 16:31:57 +08:00
+								    return {"资格审查": processed_data}
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
 								# def combine_qualification_review(invalid_path, output_folder, qualification_path, notice_path):
 								#     DEFAULT_QUALIFICATION_REVIEW = {
 								#         "资格审查": {
 								#             "资格审查": "",
 								#             "符合性审查": ""
 								#         }
 								#     }
 								#
 								#     def process_file(file_path, invalid_path):
 								#         file_id = upload_file(file_path)
 								#         first_query = """
 								#         该文档中是否有关于资格性审查标准的具体内容,是否有关于符合性审查标准的具体内容?请以json格式给出回答,外键分别为'资格性审查'和'符合性审查',键值仅限于'是','否',输出格式示例如下:
 								#         {
 								#             "资格性审查":"是",
 								#             "符合性审查":"是"
 								#         }
 								#         """
 								#         qianwen_ans = clean_json_string(qianwen_long(file_id, first_query))
 								#         user_queries = [
 								#             {
 								#                 "key": "资格性审查",
 								#                 "query": "该招标文件中规定的资格性审查标准是怎样的？请以json格式给出，外层为'资格性审查'，你的回答要与原文完全一致，不可擅自总结删减，也不要回答有关符合性性审查的内容。"
 								#             },
 								#             {
 								#                 "key": "符合性审查",
 								#                 "query": "该招标文件中规定的符合性审查标准是怎样的？请以json格式给出，外层为'符合性审查'，你的回答要与原文完全一致，不可擅自总结删减，也不要回答有关资格性审查的内容。"
 								#             }
 								#         ]
 								#         combined_res = {}
 								#         file_id2 = None  # 延迟上传 invalid_path
 								#         def process_single_query(query_info):
 								#             nonlocal file_id2
 								#             key = query_info["key"]
 								#             query = query_info["query"]
 								#             # 根据键值决定使用哪个 file_id
 								#             if qianwen_ans.get(key) == "否":
 								#                 print("no")
 								#                 if not file_id2:
 								#                     file_id2 = upload_file(invalid_path)
 								#                 current_file_id = file_id2
 								#             else:
 								#                 current_file_id = file_id
 								#
 								#             # 调用大模型获取回答
 								#             ans = qianwen_long(current_file_id, query)
 								#             cleaned_data = clean_json_string(ans)
 								#             processed = process_dict(preprocess_dict(cleaned_data))
 								#             return processed
 								#
 								#         # 使用线程池并行处理查询
 								#         with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
 								#             futures = [executor.submit(process_single_query, q) for q in user_queries]
 								#             for future in concurrent.futures.as_completed(futures):
 								#                 result = future.result()
 								#                 combined_res.update(result)
 								#         return combined_res
 								#
 								#     try:
 								#         if not qualification_path:
 								#             file_to_process = invalid_path
 								#         else:
 								#             file_to_process = qualification_path
 								#
 								#         combined_res = process_file(file_to_process,invalid_path)
 								#         match_keys = find_chapter_clause_references(combined_res)
 								#
 								#         if not match_keys:
 								#             return {"资格审查": combined_res}
 								#
 								#         return process_additional_queries(combined_res, match_keys, output_folder, notice_path,invalid_path)   #还要跳转到第一章
 								#
 								#     except Exception as e:
 								#         print(f"Error in combine_qualification_review: {e}")
 								#         return DEFAULT_QUALIFICATION_REVIEW.copy()
-.12

											
										
										
											2024-10-12 18:01:59 +08:00
 								# 整合基础信息核心代码
 								# [{'资格性审查.资格要求': '符合本采购文件第一章第二款要求，并提供合格有效的证明材料'}, {'资格性审查.没有重大违法记录的书面声明': '是否提交参加政府采购活动前三年内在经营活动中没有重大违法记录的书面承诺或声明（格式要求详见本项目采购文件第六章相关格式要求）'}]
-.23测试分段

											
										
										
											2024-09-23 15:49:30 +08:00
+								if __name__ == "__main__":
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
+								    start_time=time.time()
-.12

											
										
										
											2024-10-12 18:01:59 +08:00
+								    # qualification_path="C:\\Users\\Administrator\\Desktop\\货物标\\output3\\6.2定版视频会议磋商文件_qualification2.pdf"
-.6修复bug

											
										
										
											2024-11-06 14:07:21 +08:00
+								    # output_folder = "D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89"
-.15 工程标资格审查提示词重改

											
										
										
											2024-11-15 11:03:04 +08:00
+								    output_folder=r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3"
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
+								    # qualification_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\52e54b20-c975-4cf3-a06b-6f146aaa93f5\\ztbfile_qualification1.pdf"
-.1适配货物标

											
										
										
											2024-11-01 17:55:26 +08:00
+								    # qualification_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\6558a50a-13ea-4279-a5db-684935481c39\\ztbfile_qualification2.pdf"
-.20 修改bug

											
										
										
											2024-11-20 19:35:22 +08:00
+								    qualification_path=r"C:\Users\Administrator\Desktop\fsdownload\16fd6b4e-3975-4c83-8ba6-1bc9263a6a5b\ztbfile_qualification1.pdf"
-.1适配货物标

											
										
										
											2024-11-01 17:55:26 +08:00
+								    # notice_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\6558a50a-13ea-4279-a5db-684935481c39\\ztbfile_notice.pdf"
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
+								    # notice_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\52e54b20-c975-4cf3-a06b-6f146aaa93f5\\ztbfile_notice.pdf"
-.20 修改bug

											
										
										
											2024-11-20 19:35:22 +08:00
+								    notice_path=r"C:\Users\Administrator\Desktop\fsdownload\16fd6b4e-3975-4c83-8ba6-1bc9263a6a5b\ztbfile_notice.pdf"
-.15货物标完整版

											
										
										
											2024-10-15 20:57:58 +08:00
+								    # knowledge_name = "6.2视频会议docx"
-.6修复bug

											
										
										
											2024-11-06 14:07:21 +08:00
+								    # invalid_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile.pdf"
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
+								    # invalid_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\52e54b20-c975-4cf3-a06b-6f146aaa93f5\\ztbfile.pdf"
-.20 修改bug

											
										
										
											2024-11-20 19:35:22 +08:00
+								    invalid_path=r"C:\Users\Administrator\Desktop\fsdownload\16fd6b4e-3975-4c83-8ba6-1bc9263a6a5b\ztbfile_invalid.pdf"
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
+								    res = combine_qualification_review(invalid_path, qualification_path, notice_path)
-.12

											
										
										
											2024-10-12 18:01:59 +08:00
+								    print(json.dumps(res, ensure_ascii=False, indent=4))
-.7 资格审查货物标修改

											
										
										
											2024-11-07 15:46:24 +08:00
+								    end_time=time.time()
 								    print("耗时："+str(end_time-start_time))