# zbparse/flask_app/old_version/资格评审old_old.py

# -*- encoding:utf-8 -*-
# 资格审查中，首先排除'联合体投标'和'不得存在的情况',有'符合'等的，加入matching_keys列表，否则保留原字典
import json
import re
from flask_app.general.json_utils import clean_json_string, combine_json_results, add_keys_to_json
from flask_app.general.多线程提问 import multi_threading, read_questions_from_file
from flask_app.general.通义千问long import upload_file


def merge_dictionaries_under_common_key(dicts, common_key):
    """Merge the nested dictionaries stored under ``common_key`` into one.

    Args:
        dicts: Iterable of parsed results. Each usable entry is a dict that
            maps ``common_key`` to a nested dict. ``None`` is treated as an
            empty iterable.
        common_key: Key whose nested dictionaries are merged.

    Returns:
        ``{common_key: merged}``. When the same nested key occurs more than
        once, the value from the later entry wins. Entries that are not
        dicts, that lack ``common_key``, or whose value under ``common_key``
        is not a dict are skipped after printing a warning.
    """
    merged = {}

    for entry in dicts or ():
        # A non-dict entry cannot contain the key; report it the same way.
        if not isinstance(entry, dict) or common_key not in entry:
            print(f"资格评审： Warning: Dictionary does not contain the key {common_key}")
            continue

        section = entry[common_key]
        if not isinstance(section, dict):
            # dict.update() raises on a plain string or None, so skip the
            # entry instead of aborting the whole merge.
            print(f"资格评审： Warning: Value under key {common_key} is not a dictionary, skipped")
            continue

        merged.update(section)

    return {common_key: merged}


def generate_qual_question(matching_keys_list):  # assumes qualification, credit and personnel requirements are either all present or all absent
    """Build the two prompts put to the model about the qualification review.

    Args:
        matching_keys_list: Field names to ask about. A falsy value (empty
            list, ``None``) means there is nothing to ask.

    Returns:
        ``[]`` for a falsy input; otherwise a two-item list: the prompt that
        enumerates the given fields, followed by the fixed prompt about
        personnel qualification requirements.
    """
    if not matching_keys_list:
        return []

    # Each field is wrapped in single quotes and the fields are joined with "、".
    keys_string = "、".join(f"'{key}'" for key in matching_keys_list)

    review_prompt = (f"该招标文件中资格评审的内容是怎样的？具体内容包括{keys_string}，"
                     "请你以json格式返回结果，外层键名为'资格评审'，嵌套键名为具体的字段，请你忠于原文，回答要求完整准确，不要擅自总结、删减。")
    personnel_prompt = "该招标文件中资格评审中有关人员资格的要求是怎样的？请依次给出所需的岗位、需要的数量、资格要求、需要提交的证明材料（如具体的社保证明、技能证书等，若有时间要求请注明时间范围）、在岗要求、备注，若相关要求不存在，则无需返回该键值对。请你以json格式返回结果，外层键名为'资格评审'，嵌套键名为具体的要求，请你忠于原文，回答要求完整准确，不要擅自总结、删减。"

    return [review_prompt, personnel_prompt]


def extract_matching_keys_qual(dict_data):
    """Split review items into those needing a follow-up question and the rest.

    A value is treated as a mere cross-reference when it contains "符合" or a
    "第…章/款/项/目" reference. For such items the key is collected in the
    first result, unless the key contains one of the excluded terms, in which
    case the item is left out of both results (the original note says
    consortium, prohibited-bidding and personnel items are asked about
    separately). Every other item is kept, key and value, in the second
    result.

    Args:
        dict_data: Mapping of review item name to requirement. Values that
            are not strings are converted with ``str()`` for the pattern
            check only; the stored value is left untouched.

    Returns:
        ``(matching_keys, non_matching_keys)``: a list of keys and a dict of
        the untouched items, e.g. ``(['资质条件', '财务状况'],
        {'营业执照': '具备有效的营业执照'})``. If any key contains both "附件"
        and "资质", ``([], {})`` is returned immediately.
    """
    # Single alternation equivalent to checking 章 / 款 / 项 / 目 / 符合 separately.
    reference_pattern = re.compile(r"第.*?[章款项目]|符合")
    # Keys containing any of these terms are never added to matching_keys.
    excludes = ('联合体', '禁止投标', '不存在', '不得存在', '资格', '管理机构', '负责人', '人员')

    matching_keys = []
    non_matching_keys = {}

    for key, value in dict_data.items():
        if "附件" in key and "资质" in key:
            # Both markers present in one key: give up on this table. The
            # second element is a dict so the return shape matches the
            # normal path.
            return [], {}

        text = value if isinstance(value, str) else str(value)
        if reference_pattern.search(text):
            if not any(term in key for term in excludes):
                matching_keys.append(key)
        else:
            # The value carries real content, so no follow-up question is needed.
            non_matching_keys[key] = value

    return matching_keys, non_matching_keys


# 获取联合体投标的要求，由于它不一定在资格审查表中，故调用rag
def get_consortium_dict(knowledge_name):
    """Ask for the consortium-bidding requirements and combine the answers.

    The original note explains that these requirements are not necessarily
    in the qualification review table, so they are asked about separately.

    Args:
        knowledge_name: Passed through to ``multi_threading`` together with
            the question.

    Returns:
        Whatever ``combine_json_results`` produces from the collected answers.
    """
    consortium_prompt = "该招标文件对于联合体投标的要求是怎样的，请按json格式给我提供信息，外层键名为'联合体投标要求（如有）'，嵌套键名为你对该要求的总结，而键值需要完全与原文保持一致，不要擅自总结、删减。"

    answers = []
    # Per the original note: the first tuple item is the question, and
    # response[0] / response[1] are the question and the answer.
    for query, response in multi_threading([consortium_prompt], knowledge_name):
        try:
            if not response or len(response) <= 1:
                print(f"资格评审： Warning: Missing or incomplete response data for query index {query}.")
            else:
                answers.append(response[1])
        except Exception as e:
            print(f"资格评审： Error processing response for query index {query}: {e}")

    return combine_json_results(answers)


def get_all_dict(knowledge_name):
    """Ask every question from the prompt file and wrap the combined answers.

    Args:
        knowledge_name: Passed through to ``multi_threading`` together with
            the questions.

    Returns:
        ``{'资格评审': combined}`` where ``combined`` is the result of
        ``combine_json_results`` over the collected answers.
    """
    # NOTE(review): relative path, so it resolves against the current working directory.
    prompts = read_questions_from_file('flask_app/static/提示词/资格评审.txt')

    answers = []
    # Per the original note: the first tuple item is the question, and
    # response[0] / response[1] are the question and the answer.
    for query, response in multi_threading(prompts, knowledge_name):
        try:
            if not response or len(response) <= 1:
                print(f"资格评审： Warning: Missing or incomplete response data for query index {query}.")
            else:
                answers.append(response[1])
        except Exception as e:
            print(f"资格评审： Error processing response for query index {query}: {e}")

    return {'资格评审': combine_json_results(answers)}


def _query_qualification_file(questions, file_path):
    """Upload ``file_path`` and ask ``questions`` about it.

    Returns the cleaned replies as a list, or ``None`` (after printing a
    notice) when ``multi_threading`` returns nothing.
    """
    file_id = upload_file(file_path)
    # The original note says this call goes to qianwen-long.
    replies = multi_threading(questions, "", file_id, 2)
    if not replies:
        print("资格评审： 调用大模型未成功获取资格评审文件中的要求！")
        return None
    return [clean_json_string(reply) for _, reply in replies]


def process_qualification(qualification_review, truncate3, knowledge_name):
    """Assemble the qualification review result, choosing one of five paths.

    The items of ``qualification_review`` are first split by
    ``extract_matching_keys_qual`` into keys that need a follow-up question
    and items that are already complete. The path is then chosen as follows
    (each prints its "typeN" tag):

    * type1 - nothing extracted, review file available: ask the file about a
      default set of fields, then add the consortium requirements.
    * type2 - nothing extracted, no review file: ask the full question set.
    * type3 - only complete items: return them, adding the consortium
      requirements unless a key already contains "联合体".
    * type4 - follow-up keys exist but no review file: ask the full question
      set and add the complete items.
    * type5 - follow-up keys and review file: ask the file about those keys,
      then add the consortium requirements and the complete items.

    Args:
        qualification_review: Mapping of review item name to requirement text.
        truncate3: Value handed to ``upload_file``; an empty string means no
            review file was extracted. Presumably a file path - confirm
            against the caller.
        knowledge_name: Passed to the helpers that ask questions.

    Returns:
        The assembled result; ``{"资格评审": ""}`` when the file query yields
        nothing or an assembled result is falsy.
    """
    # e.g. pending_keys: ['资质条件', '财务状况'];
    #      settled_items: {'营业执照': '具备有效的营业执照', '施工机械设备': '具备完善的施工设备'}
    pending_keys, settled_items = extract_matching_keys_qual(qualification_review)

    if pending_keys:
        if truncate3 == "":
            # The table references other clauses, but the review file was not extracted.
            print("资格评审： type4")
            everything = get_all_dict(knowledge_name)
            combined = add_keys_to_json(everything, settled_items)
            return combined or {"资格评审": ""}

        # The most common case.
        print("资格评审： type5")
        cleaned = _query_qualification_file(generate_qual_question(pending_keys), truncate3)
        if cleaned is None:
            return {"资格评审": ""}
        merged = merge_dictionaries_under_common_key(cleaned, '资格评审')
        consortium = get_consortium_dict(knowledge_name)
        with_consortium = add_keys_to_json(merged, consortium)
        combined = add_keys_to_json(with_consortium, settled_items)
        return combined or {"资格评审": ""}

    if settled_items:
        # Every requirement is already written out in full; nothing more to extract.
        print("资格评审： type3")
        settled_json = {'资格评审': settled_items}
        if any('联合体' in key for key in settled_items.keys()):
            # A consortium entry already exists; do not ask again, to avoid duplication.
            return settled_json or {"资格评审": ""}
        consortium = get_consortium_dict(knowledge_name)
        return add_keys_to_json(settled_json, consortium)

    if truncate3 != "":
        # Nothing usable in the table, but a review file was extracted.
        print("资格评审： type1")
        default_keys = ["资质条件", "财务要求", "业绩要求", "信誉要求", "其他要求"]
        cleaned = _query_qualification_file(generate_qual_question(default_keys), truncate3)
        if not cleaned:
            print("资格评审： 无法获取大模型结果，返回空值")
            return {"资格评审": ""}
        merged = merge_dictionaries_under_common_key(cleaned, '资格评审')
        consortium = get_consortium_dict(knowledge_name)
        return add_keys_to_json(merged, consortium)

    print("资格评审： type2")
    return get_all_dict(knowledge_name) or {"资格评审": ""}


if __name__ == "__main__":
    # Manual smoke run against a local PDF and a named knowledge base.
    # Alternative sample input (swap in for sample_review):
    # {'营业执照': '具备有效的营业执照', '资质等级': '具备建设行政主管部门颁发的市政公用工程监理乙级及以上资质或房屋建筑工程监理乙级及以上资质或工程监理综合资质证书', '财务状况': '投标人须提供近三年（2018 年、2019 年、2020 年）完', '类似项目业绩': '投标人近 5 年（2017 年至今）须具有至少一项投资概算在4000 万元及以上房屋建筑工程或市政公用工程监理业绩，同时提供中标通知书及监理服务合同，项目规模及项目时间认定以监理服务合同内信息为准', '信誉': '根据《关于在招标投标活动中对失信被执行', '主要人员': '监理工程师：至少提供 1 名具有房屋建筑工程专','不存在禁止投标的':'不存在第二章“投标人须知”第 1.4.3 项规定的情形','联合体投标':'hha'}
    sample_review = {
        '营业执照': '具备有效的营业执照',
        '安全生产许可证': '具备有效的安全生产许可证',
        '资质等级': '符合第二章“投标人须知”规定',
        '财务状况': '符合第二章“投标人须知”规定',
    }
    review_pdf = "C:\\Users\\Administrator\\Desktop\\招标文件\\new_test\\zbtest2_qualification.pdf"
    kb_name = "招标解析word13"

    outcome = process_qualification(sample_review, review_pdf, kb_name)
    print(json.dumps(outcome, ensure_ascii=False, indent=4))

# 该招标文件中资格评审关于财务状况的内容是怎样的？请你以json格式返回结果，外层键名为'财务状况'，请你忠于原文，回答要求完整准确，不要擅自总结、删减，且不要回答诸如'见投标人须知前附表'或'见第x.x项规定'这类无实质性内容的回答。