# zbparse/flask_app/工程标/资格审查模块.py

import json
import os.path
import time

from flask_app.general.format_change import pdf2docx
from flask_app.general.json_utils import extract_content_from_json, clean_json_string
from flask_app.general.table_content_extraction import extract_tables_main
from flask_app.工程标.形式响应评审 import process_reviews
from flask_app.工程标.资格评审 import process_qualification
from flask_app.general.通义千问long import upload_file, qianwen_long
from concurrent.futures import ThreadPoolExecutor
from flask_app.货物标.资格审查main import combine_qualification_review
from flask_app.general.merge_pdfs import merge_pdfs
def process_notice(notice_path):
    """Extract the applicant qualification requirements from the notice file.

    The file is uploaded via ``upload_file``, queried through ``qianwen_long``,
    and the reply is normalised with ``clean_json_string`` before the value
    stored under '申请人资格要求' is read from it.

    Args:
        notice_path: Path of the notice file handed to ``upload_file``.

    Returns:
        dict: A single-key dict ``{'申请人资格要求': value}`` where ``value`` is
        the entry found in the cleaned reply, '未找到相关内容' when the key is
        missing, or '处理失败' when any step raised an exception.
    """
    print("call notice_path")
    result_key = "申请人资格要求"
    try:
        # Upload first: the model query below needs the returned file id.
        notice_file_id = upload_file(notice_path)

        # Prompt asking for the requirements verbatim, as a JSON list of strings.
        prompt = """
            第一章招标公告（投标邀请书）中说明的申请人资格要求是怎样的？请以json格式给出回答，外键为'申请人资格要求'，键值为字符串列表，其中每个字符串对应原文中的一条要求，你的回答与原文内容一致，不要擅自总结删减。输出格式示例如下：
            {
                "申请人资格要求":[
                    "1.满足《中华人民共和国政府采购法》第二十二条规定；",
                    "1.1 法人或者其他组织的营业执照等证明文件，如供应商是自然人的提供身份证明材料；",
                    "2.未被列入“信用中国”网站(www.creditchina.gov.cn)信用服务栏失信被执行人、重大税收违法案件当事人名单；"
                ]
            }
            """
        raw_reply = qianwen_long(notice_file_id, prompt)
        parsed_reply = clean_json_string(raw_reply)
        return {result_key: parsed_reply.get(result_key, "未找到相关内容")}
    except Exception as exc:
        # Best-effort: report the error and hand back a failure marker instead of raising.
        print(f"处理申请人资格要求时出错: {exc}")
        return {result_key: "处理失败"}

# TODO: known issue: things go wrong if evaluation_method contains neither a compliance review (符合性审查) nor a formal review (形式评审).
def combine_review_standards(evaluation_method, qualification_path, output_folder, tobidders_notice_table, clause_path,
                             invalid_path, merged_baseinfo_path, notice_path):
    """Assemble the qualification-review result for a tender document.

    The evaluation-method file is uploaded and the model is first asked whether
    it describes a compliance review ('符合性审查').

    * If the answer is '是', the qualification file and the evaluation-method
      file are merged into ``merged_qualification.pdf`` inside ``output_folder``
      and the merged file is passed to ``combine_qualification_review``.
    * Otherwise the formal, responsiveness and qualification review standards
      are requested from the model, and ``process_qualification``,
      ``process_reviews`` and ``process_notice`` are run in a thread pool. Their
      results are merged into one dict in the order applicant requirements,
      qualification review, formal/responsiveness review.

    Args:
        evaluation_method (str): Path of the evaluation-method file.
        qualification_path (str): Path of the qualification file.
        output_folder (str): Folder that receives intermediate output.
        tobidders_notice_table (str): Path of the bidder-instructions front
            table file; it is converted with ``pdf2docx`` and then
            ``extract_tables_main``.
        clause_path (str): Clause path forwarded to ``process_reviews``.
        invalid_path (str): Invalid-bid file path forwarded to the
            qualification processing.
        merged_baseinfo_path (str): Merged base-info path forwarded to
            ``process_qualification``.
        notice_path (str): Path of the notice file.

    Returns:
        In the compliance-review branch, whatever
        ``combine_qualification_review`` returns; otherwise a dict
        ``{'资格审查': merged_results}``.
    """
    # One upload serves both model queries below.
    uploaded_id = upload_file(evaluation_method)

    compliance_query = """
    该文档的评标办法章节中是否说明了符合性审查标准？说明了就回答'是'，否则回答'否'，请以json格式给我返回结果，键名分别是'符合性审查'，键值仅限于'是'，'否'。注意：它与形式、响应性评审是对立的，也就是说只要文档中描述了形式、响应性评审，那么符合性审查的键值一定是'否'。以下为输出示例：
    {
        "符合性审查":"是"
    }
    """
    compliance_answer = clean_json_string(qianwen_long(uploaded_id, compliance_query))

    # Compliance-review documents are delegated to the goods-tender pipeline.
    if compliance_answer.get("符合性审查") == "是":
        print("call 资格审查main(货物标)")
        merged_pdf_path = os.path.join(output_folder, "merged_qualification.pdf")
        merge_pdfs([qualification_path, evaluation_method], merged_pdf_path)
        return combine_qualification_review(invalid_path, merged_pdf_path, notice_path)

    # Bidder-instructions front table: pdf -> docx -> json.
    notice_table_docx = pdf2docx(tobidders_notice_table)
    table_json_path = extract_tables_main(notice_table_docx, output_folder)

    # Ask for the formal, responsiveness and qualification review standards in one query.
    standards_query = """
            根据该文档中的评标办法前附表，请你列出该文件中的形式评审标准和响应性评审标准和资格评审标准，请以json格式返回，外层键名为'形式评审标准'和'响应性评审标准'和'资格评审标准',嵌套键名为'评审因素'中的内容，相应的键值为对应'评审标准'中的内容。
        """
    review_standards = extract_content_from_json(qianwen_long(uploaded_id, standards_query))

    # Split the qualification standards off; the remainder goes to process_reviews.
    qualification_standards = review_standards.pop('资格评审标准', {})

    merged_results = {}
    # Three workers, one per task.
    with ThreadPoolExecutor(max_workers=3) as pool:
        pending = {}
        pending["资格审查"] = pool.submit(
            process_qualification,
            qualification_standards,
            qualification_path,
            invalid_path,
            merged_baseinfo_path,
        )
        pending["形式及响应性审查"] = pool.submit(
            process_reviews,
            review_standards,
            output_folder,
            table_json_path,
            clause_path,
        )
        pending["申请人资格要求"] = pool.submit(process_notice, notice_path)

        # Collect in a fixed order so the merged dict has a stable key order.
        for task_name in ("申请人资格要求", "资格审查", "形式及响应性审查"):
            task = pending.get(task_name)
            if not task:
                merged_results[task_name] = "未提交任务"
                continue
            try:
                outcome = task.result()
                if isinstance(outcome, dict):
                    # Dict results contribute their own keys directly.
                    merged_results.update(outcome)
                else:
                    merged_results[task_name] = outcome
            except Exception as exc:
                # A failed task is reported and marked; the others are still collected.
                print(f"处理 '{task_name}' 时出错: {exc}")
                merged_results[task_name] = "处理失败"

    return {"资格审查": merged_results}

if __name__ == "__main__":
    # Manual run against a locally downloaded sample; the paths are machine-specific.
    started_at = time.time()
    output_folder = r"C:\Users\Administrator\Desktop\fsdownload\ec7d5328-9c57-450f-baf4-2e5a6f90ed1d\tmp"
    # knowledge_name="zbtest20"

    # Keyword arguments mirror the parameter order of combine_review_standards.
    res = combine_review_standards(
        evaluation_method=os.path.join(output_folder, "ztbfile_evaluation_method.pdf"),
        qualification_path="",
        output_folder=output_folder,
        tobidders_notice_table=os.path.join(output_folder, "ztbfile_tobidders_notice"),
        clause_path="",
        invalid_path=os.path.join(output_folder, "ztbfile_invalid.pdf"),
        merged_baseinfo_path=os.path.join(output_folder, "ztbfile_merged_baseinfo.pdf"),
        notice_path=os.path.join(output_folder, "ztbfile_notice.pdf"),
    )
    print(json.dumps(res, ensure_ascii=False, indent=4))

    finished_at = time.time()
    print(f"elapsed time:{finished_at - started_at}")