# zbparse/flask_app/工程标/资格审查模块main.py

import json
import os.path
import time

from flask_app.general.format_change import pdf2docx
from flask_app.general.json_utils import extract_content_from_json, clean_json_string
from flask_app.general.table_content_extraction import extract_tables_main
from flask_app.工程标.形式响应评审 import process_reviews
from flask_app.工程标.资格评审 import process_qualification
from flask_app.general.llm.通义千问long import upload_file, qianwen_long
from concurrent.futures import ThreadPoolExecutor
from flask_app.货物标.资格审查main import combine_qualification_review
from flask_app.general.merge_pdfs import merge_pdfs
def process_notice(notice_path):
    """Query the LLM for the applicant qualification requirements of a notice file.

    The notice file is uploaded, a fixed prompt is sent together with the
    returned file ID, and the reply is passed through ``clean_json_string``
    before the '申请人资格要求' entry is read from it.

    Args:
        notice_path: Path of the notice file handed to ``upload_file``.

    Returns:
        dict: ``{"申请人资格要求": value}``. ``value`` is the entry found in the
        cleaned reply, "未找到相关内容" when the key is absent, or "处理失败"
        when any step raised an exception.
    """
    print("call notice_path")
    result_key = "申请人资格要求"
    # Fixed prompt: asks for the requirements verbatim, as a JSON list of strings.
    prompt = """
            第一章招标公告（投标邀请书）中说明的申请人资格要求是怎样的？请以json格式给出回答，外键为'申请人资格要求'，键值为字符串列表，其中每个字符串对应原文中的一条要求，你的回答与原文内容一致，不要擅自总结删减。输出格式示例如下：
            {
                "申请人资格要求":[
                    "1.满足《中华人民共和国政府采购法》第二十二条规定；",
                    "1.1 法人或者其他组织的营业执照等证明文件，如供应商是自然人的提供身份证明材料；",
                    "2.未被列入“信用中国”网站(www.creditchina.gov.cn)信用服务栏失信被执行人、重大税收违法案件当事人名单；"
                ]
            }
            """
    try:
        # Upload first; the query is bound to the uploaded file's ID.
        notice_file_id = upload_file(notice_path)
        raw_answer = qianwen_long(notice_file_id, prompt)
        parsed_answer = clean_json_string(raw_answer)
        return {result_key: parsed_answer.get(result_key, "未找到相关内容")}
    except Exception as err:
        # Best-effort: any failure is reported and mapped to a placeholder value.
        print(f"处理申请人资格要求时出错: {err}")
        return {result_key: "处理失败"}

# TODO: known issue - if evaluation_method contains neither a compliance review (符合性审查) nor a formal review (形式评审), this flow does not work correctly.
def combine_review_standards(evaluation_method, qualification_path, output_folder, tobidders_notice_table, clause_path,
                             invalid_path, merged_baseinfo_path,notice_path):
    """
    Build the combined qualification-review result for a tender document.

    The evaluation-method file is uploaded and the LLM is first asked whether
    it explicitly describes a compliance review ('符合性审查'):

    * If yes, the qualification file and the evaluation-method file are merged
      into ``merged_qualification.pdf`` inside ``output_folder`` and the result
      of ``combine_qualification_review`` is returned unchanged.
    * Otherwise the formal, responsiveness and qualification review standards
      are extracted from the evaluation-method file, and ``process_qualification``,
      ``process_reviews`` and ``process_notice`` are run in a thread pool. Their
      results are merged and returned under the '资格审查' key.

    Args:
        evaluation_method (str): Path of the evaluation-method file.
        qualification_path (str): Path of the qualification file.
        output_folder (str): Directory that receives the merged PDF and the
            extracted table JSON.
        tobidders_notice_table (str): Path of the bidder-notice front table
            file; it is converted with ``pdf2docx`` and its tables are extracted.
        clause_path (str): Clause file path, forwarded to ``process_reviews``.
        invalid_path (str): Invalid-bid file path, forwarded to
            ``process_qualification`` / ``combine_qualification_review``.
        merged_baseinfo_path (str): Merged base-info file path, forwarded to
            ``process_qualification``.
        notice_path (str): Notice file path, forwarded to ``process_notice`` /
            ``combine_qualification_review``.

    Returns:
        dict: Either the value returned by ``combine_qualification_review`` or
        ``{"资格审查": {...}}`` holding the merged task results. A task that
        raised contributes ``"处理失败"`` under its own task name.
    """
    # Upload the evaluation-method file; both LLM queries below reuse this ID.
    file_id = upload_file(evaluation_method)

    first_query="""请判断该招标文件的评标办法、评审流程章节中是否明确说明了'符合性审查'（或符合性检查或等同的表述）及标准。注意： 
1. 如果文档中描述了'符合性审查'（或符合性检查或等同的表述）及标准，请回答'是'。
2. 如果文档中仅描述了'形式评审标准'、'响应性评审标准'，而未提到具体的'符合性审查（或等同的表述）'及标准，请回答'否'。
你的回答仅限于'是'或'否'，请不要添加其他说明或解释性内容。
"""
    first_res = qianwen_long(file_id, first_query)
    # A bare substring test misreads negative replies that merely contain '是'
    # (e.g. "否，但是…" or "不是"), so a reply that opens with a negation is
    # treated as "no". Exact '是' / '否' replies behave as before.
    opens_with_negation = '是' in first_res and first_res.lstrip().lstrip("'\"‘’“”").startswith(('否', '不是'))
    has_compliance_review = '是' in first_res and not opens_with_negation

    if has_compliance_review:
        print("call 资格审查main(货物标)")
        merged_target = os.path.join(output_folder, "merged_qualification.pdf")
        # merge_pdfs returns the path that is actually passed on.
        more_qualification_path = merge_pdfs([qualification_path, evaluation_method], merged_target)
        return combine_qualification_review(invalid_path, more_qualification_path, notice_path)

    # Bidder-notice front table: PDF -> docx -> table JSON.
    tobidders_notice_table_docx = pdf2docx(tobidders_notice_table)
    truncate_jsonpath = extract_tables_main(tobidders_notice_table_docx, output_folder)

    # Ask for the formal, responsiveness and qualification review standards.
    user_query_1 = """
            根据该文档中的评标办法前附表，请你列出该文件中的形式评审标准和响应性评审标准和资格评审标准，请以json格式返回，外层键名为'形式评审标准'和'响应性评审标准'和'资格评审标准',嵌套键名为'评审因素'中的内容，相应的键值为对应'评审标准'中的内容。
        """
    results = qianwen_long(file_id, user_query_1)
    original_dict_data = extract_content_from_json(results)

    # The qualification standards are handled by their own task; what remains
    # in original_dict_data goes to the formal/responsiveness task.
    qualification_review = original_dict_data.pop('资格评审标准', {})

    combined_results = {}
    # Three independent tasks, one worker each.
    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = {
            "资格审查": executor.submit(
                process_qualification,
                qualification_review,
                qualification_path,
                invalid_path,
                merged_baseinfo_path
            ),
            "形式及响应性审查": executor.submit(
                process_reviews,
                original_dict_data,
                output_folder,
                truncate_jsonpath,
                clause_path
            ),
            "申请人资格要求": executor.submit(
                process_notice,
                notice_path
            )
        }
        # Collect in a fixed order so the output keys are ordered predictably.
        for key in ("申请人资格要求", "资格审查", "形式及响应性审查"):
            try:
                result = futures[key].result()
            except Exception as e:
                print(f"处理 '{key}' 时出错: {e}")
                combined_results[key] = "处理失败"
                continue
            if isinstance(result, dict):
                # Dict results are merged flat into the combined mapping.
                combined_results.update(result)
            else:
                combined_results[key] = result

    return {"资格审查": combined_results}

if __name__ == "__main__":
    # Ad-hoc manual run against one locally stored sample; the folder is machine-specific.
    started_at = time.time()
    output_folder = r"D:\flask_project\flask_app\static\output\output1\86317976-040a-4c91-87e2-7718da869fd0"

    # Input files that live inside the sample folder.
    evaluation_method, notice_path, tobidders_notice_table, invalid_path, merged_baseinfo_path = (
        os.path.join(output_folder, file_name)
        for file_name in (
            "ztbfile_evaluation_method.pdf",
            "ztbfile_notice.pdf",
            "ztbfile_tobidders_notice",
            "ztbfile_invalid.pdf",
            "ztbfile_merged_baseinfo.pdf",
        )
    )
    # This sample is run with empty qualification and clause paths.
    qualification_path = ""
    clause_path = ""

    review_result = combine_review_standards(
        evaluation_method=evaluation_method,
        qualification_path=qualification_path,
        output_folder=output_folder,
        tobidders_notice_table=tobidders_notice_table,
        clause_path=clause_path,
        invalid_path=invalid_path,
        merged_baseinfo_path=merged_baseinfo_path,
        notice_path=notice_path,
    )
    print(json.dumps(review_result, ensure_ascii=False, indent=4))
    # Wall-clock duration of the whole run, taken after the result is printed.
    print("elapsed time:" + str(time.time() - started_at))
-.21工程标快速版

											
										
										
											2024-10-21 17:31:48 +08:00
+								import json
-.8 评标修改 技术参数修改

											
										
										
											2024-11-11 17:12:38 +08:00
+								import os.path
-.21工程标快速版

											
										
										
											2024-10-21 17:31:48 +08:00
+								import time
-.29

											
										
										
											2024-08-29 16:37:09 +08:00
-.8 评标修改 技术参数修改

											
										
										
											2024-11-11 17:12:38 +08:00
+								from flask_app.general.format_change import pdf2docx
 								from flask_app.general.json_utils import extract_content_from_json, clean_json_string
-.17 修复了提取货物标的Bug，无效投标bug修复

											
										
										
											2024-11-17 17:27:05 +08:00
+								from flask_app.general.table_content_extraction import extract_tables_main
-.6 优化解析

											
										
										
											2024-12-06 14:40:22 +08:00
+								from flask_app.工程标.形式响应评审 import process_reviews
 								from flask_app.工程标.资格评审 import process_qualification
-.12 使用turbo作为plus超限时的保底选择

											
										
										
											2025-02-12 14:55:35 +08:00
+								from flask_app.general.llm.通义千问long import upload_file, qianwen_long
-.5 资格审查部分返回更能对应原文

											
										
										
											2024-09-05 18:00:40 +08:00
+								from concurrent.futures import ThreadPoolExecutor
-.8 评标修改 技术参数修改

											
										
										
											2024-11-11 17:12:38 +08:00
+								from flask_app.货物标.资格审查main import combine_qualification_review
 								from flask_app.general.merge_pdfs import merge_pdfs
 								def process_notice(notice_path):
 								    print("call notice_path")
 								    try:
 								        # 上传通知文件并获取文件ID
 								        file_id1 = upload_file(notice_path)
-.29

											
										
										
											2024-08-29 16:37:09 +08:00
-.8 评标修改 技术参数修改

											
										
										
											2024-11-11 17:12:38 +08:00
+								        # 定义用户查询，提取申请人资格要求
 								        user_query1 = """
 								            第一章招标公告（投标邀请书）中说明的申请人资格要求是怎样的？请以json格式给出回答，外键为'申请人资格要求'，键值为字符串列表，其中每个字符串对应原文中的一条要求，你的回答与原文内容一致，不要擅自总结删减。输出格式示例如下：
 								            {
 								                "申请人资格要求":[
 								                    "1.满足《中华人民共和国政府采购法》第二十二条规定；",
 								                    "1.1 法人或者其他组织的营业执照等证明文件，如供应商是自然人的提供身份证明材料；",
 								                    "2.未被列入“信用中国”网站(www.creditchina.gov.cn)信用服务栏失信被执行人、重大税收违法案件当事人名单；"
 								                ]
 								            }
 								            """
 								        # 执行查询并清洗结果
 								        res1 = clean_json_string(qianwen_long(file_id1, user_query1))
 								        # 提取申请人资格要求
 								        requirements = res1.get("申请人资格要求", "未找到相关内容")
 								        return {"申请人资格要求": requirements}
 								    except Exception as e:
 								        print(f"处理申请人资格要求时出错: {e}")
 								        return {"申请人资格要求": "处理失败"}
-.14 资格审查提示词重改

											
										
										
											2024-11-15 09:23:26 +08:00
+								#TODO:目前有个问题，如果evaluation_method既没有符合性审查又没有形式评审，就有问题
-.8 评标修改 技术参数修改

											
										
										
											2024-11-11 17:12:38 +08:00
+								def combine_review_standards(evaluation_method, qualification_path, output_folder, tobidders_notice_table, clause_path,
-.15 工程标资格审查提示词重改

											
										
										
											2024-11-15 11:03:04 +08:00
+								                             invalid_path, merged_baseinfo_path,notice_path):
-.8 评标修改 技术参数修改

											
										
										
											2024-11-11 17:12:38 +08:00
+								    """
 								    结合评审标准，包括形式评审、响应评审、资格评审及申请人资格要求。
 								    参数:
 								        evaluation_method (str): 评标办法文件路径。
 								        qualification (str): 资格文件路径。
 								        output_folder (str): 输出文件夹路径。
 								        tobidders_notice_table (str): JSON截断路径。
 								        clause_path (str): 条款路径。
 								        invalid_path (str): 无效文件路径。
 								        merged_baseinfo_path (str): 合并基础信息路径。
 								        notice_path (str): 通知文件路径。
 								    返回:
 								        dict: 包含资格审查和申请人资格要求的合并结果。
 								    """
 								    # 上传评标办法前附表并获取文件ID
 								    file_id = upload_file(evaluation_method)  # 评标办法前附表
-.14 失败的文件进行记录，截取pdf进一步优化

											
										
										
											2025-01-14 17:10:38 +08:00
+								    first_query="""请判断该招标文件的评标办法、评审流程章节中是否明确说明了'符合性审查'（或符合性检查或等同的表述）及标准。注意：
 . 如果文档中描述了'符合性审查'（或符合性检查或等同的表述）及标准，请回答'是'。
 . 如果文档中仅描述了'形式评审标准'、'响应性评审标准'，而未提到具体的'符合性审查（或等同的表述）'及标准，请回答'否'。
 								你的回答仅限于'是'或'否'，请不要添加其他说明或解释性内容。
 								"""
 								    first_res=qianwen_long(file_id,first_query)
 								    if '是' in first_res:
-.12 资格、符合性提示词优化

											
										
										
											2024-12-12 09:44:04 +08:00
+								        print("call 资格审查main(货物标)")
-.8 评标修改 技术参数修改

											
										
										
											2024-11-11 17:12:38 +08:00
+								        paths=[qualification_path,evaluation_method]
-.12 资格、符合性提示词优化

											
										
										
											2024-12-12 09:44:04 +08:00
+								        more_qualification_path=os.path.join(output_folder,"merged_qualification.pdf")
-.12 解决了merged_baseinfo_path_more为空的bug

											
										
										
											2024-12-12 18:03:04 +08:00
+								        more_qualification_path=merge_pdfs(paths,more_qualification_path)
-.12 资格、符合性提示词优化

											
										
										
											2024-12-12 09:44:04 +08:00
+								        final_result=combine_qualification_review(invalid_path,more_qualification_path,notice_path)
-.8 评标修改 技术参数修改

											
										
										
											2024-11-11 17:12:38 +08:00
+								    else:
 								        tobidders_notice_table_docx = pdf2docx(tobidders_notice_table)  # 投标人须知前附表转docx
 								        truncate_jsonpath = extract_tables_main(tobidders_notice_table_docx, output_folder)  # 投标人须知前附表docx->json
 								        # 定义用户查询，提取形式评审标准、响应性评审标准和资格评审标准
 								        user_query_1 = """
 								            根据该文档中的评标办法前附表，请你列出该文件中的形式评审标准和响应性评审标准和资格评审标准，请以json格式返回，外层键名为'形式评审标准'和'响应性评审标准'和'资格评审标准',嵌套键名为'评审因素'中的内容，相应的键值为对应'评审标准'中的内容。
 								        """
 								        # 执行查询并提取内容
 								        results = qianwen_long(file_id, user_query_1)
 								        original_dict_data = extract_content_from_json(results)
 								        # 提取资格评审标准
 								        qualification_review = original_dict_data.pop('资格评审标准', {})  # qianwen_long有关资格评审的内容
 								        # 初始化 ThreadPoolExecutor，设定最多三个线程以处理三个任务
 								        with ThreadPoolExecutor(max_workers=3) as executor:
 								            # 提交任务并建立任务名到Future的映射
 								            futures = {
 								                "资格审查": executor.submit(
 								                    process_qualification,
 								                    qualification_review,
 								                    qualification_path,
 								                    invalid_path,
 								                    merged_baseinfo_path
 								                ),
 								                "形式及响应性审查": executor.submit(
 								                    process_reviews,
 								                    original_dict_data,
 								                    output_folder,
 								                    truncate_jsonpath,
 								                    clause_path
 								                ),
 								                "申请人资格要求": executor.submit(
 								                    process_notice,
-.15 工程标资格审查提示词重改

											
										
										
											2024-11-15 11:03:04 +08:00
+								                    notice_path
-.8 评标修改 技术参数修改

											
										
										
											2024-11-11 17:12:38 +08:00
+								                )
 								            }
 								            # 定义所需的顺序
 								            desired_order = ["申请人资格要求", "资格审查", "形式及响应性审查"]
 								            # 初始化结果字典
 								            combined_results = {}
 								            # 按指定顺序收集结果
 								            for key in desired_order:
 								                future = futures.get(key)
 								                if future:
 								                    try:
 								                        result = future.result()
 								                        if isinstance(result, dict):
 								                            combined_results.update(result)
 								                        else:
 								                            combined_results[key] = result
 								                    except Exception as e:
 								                        print(f"处理 '{key}' 时出错: {e}")
 								                        combined_results[key] = "处理失败"
 								                else:
 								                    combined_results[key] = "未提交任务"
 								        # 将各部分结果合并到最终的资格审查字典中
 								        final_result = {"资格审查": combined_results}
 								    return final_result
-.29

											
										
										
											2024-08-29 16:37:09 +08:00
 								if __name__ == "__main__":
-.8 评标修改 技术参数修改

											
										
										
											2024-11-11 17:12:38 +08:00
+								    start_time = time.time()
-.14 失败的文件进行记录，截取pdf进一步优化

											
										
										
											2025-01-14 17:10:38 +08:00
+								    output_folder = r"D:\flask_project\flask_app\static\output\output1\86317976-040a-4c91-87e2-7718da869fd0"
-.11 解析优化

											
										
										
											2024-12-11 17:42:51 +08:00
+								    evaluation_method = os.path.join(output_folder,"ztbfile_evaluation_method.pdf")
 								    qualification_path=""
 								    notice_path=os.path.join(output_folder,"ztbfile_notice.pdf")
-.21工程标快速版

											
										
										
											2024-10-21 17:31:48 +08:00
+								    # knowledge_name="zbtest20"
-.11 解析优化

											
										
										
											2024-12-11 17:42:51 +08:00
+								    clause_path = ""
-.12 资格、符合性提示词优化

											
										
										
											2024-12-12 09:44:04 +08:00
+								    tobidders_notice_table = os.path.join(output_folder,"ztbfile_tobidders_notice")
-.8 评标修改 技术参数修改

											
										
										
											2024-11-11 17:12:38 +08:00
-.12 资格、符合性提示词优化

											
										
										
											2024-12-12 09:44:04 +08:00
+								    invalid_path = os.path.join(output_folder,"ztbfile_invalid.pdf")
 								    merged_baseinfo_path = os.path.join(output_folder,"ztbfile_merged_baseinfo.pdf")
-.8 评标修改 技术参数修改

											
										
										
											2024-11-11 17:12:38 +08:00
+								    res = combine_review_standards(evaluation_method, qualification_path, output_folder, tobidders_notice_table, clause_path,
-.15 工程标资格审查提示词重改

											
										
										
											2024-11-15 11:23:04 +08:00
+								                                   invalid_path, merged_baseinfo_path,notice_path)
-.8 评标修改 技术参数修改

											
										
										
											2024-11-11 17:12:38 +08:00
+								    print(json.dumps(res, ensure_ascii=False, indent=4))
 								    end_time = time.time()
 								    print("elapsed time:" + str(end_time - start_time))