11.4

2024-11-05 09:46:37 +08:00 · 2024-11-05 09:46:37 +08:00 · d0e7f060c8
commit d0e7f060c8
parent 599915fe29
2 changed files with 10 additions and 8 deletions
--- a/flask_app/general/little_zbparse.py
+++ b/flask_app/general/little_zbparse.py
@@ -10,10 +10,11 @@ from flask_app.general.多线程提问 import read_questions_from_file, multi_th
 from flask_app.general.通义千问long import upload_file
 from flask_app.货物标.基础信息解析main import aggregate_basic_info_goods
 from flask_app.货物标.截取pdf货物标版 import truncate_pdf_specific_goods
-from flask_app.main.截取pdf import truncate_pdf_specific_engineering
+from flask_app.main.截取pdf import truncate_pdf_specific_engineering,truncate_pdf_main
 from flask_app.general.post_processing import inner_post_processing
 from flask_app.old_version.基础信息整合 import aggregate_basic_info_engineering

+
 def get_global_logger(unique_id):
    if unique_id is None:
        return logging.getLogger()  # 获取默认的日志器
@@ -24,7 +25,7 @@ def get_global_logger(unique_id):
 logger = None

 #货物标
-def little_parse_goods(output_folder, file_path):
+def little_parse_goods(output_folder, pdf_path):
    """
    解析货物相关的基础信息。

@@ -37,13 +38,13 @@ def little_parse_goods(output_folder, file_path):
    """
    # 截取特定的货物 PDF 文件
    selections = [1,4]  # 仅处理 selection 1和4  #公告+投标人须知
-    files = truncate_pdf_specific_goods(file_path, output_folder,selections)
+    files = truncate_pdf_specific_goods(pdf_path, output_folder,selections)
    if not files:
        raise ValueError("未找到截取后的文件。")
    # 假设最后一个文件是需要处理的基础信息文件
    baseinfo_file_path = files[-1]
    if not baseinfo_file_path:
-        baseinfo_file_path=file_path     #截取失败就传整份文件
+        baseinfo_file_path=pdf_path     #截取失败就传整份文件
    # 上传文件并获取文件 ID
    file_id = upload_file(baseinfo_file_path)
    # 注意：以下路径被硬编码，确保该路径存在并且正确
@@ -60,7 +61,7 @@ def little_parse_goods(output_folder, file_path):
    return {"基础信息": aggregated_baseinfo}


-def little_parse_engineering(output_folder, file_path):
+def little_parse_engineering(output_folder, pdf_path):
    """
    解析工程相关的基础信息。

@@ -73,13 +74,14 @@ def little_parse_engineering(output_folder, file_path):
    """
    # 截取特定的工程 PDF 文件
    selections = [ 1,4]     #公告+投标人须知前附表
-    files = truncate_pdf_specific_engineering(file_path, output_folder,selections)
+    files = truncate_pdf_specific_engineering(pdf_path, output_folder,selections)
    if not files:
        raise ValueError("未找到截取后的文件。")
    # 假设最后一个文件是需要处理的基础信息文件
    baseinfo_file_path = files[-1]
    if not baseinfo_file_path:
-        baseinfo_file_path=file_path     #截取失败就传整份文件
+        baseinfo_file_path=truncate_pdf_main(pdf_path,output_folder,5)
+        # baseinfo_file_path=pdf_path    #截取失败就传整份文件
    # 上传文件并获取文件 ID
    file_id = upload_file(baseinfo_file_path)
    # 注意：以下路径被硬编码，确保该路径存在并且正确
--- a/flask_app/货物标/截取pdf货物标版.py
+++ b/flask_app/货物标/截取pdf货物标版.py
@@ -752,6 +752,6 @@ if __name__ == "__main__":
    # selections = [1,4]
    # files=truncate_pdf_specific_goods(input_path,output_folder,selections)
    # print(files)
-    selection = 2# 例如：1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2（与评标办法一致）  4.投标人须知前附表part1 投标人须知正文part2   5-采购需求
+    selection = 1# 例如：1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2（与评标办法一致）  4.投标人须知前附表part1 投标人须知正文part2   5-采购需求
    generated_files = truncate_pdf_main(input_path, output_folder, selection)
    # print(generated_files)