From f9f17757f35765cc982629674ebb6e534cbb097e Mon Sep 17 00:00:00 2001
From: zy123 <646228430@qq.com>
Date: Tue, 5 Nov 2024 16:57:04 +0800
Subject: [PATCH] =?UTF-8?q?11.5=E8=B4=A7=E7=89=A9=E6=A0=87=E6=88=AA?=
 =?UTF-8?q?=E5=8F=96=E4=BC=98=E5=8C=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 flask_app/货物标/截取pdf货物标版.py | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/flask_app/货物标/截取pdf货物标版.py b/flask_app/货物标/截取pdf货物标版.py
index a3c109e..82efd13 100644
--- a/flask_app/货物标/截取pdf货物标版.py
+++ b/flask_app/货物标/截取pdf货物标版.py
@@ -474,10 +474,6 @@ def extract_pages_twice_tobidders_notice(pdf_document, common_header,begin_page)
     if start_page1 is None or end_page1 is None:
         return "", "",""
 
-    # # 保存第一部分的路径
-    # path1 = save_extracted_pages(pdf_document, start_page1, end_page1, pdf_path, output_folder,
-    #                              "tobidders_notice_part1")
-
     # 提取第二部分
     start_page2 = end_page1
 
@@ -501,10 +497,6 @@ def extract_pages_twice_tobidders_notice(pdf_document, common_header,begin_page)
     if end_page2 is None:
         return start_page1, end_page1,end_page1
 
-    # # 保存第二部分的路径
-    # path2 = save_extracted_pages(pdf_document, start_page2, end_page2, pdf_path, output_folder,
-    #                              "tobidders_notice_part2")
-
     return start_page1, end_page1,end_page2
 
 def extract_pages_qualification(pdf_document,begin_page,begin_pattern,end_pattern, common_header):
@@ -524,7 +516,7 @@ def extract_pages_qualification(pdf_document,begin_page,begin_pattern,end_patter
                     start_page is None
             ):
                 if begin_pattern.search(cleaned_text):
-                    print("附件")
+                    print("第二次尝试:匹配附件")
                     start_page = i
             # 确定结束页
             if start_page is not None and end_pattern.search(cleaned_text):
@@ -562,8 +554,8 @@ def extract_pages_twice(pdf_path, output_folder, output_suffix, common_header,be
 
         if start_page is None or end_page is None:
             if output_suffix == "qualification1":
-                print(f"second: {output_suffix} 未找到起始或结束页在文件 {pdf_path} 中！")
-                print("third:尝试提取评分办法章节...")
+                # print(f"second: {output_suffix} 未找到起始或结束页在文件 {pdf_path} 中！")
+                print("第三次尝试资格审查:尝试提取评分办法章节...")
                 temp = truncate_pdf_main(pdf_path, output_folder, 2, "qualification2")
                 if len(temp) > 0:
                     return temp[0]
@@ -825,9 +817,9 @@ def truncate_pdf_specific_goods(pdf_path, output_folder, selections,unique_id="1
 
 # TODO:交通智能系统和招标(1)(1)文件有问题  包头 绍兴    资格审查文件可能不需要默认与"evaluation"同一章  无效投标可能也要考虑 “more”的情况，类似工程标   唐山投标只有正文，没有附表
 
-#ztbfile.pdf jiao通   广水农商行门禁控制主机及基础验证设备采购项目——磋商文件（定稿）（三次）_qualification2.pdf   唐山   包头
+#ztbfile.pdf少资格评审  包头少符合性评审
 if __name__ == "__main__":
-    input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles"
+    input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\ztbfile.pdf"
     # input_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\f8b793b5-aa60-42d3-ae59-a3f474e06610\\ztbfile.pdf"
     # input_path="C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\zbtest4_evaluation_method.pdf"
     # input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\output1\\2-招标文件_procurement.pdf"
@@ -838,6 +830,6 @@ if __name__ == "__main__":
     # selections = [1,4]
     # files=truncate_pdf_specific_goods(input_path,output_folder,selections)
     # print(files)
-    selection = 4# 例如：1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2（与评标办法一致）  4.投标人须知前附表part1 投标人须知正文part2   5-采购需求
+    selection = 3# 例如：1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2（与评标办法一致）  4.投标人须知前附表part1 投标人须知正文part2   5-采购需求
     generated_files = truncate_pdf_main(input_path, output_folder, selection)
-    # print(generated_files)
\ No newline at end of file
+    print(generated_files)
\ No newline at end of file