From f9f17757f35765cc982629674ebb6e534cbb097e Mon Sep 17 00:00:00 2001 From: zy123 <646228430@qq.com> Date: Tue, 5 Nov 2024 16:57:04 +0800 Subject: [PATCH] =?UTF-8?q?11.5=E8=B4=A7=E7=89=A9=E6=A0=87=E6=88=AA?= =?UTF-8?q?=E5=8F=96=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flask_app/货物标/截取pdf货物标版.py | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/flask_app/货物标/截取pdf货物标版.py b/flask_app/货物标/截取pdf货物标版.py index a3c109e..82efd13 100644 --- a/flask_app/货物标/截取pdf货物标版.py +++ b/flask_app/货物标/截取pdf货物标版.py @@ -474,10 +474,6 @@ def extract_pages_twice_tobidders_notice(pdf_document, common_header,begin_page) if start_page1 is None or end_page1 is None: return "", "","" - # # 保存第一部分的路径 - # path1 = save_extracted_pages(pdf_document, start_page1, end_page1, pdf_path, output_folder, - # "tobidders_notice_part1") - # 提取第二部分 start_page2 = end_page1 @@ -501,10 +497,6 @@ def extract_pages_twice_tobidders_notice(pdf_document, common_header,begin_page) if end_page2 is None: return start_page1, end_page1,end_page1 - # # 保存第二部分的路径 - # path2 = save_extracted_pages(pdf_document, start_page2, end_page2, pdf_path, output_folder, - # "tobidders_notice_part2") - return start_page1, end_page1,end_page2 def extract_pages_qualification(pdf_document,begin_page,begin_pattern,end_pattern, common_header): @@ -524,7 +516,7 @@ def extract_pages_qualification(pdf_document,begin_page,begin_pattern,end_patter start_page is None ): if begin_pattern.search(cleaned_text): - print("附件") + print("第二次尝试:匹配附件") start_page = i # 确定结束页 if start_page is not None and end_pattern.search(cleaned_text): @@ -562,8 +554,8 @@ def extract_pages_twice(pdf_path, output_folder, output_suffix, common_header,be if start_page is None or end_page is None: if output_suffix == "qualification1": - print(f"second: {output_suffix} 未找到起始或结束页在文件 {pdf_path} 中!") - print("third:尝试提取评分办法章节...") + # print(f"second: {output_suffix} 未找到起始或结束页在文件 {pdf_path} 中!") + print("第三次尝试资格审查:尝试提取评分办法章节...") temp = truncate_pdf_main(pdf_path, output_folder, 2, "qualification2") if len(temp) > 0: return temp[0] @@ -825,9 +817,9 @@ def truncate_pdf_specific_goods(pdf_path, output_folder, selections,unique_id="1 # TODO:交通智能系统和招标(1)(1)文件有问题 包头 绍兴 资格审查文件可能不需要默认与"evaluation"同一章 无效投标可能也要考虑 “more”的情况,类似工程标 唐山投标只有正文,没有附表 -#ztbfile.pdf jiao通 广水农商行门禁控制主机及基础验证设备采购项目——磋商文件(定稿)(三次)_qualification2.pdf 唐山 包头 +#ztbfile.pdf少资格评审 包头少符合性评审 if __name__ == "__main__": - input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles" + input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\ztbfile.pdf" # input_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\f8b793b5-aa60-42d3-ae59-a3f474e06610\\ztbfile.pdf" # input_path="C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\zbtest4_evaluation_method.pdf" # input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\output1\\2-招标文件_procurement.pdf" @@ -838,6 +830,6 @@ if __name__ == "__main__": # selections = [1,4] # files=truncate_pdf_specific_goods(input_path,output_folder,selections) # print(files) - selection = 4# 例如:1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2(与评标办法一致) 4.投标人须知前附表part1 投标人须知正文part2 5-采购需求 + selection = 3# 例如:1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2(与评标办法一致) 4.投标人须知前附表part1 投标人须知正文part2 5-采购需求 generated_files = truncate_pdf_main(input_path, output_folder, selection) - # print(generated_files) \ No newline at end of file + print(generated_files) \ No newline at end of file