11.5货物标截取优化
This commit is contained in:
parent
ccab078ac3
commit
f9f17757f3
@ -474,10 +474,6 @@ def extract_pages_twice_tobidders_notice(pdf_document, common_header,begin_page)
|
||||
if start_page1 is None or end_page1 is None:
|
||||
return "", "",""
|
||||
|
||||
# # 保存第一部分的路径
|
||||
# path1 = save_extracted_pages(pdf_document, start_page1, end_page1, pdf_path, output_folder,
|
||||
# "tobidders_notice_part1")
|
||||
|
||||
# 提取第二部分
|
||||
start_page2 = end_page1
|
||||
|
||||
@ -501,10 +497,6 @@ def extract_pages_twice_tobidders_notice(pdf_document, common_header,begin_page)
|
||||
if end_page2 is None:
|
||||
return start_page1, end_page1,end_page1
|
||||
|
||||
# # 保存第二部分的路径
|
||||
# path2 = save_extracted_pages(pdf_document, start_page2, end_page2, pdf_path, output_folder,
|
||||
# "tobidders_notice_part2")
|
||||
|
||||
return start_page1, end_page1,end_page2
|
||||
|
||||
def extract_pages_qualification(pdf_document,begin_page,begin_pattern,end_pattern, common_header):
|
||||
@ -524,7 +516,7 @@ def extract_pages_qualification(pdf_document,begin_page,begin_pattern,end_patter
|
||||
start_page is None
|
||||
):
|
||||
if begin_pattern.search(cleaned_text):
|
||||
print("附件")
|
||||
print("第二次尝试:匹配附件")
|
||||
start_page = i
|
||||
# 确定结束页
|
||||
if start_page is not None and end_pattern.search(cleaned_text):
|
||||
@ -562,8 +554,8 @@ def extract_pages_twice(pdf_path, output_folder, output_suffix, common_header,be
|
||||
|
||||
if start_page is None or end_page is None:
|
||||
if output_suffix == "qualification1":
|
||||
print(f"second: {output_suffix} 未找到起始或结束页在文件 {pdf_path} 中!")
|
||||
print("third:尝试提取评分办法章节...")
|
||||
# print(f"second: {output_suffix} 未找到起始或结束页在文件 {pdf_path} 中!")
|
||||
print("第三次尝试资格审查:尝试提取评分办法章节...")
|
||||
temp = truncate_pdf_main(pdf_path, output_folder, 2, "qualification2")
|
||||
if len(temp) > 0:
|
||||
return temp[0]
|
||||
@ -825,9 +817,9 @@ def truncate_pdf_specific_goods(pdf_path, output_folder, selections,unique_id="1
|
||||
|
||||
# TODO:交通智能系统和招标(1)(1)文件有问题 包头 绍兴 资格审查文件可能不需要默认与"evaluation"同一章 无效投标可能也要考虑 “more”的情况,类似工程标 唐山投标只有正文,没有附表
|
||||
|
||||
#ztbfile.pdf jiao通 广水农商行门禁控制主机及基础验证设备采购项目——磋商文件(定稿)(三次)_qualification2.pdf 唐山 包头
|
||||
#ztbfile.pdf少资格评审 包头少符合性评审
|
||||
if __name__ == "__main__":
|
||||
input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles"
|
||||
input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\ztbfile.pdf"
|
||||
# input_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\f8b793b5-aa60-42d3-ae59-a3f474e06610\\ztbfile.pdf"
|
||||
# input_path="C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\zbtest4_evaluation_method.pdf"
|
||||
# input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\output1\\2-招标文件_procurement.pdf"
|
||||
@ -838,6 +830,6 @@ if __name__ == "__main__":
|
||||
# selections = [1,4]
|
||||
# files=truncate_pdf_specific_goods(input_path,output_folder,selections)
|
||||
# print(files)
|
||||
selection = 4# 例如:1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2(与评标办法一致) 4.投标人须知前附表part1 投标人须知正文part2 5-采购需求
|
||||
selection = 3# 例如:1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2(与评标办法一致) 4.投标人须知前附表part1 投标人须知正文part2 5-采购需求
|
||||
generated_files = truncate_pdf_main(input_path, output_folder, selection)
|
||||
# print(generated_files)
|
||||
print(generated_files)
|
Loading…
x
Reference in New Issue
Block a user