2.7 添加注释

This commit is contained in:
zy123 2025-02-11 12:21:39 +08:00
parent 6f33d65c8f
commit 4fcdf21639
2 changed files with 3 additions and 3 deletions

View File

@@ -118,10 +118,10 @@ if __name__ == "__main__":
# input_path = r"C:\Users\Administrator\Desktop\new招标文件\工程标"
# pdf_path=r"C:\Users\Administrator\Desktop\货物标\zbfiles\094定稿-湖北工业大学轻武器模拟射击设备采购项目招标文件.pdf"
# pdf_path = r"C:\Users\Administrator\Desktop\货物标\zbfiles\zbtest4_evaluation_method.pdf"
pdf_path=r"C:\Users\Administrator\Desktop\新建文件夹 (3)\新建文件夹 (2)\湖南开关实业有限公司采购习艺生产线台面、货架、工具柜、辅料柜项目(第二次).pdf"
pdf_path=r"C:\Users\Administrator\Desktop\fsdownload\d4f30cc2-1643-4576-bfb1-97a2f1e5ba51\ztbfile.pdf"
# pdf_path=r"C:\Users\Administrator\Desktop\货物标\zbfiles\zbtest4_evaluation_method.pdf"
# input_path=r"C:\Users\Administrator\Desktop\招标文件\招标test文件夹\zbtest8.pdf"
output_folder = r"C:\Users\Administrator\Desktop\新建文件夹 (3)\新建文件夹 (2)"
output_folder = r"C:\Users\Administrator\Desktop\fsdownload\d4f30cc2-1643-4576-bfb1-97a2f1e5ba51\tmp"
# selections = [1, 4] # 仅处理 selection 4、1
# selections = [1, 2, 3, 5]
# files = truncate_pdf_multiple(pdf_path, output_folder, logger, 'goods', selections) #engineering

View File

@@ -664,7 +664,7 @@ def extract_pages_tobidders_notice(pdf_path, output_folder, begin_page, common_h
r'(?<!对应\s*)(?<!根据\s*)(?<!按照\s*)(?<!见\s*)(?<!与\s*)(?<!同\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|应答人|比选申请人).*须知前附表\s*$',
regex.MULTILINE
)
start_page, mid_page, end_page = run_extraction(new_begin_pattern, extraction_stage='third')
start_page, mid_page, end_page = run_extraction(pdf_document,new_begin_pattern, extraction_stage='third')
if start_page is not None and mid_page is not None and end_page is not None:
return perform_extraction_and_save(start_page, mid_page, end_page)
else: