2.7 添加注释
This commit is contained in:
parent
6f33d65c8f
commit
4fcdf21639
@ -118,10 +118,10 @@ if __name__ == "__main__":
|
|||||||
# input_path = r"C:\Users\Administrator\Desktop\new招标文件\工程标"
|
# input_path = r"C:\Users\Administrator\Desktop\new招标文件\工程标"
|
||||||
# pdf_path=r"C:\Users\Administrator\Desktop\货物标\zbfiles\094定稿-湖北工业大学轻武器模拟射击设备采购项目招标文件.pdf"
|
# pdf_path=r"C:\Users\Administrator\Desktop\货物标\zbfiles\094定稿-湖北工业大学轻武器模拟射击设备采购项目招标文件.pdf"
|
||||||
# pdf_path = r"C:\Users\Administrator\Desktop\货物标\zbfiles\zbtest4_evaluation_method.pdf"
|
# pdf_path = r"C:\Users\Administrator\Desktop\货物标\zbfiles\zbtest4_evaluation_method.pdf"
|
||||||
pdf_path=r"C:\Users\Administrator\Desktop\新建文件夹 (3)\新建文件夹 (2)\湖南开关实业有限公司采购习艺生产线台面、货架、工具柜、辅料柜项目(第二次).pdf"
|
pdf_path=r"C:\Users\Administrator\Desktop\fsdownload\d4f30cc2-1643-4576-bfb1-97a2f1e5ba51\ztbfile.pdf"
|
||||||
# pdf_path=r"C:\Users\Administrator\Desktop\货物标\zbfiles\zbtest4_evaluation_method.pdf"
|
# pdf_path=r"C:\Users\Administrator\Desktop\货物标\zbfiles\zbtest4_evaluation_method.pdf"
|
||||||
# input_path=r"C:\Users\Administrator\Desktop\招标文件\招标test文件夹\zbtest8.pdf"
|
# input_path=r"C:\Users\Administrator\Desktop\招标文件\招标test文件夹\zbtest8.pdf"
|
||||||
output_folder = r"C:\Users\Administrator\Desktop\新建文件夹 (3)\新建文件夹 (2)"
|
output_folder = r"C:\Users\Administrator\Desktop\fsdownload\d4f30cc2-1643-4576-bfb1-97a2f1e5ba51\tmp"
|
||||||
# selections = [1, 4] # 仅处理 selection 4、1
|
# selections = [1, 4] # 仅处理 selection 4、1
|
||||||
# selections = [1, 2, 3, 5]
|
# selections = [1, 2, 3, 5]
|
||||||
# files = truncate_pdf_multiple(pdf_path, output_folder, logger, 'goods', selections) #engineering
|
# files = truncate_pdf_multiple(pdf_path, output_folder, logger, 'goods', selections) #engineering
|
||||||
|
@ -664,7 +664,7 @@ def extract_pages_tobidders_notice(pdf_path, output_folder, begin_page, common_h
|
|||||||
r'(?<!对应\s*)(?<!根据\s*)(?<!按照\s*)(?<!见\s*)(?<!与\s*)(?<!同\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|应答人|比选申请人).*须知前附表\s*$',
|
r'(?<!对应\s*)(?<!根据\s*)(?<!按照\s*)(?<!见\s*)(?<!与\s*)(?<!同\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|应答人|比选申请人).*须知前附表\s*$',
|
||||||
regex.MULTILINE
|
regex.MULTILINE
|
||||||
)
|
)
|
||||||
start_page, mid_page, end_page = run_extraction(new_begin_pattern, extraction_stage='third')
|
start_page, mid_page, end_page = run_extraction(pdf_document,new_begin_pattern, extraction_stage='third')
|
||||||
if start_page is not None and mid_page is not None and end_page is not None:
|
if start_page is not None and mid_page is not None and end_page is not None:
|
||||||
return perform_extraction_and_save(start_page, mid_page, end_page)
|
return perform_extraction_and_save(start_page, mid_page, end_page)
|
||||||
else:
|
else:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user