From d8d5ece4fc67e7ef1002fbe4673a22d672593e1c Mon Sep 17 00:00:00 2001 From: zy123 <646228430@qq.com> Date: Thu, 23 Jan 2025 14:21:39 +0800 Subject: [PATCH] =?UTF-8?q?1.23=20=E4=BF=AE=E4=BA=86=E5=BC=80=E8=AF=84?= =?UTF-8?q?=E5=AE=9A=E6=A0=87=E6=8F=90=E7=A4=BA=E8=AF=8D=20=E5=BA=9F?= =?UTF-8?q?=E6=A0=87=E6=97=A0=E6=95=88=E6=A0=87=20=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=E4=BA=86'=E5=8D=81=E3=80=81=E5=BA=9F=E6=A0=87=E6=9D=A1?= =?UTF-8?q?=E4=BB=B6'=20=E8=BF=99=E7=A7=8D=E7=9A=84=E6=8F=90=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flask_app/general/无效标和废标公共代码.py | 8 ++++---- flask_app/routes/货物标解析main.py | 1 + flask_app/货物标/截取pdf货物标版.py | 6 +++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/flask_app/general/无效标和废标公共代码.py b/flask_app/general/无效标和废标公共代码.py index 60a7be5..1bf1e36 100644 --- a/flask_app/general/无效标和废标公共代码.py +++ b/flask_app/general/无效标和废标公共代码.py @@ -351,7 +351,7 @@ def extract_text_with_keywords(processed_paragraphs, keywords, follow_up_keyword current_section_pattern = re.compile(combined_pattern) else: - process_matching_section(None) # Reusing the common function for matching sections + process_matching_section() # Reusing the common function for matching sections return current_index @@ -662,12 +662,12 @@ if __name__ == '__main__': # clause_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\77a48c63-f39f-419b-af2a-7b3dbf41b70b\\clause1.json" # doc_path="C:\\Users\\Administrator\\Desktop\\货物标\\zbfilesdocx\\磋商文件(1).docx" # doc_path = r'C:\Users\Administrator\Desktop\new招标文件\tmp\2024-贵州-贵州省罗甸县 2024 年度广州市协作资金龙坪镇、边阳镇产业路硬化建设项目.docx' - pdf_path = r'C:\Users\Administrator\Desktop\新建文件夹 (3)\废标\2025-湖北-通羊镇港口村人饮项目谈判文件.pdf' + pdf_path = r'C:\Users\Administrator\Desktop\货物标\zbfiles\output6\招标文件(107国道)_invalid.pdf' - output_dir = r"C:\Users\Administrator\Desktop\新建文件夹 (3)\废标" + output_dir = r"C:\Users\Administrator\Desktop\货物标\zbfiles\output6" # invalid_added = insert_mark(pdf_path) # invalid_added_docx = pdf2docx(invalid_added) - invalid_added_docx=r'C:\Users\Administrator\Desktop\新建文件夹 (3)\废标\invalid_added.docx' + invalid_added_docx=r'C:\Users\Administrator\Desktop\货物标\zbfiles\output6\invalid_added.docx' results = combine_find_invalid(invalid_added_docx, output_dir) end_time = time.time() print("Results:", json.dumps(results, ensure_ascii=False, indent=4)) diff --git a/flask_app/routes/货物标解析main.py b/flask_app/routes/货物标解析main.py index da2e454..5f0cf82 100644 --- a/flask_app/routes/货物标解析main.py +++ b/flask_app/routes/货物标解析main.py @@ -269,6 +269,7 @@ def goods_bid_main(output_folder, file_path, file_type, unique_id): # 货物标和工程标的资格审查整合 ##TODO:陕西省公安厅交通警察总队高速公路交通安全智能感知巡查系统项目(1)_tobidders_notice_part2.pdf 唐山市公安交通警察支队机动车查验机构视频存储回放系统竞争性谈判-招标文件正文(1)_tobidders_notice_part1.pdf 不好搞 # 无法判断用户上传的是否为乱码文件,可以考虑并行调用大模型,如果为乱码文件直接return None +# 目前偏离表.py这块提取带星要求是通过大模型,若采购需求非常长且带星要求非常多,可能会超最大输出字数限制。 #截取json文件有些问题:C:\Users\Administrator\Desktop\新建文件夹 (3)\test keywords和special... if __name__ == "__main__": diff --git a/flask_app/货物标/截取pdf货物标版.py b/flask_app/货物标/截取pdf货物标版.py index 6184c3c..6f56667 100644 --- a/flask_app/货物标/截取pdf货物标版.py +++ b/flask_app/货物标/截取pdf货物标版.py @@ -319,11 +319,11 @@ if __name__ == "__main__": logger = get_global_logger("123") # input_path = r"C:\Users\Administrator\Desktop\new招标文件\货物标" # pdf_path = r"C:\Users\Administrator\Desktop\招标文件-采购类\2024-贵州-贵州医科大学附属医院导视系统零星制作安装项目.pdf" - pdf_path=r"C:\Users\Administrator\Desktop\新建文件夹 (3)\test\泉州白濑水利枢纽工程高低压配电装置设计、制造及采购 (1).pdf" + pdf_path=r"C:\Users\Administrator\Desktop\货物标\zbfiles\招标文件(107国道).pdf" # input_path = r"C:\Users\Administrator\Desktop\货物标\zbfiles\2-招标文件(广水市教育局封闭管理).pdf" # pdf_path=r"C:\Users\Administrator\Desktop\文件解析问题\文件解析问题\1414cb9c-7bf4-401c-8761-2acde151b9c2\ztbfile.pdf" - output_folder = r"C:\Users\Administrator\Desktop\新建文件夹 (3)\test" + output_folder = r"C:\Users\Administrator\Desktop\货物标\zbfiles\output6" # output_folder = r"C:\Users\Administrator\Desktop\new招标文件\output2" - selection = 4 # 例如:1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2(与评标办法一致) 4.投标人须知前附表part1 投标人须知正文part2 5-采购需求 6-invalid_path + selection = 6 # 例如:1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2(与评标办法一致) 4.投标人须知前附表part1 投标人须知正文part2 5-采购需求 6-invalid_path generated_files = truncate_pdf_main_goods(pdf_path, output_folder, selection,logger) print(generated_files)