From cad4affc9bb28145d1c1347f02633381af4b2940 Mon Sep 17 00:00:00 2001 From: zy123 <646228430@qq.com> Date: Tue, 21 Jan 2025 17:20:42 +0800 Subject: [PATCH] =?UTF-8?q?1.21=E8=A7=A3=E5=86=B3bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flask_app/general/merge_pdfs.py | 2 +- flask_app/general/截取pdf_main.py | 2 +- flask_app/routes/小解析main.py | 14 +++++++------- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/flask_app/general/merge_pdfs.py b/flask_app/general/merge_pdfs.py index fe60fcb..f2bc4c8 100644 --- a/flask_app/general/merge_pdfs.py +++ b/flask_app/general/merge_pdfs.py @@ -118,7 +118,7 @@ def merge_selected_pdfs(output_folder, truncate_files, output_path, base_file_na required_suffixes = [ f'{base_file_name}_before.pdf', f'{base_file_name}_notice.pdf', - f'{base_file_name}_tobidders_notice_table1.pdf' + f'{base_file_name}_tobidders_notice_part1.pdf' ] optional_suffixes = [] elif mode == 'goods': diff --git a/flask_app/general/截取pdf_main.py b/flask_app/general/截取pdf_main.py index 3bb086c..29e0b01 100644 --- a/flask_app/general/截取pdf_main.py +++ b/flask_app/general/截取pdf_main.py @@ -88,7 +88,6 @@ def truncate_pdf_multiple(pdf_path, output_folder, logger,mode='goods',selection truncate_files.extend(handle_exception(selection)) # 定义合并后的输出路径 merged_output_path = os.path.join(output_folder, f"{base_file_name}_merged_baseinfo.pdf") - # 调用 merge_selected_pdfs 并获取返回值 merged_path = merge_selected_pdfs( output_folder, @@ -111,6 +110,7 @@ def truncate_pdf_multiple(pdf_path, output_folder, logger,mode='goods',selection logger.warning(f"合并失败,没有生成合并文件 for {pdf_path}") logger.info("已截取文件路径: " + str(truncate_files)) + print(truncate_files) return truncate_files if __name__ == "__main__": diff --git a/flask_app/routes/小解析main.py b/flask_app/routes/小解析main.py index 5a8a54f..54b5ee3 100644 --- a/flask_app/routes/小解析main.py +++ b/flask_app/routes/小解析main.py @@ -23,7 +23,7 @@ def little_parse_goods(output_folder, pdf_path,logger): dict: 包含 '基础信息' 的字典。 """ # 截取特定的货物 PDF 文件 - selections = [1,4,6] # 仅处理 selection 1和4 #公告+投标人须知 + selections = [1,4] # 仅处理 selection 1和4 #公告+投标人须知 files = truncate_pdf_multiple(pdf_path, output_folder,logger,'goods',selections) if not files: raise ValueError("未找到截取后的文件。") @@ -34,8 +34,8 @@ def little_parse_goods(output_folder, pdf_path,logger): # 上传文件并获取文件 ID file_id = upload_file(baseinfo_file_path) # 注意:以下路径被硬编码,确保该路径存在并且正确 - baseinfo_prompt_file_path='flask_app/static/提示词/小解析基本信息货物标.txt' - # baseinfo_prompt_file_path = 'D:\\flask_project\\flask_app\\static\\提示词\\小解析基本信息货物标.txt' + # baseinfo_prompt_file_path='flask_app/static/提示词/小解析基本信息货物标.txt' + baseinfo_prompt_file_path = 'D:\\flask_project\\flask_app\\static\\提示词\\小解析基本信息货物标.txt' # 从提示词文件中读取问题 questions = read_questions_from_file(baseinfo_prompt_file_path) # 多线程处理问题,使用指定的处理模式(2 代表使用 qianwen-long) @@ -59,7 +59,7 @@ def little_parse_engineering(output_folder, pdf_path,logger): dict: 包含 '基础信息' 的字典。 """ # 截取特定的工程 PDF 文件 - selections = [ 1,4] #公告+投标人须知前附表 + selections = [1,4] #公告+投标人须知前附表 files = truncate_pdf_multiple(pdf_path, output_folder,logger,'engineering',selections) if not files: raise ValueError("未找到截取后的文件。") @@ -139,10 +139,10 @@ if __name__ == "__main__": # zb_type=2 #1:工程标 2:货物标 # input_file = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\ztbfile.pdf" - output_folder=r"C:\Users\Administrator\Desktop\fsdownload\b18e9c17-e866-4116-8db3-aaab722d1463\tmp" - zb_type=1 #1:工程 2:货物 + output_folder=r"C:\Users\Administrator\Desktop\fsdownload\20c9e7fa-0245-4de0-b004-d5231d0be940\tmp" + zb_type=2 #1:工程 2:货物 # input_file=r"C:\Users\Administrator\Desktop\fsdownload\865a5d46-a5f8-467a-8374-c71c415d0af9\ztbfile.pdf" - input_file=r"C:\Users\Administrator\Desktop\fsdownload\b18e9c17-e866-4116-8db3-aaab722d1463\ztbfile.pdf" + input_file=r"C:\Users\Administrator\Desktop\fsdownload\20c9e7fa-0245-4de0-b004-d5231d0be940\ztbfile.pdf" final_json_path=little_parse_main(output_folder, input_file, file_type, zb_type,"122334") with open(final_json_path, 'r', encoding='utf-8') as f: # logger.info('final_json_path:' + final_json_path)