diff --git a/flask_app/general/little_zbparse.py b/flask_app/general/little_zbparse.py index 71960d4..4520daf 100644 --- a/flask_app/general/little_zbparse.py +++ b/flask_app/general/little_zbparse.py @@ -10,10 +10,11 @@ from flask_app.general.多线程提问 import read_questions_from_file, multi_th from flask_app.general.通义千问long import upload_file from flask_app.货物标.基础信息解析main import aggregate_basic_info_goods from flask_app.货物标.截取pdf货物标版 import truncate_pdf_specific_goods -from flask_app.main.截取pdf import truncate_pdf_specific_engineering +from flask_app.main.截取pdf import truncate_pdf_specific_engineering,truncate_pdf_main from flask_app.general.post_processing import inner_post_processing from flask_app.old_version.基础信息整合 import aggregate_basic_info_engineering + def get_global_logger(unique_id): if unique_id is None: return logging.getLogger() # 获取默认的日志器 @@ -24,7 +25,7 @@ def get_global_logger(unique_id): logger = None #货物标 -def little_parse_goods(output_folder, file_path): +def little_parse_goods(output_folder, pdf_path): """ 解析货物相关的基础信息。 @@ -37,13 +38,13 @@ def little_parse_goods(output_folder, file_path): """ # 截取特定的货物 PDF 文件 selections = [1,4] # 仅处理 selection 1和4 #公告+投标人须知 - files = truncate_pdf_specific_goods(file_path, output_folder,selections) + files = truncate_pdf_specific_goods(pdf_path, output_folder,selections) if not files: raise ValueError("未找到截取后的文件。") # 假设最后一个文件是需要处理的基础信息文件 baseinfo_file_path = files[-1] if not baseinfo_file_path: - baseinfo_file_path=file_path #截取失败就传整份文件 + baseinfo_file_path=pdf_path #截取失败就传整份文件 # 上传文件并获取文件 ID file_id = upload_file(baseinfo_file_path) # 注意:以下路径被硬编码,确保该路径存在并且正确 @@ -60,7 +61,7 @@ def little_parse_goods(output_folder, file_path): return {"基础信息": aggregated_baseinfo} -def little_parse_engineering(output_folder, file_path): +def little_parse_engineering(output_folder, pdf_path): """ 解析工程相关的基础信息。 @@ -73,13 +74,14 @@ def little_parse_engineering(output_folder, file_path): """ # 截取特定的工程 PDF 文件 selections = [ 1,4] #公告+投标人须知前附表 - files = truncate_pdf_specific_engineering(file_path, output_folder,selections) + files = truncate_pdf_specific_engineering(pdf_path, output_folder,selections) if not files: raise ValueError("未找到截取后的文件。") # 假设最后一个文件是需要处理的基础信息文件 baseinfo_file_path = files[-1] if not baseinfo_file_path: - baseinfo_file_path=file_path #截取失败就传整份文件 + baseinfo_file_path=truncate_pdf_main(pdf_path,output_folder,5) + # baseinfo_file_path=pdf_path #截取失败就传整份文件 # 上传文件并获取文件 ID file_id = upload_file(baseinfo_file_path) # 注意:以下路径被硬编码,确保该路径存在并且正确 diff --git a/flask_app/货物标/截取pdf货物标版.py b/flask_app/货物标/截取pdf货物标版.py index a224ab3..e051811 100644 --- a/flask_app/货物标/截取pdf货物标版.py +++ b/flask_app/货物标/截取pdf货物标版.py @@ -752,6 +752,6 @@ if __name__ == "__main__": # selections = [1,4] # files=truncate_pdf_specific_goods(input_path,output_folder,selections) # print(files) - selection = 2# 例如:1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2(与评标办法一致) 4.投标人须知前附表part1 投标人须知正文part2 5-采购需求 + selection = 1# 例如:1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2(与评标办法一致) 4.投标人须知前附表part1 投标人须知正文part2 5-采购需求 generated_files = truncate_pdf_main(input_path, output_folder, selection) # print(generated_files) \ No newline at end of file