This commit is contained in:
zy123 2024-11-05 09:46:37 +08:00
parent 599915fe29
commit d0e7f060c8
2 changed files with 10 additions and 8 deletions

View File

@ -10,10 +10,11 @@ from flask_app.general.多线程提问 import read_questions_from_file, multi_th
from flask_app.general.通义千问long import upload_file
from flask_app.货物标.基础信息解析main import aggregate_basic_info_goods
from flask_app.货物标.截取pdf货物标版 import truncate_pdf_specific_goods
from flask_app.main.截取pdf import truncate_pdf_specific_engineering
from flask_app.main.截取pdf import truncate_pdf_specific_engineering,truncate_pdf_main
from flask_app.general.post_processing import inner_post_processing
from flask_app.old_version.基础信息整合 import aggregate_basic_info_engineering
def get_global_logger(unique_id):
if unique_id is None:
return logging.getLogger() # 获取默认的日志器
@ -24,7 +25,7 @@ def get_global_logger(unique_id):
logger = None
#货物标
def little_parse_goods(output_folder, file_path):
def little_parse_goods(output_folder, pdf_path):
"""
解析货物相关的基础信息
@ -37,13 +38,13 @@ def little_parse_goods(output_folder, file_path):
"""
# 截取特定的货物 PDF 文件
selections = [1,4] # 仅处理 selection 1和4 #公告+投标人须知
files = truncate_pdf_specific_goods(file_path, output_folder,selections)
files = truncate_pdf_specific_goods(pdf_path, output_folder,selections)
if not files:
raise ValueError("未找到截取后的文件。")
# 假设最后一个文件是需要处理的基础信息文件
baseinfo_file_path = files[-1]
if not baseinfo_file_path:
baseinfo_file_path=file_path #截取失败就传整份文件
baseinfo_file_path=pdf_path #截取失败就传整份文件
# 上传文件并获取文件 ID
file_id = upload_file(baseinfo_file_path)
# 注意:以下路径被硬编码,确保该路径存在并且正确
@ -60,7 +61,7 @@ def little_parse_goods(output_folder, file_path):
return {"基础信息": aggregated_baseinfo}
def little_parse_engineering(output_folder, file_path):
def little_parse_engineering(output_folder, pdf_path):
"""
解析工程相关的基础信息
@ -73,13 +74,14 @@ def little_parse_engineering(output_folder, file_path):
"""
# 截取特定的工程 PDF 文件
selections = [ 1,4] #公告+投标人须知前附表
files = truncate_pdf_specific_engineering(file_path, output_folder,selections)
files = truncate_pdf_specific_engineering(pdf_path, output_folder,selections)
if not files:
raise ValueError("未找到截取后的文件。")
# 假设最后一个文件是需要处理的基础信息文件
baseinfo_file_path = files[-1]
if not baseinfo_file_path:
baseinfo_file_path=file_path #截取失败就传整份文件
baseinfo_file_path=truncate_pdf_main(pdf_path,output_folder,5)
# baseinfo_file_path=pdf_path #截取失败就传整份文件
# 上传文件并获取文件 ID
file_id = upload_file(baseinfo_file_path)
# 注意:以下路径被硬编码,确保该路径存在并且正确

View File

@ -752,6 +752,6 @@ if __name__ == "__main__":
# selections = [1,4]
# files=truncate_pdf_specific_goods(input_path,output_folder,selections)
# print(files)
selection = 2# 例如1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2与评标办法一致 4.投标人须知前附表part1 投标人须知正文part2 5-采购需求
selection = 1# 例如1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2与评标办法一致 4.投标人须知前附表part1 投标人须知正文part2 5-采购需求
generated_files = truncate_pdf_main(input_path, output_folder, selection)
# print(generated_files)