Merge branch 'develop-1.17' into develop
This commit is contained in:
commit
ccb6043189
@ -171,7 +171,7 @@ def merge_selected_pdfs(output_folder, truncate_files, output_path, base_file_na
|
|||||||
required_suffixes = [
|
required_suffixes = [
|
||||||
f'{base_file_name}_before.pdf',
|
f'{base_file_name}_before.pdf',
|
||||||
f'{base_file_name}_notice.pdf',
|
f'{base_file_name}_notice.pdf',
|
||||||
f'{base_file_name}_tobidders_notice_table1.pdf'
|
f'{base_file_name}_tobidders_notice_part1.pdf'
|
||||||
]
|
]
|
||||||
optional_suffixes = []
|
optional_suffixes = []
|
||||||
elif mode == 'goods':
|
elif mode == 'goods':
|
||||||
|
@ -88,7 +88,6 @@ def truncate_pdf_multiple(pdf_path, output_folder, logger,mode='goods',selection
|
|||||||
truncate_files.extend(handle_exception(selection))
|
truncate_files.extend(handle_exception(selection))
|
||||||
# 定义合并后的输出路径
|
# 定义合并后的输出路径
|
||||||
merged_output_path = os.path.join(output_folder, f"{base_file_name}_merged_baseinfo.pdf")
|
merged_output_path = os.path.join(output_folder, f"{base_file_name}_merged_baseinfo.pdf")
|
||||||
|
|
||||||
# 调用 merge_selected_pdfs 并获取返回值
|
# 调用 merge_selected_pdfs 并获取返回值
|
||||||
merged_path = merge_selected_pdfs(
|
merged_path = merge_selected_pdfs(
|
||||||
output_folder,
|
output_folder,
|
||||||
@ -111,6 +110,7 @@ def truncate_pdf_multiple(pdf_path, output_folder, logger,mode='goods',selection
|
|||||||
logger.warning(f"合并失败,没有生成合并文件 for {pdf_path}")
|
logger.warning(f"合并失败,没有生成合并文件 for {pdf_path}")
|
||||||
|
|
||||||
logger.info("已截取文件路径: " + str(truncate_files))
|
logger.info("已截取文件路径: " + str(truncate_files))
|
||||||
|
print(truncate_files)
|
||||||
return truncate_files
|
return truncate_files
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
@ -23,7 +23,7 @@ def little_parse_goods(output_folder, pdf_path,logger):
|
|||||||
dict: 包含 '基础信息' 的字典。
|
dict: 包含 '基础信息' 的字典。
|
||||||
"""
|
"""
|
||||||
# 截取特定的货物 PDF 文件
|
# 截取特定的货物 PDF 文件
|
||||||
selections = [1,4,6] # 仅处理 selection 1和4 #公告+投标人须知
|
selections = [1,4] # 仅处理 selection 1和4 #公告+投标人须知
|
||||||
files = truncate_pdf_multiple(pdf_path, output_folder,logger,'goods',selections)
|
files = truncate_pdf_multiple(pdf_path, output_folder,logger,'goods',selections)
|
||||||
if not files:
|
if not files:
|
||||||
raise ValueError("未找到截取后的文件。")
|
raise ValueError("未找到截取后的文件。")
|
||||||
@ -34,8 +34,8 @@ def little_parse_goods(output_folder, pdf_path,logger):
|
|||||||
# 上传文件并获取文件 ID
|
# 上传文件并获取文件 ID
|
||||||
file_id = upload_file(baseinfo_file_path)
|
file_id = upload_file(baseinfo_file_path)
|
||||||
# 注意:以下路径被硬编码,确保该路径存在并且正确
|
# 注意:以下路径被硬编码,确保该路径存在并且正确
|
||||||
baseinfo_prompt_file_path='flask_app/static/提示词/小解析基本信息货物标.txt'
|
# baseinfo_prompt_file_path='flask_app/static/提示词/小解析基本信息货物标.txt'
|
||||||
# baseinfo_prompt_file_path = 'D:\\flask_project\\flask_app\\static\\提示词\\小解析基本信息货物标.txt'
|
baseinfo_prompt_file_path = 'D:\\flask_project\\flask_app\\static\\提示词\\小解析基本信息货物标.txt'
|
||||||
# 从提示词文件中读取问题
|
# 从提示词文件中读取问题
|
||||||
questions = read_questions_from_file(baseinfo_prompt_file_path)
|
questions = read_questions_from_file(baseinfo_prompt_file_path)
|
||||||
# 多线程处理问题,使用指定的处理模式(2 代表使用 qianwen-long)
|
# 多线程处理问题,使用指定的处理模式(2 代表使用 qianwen-long)
|
||||||
@ -59,7 +59,7 @@ def little_parse_engineering(output_folder, pdf_path,logger):
|
|||||||
dict: 包含 '基础信息' 的字典。
|
dict: 包含 '基础信息' 的字典。
|
||||||
"""
|
"""
|
||||||
# 截取特定的工程 PDF 文件
|
# 截取特定的工程 PDF 文件
|
||||||
selections = [ 1,4] #公告+投标人须知前附表
|
selections = [1,4] #公告+投标人须知前附表
|
||||||
files = truncate_pdf_multiple(pdf_path, output_folder,logger,'engineering',selections)
|
files = truncate_pdf_multiple(pdf_path, output_folder,logger,'engineering',selections)
|
||||||
if not files:
|
if not files:
|
||||||
raise ValueError("未找到截取后的文件。")
|
raise ValueError("未找到截取后的文件。")
|
||||||
@ -139,10 +139,10 @@ if __name__ == "__main__":
|
|||||||
# zb_type=2 #1:工程标 2:货物标
|
# zb_type=2 #1:工程标 2:货物标
|
||||||
# input_file = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\ztbfile.pdf"
|
# input_file = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\ztbfile.pdf"
|
||||||
|
|
||||||
output_folder=r"C:\Users\Administrator\Desktop\fsdownload\b18e9c17-e866-4116-8db3-aaab722d1463\tmp"
|
output_folder=r"C:\Users\Administrator\Desktop\fsdownload\20c9e7fa-0245-4de0-b004-d5231d0be940\tmp"
|
||||||
zb_type=1 #1:工程 2:货物
|
zb_type=2 #1:工程 2:货物
|
||||||
# input_file=r"C:\Users\Administrator\Desktop\fsdownload\865a5d46-a5f8-467a-8374-c71c415d0af9\ztbfile.pdf"
|
# input_file=r"C:\Users\Administrator\Desktop\fsdownload\865a5d46-a5f8-467a-8374-c71c415d0af9\ztbfile.pdf"
|
||||||
input_file=r"C:\Users\Administrator\Desktop\fsdownload\b18e9c17-e866-4116-8db3-aaab722d1463\ztbfile.pdf"
|
input_file=r"C:\Users\Administrator\Desktop\fsdownload\20c9e7fa-0245-4de0-b004-d5231d0be940\ztbfile.pdf"
|
||||||
final_json_path=little_parse_main(output_folder, input_file, file_type, zb_type,"122334")
|
final_json_path=little_parse_main(output_folder, input_file, file_type, zb_type,"122334")
|
||||||
with open(final_json_path, 'r', encoding='utf-8') as f:
|
with open(final_json_path, 'r', encoding='utf-8') as f:
|
||||||
# logger.info('final_json_path:' + final_json_path)
|
# logger.info('final_json_path:' + final_json_path)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user