1.21解决bug

This commit is contained in:
zy123 2025-01-22 09:45:25 +08:00
parent e7051ea84e
commit 6e20f853f5
4 changed files with 7 additions and 7 deletions

View File

@ -119,14 +119,14 @@ if __name__ == "__main__":
# input_path = r"C:\Users\Administrator\Desktop\new招标文件\工程标"
# pdf_path=r"C:\Users\Administrator\Desktop\货物标\zbfiles\094定稿-湖北工业大学轻武器模拟射击设备采购项目招标文件.pdf"
# pdf_path = r"C:\Users\Administrator\Desktop\货物标\zbfiles\zbtest4_evaluation_method.pdf"
pdf_path=r"C:\Users\Administrator\Desktop\新建文件夹 (3)\temp\鄂州市急救中心展厅布展项目.pdf"
pdf_path=r"C:\Users\Administrator\Desktop\新建文件夹 (3)\新建文件夹\(存储存在问题)惠安县招标文件.pdf"
# pdf_path=r"C:\Users\Administrator\Desktop\货物标\zbfiles\zbtest4_evaluation_method.pdf"
# input_path=r"C:\Users\Administrator\Desktop\招标文件\招标test文件夹\zbtest8.pdf"
output_folder = r"C:\Users\Administrator\Desktop\新建文件夹 (3)\temp"
output_folder = r"C:\Users\Administrator\Desktop\新建文件夹 (3)\新建文件夹"
# selections = [1, 4] # 仅处理 selection 4、1
# selections = [1, 2, 3, 5]
# files = truncate_pdf_multiple(pdf_path, output_folder, logger, 'goods', selections) #engineering
files = truncate_pdf_multiple(pdf_path, output_folder, logger, 'goods')
files = truncate_pdf_multiple(pdf_path, output_folder, logger, 'engineering')
print(files)
# print(files[-1])
# print(files[-2])

View File

@ -4,7 +4,7 @@ import re
import regex
from PyPDF2 import PdfReader
from flask_app.货物标.截取pdf货物标版 import clean_page_content, extract_common_header
from flask_app.general.截取pdf通用函数 import clean_page_content, extract_common_header
def compare_headings(current, new):

View File

@ -47,7 +47,7 @@ def construct_judge_questions(json_data):
return questions
def merge_json_to_list(merged,tobidders_notice=""):
def merge_json_to_list(merged):
# print(json.dumps(merged,ensure_ascii=False,indent=4))
"""Merge updates into the original data by modifying specific keys based on their value ('' or ''), and create a list based on these values."""
chosen_numbers = []

View File

@ -252,11 +252,11 @@ if __name__ == "__main__":
logger = get_global_logger("123")
start_time = time.time()
# input_path = r"C:\Users\Administrator\Desktop\new招标文件\工程标"
pdf_path=r"C:\Users\Administrator\Desktop\新建文件夹 (3)\temp\file1736998876340 (1).doc"
pdf_path=r"C:\Users\Administrator\Desktop\新建文件夹 (3)\新建文件夹\(存储存在问题)惠安县招标文件.pdf"
# pdf_path = r"C:\Users\Administrator\Desktop\招标文件\招标02.pdf"
# input_path=r"C:\Users\Administrator\Desktop\招标文件\招标test文件夹\zbtest8.pdf"
output_folder = r"C:\Users\Administrator\Desktop\fsdownload\ec7d5328-9c57-450f-baf4-2e5a6f90ed1d\tmp"
output_folder = r"C:\Users\Administrator\Desktop\新建文件夹 (3)\新建文件夹"
selection = 4 # 例如1 - 招标公告, 2 - 评标办法, 3 -资格审查条件 4-投标人须知前附表+正文 5-无效标
generated_files = truncate_pdf_main_engineering(pdf_path, output_folder, selection, logger)
print(generated_files)