import io
import os
import re
from typing import Iterable, Optional

from PyPDF2 import PdfReader, PdfWriter

# Filename markers that judge_file_exist swaps for the requested suffix.
_QUALIFICATION_MARKER = re.compile(r"_qualification[12]")


def _same_path(first: str, second: str) -> bool:
    """Return True when both strings name the same filesystem location."""
    return (
        os.path.normcase(os.path.abspath(first))
        == os.path.normcase(os.path.abspath(second))
    )


def merge_pdfs(paths: Iterable[str], output_path: str) -> None:
    """Concatenate the PDFs in *paths* (in order) and write them to *output_path*.

    When the first page of a file has the same extracted text as the last
    page of the previous file, that first page is treated as a duplicate and
    skipped. Pages without any extractable text are never treated as
    duplicates, because an empty string carries no evidence of equality.

    Args:
        paths: Paths of the PDF files to merge, in output order.
        output_path: Destination file. It may be one of *paths*.
    """
    pdf_writer = PdfWriter()
    last_page_text = None  # text of the previous PDF's last page

    for path in paths:
        pages = PdfReader(path).pages
        if len(pages) == 0:
            continue

        start_index = 0
        # Only compare when the previous file ended with real text;
        # "" == "" would otherwise drop text-less (e.g. scanned) pages.
        if last_page_text:
            first_page_text = pages[0].extract_text() or ""
            if first_page_text == last_page_text:
                start_index = 1  # duplicate boundary page: skip it

        for index in range(start_index, len(pages)):
            pdf_writer.add_page(pages[index])

        last_page_text = pages[-1].extract_text() or ""

    # Serialise fully in memory before touching the destination: callers pass
    # an input path as output_path, and a failure while writing must not leave
    # that input truncated.
    buffer = io.BytesIO()
    pdf_writer.write(buffer)
    with open(output_path, "wb") as out:
        out.write(buffer.getvalue())


def judge_file_exist(original_path: str, new_suffix: str) -> Optional[str]:
    """Return the sibling of *original_path* carrying *new_suffix*, if it exists.

    Every "_qualification1" / "_qualification2" marker in the file name is
    replaced with "_<new_suffix>" (e.g. "x_qualification1.pdf" with
    new_suffix "qualification2" becomes "x_qualification2.pdf"). If the name
    contains no marker the candidate equals *original_path* itself.

    Args:
        original_path: Path whose file name is used as the template.
        new_suffix: Replacement text, without the leading underscore.

    Returns:
        The candidate path when it is an existing file, otherwise None.
    """
    directory = os.path.dirname(original_path)
    original_filename = os.path.basename(original_path)

    # Single pass with a callable replacement: text inserted for one marker is
    # never re-scanned, and backslashes in new_suffix are taken literally.
    new_filename = _QUALIFICATION_MARKER.sub(
        lambda _match: f"_{new_suffix}", original_filename
    )

    new_file_path = os.path.join(directory, new_filename)
    if os.path.isfile(new_file_path):
        return new_file_path
    return None


def merge_and_cleanup(output_pdf_path: str, suffix_to_merge: str) -> None:
    """Merge the *suffix_to_merge* sibling into *output_pdf_path*, then delete it.

    Does nothing when no sibling file exists, or when the sibling resolves to
    *output_pdf_path* itself (merging and then removing it would destroy the
    output).
    """
    another_file_path = judge_file_exist(output_pdf_path, suffix_to_merge)
    if not another_file_path:
        return
    if _same_path(another_file_path, output_pdf_path):
        return

    merge_pdfs([output_pdf_path, another_file_path], output_pdf_path)
    os.remove(another_file_path)
    print(f"文件 {another_file_path} 已删除。")


def find_and_merge(target_path: str, output_suffix: str) -> None:
    """Append a file ending in *output_suffix* from the same directory to *target_path*.

    The directory of *target_path* is scanned in sorted order and the last
    matching name wins; the target file itself is never selected. The result
    is written back to *target_path*. When nothing matches, *target_path* is
    rewritten on its own.
    """
    # dirname() is "" for a bare file name, which os.listdir() rejects.
    directory = os.path.dirname(target_path) or "."
    full_path = ""

    # Sorted so the chosen file does not depend on OS listing order.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(output_suffix):
            continue
        candidate = os.path.join(directory, filename)
        if _same_path(candidate, target_path):
            continue  # never merge the target with itself
        full_path = candidate

    paths = [target_path, full_path] if full_path else [target_path]
    merge_pdfs(paths, target_path)