diff --git a/flask_app/general/format_change.py b/flask_app/general/format_change.py
index fcf45c7..a69da6b 100644
--- a/flask_app/general/format_change.py
+++ b/flask_app/general/format_change.py
@@ -49,115 +49,118 @@ def pdf2docx(local_path_in):
     print(f"format_change p2d:have downloaded file to: {downloaded_filepath}")
     return downloaded_filepath
 
-# def doc2docx(local_path_in):
-#     remote_url = 'http://47.98.59.178:8000/v3/3rd/files/transfer/d2d'
-#     receive_download_url = upload_file(local_path_in, remote_url)
-#     print(receive_download_url)
-#     filename, folder = get_filename_and_folder(local_path_in)  # 输入输出在同一个文件夹
-#     local_filename = os.path.join(folder, filename)  # 输出文件名
-#     downloaded_filepath, file_type = download_file(receive_download_url, local_filename)
-#     print(f"format_change d2d:have downloaded file to: {downloaded_filepath}")
-#     return downloaded_filepath
-# def docx2pdf(local_path_in):
-#     remote_url = 'http://47.98.59.178:8000/v3/3rd/files/transfer/d2p'
-#     receive_download_url = upload_file(local_path_in, remote_url)
-#     filename, folder = get_filename_and_folder(local_path_in)  # 输入输出在同一个文件夹
-#     local_filename = os.path.join(folder, filename)  # 输出文件名
-#     downloaded_filepath,file_type = download_file(receive_download_url, local_filename)
-#     print(f"format_change d2p:have downloaded file to: {downloaded_filepath}")
-#     return downloaded_filepath
-def docx2pdf(file_path):
-    """
-    将本地的 .docx 或 .doc 文件转换为 .pdf 文件。
+def doc2docx(local_path_in):
+    remote_url = 'http://47.98.59.178:8000/v3/3rd/files/transfer/d2d'
+    receive_download_url = upload_file(local_path_in, remote_url)
+    print(receive_download_url)
+    filename, folder = get_filename_and_folder(local_path_in)  # 输入输出在同一个文件夹
+    local_filename = os.path.join(folder, filename)  # 输出文件名
+    downloaded_filepath, file_type = download_file(receive_download_url, local_filename)
+    print(f"format_change d2d:have downloaded file to: {downloaded_filepath}")
+    return downloaded_filepath
+def docx2pdf(local_path_in):
+    remote_url = 'http://47.98.59.178:8000/v3/3rd/files/transfer/d2p'
+    receive_download_url = upload_file(local_path_in, remote_url)
+    filename, folder = get_filename_and_folder(local_path_in)  # 输入输出在同一个文件夹
+    local_filename = os.path.join(folder, filename)  # 输出文件名
+    downloaded_filepath,file_type = download_file(receive_download_url, local_filename)
+    print(f"format_change d2p:have downloaded file to: {downloaded_filepath}")
+    return downloaded_filepath
 
-    参数：
-    - file_path: str, 本地文件的路径，支持 .docx 和 .doc 格式。
-    """
-    # 检查文件是否存在
-    if not os.path.isfile(file_path):
-        raise FileNotFoundError(f"文件未找到: {file_path}")
-
-    # 获取文件名和扩展名
-    base_name = os.path.basename(file_path)
-    name, ext = os.path.splitext(base_name)
-    ext = ext.lower().lstrip('.')
-
-    if ext not in ['docx', 'doc']:
-        raise ValueError(f"doc2pdf 仅支持 .docx 和 .doc 文件，当前文件扩展名为: .{ext}")
-
-    # 定义转换接口
-    endpoint = 'http://120.26.236.97:5008/convert_to_pdf'
-
-    # 获取文件所在目录
-    output_dir = os.path.dirname(file_path)
-
-    # 准备上传的文件
-    with open(file_path, 'rb') as f:
-        files = {'file': (base_name, f)}
-        try:
-            print(f"正在将 {base_name} 转换为 .pdf 格式...")
-            response = requests.post(endpoint, files=files)
-            response.raise_for_status()  # 检查请求是否成功
-        except requests.RequestException as e:
-            print(f"转换过程中发生错误: {e}")
-            return
-
-    # 准备保存转换后文件的路径
-    output_file_name = f"{name}.pdf"
-    output_path = os.path.join(output_dir, output_file_name)
-
-    # 保存转换后的文件
-    with open(output_path, 'wb') as out_file:
-        out_file.write(response.content)
-
-    print(f"文件已成功转换并保存至: {output_path}")
-
-
-def doc2docx(file_path):
-    """
-    将本地的 .doc 文件转换为 .docx 文件。
-
-    参数：
-    - file_path: str, 本地文件的路径，支持 .doc 格式。
-    """
-    # 检查文件是否存在
-    if not os.path.isfile(file_path):
-        raise FileNotFoundError(f"文件未找到: {file_path}")
-
-    # 获取文件名和扩展名
-    base_name = os.path.basename(file_path)
-    name, ext = os.path.splitext(base_name)
-    ext = ext.lower().lstrip('.')
-
-    if ext != 'doc':
-        raise ValueError(f"doc2docx 仅支持 .doc 文件，当前文件扩展名为: .{ext}")
-
-    # 定义转换接口
-    endpoint = 'http://120.26.236.97:5008/convert_to_docx'
-
-    # 获取文件所在目录
-    output_dir = os.path.dirname(file_path)
-
-    # 准备上传的文件
-    with open(file_path, 'rb') as f:
-        files = {'file': (base_name, f)}
-        try:
-            print(f"正在将 {base_name} 转换为 .docx 格式...")
-            response = requests.post(endpoint, files=files)
-            response.raise_for_status()  # 检查请求是否成功
-        except requests.RequestException as e:
-            print(f"转换过程中发生错误: {e}")
-            return
-
-    # 准备保存转换后文件的路径
-    output_file_name = f"{name}.docx"
-    output_path = os.path.join(output_dir, output_file_name)
-
-    # 保存转换后的文件
-    with open(output_path, 'wb') as out_file:
-        out_file.write(response.content)
-
-    print(f"文件已成功转换并保存至: {output_path}")
+# def docx2pdf(file_path):
+#     """
+#     将本地的 .docx 或 .doc 文件转换为 .pdf 文件。
+#
+#     参数：
+#     - file_path: str, 本地文件的路径，支持 .docx 和 .doc 格式。
+#     """
+#     # 检查文件是否存在
+#     if not os.path.isfile(file_path):
+#         raise FileNotFoundError(f"文件未找到: {file_path}")
+#
+#     # 获取文件名和扩展名
+#     base_name = os.path.basename(file_path)
+#     name, ext = os.path.splitext(base_name)
+#     ext = ext.lower().lstrip('.')
+#
+#     if ext not in ['docx', 'doc']:
+#         raise ValueError(f"doc2pdf 仅支持 .docx 和 .doc 文件，当前文件扩展名为: .{ext}")
+#
+#     # 定义转换接口
+#     endpoint = 'http://120.26.236.97:5008/convert_to_pdf'
+#
+#     # 获取文件所在目录
+#     output_dir = os.path.dirname(file_path)
+#
+#     # 准备上传的文件
+#     with open(file_path, 'rb') as f:
+#         files = {'file': (base_name, f)}
+#         try:
+#             print(f"正在将 {base_name} 转换为 .pdf 格式...")
+#             response = requests.post(endpoint, files=files)
+#             response.raise_for_status()  # 检查请求是否成功
+#         except requests.RequestException as e:
+#             print(f"转换过程中发生错误: {e}")
+#             return
+#
+#     # 准备保存转换后文件的路径
+#     output_file_name = f"{name}.pdf"
+#     output_path = os.path.join(output_dir, output_file_name)
+#
+#     # 保存转换后的文件
+#     with open(output_path, 'wb') as out_file:
+#         out_file.write(response.content)
+#
+#     print(f"文件已成功转换并保存至: {output_path}")
+#     return output_path
+#
+#
+# def doc2docx(file_path):
+#     """
+#     将本地的 .doc 文件转换为 .docx 文件。
+#
+#     参数：
+#     - file_path: str, 本地文件的路径，支持 .doc 格式。
+#     """
+#     # 检查文件是否存在
+#     if not os.path.isfile(file_path):
+#         raise FileNotFoundError(f"文件未找到: {file_path}")
+#
+#     # 获取文件名和扩展名
+#     base_name = os.path.basename(file_path)
+#     name, ext = os.path.splitext(base_name)
+#     ext = ext.lower().lstrip('.')
+#
+#     if ext != 'doc':
+#         raise ValueError(f"doc2docx 仅支持 .doc 文件，当前文件扩展名为: .{ext}")
+#
+#     # 定义转换接口
+#     endpoint = 'http://120.26.236.97:5008/convert_to_docx'
+#
+#     # 获取文件所在目录
+#     output_dir = os.path.dirname(file_path)
+#
+#     # 准备上传的文件
+#     with open(file_path, 'rb') as f:
+#         files = {'file': (base_name, f)}
+#         try:
+#             print(f"正在将 {base_name} 转换为 .docx 格式...")
+#             response = requests.post(endpoint, files=files)
+#             response.raise_for_status()  # 检查请求是否成功
+#         except requests.RequestException as e:
+#             print(f"转换过程中发生错误: {e}")
+#             return
+#
+#     # 准备保存转换后文件的路径
+#     output_file_name = f"{name}.docx"
+#     output_path = os.path.join(output_dir, output_file_name)
+#
+#     # 保存转换后的文件
+#     with open(output_path, 'wb') as out_file:
+#         out_file.write(response.content)
+#
+#     print(f"文件已成功转换并保存至: {output_path}")
+#     return output_path
 
 
 
@@ -168,8 +171,9 @@ if __name__ == '__main__':
     # local_path_in="C:\\Users\\Administrator\\Desktop\\fsdownload\\ztbfile.pdf"
     # downloaded_file=doc2docx(local_path_in)
     # downloaded_file=pdf2docx(local_path_in)
-    downloaded_file=docx2pdf(local_path_in)
-    print(downloaded_file)
+    for i in range(1):
+        downloaded_file=docx2pdf(local_path_in)
+        print(downloaded_file)
 
 
 
diff --git a/flask_app/testdir/test1.py b/flask_app/testdir/截取文件格式.py
similarity index 62%
rename from flask_app/testdir/test1.py
rename to flask_app/testdir/截取文件格式.py
index 8ec64fc..2f7221a 100644
--- a/flask_app/testdir/test1.py
+++ b/flask_app/testdir/截取文件格式.py
@@ -1,8 +1,7 @@
 from PyPDF2 import PdfReader, PdfWriter
 import re  # 导入正则表达式库
 import os  # 用于文件和文件夹操作
-from flask_app.general.format_change import docx2pdf  # 确保此模块可用
-from flask_app.general.merge_pdfs import merge_pdfs  # 确保此模块可用
+# from flask_app.general.format_change import docx2pdf  # 确保此模块可用
 
 def clean_page_content(text, common_header):
     """
@@ -14,13 +13,14 @@ def clean_page_content(text, common_header):
                 # 替换首次出现的完整行
                 text = re.sub(r'^' + re.escape(header_line.strip()) + r'\n?', '', text, count=1)
 
-    # 删除页码，支持多种格式
-    text = re.sub(r'^\s*\d+\s*(?=\D)', '', text)  # 删除开头的页码，仅当紧跟非数字字符时
-    text = re.sub(r'\s+\d+\s*$', '', text)        # 删除结尾的页码
-    text = re.sub(r'\s*\/\s*\d+\s*', '', text)    # 删除形如 /129 的页码
-    text = re.sub(r'\s*[—-]\s*\d+\s*[—-]\s*', '', text)  # 删除形如 '—2—' 或 '-2-' 的页码
+    # 删除页码，合并多个正则表达式为一个
+    text = re.sub(
+        r'(^\s*\d+\s*(?=\D))|(\s+\d+\s*$)|(\s*/\s*\d+\s*)|(\s*[—-]\s*\d+\s*[—-]\s*)',
+        '',
+        text,
+        flags=re.MULTILINE
+    )
     return text
-
 def extract_common_header(pdf_path):
     """
     从PDF的前几页提取公共抬头。
@@ -39,34 +39,31 @@ def extract_common_header(pdf_path):
         if text:
             # 只取每页的前三行
             first_lines = text.strip().split('\n')[:3]
-            headers.append(first_lines)
+            headers.append(set(line.strip() for line in first_lines if line.strip()))
 
     if len(headers) < 2:
         return ""  # 如果没有足够的页来比较，返回空字符串
 
     # 寻找每一行中的公共部分，按顺序保留
-    common_headers = []
-    for lines in zip(*headers):
-        first_words = lines[0].split()
-        common_line = [word for word in first_words if all(word in line.split() for line in lines[1:])]
-        if common_line:
-            common_headers.append(' '.join(common_line))
-
+    common_headers = set.intersection(*headers)
     return '\n'.join(common_headers)
 
+
 def is_pdf_or_doc(filename):
     """
     判断文件是否为PDF或Word文档。
     """
     return filename.lower().endswith(('.pdf', '.doc', '.docx'))
 
-def convert_to_pdf(file_path):
-    """
-    如果是Word文档，则转换为PDF。
-    """
-    if file_path.lower().endswith(('.doc', '.docx')):
-        return docx2pdf(file_path)
-    return file_path
+
+# def convert_to_pdf(file_path):
+#     """
+#     如果是Word文档，则转换为PDF。
+#     """
+#     if file_path.lower().endswith(('.doc', '.docx')):
+#         return docx2pdf(file_path)
+#     return file_path
+
 
 def save_extracted_pages(pdf_document, start_page, end_page, pdf_path, output_folder, output_suffix):
     """
@@ -76,8 +73,8 @@ def save_extracted_pages(pdf_document, start_page, end_page, pdf_path, output_fo
         base_file_name = os.path.splitext(os.path.basename(pdf_path))[0]
         output_pdf_path = os.path.join(output_folder, f"{base_file_name}_{output_suffix}.pdf")
 
-        if start_page < 0 or end_page >= len(pdf_document.pages) or start_page > end_page:
-            print(f"无效的页面范围: {start_page} 到 {end_page}")
+        if start_page is None or end_page is None or start_page > end_page:
+            print(f"无效的页面范围: {start_page} 到 {end_page} 文件: {pdf_path}")
             return ""
 
         output_doc = PdfWriter()
@@ -88,24 +85,32 @@ def save_extracted_pages(pdf_document, start_page, end_page, pdf_path, output_fo
         print(f"{output_suffix} 已截取并保存页面从 {start_page} 到 {end_page} 为 {output_pdf_path}")
         return output_pdf_path
     except Exception as e:
-        print(f"Error in save_extracted_pages: {e}")
+        print(f"Error in save_extracted_pages for file {pdf_path}: {e}")
         return ""  # 返回空字符串
 
-def extract_pages_generic(pdf_document, begin_pattern, end_pattern, begin_page, common_header):
+
+def find_page_indices(pdf_document, begin_pattern, end_pattern, begin_page, common_header):
     """
-    通用函数，根据模式提取起始页和结束页。
+    查找起始页和结束页的索引。
     """
     start_page = None
     end_page = None
+    total_pages = len(pdf_document.pages)
+
     for i, page in enumerate(pdf_document.pages):
+        if i <= begin_page:
+            continue
         text = page.extract_text() or ""
         cleaned_text = clean_page_content(text, common_header)
-        if start_page is None and re.search(begin_pattern, cleaned_text) and i > begin_page:
+        if start_page is None and re.search(begin_pattern, cleaned_text):
             start_page = i
-        if start_page is not None and re.search(end_pattern, cleaned_text) and i > start_page:
+            continue
+        if start_page is not None and re.search(end_pattern, cleaned_text):
             end_page = i
             break
-    return start_page, end_page
+
+    return start_page, end_page if end_page else total_pages - 1
+
 
 def extract_pages(pdf_path, output_folder, begin_pattern, begin_page, end_pattern, output_suffix):
     """
@@ -114,37 +119,45 @@ def extract_pages(pdf_path, output_folder, begin_pattern, begin_page, end_patter
     try:
         common_header = extract_common_header(pdf_path)
         pdf_document = PdfReader(pdf_path)
-        total_pages = len(pdf_document.pages) - 1  # 获取总页数
 
-        # 提取起始页和结束页
-        start_page, end_page = extract_pages_generic(pdf_document, begin_pattern, end_pattern, begin_page, common_header)
+        start_page, end_page = find_page_indices(pdf_document, begin_pattern, end_pattern, begin_page, common_header)
 
-        if output_suffix == "format":
+        if output_suffix == "format" and start_page is None:
+            print(f"{output_suffix}: 未找到起始页，尝试二次提取！ 文件: {pdf_path}")
+            # 二次提取逻辑，保留原有功能
+            start_page, end_page = find_page_indices(
+                pdf_document,
+                re.compile(r'^(?:响应|投标).*?格式.*', re.MULTILINE),
+                end_pattern,
+                begin_page,
+                common_header
+            )
             if start_page is None:
-                print(f"{output_suffix}: 未找到起始页，提取失败！")
+                print(f"{output_suffix}: 未找到起始页，提取失败！ 文件: {pdf_path}")
                 return ""
-            if end_page is None:
-                # 如果未匹配到结束页，默认截取到文件末尾
-                end_page = total_pages
-                print(f"{output_suffix}: 未找到结束页，默认截取到文件末尾。")
-            return save_extracted_pages(pdf_document, start_page, end_page, pdf_path, output_folder, output_suffix)
 
-        if start_page is None or end_page is None:
-            print(f"first: {output_suffix} 未找到起始或结束页在文件 {pdf_path} 中！")
+        if start_page is None:
+            print(f"未找到起始页，提取失败！ 文件: {pdf_path}")
             return ""
 
+        if end_page is None:
+            end_page = len(pdf_document.pages) - 1
+            print(f"{output_suffix}: 未找到结束页，默认截取到文件末尾。 文件: {pdf_path}")
+
         return save_extracted_pages(pdf_document, start_page, end_page, pdf_path, output_folder, output_suffix)
     except Exception as e:
         print(f"Error processing {pdf_path}: {e}")
         return ""
 
+
 def process_files(file_path, output_folder, begin_pattern, begin_page, end_pattern, output_suffix):
     """
     处理单个文件：转换为PDF（如果需要）并提取页面。
     """
-    pdf_path = convert_to_pdf(file_path)
-    result = extract_pages(pdf_path, output_folder, begin_pattern, begin_page, end_pattern, output_suffix)
-    return result or ""
+    # pdf_path = convert_to_pdf(file_path)
+    pdf_path=file_path
+    return extract_pages(pdf_path, output_folder, begin_pattern, begin_page, end_pattern, output_suffix) or ""
+
 
 def process_input(input_path, output_folder, begin_pattern, begin_page, end_pattern, output_suffix):
     """
@@ -159,20 +172,15 @@ def process_input(input_path, output_folder, begin_pattern, begin_page, end_patt
             file_path = os.path.join(input_path, file_name)
             if is_pdf_or_doc(file_path):
                 result = process_files(file_path, output_folder, begin_pattern, begin_page, end_pattern, output_suffix)
-                if isinstance(result, tuple):
-                    generated_files.extend([f if f else "" for f in result])  # 保留空字符串
-                else:
-                    generated_files.append(result)  # 直接添加result，可能是空字符串
+                generated_files.append(result)
     elif os.path.isfile(input_path) and is_pdf_or_doc(input_path):
         result = process_files(input_path, output_folder, begin_pattern, begin_page, end_pattern, output_suffix)
-        if isinstance(result, tuple):
-            generated_files.extend([f if f else "" for f in result])  # 保留空字符串
-        else:
-            generated_files.append(result)  # 直接添加result，可能是空字符串
+        generated_files.append(result)
     else:
-        print("提供的路径既不是文件夹也不是PDF文件。")
+        print("提供的路径既不是文件夹也不是PDF/Word文件。")
+
+    return [f for f in generated_files if f]  # 过滤空字符串
 
-    return generated_files
 
 def truncate_pdf_main(input_path, output_folder, selection):
     """
@@ -180,27 +188,30 @@ def truncate_pdf_main(input_path, output_folder, selection):
     """
     try:
         if selection == 1:  # 投标文件格式
-            begin_page = 5
+            begin_page = 10
             begin_pattern = re.compile(
-                r'^第[一二三四五六七八九十百千]+(?:章|部分).*?(?:响应|投标).*?格式.*'
+                r'^第[一二三四五六七八九十百千]+(?:章|部分).*?(?:响应|投标).*?格式.*',
+                re.MULTILINE
             )
             end_pattern = re.compile(
-                r'^第[一二三四五六七八九十百千]+(?:章|部分)\s*[\u4e00-\u9fff]+', re.MULTILINE
+                r'^第[一二三四五六七八九十百千]+(?:章|部分)\s*[\u4e00-\u9fff]+',
+                re.MULTILINE
             )
             local_output_suffix = "format"
         else:
             print("无效的选择:请选择1")
-            return None
+            return []
 
         # 调用相应的处理函数
-        return process_input(input_path, output_folder, begin_pattern, begin_page, end_pattern, local_output_suffix) or ""
+        return process_input(input_path, output_folder, begin_pattern, begin_page, end_pattern, local_output_suffix)
     except Exception as e:
         print(f"Error in truncate_pdf_main: {e}")
-        return ""  # 返回空字符串
+        return []
+
 
 if __name__ == "__main__":
     # 定义输入和输出路径
-    input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles"
+    input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\包头市公安支队机动车查验监管系统招标文201907.pdf"
     output_folder = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\新建文件夹"
     selection = 1  # 1 - 投标文件格式
     # 执行截取
diff --git a/flask_app/货物标/截取pdf货物标版.py b/flask_app/货物标/截取pdf货物标版.py
index 41e0ee1..e01493c 100644
--- a/flask_app/货物标/截取pdf货物标版.py
+++ b/flask_app/货物标/截取pdf货物标版.py
@@ -431,8 +431,7 @@ def extract_pages_twice(pdf_path, output_folder, output_suffix, common_header):
             else:
                 print(f"second: {output_suffix} 未找到起始或结束页在文件 {pdf_path} 中！")
                 return ""
-        return save_extracted_pages(pdf_document, start_page, end_page, pdf_path, output_folder, output_suffix,
-                                    )
+        return save_extracted_pages(pdf_document, start_page, end_page, pdf_path, output_folder, output_suffix)
     except Exception as e:
         print(f"Error in extract_pages_twice: {e}")
         return ""