diff --git a/flask_app/general/clean_pdf.py b/flask_app/general/clean_pdf.py
index 0e40bf2..d55dc6d 100644
--- a/flask_app/general/clean_pdf.py
+++ b/flask_app/general/clean_pdf.py
@@ -1,5 +1,9 @@
 import re
 from PyPDF2 import PdfReader
+
+from flask_app.general.format_change import docx2pdf
+
+
 def extract_common_header(pdf_path):
 
     def get_headers(pdf_document, start_page, pages_to_read):
@@ -105,7 +109,19 @@ def is_scanned_pdf(file_path, max_pages=15):
                 return False  # 不是扫描型
     return True  # 前 max_pages 页都没有文本
 
-
+def get_pdf_page_count(file_path):
+    """
+    Return the number of pages in the document at file_path (.doc/.docx is converted with docx2pdf first); returns 0 on any error.
+    """
+    try:
+        pdf_path=file_path
+        if file_path.lower().endswith(('.doc', '.docx')):  # Word input: count pages of the docx2pdf result instead
+            pdf_path = docx2pdf(file_path)
+        reader = PdfReader(pdf_path)
+        return len(reader.pages)
+    except Exception as e:
+        print(f"读取 PDF 页码时出错：{e}")
+        return 0  # any failure is reported to the caller as a page count of 0
 if __name__ == '__main__':
     file_path = r"C:\Users\Administrator\Documents\WeChat Files\wxid_d11awe5rp1y722\FileStorage\File\2024-12\2020-安徽-安徽省生态环境厅电梯采购.pdf"
     res=is_scanned_pdf(file_path)
diff --git a/flask_app/货物标/提取采购需求main.py b/flask_app/货物标/提取采购需求main.py
index 536c5d3..3d60e50 100644
--- a/flask_app/货物标/提取采购需求main.py
+++ b/flask_app/货物标/提取采购需求main.py
@@ -1,7 +1,7 @@
 import concurrent.futures
 import json
 import time
-
+from  flask_app.general.clean_pdf import get_pdf_page_count
 from flask_app.general.doubao import pdf2txt
 from flask_app.general.file2markdown import convert_file_to_markdown
 from flask_app.general.format_change import pdf2docx
@@ -28,12 +28,24 @@ def fetch_procurement_reqs(procurement_path, invalid_path):
         return DEFAULT_PROCUREMENT_REQS.copy()
 
     try:
-        processed_filepath = convert_file_to_markdown(procurement_path)   # 转markdown格式
+        if procurement_path == invalid_path:
+            # 读取 PDF 页码数
+            page_count = get_pdf_page_count(procurement_path)
+
+            if page_count > 80:  # page count is greater than 80
+                model_type = 0
+                processed_filepath = ""
+            else:
+                model_type = 1
+                processed_filepath = convert_file_to_markdown(procurement_path)  # 转markdown格式
+        else:
+            model_type = 1
+            processed_filepath = convert_file_to_markdown(procurement_path)  # 转markdown格式
         # processed_filepath = pdf2txt(procurement_path)  # 纯文本提取
         # 使用 ThreadPoolExecutor 并行处理 get_technical_requirements 和 get_business_requirements
         with concurrent.futures.ThreadPoolExecutor() as executor:
             # 提交任务给线程池
-            future_technical = executor.submit(get_technical_requirements, invalid_path, processed_filepath)
+            future_technical = executor.submit(get_technical_requirements, invalid_path, processed_filepath,model_type)
             time.sleep(0.5)  # 保持原有的延时
             future_business = executor.submit(get_business_requirements, procurement_path, processed_filepath)
             # 获取并行任务的结果