diff --git a/flask_app/general/投标人须知正文提取指定内容.py b/flask_app/general/投标人须知正文提取指定内容.py
index bdd3943..8f28e03 100644
--- a/flask_app/general/投标人须知正文提取指定内容.py
+++ b/flask_app/general/投标人须知正文提取指定内容.py
@@ -34,7 +34,7 @@ def preprocess_data(data):
 
 
 # 转换结构化的JSON数据
-#No parent found at level 1 for key '24.2'. Check the data structure.
+#生成结构化的数据
 def transform_json(data):
     result = {}
     temp = {0: result}  # 初始化根字典
@@ -183,19 +183,35 @@ def process_nested_data(data):
         # 到达最内层，处理非字典和非列表的元素（字符串）
         return post_process(data)
 
-def get_requirements_with_gpt(invalid_path, selection):
+#生成无结构的数据
+def concatenate_keys_values(section_content):
+    """
+    Flatten a section's key/value pairs into a list of "key value" strings.
+
+    Args:
+        section_content (dict): Key/value pairs making up one section.
+
+    Returns:
+        list of str: One "key value" string per pair, in iteration order.
+    """
+    return [
+        f"{clause_key} {clause_text}"
+        for clause_key, clause_text in section_content.items()
+    ]
+
+def get_requirements_with_gpt(merged_baseinfo_path, selection):
     """
     根据 selection 的值选择相应的用户查询，并调用大模型获取要求。
 
     Args:
-        invalid_path (str): 无效文件的路径，用于上传。
+        merged_baseinfo_path (str): Path of the merged base-info file that is uploaded.
         selection (int): 选择的类型（1、2 或 3）。
 
     Returns:
         dict: 大模型返回的要求结果，或错误信息。
     """
     # 上传文件并获取 file_id
-    file_id = upload_file(invalid_path)
+    file_id = upload_file(merged_baseinfo_path)
     # 定义 selection 对应的用户查询
     user_queries = {
         # 1: """
diff --git a/flask_app/general/读取文件/按页读取pdf.py b/flask_app/general/读取文件/按页读取pdf.py
index 1828641..1dff018 100644
--- a/flask_app/general/读取文件/按页读取pdf.py
+++ b/flask_app/general/读取文件/按页读取pdf.py
@@ -95,8 +95,8 @@ def extract_text_by_page(file_path):
 
 
 if __name__ == '__main__':
-    file_path='D:\\flask_project\\flask_app\\static\\output\\output1\\648e094b-e677-47ce-9073-09e0c82af210\\ztbfile_tobidders_notice_part2.pdf'
-    # file_path = 'C:\\Users\\Administrator\\Desktop\\货物标\\output4\\2-招标文件（2020年广水市中小学教师办公电脑系统及多媒体“班班通”设备采购安装项目）_tobidders_notice_part2.pdf'
+    # file_path='D:\\flask_project\\flask_app\\static\\output\\output1\\648e094b-e677-47ce-9073-09e0c82af210\\ztbfile_tobidders_notice_part2.pdf'
+    file_path = 'C:\\Users\\Administrator\\Desktop\\fsdownload\\54d7aa40-8b36-4803-b6f7-278356ff1381\\ztbfile_tobidders_notice_part2.pdf'
     # file_path = 'C:\\Users\\Administrator\\Desktop\\货物标\\output4\\磋商文件_tobidders_notice_part2.pdf'
     # file_path = 'C:\\Users\\Administrator\\Desktop\\货物标\\截取test\\交警支队机动车查验监管系统项目采购_tobidders_notice_part1.pdf'
     # file_path = "C:\\Users\\Administrator\\Desktop\\招标文件\\招标test文件夹\\zbtest8.pdf"
diff --git a/flask_app/货物标/投标人须知正文提取指定内容货物标版.py b/flask_app/货物标/投标人须知正文提取指定内容货物标版.py
index 77dd412..49bb387 100644
--- a/flask_app/货物标/投标人须知正文提取指定内容货物标版.py
+++ b/flask_app/货物标/投标人须知正文提取指定内容货物标版.py
@@ -2,7 +2,7 @@ import json
 import re
 from functools import cmp_to_key
 
-from flask_app.general.投标人须知正文提取指定内容 import process_nested_data, transform_json, get_requirements_with_gpt
+from flask_app.general.投标人须知正文提取指定内容 import process_nested_data, transform_json, get_requirements_with_gpt,concatenate_keys_values
 
 
 #提取两个大标题之间的内容
@@ -103,19 +103,26 @@ def extract_from_notice(merged_baseinfo_path,clause_path, type):
 
         # 提取目标部分
         extracted_data = extract_between_sections(data, target_values)  # 读取json，截取大标题之间的内容
-        transformed_data = process_with_outer_key(extracted_data)
-        final_result = process_nested_data(transformed_data)
-        if not final_result:
+        if not extracted_data:
             final_result = get_requirements_with_gpt(merged_baseinfo_path, type)    #万一没用正则匹配到，那就调用大模型
-        return final_result
+            return final_result
+        # print(json.dumps(extracted_data,ensure_ascii=False,indent=4))
+        extracted_data_concatenated = {section: concatenate_keys_values(content)
+                                       for section, content in extracted_data.items()}
+
+        return extracted_data_concatenated
+        # transformed_data = process_with_outer_key(extracted_data)
+        # final_result = process_nested_data(transformed_data)
+        # return final_result
+
 
     except Exception as e:
         print(f"Error in extract_from_notice: {e}")
         return DEFAULT_RESULT
 
 if __name__ == "__main__":
-    clause_path = 'D:\\flask_project\\flask_app\\static\\output\\output1\\648e094b-e677-47ce-9073-09e0c82af210\\clause1.json'
-    merged_baseinfo_path=r"D:\flask_project\flask_app\static\output\output1\648e094b-e677-47ce-9073-09e0c82af210\ztbfile_merged_baseinfo.pdf"
+    clause_path = r'C:\Users\Administrator\Desktop\fsdownload\a110ed59-00e8-47ec-873a-bd4579a6e628\\clause1.json'
+    merged_baseinfo_path=r"C:\Users\Administrator\Desktop\fsdownload\a110ed59-00e8-47ec-873a-bd4579a6e628\ztbfile_merged_baseinfo.pdf"
     # file_path = 'D:\\flask_project\\flask_app\\static\\output\\fee18877-0c60-4c28-911f-9a5f7d1325a7\\clause1.json'
     try:
         res = extract_from_notice(merged_baseinfo_path,clause_path, 1)  # 可以改变此处的 type 参数测试不同的场景
diff --git a/flask_app/货物标/投标人须知正文条款提取成json文件货物标版.py b/flask_app/货物标/投标人须知正文条款提取成json文件货物标版.py
index 09325d3..99355be 100644
--- a/flask_app/货物标/投标人须知正文条款提取成json文件货物标版.py
+++ b/flask_app/货物标/投标人须知正文条款提取成json文件货物标版.py
@@ -105,12 +105,14 @@ def should_add_newline(content, keywords, max_length=20):
     content_str = ''.join(content).strip()
     return any(keyword in content_str for keyword in keywords) or len(content_str) <= max_length
 
-def handle_content_append(current_content, line_content, append_newline, keywords):
+def handle_content_append(current_content, line_content, append_newline, keywords,in_special_section):
     if append_newline:
         if should_add_newline(current_content, keywords):
             current_content.append('\n')  # 添加换行符
         append_newline = False
     current_content.append(line_content)
+    if in_special_section:  # inside a special section every appended line is followed by its own newline
+        current_content.append('\n')
     return append_newline
 
 """
@@ -134,7 +136,11 @@ def parse_text_by_heading(text):
     skip_subheadings = False
     last_main_number = None
     temp_title = None  # 临时存储以点号开头但不带数字的标题
-
+    pattern_numbered = re.compile(r'^\s*([一二三四五六七八九十]{1,2})\s*、\s*')
+    pattern_parentheses = re.compile(r'^\s*[（(]\s*([一二三四五六七八九十]{1,2})\s*[)）]\s*')
+    initial_heading_pattern = None
+    special_section_keywords = ['文件的组成', '文件的构成']  # 定义特殊章节关键词
+    in_special_section = False  # 标志是否在特殊章节中
     lines = text.split('\n')
 
     def check_year_pattern(line):
@@ -150,11 +156,22 @@ def parse_text_by_heading(text):
             current_content.append(line_stripped)
             continue
 
+        # **首先检查是否进入特殊章节**
+        if any(keyword in line_stripped for keyword in special_section_keywords):
+            in_special_section = True
+
         # 匹配带数字的标题，例如 '12.1 内容'
         match = re.match(r'^(?<![a-zA-Z（(])(\d+(?:\.\d+)+)\s*(.*)', line_stripped)
         if not match:
             match = re.match(r'^(\d+\.)\s*(.+)$', line_stripped)
 
+        # 检查是否退出特殊章节
+        if in_special_section:
+            # 如果匹配到了主要标题，且不包含特殊关键词，则退出特殊章节
+            if match and not any(keyword in line_stripped for keyword in special_section_keywords):
+                in_special_section = False
+
+        # 以下是原有的匹配逻辑
         # 匹配以点号开头并带有数字的情况，例如 '.12.1 内容'
         dot_match = re.match(r'^[．.](\d+(?:[．.]\d+)*)\s*(.+)$', line_stripped)
 
@@ -197,7 +214,7 @@ def parse_text_by_heading(text):
                 append_newline = len(new_key.rstrip('.').split('.')) <= 2
                 last_main_number = new_key.split('.')[0]
             else:
-                append_newline = handle_content_append(current_content, line_stripped, append_newline, keywords)
+                append_newline = handle_content_append(current_content, line_stripped, append_newline, keywords,in_special_section)
 
         elif dot_match:
             # 处理以点号开头并带有数字的情况
@@ -212,14 +229,15 @@ def parse_text_by_heading(text):
                 current_content = [line_content]
                 append_newline = True
             else:
-                append_newline = handle_content_append(current_content, line_stripped, append_newline, keywords)
+                append_newline = handle_content_append(current_content, line_stripped, append_newline, keywords,in_special_section)
 
         elif dot_text_match:
             # 处理以点号开头但不带数字的情况，存储到临时变量
             temp_title = dot_text_match.group(1).strip()
             continue  # 跳过进一步处理该行
 
-        elif pure_number_match:  # 处理不带点号的纯数字开头的情况，例如 '27xxxxx'
+        elif pure_number_match:
+            # 处理不带点号的纯数字开头的情况
             new_key_candidate, line_content = pure_number_match.groups()
             new_key_candidate += '.'  # 添加点号
             line_content = line_content.lstrip('.．、,')
@@ -247,69 +265,86 @@ def parse_text_by_heading(text):
                 current_content = [line_content]
                 append_newline = True
                 last_main_number = new_key_candidate.rstrip('.')
-                # if current_key is None or (current_key != new_key and (     #不给序号排序
-                #         len(current_content) == 0 or current_content[-1][-1] != '第')):
-                #     if current_key is not None:
-                #         content_string = ''.join(current_content).strip()
-                #         data[current_key] = data.get(current_key, '') + content_string.replace(' ', '')
-                #     current_key = new_key
-                #     current_content = [line_content]
-                #     append_newline = len(new_key.rstrip('.').split('.')) <= 2
-                #     last_main_number = new_key.split('.')[0]
             else:
                 # 将当前行视为当前标题的内容
-                append_newline = handle_content_append(current_content, line_stripped, append_newline, keywords)
+                append_newline = handle_content_append(current_content, line_stripped, append_newline, keywords,in_special_section)
 
         else:
-            if not skip_subheadings:   # 合并中文标题、字母标题、阿拉伯数字标题的匹配逻辑
-                pattern_title = re.compile(
-                    r'^\s*(?:[（(]\s*([一二三四五六七八九十]{1,2})\s*[)）]\s*|([一二三四五六七八九十]{1,2})\s*、\s*)')
-                chinese_match = pattern_title.match(line_stripped)
+            if not skip_subheadings and not in_special_section:  # 增加 in_special_section 判断
+                numbered_match = pattern_numbered.match(line_stripped)
+                parenthesis_match = pattern_parentheses.match(line_stripped)
                 letter_match = re.match(r'^([A-Z])\.\s*(.*)$', line_stripped)
                 arabic_match = re.match(r'^(\d+、)\s*(.+)$', line_stripped)
 
-                # 优化处理逻辑，减少冗余
-                if chinese_match or letter_match or arabic_match:
-                    # 保存之前的 key 的内容（无论是中文标题还是阿拉伯数字标题）
-                    if current_key is not None:
-                        content_string = ''.join(current_content).strip()
-                        data[current_key] = data.get(current_key, '') + content_string.replace(' ', '')
-                    if current_key_chinese is not None:
-                        data[current_key_chinese] = current_value_chinese
-                        current_key_chinese = None
+                # 判断当前行是否匹配了任何标题模式
+                if numbered_match or parenthesis_match or letter_match or arabic_match:
+                    # 如果初始标题模式尚未设置，则记录当前匹配的标题模式
+                    if initial_heading_pattern is None:
+                        if numbered_match:
+                            initial_heading_pattern = 'numbered'
+                        elif parenthesis_match:
+                            initial_heading_pattern = 'parentheses'
+                        elif letter_match:
+                            initial_heading_pattern = 'letter'
+                        elif arabic_match:
+                            initial_heading_pattern = 'arabic'
 
-                    # 处理中文标题
-                    if chinese_match:
-                        current_key_chinese = chinese_match.group(1) or chinese_match.group(2)
-                        current_value_chinese = line_stripped[chinese_match.end():].lstrip('.．、,').replace(' ', '')
-                        if current_key_chinese in data:
-                            handle_content_append(current_content, '\n' + line_stripped, append_newline, keywords)
-                            current_key_chinese = None
-                    # 处理字母标题
+                    # 确定当前匹配的标题模式
+                    if numbered_match:
+                        current_heading_pattern = 'numbered'
+                    elif parenthesis_match:
+                        current_heading_pattern = 'parentheses'
                     elif letter_match:
-                        letter_key, letter_value = letter_match.groups()
-                        letter_to_chinese = {
-                            'A': '一', 'B': '二', 'C': '三', 'D': '四', 'E': '五',
-                            'F': '六', 'G': '七', 'H': '八', 'I': '九', 'J': '十',
-                            'K': '十一', 'L': '十二', 'M': '十三', 'N': '十四', 'O': '十五'
-                        }
-                        current_key_chinese = letter_to_chinese.get(letter_key, letter_key)
-                        current_value_chinese = letter_value.lstrip('.．、,').replace(' ', '')
-                        if current_key_chinese in data:
-                            handle_content_append(current_content, '\n' + line_stripped, append_newline, keywords)
-                            current_key_chinese = None
-                    # 处理阿拉伯数字标题
+                        current_heading_pattern = 'letter'
                     elif arabic_match:
-                        arabic_key, arabic_value = arabic_match.groups()
-                        current_key = arabic_key.replace('、', '.')
-                        current_content = [arabic_value]
-                        append_newline = True
-                        last_main_number = current_key.rstrip('.')
-                    continue
+                        current_heading_pattern = 'arabic'
 
-            # 如果没有匹配标题，继续处理正文内容
-            if line_stripped:
-                append_newline = handle_content_append(current_content, line_stripped, append_newline, keywords)
+                    # 如果当前标题模式与初始标题模式一致，创建新的键值对
+                    if current_heading_pattern == initial_heading_pattern:
+                        # 保存之前的 key 的内容
+                        if current_key is not None:
+                            content_string = ''.join(current_content).strip()
+                            data[current_key] = data.get(current_key, '') + content_string.replace(' ', '')
+                        if current_key_chinese is not None:
+                            data[current_key_chinese] = current_value_chinese
+                            current_key_chinese = None
+
+                        # 处理匹配到的标题
+                        if current_heading_pattern == 'numbered':
+                            current_key_chinese = numbered_match.group(1)
+                            current_value_chinese = line_stripped[numbered_match.end():].lstrip('.．、,').replace(' ', '')
+                        elif current_heading_pattern == 'parentheses':
+                            current_key_chinese = parenthesis_match.group(1)
+                            current_value_chinese = line_stripped[parenthesis_match.end():].lstrip('.．、,').replace(' ', '')
+                        elif current_heading_pattern == 'letter':
+                            # 字母标题处理
+                            letter_key, letter_value = letter_match.groups()
+                            letter_to_chinese = {
+                                'A': '一', 'B': '二', 'C': '三', 'D': '四', 'E': '五',
+                                'F': '六', 'G': '七', 'H': '八', 'I': '九', 'J': '十',
+                                'K': '十一', 'L': '十二', 'M': '十三', 'N': '十四', 'O': '十五'
+                            }
+                            current_key_chinese = letter_to_chinese.get(letter_key, letter_key)
+                            current_value_chinese = letter_value.lstrip('.．、,').replace(' ', '')
+                        elif current_heading_pattern == 'arabic':
+                            arabic_key, arabic_value = arabic_match.groups()
+                            current_key = arabic_key.replace('、', '.')
+                            current_content = [arabic_value]
+                            append_newline = True
+                            last_main_number = current_key.rstrip('.')
+                        continue
+                    else:
+                        # 当前标题模式与初始模式不一致，将该行视为内容
+                        if line_stripped:
+                            append_newline = handle_content_append(current_content, line_stripped, append_newline, keywords,in_special_section)
+                else:
+                    # 未匹配到任何标题模式，将该行视为内容
+                    if line_stripped:
+                        append_newline = handle_content_append(current_content, line_stripped, append_newline, keywords,in_special_section)
+            else:
+                # 在特殊章节中，所有内容都作为当前标题的内容
+                if line_stripped:
+                    append_newline = handle_content_append(current_content, line_stripped, append_newline, keywords,in_special_section)
 
     # 最后保存最后一个 key 对应的内容
     if current_key is not None:
@@ -448,11 +483,12 @@ def process_folder(input_folder, output_folder):
 
 #TODO:'C:\\Users\\Administrator\\Desktop\\货物标\\output4\\广水农商行门禁控制主机及基础验证设备采购项目——磋商文件（定稿）（三次）_tobidders_notice_part2.pdf' PYPDF2库读取有遗漏
 #TODO: 投标人须知正文这块，序号可能是乱序的，或许可以删除判断序号大小的逻辑，只要出现在开头的序号就作为新的键 eg:2-招标文件。目前将这种情况当特殊处理
+#TODO:11.6 目前 '文件的组成' 是匹配任意行,可以考虑只匹配'11.文件的组成' 前面有序号的行    a110ed59-00e8-47ec-873a-bd4579a6e628\ztbfile_tobidders_notice_part2.pdf还有问题
 if __name__ == "__main__":
     # file_path = 'D:\\flask_project\\flask_app\\static\\output\\cfd4959d-5ea9-4112-8b50-9e543803f029\\ztbfile_tobidders_notice.pdf'
-    file_path='D:\\flask_project\\flask_app\\static\\output\\output1\\648e094b-e677-47ce-9073-09e0c82af210\\ztbfile_tobidders_notice_part2.pdf'
+    file_path=r'C:\Users\Administrator\Desktop\fsdownload\a110ed59-00e8-47ec-873a-bd4579a6e628\ztbfile_tobidders_notice_part2.pdf'
     # file_path = 'C:\\Users\\Administrator\\Desktop\\货物标\\output4\\2-招标文件（2020年广水市中小学教师办公电脑系统及多媒体“班班通”设备采购安装项目）_tobidders_notice_part2.pdf'
-    output_folder = 'D:\\flask_project\\flask_app\\static\\output\\output1\\648e094b-e677-47ce-9073-09e0c82af210\\tmp'
+    output_folder = r'C:\Users\Administrator\Desktop\fsdownload\a110ed59-00e8-47ec-873a-bd4579a6e628\\tmp'
     try:
         output_path = convert_clause_to_json(file_path,output_folder,1)
         print(f"Final JSON result saved to: {output_path}")
diff --git a/flask_app/货物标/货物标解析main.py b/flask_app/货物标/货物标解析main.py
index 03dcf48..0b7a8fb 100644
--- a/flask_app/货物标/货物标解析main.py
+++ b/flask_app/货物标/货物标解析main.py
@@ -243,6 +243,9 @@ def goods_bid_main(output_folder, file_path, file_type, unique_id):
 #广水市 2022 年义务教育学校多媒体补充采购项目 资格审查有问题
 #TODO: 目前跳转可能有个问题,资格审查那边:既有原来的内容又有跳转后的内容;符合本采购文件第一章第二款要求，并提供合格有效的证明材料<br>1、满足《中华人民共和国政府采购法》第二十二条规定，即：<br>（1）具有独立承担
 
+#TODO:资格审查默认加上第一章的内容, 资格审查章节不做跳转逻辑.
+#TODO:开评定标那块,不做层次遍历  ing
+#TODO:技术要求提取更全面一点  fsdownload\a110ed59-00e8-47ec-873a-bd4579a6e628
 #good_list 金额  截取上下文
 if __name__ == "__main__":
     # 配置日志器