From 60769d77d6a9690f2b21f4e30d21b23470486476 Mon Sep 17 00:00:00 2001
From: zhangsan <646228430@qq.com>
Date: Mon, 24 Mar 2025 14:43:37 +0800
Subject: [PATCH] =?UTF-8?q?3.24=20=E5=B0=86=E8=BE=93=E5=87=BA=E6=96=87?=
 =?UTF-8?q?=E4=BB=B6=E5=A4=B9=E7=BB=86=E5=88=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .env.example                    |   4 +-
 README.md                       |  13 ++--
 transfer_md/transfer.py         | 108 +++++++++++++++++++-------------
 typecho_markdown_upload/main.py |   3 +-
 4 files changed, 74 insertions(+), 54 deletions(-)

diff --git a/.env.example b/.env.example
index 519e7ae..88f550e 100644
--- a/.env.example
+++ b/.env.example
@@ -5,7 +5,7 @@ IMG_TOKEN=your_img_token
 # folder
 BASE_FOLDER=/your/base/folder
 OUTPUT_FOLDER=/your/base/folder/output
-EXCLUDE_FOLDERS=工作笔记
+EXCLUDE_FOLDERS=output
 
 # typecho config
 WEBSITE_XMLRPC_URL=https://example.com/index.php/action/xmlrpc
@@ -14,7 +14,7 @@ WEBSITE_PASSWORD=your_password
 
 # mysql config
 MYSQL_HOST=mysql
-MYSQL_PORT=3306 
+MYSQL_PORT=3306
 MYSQL_USERNAME=your_mysql_username
 MYSQL_PASSWORD=your_mysql_password
 MYSQL_TYPECHO_DATABASE=typecho
diff --git a/README.md b/README.md
index 335fddc..3e15bbe 100644
--- a/README.md
+++ b/README.md
@@ -81,9 +81,9 @@ md_files
 │   ├── file3.md
 │   └── file4.md
 └── output
-    ├── image1.png
-    ├── image2.jpg
-    └── ... (其他图片文件)
+    ├── assets_type
+    ├── pics
+    └── updated_files
 
 ```
 
@@ -92,13 +92,14 @@ md_files
 如果你现有的图片分散在系统中，可以使用 `transfer_md/transfer.py` 脚本来统一处理。该脚本需要传入三个参数：
 
 - **input_path：** 指定包含 Markdown 文件的根目录（例如上例中的 `md_files`）。
-- **output_path：** 指定统一存放处理后图片的目标文件夹（例如上例中的 `output`）。
+- **output_path：** 指定输出根目录（例如上例中的 `output`）；脚本会在其下创建 `pics`、`assets_type`、`updated_files` 三个子文件夹。
 - **type_value**：
   - `1`：扫描 `input_path` 下所有 Markdown 文件，将其中引用的本地图片复制到 `output_path` 中，同时更新 Markdown 文件中的图片 URL 为 `output_path` 内的路径；
-  - `2`：为每个 Markdown 文件建立单独的文件夹（以文件名命名），将 Markdown 文件及其依赖图片存入该文件夹中，图片存放在文件夹下的 `assets` 子目录中，整体保存在 `output_path` 内；
+  - `2`：为每个 Markdown 文件建立单独的文件夹（以文件名命名），图片存放在该文件夹下的 `assets` 子目录中，整体存入 `output_path` 下的 `assets_type` 文件夹中；
   - `3`：扫描 Markdown 文件中的本地图片，将其上传到图床（获取公网 URL），并将 Markdown 文件中对应的图片 URL 替换为公网地址。
+  - `4`：预处理 Markdown 文件，将公式块和代码块格式化，以便于 Markdown 解析器解析（本地 Typora 编辑器对 md 格式比较宽容，但博客中使用的 md 解析器插件不一定能正确渲染）；格式化后的文件保存到 `output_path` 下的 `updated_files` 文件夹中。
+
 
-对于本项目，需要将图片统一用公网URL表示。即`type_value=3`
 
 
 
diff --git a/transfer_md/transfer.py b/transfer_md/transfer.py
index 0f6757d..da7c40f 100644
--- a/transfer_md/transfer.py
+++ b/transfer_md/transfer.py
@@ -31,7 +31,7 @@ def process_local_image_copy(abs_img_path, dest_folder):
     return new_filename
 
 
-def process_md_file_local(md_file, output_path):
+def process_md_file_local(md_file, pics_folder):
     """
     处理一个 Markdown 文件：
     - 提取 Markdown 和 HTML 格式的图片路径
@@ -48,16 +48,16 @@ def process_md_file_local(md_file, output_path):
 
     # 获取当前 md 文件所在目录
     md_dir = os.path.dirname(md_file)
-    abs_output_path = os.path.abspath(output_path)
+    abs_output_path = os.path.abspath(pics_folder)
 
     for img_path in img_paths:
         # 判断图片路径是本地路径还是网络 URL
         if img_path.startswith(('http://', 'https://')):
             # 处理网络图片
-            new_filename = download_image(img_path, output_path)
+            new_filename = download_image(img_path, pics_folder)
             if new_filename:
                 # 使用绝对路径替换
-                new_ref = os.path.join(output_path, new_filename).replace('\\', '/')
+                new_ref = os.path.join(pics_folder, new_filename).replace('\\', '/')
                 content = content.replace(img_path, new_ref)
         else:
             # 处理本地图片
@@ -75,8 +75,8 @@ def process_md_file_local(md_file, output_path):
             if os.path.exists(abs_img_path):
                 if os.path.isfile(abs_img_path):  # 确保是文件而不是文件夹
                     # 使用抽离的复制函数处理图片
-                    new_filename = process_local_image_copy(abs_img_path, output_path)
-                    dest_path = os.path.join(output_path, new_filename)
+                    new_filename = process_local_image_copy(abs_img_path, pics_folder)
+                    dest_path = os.path.join(pics_folder, new_filename)
                     print(f"已复制: {abs_img_path} → {dest_path}")
                     # 使用绝对路径替换
                     new_ref = dest_path.replace('\\', '/')
@@ -92,7 +92,7 @@ def process_md_file_local(md_file, output_path):
     print(f"已更新: {md_file}")
 
 
-def process_md_file_with_assets(md_file, output_base_path):
+def process_md_file_with_assets(md_file, output_path):
     """
     处理单个 Markdown 文件，将其拷贝到 output_base_path/<md_name>/ 下，
     并在该文件夹中建立 assets 文件夹保存相关图片。
@@ -101,7 +101,7 @@ def process_md_file_with_assets(md_file, output_base_path):
     # 创建对应的输出文件夹及 assets 子文件夹
     md_filename = os.path.basename(md_file)
     md_name, _ = os.path.splitext(md_filename)
-    target_folder = os.path.join(output_base_path, md_name)
+    target_folder = os.path.join(output_path, md_name)
     assets_folder = os.path.join(target_folder, "assets")
     os.makedirs(assets_folder, exist_ok=True)
 
@@ -194,38 +194,51 @@ def process_md_file_remote(md_file):
         f.write(content)
     print(f"已更新: {md_file}")
 
-def format_mdfile(filepath, language="text"):
-    '''
-    若代码块没有指定语言，Typera中能正常显示，但是博客中的markdown解析器通常会错误显示。
-    '''
+def format_mdfile(filepath, output_path, language="text"):
+    """
+    对代码块进行格式化：若代码块没有指定语言，则添加指定语言。
+    同时将修改后的文件保存到 output_path/updated_files/<category> 下，由于md格式存在不确定性，脚本处理结果不一定符合预期！。
+    """
     with open(filepath, 'r', encoding='utf-8') as f:
         lines = f.readlines()
 
     in_code_block = False
     new_lines = []
+    # 匹配整行仅包含可选空白、可选列表标记和三个反引号（无其他内容）
+    pattern = re.compile(r'^(\s*(?:[-*+]\s+)?)(```)(\s*)$')
+
     for line in lines:
-        stripped = line.strip()
-        # 判断是否为代码块标记
-        if stripped.startswith("```"):
-            if not in_code_block:
-                # 当前为代码块的起始位置
-                # 如果这一行只有三个反引号，则添加语言参数
-                if stripped == "```":
-                    # 这里使用replace仅替换第一次出现的```，保留原有的换行符和空白
-                    line = line.replace("```", f"```{language}", 1)
+        # 若不在代码块内，尝试匹配代码块起始行
+        if not in_code_block:
+            match = pattern.match(line)
+            if match:
+                prefix, backticks, suffix = match.groups()
+                # 添加语言参数后重构该行（保留原始尾随空白）
+                line = f"{prefix}{backticks}{language}{suffix}\n" if not line.endswith(
+                    "\n") else f"{prefix}{backticks}{language}{suffix}"
+                in_code_block = True
+            elif line.strip().startswith("```"):
+                # 如果行中含有除空白之外的其他内容，则视为已指定语言（或其他标记），直接进入代码块模式
                 in_code_block = True
-            else:
-                # 当前为代码块的结束位置，不添加语言
-                in_code_block = False
         else:
-            # 在非代码块的行中，将同行 $$公式$$ 替换为 $公式$
-            # 使用正则表达式进行替换，注意采用非贪婪匹配
-            line = re.sub(r'\$\$(.+?)\$\$', r'$\1$', line)
+            # 检测代码块结束（行中以 ``` 开头）
+            if line.strip().startswith("```"):
+                in_code_block = False
+
+        # 同时对每一行内的 $$公式$$ 替换为 $公式$
+        line = re.sub(r'\$\$(.+?)\$\$', r'$\1$', line)
         new_lines.append(line)
 
-    # 将修改后的内容写回文件（也可选择写入新文件）
-    with open(filepath, 'w', encoding='utf-8') as f:
+    # 计算保存路径：
+    # 取原文件所在文件夹的名称作为 category
+    category = os.path.basename(os.path.dirname(filepath))
+    target_folder = os.path.join(output_path, category)
+    os.makedirs(target_folder, exist_ok=True)
+    target_md_path = os.path.join(target_folder, os.path.basename(filepath))
+
+    with open(target_md_path, 'w', encoding='utf-8') as f:
         f.writelines(new_lines)
+    print(f"已格式化并保存: {target_md_path}")
 
 
 def scan_files(base_folder, exclude_folders):
@@ -246,35 +259,40 @@ def scan_files(base_folder, exclude_folders):
 
 def process_md_files(input_path, output_path, type, exclude_folders=None):
     """
-    处理输入目录下所有 Markdown 文件，并将处理后的图片保存到 output_path。
-    type 参数决定了使用哪种处理方式：
+    处理输入目录下所有 Markdown 文件，并将处理后的图片保存到 output_path/pics，
+    最后将经过 format_mdfile 格式化后的 Markdown 文件保存到 output_path/updated_files/<category> 下
+    type 参数决定了使用哪种图片处理方式：
         type == 1: process_md_file_local
         type == 2: process_md_file_with_assets
         type == 3: process_md_file_remote
-        type == 4:
+        type == 4: 仅执行格式化（format_mdfile）
     """
-    # 创建输出目录（如果不存在）
     os.makedirs(output_path, exist_ok=True)
-
-    # 获取 Markdown 文件列表
+    # 确保 pics 文件夹存在
+    pics_folder = os.path.join(output_path, "pics")
+    assets_type_folder=os.path.join(output_path, "assets_type")
+    updated_folder=os.path.join(output_path, "updated_files")
+    os.makedirs(pics_folder, exist_ok=True)
+    os.makedirs(assets_type_folder, exist_ok=True)
+    os.makedirs(updated_folder, exist_ok=True)
     if exclude_folders is None:
         exclude_folders = []
     md_files = scan_files(input_path, exclude_folders)
 
-    # 遍历处理所有 Markdown 文件
     for md_file in md_files:
+        # 根据不同类型先进行图片处理（如果需要）
         if type == 1:
-            process_md_file_local(md_file, output_path)  # url改为本地，图片存output_path
+            process_md_file_local(md_file, pics_folder)
         elif type == 2:
-            process_md_file_with_assets(md_file, output_path)  # url改为本地，assets方式，图片和md文件都存output_path
+            process_md_file_with_assets(md_file, assets_type_folder)
         elif type == 3:
-            process_md_file_remote(md_file)  # 图片url改为公网链接
+            process_md_file_remote(md_file)
         elif type == 4:
-            format_mdfile(md_file)
+            format_mdfile(md_file, updated_folder)
         else:
             print(f"未知的处理类型: {type}")
 
-    print("该文件夹下的md_files已全部处理完成！", os.path.abspath(input_path))
+    print("该文件夹下的 Markdown 文件已全部处理完成！")
 
 
 if __name__ == "__main__":
@@ -289,7 +307,9 @@ if __name__ == "__main__":
         type_value = 4
 
     # 这里的输入输出路径根据实际情况修改
-    input_path = os.getenv('BASE_FOLDER')  #docker环境
-    # input_path = r'D:\folder\study\md_files'
-    output_path = os.getenv('OUTPUT_FOLDER')
+    # input_path = os.getenv('BASE_FOLDER')  #docker环境
+    input_path=r'D:\folder\study\md_files'
+    # output_path = os.getenv('OUTPUT_FOLDER')
+    output_path=r'D:\folder\study\md_files\output'
+
     process_md_files(input_path, output_path, type_value)
diff --git a/typecho_markdown_upload/main.py b/typecho_markdown_upload/main.py
index 551bfd7..c605331 100644
--- a/typecho_markdown_upload/main.py
+++ b/typecho_markdown_upload/main.py
@@ -49,8 +49,7 @@ def execute_flow_with_typecho_mysql(file_path):
     这里 process_md_file_remote 用于处理 Markdown 文件（上传本地图片并替换为公网地址）。
     分类名称将从文件路径的上一级目录中获取。
     """
-    #为了实现版本控制，linux上不对md_files作修改。请本地自行运行预处理脚本再git push！
-    # format_mdfile(file_path)  #对 Markdown 文件进行处理，会对公式块和代码块进行格式化
+    #为了版本控制，请本地运行该函数或确保图片均为网络url再push
     # process_md_file_remote(file_path)  #上传本地图片并替换为公网地址
 
     with open(file_path, 'r', encoding='utf-8') as file: