2025-03-18 15:53:40 +08:00

237 lines
9.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import re
import shutil
import uuid
import requests
from urllib.parse import urlparse
from upload_img import upload_image
def download_image(url, output_path, timeout=30):
    """
    Download an image from the network and save it into ``output_path``.

    The file is stored under a freshly generated UUID name. The extension is
    taken from the path component of the URL and falls back to ``.png`` when
    the URL carries none.

    Args:
        url: HTTP(S) address of the image.
        output_path: Directory that receives the file. It is not created here.
        timeout: Seconds to wait on the server before giving up, so that one
            unresponsive host cannot stall the whole run.

    Returns:
        The new file name (not the full path) on success, or ``None`` when
        the server answers with a status other than 200 or the transfer
        fails. Failures are reported on stdout, never raised.
    """
    try:
        # The context manager releases the streamed connection on every path.
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"警告: 无法下载图片 {url},状态码: {response.status_code}")
                return None

            # Keep the extension found in the URL path; default to .png.
            ext = os.path.splitext(urlparse(url).path)[1] or '.png'
            new_filename = f"{uuid.uuid4()}{ext}"
            dest_path = os.path.join(output_path, new_filename)

            try:
                with open(dest_path, 'wb') as f:
                    # iter_content decodes gzip/deflate transfer encodings and
                    # surfaces network errors as requests exceptions.
                    for chunk in response.iter_content(chunk_size=64 * 1024):
                        f.write(chunk)
            except BaseException:
                # Never leave a truncated image behind in the output folder.
                if os.path.exists(dest_path):
                    os.remove(dest_path)
                raise
    except Exception as e:
        # Deliberate best-effort boundary: a single bad image must not abort
        # the processing of the remaining references.
        print(f"错误: 下载图片 {url} 时出错: {e}")
        return None

    print(f"已下载: {url} -> {dest_path}")
    return new_filename
def extract_image_paths(content):
    """
    Collect every image target referenced in Markdown text.

    Two notations are recognised:

    * Markdown images ``![alt](target)``, optionally followed by a quoted
      title (``![alt](target "title")``) or wrapped in angle brackets
      (``![alt](<target>)``). Title and brackets are not part of the result.
    * HTML ``<img>`` tags whose ``src`` attribute is quoted with either single
      or double quotes; tag and attribute names match case-insensitively.

    Args:
        content: Markdown source text.

    Returns:
        A set of the distinct, non-empty targets exactly as they are spelled
        in ``content`` (so they can be located again for replacement).
    """
    paths = set()

    for raw in re.findall(r'!\[.*?\]\((.*?)\)', content):
        target = raw.strip()
        # Drop a trailing quoted title; targets containing spaces but no
        # title (e.g. "my image.png") are left intact.
        with_title = re.fullmatch(r'''(.*?)\s+(?:"[^"]*"|'[^']*')''', target)
        if with_title:
            target = with_title.group(1).strip()
        if len(target) >= 2 and target[0] == '<' and target[-1] == '>':
            target = target[1:-1]
        if target:
            paths.add(target)

    html_src = re.compile(r'''<img\s+[^>]*src\s*=\s*(["'])(.*?)\1''', re.IGNORECASE)
    for match in html_src.finditer(content):
        src = match.group(2)
        if src:
            paths.add(src)

    return paths
def process_local_image_copy(abs_img_path, dest_folder):
    """
    Copy a local image into ``dest_folder`` under a UUID-based file name.

    Args:
        abs_img_path: Path of the image file to copy.
        dest_folder: Directory that receives the copy; created when missing.

    Returns:
        The new file name (not the full path): a random UUID followed by the
        source file's original extension.

    Raises:
        OSError: If the folder cannot be created or the file cannot be copied.
    """
    # Make the helper usable on its own instead of relying on every caller
    # to have created the destination beforehand.
    os.makedirs(dest_folder, exist_ok=True)

    ext = os.path.splitext(abs_img_path)[1]
    new_filename = f"{uuid.uuid4()}{ext}"
    # copy2 also carries over the file metadata (timestamps etc.).
    shutil.copy2(abs_img_path, os.path.join(dest_folder, new_filename))
    return new_filename
def process_md_file_local(md_file, output_path):
    """
    Gather all images of one Markdown file into ``output_path`` and rewrite
    the file in place.

    * Image targets are taken from Markdown and HTML image references.
    * Remote images (``http://`` / ``https://``) are downloaded into
      ``output_path``; local images are copied there. Either way the stored
      file gets a UUID name that keeps the original extension.
    * Every successfully handled reference is replaced by the absolute path
      of the stored file, written with forward slashes.
    * References that cannot be handled are reported and left unchanged.

    Args:
        md_file: Path of the Markdown file; it is overwritten.
        output_path: Directory that receives the images.
    """
    # newline='' keeps the file's own line endings through the round trip.
    with open(md_file, 'r', encoding='utf-8', newline='') as f:
        content = f.read()

    # Relative image paths are resolved against the Markdown file's folder.
    md_dir = os.path.dirname(md_file)
    # References are documented as absolute, even for a relative output_path.
    abs_output = os.path.abspath(output_path)

    # original reference -> new reference. Unhandled references map to
    # themselves so that they are shielded from partial rewrites below.
    replacements = {}

    for img_path in extract_image_paths(content):
        replacements[img_path] = img_path

        if img_path.startswith(('http://', 'https://')):
            new_filename = download_image(img_path, output_path)
            if new_filename:
                new_ref = os.path.join(abs_output, new_filename)
                replacements[img_path] = new_ref.replace('\\', '/')
            continue

        if os.path.isabs(img_path):
            abs_img_path = img_path
        else:
            abs_img_path = os.path.normpath(os.path.join(md_dir, img_path))

        if not os.path.exists(abs_img_path):
            print(f"警告: 图片文件不存在 {abs_img_path}")
            continue
        if not os.path.isfile(abs_img_path):
            print(f"警告: 跳过文件夹 {abs_img_path}")
            continue

        try:
            new_filename = process_local_image_copy(abs_img_path, output_path)
        except OSError as e:
            # One unreadable image must not abort the whole file.
            print(f"错误: 无法复制文件 {abs_img_path}: {e}")
            continue
        dest_path = os.path.join(abs_output, new_filename)
        print(f"已复制: {abs_img_path} -> {dest_path}")
        replacements[img_path] = dest_path.replace('\\', '/')

    # Rewrite all references in ONE pass, longest first. Chained
    # str.replace calls would corrupt references when one path is a substring
    # of another (e.g. "a.png" inside "img/a.png") or of an inserted path.
    targets = sorted((p for p in replacements if p), key=len, reverse=True)
    if targets:
        pattern = re.compile('|'.join(re.escape(p) for p in targets))
        content = pattern.sub(lambda m: replacements[m.group(0)], content)

    with open(md_file, 'w', encoding='utf-8', newline='') as f:
        f.write(content)
    print(f"已更新: {md_file}")
def process_md_file_with_assets(md_file, output_base_path):
    """
    Export one Markdown file together with its images.

    The file is written to ``output_base_path/<md_name>/<md_filename>`` and
    its images are stored in ``output_base_path/<md_name>/assets``. Remote
    images are downloaded, local ones copied; each gets a UUID file name.
    Successfully handled references are rewritten to the relative form
    ``assets/<new_filename>``; all others are reported and left unchanged.
    The source Markdown file itself is not modified.

    Note: the target folder is derived from the file's base name only, so two
    source files with the same name share one target folder.

    Args:
        md_file: Path of the source Markdown file.
        output_base_path: Directory under which the per-file folder is created.
    """
    md_filename = os.path.basename(md_file)
    md_name, _ = os.path.splitext(md_filename)
    target_folder = os.path.join(output_base_path, md_name)
    assets_folder = os.path.join(target_folder, "assets")
    os.makedirs(assets_folder, exist_ok=True)

    # newline='' keeps the file's own line endings in the exported copy.
    with open(md_file, 'r', encoding='utf-8', newline='') as f:
        content = f.read()

    # Relative image paths are resolved against the Markdown file's folder.
    md_dir = os.path.dirname(md_file)

    # original reference -> new reference. Unhandled references map to
    # themselves so that they are shielded from partial rewrites below.
    replacements = {}

    for img_path in extract_image_paths(content):
        replacements[img_path] = img_path
        new_filename = None

        if img_path.startswith(('http://', 'https://')):
            try:
                new_filename = download_image(img_path, assets_folder)
            except Exception as e:
                # Best-effort boundary: keep going with the other images.
                print(f"错误: 下载图片 {img_path} 时出错: {e}")
        else:
            if os.path.isabs(img_path):
                abs_img_path = img_path
            else:
                abs_img_path = os.path.normpath(os.path.join(md_dir, img_path))

            if os.path.isfile(abs_img_path):
                try:
                    new_filename = process_local_image_copy(abs_img_path, assets_folder)
                    print(f"已复制: {abs_img_path} -> {os.path.join(assets_folder, new_filename)}")
                except PermissionError as e:
                    print(f"错误: 无法复制文件 {abs_img_path},权限被拒绝: {e}")
                except OSError as e:
                    # Any other copy failure is reported the same way instead
                    # of aborting the export.
                    print(f"错误: 无法复制文件 {abs_img_path}: {e}")
            else:
                print(f"警告: 图片文件不存在或不是文件 {abs_img_path}")

        if new_filename:
            replacements[img_path] = f"assets/{new_filename}"

    # Rewrite all references in ONE pass, longest first. Chained
    # str.replace calls would corrupt references when one path is a substring
    # of another (e.g. "a.png" inside "img/a.png") or of an inserted path.
    targets = sorted((p for p in replacements if p), key=len, reverse=True)
    if targets:
        pattern = re.compile('|'.join(re.escape(p) for p in targets))
        content = pattern.sub(lambda m: replacements[m.group(0)], content)

    target_md_path = os.path.join(target_folder, md_filename)
    with open(target_md_path, 'w', encoding='utf-8', newline='') as f:
        f.write(content)
    print(f"已更新: {target_md_path}")
def process_md_file_remote(md_file):
    """
    Upload the local images of one Markdown file and rewrite the file in place.

    * Image targets are taken from Markdown and HTML image references.
    * Each local image is passed to ``upload_image`` and its reference is
      replaced by the value that call returns.
    * Remote references (``http://`` / ``https://``) are left untouched.
    * Missing files and failed uploads are reported; their references stay
      unchanged.

    Args:
        md_file: Path of the Markdown file; it is overwritten.
    """
    # newline='' keeps the file's own line endings through the round trip.
    with open(md_file, 'r', encoding='utf-8', newline='') as f:
        content = f.read()

    # Relative image paths are resolved against the Markdown file's folder.
    md_dir = os.path.dirname(md_file)

    # original reference -> new reference. Unhandled references map to
    # themselves so that they are shielded from partial rewrites below.
    replacements = {}

    for img_path in extract_image_paths(content):
        replacements[img_path] = img_path

        if img_path.startswith(('http://', 'https://')):
            print(f"跳过网络图片: {img_path}")
            continue

        if os.path.isabs(img_path):
            abs_img_path = img_path
        else:
            abs_img_path = os.path.normpath(os.path.join(md_dir, img_path))

        if not os.path.isfile(abs_img_path):
            print(f"警告: 图片文件不存在 {abs_img_path}")
            continue

        try:
            public_url = upload_image(abs_img_path)
            # NOTE(review): upload_image lives in upload_img and is not visible
            # here; it is assumed to return the public URL as a non-empty
            # str - confirm. Anything else is treated as a failed upload so
            # the reference is never blanked out.
            if not isinstance(public_url, str) or not public_url:
                raise ValueError(f"upload_image returned {public_url!r}")
        except Exception as e:
            # Best-effort boundary: keep going with the other images.
            print(f"错误: 图片上传失败 {abs_img_path}: {e}")
            continue

        print(f"图片已上传: {abs_img_path} -> {public_url}")
        replacements[img_path] = public_url

    # Rewrite all references in ONE pass, longest first. Chained
    # str.replace calls would corrupt references when one path is a substring
    # of another (e.g. "a.png" inside "img/a.png") or of an inserted URL.
    targets = sorted((p for p in replacements if p), key=len, reverse=True)
    if targets:
        pattern = re.compile('|'.join(re.escape(p) for p in targets))
        content = pattern.sub(lambda m: replacements[m.group(0)], content)

    with open(md_file, 'w', encoding='utf-8', newline='') as f:
        f.write(content)
    print(f"已更新: {md_file}")
def process_md_files(input_path, output_path, type):
    """
    Process every ``.md`` file found below ``input_path``.

    Args:
        input_path: Root directory that is searched recursively.
        output_path: Output directory; created when missing.
        type: Processing mode (the name shadows the builtin but is kept
            because callers may pass it by keyword):
            1 - ``process_md_file_local(md_file, output_path)``
            2 - ``process_md_file_with_assets(md_file, output_path)``
            3 - ``process_md_file_remote(md_file)``

    An unsupported mode is reported and nothing is processed or created.
    """
    if type not in (1, 2, 3):
        # Previously this walked the whole tree doing nothing and then
        # announced success; say what is wrong instead.
        print(f"错误: 不支持的处理类型 {type!r},应为 1, 2 或 3")
        return

    os.makedirs(output_path, exist_ok=True)
    output_key = os.path.normcase(os.path.abspath(output_path))

    for root, dirs, files in os.walk(input_path):
        if type == 2:
            # Mode 2 writes Markdown copies into output_path. When that
            # folder lies inside input_path, do not descend into it, or the
            # generated copies would be processed again.
            dirs[:] = [
                d for d in dirs
                if os.path.normcase(os.path.abspath(os.path.join(root, d))) != output_key
            ]

        for file in files:
            if not file.lower().endswith('.md'):
                continue
            md_file = os.path.join(root, file)
            if type == 1:
                process_md_file_local(md_file, output_path)
            elif type == 2:
                process_md_file_with_assets(md_file, output_path)
            else:
                process_md_file_remote(md_file)

    print("处理完成!所有图片已保存至:", os.path.abspath(output_path))
if __name__ == "__main__":
    # Processing mode handed to process_md_files:
    #   1 = collect images into output_path and rewrite the files in place,
    #   2 = export each file into its own folder with an assets/ subfolder,
    #   3 = upload local images and rewrite the files in place.
    # Named "mode" so the builtin ``type`` is not shadowed at module level.
    mode = 1
    input_path = r'D:\folder\test\tt'
    output_path = r'D:\folder\test\output2'
    process_md_files(input_path, output_path, mode)