3.18
This commit is contained in:
parent
390b9ccc92
commit
f7f099a50e
31
.gitignore
vendored
31
.gitignore
vendored
@ -1 +1,30 @@
|
|||||||
config.py
|
# 不提交环境变量文件
|
||||||
|
.env
|
||||||
|
|
||||||
|
# 忽略 Python 编译文件和缓存
|
||||||
|
*.pyc
|
||||||
|
__pycache__/
|
||||||
|
|
||||||
|
# 如果使用虚拟环境,也可以忽略这些目录(根据实际情况选择)
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env/
|
||||||
|
|
||||||
|
# IntelliJ IDEA 项目配置(包括 PyCharm 等基于 IDEA 的 IDE)
|
||||||
|
.idea/
|
||||||
|
*.iml
|
||||||
|
*.ipr
|
||||||
|
*.iws
|
||||||
|
|
||||||
|
# Windows 系统生成的文件
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
|
# macOS 系统生成的文件
|
||||||
|
.DS_Store
|
||||||
|
|
||||||
|
# 日志文件
|
||||||
|
*.log
|
||||||
|
logs/
|
||||||
|
|
||||||
|
# 可选:忽略 Docker 相关的临时或覆盖配置文件(如果不需要提交)
|
||||||
|
docker-compose.override.yml
|
||||||
|
@ -1,6 +1,3 @@
|
|||||||
cos_python_sdk_v5==1.9.15
|
|
||||||
panflute==2.1.3
|
|
||||||
pypandoc==1.8
|
|
||||||
pytypecho==2.1.0
|
pytypecho==2.1.0
|
||||||
qcloud_cos==3.3.6
|
pymysql==1.0.2
|
||||||
pymysql==1.0.2
|
python-dotenv==0.21.0
|
||||||
|
33
transfer_md/download_img.py
Normal file
33
transfer_md/download_img.py
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
import uuid
|
||||||
|
import requests
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
def download_image(url, output_path, timeout=30):
    """Download the image at ``url`` into the ``output_path`` directory.

    The file is saved under a freshly generated UUID name. Its extension is
    taken from the path component of the URL and falls back to ``.png`` when
    the URL path has none.

    Args:
        url: Address of the image to fetch.
        output_path: Existing directory that receives the downloaded file.
        timeout: Seconds passed to ``requests.get`` so that an unresponsive
            server cannot block the caller forever.

    Returns:
        The generated file name (not the full path) on success, or ``None``
        when the server does not answer with status 200 or any error occurs.
    """
    dest_path = None
    try:
        # The context manager releases the streamed connection on every path.
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"警告: 无法下载图片 {url},状态码: {response.status_code}")
                return None

            # Derive the extension from the URL path only (query string ignored).
            ext = os.path.splitext(urlparse(url).path)[1] or '.png'
            new_filename = f"{uuid.uuid4()}{ext}"
            dest_path = os.path.join(output_path, new_filename)

            # Let urllib3 undo gzip/deflate transfer encoding while copying.
            response.raw.decode_content = True
            with open(dest_path, 'wb') as f:
                shutil.copyfileobj(response.raw, f)
    except Exception as e:
        # Deliberately broad: downloading is best-effort and must not abort
        # the processing of the remaining images.
        print(f"错误: 下载图片 {url} 时出错: {e}")
        # Do not leave a truncated image behind after a failed copy.
        if dest_path is not None and os.path.exists(dest_path):
            os.remove(dest_path)
        return None

    print(f"已下载: {url} → {dest_path}")
    return new_filename
|
@ -2,40 +2,9 @@ import os
|
|||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
import uuid
|
import uuid
|
||||||
import requests
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
from upload_img import upload_image
|
from upload_img import upload_image
|
||||||
|
|
||||||
|
|
||||||
def download_image(url, output_path, timeout=30):
    """Download the image at ``url`` into the ``output_path`` directory.

    The file is saved under a freshly generated UUID name. Its extension is
    taken from the path component of the URL and falls back to ``.png`` when
    the URL path has none.

    Args:
        url: Address of the image to fetch.
        output_path: Existing directory that receives the downloaded file.
        timeout: Seconds passed to ``requests.get`` so that an unresponsive
            server cannot block the caller forever.

    Returns:
        The generated file name (not the full path) on success, or ``None``
        when the server does not answer with status 200 or any error occurs.
    """
    dest_path = None
    try:
        # The context manager releases the streamed connection on every path.
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"警告: 无法下载图片 {url},状态码: {response.status_code}")
                return None

            # Derive the extension from the URL path only (query string ignored).
            ext = os.path.splitext(urlparse(url).path)[1] or '.png'
            new_filename = f"{uuid.uuid4()}{ext}"
            dest_path = os.path.join(output_path, new_filename)

            # Let urllib3 undo gzip/deflate transfer encoding while copying.
            response.raw.decode_content = True
            with open(dest_path, 'wb') as f:
                shutil.copyfileobj(response.raw, f)
    except Exception as e:
        # Deliberately broad: downloading is best-effort and must not abort
        # the processing of the remaining images.
        print(f"错误: 下载图片 {url} 时出错: {e}")
        # Do not leave a truncated image behind after a failed copy.
        if dest_path is not None and os.path.exists(dest_path):
            os.remove(dest_path)
        return None

    print(f"已下载: {url} → {dest_path}")
    return new_filename
|
|
||||||
|
|
||||||
def extract_image_paths(content):
|
def extract_image_paths(content):
|
||||||
"""
|
"""
|
||||||
从 Markdown 内容中提取所有图片路径(支持 Markdown 和 HTML 格式)
|
从 Markdown 内容中提取所有图片路径(支持 Markdown 和 HTML 格式)
|
||||||
@ -209,29 +178,53 @@ def process_md_file_remote(md_file):
|
|||||||
print(f"已更新: {md_file}")
|
print(f"已更新: {md_file}")
|
||||||
|
|
||||||
|
|
||||||
def process_md_files(input_path,output_path,type):
|
def scan_files(base_folder, exclude_folders):
    """Collect every Markdown file found below ``base_folder``.

    A directory is skipped, together with everything beneath it, when its
    path contains any of the strings in ``exclude_folders`` (substring match
    on the directory path as produced by ``os.walk``).

    Args:
        base_folder: Directory to walk recursively.
        exclude_folders: Iterable of strings to match against directory
            paths; ``None`` or an empty iterable disables exclusion. Entries
            are stripped of surrounding whitespace and blank entries are
            ignored.

    Returns:
        A list of paths (``base_folder`` joined with the relative location)
        of all files whose name ends in ``.md``, case-insensitively.
    """
    # A blank entry is a substring of every path and would silently exclude
    # the whole tree; ''.split(',') yields exactly such an entry ([''])
    # when the configured value is empty.
    excludes = [
        entry.strip()
        for entry in (exclude_folders or [])
        if entry and entry.strip()
    ]

    md_files = []
    for root, dirs, files in os.walk(base_folder):
        if any(exclude in root for exclude in excludes):
            # Every descendant path starts with ``root`` and therefore also
            # matches, so pruning here only saves the pointless traversal.
            dirs[:] = []
            continue
        md_files.extend(
            os.path.join(root, name)
            for name in files
            if name.lower().endswith('.md')
        )
    return md_files
|
||||||
|
|
||||||
|
def process_md_files(input_path, output_path, type, exclude_folders=None):
|
||||||
|
"""
|
||||||
|
处理输入目录下所有 Markdown 文件,并将处理后的图片保存到 output_path。
|
||||||
|
type 参数决定了使用哪种处理方式:
|
||||||
|
type == 1: process_md_file_local
|
||||||
|
type == 2: process_md_file_with_assets
|
||||||
|
type == 3: process_md_file_remote
|
||||||
|
"""
|
||||||
# 创建输出目录(如果不存在)
|
# 创建输出目录(如果不存在)
|
||||||
os.makedirs(output_path, exist_ok=True)
|
os.makedirs(output_path, exist_ok=True)
|
||||||
|
|
||||||
|
# 获取 Markdown 文件列表
|
||||||
|
if exclude_folders is None:
|
||||||
|
exclude_folders = []
|
||||||
|
md_files = scan_files(input_path, exclude_folders)
|
||||||
|
|
||||||
# 遍历处理所有 Markdown 文件
|
# 遍历处理所有 Markdown 文件
|
||||||
for root, _, files in os.walk(input_path):
|
for md_file in md_files:
|
||||||
for file in files:
|
if type == 1:
|
||||||
if file.lower().endswith('.md'):
|
process_md_file_local(md_file, output_path) #url改为本地,图片存output_path
|
||||||
md_file = os.path.join(root, file)
|
elif type == 2:
|
||||||
if type==1:
|
process_md_file_with_assets(md_file, output_path) #url改为本地,图片和md都存output_path
|
||||||
process_md_file_local(md_file, output_path)
|
elif type == 3:
|
||||||
elif type==2:
|
process_md_file_remote(md_file) #url改公网链接
|
||||||
process_md_file_with_assets(md_file,output_path)
|
else:
|
||||||
elif type==3:
|
print(f"未知的处理类型: {type}")
|
||||||
process_md_file_remote(md_file)
|
|
||||||
else:
|
|
||||||
pass
|
|
||||||
|
|
||||||
print("处理完成!所有图片已保存至:", os.path.abspath(output_path))
|
print("处理完成!所有图片已保存至:", os.path.abspath(output_path))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
type=1
|
type=1
|
||||||
input_path = r'D:\folder\test\tt'
|
input_path = r'D:\folder\study\md_files\Java\zbparse'
|
||||||
output_path = r'D:\folder\test\output2'
|
output_path = r'D:\folder\test\output'
|
||||||
process_md_files(input_path,output_path,type)
|
process_md_files(input_path,output_path,type)
|
@ -1,12 +1,6 @@
|
|||||||
base_folder = 'D:/Notes/'
|
base_folder = 'D:/Notes/'
|
||||||
exclude_folders = ['工作笔记']
|
exclude_folders = ['工作笔记']
|
||||||
|
|
||||||
# cos config
|
|
||||||
secret_id = 'xxx' # 替换为用户的 SecretId,请登录访问管理控制台进行查看和管理,https://console.cloud.tencent.com/cam/capi
|
|
||||||
secret_key = 'xxx' # 替换为用户的 SecretKey,请登录访问管理控制台进行查看和管理,https://console.cloud.tencent.com/cam/capi
|
|
||||||
region = 'ap-shanghai'
|
|
||||||
bucket = 'xxx'
|
|
||||||
|
|
||||||
# typecho config
|
# typecho config
|
||||||
website_xmlrpc_url = '' # https://www.abc.com/index.php/action/xmlrpc
|
website_xmlrpc_url = '' # https://www.abc.com/index.php/action/xmlrpc
|
||||||
website_username = 'xxx'
|
website_username = 'xxx'
|
||||||
|
@ -1,17 +0,0 @@
|
|||||||
import os.path
|
|
||||||
|
|
||||||
from qcloud_cos import CosConfig, CosS3Client
|
|
||||||
|
|
||||||
|
|
||||||
class CosPicUploader:
    """Uploads local files to a single COS bucket and reports their URL."""

    def __init__(self, secret_id, secret_key, region, bucket):
        """Create the COS client used for all uploads.

        Args:
            secret_id: Credential id handed to ``CosConfig``.
            secret_key: Credential key handed to ``CosConfig``.
            region: Region name handed to ``CosConfig``.
            bucket: Name of the bucket every upload goes to.
        """
        self.__bucket = bucket
        cos_config = CosConfig(
            Region=region,
            Secret_id=secret_id,
            Secret_key=secret_key,
        )
        self.__config = cos_config
        self.__client = CosS3Client(cos_config)

    def upload_file(self, key, file_path):
        """Upload ``file_path`` under object key ``key`` and return its URL.

        Backslashes in ``file_path`` are converted to forward slashes before
        the file is opened.

        Args:
            key: Object key to store the file under.
            file_path: Path of the local file to upload.

        Returns:
            Whatever ``get_object_url`` of the client returns for ``key``.
        """
        normalized_path = file_path.replace('\\', '/')
        client, bucket = self.__client, self.__bucket
        with open(normalized_path, 'rb') as stream:
            client.put_object(Bucket=bucket, Body=stream, Key=key)
        return client.get_object_url(Bucket=bucket, Key=key)
|
|
@ -1,73 +1,79 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/python
|
||||||
# -*- coding: UTF-8 -*-
|
# -*- coding: UTF-8 -*-
|
||||||
import logging
|
import logging
|
||||||
import os.path
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
from markdown_file_searcher import scan_files
|
# 加载 .env 文件中的环境变量
|
||||||
from markdown_img_searcher import scan_imgs
|
load_dotenv()
|
||||||
from cos_pic_uploader import CosPicUploader
|
|
||||||
import config
|
from transfer_md.transfer import process_md_file_remote, scan_files # 假设该模块中实现了相应函数
|
||||||
from typecho_xmlrpc_publisher import TypechoXmlRpcPublisher
|
from typecho_xmlrpc_publisher import TypechoXmlRpcPublisher
|
||||||
from typecho_direct_mysql_publisher import TypechoDirectMysqlPublisher
|
from typecho_direct_mysql_publisher import TypechoDirectMysqlPublisher
|
||||||
|
|
||||||
uploader = CosPicUploader(
|
# 初始化发布器,直接使用 os.getenv 获取环境变量
|
||||||
config.secret_id,
|
|
||||||
secret_key=config.secret_key,
|
|
||||||
region=config.region,
|
|
||||||
bucket=config.bucket
|
|
||||||
)
|
|
||||||
|
|
||||||
typecho_publisher = TypechoXmlRpcPublisher(
|
typecho_publisher = TypechoXmlRpcPublisher(
|
||||||
config.website_xmlrpc_url,
|
os.getenv('WEBSITE_XMLRPC_URL'),
|
||||||
config.website_username,
|
os.getenv('WEBSITE_USERNAME'),
|
||||||
config.website_password
|
os.getenv('WEBSITE_PASSWORD')
|
||||||
)
|
)
|
||||||
|
|
||||||
mysql_publisher = TypechoDirectMysqlPublisher(
|
mysql_publisher = TypechoDirectMysqlPublisher(
|
||||||
config.mysql_host,
|
os.getenv('MYSQL_HOST'),
|
||||||
config.mysql_port,
|
int(os.getenv('MYSQL_PORT', 3306)),
|
||||||
config.mysql_username,
|
os.getenv('MYSQL_USERNAME'),
|
||||||
config.mysql_password,
|
os.getenv('MYSQL_PASSWORD'),
|
||||||
config.mysql_typecho_database,
|
os.getenv('MYSQL_TYPECHO_DATABASE'),
|
||||||
config.mysql_typecho_table_prefix
|
os.getenv('MYSQL_TYPECHO_TABLE_PREFIX')
|
||||||
)
|
)
|
||||||
|
|
||||||
def execute_flow_with_typecho_xmlrpc(file_path):
|
def execute_flow_with_typecho_xmlrpc(file_path):
|
||||||
|
"""
|
||||||
|
使用 XML-RPC 接口发布文章。
|
||||||
|
这里 process_md_file_remote 用于处理 Markdown 文件(上传本地图片并替换 URL)
|
||||||
|
"""
|
||||||
|
# 先对 Markdown 文件进行处理:上传本地图片并替换为公网地址
|
||||||
|
process_md_file_remote(file_path)
|
||||||
|
|
||||||
with open(file_path, 'r', encoding='utf-8') as file:
|
with open(file_path, 'r', encoding='utf-8') as file:
|
||||||
file_base_path = os.path.dirname(file_path)
|
file_base_name = os.path.splitext(os.path.basename(file_path))[0]
|
||||||
file_base_name = os.path.splitext(os.path.basename(file_path))[0] #无后缀文件名
|
|
||||||
md_source_text = file.read()
|
md_source_text = file.read()
|
||||||
md_img_urls = scan_imgs(file_path)
|
# category_name = os.path.basename(os.path.dirname(file_path))
|
||||||
if len(md_img_urls) > 0:
|
# 注意:XML-RPC 方式不需要 category_name 参数
|
||||||
for md_img_url in md_img_urls:
|
post_id = typecho_publisher.publish_post(file_base_name, md_source_text)
|
||||||
img_file = os.path.join(file_base_path, md_img_url)
|
print('发布成功 --> ' + file_base_name + ' - ' + str(post_id))
|
||||||
img_file_name = os.path.basename(img_file)
|
|
||||||
oss_url = uploader.upload_file(key=file_base_name+'-'+img_file_name, file_path=img_file)
|
|
||||||
md_source_text = md_source_text.replace('](' + md_img_url + ')', '](' + oss_url + ')')
|
|
||||||
post_id = typecho_publisher.publish_post(file_base_name, md_source_text)
|
|
||||||
print('发布成功 --> ' + file_base_name + ' - ' + str(post_id))
|
|
||||||
|
|
||||||
|
|
||||||
def execute_flow_with_typecho_mysql(file_path):
|
def execute_flow_with_typecho_mysql(file_path):
|
||||||
|
"""
|
||||||
|
使用 MySQL 直连方式发布文章。
|
||||||
|
这里 process_md_file_remote 用于处理 Markdown 文件(上传本地图片并替换为公网地址)。
|
||||||
|
分类名称将从文件路径的上一级目录中获取。
|
||||||
|
"""
|
||||||
|
# 先对 Markdown 文件进行处理:上传本地图片并替换为公网地址
|
||||||
|
process_md_file_remote(file_path)
|
||||||
|
|
||||||
with open(file_path, 'r', encoding='utf-8') as file:
|
with open(file_path, 'r', encoding='utf-8') as file:
|
||||||
file_base_path = os.path.dirname(file_path)
|
file_base_name = os.path.splitext(os.path.basename(file_path))[0] #os.path.basename(path)返回给定路径中的最后一个组件
|
||||||
file_base_name = os.path.splitext(os.path.basename(file_path))[0] #无后缀文件名
|
|
||||||
category_name = os.path.basename(file_base_path)
|
|
||||||
md_source_text = file.read()
|
md_source_text = file.read()
|
||||||
md_img_urls = scan_imgs(file_path)
|
|
||||||
if len(md_img_urls) > 0:
|
# 从文件的上一级目录获取分类名称
|
||||||
for md_img_url in md_img_urls:
|
category_name = os.path.basename(os.path.dirname(file_path)) #os.path.dirname(path)返回给定路径中目录部分(去掉最后一个)
|
||||||
img_file = os.path.join(file_base_path, md_img_url)
|
|
||||||
img_file_name = os.path.basename(img_file)
|
post_id = mysql_publisher.publish_post(file_base_name, md_source_text, category_name)
|
||||||
oss_url = uploader.upload_file(key=file_base_name+'-'+img_file_name, file_path=img_file)
|
print('发布成功 --> ' + file_base_name + ' - ' + str(post_id))
|
||||||
md_source_text = md_source_text.replace('](' + md_img_url + ')', '](' + oss_url + ')')
|
|
||||||
post_id = mysql_publisher.publish_post(file_base_name, md_source_text, category_name)
|
|
||||||
print('发布成功 --> ' + file_base_name + ' - ' + str(post_id))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
logging.basicConfig(level='ERROR')
|
logging.basicConfig(level='ERROR')
|
||||||
files = scan_files(config.base_folder, config.exclude_folders)
|
|
||||||
|
# 获取 base_folder 和 exclude_folders 配置
|
||||||
|
base_folder = os.getenv('BASE_FOLDER')
|
||||||
|
exclude_folders = os.getenv('EXCLUDE_FOLDERS', '').split(',')
|
||||||
|
|
||||||
|
files = scan_files(base_folder, exclude_folders)
|
||||||
|
|
||||||
for md_file in files:
|
for md_file in files:
|
||||||
|
# 根据需要选择使用哪种发布方式:
|
||||||
# execute_flow_with_typecho_xmlrpc(md_file)
|
# execute_flow_with_typecho_xmlrpc(md_file)
|
||||||
execute_flow_with_typecho_mysql(md_file)
|
execute_flow_with_typecho_mysql(md_file)
|
||||||
|
@ -1,27 +0,0 @@
|
|||||||
import os
|
|
||||||
from os import path
|
|
||||||
|
|
||||||
|
|
||||||
# md文件扫描
|
|
||||||
def __scaner_files(results, file_path, exclude_folders=None):
    """Recursively append the ``.md`` files below ``file_path`` to ``results``.

    Args:
        results: List that receives the absolute path of every match; it is
            extended in place.
        file_path: Directory to scan.
        exclude_folders: Directory names (compared against the last path
            component) that are not descended into. ``None`` means that
            nothing is excluded.
    """
    excluded = exclude_folders or ()
    # Sorted so that the result order does not depend on the file system.
    for entry in sorted(os.listdir(file_path)):
        real_path = path.join(file_path, entry)
        if path.isfile(real_path):
            if entry.endswith('.md'):
                results.append(path.abspath(real_path))
        elif path.isdir(real_path):
            if entry not in excluded:
                __scaner_files(results, real_path, excluded)
        else:
            # E.g. a dangling symlink: report which entry was skipped
            # instead of printing a context-free "error".
            print(f"skipped (neither file nor directory): {real_path}")


def scan_files(file_path, exclude_folders):
    """Return the absolute paths of all ``.md`` files below ``file_path``.

    Args:
        file_path: Root directory of the scan.
        exclude_folders: Directory names to skip entirely, or ``None``.

    Returns:
        A list of absolute paths, in sorted directory-listing order.
    """
    results = []
    __scaner_files(results, file_path, exclude_folders)
    return results
|
|
@ -1,30 +0,0 @@
|
|||||||
import io
|
|
||||||
import os.path
|
|
||||||
|
|
||||||
import panflute
|
|
||||||
import pypandoc
|
|
||||||
|
|
||||||
|
|
||||||
# 读取md图片地址
|
|
||||||
def __prepare(doc):
    """Give ``doc`` empty ``images`` and ``links`` collectors."""
    doc.images, doc.links = [], []


def __action(elem, doc):
    """Store ``elem`` on ``doc`` when it is an image or a link element."""
    if isinstance(elem, panflute.Image):
        doc.images.append(elem)
        return
    if isinstance(elem, panflute.Link):
        doc.links.append(elem)


def scan_imgs(file_path):
    """Return the ``url`` of every image element found in a Markdown file.

    The file is converted to pandoc JSON with ``pypandoc``, loaded with
    ``panflute`` and walked with ``__action`` to collect the image elements.

    Args:
        file_path: Path of the file to convert and inspect.

    Returns:
        A list with the ``url`` attribute of each collected image element.
    """
    pandoc_json = pypandoc.convert_file(file_path, 'json')
    document = panflute.load(io.StringIO(pandoc_json))
    __prepare(document)
    document = panflute.run_filter(__action, prepare=__prepare, doc=document)
    return [image.url for image in document.images]
|
|
@ -1,3 +1,4 @@
|
|||||||
|
#typecho_direct_mysql_publisher.py
|
||||||
import pymysql
|
import pymysql
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
#typecho_xmlrpc_publisher.py
|
||||||
# typecho api调用
|
# typecho api调用
|
||||||
from pytypecho import Post, Typecho
|
from pytypecho import Post, Typecho
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user