zbparse/flask_app/general/format_change.py

183 lines
7.1 KiB
Python
Raw Normal View History

2024-08-29 16:37:09 +08:00
import json
import os
import requests
2024-08-29 17:30:49 +08:00
from flask_app.main.download import download_file
2024-08-29 16:37:09 +08:00
def upload_file(file_path, url):
    """Upload a local file to a conversion endpoint and return the result link.

    The file is sent as a ``multipart/form-data`` POST under the form field
    ``file``. When the server answers with HTTP 200, the body is decoded as
    UTF-8 JSON and the value stored under its ``"data"`` key is returned.

    Args:
        file_path: Path of the local file to upload.
        url: Endpoint that receives the upload.

    Returns:
        The value of ``"data"`` from the JSON response, or ``""`` when the
        server answers with a non-200 status or the body is not a JSON
        object containing ``"data"``.

    Raises:
        OSError: If ``file_path`` cannot be opened.
        requests.RequestException: On connection errors or timeouts.
    """
    receive_file_url = ""
    # os.path.basename understands the platform's separators; splitting on
    # '/' handed the whole path to the server for Windows-style paths.
    filename = os.path.basename(file_path)
    with open(file_path, 'rb') as f:
        files = {'file': (filename, f)}
        # (connect, read) timeout: without one, requests waits forever on a
        # stalled server. The read limit is generous because the response is
        # awaited while the upload is processed remotely.
        response = requests.post(url, files=files, timeout=(10, 600))
    if response.status_code == 200:
        print("format_change 文件上传成功")
        try:
            receive_file_json = json.loads(response.content.decode('utf-8'))
            receive_file_url = receive_file_json["data"]
        except (ValueError, KeyError, TypeError) as e:
            # Malformed body, or no "data" key: report it and fall back to
            # the same empty-string result used for a non-200 status.
            print(f"format_change 上传响应解析失败: {e!r}")
            receive_file_url = ""
    else:
        print(f"format_change 文件上传失败,状态码: {response.status_code}")
        print(f"format_change {response.text}")
    return receive_file_url
def get_filename_and_folder(file_path):
    """Split a path into its extension-less file name and its directory.

    Args:
        file_path: Path to a file.

    Returns:
        A ``(filename, directory)`` tuple: the final path component with its
        last extension removed (e.g. ``ztb_tobidders_notice_table``), and the
        full directory portion of ``file_path`` (not just the folder's name).
    """
    folder, leaf = os.path.split(file_path)
    stem, _extension = os.path.splitext(leaf)
    return stem, folder
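# Takes the path of the file to convert. The result is saved next to the input
# under the same base name; the extension is presumably filled in by download_file.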
def pdf2docx(local_path_in):
    """Convert a local PDF to .docx through the remote ``p2d`` service.

    The file is uploaded with ``upload_file``; the link it returns is then
    passed to ``download_file`` together with a target path made of the
    input's folder and its base name without extension.

    Args:
        local_path_in: Path of the PDF file to convert.

    Returns:
        The file path reported by ``download_file``, or ``None`` when the
        upload did not yield a download link.
    """
    remote_url = 'http://47.98.59.178:8000/v3/3rd/files/transfer/p2d'
    # The upload response carries the download link of the converted file.
    receive_download_url = upload_file(local_path_in, remote_url)
    print(receive_download_url)
    if not receive_download_url:
        # upload_file signals failure with an empty result; do not try to
        # download from an empty URL.
        print("format_change p2d: upload failed, no download link returned")
        return None
    # Input and output live in the same folder.
    filename, folder = get_filename_and_folder(local_path_in)
    # Target path without extension; the second value returned by
    # download_file (the file type) is not needed here.
    local_filename = os.path.join(folder, filename)
    downloaded_filepath, _ = download_file(receive_download_url, local_filename)
    print(f"format_change p2d:have downloaded file to: {downloaded_filepath}")
    return downloaded_filepath
2024-10-24 20:51:48 +08:00
# def doc2docx(local_path_in):
# remote_url = 'http://47.98.59.178:8000/v3/3rd/files/transfer/d2d'
# receive_download_url = upload_file(local_path_in, remote_url)
# print(receive_download_url)
# filename, folder = get_filename_and_folder(local_path_in) # 输入输出在同一个文件夹
# local_filename = os.path.join(folder, filename) # 输出文件名
# downloaded_filepath, file_type = download_file(receive_download_url, local_filename)
# print(f"format_change d2d:have downloaded file to: {downloaded_filepath}")
# return downloaded_filepath
# def docx2pdf(local_path_in):
# remote_url = 'http://47.98.59.178:8000/v3/3rd/files/transfer/d2p'
# receive_download_url = upload_file(local_path_in, remote_url)
# filename, folder = get_filename_and_folder(local_path_in) # 输入输出在同一个文件夹
# local_filename = os.path.join(folder, filename) # 输出文件名
# downloaded_filepath,file_type = download_file(receive_download_url, local_filename)
# print(f"format_change d2p:have downloaded file to: {downloaded_filepath}")
# return downloaded_filepath
def docx2pdf(file_path):
    """Convert a local .docx or .doc file to PDF via the conversion service.

    The file is posted to the ``convert_to_pdf`` endpoint and the response
    body is written to ``<same folder>/<same base name>.pdf``.

    Args:
        file_path: Path of the local ``.docx`` or ``.doc`` file
            (the extension check is case-insensitive).

    Returns:
        Path of the written PDF, or ``None`` when the HTTP request fails
        (the error is printed).

    Raises:
        FileNotFoundError: If ``file_path`` is not an existing file.
        ValueError: If the extension is neither ``.docx`` nor ``.doc``.
    """
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"文件未找到: {file_path}")

    base_name = os.path.basename(file_path)
    name, ext = os.path.splitext(base_name)
    ext = ext.lower().lstrip('.')
    if ext not in ('docx', 'doc'):
        # The message now names this function (it previously said "doc2pdf").
        raise ValueError(f"docx2pdf 仅支持 .docx 和 .doc 文件,当前文件扩展名为: .{ext}")

    endpoint = 'http://120.26.236.97:5008/convert_to_pdf'
    # Alternative endpoint (previously commented out):
    # endpoint = 'http://192.168.0.2:5008/convert_to_pdf'

    # The PDF is written next to the input file.
    output_path = os.path.join(os.path.dirname(file_path), f"{name}.pdf")

    try:
        with open(file_path, 'rb') as f:
            print(f"正在将 {base_name} 转换为 .pdf 格式...")
            # (connect, read) timeout: without one, requests waits forever on
            # a stalled server. The read limit is generous because the
            # response is awaited while the document is converted remotely.
            response = requests.post(
                endpoint,
                files={'file': (base_name, f)},
                timeout=(10, 600),
            )
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"转换过程中发生错误: {e}")
        return None

    # The input handle is closed by now; persist the converted bytes.
    with open(output_path, 'wb') as out_file:
        out_file.write(response.content)
    print(f"文件已成功转换并保存至: {output_path}")
    return output_path
def doc2docx(file_path):
    """Convert a local .doc file to .docx via the conversion service.

    The file is posted to the ``convert_to_docx`` endpoint and the response
    body is written to ``<same folder>/<same base name>.docx``.

    Args:
        file_path: Path of the local ``.doc`` file
            (the extension check is case-insensitive).

    Returns:
        Path of the written .docx file, or ``None`` when the HTTP request
        fails (the error is printed).

    Raises:
        FileNotFoundError: If ``file_path`` is not an existing file.
        ValueError: If the extension is not ``.doc``.
    """
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"文件未找到: {file_path}")

    base_name = os.path.basename(file_path)
    name, ext = os.path.splitext(base_name)
    ext = ext.lower().lstrip('.')
    if ext != 'doc':
        raise ValueError(f"doc2docx 仅支持 .doc 文件,当前文件扩展名为: .{ext}")

    endpoint = 'http://120.26.236.97:5008/convert_to_docx'

    # The .docx is written next to the input file.
    output_path = os.path.join(os.path.dirname(file_path), f"{name}.docx")

    try:
        with open(file_path, 'rb') as f:
            print(f"正在将 {base_name} 转换为 .docx 格式...")
            # (connect, read) timeout: without one, requests waits forever on
            # a stalled server. The read limit is generous because the
            # response is awaited while the document is converted remotely.
            response = requests.post(
                endpoint,
                files={'file': (base_name, f)},
                timeout=(10, 600),
            )
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"转换过程中发生错误: {e}")
        return None

    # The input handle is closed by now; persist the converted bytes.
    with open(output_path, 'wb') as out_file:
        out_file.write(response.content)
    print(f"文件已成功转换并保存至: {output_path}")
    return output_path
2024-10-23 20:33:41 +08:00
2024-08-29 16:37:09 +08:00
if __name__ == '__main__':
    # Manual run: replace the path below (and the endpoints inside the
    # conversion functions) with your own file path and API URL.
    #
    # Other sample inputs:
    # sample_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\1fbbb6ff-7ddc-40bb-8857-b7de37aece3f\\兴欣工程.pdf"
    # sample_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\b151fcd0-4cd8-49b4-8de3-964057a9e653\\ztbfile.docx"
    # sample_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\ztbfile.pdf"
    # sample_path = "C:\\Users\\Administrator\\Desktop\\招标文件\\招标test文件夹\\招标文件.pdf"
    sample_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfilesdocx\\6.2定版视频会议磋商文件.doc"

    # Other conversions that can be exercised the same way:
    # converted_path = doc2docx(sample_path)
    # converted_path = pdf2docx(sample_path)
    converted_path = docx2pdf(sample_path)
    print(converted_path)
2024-08-29 16:37:09 +08:00