zbparse/flask_app/general/format_change.py
2024-10-24 17:12:48 +08:00

180 lines
6.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import os
import requests
from flask_app.main.download import download_file
def upload_file(file_path, url):
    """Upload a local file to ``url`` and return the link the server replies with.

    The file is sent as a ``multipart/form-data`` POST under the form field
    ``file``. When the server answers with HTTP 200, the response body is
    decoded as UTF-8 JSON and the value stored under its ``"data"`` key is
    returned.

    Args:
        file_path: Path of the local file to upload.
        url: Endpoint that receives the upload.

    Returns:
        The ``"data"`` value of the JSON response on HTTP 200, otherwise an
        empty string (the status code and response text are printed).

    Raises:
        OSError: If ``file_path`` cannot be opened for reading.
        ValueError: If a 200 response body is not valid UTF-8 JSON.
        KeyError: If the JSON response has no ``"data"`` key.
    """
    receive_file_url = ""
    # os.path.basename honours the platform's path separators; splitting on
    # '/' left the whole path in the upload name for backslash-separated paths.
    filename = os.path.basename(file_path)

    # Read the file in binary mode and send it as a multipart/form-data upload.
    # NOTE(review): no timeout is passed, so this call can block for as long
    # as the server keeps the connection open -- confirm whether that is intended.
    with open(file_path, 'rb') as f:
        files = {'file': (filename, f)}
        response = requests.post(url, files=files)

    # The response body is already fully read here, so the file handle
    # does not need to stay open while the result is inspected.
    if response.status_code == 200:
        print("format_change 文件上传成功")
        receive_file_response = response.content.decode('utf-8')
        receive_file_json = json.loads(receive_file_response)
        receive_file_url = receive_file_json["data"]
    else:
        print(f"format_change 文件上传失败,状态码: {response.status_code}")
        print(f"format_change {response.text}")

    return receive_file_url
def get_filename_and_folder(file_path):
    """Split a path into its extension-less file name and its directory.

    Args:
        file_path: Path to a file.

    Returns:
        A ``(filename, directory)`` tuple. ``filename`` is the last path
        component with its final extension removed (for example
        ``ztb_tobidders_notice_table``); ``directory`` is the full path that
        precedes that component, not just the folder's own name.
    """
    directory, base_name = os.path.split(file_path)
    stem, _extension = os.path.splitext(base_name)
    return stem, directory
def pdf2docx(local_path_in):
    """Send a file to the remote ``p2d`` endpoint and download the result.

    Judging by the function name, the endpoint converts PDF to DOCX. The file
    is uploaded with ``upload_file``; the URL it returns is handed to
    ``download_file`` together with a target path made of the input's
    directory and extension-less name, so the output sits beside the input.

    Args:
        local_path_in: Path of the local file to convert. This is the only
            parameter; the output name is derived from it.

    Returns:
        The first element of the pair returned by ``download_file``
        (presumably the saved file's path -- confirm against download_file).
    """
    result_url = upload_file(
        local_path_in,
        'http://47.98.59.178:8000/v3/3rd/files/transfer/p2d',
    )  # conversion finished; this is the download link
    print(result_url)

    # Input and output share one folder. The target path carries no extension
    # (e.g. C:\Users\Administrator\Desktop\...\<name>); presumably
    # download_file appends the file type itself -- confirm.
    stem, parent_dir = get_filename_and_folder(local_path_in)
    target_path = os.path.join(parent_dir, stem)

    saved_path, _ = download_file(result_url, target_path)
    print(f"format_change p2d:have downloaded file to: {saved_path}")
    return saved_path
def doc2docx(local_path_in):
    """Send a file to the remote ``d2d`` endpoint and download the result.

    Judging by the function name, the endpoint converts DOC to DOCX. The
    upload goes through ``upload_file`` and the returned URL is fetched with
    ``download_file`` into the same folder as the input.

    Args:
        local_path_in: Path of the local file to convert.

    Returns:
        The first element of the pair returned by ``download_file``
        (presumably the saved file's path -- confirm against download_file).
    """
    endpoint = 'http://47.98.59.178:8000/v3/3rd/files/transfer/d2d'
    link = upload_file(local_path_in, endpoint)
    print(link)

    # Output goes next to the input, named after it without the extension.
    base, folder_path = get_filename_and_folder(local_path_in)
    download_result = download_file(link, os.path.join(folder_path, base))
    output_path, _ = download_result

    print(f"format_change d2d:have downloaded file to: {output_path}")
    return output_path
def docx2pdf(local_path_in):
    """Send a file to the remote ``d2p`` endpoint and download the result.

    Judging by the function name, the endpoint converts DOCX to PDF. Unlike
    ``pdf2docx`` and ``doc2docx``, this function does not print the download
    link it receives.

    Args:
        local_path_in: Path of the local file to convert.

    Returns:
        The first element of the pair returned by ``download_file``
        (presumably the saved file's path -- confirm against download_file).
    """
    download_link = upload_file(
        local_path_in, 'http://47.98.59.178:8000/v3/3rd/files/transfer/d2p'
    )

    # Build the extension-less target path in the input's own folder.
    name_part, dir_part = get_filename_and_folder(local_path_in)
    destination = os.path.join(dir_part, name_part)

    pdf_path, _ = download_file(download_link, destination)
    print(f"format_change d2p:have downloaded file to: {pdf_path}")
    return pdf_path
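# NOTE(review): everything commented out below is an alternative docx2pdf/doc2docx
# pair that posts to a different host and writes the response body straight to disk;
# presumably kept for reference -- confirm whether it can be removed.
# def docx2pdf(file_path):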
# """
# 将本地的 .docx 或 .doc 文件转换为 .pdf 文件。
#
# 参数:
# - file_path: str, 本地文件的路径,支持 .docx 和 .doc 格式。
# """
# # 检查文件是否存在
# if not os.path.isfile(file_path):
# raise FileNotFoundError(f"文件未找到: {file_path}")
#
# # 获取文件名和扩展名
# base_name = os.path.basename(file_path)
# name, ext = os.path.splitext(base_name)
# ext = ext.lower().lstrip('.')
#
# if ext not in ['docx', 'doc']:
# raise ValueError(f"doc2pdf 仅支持 .docx 和 .doc 文件,当前文件扩展名为: .{ext}")
#
# # 定义转换接口
# endpoint = 'http://120.26.236.97:5008/convert_to_pdf'
#
# # 获取文件所在目录
# output_dir = os.path.dirname(file_path)
#
# # 准备上传的文件
# with open(file_path, 'rb') as f:
# files = {'file': (base_name, f)}
# try:
# print(f"正在将 {base_name} 转换为 .pdf 格式...")
# response = requests.post(endpoint, files=files)
# response.raise_for_status() # 检查请求是否成功
# except requests.RequestException as e:
# print(f"转换过程中发生错误: {e}")
# return
#
# # 准备保存转换后文件的路径
# output_file_name = f"{name}.pdf"
# output_path = os.path.join(output_dir, output_file_name)
#
# # 保存转换后的文件
# with open(output_path, 'wb') as out_file:
# out_file.write(response.content)
#
# print(f"文件已成功转换并保存至: {output_path}")
# return output_path
#
#
# def doc2docx(file_path):
# """
# 将本地的 .doc 文件转换为 .docx 文件。
#
# 参数:
# - file_path: str, 本地文件的路径,支持 .doc 格式。
# """
# # 检查文件是否存在
# if not os.path.isfile(file_path):
# raise FileNotFoundError(f"文件未找到: {file_path}")
#
# # 获取文件名和扩展名
# base_name = os.path.basename(file_path)
# name, ext = os.path.splitext(base_name)
# ext = ext.lower().lstrip('.')
#
# if ext != 'doc':
# raise ValueError(f"doc2docx 仅支持 .doc 文件,当前文件扩展名为: .{ext}")
#
# # 定义转换接口
# endpoint = 'http://120.26.236.97:5008/convert_to_docx'
#
# # 获取文件所在目录
# output_dir = os.path.dirname(file_path)
#
# # 准备上传的文件
# with open(file_path, 'rb') as f:
# files = {'file': (base_name, f)}
# try:
# print(f"正在将 {base_name} 转换为 .docx 格式...")
# response = requests.post(endpoint, files=files)
# response.raise_for_status() # 检查请求是否成功
# except requests.RequestException as e:
# print(f"转换过程中发生错误: {e}")
# return
#
# # 准备保存转换后文件的路径
# output_file_name = f"{name}.docx"
# output_path = os.path.join(output_dir, output_file_name)
#
# # 保存转换后的文件
# with open(output_path, 'wb') as out_file:
# out_file.write(response.content)
#
# print(f"文件已成功转换并保存至: {output_path}")
# return output_path
if __name__ == '__main__':
    # Manual smoke test: replace the path below with a file of your own.
    # Alternative sample inputs and conversions are kept commented out.
    # local_path_in="C:\\Users\\Administrator\\Desktop\\fsdownload\\1fbbb6ff-7ddc-40bb-8857-b7de37aece3f\\兴欣工程.pdf"
    # local_path_in="C:\\Users\\Administrator\\Desktop\\fsdownload\\ztbfile.pdf"
    local_path_in = "C:\\Users\\Administrator\\Desktop\\fsdownload\\b151fcd0-4cd8-49b4-8de3-964057a9e653\\ztbfile.docx"

    # converted_path = doc2docx(local_path_in)
    # converted_path = pdf2docx(local_path_in)
    for _ in range(1):  # single run; raise the count to repeat the conversion
        converted_path = docx2pdf(local_path_in)
        print(converted_path)