zbparse/flask_app/main/format_change.py

69 lines
2.6 KiB
Python
Raw Normal View History

2024-08-29 16:37:09 +08:00
import json
import os
import requests
2024-08-29 17:30:49 +08:00
from flask_app.main.download import download_file
2024-08-29 16:37:09 +08:00
def upload_file(file_path, url, timeout=None):
    """Upload a local file to a remote endpoint and return the URL it reports.

    The file is sent as a multipart/form-data POST under the form field
    ``file``. On HTTP 200 the response body is decoded as UTF-8 JSON and the
    value stored under its ``"data"`` key is returned.

    Args:
        file_path: Path of the local file to upload.
        url: Endpoint that receives the POST request.
        timeout: Optional timeout (seconds) forwarded to ``requests.post``.
            The default ``None`` applies no timeout, matching the previous
            behaviour of this function.

    Returns:
        The value of ``"data"`` from the JSON response, or ``""`` when the
        server answers with a non-200 status or the response body cannot be
        parsed into a mapping containing ``"data"``.

    Raises:
        OSError: If ``file_path`` cannot be opened for reading.
        requests.exceptions.RequestException: If the HTTP request itself fails.
    """
    receive_file_url = ""
    # os.path.basename honours the platform's path separators; splitting on
    # '/' left the whole path as the "file name" for backslash-separated paths.
    filename = os.path.basename(file_path)
    # The file only needs to stay open while the request body is being sent.
    with open(file_path, 'rb') as f:
        files = {'file': (filename, f)}
        response = requests.post(url, files=files, timeout=timeout)

    if response.status_code != 200:
        print(f"文件上传失败,状态码: {response.status_code}")
        print(response.text)
        return receive_file_url

    print("文件上传成功")
    try:
        receive_file_json = json.loads(response.content.decode('utf-8'))
        receive_file_url = receive_file_json["data"]
    except (ValueError, KeyError, TypeError) as exc:
        # ValueError covers both invalid UTF-8 and invalid JSON; KeyError and
        # TypeError cover a body that is valid JSON but has no "data" entry.
        # Report it like the non-200 case instead of crashing the caller.
        print(f"解析上传响应失败: {exc!r}")
        print(response.text)
    return receive_file_url
def get_filename_and_folder(file_path):
    """Split a path into its extension-less file name and its directory.

    Args:
        file_path: Path to a file, e.g. ``/data/out/notice_table.pdf``.

    Returns:
        A ``(filename, directory)`` tuple: the final path component with its
        last extension removed, and the full directory part of the path
        (an empty string when the path has no directory part).
    """
    # os.path.split yields (dirname, basename) in a single call.
    directory, base_name = os.path.split(file_path)
    stem, _extension = os.path.splitext(base_name)
    return stem, directory
def pdf2docx(local_path_in):
    """Convert a local PDF to DOCX through the remote ``p2d`` transfer service.

    The input file is uploaded with ``upload_file``; the URL that call returns
    is then passed to ``download_file`` together with a target path made of
    the input's directory and its extension-less file name.

    Args:
        local_path_in: Path of the local file to convert.

    Returns:
        The first element of the pair returned by ``download_file`` (printed
        below as the downloaded file path).
    """
    endpoint = 'http://47.98.59.178:8000/v3/3rd/files/transfer/p2d'
    result_url = upload_file(local_path_in, endpoint)
    # The output lives next to the input and shares its base name.
    # NOTE(review): the target path has no extension; presumably download_file
    # appends one based on the downloaded content — confirm.
    stem, parent_dir = get_filename_and_folder(local_path_in)
    target_path = os.path.join(parent_dir, stem)
    saved_path, _file_type = download_file(result_url, target_path)
    print("have downloaded file to:", saved_path)
    return saved_path
def docx2pdf(local_path_in):
    """Convert a local DOCX to PDF through the remote ``d2p`` transfer service.

    The input file is uploaded with ``upload_file``; the URL that call returns
    is then passed to ``download_file`` together with a target path made of
    the input's directory and its extension-less file name.

    Args:
        local_path_in: Path of the local file to convert.

    Returns:
        The first element of the pair returned by ``download_file`` (printed
        below as the downloaded file path).
    """
    endpoint = 'http://47.98.59.178:8000/v3/3rd/files/transfer/d2p'
    result_url = upload_file(local_path_in, endpoint)
    # The output lives next to the input and shares its base name.
    # NOTE(review): the target path has no extension; presumably download_file
    # appends one based on the downloaded content — confirm.
    stem, parent_dir = get_filename_and_folder(local_path_in)
    target_path = os.path.join(parent_dir, stem)
    saved_path, _file_type = download_file(result_url, target_path)
    print("have downloaded file to:", saved_path)
    return saved_path
if __name__ == '__main__':
    # Manual smoke test: replace the path below with a local file, then run
    # this module directly.
    local_path_in = "C:\\Users\\Administrator\\Desktop\\招标文件\\output1\\test111.pdf"
    # NOTE(review): the sample path ends in .pdf but is passed to docx2pdf;
    # pdf2docx(local_path_in) is the alternative call — confirm which one is
    # intended for this sample.
    print(docx2pdf(local_path_in))