zbparse/flask_app/main/转化格式/main_pdf_to_docx.py

40 lines
1.6 KiB
Python
Raw Normal View History

2024-08-29 16:37:09 +08:00
import time
from submit_conversion import submit_conversion_task
from check_status import get_download_url
from download import download_file
def download_pdf_convert_docx(url, downloaded_filename, *, poll_interval=0.5, timeout=300.0):
    """
    Submit a PDF for conversion to DOCX, wait for the result, and save it.

    The function hands ``url`` to ``submit_conversion_task``, polls
    ``get_download_url`` with the returned task id until it yields a
    download link, and then passes that link to ``download_file``.
    Progress and failures are reported with ``print``; nothing is returned.

    Args:
        url (str): Location of the PDF handed to the conversion service.
        downloaded_filename (str): The filename to save the converted DOCX as.
        poll_interval (float): Seconds to sleep before each status query.
        timeout (float | None): Maximum number of seconds to keep polling
            before giving up. Pass ``None`` to wait indefinitely (the
            behaviour before this limit existed).
    """
    # Submit the conversion task and obtain its task id.
    # NOTE(review): a falsy task id is treated as "submission failed";
    # confirm against submit_conversion_task.
    task_id = submit_conversion_task(url)
    if not task_id:
        print("Failed to submit conversion task.")
        return

    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = None if timeout is None else time.monotonic() + timeout

    # Poll for the download link. Without a deadline this loop could spin
    # forever if the service never produces a link, and the failure message
    # below would be unreachable.
    download_url = None
    while not download_url:
        time.sleep(poll_interval)
        download_url = get_download_url(task_id)
        if deadline is not None and time.monotonic() >= deadline:
            break

    if not download_url:
        print("Failed to get download URL.")
        return

    # A link is available: fetch the converted document.
    download_file(download_url, downloaded_filename)
    print(f'File downloaded and saved as {downloaded_filename}')
if __name__ == "__main__":
    # Manual smoke run: convert one sample PDF and store the DOCX next to
    # the script.
    # NOTE(review): the URL carries Expires/Signature query parameters, so it
    # is presumably a time-limited link that has to be refreshed before use.
    sample_pdf_url = "https://temp-pdf2docx.oss-cn-wuhan-lr.aliyuncs.com/pdf/02cf0a7a8cda432a8ba7a929862510eb.pdf?Expires=1724035295&OSSAccessKeyId=TMP.3Kj9nRWk3bspYRpZJJeKSSDjuoiSsd1SYBnHtac62JciczGbftutcSUcM5RpLTQNQXeANRNbdSxK2VnX9cQZ9bUgR3dWDv&Signature=MJfXEZe1fy5CEIoJ1IxhliSv0Ss%3D"
    # Alternative input left disabled in the original (a local file path):
    # sample_pdf_url = "C:\\Users\\Administrator\\Desktop\\招标文件\\output1\\zbfile.pdf"
    output_docx_name = 'downloaded_document.docx'
    download_pdf_convert_docx(
        url=sample_pdf_url,
        downloaded_filename=output_docx_name,
    )