40 lines
1.6 KiB
Python
40 lines
1.6 KiB
Python
|
import sys
import time

from check_status import get_download_url
from download import download_file
from submit_conversion import submit_conversion_task
|
||
|
|
||
|
def download_pdf_convert_docx(url, downloaded_filename, poll_interval=0.5, timeout=300.0):
    """
    Download a PDF from a URL, convert it to a DOCX, and save it locally.

    The work is delegated to three helpers: ``submit_conversion_task(url)``
    yields a task id, ``get_download_url(task_id)`` is polled until it
    returns a truthy download link, and ``download_file`` is then called
    with that link and ``downloaded_filename``. Outcomes are reported with
    ``print``; the function itself returns ``None``.

    Args:
        url (str): The URL of the PDF to be downloaded.
        downloaded_filename (str): The filename to save the converted DOCX as.
        poll_interval (float): Seconds to sleep before each status query.
        timeout (float | None): Upper bound, in seconds, on the total time
            spent polling. ``None`` polls until a link is returned, with no
            upper bound.
    """
    # Submit the conversion task and obtain its task_id.
    task_id = submit_conversion_task(url)
    if not task_id:
        print("Failed to submit conversion task.")
        return

    # A monotonic deadline bounds the polling loop; without it a task that
    # never yields a link would block forever and the failure branch below
    # could never run.
    deadline = None if timeout is None else time.monotonic() + timeout

    download_url = None
    while True:
        # Sleep first so the service is not queried immediately after submit.
        time.sleep(poll_interval)
        download_url = get_download_url(task_id)
        if download_url:
            break
        if deadline is not None and time.monotonic() >= deadline:
            break

    if download_url:
        # A download link was obtained: fetch the converted file.
        download_file(download_url, downloaded_filename)
        print(f'File downloaded and saved as {downloaded_filename}')
    else:
        print("Failed to get download URL.")
|
||
|
|
||
|
if __name__ == "__main__":
    # Usage: python <script> [pdf_url] [output_filename]
    # Both arguments are optional; the values below are used when omitted.

    # Default PDF URL. Its query string carries an `Expires` parameter, so
    # this link is presumably time-limited -- pass a fresh URL on the command
    # line when it no longer works.
    # NOTE(review): a local Windows path was previously tried here in a
    # commented-out line; whether submit_conversion_task accepts local paths
    # cannot be confirmed from this file.
    default_pdf_url = "https://temp-pdf2docx.oss-cn-wuhan-lr.aliyuncs.com/pdf/02cf0a7a8cda432a8ba7a929862510eb.pdf?Expires=1724035295&OSSAccessKeyId=TMP.3Kj9nRWk3bspYRpZJJeKSSDjuoiSsd1SYBnHtac62JciczGbftutcSUcM5RpLTQNQXeANRNbdSxK2VnX9cQZ9bUgR3dWDv&Signature=MJfXEZe1fy5CEIoJ1IxhliSv0Ss%3D"
    default_filename = 'downloaded_document.docx'

    pdf_url = sys.argv[1] if len(sys.argv) > 1 else default_pdf_url
    downloaded_filename = sys.argv[2] if len(sys.argv) > 2 else default_filename

    download_pdf_convert_docx(pdf_url, downloaded_filename)
|