# zbparse/flask_app/main/转化格式/main_pdf_to_docx.py

import time

from submit_conversion import submit_conversion_task
from check_status import get_download_url
from download import download_file

def download_pdf_convert_docx(url, downloaded_filename, timeout=300.0, poll_interval=0.5):
    """
    Convert a remote PDF to DOCX via the conversion helpers and save the result.

    The URL is submitted with ``submit_conversion_task``; the returned task id
    is then polled with ``get_download_url`` until a truthy download link comes
    back or ``timeout`` seconds have passed. On success the link is handed to
    ``download_file``. Outcomes are reported with ``print``; the function
    always returns ``None``.

    Args:
        url (str): The URL of the PDF to be converted.
        downloaded_filename (str): The filename to save the converted DOCX as.
        timeout (float | None): Upper bound, in seconds, on how long to keep
            polling for the download link. ``None`` polls without limit,
            which was the behaviour before this parameter existed.
        poll_interval (float): Seconds to sleep before each status query.
    """
    # Submit the conversion task and obtain its task id.
    task_id = submit_conversion_task(url)
    if not task_id:
        print("Failed to submit conversion task.")
        return

    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = None if timeout is None else time.monotonic() + timeout

    download_url = None
    while not download_url:
        # Sleep before querying, as the original loop did; at least one query
        # is always made, even with timeout=0.
        time.sleep(poll_interval)
        download_url = get_download_url(task_id)
        if not download_url and deadline is not None and time.monotonic() >= deadline:
            # Give up instead of spinning forever on a task that never
            # yields a link.
            break

    if not download_url:
        print("Failed to get download URL.")
        return

    # A download link is available: fetch the converted file.
    download_file(download_url, downloaded_filename)
    print(f'File downloaded and saved as {downloaded_filename}')

if __name__ == "__main__":
    # Manual run: convert one sample PDF and save it under a fixed name.
    # NOTE(review): the sample link is a signed URL carrying an `Expires`
    # query parameter, so it presumably has to be replaced with a fresh one
    # before this can succeed - confirm.
    sample_pdf_url = "https://temp-pdf2docx.oss-cn-wuhan-lr.aliyuncs.com/pdf/02cf0a7a8cda432a8ba7a929862510eb.pdf?Expires=1724035295&OSSAccessKeyId=TMP.3Kj9nRWk3bspYRpZJJeKSSDjuoiSsd1SYBnHtac62JciczGbftutcSUcM5RpLTQNQXeANRNbdSxK2VnX9cQZ9bUgR3dWDv&Signature=MJfXEZe1fy5CEIoJ1IxhliSv0Ss%3D"
    # Alternative input kept from the original author (a local Windows path):
    # sample_pdf_url = "C:\\Users\\Administrator\\Desktop\\招标文件\\output1\\zbfile.pdf"
    output_name = 'downloaded_document.docx'
    download_pdf_convert_docx(url=sample_pdf_url, downloaded_filename=output_name)
8.29 2024-08-29 16:37:09 +08:00

```python
import time

from submit_conversion import submit_conversion_task
from check_status import get_download_url
from download import download_file

def download_pdf_convert_docx(url, downloaded_filename):
    """
    Download a PDF from a URL, convert it to a DOCX, and save it locally.

    Args:
    url (str): The URL of the PDF to be downloaded.
    downloaded_filename (str): The filename to save the converted DOCX as.
    """
    # 提交转换任务并获取task_id
    task_id = submit_conversion_task(url)
    if task_id:
        download_url = None
        # 使用while循环进行每秒的查询
        while not download_url:
            time.sleep(0.5)
            download_url = get_download_url(task_id)

        # 如果得到下载链接
        if download_url:
            # 下载文件
            download_file(download_url, downloaded_filename)
            print(f'File downloaded and saved as {downloaded_filename}')
        else:
            print("Failed to get download URL.")
    else:
        print("Failed to submit conversion task.")

if __name__ == "__main__":
    # PDF文件URL
    pdf_url = "https://temp-pdf2docx.oss-cn-wuhan-lr.aliyuncs.com/pdf/02cf0a7a8cda432a8ba7a929862510eb.pdf?Expires=1724035295&OSSAccessKeyId=TMP.3Kj9nRWk3bspYRpZJJeKSSDjuoiSsd1SYBnHtac62JciczGbftutcSUcM5RpLTQNQXeANRNbdSxK2VnX9cQZ9bUgR3dWDv&Signature=MJfXEZe1fy5CEIoJ1IxhliSv0Ss%3D"
    # pdf_url="C:\\Users\\Administrator\\Desktop\\招标文件\\output1\\zbfile.pdf"
    downloaded_filename = 'downloaded_document.docx'
    download_pdf_convert_docx(pdf_url, downloaded_filename)
```