# zbparse/flask_app/main/download.py

import mimetypes
import os

import requests


def _extension_from_content_type(content_type):
    """Return the file extension for a raw ``Content-Type`` header value.

    Returns ``None`` when the header is missing/empty or when ``mimetypes``
    does not know the media type.
    """
    if not content_type:
        # mimetypes.guess_extension() calls .lower() on its argument, so a
        # missing header (None) must be filtered out before the lookup.
        return None
    # Drop parameters such as "; charset=UTF-8": mimetypes only matches the
    # bare "type/subtype" token.
    media_type = content_type.split(';', 1)[0].strip()
    if not media_type:
        return None
    return mimetypes.guess_extension(media_type, strict=False)


def download_file(url, local_filename, timeout=30):
    """Download ``url`` to ``local_filename`` plus a guessed extension.

    The extension is derived from the response's ``Content-Type`` header and
    falls back to ``.docx`` when it cannot be determined.

    Args:
        url: Address of the file to fetch.
        local_filename: Destination path WITHOUT extension; the guessed
            extension is appended to it.
        timeout: Value forwarded to ``requests.get(timeout=...)`` so that a
            stalled server cannot block the caller forever.

    Returns:
        A ``(full_filename, type_code)`` tuple on success, where ``type_code``
        is 1 for ``.docx``, 2 for ``.pdf`` and 3 for any other extension.
        ``None`` if any error occurred; the error is printed, not raised.
    """
    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()  # raise on non-success status codes

            # Pick the extension from the media type, defaulting to .docx.
            extension = _extension_from_content_type(
                response.headers.get('Content-Type'))
            if not extension:
                extension = '.docx'
            full_filename = local_filename + extension

            try:
                with open(full_filename, 'wb') as file:
                    for chunk in response.iter_content(chunk_size=8192):
                        file.write(chunk)
            except Exception:
                # Do not leave a truncated file behind when the transfer or
                # the write fails; the outer handlers report the error.
                try:
                    os.remove(full_filename)
                except OSError:
                    pass
                raise

            # Map the extension to the numeric type code.
            if extension == '.docx':
                return full_filename, 1
            elif extension == '.pdf':
                return full_filename, 2
            else:
                return full_filename, 3
    except requests.HTTPError as e:
        print(f"HTTP Error: {e}")
        return None
    except requests.RequestException as e:
        print(f"Error downloading the file: {e}")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

if __name__ == '__main__':
    # Manual smoke test: fetch a sample document and report the outcome.
    # The URL below is a pre-signed link carrying an `Expires` parameter.
    sample_url = "https://temp-pdf2docx.oss-cn-wuhan-lr.aliyuncs.com/docx/zbfile.docx?Expires=1725019436&OSSAccessKeyId=TMP.3KjfvBwPjtUPCu4BTNdkuN6BEvSbm1ibnrnTQX4ZdpSjCLX99a2Pq9bV52aA8JysVrbCZwhyuVjeMdJgdgxkqgPhwQfQoV&Signature=kXhJZZouEb82jQlhCwCpbm5%2Furs%3D"
    # Destination path without extension; download_file appends one.
    target_stem = 'C:\\Users\\Administrator\\Desktop\\招标文件\\output\\downloaded_file'

    # download_file returns a (path, type_code) tuple on success, else None.
    outcome = download_file(sample_url, target_stem)
    if outcome:
        print(f"Downloaded file path: {outcome}")
8.29 2024-08-29 16:37:09 +08:00			`import requests`
			`import mimetypes`


			`def download_file(url, local_filename):`
			`try:`
			`with requests.get(url, stream=True) as response:`
			`response.raise_for_status() # 确保请求成功，否则抛出异常`

			`# 获取文件类型并设置适当的文件扩展名`
			`content_type = response.headers.get('Content-Type')`
			`extension = mimetypes.guess_extension(content_type, strict=False)`
			`if not extension:`
			`# 如果无法猜测扩展名，默认使用 .docx`
			`extension = '.docx'`
			`full_filename = local_filename + extension # 追加扩展名`

			`with open(full_filename, 'wb') as file:`
			`for chunk in response.iter_content(chunk_size=8192):`
			`file.write(chunk)`

			`# 根据扩展名返回对应的值`
			`if extension == '.docx':`
			`return full_filename,1`
			`elif extension == '.pdf':`
			`return full_filename,2`
			`else:`
			`return full_filename,3`
			`except requests.HTTPError as e:`
			`print(f"HTTP Error: {e}")`
			`return None`
			`except requests.RequestException as e:`
			`print(f"Error downloading the file: {e}")`
			`return None`
			`except Exception as e:`
			`print(f"An error occurred: {e}")`
			`return None`

			`if __name__ == '__main__':`
			`# 测试下载的URL`
8.30 2024-08-30 11:56:11 +08:00			`test_url ="https://temp-pdf2docx.oss-cn-wuhan-lr.aliyuncs.com/docx/zbfile.docx?Expires=1725019436&OSSAccessKeyId=TMP.3KjfvBwPjtUPCu4BTNdkuN6BEvSbm1ibnrnTQX4ZdpSjCLX99a2Pq9bV52aA8JysVrbCZwhyuVjeMdJgdgxkqgPhwQfQoV&Signature=kXhJZZouEb82jQlhCwCpbm5%2Furs%3D"`
8.29 2024-08-29 16:37:09 +08:00			`local_file_name = 'C:\\Users\\Administrator\\Desktop\\招标文件\\output\\downloaded_file'`
			`file_path = download_file(test_url, local_file_name)`
			`if file_path:`
			`print(f"Downloaded file path: {file_path}")`