zbparse/flask_app/main/download.py
2024-10-14 10:52:31 +08:00

71 lines
2.7 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import requests
import mimetypes
def download_file(url, local_filename):
    """
    Download a file and save it locally, picking the extension from Content-Type.

    The response's Content-Type header is reduced to its bare media type
    (parameters such as ``; charset=utf-8`` are dropped) and mapped to a file
    extension with ``mimetypes.guess_extension``. When no extension can be
    guessed, ``.docx`` is used.

    Args:
        url (str): URL of the file to download.
        local_filename (str): Local path to save to. If it already ends with
            an extension that differs from the detected one, that extension
            is replaced.

    Returns:
        tuple | None: ``(full_filename, file_code)`` on success, where
        ``file_code`` is:
            1 - .docx
            2 - .pdf
            3 - .doc
            4 - anything else
        ``None`` if the download failed; the error is printed, not raised.
    """
    try:
        # Without a timeout a stalled server would block the caller forever.
        # (connect timeout, read timeout) in seconds; a timeout surfaces as a
        # requests.RequestException and is reported below.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()  # raise on 4xx/5xx responses

            # Keep only the media type: "application/pdf; charset=utf-8" would
            # otherwise not be recognised and silently fall back to ".docx".
            content_type = response.headers.get('Content-Type', '')
            media_type = content_type.split(';', 1)[0].strip().lower()
            extension = mimetypes.guess_extension(media_type, strict=False) or '.docx'

            # Split off any extension already present on the requested name.
            base, ext = os.path.splitext(local_filename)
            if ext.lower() != extension:
                full_filename = base + extension
            else:
                full_filename = local_filename

            # Stream the body to disk in chunks.
            with open(full_filename, 'wb') as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip empty chunks
                        file.write(chunk)

            # Map the extension to the numeric file-type code.
            extension_mapping = {
                '.docx': 1,
                '.pdf': 2,
                '.doc': 3
            }
            file_code = extension_mapping.get(extension.lower(), 4)
            return full_filename, file_code
    except requests.HTTPError as e:
        print(f"download: HTTP 错误: {e}")
    except requests.RequestException as e:
        print(f"download: 下载文件时出错: {e}")
    except Exception as e:
        # Best-effort boundary: e.g. OSError while opening/writing the file.
        print(f"download: 发生错误: {e}")
    return None
if __name__ == '__main__':
    # Manual smoke test: download one file and print where it was saved.
    # NOTE(review): this looks like a pre-signed URL carrying an "Expires"
    # timestamp and a temporary access key id; it has presumably expired and
    # should be replaced before running.
    test_url = "https://bid-assistance.oss-cn-wuhan-lr.aliyuncs.com/test/01%20%E6%8B%9B%E6%A0%87%E6%96%87%E4%BB%B6-%E5%A4%A7%E8%8D%94%E5%8E%BF%E5%85%AC%E5%AE%89%E5%B1%80%E6%83%85%E6%8C%87%E5%8B%A4%E8%88%86%E4%B8%80%E4%BD%93%E5%8C%96%E5%B9%B3%E5%8F%B0%E5%BB%BA%E8%AE%BE%E9%A1%B9%E7%9B%AE%28%E4%BA%8C%E6%AC%A1%29.pdf?Expires=1728751377&OSSAccessKeyId=TMP.3KeYhAGeJr2LGiNctSPvSmdSwxhrU8pbaDYRJcNCCgv8ijyWN613QahKb3nhXydfAvHaqpw4nTHXMzq7hmTHmNnPA77DgL&Signature=mwPHW8v7dVmHP1udTDL%2ByzllwCE%3D"
    local_file_name = 'C:\\Users\\Administrator\\Desktop\\招标文件\\output\\testdownload'

    # download_file returns None on failure, so check before unpacking;
    # unpacking None directly would raise TypeError.
    result = download_file(test_url, local_file_name)
    if result:
        file_path, file_type = result
        print(f"Downloaded file path: {file_path}")
        print(file_type)