69 lines
2.7 KiB
Python
69 lines
2.7 KiB
Python
import mimetypes
import os

import requests
|
||
|
||
|
||
def download_file(url: str, local_filename: str, timeout: float = 30):
    """
    Download a file and save it locally, picking the extension from Content-Type.

    Args:
        url: URL of the file to download.
        local_filename: Local path to save to (normally without an extension).
            If it already ends with the extension derived from the response,
            it is used unchanged; otherwise any existing extension is replaced.
        timeout: Timeout in seconds passed to ``requests.get``. Without a
            timeout a stalled server would block the call indefinitely.

    Returns:
        tuple: ``(full_filename, file_type_code)`` on success, where the
        file type code is:
            1 - .docx
            2 - .pdf
            3 - .doc
            4 - anything else
        None: if the download failed (the error is printed).
    """
    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()  # raise on 4xx/5xx responses

            # The header may carry parameters ("application/pdf; charset=utf-8");
            # mimetypes only matches the bare media type, so strip them first.
            content_type = response.headers.get('Content-Type', '')
            media_type = content_type.split(';', 1)[0].strip().lower()
            # Unknown or missing media types fall back to '.docx'.
            extension = mimetypes.guess_extension(media_type, strict=False) or '.docx'

            # Replace whatever extension the caller supplied unless it already matches.
            base, ext = os.path.splitext(local_filename)
            if ext.lower() != extension:
                full_filename = base + extension
            else:
                full_filename = local_filename

            # Stream the body to disk in chunks so large files are not held in memory.
            with open(full_filename, 'wb') as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip empty chunks
                        file.write(chunk)

            # Map the extension to the numeric file type code.
            extension_mapping = {
                '.docx': 1,
                '.pdf': 2,
                '.doc': 3,
            }
            file_code = extension_mapping.get(extension.lower(), 4)
            return full_filename, file_code

    except requests.HTTPError as e:
        print(f"download: HTTP 错误: {e}")
    except requests.RequestException as e:
        print(f"download: 下载文件时出错: {e}")
    except Exception as e:
        # Boundary catch-all (e.g. file-system errors): callers rely on a
        # None return rather than an exception.
        print(f"download: 发生错误: {e}")

    return None
|
||
|
||
if __name__ == '__main__':
    # URL used for a manual download test. It is a signed link carrying an
    # "Expires" query parameter, so it may need replacing before running.
    test_url = "https://bid-assistance.oss-cn-wuhan-lr.aliyuncs.com/test/01%20%E6%8B%9B%E6%A0%87%E6%96%87%E4%BB%B6-%E5%A4%A7%E8%8D%94%E5%8E%BF%E5%85%AC%E5%AE%89%E5%B1%80%E6%83%85%E6%8C%87%E5%8B%A4%E8%88%86%E4%B8%80%E4%BD%93%E5%8C%96%E5%B9%B3%E5%8F%B0%E5%BB%BA%E8%AE%BE%E9%A1%B9%E7%9B%AE%28%E4%BA%8C%E6%AC%A1%29.pdf?Expires=1728751377&OSSAccessKeyId=TMP.3KeYhAGeJr2LGiNctSPvSmdSwxhrU8pbaDYRJcNCCgv8ijyWN613QahKb3nhXydfAvHaqpw4nTHXMzq7hmTHmNnPA77DgL&Signature=mwPHW8v7dVmHP1udTDL%2ByzllwCE%3D"
    local_file_name = 'C:\\Users\\Administrator\\Desktop\\招标文件\\output\\testdownload'

    # download_file returns None on failure, so check the result before
    # unpacking it; unpacking None directly would raise TypeError.
    result = download_file(test_url, local_file_name)
    if result:
        file_path, file_type = result
        print(f"Downloaded file path: {file_path}")
        print(file_type)
|