2024-08-29 16:37:09 +08:00
|
|
|
|
import logging
|
2024-10-16 20:18:55 +08:00
|
|
|
|
import re
|
2024-08-29 16:37:09 +08:00
|
|
|
|
import shutil
|
|
|
|
|
import time
|
|
|
|
|
import uuid
|
|
|
|
|
from datetime import datetime, timedelta
|
2024-09-11 12:02:09 +08:00
|
|
|
|
from flask import Flask, request, jsonify, Response, stream_with_context, g
|
2024-08-29 16:37:09 +08:00
|
|
|
|
import json
|
|
|
|
|
import os
|
2024-08-29 17:30:49 +08:00
|
|
|
|
from flask_app.main.download import download_file
|
2024-10-12 18:01:59 +08:00
|
|
|
|
from flask_app.main.招标文件解析 import engineering_bid_main
|
|
|
|
|
from flask_app.货物标.货物标解析main import goods_bid_main
|
2024-08-29 16:37:09 +08:00
|
|
|
|
|
|
|
|
|
# Flask application object; the route and request-hook decorators below register on it.
app = Flask(__name__)
|
2024-08-30 11:56:11 +08:00
|
|
|
|
|
|
|
|
|
|
2024-08-29 16:37:09 +08:00
|
|
|
|
class CSTFormatter(logging.Formatter):
    """Formatter that renders record timestamps in China Standard Time (UTC+8)."""

    def formatTime(self, record, datefmt=None):
        """Return the creation time of *record* as text in UTC+8.

        Args:
            record: The ``logging.LogRecord`` being formatted.
            datefmt: Optional ``strftime`` pattern. When omitted the result is
                ``YYYY-MM-DD HH:MM:SS`` with ``,mmm`` milliseconds appended
                when the format string uses the time.

        Returns:
            The formatted timestamp string.
        """
        # Function-scope import: the module header only imports datetime and
        # timedelta from the datetime module.
        from datetime import timezone

        # Convert from the absolute epoch value so the result is UTC+8 on any
        # host. Adding eight hours to the host's local time is only correct
        # when the host itself runs in UTC.
        ct = datetime.fromtimestamp(record.created, tz=timezone(timedelta(hours=8)))
        if datefmt:
            return ct.strftime(datefmt)

        s = ct.strftime("%Y-%m-%d %H:%M:%S")
        if self.usesTime():
            # record.msecs is a float and the "d" format code rejects floats,
            # so convert explicitly; otherwise the milliseconds are lost.
            s = f"{s},{int(record.msecs):03d}"
        return s
|
|
|
|
|
|
2024-08-30 11:56:11 +08:00
|
|
|
|
|
2024-09-11 12:02:09 +08:00
|
|
|
|
@app.before_request
def before_request():
    """Request hook: prepare the per-request logger before the view runs.

    Delegates to ``create_logger()``, which stores the logger on ``flask.g``
    as ``g.logger``.
    """
    create_logger()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_logger():
    """Create a logger dedicated to the current request and store it on ``g``.

    A fresh UUID is generated and saved as ``g.unique_id``. The directory
    ``flask_app/static/output/<uuid>`` is created, and a logger named after
    the UUID is configured at INFO level with two handlers: a file handler
    writing ``log.txt`` in that directory (timestamps formatted by
    ``CSTFormatter``) and a stream handler that emits the bare message.
    The logger is saved as ``g.logger``.
    """
    unique_id = str(uuid.uuid4())
    g.unique_id = unique_id

    output_folder = f"flask_app/static/output/{unique_id}"
    os.makedirs(output_folder, exist_ok=True)
    log_path = os.path.join(output_folder, "log.txt")

    logger = logging.getLogger(unique_id)
    if not logger.handlers:
        # Log messages in this module contain Chinese text; pin the encoding
        # so the file does not depend on the host's locale default.
        file_handler = logging.FileHandler(log_path, encoding="utf-8")
        file_handler.setFormatter(
            CSTFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        )
        logger.addHandler(file_handler)

        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(logging.Formatter('%(message)s'))
        logger.addHandler(stream_handler)

        logger.setLevel(logging.INFO)

    # NOTE(review): nothing visible in this file closes or removes these
    # handlers once the request ends, so each request appears to leave an
    # open log file and a named logger behind - confirm and add cleanup.
    g.logger = logger
|
2024-08-29 16:37:09 +08:00
|
|
|
|
|
2024-08-30 11:56:11 +08:00
|
|
|
|
|
2024-09-23 17:44:34 +08:00
|
|
|
|
# @app.route('/upload', methods=['POST'])
|
|
|
|
|
# def zbparse():
|
|
|
|
|
# logger=g.logger
|
|
|
|
|
# file_url = validate_request()
|
|
|
|
|
# if isinstance(file_url, tuple): # Check if the returned value is an error response
|
|
|
|
|
# return file_url
|
|
|
|
|
# try:
|
|
|
|
|
# logger.info("starting parsing url:" + file_url)
|
|
|
|
|
# final_json_path, output_folder= download_and_process_file(file_url)
|
|
|
|
|
# if not final_json_path:
|
|
|
|
|
# return jsonify({'error': 'File processing failed'}), 500
|
|
|
|
|
# response = generate_response(final_json_path) # 先获取响应内容
|
|
|
|
|
# # remove_directory(output_folder) # 然后删除文件夹
|
|
|
|
|
# return response # 最后返回获取的响应
|
|
|
|
|
# except Exception as e:
|
|
|
|
|
# logger.error('Exception occurred: ' + str(e)) # 使用全局 logger 记录
|
|
|
|
|
# return jsonify({'error': str(e)}), 500
|
|
|
|
|
|
2024-10-12 18:01:59 +08:00
|
|
|
|
# def download_and_process_file(file_url):
|
|
|
|
|
# logger = g.logger
|
|
|
|
|
# unique_id = g.unique_id
|
|
|
|
|
# output_folder = f"flask_app/static/output/{unique_id}" # 直接使用全局 unique_id 构建路径
|
|
|
|
|
# filename = "ztbfile"
|
|
|
|
|
# downloaded_filename = os.path.join(output_folder, filename)
|
|
|
|
|
#
|
|
|
|
|
# # 下载文件,假设 download_file 函数已正确处理异常并返回文件路径
|
|
|
|
|
# downloaded_filepath, file_type = download_file(file_url, downloaded_filename)
|
|
|
|
|
#
|
|
|
|
|
# if downloaded_filepath is None or file_type == 3:
|
|
|
|
|
# logger.error("Unsupported file type or failed to download file")
|
|
|
|
|
# return None, output_folder
|
|
|
|
|
#
|
|
|
|
|
# logger.info("Local file path: " + downloaded_filepath)
|
|
|
|
|
# processed_file_path = engineering_bid_main(output_folder, downloaded_filepath, file_type, unique_id)
|
|
|
|
|
# return processed_file_path, output_folder
|
|
|
|
|
|
|
|
|
|
# def generate_response(final_json_path):
|
|
|
|
|
# logger = g.logger
|
|
|
|
|
# # 检查final_json_path是否为空或None
|
|
|
|
|
# if not final_json_path:
|
|
|
|
|
# logger.error('Empty or None path provided for final_json.')
|
|
|
|
|
# return jsonify({'error': 'No path provided for final_json.'}), 400
|
|
|
|
|
# if not os.path.exists(final_json_path):
|
|
|
|
|
# logger.error('final_json not found at path: ' + final_json_path)
|
|
|
|
|
# return jsonify({'error': 'final_json not found'}), 404
|
|
|
|
|
# with open(final_json_path, 'r', encoding='utf-8') as f:
|
|
|
|
|
# logger.info('final_json_path:' + final_json_path)
|
|
|
|
|
# zbparse_data = json.load(f)
|
|
|
|
|
# json_str = json.dumps(zbparse_data, ensure_ascii=False)
|
|
|
|
|
# return jsonify({
|
|
|
|
|
# 'message': 'File uploaded and processed successfully',
|
|
|
|
|
# 'filename': os.path.basename(final_json_path),
|
|
|
|
|
# 'data': json_str
|
|
|
|
|
# })
|
|
|
|
|
|
|
|
|
|
def validate_request(default_zb_type=1):
    """Validate the JSON body of the current request.

    Args:
        default_zb_type: Value used for ``zb_type`` when the body omits it.

    Returns:
        ``(file_url, zb_type)`` on success, with ``zb_type`` converted to
        ``int``. On failure ``(error_response, 400)``, where
        ``error_response`` is the ``jsonify`` result describing the problem.
        Both shapes are 2-tuples, so callers must inspect the first element
        to tell them apart.
    """
    if not request.is_json:
        return jsonify({'error': 'Missing JSON in request'}), 400

    # silent=True turns an unparsable body into None instead of an exception,
    # and the dict check rejects JSON lists/strings/null, which have no .get().
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({'error': 'Missing JSON in request'}), 400

    file_url = payload.get('file_url')
    zb_type = payload.get('zb_type', default_zb_type)

    # A non-string URL would only fail later, in the caller's string handling.
    if not file_url or not isinstance(file_url, str):
        return jsonify({'error': 'No file URL provided'}), 400

    try:
        zb_type = int(zb_type)
    except (ValueError, TypeError):
        return jsonify({'error': 'Invalid zb_type provided'}), 400

    return file_url, zb_type
|
2024-09-23 17:44:34 +08:00
|
|
|
|
|
|
|
|
|
# 流式
|
2024-08-29 16:37:09 +08:00
|
|
|
|
@app.route('/upload', methods=['POST'])
def zbparse():
    """Handle ``POST /upload``: validate the request and stream the results.

    Returns:
        A ``text/event-stream`` response fed by ``process_and_stream`` on
        success, the ``(response, 400)`` pair produced by
        ``validate_request`` when the body is invalid, or a JSON error with
        status 500 if building the streaming response fails.
    """
    logger = g.logger
    logger.info("start!!!")

    # silent=True so a missing or malformed JSON body is logged as None here
    # and then rejected by validate_request(), rather than failing before
    # validation gets a chance to run.
    received_data = request.get_json(silent=True)
    logger.info("Received JSON data: " + str(received_data))

    # validate_request() returns a 2-tuple for both success and failure, so
    # unpacking always works; a failure is recognised by its first element
    # being the jsonify() response object.
    first, second = validate_request()
    if isinstance(first, Response):
        return first, second
    file_url, zb_type = first, second

    try:
        logger.info("starting parsing url:" + file_url)
        # The generator body runs while the response is being iterated, so
        # errors raised during processing are not caught by this handler.
        return Response(
            stream_with_context(process_and_stream(file_url, zb_type)),
            content_type='text/event-stream',
        )
    except Exception as e:
        logger.error('Exception occurred: ' + str(e))
        return jsonify({'error': str(e)}), 500
|
2024-08-30 11:56:11 +08:00
|
|
|
|
|
2024-10-16 20:18:55 +08:00
|
|
|
|
# Placeholder values that are treated as "no data" when extracting fields.
_EMPTY_VALUES = ("未知", "")

# A number (optionally with thousands separators / decimals) or a run of
# Chinese characters, followed by a currency unit.
_AMOUNT_PATTERN = re.compile(
    r'(?:\d{1,3}(?:[,,]\d{3})*(?:\.\d+)?|\d+(?:\.\d+)?|[\u4e00-\u9fff]+(?:\.\d+)?)\s*(?:元|万元)'
)


def _get_nested(dic, keys, default=None):
    """Follow *keys* through nested dicts; return *default* at a non-dict."""
    for key in keys:
        if not isinstance(dic, dict):
            return default
        dic = dic.get(key, default)
    return dic


def _find_keys_containing(dic, substring):
    """Recursively collect the values of every key that contains *substring*."""
    found_values = []
    if isinstance(dic, dict):
        for key, value in dic.items():
            if substring in key:
                found_values.append(value)
            if isinstance(value, dict):
                found_values.extend(_find_keys_containing(value, substring))
            elif isinstance(value, list):
                for item in value:
                    if isinstance(item, dict):
                        found_values.extend(_find_keys_containing(item, substring))
    return found_values


def _extract_field(contact_info, candidate_keys):
    """Return the first non-placeholder value whose key contains a candidate.

    Candidates are tried in order, so earlier entries take priority.
    """
    # The section may be a bare string such as "未知" rather than a dict.
    if not isinstance(contact_info, dict):
        return ""
    for candidate in candidate_keys:
        for key, value in contact_info.items():
            if candidate in key and value not in _EMPTY_VALUES:
                return value
    return ""


def _first_contact_value(contacts, candidate_keys):
    """Return the first truthy field found across *contacts*, else ``""``."""
    for contact in contacts:
        value = _extract_field(contact, candidate_keys)
        if value:
            return value
    return ""


def _extract_bid_bond(guarantee_info):
    """Find the bid-bond amount inside the guarantee section, else ``""``."""
    if isinstance(guarantee_info, str):
        # A plain-text section can still carry an amount such as "5万元".
        match = _AMOUNT_PATTERN.search(guarantee_info)
        return match.group() if match else ""
    if not isinstance(guarantee_info, dict):
        return ""

    # Step 1: look under keys naming the bid/negotiation bond for an
    # explicit amount entry.
    for candidate in ("投标保证金", "磋商保证金"):
        for key, value in guarantee_info.items():
            if candidate not in key:
                continue
            if isinstance(value, dict):
                for sub_key, sub_value in value.items():
                    if "金额" in sub_key and sub_value not in _EMPTY_VALUES:
                        return sub_value
            elif isinstance(value, str):
                if "金额" in key and value not in _EMPTY_VALUES:
                    return value

    # Step 2: fall back to the first value anywhere in the section that
    # looks like an amount.
    for value in guarantee_info.values():
        if isinstance(value, str):
            match = _AMOUNT_PATTERN.search(value)
            if match:
                return match.group()
        elif isinstance(value, dict):
            found_amount = _extract_bid_bond(value)
            if found_amount:
                return found_amount
    return ""


def _extract_base_info_summary(base_info):
    """Build the flat summary dict from the '基础信息' section."""
    # Output field -> candidate paths inside base_info, tried in order.
    mapping = {
        "招标项目名称": [["项目信息", "项目名称"], ["项目信息", "工程名称"]],
        "招标项目编号": [["项目信息", "项目编号"], ["项目信息", "招标编号"]],
        "开标时间": [["关键时间/内容", "开标时间"]],
        "报名截止日期": [["关键时间/内容", "投标文件递交截止日期"]],
        "招标项目预算": [["项目信息", "招标控制价"]],
        "招标单位名称": [["招标人/代理信息", "招标人"]],
        "招标公告地址": [["关键时间/内容", "信息公示媒介"], ["关键时间/内容", "评标结果公示媒介"]]
    }

    summary = {}
    for new_key, paths in mapping.items():
        value = None
        for path in paths:
            value = _get_nested(base_info, path)
            if value:
                break
        summary[new_key] = value if value else ""

    # Project location: first value under '项目信息' whose key mentions "地点".
    location_candidates = _find_keys_containing(base_info.get("项目信息", {}), "地点")
    summary["招标项目地点"] = location_candidates[0] if location_candidates else ""

    # Contact name / phone: the project contact takes priority over the
    # tenderer contact.
    contacts = [
        _get_nested(base_info, ["招标人/代理信息", "项目联系方式"], {}),
        _get_nested(base_info, ["招标人/代理信息", "招标人联系方式"], {}),
    ]
    summary["联系人"] = _first_contact_value(contacts, ["名称", "联系人", "招标"])
    summary["联系电话"] = _first_contact_value(contacts, ["电话", "手机", "联系方式"])

    summary["投标保证金"] = _extract_bid_bond(_get_nested(base_info, ["保证金相关"], {}))
    return summary


def post_processing(combined_data, includes):
    """Regroup parsed sections and extract a flat summary of key fields.

    Args:
        combined_data (dict): Section name -> parsed content.
        includes: Section names to keep at the top level of the result.

    Returns:
        tuple: ``(processed_data, extracted_info)``.
        ``processed_data`` holds every section named in *includes* at the top
        level and gathers all remaining sections under the key ``"其他"``
        (omitted when empty). ``extracted_info`` is the flat summary built
        from the ``"基础信息"`` section; it is an empty dict when
        ``"基础信息"`` is not in *includes*.
    """
    extracted_info = {}
    if "基础信息" in includes:
        base_info = combined_data.get("基础信息", {})
        # Tolerate a placeholder value in place of the section dict.
        if not isinstance(base_info, dict):
            base_info = {}
        extracted_info = _extract_base_info_summary(base_info)

    processed_data = {"其他": {}}
    for key, value in combined_data.items():
        if key in includes:
            processed_data[key] = value
        else:
            processed_data["其他"][key] = value

    # Drop the catch-all bucket when nothing was placed in it.
    if not processed_data["其他"]:
        del processed_data["其他"]

    return processed_data, extracted_info
|
2024-08-30 11:56:11 +08:00
|
|
|
|
|
2024-09-23 17:44:34 +08:00
|
|
|
|
# 分段返回
|
2024-10-16 20:18:55 +08:00
|
|
|
|
def process_and_stream(file_url, zb_type):
    """Download a tender file, parse it, and yield progress as SSE frames.

    Args:
        file_url (str): URL of the file to download.
        zb_type (int): Tender type; 1 selects ``engineering_bid_main`` and 2
            selects ``goods_bid_main``. Any other value falls back to
            ``engineering_bid_main``.

    Yields:
        str: Server-sent-event frames (``data: <json>`` followed by a blank
        line). One ``'Processing'`` frame is sent per parsed chunk, then an
        ``'extracted_info'`` frame, a ``'Combined_data'`` frame and a final
        frame whose ``data`` is ``'END'``. A download failure, an unsupported
        file type or an unexpected error produces a single error frame and
        ends the stream.
    """
    logger = g.logger
    unique_id = g.unique_id
    output_folder = f"flask_app/static/output/{unique_id}"
    filename = "ztbfile"
    downloaded_filename = os.path.join(output_folder, filename)

    start_time = time.time()  # used for the duration logged in ``finally``

    try:
        downloaded = download_file(file_url, downloaded_filename)
        if not downloaded:
            logger.error("下载文件失败或不支持的文件类型")
            error_response = {
                'message': 'File processing failed',
                'filename': None,
                'data': json.dumps({'error': 'File processing failed'})
            }
            yield f"data: {json.dumps(error_response)}\n\n"
            return

        downloaded_filepath, file_type = downloaded

        # File type 4 is treated as unsupported.
        if file_type == 4:
            logger.error("不支持的文件类型")
            error_response = {
                'message': 'Unsupported file type',
                'filename': None,
                'data': json.dumps({'error': 'Unsupported file type'})
            }
            yield f"data: {json.dumps(error_response)}\n\n"
            return

        logger.info("本地文件路径: " + downloaded_filepath)

        combined_data = {}

        # Pick the parser for the tender type; unknown types use the
        # engineering parser.
        processing_functions = {
            1: engineering_bid_main,
            2: goods_bid_main
        }
        processing_func = processing_functions.get(zb_type, engineering_bid_main)

        for data in processing_func(output_folder, downloaded_filepath, file_type, unique_id):
            if not data.strip():
                logger.error("Received empty data, skipping JSON parsing.")
                continue

            try:
                parsed_data = json.loads(data)
            except json.JSONDecodeError as e:
                logger.error(f"Failed to decode JSON: {e}")
                logger.error(f"Data received: {data}")
                continue

            # Valid JSON that is not an object has nothing to merge.
            if not isinstance(parsed_data, dict):
                logger.error(f"Unexpected JSON structure, skipping: {data}")
                continue

            # Merge only the inner dictionaries; the outer keys are dropped.
            for outer_key, inner_dict in parsed_data.items():
                if isinstance(inner_dict, dict):
                    combined_data.update(inner_dict)

            # Forward the raw chunk to the client as a progress frame.
            response = {
                'message': 'Processing',
                'filename': os.path.basename(downloaded_filepath),
                'data': data
            }
            yield f"data: {json.dumps(response, ensure_ascii=False)}\n\n"

        logger.info(f"合并后的数据: {json.dumps(combined_data, ensure_ascii=False, indent=4)}")

        # Persist the regrouped result as 'final_result.json' in the output folder.
        output_json_path = os.path.join(output_folder, 'final_result.json')
        includes = ["基础信息", "资格审查", "商务评分", "技术评分", "无效标与废标项", "投标文件要求", "开评定标流程"]
        final_result, extracted_info = post_processing(combined_data, includes)

        try:
            with open(output_json_path, 'w', encoding='utf-8') as json_file:
                json.dump(final_result, json_file, ensure_ascii=False, indent=4)
            logger.info(f"合并后的数据已保存到 '{output_json_path}'")
        except IOError as e:
            # Saving is best-effort; the results are still streamed below.
            logger.error(f"保存JSON文件时出错: {e}")

        extracted_info_response = {
            'message': 'extracted_info',
            'filename': os.path.basename(downloaded_filepath),
            'data': json.dumps(extracted_info, ensure_ascii=False)
        }
        yield f"data: {json.dumps(extracted_info_response, ensure_ascii=False)}\n\n"

        # Send the complete regrouped data.
        complete_response = {
            'message': 'Combined_data',
            'filename': os.path.basename(downloaded_filepath),
            'data': json.dumps(final_result, ensure_ascii=False)
        }
        yield f"data: {json.dumps(complete_response, ensure_ascii=False)}\n\n"

        # Terminal frame.
        final_response = {
            'message': 'File uploaded and processed successfully',
            'filename': os.path.basename(downloaded_filepath),
            'data': 'END'
        }
        yield f"data: {json.dumps(final_response)}\n\n"

    except Exception as e:
        # This code runs while the response is being streamed, so without
        # this handler a failure would just cut the stream short. Report it
        # in the same shape as the other error frames instead.
        logger.exception('Exception occurred: ' + str(e))
        error_response = {
            'message': 'File processing failed',
            'filename': None,
            'data': json.dumps({'error': str(e)}, ensure_ascii=False)
        }
        yield f"data: {json.dumps(error_response, ensure_ascii=False)}\n\n"

    finally:
        end_time = time.time()
        duration = end_time - start_time
        logger.info(f"Total processing time: {duration:.2f} seconds")
|
|
|
|
|
|
2024-08-30 11:56:11 +08:00
|
|
|
|
|
2024-08-29 16:37:09 +08:00
|
|
|
|
@app.route('/api/test_zbparse', methods=['POST'])
def test_zbparse():
    """Handle ``POST /api/test_zbparse`` by streaming the canned demo events.

    Returns:
        A ``text/event-stream`` response fed by ``test_process_and_stream``,
        or a JSON error with status 500 if building the response fails.
    """
    try:
        event_source = stream_with_context(test_process_and_stream())
        return Response(event_source, content_type='text/event-stream')
    except Exception as exc:
        app.logger.error(f'Exception occurred: {exc}')
        return jsonify({'error': str(exc)}), 500
|
|
|
|
|
|
2024-08-30 11:56:11 +08:00
|
|
|
|
|
2024-08-29 16:37:09 +08:00
|
|
|
|
def test_process_and_stream():
    """Yield a fixed sequence of SSE frames that mimics a real parsing run.

    Seven hard-coded segments are sent one at a time with a 3-second pause
    after each, followed by one frame containing all segments merged by their
    inner (Chinese) section name, and a final frame whose ``data`` is
    ``'END'``.
    """
    def as_event(payload):
        # One server-sent-event frame: "data: <json>" plus a blank line.
        return f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"

    filename = "test_file.pdf"

    # Each segment maps an outer English key to a single inner section.
    data_segments = [
        {
            "base_info": {
                "基础信息": {
                    "project_name": "测试项目1",
                    "project_code": "TP001",
                    "project_manager": "张三",
                    "start_date": "2024-01-10",
                    "end_date": "2024-12-31"
                }
            }
        },
        {
            "qualification_review": {
                "资格审查": {
                    "review_criteria": ["公司资质", "过往业绩", "财务报表"],
                    "required_documents": ["营业执照", "资质证书", "近三年财务报告"],
                    "minimum_requirements": {
                        "company_age": "至少5年",
                        "past_projects": "至少3个大型项目"
                    }
                }
            }
        },
        {
            "technical_standards": {
                "技术标": {
                    "technical_requirements": ["设备质量要求", "施工工艺", "安全标准"],
                    "materials_list": ["钢筋", "水泥", "电缆"],
                    "equipment_specs": {
                        "excavator": "型号X123",
                        "concrete_mixer": "型号Y456"
                    }
                }
            }
        },
        {
            "commercial_standards": {
                "商务标": {
                    "pricing_method": "固定总价",
                    "payment_schedule": ["30%合同签订", "40%中期支付", "30%项目完成支付"],
                    "contract_conditions": {
                        "warranty_period": "2年",
                        "penalty_clauses": "延期每周罚款5%"
                    }
                }
            }
        },
        {
            "invalid_requirements": {
                "无效标与废标项": {
                    "common_issues": ["未按要求提交保证金", "技术标不达标"],
                    "invalidation_reasons": {
                        "missing_documents": "缺少必要文件",
                        "unqualified_technical_specs": "技术规格不合要求"
                    }
                }
            }
        },
        {
            "bidding_documents_requirements": {
                "投标文件要求": {
                    "file_format": "PDF",
                    "submission_deadline": "2024-08-01 17:00",
                    "submission_location": "北京市某某大厦5楼",
                    "required_sections": ["公司简介", "技术方案", "商务报价"]
                }
            }
        },
        {
            "opening_bid": {
                "开评定标流程": {
                    "bid_opening_time": "2024-09-01 10:00",
                    "location": "会议室A",
                    "evaluation_criteria": ["价格", "技术能力", "项目经验"],
                    "evaluation_process": {
                        "first_round": "资格审查",
                        "second_round": "技术评分",
                        "final_round": "商务报价评定"
                    }
                }
            }
        }
    ]

    for position, segment in enumerate(data_segments):
        yield as_event({
            'message': f'Processing segment {position + 1}',
            'filename': filename,
            'data': segment
        })
        time.sleep(3)  # pause 3 seconds between segments

    # Flatten the segments: keep only each wrapper's first (and only) entry.
    merged = {}
    for segment in data_segments:
        for wrapper in segment.values():
            section_name, section_body = next(iter(wrapper.items()))
            merged[section_name] = section_body

    # Send the merged dictionary before the end signal.
    yield as_event({
        'message': 'Combined data',
        'filename': filename,
        'data': merged
    })

    # End signal.
    yield as_event({
        'message': 'File processed successfully',
        'filename': filename,
        'data': 'END'
    })
|
2024-08-29 16:37:09 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# @app.route('/get_json', methods=['POST'])
|
|
|
|
|
# def testjson():
|
|
|
|
|
# final_json_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\temp4\\fd55f067-2cf6-475c-b7ce-4498f6606bf6\\final_result.json"
|
|
|
|
|
# with open(final_json_path, 'r', encoding='utf-8') as f:
|
|
|
|
|
# print('final_json_path:'+final_json_path)
|
|
|
|
|
# zbparse_data = json.load(f)
|
|
|
|
|
# json_str = json.dumps(zbparse_data, ensure_ascii=False)
|
|
|
|
|
# print(json_str)
|
|
|
|
|
# return jsonify({
|
|
|
|
|
# 'message': 'File uploaded and processed successfully',
|
|
|
|
|
# 'filename': os.path.basename(final_json_path),
|
|
|
|
|
# 'data': json_str
|
|
|
|
|
# })
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def remove_directory(path):
    """Delete the directory tree at *path* and log the outcome on ``g.logger``.

    Any exception is logged as an error and swallowed, so the caller is never
    interrupted by a failed cleanup.
    """
    request_logger = g.logger
    try:
        shutil.rmtree(path)
        request_logger.info(f"Successfully removed directory: {path}")
    except Exception as exc:
        request_logger.error(f"Failed to remove directory {path}: {exc}")
|
2024-08-29 16:37:09 +08:00
|
|
|
|
|
2024-08-30 11:56:11 +08:00
|
|
|
|
|
2024-08-29 16:37:09 +08:00
|
|
|
|
if __name__ == '__main__':
    # Development entry point. The server listens on all interfaces, so the
    # interactive debugger must not be on by default: it allows arbitrary
    # code execution for anyone who can reach the port. Opt in explicitly
    # with FLASK_DEBUG=1 (or "true") when debugging locally.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true")
    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)
|