9.26 Segmented parsing, complete version (分段解析完全版)
commit 8afb58e82b
parent f8be23deaa
@@ -93,7 +93,24 @@ def zbparse():
         logger.error('Exception occurred: ' + str(e))
         return jsonify({'error': str(e)}), 500
 
+def post_processing(data, includes):
+    # Initialize the result dict, preseeding an empty "其他" (other) bucket
+    result = {"其他": {}}
+
+    # Walk every key-value pair of the original dict
+    for key, value in data.items():
+        if key in includes:
+            # Keys on the includes list are kept as-is
+            result[key] = value
+        else:
+            # Anything else is folded into the "其他" bucket
+            result["其他"][key] = value
+
+    # If the "其他" bucket stayed empty, drop it
+    if not result["其他"]:
+        del result["其他"]
+
+    return result
+
 # Streamed, segment-by-segment return
 def process_and_stream(file_url):
     logger = g.logger
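For reference, here is what the new post_processing helper does to the merged result. The section names come from the includes list used later in this commit; the payload values are made up for illustration:

    data = {
        "基础信息": {"项目名称": "某项目"},
        "商务标": {"评分办法": "综合评分法"},
        "debug_info": {"elapsed": 3.2},
    }
    includes = ["基础信息", "资格审查", "商务标", "技术标",
                "无效标与废标项", "投标文件要求", "开评定标流程"]

    post_processing(data, includes)
    # => {
    #     "基础信息": {"项目名称": "某项目"},
    #     "商务标": {"评分办法": "综合评分法"},
    #     "其他": {"debug_info": {"elapsed": 3.2}},
    # }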
@@ -120,50 +137,48 @@ def process_and_stream(file_url):
 
     # Pull data from main_processing
     for data in main_processing(output_folder, downloaded_filepath, file_type, unique_id):
-        response = {
-            'message': 'Processing',
-            'filename': os.path.basename(downloaded_filepath),
-            'data': data
-        }
-        # Log and stream the response
-        yield f"data: {json.dumps(response, ensure_ascii=False)}\n\n"
-
-        if not data:
-            logger.error(f"Empty data received: {data}")
-            continue
-
-        # Parse data as JSON
         if not data.strip():
             logger.error("Received empty data, skipping JSON parsing.")
-        else:
-            try:
-                parsed_data = json.loads(data)
-            except json.JSONDecodeError as e:
-                logger.error(f"Failed to decode JSON: {e}")
-                logger.error(f"Data received: {data}")
-                continue  # skip this chunk
+            continue  # Skip processing empty data
+
+        try:
+            parsed_data = json.loads(data)
+        except json.JSONDecodeError as e:
+            logger.error(f"Failed to decode JSON: {e}")
+            logger.error(f"Data received: {data}")
+            continue  # Skip data if JSON parsing fails
 
         # Walk parsed_data and merge only the inner payloads
         for outer_key, inner_dict in parsed_data.items():
             if isinstance(inner_dict, dict):
                 combined_data.update(inner_dict)
-        logger.info(json.dumps(combined_data, ensure_ascii=False, indent=4))
-        # After all data is processed, send the merged, complete result
+        # After each update, stream back the current progress
+        response = {
+            'message': 'Processing',
+            'filename': os.path.basename(downloaded_filepath),
+            'data': json.dumps(data, ensure_ascii=False)
+        }
+        yield f"data: {json.dumps(response, ensure_ascii=False)}\n\n"
+        # Log the merged data so far
+        logger.info(f"Updated combined data: {json.dumps(combined_data, ensure_ascii=False, indent=4)}")
+
+    # Save combined_data to 'final_result.json' under output_folder
+    output_json_path = os.path.join(output_folder, 'final_result.json')
+    includes = ["基础信息", "资格审查", "商务标", "技术标", "无效标与废标项", "投标文件要求", "开评定标流程"]
+    result = post_processing(combined_data, includes)
+    try:
+        with open(output_json_path, 'w', encoding='utf-8') as json_file:
+            json.dump(result, json_file, ensure_ascii=False, indent=4)
+        logger.info(f"Combined data saved to '{output_json_path}'")
+    except IOError as e:
+        logger.error(f"Error saving JSON file: {e}")
+
+    # Finally, send the merged, complete data
     complete_response = {
         'message': 'Combined data',
         'filename': os.path.basename(downloaded_filepath),
-        'data': json.dumps(combined_data, ensure_ascii=False)
+        'data': json.dumps(result, ensure_ascii=False)
     }
     yield f"data: {json.dumps(complete_response, ensure_ascii=False)}\n\n"
-
-    # Send the final response
-    final_response = {
-        'message': 'File uploaded and processed successfully',
-        'filename': os.path.basename(downloaded_filepath),
-        'data': 'END'
-    }
-    yield f"data: {json.dumps(final_response, ensure_ascii=False)}\n\n"
 
 
 def validate_request():
     if not request.is_json:
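process_and_stream is a generator that emits server-sent-events frames ("data: ...\n\n"). A minimal sketch of how such a generator is typically exposed from Flask; the route name and request shape here are assumptions, not part of this commit:

    from flask import Flask, Response, request, stream_with_context

    app = Flask(__name__)

    @app.route('/upload', methods=['POST'])  # hypothetical route name
    def upload():
        file_url = request.json.get('file_url')
        # stream_with_context keeps the request context (g.logger, etc.)
        # alive while the generator yields its SSE frames.
        return Response(stream_with_context(process_and_stream(file_url)),
                        mimetype='text/event-stream')

Note that this commit drops the old final 'END' frame, so a client should now treat the 'Combined data' message as the completion signal.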
@@ -109,6 +109,11 @@ def combine_evaluation_standards(truncate2):
     # return evaluation_combined_res
     return update_json  # merged 商务标 + 技术标
 
 if __name__ == "__main__":
-    truncate2 = "D:\\flask_project\\flask_app\\static\\output\\c02a12c2-6f7b-49dc-b97f-c3d740c96c21\\ztbfile_evaluation_method.pdf"
-    res = combine_evaluation_standards(truncate2)
-    print(json.dumps(res, ensure_ascii=False, indent=4))
+    truncate2 = "C:\\Users\\Administrator\\Desktop\\fsdownload\\0883895c-e61f-4a99-9308-697fca1d4b77\\ztbfile_evaluation_method.pdf"
+    evaluation_standards_res = combine_evaluation_standards(truncate2)
+    # Pull "商务标" and "技术标" out of the result
+    technical_standards = {"技术标": evaluation_standards_res.get("技术标", {})}
+    commercial_standards = {"商务标": evaluation_standards_res.get("商务标", {})}
+    # Print the technical and commercial sections
+    print(json.dumps(technical_standards, ensure_ascii=False, indent=4))
+    print(json.dumps(commercial_standards, ensure_ascii=False, indent=4))
@@ -124,8 +124,8 @@ def project_basic_info(knowledge_name,truncate0,output_folder,clause_path): #
     update_baseinfo_list = combine_basic_info(baseinfo_list)  # core merge of the basic info
 
     baseinfo_combined_res = combine_json_results(update_baseinfo_list)  # returns a dict
-    return nest_json_under_key(baseinfo_combined_res, "基础信息")  # returns a JSON string
+    # return nest_json_under_key(baseinfo_combined_res, "基础信息")  # returned a JSON string
+    return {"基础信息": baseinfo_combined_res}
 
 if __name__ == "__main__":
     knowledge_name = "ztb"
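The return-type change above recurs in several modules below (资格审查, 投标文件要求, 开评定标流程, 无效标与废标项): nest_json_under_key wrapped the payload under a key and, per the old inline comment, returned a JSON string, while the new code returns a plain dict. A sketch of the difference, with nest_json_under_key's behavior inferred from those comments rather than shown in this diff:

    import json

    def nest_json_under_key(data, key):
        # Inferred from the old comments: wrap and serialize to a string.
        return json.dumps({key: data}, ensure_ascii=False)

    baseinfo_combined_res = {"项目名称": "某项目"}

    nest_json_under_key(baseinfo_combined_res, "基础信息")
    # '{"基础信息": {"项目名称": "某项目"}}'  (string: callers must json.loads it)

    {"基础信息": baseinfo_combined_res}
    # {'基础信息': {'项目名称': '某项目'}}    (dict: directly mergeable)

Returning dicts lets main_processing pass sections straight to transform_json_values and json.dumps without a parse round-trip.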
@@ -171,7 +171,7 @@ def reformat_questions(match_keys, input_path, output_folder):
 
 def process_reviews(original_dict_data, knowledge_name, truncate0_jsonpath, clause_json_path, input_file, output_folder):
     combined_results2 = {}
-    matched_keys = extract_matching_keys(original_dict_data)  # extracts the entries whose values contain "符合", e.g. [{'形式评审标准.投标文件签字盖章': '符合第一章“投标人须知”第 3.7.3(4)目 规定'}, {'形式评审标准.多标段投标': '符合第二章“投标人须知”第 10.1款规定'}]
+    matched_keys = extract_matching_keys(original_dict_data)  # output: [{'形式评审标准.投标文件签字盖章': '符合第一章“投标人须知”第 3.7.3(4)目 规定'}, {'形式评审标准.多标段投标': '符合第二章“投标人须知”第 10.1款规定'}]; the entries whose values contain "符合"
     entries_with_numbers, formatted_questions1, entries_with_numbers2, clause2_path = reformat_questions(matched_keys, input_file, output_folder)
     combined_results = process_and_merge_entries(entries_with_numbers, truncate0_jsonpath,
                                                  clause_json_path)  # merge-by-clause-number step (根据条款号整合json.py)
@@ -118,12 +118,17 @@ def fetch_evaluation_standards(truncate1):  # 评标办法前附表
     # Get the dict result of the 评标办法前附表 (evaluation-method schedule)
     evaluation_standards_res = combine_evaluation_standards(truncate1)
 
-    # Pull "商务标" and "技术标" out of the result
-    technical_standards = evaluation_standards_res.get("技术标", {})
-    commercial_standards = evaluation_standards_res.get("商务标", {})
+    # Fetch the technical and commercial sections
+    technical_standards = {"技术标": evaluation_standards_res.get("技术标", {})}
+    commercial_standards = {"商务标": evaluation_standards_res.get("商务标", {})}
 
     logger.info("商务标和技术标 done")
-    # Return the technical and commercial sections
-    return {"technical_standards": technical_standards, "commercial_standards": commercial_standards}
+    # Return "技术标" and "商务标" wrapped under new keys
+    return {
+        "technical_standards": technical_standards,
+        "commercial_standards": commercial_standards
+    }
 
 # def fetch_evaluation_standards(truncate1):  # 评标办法前附表
 #     logger.info("starting商务标技术标...")
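Re-wrapping the sections keeps each streamed chunk self-describing. The shape change, concretely:

    evaluation_standards_res = {"技术标": {"评分": "..."}, "商务标": {"报价": "..."}}

    # Old: bare inner dict, the section name is lost
    evaluation_standards_res.get("技术标", {})
    # {'评分': '...'}

    # New: the inner dict stays wrapped under its section name
    {"技术标": evaluation_standards_res.get("技术标", {})}
    # {'技术标': {'评分': '...'}}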
@@ -147,8 +152,8 @@ def fetch_bidding_documents_requirements(clause_path):
     fetch_bidding_documents_requirements_json = extract_from_notice(clause_path, 1)
     qualify_nested_res = nest_json_under_key(fetch_bidding_documents_requirements_json, "投标文件要求")
     logger.info("投标文件要求done...")
-    return qualify_nested_res
+    # return qualify_nested_res
+    return {"投标文件要求": fetch_bidding_documents_requirements_json}
 
 # Bid opening / evaluation / award workflow (开评定标流程)
 def fetch_bid_opening(clause_path):
@@ -156,8 +161,8 @@ def fetch_bid_opening(clause_path):
     fetch_bid_opening_json = extract_from_notice(clause_path, 2)
     qualify_nested_res = nest_json_under_key(fetch_bid_opening_json, "开评定标流程")
     logger.info("开评定标流程done...")
-    return qualify_nested_res
+    # return qualify_nested_res
+    return {"开评定标流程": fetch_bid_opening_json}
 
 # def main_processing(output_folder, downloaded_file_path, file_type, unique_id):  # file_type=1->docx file_type=2->pdf
 #     global logger
@@ -243,12 +248,12 @@ def main_processing(output_folder, downloaded_file_path, file_type, unique_id):
             commercial_standards = result["commercial_standards"]
 
             # Yield the technical and commercial sections separately
-            yield json.dumps(transform_json_values({'technical_standards': technical_standards}), ensure_ascii=False)
-            yield json.dumps(transform_json_values({'commercial_standards': commercial_standards}), ensure_ascii=False)
+            yield json.dumps({'technical_standards': transform_json_values(technical_standards)}, ensure_ascii=False)
+            yield json.dumps({'commercial_standards': transform_json_values(commercial_standards)}, ensure_ascii=False)
 
         else:
             # Handle the other tasks' results
-            yield json.dumps(transform_json_values({key: result}), ensure_ascii=False)
+            yield json.dumps({key: transform_json_values(result)}, ensure_ascii=False)
 
     except Exception as exc:
         logger.error(f"Error processing {key}: {exc}")
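The reordered yields narrow the scope of transform_json_values: it now sees only the section payload, and the protocol-level wrapper key ('technical_standards' etc.) is attached afterwards, untouched. transform_json_values itself is not part of this diff; the stand-in below is only meant to illustrate the new call shape:

    import json

    def transform_json_values(value):
        # Stand-in for the repo's real helper: here it recursively replaces
        # the ""/"/" placeholders mentioned in the comments elsewhere.
        if isinstance(value, dict):
            return {k: transform_json_values(v) for k, v in value.items()}
        return "未知" if value in ("", "/") else value

    payload = {"报价要求": "/", "工期": "90天"}
    json.dumps({'commercial_standards': transform_json_values(payload)},
               ensure_ascii=False)
    # '{"commercial_standards": {"报价要求": "未知", "工期": "90天"}}'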
@@ -275,7 +280,7 @@ def main_processing(output_folder, downloaded_file_path, file_type, unique_id):
     key = next(k for k, v in future_dependencies.items() if v == future)
     try:
         result = future.result()
-        yield json.dumps(transform_json_values({key: result}), ensure_ascii=False)
+        yield json.dumps({key: transform_json_values(result)}, ensure_ascii=False)
     except Exception as exc:
         logger.error(f"Error processing {key}: {exc}")
         yield json.dumps({'error': f'Error processing {key}: {str(exc)}'}, ensure_ascii=False)
@@ -338,8 +338,8 @@ def combine_find_invalid(file_path, output_dir, truncate_json_path,clause_path,t
         combined_dict.update(d)
 
     print("无效标与废标done...")
-    return nest_json_under_key(combined_dict, "无效标与废标项")
+    # return nest_json_under_key(combined_dict, "无效标与废标项")
+    return {"无效标与废标项": combined_dict}
 
 if __name__ == '__main__':
     start_time = time.time()
@@ -25,8 +25,8 @@ def combine_review_standards(truncate1,truncate3,knowledge_name,truncate0_jsonpa
     final_qualify_json = future_qualification.result()
     form_response_dict = future_form_response.result()
     form_response_dict.update(final_qualify_json)
-    return nest_json_under_key(form_response_dict, "资格审查")
+    # return nest_json_under_key(form_response_dict, "资格审查")
+    return {"资格审查": form_response_dict}
 
 if __name__ == "__main__":
     input_file = "D:\\flask_project\\flask_app\\static\\output\\c02a12c2-6f7b-49dc-b97f-c3d740c96c21\\ztbfile.pdf"
@@ -6,6 +6,7 @@ from flask_app.main.基础信息整合 import combine_basic_info
 from flask_app.main.通义千问long import qianwen_long, upload_file
 from flask_app.main.多线程提问 import multi_threading
 from flask_app.main.json_utils import combine_json_results, clean_json_string
+from flask_app.main.形式响应评审 import update_json_data, extract_matching_keys
 # The dict may be nested. Walk the key names (not the values): if more than one key sits at the same level and every one of those names consists only of digits and dots, drop the numbering keys and rebuild the data, keeping the formerly sibling entries together (e.g. in a list of strings).
 # For sibling keys: if there is more than one and their names are all identical, drop the names and keep the values in a list.
 # Within one dict there may be several pairs whose values are all "" or "/": drop those values and keep the key names in a list of strings.
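The three comments above read like the spec behind preprocess_dict/process_dict. As a reading aid, a minimal sketch of the first rule only (collapsing pure-numbering sibling keys); the repo's actual implementation is not shown in this diff:

    import re

    NUMBERING = re.compile(r'^[\d.]+$')

    def drop_numbering_keys(node):
        # If more than one sibling key is made up of digits/dots only,
        # discard those keys and keep their values as a list.
        if isinstance(node, dict):
            keys = list(node.keys())
            if len(keys) > 1 and all(NUMBERING.match(k) for k in keys):
                return [drop_numbering_keys(v) for v in node.values()]
            return {k: drop_numbering_keys(v) for k, v in node.items()}
        return node

    drop_numbering_keys({"审查": {"1.": "资质", "2.": "业绩"}})
    # {'审查': ['资质', '业绩']}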
@@ -98,9 +99,30 @@ def process_dict(data):
         return result['items']
 
     return result
 
+
+def find_chapter_clause_references(data, parent_key=""):
+    result = []
+    # Regex for "第x章" (chapter x) or "第x款" (clause x)
+    chapter_clause_pattern = re.compile(r'第[一二三四五六七八九十\d]+[章款]')
+
+    # Walk the dict's key-value pairs
+    for key, value in data.items():
+        # Build the full dotted key for the current entry
+        full_key = f"{parent_key}.{key}" if parent_key else key
+
+        if isinstance(value, dict):
+            # Recurse into nested dicts
+            result.extend(find_chapter_clause_references(value, full_key))
+        elif isinstance(value, str):
+            # For string values, check for a "第x章"/"第x款" reference
+            if chapter_clause_pattern.search(value):
+                result.append({full_key: value})
+
+    return result
+
 def qualification_review(truncate_file):
     file_id = upload_file(truncate_file)
-    user_query = ["该招标文件中规定的资格性审查标准是怎样的?请以json格式给出,外层为'资格性审查',你的回答要与原文一致,不可擅自总结删减,也不要回答资格性审查的内容。", "该招标文件中规定的符合性审查标准是怎样的?请以json格式给出,外层为'符合性审查',你的回答要与原文一致,不可擅自总结删减,也不要回答资格性审查的内容。"]
+    user_query = ["该招标文件中规定的资格性审查标准是怎样的?请以json格式给出,外层为'资格性审查',你的回答要与原文一致,不可擅自总结删减,也不要回答有关符合性审查的内容。", "该招标文件中规定的符合性审查标准是怎样的?请以json格式给出,外层为'符合性审查',你的回答要与原文一致,不可擅自总结删减,也不要回答有关资格性审查的内容。"]
     results = multi_threading(user_query, "", file_id, 2)
     combined_res = {}
     for question, response in results:
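A quick check of the new helper against the sample entries quoted earlier in this commit (assuming re is already imported at the top of the module):

    data = {
        "形式评审标准": {
            "投标文件签字盖章": "符合第一章“投标人须知”第 3.7.3(4)目 规定",
            "多标段投标": "符合第二章“投标人须知”第 10.1款规定",
            "备注": "无",
        }
    }

    find_chapter_clause_references(data)
    # [{'形式评审标准.投标文件签字盖章': '符合第一章“投标人须知”第 3.7.3(4)目 规定'},
    #  {'形式评审标准.多标段投标': '符合第二章“投标人须知”第 10.1款规定'}]

Only values that actually name a chapter or clause survive, with the dotted path preserved, which matches the shape extract_matching_keys produces in 形式响应评审.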
@@ -109,10 +131,12 @@ def qualification_review(truncate_file):
         processed1 = preprocess_dict(cleaned_data)
         processed2 = process_dict(processed1)
         combined_res.update(processed2)
+    result = find_chapter_clause_references(combined_res)
+    print(result)
     # core merge of the basic info
     return combined_res
 
 if __name__ == "__main__":
-    truncate_file = "C:\\Users\\Administrator\\Desktop\\货物标\\output3\\622二次视频会议磋商文件(1)_qualification2.pdf"
+    truncate_file = "C:\\Users\\Administrator\\Desktop\\货物标\\output3\\6.2定版视频会议磋商文件_qualification2.pdf"
     res = qualification_review(truncate_file)
     print(json.dumps(res, ensure_ascii=False, indent=4))