1.10
This commit is contained in:
parent
72d787c899
commit
66b5f78cbb
@ -5,8 +5,9 @@ from flask_app.general.json_utils import clean_json_string
|
||||
from flask_app.general.model_continue_query import process_continue_answers
|
||||
from flask_app.general.通义千问long import upload_file, qianwen_long_stream
|
||||
|
||||
|
||||
# 提取两个大标题之间的内容
|
||||
def extract_between_sections(data, target_values):
|
||||
def extract_between_sections(data, target_values, flag=False):
|
||||
target_found = False
|
||||
extracted_data = {}
|
||||
current_section_title = ""
|
||||
@ -27,7 +28,8 @@ def extract_between_sections(data, target_values):
|
||||
target_found = False
|
||||
|
||||
# 检查当前标题是否包含 target_values 中的任意关键词
|
||||
if any(tv in value for tv in target_values) and not file_pattern.search(value):
|
||||
if any(tv in value for tv in target_values):
|
||||
if (flag and not file_pattern.search(value)) or not flag:
|
||||
target_found = True # 找到了目标章节,开始捕获后续内容
|
||||
current_section_title = value # 保存章节标题内容
|
||||
|
||||
@ -40,6 +42,7 @@ def extract_between_sections(data, target_values):
|
||||
|
||||
return extracted_data
|
||||
|
||||
|
||||
# 生成无结构的数据货物标
|
||||
def postprocess_formatted1(section_content):
|
||||
# print(json.dumps(section_content, ensure_ascii=False, indent=4))
|
||||
@ -77,6 +80,7 @@ def postprocess_formatted1(section_content):
|
||||
|
||||
return concatenated
|
||||
|
||||
|
||||
# -----------以下为直接从序号中提取,无大标题的相关函数----------
|
||||
|
||||
# 对于每个target_value元素,如果有完美匹配json_data中的键,那就加入这个完美匹配的键名,否则,把全部模糊匹配到的键名都加入
|
||||
@ -92,6 +96,8 @@ def find_keys_by_value(target_value, json_data):
|
||||
def find_keys_with_prefix(key_prefix, json_data):
|
||||
subheadings = [k for k in json_data if k.startswith(key_prefix)]
|
||||
return subheadings
|
||||
|
||||
|
||||
def extract_json(data, target_values):
|
||||
results = {}
|
||||
for target_value in target_values:
|
||||
@ -113,6 +119,7 @@ def extract_json(data, target_values):
|
||||
results[subkey] = data[subkey]
|
||||
return results
|
||||
|
||||
|
||||
# 生成无结构的数据工程标,对提取出的若干键值对,生成外键为target_value,值为列表的新键值对
|
||||
def postprocess_formatted2(data, target_values):
|
||||
"""
|
||||
@ -183,6 +190,7 @@ def postprocess_formatted2(data, target_values):
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def get_requirements_with_gpt(merged_baseinfo_path, selection):
|
||||
"""
|
||||
根据 selection 的值选择相应的用户查询,并调用大模型获取要求。
|
||||
@ -338,6 +346,7 @@ def get_requirements_with_gpt(merged_baseinfo_path, selection):
|
||||
except Exception as e:
|
||||
return {"error": "调用大模型失败"}
|
||||
|
||||
|
||||
# 读取JSON数据,提取内容,转换结构,并打印结果
|
||||
def extract_from_notice(merged_baseinfo_path, clause_path, type):
|
||||
"""
|
||||
@ -365,8 +374,8 @@ def extract_from_notice(merged_baseinfo_path, clause_path, type):
|
||||
3: ["重新招标、不再招标和终止招标", "重新招标", "重新采购", "不再招标", "不再采购", "终止招标", "终止采购"],
|
||||
4: ["评标"] # 测试
|
||||
}
|
||||
|
||||
# 获取对应 type 的 target_values
|
||||
flag = (type == 2)
|
||||
target_values = type_target_map.get(type)
|
||||
if not target_values:
|
||||
print(f"Error: Invalid type specified: {type}. Use 1, 2, 3, or 4.")
|
||||
@ -378,7 +387,7 @@ def extract_from_notice(merged_baseinfo_path, clause_path, type):
|
||||
data = json.load(file)
|
||||
if len(data) >= 60:
|
||||
# 尝试使用大章节筛选
|
||||
extracted_data = extract_between_sections(data, target_values)
|
||||
extracted_data = extract_between_sections(data, target_values, flag)
|
||||
if extracted_data:
|
||||
# 后处理并返回结果
|
||||
extracted_data_concatenated = {
|
||||
@ -402,12 +411,13 @@ def extract_from_notice(merged_baseinfo_path, clause_path, type):
|
||||
print(f"Error occurred while processing clause_path '{clause_path}': {e}")
|
||||
return DEFAULT_RESULT
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# file_path = 'C:\\Users\\Administrator\\Desktop\\fsdownload\\3bffaa84-2434-4bd0-a8ee-5c234ccd7fa0\\clause1.json'
|
||||
merged_baseinfo_path=r"C:\Users\Administrator\Desktop\fsdownload\ec7d5328-9c57-450f-baf4-2e5a6f90ed1d\tmp\merged_baseinfo_path_more.pdf"
|
||||
clause_path=r"C:\Users\Administrator\Desktop\fsdownload\ec7d5328-9c57-450f-baf4-2e5a6f90ed1d\tmp\clause1.json"
|
||||
merged_baseinfo_path = r"C:\Users\Administrator\Desktop\fsdownload\b29de31a-297e-42cf-b9ba-6859e530a472\ztbfile_merged_baseinfo.pdf"
|
||||
clause_path = r"C:\Users\Administrator\Desktop\fsdownload\b29de31a-297e-42cf-b9ba-6859e530a472\clause1.json"
|
||||
try:
|
||||
res = extract_from_notice(merged_baseinfo_path,clause_path, 2) # 可以改变此处的 type 参数测试不同的场景
|
||||
res = extract_from_notice(merged_baseinfo_path, clause_path, 1) # 可以改变此处的 type 参数测试不同的场景
|
||||
res2 = json.dumps(res, ensure_ascii=False, indent=4)
|
||||
print(res2)
|
||||
except ValueError as e:
|
||||
|
Loading…
x
Reference in New Issue
Block a user