2024-10-25 10:18:38 +08:00
|
|
|
|
# -*- encoding:utf-8 -*-
|
2024-10-28 17:40:02 +08:00
|
|
|
|
import ast
|
2024-10-24 14:34:37 +08:00
|
|
|
|
import json
|
2024-10-28 17:40:02 +08:00
|
|
|
|
import re
|
|
|
|
|
import os
|
|
|
|
|
import sys
|
|
|
|
|
from flask_app.general.多线程提问 import multi_threading
|
|
|
|
|
from flask_app.general.通义千问long import upload_file
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_data(json_path):
    """
    Read a JSON file and return its decoded content.

    Args:
        json_path (str): Path of the JSON file to read (opened as UTF-8).

    Returns:
        The object produced by ``json.load`` for the file content.

    Note:
        This function does not raise for a missing file or malformed JSON.
        It prints an error message and terminates the process with exit
        status 1 instead.
    """
    try:
        with open(json_path, 'r', encoding='utf-8') as handle:
            return json.load(handle)
    except FileNotFoundError:
        print(f"错误:文件未找到 - {json_path}")
    except json.JSONDecodeError as exc:
        print(f"错误:解析JSON文件时出错 - {exc}")
    # Only reached after one of the handlers above has reported a failure.
    sys.exit(1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def define_target_names():
    """
    Provide the names of the bid materials to look up.

    Returns:
        list: A new list of target names in a fixed order. Each call returns
        an independent list, so callers may modify it freely.
    """
    # Entries that are switched off are kept as comments at their original
    # position so they can be re-enabled without re-deriving the order.
    enabled_targets = (
        "营业执照",
        # disabled: "开户信息"
        "法定代表人身份证",
        # disabled: "法定代表人授权人身份证"
        "人员证书",
        "人员社保资料",
        # disabled: "劳动合同"
        "企业证书",
        "企业业绩",
        "财务信息(财务审计报告)",
        "财务信息(缴纳税收证明)",
        "财务信息(公司缴纳社保证明)",
    )
    return list(enabled_targets)
|
|
|
|
|
|
|
|
|
|
|
2024-10-24 14:34:37 +08:00
|
|
|
|
def generate_user_query(target, chapters, keywords):
    """
    Build the prompt that asks where one kind of bid material should be inserted.

    Args:
        target (str): Name of the material to place.
        chapters (list): Related chapters. Accepted for interface
            compatibility; the prompt text does not use it.
        keywords (list): Keywords to mention in the prompt. Each one is
            wrapped in single quotes and the results are joined with ", ".

    Returns:
        str: The prompt text, ending with a newline.
    """
    # Pre-render the keyword list so the template below has only plain fields.
    quoted_keywords = ', '.join("'{}'".format(kw) for kw in keywords)
    return f"""这是投标文件模板,作为投标人,我需要把不同的投标材料填充到对应位置,请你根据该文件回答:{target}应该插入在该文件哪个地方?你可能需要查找以下关键词出现的地方:{quoted_keywords},并确认插入的位置。我已在原文中打上若干待插入位置的标记,形如'[$$第17个可插入位置$$]',它的标记与它上面的小节内容关联。你需要返回给我{target}应该插入位置的标记序号,即'[$$第17个可插入位置$$]'中的'17',而不是页码,若有多个位置需要插入,可以返回多个序号,你的回答以数组返回,如[17, 19],若插入位置不明确,那么返回[-1]。
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def generate_user_queries(target_names, data_dict):
    """
    Produce one prompt per target that has an entry in the data dictionary.

    Args:
        target_names (list): Target names to process, in order.
        data_dict (dict): Maps a target name to a dict that may hold the
            keys "章节" (chapters) and "关键字" (keywords). Both default to
            an empty list when absent.

    Returns:
        list: Dicts of the form {"target": <name>, "query": <prompt>}, in the
        order of ``target_names``. Targets missing from ``data_dict`` are
        skipped after a warning is printed.
    """
    collected = []
    for name in target_names:
        if name not in data_dict:
            print(f"警告:'{name}'未在数据字典中找到相关信息。")
            continue
        entry = data_dict[name]
        section_list = entry.get("章节", [])
        keyword_list = entry.get("关键字", [])
        collected.append({
            "target": name,
            "query": generate_user_query(name, section_list, keyword_list),
        })
    return collected
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def process_string_list(string_list):
    """
    Extract the first bracketed list from a text and convert it to a list.

    The first ``[...]`` group found in the text is split on commas. When
    every item is an integer (optionally negative, e.g. the ``-1`` "position
    unclear" answer), a list of ints is returned. Otherwise the stripped
    items are returned as strings.

    Args:
        string_list (str): Text that may contain a bracketed list.

    Returns:
        list: The parsed items, or an empty list when the input is not a
        string, contains no bracket pair, or the brackets are empty.
    """
    # A missing answer (e.g. None) is treated like "no list found".
    if not isinstance(string_list, str):
        return []

    match = re.search(r'\[(.*?)\]', string_list)
    if match is None:
        return []

    content_inside = match.group(1).strip()
    if not content_inside:
        return []

    items = [item.strip() for item in content_inside.split(',')]
    # str.isdigit() rejects a leading minus sign, which would turn "[-1]"
    # into ['-1']; match signed integers explicitly instead.
    if all(re.fullmatch(r'-?\d+', item) for item in items):
        return [int(item) for item in items]
    return items
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """
    Ask where each kind of bid material belongs in a template file and print
    the answers.

    Steps: load the insert-position JSON, build one prompt per target, upload
    the template PDF, send all prompts through ``multi_threading``, parse each
    answer with ``process_string_list`` and print one ``target:positions``
    line per answer.

    The process exits with status 0 when no prompt could be generated, and
    with status 1 when the PDF is missing, the upload yields no file id, or
    no results come back.
    """
    # Location of the JSON file describing each target.
    # A project-relative alternative is "flask_app/general/static/插入位置.json".
    json_path = "D:\\flask_project\\flask_app\\general\\static\\插入位置.json"
    data_dict = load_data(json_path)

    target_names = define_target_names()

    # Targets without an entry in data_dict are dropped here, so this list can
    # be shorter than target_names.
    user_query_list = generate_user_queries(target_names, data_dict)
    if not user_query_list:
        print("没有生成任何用户查询。")
        sys.exit(0)

    queries = [item['query'] for item in user_query_list]

    # Template file that is uploaded and queried.
    format_part = "C:\\Users\\Administrator\\Desktop\\outzb2 (2).pdf"
    if not os.path.isfile(format_part):
        print(f"错误:文件未找到 - {format_part}")
        sys.exit(1)

    file_id = upload_file(format_part)
    if not file_id:
        print("错误:文件上传失败。")
        sys.exit(1)

    # Send all prompts in parallel.
    results = multi_threading(queries, "", file_id, 2)
    if not results:
        print("错误:未收到任何处理结果。")
        sys.exit(1)

    # Each result is unpacked as a pair whose second element is the answer text.
    baseinfo_list = [process_string_list(res) for _, res in results]

    # Label every answer with the target it belongs to, taken from
    # user_query_list (not target_names, which may include skipped targets).
    # NOTE(review): assumes multi_threading returns results in the same order
    # as `queries` — confirm. zip stops at the shorter sequence if it returns
    # fewer results.
    for item, info in zip(user_query_list, baseinfo_list):
        print(f"{item['target']}:{info}")
|
|
|
|
|
|
|
|
|
|
# Run the workflow only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|