This commit is contained in:
zy123 2024-11-04 10:52:23 +08:00
parent 6e05393961
commit 5f040b8725
8 changed files with 248 additions and 163 deletions

View File

@ -241,7 +241,7 @@ def llm_call(question, knowledge_name,file_id, result_queue, ans_index, llm_type
# assistant=create_assistant(knowledge_name)
elif llm_type==2:
print(f"qianwen_long! question:{question}")
qianwen_res = qianwen_long(file_id,question)
qianwen_res,usage = qianwen_long(file_id,question)
result_queue.put((ans_index,(question,qianwen_res)))
return
else :

View File

@ -27,7 +27,39 @@ def qianwen_long(file_id, user_query):
# Generate a response based on the file ID
completion = client.chat.completions.create(
model="qwen-long",
top_p=0.5,
# top_p=0.5,
temperature=0.5,
# response_format={"type":"json_object"},
messages=[
{
'role': 'system',
'content': f'fileid://{file_id}'
},
{
'role': 'user',
'content': user_query
}
],
stream=False
)
# Return the response content
return completion.choices[0].message.content
def qianwen_long_text(file_id, user_query):
print("call qianwen-long text...")
"""
Uses a previously uploaded file to generate a response based on a user query.
"""
client = OpenAI(
api_key=os.getenv("DASHSCOPE_API_KEY"),
base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"
)
# Generate a response based on the file ID
completion = client.chat.completions.create(
model="qwen-long",
# top_p=0.5,
temperature=0.5,
messages=[
{
@ -51,14 +83,15 @@ if __name__ == "__main__":
file_path = "C:\\Users\\Administrator\\Desktop\\货物标\\output4\\招标文件111_tobidders_notice_part1.docx"
file_id = upload_file(file_path)
user_query1 = ("根据该文档中的投标人(供应商、磋商)须知前附表请你保留原有层次关系以json格式返回给我表格中的信息。")
user_query1 = "该招标文件前附表中的项目名称是什么请以json格式返回给我"
user_query2 = ("请提供文件中关于资格审查的具体内容和标准。")
start_time=time.time()
# First query
print("starting qianwen-long...")
result1 = qianwen_long(file_id, user_query1)
result1 ,result2= qianwen_long(file_id, user_query1)
print("First Query Result:", result1)
print(type(result1))
print(result2)
# # Second query
# print("starting qianwen-long...")
# result2 = qianwen_long(file_id, user_query2)

View File

@ -1,5 +1,8 @@
# -*- encoding:utf-8 -*-
import ast
import json
import re
from collections import OrderedDict
from flask_app.general.json_utils import clean_json_string
from flask_app.general.多线程提问 import multi_threading
@ -66,4 +69,5 @@ def process_string_list(string_list):
return []
else:
# 如果没有匹配到内容,返回空列表
return []
return []

View File

@ -3,7 +3,7 @@ import json
import os.path
import time
import re
from flask_app.general.通义千问long import upload_file, qianwen_long
from flask_app.general.通义千问long import upload_file, qianwen_long,qianwen_long_text
from concurrent.futures import ThreadPoolExecutor
from flask_app.main.禁止投标情形 import find_forbidden, process_string_list
@ -197,7 +197,7 @@ def clean_dict_datas(extracted_contents, keywords,excludes): #让正则表达
all_texts1.append(cleaned_text_no_spaces)
else:
print(text_list)
# print(text_list)
new_text_list=preprocess_text_list(text_list)
# print(new_text_list)
pattern = r'^\s*([(]\d+[)]|[A-Za-z]?\d+\s*(\.\s*\d+)*(\s|\.|、|)?|[一二三四五六七八九十]+、)'
@ -321,80 +321,104 @@ def extract_values_if_contains(data, includes):
#TODO:truncate_json_path为空的时候单独提取表格数据
def handle_query(file_path, user_query, output_file, result_key, keywords, truncate_json_path):
excludes = ["说明表", "重新招标", "否决所有", "否决投标的条件", "备注:", "本人保证:"]
follow_up_keywords = [r'\s*形\s*之\s*一', r'\s*况\s*之\s*一', r'\s*列', r'\s*下']
extracted_contents = extract_text_with_keywords(file_path, [keywords], follow_up_keywords) #提取正文(除表格)
# print(extracted_contents)
all_texts1, all_texts2 = clean_dict_datas(extracted_contents, keywords, excludes) # 列表
all_tables1, all_tables2 = extract_sentences_from_json(truncate_json_path, keywords, follow_up_keywords) #提取表格数据json_data
qianwen_txt = all_texts1 + all_tables1
selected_contents = set() # 使用 set 去重
try:
excludes = ["说明表", "重新招标", "否决所有", "否决投标的条件", "备注:", "本人保证:"]
follow_up_keywords = [r'\s*形\s*之\s*一', r'\s*况\s*之\s*一', r'\s*列', r'\s*下']
extracted_contents = extract_text_with_keywords(file_path, [keywords], follow_up_keywords) # 提取正文(除表格)
# print(extracted_contents)
all_texts1, all_texts2 = clean_dict_datas(extracted_contents, keywords, excludes) # 列表
all_tables1, all_tables2 = extract_sentences_from_json(truncate_json_path, keywords, follow_up_keywords) # 提取表格数据json_data
qianwen_txt = all_texts1 + all_tables1
selected_contents = set() # 使用 set 去重
if qianwen_txt:
with open(output_file, 'w', encoding='utf-8') as file:
counter = 1
for content in qianwen_txt:
file.write("..............." + '\n')
file.write(f"{counter}. {content}\n")
counter += 1
if qianwen_txt:
with open(output_file, 'w', encoding='utf-8') as file:
counter = 1
for content in qianwen_txt:
file.write("..............." + '\n')
file.write(f"{counter}. {content}\n")
counter += 1
file_id = upload_file(output_file)
qianwen_ans = qianwen_long(file_id, user_query)
num_list = process_string_list(qianwen_ans)
print(result_key+"选中的序号:"+str(num_list))
file_id = upload_file(output_file)
# qianwen_ans = qianwen_long(file_id, user_query)
qianwen_ans = qianwen_long_text(file_id, user_query)
num_list = process_string_list(qianwen_ans)
print(result_key + "选中的序号:" + str(num_list))
for index in num_list:
if index - 1 < len(qianwen_txt):
content = qianwen_txt[index - 1]
selected_contents.add(content)
for index in num_list:
if 1 <= index <= len(qianwen_txt):
content = qianwen_txt[index - 1]
selected_contents.add(content)
# 无论 qianwen_txt 是否为空,都添加 all_texts2 和 all_tables2 的内容
selected_contents.update(all_texts2)
selected_contents.update(all_tables2)
# 无论 qianwen_txt 是否为空,都添加 all_texts2 和 all_tables2 的内容
selected_contents.update(all_texts2)
selected_contents.update(all_tables2)
# 如果 selected_contents 不为空,则返回结果,否则返回空字符串
if selected_contents:
res = {result_key: list(selected_contents)}
else:
res = {result_key: ""}
# 如果 selected_contents 不为空,则返回结果,否则返回空字符串
if selected_contents:
res = {result_key: list(selected_contents)}
else:
res = {result_key: ""}
return res
return res
except Exception as e:
print(f"handle_query 在处理 {result_key} 时发生异常: {e}")
return {result_key: ""}
def combine_find_invalid(invalid_docpath, output_dir, truncate_json_path,clause_path,qualification):
def combine_find_invalid(invalid_docpath, output_dir, truncate_json_path, clause_path, qualification):
queries = [
(r'\s*决|无\s*效\s*投\s*标|被\s*拒\s*绝|予\s*以\s*拒\s*绝|投\s*标\s*失\s*效|投\s*标\s*无\s*效',
"以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:否决投标或拒绝投标或无效投标或投标失效的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果x为符合的信息的序号若情况不存在返回[]。",
os.path.join(output_dir, "temp1.txt"), "否决和无效投标情形"),
(r'\s*标',
"以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:废标项的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果x为符合的信息的序号若情况不存在返回[]。",
os.path.join(output_dir, "temp2.txt"), "废标项")
(
r'\s*决|无\s*效\s*投\s*标|被\s*拒\s*绝|予\s*以\s*拒\s*绝|投\s*标\s*失\s*效|投\s*标\s*无\s*效',
"以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:否决投标或拒绝投标或无效投标或投标失效的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果x为符合的信息的序号若情况不存在返回[]。",
os.path.join(output_dir, "temp1.txt"),
"否决和无效投标情形"
),
(
r'\s*标',
"以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:废标项的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果x为符合的信息的序号若情况不存在返回[]。",
os.path.join(output_dir, "temp2.txt"),
"废标项"
)
]
results = []
# 使用线程池来并行处理查询
with ThreadPoolExecutor() as executor:
futures = []
for keywords, user_query, output_file, result_key in queries:
future = executor.submit(handle_query, invalid_docpath, user_query, output_file, result_key, keywords,
truncate_json_path)
futures.append(future)
time.sleep(1) # 暂停1秒后再提交下一个任务
future = executor.submit(
handle_query,
invalid_docpath,
user_query,
output_file,
result_key,
keywords,
truncate_json_path
)
futures.append((future, result_key))
time.sleep(0.5) # 暂停0.5秒后再提交下一个任务
# 按照提交的顺序收集结果
for future, result_key in futures:
try:
result = future.result()
except Exception as e:
print(f"线程处理 {result_key} 时出错: {e}")
result = {result_key: ""}
results.append(result)
for future in futures:
results.append(future.result())
#禁止投标
print("starting不得存在的情形...")
forbidden_res = find_forbidden(truncate_json_path, clause_path, qualification)
# 禁止投标find_forbidden部分
try:
# print("starting不得存在的情形...")
forbidden_res = find_forbidden(truncate_json_path, clause_path, qualification)
except Exception as e:
print(f"find_forbidden 处理时出错: {e}")
forbidden_res = {'不得存在的其他情形': ""}
results.append(forbidden_res)
combined_dict = {}
for d in results:
combined_dict.update(d)
print("无效标与废标done...")
# return nest_json_under_key(combined_dict, "无效标与废标项")
return {"无效标与废标项":combined_dict}
# print("无效标与废标done...")
return {"无效标与废标项": combined_dict}
if __name__ == '__main__':
start_time = time.time()

View File

@ -4,7 +4,7 @@ import re
from PyPDF2 import PdfWriter, PdfReader
from flask_app.general.通义千问long import upload_file, qianwen_long
from flask_app.general.通义千问long import upload_file, qianwen_long, qianwen_long_text
from flask_app.general.通用功能函数 import process_string_list
@ -135,28 +135,31 @@ def merge_pdfs(paths, output_filename):
print("禁止投标情形: No files to merge.")
return output_path
def find_forbidden(truncate_json_path,clause_path,qualification=""): #投标人须知前附表 条款 评分前附表和资格审查表中
# output_filename="merged.pdf"
# paths=[truncate1,truncate4]
# merged_filepath=merge_pdfs(paths,output_filename) #暂时废弃,评分前附表中的在'否决投标'中摘录了。
if qualification:
file_id=upload_file(qualification)
# user_query_forbidden = "该招标文件规定的投标人不得存在的其他情形有哪些请按json列表格式给我提供信息键名为'不得存在的其他情形',请你不要回答有关\"信誉要求\"的内容,若文件中未说明,请在键值中填'未知'。"
user_query_forbidden = "该招标文件规定的投标人不得存在的其他情形有哪些,请以列表给我提供信息,形如[xx,xx,...],请你不要回答有关\"信誉要求\"的内容,若原文未提及,返回[]。"
qianwen_forbidden_str = qianwen_long(file_id, user_query_forbidden)
else:
qianwen_forbidden_str="[]"
actual_list=process_string_list(qianwen_forbidden_str) #提取出字符串列表 ["xxx","xx"]
includes = ["不得存在", "不得与", "禁止投标", "对投标人的纪律"]
excludes = ["招标", "评标", "定标"]
forbidden_results = extract_and_format_from_paths([truncate_json_path, clause_path], includes,excludes)
# print(forbidden_results)
processed_results = extract_unique_items_from_texts(forbidden_results)
# print(processed_results)
merged_forbidden_list = list(dict.fromkeys(actual_list + processed_results))
forbidden_dict={'不得存在的其他情形':merged_forbidden_list}
return forbidden_dict
def find_forbidden(truncate_json_path, clause_path, qualification=""):
try:
if qualification:
file_id = upload_file(qualification)
user_query_forbidden = (
"该招标文件规定的投标人不得存在的其他情形有哪些,请以列表给我提供信息,形如[xx,xx,...],"
"请你不要回答有关\"信誉要求\"的内容,若原文未提及,返回[]。"
)
qianwen_forbidden_str = qianwen_long_text(file_id, user_query_forbidden)
else:
qianwen_forbidden_str = "[]"
actual_list = process_string_list(qianwen_forbidden_str) # 提取出字符串列表 ["xxx","xx"]
includes = ["不得存在", "不得与", "禁止投标", "对投标人的纪律"]
excludes = ["招标", "评标", "定标"]
forbidden_results = extract_and_format_from_paths([truncate_json_path, clause_path], includes, excludes)
processed_results = extract_unique_items_from_texts(forbidden_results)
merged_forbidden_list = list(dict.fromkeys(actual_list + processed_results))
forbidden_dict = {'不得存在的其他情形': merged_forbidden_list}
return forbidden_dict
except Exception as e:
print(f"find_forbidden 在处理时发生异常: {e}")
return {'不得存在的其他情形': ""}
#TODO:不得存在的情况文中有很多内容,货物标中采用了全文搜索的逻辑。
if __name__ == '__main__':
truncate_json_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\006decc2-b3b5-4898-9b9f-4b0eab4e173f\\truncate_output.json"

View File

@ -3,10 +3,10 @@ import json
import os.path
import time
import re
from flask_app.general.通义千问long import upload_file, qianwen_long
from concurrent.futures import ThreadPoolExecutor
from flask_app.general.通义千问long import upload_file, qianwen_long,qianwen_long_text
from flask_app.general.通用功能函数 import process_string_list
from docx import Document
from concurrent.futures import ThreadPoolExecutor, as_completed
from collections import OrderedDict
#处理跨页的段落
def preprocess_paragraphs(paragraphs):
@ -429,60 +429,71 @@ def extract_values_if_contains(data, includes):
# 以上是原文内容,文本内的信息以'...............'分割请你根据该信息回答否决投标或拒绝投标或无效投标或使投标失效的情况有哪些文本中可能存在无关的信息请你准确筛选所需的信息并返回。最终结果以json列表格式返回给我键名为'否决和无效投标情形',你的回答完全忠于原文内容,且回答内容与原文内容一致,要求完整与准确,不能擅自总结或者概括。",
def handle_query(file_path, user_query, output_file, result_key, keywords):
excludes = ["说明表", "重新招标", "否决所有", "否决投标的条件", "备注:", "本人保证:", "我方"]
follow_up_keywords = [r'\s*形\s*之\s*一', r'\s*况\s*之\s*一', r'\s*列', r'\s*下']
extracted_contents = extract_text_with_keywords(file_path, [keywords], follow_up_keywords) # 字典结果
all_texts1, all_texts2 = clean_dict_datas(extracted_contents, keywords, excludes) # 列表
# table_data_list=read_docx_last_column(truncate_file) #从投标人须知前附表中提取信息生成列表data每个元素为'一行信息'
table_data_list = read_tables_from_docx(file_path)
all_tables1, all_tables2 = extract_table_with_keywords(table_data_list, keywords, follow_up_keywords)
qianwen_txt = all_texts1 + all_tables1
# Proceed only if there is content to write
selected_contents = set() # 使用 set 去重
try:
excludes = ["说明表", "重新招标", "否决所有", "否决投标的条件", "备注:", "本人保证:", "我方"]
follow_up_keywords = [r'\s*形\s*之\s*一', r'\s*况\s*之\s*一', r'\s*列', r'\s*下']
extracted_contents = extract_text_with_keywords(file_path, [keywords], follow_up_keywords) # 字典结果
all_texts1, all_texts2 = clean_dict_datas(extracted_contents, keywords, excludes) # 列表
# table_data_list=read_docx_last_column(truncate_file) #从投标人须知前附表中提取信息生成列表data每个元素为'一行信息'
table_data_list = read_tables_from_docx(file_path)
all_tables1, all_tables2 = extract_table_with_keywords(table_data_list, keywords, follow_up_keywords)
qianwen_txt = all_texts1 + all_tables1
# Proceed only if there is content to write
selected_contents = set() # 使用 set 去重
if qianwen_txt:
with open(output_file, 'w', encoding='utf-8') as file:
counter = 1
for content in qianwen_txt:
file.write(f"{counter}. {content}\n")
file.write("..............." + '\n')
counter += 1
if qianwen_txt:
with open(output_file, 'w', encoding='utf-8') as file:
counter = 1
for content in qianwen_txt:
file.write(f"{counter}. {content}\n")
file.write("..............." + '\n')
counter += 1
file_id = upload_file(output_file)
qianwen_ans = qianwen_long(file_id, user_query)
num_list = process_string_list(qianwen_ans)
print(result_key + "选中的序号:" + str(num_list))
file_id = upload_file(output_file)
# qianwen_ans = qianwen_long(file_id, user_query)
qianwen_ans = qianwen_long_text(file_id, user_query)
num_list = process_string_list(qianwen_ans)
print(result_key + "选中的序号:" + str(num_list))
for index in num_list:
if index - 1 < len(qianwen_txt):
content = qianwen_txt[index - 1]
selected_contents.add(content)
for index in num_list:
if index - 1 < len(qianwen_txt):
content = qianwen_txt[index - 1]
selected_contents.add(content)
# 无论 qianwen_txt 是否为空,都添加 all_texts2 和 all_tables2 的内容
selected_contents.update(all_texts2)
selected_contents.update(all_tables2)
# 无论 qianwen_txt 是否为空,都添加 all_texts2 和 all_tables2 的内容
selected_contents.update(all_texts2)
selected_contents.update(all_tables2)
# 如果 selected_contents 不为空,则返回结果,否则返回空字符串
if selected_contents:
res = {result_key: list(selected_contents)}
else:
res = {result_key: ""}
return res
# 如果 selected_contents 不为空,则返回结果,否则返回空字符串
if selected_contents:
res = {result_key: list(selected_contents)}
else:
res = {result_key: ""}
return res
except Exception as e:
print(f"handle_query 在处理 {result_key} 时发生异常: {e}")
return {result_key: ""}
def combine_find_invalid(file_path, output_dir):
queries = [
(
r'\s*决|无\s*效\s*投\s*标|无\s*效\s*文\s*件|文\s*件\s*无\s*效|无\s*效\s*响\s*应|无\s*效\s*报\s*价|无\s*效\s*标|视\s*为\s*无\s*效|被\s*拒\s*绝|予\s*以\s*拒\s*绝|投\s*标\s*失\s*效|投\s*标\s*无\s*效',
# r'否\s*决|无\s*效|被\s*拒\s*绝|予\s*以\s*拒\s*绝',
"以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:否决投标或拒绝投标或无效投标或投标失效的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果x为符合的信息的序号若情况不存在返回[]。",
os.path.join(output_dir, "temp1.txt"), "否决和无效投标情形"),
(r'\s*标',
"以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:废标项的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果x为符合的信息的序号若情况不存在返回[]。",
os.path.join(output_dir, "temp2.txt"), "废标项"),
(r'\s*得|禁\s*止\s*投\s*标',
"以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,每条信息规定了各方不得存在的情形,请回答:在这些信息中,主语是投标人或中标人或供应商或联合体投标各方或磋商小组的信息有哪些?不要返回主语是招标人或采购人或评标委员会的信息,请你筛选所需的信息并将它的序号返回。请以[x,x,x]格式返回给我结果,示例返回为[1,4,6],若情况不存在,返回[]。",
os.path.join(output_dir, "temp3.txt"), "不得存在的情形")
r'\s*决|无\s*效\s*投\s*标|无\s*效\s*文\s*件|文\s*件\s*无\s*效|无\s*效\s*响\s*应|无\s*效\s*报\s*价|无\s*效\s*标|视\s*为\s*无\s*效|被\s*拒\s*绝|予\s*以\s*拒\s*绝|投\s*标\s*失\s*效|投\s*标\s*无\s*效',
"以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:否决投标或拒绝投标或无效投标或投标失效的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果x为符合的信息的序号若情况不存在返回[]。",
os.path.join(output_dir, "temp1.txt"),
"否决和无效投标情形"
),
(
r'\s*标',
"以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:废标项的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果x为符合的信息的序号若情况不存在返回[]。",
os.path.join(output_dir, "temp2.txt"),
"废标项"
),
(
r'\s*得|禁\s*止\s*投\s*标',
"以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,每条信息规定了各方不得存在的情形,请回答:在这些信息中,主语是投标人或中标人或供应商或联合体投标各方或磋商小组的信息有哪些?不要返回主语是招标人或采购人或评标委员会的信息,请你筛选所需的信息并将它的序号返回。请以[x,x,x]格式返回给我结果,示例返回为[1,4,6],若情况不存在,返回[]。",
os.path.join(output_dir, "temp3.txt"),
"不得存在的情形"
)
]
results = []
@ -491,23 +502,22 @@ def combine_find_invalid(file_path, output_dir):
futures = []
for keywords, user_query, output_file, result_key in queries:
future = executor.submit(handle_query, file_path, user_query, output_file, result_key, keywords)
futures.append(future)
time.sleep(1) # 暂停1秒后再提交下一个任务
futures.append((future, result_key)) # 保持顺序
time.sleep(0.5) # 暂停0.5秒后再提交下一个任务
for future in futures:
results.append(future.result())
# #禁止投标
# print("starting不得存在的情形...")
# forbidden_res = find_forbidden(truncate_json_path, clause_path)
# results.append(forbidden_res)
for future, result_key in futures:
try:
result = future.result()
except Exception as e:
print(f"线程处理 {result_key} 时出错: {e}")
result = {result_key: ""}
results.append(result)
combined_dict = {}
for d in results:
combined_dict.update(d)
print("无效标与废标done...")
# return nest_json_under_key(combined_dict, "无效标与废标项")
return {"无效标与废标项": combined_dict}
@ -518,8 +528,8 @@ if __name__ == '__main__':
# truncate_file="C:\\Users\\Administrator\\Desktop\\货物标\\output4\\招标文件实高电子显示屏_tobidders_notice_part1.docx"
clause_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\77a48c63-f39f-419b-af2a-7b3dbf41b70b\\clause1.json"
# doc_path="C:\\Users\\Administrator\\Desktop\\货物标\\zbfilesdocx\\磋商文件(1).docx"
doc_path = 'D:\\flask_project\\flask_app\\static\\output\\output1\\77a48c63-f39f-419b-af2a-7b3dbf41b70b\\ztbfile.docx'
output_dir = "D:\\flask_project\\flask_app\\static\\output\\output1\\77a48c63-f39f-419b-af2a-7b3dbf41b70b"
doc_path = 'D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile.docx'
output_dir = "D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\tmp"
results = combine_find_invalid(doc_path, output_dir)
end_time = time.time()
print("Elapsed time:", str(end_time - start_time))

View File

@ -3,7 +3,7 @@ import json
import re
import time
from collections import defaultdict
from flask_app.general.通义千问long import upload_file, qianwen_long
from flask_app.general.通义千问long import upload_file, qianwen_long, qianwen_long_text
def combine_technical_and_business(data, target_values):
@ -96,12 +96,15 @@ def parse_json_with_duplicates(raw_string):
def custom_object_pairs_hook(pairs):
d = defaultdict(list)
for key, value in pairs:
# 如果值是字典或列表,递归处理
if isinstance(value, dict):
value = process_dict(value)
elif isinstance(value, list):
value = process_list(value)
d[key].append(value)
try:
# 如果值是字典或列表,递归处理
if isinstance(value, dict):
value = process_dict(value)
elif isinstance(value, list):
value = process_list(value)
d[key].append(value)
except Exception as e:
d[key].append(value) # 根据需求决定是否跳过或保留原值
# 将有多个值的键转换为列表,单个值的键保持原样
return {key: (values if len(values) > 1 else values[0]) for key, values in d.items()}
@ -115,7 +118,10 @@ def parse_json_with_duplicates(raw_string):
Returns:
dict: 处理后的字典
"""
return custom_object_pairs_hook(d.items())
try:
return custom_object_pairs_hook(d.items())
except Exception as e:
return {}
def process_list(l):
"""
@ -127,7 +133,10 @@ def parse_json_with_duplicates(raw_string):
Returns:
list: 处理后的列表
"""
return [process_dict(item) if isinstance(item, dict) else item for item in l]
try:
return [process_dict(item) if isinstance(item, dict) else item for item in l]
except Exception as e:
return []
"""输入字符串,提取 { 和 } 之间的内容,并将其解析为字典"""
if not raw_string.strip():
@ -198,13 +207,11 @@ def combine_evaluation_standards(truncate_file):
# 定义用户查询
user_query1 = (
"根据该文档,你判断它是否有具体的关于技术评分或商务评分或投标报价的评分要求,"
"如果有,返回'',否则返回''"
"根据该文档,你判断它是否有关于技术评分或商务评分或投标报价的具体的评分及要求,如果有,返回'',否则返回''"
) # 应对竞争性谈判这种无评分要求的情况
# 执行查询
judge_res = qianwen_long(file_id, user_query1)
judge_res = qianwen_long_text(file_id, user_query1)
# 默认 judge 为 True
judge = True
@ -254,7 +261,7 @@ def combine_evaluation_standards(truncate_file):
"""
)
# 执行第二个查询
evaluation_res = qianwen_long(file_id, user_query)
evaluation_res = qianwen_long_text(file_id, user_query) #有些重复的键名只有qianwen_long_text能保留
# print(evaluation_res)
# 清理和处理响应
cleaned_evaluation_res = parse_json_with_duplicates(evaluation_res) #处理重复键名的情况
@ -290,7 +297,7 @@ def combine_evaluation_standards(truncate_file):
if __name__ == "__main__":
start_time=time.time()
truncate_file="C:\\Users\\Administrator\\Desktop\\货物标\\output2\\招标文件(107国道)_evaluation_method.pdf"
truncate_file="D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile_evaluation_method.pdf"
# truncate_file = "C:\\Users\\Administrator\\Desktop\\货物标\\output2\\2-招标文件统计局智能终端二次招标_evaluation_method.pdf"
# truncate_file="C:\\Users\\Administrator\\Desktop\\货物标\\output2\\广水市妇幼招标文件最新W改_evaluation_method.pdf"
# truncate_file = "C:\\Users\\Administrator\\Desktop\\fsdownload\\2d481945-1f82-45a5-8e56-7fafea4a7793\\ztbfile_evaluation_method.pdf"

View File

@ -94,7 +94,11 @@ def process_dict(data):
elif re.match(r'^\d+\.\d+$', key): # 单层小数点
return (float(key),)
else: # 多层序号,按字符串处理
return tuple(map(int, key.split('.')))
try:
return tuple(int(part) for part in key.split('.') if part.isdigit())
except ValueError:
# 处理无法转换的部分,例如返回一个默认值或记录错误
return ()
# 按键排序,确保顺序一致
numeric_keys_sorted = sorted(numeric_keys, key=sort_key)
result['items'] = [process_dict(item[1]) for item in numeric_keys_sorted]
@ -417,12 +421,12 @@ def combine_qualification_review(invalid_path, output_folder, qualification_path
# [{'资格性审查.资格要求': '符合本采购文件第一章第二款要求,并提供合格有效的证明材料'}, {'资格性审查.没有重大违法记录的书面声明': '是否提交参加政府采购活动前三年内在经营活动中没有重大违法记录的书面承诺或声明(格式要求详见本项目采购文件第六章相关格式要求)'}]
if __name__ == "__main__":
# qualification_path="C:\\Users\\Administrator\\Desktop\\货物标\\output3\\6.2定版视频会议磋商文件_qualification2.pdf"
output_folder = "D:\\flask_project\\flask_app\\static\\output\\output1\\6558a50a-13ea-4279-a5db-684935481c39\\tmp"
qualification_path = "C:\\Users\\Administrator\\Desktop\\货物标\\output3\\2-招标文件_qualification1.pdf"
output_folder = "D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89"
qualification_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile_qualification2.pdf"
# qualification_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\6558a50a-13ea-4279-a5db-684935481c39\\ztbfile_qualification2.pdf"
# notice_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\6558a50a-13ea-4279-a5db-684935481c39\\ztbfile_notice.pdf"
notice_path="C:\\Users\\Administrator\\Desktop\\货物标\\output5\\2-招标文件_notice.pdf"
notice_path="D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile_notice.pdf"
# knowledge_name = "6.2视频会议docx"
invalid_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\6558a50a-13ea-4279-a5db-684935481c39\\ztbfile.pdf"
invalid_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile.pdf"
res = combine_qualification_review(invalid_path,output_folder, qualification_path, notice_path)
print(json.dumps(res, ensure_ascii=False, indent=4))