diff --git a/flask_app/general/多线程提问.py b/flask_app/general/多线程提问.py index 8c14688..4ee6a38 100644 --- a/flask_app/general/多线程提问.py +++ b/flask_app/general/多线程提问.py @@ -241,7 +241,7 @@ def llm_call(question, knowledge_name,file_id, result_queue, ans_index, llm_type # assistant=create_assistant(knowledge_name) elif llm_type==2: print(f"qianwen_long! question:{question}") - qianwen_res = qianwen_long(file_id,question) + qianwen_res = qianwen_long(file_id,question) # qianwen_long returns a single str result_queue.put((ans_index,(question,qianwen_res))) return else : diff --git a/flask_app/general/通义千问long.py b/flask_app/general/通义千问long.py index f2e5f54..f9a86fa 100644 --- a/flask_app/general/通义千问long.py +++ b/flask_app/general/通义千问long.py @@ -27,7 +27,39 @@ def qianwen_long(file_id, user_query): # Generate a response based on the file ID completion = client.chat.completions.create( model="qwen-long", - top_p=0.5, + # top_p=0.5, + temperature=0.5, + # response_format={"type":"json_object"}, + messages=[ + { + 'role': 'system', + 'content': f'fileid://{file_id}' + }, + { + 'role': 'user', + 'content': user_query + } + ], + stream=False + ) + + # Return the response content + return completion.choices[0].message.content + +def qianwen_long_text(file_id, user_query): + print("call qianwen-long text...") + """ + Uses a previously uploaded file to generate a response based on a user query. 
+ """ + client = OpenAI( + api_key=os.getenv("DASHSCOPE_API_KEY"), + base_url="https://dashscope.aliyuncs.com/compatible-mode/v1" + ) + + # Generate a response based on the file ID + completion = client.chat.completions.create( + model="qwen-long", + # top_p=0.5, temperature=0.5, messages=[ { @@ -51,14 +83,14 @@ if __name__ == "__main__": file_path = "C:\\Users\\Administrator\\Desktop\\货物标\\output4\\招标文件111_tobidders_notice_part1.docx" file_id = upload_file(file_path) - user_query1 = ("根据该文档中的投标人(供应商、磋商)须知前附表,请你保留原有层次关系,以json格式返回给我表格中的信息。") + user_query1 = "该招标文件前附表中的项目名称是什么,请以json格式返回给我" user_query2 = ("请提供文件中关于资格审查的具体内容和标准。") start_time=time.time() # First query print("starting qianwen-long...") - result1 = qianwen_long(file_id, user_query1) + result1 = qianwen_long(file_id, user_query1) # single str result print("First Query Result:", result1) - + print(type(result1)) # # Second query # print("starting qianwen-long...") # result2 = qianwen_long(file_id, user_query2) diff --git a/flask_app/general/通用功能函数.py b/flask_app/general/通用功能函数.py index 28edc54..38e72f3 100644 --- a/flask_app/general/通用功能函数.py +++ b/flask_app/general/通用功能函数.py @@ -1,5 +1,8 @@ +# -*- encoding:utf-8 -*- import ast +import json import re +from collections import OrderedDict from flask_app.general.json_utils import clean_json_string from flask_app.general.多线程提问 import multi_threading @@ -66,4 +69,5 @@ def process_string_list(string_list): return [] else: # 如果没有匹配到内容,返回空列表 - return [] \ No newline at end of file + return [] + diff --git a/flask_app/main/无效标和废标和禁止投标整合.py b/flask_app/main/无效标和废标和禁止投标整合.py index e671b2b..e828d41 100644 --- a/flask_app/main/无效标和废标和禁止投标整合.py +++ b/flask_app/main/无效标和废标和禁止投标整合.py @@ -3,7 +3,7 @@ import json import os.path import time import re -from flask_app.general.通义千问long import upload_file, qianwen_long +from flask_app.general.通义千问long import upload_file, qianwen_long,qianwen_long_text from concurrent.futures import ThreadPoolExecutor from 
flask_app.main.禁止投标情形 import find_forbidden, process_string_list @@ -197,7 +197,7 @@ def clean_dict_datas(extracted_contents, keywords,excludes): #让正则表达 all_texts1.append(cleaned_text_no_spaces) else: - print(text_list) + # print(text_list) new_text_list=preprocess_text_list(text_list) # print(new_text_list) pattern = r'^\s*([((]\d+[))]|[A-Za-z]?\d+\s*(\.\s*\d+)*(\s|\.|、|.)?|[一二三四五六七八九十]+、)' @@ -321,80 +321,104 @@ def extract_values_if_contains(data, includes): #TODO:truncate_json_path为空的时候,单独提取表格数据 def handle_query(file_path, user_query, output_file, result_key, keywords, truncate_json_path): - excludes = ["说明表", "重新招标", "否决所有", "否决投标的条件", "备注:", "本人保证:"] - follow_up_keywords = [r'情\s*形\s*之\s*一', r'情\s*况\s*之\s*一', r'下\s*列', r'以\s*下'] - extracted_contents = extract_text_with_keywords(file_path, [keywords], follow_up_keywords) #提取正文(除表格) - # print(extracted_contents) - all_texts1, all_texts2 = clean_dict_datas(extracted_contents, keywords, excludes) # 列表 - all_tables1, all_tables2 = extract_sentences_from_json(truncate_json_path, keywords, follow_up_keywords) #提取表格数据(json_data) - qianwen_txt = all_texts1 + all_tables1 - selected_contents = set() # 使用 set 去重 + try: + excludes = ["说明表", "重新招标", "否决所有", "否决投标的条件", "备注:", "本人保证:"] + follow_up_keywords = [r'情\s*形\s*之\s*一', r'情\s*况\s*之\s*一', r'下\s*列', r'以\s*下'] + extracted_contents = extract_text_with_keywords(file_path, [keywords], follow_up_keywords) # 提取正文(除表格) + # print(extracted_contents) + all_texts1, all_texts2 = clean_dict_datas(extracted_contents, keywords, excludes) # 列表 + all_tables1, all_tables2 = extract_sentences_from_json(truncate_json_path, keywords, follow_up_keywords) # 提取表格数据(json_data) + qianwen_txt = all_texts1 + all_tables1 + selected_contents = set() # 使用 set 去重 - if qianwen_txt: - with open(output_file, 'w', encoding='utf-8') as file: - counter = 1 - for content in qianwen_txt: - file.write("..............." + '\n') - file.write(f"{counter}. 
{content}\n") - counter += 1 + if qianwen_txt: + with open(output_file, 'w', encoding='utf-8') as file: + counter = 1 + for content in qianwen_txt: + file.write("..............." + '\n') + file.write(f"{counter}. {content}\n") + counter += 1 - file_id = upload_file(output_file) - qianwen_ans = qianwen_long(file_id, user_query) - num_list = process_string_list(qianwen_ans) - print(result_key+"选中的序号:"+str(num_list)) + file_id = upload_file(output_file) + # qianwen_ans = qianwen_long(file_id, user_query) + qianwen_ans = qianwen_long_text(file_id, user_query) + num_list = process_string_list(qianwen_ans) + print(result_key + "选中的序号:" + str(num_list)) - for index in num_list: - if index - 1 < len(qianwen_txt): - content = qianwen_txt[index - 1] - selected_contents.add(content) + for index in num_list: + if 1 <= index <= len(qianwen_txt): + content = qianwen_txt[index - 1] + selected_contents.add(content) - # 无论 qianwen_txt 是否为空,都添加 all_texts2 和 all_tables2 的内容 - selected_contents.update(all_texts2) - selected_contents.update(all_tables2) + # 无论 qianwen_txt 是否为空,都添加 all_texts2 和 all_tables2 的内容 + selected_contents.update(all_texts2) + selected_contents.update(all_tables2) - # 如果 selected_contents 不为空,则返回结果,否则返回空字符串 - if selected_contents: - res = {result_key: list(selected_contents)} - else: - res = {result_key: ""} + # 如果 selected_contents 不为空,则返回结果,否则返回空字符串 + if selected_contents: + res = {result_key: list(selected_contents)} + else: + res = {result_key: ""} - return res + return res + except Exception as e: + print(f"handle_query 在处理 {result_key} 时发生异常: {e}") + return {result_key: ""} -def combine_find_invalid(invalid_docpath, output_dir, truncate_json_path,clause_path,qualification): +def combine_find_invalid(invalid_docpath, output_dir, truncate_json_path, clause_path, qualification): queries = [ - (r'否\s*决|无\s*效\s*投\s*标|被\s*拒\s*绝|予\s*以\s*拒\s*绝|投\s*标\s*失\s*效|投\s*标\s*无\s*效', - 
"以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:否决投标或拒绝投标或无效投标或投标失效的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果,x为符合的信息的序号,若情况不存在,返回[]。", - os.path.join(output_dir, "temp1.txt"), "否决和无效投标情形"), - (r'废\s*标', - "以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:废标项的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果,x为符合的信息的序号,若情况不存在,返回[]。", - os.path.join(output_dir, "temp2.txt"), "废标项") + ( + r'否\s*决|无\s*效\s*投\s*标|被\s*拒\s*绝|予\s*以\s*拒\s*绝|投\s*标\s*失\s*效|投\s*标\s*无\s*效', + "以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:否决投标或拒绝投标或无效投标或投标失效的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果,x为符合的信息的序号,若情况不存在,返回[]。", + os.path.join(output_dir, "temp1.txt"), + "否决和无效投标情形" + ), + ( + r'废\s*标', + "以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:废标项的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果,x为符合的信息的序号,若情况不存在,返回[]。", + os.path.join(output_dir, "temp2.txt"), + "废标项" + ) ] results = [] - # 使用线程池来并行处理查询 with ThreadPoolExecutor() as executor: futures = [] for keywords, user_query, output_file, result_key in queries: - future = executor.submit(handle_query, invalid_docpath, user_query, output_file, result_key, keywords, - truncate_json_path) - futures.append(future) - time.sleep(1) # 暂停1秒后再提交下一个任务 + future = executor.submit( + handle_query, + invalid_docpath, + user_query, + output_file, + result_key, + keywords, + truncate_json_path + ) + futures.append((future, result_key)) + time.sleep(0.5) # 暂停0.5秒后再提交下一个任务 + # 按照提交的顺序收集结果 + for future, result_key in futures: + try: + result = future.result() + except Exception as e: + print(f"线程处理 {result_key} 时出错: {e}") + result = {result_key: ""} + results.append(result) - for future in futures: - results.append(future.result()) - - #禁止投标 - print("starting不得存在的情形...") - forbidden_res = find_forbidden(truncate_json_path, clause_path, qualification) + # 禁止投标(find_forbidden)部分 + try: + # print("starting不得存在的情形...") + forbidden_res = find_forbidden(truncate_json_path, clause_path, 
qualification) + except Exception as e: + print(f"find_forbidden 处理时出错: {e}") + forbidden_res = {'不得存在的其他情形': ""} results.append(forbidden_res) combined_dict = {} for d in results: combined_dict.update(d) - - print("无效标与废标done...") - # return nest_json_under_key(combined_dict, "无效标与废标项") - return {"无效标与废标项":combined_dict} + # print("无效标与废标done...") + return {"无效标与废标项": combined_dict} if __name__ == '__main__': start_time = time.time() diff --git a/flask_app/main/禁止投标情形.py b/flask_app/main/禁止投标情形.py index c19bad9..6ed3b5b 100644 --- a/flask_app/main/禁止投标情形.py +++ b/flask_app/main/禁止投标情形.py @@ -4,7 +4,7 @@ import re from PyPDF2 import PdfWriter, PdfReader -from flask_app.general.通义千问long import upload_file, qianwen_long +from flask_app.general.通义千问long import upload_file, qianwen_long, qianwen_long_text from flask_app.general.通用功能函数 import process_string_list @@ -135,28 +135,31 @@ def merge_pdfs(paths, output_filename): print("禁止投标情形: No files to merge.") return output_path -def find_forbidden(truncate_json_path,clause_path,qualification=""): #投标人须知前附表 条款 评分前附表和资格审查表中 - # output_filename="merged.pdf" - # paths=[truncate1,truncate4] - # merged_filepath=merge_pdfs(paths,output_filename) #暂时废弃,评分前附表中的在'否决投标'中摘录了。 - if qualification: - file_id=upload_file(qualification) - # user_query_forbidden = "该招标文件规定的投标人不得存在的其他情形有哪些,请按json列表格式给我提供信息,键名为'不得存在的其他情形',请你不要回答有关\"信誉要求\"的内容,若文件中未说明,请在键值中填'未知'。" - user_query_forbidden = "该招标文件规定的投标人不得存在的其他情形有哪些,请以列表给我提供信息,形如[xx,xx,...],请你不要回答有关\"信誉要求\"的内容,若原文未提及,返回[]。" - qianwen_forbidden_str = qianwen_long(file_id, user_query_forbidden) - else: - qianwen_forbidden_str="[]" - actual_list=process_string_list(qianwen_forbidden_str) #提取出字符串列表 ["xxx","xx"] - includes = ["不得存在", "不得与", "禁止投标", "对投标人的纪律"] - excludes = ["招标", "评标", "定标"] - forbidden_results = extract_and_format_from_paths([truncate_json_path, clause_path], includes,excludes) - # print(forbidden_results) - processed_results = extract_unique_items_from_texts(forbidden_results) - # 
print(processed_results) - merged_forbidden_list = list(dict.fromkeys(actual_list + processed_results)) - forbidden_dict={'不得存在的其他情形':merged_forbidden_list} - return forbidden_dict +def find_forbidden(truncate_json_path, clause_path, qualification=""): + try: + if qualification: + file_id = upload_file(qualification) + user_query_forbidden = ( + "该招标文件规定的投标人不得存在的其他情形有哪些,请以列表给我提供信息,形如[xx,xx,...]," + "请你不要回答有关\"信誉要求\"的内容,若原文未提及,返回[]。" + ) + qianwen_forbidden_str = qianwen_long_text(file_id, user_query_forbidden) + else: + qianwen_forbidden_str = "[]" + + actual_list = process_string_list(qianwen_forbidden_str) # 提取出字符串列表 ["xxx","xx"] + includes = ["不得存在", "不得与", "禁止投标", "对投标人的纪律"] + excludes = ["招标", "评标", "定标"] + forbidden_results = extract_and_format_from_paths([truncate_json_path, clause_path], includes, excludes) + processed_results = extract_unique_items_from_texts(forbidden_results) + merged_forbidden_list = list(dict.fromkeys(actual_list + processed_results)) + forbidden_dict = {'不得存在的其他情形': merged_forbidden_list} + return forbidden_dict + except Exception as e: + print(f"find_forbidden 在处理时发生异常: {e}") + return {'不得存在的其他情形': ""} + #TODO:不得存在的情况文中有很多内容,货物标中采用了全文搜索的逻辑。 if __name__ == '__main__': truncate_json_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\006decc2-b3b5-4898-9b9f-4b0eab4e173f\\truncate_output.json" diff --git a/flask_app/货物标/无效标和废标和禁止投标整合main.py b/flask_app/货物标/无效标和废标和禁止投标整合main.py index bbc2c0d..ea8f986 100644 --- a/flask_app/货物标/无效标和废标和禁止投标整合main.py +++ b/flask_app/货物标/无效标和废标和禁止投标整合main.py @@ -3,10 +3,10 @@ import json import os.path import time import re -from flask_app.general.通义千问long import upload_file, qianwen_long -from concurrent.futures import ThreadPoolExecutor +from flask_app.general.通义千问long import upload_file, qianwen_long,qianwen_long_text from flask_app.general.通用功能函数 import process_string_list from docx import Document +from concurrent.futures import ThreadPoolExecutor, as_completed from collections import OrderedDict 
#处理跨页的段落 def preprocess_paragraphs(paragraphs): @@ -429,60 +429,71 @@ def extract_values_if_contains(data, includes): # 以上是原文内容,文本内的信息以'...............'分割,请你根据该信息回答:否决投标或拒绝投标或无效投标或使投标失效的情况有哪些?文本中可能存在无关的信息,请你准确筛选所需的信息并返回。最终结果以json列表格式返回给我,键名为'否决和无效投标情形',你的回答完全忠于原文内容,且回答内容与原文内容一致,要求完整与准确,不能擅自总结或者概括。", def handle_query(file_path, user_query, output_file, result_key, keywords): - excludes = ["说明表", "重新招标", "否决所有", "否决投标的条件", "备注:", "本人保证:", "我方"] - follow_up_keywords = [r'情\s*形\s*之\s*一', r'情\s*况\s*之\s*一', r'下\s*列', r'以\s*下'] - extracted_contents = extract_text_with_keywords(file_path, [keywords], follow_up_keywords) # 字典结果 - all_texts1, all_texts2 = clean_dict_datas(extracted_contents, keywords, excludes) # 列表 - # table_data_list=read_docx_last_column(truncate_file) #从投标人须知前附表中提取信息生成列表data,每个元素为'一行信息' - table_data_list = read_tables_from_docx(file_path) - all_tables1, all_tables2 = extract_table_with_keywords(table_data_list, keywords, follow_up_keywords) - qianwen_txt = all_texts1 + all_tables1 - # Proceed only if there is content to write - selected_contents = set() # 使用 set 去重 + try: + excludes = ["说明表", "重新招标", "否决所有", "否决投标的条件", "备注:", "本人保证:", "我方"] + follow_up_keywords = [r'情\s*形\s*之\s*一', r'情\s*况\s*之\s*一', r'下\s*列', r'以\s*下'] + extracted_contents = extract_text_with_keywords(file_path, [keywords], follow_up_keywords) # 字典结果 + all_texts1, all_texts2 = clean_dict_datas(extracted_contents, keywords, excludes) # 列表 + # table_data_list=read_docx_last_column(truncate_file) #从投标人须知前附表中提取信息生成列表data,每个元素为'一行信息' + table_data_list = read_tables_from_docx(file_path) + all_tables1, all_tables2 = extract_table_with_keywords(table_data_list, keywords, follow_up_keywords) + qianwen_txt = all_texts1 + all_tables1 + # Proceed only if there is content to write + selected_contents = set() # 使用 set 去重 - if qianwen_txt: - with open(output_file, 'w', encoding='utf-8') as file: - counter = 1 - for content in qianwen_txt: - file.write(f"{counter}. 
{content}\n") - file.write("..............." + '\n') - counter += 1 + if qianwen_txt: + with open(output_file, 'w', encoding='utf-8') as file: + counter = 1 + for content in qianwen_txt: + file.write(f"{counter}. {content}\n") + file.write("..............." + '\n') + counter += 1 - file_id = upload_file(output_file) - qianwen_ans = qianwen_long(file_id, user_query) - num_list = process_string_list(qianwen_ans) - print(result_key + "选中的序号:" + str(num_list)) + file_id = upload_file(output_file) + # qianwen_ans = qianwen_long(file_id, user_query) + qianwen_ans = qianwen_long_text(file_id, user_query) + num_list = process_string_list(qianwen_ans) + print(result_key + "选中的序号:" + str(num_list)) - for index in num_list: - if index - 1 < len(qianwen_txt): - content = qianwen_txt[index - 1] - selected_contents.add(content) + for index in num_list: + if 1 <= index <= len(qianwen_txt): # 1-based; skip 0 and negatives + content = qianwen_txt[index - 1] + selected_contents.add(content) - # 无论 qianwen_txt 是否为空,都添加 all_texts2 和 all_tables2 的内容 - selected_contents.update(all_texts2) - selected_contents.update(all_tables2) + # 无论 qianwen_txt 是否为空,都添加 all_texts2 和 all_tables2 的内容 + selected_contents.update(all_texts2) + selected_contents.update(all_tables2) - # 如果 selected_contents 不为空,则返回结果,否则返回空字符串 - if selected_contents: - res = {result_key: list(selected_contents)} - else: - res = {result_key: ""} - - return res + # 如果 selected_contents 不为空,则返回结果,否则返回空字符串 + if selected_contents: + res = {result_key: list(selected_contents)} + else: + res = {result_key: ""} + return res + except Exception as e: + print(f"handle_query 在处理 {result_key} 时发生异常: {e}") + return {result_key: ""} def combine_find_invalid(file_path, output_dir): queries = [ ( - r'否\s*决|无\s*效\s*投\s*标|无\s*效\s*文\s*件|文\s*件\s*无\s*效|无\s*效\s*响\s*应|无\s*效\s*报\s*价|无\s*效\s*标|视\s*为\s*无\s*效|被\s*拒\s*绝|予\s*以\s*拒\s*绝|投\s*标\s*失\s*效|投\s*标\s*无\s*效', - # r'否\s*决|无\s*效|被\s*拒\s*绝|予\s*以\s*拒\s*绝', - 
"以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:否决投标或拒绝投标或无效投标或投标失效的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果,x为符合的信息的序号,若情况不存在,返回[]。", - os.path.join(output_dir, "temp1.txt"), "否决和无效投标情形"), - (r'废\s*标', - "以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:废标项的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果,x为符合的信息的序号,若情况不存在,返回[]。", - os.path.join(output_dir, "temp2.txt"), "废标项"), - (r'不\s*得|禁\s*止\s*投\s*标', - "以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,每条信息规定了各方不得存在的情形,请回答:在这些信息中,主语是投标人或中标人或供应商或联合体投标各方或磋商小组的信息有哪些?不要返回主语是招标人或采购人或评标委员会的信息,请你筛选所需的信息并将它的序号返回。请以[x,x,x]格式返回给我结果,示例返回为[1,4,6],若情况不存在,返回[]。", - os.path.join(output_dir, "temp3.txt"), "不得存在的情形") + r'否\s*决|无\s*效\s*投\s*标|无\s*效\s*文\s*件|文\s*件\s*无\s*效|无\s*效\s*响\s*应|无\s*效\s*报\s*价|无\s*效\s*标|视\s*为\s*无\s*效|被\s*拒\s*绝|予\s*以\s*拒\s*绝|投\s*标\s*失\s*效|投\s*标\s*无\s*效', + "以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:否决投标或拒绝投标或无效投标或投标失效的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果,x为符合的信息的序号,若情况不存在,返回[]。", + os.path.join(output_dir, "temp1.txt"), + "否决和无效投标情形" + ), + ( + r'废\s*标', + "以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:废标项的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果,x为符合的信息的序号,若情况不存在,返回[]。", + os.path.join(output_dir, "temp2.txt"), + "废标项" + ), + ( + r'不\s*得|禁\s*止\s*投\s*标', + "以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,每条信息规定了各方不得存在的情形,请回答:在这些信息中,主语是投标人或中标人或供应商或联合体投标各方或磋商小组的信息有哪些?不要返回主语是招标人或采购人或评标委员会的信息,请你筛选所需的信息并将它的序号返回。请以[x,x,x]格式返回给我结果,示例返回为[1,4,6],若情况不存在,返回[]。", + os.path.join(output_dir, "temp3.txt"), + "不得存在的情形" + ) ] results = [] @@ -491,23 +502,22 @@ def combine_find_invalid(file_path, output_dir): futures = [] for keywords, user_query, output_file, result_key in queries: future = executor.submit(handle_query, file_path, user_query, output_file, result_key, keywords) - futures.append(future) - time.sleep(1) # 暂停1秒后再提交下一个任务 + futures.append((future, result_key)) # 保持顺序 + time.sleep(0.5) # 暂停0.5秒后再提交下一个任务 - for future in futures: - 
results.append(future.result()) - - # #禁止投标 - # print("starting不得存在的情形...") - # forbidden_res = find_forbidden(truncate_json_path, clause_path) - # results.append(forbidden_res) + for future, result_key in futures: + try: + result = future.result() + except Exception as e: + print(f"线程处理 {result_key} 时出错: {e}") + result = {result_key: ""} + results.append(result) combined_dict = {} for d in results: combined_dict.update(d) print("无效标与废标done...") - # return nest_json_under_key(combined_dict, "无效标与废标项") return {"无效标与废标项": combined_dict} @@ -518,8 +528,8 @@ if __name__ == '__main__': # truncate_file="C:\\Users\\Administrator\\Desktop\\货物标\\output4\\招标文件(实高电子显示屏)_tobidders_notice_part1.docx" clause_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\77a48c63-f39f-419b-af2a-7b3dbf41b70b\\clause1.json" # doc_path="C:\\Users\\Administrator\\Desktop\\货物标\\zbfilesdocx\\磋商文件(1).docx" - doc_path = 'D:\\flask_project\\flask_app\\static\\output\\output1\\77a48c63-f39f-419b-af2a-7b3dbf41b70b\\ztbfile.docx' - output_dir = "D:\\flask_project\\flask_app\\static\\output\\output1\\77a48c63-f39f-419b-af2a-7b3dbf41b70b" + doc_path = 'D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile.docx' + output_dir = "D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\tmp" results = combine_find_invalid(doc_path, output_dir) end_time = time.time() print("Elapsed time:", str(end_time - start_time)) diff --git a/flask_app/货物标/评分标准提取main.py b/flask_app/货物标/评分标准提取main.py index 16ce5b5..8ac6a7c 100644 --- a/flask_app/货物标/评分标准提取main.py +++ b/flask_app/货物标/评分标准提取main.py @@ -3,7 +3,7 @@ import json import re import time from collections import defaultdict -from flask_app.general.通义千问long import upload_file, qianwen_long +from flask_app.general.通义千问long import upload_file, qianwen_long, qianwen_long_text def combine_technical_and_business(data, target_values): @@ -96,12 +96,15 @@ def 
parse_json_with_duplicates(raw_string): def custom_object_pairs_hook(pairs): d = defaultdict(list) for key, value in pairs: - # 如果值是字典或列表,递归处理 - if isinstance(value, dict): - value = process_dict(value) - elif isinstance(value, list): - value = process_list(value) - d[key].append(value) + try: + # 如果值是字典或列表,递归处理 + if isinstance(value, dict): + value = process_dict(value) + elif isinstance(value, list): + value = process_list(value) + d[key].append(value) + except Exception as e: + d[key].append(value) # 根据需求决定是否跳过或保留原值 # 将有多个值的键转换为列表,单个值的键保持原样 return {key: (values if len(values) > 1 else values[0]) for key, values in d.items()} @@ -115,7 +118,10 @@ def parse_json_with_duplicates(raw_string): Returns: dict: 处理后的字典。 """ - return custom_object_pairs_hook(d.items()) + try: + return custom_object_pairs_hook(d.items()) + except Exception as e: + return {} def process_list(l): """ @@ -127,7 +133,10 @@ def parse_json_with_duplicates(raw_string): Returns: list: 处理后的列表。 """ - return [process_dict(item) if isinstance(item, dict) else item for item in l] + try: + return [process_dict(item) if isinstance(item, dict) else item for item in l] + except Exception as e: + return [] """输入字符串,提取 { 和 } 之间的内容,并将其解析为字典""" if not raw_string.strip(): @@ -198,13 +207,11 @@ def combine_evaluation_standards(truncate_file): # 定义用户查询 user_query1 = ( - "根据该文档,你判断它是否有具体的关于技术评分或商务评分或投标报价的评分要求," - "如果有,返回'是',否则返回'否'。" + "根据该文档,你判断它是否有关于技术评分或商务评分或投标报价的具体的评分及要求,如果有,返回'是',否则返回'否'。" ) # 应对竞争性谈判这种无评分要求的情况 # 执行查询 - judge_res = qianwen_long(file_id, user_query1) - + judge_res = qianwen_long_text(file_id, user_query1) # 默认 judge 为 True judge = True @@ -254,7 +261,7 @@ def combine_evaluation_standards(truncate_file): """ ) # 执行第二个查询 - evaluation_res = qianwen_long(file_id, user_query) + evaluation_res = qianwen_long_text(file_id, user_query) #有些重复的键名,只有qianwen_long_text能保留 # print(evaluation_res) # 清理和处理响应 cleaned_evaluation_res = parse_json_with_duplicates(evaluation_res) #处理重复键名的情况 @@ -290,7 +297,7 @@ def 
combine_evaluation_standards(truncate_file): if __name__ == "__main__": start_time=time.time() - truncate_file="C:\\Users\\Administrator\\Desktop\\货物标\\output2\\招标文件(107国道)_evaluation_method.pdf" + truncate_file="D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile_evaluation_method.pdf" # truncate_file = "C:\\Users\\Administrator\\Desktop\\货物标\\output2\\2-招标文件(统计局智能终端二次招标)_evaluation_method.pdf" # truncate_file="C:\\Users\\Administrator\\Desktop\\货物标\\output2\\广水市妇幼招标文件最新(W改)_evaluation_method.pdf" # truncate_file = "C:\\Users\\Administrator\\Desktop\\fsdownload\\2d481945-1f82-45a5-8e56-7fafea4a7793\\ztbfile_evaluation_method.pdf" diff --git a/flask_app/货物标/资格审查main.py b/flask_app/货物标/资格审查main.py index ab57c5c..9499949 100644 --- a/flask_app/货物标/资格审查main.py +++ b/flask_app/货物标/资格审查main.py @@ -94,7 +94,11 @@ def process_dict(data): elif re.match(r'^\d+\.\d+$', key): # 单层小数点 return (float(key),) else: # 多层序号,按字符串处理 - return tuple(map(int, key.split('.'))) + try: + return tuple(int(part) for part in key.split('.') if part.isdigit()) + except ValueError: + # 处理无法转换的部分,例如返回一个默认值或记录错误 + return () # 按键排序,确保顺序一致 numeric_keys_sorted = sorted(numeric_keys, key=sort_key) result['items'] = [process_dict(item[1]) for item in numeric_keys_sorted] @@ -417,12 +421,12 @@ def combine_qualification_review(invalid_path, output_folder, qualification_path # [{'资格性审查.资格要求': '符合本采购文件第一章第二款要求,并提供合格有效的证明材料'}, {'资格性审查.没有重大违法记录的书面声明': '是否提交参加政府采购活动前三年内在经营活动中没有重大违法记录的书面承诺或声明(格式要求详见本项目采购文件第六章相关格式要求)'}] if __name__ == "__main__": # qualification_path="C:\\Users\\Administrator\\Desktop\\货物标\\output3\\6.2定版视频会议磋商文件_qualification2.pdf" - output_folder = "D:\\flask_project\\flask_app\\static\\output\\output1\\6558a50a-13ea-4279-a5db-684935481c39\\tmp" - qualification_path = "C:\\Users\\Administrator\\Desktop\\货物标\\output3\\2-招标文件_qualification1.pdf" + output_folder = 
"D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89" + qualification_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile_qualification2.pdf" # qualification_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\6558a50a-13ea-4279-a5db-684935481c39\\ztbfile_qualification2.pdf" # notice_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\6558a50a-13ea-4279-a5db-684935481c39\\ztbfile_notice.pdf" - notice_path="C:\\Users\\Administrator\\Desktop\\货物标\\output5\\2-招标文件_notice.pdf" + notice_path="D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile_notice.pdf" # knowledge_name = "6.2视频会议docx" - invalid_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\6558a50a-13ea-4279-a5db-684935481c39\\ztbfile.pdf" + invalid_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile.pdf" res = combine_qualification_review(invalid_path,output_folder, qualification_path, notice_path) print(json.dumps(res, ensure_ascii=False, indent=4))