10.22代码结构优化

This commit is contained in:
zy123 2024-10-22 10:06:22 +08:00
parent b4e79b0654
commit a15230873a
65 changed files with 130 additions and 242 deletions

View File

@ -4,17 +4,15 @@ import logging
import os import os
import time import time
from flask_app.main.format_change import docx2pdf, pdf2docx from flask_app.general.format_change import docx2pdf
from flask_app.main.json_utils import clean_json_string from flask_app.general.json_utils import clean_json_string
from flask_app.main.判断是否分包等 import merge_json_to_list, read_questions_from_judge from flask_app.general.多线程提问 import read_questions_from_file, multi_threading
from flask_app.main.基础信息整合 import judge_consortium_bidding from flask_app.general.通义千问long import upload_file
from flask_app.main.多线程提问 import read_questions_from_file, multi_threading
from flask_app.main.通义千问long import upload_file
from flask_app.货物标.基础信息解析main import aggregate_basic_info_goods from flask_app.货物标.基础信息解析main import aggregate_basic_info_goods
from flask_app.货物标.货物标截取pdf import truncate_pdf_specific_goods from flask_app.货物标.货物标截取pdf import truncate_pdf_specific_goods
from flask_app.main.截取pdf import truncate_pdf_specific_engineering from flask_app.main.截取pdf import truncate_pdf_specific_engineering
from flask_app.main.post_processing import inner_post_processing from flask_app.general.post_processing import inner_post_processing
from flask_app.main.基础信息整合 import aggregate_basic_info_engineering from flask_app.old_version.基础信息整合 import aggregate_basic_info_engineering
def get_global_logger(unique_id): def get_global_logger(unique_id):
if unique_id is None: if unique_id is None:

View File

@ -9,7 +9,8 @@ import time
import requests import requests
from dashscope import Assistants, Messages, Runs, Threads from dashscope import Assistants, Messages, Runs, Threads
from llama_index.indices.managed.dashscope import DashScopeCloudRetriever from llama_index.indices.managed.dashscope import DashScopeCloudRetriever
from flask_app.main.通义千问long import qianwen_long, upload_file from flask_app.general.通义千问long import qianwen_long
prompt = """ prompt = """
# 角色 # 角色
你是一个文档处理专家专门负责理解和操作基于特定内容的文档任务这包括解析总结搜索或生成与给定文档相关的各类信息 你是一个文档处理专家专门负责理解和操作基于特定内容的文档任务这包括解析总结搜索或生成与给定文档相关的各类信息

View File

@ -0,0 +1,47 @@
import ast
import re
def judge_consortium_bidding(baseinfo_list):
    """Tell whether consortium bidding is accepted, and drop that flag from the data.

    Each dict in ``baseinfo_list`` is checked for the key "是否接受联合体投标".
    The key is removed from every dict that carries it; the dicts (and therefore
    the caller's list) are modified in place.

    Args:
        baseinfo_list: list of dicts holding the extracted basic information.

    Returns:
        bool: True if at least one dict had "是否接受联合体投标" set to "是",
        otherwise False.
    """
    flag_key = "是否接受联合体投标"
    accept_bidding = False
    for baseinfo in baseinfo_list:
        # pop() both reads and removes the flag; the default keeps dicts
        # without the key untouched.
        answer = baseinfo.pop(flag_key, None)
        # The documented intent is "value equals 是"; the previous comparison
        # against an empty string could never match a real "是" answer.
        if answer == "是":
            accept_bidding = True
    return accept_bidding
def process_string_list(string_list):
    """Extract the first ``[...]`` group from a text and return its items as a list.

    The text between the first "[" and the next "]" (on a single line) is split
    on commas and each item is stripped of surrounding whitespace; empty items
    are dropped.

    Args:
        string_list: text that may contain a bracketed, comma-separated list.

    Returns:
        list: a list of ints when every item consists of digits only, otherwise
        a list of the item strings exactly as written. An empty list is returned
        when there is no bracketed group, when the group is empty, or when a
        digit-only item cannot be converted to an int.
    """
    match = re.search(r'\[(.*?)\]', string_list)
    if not match:
        return []

    items = [part.strip() for part in match.group(1).split(',')]
    non_empty = [item for item in items if item]
    if not non_empty:
        return []

    # Items are parsed directly instead of being re-quoted and evaluated:
    # quoting broke on apostrophes (the whole list was discarded) and rewrote
    # backslash sequences inside items.
    if all(item.isdigit() for item in items):
        try:
            return [int(item) for item in non_empty]
        except ValueError as e:
            # str.isdigit() accepts characters such as superscript digits that
            # int() rejects; keep the previous "report and return empty" outcome.
            print(f"禁止投标情形: Error parsing list: {e}")
            return []
    return non_empty

View File

@ -1,6 +0,0 @@
from ..main.通义千问long import qianwen_long,upload_file
def read_dictory(file_path):
file_id=upload_file(file_path)
user_query="根据该文档中的评标办法前附表,请你列出该文件的技术标以及它对应的具体评分要求,若对应内容中存在其他信息,在嵌套键如'技术标'中新增键名'备注'存放该信息。如果评分内容不是这3个则返回文档中给定的评分内容以及它的评分要求都以json的格式返回结果。请不要回答有关形式、资格、响应性评审标准的内容"

View File

@ -1,58 +0,0 @@
import json
def find_keys_by_value(target_value, json_data):
    """Return the keys of ``json_data`` whose value matches ``target_value``.

    Exact matches win. Only when there is none does the lookup fall back to
    string values that merely start with ``target_value``.
    """
    exact_hits = []
    for name, content in json_data.items():
        if content == target_value:
            exact_hits.append(name)
    if exact_hits:
        return exact_hits

    # Fallback: prefix match, restricted to string values.
    prefix_hits = []
    for name, content in json_data.items():
        if isinstance(content, str) and content.startswith(target_value):
            prefix_hits.append(name)
    return prefix_hits
def find_keys_with_prefix(key_prefix, json_data):
    """Return the keys that start with ``key_prefix``, excluding ``key_prefix`` itself."""
    subheadings = []
    for name in json_data:
        if not name.startswith(key_prefix):
            continue
        if name != key_prefix:
            subheadings.append(name)
    return subheadings
def extract_json(data, target_values):
    """Collect, per target value, the matching entry of ``data`` plus its sub-entries.

    For every target value the matching keys are looked up with
    ``find_keys_by_value``; each matched key is stored together with all keys
    that extend it (``find_keys_with_prefix``). When several keys match the
    same target value, the last one is the one kept.

    Returns:
        dict mapping each matched target value to a dict of
        ``{key: data[key], subkey: data[subkey], ...}``.
    """
    # NOTE(review): assumes the result assignment belongs to the per-key loop;
    # confirm against the original indentation.
    results = {}
    for target_value in target_values:
        for key in find_keys_by_value(target_value, data):
            section = {key: data[key]}
            section.update(
                (subkey, data[subkey]) for subkey in find_keys_with_prefix(key, data)
            )
            results[target_value] = section
    return results
def renumber_keys(data, level=1):
    """Rewrite the leading number of every dotted key to "1", recursively.

    Non-dict values are returned unchanged. For dicts, the part of each key
    before the first "." is replaced by "1" and the value is processed the
    same way. ``level`` only tracks the recursion depth and does not affect
    the result.
    """
    if not isinstance(data, dict):
        return data

    renumbered = {}
    for old_key, value in data.items():
        # Keep everything from the first dot onwards, swap the head for "1".
        _, dot, remainder = old_key.partition('.')
        renumbered['1' + dot + remainder] = renumber_keys(value, level + 1)
    return renumbered
def json_results(extr_json):
    """Apply ``renumber_keys`` to every top-level value of ``extr_json``.

    The top-level keys are kept as they are; a new dict is returned.
    """
    return {name: renumber_keys(section) for name, section in extr_json.items()}
if __name__ == "__main__":
    # Script entry: read clause3.json, pull out the "投标文件" section,
    # renumber its keys and write the result to output_results1.json.
    target_values = ["投标文件"]
    with open('clause3.json', 'r', encoding='utf-8') as source:
        clauses = json.load(source)

    selected = extract_json(clauses, target_values)
    renumbered = json_results(selected)

    with open('output_results1.json', 'w', encoding='utf-8') as sink:
        json.dump(renumbered, sink, indent=4, ensure_ascii=False)
    print("JSON文件已按要求重新编号并保存.")

View File

@ -1,28 +0,0 @@
import json
def search_key_in_json(file_path, search_key):
    """Look up ``search_key`` in a JSON file, descending into nested dicts.

    The file is read as UTF-8 JSON. The search is depth-first in key order:
    a dict is checked for the key itself before any of its dict-valued
    children are visited. Lists are not descended into.

    Returns:
        str: "<key> : <value>" for the first hit, or "<key> : /" when the key
        does not occur.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        document = json.load(handle)

    # Explicit stack instead of recursion; children are pushed in reverse so
    # they are visited in their original order (pre-order traversal).
    pending = [document]
    while pending:
        node = pending.pop()
        if search_key in node:
            return f"{search_key} : {node[search_key]}"
        children = [child for _, child in node.items() if isinstance(child, dict)]
        pending.extend(reversed(children))
    return f"{search_key} : /"
# Usage example
# NOTE(review): these statements appear to sit at module level without a
# __main__ guard, so they would run on import — confirm.
file_path = 'C:/Users/Administrator/Downloads/truncate_output2.json' # replace with the path to your JSON file
search_key = '多标段投标' # replace with the key you want to search for
print(search_key_in_json(file_path, search_key))

View File

@ -6,10 +6,9 @@ from datetime import datetime, timedelta
from flask import Flask, request, jsonify, Response, stream_with_context, g from flask import Flask, request, jsonify, Response, stream_with_context, g
import json import json
import os import os
from flask_app.general.little_zbparse import little_parse_main from flask_app.general.little_zbparse import little_parse_main
from flask_app.main.download import download_file from flask_app.main.download import download_file
from flask_app.main.post_processing import outer_post_processing from flask_app.general.post_processing import outer_post_processing
from flask_app.main.工程标解析main import engineering_bid_main from flask_app.main.工程标解析main import engineering_bid_main
from flask_app.货物标.货物标解析main import goods_bid_main from flask_app.货物标.货物标解析main import goods_bid_main
from flask_app.货物标.技术要求提取 import get_technical_requirements_main from flask_app.货物标.技术要求提取 import get_technical_requirements_main

View File

@ -2,10 +2,10 @@
import json import json
import os.path import os.path
import re import re
from flask_app.main.json_utils import extract_content_from_json # 可以选择性地导入特定的函数 from flask_app.general.json_utils import extract_content_from_json # 可以选择性地导入特定的函数
from flask_app.main.提取打勾符号 import read_pdf_and_judge_main from flask_app.main.提取打勾符号 import read_pdf_and_judge_main
from flask_app.main.多线程提问 import multi_threading from flask_app.general.多线程提问 import multi_threading
from flask_app.main.通义千问long import qianwen_long,upload_file from flask_app.general.通义千问long import qianwen_long,upload_file
#调用qianwen-ask之后组织提示词问百炼。 #调用qianwen-ask之后组织提示词问百炼。
def construct_judge_questions(json_data): def construct_judge_questions(json_data):

View File

@ -1,8 +1,8 @@
# -*- encoding:utf-8 -*- # -*- encoding:utf-8 -*-
import json import json
from flask_app.main.json_utils import clean_json_string from flask_app.general.json_utils import clean_json_string
from flask_app.main.通义千问long import upload_file, qianwen_long from flask_app.general.通义千问long import upload_file, qianwen_long
# def combine_technical_and_business(data, target_values1, target_values2): # def combine_technical_and_business(data, target_values1, target_values2):
# extracted_data = {} # 根级别存储所有数据 # extracted_data = {} # 根级别存储所有数据

View File

@ -1,13 +1,13 @@
import copy import copy
import json import json
import threading
import time import time
import concurrent.futures import concurrent.futures
from flask_app.main.json_utils import clean_json_string, nest_json_under_key,rename_outer_key from flask_app.general.json_utils import clean_json_string, rename_outer_key
from flask_app.general.通用功能函数 import judge_consortium_bidding
from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
from flask_app.main.判断是否分包等 import judge_whether_main, read_questions_from_judge, merge_json_to_list from flask_app.main.判断是否分包等 import read_questions_from_judge, merge_json_to_list
from flask_app.main.多线程提问 import read_questions_from_file, multi_threading from flask_app.general.多线程提问 import read_questions_from_file, multi_threading
from flask_app.main.通义千问long import upload_file from flask_app.general.通义千问long import upload_file
def aggregate_basic_info_engineering(baseinfo_list): def aggregate_basic_info_engineering(baseinfo_list):
@ -98,22 +98,6 @@ def dynamic_key_handling(key_groups, detected_keys):
key_groups["其他信息"].append(key) key_groups["其他信息"].append(key)
def judge_consortium_bidding(baseinfo_list):
    """Tell whether consortium bidding is accepted, and drop that flag from the data.

    Each dict in ``baseinfo_list`` is checked for the key "是否接受联合体投标".
    The key is removed from every dict that carries it; the dicts (and therefore
    the caller's list) are modified in place.

    Args:
        baseinfo_list: list of dicts holding the extracted basic information.

    Returns:
        bool: True if at least one dict had "是否接受联合体投标" set to "是",
        otherwise False.
    """
    flag_key = "是否接受联合体投标"
    accept_bidding = False
    for baseinfo in baseinfo_list:
        # pop() both reads and removes the flag; the default keeps dicts
        # without the key untouched.
        answer = baseinfo.pop(flag_key, None)
        # The documented intent is "value equals 是"; the previous comparison
        # against an empty string could never match a real "是" answer.
        if answer == "是":
            accept_bidding = True
    return accept_bidding
def update_baseinfo_lists(baseinfo_list1, baseinfo_list2): def update_baseinfo_lists(baseinfo_list1, baseinfo_list2):
# 创建一个字典,用于存储 baseinfo_list1 中的所有键值对 # 创建一个字典,用于存储 baseinfo_list1 中的所有键值对
combined_dict = {} combined_dict = {}

View File

@ -6,15 +6,15 @@ from concurrent.futures import ThreadPoolExecutor
from flask_app.main.截取pdf import truncate_pdf_multiple from flask_app.main.截取pdf import truncate_pdf_multiple
from flask_app.main.table_content_extraction import extract_tables_main from flask_app.main.table_content_extraction import extract_tables_main
from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json
from flask_app.main.json_utils import transform_json_values, combine_json_results from flask_app.general.json_utils import transform_json_values
from flask_app.main.无效标和废标和禁止投标整合 import combine_find_invalid from flask_app.main.无效标和废标和禁止投标整合 import combine_find_invalid
from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
import concurrent.futures import concurrent.futures
from flask_app.main.基础信息整合快速版 import combine_basic_info from flask_app.main.基础信息整合快速版 import combine_basic_info
from flask_app.main.资格审查模块 import combine_review_standards from flask_app.main.资格审查模块 import combine_review_standards
from flask_app.main.商务标技术标整合 import combine_evaluation_standards from flask_app.main.商务标技术标整合 import combine_evaluation_standards
from flask_app.main.format_change import pdf2docx, docx2pdf from flask_app.general.format_change import pdf2docx, docx2pdf
from flask_app.main.docx截取docx import copy_docx from flask_app.general.docx截取docx import copy_docx
def get_global_logger(unique_id): def get_global_logger(unique_id):
if unique_id is None: if unique_id is None:

View File

@ -4,12 +4,11 @@ import re
import json import json
import time import time
from flask_app.main.多线程提问 import multi_threading from flask_app.general.多线程提问 import multi_threading
from flask_app.main.根据条款号整合json import process_and_merge_entries,process_and_merge2 from flask_app.main.根据条款号整合json import process_and_merge_entries,process_and_merge2
from flask_app.main.json_utils import clean_json_string from flask_app.general.json_utils import clean_json_string
from flask_app.main.截取pdf import truncate_pdf_main
from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json
from flask_app.main.多线程提问 import upload_file from flask_app.general.通义千问long import upload_file
from flask_app.main.截取pdf import merge_pdfs from flask_app.main.截取pdf import merge_pdfs
prompt = """ prompt = """
# 角色 # 角色

View File

@ -1,7 +1,7 @@
from PyPDF2 import PdfReader, PdfWriter from PyPDF2 import PdfReader, PdfWriter
import re # 导入正则表达式库 import re # 导入正则表达式库
import os # 用于文件和文件夹操作 import os # 用于文件和文件夹操作
from flask_app.main.merge_pdfs import merge_pdfs from flask_app.general.merge_pdfs import merge_pdfs
def clean_page_content(text, common_header): def clean_page_content(text, common_header):
# 首先删除抬头公共部分 # 首先删除抬头公共部分
if common_header: # 确保有公共抬头才进行替换 if common_header: # 确保有公共抬头才进行替换

View File

@ -3,8 +3,7 @@ import json
import os.path import os.path
import time import time
import re import re
from flask_app.main.json_utils import combine_json_results, nest_json_under_key from flask_app.general.通义千问long import upload_file, qianwen_long
from flask_app.main.通义千问long import upload_file, qianwen_long
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from flask_app.main.禁止投标情形 import find_forbidden, process_string_list from flask_app.main.禁止投标情形 import find_forbidden, process_string_list

View File

@ -1,11 +1,12 @@
import ast
import json import json
import os import os
import re import re
from PyPDF2 import PdfWriter, PdfReader from PyPDF2 import PdfWriter, PdfReader
from flask_app.main.通义千问long import upload_file, qianwen_long from flask_app.general.通义千问long import upload_file, qianwen_long
from flask_app.general.通用功能函数 import process_string_list
def extract_and_format_from_paths(json_paths, includes, excludes): def extract_and_format_from_paths(json_paths, includes, excludes):
""" """
@ -130,33 +131,6 @@ def merge_pdfs(paths, output_filename):
print("禁止投标情形: No files to merge.") print("禁止投标情形: No files to merge.")
return output_path return output_path
def process_string_list(string_list):
    """Extract the first ``[...]`` group from a text and return its items as a list.

    The text between the first "[" and the next "]" (on a single line) is split
    on commas and each item is stripped of surrounding whitespace; empty items
    are dropped.

    Args:
        string_list: text that may contain a bracketed, comma-separated list.

    Returns:
        list: a list of ints when every item consists of digits only, otherwise
        a list of the item strings exactly as written. An empty list is returned
        when there is no bracketed group, when the group is empty, or when a
        digit-only item cannot be converted to an int.
    """
    match = re.search(r'\[(.*?)\]', string_list)
    if not match:
        return []

    items = [part.strip() for part in match.group(1).split(',')]
    non_empty = [item for item in items if item]
    if not non_empty:
        return []

    # Items are parsed directly instead of being re-quoted and evaluated:
    # quoting broke on apostrophes (the whole list was discarded) and rewrote
    # backslash sequences inside items.
    if all(item.isdigit() for item in items):
        try:
            return [int(item) for item in non_empty]
        except ValueError as e:
            # str.isdigit() accepts characters such as superscript digits that
            # int() rejects; keep the previous "report and return empty" outcome.
            print(f"禁止投标情形: Error parsing list: {e}")
            return []
    return non_empty
def find_forbidden(truncate_json_path,clause_path,truncate3=""): #投标人须知前附表 条款 评分前附表和资格审查表中 def find_forbidden(truncate_json_path,clause_path,truncate3=""): #投标人须知前附表 条款 评分前附表和资格审查表中
# output_filename="merged.pdf" # output_filename="merged.pdf"
# paths=[truncate1,truncate4] # paths=[truncate1,truncate4]

View File

@ -1,12 +1,10 @@
import json import json
import os
import time import time
from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json from flask_app.general.json_utils import extract_content_from_json
from flask_app.main.json_utils import nest_json_under_key, extract_content_from_json
from flask_app.main.形式响应评审 import process_reviews from flask_app.main.形式响应评审 import process_reviews
from flask_app.main.资格评审 import process_qualification from flask_app.main.资格评审 import process_qualification
from flask_app.main.通义千问long import upload_file, qianwen_long from flask_app.general.通义千问long import upload_file, qianwen_long
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor

View File

@ -1,12 +1,10 @@
# -*- encoding:utf-8 -*- # -*- encoding:utf-8 -*-
# 资格审查中,首先排除'联合体投标'和'不得存在的情况',有'符合'等的加入matching_keys列表否则保留原字典 # 资格审查中,首先排除'联合体投标'和'不得存在的情况',有'符合'等的加入matching_keys列表否则保留原字典
import json import json
import os
import re import re
from flask_app.main.json_utils import clean_json_string, combine_json_results, add_keys_to_json, nest_json_under_key from flask_app.general.json_utils import clean_json_string, combine_json_results, add_keys_to_json
from flask_app.main.多线程提问 import multi_threading, read_questions_from_file from flask_app.general.多线程提问 import multi_threading, read_questions_from_file
from flask_app.main.通义千问long import upload_file, qianwen_long from flask_app.general.通义千问long import upload_file, qianwen_long
from flask_app.main.截取pdf import truncate_pdf_main
# 这个函数的主要用途是将多个相关的字典(都包含 'common_key' 键)合并成一个更大的、综合的字典,所有相关信息都集中在 'common_key' 键下 # 这个函数的主要用途是将多个相关的字典(都包含 'common_key' 键)合并成一个更大的、综合的字典,所有相关信息都集中在 'common_key' 键下

View File

@ -4,7 +4,7 @@ import queue
import concurrent.futures import concurrent.futures
from dashscope import Assistants, Messages, Runs, Threads from dashscope import Assistants, Messages, Runs, Threads
from llama_index.indices.managed.dashscope import DashScopeCloudRetriever from llama_index.indices.managed.dashscope import DashScopeCloudRetriever
from json_utils import extract_content_from_json from flask_app.general.json_utils import extract_content_from_json
prompt = """ prompt = """
# 角色 # 角色
你是一个文档处理专家专门负责理解和操作基于特定内容的文档任务这包括解析总结搜索或生成与给定文档相关的各类信息 你是一个文档处理专家专门负责理解和操作基于特定内容的文档任务这包括解析总结搜索或生成与给定文档相关的各类信息

View File

@ -1,11 +1,11 @@
import json import json
from flask_app.main.json_utils import clean_json_string, nest_json_under_key,rename_outer_key from flask_app.general.json_utils import clean_json_string, rename_outer_key
from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
from flask_app.main.判断是否分包等 import judge_whether_main, read_questions_from_judge from flask_app.main.判断是否分包等 import judge_whether_main, read_questions_from_judge
from flask_app.main.多线程提问 import read_questions_from_file, multi_threading from flask_app.general.多线程提问 import read_questions_from_file, multi_threading
from flask_app.main.通义千问long import upload_file from flask_app.general.通义千问long import upload_file
from flask_app.general.通用功能函数 import judge_consortium_bidding
def aggregate_basic_info_engineering(baseinfo_list): def aggregate_basic_info_engineering(baseinfo_list):
""" """
@ -94,22 +94,6 @@ def dynamic_key_handling(key_groups, detected_keys):
elif "偏离" in key: elif "偏离" in key:
key_groups["其他信息"].append(key) key_groups["其他信息"].append(key)
def judge_consortium_bidding(baseinfo_list):
    """Tell whether consortium bidding is accepted, and drop that flag from the data.

    Each dict in ``baseinfo_list`` is checked for the key "是否接受联合体投标".
    The key is removed from every dict that carries it; the dicts (and therefore
    the caller's list) are modified in place.

    Args:
        baseinfo_list: list of dicts holding the extracted basic information.

    Returns:
        bool: True if at least one dict had "是否接受联合体投标" set to "是",
        otherwise False.
    """
    flag_key = "是否接受联合体投标"
    accept_bidding = False
    for baseinfo in baseinfo_list:
        # pop() both reads and removes the flag; the default keeps dicts
        # without the key untouched.
        answer = baseinfo.pop(flag_key, None)
        # The documented intent is "value equals 是"; the previous comparison
        # against an empty string could never match a real "是" answer.
        if answer == "是":
            accept_bidding = True
    return accept_bidding
def combine_basic_info(knowledge_name, truncate0, output_folder, clause_path): def combine_basic_info(knowledge_name, truncate0, output_folder, clause_path):
""" """
综合和处理基础信息生成最终的基础信息字典 综合和处理基础信息生成最终的基础信息字典

View File

@ -3,9 +3,9 @@ import re
import json import json
import time import time
from flask_app.main.多线程提问 import multi_threading from flask_app.general.多线程提问 import multi_threading
from flask_app.main.根据条款号整合json import process_and_merge_entries,process_and_merge2 from flask_app.main.根据条款号整合json import process_and_merge_entries,process_and_merge2
from flask_app.main.json_utils import extract_content_from_json from flask_app.general.json_utils import extract_content_from_json
from flask_app.main.截取pdf import truncate_pdf_main from flask_app.main.截取pdf import truncate_pdf_main
from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json
prompt = """ prompt = """

View File

@ -5,17 +5,17 @@ import time
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from flask_app.main.截取pdf import truncate_pdf_multiple from flask_app.main.截取pdf import truncate_pdf_multiple
from flask_app.main.table_content_extraction import extract_tables_main from flask_app.main.table_content_extraction import extract_tables_main
from flask_app.main.知识库操作 import addfileToKnowledge, deleteKnowledge from flask_app.old_version.文档理解大模型版知识库处理.知识库操作 import addfileToKnowledge, deleteKnowledge
from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json
from flask_app.main.json_utils import transform_json_values, combine_json_results from flask_app.general.json_utils import transform_json_values
from flask_app.main.无效标和废标和禁止投标整合 import combine_find_invalid from flask_app.main.无效标和废标和禁止投标整合 import combine_find_invalid
from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
import concurrent.futures import concurrent.futures
from flask_app.main.基础信息整合 import combine_basic_info from flask_app.old_version.基础信息整合 import combine_basic_info
from flask_app.main.资格审查模块old import combine_review_standards from flask_app.old_version.资格审查模块old import combine_review_standards
from flask_app.main.商务标技术标整合 import combine_evaluation_standards from flask_app.main.商务标技术标整合 import combine_evaluation_standards
from flask_app.main.format_change import pdf2docx, docx2pdf from flask_app.general.format_change import pdf2docx, docx2pdf
from flask_app.main.docx截取docx import copy_docx from flask_app.general.docx截取docx import copy_docx
def get_global_logger(unique_id): def get_global_logger(unique_id):
if unique_id is None: if unique_id is None:

View File

@ -3,7 +3,7 @@ import uuid
from llama_index.readers.dashscope.base import DashScopeParse from llama_index.readers.dashscope.base import DashScopeParse
from llama_index.readers.dashscope.utils import ResultType from llama_index.readers.dashscope.utils import ResultType
from llama_index.indices.managed.dashscope import DashScopeCloudIndex from llama_index.indices.managed.dashscope import DashScopeCloudIndex
from flask_app.main.删除知识库 import delete_index, create_client from flask_app.old_version.文档理解大模型版知识库处理.删除知识库 import delete_index, create_client
def addfileToKnowledge(filepath,knowledge_name): def addfileToKnowledge(filepath,knowledge_name):
@ -26,7 +26,7 @@ def deleteKnowledge(index):
workspace_id = os.environ.get('DASHSCOPE_WORKSPACE_ID') workspace_id = os.environ.get('DASHSCOPE_WORKSPACE_ID')
client = create_client() client = create_client()
delete_index(client,workspace_id,index_id) delete_index(client,workspace_id,index_id)
print("knowledge deleted successfully!!!") print("knowledge old_version successfully!!!")
@ -42,7 +42,7 @@ def deleteFileFromKnowledge(index, documents):
file_id = getattr(document, 'id_', None) # 使用 getattr 防止属性不存在时抛出异常 file_id = getattr(document, 'id_', None) # 使用 getattr 防止属性不存在时抛出异常
if file_id: if file_id:
file_ids.append(file_id) # 将 id 添加到列表中 file_ids.append(file_id) # 将 id 添加到列表中
print("deleted successfully") print("old_version successfully")
index.delete_ref_doc(file_ids) index.delete_ref_doc(file_ids)

View File

@ -21,7 +21,7 @@ def submit_file():
client = docmind_api20220711Client(config) client = docmind_api20220711Client(config)
request = docmind_api20220711_models.SubmitDocParserJobAdvanceRequest( request = docmind_api20220711_models.SubmitDocParserJobAdvanceRequest(
# file_url_object : 本地文件流 # file_url_object : 本地文件流
file_url_object=open("./zbtest4.pdf", "rb"), file_url_object=open("zbtest4.pdf", "rb"),
# file_name :文件名称。名称必须包含文件类型 # file_name :文件名称。名称必须包含文件类型
file_name='zbtest4.pdf' file_name='zbtest4.pdf'
# file_name_extension : 文件后缀格式。与文件名二选一 # file_name_extension : 文件后缀格式。与文件名二选一

View File

@ -1,10 +1,10 @@
import os import os
from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json
from flask_app.main.json_utils import nest_json_under_key, extract_content_from_json from flask_app.general.json_utils import extract_content_from_json
from flask_app.main.形式响应评审old import process_reviews from flask_app.old_version.形式响应评审old import process_reviews
from flask_app.main.资格评审old import process_qualification from flask_app.old_version.资格评审old import process_qualification
from flask_app.main.通义千问long import upload_file, qianwen_long from flask_app.general.通义千问long import upload_file, qianwen_long
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor

View File

@ -2,9 +2,9 @@
# 资格审查中,首先排除'联合体投标'和'不得存在的情况',有'符合'等的加入matching_keys列表否则保留原字典 # 资格审查中,首先排除'联合体投标'和'不得存在的情况',有'符合'等的加入matching_keys列表否则保留原字典
import json import json
import re import re
from flask_app.main.json_utils import clean_json_string, combine_json_results, add_keys_to_json, nest_json_under_key from flask_app.general.json_utils import clean_json_string, combine_json_results, add_keys_to_json
from flask_app.main.多线程提问 import multi_threading, read_questions_from_file from flask_app.general.多线程提问 import multi_threading, read_questions_from_file
from flask_app.main.通义千问long import upload_file from flask_app.general.通义千问long import upload_file
def merge_dictionaries_under_common_key(dicts, common_key): def merge_dictionaries_under_common_key(dicts, common_key):

View File

@ -1,9 +1,9 @@
import json import json
import re import re
from PyPDF2 import PdfReader from PyPDF2 import PdfReader
from flask_app.main.json_utils import clean_json_string, combine_json_results from flask_app.general.json_utils import combine_json_results
from flask_app.main.多线程提问 import multi_threading from flask_app.general.多线程提问 import multi_threading
from flask_app.main.通义千问long import qianwen_long, upload_file from flask_app.general.通义千问long import upload_file
from flask_app.货物标.货物标截取pdf import extract_common_header, clean_page_content from flask_app.货物标.货物标截取pdf import extract_common_header, clean_page_content

View File

@ -3,10 +3,10 @@ import json
import threading import threading
import time import time
from flask_app.main.json_utils import clean_json_string from flask_app.general.json_utils import clean_json_string
from flask_app.main.基础信息整合 import judge_consortium_bidding from flask_app.general.通用功能函数 import judge_consortium_bidding
from flask_app.main.多线程提问 import read_questions_from_file, multi_threading from flask_app.general.多线程提问 import read_questions_from_file, multi_threading
from flask_app.main.通义千问long import upload_file, qianwen_long from flask_app.general.通义千问long import upload_file
from flask_app.main.判断是否分包等 import merge_json_to_list, read_questions_from_judge from flask_app.main.判断是否分包等 import merge_json_to_list, read_questions_from_judge
from flask_app.货物标.提取采购需求main import fetch_procurement_reqs from flask_app.货物标.提取采购需求main import fetch_procurement_reqs

View File

@ -2,9 +2,9 @@
import json import json
import os import os
from flask_app.main.多线程提问 import multi_threading from flask_app.general.多线程提问 import multi_threading
from flask_app.main.通义千问long import qianwen_long, upload_file from flask_app.general.通义千问long import qianwen_long, upload_file
from flask_app.main.json_utils import clean_json_string, combine_json_results from flask_app.general.json_utils import clean_json_string, combine_json_results
from flask_app.货物标.货物标截取pdf import truncate_pdf_main from flask_app.货物标.货物标截取pdf import truncate_pdf_main
def generate_key_paths(data, parent_key=''): def generate_key_paths(data, parent_key=''):

View File

@ -1,11 +1,11 @@
import concurrent.futures import concurrent.futures
import json import json
import time import time
from flask_app.货物标.技术要求提取 import get_technical_requirements from flask_app.货物标.技术要求提取 import get_technical_requirements
from flask_app.main.通义千问long import upload_file from flask_app.general.通义千问long import upload_file
from flask_app.货物标.商务服务其他要求提取 import get_business_requirements from flask_app.货物标.商务服务其他要求提取 import get_business_requirements
from flask_app.main.json_utils import nest_json_under_key
#获取采购清单 #获取采购清单
def fetch_procurement_reqs(truncate_file): def fetch_procurement_reqs(truncate_file):
# 定义默认的 procurement_reqs 字典 # 定义默认的 procurement_reqs 字典

View File

@ -3,9 +3,9 @@ import json
import os.path import os.path
import time import time
import re import re
from flask_app.main.通义千问long import upload_file, qianwen_long from flask_app.general.通义千问long import upload_file, qianwen_long
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from flask_app.main.禁止投标情形 import find_forbidden, process_string_list from flask_app.general.通用功能函数 import process_string_list
from docx import Document from docx import Document
#如果当前段落有序号,则向下匹配直接遇到相同的序号样式 #如果当前段落有序号,则向下匹配直接遇到相同的序号样式
@ -482,6 +482,7 @@ def combine_find_invalid(file_path, output_dir):
# return nest_json_under_key(combined_dict, "无效标与废标项") # return nest_json_under_key(combined_dict, "无效标与废标项")
return {"无效标与废标项":combined_dict} return {"无效标与废标项":combined_dict}
#TODO:无效投标更多项目
if __name__ == '__main__': if __name__ == '__main__':
start_time = time.time() start_time = time.time()
# truncate_json_path = "C:\\Users\\Administrator\\Desktop\\货物标\\output4\\tmp2\\竞争性谈判文件(3)_tobidders_notice_part1\\truncate_output.json" # truncate_json_path = "C:\\Users\\Administrator\\Desktop\\货物标\\output4\\tmp2\\竞争性谈判文件(3)_tobidders_notice_part1\\truncate_output.json"

View File

@ -2,7 +2,7 @@
import json import json
import re import re
from collections import defaultdict from collections import defaultdict
from flask_app.main.通义千问long import upload_file, qianwen_long from flask_app.general.通义千问long import upload_file, qianwen_long
def combine_technical_and_business(data, target_values): def combine_technical_and_business(data, target_values):

View File

@ -1,8 +1,8 @@
from PyPDF2 import PdfReader, PdfWriter from PyPDF2 import PdfReader, PdfWriter
import re # 导入正则表达式库 import re # 导入正则表达式库
import os # 用于文件和文件夹操作 import os # 用于文件和文件夹操作
from flask_app.main.format_change import docx2pdf from flask_app.general.format_change import docx2pdf
from flask_app.main.merge_pdfs import merge_and_cleanup,merge_pdfs from flask_app.general.merge_pdfs import merge_and_cleanup,merge_pdfs
def clean_page_content(text, common_header): def clean_page_content(text, common_header):
# 首先删除抬头公共部分 # 首先删除抬头公共部分

View File

@ -1,9 +1,8 @@
# 竞磋 竞谈 磋商 询价 邀请 单一来源 # 竞磋 竞谈 磋商 询价 邀请 单一来源
import json import json
import time
from flask_app.main.format_change import docx2pdf, pdf2docx from flask_app.general.format_change import docx2pdf, pdf2docx
from flask_app.main.json_utils import transform_json_values from flask_app.general.json_utils import transform_json_values
from flask_app.货物标.基础信息解析main import combine_basic_info from flask_app.货物标.基础信息解析main import combine_basic_info
from flask_app.货物标.投标人须知正文提取指定内容货物标版 import extract_from_notice from flask_app.货物标.投标人须知正文提取指定内容货物标版 import extract_from_notice
from flask_app.货物标.货物标截取pdf import truncate_pdf_multiple from flask_app.货物标.货物标截取pdf import truncate_pdf_multiple

View File

@ -1,10 +1,9 @@
# -*- encoding:utf-8 -*- # -*- encoding:utf-8 -*-
import copy
import json import json
import re import re
from flask_app.main.通义千问long import qianwen_long, upload_file from flask_app.general.通义千问long import upload_file
from flask_app.main.多线程提问 import multi_threading from flask_app.general.多线程提问 import multi_threading
from flask_app.main.json_utils import extract_content_from_json, clean_json_string from flask_app.general.json_utils import clean_json_string
from flask_app.货物标.投标人须知正文条款提取成json文件货物标版 import convert_clause_to_json from flask_app.货物标.投标人须知正文条款提取成json文件货物标版 import convert_clause_to_json
# 这个字典可能有嵌套,你需要遍历里面的键名,对键名作判断,而不是键值,具体是这样的:如果处于同一层级的键的数量>1并且键名全由数字或点号组成。那么就将这些序号键名全部删除重新组织成一个字典格式的数据你可以考虑用字符串列表来保持部分平级的数据 # 这个字典可能有嵌套,你需要遍历里面的键名,对键名作判断,而不是键值,具体是这样的:如果处于同一层级的键的数量>1并且键名全由数字或点号组成。那么就将这些序号键名全部删除重新组织成一个字典格式的数据你可以考虑用字符串列表来保持部分平级的数据