10.22代码结构优化
This commit is contained in:
commit
f31aee3812
@ -4,17 +4,15 @@ import logging
|
||||
import os
|
||||
import time
|
||||
|
||||
from flask_app.main.format_change import docx2pdf, pdf2docx
|
||||
from flask_app.main.json_utils import clean_json_string
|
||||
from flask_app.main.判断是否分包等 import merge_json_to_list, read_questions_from_judge
|
||||
from flask_app.main.基础信息整合 import judge_consortium_bidding
|
||||
from flask_app.main.多线程提问 import read_questions_from_file, multi_threading
|
||||
from flask_app.main.通义千问long import upload_file
|
||||
from flask_app.general.format_change import docx2pdf
|
||||
from flask_app.general.json_utils import clean_json_string
|
||||
from flask_app.general.多线程提问 import read_questions_from_file, multi_threading
|
||||
from flask_app.general.通义千问long import upload_file
|
||||
from flask_app.货物标.基础信息解析main import aggregate_basic_info_goods
|
||||
from flask_app.货物标.货物标截取pdf import truncate_pdf_specific_goods
|
||||
from flask_app.main.截取pdf import truncate_pdf_specific_engineering
|
||||
from flask_app.main.post_processing import inner_post_processing
|
||||
from flask_app.main.基础信息整合 import aggregate_basic_info_engineering
|
||||
from flask_app.general.post_processing import inner_post_processing
|
||||
from flask_app.old_version.基础信息整合 import aggregate_basic_info_engineering
|
||||
|
||||
def get_global_logger(unique_id):
|
||||
if unique_id is None:
|
||||
|
@ -9,7 +9,8 @@ import time
|
||||
import requests
|
||||
from dashscope import Assistants, Messages, Runs, Threads
|
||||
from llama_index.indices.managed.dashscope import DashScopeCloudRetriever
|
||||
from flask_app.main.通义千问long import qianwen_long, upload_file
|
||||
from flask_app.general.通义千问long import qianwen_long
|
||||
|
||||
prompt = """
|
||||
# 角色
|
||||
你是一个文档处理专家,专门负责理解和操作基于特定内容的文档任务,这包括解析、总结、搜索或生成与给定文档相关的各类信息。
|
47
flask_app/general/通用功能函数.py
Normal file
47
flask_app/general/通用功能函数.py
Normal file
@ -0,0 +1,47 @@
|
||||
import ast
|
||||
import re
|
||||
|
||||
|
||||
def judge_consortium_bidding(baseinfo_list):
    """Report whether any entry accepts consortium bidding and strip that flag.

    The "是否接受联合体投标" key is removed in place from every dict in
    ``baseinfo_list``; the container keeps the same entries in the same order.

    Args:
        baseinfo_list: Iterable of dicts. Entries that are not dicts are
            left untouched instead of raising.

    Returns:
        True if at least one dict held the key with the exact value "是",
        otherwise False.
    """
    flag_key = "是否接受联合体投标"
    accept_bidding = False
    for baseinfo in baseinfo_list:
        if not isinstance(baseinfo, dict):
            # Nothing to strip from a non-dict entry; skip it rather than fail on pop().
            continue
        # pop() removes the key and returns its value, so one lookup is enough.
        # Do not break early: the key has to be removed from every entry.
        if baseinfo.pop(flag_key, None) == "是":
            accept_bidding = True
    return accept_bidding
|
||||
|
||||
def process_string_list(string_list):
    """Parse the first ``[...]`` group found in a string into a Python list.

    The text between the first ``[`` and the next ``]`` on the same line is
    split on commas; surrounding whitespace is stripped and empty items are
    ignored.

    * If every remaining item consists of digits only, a list of ints is returned.
    * Otherwise a list of strings is returned. An item wrapped in a matching
      pair of single or double quotes has that pair removed.

    The list is built directly rather than by assembling Python source and
    evaluating it, so items containing quotes or backslashes are kept verbatim
    instead of discarding the whole list or raising.

    Args:
        string_list: Text that may contain a bracketed, comma-separated list.

    Returns:
        The parsed list, or ``[]`` when the input is not a string, has no
        bracket group, the group is empty, or a digit item cannot be converted.
    """
    if not isinstance(string_list, str):
        return []

    match = re.search(r'\[(.*?)\]', string_list)
    if not match:
        return []

    items = [part.strip() for part in match.group(1).split(',')]
    items = [item for item in items if item]
    if not items:
        return []

    if all(item.isdigit() for item in items):
        try:
            return [int(item) for item in items]
        except ValueError as e:
            # str.isdigit() also accepts characters such as superscripts that int() rejects.
            print(f"禁止投标情形: Error parsing list: {e}")
            return []

    parsed = []
    for item in items:
        if len(item) >= 2 and item[0] == item[-1] and item[0] in "'\"":
            item = item[1:-1]
        if item:
            parsed.append(item)
    return parsed
|
@ -1,6 +0,0 @@
|
||||
from ..main.通义千问long import qianwen_long,upload_file
|
||||
|
||||
|
||||
def read_dictory(file_path):
|
||||
file_id=upload_file(file_path)
|
||||
user_query="根据该文档中的评标办法前附表,请你列出该文件的技术标以及它对应的具体评分要求,若对应内容中存在其他信息,在嵌套键如'技术标'中新增键名'备注'存放该信息。如果评分内容不是这3个,则返回文档中给定的评分内容以及它的评分要求,都以json的格式返回结果。请不要回答有关形式、资格、响应性评审标准的内容"
|
@ -1,58 +0,0 @@
|
||||
import json
|
||||
|
||||
|
||||
def find_keys_by_value(target_value, json_data):
    """Return the keys of ``json_data`` whose value matches ``target_value``.

    Exact matches win. Only when there is none does the function fall back to
    string values that start with ``target_value``.
    """
    exact = []
    for name, content in json_data.items():
        if content == target_value:
            exact.append(name)
    if exact:
        return exact

    # Fallback: prefix match, restricted to string values.
    by_prefix = []
    for name, content in json_data.items():
        if isinstance(content, str) and content.startswith(target_value):
            by_prefix.append(name)
    return by_prefix
|
||||
|
||||
|
||||
def find_keys_with_prefix(key_prefix, json_data):
    """List the keys that start with ``key_prefix``, excluding the prefix itself.

    The match is a plain string prefix test, so "3.1" also matches "3.10".
    """
    matches = []
    for name in json_data:
        if name.startswith(key_prefix) and name != key_prefix:
            matches.append(name)
    return matches
|
||||
|
||||
|
||||
def extract_json(data, target_values):
    """Collect, for each target value, the matching entry plus its sub-headings.

    For every key returned by ``find_keys_by_value`` a dict is built holding
    that key's entry and every entry whose key is reported by
    ``find_keys_with_prefix``. When several keys match the same target value,
    the dict built for the last one is the one kept.

    Returns:
        A dict mapping each target value that had at least one match to its
        collected section.
    """
    collected = {}
    for wanted in target_values:
        for heading in find_keys_by_value(wanted, data):
            section = {heading: data[heading]}
            section.update(
                (child, data[child]) for child in find_keys_with_prefix(heading, data)
            )
            collected[wanted] = section
    return collected
|
||||
|
||||
|
||||
def renumber_keys(data, level=1):
    """Recursively rewrite the first dot-separated segment of every key to "1".

    Non-dict values are returned unchanged. Keys that become identical after
    the rewrite overwrite each other, the later one winning. ``level`` is only
    passed along to the recursive calls.
    """
    if not isinstance(data, dict):
        return data

    renamed = {}
    for old_key, child in data.items():
        # Keep everything after the first dot; only the leading segment is replaced.
        _, dot, tail = old_key.partition('.')
        renamed['1' + dot + tail] = renumber_keys(child, level + 1)
    return renamed
|
||||
|
||||
|
||||
def json_results(extr_json):
    """Apply ``renumber_keys`` to every top-level value, keeping the top-level keys."""
    return {name: renumber_keys(section) for name, section in extr_json.items()}
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual run: read clause3.json, extract the sections whose value matches
    # the target, renumber their keys and write the result next to the script.
    target_values = ["投标文件"]

    with open('clause3.json', 'r', encoding='utf-8') as file:
        data = json.load(file)

    renumbered_data = json_results(extract_json(data, target_values))

    with open('output_results1.json', 'w', encoding='utf-8') as file:
        json.dump(renumbered_data, file, indent=4, ensure_ascii=False)

    print("JSON文件已按要求重新编号并保存.")
|
@ -1,28 +0,0 @@
|
||||
import json
|
||||
|
||||
def search_key_in_json(file_path, search_key):
    """Look up ``search_key`` in a JSON file, descending into nested dicts.

    The document is walked depth-first in key order; at each dict the key is
    checked before its children are visited. Only dict values are descended
    into (lists are not searched).

    Returns:
        "<key> : <value>" for the first hit, or "<key> : /" when the key is
        not found.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        document = json.load(handle)

    # Explicit stack instead of recursion; children are pushed in reverse so
    # they are popped in their original order (pre-order traversal).
    pending = [document]
    while pending:
        node = pending.pop()
        if search_key in node:
            return f"{search_key} : {node[search_key]}"
        children = [child for child in node.values() if isinstance(child, dict)]
        pending.extend(reversed(children))

    return f"{search_key} : /"
|
||||
|
||||
|
||||
# Usage example. Guarded so that importing this module does not try to open
# the hard-coded local file below and fail on machines where it does not exist.
if __name__ == "__main__":
    file_path = 'C:/Users/Administrator/Downloads/truncate_output2.json'  # replace with the path of your JSON file
    search_key = '多标段投标'  # replace with the key you want to search for
    print(search_key_in_json(file_path, search_key))
|
@ -6,10 +6,9 @@ from datetime import datetime, timedelta
|
||||
from flask import Flask, request, jsonify, Response, stream_with_context, g
|
||||
import json
|
||||
import os
|
||||
|
||||
from flask_app.general.little_zbparse import little_parse_main
|
||||
from flask_app.main.download import download_file
|
||||
from flask_app.main.post_processing import outer_post_processing
|
||||
from flask_app.general.post_processing import outer_post_processing
|
||||
from flask_app.main.工程标解析main import engineering_bid_main
|
||||
from flask_app.货物标.货物标解析main import goods_bid_main
|
||||
from flask_app.货物标.技术要求提取 import get_technical_requirements_main
|
||||
|
@ -2,10 +2,10 @@
|
||||
import json
|
||||
import os.path
|
||||
import re
|
||||
from flask_app.main.json_utils import extract_content_from_json # 可以选择性地导入特定的函数
|
||||
from flask_app.general.json_utils import extract_content_from_json # 可以选择性地导入特定的函数
|
||||
from flask_app.main.提取打勾符号 import read_pdf_and_judge_main
|
||||
from flask_app.main.多线程提问 import multi_threading
|
||||
from flask_app.main.通义千问long import qianwen_long,upload_file
|
||||
from flask_app.general.多线程提问 import multi_threading
|
||||
from flask_app.general.通义千问long import qianwen_long,upload_file
|
||||
#调用qianwen-ask之后,组织提示词问百炼。
|
||||
|
||||
def construct_judge_questions(json_data):
|
||||
|
@ -1,8 +1,8 @@
|
||||
# -*- encoding:utf-8 -*-
|
||||
import json
|
||||
|
||||
from flask_app.main.json_utils import clean_json_string
|
||||
from flask_app.main.通义千问long import upload_file, qianwen_long
|
||||
from flask_app.general.json_utils import clean_json_string
|
||||
from flask_app.general.通义千问long import upload_file, qianwen_long
|
||||
|
||||
# def combine_technical_and_business(data, target_values1, target_values2):
|
||||
# extracted_data = {} # 根级别存储所有数据
|
||||
|
@ -1,13 +1,13 @@
|
||||
import copy
|
||||
import json
|
||||
import threading
|
||||
import time
|
||||
import concurrent.futures
|
||||
from flask_app.main.json_utils import clean_json_string, nest_json_under_key,rename_outer_key
|
||||
from flask_app.general.json_utils import clean_json_string, rename_outer_key
|
||||
from flask_app.general.通用功能函数 import judge_consortium_bidding
|
||||
from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
|
||||
from flask_app.main.判断是否分包等 import judge_whether_main, read_questions_from_judge, merge_json_to_list
|
||||
from flask_app.main.多线程提问 import read_questions_from_file, multi_threading
|
||||
from flask_app.main.通义千问long import upload_file
|
||||
from flask_app.main.判断是否分包等 import read_questions_from_judge, merge_json_to_list
|
||||
from flask_app.general.多线程提问 import read_questions_from_file, multi_threading
|
||||
from flask_app.general.通义千问long import upload_file
|
||||
|
||||
|
||||
def aggregate_basic_info_engineering(baseinfo_list):
|
||||
@ -98,22 +98,6 @@ def dynamic_key_handling(key_groups, detected_keys):
|
||||
key_groups["其他信息"].append(key)
|
||||
|
||||
|
||||
def judge_consortium_bidding(baseinfo_list):
    """Report whether any entry accepts consortium bidding and strip that flag.

    The "是否接受联合体投标" key is removed in place from every dict in
    ``baseinfo_list``; the container keeps the same entries in the same order.

    Args:
        baseinfo_list: Iterable of dicts. Entries that are not dicts are
            left untouched instead of raising.

    Returns:
        True if at least one dict held the key with the exact value "是",
        otherwise False.
    """
    flag_key = "是否接受联合体投标"
    accept_bidding = False
    for baseinfo in baseinfo_list:
        if not isinstance(baseinfo, dict):
            # Nothing to strip from a non-dict entry; skip it rather than fail on pop().
            continue
        # pop() removes the key and returns its value, so one lookup is enough.
        # Do not break early: the key has to be removed from every entry.
        if baseinfo.pop(flag_key, None) == "是":
            accept_bidding = True
    return accept_bidding
|
||||
|
||||
def update_baseinfo_lists(baseinfo_list1, baseinfo_list2):
|
||||
# 创建一个字典,用于存储 baseinfo_list1 中的所有键值对
|
||||
combined_dict = {}
|
||||
|
@ -6,15 +6,15 @@ from concurrent.futures import ThreadPoolExecutor
|
||||
from flask_app.main.截取pdf import truncate_pdf_multiple
|
||||
from flask_app.main.table_content_extraction import extract_tables_main
|
||||
from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json
|
||||
from flask_app.main.json_utils import transform_json_values, combine_json_results
|
||||
from flask_app.general.json_utils import transform_json_values
|
||||
from flask_app.main.无效标和废标和禁止投标整合 import combine_find_invalid
|
||||
from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
|
||||
import concurrent.futures
|
||||
from flask_app.main.基础信息整合快速版 import combine_basic_info
|
||||
from flask_app.main.资格审查模块 import combine_review_standards
|
||||
from flask_app.main.商务标技术标整合 import combine_evaluation_standards
|
||||
from flask_app.main.format_change import pdf2docx, docx2pdf
|
||||
from flask_app.main.docx截取docx import copy_docx
|
||||
from flask_app.general.format_change import pdf2docx, docx2pdf
|
||||
from flask_app.general.docx截取docx import copy_docx
|
||||
|
||||
def get_global_logger(unique_id):
|
||||
if unique_id is None:
|
||||
|
@ -4,12 +4,11 @@ import re
|
||||
import json
|
||||
import time
|
||||
|
||||
from flask_app.main.多线程提问 import multi_threading
|
||||
from flask_app.general.多线程提问 import multi_threading
|
||||
from flask_app.main.根据条款号整合json import process_and_merge_entries,process_and_merge2
|
||||
from flask_app.main.json_utils import clean_json_string
|
||||
from flask_app.main.截取pdf import truncate_pdf_main
|
||||
from flask_app.general.json_utils import clean_json_string
|
||||
from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json
|
||||
from flask_app.main.多线程提问 import upload_file
|
||||
from flask_app.general.通义千问long import upload_file
|
||||
from flask_app.main.截取pdf import merge_pdfs
|
||||
prompt = """
|
||||
# 角色
|
||||
|
@ -1,7 +1,7 @@
|
||||
from PyPDF2 import PdfReader, PdfWriter
|
||||
import re # 导入正则表达式库
|
||||
import os # 用于文件和文件夹操作
|
||||
from flask_app.main.merge_pdfs import merge_pdfs
|
||||
from flask_app.general.merge_pdfs import merge_pdfs
|
||||
def clean_page_content(text, common_header):
|
||||
# 首先删除抬头公共部分
|
||||
if common_header: # 确保有公共抬头才进行替换
|
||||
|
@ -3,8 +3,7 @@ import json
|
||||
import os.path
|
||||
import time
|
||||
import re
|
||||
from flask_app.main.json_utils import combine_json_results, nest_json_under_key
|
||||
from flask_app.main.通义千问long import upload_file, qianwen_long
|
||||
from flask_app.general.通义千问long import upload_file, qianwen_long
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from flask_app.main.禁止投标情形 import find_forbidden, process_string_list
|
||||
|
||||
|
@ -1,11 +1,12 @@
|
||||
import ast
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
||||
from PyPDF2 import PdfWriter, PdfReader
|
||||
|
||||
from flask_app.main.通义千问long import upload_file, qianwen_long
|
||||
from flask_app.general.通义千问long import upload_file, qianwen_long
|
||||
from flask_app.general.通用功能函数 import process_string_list
|
||||
|
||||
|
||||
def extract_and_format_from_paths(json_paths, includes, excludes):
|
||||
"""
|
||||
@ -130,33 +131,6 @@ def merge_pdfs(paths, output_filename):
|
||||
print("禁止投标情形: No files to merge.")
|
||||
return output_path
|
||||
|
||||
def process_string_list(string_list):
    """Parse the first ``[...]`` group found in a string into a Python list.

    The text between the first ``[`` and the next ``]`` on the same line is
    split on commas; surrounding whitespace is stripped and empty items are
    ignored.

    * If every remaining item consists of digits only, a list of ints is returned.
    * Otherwise a list of strings is returned. An item wrapped in a matching
      pair of single or double quotes has that pair removed.

    The list is built directly rather than by assembling Python source and
    evaluating it, so items containing quotes or backslashes are kept verbatim
    instead of discarding the whole list or raising.

    Args:
        string_list: Text that may contain a bracketed, comma-separated list.

    Returns:
        The parsed list, or ``[]`` when the input is not a string, has no
        bracket group, the group is empty, or a digit item cannot be converted.
    """
    if not isinstance(string_list, str):
        return []

    match = re.search(r'\[(.*?)\]', string_list)
    if not match:
        return []

    items = [part.strip() for part in match.group(1).split(',')]
    items = [item for item in items if item]
    if not items:
        return []

    if all(item.isdigit() for item in items):
        try:
            return [int(item) for item in items]
        except ValueError as e:
            # str.isdigit() also accepts characters such as superscripts that int() rejects.
            print(f"禁止投标情形: Error parsing list: {e}")
            return []

    parsed = []
    for item in items:
        if len(item) >= 2 and item[0] == item[-1] and item[0] in "'\"":
            item = item[1:-1]
        if item:
            parsed.append(item)
    return parsed
|
||||
def find_forbidden(truncate_json_path,clause_path,truncate3=""): #投标人须知前附表 条款 评分前附表和资格审查表中
|
||||
# output_filename="merged.pdf"
|
||||
# paths=[truncate1,truncate4]
|
||||
|
@ -1,12 +1,10 @@
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
|
||||
from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json
|
||||
from flask_app.main.json_utils import nest_json_under_key, extract_content_from_json
|
||||
from flask_app.general.json_utils import extract_content_from_json
|
||||
from flask_app.main.形式响应评审 import process_reviews
|
||||
from flask_app.main.资格评审 import process_qualification
|
||||
from flask_app.main.通义千问long import upload_file, qianwen_long
|
||||
from flask_app.general.通义千问long import upload_file, qianwen_long
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
|
||||
|
@ -1,12 +1,10 @@
|
||||
# -*- encoding:utf-8 -*-
|
||||
# 资格审查中,首先排除'联合体投标'和'不得存在的情况',有'符合'等的,加入matching_keys列表,否则保留原字典
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from flask_app.main.json_utils import clean_json_string, combine_json_results, add_keys_to_json, nest_json_under_key
|
||||
from flask_app.main.多线程提问 import multi_threading, read_questions_from_file
|
||||
from flask_app.main.通义千问long import upload_file, qianwen_long
|
||||
from flask_app.main.截取pdf import truncate_pdf_main
|
||||
from flask_app.general.json_utils import clean_json_string, combine_json_results, add_keys_to_json
|
||||
from flask_app.general.多线程提问 import multi_threading, read_questions_from_file
|
||||
from flask_app.general.通义千问long import upload_file, qianwen_long
|
||||
|
||||
|
||||
# 这个函数的主要用途是将多个相关的字典(都包含 'common_key' 键)合并成一个更大的、综合的字典,所有相关信息都集中在 'common_key' 键下
|
||||
|
@ -4,7 +4,7 @@ import queue
|
||||
import concurrent.futures
|
||||
from dashscope import Assistants, Messages, Runs, Threads
|
||||
from llama_index.indices.managed.dashscope import DashScopeCloudRetriever
|
||||
from json_utils import extract_content_from_json
|
||||
from flask_app.general.json_utils import extract_content_from_json
|
||||
prompt = """
|
||||
# 角色
|
||||
你是一个文档处理专家,专门负责理解和操作基于特定内容的文档任务,这包括解析、总结、搜索或生成与给定文档相关的各类信息。
|
@ -1,11 +1,11 @@
|
||||
import json
|
||||
|
||||
from flask_app.main.json_utils import clean_json_string, nest_json_under_key,rename_outer_key
|
||||
from flask_app.general.json_utils import clean_json_string, rename_outer_key
|
||||
from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
|
||||
from flask_app.main.判断是否分包等 import judge_whether_main, read_questions_from_judge
|
||||
from flask_app.main.多线程提问 import read_questions_from_file, multi_threading
|
||||
from flask_app.main.通义千问long import upload_file
|
||||
|
||||
from flask_app.general.多线程提问 import read_questions_from_file, multi_threading
|
||||
from flask_app.general.通义千问long import upload_file
|
||||
from flask_app.general.通用功能函数 import judge_consortium_bidding
|
||||
|
||||
def aggregate_basic_info_engineering(baseinfo_list):
|
||||
"""
|
||||
@ -94,22 +94,6 @@ def dynamic_key_handling(key_groups, detected_keys):
|
||||
elif "偏离" in key:
|
||||
key_groups["其他信息"].append(key)
|
||||
|
||||
|
||||
def judge_consortium_bidding(baseinfo_list):
    """Report whether any entry accepts consortium bidding and strip that flag.

    The "是否接受联合体投标" key is removed in place from every dict in
    ``baseinfo_list``; the container keeps the same entries in the same order.

    Args:
        baseinfo_list: Iterable of dicts. Entries that are not dicts are
            left untouched instead of raising.

    Returns:
        True if at least one dict held the key with the exact value "是",
        otherwise False.
    """
    flag_key = "是否接受联合体投标"
    accept_bidding = False
    for baseinfo in baseinfo_list:
        if not isinstance(baseinfo, dict):
            # Nothing to strip from a non-dict entry; skip it rather than fail on pop().
            continue
        # pop() removes the key and returns its value, so one lookup is enough.
        # Do not break early: the key has to be removed from every entry.
        if baseinfo.pop(flag_key, None) == "是":
            accept_bidding = True
    return accept_bidding
|
||||
def combine_basic_info(knowledge_name, truncate0, output_folder, clause_path):
|
||||
"""
|
||||
综合和处理基础信息,生成最终的基础信息字典。
|
@ -3,9 +3,9 @@ import re
|
||||
import json
|
||||
import time
|
||||
|
||||
from flask_app.main.多线程提问 import multi_threading
|
||||
from flask_app.general.多线程提问 import multi_threading
|
||||
from flask_app.main.根据条款号整合json import process_and_merge_entries,process_and_merge2
|
||||
from flask_app.main.json_utils import extract_content_from_json
|
||||
from flask_app.general.json_utils import extract_content_from_json
|
||||
from flask_app.main.截取pdf import truncate_pdf_main
|
||||
from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json
|
||||
prompt = """
|
@ -5,17 +5,17 @@ import time
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from flask_app.main.截取pdf import truncate_pdf_multiple
|
||||
from flask_app.main.table_content_extraction import extract_tables_main
|
||||
from flask_app.main.知识库操作 import addfileToKnowledge, deleteKnowledge
|
||||
from flask_app.old_version.文档理解大模型版知识库处理.知识库操作 import addfileToKnowledge, deleteKnowledge
|
||||
from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json
|
||||
from flask_app.main.json_utils import transform_json_values, combine_json_results
|
||||
from flask_app.general.json_utils import transform_json_values
|
||||
from flask_app.main.无效标和废标和禁止投标整合 import combine_find_invalid
|
||||
from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
|
||||
import concurrent.futures
|
||||
from flask_app.main.基础信息整合 import combine_basic_info
|
||||
from flask_app.main.资格审查模块old import combine_review_standards
|
||||
from flask_app.old_version.基础信息整合 import combine_basic_info
|
||||
from flask_app.old_version.资格审查模块old import combine_review_standards
|
||||
from flask_app.main.商务标技术标整合 import combine_evaluation_standards
|
||||
from flask_app.main.format_change import pdf2docx, docx2pdf
|
||||
from flask_app.main.docx截取docx import copy_docx
|
||||
from flask_app.general.format_change import pdf2docx, docx2pdf
|
||||
from flask_app.general.docx截取docx import copy_docx
|
||||
|
||||
def get_global_logger(unique_id):
|
||||
if unique_id is None:
|
@ -3,7 +3,7 @@ import uuid
|
||||
from llama_index.readers.dashscope.base import DashScopeParse
|
||||
from llama_index.readers.dashscope.utils import ResultType
|
||||
from llama_index.indices.managed.dashscope import DashScopeCloudIndex
|
||||
from flask_app.main.删除知识库 import delete_index, create_client
|
||||
from flask_app.old_version.文档理解大模型版知识库处理.删除知识库 import delete_index, create_client
|
||||
|
||||
|
||||
def addfileToKnowledge(filepath,knowledge_name):
|
||||
@ -26,7 +26,7 @@ def deleteKnowledge(index):
|
||||
workspace_id = os.environ.get('DASHSCOPE_WORKSPACE_ID')
|
||||
client = create_client()
|
||||
delete_index(client,workspace_id,index_id)
|
||||
print("knowledge deleted successfully!!!")
|
||||
print("knowledge old_version successfully!!!")
|
||||
|
||||
|
||||
|
||||
@ -42,7 +42,7 @@ def deleteFileFromKnowledge(index, documents):
|
||||
file_id = getattr(document, 'id_', None) # 使用 getattr 防止属性不存在时抛出异常
|
||||
if file_id:
|
||||
file_ids.append(file_id) # 将 id 添加到列表中
|
||||
print("deleted successfully")
|
||||
print("old_version successfully")
|
||||
index.delete_ref_doc(file_ids)
|
||||
|
||||
|
@ -21,7 +21,7 @@ def submit_file():
|
||||
client = docmind_api20220711Client(config)
|
||||
request = docmind_api20220711_models.SubmitDocParserJobAdvanceRequest(
|
||||
# file_url_object : 本地文件流
|
||||
file_url_object=open("./zbtest4.pdf", "rb"),
|
||||
file_url_object=open("zbtest4.pdf", "rb"),
|
||||
# file_name :文件名称。名称必须包含文件类型
|
||||
file_name='zbtest4.pdf'
|
||||
# file_name_extension : 文件后缀格式。与文件名二选一
|
@ -1,10 +1,10 @@
|
||||
import os
|
||||
|
||||
from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json
|
||||
from flask_app.main.json_utils import nest_json_under_key, extract_content_from_json
|
||||
from flask_app.main.形式响应评审old import process_reviews
|
||||
from flask_app.main.资格评审old import process_qualification
|
||||
from flask_app.main.通义千问long import upload_file, qianwen_long
|
||||
from flask_app.general.json_utils import extract_content_from_json
|
||||
from flask_app.old_version.形式响应评审old import process_reviews
|
||||
from flask_app.old_version.资格评审old import process_qualification
|
||||
from flask_app.general.通义千问long import upload_file, qianwen_long
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
|
@ -2,9 +2,9 @@
|
||||
# 资格审查中,首先排除'联合体投标'和'不得存在的情况',有'符合'等的,加入matching_keys列表,否则保留原字典
|
||||
import json
|
||||
import re
|
||||
from flask_app.main.json_utils import clean_json_string, combine_json_results, add_keys_to_json, nest_json_under_key
|
||||
from flask_app.main.多线程提问 import multi_threading, read_questions_from_file
|
||||
from flask_app.main.通义千问long import upload_file
|
||||
from flask_app.general.json_utils import clean_json_string, combine_json_results, add_keys_to_json
|
||||
from flask_app.general.多线程提问 import multi_threading, read_questions_from_file
|
||||
from flask_app.general.通义千问long import upload_file
|
||||
|
||||
|
||||
def merge_dictionaries_under_common_key(dicts, common_key):
|
@ -1,4 +1,4 @@
|
||||
from flask_app.main.读取文件.按页读取pdf import extract_text_by_page
|
||||
from flask_app.general.读取文件.按页读取pdf import extract_text_by_page
|
||||
|
||||
def check_strings_in_pdf(file_path):
|
||||
judge_list=['施工机械设备', '企业信息登记']
|
0
flask_app/old_version/转化格式/__init__.py
Normal file
0
flask_app/old_version/转化格式/__init__.py
Normal file
0
flask_app/old_version/转化格式/pydocx_p2d.py
Normal file
0
flask_app/old_version/转化格式/pydocx_p2d.py
Normal file
@ -1,9 +1,9 @@
|
||||
import json
|
||||
import re
|
||||
from PyPDF2 import PdfReader
|
||||
from flask_app.main.json_utils import clean_json_string, combine_json_results
|
||||
from flask_app.main.多线程提问 import multi_threading
|
||||
from flask_app.main.通义千问long import qianwen_long, upload_file
|
||||
from flask_app.general.json_utils import combine_json_results
|
||||
from flask_app.general.多线程提问 import multi_threading
|
||||
from flask_app.general.通义千问long import upload_file
|
||||
from flask_app.货物标.货物标截取pdf import extract_common_header, clean_page_content
|
||||
|
||||
|
||||
|
@ -3,10 +3,10 @@ import json
|
||||
import threading
|
||||
import time
|
||||
|
||||
from flask_app.main.json_utils import clean_json_string
|
||||
from flask_app.main.基础信息整合 import judge_consortium_bidding
|
||||
from flask_app.main.多线程提问 import read_questions_from_file, multi_threading
|
||||
from flask_app.main.通义千问long import upload_file, qianwen_long
|
||||
from flask_app.general.json_utils import clean_json_string
|
||||
from flask_app.general.通用功能函数 import judge_consortium_bidding
|
||||
from flask_app.general.多线程提问 import read_questions_from_file, multi_threading
|
||||
from flask_app.general.通义千问long import upload_file
|
||||
from flask_app.main.判断是否分包等 import merge_json_to_list, read_questions_from_judge
|
||||
from flask_app.货物标.提取采购需求main import fetch_procurement_reqs
|
||||
|
||||
|
@ -2,9 +2,9 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
from flask_app.main.多线程提问 import multi_threading
|
||||
from flask_app.main.通义千问long import qianwen_long, upload_file
|
||||
from flask_app.main.json_utils import clean_json_string, combine_json_results
|
||||
from flask_app.general.多线程提问 import multi_threading
|
||||
from flask_app.general.通义千问long import qianwen_long, upload_file
|
||||
from flask_app.general.json_utils import clean_json_string, combine_json_results
|
||||
from flask_app.货物标.货物标截取pdf import truncate_pdf_main
|
||||
|
||||
def generate_key_paths(data, parent_key=''):
|
||||
|
@ -1,11 +1,11 @@
|
||||
import concurrent.futures
|
||||
import json
|
||||
import time
|
||||
|
||||
from flask_app.货物标.技术要求提取 import get_technical_requirements
|
||||
from flask_app.main.通义千问long import upload_file
|
||||
from flask_app.general.通义千问long import upload_file
|
||||
from flask_app.货物标.商务服务其他要求提取 import get_business_requirements
|
||||
from flask_app.main.json_utils import nest_json_under_key
|
||||
|
||||
|
||||
#获取采购清单
|
||||
def fetch_procurement_reqs(truncate_file):
|
||||
# 定义默认的 procurement_reqs 字典
|
||||
|
@ -3,9 +3,9 @@ import json
|
||||
import os.path
|
||||
import time
|
||||
import re
|
||||
from flask_app.main.通义千问long import upload_file, qianwen_long
|
||||
from flask_app.general.通义千问long import upload_file, qianwen_long
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from flask_app.main.禁止投标情形 import find_forbidden, process_string_list
|
||||
from flask_app.general.通用功能函数 import process_string_list
|
||||
from docx import Document
|
||||
|
||||
#如果当前段落有序号,则向下匹配直接遇到相同的序号样式
|
||||
@ -482,6 +482,7 @@ def combine_find_invalid(file_path, output_dir):
|
||||
# return nest_json_under_key(combined_dict, "无效标与废标项")
|
||||
return {"无效标与废标项":combined_dict}
|
||||
|
||||
#TODO:无效投标更多项目
|
||||
if __name__ == '__main__':
|
||||
start_time = time.time()
|
||||
# truncate_json_path = "C:\\Users\\Administrator\\Desktop\\货物标\\output4\\tmp2\\竞争性谈判文件(3)_tobidders_notice_part1\\truncate_output.json"
|
||||
|
@ -2,7 +2,7 @@
|
||||
import json
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from flask_app.main.通义千问long import upload_file, qianwen_long
|
||||
from flask_app.general.通义千问long import upload_file, qianwen_long
|
||||
|
||||
|
||||
def combine_technical_and_business(data, target_values):
|
||||
|
@ -1,8 +1,8 @@
|
||||
from PyPDF2 import PdfReader, PdfWriter
|
||||
import re # 导入正则表达式库
|
||||
import os # 用于文件和文件夹操作
|
||||
from flask_app.main.format_change import docx2pdf
|
||||
from flask_app.main.merge_pdfs import merge_and_cleanup,merge_pdfs
|
||||
from flask_app.general.format_change import docx2pdf
|
||||
from flask_app.general.merge_pdfs import merge_and_cleanup,merge_pdfs
|
||||
|
||||
def clean_page_content(text, common_header):
|
||||
# 首先删除抬头公共部分
|
||||
|
@ -1,9 +1,8 @@
|
||||
# 竞磋 竞谈 磋商 询价 邀请 单一来源
|
||||
import json
|
||||
import time
|
||||
|
||||
from flask_app.main.format_change import docx2pdf, pdf2docx
|
||||
from flask_app.main.json_utils import transform_json_values
|
||||
from flask_app.general.format_change import docx2pdf, pdf2docx
|
||||
from flask_app.general.json_utils import transform_json_values
|
||||
from flask_app.货物标.基础信息解析main import combine_basic_info
|
||||
from flask_app.货物标.投标人须知正文提取指定内容货物标版 import extract_from_notice
|
||||
from flask_app.货物标.货物标截取pdf import truncate_pdf_multiple
|
||||
|
@ -1,10 +1,9 @@
|
||||
# -*- encoding:utf-8 -*-
|
||||
import copy
|
||||
import json
|
||||
import re
|
||||
from flask_app.main.通义千问long import qianwen_long, upload_file
|
||||
from flask_app.main.多线程提问 import multi_threading
|
||||
from flask_app.main.json_utils import extract_content_from_json, clean_json_string
|
||||
from flask_app.general.通义千问long import upload_file
|
||||
from flask_app.general.多线程提问 import multi_threading
|
||||
from flask_app.general.json_utils import clean_json_string
|
||||
from flask_app.货物标.投标人须知正文条款提取成json文件货物标版 import convert_clause_to_json
|
||||
|
||||
# 这个字典可能有嵌套,你需要遍历里面的键名,对键名作判断,而不是键值,具体是这样的:如果处于同一层级的键的数量>1并且键名全由数字或点号组成。那么就将这些序号键名全部删除,重新组织成一个字典格式的数据,你可以考虑用字符串列表来保持部分平级的数据
|
||||
|
Loading…
x
Reference in New Issue
Block a user