8.29
This commit is contained in:
parent
6463a9e593
commit
a72fe30e4f
@ -6,9 +6,6 @@ WORKDIR /ZbparseProjects
|
|||||||
|
|
||||||
RUN pip config set global.progress_bar off
|
RUN pip config set global.progress_bar off
|
||||||
|
|
||||||
# 复制 requirements.txt 并安装依赖,确保每次构建都可以使用缓存(除非 requirements.txt 改变)
|
|
||||||
COPY ../../requirements.txt .
|
|
||||||
|
|
||||||
# 安装依赖
|
# 安装依赖
|
||||||
RUN pip install --upgrade pip --default-timeout=100 \
|
RUN pip install --upgrade pip --default-timeout=100 \
|
||||||
&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt
|
&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt
|
||||||
@ -26,4 +23,4 @@ ENV ALIBABA_CLOUD_ACCESS_KEY_SECRET=88oyw7LniqV8i0SnOuSFS5lprfrPtw
|
|||||||
EXPOSE 5000
|
EXPOSE 5000
|
||||||
|
|
||||||
# 在容器启动时运行你的应用
|
# 在容器启动时运行你的应用
|
||||||
CMD ["python", "main/start_up.py"]
|
CMD ["python", "flask_app/main/start_up.py"]
|
||||||
|
@ -1,8 +1,7 @@
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from download import download_file
|
from flask_app.main.download import download_file
|
||||||
|
|
||||||
|
|
||||||
def upload_file(file_path, url):
|
def upload_file(file_path, url):
|
||||||
|
@ -5,11 +5,11 @@ import time
|
|||||||
import uuid
|
import uuid
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
from flask import Flask, request, jsonify, send_file, Response, stream_with_context
|
from flask import Flask, request, jsonify, Response, stream_with_context
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
from download import download_file
|
from flask_app.main.download import download_file
|
||||||
from 招标文件解析 import main_processing
|
from flask_app.main.招标文件解析 import main_processing
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
class CSTFormatter(logging.Formatter):
|
class CSTFormatter(logging.Formatter):
|
||||||
|
@ -3,9 +3,9 @@ import json
|
|||||||
import os.path
|
import os.path
|
||||||
import re
|
import re
|
||||||
from json_utils import extract_content_from_json # 可以选择性地导入特定的函数
|
from json_utils import extract_content_from_json # 可以选择性地导入特定的函数
|
||||||
from 提取打勾符号 import read_pdf_and_judge_main
|
from flask_app.main.提取打勾符号 import read_pdf_and_judge_main
|
||||||
from 通义千问 import qianwen_ask
|
from flask_app.main.通义千问 import qianwen_ask
|
||||||
from 通义千问long import qianwen_long,upload_file
|
from flask_app.main.通义千问long import qianwen_long,upload_file
|
||||||
#调用qianwen-ask之后,组织提示词问百炼。
|
#调用qianwen-ask之后,组织提示词问百炼。
|
||||||
|
|
||||||
def construct_judge_questions(json_data):
|
def construct_judge_questions(json_data):
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
from json_utils import clean_json_string, combine_json_results
|
from json_utils import clean_json_string, combine_json_results
|
||||||
from 通义千问long import upload_file, qianwen_long
|
from flask_app.main.通义千问long import upload_file, qianwen_long
|
||||||
|
|
||||||
|
|
||||||
def combine_business_and_bidding(data):
|
def combine_business_and_bidding(data):
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
from json_utils import clean_json_string, nest_json_under_key,rename_outer_key, combine_json_results
|
from json_utils import clean_json_string, nest_json_under_key,rename_outer_key, combine_json_results
|
||||||
from 投标人须知正文提取指定内容 import extract_from_notice
|
from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
|
||||||
from 判断是否分包等 import judge_whether_main, read_questions_from_judge
|
from flask_app.main.判断是否分包等 import judge_whether_main, read_questions_from_judge
|
||||||
from 多线程提问 import read_questions_from_file, multi_threading
|
from flask_app.main.多线程提问 import read_questions_from_file, multi_threading
|
||||||
from 通义千问long import upload_file
|
from flask_app.main.通义千问long import upload_file
|
||||||
def combine_basic_info(baseinfo_list):
|
def combine_basic_info(baseinfo_list):
|
||||||
combined_baseinfo_list = []
|
combined_baseinfo_list = []
|
||||||
key_groups = {
|
key_groups = {
|
||||||
@ -68,7 +68,7 @@ def project_basic_info(knowledge_name,truncate0,output_folder,clause_path): #
|
|||||||
print("starting基础信息...")
|
print("starting基础信息...")
|
||||||
baseinfo_list = []
|
baseinfo_list = []
|
||||||
# baseinfo_file_path='../static/提示词/前两章提问总结.txt'
|
# baseinfo_file_path='../static/提示词/前两章提问总结.txt'
|
||||||
baseinfo_file_path = 'static/提示词/前两章提问总结.txt' # 替换为你的txt文件路径
|
baseinfo_file_path = 'flask_app/static/提示词/前两章提问总结.txt' # 替换为你的txt文件路径
|
||||||
questions = read_questions_from_file(baseinfo_file_path)
|
questions = read_questions_from_file(baseinfo_file_path)
|
||||||
res1 = multi_threading(questions, knowledge_name)
|
res1 = multi_threading(questions, knowledge_name)
|
||||||
for _, response in res1: # _占位,代表ques;response[0]也是ques;response[1]是ans
|
for _, response in res1: # _占位,代表ques;response[0]也是ques;response[1]是ans
|
||||||
@ -84,7 +84,7 @@ def project_basic_info(knowledge_name,truncate0,output_folder,clause_path): #
|
|||||||
chosen_numbers, merged = judge_whether_main(truncate0,output_folder)
|
chosen_numbers, merged = judge_whether_main(truncate0,output_folder)
|
||||||
baseinfo_list.append(merged)
|
baseinfo_list.append(merged)
|
||||||
# judge_file_path = '../static/提示词/是否相关问题.txt'
|
# judge_file_path = '../static/提示词/是否相关问题.txt'
|
||||||
judge_file_path ='static/提示词/是否相关问题.txt'
|
judge_file_path ='flask_app/static/提示词/是否相关问题.txt'
|
||||||
judge_questions = read_questions_from_judge(judge_file_path, chosen_numbers)
|
judge_questions = read_questions_from_judge(judge_file_path, chosen_numbers)
|
||||||
|
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@ import time
|
|||||||
|
|
||||||
from dashscope import Assistants, Messages, Runs, Threads
|
from dashscope import Assistants, Messages, Runs, Threads
|
||||||
from llama_index.indices.managed.dashscope import DashScopeCloudRetriever
|
from llama_index.indices.managed.dashscope import DashScopeCloudRetriever
|
||||||
from 通义千问long import qianwen_long, upload_file
|
from flask_app.main.通义千问long import qianwen_long, upload_file
|
||||||
|
|
||||||
prompt = """
|
prompt = """
|
||||||
# 角色
|
# 角色
|
||||||
|
@ -2,10 +2,9 @@ import re
|
|||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from 多线程提问 import multi_threading
|
from flask_app.main.多线程提问 import multi_threading
|
||||||
from 根据条款号整合json import process_and_merge_entries
|
from flask_app.main.根据条款号整合json import process_and_merge_entries
|
||||||
from 通义千问long import qianwen_long
|
from flask_app.main.json_utils import extract_content_from_json
|
||||||
from json_utils import extract_content_from_json
|
|
||||||
prompt = """
|
prompt = """
|
||||||
# 角色
|
# 角色
|
||||||
你是一个文档处理专家,专门负责理解和操作基于特定内容的文档任务,这包括解析、总结、搜索或生成与给定文档相关的各类信息。
|
你是一个文档处理专家,专门负责理解和操作基于特定内容的文档任务,这包括解析、总结、搜索或生成与给定文档相关的各类信息。
|
||||||
|
@ -3,18 +3,18 @@ import json
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
from 截取pdf import truncate_pdf_multiple
|
from flask_app.main.截取pdf import truncate_pdf_multiple
|
||||||
from table_content_extraction import extract_tables_main
|
from flask_app.main.table_content_extraction import extract_tables_main
|
||||||
from 知识库操作 import addfileToKnowledge, deleteKnowledge
|
from flask_app.main.知识库操作 import addfileToKnowledge, deleteKnowledge
|
||||||
from 投标人须知正文条款提取成json文件 import convert_clause_to_json
|
from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json
|
||||||
from json_utils import nest_json_under_key, transform_json_values, combine_json_results
|
from flask_app.main.json_utils import nest_json_under_key, transform_json_values, combine_json_results
|
||||||
from 无效标和废标和禁止投标整合 import combine_find_invalid
|
from flask_app.main.无效标和废标和禁止投标整合 import combine_find_invalid
|
||||||
from 投标人须知正文提取指定内容 import extract_from_notice
|
from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
from 基础信息整合 import project_basic_info
|
from flask_app.main.基础信息整合 import project_basic_info
|
||||||
from 资格审查模块 import combine_review_standards
|
from flask_app.main.资格审查模块 import combine_review_standards
|
||||||
from 商务标技术标整合 import combine_evaluation_standards
|
from flask_app.main.商务标技术标整合 import combine_evaluation_standards
|
||||||
from format_change import pdf2docx,docx2pdf
|
from flask_app.main.format_change import pdf2docx,docx2pdf
|
||||||
|
|
||||||
global_logger=None
|
global_logger=None
|
||||||
def get_global_logger(unique_id):
|
def get_global_logger(unique_id):
|
||||||
|
@ -3,10 +3,10 @@ import json
|
|||||||
import os.path
|
import os.path
|
||||||
import time
|
import time
|
||||||
import re
|
import re
|
||||||
from json_utils import combine_json_results, nest_json_under_key
|
from flask_app.main.json_utils import combine_json_results, nest_json_under_key
|
||||||
from 通义千问long import upload_file, qianwen_long
|
from flask_app.main.通义千问long import upload_file, qianwen_long
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from 禁止投标情形 import find_forbidden
|
from flask_app.main.禁止投标情形 import find_forbidden
|
||||||
|
|
||||||
#如果当前段落有序号,则向下匹配直接遇到相同的序号样式
|
#如果当前段落有序号,则向下匹配直接遇到相同的序号样式
|
||||||
#如果当前段落无序号,则向下匹配序号,把若干同类的序号都摘出来。
|
#如果当前段落无序号,则向下匹配序号,把若干同类的序号都摘出来。
|
||||||
|
@ -4,7 +4,7 @@ import uuid
|
|||||||
from llama_index.readers.dashscope.base import DashScopeParse
|
from llama_index.readers.dashscope.base import DashScopeParse
|
||||||
from llama_index.readers.dashscope.utils import ResultType
|
from llama_index.readers.dashscope.utils import ResultType
|
||||||
from llama_index.indices.managed.dashscope import DashScopeCloudIndex
|
from llama_index.indices.managed.dashscope import DashScopeCloudIndex
|
||||||
from 删除知识库 import delete_index, create_client
|
from flask_app.main.删除知识库 import delete_index, create_client
|
||||||
|
|
||||||
|
|
||||||
def addfileToKnowledge(filepath,knowledge_name):
|
def addfileToKnowledge(filepath,knowledge_name):
|
||||||
|
@ -5,8 +5,7 @@ import re
|
|||||||
|
|
||||||
from PyPDF2 import PdfWriter, PdfReader
|
from PyPDF2 import PdfWriter, PdfReader
|
||||||
|
|
||||||
from 通义千问long import upload_file, qianwen_long
|
from flask_app.main.通义千问long import upload_file, qianwen_long
|
||||||
from json_utils import clean_json_string
|
|
||||||
|
|
||||||
def extract_and_format_from_paths(json_paths, includes):
|
def extract_and_format_from_paths(json_paths, includes):
|
||||||
"""
|
"""
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from 投标人须知正文条款提取成json文件 import convert_clause_to_json
|
from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json
|
||||||
from json_utils import nest_json_under_key, extract_content_from_json
|
from flask_app.main.json_utils import nest_json_under_key, extract_content_from_json
|
||||||
from 形式响应评审 import process_reviews
|
from flask_app.main.形式响应评审 import process_reviews
|
||||||
from 资格评审 import process_qualification
|
from flask_app.main.资格评审 import process_qualification
|
||||||
from 通义千问long import upload_file, qianwen_long
|
from flask_app.main.通义千问long import upload_file, qianwen_long
|
||||||
|
|
||||||
|
|
||||||
def combine_review_standards(truncate1,truncate4,knowledge_name,truncate0_jsonpath,clause_path): #评标办法前附表
|
def combine_review_standards(truncate1,truncate4,knowledge_name,truncate0_jsonpath,clause_path): #评标办法前附表
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
#资格审查中,首先排除'联合体投标'和'不得存在的情况',有'符合'等的,加入matching_keys列表,否则保留原字典
|
#资格审查中,首先排除'联合体投标'和'不得存在的情况',有'符合'等的,加入matching_keys列表,否则保留原字典
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from json_utils import clean_json_string, combine_json_results, add_keys_to_json
|
from flask_app.main.json_utils import clean_json_string, combine_json_results, add_keys_to_json
|
||||||
from 多线程提问 import multi_threading, read_questions_from_file
|
from flask_app.main.多线程提问 import multi_threading, read_questions_from_file
|
||||||
from 通义千问long import upload_file
|
from flask_app.main.通义千问long import upload_file
|
||||||
|
|
||||||
def merge_dictionaries_under_common_key(dicts, common_key):
|
def merge_dictionaries_under_common_key(dicts, common_key):
|
||||||
# 初始化一个空字典来保存合并的结果
|
# 初始化一个空字典来保存合并的结果
|
||||||
@ -70,7 +70,7 @@ def process_qualification(qualification_review,truncate4,knowledge_name):
|
|||||||
merged_dict = merge_dictionaries_under_common_key(res_list, '资格评审')
|
merged_dict = merge_dictionaries_under_common_key(res_list, '资格评审')
|
||||||
qualify_list = []
|
qualify_list = []
|
||||||
# qualification_review_file_path = '../static/提示词/资格评审问题.txt' # 替换为你的txt文件路径
|
# qualification_review_file_path = '../static/提示词/资格评审问题.txt' # 替换为你的txt文件路径
|
||||||
qualification_review_file_path='static/提示词/资格评审问题.txt'
|
qualification_review_file_path='flask_app/static/提示词/资格评审问题.txt'
|
||||||
qualification_review_questions = read_questions_from_file(qualification_review_file_path) # 联合体投标
|
qualification_review_questions = read_questions_from_file(qualification_review_file_path) # 联合体投标
|
||||||
results1 = multi_threading(qualification_review_questions, knowledge_name)
|
results1 = multi_threading(qualification_review_questions, knowledge_name)
|
||||||
for _, response in results1: # _占位,代表ques;response[0]也是ques;response[1]是ans
|
for _, response in results1: # _占位,代表ques;response[0]也是ques;response[1]是ans
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from 按页读取pdf import extract_text_by_page
|
from flask_app.main.按页读取pdf import extract_text_by_page
|
||||||
|
|
||||||
def check_strings_in_pdf(file_path):
|
def check_strings_in_pdf(file_path):
|
||||||
judge_list=['施工机械设备', '企业信息登记']
|
judge_list=['施工机械设备', '企业信息登记']
|
||||||
|
@ -2,10 +2,10 @@ import os
|
|||||||
import sys
|
import sys
|
||||||
from 货物标截取pdf import truncate_pdf_main
|
from 货物标截取pdf import truncate_pdf_main
|
||||||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
|
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
|
||||||
# from ..main.format_change import docx2pdf
|
from flask_app.main.format_change import docx2pdf
|
||||||
# from ..main.多线程提问 import multi_threading
|
from flask_app.main.多线程提问 import multi_threading
|
||||||
from ..main.通义千问long import upload_file,qianwen_long
|
from flask_app.main.通义千问long import upload_file,qianwen_long
|
||||||
from ..main.json_utils import clean_json_string
|
from flask_app.main.json_utils import clean_json_string
|
||||||
|
|
||||||
def generate_key_paths(data, parent_key=''):
|
def generate_key_paths(data, parent_key=''):
|
||||||
key_paths = []
|
key_paths = []
|
||||||
@ -22,7 +22,7 @@ def generate_key_paths(data, parent_key=''):
|
|||||||
#获取采购清单
|
#获取采购清单
|
||||||
def fetch_purchasing_list(file_path):
|
def fetch_purchasing_list(file_path):
|
||||||
output_folder="C:\\Users\\Administrator\\Desktop\\货物标\\output"
|
output_folder="C:\\Users\\Administrator\\Desktop\\货物标\\output"
|
||||||
# file_path = docx2pdf(file_path)
|
file_path = docx2pdf(file_path)
|
||||||
truncate_path=truncate_pdf_main(file_path,output_folder,1)
|
truncate_path=truncate_pdf_main(file_path,output_folder,1)
|
||||||
user_query="这是一份货物标中采购要求部分的内容,你需要摘取出需要采购的系统(货物),一个大系统(大项)中可能包含多个小系统(小项),你需要保留这种层次关系,给出货物名称,请以json格式返回,外层键名为\"采购需求\",嵌套键名为对应的系统名称或货物名称,无需给出采购数量和单位,如有未知内容,在对应键值处填\"未知\"。"
|
user_query="这是一份货物标中采购要求部分的内容,你需要摘取出需要采购的系统(货物),一个大系统(大项)中可能包含多个小系统(小项),你需要保留这种层次关系,给出货物名称,请以json格式返回,外层键名为\"采购需求\",嵌套键名为对应的系统名称或货物名称,无需给出采购数量和单位,如有未知内容,在对应键值处填\"未知\"。"
|
||||||
file_id=upload_file(truncate_path)
|
file_id=upload_file(truncate_path)
|
||||||
@ -32,5 +32,5 @@ def fetch_purchasing_list(file_path):
|
|||||||
print(keys_list)
|
print(keys_list)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
file_path="C:\\Users\\Administrator\\Desktop\\货物标\\output1\\招招招标文件(一中多媒体报告厅教学设备)_20240829101650_tobidders_notice_table.pdf"
|
file_path="C:\\Users\\Administrator\\Desktop\\货物标\\output1\\招标文件(107国道).docf"
|
||||||
fetch_purchasing_list(file_path)
|
fetch_purchasing_list(file_path)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user