This commit is contained in:
zy123 2024-08-29 17:30:49 +08:00
parent 6463a9e593
commit a72fe30e4f
16 changed files with 51 additions and 57 deletions

View File

@ -6,9 +6,6 @@ WORKDIR /ZbparseProjects
RUN pip config set global.progress_bar off RUN pip config set global.progress_bar off
# 复制 requirements.txt 并安装依赖,确保每次构建都可以使用缓存(除非 requirements.txt 改变)
COPY ../../requirements.txt .
# 安装依赖 # 安装依赖
RUN pip install --upgrade pip --default-timeout=100 \ RUN pip install --upgrade pip --default-timeout=100 \
&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt && pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt
@ -26,4 +23,4 @@ ENV ALIBABA_CLOUD_ACCESS_KEY_SECRET=88oyw7LniqV8i0SnOuSFS5lprfrPtw
EXPOSE 5000 EXPOSE 5000
# 在容器启动时运行你的应用 # 在容器启动时运行你的应用
CMD ["python", "main/start_up.py"] CMD ["python", "flask_app/main/start_up.py"]

View File

@ -1,8 +1,7 @@
import json import json
import os import os
import requests import requests
from download import download_file from flask_app.main.download import download_file
def upload_file(file_path, url): def upload_file(file_path, url):

View File

@ -5,11 +5,11 @@ import time
import uuid import uuid
from datetime import datetime, timedelta from datetime import datetime, timedelta
from flask import Flask, request, jsonify, send_file, Response, stream_with_context from flask import Flask, request, jsonify, Response, stream_with_context
import json import json
import os import os
from download import download_file from flask_app.main.download import download_file
from 招标文件解析 import main_processing from flask_app.main.招标文件解析 import main_processing
app = Flask(__name__) app = Flask(__name__)
class CSTFormatter(logging.Formatter): class CSTFormatter(logging.Formatter):

View File

@ -3,9 +3,9 @@ import json
import os.path import os.path
import re import re
from json_utils import extract_content_from_json # 可以选择性地导入特定的函数 from json_utils import extract_content_from_json # 可以选择性地导入特定的函数
from 提取打勾符号 import read_pdf_and_judge_main from flask_app.main.提取打勾符号 import read_pdf_and_judge_main
from 通义千问 import qianwen_ask from flask_app.main.通义千问 import qianwen_ask
from 通义千问long import qianwen_long,upload_file from flask_app.main.通义千问long import qianwen_long,upload_file
#调用qianwen-ask之后组织提示词问百炼。 #调用qianwen-ask之后组织提示词问百炼。
def construct_judge_questions(json_data): def construct_judge_questions(json_data):

View File

@ -1,7 +1,7 @@
import json import json
from json_utils import clean_json_string, combine_json_results from json_utils import clean_json_string, combine_json_results
from 通义千问long import upload_file, qianwen_long from flask_app.main.通义千问long import upload_file, qianwen_long
def combine_business_and_bidding(data): def combine_business_and_bidding(data):

View File

@ -1,8 +1,8 @@
from json_utils import clean_json_string, nest_json_under_key,rename_outer_key, combine_json_results from json_utils import clean_json_string, nest_json_under_key,rename_outer_key, combine_json_results
from 投标人须知正文提取指定内容 import extract_from_notice from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
from 判断是否分包等 import judge_whether_main, read_questions_from_judge from flask_app.main.判断是否分包等 import judge_whether_main, read_questions_from_judge
from 多线程提问 import read_questions_from_file, multi_threading from flask_app.main.多线程提问 import read_questions_from_file, multi_threading
from 通义千问long import upload_file from flask_app.main.通义千问long import upload_file
def combine_basic_info(baseinfo_list): def combine_basic_info(baseinfo_list):
combined_baseinfo_list = [] combined_baseinfo_list = []
key_groups = { key_groups = {
@ -68,7 +68,7 @@ def project_basic_info(knowledge_name,truncate0,output_folder,clause_path): #
print("starting基础信息...") print("starting基础信息...")
baseinfo_list = [] baseinfo_list = []
# baseinfo_file_path='../static/提示词/前两章提问总结.txt' # baseinfo_file_path='../static/提示词/前两章提问总结.txt'
baseinfo_file_path = 'static/提示词/前两章提问总结.txt' # 替换为你的txt文件路径 baseinfo_file_path = 'flask_app/static/提示词/前两章提问总结.txt' # 替换为你的txt文件路径
questions = read_questions_from_file(baseinfo_file_path) questions = read_questions_from_file(baseinfo_file_path)
res1 = multi_threading(questions, knowledge_name) res1 = multi_threading(questions, knowledge_name)
for _, response in res1: # _占位代表ques;response[0]也是ques;response[1]是ans for _, response in res1: # _占位代表ques;response[0]也是ques;response[1]是ans
@ -84,7 +84,7 @@ def project_basic_info(knowledge_name,truncate0,output_folder,clause_path): #
chosen_numbers, merged = judge_whether_main(truncate0,output_folder) chosen_numbers, merged = judge_whether_main(truncate0,output_folder)
baseinfo_list.append(merged) baseinfo_list.append(merged)
# judge_file_path = '../static/提示词/是否相关问题.txt' # judge_file_path = '../static/提示词/是否相关问题.txt'
judge_file_path ='static/提示词/是否相关问题.txt' judge_file_path ='flask_app/static/提示词/是否相关问题.txt'
judge_questions = read_questions_from_judge(judge_file_path, chosen_numbers) judge_questions = read_questions_from_judge(judge_file_path, chosen_numbers)

View File

@ -7,7 +7,7 @@ import time
from dashscope import Assistants, Messages, Runs, Threads from dashscope import Assistants, Messages, Runs, Threads
from llama_index.indices.managed.dashscope import DashScopeCloudRetriever from llama_index.indices.managed.dashscope import DashScopeCloudRetriever
from 通义千问long import qianwen_long, upload_file from flask_app.main.通义千问long import qianwen_long, upload_file
prompt = """ prompt = """
# 角色 # 角色

View File

@ -2,10 +2,9 @@ import re
import json import json
import time import time
from 多线程提问 import multi_threading from flask_app.main.多线程提问 import multi_threading
from 根据条款号整合json import process_and_merge_entries from flask_app.main.根据条款号整合json import process_and_merge_entries
from 通义千问long import qianwen_long from flask_app.main.json_utils import extract_content_from_json
from json_utils import extract_content_from_json
prompt = """ prompt = """
# 角色 # 角色
你是一个文档处理专家专门负责理解和操作基于特定内容的文档任务这包括解析总结搜索或生成与给定文档相关的各类信息 你是一个文档处理专家专门负责理解和操作基于特定内容的文档任务这包括解析总结搜索或生成与给定文档相关的各类信息

View File

@ -3,18 +3,18 @@ import json
import logging import logging
import os import os
import time import time
from 截取pdf import truncate_pdf_multiple from flask_app.main.截取pdf import truncate_pdf_multiple
from table_content_extraction import extract_tables_main from flask_app.main.table_content_extraction import extract_tables_main
from 知识库操作 import addfileToKnowledge, deleteKnowledge from flask_app.main.知识库操作 import addfileToKnowledge, deleteKnowledge
from 投标人须知正文条款提取成json文件 import convert_clause_to_json from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json
from json_utils import nest_json_under_key, transform_json_values, combine_json_results from flask_app.main.json_utils import nest_json_under_key, transform_json_values, combine_json_results
from 无效标和废标和禁止投标整合 import combine_find_invalid from flask_app.main.无效标和废标和禁止投标整合 import combine_find_invalid
from 投标人须知正文提取指定内容 import extract_from_notice from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
import concurrent.futures import concurrent.futures
from 基础信息整合 import project_basic_info from flask_app.main.基础信息整合 import project_basic_info
from 资格审查模块 import combine_review_standards from flask_app.main.资格审查模块 import combine_review_standards
from 商务标技术标整合 import combine_evaluation_standards from flask_app.main.商务标技术标整合 import combine_evaluation_standards
from format_change import pdf2docx,docx2pdf from flask_app.main.format_change import pdf2docx,docx2pdf
global_logger=None global_logger=None
def get_global_logger(unique_id): def get_global_logger(unique_id):

View File

@ -3,10 +3,10 @@ import json
import os.path import os.path
import time import time
import re import re
from json_utils import combine_json_results, nest_json_under_key from flask_app.main.json_utils import combine_json_results, nest_json_under_key
from 通义千问long import upload_file, qianwen_long from flask_app.main.通义千问long import upload_file, qianwen_long
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from 禁止投标情形 import find_forbidden from flask_app.main.禁止投标情形 import find_forbidden
#如果当前段落有序号,则向下匹配直接遇到相同的序号样式 #如果当前段落有序号,则向下匹配直接遇到相同的序号样式
#如果当前段落无序号,则向下匹配序号,把若干同类的序号都摘出来。 #如果当前段落无序号,则向下匹配序号,把若干同类的序号都摘出来。

View File

@ -4,7 +4,7 @@ import uuid
from llama_index.readers.dashscope.base import DashScopeParse from llama_index.readers.dashscope.base import DashScopeParse
from llama_index.readers.dashscope.utils import ResultType from llama_index.readers.dashscope.utils import ResultType
from llama_index.indices.managed.dashscope import DashScopeCloudIndex from llama_index.indices.managed.dashscope import DashScopeCloudIndex
from 删除知识库 import delete_index, create_client from flask_app.main.删除知识库 import delete_index, create_client
def addfileToKnowledge(filepath,knowledge_name): def addfileToKnowledge(filepath,knowledge_name):

View File

@ -5,8 +5,7 @@ import re
from PyPDF2 import PdfWriter, PdfReader from PyPDF2 import PdfWriter, PdfReader
from 通义千问long import upload_file, qianwen_long from flask_app.main.通义千问long import upload_file, qianwen_long
from json_utils import clean_json_string
def extract_and_format_from_paths(json_paths, includes): def extract_and_format_from_paths(json_paths, includes):
""" """

View File

@ -1,11 +1,11 @@
import json import json
import os import os
from 投标人须知正文条款提取成json文件 import convert_clause_to_json from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json
from json_utils import nest_json_under_key, extract_content_from_json from flask_app.main.json_utils import nest_json_under_key, extract_content_from_json
from 形式响应评审 import process_reviews from flask_app.main.形式响应评审 import process_reviews
from 资格评审 import process_qualification from flask_app.main.资格评审 import process_qualification
from 通义千问long import upload_file, qianwen_long from flask_app.main.通义千问long import upload_file, qianwen_long
def combine_review_standards(truncate1,truncate4,knowledge_name,truncate0_jsonpath,clause_path): #评标办法前附表 def combine_review_standards(truncate1,truncate4,knowledge_name,truncate0_jsonpath,clause_path): #评标办法前附表

View File

@ -1,9 +1,9 @@
#资格审查中,首先排除'联合体投标'和'不得存在的情况',有'符合'等的加入matching_keys列表否则保留原字典 #资格审查中,首先排除'联合体投标'和'不得存在的情况',有'符合'等的加入matching_keys列表否则保留原字典
import re import re
from json_utils import clean_json_string, combine_json_results, add_keys_to_json from flask_app.main.json_utils import clean_json_string, combine_json_results, add_keys_to_json
from 多线程提问 import multi_threading, read_questions_from_file from flask_app.main.多线程提问 import multi_threading, read_questions_from_file
from 通义千问long import upload_file from flask_app.main.通义千问long import upload_file
def merge_dictionaries_under_common_key(dicts, common_key): def merge_dictionaries_under_common_key(dicts, common_key):
# 初始化一个空字典来保存合并的结果 # 初始化一个空字典来保存合并的结果
@ -70,7 +70,7 @@ def process_qualification(qualification_review,truncate4,knowledge_name):
merged_dict = merge_dictionaries_under_common_key(res_list, '资格评审') merged_dict = merge_dictionaries_under_common_key(res_list, '资格评审')
qualify_list = [] qualify_list = []
# qualification_review_file_path = '../static/提示词/资格评审问题.txt' # 替换为你的txt文件路径 # qualification_review_file_path = '../static/提示词/资格评审问题.txt' # 替换为你的txt文件路径
qualification_review_file_path='static/提示词/资格评审问题.txt' qualification_review_file_path='flask_app/static/提示词/资格评审问题.txt'
qualification_review_questions = read_questions_from_file(qualification_review_file_path) # 联合体投标 qualification_review_questions = read_questions_from_file(qualification_review_file_path) # 联合体投标
results1 = multi_threading(qualification_review_questions, knowledge_name) results1 = multi_threading(qualification_review_questions, knowledge_name)
for _, response in results1: # _占位代表ques;response[0]也是ques;response[1]是ans for _, response in results1: # _占位代表ques;response[0]也是ques;response[1]是ans

View File

@ -1,4 +1,4 @@
from 按页读取pdf import extract_text_by_page from flask_app.main.按页读取pdf import extract_text_by_page
def check_strings_in_pdf(file_path): def check_strings_in_pdf(file_path):
judge_list=['施工机械设备', '企业信息登记'] judge_list=['施工机械设备', '企业信息登记']

View File

@ -2,10 +2,10 @@ import os
import sys import sys
from 货物标截取pdf import truncate_pdf_main from 货物标截取pdf import truncate_pdf_main
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))) sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
# from ..main.format_change import docx2pdf from flask_app.main.format_change import docx2pdf
# from ..main.多线程提问 import multi_threading from flask_app.main.多线程提问 import multi_threading
from ..main.通义千问long import upload_file,qianwen_long from flask_app.main.通义千问long import upload_file,qianwen_long
from ..main.json_utils import clean_json_string from flask_app.main.json_utils import clean_json_string
def generate_key_paths(data, parent_key=''): def generate_key_paths(data, parent_key=''):
key_paths = [] key_paths = []
@ -22,7 +22,7 @@ def generate_key_paths(data, parent_key=''):
#获取采购清单 #获取采购清单
def fetch_purchasing_list(file_path): def fetch_purchasing_list(file_path):
output_folder="C:\\Users\\Administrator\\Desktop\\货物标\\output" output_folder="C:\\Users\\Administrator\\Desktop\\货物标\\output"
# file_path = docx2pdf(file_path) file_path = docx2pdf(file_path)
truncate_path=truncate_pdf_main(file_path,output_folder,1) truncate_path=truncate_pdf_main(file_path,output_folder,1)
user_query="这是一份货物标中采购要求部分的内容你需要摘取出需要采购的系统货物一个大系统大项中可能包含多个小系统小项你需要保留这种层次关系给出货物名称请以json格式返回外层键名为\"采购需求\",嵌套键名为对应的系统名称或货物名称,无需给出采购数量和单位,如有未知内容,在对应键值处填\"未知\"" user_query="这是一份货物标中采购要求部分的内容你需要摘取出需要采购的系统货物一个大系统大项中可能包含多个小系统小项你需要保留这种层次关系给出货物名称请以json格式返回外层键名为\"采购需求\",嵌套键名为对应的系统名称或货物名称,无需给出采购数量和单位,如有未知内容,在对应键值处填\"未知\""
file_id=upload_file(truncate_path) file_id=upload_file(truncate_path)
@ -32,5 +32,5 @@ def fetch_purchasing_list(file_path):
print(keys_list) print(keys_list)
if __name__ == "__main__": if __name__ == "__main__":
file_path="C:\\Users\\Administrator\\Desktop\\货物标\\output1\\招招标文件一中多媒体报告厅教学设备_20240829101650_tobidders_notice_table.pdf" file_path="C:\\Users\\Administrator\\Desktop\\货物标\\output1\\标文件(107国道).docf"
fetch_purchasing_list(file_path) fetch_purchasing_list(file_path)