8.29
This commit is contained in:
parent
6463a9e593
commit
a72fe30e4f
@ -6,9 +6,6 @@ WORKDIR /ZbparseProjects
|
||||
|
||||
RUN pip config set global.progress_bar off
|
||||
|
||||
# 复制 requirements.txt 并安装依赖,确保每次构建都可以使用缓存(除非 requirements.txt 改变)
|
||||
COPY ../../requirements.txt .
|
||||
|
||||
# 安装依赖
|
||||
RUN pip install --upgrade pip --default-timeout=100 \
|
||||
&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt
|
||||
@ -26,4 +23,4 @@ ENV ALIBABA_CLOUD_ACCESS_KEY_SECRET=88oyw7LniqV8i0SnOuSFS5lprfrPtw
|
||||
EXPOSE 5000
|
||||
|
||||
# 在容器启动时运行你的应用
|
||||
CMD ["python", "main/start_up.py"]
|
||||
CMD ["python", "flask_app/main/start_up.py"]
|
||||
|
@ -1,8 +1,7 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
import requests
|
||||
from download import download_file
|
||||
from flask_app.main.download import download_file
|
||||
|
||||
|
||||
def upload_file(file_path, url):
|
||||
|
@ -5,11 +5,11 @@ import time
|
||||
import uuid
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from flask import Flask, request, jsonify, send_file, Response, stream_with_context
|
||||
from flask import Flask, request, jsonify, Response, stream_with_context
|
||||
import json
|
||||
import os
|
||||
from download import download_file
|
||||
from 招标文件解析 import main_processing
|
||||
from flask_app.main.download import download_file
|
||||
from flask_app.main.招标文件解析 import main_processing
|
||||
|
||||
app = Flask(__name__)
|
||||
class CSTFormatter(logging.Formatter):
|
||||
|
@ -3,9 +3,9 @@ import json
|
||||
import os.path
|
||||
import re
|
||||
from json_utils import extract_content_from_json # 可以选择性地导入特定的函数
|
||||
from 提取打勾符号 import read_pdf_and_judge_main
|
||||
from 通义千问 import qianwen_ask
|
||||
from 通义千问long import qianwen_long,upload_file
|
||||
from flask_app.main.提取打勾符号 import read_pdf_and_judge_main
|
||||
from flask_app.main.通义千问 import qianwen_ask
|
||||
from flask_app.main.通义千问long import qianwen_long,upload_file
|
||||
#调用qianwen-ask之后,组织提示词问百炼。
|
||||
|
||||
def construct_judge_questions(json_data):
|
||||
|
@ -1,7 +1,7 @@
|
||||
import json
|
||||
|
||||
from json_utils import clean_json_string, combine_json_results
|
||||
from 通义千问long import upload_file, qianwen_long
|
||||
from flask_app.main.通义千问long import upload_file, qianwen_long
|
||||
|
||||
|
||||
def combine_business_and_bidding(data):
|
||||
|
@ -1,8 +1,8 @@
|
||||
from json_utils import clean_json_string, nest_json_under_key,rename_outer_key, combine_json_results
|
||||
from 投标人须知正文提取指定内容 import extract_from_notice
|
||||
from 判断是否分包等 import judge_whether_main, read_questions_from_judge
|
||||
from 多线程提问 import read_questions_from_file, multi_threading
|
||||
from 通义千问long import upload_file
|
||||
from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
|
||||
from flask_app.main.判断是否分包等 import judge_whether_main, read_questions_from_judge
|
||||
from flask_app.main.多线程提问 import read_questions_from_file, multi_threading
|
||||
from flask_app.main.通义千问long import upload_file
|
||||
def combine_basic_info(baseinfo_list):
|
||||
combined_baseinfo_list = []
|
||||
key_groups = {
|
||||
@ -68,7 +68,7 @@ def project_basic_info(knowledge_name,truncate0,output_folder,clause_path): #
|
||||
print("starting基础信息...")
|
||||
baseinfo_list = []
|
||||
# baseinfo_file_path='../static/提示词/前两章提问总结.txt'
|
||||
baseinfo_file_path = 'static/提示词/前两章提问总结.txt' # 替换为你的txt文件路径
|
||||
baseinfo_file_path = 'flask_app/static/提示词/前两章提问总结.txt' # 替换为你的txt文件路径
|
||||
questions = read_questions_from_file(baseinfo_file_path)
|
||||
res1 = multi_threading(questions, knowledge_name)
|
||||
for _, response in res1: # _占位,代表ques;response[0]也是ques;response[1]是ans
|
||||
@ -84,7 +84,7 @@ def project_basic_info(knowledge_name,truncate0,output_folder,clause_path): #
|
||||
chosen_numbers, merged = judge_whether_main(truncate0,output_folder)
|
||||
baseinfo_list.append(merged)
|
||||
# judge_file_path = '../static/提示词/是否相关问题.txt'
|
||||
judge_file_path ='static/提示词/是否相关问题.txt'
|
||||
judge_file_path ='flask_app/static/提示词/是否相关问题.txt'
|
||||
judge_questions = read_questions_from_judge(judge_file_path, chosen_numbers)
|
||||
|
||||
|
||||
|
@ -7,7 +7,7 @@ import time
|
||||
|
||||
from dashscope import Assistants, Messages, Runs, Threads
|
||||
from llama_index.indices.managed.dashscope import DashScopeCloudRetriever
|
||||
from 通义千问long import qianwen_long, upload_file
|
||||
from flask_app.main.通义千问long import qianwen_long, upload_file
|
||||
|
||||
prompt = """
|
||||
# 角色
|
||||
|
@ -2,10 +2,9 @@ import re
|
||||
import json
|
||||
import time
|
||||
|
||||
from 多线程提问 import multi_threading
|
||||
from 根据条款号整合json import process_and_merge_entries
|
||||
from 通义千问long import qianwen_long
|
||||
from json_utils import extract_content_from_json
|
||||
from flask_app.main.多线程提问 import multi_threading
|
||||
from flask_app.main.根据条款号整合json import process_and_merge_entries
|
||||
from flask_app.main.json_utils import extract_content_from_json
|
||||
prompt = """
|
||||
# 角色
|
||||
你是一个文档处理专家,专门负责理解和操作基于特定内容的文档任务,这包括解析、总结、搜索或生成与给定文档相关的各类信息。
|
||||
|
@ -3,18 +3,18 @@ import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from 截取pdf import truncate_pdf_multiple
|
||||
from table_content_extraction import extract_tables_main
|
||||
from 知识库操作 import addfileToKnowledge, deleteKnowledge
|
||||
from 投标人须知正文条款提取成json文件 import convert_clause_to_json
|
||||
from json_utils import nest_json_under_key, transform_json_values, combine_json_results
|
||||
from 无效标和废标和禁止投标整合 import combine_find_invalid
|
||||
from 投标人须知正文提取指定内容 import extract_from_notice
|
||||
from flask_app.main.截取pdf import truncate_pdf_multiple
|
||||
from flask_app.main.table_content_extraction import extract_tables_main
|
||||
from flask_app.main.知识库操作 import addfileToKnowledge, deleteKnowledge
|
||||
from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json
|
||||
from flask_app.main.json_utils import nest_json_under_key, transform_json_values, combine_json_results
|
||||
from flask_app.main.无效标和废标和禁止投标整合 import combine_find_invalid
|
||||
from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
|
||||
import concurrent.futures
|
||||
from 基础信息整合 import project_basic_info
|
||||
from 资格审查模块 import combine_review_standards
|
||||
from 商务标技术标整合 import combine_evaluation_standards
|
||||
from format_change import pdf2docx,docx2pdf
|
||||
from flask_app.main.基础信息整合 import project_basic_info
|
||||
from flask_app.main.资格审查模块 import combine_review_standards
|
||||
from flask_app.main.商务标技术标整合 import combine_evaluation_standards
|
||||
from flask_app.main.format_change import pdf2docx,docx2pdf
|
||||
|
||||
global_logger=None
|
||||
def get_global_logger(unique_id):
|
||||
|
@ -3,10 +3,10 @@ import json
|
||||
import os.path
|
||||
import time
|
||||
import re
|
||||
from json_utils import combine_json_results, nest_json_under_key
|
||||
from 通义千问long import upload_file, qianwen_long
|
||||
from flask_app.main.json_utils import combine_json_results, nest_json_under_key
|
||||
from flask_app.main.通义千问long import upload_file, qianwen_long
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from 禁止投标情形 import find_forbidden
|
||||
from flask_app.main.禁止投标情形 import find_forbidden
|
||||
|
||||
#如果当前段落有序号,则向下匹配直接遇到相同的序号样式
|
||||
#如果当前段落无序号,则向下匹配序号,把若干同类的序号都摘出来。
|
||||
|
@ -4,7 +4,7 @@ import uuid
|
||||
from llama_index.readers.dashscope.base import DashScopeParse
|
||||
from llama_index.readers.dashscope.utils import ResultType
|
||||
from llama_index.indices.managed.dashscope import DashScopeCloudIndex
|
||||
from 删除知识库 import delete_index, create_client
|
||||
from flask_app.main.删除知识库 import delete_index, create_client
|
||||
|
||||
|
||||
def addfileToKnowledge(filepath,knowledge_name):
|
||||
|
@ -5,8 +5,7 @@ import re
|
||||
|
||||
from PyPDF2 import PdfWriter, PdfReader
|
||||
|
||||
from 通义千问long import upload_file, qianwen_long
|
||||
from json_utils import clean_json_string
|
||||
from flask_app.main.通义千问long import upload_file, qianwen_long
|
||||
|
||||
def extract_and_format_from_paths(json_paths, includes):
|
||||
"""
|
||||
|
@ -1,11 +1,11 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
from 投标人须知正文条款提取成json文件 import convert_clause_to_json
|
||||
from json_utils import nest_json_under_key, extract_content_from_json
|
||||
from 形式响应评审 import process_reviews
|
||||
from 资格评审 import process_qualification
|
||||
from 通义千问long import upload_file, qianwen_long
|
||||
from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json
|
||||
from flask_app.main.json_utils import nest_json_under_key, extract_content_from_json
|
||||
from flask_app.main.形式响应评审 import process_reviews
|
||||
from flask_app.main.资格评审 import process_qualification
|
||||
from flask_app.main.通义千问long import upload_file, qianwen_long
|
||||
|
||||
|
||||
def combine_review_standards(truncate1,truncate4,knowledge_name,truncate0_jsonpath,clause_path): #评标办法前附表
|
||||
|
@ -1,9 +1,9 @@
|
||||
#资格审查中,首先排除'联合体投标'和'不得存在的情况',有'符合'等的,加入matching_keys列表,否则保留原字典
|
||||
import re
|
||||
|
||||
from json_utils import clean_json_string, combine_json_results, add_keys_to_json
|
||||
from 多线程提问 import multi_threading, read_questions_from_file
|
||||
from 通义千问long import upload_file
|
||||
from flask_app.main.json_utils import clean_json_string, combine_json_results, add_keys_to_json
|
||||
from flask_app.main.多线程提问 import multi_threading, read_questions_from_file
|
||||
from flask_app.main.通义千问long import upload_file
|
||||
|
||||
def merge_dictionaries_under_common_key(dicts, common_key):
|
||||
# 初始化一个空字典来保存合并的结果
|
||||
@ -70,7 +70,7 @@ def process_qualification(qualification_review,truncate4,knowledge_name):
|
||||
merged_dict = merge_dictionaries_under_common_key(res_list, '资格评审')
|
||||
qualify_list = []
|
||||
# qualification_review_file_path = '../static/提示词/资格评审问题.txt' # 替换为你的txt文件路径
|
||||
qualification_review_file_path='static/提示词/资格评审问题.txt'
|
||||
qualification_review_file_path='flask_app/static/提示词/资格评审问题.txt'
|
||||
qualification_review_questions = read_questions_from_file(qualification_review_file_path) # 联合体投标
|
||||
results1 = multi_threading(qualification_review_questions, knowledge_name)
|
||||
for _, response in results1: # _占位,代表ques;response[0]也是ques;response[1]是ans
|
||||
|
@ -1,4 +1,4 @@
|
||||
from 按页读取pdf import extract_text_by_page
|
||||
from flask_app.main.按页读取pdf import extract_text_by_page
|
||||
|
||||
def check_strings_in_pdf(file_path):
|
||||
judge_list=['施工机械设备', '企业信息登记']
|
||||
|
@ -2,10 +2,10 @@ import os
|
||||
import sys
|
||||
from 货物标截取pdf import truncate_pdf_main
|
||||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
|
||||
# from ..main.format_change import docx2pdf
|
||||
# from ..main.多线程提问 import multi_threading
|
||||
from ..main.通义千问long import upload_file,qianwen_long
|
||||
from ..main.json_utils import clean_json_string
|
||||
from flask_app.main.format_change import docx2pdf
|
||||
from flask_app.main.多线程提问 import multi_threading
|
||||
from flask_app.main.通义千问long import upload_file,qianwen_long
|
||||
from flask_app.main.json_utils import clean_json_string
|
||||
|
||||
def generate_key_paths(data, parent_key=''):
|
||||
key_paths = []
|
||||
@ -22,7 +22,7 @@ def generate_key_paths(data, parent_key=''):
|
||||
#获取采购清单
|
||||
def fetch_purchasing_list(file_path):
|
||||
output_folder="C:\\Users\\Administrator\\Desktop\\货物标\\output"
|
||||
# file_path = docx2pdf(file_path)
|
||||
file_path = docx2pdf(file_path)
|
||||
truncate_path=truncate_pdf_main(file_path,output_folder,1)
|
||||
user_query="这是一份货物标中采购要求部分的内容,你需要摘取出需要采购的系统(货物),一个大系统(大项)中可能包含多个小系统(小项),你需要保留这种层次关系,给出货物名称,请以json格式返回,外层键名为\"采购需求\",嵌套键名为对应的系统名称或货物名称,无需给出采购数量和单位,如有未知内容,在对应键值处填\"未知\"。"
|
||||
file_id=upload_file(truncate_path)
|
||||
@ -32,5 +32,5 @@ def fetch_purchasing_list(file_path):
|
||||
print(keys_list)
|
||||
|
||||
if __name__ == "__main__":
|
||||
file_path="C:\\Users\\Administrator\\Desktop\\货物标\\output1\\招招招标文件(一中多媒体报告厅教学设备)_20240829101650_tobidders_notice_table.pdf"
|
||||
file_path="C:\\Users\\Administrator\\Desktop\\货物标\\output1\\招标文件(107国道).docf"
|
||||
fetch_purchasing_list(file_path)
|
||||
|
Loading…
x
Reference in New Issue
Block a user