diff --git a/Dockerfile b/Dockerfile index 2927062..da62327 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,9 +6,6 @@ WORKDIR /ZbparseProjects RUN pip config set global.progress_bar off -# 复制 requirements.txt 并安装依赖,确保每次构建都可以使用缓存(除非 requirements.txt 改变) -COPY ../../requirements.txt . - # 安装依赖 RUN pip install --upgrade pip --default-timeout=100 \ && pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt @@ -26,4 +23,4 @@ ENV ALIBABA_CLOUD_ACCESS_KEY_SECRET=88oyw7LniqV8i0SnOuSFS5lprfrPtw EXPOSE 5000 # 在容器启动时运行你的应用 -CMD ["python", "main/start_up.py"] +CMD ["python", "flask_app/main/start_up.py"] diff --git a/flask_app/main/format_change.py b/flask_app/main/format_change.py index 5c448bd..fe7eb44 100644 --- a/flask_app/main/format_change.py +++ b/flask_app/main/format_change.py @@ -1,8 +1,7 @@ import json import os - import requests -from download import download_file +from flask_app.main.download import download_file def upload_file(file_path, url): diff --git a/flask_app/main/start_up.py b/flask_app/main/start_up.py index be35118..1adc1f1 100644 --- a/flask_app/main/start_up.py +++ b/flask_app/main/start_up.py @@ -5,11 +5,11 @@ import time import uuid from datetime import datetime, timedelta -from flask import Flask, request, jsonify, send_file, Response, stream_with_context +from flask import Flask, request, jsonify, Response, stream_with_context import json import os -from download import download_file -from 招标文件解析 import main_processing +from flask_app.main.download import download_file +from flask_app.main.招标文件解析 import main_processing app = Flask(__name__) class CSTFormatter(logging.Formatter): diff --git a/flask_app/main/判断是否分包等.py b/flask_app/main/判断是否分包等.py index 3cff093..da5a6d0 100644 --- a/flask_app/main/判断是否分包等.py +++ b/flask_app/main/判断是否分包等.py @@ -3,9 +3,9 @@ import json import os.path import re from json_utils import extract_content_from_json # 可以选择性地导入特定的函数 -from 提取打勾符号 import read_pdf_and_judge_main -from 通义千问 import qianwen_ask -from 通义千问long import qianwen_long,upload_file +from flask_app.main.提取打勾符号 import read_pdf_and_judge_main +from flask_app.main.通义千问 import qianwen_ask +from flask_app.main.通义千问long import qianwen_long,upload_file #调用qianwen-ask之后,组织提示词问百炼。 def construct_judge_questions(json_data): diff --git a/flask_app/main/商务标技术标整合.py b/flask_app/main/商务标技术标整合.py index 642a191..d2a40ff 100644 --- a/flask_app/main/商务标技术标整合.py +++ b/flask_app/main/商务标技术标整合.py @@ -1,7 +1,7 @@ import json from json_utils import clean_json_string, combine_json_results -from 通义千问long import upload_file, qianwen_long +from flask_app.main.通义千问long import upload_file, qianwen_long def combine_business_and_bidding(data): diff --git a/flask_app/main/基础信息整合.py b/flask_app/main/基础信息整合.py index d3e6cc4..624f311 100644 --- a/flask_app/main/基础信息整合.py +++ b/flask_app/main/基础信息整合.py @@ -1,8 +1,8 @@ from json_utils import clean_json_string, nest_json_under_key,rename_outer_key, combine_json_results -from 投标人须知正文提取指定内容 import extract_from_notice -from 判断是否分包等 import judge_whether_main, read_questions_from_judge -from 多线程提问 import read_questions_from_file, multi_threading -from 通义千问long import upload_file +from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice +from flask_app.main.判断是否分包等 import judge_whether_main, read_questions_from_judge +from flask_app.main.多线程提问 import read_questions_from_file, multi_threading +from flask_app.main.通义千问long import upload_file def combine_basic_info(baseinfo_list): combined_baseinfo_list = [] key_groups = { @@ -68,7 +68,7 @@ def project_basic_info(knowledge_name,truncate0,output_folder,clause_path): # print("starting基础信息...") baseinfo_list = [] # baseinfo_file_path='../static/提示词/前两章提问总结.txt' - baseinfo_file_path = 'static/提示词/前两章提问总结.txt' # 替换为你的txt文件路径 + baseinfo_file_path = 'flask_app/static/提示词/前两章提问总结.txt' # 替换为你的txt文件路径 questions = read_questions_from_file(baseinfo_file_path) res1 = multi_threading(questions, knowledge_name) for _, response in res1: # _占位,代表ques;response[0]也是ques;response[1]是ans @@ -84,7 +84,7 @@ def project_basic_info(knowledge_name,truncate0,output_folder,clause_path): # chosen_numbers, merged = judge_whether_main(truncate0,output_folder) baseinfo_list.append(merged) # judge_file_path = '../static/提示词/是否相关问题.txt' - judge_file_path ='static/提示词/是否相关问题.txt' + judge_file_path ='flask_app/static/提示词/是否相关问题.txt' judge_questions = read_questions_from_judge(judge_file_path, chosen_numbers) diff --git a/flask_app/main/多线程提问.py b/flask_app/main/多线程提问.py index 3cfdc10..1f2de7e 100644 --- a/flask_app/main/多线程提问.py +++ b/flask_app/main/多线程提问.py @@ -7,7 +7,7 @@ import time from dashscope import Assistants, Messages, Runs, Threads from llama_index.indices.managed.dashscope import DashScopeCloudRetriever -from 通义千问long import qianwen_long, upload_file +from flask_app.main.通义千问long import qianwen_long, upload_file prompt = """ # 角色 diff --git a/flask_app/main/形式响应评审.py b/flask_app/main/形式响应评审.py index 911a59a..a1505fc 100644 --- a/flask_app/main/形式响应评审.py +++ b/flask_app/main/形式响应评审.py @@ -2,10 +2,9 @@ import re import json import time -from 多线程提问 import multi_threading -from 根据条款号整合json import process_and_merge_entries -from 通义千问long import qianwen_long -from json_utils import extract_content_from_json +from flask_app.main.多线程提问 import multi_threading +from flask_app.main.根据条款号整合json import process_and_merge_entries +from flask_app.main.json_utils import extract_content_from_json prompt = """ # 角色 你是一个文档处理专家,专门负责理解和操作基于特定内容的文档任务,这包括解析、总结、搜索或生成与给定文档相关的各类信息。 diff --git a/flask_app/main/招标文件解析.py b/flask_app/main/招标文件解析.py index 67cdad6..fd214fb 100644 --- a/flask_app/main/招标文件解析.py +++ b/flask_app/main/招标文件解析.py @@ -3,18 +3,18 @@ import json import logging import os import time -from 截取pdf import truncate_pdf_multiple -from table_content_extraction import extract_tables_main -from 知识库操作 import addfileToKnowledge, deleteKnowledge -from 投标人须知正文条款提取成json文件 import convert_clause_to_json -from json_utils import nest_json_under_key, transform_json_values, combine_json_results -from 无效标和废标和禁止投标整合 import combine_find_invalid -from 投标人须知正文提取指定内容 import extract_from_notice +from flask_app.main.截取pdf import truncate_pdf_multiple +from flask_app.main.table_content_extraction import extract_tables_main +from flask_app.main.知识库操作 import addfileToKnowledge, deleteKnowledge +from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json +from flask_app.main.json_utils import nest_json_under_key, transform_json_values, combine_json_results +from flask_app.main.无效标和废标和禁止投标整合 import combine_find_invalid +from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice import concurrent.futures -from 基础信息整合 import project_basic_info -from 资格审查模块 import combine_review_standards -from 商务标技术标整合 import combine_evaluation_standards -from format_change import pdf2docx,docx2pdf +from flask_app.main.基础信息整合 import project_basic_info +from flask_app.main.资格审查模块 import combine_review_standards +from flask_app.main.商务标技术标整合 import combine_evaluation_standards +from flask_app.main.format_change import pdf2docx,docx2pdf global_logger=None def get_global_logger(unique_id): diff --git a/flask_app/main/无效标和废标和禁止投标整合.py b/flask_app/main/无效标和废标和禁止投标整合.py index 6ba0152..877ba85 100644 --- a/flask_app/main/无效标和废标和禁止投标整合.py +++ b/flask_app/main/无效标和废标和禁止投标整合.py @@ -3,10 +3,10 @@ import json import os.path import time import re -from json_utils import combine_json_results, nest_json_under_key -from 通义千问long import upload_file, qianwen_long +from flask_app.main.json_utils import combine_json_results, nest_json_under_key +from flask_app.main.通义千问long import upload_file, qianwen_long from concurrent.futures import ThreadPoolExecutor -from 禁止投标情形 import find_forbidden +from flask_app.main.禁止投标情形 import find_forbidden #如果当前段落有序号,则向下匹配直接遇到相同的序号样式 #如果当前段落无序号,则向下匹配序号,把若干同类的序号都摘出来。 diff --git a/flask_app/main/知识库操作.py b/flask_app/main/知识库操作.py index e383b02..78d674a 100644 --- a/flask_app/main/知识库操作.py +++ b/flask_app/main/知识库操作.py @@ -4,7 +4,7 @@ import uuid from llama_index.readers.dashscope.base import DashScopeParse from llama_index.readers.dashscope.utils import ResultType from llama_index.indices.managed.dashscope import DashScopeCloudIndex -from 删除知识库 import delete_index, create_client +from flask_app.main.删除知识库 import delete_index, create_client def addfileToKnowledge(filepath,knowledge_name): diff --git a/flask_app/main/禁止投标情形.py b/flask_app/main/禁止投标情形.py index 089f78c..436477c 100644 --- a/flask_app/main/禁止投标情形.py +++ b/flask_app/main/禁止投标情形.py @@ -5,8 +5,7 @@ import re from PyPDF2 import PdfWriter, PdfReader -from 通义千问long import upload_file, qianwen_long -from json_utils import clean_json_string +from flask_app.main.通义千问long import upload_file, qianwen_long def extract_and_format_from_paths(json_paths, includes): """ diff --git a/flask_app/main/资格审查模块.py b/flask_app/main/资格审查模块.py index 45ea6f4..784ddc5 100644 --- a/flask_app/main/资格审查模块.py +++ b/flask_app/main/资格审查模块.py @@ -1,11 +1,11 @@ import json import os -from 投标人须知正文条款提取成json文件 import convert_clause_to_json -from json_utils import nest_json_under_key, extract_content_from_json -from 形式响应评审 import process_reviews -from 资格评审 import process_qualification -from 通义千问long import upload_file, qianwen_long +from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json +from flask_app.main.json_utils import nest_json_under_key, extract_content_from_json +from flask_app.main.形式响应评审 import process_reviews +from flask_app.main.资格评审 import process_qualification +from flask_app.main.通义千问long import upload_file, qianwen_long def combine_review_standards(truncate1,truncate4,knowledge_name,truncate0_jsonpath,clause_path): #评标办法前附表 diff --git a/flask_app/main/资格评审.py b/flask_app/main/资格评审.py index b753641..5ed586c 100644 --- a/flask_app/main/资格评审.py +++ b/flask_app/main/资格评审.py @@ -1,9 +1,9 @@ #资格审查中,首先排除'联合体投标'和'不得存在的情况',有'符合'等的,加入matching_keys列表,否则保留原字典 import re -from json_utils import clean_json_string, combine_json_results, add_keys_to_json -from 多线程提问 import multi_threading, read_questions_from_file -from 通义千问long import upload_file +from flask_app.main.json_utils import clean_json_string, combine_json_results, add_keys_to_json +from flask_app.main.多线程提问 import multi_threading, read_questions_from_file +from flask_app.main.通义千问long import upload_file def merge_dictionaries_under_common_key(dicts, common_key): # 初始化一个空字典来保存合并的结果 @@ -70,7 +70,7 @@ def process_qualification(qualification_review,truncate4,knowledge_name): merged_dict = merge_dictionaries_under_common_key(res_list, '资格评审') qualify_list = [] # qualification_review_file_path = '../static/提示词/资格评审问题.txt' # 替换为你的txt文件路径 - qualification_review_file_path='static/提示词/资格评审问题.txt' + qualification_review_file_path='flask_app/static/提示词/资格评审问题.txt' qualification_review_questions = read_questions_from_file(qualification_review_file_path) # 联合体投标 results1 = multi_threading(qualification_review_questions, knowledge_name) for _, response in results1: # _占位,代表ques;response[0]也是ques;response[1]是ans diff --git a/flask_app/main/资格评审前判断.py b/flask_app/main/资格评审前判断.py index 9a220d5..48fc979 100644 --- a/flask_app/main/资格评审前判断.py +++ b/flask_app/main/资格评审前判断.py @@ -1,4 +1,4 @@ -from 按页读取pdf import extract_text_by_page +from flask_app.main.按页读取pdf import extract_text_by_page def check_strings_in_pdf(file_path): judge_list=['施工机械设备', '企业信息登记'] diff --git a/flask_app/货物标/extract_procurement_requirements.py b/flask_app/货物标/extract_procurement_requirements.py index 88a6b04..379a9fd 100644 --- a/flask_app/货物标/extract_procurement_requirements.py +++ b/flask_app/货物标/extract_procurement_requirements.py @@ -2,10 +2,10 @@ import os import sys from 货物标截取pdf import truncate_pdf_main sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))) -# from ..main.format_change import docx2pdf -# from ..main.多线程提问 import multi_threading -from ..main.通义千问long import upload_file,qianwen_long -from ..main.json_utils import clean_json_string +from flask_app.main.format_change import docx2pdf +from flask_app.main.多线程提问 import multi_threading +from flask_app.main.通义千问long import upload_file,qianwen_long +from flask_app.main.json_utils import clean_json_string def generate_key_paths(data, parent_key=''): key_paths = [] @@ -22,7 +22,7 @@ def generate_key_paths(data, parent_key=''): #获取采购清单 def fetch_purchasing_list(file_path): output_folder="C:\\Users\\Administrator\\Desktop\\货物标\\output" - # file_path = docx2pdf(file_path) + file_path = docx2pdf(file_path) truncate_path=truncate_pdf_main(file_path,output_folder,1) user_query="这是一份货物标中采购要求部分的内容,你需要摘取出需要采购的系统(货物),一个大系统(大项)中可能包含多个小系统(小项),你需要保留这种层次关系,给出货物名称,请以json格式返回,外层键名为\"采购需求\",嵌套键名为对应的系统名称或货物名称,无需给出采购数量和单位,如有未知内容,在对应键值处填\"未知\"。" file_id=upload_file(truncate_path) @@ -32,5 +32,5 @@ def fetch_purchasing_list(file_path): print(keys_list) if __name__ == "__main__": - file_path="C:\\Users\\Administrator\\Desktop\\货物标\\output1\\招招招标文件(一中多媒体报告厅教学设备)_20240829101650_tobidders_notice_table.pdf" + file_path="C:\\Users\\Administrator\\Desktop\\货物标\\output1\\招标文件(107国道).docf" fetch_purchasing_list(file_path)