10.14
commit 82de3ae202 (parent 9a2141c3a9)
@@ -5,33 +5,48 @@ from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
 from flask_app.main.判断是否分包等 import judge_whether_main, read_questions_from_judge
 from flask_app.main.多线程提问 import read_questions_from_file, multi_threading
 from flask_app.main.通义千问long import upload_file
-def combine_basic_info(baseinfo_list):
+def aggregate_basic_info(baseinfo_list):
+    """
+    将基础信息列表中的数据进行合并和分类。
+
+    参数:
+    - baseinfo_list (list): 包含多个基础信息的列表。
+
+    返回:
+    - list: 合并和分类后的基础信息列表。
+    """
     combined_baseinfo_list = []
     key_groups = {
-        "招标人/代理信息": ["招标人","招标人联系方式", "招标代理机构","招标代理机构联系方式"],
-        "项目信息": ["工程名称", "招标编号","工程概况","招标范围","招标控制价","投标竞争下浮率"],
-        "关键时间/内容":["投标文件递交截止日期","递交方式","投标人要求澄清招标文件的截止时间","投标有效期","评标结果公示媒介"],
-        "保证金相关":['质量保证金','退还投标保证金'],
-        "其他信息":["重新招标、不再招标和终止招标","是否退还投标文件","费用承担"]
+        "招标人/代理信息": ["招标人", "招标人联系方式", "招标代理机构", "招标代理机构联系方式"],
+        "项目信息": ["工程名称", "招标编号", "工程概况", "招标范围", "招标控制价", "投标竞争下浮率"],
+        "关键时间/内容": [
+            "投标文件递交截止日期",
+            "递交方式",
+            "投标人要求澄清招标文件的截止时间",
+            "投标有效期",
+            "评标结果公示媒介"
+        ],
+        "保证金相关": ["质量保证金", "退还投标保证金"],
+        "其他信息": [
+            "重新招标、不再招标和终止招标",
+            "是否退还投标文件",
+            "费用承担"
+        ]
     }
-    # 将所有基础信息合并到一个字典中
     combined_data = {}
     relevant_keys_detected = set()
 
-    # 预处理以决定哪些键名将被使用
+    # 合并所有基础信息并收集相关键
     for baseinfo in baseinfo_list:
        json_data = clean_json_string(baseinfo)
        combined_data.update(json_data)
        relevant_keys_detected.update(json_data.keys())
-        # for key in relevant_keys.keys():
-        #     if key in json_data:
-        #         relevant_keys[key] = True
 
-    # 根据检测到的键动态调整 key_groups
+    # 动态调整键组
     dynamic_key_handling(key_groups, relevant_keys_detected)
 
-    # 使用合并后的字典创建最终输出
+    # 按键组分类并嵌套
     for group_name, keys in key_groups.items():
         group_data = {key: combined_data.get(key, "未提供") for key in keys}
         combined_json = nest_json_under_key(group_data, group_name)
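
For context on the grouping loop above, here is a minimal runnable sketch of its final step, with a stand-in for nest_json_under_key (the real helper lives in flask_app.main.json_utils; judging by a comment later in this commit it returns a JSON string, and the stand-in mirrors that — this is an assumption, not the real implementation):

import json

# 示意用的简化版 nest_json_under_key;真实实现在 flask_app.main.json_utils,
# 据本提交后文注释其返回值是 json 字符串,这里保持一致(假设)。
def nest_json_under_key(data: dict, key: str) -> str:
    return json.dumps({key: data}, ensure_ascii=False)

combined_data = {"招标人": "某公司"}  # 虚构示例数据
group_data = {k: combined_data.get(k, "未提供") for k in ["招标人", "招标人联系方式"]}
print(nest_json_under_key(group_data, "招标人/代理信息"))
# {"招标人/代理信息": {"招标人": "某公司", "招标人联系方式": "未提供"}}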
@@ -74,69 +89,72 @@ def judge_consortium_bidding(baseinfo_list):
     # 更新原始列表,如果你想保留修改
     baseinfo_list[:] = updated_list
     return accept_bidding
-def project_basic_info(knowledge_name,truncate0,output_folder,clause_path): #投标人须知前附表
-    # 调用大模型回答项目基础信息
+def combine_basic_info(knowledge_name, truncate0, output_folder, clause_path):
+    """
+    综合和处理基础信息,生成最终的基础信息字典。
+
+    参数:
+    - knowledge_name (str): 知识名称。
+    - truncate0 (str): 文件路径。
+    - output_folder (str): 输出文件夹路径。
+    - clause_path (str): 条款路径。
+
+    返回:
+    - dict: 综合后的基础信息。
+    """
     baseinfo_list = []
-    baseinfo_file_path='flask_app/static/提示词/前两章提问总结.txt'
-    # baseinfo_file_path='D:\\flask_project\\flask_app\\static\\提示词\\前两章提问总结.txt'
+    baseinfo_file_path = 'flask_app/static/提示词/前两章提问总结.txt'
     questions = read_questions_from_file(baseinfo_file_path)
     res1 = multi_threading(questions, knowledge_name)
 
-    for _, response in res1: # _占位,代表ques;response[0]也是ques;response[1]是ans
+    for index, response in res1:
         try:
-            if response and len(response) > 1: # 检查response存在且有至少两个元素
+            if response and len(response) > 1:
                 baseinfo_list.append(response[1])
             else:
-                print(f"基础信息整合: Warning: Missing or incomplete response data for query index {_}.")
+                print(f"基础信息整合: Warning: Missing or incomplete response data for query index {index}.")
         except Exception as e:
-            print(f"基础信息整合: Error processing response for query index {_}: {e}")
+            print(f"基础信息整合: Error processing response for query index {index}: {e}")
 
     # 判断是否分包、是否需要递交投标保证金等
-    chosen_numbers, merged = judge_whether_main(truncate0,output_folder)
+    chosen_numbers, merged = judge_whether_main(truncate0, output_folder)
     baseinfo_list.append(merged)
-    judge_file_path ='flask_app/static/提示词/是否相关问题.txt'
-    # judge_file_path='D:\\flask_project\\flask_app\\static\\提示词\\是否相关问题.txt'
 
+    judge_file_path = 'flask_app/static/提示词/是否相关问题.txt'
     judge_questions = read_questions_from_judge(judge_file_path, chosen_numbers)
+    judge_consortium = judge_consortium_bidding(baseinfo_list)  # 通过招标公告判断是否接受联合体投标
 
-    judge_consortium = judge_consortium_bidding(baseinfo_list) #通过招标公告判断是否接受联合体投标
     if judge_consortium:
-        judge_consortium_question = "该招标文件对于联合体投标的要求是怎样的,请按json格式给我提供信息,外层键名为'联合体投标要求',其中有一个嵌套键值对为:\"是否接受联合体投标\":\"是\""
+        judge_consortium_question = (
+            "该招标文件对于联合体投标的要求是怎样的,请按json格式给我提供信息,"
+            "外层键名为'联合体投标要求',其中有一个嵌套键值对为:\"是否接受联合体投标\":\"是\""
+        )
         judge_questions.append(judge_consortium_question)
 
-    file_id=upload_file(truncate0)
-    res2 = multi_threading(judge_questions, "",file_id,2) #调用千问-long
+    file_id = upload_file(truncate0)
+    res2 = multi_threading(judge_questions, "", file_id, 2)  # 调用千问-long
 
     if not res2:
-        print("基础信息整合: multi_threading errror!")
+        print("基础信息整合: multi_threading error!")
     else:
-        # 打印结果
         for question, response in res2:
             baseinfo_list.append(response)
-    # for _, response in res2: # _占位,代表ques;response[0]也是ques;response[1]是ans #调用百炼rag
-    #     try:
-    #         if response and len(response) > 1: # 检查response存在且有至少两个元素
-    #             baseinfo_list.append(response[1])
-    #         else:
-    #             print(f"基础信息整合: Warning: Missing or incomplete response data for query index {_}.")
-    #     except Exception as e:
-    #         print(f"基础信息整合: Error processing response for query index {_}: {e}")
 
-    rebidding_situation = extract_from_notice(clause_path, 3) #"重新招标, 不再招标和终止招标"需从投标人须知正文提取
-    update_json=rename_outer_key(rebidding_situation,"重新招标、不再招标和终止招标")
+    rebidding_situation = extract_from_notice(clause_path, 3)  # "重新招标, 不再招标和终止招标"需从投标人须知正文提取
+    update_json = rename_outer_key(rebidding_situation, "重新招标、不再招标和终止招标")
     baseinfo_list.append(update_json)
 
-    update_baseinfo_list=combine_basic_info(baseinfo_list) #整合基础信息核心代码
-
-    baseinfo_combined_res = combine_json_results(update_baseinfo_list) # 返回值是字典
-    # return nest_json_under_key(baseinfo_combined_res, "基础信息") #返回值是json字符串
-    return {"基础信息":baseinfo_combined_res}
+    aggregated_baseinfo = aggregate_basic_info(baseinfo_list)  # 整合基础信息核心代码
+    baseinfo_combined_res = combine_json_results(aggregated_baseinfo)  # 返回值是字典
+    return {"基础信息": baseinfo_combined_res}
 
 
 if __name__ == "__main__":
     knowledge_name = "ztb"
     output_folder="C:\\Users\Administrator\Desktop\\fsdownload\\3424b7cb-1f85-44b4-a432-44539b870405"
     truncate0="C:\\Users\Administrator\Desktop\\fsdownload\\3424b7cb-1f85-44b4-a432-44539b870405\\ztbfile_tobidders_notice_table.pdf"
     clause_path="C:\\Users\Administrator\Desktop\\fsdownload\\3424b7cb-1f85-44b4-a432-44539b870405\\clause1.json"
-    res=project_basic_info(knowledge_name,truncate0,output_folder,clause_path)
+    res=combine_basic_info(knowledge_name,truncate0,output_folder,clause_path)
     print(json.dumps(res,ensure_ascii=False,indent=4))
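
For reference, the shape the renamed entry point now returns, per the return statement above and assuming combine_json_results merges the per-group dicts (which matches the hard-coded example later in this commit). All values below are placeholders:

# combine_basic_info() 返回值形状示意;组名来自 key_groups,键值为占位符
expected_shape = {
    "基础信息": {
        "招标人/代理信息": {"招标人": "...", "招标人联系方式": "..."},
        "项目信息": {"工程名称": "...", "招标控制价": "..."},
        "关键时间/内容": {"投标文件递交截止日期": "...", "投标有效期": "..."},
        "保证金相关": {"质量保证金": "...", "退还投标保证金": "..."},
        "其他信息": {"重新招标、不再招标和终止招标": "...", "费用承担": "..."}
    }
}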
@@ -1,10 +1,12 @@
 # 基于知识库提问的通用模板,
 # assistant_id
+import json
+import os
 import re
 import queue
 import concurrent.futures
 import time
+import requests
 from dashscope import Assistants, Messages, Runs, Threads
 from llama_index.indices.managed.dashscope import DashScopeCloudRetriever
 from flask_app.main.通义千问long import qianwen_long, upload_file
@@ -100,6 +102,98 @@ def rag_assistant(knowledge_name):
     )
     return assistant
 
+#TODO:http格式,有bug还没修改
+def create_assistant(knowledge_name):
+    """
+    Create an assistant using DashScope API via HTTP request based on the provided knowledge name.
+
+    Parameters:
+        knowledge_name (str): The name of the knowledge base to associate with the assistant.
+
+    Returns:
+        dict: Response from the API containing assistant details.
+
+    Raises:
+        ValueError: If the DASHSCOPE_API_KEY environment variable is not set.
+        Exception: If any error occurs during the HTTP request.
+    """
+    # Step 1: Initialize the Retriever and get the Pipeline ID
+    try:
+        retriever = DashScopeCloudRetriever(knowledge_name)
+        pipeline_id = str(retriever.pipeline_id)
+    except Exception as e:
+        print(f"Error retrieving pipeline ID for knowledge '{knowledge_name}': {e}")
+        return None
+
+    # Step 2: Fetch the API Key from Environment Variables
+    api_key = os.getenv("DASHSCOPE_API_KEY")
+    if not api_key:
+        raise ValueError("DASHSCOPE_API_KEY environment variable is not set.")
+
+    # Step 3: Define the API Endpoint and Headers
+    url = 'https://dashscope.aliyuncs.com/api/v1/assistants'
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {api_key}"
+    }
+
+    # Step 4: Construct the Instructions
+    instructions = (
+        "请记住以下材料,他们对回答问题有帮助,请你简洁准确地给出回答,不要给出无关内容。${documents}"
+    )
+
+    # Step 5: Define the Tools
+    tools = [
+        {
+            "type": "code_interpreter"
+        },
+        {
+            "type": "rag",
+            "prompt_ra": {
+                "pipeline_id": pipeline_id,
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query_word": {
+                            "type": "str",
+                            "value": "${documents}"
+                        }
+                    }
+                }
+            }
+        }
+    ]
+
+    # Step 6: Construct the Payload
+    payload = {
+        "model": "qwen-max",
+        "name": "智能小助手",  # "Smart Helper" in Chinese
+        "description": "智能助手,支持知识库查询和插件调用。",
+        "temperature": 0.3,
+        "instructions": instructions,
+        "tools": tools,
+        "file_ids": [],  # Add file IDs if necessary
+        "metadata": {}  # Add metadata if necessary
+    }
+
+    # Optional: If you have specific file_ids or metadata, you can modify the payload accordingly
+    # For example:
+    # payload["file_ids"] = ["file_id_1", "file_id_2"]
+    # payload["metadata"] = {"key1": "value1", "key2": "value2"}
+
+    # Step 7: Make the HTTP POST Request
+    try:
+        response = requests.post(url, headers=headers, data=json.dumps(payload))
+        response.raise_for_status()  # Raises HTTPError for bad responses (4xx or 5xx)
+        assistant = response.json()
+        print("Assistant created successfully:")
+        print(json.dumps(assistant, indent=4, ensure_ascii=False))
+        return assistant
+    except requests.exceptions.HTTPError as http_err:
+        print(f"HTTP error occurred: {http_err} - Response: {response.text}")
+    except Exception as err:
+        print(f"An error occurred: {err}")
+
+
 def pure_assistant():
     assistant = Assistants.create(
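
Since create_assistant is flagged as buggy (see the TODO above), a minimal smoke test can help isolate whether the failure is in the payload or the endpoint. A sketch only, assuming it runs inside this module with DASHSCOPE_API_KEY exported and an existing knowledge base named "ztb"; the "id" field in the response is an assumption about the DashScope reply:

# create_assistant() 冒烟测试示意(假设 DASHSCOPE_API_KEY 已配置,"ztb" 知识库存在)
if __name__ == "__main__":
    assistant = create_assistant("ztb")
    if assistant is None:
        print("创建失败: 无法获取 pipeline_id 或 HTTP 请求出错")
    else:
        # 假设返回的 assistant 对象包含 id 字段
        print("assistant id:", assistant.get("id", "<missing>"))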
@@ -119,6 +213,7 @@ def llm_call(question, knowledge_name,file_id, result_queue, ans_index, llm_type
     if llm_type==1:
         print(f"rag_assistant! question:{question}")
         assistant = rag_assistant(knowledge_name)
+        # assistant=create_assistant(knowledge_name)
     elif llm_type==2:
         print(f"qianwen_long! question:{question}")
         qianwen_res = qianwen_long(file_id,question)
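
As the comments in this file note, llm_type selects the backend: 1 routes through the 百炼 RAG assistant, 2 through qianwen-long. The two call shapes used elsewhere in this commit, side by side (placeholder values; the bare two-argument call appears to default to the RAG path, an inference from the calls above rather than a documented contract):

# llm_type 调用方式对照;知识库名与文件路径为占位值
res_rag = multi_threading(questions, "ztb")            # 百炼 rag_assistant,按知识库名检索
file_id = upload_file("some.pdf")                      # qianwen-long 需先上传文件取得 file_id
res_long = multi_threading(questions, "", file_id, 2)  # llm_type=2: 千问-long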
@@ -185,24 +280,25 @@ if __name__ == "__main__":
     # print(f"Question: {question}")
     # print(f"Response: {response}")
 
-    # file_path = "C:\\Users\\Administrator\\Desktop\\招标文件\\output1\\ztb_evaluation_method.pdf"
-    # file_id = upload_file(file_path)
-    # questions=["根据该文档中的评标办法前附表,请你列出该文件的技术标,以json的格式返回结果","根据该文档中的评标办法前附表,请你列出该文件的商务标,以json的格式返回结果","根据该文档中的评标办法前附表,请你列出该文件的投标报价,以json的格式返回结果"]
-    # results=multi_threading(questions,"",file_id,2) #1代表使用百炼rag 2代表使用qianwen-long
-    # if not results:
-    #     print("errror!")
-    # else:
-    #     # 打印结果
-    #     for question, response in results:
-    #         print(f"Question: {question}")
-    #         print(f"Response: {response}")
-    ques=["关于'资格要求',本采购文件第一章第二款要求的内容是怎样的?请按json格式给我提供信息,键名为'资格要求',而键值需要完全与原文保持一致,不要擅自总结、删减,如果存在未知信息,请在对应键值处填'未知'。"]
-    # ques=["该招标文件的工程名称(项目名称)是?招标编号是?招标人是?招标代理机构是?请按json格式给我提供信息,键名分别是'工程名称','招标编号','招标人','招标代理机构',若存在未知信息,在对应的键值中填'未知'。","该招标文件的工程概况(或项目概况)是?招标范围是?请按json格式给我提供信息,键名分别为'工程概况','招标范围',若存在嵌套信息,嵌套内容键名以文件中对应字段命名,若存在未知信息,在对应的键值中填'未知'。"]
-    results = multi_threading(ques, "6.2视频会议docx")
+    file_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\6.2定版视频会议磋商文件(1)\\6.2定版视频会议磋商文件_1-21.pdf"
+    file_id = upload_file(file_path)
+    questions=["该招标文件的项目名称是?项目编号(或招标编号)是?采购人(或招标人)是?采购代理机构(或招标代理机构)是?请按json格式给我提供信息,键名分别是'项目名称','项目编号','采购人','采购代理机构',若存在未知信息,在对应的键值中填'未知'。","该招标文件的项目概况是?项目基本情况是?请按json格式给我提供信息,键名分别为'项目概况','项目基本情况',若存在嵌套信息,嵌套内容键名以文件中对应字段命名,而嵌套键值必须与原文保持一致,若存在未知信息,在对应的键值中填'未知'。"]
+    results=multi_threading(questions,"",file_id,2) #1代表使用百炼rag 2代表使用qianwen-long
     if not results:
         print("errror!")
     else:
         # 打印结果
         for question, response in results:
             print(f"Question: {question}")
             print(f"Response: {response}")
+
+    # ques=["关于'资格要求',本采购文件第一章第二款要求的内容是怎样的?请按json格式给我提供信息,键名为'资格要求',而键值需要完全与原文保持一致,不要擅自总结、删减,如果存在未知信息,请在对应键值处填'未知'。"]
+    # # ques=["该招标文件的工程名称(项目名称)是?招标编号是?招标人是?招标代理机构是?请按json格式给我提供信息,键名分别是'工程名称','招标编号','招标人','招标代理机构',若存在未知信息,在对应的键值中填'未知'。","该招标文件的工程概况(或项目概况)是?招标范围是?请按json格式给我提供信息,键名分别为'工程概况','招标范围',若存在嵌套信息,嵌套内容键名以文件中对应字段命名,若存在未知信息,在对应的键值中填'未知'。"]
+    # results = multi_threading(ques, "6.2视频会议docx")
+    # if not results:
+    #     print("errror!")
+    # else:
+    #     # 打印结果
+    #     for question, response in results:
+    #         print(f"Question: {question}")
+    #         print(f"Response: {response}")
@@ -9,11 +9,11 @@ from flask_app.main.截取pdf import truncate_pdf_multiple
 from flask_app.main.table_content_extraction import extract_tables_main
 from flask_app.main.知识库操作 import addfileToKnowledge, deleteKnowledge
 from flask_app.main.投标人须知正文条款提取成json文件 import convert_clause_to_json
-from flask_app.main.json_utils import nest_json_under_key, transform_json_values, combine_json_results
+from flask_app.main.json_utils import transform_json_values, combine_json_results
 from flask_app.main.无效标和废标和禁止投标整合 import combine_find_invalid
 from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
 import concurrent.futures
-from flask_app.main.基础信息整合 import project_basic_info
+from flask_app.main.基础信息整合 import combine_basic_info
 from flask_app.main.资格审查模块 import combine_review_standards
 from flask_app.main.商务标技术标整合 import combine_evaluation_standards
 from flask_app.main.format_change import pdf2docx, docx2pdf
@@ -96,7 +96,7 @@ def post_processing(data,includes):
 # 基本信息
 def fetch_project_basic_info(knowledge_name, truncate0, output_folder, clause_path):  # 投标人须知前附表
     logger.info("starting基础信息...")
-    basic_res = project_basic_info(knowledge_name, truncate0, output_folder, clause_path)
+    basic_res = combine_basic_info(knowledge_name, truncate0, output_folder, clause_path)
     logger.info("基础信息done")
     return basic_res
 
@@ -149,18 +149,14 @@ def fetch_invalid_requirements(invalid_docpath, output_folder, truncate0_jsonpath
 def fetch_bidding_documents_requirements(clause_path):
     logger.info("starting投标文件要求...")
     fetch_bidding_documents_requirements_json = extract_from_notice(clause_path, 1)
-    qualify_nested_res = nest_json_under_key(fetch_bidding_documents_requirements_json, "投标文件要求")
     logger.info("投标文件要求done...")
-    # return qualify_nested_res
     return {"投标文件要求":fetch_bidding_documents_requirements_json}
 
 # 开评定标流程
 def fetch_bid_opening(clause_path):
     logger.info("starting开评定标流程...")
     fetch_bid_opening_json = extract_from_notice(clause_path, 2)
-    qualify_nested_res = nest_json_under_key(fetch_bid_opening_json, "开评定标流程")
     logger.info("开评定标流程done...")
-    # return qualify_nested_res
     return {"开评定标流程":fetch_bid_opening_json}
 
 # def main_processing(output_folder, downloaded_file_path, file_type, unique_id): # file_type=1->docx file_type=2->pdf
@@ -65,12 +65,41 @@ def read_tables_from_docx(file_path):
     # 返回符合条件的单元格内容
     return cell_contents
 
 
+def read_docx_by_paragraphs(file_path):
+    """
+    按段落读取指定路径的 .docx 文件。
+
+    参数:
+    file_path (str): .docx 文件的路径。
+
+    返回:
+    list: 包含所有段落文本的列表。
+    """
+    try:
+        # 打开文档
+        doc = Document(file_path)
+
+        # 读取所有段落的文本
+        paragraphs = [para.text.strip() for para in doc.paragraphs if para.text.strip()]
+
+        return paragraphs
+    except Exception as e:
+        print(f"读取 .docx 文件时发生错误: {e}")
+        return []
+
+
 if __name__ == "__main__":
-    file_path = 'C:\\Users\\Administrator\\Desktop\\货物标\\zbfilesdocx\\2-招标文件.docx'
-    # output_folder = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfilesdocx\\tmp" # 前附表json文件
-    # read_docx(file_path)
-    read_docx_tables(file_path)
-    list=read_tables_from_docx(file_path)
-    for i in list:
-        print(i)
-        print("--------------")
+    file_path = 'D:\\flask_project\\flask_app\\static\\output\\015d997e-c32c-49d1-a611-a2e817ace6a1\\ztbfile.docx'
+    read_docx(file_path) #按行读取
+    # paragraphs = read_docx_by_paragraphs(file_path) #按段落读取
+    #
+    # print(f"共读取到 {len(paragraphs)} 个段落。\n")
+    # for idx, para in enumerate(paragraphs, 1):
+    #     print(f"段落 {idx}: {para}\n")
 
+    # read_docx_tables(file_path)
+    # list=read_tables_from_docx(file_path)
+    # for i in list:
+    #     print(i)
+    #     print("--------------")
flask_app/static/提示词/基本信息货物标.txt (new file, 26 lines)
@@ -0,0 +1,26 @@
+1.该招标文件的项目名称是?项目编号(或招标编号)是?采购人(或招标人)是?采购代理机构(或招标代理机构)是?请按json格式给我提供信息,键名分别是'项目名称','项目编号','采购人','采购代理机构',若存在未知信息,在对应的键值中填'未知'。
+
+2.该招标文件的项目概况是?项目基本情况是?请按json格式给我提供信息,键名分别为'项目概况','项目基本情况',若存在嵌套信息,嵌套内容键名以文件中对应字段命名,而嵌套键值必须与原文保持一致,若存在未知信息,在对应的键值中填'未知'。
+
+3.该招标文件的最高限价(或招标控制价)是?请按json格式给我提供信息,键名为'招标控制价',若存在未知信息,在对应的键值中填'未知'。
+
+4.投标文件(或响应文件)递交截止时间是?递交地点(或方式)是?请按json格式给我提供信息,键名分别是'投标文件递交截止日期','递交地点'(或'递交方式'),若存在未知信息,在对应的键值中填'未知'。
+
+5.采购人(招标人)和采购代理机构(或招标代理机构)的联系方式是?请按json格式给我提供信息,键名分别是'采购人联系方式','采购代理机构联系方式',若存在嵌套信息,嵌套内容键名以文件中对应字段命名,若存在未知信息,在对应的键值中填'未知'。
+
+6.该招标文件的信息公示媒介在哪?请按json格式给我提供信息,键名是'信息公示媒介',若存在未知信息,在对应的键值中填'未知'。
+
+7.该招标文件的投标竞争下浮率是多少?请按json格式给我提供信息,键名是'投标竞争下浮率',若存在未知信息,在对应的键值中填'未知'。
+
+8.该项目的投标有效期(或响应文件有效期)是什么?请按json格式给我提供信息,键名是'投标有效期',若存在未知信息,在对应的键值中填'未知'。
+
+9.该招标文件对投标人准备和参加投标活动发生的费用是如何规定的?请以json的格式给我提供信息,键名是'费用承担',若存在未知信息,在对应的键值中填'未知'。
+
+10.求澄清的招标文件截止时间是?请以json的格式给我提供信息,键名是'投标人要求澄清招标文件的截止时间',若存在未知信息,在对应的键值中填'未知'。
+
+11.该文档要求扣留的质量保证金百分比是多少,请以json格式给我提供信息,键名为'质量保证金',如果没有则以'未知'填充。
+
+12.该项目是否接受联合体投标?请按json格式给我提供信息,键名为'是否接受联合体投标','是否接受联合体投标'的键值仅限于'是'、'否'、'未知'。
+
+
+
@@ -1,4 +1,89 @@
+import json
+
+from flask_app.main.json_utils import clean_json_string, nest_json_under_key,rename_outer_key, combine_json_results
+from flask_app.main.多线程提问 import read_questions_from_file, multi_threading
+from flask_app.main.通义千问long import upload_file
+
+
+def dynamic_key_handling(key_groups, detected_keys):
+    # 检查和调整键组配置
+    for key in detected_keys:
+        if "投标保证金" in key or "履约保证金" in key:
+            key_groups["保证金相关"].append(key)
+        elif "是否接受联合体" in key:
+            key_groups["项目信息"].append(key)
+        elif "联合体投标要求" in key:
+            key_groups["项目信息"].append(key)
+        elif "分包" in key:
+            key_groups["项目信息"].append(key)
+        elif "踏勘现场" in key:
+            key_groups["其他信息"].append(key)
+        elif "投标预备会" in key:
+            key_groups["其他信息"].append(key)
+        elif "偏离" in key:
+            key_groups["其他信息"].append(key)
+
+def aggregate_basic_info(baseinfo_list):
+    """
+    将基础信息列表中的数据进行合并和分类。
+
+    参数:
+    - baseinfo_list (list): 包含多个基础信息的列表。
+
+    返回:
+    - list: 合并和分类后的基础信息列表。
+    """
+    combined_baseinfo_list = []
+    key_groups = {
+        "招标人/代理信息": ["招标人", "招标人联系方式", "招标代理机构", "招标代理机构联系方式"],
+        "项目信息": ["工程名称", "招标编号", "工程概况", "招标范围", "招标控制价", "投标竞争下浮率"],
+        "关键时间/内容": [
+            "投标文件递交截止日期",
+            "递交方式",
+            "投标人要求澄清招标文件的截止时间",
+            "投标有效期",
+            "评标结果公示媒介"
+        ],
+        "保证金相关": ["质量保证金", "退还投标保证金"],
+        "其他信息": [
+            "重新招标、不再招标和终止招标",
+            "是否退还投标文件",
+            "费用承担"
+        ]
+    }
+
+    combined_data = {}
+    relevant_keys_detected = set()
+
+    # 合并所有基础信息并收集相关键
+    for baseinfo in baseinfo_list:
+        json_data = clean_json_string(baseinfo)
+        combined_data.update(json_data)
+        relevant_keys_detected.update(json_data.keys())
+
+    # 动态调整键组
+    dynamic_key_handling(key_groups, relevant_keys_detected)
+
+    # 按键组分类并嵌套
+    for group_name, keys in key_groups.items():
+        group_data = {key: combined_data.get(key, "未提供") for key in keys}
+        combined_json = nest_json_under_key(group_data, group_name)
+        combined_baseinfo_list.append(combined_json)
+
+    return combined_baseinfo_list
+
 def combine_basic_info(knowledge_name,output_folder,clause_path):
+    # file_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\6.2定版视频会议磋商文件(1)\\6.2定版视频会议磋商文件_1-21.pdf"
+    # file_id = upload_file(file_path)
+    # baseinfo_file_path='flask_app/static/提示词/前两章提问总结.txt'
+    # questions=read_questions_from_file(baseinfo_file_path)
+    # results = multi_threading(questions, "", file_id, 2) # 1代表使用百炼rag 2代表使用qianwen-long
+    # if not results:
+    #     print("errror!")
+    # else:
+    #     # 打印结果
+    #     for question, response in results:
+    #         print(f"Question: {question}")
+    #         print(f"Response: {response}")
     baseinfo_combined_res={
     "招标人/代理信息": {
         "招标人": "黄石临空建设管理有限公司",
@@ -27,4 +112,12 @@ def combine_basic_info(knowledge_name,output_folder,clause_path):
         }
     }
     }
     return {"基础信息":baseinfo_combined_res}
+
+if __name__ == "__main__":
+    knowledge_name = "ztb"
+    output_folder="C:\\Users\Administrator\Desktop\\fsdownload\\3424b7cb-1f85-44b4-a432-44539b870405"
+    truncate0="C:\\Users\Administrator\Desktop\\fsdownload\\3424b7cb-1f85-44b4-a432-44539b870405\\ztbfile_tobidders_notice_table.pdf"
+    clause_path="C:\\Users\Administrator\Desktop\\fsdownload\\3424b7cb-1f85-44b4-a432-44539b870405\\clause1.json"
+    res=combine_basic_info(knowledge_name,output_folder,clause_path)
+    print(json.dumps(res,ensure_ascii=False,indent=4))
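
A self-contained sketch of how dynamic_key_handling routes detected keys into the groups. The routing rules are condensed from the function added above (branches that append to the same group are merged; behavior is unchanged for these keys), and the detected keys are made up:

# 复述上文 dynamic_key_handling 的路由规则并演示(检测到的键为虚构)
def dynamic_key_handling(key_groups, detected_keys):
    for key in detected_keys:
        if "投标保证金" in key or "履约保证金" in key:
            key_groups["保证金相关"].append(key)
        elif "是否接受联合体" in key or "联合体投标要求" in key or "分包" in key:
            key_groups["项目信息"].append(key)
        elif "踏勘现场" in key or "投标预备会" in key or "偏离" in key:
            key_groups["其他信息"].append(key)

key_groups = {"项目信息": [], "保证金相关": [], "其他信息": []}
dynamic_key_handling(key_groups, {"是否接受联合体投标", "投标保证金", "踏勘现场"})
print(key_groups)
# {'项目信息': ['是否接受联合体投标'], '保证金相关': ['投标保证金'], '其他信息': ['踏勘现场']}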
@@ -57,12 +57,12 @@ def extract_text_with_keywords(doc_path, keywords, follow_up_keywords):
             extracted_paragraphs[active_key] = [text]
         if match_keywords(text, follow_up_keywords):
             continue_collecting = True
-            section_number = re.match(r'(\d+(\s*\.\s*\d+)*)', text)
+            section_number = re.match(r'^(\d+(\s*\.\s*\d+)*)\s*、', text)  # 修改后的正则,支持 '数字 、' 格式
             if section_number:
                 current_section_number = section_number.group(1)
                 level_count = current_section_number.count('.')
 
-                # Pattern to match current level, e.g., 3.4.5
+                # Pattern to match current level, e.g., 3.4.5 或者 3
                 pattern = r'^' + (r'\d+\s*\.\s*') * level_count + r'\d+'
 
                 # Generate patterns for next section at same level and parent level
@@ -71,20 +71,23 @@ def extract_text_with_keywords(doc_path, keywords, follow_up_keywords):
 
                 # Next section at same level
                 parts[-1] = str(int(parts[-1]) + 1)
-                next_pattern = r'^' + r'\s*\.\s*'.join(parts)
+                next_pattern = r'^' + r'\.\s*'.join(parts)
                 matched_patterns.append(next_pattern)
 
                 # Parent section (if applicable)
                 if len(parts) > 1:
                     parent_section_parts = parts[:-1]
                     parent_section_parts[-1] = str(int(parent_section_parts[-1]) + 1)
-                    parent_pattern = r'^' + r'\s*\.\s*'.join(parent_section_parts)
+                    parent_pattern = r'^' + r'\.\s*'.join(parent_section_parts)
                     matched_patterns.append(parent_pattern)
 
+                # 添加对 '数字 、' 格式的支持
+                digit_comma_pattern = r'^\d+\s*、'
+                matched_patterns.append(digit_comma_pattern)
+
                 # Combine the patterns
                 combined_pattern = r'(' + r')|('.join(matched_patterns) + r')'
                 current_section_pattern = re.compile(combined_pattern)
 
             else:
                 found_next_number = False
                 current_section_pattern = None
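
A quick runnable check of the numbering style this hunk adds support for, using made-up paragraph text:

import re

# 虚构示例,验证本次提交针对 '数字 、' 编号格式的改动
section_number_re = r'^(\d+(\s*\.\s*\d+)*)\s*、'  # 修改后的 section_number 正则
digit_comma_re = r'^\d+\s*、'                     # 新增的 digit_comma_pattern

for text in ["7 、投标保证金", "3.4.5 投标文件的递交"]:
    print(text, bool(re.match(section_number_re, text)), bool(re.match(digit_comma_re, text)))
# "7 、投标保证金" -> True True;"3.4.5 投标文件的递交" 因缺少 '、' 两者均不匹配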
@@ -93,7 +96,8 @@ def extract_text_with_keywords(doc_path, keywords, follow_up_keywords):
             current_index += 1
             next_text = doc.paragraphs[current_index].text.strip()
             if not found_next_number:
-                next_section_number = re.match(r'^([A-Za-z0-9]+(?:\.[A-Za-z0-9]+)*)|(\(\d+\))', next_text)
+                # 修改后的正则,支持 '数字 、' 格式
+                next_section_number = re.match(r'^([A-Za-z0-9]+(?:\.[A-Za-z0-9]+)*)|(\(\d+\))|(\d+\s*、)', next_text)
                 if next_section_number:
                     found_next_number = True
                     if next_section_number.group(1):
@@ -101,12 +105,14 @@ def extract_text_with_keywords(doc_path, keywords, follow_up_keywords):
                         dynamic_pattern = r'^' + r'\.'.join([r'[A-Za-z0-9]+' for _ in section_parts]) + r'\b'
                     elif next_section_number.group(2):
                         dynamic_pattern = r'^[\(\(]\d+[\)\)]'
+                    elif next_section_number.group(3):
+                        dynamic_pattern = r'^\d+\s*、'
                     current_section_pattern = re.compile(dynamic_pattern)
             if current_section_pattern and re.match(current_section_pattern, next_text):
                 extracted_paragraphs[active_key].append(next_text)
             else:
                 continue_collecting = False
-                active_key=None
+                active_key = None
                 break
 
     return current_index
@@ -118,6 +124,8 @@ def extract_text_with_keywords(doc_path, keywords, follow_up_keywords):
 
     return extracted_paragraphs
 
+
+
 """
 eg:
 text_list = ["这是第一句。 1. 接下来是第二句! (3) 最后一句。"]
@@ -143,6 +151,7 @@ def clean_dict_datas(extracted_contents, keywords,excludes): #让正则表达
     for key, text_list in extracted_contents.items():
         if len(text_list) == 1:
             for data in text_list:
+                # print(data)
                 # 检查是否包含任何需要排除的字符串
                 if any(exclude in data for exclude in excludes):
                     continue  # 如果包含任何排除字符串,跳过这个数据
@@ -464,9 +464,10 @@ def truncate_pdf_multiple(input_path, output_folder):
 
 # TODO:交通智能系统和招标(1)(1)文件有问题 sele=4的时候excludsion有问题
 if __name__ == "__main__":
-    input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles"
-    output_folder = "C:\\Users\\Administrator\\Desktop\\货物标\\outputtest"
-    # files=truncate_pdf_multiple(input_path,output_folder)
-    # print(files)
-    selection = 1  # 例如:1 - 商务技术服务要求, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2(与评标办法一致) 4.投标人须知前附表part1 投标人须知正文part2
-    generated_files = truncate_pdf_main(input_path, output_folder, selection)
+    input_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\b4601ea1-f087-4fa2-88ae-336ad4d8e1e9\\ztbfile.pdf"
+    output_folder = "C:\\Users\\Administrator\\Desktop\\fsdownload\\b4601ea1-f087-4fa2-88ae-336ad4d8e1e9"
+    files=truncate_pdf_multiple(input_path,output_folder)
+    print(files)
+    # selection = 1 # 例如:1 - 商务技术服务要求, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2(与评标办法一致) 4.投标人须知前附表part1 投标人须知正文part2
+    # generated_files = truncate_pdf_main(input_path, output_folder, selection)
@@ -36,6 +36,9 @@ def preprocess_files(output_folder, file_path, file_type, unique_id):
     elif file_type == 2:  # pdf
         pdf_path = file_path
         docx_path = pdf2docx(pdf_path)  # 将pdf转换为docx以供上传到知识库
+    elif file_type ==3: #doc
+        pdf_path=docx2pdf(file_path)
+        docx_path=pdf2docx(pdf_path)
     else:
         logger.error("Unsupported file type provided. Preprocessing halted.")
         return None
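
A condensed sketch of the conversion chain the new file_type == 3 branch sets up, assuming docx2pdf and pdf2docx (imported from flask_app.main.format_change elsewhere in this commit) convert in the directions their names suggest; convert_doc is a hypothetical helper name:

# file_type == 3 (.doc) 的转换链示意: doc -> pdf -> docx
# docx2pdf / pdf2docx 来自 flask_app.main.format_change(见本提交的 import 变更)
def convert_doc(file_path):
    pdf_path = docx2pdf(file_path)   # .doc 先转成 pdf
    docx_path = pdf2docx(pdf_path)   # 再由 pdf 转回 docx,供上传知识库
    return pdf_path, docx_path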