8.30
This commit is contained in:
parent
76bf9e8770
commit
c396e1833f
@ -11,8 +11,8 @@ COPY requirements.txt .
|
|||||||
RUN pip config set global.progress_bar off
|
RUN pip config set global.progress_bar off
|
||||||
|
|
||||||
# 安装依赖
|
# 安装依赖
|
||||||
RUN pip install --upgrade pip --default-timeout=100 \
|
RUN pip install --upgrade pip --default-timeout=200 \
|
||||||
&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt
|
&& pip install -i https://mirrors.aliyun.com/pypi/simple/ -r requirements.txt
|
||||||
|
|
||||||
# 将当前目录的内容复制到容器的 /ZbparseProjects 中
|
# 将当前目录的内容复制到容器的 /ZbparseProjects 中
|
||||||
COPY . .
|
COPY . .
|
||||||
|
@ -53,31 +53,31 @@ def create_logger(unique_id):
|
|||||||
return logger, output_folder
|
return logger, output_folder
|
||||||
|
|
||||||
@app.route('/upload', methods=['POST'])
|
@app.route('/upload', methods=['POST'])
|
||||||
# def zbparse():
|
|
||||||
# file_url = validate_request()
|
|
||||||
# if isinstance(file_url, tuple): # Check if the returned value is an error response
|
|
||||||
# return file_url
|
|
||||||
# try:
|
|
||||||
# app.logger.info("starting parsing url:"+file_url)
|
|
||||||
# final_json_path, output_folder,logger = download_and_process_file(file_url)
|
|
||||||
# if not final_json_path:
|
|
||||||
# return jsonify({'error': 'File processing failed'}), 500
|
|
||||||
# response = generate_response(final_json_path,logger) # 先获取响应内容
|
|
||||||
# # remove_directory(output_folder) # 然后删除文件夹
|
|
||||||
# return response # 最后返回获取的响应
|
|
||||||
# except Exception as e:
|
|
||||||
# app.logger.error('Exception occurred: ' + str(e)) # 使用全局 logger 记录
|
|
||||||
# return jsonify({'error': str(e)}), 500
|
|
||||||
def zbparse():
|
def zbparse():
|
||||||
file_url = validate_request()
|
file_url = validate_request()
|
||||||
if isinstance(file_url, tuple): # Check if the returned value is an error response
|
if isinstance(file_url, tuple): # Check if the returned value is an error response
|
||||||
return file_url
|
return file_url
|
||||||
try:
|
try:
|
||||||
app.logger.info("starting parsing url:" + file_url)
|
app.logger.info("starting parsing url:"+file_url)
|
||||||
return Response(stream_with_context(process_and_stream(file_url)), content_type='text/event-stream')
|
final_json_path, output_folder,logger = download_and_process_file(file_url)
|
||||||
|
if not final_json_path:
|
||||||
|
return jsonify({'error': 'File processing failed'}), 500
|
||||||
|
response = generate_response(final_json_path,logger) # 先获取响应内容
|
||||||
|
# remove_directory(output_folder) # 然后删除文件夹
|
||||||
|
return response # 最后返回获取的响应
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
app.logger.error('Exception occurred: ' + str(e))
|
app.logger.error('Exception occurred: ' + str(e)) # 使用全局 logger 记录
|
||||||
return jsonify({'error': str(e)}), 500
|
return jsonify({'error': str(e)}), 500
|
||||||
|
# def zbparse():
|
||||||
|
# file_url = validate_request()
|
||||||
|
# if isinstance(file_url, tuple): # Check if the returned value is an error response
|
||||||
|
# return file_url
|
||||||
|
# try:
|
||||||
|
# app.logger.info("starting parsing url:" + file_url)
|
||||||
|
# return Response(stream_with_context(process_and_stream(file_url)), content_type='text/event-stream')
|
||||||
|
# except Exception as e:
|
||||||
|
# app.logger.error('Exception occurred: ' + str(e))
|
||||||
|
# return jsonify({'error': str(e)}), 500
|
||||||
|
|
||||||
|
|
||||||
def process_and_stream(file_url):
|
def process_and_stream(file_url):
|
||||||
@ -178,19 +178,19 @@ def test_process_and_stream():
|
|||||||
yield f"data: {json.dumps(final_response)}\n\n"
|
yield f"data: {json.dumps(final_response)}\n\n"
|
||||||
|
|
||||||
|
|
||||||
# def generate_response(final_json_path,logger):
|
def generate_response(final_json_path,logger):
|
||||||
# if not os.path.exists(final_json_path):
|
if not os.path.exists(final_json_path):
|
||||||
# logger.error('JSON file not found at path: ' + final_json_path)
|
logger.error('JSON file not found at path: ' + final_json_path)
|
||||||
# return jsonify({'error': 'JSON file not found'}), 404
|
return jsonify({'error': 'JSON file not found'}), 404
|
||||||
# with open(final_json_path, 'r', encoding='utf-8') as f:
|
with open(final_json_path, 'r', encoding='utf-8') as f:
|
||||||
# logger.info('final_json_path:'+final_json_path)
|
logger.info('final_json_path:'+final_json_path)
|
||||||
# zbparse_data = json.load(f)
|
zbparse_data = json.load(f)
|
||||||
# json_str = json.dumps(zbparse_data, ensure_ascii=False)
|
json_str = json.dumps(zbparse_data, ensure_ascii=False)
|
||||||
# return jsonify({
|
return jsonify({
|
||||||
# 'message': 'File uploaded and processed successfully',
|
'message': 'File uploaded and processed successfully',
|
||||||
# 'filename': os.path.basename(final_json_path),
|
'filename': os.path.basename(final_json_path),
|
||||||
# 'data': json_str
|
'data': json_str
|
||||||
# })
|
})
|
||||||
|
|
||||||
|
|
||||||
# @app.route('/get_json', methods=['POST'])
|
# @app.route('/get_json', methods=['POST'])
|
||||||
|
File diff suppressed because one or more lines are too long
@ -25,12 +25,12 @@ def fetch_purchasing_list(file_path):
|
|||||||
file_path = docx2pdf(file_path)
|
file_path = docx2pdf(file_path)
|
||||||
truncate_path=truncate_pdf_main(file_path,output_folder,1)
|
truncate_path=truncate_pdf_main(file_path,output_folder,1)
|
||||||
user_query="这是一份货物标中采购要求部分的内容,你需要摘取出需要采购的系统(货物),一个大系统(大项)中可能包含多个小系统(小项),你需要保留这种层次关系,给出货物名称,请以json格式返回,外层键名为\"采购需求\",嵌套键名为对应的系统名称或货物名称,无需给出采购数量和单位,如有未知内容,在对应键值处填\"未知\"。"
|
user_query="这是一份货物标中采购要求部分的内容,你需要摘取出需要采购的系统(货物),一个大系统(大项)中可能包含多个小系统(小项),你需要保留这种层次关系,给出货物名称,请以json格式返回,外层键名为\"采购需求\",嵌套键名为对应的系统名称或货物名称,无需给出采购数量和单位,如有未知内容,在对应键值处填\"未知\"。"
|
||||||
file_id=upload_file(truncate_path)
|
file_id=upload_file(truncate_path[0])
|
||||||
res=qianwen_long(file_id,user_query)
|
res=qianwen_long(file_id,user_query)
|
||||||
cleaned_res=clean_json_string(res)
|
cleaned_res=clean_json_string(res)
|
||||||
keys_list=generate_key_paths(cleaned_res['采购需求'])
|
keys_list=generate_key_paths(cleaned_res['采购需求'])
|
||||||
print(keys_list)
|
print(keys_list)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
file_path="C:\\Users\\Administrator\\Desktop\\货物标\\output1\\招标文件(107国道).docf"
|
file_path="C:\\Users\\Administrator\\Desktop\\货物标\\output1\\磋商文件.doc"
|
||||||
fetch_purchasing_list(file_path)
|
fetch_purchasing_list(file_path)
|
||||||
|
@ -75,9 +75,9 @@ def process_input(input_path, output_folder, begin_pattern, begin_page, end_patt
|
|||||||
def truncate_pdf_main(input_path, output_folder, selection):
|
def truncate_pdf_main(input_path, output_folder, selection):
|
||||||
if selection == 1:
|
if selection == 1:
|
||||||
# Configure patterns and phrases for "投标人须知前附表"
|
# Configure patterns and phrases for "投标人须知前附表"
|
||||||
begin_pattern = re.compile(r'第[一二三四五六七八九十百千]+章.*?(?:项目|服务|商务).*?要求')
|
begin_pattern = re.compile(r'第[一二三四五六七八九十百千]+章.*?(?:服务|项目|商务).*?要求|第[一二三四五六七八九十百千]+章.*?采购.*')
|
||||||
begin_page = 5
|
begin_page = 5
|
||||||
end_pattern = re.compile(r'第[一二三四五六七八九十百千]+章\s*(资格审查|评标方法|评审办法)')
|
end_pattern = re.compile(r'第[一二三四五六七八九十百千]+章\s*(资格审查|评标方法|评审办法|评定办法)')
|
||||||
# 示例文本进行测试
|
# 示例文本进行测试
|
||||||
output_suffix = "tobidders_notice_table"
|
output_suffix = "tobidders_notice_table"
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user