8.30
This commit is contained in:
parent
76bf9e8770
commit
c396e1833f
@ -11,8 +11,8 @@ COPY requirements.txt .
|
||||
RUN pip config set global.progress_bar off
|
||||
|
||||
# 安装依赖
|
||||
RUN pip install --upgrade pip --default-timeout=100 \
|
||||
&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt
|
||||
RUN pip install --upgrade pip --default-timeout=200 \
|
||||
&& pip install -i https://mirrors.aliyun.com/pypi/simple/ -r requirements.txt
|
||||
|
||||
# 将当前目录的内容复制到容器的 /ZbparseProjects 中
|
||||
COPY . .
|
||||
|
@ -53,31 +53,31 @@ def create_logger(unique_id):
|
||||
return logger, output_folder
|
||||
|
||||
@app.route('/upload', methods=['POST'])
|
||||
# def zbparse():
|
||||
# file_url = validate_request()
|
||||
# if isinstance(file_url, tuple): # Check if the returned value is an error response
|
||||
# return file_url
|
||||
# try:
|
||||
# app.logger.info("starting parsing url:"+file_url)
|
||||
# final_json_path, output_folder,logger = download_and_process_file(file_url)
|
||||
# if not final_json_path:
|
||||
# return jsonify({'error': 'File processing failed'}), 500
|
||||
# response = generate_response(final_json_path,logger) # 先获取响应内容
|
||||
# # remove_directory(output_folder) # 然后删除文件夹
|
||||
# return response # 最后返回获取的响应
|
||||
# except Exception as e:
|
||||
# app.logger.error('Exception occurred: ' + str(e)) # 使用全局 logger 记录
|
||||
# return jsonify({'error': str(e)}), 500
|
||||
def zbparse():
    """Validate the upload request and stream the parsing result.

    ``validate_request()`` is called first; when it returns a tuple, that
    tuple is treated as an error response and returned unchanged. Otherwise
    its return value is used as the file URL and the output of
    ``process_and_stream(file_url)`` is sent back as a
    ``text/event-stream`` response.

    Returns:
        The tuple produced by ``validate_request()`` on validation failure,
        a streaming ``Response`` on success, or a ``(json, 500)`` tuple if
        building the response raises.
    """
    file_url = validate_request()
    if isinstance(file_url, tuple):  # validate_request returned an error response
        return file_url
    try:
        app.logger.info("starting parsing url:" + file_url)
        # NOTE(review): presumably process_and_stream is a generator, in which
        # case errors raised while it is consumed occur after this function
        # has returned and are not caught by the except below - confirm.
        return Response(
            stream_with_context(process_and_stream(file_url)),
            content_type='text/event-stream',
        )
    except Exception as e:
        # Record the failure once on the application-wide logger.
        app.logger.error('Exception occurred: ' + str(e))
        return jsonify({'error': str(e)}), 500
|
||||
# def zbparse():
|
||||
# file_url = validate_request()
|
||||
# if isinstance(file_url, tuple): # Check if the returned value is an error response
|
||||
# return file_url
|
||||
# try:
|
||||
# app.logger.info("starting parsing url:" + file_url)
|
||||
# return Response(stream_with_context(process_and_stream(file_url)), content_type='text/event-stream')
|
||||
# except Exception as e:
|
||||
# app.logger.error('Exception occurred: ' + str(e))
|
||||
# return jsonify({'error': str(e)}), 500
|
||||
|
||||
|
||||
def process_and_stream(file_url):
|
||||
@ -178,19 +178,19 @@ def test_process_and_stream():
|
||||
yield f"data: {json.dumps(final_response)}\n\n"
|
||||
|
||||
|
||||
# def generate_response(final_json_path,logger):
|
||||
# if not os.path.exists(final_json_path):
|
||||
# logger.error('JSON file not found at path: ' + final_json_path)
|
||||
# return jsonify({'error': 'JSON file not found'}), 404
|
||||
# with open(final_json_path, 'r', encoding='utf-8') as f:
|
||||
# logger.info('final_json_path:'+final_json_path)
|
||||
# zbparse_data = json.load(f)
|
||||
# json_str = json.dumps(zbparse_data, ensure_ascii=False)
|
||||
# return jsonify({
|
||||
# 'message': 'File uploaded and processed successfully',
|
||||
# 'filename': os.path.basename(final_json_path),
|
||||
# 'data': json_str
|
||||
# })
|
||||
def generate_response(final_json_path, logger):
    """Build the JSON reply for a processed file.

    Args:
        final_json_path: Path of the JSON result file to return.
        logger: Logger that receives the info/error messages.

    Returns:
        A ``(response, 404)`` tuple when ``final_json_path`` does not exist.
        Otherwise a ``jsonify`` response carrying a success message, the
        file's base name, and the file content re-serialised as a JSON
        string (non-ASCII characters kept as-is).
    """
    if os.path.exists(final_json_path):
        with open(final_json_path, 'r', encoding='utf-8') as result_file:
            logger.info('final_json_path:' + final_json_path)
            parsed = json.load(result_file)
            # The parsed document is embedded as a string, not as nested JSON.
            serialized = json.dumps(parsed, ensure_ascii=False)
            payload = {
                'message': 'File uploaded and processed successfully',
                'filename': os.path.basename(final_json_path),
                'data': serialized,
            }
            return jsonify(payload)

    logger.error('JSON file not found at path: ' + final_json_path)
    return jsonify({'error': 'JSON file not found'}), 404
|
||||
|
||||
|
||||
# @app.route('/get_json', methods=['POST'])
|
||||
|
File diff suppressed because one or more lines are too long
@ -25,12 +25,12 @@ def fetch_purchasing_list(file_path):
|
||||
file_path = docx2pdf(file_path)
|
||||
truncate_path=truncate_pdf_main(file_path,output_folder,1)
|
||||
user_query="这是一份货物标中采购要求部分的内容,你需要摘取出需要采购的系统(货物),一个大系统(大项)中可能包含多个小系统(小项),你需要保留这种层次关系,给出货物名称,请以json格式返回,外层键名为\"采购需求\",嵌套键名为对应的系统名称或货物名称,无需给出采购数量和单位,如有未知内容,在对应键值处填\"未知\"。"
|
||||
file_id=upload_file(truncate_path)
|
||||
file_id=upload_file(truncate_path[0])
|
||||
res=qianwen_long(file_id,user_query)
|
||||
cleaned_res=clean_json_string(res)
|
||||
keys_list=generate_key_paths(cleaned_res['采购需求'])
|
||||
print(keys_list)
|
||||
|
||||
if __name__ == "__main__":
    # Ad-hoc manual run against a local sample document. Only one path is
    # assigned; an earlier assignment that was immediately overwritten has
    # been removed.
    file_path = "C:\\Users\\Administrator\\Desktop\\货物标\\output1\\磋商文件.doc"
    fetch_purchasing_list(file_path)
|
||||
|
@ -75,9 +75,9 @@ def process_input(input_path, output_folder, begin_pattern, begin_page, end_patt
|
||||
def truncate_pdf_main(input_path, output_folder, selection):
|
||||
if selection == 1:
|
||||
# Configure patterns and phrases for "投标人须知前附表"
|
||||
begin_pattern = re.compile(r'第[一二三四五六七八九十百千]+章.*?(?:项目|服务|商务).*?要求')
|
||||
begin_pattern = re.compile(r'第[一二三四五六七八九十百千]+章.*?(?:服务|项目|商务).*?要求|第[一二三四五六七八九十百千]+章.*?采购.*')
|
||||
begin_page = 5
|
||||
end_pattern = re.compile(r'第[一二三四五六七八九十百千]+章\s*(资格审查|评标方法|评审办法)')
|
||||
end_pattern = re.compile(r'第[一二三四五六七八九十百千]+章\s*(资格审查|评标方法|评审办法|评定办法)')
|
||||
# 示例文本进行测试
|
||||
output_suffix = "tobidders_notice_table"
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user