This commit is contained in:
zy123 2024-08-30 09:53:04 +08:00
parent 76bf9e8770
commit c396e1833f
5 changed files with 120 additions and 120 deletions

View File

@@ -11,8 +11,8 @@ COPY requirements.txt .
RUN pip config set global.progress_bar off
# 安装依赖
RUN pip install --upgrade pip --default-timeout=100 \
&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt
RUN pip install --upgrade pip --default-timeout=200 \
&& pip install -i https://mirrors.aliyun.com/pypi/simple/ -r requirements.txt
# 将当前目录的内容复制到容器的 /ZbparseProjects 中
COPY . .

View File

@@ -53,31 +53,31 @@ def create_logger(unique_id):
return logger, output_folder
@app.route('/upload', methods=['POST'])
# def zbparse():
# file_url = validate_request()
# if isinstance(file_url, tuple): # Check if the returned value is an error response
# return file_url
# try:
# app.logger.info("starting parsing url:"+file_url)
# final_json_path, output_folder,logger = download_and_process_file(file_url)
# if not final_json_path:
# return jsonify({'error': 'File processing failed'}), 500
# response = generate_response(final_json_path,logger) # 先获取响应内容
# # remove_directory(output_folder) # 然后删除文件夹
# return response # 最后返回获取的响应
# except Exception as e:
# app.logger.error('Exception occurred: ' + str(e)) # 使用全局 logger 记录
# return jsonify({'error': str(e)}), 500
def zbparse():
file_url = validate_request()
if isinstance(file_url, tuple): # Check if the returned value is an error response
return file_url
try:
app.logger.info("starting parsing url:"+file_url)
return Response(stream_with_context(process_and_stream(file_url)), content_type='text/event-stream')
final_json_path, output_folder,logger = download_and_process_file(file_url)
if not final_json_path:
return jsonify({'error': 'File processing failed'}), 500
response = generate_response(final_json_path,logger) # 先获取响应内容
# remove_directory(output_folder) # 然后删除文件夹
return response # 最后返回获取的响应
except Exception as e:
app.logger.error('Exception occurred: ' + str(e))
app.logger.error('Exception occurred: ' + str(e)) # 使用全局 logger 记录
return jsonify({'error': str(e)}), 500
# def zbparse():
# file_url = validate_request()
# if isinstance(file_url, tuple): # Check if the returned value is an error response
# return file_url
# try:
# app.logger.info("starting parsing url:" + file_url)
# return Response(stream_with_context(process_and_stream(file_url)), content_type='text/event-stream')
# except Exception as e:
# app.logger.error('Exception occurred: ' + str(e))
# return jsonify({'error': str(e)}), 500
def process_and_stream(file_url):
@@ -178,19 +178,19 @@ def test_process_and_stream():
yield f"data: {json.dumps(final_response)}\n\n"
# def generate_response(final_json_path,logger):
# if not os.path.exists(final_json_path):
# logger.error('JSON file not found at path: ' + final_json_path)
# return jsonify({'error': 'JSON file not found'}), 404
# with open(final_json_path, 'r', encoding='utf-8') as f:
# logger.info('final_json_path:'+final_json_path)
# zbparse_data = json.load(f)
# json_str = json.dumps(zbparse_data, ensure_ascii=False)
# return jsonify({
# 'message': 'File uploaded and processed successfully',
# 'filename': os.path.basename(final_json_path),
# 'data': json_str
# })
def generate_response(final_json_path,logger):
if not os.path.exists(final_json_path):
logger.error('JSON file not found at path: ' + final_json_path)
return jsonify({'error': 'JSON file not found'}), 404
with open(final_json_path, 'r', encoding='utf-8') as f:
logger.info('final_json_path:'+final_json_path)
zbparse_data = json.load(f)
json_str = json.dumps(zbparse_data, ensure_ascii=False)
return jsonify({
'message': 'File uploaded and processed successfully',
'filename': os.path.basename(final_json_path),
'data': json_str
})
# @app.route('/get_json', methods=['POST'])

File diff suppressed because one or more lines are too long

View File

@@ -25,12 +25,12 @@ def fetch_purchasing_list(file_path):
file_path = docx2pdf(file_path)
truncate_path=truncate_pdf_main(file_path,output_folder,1)
user_query="这是一份货物标中采购要求部分的内容你需要摘取出需要采购的系统货物一个大系统大项中可能包含多个小系统小项你需要保留这种层次关系给出货物名称请以json格式返回外层键名为\"采购需求\",嵌套键名为对应的系统名称或货物名称,无需给出采购数量和单位,如有未知内容,在对应键值处填\"未知\""
file_id=upload_file(truncate_path)
file_id=upload_file(truncate_path[0])
res=qianwen_long(file_id,user_query)
cleaned_res=clean_json_string(res)
keys_list=generate_key_paths(cleaned_res['采购需求'])
print(keys_list)
if __name__ == "__main__":
file_path="C:\\Users\\Administrator\\Desktop\\货物标\\output1\\招标文件(107国道).docf"
file_path="C:\\Users\\Administrator\\Desktop\\货物标\\output1\\磋商文件.doc"
fetch_purchasing_list(file_path)

View File

@@ -75,9 +75,9 @@ def process_input(input_path, output_folder, begin_pattern, begin_page, end_patt
def truncate_pdf_main(input_path, output_folder, selection):
if selection == 1:
# Configure patterns and phrases for "投标人须知前附表"
begin_pattern = re.compile(r'第[一二三四五六七八九十百千]+章.*?(?:项目|服务|商务).*?要求')
begin_pattern = re.compile(r'第[一二三四五六七八九十百千]+章.*?(?:服务|项目|商务).*?要求|第[一二三四五六七八九十百千]+章.*?采购.*')
begin_page = 5
end_pattern = re.compile(r'第[一二三四五六七八九十百千]+章\s*(资格审查|评标方法|评审办法)')
end_pattern = re.compile(r'第[一二三四五六七八九十百千]+章\s*(资格审查|评标方法|评审办法|评定办法)')
# 示例文本进行测试
output_suffix = "tobidders_notice_table"