8.30

2024-08-30 09:53:04 +08:00 · 2024-08-30 09:53:04 +08:00 · c396e1833f
commit c396e1833f
parent 76bf9e8770
5 changed files with 120 additions and 120 deletions
--- a/4
+++ b/4
@@ -11,8 +11,8 @@ COPY requirements.txt .
 RUN pip config set global.progress_bar off

 # 安装依赖
-RUN pip install --upgrade pip --default-timeout=100 \
-    && pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt
+RUN pip install --upgrade pip --default-timeout=200 \
+    && pip install -i https://mirrors.aliyun.com/pypi/simple/ -r requirements.txt

 # 将当前目录的内容复制到容器的 /ZbparseProjects 中
 COPY . .
--- a/flask_app/main/start_up.py
+++ b/flask_app/main/start_up.py
@@ -53,31 +53,31 @@ def create_logger(unique_id):
    return logger, output_folder

@app.route('/upload', methods=['POST'])
-# def zbparse():
-#     file_url = validate_request()
-#     if isinstance(file_url, tuple):  # Check if the returned value is an error response
-#         return file_url
-#     try:
-#         app.logger.info("starting parsing url:"+file_url)
-#         final_json_path, output_folder,logger = download_and_process_file(file_url)
-#         if not final_json_path:
-#             return jsonify({'error': 'File processing failed'}), 500
-#         response = generate_response(final_json_path,logger)  # 先获取响应内容
-#         # remove_directory(output_folder)  # 然后删除文件夹
-#         return response  # 最后返回获取的响应
-#     except Exception as e:
-#         app.logger.error('Exception occurred: ' + str(e))  # 使用全局 logger 记录
-#         return jsonify({'error': str(e)}), 500
 def zbparse():
    file_url = validate_request()
    if isinstance(file_url, tuple):  # Check if the returned value is an error response
        return file_url
    try:
-        app.logger.info("starting parsing url:" + file_url)
-        return Response(stream_with_context(process_and_stream(file_url)), content_type='text/event-stream')
+        app.logger.info("starting parsing url:"+file_url)
+        final_json_path, output_folder,logger = download_and_process_file(file_url)
+        if not final_json_path:
+            return jsonify({'error': 'File processing failed'}), 500
+        response = generate_response(final_json_path,logger)  # 先获取响应内容
+        # remove_directory(output_folder)  # 然后删除文件夹
+        return response  # 最后返回获取的响应
    except Exception as e:
-        app.logger.error('Exception occurred: ' + str(e))
+        app.logger.error('Exception occurred: ' + str(e))  # 使用全局 logger 记录
        return jsonify({'error': str(e)}), 500
+# def zbparse():
+#     file_url = validate_request()
+#     if isinstance(file_url, tuple):  # Check if the returned value is an error response
+#         return file_url
+#     try:
+#         app.logger.info("starting parsing url:" + file_url)
+#         return Response(stream_with_context(process_and_stream(file_url)), content_type='text/event-stream')
+#     except Exception as e:
+#         app.logger.error('Exception occurred: ' + str(e))
+#         return jsonify({'error': str(e)}), 500


 def process_and_stream(file_url):
@@ -178,19 +178,19 @@ def test_process_and_stream():
    yield f"data: {json.dumps(final_response)}\n\n"


-# def generate_response(final_json_path,logger):
-#     if not os.path.exists(final_json_path):
-#         logger.error('JSON file not found at path: ' + final_json_path)
-#         return jsonify({'error': 'JSON file not found'}), 404
-#     with open(final_json_path, 'r', encoding='utf-8') as f:
-#         logger.info('final_json_path:'+final_json_path)
-#         zbparse_data = json.load(f)
-#         json_str = json.dumps(zbparse_data, ensure_ascii=False)
-#     return jsonify({
-#         'message': 'File uploaded and processed successfully',
-#         'filename': os.path.basename(final_json_path),
-#         'data': json_str
-#     })
+def generate_response(final_json_path,logger):
+    if not os.path.exists(final_json_path):
+        logger.error('JSON file not found at path: ' + final_json_path)
+        return jsonify({'error': 'JSON file not found'}), 404
+    with open(final_json_path, 'r', encoding='utf-8') as f:
+        logger.info('final_json_path:'+final_json_path)
+        zbparse_data = json.load(f)
+        json_str = json.dumps(zbparse_data, ensure_ascii=False)
+    return jsonify({
+        'message': 'File uploaded and processed successfully',
+        'filename': os.path.basename(final_json_path),
+        'data': json_str
+    })


 # @app.route('/get_json', methods=['POST'])
--- a/flask_app/main/招标文件解析.py
+++ b/flask_app/main/招标文件解析.py
--- a/flask_app/货物标/extract_procurement_requirements.py
+++ b/flask_app/货物标/extract_procurement_requirements.py
@@ -25,12 +25,12 @@ def fetch_purchasing_list(file_path):
    file_path = docx2pdf(file_path)
    truncate_path=truncate_pdf_main(file_path,output_folder,1)
    user_query="这是一份货物标中采购要求部分的内容，你需要摘取出需要采购的系统（货物），一个大系统（大项）中可能包含多个小系统（小项），你需要保留这种层次关系，给出货物名称，请以json格式返回，外层键名为\"采购需求\"，嵌套键名为对应的系统名称或货物名称，无需给出采购数量和单位，如有未知内容，在对应键值处填\"未知\"。"
-    file_id=upload_file(truncate_path)
+    file_id=upload_file(truncate_path[0])
    res=qianwen_long(file_id,user_query)
    cleaned_res=clean_json_string(res)
    keys_list=generate_key_paths(cleaned_res['采购需求'])
    print(keys_list)

 if __name__ == "__main__":
-    file_path="C:\\Users\\Administrator\\Desktop\\货物标\\output1\\招标文件(107国道).docf"
+    file_path="C:\\Users\\Administrator\\Desktop\\货物标\\output1\\磋商文件.doc"
    fetch_purchasing_list(file_path)
--- a/flask_app/货物标/货物标截取pdf.py
+++ b/flask_app/货物标/货物标截取pdf.py
@@ -75,9 +75,9 @@ def process_input(input_path, output_folder, begin_pattern, begin_page, end_patt
 def truncate_pdf_main(input_path, output_folder, selection):
    if selection == 1:
        # Configure patterns and phrases for "投标人须知前附表"
-        begin_pattern = re.compile(r'第[一二三四五六七八九十百千]+章.*?(?:项目|服务|商务).*?要求')
+        begin_pattern = re.compile(r'第[一二三四五六七八九十百千]+章.*?(?:服务|项目|商务).*?要求|第[一二三四五六七八九十百千]+章.*?采购.*')
        begin_page = 5
-        end_pattern = re.compile(r'第[一二三四五六七八九十百千]+章\s*(资格审查|评标方法|评审办法)')
+        end_pattern = re.compile(r'第[一二三四五六七八九十百千]+章\s*(资格审查|评标方法|评审办法|评定办法)')
        # 示例文本进行测试
        output_suffix = "tobidders_notice_table"