From d161965f5d21b631a52f48e707808a77eb919014 Mon Sep 17 00:00:00 2001 From: zy123 <646228430@qq.com> Date: Tue, 3 Dec 2024 09:07:14 +0800 Subject: [PATCH] =?UTF-8?q?12.2=20=E6=B8=85=E7=90=86bug=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flask_app/general/file2markdown.py | 3 ++- .../文档理解main.py | 22 ++++++++++++++++--- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/flask_app/general/file2markdown.py b/flask_app/general/file2markdown.py index ba858d8..376749a 100644 --- a/flask_app/general/file2markdown.py +++ b/flask_app/general/file2markdown.py @@ -72,6 +72,7 @@ def convert_pdf_to_markdown(file_path): if __name__ == "__main__": - file_path=r"C:\Users\Administrator\Desktop\fsdownload\e702f1e6-095d-443d-bb7d-ef2e42037cb1\ztbfile_procurement.pdf" + # file_path=r"C:\Users\Administrator\Desktop\fsdownload\e702f1e6-095d-443d-bb7d-ef2e42037cb1\ztbfile_procurement.pdf" + file_path=r"C:\Users\Administrator\Desktop\货物标\output1\招标文件(实高电子显示屏)_procurement.pdf" res=convert_pdf_to_markdown(file_path) print(res) \ No newline at end of file diff --git a/flask_app/old_version/文档理解大模型版知识库处理/文档理解main.py b/flask_app/old_version/文档理解大模型版知识库处理/文档理解main.py index 7a1b8f0..312e976 100644 --- a/flask_app/old_version/文档理解大模型版知识库处理/文档理解main.py +++ b/flask_app/old_version/文档理解大模型版知识库处理/文档理解main.py @@ -111,9 +111,25 @@ def main(): if status_info.status.lower() == 'success': print("Job completed successfully.") # Step 3: Retrieve the parsing result - result = docmind_client.get_result(job_id) - print("Parsing Result:") - print(result) + try: + result = docmind_client.get_result(job_id) + except Exception as e: + print(f"获取结果失败: {e}") + return + + # 提取并连接每个布局的 'markdownContent' + try: + layouts = result.get('layouts', []) + markdown_contents = [layout.get('markdownContent', '') for layout in layouts] + concatenated_markdown = '\n'.join(markdown_contents) + + # 将连接后的 markdown 写入 'extract.txt' + with open('extract.txt', 'w', encoding='utf-8') as extract_file: + extract_file.write(concatenated_markdown) + + print("Markdown 内容已成功提取到 'extract.txt'。") + except Exception as e: + print(f"处理并写入 Markdown 内容失败: {e}") else: print("Job failed. Please check the error logs for more details.")