import time
from typing import Any, Dict

from alibabacloud_credentials.client import Client as CredClient
from alibabacloud_docmind_api20220711.client import Client as DocMindClient20220711
from alibabacloud_docmind_api20220711 import models as docmind_models
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_tea_util.client import Client as UtilClient
from alibabacloud_tea_util import models as util_models

# Seconds to sleep between two consecutive status polls in main().
POLL_INTERVAL_SECONDS = 10

# Lower-cased job statuses that end the polling loop in main().
# NOTE(review): the service appears to report failure as "Fail"; the original
# script only checked for "failed". Both spellings are accepted here so a
# failed job cannot keep the loop spinning forever -- confirm against the
# QueryDocParserStatus API reference.
SUCCESS_STATUS = "success"
TERMINAL_STATUSES = frozenset({SUCCESS_STATUS, "fail", "failed"})


def _report_error(action: str, error: Exception) -> None:
    """
    Prints a readable description of a failed operation.

    SDK exceptions usually carry a ``message`` attribute, but ordinary Python
    exceptions (for example ``FileNotFoundError`` raised by ``open``) do not,
    so the attribute is read defensively instead of via ``error.message``.

    :param action: Short description of what was being attempted.
    :param error: The exception that was caught.
    """
    message = getattr(error, "message", None)
    if not isinstance(message, str) or not message:
        message = str(error)
    # Kept from the original handlers; ``message`` is always a str here.
    UtilClient.assert_as_string(message)
    print(f"Failed to {action}: {message}")


class DocMindClient:
    """Thin wrapper around the Alibaba Cloud Document Mind parser job API."""

    def __init__(self, endpoint: str = 'docmind-api.cn-hangzhou.aliyuncs.com'):
        """
        Builds the underlying SDK client.

        :param endpoint: Service endpoint the SDK client sends requests to.
        """
        # Initialize credentials
        cred_client = CredClient()
        credential = cred_client.get_credential()

        # Configure OpenAPI
        config = open_api_models.Config(
            access_key_id=credential.access_key_id,
            access_key_secret=credential.access_key_secret,
        )
        config.endpoint = endpoint

        # Initialize DocMind API client
        self.client = DocMindClient20220711(config)

    def submit_job(self, file_path: str, file_name: str) -> str:
        """
        Submits a document parsing job.

        :param file_path: Path to the local file to be uploaded.
        :param file_name: Name of the file, including the extension.
        :return: The ID of the submitted job.
        :raises Exception: Re-raises whatever opening the file or the SDK
            call raised, after printing the error message.
        """
        try:
            # The stream must stay open until the request has been sent, so
            # the SDK call happens inside the ``with`` block.
            with open(file_path, "rb") as file_stream:
                request = docmind_models.SubmitDocParserJobAdvanceRequest(
                    file_url_object=file_stream,
                    file_name=file_name,
                )
                runtime = util_models.RuntimeOptions()
                response = self.client.submit_doc_parser_job_advance(request, runtime)
                job_id = response.body.data.id
                print(f"Job submitted successfully. Job ID: {job_id}")
                return job_id
        except Exception as error:
            _report_error("submit job", error)
            raise

    def query_status(self, job_id: str) -> Any:
        """
        Queries the status of a submitted job.

        :param job_id: The ID of the job to query.
        :return: The ``data`` object of the SDK response. It is an SDK model
            (not a plain dict); callers read its ``status`` attribute.
        :raises Exception: Re-raises whatever the SDK call raised, after
            printing the error message.
        """
        try:
            request = docmind_models.QueryDocParserStatusRequest(id=job_id)
            response = self.client.query_doc_parser_status(request)
            status_info = response.body.data
            print(f"Job Status: {status_info.status}")
            return status_info
        except Exception as error:
            _report_error("query job status", error)
            raise

    def get_result(self, job_id: str, layout_step_size: int = 10, layout_num: int = 0) -> Any:
        """
        Retrieves the result of a completed job.

        Only the window selected by ``layout_num`` / ``layout_step_size`` is
        requested; this method does not page through further layouts.

        :param job_id: The ID of the completed job.
        :param layout_step_size: Passed through as ``layout_step_size``;
            presumably the number of layouts returned per call -- confirm
            against the GetDocParserResult API reference.
        :param layout_num: Passed through as ``layout_num``; presumably the
            index of the first layout to return -- confirm against the
            GetDocParserResult API reference.
        :return: The ``data`` object of the SDK response.
        :raises Exception: Re-raises whatever the SDK call raised, after
            printing the error message.
        """
        try:
            request = docmind_models.GetDocParserResultRequest(
                id=job_id,
                layout_step_size=layout_step_size,
                layout_num=layout_num,
            )
            response = self.client.get_doc_parser_result(request)
            result = response.body.data
            print(f"Result retrieved for Job ID: {job_id}")
            return result
        except Exception as error:
            _report_error("retrieve job result", error)
            raise


def main():
    """Submits a sample file, polls until the job finishes, and prints the result."""
    # Configuration
    file_path = r'C:\Users\Administrator\Desktop\货物标\output1\招标文件(实高电子显示屏)_procurement.pdf'
    file_name = "test1.pdf"  # Ensure the file name includes the extension

    # Initialize DocMind client
    docmind_client = DocMindClient()

    # Step 1: Submit the file for parsing
    job_id = docmind_client.submit_job(file_path, file_name)

    # Step 2: Poll for job status until completion
    while True:
        status_info = docmind_client.query_status(job_id)
        # Guard against a missing status so ``.lower()`` cannot crash the loop.
        status = (status_info.status or "").lower()
        if status in TERMINAL_STATUSES:
            break
        print(
            f"Job is still processing. Waiting for {POLL_INTERVAL_SECONDS} "
            "seconds before retrying..."
        )
        time.sleep(POLL_INTERVAL_SECONDS)  # Wait before checking again

    if status == SUCCESS_STATUS:
        print("Job completed successfully.")
        # Step 3: Retrieve the parsing result
        result = docmind_client.get_result(job_id)
        print("Parsing Result:")
        print(result)
    else:
        print("Job failed. Please check the error logs for more details.")


if __name__ == "__main__":
    main()