# File-viewer listing metadata (not part of the program):
# 123 lines, 4.6 KiB, Python; last change 2024-12-02 16:48:47 +08:00

import time
from typing import Any, Dict
from alibabacloud_credentials.client import Client as CredClient
from alibabacloud_docmind_api20220711.client import Client as DocMindClient20220711
from alibabacloud_docmind_api20220711 import models as docmind_models
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_tea_util.client import Client as UtilClient
from alibabacloud_tea_util import models as util_models
class DocMindClient:
    """Wrapper around the DocMind SDK client for document-parsing jobs.

    Exposes three calls: submit a local file, query a job's status, and
    fetch a job's result. Each call prints a short progress line and lets
    any failure propagate to the caller unchanged.
    """

    def __init__(self, endpoint: str = 'docmind-api.cn-hangzhou.aliyuncs.com'):
        """
        Builds the underlying SDK client.
        :param endpoint: Endpoint host assigned to the OpenAPI configuration.
        """
        # Credentials are whatever a default-constructed CredClient yields.
        credential = CredClient().get_credential()
        config = open_api_models.Config(
            access_key_id=credential.access_key_id,
            access_key_secret=credential.access_key_secret,
        )
        config.endpoint = endpoint
        self.client = DocMindClient20220711(config)

    @staticmethod
    def _error_message(error: BaseException) -> str:
        """Return a printable message for ``error`` without ever raising."""
        # Only some exception types carry a ``message`` attribute; reading it
        # unconditionally raises AttributeError for the rest (for example the
        # FileNotFoundError from open()) and hides the real failure.
        message = getattr(error, "message", None)
        return message if isinstance(message, str) else str(error)

    def submit_job(self, file_path: str, file_name: str) -> str:
        """
        Submits a document parsing job.
        :param file_path: Path to the local file to be uploaded.
        :param file_name: Name of the file, including the extension.
        :return: The ID of the submitted job.
        :raises Exception: Any error from opening the file or from the SDK
            call is reported and re-raised unchanged.
        """
        try:
            # The stream must stay open until the SDK call has consumed it.
            with open(file_path, "rb") as file_stream:
                request = docmind_models.SubmitDocParserJobAdvanceRequest(
                    file_url_object=file_stream,
                    file_name=file_name,
                )
                runtime = util_models.RuntimeOptions()
                response = self.client.submit_doc_parser_job_advance(request, runtime)
            job_id = response.body.data.id
            print(f"Job submitted successfully. Job ID: {job_id}")
            return job_id
        except Exception as error:
            print(f"Failed to submit job: {self._error_message(error)}")
            raise

    def query_status(self, job_id: str) -> Any:
        """
        Queries the status of a submitted job.
        :param job_id: The ID of the job to query.
        :return: The ``data`` object of the SDK response; its ``status``
            attribute holds the job state.
        :raises Exception: Any error from the SDK call is reported and
            re-raised unchanged.
        """
        try:
            request = docmind_models.QueryDocParserStatusRequest(id=job_id)
            response = self.client.query_doc_parser_status(request)
            status_info = response.body.data
            print(f"Job Status: {status_info.status}")
            return status_info
        except Exception as error:
            print(f"Failed to query job status: {self._error_message(error)}")
            raise

    def get_result(self, job_id: str, layout_step_size: int = 10, layout_num: int = 0) -> Any:
        """
        Retrieves the result of a completed job.
        :param job_id: The ID of the completed job.
        :param layout_step_size: Forwarded as the request's ``layout_step_size``.
        :param layout_num: Forwarded as the request's ``layout_num``.
        :return: The ``data`` object of the SDK response.
        :raises Exception: Any error from the SDK call is reported and
            re-raised unchanged.
        """
        try:
            request = docmind_models.GetDocParserResultRequest(
                id=job_id,
                layout_step_size=layout_step_size,
                layout_num=layout_num,
            )
            response = self.client.get_doc_parser_result(request)
            result = response.body.data
            print(f"Result retrieved for Job ID: {job_id}")
            return result
        except Exception as error:
            print(f"Failed to get job result: {self._error_message(error)}")
            raise
def main(
    file_path: str = r'C:\Users\Administrator\Desktop\货物标\output1\招标文件实高电子显示屏_procurement.pdf',
    file_name: str = "test1.pdf",
    poll_interval: float = 10,
    max_wait: float = float("inf"),
) -> None:
    """
    Submits a file for parsing, polls until the job finishes, and prints the result.
    :param file_path: Path to the local file to upload.
    :param file_name: Name sent with the upload; must include the extension.
    :param poll_interval: Seconds to sleep between status checks.
    :param max_wait: Upper bound in seconds on the total polling time; the
        default of infinity polls until the job reaches a terminal state.
    :raises TimeoutError: If the job is still running after ``max_wait`` seconds.
    """
    # NOTE(review): the service appears to report failure as "Fail", which
    # lowercases to "fail"; the original check only knew "failed" and would
    # then poll forever. Both spellings are accepted - confirm against the API docs.
    terminal_states = {'success', 'fail', 'failed'}

    # Initialize DocMind client
    docmind_client = DocMindClient()

    # Step 1: Submit the file for parsing
    job_id = docmind_client.submit_job(file_path, file_name)

    # Step 2: Poll for job status until completion
    deadline = time.monotonic() + max_wait
    while True:
        status_info = docmind_client.query_status(job_id)
        status = status_info.status.lower()
        if status in terminal_states:
            break
        if time.monotonic() >= deadline:
            raise TimeoutError(f"Job {job_id} did not finish within {max_wait} seconds")
        print(f"Job is still processing. Waiting for {poll_interval} seconds before retrying...")
        time.sleep(poll_interval)

    if status == 'success':
        print("Job completed successfully.")
        # Step 3: Retrieve the parsing result
        result = docmind_client.get_result(job_id)
        print("Parsing Result:")
        print(result)
    else:
        print("Job failed. Please check the error logs for more details.")
# Run the demo workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()