"""This example shows a Python client application that submits a prediction request to an asynchronous Model API, polls periodically for completion, and retrieves the result."""
import json
import logging
import requests
import sys
import time
# Send INFO-and-above log records to stdout; change the logging setup as required.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

# TO EDIT: replace these example request parameters with the ones your model expects.
REQUEST_PARAMETERS = {
    "param1": "value1",
    "param2": "value2",
    "param3": 3,
}

# TO EDIT: copy these values from "Calling your Model" on the Model API overview page.
DOMINO_URL = "https://domino.mycompany.com:443"
MODEL_ID = "5a4131c5aad8e00eefb676b7"
MODEL_ACCESS_TOKEN = "o2pnVAqFOrQBEZMCuzt797d676E6k4eS3mZMKJVKbeid8V6Bbig6kOdh6y9YSf3R"

# DO NOT EDIT the values below.

# Endpoint used both to create a prediction request and, with the prediction
# id appended, to query its status.
MODEL_BASE_URL = f"{DOMINO_URL}/api/modelApis/async/v1/{MODEL_ID}"

# Values compared against the "status" field of a status response.
QUEUED_STATUS = "queued"
SUCCEEDED_STATUS = "succeeded"
FAILED_STATUS = "failed"
PENDING_STATUSES = [QUEUED_STATUS]
TERMINAL_STATUSES = [SUCCEEDED_STATUS, FAILED_STATUS]

# Upper bound, in seconds, on the back-off delay between retries / polls.
MAX_RETRY_DELAY_SEC = 60
### CREATE REQUEST ###
# Submit the prediction request to the model endpoint. The POST is repeated,
# with an exponentially growing delay, for as long as the service answers with
# a 5xx status; any other non-200 answer aborts the script.

# Seconds to wait for the server before giving up on a single HTTP call.
# Without a timeout a stalled connection would block this script forever;
# with it, requests raises a requests.exceptions.Timeout instead.
CREATE_REQUEST_TIMEOUT_SEC = 60

create_response = None
retry_delay_sec = 0
while (
    create_response is None
    or (500 <= create_response.status_code < 600)  # retry for transient 5xx errors
):
    # No wait before the first attempt; afterwards the delay doubles
    # (1, 2, 4, ... seconds) and is capped at MAX_RETRY_DELAY_SEC.
    if retry_delay_sec > 0:
        time.sleep(retry_delay_sec)
    retry_delay_sec = min(max(retry_delay_sec * 2, 1), MAX_RETRY_DELAY_SEC)
    create_response = requests.post(
        MODEL_BASE_URL,
        headers={"Authorization": f"Bearer {MODEL_ACCESS_TOKEN}"},
        json={"parameters": REQUEST_PARAMETERS},
        timeout=CREATE_REQUEST_TIMEOUT_SEC,
    )

if create_response.status_code != 200:
    raise Exception(f"create prediction request failed, response: {create_response}")

# The id returned here is what the status endpoint is queried with.
prediction_id = create_response.json()["asyncPredictionId"]
logging.info(f"prediction id: {prediction_id}")
### POLL STATUS AND RETRIEVE RESULT ###
# Query the status endpoint until the prediction leaves the pending state,
# then log either the result or the reported errors. The GET is repeated,
# with an exponentially growing delay, while the service answers with a 5xx
# status or reports a status listed in PENDING_STATUSES.

# Seconds to wait for the server before giving up on a single HTTP call.
# Without a timeout a stalled connection would block this script forever;
# with it, requests raises a requests.exceptions.Timeout instead.
STATUS_REQUEST_TIMEOUT_SEC = 60

status_response = None
retry_delay_sec = 0
while (
    status_response is None
    or (500 <= status_response.status_code < 600)  # retry for transient 5xx errors
    or (status_response.status_code == 200 and status_response.json()["status"] in PENDING_STATUSES)
):
    # No wait before the first poll; afterwards the interval doubles
    # (1, 2, 4, ... seconds) and is capped at MAX_RETRY_DELAY_SEC.
    if retry_delay_sec > 0:
        time.sleep(retry_delay_sec)
    retry_delay_sec = min(max(retry_delay_sec * 2, 1), MAX_RETRY_DELAY_SEC)
    status_response = requests.get(
        f"{MODEL_BASE_URL}/{prediction_id}",
        headers={"Authorization": f"Bearer {MODEL_ACCESS_TOKEN}"},
        timeout=STATUS_REQUEST_TIMEOUT_SEC,
    )

if status_response.status_code != 200:
    # Report the status response itself, not the earlier create response.
    raise Exception(f"prediction status request failed, response: {status_response}")

# Parse the final response body once and reuse it below.
status_body = status_response.json()
prediction_status = status_body["status"]
if prediction_status == SUCCEEDED_STATUS:  # succeeded response includes the prediction result in "result"
    result = status_body["result"]
    logging.info(f"prediction succeeded, result:\n{json.dumps(result, indent=2)}")
elif prediction_status == FAILED_STATUS:  # failed response includes the error messages in "errors"
    errors = status_body["errors"]
    logging.error(f"prediction failed, errors:\n{json.dumps(errors, indent=2)}")
else:
    raise Exception(f"unexpected terminal prediction response status: {prediction_status}")