Example Python client: Asynchronous Domino endpoint

This example shows a Python client application that submits a prediction request to an asynchronous Domino endpoint, polls periodically until the request completes, and retrieves the result.
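
For reference, the response shapes this client expects are roughly as follows. These are illustrative only, inferred from how the code below reads the responses, and are not complete API documentation:

    create (POST) response:          {"asyncPredictionId": "<prediction id>"}
    status (GET) response, pending:  {"status": "queued"}
    status (GET) response, success:  {"status": "succeeded", "result": <prediction result>}
    status (GET) response, failure:  {"status": "failed", "errors": [<error messages>]}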

import json
import logging
import requests
import sys
import time

logging.basicConfig(stream=sys.stdout, level=logging.INFO)  # change logging setup as required

# TO EDIT: update the example request parameters for your model
REQUEST_PARAMETERS = {
    "param1": "value1",
    "param2": "value2",
    "param3": 3
}
# TO EDIT: copy these values from "Calling your endpoint" on the Domino endpoint overview page
DOMINO_URL = "https://domino.mycompany.com:443"
MODEL_ID = "5a4131c5aad8e00eefb676b7"
MODEL_ACCESS_TOKEN = "o2pnVAqFOrQBEZMCuzt797d676E6k4eS3mZMKJVKbeid8V6Bbig6kOdh6y9YSf3R"

# DO NOT EDIT these values
MODEL_BASE_URL = f"{DOMINO_URL}/api/modelApis/async/v1/{MODEL_ID}"
SUCCEEDED_STATUS = "succeeded"
FAILED_STATUS = "failed"
QUEUED_STATUS = "queued"
TERMINAL_STATUSES = [SUCCEEDED_STATUS, FAILED_STATUS]
PENDING_STATUSES = [QUEUED_STATUS]
MAX_RETRY_DELAY_SEC = 60


### CREATE REQUEST ###

create_response = None
retry_delay_sec = 0
while (
        create_response is None
        or (500 <= create_response.status_code < 600)  # retry for transient 5xx errors
):
    # retry with a delay that backs off exponentially, up to MAX_RETRY_DELAY_SEC
    if retry_delay_sec > 0:
        time.sleep(retry_delay_sec)
    retry_delay_sec = min(max(retry_delay_sec * 2, 1), MAX_RETRY_DELAY_SEC)

    create_response = requests.post(
        MODEL_BASE_URL,
        headers={"Authorization": f"Bearer {MODEL_ACCESS_TOKEN}"},
        json={"parameters": REQUEST_PARAMETERS}
    )

if create_response.status_code != 200:
    raise Exception(f"create prediction request failed, response: {create_response}")

prediction_id = create_response.json()["asyncPredictionId"]
logging.info(f"prediction id: {prediction_id}")


### POLL STATUS AND RETRIEVE RESULT ###

status_response = None
retry_delay_sec = 0
while (
        status_response is None
        or (500 <= status_response.status_code < 600)  # retry for transient 5xx errors
        or (status_response.status_code == 200 and status_response.json()["status"] in PENDING_STATUSES)
):
    # status polling with a time interval that backs off up to MAX_RETRY_DELAY_SEC
    if retry_delay_sec > 0:
        time.sleep(retry_delay_sec)
    retry_delay_sec = min(max(retry_delay_sec * 2, 1), MAX_RETRY_DELAY_SEC)

    status_response = requests.get(
        f"{MODEL_BASE_URL}/{prediction_id}",
        headers={"Authorization": f"Bearer {MODEL_ACCESS_TOKEN}"},
    )

if status_response.status_code != 200:
    raise Exception(f"prediction status request failed, response: {create_response}")

prediction_status = status_response.json()["status"]
if prediction_status == SUCCEEDED_STATUS:  # succeeded response includes the prediction result in "result"
    result = status_response.json()["result"]
    logging.info(f"prediction succeeded, result:
{json.dumps(result, indent = 2)}")
elif prediction_status == FAILED_STATUS:  # failed response includes the error messages in "errors"
    errors = status_response.json()["errors"]
    logging.error(f"prediction failed, errors:
{json.dumps(errors, indent = 2)}")
else:
    raise Exception(f"unexpected terminal prediction response status: {prediction_status}")