Multimodal model use
Send Requests
The request body is the same as for the text-only models, except for the `messages`
parameter: each message's `content` is not a single string but an
array of text content parts and/or image content parts.
- python
import requests
import os
import base64
def encode_image(image_data: bytes) -> str:
    """Base64-encode raw image bytes and return the result as a text string."""
    encoded_bytes = base64.b64encode(image_data)
    return encoded_bytes.decode('utf-8')
# You can use a local image this way:
# with open('path/to/image.jpg', 'rb') as image_file:
#     example_image = image_file.read()
# For this example, we'll use a random cat image from the web.
# The timeout prevents the request from hanging indefinitely, and
# raise_for_status() fails fast on an HTTP error instead of silently
# base64-encoding an error page as if it were image data.
image_response = requests.get('https://cataas.com/cat?width=1024', timeout=30)
image_response.raise_for_status()
example_image = image_response.content
# We need to encode the image in base64 to send it to the API
encoded_image = encode_image(example_image)
# API credentials and endpoint; raises KeyError if EXXA_API_KEY is unset,
# which is clearer than sending an unauthenticated request.
api_key = os.environ["EXXA_API_KEY"]
url = "https://api.withexxa.com/v1/requests"
headers = {"X-API-Key": api_key, "Content-Type": "application/json"}
# The content field is an array of parts: one text part with the prompt,
# and one image part carrying the picture as a base64 data URL.
payload = {
    "request_body": {
        "model": "beta:qwen-2-vl-72b-instruct-fp16",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe the following image in detail",
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{encoded_image}",
                            # you could also use an online image this way:
                            # "url" : "https://cataas.com/cat?width=1024"
                        },
                    },
                ],
            }
        ],
    }
}
# A timeout guards against hanging forever; raise_for_status() surfaces
# HTTP errors instead of attempting to JSON-decode an error page.
response = requests.post(url, headers=headers, json=payload, timeout=30)
response.raise_for_status()
print(response.json())