Multimodal model use
Send Requests
The request body is the same as for the text-only models, except for the `messages`
parameter: each message's `content` is not a single string but an
array of text content parts and/or image content parts.
- python
import requests
import os
import base64
def encode_image(image_data: bytes) -> str:
    """Base64-encode raw image bytes and return the result as a text string."""
    encoded_bytes = base64.b64encode(image_data)
    return encoded_bytes.decode('utf-8')
# You can use a local image this way:
# with open('path/to/image.jpg', 'rb') as image_file:
#     example_image = image_file.read()
# For this example, we'll use a random cat image from the web.
# The timeout prevents the request from hanging indefinitely, and
# raise_for_status() fails fast on an HTTP error instead of silently
# base64-encoding an error page as if it were image data.
image_response = requests.get('https://cataas.com/cat?width=1024', timeout=30)
image_response.raise_for_status()
example_image = image_response.content
# We need to encode the image in base64 to send it to the API
encoded_image = encode_image(example_image)
# API credentials and endpoint; raises KeyError if EXXA_API_KEY is unset,
# which is clearer than sending an unauthenticated request.
api_key = os.environ["EXXA_API_KEY"]
url = "https://api.withexxa.com/v1/requests"
headers = {"X-API-Key": api_key, "Content-Type": "application/json"}
# The content field is an array of parts: one text part with the prompt,
# and one image part carrying the picture as a base64 data URL.
payload = {
    "request_body": {
        "model": "beta:qwen-2-vl-72b-instruct-fp16",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe the following image in detail",
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{encoded_image}",
                            # you could also use an online image this way:
                            # "url" : "https://cataas.com/cat?width=1024"
                        },
                    },
                ],
            }
        ],
    }
}
# A timeout guards against hanging forever; raise_for_status() surfaces
# HTTP errors instead of attempting to JSON-decode an error page.
response = requests.post(url, headers=headers, json=payload, timeout=30)
response.raise_for_status()
print(response.json())