Florence-2 RoBERTa VQA
A tool that takes an image, analyzes its contents, generates detailed captions, and then tries to answer the given question using those captions as context. It returns the answer to the question as text.
Output
python
import requests
import base64
# Example: send an image and a question to the florence2-qa tool endpoint and
# print the JSON reply.
#
# "{{path_to_image}}" and "{{prompt}}" are template placeholders; substitute a
# real image path and question before running.
url = "https://api.landing.ai/v1/tools/florence2-qa"

# Upper bound (seconds) on how long to wait for the server. Without a timeout,
# requests waits indefinitely on a stalled connection. Adjust as needed.
request_timeout_seconds = 60

# The payload is sent as JSON, so the raw image bytes are base64-encoded and
# decoded to a str to make them JSON-serializable.
with open("{{path_to_image}}", "rb") as image_file:
    base64_string = base64.b64encode(image_file.read()).decode('utf-8')

payload = {
    "image": base64_string,
    "question": "{{prompt}}",
    "function_name": "florence2_roberta_vqa",
}
headers = {
    "Content-Type": "application/json",
    "Accept": "application/json",
}

response = requests.post(
    url,
    json=payload,
    headers=headers,
    timeout=request_timeout_seconds,
)
# Fail loudly on 4xx/5xx instead of trying to JSON-decode an error page.
response.raise_for_status()
print(response.json())