InternVL3

InternVL3 is an advanced multimodal large language model (MLLM) with capabilities to encompass tool usage, GUI agents, industrial image analysis, 3D vision perception, and more.

Model details

CategoryDetails
Model NameInternVL3
VersionInstruct 1B
Model CategoryVLM
Size1B parameters
HuggingFace Modelopengvlab-internvl3-1b-instruct
OpenAI Compatible EndpointChat Completions
LicenseApache 2.0

Capabilities

FeatureDetails
Tool Calling
Azion Long-term Support (LTS)
Context Length16k tokens
Supports LoRA
Input dataText + Image

Usage

Basic Chat Completion

This is a basic chat completion example using this model:

const modelResponse = await Azion.AI.run("opengvlab-internvl3-1b-instruct", {
"stream": true,
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Name the european capitals"
}
]
})
PropertyTypeDescription
streambooleanIndicates whether to stream the response.
messages[]arrayArray of message objects.
messages[].rolestringThe role of the message sender.
messages[].contentstringThe content of the message.

Multimodal (text + image) example

This is a multimodal example using this model:

const modelResponse = await Azion.AI.run("opengvlab-internvl3-1b-instruct", {
"stream": true,
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": [
{
"type": "text",
"text": "What is in this image?"
},
{
"type": "image_url",
"image_url": {
"url": "https://example.com/image.jpg"
}
}
]
}
]
})
PropertyTypeDescription
messages[]arrayArray containing the messages sent.
messages[].rolestringThe role of the message sender.
messages[].contentstringThe content of the message.
messages[].content[].typestringThe type of the content item.
messages[].content[].textstringContent of the message (only if type is "text").
messages[].content[].image_urlstringContent of the message (only if type is "image_url").
messages[].content[].image_url.urlstringThe actual URL of the image.

JSON Schema

{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"required": [
"messages"
],
"properties": {
"messages": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
}
},
"temperature": {
"type": "number",
"minimum": 0,
"maximum": 2
},
"top_p": {
"type": "number",
"minimum": 0,
"maximum": 1,
"default": 1
},
"n": {
"type": "integer",
"minimum": 1,
"default": 1
},
"stream": {
"type": "boolean",
"default": false
},
"max_tokens": {
"type": "integer",
"minimum": 1
},
"presence_penalty": {
"type": "number",
"minimum": -2,
"maximum": 2,
"default": 0
},
"frequency_penalty": {
"type": "number",
"minimum": -2,
"maximum": 2,
"default": 0
},
"tools": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ToolDefinition"
}
}
},
"components": {
"schemas": {
"Message": {
"oneOf": [
{
"$ref": "#/components/schemas/SystemMessage"
},
{
"$ref": "#/components/schemas/UserMessage"
},
{
"$ref": "#/components/schemas/AssistantMessage"
}
]
},
"SystemMessage": {
"type": "object",
"required": [
"role",
"content"
],
"properties": {
"role": {
"type": "string",
"enum": [
"system"
]
},
"content": {
"$ref": "#/components/schemas/TextContent"
}
}
},
"UserMessage": {
"type": "object",
"required": [
"role",
"content"
],
"properties": {
"role": {
"type": "string",
"enum": [
"user"
]
},
"content": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"oneOf": [
{
"$ref": "#/components/schemas/TextContentItem"
},
{
"$ref": "#/components/schemas/ImageContentItem"
}
]
}
}
]
}
}
},
"AssistantMessage": {
"oneOf": [
{
"$ref": "#/components/schemas/AssistantMessageWithoutToolCalls"
}
]
},
"AssistantMessageWithoutToolCalls": {
"type": "object",
"required": [
"role",
"content"
],
"properties": {
"role": {
"type": "string",
"enum": [
"assistant"
]
},
"content": {
"$ref": "#/components/schemas/TextContent"
}
},
"not": {
"required": [
"tool_calls"
]
}
},
"TextContent": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/TextContentItem"
}
}
],
"description": "Text content that can be provided either as a simple string or as an array of TextContentItem objects"
},
"ImageContent": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ImageContentItem"
}
},
"TextContentItem": {
"type": "object",
"required": [
"type",
"text"
],
"properties": {
"type": {
"type": "string",
"enum": [
"text"
]
},
"text": {
"type": "string"
}
}
},
"ImageContentItem": {
"type": "object",
"required": [
"type",
"image_url"
],
"properties": {
"type": {
"type": "string",
"enum": [
"image_url"
]
},
"image_url": {
"type": "object",
"required": [
"url"
],
"properties": {
"url": {
"type": "string",
"format": "uri"
}
}
}
}
}
}
}
}