Mistral 3 Small (24B AWQ)

Mistral 3 Small is a language model that delivers capabilities comparable to larger models while being compact. It’s ideal for conversational agents, function calling, fine-tuning, and local inference with sensitive data.

Model details

CategoryDetails
Model NameMistral 3 Small
Version24B AWQ
Model CategoryLarge Language Model (LLM)
Size24B parameters
HuggingFace Modelcasperhansen/mistral-small-24b-instruct-2501-awq
OpenAI Compatible EndpointChat Completions
LicenseApache 2.0

Capabilities

FeatureDetails
Tool Calling
Azion Long-term Support (LTS)
Context Length32,000
Supports LoRA
Input DataText

Usage

Basic chat completion

This is an example of a basic chat completion request using this model:

Terminal window
curl http://endpoint-url/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"stream": true,
"model": "casperhansen/mistral-small-24b-instruct-2501-awq",
"max_tokens": 1024,
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Name the european capitals"}
]
}'

Tool Calling example

This is an example of a tool calling request using this model:

Terminal window
curl http://endpoint-url/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"stream": true,
"model": "casperhansen/mistral-small-24b-instruct-2501-awq",
"max_tokens": 1024,
"messages": [
{"role": "system", "content": "You are a helpful assistant with access to tools."},
{"role": "user", "content": "What is the weather in London?"}
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather for a location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state"
}
},
"required": ["location"]
}
}
}
]
}'

Multimodal (text + image) example

This is an example of a multimodal request using this model:

Terminal window
curl http://endpoint-url/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"stream": true,
"model": "casperhansen/mistral-small-24b-instruct-2501-awq",
"max_tokens": 1024,
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": [
{"type": "text", "text": "What is in this image?"},
{"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}}
]}
]
}'

Performance

The model has been quantized using AWQ, resulting in a 50% reduction in model size while maintaining performance. It supports a large context window of 32,000 tokens, making it suitable for processing long documents and maintaining context in conversations.

Running with Edge Functions:

This is an example of how to use this model with Edge Functions:

const modelResponse = await Azion.AI.run("casperhansen-mistral-small-24b-instruct-2501-awq", {
"stream": true,
"max_tokens": 1024,
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Name the european capitals"
}
]
})

JSON schema

{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"required": [
"messages"
],
"properties": {
"messages": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
}
},
"temperature": {
"type": "number",
"minimum": 0,
"maximum": 2
},
"top_p": {
"type": "number",
"minimum": 0,
"maximum": 1,
"default": 1
},
"n": {
"type": "integer",
"minimum": 1,
"default": 1
},
"stream": {
"type": "boolean",
"default": false
},
"max_tokens": {
"type": "integer",
"minimum": 1
},
"presence_penalty": {
"type": "number",
"minimum": -2,
"maximum": 2,
"default": 0
},
"frequency_penalty": {
"type": "number",
"minimum": -2,
"maximum": 2,
"default": 0
},
"tools": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ToolDefinition"
}
}
},
"components": {
"schemas": {
"Message": {
"oneOf": [
{
"$ref": "#/components/schemas/SystemMessage"
},
{
"$ref": "#/components/schemas/UserMessage"
},
{
"$ref": "#/components/schemas/AssistantMessage"
},
{
"$ref": "#/components/schemas/ToolMessage"
}
]
},
"SystemMessage": {
"type": "object",
"required": [
"role",
"content"
],
"properties": {
"role": {
"type": "string",
"enum": [
"system"
]
},
"content": {
"$ref": "#/components/schemas/TextContent"
}
}
},
"UserMessage": {
"type": "object",
"required": [
"role",
"content"
],
"properties": {
"role": {
"type": "string",
"enum": [
"user"
]
},
"content": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"oneOf": [
{
"$ref": "#/components/schemas/TextContentItem"
}
]
}
}
]
}
}
},
"AssistantMessage": {
"oneOf": [
{
"$ref": "#/components/schemas/AssistantMessageWithoutToolCalls"
},
{
"$ref": "#/components/schemas/AssistantMessageWithToolCalls"
}
]
},
"ToolMessage": {
"type": "object",
"required": [
"role",
"content",
"tool_call_id"
],
"properties": {
"role": {
"enum": [
"tool"
]
},
"content": {
"type": "string"
},
"tool_call_id": {
"type": "string"
}
}
},
"AssistantMessageWithoutToolCalls": {
"type": "object",
"required": [
"role",
"content"
],
"properties": {
"role": {
"type": "string",
"enum": [
"assistant"
]
},
"content": {
"$ref": "#/components/schemas/TextContent"
}
},
"not": {
"required": [
"tool_calls"
]
}
},
"AssistantMessageWithToolCalls": {
"type": "object",
"required": [
"role",
"tool_calls"
],
"properties": {
"role": {
"type": "string",
"enum": [
"assistant"
]
},
"tool_calls": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ToolCalls"
}
}
}
},
"TextContent": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/TextContentItem"
}
}
],
"description": "Text content that can be provided either as a simple string or as an array of TextContentItem objects"
},
"TextContentItem": {
"type": "object",
"required": [
"type",
"text"
],
"properties": {
"type": {
"type": "string",
"enum": [
"text"
]
},
"text": {
"type": "string"
}
}
},
"ToolCalls": {
"type": "object",
"required": [
"function",
"id",
"type"
],
"properties": {
"function": {
"type": "object",
"required": [
"name",
"arguments"
],
"properties": {
"name": {
"type": "string"
},
"arguments": {
"type": "string"
}
}
},
"id": {
"type": "string"
},
"type": {
"enum": [
"function"
]
}
},
"description":"The name and arguments of a function that should be called, as generated by the model."
},
"ToolDefinition": {
"type": "object",
"required": [
"type",
"function"
],
"properties": {
"type": {
"type": "string",
"enum": [
"function"
]
},
"function": {
"type": "object",
"required": [
"name"
],
"properties": {
"name": {
"type": "string"
},
"description": {
"type": "string"
},
"parameters": {
"type": "object",
"additionalProperties": true
},
"strict": {
"type": "boolean",
"default": false
}
}
}
},
"description": "Definition of a tool that can be used by the model"
}
}
}
}