Mistral 3 Small (24B AWQ)
Mistral 3 Small is a language model that delivers capabilities comparable to larger models while being compact. It’s ideal for conversational agents, function calling, fine-tuning, and local inference with sensitive data.
Model details
Category | Details |
---|---|
Model Name | Mistral 3 Small |
Version | 24B AWQ |
Model Category | Large Language Model (LLM) |
Size | 24B parameters |
HuggingFace Model | casperhansen/mistral-small-24b-instruct-2501-awq |
OpenAI Compatible Endpoint | Chat Completions |
License | Apache 2.0 |
Capabilities
Feature | Details |
---|---|
Tool Calling | ✅ |
Azion Long-term Support (LTS) | ✅ |
Context Length | 32,000 tokens |
Supports LoRA | ❌ |
Input Data | Text |
Usage
Basic chat completion
This is an example of a basic chat completion request using this model:
curl http://endpoint-url/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ "stream": true, "model": "casperhansen/mistral-small-24b-instruct-2501-awq", "max_tokens": 1024, "messages": [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Name the european capitals"} ]}'
Tool Calling example
This is an example of a tool calling request using this model:
curl http://endpoint-url/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ "stream": true, "model": "casperhansen/mistral-small-24b-instruct-2501-awq", "max_tokens": 1024, "messages": [ {"role": "system", "content": "You are a helpful assistant with access to tools."}, {"role": "user", "content": "What is the weather in London?"} ], "tools": [ { "type": "function", "function": { "name": "get_weather", "description": "Get the current weather for a location", "parameters": { "type": "object", "properties": { "location": { "type": "string", "description": "The city and state" } }, "required": ["location"] } } } ]}'
Multimodal (text + image) example
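This is an example of a multimodal request in the OpenAI-compatible message format. Note that the Capabilities table lists Text as this model's only input data type, and the JSON schema for user messages accepts only text content items, so confirm image support for your deployment before relying on `image_url` content: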
curl http://endpoint-url/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ "stream": true, "model": "casperhansen/mistral-small-24b-instruct-2501-awq", "max_tokens": 1024, "messages": [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": [ {"type": "text", "text": "What is in this image?"}, {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}} ]} ]}'
Performance
The model has been quantized using AWQ, resulting in a 50% reduction in model size while maintaining performance. It supports a large context window of 32,000 tokens, making it suitable for processing long documents and maintaining context in conversations.
Running with Edge Functions
This is an example of how to use this model with Edge Functions:
const modelResponse = await Azion.AI.run("casperhansen-mistral-small-24b-instruct-2501-awq", { "stream": true, "max_tokens": 1024, "messages": [ { "role": "system", "content": "You are a helpful assistant." }, { "role": "user", "content": "Name the european capitals" } ]})
JSON schema
{ "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", "required": [ "messages" ], "properties": { "messages": { "type": "array", "items": { "$ref": "#/components/schemas/Message" } }, "temperature": { "type": "number", "minimum": 0, "maximum": 2 }, "top_p": { "type": "number", "minimum": 0, "maximum": 1, "default": 1 }, "n": { "type": "integer", "minimum": 1, "default": 1 }, "stream": { "type": "boolean", "default": false }, "max_tokens": { "type": "integer", "minimum": 1 }, "presence_penalty": { "type": "number", "minimum": -2, "maximum": 2, "default": 0 }, "frequency_penalty": { "type": "number", "minimum": -2, "maximum": 2, "default": 0 }, "tools": { "type": "array", "items": { "$ref": "#/components/schemas/ToolDefinition" } } }, "components": { "schemas": { "Message": { "oneOf": [ { "$ref": "#/components/schemas/SystemMessage" }, { "$ref": "#/components/schemas/UserMessage" }, { "$ref": "#/components/schemas/AssistantMessage" }, { "$ref": "#/components/schemas/ToolMessage" } ] }, "SystemMessage": { "type": "object", "required": [ "role", "content" ], "properties": { "role": { "type": "string", "enum": [ "system" ] }, "content": { "$ref": "#/components/schemas/TextContent" } } }, "UserMessage": { "type": "object", "required": [ "role", "content" ], "properties": { "role": { "type": "string", "enum": [ "user" ] }, "content": { "oneOf": [ { "type": "string" }, { "type": "array", "items": { "oneOf": [ { "$ref": "#/components/schemas/TextContentItem" } ] } } ] } } }, "AssistantMessage": { "oneOf": [ { "$ref": "#/components/schemas/AssistantMessageWithoutToolCalls" }, { "$ref": "#/components/schemas/AssistantMessageWithToolCalls" } ] }, "ToolMessage": { "type": "object", "required": [ "role", "content", "tool_call_id" ], "properties": { "role": { "enum": [ "tool" ] }, "content": { "type": "string" }, "tool_call_id": { "type": "string" } } }, "AssistantMessageWithoutToolCalls": { "type": "object", "required": [ "role", "content" ], "properties": { 
"role": { "type": "string", "enum": [ "assistant" ] }, "content": { "$ref": "#/components/schemas/TextContent" } }, "not": { "required": [ "tool_calls" ] } }, "AssistantMessageWithToolCalls": { "type": "object", "required": [ "role", "tool_calls" ], "properties": { "role": { "type": "string", "enum": [ "assistant" ] }, "tool_calls": { "type": "array", "items": { "$ref": "#/components/schemas/ToolCalls" } } } }, "TextContent": { "oneOf": [ { "type": "string" }, { "type": "array", "items": { "$ref": "#/components/schemas/TextContentItem" } } ], "description": "Text content that can be provided either as a simple string or as an array of TextContentItem objects" }, "TextContentItem": { "type": "object", "required": [ "type", "text" ], "properties": { "type": { "type": "string", "enum": [ "text" ] }, "text": { "type": "string" } } }, "ToolCalls": { "type": "object", "required": [ "function", "id", "type" ], "properties": { "function": { "type": "object", "required": [ "name", "arguments" ], "properties": { "name": { "type": "string" }, "arguments": { "type": "string" } } }, "id": { "type": "string" }, "type": { "enum": [ "function" ] } }, "description":"The name and arguments of a function that should be called, as generated by the model." }, "ToolDefinition": { "type": "object", "required": [ "type", "function" ], "properties": { "type": { "type": "string", "enum": [ "function" ] }, "function": { "type": "object", "required": [ "name" ], "properties": { "name": { "type": "string" }, "description": { "type": "string" }, "parameters": { "type": "object", "additionalProperties": true }, "strict": { "type": "boolean", "default": false } } } }, "description": "Definition of a tool that can be used by the model" } } }}