Function Calling with Cortex.cpp

This guide demonstrates how to use Cortex.cpp's function calling capabilities, which are compatible with the OpenAI API specification. We'll use the llama3.1:8b-gguf-q4-km model for these examples, following patterns similar to the OpenAI function calling documentation.

Implementation Guide

1. Start the Server

First, launch the Cortex server with your chosen model (the -d flag runs it in detached mode):


cortex run -d llama3.1:8b-gguf-q4-km

2. Initialize the Python Client

Create a new Python script named function_calling.py and set up the OpenAI client:


from datetime import datetime
from openai import OpenAI
from pydantic import BaseModel
import json
MODEL = "llama3.1:8b-gguf-q4-km"
client = OpenAI(
    base_url="http://localhost:39281/v1",
    api_key="not-needed"  # Authentication is not required for local deployment
)

3. Implement Function Calling

Define your function schema and create a chat completion:


tools = [
    {
        "type": "function",
        "function": {
            "name": "get_delivery_date",
            "strict": True,
            "description": "Get the delivery date for a customer's order. Call this whenever you need to know the delivery date, for example when a customer asks 'Where is my package'",
            "parameters": {
                "type": "object",
                "properties": {
                    "order_id": {
                        "type": "string",
                        "description": "The customer's order ID.",
                    },
                },
                "required": ["order_id"],
                "additionalProperties": False,
            },
        }
    }
]
completion_payload = {
    "messages": [
        {
            "role": "system",
            "content": 'You have access to the following CUSTOM functions:\n\n<CUSTOM_FUNCTIONS>\n\nIf you choose to call a function ONLY reply in the following format:\n<{start_tag}={function_name}>{parameters}{end_tag}\nwhere\n\nstart_tag => `<function`\nparameters => a JSON dict with the function argument name as key and function argument value as value.\nend_tag => `</function>`\n\nHere is an example,\n<function=example_function_name>{"example_name": "example_value"}</function>\n\nReminder:\n- Function calls MUST follow the specified format\n- Required parameters MUST be specified\n- You can call one or more functions at a time, but remember to only choose the correct function\n- Put the entire function call reply on one line\n- Always add your sources when using search results to answer the user query\n- If you cannot find the correct parameters or arguments corresponding to a function in the user\'s message, ask the user to provide them; do not make assumptions.\n- No explanations are needed when calling a function.\n\nYou are a helpful assistant.',
        },
        {
            "role": "user",
            "content": "Hi, can you tell me the delivery date for my order?"
        },
    ]
}
response = client.chat.completions.create(
    top_p=0.9,
    temperature=0.6,
    model=MODEL,
    messages=completion_payload["messages"],
    tools=tools,
)

Since no order_id was provided, the model should ask for it before calling the function. Note that smaller models sometimes guess an ID instead, as in the sample response below, so validate tool-call arguments before acting on them:


# Example Response
ChatCompletion(
    id='54yeEjbaFbldGfSPyl2i',
    choices=[
        Choice(
            finish_reason='tool_calls',
            index=0,
            logprobs=None,
            message=ChatCompletionMessage(
                content='',
                refusal=None,
                role='assistant',
                audio=None,
                function_call=None,
                tool_calls=[
                    ChatCompletionMessageToolCall(
                        id=None,
                        function=Function(arguments='{"order_id": "12345"}', name='get_delivery_date'),
                        type='function'
                    )
                ]
            )
        )
    ],
    created=1738543890,
    model='_',
    object='chat.completion',
    service_tier=None,
    system_fingerprint='_',
    usage=CompletionUsage(
        completion_tokens=16,
        prompt_tokens=443,
        total_tokens=459,
        completion_tokens_details=None,
        prompt_tokens_details=None
    )
)
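
Whichever way the model responds, you can branch on the message's tool_calls field. A minimal sketch using the OpenAI Python SDK's response objects:


message = response.choices[0].message
if message.tool_calls:
    # The model requested a function call; its arguments arrive as a JSON string
    tool_call = message.tool_calls[0]
    arguments = json.loads(tool_call.function.arguments)
    print(tool_call.function.name, arguments)
else:
    # The model answered in plain text, e.g., asking for the missing order ID
    print(message.content)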

4. Handle User Input

Once the user provides their order ID:


completion_payload = {
    "messages": [
        {
            "role": "system",
            "content": 'You have access to the following CUSTOM functions:\n\n<CUSTOM_FUNCTIONS>\n\nIf you choose to call a function ONLY reply in the following format:\n<{start_tag}={function_name}>{parameters}{end_tag}\nwhere\n\nstart_tag => `<function`\nparameters => a JSON dict with the function argument name as key and function argument value as value.\nend_tag => `</function>`\n\nHere is an example,\n<function=example_function_name>{"example_name": "example_value"}</function>\n\nReminder:\n- Function calls MUST follow the specified format\n- Required parameters MUST be specified\n- You can call one or more functions at a time, but remember to only choose the correct function\n- Put the entire function call reply on one line\n- Always add your sources when using search results to answer the user query\n- If you cannot find the correct parameters or arguments corresponding to a function in the user\'s message, ask the user to provide them; do not make assumptions.\n- No explanations are needed when calling a function.\n\nYou are a helpful assistant.',
        },
        {
            "role": "user",
            "content": "Hi, can you tell me the delivery date for my order?"
        },
        {
            "role": "assistant",
            "content": "Of course! Please provide your order ID so I can look it up."
        },
        {
            "role": "user",
            "content": "i think it is order_12345"
        },
    ]
}
response = client.chat.completions.create(
    model=MODEL,
    messages=completion_payload["messages"],
    tools=tools,
    temperature=0.6,
    top_p=0.9
)
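
If the model now returns a tool call, execute the corresponding local function before replying to the user. The next step simulates this; in a real application you would dispatch on the function name, roughly as in the sketch below (the get_delivery_date implementation here is a stand-in):


def get_delivery_date(order_id: str) -> datetime:
    # Stand-in implementation; a real application would query an order database
    return datetime.now()

message = response.choices[0].message
if message.tool_calls:
    call = message.tool_calls[0]
    args = json.loads(call.function.arguments)
    if call.function.name == "get_delivery_date":
        delivery_date = get_delivery_date(args["order_id"])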

5. Process Function Results

Handle the function call response and generate the final answer:


# Simulate function execution
order_id = "order_12345"
delivery_date = datetime.now()
function_call_result_message = {
    "role": "tool",
    "content": json.dumps({
        "order_id": order_id,
        "delivery_date": delivery_date.strftime('%Y-%m-%d %H:%M:%S')
    }),
    "tool_call_id": "call_62136354"
}
final_messages = completion_payload["messages"] + [
    {
        "role": "assistant",
        "tool_calls": [{
            "id": "call_62136354",
            "type": "function",
            "function": {
                "arguments": '{"order_id": "order_12345"}',
                "name": "get_delivery_date"
            }
        }]
    },
    function_call_result_message
]


response = client.chat.completions.create(
    model=MODEL,
    messages=final_messages,
    tools=tools,
    temperature=0.6,
    top_p=0.9
)
print(response)


ChatCompletion(
    id='UMIoW4aNrqKXW2DR1ksX',
    choices=[
        Choice(
            finish_reason='stop',
            index=0,
            logprobs=None,
            message=ChatCompletionMessage(
                content='The delivery date for your order (order_12345) is February 3, 2025 at 11:53 AM.',
                refusal=None,
                role='assistant',
                audio=None,
                function_call=None,
                tool_calls=None
            )
        )
    ],
    created=1738544037,
    model='_',
    object='chat.completion',
    service_tier=None,
    system_fingerprint='_',
    usage=CompletionUsage(
        completion_tokens=27,
        prompt_tokens=535,
        total_tokens=562,
        completion_tokens_details=None,
        prompt_tokens_details=None
    )
)
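
To surface only the assistant's answer rather than the full response object:


# The final answer is plain text on the message's content field
print(response.choices[0].message.content)
# The delivery date for your order (order_12345) is February 3, 2025 at 11:53 AM.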

Advanced Features

Parallel Function Calls

Cortex.cpp supports calling multiple functions simultaneously:


tools = [
    {
        "type": "function",
        "function": {
            "name": "get_delivery_date",
            "strict": True,
            "description": "Get the delivery date for a customer's order.",
            "parameters": {
                "type": "object",
                "properties": {
                    "order_id": {
                        "type": "string",
                        "description": "The customer's order ID.",
                    },
                },
                "required": ["order_id"],
                "additionalProperties": False,
            },
        }
    },
    {
        "type": "function",
        "function": {
            "name": "get_current_conditions",
            "description": "Get the current weather conditions for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g., San Francisco, CA"
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["Celsius", "Fahrenheit"]
                    }
                },
                "required": ["location", "unit"]
            }
        }
    }
]
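
When the model calls several functions in one turn, message.tool_calls contains one entry per call, and each result should be appended as its own tool message. A sketch, where handlers is a hypothetical dispatch table and get_current_conditions is assumed to be defined alongside get_delivery_date:


# Hypothetical dispatch table mapping tool names to local implementations
handlers = {
    "get_delivery_date": get_delivery_date,
    "get_current_conditions": get_current_conditions,
}

message = response.choices[0].message
for call in message.tool_calls or []:
    args = json.loads(call.function.arguments)
    result = handlers[call.function.name](**args)
    # Each tool result becomes its own "tool" message
    messages.append({
        "role": "tool",
        "tool_call_id": call.id,
        "content": json.dumps(result),
    })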

Controlling Function Execution

You can control function calling behavior using the tool_choice parameter:


# Disable function calling
response = client.chat.completions.create(
    model=MODEL,
    messages=messages,
    tools=tools,
    tool_choice="none"
)

# Force a specific function
response = client.chat.completions.create(
    model=MODEL,
    messages=messages,
    tools=tools,
    tool_choice={"type": "function", "function": {"name": "get_current_conditions"}}
)
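
Per the OpenAI specification, which Cortex.cpp mirrors, omitting tool_choice or passing "auto" lets the model decide for itself whether a function call is needed:


# Let the model decide (the default behavior per the OpenAI specification)
response = client.chat.completions.create(
    model=MODEL,
    messages=messages,
    tools=tools,
    tool_choice="auto"
)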

Enhanced Function Definitions

Use enums to improve function accuracy:


{
    "name": "pick_tshirt_size",
    "description": "Handle t-shirt size selection",
    "parameters": {
        "type": "object",
        "properties": {
            "size": {
                "type": "string",
                "enum": ["s", "m", "l"],
                "description": "T-shirt size selection"
            }
        },
        "required": ["size"]
    }
}
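
Even with an enum in the schema, it is worth validating the model's arguments before acting on them. A small sketch against the schema above, assuming tool_call was parsed from the response as in the earlier steps:


VALID_SIZES = {"s", "m", "l"}  # mirrors the enum in the schema above

args = json.loads(tool_call.function.arguments)
if args.get("size") not in VALID_SIZES:
    # Re-prompt rather than acting on an out-of-range value
    print("Please pick one of: s, m, l")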

Important Notes

  • Function calling accuracy depends on model quality. Smaller models (8B-12B) work best with simple use cases.
  • Cortex.cpp implements function calling through prompt engineering, injecting system prompts when tools are specified.
  • Compatibility is best with llama3.1 and its derivatives, such as mistral-nemo and qwen.
  • System prompts can be customized for specific use cases (see the implementation details).
  • For complete implementation examples, refer to our detailed guide.