OpenAI-compatible API
This is the flagship drop-in surface: anything written against the OpenAI Chat Completions API works by changing only the base URL.
Chat completions
Send standard messages, system prompts, and sampling parameters. The model id you send is treated as intent and echoed back unchanged.
from openai import OpenAI
client = OpenAI(api_key="llm_live_...", base_url="https://app.directinference.com/di/v1")
resp = client.chat.completions.create( model="gpt-5.5-mini", messages=[ {"role": "system", "content": "You are a concise assistant."}, {"role": "user", "content": "Name three uses for a paperclip."}, ], temperature=0.7, max_tokens=300,)
print(resp.choices[0].message.content)import OpenAI from "openai";
const client = new OpenAI({ apiKey: "llm_live_...", baseURL: "https://app.directinference.com/di/v1",});
const resp = await client.chat.completions.create({ model: "gpt-5.5-mini", messages: [ { role: "system", content: "You are a concise assistant." }, { role: "user", content: "Name three uses for a paperclip." }, ], temperature: 0.7, max_tokens: 300,});
console.log(resp.choices[0].message.content);curl https://app.directinference.com/di/v1/chat/completions \ -H "Authorization: Bearer llm_live_..." \ -H "Content-Type: application/json" \ -d '{ "model": "gpt-5.5-mini", "messages": [ { "role": "system", "content": "You are a concise assistant." }, { "role": "user", "content": "Name three uses for a paperclip." } ], "temperature": 0.7, "max_tokens": 300 }'client := openai.NewClient( option.WithAPIKey("llm_live_..."), option.WithBaseURL("https://app.directinference.com/di/v1"),)
resp, err := client.Chat.Completions.New(context.TODO(), openai.ChatCompletionNewParams{ Model: "gpt-5.5-mini", Messages: []openai.ChatCompletionMessageParamUnion{ openai.SystemMessage("You are a concise assistant."), openai.UserMessage("Name three uses for a paperclip."), }, Temperature: openai.Float(0.7), MaxTokens: openai.Int(300),})if err != nil { panic(err)}
fmt.Println(resp.Choices[0].Message.Content)Streaming
Set `stream: true` to receive token-by-token Server-Sent Events. The stream terminates with the usual `[DONE]` sentinel.
stream = client.chat.completions.create( model="gpt-5.5-mini", messages=[{"role": "user", "content": "Stream a haiku about latency."}], stream=True,)
for chunk in stream: delta = chunk.choices[0].delta.content if delta: print(delta, end="", flush=True)const stream = await client.chat.completions.create({ model: "gpt-5.5-mini", messages: [{ role: "user", content: "Stream a haiku about latency." }], stream: true,});
for await (const chunk of stream) { process.stdout.write(chunk.choices[0]?.delta?.content ?? "");}curl https://app.directinference.com/di/v1/chat/completions \ -H "Authorization: Bearer llm_live_..." \ -H "Content-Type: application/json" \ -N \ -d '{ "model": "gpt-5.5-mini", "messages": [{ "role": "user", "content": "Stream a haiku about latency." }], "stream": true }'stream := client.Chat.Completions.NewStreaming(context.TODO(), openai.ChatCompletionNewParams{ Model: "gpt-5.5-mini", Messages: []openai.ChatCompletionMessageParamUnion{ openai.UserMessage("Stream a haiku about latency."), },})
for stream.Next() { chunk := stream.Current() if len(chunk.Choices) > 0 { fmt.Print(chunk.Choices[0].Delta.Content) }}if err := stream.Err(); err != nil { panic(err)}Tools & function calling
Pass `tools` with JSON Schema parameters; the response carries `tool_calls` for you to execute and feed back. Tool-shaped requests map to the `code` request type.
tools = [{ "type": "function", "function": { "name": "get_weather", "description": "Get the current weather for a city.", "parameters": { "type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"], }, },}]
resp = client.chat.completions.create( model="gpt-5.5-mini", messages=[{"role": "user", "content": "What is the weather in Paris?"}], tools=tools,)
for call in resp.choices[0].message.tool_calls or []: print(call.function.name, call.function.arguments)const tools = [{ type: "function", function: { name: "get_weather", description: "Get the current weather for a city.", parameters: { type: "object", properties: { city: { type: "string" } }, required: ["city"], }, },}] as const;
const resp = await client.chat.completions.create({ model: "gpt-5.5-mini", messages: [{ role: "user", content: "What is the weather in Paris?" }], tools,});
for (const call of resp.choices[0].message.tool_calls ?? []) { console.log(call.function.name, call.function.arguments);}curl https://app.directinference.com/di/v1/chat/completions \ -H "Authorization: Bearer llm_live_..." \ -H "Content-Type: application/json" \ -d '{ "model": "gpt-5.5-mini", "messages": [{ "role": "user", "content": "What is the weather in Paris?" }], "tools": [{ "type": "function", "function": { "name": "get_weather", "description": "Get the current weather for a city.", "parameters": { "type": "object", "properties": { "city": { "type": "string" } }, "required": ["city"] } } }] }'tools := []openai.ChatCompletionToolParam{{ Function: openai.FunctionDefinitionParam{ Name: "get_weather", Description: openai.String("Get the current weather for a city."), Parameters: openai.FunctionParameters{ "type": "object", "properties": map[string]any{ "city": map[string]string{"type": "string"}, }, "required": []string{"city"}, }, },}}
resp, err := client.Chat.Completions.New(context.TODO(), openai.ChatCompletionNewParams{ Model: "gpt-5.5-mini", Messages: []openai.ChatCompletionMessageParamUnion{ openai.UserMessage("What is the weather in Paris?"), }, Tools: tools,})if err != nil { panic(err)}
for _, call := range resp.Choices[0].Message.ToolCalls { fmt.Println(call.Function.Name, call.Function.Arguments)}Vision
Send image content parts alongside text. Image input always uses the `vision` request type, regardless of the model id you send.
resp = client.chat.completions.create( model="gpt-5.5-mini", messages=[{ "role": "user", "content": [ {"type": "text", "text": "What is in this image?"}, {"type": "image_url", "image_url": {"url": "https://example.com/photo.jpg"}}, ], }],)
print(resp.choices[0].message.content)const resp = await client.chat.completions.create({ model: "gpt-5.5-mini", messages: [{ role: "user", content: [ { type: "text", text: "What is in this image?" }, { type: "image_url", image_url: { url: "https://example.com/photo.jpg" } }, ], }],});
console.log(resp.choices[0].message.content);curl https://app.directinference.com/di/v1/chat/completions \ -H "Authorization: Bearer llm_live_..." \ -H "Content-Type: application/json" \ -d '{ "model": "gpt-5.5-mini", "messages": [{ "role": "user", "content": [ { "type": "text", "text": "What is in this image?" }, { "type": "image_url", "image_url": { "url": "https://example.com/photo.jpg" } } ] }] }'Structured output
Use `response_format` with a JSON schema to constrain the reply. A response schema maps the call to the `json` request type.
resp = client.chat.completions.create( model="gpt-5.5-mini", messages=[{"role": "user", "content": "Extract the name and age from: Ada is 36."}], response_format={ "type": "json_schema", "json_schema": { "name": "person", "schema": { "type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "integer"}}, "required": ["name", "age"], }, }, },)
print(resp.choices[0].message.content) # strict JSONconst resp = await client.chat.completions.create({ model: "gpt-5.5-mini", messages: [{ role: "user", content: "Extract the name and age from: Ada is 36." }], response_format: { type: "json_schema", json_schema: { name: "person", schema: { type: "object", properties: { name: { type: "string" }, age: { type: "integer" } }, required: ["name", "age"], }, }, },});
console.log(resp.choices[0].message.content); // strict JSONcurl https://app.directinference.com/di/v1/chat/completions \ -H "Authorization: Bearer llm_live_..." \ -H "Content-Type: application/json" \ -d '{ "model": "gpt-5.5-mini", "messages": [{ "role": "user", "content": "Extract the name and age from: Ada is 36." }], "response_format": { "type": "json_schema", "json_schema": { "name": "person", "schema": { "type": "object", "properties": { "name": { "type": "string" }, "age": { "type": "integer" } }, "required": ["name", "age"] } } } }'Caching & response headers
Reuse a stable prompt prefix to cut cost and time-to-first-token: add a `cache_control` breakpoint to the cacheable content (see Prompt caching). Every response also reports the classified request type in the `X-DI-Request-Type` header (see Response headers).