Anthropic Messages API
Drop in the Anthropic SDK or any Messages-API client. Text, streaming, tool use, and base64 PDF documents all work unchanged.
Messages
Section titled “Messages”
max_tokens is required on this surface, just as it is in the Messages API itself. The model id is echoed back on the response.
from anthropic import Anthropic
client = Anthropic( api_key="llm_live_...", base_url="https://app.directinference.com/di", # SDK appends /v1/messages)
msg = client.messages.create( model="claude-sonnet-4-6", max_tokens=512, messages=[{"role": "user", "content": "Name three uses for a paperclip."}],)
print(msg.content[0].text)print(msg.model) # echoes "claude-sonnet-4-6"import Anthropic from "@anthropic-ai/sdk";
const client = new Anthropic({ apiKey: "llm_live_...", baseURL: "https://app.directinference.com/di", // SDK appends /v1/messages});
const msg = await client.messages.create({ model: "claude-sonnet-4-6", max_tokens: 512, messages: [{ role: "user", content: "Name three uses for a paperclip." }],});
const first = msg.content[0];if (first.type === "text") console.log(first.text);curl https://app.directinference.com/di/v1/messages \ -H "x-api-key: llm_live_..." \ -H "anthropic-version: 2023-06-01" \ -H "Content-Type: application/json" \ -d '{ "model": "claude-sonnet-4-6", "max_tokens": 512, "messages": [{ "role": "user", "content": "Name three uses for a paperclip." }] }'client := anthropic.NewClient( option.WithAPIKey("llm_live_..."), option.WithBaseURL("https://app.directinference.com/di"),)
msg, err := client.Messages.New(context.TODO(), anthropic.MessageNewParams{ Model: "claude-sonnet-4-6", MaxTokens: 512, Messages: []anthropic.MessageParam{ anthropic.NewUserMessage(anthropic.NewTextBlock("Name three uses for a paperclip.")), },})if err != nil { panic(err)}
fmt.Println(msg.Content[0].Text)
Streaming
Section titled “Streaming”
Stream incremental content_block_delta events with the SDK helper, or set "stream": true over raw HTTP.
with client.messages.stream( model="claude-sonnet-4-6", max_tokens=512, messages=[{"role": "user", "content": "Stream a haiku about latency."}],) as stream: for text in stream.text_stream: print(text, end="", flush=True)const stream = client.messages.stream({ model: "claude-sonnet-4-6", max_tokens: 512, messages: [{ role: "user", content: "Stream a haiku about latency." }],});
for await (const event of stream) { if (event.type === "content_block_delta" && event.delta.type === "text_delta") { process.stdout.write(event.delta.text); }}curl https://app.directinference.com/di/v1/messages \ -H "x-api-key: llm_live_..." \ -H "anthropic-version: 2023-06-01" \ -H "Content-Type: application/json" \ -N \ -d '{ "model": "claude-sonnet-4-6", "max_tokens": 512, "messages": [{ "role": "user", "content": "Stream a haiku about latency." }], "stream": true }'Tool use
Section titled “Tool use”
Anthropic tools declare their parameters with input_schema; the reply contains tool_use blocks. Tool-shaped requests map to the code request type.
tools = [{ "name": "get_weather", "description": "Get the current weather for a city.", "input_schema": { "type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"], },}]
msg = client.messages.create( model="claude-sonnet-4-6", max_tokens=512, tools=tools, messages=[{"role": "user", "content": "What is the weather in Paris?"}],)
for block in msg.content: if block.type == "tool_use": print(block.name, block.input)const tools = [{ name: "get_weather", description: "Get the current weather for a city.", input_schema: { type: "object", properties: { city: { type: "string" } }, required: ["city"], },}] as const;
const msg = await client.messages.create({ model: "claude-sonnet-4-6", max_tokens: 512, tools, messages: [{ role: "user", content: "What is the weather in Paris?" }],});
for (const block of msg.content) { if (block.type === "tool_use") console.log(block.name, block.input);}curl https://app.directinference.com/di/v1/messages \ -H "x-api-key: llm_live_..." \ -H "anthropic-version: 2023-06-01" \ -H "Content-Type: application/json" \ -d '{ "model": "claude-sonnet-4-6", "max_tokens": 512, "messages": [{ "role": "user", "content": "What is the weather in Paris?" }], "tools": [{ "name": "get_weather", "description": "Get the current weather for a city.", "input_schema": { "type": "object", "properties": { "city": { "type": "string" } }, "required": ["city"] } }] }'PDF documents
Section titled “PDF documents”
Send a base64 PDF as a document content block. Document input always uses the document request type — it takes priority over vision, long-context, model id, and effort hints, so a contract never silently falls to a cheaper model.
import base64from anthropic import Anthropic
client = Anthropic(api_key="llm_live_...", base_url="https://app.directinference.com/di")
pdf_b64 = base64.standard_b64encode(open("contract.pdf", "rb").read()).decode()
msg = client.messages.create( model="claude-sonnet-4-6", max_tokens=1024, messages=[{ "role": "user", "content": [ { "type": "document", "title": "contract.pdf", "source": { "type": "base64", "media_type": "application/pdf", "data": pdf_b64, }, }, {"type": "text", "text": "Summarize the key terms of this contract."}, ], }],)
print(msg.content[0].text)import { readFileSync } from "node:fs";import Anthropic from "@anthropic-ai/sdk";
const client = new Anthropic({ apiKey: "llm_live_...", baseURL: "https://app.directinference.com/di" });
const pdfB64 = readFileSync("contract.pdf").toString("base64");
const msg = await client.messages.create({ model: "claude-sonnet-4-6", max_tokens: 1024, messages: [{ role: "user", content: [ { type: "document", title: "contract.pdf", source: { type: "base64", media_type: "application/pdf", data: pdfB64 }, }, { type: "text", text: "Summarize the key terms of this contract." }, ], }],});
const first = msg.content[0];if (first.type === "text") console.log(first.text);# output_config constrains the reply to a JSON schema (extraction use case)curl https://app.directinference.com/di/v1/messages \ -H "x-api-key: llm_live_..." \ -H "anthropic-version: 2023-06-01" \ -H "Content-Type: application/json" \ -d '{ "model": "claude-sonnet-4-6", "max_tokens": 1024, "messages": [{ "role": "user", "content": [ { "type": "document", "title": "contract.pdf", "source": { "type": "base64", "media_type": "application/pdf", "data": "<base64-pdf>" } }, { "type": "text", "text": "Classify this document." } ] }], "output_config": { "format": { "type": "json_schema", "schema": { "type": "object", "properties": { "document_type": { "type": "string" }, "confidence": { "type": "number" } }, "required": ["document_type"] } } } }'Caching & response headers
Section titled “Caching & response headers”
The Messages API’s native cache_control breakpoints work here — mark a stable system prompt, tool catalog, or document and pay a fraction of the cost to reuse it. Usage comes back as cache_read_input_tokens / cache_creation_input_tokens; see Prompt caching. Each response also carries an X-DI-Request-Type header (see Response headers).