Anthropic Messages API

Drop in the Anthropic SDK or any Messages-API client. Text, streaming, tool use, and base64 PDF documents all work unchanged.

Messages

max_tokens is required on this surface, just as it is in the Messages API itself. The model id is echoed back on the response.

from anthropic import Anthropic

# Point the stock SDK at the gateway root; the SDK appends /v1/messages.
client = Anthropic(
    base_url="https://app.directinference.com/di",
    api_key="llm_live_...",
)

# A single user turn; max_tokens must always be supplied.
question = {"role": "user", "content": "Name three uses for a paperclip."}
msg = client.messages.create(
    messages=[question],
    max_tokens=512,
    model="claude-sonnet-4-6",
)

reply = msg.content[0]
print(reply.text)
print(msg.model)   # echoes "claude-sonnet-4-6"

import Anthropic from "@anthropic-ai/sdk";

// Point the stock SDK at the gateway root; the SDK appends /v1/messages.
const client = new Anthropic({
  baseURL: "https://app.directinference.com/di",
  apiKey: "llm_live_...",
});

const msg = await client.messages.create({
  messages: [{ role: "user", content: "Name three uses for a paperclip." }],
  max_tokens: 512,
  model: "claude-sonnet-4-6",
});

// Print the first content block only when it is a text block.
const [first] = msg.content;
if (first.type === "text") {
  console.log(first.text);
}

# Raw HTTP: send the JSON request body straight to /v1/messages.
curl https://app.directinference.com/di/v1/messages \
  --header "x-api-key: llm_live_..." \
  --header "anthropic-version: 2023-06-01" \
  --header "Content-Type: application/json" \
  --data '{
    "model": "claude-sonnet-4-6",
    "max_tokens": 512,
    "messages": [{ "role": "user", "content": "Name three uses for a paperclip." }]
  }'

// Build a client that targets the gateway base URL.
client := anthropic.NewClient(
  option.WithBaseURL("https://app.directinference.com/di"),
  option.WithAPIKey("llm_live_..."),
)

// Assemble the request first so the call site stays short.
params := anthropic.MessageNewParams{
  Model:     "claude-sonnet-4-6",
  MaxTokens: 512,
  Messages: []anthropic.MessageParam{
    anthropic.NewUserMessage(anthropic.NewTextBlock("Name three uses for a paperclip.")),
  },
}

msg, err := client.Messages.New(context.TODO(), params)
if err != nil {
  panic(err)
}

fmt.Println(msg.Content[0].Text)

Streaming

Stream incremental content_block_delta events with the SDK helper or "stream": true over raw HTTP.

# The stream helper is used as a context manager; text_stream is iterated for text chunks.
with client.messages.stream(
    messages=[{"role": "user", "content": "Stream a haiku about latency."}],
    max_tokens=512,
    model="claude-sonnet-4-6",
) as live:
    for chunk in live.text_stream:
        # Flush on every chunk so output appears as it arrives.
        print(chunk, end="", flush=True)

const stream = client.messages.stream({
  messages: [{ role: "user", content: "Stream a haiku about latency." }],
  max_tokens: 512,
  model: "claude-sonnet-4-6",
});

for await (const event of stream) {
  // Ignore everything except incremental text deltas.
  if (event.type !== "content_block_delta") continue;
  if (event.delta.type !== "text_delta") continue;
  process.stdout.write(event.delta.text);
}

# --no-buffer makes curl print each streamed event as it arrives.
curl https://app.directinference.com/di/v1/messages \
  --header "x-api-key: llm_live_..." \
  --header "anthropic-version: 2023-06-01" \
  --header "Content-Type: application/json" \
  --no-buffer \
  --data '{
    "model": "claude-sonnet-4-6",
    "max_tokens": 512,
    "messages": [{ "role": "user", "content": "Stream a haiku about latency." }],
    "stream": true
  }'

Tool use

Anthropic tools use input_schema; the reply contains tool_use blocks. Tool-shaped requests map to the code request type.

# JSON Schema describing the tool's arguments.
weather_args = {
    "type": "object",
    "properties": {"city": {"type": "string"}},
    "required": ["city"],
}

tools = [
    {
        "name": "get_weather",
        "description": "Get the current weather for a city.",
        "input_schema": weather_args,
    }
]

msg = client.messages.create(
    messages=[{"role": "user", "content": "What is the weather in Paris?"}],
    tools=tools,
    max_tokens=512,
    model="claude-sonnet-4-6",
)

# Print the name and arguments of every tool_use block in the reply.
tool_calls = [block for block in msg.content if block.type == "tool_use"]
for call in tool_calls:
    print(call.name, call.input)

// Annotate with the SDK's Tool type rather than `as const`: the annotation
// keeps `type: "object"` a literal while leaving the arrays mutable, which is
// what `messages.create` expects. A readonly tuple is not assignable there.
const tools: Anthropic.Tool[] = [{
  name: "get_weather",
  description: "Get the current weather for a city.",
  input_schema: {
    type: "object",
    properties: { city: { type: "string" } },
    required: ["city"],
  },
}];

const msg = await client.messages.create({
  model: "claude-sonnet-4-6",
  max_tokens: 512,
  tools,
  messages: [{ role: "user", content: "What is the weather in Paris?" }],
});

// Print the name and arguments of every tool_use block in the reply.
for (const block of msg.content) {
  if (block.type === "tool_use") console.log(block.name, block.input);
}

# Tools are declared in the request body with an input_schema per tool.
curl https://app.directinference.com/di/v1/messages \
  --header "x-api-key: llm_live_..." \
  --header "anthropic-version: 2023-06-01" \
  --header "Content-Type: application/json" \
  --data '{
    "model": "claude-sonnet-4-6",
    "max_tokens": 512,
    "messages": [{ "role": "user", "content": "What is the weather in Paris?" }],
    "tools": [{
      "name": "get_weather",
      "description": "Get the current weather for a city.",
      "input_schema": {
        "type": "object",
        "properties": { "city": { "type": "string" } },
        "required": ["city"]
      }
    }]
  }'

PDF documents

Send a base64 PDF as a document content block. Document input always uses the document request type — it takes priority over vision, long-context, model id, and effort hints, so a contract never silently falls to a cheaper model.

import base64
from anthropic import Anthropic

client = Anthropic(api_key="llm_live_...", base_url="https://app.directinference.com/di")

# Read the PDF inside a context manager so the file handle is always closed,
# then base64-encode the bytes for the document block's source.
with open("contract.pdf", "rb") as pdf_file:
    pdf_b64 = base64.standard_b64encode(pdf_file.read()).decode()

msg = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=1024,
    messages=[{
        "role": "user",
        "content": [
            # The document block comes first, followed by the instruction text.
            {
                "type": "document",
                "title": "contract.pdf",
                "source": {
                    "type": "base64",
                    "media_type": "application/pdf",
                    "data": pdf_b64,
                },
            },
            {"type": "text", "text": "Summarize the key terms of this contract."},
        ],
    }],
)

print(msg.content[0].text)

import { readFileSync } from "node:fs";
import Anthropic from "@anthropic-ai/sdk";

const client = new Anthropic({
  baseURL: "https://app.directinference.com/di",
  apiKey: "llm_live_...",
});

// Base64-encode the PDF bytes for the document block's source.
const pdfB64 = readFileSync("contract.pdf").toString("base64");

const msg = await client.messages.create({
  model: "claude-sonnet-4-6",
  max_tokens: 1024,
  messages: [
    {
      role: "user",
      content: [
        {
          type: "document",
          title: "contract.pdf",
          source: {
            type: "base64",
            media_type: "application/pdf",
            data: pdfB64,
          },
        },
        { type: "text", text: "Summarize the key terms of this contract." },
      ],
    },
  ],
});

// Print the first content block only when it is a text block.
const [first] = msg.content;
if (first.type === "text") {
  console.log(first.text);
}

# output_config constrains the reply to a JSON schema (extraction use case).
# The object schema sets "additionalProperties": false, which the Messages API's
# structured-output schema rules require for objects.
curl https://app.directinference.com/di/v1/messages \
  -H "x-api-key: llm_live_..." \
  -H "anthropic-version: 2023-06-01" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "claude-sonnet-4-6",
    "max_tokens": 1024,
    "messages": [{
      "role": "user",
      "content": [
        {
          "type": "document",
          "title": "contract.pdf",
          "source": { "type": "base64", "media_type": "application/pdf", "data": "<base64-pdf>" }
        },
        { "type": "text", "text": "Classify this document." }
      ]
    }],
    "output_config": {
      "format": {
        "type": "json_schema",
        "schema": {
          "type": "object",
          "properties": { "document_type": { "type": "string" }, "confidence": { "type": "number" } },
          "required": ["document_type"],
          "additionalProperties": false
        }
      }
    }
  }'

Caching & response headers

The Messages API’s native cache_control breakpoints work here — mark a stable system prompt, tool catalog, or document and pay only a fraction of the cost to reuse it. Usage comes back as cache_read_input_tokens / cache_creation_input_tokens; see Prompt caching. Each response also carries X-DI-Request-Type (see Response headers).