# Anthropic Messages API

Drop in the Anthropic SDK or any Messages-API client. Text, streaming, tool use, and base64 PDF documents all work unchanged.

:::caution[Base URL ends in /di — not /di/v1]
Set the SDK base URL to `https://app.directinference.com/di`. The Anthropic SDK appends `/v1/messages` itself, so including `/v1` yourself produces a doubled path (`/di/v1/v1/messages`). Authenticate with the `x-api-key` header and send `anthropic-version: 2023-06-01` alongside it.
:::

## Messages

`max_tokens` is required on this surface, just as it is in the Messages API itself. The `model` id you send is echoed back on the response.

```python
from anthropic import Anthropic

client = Anthropic(
    api_key="llm_live_...",
    base_url="https://app.directinference.com/di",   # SDK appends /v1/messages
)

msg = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=512,
    messages=[{"role": "user", "content": "Name three uses for a paperclip."}],
)

print(msg.content[0].text)
print(msg.model)   # echoes "claude-sonnet-4-6"
```

```typescript
import Anthropic from "@anthropic-ai/sdk";

const client = new Anthropic({
  apiKey: "llm_live_...",
  baseURL: "https://app.directinference.com/di", // SDK appends /v1/messages
});

const msg = await client.messages.create({
  model: "claude-sonnet-4-6",
  max_tokens: 512,
  messages: [{ role: "user", content: "Name three uses for a paperclip." }],
});

const first = msg.content[0];
if (first.type === "text") console.log(first.text);
```

```bash
curl https://app.directinference.com/di/v1/messages \
  -H "x-api-key: llm_live_..." \
  -H "anthropic-version: 2023-06-01" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "claude-sonnet-4-6",
    "max_tokens": 512,
    "messages": [{ "role": "user", "content": "Name three uses for a paperclip." }]
  }'
```

```go
client := anthropic.NewClient(
	option.WithAPIKey("llm_live_..."),
	option.WithBaseURL("https://app.directinference.com/di"),
)

msg, err := client.Messages.New(context.TODO(), anthropic.MessageNewParams{
	Model:     "claude-sonnet-4-6",
	MaxTokens: 512,
	Messages: []anthropic.MessageParam{
		anthropic.NewUserMessage(anthropic.NewTextBlock("Name three uses for a paperclip.")),
	},
})
if err != nil {
	panic(err)
}

fmt.Println(msg.Content[0].Text)
```

## Streaming

Stream incremental `content_block_delta` events with the SDK's streaming helper, or set `"stream": true` when calling the endpoint over raw HTTP.

```python
with client.messages.stream(
    model="claude-sonnet-4-6",
    max_tokens=512,
    messages=[{"role": "user", "content": "Stream a haiku about latency."}],
) as stream:
    for text in stream.text_stream:
        print(text, end="", flush=True)
```

```typescript
const stream = client.messages.stream({
  model: "claude-sonnet-4-6",
  max_tokens: 512,
  messages: [{ role: "user", content: "Stream a haiku about latency." }],
});

for await (const event of stream) {
  if (event.type === "content_block_delta" && event.delta.type === "text_delta") {
    process.stdout.write(event.delta.text);
  }
}
```

```bash
curl https://app.directinference.com/di/v1/messages \
  -H "x-api-key: llm_live_..." \
  -H "anthropic-version: 2023-06-01" \
  -H "Content-Type: application/json" \
  -N \
  -d '{
    "model": "claude-sonnet-4-6",
    "max_tokens": 512,
    "messages": [{ "role": "user", "content": "Stream a haiku about latency." }],
    "stream": true
  }'
```

## Tool use

Anthropic tools use `input_schema`; the reply contains `tool_use` blocks. Tool-shaped requests map to the `code` request type.

```python
tools = [{
    "name": "get_weather",
    "description": "Get the current weather for a city.",
    "input_schema": {
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    },
}]

msg = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=512,
    tools=tools,
    messages=[{"role": "user", "content": "What is the weather in Paris?"}],
)

for block in msg.content:
    if block.type == "tool_use":
        print(block.name, block.input)
```

```typescript
const tools = [{
  name: "get_weather",
  description: "Get the current weather for a city.",
  input_schema: {
    type: "object",
    properties: { city: { type: "string" } },
    required: ["city"],
  },
}] as const;

const msg = await client.messages.create({
  model: "claude-sonnet-4-6",
  max_tokens: 512,
  tools,
  messages: [{ role: "user", content: "What is the weather in Paris?" }],
});

for (const block of msg.content) {
  if (block.type === "tool_use") console.log(block.name, block.input);
}
```

```bash
curl https://app.directinference.com/di/v1/messages \
  -H "x-api-key: llm_live_..." \
  -H "anthropic-version: 2023-06-01" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "claude-sonnet-4-6",
    "max_tokens": 512,
    "messages": [{ "role": "user", "content": "What is the weather in Paris?" }],
    "tools": [{
      "name": "get_weather",
      "description": "Get the current weather for a city.",
      "input_schema": {
        "type": "object",
        "properties": { "city": { "type": "string" } },
        "required": ["city"]
      }
    }]
  }'
```

## PDF documents

Send a base64-encoded PDF as a `document` content block. Document input always uses the `document` request type — it takes priority over vision, long-context, model id, and effort hints, so a contract never silently falls back to a cheaper model.

```python
import base64
from anthropic import Anthropic

client = Anthropic(api_key="llm_live_...", base_url="https://app.directinference.com/di")

pdf_b64 = base64.standard_b64encode(open("contract.pdf", "rb").read()).decode()

msg = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=1024,
    messages=[{
        "role": "user",
        "content": [
            {
                "type": "document",
                "title": "contract.pdf",
                "source": {
                    "type": "base64",
                    "media_type": "application/pdf",
                    "data": pdf_b64,
                },
            },
            {"type": "text", "text": "Summarize the key terms of this contract."},
        ],
    }],
)

print(msg.content[0].text)
```

```typescript
import { readFileSync } from "node:fs";
import Anthropic from "@anthropic-ai/sdk";

const client = new Anthropic({ apiKey: "llm_live_...", baseURL: "https://app.directinference.com/di" });

const pdfB64 = readFileSync("contract.pdf").toString("base64");

const msg = await client.messages.create({
  model: "claude-sonnet-4-6",
  max_tokens: 1024,
  messages: [{
    role: "user",
    content: [
      {
        type: "document",
        title: "contract.pdf",
        source: { type: "base64", media_type: "application/pdf", data: pdfB64 },
      },
      { type: "text", text: "Summarize the key terms of this contract." },
    ],
  }],
});

const first = msg.content[0];
if (first.type === "text") console.log(first.text);
```

```bash
# output_config constrains the reply to a JSON schema (extraction use case)
curl https://app.directinference.com/di/v1/messages \
  -H "x-api-key: llm_live_..." \
  -H "anthropic-version: 2023-06-01" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "claude-sonnet-4-6",
    "max_tokens": 1024,
    "messages": [{
      "role": "user",
      "content": [
        {
          "type": "document",
          "title": "contract.pdf",
          "source": { "type": "base64", "media_type": "application/pdf", "data": "<base64-pdf>" }
        },
        { "type": "text", "text": "Classify this document." }
      ]
    }],
    "output_config": {
      "format": {
        "type": "json_schema",
        "schema": {
          "type": "object",
          "properties": { "document_type": { "type": "string" }, "confidence": { "type": "number" } },
          "required": ["document_type"]
        }
      }
    }
  }'
```

:::note[Supported document sources]
Today: `source.type: "base64"` with `media_type: "application/pdf"`. URL and file-id sources, and non-PDF media types, return a clear `400`. The optional `output_config` JSON schema (shown in the curl tab) constrains the reply for extraction. See [Request types](https://docs.directinference.com/request-types/).
:::

## Caching & response headers

The Messages API's native `cache_control` breakpoints work here — mark a stable system prompt, tool catalog, or document and pay a fraction to reuse it. Usage comes back as `cache_read_input_tokens` / `cache_creation_input_tokens`; see [Prompt caching](https://docs.directinference.com/caching/). Each response also carries `X-DI-Request-Type` ([Response headers](https://docs.directinference.com/headers/)).