Skip to main content

Tutorial: authorize then commit

Goal: wire the core runtime gate loop into one real code path.

You will:

  1. Authorize work (POST /mgt/v1/gate/authorize) to obtain a lease_token.
  2. Execute the protected work.
  3. Commit usage (POST /mgt/v1/gate/commits) with actual post-call usage.
  4. Cancel best-effort (POST /mgt/v1/gate/cancel) if protected work fails before commit.

Prereqs: a realm ID and a service key (key ID + secret). The examples below read them from the VLUNA_REALM_ID, VLUNA_SERVICE_KEY_ID, and VLUNA_SERVICE_KEY_SECRET environment variables.

Step 1: choose identifiers

Use:

  • principal_id: a stable org/team/customer identifier.
  • feature_code: the feature you are gating (example: openai.gpt5).
  • feature_family_code: optional grouping chosen by your packaging and entitlement model.
  • quantity_minor: the feature-level usage amount — an integer in minor units, serialized as a string (for example "1200").
  • meters[]: optional meter-level breakdown when separate priced resources matter.

For LLM workloads:

  • top-level quantity_minor should usually be total tokens
  • if input, output, or cached tokens price differently, also send meters[]

Step 2: authorize

curl or SDK

Use the SDK for the simplest first pass; if you prefer raw HTTP, the equivalent call is POST /mgt/v1/gate/authorize.

TypeScript (SDK)

import {
createServiceClient,
type RequestContext,
} from '@neurowaveai/sdk'

// Read a required environment variable, failing fast when it is unset or empty.
function env(name: string): string {
  const value = process.env[name]
  if (value) {
    return value
  }
  throw new Error(`Missing env: ${name}`)
}

// Step 2 entry point: authorize a unit of protected work and print the response.
async function main() {
  // Build a service-to-service client from environment configuration.
  const client = createServiceClient({
    config: {
      baseUrl: process.env.VLUNA_SERVICE_BASE_URL || 'https://api.us-east-1.vluna.ai/mgt/v1',
      realmId: env('VLUNA_REALM_ID'),
    },
    serviceKey: {
      keyId: env('VLUNA_SERVICE_KEY_ID'),
      secret: env('VLUNA_SERVICE_KEY_SECRET'),
    },
    errorMode: 'result',
  })

  try {
    // Identify who the work is for; a stable idempotency key makes retries safe.
    const ctx: RequestContext = {
      principalId: 'customer_123',
      idempotencyKey: 'ik_authorize_0001',
    }

    // Reserve capacity before doing the protected work.
    const authz = await client.s2s.gate.authorize(
      {
        feature_code: 'openai.gpt5',
        feature_family_code: 'llm.standard',
        estimated_quantity_minor: '1200',
      },
      ctx,
    )

    console.log(authz)
  } finally {
    // Always release client resources, even when authorize fails.
    await client.close()
  }
}

void main()

Python (SDK)

import asyncio
import os

from vlunaai_sdk import (
VlunaAIConfig,
RequestContext,
ServiceClientOptions,
ServiceKeyCredentials,
create_service_client,
)


def env(name: str) -> str:
    """Return the value of environment variable *name*; raise if unset or empty."""
    value = os.environ.get(name)
    if value:
        return value
    raise RuntimeError(f"Missing env: {name}")


async def main() -> None:
    """Step 2: authorize a unit of protected work and print the response envelope."""
    # Build a service-to-service client from environment configuration.
    client = create_service_client(
        ServiceClientOptions(
            config=VlunaAIConfig(
                base_url=os.environ.get('VLUNA_SERVICE_BASE_URL', 'https://api.us-east-1.vluna.ai/mgt/v1'),
                realm_id=env('VLUNA_REALM_ID'),
            ),
            service_key=ServiceKeyCredentials(
                key_id=env('VLUNA_SERVICE_KEY_ID'),
                secret=env('VLUNA_SERVICE_KEY_SECRET'),
            ),
        )
    )
    try:
        # A stable idempotency key makes retried authorize calls safe to replay.
        ctx = RequestContext(principal_id='customer_123', idempotency_key='ik_authorize_0001')
        # Reserve capacity before doing the protected work.
        authz = await client.authorize(
            body={
                'feature_code': 'openai.gpt5',
                'feature_family_code': 'llm.standard',
                'estimated_quantity_minor': '1200',
            },
            context=ctx,
        )
        print(authz.model_dump())
    finally:
        # Always release client resources, even when authorize fails.
        await client.close()


# Guard the entry point so importing this module does not fire a network call.
if __name__ == '__main__':
    asyncio.run(main())

Verify:

  • You receive an envelope with data.lease_token.
  • hints may be present. Log them.

Step 3: commit

At this point, execute the protected work and collect actual usage.

For LLM workloads, this usually means:

  • make the model call
  • wait until final usage is known
  • use total tokens as top-level quantity_minor
  • add meters[] when you need separate input/output/cached pricing

curl or SDK

Use the SDK for the simplest first pass; if you prefer raw HTTP, the equivalent call is POST /mgt/v1/gate/commits.

TypeScript (SDK)

import {
createServiceClient,
type RequestContext,
} from '@neurowaveai/sdk'

// Look up a required environment variable; an unset or empty value is a fatal error.
function env(name: string): string {
  const value = process.env[name]
  if (!value) throw new Error(`Missing env: ${name}`)
  return value
}

// Steps 2+3 together: authorize, run the protected work, then commit actual usage.
async function main() {
  const client = createServiceClient({
    config: {
      baseUrl: process.env.VLUNA_SERVICE_BASE_URL || 'https://api.us-east-1.vluna.ai/mgt/v1',
      realmId: env('VLUNA_REALM_ID'),
    },
    serviceKey: {
      keyId: env('VLUNA_SERVICE_KEY_ID'),
      secret: env('VLUNA_SERVICE_KEY_SECRET'),
    },
    errorMode: 'result',
  })

  try {
    const ctx: RequestContext = { principalId: 'customer_123' }

    // Reserve capacity first; each call carries its own idempotency key.
    const authz = await client.s2s.gate.authorize(
      {
        feature_code: 'openai.gpt5',
        feature_family_code: 'llm.standard',
        estimated_quantity_minor: '1200',
      },
      { ...ctx, idempotencyKey: 'ik_authorize_0001' },
    )

    // Unwrap the response envelope to reach the lease token.
    const authzEnvelope = authz.data as { ok?: boolean; data?: { lease_token?: string } } | undefined
    const leaseToken = authzEnvelope?.data?.lease_token
    if (!leaseToken) {
      throw new Error('authorize failed')
    }

    // ...the protected work would run here; afterwards report actual usage...
    const totalTokens = '1200'
    const commit = await client.s2s.gate.commit(
      {
        lease_token: leaseToken,
        feature_code: 'openai.gpt5',
        quantity_minor: totalTokens,
        // Meter-level breakdown when input/output tokens price differently.
        meters: [
          { meter_code: 'openai.gpt5.tokens.input', quantity_minor: '800' },
          { meter_code: 'openai.gpt5.tokens.output', quantity_minor: '400' },
        ],
        labels: { request_id: 'r-1' },
      },
      { ...ctx, idempotencyKey: 'ik_commit_0001' },
    )

    console.log(commit)
  } finally {
    // Always release client resources, even when authorize or commit fail.
    await client.close()
  }
}

void main()

Python (SDK)

import asyncio
import os

from vlunaai_sdk import (
VlunaAIConfig,
RequestContext,
ServiceClientOptions,
ServiceKeyCredentials,
create_service_client,
)


def env(name: str) -> str:
    """Fetch a required environment variable; raise when it is unset or empty."""
    if value := os.environ.get(name):
        return value
    raise RuntimeError(f"Missing env: {name}")


async def main() -> None:
    """Steps 2+3 together: authorize, run the protected work, then commit actual usage."""
    client = create_service_client(
        ServiceClientOptions(
            config=VlunaAIConfig(
                base_url=os.environ.get('VLUNA_SERVICE_BASE_URL', 'https://api.us-east-1.vluna.ai/mgt/v1'),
                realm_id=env('VLUNA_REALM_ID'),
            ),
            service_key=ServiceKeyCredentials(
                key_id=env('VLUNA_SERVICE_KEY_ID'),
                secret=env('VLUNA_SERVICE_KEY_SECRET'),
            ),
        )
    )
    try:
        ctx = RequestContext(principal_id='customer_123')

        # Reserve capacity first; each call carries its own idempotency key.
        authz = await client.authorize(
            body={
                'feature_code': 'openai.gpt5',
                'feature_family_code': 'llm.standard',
                'estimated_quantity_minor': '1200',
            },
            context=ctx.model_copy(update={'idempotency_key': 'ik_authorize_0001'}),
        )
        if not authz.ok or not authz.data:
            raise RuntimeError(f"authorize failed: {authz.model_dump()}")

        # ...the protected work would run here; afterwards report actual usage...
        commit = await client.commit(
            body={
                'lease_token': authz.data.lease_token,
                'feature_code': 'openai.gpt5',
                'quantity_minor': '1200',
                # Meter-level breakdown when input/output tokens price differently.
                'meters': [
                    {'meter_code': 'openai.gpt5.tokens.input', 'quantity_minor': '800'},
                    {'meter_code': 'openai.gpt5.tokens.output', 'quantity_minor': '400'},
                ],
                'labels': {'request_id': 'r-1'},
            },
            context=ctx.model_copy(update={'idempotency_key': 'ik_commit_0001'}),
        )
        print(commit.model_dump())
    finally:
        # Always release client resources, even when authorize or commit fail.
        await client.close()


# Guard the entry point so importing this module does not fire network calls.
if __name__ == '__main__':
    asyncio.run(main())

Verify:

  • Commit returns an envelope with data.amount_xusd (it may be 0 depending on pricing).
  • hints may be present. Treat them as machine-readable signals.

Step 4: cancel on failure

If the protected work fails after authorize but before commit, make a best-effort call to POST /mgt/v1/gate/cancel for the lease.

This matters most for LLM and agent systems where upstream execution can fail after admission but before final usage is known.

Troubleshoot

  • 402: blocked by enforcement (funding, quota, or budget).
  • 409: Idempotency-Key conflict (same key used with a different body).
  • 429: rate limit. Retry with backoff and stable idempotency keys for replays.
  • 422: validation failed (invalid identifiers, policy window issues, or feature/meter restrictions).

Next