Creates a model response for the given input. Only type: "function" tools are supported; other tool types are filtered server-side. SambaNova is stateless, so conversation history must be supplied in full via input[] on each request.
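Because the endpoint is stateless, a follow-up request must resend every prior turn in input[], including the assistant's own previous output. A minimal sketch of such a payload (the model name and message texts are illustrative assumptions):

```python
# Sketch of a follow-up request body for POST /responses.
# The full conversation history, including the assistant's prior
# turn, is resent in input[] because the API keeps no server state.
payload = {
    "model": "Example-Model",  # hypothetical model name
    "input": [
        {"type": "message", "role": "user",
         "content": "What is the weather in San Francisco?"},
        # A prior model turn uses role "assistant" with content
        # type "output_text", per the message item description.
        {"type": "message", "role": "assistant",
         "content": [{"type": "output_text",
                      "text": "It is currently 18 C and foggy."}]},
        {"type": "message", "role": "user",
         "content": "And in Los Angeles?"},
    ],
}
```

User, system, and developer turns may also use plain string content, as noted in the message item description below.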
SambaNova API Key
Response creation parameters
responses request object
A plain text input equivalent to a user-role message.
Inserts a system (or developer) message as the first item in the model's context. Equivalent to a system-role message prepended to input[].
If true, the response is delivered as server-sent events (SSE).
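When stream: true, events arrive as SSE data: lines. A minimal client-side parser might look like the following sketch; the terminal response.completed event type is documented below, while the delta event name used in the synthetic sample is an assumption:

```python
import json

def parse_sse_events(lines):
    """Yield parsed JSON events from SSE 'data:' lines,
    stopping after the terminal response.completed event."""
    for line in lines:
        if not line.startswith("data:"):
            continue  # skip blank separators, comments, keep-alives
        event = json.loads(line[len("data:"):].strip())
        yield event
        if event.get("type") == "response.completed":
            break

# Synthetic two-event stream (event names other than
# response.completed are hypothetical):
stream = [
    'data: {"type": "response.output_text.delta", "delta": "Hi"}',
    '',
    'data: {"type": "response.completed"}',
]
events = list(parse_sse_events(stream))
```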
Upper bound on the number of tokens the model may generate, including visible output tokens and reasoning tokens.
1024
Controls randomness in generation. Range: 0–2. It is recommended to alter this, top_p, or top_k but not more than one at a time.
0 <= x <= 2
0.7
Nucleus sampling cutoff. Range: 0–1. It is recommended to alter this, temperature, or top_k but not more than one at a time.
0 <= x <= 1
1
Limits sampling to the top K most probable tokens. It is recommended to alter this, top_p, or temperature but not more than one at a time.
1 <= x <= 100
5
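The guidance above suggests adjusting only one of temperature, top_p, or top_k per request. A small client-side check can enforce that (purely illustrative; not part of the API):

```python
def count_sampling_overrides(params):
    """Count how many of the mutually-cautioned sampling knobs
    (temperature, top_p, top_k) a request explicitly sets."""
    return sum(k in params for k in ("temperature", "top_p", "top_k"))

# Hypothetical request setting exactly one sampling knob:
request = {"model": "Example-Model", "input": "Hello", "temperature": 0.7}
assert count_sampling_overrides(request) <= 1  # follows the guidance
```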
Number of top log-probability entries to return per output token. Null means log probabilities are not returned.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. Not currently implemented; accepted for API compatibility and echoed in the response.
-2 <= x <= 2
Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. Not currently implemented; accepted for API compatibility and echoed in the response.
-2 <= x <= 2
Tools available to the model. Only type: "function" is supported; all other tool types are filtered server-side.
128
Whether the model may issue multiple tool calls in parallel within one turn.
Maximum number of tool calls the model may make in a single response turn. Not currently implemented; accepted for API compatibility.
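Since only type: "function" tools survive server-side filtering, a tools array might be built as below. The get_weather function is a hypothetical example, and the exact field layout follows the common function-tool shape, which is an assumption here:

```python
tools = [
    {
        "type": "function",  # the only tool type the server keeps
        "name": "get_weather",  # hypothetical function
        "description": "Get current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
    {"type": "web_search"},  # non-function: filtered server-side
]

# Only the function tool is expected to reach the model.
kept = [t for t in tools if t["type"] == "function"]
```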
String shorthand for tool selection behavior.
none, auto, required
Response format configuration. Supports plain text, json_object, and json_schema.
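For structured output via json_schema, a request fragment might look like this sketch. The nesting of the format object and the name/schema fields mirrors the common Responses-style text.format shape and is an assumption here; the schema itself is a hypothetical example:

```python
# Sketch of a json_schema response-format configuration.
text_config = {
    "format": {
        "type": "json_schema",
        "name": "city_weather",  # hypothetical schema name
        "schema": {
            "type": "object",
            "properties": {
                "city": {"type": "string"},
                "temp_c": {"type": "number"},
            },
            "required": ["city", "temp_c"],
        },
    }
}
```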
Reasoning configuration for models that support it. Ignored on non-reasoning models.
Included for API compatibility, but only echoed back in the response.
Included for API compatibility, but not supported
SambaNova is stateless; this field is accepted for API compatibility but has no effect. Always echoed back as false.
Accepted for API compatibility and echoed in the response. Context truncation behavior is not currently configurable via this field in SambaNova.
auto, disabled
Not supported. SambaNova is stateless and does not maintain server-side conversation state. Accepted for API compatibility but ignored; clients must supply the full conversation history in input[].
Accepted for API compatibility and echoed back in the response. Has no effect on server behavior.
Accepted for API compatibility and echoed back in the response. Has no effect on server behavior.
Successful response. Returns a ResponseResponse object (non-streaming), or a stream of server-sent ResponseStreamEvent events ending with a response.completed event (when stream: true).
A response object returned by POST /responses (non-streaming). Contains the model's output items, echoed input parameters, lifecycle metadata, and token usage.
Unique identifier for this response.
The object type. Always "response".
response
Lifecycle status of the response. "completed" means the model finished successfully. "failed" means an error occurred during generation. "incomplete" means generation was cut short (e.g. max_output_tokens reached). "in_progress" means generation is still underway.
completed, failed, in_progress, incomplete
Unix timestamp (seconds) when the response was created.
The model ID used to generate this response.
Ordered array of output items generated by the model. Items may be of type "message", "reasoning", or "function_call".
A message item. When used as input, id and status are optional. When present in output[], id and status are always set by the server. Role "assistant" with content type "output_text" represents a prior model turn; user/system/developer turns use content type "input_text". Plain string content is accepted in all roles on input.
{
"type": "message",
"role": "user",
"content": "What is the weather in San Francisco?"
}
In-band error object present when status is "failed". Null when the response completed successfully.
Present when status is "incomplete". Describes why generation stopped before completion (e.g. max_output_tokens reached).
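The status, error, and incomplete_details fields together describe the response lifecycle, so a caller might branch on them as in the sketch below. The {"reason": ...} shape inside incomplete_details is an assumption; only the field's purpose is documented above:

```python
def summarize_outcome(resp):
    """Return a short human-readable outcome from the
    lifecycle fields of a response object."""
    status = resp["status"]
    if status == "completed":
        return "ok"
    if status == "failed":
        return f"failed: {resp['error']}"
    if status == "incomplete":
        return f"incomplete: {resp['incomplete_details']}"
    return status  # e.g. "in_progress" while streaming

# Synthetic response cut short by the token limit:
resp = {"status": "incomplete",
        "incomplete_details": {"reason": "max_output_tokens"},
        "error": None}
```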
The temperature value used for this response.
The top_p value used for this response.
The frequency_penalty value echoed from the request. Not currently implemented; accepted for API compatibility.
The presence_penalty value echoed from the request. Not currently implemented; accepted for API compatibility.
Tool definitions available to the model for this response.
Whether parallel tool calls were enabled.
String shorthand for tool selection behavior.
none, auto, required
The truncation value echoed from the request.
auto, disabled
Whether background generation was requested.
The metadata echoed from the request.
Whether the response was stored server-side. SambaNova is stateless, so this is always false.
The service tier used to process this request, as reported by the server.
The user value echoed back from the request.
Unix timestamp (seconds) when the response finished generating.
Token usage statistics for this response.
{
"input_tokens": 248,
"output_tokens": 72,
"total_tokens": 320,
"input_tokens_details": { "cached_tokens": 0 },
"output_tokens_details": { "reasoning_tokens": 18 },
"start_time": 1737642515.445,
"end_time": 1737642515.904,
"time_to_first_token": 0.084,
"total_latency": 0.459,
"output_tokens_per_sec": 156.8,
"output_tokens_after_first_per_sec": 161.2,
"total_tokens_per_sec": 311.6,
"acceptance_rate": 4.06,
"is_last_response": true
}
The system instructions echoed from the request, or null if none were provided.
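Some throughput fields in the usage object can be cross-checked against the raw timing fields. For instance, output_tokens_per_sec appears to equal output_tokens divided by total_latency; this is an inference from the example values above, not a documented formula:

```python
# Values taken from the usage example above.
usage = {
    "output_tokens": 72,
    "total_latency": 0.459,
    "output_tokens_per_sec": 156.8,
}

# 72 / 0.459 is roughly 156.9, matching the reported
# figure to rounding.
derived = usage["output_tokens"] / usage["total_latency"]
```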
The top_k value used for this response.
The max_tool_calls value echoed from the request.
The text format configuration (structured output mode) used for this response.
The reasoning configuration used for this response.
The max_output_tokens limit echoed from the request.
The top_logprobs value echoed from the request.
Not supported. Always null. SambaNova is stateless; use input[] to supply full conversation history.