Creates a model response for a given input.
Bearer authentication header of the form Bearer <token>, where <token> is your auth token.
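As a minimal sketch, the authentication header described above could be assembled like this in Python (the token value is a placeholder, not a real credential):

```python
# Minimal sketch of building the Bearer authentication header.
# The token below is a placeholder; use your real auth token.
token = "YOUR_AUTH_TOKEN"

headers = {
    "Authorization": f"Bearer {token}",
    "Content-Type": "application/json",
}
```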
Current project ID.
Text, image, or file inputs to the model, used to generate a response.
The model used to generate the response.
Whether to run the model response in the background.
false
Specify additional output data to include in the model response.
code_interpreter_call.outputs, computer_call_output.output.image_url, file_search_call.results, message.input_image.image_url, message.output_text.logprobs, reasoning.encrypted_content
A system (or developer) message inserted into the model's context.
An upper bound for the number of tokens that can be generated for a response, including visible output tokens and reasoning tokens.
The maximum number of total calls to built-in tools that can be processed in a response. This maximum number applies across all built-in tool calls, not per individual tool. Any further attempts to call a tool by the model will be ignored.
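An illustrative request body combining the two limits described above. The field names (max_output_tokens, max_tool_calls) are assumptions based on the parameter descriptions here; check the schema for the exact names:

```python
# Illustrative request body combining the token and tool-call limits.
# Field names are assumptions inferred from the parameter descriptions.
payload = {
    "model": "example-model",           # placeholder model name
    "input": "Summarize this document.",
    "max_output_tokens": 1024,          # bound on visible + reasoning tokens
    "max_tool_calls": 5,                # total across all built-in tools
}
```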
Set of up to 16 key-value pairs that can be attached to an object.
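The 16-pair limit can be enforced client-side before sending a request; a small hedged sketch (the helper name is illustrative, not part of the API):

```python
# Sketch: enforcing the metadata limit of 16 key-value pairs client-side.
def validate_metadata(metadata: dict) -> dict:
    """Raise if metadata exceeds 16 key-value pairs."""
    if len(metadata) > 16:
        raise ValueError("metadata supports at most 16 key-value pairs")
    return metadata

ok = validate_metadata({"ticket": "ABC-123"})
```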
Whether to allow the model to run tool calls in parallel.
The unique ID of the previous response to the model.
Reference to a prompt template and its variables.
Configuration options for reasoning models.
The service tier to use for the request.
auto, default, over-limit, flex, no-limit
"auto"
Whether to store the generated model response for later retrieval via API.
If set to true, the model response data will be streamed to the client as it is generated using server-sent events.
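When streaming is enabled, events arrive as server-sent events (SSE). A minimal sketch of decoding `data:` lines into JSON payloads; the event shapes shown are illustrative, not the API's actual schema:

```python
# Sketch: parsing server-sent event (SSE) lines from a streamed response.
# The payload shapes below are illustrative examples only.
import json

def parse_sse_lines(lines):
    """Yield decoded JSON payloads from 'data: ...' SSE lines."""
    for line in lines:
        if line.startswith("data: ") and line != "data: [DONE]":
            yield json.loads(line[len("data: "):])

sample = ['data: {"delta": "Hel"}', 'data: {"delta": "lo"}', "data: [DONE]"]
chunks = list(parse_sse_lines(sample))
```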
What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
We generally recommend altering this or top_p but not both.
0 <= x <= 2
Configuration options for a text response from the model.
How the model should select which tool (or tools) to use when generating a response.
none, auto, required
An array of tools the model may call while generating a response.
A non-negative integer specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used.
x >= 0
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
We generally recommend altering this or temperature but not both.
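The guidance to set temperature or top_p, but not both, together with their documented ranges, can be captured in a small client-side helper (a sketch; the helper is illustrative and not part of the API):

```python
# Sketch: build sampling settings, rejecting simultaneous use of both knobs,
# and validating the documented ranges (0..2 for temperature, 0..1 for top_p).
def sampling_params(temperature=None, top_p=None):
    if temperature is not None and top_p is not None:
        raise ValueError("set either temperature or top_p, not both")
    params = {}
    if temperature is not None:
        if not 0 <= temperature <= 2:
            raise ValueError("temperature must be between 0 and 2")
        params["temperature"] = temperature
    if top_p is not None:
        if not 0 <= top_p <= 1:
            raise ValueError("top_p must be between 0 and 1")
        params["top_p"] = top_p
    return params

focused = sampling_params(temperature=0.2)
```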
0 <= x <= 1
The truncation strategy to use for the model response.
auto, disabled
A unique identifier representing your end-user, which can help us monitor and detect abuse.
Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces the user field.
Successful Response
none, auto, required
Represents the service tier for requests.
Attributes:
Auto: Automatically choose the best available tier for the request (Default or OverLimit). Inspect the response to determine which tier was used.
Default: Return 429 errors on hitting the rate limit; do not overflow into the OverLimit tier.
OverLimit: Indicates that the request exceeded the user limit. This tier cannot be set by the user in a request, but is used in responses when tier=Auto.
Flex: Does not consume rate-limit credits, but runs with lower priority. May still result in 429 errors if there are no resources available to process the request.
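Since a request with tier Auto only reveals the tier actually used in the response, a client may want to check for the over-limit case. A hedged sketch, assuming the response carries a service_tier field with the string values listed in this document:

```python
# Sketch: detect whether an auto-tier request was served over the user limit.
# The response shape (a dict with a "service_tier" key) is an assumption.
def served_over_limit(response: dict) -> bool:
    """True if the response reports the over-limit service tier."""
    return response.get("service_tier") == "over-limit"
```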
auto, default, over-limit, flex, no-limit
completed, failed, in_progress, cancelled, queued, incomplete
auto, disabled
response "response"