> ## Documentation Index
> Fetch the complete documentation index at: https://docs.tokenfactory.nebius.com/llms.txt
> Use this file to discover all available pages before exploring further.

# Create completion

> Creates a model completion for the given input prompt.



## OpenAPI

````yaml https://api.tokenfactory.nebius.com/openapi.json post /v1/completions
# OpenAPI 3.1 document describing the operation shown on this page.
openapi: 3.1.0
info:
  title: Nebius OpenAI-compatible inference API
  # Version string of this API description.
  version: 20260506-297d05704
servers:
  - url: https://api.tokenfactory.nebius.com
# No document-wide security requirement is declared here; the operation
# under `paths` declares its own `security` entry.
security: []
# Single operation: POST /v1/completions.
paths:
  /v1/completions:
    post:
      tags:
        - inference
      summary: Create completion
      description: Creates a model completion for the given input prompt.
      operationId: create_completion_v1_completions_post
      parameters:
        # Optional query parameter; the value may be a string or null.
        - name: ai_project_id
          in: query
          required: false
          schema:
            anyOf:
              - type: string
              - type: 'null'
            description: current project ID
            title: Ai Project Id
          description: current project ID
      # A JSON body matching CompletionRequest is mandatory.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CompletionRequest'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CompletionResponse'
              # The example carries every property CompletionResponse lists
              # as required, including `service_tier`.
              example:
                id: cmpl-bd18c4194f544c189578cfcb273a2f74
                choices:
                  - finish_reason: stop
                    index: 0
                    text: >-
                      Hello! It's nice to meet you. Is there something I can
                      help you with, or would you like to chat?
                created: 1717516032
                model: meta-llama/Llama-3.3-70B-Instruct
                object: text_completion
                usage:
                  completion_tokens: 26
                  prompt_tokens: 13
                  total_tokens: 39
                service_tier: default
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
      # Operation-level requirement referencing the HTTPBearer scheme
      # defined under components.securitySchemes.
      security:
        - HTTPBearer: []
components:
  schemas:
    # Request body for POST /v1/completions. Only `model` and `prompt` are
    # required; properties not listed here are also accepted
    # (additionalProperties: true).
    CompletionRequest:
      properties:
        model:
          type: string
          title: Model
          description: ID of the model to use.
          examples:
            - meta-llama/Meta-Llama-3.1-70B-Instruct
        # The description lists exactly the encodings the anyOf below
        # accepts: a string, an array of strings, or an array of integers.
        prompt:
          anyOf:
            - type: string
            - items:
                type: string
              type: array
            - items:
                type: integer
              type: array
          title: Prompt
          description: >-
            The prompt(s) to generate completions for, encoded as a string,
            array of strings, or array of tokens.
          examples:
            - Say this is a test
        stream:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Stream
          description: Enable response streaming.
          default: false
        # Object whose values must all be booleans, or null.
        stream_options:
          anyOf:
            - additionalProperties:
                type: boolean
              type: object
            - type: 'null'
          title: Stream Options
          description: >-
            If set to {"include_usage": true}, usage stats will be sent with the
            last chunk of data.
          examples:
            - null
        max_tokens:
          anyOf:
            - type: integer
            - type: 'null'
          title: Max Tokens
          description: Max completion token count
          examples:
            - 100
        temperature:
          anyOf:
            - type: number
            - type: 'null'
          title: Temperature
          description: >-
            What sampling temperature to use, between 0 and 2. Higher values
            like 0.8 will make the output more random, while lower values like
            0.2 will make it more focused and deterministic.
          default: 1
        top_p:
          anyOf:
            - type: number
            - type: 'null'
          title: Top P
          description: >-
            An alternative to sampling with temperature, called nucleus
            sampling, where the model considers the results of the tokens with
            top_p probability mass. So 0.1 means only the tokens comprising the
            top 10% probability mass are considered.
          default: 1
        # Key is quoted so YAML 1.1 parsers do not read it as a boolean.
        'n':
          anyOf:
            - type: integer
            - type: 'null'
          title: 'N'
          description: How many completions to generate for each prompt.
          default: 1
        logprobs:
          anyOf:
            - type: integer
            - type: 'null'
          title: Logprobs
          description: >-
            Include the log probabilities on the `logprobs` most likely tokens,
            as well as the chosen tokens. So for example, if `logprobs` is 5,
            the API will return a list of the 5 most likely tokens. The API will
            always return the `logprob` of the sampled token, so there may be up
            to `logprobs+1` elements in the response.
          examples:
            - null
        echo:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Echo
          description: Echo back the prompt in addition to the completion.
          default: false
        # NOTE(review): the "up to 4" limit appears only in the description;
        # the schema itself declares no maxItems.
        stop:
          anyOf:
            - type: string
            - items:
                type: string
              type: array
            - type: 'null'
          title: Stop
          description: Up to 4 sequences where the API will stop generating further tokens.
        presence_penalty:
          anyOf:
            - type: number
            - type: 'null'
          title: Presence Penalty
          description: >-
            Number between -2.0 and 2.0. Positive values penalize new tokens
            based on whether they appear in the text so far, increasing the
            model's likelihood to talk about new topics.
          default: 0
        frequency_penalty:
          anyOf:
            - type: number
            - type: 'null'
          title: Frequency Penalty
          description: >-
            Number between -2.0 and 2.0. Positive values penalize new tokens
            based on their existing frequency in the text so far, decreasing the
            model's likelihood to repeat the same line verbatim.
          default: 0
        # Object whose values must all be numbers, or null.
        logit_bias:
          anyOf:
            - additionalProperties:
                type: number
              type: object
            - type: 'null'
          title: Logit Bias
          description: >-
            Modify the likelihood of specified tokens appearing in the
            completion. Accepts a json object that maps tokens (specified by
            their token ID in the tokenizer) to an associated bias value from
            -100 to 100. Mathematically, the bias is added to the logits
            generated by the model prior to sampling. The exact effect will vary
            per model, but values between -1 and 1 should decrease or increase
            likelihood of selection; values like -100 or 100 should result in a
            ban or exclusive selection of the relevant token.
        user:
          anyOf:
            - type: string
            - type: 'null'
          title: User
          description: >-
            A unique identifier representing your end-user, which can help to
            monitor and detect abuse.
        extra_body:
          anyOf:
            - type: object
            - type: 'null'
          title: Extra Body
          description: To provide extra parameters.
          examples:
            - null
        # Values are constrained by the ServiceTier enum.
        service_tier:
          allOf:
            - $ref: '#/components/schemas/ServiceTier'
          description: The service tier to use for the request.
          default: auto
          examples:
            - auto
            - flex
      additionalProperties: true
      type: object
      required:
        - model
        - prompt
      title: CompletionRequest
    # Body of the 200 response of POST /v1/completions. Every listed property
    # is required.
    CompletionResponse:
      properties:
        id:
          type: string
          title: Id
          description: A unique identifier for the completion.
        object:
          type: string
          title: Object
          description: The object type, which is always `text_completion`.
        created:
          type: integer
          title: Created
          description: The Unix timestamp of when the completion was created.
        model:
          type: string
          title: Model
          description: The model used for the completion.
        choices:
          items:
            $ref: '#/components/schemas/CompletionChoice'
          type: array
          title: Choices
          description: A list of completion choices.
        usage:
          allOf:
            - $ref: '#/components/schemas/Usage'
          description: Usage statistics for the completion request.
        service_tier:
          allOf:
            - $ref: '#/components/schemas/ServiceTier'
          description: The service tier used for the request.
      type: object
      required:
        - id
        - object
        - created
        - model
        - choices
        - usage
        - service_tier
      title: CompletionResponse
    # Body of the 422 response. `detail` is an array of ValidationError items
    # and is not marked as required.
    HTTPValidationError:
      properties:
        detail:
          items:
            $ref: '#/components/schemas/ValidationError'
          type: array
          title: Detail
      type: object
      title: HTTPValidationError
    # String enum shared by CompletionRequest.service_tier and
    # CompletionResponse.service_tier.
    ServiceTier:
      type: string
      enum:
        - auto
        - default
        - over-limit
        - flex
        # NOTE(review): `no-limit` is accepted by the enum but is not covered
        # by the description below; confirm its semantics and document it.
        - no-limit
      title: ServiceTier
      description: |-
        Represents the service tier for requests.

        Attributes:
            Auto: Automatically choose the best available tier for the request (Default or OverLimit).
            Analyze the response to determine which tier was used.
            Default: Return 429 errors on hitting the rate limit; do not spill over to the OverLimit tier.
            OverLimit: Indicates that the request was over the user limit.
                    This tier cannot be set by the user in the request, but is used in a response for tier=Auto.
            Flex: Do not consume rate-limit credits, but run with lower priority. May still result in 429 errors
            if there are no resources to process the request.
    # One entry of CompletionResponse.choices. All three properties are
    # required.
    CompletionChoice:
      properties:
        index:
          type: integer
          title: Index
          description: The index of the choice in the list of choices.
        text:
          type: string
          title: Text
          description: A completion message generated by the model.
        # Values are constrained by the CompletionFinishReason enum.
        finish_reason:
          allOf:
            - $ref: '#/components/schemas/CompletionFinishReason'
          description: The reason the model stopped generating tokens.
      type: object
      required:
        - index
        - text
        - finish_reason
      title: CompletionChoice
    # Token accounting referenced by CompletionResponse.usage. The three
    # counters are required; `prompt_tokens_details` is optional.
    Usage:
      properties:
        completion_tokens:
          type: integer
          title: Completion Tokens
          description: Number of tokens in the generated completion.
        prompt_tokens:
          type: integer
          title: Prompt Tokens
          description: Number of tokens in the prompt.
        total_tokens:
          type: integer
          title: Total Tokens
          description: Total number of tokens used in the request (prompt + completion).
        # Either a PromptTokensDetails object or null.
        prompt_tokens_details:
          anyOf:
            - $ref: '#/components/schemas/PromptTokensDetails'
            - type: 'null'
          description: Breakdown of tokens used in the prompt.
      type: object
      required:
        - completion_tokens
        - prompt_tokens
        - total_tokens
      title: Usage
    # One item of HTTPValidationError.detail. All three properties are
    # required.
    ValidationError:
      properties:
        # Array whose items are each a string or an integer.
        loc:
          items:
            anyOf:
              - type: string
              - type: integer
          type: array
          title: Location
        msg:
          type: string
          title: Message
        type:
          type: string
          title: Error Type
      type: object
      required:
        - loc
        - msg
        - type
      title: ValidationError
    # Closed set of string values referenced by CompletionChoice.finish_reason.
    CompletionFinishReason:
      type: string
      enum:
        - stop
        - length
        - content_filter
      title: CompletionFinishReason
    # Referenced by Usage.prompt_tokens_details. No property is required.
    PromptTokensDetails:
      properties:
        # Either an integer or null.
        cached_tokens:
          anyOf:
            - type: integer
            - type: 'null'
          title: Cached Tokens
      type: object
      title: PromptTokensDetails
  # Security schemes available to operations. HTTPBearer is HTTP
  # authentication with the `bearer` scheme and is the scheme named in the
  # POST /v1/completions `security` entry.
  securitySchemes:
    HTTPBearer:
      type: http
      scheme: bearer

````