> ## Documentation Index
> Fetch the complete documentation index at: https://docs.tokenfactory.nebius.com/llms.txt
> Use this file to discover all available pages before exploring further.

# Create embeddings

> Creates an embedding vector representing the input text.



## OpenAPI

````yaml https://api.tokenfactory.nebius.com/openapi.json post /v1/embeddings
# Document header: spec version, API identity, and the single server URL.
openapi: 3.1.0
info:
  title: Nebius OpenAI-compatible inference API
  # Quoted so the build identifier is always read as a string.
  version: '20260506-297d05704'
servers:
  - url: https://api.tokenfactory.nebius.com
# Empty document-level security requirement; the /v1/embeddings operation
# declares its own (HTTPBearer).
security: []
paths:
  /v1/embeddings:
    # Accepts an EmbeddingRequest body and answers with an EmbeddingResponse
    # (200) or an HTTPValidationError (422). Requires the HTTPBearer scheme.
    post:
      tags:
        - inference
      summary: Create embeddings
      description: Creates an embedding vector representing the input text.
      operationId: create_embeddings_v1_embeddings_post
      parameters:
        # Optional query parameter; its schema accepts a string or null.
        - name: ai_project_id
          in: query
          required: false
          schema:
            anyOf:
              - type: string
              - type: 'null'
            description: current project ID
            title: Ai Project Id
          description: current project ID
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EmbeddingRequest'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EmbeddingResponse'
              # The example includes every field that EmbeddingResponse and
              # Usage mark as required, so it validates against the schema.
              example:
                object: list
                data:
                  - object: embedding
                    embedding:
                      - 0.0023064255
                      - -0.009327292
                      - -0.0028842222
                    index: 0
                model: BAAI/bge-en-icl
                usage:
                  completion_tokens: 0
                  prompt_tokens: 8
                  total_tokens: 8
                service_tier: default
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
      security:
        - HTTPBearer: []
components:
  schemas:
    # Request body for POST /v1/embeddings. Only `model` and `input` are
    # required; every other property is optional.
    EmbeddingRequest:
      properties:
        model:
          type: string
          title: Model
          description: ID of the model to use.
          examples:
            - BAAI/bge-en-icl
        input:
          # Four accepted shapes: one string, one token array, a list of
          # strings, or a list of token arrays.
          anyOf:
            - type: string
            - items:
                type: integer
              type: array
            - items:
                type: string
              type: array
            - items:
                items:
                  type: integer
                type: array
              type: array
          title: Input
          description: >-
            Input text to embed, encoded as a string or an array of tokens.
            Multiple inputs may be passed as an array of strings or an array
            of token arrays.
          examples:
            - What's a nice vector, Victor?
        encoding_format:
          # NOTE(review): the description names only float and base64, but the
          # schema accepts any string — consider an enum if the server rejects
          # other values.
          anyOf:
            - type: string
            - type: 'null'
          title: Encoding Format
          description: >-
            The format to return the embeddings in. Can be either float or
            base64.
          default: float
        user:
          anyOf:
            - type: string
            - type: 'null'
          title: User
          description: A unique identifier representing your end-user.
        service_tier:
          # NOTE(review): this references the full ServiceTier enum, although
          # the ServiceTier description says the over-limit tier cannot be set
          # in a request — confirm whether a narrower enum is intended.
          allOf:
            - $ref: '#/components/schemas/ServiceTier'
          description: The service tier to use for the request.
          default: auto
          examples:
            - auto
            - flex
        dimensions:
          anyOf:
            - type: integer
            - type: 'null'
          title: Dimensions
          description: The dimensions to use for the request.
          examples:
            - 4096
            - 8192
      type: object
      required:
        - model
        - input
      title: EmbeddingRequest
    # Successful (200) response body for POST /v1/embeddings.
    # All five properties are required.
    EmbeddingResponse:
      title: EmbeddingResponse
      type: object
      required:
        - object
        - model
        - usage
        - data
        - service_tier
      properties:
        object:
          title: Object
          description: always 'list'.
          type: string
        model:
          title: Model
          description: The model used for the embedding.
          type: string
        usage:
          description: Token usage stats.
          allOf:
            - $ref: '#/components/schemas/Usage'
        data:
          title: Data
          description: List of Embedding objects
          type: array
          items:
            $ref: '#/components/schemas/Embedding'
        service_tier:
          description: The service tier used for the request.
          allOf:
            - $ref: '#/components/schemas/ServiceTier'
    # Body of the 422 response: an optional list of ValidationError entries.
    HTTPValidationError:
      title: HTTPValidationError
      type: object
      properties:
        detail:
          title: Detail
          type: array
          items:
            $ref: '#/components/schemas/ValidationError'
    # String enum shared by EmbeddingRequest.service_tier and
    # EmbeddingResponse.service_tier.
    ServiceTier:
      type: string
      # NOTE(review): 'no-limit' is part of the enum but is not covered by the
      # description below — confirm its semantics and document it.
      enum:
        - auto
        - default
        - over-limit
        - flex
        - no-limit
      title: ServiceTier
      description: |-
        Represents the service tier for requests.

        Attributes:
            Auto: Automatically choose the best available tier for the request (Default or OverLimit).
            Analyze the response to determine which tier was used.
            Default: Return 429 errors on hitting the rate limit; do not spill over into the OverLimit tier.
            OverLimit: Indicate that the request was over the user limit.
                    This tier cannot be set by the user in the request, but is used in a response for tier=Auto.
            Flex: Do not consume rate-limit credits, but run with lower priority. May still result in 429 errors
            if there are no resources to process the request.
    # Token accounting object referenced by EmbeddingResponse.usage.
    # The three counters are required; prompt_tokens_details is optional
    # and may be null.
    Usage:
      title: Usage
      type: object
      required:
        - completion_tokens
        - prompt_tokens
        - total_tokens
      properties:
        completion_tokens:
          title: Completion Tokens
          description: Number of tokens in the generated completion.
          type: integer
        prompt_tokens:
          title: Prompt Tokens
          description: Number of tokens in the prompt.
          type: integer
        total_tokens:
          title: Total Tokens
          description: Total number of tokens used in the request (prompt + completion).
          type: integer
        prompt_tokens_details:
          description: Breakdown of tokens used in the prompt.
          anyOf:
            - $ref: '#/components/schemas/PromptTokensDetails'
            - type: 'null'
    # One element of EmbeddingResponse.data. All three properties are required.
    Embedding:
      title: Embedding
      type: object
      required:
        - object
        - embedding
        - index
      properties:
        object:
          title: Object
          description: Always 'embedding'.
          type: string
        embedding:
          title: Embedding
          description: The embedding vector.
          # Either an array of numbers or a single string.
          # NOTE(review): presumably the string form is the base64 output
          # selected via encoding_format — confirm.
          anyOf:
            - items:
                type: number
              type: array
            - type: string
        index:
          title: Index
          description: index of embedding
          type: integer
    # One entry of HTTPValidationError.detail. `loc` is a list mixing strings
    # and integers; `msg` and `type` are plain strings. All three are required.
    ValidationError:
      title: ValidationError
      type: object
      required:
        - loc
        - msg
        - type
      properties:
        loc:
          title: Location
          type: array
          items:
            anyOf:
              - type: string
              - type: integer
        msg:
          title: Message
          type: string
        type:
          title: Error Type
          type: string
    # Optional breakdown referenced by Usage.prompt_tokens_details.
    # Its only property, cached_tokens, is an integer or null.
    PromptTokensDetails:
      title: PromptTokensDetails
      type: object
      properties:
        cached_tokens:
          title: Cached Tokens
          anyOf:
            - type: integer
            - type: 'null'
  # Security schemes available to operations; /v1/embeddings references
  # HTTPBearer.
  securitySchemes:
    # HTTP authentication using the `bearer` scheme.
    HTTPBearer:
      scheme: bearer
      type: http

````