Create chat completion

POST /v1/chat/completions

curl --request POST \
  --url https://inference.api.nscale.com/v1/chat/completions \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json; charset=utf-8' \
  --data '{
  "frequency_penalty": 0,
  "logit_bias": null,
  "logprobs": false,
  "max_completion_tokens": 100,
  "max_tokens": 100,
  "messages": [
    {
      "content": "Hello, how are you?",
      "role": "user"
    }
  ],
  "model": "meta-llama/Llama-3.1-8B-Instruct",
  "n": 1,
  "presence_penalty": 0,
  "stop": "\n",
  "stream": false,
  "stream_options": null,
  "temperature": 1,
  "top_logprobs": 0,
  "top_p": 1
}'

{
  "id": "<string>",
  "object": "<string>",
  "created": 123,
  "model": "<string>",
  "choices": [
    {
      "index": 123,
      "message": {
        "role": "<string>",
        "content": "<string>",
        "reasoning_content": "<string>",
        "tool_calls": [
          "<any>"
        ]
      },
      "logprobs": {
        "content": [
          {
            "token": "<string>",
            "logprob": 123,
            "bytes": [
              123
            ],
            "top_logprobs": [
              {
                "token": "<string>",
                "logprob": 123,
                "bytes": [
                  123
                ]
              }
            ]
          }
        ]
      },
      "finish_reason": "<string>",
      "stop_reason": "<string>"
    }
  ],
  "usage": {
    "prompt_tokens": 123,
    "total_tokens": 123,
    "completion_tokens": 123,
    "prompt_tokens_details": "<any>"
  },
  "prompt_logprobs": "<string>"
}

Authorizations

Authorization

string

header

required

Bearer authentication header of the form `Bearer <token>`, where `<token>` is your auth token.

Body

application/json; charset=utf-8

Response

200

application/json; charset=utf-8

The response is a JSON object.

curl --request POST \
  --url https://inference.api.nscale.com/v1/chat/completions \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json; charset=utf-8' \
  --data '{
  "frequency_penalty": 0,
  "logit_bias": null,
  "logprobs": false,
  "max_completion_tokens": 100,
  "max_tokens": 100,
  "messages": [
    {
      "content": "Hello, how are you?",
      "role": "user"
    }
  ],
  "model": "meta-llama/Llama-3.1-8B-Instruct",
  "n": 1,
  "presence_penalty": 0,
  "stop": "\n",
  "stream": false,
  "stream_options": null,
  "temperature": 1,
  "top_logprobs": 0,
  "top_p": 1
}'

{
  "id": "<string>",
  "object": "<string>",
  "created": 123,
  "model": "<string>",
  "choices": [
    {
      "index": 123,
      "message": {
        "role": "<string>",
        "content": "<string>",
        "reasoning_content": "<string>",
        "tool_calls": [
          "<any>"
        ]
      },
      "logprobs": {
        "content": [
          {
            "token": "<string>",
            "logprob": 123,
            "bytes": [
              123
            ],
            "top_logprobs": [
              {
                "token": "<string>",
                "logprob": 123,
                "bytes": [
                  123
                ]
              }
            ]
          }
        ]
      },
      "finish_reason": "<string>",
      "stop_reason": "<string>"
    }
  ],
  "usage": {
    "prompt_tokens": 123,
    "total_tokens": 123,
    "completion_tokens": 123,
    "prompt_tokens_details": "<any>"
  },
  "prompt_logprobs": "<string>"
}

POST /v1/chat/completions

curl --request POST \
  --url https://inference.api.nscale.com/v1/chat/completions \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json; charset=utf-8' \
  --data '{
  "frequency_penalty": 0,
  "logit_bias": null,
  "logprobs": false,
  "max_completion_tokens": 100,
  "max_tokens": 100,
  "messages": [
    {
      "content": "Hello, how are you?",
      "role": "user"
    }
  ],
  "model": "meta-llama/Llama-3.1-8B-Instruct",
  "n": 1,
  "presence_penalty": 0,
  "stop": "\n",
  "stream": false,
  "stream_options": null,
  "temperature": 1,
  "top_logprobs": 0,
  "top_p": 1
}'

{
  "id": "<string>",
  "object": "<string>",
  "created": 123,
  "model": "<string>",
  "choices": [
    {
      "index": 123,
      "message": {
        "role": "<string>",
        "content": "<string>",
        "reasoning_content": "<string>",
        "tool_calls": [
          "<any>"
        ]
      },
      "logprobs": {
        "content": [
          {
            "token": "<string>",
            "logprob": 123,
            "bytes": [
              123
            ],
            "top_logprobs": [
              {
                "token": "<string>",
                "logprob": 123,
                "bytes": [
                  123
                ]
              }
            ]
          }
        ]
      },
      "finish_reason": "<string>",
      "stop_reason": "<string>"
    }
  ],
  "usage": {
    "prompt_tokens": 123,
    "total_tokens": 123,
    "completion_tokens": 123,
    "prompt_tokens_details": "<any>"
  },
  "prompt_logprobs": "<string>"
}

Authorizations

Authorization

string

header

required

Bearer authentication header of the form `Bearer <token>`, where `<token>` is your auth token.

Body

application/json; charset=utf-8

Response

200

application/json; charset=utf-8

The response is a JSON object.

curl --request POST \
  --url https://inference.api.nscale.com/v1/chat/completions \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json; charset=utf-8' \
  --data '{
  "frequency_penalty": 0,
  "logit_bias": null,
  "logprobs": false,
  "max_completion_tokens": 100,
  "max_tokens": 100,
  "messages": [
    {
      "content": "Hello, how are you?",
      "role": "user"
    }
  ],
  "model": "meta-llama/Llama-3.1-8B-Instruct",
  "n": 1,
  "presence_penalty": 0,
  "stop": "\n",
  "stream": false,
  "stream_options": null,
  "temperature": 1,
  "top_logprobs": 0,
  "top_p": 1
}'

{
  "id": "<string>",
  "object": "<string>",
  "created": 123,
  "model": "<string>",
  "choices": [
    {
      "index": 123,
      "message": {
        "role": "<string>",
        "content": "<string>",
        "reasoning_content": "<string>",
        "tool_calls": [
          "<any>"
        ]
      },
      "logprobs": {
        "content": [
          {
            "token": "<string>",
            "logprob": 123,
            "bytes": [
              123
            ],
            "top_logprobs": [
              {
                "token": "<string>",
                "logprob": 123,
                "bytes": [
                  123
                ]
              }
            ]
          }
        ]
      },
      "finish_reason": "<string>",
      "stop_reason": "<string>"
    }
  ],
  "usage": {
    "prompt_tokens": 123,
    "total_tokens": 123,
    "completion_tokens": 123,
    "prompt_tokens_details": "<any>"
  },
  "prompt_logprobs": "<string>"
}

Create chat completion

Authorizations

Body

Response

Inference

Fine Tuning

Datasets

Create chat completion

Authorizations

Body

Response