Gemini 2.5 Flash TTS Text-to-Speech

# Send one text-to-speech request to the gemini-2.5-flash-tts endpoint.
# <authorization>, <content-type> and every "<string>" are placeholders that
# must be replaced before running; 123 stands in for the numeric temperature.
curl --request POST \
  --url https://api.highwayapi.ai/v3/gemini-2.5-flash-tts \
  --header 'Authorization: <authorization>' \
  --header 'Content-Type: <content-type>' \
  --data '
{
  "contents": {
    "role": "<string>",
    "parts": {
      "text": "<string>"
    }
  },
  "generation_config": {
    "temperature": 123,
    "speech_config": {
      "voice_config": {
        "prebuilt_voice_config": {
          "voice_name": "<string>"
        }
      },
      "language_code": "<string>",
      "multi_speaker_voice_config": {
        "speaker_voice_configs": [
          {
            "speaker": "<string>",
            "voice_config": {
              "prebuilt_voice_config": {
                "voice_name": "<string>"
              }
            }
          }
        ]
      }
    }
  }
}
'

import requests

# Text-to-speech endpoint for the gemini-2.5-flash-tts model.
url = "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts"

# Request body. Every "<string>" value and the temperature of 123 are
# placeholders that must be replaced with real values.
payload = {
    "contents": {
        "role": "<string>",
        "parts": { "text": "<string>" }
    },
    "generation_config": {
        "temperature": 123,
        "speech_config": {
            "voice_config": { "prebuilt_voice_config": { "voice_name": "<string>" } },
            "language_code": "<string>",
            "multi_speaker_voice_config": { "speaker_voice_configs": [
                    {
                        "speaker": "<string>",
                        "voice_config": { "prebuilt_voice_config": { "voice_name": "<string>" } }
                    }
                ] }
        }
    }
}
# Placeholder header values; replace before sending.
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

# requests applies no timeout by default, so bound the wait explicitly to keep
# an unresponsive server from blocking the script forever.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body as text.
print(response.text)

// Request options for the text-to-speech call. The header values, every
// '<string>' and the temperature of 123 are placeholders to be replaced.
const options = {
  method: 'POST',
  headers: {'Content-Type': '<content-type>', Authorization: '<authorization>'},
  body: JSON.stringify({
    contents: {role: '<string>', parts: {text: '<string>'}},
    generation_config: {
      temperature: 123,
      speech_config: {
        voice_config: {prebuilt_voice_config: {voice_name: '<string>'}},
        language_code: '<string>',
        multi_speaker_voice_config: {
          speaker_voice_configs: [
            {
              speaker: '<string>',
              voice_config: {prebuilt_voice_config: {voice_name: '<string>'}}
            }
          ]
        }
      }
    }
  })
};

fetch('https://api.highwayapi.ai/v3/gemini-2.5-flash-tts', options)
  .then(res => {
    // fetch only rejects on network failure, so an HTTP error status has to
    // be detected explicitly before the body is parsed as JSON.
    if (!res.ok) {
      return res.text().then(text => {
        throw new Error(`HTTP ${res.status}: ${text}`);
      });
    }
    return res.json();
  })
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

// Request body for the gemini-2.5-flash-tts text-to-speech endpoint.
// Every '<string>' value and the temperature of 123 are placeholders.
$payload = [
  'contents' => [
    'role' => '<string>',
    'parts' => [
      'text' => '<string>',
    ],
  ],
  'generation_config' => [
    'temperature' => 123,
    'speech_config' => [
      'voice_config' => [
        'prebuilt_voice_config' => [
          'voice_name' => '<string>',
        ],
      ],
      'language_code' => '<string>',
      'multi_speaker_voice_config' => [
        'speaker_voice_configs' => [
          [
            'speaker' => '<string>',
            'voice_config' => [
              'prebuilt_voice_config' => [
                'voice_name' => '<string>',
              ],
            ],
          ],
        ],
      ],
    ],
  ],
];

$curl = curl_init();

// Configure a POST of the JSON-encoded payload; the response is returned as
// a string instead of being written straight to output.
curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: <content-type>"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// Print the transport error if there was one, otherwise the response body.
echo $err ? "cURL Error #:" . $err : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends a single text-to-speech request to the gemini-2.5-flash-tts
// endpoint and prints the raw response body to stdout.
//
// The "<string>", "<content-type>" and "<authorization>" values (and the
// temperature of 123) are placeholders that must be replaced before use.
// Any failure is reported on stdout and ends the program early.
func main() {

	url := "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts"

	payload := strings.NewReader("{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so stop before touching res.Body.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))

}

// Posts the text-to-speech request body to the gemini-2.5-flash-tts endpoint
// and reads the response as a String. The header values, every "<string>" and
// the temperature of 123 are placeholders that must be replaced before use.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/gemini-2.5-flash-tts")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body("{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}")
  .asString();

require 'uri'
require 'net/http'

# Text-to-speech endpoint for the gemini-2.5-flash-tts model.
endpoint = URI("https://api.highwayapi.ai/v3/gemini-2.5-flash-tts")

# JSON request body. Every "<string>" value and the temperature of 123 are
# placeholders that must be replaced with real values.
json_body = "{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}"

# TLS-enabled connection to the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# POST request carrying the placeholder headers and the JSON body.
post = Net::HTTP::Post.new(endpoint)
post["Content-Type"] = '<content-type>'
post["Authorization"] = '<authorization>'
post.body = json_body

# Send the request and print the response body.
reply = client.request(post)
puts reply.read_body

{
  "audioContent": "<string>",
  "usageMetadata": {
    "totalTokenCount": 123,
    "promptTokenCount": 123,
    "candidatesTokenCount": 123
  }
}

POST

gemini-2.5-flash-tts

Gemini 2.5 Flash TTS Text-to-Speech

# Send one text-to-speech request to the gemini-2.5-flash-tts endpoint.
# <authorization>, <content-type> and every "<string>" are placeholders that
# must be replaced before running; 123 stands in for the numeric temperature.
curl --request POST \
  --url https://api.highwayapi.ai/v3/gemini-2.5-flash-tts \
  --header 'Authorization: <authorization>' \
  --header 'Content-Type: <content-type>' \
  --data '
{
  "contents": {
    "role": "<string>",
    "parts": {
      "text": "<string>"
    }
  },
  "generation_config": {
    "temperature": 123,
    "speech_config": {
      "voice_config": {
        "prebuilt_voice_config": {
          "voice_name": "<string>"
        }
      },
      "language_code": "<string>",
      "multi_speaker_voice_config": {
        "speaker_voice_configs": [
          {
            "speaker": "<string>",
            "voice_config": {
              "prebuilt_voice_config": {
                "voice_name": "<string>"
              }
            }
          }
        ]
      }
    }
  }
}
'

import requests

# Text-to-speech endpoint for the gemini-2.5-flash-tts model.
url = "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts"

# Request body. Every "<string>" value and the temperature of 123 are
# placeholders that must be replaced with real values.
payload = {
    "contents": {
        "role": "<string>",
        "parts": { "text": "<string>" }
    },
    "generation_config": {
        "temperature": 123,
        "speech_config": {
            "voice_config": { "prebuilt_voice_config": { "voice_name": "<string>" } },
            "language_code": "<string>",
            "multi_speaker_voice_config": { "speaker_voice_configs": [
                    {
                        "speaker": "<string>",
                        "voice_config": { "prebuilt_voice_config": { "voice_name": "<string>" } }
                    }
                ] }
        }
    }
}
# Placeholder header values; replace before sending.
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

# requests applies no timeout by default, so bound the wait explicitly to keep
# an unresponsive server from blocking the script forever.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body as text.
print(response.text)

// Request options for the text-to-speech call. The header values, every
// '<string>' and the temperature of 123 are placeholders to be replaced.
const options = {
  method: 'POST',
  headers: {'Content-Type': '<content-type>', Authorization: '<authorization>'},
  body: JSON.stringify({
    contents: {role: '<string>', parts: {text: '<string>'}},
    generation_config: {
      temperature: 123,
      speech_config: {
        voice_config: {prebuilt_voice_config: {voice_name: '<string>'}},
        language_code: '<string>',
        multi_speaker_voice_config: {
          speaker_voice_configs: [
            {
              speaker: '<string>',
              voice_config: {prebuilt_voice_config: {voice_name: '<string>'}}
            }
          ]
        }
      }
    }
  })
};

fetch('https://api.highwayapi.ai/v3/gemini-2.5-flash-tts', options)
  .then(res => {
    // fetch only rejects on network failure, so an HTTP error status has to
    // be detected explicitly before the body is parsed as JSON.
    if (!res.ok) {
      return res.text().then(text => {
        throw new Error(`HTTP ${res.status}: ${text}`);
      });
    }
    return res.json();
  })
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

// Request body for the gemini-2.5-flash-tts text-to-speech endpoint.
// Every '<string>' value and the temperature of 123 are placeholders.
$payload = [
  'contents' => [
    'role' => '<string>',
    'parts' => [
      'text' => '<string>',
    ],
  ],
  'generation_config' => [
    'temperature' => 123,
    'speech_config' => [
      'voice_config' => [
        'prebuilt_voice_config' => [
          'voice_name' => '<string>',
        ],
      ],
      'language_code' => '<string>',
      'multi_speaker_voice_config' => [
        'speaker_voice_configs' => [
          [
            'speaker' => '<string>',
            'voice_config' => [
              'prebuilt_voice_config' => [
                'voice_name' => '<string>',
              ],
            ],
          ],
        ],
      ],
    ],
  ],
];

$curl = curl_init();

// Configure a POST of the JSON-encoded payload; the response is returned as
// a string instead of being written straight to output.
curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: <content-type>"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// Print the transport error if there was one, otherwise the response body.
echo $err ? "cURL Error #:" . $err : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends a single text-to-speech request to the gemini-2.5-flash-tts
// endpoint and prints the raw response body to stdout.
//
// The "<string>", "<content-type>" and "<authorization>" values (and the
// temperature of 123) are placeholders that must be replaced before use.
// Any failure is reported on stdout and ends the program early.
func main() {

	url := "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts"

	payload := strings.NewReader("{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so stop before touching res.Body.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))

}

// Posts the text-to-speech request body to the gemini-2.5-flash-tts endpoint
// and reads the response as a String. The header values, every "<string>" and
// the temperature of 123 are placeholders that must be replaced before use.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/gemini-2.5-flash-tts")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body("{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}")
  .asString();

require 'uri'
require 'net/http'

# Text-to-speech endpoint for the gemini-2.5-flash-tts model.
endpoint = URI("https://api.highwayapi.ai/v3/gemini-2.5-flash-tts")

# JSON request body. Every "<string>" value and the temperature of 123 are
# placeholders that must be replaced with real values.
json_body = "{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}"

# TLS-enabled connection to the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# POST request carrying the placeholder headers and the JSON body.
post = Net::HTTP::Post.new(endpoint)
post["Content-Type"] = '<content-type>'
post["Authorization"] = '<authorization>'
post.body = json_body

# Send the request and print the response body.
reply = client.request(post)
puts reply.read_body

{
  "audioContent": "<string>",
  "usageMetadata": {
    "totalTokenCount": 123,
    "promptTokenCount": 123,
    "candidatesTokenCount": 123
  }
}

Convert text to speech based on the Vertex AI generateContent API. The request body format is fully consistent with the official Vertex AI API. Both synchronous (single request, single response) and streaming (single request, streaming response) modes are supported. The output is in LINEAR16 PCM format (24kHz, mono, 16-bit signed little-endian) and does not include a WAV header.

Request Headers

string

required

Enum value: application/json

string

required

Bearer authentication format: Bearer {{API Key}}.

Request Body

object

required

Hide properties

string

default:"user"

required

Role, fixed as user. Allowed value: user

object

required

Hide properties

string

required

The text content to synthesize into speech. The Vertex AI API combines the prompt and text into a single field in the format '<prompt>: <text>', for example 'Say the following in a curious way: OK, so… tell me about this AI thing.'. The total size can be up to 8000 bytes, and audio exceeding 655 seconds will be truncated. Inline marker tags are supported: [sigh], [laughing], [uhm], [sarcasm], [robotic], [shouting], [whispering], [extremely fast], [short pause], [medium pause], [long pause]. Length limit: 0 - 8000

object

required

Hide properties

number

default:2

Temperature parameter, controlling the randomness and creativity of speech generation. Higher values produce more creative and diverse results, while lower values are more predictable and focused. Valid range: (0.0, 2.0], recommended value: 2.0. Value range: [0, 2]

object

required

Hide properties

object

Single-speaker voice configuration. Choose either this or multi_speaker_voice_config

Hide properties

object

Hide properties

string

Prebuilt voice name (case-insensitive). 30 available voices (both male and female voices are available). Allowed values: Achernar, Achird, Algenib, Algieba, Alnilam, Aoede, Autonoe, Callirrhoe, Charon, Despina, Enceladus, Erinome, Fenrir, Gacrux, Iapetus, Kore, Laomedeia, Leda, Orus, Pulcherrima, Puck, Rasalgethi, Sadachbia, Sadaltager, Schedar, Sulafat, Umbriel, Vindemiatrix, Zephyr, Zubenelgenubi

string

required

Language code (BCP-47 format, case-insensitive). GA languages: ar-EG, bn-BD, nl-NL, en-IN, en-US, fr-FR, de-DE, hi-IN, id-ID, it-IT, ja-JP, ko-KR, mr-IN, pl-PL, pt-BR, ro-RO, ru-RU, es-ES, ta-IN, te-IN, th-TH, tr-TR, uk-UA, vi-VN. Preview languages include cmn-CN (Mandarin Chinese) and 63 others. Allowed values: af-ZA, am-ET, ar-001, ar-EG, az-AZ, be-BY, bg-BG, bn-BD, ca-ES, ceb-PH, cmn-CN, cmn-TW, cs-CZ, da-DK, de-DE, el-GR, en-AU, en-GB, en-IN, en-US, es-419, es-ES, es-MX, et-EE, eu-ES, fa-IR, fi-FI, fil-PH, fr-CA, fr-FR, gl-ES, gu-IN, he-IL, hi-IN, hr-HR, ht-HT, hu-HU, hy-AM, id-ID, is-IS, it-IT, ja-JP, jv-JV, ka-GE, kn-IN, ko-KR, kok-IN, la-VA, lb-LU, lo-LA, lt-LT, lv-LV, mai-IN, mg-MG, mk-MK, ml-IN, mn-MN, mr-IN, ms-MY, my-MM, nb-NO, ne-NP, nl-NL, nn-NO, or-IN, pa-IN, pl-PL, ps-AF, pt-BR, pt-PT, ro-RO, ru-RU, sd-IN, si-LK, sk-SK, sl-SI, sq-AL, sr-RS, sv-SE, sw-KE, ta-IN, te-IN, th-TH, tr-TR, uk-UA, ur-PK, vi-VN

object

Multi-speaker voice configuration. Choose either this or voice_config. Note: gemini-2.5-flash-lite-preview-tts does not support multi-speaker synthesis

Hide properties

object[]

List of speaker voice configurations

Hide properties

string

required

Speaker alias, which must consist only of alphanumeric characters and contain no spaces. It must match the speaker identifier in contents.parts.text

object

required

Hide properties

object

Hide properties

string

Response Information

string

Base64-encoded audio content. The format is LINEAR16 PCM (24kHz, mono, 16-bit signed little-endian) and does not include a WAV header. The client can use ffmpeg to convert it: ffmpeg -f s16le -ar 24k -ac 1 -i input.raw output.wav

object

Hide properties

integer

Total number of tokens (promptTokenCount + candidatesTokenCount)

integer

Number of tokens consumed by the input text

integer

Number of tokens consumed by the output audio (approximately 25 tokens per second of audio)

Fish Audio Audio Cloning

MiniMax Music

API Basics

Large Language Models

Images

Video

Audio

Gemini 2.5 Flash TTS Text-to-Speech

Request Headers

Request Body

Response Information

​Request Headers

​Request Body

​Response Information

Request Headers

Request Body

Response Information