Gemini 2.5 Flash TTS Text-to-Speech

curl --request POST \
  --url https://api.highwayapi.ai/v3/gemini-2.5-flash-tts \
  --header 'Authorization: <authorization>' \
  --header 'Content-Type: <content-type>' \
  --data '
{
  "contents": {
    "role": "<string>",
    "parts": {
      "text": "<string>"
    }
  },
  "generation_config": {
    "temperature": 123,
    "speech_config": {
      "voice_config": {
        "prebuilt_voice_config": {
          "voice_name": "<string>"
        }
      },
      "language_code": "<string>",
      "multi_speaker_voice_config": {
        "speaker_voice_configs": [
          {
            "speaker": "<string>",
            "voice_config": {
              "prebuilt_voice_config": {
                "voice_name": "<string>"
              }
            }
          }
        ]
      }
    }
  }
}
'

import requests

# Endpoint of the Gemini 2.5 Flash TTS text-to-speech API.
url = "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts"

# Request body. Every "<string>" value and the temperature are placeholders
# that must be replaced with real values before sending.
payload = {
    "contents": {
        "role": "<string>",
        "parts": { "text": "<string>" }
    },
    "generation_config": {
        "temperature": 123,
        "speech_config": {
            "voice_config": { "prebuilt_voice_config": { "voice_name": "<string>" } },
            "language_code": "<string>",
            "multi_speaker_voice_config": { "speaker_voice_configs": [
                    {
                        "speaker": "<string>",
                        "voice_config": { "prebuilt_voice_config": { "voice_name": "<string>" } }
                    }
                ] }
        }
    }
}
# Header values are placeholders as well.
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

# requests applies no timeout by default, so an unresponsive server would
# block this call forever. Raise the limit if long syntheses need more time.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body as returned by the server.
print(response.text)

// Request options for the text-to-speech call. The header values and every
// '<string>' field are placeholders that must be replaced before sending.
const options = {
  method: 'POST',
  headers: {'Content-Type': '<content-type>', Authorization: '<authorization>'},
  body: JSON.stringify({
    contents: {role: '<string>', parts: {text: '<string>'}},
    generation_config: {
      temperature: 123,
      speech_config: {
        voice_config: {prebuilt_voice_config: {voice_name: '<string>'}},
        language_code: '<string>',
        multi_speaker_voice_config: {
          speaker_voice_configs: [
            {
              speaker: '<string>',
              voice_config: {prebuilt_voice_config: {voice_name: '<string>'}}
            }
          ]
        }
      }
    }
  })
};

fetch('https://api.highwayapi.ai/v3/gemini-2.5-flash-tts', options)
  .then(async res => {
    // A non-2xx reply is not guaranteed to be JSON, so report the status and
    // raw body instead of letting res.json() fail with a parse error.
    if (!res.ok) {
      throw new Error(`HTTP ${res.status}: ${await res.text()}`);
    }
    return res.json();
  })
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'contents' => [
        'role' => '<string>',
        'parts' => [
                'text' => '<string>'
        ]
    ],
    'generation_config' => [
        'temperature' => 123,
        'speech_config' => [
                'voice_config' => [
                                'prebuilt_voice_config' => [
                                                                'voice_name' => '<string>'
                                ]
                ],
                'language_code' => '<string>',
                'multi_speaker_voice_config' => [
                                'speaker_voice_configs' => [
                                                                [
                                                                                                                                'speaker' => '<string>',
                                                                                                                                'voice_config' => [
                                                                                                                                                                                                                                                                'prebuilt_voice_config' => [
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                'voice_name' => '<string>'
                                                                                                                                                                                                                                                                ]
                                                                                                                                ]
                                                                ]
                                ]
                ]
        ]
    ]
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: <content-type>"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends the sample text-to-speech request to the gemini-2.5-flash-tts
// endpoint and prints the raw response body to standard output.
//
// The Content-Type and Authorization header values, like the "<string>"
// fields of the payload, are placeholders that must be replaced first.
func main() {

	url := "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts"

	payload := strings.NewReader("{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// On failure res is nil, so touching res.Body would panic.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/gemini-2.5-flash-tts")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body("{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.highwayapi.ai/v3/gemini-2.5-flash-tts")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = '<content-type>'
request["Authorization"] = '<authorization>'
request.body = "{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}"

response = http.request(request)
puts response.read_body

{
  "audioContent": "<string>",
  "usageMetadata": {
    "totalTokenCount": 123,
    "promptTokenCount": 123,
    "candidatesTokenCount": 123
  }
}

POST

gemini-2.5-flash-tts

Gemini 2.5 Flash TTS Text-to-Speech

curl --request POST \
  --url https://api.highwayapi.ai/v3/gemini-2.5-flash-tts \
  --header 'Authorization: <authorization>' \
  --header 'Content-Type: <content-type>' \
  --data '
{
  "contents": {
    "role": "<string>",
    "parts": {
      "text": "<string>"
    }
  },
  "generation_config": {
    "temperature": 123,
    "speech_config": {
      "voice_config": {
        "prebuilt_voice_config": {
          "voice_name": "<string>"
        }
      },
      "language_code": "<string>",
      "multi_speaker_voice_config": {
        "speaker_voice_configs": [
          {
            "speaker": "<string>",
            "voice_config": {
              "prebuilt_voice_config": {
                "voice_name": "<string>"
              }
            }
          }
        ]
      }
    }
  }
}
'

import requests

# Endpoint of the Gemini 2.5 Flash TTS text-to-speech API.
url = "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts"

# Request body. Every "<string>" value and the temperature are placeholders
# that must be replaced with real values before sending.
payload = {
    "contents": {
        "role": "<string>",
        "parts": { "text": "<string>" }
    },
    "generation_config": {
        "temperature": 123,
        "speech_config": {
            "voice_config": { "prebuilt_voice_config": { "voice_name": "<string>" } },
            "language_code": "<string>",
            "multi_speaker_voice_config": { "speaker_voice_configs": [
                    {
                        "speaker": "<string>",
                        "voice_config": { "prebuilt_voice_config": { "voice_name": "<string>" } }
                    }
                ] }
        }
    }
}
# Header values are placeholders as well.
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

# requests applies no timeout by default, so an unresponsive server would
# block this call forever. Raise the limit if long syntheses need more time.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body as returned by the server.
print(response.text)

// Request options for the text-to-speech call. The header values and every
// '<string>' field are placeholders that must be replaced before sending.
const options = {
  method: 'POST',
  headers: {'Content-Type': '<content-type>', Authorization: '<authorization>'},
  body: JSON.stringify({
    contents: {role: '<string>', parts: {text: '<string>'}},
    generation_config: {
      temperature: 123,
      speech_config: {
        voice_config: {prebuilt_voice_config: {voice_name: '<string>'}},
        language_code: '<string>',
        multi_speaker_voice_config: {
          speaker_voice_configs: [
            {
              speaker: '<string>',
              voice_config: {prebuilt_voice_config: {voice_name: '<string>'}}
            }
          ]
        }
      }
    }
  })
};

fetch('https://api.highwayapi.ai/v3/gemini-2.5-flash-tts', options)
  .then(async res => {
    // A non-2xx reply is not guaranteed to be JSON, so report the status and
    // raw body instead of letting res.json() fail with a parse error.
    if (!res.ok) {
      throw new Error(`HTTP ${res.status}: ${await res.text()}`);
    }
    return res.json();
  })
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'contents' => [
        'role' => '<string>',
        'parts' => [
                'text' => '<string>'
        ]
    ],
    'generation_config' => [
        'temperature' => 123,
        'speech_config' => [
                'voice_config' => [
                                'prebuilt_voice_config' => [
                                                                'voice_name' => '<string>'
                                ]
                ],
                'language_code' => '<string>',
                'multi_speaker_voice_config' => [
                                'speaker_voice_configs' => [
                                                                [
                                                                                                                                'speaker' => '<string>',
                                                                                                                                'voice_config' => [
                                                                                                                                                                                                                                                                'prebuilt_voice_config' => [
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                'voice_name' => '<string>'
                                                                                                                                                                                                                                                                ]
                                                                                                                                ]
                                                                ]
                                ]
                ]
        ]
    ]
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: <content-type>"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends the sample text-to-speech request to the gemini-2.5-flash-tts
// endpoint and prints the raw response body to standard output.
//
// The Content-Type and Authorization header values, like the "<string>"
// fields of the payload, are placeholders that must be replaced first.
func main() {

	url := "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts"

	payload := strings.NewReader("{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// On failure res is nil, so touching res.Body would panic.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/gemini-2.5-flash-tts")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body("{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.highwayapi.ai/v3/gemini-2.5-flash-tts")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = '<content-type>'
request["Authorization"] = '<authorization>'
request.body = "{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}"

response = http.request(request)
puts response.read_body

{
  "audioContent": "<string>",
  "usageMetadata": {
    "totalTokenCount": 123,
    "promptTokenCount": 123,
    "candidatesTokenCount": 123
  }
}

Konvertiert Text über die Vertex AI generateContent-Schnittstelle in Sprache. Das Format des Request-Bodys ist vollständig identisch mit der offiziellen Vertex AI API. Unterstützt sowohl den synchronen Modus (eine Anfrage, eine Antwort) als auch den Streaming-Modus (eine Anfrage, Streaming-Antwort). Die Ausgabe erfolgt im LINEAR16-PCM-Format (24kHz, Mono, 16-bit signed little-endian) und enthält keinen WAV-Header.

Request-Header

string

erforderlich

Enumerationswert: application/json

string

erforderlich

Format der Bearer-Authentifizierung: Bearer {{API Key}}.

Request-Body

object

erforderlich

Ausblenden properties

string

Standard:"user"

erforderlich

Rolle, fest auf user gesetzt. Optionale Werte: user

object

erforderlich

Ausblenden properties

string

erforderlich

Textinhalt, der in Sprache synthetisiert werden soll. Die Vertex AI API führt Prompt und Text in einem Feld zusammen, im Format „{Prompt}: {Text}“, z. B. „Say the following in a curious way: OK, so… tell me about this AI thing.“ Die Gesamtgröße beträgt maximal 8000 Byte; Audio, das 655 Sekunden überschreitet, wird abgeschnitten. Inline-Markup-Tags werden unterstützt: [sigh], [laughing], [uhm], [sarcasm], [robotic], [shouting], [whispering], [extremely fast], [short pause], [medium pause], [long pause]. Längenbegrenzung: 0 - 8000

object

erforderlich

Ausblenden properties

number

Standard:2

Temperaturparameter, steuert die Zufälligkeit und Kreativität der Sprachgenerierung. Höhere Werte erzeugen kreativere und vielfältigere Ergebnisse, niedrigere Werte sind vorhersehbarer und fokussierter. Gültiger Bereich (0.0, 2.0], empfohlener Wert ist 2.0. Wertebereich: [0, 2]

object

erforderlich

Ausblenden properties

object

Sprachkonfiguration für einen einzelnen Sprecher. Es ist entweder voice_config oder multi_speaker_voice_config anzugeben.

Ausblenden properties

object

Ausblenden properties

string

Name der vorgefertigten Stimme (Groß-/Kleinschreibung wird nicht beachtet). 30 optionale Stimmen verfügbar (sowohl weibliche als auch männliche Stimmen). Optionale Werte: Achernar, Achird, Algenib, Algieba, Alnilam, Aoede, Autonoe, Callirrhoe, Charon, Despina, Enceladus, Erinome, Fenrir, Gacrux, Iapetus, Kore, Laomedeia, Leda, Orus, Pulcherrima, Puck, Rasalgethi, Sadachbia, Sadaltager, Schedar, Sulafat, Umbriel, Vindemiatrix, Zephyr, Zubenelgenubi

string

erforderlich

Sprachcode (BCP-47-Format, Groß-/Kleinschreibung wird nicht beachtet). GA-Sprachen: ar-EG, bn-BD, nl-NL, en-IN, en-US, fr-FR, de-DE, hi-IN, id-ID, it-IT, ja-JP, ko-KR, mr-IN, pl-PL, pt-BR, ro-RO, ru-RU, es-ES, ta-IN, te-IN, th-TH, tr-TR, uk-UA, vi-VN. Die Preview-Sprachen umfassen unter anderem cmn-CN (Chinesisch, Mandarin), insgesamt 63 Sprachen. Optionale Werte: af-ZA, am-ET, ar-001, ar-EG, az-AZ, be-BY, bg-BG, bn-BD, ca-ES, ceb-PH, cmn-CN, cmn-TW, cs-CZ, da-DK, de-DE, el-GR, en-AU, en-GB, en-IN, en-US, es-419, es-ES, es-MX, et-EE, eu-ES, fa-IR, fi-FI, fil-PH, fr-CA, fr-FR, gl-ES, gu-IN, he-IL, hi-IN, hr-HR, ht-HT, hu-HU, hy-AM, id-ID, is-IS, it-IT, ja-JP, jv-JV, ka-GE, kn-IN, ko-KR, kok-IN, la-VA, lb-LU, lo-LA, lt-LT, lv-LV, mai-IN, mg-MG, mk-MK, ml-IN, mn-MN, mr-IN, ms-MY, my-MM, nb-NO, ne-NP, nl-NL, nn-NO, or-IN, pa-IN, pl-PL, ps-AF, pt-BR, pt-PT, ro-RO, ru-RU, sd-IN, si-LK, sk-SK, sl-SI, sq-AL, sr-RS, sv-SE, sw-KE, ta-IN, te-IN, th-TH, tr-TR, uk-UA, ur-PK, vi-VN

object

Sprachkonfiguration für mehrere Sprecher. Es ist entweder voice_config oder multi_speaker_voice_config anzugeben. Hinweis: gemini-2.5-flash-lite-preview-tts unterstützt keine Mehrsprecher-Synthese.

Ausblenden properties

object[]

Liste der Sprecher-Sprachkonfigurationen

Ausblenden properties

string

erforderlich

Sprecher-Alias, darf nur aus alphanumerischen Zeichen bestehen und keine Leerzeichen enthalten. Muss mit der Sprecherkennung in contents.parts.text übereinstimmen

object

erforderlich

Ausblenden properties

object

Ausblenden properties

string

Antwortinformationen

string

Base64-codierter Audioinhalt. Format: LINEAR16 PCM (24kHz, Mono, 16-bit signed little-endian), ohne WAV-Header. Clients können ffmpeg zur Konvertierung verwenden: ffmpeg -f s16le -ar 24k -ac 1 -i input.raw output.wav

object

Ausblenden properties

integer

Gesamtanzahl der Tokens (promptTokenCount + candidatesTokenCount)

integer

Anzahl der Tokens, die vom Eingabetext verbraucht wurden

integer

Anzahl der Tokens, die vom Ausgabeaudio verbraucht wurden (ca. 25 Tokens pro Sekunde Audio)

Fish Audio Audio-Klonen

Zurück

MiniMax Music

Weiter

API-Grundlagen

Große Sprachmodelle

Bilder

Video

Audio

Gemini 2.5 Flash TTS Text-to-Speech

Request-Header

Request-Body

Antwortinformationen

Request-Header

Request-Body

Antwortinformationen

Request-Header

Request-Body

Antwortinformationen