Gemini 2.5 Flash TTS Text-to-Speech

# POST a speech-synthesis request; replace every <...> placeholder before running.
curl -X POST 'https://api.highwayapi.ai/v3/gemini-2.5-flash-tts' \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: <content-type>' \
  -d '
{
  "contents": {
    "role": "<string>",
    "parts": {
      "text": "<string>"
    }
  },
  "generation_config": {
    "temperature": 123,
    "speech_config": {
      "voice_config": {
        "prebuilt_voice_config": {
          "voice_name": "<string>"
        }
      },
      "language_code": "<string>",
      "multi_speaker_voice_config": {
        "speaker_voice_configs": [
          {
            "speaker": "<string>",
            "voice_config": {
              "prebuilt_voice_config": {
                "voice_name": "<string>"
              }
            }
          }
        ]
      }
    }
  }
}
'

import requests

# Endpoint that receives the text-to-speech request.
url = "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts"

# Request body. Every "<string>" and the temperature value 123 are generated
# placeholders and must be replaced with real values before sending.
payload = {
    "contents": {
        "role": "<string>",
        "parts": { "text": "<string>" }
    },
    "generation_config": {
        "temperature": 123,
        "speech_config": {
            "voice_config": { "prebuilt_voice_config": { "voice_name": "<string>" } },
            "language_code": "<string>",
            "multi_speaker_voice_config": { "speaker_voice_configs": [
                    {
                        "speaker": "<string>",
                        "voice_config": { "prebuilt_voice_config": { "voice_name": "<string>" } }
                    }
                ] }
        }
    }
}
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

# requests applies no timeout by default, so an unresponsive server would
# block this script forever; give up after 30 seconds instead.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body as text.
print(response.text)

// Placeholder request body; replace every '<string>' value before sending.
const requestBody = {
  contents: {
    role: '<string>',
    parts: {text: '<string>'}
  },
  generation_config: {
    temperature: 123,
    speech_config: {
      voice_config: {
        prebuilt_voice_config: {voice_name: '<string>'}
      },
      language_code: '<string>',
      multi_speaker_voice_config: {
        speaker_voice_configs: [
          {
            speaker: '<string>',
            voice_config: {
              prebuilt_voice_config: {voice_name: '<string>'}
            }
          }
        ]
      }
    }
  }
};

// Fetch settings: POST with the two placeholder headers and the serialized body.
const options = {
  method: 'POST',
  headers: {
    'Content-Type': '<content-type>',
    Authorization: '<authorization>'
  },
  body: JSON.stringify(requestBody)
};

// Send the request and log the parsed JSON reply; any failure is logged as an error.
(async () => {
  try {
    const res = await fetch('https://api.highwayapi.ai/v3/gemini-2.5-flash-tts', options);
    const parsed = await res.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Placeholder request body; replace every '<string>' value before sending.
$payload = [
  'contents' => [
    'role' => '<string>',
    'parts' => ['text' => '<string>'],
  ],
  'generation_config' => [
    'temperature' => 123,
    'speech_config' => [
      'voice_config' => [
        'prebuilt_voice_config' => ['voice_name' => '<string>'],
      ],
      'language_code' => '<string>',
      'multi_speaker_voice_config' => [
        'speaker_voice_configs' => [
          [
            'speaker' => '<string>',
            'voice_config' => [
              'prebuilt_voice_config' => ['voice_name' => '<string>'],
            ],
          ],
        ],
      ],
    ],
  ],
];

$ch = curl_init();

// Configure a POST that returns the response as a string instead of printing it.
curl_setopt_array($ch, [
  CURLOPT_URL => "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: <content-type>"
  ],
]);

// Run the transfer and capture any transport error before releasing the handle.
$response = curl_exec($ch);
$error = curl_error($ch);

curl_close($ch);

// Print the cURL error when one occurred, otherwise the response body.
echo $error ? "cURL Error #:" . $error : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a placeholder text-to-speech request and prints the raw response
// body. Each step's error is checked and reported; previously the errors were
// discarded, so a failed request left res nil and res.Body.Close() panicked.
func main() {
	url := "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts"

	// Placeholder JSON body; replace every "<string>" value before sending.
	payload := strings.NewReader("{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("failed to build request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil on error, so it must not be touched here.
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("failed to read response body:", err)
		return
	}

	fmt.Println(string(body))
}

// Placeholder JSON body; replace every "<string>" value before sending.
String requestBody = "{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}";

// POST the body with both placeholder headers and read the reply as a string.
HttpResponse<String> response = Unirest
  .post("https://api.highwayapi.ai/v3/gemini-2.5-flash-tts")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Endpoint that receives the text-to-speech request.
url = URI("https://api.highwayapi.ai/v3/gemini-2.5-flash-tts")

# Build the POST and attach both placeholder headers.
request = Net::HTTP::Post.new(url)
[
  ["Content-Type", '<content-type>'],
  ["Authorization", '<authorization>']
].each { |name, value| request[name] = value }

# Placeholder JSON body; replace every "<string>" value before sending.
request.body = "{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}"

# Send the request over TLS and print the response body.
http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true
puts http.request(request).read_body

{
  "audioContent": "<string>",
  "usageMetadata": {
    "totalTokenCount": 123,
    "promptTokenCount": 123,
    "candidatesTokenCount": 123
  }
}

POST

gemini-2.5-flash-tts

Gemini 2.5 Flash TTS Text-to-Speech

# POST a speech-synthesis request; replace every <...> placeholder before running.
curl -X POST 'https://api.highwayapi.ai/v3/gemini-2.5-flash-tts' \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: <content-type>' \
  -d '
{
  "contents": {
    "role": "<string>",
    "parts": {
      "text": "<string>"
    }
  },
  "generation_config": {
    "temperature": 123,
    "speech_config": {
      "voice_config": {
        "prebuilt_voice_config": {
          "voice_name": "<string>"
        }
      },
      "language_code": "<string>",
      "multi_speaker_voice_config": {
        "speaker_voice_configs": [
          {
            "speaker": "<string>",
            "voice_config": {
              "prebuilt_voice_config": {
                "voice_name": "<string>"
              }
            }
          }
        ]
      }
    }
  }
}
'

import requests

# Endpoint that receives the text-to-speech request.
url = "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts"

# Request body. Every "<string>" and the temperature value 123 are generated
# placeholders and must be replaced with real values before sending.
payload = {
    "contents": {
        "role": "<string>",
        "parts": { "text": "<string>" }
    },
    "generation_config": {
        "temperature": 123,
        "speech_config": {
            "voice_config": { "prebuilt_voice_config": { "voice_name": "<string>" } },
            "language_code": "<string>",
            "multi_speaker_voice_config": { "speaker_voice_configs": [
                    {
                        "speaker": "<string>",
                        "voice_config": { "prebuilt_voice_config": { "voice_name": "<string>" } }
                    }
                ] }
        }
    }
}
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

# requests applies no timeout by default, so an unresponsive server would
# block this script forever; give up after 30 seconds instead.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body as text.
print(response.text)

// Placeholder request body; replace every '<string>' value before sending.
const requestBody = {
  contents: {
    role: '<string>',
    parts: {text: '<string>'}
  },
  generation_config: {
    temperature: 123,
    speech_config: {
      voice_config: {
        prebuilt_voice_config: {voice_name: '<string>'}
      },
      language_code: '<string>',
      multi_speaker_voice_config: {
        speaker_voice_configs: [
          {
            speaker: '<string>',
            voice_config: {
              prebuilt_voice_config: {voice_name: '<string>'}
            }
          }
        ]
      }
    }
  }
};

// Fetch settings: POST with the two placeholder headers and the serialized body.
const options = {
  method: 'POST',
  headers: {
    'Content-Type': '<content-type>',
    Authorization: '<authorization>'
  },
  body: JSON.stringify(requestBody)
};

// Send the request and log the parsed JSON reply; any failure is logged as an error.
(async () => {
  try {
    const res = await fetch('https://api.highwayapi.ai/v3/gemini-2.5-flash-tts', options);
    const parsed = await res.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Placeholder request body; replace every '<string>' value before sending.
$payload = [
  'contents' => [
    'role' => '<string>',
    'parts' => ['text' => '<string>'],
  ],
  'generation_config' => [
    'temperature' => 123,
    'speech_config' => [
      'voice_config' => [
        'prebuilt_voice_config' => ['voice_name' => '<string>'],
      ],
      'language_code' => '<string>',
      'multi_speaker_voice_config' => [
        'speaker_voice_configs' => [
          [
            'speaker' => '<string>',
            'voice_config' => [
              'prebuilt_voice_config' => ['voice_name' => '<string>'],
            ],
          ],
        ],
      ],
    ],
  ],
];

$ch = curl_init();

// Configure a POST that returns the response as a string instead of printing it.
curl_setopt_array($ch, [
  CURLOPT_URL => "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: <content-type>"
  ],
]);

// Run the transfer and capture any transport error before releasing the handle.
$response = curl_exec($ch);
$error = curl_error($ch);

curl_close($ch);

// Print the cURL error when one occurred, otherwise the response body.
echo $error ? "cURL Error #:" . $error : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a placeholder text-to-speech request and prints the raw response
// body. Each step's error is checked and reported; previously the errors were
// discarded, so a failed request left res nil and res.Body.Close() panicked.
func main() {
	url := "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts"

	// Placeholder JSON body; replace every "<string>" value before sending.
	payload := strings.NewReader("{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("failed to build request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil on error, so it must not be touched here.
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("failed to read response body:", err)
		return
	}

	fmt.Println(string(body))
}

// Placeholder JSON body; replace every "<string>" value before sending.
String requestBody = "{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}";

// POST the body with both placeholder headers and read the reply as a string.
HttpResponse<String> response = Unirest
  .post("https://api.highwayapi.ai/v3/gemini-2.5-flash-tts")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Endpoint that receives the text-to-speech request.
url = URI("https://api.highwayapi.ai/v3/gemini-2.5-flash-tts")

# Build the POST and attach both placeholder headers.
request = Net::HTTP::Post.new(url)
[
  ["Content-Type", '<content-type>'],
  ["Authorization", '<authorization>']
].each { |name, value| request[name] = value }

# Placeholder JSON body; replace every "<string>" value before sending.
request.body = "{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}"

# Send the request over TLS and print the response body.
http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true
puts http.request(request).read_body

{
  "audioContent": "<string>",
  "usageMetadata": {
    "totalTokenCount": 123,
    "promptTokenCount": 123,
    "candidatesTokenCount": 123
  }
}

Convierte texto en voz mediante la interfaz generateContent de Vertex AI. El formato del cuerpo de la solicitud es totalmente coherente con la API oficial de Vertex AI. Admite dos modos: síncrono (una solicitud, una respuesta) y streaming (una solicitud, respuesta en streaming). La salida está en formato LINEAR16 PCM (24kHz, mono, 16-bit signed little-endian) y no incluye encabezado WAV.

Encabezados de solicitud

string

requerido

Valores enumerados: application/json

string

requerido

Formato de autenticación Bearer: Bearer {{API Key}}.

Cuerpo de la solicitud

object

requerido

Ocultar properties

string

predeterminado:"user"

requerido

Rol, fijo como user. Valores opcionales: user

object

requerido

Ocultar properties

string

requerido

Contenido de texto que se sintetizará como voz. La API de Vertex AI combina el prompt y el texto en un solo campo, con el formato '{prompt}: {text}', por ejemplo: 'Say the following in a curious way: OK, so… tell me about this AI thing.' El tamaño total es de hasta 8000 bytes; el audio que supere los 655 segundos se truncará. Admite etiquetas de marcado en línea: [sigh], [laughing], [uhm], [sarcasm], [robotic], [shouting], [whispering], [extremely fast], [short pause], [medium pause], [long pause]. Límite de longitud: 0 - 8000

object

requerido

Ocultar properties

number

predeterminado:2

Parámetro de temperatura que controla la aleatoriedad y la creatividad de la generación de voz. Cuanto mayor sea el valor, más creativa y diversa será la salida; cuanto menor sea, más predecible y enfocada. Rango válido (0.0, 2.0]; el valor recomendado es 2.0. Rango de valores: [0, 2]

object

requerido

Ocultar properties

object

Configuración de voz de una sola persona. Elige una de las dos opciones junto con multi_speaker_voice_config

Ocultar properties

object

Ocultar properties

string

Nombre de voz predefinida (no distingue mayúsculas y minúsculas). Hay 30 voces disponibles (incluye voces masculinas y femeninas). Valores opcionales: Achernar, Achird, Algenib, Algieba, Alnilam, Aoede, Autonoe, Callirrhoe, Charon, Despina, Enceladus, Erinome, Fenrir, Gacrux, Iapetus, Kore, Laomedeia, Leda, Orus, Pulcherrima, Puck, Rasalgethi, Sadachbia, Sadaltager, Schedar, Sulafat, Umbriel, Vindemiatrix, Zephyr, Zubenelgenubi

string

requerido

Código de idioma (formato BCP-47, no distingue mayúsculas y minúsculas). Idiomas GA: ar-EG, bn-BD, nl-NL, en-IN, en-US, fr-FR, de-DE, hi-IN, id-ID, it-IT, ja-JP, ko-KR, mr-IN, pl-PL, pt-BR, ro-RO, ru-RU, es-ES, ta-IN, te-IN, th-TH, tr-TR, uk-UA, vi-VN. Los idiomas Preview incluyen cmn-CN (chino mandarín) y otros 63. Valores opcionales: af-ZA, am-ET, ar-001, ar-EG, az-AZ, be-BY, bg-BG, bn-BD, ca-ES, ceb-PH, cmn-CN, cmn-TW, cs-CZ, da-DK, de-DE, el-GR, en-AU, en-GB, en-IN, en-US, es-419, es-ES, es-MX, et-EE, eu-ES, fa-IR, fi-FI, fil-PH, fr-CA, fr-FR, gl-ES, gu-IN, he-IL, hi-IN, hr-HR, ht-HT, hu-HU, hy-AM, id-ID, is-IS, it-IT, ja-JP, jv-JV, ka-GE, kn-IN, ko-KR, kok-IN, la-VA, lb-LU, lo-LA, lt-LT, lv-LV, mai-IN, mg-MG, mk-MK, ml-IN, mn-MN, mr-IN, ms-MY, my-MM, nb-NO, ne-NP, nl-NL, nn-NO, or-IN, pa-IN, pl-PL, ps-AF, pt-BR, pt-PT, ro-RO, ru-RU, sd-IN, si-LK, sk-SK, sl-SI, sq-AL, sr-RS, sv-SE, sw-KE, ta-IN, te-IN, th-TH, tr-TR, uk-UA, ur-PK, vi-VN

object

Configuración de voz de múltiples personas. Elige una de las dos opciones junto con voice_config. Nota: gemini-2.5-flash-lite-preview-tts no admite síntesis de múltiples personas

Ocultar properties

object[]

Lista de configuraciones de voces de hablantes

Ocultar properties

string

requerido

Alias del hablante; debe constar únicamente de caracteres alfanuméricos y no contener espacios. Debe coincidir con el identificador de hablante en contents.parts.text

object

requerido

Ocultar properties

object

Ocultar properties

string

Información de respuesta

string

Contenido de audio codificado en Base64. El formato es LINEAR16 PCM (24kHz, mono, 16-bit signed little-endian) y no incluye encabezado WAV. El cliente puede convertirlo usando ffmpeg: ffmpeg -f s16le -ar 24k -ac 1 -i input.raw output.wav

object

Ocultar properties

integer

Número total de tokens (promptTokenCount + candidatesTokenCount)

integer

Número de tokens consumidos por el texto de entrada

integer

Número de tokens consumidos por el audio de salida (aproximadamente 25 tokens por segundo de audio)

Clonación de audio de Fish Audio

MiniMax Music

Conceptos básicos de API

Modelos de lenguaje

Imágenes

Vídeo

Audio

Gemini 2.5 Flash TTS Text-to-Speech

Encabezados de solicitud

Cuerpo de la solicitud

Información de respuesta

​Encabezados de solicitud

​Cuerpo de la solicitud

​Información de respuesta

Encabezados de solicitud

Cuerpo de la solicitud

Información de respuesta