Gemini 2.5 Flash TTS Synthèse vocale

# Send a speech-synthesis request to the gemini-2.5-flash-tts endpoint.
# Replace every <...> placeholder before running; the JSON body is sent as written.
curl -X POST \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: <content-type>' \
  -d '
{
  "contents": {
    "role": "<string>",
    "parts": {
      "text": "<string>"
    }
  },
  "generation_config": {
    "temperature": 123,
    "speech_config": {
      "voice_config": {
        "prebuilt_voice_config": {
          "voice_name": "<string>"
        }
      },
      "language_code": "<string>",
      "multi_speaker_voice_config": {
        "speaker_voice_configs": [
          {
            "speaker": "<string>",
            "voice_config": {
              "prebuilt_voice_config": {
                "voice_name": "<string>"
              }
            }
          }
        ]
      }
    }
  }
}
' \
  https://api.highwayapi.ai/v3/gemini-2.5-flash-tts

import requests

# Endpoint that receives the speech-synthesis request.
url = "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts"

# Request body. Every "<string>" value (and the sample temperature) is a
# placeholder to replace with real data before sending.
payload = {
    "contents": {
        "role": "<string>",
        "parts": { "text": "<string>" }
    },
    "generation_config": {
        "temperature": 123,
        "speech_config": {
            "voice_config": { "prebuilt_voice_config": { "voice_name": "<string>" } },
            "language_code": "<string>",
            "multi_speaker_voice_config": { "speaker_voice_configs": [
                    {
                        "speaker": "<string>",
                        "voice_config": { "prebuilt_voice_config": { "voice_name": "<string>" } }
                    }
                ] }
        }
    }
}
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

# requests waits indefinitely unless a timeout is given; cap the wait at
# 30 seconds so an unresponsive server cannot hang the script.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body as text.
print(response.text)

// Endpoint that receives the speech-synthesis request.
const endpoint = 'https://api.highwayapi.ai/v3/gemini-2.5-flash-tts';

// Request body; every '<string>' value is a placeholder to replace.
const payload = {
  contents: {
    role: '<string>',
    parts: {text: '<string>'}
  },
  generation_config: {
    temperature: 123,
    speech_config: {
      voice_config: {
        prebuilt_voice_config: {voice_name: '<string>'}
      },
      language_code: '<string>',
      multi_speaker_voice_config: {
        speaker_voice_configs: [
          {
            speaker: '<string>',
            voice_config: {
              prebuilt_voice_config: {voice_name: '<string>'}
            }
          }
        ]
      }
    }
  }
};

const requestInit = {
  method: 'POST',
  headers: {
    'Content-Type': '<content-type>',
    Authorization: '<authorization>'
  },
  body: JSON.stringify(payload)
};

// POST the payload, log the parsed JSON reply, and log any failure
// (network error or unparsable body) to the error console.
(async () => {
  try {
    const res = await fetch(endpoint, requestInit);
    const parsed = await res.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Request body; every '<string>' value is a placeholder to replace.
$payload = [
  'contents' => [
    'role' => '<string>',
    'parts' => ['text' => '<string>'],
  ],
  'generation_config' => [
    'temperature' => 123,
    'speech_config' => [
      'voice_config' => [
        'prebuilt_voice_config' => ['voice_name' => '<string>'],
      ],
      'language_code' => '<string>',
      'multi_speaker_voice_config' => [
        'speaker_voice_configs' => [
          [
            'speaker' => '<string>',
            'voice_config' => [
              'prebuilt_voice_config' => ['voice_name' => '<string>'],
            ],
          ],
        ],
      ],
    ],
  ],
];

$ch = curl_init();

// Configure a JSON POST whose response is returned as a string instead of
// being written straight to the output.
curl_setopt_array($ch, [
  CURLOPT_URL => "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: <content-type>"
  ],
]);

$body = curl_exec($ch);
$error = curl_error($ch);

curl_close($ch);

// Print the transport error when there is one, otherwise the response body.
echo $error ? "cURL Error #:" . $error : $body;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts the sample speech-synthesis request and prints the raw response
// body. Every failure (building the request, sending it, reading the reply)
// is reported and ends the program instead of being silently discarded.
func main() {
	const endpoint = "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts"

	// Request body; every "<string>" value is a placeholder to replace.
	payload := strings.NewReader(`{
  "contents": {
    "role": "<string>",
    "parts": {
      "text": "<string>"
    }
  },
  "generation_config": {
    "temperature": 123,
    "speech_config": {
      "voice_config": {
        "prebuilt_voice_config": {
          "voice_name": "<string>"
        }
      },
      "language_code": "<string>",
      "multi_speaker_voice_config": {
        "speaker_voice_configs": [
          {
            "speaker": "<string>",
            "voice_config": {
              "prebuilt_voice_config": {
                "voice_name": "<string>"
              }
            }
          }
        ]
      }
    }
  }
}`)

	req, err := http.NewRequest(http.MethodPost, endpoint, payload)
	if err != nil {
		fmt.Println("building request failed:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil on error, so it must not be touched (no Body to close).
		fmt.Println("sending request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response failed:", err)
		return
	}

	fmt.Println(string(body))
}

// Request body; every "<string>" value is a placeholder to replace.
String requestJson = "{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}";

// POST the body to the speech-synthesis endpoint and keep the reply as a string.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/gemini-2.5-flash-tts")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body(requestJson)
  .asString();

require 'uri'
require 'net/http'

# Endpoint that receives the speech-synthesis request.
endpoint = URI("https://api.highwayapi.ai/v3/gemini-2.5-flash-tts")

# Request body; every "<string>" value is a placeholder to replace.
# chomp drops the trailing newline the heredoc would otherwise append.
payload = <<~JSON.chomp
  {
    "contents": {
      "role": "<string>",
      "parts": {
        "text": "<string>"
      }
    },
    "generation_config": {
      "temperature": 123,
      "speech_config": {
        "voice_config": {
          "prebuilt_voice_config": {
            "voice_name": "<string>"
          }
        },
        "language_code": "<string>",
        "multi_speaker_voice_config": {
          "speaker_voice_configs": [
            {
              "speaker": "<string>",
              "voice_config": {
                "prebuilt_voice_config": {
                  "voice_name": "<string>"
                }
              }
            }
          ]
        }
      }
    }
  }
JSON

# HTTPS client for the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

post = Net::HTTP::Post.new(endpoint)
post["Content-Type"] = '<content-type>'
post["Authorization"] = '<authorization>'
post.body = payload

# Send the request and print the response body.
reply = client.request(post)
puts reply.read_body

{
  "audioContent": "<string>",
  "usageMetadata": {
    "totalTokenCount": 123,
    "promptTokenCount": 123,
    "candidatesTokenCount": 123
  }
}

POST

gemini-2.5-flash-tts

Gemini 2.5 Flash TTS Synthèse vocale

# Send a speech-synthesis request to the gemini-2.5-flash-tts endpoint.
# Replace every <...> placeholder before running; the JSON body is sent as written.
curl -X POST \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: <content-type>' \
  -d '
{
  "contents": {
    "role": "<string>",
    "parts": {
      "text": "<string>"
    }
  },
  "generation_config": {
    "temperature": 123,
    "speech_config": {
      "voice_config": {
        "prebuilt_voice_config": {
          "voice_name": "<string>"
        }
      },
      "language_code": "<string>",
      "multi_speaker_voice_config": {
        "speaker_voice_configs": [
          {
            "speaker": "<string>",
            "voice_config": {
              "prebuilt_voice_config": {
                "voice_name": "<string>"
              }
            }
          }
        ]
      }
    }
  }
}
' \
  https://api.highwayapi.ai/v3/gemini-2.5-flash-tts

import requests

# Endpoint that receives the speech-synthesis request.
url = "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts"

# Request body. Every "<string>" value (and the sample temperature) is a
# placeholder to replace with real data before sending.
payload = {
    "contents": {
        "role": "<string>",
        "parts": { "text": "<string>" }
    },
    "generation_config": {
        "temperature": 123,
        "speech_config": {
            "voice_config": { "prebuilt_voice_config": { "voice_name": "<string>" } },
            "language_code": "<string>",
            "multi_speaker_voice_config": { "speaker_voice_configs": [
                    {
                        "speaker": "<string>",
                        "voice_config": { "prebuilt_voice_config": { "voice_name": "<string>" } }
                    }
                ] }
        }
    }
}
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

# requests waits indefinitely unless a timeout is given; cap the wait at
# 30 seconds so an unresponsive server cannot hang the script.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body as text.
print(response.text)

// Endpoint that receives the speech-synthesis request.
const endpoint = 'https://api.highwayapi.ai/v3/gemini-2.5-flash-tts';

// Request body; every '<string>' value is a placeholder to replace.
const payload = {
  contents: {
    role: '<string>',
    parts: {text: '<string>'}
  },
  generation_config: {
    temperature: 123,
    speech_config: {
      voice_config: {
        prebuilt_voice_config: {voice_name: '<string>'}
      },
      language_code: '<string>',
      multi_speaker_voice_config: {
        speaker_voice_configs: [
          {
            speaker: '<string>',
            voice_config: {
              prebuilt_voice_config: {voice_name: '<string>'}
            }
          }
        ]
      }
    }
  }
};

const requestInit = {
  method: 'POST',
  headers: {
    'Content-Type': '<content-type>',
    Authorization: '<authorization>'
  },
  body: JSON.stringify(payload)
};

// POST the payload, log the parsed JSON reply, and log any failure
// (network error or unparsable body) to the error console.
(async () => {
  try {
    const res = await fetch(endpoint, requestInit);
    const parsed = await res.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Request body; every '<string>' value is a placeholder to replace.
$payload = [
  'contents' => [
    'role' => '<string>',
    'parts' => ['text' => '<string>'],
  ],
  'generation_config' => [
    'temperature' => 123,
    'speech_config' => [
      'voice_config' => [
        'prebuilt_voice_config' => ['voice_name' => '<string>'],
      ],
      'language_code' => '<string>',
      'multi_speaker_voice_config' => [
        'speaker_voice_configs' => [
          [
            'speaker' => '<string>',
            'voice_config' => [
              'prebuilt_voice_config' => ['voice_name' => '<string>'],
            ],
          ],
        ],
      ],
    ],
  ],
];

$ch = curl_init();

// Configure a JSON POST whose response is returned as a string instead of
// being written straight to the output.
curl_setopt_array($ch, [
  CURLOPT_URL => "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: <content-type>"
  ],
]);

$body = curl_exec($ch);
$error = curl_error($ch);

curl_close($ch);

// Print the transport error when there is one, otherwise the response body.
echo $error ? "cURL Error #:" . $error : $body;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts the sample speech-synthesis request and prints the raw response
// body. Every failure (building the request, sending it, reading the reply)
// is reported and ends the program instead of being silently discarded.
func main() {
	const endpoint = "https://api.highwayapi.ai/v3/gemini-2.5-flash-tts"

	// Request body; every "<string>" value is a placeholder to replace.
	payload := strings.NewReader(`{
  "contents": {
    "role": "<string>",
    "parts": {
      "text": "<string>"
    }
  },
  "generation_config": {
    "temperature": 123,
    "speech_config": {
      "voice_config": {
        "prebuilt_voice_config": {
          "voice_name": "<string>"
        }
      },
      "language_code": "<string>",
      "multi_speaker_voice_config": {
        "speaker_voice_configs": [
          {
            "speaker": "<string>",
            "voice_config": {
              "prebuilt_voice_config": {
                "voice_name": "<string>"
              }
            }
          }
        ]
      }
    }
  }
}`)

	req, err := http.NewRequest(http.MethodPost, endpoint, payload)
	if err != nil {
		fmt.Println("building request failed:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil on error, so it must not be touched (no Body to close).
		fmt.Println("sending request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response failed:", err)
		return
	}

	fmt.Println(string(body))
}

// Request body; every "<string>" value is a placeholder to replace.
String requestJson = "{\n  \"contents\": {\n    \"role\": \"<string>\",\n    \"parts\": {\n      \"text\": \"<string>\"\n    }\n  },\n  \"generation_config\": {\n    \"temperature\": 123,\n    \"speech_config\": {\n      \"voice_config\": {\n        \"prebuilt_voice_config\": {\n          \"voice_name\": \"<string>\"\n        }\n      },\n      \"language_code\": \"<string>\",\n      \"multi_speaker_voice_config\": {\n        \"speaker_voice_configs\": [\n          {\n            \"speaker\": \"<string>\",\n            \"voice_config\": {\n              \"prebuilt_voice_config\": {\n                \"voice_name\": \"<string>\"\n              }\n            }\n          }\n        ]\n      }\n    }\n  }\n}";

// POST the body to the speech-synthesis endpoint and keep the reply as a string.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/gemini-2.5-flash-tts")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body(requestJson)
  .asString();

require 'uri'
require 'net/http'

# Endpoint that receives the speech-synthesis request.
endpoint = URI("https://api.highwayapi.ai/v3/gemini-2.5-flash-tts")

# Request body; every "<string>" value is a placeholder to replace.
# chomp drops the trailing newline the heredoc would otherwise append.
payload = <<~JSON.chomp
  {
    "contents": {
      "role": "<string>",
      "parts": {
        "text": "<string>"
      }
    },
    "generation_config": {
      "temperature": 123,
      "speech_config": {
        "voice_config": {
          "prebuilt_voice_config": {
            "voice_name": "<string>"
          }
        },
        "language_code": "<string>",
        "multi_speaker_voice_config": {
          "speaker_voice_configs": [
            {
              "speaker": "<string>",
              "voice_config": {
                "prebuilt_voice_config": {
                  "voice_name": "<string>"
                }
              }
            }
          ]
        }
      }
    }
  }
JSON

# HTTPS client for the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

post = Net::HTTP::Post.new(endpoint)
post["Content-Type"] = '<content-type>'
post["Authorization"] = '<authorization>'
post.body = payload

# Send the request and print the response body.
reply = client.request(post)
puts reply.read_body

{
  "audioContent": "<string>",
  "usageMetadata": {
    "totalTokenCount": 123,
    "promptTokenCount": 123,
    "candidatesTokenCount": 123
  }
}

Convertit du texte en parole via l’interface generateContent de Vertex AI. Le format du corps de requête est entièrement compatible avec celui de l’API officielle Vertex AI. Deux modes sont pris en charge : synchrone (une requête, une réponse) et streaming (une requête, une réponse en flux). La sortie est au format LINEAR16 PCM (24 kHz, mono, 16-bit signed little-endian), sans en-tête WAV.

En-têtes de requête

string

requis

Valeur d’énumération : application/json

string

requis

Format d’authentification Bearer : Bearer {{API Key}}.

Corps de requête

object

requis

Masquer properties

string

défaut:"user"

requis

Rôle, fixé à user. Valeur possible : user

object

requis

Masquer properties

string

requis

Contenu textuel à synthétiser en parole. L’API Vertex AI combine l’invite et le texte dans un seul champ, au format « {invite}: {texte} », par exemple ‘Say the following in a curious way: OK, so… tell me about this AI thing.’. Taille totale maximale : 8000 octets ; l’audio dépassant 655 secondes sera tronqué. Prend en charge les balises de marquage en ligne : [sigh], [laughing], [uhm], [sarcasm], [robotic], [shouting], [whispering], [extremely fast], [short pause], [medium pause], [long pause]. Limite de longueur : 0 - 8000

object

requis

Masquer properties

number

défaut:2

Paramètre de température, qui contrôle l’aléatoire et la créativité de la génération vocale. Une valeur plus élevée produit plus de créativité et de diversité ; une valeur plus basse est plus prévisible et focalisée. Plage valide : (0.0, 2.0], valeur recommandée : 2.0. Plage de valeurs : [0, 2]

object

requis

Masquer properties

object

Configuration vocale pour une seule personne. À choisir exclusivement avec multi_speaker_voice_config

Masquer properties

object

Masquer properties

string

Nom de la voix prédéfinie (insensible à la casse). 30 voix disponibles au choix (voix masculines et féminines). Valeurs possibles : Achernar, Achird, Algenib, Algieba, Alnilam, Aoede, Autonoe, Callirrhoe, Charon, Despina, Enceladus, Erinome, Fenrir, Gacrux, Iapetus, Kore, Laomedeia, Leda, Orus, Pulcherrima, Puck, Rasalgethi, Sadachbia, Sadaltager, Schedar, Sulafat, Umbriel, Vindemiatrix, Zephyr, Zubenelgenubi

string

requis

Code de langue (format BCP-47, insensible à la casse). Langues GA : ar-EG, bn-BD, nl-NL, en-IN, en-US, fr-FR, de-DE, hi-IN, id-ID, it-IT, ja-JP, ko-KR, mr-IN, pl-PL, pt-BR, ro-RO, ru-RU, es-ES, ta-IN, te-IN, th-TH, tr-TR, uk-UA, vi-VN. Les langues Preview, au nombre de 63, incluent notamment cmn-CN (mandarin chinois). Valeurs possibles : af-ZA, am-ET, ar-001, ar-EG, az-AZ, be-BY, bg-BG, bn-BD, ca-ES, ceb-PH, cmn-CN, cmn-TW, cs-CZ, da-DK, de-DE, el-GR, en-AU, en-GB, en-IN, en-US, es-419, es-ES, es-MX, et-EE, eu-ES, fa-IR, fi-FI, fil-PH, fr-CA, fr-FR, gl-ES, gu-IN, he-IL, hi-IN, hr-HR, ht-HT, hu-HU, hy-AM, id-ID, is-IS, it-IT, ja-JP, jv-JV, ka-GE, kn-IN, ko-KR, kok-IN, la-VA, lb-LU, lo-LA, lt-LT, lv-LV, mai-IN, mg-MG, mk-MK, ml-IN, mn-MN, mr-IN, ms-MY, my-MM, nb-NO, ne-NP, nl-NL, nn-NO, or-IN, pa-IN, pl-PL, ps-AF, pt-BR, pt-PT, ro-RO, ru-RU, sd-IN, si-LK, sk-SK, sl-SI, sq-AL, sr-RS, sv-SE, sw-KE, ta-IN, te-IN, th-TH, tr-TR, uk-UA, ur-PK, vi-VN

object

Configuration vocale multi-locuteurs. À choisir exclusivement avec voice_config. Remarque : gemini-2.5-flash-lite-preview-tts ne prend pas en charge la synthèse multi-locuteurs

Masquer properties

object[]

Liste des configurations vocales des locuteurs

Masquer properties

string

requis

Alias du locuteur, qui doit être composé uniquement de caractères alphanumériques et ne contenir aucun espace. Il doit correspondre à l’identifiant du locuteur dans contents.parts.text

object

requis

Masquer properties

object

Masquer properties

string

Informations de réponse

string

Contenu audio encodé en Base64. Format : LINEAR16 PCM (24 kHz, mono, 16-bit signed little-endian), sans en-tête WAV. Après avoir décodé le Base64 dans un fichier brut (par exemple input.raw), le client peut utiliser ffmpeg pour la conversion : ffmpeg -f s16le -ar 24k -ac 1 -i input.raw output.wav

object

Masquer properties

integer

Nombre total de tokens (promptTokenCount + candidatesTokenCount)

integer

Nombre de tokens consommés par le texte d’entrée

integer

Nombre de tokens consommés par l’audio de sortie (environ 25 tokens par seconde d’audio)

Clonage audio Fish Audio

MiniMax Music

Bases de l'API

Grands modèles de langage

Images

Vidéo

Audio

Gemini 2.5 Flash TTS Synthèse vocale

En-têtes de requête

Corps de requête

Informations de réponse

​En-têtes de requête

​Corps de requête

​Informations de réponse

En-têtes de requête

Corps de requête

Informations de réponse