Synthèse vocale Fish Audio S2 Pro

# POSTs a JSON body to the Fish Audio S2 Pro text-to-speech endpoint.
# Every <...> value and every 123 is a placeholder to replace before running:
# per the header reference on this page, Content-Type is application/json and
# Authorization uses the form "Bearer {{API Key}}".
curl --request POST \
  --url https://api.highwayapi.ai/v3/fish-audio-s2-pro-text-to-speech \
  --header 'Authorization: <authorization>' \
  --header 'Content-Type: <content-type>' \
  --data '
{
  "text": "<string>",
  "top_p": 123,
  "format": "<string>",
  "latency": "<string>",
  "prosody": {
    "speed": 123,
    "volume": 123,
    "normalize_loudness": true
  },
  "normalize": true,
  "references": [
    {
      "text": "<string>",
      "audio": "<string>"
    }
  ],
  "mp3_bitrate": 123,
  "sample_rate": 123,
  "temperature": 123,
  "chunk_length": 123,
  "opus_bitrate": 123,
  "reference_id": "<string>",
  "max_new_tokens": 123,
  "min_chunk_length": 123,
  "repetition_penalty": 123,
  "early_stop_threshold": 123,
  "condition_on_previous_chunks": true
}
'

import requests

# Endpoint of the Fish Audio S2 Pro text-to-speech model.
url = "https://api.highwayapi.ai/v3/fish-audio-s2-pro-text-to-speech"

# Request body. Every "<string>" and 123 is a placeholder to replace with a
# real value before running the sample.
payload = {
    "text": "<string>",
    "top_p": 123,
    "format": "<string>",
    "latency": "<string>",
    "prosody": {
        "speed": 123,
        "volume": 123,
        "normalize_loudness": True
    },
    "normalize": True,
    "references": [
        {
            "text": "<string>",
            "audio": "<string>"
        }
    ],
    "mp3_bitrate": 123,
    "sample_rate": 123,
    "temperature": 123,
    "chunk_length": 123,
    "opus_bitrate": 123,
    "reference_id": "<string>",
    "max_new_tokens": 123,
    "min_chunk_length": 123,
    "repetition_penalty": 123,
    "early_stop_threshold": 123,
    "condition_on_previous_chunks": True
}
# Placeholder headers, also to be replaced before running.
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

# requests applies no timeout unless one is given, so a stalled connection
# would block this script forever; give up after 30 seconds instead.
response = requests.post(url, json=payload, headers=headers, timeout=30)

print(response.text)

// Request options for the text-to-speech call. Every '<...>' value and every
// 123 is a placeholder to replace before running the sample.
const options = {
  method: 'POST',
  headers: {'Content-Type': '<content-type>', Authorization: '<authorization>'},
  body: JSON.stringify({
    text: '<string>',
    top_p: 123,
    format: '<string>',
    latency: '<string>',
    prosody: {speed: 123, volume: 123, normalize_loudness: true},
    normalize: true,
    references: [{text: '<string>', audio: '<string>'}],
    mp3_bitrate: 123,
    sample_rate: 123,
    temperature: 123,
    chunk_length: 123,
    opus_bitrate: 123,
    reference_id: '<string>',
    max_new_tokens: 123,
    min_chunk_length: 123,
    repetition_penalty: 123,
    early_stop_threshold: 123,
    condition_on_previous_chunks: true
  })
};

// The response is documented as binary audio, not JSON, so the body is read as
// raw bytes: res.json() would reject with a parse error on a successful call.
fetch('https://api.highwayapi.ai/v3/fish-audio-s2-pro-text-to-speech', options)
  .then(res => {
    if (!res.ok) {
      // Report the error payload as text instead of treating it as audio.
      return res.text().then(detail => {
        throw new Error(`HTTP ${res.status}: ${detail}`);
      });
    }
    return res.arrayBuffer();
  })
  .then(audio => console.log(`Received ${audio.byteLength} bytes of audio`))
  .catch(err => console.error(err));

<?php

// JSON-encoded request body. Every '<string>' and 123 is a placeholder.
$payload = json_encode([
    'text' => '<string>',
    'top_p' => 123,
    'format' => '<string>',
    'latency' => '<string>',
    'prosody' => [
        'speed' => 123,
        'volume' => 123,
        'normalize_loudness' => true,
    ],
    'normalize' => true,
    'references' => [
        [
            'text' => '<string>',
            'audio' => '<string>',
        ],
    ],
    'mp3_bitrate' => 123,
    'sample_rate' => 123,
    'temperature' => 123,
    'chunk_length' => 123,
    'opus_bitrate' => 123,
    'reference_id' => '<string>',
    'max_new_tokens' => 123,
    'min_chunk_length' => 123,
    'repetition_penalty' => 123,
    'early_stop_threshold' => 123,
    'condition_on_previous_chunks' => true,
]);

// Placeholder request headers.
$headers = [
    "Authorization: <authorization>",
    "Content-Type: <content-type>",
];

$ch = curl_init();

curl_setopt_array($ch, [
    CURLOPT_URL => "https://api.highwayapi.ai/v3/fish-audio-s2-pro-text-to-speech",
    // Have curl_exec() return the body instead of printing it directly.
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_ENCODING => "",
    CURLOPT_MAXREDIRS => 10,
    CURLOPT_TIMEOUT => 30,
    CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
    CURLOPT_CUSTOMREQUEST => "POST",
    CURLOPT_POSTFIELDS => $payload,
    CURLOPT_HTTPHEADER => $headers,
]);

$response = curl_exec($ch);
$error = curl_error($ch);

curl_close($ch);

// Print the response body, or the transport error when curl reported one.
if (!$error) {
    echo $response;
} else {
    echo "cURL Error #:" . $error;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a sample request to the Fish Audio S2 Pro text-to-speech endpoint
// and prints the response body to standard output.
//
// Every "<...>" value and every 123 in the payload and headers is a placeholder
// to replace before running. Any failure while building the request, sending
// it, or reading the response is printed and ends the program early.
func main() {
	url := "https://api.highwayapi.ai/v3/fish-audio-s2-pro-text-to-speech"

	payload := strings.NewReader("{\n  \"text\": \"<string>\",\n  \"top_p\": 123,\n  \"format\": \"<string>\",\n  \"latency\": \"<string>\",\n  \"prosody\": {\n    \"speed\": 123,\n    \"volume\": 123,\n    \"normalize_loudness\": true\n  },\n  \"normalize\": true,\n  \"references\": [\n    {\n      \"text\": \"<string>\",\n      \"audio\": \"<string>\"\n    }\n  ],\n  \"mp3_bitrate\": 123,\n  \"sample_rate\": 123,\n  \"temperature\": 123,\n  \"chunk_length\": 123,\n  \"opus_bitrate\": 123,\n  \"reference_id\": \"<string>\",\n  \"max_new_tokens\": 123,\n  \"min_chunk_length\": 123,\n  \"repetition_penalty\": 123,\n  \"early_stop_threshold\": 123,\n  \"condition_on_previous_chunks\": true\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so its Body must not be touched.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	fmt.Println(string(body))
}

// Sends the JSON body below as a POST through Unirest and reads the response
// as a String. Every "<...>" value and every 123 is a placeholder to replace.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/fish-audio-s2-pro-text-to-speech")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body("{\n  \"text\": \"<string>\",\n  \"top_p\": 123,\n  \"format\": \"<string>\",\n  \"latency\": \"<string>\",\n  \"prosody\": {\n    \"speed\": 123,\n    \"volume\": 123,\n    \"normalize_loudness\": true\n  },\n  \"normalize\": true,\n  \"references\": [\n    {\n      \"text\": \"<string>\",\n      \"audio\": \"<string>\"\n    }\n  ],\n  \"mp3_bitrate\": 123,\n  \"sample_rate\": 123,\n  \"temperature\": 123,\n  \"chunk_length\": 123,\n  \"opus_bitrate\": 123,\n  \"reference_id\": \"<string>\",\n  \"max_new_tokens\": 123,\n  \"min_chunk_length\": 123,\n  \"repetition_penalty\": 123,\n  \"early_stop_threshold\": 123,\n  \"condition_on_previous_chunks\": true\n}")
  .asString();

require 'uri'
require 'net/http'

# Endpoint of the Fish Audio S2 Pro text-to-speech model.
endpoint = URI("https://api.highwayapi.ai/v3/fish-audio-s2-pro-text-to-speech")

# POST request with placeholder headers and a placeholder JSON body; every
# <...> value and every 123 is to be replaced before running.
post = Net::HTTP::Post.new(endpoint)
post["Content-Type"] = '<content-type>'
post["Authorization"] = '<authorization>'
post.body = "{\n  \"text\": \"<string>\",\n  \"top_p\": 123,\n  \"format\": \"<string>\",\n  \"latency\": \"<string>\",\n  \"prosody\": {\n    \"speed\": 123,\n    \"volume\": 123,\n    \"normalize_loudness\": true\n  },\n  \"normalize\": true,\n  \"references\": [\n    {\n      \"text\": \"<string>\",\n      \"audio\": \"<string>\"\n    }\n  ],\n  \"mp3_bitrate\": 123,\n  \"sample_rate\": 123,\n  \"temperature\": 123,\n  \"chunk_length\": 123,\n  \"opus_bitrate\": 123,\n  \"reference_id\": \"<string>\",\n  \"max_new_tokens\": 123,\n  \"min_chunk_length\": 123,\n  \"repetition_penalty\": 123,\n  \"early_stop_threshold\": 123,\n  \"condition_on_previous_chunks\": true\n}"

# Connection to the endpoint host with SSL switched on.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# Send the request and print the response body.
reply = client.request(post)
puts reply.read_body

POST

fish-audio-s2-pro-text-to-speech

Synthèse vocale Fish Audio S2 Pro

# POSTs a JSON body to the Fish Audio S2 Pro text-to-speech endpoint.
# Every <...> value and every 123 is a placeholder to replace before running:
# per the header reference on this page, Content-Type is application/json and
# Authorization uses the form "Bearer {{API Key}}".
curl --request POST \
  --url https://api.highwayapi.ai/v3/fish-audio-s2-pro-text-to-speech \
  --header 'Authorization: <authorization>' \
  --header 'Content-Type: <content-type>' \
  --data '
{
  "text": "<string>",
  "top_p": 123,
  "format": "<string>",
  "latency": "<string>",
  "prosody": {
    "speed": 123,
    "volume": 123,
    "normalize_loudness": true
  },
  "normalize": true,
  "references": [
    {
      "text": "<string>",
      "audio": "<string>"
    }
  ],
  "mp3_bitrate": 123,
  "sample_rate": 123,
  "temperature": 123,
  "chunk_length": 123,
  "opus_bitrate": 123,
  "reference_id": "<string>",
  "max_new_tokens": 123,
  "min_chunk_length": 123,
  "repetition_penalty": 123,
  "early_stop_threshold": 123,
  "condition_on_previous_chunks": true
}
'

import requests

# Endpoint of the Fish Audio S2 Pro text-to-speech model.
url = "https://api.highwayapi.ai/v3/fish-audio-s2-pro-text-to-speech"

# Request body. Every "<string>" and 123 is a placeholder to replace with a
# real value before running the sample.
payload = {
    "text": "<string>",
    "top_p": 123,
    "format": "<string>",
    "latency": "<string>",
    "prosody": {
        "speed": 123,
        "volume": 123,
        "normalize_loudness": True
    },
    "normalize": True,
    "references": [
        {
            "text": "<string>",
            "audio": "<string>"
        }
    ],
    "mp3_bitrate": 123,
    "sample_rate": 123,
    "temperature": 123,
    "chunk_length": 123,
    "opus_bitrate": 123,
    "reference_id": "<string>",
    "max_new_tokens": 123,
    "min_chunk_length": 123,
    "repetition_penalty": 123,
    "early_stop_threshold": 123,
    "condition_on_previous_chunks": True
}
# Placeholder headers, also to be replaced before running.
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

# requests applies no timeout unless one is given, so a stalled connection
# would block this script forever; give up after 30 seconds instead.
response = requests.post(url, json=payload, headers=headers, timeout=30)

print(response.text)

// Request options for the text-to-speech call. Every '<...>' value and every
// 123 is a placeholder to replace before running the sample.
const options = {
  method: 'POST',
  headers: {'Content-Type': '<content-type>', Authorization: '<authorization>'},
  body: JSON.stringify({
    text: '<string>',
    top_p: 123,
    format: '<string>',
    latency: '<string>',
    prosody: {speed: 123, volume: 123, normalize_loudness: true},
    normalize: true,
    references: [{text: '<string>', audio: '<string>'}],
    mp3_bitrate: 123,
    sample_rate: 123,
    temperature: 123,
    chunk_length: 123,
    opus_bitrate: 123,
    reference_id: '<string>',
    max_new_tokens: 123,
    min_chunk_length: 123,
    repetition_penalty: 123,
    early_stop_threshold: 123,
    condition_on_previous_chunks: true
  })
};

// The response is documented as binary audio, not JSON, so the body is read as
// raw bytes: res.json() would reject with a parse error on a successful call.
fetch('https://api.highwayapi.ai/v3/fish-audio-s2-pro-text-to-speech', options)
  .then(res => {
    if (!res.ok) {
      // Report the error payload as text instead of treating it as audio.
      return res.text().then(detail => {
        throw new Error(`HTTP ${res.status}: ${detail}`);
      });
    }
    return res.arrayBuffer();
  })
  .then(audio => console.log(`Received ${audio.byteLength} bytes of audio`))
  .catch(err => console.error(err));

<?php

// JSON-encoded request body. Every '<string>' and 123 is a placeholder.
$payload = json_encode([
    'text' => '<string>',
    'top_p' => 123,
    'format' => '<string>',
    'latency' => '<string>',
    'prosody' => [
        'speed' => 123,
        'volume' => 123,
        'normalize_loudness' => true,
    ],
    'normalize' => true,
    'references' => [
        [
            'text' => '<string>',
            'audio' => '<string>',
        ],
    ],
    'mp3_bitrate' => 123,
    'sample_rate' => 123,
    'temperature' => 123,
    'chunk_length' => 123,
    'opus_bitrate' => 123,
    'reference_id' => '<string>',
    'max_new_tokens' => 123,
    'min_chunk_length' => 123,
    'repetition_penalty' => 123,
    'early_stop_threshold' => 123,
    'condition_on_previous_chunks' => true,
]);

// Placeholder request headers.
$headers = [
    "Authorization: <authorization>",
    "Content-Type: <content-type>",
];

$ch = curl_init();

curl_setopt_array($ch, [
    CURLOPT_URL => "https://api.highwayapi.ai/v3/fish-audio-s2-pro-text-to-speech",
    // Have curl_exec() return the body instead of printing it directly.
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_ENCODING => "",
    CURLOPT_MAXREDIRS => 10,
    CURLOPT_TIMEOUT => 30,
    CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
    CURLOPT_CUSTOMREQUEST => "POST",
    CURLOPT_POSTFIELDS => $payload,
    CURLOPT_HTTPHEADER => $headers,
]);

$response = curl_exec($ch);
$error = curl_error($ch);

curl_close($ch);

// Print the response body, or the transport error when curl reported one.
if (!$error) {
    echo $response;
} else {
    echo "cURL Error #:" . $error;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a sample request to the Fish Audio S2 Pro text-to-speech endpoint
// and prints the response body to standard output.
//
// Every "<...>" value and every 123 in the payload and headers is a placeholder
// to replace before running. Any failure while building the request, sending
// it, or reading the response is printed and ends the program early.
func main() {
	url := "https://api.highwayapi.ai/v3/fish-audio-s2-pro-text-to-speech"

	payload := strings.NewReader("{\n  \"text\": \"<string>\",\n  \"top_p\": 123,\n  \"format\": \"<string>\",\n  \"latency\": \"<string>\",\n  \"prosody\": {\n    \"speed\": 123,\n    \"volume\": 123,\n    \"normalize_loudness\": true\n  },\n  \"normalize\": true,\n  \"references\": [\n    {\n      \"text\": \"<string>\",\n      \"audio\": \"<string>\"\n    }\n  ],\n  \"mp3_bitrate\": 123,\n  \"sample_rate\": 123,\n  \"temperature\": 123,\n  \"chunk_length\": 123,\n  \"opus_bitrate\": 123,\n  \"reference_id\": \"<string>\",\n  \"max_new_tokens\": 123,\n  \"min_chunk_length\": 123,\n  \"repetition_penalty\": 123,\n  \"early_stop_threshold\": 123,\n  \"condition_on_previous_chunks\": true\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so its Body must not be touched.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	fmt.Println(string(body))
}

// Sends the JSON body below as a POST through Unirest and reads the response
// as a String. Every "<...>" value and every 123 is a placeholder to replace.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/fish-audio-s2-pro-text-to-speech")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body("{\n  \"text\": \"<string>\",\n  \"top_p\": 123,\n  \"format\": \"<string>\",\n  \"latency\": \"<string>\",\n  \"prosody\": {\n    \"speed\": 123,\n    \"volume\": 123,\n    \"normalize_loudness\": true\n  },\n  \"normalize\": true,\n  \"references\": [\n    {\n      \"text\": \"<string>\",\n      \"audio\": \"<string>\"\n    }\n  ],\n  \"mp3_bitrate\": 123,\n  \"sample_rate\": 123,\n  \"temperature\": 123,\n  \"chunk_length\": 123,\n  \"opus_bitrate\": 123,\n  \"reference_id\": \"<string>\",\n  \"max_new_tokens\": 123,\n  \"min_chunk_length\": 123,\n  \"repetition_penalty\": 123,\n  \"early_stop_threshold\": 123,\n  \"condition_on_previous_chunks\": true\n}")
  .asString();

require 'uri'
require 'net/http'

# Endpoint of the Fish Audio S2 Pro text-to-speech model.
endpoint = URI("https://api.highwayapi.ai/v3/fish-audio-s2-pro-text-to-speech")

# POST request with placeholder headers and a placeholder JSON body; every
# <...> value and every 123 is to be replaced before running.
post = Net::HTTP::Post.new(endpoint)
post["Content-Type"] = '<content-type>'
post["Authorization"] = '<authorization>'
post.body = "{\n  \"text\": \"<string>\",\n  \"top_p\": 123,\n  \"format\": \"<string>\",\n  \"latency\": \"<string>\",\n  \"prosody\": {\n    \"speed\": 123,\n    \"volume\": 123,\n    \"normalize_loudness\": true\n  },\n  \"normalize\": true,\n  \"references\": [\n    {\n      \"text\": \"<string>\",\n      \"audio\": \"<string>\"\n    }\n  ],\n  \"mp3_bitrate\": 123,\n  \"sample_rate\": 123,\n  \"temperature\": 123,\n  \"chunk_length\": 123,\n  \"opus_bitrate\": 123,\n  \"reference_id\": \"<string>\",\n  \"max_new_tokens\": 123,\n  \"min_chunk_length\": 123,\n  \"repetition_penalty\": 123,\n  \"early_stop_threshold\": 123,\n  \"condition_on_previous_chunks\": true\n}"

# Connection to the endpoint host with SSL switched on.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# Send the request and print the response body.
reply = client.request(post)
puts reply.read_body

Le modèle de synthèse vocale Fish Audio S2 Pro convertit du texte en parole naturelle et prend en charge les voix de référence, le contrôle de l’échantillonnage, la segmentation, les formats audio et le contrôle de la prosodie.

En-têtes de requête

Content-Type — string

requis

Valeur d’énumération : application/json

Authorization — string

requis

Format d’authentification Bearer : Bearer {{API Key}}.

Corps de la requête

text — string

requis

Texte à convertir en parole. Le texte multi-locuteur S2-Pro peut utiliser les balises <|speaker:0|>你好<|speaker:1|>你好呀.

top_p — number

Contrôle de la diversité de l’échantillonnage par noyau. Plage de valeurs : [0, 1]

format — string

défaut:"mp3"

Format audio de sortie. Valeurs possibles : wav, pcm, mp3, opus

latency — string

défaut:"normal"

Niveau de latence. Valeurs possibles : low, normal, balanced

prosody — object

Contrôle de la prosodie.

Masquer les propriétés

prosody.speed — number

défaut:1

Multiplicateur de vitesse d’élocution.

prosody.volume — number

défaut:0

Réglage du volume.

prosody.normalize_loudness — boolean

défaut:true

Indique s’il faut normaliser la sonie de sortie.

normalize — boolean

défaut:true

Normalise les textes chinois et anglais.

references — object[]

Échantillons audio de référence pour le clonage vocal zero-shot.

Masquer les propriétés

references[].text — string

Texte correspondant à l’audio de référence.

references[].audio — string

Audio de référence, transmis en base64 ou via une URL selon la prise en charge du fournisseur.

mp3_bitrate — integer

défaut:128

Débit MP3, en kbps. Valeurs possibles : 64, 128, 192

sample_rate — integer

Taux d’échantillonnage de sortie en Hz. Si vide, la valeur par défaut du format est utilisée : 48000 Hz pour opus, généralement 44100 Hz pour les autres formats.

temperature — number

Contrôle de l’expressivité. Plage de valeurs : [0, 1]

chunk_length — integer

défaut:300

Taille de segmentation du texte. Plage de valeurs : [100, 300]

opus_bitrate — integer

Débit Opus, en bps ; -1000 signifie automatique. Valeurs possibles : -1000, 24000, 32000, 48000, 64000

reference_id — string

ID du modèle de voix ; dans les scénarios multi-locuteurs, vous pouvez transmettre un tableau correspondant à l’index speaker.

max_new_tokens — integer

défaut:1024

Nombre maximal de tokens audio par segment.

min_chunk_length — integer

défaut:50

Nombre minimal de caractères avant segmentation. Plage de valeurs : [0, 100]

repetition_penalty — number

Coefficient de pénalité réduisant la répétition des motifs audio.

early_stop_threshold — number

défaut:1

Seuil d’arrêt anticipé. Plage de valeurs : [0, 1]

condition_on_previous_chunks — boolean

défaut:true

Utilise les segments audio précédents comme contexte.

Informations de réponse

Audio généré. Format : binary

Réplication rapide d’audio ElevenLabs

Synthèse vocale Fish Audio

Bases de l'API

Grands modèles de langage

Images

Vidéo

Audio

Synthèse vocale Fish Audio S2 Pro

En-têtes de requête

Corps de la requête

Informations de réponse

​En-têtes de requête

​Corps de la requête

​Informations de réponse

En-têtes de requête

Corps de la requête

Informations de réponse