Text-to-Speech API | Fish Audio

Fish Audio Text-to-Speech

# Convert text to speech and save the returned audio stream to speech.mp3.
# Replace <API_KEY> with your key. The payload values respect the documented
# constraints: temperature/top_p within 0..1, chunk_length within 100..300,
# and enum fields set to one of their allowed values. "references" is null
# because reference audio requires MessagePack serialization, not JSON.
curl --request POST \
  --url https://api.highwayapi.ai/v4beta/txt2speech \
  --header 'Authorization: Bearer <API_KEY>' \
  --header 'Content-Type: application/json' \
  --output speech.mp3 \
  --data '
{
  "text": "Hello, world!",
  "temperature": 0.9,
  "top_p": 0.9,
  "references": null,
  "reference_id": null,
  "prosody": {
    "speed": 1,
    "volume": 0
  },
  "chunk_length": 200,
  "normalize": true,
  "format": "mp3",
  "sample_rate": null,
  "mp3_bitrate": 128,
  "opus_bitrate": 32,
  "latency": "normal"
}
'

import requests

# Endpoint of the Fish Audio text-to-speech API.
url = "https://api.highwayapi.ai/v4beta/txt2speech"

# Request body. Values respect the documented constraints so the example can
# be sent as-is once the API key is filled in.
payload = {
    "text": "Hello, world!",
    "temperature": 0.9,    # documented range: 0 <= x <= 1
    "top_p": 0.9,          # documented range: 0 <= x <= 1
    "references": None,    # reference audio requires MessagePack, not JSON
    "reference_id": None,  # string | null
    "prosody": {
        "speed": 1,        # documented default
        "volume": 0        # documented default
    },
    "chunk_length": 200,   # documented range: 100 <= x <= 300
    "normalize": True,
    "format": "mp3",       # wav, pcm, mp3 or opus
    "sample_rate": None,   # integer | null
    "mp3_bitrate": 128,    # 64, 128 or 192
    "opus_bitrate": 32,    # -1000, 24, 32, 48 or 64
    "latency": "normal"    # normal or balanced
}
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer <API_KEY>"
}

response = requests.post(url, json=payload, headers=headers)

# Fail loudly on HTTP errors instead of treating an error body as audio.
response.raise_for_status()

# The API returns the audio stream directly, so store the raw bytes rather
# than decoding them as text.
with open("speech.mp3", "wb") as audio_file:
    audio_file.write(response.content)

// Request options for the Fish Audio text-to-speech endpoint. The payload
// values respect the documented constraints (temperature/top_p within 0..1,
// chunk_length within 100..300, enum fields set to an allowed value).
const options = {
  method: 'POST',
  headers: {'Content-Type': 'application/json', Authorization: 'Bearer <API_KEY>'},
  body: JSON.stringify({
    text: 'Hello, world!',
    temperature: 0.9,
    top_p: 0.9,
    references: null, // reference audio requires MessagePack, not JSON
    reference_id: null,
    prosody: {speed: 1, volume: 0},
    chunk_length: 200,
    normalize: true,
    format: 'mp3',
    sample_rate: null,
    mp3_bitrate: 128,
    opus_bitrate: 32,
    latency: 'normal'
  })
};

fetch('https://api.highwayapi.ai/v4beta/txt2speech', options)
  .then(res => {
    // Surface HTTP errors together with the server's message.
    if (!res.ok) {
      return res.text().then(detail => {
        throw new Error(`Request failed with status ${res.status}: ${detail}`);
      });
    }
    // The response is an audio stream, not JSON, so read it as binary.
    return res.arrayBuffer();
  })
  .then(audio => console.log(`Received ${audio.byteLength} bytes of audio`))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

// Request body. Values respect the documented constraints; nullable fields
// are sent as null because an empty PHP array would be encoded as [].
$payload = [
  'text' => 'Hello, world!',
  'temperature' => 0.9,   // documented range: 0 <= x <= 1
  'top_p' => 0.9,         // documented range: 0 <= x <= 1
  'references' => null,   // reference audio requires MessagePack, not JSON
  'reference_id' => null, // string | null
  'prosody' => [
      'speed' => 1,
      'volume' => 0
  ],
  'chunk_length' => 200,  // documented range: 100 <= x <= 300
  'normalize' => true,
  'format' => 'mp3',      // wav, pcm, mp3 or opus
  'sample_rate' => null,  // integer | null
  'mp3_bitrate' => 128,   // 64, 128 or 192
  'opus_bitrate' => 32,   // -1000, 24, 32, 48 or 64
  'latency' => 'normal'   // normal or balanced
];

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.highwayapi.ai/v4beta/txt2speech",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <API_KEY>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  // $response holds the raw audio stream in the requested format.
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a text-to-speech request to the Fish Audio endpoint and reports
// how many bytes of audio were returned. Every error is checked so that a
// failed request no longer leads to a nil-pointer dereference on res.Body.
func main() {

	url := "https://api.highwayapi.ai/v4beta/txt2speech"

	// Payload values respect the documented constraints (temperature/top_p
	// within 0..1, chunk_length within 100..300, enum fields set to an
	// allowed value). "references" is null because reference audio requires
	// MessagePack serialization rather than JSON.
	payload := strings.NewReader(`{
  "text": "Hello, world!",
  "temperature": 0.9,
  "top_p": 0.9,
  "references": null,
  "reference_id": null,
  "prosody": {
    "speed": 1,
    "volume": 0
  },
  "chunk_length": 200,
  "normalize": true,
  "format": "mp3",
  "sample_rate": null,
  "mp3_bitrate": 128,
  "opus_bitrate": 32,
  "latency": "normal"
}`)

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request failed:", err)
		return
	}

	req.Header.Add("Content-Type", "application/json")
	req.Header.Add("Authorization", "Bearer <API_KEY>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("sending request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response failed:", err)
		return
	}

	// On failure the body is an error message, so print it as text.
	if res.StatusCode < 200 || res.StatusCode >= 300 {
		fmt.Printf("unexpected status %s: %s\n", res.Status, body)
		return
	}

	// On success body holds the raw audio stream; avoid dumping binary data
	// to the terminal and report its size instead.
	fmt.Printf("received %d bytes of audio\n", len(body))

}

// Sends a text-to-speech request. The payload values respect the documented
// constraints (temperature/top_p within 0..1, chunk_length within 100..300,
// enum fields set to an allowed value); "references" is null because
// reference audio requires MessagePack serialization rather than JSON.
// NOTE(review): the response body is an audio stream; prefer a binary
// response type if the Unirest version in use provides one.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v4beta/txt2speech")
  .header("Content-Type", "application/json")
  .header("Authorization", "Bearer <API_KEY>")
  .body("{\n  \"text\": \"Hello, world!\",\n  \"temperature\": 0.9,\n  \"top_p\": 0.9,\n  \"references\": null,\n  \"reference_id\": null,\n  \"prosody\": {\n    \"speed\": 1,\n    \"volume\": 0\n  },\n  \"chunk_length\": 200,\n  \"normalize\": true,\n  \"format\": \"mp3\",\n  \"sample_rate\": null,\n  \"mp3_bitrate\": 128,\n  \"opus_bitrate\": 32,\n  \"latency\": \"normal\"\n}")
  .asString();

require 'uri'
require 'net/http'

# Endpoint of the Fish Audio text-to-speech API.
url = URI("https://api.highwayapi.ai/v4beta/txt2speech")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = 'application/json'
request["Authorization"] = 'Bearer <API_KEY>'

# Payload values respect the documented constraints (temperature/top_p within
# 0..1, chunk_length within 100..300, enum fields set to an allowed value).
# "references" is null because reference audio requires MessagePack, not JSON.
request.body = <<~JSON
  {
    "text": "Hello, world!",
    "temperature": 0.9,
    "top_p": 0.9,
    "references": null,
    "reference_id": null,
    "prosody": {
      "speed": 1,
      "volume": 0
    },
    "chunk_length": 200,
    "normalize": true,
    "format": "mp3",
    "sample_rate": null,
    "mp3_bitrate": 128,
    "opus_bitrate": 32,
    "latency": "normal"
  }
JSON

response = http.request(request)

if response.is_a?(Net::HTTPSuccess)
  # The API returns the audio stream directly; write the bytes unchanged.
  File.binwrite("speech.mp3", response.read_body)
else
  warn "Request failed (#{response.code}): #{response.read_body}"
end

POST

v4beta

txt2speech

Fish Audio Text-to-Speech

# Convert text to speech and save the returned audio stream to speech.mp3.
# Replace <API_KEY> with your key. The payload values respect the documented
# constraints: temperature/top_p within 0..1, chunk_length within 100..300,
# and enum fields set to one of their allowed values. "references" is null
# because reference audio requires MessagePack serialization, not JSON.
curl --request POST \
  --url https://api.highwayapi.ai/v4beta/txt2speech \
  --header 'Authorization: Bearer <API_KEY>' \
  --header 'Content-Type: application/json' \
  --output speech.mp3 \
  --data '
{
  "text": "Hello, world!",
  "temperature": 0.9,
  "top_p": 0.9,
  "references": null,
  "reference_id": null,
  "prosody": {
    "speed": 1,
    "volume": 0
  },
  "chunk_length": 200,
  "normalize": true,
  "format": "mp3",
  "sample_rate": null,
  "mp3_bitrate": 128,
  "opus_bitrate": 32,
  "latency": "normal"
}
'

import requests

# Endpoint of the Fish Audio text-to-speech API.
url = "https://api.highwayapi.ai/v4beta/txt2speech"

# Request body. Values respect the documented constraints so the example can
# be sent as-is once the API key is filled in.
payload = {
    "text": "Hello, world!",
    "temperature": 0.9,    # documented range: 0 <= x <= 1
    "top_p": 0.9,          # documented range: 0 <= x <= 1
    "references": None,    # reference audio requires MessagePack, not JSON
    "reference_id": None,  # string | null
    "prosody": {
        "speed": 1,        # documented default
        "volume": 0        # documented default
    },
    "chunk_length": 200,   # documented range: 100 <= x <= 300
    "normalize": True,
    "format": "mp3",       # wav, pcm, mp3 or opus
    "sample_rate": None,   # integer | null
    "mp3_bitrate": 128,    # 64, 128 or 192
    "opus_bitrate": 32,    # -1000, 24, 32, 48 or 64
    "latency": "normal"    # normal or balanced
}
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer <API_KEY>"
}

response = requests.post(url, json=payload, headers=headers)

# Fail loudly on HTTP errors instead of treating an error body as audio.
response.raise_for_status()

# The API returns the audio stream directly, so store the raw bytes rather
# than decoding them as text.
with open("speech.mp3", "wb") as audio_file:
    audio_file.write(response.content)

// Request options for the Fish Audio text-to-speech endpoint. The payload
// values respect the documented constraints (temperature/top_p within 0..1,
// chunk_length within 100..300, enum fields set to an allowed value).
const options = {
  method: 'POST',
  headers: {'Content-Type': 'application/json', Authorization: 'Bearer <API_KEY>'},
  body: JSON.stringify({
    text: 'Hello, world!',
    temperature: 0.9,
    top_p: 0.9,
    references: null, // reference audio requires MessagePack, not JSON
    reference_id: null,
    prosody: {speed: 1, volume: 0},
    chunk_length: 200,
    normalize: true,
    format: 'mp3',
    sample_rate: null,
    mp3_bitrate: 128,
    opus_bitrate: 32,
    latency: 'normal'
  })
};

fetch('https://api.highwayapi.ai/v4beta/txt2speech', options)
  .then(res => {
    // Surface HTTP errors together with the server's message.
    if (!res.ok) {
      return res.text().then(detail => {
        throw new Error(`Request failed with status ${res.status}: ${detail}`);
      });
    }
    // The response is an audio stream, not JSON, so read it as binary.
    return res.arrayBuffer();
  })
  .then(audio => console.log(`Received ${audio.byteLength} bytes of audio`))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

// Request body. Values respect the documented constraints; nullable fields
// are sent as null because an empty PHP array would be encoded as [].
$payload = [
  'text' => 'Hello, world!',
  'temperature' => 0.9,   // documented range: 0 <= x <= 1
  'top_p' => 0.9,         // documented range: 0 <= x <= 1
  'references' => null,   // reference audio requires MessagePack, not JSON
  'reference_id' => null, // string | null
  'prosody' => [
      'speed' => 1,
      'volume' => 0
  ],
  'chunk_length' => 200,  // documented range: 100 <= x <= 300
  'normalize' => true,
  'format' => 'mp3',      // wav, pcm, mp3 or opus
  'sample_rate' => null,  // integer | null
  'mp3_bitrate' => 128,   // 64, 128 or 192
  'opus_bitrate' => 32,   // -1000, 24, 32, 48 or 64
  'latency' => 'normal'   // normal or balanced
];

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.highwayapi.ai/v4beta/txt2speech",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <API_KEY>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  // $response holds the raw audio stream in the requested format.
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a text-to-speech request to the Fish Audio endpoint and reports
// how many bytes of audio were returned. Every error is checked so that a
// failed request no longer leads to a nil-pointer dereference on res.Body.
func main() {

	url := "https://api.highwayapi.ai/v4beta/txt2speech"

	// Payload values respect the documented constraints (temperature/top_p
	// within 0..1, chunk_length within 100..300, enum fields set to an
	// allowed value). "references" is null because reference audio requires
	// MessagePack serialization rather than JSON.
	payload := strings.NewReader(`{
  "text": "Hello, world!",
  "temperature": 0.9,
  "top_p": 0.9,
  "references": null,
  "reference_id": null,
  "prosody": {
    "speed": 1,
    "volume": 0
  },
  "chunk_length": 200,
  "normalize": true,
  "format": "mp3",
  "sample_rate": null,
  "mp3_bitrate": 128,
  "opus_bitrate": 32,
  "latency": "normal"
}`)

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request failed:", err)
		return
	}

	req.Header.Add("Content-Type", "application/json")
	req.Header.Add("Authorization", "Bearer <API_KEY>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("sending request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response failed:", err)
		return
	}

	// On failure the body is an error message, so print it as text.
	if res.StatusCode < 200 || res.StatusCode >= 300 {
		fmt.Printf("unexpected status %s: %s\n", res.Status, body)
		return
	}

	// On success body holds the raw audio stream; avoid dumping binary data
	// to the terminal and report its size instead.
	fmt.Printf("received %d bytes of audio\n", len(body))

}

// Sends a text-to-speech request. The payload values respect the documented
// constraints (temperature/top_p within 0..1, chunk_length within 100..300,
// enum fields set to an allowed value); "references" is null because
// reference audio requires MessagePack serialization rather than JSON.
// NOTE(review): the response body is an audio stream; prefer a binary
// response type if the Unirest version in use provides one.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v4beta/txt2speech")
  .header("Content-Type", "application/json")
  .header("Authorization", "Bearer <API_KEY>")
  .body("{\n  \"text\": \"Hello, world!\",\n  \"temperature\": 0.9,\n  \"top_p\": 0.9,\n  \"references\": null,\n  \"reference_id\": null,\n  \"prosody\": {\n    \"speed\": 1,\n    \"volume\": 0\n  },\n  \"chunk_length\": 200,\n  \"normalize\": true,\n  \"format\": \"mp3\",\n  \"sample_rate\": null,\n  \"mp3_bitrate\": 128,\n  \"opus_bitrate\": 32,\n  \"latency\": \"normal\"\n}")
  .asString();

require 'uri'
require 'net/http'

# Endpoint of the Fish Audio text-to-speech API.
url = URI("https://api.highwayapi.ai/v4beta/txt2speech")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = 'application/json'
request["Authorization"] = 'Bearer <API_KEY>'

# Payload values respect the documented constraints (temperature/top_p within
# 0..1, chunk_length within 100..300, enum fields set to an allowed value).
# "references" is null because reference audio requires MessagePack, not JSON.
request.body = <<~JSON
  {
    "text": "Hello, world!",
    "temperature": 0.9,
    "top_p": 0.9,
    "references": null,
    "reference_id": null,
    "prosody": {
      "speed": 1,
      "volume": 0
    },
    "chunk_length": 200,
    "normalize": true,
    "format": "mp3",
    "sample_rate": null,
    "mp3_bitrate": 128,
    "opus_bitrate": 32,
    "latency": "normal"
  }
JSON

response = http.request(request)

if response.is_a?(Net::HTTPSuccess)
  # The API returns the audio stream directly; write the bytes unchanged.
  File.binwrite("speech.mp3", response.read_body)
else
  warn "Request failed (#{response.code}): #{response.read_body}"
end

For best results, we recommend using audio cloning to upload reference audio before using this API. This will improve speech quality and reduce latency.

Fish Audio converts text into speech. Supported audio formats:

WAV / PCM
- Sample rates: 8kHz, 16kHz, 24kHz, 32kHz, 44.1kHz
- Default sample rate: 44.1kHz
- 16-bit, mono
MP3
- Sample rates: 32kHz, 44.1kHz
- Default sample rate: 44.1kHz
- Mono
- Bitrates: 64kbps, 128kbps (default), 192kbps
Opus
- Sample rate: 48kHz
- Default sample rate: 48kHz
- Mono
- Bitrates: -1000 (automatic), 24kbps, 32kbps (default), 48kbps, 64kbps

Request Headers

Content-Type · string

required

Enum value: application/json

Authorization · string

required

Bearer authentication format: Bearer {{API Key}}.

Request Body

string

required

The text to convert to speech.

number

Controls the randomness of speech generation. Higher values (for example, 1.0) make the output more random, while lower values (for example, 0.1) make it more deterministic. We recommend using 0.9 for the s1 model. Required range: 0 <= x <= 1

number

Controls diversity through nucleus sampling. Lower values (for example, 0.1) make the output more focused, while higher values (for example, 1.0) allow more diversity. We recommend using 0.9 for the s1 model. Required range: 0 <= x <= 1

ReferenceAudio · object[] | null

Reference audio for the voice. This requires MessagePack serialization and will override reference_voices and reference_texts.

Show Properties

file

required

The reference audio file.

string

required

The reference text corresponding to the audio.

string | null

The reference model ID for the voice.

ProsodyControl · object

Prosody control for the voice.

Show Properties

number

default:1

Speech speed control.

number

default:0

Speech volume control.

integer

default:200

The chunk length for the voice. Required range: 100 <= x <= 300

boolean

default:true

Whether to normalize the speech. This will reduce latency, but may reduce performance when handling numbers and dates.

enum<string>

default:"mp3"

The format for the speech. Allowed values: wav, pcm, mp3, opus

integer | null

The sample rate for the speech.

enum<integer>

default:128

The MP3 bitrate for the speech, in kbps. Allowed values: 64, 128, 192

enum<integer>

default:32

The Opus bitrate for the speech, in kbps. Allowed values: -1000 (automatic), 24, 32, 48, 64

enum<string>

default:"normal"

The latency setting for the speech. "balanced" reduces latency but may cause performance degradation. Allowed values: normal, balanced

Response Information

The API will directly return an audio stream in the format specified by the format parameter (default: mp3).

Fish Audio S2 Pro Text to Speech

Fish Audio Audio Cloning

API Basics

Large Language Models

Images

Video

Audio

Fish Audio Text-to-Speech

Request Headers

Request Body

Response Information

Request Headers

Request Body

Response Information

Request Headers

Request Body

Response Information