API de síntese de voz

Síntese de voz Fish Audio

# Synthesize speech and save the returned audio stream to speech.mp3.
# Replace <api-key> with your API key and every <string> with a real value.
# The payload values follow the documented ranges, enums and defaults.
curl --request POST \
  --url https://api.highwayapi.ai/v4beta/txt2speech \
  --header 'Authorization: Bearer <api-key>' \
  --header 'Content-Type: application/json' \
  --output speech.mp3 \
  --data '
{
  "text": "<string>",
  "temperature": 0.9,
  "top_p": 0.9,
  "references": null,
  "reference_id": "<string>",
  "prosody": {
    "speed": 1,
    "volume": 0
  },
  "chunk_length": 200,
  "normalize": true,
  "format": "mp3",
  "sample_rate": null,
  "mp3_bitrate": 128,
  "opus_bitrate": 32,
  "latency": "normal"
}
'

import requests

# Request speech synthesis and save the returned audio stream to disk.
url = "https://api.highwayapi.ai/v4beta/txt2speech"

# Values follow the documented ranges, enums and defaults; replace the
# "<string>" placeholders with real values before sending.
payload = {
    "text": "<string>",
    "temperature": 0.9,
    "top_p": 0.9,
    "references": None,
    "reference_id": "<string>",
    "prosody": {
        "speed": 1,
        "volume": 0
    },
    "chunk_length": 200,
    "normalize": True,
    "format": "mp3",
    "sample_rate": None,
    "mp3_bitrate": 128,
    "opus_bitrate": 32,
    "latency": "normal"
}
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer <api-key>"
}

response = requests.post(url, json=payload, headers=headers, timeout=30)
# Surface HTTP errors instead of writing an error body into the audio file.
response.raise_for_status()

# The API returns raw audio bytes, so write them in binary mode.
with open("speech.mp3", "wb") as audio_file:
    audio_file.write(response.content)

// Request options for the speech synthesis call. Payload values follow the
// documented ranges, enums and defaults; replace the placeholders before use.
const options = {
  method: 'POST',
  headers: {'Content-Type': 'application/json', Authorization: 'Bearer <api-key>'},
  body: JSON.stringify({
    text: '<string>',
    temperature: 0.9,
    top_p: 0.9,
    references: null,
    reference_id: '<string>',
    prosody: {speed: 1, volume: 0},
    chunk_length: 200,
    normalize: true,
    format: 'mp3',
    sample_rate: null,
    mp3_bitrate: 128,
    opus_bitrate: 32,
    latency: 'normal'
  })
};

fetch('https://api.highwayapi.ai/v4beta/txt2speech', options)
  .then(res => {
    // Report failures with the response body instead of treating it as audio.
    if (!res.ok) {
      return res.text().then(body => {
        throw new Error(`Request failed with status ${res.status}: ${body}`);
      });
    }
    // The API returns an audio stream, so read raw bytes rather than JSON.
    return res.arrayBuffer();
  })
  .then(audio => console.log(`Received ${audio.byteLength} bytes of audio`))
  .catch(err => console.error(err));

<?php

// Request speech synthesis and save the returned audio stream to speech.mp3.
$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.highwayapi.ai/v4beta/txt2speech",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  // Values follow the documented ranges, enums and defaults; replace the
  // '<string>' placeholders with real values before sending.
  CURLOPT_POSTFIELDS => json_encode([
    'text' => '<string>',
    'temperature' => 0.9,
    'top_p' => 0.9,
    'references' => null,
    'reference_id' => '<string>',
    'prosody' => [
        'speed' => 1,
        'volume' => 0
    ],
    'chunk_length' => 200,
    'normalize' => true,
    'format' => 'mp3',
    'sample_rate' => null,
    'mp3_bitrate' => 128,
    'opus_bitrate' => 32,
    'latency' => 'normal'
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <api-key>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);
$status = curl_getinfo($curl, CURLINFO_RESPONSE_CODE);

curl_close($curl);

if ($response === false) {
  // Transport-level failure: curl_exec() returns false and curl_error() explains why.
  echo "cURL Error #:" . $err;
} elseif ($status >= 400) {
  // The server answered with an error; show its body instead of saving it as audio.
  echo "HTTP Error " . $status . ": " . $response;
} else {
  // The API returns raw audio bytes, so store them in a file.
  file_put_contents("speech.mp3", $response);
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends a speech synthesis request and reports how many bytes of audio
// were returned. Every error is checked so a failed request cannot cause a
// nil-pointer panic when the response body is closed.
func main() {

	url := "https://api.highwayapi.ai/v4beta/txt2speech"

	// Values follow the documented ranges, enums and defaults; replace the
	// "<string>" placeholders with real values before sending.
	payload := strings.NewReader(`{
  "text": "<string>",
  "temperature": 0.9,
  "top_p": 0.9,
  "references": null,
  "reference_id": "<string>",
  "prosody": {
    "speed": 1,
    "volume": 0
  },
  "chunk_length": 200,
  "normalize": true,
  "format": "mp3",
  "sample_rate": null,
  "mp3_bitrate": 128,
  "opus_bitrate": 32,
  "latency": "normal"
}`)

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("failed to build request:", err)
		return
	}

	req.Header.Add("Content-Type", "application/json")
	req.Header.Add("Authorization", "Bearer <api-key>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("failed to read response:", err)
		return
	}

	// An error response carries a textual body; a success carries audio bytes.
	if res.StatusCode >= http.StatusBadRequest {
		fmt.Printf("request failed with status %s: %s\n", res.Status, body)
		return
	}

	// The body is binary audio, so report its size instead of printing it as text.
	fmt.Printf("received %d bytes of audio\n", len(body))

}

// Sends the speech synthesis request. Payload values follow the documented
// ranges, enums and defaults; replace the placeholders before use.
// NOTE(review): the endpoint returns a binary audio stream, and asString()
// decodes it as text. Confirm the byte-oriented response method offered by
// your Unirest version before saving the audio.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v4beta/txt2speech")
  .header("Content-Type", "application/json")
  .header("Authorization", "Bearer <api-key>")
  .body("{\n  \"text\": \"<string>\",\n  \"temperature\": 0.9,\n  \"top_p\": 0.9,\n  \"references\": null,\n  \"reference_id\": \"<string>\",\n  \"prosody\": {\n    \"speed\": 1,\n    \"volume\": 0\n  },\n  \"chunk_length\": 200,\n  \"normalize\": true,\n  \"format\": \"mp3\",\n  \"sample_rate\": null,\n  \"mp3_bitrate\": 128,\n  \"opus_bitrate\": 32,\n  \"latency\": \"normal\"\n}")
  .asString();

require 'uri'
require 'net/http'

# Request speech synthesis and save the returned audio stream to speech.mp3.
url = URI("https://api.highwayapi.ai/v4beta/txt2speech")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = 'application/json'
request["Authorization"] = 'Bearer <api-key>'
# Values follow the documented ranges, enums and defaults; replace the
# "<string>" placeholders with real values before sending.
request.body = "{\n  \"text\": \"<string>\",\n  \"temperature\": 0.9,\n  \"top_p\": 0.9,\n  \"references\": null,\n  \"reference_id\": \"<string>\",\n  \"prosody\": {\n    \"speed\": 1,\n    \"volume\": 0\n  },\n  \"chunk_length\": 200,\n  \"normalize\": true,\n  \"format\": \"mp3\",\n  \"sample_rate\": null,\n  \"mp3_bitrate\": 128,\n  \"opus_bitrate\": 32,\n  \"latency\": \"normal\"\n}"

response = http.request(request)

if response.is_a?(Net::HTTPSuccess)
  # The API returns raw audio bytes, so write them in binary mode.
  File.binwrite("speech.mp3", response.read_body)
else
  # Show the error body instead of saving it as audio.
  warn "Request failed with status #{response.code}: #{response.read_body}"
end

POST

v4beta

txt2speech

Síntese de voz Fish Audio

# Synthesize speech and save the returned audio stream to speech.mp3.
# Replace <api-key> with your API key and every <string> with a real value.
# The payload values follow the documented ranges, enums and defaults.
curl --request POST \
  --url https://api.highwayapi.ai/v4beta/txt2speech \
  --header 'Authorization: Bearer <api-key>' \
  --header 'Content-Type: application/json' \
  --output speech.mp3 \
  --data '
{
  "text": "<string>",
  "temperature": 0.9,
  "top_p": 0.9,
  "references": null,
  "reference_id": "<string>",
  "prosody": {
    "speed": 1,
    "volume": 0
  },
  "chunk_length": 200,
  "normalize": true,
  "format": "mp3",
  "sample_rate": null,
  "mp3_bitrate": 128,
  "opus_bitrate": 32,
  "latency": "normal"
}
'

import requests

# Request speech synthesis and save the returned audio stream to disk.
url = "https://api.highwayapi.ai/v4beta/txt2speech"

# Values follow the documented ranges, enums and defaults; replace the
# "<string>" placeholders with real values before sending.
payload = {
    "text": "<string>",
    "temperature": 0.9,
    "top_p": 0.9,
    "references": None,
    "reference_id": "<string>",
    "prosody": {
        "speed": 1,
        "volume": 0
    },
    "chunk_length": 200,
    "normalize": True,
    "format": "mp3",
    "sample_rate": None,
    "mp3_bitrate": 128,
    "opus_bitrate": 32,
    "latency": "normal"
}
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer <api-key>"
}

response = requests.post(url, json=payload, headers=headers, timeout=30)
# Surface HTTP errors instead of writing an error body into the audio file.
response.raise_for_status()

# The API returns raw audio bytes, so write them in binary mode.
with open("speech.mp3", "wb") as audio_file:
    audio_file.write(response.content)

// Request options for the speech synthesis call. Payload values follow the
// documented ranges, enums and defaults; replace the placeholders before use.
const options = {
  method: 'POST',
  headers: {'Content-Type': 'application/json', Authorization: 'Bearer <api-key>'},
  body: JSON.stringify({
    text: '<string>',
    temperature: 0.9,
    top_p: 0.9,
    references: null,
    reference_id: '<string>',
    prosody: {speed: 1, volume: 0},
    chunk_length: 200,
    normalize: true,
    format: 'mp3',
    sample_rate: null,
    mp3_bitrate: 128,
    opus_bitrate: 32,
    latency: 'normal'
  })
};

fetch('https://api.highwayapi.ai/v4beta/txt2speech', options)
  .then(res => {
    // Report failures with the response body instead of treating it as audio.
    if (!res.ok) {
      return res.text().then(body => {
        throw new Error(`Request failed with status ${res.status}: ${body}`);
      });
    }
    // The API returns an audio stream, so read raw bytes rather than JSON.
    return res.arrayBuffer();
  })
  .then(audio => console.log(`Received ${audio.byteLength} bytes of audio`))
  .catch(err => console.error(err));

<?php

// Request speech synthesis and save the returned audio stream to speech.mp3.
$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.highwayapi.ai/v4beta/txt2speech",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  // Values follow the documented ranges, enums and defaults; replace the
  // '<string>' placeholders with real values before sending.
  CURLOPT_POSTFIELDS => json_encode([
    'text' => '<string>',
    'temperature' => 0.9,
    'top_p' => 0.9,
    'references' => null,
    'reference_id' => '<string>',
    'prosody' => [
        'speed' => 1,
        'volume' => 0
    ],
    'chunk_length' => 200,
    'normalize' => true,
    'format' => 'mp3',
    'sample_rate' => null,
    'mp3_bitrate' => 128,
    'opus_bitrate' => 32,
    'latency' => 'normal'
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <api-key>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);
$status = curl_getinfo($curl, CURLINFO_RESPONSE_CODE);

curl_close($curl);

if ($response === false) {
  // Transport-level failure: curl_exec() returns false and curl_error() explains why.
  echo "cURL Error #:" . $err;
} elseif ($status >= 400) {
  // The server answered with an error; show its body instead of saving it as audio.
  echo "HTTP Error " . $status . ": " . $response;
} else {
  // The API returns raw audio bytes, so store them in a file.
  file_put_contents("speech.mp3", $response);
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends a speech synthesis request and reports how many bytes of audio
// were returned. Every error is checked so a failed request cannot cause a
// nil-pointer panic when the response body is closed.
func main() {

	url := "https://api.highwayapi.ai/v4beta/txt2speech"

	// Values follow the documented ranges, enums and defaults; replace the
	// "<string>" placeholders with real values before sending.
	payload := strings.NewReader(`{
  "text": "<string>",
  "temperature": 0.9,
  "top_p": 0.9,
  "references": null,
  "reference_id": "<string>",
  "prosody": {
    "speed": 1,
    "volume": 0
  },
  "chunk_length": 200,
  "normalize": true,
  "format": "mp3",
  "sample_rate": null,
  "mp3_bitrate": 128,
  "opus_bitrate": 32,
  "latency": "normal"
}`)

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("failed to build request:", err)
		return
	}

	req.Header.Add("Content-Type", "application/json")
	req.Header.Add("Authorization", "Bearer <api-key>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("failed to read response:", err)
		return
	}

	// An error response carries a textual body; a success carries audio bytes.
	if res.StatusCode >= http.StatusBadRequest {
		fmt.Printf("request failed with status %s: %s\n", res.Status, body)
		return
	}

	// The body is binary audio, so report its size instead of printing it as text.
	fmt.Printf("received %d bytes of audio\n", len(body))

}

// Sends the speech synthesis request. Payload values follow the documented
// ranges, enums and defaults; replace the placeholders before use.
// NOTE(review): the endpoint returns a binary audio stream, and asString()
// decodes it as text. Confirm the byte-oriented response method offered by
// your Unirest version before saving the audio.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v4beta/txt2speech")
  .header("Content-Type", "application/json")
  .header("Authorization", "Bearer <api-key>")
  .body("{\n  \"text\": \"<string>\",\n  \"temperature\": 0.9,\n  \"top_p\": 0.9,\n  \"references\": null,\n  \"reference_id\": \"<string>\",\n  \"prosody\": {\n    \"speed\": 1,\n    \"volume\": 0\n  },\n  \"chunk_length\": 200,\n  \"normalize\": true,\n  \"format\": \"mp3\",\n  \"sample_rate\": null,\n  \"mp3_bitrate\": 128,\n  \"opus_bitrate\": 32,\n  \"latency\": \"normal\"\n}")
  .asString();

require 'uri'
require 'net/http'

# Request speech synthesis and save the returned audio stream to speech.mp3.
url = URI("https://api.highwayapi.ai/v4beta/txt2speech")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = 'application/json'
request["Authorization"] = 'Bearer <api-key>'
# Values follow the documented ranges, enums and defaults; replace the
# "<string>" placeholders with real values before sending.
request.body = "{\n  \"text\": \"<string>\",\n  \"temperature\": 0.9,\n  \"top_p\": 0.9,\n  \"references\": null,\n  \"reference_id\": \"<string>\",\n  \"prosody\": {\n    \"speed\": 1,\n    \"volume\": 0\n  },\n  \"chunk_length\": 200,\n  \"normalize\": true,\n  \"format\": \"mp3\",\n  \"sample_rate\": null,\n  \"mp3_bitrate\": 128,\n  \"opus_bitrate\": 32,\n  \"latency\": \"normal\"\n}"

response = http.request(request)

if response.is_a?(Net::HTTPSuccess)
  # The API returns raw audio bytes, so write them in binary mode.
  File.binwrite("speech.mp3", response.read_body)
else
  # Show the error body instead of saving it as audio.
  warn "Request failed with status #{response.code}: #{response.read_body}"
end

Para obter os melhores resultados, recomendamos usar a clonagem de áudio para enviar um áudio de referência antes de usar esta API. Isso melhorará a qualidade da voz e reduzirá a latência.

O Fish Audio converte texto em fala. Formatos de áudio compatíveis:

WAV / PCM
- Taxa de amostragem: 8kHz, 16kHz, 24kHz, 32kHz, 44.1kHz
- Taxa de amostragem padrão: 44.1kHz
- 16-bit, mono
MP3
- Taxa de amostragem: 32kHz, 44.1kHz
- Taxa de amostragem padrão: 44.1kHz
- Mono
- Taxa de bits: 64kbps, 128kbps (padrão), 192kbps
Opus
- Taxa de amostragem: 48kHz
- Taxa de amostragem padrão: 48kHz
- Mono
- Taxa de bits: -1000 (automático), 24kbps, 32kbps (padrão), 48kbps, 64kbps

Cabeçalhos da requisição

string

obrigatório

Valores enumerados: application/json

string

obrigatório

Formato de autenticação Bearer: Bearer {{API Key}}.

Corpo da requisição

string

obrigatório

O texto a ser convertido em fala.

number

Controla a aleatoriedade da geração de fala. Valores mais altos (por exemplo, 1.0) tornam a saída mais aleatória, enquanto valores mais baixos (por exemplo, 0.1) a tornam mais determinística. Recomendamos usar 0.9 para o modelo s1. Intervalo obrigatório: 0 <= x <= 1

number

Controla a diversidade por meio de amostragem de núcleo. Valores mais baixos (por exemplo, 0.1) tornam a saída mais focada, enquanto valores mais altos (por exemplo, 1.0) permitem mais diversidade. Recomendamos usar 0.9 para o modelo s1. Intervalo obrigatório: 0 <= x <= 1

ReferenceAudio · object[] | null

Áudios de referência usados para a voz; isso requer serialização MessagePack, que substituirá reference_voices e reference_texts.

Mostrar propriedades

file

obrigatório

Arquivo de áudio de referência.

string

obrigatório

Texto de referência correspondente ao áudio.

string | null

ID do modelo de referência usado para a voz.

ProsodyControl · object

Controle de prosódia usado para a voz.

Mostrar propriedades

number

padrão:1

Controle de velocidade da fala.

number

padrão:0

Controle de volume da fala.

integer

padrão:200

Comprimento dos blocos usado para a voz. Intervalo obrigatório: 100 <= x <= 300

boolean

padrão:true

Se deve normalizar a fala; isso reduzirá a latência, mas pode diminuir o desempenho no processamento de números e datas.

enum<string>

padrão:"mp3"

Formato usado para a voz. Valores opcionais: wav, pcm, mp3, opus

integer | null

Taxa de amostragem usada para a voz.

enum<integer>

padrão:128

Taxa de bits MP3 usada para a voz. Valores opcionais: 64, 128, 192

enum<integer>

padrão:32

Taxa de bits Opus usada para a voz. Valores opcionais: -1000, 24, 32, 48, 64

enum<string>

padrão:"normal"

Configuração de latência usada para a voz; balanced reduzirá a latência, mas pode causar queda de desempenho. Valores opcionais: normal, balanced

Informações de resposta

A API retornará diretamente um fluxo de áudio no formato especificado pelo parâmetro format (padrão: mp3).

Fish Audio S2 Pro Texto para fala

Replicação de áudio Fish Audio

Noções básicas da API

Modelos de linguagem

Imagens

Vídeo

Áudio

Síntese de voz Fish Audio

Cabeçalhos da requisição

Corpo da requisição

Informações de resposta

Cabeçalhos da requisição

Corpo da requisição

Informações de resposta

Cabeçalhos da requisição

Corpo da requisição

Informações de resposta