Sprachsynthese-API | Fish Audio

Fish Audio Sprachsynthese

# Request speech synthesis and save the returned audio stream to speech.mp3.
# The response body is binary audio (not JSON), so it is written to a file
# with --output instead of being dumped to the terminal.
# "references" is left null: inline reference audio requires MessagePack
# serialization and cannot be carried in a JSON body; use "reference_id".
curl --request POST \
  --url https://api.highwayapi.ai/v4beta/txt2speech \
  --header 'Authorization: Bearer <api-key>' \
  --header 'Content-Type: application/json' \
  --fail \
  --output speech.mp3 \
  --data '
{
  "text": "<string>",
  "temperature": 0.9,
  "top_p": 0.9,
  "references": null,
  "reference_id": "<string>",
  "prosody": {
    "speed": 1,
    "volume": 0
  },
  "chunk_length": 200,
  "normalize": true,
  "format": "mp3",
  "sample_rate": null,
  "mp3_bitrate": 128,
  "opus_bitrate": 32,
  "latency": "normal"
}
'

import requests

url = "https://api.highwayapi.ai/v4beta/txt2speech"

# Example values follow the documented defaults and allowed ranges
# (temperature/top_p must be within 0..1, chunk_length within 100..300).
payload = {
    "text": "<string>",
    "temperature": 0.9,
    "top_p": 0.9,
    # Inline reference audio requires MessagePack serialization and cannot be
    # sent in a JSON body; select a voice with "reference_id" instead.
    "references": None,
    "reference_id": "<string>",
    "prosody": {
        "speed": 1,
        "volume": 0
    },
    "chunk_length": 200,
    "normalize": True,
    "format": "mp3",
    "sample_rate": None,  # None lets the API pick the format's default rate
    "mp3_bitrate": 128,
    "opus_bitrate": 32,
    "latency": "normal"
}
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer <api-key>"
}

response = requests.post(url, json=payload, headers=headers)
# Surface HTTP errors instead of writing an error body into the audio file.
response.raise_for_status()

# The API returns the audio stream itself, so store the raw bytes rather than
# decoding them as text.
with open("speech.mp3", "wb") as audio_file:
    audio_file.write(response.content)

print(f"Saved {len(response.content)} bytes to speech.mp3")

// Example values follow the documented defaults and allowed ranges
// (temperature/top_p within 0..1, chunk_length within 100..300).
const options = {
  method: 'POST',
  headers: {'Content-Type': 'application/json', Authorization: 'Bearer <api-key>'},
  body: JSON.stringify({
    text: '<string>',
    temperature: 0.9,
    top_p: 0.9,
    // Inline reference audio requires MessagePack serialization and cannot be
    // sent in a JSON body; select a voice with reference_id instead.
    references: null,
    reference_id: '<string>',
    prosody: {speed: 1, volume: 0},
    chunk_length: 200,
    normalize: true,
    format: 'mp3',
    sample_rate: null,
    mp3_bitrate: 128,
    opus_bitrate: 32,
    latency: 'normal'
  })
};

fetch('https://api.highwayapi.ai/v4beta/txt2speech', options)
  .then(res => {
    // fetch only rejects on network failure, so HTTP errors are checked here.
    if (!res.ok) {
      throw new Error(`Request failed with HTTP ${res.status}`);
    }
    // The response body is the audio stream itself, not JSON.
    return res.arrayBuffer();
  })
  .then(audio => console.log(`Received ${audio.byteLength} bytes of audio`))
  .catch(err => console.error(err));

<?php

// Requests speech synthesis and stores the returned audio stream in speech.mp3.

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.highwayapi.ai/v4beta/txt2speech",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  // Example values follow the documented defaults and allowed ranges.
  // Scalars are used throughout: an empty PHP array would be encoded as the
  // JSON list [] rather than a valid value for these fields.
  CURLOPT_POSTFIELDS => json_encode([
    'text' => '<string>',
    'temperature' => 0.9,
    'top_p' => 0.9,
    // Inline reference audio requires MessagePack serialization and cannot be
    // sent in a JSON body; select a voice with reference_id instead.
    'references' => null,
    'reference_id' => '<string>',
    'prosody' => [
        'speed' => 1,
        'volume' => 0
    ],
    'chunk_length' => 200,
    'normalize' => true,
    'format' => 'mp3',
    'sample_rate' => null,
    'mp3_bitrate' => 128,
    'opus_bitrate' => 32,
    'latency' => 'normal'
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <api-key>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);
$status = curl_getinfo($curl, CURLINFO_RESPONSE_CODE);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} elseif ($status >= 400) {
  // On failure the body is an error message, not audio.
  echo "HTTP " . $status . ": " . $response;
} else {
  // The response body is the binary audio stream; write it to disk.
  file_put_contents("speech.mp3", $response);
  echo "Saved " . strlen($response) . " bytes to speech.mp3";
}

package main

import (
	"fmt"
	"io"
	"log"
	"net/http"
	"os"
	"strings"
)

// requestBody holds example values that follow the documented defaults and
// allowed ranges. "references" is null because inline reference audio
// requires MessagePack serialization and cannot be sent in a JSON body.
const requestBody = `{
  "text": "<string>",
  "temperature": 0.9,
  "top_p": 0.9,
  "references": null,
  "reference_id": "<string>",
  "prosody": {
    "speed": 1,
    "volume": 0
  },
  "chunk_length": 200,
  "normalize": true,
  "format": "mp3",
  "sample_rate": null,
  "mp3_bitrate": 128,
  "opus_bitrate": 32,
  "latency": "normal"
}`

// synthesize posts the request and streams the returned audio into outPath.
// It returns the number of bytes written.
func synthesize(url, outPath string) (int64, error) {
	req, err := http.NewRequest("POST", url, strings.NewReader(requestBody))
	if err != nil {
		return 0, err
	}

	req.Header.Add("Content-Type", "application/json")
	req.Header.Add("Authorization", "Bearer <api-key>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil on error, so it must not be touched here.
		return 0, err
	}
	defer res.Body.Close()

	if res.StatusCode < 200 || res.StatusCode >= 300 {
		// On failure the body is an error message, not audio.
		msg, _ := io.ReadAll(res.Body)
		return 0, fmt.Errorf("request failed: %s: %s", res.Status, msg)
	}

	out, err := os.Create(outPath)
	if err != nil {
		return 0, err
	}
	defer out.Close()

	// The response body is the binary audio stream; copy it straight to disk.
	return io.Copy(out, res.Body)
}

func main() {
	written, err := synthesize("https://api.highwayapi.ai/v4beta/txt2speech", "speech.mp3")
	if err != nil {
		log.Fatal(err)
	}

	fmt.Printf("Saved %d bytes to speech.mp3\n", written)
}

// Sends the speech-synthesis request with Unirest and reads the response body as a String.
// NOTE(review): the endpoint is documented to return an audio stream, so decoding the body
// with asString() is likely to corrupt it — confirm which binary accessor the Unirest
// version in use provides and switch to it.
// NOTE(review): the placeholder values below (123, {}) do not satisfy the documented
// parameter types and ranges; replace them with real values before running.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v4beta/txt2speech")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body("{\n  \"text\": \"<string>\",\n  \"temperature\": 123,\n  \"top_p\": 123,\n  \"references\": {\n    \"text\": \"<string>\"\n  },\n  \"reference_id\": {},\n  \"prosody\": {\n    \"speed\": 123,\n    \"volume\": 123\n  },\n  \"chunk_length\": 123,\n  \"normalize\": true,\n  \"format\": {},\n  \"sample_rate\": {},\n  \"mp3_bitrate\": {},\n  \"opus_bitrate\": {},\n  \"latency\": {}\n}")
  .asString();

require 'uri'
require 'net/http'
require 'json'

url = URI("https://api.highwayapi.ai/v4beta/txt2speech")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = 'application/json'
request["Authorization"] = 'Bearer <api-key>'
# Example values follow the documented defaults and allowed ranges.
# "references" is nil because inline reference audio requires MessagePack
# serialization and cannot be sent in a JSON body; use "reference_id".
request.body = JSON.generate(
  "text" => "<string>",
  "temperature" => 0.9,
  "top_p" => 0.9,
  "references" => nil,
  "reference_id" => "<string>",
  "prosody" => { "speed" => 1, "volume" => 0 },
  "chunk_length" => 200,
  "normalize" => true,
  "format" => "mp3",
  "sample_rate" => nil,
  "mp3_bitrate" => 128,
  "opus_bitrate" => 32,
  "latency" => "normal"
)

response = http.request(request)
# On failure the body is an error message, not audio.
abort "HTTP #{response.code}: #{response.body}" unless response.is_a?(Net::HTTPSuccess)

# The response body is the binary audio stream; write it in binary mode.
File.binwrite('speech.mp3', response.body)
puts "Saved #{response.body.bytesize} bytes to speech.mp3"

POST

v4beta

txt2speech

Fish Audio Sprachsynthese

# Request speech synthesis and save the returned audio stream to speech.mp3.
# The response body is binary audio (not JSON), so it is written to a file
# with --output instead of being dumped to the terminal.
# "references" is left null: inline reference audio requires MessagePack
# serialization and cannot be carried in a JSON body; use "reference_id".
curl --request POST \
  --url https://api.highwayapi.ai/v4beta/txt2speech \
  --header 'Authorization: Bearer <api-key>' \
  --header 'Content-Type: application/json' \
  --fail \
  --output speech.mp3 \
  --data '
{
  "text": "<string>",
  "temperature": 0.9,
  "top_p": 0.9,
  "references": null,
  "reference_id": "<string>",
  "prosody": {
    "speed": 1,
    "volume": 0
  },
  "chunk_length": 200,
  "normalize": true,
  "format": "mp3",
  "sample_rate": null,
  "mp3_bitrate": 128,
  "opus_bitrate": 32,
  "latency": "normal"
}
'

import requests

url = "https://api.highwayapi.ai/v4beta/txt2speech"

# Example values follow the documented defaults and allowed ranges
# (temperature/top_p must be within 0..1, chunk_length within 100..300).
payload = {
    "text": "<string>",
    "temperature": 0.9,
    "top_p": 0.9,
    # Inline reference audio requires MessagePack serialization and cannot be
    # sent in a JSON body; select a voice with "reference_id" instead.
    "references": None,
    "reference_id": "<string>",
    "prosody": {
        "speed": 1,
        "volume": 0
    },
    "chunk_length": 200,
    "normalize": True,
    "format": "mp3",
    "sample_rate": None,  # None lets the API pick the format's default rate
    "mp3_bitrate": 128,
    "opus_bitrate": 32,
    "latency": "normal"
}
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer <api-key>"
}

response = requests.post(url, json=payload, headers=headers)
# Surface HTTP errors instead of writing an error body into the audio file.
response.raise_for_status()

# The API returns the audio stream itself, so store the raw bytes rather than
# decoding them as text.
with open("speech.mp3", "wb") as audio_file:
    audio_file.write(response.content)

print(f"Saved {len(response.content)} bytes to speech.mp3")

// Example values follow the documented defaults and allowed ranges
// (temperature/top_p within 0..1, chunk_length within 100..300).
const options = {
  method: 'POST',
  headers: {'Content-Type': 'application/json', Authorization: 'Bearer <api-key>'},
  body: JSON.stringify({
    text: '<string>',
    temperature: 0.9,
    top_p: 0.9,
    // Inline reference audio requires MessagePack serialization and cannot be
    // sent in a JSON body; select a voice with reference_id instead.
    references: null,
    reference_id: '<string>',
    prosody: {speed: 1, volume: 0},
    chunk_length: 200,
    normalize: true,
    format: 'mp3',
    sample_rate: null,
    mp3_bitrate: 128,
    opus_bitrate: 32,
    latency: 'normal'
  })
};

fetch('https://api.highwayapi.ai/v4beta/txt2speech', options)
  .then(res => {
    // fetch only rejects on network failure, so HTTP errors are checked here.
    if (!res.ok) {
      throw new Error(`Request failed with HTTP ${res.status}`);
    }
    // The response body is the audio stream itself, not JSON.
    return res.arrayBuffer();
  })
  .then(audio => console.log(`Received ${audio.byteLength} bytes of audio`))
  .catch(err => console.error(err));

<?php

// Requests speech synthesis and stores the returned audio stream in speech.mp3.

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.highwayapi.ai/v4beta/txt2speech",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  // Example values follow the documented defaults and allowed ranges.
  // Scalars are used throughout: an empty PHP array would be encoded as the
  // JSON list [] rather than a valid value for these fields.
  CURLOPT_POSTFIELDS => json_encode([
    'text' => '<string>',
    'temperature' => 0.9,
    'top_p' => 0.9,
    // Inline reference audio requires MessagePack serialization and cannot be
    // sent in a JSON body; select a voice with reference_id instead.
    'references' => null,
    'reference_id' => '<string>',
    'prosody' => [
        'speed' => 1,
        'volume' => 0
    ],
    'chunk_length' => 200,
    'normalize' => true,
    'format' => 'mp3',
    'sample_rate' => null,
    'mp3_bitrate' => 128,
    'opus_bitrate' => 32,
    'latency' => 'normal'
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <api-key>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);
$status = curl_getinfo($curl, CURLINFO_RESPONSE_CODE);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} elseif ($status >= 400) {
  // On failure the body is an error message, not audio.
  echo "HTTP " . $status . ": " . $response;
} else {
  // The response body is the binary audio stream; write it to disk.
  file_put_contents("speech.mp3", $response);
  echo "Saved " . strlen($response) . " bytes to speech.mp3";
}

package main

import (
	"fmt"
	"io"
	"log"
	"net/http"
	"os"
	"strings"
)

// requestBody holds example values that follow the documented defaults and
// allowed ranges. "references" is null because inline reference audio
// requires MessagePack serialization and cannot be sent in a JSON body.
const requestBody = `{
  "text": "<string>",
  "temperature": 0.9,
  "top_p": 0.9,
  "references": null,
  "reference_id": "<string>",
  "prosody": {
    "speed": 1,
    "volume": 0
  },
  "chunk_length": 200,
  "normalize": true,
  "format": "mp3",
  "sample_rate": null,
  "mp3_bitrate": 128,
  "opus_bitrate": 32,
  "latency": "normal"
}`

// synthesize posts the request and streams the returned audio into outPath.
// It returns the number of bytes written.
func synthesize(url, outPath string) (int64, error) {
	req, err := http.NewRequest("POST", url, strings.NewReader(requestBody))
	if err != nil {
		return 0, err
	}

	req.Header.Add("Content-Type", "application/json")
	req.Header.Add("Authorization", "Bearer <api-key>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil on error, so it must not be touched here.
		return 0, err
	}
	defer res.Body.Close()

	if res.StatusCode < 200 || res.StatusCode >= 300 {
		// On failure the body is an error message, not audio.
		msg, _ := io.ReadAll(res.Body)
		return 0, fmt.Errorf("request failed: %s: %s", res.Status, msg)
	}

	out, err := os.Create(outPath)
	if err != nil {
		return 0, err
	}
	defer out.Close()

	// The response body is the binary audio stream; copy it straight to disk.
	return io.Copy(out, res.Body)
}

func main() {
	written, err := synthesize("https://api.highwayapi.ai/v4beta/txt2speech", "speech.mp3")
	if err != nil {
		log.Fatal(err)
	}

	fmt.Printf("Saved %d bytes to speech.mp3\n", written)
}

// Sends the speech-synthesis request with Unirest and reads the response body as a String.
// NOTE(review): the endpoint is documented to return an audio stream, so decoding the body
// with asString() is likely to corrupt it — confirm which binary accessor the Unirest
// version in use provides and switch to it.
// NOTE(review): the placeholder values below (123, {}) do not satisfy the documented
// parameter types and ranges; replace them with real values before running.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v4beta/txt2speech")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body("{\n  \"text\": \"<string>\",\n  \"temperature\": 123,\n  \"top_p\": 123,\n  \"references\": {\n    \"text\": \"<string>\"\n  },\n  \"reference_id\": {},\n  \"prosody\": {\n    \"speed\": 123,\n    \"volume\": 123\n  },\n  \"chunk_length\": 123,\n  \"normalize\": true,\n  \"format\": {},\n  \"sample_rate\": {},\n  \"mp3_bitrate\": {},\n  \"opus_bitrate\": {},\n  \"latency\": {}\n}")
  .asString();

require 'uri'
require 'net/http'
require 'json'

url = URI("https://api.highwayapi.ai/v4beta/txt2speech")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = 'application/json'
request["Authorization"] = 'Bearer <api-key>'
# Example values follow the documented defaults and allowed ranges.
# "references" is nil because inline reference audio requires MessagePack
# serialization and cannot be sent in a JSON body; use "reference_id".
request.body = JSON.generate(
  "text" => "<string>",
  "temperature" => 0.9,
  "top_p" => 0.9,
  "references" => nil,
  "reference_id" => "<string>",
  "prosody" => { "speed" => 1, "volume" => 0 },
  "chunk_length" => 200,
  "normalize" => true,
  "format" => "mp3",
  "sample_rate" => nil,
  "mp3_bitrate" => 128,
  "opus_bitrate" => 32,
  "latency" => "normal"
)

response = http.request(request)
# On failure the body is an error message, not audio.
abort "HTTP #{response.code}: #{response.body}" unless response.is_a?(Net::HTTPSuccess)

# The response body is the binary audio stream; write it in binary mode.
File.binwrite('speech.mp3', response.body)
puts "Saved #{response.body.bytesize} bytes to speech.mp3"

Für optimale Ergebnisse empfehlen wir, vor der Verwendung dieser API zunächst Referenzaudio mit Audio-Klonen hochzuladen. Dies verbessert die Sprachqualität und reduziert die Latenz.

Fish Audio wandelt Text in Sprache um. Unterstützte Audioformate:

WAV / PCM
- Abtastrate: 8kHz, 16kHz, 24kHz, 32kHz, 44.1kHz
- Standard-Abtastrate: 44.1kHz
- 16-bit, Mono
MP3
- Abtastrate: 32kHz, 44.1kHz
- Standard-Abtastrate: 44.1kHz
- Mono
- Bitrate: 64kbps, 128kbps (Standard), 192kbps
Opus
- Abtastrate: 48kHz
- Standard-Abtastrate: 48kHz
- Mono
- Bitrate: -1000 (automatisch), 24kbps, 32kbps (Standard), 48kbps, 64kbps

Anfrageheader

string

erforderlich

Enumerationswert: application/json

string

erforderlich

Bearer-Authentifizierungsformat: Bearer {{API Key}}.

Anfragetext

string

erforderlich

Der Text, der in Sprache umgewandelt werden soll.

number

Steuert die Zufälligkeit der Sprachgenerierung. Höhere Werte (z. B. 1.0) machen die Ausgabe zufälliger, niedrigere Werte (z. B. 0.1) machen sie deterministischer. Wir empfehlen für das Modell s1 die Verwendung von 0.9. Erforderlicher Bereich: 0 <= x <= 1

number

Steuert die Vielfalt durch Nucleus-Sampling. Niedrigere Werte (z. B. 0.1) machen die Ausgabe fokussierter, höhere Werte (z. B. 1.0) erlauben mehr Vielfalt. Wir empfehlen für das Modell s1 die Verwendung von 0.9. Erforderlicher Bereich: 0 <= x <= 1

ReferenceAudio · object[] | null

Referenzaudio für die Stimme. Dies erfordert MessagePack-Serialisierung und überschreibt reference_voices und reference_texts.

Eigenschaften anzeigen

file

erforderlich

Referenz-Audiodatei.

string

erforderlich

Der zum Audio gehörende Referenztext.

string | null

Referenzmodell-ID für die Stimme.

ProsodyControl · object

Prosodiesteuerung für die Stimme.

Eigenschaften anzeigen

number

Standard:1

Steuerung der Sprechgeschwindigkeit.

number

Standard:0

Steuerung der Sprachlautstärke.

integer

Standard:200

Chunk-Länge für die Sprache. Erforderlicher Bereich: 100 <= x <= 300

boolean

Standard:true

Ob die Sprache normalisiert werden soll. Dies reduziert die Latenz, kann jedoch die Verarbeitungsleistung für Zahlen und Datumsangaben verringern.

enum<string>

Standard:"mp3"

Format für die Sprache. Mögliche Werte: wav, pcm, mp3, opus

integer | null

Abtastrate für die Sprache.

enum<integer>

Standard:128

MP3-Bitrate für die Sprache. Mögliche Werte: 64, 128, 192

enum<integer>

Standard:32

Opus-Bitrate für die Sprache. Mögliche Werte: -1000, 24, 32, 48, 64

enum<string>

Standard:"normal"

Latenzeinstellung für die Sprache. Der Wert „balanced“ reduziert die Latenz, kann jedoch zu Leistungseinbußen führen. Mögliche Werte: normal, balanced

Antwortinformationen

Die API gibt direkt einen Audiostream in dem durch den Parameter format angegebenen Format zurück (Standard: mp3).

Fish Audio S2 Pro Text to Speech

Zurück

Fish Audio Audio-Klonen

Weiter

API-Grundlagen

Große Sprachmodelle

Bilder

Video

Audio

Fish Audio Sprachsynthese

Anfrageheader

Anfragetext

Antwortinformationen

​Anfrageheader

​Anfragetext

​Antwortinformationen

Anfrageheader

Anfragetext

Antwortinformationen