音声合成 API | Fish Audio

Fish Audio 音声合成

# Convert text to speech and save the returned audio stream to speech.mp3.
# Body values follow the documented types, ranges and defaults; replace the
# <...> placeholders with your own API key, text and reference model ID.
# --output is required because the response body is binary audio, not text.
curl --request POST \
  --url https://api.highwayapi.ai/v4beta/txt2speech \
  --header 'Authorization: Bearer <API Key>' \
  --header 'Content-Type: application/json' \
  --output speech.mp3 \
  --data '
{
  "text": "<string>",
  "temperature": 0.9,
  "top_p": 0.9,
  "references": null,
  "reference_id": "<string>",
  "prosody": {
    "speed": 1,
    "volume": 0
  },
  "chunk_length": 200,
  "normalize": true,
  "format": "mp3",
  "sample_rate": 44100,
  "mp3_bitrate": 128,
  "opus_bitrate": 32,
  "latency": "normal"
}
'

import requests

# Fish Audio text-to-speech endpoint.
url = "https://api.highwayapi.ai/v4beta/txt2speech"

# Request body. Values follow the documented types, ranges and defaults;
# replace the <...> placeholders with your own text and reference model ID.
payload = {
    "text": "<string>",
    "temperature": 0.9,  # allowed range: 0 <= x <= 1
    "top_p": 0.9,  # allowed range: 0 <= x <= 1
    # Inline reference audio needs MessagePack serialization, so it is not
    # sent in this JSON request; reference_id selects the voice instead.
    "references": None,
    "reference_id": "<string>",
    "prosody": {
        "speed": 1,
        "volume": 0
    },
    "chunk_length": 200,  # allowed range: 100 <= x <= 300
    "normalize": True,
    "format": "mp3",  # one of: wav, pcm, mp3, opus
    "sample_rate": 44100,
    "mp3_bitrate": 128,  # one of: 64, 128, 192
    "opus_bitrate": 32,  # one of: -1000, 24, 32, 48, 64
    "latency": "normal"  # one of: normal, balanced
}
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer <API Key>"
}

response = requests.post(url, json=payload, headers=headers, timeout=30)

# Fail loudly on an HTTP error instead of saving an error body as audio.
response.raise_for_status()

# The API returns the audio stream itself, so write the raw bytes to a file
# rather than decoding them as text.
with open("speech.mp3", "wb") as audio_file:
    audio_file.write(response.content)

// Request options for the Fish Audio text-to-speech endpoint.
// Body values follow the documented types, ranges and defaults; replace the
// <...> placeholders with your own API key, text and reference model ID.
const options = {
  method: 'POST',
  headers: {'Content-Type': 'application/json', Authorization: 'Bearer <API Key>'},
  body: JSON.stringify({
    text: '<string>',
    temperature: 0.9,
    top_p: 0.9,
    // Inline reference audio needs MessagePack serialization, so it is not
    // sent in this JSON request; reference_id selects the voice instead.
    references: null,
    reference_id: '<string>',
    prosody: {speed: 1, volume: 0},
    chunk_length: 200,
    normalize: true,
    format: 'mp3',
    sample_rate: 44100,
    mp3_bitrate: 128,
    opus_bitrate: 32,
    latency: 'normal'
  })
};

fetch('https://api.highwayapi.ai/v4beta/txt2speech', options)
  .then(res => {
    if (!res.ok) {
      throw new Error(`Request failed with HTTP ${res.status}`);
    }
    // The response body is the audio stream itself, not JSON, so read it as
    // raw bytes instead of calling res.json().
    return res.arrayBuffer();
  })
  .then(audio => console.log(`Received ${audio.byteLength} bytes of audio`))
  .catch(err => console.error(err));

<?php

// Convert text to speech and save the returned audio stream to speech.mp3.
// Body values follow the documented types, ranges and defaults; replace the
// <...> placeholders with your own API key, text and reference model ID.

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.highwayapi.ai/v4beta/txt2speech",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'text' => '<string>',
    'temperature' => 0.9,
    'top_p' => 0.9,
    // Inline reference audio needs MessagePack serialization, so it is not
    // sent in this JSON request; reference_id selects the voice instead.
    'references' => null,
    'reference_id' => '<string>',
    'prosody' => [
        'speed' => 1,
        'volume' => 0
    ],
    'chunk_length' => 200,
    'normalize' => true,
    'format' => 'mp3',
    'sample_rate' => 44100,
    'mp3_bitrate' => 128,
    'opus_bitrate' => 32,
    'latency' => 'normal'
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <API Key>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);
// Read the status before closing the handle so HTTP errors can be reported.
$status = curl_getinfo($curl, CURLINFO_HTTP_CODE);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} elseif ($status < 200 || $status >= 300) {
  echo "HTTP Error " . $status . ": " . $response;
} else {
  // The response body is binary audio, so store it instead of echoing it.
  file_put_contents('speech.mp3', $response);
}

package main

import (
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"
)

// main runs the request and reports any failure on stderr with a non-zero
// exit code.
func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// run posts a text-to-speech request and saves the returned audio stream to
// speech.mp3. Body values follow the documented types, ranges and defaults;
// replace the <...> placeholders with your own API key, text and reference
// model ID.
func run() error {
	url := "https://api.highwayapi.ai/v4beta/txt2speech"

	// "references" is null because inline reference audio needs MessagePack
	// serialization; reference_id selects the voice instead.
	payload := strings.NewReader(`{
  "text": "<string>",
  "temperature": 0.9,
  "top_p": 0.9,
  "references": null,
  "reference_id": "<string>",
  "prosody": {
    "speed": 1,
    "volume": 0
  },
  "chunk_length": 200,
  "normalize": true,
  "format": "mp3",
  "sample_rate": 44100,
  "mp3_bitrate": 128,
  "opus_bitrate": 32,
  "latency": "normal"
}`)

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		return fmt.Errorf("build request: %w", err)
	}

	req.Header.Add("Content-Type", "application/json")
	req.Header.Add("Authorization", "Bearer <API Key>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil on error, so return before touching res.Body.
		return fmt.Errorf("send request: %w", err)
	}
	defer res.Body.Close()

	if res.StatusCode < 200 || res.StatusCode >= 300 {
		// Best effort: include whatever error body the server sent.
		body, _ := io.ReadAll(res.Body)
		return fmt.Errorf("unexpected status %s: %s", res.Status, body)
	}

	// The response body is binary audio, so stream it to a file instead of
	// printing it as a string.
	out, err := os.Create("speech.mp3")
	if err != nil {
		return fmt.Errorf("create output file: %w", err)
	}
	if _, err := io.Copy(out, res.Body); err != nil {
		out.Close()
		return fmt.Errorf("write audio: %w", err)
	}
	return out.Close()
}

// Convert text to speech. Body values follow the documented types, ranges and
// defaults; replace the <...> placeholders with your own API key, text and
// reference model ID. "references" is null because inline reference audio
// needs MessagePack serialization; reference_id selects the voice instead.
// The response body is binary audio, so it is read with asBytes() rather than
// asString(). NOTE(review): asBytes() is the Kong Unirest-Java 3.x+ API; the
// legacy com.mashape client exposes asBinary() instead - confirm the version.
HttpResponse<byte[]> response = Unirest.post("https://api.highwayapi.ai/v4beta/txt2speech")
  .header("Content-Type", "application/json")
  .header("Authorization", "Bearer <API Key>")
  .body("{\n  \"text\": \"<string>\",\n  \"temperature\": 0.9,\n  \"top_p\": 0.9,\n  \"references\": null,\n  \"reference_id\": \"<string>\",\n  \"prosody\": {\n    \"speed\": 1,\n    \"volume\": 0\n  },\n  \"chunk_length\": 200,\n  \"normalize\": true,\n  \"format\": \"mp3\",\n  \"sample_rate\": 44100,\n  \"mp3_bitrate\": 128,\n  \"opus_bitrate\": 32,\n  \"latency\": \"normal\"\n}")
  .asBytes();

require 'uri'
require 'net/http'
require 'json'

# Convert text to speech and save the returned audio stream to speech.mp3.
# Body values follow the documented types, ranges and defaults; replace the
# <...> placeholders with your own API key, text and reference model ID.
url = URI("https://api.highwayapi.ai/v4beta/txt2speech")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = 'application/json'
request["Authorization"] = 'Bearer <API Key>'
# Build the body with a serializer instead of a hand-escaped string.
request.body = JSON.generate(
  "text" => "<string>",
  "temperature" => 0.9,
  "top_p" => 0.9,
  # Inline reference audio needs MessagePack serialization, so it is not
  # sent in this JSON request; reference_id selects the voice instead.
  "references" => nil,
  "reference_id" => "<string>",
  "prosody" => { "speed" => 1, "volume" => 0 },
  "chunk_length" => 200,
  "normalize" => true,
  "format" => "mp3",
  "sample_rate" => 44100,
  "mp3_bitrate" => 128,
  "opus_bitrate" => 32,
  "latency" => "normal"
)

response = http.request(request)

if response.is_a?(Net::HTTPSuccess)
  # The response body is binary audio, so write it in binary mode.
  File.binwrite('speech.mp3', response.read_body)
else
  abort "HTTP #{response.code}: #{response.read_body}"
end

POST

v4beta

txt2speech

Fish Audio 音声合成

# Convert text to speech and save the returned audio stream to speech.mp3.
# Body values follow the documented types, ranges and defaults; replace the
# <...> placeholders with your own API key, text and reference model ID.
# --output is required because the response body is binary audio, not text.
curl --request POST \
  --url https://api.highwayapi.ai/v4beta/txt2speech \
  --header 'Authorization: Bearer <API Key>' \
  --header 'Content-Type: application/json' \
  --output speech.mp3 \
  --data '
{
  "text": "<string>",
  "temperature": 0.9,
  "top_p": 0.9,
  "references": null,
  "reference_id": "<string>",
  "prosody": {
    "speed": 1,
    "volume": 0
  },
  "chunk_length": 200,
  "normalize": true,
  "format": "mp3",
  "sample_rate": 44100,
  "mp3_bitrate": 128,
  "opus_bitrate": 32,
  "latency": "normal"
}
'

import requests

# Fish Audio text-to-speech endpoint.
url = "https://api.highwayapi.ai/v4beta/txt2speech"

# Request body. Values follow the documented types, ranges and defaults;
# replace the <...> placeholders with your own text and reference model ID.
payload = {
    "text": "<string>",
    "temperature": 0.9,  # allowed range: 0 <= x <= 1
    "top_p": 0.9,  # allowed range: 0 <= x <= 1
    # Inline reference audio needs MessagePack serialization, so it is not
    # sent in this JSON request; reference_id selects the voice instead.
    "references": None,
    "reference_id": "<string>",
    "prosody": {
        "speed": 1,
        "volume": 0
    },
    "chunk_length": 200,  # allowed range: 100 <= x <= 300
    "normalize": True,
    "format": "mp3",  # one of: wav, pcm, mp3, opus
    "sample_rate": 44100,
    "mp3_bitrate": 128,  # one of: 64, 128, 192
    "opus_bitrate": 32,  # one of: -1000, 24, 32, 48, 64
    "latency": "normal"  # one of: normal, balanced
}
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer <API Key>"
}

response = requests.post(url, json=payload, headers=headers, timeout=30)

# Fail loudly on an HTTP error instead of saving an error body as audio.
response.raise_for_status()

# The API returns the audio stream itself, so write the raw bytes to a file
# rather than decoding them as text.
with open("speech.mp3", "wb") as audio_file:
    audio_file.write(response.content)

// Request options for the Fish Audio text-to-speech endpoint.
// Body values follow the documented types, ranges and defaults; replace the
// <...> placeholders with your own API key, text and reference model ID.
const options = {
  method: 'POST',
  headers: {'Content-Type': 'application/json', Authorization: 'Bearer <API Key>'},
  body: JSON.stringify({
    text: '<string>',
    temperature: 0.9,
    top_p: 0.9,
    // Inline reference audio needs MessagePack serialization, so it is not
    // sent in this JSON request; reference_id selects the voice instead.
    references: null,
    reference_id: '<string>',
    prosody: {speed: 1, volume: 0},
    chunk_length: 200,
    normalize: true,
    format: 'mp3',
    sample_rate: 44100,
    mp3_bitrate: 128,
    opus_bitrate: 32,
    latency: 'normal'
  })
};

fetch('https://api.highwayapi.ai/v4beta/txt2speech', options)
  .then(res => {
    if (!res.ok) {
      throw new Error(`Request failed with HTTP ${res.status}`);
    }
    // The response body is the audio stream itself, not JSON, so read it as
    // raw bytes instead of calling res.json().
    return res.arrayBuffer();
  })
  .then(audio => console.log(`Received ${audio.byteLength} bytes of audio`))
  .catch(err => console.error(err));

<?php

// Convert text to speech and save the returned audio stream to speech.mp3.
// Body values follow the documented types, ranges and defaults; replace the
// <...> placeholders with your own API key, text and reference model ID.

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.highwayapi.ai/v4beta/txt2speech",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'text' => '<string>',
    'temperature' => 0.9,
    'top_p' => 0.9,
    // Inline reference audio needs MessagePack serialization, so it is not
    // sent in this JSON request; reference_id selects the voice instead.
    'references' => null,
    'reference_id' => '<string>',
    'prosody' => [
        'speed' => 1,
        'volume' => 0
    ],
    'chunk_length' => 200,
    'normalize' => true,
    'format' => 'mp3',
    'sample_rate' => 44100,
    'mp3_bitrate' => 128,
    'opus_bitrate' => 32,
    'latency' => 'normal'
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <API Key>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);
// Read the status before closing the handle so HTTP errors can be reported.
$status = curl_getinfo($curl, CURLINFO_HTTP_CODE);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} elseif ($status < 200 || $status >= 300) {
  echo "HTTP Error " . $status . ": " . $response;
} else {
  // The response body is binary audio, so store it instead of echoing it.
  file_put_contents('speech.mp3', $response);
}

package main

import (
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"
)

// main runs the request and reports any failure on stderr with a non-zero
// exit code.
func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// run posts a text-to-speech request and saves the returned audio stream to
// speech.mp3. Body values follow the documented types, ranges and defaults;
// replace the <...> placeholders with your own API key, text and reference
// model ID.
func run() error {
	url := "https://api.highwayapi.ai/v4beta/txt2speech"

	// "references" is null because inline reference audio needs MessagePack
	// serialization; reference_id selects the voice instead.
	payload := strings.NewReader(`{
  "text": "<string>",
  "temperature": 0.9,
  "top_p": 0.9,
  "references": null,
  "reference_id": "<string>",
  "prosody": {
    "speed": 1,
    "volume": 0
  },
  "chunk_length": 200,
  "normalize": true,
  "format": "mp3",
  "sample_rate": 44100,
  "mp3_bitrate": 128,
  "opus_bitrate": 32,
  "latency": "normal"
}`)

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		return fmt.Errorf("build request: %w", err)
	}

	req.Header.Add("Content-Type", "application/json")
	req.Header.Add("Authorization", "Bearer <API Key>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil on error, so return before touching res.Body.
		return fmt.Errorf("send request: %w", err)
	}
	defer res.Body.Close()

	if res.StatusCode < 200 || res.StatusCode >= 300 {
		// Best effort: include whatever error body the server sent.
		body, _ := io.ReadAll(res.Body)
		return fmt.Errorf("unexpected status %s: %s", res.Status, body)
	}

	// The response body is binary audio, so stream it to a file instead of
	// printing it as a string.
	out, err := os.Create("speech.mp3")
	if err != nil {
		return fmt.Errorf("create output file: %w", err)
	}
	if _, err := io.Copy(out, res.Body); err != nil {
		out.Close()
		return fmt.Errorf("write audio: %w", err)
	}
	return out.Close()
}

// Convert text to speech. Body values follow the documented types, ranges and
// defaults; replace the <...> placeholders with your own API key, text and
// reference model ID. "references" is null because inline reference audio
// needs MessagePack serialization; reference_id selects the voice instead.
// The response body is binary audio, so it is read with asBytes() rather than
// asString(). NOTE(review): asBytes() is the Kong Unirest-Java 3.x+ API; the
// legacy com.mashape client exposes asBinary() instead - confirm the version.
HttpResponse<byte[]> response = Unirest.post("https://api.highwayapi.ai/v4beta/txt2speech")
  .header("Content-Type", "application/json")
  .header("Authorization", "Bearer <API Key>")
  .body("{\n  \"text\": \"<string>\",\n  \"temperature\": 0.9,\n  \"top_p\": 0.9,\n  \"references\": null,\n  \"reference_id\": \"<string>\",\n  \"prosody\": {\n    \"speed\": 1,\n    \"volume\": 0\n  },\n  \"chunk_length\": 200,\n  \"normalize\": true,\n  \"format\": \"mp3\",\n  \"sample_rate\": 44100,\n  \"mp3_bitrate\": 128,\n  \"opus_bitrate\": 32,\n  \"latency\": \"normal\"\n}")
  .asBytes();

require 'uri'
require 'net/http'
require 'json'

# Convert text to speech and save the returned audio stream to speech.mp3.
# Body values follow the documented types, ranges and defaults; replace the
# <...> placeholders with your own API key, text and reference model ID.
url = URI("https://api.highwayapi.ai/v4beta/txt2speech")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = 'application/json'
request["Authorization"] = 'Bearer <API Key>'
# Build the body with a serializer instead of a hand-escaped string.
request.body = JSON.generate(
  "text" => "<string>",
  "temperature" => 0.9,
  "top_p" => 0.9,
  # Inline reference audio needs MessagePack serialization, so it is not
  # sent in this JSON request; reference_id selects the voice instead.
  "references" => nil,
  "reference_id" => "<string>",
  "prosody" => { "speed" => 1, "volume" => 0 },
  "chunk_length" => 200,
  "normalize" => true,
  "format" => "mp3",
  "sample_rate" => 44100,
  "mp3_bitrate" => 128,
  "opus_bitrate" => 32,
  "latency" => "normal"
)

response = http.request(request)

if response.is_a?(Net::HTTPSuccess)
  # The response body is binary audio, so write it in binary mode.
  File.binwrite('speech.mp3', response.read_body)
else
  abort "HTTP #{response.code}: #{response.read_body}"
end

最良の結果を得るには、この API を使用する前に、まず音声クローンを使用して参照音声をアップロードすることをお勧めします。これにより、音声品質が向上し、レイテンシが低減されます。

Fish Audio はテキストを音声に変換します。対応している音声形式：

WAV / PCM
- サンプルレート：8kHz, 16kHz, 24kHz, 32kHz, 44.1kHz
- デフォルトのサンプルレート：44.1kHz
- 16-bit、モノラル
MP3
- サンプルレート：32kHz, 44.1kHz
- デフォルトのサンプルレート：44.1kHz
- モノラル
- ビットレート：64kbps, 128kbps (デフォルト), 192kbps
Opus
- サンプルレート：48kHz
- デフォルトのサンプルレート：48kHz
- モノラル
- ビットレート：-1000 (自動), 24kbps, 32kbps (デフォルト), 48kbps, 64kbps

リクエストヘッダー

Content-Type · string

必須

列挙値: application/json

Authorization · string

必須

Bearer 認証形式: Bearer {{API Key}}。

リクエストボディ

text · string

必須

音声に変換するテキスト。

temperature · number

音声生成のランダム性を制御します。値が高いほど（例：1.0）出力はよりランダムになり、値が低いほど（例：0.1）より決定的になります。s1 モデルでは 0.9 の使用をお勧めします。有効範囲：0 <= x <= 1

top_p · number

nucleus sampling によって多様性を制御します。値が低いほど（例：0.1）出力はより集中し、値が高いほど（例：1.0）より多様性が許容されます。s1 モデルでは 0.9 の使用をお勧めします。有効範囲：0 <= x <= 1

references · ReferenceAudio · object[] | null

音声に使用する参照音声です。これには MessagePack シリアライズが必要で、reference_voices と reference_texts を上書きします。

プロパティを表示

file

必須

参照音声ファイル。

text · string

必須

音声に対応する参照テキスト。

reference_id · string | null

音声に使用する参照モデル ID。

prosody · ProsodyControl · object

音声に使用する韻律制御。

プロパティを表示

speed · number

デフォルト:1

音声速度の制御。

volume · number

デフォルト:0

音声音量の制御。

chunk_length · integer

デフォルト:200

音声に使用するチャンク長。有効範囲：100 <= x <= 300

normalize · boolean

デフォルト:true

音声を正規化するかどうか。これによりレイテンシは低減されますが、数字や日付の処理性能が低下する可能性があります。

format · enum<string>

デフォルト:"mp3"

音声に使用する形式。選択可能な値：wav, pcm, mp3, opus

sample_rate · integer | null

音声に使用するサンプルレート。

mp3_bitrate · enum<integer>

デフォルト:128

音声に使用する MP3 ビットレート。選択可能な値：64, 128, 192

opus_bitrate · enum<integer>

デフォルト:32

音声に使用する Opus ビットレート。選択可能な値：-1000, 24, 32, 48, 64

latency · enum<string>

デフォルト:"normal"

音声に使用するレイテンシ設定。balanced はレイテンシを低減しますが、性能低下を引き起こす可能性があります。選択可能な値：normal, balanced

レスポンス情報

API は format パラメータで指定された形式の音声ストリームを直接返します（デフォルト：mp3）。

Fish Audio S2 Pro テキスト読み上げ

Fish Audio 音声クローン

API の基本

大規模言語モデル

画像

動画

音声

Fish Audio 音声合成

リクエストヘッダー

リクエストボディ

レスポンス情報

リクエストヘッダー

リクエストボディ

レスポンス情報

リクエストヘッダー

リクエストボディ

レスポンス情報