TTA Speech 2.6 HD ASYNC API | MiniMax 高品質音声合成

MiniMax Speech-2.6-hd 非同期音声合成

curl --request POST \
  --url https://api.highwayapi.ai/v3/async/minimax-speech-2.6-hd \
  --header 'Authorization: <authorization>' \
  --header 'Content-Type: <content-type>' \
  --data '
{
  "text": "<string>",
  "voice_setting": {
    "speed": 123,
    "vol": 123,
    "pitch": 123,
    "voice_id": "<string>",
    "emotion": "<string>",
    "text_normalization": true
  },
  "audio_setting": {
    "sample_rate": 123,
    "bitrate": 123,
    "format": "<string>",
    "channel": 123
  },
  "pronunciation_dict": {
    "tone": [
      {}
    ]
  },
  "language_boost": "<string>",
  "voice_modify": {
    "pitch": 123,
    "intensity": 123,
    "timbre": 123,
    "sound_effects": "<string>"
  }
}
'

import requests

url = "https://api.highwayapi.ai/v3/async/minimax-speech-2.6-hd"

payload = {
    "text": "<string>",
    "voice_setting": {
        "speed": 123,
        "vol": 123,
        "pitch": 123,
        "voice_id": "<string>",
        "emotion": "<string>",
        "text_normalization": True
    },
    "audio_setting": {
        "sample_rate": 123,
        "bitrate": 123,
        "format": "<string>",
        "channel": 123
    },
    "pronunciation_dict": { "tone": [{}] },
    "language_boost": "<string>",
    "voice_modify": {
        "pitch": 123,
        "intensity": 123,
        "timbre": 123,
        "sound_effects": "<string>"
    }
}
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

const options = {
  method: 'POST',
  headers: {'Content-Type': '<content-type>', Authorization: '<authorization>'},
  body: JSON.stringify({
    text: '<string>',
    voice_setting: {
      speed: 123,
      vol: 123,
      pitch: 123,
      voice_id: '<string>',
      emotion: '<string>',
      text_normalization: true
    },
    audio_setting: {sample_rate: 123, bitrate: 123, format: '<string>', channel: 123},
    pronunciation_dict: {tone: [{}]},
    language_boost: '<string>',
    voice_modify: {pitch: 123, intensity: 123, timbre: 123, sound_effects: '<string>'}
  })
};

fetch('https://api.highwayapi.ai/v3/async/minimax-speech-2.6-hd', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.highwayapi.ai/v3/async/minimax-speech-2.6-hd",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'text' => '<string>',
    'voice_setting' => [
        'speed' => 123,
        'vol' => 123,
        'pitch' => 123,
        'voice_id' => '<string>',
        'emotion' => '<string>',
        'text_normalization' => true
    ],
    'audio_setting' => [
        'sample_rate' => 123,
        'bitrate' => 123,
        'format' => '<string>',
        'channel' => 123
    ],
    'pronunciation_dict' => [
        'tone' => [
                [
                                
                ]
        ]
    ],
    'language_boost' => '<string>',
    'voice_modify' => [
        'pitch' => 123,
        'intensity' => 123,
        'timbre' => 123,
        'sound_effects' => '<string>'
    ]
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: <content-type>"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

func main() {

	url := "https://api.highwayapi.ai/v3/async/minimax-speech-2.6-hd"

	payload := strings.NewReader("{\n  \"text\": \"<string>\",\n  \"voice_setting\": {\n    \"speed\": 123,\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"voice_id\": \"<string>\",\n    \"emotion\": \"<string>\",\n    \"text_normalization\": true\n  },\n  \"audio_setting\": {\n    \"sample_rate\": 123,\n    \"bitrate\": 123,\n    \"format\": \"<string>\",\n    \"channel\": 123\n  },\n  \"pronunciation_dict\": {\n    \"tone\": [\n      {}\n    ]\n  },\n  \"language_boost\": \"<string>\",\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"intensity\": 123,\n    \"timbre\": 123,\n    \"sound_effects\": \"<string>\"\n  }\n}")

	req, _ := http.NewRequest("POST", url, payload)

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, _ := http.DefaultClient.Do(req)

	defer res.Body.Close()
	body, _ := io.ReadAll(res.Body)

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/async/minimax-speech-2.6-hd")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body("{\n  \"text\": \"<string>\",\n  \"voice_setting\": {\n    \"speed\": 123,\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"voice_id\": \"<string>\",\n    \"emotion\": \"<string>\",\n    \"text_normalization\": true\n  },\n  \"audio_setting\": {\n    \"sample_rate\": 123,\n    \"bitrate\": 123,\n    \"format\": \"<string>\",\n    \"channel\": 123\n  },\n  \"pronunciation_dict\": {\n    \"tone\": [\n      {}\n    ]\n  },\n  \"language_boost\": \"<string>\",\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"intensity\": 123,\n    \"timbre\": 123,\n    \"sound_effects\": \"<string>\"\n  }\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.highwayapi.ai/v3/async/minimax-speech-2.6-hd")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = '<content-type>'
request["Authorization"] = '<authorization>'
request.body = "{\n  \"text\": \"<string>\",\n  \"voice_setting\": {\n    \"speed\": 123,\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"voice_id\": \"<string>\",\n    \"emotion\": \"<string>\",\n    \"text_normalization\": true\n  },\n  \"audio_setting\": {\n    \"sample_rate\": 123,\n    \"bitrate\": 123,\n    \"format\": \"<string>\",\n    \"channel\": 123\n  },\n  \"pronunciation_dict\": {\n    \"tone\": [\n      {}\n    ]\n  },\n  \"language_boost\": \"<string>\",\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"intensity\": 123,\n    \"timbre\": 123,\n    \"sound_effects\": \"<string>\"\n  }\n}"

response = http.request(request)
puts response.read_body

{
  "task_id": "<string>"
}

POST

async

minimax-speech-2.6-hd

MiniMax Speech-2.6-hd 非同期音声合成

curl --request POST \
  --url https://api.highwayapi.ai/v3/async/minimax-speech-2.6-hd \
  --header 'Authorization: <authorization>' \
  --header 'Content-Type: <content-type>' \
  --data '
{
  "text": "<string>",
  "voice_setting": {
    "speed": 123,
    "vol": 123,
    "pitch": 123,
    "voice_id": "<string>",
    "emotion": "<string>",
    "text_normalization": true
  },
  "audio_setting": {
    "sample_rate": 123,
    "bitrate": 123,
    "format": "<string>",
    "channel": 123
  },
  "pronunciation_dict": {
    "tone": [
      {}
    ]
  },
  "language_boost": "<string>",
  "voice_modify": {
    "pitch": 123,
    "intensity": 123,
    "timbre": 123,
    "sound_effects": "<string>"
  }
}
'

import requests

url = "https://api.highwayapi.ai/v3/async/minimax-speech-2.6-hd"

payload = {
    "text": "<string>",
    "voice_setting": {
        "speed": 123,
        "vol": 123,
        "pitch": 123,
        "voice_id": "<string>",
        "emotion": "<string>",
        "text_normalization": True
    },
    "audio_setting": {
        "sample_rate": 123,
        "bitrate": 123,
        "format": "<string>",
        "channel": 123
    },
    "pronunciation_dict": { "tone": [{}] },
    "language_boost": "<string>",
    "voice_modify": {
        "pitch": 123,
        "intensity": 123,
        "timbre": 123,
        "sound_effects": "<string>"
    }
}
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

const options = {
  method: 'POST',
  headers: {'Content-Type': '<content-type>', Authorization: '<authorization>'},
  body: JSON.stringify({
    text: '<string>',
    voice_setting: {
      speed: 123,
      vol: 123,
      pitch: 123,
      voice_id: '<string>',
      emotion: '<string>',
      text_normalization: true
    },
    audio_setting: {sample_rate: 123, bitrate: 123, format: '<string>', channel: 123},
    pronunciation_dict: {tone: [{}]},
    language_boost: '<string>',
    voice_modify: {pitch: 123, intensity: 123, timbre: 123, sound_effects: '<string>'}
  })
};

fetch('https://api.highwayapi.ai/v3/async/minimax-speech-2.6-hd', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.highwayapi.ai/v3/async/minimax-speech-2.6-hd",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'text' => '<string>',
    'voice_setting' => [
        'speed' => 123,
        'vol' => 123,
        'pitch' => 123,
        'voice_id' => '<string>',
        'emotion' => '<string>',
        'text_normalization' => true
    ],
    'audio_setting' => [
        'sample_rate' => 123,
        'bitrate' => 123,
        'format' => '<string>',
        'channel' => 123
    ],
    'pronunciation_dict' => [
        'tone' => [
                [
                                
                ]
        ]
    ],
    'language_boost' => '<string>',
    'voice_modify' => [
        'pitch' => 123,
        'intensity' => 123,
        'timbre' => 123,
        'sound_effects' => '<string>'
    ]
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: <content-type>"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

func main() {

	url := "https://api.highwayapi.ai/v3/async/minimax-speech-2.6-hd"

	payload := strings.NewReader("{\n  \"text\": \"<string>\",\n  \"voice_setting\": {\n    \"speed\": 123,\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"voice_id\": \"<string>\",\n    \"emotion\": \"<string>\",\n    \"text_normalization\": true\n  },\n  \"audio_setting\": {\n    \"sample_rate\": 123,\n    \"bitrate\": 123,\n    \"format\": \"<string>\",\n    \"channel\": 123\n  },\n  \"pronunciation_dict\": {\n    \"tone\": [\n      {}\n    ]\n  },\n  \"language_boost\": \"<string>\",\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"intensity\": 123,\n    \"timbre\": 123,\n    \"sound_effects\": \"<string>\"\n  }\n}")

	req, _ := http.NewRequest("POST", url, payload)

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, _ := http.DefaultClient.Do(req)

	defer res.Body.Close()
	body, _ := io.ReadAll(res.Body)

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/async/minimax-speech-2.6-hd")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body("{\n  \"text\": \"<string>\",\n  \"voice_setting\": {\n    \"speed\": 123,\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"voice_id\": \"<string>\",\n    \"emotion\": \"<string>\",\n    \"text_normalization\": true\n  },\n  \"audio_setting\": {\n    \"sample_rate\": 123,\n    \"bitrate\": 123,\n    \"format\": \"<string>\",\n    \"channel\": 123\n  },\n  \"pronunciation_dict\": {\n    \"tone\": [\n      {}\n    ]\n  },\n  \"language_boost\": \"<string>\",\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"intensity\": 123,\n    \"timbre\": 123,\n    \"sound_effects\": \"<string>\"\n  }\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.highwayapi.ai/v3/async/minimax-speech-2.6-hd")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = '<content-type>'
request["Authorization"] = '<authorization>'
request.body = "{\n  \"text\": \"<string>\",\n  \"voice_setting\": {\n    \"speed\": 123,\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"voice_id\": \"<string>\",\n    \"emotion\": \"<string>\",\n    \"text_normalization\": true\n  },\n  \"audio_setting\": {\n    \"sample_rate\": 123,\n    \"bitrate\": 123,\n    \"format\": \"<string>\",\n    \"channel\": 123\n  },\n  \"pronunciation_dict\": {\n    \"tone\": [\n      {}\n    ]\n  },\n  \"language_boost\": \"<string>\",\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"intensity\": 123,\n    \"timbre\": 123,\n    \"sound_effects\": \"<string>\"\n  }\n}"

response = http.request(request)
puts response.read_body

{
  "task_id": "<string>"
}

この API は、テキストから音声への非同期生成に対応しています。1 回のテキスト生成送信で最大 100 万文字をサポートし、生成された完全な音声結果は非同期方式で取得できます。100+ のシステムボイス、クローンボイスを自由に選択可能です。また、イントネーション、話速、音量、ビットレート、サンプリングレート、出力形式を自由に調整できます。長文の音声合成リクエストを送信した後、返される url の有効期限は url が返されてから 24 時間である点に注意してください。ダウンロード情報の有効時間にご注意ください。

書籍全体などの長文テキストの音声生成に適しています。タスクのキューイングに時間がかかる場合があります。短文生成、音声チャット、オンラインソーシャルなどのシーンでは、同期音声合成呼び出しの使用を推奨します。

リクエストヘッダー

string

必須

列挙値: application/json

string

必須

Bearer 認証形式: Bearer {{API シークレットキー}}。

リクエストボディ

string

必須

合成するテキスト。最大 5 万文字まで。

object

必須

表示 properties

number

範囲[0.5,2]、デフォルト値は 1.0生成音声の話速。任意。値が大きいほど、話速が速くなります。

number

範囲（0,10]、デフォルト値は 1.0生成音声の音量。任意。値が大きいほど、音量が大きくなります。

number

デフォルト:0

範囲[-12,12]、デフォルト値は 0生成音声のイントネーション。任意。（0 は元のボイスの出力で、値は整数である必要があります）。

string

リクエストするボイス番号。システムボイス(id)およびクローンボイス（id）の 2 種類をサポートしています。システムボイス（ID）は以下のとおりです：

初々しい青年ボイス：male-qn-qingse
エリート青年ボイス：male-qn-jingying
霸道系青年ボイス：male-qn-badao
青年大学生ボイス：male-qn-daxuesheng
少女ボイス：female-shaonv
大人のお姉さんボイス：female-yujie
成熟女性ボイス：female-chengshu
甘い女性ボイス：female-tianmei
男性司会者：presenter_male
女性司会者：presenter_female
男性オーディオブック 1：audiobook_male_1
男性オーディオブック 2：audiobook_male_2
女性オーディオブック 1：audiobook_female_1
女性オーディオブック 2：audiobook_female_2
初々しい青年ボイス-beta：male-qn-qingse-jingpin
エリート青年ボイス-beta：male-qn-jingying-jingpin
霸道系青年ボイス-beta：male-qn-badao-jingpin
青年大学生ボイス-beta：male-qn-daxuesheng-jingpin
少女ボイス-beta：female-shaonv-jingpin
大人のお姉さんボイス-beta：female-yujie-jingpin
成熟女性ボイス-beta：female-chengshu-jingpin
甘い女性ボイス-beta：female-tianmei-jingpin
賢い男の子：clever_boy
可愛い男の子：cute_boy
萌え系女の子：lovely_girl
カートン豚の小琪：cartoon_pig
ヤンデレ弟：bingjiao_didi
爽やかな彼氏：junlang_nanyou
純真な後輩男子：chunzhen_xuedi
クールな先輩：lengdan_xiongzhang
霸道系のお坊ちゃま：badao_shaoye
スイートな小玲：tianxin_xiaoling
おちゃめな萌え妹：qiaopi_mengmei
妖艶なお姉さん：wumei_yujie
甘えん坊の後輩女子：diadia_xuemei
上品な先輩女子：danya_xuejie
Santa Claus：Santa_Claus
Grinch：Grinch
Rudolph：Rudolph
Arnold：Arnold
Charming Santa：Charming_Santa
Charming Lady：Charming_Lady
Sweet Girl：Sweet_Girl
Cute Elf：Cute_Elf
Attractive Girl：Attractive_Girl
Serene Woman：Serene_Woman

string

合成音声の感情を制御します；現在 7 種類の感情をサポートしています：喜び、悲しみ、怒り、恐れ、嫌悪、驚き、ニュートラル；パラメータ範囲：["happy", "sad", "angry", "fearful", "disgusted", "surprised", "neutral"]

bool

デフォルト:"false"

このパラメータは英語テキストの正規化をサポートし、数字の読み上げシーンでの性能を向上できますが、レイテンシがわずかに増加します。指定しない場合、デフォルト値は false です。

object

表示 properties

number

デフォルト:32000

範囲【8000，16000，22050，24000，32000，44100】生成音声のサンプリングレート。任意。デフォルトは 32000 です。

number

デフォルト:128000

範囲【32000，64000，128000，256000】生成音声のビットレート。任意。デフォルト値は 128000 です。このパラメータは mp3 形式の音声にのみ有効です。

string

デフォルト:"mp3"

生成される音声形式。デフォルトは mp3。選択可能：mp3, pcm, flac, wav。wav は非ストリーミング出力でのみサポートされます。

number

デフォルト:1

生成音声のチャンネル数。デフォルト 1：モノラル。選択可能：1：モノラル2：ステレオ

object

表示 properties

list

特殊な注釈が必要な文字、記号、および対応する発音表記を置換します。発音の置換（声調の調整/他の文字の発音への置換）の形式は以下のとおりです：["燕少飞/(yan4)(shao3)(fei1)","达菲/(da2)(fei1)"，"omg/oh my god"]声調は数字で表します。一声（陰平）は 1、二声（陽平）は 2、三声（上声）は 3、四声（去声）は 4、軽声は 5 です。

string

デフォルト:"null"

指定した小語種および方言の認識能力を強化し、設定後は指定した小語種/方言のシーンでの音声表現を向上できます。小語種の種類が不明な場合は、“auto” を選択できます。モデルが小語種の種類を自動的に判断します。以下の値をサポートしています：

'Chinese', 'Chinese,Yue', 'English', 'Arabic', 'Russian', 'Spanish', 'French', 'Portuguese', 'German', 'Turkish', 'Dutch', 'Ukrainian', 'Vietnamese', 'Indonesian', 'Japanese', 'Italian', 'Korean', 'Thai', 'Polish', 'Romanian', 'Greek', 'Czech', 'Finnish', 'Hindi', 'Bulgarian', 'Danish', 'Hebrew', 'Malay', 'Persian', 'Slovak', 'Swedish', 'Croatian', 'Filipino', 'Hungarian', 'Norwegian', 'Slovenian', 'Catalan', 'Nynorsk', 'Tamil', 'Afrikaans', 'auto'

object

ボイスエフェクター設定。このパラメータがサポートする音声形式：mp3, wav, flac

表示 properties

integer

ピッチ調整（低い/明るい）。範囲 [-100,100]。値が -100 に近いほど声はより低くなり、100 に近いほど声はより明るくなります

integer

強度調整（力強い/柔らかい）。範囲 [-100,100]。値が -100 に近いほど声はより力強くなり、100 に近いほど声はより柔らかくなります

integer

音色調整（磁性的/澄んだ）。範囲 [-100,100]。値が -100 に近いほど声はより厚みが増し、100 に近いほど声はより澄んだ音になります

string

音響効果設定。1 回につき 1 種類のみ選択できます。選択可能な値：

spacious_echo（広い空間のエコー）
auditorium_echo（講堂放送）
lofi_telephone（電話風歪み）
robotic（電子音）

レスポンス情報パラメータ

string

必須

非同期タスクの task_id。この task_id を使用してタスク結果照会 API にリクエストし、生成結果を取得してください

MiniMax Speech-2.6-hd 同期音声合成

MiniMax Speech-2.6-turbo 同期音声合成

API の基本

大規模言語モデル

画像

動画

音声

MiniMax Speech-2.6-hd 非同期音声合成

リクエストヘッダー

リクエストボディ

レスポンス情報パラメータ

​リクエストヘッダー

​リクエストボディ

​レスポンス情報パラメータ

リクエストヘッダー

リクエストボディ

レスポンス情報パラメータ