TTS Speech 2.6 Turbo API | MiniMax High-Quality Speech Synthesis

MiniMax Speech-2.6-turbo Synchronous Speech Synthesis

# Synchronous speech-synthesis request: POSTs the JSON body below to the
# endpoint. Replace the <authorization> and <content-type> placeholders
# (Bearer API key and application/json) before running.
curl -X POST 'https://api.highwayapi.ai/v3/minimax-speech-2.6-turbo' \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: <content-type>' \
  -d '
{
  "text": "<string>",
  "voice_setting": {
    "speed": 123,
    "vol": 123,
    "pitch": 123,
    "voice_id": "<string>",
    "emotion": "<string>",
    "latex_read": true,
    "text_normalization": true
  },
  "audio_setting": {
    "sample_rate": 123,
    "bitrate": 123,
    "format": "<string>",
    "channel": 123
  },
  "pronunciation_dict": {
    "tone": [
      {}
    ]
  },
  "timbre_weights": [
    {
      "voice_id": "<string>",
      "weight": 123
    }
  ],
  "stream": true,
  "stream_options": {
    "exclude_aggregated_audio": true
  },
  "language_boost": "<string>",
  "output_format": "<string>",
  "voice_modify": {
    "pitch": 123,
    "intensity": 123,
    "timbre": 123,
    "sound_effects": "<string>"
  }
}
'

import requests

# Endpoint for MiniMax Speech-2.6-turbo synchronous speech synthesis.
endpoint = "https://api.highwayapi.ai/v3/minimax-speech-2.6-turbo"

# Each nested section of the request body is named separately so the final
# body reads as a table of contents. Values are documentation placeholders.
voice_setting = {
    "speed": 123,
    "vol": 123,
    "pitch": 123,
    "voice_id": "<string>",
    "emotion": "<string>",
    "latex_read": True,
    "text_normalization": True,
}
audio_setting = {
    "sample_rate": 123,
    "bitrate": 123,
    "format": "<string>",
    "channel": 123,
}
pronunciation_dict = {"tone": [{}]}
timbre_weights = [{"voice_id": "<string>", "weight": 123}]
stream_options = {"exclude_aggregated_audio": True}
voice_modify = {
    "pitch": 123,
    "intensity": 123,
    "timbre": 123,
    "sound_effects": "<string>",
}

request_body = {
    "text": "<string>",
    "voice_setting": voice_setting,
    "audio_setting": audio_setting,
    "pronunciation_dict": pronunciation_dict,
    "timbre_weights": timbre_weights,
    "stream": True,
    "stream_options": stream_options,
    "language_boost": "<string>",
    "output_format": "<string>",
    "voice_modify": voice_modify,
}
request_headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>",
}

# Send the body as JSON and print the raw response text.
result = requests.post(endpoint, json=request_body, headers=request_headers)
print(result.text)

// Endpoint for MiniMax Speech-2.6-turbo synchronous speech synthesis.
const endpoint = 'https://api.highwayapi.ai/v3/minimax-speech-2.6-turbo';

// Request body; every value is a documentation placeholder.
const requestBody = {
  text: '<string>',
  voice_setting: {
    speed: 123,
    vol: 123,
    pitch: 123,
    voice_id: '<string>',
    emotion: '<string>',
    latex_read: true,
    text_normalization: true
  },
  audio_setting: {
    sample_rate: 123,
    bitrate: 123,
    format: '<string>',
    channel: 123
  },
  pronunciation_dict: {tone: [{}]},
  timbre_weights: [{voice_id: '<string>', weight: 123}],
  stream: true,
  stream_options: {exclude_aggregated_audio: true},
  language_boost: '<string>',
  output_format: '<string>',
  voice_modify: {
    pitch: 123,
    intensity: 123,
    timbre: 123,
    sound_effects: '<string>'
  }
};

const requestInit = {
  method: 'POST',
  headers: {
    'Content-Type': '<content-type>',
    Authorization: '<authorization>'
  },
  body: JSON.stringify(requestBody)
};

// Parse the response as JSON and log it; any failure is logged as an error.
fetch(endpoint, requestInit)
  .then((response) => response.json())
  .then((data) => console.log(data))
  .catch((error) => console.error(error));

<?php

// Request body for the synchronous speech-synthesis endpoint.
// Every value is a documentation placeholder.
$payload = [
    'text' => '<string>',
    'voice_setting' => [
        'speed' => 123,
        'vol' => 123,
        'pitch' => 123,
        'voice_id' => '<string>',
        'emotion' => '<string>',
        'latex_read' => true,
        'text_normalization' => true
    ],
    'audio_setting' => [
        'sample_rate' => 123,
        'bitrate' => 123,
        'format' => '<string>',
        'channel' => 123
    ],
    'pronunciation_dict' => [
        // json_encode() turns an empty PHP array into [] rather than {}.
        // An stdClass instance is needed to emit the empty JSON object.
        'tone' => [new stdClass()]
    ],
    'timbre_weights' => [
        [
            'voice_id' => '<string>',
            'weight' => 123
        ]
    ],
    'stream' => true,
    'stream_options' => [
        'exclude_aggregated_audio' => true
    ],
    'language_boost' => '<string>',
    'output_format' => '<string>',
    'voice_modify' => [
        'pitch' => 123,
        'intensity' => 123,
        'timbre' => 123,
        'sound_effects' => '<string>'
    ]
];

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.highwayapi.ai/v3/minimax-speech-2.6-turbo",
  // Return the response from curl_exec() instead of printing it directly.
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: <content-type>"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// Print the transport error if there was one, otherwise the raw response.
if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends the sample synchronous speech-synthesis request and prints the
// raw response body. Any failure while building the request, sending it, or
// reading the response is printed and ends the program early.
func main() {
	const endpoint = "https://api.highwayapi.ai/v3/minimax-speech-2.6-turbo"

	// Raw string literal: the same JSON bytes as the escaped form, but readable.
	// Every value is a documentation placeholder.
	payload := strings.NewReader(`{
  "text": "<string>",
  "voice_setting": {
    "speed": 123,
    "vol": 123,
    "pitch": 123,
    "voice_id": "<string>",
    "emotion": "<string>",
    "latex_read": true,
    "text_normalization": true
  },
  "audio_setting": {
    "sample_rate": 123,
    "bitrate": 123,
    "format": "<string>",
    "channel": 123
  },
  "pronunciation_dict": {
    "tone": [
      {}
    ]
  },
  "timbre_weights": [
    {
      "voice_id": "<string>",
      "weight": 123
    }
  ],
  "stream": true,
  "stream_options": {
    "exclude_aggregated_audio": true
  },
  "language_boost": "<string>",
  "output_format": "<string>",
  "voice_modify": {
    "pitch": 123,
    "intensity": 123,
    "timbre": 123,
    "sound_effects": "<string>"
  }
}`)

	req, err := http.NewRequest(http.MethodPost, endpoint, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil on error, so it must not be touched (no Body to close).
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))
}

// JSON request body; every value is a documentation placeholder.
String requestJson = "{\n  \"text\": \"<string>\",\n  \"voice_setting\": {\n    \"speed\": 123,\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"voice_id\": \"<string>\",\n    \"emotion\": \"<string>\",\n    \"latex_read\": true,\n    \"text_normalization\": true\n  },\n  \"audio_setting\": {\n    \"sample_rate\": 123,\n    \"bitrate\": 123,\n    \"format\": \"<string>\",\n    \"channel\": 123\n  },\n  \"pronunciation_dict\": {\n    \"tone\": [\n      {}\n    ]\n  },\n  \"timbre_weights\": [\n    {\n      \"voice_id\": \"<string>\",\n      \"weight\": 123\n    }\n  ],\n  \"stream\": true,\n  \"stream_options\": {\n    \"exclude_aggregated_audio\": true\n  },\n  \"language_boost\": \"<string>\",\n  \"output_format\": \"<string>\",\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"intensity\": 123,\n    \"timbre\": 123,\n    \"sound_effects\": \"<string>\"\n  }\n}";

// POST the body to the synchronous speech-synthesis endpoint and read the
// response as a string.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/minimax-speech-2.6-turbo")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body(requestJson)
  .asString();

require 'uri'
require 'net/http'

# Endpoint for MiniMax Speech-2.6-turbo synchronous speech synthesis.
endpoint = URI("https://api.highwayapi.ai/v3/minimax-speech-2.6-turbo")

# HTTPS client for the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# POST request carrying the JSON body; every value is a documentation placeholder.
post = Net::HTTP::Post.new(endpoint)
post["Content-Type"] = '<content-type>'
post["Authorization"] = '<authorization>'
post.body = "{\n  \"text\": \"<string>\",\n  \"voice_setting\": {\n    \"speed\": 123,\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"voice_id\": \"<string>\",\n    \"emotion\": \"<string>\",\n    \"latex_read\": true,\n    \"text_normalization\": true\n  },\n  \"audio_setting\": {\n    \"sample_rate\": 123,\n    \"bitrate\": 123,\n    \"format\": \"<string>\",\n    \"channel\": 123\n  },\n  \"pronunciation_dict\": {\n    \"tone\": [\n      {}\n    ]\n  },\n  \"timbre_weights\": [\n    {\n      \"voice_id\": \"<string>\",\n      \"weight\": 123\n    }\n  ],\n  \"stream\": true,\n  \"stream_options\": {\n    \"exclude_aggregated_audio\": true\n  },\n  \"language_boost\": \"<string>\",\n  \"output_format\": \"<string>\",\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"intensity\": 123,\n    \"timbre\": 123,\n    \"sound_effects\": \"<string>\"\n  }\n}"

# Send the request and print the raw response body.
reply = client.request(post)
puts reply.read_body

{
  "audio": "<string>",
  "status": 123
}

POST

minimax-speech-2.6-turbo

MiniMax Speech-2.6-turbo Synchronous Speech Synthesis

# Synchronous speech-synthesis request: POSTs the JSON body below to the
# endpoint. Replace the <authorization> and <content-type> placeholders
# (Bearer API key and application/json) before running.
curl -X POST 'https://api.highwayapi.ai/v3/minimax-speech-2.6-turbo' \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: <content-type>' \
  -d '
{
  "text": "<string>",
  "voice_setting": {
    "speed": 123,
    "vol": 123,
    "pitch": 123,
    "voice_id": "<string>",
    "emotion": "<string>",
    "latex_read": true,
    "text_normalization": true
  },
  "audio_setting": {
    "sample_rate": 123,
    "bitrate": 123,
    "format": "<string>",
    "channel": 123
  },
  "pronunciation_dict": {
    "tone": [
      {}
    ]
  },
  "timbre_weights": [
    {
      "voice_id": "<string>",
      "weight": 123
    }
  ],
  "stream": true,
  "stream_options": {
    "exclude_aggregated_audio": true
  },
  "language_boost": "<string>",
  "output_format": "<string>",
  "voice_modify": {
    "pitch": 123,
    "intensity": 123,
    "timbre": 123,
    "sound_effects": "<string>"
  }
}
'

import requests

# Endpoint for MiniMax Speech-2.6-turbo synchronous speech synthesis.
endpoint = "https://api.highwayapi.ai/v3/minimax-speech-2.6-turbo"

# Each nested section of the request body is named separately so the final
# body reads as a table of contents. Values are documentation placeholders.
voice_setting = {
    "speed": 123,
    "vol": 123,
    "pitch": 123,
    "voice_id": "<string>",
    "emotion": "<string>",
    "latex_read": True,
    "text_normalization": True,
}
audio_setting = {
    "sample_rate": 123,
    "bitrate": 123,
    "format": "<string>",
    "channel": 123,
}
pronunciation_dict = {"tone": [{}]}
timbre_weights = [{"voice_id": "<string>", "weight": 123}]
stream_options = {"exclude_aggregated_audio": True}
voice_modify = {
    "pitch": 123,
    "intensity": 123,
    "timbre": 123,
    "sound_effects": "<string>",
}

request_body = {
    "text": "<string>",
    "voice_setting": voice_setting,
    "audio_setting": audio_setting,
    "pronunciation_dict": pronunciation_dict,
    "timbre_weights": timbre_weights,
    "stream": True,
    "stream_options": stream_options,
    "language_boost": "<string>",
    "output_format": "<string>",
    "voice_modify": voice_modify,
}
request_headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>",
}

# Send the body as JSON and print the raw response text.
result = requests.post(endpoint, json=request_body, headers=request_headers)
print(result.text)

// Endpoint for MiniMax Speech-2.6-turbo synchronous speech synthesis.
const endpoint = 'https://api.highwayapi.ai/v3/minimax-speech-2.6-turbo';

// Request body; every value is a documentation placeholder.
const requestBody = {
  text: '<string>',
  voice_setting: {
    speed: 123,
    vol: 123,
    pitch: 123,
    voice_id: '<string>',
    emotion: '<string>',
    latex_read: true,
    text_normalization: true
  },
  audio_setting: {
    sample_rate: 123,
    bitrate: 123,
    format: '<string>',
    channel: 123
  },
  pronunciation_dict: {tone: [{}]},
  timbre_weights: [{voice_id: '<string>', weight: 123}],
  stream: true,
  stream_options: {exclude_aggregated_audio: true},
  language_boost: '<string>',
  output_format: '<string>',
  voice_modify: {
    pitch: 123,
    intensity: 123,
    timbre: 123,
    sound_effects: '<string>'
  }
};

const requestInit = {
  method: 'POST',
  headers: {
    'Content-Type': '<content-type>',
    Authorization: '<authorization>'
  },
  body: JSON.stringify(requestBody)
};

// Parse the response as JSON and log it; any failure is logged as an error.
fetch(endpoint, requestInit)
  .then((response) => response.json())
  .then((data) => console.log(data))
  .catch((error) => console.error(error));

<?php

// Request body for the synchronous speech-synthesis endpoint.
// Every value is a documentation placeholder.
$payload = [
    'text' => '<string>',
    'voice_setting' => [
        'speed' => 123,
        'vol' => 123,
        'pitch' => 123,
        'voice_id' => '<string>',
        'emotion' => '<string>',
        'latex_read' => true,
        'text_normalization' => true
    ],
    'audio_setting' => [
        'sample_rate' => 123,
        'bitrate' => 123,
        'format' => '<string>',
        'channel' => 123
    ],
    'pronunciation_dict' => [
        // json_encode() turns an empty PHP array into [] rather than {}.
        // An stdClass instance is needed to emit the empty JSON object.
        'tone' => [new stdClass()]
    ],
    'timbre_weights' => [
        [
            'voice_id' => '<string>',
            'weight' => 123
        ]
    ],
    'stream' => true,
    'stream_options' => [
        'exclude_aggregated_audio' => true
    ],
    'language_boost' => '<string>',
    'output_format' => '<string>',
    'voice_modify' => [
        'pitch' => 123,
        'intensity' => 123,
        'timbre' => 123,
        'sound_effects' => '<string>'
    ]
];

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.highwayapi.ai/v3/minimax-speech-2.6-turbo",
  // Return the response from curl_exec() instead of printing it directly.
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: <content-type>"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// Print the transport error if there was one, otherwise the raw response.
if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends the sample synchronous speech-synthesis request and prints the
// raw response body. Any failure while building the request, sending it, or
// reading the response is printed and ends the program early.
func main() {
	const endpoint = "https://api.highwayapi.ai/v3/minimax-speech-2.6-turbo"

	// Raw string literal: the same JSON bytes as the escaped form, but readable.
	// Every value is a documentation placeholder.
	payload := strings.NewReader(`{
  "text": "<string>",
  "voice_setting": {
    "speed": 123,
    "vol": 123,
    "pitch": 123,
    "voice_id": "<string>",
    "emotion": "<string>",
    "latex_read": true,
    "text_normalization": true
  },
  "audio_setting": {
    "sample_rate": 123,
    "bitrate": 123,
    "format": "<string>",
    "channel": 123
  },
  "pronunciation_dict": {
    "tone": [
      {}
    ]
  },
  "timbre_weights": [
    {
      "voice_id": "<string>",
      "weight": 123
    }
  ],
  "stream": true,
  "stream_options": {
    "exclude_aggregated_audio": true
  },
  "language_boost": "<string>",
  "output_format": "<string>",
  "voice_modify": {
    "pitch": 123,
    "intensity": 123,
    "timbre": 123,
    "sound_effects": "<string>"
  }
}`)

	req, err := http.NewRequest(http.MethodPost, endpoint, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil on error, so it must not be touched (no Body to close).
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))
}

// JSON request body; every value is a documentation placeholder.
String requestJson = "{\n  \"text\": \"<string>\",\n  \"voice_setting\": {\n    \"speed\": 123,\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"voice_id\": \"<string>\",\n    \"emotion\": \"<string>\",\n    \"latex_read\": true,\n    \"text_normalization\": true\n  },\n  \"audio_setting\": {\n    \"sample_rate\": 123,\n    \"bitrate\": 123,\n    \"format\": \"<string>\",\n    \"channel\": 123\n  },\n  \"pronunciation_dict\": {\n    \"tone\": [\n      {}\n    ]\n  },\n  \"timbre_weights\": [\n    {\n      \"voice_id\": \"<string>\",\n      \"weight\": 123\n    }\n  ],\n  \"stream\": true,\n  \"stream_options\": {\n    \"exclude_aggregated_audio\": true\n  },\n  \"language_boost\": \"<string>\",\n  \"output_format\": \"<string>\",\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"intensity\": 123,\n    \"timbre\": 123,\n    \"sound_effects\": \"<string>\"\n  }\n}";

// POST the body to the synchronous speech-synthesis endpoint and read the
// response as a string.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/minimax-speech-2.6-turbo")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body(requestJson)
  .asString();

require 'uri'
require 'net/http'

# Endpoint for MiniMax Speech-2.6-turbo synchronous speech synthesis.
endpoint = URI("https://api.highwayapi.ai/v3/minimax-speech-2.6-turbo")

# HTTPS client for the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# POST request carrying the JSON body; every value is a documentation placeholder.
post = Net::HTTP::Post.new(endpoint)
post["Content-Type"] = '<content-type>'
post["Authorization"] = '<authorization>'
post.body = "{\n  \"text\": \"<string>\",\n  \"voice_setting\": {\n    \"speed\": 123,\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"voice_id\": \"<string>\",\n    \"emotion\": \"<string>\",\n    \"latex_read\": true,\n    \"text_normalization\": true\n  },\n  \"audio_setting\": {\n    \"sample_rate\": 123,\n    \"bitrate\": 123,\n    \"format\": \"<string>\",\n    \"channel\": 123\n  },\n  \"pronunciation_dict\": {\n    \"tone\": [\n      {}\n    ]\n  },\n  \"timbre_weights\": [\n    {\n      \"voice_id\": \"<string>\",\n      \"weight\": 123\n    }\n  ],\n  \"stream\": true,\n  \"stream_options\": {\n    \"exclude_aggregated_audio\": true\n  },\n  \"language_boost\": \"<string>\",\n  \"output_format\": \"<string>\",\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"intensity\": 123,\n    \"timbre\": 123,\n    \"sound_effects\": \"<string>\"\n  }\n}"

# Send the request and print the raw response body.
reply = client.request(post)
puts reply.read_body

{
  "audio": "<string>",
  "status": 123
}

This API supports synchronous text-to-speech generation, with a maximum of 10,000 characters per text submission. It supports 100+ system voices and custom cloned voices; volume, pitch, speed, and output format adjustments; proportional voice mixing and fixed interval timing control; and multiple audio specifications and formats, including mp3, pcm, flac, and wav. Streaming output is also supported. After submitting a long-text speech synthesis request, note that the returned URL is valid for 24 hours from the time it is returned. Please download the audio before the URL expires.

Suitable for short sentence generation, voice chat, online social scenarios, and similar use cases. It has low latency, but the text length is limited to less than 10,000 characters. For long text, we recommend using asynchronous speech synthesis.

Request Headers

string

required

Enum value: application/json

string

required

Bearer authentication format: Bearer {{API Key}}.

Request Body

string

required

The text to synthesize. The length limit is less than 10,000 characters. Use line breaks to replace paragraph breaks. (If you need to control intervals in the speech, add <#x#> between characters, where x is in seconds, supports 0.01-99.99, with at most two decimal places). Custom time intervals between texts are supported to achieve custom pauses in text-to-speech. Note that the text interval must be set between two pieces of text that can be pronounced, and multiple consecutive time intervals cannot be set.

object

required

Show properties

float

default:"1.0"

Range [0.5,2], default value is 1.0. The speaking speed of the generated voice. Optional. The larger the value, the faster the speech.

float

default:"1.0"

Range (0,10], default value is 1.0. The volume of the generated voice. Optional. The larger the value, the higher the volume.

int

default:"0"

Range [-12,12], default value is 0. The pitch of the generated voice. Optional. (0 outputs the original voice; the value must be an integer).

string

The requested voice ID. Either this or timbre_weights is required. Supports two types: system voices (ID) and cloned voices (ID). The system voices (ID) are as follows:

Young male voice: male-qn-qingse
Elite young male voice: male-qn-jingying
Dominant young male voice: male-qn-badao
Male college student voice: male-qn-daxuesheng
Young girl voice: female-shaonv
Mature lady voice: female-yujie
Mature female voice: female-chengshu
Sweet female voice: female-tianmei
Male presenter: presenter_male
Female presenter: presenter_female
Male audiobook 1: audiobook_male_1
Male audiobook 2: audiobook_male_2
Female audiobook 1: audiobook_female_1
Female audiobook 2: audiobook_female_2
Young male voice-beta: male-qn-qingse-jingpin
Elite young male voice-beta: male-qn-jingying-jingpin
Dominant young male voice-beta: male-qn-badao-jingpin
Male college student voice-beta: male-qn-daxuesheng-jingpin
Young girl voice-beta: female-shaonv-jingpin
Mature lady voice-beta: female-yujie-jingpin
Mature female voice-beta: female-chengshu-jingpin
Sweet female voice-beta: female-tianmei-jingpin
Clever boy: clever_boy
Cute boy: cute_boy
Lovely girl: lovely_girl
Cartoon Pig Xiaoqi: cartoon_pig
Yandere younger brother: bingjiao_didi
Handsome boyfriend: junlang_nanyou
Innocent junior schoolmate: chunzhen_xuedi
Aloof senior schoolmate: lengdan_xiongzhang
Dominant young master: badao_shaoye
Sweetheart Xiaoling: tianxin_xiaoling
Playful cute girl: qiaopi_mengmei
Charming mature lady: wumei_yujie
Cutesy junior schoolmate: diadia_xuemei
Elegant senior schoolmate: danya_xuejie
Santa Claus: Santa_Claus
Grinch: Grinch
Rudolph: Rudolph
Arnold: Arnold
Charming Santa: Charming_Santa
Charming Lady: Charming_Lady
Sweet Girl: Sweet_Girl
Cute Elf: Cute_Elf
Attractive Girl: Attractive_Girl
Serene Woman: Serene_Woman

string

Controls the emotion of the synthesized speech. Currently supports 7 emotions: happy, sad, angry, fearful, disgusted, surprised, and neutral. Parameter range: ["happy", "sad", "angry", "fearful", "disgusted", "surprised", "neutral"]

bool

default:"false"

Controls whether reading latex formulas is supported. Default is false. Notes:

Formulas in the request need to be enclosed with $$ at the beginning and end;
If the formula in the request contains "\", it must be escaped as "\\".

Example: The basic formula for derivatives is $$\\frac{d}{dx}(x^n) = nx^{n-1}$$

bool

default:"false"

This parameter supports English text normalization, which can improve performance in number-reading scenarios, but will slightly increase latency. If not provided, the default value is false.

object

Show properties

int

default:"32000"

Range [8000, 16000, 22050, 24000, 32000, 44100]. The sample rate of the generated voice. Optional, default is 32000.

int

default:"128000"

Range [32000, 64000, 128000, 256000]. The bitrate of the generated voice. Optional, default value is 128000. This parameter only takes effect for audio in mp3 format.

string

default:"mp3"

The generated audio format. Default is mp3, range [mp3,pcm,flac,wav]. wav is only supported for non-streaming output.

int

default:"1"

The number of channels for the generated audio. Default is 1 (mono). Options: 1 (mono), 2 (stereo).

object

Show properties

list

Replace text, symbols, and corresponding pronunciations that require special annotation. Pronunciation replacement (adjust tones/replace pronunciation with other characters), in the following format: ["燕少飞/(yan4)(shao3)(fei1)", "达菲/(da2)(fei1)", "omg/oh my god"]. Use numbers to represent tones: first tone (yinping) is 1, second tone (yangping) is 2, third tone (shangsheng) is 3, fourth tone (qusheng) is 4, and neutral tone is 5.

object[]

Either this or voice_id is required

Show properties

string

The requested voice id. Must be filled in together with the weight parameter.

int

Range [1,100]. Weight. Must be filled in together with voice_id. Mixing up to 4 voices is supported. The value must be an integer. The higher the proportion of a single voice, the more the synthesized voice will resemble it.

boolean

default:"false"

Whether to stream. Default is false, meaning streaming is not enabled.

object

Show properties

boolean

default:"false"

When this parameter is set to True, the final chunk in streaming will not contain the complete concatenated speech hex data. Default is False, meaning the final chunk contains the complete concatenated speech hex data.

string

default:"null"

Enhances recognition capability for specified minority languages and dialects. After setting it, speech performance can be improved in the specified minority language/dialect scenarios. If the minority language type is unclear, you can choose “auto”, and the model will determine the minority language type automatically. The following values are supported:

'Chinese', 'Chinese,Yue', 'English', 'Arabic', 'Russian', 'Spanish', 'French', 'Portuguese', 'German', 'Turkish', 'Dutch', 'Ukrainian', 'Vietnamese', 'Indonesian', 'Japanese', 'Italian', 'Korean', 'Thai', 'Polish', 'Romanian', 'Greek', 'Czech', 'Finnish', 'Hindi', 'Bulgarian', 'Danish', 'Hebrew', 'Malay', 'Persian', 'Slovak', 'Swedish', 'Croatian', 'Filipino', 'Hungarian', 'Norwegian', 'Slovenian', 'Catalan', 'Nynorsk', 'Tamil', 'Afrikaans', 'auto'

string

default:"hex"

Controls the form of the output result. Optional values are url and hex. Default value is hex. This parameter only takes effect in non-streaming scenarios. Streaming scenarios only support returning data in hex form. The returned url is valid for 24 hours.

object

Voice effects settings. The audio formats supported by this parameter are:

Non-streaming: mp3, wav, flac
Streaming: mp3

Show properties

integer

Pitch adjustment (deep/bright), range [-100,100]. Values close to -100 make the voice deeper; values close to 100 make the voice brighter.

integer

Intensity adjustment (powerful/soft), range [-100,100]. Values close to -100 make the voice stronger; values close to 100 make the voice softer.

integer

Timbre adjustment (magnetic/crisp), range [-100,100]. Values close to -100 make the voice richer; values close to 100 make the voice crisper.

string

Sound effect settings. Only one can be selected per request. Optional values:

spacious_echo (spacious echo)
auditorium_echo (auditorium broadcast)
lofi_telephone (telephone distortion)
robotic (electronic voice)

Response Information

string

The synthesized audio segment, encoded in hex, generated according to the input-defined format (audio_setting.format) (mp3/pcm/flac). The return form is determined by the definition of output_format. When stream is true, only hex return is supported.

number

The current audio stream status. Returned only when stream is true. 1 indicates synthesis in progress, and 2 indicates synthesis ended.

MiniMax Speech-2.6-hd Asynchronous Speech Synthesis

MiniMax Speech-2.6-turbo Async Speech Synthesis

API Basics

Large Language Models

Images

Video

Audio

MiniMax Speech-2.6-turbo Synchronous Speech Synthesis

Request Headers

Request Body

Response Information

​Request Headers

​Request Body

​Response Information

Request Headers

Request Body

Response Information