Síntesis de voz síncrona MiniMax Speech 2.8 Turbo

curl --request POST \
  --url https://api.highwayapi.ai/v3/minimax-speech-2.8-turbo \
  --header 'Authorization: <authorization>' \
  --header 'Content-Type: <content-type>' \
  --data '
{
  "text": "<string>",
  "stream": true,
  "voice_modify": {
    "pitch": 123,
    "timbre": 123,
    "intensity": 123,
    "sound_effects": "<string>"
  },
  "audio_setting": {
    "format": "<string>",
    "bitrate": 123,
    "channel": 123,
    "force_cbr": true,
    "sample_rate": 123
  },
  "output_format": "<string>",
  "voice_setting": {
    "vol": 123,
    "pitch": 123,
    "speed": 123,
    "emotion": "<string>",
    "voice_id": "<string>",
    "latex_read": true,
    "text_normalization": true
  },
  "aigc_watermark": true,
  "language_boost": "<string>",
  "stream_options": {
    "exclude_aggregated_audio": true
  },
  "timber_weights": [
    {
      "weight": 123,
      "voice_id": "<string>"
    }
  ],
  "subtitle_enable": true,
  "continuous_sound": true,
  "pronunciation_dict": {
    "tone": [
      "<string>"
    ]
  }
}
'

import requests

url = "https://api.highwayapi.ai/v3/minimax-speech-2.8-turbo"

payload = {
    "text": "<string>",
    "stream": True,
    "voice_modify": {
        "pitch": 123,
        "timbre": 123,
        "intensity": 123,
        "sound_effects": "<string>"
    },
    "audio_setting": {
        "format": "<string>",
        "bitrate": 123,
        "channel": 123,
        "force_cbr": True,
        "sample_rate": 123
    },
    "output_format": "<string>",
    "voice_setting": {
        "vol": 123,
        "pitch": 123,
        "speed": 123,
        "emotion": "<string>",
        "voice_id": "<string>",
        "latex_read": True,
        "text_normalization": True
    },
    "aigc_watermark": True,
    "language_boost": "<string>",
    "stream_options": { "exclude_aggregated_audio": True },
    "timber_weights": [
        {
            "weight": 123,
            "voice_id": "<string>"
        }
    ],
    "subtitle_enable": True,
    "continuous_sound": True,
    "pronunciation_dict": { "tone": ["<string>"] }
}
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

# Send the synthesis request. An explicit timeout is set because requests waits
# indefinitely by default; 30 s matches the CURLOPT_TIMEOUT used in the PHP sample.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body returned by the API.
print(response.text)

const options = {
  method: 'POST',
  headers: {'Content-Type': '<content-type>', Authorization: '<authorization>'},
  body: JSON.stringify({
    text: '<string>',
    stream: true,
    voice_modify: {pitch: 123, timbre: 123, intensity: 123, sound_effects: '<string>'},
    audio_setting: {
      format: '<string>',
      bitrate: 123,
      channel: 123,
      force_cbr: true,
      sample_rate: 123
    },
    output_format: '<string>',
    voice_setting: {
      vol: 123,
      pitch: 123,
      speed: 123,
      emotion: '<string>',
      voice_id: '<string>',
      latex_read: true,
      text_normalization: true
    },
    aigc_watermark: true,
    language_boost: '<string>',
    stream_options: {exclude_aggregated_audio: true},
    timber_weights: [{weight: 123, voice_id: '<string>'}],
    subtitle_enable: true,
    continuous_sound: true,
    pronunciation_dict: {tone: ['<string>']}
  })
};

fetch('https://api.highwayapi.ai/v3/minimax-speech-2.8-turbo', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.highwayapi.ai/v3/minimax-speech-2.8-turbo",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'text' => '<string>',
    'stream' => true,
    'voice_modify' => [
        'pitch' => 123,
        'timbre' => 123,
        'intensity' => 123,
        'sound_effects' => '<string>'
    ],
    'audio_setting' => [
        'format' => '<string>',
        'bitrate' => 123,
        'channel' => 123,
        'force_cbr' => true,
        'sample_rate' => 123
    ],
    'output_format' => '<string>',
    'voice_setting' => [
        'vol' => 123,
        'pitch' => 123,
        'speed' => 123,
        'emotion' => '<string>',
        'voice_id' => '<string>',
        'latex_read' => true,
        'text_normalization' => true
    ],
    'aigc_watermark' => true,
    'language_boost' => '<string>',
    'stream_options' => [
        'exclude_aggregated_audio' => true
    ],
    'timber_weights' => [
        [
                'weight' => 123,
                'voice_id' => '<string>'
        ]
    ],
    'subtitle_enable' => true,
    'continuous_sound' => true,
    'pronunciation_dict' => [
        'tone' => [
                '<string>'
        ]
    ]
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: <content-type>"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts the sample text-to-speech payload to the endpoint and prints the
// raw response body. Every failure is reported and ends the program early
// instead of being discarded.
func main() {

	url := "https://api.highwayapi.ai/v3/minimax-speech-2.8-turbo"

	payload := strings.NewReader("{\n  \"text\": \"<string>\",\n  \"stream\": true,\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"timbre\": 123,\n    \"intensity\": 123,\n    \"sound_effects\": \"<string>\"\n  },\n  \"audio_setting\": {\n    \"format\": \"<string>\",\n    \"bitrate\": 123,\n    \"channel\": 123,\n    \"force_cbr\": true,\n    \"sample_rate\": 123\n  },\n  \"output_format\": \"<string>\",\n  \"voice_setting\": {\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"speed\": 123,\n    \"emotion\": \"<string>\",\n    \"voice_id\": \"<string>\",\n    \"latex_read\": true,\n    \"text_normalization\": true\n  },\n  \"aigc_watermark\": true,\n  \"language_boost\": \"<string>\",\n  \"stream_options\": {\n    \"exclude_aggregated_audio\": true\n  },\n  \"timber_weights\": [\n    {\n      \"weight\": 123,\n      \"voice_id\": \"<string>\"\n    }\n  ],\n  \"subtitle_enable\": true,\n  \"continuous_sound\": true,\n  \"pronunciation_dict\": {\n    \"tone\": [\n      \"<string>\"\n    ]\n  }\n}")

	// Report a request-construction failure rather than continuing with a nil request.
	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("build request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	// On a transport error res is nil, so check err before touching res.Body.
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("send request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("read response:", err)
		return
	}

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/minimax-speech-2.8-turbo")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body("{\n  \"text\": \"<string>\",\n  \"stream\": true,\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"timbre\": 123,\n    \"intensity\": 123,\n    \"sound_effects\": \"<string>\"\n  },\n  \"audio_setting\": {\n    \"format\": \"<string>\",\n    \"bitrate\": 123,\n    \"channel\": 123,\n    \"force_cbr\": true,\n    \"sample_rate\": 123\n  },\n  \"output_format\": \"<string>\",\n  \"voice_setting\": {\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"speed\": 123,\n    \"emotion\": \"<string>\",\n    \"voice_id\": \"<string>\",\n    \"latex_read\": true,\n    \"text_normalization\": true\n  },\n  \"aigc_watermark\": true,\n  \"language_boost\": \"<string>\",\n  \"stream_options\": {\n    \"exclude_aggregated_audio\": true\n  },\n  \"timber_weights\": [\n    {\n      \"weight\": 123,\n      \"voice_id\": \"<string>\"\n    }\n  ],\n  \"subtitle_enable\": true,\n  \"continuous_sound\": true,\n  \"pronunciation_dict\": {\n    \"tone\": [\n      \"<string>\"\n    ]\n  }\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.highwayapi.ai/v3/minimax-speech-2.8-turbo")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = '<content-type>'
request["Authorization"] = '<authorization>'
request.body = "{\n  \"text\": \"<string>\",\n  \"stream\": true,\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"timbre\": 123,\n    \"intensity\": 123,\n    \"sound_effects\": \"<string>\"\n  },\n  \"audio_setting\": {\n    \"format\": \"<string>\",\n    \"bitrate\": 123,\n    \"channel\": 123,\n    \"force_cbr\": true,\n    \"sample_rate\": 123\n  },\n  \"output_format\": \"<string>\",\n  \"voice_setting\": {\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"speed\": 123,\n    \"emotion\": \"<string>\",\n    \"voice_id\": \"<string>\",\n    \"latex_read\": true,\n    \"text_normalization\": true\n  },\n  \"aigc_watermark\": true,\n  \"language_boost\": \"<string>\",\n  \"stream_options\": {\n    \"exclude_aggregated_audio\": true\n  },\n  \"timber_weights\": [\n    {\n      \"weight\": 123,\n      \"voice_id\": \"<string>\"\n    }\n  ],\n  \"subtitle_enable\": true,\n  \"continuous_sound\": true,\n  \"pronunciation_dict\": {\n    \"tone\": [\n      \"<string>\"\n    ]\n  }\n}"

response = http.request(request)
puts response.read_body

{
  "data": {},
  "trace_id": "<string>",
  "base_resp": {},
  "extra_info": {}
}

POST

minimax-speech-2.8-turbo

Síntesis de voz síncrona MiniMax Speech 2.8 Turbo

curl --request POST \
  --url https://api.highwayapi.ai/v3/minimax-speech-2.8-turbo \
  --header 'Authorization: <authorization>' \
  --header 'Content-Type: <content-type>' \
  --data '
{
  "text": "<string>",
  "stream": true,
  "voice_modify": {
    "pitch": 123,
    "timbre": 123,
    "intensity": 123,
    "sound_effects": "<string>"
  },
  "audio_setting": {
    "format": "<string>",
    "bitrate": 123,
    "channel": 123,
    "force_cbr": true,
    "sample_rate": 123
  },
  "output_format": "<string>",
  "voice_setting": {
    "vol": 123,
    "pitch": 123,
    "speed": 123,
    "emotion": "<string>",
    "voice_id": "<string>",
    "latex_read": true,
    "text_normalization": true
  },
  "aigc_watermark": true,
  "language_boost": "<string>",
  "stream_options": {
    "exclude_aggregated_audio": true
  },
  "timber_weights": [
    {
      "weight": 123,
      "voice_id": "<string>"
    }
  ],
  "subtitle_enable": true,
  "continuous_sound": true,
  "pronunciation_dict": {
    "tone": [
      "<string>"
    ]
  }
}
'

import requests

url = "https://api.highwayapi.ai/v3/minimax-speech-2.8-turbo"

payload = {
    "text": "<string>",
    "stream": True,
    "voice_modify": {
        "pitch": 123,
        "timbre": 123,
        "intensity": 123,
        "sound_effects": "<string>"
    },
    "audio_setting": {
        "format": "<string>",
        "bitrate": 123,
        "channel": 123,
        "force_cbr": True,
        "sample_rate": 123
    },
    "output_format": "<string>",
    "voice_setting": {
        "vol": 123,
        "pitch": 123,
        "speed": 123,
        "emotion": "<string>",
        "voice_id": "<string>",
        "latex_read": True,
        "text_normalization": True
    },
    "aigc_watermark": True,
    "language_boost": "<string>",
    "stream_options": { "exclude_aggregated_audio": True },
    "timber_weights": [
        {
            "weight": 123,
            "voice_id": "<string>"
        }
    ],
    "subtitle_enable": True,
    "continuous_sound": True,
    "pronunciation_dict": { "tone": ["<string>"] }
}
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

# Send the synthesis request. An explicit timeout is set because requests waits
# indefinitely by default; 30 s matches the CURLOPT_TIMEOUT used in the PHP sample.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body returned by the API.
print(response.text)

const options = {
  method: 'POST',
  headers: {'Content-Type': '<content-type>', Authorization: '<authorization>'},
  body: JSON.stringify({
    text: '<string>',
    stream: true,
    voice_modify: {pitch: 123, timbre: 123, intensity: 123, sound_effects: '<string>'},
    audio_setting: {
      format: '<string>',
      bitrate: 123,
      channel: 123,
      force_cbr: true,
      sample_rate: 123
    },
    output_format: '<string>',
    voice_setting: {
      vol: 123,
      pitch: 123,
      speed: 123,
      emotion: '<string>',
      voice_id: '<string>',
      latex_read: true,
      text_normalization: true
    },
    aigc_watermark: true,
    language_boost: '<string>',
    stream_options: {exclude_aggregated_audio: true},
    timber_weights: [{weight: 123, voice_id: '<string>'}],
    subtitle_enable: true,
    continuous_sound: true,
    pronunciation_dict: {tone: ['<string>']}
  })
};

fetch('https://api.highwayapi.ai/v3/minimax-speech-2.8-turbo', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.highwayapi.ai/v3/minimax-speech-2.8-turbo",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'text' => '<string>',
    'stream' => true,
    'voice_modify' => [
        'pitch' => 123,
        'timbre' => 123,
        'intensity' => 123,
        'sound_effects' => '<string>'
    ],
    'audio_setting' => [
        'format' => '<string>',
        'bitrate' => 123,
        'channel' => 123,
        'force_cbr' => true,
        'sample_rate' => 123
    ],
    'output_format' => '<string>',
    'voice_setting' => [
        'vol' => 123,
        'pitch' => 123,
        'speed' => 123,
        'emotion' => '<string>',
        'voice_id' => '<string>',
        'latex_read' => true,
        'text_normalization' => true
    ],
    'aigc_watermark' => true,
    'language_boost' => '<string>',
    'stream_options' => [
        'exclude_aggregated_audio' => true
    ],
    'timber_weights' => [
        [
                'weight' => 123,
                'voice_id' => '<string>'
        ]
    ],
    'subtitle_enable' => true,
    'continuous_sound' => true,
    'pronunciation_dict' => [
        'tone' => [
                '<string>'
        ]
    ]
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: <content-type>"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts the sample text-to-speech payload to the endpoint and prints the
// raw response body. Every failure is reported and ends the program early
// instead of being discarded.
func main() {

	url := "https://api.highwayapi.ai/v3/minimax-speech-2.8-turbo"

	payload := strings.NewReader("{\n  \"text\": \"<string>\",\n  \"stream\": true,\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"timbre\": 123,\n    \"intensity\": 123,\n    \"sound_effects\": \"<string>\"\n  },\n  \"audio_setting\": {\n    \"format\": \"<string>\",\n    \"bitrate\": 123,\n    \"channel\": 123,\n    \"force_cbr\": true,\n    \"sample_rate\": 123\n  },\n  \"output_format\": \"<string>\",\n  \"voice_setting\": {\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"speed\": 123,\n    \"emotion\": \"<string>\",\n    \"voice_id\": \"<string>\",\n    \"latex_read\": true,\n    \"text_normalization\": true\n  },\n  \"aigc_watermark\": true,\n  \"language_boost\": \"<string>\",\n  \"stream_options\": {\n    \"exclude_aggregated_audio\": true\n  },\n  \"timber_weights\": [\n    {\n      \"weight\": 123,\n      \"voice_id\": \"<string>\"\n    }\n  ],\n  \"subtitle_enable\": true,\n  \"continuous_sound\": true,\n  \"pronunciation_dict\": {\n    \"tone\": [\n      \"<string>\"\n    ]\n  }\n}")

	// Report a request-construction failure rather than continuing with a nil request.
	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("build request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	// On a transport error res is nil, so check err before touching res.Body.
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("send request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("read response:", err)
		return
	}

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/minimax-speech-2.8-turbo")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body("{\n  \"text\": \"<string>\",\n  \"stream\": true,\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"timbre\": 123,\n    \"intensity\": 123,\n    \"sound_effects\": \"<string>\"\n  },\n  \"audio_setting\": {\n    \"format\": \"<string>\",\n    \"bitrate\": 123,\n    \"channel\": 123,\n    \"force_cbr\": true,\n    \"sample_rate\": 123\n  },\n  \"output_format\": \"<string>\",\n  \"voice_setting\": {\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"speed\": 123,\n    \"emotion\": \"<string>\",\n    \"voice_id\": \"<string>\",\n    \"latex_read\": true,\n    \"text_normalization\": true\n  },\n  \"aigc_watermark\": true,\n  \"language_boost\": \"<string>\",\n  \"stream_options\": {\n    \"exclude_aggregated_audio\": true\n  },\n  \"timber_weights\": [\n    {\n      \"weight\": 123,\n      \"voice_id\": \"<string>\"\n    }\n  ],\n  \"subtitle_enable\": true,\n  \"continuous_sound\": true,\n  \"pronunciation_dict\": {\n    \"tone\": [\n      \"<string>\"\n    ]\n  }\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.highwayapi.ai/v3/minimax-speech-2.8-turbo")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Content-Type"] = '<content-type>'
request["Authorization"] = '<authorization>'
request.body = "{\n  \"text\": \"<string>\",\n  \"stream\": true,\n  \"voice_modify\": {\n    \"pitch\": 123,\n    \"timbre\": 123,\n    \"intensity\": 123,\n    \"sound_effects\": \"<string>\"\n  },\n  \"audio_setting\": {\n    \"format\": \"<string>\",\n    \"bitrate\": 123,\n    \"channel\": 123,\n    \"force_cbr\": true,\n    \"sample_rate\": 123\n  },\n  \"output_format\": \"<string>\",\n  \"voice_setting\": {\n    \"vol\": 123,\n    \"pitch\": 123,\n    \"speed\": 123,\n    \"emotion\": \"<string>\",\n    \"voice_id\": \"<string>\",\n    \"latex_read\": true,\n    \"text_normalization\": true\n  },\n  \"aigc_watermark\": true,\n  \"language_boost\": \"<string>\",\n  \"stream_options\": {\n    \"exclude_aggregated_audio\": true\n  },\n  \"timber_weights\": [\n    {\n      \"weight\": 123,\n      \"voice_id\": \"<string>\"\n    }\n  ],\n  \"subtitle_enable\": true,\n  \"continuous_sound\": true,\n  \"pronunciation_dict\": {\n    \"tone\": [\n      \"<string>\"\n    ]\n  }\n}"

response = http.request(request)
puts response.read_body

{
  "data": {},
  "trace_id": "<string>",
  "base_resp": {},
  "extra_info": {}
}

Convierte texto en voz, con soporte para múltiples voces, control de emociones, ajuste de velocidad de habla y otras funciones. La longitud del texto debe ser inferior a 10000 caracteres. Si la longitud del texto supera los 3000 caracteres, se recomienda usar salida en streaming.

Encabezados de solicitud

string

requerido

Valor enum: application/json

string

requerido

Formato de autenticación Bearer: Bearer {{API Key}}.

Cuerpo de la solicitud

string

requerido

Texto que se debe sintetizar en voz. La longitud debe ser inferior a 10000 caracteres. Si la longitud del texto supera los 3000 caracteres, se recomienda usar salida en streaming. Admite cambios de párrafo (saltos de línea), control de pausas (marcador <#x#>) y etiquetas de muletillas/expresiones vocales (como (laughs), (coughs), etc.; solo compatibles con speech-2.8-hd/turbo)

boolean

predeterminado:false

Controla si se usa salida en streaming. El valor predeterminado es false, es decir, el streaming no está habilitado

object

Ocultar propiedades

integer

Ajuste de tono (grave/brillante), rango [-100, 100]. Cuanto más cerca esté el valor de -100, más grave será la voz; cuanto más cerca esté de 100, más brillante será. Rango de valores: [-100, 100]

integer

Ajuste de timbre (magnético/cristalino), rango [-100, 100]. Cuanto más cerca esté el valor de -100, más densa será la voz; cuanto más cerca esté de 100, más cristalina será. Rango de valores: [-100, 100]

integer

Ajuste de intensidad (potente/suave), rango [-100, 100]. Cuanto más cerca esté el valor de -100, más enérgica será la voz; cuanto más cerca esté de 100, más suave será. Rango de valores: [-100, 100]

string

Configuración de efectos de sonido. Solo se puede seleccionar uno por vez. Valores disponibles: spacious_echo (eco en espacio abierto), auditorium_echo (megafonía de auditorio), lofi_telephone (distorsión telefónica), robotic (sonido electrónico). Valores permitidos: spacious_echo, auditorium_echo, lofi_telephone, robotic

object

Ocultar propiedades

string

predeterminado:"mp3"

Formato del audio generado. wav solo es compatible con salida no streaming. Valores permitidos: mp3, pcm, flac, wav

integer

predeterminado:128000

Tasa de bits del audio generado. Rango disponible [32000, 64000, 128000, 256000], valor predeterminado 128000. Este parámetro solo tiene efecto para audio en formato mp3. Valores permitidos: 32000, 64000, 128000, 256000

integer

predeterminado:1

Número de canales del audio generado. Rango disponible: [1, 2], donde 1 es mono y 2 es estéreo. El valor predeterminado es 1. Valores permitidos: 1, 2

boolean

predeterminado:false

Control de tasa de bits constante (cbr) para el audio. Valores disponibles: false, true. Cuando este parámetro se establece en true, el audio se codifica con tasa de bits constante. Nota: este parámetro solo tiene efecto cuando el audio está configurado como salida en streaming y el formato de audio es mp3

integer

predeterminado:32000

Frecuencia de muestreo del audio generado. Rango disponible [8000, 16000, 22050, 24000, 32000, 44100], valor predeterminado 32000. Valores permitidos: 8000, 16000, 22050, 24000, 32000, 44100

string

predeterminado:"hex"

Parámetro que controla el formato del resultado de salida. Los valores disponibles son url y hex; el valor predeterminado es hex. Este parámetro solo tiene efecto en escenarios no streaming; en escenarios de streaming solo se admite devolver el formato hex. La url devuelta tiene una validez de 24 horas. Valores permitidos: url, hex

object

Ocultar propiedades

number

predeterminado:1

Volumen del audio sintetizado. Cuanto mayor sea el valor, mayor será el volumen. Rango de valores (0, 10], valor predeterminado 1.0. Rango de valores: (0, 10]

integer

predeterminado:0

Entonación del audio sintetizado. Rango de valores [-12, 12], valor predeterminado 0, donde 0 corresponde a la salida con la voz original. Rango de valores: [-12, 12]

number

predeterminado:1

Velocidad de habla del audio sintetizado. Cuanto mayor sea el valor, más rápida será la velocidad. Rango de valores [0.5, 2], valor predeterminado 1.0. Rango de valores: [0.5, 2]

string

Controla la emoción de la voz sintetizada. El rango de parámetros corresponde a 9 emociones: alegría (happy), tristeza (sad), ira (angry), miedo (fearful), asco (disgusted), sorpresa (surprised), neutralidad (calm), expresividad (fluent), susurro (whisper). El modelo suele emparejar automáticamente la emoción adecuada según el texto de entrada, por lo que normalmente no es necesario especificarla manualmente. Valores permitidos: happy, sad, angry, fearful, disgusted, surprised, calm, fluent, whisper

string

requerido

Identificador de la voz para el audio sintetizado. Si necesita configurar una voz mixta, establezca el parámetro timber_weights y deje este parámetro en blanco. Admite tres tipos: voces del sistema, voces clonadas y voces generadas a partir de texto

boolean

predeterminado:false

Controla si se leen fórmulas latex. El valor predeterminado es false. Solo es compatible con chino; al habilitar este parámetro, el parámetro language_boost se establecerá en Chinese

boolean

predeterminado:false

Indica si se habilita la normalización de texto en chino e inglés. Al habilitarla, puede mejorar el rendimiento en escenarios de lectura de números, pero aumentará ligeramente la latencia. El valor predeterminado es false

boolean

predeterminado:false

Controla si se agrega un identificador de ritmo de audio al final del audio sintetizado. El valor predeterminado es false. Este parámetro solo tiene efecto para síntesis no streaming

string

Indica si se mejora la capacidad de reconocimiento para el idioma minoritario o dialecto especificado. El valor predeterminado es null; puede establecerse en auto para que el modelo lo determine automáticamente. Valores permitidos: Chinese, Chinese,Yue, English, Arabic, Russian, Spanish, French, Portuguese, German, Turkish, Dutch, Ukrainian, Vietnamese, Indonesian, Japanese, Italian, Korean, Thai, Polish, Romanian, Greek, Czech, Finnish, Hindi, Bulgarian, Danish, Hebrew, Malay, Persian, Slovak, Swedish, Croatian, Filipino, Hungarian, Norwegian, Slovenian, Catalan, Nynorsk, Tamil, Afrikaans, auto

object

Ocultar propiedades

boolean

predeterminado:false

Configura si el último chunk contiene los datos hex de la voz concatenada. El valor predeterminado es false, es decir, el último chunk contiene los datos hex completos de la voz concatenada

object[]

Configuración de voz mixta. Admite la mezcla de hasta 4 voces

Ocultar propiedades

integer

requerido

Peso correspondiente a cada voz en el audio sintetizado; debe completarse junto con voice_id. El rango de valores disponible es [1, 100]. Admite la mezcla de hasta 4 voces. Cuanto mayor sea el peso de una sola voz, mayor será la similitud de la voz sintetizada con esa voz. Rango de valores: [1, 100]

string

requerido

Identificador de la voz para el audio sintetizado; debe completarse junto con el parámetro weight. Admite tres tipos: voces del sistema, voces clonadas y voces generadas a partir de texto

boolean

predeterminado:false

Controla si se habilita el servicio de subtítulos. El valor predeterminado es false. Este parámetro solo es válido en escenarios de salida no streaming y solo para los modelos speech-2.6-hd, speech-2.6-turbo, speech-01-turbo, speech-01-hd

boolean

predeterminado:false

Habilite este parámetro para que las transiciones entre cláusulas sean más naturales. Solo es compatible con los modelos speech-2.8-hd y speech-2.8-turbo

object

Ocultar propiedades

string[]

Define reglas de sustitución de anotación fonética o pronunciación para texto o símbolos que requieren una marca especial. En textos chinos, los tonos se representan con números: el primer tono es 1, el segundo tono es 2, el tercer tono es 3, el cuarto tono es 4 y el tono neutro es 5. Ejemplo: ["燕少飞/(yan4)(shao3)(fei1)", "omg/oh my god"]

Información de respuesta

object

Objeto de datos sintetizados devuelto. Puede ser null, por lo que debe realizarse una comprobación de no nulidad

string

id de esta sesión, usado para ayudar a localizar problemas durante consultas o comentarios

object

Código de estado y detalles de esta solicitud

object

Información adicional del audio

Síntesis de voz asíncrona MiniMax Speech 2.8 Turbo

Síntesis de voz asíncrona MiniMax Speech 2.8 HD

Conceptos básicos de API

Modelos de lenguaje

Imágenes

Vídeo

Audio

Síntesis de voz síncrona MiniMax Speech 2.8 Turbo

Encabezados de solicitud

Cuerpo de la solicitud

Información de respuesta

​Encabezados de solicitud

​Cuerpo de la solicitud

​Información de respuesta

Encabezados de solicitud

Cuerpo de la solicitud

Información de respuesta