ElevenLabs Speech-to-Text V1

# POST a transcription request to ElevenLabs Speech-to-Text V1.
# Replace every <...> placeholder with a real value before running.
curl -X POST 'https://api.highwayapi.ai/v3/elevenlabs-scribe-v1' \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: <content-type>' \
  -d '
{
  "seed": 123,
  "diarize": true,
  "file_format": "<string>",
  "temperature": 123,
  "num_speakers": 123,
  "language_code": "<string>",
  "tag_audio_events": true,
  "cloud_storage_url": "<string>",
  "use_multi_channel": true,
  "diarization_threshold": 123,
  "timestamps_granularity": "<string>"
}
'

import requests

# Endpoint for ElevenLabs Speech-to-Text V1.
url = "https://api.highwayapi.ai/v3/elevenlabs-scribe-v1"

# JSON request body. Every value below is a placeholder; replace it with a
# real one before sending.
payload = {
    "seed": 123,
    "diarize": True,
    "file_format": "<string>",
    "temperature": 123,
    "num_speakers": 123,
    "language_code": "<string>",
    "tag_audio_events": True,
    "cloud_storage_url": "<string>",
    "use_multi_channel": True,
    "diarization_threshold": 123,
    "timestamps_granularity": "<string>"
}
# Request headers (placeholders as well).
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

# requests applies no timeout unless one is given, so an unresponsive server
# would block this script indefinitely. Bound the wait to 30 seconds.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body.
print(response.text)

// Request settings: POST with the two headers and a JSON-encoded body.
// Every <...> value is a placeholder to be replaced before running.
const options = {
  method: 'POST',
  headers: {
    'Content-Type': '<content-type>',
    Authorization: '<authorization>'
  },
  body: JSON.stringify({
    seed: 123,
    diarize: true,
    file_format: '<string>',
    temperature: 123,
    num_speakers: 123,
    language_code: '<string>',
    tag_audio_events: true,
    cloud_storage_url: '<string>',
    use_multi_channel: true,
    diarization_threshold: 123,
    timestamps_granularity: '<string>'
  })
};

// Send the request and log the parsed JSON response; any failure along the
// way (network error, invalid JSON) is logged through console.error.
(async () => {
  try {
    const response = await fetch('https://api.highwayapi.ai/v3/elevenlabs-scribe-v1', options);
    const result = await response.json();
    console.log(result);
  } catch (error) {
    console.error(error);
  }
})();

<?php

// Endpoint, JSON body and headers for the ElevenLabs Speech-to-Text V1 request.
// Every <...> value is a placeholder to be replaced before running.
$endpoint = "https://api.highwayapi.ai/v3/elevenlabs-scribe-v1";

$payload = json_encode([
  'seed' => 123,
  'diarize' => true,
  'file_format' => '<string>',
  'temperature' => 123,
  'num_speakers' => 123,
  'language_code' => '<string>',
  'tag_audio_events' => true,
  'cloud_storage_url' => '<string>',
  'use_multi_channel' => true,
  'diarization_threshold' => 123,
  'timestamps_granularity' => '<string>'
]);

$headers = [
  "Authorization: <authorization>",
  "Content-Type: <content-type>"
];

$ch = curl_init();

// Configure a POST that returns the response as a string, with a 30-second timeout.
curl_setopt_array($ch, [
  CURLOPT_URL => $endpoint,
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => $payload,
  CURLOPT_HTTPHEADER => $headers,
]);

// Run the transfer and capture any transport error before releasing the handle.
$response = curl_exec($ch);
$err = curl_error($ch);

curl_close($ch);

// Print the response body on success, otherwise the cURL error text.
if (!$err) {
  echo $response;
} else {
  echo "cURL Error #:" . $err;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a transcription request to the ElevenLabs Speech-to-Text V1
// endpoint and prints the raw response body. A failure while building the
// request, sending it, or reading the response is reported and ends the
// program instead of being silently discarded.
func main() {
	url := "https://api.highwayapi.ai/v3/elevenlabs-scribe-v1"

	// Placeholder JSON body; replace the sample values before sending.
	payload := strings.NewReader("{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so it must not be dereferenced here.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	fmt.Println(string(body))
}

// Send the transcription request with Unirest and read the response body as a String.
// The JSON body is assembled one line at a time; every value is a placeholder.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/elevenlabs-scribe-v1")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body(
    "{\n"
    + "  \"seed\": 123,\n"
    + "  \"diarize\": true,\n"
    + "  \"file_format\": \"<string>\",\n"
    + "  \"temperature\": 123,\n"
    + "  \"num_speakers\": 123,\n"
    + "  \"language_code\": \"<string>\",\n"
    + "  \"tag_audio_events\": true,\n"
    + "  \"cloud_storage_url\": \"<string>\",\n"
    + "  \"use_multi_channel\": true,\n"
    + "  \"diarization_threshold\": 123,\n"
    + "  \"timestamps_granularity\": \"<string>\"\n"
    + "}")
  .asString();

require 'uri'
require 'net/http'

# Target endpoint for ElevenLabs Speech-to-Text V1.
endpoint = URI("https://api.highwayapi.ai/v3/elevenlabs-scribe-v1")

# Placeholder JSON body, assembled line by line; replace the sample values
# before sending.
json_body = [
  '{',
  '  "seed": 123,',
  '  "diarize": true,',
  '  "file_format": "<string>",',
  '  "temperature": 123,',
  '  "num_speakers": 123,',
  '  "language_code": "<string>",',
  '  "tag_audio_events": true,',
  '  "cloud_storage_url": "<string>",',
  '  "use_multi_channel": true,',
  '  "diarization_threshold": 123,',
  '  "timestamps_granularity": "<string>"',
  '}'
].join("\n")

# HTTPS client for the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# POST request carrying the two headers and the JSON body.
post = Net::HTTP::Post.new(endpoint)
post["Content-Type"] = '<content-type>'
post["Authorization"] = '<authorization>'
post.body = json_body

# Send the request and print the raw response body.
reply = client.request(post)
puts reply.read_body

POST

elevenlabs-scribe-v1

ElevenLabs Speech-to-Text V1

# POST a transcription request to ElevenLabs Speech-to-Text V1.
# Replace every <...> placeholder with a real value before running.
curl -X POST 'https://api.highwayapi.ai/v3/elevenlabs-scribe-v1' \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: <content-type>' \
  -d '
{
  "seed": 123,
  "diarize": true,
  "file_format": "<string>",
  "temperature": 123,
  "num_speakers": 123,
  "language_code": "<string>",
  "tag_audio_events": true,
  "cloud_storage_url": "<string>",
  "use_multi_channel": true,
  "diarization_threshold": 123,
  "timestamps_granularity": "<string>"
}
'

import requests

# Endpoint for ElevenLabs Speech-to-Text V1.
url = "https://api.highwayapi.ai/v3/elevenlabs-scribe-v1"

# JSON request body. Every value below is a placeholder; replace it with a
# real one before sending.
payload = {
    "seed": 123,
    "diarize": True,
    "file_format": "<string>",
    "temperature": 123,
    "num_speakers": 123,
    "language_code": "<string>",
    "tag_audio_events": True,
    "cloud_storage_url": "<string>",
    "use_multi_channel": True,
    "diarization_threshold": 123,
    "timestamps_granularity": "<string>"
}
# Request headers (placeholders as well).
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

# requests applies no timeout unless one is given, so an unresponsive server
# would block this script indefinitely. Bound the wait to 30 seconds.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body.
print(response.text)

// Request settings: POST with the two headers and a JSON-encoded body.
// Every <...> value is a placeholder to be replaced before running.
const options = {
  method: 'POST',
  headers: {
    'Content-Type': '<content-type>',
    Authorization: '<authorization>'
  },
  body: JSON.stringify({
    seed: 123,
    diarize: true,
    file_format: '<string>',
    temperature: 123,
    num_speakers: 123,
    language_code: '<string>',
    tag_audio_events: true,
    cloud_storage_url: '<string>',
    use_multi_channel: true,
    diarization_threshold: 123,
    timestamps_granularity: '<string>'
  })
};

// Send the request and log the parsed JSON response; any failure along the
// way (network error, invalid JSON) is logged through console.error.
(async () => {
  try {
    const response = await fetch('https://api.highwayapi.ai/v3/elevenlabs-scribe-v1', options);
    const result = await response.json();
    console.log(result);
  } catch (error) {
    console.error(error);
  }
})();

<?php

// Endpoint, JSON body and headers for the ElevenLabs Speech-to-Text V1 request.
// Every <...> value is a placeholder to be replaced before running.
$endpoint = "https://api.highwayapi.ai/v3/elevenlabs-scribe-v1";

$payload = json_encode([
  'seed' => 123,
  'diarize' => true,
  'file_format' => '<string>',
  'temperature' => 123,
  'num_speakers' => 123,
  'language_code' => '<string>',
  'tag_audio_events' => true,
  'cloud_storage_url' => '<string>',
  'use_multi_channel' => true,
  'diarization_threshold' => 123,
  'timestamps_granularity' => '<string>'
]);

$headers = [
  "Authorization: <authorization>",
  "Content-Type: <content-type>"
];

$ch = curl_init();

// Configure a POST that returns the response as a string, with a 30-second timeout.
curl_setopt_array($ch, [
  CURLOPT_URL => $endpoint,
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => $payload,
  CURLOPT_HTTPHEADER => $headers,
]);

// Run the transfer and capture any transport error before releasing the handle.
$response = curl_exec($ch);
$err = curl_error($ch);

curl_close($ch);

// Print the response body on success, otherwise the cURL error text.
if (!$err) {
  echo $response;
} else {
  echo "cURL Error #:" . $err;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a transcription request to the ElevenLabs Speech-to-Text V1
// endpoint and prints the raw response body. A failure while building the
// request, sending it, or reading the response is reported and ends the
// program instead of being silently discarded.
func main() {
	url := "https://api.highwayapi.ai/v3/elevenlabs-scribe-v1"

	// Placeholder JSON body; replace the sample values before sending.
	payload := strings.NewReader("{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so it must not be dereferenced here.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	fmt.Println(string(body))
}

// Send the transcription request with Unirest and read the response body as a String.
// The JSON body is assembled one line at a time; every value is a placeholder.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/elevenlabs-scribe-v1")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body(
    "{\n"
    + "  \"seed\": 123,\n"
    + "  \"diarize\": true,\n"
    + "  \"file_format\": \"<string>\",\n"
    + "  \"temperature\": 123,\n"
    + "  \"num_speakers\": 123,\n"
    + "  \"language_code\": \"<string>\",\n"
    + "  \"tag_audio_events\": true,\n"
    + "  \"cloud_storage_url\": \"<string>\",\n"
    + "  \"use_multi_channel\": true,\n"
    + "  \"diarization_threshold\": 123,\n"
    + "  \"timestamps_granularity\": \"<string>\"\n"
    + "}")
  .asString();

require 'uri'
require 'net/http'

# Target endpoint for ElevenLabs Speech-to-Text V1.
endpoint = URI("https://api.highwayapi.ai/v3/elevenlabs-scribe-v1")

# Placeholder JSON body, assembled line by line; replace the sample values
# before sending.
json_body = [
  '{',
  '  "seed": 123,',
  '  "diarize": true,',
  '  "file_format": "<string>",',
  '  "temperature": 123,',
  '  "num_speakers": 123,',
  '  "language_code": "<string>",',
  '  "tag_audio_events": true,',
  '  "cloud_storage_url": "<string>",',
  '  "use_multi_channel": true,',
  '  "diarization_threshold": 123,',
  '  "timestamps_granularity": "<string>"',
  '}'
].join("\n")

# HTTPS client for the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# POST request carrying the two headers and the JSON body.
post = Net::HTTP::Post.new(endpoint)
post["Content-Type"] = '<content-type>'
post["Authorization"] = '<authorization>'
post.body = json_body

# Send the request and print the raw response body.
reply = client.request(post)
puts reply.read_body

Transcreve arquivos de áudio ou vídeo. Quando use_multi_channel for true e o áudio enviado tiver vários canais, retorna um objeto ‘transcripts’, com uma transcrição por canal. Caso contrário, retorna um único resultado de transcrição.

Cabeçalhos da solicitação

string

obrigatório

Valores enumerados: application/json

string

obrigatório

Formato de autenticação Bearer: Bearer {{API Key}}.

Corpo da solicitação

integer

Se especificado, o sistema fará o possível para amostrar de forma determinística; solicitações com o mesmo seed e os mesmos parâmetros devem retornar o mesmo resultado, mas o determinismo absoluto não é garantido. Deve ser um inteiro entre 0 e 2147483647. Intervalo de valores: [0, 2147483647]

boolean

padrão:false

Indica se deve rotular o falante atual no arquivo enviado.

string

padrão:"other"

Formato do áudio de entrada. Pode ser ‘pcm_s16le_16’ ou ‘other’. pcm_s16le_16 exige que o áudio esteja em taxa de amostragem de 16 kHz, inteiro de 16 bits, mono e little-endian, com menor latência em comparação a formas de onda codificadas. Valores possíveis: pcm_s16le_16, other

number

Controla a aleatoriedade da saída da transcrição. O intervalo é de 0.0 a 2.0; valores mais altos tornam os resultados mais variados e menos determinísticos. Se omitido, será usada a temperatura padrão do modelo selecionado (geralmente 0). Intervalo de valores: [0, 2]

integer

Número máximo de falantes no arquivo enviado. Pode ser usado para auxiliar na diferenciação dos falantes, com suporte a até 32 falantes. Intervalo de valores: [1, 32]

string

Especifica o código de idioma ISO-639-1 ou ISO-639-3 do arquivo de áudio. Informar antecipadamente às vezes pode melhorar o desempenho da transcrição. O padrão é null, e o idioma será detectado automaticamente.

boolean

padrão:true

Indica se eventos de áudio, como (laughter) e (footsteps), devem ser marcados na transcrição.

string

obrigatório

Link HTTPS do arquivo a ser transcrito. file e cloud_storage_url são mutuamente exclusivos; um dos dois deve ser fornecido. O arquivo deve ser acessível via HTTPS e ter menos de 2 GB. Qualquer endereço HTTPS válido é compatível, incluindo armazenamento em nuvem (AWS S3, GCS, Cloudflare R2 etc.), CDN ou outras origens HTTPS, com suporte a links pré-assinados com token ou autenticação por parâmetros de consulta na URL.

boolean

padrão:false

Indica se o arquivo de áudio é multicanal e se cada canal contém apenas um único falante. Quando ativado, cada canal será transcrito de forma independente e os resultados serão combinados; cada palavra no conteúdo de saída incluirá o campo channel_index. Suporta até 5 canais.

number

Limiar de diarização. Com valores maiores, é menor a probabilidade de uma pessoa ser dividida em várias, mas maior a probabilidade de pessoas diferentes serem mescladas em uma só (menos falantes identificados); com valores menores, aumenta a probabilidade de uma pessoa ser dividida em várias, mas diminui a probabilidade de pessoas diferentes serem mescladas em uma só (mais falantes). Só pode ser definido quando diarize=True e num_speakers=None. O padrão é None, e o limiar será escolhido com base no id do modelo (geralmente 0.22). Intervalo de valores: [0.1, 0.4]

string

padrão:"word"

Granularidade dos timestamps no conteúdo da transcrição. ‘word’ fornece timestamps em nível de palavra, enquanto ‘character’ fornece timestamps para cada caractere. Valores possíveis: none, word, character

Informações da resposta

A resposta pode ser um dos seguintes tipos:

Tipo de resposta 1

string

obrigatório

Texto bruto transcrito.

object[]

obrigatório

Lista de palavras e suas informações de tempo.

Ocultar properties

number

Horário de término desta palavra ou som no áudio (em segundos).

string

obrigatório

Conteúdo da palavra ou som transcrito.

string

obrigatório

Tipo desta palavra ou som. ‘audio_event’ é usado para sons que não são palavras, como risadas ou passos. Valores possíveis: word, spacing, audio_event

number

Horário de início desta palavra ou som no áudio (em segundos).

number

obrigatório

Logaritmo da probabilidade ao prever esta palavra. O intervalo de logprob é [-infinity, 0]; quanto maior o valor, maior a confiança do modelo na previsão.

object[]

Caracteres que compõem a palavra e suas respectivas informações de tempo.

Ocultar properties

number

Horário de término do caractere no áudio (em segundos).

string

obrigatório

Conteúdo do caractere transcrito.

number

Horário de início do caractere no áudio (em segundos).

string

Identificador único do falante correspondente a esta palavra.

integer

Índice do canal correspondente a esta transcrição (válido para áudio multicanal).

string

obrigatório

Código do idioma detectado (por exemplo, ‘eng’ para inglês).

string

ID único da transcrição desta resposta.

number

obrigatório

Confiança da detecção de idioma (entre 0 e 1).

Tipo de resposta 2

object[]

obrigatório

Lista de transcrições correspondente a cada canal de áudio. Cada transcrição inclui o texto do canal correspondente e detalhes em nível de palavra.

Ocultar properties

string

obrigatório

Texto bruto transcrito.

object[]

obrigatório

Lista de palavras e suas informações de tempo.

Ocultar properties

number

Horário de término desta palavra ou som no áudio (em segundos).

string

obrigatório

Conteúdo da palavra ou som transcrito.

string

obrigatório

Tipo desta palavra ou som. ‘audio_event’ é usado para sons que não são palavras, como risadas ou passos. Valores possíveis: word, spacing, audio_event

number

Horário de início desta palavra ou som no áudio (em segundos).

number

obrigatório

Logaritmo da probabilidade ao prever esta palavra. O intervalo de logprob é [-infinity, 0]; quanto maior o valor, maior a confiança do modelo na previsão.

object[]

Caracteres que compõem a palavra e suas respectivas informações de tempo.

Ocultar properties

number

Horário de término do caractere no áudio (em segundos).

string

obrigatório

Conteúdo do caractere transcrito.

number

Horário de início do caractere no áudio (em segundos).

string

Identificador único do falante correspondente a esta palavra.

integer

Índice do canal correspondente a esta transcrição (válido para áudio multicanal).

string

obrigatório

Código do idioma detectado (por exemplo, ‘eng’ para inglês).

string

ID único da transcrição desta resposta.

number

obrigatório

Confiança da detecção de idioma (entre 0 e 1).

string

ID único da transcrição desta resposta.

Clonagem rápida de áudio MiniMax

ElevenLabs Conversão de fala em texto V2

Noções básicas da API

Modelos de linguagem

Imagens

Vídeo

Áudio

ElevenLabs Speech-to-Text V1

Cabeçalhos da solicitação

Corpo da solicitação

Informações da resposta

​Cabeçalhos da solicitação

​Corpo da solicitação

​Informações da resposta

Cabeçalhos da solicitação

Corpo da solicitação

Informações da resposta