ElevenLabs Conversão de fala em texto V2

# POST a sample JSON body to the elevenlabs-scribe-v2 endpoint.
# The <authorization> and <content-type> values are placeholders.
curl -X POST \
  'https://api.highwayapi.ai/v3/elevenlabs-scribe-v2' \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: <content-type>' \
  -d '
{
  "seed": 123,
  "diarize": true,
  "file_format": "<string>",
  "temperature": 123,
  "num_speakers": 123,
  "language_code": "<string>",
  "tag_audio_events": true,
  "cloud_storage_url": "<string>",
  "use_multi_channel": true,
  "diarization_threshold": 123,
  "timestamps_granularity": "<string>"
}
'

import requests

# Sample POST to the elevenlabs-scribe-v2 endpoint; all values are placeholders.
url = "https://api.highwayapi.ai/v3/elevenlabs-scribe-v2"

# Request headers (placeholder values).
headers = {"Content-Type": "<content-type>", "Authorization": "<authorization>"}

# JSON request body, one entry per documented parameter.
payload = dict(
    seed=123,
    diarize=True,
    file_format="<string>",
    temperature=123,
    num_speakers=123,
    language_code="<string>",
    tag_audio_events=True,
    cloud_storage_url="<string>",
    use_multi_channel=True,
    diarization_threshold=123,
    timestamps_granularity="<string>",
)

# Send the payload as JSON and print the raw response text.
response = requests.post(url, headers=headers, json=payload)
print(response.text)

// Request body with a placeholder value for each documented parameter.
const payload = {
  seed: 123,
  diarize: true,
  file_format: '<string>',
  temperature: 123,
  num_speakers: 123,
  language_code: '<string>',
  tag_audio_events: true,
  cloud_storage_url: '<string>',
  use_multi_channel: true,
  diarization_threshold: 123,
  timestamps_granularity: '<string>'
};

// fetch() options: POST with placeholder headers and the JSON-encoded payload.
const options = {
  method: 'POST',
  headers: {'Content-Type': '<content-type>', Authorization: '<authorization>'},
  body: JSON.stringify(payload)
};

// Send the request, log the parsed JSON response, and log any error raised along the way.
(async () => {
  try {
    const res = await fetch('https://api.highwayapi.ai/v3/elevenlabs-scribe-v2', options);
    const data = await res.json();
    console.log(data);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Request body with a placeholder value for each documented parameter.
$payload = [
  'seed' => 123,
  'diarize' => true,
  'file_format' => '<string>',
  'temperature' => 123,
  'num_speakers' => 123,
  'language_code' => '<string>',
  'tag_audio_events' => true,
  'cloud_storage_url' => '<string>',
  'use_multi_channel' => true,
  'diarization_threshold' => 123,
  'timestamps_granularity' => '<string>'
];

// Request headers (placeholder values).
$headers = [
  "Authorization: <authorization>",
  "Content-Type: <content-type>"
];

// Configure the cURL handle one option at a time.
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, "https://api.highwayapi.ai/v3/elevenlabs-scribe-v2");
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_ENCODING, "");
curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
curl_setopt($curl, CURLOPT_TIMEOUT, 30);
curl_setopt($curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
curl_setopt($curl, CURLOPT_CUSTOMREQUEST, "POST");
curl_setopt($curl, CURLOPT_POSTFIELDS, json_encode($payload));
curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);

// Execute the request and capture any transport error before closing the handle.
$response = curl_exec($curl);
$err = curl_error($curl);
curl_close($curl);

// Print the cURL error if there was one, otherwise the response body.
echo ($err ? ("cURL Error #:" . $err) : $response);

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a sample JSON body (placeholder values) to the
// elevenlabs-scribe-v2 endpoint and prints the raw response body.
// Any failure is reported on stdout and the program returns early.
func main() {

	url := "https://api.highwayapi.ai/v3/elevenlabs-scribe-v2"

	payload := strings.NewReader("{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("failed to build request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so return before touching res.Body.
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("failed to read response body:", err)
		return
	}

	fmt.Println(string(body))

}

// JSON request body with a placeholder value for each documented parameter.
String requestBody = "{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}";

// POST the body with placeholder headers and read the response as a string.
HttpResponse<String> response = Unirest
  .post("https://api.highwayapi.ai/v3/elevenlabs-scribe-v2")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Target endpoint; host and port for the connection are read from this URI.
endpoint = URI("https://api.highwayapi.ai/v3/elevenlabs-scribe-v2")

# JSON request body with a placeholder value for each documented parameter.
json_body = "{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}"

# TLS-enabled HTTP client for the endpoint.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# Build the POST request with placeholder headers.
post = Net::HTTP::Post.new(endpoint)
post["Content-Type"] = '<content-type>'
post["Authorization"] = '<authorization>'
post.body = json_body

# Send the request and print the response body.
puts client.request(post).read_body

POST

elevenlabs-scribe-v2

ElevenLabs Conversão de fala em texto V2

# POST a sample JSON body to the elevenlabs-scribe-v2 endpoint.
# The <authorization> and <content-type> values are placeholders.
curl -X POST \
  'https://api.highwayapi.ai/v3/elevenlabs-scribe-v2' \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: <content-type>' \
  -d '
{
  "seed": 123,
  "diarize": true,
  "file_format": "<string>",
  "temperature": 123,
  "num_speakers": 123,
  "language_code": "<string>",
  "tag_audio_events": true,
  "cloud_storage_url": "<string>",
  "use_multi_channel": true,
  "diarization_threshold": 123,
  "timestamps_granularity": "<string>"
}
'

import requests

# Sample POST to the elevenlabs-scribe-v2 endpoint; all values are placeholders.
url = "https://api.highwayapi.ai/v3/elevenlabs-scribe-v2"

# Request headers (placeholder values).
headers = {"Content-Type": "<content-type>", "Authorization": "<authorization>"}

# JSON request body, one entry per documented parameter.
payload = dict(
    seed=123,
    diarize=True,
    file_format="<string>",
    temperature=123,
    num_speakers=123,
    language_code="<string>",
    tag_audio_events=True,
    cloud_storage_url="<string>",
    use_multi_channel=True,
    diarization_threshold=123,
    timestamps_granularity="<string>",
)

# Send the payload as JSON and print the raw response text.
response = requests.post(url, headers=headers, json=payload)
print(response.text)

// Request body with a placeholder value for each documented parameter.
const payload = {
  seed: 123,
  diarize: true,
  file_format: '<string>',
  temperature: 123,
  num_speakers: 123,
  language_code: '<string>',
  tag_audio_events: true,
  cloud_storage_url: '<string>',
  use_multi_channel: true,
  diarization_threshold: 123,
  timestamps_granularity: '<string>'
};

// fetch() options: POST with placeholder headers and the JSON-encoded payload.
const options = {
  method: 'POST',
  headers: {'Content-Type': '<content-type>', Authorization: '<authorization>'},
  body: JSON.stringify(payload)
};

// Send the request, log the parsed JSON response, and log any error raised along the way.
(async () => {
  try {
    const res = await fetch('https://api.highwayapi.ai/v3/elevenlabs-scribe-v2', options);
    const data = await res.json();
    console.log(data);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Request body with a placeholder value for each documented parameter.
$payload = [
  'seed' => 123,
  'diarize' => true,
  'file_format' => '<string>',
  'temperature' => 123,
  'num_speakers' => 123,
  'language_code' => '<string>',
  'tag_audio_events' => true,
  'cloud_storage_url' => '<string>',
  'use_multi_channel' => true,
  'diarization_threshold' => 123,
  'timestamps_granularity' => '<string>'
];

// Request headers (placeholder values).
$headers = [
  "Authorization: <authorization>",
  "Content-Type: <content-type>"
];

// Configure the cURL handle one option at a time.
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, "https://api.highwayapi.ai/v3/elevenlabs-scribe-v2");
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_ENCODING, "");
curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
curl_setopt($curl, CURLOPT_TIMEOUT, 30);
curl_setopt($curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
curl_setopt($curl, CURLOPT_CUSTOMREQUEST, "POST");
curl_setopt($curl, CURLOPT_POSTFIELDS, json_encode($payload));
curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);

// Execute the request and capture any transport error before closing the handle.
$response = curl_exec($curl);
$err = curl_error($curl);
curl_close($curl);

// Print the cURL error if there was one, otherwise the response body.
echo ($err ? ("cURL Error #:" . $err) : $response);

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a sample JSON body (placeholder values) to the
// elevenlabs-scribe-v2 endpoint and prints the raw response body.
// Any failure is reported on stdout and the program returns early.
func main() {

	url := "https://api.highwayapi.ai/v3/elevenlabs-scribe-v2"

	payload := strings.NewReader("{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("failed to build request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so return before touching res.Body.
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("failed to read response body:", err)
		return
	}

	fmt.Println(string(body))

}

// JSON request body with a placeholder value for each documented parameter.
String requestBody = "{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}";

// POST the body with placeholder headers and read the response as a string.
HttpResponse<String> response = Unirest
  .post("https://api.highwayapi.ai/v3/elevenlabs-scribe-v2")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Target endpoint; host and port for the connection are read from this URI.
endpoint = URI("https://api.highwayapi.ai/v3/elevenlabs-scribe-v2")

# JSON request body with a placeholder value for each documented parameter.
json_body = "{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}"

# TLS-enabled HTTP client for the endpoint.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# Build the POST request with placeholder headers.
post = Net::HTTP::Post.new(endpoint)
post["Content-Type"] = '<content-type>'
post["Authorization"] = '<authorization>'
post.body = json_body

# Send the request and print the response body.
puts client.request(post).read_body

Transcreve arquivos de áudio ou vídeo. Quando use_multi_channel for true e o áudio enviado tiver múltiplos canais, retorna um objeto ‘transcripts’, com uma transcrição por canal. Caso contrário, retorna um único resultado de transcrição.

Cabeçalhos da solicitação

string

obrigatório

Valores enumerados: application/json

string

obrigatório

Formato de autenticação Bearer: Bearer {{API Key}}.

Corpo da solicitação

integer

Se especificado, o sistema fará o possível para amostrar de forma determinística; solicitações com o mesmo seed e os mesmos parâmetros devem retornar o mesmo resultado, mas determinismo absoluto não é garantido. Deve ser um inteiro entre 0 e 2147483647. Intervalo de valores: [0, 2147483647]

boolean

padrão:false

Se deve marcar o falante atual no arquivo enviado.

string

padrão:"other"

Formato do áudio de entrada. Pode ser ‘pcm_s16le_16’ ou ‘other’. pcm_s16le_16 exige que o áudio tenha taxa de amostragem de 16 kHz, inteiro de 16 bits, mono, formato little-endian, com menor latência em comparação a formas de onda codificadas. Valores opcionais: pcm_s16le_16, other

number

Controla a aleatoriedade da saída da transcrição. O intervalo de valores é de 0.0 a 2.0; valores mais altos produzem resultados mais diversos e menos determinísticos. Se omitido, será usada a temperatura padrão do modelo selecionado (geralmente 0). Intervalo de valores: [0, 2]

integer

Número máximo de falantes no arquivo enviado. Pode ser usado para auxiliar na diferenciação de falantes; suporta até 32 falantes. Intervalo de valores: [1, 32]

string

Especifica o código de idioma ISO-639-1 ou ISO-639-3 do arquivo de áudio. Indicar antecipadamente às vezes pode melhorar o desempenho da transcrição. O padrão é null, e o idioma será detectado automaticamente.

boolean

padrão:true

Se deve marcar eventos de áudio como (laughter) e (footsteps) na transcrição.

string

obrigatório

Link HTTPS do arquivo a ser transcrito. file e cloud_storage_url são mutuamente exclusivos; um deles deve ser fornecido. O arquivo deve ser acessível via HTTPS e ter menos de 2GB. Qualquer endereço HTTPS válido é aceito, incluindo armazenamento em nuvem (AWS S3, GCS, Cloudflare R2 etc.), CDN ou outras origens HTTPS, com suporte a links pré-assinados com token ou autenticação por parâmetros de consulta de URL.

boolean

padrão:false

Se o arquivo de áudio é multicanal e cada canal contém apenas um único falante. Quando habilitado, cada canal será transcrito independentemente e os resultados serão combinados; cada palavra no conteúdo de saída inclui o campo channel_index. Suporta até 5 canais.

number

Limiar de diarização. Com valores mais altos, a probabilidade de uma pessoa ser dividida em várias é menor, mas a probabilidade de pessoas diferentes serem mescladas em uma só é maior (menos falantes identificados); com valores mais baixos, a probabilidade de uma pessoa ser dividida em várias aumenta, mas a probabilidade de pessoas diferentes serem mescladas em uma só diminui (mais falantes). Só pode ser definido quando diarize=True e num_speakers=None. O padrão é None, e o limiar será escolhido de acordo com o model id (geralmente 0.22). Intervalo de valores: [0.1, 0.4]

string

padrão:"word"

Granularidade dos timestamps no conteúdo transcrito. ‘word’ fornece timestamps em nível de palavra, e ‘character’ fornece timestamps para cada caractere. Valores opcionais: none, word, character

Informações da resposta

A resposta pode ser um dos seguintes tipos de resposta:

Tipo de resposta 1

string

obrigatório

Texto original transcrito.

object[]

obrigatório

Lista de palavras e suas informações de tempo.

Ocultar properties

number

Tempo de término desta palavra ou som no áudio (em segundos).

string

obrigatório

Conteúdo da palavra ou som transcrito.

string

obrigatório

Tipo desta palavra ou som. ‘audio_event’ é usado para sons que não são palavras, como risadas ou passos. Valores opcionais: word, spacing, audio_event

number

Tempo de início desta palavra ou som no áudio (em segundos).

number

obrigatório

Logaritmo da probabilidade ao prever esta palavra. O intervalo de logprob é (-∞, 0]; valores mais altos indicam que o modelo está mais confiante na previsão.

object[]

Caracteres que compõem a palavra e suas respectivas informações de tempo.

Ocultar properties

number

Tempo de término do caractere no áudio (em segundos).

string

obrigatório

Conteúdo do caractere transcrito.

number

Tempo de início do caractere no áudio (em segundos).

string

Identificador único do falante correspondente a esta palavra.

integer

Índice do canal correspondente a esta transcrição (válido para áudio multicanal).

string

obrigatório

Código de idioma detectado (por exemplo, ‘eng’ indica inglês).

string

ID único de transcrição desta resposta.

number

obrigatório

Confiança da detecção de idioma (entre 0 e 1).

Tipo de resposta 2

object[]

obrigatório

Lista de transcrições correspondentes a cada canal de áudio. Cada transcrição contém o texto do canal correspondente e detalhes em nível de palavra.

Ocultar properties

string

obrigatório

Texto original transcrito.

object[]

obrigatório

Lista de palavras e suas informações de tempo.

Ocultar properties

number

Tempo de término desta palavra ou som no áudio (em segundos).

string

obrigatório

Conteúdo da palavra ou som transcrito.

string

obrigatório

Tipo desta palavra ou som. ‘audio_event’ é usado para sons que não são palavras, como risadas ou passos. Valores opcionais: word, spacing, audio_event

number

Tempo de início desta palavra ou som no áudio (em segundos).

number

obrigatório

Logaritmo da probabilidade ao prever esta palavra. O intervalo de logprob é (-∞, 0]; valores mais altos indicam que o modelo está mais confiante na previsão.

object[]

Caracteres que compõem a palavra e suas respectivas informações de tempo.

Ocultar properties

number

Tempo de término do caractere no áudio (em segundos).

string

obrigatório

Conteúdo do caractere transcrito.

number

Tempo de início do caractere no áudio (em segundos).

string

Identificador único do falante correspondente a esta palavra.

integer

Índice do canal correspondente a esta transcrição (válido para áudio multicanal).

string

obrigatório

Código de idioma detectado (por exemplo, ‘eng’ indica inglês).

string

ID único de transcrição desta resposta.

number

obrigatório

Confiança da detecção de idioma (entre 0 e 1).

string

ID único de transcrição desta resposta.

ElevenLabs Speech-to-Text V1

ElevenLabs Texto para fala Flash V2

Noções básicas da API

Modelos de linguagem

Imagens

Vídeo

Áudio

ElevenLabs Conversão de fala em texto V2

Cabeçalhos da solicitação

Corpo da solicitação

Informações da resposta

​Cabeçalhos da solicitação

​Corpo da solicitação

​Informações da resposta

Cabeçalhos da solicitação

Corpo da solicitação

Informações da resposta