ElevenLabs 语音转文本 V2

# POST the JSON transcription options below to the Scribe v2 endpoint.
# The <...> values are placeholders to be filled in before running.
curl -X POST \
  'https://api.highwayapi.ai/v3/elevenlabs-scribe-v2' \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: <content-type>' \
  -d '
{
  "seed": 123,
  "diarize": true,
  "file_format": "<string>",
  "temperature": 123,
  "num_speakers": 123,
  "language_code": "<string>",
  "tag_audio_events": true,
  "cloud_storage_url": "<string>",
  "use_multi_channel": true,
  "diarization_threshold": 123,
  "timestamps_granularity": "<string>"
}
'

import requests

# Endpoint of the Scribe v2 speech-to-text API.
endpoint = "https://api.highwayapi.ai/v3/elevenlabs-scribe-v2"

# Header values are placeholders to be filled in before running.
request_headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

# Transcription options sent as the JSON request body (sample/placeholder values).
request_body = {
    "seed": 123,
    "diarize": True,
    "file_format": "<string>",
    "temperature": 123,
    "num_speakers": 123,
    "language_code": "<string>",
    "tag_audio_events": True,
    "cloud_storage_url": "<string>",
    "use_multi_channel": True,
    "diarization_threshold": 123,
    "timestamps_granularity": "<string>"
}

# Send the POST request and print the raw response body.
reply = requests.request(
    "POST",
    endpoint,
    json=request_body,
    headers=request_headers,
)

print(reply.text)

// Endpoint of the Scribe v2 speech-to-text API.
const endpoint = 'https://api.highwayapi.ai/v3/elevenlabs-scribe-v2';

// Transcription options sent as the JSON request body (sample/placeholder values).
const requestBody = {
  seed: 123,
  diarize: true,
  file_format: '<string>',
  temperature: 123,
  num_speakers: 123,
  language_code: '<string>',
  tag_audio_events: true,
  cloud_storage_url: '<string>',
  use_multi_channel: true,
  diarization_threshold: 123,
  timestamps_granularity: '<string>'
};

// POST the request, parse the reply as JSON and log it; any failure is logged as an error.
async function transcribe() {
  try {
    const reply = await fetch(endpoint, {
      method: 'POST',
      headers: {'Content-Type': '<content-type>', Authorization: '<authorization>'},
      body: JSON.stringify(requestBody)
    });
    const parsed = await reply.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
}

transcribe();

<?php

// Transcription options, JSON-encoded for the request body (sample/placeholder values).
$requestBody = json_encode([
  'seed' => 123,
  'diarize' => true,
  'file_format' => '<string>',
  'temperature' => 123,
  'num_speakers' => 123,
  'language_code' => '<string>',
  'tag_audio_events' => true,
  'cloud_storage_url' => '<string>',
  'use_multi_channel' => true,
  'diarization_threshold' => 123,
  'timestamps_granularity' => '<string>'
]);

// Header values are placeholders to be filled in before running.
$requestHeaders = [
  "Authorization: <authorization>",
  "Content-Type: <content-type>"
];

// Configure a cURL handle for a POST to the Scribe v2 endpoint.
$handle = curl_init();

curl_setopt_array($handle, [
  CURLOPT_URL => "https://api.highwayapi.ai/v3/elevenlabs-scribe-v2",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => $requestBody,
  CURLOPT_HTTPHEADER => $requestHeaders,
]);

// Run the transfer, capture any transport error, then release the handle.
$body = curl_exec($handle);
$failure = curl_error($handle);

curl_close($handle);

// Print the cURL error message if there is one, otherwise the response body.
echo $failure ? "cURL Error #:" . $failure : $body;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a transcription request to the Scribe v2 endpoint and prints the
// raw response body. Each step's error is checked: on failure the error is
// printed and the program returns instead of continuing with unusable values.
func main() {
	url := "https://api.highwayapi.ai/v3/elevenlabs-scribe-v2"

	// JSON request body (sample/placeholder values).
	payload := strings.NewReader("{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("failed to build request:", err)
		return
	}

	// Header values are placeholders to be filled in before running.
	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res may be nil when Do fails, so res.Body must not be touched here.
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("failed to read response body:", err)
		return
	}

	fmt.Println(string(body))
}

// JSON request body with the transcription options (sample/placeholder values).
String requestBody = "{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}";

// POST the body to the Scribe v2 endpoint and read the reply as a string.
// Header values are placeholders to be filled in before running.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/elevenlabs-scribe-v2")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Endpoint of the Scribe v2 speech-to-text API.
endpoint = URI("https://api.highwayapi.ai/v3/elevenlabs-scribe-v2")

# HTTP client for the endpoint's host, with TLS switched on.
client = Net::HTTP.new(endpoint.host, endpoint.port).tap { |c| c.use_ssl = true }

# Build the POST request; header values are placeholders to be filled in before running.
post = Net::HTTP::Post.new(endpoint)
post["Content-Type"] = '<content-type>'
post["Authorization"] = '<authorization>'
post.body = "{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}"

# Send the request and print the raw response body.
reply = client.request(post)
puts reply.body

POST

elevenlabs-scribe-v2

ElevenLabs 语音转文本 V2

# POST the JSON transcription options below to the Scribe v2 endpoint.
# The <...> values are placeholders to be filled in before running.
curl -X POST \
  'https://api.highwayapi.ai/v3/elevenlabs-scribe-v2' \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: <content-type>' \
  -d '
{
  "seed": 123,
  "diarize": true,
  "file_format": "<string>",
  "temperature": 123,
  "num_speakers": 123,
  "language_code": "<string>",
  "tag_audio_events": true,
  "cloud_storage_url": "<string>",
  "use_multi_channel": true,
  "diarization_threshold": 123,
  "timestamps_granularity": "<string>"
}
'

import requests

# Endpoint of the Scribe v2 speech-to-text API.
endpoint = "https://api.highwayapi.ai/v3/elevenlabs-scribe-v2"

# Header values are placeholders to be filled in before running.
request_headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>"
}

# Transcription options sent as the JSON request body (sample/placeholder values).
request_body = {
    "seed": 123,
    "diarize": True,
    "file_format": "<string>",
    "temperature": 123,
    "num_speakers": 123,
    "language_code": "<string>",
    "tag_audio_events": True,
    "cloud_storage_url": "<string>",
    "use_multi_channel": True,
    "diarization_threshold": 123,
    "timestamps_granularity": "<string>"
}

# Send the POST request and print the raw response body.
reply = requests.request(
    "POST",
    endpoint,
    json=request_body,
    headers=request_headers,
)

print(reply.text)

// Endpoint of the Scribe v2 speech-to-text API.
const endpoint = 'https://api.highwayapi.ai/v3/elevenlabs-scribe-v2';

// Transcription options sent as the JSON request body (sample/placeholder values).
const requestBody = {
  seed: 123,
  diarize: true,
  file_format: '<string>',
  temperature: 123,
  num_speakers: 123,
  language_code: '<string>',
  tag_audio_events: true,
  cloud_storage_url: '<string>',
  use_multi_channel: true,
  diarization_threshold: 123,
  timestamps_granularity: '<string>'
};

// POST the request, parse the reply as JSON and log it; any failure is logged as an error.
async function transcribe() {
  try {
    const reply = await fetch(endpoint, {
      method: 'POST',
      headers: {'Content-Type': '<content-type>', Authorization: '<authorization>'},
      body: JSON.stringify(requestBody)
    });
    const parsed = await reply.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
}

transcribe();

<?php

// Transcription options, JSON-encoded for the request body (sample/placeholder values).
$requestBody = json_encode([
  'seed' => 123,
  'diarize' => true,
  'file_format' => '<string>',
  'temperature' => 123,
  'num_speakers' => 123,
  'language_code' => '<string>',
  'tag_audio_events' => true,
  'cloud_storage_url' => '<string>',
  'use_multi_channel' => true,
  'diarization_threshold' => 123,
  'timestamps_granularity' => '<string>'
]);

// Header values are placeholders to be filled in before running.
$requestHeaders = [
  "Authorization: <authorization>",
  "Content-Type: <content-type>"
];

// Configure a cURL handle for a POST to the Scribe v2 endpoint.
$handle = curl_init();

curl_setopt_array($handle, [
  CURLOPT_URL => "https://api.highwayapi.ai/v3/elevenlabs-scribe-v2",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => $requestBody,
  CURLOPT_HTTPHEADER => $requestHeaders,
]);

// Run the transfer, capture any transport error, then release the handle.
$body = curl_exec($handle);
$failure = curl_error($handle);

curl_close($handle);

// Print the cURL error message if there is one, otherwise the response body.
echo $failure ? "cURL Error #:" . $failure : $body;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a transcription request to the Scribe v2 endpoint and prints the
// raw response body. Each step's error is checked: on failure the error is
// printed and the program returns instead of continuing with unusable values.
func main() {
	url := "https://api.highwayapi.ai/v3/elevenlabs-scribe-v2"

	// JSON request body (sample/placeholder values).
	payload := strings.NewReader("{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("failed to build request:", err)
		return
	}

	// Header values are placeholders to be filled in before running.
	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res may be nil when Do fails, so res.Body must not be touched here.
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("failed to read response body:", err)
		return
	}

	fmt.Println(string(body))
}

// JSON request body with the transcription options (sample/placeholder values).
String requestBody = "{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}";

// POST the body to the Scribe v2 endpoint and read the reply as a string.
// Header values are placeholders to be filled in before running.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/elevenlabs-scribe-v2")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Endpoint of the Scribe v2 speech-to-text API.
endpoint = URI("https://api.highwayapi.ai/v3/elevenlabs-scribe-v2")

# HTTP client for the endpoint's host, with TLS switched on.
client = Net::HTTP.new(endpoint.host, endpoint.port).tap { |c| c.use_ssl = true }

# Build the POST request; header values are placeholders to be filled in before running.
post = Net::HTTP::Post.new(endpoint)
post["Content-Type"] = '<content-type>'
post["Authorization"] = '<authorization>'
post.body = "{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}"

# Send the request and print the raw response body.
reply = client.request(post)
puts reply.body

转录音频或视频文件。当 use_multi_channel 为 true 且上传的音频有多个声道时，返回 ‘transcripts’ 对象，每个声道一个转录。否则返回单一转录结果。

请求头

Content-Type

string

必填

枚举值: application/json

Authorization

string

必填

Bearer 身份验证格式: Bearer {{API 密钥}}。

请求体

seed

integer

如指定，系统会尽力按确定性方式采样，相同 seed 和参数的请求应返回相同结果，但不保证绝对确定性。必须为 0 到 2147483647 之间的整数。取值范围：[0, 2147483647]

diarize

boolean

默认值:false

是否标注上传文件中各时刻正在讲话的说话人。

file_format

string

默认值:"other"

输入音频的格式。可选 ‘pcm_s16le_16’ 或 ‘other’。选择 pcm_s16le_16 时，输入音频必须为 16kHz 采样率、16 位整型、单声道、小端字节序；与传入已编码的音频相比，延迟更低。可选值：pcm_s16le_16, other

temperature

number

控制转录输出的随机性。取值范围 0.0 ～ 2.0，值越高结果越多样且越不确定。如省略，将使用所选模型的默认温度（通常为0）。取值范围：[0, 2]

num_speakers

integer

上传文件中讲话者的最大数量。可用于辅助区分说话人，最多支持 32 名讲话者。取值范围：[1, 32]

language_code

string

音频文件所用语言的 ISO-639-1 或 ISO-639-3 代码。预先指定语言有时可提升转录效果。默认为 null，此时将自动检测语言。

tag_audio_events

boolean

默认值:true

是否在转录中标记如（laughter）（footsteps）等音频事件。

cloud_storage_url

string

必填

待转录文件的 HTTPS 链接。file 和 cloud_storage_url 必须二选一。文件须可通过 HTTPS 访问且小于 2GB，支持任何合法 HTTPS 地址，包括云存储（AWS S3、GCS、Cloudflare R2 等）、CDN 或其他 HTTPS 来源，支持带 token 的预签名链接或 URL 查询参数鉴权。

use_multi_channel

boolean

默认值:false

音频文件是否为多声道，且每个声道仅包含单一讲话人。启用后将独立转录每个声道并合成结果，输出内容的每个单词包含 channel_index 字段，最多支持 5 个声道。

diarization_threshold

number

说话人分离（diarization）阈值。阈值越高，同一说话人被误分为多人的概率越低，但不同说话人被合并为一人的概率越高（识别出的说话人较少）；阈值越低，同一说话人被误分为多人的概率越高，但不同说话人被合并为一人的概率越低（识别出的说话人较多）。仅当 diarize 为 true 且未指定 num_speakers 时可设置。默认为 null，此时将根据模型 ID 选择阈值（通常为 0.22）。取值范围：[0.1, 0.4]

timestamps_granularity

string

默认值:"word"

转录内容中时间戳的粒度。‘word’ 提供单词级时间戳，‘character’ 提供每个字符的时间戳。可选值：none, word, character

响应信息

响应可能为以下响应类型之一：

响应类型 1

text

string

必填

转录的原始文本。

words

object[]

必填

单词及其时间信息列表。

隐藏 properties

end

number

该单词或声音在音频中的结束时间（秒）。

text

string

必填

已转录的单词或声音内容。

type

string

必填

此单词或声音的类型。‘audio_event’ 用于非单词声音，如笑声或脚步声等。可选值：word, spacing, audio_event

start

number

该单词或声音在音频中的起始时间（秒）。

logprob

number

必填

预测该单词时的概率对数。logprob 范围为 [-infinity, 0]，值越高表示模型预测越有信心。

characters

object[]

构成单词的字符及其对应的时间信息。

隐藏 properties

end

number

字符在音频中的结束时间（秒）。

text

string

必填

已转录的字符内容。

start

number

字符在音频中的起始时间（秒）。

speaker_id

string

该单词对应说话人的唯一标识。

channel_index

integer

该条转录对应的声道索引（多声道音频时有效）。

language_code

string

必填

检测到的语言代码（例如 ‘eng’ 表示英语）。

transcription_id

string

该响应的转录唯一 ID。

language_probability

number

必填

语言检测的置信度（0 到 1 之间）。

响应类型 2

transcripts

object[]

必填

每个音频声道对应的转录列表。每条转录包含所属声道的文本及单词级别详细信息。

隐藏 properties

text

string

必填

转录的原始文本。

words

object[]

必填

单词及其时间信息列表。

隐藏 properties

end

number

该单词或声音在音频中的结束时间（秒）。

text

string

必填

已转录的单词或声音内容。

type

string

必填

此单词或声音的类型。‘audio_event’ 用于非单词声音，如笑声或脚步声等。可选值：word, spacing, audio_event

start

number

该单词或声音在音频中的起始时间（秒）。

logprob

number

必填

预测该单词时的概率对数。logprob 范围为 [-infinity, 0]，值越高表示模型预测越有信心。

characters

object[]

构成单词的字符及其对应的时间信息。

隐藏 properties

end

number

字符在音频中的结束时间（秒）。

text

string

必填

已转录的字符内容。

start

number

字符在音频中的起始时间（秒）。

speaker_id

string

该单词对应说话人的唯一标识。

channel_index

integer

该条转录对应的声道索引（多声道音频时有效）。

language_code

string

必填

检测到的语言代码（例如 ‘eng’ 表示英语）。

transcription_id

string

该响应的转录唯一 ID。

language_probability

number

必填

语言检测的置信度（0 到 1 之间）。

transcription_id

string

该响应的转录唯一 ID。

ElevenLabs 语音转文本 V1

ElevenLabs 文字转语音 Flash V2

API 基础

大语言

图像

视频

音频

ElevenLabs 语音转文本 V2

请求头

请求体

响应信息

​请求头

​请求体

​响应信息

请求头

请求体

响应信息