ElevenLabs 音声テキスト変換 V2

# Send a speech-to-text request to the elevenlabs-scribe-v2 endpoint.
# Every <...> value and every number below is a placeholder to be replaced.
curl -X POST 'https://api.highwayapi.ai/v3/elevenlabs-scribe-v2' \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: <content-type>' \
  -d '
{
  "seed": 123,
  "diarize": true,
  "file_format": "<string>",
  "temperature": 123,
  "num_speakers": 123,
  "language_code": "<string>",
  "tag_audio_events": true,
  "cloud_storage_url": "<string>",
  "use_multi_channel": true,
  "diarization_threshold": 123,
  "timestamps_granularity": "<string>"
}
'

import requests

# Endpoint that receives the transcription request.
url = "https://api.highwayapi.ai/v3/elevenlabs-scribe-v2"

# Placeholder header values; replace them before sending.
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>",
}

# Request body. All values are placeholders; key order matches the other samples.
payload = dict(
    seed=123,
    diarize=True,
    file_format="<string>",
    temperature=123,
    num_speakers=123,
    language_code="<string>",
    tag_audio_events=True,
    cloud_storage_url="<string>",
    use_multi_channel=True,
    diarization_threshold=123,
    timestamps_granularity="<string>",
)

# `json=` makes requests serialize the payload as the JSON request body.
response = requests.post(url, headers=headers, json=payload)

# Print the raw response text as returned by the server.
print(response.text)

// Request settings. Header values and body fields are placeholders to be replaced.
const options = {
  method: 'POST',
  headers: {
    'Content-Type': '<content-type>',
    Authorization: '<authorization>'
  },
  body: JSON.stringify({
    seed: 123,
    diarize: true,
    file_format: '<string>',
    temperature: 123,
    num_speakers: 123,
    language_code: '<string>',
    tag_audio_events: true,
    cloud_storage_url: '<string>',
    use_multi_channel: true,
    diarization_threshold: 123,
    timestamps_granularity: '<string>'
  })
};

// Send the request and log the parsed JSON response; any failure along the
// way (network, JSON parsing) is logged through console.error.
(async () => {
  try {
    const res = await fetch('https://api.highwayapi.ai/v3/elevenlabs-scribe-v2', options);
    console.log(await res.json());
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Request body; every value is a placeholder to be replaced before sending.
$payload = [
  'seed' => 123,
  'diarize' => true,
  'file_format' => '<string>',
  'temperature' => 123,
  'num_speakers' => 123,
  'language_code' => '<string>',
  'tag_audio_events' => true,
  'cloud_storage_url' => '<string>',
  'use_multi_channel' => true,
  'diarization_threshold' => 123,
  'timestamps_granularity' => '<string>'
];

// Placeholder header lines; replace them with real values.
$headers = [
  "Authorization: <authorization>",
  "Content-Type: <content-type>"
];

$curl = curl_init();

// Configure the handle one option at a time.
curl_setopt($curl, CURLOPT_URL, "https://api.highwayapi.ai/v3/elevenlabs-scribe-v2");
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true); // return the body instead of printing it
curl_setopt($curl, CURLOPT_ENCODING, "");
curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
curl_setopt($curl, CURLOPT_TIMEOUT, 30);
curl_setopt($curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
curl_setopt($curl, CURLOPT_CUSTOMREQUEST, "POST");
curl_setopt($curl, CURLOPT_POSTFIELDS, json_encode($payload));
curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// Print the cURL error message when there is one, otherwise the response body.
echo $err ? ("cURL Error #:" . $err) : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a placeholder JSON body to the elevenlabs-scribe-v2 endpoint and
// prints the raw response body. If building the request, sending it, or
// reading the response fails, it prints the error and returns.
func main() {

	url := "https://api.highwayapi.ai/v3/elevenlabs-scribe-v2"

	// Every value in the body is a placeholder to be replaced before sending.
	payload := strings.NewReader("{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("failed to build request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so touching res.Body here would panic.
		fmt.Println("request failed:", err)
		return
	}
	// Only register the close once we know res is non-nil.
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("failed to read response body:", err)
		return
	}

	fmt.Println(string(body))

}

// Endpoint that receives the transcription request.
String endpoint = "https://api.highwayapi.ai/v3/elevenlabs-scribe-v2";

// JSON request body; every value is a placeholder to be replaced before sending.
String requestBody = "{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}";

// POST the body with placeholder headers and read the response as a string.
HttpResponse<String> response = Unirest.post(endpoint)
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Endpoint that receives the transcription request.
url = URI("https://api.highwayapi.ai/v3/elevenlabs-scribe-v2")

# JSON request body; every value is a placeholder to be replaced before sending.
json_body = "{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}"

# Build the connection with TLS turned on.
http = Net::HTTP.new(url.host, url.port).tap { |conn| conn.use_ssl = true }

# Assemble the POST request with placeholder headers.
request = Net::HTTP::Post.new(url)
request["Content-Type"] = '<content-type>'
request["Authorization"] = '<authorization>'
request.body = json_body

# Send the request and print the response body.
response = http.request(request)
puts response.read_body

POST

elevenlabs-scribe-v2

ElevenLabs 音声テキスト変換 V2

# Send a speech-to-text request to the elevenlabs-scribe-v2 endpoint.
# Every <...> value and every number below is a placeholder to be replaced.
curl -X POST 'https://api.highwayapi.ai/v3/elevenlabs-scribe-v2' \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: <content-type>' \
  -d '
{
  "seed": 123,
  "diarize": true,
  "file_format": "<string>",
  "temperature": 123,
  "num_speakers": 123,
  "language_code": "<string>",
  "tag_audio_events": true,
  "cloud_storage_url": "<string>",
  "use_multi_channel": true,
  "diarization_threshold": 123,
  "timestamps_granularity": "<string>"
}
'

import requests

# Endpoint that receives the transcription request.
url = "https://api.highwayapi.ai/v3/elevenlabs-scribe-v2"

# Placeholder header values; replace them before sending.
headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>",
}

# Request body. All values are placeholders; key order matches the other samples.
payload = dict(
    seed=123,
    diarize=True,
    file_format="<string>",
    temperature=123,
    num_speakers=123,
    language_code="<string>",
    tag_audio_events=True,
    cloud_storage_url="<string>",
    use_multi_channel=True,
    diarization_threshold=123,
    timestamps_granularity="<string>",
)

# `json=` makes requests serialize the payload as the JSON request body.
response = requests.post(url, headers=headers, json=payload)

# Print the raw response text as returned by the server.
print(response.text)

// Request settings. Header values and body fields are placeholders to be replaced.
const options = {
  method: 'POST',
  headers: {
    'Content-Type': '<content-type>',
    Authorization: '<authorization>'
  },
  body: JSON.stringify({
    seed: 123,
    diarize: true,
    file_format: '<string>',
    temperature: 123,
    num_speakers: 123,
    language_code: '<string>',
    tag_audio_events: true,
    cloud_storage_url: '<string>',
    use_multi_channel: true,
    diarization_threshold: 123,
    timestamps_granularity: '<string>'
  })
};

// Send the request and log the parsed JSON response; any failure along the
// way (network, JSON parsing) is logged through console.error.
(async () => {
  try {
    const res = await fetch('https://api.highwayapi.ai/v3/elevenlabs-scribe-v2', options);
    console.log(await res.json());
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Request body; every value is a placeholder to be replaced before sending.
$payload = [
  'seed' => 123,
  'diarize' => true,
  'file_format' => '<string>',
  'temperature' => 123,
  'num_speakers' => 123,
  'language_code' => '<string>',
  'tag_audio_events' => true,
  'cloud_storage_url' => '<string>',
  'use_multi_channel' => true,
  'diarization_threshold' => 123,
  'timestamps_granularity' => '<string>'
];

// Placeholder header lines; replace them with real values.
$headers = [
  "Authorization: <authorization>",
  "Content-Type: <content-type>"
];

$curl = curl_init();

// Configure the handle one option at a time.
curl_setopt($curl, CURLOPT_URL, "https://api.highwayapi.ai/v3/elevenlabs-scribe-v2");
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true); // return the body instead of printing it
curl_setopt($curl, CURLOPT_ENCODING, "");
curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
curl_setopt($curl, CURLOPT_TIMEOUT, 30);
curl_setopt($curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
curl_setopt($curl, CURLOPT_CUSTOMREQUEST, "POST");
curl_setopt($curl, CURLOPT_POSTFIELDS, json_encode($payload));
curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// Print the cURL error message when there is one, otherwise the response body.
echo $err ? ("cURL Error #:" . $err) : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a placeholder JSON body to the elevenlabs-scribe-v2 endpoint and
// prints the raw response body. If building the request, sending it, or
// reading the response fails, it prints the error and returns.
func main() {

	url := "https://api.highwayapi.ai/v3/elevenlabs-scribe-v2"

	// Every value in the body is a placeholder to be replaced before sending.
	payload := strings.NewReader("{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("failed to build request:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so touching res.Body here would panic.
		fmt.Println("request failed:", err)
		return
	}
	// Only register the close once we know res is non-nil.
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("failed to read response body:", err)
		return
	}

	fmt.Println(string(body))

}

// Endpoint that receives the transcription request.
String endpoint = "https://api.highwayapi.ai/v3/elevenlabs-scribe-v2";

// JSON request body; every value is a placeholder to be replaced before sending.
String requestBody = "{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}";

// POST the body with placeholder headers and read the response as a string.
HttpResponse<String> response = Unirest.post(endpoint)
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Endpoint that receives the transcription request.
url = URI("https://api.highwayapi.ai/v3/elevenlabs-scribe-v2")

# JSON request body; every value is a placeholder to be replaced before sending.
json_body = "{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}"

# Build the connection with TLS turned on.
http = Net::HTTP.new(url.host, url.port).tap { |conn| conn.use_ssl = true }

# Assemble the POST request with placeholder headers.
request = Net::HTTP::Post.new(url)
request["Content-Type"] = '<content-type>'
request["Authorization"] = '<authorization>'
request.body = json_body

# Send the request and print the response body.
response = http.request(request)
puts response.read_body

音声または動画ファイルを文字起こしします。use_multi_channel が true で、アップロードされた音声に複数のチャンネルがある場合、各チャンネルにつき 1 つの文字起こしを含む ‘transcripts’ オブジェクトを返します。それ以外の場合は、単一の文字起こし結果を返します。

リクエストヘッダー

string

必須

列挙値: application/json

string

必須

Bearer 認証形式: Bearer {{API Key}}。

リクエストボディ

integer

指定した場合、システムは可能な限り決定論的にサンプリングします。同じ seed とパラメータのリクエストは同じ結果を返すはずですが、完全な決定性は保証されません。0 から 2147483647 までの整数である必要があります。値の範囲：[0, 2147483647]

boolean

デフォルト:false

アップロードされたファイル内で、どの話者が話しているかをラベル付けするかどうか。

string

デフォルト:"other"

入力音声の形式。‘pcm_s16le_16’ または ‘other’ を指定できます。pcm_s16le_16 を指定する場合、入力音声は 16 ビット PCM（サンプリングレート 16kHz、モノラル、リトルエンディアン）である必要があり、エンコード済みの音声を渡す場合と比べてレイテンシが低くなります。選択可能な値：pcm_s16le_16, other

number

文字起こし出力のランダム性を制御します。値の範囲は 0.0 ～ 2.0 で、値が高いほど結果は多様になり、不確実性も高くなります。省略した場合、選択したモデルのデフォルト温度（通常は0）が使用されます。値の範囲：[0, 2]

integer

アップロードファイル内の話者の最大数。話者の区別を補助するために使用でき、最大 32 人の話者をサポートします。値の範囲：[1, 32]

string

音声ファイルの ISO-639-1 または ISO-639-3 言語コードを指定します。事前に指定すると、文字起こし性能が向上する場合があります。デフォルトは null で、言語は自動検出されます。

boolean

デフォルト:true

文字起こし内で（laughter）（footsteps）などの音声イベントをタグ付けするかどうか。

string

必須

文字起こし対象ファイルの HTTPS リンク。file と cloud_storage_url のいずれか一方を指定する必要があります。ファイルは HTTPS 経由でアクセス可能かつ 2GB 未満である必要があり、クラウドストレージ（AWS S3、GCS、Cloudflare R2 など）、CDN、その他の HTTPS ソースを含む任意の有効な HTTPS アドレスをサポートします。token 付きの事前署名済みリンクや URL クエリパラメータによる認証もサポートします。

boolean

デフォルト:false

音声ファイルがマルチチャンネルであり、各チャンネルに単一の話者のみが含まれるかどうか。有効にすると、各チャンネルを個別に文字起こしして結果を合成し、出力内容の各単語には channel_index フィールドが含まれます。最大 5 チャンネルをサポートします。

number

話者分離（diarization）のしきい値。値が大きいほど、1 人が複数人として分割される確率は低くなりますが、異なる人が 1 人として結合される確率は高くなります（識別される話者数は少なくなります）。値が小さいほど、1 人が複数人として分割される確率は高くなりますが、異なる人が 1 人として結合される確率は低くなります（話者数は多くなります）。diarize が true で、かつ num_speakers を指定していない（null の）場合にのみ設定できます。デフォルトは null で、その場合はモデル ID に基づいてしきい値が選択されます（通常は 0.22）。値の範囲：[0.1, 0.4]

string

デフォルト:"word"

文字起こし内容におけるタイムスタンプの粒度。‘word’ は単語レベルのタイムスタンプを提供し、‘character’ は各文字のタイムスタンプを提供します。選択可能な値：none, word, character

レスポンス情報

レスポンスは以下のレスポンスタイプのいずれかになる場合があります：

レスポンスタイプ 1

string

必須

文字起こしされた元のテキスト。

object[]

必須

単語とその時間情報のリスト。

非表示 properties

number

この単語または音の音声内での終了時間（秒）。

string

必須

文字起こしされた単語または音の内容。

string

必須

この単語または音のタイプ。‘audio_event’ は、笑い声や足音などの単語ではない音に使用されます。選択可能な値：word, spacing, audio_event

number

この単語または音の音声内での開始時間（秒）。

number

必須

この単語を予測した際の対数確率。logprob の範囲は [-infinity, 0] で、値が高いほどモデルの予測の信頼度が高いことを示します。

object[]

単語を構成する文字と、それに対応する時間情報。

非表示 properties

number

文字の音声内での終了時間（秒）。

string

必須

文字起こしされた文字の内容。

number

文字の音声内での開始時間（秒）。

string

この単語に対応する話者の一意の識別子。

integer

この文字起こしに対応するチャンネルインデックス（マルチチャンネル音声の場合に有効）。

string

必須

検出された言語コード（例：‘eng’ は英語を表します）。

string

このレスポンスの文字起こしの一意の ID。

number

必須

言語検出の信頼度（0 から 1 の間）。

レスポンスタイプ 2

object[]

必須

各音声チャンネルに対応する文字起こしのリスト。各文字起こしには、対応するチャンネルのテキストと単語レベルの詳細情報が含まれます。

非表示 properties

string

必須

文字起こしされた元のテキスト。

object[]

必須

単語とその時間情報のリスト。

非表示 properties

number

この単語または音の音声内での終了時間（秒）。

string

必須

文字起こしされた単語または音の内容。

string

必須

この単語または音のタイプ。‘audio_event’ は、笑い声や足音などの単語ではない音に使用されます。選択可能な値：word, spacing, audio_event

number

この単語または音の音声内での開始時間（秒）。

number

必須

この単語を予測した際の対数確率。logprob の範囲は [-infinity, 0] で、値が高いほどモデルの予測の信頼度が高いことを示します。

object[]

単語を構成する文字と、それに対応する時間情報。

非表示 properties

number

文字の音声内での終了時間（秒）。

string

必須

文字起こしされた文字の内容。

number

文字の音声内での開始時間（秒）。

string

この単語に対応する話者の一意の識別子。

integer

この文字起こしに対応するチャンネルインデックス（マルチチャンネル音声の場合に有効）。

string

必須

検出された言語コード（例：‘eng’ は英語を表します）。

string

このレスポンスの文字起こしの一意の ID。

number

必須

言語検出の信頼度（0 から 1 の間）。

string

このレスポンスの文字起こしの一意の ID。

ElevenLabs 音声文字起こし V1

ElevenLabs テキスト読み上げ Flash V2

API の基本

大規模言語モデル

画像

動画

音声

ElevenLabs 音声テキスト変換 V2

リクエストヘッダー

リクエストボディ

レスポンス情報

リクエストヘッダー

リクエストボディ

レスポンス情報

リクエストヘッダー

リクエストボディ

レスポンス情報