ElevenLabs 音声文字起こし V1

# POST the JSON transcription options below to the Scribe v1 endpoint.
# Replace <authorization>, <content-type> and the placeholder field values before running.
curl -X POST \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: <content-type>' \
  -d '
{
  "seed": 123,
  "diarize": true,
  "file_format": "<string>",
  "temperature": 123,
  "num_speakers": 123,
  "language_code": "<string>",
  "tag_audio_events": true,
  "cloud_storage_url": "<string>",
  "use_multi_channel": true,
  "diarization_threshold": 123,
  "timestamps_granularity": "<string>"
}
' \
  https://api.highwayapi.ai/v3/elevenlabs-scribe-v1

import requests

# Endpoint that receives the transcription request.
ENDPOINT = "https://api.highwayapi.ai/v3/elevenlabs-scribe-v1"

# Placeholder header values; substitute real ones before running.
request_headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>",
}

# Request options; every value is a placeholder. Keyword order fixes the
# order in which the keys are serialized.
request_body = dict(
    seed=123,
    diarize=True,
    file_format="<string>",
    temperature=123,
    num_speakers=123,
    language_code="<string>",
    tag_audio_events=True,
    cloud_storage_url="<string>",
    use_multi_channel=True,
    diarization_threshold=123,
    timestamps_granularity="<string>",
)

# Send the options as a JSON body and print the raw response text.
reply = requests.post(ENDPOINT, headers=request_headers, json=request_body)
print(reply.text)

// POST the transcription options as JSON, then log the parsed reply.
// Any failure (network, JSON parsing) is logged with console.error.
const endpoint = 'https://api.highwayapi.ai/v3/elevenlabs-scribe-v1';

// Placeholder request options; replace the values before running.
const requestBody = {
  seed: 123,
  diarize: true,
  file_format: '<string>',
  temperature: 123,
  num_speakers: 123,
  language_code: '<string>',
  tag_audio_events: true,
  cloud_storage_url: '<string>',
  use_multi_channel: true,
  diarization_threshold: 123,
  timestamps_granularity: '<string>'
};

(async () => {
  try {
    const res = await fetch(endpoint, {
      method: 'POST',
      headers: {'Content-Type': '<content-type>', Authorization: '<authorization>'},
      body: JSON.stringify(requestBody)
    });
    console.log(await res.json());
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Placeholder request options, JSON-encoded for the request body.
$payload = json_encode([
  'seed' => 123,
  'diarize' => true,
  'file_format' => '<string>',
  'temperature' => 123,
  'num_speakers' => 123,
  'language_code' => '<string>',
  'tag_audio_events' => true,
  'cloud_storage_url' => '<string>',
  'use_multi_channel' => true,
  'diarization_threshold' => 123,
  'timestamps_granularity' => '<string>'
]);

// Placeholder header values; replace them before running.
$headers = [
  "Authorization: <authorization>",
  "Content-Type: <content-type>"
];

$ch = curl_init();

curl_setopt_array($ch, [
  CURLOPT_URL => "https://api.highwayapi.ai/v3/elevenlabs-scribe-v1",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => $payload,
  CURLOPT_HTTPHEADER => $headers,
]);

$body = curl_exec($ch);
$failure = curl_error($ch);

curl_close($ch);

// Print the transport error if there was one, otherwise the raw response body.
echo $failure ? ("cURL Error #:" . $failure) : $body;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts the placeholder transcription request and prints the response body.
// Every step's error is checked, so a failed request is reported instead of
// panicking on a nil response.
func main() {

	url := "https://api.highwayapi.ai/v3/elevenlabs-scribe-v1"

	payload := strings.NewReader("{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request failed:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res may be nil when Do fails, so res.Body must not be touched here.
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response failed:", err)
		return
	}

	fmt.Println(string(body))

}

// Placeholder JSON request body, assembled one field per line.
String requestBody = "{\n"
  + "  \"seed\": 123,\n"
  + "  \"diarize\": true,\n"
  + "  \"file_format\": \"<string>\",\n"
  + "  \"temperature\": 123,\n"
  + "  \"num_speakers\": 123,\n"
  + "  \"language_code\": \"<string>\",\n"
  + "  \"tag_audio_events\": true,\n"
  + "  \"cloud_storage_url\": \"<string>\",\n"
  + "  \"use_multi_channel\": true,\n"
  + "  \"diarization_threshold\": 123,\n"
  + "  \"timestamps_granularity\": \"<string>\"\n"
  + "}";

// POST the body with the placeholder headers and read the reply as a string.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/elevenlabs-scribe-v1")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Target endpoint for the transcription request.
endpoint = URI("https://api.highwayapi.ai/v3/elevenlabs-scribe-v1")

# Placeholder JSON body; .chomp drops the heredoc's trailing newline.
json_body = <<~JSON.chomp
  {
    "seed": 123,
    "diarize": true,
    "file_format": "<string>",
    "temperature": 123,
    "num_speakers": 123,
    "language_code": "<string>",
    "tag_audio_events": true,
    "cloud_storage_url": "<string>",
    "use_multi_channel": true,
    "diarization_threshold": 123,
    "timestamps_granularity": "<string>"
  }
JSON

# Build the POST with placeholder headers; replace them before running.
post = Net::HTTP::Post.new(endpoint)
post["Content-Type"] = '<content-type>'
post["Authorization"] = '<authorization>'
post.body = json_body

# Connect to the endpoint's host over TLS, send the request, print the reply body.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

puts client.request(post).read_body

POST

elevenlabs-scribe-v1

ElevenLabs 音声文字起こし V1

# POST the JSON transcription options below to the Scribe v1 endpoint.
# Replace <authorization>, <content-type> and the placeholder field values before running.
curl -X POST \
  -H 'Authorization: <authorization>' \
  -H 'Content-Type: <content-type>' \
  -d '
{
  "seed": 123,
  "diarize": true,
  "file_format": "<string>",
  "temperature": 123,
  "num_speakers": 123,
  "language_code": "<string>",
  "tag_audio_events": true,
  "cloud_storage_url": "<string>",
  "use_multi_channel": true,
  "diarization_threshold": 123,
  "timestamps_granularity": "<string>"
}
' \
  https://api.highwayapi.ai/v3/elevenlabs-scribe-v1

import requests

# Endpoint that receives the transcription request.
ENDPOINT = "https://api.highwayapi.ai/v3/elevenlabs-scribe-v1"

# Placeholder header values; substitute real ones before running.
request_headers = {
    "Content-Type": "<content-type>",
    "Authorization": "<authorization>",
}

# Request options; every value is a placeholder. Keyword order fixes the
# order in which the keys are serialized.
request_body = dict(
    seed=123,
    diarize=True,
    file_format="<string>",
    temperature=123,
    num_speakers=123,
    language_code="<string>",
    tag_audio_events=True,
    cloud_storage_url="<string>",
    use_multi_channel=True,
    diarization_threshold=123,
    timestamps_granularity="<string>",
)

# Send the options as a JSON body and print the raw response text.
reply = requests.post(ENDPOINT, headers=request_headers, json=request_body)
print(reply.text)

// POST the transcription options as JSON, then log the parsed reply.
// Any failure (network, JSON parsing) is logged with console.error.
const endpoint = 'https://api.highwayapi.ai/v3/elevenlabs-scribe-v1';

// Placeholder request options; replace the values before running.
const requestBody = {
  seed: 123,
  diarize: true,
  file_format: '<string>',
  temperature: 123,
  num_speakers: 123,
  language_code: '<string>',
  tag_audio_events: true,
  cloud_storage_url: '<string>',
  use_multi_channel: true,
  diarization_threshold: 123,
  timestamps_granularity: '<string>'
};

(async () => {
  try {
    const res = await fetch(endpoint, {
      method: 'POST',
      headers: {'Content-Type': '<content-type>', Authorization: '<authorization>'},
      body: JSON.stringify(requestBody)
    });
    console.log(await res.json());
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Placeholder request options, JSON-encoded for the request body.
$payload = json_encode([
  'seed' => 123,
  'diarize' => true,
  'file_format' => '<string>',
  'temperature' => 123,
  'num_speakers' => 123,
  'language_code' => '<string>',
  'tag_audio_events' => true,
  'cloud_storage_url' => '<string>',
  'use_multi_channel' => true,
  'diarization_threshold' => 123,
  'timestamps_granularity' => '<string>'
]);

// Placeholder header values; replace them before running.
$headers = [
  "Authorization: <authorization>",
  "Content-Type: <content-type>"
];

$ch = curl_init();

curl_setopt_array($ch, [
  CURLOPT_URL => "https://api.highwayapi.ai/v3/elevenlabs-scribe-v1",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => $payload,
  CURLOPT_HTTPHEADER => $headers,
]);

$body = curl_exec($ch);
$failure = curl_error($ch);

curl_close($ch);

// Print the transport error if there was one, otherwise the raw response body.
echo $failure ? ("cURL Error #:" . $failure) : $body;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts the placeholder transcription request and prints the response body.
// Every step's error is checked, so a failed request is reported instead of
// panicking on a nil response.
func main() {

	url := "https://api.highwayapi.ai/v3/elevenlabs-scribe-v1"

	payload := strings.NewReader("{\n  \"seed\": 123,\n  \"diarize\": true,\n  \"file_format\": \"<string>\",\n  \"temperature\": 123,\n  \"num_speakers\": 123,\n  \"language_code\": \"<string>\",\n  \"tag_audio_events\": true,\n  \"cloud_storage_url\": \"<string>\",\n  \"use_multi_channel\": true,\n  \"diarization_threshold\": 123,\n  \"timestamps_granularity\": \"<string>\"\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request failed:", err)
		return
	}

	req.Header.Add("Content-Type", "<content-type>")
	req.Header.Add("Authorization", "<authorization>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res may be nil when Do fails, so res.Body must not be touched here.
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response failed:", err)
		return
	}

	fmt.Println(string(body))

}

// Placeholder JSON request body, assembled one field per line.
String requestBody = "{\n"
  + "  \"seed\": 123,\n"
  + "  \"diarize\": true,\n"
  + "  \"file_format\": \"<string>\",\n"
  + "  \"temperature\": 123,\n"
  + "  \"num_speakers\": 123,\n"
  + "  \"language_code\": \"<string>\",\n"
  + "  \"tag_audio_events\": true,\n"
  + "  \"cloud_storage_url\": \"<string>\",\n"
  + "  \"use_multi_channel\": true,\n"
  + "  \"diarization_threshold\": 123,\n"
  + "  \"timestamps_granularity\": \"<string>\"\n"
  + "}";

// POST the body with the placeholder headers and read the reply as a string.
HttpResponse<String> response = Unirest.post("https://api.highwayapi.ai/v3/elevenlabs-scribe-v1")
  .header("Content-Type", "<content-type>")
  .header("Authorization", "<authorization>")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Target endpoint for the transcription request.
endpoint = URI("https://api.highwayapi.ai/v3/elevenlabs-scribe-v1")

# Placeholder JSON body; .chomp drops the heredoc's trailing newline.
json_body = <<~JSON.chomp
  {
    "seed": 123,
    "diarize": true,
    "file_format": "<string>",
    "temperature": 123,
    "num_speakers": 123,
    "language_code": "<string>",
    "tag_audio_events": true,
    "cloud_storage_url": "<string>",
    "use_multi_channel": true,
    "diarization_threshold": 123,
    "timestamps_granularity": "<string>"
  }
JSON

# Build the POST with placeholder headers; replace them before running.
post = Net::HTTP::Post.new(endpoint)
post["Content-Type"] = '<content-type>'
post["Authorization"] = '<authorization>'
post.body = json_body

# Connect to the endpoint's host over TLS, send the request, print the reply body.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

puts client.request(post).read_body

音声または動画ファイルを文字起こしします。use_multi_channel が true で、アップロードされた音声に複数のチャンネルがある場合は、チャンネルごとに 1 つの文字起こしを含む ‘transcripts’ オブジェクトを返します。それ以外の場合は単一の文字起こし結果を返します。

リクエストヘッダー

string

必須

列挙値: application/json

string

必須

Bearer 認証形式: Bearer {{API Key}}。

リクエストボディ

integer

指定した場合、システムは可能な限り決定論的にサンプリングします。同じ seed とパラメータのリクエストは同じ結果を返すはずですが、完全な決定性は保証されません。0 から 2147483647 までの整数である必要があります。値の範囲：[0, 2147483647]

boolean

デフォルト:false

アップロードされたファイル内で、どの話者が発話しているかを注釈するかどうか。

string

デフォルト:"other"

入力音声形式。‘pcm_s16le_16’ または ‘other’ を選択できます。pcm_s16le_16 では、音声が 16kHz サンプリングレート、16 ビット整数、モノラル、リトルエンディアン形式である必要があり、エンコードされた波形と比べて低遅延です。使用可能な値：pcm_s16le_16, other

number

文字起こし出力のランダム性を制御します。値の範囲は 0.0 ～ 2.0 で、値が高いほど結果は多様になり、不確実性も高くなります。省略した場合、選択したモデルのデフォルト温度（通常は 0）が使用されます。値の範囲：[0, 2]

integer

アップロードファイル内の話者の最大数。話者の区別を補助するために使用でき、最大 32 名の話者までサポートします。値の範囲：[1, 32]

string

音声ファイルの ISO-639-1 または ISO-639-3 言語コードを指定します。事前に指定すると、文字起こし性能が向上する場合があります。デフォルトは null で、言語は自動検出されます。

boolean

デフォルト:true

文字起こし内で（laughter）（footsteps）などの音声イベントをタグ付けするかどうか。

string

必須

文字起こし対象ファイルの HTTPS リンク。file と cloud_storage_url のいずれか一方が必須です。ファイルは HTTPS 経由でアクセス可能かつ 2GB 未満である必要があります。クラウドストレージ（AWS S3、GCS、Cloudflare R2 など）、CDN、その他の HTTPS ソースを含む任意の有効な HTTPS アドレスをサポートし、token 付きの事前署名リンクや URL クエリパラメータによる認証にも対応します。

boolean

デフォルト:false

音声ファイルがマルチチャンネルであり、各チャンネルに単一の話者のみが含まれるかどうか。有効にすると各チャンネルを個別に文字起こしして結果を合成し、出力内容の各単語に channel_index フィールドが含まれます。最大 5 チャンネルまでサポートします。

number

話者分離（diarization）のしきい値。値が大きいほど、1 人が複数人に分割される確率は低くなりますが、異なる人が 1 人に統合される確率は高くなります（識別される話者数が少なくなります）。値が小さいほど、1 人が複数人に分割される確率は高くなりますが、異なる人が 1 人に統合される確率は低くなります（話者数が多くなります）。diarize=True かつ num_speakers=None の場合にのみ設定できます。デフォルトは None で、モデル id に応じてしきい値が選択されます（通常は 0.22）。値の範囲：[0.1, 0.4]

string

デフォルト:"word"

文字起こし内容におけるタイムスタンプの粒度。‘word’ は単語レベルのタイムスタンプを提供し、‘character’ は各文字のタイムスタンプを提供します。使用可能な値：none, word, character

レスポンス情報

レスポンスは以下のレスポンスタイプのいずれかになる場合があります：

レスポンスタイプ 1

string

必須

文字起こしされた元のテキスト。

object[]

必須

単語とその時刻情報のリスト。

非表示 properties

number

この単語または音声の音声内での終了時刻（秒）。

string

必須

文字起こしされた単語または音声の内容。

string

必須

この単語または音声のタイプ。‘audio_event’ は笑い声や足音など、単語ではない音声に使用されます。使用可能な値：word, spacing, audio_event

number

この単語または音声の音声内での開始時刻（秒）。

number

必須

この単語を予測した際の対数確率。logprob の範囲は (-∞, 0] で、値が高いほどモデルの予測信頼度が高いことを示します。

object[]

単語を構成する文字と、それに対応する時刻情報。

非表示 properties

number

文字の音声内での終了時刻（秒）。

string

必須

文字起こしされた文字の内容。

number

文字の音声内での開始時刻（秒）。

string

この単語に対応する話者の一意識別子。

integer

この文字起こしに対応するチャンネルインデックス（マルチチャンネル音声の場合に有効）。

string

必須

検出された言語コード（例：‘eng’ は英語を表します）。

string

このレスポンスの文字起こし一意 ID。

number

必須

言語検出の信頼度（0 から 1 の間）。

レスポンスタイプ 2

object[]

必須

各音声チャンネルに対応する文字起こしのリスト。各文字起こしには、該当チャンネルのテキストと単語レベルの詳細情報が含まれます。

非表示 properties

string

必須

文字起こしされた元のテキスト。

object[]

必須

単語とその時刻情報のリスト。

非表示 properties

number

この単語または音声の音声内での終了時刻（秒）。

string

必須

文字起こしされた単語または音声の内容。

string

必須

この単語または音声のタイプ。‘audio_event’ は笑い声や足音など、単語ではない音声に使用されます。使用可能な値：word, spacing, audio_event

number

この単語または音声の音声内での開始時刻（秒）。

number

必須

この単語を予測した際の対数確率。logprob の範囲は (-∞, 0] で、値が高いほどモデルの予測信頼度が高いことを示します。

object[]

単語を構成する文字と、それに対応する時刻情報。

非表示 properties

number

文字の音声内での終了時刻（秒）。

string

必須

文字起こしされた文字の内容。

number

文字の音声内での開始時刻（秒）。

string

この単語に対応する話者の一意識別子。

integer

この文字起こしに対応するチャンネルインデックス（マルチチャンネル音声の場合に有効）。

string

必須

検出された言語コード（例：‘eng’ は英語を表します）。

string

このレスポンスの文字起こし一意 ID。

number

必須

言語検出の信頼度（0 から 1 の間）。

string

このレスポンスの文字起こし一意 ID。

MiniMax 音声クイック複製

ElevenLabs 音声テキスト変換 V2

API の基本

大規模言語モデル

画像

動画

音声

ElevenLabs 音声文字起こし V1

リクエストヘッダー

リクエストボディ

レスポンス情報

リクエストヘッダー

リクエストボディ

レスポンス情報

リクエストヘッダー

リクエストボディ

レスポンス情報