Audio
This section provides endpoints to transcribe and translate audio to text, and to synthesize speech from text.
Create transcription
Section titled “Create transcription”
POST https://api.aifoundryhub.com/v1/audio/transcriptions
Transcribes audio into text using a speech‑to‑text model.
Example request
Section titled “Example request”curl -X POST "https://api.aifoundryhub.com/v1/audio/transcriptions" \ -H "Authorization: Bearer $AI_FOUNDRY_HUB_API_KEY" \ -H "Content-Type: multipart/form-data" \ -F model=whisper-large-v3 \ -F file=@/path/to/audio.mp3import fs from "node:fs";import OpenAI from "openai";
const client = new OpenAI({ apiKey: process.env.AI_FOUNDRY_HUB_API_KEY, baseURL: "https://api.aifoundryhub.com/v1",});
const transcription = await client.audio.transcriptions.create({ model: "whisper-large-v3", file: fs.createReadStream("audio.mp3"),});
console.log(transcription.text);package main
import ( "context" "os"
openai "github.com/openai/openai-go" "github.com/openai/openai-go/option")
func main() { client := openai.NewClient( option.WithAPIKey(os.Getenv("AI_FOUNDRY_HUB_API_KEY")), option.WithBaseURL("https://api.aifoundryhub.com/v1"), ) ctx := context.Background()
file, err := os.Open("speech.mp3") if err != nil { panic(err) }
transcription, err := client.Audio.Transcriptions.New(ctx, openai.AudioTranscriptionNewParams{ Model: openai.AudioModelWhisper1, File: file, }) if err != nil { panic(err) }
println(transcription.Text)}import osfrom openai import OpenAI
client = OpenAI( api_key=os.getenv("AI_FOUNDRY_HUB_API_KEY"), base_url="https://api.aifoundryhub.com/v1",)
transcript = client.audio.transcriptions.create( model="whisper-large-v3", file=open("audio.mp3", "rb"),)
print(transcript.text)Returns
Section titled “Returns”
A transcription object whose `text` field contains the transcribed text.
Example response
Section titled “Example response”
{ "text": "Hello world." }
Create translation
Section titled “Create translation”
POST https://api.aifoundryhub.com/v1/audio/translations
Translates non‑English speech to English text.
Example request
Section titled “Example request”curl -X POST "https://api.aifoundryhub.com/v1/audio/translations" \ -H "Authorization: Bearer $AI_FOUNDRY_HUB_API_KEY" \ -H "Content-Type: multipart/form-data" \ -F model=whisper-large-v3 \ -F file=@/path/to/audio.m4aimport fs from "node:fs";import OpenAI from "openai";
const client = new OpenAI({ apiKey: process.env.AI_FOUNDRY_HUB_API_KEY, baseURL: "https://api.aifoundryhub.com/v1",});
const translation = await client.audio.translations.create({ model: "whisper-large-v3", file: fs.createReadStream("audio.m4a"),});
console.log(translation.text);package main
import ( "context" "os"
openai "github.com/openai/openai-go" "github.com/openai/openai-go/option")
func main() { client := openai.NewClient( option.WithAPIKey(os.Getenv("AI_FOUNDRY_HUB_API_KEY")), option.WithBaseURL("https://api.aifoundryhub.com/v1"), ) ctx := context.Background()
file, err := os.Open("speech.mp3") if err != nil { panic(err) }
transcription, err := client.Audio.Translations.New(ctx, openai.AudioTranslationNewParams{ Model: openai.AudioModelWhisper1, File: file, }) if err != nil { panic(err) }
println(transcription.Text)}import osfrom openai import OpenAI
client = OpenAI( api_key=os.getenv("AI_FOUNDRY_HUB_API_KEY"), base_url="https://api.aifoundryhub.com/v1",)
transcript = client.audio.translations.create( model="whisper-large-v3", file=open("audio.mp3", "rb"),)
print(transcript.text)Returns
Section titled “Returns”
A translation object whose `text` field contains the English translation.
Example response
Section titled “Example response”
{ "text": "Hello world." }
Create speech
Section titled “Create speech”
POST https://api.aifoundryhub.com/v1/audio/speech
Generates spoken audio from text using a text‑to‑speech model.
Example request
Section titled “Example request”curl -X POST "https://api.aifoundryhub.com/v1/audio/speech" \ -H "Authorization: Bearer $AI_FOUNDRY_HUB_API_KEY" \ -H "Content-Type: application/json" \ -o speech.mp3 \ -d '{ "model": "tts-1", "voice": "alloy", "input": "Hello! This is a test.", "format": "mp3" }'import fs from "node:fs";import OpenAI from "openai";
const client = new OpenAI({ apiKey: process.env.AI_FOUNDRY_HUB_API_KEY, baseURL: "https://api.aifoundryhub.com/v1",});
const mp3 = await client.audio.speech.create({ model: "tts-1", voice: "alloy", input: "Hello! This is a test.", format: "mp3",});
const buffer = Buffer.from(await mp3.arrayBuffer());await fs.promises.writeFile("speech.mp3", buffer);package main
import ( "context" "io" "os"
openai "github.com/openai/openai-go" "github.com/openai/openai-go/option")
func main() { client := openai.NewClient( option.WithAPIKey(os.Getenv("AI_FOUNDRY_HUB_API_KEY")), option.WithBaseURL("https://api.aifoundryhub.com/v1"), )
params := openai.AudioSpeechNewParams{ Model: openai.SpeechModelTTS1, Input: `Why did the chicken cross the road? To get to the other side.`, ResponseFormat: openai.AudioSpeechNewParamsResponseFormatPCM, Voice: openai.AudioSpeechNewParamsVoiceAlloy, }
ctx := context.Background() res, err := client.Audio.Speech.New(ctx, params) if err != nil { panic(err) }
out, _ := os.Create("speech.mp3") defer out.Close() io.Copy(out, res.Body)}import osfrom openai import OpenAI
client = OpenAI( api_key=os.getenv("AI_FOUNDRY_HUB_API_KEY"), base_url="https://api.aifoundryhub.com/v1",)
mp3 = client.audio.speech.create( model="tts-1", voice="alloy", input="Hello! This is a test.", format="mp3",)
with open("speech.mp3", "wb") as f: f.write(mp3.content)Returns
Section titled “Returns”
Binary audio data encoded in the requested format. The response's Content-Type header matches the chosen format.