Audio

This section provides endpoints to transcribe and translate audio to text, and to synthesize speech from text.

Create transcription

POST https://api.aifoundryhub.com/v1/audio/transcriptions

Transcribes audio into text using a speech‑to‑text model.

Example request

# Upload a local audio file as multipart form data; the response is written to stdout.
curl --request POST \
  --header "Authorization: Bearer $AI_FOUNDRY_HUB_API_KEY" \
  --header "Content-Type: multipart/form-data" \
  --form model=whisper-large-v3 \
  --form file=@/path/to/audio.mp3 \
  "https://api.aifoundryhub.com/v1/audio/transcriptions"

import fs from "node:fs";
import OpenAI from "openai";

// SDK client pointed at the AI Foundry Hub API; the key is read from the environment.
const hub = new OpenAI({
  baseURL: "https://api.aifoundryhub.com/v1",
  apiKey: process.env.AI_FOUNDRY_HUB_API_KEY,
});

// Stream the local audio file to the transcription endpoint.
const audioStream = fs.createReadStream("audio.mp3");
const result = await hub.audio.transcriptions.create({
  model: "whisper-large-v3",
  file: audioStream,
});

// Print the transcribed text.
console.log(result.text);

package main

import (
  "context"
  "os"

  openai "github.com/openai/openai-go"
  "github.com/openai/openai-go/option"
)

// main transcribes a local audio file and prints the resulting text.
func main() {
  // Point the SDK at the AI Foundry Hub endpoint; the key is read from the environment.
  client := openai.NewClient(
    option.WithAPIKey(os.Getenv("AI_FOUNDRY_HUB_API_KEY")),
    option.WithBaseURL("https://api.aifoundryhub.com/v1"),
  )
  ctx := context.Background()

  file, err := os.Open("audio.mp3")
  if err != nil {
    panic(err)
  }
  // Release the file handle once the request has completed.
  defer file.Close()

  transcription, err := client.Audio.Transcriptions.New(ctx, openai.AudioTranscriptionNewParams{
    // Same model as the curl, JavaScript and Python examples.
    Model: "whisper-large-v3",
    File:  file,
  })
  if err != nil {
    panic(err)
  }

  println(transcription.Text)
}

import os
from openai import OpenAI

# SDK client pointed at the AI Foundry Hub API; the key is read from the environment.
client = OpenAI(
    api_key=os.getenv("AI_FOUNDRY_HUB_API_KEY"),
    base_url="https://api.aifoundryhub.com/v1",
)

# Open the audio in binary mode; the context manager closes the file after the upload.
with open("audio.mp3", "rb") as audio_file:
    transcript = client.audio.transcriptions.create(
        model="whisper-large-v3",
        file=audio_file,
    )

print(transcript.text)

Returns

A transcription object containing the transcribed text in its `text` field.

Example response

{ "text": "Hello world." }

Create translation

POST https://api.aifoundryhub.com/v1/audio/translations

Translates non‑English speech to English text.

Example request

# Upload a local audio file as multipart form data; the response is written to stdout.
curl --request POST \
  --header "Authorization: Bearer $AI_FOUNDRY_HUB_API_KEY" \
  --header "Content-Type: multipart/form-data" \
  --form model=whisper-large-v3 \
  --form file=@/path/to/audio.m4a \
  "https://api.aifoundryhub.com/v1/audio/translations"

import fs from "node:fs";
import OpenAI from "openai";

// SDK client pointed at the AI Foundry Hub API; the key is read from the environment.
const hub = new OpenAI({
  baseURL: "https://api.aifoundryhub.com/v1",
  apiKey: process.env.AI_FOUNDRY_HUB_API_KEY,
});

// Stream the local audio file to the translation endpoint.
const audioStream = fs.createReadStream("audio.m4a");
const result = await hub.audio.translations.create({
  model: "whisper-large-v3",
  file: audioStream,
});

// Print the translated text.
console.log(result.text);

package main

import (
  "context"
  "os"

  openai "github.com/openai/openai-go"
  "github.com/openai/openai-go/option"
)

// main translates the speech in a local audio file and prints the resulting text.
func main() {
  // Point the SDK at the AI Foundry Hub endpoint; the key is read from the environment.
  client := openai.NewClient(
    option.WithAPIKey(os.Getenv("AI_FOUNDRY_HUB_API_KEY")),
    option.WithBaseURL("https://api.aifoundryhub.com/v1"),
  )
  ctx := context.Background()

  file, err := os.Open("audio.m4a")
  if err != nil {
    panic(err)
  }
  // Release the file handle once the request has completed.
  defer file.Close()

  translation, err := client.Audio.Translations.New(ctx, openai.AudioTranslationNewParams{
    // Same model as the curl, JavaScript and Python examples.
    Model: "whisper-large-v3",
    File:  file,
  })
  if err != nil {
    panic(err)
  }

  println(translation.Text)
}

import os
from openai import OpenAI

# SDK client pointed at the AI Foundry Hub API; the key is read from the environment.
client = OpenAI(
    api_key=os.getenv("AI_FOUNDRY_HUB_API_KEY"),
    base_url="https://api.aifoundryhub.com/v1",
)

# Open the audio in binary mode; the context manager closes the file after the upload.
with open("audio.m4a", "rb") as audio_file:
    translation = client.audio.translations.create(
        model="whisper-large-v3",
        file=audio_file,
    )

print(translation.text)

Returns

A translation object containing the translated English text in its `text` field.

Example response

{ "text": "Hello world." }

Create speech

POST https://api.aifoundryhub.com/v1/audio/speech

Generates spoken audio from text using a text‑to‑speech model.

Example request

# Request synthesized speech and save the binary response body to speech.mp3.
curl -X POST "https://api.aifoundryhub.com/v1/audio/speech" \
  -H "Authorization: Bearer $AI_FOUNDRY_HUB_API_KEY" \
  -H "Content-Type: application/json" \
  -o speech.mp3 \
  -d '{
    "model": "tts-1",
    "voice": "alloy",
    "input": "Hello! This is a test.",
    "response_format": "mp3"
  }'

import fs from "node:fs";
import OpenAI from "openai";

// SDK client pointed at the AI Foundry Hub API; the key is read from the environment.
const client = new OpenAI({
  apiKey: process.env.AI_FOUNDRY_HUB_API_KEY,
  baseURL: "https://api.aifoundryhub.com/v1",
});

// Request MP3 audio; the SDK's parameter for the output format is `response_format`.
const mp3 = await client.audio.speech.create({
  model: "tts-1",
  voice: "alloy",
  input: "Hello! This is a test.",
  response_format: "mp3",
});

// Read the whole binary response into memory and write it to disk.
const buffer = Buffer.from(await mp3.arrayBuffer());
await fs.promises.writeFile("speech.mp3", buffer);

package main

import (
  "context"
  "io"
  "os"

  openai "github.com/openai/openai-go"
  "github.com/openai/openai-go/option"
)

// main synthesizes speech from text and saves the audio to speech.mp3.
func main() {
  // Point the SDK at the AI Foundry Hub endpoint; the key is read from the environment.
  client := openai.NewClient(
    option.WithAPIKey(os.Getenv("AI_FOUNDRY_HUB_API_KEY")),
    option.WithBaseURL("https://api.aifoundryhub.com/v1"),
  )

  params := openai.AudioSpeechNewParams{
    Model: openai.SpeechModelTTS1,
    Input: `Why did the chicken cross the road? To get to the other side.`,
    // Request MP3 so the payload matches the .mp3 file written below.
    ResponseFormat: openai.AudioSpeechNewParamsResponseFormatMP3,
    Voice:          openai.AudioSpeechNewParamsVoiceAlloy,
  }

  ctx := context.Background()
  res, err := client.Audio.Speech.New(ctx, params)
  if err != nil {
    panic(err)
  }
  // The response body streams the audio; close it when done.
  defer res.Body.Close()

  out, err := os.Create("speech.mp3")
  if err != nil {
    panic(err)
  }
  defer out.Close()

  // Stream the audio to disk instead of buffering it in memory.
  if _, err := io.Copy(out, res.Body); err != nil {
    panic(err)
  }
}

import os
from openai import OpenAI

# SDK client pointed at the AI Foundry Hub API; the key is read from the environment.
client = OpenAI(
    api_key=os.getenv("AI_FOUNDRY_HUB_API_KEY"),
    base_url="https://api.aifoundryhub.com/v1",
)

# Request MP3 audio; the SDK keyword for the output format is `response_format`
# (passing `format=` raises TypeError).
mp3 = client.audio.speech.create(
    model="tts-1",
    voice="alloy",
    input="Hello! This is a test.",
    response_format="mp3",
)

# Write the binary audio payload to disk.
with open("speech.mp3", "wb") as f:
    f.write(mp3.content)

Returns

The binary audio data, encoded in the requested format. The response's Content-Type header matches that format.

Audio

Create transcription

Example request

Returns

Example response

Create translation

Example request

Returns

Example response

Create speech

Example request

Returns

Objects

Transcription / translation object