Doubao Speech SDK - Go Implementation

Import: github.com/haivivi/giztoy/pkg/doubaospeech

Clients

Speech API Client

type Client struct {
    // V1 Services (Classic)
    TTS *TTSService
    ASR *ASRService
    
    // V2 Services (BigModel)
    TTSV2 *TTSServiceV2
    ASRV2 *ASRServiceV2
    
    // Shared Services
    VoiceClone  *VoiceCloneService
    Realtime    *RealtimeService
    Meeting     *MeetingService
    Podcast     *PodcastService
    Translation *TranslationService
    Media       *MediaService
}

Constructor:

// With API Key (recommended)
client := doubaospeech.NewClient("app-id",
    doubaospeech.WithAPIKey("your-api-key"),
    doubaospeech.WithCluster("volcano_tts"),
)

// With Bearer Token
client := doubaospeech.NewClient("app-id",
    doubaospeech.WithBearerToken("your-token"),
)

// With V2 API Key (for BigModel APIs)
client := doubaospeech.NewClient("app-id",
    doubaospeech.WithV2APIKey("access-key", "app-key"),
    doubaospeech.WithResourceID("seed-tts-2.0"),
)

Console API Client

console := doubaospeech.NewConsole("access-key", "secret-key")

Services

TTS V1 (Classic)

// Synchronous
resp, err := client.TTS.Synthesize(ctx, &doubaospeech.TTSRequest{
    Text:      "你好，世界！",
    VoiceType: "zh_female_cancan",
})
// resp.Audio contains audio bytes

// Streaming (returns an iter.Seq2; ranging over it requires Go 1.23+)
for chunk, err := range client.TTS.SynthesizeStream(ctx, req) {
    if err != nil {
        return err
    }
    buf.Write(chunk.Audio)
}

TTS V2 (BigModel)

// Streaming HTTP
for chunk, err := range client.TTSV2.SynthesizeStream(ctx, &doubaospeech.TTSV2Request{
    Text:       "你好，世界！",
    VoiceType:  "zh_female_cancan",
    ResourceID: "seed-tts-2.0",
}) {
    // Check err first, then process chunk
}

// Async (long text)
task, err := client.TTSV2.SubmitAsync(ctx, &doubaospeech.AsyncTTSRequest{
    Text: longText,
})
result, err := task.Wait(ctx)

ASR (Speech Recognition)

// One-sentence (V1)
resp, err := client.ASR.Recognize(ctx, &doubaospeech.ASRRequest{
    Audio:    audioData,
    Format:   "pcm",
    Language: "zh-CN",
})

// Streaming (WebSocket)
session, err := client.ASR.OpenStreamSession(ctx, &doubaospeech.StreamASRConfig{
    Format:     "pcm",
    SampleRate: 16000,
})
defer session.Close()

// Send audio chunks; the final argument is true only for the last chunk
session.SendAudio(ctx, audioData, false)
session.SendAudio(ctx, lastData, true)

// Receive results
for chunk, err := range session.Recv() {
    if err != nil {
        break
    }
    fmt.Println(chunk.Text)
}

Voice Clone

// Upload audio for training
result, err := client.VoiceClone.Upload(ctx, &doubaospeech.VoiceCloneRequest{
    AudioData: audioData,
    VoiceID:   "my-custom-voice",
})

// Check status
status, err := client.VoiceClone.GetStatus(ctx, "my-custom-voice")

// Activate voice
err = client.VoiceClone.Activate(ctx, "my-custom-voice")

Realtime Dialogue

session, err := client.Realtime.Connect(ctx, &doubaospeech.RealtimeConfig{
    Model: "speech-dialog-001",
})
defer session.Close()

// Send audio
session.SendAudio(audioData)

// Receive events
for event := range session.Events() {
    switch event.Type {
    case "asr_result":
        fmt.Println("User:", event.AsrResult.Text)
    case "tts_audio":
        play(event.TtsAudio)
    }
}

Console API

// List available voices
voices, err := console.ListSpeakers(ctx, &doubaospeech.ListSpeakersRequest{})

// List timbres
timbres, err := console.ListTimbres(ctx, &doubaospeech.ListTimbresRequest{})

// Check voice clone status
status, err := console.ListVoiceCloneStatus(ctx, &doubaospeech.ListVoiceCloneStatusRequest{
    VoiceID: "my-custom-voice",
})

Options

Option	Description
`WithAPIKey(key)`	x-api-key authentication
`WithBearerToken(token)`	Bearer token authentication
`WithV2APIKey(access, app)`	V2/V3 API authentication
`WithCluster(cluster)`	Set cluster name (V1)
`WithResourceID(id)`	Set resource ID (V2)
`WithBaseURL(url)`	Custom HTTP base URL
`WithWebSocketURL(url)`	Custom WebSocket URL
`WithHTTPClient(client)`	Custom HTTP client
`WithTimeout(duration)`	Request timeout
`WithUserID(id)`	User identifier

Error Handling

if err != nil {
    if e, ok := doubaospeech.AsError(err); ok {
        fmt.Printf("Error %d: %s\n", e.Code, e.Message)
        if e.IsRateLimit() {
            // Handle rate limiting
        }
    }
}

Giztoy Documentation