Doubao Speech SDK - Go Implementation
Import: github.com/haivivi/giztoy/pkg/doubaospeech
Clients
Speech API Client
type Client struct {
// V1 Services (Classic)
TTS *TTSService
ASR *ASRService
// V2 Services (BigModel)
TTSV2 *TTSServiceV2
ASRV2 *ASRServiceV2
// Shared Services
VoiceClone *VoiceCloneService
Realtime *RealtimeService
Meeting *MeetingService
Podcast *PodcastService
Translation *TranslationService
Media *MediaService
}
Constructor:
// With API Key (recommended)
client := doubaospeech.NewClient("app-id",
doubaospeech.WithAPIKey("your-api-key"),
doubaospeech.WithCluster("volcano_tts"),
)
// With Bearer Token
client := doubaospeech.NewClient("app-id",
doubaospeech.WithBearerToken("your-token"),
)
// With V2 API Key (for BigModel APIs)
client := doubaospeech.NewClient("app-id",
doubaospeech.WithV2APIKey("access-key", "app-key"),
doubaospeech.WithResourceID("seed-tts-2.0"),
)
Console API Client
console := doubaospeech.NewConsole("access-key", "secret-key")
Services
TTS V1 (Classic)
// Synchronous
resp, err := client.TTS.Synthesize(ctx, &doubaospeech.TTSRequest{
Text: "你好,世界!",
VoiceType: "zh_female_cancan",
})
// resp.Audio contains audio bytes
// Streaming (Go 1.23+ iter.Seq2)
for chunk, err := range client.TTS.SynthesizeStream(ctx, req) {
if err != nil {
return err
}
buf.Write(chunk.Audio)
}
TTS V2 (BigModel)
// Streaming HTTP
for chunk, err := range client.TTSV2.SynthesizeStream(ctx, &doubaospeech.TTSV2Request{
Text: "你好,世界!",
VoiceType: "zh_female_cancan",
ResourceID: "seed-tts-2.0",
}) {
// Process chunk
}
// Async (long text)
task, err := client.TTSV2.SubmitAsync(ctx, &doubaospeech.AsyncTTSRequest{
Text: longText,
})
result, err := task.Wait(ctx)
ASR (Speech Recognition)
// One-sentence (V1)
resp, err := client.ASR.Recognize(ctx, &doubaospeech.ASRRequest{
Audio: audioData,
Format: "pcm",
Language: "zh-CN",
})
// Streaming (WebSocket)
session, err := client.ASR.OpenStreamSession(ctx, &doubaospeech.StreamASRConfig{
Format: "pcm",
SampleRate: 16000,
})
defer session.Close()
// Send audio chunks
session.SendAudio(ctx, audioData, false)
session.SendAudio(ctx, lastData, true)
// Receive results
for chunk, err := range session.Recv() {
if err != nil {
break
}
fmt.Println(chunk.Text)
}
Voice Clone
// Upload audio for training
result, err := client.VoiceClone.Upload(ctx, &doubaospeech.VoiceCloneRequest{
AudioData: audioData,
VoiceID: "my-custom-voice",
})
// Check status
status, err := client.VoiceClone.GetStatus(ctx, "my-custom-voice")
// Activate voice
err = client.VoiceClone.Activate(ctx, "my-custom-voice")
Realtime Dialogue
session, err := client.Realtime.Connect(ctx, &doubaospeech.RealtimeConfig{
Model: "speech-dialog-001",
})
defer session.Close()
// Send audio
session.SendAudio(audioData)
// Receive events
for event := range session.Events() {
switch event.Type {
case "asr_result":
fmt.Println("User:", event.AsrResult.Text)
case "tts_audio":
play(event.TtsAudio)
}
}
Console API
// List available voices
voices, err := console.ListSpeakers(ctx, &doubaospeech.ListSpeakersRequest{})
// List timbres
timbres, err := console.ListTimbres(ctx, &doubaospeech.ListTimbresRequest{})
// Check voice clone status
status, err := console.ListVoiceCloneStatus(ctx, &doubaospeech.ListVoiceCloneStatusRequest{
VoiceID: "my-custom-voice",
})
Options
| Option | Description |
|---|---|
| `WithAPIKey(key)` | x-api-key authentication |
| `WithBearerToken(token)` | Bearer token authentication |
| `WithV2APIKey(access, app)` | V2/V3 API authentication |
| `WithCluster(cluster)` | Set cluster name (V1) |
| `WithResourceID(id)` | Set resource ID (V2) |
| `WithBaseURL(url)` | Custom HTTP base URL |
| `WithWebSocketURL(url)` | Custom WebSocket URL |
| `WithHTTPClient(client)` | Custom HTTP client |
| `WithTimeout(duration)` | Request timeout |
| `WithUserID(id)` | User identifier |
Error Handling
if err != nil {
if e, ok := doubaospeech.AsError(err); ok {
fmt.Printf("Error %d: %s\n", e.Code, e.Message)
if e.IsRateLimit() {
// Handle rate limiting
}
}
}