数字人项目迁移

This commit is contained in:
2026-04-27 11:07:21 +08:00
parent ba360bc89b
commit 995e038541
38 changed files with 4844 additions and 0 deletions

View File

@@ -0,0 +1,117 @@
package service
import (
"context"
"encoding/base64"
"digital-human/consts/public"
"github.com/gogf/gf/v2/errors/gerror"
"github.com/gogf/gf/v2/frame/g"
)
// tts is a stateless receiver type that groups the speech-related service
// methods of this package.
type tts struct{}

// TTS is the shared package-level instance used to call the speech service
// methods. Per the original author's note it is the unified wrapper for
// asynchronous model calls made through the model-asynch middleware.
var TTS = &tts{}
// CreateVoiceDesignTask submits a VoiceDesign audio task through
// createModelAsynchTask and returns whatever task ID and error it reports.
//
// Parameters:
//   - text: forwarded as the "text" field of the payload.
//   - instruct: forwarded as the "instruct" field of the payload.
//   - language: forwarded as "language"; an empty value is replaced by "Auto".
//   - speed: forwarded as "speed"; a value <= 0 is replaced by 1.0.
//
// The payload always requests "wav" as the response format.
func (s *tts) CreateVoiceDesignTask(
	ctx context.Context,
	text string,
	instruct string,
	language string, // empty means "Auto"
	speed float64, // <= 0 means 1.0
) (taskID string, err error) {
	if language == "" {
		language = "Auto"
	}
	if speed <= 0 {
		speed = 1.0
	}
	payload := map[string]any{
		"text":            text,
		"language":        language,
		"instruct":        instruct,
		"speed":           speed,
		"response_format": "wav",
	}
	// Infof (not Info) is needed for the %v verb to be expanded; Info only
	// joins its arguments and would print the verb literally.
	g.Log().Infof(ctx, "[CreateVoiceDesignTask] %v", payload)
	return createModelAsynchTask(ctx, public.ModelNameVoiceDesign, payload, "")
}
// CreateCustomVoiceTask submits a CustomVoice (preset voice) task through
// createModelAsynchTask and returns whatever task ID and error it reports.
//
// Parameters:
//   - text: forwarded as the "text" field of the payload.
//   - speaker: the preset speaker name (e.g. Vivian, Serena, Ryan).
//   - language: e.g. "Chinese", "English" or "Auto"; an empty value is
//     replaced by "Auto".
//   - instruct: optional emotion/style control; may be empty.
//   - speed: a value <= 0 is replaced by 1.0. The original author documents
//     the intended range as 0.5~2.0, but that range is not enforced here.
//
// The payload always requests "wav" as the response format.
func (s *tts) CreateCustomVoiceTask(
	ctx context.Context,
	text string,
	speaker string,
	language string, // e.g. "Chinese"/"English"/"Auto"; empty means "Auto"
	instruct string, // may be empty
	speed float64, // documented range 0.5~2.0; <= 0 means 1.0
) (taskID string, err error) {
	if language == "" {
		language = "Auto"
	}
	if speed <= 0 {
		speed = 1.0
	}
	payload := map[string]any{
		"text":            text,
		"language":        language,
		"speaker":         speaker,
		"instruct":        instruct,
		"speed":           speed,
		"response_format": "wav", // wav is used uniformly across the task helpers
	}
	// Infof (not Info) is needed for the %v verb to be expanded; Info only
	// joins its arguments and would print the verb literally.
	g.Log().Infof(ctx, "[CreateCustomVoiceTask] %v", payload)
	return createModelAsynchTask(ctx, public.ModelNameCustomVoice, payload, "")
}
// CreateBaseTask submits a Base (voice clone) task through
// createModelAsynchTask and returns whatever task ID and error it reports.
//
// The reference audio is meant to be supplied through exactly one of
// refAudioURL or refAudioBase64. This function does not enforce that; both
// values are forwarded as given.
//
// Parameters:
//   - text: forwarded as the "text" field of the payload.
//   - language: e.g. "Chinese", "English" or "Auto"; an empty value is
//     replaced by "Auto".
//   - refText: documented as required when xVectorOnlyMode is false (not
//     enforced here).
//   - refAudioURL: reference audio URL; may be empty.
//   - refAudioBase64: reference audio as base64; may be empty. Per the
//     original note, the "data:" prefix is optional.
//   - xVectorOnlyMode: when true, refText is not needed, but per the original
//     note quality may drop.
//   - speed: a value <= 0 is replaced by 1.0. The original author documents
//     the intended range as 0.5~2.0, but that range is not enforced here.
//
// The payload always requests "wav" as the response format.
func (s *tts) CreateBaseTask(
	ctx context.Context,
	text string,
	language string, // e.g. "Chinese"/"English"/"Auto"; empty means "Auto"
	refText string, // documented as required when xVectorOnlyMode is false
	refAudioURL string, // may be empty
	refAudioBase64 string, // may be empty ("data:" prefix optional)
	xVectorOnlyMode bool, // true: refText not needed, quality may drop
	speed float64, // documented range 0.5~2.0; <= 0 means 1.0
) (taskID string, err error) {
	if language == "" {
		language = "Auto"
	}
	if speed <= 0 {
		speed = 1.0
	}
	payload := map[string]any{
		"text":               text,
		"language":           language,
		"ref_text":           refText,
		"ref_audio_url":      refAudioURL,
		"ref_audio_base64":   refAudioBase64,
		"x_vector_only_mode": xVectorOnlyMode,
		"speed":              speed,
		"response_format":    "wav",
	}
	// Infof (not Info) is needed for the verbs to be expanded. Only the length
	// of the base64 audio is logged so the raw audio blob never reaches the
	// logs.
	g.Log().Infof(
		ctx,
		"[CreateBaseTask] text=%q language=%q ref_text=%q ref_audio_url=%q ref_audio_base64_len=%d x_vector_only_mode=%t speed=%v response_format=%q",
		text, language, refText, refAudioURL, len(refAudioBase64), xVectorOnlyMode, speed, "wav",
	)
	return createModelAsynchTask(ctx, public.ModelNameBase, payload, "")
}
// SpeechToText is a reserved placeholder for speech-to-text conversion.
//
// audioBase64 is the base64-encoded audio data (WAV, MP3, etc.). The input is
// checked for being non-empty and for being valid standard base64, and an
// error is returned when either check fails. Recognition itself is not
// implemented yet, so a valid input also yields an empty text and a
// "not implemented" error.
func (s *tts) SpeechToText(ctx context.Context, audioBase64 string) (text string, err error) {
	// ctx is unused until a recognition backend is wired in.
	_ = ctx

	if len(audioBase64) == 0 {
		return "", gerror.New("audioBase64 不能为空")
	}

	// Decode only to verify the input is well-formed base64; the decoded
	// bytes are discarded.
	_, decodeErr := base64.StdEncoding.DecodeString(audioBase64)
	if decodeErr != nil {
		return "", gerror.Wrap(decodeErr, "audioBase64 非法")
	}

	return "", gerror.New("SpeechToText 暂未实现:后续接入语音识别模型后补齐")
}