Files
ai-agent/digitalhuman/service/audio_service.go
2026-04-27 11:24:13 +08:00

269 lines
7.9 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package service
import (
"context"
"encoding/base64"
"digital-human/digitalhuman/consts"
"digital-human/digitalhuman/consts/public"
"digital-human/digitalhuman/dao"
"digital-human/digitalhuman/model/dto"
"digital-human/digitalhuman/model/entity"
"github.com/gogf/gf/v2/errors/gerror"
"github.com/gogf/gf/v2/frame/g"
"github.com/gogf/gf/v2/util/gconv"
)
// audio is the receiver type for the audio service methods. It has no fields,
// so it carries no per-instance state.
type audio struct{}

// Audio is the package-level audio service instance.
var Audio = &audio{}
// UploadFileResponse is the response structure of an OSS file upload.
//
// The top level carries a numeric code and a message; the file details are
// nested under the "data" key. Nothing in the visible part of this file
// references the type.
type UploadFileResponse struct {
// Code is decoded from the "code" key.
Code int `json:"code"`
// Msg is decoded from the "msg" key.
Msg string `json:"msg"`
// Data holds the details of the uploaded file.
Data struct {
// FileURL is the upload address.
FileURL string `json:"fileURL" dc:"上传地址"`
// FileSize is the file size (the unit is not stated here).
FileSize int `json:"fileSize" dc:"文件大小"`
// FileName is the file name.
FileName string `json:"fileName" dc:"文件名称"`
// FileFormat is the file format.
FileFormat string `json:"fileFormat" dc:"文件格式"`
// FileAddressPrefix is decoded from "fileAddressPrefix".
// NOTE(review): presumably the URL prefix of the stored file; confirm.
FileAddressPrefix string `json:"fileAddressPrefix"`
} `json:"data"`
}
// Create inserts a new audio record and submits the asynchronous TTS task that
// will synthesize it.
//
// Defaults are applied to req in place: an empty Voice becomes "Serena" and an
// empty VoiceType becomes "Preset". When VoiceType is "custom" the referenced
// custom voice is looked up and a base-model task is created from its
// reference audio/text; otherwise a custom-voice-model task is created for
// req.Voice.
//
// On success the returned result carries the ID of the inserted audio record.
// If anything fails after the record has been inserted, the record is marked
// as failed (best effort) and the error is returned.
func (s *audio) Create(ctx context.Context, req *dto.CreateAudioReq) (res *dto.CreateAudioRes, err error) {
	// Apply the default voice settings.
	if req.Voice == "" {
		req.Voice = "Serena"
	}
	if req.VoiceType == "" {
		req.VoiceType = "Preset"
	}
	isCustom := req.VoiceType == "custom"

	// For a custom voice, make sure it exists before anything is written.
	if isCustom && req.CustomVoice != "" {
		found, lookupErr := dao.CustomVoice.GetOne(ctx, gconv.Int64(req.CustomVoice))
		if lookupErr != nil {
			return nil, gerror.Wrapf(lookupErr, "自定义音色不存在: %s", req.CustomVoice)
		}
		if found == nil {
			return nil, gerror.Newf("自定义音色不存在: %s", req.CustomVoice)
		}
	}

	// Insert the record; the DAO is expected to store it in the "generating" state.
	audioID, err := dao.Audio.Insert(ctx, req)
	if err != nil {
		return nil, err
	}

	// Create the asynchronous task through model-asynch:
	//   - custom (cloned) voice -> base model, which needs reference audio/text
	//   - anything else         -> customvoice model
	var taskID string
	if isCustom {
		// Use distinct names here: redeclaring err with := inside this block
		// would shadow the named result, and the task-creation error assigned
		// below would never reach the check after the if/else.
		cv, cvErr := dao.CustomVoice.GetOne(ctx, gconv.Int64(req.CustomVoice))
		if cvErr == nil && cv == nil {
			cvErr = gerror.Newf("自定义音色不存在: %s", req.CustomVoice)
		}
		if cvErr != nil {
			// Best effort: the lookup error is what the caller needs to see.
			_, _ = dao.Audio.UpdateStatus(ctx, audioID, consts.AudioStatusFailed, "获取自定义音色失败: "+cvErr.Error(), "", 0, "")
			return nil, cvErr
		}

		refAudioBase64 := base64.StdEncoding.EncodeToString(cv.ReferenceAudio)
		// Without a reference text only the x-vector mode can be used.
		xVectorOnlyMode := cv.Text == ""
		taskID, err = TTS.CreateBaseTask(asyncCtx(ctx), req.ScriptText, "Auto", cv.Text, cv.OssFile, refAudioBase64, xVectorOnlyMode, 1.0)
	} else {
		taskID, err = TTS.CreateCustomVoiceTask(asyncCtx(ctx), req.ScriptText, req.Voice, "Auto", "", 1.0)
	}
	if err != nil {
		// Best effort: the task-creation error is what the caller needs to see.
		_, _ = dao.Audio.UpdateStatus(ctx, audioID, consts.AudioStatusFailed, "创建异步任务失败: "+err.Error(), "", 0, "")
		return nil, err
	}

	// Link the async task to the audio record. The task already exists at this
	// point, so a failure is logged instead of failing the request.
	if _, refErr := dao.AsyncTaskRef.Insert(ctx, &entity.AsyncTaskRef{
		TaskID:    taskID,
		State:     0,
		TableName: public.TableNameAudio,
		BizID:     audioID,
	}); refErr != nil {
		g.Log().Errorf(ctx, "保存异步任务关联失败: taskID=%v audioID=%v err=%v", taskID, audioID, refErr)
	}

	return &dto.CreateAudioRes{Id: audioID}, nil
}
// List fetches the audio records selected by req and converts each of them
// into a list item. Total is the count reported by the DAO.
func (s *audio) List(ctx context.Context, req *dto.ListAudioReq) (res *dto.ListAudioRes, err error) {
	rows, count, err := dao.Audio.List(ctx, req)
	if err != nil {
		return nil, err
	}

	// One output item per row, in the order the DAO returned them.
	items := make([]*dto.AudioListItem, len(rows))
	for i := range rows {
		row := rows[i]
		items[i] = &dto.AudioListItem{
			ID:          row.Id,
			Name:        row.Name,
			Description: row.Description,
			ScriptText:  row.ScriptText,
			AudioURL:    row.AudioURL,
			Status:      row.Status,
			ErrorMsg:    row.ErrorMsg,
			Duration:    row.Duration,
			ExternalID:  row.ExternalID,
			Voice:       row.Voice,
			VoiceType:   row.VoiceType,
			CustomVoice: row.CustomVoice,
			CreatedAt:   row.CreatedAt,
			UpdatedAt:   row.UpdatedAt,
		}
	}

	return &dto.ListAudioRes{
		Total: int64(count),
		List:  items,
	}, nil
}
// GetOne loads the audio record with the given id and returns it as a
// dto.GetAudioRes. A DAO error is returned unchanged.
func (s *audio) GetOne(ctx context.Context, id int64) (*dto.GetAudioRes, error) {
	record, err := dao.Audio.GetOne(ctx, id)
	if err != nil {
		return nil, err
	}

	// Copy the stored fields one by one into the response object.
	detail := &dto.GetAudioRes{}
	detail.ID = record.Id
	detail.Name = record.Name
	detail.Description = record.Description
	detail.ScriptText = record.ScriptText
	detail.AudioURL = record.AudioURL
	detail.Status = record.Status
	detail.ErrorMsg = record.ErrorMsg
	detail.Duration = record.Duration
	detail.ExternalID = record.ExternalID
	detail.Voice = record.Voice
	detail.VoiceType = record.VoiceType
	detail.CustomVoice = record.CustomVoice
	detail.CreatedAt = record.CreatedAt
	detail.UpdatedAt = record.UpdatedAt
	return detail, nil
}
// Update applies the non-empty fields of req to the stored audio record
// identified by req.ID and writes the record back.
//
// Only Name, Description, Voice, VoiceType and CustomVoice can be changed;
// a field that g.IsEmpty reports as empty keeps its stored value.
func (s *audio) Update(ctx context.Context, req *dto.UpdateAudioReq) (err error) {
	// Load the current record so untouched fields are preserved.
	current, err := dao.Audio.GetOne(ctx, req.ID)
	if err != nil {
		return gerror.Wrap(err, "获取原始音频信息失败")
	}

	// Overlay each provided value on the stored record.
	if name := req.Name; !g.IsEmpty(name) {
		current.Name = name
	}
	if description := req.Description; !g.IsEmpty(description) {
		current.Description = description
	}
	if voice := req.Voice; !g.IsEmpty(voice) {
		current.Voice = voice
	}
	if voiceType := req.VoiceType; !g.IsEmpty(voiceType) {
		current.VoiceType = voiceType
	}
	if customVoice := req.CustomVoice; !g.IsEmpty(customVoice) {
		current.CustomVoice = customVoice
	}

	_, err = dao.Audio.Update(ctx, req.ID, current)
	return err
}
// Delete removes the audio record with the given id through the DAO and
// returns the DAO's error, if any.
func (s *audio) Delete(ctx context.Context, id int64) error {
	if _, err := dao.Audio.Delete(ctx, id); err != nil {
		return err
	}
	return nil
}
// Generate re-runs speech synthesis for the existing audio record req.ID.
//
// The record's status is reset to "generating" and a new asynchronous TTS task
// is created from the stored script and voice settings: a base-model task when
// the stored VoiceType is "custom", a custom-voice-model task otherwise. If the
// custom voice cannot be loaded or the task cannot be created, the record is
// marked as failed (best effort) and the error is returned.
//
// NOTE(review): the returned TaskID is the audio record ID rendered as a
// string, not the ID of the async task. This matches the previous behavior;
// confirm it is what callers expect.
func (s *audio) Generate(ctx context.Context, req *dto.GenerateAudioReq) (res *dto.GenerateAudioRes, err error) {
	// Load the stored audio record.
	audioOne, err := dao.Audio.GetOne(ctx, req.ID)
	if err != nil {
		return nil, gerror.Wrap(err, "获取音频信息失败")
	}

	// Reset the status to "generating" and clear the previous result fields.
	if _, err = dao.Audio.UpdateStatus(ctx, req.ID, consts.AudioStatusGenerating, "", "", 0, ""); err != nil {
		return nil, err
	}

	var taskID string
	if audioOne.VoiceType == "custom" {
		// Use distinct names here: redeclaring err with := inside this block
		// would shadow the named result, and the task-creation error assigned
		// below would never reach the check after the if/else.
		cv, cvErr := dao.CustomVoice.GetOne(ctx, gconv.Int64(audioOne.CustomVoice))
		if cvErr == nil && cv == nil {
			// A nil voice without an error would otherwise panic on cv.OssFile.
			cvErr = gerror.Newf("自定义音色不存在: %v", audioOne.CustomVoice)
		}
		if cvErr != nil {
			// Best effort: the lookup error is what the caller needs to see.
			_, _ = dao.Audio.UpdateStatus(ctx, req.ID, consts.AudioStatusFailed, "获取自定义音色失败: "+cvErr.Error(), "", 0, "")
			return nil, cvErr
		}

		// Encoding empty reference audio yields "", so no length check is needed.
		refAudioBase64 := base64.StdEncoding.EncodeToString(cv.ReferenceAudio)
		// Without a reference text only the x-vector mode can be used.
		xVectorOnlyMode := cv.Text == ""
		taskID, err = TTS.CreateBaseTask(asyncCtx(ctx), audioOne.ScriptText, "Auto", cv.Text, cv.OssFile, refAudioBase64, xVectorOnlyMode, 1.0)
	} else {
		taskID, err = TTS.CreateCustomVoiceTask(asyncCtx(ctx), audioOne.ScriptText, audioOne.Voice, "Auto", "", 1.0)
	}
	if err != nil {
		// Best effort: the task-creation error is what the caller needs to see.
		_, _ = dao.Audio.UpdateStatus(ctx, req.ID, consts.AudioStatusFailed, "创建异步任务失败: "+err.Error(), "", 0, "")
		return nil, err
	}

	// Link the async task to the audio record. The task already exists at this
	// point, so a failure is logged instead of failing the request.
	if _, refErr := dao.AsyncTaskRef.Insert(ctx, &entity.AsyncTaskRef{
		TaskID:    taskID,
		State:     0,
		TableName: public.TableNameAudio,
		BizID:     req.ID,
	}); refErr != nil {
		g.Log().Errorf(ctx, "保存异步任务关联失败: taskID=%v audioID=%v err=%v", taskID, req.ID, refErr)
	}

	return &dto.GenerateAudioRes{TaskID: gconv.String(req.ID)}, nil
}
// GetStatusOptions returns the audio status options supplied by
// consts.GetAllAudioStatusKeyValue. It never returns an error.
func (s *audio) GetStatusOptions(ctx context.Context, req *dto.GetAudioStatusOptionsReq) (res *dto.GetAudioStatusOptionsRes, err error) {
	// Neither argument influences the result.
	_, _ = ctx, req

	out := &dto.GetAudioStatusOptionsRes{}
	out.Options = consts.GetAllAudioStatusKeyValue()
	return out, nil
}
// TTS is the former text-to-speech entry point (Qwen3-TTS).
//
// It no longer performs any synthesis: every call returns a nil result and an
// error telling the caller to use the asynchronous CreateAudio flow instead.
func (s *audio) TTS(ctx context.Context, req *dto.TTSReq) (res *dto.TTSRes, err error) {
	// The arguments are accepted only to keep the signature stable.
	_, _ = ctx, req

	err = gerror.New("该接口已迁移为异步:请使用 CreateAudio 创建异步任务并通过轮询/批量领取获取结果")
	return nil, err
}