Files
media/controller/audio/audio_extract_controller.go
2026-05-19 14:33:06 +08:00

107 lines
3.3 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package audio
import (
"context"
"encoding/json"
common "media/controller/common"
dto "media/model/dto/audio"
service "media/service/asr"
"gitea.com/red-future/common/beans"
"github.com/gogf/gf/v2/frame/g"
"github.com/gogf/gf/v2/net/ghttp"
)
// audio is the receiver type for the handlers in this package. It carries no
// fields.
type audio struct{}

// AudioExtract is the shared package-level instance of audio.
var AudioExtract = &audio{}
// safeResult is the recognition result as exposed to API clients. It is a
// separate type so that internal paths are not part of the response.
//
// Field order is significant: encoding/json emits keys in declaration order.
type safeResult struct {
	Text          string `json:"text"`
	Model         string `json:"model"`
	Language      string `json:"language"`
	AudioSize     int64  `json:"audioSize"`
	AudioDuration string `json:"audioDuration"`

	// Scenes is left out of the JSON output when nil.
	Scenes *dto.SceneSummaryDTO `json:"scenes,omitempty"`
}
// safeItem is the per-video entry as exposed to API clients.
type safeItem struct {
	FileName string `json:"fileName"`

	// Result is left out of the JSON output when nil.
	Result *safeResult `json:"result,omitempty"`

	// Error is left out of the JSON output when empty.
	Error string `json:"error,omitempty"`
}
// TranscribeHandler runs speech-to-text plus scene analysis for one request
// and writes a JSON envelope ({"code", "message", "data"}) to the response.
//
// Two input modes are supported:
//  1. JSON body: {"video_urls":[...], "model":"medium", "language":"zh", "threshold":0.3}
//  2. File upload: files saved by common.SaveUploadedFiles (single or multiple)
//
// Mode 1 is chosen when the body decodes as JSON and contains at least one
// video URL; every other request falls through to mode 2. In both modes an
// empty model or language falls back to the whisper.model / whisper.language
// configuration values, and a non-positive threshold falls back to 0.3.
func (c *audio) TranscribeHandler(r *ghttp.Request) {
	// Fallback for the threshold parameter, shared by both input modes.
	const defaultThreshold = 0.3

	// NOTE(review): every request is tagged with a hard-coded "admin" user
	// under a plain string context key. Both look like placeholders, but code
	// outside this file may read ctx.Value("user"), so they are kept unchanged
	// — confirm before replacing them.
	ctx := context.WithValue(r.Context(), "user", &beans.User{UserName: "admin"})

	// URL-list mode. Decoding is attempted directly instead of being gated on
	// the first byte being '{', so a JSON document preceded by whitespace is
	// still recognised. Bodies that are not a JSON object (multipart data,
	// form data, arrays, ...) fail to decode or carry no URLs and fall through
	// to upload mode.
	//
	// NOTE(review): GetBody presumably buffers the whole request body, which
	// would be expensive for large video uploads — confirm, and consider
	// checking Content-Type before reading it.
	if body := r.GetBody(); len(body) > 0 {
		var req dto.TranscribeReq
		if json.Unmarshal(body, &req) == nil && len(req.VideoURLs) > 0 {
			// Fill in defaults; configuration is only consulted when needed.
			if req.Model == "" {
				req.Model = g.Cfg().MustGet(ctx, "whisper.model", "medium").String()
			}
			if req.Language == "" {
				req.Language = g.Cfg().MustGet(ctx, "whisper.language", "zh").String()
			}
			if req.Threshold <= 0 {
				req.Threshold = defaultThreshold
			}

			res, svcErr := service.VideoTranscribe.TranscribeWithURLs(ctx, &req)
			if svcErr != nil {
				r.Response.WriteJson(g.Map{"code": 500, "message": svcErr.Error()})
				return
			}
			r.Response.WriteJson(g.Map{"code": 200, "message": "success", "data": toSafeItems(res.Results)})
			return
		}
	}

	// File-upload mode.
	savePaths, err := common.SaveUploadedFiles(r)
	if err != nil {
		// Keep the cause in the server log; the client only receives the
		// generic hint below.
		g.Log().Warningf(ctx, "save uploaded files: %v", err)
	}
	if err != nil || len(savePaths) == 0 {
		r.Response.WriteJson(g.Map{"code": 400, "message": "请上传视频文件( multipart )或提供 video_urls( JSON )"})
		return
	}

	// Apply the same defaulting rules as URL-list mode, so that an explicitly
	// empty model/language or a non-positive threshold does not reach the
	// service.
	defaultModel := g.Cfg().MustGet(ctx, "whisper.model", "medium").String()
	defaultLanguage := g.Cfg().MustGet(ctx, "whisper.language", "zh").String()

	model := r.Get("model", defaultModel).String()
	if model == "" {
		model = defaultModel
	}
	language := r.Get("language", defaultLanguage).String()
	if language == "" {
		language = defaultLanguage
	}
	threshold := r.Get("threshold", defaultThreshold).Float64()
	if threshold <= 0 {
		threshold = defaultThreshold
	}

	results := service.VideoTranscribe.TranscribeUpload(ctx, savePaths, model, language, threshold)
	r.Response.WriteJson(g.Map{"code": 200, "message": "success", "data": toSafeItems(results)})
}
// toSafeItems converts service results into the response shape exposed to
// clients. Only the listed fields are copied, so anything else on the
// underlying result (the original note names audioPath and other internal
// paths) is not serialized.
//
// The returned slice is never nil, so an empty input is encoded as a JSON
// array ([]) rather than null. An item whose Result is absent, is not a
// *dto.TranscribeResult, or is a nil *dto.TranscribeResult keeps its file name
// and error but gets no result body.
func toSafeItems(results []dto.TranscribeItem) []safeItem {
	items := make([]safeItem, 0, len(results))
	for _, item := range results {
		si := safeItem{FileName: item.FileName, Error: item.Error}
		// The type assertion already fails for a nil interface. The extra nil
		// check covers a typed nil pointer stored in the interface, which
		// would otherwise panic on the field reads below.
		if tr, ok := item.Result.(*dto.TranscribeResult); ok && tr != nil {
			si.Result = &safeResult{
				Text:          tr.Text,
				Model:         tr.Model,
				Language:      tr.Language,
				AudioSize:     tr.AudioSize,
				AudioDuration: tr.AudioDuration,
				Scenes:        tr.Scenes,
			}
		}
		items = append(items, si)
	}
	return items
}