107 lines
3.3 KiB
Go
107 lines
3.3 KiB
Go
package audio
|
||
|
||
import (
|
||
"context"
|
||
"encoding/json"
|
||
|
||
common "media/controller/common"
|
||
dto "media/model/dto/audio"
|
||
service "media/service/asr"
|
||
|
||
"gitea.com/red-future/common/beans"
|
||
"github.com/gogf/gf/v2/frame/g"
|
||
"github.com/gogf/gf/v2/net/ghttp"
|
||
)
|
||
|
||
// audio is the stateless controller type that groups the audio-related HTTP
// handlers of this package.
type audio struct{}

// AudioExtract is the shared package-level controller instance whose methods
// are used as route handlers.
var AudioExtract = &audio{}
|
||
|
||
// safeResult 对外输出的识别结果(隐藏内部路径)
|
||
type safeResult struct {
|
||
Text string `json:"text"`
|
||
Model string `json:"model"`
|
||
Language string `json:"language"`
|
||
AudioSize int64 `json:"audioSize"`
|
||
AudioDuration string `json:"audioDuration"`
|
||
Scenes *dto.SceneSummaryDTO `json:"scenes,omitempty"`
|
||
}
|
||
|
||
// safeItem 对外输出的单视频结果
|
||
type safeItem struct {
|
||
FileName string `json:"fileName"`
|
||
Result *safeResult `json:"result,omitempty"`
|
||
Error string `json:"error,omitempty"`
|
||
}
|
||
|
||
// TranscribeHandler 语音转文字+分镜分析
|
||
// 支持两种入参方式:
|
||
// 1. JSON body: {"video_urls":[...], "model":"medium", "language":"zh", "threshold":0.3}
|
||
// 2. 文件上传: files 参数(兼容单/多文件)
|
||
func (c *audio) TranscribeHandler(r *ghttp.Request) {
|
||
ctx := r.Context()
|
||
ctx = context.WithValue(ctx, "user", &beans.User{UserName: "admin"})
|
||
|
||
// 优先尝试 JSON body(URL 列表模式)
|
||
body := r.GetBody()
|
||
if len(body) > 0 && body[0] == '{' {
|
||
var req dto.TranscribeReq
|
||
if json.Unmarshal(body, &req) == nil && len(req.VideoURLs) > 0 {
|
||
// 填充默认值
|
||
if req.Model == "" {
|
||
req.Model = g.Cfg().MustGet(ctx, "whisper.model", "medium").String()
|
||
}
|
||
if req.Language == "" {
|
||
req.Language = g.Cfg().MustGet(ctx, "whisper.language", "zh").String()
|
||
}
|
||
if req.Threshold <= 0 {
|
||
req.Threshold = 0.3
|
||
}
|
||
|
||
res, svcErr := service.VideoTranscribe.TranscribeWithURLs(ctx, &req)
|
||
if svcErr != nil {
|
||
r.Response.WriteJson(g.Map{"code": 500, "message": svcErr.Error()})
|
||
return
|
||
}
|
||
r.Response.WriteJson(g.Map{"code": 200, "message": "success", "data": toSafeItems(res.Results)})
|
||
return
|
||
}
|
||
}
|
||
|
||
// 文件上传模式
|
||
savePaths, err := common.SaveUploadedFiles(r)
|
||
if err != nil || len(savePaths) == 0 {
|
||
r.Response.WriteJson(g.Map{"code": 400, "message": "请上传视频文件( multipart )或提供 video_urls( JSON )"})
|
||
return
|
||
}
|
||
|
||
results := service.VideoTranscribe.TranscribeUpload(ctx, savePaths,
|
||
r.Get("model", g.Cfg().MustGet(ctx, "whisper.model", "medium").String()).String(),
|
||
r.Get("language", g.Cfg().MustGet(ctx, "whisper.language", "zh").String()).String(),
|
||
r.Get("threshold", 0.3).Float64())
|
||
|
||
r.Response.WriteJson(g.Map{"code": 200, "message": "success", "data": toSafeItems(results)})
|
||
}
|
||
|
||
// toSafeItems 将结果转为安全的响应格式(移除 audioPath 等内部路径)
|
||
func toSafeItems(results []dto.TranscribeItem) []safeItem {
|
||
var items []safeItem
|
||
for _, item := range results {
|
||
si := safeItem{FileName: item.FileName, Error: item.Error}
|
||
if item.Result != nil {
|
||
if r, ok := item.Result.(*dto.TranscribeResult); ok {
|
||
si.Result = &safeResult{
|
||
Text: r.Text,
|
||
Model: r.Model,
|
||
Language: r.Language,
|
||
AudioSize: r.AudioSize,
|
||
AudioDuration: r.AudioDuration,
|
||
Scenes: r.Scenes,
|
||
}
|
||
}
|
||
}
|
||
items = append(items, si)
|
||
}
|
||
return items
|
||
}
|