107 lines
3.3 KiB
Go
107 lines
3.3 KiB
Go
|
|
package audio
|
|||
|
|
|
|||
|
|
import (
|
|||
|
|
"context"
|
|||
|
|
"encoding/json"
|
|||
|
|
|
|||
|
|
common "media/controller/common"
|
|||
|
|
dto "media/model/dto/audio"
|
|||
|
|
service "media/service/asr"
|
|||
|
|
|
|||
|
|
"gitea.com/red-future/common/beans"
|
|||
|
|
"github.com/gogf/gf/v2/frame/g"
|
|||
|
|
"github.com/gogf/gf/v2/net/ghttp"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
// audio is the stateless controller type that groups the audio-related HTTP
// handlers of this package.
type audio struct{}

// AudioExtract is the package-level controller instance whose methods are used
// as request handlers.
var AudioExtract = &audio{}
|
|||
|
|
|
|||
|
|
// safeResult 对外输出的识别结果(隐藏内部路径)
|
|||
|
|
type safeResult struct {
|
|||
|
|
Text string `json:"text"`
|
|||
|
|
Model string `json:"model"`
|
|||
|
|
Language string `json:"language"`
|
|||
|
|
AudioSize int64 `json:"audioSize"`
|
|||
|
|
AudioDuration string `json:"audioDuration"`
|
|||
|
|
Scenes *dto.SceneSummaryDTO `json:"scenes,omitempty"`
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// safeItem 对外输出的单视频结果
|
|||
|
|
type safeItem struct {
|
|||
|
|
FileName string `json:"fileName"`
|
|||
|
|
Result *safeResult `json:"result,omitempty"`
|
|||
|
|
Error string `json:"error,omitempty"`
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// TranscribeHandler 语音转文字+分镜分析
|
|||
|
|
// 支持两种入参方式:
|
|||
|
|
// 1. JSON body: {"video_urls":[...], "model":"medium", "language":"zh", "threshold":0.3}
|
|||
|
|
// 2. 文件上传: files 参数(兼容单/多文件)
|
|||
|
|
func (c *audio) TranscribeHandler(r *ghttp.Request) {
|
|||
|
|
ctx := r.Context()
|
|||
|
|
ctx = context.WithValue(ctx, "user", &beans.User{UserName: "admin"})
|
|||
|
|
|
|||
|
|
// 优先尝试 JSON body(URL 列表模式)
|
|||
|
|
body := r.GetBody()
|
|||
|
|
if len(body) > 0 && body[0] == '{' {
|
|||
|
|
var req dto.TranscribeReq
|
|||
|
|
if json.Unmarshal(body, &req) == nil && len(req.VideoURLs) > 0 {
|
|||
|
|
// 填充默认值
|
|||
|
|
if req.Model == "" {
|
|||
|
|
req.Model = g.Cfg().MustGet(ctx, "whisper.model", "medium").String()
|
|||
|
|
}
|
|||
|
|
if req.Language == "" {
|
|||
|
|
req.Language = g.Cfg().MustGet(ctx, "whisper.language", "zh").String()
|
|||
|
|
}
|
|||
|
|
if req.Threshold <= 0 {
|
|||
|
|
req.Threshold = 0.3
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
res, svcErr := service.VideoTranscribe.TranscribeWithURLs(ctx, &req)
|
|||
|
|
if svcErr != nil {
|
|||
|
|
r.Response.WriteJson(g.Map{"code": 500, "message": svcErr.Error()})
|
|||
|
|
return
|
|||
|
|
}
|
|||
|
|
r.Response.WriteJson(g.Map{"code": 200, "message": "success", "data": toSafeItems(res.Results)})
|
|||
|
|
return
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 文件上传模式
|
|||
|
|
savePaths, err := common.SaveUploadedFiles(r)
|
|||
|
|
if err != nil || len(savePaths) == 0 {
|
|||
|
|
r.Response.WriteJson(g.Map{"code": 400, "message": "请上传视频文件( multipart )或提供 video_urls( JSON )"})
|
|||
|
|
return
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
results := service.VideoTranscribe.TranscribeUpload(ctx, savePaths,
|
|||
|
|
r.Get("model", g.Cfg().MustGet(ctx, "whisper.model", "medium").String()).String(),
|
|||
|
|
r.Get("language", g.Cfg().MustGet(ctx, "whisper.language", "zh").String()).String(),
|
|||
|
|
r.Get("threshold", 0.3).Float64())
|
|||
|
|
|
|||
|
|
r.Response.WriteJson(g.Map{"code": 200, "message": "success", "data": toSafeItems(results)})
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// toSafeItems 将结果转为安全的响应格式(移除 audioPath 等内部路径)
|
|||
|
|
func toSafeItems(results []dto.TranscribeItem) []safeItem {
|
|||
|
|
var items []safeItem
|
|||
|
|
for _, item := range results {
|
|||
|
|
si := safeItem{FileName: item.FileName, Error: item.Error}
|
|||
|
|
if item.Result != nil {
|
|||
|
|
if r, ok := item.Result.(*dto.TranscribeResult); ok {
|
|||
|
|
si.Result = &safeResult{
|
|||
|
|
Text: r.Text,
|
|||
|
|
Model: r.Model,
|
|||
|
|
Language: r.Language,
|
|||
|
|
AudioSize: r.AudioSize,
|
|||
|
|
AudioDuration: r.AudioDuration,
|
|||
|
|
Scenes: r.Scenes,
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
items = append(items, si)
|
|||
|
|
}
|
|||
|
|
return items
|
|||
|
|
}
|