// Package audio declares the request and response types of the audio transcription API.
package audio
import "github.com/gogf/gf/v2/frame/g"
// TranscribeReq 语音转文字请求(JSON body / URL 方式)
|
|
|
|
|
|
type TranscribeReq struct {
|
2026-05-20 11:32:39 +08:00
|
|
|
|
g.Meta `path:"/transcribe" method:"post" tags:"音频转写" summary:"语音转文字(异步)" dc:"创建异步语音转文字任务,返回taskId"`
|
|
|
|
|
|
VideoURLs []string `json:"video_urls" v:"required#视频URL列表不能为空" dc:"视频URL列表"`
|
|
|
|
|
|
Model string `json:"model" dc:"whisper模型(tiny/base/small/medium)" d:"medium"`
|
|
|
|
|
|
Language string `json:"language" dc:"语言(zh/en/ja)" d:"zh"`
|
|
|
|
|
|
Threshold float64 `json:"threshold" dc:"场景检测阈值(0.1-0.5)" d:"0.3"`
|
|
|
|
|
|
CallbackURL string `json:"callback_url" dc:"任务完成后的回调地址(可选),成功后POST结果到此URL"`
|
2026-05-19 14:33:06 +08:00
|
|
|
|
}
// TranscribeRes 语音转文字响应
|
|
|
|
|
|
type TranscribeRes struct {
|
|
|
|
|
|
Results []TranscribeItem `json:"results" dc:"处理结果列表"`
|
|
|
|
|
|
}
// TranscribeItem is the processing outcome for a single video.
type TranscribeItem struct {
	// FileName is the name of the processed file.
	FileName string `json:"fileName" dc:"文件名"`
	// Result is the recognition result; it is omitted from JSON when nil.
	// NOTE(review): presumably carries a TranscribeResult — confirm against the producer.
	Result interface{} `json:"result,omitempty" dc:"识别结果"`
	// Error is the error message; it is omitted from JSON when empty.
	Error string `json:"error,omitempty" dc:"错误信息"`
}
// TranscribeResult 语音识别结果详情
|
|
|
|
|
|
type TranscribeResult struct {
|
|
|
|
|
|
Text string `json:"text" dc:"识别文本"`
|
|
|
|
|
|
Model string `json:"model" dc:"使用的模型"`
|
|
|
|
|
|
Language string `json:"language" dc:"语言"`
|
|
|
|
|
|
AudioPath string `json:"audioPath" dc:"音频文件路径"`
|
|
|
|
|
|
AudioSize int64 `json:"audioSize" dc:"音频文件大小(字节)"`
|
|
|
|
|
|
AudioDuration string `json:"audioDuration" dc:"音频时长"`
|
|
|
|
|
|
Scenes *SceneSummaryDTO `json:"scenes,omitempty" dc:"分镜分析"`
|
|
|
|
|
|
}
// SceneSummaryDTO 分镜分析摘要
|
|
|
|
|
|
type SceneSummaryDTO struct {
|
|
|
|
|
|
TotalScenes int `json:"totalScenes" dc:"场景总数"`
|
|
|
|
|
|
DurationStr string `json:"durationStr" dc:"总时长"`
|
|
|
|
|
|
AspectRatio string `json:"aspectRatio" dc:"画面比例"`
|
|
|
|
|
|
Orientation string `json:"orientation" dc:"横屏/竖屏"`
|
|
|
|
|
|
Pacing string `json:"pacing" dc:"剪辑节奏"`
|
|
|
|
|
|
ShotTypes map[string]int `json:"shotTypes" dc:"镜头类型分布"`
|
|
|
|
|
|
Scenes []SceneShotDTO `json:"scenes" dc:"分镜列表"`
|
|
|
|
|
|
}
// SceneShotDTO describes a single shot.
type SceneShotDTO struct {
	// SceneIndex is the sequence number of the scene.
	SceneIndex int `json:"sceneIndex" dc:"场景序号"`
	// StartTimeStr is the start time as a string.
	StartTimeStr string `json:"startTimeStr" dc:"开始时间"`
	// EndTimeStr is the end time as a string.
	EndTimeStr string `json:"endTimeStr" dc:"结束时间"`
	// DurationStr is the shot duration as a string.
	DurationStr string `json:"durationStr" dc:"时长"`
	// ShotType is the type of shot.
	ShotType string `json:"shotType" dc:"镜头类型"`
	// Composition describes the framing of the shot.
	Composition string `json:"composition" dc:"构图"`
	// NarrativePos is the position of the shot within the narrative.
	NarrativePos string `json:"narrativePos" dc:"叙事位置"`
	// Description is a textual description of the scene.
	Description string `json:"description" dc:"场景描述"`
}