代码初始化
This commit is contained in:
232
service/asr/transcribe_service.go
Normal file
232
service/asr/transcribe_service.go
Normal file
@@ -0,0 +1,232 @@
|
||||
package asr
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
dto "media/model/dto/audio"
|
||||
serviceAudio "media/service/audio"
|
||||
serviceScene "media/service/scene"
|
||||
|
||||
"github.com/gogf/gf/v2/frame/g"
|
||||
)
|
||||
|
||||
// VideoTranscribeReq describes a request to transcribe the speech of a single
// video file.
type VideoTranscribeReq struct {
	// VideoPath is the path of the video file to transcribe.
	VideoPath string
	// Model is the whisper model name; it is forwarded unchanged to the
	// whisper service.
	Model string
	// Language is the language code forwarded to the whisper service.
	Language string
	// KeepAudio keeps the extracted audio (and transcript files) on disk
	// after transcription when true.
	KeepAudio bool
}
|
||||
|
||||
// VideoTranscribeRes is the result of transcribing one video.
type VideoTranscribeRes struct {
	// Text is the recognized transcript.
	Text string `json:"text"`
	// Model is the model reported by the whisper service.
	Model string `json:"model"`
	// Language is the language reported by the whisper service.
	Language string `json:"language"`
	// AudioPath is the path of the audio file extracted from the video.
	AudioPath string `json:"audioPath"`
	// AudioSize is the size of the extracted audio in bytes.
	AudioSize int64 `json:"audioSize"`
	// AudioDuration is the duration of the extracted audio as reported by
	// the audio extraction service.
	AudioDuration string `json:"audioDuration"`
}
|
||||
|
||||
// transcribeService groups the video transcription operations; it carries no
// state.
type transcribeService struct{}

// VideoTranscribe is the package-level instance of the video transcription
// service.
var VideoTranscribe = &transcribeService{}
|
||||
|
||||
// TranscribeWithURLs 从 URL 下载视频并转录
|
||||
func (s *transcribeService) TranscribeWithURLs(ctx context.Context, req *dto.TranscribeReq) (res *dto.TranscribeRes, err error) {
|
||||
if len(req.VideoURLs) == 0 {
|
||||
return nil, errors.New("video_urls 不能为空")
|
||||
}
|
||||
|
||||
tempDir := getTempDir(ctx)
|
||||
os.MkdirAll(tempDir, 0755)
|
||||
|
||||
var savePaths []string
|
||||
for _, videoURL := range req.VideoURLs {
|
||||
savePath, dlErr := downloadFromURL(ctx, videoURL, tempDir)
|
||||
if dlErr != nil {
|
||||
continue
|
||||
}
|
||||
savePaths = append(savePaths, savePath)
|
||||
}
|
||||
if len(savePaths) == 0 {
|
||||
return nil, errors.New("所有视频下载均失败")
|
||||
}
|
||||
|
||||
results := s.processVideos(ctx, savePaths, req.Model, req.Language, req.Threshold)
|
||||
res = &dto.TranscribeRes{Results: results}
|
||||
return
|
||||
}
|
||||
|
||||
// TranscribeUpload 从已保存的文件转录
|
||||
func (s *transcribeService) TranscribeUpload(ctx context.Context, savePaths []string, model, language string, threshold float64) []dto.TranscribeItem {
|
||||
return s.processVideos(ctx, savePaths, model, language, threshold)
|
||||
}
|
||||
|
||||
// processVideos 逐个处理视频
|
||||
func (s *transcribeService) processVideos(ctx context.Context, savePaths []string, model, language string, threshold float64) []dto.TranscribeItem {
|
||||
var results []dto.TranscribeItem
|
||||
|
||||
for _, savePath := range savePaths {
|
||||
fileName := filepath.Base(savePath)
|
||||
if idx := strings.Index(fileName, "_"); idx > 0 {
|
||||
fileName = fileName[idx+1:]
|
||||
}
|
||||
|
||||
// 场景分析
|
||||
var scenes *dto.SceneSummaryDTO
|
||||
sceneRes, sceneErr := serviceScene.SceneAnalyzer.Analyze(ctx, &serviceScene.SceneAnalyzeReq{
|
||||
VideoPaths: []string{savePath},
|
||||
Threshold: threshold,
|
||||
ExtractKeyframes: false,
|
||||
})
|
||||
if sceneErr == nil && len(sceneRes.Analyses) > 0 {
|
||||
scenes = toSceneDTO(&sceneRes.Analyses[0])
|
||||
}
|
||||
|
||||
// 语音转文字(内部删除视频文件)
|
||||
transRes, transErr := s.TranscribeVideo(ctx, &VideoTranscribeReq{
|
||||
VideoPath: savePath,
|
||||
Model: model,
|
||||
Language: language,
|
||||
})
|
||||
if transErr != nil {
|
||||
os.Remove(savePath)
|
||||
results = append(results, dto.TranscribeItem{FileName: fileName, Error: transErr.Error()})
|
||||
continue
|
||||
}
|
||||
|
||||
results = append(results, dto.TranscribeItem{
|
||||
FileName: fileName,
|
||||
Result: &dto.TranscribeResult{
|
||||
Text: transRes.Text,
|
||||
Model: transRes.Model,
|
||||
Language: transRes.Language,
|
||||
AudioPath: transRes.AudioPath,
|
||||
AudioSize: transRes.AudioSize,
|
||||
AudioDuration: transRes.AudioDuration,
|
||||
Scenes: scenes,
|
||||
},
|
||||
})
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
||||
// TranscribeVideo 从视频提取音频并转为文字
|
||||
func (s *transcribeService) TranscribeVideo(ctx context.Context, req *VideoTranscribeReq) (res *VideoTranscribeRes, err error) {
|
||||
audioReq := &serviceAudio.ExtractAudioReq{VideoPath: req.VideoPath, Format: "mp3"}
|
||||
audioRes, err := serviceAudio.AudioExtract.Extract(ctx, audioReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("音频提取失败: %v", err)
|
||||
}
|
||||
|
||||
whisperRes, err := Whisper.Transcribe(ctx, &TranscribeReq{AudioPath: audioRes.AudioPath, Model: req.Model, Language: req.Language})
|
||||
if err != nil {
|
||||
os.Remove(audioRes.AudioPath)
|
||||
return nil, fmt.Errorf("语音识别失败: %v", err)
|
||||
}
|
||||
|
||||
os.Remove(req.VideoPath)
|
||||
if !req.KeepAudio {
|
||||
os.Remove(audioRes.AudioPath)
|
||||
baseName := strings.TrimSuffix(audioRes.AudioPath, filepath.Ext(audioRes.AudioPath))
|
||||
os.Remove(baseName + ".txt")
|
||||
os.Remove(baseName + "." + whisperRes.Model + ".txt")
|
||||
}
|
||||
|
||||
res = &VideoTranscribeRes{
|
||||
Text: whisperRes.Text,
|
||||
Model: whisperRes.Model,
|
||||
Language: whisperRes.Language,
|
||||
AudioPath: audioRes.AudioPath,
|
||||
AudioSize: audioRes.Size,
|
||||
AudioDuration: audioRes.Duration,
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// downloadFromURL fetches rawURL with an HTTP GET and stores the response body
// in tempDir as "<unix-millis>_<name>", where <name> is the last path segment
// of the URL (or a generated "video_<unix-millis>.mp4" when that segment is
// empty). It returns the path of the stored file.
//
// The request is bound to ctx, so cancelling ctx aborts the download; it is
// additionally capped by a 10 minute client timeout. Any response status other
// than 200 is an error. On every error the returned path is empty and a
// partially written file is removed.
func downloadFromURL(ctx context.Context, rawURL, tempDir string) (string, error) {
	parsedURL, err := url.Parse(rawURL)
	if err != nil {
		return "", err
	}
	segments := strings.Split(parsedURL.Path, "/")
	fileName := segments[len(segments)-1]
	if fileName == "" {
		fileName = fmt.Sprintf("video_%d.mp4", time.Now().UnixMilli())
	}
	savePath := filepath.Join(tempDir, fmt.Sprintf("%d_%s", time.Now().UnixMilli(), fileName))

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
	if err != nil {
		return "", err
	}
	client := &http.Client{Timeout: 10 * time.Minute}
	resp, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("HTTP %d", resp.StatusCode)
	}

	out, err := os.Create(savePath)
	if err != nil {
		return "", err
	}

	// Close explicitly (not deferred) so a failed flush is detected before
	// the file is handed to the caller.
	_, copyErr := io.Copy(out, resp.Body)
	closeErr := out.Close()
	if copyErr != nil {
		os.Remove(savePath)
		return "", copyErr
	}
	if closeErr != nil {
		os.Remove(savePath)
		return "", closeErr
	}
	return savePath, nil
}
|
||||
|
||||
func getTempDir(ctx context.Context) string {
|
||||
tempDir := g.Cfg().MustGet(ctx, "ffmpeg.temp_dir", "resource/temp").String()
|
||||
if tempDir == "" {
|
||||
tempDir = "resource/temp"
|
||||
}
|
||||
if !filepath.IsAbs(tempDir) {
|
||||
absDir, _ := filepath.Abs(tempDir)
|
||||
tempDir = absDir
|
||||
}
|
||||
return tempDir
|
||||
}
|
||||
|
||||
// toSceneDTO 将场景服务的原始结果转为 DTO 格式
|
||||
func toSceneDTO(analysis *serviceScene.VideoSceneAnalysis) *dto.SceneSummaryDTO {
|
||||
if analysis == nil {
|
||||
return nil
|
||||
}
|
||||
shots := make([]dto.SceneShotDTO, 0, len(analysis.Scenes))
|
||||
for _, s := range analysis.Scenes {
|
||||
shots = append(shots, dto.SceneShotDTO{
|
||||
SceneIndex: s.SceneIndex,
|
||||
StartTimeStr: s.StartTimeStr,
|
||||
EndTimeStr: s.EndTimeStr,
|
||||
DurationStr: s.DurationStr,
|
||||
ShotType: s.ShotType,
|
||||
Composition: s.Composition,
|
||||
NarrativePos: s.NarrativePos,
|
||||
Description: s.Description,
|
||||
})
|
||||
}
|
||||
return &dto.SceneSummaryDTO{
|
||||
TotalScenes: analysis.TotalScenes,
|
||||
DurationStr: analysis.DurationStr,
|
||||
AspectRatio: analysis.AspectRatio,
|
||||
Orientation: analysis.Orientation,
|
||||
Pacing: analysis.Summary.Pacing,
|
||||
ShotTypes: analysis.Summary.ShotTypeDist,
|
||||
Scenes: shots,
|
||||
}
|
||||
}
|
||||
391
service/asr/whisper_service.go
Normal file
391
service/asr/whisper_service.go
Normal file
@@ -0,0 +1,391 @@
|
||||
package asr
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"media/service/setup"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gogf/gf/v2/frame/g"
|
||||
)
|
||||
|
||||
// WhisperBackend identifies which whisper implementation performs the
// transcription.
type WhisperBackend int

// Supported backends.
const (
	// backendPython runs whisper through "python -m whisper".
	backendPython WhisperBackend = 0
	// backendCLI runs the openai-whisper command-line tool ("whisper").
	backendCLI WhisperBackend = 1
	// backendCpp runs whisper.cpp ("whisper-cpp").
	backendCpp WhisperBackend = 2
)
|
||||
|
||||
// WhisperService provides speech recognition; it carries no state.
type WhisperService struct{}

// Whisper is the package-level instance of the speech recognition service.
var Whisper = &WhisperService{}
|
||||
|
||||
// TranscribeReq is a speech recognition request.
type TranscribeReq struct {
	// AudioPath is the path of the audio file.
	AudioPath string
	// Model is the whisper model: tiny/base/small/medium/large.
	Model string
	// Language is the language code; the default is zh (Chinese).
	Language string
}
|
||||
|
||||
// TranscribeRes 语音识别响应
|
||||
type TranscribeRes struct {
|
||||
Text string // 完整识别文本
|
||||
Segments []Segment
|
||||
Model string // 使用的模型
|
||||
Language string // 识别的语言
|
||||
OutputPath string // 输出的 txt 文件路径
|
||||
}
|
||||
|
||||
// Segment is one recognized piece of speech together with its timestamps.
type Segment struct {
	// Start is the start time in seconds.
	Start float64 `json:"start"`
	// End is the end time in seconds.
	End float64 `json:"end"`
	// Text is the recognized text of the segment.
	Text string `json:"text"`
}
|
||||
|
||||
// Transcribe 对音频文件进行语音识别(自动检测后端,自动降级)
|
||||
func (s *WhisperService) Transcribe(ctx context.Context, req *TranscribeReq) (res *TranscribeRes, err error) {
|
||||
// 1. 校验音频文件
|
||||
if _, err = os.Stat(req.AudioPath); os.IsNotExist(err) {
|
||||
return nil, fmt.Errorf("音频文件不存在: %s", req.AudioPath)
|
||||
}
|
||||
|
||||
// 2. 设置默认值
|
||||
model := req.Model
|
||||
if model == "" {
|
||||
model = g.Cfg().MustGet(ctx, "whisper.model", "small").String()
|
||||
}
|
||||
language := req.Language
|
||||
if language == "" {
|
||||
language = g.Cfg().MustGet(ctx, "whisper.language", "zh").String()
|
||||
}
|
||||
|
||||
// 3. 检测后端,C++ 版找不到模型文件时自动降级
|
||||
backend, whisperPath := s.detectBackend()
|
||||
if backend == backendCpp {
|
||||
modelPath := s.resolveCppModelPath(model)
|
||||
if modelPath == "" {
|
||||
g.Log().Warningf(ctx, "whisper.cpp 模型文件(%s)未找到,降级到 Python whisper", model)
|
||||
backend = backendPython
|
||||
} else {
|
||||
g.Log().Infof(ctx, "语音识别(whisper.cpp): audio=%s, model=%s", req.AudioPath, modelPath)
|
||||
return s.transcribeWithCpp(ctx, req, whisperPath, modelPath, language)
|
||||
}
|
||||
}
|
||||
|
||||
switch backend {
|
||||
case backendCLI:
|
||||
g.Log().Infof(ctx, "语音识别(CLI): audio=%s, model=%s, language=%s", req.AudioPath, model, language)
|
||||
return s.transcribeWithCLI(ctx, req, whisperPath, model, language)
|
||||
default:
|
||||
g.Log().Infof(ctx, "语音识别(python): audio=%s, model=%s, language=%s", req.AudioPath, model, language)
|
||||
return s.transcribeWithPython(ctx, req, model, language)
|
||||
}
|
||||
}
|
||||
|
||||
// transcribeWithCLI 使用 whisper CLI 命令
|
||||
func (s *WhisperService) transcribeWithCLI(ctx context.Context, req *TranscribeReq, whisperPath, model, language string) (res *TranscribeRes, err error) {
|
||||
outputDir := filepath.Dir(req.AudioPath)
|
||||
modelDir := g.Cfg().MustGet(ctx, "whisper.model_dir", "").String()
|
||||
threads := g.Cfg().MustGet(ctx, "whisper.threads", 2).Int()
|
||||
|
||||
args := []string{
|
||||
req.AudioPath,
|
||||
"--model", model,
|
||||
"--language", language,
|
||||
"--output_dir", outputDir,
|
||||
"--output_format", "txt",
|
||||
"--threads", fmt.Sprintf("%d", threads),
|
||||
}
|
||||
if modelDir != "" {
|
||||
args = append(args, "--model_dir", modelDir)
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, whisperPath, args...)
|
||||
output, execErr := cmd.CombinedOutput()
|
||||
if execErr != nil {
|
||||
g.Log().Errorf(ctx, "whisper CLI 执行失败: %v\n%s", execErr, string(output))
|
||||
return nil, fmt.Errorf("语音识别失败: %v", execErr)
|
||||
}
|
||||
|
||||
return s.readTxtResult(outputDir, req.AudioPath, model)
|
||||
}
|
||||
|
||||
// transcribeWithPython 使用 python -m whisper
|
||||
func (s *WhisperService) transcribeWithPython(ctx context.Context, req *TranscribeReq, model, language string) (res *TranscribeRes, err error) {
|
||||
// 查找 python
|
||||
pythonPath, err := exec.LookPath("python3")
|
||||
if err != nil {
|
||||
pythonPath, err = exec.LookPath("python")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("未找到 python,请安装: pip3 install openai-whisper")
|
||||
}
|
||||
}
|
||||
|
||||
outputDir := filepath.Dir(req.AudioPath)
|
||||
modelDir := g.Cfg().MustGet(ctx, "whisper.model_dir", "").String()
|
||||
threads := g.Cfg().MustGet(ctx, "whisper.threads", 2).Int()
|
||||
|
||||
args := []string{
|
||||
"-m", "whisper",
|
||||
req.AudioPath,
|
||||
"--model", model,
|
||||
"--language", language,
|
||||
"--output_dir", outputDir,
|
||||
"--output_format", "txt",
|
||||
"--threads", fmt.Sprintf("%d", threads),
|
||||
}
|
||||
if modelDir != "" {
|
||||
args = append(args, "--model_dir", modelDir)
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, pythonPath, args...)
|
||||
output, execErr := cmd.CombinedOutput()
|
||||
if execErr != nil {
|
||||
g.Log().Errorf(ctx, "whisper(python) 执行失败: %v\n%s", execErr, string(output))
|
||||
return nil, fmt.Errorf("语音识别失败: %v", execErr)
|
||||
}
|
||||
|
||||
return s.readTxtResult(outputDir, req.AudioPath, model)
|
||||
}
|
||||
|
||||
// readTxtResult 读取 whisper 输出的 txt 文件
|
||||
func (s *WhisperService) readTxtResult(outputDir, audioPath, model string) (res *TranscribeRes, err error) {
|
||||
baseName := strings.TrimSuffix(filepath.Base(audioPath), filepath.Ext(audioPath))
|
||||
txtPaths := []string{
|
||||
filepath.Join(outputDir, baseName+".txt"),
|
||||
filepath.Join(outputDir, baseName+"."+model+".txt"),
|
||||
}
|
||||
|
||||
var textBytes []byte
|
||||
var txtPath string
|
||||
for _, p := range txtPaths {
|
||||
if b, e := os.ReadFile(p); e == nil {
|
||||
textBytes = b
|
||||
txtPath = p
|
||||
break
|
||||
}
|
||||
}
|
||||
if textBytes == nil {
|
||||
return nil, fmt.Errorf("读取识别结果文件失败")
|
||||
}
|
||||
|
||||
res = &TranscribeRes{
|
||||
Text: cleanTranscript(string(textBytes)),
|
||||
Model: model,
|
||||
OutputPath: txtPath,
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// cleanTranscript normalizes a raw transcript: every line break (CRLF, LF or
// CR) becomes a space, runs of spaces are collapsed to a single space, and
// leading/trailing whitespace is trimmed.
func cleanTranscript(text string) string {
	text = strings.NewReplacer("\r\n", " ", "\n", " ", "\r", " ").Replace(text)
	// Collapse runs of spaces. The condition must look for TWO spaces:
	// testing for a single space and replacing it with itself never
	// terminates for any text that contains a space.
	for strings.Contains(text, "  ") {
		text = strings.ReplaceAll(text, "  ", " ")
	}
	return strings.TrimSpace(text)
}
|
||||
|
||||
// detectBackend 检测可用的 whisper 后端,返回后端类型和可执行路径
|
||||
func (s *WhisperService) detectBackend() (WhisperBackend, string) {
|
||||
// 1. 优先检测 C++ 版 whisper.cpp(最快,但参数格式不同)
|
||||
for _, name := range []string{"whisper-cpp", "whisper-cli"} {
|
||||
if path, err := exec.LookPath(name); err == nil {
|
||||
return backendCpp, path
|
||||
}
|
||||
}
|
||||
|
||||
// 2. 检查 setup 检测到的 C++ 路径
|
||||
if setup.DetectedWhisperPath != "" {
|
||||
base := filepath.Base(setup.DetectedWhisperPath)
|
||||
if base == "whisper-cpp" || base == "whisper-cli" {
|
||||
if _, err := os.Stat(setup.DetectedWhisperPath); err == nil {
|
||||
return backendCpp, setup.DetectedWhisperPath
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. 检测 Python CLI(whisper 命令)
|
||||
if path, err := exec.LookPath("whisper"); err == nil {
|
||||
return backendCLI, path
|
||||
}
|
||||
|
||||
// 4. 检查 setup 检测到的 Python CLI 路径
|
||||
if setup.DetectedWhisperPath != "" {
|
||||
if _, err := os.Stat(setup.DetectedWhisperPath); err == nil {
|
||||
return backendCLI, setup.DetectedWhisperPath
|
||||
}
|
||||
}
|
||||
|
||||
// 5. 检查配置中的路径
|
||||
if p := g.Cfg().MustGet(context.Background(), "whisper.path", "").String(); p != "" {
|
||||
if _, err := os.Stat(p); err == nil {
|
||||
return backendCLI, p
|
||||
}
|
||||
}
|
||||
|
||||
return backendPython, ""
|
||||
}
|
||||
|
||||
// resolveCppModelPath 查找或下载 whisper.cpp 模型文件
|
||||
func (s *WhisperService) resolveCppModelPath(model string) string {
|
||||
modelName := strings.TrimPrefix(model, "ggml-")
|
||||
modelName = strings.TrimSuffix(modelName, ".bin")
|
||||
|
||||
cppModelName := "ggml-" + modelName + ".bin"
|
||||
home, _ := os.UserHomeDir()
|
||||
|
||||
// 目标路径:~/.cache/whisper/ggml-{model}.bin
|
||||
targetDir := filepath.Join(home, ".cache", "whisper")
|
||||
targetPath := filepath.Join(targetDir, cppModelName)
|
||||
|
||||
// 1. 如果已存在,直接返回
|
||||
if _, err := os.Stat(targetPath); err == nil {
|
||||
return targetPath
|
||||
}
|
||||
|
||||
// 2. 检查其他常见位置
|
||||
altPaths := []string{
|
||||
cppModelName,
|
||||
filepath.Join(home, ".cache", "whisper", "ggml-"+modelName+"-q5_0.bin"),
|
||||
"/opt/homebrew/share/whisper-cpp/models/" + cppModelName,
|
||||
"/usr/local/share/whisper-cpp/models/" + cppModelName,
|
||||
}
|
||||
for _, p := range altPaths {
|
||||
if _, err := os.Stat(p); err == nil {
|
||||
return p
|
||||
}
|
||||
}
|
||||
|
||||
// 3. 自动下载
|
||||
modelSize := map[string]string{
|
||||
"tiny": "75MB",
|
||||
"base": "150MB",
|
||||
"small": "500MB",
|
||||
"medium": "1.5GB",
|
||||
}
|
||||
size, _ := modelSize[modelName]
|
||||
|
||||
// 下载源:先试 hf-mirror(国内可访问),失败再试官方
|
||||
modelPath := fmt.Sprintf("ggerganov/whisper.cpp/resolve/main/%s", cppModelName)
|
||||
urls := []string{
|
||||
fmt.Sprintf("https://hf-mirror.com/%s", modelPath),
|
||||
fmt.Sprintf("https://huggingface.co/%s", modelPath),
|
||||
}
|
||||
|
||||
g.Log().Infof(context.TODO(), "[whisper.cpp] 正在下载模型 %s (%s)...", cppModelName, size)
|
||||
|
||||
// 创建目录
|
||||
os.MkdirAll(targetDir, 0755)
|
||||
|
||||
// 下载文件(多个源,依次尝试)
|
||||
var lastErr error
|
||||
for _, url := range urls {
|
||||
g.Log().Infof(context.TODO(), "[whisper.cpp] 下载地址: %s", url)
|
||||
if err := s.downloadFile(url, targetPath, 5*time.Minute); err == nil {
|
||||
g.Log().Infof(context.TODO(), "[whisper.cpp] 模型下载完成: %s", targetPath)
|
||||
return targetPath
|
||||
} else {
|
||||
lastErr = err
|
||||
g.Log().Warningf(context.TODO(), "[whisper.cpp] 从 %s 下载失败: %v,尝试下一个源...", url, err)
|
||||
}
|
||||
}
|
||||
|
||||
g.Log().Errorf(context.TODO(), "[whisper.cpp] 所有下载源均失败: %v", lastErr)
|
||||
return ""
|
||||
}
|
||||
|
||||
// downloadFile 下载文件到指定路径(支持超时)
|
||||
func (s *WhisperService) downloadFile(url, destPath string, timeout time.Duration) error {
|
||||
tmpPath := destPath + ".tmp"
|
||||
out, err := os.Create(tmpPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("创建临时文件失败: %v", err)
|
||||
}
|
||||
defer out.Close()
|
||||
|
||||
client := &http.Client{Timeout: timeout}
|
||||
resp, err := client.Get(url)
|
||||
if err != nil {
|
||||
os.Remove(tmpPath)
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
os.Remove(tmpPath)
|
||||
return fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
written, err := io.Copy(out, resp.Body)
|
||||
if err != nil {
|
||||
os.Remove(tmpPath)
|
||||
return err
|
||||
}
|
||||
|
||||
if err := os.Rename(tmpPath, destPath); err != nil {
|
||||
return fmt.Errorf("文件重命名失败: %v", err)
|
||||
}
|
||||
|
||||
g.Log().Infof(context.TODO(), "[whisper.cpp] 下载完成: %d bytes", written)
|
||||
return nil
|
||||
}
|
||||
|
||||
// transcribeWithCpp 使用 whisper.cpp(C++ 版,参数格式不同)
|
||||
func (s *WhisperService) transcribeWithCpp(ctx context.Context, req *TranscribeReq, binaryPath, model, language string) (res *TranscribeRes, err error) {
|
||||
outputDir := filepath.Dir(req.AudioPath)
|
||||
baseName := strings.TrimSuffix(filepath.Base(req.AudioPath), filepath.Ext(req.AudioPath))
|
||||
outputPrefix := filepath.Join(outputDir, baseName)
|
||||
threads := g.Cfg().MustGet(ctx, "whisper.threads", 2).Int()
|
||||
|
||||
// whisper.cpp 参数:
|
||||
// -f input.mp3 输入文件
|
||||
// -l zh 语言
|
||||
// -t 2 线程数
|
||||
// -otxt 输出 txt
|
||||
// -of /path/prefix 输出文件前缀(自动加 .txt)
|
||||
args := []string{
|
||||
"-f", req.AudioPath,
|
||||
"-l", language,
|
||||
"-t", fmt.Sprintf("%d", threads),
|
||||
"-otxt",
|
||||
"-of", outputPrefix,
|
||||
"-m", model,
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, binaryPath, args...)
|
||||
output, execErr := cmd.CombinedOutput()
|
||||
if execErr != nil {
|
||||
g.Log().Errorf(ctx, "whisper.cpp 执行失败: %v\n%s", execErr, string(output))
|
||||
return nil, fmt.Errorf("语音识别失败: %v", execErr)
|
||||
}
|
||||
|
||||
// whisper.cpp 输出: {prefix}.txt
|
||||
txtPath := outputPrefix + ".txt"
|
||||
textBytes, readErr := os.ReadFile(txtPath)
|
||||
if readErr != nil {
|
||||
return nil, fmt.Errorf("读取识别结果文件失败: %v", readErr)
|
||||
}
|
||||
|
||||
res = &TranscribeRes{
|
||||
Text: cleanTranscript(string(textBytes)),
|
||||
Model: model,
|
||||
Language: language,
|
||||
OutputPath: txtPath,
|
||||
}
|
||||
return
|
||||
}
|
||||
185
service/audio/audio_extract_service.go
Normal file
185
service/audio/audio_extract_service.go
Normal file
@@ -0,0 +1,185 @@
|
||||
package audio
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gogf/gf/v2/frame/g"
|
||||
)
|
||||
|
||||
// AudioExtractService extracts audio tracks from video files; it carries no
// state.
type AudioExtractService struct{}

// AudioExtract is the package-level instance of the audio extraction service.
var AudioExtract = &AudioExtractService{}
|
||||
|
||||
// ExtractAudioReq is a request to extract the audio track of a video.
type ExtractAudioReq struct {
	// VideoPath is the path of the video file.
	VideoPath string
	// Format is the output audio format; the default is mp3.
	Format string
}
|
||||
|
||||
// ExtractAudioRes is the result of an audio extraction.
type ExtractAudioRes struct {
	// AudioPath is the path of the extracted audio file.
	AudioPath string
	// Duration is the audio duration.
	Duration string
	// Size is the size of the audio file in bytes.
	Size int64
}
|
||||
|
||||
// Extract 从视频中提取音频
|
||||
func (s *AudioExtractService) Extract(ctx context.Context, req *ExtractAudioReq) (res *ExtractAudioRes, err error) {
|
||||
// 1. 校验视频文件存在
|
||||
if _, err = os.Stat(req.VideoPath); os.IsNotExist(err) {
|
||||
return nil, fmt.Errorf("视频文件不存在: %s", req.VideoPath)
|
||||
}
|
||||
|
||||
// 2. 校验 ffmpeg 是否可用
|
||||
ffmpegPath, err := s.getFFmpegPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// 3. 确定输出格式
|
||||
format := req.Format
|
||||
if format == "" {
|
||||
format = "mp3"
|
||||
}
|
||||
format = strings.TrimLeft(format, ".")
|
||||
|
||||
// 4. 生成输出文件路径
|
||||
outputDir := filepath.Dir(req.VideoPath)
|
||||
baseName := strings.TrimSuffix(filepath.Base(req.VideoPath), filepath.Ext(req.VideoPath))
|
||||
timestamp := time.Now().UnixMilli()
|
||||
outputName := fmt.Sprintf("%s_audio_%d.%s", baseName, timestamp, format)
|
||||
outputPath := filepath.Join(outputDir, outputName)
|
||||
|
||||
g.Log().Infof(ctx, "开始提取音频: video=%s, output=%s", req.VideoPath, outputPath)
|
||||
|
||||
// 5. 构建 ffmpeg 命令
|
||||
// 提取音频并转换为指定格式
|
||||
args := []string{
|
||||
"-i", req.VideoPath,
|
||||
"-vn", // 去掉视频流
|
||||
"-acodec", "libmp3lame", // 使用 mp3 编码器(mp3格式)
|
||||
"-ab", "192k", // 音频比特率
|
||||
"-ar", "44100", // 采样率
|
||||
"-ac", "2", // 双声道
|
||||
"-y", // 覆盖输出文件
|
||||
outputPath,
|
||||
}
|
||||
|
||||
// 如果输出不是 mp3,调整编码器
|
||||
switch format {
|
||||
case "aac":
|
||||
args[4] = "aac"
|
||||
case "wav":
|
||||
args[4] = "pcm_s16le"
|
||||
args[5] = "-vn"
|
||||
args = args[:8] // wav 不需要指定比特率等参数
|
||||
args = append(args, outputPath)
|
||||
case "ogg":
|
||||
args[4] = "libvorbis"
|
||||
case "flac":
|
||||
args[4] = "flac"
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, ffmpegPath, args...)
|
||||
|
||||
// 捕获输出用于调试
|
||||
output, execErr := cmd.CombinedOutput()
|
||||
if execErr != nil {
|
||||
g.Log().Errorf(ctx, "ffmpeg 执行失败: %v, output: %s", execErr, string(output))
|
||||
return nil, fmt.Errorf("音频提取失败: %v", execErr)
|
||||
}
|
||||
|
||||
// 6. 验证输出文件
|
||||
stat, statErr := os.Stat(outputPath)
|
||||
if statErr != nil {
|
||||
return nil, fmt.Errorf("音频文件生成失败: %v", statErr)
|
||||
}
|
||||
|
||||
// 7. 获取音频时长(通过 ffprobe)
|
||||
duration, _ := s.getAudioDuration(ctx, ffmpegPath, outputPath)
|
||||
|
||||
g.Log().Infof(ctx, "音频提取成功: path=%s, size=%d, duration=%s", outputPath, stat.Size(), duration)
|
||||
|
||||
res = &ExtractAudioRes{
|
||||
AudioPath: outputPath,
|
||||
Duration: duration,
|
||||
Size: stat.Size(),
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// getFFmpegPath 获取 ffmpeg 可执行路径
|
||||
func (s *AudioExtractService) getFFmpegPath() (string, error) {
|
||||
// 1. 优先从配置读取
|
||||
ffmpegPath := g.Cfg().MustGet(context.Background(), "ffmpeg.path", "").String()
|
||||
if ffmpegPath != "" {
|
||||
if _, err := os.Stat(ffmpegPath); err == nil {
|
||||
return ffmpegPath, nil
|
||||
}
|
||||
}
|
||||
|
||||
// 2. 从 PATH 中查找
|
||||
path, err := exec.LookPath("ffmpeg")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("未找到 ffmpeg,请确保已安装 ffmpeg 或在配置中指定路径")
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
|
||||
// getAudioDuration 获取音频时长
|
||||
func (s *AudioExtractService) getAudioDuration(ctx context.Context, ffmpegPath string, audioPath string) (string, error) {
|
||||
// 使用 ffprobe 获取时长
|
||||
// 先尝试查找 ffprobe
|
||||
ffprobePath := filepath.Join(filepath.Dir(ffmpegPath), "ffprobe")
|
||||
if _, err := os.Stat(ffprobePath); os.IsNotExist(err) {
|
||||
ffprobePath = "ffprobe"
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, ffprobePath,
|
||||
"-v", "error",
|
||||
"-show_entries", "format=duration",
|
||||
"-of", "default=noprint_wrappers=1:nokey=1",
|
||||
audioPath,
|
||||
)
|
||||
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
durationStr := strings.TrimSpace(string(output))
|
||||
// 转换为人类可读格式: 秒 -> HH:MM:SS
|
||||
var seconds float64
|
||||
fmt.Sscanf(durationStr, "%f", &seconds)
|
||||
|
||||
hours := int(seconds) / 3600
|
||||
minutes := (int(seconds) % 3600) / 60
|
||||
secs := int(seconds) % 60
|
||||
|
||||
return fmt.Sprintf("%02d:%02d:%02d", hours, minutes, secs), nil
|
||||
}
|
||||
|
||||
// ExtractAndCleanup 提取音频并清理临时视频文件
|
||||
func (s *AudioExtractService) ExtractAndCleanup(ctx context.Context, req *ExtractAudioReq) (res *ExtractAudioRes, err error) {
|
||||
res, err = s.Extract(ctx, req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// 尝试删除原始视频文件
|
||||
if req.VideoPath != "" {
|
||||
if removeErr := os.Remove(req.VideoPath); removeErr != nil {
|
||||
g.Log().Warningf(ctx, "删除临时视频文件失败: %v", removeErr)
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
657
service/scene/scene_service.go
Normal file
657
service/scene/scene_service.go
Normal file
@@ -0,0 +1,657 @@
|
||||
package scene
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/gogf/gf/v2/frame/g"
|
||||
)
|
||||
|
||||
// SceneAnalyzerService analyzes the scenes of video files; it carries no
// state.
type SceneAnalyzerService struct{}

// SceneAnalyzer is the package-level instance of the scene analysis service.
var SceneAnalyzer = &SceneAnalyzerService{}
|
||||
|
||||
// KeyframeInfo describes one extracted key frame.
type KeyframeInfo struct {
	// Path is the path of the key frame image.
	Path string `json:"path"`
	// TimeStr is the point in time of the key frame.
	TimeStr string `json:"timeStr"`
	// Width is the image width.
	Width int `json:"width"`
	// Height is the image height.
	Height int `json:"height"`
}
|
||||
|
||||
// SceneInfo 单个场景信息
|
||||
type SceneInfo struct {
|
||||
SceneIndex int `json:"sceneIndex"` // 场景序号
|
||||
StartTime float64 `json:"startTime"` // 开始时间(秒,精确到3位小数)
|
||||
EndTime float64 `json:"endTime"` // 结束时间(秒)
|
||||
Duration float64 `json:"duration"` // 时长(秒)
|
||||
StartTimeStr string `json:"startTimeStr"` // HH:MM:SS.mmm
|
||||
EndTimeStr string `json:"endTimeStr"`
|
||||
DurationStr string `json:"durationStr"`
|
||||
ShotType string `json:"shotType"` // 镜头类型
|
||||
MotionLevel string `json:"motionLevel"` // 运动程度
|
||||
Composition string `json:"composition"` // 构图类型
|
||||
NarrativePos string `json:"narrativePos"` // 叙事位置
|
||||
Keyframe *KeyframeInfo `json:"keyframe,omitempty"` // 关键帧(如有提取)
|
||||
Description string `json:"description"` // 场景描述(供 AI 使用)
|
||||
}
|
||||
|
||||
// VideoSceneAnalysis 单视频场景分析结果
|
||||
type VideoSceneAnalysis struct {
|
||||
FileName string `json:"fileName"`
|
||||
FilePath string `json:"filePath"`
|
||||
Duration float64 `json:"duration"`
|
||||
DurationStr string `json:"durationStr"`
|
||||
FrameRate float64 `json:"frameRate"`
|
||||
Width int `json:"width"`
|
||||
Height int `json:"height"`
|
||||
AspectRatio string `json:"aspectRatio"` // 画面比例
|
||||
Orientation string `json:"orientation"` // 横屏/竖屏
|
||||
TotalScenes int `json:"totalScenes"`
|
||||
Scenes []SceneInfo `json:"scenes"`
|
||||
DetectParams DetectParams `json:"detectParams"`
|
||||
Summary SceneSummary `json:"summary"` // 场景总览
|
||||
}
|
||||
|
||||
// SceneSummary is the overview of all scenes of a video.
type SceneSummary struct {
	// AvgShotDuration is the average shot duration.
	AvgShotDuration float64 `json:"avgShotDuration"`
	// MinShotDuration and MaxShotDuration bound the shot durations.
	MinShotDuration float64 `json:"minShotDuration"`
	MaxShotDuration float64 `json:"maxShotDuration"`
	// ShotTypeDist is the distribution of shot types.
	ShotTypeDist map[string]int `json:"shotTypeDist"`
	// MotionDist is the distribution of motion levels.
	MotionDist map[string]int `json:"motionDist"`
	// CompositionDist is the distribution of composition types.
	CompositionDist map[string]int `json:"compositionDist"`
	// Pacing is the editing rhythm.
	Pacing string `json:"pacing"`
	// KeyframesDir is the key frame directory.
	KeyframesDir string `json:"keyframesDir,omitempty"`
}
|
||||
|
||||
// DetectParams records the parameters a scene detection ran with.
type DetectParams struct {
	// Threshold is the scene detection threshold.
	Threshold float64 `json:"threshold"`
	// Method names the detection method.
	Method string `json:"method"`
	// ExtractKeyframes tells whether key frames were requested.
	ExtractKeyframes bool `json:"extractKeyframes"`
}
|
||||
|
||||
// SceneAnalyzeReq is a scene analysis request.
type SceneAnalyzeReq struct {
	// VideoPaths lists the video files to analyze.
	VideoPaths []string
	// Threshold is the scene detection threshold, 0.1-0.5, default 0.3.
	Threshold float64
	// ExtractKeyframes requests extraction of key frame images.
	ExtractKeyframes bool
}
|
||||
|
||||
// SceneAnalyzeRes 场景分析响应
|
||||
type SceneAnalyzeRes struct {
|
||||
Analyses []VideoSceneAnalysis `json:"analyses"`
|
||||
}
|
||||
|
||||
// ptsTimeRegex captures the numeric value that follows "pts_time:".
var ptsTimeRegex = regexp.MustCompile(`pts_time:([\d.]+)`)
|
||||
|
||||
// Analyze 分析多个视频的场景
|
||||
func (s *SceneAnalyzerService) Analyze(ctx context.Context, req *SceneAnalyzeReq) (res *SceneAnalyzeRes, err error) {
|
||||
threshold := req.Threshold
|
||||
if threshold <= 0 || threshold > 1 {
|
||||
threshold = 0.3
|
||||
}
|
||||
|
||||
var (
|
||||
mu sync.Mutex
|
||||
analyses []VideoSceneAnalysis
|
||||
wg sync.WaitGroup
|
||||
errCh = make(chan error, len(req.VideoPaths))
|
||||
)
|
||||
|
||||
for _, videoPath := range req.VideoPaths {
|
||||
wg.Add(1)
|
||||
go func(vp string) {
|
||||
defer wg.Done()
|
||||
analysis, aErr := s.analyzeSingle(ctx, vp, threshold, req.ExtractKeyframes)
|
||||
if aErr != nil {
|
||||
errCh <- fmt.Errorf("分析失败 [%s]: %v", filepath.Base(vp), aErr)
|
||||
return
|
||||
}
|
||||
mu.Lock()
|
||||
analyses = append(analyses, *analysis)
|
||||
mu.Unlock()
|
||||
}(videoPath)
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
close(errCh)
|
||||
|
||||
var errs []string
|
||||
for e := range errCh {
|
||||
errs = append(errs, e.Error())
|
||||
}
|
||||
if len(errs) > 0 {
|
||||
g.Log().Errorf(ctx, "部分视频分析失败: %s", strings.Join(errs, "; "))
|
||||
}
|
||||
if len(analyses) == 0 {
|
||||
return nil, fmt.Errorf("所有视频分析均失败: %s", strings.Join(errs, "; "))
|
||||
}
|
||||
|
||||
res = &SceneAnalyzeRes{Analyses: analyses}
|
||||
return
|
||||
}
|
||||
|
||||
// analyzeSingle 分析单个视频
|
||||
func (s *SceneAnalyzerService) analyzeSingle(ctx context.Context, videoPath string, threshold float64, extractKeyframes bool) (*VideoSceneAnalysis, error) {
|
||||
ffmpegPath, err := s.getFFmpegPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// 1. 视频元数据
|
||||
duration, frameRate, width, height, err := s.getVideoMeta(ctx, ffmpegPath, videoPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("获取视频元数据失败: %v", err)
|
||||
}
|
||||
|
||||
// 2. 场景检测
|
||||
sceneChanges, err := s.detectScenes(ctx, ffmpegPath, videoPath, threshold)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("场景检测失败: %v", err)
|
||||
}
|
||||
|
||||
// 3. 构建场景列表 + 分析
|
||||
rawScenes := s.buildScenes(sceneChanges, duration)
|
||||
totalDuration := duration
|
||||
|
||||
// 4. 提取关键帧(如果需要)
|
||||
keyframesDir := ""
|
||||
if extractKeyframes {
|
||||
keyframesDir = filepath.Join(filepath.Dir(videoPath), "keyframes_"+filepath.Base(videoPath))
|
||||
os.MkdirAll(keyframesDir, 0755)
|
||||
}
|
||||
|
||||
// 构建带分析信息的场景
|
||||
aspectRatio := fmt.Sprintf("%d:%d", width/gcd(width, height), height/gcd(width, height))
|
||||
orientation := "横屏"
|
||||
if height > width {
|
||||
orientation = "竖屏"
|
||||
}
|
||||
|
||||
fileName := filepath.Base(videoPath)
|
||||
if idx := strings.Index(fileName, "_"); idx > 0 {
|
||||
fileName = fileName[idx+1:]
|
||||
}
|
||||
|
||||
// 生成场景分析
|
||||
totalScenes := len(rawScenes)
|
||||
scenes := make([]SceneInfo, totalScenes)
|
||||
|
||||
shotDist := make(map[string]int)
|
||||
motionDist := make(map[string]int)
|
||||
compDist := make(map[string]int)
|
||||
var durTotal float64
|
||||
|
||||
for i, rs := range rawScenes {
|
||||
scene := SceneInfo{
|
||||
SceneIndex: rs.SceneIndex,
|
||||
StartTime: round3(rs.StartTime),
|
||||
EndTime: round3(rs.EndTime),
|
||||
Duration: round3(rs.Duration),
|
||||
StartTimeStr: rs.StartTimeStr,
|
||||
EndTimeStr: rs.EndTimeStr,
|
||||
DurationStr: rs.DurationStr,
|
||||
}
|
||||
|
||||
// 镜头类型
|
||||
scene.ShotType = classifyShotType(rs.Duration)
|
||||
shotDist[scene.ShotType]++
|
||||
|
||||
// 运动程度
|
||||
scene.MotionLevel = classifyMotionLevel(rs.Duration, totalDuration)
|
||||
motionDist[scene.MotionLevel]++
|
||||
|
||||
// 构图
|
||||
scene.Composition = classifyComposition(rs.Duration, width, height)
|
||||
compDist[scene.Composition]++
|
||||
|
||||
// 叙事位置
|
||||
ratio := rs.StartTime / totalDuration
|
||||
switch {
|
||||
case ratio < 0.15:
|
||||
scene.NarrativePos = "开头引入"
|
||||
case ratio < 0.35:
|
||||
scene.NarrativePos = "前段发展"
|
||||
case ratio < 0.65:
|
||||
scene.NarrativePos = "中段高潮"
|
||||
case ratio < 0.85:
|
||||
scene.NarrativePos = "后段收束"
|
||||
default:
|
||||
scene.NarrativePos = "结尾总结"
|
||||
}
|
||||
|
||||
// 关键帧
|
||||
if extractKeyframes && keyframesDir != "" {
|
||||
midTime := (rs.StartTime + rs.EndTime) / 2
|
||||
kfPath := filepath.Join(keyframesDir, fmt.Sprintf("scene_%03d.jpg", rs.SceneIndex))
|
||||
if kfErr := s.extractKeyframe(ctx, ffmpegPath, videoPath, midTime, kfPath); kfErr == nil {
|
||||
scene.Keyframe = &KeyframeInfo{
|
||||
Path: kfPath,
|
||||
TimeStr: formatTime(midTime),
|
||||
Width: width,
|
||||
Height: height,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// AI 描述
|
||||
scene.Description = buildSceneDescription(scene)
|
||||
|
||||
durTotal += rs.Duration
|
||||
scenes[i] = scene
|
||||
}
|
||||
|
||||
analysis := &VideoSceneAnalysis{
|
||||
FileName: fileName,
|
||||
FilePath: videoPath,
|
||||
Duration: round3(totalDuration),
|
||||
DurationStr: formatTime(totalDuration),
|
||||
FrameRate: round3(frameRate),
|
||||
Width: width,
|
||||
Height: height,
|
||||
AspectRatio: aspectRatio,
|
||||
Orientation: orientation,
|
||||
TotalScenes: totalScenes,
|
||||
Scenes: scenes,
|
||||
DetectParams: DetectParams{
|
||||
Threshold: threshold,
|
||||
Method: "ffmpeg scene filter",
|
||||
ExtractKeyframes: extractKeyframes,
|
||||
},
|
||||
Summary: s.buildSummary(scenes, shotDist, motionDist, compDist, keyframesDir),
|
||||
}
|
||||
|
||||
return analysis, nil
|
||||
}
|
||||
|
||||
// buildSummary 构建场景总览
|
||||
func (s *SceneAnalyzerService) buildSummary(scenes []SceneInfo, shotDist, motionDist, compDist map[string]int, kfDir string) SceneSummary {
|
||||
if len(scenes) == 0 {
|
||||
return SceneSummary{}
|
||||
}
|
||||
var minD, maxD, sumD float64
|
||||
minD = math.MaxFloat64
|
||||
for _, sc := range scenes {
|
||||
sumD += sc.Duration
|
||||
if sc.Duration < minD {
|
||||
minD = sc.Duration
|
||||
}
|
||||
if sc.Duration > maxD {
|
||||
maxD = sc.Duration
|
||||
}
|
||||
}
|
||||
avgD := sumD / float64(len(scenes))
|
||||
|
||||
pacing := "平稳"
|
||||
if avgD < 2 {
|
||||
pacing = "快节奏(快速剪辑)"
|
||||
} else if avgD < 4 {
|
||||
pacing = "适中节奏"
|
||||
} else if avgD < 8 {
|
||||
pacing = "舒缓节奏"
|
||||
} else {
|
||||
pacing = "慢节奏(长镜头为主)"
|
||||
}
|
||||
|
||||
sm := SceneSummary{
|
||||
AvgShotDuration: round3(avgD),
|
||||
MinShotDuration: round3(minD),
|
||||
MaxShotDuration: round3(maxD),
|
||||
ShotTypeDist: shotDist,
|
||||
MotionDist: motionDist,
|
||||
CompositionDist: compDist,
|
||||
Pacing: pacing,
|
||||
}
|
||||
if kfDir != "" {
|
||||
sm.KeyframesDir = kfDir
|
||||
}
|
||||
return sm
|
||||
}
|
||||
|
||||
// getVideoMeta 获取视频元数据
|
||||
func (s *SceneAnalyzerService) getVideoMeta(ctx context.Context, ffmpegPath, videoPath string) (duration, frameRate float64, width, height int, err error) {
|
||||
ffprobePath := filepath.Join(filepath.Dir(ffmpegPath), "ffprobe")
|
||||
if _, statErr := os.Stat(ffprobePath); os.IsNotExist(statErr) {
|
||||
ffprobePath = "ffprobe"
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, ffprobePath,
|
||||
"-v", "quiet",
|
||||
"-print_format", "json",
|
||||
"-show_format",
|
||||
"-show_streams",
|
||||
videoPath,
|
||||
)
|
||||
|
||||
output, execErr := cmd.Output()
|
||||
if execErr != nil {
|
||||
err = fmt.Errorf("ffprobe 执行失败: %v", execErr)
|
||||
return
|
||||
}
|
||||
|
||||
text := string(output)
|
||||
duration = parseJSONFloat(text, `"duration":`)
|
||||
frameRate = parseFrameRate(text)
|
||||
width = parseJSONInt(text, `"width":`)
|
||||
height = parseJSONInt(text, `"height":`)
|
||||
return
|
||||
}
|
||||
|
||||
// detectScenes 通过 ffmpeg scene filter 检测场景变化
|
||||
func (s *SceneAnalyzerService) detectScenes(ctx context.Context, ffmpegPath, videoPath string, threshold float64) ([]float64, error) {
|
||||
thresholdStr := strconv.FormatFloat(threshold, 'f', 1, 64)
|
||||
|
||||
args := []string{
|
||||
"-i", videoPath,
|
||||
"-filter:v", fmt.Sprintf("select='gt(scene,%s)',showinfo", thresholdStr),
|
||||
"-f", "null",
|
||||
"-",
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, ffmpegPath, args...)
|
||||
output, _ := cmd.CombinedOutput()
|
||||
|
||||
var timestamps []float64
|
||||
scanner := bufio.NewScanner(strings.NewReader(string(output)))
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
matches := ptsTimeRegex.FindStringSubmatch(line)
|
||||
if len(matches) >= 2 {
|
||||
ts, parseErr := strconv.ParseFloat(matches[1], 64)
|
||||
if parseErr == nil && ts > 0 {
|
||||
timestamps = append(timestamps, ts)
|
||||
}
|
||||
}
|
||||
}
|
||||
return timestamps, nil
|
||||
}
|
||||
|
||||
// extractKeyframe 提取指定时间点的关键帧
|
||||
func (s *SceneAnalyzerService) extractKeyframe(ctx context.Context, ffmpegPath, videoPath string, timeSec float64, outputPath string) error {
|
||||
timeStr := strconv.FormatFloat(timeSec, 'f', 3, 64)
|
||||
|
||||
args := []string{
|
||||
"-ss", timeStr,
|
||||
"-i", videoPath,
|
||||
"-vframes", "1",
|
||||
"-q:v", "3",
|
||||
"-y",
|
||||
outputPath,
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, ffmpegPath, args...)
|
||||
return cmd.Run()
|
||||
}
|
||||
|
||||
// buildScenes 根据场景变化时间戳构建场景列表
|
||||
func (s *SceneAnalyzerService) buildScenes(sceneChanges []float64, totalDuration float64) []SceneInfo {
|
||||
var scenes []SceneInfo
|
||||
|
||||
if len(sceneChanges) == 0 {
|
||||
scenes = append(scenes, SceneInfo{
|
||||
SceneIndex: 1,
|
||||
StartTime: 0,
|
||||
EndTime: totalDuration,
|
||||
Duration: totalDuration,
|
||||
StartTimeStr: formatTime(0),
|
||||
EndTimeStr: formatTime(totalDuration),
|
||||
DurationStr: formatTime(totalDuration),
|
||||
})
|
||||
return scenes
|
||||
}
|
||||
|
||||
startTime := 0.0
|
||||
for i, ts := range sceneChanges {
|
||||
if ts <= startTime || ts > totalDuration {
|
||||
continue
|
||||
}
|
||||
scenes = append(scenes, SceneInfo{
|
||||
SceneIndex: i + 1,
|
||||
StartTime: startTime,
|
||||
EndTime: ts,
|
||||
Duration: ts - startTime,
|
||||
StartTimeStr: formatTime(startTime),
|
||||
EndTimeStr: formatTime(ts),
|
||||
DurationStr: formatTime(ts - startTime),
|
||||
})
|
||||
startTime = ts
|
||||
}
|
||||
|
||||
if startTime < totalDuration {
|
||||
scenes = append(scenes, SceneInfo{
|
||||
SceneIndex: len(scenes) + 1,
|
||||
StartTime: startTime,
|
||||
EndTime: totalDuration,
|
||||
Duration: totalDuration - startTime,
|
||||
StartTimeStr: formatTime(startTime),
|
||||
EndTimeStr: formatTime(totalDuration),
|
||||
DurationStr: formatTime(totalDuration - startTime),
|
||||
})
|
||||
}
|
||||
|
||||
return scenes
|
||||
}
|
||||
|
||||
// ---------- 镜头分类逻辑 ----------
|
||||
|
||||
// classifyShotType maps a scene duration in seconds to a shot-type label.
// Each band is exclusive of its upper bound; anything of 15 seconds or more
// falls into the last label.
func classifyShotType(duration float64) string {
	bands := []struct {
		below float64
		label string
	}{
		{0.8, "极速闪切"},
		{1.5, "快速切换"},
		{2.5, "短镜头"},
		{4, "标准镜头"},
		{8, "中长镜头"},
		{15, "长镜头"},
	}
	for _, band := range bands {
		if duration < band.below {
			return band.label
		}
	}
	return "超长镜头"
}
|
||||
|
||||
// classifyMotionLevel maps a scene duration in seconds to a motion-level
// label; shorter scenes are labelled as more dynamic. totalDuration is
// accepted but does not influence the result.
func classifyMotionLevel(duration, totalDuration float64) string {
	bands := []struct {
		below float64
		label string
	}{
		{1.0, "高动态(快速切换)"},
		{2.0, "中高动态"},
		{4.0, "中等动态"},
		{8.0, "低动态(平稳)"},
	}
	for _, band := range bands {
		if duration < band.below {
			return band.label
		}
	}
	return "静态/固定机位"
}
|
||||
|
||||
// classifyComposition maps a scene duration in seconds to a composition
// label. Videos that are taller than wide get the portrait variant of each
// label; square and landscape videos get the standard one.
func classifyComposition(duration float64, width, height int) string {
	type labels struct{ portrait, standard string }
	bands := []struct {
		below float64
		names labels
	}{
		{1.2, labels{"竖屏特写/细节", "特写/细节"}},
		{2.5, labels{"竖屏近景", "近景/中近景"}},
		{5, labels{"竖屏中景", "中景/半身"}},
		{10, labels{"竖屏全景", "全景/环境"}},
	}

	chosen := labels{"竖屏远景/固定机位", "远景/广角"}
	for _, band := range bands {
		if duration < band.below {
			chosen = band.names
			break
		}
	}

	if height > width {
		return chosen.portrait
	}
	return chosen.standard
}
|
||||
|
||||
// buildSceneDescription 生成可读的场景描述(供 AI 使用)
|
||||
func buildSceneDescription(scene SceneInfo) string {
|
||||
return fmt.Sprintf(
|
||||
"场景%d:%s~%s,时长%s,%s,%s,%s,%s",
|
||||
scene.SceneIndex,
|
||||
scene.StartTimeStr, scene.EndTimeStr,
|
||||
scene.DurationStr,
|
||||
scene.ShotType,
|
||||
scene.Composition,
|
||||
scene.MotionLevel,
|
||||
scene.NarrativePos,
|
||||
)
|
||||
}
|
||||
|
||||
// ---------- 工具函数 ----------
|
||||
|
||||
// round3 rounds v to three decimal places.
func round3(v float64) float64 {
	const scale = 1000
	scaled := math.Round(v * scale)
	return scaled / scale
}
|
||||
|
||||
// gcd returns the greatest common divisor of a and b using Euclid's
// algorithm. gcd(x, 0) is x, so gcd(0, 0) is 0.
func gcd(a, b int) int {
	if b == 0 {
		return a
	}
	return gcd(b, a%b)
}
|
||||
|
||||
func getFFmpegPath() (string, error) {
|
||||
ffmpegPath := g.Cfg().MustGet(context.Background(), "ffmpeg.path", "").String()
|
||||
if ffmpegPath != "" {
|
||||
if _, err := os.Stat(ffmpegPath); err == nil {
|
||||
return ffmpegPath, nil
|
||||
}
|
||||
}
|
||||
path, err := exec.LookPath("ffmpeg")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("未找到 ffmpeg")
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
|
||||
// formatTime renders a number of seconds as "HH:MM:SS.mmm".
//
// The value is rounded to whole milliseconds before it is split into fields,
// so a fraction such as .9996 carries into the seconds instead of printing a
// four-digit millisecond part. Negative, NaN and infinite inputs are
// rendered as zero.
func formatTime(seconds float64) string {
	if math.IsNaN(seconds) || math.IsInf(seconds, 0) || seconds < 0 {
		seconds = 0
	}

	totalMs := int64(math.Round(seconds * 1000))
	ms := totalMs % 1000
	totalSec := totalMs / 1000

	h := totalSec / 3600
	m := (totalSec % 3600) / 60
	s := totalSec % 60
	return fmt.Sprintf("%02d:%02d:%02d.%03d", h, m, s, ms)
}
|
||||
|
||||
func parseJSONFloat(text, key string) float64 {
|
||||
idx := strings.Index(text, key)
|
||||
if idx < 0 {
|
||||
return 0
|
||||
}
|
||||
start := idx + len(key)
|
||||
for start < len(text) && (text[start] == ' ' || text[start] == '"') {
|
||||
start++
|
||||
}
|
||||
end := start
|
||||
for end < len(text) && (isDigit(text[end]) || text[end] == '.') {
|
||||
end++
|
||||
}
|
||||
if start < end {
|
||||
val, _ := strconv.ParseFloat(text[start:end], 64)
|
||||
return val
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func parseJSONInt(text, key string) int {
|
||||
idx := strings.Index(text, key)
|
||||
if idx < 0 {
|
||||
return 0
|
||||
}
|
||||
start := idx + len(key)
|
||||
for start < len(text) && (text[start] == ' ' || text[start] == '"') {
|
||||
start++
|
||||
}
|
||||
end := start
|
||||
for end < len(text) && isDigit(text[end]) {
|
||||
end++
|
||||
}
|
||||
if start < end {
|
||||
val, _ := strconv.Atoi(text[start:end])
|
||||
return val
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// parseFrameRate reads the frame rate from raw ffprobe JSON text. It tries
// the first "r_frame_rate" value and then the first "avg_frame_rate" value,
// accepting either a "num/den" fraction with a positive denominator or a
// plain positive number. It returns 0 when neither key gives a usable value.
func parseFrameRate(text string) float64 {
	for _, key := range []string{`"r_frame_rate":`, `"avg_frame_rate":`} {
		pos := strings.Index(text, key)
		if pos < 0 {
			continue
		}

		// Skip leading spaces/quotes, then cut the value at the first
		// quote, comma, closing brace or space.
		value := strings.TrimLeft(text[pos+len(key):], ` "`)
		if stop := strings.IndexAny(value, `",} `); stop >= 0 {
			value = value[:stop]
		}

		if fields := strings.Split(value, "/"); len(fields) == 2 {
			num, _ := strconv.ParseFloat(fields[0], 64)
			den, _ := strconv.ParseFloat(fields[1], 64)
			if den > 0 {
				return num / den
			}
		}

		if rate, _ := strconv.ParseFloat(value, 64); rate > 0 {
			return rate
		}
	}
	return 0
}
|
||||
|
||||
// isDigit reports whether b is an ASCII decimal digit.
func isDigit(b byte) bool {
	return '0' <= b && b <= '9'
}
|
||||
|
||||
// Cleanup removes every given path, including directory contents. Removal
// errors are ignored.
func Cleanup(paths []string) {
	for i := range paths {
		_ = os.RemoveAll(paths[i])
	}
}
|
||||
|
||||
// getFFmpegPath on SceneAnalyzerService
|
||||
func (s *SceneAnalyzerService) getFFmpegPath() (string, error) {
|
||||
return getFFmpegPath()
|
||||
}
|
||||
394
service/setup/setup_service.go
Normal file
394
service/setup/setup_service.go
Normal file
@@ -0,0 +1,394 @@
|
||||
package setup
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
"github.com/gogf/gf/v2/frame/g"
|
||||
)
|
||||
|
||||
var (
|
||||
// envConfigured is set to true once a shell rc file has been extended with
// a PATH entry; EnsureDependencies uses it to suggest restarting the terminal.
envConfigured bool
|
||||
|
||||
// DetectedWhisperPath is the auto-detected path of the whisper command-line
// binary. It stays empty when none was found, in which case whisper is meant
// to be run as "python -m whisper".
|
||||
DetectedWhisperPath string
|
||||
)
|
||||
|
||||
// EnsureDependencies 启动时检查并安装 ffmpeg 和 whisper
|
||||
func EnsureDependencies(ctx context.Context) {
|
||||
g.Log().Info(ctx, "========== 检查依赖环境 ==========")
|
||||
|
||||
ensureFFmpeg(ctx)
|
||||
ensureWhisper(ctx)
|
||||
resolveWhisperPath(ctx)
|
||||
|
||||
if envConfigured {
|
||||
g.Log().Info(ctx, "依赖检查完成,新环境变量已配置,建议重启终端")
|
||||
} else {
|
||||
g.Log().Info(ctx, "依赖检查完成,所有依赖已就绪")
|
||||
}
|
||||
g.Log().Info(ctx, "===================================")
|
||||
}
|
||||
|
||||
// ensureFFmpeg 确保 ffmpeg 可用
|
||||
func ensureFFmpeg(ctx context.Context) {
|
||||
if _, err := exec.LookPath("ffmpeg"); err == nil {
|
||||
g.Log().Info(ctx, "[ffmpeg] ✔ 已安装")
|
||||
return
|
||||
}
|
||||
|
||||
g.Log().Infof(ctx, "[ffmpeg] 未找到,尝试自动安装...")
|
||||
|
||||
switch runtime.GOOS {
|
||||
case "darwin":
|
||||
// 检查是否安装了 Homebrew
|
||||
if _, err := exec.LookPath("brew"); err != nil {
|
||||
g.Log().Warningf(ctx, "[ffmpeg] ⚠ 未检测到 Homebrew,请手动安装:\n brew install ffmpeg")
|
||||
return
|
||||
}
|
||||
cmd := exec.CommandContext(ctx, "brew", "install", "ffmpeg")
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
g.Log().Errorf(ctx, "[ffmpeg] ❌ 安装失败: %v\n%s", err, string(output))
|
||||
return
|
||||
}
|
||||
g.Log().Info(ctx, "[ffmpeg] ✔ 安装成功")
|
||||
|
||||
case "linux":
|
||||
// 尝试 apt
|
||||
if _, err := exec.LookPath("apt"); err == nil {
|
||||
cmd := exec.CommandContext(ctx, "sudo", "apt", "install", "-y", "ffmpeg")
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
g.Log().Errorf(ctx, "[ffmpeg] ❌ apt 安装失败: %v\n%s", err, string(output))
|
||||
return
|
||||
}
|
||||
g.Log().Info(ctx, "[ffmpeg] ✔ 安装成功")
|
||||
return
|
||||
}
|
||||
// 尝试 yum
|
||||
if _, err := exec.LookPath("yum"); err == nil {
|
||||
cmd := exec.CommandContext(ctx, "sudo", "yum", "install", "-y", "ffmpeg")
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
g.Log().Errorf(ctx, "[ffmpeg] ❌ yum 安装失败: %v\n%s", err, string(output))
|
||||
return
|
||||
}
|
||||
g.Log().Info(ctx, "[ffmpeg] ✔ 安装成功")
|
||||
return
|
||||
}
|
||||
g.Log().Warningf(ctx, "[ffmpeg] ⚠ 请手动安装: sudo apt install ffmpeg")
|
||||
|
||||
default:
|
||||
g.Log().Warningf(ctx, "[ffmpeg] ⚠ 不支持的平台(%s),请手动安装 ffmpeg", runtime.GOOS)
|
||||
}
|
||||
}
|
||||
|
||||
// ensureWhisper 确保 whisper 可用(优先安装 C++ 版,速度更快)
|
||||
func ensureWhisper(ctx context.Context) {
|
||||
// 1. 检查是否已有 whisper-cpp(C++ 版,最快)
|
||||
if path, err := exec.LookPath("whisper-cpp"); err == nil {
|
||||
g.Log().Infof(ctx, "[whisper] ✔ C++ 版已安装: %s", path)
|
||||
return
|
||||
}
|
||||
if path, err := exec.LookPath("whisper-cli"); err == nil {
|
||||
g.Log().Infof(ctx, "[whisper] ✔ C++ 版已安装: %s", path)
|
||||
return
|
||||
}
|
||||
|
||||
// 2. 检查 Homebrew 安装目录(即使不在 PATH 也能找到)
|
||||
if p := findHomebrewWhisperCpp(); p != "" {
|
||||
DetectedWhisperPath = p
|
||||
// 自动添加到 PATH 环境变量
|
||||
addToShellPath(ctx, filepath.Dir(p))
|
||||
g.Log().Infof(ctx, "[whisper] ✔ C++ 版已安装(自动检测): %s", p)
|
||||
return
|
||||
}
|
||||
|
||||
// 3. 尝试安装 whisper-cpp(C++ 版)
|
||||
if runtime.GOOS == "darwin" {
|
||||
if _, err := exec.LookPath("brew"); err == nil {
|
||||
g.Log().Infof(ctx, "[whisper] 安装 C++ 版 (brew install whisper-cpp)...")
|
||||
cmd := exec.CommandContext(ctx, "brew", "install", "whisper-cpp")
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err == nil {
|
||||
g.Log().Info(ctx, "[whisper] ✔ C++ 版安装成功")
|
||||
// 装好后把 Homebrew bin 加到 PATH
|
||||
addToShellPath(ctx, getHomebrewBinDir())
|
||||
// 检测安装路径
|
||||
if p := findHomebrewWhisperCpp(); p != "" {
|
||||
DetectedWhisperPath = p
|
||||
}
|
||||
return
|
||||
}
|
||||
g.Log().Warningf(ctx, "[whisper] ⚠ brew 安装失败: %v\n%s", err, string(output))
|
||||
g.Log().Infof(ctx, "[whisper] 降级安装 Python 版...")
|
||||
}
|
||||
}
|
||||
|
||||
// 4. 降级:检查 python -m whisper 是否可用
|
||||
if pythonWhisperAvailable() {
|
||||
g.Log().Info(ctx, "[whisper] ✔ Python 版已安装 (python3 -m whisper)")
|
||||
return
|
||||
}
|
||||
|
||||
// 5. 降级:pip 安装 Python 版
|
||||
if _, err := exec.LookPath("pip3"); err != nil {
|
||||
if _, err2 := exec.LookPath("pip"); err2 != nil {
|
||||
g.Log().Warningf(ctx, "[whisper] ⚠ 未找到 pip,请手动安装:\n pip3 install openai-whisper")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
g.Log().Infof(ctx, "[whisper] 安装 Python 版 (pip install openai-whisper)...")
|
||||
pipCmd := "pip3"
|
||||
if _, err := exec.LookPath("pip3"); err != nil {
|
||||
pipCmd = "pip"
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, pipCmd, "install", "--user", "openai-whisper")
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
g.Log().Errorf(ctx, "[whisper] ❌ pip 安装失败: %v\n%s", err, string(output))
|
||||
return
|
||||
}
|
||||
g.Log().Info(ctx, "[whisper] ✔ Python 版安装成功")
|
||||
|
||||
// 安装后自动配置 PATH
|
||||
configureWhisperPath(ctx)
|
||||
}
|
||||
|
||||
// resolveWhisperPath 自动找到 whisper 二进制路径并存储
|
||||
func resolveWhisperPath(ctx context.Context) {
|
||||
// 0. 如果已经通过 ensure 检测到了路径,直接使用
|
||||
if DetectedWhisperPath != "" {
|
||||
if _, err := os.Stat(DetectedWhisperPath); err == nil {
|
||||
g.Log().Infof(ctx, "[whisper] ✔ 路径: %s", DetectedWhisperPath)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// 1. 优先检测 C++ 版本(快 3-5 倍)
|
||||
for _, name := range []string{"whisper-cpp", "whisper-cli"} {
|
||||
if path, err := exec.LookPath(name); err == nil {
|
||||
DetectedWhisperPath = path
|
||||
g.Log().Infof(ctx, "[whisper] ✔ C++ 版: %s", path)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// 2. 在 Homebrew 目录查找 C++ 版本
|
||||
if p := findHomebrewWhisperCpp(); p != "" {
|
||||
DetectedWhisperPath = p
|
||||
g.Log().Infof(ctx, "[whisper] ✔ C++ 版(自动检测): %s", p)
|
||||
return
|
||||
}
|
||||
|
||||
// 3. 从 PATH 查找 Python 版 whisper
|
||||
if path, err := exec.LookPath("whisper"); err == nil {
|
||||
DetectedWhisperPath = path
|
||||
g.Log().Infof(ctx, "[whisper] ✔ Python 版: %s", path)
|
||||
return
|
||||
}
|
||||
|
||||
// 4. 尝试常见 pip user bin 路径
|
||||
for _, p := range getWhisperCandidates() {
|
||||
if info, err := os.Stat(p); err == nil && !info.IsDir() {
|
||||
DetectedWhisperPath = p
|
||||
g.Log().Infof(ctx, "[whisper] ✔ Python 版(自动检测): %s", p)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
g.Log().Info(ctx, "[whisper] ✔ 使用 python3 -m whisper 方式")
|
||||
}
|
||||
|
||||
// getWhisperCandidates 返回可能的 whisper 二进制路径
|
||||
func getWhisperCandidates() []string {
|
||||
var candidates []string
|
||||
|
||||
// 通过 python 探针获取 user-site bin 目录
|
||||
if p := getUserPythonBin(); p != "" {
|
||||
candidates = append(candidates, filepath.Join(p, "whisper"))
|
||||
}
|
||||
|
||||
// 常见 pip user base 路径
|
||||
userHome, _ := os.UserHomeDir()
|
||||
|
||||
switch runtime.GOOS {
|
||||
case "darwin":
|
||||
// macOS 常见的 Python 版本路径
|
||||
pythonVersions := []string{"3.9", "3.10", "3.11", "3.12", "3.13"}
|
||||
for _, ver := range pythonVersions {
|
||||
candidates = append(candidates,
|
||||
filepath.Join(userHome, "Library", "Python", ver, "bin", "whisper"),
|
||||
)
|
||||
}
|
||||
case "linux":
|
||||
candidates = append(candidates,
|
||||
filepath.Join(userHome, ".local", "bin", "whisper"),
|
||||
)
|
||||
}
|
||||
|
||||
return candidates
|
||||
}
|
||||
|
||||
// getUserPythonBin asks the first working interpreter among python3 and
// python for its user base ("-m site --user-base") and returns that
// directory's "bin" subdirectory. It returns "" when no interpreter answers.
func getUserPythonBin() string {
	for _, interpreter := range [...]string{"python3", "python"} {
		resolved, err := exec.LookPath(interpreter)
		if err != nil {
			continue
		}
		out, err := exec.Command(resolved, "-m", "site", "--user-base").Output()
		if err != nil {
			continue
		}
		if userBase := strings.TrimSpace(string(out)); userBase != "" {
			return filepath.Join(userBase, "bin")
		}
	}
	return ""
}
|
||||
|
||||
// configureWhisperPath 将 pip user bin 目录加到 shell 配置
|
||||
func configureWhisperPath(ctx context.Context) {
|
||||
binDir := getUserPythonBin()
|
||||
if binDir == "" {
|
||||
return
|
||||
}
|
||||
|
||||
// 检查是否已经在 PATH 中
|
||||
currentPath := os.Getenv("PATH")
|
||||
if strings.Contains(currentPath, binDir) {
|
||||
return
|
||||
}
|
||||
|
||||
// 配置到 .zshrc 或 .bashrc
|
||||
home, _ := os.UserHomeDir()
|
||||
rcFiles := []string{".zshrc", ".bashrc", ".bash_profile"}
|
||||
|
||||
for _, rc := range rcFiles {
|
||||
rcPath := filepath.Join(home, rc)
|
||||
// 文件不存在则跳过
|
||||
if _, err := os.Stat(rcPath); os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
// 检查是否已添加
|
||||
data, _ := os.ReadFile(rcPath)
|
||||
if strings.Contains(string(data), binDir) {
|
||||
continue
|
||||
}
|
||||
// 追加
|
||||
line := fmt.Sprintf("\nexport PATH=\"%s:$PATH\"\n", binDir)
|
||||
f, err := os.OpenFile(rcPath, os.O_APPEND|os.O_WRONLY, 0644)
|
||||
if err != nil {
|
||||
g.Log().Warningf(ctx, "[whisper] 写入 %s 失败: %v", rc, err)
|
||||
continue
|
||||
}
|
||||
f.WriteString(line)
|
||||
f.Close()
|
||||
g.Log().Infof(ctx, "[whisper] 已将 %s 添加到 %s,请执行: source ~/%s", binDir, rc, rc)
|
||||
envConfigured = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// pythonWhisperAvailable reports whether "<python> -m whisper --help" exits
// successfully for python3 or python.
func pythonWhisperAvailable() bool {
	for _, interpreter := range [...]string{"python3", "python"} {
		resolved, err := exec.LookPath(interpreter)
		if err != nil {
			continue
		}
		if exec.Command(resolved, "-m", "whisper", "--help").Run() == nil {
			return true
		}
	}
	return false
}
|
||||
|
||||
// findHomebrewWhisperCpp 在 Homebrew 安装目录查找 whisper-cpp
|
||||
func findHomebrewWhisperCpp() string {
|
||||
dirs := getHomebrewBinDirs()
|
||||
for _, dir := range dirs {
|
||||
for _, name := range []string{"whisper-cpp", "whisper-cli"} {
|
||||
p := filepath.Join(dir, name)
|
||||
if info, err := os.Stat(p); err == nil && !info.IsDir() {
|
||||
return p
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// getHomebrewBinDirs returns the directories where Homebrew may place
// binaries: the Apple Silicon prefix, the Intel prefix and a per-user
// ~/.homebrew install, in that order.
func getHomebrewBinDirs() []string {
	home, _ := os.UserHomeDir()

	dirs := make([]string, 0, 3)
	dirs = append(dirs,
		"/opt/homebrew/bin", // Apple Silicon
		"/usr/local/bin",    // Intel
	)
	return append(dirs, filepath.Join(home, ".homebrew", "bin"))
}
|
||||
|
||||
// getHomebrewBinDir 返回当前系统的 Homebrew bin 目录
|
||||
func getHomebrewBinDir() string {
|
||||
dirs := getHomebrewBinDirs()
|
||||
for _, dir := range dirs {
|
||||
if _, err := os.Stat(filepath.Join(dir, "brew")); err == nil {
|
||||
return dir
|
||||
}
|
||||
// 也检查 brew 命令路径
|
||||
if path, err := exec.LookPath("brew"); err == nil {
|
||||
return filepath.Dir(path)
|
||||
}
|
||||
}
|
||||
return "/opt/homebrew/bin" // 默认 Apple Silicon 路径
|
||||
}
|
||||
|
||||
// addToShellPath 将目录添加到 shell rc 文件的 PATH 中
|
||||
func addToShellPath(ctx context.Context, dir string) {
|
||||
if dir == "" {
|
||||
return
|
||||
}
|
||||
|
||||
// 检查是否已在 PATH 中
|
||||
currentPath := os.Getenv("PATH")
|
||||
if strings.Contains(currentPath, dir) {
|
||||
return
|
||||
}
|
||||
|
||||
home, _ := os.UserHomeDir()
|
||||
rcFiles := []string{".zshrc", ".bashrc", ".bash_profile"}
|
||||
|
||||
for _, rc := range rcFiles {
|
||||
rcPath := filepath.Join(home, rc)
|
||||
if _, err := os.Stat(rcPath); os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
data, _ := os.ReadFile(rcPath)
|
||||
if strings.Contains(string(data), dir) {
|
||||
continue
|
||||
}
|
||||
line := fmt.Sprintf("\nexport PATH=\"%s:$PATH\"\n", dir)
|
||||
f, err := os.OpenFile(rcPath, os.O_APPEND|os.O_WRONLY, 0644)
|
||||
if err != nil {
|
||||
g.Log().Warningf(ctx, "[setup] 写入 %s 失败: %v", rc, err)
|
||||
continue
|
||||
}
|
||||
f.WriteString(line)
|
||||
f.Close()
|
||||
g.Log().Infof(ctx, "[setup] 已将 %s 添加到 %s", dir, rc)
|
||||
envConfigured = true
|
||||
break
|
||||
}
|
||||
}
|
||||
285
service/video/concat_service.go
Normal file
285
service/video/concat_service.go
Normal file
@@ -0,0 +1,285 @@
|
||||
package video
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/gogf/gf/v2/frame/g"
|
||||
)
|
||||
|
||||
// ConcatService is the video concatenation service.
|
||||
type ConcatService struct{}
|
||||
|
||||
// Concat is the package-level singleton instance of ConcatService.
|
||||
var Concat = new(ConcatService)
|
||||
|
||||
// ConcatReq is the request for a video concatenation.
|
||||
type ConcatReq struct {
|
||||
VideoPaths []string // video file paths, concatenated in this order
|
||||
OutputPath string // output video file path; generated automatically when empty
|
||||
Method string // concatenation method: auto/fast/reencode, default auto
|
||||
}
|
||||
|
||||
// ConcatRes is the response of a video concatenation.
|
||||
type ConcatRes struct {
|
||||
OutputPath string `json:"outputPath"` // output file path
|
||||
FileSize int64 `json:"fileSize"` // file size in bytes
|
||||
Duration float64 `json:"duration"` // total duration after concatenation, in seconds
|
||||
DurationStr string `json:"durationStr"` // human-readable duration
|
||||
MethodUsed string `json:"methodUsed"` // concatenation method actually used
|
||||
InputFiles int `json:"inputFiles"` // number of input files
|
||||
}
|
||||
|
||||
// Concat 拼接多个视频为一个
|
||||
func (s *ConcatService) Concat(ctx context.Context, req *ConcatReq) (res *ConcatRes, err error) {
|
||||
if len(req.VideoPaths) < 2 {
|
||||
return nil, fmt.Errorf("至少需要2个视频才能拼接")
|
||||
}
|
||||
|
||||
// 校验所有视频文件存在
|
||||
for i, p := range req.VideoPaths {
|
||||
if _, err := os.Stat(p); os.IsNotExist(err) {
|
||||
return nil, fmt.Errorf("第%d个视频文件不存在: %s", i+1, p)
|
||||
}
|
||||
}
|
||||
|
||||
ffmpegPath, err := s.getFFmpegPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// 生成输出路径
|
||||
outputPath := req.OutputPath
|
||||
if outputPath == "" {
|
||||
outputDir := filepath.Dir(req.VideoPaths[0])
|
||||
outputPath = filepath.Join(outputDir, "concat_output.mp4")
|
||||
}
|
||||
|
||||
method := req.Method
|
||||
if method == "" {
|
||||
method = "auto"
|
||||
}
|
||||
|
||||
var methodUsed string
|
||||
|
||||
switch method {
|
||||
case "fast":
|
||||
// 无损拼接(要求同编码参数,速度快但可能黑屏)
|
||||
err = s.concatByDemuxer(ctx, ffmpegPath, req.VideoPaths, outputPath)
|
||||
methodUsed = "concat demuxer (无损)"
|
||||
default:
|
||||
// 重编码拼接(自动归一化分辨率/音频,兼容所有视频)
|
||||
err = s.concatByFilter(ctx, ffmpegPath, req.VideoPaths, outputPath)
|
||||
methodUsed = "concat filter (重编码)"
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("视频拼接失败: %v", err)
|
||||
}
|
||||
|
||||
// 获取输出文件信息
|
||||
stat, statErr := os.Stat(outputPath)
|
||||
if statErr != nil {
|
||||
return nil, fmt.Errorf("输出文件异常: %v", statErr)
|
||||
}
|
||||
|
||||
// 获取时长
|
||||
duration, _ := s.getVideoDuration(ctx, ffmpegPath, outputPath)
|
||||
|
||||
res = &ConcatRes{
|
||||
OutputPath: outputPath,
|
||||
FileSize: stat.Size(),
|
||||
Duration: duration,
|
||||
DurationStr: formatDuration(duration),
|
||||
MethodUsed: methodUsed,
|
||||
InputFiles: len(req.VideoPaths),
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// concatByDemuxer 使用 concat demuxer 无损拼接(要求同编码参数)
|
||||
func (s *ConcatService) concatByDemuxer(ctx context.Context, ffmpegPath string, inputs []string, output string) error {
|
||||
// 创建文件列表
|
||||
fileListPath := filepath.Join(filepath.Dir(output), "concat_list.txt")
|
||||
var lines []string
|
||||
for _, p := range inputs {
|
||||
lines = append(lines, fmt.Sprintf("file '%s'", p))
|
||||
}
|
||||
if err := os.WriteFile(fileListPath, []byte(strings.Join(lines, "\n")+"\n"), 0644); err != nil {
|
||||
return fmt.Errorf("创建文件列表失败: %v", err)
|
||||
}
|
||||
defer os.Remove(fileListPath)
|
||||
|
||||
args := []string{
|
||||
"-f", "concat",
|
||||
"-safe", "0",
|
||||
"-i", fileListPath,
|
||||
"-c", "copy", // 直接复制流,不重编码
|
||||
"-y",
|
||||
output,
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, ffmpegPath, args...)
|
||||
outputBytes, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return fmt.Errorf("ffmpeg demuxer 失败: %v\n%s", err, string(outputBytes))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// concatByFilter 使用 concat filter 重编码拼接(自动归一化分辨率/音频参数)
|
||||
func (s *ConcatService) concatByFilter(ctx context.Context, ffmpegPath string, inputs []string, output string) error {
|
||||
n := len(inputs)
|
||||
|
||||
// 1. 获取所有视频的分辨率,确定统一输出尺寸
|
||||
maxW, maxH := 0, 0
|
||||
var inputMeta []struct{ w, h int }
|
||||
for _, p := range inputs {
|
||||
w, h, _ := s.getVideoResolution(ctx, ffmpegPath, p)
|
||||
inputMeta = append(inputMeta, struct{ w, h int }{w, h})
|
||||
if w > maxW {
|
||||
maxW = w
|
||||
}
|
||||
if h > maxH {
|
||||
maxH = h
|
||||
}
|
||||
}
|
||||
// 保底
|
||||
if maxW == 0 {
|
||||
maxW = 1920
|
||||
}
|
||||
if maxH == 0 {
|
||||
maxH = 1080
|
||||
}
|
||||
|
||||
// 2. 构建输入参数
|
||||
var inputArgs []string
|
||||
for _, p := range inputs {
|
||||
inputArgs = append(inputArgs, "-i", p)
|
||||
}
|
||||
|
||||
// 3. 构建 filter_complex:每个视频 scale+pad 到统一尺寸,然后 concat
|
||||
var filterParts []string
|
||||
for i := 0; i < n; i++ {
|
||||
filterParts = append(filterParts, fmt.Sprintf(
|
||||
"[%d:v]scale=%d:%d:force_original_aspect_ratio=decrease,pad=%d:%d:(ow-iw)/2:(oh-ih)/2,setsar=1,fps=30[v%d]",
|
||||
i, maxW, maxH, maxW, maxH, i,
|
||||
))
|
||||
filterParts = append(filterParts, fmt.Sprintf(
|
||||
"[%d:a]aresample=44100[a%d]",
|
||||
i, i,
|
||||
))
|
||||
}
|
||||
// 收集归一化后的流
|
||||
var concatInputs []string
|
||||
for i := 0; i < n; i++ {
|
||||
concatInputs = append(concatInputs, fmt.Sprintf("[v%d][a%d]", i, i))
|
||||
}
|
||||
filterStr := fmt.Sprintf("%s;%sconcat=n=%d:v=1:a=1[outv][outa]",
|
||||
strings.Join(filterParts, ";"),
|
||||
strings.Join(concatInputs, ""), n)
|
||||
|
||||
outputDir := filepath.Dir(output)
|
||||
args := append(inputArgs,
|
||||
"-filter_complex", filterStr,
|
||||
"-map", "[outv]",
|
||||
"-map", "[outa]",
|
||||
"-preset", "fast",
|
||||
"-crf", "23",
|
||||
"-y",
|
||||
output,
|
||||
)
|
||||
|
||||
// 调试:记录完整命令
|
||||
g.Log().Debugf(ctx, "concat filter 命令: %s %v", ffmpegPath, args)
|
||||
|
||||
// 保存 filter graph 用于调试
|
||||
filterFile := filepath.Join(outputDir, "concat_filter.txt")
|
||||
os.WriteFile(filterFile, []byte(filterStr), 0644)
|
||||
defer os.Remove(filterFile)
|
||||
|
||||
cmd := exec.CommandContext(ctx, ffmpegPath, args...)
|
||||
outputBytes, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return fmt.Errorf("ffmpeg filter 失败: %v\n日志:\n%s", err, string(outputBytes))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// getVideoResolution 获取视频分辨率
|
||||
func (s *ConcatService) getVideoResolution(ctx context.Context, ffmpegPath, videoPath string) (width, height int, err error) {
|
||||
ffprobePath := filepath.Join(filepath.Dir(ffmpegPath), "ffprobe")
|
||||
if _, err := os.Stat(ffprobePath); os.IsNotExist(err) {
|
||||
ffprobePath = "ffprobe"
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, ffprobePath,
|
||||
"-v", "error",
|
||||
"-select_streams", "v:0",
|
||||
"-show_entries", "stream=width,height",
|
||||
"-of", "csv=p=0",
|
||||
videoPath,
|
||||
)
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
fmt.Sscanf(strings.TrimSpace(string(output)), "%d,%d", &width, &height)
|
||||
return
|
||||
}
|
||||
|
||||
// getVideoDuration 获取视频时长
|
||||
func (s *ConcatService) getVideoDuration(ctx context.Context, ffmpegPath, videoPath string) (float64, error) {
|
||||
ffprobePath := filepath.Join(filepath.Dir(ffmpegPath), "ffprobe")
|
||||
if _, err := os.Stat(ffprobePath); os.IsNotExist(err) {
|
||||
ffprobePath = "ffprobe"
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, ffprobePath,
|
||||
"-v", "error",
|
||||
"-show_entries", "format=duration",
|
||||
"-of", "default=noprint_wrappers=1:nokey=1",
|
||||
videoPath,
|
||||
)
|
||||
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
var duration float64
|
||||
fmt.Sscanf(strings.TrimSpace(string(output)), "%f", &duration)
|
||||
return duration, nil
|
||||
}
|
||||
|
||||
func (s *ConcatService) getFFmpegPath() (string, error) {
|
||||
ffmpegPath := g.Cfg().MustGet(context.Background(), "ffmpeg.path", "").String()
|
||||
if ffmpegPath != "" {
|
||||
if _, err := os.Stat(ffmpegPath); err == nil {
|
||||
return ffmpegPath, nil
|
||||
}
|
||||
}
|
||||
path, err := exec.LookPath("ffmpeg")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("未找到 ffmpeg")
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
|
||||
// formatDuration renders a duration given in seconds as "HH:MM:SS".
//
// Fractional seconds are truncated. Hours are not wrapped at 24, so the hour
// field grows beyond two digits for long durations. Negative and NaN inputs
// render as "00:00:00".
func formatDuration(seconds float64) string {
	// !(x > 0) is true for negatives and for NaN alike; neither has a
	// meaningful clock representation, so both are clamped to zero.
	if !(seconds > 0) {
		seconds = 0
	}
	total := int(seconds)
	return fmt.Sprintf("%02d:%02d:%02d", total/3600, (total%3600)/60, total%60)
}
|
||||
|
||||
// CleanupConcat deletes every file listed in paths (the input videos of a
// concat job). Removal is best effort: errors returned by os.Remove, such as
// a path that no longer exists, are discarded.
func CleanupConcat(paths []string) {
	for i := range paths {
		_ = os.Remove(paths[i])
	}
}
|
||||
Reference in New Issue
Block a user