代码初始化

This commit is contained in:
2026-05-19 14:33:06 +08:00
commit 219b7e39c7
18 changed files with 3311 additions and 0 deletions

View File

@@ -0,0 +1,232 @@
package asr
import (
"context"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
"time"
dto "media/model/dto/audio"
serviceAudio "media/service/audio"
serviceScene "media/service/scene"
"github.com/gogf/gf/v2/frame/g"
)
// VideoTranscribeReq is the request for running speech recognition on a video file.
type VideoTranscribeReq struct {
VideoPath string // path of the video file to transcribe
Model string // whisper model name, forwarded to Whisper.Transcribe
Language string // language code, forwarded to Whisper.Transcribe
KeepAudio bool // when false, TranscribeVideo deletes the extracted audio and transcript files
}
// VideoTranscribeRes is the response of a video speech-recognition run.
type VideoTranscribeRes struct {
Text string `json:"text"` // recognized text
Model string `json:"model"` // model reported by the whisper service
Language string `json:"language"` // language reported by the whisper service
AudioPath string `json:"audioPath"` // path of the audio extracted from the video
AudioSize int64 `json:"audioSize"` // size of the extracted audio file
AudioDuration string `json:"audioDuration"` // duration of the extracted audio
}
// transcribeService groups the video transcription entry points; it has no fields.
type transcribeService struct{}
// VideoTranscribe is the package-level transcribeService instance.
var VideoTranscribe = new(transcribeService)
// TranscribeWithURLs downloads every video listed in req.VideoURLs into the
// temp directory and transcribes the ones that were downloaded successfully.
//
// A failed download is logged and skipped. An error is returned when the URL
// list is empty, when the temp directory cannot be created, or when every
// download fails.
func (s *transcribeService) TranscribeWithURLs(ctx context.Context, req *dto.TranscribeReq) (res *dto.TranscribeRes, err error) {
	if len(req.VideoURLs) == 0 {
		return nil, errors.New("video_urls 不能为空")
	}

	tempDir := getTempDir(ctx)
	if mkErr := os.MkdirAll(tempDir, 0755); mkErr != nil {
		return nil, fmt.Errorf("创建临时目录失败: %w", mkErr)
	}

	savePaths := make([]string, 0, len(req.VideoURLs))
	for _, videoURL := range req.VideoURLs {
		savePath, dlErr := downloadFromURL(ctx, videoURL, tempDir)
		if dlErr != nil {
			// Keep going with the remaining URLs, but leave a trace of why this one was dropped.
			g.Log().Warningf(ctx, "视频下载失败 [%s]: %v", videoURL, dlErr)
			continue
		}
		savePaths = append(savePaths, savePath)
	}
	if len(savePaths) == 0 {
		return nil, errors.New("所有视频下载均失败")
	}

	results := s.processVideos(ctx, savePaths, req.Model, req.Language, req.Threshold)
	return &dto.TranscribeRes{Results: results}, nil
}
// TranscribeUpload transcribes video files that are already stored on disk.
// It forwards all arguments to processVideos and returns its result unchanged.
func (s *transcribeService) TranscribeUpload(ctx context.Context, savePaths []string, model, language string, threshold float64) []dto.TranscribeItem {
	items := s.processVideos(ctx, savePaths, model, language, threshold)
	return items
}
// processVideos handles the given video files one at a time: it runs scene
// analysis, then speech-to-text, and collects one TranscribeItem per file.
// A scene-analysis failure only leaves Scenes nil; a transcription failure
// produces an item carrying the error text.
func (s *transcribeService) processVideos(ctx context.Context, savePaths []string, model, language string, threshold float64) []dto.TranscribeItem {
	var items []dto.TranscribeItem
	for _, videoPath := range savePaths {
		// Report the name without everything up to the first underscore
		// (downloadFromURL stores files as "<millis>_<name>").
		displayName := filepath.Base(videoPath)
		if sep := strings.Index(displayName, "_"); sep > 0 {
			displayName = displayName[sep+1:]
		}

		// Scene analysis (best effort).
		var sceneSummary *dto.SceneSummaryDTO
		analyzeReq := &serviceScene.SceneAnalyzeReq{
			VideoPaths:       []string{videoPath},
			Threshold:        threshold,
			ExtractKeyframes: false,
		}
		if analyzed, analyzeErr := serviceScene.SceneAnalyzer.Analyze(ctx, analyzeReq); analyzeErr == nil && len(analyzed.Analyses) > 0 {
			sceneSummary = toSceneDTO(&analyzed.Analyses[0])
		}

		// Speech-to-text; TranscribeVideo removes the video file on success.
		transcript, transcribeErr := s.TranscribeVideo(ctx, &VideoTranscribeReq{
			VideoPath: videoPath,
			Model:     model,
			Language:  language,
		})
		if transcribeErr != nil {
			os.Remove(videoPath)
			items = append(items, dto.TranscribeItem{FileName: displayName, Error: transcribeErr.Error()})
			continue
		}

		payload := &dto.TranscribeResult{
			Text:          transcript.Text,
			Model:         transcript.Model,
			Language:      transcript.Language,
			AudioPath:     transcript.AudioPath,
			AudioSize:     transcript.AudioSize,
			AudioDuration: transcript.AudioDuration,
			Scenes:        sceneSummary,
		}
		items = append(items, dto.TranscribeItem{FileName: displayName, Result: payload})
	}
	return items
}
// TranscribeVideo extracts the audio track of req.VideoPath as mp3 and runs
// speech recognition on it.
//
// On success the video file is deleted. Unless req.KeepAudio is set, the
// extracted audio and the transcript text files next to it are deleted too
// (the returned AudioPath then refers to a file that no longer exists).
// If recognition fails, the extracted audio is deleted and the video is left
// in place for the caller. Returned errors wrap the underlying cause.
func (s *transcribeService) TranscribeVideo(ctx context.Context, req *VideoTranscribeReq) (res *VideoTranscribeRes, err error) {
	audioReq := &serviceAudio.ExtractAudioReq{VideoPath: req.VideoPath, Format: "mp3"}
	audioRes, err := serviceAudio.AudioExtract.Extract(ctx, audioReq)
	if err != nil {
		return nil, fmt.Errorf("音频提取失败: %w", err)
	}

	whisperRes, err := Whisper.Transcribe(ctx, &TranscribeReq{AudioPath: audioRes.AudioPath, Model: req.Model, Language: req.Language})
	if err != nil {
		os.Remove(audioRes.AudioPath)
		return nil, fmt.Errorf("语音识别失败: %w", err)
	}

	// Cleanup is best effort: a leftover temp file must not fail the request.
	os.Remove(req.VideoPath)
	if !req.KeepAudio {
		os.Remove(audioRes.AudioPath)
		baseName := strings.TrimSuffix(audioRes.AudioPath, filepath.Ext(audioRes.AudioPath))
		os.Remove(baseName + ".txt")
		os.Remove(baseName + "." + whisperRes.Model + ".txt")
	}

	return &VideoTranscribeRes{
		Text:          whisperRes.Text,
		Model:         whisperRes.Model,
		Language:      whisperRes.Language,
		AudioPath:     audioRes.AudioPath,
		AudioSize:     audioRes.Size,
		AudioDuration: audioRes.Duration,
	}, nil
}
// downloadFromURL fetches rawURL with an HTTP GET and stores the body in
// tempDir as "<unix-millis>_<last path segment>". When the URL path has no
// last segment, a generated "video_<unix-millis>.mp4" name is used instead.
//
// The request is bound to ctx, so cancelling ctx aborts the download. On any
// failure the returned path is empty and a partially written file is removed.
func downloadFromURL(ctx context.Context, rawURL, tempDir string) (string, error) {
	parsedURL, err := url.Parse(rawURL)
	if err != nil {
		return "", err
	}
	segments := strings.Split(parsedURL.Path, "/")
	fileName := segments[len(segments)-1]
	if fileName == "" {
		fileName = fmt.Sprintf("video_%d.mp4", time.Now().UnixMilli())
	}
	savePath := filepath.Join(tempDir, fmt.Sprintf("%d_%s", time.Now().UnixMilli(), fileName))

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
	if err != nil {
		return "", err
	}
	client := &http.Client{Timeout: 10 * time.Minute}
	resp, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("HTTP %d", resp.StatusCode)
	}

	out, err := os.Create(savePath)
	if err != nil {
		return "", err
	}
	_, copyErr := io.Copy(out, resp.Body)
	// Close explicitly: a failed close can mean the data never reached disk.
	closeErr := out.Close()
	if copyErr != nil || closeErr != nil {
		os.Remove(savePath)
		if copyErr != nil {
			return "", copyErr
		}
		return "", closeErr
	}
	return savePath, nil
}
// getTempDir returns the directory used for temporary media files. It reads
// the "ffmpeg.temp_dir" config key, falls back to "resource/temp" when the
// value is empty, and converts a relative path to an absolute one.
func getTempDir(ctx context.Context) string {
	const fallbackDir = "resource/temp"

	dir := g.Cfg().MustGet(ctx, "ffmpeg.temp_dir", fallbackDir).String()
	if dir == "" {
		dir = fallbackDir
	}
	if filepath.IsAbs(dir) {
		return dir
	}
	// The error from Abs is discarded, as before.
	resolved, _ := filepath.Abs(dir)
	return resolved
}
// toSceneDTO converts a raw scene-analysis result from the scene service into
// its DTO form. A nil analysis yields nil.
func toSceneDTO(analysis *serviceScene.VideoSceneAnalysis) *dto.SceneSummaryDTO {
	if analysis == nil {
		return nil
	}

	shots := make([]dto.SceneShotDTO, len(analysis.Scenes))
	for i := range analysis.Scenes {
		src := analysis.Scenes[i]
		shots[i] = dto.SceneShotDTO{
			SceneIndex:   src.SceneIndex,
			StartTimeStr: src.StartTimeStr,
			EndTimeStr:   src.EndTimeStr,
			DurationStr:  src.DurationStr,
			ShotType:     src.ShotType,
			Composition:  src.Composition,
			NarrativePos: src.NarrativePos,
			Description:  src.Description,
		}
	}

	summary := &dto.SceneSummaryDTO{
		TotalScenes: analysis.TotalScenes,
		DurationStr: analysis.DurationStr,
		AspectRatio: analysis.AspectRatio,
		Orientation: analysis.Orientation,
		Pacing:      analysis.Summary.Pacing,
		ShotTypes:   analysis.Summary.ShotTypeDist,
		Scenes:      shots,
	}
	return summary
}

View File

@@ -0,0 +1,391 @@
package asr
import (
"context"
"fmt"
"io"
"media/service/setup"
"net/http"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/gogf/gf/v2/frame/g"
)
// WhisperBackend identifies which whisper implementation performs the transcription.
type WhisperBackend int
const (
backendPython WhisperBackend = iota // python -m whisper
backendCLI // openai-whisper CLI (the `whisper` command)
backendCpp // whisper.cpp (whisper-cpp)
)
// WhisperService is the speech-recognition service.
type WhisperService struct{}
// Whisper is the package-level WhisperService instance.
var Whisper = new(WhisperService)
// TranscribeReq is a speech-recognition request.
type TranscribeReq struct {
AudioPath string // path of the audio file
Model string // whisper model: tiny/base/small/medium/large
Language string // language code; defaults to zh (Chinese)
}
// TranscribeRes is a speech-recognition response.
type TranscribeRes struct {
Text string // full recognized text
Segments []Segment
Model string // model that was used
Language string // recognized language
OutputPath string // path of the generated txt file
}
// Segment is one recognized fragment with timestamps.
type Segment struct {
Start float64 `json:"start"` // start time (seconds)
End float64 `json:"end"` // end time (seconds)
Text string `json:"text"` // text content
}
// Transcribe runs speech recognition on an audio file. The backend is
// detected automatically; when whisper.cpp is detected but no model file can
// be resolved for it, the Python backend is used instead.
func (s *WhisperService) Transcribe(ctx context.Context, req *TranscribeReq) (res *TranscribeRes, err error) {
	// 1. The audio file must exist.
	if _, statErr := os.Stat(req.AudioPath); os.IsNotExist(statErr) {
		return nil, fmt.Errorf("音频文件不存在: %s", req.AudioPath)
	}

	// 2. Fill in defaults from configuration.
	model, language := req.Model, req.Language
	if model == "" {
		model = g.Cfg().MustGet(ctx, "whisper.model", "small").String()
	}
	if language == "" {
		language = g.Cfg().MustGet(ctx, "whisper.language", "zh").String()
	}

	// 3. Pick a backend; whisper.cpp falls back to Python when its model file is missing.
	backend, whisperPath := s.detectBackend()
	if backend == backendCpp {
		if cppModelPath := s.resolveCppModelPath(model); cppModelPath != "" {
			g.Log().Infof(ctx, "语音识别(whisper.cpp): audio=%s, model=%s", req.AudioPath, cppModelPath)
			return s.transcribeWithCpp(ctx, req, whisperPath, cppModelPath, language)
		}
		g.Log().Warningf(ctx, "whisper.cpp 模型文件(%s)未找到,降级到 Python whisper", model)
		backend = backendPython
	}

	if backend == backendCLI {
		g.Log().Infof(ctx, "语音识别(CLI): audio=%s, model=%s, language=%s", req.AudioPath, model, language)
		return s.transcribeWithCLI(ctx, req, whisperPath, model, language)
	}
	g.Log().Infof(ctx, "语音识别(python): audio=%s, model=%s, language=%s", req.AudioPath, model, language)
	return s.transcribeWithPython(ctx, req, model, language)
}
// transcribeWithCLI runs the whisper CLI executable at whisperPath on the
// audio file, writing a txt transcript next to it, and reads that transcript
// back through readTxtResult.
func (s *WhisperService) transcribeWithCLI(ctx context.Context, req *TranscribeReq, whisperPath, model, language string) (res *TranscribeRes, err error) {
	outputDir := filepath.Dir(req.AudioPath)
	modelDir := g.Cfg().MustGet(ctx, "whisper.model_dir", "").String()
	threadCount := g.Cfg().MustGet(ctx, "whisper.threads", 2).Int()

	cliArgs := []string{req.AudioPath}
	cliArgs = append(cliArgs,
		"--model", model,
		"--language", language,
		"--output_dir", outputDir,
		"--output_format", "txt",
		"--threads", fmt.Sprint(threadCount),
	)
	if modelDir != "" {
		cliArgs = append(cliArgs, "--model_dir", modelDir)
	}

	combined, runErr := exec.CommandContext(ctx, whisperPath, cliArgs...).CombinedOutput()
	if runErr != nil {
		g.Log().Errorf(ctx, "whisper CLI 执行失败: %v\n%s", runErr, string(combined))
		return nil, fmt.Errorf("语音识别失败: %v", runErr)
	}
	return s.readTxtResult(outputDir, req.AudioPath, model)
}
// transcribeWithPython runs `python -m whisper` on the audio file, preferring
// python3 over python, and reads the generated txt transcript back through
// readTxtResult.
func (s *WhisperService) transcribeWithPython(ctx context.Context, req *TranscribeReq, model, language string) (res *TranscribeRes, err error) {
	// Locate an interpreter on PATH.
	var pythonPath string
	for _, candidate := range []string{"python3", "python"} {
		if found, lookErr := exec.LookPath(candidate); lookErr == nil {
			pythonPath = found
			break
		}
	}
	if pythonPath == "" {
		return nil, fmt.Errorf("未找到 python请安装: pip3 install openai-whisper")
	}

	outputDir := filepath.Dir(req.AudioPath)
	modelDir := g.Cfg().MustGet(ctx, "whisper.model_dir", "").String()
	threadCount := g.Cfg().MustGet(ctx, "whisper.threads", 2).Int()

	pyArgs := []string{"-m", "whisper", req.AudioPath}
	pyArgs = append(pyArgs,
		"--model", model,
		"--language", language,
		"--output_dir", outputDir,
		"--output_format", "txt",
		"--threads", fmt.Sprint(threadCount),
	)
	if modelDir != "" {
		pyArgs = append(pyArgs, "--model_dir", modelDir)
	}

	combined, runErr := exec.CommandContext(ctx, pythonPath, pyArgs...).CombinedOutput()
	if runErr != nil {
		g.Log().Errorf(ctx, "whisper(python) 执行失败: %v\n%s", runErr, string(combined))
		return nil, fmt.Errorf("语音识别失败: %v", runErr)
	}
	return s.readTxtResult(outputDir, req.AudioPath, model)
}
// readTxtResult loads the txt transcript written by whisper. It tries
// "<audio base name>.txt" first and "<audio base name>.<model>.txt" second,
// both inside outputDir, and returns the cleaned text of the first file that
// can be read.
func (s *WhisperService) readTxtResult(outputDir, audioPath, model string) (res *TranscribeRes, err error) {
	stem := strings.TrimSuffix(filepath.Base(audioPath), filepath.Ext(audioPath))

	var (
		content []byte
		source  string
	)
	for _, suffix := range []string{".txt", "." + model + ".txt"} {
		candidate := filepath.Join(outputDir, stem+suffix)
		data, readErr := os.ReadFile(candidate)
		if readErr != nil {
			continue
		}
		content, source = data, candidate
		break
	}
	if content == nil {
		return nil, fmt.Errorf("读取识别结果文件失败")
	}

	return &TranscribeRes{
		Text:       cleanTranscript(string(content)),
		Model:      model,
		OutputPath: source,
	}, nil
}
// cleanTranscript normalizes a recognition result: every line break (CRLF,
// LF or CR) becomes a space, runs of spaces are collapsed into one, and
// leading/trailing whitespace is trimmed.
func cleanTranscript(text string) string {
	text = strings.ReplaceAll(text, "\r\n", " ")
	text = strings.ReplaceAll(text, "\n", " ")
	text = strings.ReplaceAll(text, "\r", " ")
	// Collapse runs of spaces. The pattern must be a DOUBLE space: searching
	// for a single space would never terminate on text that contains one.
	for strings.Contains(text, "  ") {
		text = strings.ReplaceAll(text, "  ", " ")
	}
	return strings.TrimSpace(text)
}
// detectBackend finds an available whisper backend and returns its type
// together with the executable path (empty for the Python module backend).
//
// Probing order: whisper.cpp binaries on PATH, the setup-detected path when
// it is named like a whisper.cpp binary, the `whisper` command on PATH, the
// setup-detected path as a CLI, the "whisper.path" config value, and finally
// the Python module.
func (s *WhisperService) detectBackend() (WhisperBackend, string) {
	fileExists := func(p string) bool {
		_, statErr := os.Stat(p)
		return statErr == nil
	}

	// 1. whisper.cpp on PATH is preferred (fastest, but takes different arguments).
	for _, binary := range []string{"whisper-cpp", "whisper-cli"} {
		if located, lookErr := exec.LookPath(binary); lookErr == nil {
			return backendCpp, located
		}
	}

	// 2. A whisper.cpp binary found by setup.
	if setup.DetectedWhisperPath != "" {
		switch filepath.Base(setup.DetectedWhisperPath) {
		case "whisper-cpp", "whisper-cli":
			if fileExists(setup.DetectedWhisperPath) {
				return backendCpp, setup.DetectedWhisperPath
			}
		}
	}

	// 3. The Python CLI (`whisper` command) on PATH.
	if located, lookErr := exec.LookPath("whisper"); lookErr == nil {
		return backendCLI, located
	}

	// 4. Whatever setup detected, treated as a Python CLI.
	if setup.DetectedWhisperPath != "" && fileExists(setup.DetectedWhisperPath) {
		return backendCLI, setup.DetectedWhisperPath
	}

	// 5. The path from configuration.
	configured := g.Cfg().MustGet(context.Background(), "whisper.path", "").String()
	if configured != "" && fileExists(configured) {
		return backendCLI, configured
	}

	return backendPython, ""
}
// resolveCppModelPath locates the whisper.cpp model file for model and
// downloads it when it is not found locally.
//
// model may be given with or without the "ggml-" prefix and ".bin" suffix.
// Lookup order: ~/.cache/whisper/ggml-<model>.bin, the working directory, the
// q5_0 variant in the cache directory, then two whisper-cpp share
// directories. If none exists the file is downloaded into the cache
// directory, trying each mirror in turn. An empty string is returned when the
// model can be neither found nor downloaded.
func (s *WhisperService) resolveCppModelPath(model string) string {
	logCtx := context.TODO()

	modelName := strings.TrimSuffix(strings.TrimPrefix(model, "ggml-"), ".bin")
	cppModelName := "ggml-" + modelName + ".bin"

	// If the home directory cannot be determined, home stays empty and the
	// cache paths below become relative to the working directory.
	home, _ := os.UserHomeDir()

	// Download target: ~/.cache/whisper/ggml-{model}.bin
	targetDir := filepath.Join(home, ".cache", "whisper")
	targetPath := filepath.Join(targetDir, cppModelName)

	// 1. Return the first existing candidate, the download target first.
	candidates := []string{
		targetPath,
		cppModelName,
		filepath.Join(home, ".cache", "whisper", "ggml-"+modelName+"-q5_0.bin"),
		"/opt/homebrew/share/whisper-cpp/models/" + cppModelName,
		"/usr/local/share/whisper-cpp/models/" + cppModelName,
	}
	for _, candidate := range candidates {
		if _, err := os.Stat(candidate); err == nil {
			return candidate
		}
	}

	// 2. Download. The size label is informational only and is empty for
	// models that are not listed here.
	modelSize := map[string]string{
		"tiny":   "75MB",
		"base":   "150MB",
		"small":  "500MB",
		"medium": "1.5GB",
	}
	size := modelSize[modelName]

	// Sources: hf-mirror first, then the official host.
	remotePath := fmt.Sprintf("ggerganov/whisper.cpp/resolve/main/%s", cppModelName)
	sources := []string{
		fmt.Sprintf("https://hf-mirror.com/%s", remotePath),
		fmt.Sprintf("https://huggingface.co/%s", remotePath),
	}
	g.Log().Infof(logCtx, "[whisper.cpp] 正在下载模型 %s (%s)...", cppModelName, size)

	if err := os.MkdirAll(targetDir, 0755); err != nil {
		g.Log().Errorf(logCtx, "[whisper.cpp] 创建模型目录失败: %v", err)
		return ""
	}

	var lastErr error
	for _, source := range sources {
		g.Log().Infof(logCtx, "[whisper.cpp] 下载地址: %s", source)
		err := s.downloadFile(source, targetPath, 5*time.Minute)
		if err == nil {
			g.Log().Infof(logCtx, "[whisper.cpp] 模型下载完成: %s", targetPath)
			return targetPath
		}
		lastErr = err
		g.Log().Warningf(logCtx, "[whisper.cpp] 从 %s 下载失败: %v尝试下一个源...", source, err)
	}
	g.Log().Errorf(logCtx, "[whisper.cpp] 所有下载源均失败: %v", lastErr)
	return ""
}
// downloadFile downloads url to destPath, bounded by timeout.
//
// The body is first written to "<destPath>.tmp" and renamed into place only
// after it has been fully written and closed, so destPath never holds a
// partial download. The temp file is removed on every failure path.
func (s *WhisperService) downloadFile(url, destPath string, timeout time.Duration) error {
	client := &http.Client{Timeout: timeout}
	resp, err := client.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("HTTP %d", resp.StatusCode)
	}

	// Create the temp file only once there is a successful response to store.
	tmpPath := destPath + ".tmp"
	out, err := os.Create(tmpPath)
	if err != nil {
		return fmt.Errorf("创建临时文件失败: %w", err)
	}

	written, copyErr := io.Copy(out, resp.Body)
	// Close before renaming: renaming an open file fails on some platforms,
	// and a close error can mean the data was not fully written.
	closeErr := out.Close()
	if copyErr != nil {
		os.Remove(tmpPath)
		return copyErr
	}
	if closeErr != nil {
		os.Remove(tmpPath)
		return closeErr
	}

	if err := os.Rename(tmpPath, destPath); err != nil {
		os.Remove(tmpPath)
		return fmt.Errorf("文件重命名失败: %w", err)
	}
	g.Log().Infof(context.TODO(), "[whisper.cpp] 下载完成: %d bytes", written)
	return nil
}
// transcribeWithCpp runs the whisper.cpp binary at binaryPath, whose argument
// format differs from the Python tools. The model argument is passed to the
// binary's -m flag as-is.
func (s *WhisperService) transcribeWithCpp(ctx context.Context, req *TranscribeReq, binaryPath, model, language string) (res *TranscribeRes, err error) {
	audioDir := filepath.Dir(req.AudioPath)
	stem := strings.TrimSuffix(filepath.Base(req.AudioPath), filepath.Ext(req.AudioPath))
	outputPrefix := filepath.Join(audioDir, stem)
	threadCount := g.Cfg().MustGet(ctx, "whisper.threads", 2).Int()

	// whisper.cpp flags:
	//   -f  input file
	//   -l  language
	//   -t  thread count
	//   -otxt  write a txt transcript
	//   -of output file prefix (".txt" is appended by the tool)
	//   -m  model file
	cppArgs := []string{
		"-f", req.AudioPath,
		"-l", language,
		"-t", fmt.Sprint(threadCount),
		"-otxt",
		"-of", outputPrefix,
		"-m", model,
	}

	combined, runErr := exec.CommandContext(ctx, binaryPath, cppArgs...).CombinedOutput()
	if runErr != nil {
		g.Log().Errorf(ctx, "whisper.cpp 执行失败: %v\n%s", runErr, string(combined))
		return nil, fmt.Errorf("语音识别失败: %v", runErr)
	}

	// The transcript is written to "{prefix}.txt".
	transcriptPath := outputPrefix + ".txt"
	raw, readErr := os.ReadFile(transcriptPath)
	if readErr != nil {
		return nil, fmt.Errorf("读取识别结果文件失败: %v", readErr)
	}

	return &TranscribeRes{
		Text:       cleanTranscript(string(raw)),
		Model:      model,
		Language:   language,
		OutputPath: transcriptPath,
	}, nil
}

View File

@@ -0,0 +1,185 @@
package audio
import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/gogf/gf/v2/frame/g"
)
// AudioExtractService is the audio extraction service.
type AudioExtractService struct{}
// AudioExtract is the package-level AudioExtractService instance.
var AudioExtract = new(AudioExtractService)
// ExtractAudioReq is an audio extraction request.
type ExtractAudioReq struct {
VideoPath string // path of the video file
Format string // output audio format; defaults to mp3
}
// ExtractAudioRes is an audio extraction response.
type ExtractAudioRes struct {
AudioPath string // path of the extracted audio file
Duration string // audio duration
Size int64 // audio file size (bytes)
}
// Extract pulls the audio track out of req.VideoPath with ffmpeg and writes it
// next to the video as "<video base name>_audio_<unix-millis>.<format>".
//
// req.Format defaults to "mp3"; a leading dot is stripped. The encoder is
// chosen from the format (aac, wav, ogg, flac); any other format is encoded
// with libmp3lame. Audio is resampled to 44100 Hz stereo, and a 192k bitrate
// is requested for every format except wav.
//
// It returns an error when the video does not exist, ffmpeg cannot be found,
// ffmpeg fails, or the output file is missing afterwards. A failure to read
// the duration is not an error; Duration is then empty.
func (s *AudioExtractService) Extract(ctx context.Context, req *ExtractAudioReq) (res *ExtractAudioRes, err error) {
	// 1. The video file must exist.
	if _, err = os.Stat(req.VideoPath); os.IsNotExist(err) {
		return nil, fmt.Errorf("视频文件不存在: %s", req.VideoPath)
	}

	// 2. ffmpeg must be available.
	ffmpegPath, err := s.getFFmpegPath()
	if err != nil {
		return nil, err
	}

	// 3. Output format.
	format := req.Format
	if format == "" {
		format = "mp3"
	}
	format = strings.TrimLeft(format, ".")

	// 4. Output path.
	outputDir := filepath.Dir(req.VideoPath)
	baseName := strings.TrimSuffix(filepath.Base(req.VideoPath), filepath.Ext(req.VideoPath))
	outputName := fmt.Sprintf("%s_audio_%d.%s", baseName, time.Now().UnixMilli(), format)
	outputPath := filepath.Join(outputDir, outputName)
	g.Log().Infof(ctx, "开始提取音频: video=%s, output=%s", req.VideoPath, outputPath)

	// 5. Build the ffmpeg command from named pieces instead of patching
	// argument positions, so every format yields a well-formed command line.
	codec := "libmp3lame"
	withBitrate := true
	switch format {
	case "aac":
		codec = "aac"
	case "wav":
		codec = "pcm_s16le"
		withBitrate = false // uncompressed PCM takes no bitrate setting
	case "ogg":
		codec = "libvorbis"
	case "flac":
		codec = "flac"
	}
	args := []string{
		"-i", req.VideoPath,
		"-vn", // drop the video stream
		"-acodec", codec,
	}
	if withBitrate {
		args = append(args, "-ab", "192k")
	}
	args = append(args,
		"-ar", "44100", // sample rate
		"-ac", "2", // stereo
		"-y", // overwrite the output file
		outputPath,
	)

	// Capture the combined output for diagnostics.
	output, execErr := exec.CommandContext(ctx, ffmpegPath, args...).CombinedOutput()
	if execErr != nil {
		g.Log().Errorf(ctx, "ffmpeg 执行失败: %v, output: %s", execErr, string(output))
		return nil, fmt.Errorf("音频提取失败: %w", execErr)
	}

	// 6. Verify the output file.
	stat, statErr := os.Stat(outputPath)
	if statErr != nil {
		return nil, fmt.Errorf("音频文件生成失败: %w", statErr)
	}

	// 7. Duration via ffprobe; best effort.
	duration, _ := s.getAudioDuration(ctx, ffmpegPath, outputPath)
	g.Log().Infof(ctx, "音频提取成功: path=%s, size=%d, duration=%s", outputPath, stat.Size(), duration)

	return &ExtractAudioRes{
		AudioPath: outputPath,
		Duration:  duration,
		Size:      stat.Size(),
	}, nil
}
// getFFmpegPath returns the ffmpeg executable to use: the "ffmpeg.path"
// config value when that file exists, otherwise the ffmpeg found on PATH.
func (s *AudioExtractService) getFFmpegPath() (string, error) {
	// 1. Configuration takes priority.
	if configured := g.Cfg().MustGet(context.Background(), "ffmpeg.path", "").String(); configured != "" {
		if _, statErr := os.Stat(configured); statErr == nil {
			return configured, nil
		}
	}

	// 2. Fall back to PATH.
	located, lookErr := exec.LookPath("ffmpeg")
	if lookErr == nil {
		return located, nil
	}
	return "", fmt.Errorf("未找到 ffmpeg请确保已安装 ffmpeg 或在配置中指定路径")
}
// getAudioDuration asks ffprobe for the duration of audioPath and formats it
// as HH:MM:SS (fractional seconds are truncated).
//
// ffprobe is looked up next to ffmpegPath first and on PATH otherwise. An
// error is returned when ffprobe fails or when its output is not a number, so
// callers can tell an unknown duration from a zero-length one.
func (s *AudioExtractService) getAudioDuration(ctx context.Context, ffmpegPath string, audioPath string) (string, error) {
	ffprobePath := filepath.Join(filepath.Dir(ffmpegPath), "ffprobe")
	if _, err := os.Stat(ffprobePath); os.IsNotExist(err) {
		ffprobePath = "ffprobe"
	}

	cmd := exec.CommandContext(ctx, ffprobePath,
		"-v", "error",
		"-show_entries", "format=duration",
		"-of", "default=noprint_wrappers=1:nokey=1",
		audioPath,
	)
	output, err := cmd.Output()
	if err != nil {
		return "", err
	}
	durationStr := strings.TrimSpace(string(output))

	// Seconds -> HH:MM:SS.
	var seconds float64
	if _, scanErr := fmt.Sscanf(durationStr, "%f", &seconds); scanErr != nil {
		return "", fmt.Errorf("无法解析音频时长 %q: %w", durationStr, scanErr)
	}
	total := int(seconds)
	hours := total / 3600
	minutes := (total % 3600) / 60
	secs := total % 60
	return fmt.Sprintf("%02d:%02d:%02d", hours, minutes, secs), nil
}
// ExtractAndCleanup extracts the audio and then tries to delete the source
// video. A failed deletion is only logged as a warning; the extraction result
// is still returned.
func (s *AudioExtractService) ExtractAndCleanup(ctx context.Context, req *ExtractAudioReq) (res *ExtractAudioRes, err error) {
	extracted, extractErr := s.Extract(ctx, req)
	if extractErr != nil {
		return nil, extractErr
	}
	if req.VideoPath == "" {
		return extracted, nil
	}

	// Try to remove the original video file.
	if removeErr := os.Remove(req.VideoPath); removeErr != nil {
		g.Log().Warningf(ctx, "删除临时视频文件失败: %v", removeErr)
	}
	return extracted, nil
}

View File

@@ -0,0 +1,657 @@
package scene
import (
"bufio"
"context"
"fmt"
"math"
"os"
"os/exec"
"path/filepath"
"regexp"
"strconv"
"strings"
"sync"
"github.com/gogf/gf/v2/frame/g"
)
// SceneAnalyzerService is the scene analysis service.
type SceneAnalyzerService struct{}
// SceneAnalyzer is the package-level SceneAnalyzerService instance.
var SceneAnalyzer = new(SceneAnalyzerService)
// KeyframeInfo describes an extracted keyframe image.
type KeyframeInfo struct {
Path string `json:"path"` // path of the keyframe image
TimeStr string `json:"timeStr"` // point in time
Width int `json:"width"` // image width
Height int `json:"height"` // image height
}
// SceneInfo describes a single scene.
type SceneInfo struct {
SceneIndex int `json:"sceneIndex"` // scene number
StartTime float64 `json:"startTime"` // start time, precise to 3 decimal places
EndTime float64 `json:"endTime"` // end time (seconds)
Duration float64 `json:"duration"` // duration (seconds)
StartTimeStr string `json:"startTimeStr"` // HH:MM:SS.mmm
EndTimeStr string `json:"endTimeStr"`
DurationStr string `json:"durationStr"`
ShotType string `json:"shotType"` // shot type
MotionLevel string `json:"motionLevel"` // degree of motion
Composition string `json:"composition"` // composition type
NarrativePos string `json:"narrativePos"` // narrative position
Keyframe *KeyframeInfo `json:"keyframe,omitempty"` // keyframe (when extracted)
Description string `json:"description"` // scene description (for AI consumption)
}
// VideoSceneAnalysis is the scene analysis result for one video.
type VideoSceneAnalysis struct {
FileName string `json:"fileName"`
FilePath string `json:"filePath"`
Duration float64 `json:"duration"`
DurationStr string `json:"durationStr"`
FrameRate float64 `json:"frameRate"`
Width int `json:"width"`
Height int `json:"height"`
AspectRatio string `json:"aspectRatio"` // picture aspect ratio
Orientation string `json:"orientation"` // landscape / portrait
TotalScenes int `json:"totalScenes"`
Scenes []SceneInfo `json:"scenes"`
DetectParams DetectParams `json:"detectParams"`
Summary SceneSummary `json:"summary"` // scene overview
}
// SceneSummary is the overview of all scenes in a video.
type SceneSummary struct {
AvgShotDuration float64 `json:"avgShotDuration"` // average shot duration
MinShotDuration float64 `json:"minShotDuration"`
MaxShotDuration float64 `json:"maxShotDuration"`
ShotTypeDist map[string]int `json:"shotTypeDist"` // shot type distribution
MotionDist map[string]int `json:"motionDist"` // motion level distribution
CompositionDist map[string]int `json:"compositionDist"` // composition distribution
Pacing string `json:"pacing"` // editing pace
KeyframesDir string `json:"keyframesDir,omitempty"` // keyframe directory
}
// DetectParams records the parameters used for scene detection.
type DetectParams struct {
Threshold float64 `json:"threshold"`
Method string `json:"method"`
ExtractKeyframes bool `json:"extractKeyframes"`
}
// SceneAnalyzeReq is a scene analysis request.
type SceneAnalyzeReq struct {
VideoPaths []string // list of video file paths
Threshold float64 // scene detection threshold 0.1-0.5, default 0.3
ExtractKeyframes bool // whether to extract keyframe images
}
// SceneAnalyzeRes is a scene analysis response.
type SceneAnalyzeRes struct {
Analyses []VideoSceneAnalysis `json:"analyses"`
}
var (
// ptsTimeRegex captures the numeric value that follows "pts_time:" in a line of text.
ptsTimeRegex = regexp.MustCompile(`pts_time:([\d.]+)`)
)
// Analyze runs scene analysis on every video in req.VideoPaths concurrently.
//
// A threshold outside (0, 1] is replaced by 0.3. Successful analyses are
// returned in the same order as req.VideoPaths, independent of which
// goroutine finishes first. Failures of individual videos are logged; an
// error is returned only when no video could be analyzed.
func (s *SceneAnalyzerService) Analyze(ctx context.Context, req *SceneAnalyzeReq) (res *SceneAnalyzeRes, err error) {
	threshold := req.Threshold
	if threshold <= 0 || threshold > 1 {
		threshold = 0.3
	}

	// One slot per input: every goroutine writes only its own index, so no
	// lock is needed and the input order is preserved.
	results := make([]*VideoSceneAnalysis, len(req.VideoPaths))
	failures := make([]error, len(req.VideoPaths))

	var wg sync.WaitGroup
	for i, videoPath := range req.VideoPaths {
		wg.Add(1)
		go func(idx int, vp string) {
			defer wg.Done()
			analysis, aErr := s.analyzeSingle(ctx, vp, threshold, req.ExtractKeyframes)
			if aErr != nil {
				failures[idx] = fmt.Errorf("分析失败 [%s]: %v", filepath.Base(vp), aErr)
				return
			}
			results[idx] = analysis
		}(i, videoPath)
	}
	wg.Wait()

	var (
		analyses []VideoSceneAnalysis
		errs     []string
	)
	for i := range req.VideoPaths {
		switch {
		case failures[i] != nil:
			errs = append(errs, failures[i].Error())
		case results[i] != nil:
			analyses = append(analyses, *results[i])
		}
	}

	if len(errs) > 0 {
		g.Log().Errorf(ctx, "部分视频分析失败: %s", strings.Join(errs, "; "))
	}
	if len(analyses) == 0 {
		return nil, fmt.Errorf("所有视频分析均失败: %s", strings.Join(errs, "; "))
	}
	return &SceneAnalyzeRes{Analyses: analyses}, nil
}
// analyzeSingle analyzes one video: it reads the metadata, detects scene
// changes, builds the scene list, labels every scene (shot type, motion
// level, composition, narrative position, description) and, when
// extractKeyframes is set, saves one keyframe per scene into a
// "keyframes_<video file name>" directory next to the video.
//
// Missing metadata is tolerated: unknown dimensions (0x0) produce the aspect
// ratio "0:0" instead of a divide-by-zero panic, and a zero duration places
// every scene at the start of the narrative.
func (s *SceneAnalyzerService) analyzeSingle(ctx context.Context, videoPath string, threshold float64, extractKeyframes bool) (*VideoSceneAnalysis, error) {
	ffmpegPath, err := s.getFFmpegPath()
	if err != nil {
		return nil, err
	}

	// 1. Video metadata.
	duration, frameRate, width, height, err := s.getVideoMeta(ctx, ffmpegPath, videoPath)
	if err != nil {
		return nil, fmt.Errorf("获取视频元数据失败: %v", err)
	}

	// 2. Scene detection.
	sceneChanges, err := s.detectScenes(ctx, ffmpegPath, videoPath, threshold)
	if err != nil {
		return nil, fmt.Errorf("场景检测失败: %v", err)
	}

	// 3. Scene list.
	rawScenes := s.buildScenes(sceneChanges, duration)
	totalDuration := duration

	// 4. Keyframe directory (only when requested).
	keyframesDir := ""
	if extractKeyframes {
		keyframesDir = filepath.Join(filepath.Dir(videoPath), "keyframes_"+filepath.Base(videoPath))
		if mkErr := os.MkdirAll(keyframesDir, 0755); mkErr != nil {
			// Not fatal: keyframe extraction below fails per scene and is skipped.
			g.Log().Warningf(ctx, "创建关键帧目录失败: %v", mkErr)
		}
	}

	// gcd is 0 when both dimensions are 0; dividing by it would panic.
	divisor := gcd(width, height)
	if divisor == 0 {
		divisor = 1
	}
	aspectRatio := fmt.Sprintf("%d:%d", width/divisor, height/divisor)
	orientation := "横屏"
	if height > width {
		orientation = "竖屏"
	}

	// Drop everything up to the first underscore from the reported file name.
	fileName := filepath.Base(videoPath)
	if idx := strings.Index(fileName, "_"); idx > 0 {
		fileName = fileName[idx+1:]
	}

	totalScenes := len(rawScenes)
	scenes := make([]SceneInfo, totalScenes)
	shotDist := make(map[string]int)
	motionDist := make(map[string]int)
	compDist := make(map[string]int)

	for i, rs := range rawScenes {
		scene := SceneInfo{
			SceneIndex:   rs.SceneIndex,
			StartTime:    round3(rs.StartTime),
			EndTime:      round3(rs.EndTime),
			Duration:     round3(rs.Duration),
			StartTimeStr: rs.StartTimeStr,
			EndTimeStr:   rs.EndTimeStr,
			DurationStr:  rs.DurationStr,
		}

		// Shot type.
		scene.ShotType = classifyShotType(rs.Duration)
		shotDist[scene.ShotType]++

		// Motion level.
		scene.MotionLevel = classifyMotionLevel(rs.Duration, totalDuration)
		motionDist[scene.MotionLevel]++

		// Composition.
		scene.Composition = classifyComposition(rs.Duration, width, height)
		compDist[scene.Composition]++

		// Narrative position, from where the scene starts within the video.
		// Guard the division so a zero duration does not yield NaN.
		ratio := 0.0
		if totalDuration > 0 {
			ratio = rs.StartTime / totalDuration
		}
		switch {
		case ratio < 0.15:
			scene.NarrativePos = "开头引入"
		case ratio < 0.35:
			scene.NarrativePos = "前段发展"
		case ratio < 0.65:
			scene.NarrativePos = "中段高潮"
		case ratio < 0.85:
			scene.NarrativePos = "后段收束"
		default:
			scene.NarrativePos = "结尾总结"
		}

		// Keyframe taken from the middle of the scene.
		if extractKeyframes && keyframesDir != "" {
			midTime := (rs.StartTime + rs.EndTime) / 2
			kfPath := filepath.Join(keyframesDir, fmt.Sprintf("scene_%03d.jpg", rs.SceneIndex))
			if kfErr := s.extractKeyframe(ctx, ffmpegPath, videoPath, midTime, kfPath); kfErr == nil {
				scene.Keyframe = &KeyframeInfo{
					Path:    kfPath,
					TimeStr: formatTime(midTime),
					Width:   width,
					Height:  height,
				}
			}
		}

		// Description for AI consumption.
		scene.Description = buildSceneDescription(scene)
		scenes[i] = scene
	}

	analysis := &VideoSceneAnalysis{
		FileName:    fileName,
		FilePath:    videoPath,
		Duration:    round3(totalDuration),
		DurationStr: formatTime(totalDuration),
		FrameRate:   round3(frameRate),
		Width:       width,
		Height:      height,
		AspectRatio: aspectRatio,
		Orientation: orientation,
		TotalScenes: totalScenes,
		Scenes:      scenes,
		DetectParams: DetectParams{
			Threshold:        threshold,
			Method:           "ffmpeg scene filter",
			ExtractKeyframes: extractKeyframes,
		},
		Summary: s.buildSummary(scenes, shotDist, motionDist, compDist, keyframesDir),
	}
	return analysis, nil
}
// buildSummary builds the scene overview: average/min/max shot duration, the
// three distributions passed in, a pacing label derived from the average shot
// duration, and the keyframe directory when one is given. An empty scene list
// yields a zero SceneSummary.
func (s *SceneAnalyzerService) buildSummary(scenes []SceneInfo, shotDist, motionDist, compDist map[string]int, kfDir string) SceneSummary {
	if len(scenes) == 0 {
		return SceneSummary{}
	}

	shortest, longest, total := math.MaxFloat64, 0.0, 0.0
	for i := range scenes {
		d := scenes[i].Duration
		total += d
		if d < shortest {
			shortest = d
		}
		if d > longest {
			longest = d
		}
	}
	average := total / float64(len(scenes))

	var pacing string
	switch {
	case average < 2:
		pacing = "快节奏(快速剪辑)"
	case average < 4:
		pacing = "适中节奏"
	case average < 8:
		pacing = "舒缓节奏"
	default:
		pacing = "慢节奏(长镜头为主)"
	}

	summary := SceneSummary{
		AvgShotDuration: round3(average),
		MinShotDuration: round3(shortest),
		MaxShotDuration: round3(longest),
		ShotTypeDist:    shotDist,
		MotionDist:      motionDist,
		CompositionDist: compDist,
		Pacing:          pacing,
		KeyframesDir:    kfDir,
	}
	return summary
}
// getVideoMeta runs ffprobe on the video and reads duration, frame rate,
// width and height out of its JSON output using the text helpers
// parseJSONFloat, parseFrameRate and parseJSONInt. ffprobe is taken from the
// directory of ffmpegPath when present there, otherwise from PATH.
func (s *SceneAnalyzerService) getVideoMeta(ctx context.Context, ffmpegPath, videoPath string) (duration, frameRate float64, width, height int, err error) {
	probe := filepath.Join(filepath.Dir(ffmpegPath), "ffprobe")
	if _, statErr := os.Stat(probe); os.IsNotExist(statErr) {
		probe = "ffprobe"
	}

	probeArgs := []string{
		"-v", "quiet",
		"-print_format", "json",
		"-show_format",
		"-show_streams",
		videoPath,
	}
	raw, runErr := exec.CommandContext(ctx, probe, probeArgs...).Output()
	if runErr != nil {
		return 0, 0, 0, 0, fmt.Errorf("ffprobe 执行失败: %v", runErr)
	}

	report := string(raw)
	return parseJSONFloat(report, `"duration":`),
		parseFrameRate(report),
		parseJSONInt(report, `"width":`),
		parseJSONInt(report, `"height":`),
		nil
}
// detectScenes runs ffmpeg's scene filter over the video and returns the
// pts_time of every frame whose scene score exceeds threshold.
//
// The threshold is passed to ffmpeg with its full precision. Timestamps are
// parsed from the showinfo lines in ffmpeg's combined output; values that are
// not positive are ignored. An error is returned when the output cannot be
// scanned, or when ffmpeg fails without having reported any timestamp.
func (s *SceneAnalyzerService) detectScenes(ctx context.Context, ffmpegPath, videoPath string, threshold float64) ([]float64, error) {
	// Precision -1 keeps values such as 0.25 intact instead of cutting them to one decimal.
	thresholdStr := strconv.FormatFloat(threshold, 'f', -1, 64)
	args := []string{
		"-i", videoPath,
		"-filter:v", fmt.Sprintf("select='gt(scene,%s)',showinfo", thresholdStr),
		"-f", "null",
		"-",
	}
	cmd := exec.CommandContext(ctx, ffmpegPath, args...)
	output, execErr := cmd.CombinedOutput()

	var timestamps []float64
	scanner := bufio.NewScanner(strings.NewReader(string(output)))
	// Allow a single line to be as long as the whole output, so a long run of
	// text without a newline cannot stop the scan at the default token limit.
	scanner.Buffer(make([]byte, 0, 64*1024), len(output)+1)
	for scanner.Scan() {
		matches := ptsTimeRegex.FindStringSubmatch(scanner.Text())
		if len(matches) < 2 {
			continue
		}
		ts, parseErr := strconv.ParseFloat(matches[1], 64)
		if parseErr == nil && ts > 0 {
			timestamps = append(timestamps, ts)
		}
	}
	if scanErr := scanner.Err(); scanErr != nil {
		return nil, fmt.Errorf("解析 ffmpeg 输出失败: %w", scanErr)
	}

	// Timestamps that were already reported are kept even if ffmpeg exited
	// with an error; a failure with nothing parsed is surfaced to the caller.
	if execErr != nil && len(timestamps) == 0 {
		return nil, fmt.Errorf("ffmpeg 执行失败: %w", execErr)
	}
	return timestamps, nil
}
// extractKeyframe writes the frame at timeSec seconds of the video to
// outputPath by running ffmpeg, and returns the command's error.
func (s *SceneAnalyzerService) extractKeyframe(ctx context.Context, ffmpegPath, videoPath string, timeSec float64, outputPath string) error {
	seekPos := strconv.FormatFloat(timeSec, 'f', 3, 64)
	return exec.CommandContext(ctx, ffmpegPath,
		"-ss", seekPos,
		"-i", videoPath,
		"-vframes", "1",
		"-q:v", "3",
		"-y",
		outputPath,
	).Run()
}
// buildScenes turns scene-change timestamps into a list of consecutive
// scenes covering [0, totalDuration].
//
// Without any timestamp the whole video is a single scene. Timestamps that do
// not advance past the previous cut, or that lie beyond totalDuration, are
// skipped. Scenes are numbered 1, 2, 3, ... in output order, so skipped
// timestamps leave no gaps or duplicate indexes.
func (s *SceneAnalyzerService) buildScenes(sceneChanges []float64, totalDuration float64) []SceneInfo {
	var scenes []SceneInfo
	// addScene appends the scene [start, end) with the next sequential index.
	addScene := func(start, end float64) {
		scenes = append(scenes, SceneInfo{
			SceneIndex:   len(scenes) + 1,
			StartTime:    start,
			EndTime:      end,
			Duration:     end - start,
			StartTimeStr: formatTime(start),
			EndTimeStr:   formatTime(end),
			DurationStr:  formatTime(end - start),
		})
	}

	if len(sceneChanges) == 0 {
		addScene(0, totalDuration)
		return scenes
	}

	startTime := 0.0
	for _, ts := range sceneChanges {
		if ts <= startTime || ts > totalDuration {
			continue
		}
		addScene(startTime, ts)
		startTime = ts
	}
	// Trailing scene from the last cut to the end of the video.
	if startTime < totalDuration {
		addScene(startTime, totalDuration)
	}
	return scenes
}
// ---------- Shot classification logic ----------

// classifyShotType maps a shot duration (seconds) to a shot-type label: the
// first tier whose upper bound exceeds the duration wins, and anything at or
// above the last bound gets the final label.
func classifyShotType(duration float64) string {
	tiers := []struct {
		below float64
		label string
	}{
		{0.8, "极速闪切"},
		{1.5, "快速切换"},
		{2.5, "短镜头"},
		{4, "标准镜头"},
		{8, "中长镜头"},
		{15, "长镜头"},
	}
	for _, tier := range tiers {
		if duration < tier.below {
			return tier.label
		}
	}
	return "超长镜头"
}
// classifyMotionLevel maps a shot duration (seconds) to a motion-level label.
// totalDuration is accepted but does not influence the result.
func classifyMotionLevel(duration, totalDuration float64) string {
	if duration < 1.0 {
		return "高动态(快速切换)"
	}
	if duration < 2.0 {
		return "中高动态"
	}
	if duration < 4.0 {
		return "中等动态"
	}
	if duration < 8.0 {
		return "低动态(平稳)"
	}
	return "静态/固定机位"
}
// classifyComposition maps a shot duration (seconds) and the frame
// orientation to a composition label. A frame counts as vertical when height
// is greater than width; each duration tier has one label per orientation.
func classifyComposition(duration float64, width, height int) string {
	tiers := []struct {
		below      float64
		vertical   string
		horizontal string
	}{
		{1.2, "竖屏特写/细节", "特写/细节"},
		{2.5, "竖屏近景", "近景/中近景"},
		{5, "竖屏中景", "中景/半身"},
		{10, "竖屏全景", "全景/环境"},
	}

	vertical, horizontal := "竖屏远景/固定机位", "远景/广角"
	for _, tier := range tiers {
		if duration < tier.below {
			vertical, horizontal = tier.vertical, tier.horizontal
			break
		}
	}
	if height > width {
		return vertical
	}
	return horizontal
}
// buildSceneDescription renders a one-line, human-readable description of a
// scene (intended as input for an AI consumer, per the original comment).
//
// The line contains, in order: the scene index, the formatted start and end
// times, the formatted duration, and the shot type, composition, motion level
// and narrative position labels.
//
// NOTE(review): the layout string places the fields back to back with only
// two literal fragments between them; separators may have been lost upstream.
// Confirm against the intended output before relying on the exact text.
func buildSceneDescription(scene SceneInfo) string {
	const layout = "场景%d%s%s时长%s%s%s%s%s"
	fields := []interface{}{
		scene.SceneIndex,
		scene.StartTimeStr,
		scene.EndTimeStr,
		scene.DurationStr,
		scene.ShotType,
		scene.Composition,
		scene.MotionLevel,
		scene.NarrativePos,
	}
	return fmt.Sprintf(layout, fields...)
}
// ---------- 工具函数 ----------
// round3 rounds v to three decimal places using math.Round (halves are
// rounded away from zero).
func round3(v float64) float64 {
	const scale = 1000
	scaled := math.Round(v * scale)
	return scaled / scale
}
// gcd returns the greatest common divisor of a and b using the iterative
// Euclidean algorithm. gcd(a, 0) is a and gcd(0, b) is b. The sign of the
// result for negative inputs follows Go's % operator and is not normalised.
func gcd(a, b int) int {
	for b != 0 {
		rem := a % b
		a = b
		b = rem
	}
	return a
}
// getFFmpegPath locates the ffmpeg executable.
//
// The "ffmpeg.path" configuration value is preferred when it is non-empty and
// os.Stat succeeds on it; otherwise ffmpeg is looked up on PATH. An error is
// returned when neither lookup succeeds.
func getFFmpegPath() (string, error) {
	configured := g.Cfg().MustGet(context.Background(), "ffmpeg.path", "").String()
	if configured != "" {
		_, statErr := os.Stat(configured)
		if statErr == nil {
			return configured, nil
		}
	}

	resolved, lookErr := exec.LookPath("ffmpeg")
	if lookErr == nil {
		return resolved, nil
	}
	return "", fmt.Errorf("未找到 ffmpeg")
}
// formatTime renders a non-negative number of seconds as "HH:MM:SS.mmm".
//
// The fractional part is rounded to the nearest millisecond. When that
// rounding reaches a full second (for example 1.9996s) the extra second is
// carried into the whole-second part, so the millisecond field always stays
// within 000-999. Negative input is not supported.
func formatTime(seconds float64) string {
	whole := int(seconds)
	ms := int(math.Round((seconds - float64(whole)) * 1000))
	if ms >= 1000 {
		// The fraction rounded up to a full second: carry it over.
		whole++
		ms -= 1000
	}

	h := whole / 3600
	m := (whole % 3600) / 60
	s := whole % 60
	return fmt.Sprintf("%02d:%02d:%02d.%03d", h, m, s, ms)
}
// parseJSONFloat extracts the number that follows the first occurrence of key
// in text without decoding the text as JSON.
//
// After the key, leading spaces and double quotes are skipped, then the
// longest run of ASCII digits and dots is parsed as a float64. It returns 0
// when the key is absent or no such run follows it. Signs and exponents are
// not recognised, and a parse error from strconv is discarded (its result
// value is returned as is).
func parseJSONFloat(text, key string) float64 {
	pos := strings.Index(text, key)
	if pos < 0 {
		return 0
	}

	rest := strings.TrimLeft(text[pos+len(key):], ` "`)
	n := 0
	for n < len(rest) && (isDigit(rest[n]) || rest[n] == '.') {
		n++
	}
	if n == 0 {
		return 0
	}

	// Error intentionally ignored to keep the lenient best-effort contract.
	val, _ := strconv.ParseFloat(rest[:n], 64)
	return val
}
// parseJSONInt extracts the integer that follows the first occurrence of key
// in text without decoding the text as JSON.
//
// After the key, leading spaces and double quotes are skipped, then the
// longest run of ASCII digits is converted with strconv.Atoi. It returns 0
// when the key is absent or no digit follows it. Signs are not recognised and
// a conversion error is discarded (Atoi's result value is returned as is).
func parseJSONInt(text, key string) int {
	pos := strings.Index(text, key)
	if pos < 0 {
		return 0
	}

	rest := strings.TrimLeft(text[pos+len(key):], ` "`)
	n := 0
	for n < len(rest) && isDigit(rest[n]) {
		n++
	}
	if n == 0 {
		return 0
	}

	// Error intentionally ignored to keep the lenient best-effort contract.
	val, _ := strconv.Atoi(rest[:n])
	return val
}
// parseFrameRate extracts a frame rate from text by scanning for the keys
// "r_frame_rate": and then "avg_frame_rate": (in that order).
//
// For each key found, the value token runs from the first character that is
// not a space or double quote up to the next double quote, comma, closing
// brace or space. A token of the form "num/den" with exactly one slash and a
// positive denominator yields num/den; otherwise the token is parsed as a
// plain float and used when positive. If a key produces no positive rate the
// next key is tried, and 0 is returned when none does.
func parseFrameRate(text string) float64 {
	keys := [...]string{`"r_frame_rate":`, `"avg_frame_rate":`}
	for _, key := range keys {
		pos := strings.Index(text, key)
		if pos < 0 {
			continue
		}

		token := strings.TrimLeft(text[pos+len(key):], ` "`)
		if stop := strings.IndexAny(token, `",} `); stop >= 0 {
			token = token[:stop]
		}

		// Rational form: accepted only when there is exactly one slash.
		slash := strings.IndexByte(token, '/')
		if slash >= 0 && slash == strings.LastIndexByte(token, '/') {
			num, _ := strconv.ParseFloat(token[:slash], 64)
			den, _ := strconv.ParseFloat(token[slash+1:], 64)
			if den > 0 {
				return num / den
			}
		}

		if rate, _ := strconv.ParseFloat(token, 64); rate > 0 {
			return rate
		}
	}
	return 0
}
// isDigit reports whether b is an ASCII decimal digit ('0' through '9').
func isDigit(b byte) bool {
	return '0' <= b && b <= '9'
}
// Cleanup removes every given path, including directory contents, via
// os.RemoveAll. It is used to discard video and key-frame files (per the
// original comment). Removal is best-effort: errors are discarded.
func Cleanup(paths []string) {
	for i := range paths {
		_ = os.RemoveAll(paths[i])
	}
}
// getFFmpegPath makes the package-level getFFmpegPath available as a method
// on SceneAnalyzerService. It forwards the call and returns its results
// unchanged; the receiver is not used.
func (s *SceneAnalyzerService) getFFmpegPath() (string, error) {
	return getFFmpegPath()
}

View File

@@ -0,0 +1,394 @@
package setup
import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"github.com/gogf/gf/v2/frame/g"
)
var (
	// envConfigured is set to true once a PATH export line has been appended
	// to a shell rc file; EnsureDependencies reads it to decide which summary
	// message to log.
	envConfigured bool
	// DetectedWhisperPath is the auto-detected path of the whisper command-line
	// binary (when empty, python -m whisper is used instead).
	DetectedWhisperPath string
)
// EnsureDependencies checks for (and where possible installs) ffmpeg and
// whisper at startup, then resolves the whisper binary path.
//
// The steps run in a fixed order: ensureFFmpeg, ensureWhisper,
// resolveWhisperPath. The closing summary depends on envConfigured, i.e. on
// whether one of the steps appended a PATH entry to a shell rc file.
func EnsureDependencies(ctx context.Context) {
	g.Log().Info(ctx, "========== 检查依赖环境 ==========")

	steps := []func(context.Context){ensureFFmpeg, ensureWhisper, resolveWhisperPath}
	for _, step := range steps {
		step(ctx)
	}

	summary := "依赖检查完成,所有依赖已就绪"
	if envConfigured {
		summary = "依赖检查完成,新环境变量已配置,建议重启终端"
	}
	g.Log().Info(ctx, summary)
	g.Log().Info(ctx, "===================================")
}
// ensureFFmpeg makes sure ffmpeg is available on PATH, attempting an automatic
// installation when it is not.
//
// On macOS it runs "brew install ffmpeg" if Homebrew is present. On Linux it
// uses the first of apt or yum found on PATH, invoked through sudo. Any other
// platform, or a missing package manager, only produces a warning asking for
// a manual install. Nothing is returned; every outcome is logged.
func ensureFFmpeg(ctx context.Context) {
	if _, lookErr := exec.LookPath("ffmpeg"); lookErr == nil {
		g.Log().Info(ctx, "[ffmpeg] ✔ 已安装")
		return
	}
	g.Log().Infof(ctx, "[ffmpeg] 未找到,尝试自动安装...")

	// install runs one installer command and logs its outcome, using
	// failFormat (error, combined output) for the failure message.
	install := func(failFormat, name string, args ...string) {
		out, runErr := exec.CommandContext(ctx, name, args...).CombinedOutput()
		if runErr != nil {
			g.Log().Errorf(ctx, failFormat, runErr, string(out))
			return
		}
		g.Log().Info(ctx, "[ffmpeg] ✔ 安装成功")
	}

	if runtime.GOOS == "darwin" {
		// Homebrew is the only supported installer on macOS.
		if _, brewErr := exec.LookPath("brew"); brewErr != nil {
			g.Log().Warningf(ctx, "[ffmpeg] ⚠ 未检测到 Homebrew请手动安装:\n brew install ffmpeg")
			return
		}
		install("[ffmpeg] ❌ 安装失败: %v\n%s", "brew", "install", "ffmpeg")
		return
	}

	if runtime.GOOS == "linux" {
		managers := []struct {
			tool       string
			failFormat string
		}{
			{"apt", "[ffmpeg] ❌ apt 安装失败: %v\n%s"},
			{"yum", "[ffmpeg] ❌ yum 安装失败: %v\n%s"},
		}
		for _, m := range managers {
			if _, pmErr := exec.LookPath(m.tool); pmErr != nil {
				continue
			}
			// Only the first available package manager is tried.
			install(m.failFormat, "sudo", m.tool, "install", "-y", "ffmpeg")
			return
		}
		g.Log().Warningf(ctx, "[ffmpeg] ⚠ 请手动安装: sudo apt install ffmpeg")
		return
	}

	g.Log().Warningf(ctx, "[ffmpeg] ⚠ 不支持的平台(%s),请手动安装 ffmpeg", runtime.GOOS)
}
// ensureWhisper makes sure some whisper implementation is installed,
// preferring the C++ build (faster, per the original comment) over the Python
// package.
//
// Attempts, in order:
//  1. whisper-cpp or whisper-cli already on PATH.
//  2. A binary inside a Homebrew bin directory even if that directory is not
//     on PATH; its path is stored in DetectedWhisperPath and its directory is
//     handed to addToShellPath.
//  3. On macOS with Homebrew available: "brew install whisper-cpp".
//  4. An existing Python install usable as "python -m whisper".
//  5. "pip3/pip install --user openai-whisper", followed by
//     configureWhisperPath.
//
// Nothing is returned; every outcome is logged.
func ensureWhisper(ctx context.Context) {
	// 1. C++ build already reachable through PATH.
	for _, name := range []string{"whisper-cpp", "whisper-cli"} {
		if found, err := exec.LookPath(name); err == nil {
			g.Log().Infof(ctx, "[whisper] ✔ C++ 版已安装: %s", found)
			return
		}
	}

	// 2. C++ build present in a Homebrew directory that may not be on PATH.
	if brewed := findHomebrewWhisperCpp(); brewed != "" {
		DetectedWhisperPath = brewed
		addToShellPath(ctx, filepath.Dir(brewed))
		g.Log().Infof(ctx, "[whisper] ✔ C++ 版已安装(自动检测): %s", brewed)
		return
	}

	// 3. Try to install the C++ build through Homebrew (macOS only).
	brewInstall := func() bool {
		if _, err := exec.LookPath("brew"); err != nil {
			return false
		}
		g.Log().Infof(ctx, "[whisper] 安装 C++ 版 (brew install whisper-cpp)...")
		out, err := exec.CommandContext(ctx, "brew", "install", "whisper-cpp").CombinedOutput()
		if err != nil {
			g.Log().Warningf(ctx, "[whisper] ⚠ brew 安装失败: %v\n%s", err, string(out))
			g.Log().Infof(ctx, "[whisper] 降级安装 Python 版...")
			return false
		}
		g.Log().Info(ctx, "[whisper] ✔ C++ 版安装成功")
		// Make the Homebrew bin directory reachable, then record the binary.
		addToShellPath(ctx, getHomebrewBinDir())
		if installed := findHomebrewWhisperCpp(); installed != "" {
			DetectedWhisperPath = installed
		}
		return true
	}
	if runtime.GOOS == "darwin" && brewInstall() {
		return
	}

	// 4. Fall back to an already-installed Python package.
	if pythonWhisperAvailable() {
		g.Log().Info(ctx, "[whisper] ✔ Python 版已安装 (python3 -m whisper)")
		return
	}

	// 5. Fall back to installing the Python package, preferring pip3 over pip.
	pipCmd := "pip3"
	if _, err := exec.LookPath("pip3"); err != nil {
		if _, err := exec.LookPath("pip"); err != nil {
			g.Log().Warningf(ctx, "[whisper] ⚠ 未找到 pip请手动安装:\n pip3 install openai-whisper")
			return
		}
		pipCmd = "pip"
	}
	g.Log().Infof(ctx, "[whisper] 安装 Python 版 (pip install openai-whisper)...")
	out, err := exec.CommandContext(ctx, pipCmd, "install", "--user", "openai-whisper").CombinedOutput()
	if err != nil {
		g.Log().Errorf(ctx, "[whisper] ❌ pip 安装失败: %v\n%s", err, string(out))
		return
	}
	g.Log().Info(ctx, "[whisper] ✔ Python 版安装成功")

	// Put the pip user bin directory on PATH for future shells.
	configureWhisperPath(ctx)
}
// resolveWhisperPath determines which whisper binary to use and stores it in
// DetectedWhisperPath.
//
// Lookup order:
//  0. A previously detected path that still exists on disk.
//  1. whisper-cpp / whisper-cli on PATH (the C++ build; 3-5x faster according
//     to the original comment).
//  2. The C++ build inside a Homebrew bin directory.
//  3. The Python "whisper" command on PATH.
//  4. Well-known pip user bin locations from getWhisperCandidates.
//
// When nothing is found DetectedWhisperPath is left as it was and a message
// announcing the "python3 -m whisper" fallback is logged.
func resolveWhisperPath(ctx context.Context) {
	if preset := DetectedWhisperPath; preset != "" {
		if _, err := os.Stat(preset); err == nil {
			g.Log().Infof(ctx, "[whisper] ✔ 路径: %s", preset)
			return
		}
	}

	// adopt records path as the detected binary and logs it with format.
	adopt := func(format, path string) {
		DetectedWhisperPath = path
		g.Log().Infof(ctx, format, path)
	}

	for _, name := range []string{"whisper-cpp", "whisper-cli"} {
		if onPath, err := exec.LookPath(name); err == nil {
			adopt("[whisper] ✔ C++ 版: %s", onPath)
			return
		}
	}

	if brewed := findHomebrewWhisperCpp(); brewed != "" {
		adopt("[whisper] ✔ C++ 版(自动检测): %s", brewed)
		return
	}

	if onPath, err := exec.LookPath("whisper"); err == nil {
		adopt("[whisper] ✔ Python 版: %s", onPath)
		return
	}

	for _, candidate := range getWhisperCandidates() {
		info, err := os.Stat(candidate)
		if err != nil || info.IsDir() {
			continue
		}
		adopt("[whisper] ✔ Python 版(自动检测): %s", candidate)
		return
	}

	g.Log().Info(ctx, "[whisper] ✔ 使用 python3 -m whisper 方式")
}
// getWhisperCandidates lists file paths where a pip-installed "whisper"
// executable may live.
//
// The list starts with the user bin directory reported by getUserPythonBin
// (when available), followed by per-platform defaults under the user's home
// directory: ~/Library/Python/<version>/bin on macOS for a fixed set of Python
// versions, and ~/.local/bin on Linux. The paths are not checked for
// existence. A failure to resolve the home directory is ignored, in which
// case those entries are built from an empty home.
func getWhisperCandidates() []string {
	var paths []string

	// Directory obtained by asking the Python interpreter itself.
	if pyBin := getUserPythonBin(); pyBin != "" {
		paths = append(paths, filepath.Join(pyBin, "whisper"))
	}

	home, _ := os.UserHomeDir()
	if runtime.GOOS == "darwin" {
		for _, ver := range [...]string{"3.9", "3.10", "3.11", "3.12", "3.13"} {
			paths = append(paths, filepath.Join(home, "Library", "Python", ver, "bin", "whisper"))
		}
	} else if runtime.GOOS == "linux" {
		paths = append(paths, filepath.Join(home, ".local", "bin", "whisper"))
	}
	return paths
}
// getUserPythonBin asks Python for its user base directory and returns the
// "bin" directory beneath it.
//
// python3 is tried before python. For the first interpreter that is on PATH,
// runs "-m site --user-base" successfully and prints a non-empty value, the
// result is <user-base>/bin. An empty string is returned when no interpreter
// qualifies.
func getUserPythonBin() string {
	for _, interpreter := range [...]string{"python3", "python"} {
		exe, err := exec.LookPath(interpreter)
		if err != nil {
			continue
		}
		out, err := exec.Command(exe, "-m", "site", "--user-base").Output()
		if err != nil {
			continue
		}
		if base := strings.TrimSpace(string(out)); base != "" {
			return filepath.Join(base, "bin")
		}
	}
	return ""
}
// configureWhisperPath appends the pip user bin directory to PATH in the
// user's shell configuration so that a pip-installed whisper command is found
// by future shells.
//
// Nothing is done when the directory cannot be determined or is already an
// entry of the current PATH. PATH membership is decided by comparing whole,
// cleaned entries rather than by substring, so "/a/bin" is not mistaken for
// being present just because "/a/bin2" is.
//
// The rc files .zshrc, .bashrc and .bash_profile are tried in that order. A
// file is skipped when it does not exist, cannot be read, or already mentions
// the directory. The export line is appended to the first remaining file;
// envConfigured is set only after the write and close both succeeded.
func configureWhisperPath(ctx context.Context) {
	binDir := getUserPythonBin()
	if binDir == "" {
		return
	}

	// Already on PATH for this process: nothing to configure.
	wanted := filepath.Clean(binDir)
	for _, entry := range filepath.SplitList(os.Getenv("PATH")) {
		if entry != "" && filepath.Clean(entry) == wanted {
			return
		}
	}

	home, err := os.UserHomeDir()
	if err != nil {
		// Without a home directory the rc paths would resolve relative to
		// the working directory, so give up instead of editing stray files.
		g.Log().Warningf(ctx, "[whisper] 无法确定用户主目录,跳过 PATH 配置: %v", err)
		return
	}

	for _, rc := range []string{".zshrc", ".bashrc", ".bash_profile"} {
		rcPath := filepath.Join(home, rc)

		data, readErr := os.ReadFile(rcPath)
		if readErr != nil {
			if !os.IsNotExist(readErr) {
				g.Log().Warningf(ctx, "[whisper] 读取 %s 失败: %v", rc, readErr)
			}
			continue
		}
		// The directory is already referenced in this file.
		if strings.Contains(string(data), binDir) {
			continue
		}

		line := fmt.Sprintf("\nexport PATH=\"%s:$PATH\"\n", binDir)
		f, openErr := os.OpenFile(rcPath, os.O_APPEND|os.O_WRONLY, 0644)
		if openErr != nil {
			g.Log().Warningf(ctx, "[whisper] 写入 %s 失败: %v", rc, openErr)
			continue
		}
		_, writeErr := f.WriteString(line)
		closeErr := f.Close()
		if writeErr == nil {
			writeErr = closeErr
		}
		if writeErr != nil {
			g.Log().Warningf(ctx, "[whisper] 写入 %s 失败: %v", rc, writeErr)
			continue
		}

		g.Log().Infof(ctx, "[whisper] 已将 %s 添加到 %s请执行: source ~/%s", binDir, rc, rc)
		envConfigured = true
		break
	}
}
// pythonWhisperAvailable reports whether whisper can be run as a Python
// module. It tries python3 and then python; for each interpreter found on
// PATH it runs "-m whisper --help" and treats a successful exit as available.
func pythonWhisperAvailable() bool {
	for _, interpreter := range [...]string{"python3", "python"} {
		exe, err := exec.LookPath(interpreter)
		if err != nil {
			continue
		}
		if exec.Command(exe, "-m", "whisper", "--help").Run() == nil {
			return true
		}
	}
	return false
}
// findHomebrewWhisperCpp searches the Homebrew bin directories for the
// whisper C++ binary.
//
// Directories are visited in the order returned by getHomebrewBinDirs; within
// each, "whisper-cpp" is checked before "whisper-cli". The first existing
// non-directory entry is returned, or an empty string when none is found.
func findHomebrewWhisperCpp() string {
	binaries := [...]string{"whisper-cpp", "whisper-cli"}
	for _, dir := range getHomebrewBinDirs() {
		for _, bin := range binaries {
			candidate := filepath.Join(dir, bin)
			info, err := os.Stat(candidate)
			if err != nil || info.IsDir() {
				continue
			}
			return candidate
		}
	}
	return ""
}
// getHomebrewBinDirs returns the directories in which Homebrew may place
// executables, in lookup order. A failure to resolve the home directory is
// ignored, in which case the last entry is built from an empty home.
func getHomebrewBinDirs() []string {
	home, _ := os.UserHomeDir()

	dirs := make([]string, 0, 3)
	dirs = append(dirs, "/opt/homebrew/bin") // Apple Silicon
	dirs = append(dirs, "/usr/local/bin")    // Intel
	dirs = append(dirs, filepath.Join(home, ".homebrew", "bin"))
	return dirs
}
// getHomebrewBinDir returns the Homebrew bin directory for this machine.
//
// For each known directory it first checks whether a "brew" entry exists
// there, and otherwise falls back to the directory of the brew found on PATH.
// When neither check ever succeeds the Apple Silicon default is returned.
//
// NOTE(review): the PATH lookup does not depend on the loop variable, so once
// the first directory lacks brew, a brew on PATH wins over the remaining known
// directories. Confirm this precedence is intended.
func getHomebrewBinDir() string {
	for _, candidate := range getHomebrewBinDirs() {
		brewInDir := filepath.Join(candidate, "brew")
		if _, statErr := os.Stat(brewInDir); statErr == nil {
			return candidate
		}

		// Also consider wherever the brew command resolves on PATH.
		brewOnPath, lookErr := exec.LookPath("brew")
		if lookErr != nil {
			continue
		}
		return filepath.Dir(brewOnPath)
	}
	return "/opt/homebrew/bin" // default: Apple Silicon location
}
// addToShellPath appends dir to PATH in the user's shell configuration so
// that future shells can find executables located there.
//
// Nothing is done when dir is empty or is already an entry of the current
// PATH. PATH membership is decided by comparing whole, cleaned entries rather
// than by substring, so "/a/bin" is not mistaken for being present just
// because "/a/bin2" is.
//
// The rc files .zshrc, .bashrc and .bash_profile are tried in that order. A
// file is skipped when it does not exist, cannot be read, or already mentions
// dir. The export line is appended to the first remaining file; envConfigured
// is set only after the write and close both succeeded.
func addToShellPath(ctx context.Context, dir string) {
	if dir == "" {
		return
	}

	// Already on PATH for this process: nothing to configure.
	wanted := filepath.Clean(dir)
	for _, entry := range filepath.SplitList(os.Getenv("PATH")) {
		if entry != "" && filepath.Clean(entry) == wanted {
			return
		}
	}

	home, err := os.UserHomeDir()
	if err != nil {
		// Without a home directory the rc paths would resolve relative to
		// the working directory, so give up instead of editing stray files.
		g.Log().Warningf(ctx, "[setup] 无法确定用户主目录,跳过 PATH 配置: %v", err)
		return
	}

	for _, rc := range []string{".zshrc", ".bashrc", ".bash_profile"} {
		rcPath := filepath.Join(home, rc)

		data, readErr := os.ReadFile(rcPath)
		if readErr != nil {
			if !os.IsNotExist(readErr) {
				g.Log().Warningf(ctx, "[setup] 读取 %s 失败: %v", rc, readErr)
			}
			continue
		}
		// The directory is already referenced in this file.
		if strings.Contains(string(data), dir) {
			continue
		}

		line := fmt.Sprintf("\nexport PATH=\"%s:$PATH\"\n", dir)
		f, openErr := os.OpenFile(rcPath, os.O_APPEND|os.O_WRONLY, 0644)
		if openErr != nil {
			g.Log().Warningf(ctx, "[setup] 写入 %s 失败: %v", rc, openErr)
			continue
		}
		_, writeErr := f.WriteString(line)
		closeErr := f.Close()
		if writeErr == nil {
			writeErr = closeErr
		}
		if writeErr != nil {
			g.Log().Warningf(ctx, "[setup] 写入 %s 失败: %v", rc, writeErr)
			continue
		}

		g.Log().Infof(ctx, "[setup] 已将 %s 添加到 %s", dir, rc)
		envConfigured = true
		break
	}
}

View File

@@ -0,0 +1,285 @@
package video
import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"github.com/gogf/gf/v2/frame/g"
)
// ConcatService is the video concatenation service. It carries no state.
type ConcatService struct{}
// Concat is the shared package-level instance of the video concatenation
// service.
var Concat = new(ConcatService)
// ConcatReq is the request for concatenating videos.
type ConcatReq struct {
	VideoPaths []string // video file paths, concatenated in the order given
	OutputPath string // output video file path; generated automatically when empty
	Method string // concatenation method: auto/fast/reencode, defaults to auto
}
// ConcatRes is the result of concatenating videos.
type ConcatRes struct {
	OutputPath string `json:"outputPath"` // path of the output file
	FileSize int64 `json:"fileSize"` // output file size in bytes
	Duration float64 `json:"duration"` // total duration after concatenation, in seconds
	DurationStr string `json:"durationStr"` // human-readable duration
	MethodUsed string `json:"methodUsed"` // concatenation method actually used
	InputFiles int `json:"inputFiles"` // number of input files
}
// Concat joins several videos into one file.
//
// At least two input paths are required and each must be stat-able; a missing
// file and any other stat failure (for example a permission error) are
// reported separately, the latter wrapping the underlying error. When
// req.OutputPath is empty the result is written as "concat_output.mp4" next
// to the first input.
//
// req.Method "fast" uses the concat demuxer (stream copy, requires inputs
// with matching encoding parameters). Every other value, including the
// default "auto", re-encodes through the concat filter.
//
// On success the response describes the output file. The duration is probed
// best-effort: if probing fails, Duration is 0 and no error is returned.
// Errors from the concatenation step and from inspecting the output file are
// wrapped so callers can unwrap them with errors.Is / errors.As.
func (s *ConcatService) Concat(ctx context.Context, req *ConcatReq) (res *ConcatRes, err error) {
	if len(req.VideoPaths) < 2 {
		return nil, fmt.Errorf("至少需要2个视频才能拼接")
	}

	// Validate every input up front so ffmpeg is not started on bad paths.
	for i, p := range req.VideoPaths {
		_, statErr := os.Stat(p)
		switch {
		case statErr == nil:
		case os.IsNotExist(statErr):
			return nil, fmt.Errorf("第%d个视频文件不存在: %s", i+1, p)
		default:
			return nil, fmt.Errorf("第%d个视频文件无法访问: %s: %w", i+1, p, statErr)
		}
	}

	ffmpegPath, err := s.getFFmpegPath()
	if err != nil {
		return nil, err
	}

	// Derive the output path when the caller did not supply one.
	outputPath := req.OutputPath
	if outputPath == "" {
		outputDir := filepath.Dir(req.VideoPaths[0])
		outputPath = filepath.Join(outputDir, "concat_output.mp4")
	}

	method := req.Method
	if method == "" {
		method = "auto"
	}

	var methodUsed string
	switch method {
	case "fast":
		// Lossless stream copy: quick, but only reliable for inputs that
		// share encoding parameters.
		err = s.concatByDemuxer(ctx, ffmpegPath, req.VideoPaths, outputPath)
		methodUsed = "concat demuxer (无损)"
	default:
		// Re-encode with normalised resolution and audio parameters.
		err = s.concatByFilter(ctx, ffmpegPath, req.VideoPaths, outputPath)
		methodUsed = "concat filter (重编码)"
	}
	if err != nil {
		return nil, fmt.Errorf("视频拼接失败: %w", err)
	}

	stat, statErr := os.Stat(outputPath)
	if statErr != nil {
		return nil, fmt.Errorf("输出文件异常: %w", statErr)
	}

	// Duration is informational only; a probe failure must not fail a
	// concatenation that already succeeded, so the error is dropped.
	duration, _ := s.getVideoDuration(ctx, ffmpegPath, outputPath)

	res = &ConcatRes{
		OutputPath:  outputPath,
		FileSize:    stat.Size(),
		Duration:    duration,
		DurationStr: formatDuration(duration),
		MethodUsed:  methodUsed,
		InputFiles:  len(req.VideoPaths),
	}
	return res, nil
}
// concatByDemuxer joins inputs losslessly with ffmpeg's concat demuxer
// ("-f concat ... -c copy"). The inputs are expected to share encoding
// parameters, since streams are copied rather than re-encoded.
//
// A temporary list file is created next to the output and removed when the
// function returns. Each list entry is written as an absolute path, because
// the demuxer resolves relative entries against the list file's directory
// rather than the process working directory. Single quotes inside a path are
// escaped so the quoted entry stays well-formed. The list file gets a unique
// name so concurrent calls targeting the same directory do not overwrite each
// other's list.
//
// It returns an error when the list cannot be prepared or when ffmpeg exits
// unsuccessfully; the latter includes ffmpeg's combined output.
func (s *ConcatService) concatByDemuxer(ctx context.Context, ffmpegPath string, inputs []string, output string) error {
	var list strings.Builder
	for _, p := range inputs {
		abs, err := filepath.Abs(p)
		if err != nil {
			return fmt.Errorf("解析视频路径失败: %w", err)
		}
		// Inside a single-quoted entry a quote is written as '\''.
		list.WriteString("file '")
		list.WriteString(strings.ReplaceAll(abs, "'", `'\''`))
		list.WriteString("'\n")
	}

	listFile, err := os.CreateTemp(filepath.Dir(output), "concat_list_*.txt")
	if err != nil {
		return fmt.Errorf("创建文件列表失败: %w", err)
	}
	fileListPath := listFile.Name()
	defer os.Remove(fileListPath)

	_, writeErr := listFile.WriteString(list.String())
	closeErr := listFile.Close()
	if writeErr == nil {
		writeErr = closeErr
	}
	if writeErr != nil {
		return fmt.Errorf("创建文件列表失败: %w", writeErr)
	}

	args := []string{
		"-f", "concat",
		"-safe", "0",
		"-i", fileListPath,
		"-c", "copy", // copy streams as-is, no re-encoding
		"-y",
		output,
	}
	cmd := exec.CommandContext(ctx, ffmpegPath, args...)
	outputBytes, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("ffmpeg demuxer 失败: %w\n%s", err, string(outputBytes))
	}
	return nil
}
// concatByFilter joins inputs by re-encoding them through ffmpeg's concat
// filter, normalising every input to a common frame size, frame rate and
// audio sample rate first.
//
// The target size is the largest width and the largest height found among the
// inputs (1920x1080 for a dimension that could not be probed on any input),
// rounded up to even numbers because the 4:2:0 H.264 encode that ffmpeg
// typically selects for MP4 output rejects odd dimensions. Each video stream
// is scaled to fit, padded to the target size, and set to 30 fps; each audio
// stream is resampled to 44100 Hz.
//
// NOTE(review): the filter graph references "[i:a]" for every input, so an
// input without an audio stream will make ffmpeg fail.
//
// It returns an error, including ffmpeg's combined output, when ffmpeg exits
// unsuccessfully.
func (s *ConcatService) concatByFilter(ctx context.Context, ffmpegPath string, inputs []string, output string) error {
	n := len(inputs)

	// 1. Determine the common output size from the probed resolutions.
	maxW, maxH := 0, 0
	for _, p := range inputs {
		// A failed probe is tolerated here; the fallback size below covers
		// the case where no input could be probed.
		w, h, _ := s.getVideoResolution(ctx, ffmpegPath, p)
		if w > maxW {
			maxW = w
		}
		if h > maxH {
			maxH = h
		}
	}
	if maxW == 0 {
		maxW = 1920
	}
	if maxH == 0 {
		maxH = 1080
	}
	// Round up to even dimensions.
	maxW += maxW % 2
	maxH += maxH % 2

	// 2. Build the filter graph: normalise each input, then concat them.
	filterParts := make([]string, 0, 2*n)
	concatInputs := make([]string, 0, n)
	for i := 0; i < n; i++ {
		filterParts = append(filterParts, fmt.Sprintf(
			"[%d:v]scale=%d:%d:force_original_aspect_ratio=decrease,pad=%d:%d:(ow-iw)/2:(oh-ih)/2,setsar=1,fps=30[v%d]",
			i, maxW, maxH, maxW, maxH, i,
		))
		filterParts = append(filterParts, fmt.Sprintf(
			"[%d:a]aresample=44100[a%d]",
			i, i,
		))
		concatInputs = append(concatInputs, fmt.Sprintf("[v%d][a%d]", i, i))
	}
	filterStr := fmt.Sprintf("%s;%sconcat=n=%d:v=1:a=1[outv][outa]",
		strings.Join(filterParts, ";"),
		strings.Join(concatInputs, ""), n)

	// 3. Assemble the command line: all inputs first, then the options.
	args := make([]string, 0, 2*n+11)
	for _, p := range inputs {
		args = append(args, "-i", p)
	}
	args = append(args,
		"-filter_complex", filterStr,
		"-map", "[outv]",
		"-map", "[outa]",
		"-preset", "fast",
		"-crf", "23",
		"-y",
		output,
	)

	// Debugging aid: log the full command.
	g.Log().Debugf(ctx, "concat filter 命令: %s %v", ffmpegPath, args)

	// Debugging aid: keep the filter graph on disk while ffmpeg runs. This is
	// best-effort, so a write failure is ignored and nothing is removed then.
	filterFile := filepath.Join(filepath.Dir(output), "concat_filter.txt")
	if writeErr := os.WriteFile(filterFile, []byte(filterStr), 0644); writeErr == nil {
		defer os.Remove(filterFile)
	}

	cmd := exec.CommandContext(ctx, ffmpegPath, args...)
	outputBytes, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("ffmpeg filter 失败: %w\n日志:\n%s", err, string(outputBytes))
	}
	return nil
}
// getVideoResolution returns the width and height of the first video stream
// of videoPath, as reported by ffprobe.
//
// ffprobe is taken from the directory that contains ffmpegPath when an entry
// named "ffprobe" exists there, otherwise it is resolved through PATH. The
// command prints "width,height" in CSV form.
//
// It returns (0, 0, err) when ffprobe fails or when its output cannot be
// parsed as two integers, so callers never receive a partial result with a
// nil error.
func (s *ConcatService) getVideoResolution(ctx context.Context, ffmpegPath, videoPath string) (width, height int, err error) {
	ffprobePath := filepath.Join(filepath.Dir(ffmpegPath), "ffprobe")
	if _, statErr := os.Stat(ffprobePath); os.IsNotExist(statErr) {
		ffprobePath = "ffprobe"
	}

	cmd := exec.CommandContext(ctx, ffprobePath,
		"-v", "error",
		"-select_streams", "v:0",
		"-show_entries", "stream=width,height",
		"-of", "csv=p=0",
		videoPath,
	)
	output, err := cmd.Output()
	if err != nil {
		return 0, 0, err
	}

	text := strings.TrimSpace(string(output))
	if _, scanErr := fmt.Sscanf(text, "%d,%d", &width, &height); scanErr != nil {
		return 0, 0, fmt.Errorf("解析视频分辨率失败 %q: %w", text, scanErr)
	}
	return width, height, nil
}
// getVideoDuration returns the container duration of videoPath in seconds, as
// reported by ffprobe's format=duration entry.
//
// ffprobe is taken from the directory that contains ffmpegPath when an entry
// named "ffprobe" exists there, otherwise it is resolved through PATH.
//
// It returns (0, err) when ffprobe fails or when its output is not a number
// (for example "N/A"), instead of silently reporting a zero duration.
func (s *ConcatService) getVideoDuration(ctx context.Context, ffmpegPath, videoPath string) (float64, error) {
	ffprobePath := filepath.Join(filepath.Dir(ffmpegPath), "ffprobe")
	if _, statErr := os.Stat(ffprobePath); os.IsNotExist(statErr) {
		ffprobePath = "ffprobe"
	}

	cmd := exec.CommandContext(ctx, ffprobePath,
		"-v", "error",
		"-show_entries", "format=duration",
		"-of", "default=noprint_wrappers=1:nokey=1",
		videoPath,
	)
	output, err := cmd.Output()
	if err != nil {
		return 0, err
	}

	text := strings.TrimSpace(string(output))
	var duration float64
	if _, scanErr := fmt.Sscanf(text, "%f", &duration); scanErr != nil {
		return 0, fmt.Errorf("解析视频时长失败 %q: %w", text, scanErr)
	}
	return duration, nil
}
// getFFmpegPath locates the ffmpeg executable.
//
// The "ffmpeg.path" configuration value is preferred when it is non-empty and
// os.Stat succeeds on it; otherwise ffmpeg is looked up on PATH. An error is
// returned when neither lookup succeeds.
func (s *ConcatService) getFFmpegPath() (string, error) {
	configured := g.Cfg().MustGet(context.Background(), "ffmpeg.path", "").String()
	if configured != "" {
		_, statErr := os.Stat(configured)
		if statErr == nil {
			return configured, nil
		}
	}

	resolved, lookErr := exec.LookPath("ffmpeg")
	if lookErr == nil {
		return resolved, nil
	}
	return "", fmt.Errorf("未找到 ffmpeg")
}
// formatDuration renders a number of seconds as "HH:MM:SS". The fractional
// part is dropped (truncated, not rounded).
func formatDuration(seconds float64) string {
	total := int(seconds)
	hours := total / 3600
	minutes := total % 3600 / 60
	secs := total % 60
	return fmt.Sprintf("%02d:%02d:%02d", hours, minutes, secs)
}
// CleanupConcat deletes the given input video files with os.Remove. Removal
// is best-effort: errors are discarded, so a missing path or a non-empty
// directory is silently left alone.
func CleanupConcat(paths []string) {
	for i := range paths {
		_ = os.Remove(paths[i])
	}
}