// Package scene detects shot boundaries in video files with ffmpeg's scene
// filter and derives heuristic per-scene labels (shot type, motion level,
// composition, narrative position) from scene durations.
package scene

import (
	"bufio"
	"context"
	"encoding/json"
	"fmt"
	"math"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"sync"

	"github.com/gogf/gf/v2/frame/g"
)

// defaultSceneThreshold is used when the request threshold is outside (0, 1].
const defaultSceneThreshold = 0.3

// SceneAnalyzerService is the scene analysis service.
type SceneAnalyzerService struct{}

// SceneAnalyzer is the package-level singleton of SceneAnalyzerService.
var SceneAnalyzer = new(SceneAnalyzerService)

// KeyframeInfo describes one extracted keyframe image.
type KeyframeInfo struct {
	Path    string `json:"path"`    // path of the keyframe image
	TimeStr string `json:"timeStr"` // timestamp the frame was taken at
	Width   int    `json:"width"`   // image width
	Height  int    `json:"height"`  // image height
}

// SceneInfo describes a single scene.
type SceneInfo struct {
	SceneIndex   int           `json:"sceneIndex"`         // scene number
	StartTime    float64       `json:"startTime"`          // start time (seconds, 3 decimals)
	EndTime      float64       `json:"endTime"`            // end time (seconds)
	Duration     float64       `json:"duration"`           // duration (seconds)
	StartTimeStr string        `json:"startTimeStr"`       // HH:MM:SS.mmm
	EndTimeStr   string        `json:"endTimeStr"`         // HH:MM:SS.mmm
	DurationStr  string        `json:"durationStr"`        // HH:MM:SS.mmm
	ShotType     string        `json:"shotType"`           // shot type
	MotionLevel  string        `json:"motionLevel"`        // motion level
	Composition  string        `json:"composition"`        // composition type
	NarrativePos string        `json:"narrativePos"`       // narrative position
	Keyframe     *KeyframeInfo `json:"keyframe,omitempty"` // keyframe (when extracted)
	Description  string        `json:"description"`        // scene description (for AI consumption)
}

// VideoSceneAnalysis is the scene analysis result of one video.
type VideoSceneAnalysis struct {
	FileName     string       `json:"fileName"`
	FilePath     string       `json:"filePath"`
	Duration     float64      `json:"duration"`
	DurationStr  string       `json:"durationStr"`
	FrameRate    float64      `json:"frameRate"`
	Width        int          `json:"width"`
	Height       int          `json:"height"`
	AspectRatio  string       `json:"aspectRatio"` // reduced width:height ratio
	Orientation  string       `json:"orientation"` // landscape / portrait
	TotalScenes  int          `json:"totalScenes"`
	Scenes       []SceneInfo  `json:"scenes"`
	DetectParams DetectParams `json:"detectParams"`
	Summary      SceneSummary `json:"summary"` // scene overview
}

// SceneSummary is the aggregated overview of all scenes of one video.
type SceneSummary struct {
	AvgShotDuration float64        `json:"avgShotDuration"` // average shot duration
	MinShotDuration float64        `json:"minShotDuration"`
	MaxShotDuration float64        `json:"maxShotDuration"`
	ShotTypeDist    map[string]int `json:"shotTypeDist"`           // shot type distribution
	MotionDist      map[string]int `json:"motionDist"`             // motion level distribution
	CompositionDist map[string]int `json:"compositionDist"`        // composition distribution
	Pacing          string         `json:"pacing"`                 // editing pace
	KeyframesDir    string         `json:"keyframesDir,omitempty"` // keyframe directory
}

// DetectParams records the parameters a detection run used.
type DetectParams struct {
	Threshold        float64 `json:"threshold"`
	Method           string  `json:"method"`
	ExtractKeyframes bool    `json:"extractKeyframes"`
}

// SceneAnalyzeReq is the scene analysis request.
type SceneAnalyzeReq struct {
	VideoPaths       []string // video file paths
	Threshold        float64  // scene detection threshold 0.1-0.5, default 0.3
	ExtractKeyframes bool     // whether to extract keyframe images
}

// SceneAnalyzeRes is the scene analysis response.
type SceneAnalyzeRes struct {
	Analyses []VideoSceneAnalysis `json:"analyses"`
}

var (
	// ptsTimeRegex captures the pts_time value printed by ffmpeg's showinfo filter.
	ptsTimeRegex = regexp.MustCompile(`pts_time:([\d.]+)`)
)

// ffprobeResult is the subset of `ffprobe -print_format json` output that
// getVideoMeta reads.
type ffprobeResult struct {
	Streams []struct {
		CodecType    string `json:"codec_type"`
		Width        int    `json:"width"`
		Height       int    `json:"height"`
		RFrameRate   string `json:"r_frame_rate"`
		AvgFrameRate string `json:"avg_frame_rate"`
		Duration     string `json:"duration"`
	} `json:"streams"`
	Format struct {
		Duration string `json:"duration"`
	} `json:"format"`
}

// Analyze analyzes the scenes of every video in req concurrently.
//
// Successful analyses are returned in the same order as req.VideoPaths.
// Videos that fail are logged and skipped; an error is returned only when no
// video could be analyzed (or req is nil).
func (s *SceneAnalyzerService) Analyze(ctx context.Context, req *SceneAnalyzeReq) (res *SceneAnalyzeRes, err error) {
	if req == nil {
		return nil, fmt.Errorf("场景分析请求为空")
	}

	threshold := req.Threshold
	// Written as a negated range check so that NaN also falls back to the default.
	if !(threshold > 0 && threshold <= 1) {
		threshold = defaultSceneThreshold
	}

	// Each goroutine writes only its own slot, so no mutex is needed and the
	// output order is deterministic.
	results := make([]*VideoSceneAnalysis, len(req.VideoPaths))
	failures := make([]error, len(req.VideoPaths))

	var wg sync.WaitGroup
	for i, videoPath := range req.VideoPaths {
		wg.Add(1)
		go func(idx int, vp string) {
			defer wg.Done()
			analysis, aErr := s.analyzeSingle(ctx, vp, threshold, req.ExtractKeyframes)
			if aErr != nil {
				failures[idx] = fmt.Errorf("分析失败 [%s]: %w", filepath.Base(vp), aErr)
				return
			}
			results[idx] = analysis
		}(i, videoPath)
	}
	wg.Wait()

	analyses := make([]VideoSceneAnalysis, 0, len(results))
	var errs []string
	for i := range results {
		switch {
		case failures[i] != nil:
			errs = append(errs, failures[i].Error())
		case results[i] != nil:
			analyses = append(analyses, *results[i])
		}
	}

	if len(errs) > 0 {
		g.Log().Errorf(ctx, "部分视频分析失败: %s", strings.Join(errs, "; "))
	}
	if len(analyses) == 0 {
		return nil, fmt.Errorf("所有视频分析均失败: %s", strings.Join(errs, "; "))
	}
	return &SceneAnalyzeRes{Analyses: analyses}, nil
}

// analyzeSingle analyzes one video: it probes the metadata, detects scene
// changes, builds the scene list, optionally extracts one keyframe per scene
// and assembles the summary.
func (s *SceneAnalyzerService) analyzeSingle(ctx context.Context, videoPath string, threshold float64, extractKeyframes bool) (*VideoSceneAnalysis, error) {
	ffmpegPath, err := s.getFFmpegPath()
	if err != nil {
		return nil, err
	}

	// 1. Video metadata.
	totalDuration, frameRate, width, height, err := s.getVideoMeta(ctx, ffmpegPath, videoPath)
	if err != nil {
		return nil, fmt.Errorf("获取视频元数据失败: %w", err)
	}

	// 2. Scene detection.
	sceneChanges, err := s.detectScenes(ctx, ffmpegPath, videoPath, threshold)
	if err != nil {
		return nil, fmt.Errorf("场景检测失败: %w", err)
	}

	// 3. Raw scene list.
	rawScenes := s.buildScenes(sceneChanges, totalDuration)

	// 4. Keyframe directory (only when requested). Keyframes are best-effort:
	// if the directory cannot be created the analysis continues without them.
	keyframesDir := ""
	if extractKeyframes {
		keyframesDir = filepath.Join(filepath.Dir(videoPath), "keyframes_"+filepath.Base(videoPath))
		if mkErr := os.MkdirAll(keyframesDir, 0755); mkErr != nil {
			g.Log().Warningf(ctx, "创建关键帧目录失败 [%s]: %v", keyframesDir, mkErr)
			keyframesDir = ""
		}
	}

	// gcd is 0 only when both dimensions are 0 (no dimensions reported);
	// leave the ratio empty instead of dividing by zero.
	aspectRatio := ""
	if divisor := gcd(width, height); divisor > 0 {
		aspectRatio = fmt.Sprintf("%d:%d", width/divisor, height/divisor)
	}

	orientation := "横屏"
	if height > width {
		orientation = "竖屏"
	}

	// Drop everything up to and including the first underscore of the file name.
	fileName := filepath.Base(videoPath)
	if idx := strings.Index(fileName, "_"); idx > 0 {
		fileName = fileName[idx+1:]
	}

	totalScenes := len(rawScenes)
	scenes := make([]SceneInfo, totalScenes)
	shotDist := make(map[string]int)
	motionDist := make(map[string]int)
	compDist := make(map[string]int)

	for i, rs := range rawScenes {
		scene := SceneInfo{
			SceneIndex:   rs.SceneIndex,
			StartTime:    round3(rs.StartTime),
			EndTime:      round3(rs.EndTime),
			Duration:     round3(rs.Duration),
			StartTimeStr: rs.StartTimeStr,
			EndTimeStr:   rs.EndTimeStr,
			DurationStr:  rs.DurationStr,
		}

		scene.ShotType = classifyShotType(rs.Duration)
		shotDist[scene.ShotType]++

		scene.MotionLevel = classifyMotionLevel(rs.Duration, totalDuration)
		motionDist[scene.MotionLevel]++

		scene.Composition = classifyComposition(rs.Duration, width, height)
		compDist[scene.Composition]++

		scene.NarrativePos = narrativePosition(rs.StartTime, totalDuration)

		// Keyframe at the scene midpoint; a failed extraction simply leaves
		// scene.Keyframe nil.
		if keyframesDir != "" {
			midTime := (rs.StartTime + rs.EndTime) / 2
			kfPath := filepath.Join(keyframesDir, fmt.Sprintf("scene_%03d.jpg", rs.SceneIndex))
			if kfErr := s.extractKeyframe(ctx, ffmpegPath, videoPath, midTime, kfPath); kfErr == nil {
				scene.Keyframe = &KeyframeInfo{
					Path:    kfPath,
					TimeStr: formatTime(midTime),
					Width:   width,
					Height:  height,
				}
			}
		}

		scene.Description = buildSceneDescription(scene)
		scenes[i] = scene
	}

	return &VideoSceneAnalysis{
		FileName:    fileName,
		FilePath:    videoPath,
		Duration:    round3(totalDuration),
		DurationStr: formatTime(totalDuration),
		FrameRate:   round3(frameRate),
		Width:       width,
		Height:      height,
		AspectRatio: aspectRatio,
		Orientation: orientation,
		TotalScenes: totalScenes,
		Scenes:      scenes,
		DetectParams: DetectParams{
			Threshold:        threshold,
			Method:           "ffmpeg scene filter",
			ExtractKeyframes: extractKeyframes,
		},
		Summary: s.buildSummary(scenes, shotDist, motionDist, compDist, keyframesDir),
	}, nil
}

// narrativePosition maps a scene's start time, relative to the total
// duration, to a narrative-position label. A non-positive total duration is
// treated as ratio 0.
func narrativePosition(startTime, totalDuration float64) string {
	ratio := 0.0
	if totalDuration > 0 {
		ratio = startTime / totalDuration
	}
	switch {
	case ratio < 0.15:
		return "开头引入"
	case ratio < 0.35:
		return "前段发展"
	case ratio < 0.65:
		return "中段高潮"
	case ratio < 0.85:
		return "后段收束"
	default:
		return "结尾总结"
	}
}

// buildSummary aggregates min/max/average scene duration, the label
// distributions and a pacing label. It returns the zero SceneSummary when
// scenes is empty.
func (s *SceneAnalyzerService) buildSummary(scenes []SceneInfo, shotDist, motionDist, compDist map[string]int, kfDir string) SceneSummary {
	if len(scenes) == 0 {
		return SceneSummary{}
	}

	minD, maxD, sumD := math.MaxFloat64, 0.0, 0.0
	for i := range scenes {
		d := scenes[i].Duration
		sumD += d
		if d < minD {
			minD = d
		}
		if d > maxD {
			maxD = d
		}
	}
	avgD := sumD / float64(len(scenes))

	var pacing string
	switch {
	case avgD < 2:
		pacing = "快节奏(快速剪辑)"
	case avgD < 4:
		pacing = "适中节奏"
	case avgD < 8:
		pacing = "舒缓节奏"
	default:
		pacing = "慢节奏(长镜头为主)"
	}

	return SceneSummary{
		AvgShotDuration: round3(avgD),
		MinShotDuration: round3(minD),
		MaxShotDuration: round3(maxD),
		ShotTypeDist:    shotDist,
		MotionDist:      motionDist,
		CompositionDist: compDist,
		Pacing:          pacing,
		KeyframesDir:    kfDir,
	}
}

// getVideoMeta runs ffprobe and returns the duration (seconds), frame rate
// and frame size of the video.
//
// ffprobe is looked up next to ffmpegPath first and on PATH otherwise. Values
// come from the first stream whose codec_type is "video"; the duration falls
// back to the container duration when the stream does not report one. Fields
// that cannot be determined are returned as zero.
func (s *SceneAnalyzerService) getVideoMeta(ctx context.Context, ffmpegPath, videoPath string) (duration, frameRate float64, width, height int, err error) {
	ffprobePath := filepath.Join(filepath.Dir(ffmpegPath), "ffprobe")
	if _, statErr := os.Stat(ffprobePath); os.IsNotExist(statErr) {
		ffprobePath = "ffprobe"
	}

	cmd := exec.CommandContext(ctx, ffprobePath,
		"-v", "quiet",
		"-print_format", "json",
		"-show_format",
		"-show_streams",
		videoPath,
	)
	output, execErr := cmd.Output()
	if execErr != nil {
		return 0, 0, 0, 0, fmt.Errorf("ffprobe 执行失败: %w", execErr)
	}

	var probe ffprobeResult
	if jsonErr := json.Unmarshal(output, &probe); jsonErr != nil {
		// Unexpected output shape: fall back to the plain text scan.
		text := string(output)
		return parseJSONFloat(text, `"duration":`),
			parseFrameRate(text),
			parseJSONInt(text, `"width":`),
			parseJSONInt(text, `"height":`),
			nil
	}

	for i := range probe.Streams {
		st := &probe.Streams[i]
		if st.CodecType != "video" {
			continue
		}
		width, height = st.Width, st.Height
		frameRate = parseRate(st.RFrameRate)
		if frameRate <= 0 {
			frameRate = parseRate(st.AvgFrameRate)
		}
		// A missing or non-numeric duration parses to 0 and triggers the
		// container fallback below.
		duration, _ = strconv.ParseFloat(st.Duration, 64)
		break
	}
	if duration <= 0 {
		// Parse failure leaves 0, which callers treat as "unknown".
		duration, _ = strconv.ParseFloat(probe.Format.Duration, 64)
	}
	return duration, frameRate, width, height, nil
}

// detectScenes runs ffmpeg's scene filter and returns the pts_time (seconds)
// of every frame whose scene score exceeds threshold.
//
// A cancelled context is always reported as an error. A failing ffmpeg run is
// reported only when it produced no timestamps at all, so a partially decoded
// file still yields the scene changes found so far.
func (s *SceneAnalyzerService) detectScenes(ctx context.Context, ffmpegPath, videoPath string, threshold float64) ([]float64, error) {
	// Shortest exact representation: a fixed precision of 1 would silently
	// turn 0.25 into 0.2.
	thresholdStr := strconv.FormatFloat(threshold, 'f', -1, 64)

	args := []string{
		"-i", videoPath,
		"-filter:v", fmt.Sprintf("select='gt(scene,%s)',showinfo", thresholdStr),
		"-f", "null",
		"-",
	}
	cmd := exec.CommandContext(ctx, ffmpegPath, args...)
	output, runErr := cmd.CombinedOutput()

	var timestamps []float64
	scanner := bufio.NewScanner(strings.NewReader(string(output)))
	// Raise the token limit to the whole output: ffmpeg's carriage-return
	// progress updates can form "lines" longer than the 64 KiB default, which
	// would stop the scan early and drop later timestamps.
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), len(output)+1)
	for scanner.Scan() {
		matches := ptsTimeRegex.FindStringSubmatch(scanner.Text())
		if len(matches) < 2 {
			continue
		}
		ts, parseErr := strconv.ParseFloat(matches[1], 64)
		if parseErr == nil && ts > 0 {
			timestamps = append(timestamps, ts)
		}
	}
	if scanErr := scanner.Err(); scanErr != nil {
		return nil, fmt.Errorf("解析 ffmpeg 输出失败: %w", scanErr)
	}

	if ctxErr := ctx.Err(); ctxErr != nil {
		return nil, ctxErr
	}
	if runErr != nil && len(timestamps) == 0 {
		return nil, fmt.Errorf("ffmpeg 执行失败: %w", runErr)
	}
	return timestamps, nil
}

// extractKeyframe writes the frame at timeSec (seconds) of videoPath to
// outputPath as a single image, overwriting an existing file.
func (s *SceneAnalyzerService) extractKeyframe(ctx context.Context, ffmpegPath, videoPath string, timeSec float64, outputPath string) error {
	args := []string{
		"-ss", strconv.FormatFloat(timeSec, 'f', 3, 64),
		"-i", videoPath,
		"-vframes", "1",
		"-q:v", "3",
		"-y",
		outputPath,
	}
	return exec.CommandContext(ctx, ffmpegPath, args...).Run()
}

// buildScenes turns scene-change timestamps into consecutive scenes covering
// [0, totalDuration].
//
// Timestamps that do not advance past the previous boundary or lie beyond
// totalDuration are skipped. Scene indices are consecutive starting at 1.
// Only the timing fields of each SceneInfo are populated.
func (s *SceneAnalyzerService) buildScenes(sceneChanges []float64, totalDuration float64) []SceneInfo {
	newScene := func(index int, start, end float64) SceneInfo {
		return SceneInfo{
			SceneIndex:   index,
			StartTime:    start,
			EndTime:      end,
			Duration:     end - start,
			StartTimeStr: formatTime(start),
			EndTimeStr:   formatTime(end),
			DurationStr:  formatTime(end - start),
		}
	}

	if len(sceneChanges) == 0 {
		return []SceneInfo{newScene(1, 0, totalDuration)}
	}

	var scenes []SceneInfo
	startTime := 0.0
	for _, ts := range sceneChanges {
		if ts <= startTime || ts > totalDuration {
			continue
		}
		// Number by emitted scenes, not by timestamp position, so skipped
		// timestamps cannot cause gaps or duplicate indices.
		scenes = append(scenes, newScene(len(scenes)+1, startTime, ts))
		startTime = ts
	}
	if startTime < totalDuration {
		scenes = append(scenes, newScene(len(scenes)+1, startTime, totalDuration))
	}
	return scenes
}

// ---------- Shot classification ----------

// classifyShotType labels a shot by its duration in seconds.
func classifyShotType(duration float64) string {
	switch {
	case duration < 0.8:
		return "极速闪切"
	case duration < 1.5:
		return "快速切换"
	case duration < 2.5:
		return "短镜头"
	case duration < 4:
		return "标准镜头"
	case duration < 8:
		return "中长镜头"
	case duration < 15:
		return "长镜头"
	default:
		return "超长镜头"
	}
}

// classifyMotionLevel infers a motion level from the shot duration in
// seconds. totalDuration is accepted but not used by the current heuristic.
func classifyMotionLevel(duration, totalDuration float64) string {
	switch {
	case duration < 1.0:
		return "高动态(快速切换)"
	case duration < 2.0:
		return "中高动态"
	case duration < 4.0:
		return "中等动态"
	case duration < 8.0:
		return "低动态(平稳)"
	default:
		return "静态/固定机位"
	}
}

// classifyComposition infers a composition label from the shot duration in
// seconds; portrait videos (height > width) get their own label set.
func classifyComposition(duration float64, width, height int) string {
	isVertical := height > width
	pick := func(vertical, horizontal string) string {
		if isVertical {
			return vertical
		}
		return horizontal
	}
	switch {
	case duration < 1.2:
		return pick("竖屏特写/细节", "特写/细节")
	case duration < 2.5:
		return pick("竖屏近景", "近景/中近景")
	case duration < 5:
		return pick("竖屏中景", "中景/半身")
	case duration < 10:
		return pick("竖屏全景", "全景/环境")
	default:
		return pick("竖屏远景/固定机位", "远景/广角")
	}
}

// buildSceneDescription renders a one-line readable scene description
// (for AI consumption).
func buildSceneDescription(scene SceneInfo) string {
	return fmt.Sprintf(
		"场景%d:%s~%s,时长%s,%s,%s,%s,%s",
		scene.SceneIndex,
		scene.StartTimeStr,
		scene.EndTimeStr,
		scene.DurationStr,
		scene.ShotType,
		scene.Composition,
		scene.MotionLevel,
		scene.NarrativePos,
	)
}

// ---------- Utilities ----------

// round3 rounds v to three decimal places.
func round3(v float64) float64 {
	return math.Round(v*1000) / 1000
}

// gcd returns the greatest common divisor of a and b using Euclid's
// algorithm; gcd(0, 0) is 0.
func gcd(a, b int) int {
	for b != 0 {
		a, b = b, a%b
	}
	return a
}

// getFFmpegPath returns the ffmpeg binary to use: the configured
// "ffmpeg.path" when that file exists, otherwise the ffmpeg found on PATH.
func getFFmpegPath() (string, error) {
	ffmpegPath := g.Cfg().MustGet(context.Background(), "ffmpeg.path", "").String()
	if ffmpegPath != "" {
		if _, err := os.Stat(ffmpegPath); err == nil {
			return ffmpegPath, nil
		}
	}
	path, err := exec.LookPath("ffmpeg")
	if err != nil {
		return "", fmt.Errorf("未找到 ffmpeg")
	}
	return path, nil
}

// formatTime formats seconds as HH:MM:SS.mmm. Negative, NaN and infinite
// inputs are formatted as zero.
func formatTime(seconds float64) string {
	if math.IsNaN(seconds) || math.IsInf(seconds, 0) || seconds < 0 {
		seconds = 0
	}
	// Round on total milliseconds so the fraction can carry into the seconds
	// (1.9996 -> 00:00:02.000 instead of a four-digit ".1000").
	totalMs := int64(math.Round(seconds * 1000))
	totalSec := totalMs / 1000
	return fmt.Sprintf("%02d:%02d:%02d.%03d",
		totalSec/3600, (totalSec%3600)/60, totalSec%60, totalMs%1000)
}

// parseRate parses a rate written either as "num/den" or as a plain number.
// It returns 0 for malformed input or a non-positive denominator.
func parseRate(s string) float64 {
	s = strings.TrimSpace(s)
	if slash := strings.IndexByte(s, '/'); slash >= 0 {
		num, numErr := strconv.ParseFloat(s[:slash], 64)
		den, denErr := strconv.ParseFloat(s[slash+1:], 64)
		if numErr != nil || denErr != nil || den <= 0 {
			return 0
		}
		return num / den
	}
	val, err := strconv.ParseFloat(s, 64)
	if err != nil {
		return 0
	}
	return val
}

// parseJSONFloat returns the unsigned decimal number following the first
// occurrence of key in text, skipping spaces and quotes; 0 when key or a
// number is absent.
func parseJSONFloat(text, key string) float64 {
	idx := strings.Index(text, key)
	if idx < 0 {
		return 0
	}
	start := idx + len(key)
	for start < len(text) && (text[start] == ' ' || text[start] == '"') {
		start++
	}
	end := start
	for end < len(text) && (isDigit(text[end]) || text[end] == '.') {
		end++
	}
	if start == end {
		return 0
	}
	// A malformed number such as "1.2.3" yields 0.
	val, _ := strconv.ParseFloat(text[start:end], 64)
	return val
}

// parseJSONInt returns the unsigned integer following the first occurrence of
// key in text, skipping spaces and quotes; 0 when key or digits are absent.
func parseJSONInt(text, key string) int {
	idx := strings.Index(text, key)
	if idx < 0 {
		return 0
	}
	start := idx + len(key)
	for start < len(text) && (text[start] == ' ' || text[start] == '"') {
		start++
	}
	end := start
	for end < len(text) && isDigit(text[end]) {
		end++
	}
	if start == end {
		return 0
	}
	// Only digits were collected; Atoi can fail solely on overflow, in which
	// case the clamped value is returned.
	val, _ := strconv.Atoi(text[start:end])
	return val
}

// parseFrameRate scans text for the first "r_frame_rate" value and then the
// first "avg_frame_rate" value, returning the first one that parses to a
// positive rate, or 0.
func parseFrameRate(text string) float64 {
	for _, key := range []string{`"r_frame_rate":`, `"avg_frame_rate":`} {
		idx := strings.Index(text, key)
		if idx < 0 {
			continue
		}
		start := idx + len(key)
		for start < len(text) && (text[start] == ' ' || text[start] == '"') {
			start++
		}
		end := start
		for end < len(text) && text[end] != '"' && text[end] != ',' && text[end] != '}' && text[end] != ' ' {
			end++
		}
		if rate := parseRate(text[start:end]); rate > 0 {
			return rate
		}
	}
	return 0
}

// isDigit reports whether b is an ASCII decimal digit.
func isDigit(b byte) bool {
	return b >= '0' && b <= '9'
}

// Cleanup removes the given video and keyframe paths, including directory
// contents. Removal is best-effort: failures are ignored.
func Cleanup(paths []string) {
	for _, p := range paths {
		_ = os.RemoveAll(p)
	}
}

// getFFmpegPath is the method form of the package-level getFFmpegPath.
func (s *SceneAnalyzerService) getFFmpegPath() (string, error) {
	return getFFmpegPath()
}