package service import ( "context" "time" "model-asynch/dao" "github.com/gogf/gf/v2/frame/g" ) var Cleaner = &cleaner{} type cleaner struct{} // RunOnce 由上层定时任务触发:执行一次清理/重试 func (c *cleaner) RunOnce(ctx context.Context) { // 1) 清理已下载(state=4)且过期的任务(硬删除 + OSS) expired, err := dao.Task.ListExpiredDownloadedGlobal(ctx, 200) if err != nil { g.Log().Errorf(ctx, "[cleaner] list expired(downloaded) error: %v", err) } else { for _, t := range expired { deleteTmpResult(t.TmpFile) _ = dao.Task.HardDeleteByIDGlobal(ctx, t.Id) } g.Log().Infof(ctx, "[cleaner] expired(downloaded) cleaned, count=%d", len(expired)) } // 2) 超时任务标失败 list, err := dao.Task.ListTimeoutTasksGlobal(ctx, 200) if err != nil { g.Log().Errorf(ctx, "[cleaner] list timeout error: %v", err) } else { for _, t := range list { _ = dao.Task.UpdateFailedGlobal(ctx, t.Id, "任务超时自动失败") ReleaseQueueSlot(ctx, t.ModelName, t.TaskID) } g.Log().Infof(ctx, "[cleaner] timeout cleaned, count=%d", len(list)) } // 3) 失败(state=3)的任务按模型配置 retry_times 重新入队(放到队尾) retryable, err := dao.Task.ListFailedRetryableGlobal(ctx, 200) if err != nil { g.Log().Errorf(ctx, "[cleaner] list failed retryable error: %v", err) } else { for _, t := range retryable { // 失败任务重新入队(state=3 -> 0)前,先严格占用 queue_limit slot;占用失败则留在失败态,下一轮再尝试 // 获取模型配置以得到 queue_limit / expected_seconds m, err := dao.Model.GetByModelNameForTenant(ctx, t.TenantId, t.ModelName) if err != nil || m == nil { continue } limit := GetRuntimeQueueLimit(ctx, t.ModelName, m.QueueLimit) if limit > 0 { ok, _ := AcquireQueueSlot(ctx, t.ModelName, t.TaskID, limit, m.ExpectedSeconds) if !ok { continue } } // retry_queue_max_seconds 控制失败重试的排队策略: // - =0:失败重试插队到队首 // - >0:当任务从创建到现在的排队时长 >= maxSeconds,则插队到队首;否则仍放到队尾 now := time.Now() enqueueAt := now maxSeconds := t.RetryQueueMaxSeconds if maxSeconds == 0 { enqueueAt = now.Add(-100 * 365 * 24 * time.Hour) } else if maxSeconds > 0 && t.CreatedAt != nil { if now.Sub(t.CreatedAt.Time) >= time.Duration(maxSeconds)*time.Second { enqueueAt = now.Add(-100 * 365 * 24 * time.Hour) } } _ = dao.Task.RequeueForRetryGlobal(ctx, t.Id, enqueueAt) } g.Log().Infof(ctx, "[cleaner] failed retryable cleaned, count=%d", len(retryable)) } // 4) 超过重试次数仍失败(state=3)的任务:硬删除 exhausted, err := dao.Task.ListFailedExhaustedGlobal(ctx, 200) if err != nil { g.Log().Errorf(ctx, "[cleaner] list failed exhausted error: %v", err) } else { for _, t := range exhausted { deleteTmpResult(t.TmpFile) // 重试耗尽硬删除:释放闸门占位(兜底,若此前已释放则幂等) ReleaseQueueSlot(ctx, t.ModelName, t.TaskID) _ = dao.Task.HardDeleteByIDGlobal(ctx, t.Id) } g.Log().Infof(ctx, "[cleaner] failed exhausted cleaned, count=%d", len(exhausted)) } }