refactor(service): 重构服务模块结构并优化模型配置

2026-05-29 17:54:19 +08:00
parent e487b4bb5e
commit d409b84b58
24 changed files with 943 additions and 1158 deletions
--- a/service/queue/auto_tune.go
+++ b/service/queue/auto_tune.go
@@ -0,0 +1,201 @@
+package queue
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"math"
+	"model-gateway/model/dto"
+
+	"model-gateway/consts/public"
+	"model-gateway/model/entity"
+
+	"gitea.com/red-future/common/db/gfdb"
+	"github.com/gogf/gf/v2/frame/g"
+)
+
+// AutoTuneResult is the outcome of one tuning pass for a single model (keyed by model_name).
+type AutoTuneResult struct {
+	ModelName string  `json:"modelName"`      // Model name (asynch_models.model_name).
+	Samples   int     `json:"samples"`        // Sample count: tasks in the window with state 2/3 and non-null started_at/finished_at.
+	P90Exec   float64 `json:"p90ExecSeconds"` // P90 of the execution time in seconds, measured as finished_at - started_at.
+
+	CapMaxConcurrency int `json:"capMaxConcurrency"` // Configured cap: asynch_models.max_concurrency (never overwritten by tuning).
+	OldMaxConcurrency int `json:"oldMaxConcurrency"` // Runtime value (Redis) before tuning; equals the cap when none is stored.
+	NewMaxConcurrency int `json:"newMaxConcurrency"` // Runtime value computed by this pass (written to Redis); limited to ±50% change and to the cap.
+
+	CapQueueLimit int `json:"capQueueLimit"` // Configured cap: asynch_models.queue_limit (never overwritten by tuning).
+	OldQueueLimit int `json:"oldQueueLimit"` // Runtime value (Redis) before tuning; equals the cap when none is stored.
+	NewQueueLimit int `json:"newQueueLimit"` // Runtime value computed by this pass (written to Redis); limited to ±50% change and to the cap.
+
+	ExpectedSeconds int `json:"expectedSeconds"` // Expected execution time in seconds (asynch_models.expected_seconds); drives the queue_limit calculation.
+}
+
+// AutoTune recomputes the runtime max_concurrency and queue_limit of every
+// enabled model and stores them in Redis (via setRuntimeInt). It is meant to
+// be triggered periodically by an upper-layer scheduler through the API.
+//
+// Steps:
+//   - Load the enabled models; their max_concurrency / queue_limit act as caps.
+//     Rows sharing a model_name are merged by keeping the larger value of each
+//     cap and of expected_seconds.
+//   - For the last req.WindowSeconds seconds, count the finished tasks
+//     (state 2/3) per model and take the P90 of finished_at - started_at.
+//   - Estimate max_concurrency as ceil(rate * P90 / utilization), where rate is
+//     the number of completions per second over the window.
+//   - Derive queue_limit from an allowed waiting time of expected_seconds * 2.
+//   - Limit each change to ±50% of the current runtime value and never exceed
+//     the cap.
+//
+// A non-positive req.WindowSeconds is replaced in place by 3600 (one hour).
+// Models without samples in the window are reported unchanged and nothing is
+// written for them.
+//
+// It returns an error when req is nil, when no enabled model exists, when a
+// query fails, or when the statistics rows cannot be converted.
+func AutoTune(ctx context.Context, req *dto.AutoTuneReq) (res *dto.AutoTuneRes, err error) {
+	if req == nil {
+		return nil, errors.New("request cannot be nil")
+	}
+	if req.WindowSeconds <= 0 {
+		req.WindowSeconds = 3600 // default window: 1 hour
+	}
+	// 1) Load the model configuration (caps) and de-duplicate by model_name.
+	// If the table holds duplicates (e.g. one row per tenant), keep the larger cap.
+	var modelRows []*entity.AsynchModel
+	if err := gfdb.DB(ctx).Model(ctx, public.TableNameModel).
+		Where("deleted_at IS NULL").
+		Where(entity.AsynchModelCol.Enabled, 1).
+		Scan(&modelRows); err != nil {
+		return nil, err
+	}
+	modelMap := make(map[string]*entity.AsynchModel)
+	for _, m := range modelRows {
+		if m == nil || m.ModelName == "" {
+			continue
+		}
+		cur := modelMap[m.ModelName]
+		if cur == nil {
+			modelMap[m.ModelName] = m
+			continue
+		}
+		// Keep the larger value of each setting.
+		if m.MaxConcurrency > cur.MaxConcurrency {
+			cur.MaxConcurrency = m.MaxConcurrency
+		}
+		if m.QueueLimit > cur.QueueLimit {
+			cur.QueueLimit = m.QueueLimit
+		}
+		if m.ExpectedSeconds > cur.ExpectedSeconds {
+			cur.ExpectedSeconds = m.ExpectedSeconds
+		}
+	}
+	if len(modelMap) == 0 {
+		return nil, errors.New("no models found")
+	}
+
+	// 2) Window statistics: sample count and P90 execution time per model_name.
+	type statRow struct {
+		ModelName string
+		Cnt       int
+		P90Exec   float64
+	}
+	var stats []statRow
+	query := fmt.Sprintf(`
+SELECT model_name,
+       COUNT(1) AS cnt,
+       COALESCE(percentile_cont(0.9) WITHIN GROUP (ORDER BY EXTRACT(EPOCH FROM (finished_at - started_at))), 0) AS p90_exec
+  FROM %s
+ WHERE deleted_at IS NULL
+   AND state IN (2,3)
+   AND started_at IS NOT NULL
+   AND finished_at IS NOT NULL
+   AND finished_at >= (NOW() - (? || ' seconds')::interval)
+ GROUP BY model_name`, public.TableNameTask)
+	r, err := gfdb.DB(ctx).GetAll(ctx, query, req.WindowSeconds)
+	if err != nil {
+		return nil, err
+	}
+	// A failed conversion must not be mistaken for "no samples".
+	if err = r.Structs(&stats); err != nil {
+		return nil, fmt.Errorf("convert auto-tune stats: %w", err)
+	}
+	statMap := make(map[string]statRow, len(stats))
+	for _, s := range stats {
+		statMap[s.ModelName] = s
+	}
+
+	// 3) Tuning.
+	const utilization = 0.8
+	const maxChangeRatio = 0.5 // ±50%
+	const queueFactor = 2.0    // bound to expected_seconds: W_target = expected_seconds * 2
+
+	// The samples were collected over exactly this many seconds, so the rate
+	// must be derived from it rather than from a fixed one-hour constant.
+	windowSeconds := float64(req.WindowSeconds)
+
+	out := make([]AutoTuneResult, 0, len(modelMap))
+	for modelName, m := range modelMap {
+		s := statMap[modelName]
+		capMax := m.MaxConcurrency
+		capQueue := m.QueueLimit
+		oldMax := GetRuntimeMaxConcurrency(ctx, modelName, capMax)
+		oldQueue := GetRuntimeQueueLimit(ctx, modelName, capQueue)
+
+		// Without samples there is nothing to base a change on: report as-is.
+		if s.Cnt <= 0 || s.P90Exec <= 0 {
+			out = append(out, AutoTuneResult{
+				ModelName:         modelName,
+				Samples:           s.Cnt,
+				P90Exec:           s.P90Exec,
+				CapMaxConcurrency: capMax,
+				OldMaxConcurrency: oldMax,
+				NewMaxConcurrency: oldMax,
+				CapQueueLimit:     capQueue,
+				OldQueueLimit:     oldQueue,
+				NewQueueLimit:     oldQueue,
+				ExpectedSeconds:   m.ExpectedSeconds,
+			})
+			continue
+		}
+
+		// arrival_rate ≈ completions / window length (tasks per second).
+		arrivalRate := float64(s.Cnt) / windowSeconds
+
+		// desiredMax = ceil(arrivalRate * p90 / utilization)
+		desiredMax := int(math.Ceil(arrivalRate * s.P90Exec / utilization))
+		if desiredMax < 1 {
+			desiredMax = 1
+		}
+		// Limit the size of a single adjustment.
+		minMax := int(math.Floor(float64(oldMax) * (1 - maxChangeRatio)))
+		maxMax := int(math.Ceil(float64(oldMax) * (1 + maxChangeRatio)))
+		if minMax < 1 {
+			minMax = 1
+		}
+		newMax := clampInt(desiredMax, minMax, maxMax)
+		if capMax > 0 {
+			newMax = clampInt(newMax, 1, capMax)
+		}
+		setRuntimeInt(ctx, runtimeMaxConcurrencyKey(modelName), newMax)
+
+		// queue_limit: W_target = expected_seconds * queueFactor
+		exp := m.ExpectedSeconds
+		if exp <= 0 {
+			exp = 60
+		}
+		wTarget := float64(exp) * queueFactor
+		desiredQueue := int(math.Ceil(arrivalRate*wTarget)) + newMax
+		if desiredQueue < newMax {
+			desiredQueue = newMax
+		}
+
+		newQueue := oldQueue
+		if capQueue > 0 {
+			minQ := int(math.Floor(float64(oldQueue) * (1 - maxChangeRatio)))
+			maxQ := int(math.Ceil(float64(oldQueue) * (1 + maxChangeRatio)))
+			// The queue must be able to hold at least the running tasks.
+			if minQ < newMax {
+				minQ = newMax
+			}
+			if maxQ < minQ {
+				maxQ = minQ
+			}
+			newQueue = clampInt(desiredQueue, minQ, maxQ)
+			newQueue = clampInt(newQueue, newMax, capQueue)
+			setRuntimeInt(ctx, runtimeQueueLimitKey(modelName), newQueue)
+		}
+
+		out = append(out, AutoTuneResult{
+			ModelName:         modelName,
+			Samples:           s.Cnt,
+			P90Exec:           s.P90Exec,
+			CapMaxConcurrency: capMax,
+			OldMaxConcurrency: oldMax,
+			NewMaxConcurrency: newMax,
+			CapQueueLimit:     capQueue,
+			OldQueueLimit:     oldQueue,
+			NewQueueLimit:     newQueue,
+			ExpectedSeconds:   m.ExpectedSeconds,
+		})
+	}
+
+	g.Log().Infof(ctx, "[auto_tune] done models=%d windowSeconds=%d", len(out), req.WindowSeconds)
+	return &dto.AutoTuneRes{
+		List: out,
+	}, nil
+}
--- a/service/queue/queue_gate.go
+++ b/service/queue/queue_gate.go
@@ -0,0 +1,107 @@
+package queue
+
+import (
+	"context"
+	"fmt"
+	"math"
+	"time"
+
+	"github.com/gogf/gf/v2/frame/g"
+	"github.com/gogf/gf/v2/util/gconv"
+)
+
+// ===== Strict queue_limit: atomic Redis gate =====
+//
+// Background: queue_limit used to be enforced approximately with "Count + Insert", so concurrent task creation across instances could briefly exceed the limit.
+// Goal: use a Redis Lua script to check the limit and reserve a slot atomically, so the limit is never exceeded.
+//
+// The counting rule matches the previous logic: only state=0/1 (queued/running) tasks count.
+// - CreateTask occupies 1 slot after the task has been persisted
+// - A task that succeeds or fails (state->2/3) releases its slot
+// - Retrying a failed task (state 3->0) must acquire a slot again; if that fails the retry is postponed (the task stays in state=3 until the cleaner tries again)
+//
+// Note: to keep a leaked slot from filling the gate forever, slots live in a ZSET scored by expiry time and are reclaimed automatically.
+// The limit stays strict as long as a task's real lifetime is well below the slot TTL (see calcGateTTLSeconds).
+
+const (
+	queueGateKeyPrefix = "asynch:qgate:" // asynch:qgate:{modelName}
+)
+
+// queueGateAcquireLua is the Lua script that purges expired slots and then
+// atomically checks the limit and reserves a slot.
+//
+// KEYS[1] is the gate ZSET. ARGV is, in order: now, limit, expireAt, member,
+// keyTTL. Members whose score is <= now are removed first. If the remaining
+// cardinality is >= limit the script returns 0; otherwise it adds member with
+// score expireAt, sets the key's expiry to keyTTL seconds and returns 1.
+var queueGateAcquireLua = `
+local key = KEYS[1]
+local now = tonumber(ARGV[1])
+local limit = tonumber(ARGV[2])
+local expireAt = tonumber(ARGV[3])
+local member = ARGV[4]
+local keyTTL = tonumber(ARGV[5])
+
+-- 先清理过期的占位
+redis.call("ZREMRANGEBYSCORE", key, "-inf", now)
+
+local current = tonumber(redis.call("ZCARD", key) or "0")
+if current >= limit then
+  return 0
+end
+redis.call("ZADD", key, expireAt, member)
+redis.call("EXPIRE", key, keyTTL)
+return 1
+`
+
+// queueGateReleaseLua is the Lua script that releases a slot: it removes
+// ARGV[1] from the ZSET at KEYS[1] and always returns 1, so releasing a
+// member that is not present is harmless (idempotent).
+var queueGateReleaseLua = `
+local key = KEYS[1]
+local member = ARGV[1]
+redis.call("ZREM", key, member)
+return 1
+`
+
+// queueGateKey returns the Redis key of the queue gate for modelName, i.e.
+// queueGateKeyPrefix followed by the model name.
+func queueGateKey(modelName string) string {
+	return queueGateKeyPrefix + modelName
+}
+
+// calcGateTTLSeconds 计算闸门占位的“自动回收 TTL”
+// 取 expectedSeconds 的倍数并做上下限，避免任务异常导致永久占位。
+func calcGateTTLSeconds(expectedSeconds int) int {
+	// 默认至少 1 小时；最多 24 小时
+	minTTL := 3600
+	maxTTL := 24 * 3600
+	if expectedSeconds <= 0 {
+		return minTTL
+	}
+	ttl := int(math.Ceil(float64(expectedSeconds) * 10)) // 预计耗时 * 10 做兜底
+	if ttl < minTTL {
+		ttl = minTTL
+	}
+	if ttl > maxTTL {
+		ttl = maxTTL
+	}
+	return ttl
+}
+
+// AcquireQueueSlot 严格入队：原子占位（成功返回 true）
+func AcquireQueueSlot(ctx context.Context, modelName, taskId string, limit int, expectedSeconds int) (bool, error) {
+	if limit <= 0 {
+		return true, nil
+	}
+	key := queueGateKey(modelName)
+	now := time.Now().Unix()
+	ttl := calcGateTTLSeconds(expectedSeconds)
+	expireAt := now + int64(ttl)
+	// keyTTL 要略大于 member TTL，避免 key 先过期导致计数丢失
+	keyTTL := ttl + 60
+	r, err := g.Redis().Do(ctx, "EVAL", queueGateAcquireLua, 1, key, now, limit, expireAt, taskId, keyTTL)
+	if err != nil {
+		return false, fmt.Errorf("queue gate acquire failed: %w", err)
+	}
+	return gconv.Int(r) == 1, nil
+}
+
+// ReleaseQueueSlot gives back the slot held by taskId in the gate of
+// modelName. It does nothing when either argument is empty, and releasing a
+// slot that is not held is harmless. The Redis result and error are
+// deliberately discarded (best effort).
+func ReleaseQueueSlot(ctx context.Context, modelName, taskId string) {
+	if modelName != "" && taskId != "" {
+		_, _ = g.Redis().Do(ctx, "EVAL", queueGateReleaseLua, 1, queueGateKey(modelName), taskId)
+	}
+}
--- a/service/queue/runtime_tune.go
+++ b/service/queue/runtime_tune.go
@@ -0,0 +1,82 @@
+package queue
+
+import (
+	"context"
+
+	"github.com/gogf/gf/v2/frame/g"
+	"github.com/gogf/gf/v2/util/gconv"
+)
+
+// Runtime tuning values live in Redis; the caps (upper bounds) in asynch_models are never modified.
+// The upper layer calls /model/autoTune every hour to write the runtime values; Worker/CreateTask read them to take effect.
+
+const (
+	runtimeMaxCKeyPrefix  = "asynch:runtime:max_concurrency:" // + model_name
+	runtimeQueueKeyPrefix = "asynch:runtime:queue_limit:"     // + model_name
+	runtimeTTLSeconds     = 2 * 3600                          // 2 hours, so that one failed tuning run does not cause an immediate fallback
+)
+
+// runtimeMaxConcurrencyKey returns the Redis key holding the runtime
+// max_concurrency of modelName (runtimeMaxCKeyPrefix + modelName).
+func runtimeMaxConcurrencyKey(modelName string) string {
+	return runtimeMaxCKeyPrefix + modelName
+}
+// runtimeQueueLimitKey returns the Redis key holding the runtime queue_limit
+// of modelName (runtimeQueueKeyPrefix + modelName).
+func runtimeQueueLimitKey(modelName string) string {
+	return runtimeQueueKeyPrefix + modelName
+}
+
+// getRuntimeInt reads key from Redis with GET and converts the reply with
+// gconv.Int. The boolean is true only when the command succeeded, the reply
+// is non-nil and the converted value is positive; otherwise it returns
+// (0, false).
+func getRuntimeInt(ctx context.Context, key string) (int, bool) {
+	reply, err := g.Redis().Do(ctx, "GET", key)
+	if err == nil && reply != nil {
+		if n := gconv.Int(reply); n > 0 {
+			return n, true
+		}
+	}
+	return 0, false
+}
+
+// setRuntimeInt stores val under key with a TTL of runtimeTTLSeconds
+// (SETEX key ttl val). Non-positive values are not written. The Redis result
+// and error are deliberately discarded (best effort).
+func setRuntimeInt(ctx context.Context, key string, val int) {
+	if val > 0 {
+		_, _ = g.Redis().Do(ctx, "SETEX", key, runtimeTTLSeconds, val)
+	}
+}
+
+// GetRuntimeMaxConcurrency returns the runtime concurrency limit of
+// modelName, never larger than cap. When no usable runtime value is stored,
+// cap itself is returned. A non-positive cap is returned as-is without
+// consulting Redis.
+func GetRuntimeMaxConcurrency(ctx context.Context, modelName string, cap int) int {
+	if cap <= 0 {
+		return cap
+	}
+	stored, found := getRuntimeInt(ctx, runtimeMaxConcurrencyKey(modelName))
+	if !found || stored > cap {
+		return cap
+	}
+	return stored
+}
+
+// GetRuntimeQueueLimit returns the runtime queue limit of modelName, never
+// larger than cap. When no usable runtime value is stored, cap itself is
+// returned. A non-positive cap is returned as-is without consulting Redis.
+func GetRuntimeQueueLimit(ctx context.Context, modelName string, cap int) int {
+	if cap <= 0 {
+		return cap
+	}
+	stored, found := getRuntimeInt(ctx, runtimeQueueLimitKey(modelName))
+	if !found || stored > cap {
+		return cap
+	}
+	return stored
+}
+
+// clampInt limits v to the range [minV, maxV]. The lower bound is checked
+// first, so when minV > maxV a value below minV yields minV and any other
+// value yields maxV.
+func clampInt(v, minV, maxV int) int {
+	switch {
+	case v < minV:
+		return minV
+	case v > maxV:
+		return maxV
+	default:
+		return v
+	}
+}
--- a/service/queue/semaphore.go
+++ b/service/queue/semaphore.go
@@ -0,0 +1,57 @@
+package queue
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/gogf/gf/v2/frame/g"
+	"github.com/gogf/gf/v2/util/gconv"
+)
+
+// acquireLua is the Lua script that takes one token from the counter stored
+// at KEYS[1]. ARGV[1] is the maximum number of tokens and ARGV[2] a TTL in
+// seconds.
+//
+// A missing counter is read as 0. If the counter is already >= max the
+// script returns 0. Otherwise it increments the counter, sets the TTL only
+// when the counter has just become 1, and returns 1; should the incremented
+// value exceed max, the increment is undone and 0 is returned.
+//
+// NOTE(review): the TTL is set on the first increment only and is not
+// refreshed by later acquisitions — confirm this matches the intended
+// lifetime of long-held tokens.
+var acquireLua = `
+local current = tonumber(redis.call("GET", KEYS[1]) or "0")
+local max = tonumber(ARGV[1])
+local ttl = tonumber(ARGV[2])
+if current >= max then
+  return 0
+end
+current = redis.call("INCR", KEYS[1])
+if current == 1 then
+  redis.call("EXPIRE", KEYS[1], ttl)
+end
+if current > max then
+  redis.call("DECR", KEYS[1])
+  return 0
+end
+return 1
+`
+
+// releaseLua is the Lua script that returns one token: it decrements the
+// counter at KEYS[1] and deletes the key once the value is <= 0. It always
+// returns 1.
+var releaseLua = `
+local current = tonumber(redis.call("DECR", KEYS[1]) or "0")
+if current <= 0 then
+  redis.call("DEL", KEYS[1])
+end
+return 1
+`
+
+// AcquireSemaphore tries to take one concurrency token from the counter at
+// key by running acquireLua.
+//
+// It returns true when a token was obtained and false when max tokens are
+// already taken. A non-positive max means "unlimited": the call returns true
+// without contacting Redis. A non-positive ttlSeconds falls back to 3600.
+// A Redis failure is returned as a wrapped error together with false.
+func AcquireSemaphore(ctx context.Context, key string, max int, ttlSeconds int64) (bool, error) {
+	if max <= 0 {
+		return true, nil
+	}
+	ttl := ttlSeconds
+	if ttl <= 0 {
+		ttl = 3600
+	}
+	reply, err := g.Redis().Do(ctx, "EVAL", acquireLua, 1, key, max, ttl)
+	if err != nil {
+		return false, fmt.Errorf("获取并发令牌失败: %w", err)
+	}
+	acquired := gconv.Int(reply) == 1
+	return acquired, nil
+}
+
+// ReleaseSemaphore returns one concurrency token to the counter at key by
+// running releaseLua. It returns the Redis error, if any.
+func ReleaseSemaphore(ctx context.Context, key string) error {
+	if _, err := g.Redis().Do(ctx, "EVAL", releaseLua, 1, key); err != nil {
+		return err
+	}
+	return nil
+}