feat: 新增操作日志、任务分页查询与模型失败重试优化
- 新增操作日志表(asynch_op_log)及对应 DAO,记录任务创建等操作的审计信息
- 新增任务分页查询接口(ListTask)及对应 DTO、Service 和 DAO 方法
- 优化模型调用失败重试逻辑:支持配置重试排队策略(插队到队首或队尾)
- 新增临时文件存储机制:当模型调用成功但 OSS 上传失败时,下次仅重试 OSS 上传
- 模型配置新增 retry_queue_max_seconds 字段,控制失败重试排队策略
- 更新数据库表结构(asynch_models、asynch_task、新增 asynch_op_log)并同步更新 SQL
- 配置文件调整:超时单位改为秒,更新服务地址和轮询间隔
- 修复模型列表查询,使其支持按名称模糊搜索
This commit is contained in:
@@ -73,8 +73,11 @@ func (d *modelDao) GetByID(ctx context.Context, id int64) (m *entity.AsynchModel
|
||||
return
|
||||
}
|
||||
|
||||
func (d *modelDao) List(ctx context.Context, pageNum, pageSize int) (list []*entity.AsynchModel, total int64, err error) {
|
||||
model := gfdb.DB(ctx).Model(ctx, public.TableNameModel).OrderDesc(entity.AsynchModelCol.CreatedAt)
|
||||
func (d *modelDao) List(ctx context.Context, pageNum, pageSize int, modelNameLike string) (list []*entity.AsynchModel, total int64, err error) {
|
||||
model := gfdb.DB(ctx).Model(ctx, public.TableNameModel).Where("deleted_at IS NULL").OrderDesc(entity.AsynchModelCol.CreatedAt)
|
||||
if modelNameLike != "" {
|
||||
model = model.WhereLike(entity.AsynchModelCol.ModelName, "%"+modelNameLike+"%")
|
||||
}
|
||||
if pageNum > 0 && pageSize > 0 {
|
||||
model = model.Page(pageNum, pageSize)
|
||||
}
|
||||
@@ -86,4 +89,3 @@ func (d *modelDao) List(ctx context.Context, pageNum, pageSize int) (list []*ent
|
||||
err = r.Structs(&list)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
22
dao/op_log_dao.go
Normal file
22
dao/op_log_dao.go
Normal file
@@ -0,0 +1,22 @@
|
||||
package dao
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"model-asynch/consts/public"
|
||||
"model-asynch/model/entity"
|
||||
|
||||
"gitea.com/red-future/common/db/gfdb"
|
||||
)
|
||||
|
||||
// opLogDao groups the data-access methods for the operation-log table.
type opLogDao struct{}
|
||||
|
||||
// OpLog is the package-level opLogDao instance.
var OpLog = &opLogDao{}
|
||||
|
||||
func (d *opLogDao) Insert(ctx context.Context, log *entity.AsynchOpLog) (id int64, err error) {
|
||||
r, err := gfdb.DB(ctx).Model(ctx, public.TableNameOpLog).Data(log).Insert()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return r.LastInsertId()
|
||||
}
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"gitea.com/red-future/common/db/gfdb"
|
||||
"github.com/gogf/gf/v2/database/gdb"
|
||||
"github.com/gogf/gf/v2/os/gtime"
|
||||
"github.com/gogf/gf/v2/util/gconv"
|
||||
)
|
||||
|
||||
// Task is the package-level taskDao instance.
var Task = &taskDao{}
|
||||
@@ -136,6 +137,31 @@ func (d *taskDao) CountActiveByModel(ctx context.Context, modelName string) (int
|
||||
return int64(n), err
|
||||
}
|
||||
|
||||
// List 任务分页查询(受 gfdb 租户 Hook 影响)
|
||||
func (d *taskDao) List(ctx context.Context, pageNum, pageSize int, modelNameLike, taskIDLike string, state *int) (list []*entity.AsynchTask, total int64, err error) {
|
||||
m := gfdb.DB(ctx).Model(ctx, public.TableNameTask).Where("deleted_at IS NULL")
|
||||
if modelNameLike != "" {
|
||||
m = m.WhereLike(entity.AsynchTaskCol.ModelName, "%"+modelNameLike+"%")
|
||||
}
|
||||
if taskIDLike != "" {
|
||||
m = m.WhereLike(entity.AsynchTaskCol.TaskID, "%"+taskIDLike+"%")
|
||||
}
|
||||
if state != nil {
|
||||
m = m.Where(entity.AsynchTaskCol.State, *state)
|
||||
}
|
||||
m = m.OrderDesc(entity.AsynchTaskCol.CreatedAt)
|
||||
if pageNum > 0 && pageSize > 0 {
|
||||
m = m.Page(pageNum, pageSize)
|
||||
}
|
||||
r, totalInt, err := m.AllAndCount(false)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
total = gconv.Int64(totalInt)
|
||||
err = r.Structs(&list)
|
||||
return
|
||||
}
|
||||
|
||||
// ClaimPending 抢占 pending 任务(state=0),并在同一事务中更新为 running(state=1)
|
||||
// 使用 PostgreSQL: FOR UPDATE SKIP LOCKED 避免多 worker 重复消费
|
||||
func (d *taskDao) ClaimPending(ctx context.Context, batchSize int) (tasks []*entity.AsynchTask, err error) {
|
||||
|
||||
@@ -20,7 +20,7 @@ func (d *taskDao) ClaimPendingGlobal(ctx context.Context, batchSize int) (tasks
|
||||
}
|
||||
err = gfdb.DB(ctx).Transaction(ctx, func(ctx context.Context, tx gdb.TX) error {
|
||||
sql := fmt.Sprintf(
|
||||
`SELECT id, tenant_id, model_name, task_id, input_ref, request_payload
|
||||
`SELECT id, tenant_id, model_name, task_id, input_ref, request_payload, phase, tmp_file
|
||||
FROM %s
|
||||
WHERE deleted_at IS NULL AND state = 0
|
||||
ORDER BY enqueue_at ASC
|
||||
@@ -58,7 +58,7 @@ func (d *taskDao) ClaimPendingGlobal(ctx context.Context, batchSize int) (tasks
|
||||
func (d *taskDao) UpdateSuccessGlobal(ctx context.Context, id int64, ossFile, fileType string, fileSize int64, expireAt *gtime.Time) error {
|
||||
now := gtime.Now()
|
||||
_, err := gfdb.DB(ctx).Exec(ctx,
|
||||
fmt.Sprintf(`UPDATE %s SET state=2, oss_file=?, file_type=?, file_size=?, error_msg='', finished_at=?, expire_at=NULL, updated_at=? WHERE id=?`, public.TableNameTask),
|
||||
fmt.Sprintf(`UPDATE %s SET state=2, oss_file=?, file_type=?, file_size=?, error_msg='', finished_at=?, expire_at=NULL, phase=0, tmp_file='', updated_at=? WHERE id=?`, public.TableNameTask),
|
||||
ossFile, fileType, fileSize, now, now, id,
|
||||
)
|
||||
return err
|
||||
@@ -67,12 +67,31 @@ func (d *taskDao) UpdateSuccessGlobal(ctx context.Context, id int64, ossFile, fi
|
||||
func (d *taskDao) UpdateFailedGlobal(ctx context.Context, id int64, errorMsg string) error {
|
||||
now := gtime.Now()
|
||||
_, err := gfdb.DB(ctx).Exec(ctx,
|
||||
fmt.Sprintf(`UPDATE %s SET state=3, error_msg=?, finished_at=?, updated_at=? WHERE id=?`, public.TableNameTask),
|
||||
fmt.Sprintf(`UPDATE %s SET state=3, error_msg=?, finished_at=?, phase=0, tmp_file='', updated_at=? WHERE id=?`, public.TableNameTask),
|
||||
errorMsg, now, now, id,
|
||||
)
|
||||
return err
|
||||
}
|
||||
|
||||
// UpdateFailedKeepTmpGlobal OSS 上传失败:保留 phase/tmp_file,下一轮仅重试 OSS 上传
|
||||
func (d *taskDao) UpdateFailedKeepTmpGlobal(ctx context.Context, id int64, errorMsg string) error {
|
||||
now := gtime.Now()
|
||||
_, err := gfdb.DB(ctx).Exec(ctx,
|
||||
fmt.Sprintf(`UPDATE %s SET state=3, error_msg=?, finished_at=?, phase=1, updated_at=? WHERE id=?`, public.TableNameTask),
|
||||
errorMsg, now, now, id,
|
||||
)
|
||||
return err
|
||||
}
|
||||
|
||||
// UpdateTmpAfterModelGlobal 模型调用成功后,写入临时文件路径并标记 phase=1
|
||||
func (d *taskDao) UpdateTmpAfterModelGlobal(ctx context.Context, id int64, tmpFile string) error {
|
||||
_, err := gfdb.DB(ctx).Exec(ctx,
|
||||
fmt.Sprintf(`UPDATE %s SET phase=1, tmp_file=?, updated_at=NOW() WHERE id=?`, public.TableNameTask),
|
||||
tmpFile, id,
|
||||
)
|
||||
return err
|
||||
}
|
||||
|
||||
func (d *taskDao) SoftDeleteByTaskIDGlobal(ctx context.Context, taskID string) error {
|
||||
_, err := gfdb.DB(ctx).Exec(ctx,
|
||||
fmt.Sprintf(`UPDATE %s SET deleted_at=NOW(), updated_at=NOW() WHERE task_id=? AND deleted_at IS NULL`, public.TableNameTask),
|
||||
@@ -113,7 +132,8 @@ func (d *taskDao) ListFailedRetryableGlobal(ctx context.Context, limit int) (lis
|
||||
}
|
||||
r, err := gfdb.DB(ctx).GetAll(ctx,
|
||||
fmt.Sprintf(`
|
||||
SELECT t.*
|
||||
SELECT t.*,
|
||||
m.retry_queue_max_seconds AS retry_queue_max_seconds
|
||||
FROM %s t
|
||||
JOIN %s m
|
||||
ON t.tenant_id = m.tenant_id
|
||||
@@ -132,11 +152,13 @@ SELECT t.*
|
||||
return
|
||||
}
|
||||
|
||||
// RequeueForRetryGlobal 将任务重新入队(state=0,enqueue_at=now),并将 retry_count +1
|
||||
func (d *taskDao) RequeueForRetryGlobal(ctx context.Context, id int64) error {
|
||||
// RequeueForRetryGlobal 将任务重新入队(state=0),并将 retry_count +1
|
||||
// enqueueAt 用于控制重试任务在队列中的位置:
|
||||
// - enqueueAt 越早,越靠前(ClaimPendingGlobal 按 enqueue_at ASC 抢占)
|
||||
func (d *taskDao) RequeueForRetryGlobal(ctx context.Context, id int64, enqueueAt time.Time) error {
|
||||
_, err := gfdb.DB(ctx).Exec(ctx,
|
||||
fmt.Sprintf(`UPDATE %s SET state=0, retry_count=retry_count+1, enqueue_at=NOW(), updated_at=NOW() WHERE id=? AND state=3 AND deleted_at IS NULL`, public.TableNameTask),
|
||||
id,
|
||||
fmt.Sprintf(`UPDATE %s SET state=0, retry_count=retry_count+1, enqueue_at=?, updated_at=NOW() WHERE id=? AND state=3 AND deleted_at IS NULL`, public.TableNameTask),
|
||||
enqueueAt, id,
|
||||
)
|
||||
return err
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user