重构数据引擎

This commit is contained in:
2026-05-29 18:39:32 +08:00
parent 3ced686cb5
commit 15db71b7ba
132 changed files with 2534 additions and 26198 deletions

252
service/sync/api_client.go Normal file
View File

@@ -0,0 +1,252 @@
package sync
import (
"bytes"
"context"
"crypto/rand"
"encoding/json"
"fmt"
"io"
"math/big"
"net/http"
"net/url"
"strings"
"time"
"github.com/sirupsen/logrus"
)
// ApiResult API 调用结果
type ApiResult struct {
Body []byte
DurationMs int64
}
// ApiClient 通用 API 客户端
type ApiClient struct {
config *PlatformConfig
client *http.Client
}
// NewApiClient 创建客户端
func NewApiClient(config *PlatformConfig) *ApiClient {
timeout := 30 * time.Second
if config.RequestTimeoutMs > 0 {
timeout = time.Duration(config.RequestTimeoutMs) * time.Millisecond
}
return &ApiClient{
config: config,
client: &http.Client{Timeout: timeout},
}
}
// Get 发送 GET 请求(无参数)
func (c *ApiClient) Get(ctx context.Context, path string) (*ApiResult, error) {
return c.doRequest(ctx, "GET", path, nil, false)
}
// PostJSON 发送 POST JSON 请求
func (c *ApiClient) PostJSON(ctx context.Context, path string, body interface{}) (*ApiResult, error) {
return c.doRequest(ctx, "POST", path, body, false)
}
// Request 通用请求方法(支持 GET/POST支持参数在 query 或 body
func (c *ApiClient) Request(ctx context.Context, method, path string, params map[string]interface{}, paramsInQuery bool) (*ApiResult, error) {
if paramsInQuery {
return c.doRequest(ctx, method, path, params, true)
}
if method == "GET" {
return c.doRequest(ctx, "GET", path, params, true)
}
return c.doRequest(ctx, method, path, params, false)
}
func (c *ApiClient) doRequest(ctx context.Context, method, path string, body interface{}, paramsInQuery bool) (result *ApiResult, err error) {
maxRetries := c.config.MaxRetries
if maxRetries <= 0 {
maxRetries = 3
}
retryDelay := time.Duration(c.config.RetryDelayMs) * time.Millisecond
if retryDelay <= 0 {
retryDelay = 1 * time.Second
}
for attempt := 0; attempt <= maxRetries; attempt++ {
result, err = c.execute(ctx, method, path, body, paramsInQuery)
if err == nil {
return result, nil
}
logrus.Warnf("请求失败 (attempt %d/%d): %v", attempt+1, maxRetries+1, err)
if attempt < maxRetries {
time.Sleep(retryDelay * time.Duration(attempt+1))
}
}
return result, fmt.Errorf("请求已重试 %d 次仍失败: %w", maxRetries, err)
}
func (c *ApiClient) execute(ctx context.Context, method, path string, body interface{}, paramsInQuery bool) (*ApiResult, error) {
start := time.Now()
fullURL := c.config.GetApiUrl(path)
// 先注入认证参数
fullURL = c.applyAuthURL(fullURL)
var reqBody io.Reader
if body != nil && !paramsInQuery {
b, _ := json.Marshal(body)
reqBody = bytes.NewBuffer(b)
}
// 如果参数在查询字符串中,拼接到 URL
if body != nil && paramsInQuery {
if paramsMap, ok := body.(map[string]interface{}); ok {
fullURL = c.buildQueryURL(fullURL, paramsMap)
}
}
logrus.Infof("请求 URL: %s", fullURL)
req, err := http.NewRequestWithContext(ctx, method, fullURL, reqBody)
if err != nil {
return nil, fmt.Errorf("创建请求失败: %w", err)
}
c.applyAuthHeader(req)
req.Header.Set("User-Agent", "data-engine/1.0")
if body != nil && !paramsInQuery {
req.Header.Set("Content-Type", "application/json")
}
resp, err := c.client.Do(req)
if err != nil {
return nil, fmt.Errorf("请求失败: %w", err)
}
defer resp.Body.Close()
respBody, _ := io.ReadAll(resp.Body)
result := &ApiResult{Body: respBody, DurationMs: time.Since(start).Milliseconds()}
if resp.StatusCode >= 400 {
return result, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(respBody))
}
return result, nil
}
// buildQueryURL 将 params 拼接到 URL 查询参数中
// 支持数组/对象类型的值自动 JSON 序列化 + URL 编码
func (c *ApiClient) buildQueryURL(rawURL string, params map[string]interface{}) string {
parsed, _ := url.Parse(rawURL)
q := parsed.Query()
for k, v := range params {
switch val := v.(type) {
case string:
q.Set(k, val)
case bool:
if val {
q.Set(k, "true")
} else {
q.Set(k, "false")
}
case float64:
// JSON 数字反序列化默认是 float64转 int 避免科学计数法
if val == float64(int64(val)) {
q.Set(k, fmt.Sprintf("%d", int64(val)))
} else {
q.Set(k, fmt.Sprintf("%v", val))
}
case float32:
q.Set(k, fmt.Sprintf("%v", val))
case int, int8, int16, int32, int64:
q.Set(k, fmt.Sprintf("%d", val))
case uint, uint8, uint16, uint32, uint64:
q.Set(k, fmt.Sprintf("%d", val))
case []interface{}, map[string]interface{}:
// 数组或对象需要 JSON 序列化后 URL 编码
b, _ := json.Marshal(v)
q.Set(k, string(b))
default:
q.Set(k, fmt.Sprintf("%v", v))
}
}
parsed.RawQuery = q.Encode()
return parsed.String()
}
func (c *ApiClient) applyAuthURL(rawURL string) string {
cfg := c.config.AuthConfig
token := c.config.AccessToken
if cfg == nil {
return rawURL
}
tokenInQuery, _ := cfg["token_in_query"].(bool)
queryKey, _ := cfg["query_key"].(string)
if queryKey == "" {
queryKey = "access_token"
}
extraParams := make(map[string]string)
if eq, ok := cfg["extra_query_params"].(map[string]interface{}); ok {
for k, v := range eq {
val := fmt.Sprintf("%v", v)
val = strings.ReplaceAll(val, "{timestamp}", fmt.Sprintf("%d", time.Now().Unix()))
val = strings.ReplaceAll(val, "{nonce}", generateNonce())
extraParams[k] = val
}
}
if !tokenInQuery && len(extraParams) == 0 {
return rawURL
}
parsed, _ := url.Parse(rawURL)
q := parsed.Query()
if tokenInQuery && token != "" {
q.Set(queryKey, token)
}
for k, v := range extraParams {
q.Set(k, v)
}
parsed.RawQuery = q.Encode()
return parsed.String()
}
func (c *ApiClient) applyAuthHeader(req *http.Request) {
cfg := c.config.AuthConfig
token := c.config.AccessToken
if cfg != nil {
if tiq, _ := cfg["token_in_query"].(bool); tiq {
return
}
}
if token == "" {
return
}
if cfg != nil {
if h, ok := cfg["header_name"].(string); ok {
f := cfg["header_format"].(string)
if f == "" {
f = "{token}"
}
req.Header.Set(h, strings.ReplaceAll(f, "{token}", token))
return
}
}
switch c.config.AuthType {
case "OAUTH2", "TOKEN":
req.Header.Set("Authorization", "Bearer "+token)
case "API_KEY":
req.Header.Set("X-API-Key", token)
}
}
func generateNonce() string {
nanoPart := time.Now().UnixNano() % 1000000000000
r, _ := rand.Int(rand.Reader, big.NewInt(10000))
return fmt.Sprintf("%012d%04d", nanoPart, r.Int64())
}

View File

@@ -0,0 +1,116 @@
package sync
import (
"context"
"fmt"
"time"
dao "dataengine/dao/copydata"
taskDto "dataengine/model/dto/copydata"
"gitea.com/red-future/common/beans"
"github.com/gogf/gf/v2/frame/g"
"github.com/sirupsen/logrus"
)
// StartCompensation 启动补偿调度器(在后台循环执行)
func StartCompensation(ctx context.Context) {
sec := g.Cfg().MustGet(ctx, "sync.compensation_interval_seconds", 300).Int()
if sec < 10 {
sec = 300
}
interval := time.Duration(sec) * time.Second
logrus.Infof("补偿调度器启动,间隔: %v", interval)
ctx = context.WithValue(ctx, "user", &beans.User{UserName: "admin", TenantId: 1})
runCompensation(ctx)
ticker := time.NewTicker(interval)
defer ticker.Stop()
for {
select {
case <-ticker.C:
runCompensation(ctx)
case <-ctx.Done():
logrus.Info("补偿调度器已停止")
return
}
}
}
func runCompensation(ctx context.Context) {
logrus.Info("=== 开始补偿扫描 ===")
tasks, err := dao.SyncTaskLog.QueryFailedTasks(ctx, &taskDto.QueryFailedTasksReq{
Status: []string{"failed"},
Limit: 50,
})
if err != nil {
logrus.Errorf("查询失败任务异常: %v", err)
return
}
if len(tasks) == 0 {
logrus.Info("当前没有需要补偿的任务")
return
}
logrus.Infof("发现 %d 个失败任务", len(tasks))
for _, task := range tasks {
if task.RetryCount >= task.MaxRetry {
logrus.Warnf("任务 %s 已达最大重试次数 %d", task.TaskID, task.MaxRetry)
dao.SyncTaskLog.Update(ctx, &taskDto.UpdateSyncTaskLogReq{
ID: task.Id,
Status: "manual_review",
ErrorMessage: fmt.Sprintf("已达最大重试次数 %d", task.MaxRetry),
})
continue
}
platformCode := task.PlatformCode
interfaceCode := task.InterfaceCode
if platformCode == "" || interfaceCode == "" {
logrus.Warnf("任务 %s 缺少 platform_code 或 interface_code跳过", task.TaskID)
continue
}
logrus.Infof("补偿: %s/%s (第 %d 次)", platformCode, interfaceCode, task.RetryCount+1)
retryCount := task.RetryCount + 1
dao.SyncTaskLog.Update(ctx, &taskDto.UpdateSyncTaskLogReq{
ID: task.Id,
Status: "retrying",
RetryCount: &retryCount,
})
_, err := SyncByConfig(ctx, platformCode, interfaceCode, false)
if err != nil {
logrus.Errorf("补偿失败: %v", err)
backoff := []int{5, 15, 30, 60, 120}
waitMin := 5
if retryCount <= len(backoff) {
waitMin = backoff[retryCount-1]
} else {
waitMin = backoff[len(backoff)-1]
}
nextRetry := time.Now().Add(time.Duration(waitMin) * time.Minute)
dao.SyncTaskLog.Update(ctx, &taskDto.UpdateSyncTaskLogReq{
ID: task.Id,
Status: "failed",
ErrorMessage: err.Error(),
ErrorCode: "COMPENSATION_FAILED",
NextRetryTime: nextRetry,
})
} else {
logrus.Infof("补偿成功: %s/%s", platformCode, interfaceCode)
now := time.Now()
dao.SyncTaskLog.Update(ctx, &taskDto.UpdateSyncTaskLogReq{
ID: task.Id,
Status: "success",
CompletedAt: now,
})
}
}
logrus.Info("=== 补偿扫描完成 ===")
}

View File

@@ -0,0 +1,70 @@
package sync
import (
"context"
"time"
"gitea.com/red-future/common/db/gfdb"
"github.com/sirupsen/logrus"
)
// InsertRows 批量写入数据
func InsertRows(ctx context.Context, tableName string, conflictKeys []string, rows []map[string]interface{}) (int, error) {
if len(rows) == 0 {
return 0, nil
}
now := time.Now()
for i := range rows {
if rows[i] == nil {
rows[i] = make(map[string]interface{})
}
if _, ok := rows[i]["created_at"]; !ok {
rows[i]["created_at"] = now
}
if _, ok := rows[i]["updated_at"]; !ok {
rows[i]["updated_at"] = now
}
}
batchSize := 100
total := 0
for i := 0; i < len(rows); i += batchSize {
end := i + batchSize
if end > len(rows) {
end = len(rows)
}
batch := rows[i:end]
m := gfdb.DB(ctx).Model(ctx, tableName).Data(batch)
if len(conflictKeys) > 0 {
keys := make([]interface{}, len(conflictKeys))
for j, k := range conflictKeys {
keys[j] = k
}
m = m.OnConflict(keys...)
}
_, err := m.Save()
if err != nil {
logrus.Errorf("批量写入 %s 失败: %v", tableName, err)
for _, row := range batch {
mm := gfdb.DB(ctx).Model(ctx, tableName).Data(row)
if len(conflictKeys) > 0 {
keys := make([]interface{}, len(conflictKeys))
for j, k := range conflictKeys {
keys[j] = k
}
mm = mm.OnConflict(keys...)
}
if _, e := mm.Save(); e != nil {
logrus.Errorf("逐条写入失败: %v", e)
} else {
total++
}
}
} else {
total += len(batch)
}
}
return total, nil
}

View File

@@ -0,0 +1,658 @@
package sync
import (
"context"
"encoding/json"
"fmt"
"strings"
"sync"
"time"
consts "dataengine/consts/public"
dao "dataengine/dao/copydata"
taskDto "dataengine/model/dto/copydata"
entity "dataengine/model/entity/dict"
"gitea.com/red-future/common/db/gfdb"
"github.com/sirupsen/logrus"
)
// SyncResult 同步结果
type SyncResult struct {
TableName string
TotalPages int
TotalRows int
InsertedRows int
Duration string
}
// PrefetchConfig 预取配置
type PrefetchConfig struct {
URL string `json:"url"`
Method string `json:"method"`
ResponsePath string `json:"response_path"`
TargetParam string `json:"target_param"`
ValueField string `json:"value_field"`
}
// SyncByConfig 执行同步
func SyncByConfig(ctx context.Context, platformCode, interfaceCode string, isFullSync bool) (*SyncResult, error) {
start := time.Now()
pm := &PlatformManager{}
platform, ifaces, err := pm.GetPlatformWithInterfaces(ctx, platformCode)
if err != nil {
return nil, fmt.Errorf("读取平台配置失败: %w", err)
}
var iface *entity.ApiInterface
for i := range ifaces {
if ifaces[i].Code == interfaceCode {
iface = &ifaces[i]
break
}
}
if iface == nil {
return nil, fmt.Errorf("未找到接口 [%s]", interfaceCode)
}
if iface.TableDefinition == nil || len(iface.TableDefinition) == 0 {
return nil, fmt.Errorf("接口 [%s] 未配置 table_definition", interfaceCode)
}
td, err := ParseTableDefinition(iface.TableDefinition)
if err != nil {
return nil, fmt.Errorf("解析表结构失败: %w", err)
}
if err := EnsureTable(ctx, td); err != nil {
return nil, fmt.Errorf("建表失败: %w", err)
}
// 检查上次同步状态(在标记 running 之前检查)
prevStatus := getSyncStatus(ctx, platformCode, interfaceCode)
lastSyncTime := int64(0)
if !isFullSync {
lastSyncTime = getLastSyncTime(ctx, platformCode, interfaceCode)
}
if prevStatus == "running" {
logrus.Warnf("检测到上次同步异常中断 [%s/%s],将重新全量同步", platformCode, interfaceCode)
lastSyncTime = 0
}
// 标记同步开始(保留 last_sync_time 不变,状态设为 running
markSyncRunning(ctx, platformCode, interfaceCode, lastSyncTime)
api := NewApiClient(platform)
prefetch := parsePrefetchConfig(iface.RequestConfig)
if prefetch != nil {
return syncWithPrefetch(ctx, api, platform, iface, ifaces, td, prefetch, isFullSync, lastSyncTime, start)
}
return syncSingleAPI(ctx, api, platform, iface, td, lastSyncTime, start)
}
// paramsInQuery 判断参数是否应放在 URL 查询字符串中
func paramsInQuery(iface *entity.ApiInterface) bool {
if iface.Method == "GET" {
return true
}
if iface.RequestConfig != nil {
if loc, _ := iface.RequestConfig["parameters_location"].(string); loc == "query" {
return true
}
}
return false
}
// syncSingleAPI 单接口分页同步
func syncSingleAPI(ctx context.Context, api *ApiClient, platform *PlatformConfig, iface *entity.ApiInterface, td *TableDefinition, lastSyncTime int64, start time.Time) (*SyncResult, error) {
pageSize := GetSyncPageSize(ctx)
if ps, ok := iface.RequestConfig["page_size"].(float64); ok {
pageSize = int(ps)
}
inQuery := paramsInQuery(iface)
method := string(iface.Method)
body := buildReqBody(iface, 1, pageSize, lastSyncTime, nil)
resp, err := api.Request(ctx, method, iface.Url, body, inQuery)
if err != nil {
recordFailure(ctx, platform.PlatformCode, iface.Code, err.Error())
return nil, fmt.Errorf("获取第一页失败: %w", err)
}
rows, totalPages, maxTime, err := parseResp(resp.Body, iface.ResponseConfig)
if err != nil {
return nil, err
}
result := &SyncResult{TableName: td.TableName, TotalPages: totalPages}
inserted, _ := savePage(ctx, td, rows)
result.InsertedRows += inserted
result.TotalRows += len(rows)
for page := 2; page <= totalPages; page++ {
body := buildReqBody(iface, page, pageSize, lastSyncTime, nil)
resp, err := api.Request(ctx, method, iface.Url, body, inQuery)
if err != nil {
logrus.Errorf("第 %d 页失败: %v", page, err)
continue
}
rows, _, mt, err := parseResp(resp.Body, iface.ResponseConfig)
if err != nil {
continue
}
inserted, _ = savePage(ctx, td, rows)
result.InsertedRows += inserted
result.TotalRows += len(rows)
if mt > maxTime {
maxTime = mt
}
time.Sleep(100 * time.Millisecond)
}
if maxTime <= 0 {
maxTime = time.Now().Unix()
}
updateSyncTime(ctx, platform.PlatformCode, iface.Code, maxTime)
result.Duration = fmt.Sprintf("%.1fs", time.Since(start).Seconds())
logrus.Infof("同步完成 - 表:%s, %d条, 写入%d条, 耗时%s", td.TableName, result.TotalRows, result.InsertedRows, result.Duration)
return result, nil
}
// syncWithPrefetch 预取模式同步(先分页拉取全部实体列表,再并发处理每个实体)
func syncWithPrefetch(ctx context.Context, api *ApiClient, platform *PlatformConfig, iface *entity.ApiInterface, allIfaces []entity.ApiInterface, td *TableDefinition, prefetch *PrefetchConfig, isFullSync bool, lastSyncTime int64, start time.Time) (*SyncResult, error) {
logrus.Infof("预取模式: %s -> %s", prefetch.URL, iface.Url)
// 1. 查找匹配 prefetch URL 的接口配置(用于获取正确的请求参数)
prefetchIface := findInterfaceByURL(allIfaces, prefetch.URL)
prefetchParams := buildPrefetchParams(iface)
if prefetchIface != nil && prefetchIface.RequestConfig != nil {
// 使用 prefetch 目标接口的 request_config 重建参数(覆盖默认值)
for k, v := range prefetchIface.RequestConfig {
if k == "headers" || k == "prefetch" || k == "page_param" ||
k == "page_size_param" || k == "time_field" || k == "parameters_location" ||
k == "filtering" || k == "group_by" || k == "date_range" {
continue
}
prefetchParams[k] = v
}
}
method := strings.ToUpper(prefetch.Method)
inQuery := paramsInQuery(iface)
allEntities := make([]interface{}, 0)
allRows := make([]map[string]interface{}, 0)
prefetchPage := 1
prefetchTotalPages := 1
for prefetchPage <= prefetchTotalPages {
params := make(map[string]interface{})
for k, v := range prefetchParams {
params[k] = v
}
pageParam := "page"
if p, ok := iface.RequestConfig["page_param"].(string); ok {
pageParam = p
}
params[pageParam] = prefetchPage
resp, err := api.Request(ctx, method, prefetch.URL, params, true)
if err != nil {
return nil, fmt.Errorf("预取第 %d 页失败: %w", prefetchPage, err)
}
entities, _, _, err := parseResp(resp.Body, nil)
if err != nil {
return nil, fmt.Errorf("解析预取第 %d 页响应失败: %w", prefetchPage, err)
}
// 收集完整数据行(用于存库)和提取的 ID 值(用于遍历)
for _, item := range entities {
allRows = append(allRows, item)
if prefetch.ValueField == "" {
allEntities = append(allEntities, item)
} else if v, ok := item[prefetch.ValueField]; ok {
// 将 float64 转 int64避免后续 URL 参数中出现科学计数法
if f, ok := v.(float64); ok {
allEntities = append(allEntities, int64(f))
} else {
allEntities = append(allEntities, v)
}
}
}
if prefetchPage == 1 {
if tp := getTotalPages(resp.Body); tp > 0 {
prefetchTotalPages = tp
} else {
break
}
}
prefetchPage++
time.Sleep(50 * time.Millisecond)
}
if len(allEntities) == 0 {
logrus.Warn("预取结果为空列表,跳过同步")
return &SyncResult{TableName: td.TableName, Duration: fmt.Sprintf("%.1fs", time.Since(start).Seconds())}, nil
}
logrus.Infof("预取到 %d 个实体(共 %d 页)", len(allEntities), prefetchPage-1)
// 2. 将预取的数据也存入库(如账户列表存入 tencent_account_relation
if prefetchIface != nil && prefetchIface.TableDefinition != nil {
prefetchTd, err := ParseTableDefinition(prefetchIface.TableDefinition)
if err == nil {
if ensureErr := EnsureTable(ctx, prefetchTd); ensureErr == nil {
saved, _ := savePage(ctx, prefetchTd, allRows)
logrus.Infof("预取数据已存库: %s, %d 条", prefetchTd.TableName, saved)
}
}
}
// 2. 并发处理每个实体的数据
result := &SyncResult{TableName: td.TableName}
pageSize := GetSyncPageSize(ctx)
if ps, ok := iface.RequestConfig["page_size"].(float64); ok {
pageSize = int(ps)
}
dataMethod := string(iface.Method)
concurrency := GetSyncConcurrency(ctx)
var mu sync.Mutex
var wg sync.WaitGroup
sem := make(chan struct{}, concurrency)
globalMaxTime := lastSyncTime
for idx, entityVal := range allEntities {
wg.Add(1)
sem <- struct{}{}
go func(idx int, val interface{}) {
defer wg.Done()
defer func() { <-sem }()
logrus.Infof(" 处理实体 [%d/%d]: %v", idx+1, len(allEntities), val)
page := 1
totalPages := 1
entityMaxTime := int64(0)
for page <= totalPages {
body := buildReqBody(iface, page, pageSize, lastSyncTime, map[string]interface{}{
prefetch.TargetParam: val,
})
resp, err := api.Request(ctx, dataMethod, iface.Url, body, inQuery)
if err != nil {
logrus.Errorf(" 实体 %v 第 %d 页失败: %v", val, page, err)
page++
time.Sleep(200 * time.Millisecond)
continue
}
rows, tp, mt, parseErr := parseResp(resp.Body, iface.ResponseConfig)
if parseErr != nil {
logrus.Errorf(" 解析响应失败: %v", parseErr)
page++
continue
}
if page == 1 {
totalPages = tp
}
for i := range rows {
rows[i][prefetch.TargetParam] = val
}
inserted, _ := savePage(ctx, td, rows)
mu.Lock()
result.InsertedRows += inserted
result.TotalRows += len(rows)
mu.Unlock()
if mt > entityMaxTime {
entityMaxTime = mt
}
page++
time.Sleep(100 * time.Millisecond)
}
if entityMaxTime > 0 {
mu.Lock()
if entityMaxTime > globalMaxTime {
globalMaxTime = entityMaxTime
}
mu.Unlock()
}
}(idx, entityVal)
}
wg.Wait()
if globalMaxTime <= 0 {
globalMaxTime = time.Now().Unix()
}
updateSyncTime(ctx, platform.PlatformCode, iface.Code, globalMaxTime)
result.Duration = fmt.Sprintf("%.1fs", time.Since(start).Seconds())
logrus.Infof("同步完成 - 表:%s, %d条, 写入%d条, 耗时%s", td.TableName, result.TotalRows, result.InsertedRows, result.Duration)
return result, nil
}
// getTotalPages 从响应中提取总页数
func getTotalPages(raw []byte) int {
var r struct {
Data map[string]interface{} `json:"data"`
}
if err := json.Unmarshal(raw, &r); err != nil {
return 0
}
if r.Data == nil {
return 0
}
if pi, ok := r.Data["page_info"].(map[string]interface{}); ok {
if tp, ok := pi["total_page"].(float64); ok {
return int(tp)
}
}
return 0
}
// buildPrefetchParams 构建预取接口的请求参数
func buildPrefetchParams(iface *entity.ApiInterface) map[string]interface{} {
params := make(map[string]interface{})
if iface.RequestConfig != nil {
pageParam := "page"
psParam := "page_size"
if p, ok := iface.RequestConfig["page_param"].(string); ok {
pageParam = p
}
if p, ok := iface.RequestConfig["page_size_param"].(string); ok {
psParam = p
}
params[pageParam] = 1
params[psParam] = 100
for k, v := range iface.RequestConfig {
if k == "headers" || k == "prefetch" || k == "page_param" ||
k == "page_size_param" || k == "time_field" || k == "parameters_location" ||
k == "filtering" || k == "group_by" || k == "date_range" {
continue
}
if k == pageParam || k == psParam {
continue
}
params[k] = v
}
}
return params
}
// parsePrefetchConfig 解析预取配置
func parsePrefetchConfig(requestConfig map[string]interface{}) *PrefetchConfig {
if requestConfig == nil {
return nil
}
raw, ok := requestConfig["prefetch"]
if !ok || raw == nil {
return nil
}
m, ok := raw.(map[string]interface{})
if !ok {
return nil
}
pc := &PrefetchConfig{}
if u, _ := m["url"].(string); u != "" {
pc.URL = u
} else {
return nil
}
if method, _ := m["method"].(string); method != "" {
pc.Method = method
} else {
pc.Method = "GET"
}
pc.ResponsePath, _ = m["response_path"].(string)
pc.TargetParam, _ = m["target_param"].(string)
pc.ValueField, _ = m["value_field"].(string)
return pc
}
// extractValues 从 JSON 响应中提取值列表
func extractValues(raw []byte, path, valueField string) ([]interface{}, error) {
var resp map[string]interface{}
if err := json.Unmarshal(raw, &resp); err != nil {
return nil, fmt.Errorf("JSON解析失败: %w", err)
}
parts := strings.Split(path, ".")
current := resp
for i, part := range parts {
if i == len(parts)-1 {
list, ok := current[part].([]interface{})
if !ok {
return nil, fmt.Errorf("路径 %s 不是数组", path)
}
var values []interface{}
for _, item := range list {
if valueField == "" {
values = append(values, item)
} else if m, ok := item.(map[string]interface{}); ok {
if v, exists := m[valueField]; exists {
values = append(values, v)
}
}
}
return values, nil
}
next, ok := current[part].(map[string]interface{})
if !ok {
return nil, fmt.Errorf("路径 %s 在 %s 处中断", path, part)
}
current = next
}
return nil, fmt.Errorf("路径 %s 不完整", path)
}
// buildReqBody 构建请求参数
func buildReqBody(iface *entity.ApiInterface, page, pageSize int, lastSyncTime int64, extraParams map[string]interface{}) map[string]interface{} {
body := make(map[string]interface{})
if iface.RequestConfig != nil {
for k, v := range iface.RequestConfig {
if k == "time_field" || k == "headers" || k == "prefetch" ||
k == "page_param" || k == "page_size_param" || k == "parameters_location" {
continue
}
body[k] = v
}
}
pageParam := "page"
psParam := "page_size"
if iface.RequestConfig != nil {
if p, ok := iface.RequestConfig["page_param"].(string); ok {
pageParam = p
}
if p, ok := iface.RequestConfig["page_size_param"].(string); ok {
psParam = p
}
}
body[pageParam] = page
body[psParam] = pageSize
// 增量同步:将 time_field 转为 API 期望的 filtering 格式
// 如 filtering=[{"field":"last_modified_time","operator":"GREATER_EQUALS","values":["1780037982"]}]
if lastSyncTime > 0 {
if tf, ok := iface.RequestConfig["time_field"].(string); ok && tf != "" {
timeFilter := map[string]interface{}{
"field": tf,
"operator": "GREATER_EQUALS",
"values": []interface{}{fmt.Sprintf("%d", lastSyncTime)},
}
// 合并已有的 filtering如果 request_config 中已定义其他过滤条件)
if existing, ok := body["filtering"].([]interface{}); ok {
body["filtering"] = append(existing, timeFilter)
} else {
body["filtering"] = []interface{}{timeFilter}
}
}
}
for k, v := range extraParams {
body[k] = v
}
return body
}
// parseResp 解析同步接口返回值
func parseResp(raw []byte, responseConfig map[string]interface{}) ([]map[string]interface{}, int, int64, error) {
var r struct {
Code int `json:"code"`
Message string `json:"message"`
Data map[string]interface{} `json:"data"`
}
if err := json.Unmarshal(raw, &r); err != nil {
return nil, 0, 0, fmt.Errorf("解析响应失败: %w", err)
}
if r.Code != 0 {
return nil, 0, 0, fmt.Errorf("API错误: code=%d, message=%s", r.Code, r.Message)
}
var rows []map[string]interface{}
totalPages := 1
maxTime := int64(0)
var listData []interface{}
if lp, ok := r.Data["list"]; ok {
listData, _ = lp.([]interface{})
} else if lp, ok := r.Data["data"]; ok {
if m, ok := lp.(map[string]interface{}); ok {
if l, ok := m["list"].([]interface{}); ok {
listData = l
}
}
}
for _, item := range listData {
if m, ok := item.(map[string]interface{}); ok {
j, _ := json.Marshal(m)
m["raw_data"] = string(j)
if t, ok := m["last_modified_time"].(float64); ok && int64(t) > maxTime {
maxTime = int64(t)
}
if t, ok := m["created_time"].(float64); ok && int64(t) > maxTime {
maxTime = int64(t)
}
rows = append(rows, m)
}
}
if pi, ok := r.Data["page_info"].(map[string]interface{}); ok {
if tp, ok := pi["total_page"].(float64); ok {
totalPages = int(tp)
} else if tp, ok := pi["total_page"].(int); ok {
totalPages = tp
}
}
return rows, totalPages, maxTime, nil
}
func savePage(ctx context.Context, td *TableDefinition, rows []map[string]interface{}) (int, error) {
if len(rows) == 0 {
return 0, nil
}
colSet := make(map[string]bool)
for _, c := range td.Columns {
colSet[c.Name] = true
}
var clean []map[string]interface{}
for _, row := range rows {
c := make(map[string]interface{})
for k, v := range row {
if colSet[k] {
c[k] = v
}
}
if r, ok := row["raw_data"]; ok {
c["raw_data"] = r
}
clean = append(clean, c)
}
return InsertRows(ctx, td.TableName, td.ConflictKeys, clean)
}
func getLastSyncTime(ctx context.Context, platformCode, interfaceCode string) int64 {
var t int64
gfdb.DB(ctx).Model(ctx, consts.SyncTrackerTable).
Fields("last_sync_time").
Where("platform_code", platformCode).
Where("interface_code", interfaceCode).
Scan(&t)
return t
}
func getSyncStatus(ctx context.Context, platformCode, interfaceCode string) string {
var s string
gfdb.DB(ctx).Model(ctx, consts.SyncTrackerTable).
Fields("sync_status").
Where("platform_code", platformCode).
Where("interface_code", interfaceCode).
Scan(&s)
return s
}
func markSyncRunning(ctx context.Context, platformCode, interfaceCode string, lastSyncTime int64) {
gfdb.DB(ctx).Model(ctx, consts.SyncTrackerTable).
Data(map[string]interface{}{
"platform_code": platformCode,
"interface_code": interfaceCode,
"last_sync_time": lastSyncTime,
"sync_status": "running",
}).
OnConflict("platform_code", "interface_code").
Save()
}
func updateSyncTime(ctx context.Context, platformCode, interfaceCode string, t int64) {
gfdb.DB(ctx).Model(ctx, consts.SyncTrackerTable).
Data(map[string]interface{}{
"platform_code": platformCode,
"interface_code": interfaceCode,
"last_sync_time": t,
"last_sync_at": time.Now(),
"sync_status": "success",
}).
OnConflict("platform_code", "interface_code").
Save()
}
func recordFailure(ctx context.Context, platformCode, interfaceCode, errMsg string) {
dao.SyncTaskLog.Create(ctx, &taskDto.CreateSyncTaskLogReq{
TaskID: fmt.Sprintf("%s_%s_%d", platformCode, interfaceCode, time.Now().UnixNano()),
TaskType: fmt.Sprintf("%s_%s", platformCode, interfaceCode),
PlatformCode: platformCode,
InterfaceCode: interfaceCode,
Status: "failed",
MaxRetry: 3,
RequestParams: map[string]interface{}{
"platform_code": platformCode,
"interface_code": interfaceCode,
"error": errMsg,
},
})
}
// findInterfaceByURL 在所有接口中查找匹配 URL 的接口
func findInterfaceByURL(ifaces []entity.ApiInterface, url string) *entity.ApiInterface {
for i := range ifaces {
if ifaces[i].Url == url {
return &ifaces[i]
}
}
return nil
}

43
service/sync/helpers.go Normal file
View File

@@ -0,0 +1,43 @@
package sync
import (
"context"
"github.com/gogf/gf/v2/frame/g"
)
// GetSyncPageSize 获取分页大小默认100
func GetSyncPageSize(ctx context.Context) int {
ps := g.Cfg().MustGet(ctx, "sync.page_size", 100).Int()
if ps < 1 || ps > 100 {
return 100
}
return ps
}
// GetSyncConcurrency 获取并发数默认5
func GetSyncConcurrency(ctx context.Context) int {
c := g.Cfg().MustGet(ctx, "sync.concurrency", 5).Int()
if c < 1 {
return 1
}
return c
}
// GetSyncInterval 获取同步间隔分钟默认60
func GetSyncInterval(ctx context.Context) int {
m := g.Cfg().MustGet(ctx, "sync.sync_interval_minutes", 60).Int()
if m < 5 {
return 60
}
return m
}
// GetRetryCount 获取重试次数默认3
func GetRetryCount(ctx context.Context) int {
r := g.Cfg().MustGet(ctx, "sync.retry_count", 3).Int()
if r < 0 {
return 3
}
return r
}

View File

@@ -0,0 +1,111 @@
package sync
import (
"context"
"fmt"
dao "dataengine/dao/dict"
dto "dataengine/model/dto/dict"
entity "dataengine/model/entity/dict"
"github.com/sirupsen/logrus"
)
// PlatformConfig 运行时平台配置
type PlatformConfig struct {
*entity.DatasourcePlatform
AccessToken string
}
// GetApiUrl 拼接完整 API URL
func (c *PlatformConfig) GetApiUrl(apiPath string) string {
if c.ApiBaseUrl == "" {
return apiPath
}
return c.ApiBaseUrl + apiPath
}
// PlatformManager 平台配置管理器
type PlatformManager struct{}
// GetPlatform 根据平台编码获取配置
func (m *PlatformManager) GetPlatform(ctx context.Context, platformCode string) (*PlatformConfig, error) {
platform, err := dao.DatasourcePlatform.GetByPlatformCode(ctx, platformCode)
if err != nil {
return nil, fmt.Errorf("查询平台配置失败 [%s]: %w", platformCode, err)
}
if platform == nil {
return nil, fmt.Errorf("平台不存在 [%s]", platformCode)
}
if platform.Status != "ACTIVE" {
return nil, fmt.Errorf("平台 [%s] 未启用", platformCode)
}
cfg := &PlatformConfig{DatasourcePlatform: platform}
switch platform.AuthType {
case "TOKEN":
cfg.AccessToken = platform.Token
case "OAUTH2":
if platform.Token != "" {
cfg.AccessToken = platform.Token
}
case "API_KEY":
cfg.AccessToken = platform.ApiKey
default:
logrus.Warnf("平台 %s 认证类型 %s 未处理", platformCode, platform.AuthType)
}
return cfg, nil
}
// GetInterfaces 获取平台下的活跃接口列表
func (m *PlatformManager) GetInterfaces(ctx context.Context, platformId int64) ([]entity.ApiInterface, error) {
interfaces, _, err := dao.ApiInterface.List(ctx, &dto.ListApiInterfaceReq{
PlatformId: platformId,
Status: "active",
})
if err != nil {
return nil, err
}
return interfaces, nil
}
// GetInterfaceByCode 根据编码获取接口定义
func (m *PlatformManager) GetInterfaceByCode(ctx context.Context, platformId int64, code string) (*entity.ApiInterface, error) {
all, _, err := dao.ApiInterface.List(ctx, &dto.ListApiInterfaceReq{
PlatformId: platformId,
Code: code,
Status: "active",
})
if err != nil {
return nil, err
}
if len(all) == 0 {
return nil, fmt.Errorf("未找到接口 [code=%s]", code)
}
return &all[0], nil
}
// GetPlatformWithInterfaces 获取平台及所有接口
func (m *PlatformManager) GetPlatformWithInterfaces(ctx context.Context, platformCode string) (*PlatformConfig, []entity.ApiInterface, error) {
cfg, err := m.GetPlatform(ctx, platformCode)
if err != nil {
return nil, nil, err
}
interfaces, err := m.GetInterfaces(ctx, cfg.ID)
if err != nil {
return nil, nil, err
}
return cfg, interfaces, nil
}
// FindInterfaceUrl 在接口列表中查找指定编码的 URL
func FindInterfaceUrl(ifaces []entity.ApiInterface, code string) string {
for _, iface := range ifaces {
if iface.Code == code {
return iface.Url
}
}
return ""
}

View File

@@ -0,0 +1,93 @@
package sync
import (
"context"
"time"
dao "dataengine/dao/dict"
dto "dataengine/model/dto/dict"
"gitea.com/red-future/common/beans"
"github.com/gogf/gf/v2/frame/g"
"github.com/sirupsen/logrus"
)
// StartAutoSync 启动自动同步(独立 goroutine启动后自动循环执行
func StartAutoSync(ctx context.Context) {
interval := GetSyncInterval(ctx)
logrus.Infof("自动同步调度器启动,间隔: %d 分钟", interval)
// 首次执行:根据 sync_tracker 是否有记录自动判断全量/增量
// 无记录 → 全量,有记录 → 增量
runAutoSync(ctx)
ticker := time.NewTicker(time.Duration(interval) * time.Minute)
defer ticker.Stop()
for {
select {
case <-ticker.C:
runAutoSync(ctx)
case <-ctx.Done():
logrus.Info("自动同步调度器已停止")
return
}
}
}
func runAutoSync(ctx context.Context) {
logrus.Info("=== 开始自动同步 ===")
// 注入用户上下文ORM 框架需要用于租户隔离)
ctx = context.WithValue(ctx, "user", &beans.User{UserName: "admin", TenantId: 1})
// 查询所有 ACTIVE 平台
platforms, _, err := dao.DatasourcePlatform.List(ctx, &dto.ListDatasourcePlatformReq{
Status: "ACTIVE",
})
if err != nil {
logrus.Errorf("查询平台列表失败: %v", err)
return
}
for _, p := range platforms {
// 查询该平台下有 table_definition 的接口
interfaces, _, err := dao.ApiInterface.List(ctx, &dto.ListApiInterfaceReq{
PlatformId: p.ID,
Status: "active",
})
if err != nil {
logrus.Errorf("查询接口列表失败 [platform=%s]: %v", p.PlatformCode, err)
continue
}
for _, iface := range interfaces {
if iface.TableDefinition == nil || len(iface.TableDefinition) == 0 {
continue
}
logrus.Infof("自动同步: %s / %s", p.PlatformCode, iface.Code)
// isFullSync=false 表示去查 sync_tracker
// 有记录 → 增量,无记录 → lastSyncTime=0 → 全量
_, err := SyncByConfig(ctx, p.PlatformCode, iface.Code, false)
if err != nil {
logrus.Errorf("自动同步失败 [%s/%s]: %v", p.PlatformCode, iface.Code, err)
}
}
}
logrus.Info("=== 自动同步完成 ===")
}
// InitAndStartAutoSync 在 main 中调用:初始化配置后启动自动同步和补偿
func InitAndStartAutoSync(ctx context.Context) {
// 读取配置中的同步开关
enabled := g.Cfg().MustGet(ctx, "sync.auto_sync_enabled", true).Bool()
if enabled {
go StartAutoSync(ctx)
} else {
logrus.Info("自动同步已关闭")
}
// 补偿调度器独立启动,不受 auto_sync_enabled 控制
go StartCompensation(ctx)
}

View File

@@ -0,0 +1,103 @@
package sync
import (
"context"
"fmt"
"strings"
"gitea.com/red-future/common/db/gfdb"
"github.com/sirupsen/logrus"
)
// ColumnDef 列定义
type ColumnDef struct {
Name string `json:"name"`
Type string `json:"type"`
Comment string `json:"comment,omitempty"`
}
// TableDefinition 表结构定义
type TableDefinition struct {
TableName string `json:"table_name"`
Columns []ColumnDef `json:"columns"`
ConflictKeys []string `json:"conflict_keys,omitempty"`
}
// ParseTableDefinition 解析 table_definition JSON
func ParseTableDefinition(raw map[string]interface{}) (*TableDefinition, error) {
td := &TableDefinition{}
name, _ := raw["table_name"].(string)
if name == "" {
return nil, fmt.Errorf("table_definition 缺少 table_name")
}
td.TableName = name
colsRaw, _ := raw["columns"].([]interface{})
for _, c := range colsRaw {
cm, _ := c.(map[string]interface{})
if cm == nil {
continue
}
n, _ := cm["name"].(string)
t, _ := cm["type"].(string)
comment, _ := cm["comment"].(string)
if n == "" || t == "" {
continue
}
td.Columns = append(td.Columns, ColumnDef{Name: n, Type: t, Comment: comment})
}
if keys, _ := raw["conflict_keys"].([]interface{}); keys != nil {
for _, k := range keys {
if s, ok := k.(string); ok {
td.ConflictKeys = append(td.ConflictKeys, s)
}
}
}
if len(td.Columns) == 0 {
return nil, fmt.Errorf("table_definition 列定义为空")
}
return td, nil
}
// EnsureTable 确保表存在
func EnsureTable(ctx context.Context, td *TableDefinition) error {
sql := buildCreateSQL(td)
logrus.Infof("建表: %s", td.TableName)
_, err := gfdb.DB(ctx).Exec(ctx, sql)
if err != nil {
return fmt.Errorf("建表失败 [%s]: %w", td.TableName, err)
}
logrus.Infof("表 %s 已就绪", td.TableName)
return nil
}
func buildCreateSQL(td *TableDefinition) string {
cols := []string{
"id BIGSERIAL PRIMARY KEY",
"tenant_id BIGINT NOT NULL DEFAULT 0",
"creator VARCHAR(64) DEFAULT ''",
"created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()",
"updater VARCHAR(64) DEFAULT ''",
"updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()",
"deleted_at TIMESTAMP WITH TIME ZONE",
}
for _, c := range td.Columns {
cols = append(cols, fmt.Sprintf("%s %s", c.Name, c.Type))
}
cols = append(cols, "raw_data JSONB DEFAULT '{}'")
// 添加复合唯一索引(用于 ON CONFLICT upsert所有 conflict_keys 作为一个复合索引)
var constraints []string
if len(td.ConflictKeys) > 0 {
cols := strings.Join(td.ConflictKeys, ", ")
indexName := fmt.Sprintf("uq_%s_conflict", td.TableName)
constraints = append(constraints,
fmt.Sprintf("CREATE UNIQUE INDEX IF NOT EXISTS %s ON %s (%s)", indexName, td.TableName, cols))
}
sql := fmt.Sprintf("CREATE TABLE IF NOT EXISTS %s (\n %s\n);\n", td.TableName, strings.Join(cols, ",\n "))
if len(constraints) > 0 {
sql += strings.Join(constraints, ";\n") + ";"
}
return sql
}