package extract import ( "context" "fmt" "strings" "time" "dataengine/common/report/config" "dataengine/common/report/model" "gitea.redpowerfuture.com/red-future/common/db/gfdb" "github.com/gogf/gf/v2/util/gconv" "github.com/sirupsen/logrus" ) // DailyExtractor 天级数据抽取器 type DailyExtractor struct { loader *config.ConfigLoader } // NewDailyExtractor 创建抽取器 func NewDailyExtractor() *DailyExtractor { return &DailyExtractor{ loader: config.GetLoader(), } } // ExtractDailyData 按天抽取数据(业务层定时任务调用) func (e *DailyExtractor) ExtractDailyData(ctx context.Context, businessCode, reportCode, statDate, executor string) (*model.ExtractDailyDataResp, error) { start := time.Now() logger := logrus.WithFields(logrus.Fields{ "businessCode": businessCode, "reportCode": reportCode, "statDate": statDate, }) // 1. 获取报表配置 report, err := e.loader.GetReport(ctx, businessCode, reportCode) if err != nil { return nil, fmt.Errorf("获取报表配置失败: %w", err) } // 2. 获取抽取配置 extractConfigs, err := e.loader.GetExtractConfigs(ctx, businessCode, reportCode) if err != nil { return nil, fmt.Errorf("获取抽取配置失败: %w", err) } if len(extractConfigs) == 0 { return nil, fmt.Errorf("没有可用的抽取配置") } // 3. 获取字段配置 fieldMap, err := e.loader.GetFieldMap(ctx, businessCode, reportCode) if err != nil { return nil, fmt.Errorf("获取字段配置失败: %w", err) } // 4. 确保统计宽表存在 if err := e.ensureStatTableExists(ctx, report, fieldMap); err != nil { return nil, fmt.Errorf("确保统计宽表存在失败: %w", err) } totalCount := 0 successCount := 0 failCount := 0 var lastErr error // 5. 遍历每个抽取配置 for _, ec := range extractConfigs { // 检查幂等性 exLog, err := e.loader.GetExtractLog(ctx, businessCode, reportCode, ec.ExtractCode, statDate) if err != nil { logger.Errorf("获取抽取记录失败: %v", err) } if exLog != nil && exLog.Status == model.ExtractStatusSuccess { logger.Infof("抽取配置 %s 日期 %s 已完成,跳过", ec.ExtractCode, statDate) continue } // 创建抽取记录 extractLog := &model.ExtractLog{ BusinessCode: businessCode, ReportCode: reportCode, ExtractCode: ec.ExtractCode, StatDate: statDate, ExtractType: ec.ExtractType, Status: model.ExtractStatusRunning, Executor: executor, StartTime: &start, } _ = e.loader.CreateExtractLog(ctx, extractLog) // 执行抽取 c, s, f, err := e.executeExtract(ctx, &ec, report, fieldMap, statDate) totalCount += c successCount += s failCount += f // 更新抽取记录 now := time.Now() extractLog.EndTime = &now extractLog.TotalCount = c extractLog.SuccessCount = s extractLog.FailCount = f if err != nil { extractLog.Status = model.ExtractStatusFailed extractLog.ErrorMessage = err.Error() lastErr = err logger.Errorf("抽取配置 %s 执行失败: %v", ec.ExtractCode, err) } else { extractLog.Status = model.ExtractStatusSuccess logger.Infof("抽取配置 %s 完成, 总数:%d 成功:%d 失败:%d", ec.ExtractCode, c, s, f) } if updateErr := e.loader.UpdateExtractLog(ctx, extractLog); updateErr != nil { logger.Errorf("更新抽取记录失败: %v", updateErr) } } execTime := time.Since(start).Milliseconds() logger.Infof("按天抽取完成, 总数:%d 成功:%d 失败:%d 耗时:%dms", totalCount, successCount, failCount, execTime) resp := &model.ExtractDailyDataResp{ Success: lastErr == nil, TotalCount: totalCount, SuccessCount: successCount, FailCount: failCount, ExecTimeMs: execTime, } if lastErr != nil { resp.ErrorMsg = lastErr.Error() } return resp, nil } // executeExtract 执行单个抽取配置 func (e *DailyExtractor) executeExtract(ctx context.Context, ec *model.ExtractConfig, report *model.ReportConfig, fieldMap map[string]*model.FieldConfig, statDate string) (total, success, fail int, err error) { logger := logrus.WithField("extractCode", ec.ExtractCode) // 1. 构建抽取SQL extractSQL, whereArgs, err := e.buildExtractSQL(ctx, ec, report, statDate) if err != nil { return 0, 0, 0, fmt.Errorf("构建抽取SQL失败: %w", err) } logger.Debugf("抽取SQL: %s", extractSQL) // 2. 分批抽取 batchSize := ec.BatchSize if batchSize <= 0 { batchSize = 1000 } offset := 0 for { // 添加分页 pagedSQL := fmt.Sprintf("%s LIMIT %d OFFSET %d", extractSQL, batchSize, offset) args := append(whereArgs) rows, queryErr := gfdb.DB(ctx).GetAll(ctx, pagedSQL, args...) if queryErr != nil { return total, success, fail, fmt.Errorf("抽取查询失败: %w", queryErr) } batchCount := rows.Len() if batchCount == 0 { break } // 3. 应用转换规则(仅 DIRECT 模式需注入审计字段,AGGREGATE 模式已由SQL处理) dataList := rows.List() if ec.ExtractMode != model.ExtractModeAggregate { for i := range dataList { e.applyTransformRules(ec, dataList[i]) dataList[i]["tenant_id"] = 1 dataList[i]["business_code"] = ec.BusinessCode } } // 4. 写入统计宽表 c, _, writeErr := e.batchUpsert(ctx, report.StatTableName, report.ConflictKeys, dataList) if writeErr != nil { logger.Errorf("批量写入失败 (offset=%d): %v", offset, writeErr) fail += batchCount } else { success += c } total += batchCount offset += batchSize if batchCount < batchSize { break } } return total, success, fail, nil } // buildExtractSQL 构建抽取SQL func (e *DailyExtractor) buildExtractSQL(ctx context.Context, ec *model.ExtractConfig, report *model.ReportConfig, statDate string) (string, []interface{}, error) { var args []interface{} sourceTable := ec.SourceTableName if ec.SourceTableAlias != "" { sourceTable = ec.SourceTableAlias } else { sourceTable = "s" } // 日期字段 dateField := report.DateField if dateField == "" { dateField = "stat_date" } // 判断抽取模式 mode := ec.ExtractMode if mode == "" { mode = model.ExtractModeDirect } if mode == model.ExtractModeAggregate { return e.buildAggregateExtractSQL(ec, report, sourceTable, dateField, statDate) } // === 默认 DIRECT 模式:逐行抽取 === return e.buildDirectExtractSQL(ec, report, sourceTable, dateField, statDate), args, nil } // buildDirectExtractSQL 逐行抽取模式SQL(直接映射,不做聚合) func (e *DailyExtractor) buildDirectExtractSQL(ec *model.ExtractConfig, report *model.ReportConfig, sourceTable, dateField, statDate string) string { var selectParts []string // 基础审计字段(常量注入) selectParts = append(selectParts, "0 AS id") selectParts = append(selectParts, "1 AS tenant_id") selectParts = append(selectParts, fmt.Sprintf("'%s' AS business_code", ec.BusinessCode)) selectParts = append(selectParts, "'system' AS creator") selectParts = append(selectParts, "NOW() AS created_at") selectParts = append(selectParts, "'system' AS updater") selectParts = append(selectParts, "NOW() AS updated_at") selectParts = append(selectParts, "NULL::TIMESTAMP AS deleted_at") // 日期字段 selectParts = append(selectParts, fmt.Sprintf("'%s' AS %s", statDate, dateField)) // 原始数据 selectParts = append(selectParts, "'{}'::JSONB AS raw_data") // 字段映射 for _, mapping := range ec.FieldMappings { targetField := mapping.TargetField sourceField := mapping.SourceField var expr string if mapping.TransformRule != nil { expr = e.applyTransformExpr(mapping.TransformRule, fmt.Sprintf("%s.%s", sourceTable, sourceField)) } else { expr = fmt.Sprintf("%s.%s", sourceTable, sourceField) } if mapping.DefaultValue != nil { expr = fmt.Sprintf("COALESCE(%s, '%v')", expr, mapping.DefaultValue) } selectParts = append(selectParts, fmt.Sprintf("%s AS %s", expr, targetField)) } // FROM + JOIN fromClause := e.buildFromClause(ec, sourceTable) // JOIN 字段映射 selectParts = append(selectParts, e.buildJoinFieldSelects(ec)...) // WHERE whereClause := e.buildWhereClause(ec, sourceTable, statDate) return fmt.Sprintf("SELECT %s FROM %s %s", strings.Join(selectParts, ", "), fromClause, whereClause) } // buildAggregateExtractSQL 聚合抽取模式SQL(GROUP BY + SUM/COUNT/AVG) func (e *DailyExtractor) buildAggregateExtractSQL(ec *model.ExtractConfig, report *model.ReportConfig, sourceTable, dateField, statDate string) (string, []interface{}, error) { var selectParts []string var groupByParts []string var args []interface{} // 基础审计字段(聚合模式下用常量) selectParts = append(selectParts, "ROW_NUMBER() OVER () AS id") // 伪自增ID selectParts = append(selectParts, "1 AS tenant_id") selectParts = append(selectParts, fmt.Sprintf("'%s' AS business_code", ec.BusinessCode)) selectParts = append(selectParts, "'system' AS creator") selectParts = append(selectParts, "NOW() AS created_at") selectParts = append(selectParts, "'system' AS updater") selectParts = append(selectParts, "NOW() AS updated_at") selectParts = append(selectParts, "NULL::TIMESTAMP AS deleted_at") // 日期字段(常量) selectParts = append(selectParts, fmt.Sprintf("'%s' AS %s", statDate, dateField)) // 原始数据 selectParts = append(selectParts, "'{}'::JSONB AS raw_data") // GroupByFields 集合(快速查找) gbySet := make(map[string]bool) for _, gbf := range ec.GroupByFields { gbySet[gbf] = true } // 添加 GroupBy 字段到 SELECT 和 GROUP BY for _, gbf := range ec.GroupByFields { selectParts = append(selectParts, fmt.Sprintf("%s.%s", sourceTable, gbf)) groupByParts = append(groupByParts, fmt.Sprintf("%s.%s", sourceTable, gbf)) } // 字段映射:根据 AggregateFunction 决定聚合方式 for _, mapping := range ec.FieldMappings { targetField := mapping.TargetField sourceField := mapping.SourceField // 构建源表达式 var sourceExpr string if mapping.TransformRule != nil { sourceExpr = e.applyTransformExpr(mapping.TransformRule, fmt.Sprintf("%s.%s", sourceTable, sourceField)) } else { sourceExpr = fmt.Sprintf("%s.%s", sourceTable, sourceField) } // 判断是否需要聚合 aggFunc := strings.ToUpper(mapping.AggregateFunction) if aggFunc != "" && !gbySet[sourceField] { // 聚合字段:SUM(s.xxx) / COUNT(s.xxx) / AVG(s.xxx) expr := fmt.Sprintf("%s(%s)", aggFunc, sourceExpr) if mapping.DefaultValue != nil { expr = fmt.Sprintf("COALESCE(%s, %v)", expr, mapping.DefaultValue) } selectParts = append(selectParts, fmt.Sprintf("%s AS %s", expr, targetField)) } else if gbySet[sourceField] { // GroupBy 字段不需要重复加入 SELECT(已通过 groupByFields 处理) continue } else { // 非聚合字段,也未在 GroupBy 中 → 用 MAX/MIN 取值(兼容 PG only_full_group_by) expr := fmt.Sprintf("MAX(%s)", sourceExpr) if mapping.DefaultValue != nil { expr = fmt.Sprintf("COALESCE(%s, %v)", expr, mapping.DefaultValue) } selectParts = append(selectParts, fmt.Sprintf("%s AS %s", expr, targetField)) } } // FROM + JOIN fromClause := e.buildFromClause(ec, sourceTable) // WHERE whereClause := e.buildWhereClause(ec, sourceTable, statDate) // 组合 SQL sql := fmt.Sprintf("SELECT %s FROM %s %s", strings.Join(selectParts, ", "), fromClause, whereClause) // GROUP BY if len(groupByParts) > 0 { sql += " GROUP BY " + strings.Join(groupByParts, ", ") } return sql, args, nil } // buildFromClause 构建FROM + JOIN子句 func (e *DailyExtractor) buildFromClause(ec *model.ExtractConfig, sourceTable string) string { fromClause := fmt.Sprintf("%s %s", ec.SourceTableName, sourceTable) for _, join := range ec.JoinConfigs { joinType := "LEFT JOIN" jType := strings.ToUpper(join.JoinType) if jType == "INNER" { joinType = "INNER JOIN" } else if jType == "RIGHT" { joinType = "RIGHT JOIN" } joinAlias := join.JoinAlias if joinAlias == "" { joinAlias = join.JoinTable } fromClause += fmt.Sprintf(" %s %s %s ON %s", joinType, join.JoinTable, joinAlias, join.JoinCondition) } return fromClause } // buildJoinFieldSelects 构建JOIN表的字段映射SELECT部分 func (e *DailyExtractor) buildJoinFieldSelects(ec *model.ExtractConfig) []string { var parts []string for _, join := range ec.JoinConfigs { joinAlias := join.JoinAlias if joinAlias == "" { joinAlias = join.JoinTable } for _, jm := range join.FieldMappings { targetField := jm.TargetField sourceExpr := fmt.Sprintf("%s.%s", joinAlias, jm.SourceField) if jm.TransformRule != nil { sourceExpr = e.applyTransformExpr(jm.TransformRule, sourceExpr) } parts = append(parts, fmt.Sprintf("%s AS %s", sourceExpr, targetField)) } } return parts } // buildWhereClause 构建WHERE子句 func (e *DailyExtractor) buildWhereClause(ec *model.ExtractConfig, sourceTable, statDate string) string { var whereConditions []string // 日期范围(增量抽取) if ec.ExtractType == model.ExtractTypeIncremental && ec.ExtractKeyField != "" { dateCondition := fmt.Sprintf("%s.%s::date = '%s'", sourceTable, ec.ExtractKeyField, statDate) whereConditions = append(whereConditions, dateCondition) } // 自定义过滤条件 if ec.FilterExpression != "" { whereConditions = append(whereConditions, ec.FilterExpression) } if len(whereConditions) == 0 { return "" } return "WHERE " + strings.Join(whereConditions, " AND ") } // applyTransformExpr 应用转换表达式 func (e *DailyExtractor) applyTransformExpr(rule *model.TransformRule, sourceExpr string) string { switch rule.RuleType { case "CALCULATE": if rule.Expression != "" { return strings.ReplaceAll(rule.Expression, "{source}", sourceExpr) } case "FORMAT": if rule.Format != "" { return fmt.Sprintf("TO_CHAR(%s, '%s')", sourceExpr, rule.Format) } case "MAPPING": // 在代码中运行时做映射 return sourceExpr } return sourceExpr } // applyTransformRules 应用运行时转换规则(映射等代码转换) func (e *DailyExtractor) applyTransformRules(ec *model.ExtractConfig, row map[string]interface{}) { for _, rule := range ec.TransformRules { if rule.RuleType != "MAPPING" { continue } sourceField := rule.Expression // 存储源字段名 targetField := rule.RuleCode // 存储目标字段名 if sourceVal, ok := row[sourceField]; ok { strVal := gconv.String(sourceVal) if mapped, exists := rule.Mapping[strVal]; exists { row[targetField] = mapped } } } } // ensureStatTableExists 确保统计宽表存在 func (e *DailyExtractor) ensureStatTableExists(ctx context.Context, report *model.ReportConfig, fieldMap map[string]*model.FieldConfig) error { tableName := report.StatTableName // 检查表是否存在 result, err := gfdb.DB(ctx).GetAll(ctx, "SELECT COUNT(*) FROM pg_tables WHERE tablename = $1", strings.ToLower(tableName)) if err != nil { return err } count := 0 if len(result) > 0 { count = result[0]["count"].Int() } if count == 0 { // 需要建表 return e.createStatTable(ctx, report, fieldMap) } logrus.Infof("统计宽表 %s 已存在", tableName) return nil } // createStatTable 创建统计宽表 func (e *DailyExtractor) createStatTable(ctx context.Context, report *model.ReportConfig, fieldMap map[string]*model.FieldConfig) error { var cols []string // 标准审计字段 cols = append(cols, "id BIGSERIAL PRIMARY KEY") cols = append(cols, "tenant_id BIGINT NOT NULL DEFAULT 0") cols = append(cols, "business_code VARCHAR(64) NOT NULL DEFAULT ''") cols = append(cols, "creator VARCHAR(64) DEFAULT ''") cols = append(cols, "created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()") cols = append(cols, "updater VARCHAR(64) DEFAULT ''") cols = append(cols, "updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()") cols = append(cols, "deleted_at TIMESTAMP WITH TIME ZONE") // 日期字段 dateField := report.DateField if dateField == "" { dateField = "stat_date" } cols = append(cols, fmt.Sprintf("%s VARCHAR(16) NOT NULL DEFAULT ''", dateField)) // 业务字段 for _, fc := range fieldMap { fc := fc colType := fieldTypeToPG(fc.FieldType) cols = append(cols, fmt.Sprintf("%s %s", fc.FieldCode, colType)) } // 原始数据 cols = append(cols, "raw_data JSONB DEFAULT '{}'") tableName := report.StatTableName sql := fmt.Sprintf("CREATE TABLE IF NOT EXISTS %s (\n %s\n)", tableName, strings.Join(cols, ",\n ")) logrus.Infof("创建统计宽表: %s", tableName) if _, err := gfdb.DB(ctx).Exec(ctx, sql); err != nil { return fmt.Errorf("建表失败: %w", err) } // 冲突唯一索引 if len(report.ConflictKeys) > 0 { indexName := fmt.Sprintf("uq_%s_conflict", tableName) indexCols := strings.Join(report.ConflictKeys, ", ") indexSQL := fmt.Sprintf("CREATE UNIQUE INDEX IF NOT EXISTS %s ON %s (%s)", indexName, tableName, indexCols) if _, err := gfdb.DB(ctx).Exec(ctx, indexSQL); err != nil { logrus.Warnf("创建冲突索引失败: %v", err) } } // 字段注释 for _, fc := range fieldMap { fc := fc if fc.FieldName != "" { escaped := strings.ReplaceAll(fc.FieldName, "'", "''") commentSQL := fmt.Sprintf("COMMENT ON COLUMN %s.%s IS '%s'", tableName, fc.FieldCode, escaped) if _, err := gfdb.DB(ctx).Exec(ctx, commentSQL); err != nil { logrus.Warnf("添加字段注释失败 [%s.%s]: %v", tableName, fc.FieldCode, err) } } } return nil } // batchUpsert 批量upsert写入 func (e *DailyExtractor) batchUpsert(ctx context.Context, tableName string, conflictKeys []string, rows []map[string]interface{}) (int, []string, error) { if len(rows) == 0 { return 0, nil, nil } now := time.Now() for i := range rows { if rows[i] == nil { rows[i] = make(map[string]interface{}) } rows[i]["updated_at"] = now } batchSize := 100 total := 0 var allColumns []string for i := 0; i < len(rows); i += batchSize { end := i + batchSize if end > len(rows) { end = len(rows) } batch := rows[i:end] m := gfdb.DB(ctx).Model(ctx, tableName).Data(batch) if len(conflictKeys) > 0 { keys := make([]interface{}, len(conflictKeys)) for j, k := range conflictKeys { keys[j] = k } m = m.OnConflict(keys...) } _, err := m.Save() if err != nil { logrus.Errorf("批量写入 %s 失败: %v", tableName, err) // 逐条重试 for _, row := range batch { mm := gfdb.DB(ctx).Model(ctx, tableName).Data(row) if len(conflictKeys) > 0 { keys := make([]interface{}, len(conflictKeys)) for j, k := range conflictKeys { keys[j] = k } mm = mm.OnConflict(keys...) } if _, e := mm.Save(); e != nil { logrus.Errorf("逐条写入失败: %v", e) } else { total++ } } } else { total += len(batch) } } return total, allColumns, nil } // fieldTypeToPG 字段类型转PG类型 func fieldTypeToPG(fieldType string) string { switch fieldType { case model.FieldTypeInt: return "NUMERIC(20,0) DEFAULT 0" case model.FieldTypeFloat: return "NUMERIC(20,4) DEFAULT 0" case model.FieldTypeDate: return "VARCHAR(16) DEFAULT ''" case model.FieldTypeDatetime: return "TIMESTAMP WITH TIME ZONE" case model.FieldTypeJsonb: return "JSONB DEFAULT '{}'" default: return "VARCHAR(256) DEFAULT ''" } }