feat: 支持多数据库配置与PGVector检索

This commit is contained in:
2026-04-03 17:59:05 +08:00
parent 86c2b7d66e
commit 026beea4d9
12 changed files with 304 additions and 182 deletions

View File

@@ -134,9 +134,9 @@ CREATE TABLE IF NOT EXISTS rag_knowledge_keyword (
);
-- Unique index: at most one non-deleted row per (tenant_id, dataset_id, document_id, word).
-- The partial predicate (deleted_at IS NULL) excludes soft-deleted rows from the uniqueness check.
CREATE UNIQUE INDEX uk_rag_knowledge_keyword_tenant_dataset_doc_word
ON rag_knowledge_keyword(tenant_id, dataset_id, document_id, word)
WHERE deleted_at IS NULL;
-- NOTE(review): the commented-out statement below is identical to the active one above;
-- confirm whether the unique index is meant to be enabled or disabled.
-- CREATE UNIQUE INDEX uk_rag_knowledge_keyword_tenant_dataset_doc_word
-- ON rag_knowledge_keyword(tenant_id, dataset_id, document_id, word)
-- WHERE deleted_at IS NULL;
-- Secondary indexes (for frequent business queries)
CREATE INDEX idx_keyword_tenant_id ON rag_knowledge_keyword(tenant_id);
@@ -159,4 +159,119 @@ COMMENT ON COLUMN rag_knowledge_keyword.document_id IS '文档ID';
COMMENT ON COLUMN rag_knowledge_keyword.word IS '关键词';
COMMENT ON COLUMN rag_knowledge_keyword.weight IS '权重';
--------------------pgsql创建rag_knowledge_keyword表语句---------------------------
--------------------pgsql创建rag_knowledge_keyword表语句---------------------------
-------------------- PostgreSQL DDL: rag_vector_dataset_index (begin) --------------------
-- Vector dataset index table.
-- Every statement in this section is safe to re-run: the table and its indexes use
-- IF NOT EXISTS, and the unique constraint is declared inline so the same guard covers it
-- (a standalone ALTER TABLE ... ADD CONSTRAINT fails when the constraint already exists).
CREATE TABLE IF NOT EXISTS rag_vector_dataset_index (
    -- Base (audit) columns
    id           BIGINT       PRIMARY KEY,                          -- primary key, not auto-incremented
    tenant_id    BIGINT       NOT NULL DEFAULT 0,                   -- tenant ID
    creator      VARCHAR(64)  NOT NULL,
    created_at   TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updater      VARCHAR(64)  NOT NULL,
    updated_at   TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    deleted_at   TIMESTAMP(6),                                      -- soft-delete marker; NULL means the row is live
    -- Core columns
    dataset_id   BIGINT       NOT NULL,
    name         VARCHAR(255) NOT NULL,
    collection   VARCHAR(255) NOT NULL,
    dimension    INT          NOT NULL,
    field_type   VARCHAR(50)  NOT NULL,
    metric_type  VARCHAR(50)  NOT NULL,
    status       SMALLINT     NOT NULL DEFAULT 1,                   -- status: 1 = enabled, 0 = disabled
    vector_count BIGINT       NOT NULL DEFAULT 0,
    description  TEXT,
    -- An index name is unique within a dataset.
    -- NOTE(review): this also counts soft-deleted rows (deleted_at IS NOT NULL), so a name
    -- cannot be reused after a soft delete — confirm whether that is intended.
    CONSTRAINT uk_dataset_id_name UNIQUE (dataset_id, name)
);
-- Secondary indexes
-- NOTE(review): idx_dataset_index_dataset_id is likely redundant, because the index backing
-- uk_dataset_id_name already has dataset_id as its leading column — verify before dropping.
CREATE INDEX IF NOT EXISTS idx_dataset_index_tenant_id  ON rag_vector_dataset_index (tenant_id);
CREATE INDEX IF NOT EXISTS idx_dataset_index_dataset_id ON rag_vector_dataset_index (dataset_id);
CREATE INDEX IF NOT EXISTS idx_dataset_index_status     ON rag_vector_dataset_index (status);
-- Catalog comments (stored in the database; literal text intentionally left unchanged)
COMMENT ON TABLE  rag_vector_dataset_index              IS '向量数据集索引表';
COMMENT ON COLUMN rag_vector_dataset_index.id           IS '主键ID非自增';
COMMENT ON COLUMN rag_vector_dataset_index.tenant_id    IS '租户ID';
COMMENT ON COLUMN rag_vector_dataset_index.creator      IS '创建人';
COMMENT ON COLUMN rag_vector_dataset_index.created_at   IS '创建时间';
COMMENT ON COLUMN rag_vector_dataset_index.updater      IS '更新人';
COMMENT ON COLUMN rag_vector_dataset_index.updated_at   IS '更新时间';
COMMENT ON COLUMN rag_vector_dataset_index.deleted_at   IS '删除时间(软删)';
COMMENT ON COLUMN rag_vector_dataset_index.dataset_id   IS '数据集ID';
COMMENT ON COLUMN rag_vector_dataset_index.name         IS '索引名称';
COMMENT ON COLUMN rag_vector_dataset_index.collection   IS '向量集合名称';
COMMENT ON COLUMN rag_vector_dataset_index.dimension    IS '向量维度';
COMMENT ON COLUMN rag_vector_dataset_index.field_type   IS '字段类型';
COMMENT ON COLUMN rag_vector_dataset_index.metric_type  IS '度量类型';
COMMENT ON COLUMN rag_vector_dataset_index.status       IS '状态';
COMMENT ON COLUMN rag_vector_dataset_index.vector_count IS '向量数量';
COMMENT ON COLUMN rag_vector_dataset_index.description  IS '描述';
-------------------- PostgreSQL DDL: rag_vector_dataset_index (end) --------------------
-------------------- PostgreSQL DDL: rag_vector_document_chunk (begin) --------------------
-- The pgvector extension provides the "vector" column type used below.
CREATE EXTENSION IF NOT EXISTS vector;
-- Document chunk vector table.
-- Every statement in this section is safe to re-run (IF NOT EXISTS on the extension,
-- the table and each index; COMMENT ON simply overwrites the stored comment).
CREATE TABLE IF NOT EXISTS rag_vector_document_chunk (
    -- Base (audit) columns
    id            BIGINT       PRIMARY KEY,                          -- primary key, not auto-incremented
    tenant_id     BIGINT       NOT NULL DEFAULT 0,                   -- tenant ID
    creator       VARCHAR(64)  NOT NULL,
    created_at    TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updater       VARCHAR(64)  NOT NULL,
    updated_at    TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    deleted_at    TIMESTAMP(6),                                      -- soft-delete marker; NULL means the row is live
    -- Core columns
    status        SMALLINT     NOT NULL DEFAULT 1,                   -- status: 1 = enabled, 0 = disabled
    vector_status SMALLINT     NOT NULL DEFAULT 1,                   -- vectorization status: 1 pending, 2 processing, 3 completed, 4 failed, 5 partCompleted
    dataset_id    BIGINT       NOT NULL,
    document_id   BIGINT       NOT NULL,
    content       TEXT         NOT NULL,
    content_hash  VARCHAR(128) NOT NULL,
    chunk_index   BIGINT       NOT NULL,
    -- Embedding column (pgvector); the declared dimension is fixed at 1024.
    -- NOTE(review): rag_vector_dataset_index.dimension is configurable per index, but this
    -- column only accepts 1024-dimensional vectors — confirm all embedding models match.
    vector        vector(1024) NOT NULL,
    -- Extension data
    metadata      JSONB
);
-- Secondary indexes
-- NOTE(review): there is no ANN index (HNSW / IVFFlat) on the "vector" column, so similarity
-- searches will scan sequentially; add one with the operator class matching the metric in use.
CREATE INDEX IF NOT EXISTS idx_chunk_tenant_id     ON rag_vector_document_chunk (tenant_id);
CREATE INDEX IF NOT EXISTS idx_chunk_dataset_id    ON rag_vector_document_chunk (dataset_id);
CREATE INDEX IF NOT EXISTS idx_chunk_document_id   ON rag_vector_document_chunk (document_id);
CREATE INDEX IF NOT EXISTS idx_chunk_content_hash  ON rag_vector_document_chunk (content_hash);
CREATE INDEX IF NOT EXISTS idx_chunk_status        ON rag_vector_document_chunk (status);
CREATE INDEX IF NOT EXISTS idx_chunk_vector_status ON rag_vector_document_chunk (vector_status);
-- Catalog comments (stored in the database; literal text intentionally left unchanged)
COMMENT ON TABLE  rag_vector_document_chunk               IS '文档分块向量表';
COMMENT ON COLUMN rag_vector_document_chunk.id            IS '主键ID非自增';
COMMENT ON COLUMN rag_vector_document_chunk.tenant_id     IS '租户ID';
COMMENT ON COLUMN rag_vector_document_chunk.creator       IS '创建人';
COMMENT ON COLUMN rag_vector_document_chunk.created_at    IS '创建时间';
COMMENT ON COLUMN rag_vector_document_chunk.updater       IS '更新人';
COMMENT ON COLUMN rag_vector_document_chunk.updated_at    IS '更新时间';
COMMENT ON COLUMN rag_vector_document_chunk.deleted_at    IS '删除时间(软删)';
COMMENT ON COLUMN rag_vector_document_chunk.status        IS '状态';
COMMENT ON COLUMN rag_vector_document_chunk.vector_status IS '向量生成状态';
COMMENT ON COLUMN rag_vector_document_chunk.dataset_id    IS '数据集ID';
COMMENT ON COLUMN rag_vector_document_chunk.document_id   IS '文档ID';
COMMENT ON COLUMN rag_vector_document_chunk.content       IS '分块内容';
COMMENT ON COLUMN rag_vector_document_chunk.content_hash  IS '内容哈希';
COMMENT ON COLUMN rag_vector_document_chunk.chunk_index   IS '分块序号';
COMMENT ON COLUMN rag_vector_document_chunk.vector        IS '向量数据';
COMMENT ON COLUMN rag_vector_document_chunk.metadata      IS '扩展元数据';
-------------------- PostgreSQL DDL: rag_vector_document_chunk (end) --------------------