Aller au contenu

Architecture Base de Données

PostgreSQL 16 avec pgvector pour les embeddings.

Modèles (13)

erDiagram
    TENANT ||--o{ USER : has
    USER ||--o{ CONVERSATION : owns
    CONVERSATION ||--o{ MESSAGE : contains
    USER ||--o{ DOCUMENT : uploads
    DOCUMENT ||--o{ EMBEDDING : has
    USER ||--o{ FOLDER : organizes

    TENANT {
        uuid id PK
        string name
        jsonb settings
    }

    USER {
        uuid id PK
        uuid tenant_id FK
        string email
        string role
    }

    CONVERSATION {
        uuid id PK
        uuid user_id FK
        uuid tenant_id
        string title
        text system_prompt
    }

    MESSAGE {
        uuid id PK
        uuid conversation_id FK
        string role
        text content
        jsonb metadata
    }

    DOCUMENT {
        uuid id PK
        uuid user_id FK
        uuid folder_id FK
        string filename
        string status
    }

    EMBEDDING {
        uuid id PK
        uuid document_id FK
        text content
        vector embedding
    }

Row-Level Security (RLS)

-- Activation RLS
ALTER TABLE conversations ENABLE ROW LEVEL SECURITY;

-- Politique d'isolation
CREATE POLICY tenant_isolation ON conversations
    USING (tenant_id = current_setting('app.current_tenant_id')::uuid);

pgvector

-- Extension
CREATE EXTENSION vector;

-- Table embeddings
CREATE TABLE embeddings (
    id UUID PRIMARY KEY,
    document_id UUID REFERENCES documents(id),
    content TEXT,
    embedding vector(1536)  -- OpenAI dimension
);

-- Index IVFFlat
CREATE INDEX ON embeddings
    USING ivfflat (embedding vector_cosine_ops)
    WITH (lists = 100);

Recherche Sémantique

SELECT
    d.filename,
    e.content,
    1 - (e.embedding <=> query_embedding) as similarity
FROM embeddings e
JOIN documents d ON e.document_id = d.id
WHERE tenant_id = $1
ORDER BY e.embedding <=> query_embedding
LIMIT 5;

Connection Pooling

engine = create_async_engine(
    DATABASE_URL,
    pool_size=20,        # Connexions maintenues
    max_overflow=40,     # Burst to 60
    pool_pre_ping=True   # Health check
)