Architecture Base de Données¶
PostgreSQL 16 avec pgvector pour les embeddings.
Modèles (13)¶
erDiagram
TENANT ||--o{ USER : has
USER ||--o{ CONVERSATION : owns
CONVERSATION ||--o{ MESSAGE : contains
USER ||--o{ DOCUMENT : uploads
DOCUMENT ||--o{ EMBEDDING : has
USER ||--o{ FOLDER : organizes
TENANT {
uuid id PK
string name
jsonb settings
}
USER {
uuid id PK
uuid tenant_id FK
string email
string role
}
CONVERSATION {
uuid id PK
uuid user_id FK
uuid tenant_id
string title
text system_prompt
}
MESSAGE {
uuid id PK
uuid conversation_id FK
string role
text content
jsonb metadata
}
DOCUMENT {
uuid id PK
uuid user_id FK
uuid folder_id FK
string filename
string status
}
EMBEDDING {
uuid id PK
uuid document_id FK
text content
vector embedding
} Row-Level Security (RLS)¶
-- Activation RLS
ALTER TABLE conversations ENABLE ROW LEVEL SECURITY;
-- Politique d'isolation
CREATE POLICY tenant_isolation ON conversations
USING (tenant_id = current_setting('app.current_tenant_id')::uuid);
pgvector¶
-- Extension
CREATE EXTENSION vector;
-- Table embeddings
CREATE TABLE embeddings (
id UUID PRIMARY KEY,
document_id UUID REFERENCES documents(id),
content TEXT,
embedding vector(1536) -- OpenAI dimension
);
-- Index IVFFlat
CREATE INDEX ON embeddings
USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 100);
Recherche Sémantique¶
SELECT
d.filename,
e.content,
1 - (e.embedding <=> query_embedding) as similarity
FROM embeddings e
JOIN documents d ON e.document_id = d.id
WHERE tenant_id = $1
ORDER BY e.embedding <=> query_embedding
LIMIT 5;