Building a RAG System
This guide walks through building a Retrieval-Augmented Generation (RAG) system with LatticeDB. We'll chunk documents, generate embeddings, store them with graph relationships, and query using vector search combined with graph traversal.
Architecture
A typical RAG system with LatticeDB:
- Ingest — Split documents into chunks, generate embeddings, store in the graph
- Link — Create edges between chunks, documents, authors, topics
- Retrieve — Vector search finds relevant chunks, graph traversal gathers context
- Generate — Pass retrieved context to an LLM
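End to end, the flow looks roughly like this; ingest_document and retrieve_context are defined in Steps 2 and 4 below, and the generation call is sketched in Step 5:

# High-level flow; the helper functions are defined in the steps that follow.
ingest_document(db, title, author_name, chunks)                    # Ingest + Link
context = retrieve_context(db, "How does self-attention work?")    # Retrieve
# ...build a prompt from `context` and call your LLM (see Step 5)  # Generate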
Step 1: Set Up the Database
from latticedb import Database, hash_embed

db = Database(
    "rag.db",
    create=True,
    enable_vector=True,
    vector_dimensions=128,  # Match your embedding model's output
)
db.open()
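As a quick sanity check, hash_embed should produce vectors that match the configured dimensionality (this snippet assumes it returns a plain sequence of floats):

# Sanity check on the built-in hash embedder; assumes a sequence of floats is returned.
vec = hash_embed("What is self-attention?", dimensions=128)
assert len(vec) == 128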
For production, use a real embedding model via the HTTP client:
from latticedb import EmbeddingClient

client = EmbeddingClient(
    "http://localhost:11434",
    model="nomic-embed-text",
)
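The examples below use hash_embed for simplicity; if you switch to the EmbeddingClient, replace each hash_embed call with the client's embedding method. The method name embed() below is an assumption; check the EmbeddingClient API for the actual call.

# Hypothetical call: `embed` is an assumed method name, not confirmed by this guide.
# embedding = client.embed(text)
# txn.set_vector(chunk.id, "embedding", embedding)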
Step 2: Ingest Documents
Split documents into chunks and store them with graph relationships:
def ingest_document(db, title, author_name, chunks):
    with db.write() as txn:
        # Create the document and author nodes
        doc = txn.create_node(
            labels=["Document"],
            properties={"title": title},
        )
        author = txn.create_node(
            labels=["Person"],
            properties={"name": author_name},
        )
        txn.create_edge(doc.id, author.id, "AUTHORED_BY")

        # Create chunks with embeddings
        prev_chunk = None
        for i, text in enumerate(chunks):
            chunk = txn.create_node(
                labels=["Chunk"],
                properties={"text": text, "position": i},
            )

            # Store embedding
            embedding = hash_embed(text, dimensions=128)
            txn.set_vector(chunk.id, "embedding", embedding)

            # Index for full-text search
            txn.fts_index(chunk.id, text)

            # Link chunk to document
            txn.create_edge(chunk.id, doc.id, "PART_OF")

            # Link sequential chunks
            if prev_chunk is not None:
                txn.create_edge(prev_chunk.id, chunk.id, "NEXT")
            prev_chunk = chunk

        txn.commit()
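A quick usage example with placeholder data:

# Ingest a short document; the title, author, and chunk text are illustrative.
ingest_document(
    db,
    title="Introduction to Transformers",
    author_name="A. Researcher",
    chunks=[
        "Self-attention lets each token attend to every other token in the sequence.",
        "Multi-head attention runs several attention functions in parallel.",
    ],
)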
Step 3: Add Topic Links
Enrich the graph with topic relationships:
with db.write() as txn:
    ml_topic = txn.create_node(
        labels=["Topic"],
        properties={"name": "Machine Learning"},
    )

    # Link documents to topics (`doc` is a Document node created or fetched earlier)
    txn.create_edge(doc.id, ml_topic.id, "ABOUT")
    txn.commit()
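With topics in place, retrieval can also be scoped by topic. The query below assumes the same dialect used in Step 4:

# Find documents about a given topic by traversing ABOUT edges.
results = db.query("""
    MATCH (doc:Document)-[:ABOUT]->(topic:Topic)
    WHERE topic.name = $topic
    RETURN doc.title
""", parameters={"topic": "Machine Learning"})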
Step 4: Query — Vector Search + Graph Context
The key advantage of LatticeDB: retrieve by similarity, then traverse the graph for additional context.
def retrieve_context(db, query_text, k=5):
    """Retrieve relevant chunks along with their document and author."""
    query_vec = hash_embed(query_text, dimensions=128)

    # Find similar chunks and traverse to their documents and authors
    results = db.query("""
        MATCH (chunk:Chunk)-[:PART_OF]->(doc:Document)-[:AUTHORED_BY]->(author:Person)
        WHERE chunk.embedding <=> $query < 0.5
        RETURN chunk.text, doc.title, author.name
        ORDER BY chunk.embedding <=> $query
        LIMIT $k
    """, parameters={"query": query_vec, "k": k})
    return results
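The result rows are keyed by the returned expressions, matching the usage shown in Step 5:

# Print the top matches with their source document and author.
for r in retrieve_context(db, "How does self-attention work?", k=3):
    print(r["doc.title"], "/", r["author.name"])
    print(r["chunk.text"][:100])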
Retrieve with Neighboring Chunks
Get surrounding chunks for more context:
results = db.query("""
    MATCH (chunk:Chunk)-[:PART_OF]->(doc:Document)
    WHERE chunk.embedding <=> $query < 0.5
    WITH chunk, doc
    ORDER BY chunk.embedding <=> $query
    LIMIT 5
    MATCH (prev:Chunk)-[:NEXT]->(chunk)
    RETURN prev.text, chunk.text, doc.title
""", parameters={"query": query_vec})
Combine Vector and Full-Text Search
results = db.query("""
    MATCH (chunk:Chunk)-[:PART_OF]->(doc:Document)
    WHERE chunk.embedding <=> $query < 0.5
      AND chunk.text @@ $keywords
    RETURN chunk.text, doc.title
    ORDER BY chunk.embedding <=> $query
    LIMIT 10
""", parameters={
    "query": query_vec,
    "keywords": "transformer attention",
})
Step 5: Pass to LLM
context = retrieve_context(db, "How does self-attention work?")

# Build prompt with retrieved context
chunks = [f"[{r['doc.title']}] {r['chunk.text']}" for r in context]
prompt = f"""Answer the question based on the following context:
{chr(10).join(chunks)}
Question: How does self-attention work?"""

# Pass to your LLM of choice
# response = llm.generate(prompt)
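If the server at localhost:11434 is an Ollama instance (as the embedding setup above suggests), one option is to send the prompt to a locally served model via its generate endpoint; the model name here is only an example:

import requests

# Send the prompt to a local model through Ollama's /api/generate endpoint.
response = requests.post(
    "http://localhost:11434/api/generate",
    json={"model": "llama3", "prompt": prompt, "stream": False},
    timeout=120,
)
answer = response.json()["response"]
print(answer)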
Batch Loading
For large datasets, use batch insert:
import numpy as np

with db.write() as txn:
    # Insert 10,000 chunks at once
    vectors = np.array([hash_embed(text, 128) for text in all_chunks], dtype=np.float32)
    node_ids = txn.batch_insert("Chunk", vectors)

    # Set properties and create edges afterward
    for node_id, text in zip(node_ids, all_chunks):
        txn.set_property(node_id, "text", text)
        txn.fts_index(node_id, text)
    txn.commit()
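Batch-inserted chunks still need their graph links. Those can be added in the same write transaction, before the commit, using the edge calls from Step 2 (the doc node is assumed to have been created or fetched earlier):

# Run inside the same write transaction, before txn.commit().
# Assumes `doc` is an existing Document node, as in ingest_document().
prev_id = None
for node_id in node_ids:
    txn.create_edge(node_id, doc.id, "PART_OF")
    if prev_id is not None:
        txn.create_edge(prev_id, node_id, "NEXT")
    prev_id = node_id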
Performance Tips
- Use batch_insert() for bulk loading — significantly faster than individual creates
- Set ef_search based on your recall requirements (64 gives 100% recall at 1M vectors)
- Use cache_size_mb to control memory usage
- Index only the text fields you need to search with fts_index()
- Use parameters ($name) to enable query plan caching (see the sketch below)
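For example, passing the threshold and limit as parameters lets repeated calls with different values reuse the same cached plan:

# Parameterized retrieval query; only the parameter values change between calls.
# query_vec is computed as in Step 4.
results = db.query("""
    MATCH (chunk:Chunk)
    WHERE chunk.embedding <=> $query < $threshold
    RETURN chunk.text
    LIMIT $k
""", parameters={"query": query_vec, "threshold": 0.5, "k": 10})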