Embedding-Based Recommendation Systems: Beyond Collaborative Filtering
Why Embeddings Beat Traditional Recommendations
Collaborative filtering: "Users who liked X also liked Y"
Problem: Needs lots of data, cold-start issues, can't handle new items
Embedding-based: "Items semantically similar to what you engage with"
Advantage: Works from day 1, handles new content, captures deeper patterns
Building an Embedding Recommendation Engine
Generate Content Embeddings
from sentence_transformers import SentenceTransformer
import numpy as np
# Shared sentence-transformer encoder (384-dim MiniLM).
model = SentenceTransformer('all-MiniLM-L6-v2')


def generate_content_embedding(item: dict) -> np.ndarray:
    """Create vector representation of content"""
    # Fold every field that carries semantic signal into one text blob
    # so a single encoder pass captures title, body, and taxonomy.
    combined_text = f"""
    Title: {item['title']}
    Description: {item['description']}
    Category: {item['category']}
    Tags: {', '.join(item['tags'])}
    """
    return model.encode(combined_text)
Build Similarity Index
import faiss
class RecommendationEngine:
    """Content similarity index backed by FAISS inner-product search."""

    def __init__(self, dimension=384):
        # Inner-product index; vectors are L2-normalized before insert,
        # which makes inner product equivalent to cosine similarity.
        self.index = faiss.IndexFlatIP(dimension)
        self.items = []

    def add_items(self, items: list):
        """Index all content"""
        vectors = np.array([generate_content_embedding(it) for it in items])
        self.items.extend(items)
        faiss.normalize_L2(vectors)
        self.index.add(vectors)

    def recommend(self, user_id: str, k=10):
        """Get personalized recommendations"""
        query = generate_user_embedding(user_id).reshape(1, -1)
        faiss.normalize_L2(query)
        scores, indices = self.index.search(query, k)
        results = []
        for score, idx in zip(scores[0], indices[0]):
            results.append({'item': self.items[idx], 'score': score})
        return results
User Embeddings from Behavior
def generate_user_embedding(user_id: str) -> np.ndarray:
    """Create a user vector as a recency-weighted average of item embeddings.

    Args:
        user_id: identifier passed through to get_user_interactions.

    Returns:
        1-D embedding with the same dimension as the content embeddings.

    Raises:
        ValueError: if the user has no interaction history. (The original
        crashed with an opaque ZeroDivisionError when normalizing weights;
        callers should route such users to a cold-start strategy.)
    """
    # Local import: the visible module top never imports datetime.
    from datetime import datetime

    interactions = get_user_interactions(user_id)
    if not interactions:
        raise ValueError(
            f"no interactions for user {user_id!r}; use cold-start recommendations"
        )

    # Snapshot "now" once so every interaction's weight is measured against
    # the same instant (the original re-read the clock in the loop).
    now = datetime.now()

    embeddings = []
    weights = []
    for interaction in interactions:
        embeddings.append(generate_content_embedding(interaction['item']))
        # Recency weight: exponential decay, ~0.37 after 10 days.
        days_ago = (now - interaction['timestamp']).days
        weights.append(np.exp(-0.1 * days_ago))

    weights = np.array(weights)
    # Weighted average over the interaction axis.
    return np.average(np.array(embeddings), axis=0, weights=weights / weights.sum())
Hybrid Recommendation System
def hybrid_recommendations(user_id: str, k=10, *,
                           embedding_weight=0.5,
                           collab_weight=0.3,
                           trending_boost=0.2) -> list:
    """Combine embedding, collaborative, and trending signals into one ranking.

    The blend weights were hard-coded (0.5 / 0.3 / 0.2); they are now
    keyword-only parameters with the same defaults, so existing callers are
    unaffected but experiments can tune the mix without editing this function.

    Args:
        user_id: user to personalize for.
        k: number of recommendations to return. Each candidate source is
            over-fetched at 2*k (20 at the default, matching the original).
        embedding_weight: multiplier for embedding-similarity scores.
        collab_weight: multiplier for collaborative-filtering scores.
        trending_boost: flat additive bonus for trending items.

    Returns:
        Top-k dicts of {'item', 'score'} sorted by blended score, descending.
    """
    pool = 2 * k
    candidates = {}

    def _add(item, contribution):
        # The first source to mention an item supplies the stored payload;
        # later sources only add to its blended score.
        entry = candidates.setdefault(item['id'], {'item': item, 'score': 0.0})
        entry['score'] += contribution

    # 1. Embedding-based similarity
    for rec in embedding_engine.recommend(user_id, k=pool):
        _add(rec['item'], rec['score'] * embedding_weight)
    # 2. Collaborative filtering
    for rec in collaborative_filter(user_id, k=pool):
        _add(rec['item'], rec['score'] * collab_weight)
    # 3. Trending content gets a flat boost regardless of personal fit.
    for item in get_trending_items(days=7, k=pool):
        _add(item, trending_boost)

    ranked = sorted(candidates.values(), key=lambda c: c['score'], reverse=True)
    return ranked[:k]
Cold-Start Solutions
def cold_start_recommendations(new_user_id: str) -> list:
    """Recommendations for brand new users"""
    # No interaction history exists yet, so seed the search query from
    # the user's explicit onboarding answers instead of behavior.
    preferences = get_onboarding_preferences(new_user_id)
    query_text = f"""
    Interests: {', '.join(preferences['interests'])}
    Goals: {', '.join(preferences['goals'])}
    Experience level: {preferences['experience']}
    """
    # Encode, normalize, and search the shared content index.
    query_vector = model.encode(query_text).reshape(1, -1)
    faiss.normalize_L2(query_vector)
    _, neighbor_ids = engine.index.search(query_vector, k=10)
    matches = []
    for idx in neighbor_ids[0]:
        matches.append(engine.items[idx])
    return matches
Real-Time Personalization
from fastapi import FastAPI
app = FastAPI()


@app.get("/recommendations/{user_id}")
async def get_recommendations(user_id: str, context: dict = None):
    """Real-time recommendations API endpoint.

    Fix vs. the original: the cache now stores the *unfiltered*
    recommendation list, and contextual filtering runs after the cache
    lookup. Previously (a) a cache hit returned results that had been
    filtered for a *different* request's context, and (b) the context
    filter was skipped entirely whenever the cache hit.
    """
    cache_key = f"recs:{user_id}"
    cached = redis.get(cache_key)
    if cached:
        recs = json.loads(cached)
    else:
        # Generate fresh recommendations.
        if is_new_user(user_id):
            recs = cold_start_recommendations(user_id)
        else:
            recs = hybrid_recommendations(user_id, k=10)
        # Cache the context-independent list for 1 hour.
        redis.setex(cache_key, 3600, json.dumps(recs))
    # Contextual filtering is request-specific; never cache its result.
    if context:
        recs = filter_by_context(recs, context)
    return recs
Measuring Effectiveness
def evaluate_recommendations():
    """Measure recommendation quality over a sample of test users.

    Uses a 7-day attribution window per user.

    Returns:
        Dict with 'avg_ctr', 'avg_engagement', and 'conversion_rate'
        averaged over all sampled users.
    """
    users = get_test_users(n=1000)
    metrics = {
        'ctr': [],
        'engagement_time': [],
        'conversion': []
    }
    for user_id in users:
        recs = hybrid_recommendations(user_id, k=10)
        if not recs:
            # A user with zero recommendations would otherwise crash the
            # CTR division below; count them as zero engagement instead.
            metrics['ctr'].append(0.0)
            metrics['engagement_time'].append(0.0)
            metrics['conversion'].append(0)
            continue
        # Track what actually happened after serving these recs.
        clicks = count_clicks(user_id, recs, days=7)
        engagement = sum_engagement_time(user_id, recs, days=7)
        conversions = count_conversions(user_id, recs, days=7)
        metrics['ctr'].append(clicks / len(recs))
        metrics['engagement_time'].append(engagement)
        metrics['conversion'].append(conversions)
    return {
        'avg_ctr': np.mean(metrics['ctr']),
        'avg_engagement': np.mean(metrics['engagement_time']),
        'conversion_rate': np.mean(metrics['conversion'])
    }
Advanced Techniques
Multi-Modal Embeddings
def generate_multimodal_embedding(item: dict, image_dim: int = 768) -> np.ndarray:
    """Combine text, image, and metadata embeddings into one vector.

    Args:
        item: needs 'description', 'category', 'price'; 'image_url' optional.
        image_dim: dimensionality of the image encoder's output, used for
            the zero placeholder when no image exists. Was hard-coded to
            768; it must match what encode_image returns, or items with
            and without images would get vectors of different lengths.

    Returns:
        Concatenated [text | image | metadata] vector.
    """
    # Text embedding
    text_emb = model.encode(item['description'])
    # Image embedding (zero placeholder keeps dimensionality constant)
    if item.get('image_url'):
        image_emb = encode_image(item['image_url'])
    else:
        image_emb = np.zeros(image_dim)
    # Metadata embedding
    meta_text = f"Category: {item['category']}, Price: {item['price']}"
    meta_emb = model.encode(meta_text)
    # Concatenate all modalities
    return np.concatenate([text_emb, image_emb, meta_emb])
Diversity in Recommendations
def diversify_recommendations(recs: list, diversity_factor=0.3) -> list:
"""Avoid filter bubbles"""
selected = []
selected_embeddings = []
for rec in recs:
if not selected:
selected.append(rec)
selected_embeddings.append(rec['embedding'])
continue
# Calculate similarity to already selected
similarities = [
cosine_similarity(rec['embedding'], sel_emb)
for sel_emb in selected_embeddings
]
max_similarity = max(similarities)
# Balance relevance vs. diversity
adjusted_score = rec['score'] * (1 - diversity_factor * max_similarity)
rec['adjusted_score'] = adjusted_score
# Re-rank by adjusted score
diversified = sorted(recs, key=lambda x: x.get('adjusted_score', x['score']), reverse=True)
return diversified[:10]
Production Deployment
def daily_index_update():
    """Rebuild recommendation index"""
    global recommendation_engine
    # Re-embed the full catalog into a brand-new engine, then swap the
    # module-level reference in one assignment so readers never observe
    # a partially built index.
    items = get_all_items()
    fresh_engine = RecommendationEngine()
    fresh_engine.add_items(items)
    recommendation_engine = fresh_engine
    print(f"Index updated with {len(items)} items")
Real Results
- 2-3x higher CTR vs. popularity-based recommendations
- 40-60% increase in engagement time
- Works on day 1 (no cold-start problem)
- Scales to millions of items with FAISS
Start Here
- Generate embeddings for your content
- Build FAISS index
- Create user embeddings from behavior
- Serve recommendations via API
- A/B test vs. current system
- Measure CTR, engagement, conversion
Embedding-based recommendations are the new standard. Start building today.
Tools:
- sentence-transformers
- FAISS
- Redis (caching)
- FastAPI (serving)
Enjoying this article?
Get deep technical guides like this delivered weekly.
Get AI growth insights weekly
Join engineers and product leaders building with AI. No spam, unsubscribe anytime.
Keep reading
AI-Powered Personalization at Scale: From Segments to Individuals
Traditional segmentation is dead. Learn how to build individual-level personalization systems with embeddings, real-time inference, and behavioral prediction models that adapt to every user.
AI · AI-Native Growth: Why Traditional Product Growth Playbooks Are Dead
The playbook that got you to 100K users won't get you to 10M. AI isn't just another channel—it's fundamentally reshaping how products grow, retain, and monetize. Here's what actually works in 2026.
AI · Dynamic Pricing with Machine Learning: Optimize Revenue Per User
Stop leaving money on the table with static pricing. Learn how to build ML-powered pricing systems that optimize for willingness-to-pay and increase revenue by 20-40%.