Faceted Semantic Search: Combining Filters with Vector Similarity
Learn how to combine metadata filters with vector similarity search: compare pre-filter and post-filter strategies, design filterable metadata schemas, and build a responsive faceted search UI.
The Need for Faceted Semantic Search
Pure vector search returns the most semantically similar results, but users often need to narrow results by structured attributes — show me articles about machine learning published this year, in English, from peer-reviewed journals. Faceted search combines the power of semantic similarity with precise metadata filtering, giving users both relevance and control.
The key design decision is whether to apply filters before or after the vector search. Each approach has meaningful tradeoffs for accuracy and performance.
Pre-Filter vs Post-Filter Strategies
from dataclasses import dataclass
from enum import Enum
class FilterStrategy(Enum):
    """Which side of the vector search the metadata filter runs on."""

    # Filter the corpus first, then run vector search on the survivors.
    PRE_FILTER = "pre_filter"
    # Run vector search first, then drop hits that fail the filters.
    POST_FILTER = "post_filter"
@dataclass
class StrategyAnalysis:
    """Pros/cons summary for one filtering strategy."""

    # The strategy this analysis describes.
    strategy: FilterStrategy
    # Advantages of the strategy (list of human-readable strings).
    pros: list
    # Drawbacks of the strategy (list of human-readable strings).
    cons: list
# Side-by-side tradeoff catalog for the two filtering strategies.
strategies = [
    StrategyAnalysis(strategy=strat, pros=list(pros), cons=list(cons))
    for strat, pros, cons in (
        (
            FilterStrategy.PRE_FILTER,
            (
                "Guarantees returning exactly top_k filtered results",
                "Search only over matching subset, so faster for selective filters",
                "No wasted computation on irrelevant documents",
            ),
            (
                "Requires the vector index to support filtering natively",
                "Highly selective filters reduce the candidate pool, hurting ANN recall",
                "Complex to implement with partitioned indexes",
            ),
        ),
        (
            FilterStrategy.POST_FILTER,
            (
                "Simple to implement — vector search then Python filter",
                "Works with any vector index without modification",
                "Vector search quality is unaffected by filters",
            ),
            (
                "May return fewer than top_k results after filtering",
                "Must over-fetch to compensate, increasing latency",
                "Wasteful when filters are very selective",
            ),
        ),
    )
]
Implementing Both Strategies
import numpy as np
from typing import List, Dict, Optional, Any
from sentence_transformers import SentenceTransformer
class FacetedSearchEngine:
    """In-memory semantic search engine with metadata filtering.

    Documents are plain dicts; their 'title' and 'body' fields are embedded
    with a SentenceTransformer model, and any other keys act as filterable
    metadata. Supports both pre-filter and post-filter search strategies.
    """

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)
        self.documents: List[Dict] = []
        # Row i holds the embedding of documents[i]; None until index() runs.
        self.embeddings: Optional[np.ndarray] = None

    def index(self, documents: List[Dict]):
        """Embed and store *documents*, replacing any previous index."""
        self.documents = documents
        texts = [
            f"{d.get('title', '')}. {d.get('body', '')}" for d in documents
        ]
        # normalize_embeddings=True makes a plain dot product equal cosine similarity.
        self.embeddings = self.model.encode(
            texts, normalize_embeddings=True
        )

    @staticmethod
    def _doc_matches(doc: Dict, filters: Dict[str, Any]) -> bool:
        """Return True if *doc* satisfies every condition in *filters*.

        Supported key forms: "field" (exact match), "field__gte" /
        "field__lte" (inclusive numeric bounds), "field__in" (membership).
        A missing field defaults to 0 for __gte and +inf for __lte, so it
        fails lower-bound filters but passes upper-bound ones (same as the
        original behavior).
        """
        for key, value in filters.items():
            if key.endswith("__gte"):
                if doc.get(key[:-5], 0) < value:
                    return False
            elif key.endswith("__lte"):
                if doc.get(key[:-5], float("inf")) > value:
                    return False
            elif key.endswith("__in"):
                if doc.get(key[:-4]) not in value:
                    return False
            elif doc.get(key) != value:
                return False
        return True

    def _apply_filters(
        self, indices: np.ndarray, filters: Dict[str, Any]
    ) -> List[int]:
        """Apply metadata filters to a set of document indices."""
        # Early-exits per document via _doc_matches instead of evaluating
        # every remaining filter after one has already failed.
        return [
            idx
            for idx in indices
            if self._doc_matches(self.documents[idx], filters)
        ]

    def search_post_filter(
        self,
        query: str,
        filters: Dict[str, Any],
        top_k: int = 10,
        over_fetch_factor: int = 5,
    ) -> List[Dict]:
        """Post-filter: vector search first, then apply filters.

        May return fewer than *top_k* results when the filters are selective,
        even with over-fetching. Returns [] if nothing has been indexed yet.
        """
        if self.embeddings is None or not self.documents:
            return []
        query_emb = self.model.encode(
            [query], normalize_embeddings=True
        )
        scores = np.dot(self.embeddings, query_emb.T).flatten()
        # Over-fetch to ensure enough results after filtering
        fetch_k = top_k * over_fetch_factor
        top_indices = np.argsort(scores)[::-1][:fetch_k]
        filtered_indices = self._apply_filters(top_indices, filters)
        results = []
        for idx in filtered_indices[:top_k]:
            doc = self.documents[idx].copy()
            doc["score"] = float(scores[idx])
            results.append(doc)
        return results

    def search_pre_filter(
        self,
        query: str,
        filters: Dict[str, Any],
        top_k: int = 10,
    ) -> List[Dict]:
        """Pre-filter: apply filters first, then vector search within subset.

        Returns exactly min(top_k, matching-subset size) results, or [] if
        nothing has been indexed yet.
        """
        if self.embeddings is None or not self.documents:
            return []
        all_indices = np.arange(len(self.documents))
        filtered_indices = self._apply_filters(all_indices, filters)
        if not filtered_indices:
            return []
        filtered_embeddings = self.embeddings[filtered_indices]
        query_emb = self.model.encode(
            [query], normalize_embeddings=True
        )
        scores = np.dot(filtered_embeddings, query_emb.T).flatten()
        sorted_positions = np.argsort(scores)[::-1][:top_k]
        results = []
        for pos in sorted_positions:
            # Map the position within the filtered subset back to the
            # original document index.
            idx = filtered_indices[pos]
            doc = self.documents[idx].copy()
            doc["score"] = float(scores[pos])
            results.append(doc)
        return results
Designing the Metadata Schema
Effective faceted search requires well-structured metadata. Design your metadata fields for the filter patterns your users actually need.
from datetime import datetime
# Declarative facet schema: each entry maps a metadata field to its storage
# type ("type") and the UI widget that should render it ("facet_type").
# "values" enumerates the allowed options for closed-vocabulary fields;
# "min"/"max" bound range sliders.
METADATA_SCHEMA = {
    "category": {
        "type": "keyword",
        "facet_type": "multi_select",  # user may pick several categories
        "values": ["engineering", "product", "research", "tutorial"],
    },
    "author": {
        "type": "keyword",
        "facet_type": "searchable_select",  # open vocabulary, typeahead UI
    },
    "published_at": {
        "type": "date",
        "facet_type": "date_range",
    },
    "reading_time_minutes": {
        "type": "integer",
        "facet_type": "range_slider",
        "min": 1,
        "max": 60,
    },
    "language": {
        "type": "keyword",
        "facet_type": "single_select",  # exactly one language at a time
        "values": ["en", "es", "fr", "de", "ja"],
    },
}
# Usage example
# NOTE(review): assumes `engine` is a FacetedSearchEngine that has already
# been populated via engine.index(documents).
results = engine.search_pre_filter(
    query="machine learning best practices",
    filters={
        "category__in": ["engineering", "research"],  # multi-select facet
        "reading_time_minutes__lte": 15,  # upper bound from the range slider
        "language": "en",  # exact-match, single-select facet
    },
    top_k=10,
)
Building Facet Counts
Users need to see how many results exist for each filter value. Compute facet counts from the current result set to enable progressive filtering.
See AI Voice Agents Handle Real Calls
Book a free demo or calculate how much you can save with AI voice automation.
from collections import Counter
def _filter_field(key: str) -> str:
    """Return the document field a filter key targets, stripping any operator suffix."""
    for op in ("__gte", "__lte", "__in"):
        if key.endswith(op):
            return key[: -len(op)]
    return key


def compute_facet_counts(
    engine: FacetedSearchEngine,
    query: str,
    current_filters: Dict[str, Any],
    facet_fields: List[str],
    candidate_limit: int = 500,
) -> Dict[str, Dict[str, int]]:
    """Compute result counts for each facet value.

    Counts are taken over the top *candidate_limit* vector-search hits, with
    all active filters applied EXCEPT those targeting the facet being counted
    (so selecting one category still shows counts for the others). Returns
    {facet_field: {value: count}} with at most 20 values per facet; returns
    empty counts if the engine has not been indexed yet.
    """
    if engine.embeddings is None or not engine.documents:
        return {field: {} for field in facet_fields}
    query_emb = engine.model.encode(
        [query], normalize_embeddings=True
    )
    scores = np.dot(engine.embeddings, query_emb.T).flatten()
    top_indices = np.argsort(scores)[::-1][:candidate_limit]
    facet_counts: Dict[str, Dict[str, int]] = {}
    for facet_field in facet_fields:
        # Exclude filters on the facet being counted. BUG FIX: the previous
        # key.startswith(facet_field) check also dropped filters on distinct
        # fields sharing a prefix (e.g. facet "category" stripped a
        # "category_group__in" filter); compare the exact field name instead.
        partial_filters = {
            k: v
            for k, v in current_filters.items()
            if _filter_field(k) != facet_field
        }
        filtered = engine._apply_filters(top_indices, partial_filters)
        # NOTE(review): assumes facet fields are scalar/hashable; multi-valued
        # fields (lists of tags) would need flattening before counting.
        counter = Counter(
            engine.documents[idx].get(facet_field) for idx in filtered
        )
        facet_counts[facet_field] = dict(counter.most_common(20))
    return facet_counts
FAQ
When should I use pre-filter vs post-filter?
Use pre-filter when your filters are moderately selective (filtering out 50-90% of documents) and you need guaranteed result counts. Use post-filter when filters are broad (keeping 50%+ of documents) or when your vector index does not support native filtering. For production systems, implement both and choose dynamically based on estimated filter selectivity.
How do I handle multi-select facets where the user can pick multiple values?
Use an `__in` filter operator that checks whether the document's field value is in the user's selected set. For multi-valued document fields (e.g., a document with multiple tags), check if there is any intersection between the document's values and the user's selections. This is the standard behavior users expect from e-commerce-style faceted search.
How do faceted counts stay accurate as users toggle filters?
Recompute facet counts on every filter change, but exclude the field being counted from the active filters. This ensures that selecting "Engineering" in the category facet still shows accurate counts for other categories, preventing the common problem where all other category counts drop to zero after selection.
#FacetedSearch #VectorSearch #MetadataFiltering #SearchUX #InformationRetrieval #AgenticAI #LearnAI #AIEngineering
CallSphere Team
Expert insights on AI voice agents and customer communication automation.
Try CallSphere AI Voice Agents
See how AI voice agents work for your industry. Live demo available -- no signup required.