TechLead
Lesson 15 of 24
5 min read
AI Agents & RAG

Hybrid Search

Combine keyword-based BM25 search with semantic vector search for superior retrieval quality

Why Hybrid Search?

Neither keyword search nor semantic search is perfect on its own. Keyword search (BM25) excels at finding exact terms, product names, error codes, and technical identifiers. Semantic search excels at understanding meaning, synonyms, and intent. Hybrid search combines both to get the best of both worlds.

When Semantic Search Fails

  • Exact names: "ERROR_CODE_4032" won't match semantically but keywords find it instantly
  • Rare terms: Domain jargon may not have good embedding representations
  • Abbreviations: "K8s" and "Kubernetes" may be far apart in embedding space
  • Code search: Function names and variable names need exact matching

BM25: The Keyword Search Baseline

# BM25 keyword search implementation
from rank_bm25 import BM25Okapi
import re

class BM25Search:
    """Keyword search over an in-memory list of documents using BM25.

    Documents are tokenized once when the index is built; queries are
    tokenized with the same rule so that query terms line up with document
    terms.
    """

    def __init__(self, documents: list[str]):
        """Tokenize ``documents`` and build the BM25 index over them."""
        self.documents = documents
        # Tokenize documents
        self.tokenized = [self._tokenize(doc) for doc in documents]
        self.bm25 = BM25Okapi(self.tokenized)

    def _tokenize(self, text: str) -> list[str]:
        """Lowercase ``text`` and split it into runs of word characters.

        Word characters include the underscore, so an identifier such as
        ``ERROR_CODE_4032`` stays a single token (``error_code_4032``).
        """
        return re.findall(r'\w+', text.lower())

    def search(self, query: str, top_k: int = 5) -> list[tuple[int, float, str]]:
        """Return up to ``top_k`` documents ranked by BM25 score for ``query``.

        Args:
            query: Free-text query; tokenized the same way as the documents.
            top_k: Maximum number of results. Zero or a negative value
                yields an empty list.

        Returns:
            ``(document_index, score, document)`` tuples, best match first.
            Documents whose score is not positive are left out, so fewer
            than ``top_k`` tuples may be returned.
        """
        if top_k <= 0:
            # ``scores.argsort()[-0:]`` would select *every* index, so a
            # request for zero (or fewer) results is answered explicitly.
            return []

        tokenized_query = self._tokenize(query)
        scores = self.bm25.get_scores(tokenized_query)

        # Indices of the top_k highest scores, best first.
        top_indices = scores.argsort()[-top_k:][::-1]

        # Convert the array's scalar types to built-in int/float so the
        # result matches the annotated return type.
        return [
            (int(idx), float(scores[idx]), self.documents[idx])
            for idx in top_indices
            if scores[idx] > 0
        ]

# Usage: index a tiny corpus, then look up an exact error code.
documents = [
    "The ERROR_CODE_4032 indicates an authentication failure.",
    "Authentication errors occur when credentials are invalid.",
    "Kubernetes (K8s) orchestrates container deployments.",
    "Container orchestration automates deployment scaling.",
]

bm25 = BM25Search(documents)
results = bm25.search("ERROR_CODE_4032")

# Each hit is an (index, score, document) tuple; print score and text only.
for hit in results:
    idx, score, doc = hit
    print(f"[{score:.2f}] {doc}")

Implementing Hybrid Search

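Hybrid search runs both a keyword search and a semantic search, then combines the two result lists using either Reciprocal Rank Fusion (RRF) or weighted scoring. RRF gives each document a score of 1 / (k + rank) from every list it appears in (k is a smoothing constant, commonly 60) and sums those scores, so the final ordering depends only on rank positions, not on the raw scores.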

// Hybrid search with Reciprocal Rank Fusion
/** One retrieved document, in the common shape shared by the search functions below. */
interface SearchResult {
  // Key that reciprocalRankFusion uses to decide two hits are the same document.
  id: string;
  // The document text returned to the caller.
  content: string;
  // Relevance score set by whichever search produced the hit; in the output of
  // reciprocalRankFusion this is the fused RRF score instead.
  score: number;
}

/**
 * Merge several ranked result lists into one using Reciprocal Rank Fusion.
 *
 * Every list contributes `weight * 1 / (k + rank)` (rank starting at 1) to
 * each document it contains; contributions are summed per `id`. Only the
 * position within each list matters, the incoming `score` values are ignored.
 *
 * @param resultSets Ranked lists, best hit first in each list.
 * @param k RRF constant; larger values flatten the difference between ranks.
 * @param weights Optional per-list multipliers, matched to `resultSets` by
 *   index. A list with no corresponding weight uses 1.
 * @returns One entry per distinct `id`, sorted by fused score, highest first.
 *   `content` is taken from the first occurrence of the id.
 */
function reciprocalRankFusion(
  resultSets: SearchResult[][],
  k: number = 60, // RRF constant
  weights?: number[]
): SearchResult[] {
  const fused = new Map<string, { score: number; content: string }>();

  resultSets.forEach((results, setIdx) => {
    // A missing or too-short `weights` array must not yield `undefined`
    // here: that would turn every score for this list into NaN.
    const weight = weights?.[setIdx] ?? 1.0;

    // A document has exactly one rank per list. If a list repeats an id,
    // only its best (first) position counts.
    const seenInThisSet = new Set<string>();

    results.forEach((result, rank) => {
      if (seenInThisSet.has(result.id)) {
        return;
      }
      seenInThisSet.add(result.id);

      const rrfScore = weight * (1 / (k + rank + 1));

      const existing = fused.get(result.id);
      if (existing) {
        existing.score += rrfScore;
      } else {
        fused.set(result.id, {
          score: rrfScore,
          content: result.content,
        });
      }
    });
  });

  return Array.from(fused.entries())
    .map(([id, { score, content }]) => ({ id, content, score }))
    .sort((a, b) => b.score - a.score);
}

// Full hybrid search implementation
/**
 * Run a semantic and a keyword search for `query` in parallel and fuse the
 * two rankings with weighted Reciprocal Rank Fusion.
 *
 * @param query Search text passed to both backends.
 * @param vectorStore Object exposing `similaritySearchWithScore(query, n)`,
 *   resolving to `[doc, score]` pairs.
 * @param bm25Index Object exposing `search(query, n)`, resolving to
 *   `[index, score, content]` triples.
 * @param topK Maximum number of fused results to return.
 * @param semanticWeight RRF weight applied to the semantic ranking.
 * @param keywordWeight RRF weight applied to the keyword ranking.
 * @returns Up to `topK` fused results, best first.
 */
async function hybridSearch(
  query: string,
  vectorStore: any,
  bm25Index: any,
  topK: number = 5,
  semanticWeight: number = 0.7,
  keywordWeight: number = 0.3,
): Promise<SearchResult[]> {
  if (topK <= 0) {
    // slice(0, negative) would drop items from the end instead of returning none.
    return [];
  }

  // Over-fetch from each backend so fusion has extra candidates to promote.
  const candidateCount = topK * 2;

  // Run both searches in parallel
  const [semanticResults, keywordResults] = await Promise.all([
    vectorStore.similaritySearchWithScore(query, candidateCount),
    bm25Index.search(query, candidateCount),
  ]);

  // The two backends label documents differently (`metadata.id` / `sem_i`
  // versus `kw_idx`). Fusion matches on `id`, so without a shared key the
  // same document found by both searches would never be merged. Remember the
  // id chosen for each piece of content so the keyword side can reuse it.
  // NOTE(review): assumes both indexes hold identical text for a given
  // document; confirm against how the two indexes are populated.
  const idByContent = new Map<string, string>();

  // Normalize results to common format
  const semantic: SearchResult[] = semanticResults.map(
    ([doc, score]: any, i: number) => {
      const id = doc.metadata?.id || `sem_${i}`;
      if (!idByContent.has(doc.pageContent)) {
        idByContent.set(doc.pageContent, id);
      }
      return {
        id,
        content: doc.pageContent,
        // Convert distance to similarity
        // NOTE(review): assumes the store returns a distance, not a similarity; confirm.
        score: 1 - score,
      };
    }
  );

  const keyword: SearchResult[] = keywordResults.map(
    ([idx, score, content]: any) => ({
      // Reuse the semantic id when the same content was already seen.
      id: idByContent.get(content) ?? `kw_${idx}`,
      content,
      score,
    })
  );

  // Fuse results
  return reciprocalRankFusion(
    [semantic, keyword],
    60,
    [semanticWeight, keywordWeight]
  ).slice(0, topK);
}

Hybrid Search with Weaviate

Weaviate has built-in hybrid search that combines BM25 and vector search natively.

# Weaviate native hybrid search
import weaviate
from weaviate.classes.query import HybridFusion

client = weaviate.connect_to_local()  # or connect_to_wcs()

# Everything after connecting runs inside try/finally so the connection is
# closed even when collection creation, insertion or the query raises.
try:
    # Create collection with hybrid search support
    # NOTE(review): presumably fails if "Documents" already exists; confirm
    # before re-running this script against the same instance.
    collection = client.collections.create(
        name="Documents",
        vectorizer_config=weaviate.classes.config.Configure.Vectorizer.text2vec_openai(),
    )

    # Add documents
    collection.data.insert_many([
        {"content": "ERROR_CODE_4032 authentication failure", "source": "errors.md"},
        {"content": "Authentication requires valid JWT tokens", "source": "auth.md"},
        {"content": "Kubernetes cluster management guide", "source": "k8s.md"},
    ])

    # Hybrid search (combines BM25 + vector automatically)
    results = collection.query.hybrid(
        query="ERROR_CODE_4032",
        alpha=0.5,  # 0 = pure keyword, 1 = pure vector, 0.5 = balanced
        limit=5,
        fusion_type=HybridFusion.RELATIVE_SCORE,
        return_metadata=weaviate.classes.query.MetadataQuery(score=True),
    )

    # Print the fused score next to each document's text.
    for obj in results.objects:
        print(f"[{obj.metadata.score:.3f}] {obj.properties['content']}")
finally:
    client.close()

Hybrid Search with pgvector + Full-Text Search

# PostgreSQL hybrid search with pgvector + tsvector
import psycopg2

conn = psycopg2.connect("dbname=mydb user=myuser")
cur = conn.cursor()

# Create table with both vector and full-text search.
# The `vector` column type and the `ivfflat` index method come from the
# pgvector extension, which must already be installed in this database.
# `tsv` is a generated column, so it stays in sync with `content` on every
# insert or update without application code.
cur.execute("""
    CREATE TABLE IF NOT EXISTS documents (
        id SERIAL PRIMARY KEY,
        content TEXT,
        source TEXT,
        embedding vector(1536),
        tsv tsvector GENERATED ALWAYS AS (to_tsvector('english', content)) STORED
    );

    CREATE INDEX IF NOT EXISTS idx_docs_embedding ON documents
        USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);

    CREATE INDEX IF NOT EXISTS idx_docs_tsv ON documents
        USING gin(tsv);
""")

# psycopg2 runs statements inside an implicit transaction; without a commit
# the table and indexes are rolled back when the connection closes.
conn.commit()

# Hybrid search query
def hybrid_search_pg(
    query: str,
    query_embedding: list,
    top_k: int = 5,
    alpha: float = 0.5,
    rrf_k: int = 60,
):
    """Combine semantic and full-text search in PostgreSQL.

    Each side retrieves ``top_k * 2`` candidates, ranks them, and the two
    rankings are merged with weighted Reciprocal Rank Fusion:
    ``alpha / (rrf_k + semantic_rank) + (1 - alpha) / (rrf_k + keyword_rank)``.
    A document missing from one side contributes 0 for that side.

    Args:
        query: Text matched against the ``tsv`` column via ``plainto_tsquery``.
        query_embedding: Query vector compared with ``embedding`` using the
            ``<=>`` operator.
        top_k: Number of fused rows to return.
        alpha: Weight of the semantic ranking; the keyword ranking gets
            ``1 - alpha``.
        rrf_k: RRF constant added to every rank.

    Returns:
        Rows of ``(id, content, source, hybrid_score)``, best first, as
        returned by the module-level cursor ``cur``.
    """
    # Named parameters: each value is supplied once, however often the SQL uses it.
    params = {
        "embedding": query_embedding,
        "query": query,
        "candidates": top_k * 2,
        "semantic_weight": alpha,
        "keyword_weight": 1 - alpha,
        "rrf_k": rrf_k,
        "top_k": top_k,
    }

    # The keyword CTE orders by its rank before LIMIT. Without that ORDER BY,
    # LIMIT keeps an arbitrary subset of the matching rows rather than the
    # best-ranked ones.
    cur.execute("""
        WITH semantic AS (
            SELECT id, content, source,
                   1 - (embedding <=> %(embedding)s::vector) AS semantic_score,
                   ROW_NUMBER() OVER (ORDER BY embedding <=> %(embedding)s::vector) AS semantic_rank
            FROM documents
            ORDER BY embedding <=> %(embedding)s::vector
            LIMIT %(candidates)s
        ),
        keyword AS (
            SELECT id, content, source,
                   ts_rank(tsv, plainto_tsquery('english', %(query)s)) AS keyword_score,
                   ROW_NUMBER() OVER (ORDER BY ts_rank(tsv, plainto_tsquery('english', %(query)s)) DESC) AS keyword_rank
            FROM documents
            WHERE tsv @@ plainto_tsquery('english', %(query)s)
            ORDER BY keyword_rank
            LIMIT %(candidates)s
        )
        SELECT COALESCE(s.id, k.id) AS id,
               COALESCE(s.content, k.content) AS content,
               COALESCE(s.source, k.source) AS source,
               (%(semantic_weight)s * COALESCE(1.0 / (%(rrf_k)s + s.semantic_rank), 0)) +
               (%(keyword_weight)s * COALESCE(1.0 / (%(rrf_k)s + k.keyword_rank), 0)) AS hybrid_score
        FROM semantic s
        FULL OUTER JOIN keyword k ON s.id = k.id
        ORDER BY hybrid_score DESC
        LIMIT %(top_k)s
    """, params)

    return cur.fetchall()

Hybrid Search Tips

  • Tune the alpha parameter: Start at 0.5 and adjust based on your query types. Technical queries with exact identifiers usually need more keyword weight (a lower alpha, since alpha = 1 means pure vector search).
  • Use RRF over simple weighting: RRF is more robust because it works with ranks, not raw scores that may have different scales.
  • Consider query routing: Detect if a query has specific terms (codes, names) and adjust alpha dynamically.
  • Measure both components: Track how often keyword vs semantic search contributes the best result to identify the optimal balance.

Summary

Hybrid search is a significant upgrade over pure semantic search, especially for production applications dealing with technical content, product names, or error codes. Whether you use a database with built-in hybrid support (Weaviate), combine pgvector with PostgreSQL full-text search, or implement Reciprocal Rank Fusion yourself, combining keyword and semantic approaches consistently produces better retrieval quality.

Continue Learning