What is Monitoring & Alerting?

Build real-time performance dashboards, set up alerting on regressions, and implement Real User Monitoring (RUM).

Monitoring & Alerting - Performance Engineering Tutorial | TechLead

Why Performance Monitoring Matters

Lab testing catches issues during development, but only Real User Monitoring (RUM) captures what users actually experience in production. Network conditions, device capabilities, geographic distribution, and third-party scripts all affect real performance. Monitoring provides the continuous feedback loop needed to maintain and improve performance over time.

Monitoring Layers

Real User Monitoring (RUM): Collect actual user performance data from browsers
Synthetic Monitoring: Automated tests from fixed locations on a schedule
Server Monitoring: Track server response times, error rates, and resource usage
Application Performance Monitoring (APM): Trace individual requests through the entire stack
Alerting: Automated notifications when metrics cross defined thresholds

Building a RUM Pipeline

// lib/rum.ts — Real User Monitoring client
import { onLCP, onINP, onCLS, onFCP, onTTFB, type Metric } from 'web-vitals';

/** JSON body that RUMCollector serializes and beacons to `/api/rum`. */
interface RUMPayload {
  /** Identifier generated once per RUMCollector instance (crypto.randomUUID()). */
  sessionId: string;
  /** `window.location.href` at the moment the report is built. */
  url: string;
  /** `Date.now()` at the moment the report is built. */
  timestamp: number;
  /** `navigator.connection.effectiveType` when available, otherwise 'unknown'. */
  connection: string;
  /** 'mobile', 'tablet' or 'desktop', derived from `window.innerWidth`. */
  deviceType: string;
  /** Most recent value reported for each metric, keyed by the metric's name. */
  metrics: Record<string, number>;
}

/**
 * Browser-side Real User Monitoring collector.
 *
 * Subscribes to the web-vitals callbacks (LCP, INP, CLS, FCP, TTFB), keeps the
 * latest value of each metric, and beacons a snapshot to `/api/rum` whenever
 * the page becomes hidden or is being unloaded.
 *
 * A snapshot is sent only when at least one metric value has changed since the
 * last successful beacon. This means:
 *  - nothing is sent (and nothing is "used up") if the page is hidden before
 *    any metric has been reported;
 *  - metrics that are updated after an earlier hide (CLS and INP commonly are)
 *    are delivered on the next hide instead of being dropped;
 *  - `visibilitychange` followed by `pagehide` on the same unload still yields
 *    a single beacon.
 * Every snapshot carries the same `sessionId` and the full set of latest
 * values, so the receiver can simply keep the newest payload per session.
 */
class RUMCollector {
  private readonly sessionId: string;
  private metrics: Record<string, number> = {};
  /** True while `metrics` holds values that have not been beaconed yet. */
  private dirty = false;

  constructor() {
    this.sessionId = crypto.randomUUID();
    this.initObservers();
    this.reportOnUnload();
  }

  /** Registers one web-vitals callback per tracked metric. */
  private initObservers(): void {
    onLCP((m) => this.collect(m));
    onINP((m) => this.collect(m));
    onCLS((m) => this.collect(m));
    onFCP((m) => this.collect(m));
    onTTFB((m) => this.collect(m));
  }

  /** Stores the latest value for the metric and marks the snapshot as unsent. */
  private collect(metric: Metric): void {
    this.metrics[metric.name] = metric.value;
    this.dirty = true;
  }

  /** Returns the effective connection type, or 'unknown' when not exposed. */
  private getConnectionType(): string {
    // `navigator.connection` is not available in every browser, so describe
    // only the part that is read here instead of casting to `any`.
    const nav = navigator as Navigator & {
      connection?: { effectiveType?: string };
    };
    return nav.connection?.effectiveType || 'unknown';
  }

  /** Buckets the current viewport width into a coarse device class. */
  private getDeviceType(): string {
    const width = window.innerWidth;
    if (width < 768) return 'mobile';
    if (width < 1024) return 'tablet';
    return 'desktop';
  }

  /** Wires the reporting function to the page-hide lifecycle events. */
  private reportOnUnload(): void {
    const report = () => {
      // Nothing new to deliver: either no metric has fired yet, or the
      // current values were already beaconed by the sibling event.
      if (!this.dirty) return;

      const payload: RUMPayload = {
        sessionId: this.sessionId,
        url: window.location.href,
        timestamp: Date.now(),
        connection: this.getConnectionType(),
        deviceType: this.getDeviceType(),
        metrics: this.metrics,
      };

      // sendBeacon returns false when the browser could not queue the data;
      // keep the snapshot marked as unsent so the next event retries it.
      const queued = navigator.sendBeacon('/api/rum', JSON.stringify(payload));
      if (queued) {
        this.dirty = false;
      }
    };

    document.addEventListener('visibilitychange', () => {
      if (document.visibilityState === 'hidden') report();
    });
    window.addEventListener('pagehide', report);
  }
}

// Start collecting as soon as this module is evaluated in a browser.
// During server-side evaluation `window` is undefined and nothing is created.
if (typeof window !== 'undefined') new RUMCollector();

Server-Side Monitoring

// middleware.ts — Server-side performance tracking
import { NextRequest, NextResponse } from 'next/server';

/**
 * Measures the time spent inside this middleware and reports it through a
 * `Server-Timing` response header.
 *
 * @param request - Incoming request; only `nextUrl.pathname` is read, for the
 *   slow-request log line.
 * @returns The pass-through response with the `Server-Timing` header set.
 */
export function middleware(request: NextRequest) {
  const startedAt = Date.now();
  const response = NextResponse.next();
  const elapsedMs = Date.now() - startedAt;

  const timingEntry = `middleware;dur=${elapsedMs};desc="Edge Middleware"`;
  response.headers.set('Server-Timing', timingEntry);

  // Log slow requests (anything that took longer than 100 ms in here)
  const isSlow = elapsedMs > 100;
  if (isSlow) {
    console.warn(`Slow middleware: ${request.nextUrl.pathname} took ${elapsedMs}ms`);
  }

  return response;
}

// API route with detailed timing
// app/api/products/route.ts
/**
 * Returns up to 20 products as JSON and reports how long each phase took in a
 * `Server-Timing` header with three entries: `db`, `serialize` and `total`.
 *
 * @param request - Incoming request (not read by this handler).
 * @returns A JSON response whose body is the serialized product list.
 */
export async function GET(request: NextRequest) {
  const requestStartedAt = Date.now();

  // Phase 1: database query
  const queryStartedAt = Date.now();
  const products = await prisma.product.findMany({ take: 20 });
  const dbEntry = `db;dur=${Date.now() - queryStartedAt};desc="Database Query"`;

  // Phase 2: serialization
  const serializeStartedAt = Date.now();
  const body = JSON.stringify(products);
  const serializeEntry = `serialize;dur=${Date.now() - serializeStartedAt};desc="Serialization"`;

  // Whole handler, measured from the first line
  const totalEntry = `total;dur=${Date.now() - requestStartedAt};desc="Total"`;

  const headers = {
    'Content-Type': 'application/json',
    'Server-Timing': [dbEntry, serializeEntry, totalEntry].join(', '),
  };
  return new Response(body, { headers });
}

Setting Up Alerts

// alert-rules.ts — Define alerting thresholds
/**
 * One threshold-based alerting rule.
 *
 * NOTE(review): the component that evaluates these rules is not shown in this
 * file, so the exact comparison semantics should be confirmed against it.
 */
interface AlertRule {
  /** Name of the metric series the rule watches, e.g. 'lcp_p75'. */
  metric: string;
  /** Which side of `threshold` is treated as a violation. */
  condition: 'above' | 'below';
  /** Limit the metric is compared against; units depend on the metric. */
  threshold: number;
  window: string;  // Time window for evaluation
  /** Severity label; sendSlackAlert uses it to choose the message emoji. */
  severity: 'critical' | 'warning' | 'info';
  channel: string; // Notification channel
}

// Alerting thresholds for front-end and back-end performance metrics.
// NOTE(review): units are not stated here — presumably milliseconds for the
// timing metrics and a 0–1 ratio for error_rate; confirm against the pipeline
// that produces these series.
const alertRules: AlertRule[] = [
  {
    // By name: 75th percentile of Largest Contentful Paint.
    metric: 'lcp_p75',
    condition: 'above',
    threshold: 2500,
    window: '15m',
    severity: 'critical',
    channel: 'slack-performance',
  },
  {
    // By name: 75th percentile of Cumulative Layout Shift (a unitless score).
    metric: 'cls_p75',
    condition: 'above',
    threshold: 0.1,
    window: '15m',
    severity: 'warning',
    channel: 'slack-performance',
  },
  {
    // The only rule routed to the 'pagerduty' channel; shortest window (5m).
    metric: 'error_rate',
    condition: 'above',
    threshold: 0.05,
    window: '5m',
    severity: 'critical',
    channel: 'pagerduty',
  },
  {
    // By name: 99th percentile of server response time.
    metric: 'p99_response_time',
    condition: 'above',
    threshold: 3000,
    window: '10m',
    severity: 'warning',
    channel: 'slack-performance',
  },
];

// Slack notification for alerts
/**
 * Posts a formatted performance alert to the Slack incoming webhook configured
 * in the `SLACK_WEBHOOK_URL` environment variable.
 *
 * The message contains a plain-text fallback (`text`) and one mrkdwn section
 * block listing metric, current value, threshold, window and severity.
 *
 * @param rule - The alert rule that was violated.
 * @param currentValue - The metric value that triggered the alert.
 * @throws Error when `SLACK_WEBHOOK_URL` is not set, or when Slack answers
 *   with a non-2xx status (so a failed delivery is never silently ignored).
 *   Network failures from `fetch` propagate unchanged.
 */
async function sendSlackAlert(rule: AlertRule, currentValue: number): Promise<void> {
  // Fail with a clear message instead of letting fetch() choke on `undefined`.
  const webhookUrl = process.env.SLACK_WEBHOOK_URL;
  if (!webhookUrl) {
    throw new Error('SLACK_WEBHOOK_URL is not set; cannot send Slack alert');
  }

  // Each severity gets its own emoji; anything unexpected falls back to the
  // warning sign, matching the previous default.
  const emojiBySeverity: Record<string, string> = {
    critical: ':rotating_light:',
    warning: ':warning:',
    info: ':information_source:',
  };
  const emoji = emojiBySeverity[rule.severity] ?? ':warning:';

  const response = await fetch(webhookUrl, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      text: `${emoji} Performance Alert: ${rule.metric} is ${currentValue} (threshold: ${rule.threshold}) over the last ${rule.window}`,
      blocks: [
        {
          type: 'section',
          text: {
            type: 'mrkdwn',
            text: `*${emoji} Performance Alert*\n*Metric:* ${rule.metric}\n*Current:* ${currentValue}\n*Threshold:* ${rule.threshold}\n*Window:* ${rule.window}\n*Severity:* ${rule.severity}`,
          },
        },
      ],
    }),
  });

  // fetch() resolves on HTTP errors, so a rejected webhook call has to be
  // detected explicitly.
  if (!response.ok) {
    throw new Error(
      `Slack webhook responded with ${response.status} ${response.statusText}`
    );
  }
}

Monitoring Best Practices

Collect RUM data: Lab tests miss real-world variability — always measure real users
Track percentiles, not averages: p75 and p95 expose the slow tail of the distribution that an average hides
Use Server-Timing headers: Make server-side timing visible to client-side monitoring
Alert on regressions: Detect when metrics cross thresholds and notify immediately
Segment data: Analyze by device type, connection, geography, and page type

Monitoring & Alerting

Why Performance Monitoring Matters

Monitoring Layers

Building a RUM Pipeline

Server-Side Monitoring

Setting Up Alerts

Monitoring Best Practices

Continue Learning

JavaScript Performance

System Design

CSS

React

Cloud & Kubernetes