Lesson 4 of 8
5 min read
Advanced Node.js

Clustering & Scaling

Scale Node.js applications across multiple CPU cores

Why Clustering?

A single Node.js process runs its JavaScript on one thread, so it can only make full use of one CPU core. To take advantage of multi-core systems, you can use the built-in cluster module to spawn multiple worker processes that share the same server port, with the primary process distributing incoming connections among them. This is essential for production applications handling high traffic.

🚀 Clustering vs Worker Threads

Clustering
  • Multiple Node.js processes
  • Each has its own memory
  • Best for handling more requests
  • Zero-downtime restarts
Worker Threads (see the sketch below)
  • Multiple threads in one process
  • Can share memory
  • Best for CPU-intensive tasks
  • Lower memory overhead
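For contrast, here is a minimal worker_threads sketch (the workload and message shape are made up for this example): the main thread offloads a CPU-heavy loop to a separate thread in the same process, so the event loop stays free to serve requests.

const { Worker, isMainThread, parentPort, workerData } = require('worker_threads');

if (isMainThread) {
  // Main thread: spawn a worker from this same file and hand it the input
  const worker = new Worker(__filename, { workerData: { limit: 1e8 } });
  worker.on('message', (result) => console.log('Sum from worker:', result));
  worker.on('error', (err) => console.error('Worker failed:', err));
} else {
  // Worker thread: run the CPU-heavy loop off the main thread
  let sum = 0;
  for (let i = 0; i < workerData.limit; i++) sum += i;
  parentPort.postMessage(sum);
}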

Basic Cluster Setup

const cluster = require('cluster');
const http = require('http');
const os = require('os');

const numCPUs = os.cpus().length;

if (cluster.isPrimary) {
  console.log(`Primary process ${process.pid} is running`);

  // Fork workers for each CPU
  for (let i = 0; i < numCPUs; i++) {
    cluster.fork();
  }

  cluster.on('exit', (worker, code, signal) => {
    console.log(`Worker ${worker.process.pid} died (${signal || code})`);
    // Restart the worker
    cluster.fork();
  });

} else {
  // Each worker creates a server on the same port; the primary
  // accepts connections and hands them to the workers
  http.createServer((req, res) => {
    res.writeHead(200);
    res.end(`Handled by worker ${process.pid}\n`);
  }).listen(8000);

  console.log(`Worker ${process.pid} started`);
}
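Run this file and request http://localhost:8000 a few times: the responses report different worker PIDs, because the primary distributes incoming connections across the workers (round-robin by default on every platform except Windows).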

Cluster with Express

const cluster = require('cluster');
const express = require('express');
const os = require('os');

const numCPUs = os.cpus().length;

if (cluster.isPrimary) {
  console.log(`Primary ${process.pid} starting ${numCPUs} workers...`);

  for (let i = 0; i < numCPUs; i++) {
    cluster.fork();
  }

  cluster.on('exit', (worker, code, signal) => {
    // Only restart workers that died unexpectedly, not ones stopped on purpose
    if (!worker.exitedAfterDisconnect) {
      console.log(`Worker ${worker.process.pid} died. Restarting...`);
      cluster.fork();
    }
  });

  // Simple shutdown: forward the signal to every worker
  // (a fuller graceful-shutdown pattern is shown later in this lesson)
  process.on('SIGTERM', () => {
    console.log('Shutting down...');
    for (const id in cluster.workers) {
      cluster.workers[id].kill(); // sends SIGTERM to the worker
    }
    process.exit(0);
  });

} else {
  const app = express();

  app.get('/', (req, res) => {
    res.json({ 
      message: 'Hello!',
      pid: process.pid,
      uptime: process.uptime()
    });
  });

  app.get('/heavy', (req, res) => {
    // Simulate CPU work
    let sum = 0;
    for (let i = 0; i < 1e7; i++) sum += i;
    res.json({ result: sum, pid: process.pid });
  });

  app.listen(3000, () => {
    console.log(`Worker ${process.pid} listening on port 3000`);
  });
}

Communication Between Workers

const cluster = require('cluster');

if (cluster.isPrimary) {
  const worker1 = cluster.fork();
  const worker2 = cluster.fork();

  // Receive messages from workers
  worker1.on('message', (msg) => {
    console.log('From worker 1:', msg);
    // Broadcast to all workers
    for (const id in cluster.workers) {
      cluster.workers[id].send({ type: 'broadcast', data: msg });
    }
  });

  // Send message to specific worker
  worker1.send({ type: 'init', data: { id: 1 } });

} else {
  // Worker receives messages
  process.on('message', (msg) => {
    console.log(`Worker ${process.pid} received:`, msg);
  });

  // Worker sends messages to primary
  process.send({ status: 'ready', pid: process.pid });
}
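Messages sent with send() travel over an IPC channel and are serialized and copied between processes (JSON-style by default), so keep them to plain data values; unlike worker threads, cluster workers never share memory directly.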

Graceful Shutdown

const cluster = require('cluster');
const express = require('express');

if (cluster.isPrimary) {
  // ... fork workers ...

  process.on('SIGTERM', gracefulShutdown);
  process.on('SIGINT', gracefulShutdown);

  function gracefulShutdown() {
    console.log('Received shutdown signal');

    // Ask every worker to finish in-flight requests and exit
    for (const id in cluster.workers) {
      cluster.workers[id].send('shutdown');
    }

    // Exit cleanly once every worker has gone
    let remaining = Object.keys(cluster.workers).length;
    cluster.on('exit', () => {
      remaining--;
      if (remaining === 0) process.exit(0);
    });

    // Safety net: force exit if workers take too long
    setTimeout(() => {
      console.log('Forcing exit...');
      process.exit(1);
    }, 30000);
  }

} else {
  const app = express();
  let isShuttingDown = false;

  // Reject new requests once shutdown has started
  app.use((req, res, next) => {
    if (isShuttingDown) {
      res.status(503).send('Server is shutting down');
    } else {
      next();
    }
  });

  // Track active connections so they can be closed on shutdown
  const connections = new Set();

  const server = app.listen(3000, () => {
    console.log(`Worker ${process.pid} ready`);
  });

  server.on('connection', (conn) => {
    connections.add(conn);
    conn.on('close', () => connections.delete(conn));
  });

  process.on('message', (msg) => {
    if (msg === 'shutdown') {
      isShuttingDown = true;

      // Stop accepting new connections
      server.close(() => {
        console.log(`Worker ${process.pid} closed`);
        process.exit(0);
      });

      // Close existing connections gracefully
      connections.forEach((conn) => conn.end());

      setTimeout(() => {
        connections.forEach((conn) => conn.destroy());
      }, 5000);
    }
  });
}
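The same worker-side pattern applies under PM2 (covered next): on pm2 stop or pm2 reload, PM2 sends SIGINT to each process and force-kills it after kill_timeout (1600 ms by default), so listening for SIGINT and closing the server as above keeps reloads graceful.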

Using PM2 (Production)

PM2 is a production process manager that handles clustering, monitoring, and zero-downtime reloads.

# Install PM2 globally
npm install -g pm2

# Start with cluster mode (uses all CPUs)
pm2 start app.js -i max

# Or specify number of instances
pm2 start app.js -i 4

# Ecosystem file (ecosystem.config.js)
module.exports = {
  apps: [{
    name: 'my-app',
    script: 'app.js',
    instances: 'max',
    exec_mode: 'cluster',
    env: {
      NODE_ENV: 'production',
      PORT: 3000
    },
    max_memory_restart: '1G',
    error_file: './logs/error.log',
    out_file: './logs/output.log'
  }]
};

# PM2 commands
pm2 start ecosystem.config.js
pm2 reload my-app          # Zero-downtime reload
pm2 scale my-app 4         # Scale to 4 instances
pm2 monit                  # Monitor dashboard
pm2 logs                   # View logs
pm2 save                   # Save process list
pm2 startup                # Generate startup script
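Note that with PM2's cluster mode the application itself stays a plain single-process server: PM2 handles the forking and load balancing, so app.js needs no cluster boilerplate. A minimal sketch of what such an app.js might look like (the file name and route are just examples):

// app.js: a plain server; PM2's cluster mode forks and balances it for you
const express = require('express');

const app = express();
const PORT = process.env.PORT || 3000;

app.get('/', (req, res) => {
  res.json({ pid: process.pid }); // each PM2 instance reports its own pid
});

app.listen(PORT, () => {
  console.log(`Instance ${process.pid} listening on port ${PORT}`);
  // If wait_ready is enabled in the ecosystem file, tell PM2 we are ready
  if (process.send) process.send('ready');
});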

Load Balancing Strategies

  • Round Robin (default): distributes connections to workers sequentially. Use case: general purpose.
  • Least Connections: sends each request to the worker with the fewest active connections. Use case: long-lived connections.
  • IP Hash: routes the same client IP to the same worker. Use case: session affinity.

The built-in cluster module only implements round-robin (or letting the OS distribute connections); least-connections and IP-hash balancing are normally handled by a reverse proxy such as Nginx in front of the workers.

// Set the scheduling policy before forking workers (pick one)
cluster.schedulingPolicy = cluster.SCHED_RR;    // Round robin (default everywhere except Windows)
// cluster.schedulingPolicy = cluster.SCHED_NONE; // Let the OS distribute connections
// Can also be set via the NODE_CLUSTER_SCHED_POLICY env var ('rr' or 'none')

Session Handling with Clustering

// Problem: in-memory sessions don't work across workers, because each
// worker has its own memory and requests may land on different workers.

// Solution 1: Store sessions in Redis so every worker sees them
const express = require('express');
const session = require('express-session');
const RedisStore = require('connect-redis').default;
const { createClient } = require('redis');

const app = express();
const redisClient = createClient({ url: 'redis://localhost:6379' });
redisClient.connect().catch(console.error);

app.use(session({
  store: new RedisStore({ client: redisClient }),
  secret: 'your-secret',
  resave: false,
  saveUninitialized: false
}));

// Solution 2: Sticky sessions via a reverse proxy
// PM2's cluster mode balances requests round-robin and does not provide
// sticky sessions on its own; put a reverse proxy such as Nginx (ip_hash)
// in front of the app so each client keeps reaching the same worker.
// The PM2 options below only control reload behaviour: wait for the
// app's 'ready' signal before routing traffic to a new instance.
// ecosystem.config.js
module.exports = {
  apps: [{
    name: 'my-app',
    script: 'app.js',
    instances: 'max',
    exec_mode: 'cluster',
    listen_timeout: 5000,
    wait_ready: true
  }]
};

💡 Best Practices

  • Use PM2 in production for easier management
  • Always implement graceful shutdown
  • Use Redis or similar for shared state (sessions, cache)
  • Monitor worker health and restart failed workers (see the sketch below)
  • Don't fork more workers than you have CPU cores
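One way to monitor worker health is a simple heartbeat over the IPC channel, sketched below (the message shape and the intervals are arbitrary choices for this example): the primary kills and replaces any worker that stops reporting in.

const cluster = require('cluster');
const os = require('os');

if (cluster.isPrimary) {
  const lastSeen = new Map(); // worker.id -> time of last heartbeat

  cluster.on('fork', (worker) => lastSeen.set(worker.id, Date.now()));

  cluster.on('message', (worker, msg) => {
    if (msg && msg.type === 'heartbeat') lastSeen.set(worker.id, Date.now());
  });

  cluster.on('exit', (worker) => {
    lastSeen.delete(worker.id);
    cluster.fork(); // replace the dead worker
  });

  for (let i = 0; i < os.cpus().length; i++) cluster.fork();

  // Kill any worker that has not reported in for 10 seconds
  setInterval(() => {
    for (const id in cluster.workers) {
      const seen = lastSeen.get(Number(id));
      if (seen && Date.now() - seen > 10000) cluster.workers[id].kill();
    }
  }, 5000);

} else {
  // Worker: send a heartbeat every few seconds (the real server would start here)
  setInterval(() => process.send({ type: 'heartbeat' }), 3000);
}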
