Why Clustering?
A single Node.js process executes your JavaScript on a single thread, so it can only use one CPU core at a time. To take advantage of multi-core systems, you can use the built-in cluster module to spawn multiple worker processes that all share the same server port. This is essential for production applications handling high traffic.
🚀 Clustering vs Worker Threads
Clustering
- Multiple Node.js processes
- Each has its own memory
- Best for handling more requests
- Zero-downtime restarts
Worker Threads
- Multiple threads in one process
- Can share memory
- Best for CPU-intensive tasks (see the sketch below)
- Lower memory overhead
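By way of contrast, here is a minimal worker_threads sketch (not one of the cluster examples below) that moves a CPU-heavy loop off the main thread so the event loop stays responsive; the 1e8 loop limit is an arbitrary illustration.
const { Worker, isMainThread, parentPort, workerData } = require('worker_threads');

if (isMainThread) {
  // Main thread: offload the heavy loop to a separate thread
  const worker = new Worker(__filename, { workerData: { limit: 1e8 } });
  worker.on('message', (result) => console.log('Sum:', result));
  worker.on('error', (err) => console.error(err));
} else {
  // Worker thread: do the CPU-bound work without blocking the main event loop
  let sum = 0;
  for (let i = 0; i < workerData.limit; i++) sum += i;
  parentPort.postMessage(sum);
}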
Basic Cluster Setup
const cluster = require('cluster');
const http = require('http');
const os = require('os');
const numCPUs = os.cpus().length;
if (cluster.isPrimary) {
  console.log(`Primary process ${process.pid} is running`);

  // Fork one worker per CPU core
  for (let i = 0; i < numCPUs; i++) {
    cluster.fork();
  }

  cluster.on('exit', (worker, code, signal) => {
    console.log(`Worker ${worker.process.pid} died`);
    // Restart the worker
    cluster.fork();
  });
} else {
  // Workers share the TCP connection (here, an HTTP server on the same port)
  http.createServer((req, res) => {
    res.writeHead(200);
    res.end(`Handled by worker ${process.pid}\n`);
  }).listen(8000);

  console.log(`Worker ${process.pid} started`);
}
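One caveat with the restart-on-exit handler above: it also respawns workers that were stopped on purpose. A small refinement, sketched here using the real worker.exitedAfterDisconnect flag, skips the restart in that case. It is meant as a drop-in replacement for the 'exit' handler above:
cluster.on('exit', (worker, code, signal) => {
  if (worker.exitedAfterDisconnect) {
    // The primary called disconnect()/kill() itself (e.g. during a reload) - don't respawn
    console.log(`Worker ${worker.process.pid} exited intentionally`);
    return;
  }
  console.log(`Worker ${worker.process.pid} crashed (code ${code}), restarting...`);
  cluster.fork();
});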
Cluster with Express
const cluster = require('cluster');
const express = require('express');
const os = require('os');
const numCPUs = os.cpus().length;
if (cluster.isPrimary) {
  console.log(`Primary ${process.pid} starting ${numCPUs} workers...`);

  for (let i = 0; i < numCPUs; i++) {
    cluster.fork();
  }

  cluster.on('exit', (worker, code, signal) => {
    console.log(`Worker ${worker.process.pid} died. Restarting...`);
    cluster.fork();
  });

  // Graceful shutdown
  process.on('SIGTERM', () => {
    console.log('Shutting down...');
    for (const id in cluster.workers) {
      cluster.workers[id].kill();
    }
    process.exit(0);
  });
} else {
  const app = express();

  app.get('/', (req, res) => {
    res.json({
      message: 'Hello!',
      pid: process.pid,
      uptime: process.uptime()
    });
  });

  app.get('/heavy', (req, res) => {
    // Simulate CPU work
    let sum = 0;
    for (let i = 0; i < 1e7; i++) sum += i;
    res.json({ result: sum, pid: process.pid });
  });

  app.listen(3000, () => {
    console.log(`Worker ${process.pid} listening on port 3000`);
  });
}
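To confirm that requests really are being spread across workers, you can hit the server and watch the pid change between responses. A minimal check, assuming the Express example above is running on port 3000; keep-alive is disabled (agent: false) because a reused connection stays pinned to one worker:
const http = require('http');

// Make one request on a fresh connection and return the pid of the worker that answered
function getPid() {
  return new Promise((resolve, reject) => {
    http.get({ host: 'localhost', port: 3000, path: '/', agent: false }, (res) => {
      let data = '';
      res.on('data', (chunk) => (data += chunk));
      res.on('end', () => resolve(JSON.parse(data).pid));
    }).on('error', reject);
  });
}

async function main() {
  for (let i = 0; i < 10; i++) {
    console.log(`Request ${i + 1} handled by worker ${await getPid()}`);
  }
}

main().catch(console.error);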
Communication Between Workers
const cluster = require('cluster');
if (cluster.isPrimary) {
  const worker1 = cluster.fork();
  const worker2 = cluster.fork();

  // Receive messages from workers
  worker1.on('message', (msg) => {
    console.log('From worker 1:', msg);
    // Broadcast to all workers
    for (const id in cluster.workers) {
      cluster.workers[id].send({ type: 'broadcast', data: msg });
    }
  });

  // Send a message to a specific worker
  worker1.send({ type: 'init', data: { id: 1 } });
} else {
  // Worker receives messages from the primary
  process.on('message', (msg) => {
    console.log(`Worker ${process.pid} received:`, msg);
  });

  // Worker sends messages to the primary
  process.send({ status: 'ready', pid: process.pid });
}
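A practical use of this channel is collecting metrics in the primary. In the sketch below each worker notifies the primary once per handled request; the { type: 'request' } message shape and the 10-second reporting interval are arbitrary choices for illustration.
const cluster = require('cluster');
const http = require('http');

if (cluster.isPrimary) {
  const counts = {};

  for (let i = 0; i < 2; i++) {
    const worker = cluster.fork();
    counts[worker.process.pid] = 0;
    // Tally one message per handled request
    worker.on('message', (msg) => {
      if (msg.type === 'request') counts[worker.process.pid]++;
    });
  }

  // Print the per-worker totals every 10 seconds
  setInterval(() => console.log('Requests per worker:', counts), 10000);
} else {
  http.createServer((req, res) => {
    process.send({ type: 'request' }); // notify the primary
    res.end('ok');
  }).listen(8000);
}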
Graceful Shutdown
const cluster = require('cluster');
const express = require('express');
if (cluster.isPrimary) {
  // ... fork workers ...

  process.on('SIGTERM', gracefulShutdown);
  process.on('SIGINT', gracefulShutdown);

  function gracefulShutdown() {
    console.log('Received shutdown signal');
    // Ask every worker to finish up
    for (const id in cluster.workers) {
      cluster.workers[id].send('shutdown');
    }
    // Force exit if workers haven't finished within 30 seconds
    setTimeout(() => {
      console.log('Forcing exit...');
      process.exit(1);
    }, 30000);
  }
} else {
  const app = express();
  let isShuttingDown = false;

  // Reject new requests during shutdown
  app.use((req, res, next) => {
    if (isShuttingDown) {
      res.status(503).send('Server is shutting down');
    } else {
      next();
    }
  });

  const server = app.listen(3000, () => {
    console.log(`Worker ${process.pid} ready`);
  });

  // Track active connections so they can be closed explicitly
  const connections = new Set();
  server.on('connection', (conn) => {
    connections.add(conn);
    conn.on('close', () => connections.delete(conn));
  });

  process.on('message', (msg) => {
    if (msg === 'shutdown') {
      isShuttingDown = true;

      // Stop accepting new connections; exit once existing ones have closed
      server.close(() => {
        console.log(`Worker ${process.pid} closed`);
        process.exit(0);
      });

      // Close existing connections gracefully, then force-destroy any stragglers
      connections.forEach((conn) => conn.end());
      setTimeout(() => {
        connections.forEach((conn) => conn.destroy());
      }, 5000);
    }
  });
}
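The same primary/worker split is what makes the zero-downtime restarts mentioned earlier possible: replace workers one at a time, taking each old worker down only after its replacement is listening. A minimal sketch, assuming it runs in the primary and that no other 'exit' handler automatically respawns workers:
const cluster = require('cluster');

// Rolling restart: replace workers one by one so some are always serving traffic
function rollingRestart() {
  const workers = Object.values(cluster.workers);

  function restartNext(i) {
    if (i >= workers.length) return;
    const oldWorker = workers[i];
    const newWorker = cluster.fork();

    // Wait until the replacement is accepting connections...
    newWorker.once('listening', () => {
      // ...then gracefully stop the old worker and move on to the next one
      oldWorker.disconnect();
      oldWorker.once('exit', () => restartNext(i + 1));
    });
  }

  restartNext(0);
}

// e.g. trigger with: kill -SIGUSR2 <primary pid>
process.on('SIGUSR2', rollingRestart);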
Using PM2 (Production)
PM2 is a production process manager that handles clustering, monitoring, and zero-downtime reloads.
# Install PM2 globally
npm install -g pm2
# Start with cluster mode (uses all CPUs)
pm2 start app.js -i max
# Or specify number of instances
pm2 start app.js -i 4
# Ecosystem file (ecosystem.config.js)
module.exports = {
  apps: [{
    name: 'my-app',
    script: 'app.js',
    instances: 'max',
    exec_mode: 'cluster',
    env: {
      NODE_ENV: 'production',
      PORT: 3000
    },
    max_memory_restart: '1G',
    error_file: './logs/error.log',
    out_file: './logs/output.log'
  }]
};
# PM2 commands
pm2 start ecosystem.config.js
pm2 reload my-app # Zero-downtime reload
pm2 scale my-app 4 # Scale to 4 instances
pm2 monit # Monitor dashboard
pm2 logs # View logs
pm2 save # Save process list
pm2 startup # Generate startup script
Load Balancing Strategies
| Strategy | Description | Use Case |
|---|---|---|
| Round Robin (default) | Distributes connections sequentially | General purpose |
| Least Connections | Sends to worker with fewest connections | Long-lived connections |
| IP Hash | Same IP always goes to same worker | Session affinity |
Note that Node's cluster module itself only implements round robin or OS-level scheduling; strategies such as least connections or IP hash are normally configured in a reverse proxy (for example nginx) in front of the workers.
// Set the scheduling policy before calling fork()
// (or via the NODE_CLUSTER_SCHED_POLICY env var: 'rr' or 'none')
cluster.schedulingPolicy = cluster.SCHED_RR;   // Round robin (default everywhere except Windows)
cluster.schedulingPolicy = cluster.SCHED_NONE; // Let the OS distribute connections
Session Handling with Clustering
// Problem: In-memory sessions don't work across workers!
// Solution 1: Use Redis for sessions
const express = require('express');
const session = require('express-session');
const RedisStore = require('connect-redis').default;
const { createClient } = require('redis');

const app = express();

const redisClient = createClient({ url: 'redis://localhost:6379' });
redisClient.connect().catch(console.error);

app.use(session({
  store: new RedisStore({ client: redisClient }),
  secret: 'your-secret',
  resave: false,
  saveUninitialized: false
}));
// Solution 2: Sticky sessions (session affinity)
// PM2's cluster mode does not provide sticky sessions itself; affinity is usually
// configured at the load balancer in front of the app (the IP Hash strategy above).
// The PM2 options below only make reloads graceful - they do not pin clients to a worker.
// ecosystem.config.js
module.exports = {
  apps: [{
    name: 'my-app',
    script: 'app.js',
    instances: 'max',
    exec_mode: 'cluster',
    // Wait for process.send('ready') before marking the instance online
    wait_ready: true,
    listen_timeout: 5000
  }]
};
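If you do enable wait_ready as above, the app must tell PM2 when it is actually ready to receive traffic. A small sketch of that handshake (the port is a placeholder):
const express = require('express');
const app = express();

app.listen(3000, () => {
  // With wait_ready: true, PM2 waits for this signal before marking
  // the instance online during a reload
  if (process.send) {
    process.send('ready');
  }
});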
💡 Best Practices
- Use PM2 in production for easier management
- Always implement graceful shutdown
- Use Redis or similar for shared state (sessions, cache)
- Monitor worker health and restart failed workers
- Don't fork more workers than CPU cores