MongoDB Monitoring and Maintenance: Operational Excellence
By Mamun Rashid (@mmncit)
Welcome to Part 9 of our MongoDB Zero to Hero series. Now that you've learned how to deploy MongoDB in production, the next step is understanding how to monitor and maintain that deployment for optimal performance and reliability.
Monitoring Strategy Overview
Key Areas to Monitor
- Performance Metrics: Query performance, throughput, latency
- Resource Utilization: CPU, memory, disk, network
- Replication Health: Lag, sync status, member health
- Storage Metrics: Disk usage, index efficiency, collection stats
- Security Events: Authentication failures, unauthorized access
- Application Metrics: Connection pool, error rates, response times
Built-in Monitoring Tools
MongoDB Server Status
// Get comprehensive server statistics
db.serverStatus();
// Key sections to monitor:
const serverStatus = db.serverStatus();
// Connections
console.log('Current connections:', serverStatus.connections.current);
console.log('Available connections:', serverStatus.connections.available);
// Operation counters (cumulative totals since startup - sample twice and diff to get rates)
const opcounters = serverStatus.opcounters;
console.log('Total inserts:', opcounters.insert);
console.log('Total queries:', opcounters.query);
console.log('Total updates:', opcounters.update);
console.log('Total deletes:', opcounters.delete);
// Memory usage
const mem = serverStatus.mem;
console.log('Resident memory (MB):', mem.resident);
console.log('Virtual memory (MB):', mem.virtual);
console.log('Mapped memory (MB):', mem.mapped || 'N/A'); // not reported on WiredTiger-only deployments
// WiredTiger cache statistics
const cache = serverStatus.wiredTiger.cache;
console.log('Cache size (MB):', cache['maximum bytes configured'] / 1024 / 1024);
console.log('Cache used (MB):', cache['bytes currently in the cache'] / 1024 / 1024);
// Approximate hit ratio: 1 - (pages read into cache / pages requested from the cache)
const hitRatio = 1 - cache['pages read into cache'] / cache['pages requested from the cache'];
console.log('Cache hit ratio:', (hitRatio * 100).toFixed(1) + '%');
Database Statistics
// Database-level statistics
db.stats();
// Collection-level statistics
db.orders.stats();
// Index statistics
db.orders.aggregate([{ $indexStats: {} }]);
// Storage engine statistics
db.serverStatus().wiredTiger;
// Monitoring script for key metrics
function monitorDatabase() {
const stats = db.stats();
const serverStatus = db.serverStatus();
return {
timestamp: new Date(),
database: {
collections: stats.collections,
objects: stats.objects,
avgObjSize: stats.avgObjSize,
dataSize: stats.dataSize,
storageSize: stats.storageSize,
indexSize: stats.indexSize,
},
performance: {
opcounters: serverStatus.opcounters,
connections: serverStatus.connections,
globalLock: serverStatus.globalLock,
memory: serverStatus.mem,
},
replication: serverStatus.repl || null,
};
}
// Log metrics periodically
setInterval(() => {
const metrics = monitorDatabase();
console.log(JSON.stringify(metrics, null, 2));
}, 60000); // Every minute
Replica Set Monitoring
// Replica set status and health
function monitorReplicaSet() {
try {
const status = rs.status();
const config = rs.conf();
const monitoring = {
timestamp: new Date(),
replSetName: status.set,
members: status.members.map((member) => ({
name: member.name,
state: member.stateStr,
health: member.health,
uptime: member.uptime,
lastHeartbeat: member.lastHeartbeat,
lastHeartbeatRecv: member.lastHeartbeatRecv,
pingMs: member.pingMs,
syncSourceHost: member.syncSourceHost,
oplogLag: member.optimeDate ? (status.date - member.optimeDate) / 1000 : null,
})),
primary: status.members.find((m) => m.state === 1)?.name,
secondary: status.members.filter((m) => m.state === 2).map((m) => m.name),
};
// Check for issues
const issues = [];
monitoring.members.forEach((member) => {
if (member.health !== 1) {
issues.push(`Member ${member.name} is unhealthy`);
}
if (member.oplogLag && member.oplogLag > 10) {
issues.push(`Member ${member.name} has high replication lag: ${member.oplogLag}s`);
}
if (member.pingMs && member.pingMs > 100) {
issues.push(`Member ${member.name} has high ping: ${member.pingMs}ms`);
}
});
if (!monitoring.primary) {
issues.push('No primary member found');
}
monitoring.issues = issues;
return monitoring;
} catch (error) {
return {
timestamp: new Date(),
error: error.message,
status: 'Error getting replica set status',
};
}
}
// Monitor replica set health
setInterval(() => {
const replicaStatus = monitorReplicaSet();
if (replicaStatus.issues && replicaStatus.issues.length > 0) {
console.warn('Replica set issues detected:', replicaStatus.issues);
// Send alerts
}
console.log('Replica set status:', JSON.stringify(replicaStatus, null, 2));
}, 30000); // Every 30 seconds
Query Performance Monitoring
// Enable and monitor profiling
db.setProfilingLevel(1, { slowms: 100 });
// Query profile data
function analyzeSlowQueries() {
const slowQueries = db.system.profile
.find({
ts: { $gte: new Date(Date.now() - 3600000) }, // Last hour
})
.sort({ ts: -1 })
.toArray();
const analysis = {
timestamp: new Date(),
totalSlowQueries: slowQueries.length,
queries: [],
};
slowQueries.forEach((query) => {
analysis.queries.push({
timestamp: query.ts,
operation: query.command,
duration: query.millis,
namespace: query.ns,
planSummary: query.planSummary,
docsExamined: query.docsExamined,
docsReturned: query.docsReturned,
keysExamined: query.keysExamined,
});
});
return analysis;
}
// Identify most common slow operations
function getSlowQueryPatterns() {
return db.system.profile.aggregate([
{ $match: { ts: { $gte: new Date(Date.now() - 86400000) } } }, // Last 24 hours
{
$group: {
_id: {
operation: '$command',
planSummary: '$planSummary',
},
count: { $sum: 1 },
avgDuration: { $avg: '$millis' },
maxDuration: { $max: '$millis' },
totalDuration: { $sum: '$millis' },
},
},
{ $sort: { totalDuration: -1 } },
{ $limit: 10 },
]);
}
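Both helpers above run in mongosh, where aggregate() returns a cursor, so the top patterns can be printed directly; a purely illustrative one-liner:
// Print the ten most expensive slow-query patterns from the last 24 hours
getSlowQueryPatterns().forEach((pattern) => printjson(pattern));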
External Monitoring Solutions
Prometheus and Grafana Setup
MongoDB Exporter Configuration
# docker-compose.yml
version: '3.8'
services:
mongodb-exporter:
image: percona/mongodb_exporter:0.32
ports:
- '9216:9216'
environment:
MONGODB_URI: 'mongodb://monitor:password@mongodb-primary:27017,mongodb-secondary1:27017,mongodb-secondary2:27017/?replicaSet=myapp-replica'
command:
- '--mongodb.uri=mongodb://monitor:password@mongodb-primary:27017,mongodb-secondary1:27017,mongodb-secondary2:27017/?replicaSet=myapp-replica'
- '--collect-all'
- '--discovering-mode'
- '--mongodb.collstats-colls=orders,users,products'
prometheus:
image: prom/prometheus:latest
ports:
- '9090:9090'
volumes:
- ./prometheus.yml:/etc/prometheus/prometheus.yml
- ./alert-rules.yml:/etc/prometheus/alert-rules.yml
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--web.console.libraries=/etc/prometheus/console_libraries'
- '--web.console.templates=/etc/prometheus/consoles'
- '--web.enable-lifecycle'
grafana:
image: grafana/grafana:latest
ports:
- '3000:3000'
environment:
- GF_SECURITY_ADMIN_PASSWORD=admin123
volumes:
- grafana-storage:/var/lib/grafana
- ./grafana/dashboards:/etc/grafana/provisioning/dashboards
- ./grafana/datasources:/etc/grafana/provisioning/datasources
volumes:
grafana-storage:
Prometheus Configuration
# prometheus.yml
global:
scrape_interval: 15s
evaluation_interval: 15s
rule_files:
- 'alert-rules.yml'
scrape_configs:
- job_name: 'mongodb'
static_configs:
- targets: ['mongodb-exporter:9216']
scrape_interval: 30s
scrape_timeout: 10s
alerting:
alertmanagers:
- static_configs:
- targets:
- alertmanager:9093
Custom Monitoring Dashboard
{
"dashboard": {
"title": "MongoDB Monitoring",
"panels": [
{
"title": "Operations Per Second",
"type": "graph",
"targets": [
{
"expr": "rate(mongodb_op_counters_total[5m])",
"legendFormat": "{{type}}"
}
]
},
{
"title": "Memory Usage",
"type": "graph",
"targets": [
{
"expr": "mongodb_memory{type=\"resident\"}",
"legendFormat": "Resident Memory"
},
{
"expr": "mongodb_memory{type=\"virtual\"}",
"legendFormat": "Virtual Memory"
}
]
},
{
"title": "Connection Count",
"type": "stat",
"targets": [
{
"expr": "mongodb_connections{state=\"current\"}"
}
]
},
{
"title": "Replication Lag",
"type": "graph",
"targets": [
{
"expr": "mongodb_mongod_replset_member_replication_lag",
"legendFormat": "{{member}}"
}
]
}
]
}
}
Application Performance Monitoring
// Node.js application monitoring
const client = require('prom-client');
// Create custom metrics
const queryDuration = new client.Histogram({
name: 'mongodb_query_duration_seconds',
help: 'Duration of MongoDB queries',
labelNames: ['operation', 'collection', 'database'],
});
const queryErrors = new client.Counter({
name: 'mongodb_query_errors_total',
help: 'Total number of MongoDB query errors',
labelNames: ['operation', 'collection', 'error_type'],
});
const connectionPoolSize = new client.Gauge({
name: 'mongodb_connection_pool_size',
help: 'Current MongoDB connection pool size',
labelNames: ['state'],
});
// Wrap MongoDB operations
class MonitoredMongoClient {
constructor(client) {
this.client = client;
this.db = client.db();
}
async find(collection, query, options = {}) {
const timer = queryDuration.startTimer({
operation: 'find',
collection,
database: this.db.databaseName,
});
try {
const result = await this.db.collection(collection).find(query, options).toArray();
timer();
return result;
} catch (error) {
timer();
queryErrors.inc({
operation: 'find',
collection,
error_type: error.constructor.name,
});
throw error;
}
}
async insertOne(collection, document) {
const timer = queryDuration.startTimer({
operation: 'insertOne',
collection,
database: this.db.databaseName,
});
try {
const result = await this.db.collection(collection).insertOne(document);
timer();
return result;
} catch (error) {
timer();
queryErrors.inc({
operation: 'insertOne',
collection,
error_type: error.constructor.name,
});
throw error;
}
}
// Monitor connection pool
// Note: this reads internal driver structures (topology.s.coreTopology), which differ
// between driver versions; newer drivers expose pool state through CMAP monitoring events
// such as connectionCheckedOut/connectionCheckedIn instead.
updateConnectionPoolMetrics() {
const topology = this.client.topology;
if (topology && topology.s && topology.s.coreTopology) {
const pools = topology.s.coreTopology.s.servers;
let totalConnections = 0;
let availableConnections = 0;
pools.forEach((server) => {
if (server.s && server.s.pool) {
totalConnections += server.s.pool.totalConnectionCount;
availableConnections += server.s.pool.availableConnectionCount;
}
});
connectionPoolSize.set({ state: 'total' }, totalConnections);
connectionPoolSize.set({ state: 'available' }, availableConnections);
connectionPoolSize.set({ state: 'used' }, totalConnections - availableConnections);
}
}
}
// Monitor connection pool periodically
setInterval(() => {
monitoredClient.updateConnectionPoolMetrics();
}, 10000);
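For Prometheus to scrape these custom metrics, the application also needs to expose them over HTTP. A minimal sketch, assuming an Express app; the /metrics route and port 9464 are arbitrary choices here, and the endpoint would be added as an extra target under scrape_configs in prometheus.yml:
const express = require('express');
const promClient = require('prom-client');

const app = express();

// Collect default Node.js process metrics (event loop lag, heap usage, GC) as well
promClient.collectDefaultMetrics();

// Expose everything registered with prom-client, including the MongoDB metrics above
app.get('/metrics', async (req, res) => {
  res.set('Content-Type', promClient.register.contentType);
  res.end(await promClient.register.metrics());
});

app.listen(9464, () => console.log('Metrics available at http://localhost:9464/metrics'));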
Alerting Configuration
Alert Rules
# alert-rules.yml
groups:
- name: mongodb.rules
rules:
# Critical Alerts
- alert: MongoDBDown
expr: up{job="mongodb"} == 0
for: 1m
labels:
severity: critical
annotations:
summary: 'MongoDB instance {{ $labels.instance }} is down'
description: 'MongoDB instance has been down for more than 1 minute'
- alert: MongoDBReplicationLag
expr: mongodb_mongod_replset_member_replication_lag > 10
for: 2m
labels:
severity: critical
annotations:
summary: 'MongoDB replication lag is high on {{ $labels.instance }}'
description: 'Replication lag is {{ $value }} seconds'
- alert: MongoDBHighMemoryUsage
expr: (mongodb_memory{type="resident"} / mongodb_memory{type="virtual"}) > 0.9
for: 5m
labels:
severity: warning
annotations:
summary: 'MongoDB memory usage is high on {{ $labels.instance }}'
description: 'Memory usage is {{ $value | humanizePercentage }}'
# Performance Alerts
- alert: MongoDBSlowQueries
expr: rate(mongodb_mongod_op_latencies_latency_total[5m]) > 1000
for: 5m
labels:
severity: warning
annotations:
summary: 'MongoDB has slow queries on {{ $labels.instance }}'
description: 'Average query latency is {{ $value }}ms'
- alert: MongoDBHighConnections
expr: mongodb_connections{state="current"} / mongodb_connections{state="available"} > 0.8
for: 5m
labels:
severity: warning
annotations:
summary: 'MongoDB connection usage is high on {{ $labels.instance }}'
description: 'Connection usage is {{ $value | humanizePercentage }}'
- alert: MongoDBCacheMissRatio
expr: rate(mongodb_mongod_wiredtiger_cache_bytes_read_into_cache_total[5m]) / rate(mongodb_mongod_wiredtiger_cache_bytes_requested_from_cache_total[5m]) > 0.3
for: 10m
labels:
severity: warning
annotations:
summary: 'MongoDB cache miss ratio is high on {{ $labels.instance }}'
description: 'Cache miss ratio is {{ $value | humanizePercentage }}'
# Storage Alerts
- alert: MongoDBDiskSpaceUsage
expr: (mongodb_mongod_storage_engine_persistent_cache_bytes / (1024^3)) > 80
for: 5m
labels:
severity: warning
annotations:
summary: 'MongoDB disk usage is high on {{ $labels.instance }}'
description: 'Disk usage is {{ $value }}GB'
- alert: MongoDBIndexMissing
expr: mongodb_mongod_metrics_query_executor_scanned_objects_total / mongodb_mongod_metrics_query_executor_scanned_total > 1000
for: 10m
labels:
severity: warning
annotations:
summary: 'MongoDB may have missing indexes on {{ $labels.instance }}'
description: 'High object scan ratio detected'
Notification Channels
// Slack notification service
class AlertManager {
constructor(webhookUrl) {
this.webhookUrl = webhookUrl;
}
async sendAlert(alert) {
const message = {
channel: '#mongodb-alerts',
username: 'MongoDB Monitor',
icon_emoji: ':warning:',
attachments: [
{
color: this.getColorForSeverity(alert.severity),
title: alert.summary,
text: alert.description,
fields: [
{
title: 'Instance',
value: alert.instance,
short: true,
},
{
title: 'Severity',
value: alert.severity,
short: true,
},
{
title: 'Time',
value: new Date().toISOString(),
short: true,
},
],
},
],
};
try {
await fetch(this.webhookUrl, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(message),
});
} catch (error) {
console.error('Failed to send alert:', error);
}
}
getColorForSeverity(severity) {
switch (severity) {
case 'critical':
return 'danger';
case 'warning':
return 'warning';
case 'info':
return 'good';
default:
return '#439FE0';
}
}
}
// Email notification service
class EmailAlertManager {
constructor(transporter) {
this.transporter = transporter;
}
async sendAlert(alert, recipients) {
const mailOptions = {
from: 'mongodb-monitor@company.com',
to: recipients.join(','),
subject: `MongoDB Alert: ${alert.severity.toUpperCase()} - ${alert.summary}`,
html: `
<h2>MongoDB Alert</h2>
<p><strong>Severity:</strong> ${alert.severity}</p>
<p><strong>Instance:</strong> ${alert.instance}</p>
<p><strong>Summary:</strong> ${alert.summary}</p>
<p><strong>Description:</strong> ${alert.description}</p>
<p><strong>Time:</strong> ${new Date().toISOString()}</p>
`,
};
try {
await this.transporter.sendMail(mailOptions);
} catch (error) {
console.error('Failed to send email alert:', error);
}
}
}
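A sketch of how the two notification services might be wired together; the webhook URL, SMTP settings, and recipient address are placeholders, and nodemailer is assumed for the transporter:
const nodemailer = require('nodemailer');

// Placeholder endpoints and credentials - substitute your own
const slackAlerts = new AlertManager('https://hooks.slack.com/services/XXX/YYY/ZZZ');
const emailAlerts = new EmailAlertManager(
  nodemailer.createTransport({ host: 'smtp.company.com', port: 587 }),
);

async function notify(alert) {
  // Post every alert to Slack; escalate critical alerts to email as well
  await slackAlerts.sendAlert(alert);
  if (alert.severity === 'critical') {
    await emailAlerts.sendAlert(alert, ['oncall@company.com']);
  }
}

// Example payload using the fields both managers expect
notify({
  severity: 'critical',
  instance: 'mongodb-primary:27017',
  summary: 'MongoDB replication lag is high',
  description: 'Replication lag exceeded 10 seconds for more than 2 minutes',
});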
Maintenance Tasks
Regular Maintenance Checklist
Daily Tasks
#!/bin/bash
# daily-maintenance.sh
echo "=== MongoDB Daily Maintenance $(date) ==="
# Check replica set status
mongo --eval "rs.status()" | grep -E "(state|health|optime)"
# Check disk space
df -h | grep -E "(data|log)"
# Check for slow queries
mongo --eval "db.system.profile.find({ts: {\$gte: new Date(Date.now() - 86400000)}, millis: {\$gt: 1000}}).count()" myapp
# Check connection count
mongo --eval "db.serverStatus().connections" myapp
# Rotate logs if needed
if [ $(stat -c%s /var/log/mongodb/mongod.log) -gt 1073741824 ]; then
echo "Rotating large log file"
logrotate -f /etc/logrotate.d/mongodb
fi
echo "Daily maintenance completed"
Weekly Tasks
#!/bin/bash
# weekly-maintenance.sh
echo "=== MongoDB Weekly Maintenance $(date) ==="
# Validate database integrity
mongo --eval "db.runCommand({validate: 'orders', full: true})" myapp
mongo --eval "db.runCommand({validate: 'users', full: true})" myapp
# Check index usage
mongo --eval "db.orders.aggregate([{\$indexStats: {}}])" myapp
# Clean up old profile data
mongo --eval "db.system.profile.deleteMany({ts: {\$lt: new Date(Date.now() - 604800000)}})" myapp
# Analyze collection statistics
mongo --eval "db.orders.stats(1024*1024)" myapp
mongo --eval "db.users.stats(1024*1024)" myapp
# Check replication oplog window
mongo --eval "rs.printReplicationInfo()" myapp
echo "Weekly maintenance completed"
Monthly Tasks
#!/bin/bash
# monthly-maintenance.sh
echo "=== MongoDB Monthly Maintenance $(date) ==="
# Full validation of every collection (validate takes a collection name, so loop over them;
# full validation is expensive - run it during a maintenance window)
mongo --eval "db.getCollectionNames().forEach(function(c) { printjson(db.runCommand({validate: c, full: true})); })" myapp
# Reindex collections if needed (during maintenance window)
# mongo --eval "db.orders.reIndex()" myapp
# Compact collections (WiredTiger - use with caution)
# mongo --eval "db.runCommand({compact: 'logs'})" myapp
# Clear cached query plans so the planner re-evaluates after data distribution changes
mongo --eval "db.runCommand({planCacheClear: 'orders'})" myapp
mongo --eval "db.runCommand({planCacheClear: 'users'})" myapp
# Backup verification
echo "Verifying latest backup..."
./verify-backup.sh
echo "Monthly maintenance completed"
Index Maintenance
// Index maintenance and optimization
class IndexManager {
constructor(db) {
this.db = db;
}
// Analyze index usage
async analyzeIndexUsage(collection) {
const stats = await this.db
.collection(collection)
.aggregate([{ $indexStats: {} }])
.toArray();
const analysis = {
collection,
timestamp: new Date(),
indexes: stats.map((stat) => ({
name: stat.name,
key: stat.key,
accesses: stat.accesses.ops,
since: stat.accesses.since,
usageRate: stat.accesses.ops / this.getDaysSince(stat.accesses.since),
})),
};
// Identify unused indexes
analysis.unusedIndexes = analysis.indexes.filter(
(idx) => idx.name !== '_id_' && idx.accesses === 0,
);
// Identify low-usage indexes
analysis.lowUsageIndexes = analysis.indexes.filter(
(idx) => idx.name !== '_id_' && idx.usageRate < 1 && idx.accesses > 0,
);
return analysis;
}
// Recommend index optimizations
async recommendOptimizations(collection) {
const usage = await this.analyzeIndexUsage(collection);
const recommendations = [];
// Check for unused indexes
usage.unusedIndexes.forEach((idx) => {
recommendations.push({
type: 'DROP_INDEX',
index: idx.name,
reason: 'Index is not being used',
impact: 'Reduced write overhead',
});
});
// Check for redundant indexes
const redundant = this.findRedundantIndexes(usage.indexes);
redundant.forEach((pair) => {
recommendations.push({
type: 'DROP_INDEX',
index: pair.redundant.name,
reason: `Redundant with ${pair.primary.name}`,
impact: 'Reduced storage and write overhead',
});
});
// Analyze query patterns for missing indexes
const queryPatterns = await this.analyzeQueryPatterns(collection);
queryPatterns.forEach((pattern) => {
if (pattern.recommendation) {
recommendations.push(pattern.recommendation);
}
});
return recommendations;
}
// Find redundant indexes
findRedundantIndexes(indexes) {
const redundant = [];
for (let i = 0; i < indexes.length; i++) {
for (let j = i + 1; j < indexes.length; j++) {
const idx1 = indexes[i];
const idx2 = indexes[j];
if (this.isRedundant(idx1.key, idx2.key)) {
redundant.push({
primary: idx1.accesses > idx2.accesses ? idx1 : idx2,
redundant: idx1.accesses > idx2.accesses ? idx2 : idx1,
});
}
}
}
return redundant;
}
// Check if one index is redundant with another
isRedundant(key1, key2) {
const fields1 = Object.keys(key1);
const fields2 = Object.keys(key2);
// Check if key1 is a prefix of key2 or vice versa
if (fields1.length < fields2.length) {
return fields1.every(
(field, index) => fields2[index] === field && key1[field] === key2[field],
);
} else if (fields2.length < fields1.length) {
return fields2.every(
(field, index) => fields1[index] === field && key1[field] === key2[field],
);
}
return false;
}
// Analyze query patterns from profiler
async analyzeQueryPatterns(collection) {
const slowQueries = await this.db
.collection('system.profile')
.find({
ns: `${this.db.databaseName}.${collection}`,
ts: { $gte: new Date(Date.now() - 86400000) }, // Last 24 hours
millis: { $gt: 100 },
})
.toArray();
const patterns = {};
slowQueries.forEach((query) => {
const key = JSON.stringify(query.command.find || query.command.update || {});
if (!patterns[key]) {
patterns[key] = {
count: 0,
totalTime: 0,
avgTime: 0,
query: query.command,
planSummary: query.planSummary,
};
}
patterns[key].count++;
patterns[key].totalTime += query.millis;
patterns[key].avgTime = patterns[key].totalTime / patterns[key].count;
});
// Generate recommendations for patterns without good indexes
return Object.values(patterns).map((pattern) => {
if (pattern.planSummary === 'COLLSCAN' && pattern.count >= 10) {
return {
...pattern,
recommendation: {
type: 'CREATE_INDEX',
fields: this.extractIndexFields(pattern.query),
reason: `Collection scan detected for frequent query (${pattern.count} times)`,
impact: `Could improve query time from ${pattern.avgTime}ms`,
},
};
}
return pattern;
});
}
extractIndexFields(query) {
// Simple field extraction - in practice, this would be more sophisticated
const fields = {};
if (query.find) {
Object.keys(query.find).forEach((field) => {
fields[field] = 1;
});
}
if (query.sort) {
Object.keys(query.sort).forEach((field) => {
fields[field] = query.sort[field];
});
}
return fields;
}
getDaysSince(date) {
return Math.max(1, Math.floor((Date.now() - date.getTime()) / (1000 * 60 * 60 * 24)));
}
}
// Usage
const indexManager = new IndexManager(db);
// Weekly index analysis
async function weeklyIndexMaintenance() {
const collections = ['orders', 'users', 'products', 'logs'];
for (const collection of collections) {
console.log(`\n=== Analyzing indexes for ${collection} ===`);
const analysis = await indexManager.analyzeIndexUsage(collection);
console.log(`Total indexes: ${analysis.indexes.length}`);
console.log(`Unused indexes: ${analysis.unusedIndexes.length}`);
console.log(`Low usage indexes: ${analysis.lowUsageIndexes.length}`);
const recommendations = await indexManager.recommendOptimizations(collection);
if (recommendations.length > 0) {
console.log('\nRecommendations:');
recommendations.forEach((rec) => {
console.log(`- ${rec.type}: ${rec.reason}`);
});
}
}
}
Performance Tuning
// Performance analysis and tuning
class PerformanceTuner {
constructor(db) {
this.db = db;
}
// Analyze query performance
async analyzeQueryPerformance() {
const analysis = await this.db
.collection('system.profile')
.aggregate([
{
$match: {
ts: { $gte: new Date(Date.now() - 86400000) }, // Last 24 hours
millis: { $exists: true },
},
},
{
$group: {
_id: {
command: '$command',
planSummary: '$planSummary',
},
count: { $sum: 1 },
avgDuration: { $avg: '$millis' },
maxDuration: { $max: '$millis' },
totalDuration: { $sum: '$millis' },
avgDocsExamined: { $avg: '$docsExamined' },
avgDocsReturned: { $avg: '$docsReturned' },
},
},
{
$match: {
$or: [{ avgDuration: { $gt: 100 } }, { count: { $gt: 100 } }],
},
},
{ $sort: { totalDuration: -1 } },
{ $limit: 20 },
])
.toArray();
return {
timestamp: new Date(),
slowestQueries: analysis,
recommendations: this.generatePerformanceRecommendations(analysis),
};
}
generatePerformanceRecommendations(queries) {
const recommendations = [];
queries.forEach((query) => {
const efficiency = query.avgDocsReturned / Math.max(query.avgDocsExamined, 1);
if (efficiency < 0.1) {
recommendations.push({
type: 'INDEX_NEEDED',
query: query._id.command,
reason: `Low query efficiency: ${(efficiency * 100).toFixed(1)}%`,
impact: `Examined ${query.avgDocsExamined} docs to return ${query.avgDocsReturned}`,
});
}
if (query.avgDuration > 1000) {
recommendations.push({
type: 'SLOW_QUERY',
query: query._id.command,
reason: `Average duration ${query.avgDuration}ms exceeds threshold`,
frequency: query.count,
});
}
if (query._id.planSummary === 'COLLSCAN' && query.count > 50) {
recommendations.push({
type: 'COLLECTION_SCAN',
query: query._id.command,
reason: 'Frequent collection scans detected',
frequency: query.count,
});
}
});
return recommendations;
}
// Analyze connection patterns
async analyzeConnections() {
const serverStatus = await this.db.admin().serverStatus();
const connections = serverStatus.connections;
return {
current: connections.current,
available: connections.available,
totalCreated: connections.totalCreated,
utilization: ((connections.current / connections.available) * 100).toFixed(1) + '%',
recommendations: this.getConnectionRecommendations(connections),
};
}
getConnectionRecommendations(connections) {
const recommendations = [];
const utilization = connections.current / connections.available;
if (utilization > 0.8) {
recommendations.push({
type: 'HIGH_CONNECTION_USAGE',
message: 'Consider increasing connection pool size or optimizing connection usage',
});
}
if (connections.totalCreated > connections.current * 10) {
recommendations.push({
type: 'CONNECTION_CHURN',
message: 'High connection churn detected - review connection pooling configuration',
});
}
return recommendations;
}
}
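A short usage sketch for the tuner, assuming db is a connected Node.js driver Db instance and profiling is enabled so system.profile has data:
const tuner = new PerformanceTuner(db);

async function nightlyPerformanceReport() {
  const queryReport = await tuner.analyzeQueryPerformance();
  const connectionReport = await tuner.analyzeConnections();

  console.log(`Slow query groups: ${queryReport.slowestQueries.length}`);
  queryReport.recommendations.forEach((rec) => console.log(`- ${rec.type}: ${rec.reason}`));

  console.log(`Connection utilization: ${connectionReport.utilization}`);
  connectionReport.recommendations.forEach((rec) => console.log(`- ${rec.type}: ${rec.message}`));
}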
Troubleshooting Common Issues
Performance Issues
// Performance troubleshooting toolkit
class PerformanceTroubleshooter {
constructor(db) {
this.db = db;
}
// Diagnose slow queries
async diagnoseSlow queries() {
console.log('=== Slow Query Diagnosis ===');
// Check if profiling is enabled (the profile command is per-database, so run it on this db)
const profilingStatus = await this.db.command({ profile: -1 });
console.log('Profiling level:', profilingStatus.was);
if (profilingStatus.was === 0) {
console.log('Enabling profiling for slow queries...');
await this.db.command({ profile: 1, slowms: 100 });
}
// Get recent slow queries
const slowQueries = await this.db.collection('system.profile')
.find({ ts: { $gte: new Date(Date.now() - 3600000) } })
.sort({ millis: -1 })
.limit(10)
.toArray();
slowQueries.forEach((query, index) => {
console.log(`\n--- Slow Query ${index + 1} ---`);
console.log('Duration:', query.millis, 'ms');
console.log('Command:', JSON.stringify(query.command, null, 2));
console.log('Plan:', query.planSummary);
console.log('Docs examined:', query.docsExamined);
console.log('Docs returned:', query.docsReturned);
if (query.docsExamined > query.docsReturned * 10) {
console.log('⚠️ Poor query efficiency - consider adding indexes');
}
});
}
// Check replica set lag
async checkReplicationLag() {
try {
const status = await this.db.admin().command({ replSetGetStatus: 1 });
const primary = status.members.find(m => m.state === 1);
if (!primary) {
console.log('❌ No primary found in replica set');
return;
}
console.log('=== Replication Status ===');
console.log('Primary:', primary.name);
status.members.forEach(member => {
if (member.state === 2) { // Secondary
const lag = (primary.optimeDate - member.optimeDate) / 1000;
console.log(`Secondary ${member.name}: ${lag}s lag`);
if (lag > 10) {
console.log(`⚠️ High replication lag on ${member.name}`);
// Check for issues
if (member.lastHeartbeatMessage) {
console.log('Last message:', member.lastHeartbeatMessage);
}
}
}
});
} catch (error) {
console.log('Not connected to replica set or insufficient permissions');
}
}
// Memory analysis
async analyzeMemoryUsage() {
const serverStatus = await this.db.admin().serverStatus();
const mem = serverStatus.mem;
const wiredTiger = serverStatus.wiredTiger;
console.log('=== Memory Analysis ===');
console.log('Resident Memory:', mem.resident, 'MB');
console.log('Virtual Memory:', mem.virtual, 'MB');
console.log('Mapped Memory:', mem.mapped || 'N/A', 'MB');
if (wiredTiger && wiredTiger.cache) {
const cache = wiredTiger.cache;
const cacheSize = cache['maximum bytes configured'] / 1024 / 1024;
const cacheUsed = cache['bytes currently in the cache'] / 1024 / 1024;
// serverStatus does not report a ready-made hit ratio; approximate it from page counters
const hitRatio = (1 - cache['pages read into cache'] / cache['pages requested from the cache']) * 100;
console.log('\n--- WiredTiger Cache ---');
console.log('Max cache size:', cacheSize.toFixed(1), 'MB');
console.log('Current cache usage:', cacheUsed.toFixed(1), 'MB');
console.log('Cache hit ratio:', hitRatio.toFixed(1), '%');
if (hitRatio < 95) {
console.log('⚠️ Low cache hit ratio - consider increasing cache size');
}
if (cacheUsed / cacheSize > 0.95) {
console.log('⚠️ Cache is nearly full - consider increasing cache size');
}
}
}
// Connection analysis
async analyzeConnections() {
const serverStatus = await this.db.admin().serverStatus();
const connections = serverStatus.connections;
console.log('=== Connection Analysis ===');
console.log('Current connections:', connections.current);
console.log('Available connections:', connections.available);
console.log('Total created:', connections.totalCreated);
const utilizationPercent = (connections.current / connections.available * 100).toFixed(1);
console.log('Utilization:', utilizationPercent + '%');
if (connections.current / connections.available > 0.8) {
console.log('⚠️ High connection utilization');
}
// Check for connection leaks
const activeConnections = await this.db.admin().command({ currentOp: true });
const longRunningOps = activeConnections.inprog.filter(op =>
op.secs_running && op.secs_running > 300
);
if (longRunningOps.length > 0) {
console.log('\n--- Long Running Operations ---');
longRunningOps.forEach(op => {
console.log(`Operation running for ${op.secs_running}s:`, op.command);
});
}
}
}
// Automated health check
async function performHealthCheck(db) {
const troubleshooter = new PerformanceTroubleshooter(db);
console.log('🔍 Starting MongoDB Health Check...\n');
try {
await troubleshooter.diagnoseSlowQueries();
await troubleshooter.checkReplicationLag();
await troubleshooter.analyzeMemoryUsage();
await troubleshooter.analyzeConnections();
console.log('\n✅ Health check completed');
} catch (error) {
console.error('❌ Health check failed:', error.message);
}
}
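To run the health check as a standalone script, connect with the official Node.js driver first. A minimal sketch; the connection string and database name are placeholders:
const { MongoClient } = require('mongodb');

async function main() {
  // Placeholder URI - point at your replica set
  const client = new MongoClient('mongodb://localhost:27017/?replicaSet=myapp-replica');
  try {
    await client.connect();
    await performHealthCheck(client.db('myapp'));
  } finally {
    await client.close();
  }
}

main().catch(console.error);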
What's Next?
You've now mastered MongoDB monitoring and maintenance practices. To complete your MongoDB journey, explore Advanced Topics to learn about expert-level features like GridFS, Change Streams, and advanced optimization techniques.
Series Navigation
- Previous: MongoDB Production Deployment
- Next: MongoDB Advanced Topics
- Hub: MongoDB Zero to Hero - Complete Guide
This is Part 9 of the MongoDB Zero to Hero series. Effective monitoring and maintenance are crucial for keeping MongoDB running smoothly in production environments.