<!-- TITLE: Alert Rules List -->
<!-- SUBTITLE: A quick summary of Alert Rules List -->

# Simple Alert Rules for checking main resources

```yaml
# Prometheus alerting rules for basic host and endpoint health.
# Every rule carries `severity: info`; descriptions are prefixed with the
# `env` label of the firing series where the rule provides it.
groups:
  - name: CPU
    rules:
      # Fires when the average non-idle CPU percentage, computed from the
      # 10-minute rate of the idle counter, stays above 95% for 2 hours.
      # `rate` (not `irate`) is used so that a single short dip between two
      # scrapes does not reset the 2h `for` timer.
      # `env` is kept in the aggregation so that {{ $labels.env }} in the
      # description is not dropped by `avg by (...)`.
      # NOTE(review): assumes `env` is a target label on node_exporter series.
      - alert: High-CPU-Usage
        expr: |-
          avg by (env, mode, instance) (
            100 - (rate(node_cpu_seconds_total{job="Node_Exporter", mode="idle"}[10m]) * 100)
          ) > 95
        for: 2h
        labels:
          severity: info
        annotations:
          summary: 'CPU usage is very High!'
          description: '{{$labels.env}}: CPU usage on stage {{ $labels.instance }} is OVER 95%. Current value is {{ $value | humanize }}%.'

  - name: RAM
    rules:
      # Fires when the share of memory that is neither free, cached nor
      # buffered (each term averaged over 10 minutes) stays above 90% for
      # 2 hours.
      # NOTE(review): node_memory_MemAvailable_bytes may be a more accurate
      # single source for "usable" memory - confirm before switching.
      - alert: Memory-Out-Of-Space
        expr: |-
          100 * (1 - (
            (
              avg_over_time(node_memory_MemFree_bytes{job="Node_Exporter"}[10m]) +
              avg_over_time(node_memory_Cached_bytes{job="Node_Exporter"}[10m]) +
              avg_over_time(node_memory_Buffers_bytes{job="Node_Exporter"}[10m])
            ) /
            avg_over_time(node_memory_MemTotal_bytes{job="Node_Exporter"}[10m])
          )) > 90
        for: 2h
        labels:
          severity: info
        annotations:
          summary: 'Free RAM bytes is out of space!'
          description: '{{$labels.env}}: RAM on stage {{ $labels.instance }} is out of space. Current value is {{ $value | humanize }}%.'

  - name: HDD
    rules:
      # Fires when the used percentage of the filesystem mounted at "/"
      # ((size - avail) / size) stays above 90% for 10 minutes.
      - alert: Low-Root-Disk-Space
        expr: |-
          (
            (
              node_filesystem_size_bytes{mountpoint="/",job="Node_Exporter"} -
              node_filesystem_avail_bytes{mountpoint="/",job="Node_Exporter"}
            ) /
            node_filesystem_size_bytes{mountpoint="/",job="Node_Exporter"}
          ) * 100 > 90
        for: 10m
        labels:
          severity: info
        annotations:
          summary: 'Free space on disk is low!'
          description: '{{$labels.env}}: Disk on stage {{ $labels.instance }} is OVER 90%. Current value is {{ $value | humanize }}%.'

  - name: SSL
    rules:
      # Fires when a certificate's not-after timestamp is less than 7 days
      # (86400 * 7 seconds) away, sustained for 8 hours.
      # $value is the remaining lifetime in seconds; humanizeDuration already
      # renders its own units, so no unit suffix is appended in the message.
      - alert: SSL-Expired
        expr: (ssl_cert_not_after{job="SSL_Expire_Check"} - time()) < 86400*7
        for: 8h
        labels:
          severity: info
        annotations:
          summary: 'SSL expire alert!'
          description: 'SSL certificate for {{ $labels.instance }} will expire in {{ $value | humanizeDuration }}!'

  - name: HTTP
    rules:
      # Fires when the blackbox probe for a target has reported failure
      # (probe_success == 0) continuously for 5 minutes.
      - alert: HTTP-Check
        expr: probe_success{job="blackbox"} == 0
        for: 5m
        labels:
          severity: info
        annotations:
          summary: 'Stage not Responding'
          description: '{{$labels.env}}: Stage {{ $labels.instance }} not responding!! Check connection!'
```