# NixOS module: Prometheus server, its scrape jobs, and the metric exporters that run on this host.
{ config, pkgs, lib, ... }:
|
|
let
|
|
# Launcher script for the PHP-FPM Prometheus exporter.
# The scrape URI contains a semicolon, which runs into an escaping issue when
# passed directly; keeping the whole command line inside a shell script
# sidesteps that.
phpfpmExporterScript =
  let
    # FPM status endpoint: unix socket path, then ";", then the status page path.
    scrapeUri = "unix:///run/phpfpm/mediawiki.sock;/fpm-status";
    # Listen address for the exporter's HTTP endpoint (host part left empty).
    # NOTE(review): the only scraper visible in this file is localhost:9253 —
    # confirm whether a non-loopback bind is actually needed.
    listenAddress = ":9253";
  in
  pkgs.writeShellScript "phpfpm-exporter-wrapper" ''
    exec ${pkgs.prometheus-php-fpm-exporter}/bin/php-fpm-exporter server \
    --phpfpm.scrape-uri '${scrapeUri}' \
    --web.listen-address '${listenAddress}'
  '';
|
|
in
|
|
{
|
|
# Prometheus server: storage settings, global intervals, and every scrape job.
services.prometheus =
  let
    # Scrape job with exactly one static target and an explicit `instance`
    # label, so series are keyed by host name rather than by host:port.
    staticJob = job: target: instance: {
      job_name = job;
      static_configs = [{
        targets = [ target ];
        labels = { inherit instance; };
      }];
    };
  in
  {
    enable = true;

    # The server UI/API is reachable on loopback only.
    listenAddress = "127.0.0.1";
    port = 9090;

    # Data is kept for 90 days; the size flag below adds a 5GB cap.
    retentionTime = "90d";
    extraFlags = [
      # NOTE(review): limits TSDB blocks to 2h — confirm this is intended
      # together with the 90d retention above.
      "--storage.tsdb.max-block-duration=2h"
      "--storage.tsdb.retention.size=5GB"
    ];

    globalConfig = {
      scrape_interval = "15s";
      evaluation_interval = "15s";
    };

    scrapeConfigs = [
      # ── System metrics: primary, then replica (over Tailscale) ──
      (staticJob "node" "localhost:9100" "wiki")
      (staticJob "node-replica" "wiki-replica:9100" "wiki-replica")

      # ── MariaDB ──
      # Primary: queries/s, connections, buffer pool hit ratio, slow queries,
      # binlog position, table locks, InnoDB row operations.
      (staticJob "mysqld" "localhost:9104" "wiki")
      # Replica (over Tailscale): replication lag (Seconds_Behind_Master),
      # IO/SQL thread status, relay log position, read-only query volume.
      (staticJob "mysqld-replica" "wiki-replica:9104" "wiki-replica")

      # ── Caddy ──
      # Requests/s by status class (2xx, 3xx, 4xx, 5xx), response latency
      # histograms, active connections, bytes in/out.
      (staticJob "caddy" "localhost:2019" "wiki")
      # Replica (over Tailscale).
      (staticJob "caddy-replica" "wiki-replica:2019" "wiki-replica")

      # ── PHP-FPM (primary only) ──
      # Active/idle/total workers, accepted connections, request duration,
      # slow requests, max_children reached count.
      (staticJob "phpfpm" "localhost:9253" "wiki")

      # ── memcached ──
      # Hit/miss rate, evictions, current items, bytes used/limit,
      # connections, get/set/delete rates.
      (staticJob "memcached" "localhost:9150" "wiki")
      # Replica (over Tailscale).
      (staticJob "memcached-replica" "wiki-replica:9150" "wiki-replica")

      # ── Blackbox HTTP probes ──
      # End-to-end checks: DNS resolution time, TCP connect, TLS handshake,
      # HTTP response time, status code, TLS cert expiry.
      {
        job_name = "blackbox-http";
        metrics_path = "/probe";
        params = { module = [ "http_2xx" ]; };
        static_configs = [{
          targets = [
            # Primary wiki
            "https://www.noisebridge.net"
            "https://www.noisebridge.net/wiki/Main_Page"
            "https://www.noisebridge.net/health"
            # Replica wiki
            "https://readonly.noisebridge.net"
            "https://readonly.noisebridge.net/wiki/Main_Page"
            "https://readonly.noisebridge.net/health"
            # Grafana
            "https://grafana.noisebridge.net"
          ];
        }];
        relabel_configs = [
          # Pass the listed URL to the prober as the `target` query parameter.
          {
            source_labels = [ "__address__" ];
            target_label = "__param_target";
          }
          # Keep the probed URL as the series' `instance` label.
          {
            source_labels = [ "__param_target" ];
            target_label = "instance";
          }
          # Send the actual scrape request to localhost:9115 instead of the URL.
          {
            target_label = "__address__";
            replacement = "localhost:9115";
          }
        ];
      }

      # ── Grafana internal metrics ──
      # NOTE(review): unlike the other static jobs this one sets no `instance`
      # label — confirm whether that is intentional.
      {
        job_name = "grafana";
        static_configs = [{
          targets = [ "localhost:3000" ];
        }];
      }
    ];
  };
|
|
|
|
# ── Node exporter ──
# Host-level metrics: CPU, RAM, disk I/O, filesystem usage, network traffic,
# and systemd unit states, plus custom textfile metrics written by the
# backup script.
services.prometheus.exporters.node =
  let
    # Directory the textfile collector reads metric files from.
    textfileDir = "/var/lib/prometheus-node-exporter/textfile";
  in
  {
    enable = true;
    port = 9100;

    enabledCollectors = [
      "cpu"
      "diskstats"
      "filesystem"
      "loadavg"
      "meminfo"
      "netdev"
      "stat"
      "time"
      "vmstat"
      "systemd"
      "textfile"
    ];

    extraFlags = [ "--collector.textfile.directory=${textfileDir}" ];
  };
|
|
|
|
# ── Blackbox exporter ──
# Performs real HTTP requests and reports probe success/failure, response
# time split into phases (DNS, connect, TLS, processing, transfer), the HTTP
# status code, and the TLS certificate expiry date.
services.prometheus.exporters.blackbox =
  let
    # Probe module selected by scrape jobs via `module=http_2xx`: a GET that
    # follows redirects and only counts a final 200 as success.
    http2xx = {
      prober = "http";
      timeout = "10s";
      http = {
        method = "GET";
        follow_redirects = true;
        preferred_ip_protocol = "ip4";
        valid_http_versions = [ "HTTP/1.1" "HTTP/2.0" ];
        valid_status_codes = [ 200 ];
      };
    };
  in
  {
    enable = true;
    port = 9115;
    # The config is serialised as JSON and written to a store file that is
    # named blackbox.yml.
    configFile = pkgs.writeText "blackbox.yml" (builtins.toJSON {
      modules = { http_2xx = http2xx; };
    });
  };
|
|
|
|
# ── Memcached exporter ──
# Surfaces cmd_get, cmd_set, get_hits and get_misses (→ hit ratio),
# evictions, curr_items, bytes (used), limit_maxbytes, curr_connections,
# and total_connections.
services.prometheus.exporters.memcached =
  let
    # memcached instance the exporter connects to.
    memcachedAddress = "localhost:11211";
  in
  {
    enable = true;
    port = 9150;
    extraFlags = [ "--memcached.address=${memcachedAddress}" ];
  };
|
|
|
|
# ── PHP-FPM exporter ──
# Surfaces active_processes, idle_processes, total_processes, accepted_conn,
# listen_queue, max_listen_queue, slow_requests, and max_children_reached.
# Started through the wrapper script because of the semicolon in the scrape
# URI.
systemd.services.prometheus-phpfpm-exporter =
  let
    # Account used for both User= and Group=.
    # NOTE(review): presumably chosen so the exporter can open the FPM
    # socket — confirm against the phpfpm pool definition.
    runAs = "mediawiki";
  in
  {
    description = "Prometheus PHP-FPM exporter";
    wantedBy = [ "multi-user.target" ];
    # Ordering only: start after the MediaWiki FPM pool unit.
    after = [ "phpfpm-mediawiki.service" ];

    serviceConfig = {
      ExecStart = phpfpmExporterScript;
      User = runAs;
      Group = runAs;
      # Always restart the exporter, waiting 5s between attempts.
      Restart = "always";
      RestartSec = "5s";
    };
  };
|
|
|
|
# Create the directory where backup and sync metrics are dropped for the
# textfile collector (mode 0755, owned by root:root).
systemd.tmpfiles.rules =
  let
    textfileDir = "/var/lib/prometheus-node-exporter/textfile";
  in
  [ "d ${textfileDir} 0755 root root -" ];
|
|
}
|