feat: init
Some checks failed
CI / check (push) Has been cancelled
CI / deploy (push) Has been cancelled

This commit is contained in:
Jet 2026-03-17 04:07:14 -07:00
commit 8cfede9f57
No known key found for this signature in database
28 changed files with 2129 additions and 0 deletions

View file

@ -0,0 +1,246 @@
{ config, pkgs, lib, ... }:
let
# Wrapper script that launches the PHP-FPM exporter.
# The scrape URI contains a literal semicolon separating the socket path from
# the status path; keeping it single-quoted inside a shell script avoids the
# semicolon-in-URI escaping issue on the systemd ExecStart line.
# The executable is resolved with lib.getExe (package's meta.mainProgram)
# instead of a hard-coded file name under bin/, so the script does not break
# when the installed binary name differs from the attribute name.
phpfpmExporterScript = pkgs.writeShellScript "phpfpm-exporter-wrapper" ''
  exec ${lib.getExe pkgs.prometheus-php-fpm-exporter} server \
    --phpfpm.scrape-uri 'unix:///run/phpfpm/mediawiki.sock;/fpm-status' \
    --web.listen-address ':9253'
'';
in
{
# Prometheus server: storage settings plus every scrape job.
# Keep this value a plain attribute-set literal: other definitions in this
# file extend services.prometheus.exporters.*, and Nix only merges nested
# attribute paths into literals.
services.prometheus = {
  enable = true;
  # The server itself listens on loopback only.
  listenAddress = "127.0.0.1";
  port = 9090;

  retentionTime = "90d";
  # NOTE(review): max-block-duration=2h combined with 90d retention — confirm
  # this pairing is intentional.
  extraFlags = [
    "--storage.tsdb.max-block-duration=2h"
    "--storage.tsdb.retention.size=5GB"
  ];

  globalConfig = {
    scrape_interval = "15s";
    evaluation_interval = "15s";
  };

  scrapeConfigs =
    let
      # A job with exactly one static target and a fixed `instance` label.
      staticJob = job_name: target: instance: {
        inherit job_name;
        static_configs = [
          {
            targets = [ target ];
            labels = { inherit instance; };
          }
        ];
      };

      # Exporter on this host, labelled instance="wiki".
      onPrimary = name: port:
        staticJob name "localhost:${toString port}" "wiki";

      # Same exporter on the replica (reached over Tailscale); the job name
      # gets a "-replica" suffix and the label is instance="wiki-replica".
      onReplica = name: port:
        staticJob "${name}-replica" "wiki-replica:${toString port}" "wiki-replica";

      # Address of the blackbox exporter that performs the HTTP probes.
      blackboxAddress = "localhost:9115";
    in
    [
      # ── System metrics (node exporter): primary, then replica ──
      (onPrimary "node" 9100)
      (onReplica "node" 9100)

      # ── MariaDB ──
      # Primary: queries/s, connections, buffer pool hit ratio, slow queries,
      # binlog position, table locks, InnoDB row operations.
      (onPrimary "mysqld" 9104)
      # Replica: replication lag (Seconds_Behind_Master), IO/SQL thread
      # status, relay log position, read-only query volume.
      (onReplica "mysqld" 9104)

      # ── Caddy ──
      # Requests/s by status code (2xx, 3xx, 4xx, 5xx), response latency
      # histograms, active connections, bytes in/out.
      (onPrimary "caddy" 2019)
      (onReplica "caddy" 2019)

      # ── PHP-FPM (primary only) ──
      # Active/idle/total workers, accepted connections, request duration,
      # slow requests, max_children reached count.
      (onPrimary "phpfpm" 9253)

      # ── memcached ──
      # Hit rate, miss rate, evictions, current items, bytes used/limit,
      # connections, get/set/delete rates.
      (onPrimary "memcached" 9150)
      (onReplica "memcached" 9150)

      # ── Blackbox HTTP probes ──
      # End-to-end: DNS resolution time, TCP connect, TLS handshake,
      # HTTP response time, status code, TLS cert expiry.
      {
        job_name = "blackbox-http";
        metrics_path = "/probe";
        params.module = [ "http_2xx" ];
        static_configs = [
          {
            targets = [
              # Primary wiki
              "https://www.noisebridge.net"
              "https://www.noisebridge.net/wiki/Main_Page"
              "https://www.noisebridge.net/health"
              # Replica wiki
              "https://readonly.noisebridge.net"
              "https://readonly.noisebridge.net/wiki/Main_Page"
              "https://readonly.noisebridge.net/health"
              # Grafana
              "https://grafana.noisebridge.net"
            ];
          }
        ];
        relabel_configs = [
          # The listed URL becomes the ?target= query parameter ...
          {
            source_labels = [ "__address__" ];
            target_label = "__param_target";
          }
          # ... and is also kept as the `instance` label ...
          {
            source_labels = [ "__param_target" ];
            target_label = "instance";
          }
          # ... while the scrape itself is sent to the blackbox exporter.
          {
            target_label = "__address__";
            replacement = blackboxAddress;
          }
        ];
      }

      # ── Grafana internal metrics ──
      {
        job_name = "grafana";
        static_configs = [
          { targets = [ "localhost:3000" ]; }
        ];
      }
    ];
};
# ── Node exporter ──
# Host-level metrics: CPU, RAM, disk I/O, filesystem usage, network traffic,
# systemd unit states, plus custom textfile metrics from the backup script.
services.prometheus.exporters.node =
  let
    # Directory the textfile collector reads *.prom files from.
    textfileDir = "/var/lib/prometheus-node-exporter/textfile";
  in
  {
    enable = true;
    port = 9100;
    enabledCollectors = [
      "cpu"
      "diskstats"
      "filesystem"
      "loadavg"
      "meminfo"
      "netdev"
      "stat"
      "time"
      "vmstat"
      "systemd"
      "textfile"
    ];
    extraFlags = [ "--collector.textfile.directory=${textfileDir}" ];
  };
# ── Blackbox exporter ──
# Performs real HTTP requests and reports probe success/failure, response
# time split into phases (DNS, connect, TLS, processing, transfer), HTTP
# status code and TLS certificate expiry date.
services.prometheus.exporters.blackbox =
  let
    # Probe module selected by the scrape job via module=http_2xx:
    # a GET over IPv4 that follows redirects and only accepts a final 200.
    http2xxModule = {
      prober = "http";
      timeout = "10s";
      http = {
        method = "GET";
        follow_redirects = true;
        preferred_ip_protocol = "ip4";
        valid_http_versions = [ "HTTP/1.1" "HTTP/2.0" ];
        valid_status_codes = [ 200 ];
      };
    };

    # The config is serialised as JSON into a file named blackbox.yml.
    blackboxConfig = builtins.toJSON {
      modules.http_2xx = http2xxModule;
    };
  in
  {
    enable = true;
    port = 9115;
    configFile = pkgs.writeText "blackbox.yml" blackboxConfig;
  };
# ── Memcached exporter ──
# Exposes: cmd_get, cmd_set, get_hits, get_misses (→ hit ratio),
# evictions, curr_items, bytes (used), limit_maxbytes,
# curr_connections, total_connections.
services.prometheus.exporters.memcached =
  let
    # memcached instance the exporter connects to.
    memcachedAddress = "localhost:11211";
  in
  {
    enable = true;
    port = 9150;
    extraFlags = [ "--memcached.address=${memcachedAddress}" ];
  };
# ── PHP-FPM exporter ──
# Exposes: active_processes, idle_processes, total_processes,
# accepted_conn, listen_queue, max_listen_queue,
# slow_requests, max_children_reached.
# Started through the wrapper script so the semicolon in the scrape URI
# never appears on the ExecStart line itself.
systemd.services.prometheus-phpfpm-exporter =
  let
    # Account the exporter runs under (both user and group).
    runAs = "mediawiki";
  in
  {
    description = "Prometheus PHP-FPM exporter";
    wantedBy = [ "multi-user.target" ];
    # Ordered after the MediaWiki PHP-FPM pool unit.
    after = [ "phpfpm-mediawiki.service" ];
    serviceConfig = {
      ExecStart = phpfpmExporterScript;
      User = runAs;
      Group = runAs;
      # Always restart, with a 5 second pause between attempts.
      Restart = "always";
      RestartSec = "5s";
    };
  };
# Create the textfile collector directory used for backup and sync metrics
# (root-owned, mode 0755).
systemd.tmpfiles.rules =
  let
    textfileDir = "/var/lib/prometheus-node-exporter/textfile";
  in
  [ "d ${textfileDir} 0755 root root -" ];
}