feat: init
This commit is contained in:
commit
8cfede9f57
28 changed files with 2129 additions and 0 deletions
246
modules/wiki-primary/prometheus.nix
Normal file
246
modules/wiki-primary/prometheus.nix
Normal file
|
|
@ -0,0 +1,246 @@
|
|||
{ config, pkgs, lib, ... }:
|
||||
let
|
||||
# Wrapper script that launches the PHP-FPM exporter.
#
# The scrape URI contains a semicolon (socket path ';' status path). Keeping the
# whole command line inside a shell script lets the URI be single-quoted once,
# instead of having to escape it in the systemd ExecStart line.
#
# The executable is resolved with lib.getExe rather than a hand-written
# bin/ path: the upstream binary is named `php-fpm_exporter` (underscore),
# so the previous `bin/php-fpm-exporter` path did not point at a real file.
phpfpmExporterScript = pkgs.writeShellScript "phpfpm-exporter-wrapper" ''
  exec ${lib.getExe pkgs.prometheus-php-fpm-exporter} server \
    --phpfpm.scrape-uri 'unix:///run/phpfpm/mediawiki.sock;/fpm-status' \
    --web.listen-address ':9253'
'';
|
||||
in
|
||||
{
|
||||
# Prometheus server for the primary wiki host.
# Listens on loopback only; scrapes the local exporters and, over Tailscale,
# the matching exporters on the replica.
services.prometheus =
  let
    # Scrape job with exactly one static target and an explicit `instance` label.
    staticJob = job_name: target: instance: {
      inherit job_name;
      static_configs = [
        {
          targets = [ target ];
          labels = { inherit instance; };
        }
      ];
    };

    # URLs probed end-to-end through the blackbox exporter.
    probedUrls = [
      # Primary wiki
      "https://www.noisebridge.net"
      "https://www.noisebridge.net/wiki/Main_Page"
      "https://www.noisebridge.net/health"
      # Replica wiki
      "https://readonly.noisebridge.net"
      "https://readonly.noisebridge.net/wiki/Main_Page"
      "https://readonly.noisebridge.net/health"
      # Grafana
      "https://grafana.noisebridge.net"
    ];
  in
  {
    enable = true;
    listenAddress = "127.0.0.1";
    port = 9090;

    # Keep data for 90 days or until the TSDB reaches 5GB.
    # NOTE(review): the 2h max block duration is passed through as-is;
    # confirm it is intended alongside a 90d retention window.
    retentionTime = "90d";
    extraFlags = [
      "--storage.tsdb.max-block-duration=2h"
      "--storage.tsdb.retention.size=5GB"
    ];

    globalConfig = {
      evaluation_interval = "15s";
      scrape_interval = "15s";
    };

    scrapeConfigs = [
      # System metrics: primary, then replica (over Tailscale).
      (staticJob "node" "localhost:9100" "wiki")
      (staticJob "node-replica" "wiki-replica:9100" "wiki-replica")

      # MariaDB on the primary: queries/s, connections, buffer pool hit ratio,
      # slow queries, binlog position, table locks, InnoDB row operations.
      (staticJob "mysqld" "localhost:9104" "wiki")

      # MariaDB on the replica (over Tailscale): replication lag
      # (Seconds_Behind_Master), IO/SQL thread status, relay log position,
      # read-only query volume.
      (staticJob "mysqld-replica" "wiki-replica:9104" "wiki-replica")

      # Caddy on the primary: requests/s by status code (2xx, 3xx, 4xx, 5xx),
      # response latency histograms, active connections, bytes in/out.
      (staticJob "caddy" "localhost:2019" "wiki")

      # Caddy on the replica (over Tailscale).
      (staticJob "caddy-replica" "wiki-replica:2019" "wiki-replica")

      # PHP-FPM on the primary: active/idle/total workers, accepted
      # connections, request duration, slow requests, max_children reached.
      (staticJob "phpfpm" "localhost:9253" "wiki")

      # memcached on the primary: hit rate, miss rate, evictions, current
      # items, bytes used/limit, connections, get/set/delete rates.
      (staticJob "memcached" "localhost:9150" "wiki")

      # memcached on the replica (over Tailscale).
      (staticJob "memcached-replica" "wiki-replica:9150" "wiki-replica")

      # Blackbox HTTP probes: DNS resolution time, TCP connect, TLS handshake,
      # HTTP response time, status code, TLS cert expiry.
      {
        job_name = "blackbox-http";
        metrics_path = "/probe";
        params = {
          module = [ "http_2xx" ];
        };
        static_configs = [
          { targets = probedUrls; }
        ];
        # Standard blackbox rewrite: the listed URL becomes the `target` query
        # parameter and the `instance` label, and the scrape itself is sent to
        # the local blackbox exporter.
        relabel_configs = [
          {
            source_labels = [ "__address__" ];
            target_label = "__param_target";
          }
          {
            source_labels = [ "__param_target" ];
            target_label = "instance";
          }
          {
            target_label = "__address__";
            replacement = "localhost:9115";
          }
        ];
      }

      # Grafana internal metrics.
      # NOTE(review): unlike the other jobs, no `instance` label is set here.
      {
        job_name = "grafana";
        static_configs = [
          { targets = [ "localhost:3000" ]; }
        ];
      }
    ];
  };
|
||||
|
||||
# Node exporter: host-level metrics (CPU, RAM, disk I/O, filesystem usage,
# network traffic, systemd unit states) plus custom textfile metrics written
# by the backup script.
services.prometheus.exporters.node =
  let
    # Directory the textfile collector reads *.prom files from.
    textfileDir = "/var/lib/prometheus-node-exporter/textfile";
  in
  {
    enable = true;
    port = 9100;

    enabledCollectors = [
      "cpu" "diskstats" "filesystem" "loadavg" "meminfo" "netdev"
      "stat" "time" "vmstat" "systemd" "textfile"
    ];

    extraFlags = [ "--collector.textfile.directory=${textfileDir}" ];
  };
|
||||
|
||||
# Blackbox exporter: performs real HTTP requests and reports probe
# success/failure, per-phase timings (DNS, connect, TLS, processing, transfer),
# HTTP status code and TLS certificate expiry.
services.prometheus.exporters.blackbox =
  let
    # The single probe module referenced by the "blackbox-http" scrape job:
    # GET over IPv4, follow redirects, accept only a final 200.
    http2xxModule = {
      prober = "http";
      timeout = "10s";
      http = {
        method = "GET";
        follow_redirects = true;
        preferred_ip_protocol = "ip4";
        valid_http_versions = [ "HTTP/1.1" "HTTP/2.0" ];
        valid_status_codes = [ 200 ];
      };
    };
  in
  {
    enable = true;
    port = 9115;
    # The config is rendered with builtins.toJSON and written under a .yml name.
    configFile = pkgs.writeText "blackbox.yml" (builtins.toJSON {
      modules.http_2xx = http2xxModule;
    });
  };
|
||||
|
||||
# Memcached exporter: cmd_get, cmd_set, get_hits, get_misses (for hit ratio),
# evictions, curr_items, bytes (used), limit_maxbytes, curr_connections,
# total_connections.
services.prometheus.exporters.memcached =
  let
    # memcached instance the exporter connects to.
    memcachedAddress = "localhost:11211";
  in
  {
    enable = true;
    port = 9150;
    extraFlags = [ "--memcached.address=${memcachedAddress}" ];
  };
|
||||
|
||||
# PHP-FPM exporter service: active_processes, idle_processes, total_processes,
# accepted_conn, listen_queue, max_listen_queue, slow_requests,
# max_children_reached.
# ExecStart points at the wrapper script so the semicolon in the scrape URI
# does not have to be escaped in the unit file.
systemd.services.prometheus-phpfpm-exporter =
  let
    # Service account used for both User= and Group=.
    account = "mediawiki";
  in
  {
    description = "Prometheus PHP-FPM exporter";
    wantedBy = [ "multi-user.target" ];
    after = [ "phpfpm-mediawiki.service" ];

    serviceConfig = {
      User = account;
      Group = account;
      ExecStart = phpfpmExporterScript;
      # Always restart, waiting 5s between attempts.
      Restart = "always";
      RestartSec = "5s";
    };
  };
|
||||
|
||||
# Create the node exporter's textfile collector directory (root:root, 0755)
# that holds the backup and sync metrics.
systemd.tmpfiles.rules =
  let
    textfileDir = "/var/lib/prometheus-node-exporter/textfile";
  in
  [ "d ${textfileDir} 0755 root root -" ];
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue