feat: init
Some checks failed
CI / check (push) Has been cancelled
CI / deploy (push) Has been cancelled

This commit is contained in:
Jet 2026-03-17 04:07:14 -07:00
commit 8cfede9f57
No known key found for this signature in database
28 changed files with 2129 additions and 0 deletions

View file

@ -0,0 +1,291 @@
{ config, pkgs, lib, ... }:
# Alerting for the wiki fleet: Prometheus rule groups plus a loopback-only
# Alertmanager that delivers notifications to Discord via a webhook whose
# URL is kept in an agenix secret.
{
  services.prometheus = {
    # Point Prometheus at the local Alertmanager configured further below.
    alertmanagers = [{
      static_configs = [{
        targets = [ "localhost:9093" ];
      }];
    }];
    # Rule files are supplied as strings; JSON is a subset of YAML, so
    # builtins.toJSON output is a valid Prometheus rule file.
    rules = [
      (builtins.toJSON {
        groups = [
          # End-user availability, error rate, latency and TLS expiry —
          # driven by the blackbox-http probes and Caddy metrics.
          {
            name = "wiki-availability";
            rules = [
              {
                alert = "WikiDown";
                expr = ''probe_success{job="blackbox-http",instance=~".*www.noisebridge.net.*"} == 0'';
                "for" = "2m"; # "for" is a Nix keyword, hence quoted
                labels.severity = "critical";
                annotations = {
                  summary = "Primary wiki is unreachable";
                  description = "{{ $labels.instance }} has been down for more than 2 minutes.";
                };
              }
              {
                alert = "ReplicaDown";
                expr = ''probe_success{job="blackbox-http",instance=~".*readonly.noisebridge.net.*"} == 0'';
                "for" = "5m";
                labels.severity = "warning";
                annotations = {
                  summary = "Replica wiki is unreachable";
                  description = "{{ $labels.instance }} has been down for more than 5 minutes.";
                };
              }
              {
                # Ratio of 5xx responses to all responses, per instance.
                alert = "HighErrorRate";
                expr = ''sum(rate(caddy_http_responses_total{code=~"5.."}[5m])) by (instance) / sum(rate(caddy_http_responses_total[5m])) by (instance) > 0.05'';
                "for" = "5m";
                labels.severity = "critical";
                annotations = {
                  summary = "High HTTP 5xx error rate on {{ $labels.instance }}";
                  description = "More than 5% of requests are returning server errors.";
                };
              }
              {
                alert = "HighLatency";
                expr = ''histogram_quantile(0.95, sum(rate(caddy_http_request_duration_seconds_bucket[5m])) by (le, instance)) > 2'';
                "for" = "5m";
                labels.severity = "warning";
                annotations = {
                  summary = "High p95 latency on {{ $labels.instance }}";
                  description = "95th percentile response time is {{ $value | humanizeDuration }}.";
                };
              }
              {
                # Expiry timestamp comes from the blackbox TLS probe.
                alert = "TLSCertExpiringSoon";
                expr = ''probe_ssl_earliest_cert_expiry{job="blackbox-http"} - time() < 7 * 86400'';
                "for" = "1h";
                labels.severity = "warning";
                annotations = {
                  summary = "TLS certificate expiring within 7 days";
                  description = "Certificate for {{ $labels.instance }} expires in {{ $value | humanizeDuration }}.";
                };
              }
            ];
          }
          # Host-level health from node exporter: disk, memory, CPU, systemd.
          {
            name = "wiki-infrastructure";
            rules = [
              {
                alert = "DiskFull";
                expr = ''(node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) < 0.15'';
                "for" = "5m";
                labels.severity = "warning";
                annotations = {
                  summary = "Disk usage above 85% on {{ $labels.instance }}";
                  description = "Root filesystem is {{ $value | humanizePercentage }} free.";
                };
              }
              {
                alert = "DiskCritical";
                expr = ''(node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) < 0.05'';
                "for" = "2m";
                labels.severity = "critical";
                annotations = {
                  summary = "Disk almost full on {{ $labels.instance }}";
                  description = "Root filesystem is {{ $value | humanizePercentage }} free. Immediate action required.";
                };
              }
              {
                alert = "HighMemoryUsage";
                expr = ''(1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) > 0.9'';
                "for" = "5m";
                labels.severity = "warning";
                annotations = {
                  summary = "Memory usage above 90% on {{ $labels.instance }}";
                  # NOTE: $value is the *used* fraction (1 - available/total),
                  # so the description must report usage, not available memory.
                  description = "Memory usage is {{ $value | humanizePercentage }} of total.";
                };
              }
              {
                alert = "HighCPU";
                expr = ''1 - avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) > 0.85'';
                "for" = "10m";
                labels.severity = "warning";
                annotations = {
                  summary = "Sustained high CPU on {{ $labels.instance }}";
                  description = "CPU usage has been above 85% for 10 minutes.";
                };
              }
              {
                alert = "SystemdUnitFailed";
                expr = ''node_systemd_unit_state{state="failed"} == 1'';
                "for" = "5m";
                labels.severity = "warning";
                annotations = {
                  summary = "Systemd unit failed on {{ $labels.instance }}";
                  description = "Unit {{ $labels.name }} is in failed state.";
                };
              }
            ];
          }
          # MariaDB replication health and capacity, via mysqld exporter.
          {
            name = "wiki-database";
            rules = [
              {
                alert = "ReplicationBroken";
                expr = ''mysql_slave_status_slave_io_running{instance="wiki-replica"} == 0 or mysql_slave_status_slave_sql_running{instance="wiki-replica"} == 0'';
                "for" = "2m";
                labels.severity = "critical";
                annotations = {
                  summary = "MySQL replication thread stopped";
                  description = "Replication IO or SQL thread is not running on the replica.";
                };
              }
              {
                alert = "ReplicationLagging";
                expr = ''mysql_slave_status_seconds_behind_master{instance="wiki-replica"} > 300'';
                "for" = "5m";
                labels.severity = "warning";
                annotations = {
                  summary = "MySQL replication lagging";
                  description = "Replica is {{ $value }}s behind the primary.";
                };
              }
              {
                alert = "MySQLConnectionsExhausted";
                expr = ''mysql_global_status_threads_connected / mysql_global_variables_max_connections > 0.8'';
                "for" = "5m";
                labels.severity = "warning";
                annotations = {
                  summary = "MySQL connections above 80% on {{ $labels.instance }}";
                  description = "{{ $value | humanizePercentage }} of max connections in use.";
                };
              }
              {
                alert = "MySQLSlowQueries";
                expr = ''rate(mysql_global_status_slow_queries[5m]) > 0.1'';
                "for" = "10m";
                labels.severity = "warning";
                annotations = {
                  summary = "Elevated slow queries on {{ $labels.instance }}";
                  description = "{{ $value }} slow queries per second over the last 5 minutes.";
                };
              }
            ];
          }
          # Application stack: PHP-FPM worker pool and memcached object cache.
          {
            name = "wiki-application";
            rules = [
              {
                alert = "PHPFPMExhausted";
                expr = ''phpfpm_active_processes >= phpfpm_total_processes'';
                "for" = "1m";
                labels.severity = "warning";
                annotations = {
                  summary = "PHP-FPM workers exhausted";
                  description = "All PHP-FPM workers are active requests may be queuing.";
                };
              }
              {
                alert = "PHPFPMDown";
                expr = ''up{job="phpfpm"} == 0'';
                "for" = "1m";
                labels.severity = "critical";
                annotations = {
                  summary = "PHP-FPM exporter is down";
                  description = "Cannot scrape PHP-FPM metrics the PHP-FPM process may be dead.";
                };
              }
              {
                alert = "MemcachedDown";
                expr = ''up{job=~"memcached.*"} == 0'';
                "for" = "2m";
                labels.severity = "critical";
                annotations = {
                  summary = "Memcached is down on {{ $labels.instance }}";
                  description = "The memcached exporter is unreachable. MediaWiki will fall back to database queries and be slow.";
                };
              }
              {
                alert = "MemcachedEvictions";
                expr = ''rate(memcached_items_evicted_total[5m]) > 10'';
                "for" = "10m";
                labels.severity = "warning";
                annotations = {
                  summary = "High memcached eviction rate on {{ $labels.instance }}";
                  description = "{{ $value }} evictions/sec cache is too small, consider increasing maxMemory.";
                };
              }
              {
                alert = "MemcachedHitRateLow";
                expr = ''rate(memcached_commands_total{command="get",status="hit"}[5m]) / rate(memcached_commands_total{command="get"}[5m]) < 0.8'';
                "for" = "15m";
                labels.severity = "warning";
                annotations = {
                  summary = "Low memcached hit rate on {{ $labels.instance }}";
                  description = "Cache hit rate is {{ $value | humanizePercentage }}. Pages may be slow.";
                };
              }
            ];
          }
          # Backup freshness — the metrics referenced here are written by the
          # backup script's textfile-collector output.
          {
            name = "wiki-backups";
            rules = [
              {
                alert = "BackupStale";
                expr = ''(time() - backup_latest_timestamp_seconds) > 86400'';
                "for" = "1h";
                labels.severity = "warning";
                annotations = {
                  summary = "Wiki backup is stale";
                  description = "Last successful backup was more than 24 hours ago.";
                };
              }
              {
                alert = "BackupFailed";
                expr = ''backup_b2_sync_success != 1'';
                "for" = "10m";
                labels.severity = "critical";
                annotations = {
                  summary = "B2 backup sync failed";
                  description = "The last rclone sync to Backblaze B2 did not succeed.";
                };
              }
              {
                alert = "ImageSyncStale";
                expr = ''(time() - imagesync_latest_timestamp_seconds) > 7200'';
                "for" = "30m";
                labels.severity = "warning";
                annotations = {
                  summary = "Image sync to replica is stale";
                  description = "Last successful image sync was more than 2 hours ago. Replica may have broken image links.";
                };
              }
            ];
          }
        ];
      })
    ];
  };
  # Alertmanager on loopback only; groups related alerts and posts them to a
  # Discord webhook. The webhook URL is a secret, so it is read from a file
  # (url_file) rather than embedded in the world-readable Nix store config.
  services.prometheus.alertmanager = {
    enable = true;
    port = 9093;
    listenAddress = "127.0.0.1";
    configuration = {
      route = {
        receiver = "discord";
        group_by = [ "alertname" "instance" ];
        group_wait = "30s";
        group_interval = "5m";
        repeat_interval = "4h";
      };
      receivers = [
        {
          name = "discord";
          webhook_configs = [{
            url_file = config.age.secrets.discord-webhook.path;
          }];
        }
      ];
    };
  };
  # Decrypted webhook URL, readable only by the alertmanager user.
  age.secrets.discord-webhook = {
    file = ../../secrets/discord-webhook.age;
    owner = "alertmanager";
    group = "alertmanager";
  };
}

View file

@ -0,0 +1,97 @@
{ config, pkgs, lib, ... }:
let
  # Nightly backup job. Steps, in order:
  #   1. prune local dump directories older than 7 days
  #   2. dump the MediaWiki database with mydumper (compressed, incl. routines)
  #   3. rclone-sync dumps, uploaded images, and Tor hidden-service keys to B2
  #   4. write textfile metrics (backup_latest_timestamp_seconds,
  #      backup_b2_sync_success) consumed by the node exporter
  # Runs under `set -e`, so a mydumper failure aborts before the metrics are
  # written — the timestamp then goes stale, which is what the BackupStale
  # alert watches for. The `&& SYNC_SUCCESS=1` / `|| SYNC_SUCCESS=0` forms are
  # deliberate: commands inside an AND-OR list do not trip `set -e`, so a
  # failed rclone sync records failure instead of killing the script.
  # B2 credentials are parsed out of an agenix-managed JSON file at runtime.
  backupScript = pkgs.writeShellScript "wiki-backup" ''
    set -euo pipefail
    BACKUP_DIR="/var/backups/mysql"
    TEXTFILE_DIR="/var/lib/prometheus-node-exporter/textfile"
    TIMESTAMP=$(date +%Y%m%d-%H%M%S)
    # Cleanup old local backups (keep 7 days)
    find "$BACKUP_DIR" -maxdepth 1 -type d -mtime +7 -exec rm -rf {} + 2>/dev/null || true
    # Dump database with mydumper
    echo "Starting database dump..."
    ${pkgs.mydumper}/bin/mydumper \
    --database noisebridge_mediawiki \
    --outputdir "$BACKUP_DIR/$TIMESTAMP" \
    --threads 2 \
    --compress \
    --routines \
    --triggers \
    --events \
    --logfile "$BACKUP_DIR/mydumper.log"
    # Sync to Backblaze B2
    echo "Syncing to Backblaze B2..."
    export RCLONE_CONFIG_B2_TYPE=b2
    export RCLONE_CONFIG_B2_ACCOUNT=$(cat ${config.age.secrets.b2-credentials.path} | ${pkgs.jq}/bin/jq -r .keyID)
    export RCLONE_CONFIG_B2_KEY=$(cat ${config.age.secrets.b2-credentials.path} | ${pkgs.jq}/bin/jq -r .applicationKey)
    SYNC_SUCCESS=0
    ${pkgs.rclone}/bin/rclone sync "$BACKUP_DIR" b2:noisebridge-wiki-backup/mysql/ \
    --transfers 4 \
    --checkers 8 \
    --b2-hard-delete \
    && SYNC_SUCCESS=1
    # Sync uploaded images to B2
    ${pkgs.rclone}/bin/rclone sync /var/lib/mediawiki/images/ b2:noisebridge-wiki-backup/images/ \
    --transfers 4 \
    --checkers 8 \
    || SYNC_SUCCESS=0
    # Back up Tor hidden service keys (losing these = losing the .onion address)
    ${pkgs.rclone}/bin/rclone sync /var/lib/tor/onion/ b2:noisebridge-wiki-backup/tor-keys/ \
    --transfers 1 \
    || true
    # Write metrics for Prometheus textfile collector (no leading whitespace!)
    cat > "$TEXTFILE_DIR/backup.prom" <<'METRICS'
    # HELP backup_latest_timestamp_seconds Unix timestamp of latest backup
    # TYPE backup_latest_timestamp_seconds gauge
    METRICS
    echo "backup_latest_timestamp_seconds $(date +%s)" >> "$TEXTFILE_DIR/backup.prom"
    cat >> "$TEXTFILE_DIR/backup.prom" <<METRICS
    # HELP backup_b2_sync_success Whether the last B2 sync succeeded (1=success, 0=failure)
    # TYPE backup_b2_sync_success gauge
    backup_b2_sync_success $SYNC_SUCCESS
    METRICS
    echo "Backup complete."
  '';
in
{
  # Oneshot service, triggered by the timer below (no wantedBy on purpose —
  # it should never start at boot). Runs as root because it reads secrets,
  # the MediaWiki image tree and the Tor key directory. idle I/O and CPU
  # scheduling keep the dump from competing with live wiki traffic.
  systemd.services.wiki-backup = {
    description = "Wiki database and image backup to Backblaze B2";
    after = [ "mysql.service" ];
    serviceConfig = {
      Type = "oneshot";
      ExecStart = backupScript;
      User = "root";
      IOSchedulingClass = "idle";
      CPUSchedulingPolicy = "idle";
    };
  };
  # Daily at 04:00 with up to 15 min jitter; Persistent=true runs a missed
  # backup at next boot.
  systemd.timers.wiki-backup = {
    description = "Daily wiki backup timer";
    wantedBy = [ "timers.target" ];
    timerConfig = {
      OnCalendar = "*-*-* 04:00:00";
      Persistent = true;
      RandomizedDelaySec = "15m";
    };
  };
  # Local dump directory, root-only writable.
  systemd.tmpfiles.rules = [
    "d /var/backups/mysql 0750 root root -"
  ];
  # JSON file with B2 keyID/applicationKey, decrypted for root only.
  age.secrets.b2-credentials = {
    file = ../../secrets/b2-credentials.age;
    owner = "root";
    group = "root";
    mode = "0400";
  };
}

View file

@ -0,0 +1,162 @@
{ config, pkgs, lib, ... }:
# Caddy front-end for the wiki:
#   - clearnet vhost (www.noisebridge.net) behind Cloudflare, with bot
#     blocking, anonymous rate limiting and cache headers
#   - Grafana reverse proxy
#   - bare-domain / alternate-TLD redirects to www
#   - plain-HTTP vhost on :8080 for the Tor hidden service
{
  services.caddy = {
    enable = true;
    # Custom Caddy build — presumably compiled with the rate_limit plugin
    # used below; TODO confirm pkgs.caddy-custom includes it.
    package = pkgs.caddy-custom;
    # rate_limit is a third-party directive with no default position in the
    # middleware chain, so it must be ordered explicitly. `metrics` exposes
    # the Prometheus endpoint scraped by the caddy job on :2019.
    globalConfig = ''
      order rate_limit before basicauth
      servers {
      # Trust Cloudflare's edge IPs so {client_ip} resolves to the real visitor
      trusted_proxies static 173.245.48.0/20 103.21.244.0/22 103.22.200.0/22 103.31.4.0/22 141.101.64.0/18 108.162.192.0/18 190.93.240.0/20 188.114.96.0/20 197.234.240.0/22 198.41.128.0/17 162.158.0.0/15 104.16.0.0/13 104.24.0.0/14 172.64.0.0/13 131.0.72.0/22 2400:cb00::/32 2606:4700::/32 2803:f800::/32 2405:b500::/32 2405:8100::/32 2a06:98c0::/29 2c0f:f248::/32
      metrics
      }
    '';
    virtualHosts = {
      # Main public wiki. Anonymous visitors (no session cookie) are rate
      # limited and get publicly-cacheable responses; logged-in users bypass
      # both. PHP requests go to the MediaWiki PHP-FPM pool over a unix socket.
      "www.noisebridge.net" = {
        extraConfig = ''
          # Health check endpoint
          handle /health {
          respond "ok" 200
          }
          # Bot blocking
          @bots header_regexp User-Agent "(?i)(ClaudeBot|GPTBot|CCBot|Bytespider|AhrefsBot|SemrushBot|MJ12bot|DotBot|PetalBot|Amazonbot|anthropic-ai|ChatGPT-User|cohere-ai|FacebookBot|Google-Extended|PerplexityBot)"
          respond @bots 403
          # robots.txt
          handle /robots.txt {
          respond "User-agent: ClaudeBot
          Disallow: /
          User-agent: GPTBot
          Disallow: /
          User-agent: CCBot
          Disallow: /
          User-agent: Bytespider
          Disallow: /
          User-agent: anthropic-ai
          Disallow: /
          User-agent: ChatGPT-User
          Disallow: /
          User-agent: *
          Allow: /
          Sitemap: https://www.noisebridge.net/sitemap.xml
          "
          }
          # Rate limiting for anonymous users (no session cookie)
          # {client_ip} works with or without a reverse proxy in front
          @anon {
          not header_regexp Cookie "nb_wiki_session="
          }
          rate_limit @anon {
          zone anon_zone {
          key {client_ip}
          events 60
          window 1m
          }
          }
          # Cache headers: anon gets public caching, logged-in gets private
          @logged_in {
          header_regexp Cookie "nb_wiki_session="
          }
          header @anon Cache-Control "public, max-age=7200"
          header @logged_in Cache-Control "private, no-cache"
          # Security headers
          header {
          Strict-Transport-Security "max-age=63072000; includeSubDomains; preload"
          X-Content-Type-Options "nosniff"
          X-Frame-Options "SAMEORIGIN"
          Referrer-Policy "strict-origin-when-cross-origin"
          }
          # Proxy to PHP-FPM
          php_fastcgi unix//run/phpfpm/mediawiki.sock {
          root ${config.services.mediawiki.finalPackage}/share/mediawiki
          }
          file_server {
          root ${config.services.mediawiki.finalPackage}/share/mediawiki
          }
        '';
      };
      # Grafana runs on loopback :3000 (see the grafana module); Caddy
      # terminates TLS for it.
      "grafana.noisebridge.net" = {
        extraConfig = ''
          reverse_proxy localhost:3000
        '';
      };
      # Domain redirects
      "noisebridge.net" = {
        extraConfig = ''
          redir https://www.noisebridge.net{uri} permanent
        '';
      };
      "noisebridge.com" = {
        extraConfig = ''
          redir https://www.noisebridge.net{uri} permanent
        '';
      };
      "noisebridge.org" = {
        extraConfig = ''
          redir https://www.noisebridge.net{uri} permanent
        '';
      };
      "noisebridge.io" = {
        extraConfig = ''
          redir https://www.noisebridge.net{uri} permanent
        '';
      };
      # ── Tor .onion vhost ──
      # Tor daemon forwards port 80 → localhost:8080. Caddy listens here
      # with HTTP only (no TLS — .onion v3 is already end-to-end encrypted).
      #
      # Differences from the clearnet vhost:
      # - No IP-based rate limiting (all Tor traffic arrives from 127.0.0.1)
      # - No HSTS (no TLS to enforce)
      # - No Cache-Control: public (no CDN to cache at)
      # - Bot blocking by User-Agent still works
      ":8080" = {
        extraConfig = ''
          # Bot blocking (same list as clearnet)
          @bots header_regexp User-Agent "(?i)(ClaudeBot|GPTBot|CCBot|Bytespider|AhrefsBot|SemrushBot|MJ12bot|DotBot|PetalBot|Amazonbot|anthropic-ai|ChatGPT-User|cohere-ai|FacebookBot|Google-Extended|PerplexityBot)"
          respond @bots 403
          # robots.txt — block everything on .onion (no reason for bots to index)
          handle /robots.txt {
          respond "User-agent: *
          Disallow: /
          "
          }
          # Security headers (no HSTS — no TLS over .onion)
          header {
          X-Content-Type-Options "nosniff"
          X-Frame-Options "SAMEORIGIN"
          Referrer-Policy "no-referrer"
          X-Wiki-Access "tor"
          }
          php_fastcgi unix//run/phpfpm/mediawiki.sock {
          root ${config.services.mediawiki.finalPackage}/share/mediawiki
          }
          file_server {
          root ${config.services.mediawiki.finalPackage}/share/mediawiki
          }
        '';
      };
    };
  };
  # Port 8080 is only for local Tor daemon — not public
  # (firewall already blocks it since it's not in allowedTCPPorts)
}

View file

@ -0,0 +1,41 @@
{ config, pkgs, lib, ... }:
# Grafana dashboard server. Bound to loopback only; the Caddy vhost for
# grafana.noisebridge.net terminates TLS and proxies to it.
let
  grafanaDomain = "grafana.noisebridge.net";
in
{
  services.grafana = {
    enable = true;
    settings = {
      # Listen on loopback; Caddy sits in front.
      server = {
        http_addr = "127.0.0.1";
        http_port = 3000;
        domain = grafanaDomain;
        root_url = "https://${grafanaDomain}";
      };
      # Admin password is loaded from an agenix-managed file at startup
      # via Grafana's $__file{...} expansion — never stored in the Nix store.
      security = {
        admin_user = "admin";
        admin_password = "$__file{${config.age.secrets.grafana-admin.path}}";
        disable_gravatar = true;
      };
      # No phone-home telemetry, no anonymous viewing, no self-signup.
      analytics.reporting_enabled = false;
      "auth.anonymous".enabled = false;
      users.allow_sign_up = false;
    };
    # Declaratively provision the local Prometheus as the default,
    # non-editable datasource.
    provision = {
      enable = true;
      datasources.settings.datasources = [
        {
          name = "Prometheus";
          type = "prometheus";
          url = "http://127.0.0.1:9090";
          isDefault = true;
          editable = false;
        }
      ];
    };
  };
  # Decrypted admin password file, readable only by the grafana user.
  age.secrets.grafana-admin = {
    file = ../../secrets/grafana-admin.age;
    owner = "grafana";
    group = "grafana";
  };
}

View file

@ -0,0 +1,123 @@
{ config, pkgs, lib, ... }:
# MediaWiki application tuning: invite-only accounts, anonymous file cache,
# email via local Postfix, login captcha, a static PHP-FPM pool with OPcache,
# a continuous job runner, and hourly image sync to the read-only replica.
{
  # Appended after the module-generated LocalSettings content (mkAfter so it
  # wins over defaults set earlier in the file).
  # NOTE(review): $wgShowIPinHeader was removed in MediaWiki 1.34 — harmless
  # if so, but confirm the target MediaWiki version.
  services.mediawiki.extraConfig = lib.mkAfter ''
    # ----- Invite-only accounts -----
    $wgGroupPermissions['*']['createaccount'] = false;
    $wgGroupPermissions['bureaucrat']['createaccount'] = true;
    # ----- File cache for anonymous readers -----
    $wgUseFileCache = true;
    $wgFileCacheDirectory = "/var/cache/mediawiki";
    $wgShowIPinHeader = false;
    # ----- Rate limit exemption for logged-in users -----
    $wgGroupPermissions['user']['noratelimit'] = true;
    # ----- Email -----
    $wgEnableEmail = true;
    $wgEnableUserEmail = true;
    $wgEmergencyContact = "wiki@noisebridge.net";
    $wgPasswordSender = "wiki@noisebridge.net";
    # Mail sent via local Postfix, which relays through m3.noisebridge.net
    # ----- ReCaptcha (login brute-force only) -----
    wfLoadExtension( 'ConfirmEdit/ReCaptchaNoCaptcha' );
    $wgReCaptchaSiteKey = '6Le_REPLACE_SITE_KEY';
    $wgReCaptchaSecretKey = trim(file_get_contents('${config.age.secrets.mediawiki-recaptcha.path}'));
    $wgCaptchaTriggers['badlogin'] = true;
    $wgCaptchaTriggers['createaccount'] = false;
    $wgCaptchaTriggers['edit'] = false;
    $wgCaptchaTriggers['create'] = false;
  '';
  # PHP-FPM: static pool for maximum performance
  # Use individual mkForce to override defaults without clobbering
  # required settings (listen, user, group) set by the mediawiki module
  services.phpfpm.pools.mediawiki.settings = {
    "pm" = lib.mkForce "static";
    "pm.max_children" = lib.mkForce 30;
    # Recycle each worker after 500 requests to bound memory growth.
    "pm.max_requests" = lib.mkForce 500;
    "request_terminate_timeout" = lib.mkForce "30s";
    "catch_workers_output" = lib.mkForce true;
    # Status endpoint scraped by the PHP-FPM exporter wrapper.
    "pm.status_path" = "/fpm-status";
    # OPcache
    "php_admin_value[opcache.enable]" = 1;
    "php_admin_value[opcache.memory_consumption]" = 256;
    "php_admin_value[opcache.max_accelerated_files]" = 10000;
    "php_admin_value[opcache.revalidate_freq]" = 60;
    "php_admin_value[opcache.jit]" = 1255;
    "php_admin_value[opcache.jit_buffer_size]" = "64M";
    # Memory & execution
    "php_admin_value[memory_limit]" = "256M";
    "php_admin_value[max_execution_time]" = 30;
    "php_admin_value[upload_max_filesize]" = "10M";
    # post_max_size > upload_max_filesize to leave room for form overhead.
    "php_admin_value[post_max_size]" = "12M";
  };
  # File cache directory
  systemd.tmpfiles.rules = [
    "d /var/cache/mediawiki 0755 mediawiki mediawiki -"
  ];
  # MediaWiki job runner (since wgJobRunRate=0)
  # runJobs.php exits after 10 jobs; Restart=always re-launches it, giving a
  # continuously polling runner without unbounded PHP process lifetime.
  systemd.services.mediawiki-jobrunner = {
    description = "MediaWiki job runner";
    after = [ "mysql.service" "phpfpm-mediawiki.service" ];
    wantedBy = [ "multi-user.target" ];
    serviceConfig = {
      Type = "simple";
      User = "mediawiki";
      Group = "mediawiki";
      # NOTE(review): pkgs.php here may differ from the PHP build used by the
      # phpfpm pool — confirm they share extensions/config.
      ExecStart = "${pkgs.php}/bin/php ${config.services.mediawiki.finalPackage}/share/mediawiki/maintenance/runJobs.php --wait --maxjobs=10";
      Restart = "always";
      RestartSec = "30s";
    };
  };
  # Sync uploaded images to replica over Tailscale (hourly)
  # Writes textfile metrics so Prometheus can alert on stale syncs
  # No wantedBy: only the timer below starts it.
  systemd.services.wiki-image-sync = {
    description = "Sync wiki images to replica";
    after = [ "tailscale-autoconnect.service" ];
    path = [ pkgs.rsync ];
    serviceConfig = {
      Type = "oneshot";
      User = "root";
    };
    script = ''
      TEXTFILE_DIR="/var/lib/prometheus-node-exporter/textfile"
      if rsync -az --delete /var/lib/mediawiki/images/ wiki-replica:/var/lib/mediawiki/images/; then
      SYNC_OK=1
      else
      SYNC_OK=0
      fi
      cat > "$TEXTFILE_DIR/imagesync.prom" <<EOF
      # HELP imagesync_latest_timestamp_seconds Unix timestamp of latest image sync attempt
      # TYPE imagesync_latest_timestamp_seconds gauge
      imagesync_latest_timestamp_seconds $(date +%s)
      # HELP imagesync_success Whether the last image sync succeeded (1=success, 0=failure)
      # TYPE imagesync_success gauge
      imagesync_success $SYNC_OK
      EOF
    '';
  };
  systemd.timers.wiki-image-sync = {
    description = "Hourly wiki image sync to replica";
    wantedBy = [ "timers.target" ];
    timerConfig = {
      OnCalendar = "hourly";
      Persistent = true;
    };
  };
  # ReCaptcha secret key, readable only by the mediawiki user.
  age.secrets.mediawiki-recaptcha = {
    file = ../../secrets/mediawiki-recaptcha.age;
    owner = "mediawiki";
    group = "mediawiki";
  };
}

View file

@ -0,0 +1,92 @@
{ config, pkgs, lib, ... }:
# MariaDB primary for MediaWiki: binlog/GTID replication source for the
# read-only replica, plus the users and exporter needed around it.
{
  services.mysql = {
    enable = true;
    package = pkgs.mariadb;
    dataDir = "/var/lib/mysql";
    settings.mysqld = {
      # Listens on all interfaces so the replica can connect over Tailscale.
      # NOTE(review): relies on the host firewall to keep 3306 off the
      # public internet — confirm.
      bind-address = "0.0.0.0";
      # InnoDB
      innodb_buffer_pool_size = "512M";
      innodb_log_file_size = "128M";
      # flush_at_trx_commit=1 + sync_binlog=1: full durability per commit.
      innodb_flush_log_at_trx_commit = 1;
      innodb_file_per_table = 1;
      # Binlog + GTID replication. This is MariaDB, so the replica attaches
      # with CHANGE MASTER TO ... MASTER_USE_GTID (MariaDB GTIDs, not
      # MySQL's MASTER_AUTO_POSITION); gtid_strict_mode is MariaDB-specific.
      server-id = 1;
      log_bin = "mysql-bin";
      binlog_format = "ROW";
      sync_binlog = 1;
      expire_logs_days = 7;
      # Only the wiki database is replicated.
      binlog_do_db = "noisebridge_mediawiki";
      gtid_strict_mode = 1;
      # Performance
      max_connections = 100;
      tmp_table_size = "64M";
      max_heap_table_size = "64M";
      table_open_cache = 400;
      sort_buffer_size = "2M";
      read_buffer_size = "2M";
      # Character set: binary server charset, matching MediaWiki's binary
      # schema (MediaWiki handles UTF-8 itself).
      character-set-server = "binary";
      collation-server = "binary";
    };
    ensureDatabases = [ "noisebridge_mediawiki" ];
    ensureUsers = [
      # Application user for MediaWiki.
      {
        name = "wiki";
        ensurePermissions = {
          "noisebridge_mediawiki.*" = "ALL PRIVILEGES";
        };
      }
      # Replication account used by the replica (password set below).
      {
        name = "repl";
        ensurePermissions = {
          "*.*" = "REPLICATION SLAVE, REPLICATION CLIENT";
        };
      }
      # Read-only monitoring account for the mysqld exporter.
      {
        name = "mysqld_exporter";
        ensurePermissions = {
          "*.*" = "PROCESS, REPLICATION CLIENT, SELECT";
        };
      }
    ];
  };
  # Set repl user password (ensureUsers creates with no password / socket auth,
  # but the replica connects over TCP and needs a password)
  # NOTE(review): the password ends up in the mysql client's argv via -e,
  # briefly visible in the process list — consider --init-command or a
  # defaults-extra-file instead.
  systemd.services.mysql-repl-password = {
    description = "Set MySQL replication user password";
    after = [ "mysql.service" ];
    requires = [ "mysql.service" ];
    wantedBy = [ "multi-user.target" ];
    serviceConfig = {
      Type = "oneshot";
      # RemainAfterExit so the unit shows active and doesn't rerun needlessly.
      RemainAfterExit = true;
    };
    script = ''
      REPL_PASS=$(cat ${config.age.secrets.mysql-replication.path})
      ${pkgs.mariadb}/bin/mysql -u root -e \
      "ALTER USER 'repl'@'%' IDENTIFIED BY '$REPL_PASS';"
    '';
  };
  # mysqld exporter for Prometheus
  # runAsLocalSuperUser: connects over the local socket as root instead of
  # needing stored credentials for the mysqld_exporter account.
  services.prometheus.exporters.mysqld = {
    enable = true;
    port = 9104;
    runAsLocalSuperUser = true;
  };
  # Replication password, readable only by the mysql user.
  age.secrets.mysql-replication = {
    file = ../../secrets/mysql-replication.age;
    owner = "mysql";
    group = "mysql";
  };
}

View file

@ -0,0 +1,15 @@
{ config, pkgs, lib, ... }:
# Outbound-only mail submission for the wiki host. Local processes hand mail
# to Postfix on loopback; everything is relayed to the existing Noisebridge
# mail server. This box never accepts inbound or local-delivery mail.
{
  services.postfix = {
    enable = true;
    # Identity stamped on outgoing mail.
    hostname = "wiki.noisebridge.net";
    origin = "noisebridge.net";
    # Tailscale hostname of the existing Noisebridge mail server.
    relayHost = "m3";
    # Empty destination list — accept nothing for local delivery.
    destination = [ ];
    # Only loopback clients may submit.
    networks = [ "127.0.0.0/8" "[::1]/128" ];
    config = {
      # Never listen on an external interface.
      inet_interfaces = "loopback-only";
      # Opportunistic TLS towards the relay.
      smtp_tls_security_level = "may";
    };
  };
}

View file

@ -0,0 +1,246 @@
{ config, pkgs, lib, ... }:
# Prometheus server plus the exporters that feed it: node, mysqld (declared
# in the mysql module), blackbox, memcached and a wrapped PHP-FPM exporter.
let
  # PHP-FPM exporter wrapper to handle the semicolon-in-URI escaping issue:
  # the status path is appended to the unix socket URI after ';', which shell
  # quoting inside a wrapper script handles cleanly.
  phpfpmExporterScript = pkgs.writeShellScript "phpfpm-exporter-wrapper" ''
    exec ${pkgs.prometheus-php-fpm-exporter}/bin/php-fpm-exporter server \
      --phpfpm.scrape-uri 'unix:///run/phpfpm/mediawiki.sock;/fpm-status' \
      --web.listen-address ':9253'
  '';
in
{
  services.prometheus = {
    enable = true;
    port = 9090;
    # Loopback only — Grafana on the same host is the only consumer.
    listenAddress = "127.0.0.1";
    retentionTime = "90d";
    extraFlags = [
      "--storage.tsdb.max-block-duration=2h"
      "--storage.tsdb.retention.size=5GB"
    ];
    globalConfig = {
      scrape_interval = "15s";
      evaluation_interval = "15s";
    };
    # Every job pins an explicit instance label ("wiki" / "wiki-replica") so
    # alert expressions and dashboards don't depend on host:port addresses.
    scrapeConfigs = [
      # ── Primary system metrics ──
      {
        job_name = "node";
        static_configs = [{
          targets = [ "localhost:9100" ];
          labels = { instance = "wiki"; };
        }];
      }
      # ── Replica system metrics (over Tailscale) ──
      {
        job_name = "node-replica";
        static_configs = [{
          targets = [ "wiki-replica:9100" ];
          labels = { instance = "wiki-replica"; };
        }];
      }
      # ── Primary MariaDB ──
      # Queries/s, connections, buffer pool hit ratio, slow queries,
      # binlog position, table locks, InnoDB row operations
      {
        job_name = "mysqld";
        static_configs = [{
          targets = [ "localhost:9104" ];
          labels = { instance = "wiki"; };
        }];
      }
      # ── Replica MariaDB (over Tailscale) ──
      # Replication lag (Seconds_Behind_Master), IO/SQL thread status,
      # relay log position, read-only query volume
      {
        job_name = "mysqld-replica";
        static_configs = [{
          targets = [ "wiki-replica:9104" ];
          labels = { instance = "wiki-replica"; };
        }];
      }
      # ── Primary Caddy ──
      # Requests/s by status code (2xx, 3xx, 4xx, 5xx), response latency
      # histograms, active connections, bytes in/out
      {
        job_name = "caddy";
        static_configs = [{
          targets = [ "localhost:2019" ];
          labels = { instance = "wiki"; };
        }];
      }
      # ── Replica Caddy (over Tailscale) ──
      {
        job_name = "caddy-replica";
        static_configs = [{
          targets = [ "wiki-replica:2019" ];
          labels = { instance = "wiki-replica"; };
        }];
      }
      # ── Primary PHP-FPM ──
      # Active/idle/total workers, accepted connections, request duration,
      # slow requests, max_children reached count
      {
        job_name = "phpfpm";
        static_configs = [{
          targets = [ "localhost:9253" ];
          labels = { instance = "wiki"; };
        }];
      }
      # ── Primary memcached ──
      # Hit rate, miss rate, evictions, current items, bytes used/limit,
      # connections, get/set/delete rates
      {
        job_name = "memcached";
        static_configs = [{
          targets = [ "localhost:9150" ];
          labels = { instance = "wiki"; };
        }];
      }
      # ── Replica memcached (over Tailscale) ──
      {
        job_name = "memcached-replica";
        static_configs = [{
          targets = [ "wiki-replica:9150" ];
          labels = { instance = "wiki-replica"; };
        }];
      }
      # ── Blackbox HTTP probes ──
      # End-to-end: DNS resolution time, TCP connect, TLS handshake,
      # HTTP response time, status code, TLS cert expiry
      {
        job_name = "blackbox-http";
        metrics_path = "/probe";
        params = { module = [ "http_2xx" ]; };
        static_configs = [{
          targets = [
            # Primary wiki
            "https://www.noisebridge.net"
            "https://www.noisebridge.net/wiki/Main_Page"
            "https://www.noisebridge.net/health"
            # Replica wiki
            "https://readonly.noisebridge.net"
            "https://readonly.noisebridge.net/wiki/Main_Page"
            "https://readonly.noisebridge.net/health"
            # Grafana
            "https://grafana.noisebridge.net"
          ];
        }];
        # Standard blackbox indirection: the probed URL becomes the ?target=
        # parameter and the instance label, while the actual scrape goes to
        # the local blackbox exporter.
        relabel_configs = [
          {
            source_labels = [ "__address__" ];
            target_label = "__param_target";
          }
          {
            source_labels = [ "__param_target" ];
            target_label = "instance";
          }
          {
            target_label = "__address__";
            replacement = "localhost:9115";
          }
        ];
      }
      # ── Grafana internal metrics ──
      {
        job_name = "grafana";
        static_configs = [{
          targets = [ "localhost:3000" ];
          # Pin instance = "wiki" for consistency with every other local job
          # (previously this job alone exposed the raw localhost:3000 label).
          labels = { instance = "wiki"; };
        }];
      }
    ];
  };
  # ── Node exporter ──
  # System-level: CPU, RAM, disk I/O, filesystem usage, network traffic,
  # systemd unit states, plus custom textfile metrics from the backup script
  services.prometheus.exporters.node = {
    enable = true;
    port = 9100;
    enabledCollectors = [
      "cpu"
      "diskstats"
      "filesystem"
      "loadavg"
      "meminfo"
      "netdev"
      "stat"
      "time"
      "vmstat"
      "systemd"
      "textfile"
    ];
    extraFlags = [
      "--collector.textfile.directory=/var/lib/prometheus-node-exporter/textfile"
    ];
  };
  # ── Blackbox exporter ──
  # Makes actual HTTP requests and reports: probe success/failure, response
  # time broken into phases (DNS, connect, TLS, processing, transfer),
  # HTTP status code, TLS certificate expiry date.
  # Config is emitted as JSON, which blackbox accepts as YAML.
  services.prometheus.exporters.blackbox = {
    enable = true;
    port = 9115;
    configFile = pkgs.writeText "blackbox.yml" (builtins.toJSON {
      modules = {
        http_2xx = {
          prober = "http";
          timeout = "10s";
          http = {
            valid_http_versions = [ "HTTP/1.1" "HTTP/2.0" ];
            valid_status_codes = [ 200 ];
            method = "GET";
            follow_redirects = true;
            preferred_ip_protocol = "ip4";
          };
        };
      };
    });
  };
  # ── Memcached exporter ──
  # Exposes: cmd_get, cmd_set, get_hits, get_misses (→ hit ratio),
  # evictions, curr_items, bytes (used), limit_maxbytes,
  # curr_connections, total_connections
  services.prometheus.exporters.memcached = {
    enable = true;
    port = 9150;
    extraFlags = [ "--memcached.address=localhost:11211" ];
  };
  # ── PHP-FPM exporter ──
  # Exposes: active_processes, idle_processes, total_processes,
  # accepted_conn, listen_queue, max_listen_queue,
  # slow_requests, max_children_reached
  # Runs as the mediawiki user so it can read the pool's unix socket.
  systemd.services.prometheus-phpfpm-exporter = {
    description = "Prometheus PHP-FPM exporter";
    after = [ "phpfpm-mediawiki.service" ];
    wantedBy = [ "multi-user.target" ];
    serviceConfig = {
      ExecStart = phpfpmExporterScript;
      User = "mediawiki";
      Group = "mediawiki";
      Restart = "always";
      RestartSec = "5s";
    };
  };
  # Textfile collector directory for backup and sync metrics
  systemd.tmpfiles.rules = [
    "d /var/lib/prometheus-node-exporter/textfile 0755 root root -"
  ];
}