commit 8cfede9f5729044daf12ddb616b351c09054d774 Author: Jet Date: Tue Mar 17 04:07:14 2026 -0700 feat: init diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..3550a30 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..129bcf9 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,11 @@ +## What + + + +## Why + + + +## Testing + + diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..733ec09 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,61 @@ +name: CI + +on: + pull_request: + push: + branches: [main] + +jobs: + check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: DeterminateSystems/nix-installer-action@main + - uses: cachix/cachix-action@v15 + with: + name: noisebridge-wiki + authToken: ${{ secrets.CACHIX_AUTH_TOKEN }} + + - name: nix flake check + run: nix flake check + + - name: Build wiki + run: nix build .#nixosConfigurations.wiki.config.system.build.toplevel + + - name: Build wiki-replica + run: nix build .#nixosConfigurations.wiki-replica.config.system.build.toplevel + + deploy: + needs: check + if: github.ref == 'refs/heads/main' && github.event_name == 'push' + runs-on: ubuntu-latest + concurrency: + group: deploy + cancel-in-progress: false + steps: + - uses: actions/checkout@v4 + - uses: DeterminateSystems/nix-installer-action@main + - uses: cachix/cachix-action@v15 + with: + name: noisebridge-wiki + authToken: ${{ secrets.CACHIX_AUTH_TOKEN }} + + - name: Connect to Tailscale + uses: tailscale/github-action@v2 + with: + oauth-client-id: ${{ secrets.TS_OAUTH_CLIENT_ID }} + oauth-secret: ${{ secrets.TS_OAUTH_SECRET }} + tags: tag:ci + + - name: Configure SSH + run: | + mkdir -p ~/.ssh + echo "${{ secrets.DEPLOY_SSH_KEY }}" > ~/.ssh/id_ed25519 + chmod 600 ~/.ssh/id_ed25519 + ssh-keyscan -t ed25519 wiki wiki-replica >> ~/.ssh/known_hosts 
2>/dev/null + + - name: Deploy wiki + run: nix run .#deploy -- --ssh-opts="-o ConnectTimeout=30" .#wiki # deploy-rs options go before the target; anything after a second "--" is passed to nix build + + - name: Deploy wiki-replica + run: nix run .#deploy -- --ssh-opts="-o ConnectTimeout=30" .#wiki-replica # same ordering as the primary deploy step diff --git a/.github/workflows/update-flake.yml b/.github/workflows/update-flake.yml new file mode 100644 index 0000000..ed94a71 --- /dev/null +++ b/.github/workflows/update-flake.yml @@ -0,0 +1,17 @@ +name: Update flake.lock + +on: + schedule: + - cron: "0 6 * * *" # 6am UTC daily + workflow_dispatch: + +jobs: + update: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: DeterminateSystems/nix-installer-action@main + - uses: DeterminateSystems/update-flake-lock@main + with: + pr-title: "chore: update flake.lock" + pr-labels: dependencies diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..59aaa3d --- /dev/null +++ b/flake.nix @@ -0,0 +1,110 @@ +{ + description = "Noisebridge Wiki — Standalone NixOS Infrastructure"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + agenix = { + url = "github:ryantm/agenix"; + inputs.nixpkgs.follows = "nixpkgs"; + }; + deploy-rs = { + url = "github:serokell/deploy-rs"; + inputs.nixpkgs.follows = "nixpkgs"; + }; + }; + + outputs = { self, nixpkgs, agenix, deploy-rs, ...
}: + let + system = "x86_64-linux"; + pkgs = nixpkgs.legacyPackages.${system}; + in + { + overlays.default = import ./overlays/caddy.nix; + + nixosConfigurations.wiki = nixpkgs.lib.nixosSystem { + inherit system; + specialArgs = { inherit agenix; }; + modules = [ + { nixpkgs.overlays = [ self.overlays.default ]; } + agenix.nixosModules.default + ./hosts/wiki + ./modules/common.nix + ./modules/tailscale.nix + ./modules/security.nix + ./modules/users.nix + ./modules/tor.nix + ./modules/mediawiki-base.nix + ./modules/wiki-primary/mediawiki.nix + ./modules/wiki-primary/mysql.nix + ./modules/wiki-primary/caddy.nix + ./modules/wiki-primary/prometheus.nix + ./modules/wiki-primary/alerting.nix + ./modules/wiki-primary/grafana.nix + ./modules/wiki-primary/backup.nix + ./modules/wiki-primary/postfix.nix + ]; + }; + + nixosConfigurations.wiki-replica = nixpkgs.lib.nixosSystem { + inherit system; + specialArgs = { inherit agenix; }; + modules = [ + { nixpkgs.overlays = [ self.overlays.default ]; } + agenix.nixosModules.default + ./hosts/wiki-replica + ./modules/common.nix + ./modules/tailscale.nix + ./modules/security.nix + ./modules/users.nix + ./modules/tor.nix + ./modules/mediawiki-base.nix + ./modules/wiki-replica/mediawiki.nix + ./modules/wiki-replica/mysql.nix + ./modules/wiki-replica/caddy.nix + ]; + }; + + deploy.nodes = { + wiki = { + hostname = "wiki"; # Tailscale hostname + profiles.system = { + user = "root"; + sshUser = "root"; + path = deploy-rs.lib.${system}.activate.nixos + self.nixosConfigurations.wiki; + }; + }; + wiki-replica = { + hostname = "wiki-replica"; # Tailscale hostname + profiles.system = { + user = "root"; + sshUser = "root"; + path = deploy-rs.lib.${system}.activate.nixos + self.nixosConfigurations.wiki-replica; + }; + }; + }; + + checks = builtins.mapAttrs + (system: deployLib: deployLib.deployChecks self.deploy) + deploy-rs.lib; + + apps.${system}.deploy = { + type = "app"; + program = "${deploy-rs.packages.${system}.default}/bin/deploy"; + }; 
+ + devShells.${system}.default = pkgs.mkShell { + packages = with pkgs; [ + deploy-rs.packages.${system}.default + agenix.packages.${system}.default + mariadb.client + rclone + curl + jq + hey + mydumper + ]; + }; + }; +} diff --git a/hosts/wiki-replica/default.nix b/hosts/wiki-replica/default.nix new file mode 100644 index 0000000..783db4b --- /dev/null +++ b/hosts/wiki-replica/default.nix @@ -0,0 +1,16 @@ +{ config, pkgs, lib, ... }: +{ + imports = [ + ./hardware-configuration.nix + ]; + + boot.loader.systemd-boot.enable = true; + boot.loader.efi.canTouchEfiVariables = true; + + networking.hostName = "wiki-replica"; + networking.domain = "noisebridge.net"; + + networking.useDHCP = true; + + system.stateVersion = "24.11"; +} diff --git a/hosts/wiki-replica/hardware-configuration.nix b/hosts/wiki-replica/hardware-configuration.nix new file mode 100644 index 0000000..aa353e3 --- /dev/null +++ b/hosts/wiki-replica/hardware-configuration.nix @@ -0,0 +1,15 @@ +# Replace with output of `nixos-generate-config --show-hardware-config` +# after installing on the actual VPS. +{ config, lib, modulesPath, ... }: +{ + imports = [ + (modulesPath + "/profiles/qemu-guest.nix") + ]; + + boot.initrd.availableKernelModules = [ + "virtio_pci" + "virtio_scsi" + "ahci" + "sd_mod" + ]; +} diff --git a/hosts/wiki/default.nix b/hosts/wiki/default.nix new file mode 100644 index 0000000..3fc460f --- /dev/null +++ b/hosts/wiki/default.nix @@ -0,0 +1,21 @@ +{ config, pkgs, lib, ... 
}: +{ + imports = [ + ./hardware-configuration.nix + ]; + + boot.loader.systemd-boot.enable = true; + boot.loader.efi.canTouchEfiVariables = true; + + networking.hostName = "wiki"; + networking.domain = "noisebridge.net"; + + # VPS typically uses DHCP — override with static IP if needed + networking.useDHCP = true; + # networking.interfaces.ens3 = { + # ipv4.addresses = [{ address = "TODO"; prefixLength = 24; }]; + # }; + # networking.defaultGateway = { address = "TODO"; interface = "ens3"; }; + + system.stateVersion = "24.11"; +} diff --git a/hosts/wiki/hardware-configuration.nix b/hosts/wiki/hardware-configuration.nix new file mode 100644 index 0000000..aa353e3 --- /dev/null +++ b/hosts/wiki/hardware-configuration.nix @@ -0,0 +1,15 @@ +# Replace with output of `nixos-generate-config --show-hardware-config` +# after installing on the actual VPS. +{ config, lib, modulesPath, ... }: +{ + imports = [ + (modulesPath + "/profiles/qemu-guest.nix") + ]; + + boot.initrd.availableKernelModules = [ + "virtio_pci" + "virtio_scsi" + "ahci" + "sd_mod" + ]; +} diff --git a/modules/common.nix b/modules/common.nix new file mode 100644 index 0000000..776dfbb --- /dev/null +++ b/modules/common.nix @@ -0,0 +1,31 @@ +{ config, pkgs, ... }: +{ + nix = { + settings = { + auto-optimise-store = true; + experimental-features = [ "nix-command" "flakes" ]; + trusted-users = [ "root" "@wheel" ]; + }; + gc = { + automatic = true; + dates = "weekly"; + options = "--delete-older-than 30d"; + }; + }; + + time.timeZone = "US/Pacific"; + i18n.defaultLocale = "en_US.UTF-8"; + services.timesyncd.enable = true; + + environment.systemPackages = with pkgs; [ + vim + git + htop + tmux + curl + wget + jq + dig + tcpdump + ]; +} diff --git a/modules/mediawiki-base.nix b/modules/mediawiki-base.nix new file mode 100644 index 0000000..29537c6 --- /dev/null +++ b/modules/mediawiki-base.nix @@ -0,0 +1,179 @@ +{ config, pkgs, lib, ... 
}: +{ + services.memcached = { + enable = true; + maxMemory = 256; + listen = "127.0.0.1"; + port = 11211; + }; + + services.mediawiki = { + enable = true; + name = "Noisebridge"; + url = "https://www.noisebridge.net"; + passwordFile = config.age.secrets.mediawiki-secret-key.path; + + database = { + type = "mysql"; + name = "noisebridge_mediawiki"; + user = "wiki"; + passwordFile = config.age.secrets.mysql-mediawiki.path; + socket = "/run/mysqld/mysqld.sock"; + createLocally = false; + }; + + extensions = { + # Bundled extensions + CiteThisPage = null; + Cite = null; + ConfirmEdit = null; + Gadgets = null; + ImageMap = null; + InputBox = null; + Interwiki = null; + LocalisationUpdate = null; + Nuke = null; + ParserFunctions = null; + PdfHandler = null; + Poem = null; + Renameuser = null; + SpamBlacklist = null; + SyntaxHighlight_GeSHi = null; + TitleBlacklist = null; + WikiEditor = null; + CategoryTree = null; + CodeEditor = null; + VisualEditor = null; + Scribunto = null; + TemplateData = null; + TextExtracts = null; + PageImages = null; + Popups = null; + MultimediaViewer = null; + Math = null; + ReplaceText = null; + SecureLinkFixer = null; + }; + + skins = { + Vector = null; + }; + + extraConfig = '' + # ----- Branding & URLs ----- + $wgMetaNamespace = "Noisebridge"; + $wgSitename = "Noisebridge"; + $wgServer = "https://www.noisebridge.net"; + $wgScriptPath = ""; + $wgArticlePath = "/wiki/$1"; + $wgUsePathInfo = true; + $wgLogo = "$wgResourceBasePath/resources/assets/noisebridge-logo.png"; + + # ----- Skin: Vector 2022 only ----- + $wgDefaultSkin = "vector-2022"; + $wgVectorDefaultSkinVersion = "2"; + $wgSkipSkins = [ "cologneblue", "monobook", "modern", "timeless" ]; + + # ----- Locale & License ----- + $wgLanguageCode = "en"; + $wgLocaltimezone = "America/Los_Angeles"; + $wgRightsPage = ""; + $wgRightsUrl = "https://creativecommons.org/licenses/by-sa/4.0/"; + $wgRightsText = "Creative Commons Attribution-ShareAlike"; + $wgRightsIcon = 
"$wgResourceBasePath/resources/assets/licenses/cc-by-sa.png"; + + # ----- Database ----- + $wgDBtype = "mysql"; + $wgDBTableOptions = "ENGINE=InnoDB, DEFAULT CHARSET=binary"; + + # ----- Memcached ----- + $wgMainCacheType = CACHE_MEMCACHED; + $wgMemCachedServers = [ "127.0.0.1:11211" ]; + $wgSessionCacheType = CACHE_MEMCACHED; + $wgMessageCacheType = CACHE_MEMCACHED; + $wgParserCacheType = CACHE_MEMCACHED; + + # ----- Permissions ----- + # Anonymous users can read but not edit + $wgGroupPermissions['*']['edit'] = false; + $wgGroupPermissions['*']['createpage'] = false; + $wgGroupPermissions['*']['createtalk'] = false; + $wgGroupPermissions['*']['writeapi'] = false; + + # Registered users can edit after autoconfirm + $wgGroupPermissions['user']['edit'] = true; + $wgGroupPermissions['user']['createpage'] = true; + $wgGroupPermissions['user']['createtalk'] = true; + $wgGroupPermissions['user']['writeapi'] = true; + $wgGroupPermissions['user']['upload'] = true; + $wgGroupPermissions['user']['reupload'] = true; + $wgGroupPermissions['user']['move'] = true; + + # Autoconfirm: 5 edits + 3 days + email confirmed + $wgAutoConfirmAge = 3 * 86400; + $wgAutoConfirmCount = 5; + $wgEmailConfirmToEdit = true; + + # ----- Uploads ----- + $wgEnableUploads = true; + $wgFileExtensions = array_merge( + $wgFileExtensions, + [ "pdf", "svg", "png", "gif", "jpg", "jpeg", "webp" ] + ); + $wgMaxUploadSize = 10 * 1024 * 1024; // 10MB + $wgUseImageMagick = true; + $wgImageMagickConvertCommand = "${pkgs.imagemagick}/bin/convert"; + + # ----- Foreign file repos (Wikimedia Commons) ----- + $wgUseInstantCommons = true; + + # ----- Cookie prefix (must match Caddy session detection) ----- + $wgCookiePrefix = "nb_wiki"; + + # ----- Performance ----- + $wgUseGzip = true; + $wgDiff3 = "${pkgs.diffutils}/bin/diff3"; + $wgJobRunRate = 0; // jobs handled by maintenance script + $wgResourceLoaderMaxage = [ + 'versioned' => 30 * 86400, + 'unversioned' => 300, + ]; + + # ----- Extension configs ----- + # 
Scribunto (Lua templating) + $wgScribuntoDefaultEngine = "luastandalone"; + $wgScribuntoEngineConf['luastandalone']['luaPath'] = "${pkgs.lua5_1}/bin/lua"; // Scribunto only supports Lua 5.1 + + # VisualEditor + $wgVisualEditorEnableWikitext = true; + $wgDefaultUserOptions['visualeditor-enable'] = 1; + + # ParserFunctions + $wgPFEnableStringFunctions = true; + + # SpamBlacklist + $wgSpamBlacklistFiles = [ + "https://meta.wikimedia.org/w/index.php?title=Spam_blacklist&action=raw&sb_ver=1", + ]; + + # SyntaxHighlight + $wgSyntaxHighlightModels['nix'] = 'nix'; + + # Popups (Page Previews) + $wgPopupsHideOptInOnPreferencesPage = true; + $wgPopupsOptInDefaultState = "1"; + ''; + }; + + age.secrets.mediawiki-secret-key = { + file = ../secrets/mediawiki-secret-key.age; + owner = "mediawiki"; + group = "mediawiki"; + }; + + age.secrets.mysql-mediawiki = { + file = ../secrets/mysql-mediawiki.age; + owner = "mediawiki"; + group = "mediawiki"; + }; +} diff --git a/modules/security.nix b/modules/security.nix new file mode 100644 index 0000000..b1be95e --- /dev/null +++ b/modules/security.nix @@ -0,0 +1,121 @@ +{ config, pkgs, ... }: +{ + networking.firewall = { + enable = true; + # SSH is NOT public — only accessible via Tailscale (trustedInterfaces) + allowedTCPPorts = [ + 80 # HTTP (Caddy ACME + redirect) + 443 # HTTPS + ]; + logReversePathDrops = true; + + # Kernel-level DDoS protection via iptables + # These rules fire BEFORE Caddy even sees the packet, so they're very cheap. + extraCommands = '' + # ── SYN flood protection ── + # Limit new TCP connections to 30/sec per source IP (burst 50). + # Legitimate browsers open ~6 connections; scrapers open hundreds.
+ iptables -N RATE_LIMIT 2>/dev/null || iptables -F RATE_LIMIT + iptables -A RATE_LIMIT -m hashlimit \ + --hashlimit-name syn_flood \ + --hashlimit-above 30/sec \ + --hashlimit-burst 50 \ + --hashlimit-mode srcip \ + --hashlimit-htable-expire 300000 \ + -j DROP + iptables -A RATE_LIMIT -j RETURN + + # Hook into INPUT chain for new TCP SYN packets to HTTP/HTTPS + iptables -C INPUT -p tcp --syn -m multiport --dports 80,443 -j RATE_LIMIT 2>/dev/null || \ + iptables -I INPUT -p tcp --syn -m multiport --dports 80,443 -j RATE_LIMIT + + # ── Connection limit ── + # Max 200 concurrent connections per source IP to HTTP/HTTPS. + # A single browser uses ~6-10; a scraper farm uses thousands. + iptables -C INPUT -p tcp -m multiport --dports 80,443 -m connlimit --connlimit-above 200 --connlimit-mask 32 -j DROP 2>/dev/null || \ + iptables -I INPUT -p tcp -m multiport --dports 80,443 -m connlimit --connlimit-above 200 --connlimit-mask 32 -j DROP + + # ── Same for IPv6 ── + ip6tables -N RATE_LIMIT 2>/dev/null || ip6tables -F RATE_LIMIT + ip6tables -A RATE_LIMIT -m hashlimit \ + --hashlimit-name syn_flood_v6 \ + --hashlimit-above 30/sec \ + --hashlimit-burst 50 \ + --hashlimit-mode srcip \ + --hashlimit-htable-expire 300000 \ + -j DROP + ip6tables -A RATE_LIMIT -j RETURN + + ip6tables -C INPUT -p tcp --syn -m multiport --dports 80,443 -j RATE_LIMIT 2>/dev/null || \ + ip6tables -I INPUT -p tcp --syn -m multiport --dports 80,443 -j RATE_LIMIT + + ip6tables -C INPUT -p tcp -m multiport --dports 80,443 -m connlimit --connlimit-above 200 --connlimit-mask 64 -j DROP 2>/dev/null || \ + ip6tables -I INPUT -p tcp -m multiport --dports 80,443 -m connlimit --connlimit-above 200 --connlimit-mask 64 -j DROP + ''; + + # Clean up custom chains on stop + extraStopCommands = '' + iptables -D INPUT -p tcp --syn -m multiport --dports 80,443 -j RATE_LIMIT 2>/dev/null || true + iptables -D INPUT -p tcp -m multiport --dports 80,443 -m connlimit --connlimit-above 200 --connlimit-mask 32 -j DROP 
2>/dev/null || true + iptables -F RATE_LIMIT 2>/dev/null || true + iptables -X RATE_LIMIT 2>/dev/null || true + + ip6tables -D INPUT -p tcp --syn -m multiport --dports 80,443 -j RATE_LIMIT 2>/dev/null || true + ip6tables -D INPUT -p tcp -m multiport --dports 80,443 -m connlimit --connlimit-above 200 --connlimit-mask 64 -j DROP 2>/dev/null || true + ip6tables -F RATE_LIMIT 2>/dev/null || true + ip6tables -X RATE_LIMIT 2>/dev/null || true + ''; + }; + + services.openssh = { + enable = true; + settings = { + PasswordAuthentication = false; + KbdInteractiveAuthentication = false; + PermitRootLogin = "prohibit-password"; + X11Forwarding = false; + MaxAuthTries = 3; + }; + # Do NOT open firewall — SSH only over Tailscale + openFirewall = false; + }; + + # Fail2ban for HTTP abuse (not SSH — SSH isn't public) + services.fail2ban = { + enable = true; + maxretry = 5; + bantime = "1h"; + bantime-increment = { + enable = true; + maxtime = "48h"; + }; + }; + + boot.kernel.sysctl = { + # Reverse path filtering + "net.ipv4.conf.all.rp_filter" = 1; + "net.ipv4.conf.default.rp_filter" = 1; + + # Ignore broadcast pings + "net.ipv4.icmp_echo_ignore_broadcasts" = 1; + + # Don't accept or send redirects + "net.ipv4.conf.all.accept_redirects" = 0; + "net.ipv6.conf.all.accept_redirects" = 0; + "net.ipv4.conf.all.send_redirects" = 0; + + # Reject source-routed packets + "net.ipv4.conf.all.accept_source_route" = 0; + "net.ipv6.conf.all.accept_source_route" = 0; + + # SYN flood protection (kernel-level SYN cookies) + "net.ipv4.tcp_syncookies" = 1; + "net.ipv4.tcp_max_syn_backlog" = 4096; + + # Reduce TIME_WAIT accumulation from abusive connections + "net.ipv4.tcp_fin_timeout" = 15; + + # Connection tracking table size (default 65536 is too small under DDoS) + "net.netfilter.nf_conntrack_max" = 262144; + }; +} diff --git a/modules/tailscale.nix b/modules/tailscale.nix new file mode 100644 index 0000000..59f0d10 --- /dev/null +++ b/modules/tailscale.nix @@ -0,0 +1,26 @@ +{ config, pkgs, ... 
}: +{ + age.secrets.tailscale-auth = { + file = ../secrets/tailscale-auth.age; + owner = "root"; + }; + + services.tailscale.enable = true; + + systemd.services.tailscale-autoconnect = { + description = "Automatic connection to Tailscale"; + after = [ "network-pre.target" "tailscaled.service" ]; # the NixOS tailscale module's daemon unit is tailscaled.service + wants = [ "network-pre.target" "tailscaled.service" ]; + wantedBy = [ "multi-user.target" ]; + serviceConfig.Type = "oneshot"; + script = '' + sleep 2 + status="$(${pkgs.tailscale}/bin/tailscale status -json | ${pkgs.jq}/bin/jq -r .BackendState)" + if [ "$status" = "Running" ]; then exit 0; fi + ${pkgs.tailscale}/bin/tailscale up --authkey file:${config.age.secrets.tailscale-auth.path} + ''; + }; + + networking.firewall.trustedInterfaces = [ "tailscale0" ]; + networking.firewall.allowedUDPPorts = [ 41641 ]; +} diff --git a/modules/tor.nix b/modules/tor.nix new file mode 100644 index 0000000..bfff159 --- /dev/null +++ b/modules/tor.nix @@ -0,0 +1,45 @@ +# Tor hidden service — gives each machine a .onion address +# +# After first boot, find the .onion address: +# cat /var/lib/tor/onion/wiki/hostname +# +# Back up the private key! Losing it means losing the .onion address: +# /var/lib/tor/onion/wiki/hs_ed25519_secret_key +# +# The .onion address is derived from this key — it's permanent as long as +# the key exists. Both machines get different keys and different addresses. +# +# Traffic flow: +# Tor user → Tor network → local Tor daemon → localhost:8080 → Caddy → PHP-FPM +# +# No Cloudflare in the path, no TLS needed (.onion v3 is end-to-end encrypted), +# no IP-based rate limiting possible (all traffic arrives from 127.0.0.1). +{ config, pkgs, lib, ...
}: +{ + services.tor = { + enable = true; + client.enable = false; # we're a server, not a client + + relay.onionServices.wiki = { + version = 3; + map = [{ + port = 80; + target = { + addr = "127.0.0.1"; + port = 8080; + }; + }]; + }; + }; + + # Tor needs outbound connectivity to join the network + # (already allowed — the firewall doesn't block outbound by default) + + # Ensure the onion service directory is backed up + # The key files are in /var/lib/tor/onion/wiki/ + # If using agenix to manage a pre-generated key for a stable .onion address: + # 1. Generate a key: tor --keygen (or use mkp224o for vanity addresses) + # 2. Encrypt with agenix: agenix -e secrets/tor-onion-key.age + # 3. Deploy to /var/lib/tor/onion/wiki/hs_ed25519_secret_key + # For now, Tor generates the key on first boot — just back it up. +} diff --git a/modules/users.nix b/modules/users.nix new file mode 100644 index 0000000..118367c --- /dev/null +++ b/modules/users.nix @@ -0,0 +1,74 @@ +{ config, pkgs, lib, ... }: +let + admins = [ + { + name = "superq"; + github = "SuperQ"; + description = "Ben Kochie"; + } + { + name = "rizend"; + description = "rizend"; + extraKeys = [ + "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDWvlc3+qDxhKE3jCCxKKU1h9QJyhCqLgHAwkiokvSPig6dXZW9f8uS/1CNMEmB5avrZhT6S3V00NExqZMldJechROhQoZb6YrUzakaeJCHrbThotQ/TlDuRWCCEh+y/qowk261X4Rbdx/KMwPuROP0p+pw2u3CVoLC7ejnsCwzTMZJ450QtZau0nvP7PY1vnehg2npA4HOqtwjOABJlMMpSZfaQdddwQJ7YE01GLpXF73Lwcnyue51fWFdjsQwIeQM2feO0yf1r1fjoLyMfWCVLK2GI0ONXVFWKQ52kfzr4QQ7Tq+Xi12qr7KGlHZ8yl7tw3MUoyU7k0HrUea1F8WF" + "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCvHlZKV8yBsJOkeu2FkWZ1UDY/uTS8bBUbqh1W0pJ3BMec55uvRLNv1AT5Z7RHKbwdjiZTBm6sP0CRVjsOxeGRCVeddHx1SxsXeihZIRQLHX+Z7M1YwYdzmzRDIEhuZhp+RnGH71ESVEHlmUhNPYsNmlgE3nyNbbDatYRZQqC204pal6cz4CHRUWYIozAQvpO8BF+cNDbNgT1yR5DWflwHErlv8yltmxNjh+gQQgp7RzI+05uzpRgumLCIqdHIKUflDJGvZXnUNAr5nv8Xe3W77AZz348nK2SYoD7dOBw23LpEzmy0mENL+/d3ZCuricslc1eBqCpVxJiF7s/RCtix" + ]; + } + { + name = "bfb"; + github = "kevinjos"; + 
description = "bfb"; + } + { + name = "jof"; + github = "jof"; + description = "Jonathan Lassoff"; + } + { + name = "mcint"; + github = "mcint"; + description = "Loren McIntyre"; + } + ]; + + mkAdmin = { name, github ? null, description, extraKeys ? [] }: { + inherit name; + value = { + isNormalUser = true; + inherit description; + extraGroups = [ "wheel" ]; + openssh.authorizedKeys.keys = extraKeys; + openssh.authorizedKeys.keyFiles = + lib.optionals (github != null) [ + (builtins.fetchurl { + url = "https://github.com/${github}.keys"; + }) + ]; + }; + }; + + # Collect all GitHub key files for root access (deploy-rs needs root SSH) + adminKeyFiles = lib.concatMap + ({ github ? null, ... }: + lib.optionals (github != null) [ + (builtins.fetchurl { url = "https://github.com/${github}.keys"; }) + ]) + admins; + + adminExtraKeys = lib.concatMap + ({ extraKeys ? [], ... }: extraKeys) + admins; +in +{ + users.mutableUsers = false; + + users.users = builtins.listToAttrs (map mkAdmin admins); + + # Root gets all admin keys so deploy-rs can SSH in + users.users.root.openssh.authorizedKeys = { + keyFiles = adminKeyFiles; + keys = adminExtraKeys; + }; + + security.sudo.wheelNeedsPassword = false; +} diff --git a/modules/wiki-primary/alerting.nix b/modules/wiki-primary/alerting.nix new file mode 100644 index 0000000..8d7cefc --- /dev/null +++ b/modules/wiki-primary/alerting.nix @@ -0,0 +1,291 @@ +{ config, pkgs, lib, ... 
}: +{ + services.prometheus = { + alertmanagers = [{ + static_configs = [{ + targets = [ "localhost:9093" ]; + }]; + }]; + + rules = [ + (builtins.toJSON { + groups = [ + { + name = "wiki-availability"; + rules = [ + { + alert = "WikiDown"; + expr = ''probe_success{job="blackbox-http",instance=~".*www.noisebridge.net.*"} == 0''; + "for" = "2m"; + labels.severity = "critical"; + annotations = { + summary = "Primary wiki is unreachable"; + description = "{{ $labels.instance }} has been down for more than 2 minutes."; + }; + } + { + alert = "ReplicaDown"; + expr = ''probe_success{job="blackbox-http",instance=~".*readonly.noisebridge.net.*"} == 0''; + "for" = "5m"; + labels.severity = "warning"; + annotations = { + summary = "Replica wiki is unreachable"; + description = "{{ $labels.instance }} has been down for more than 5 minutes."; + }; + } + { + alert = "HighErrorRate"; + expr = ''sum(rate(caddy_http_responses_total{code=~"5.."}[5m])) by (instance) / sum(rate(caddy_http_responses_total[5m])) by (instance) > 0.05''; + "for" = "5m"; + labels.severity = "critical"; + annotations = { + summary = "High HTTP 5xx error rate on {{ $labels.instance }}"; + description = "More than 5% of requests are returning server errors."; + }; + } + { + alert = "HighLatency"; + expr = ''histogram_quantile(0.95, sum(rate(caddy_http_request_duration_seconds_bucket[5m])) by (le, instance)) > 2''; + "for" = "5m"; + labels.severity = "warning"; + annotations = { + summary = "High p95 latency on {{ $labels.instance }}"; + description = "95th percentile response time is {{ $value | humanizeDuration }}."; + }; + } + { + alert = "TLSCertExpiringSoon"; + expr = ''probe_ssl_earliest_cert_expiry{job="blackbox-http"} - time() < 7 * 86400''; + "for" = "1h"; + labels.severity = "warning"; + annotations = { + summary = "TLS certificate expiring within 7 days"; + description = "Certificate for {{ $labels.instance }} expires in {{ $value | humanizeDuration }}."; + }; + } + ]; + } + { + name = 
"wiki-infrastructure"; + rules = [ + { + alert = "DiskFull"; + expr = ''(node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) < 0.15''; + "for" = "5m"; + labels.severity = "warning"; + annotations = { + summary = "Disk usage above 85% on {{ $labels.instance }}"; + description = "Root filesystem is {{ $value | humanizePercentage }} free."; + }; + } + { + alert = "DiskCritical"; + expr = ''(node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) < 0.05''; + "for" = "2m"; + labels.severity = "critical"; + annotations = { + summary = "Disk almost full on {{ $labels.instance }}"; + description = "Root filesystem is {{ $value | humanizePercentage }} free. Immediate action required."; + }; + } + { + alert = "HighMemoryUsage"; + expr = ''(1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) > 0.9''; + "for" = "5m"; + labels.severity = "warning"; + annotations = { + summary = "Memory usage above 90% on {{ $labels.instance }}"; + description = "Memory usage is {{ $value | humanizePercentage }} of total."; # $value is the used fraction (1 - available/total), not the available share + }; + } + { + alert = "HighCPU"; + expr = ''1 - avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) > 0.85''; + "for" = "10m"; + labels.severity = "warning"; + annotations = { + summary = "Sustained high CPU on {{ $labels.instance }}"; + description = "CPU usage has been above 85% for 10 minutes."; + }; + } + { + alert = "SystemdUnitFailed"; + expr = ''node_systemd_unit_state{state="failed"} == 1''; + "for" = "5m"; + labels.severity = "warning"; + annotations = { + summary = "Systemd unit failed on {{ $labels.instance }}"; + description = "Unit {{ $labels.name }} is in failed state."; + }; + } + ]; + } + { + name = "wiki-database"; + rules = [ + { + alert = "ReplicationBroken"; + expr = ''mysql_slave_status_slave_io_running{instance="wiki-replica"} == 0 or mysql_slave_status_slave_sql_running{instance="wiki-replica"} == 0''; + "for" = "2m"; + labels.severity = "critical"; +
annotations = { + summary = "MySQL replication thread stopped"; + description = "Replication IO or SQL thread is not running on the replica."; + }; + } + { + alert = "ReplicationLagging"; + expr = ''mysql_slave_status_seconds_behind_master{instance="wiki-replica"} > 300''; + "for" = "5m"; + labels.severity = "warning"; + annotations = { + summary = "MySQL replication lagging"; + description = "Replica is {{ $value }}s behind the primary."; + }; + } + { + alert = "MySQLConnectionsExhausted"; + expr = ''mysql_global_status_threads_connected / mysql_global_variables_max_connections > 0.8''; + "for" = "5m"; + labels.severity = "warning"; + annotations = { + summary = "MySQL connections above 80% on {{ $labels.instance }}"; + description = "{{ $value | humanizePercentage }} of max connections in use."; + }; + } + { + alert = "MySQLSlowQueries"; + expr = ''rate(mysql_global_status_slow_queries[5m]) > 0.1''; + "for" = "10m"; + labels.severity = "warning"; + annotations = { + summary = "Elevated slow queries on {{ $labels.instance }}"; + description = "{{ $value }} slow queries per second over the last 5 minutes."; + }; + } + ]; + } + { + name = "wiki-application"; + rules = [ + { + alert = "PHPFPMExhausted"; + expr = ''phpfpm_active_processes >= phpfpm_total_processes''; + "for" = "1m"; + labels.severity = "warning"; + annotations = { + summary = "PHP-FPM workers exhausted"; + description = "All PHP-FPM workers are active — requests may be queuing."; + }; + } + { + alert = "PHPFPMDown"; + expr = ''up{job="phpfpm"} == 0''; + "for" = "1m"; + labels.severity = "critical"; + annotations = { + summary = "PHP-FPM exporter is down"; + description = "Cannot scrape PHP-FPM metrics — the PHP-FPM process may be dead."; + }; + } + { + alert = "MemcachedDown"; + expr = ''up{job=~"memcached.*"} == 0''; + "for" = "2m"; + labels.severity = "critical"; + annotations = { + summary = "Memcached is down on {{ $labels.instance }}"; + description = "The memcached exporter is unreachable. 
MediaWiki will fall back to database queries and be slow."; + }; + } + { + alert = "MemcachedEvictions"; + expr = ''rate(memcached_items_evicted_total[5m]) > 10''; + "for" = "10m"; + labels.severity = "warning"; + annotations = { + summary = "High memcached eviction rate on {{ $labels.instance }}"; + description = "{{ $value }} evictions/sec — cache is too small, consider increasing maxMemory."; + }; + } + { + alert = "MemcachedHitRateLow"; + expr = ''rate(memcached_commands_total{command="get",status="hit"}[5m]) / rate(memcached_commands_total{command="get"}[5m]) < 0.8''; + "for" = "15m"; + labels.severity = "warning"; + annotations = { + summary = "Low memcached hit rate on {{ $labels.instance }}"; + description = "Cache hit rate is {{ $value | humanizePercentage }}. Pages may be slow."; + }; + } + ]; + } + { + name = "wiki-backups"; + rules = [ + { + alert = "BackupStale"; + expr = ''(time() - backup_latest_timestamp_seconds) > 86400''; + "for" = "1h"; + labels.severity = "warning"; + annotations = { + summary = "Wiki backup is stale"; + description = "Last successful backup was more than 24 hours ago."; + }; + } + { + alert = "BackupFailed"; + expr = ''backup_b2_sync_success != 1''; + "for" = "10m"; + labels.severity = "critical"; + annotations = { + summary = "B2 backup sync failed"; + description = "The last rclone sync to Backblaze B2 did not succeed."; + }; + } + { + alert = "ImageSyncStale"; + expr = ''(time() - imagesync_latest_timestamp_seconds) > 7200''; + "for" = "30m"; + labels.severity = "warning"; + annotations = { + summary = "Image sync to replica is stale"; + description = "Last successful image sync was more than 2 hours ago. 
Replica may have broken image links."; + }; + } + ]; + } + ]; + }) + ]; + }; + + services.prometheus.alertmanager = { + enable = true; + port = 9093; + listenAddress = "127.0.0.1"; + configuration = { + route = { + receiver = "discord"; + group_by = [ "alertname" "instance" ]; + group_wait = "30s"; + group_interval = "5m"; + repeat_interval = "4h"; + }; + receivers = [ + { + name = "discord"; + webhook_configs = [{ + url_file = config.age.secrets.discord-webhook.path; + }]; + } + ]; + }; + }; + + age.secrets.discord-webhook = { + file = ../../secrets/discord-webhook.age; + owner = "alertmanager"; + group = "alertmanager"; + }; +} diff --git a/modules/wiki-primary/backup.nix b/modules/wiki-primary/backup.nix new file mode 100644 index 0000000..55d23dc --- /dev/null +++ b/modules/wiki-primary/backup.nix @@ -0,0 +1,97 @@ +{ config, pkgs, lib, ... }: +let + backupScript = pkgs.writeShellScript "wiki-backup" '' + set -euo pipefail + + BACKUP_DIR="/var/backups/mysql" + TEXTFILE_DIR="/var/lib/prometheus-node-exporter/textfile" + TIMESTAMP=$(date +%Y%m%d-%H%M%S) + + # Cleanup old local backups (keep 7 days); -mindepth 1 so $BACKUP_DIR itself is never matched and removed + find "$BACKUP_DIR" -mindepth 1 -maxdepth 1 -type d -mtime +7 -exec rm -rf {} + 2>/dev/null || true + + # Dump database with mydumper + echo "Starting database dump..."
+ export RCLONE_CONFIG_B2_TYPE=b2 + export RCLONE_CONFIG_B2_ACCOUNT=$(cat ${config.age.secrets.b2-credentials.path} | ${pkgs.jq}/bin/jq -r .keyID) + export RCLONE_CONFIG_B2_KEY=$(cat ${config.age.secrets.b2-credentials.path} | ${pkgs.jq}/bin/jq -r .applicationKey) + + SYNC_SUCCESS=0 + ${pkgs.rclone}/bin/rclone sync "$BACKUP_DIR" b2:noisebridge-wiki-backup/mysql/ \ + --transfers 4 \ + --checkers 8 \ + --b2-hard-delete \ + && SYNC_SUCCESS=1 + + # Sync uploaded images to B2 + ${pkgs.rclone}/bin/rclone sync /var/lib/mediawiki/images/ b2:noisebridge-wiki-backup/images/ \ + --transfers 4 \ + --checkers 8 \ + || SYNC_SUCCESS=0 + + # Back up Tor hidden service keys (losing these = losing the .onion address) + ${pkgs.rclone}/bin/rclone sync /var/lib/tor/onion/ b2:noisebridge-wiki-backup/tor-keys/ \ + --transfers 1 \ + || true + + # Write metrics for Prometheus textfile collector (no leading whitespace!) + cat > "$TEXTFILE_DIR/backup.prom" <<'METRICS' +# HELP backup_latest_timestamp_seconds Unix timestamp of latest backup +# TYPE backup_latest_timestamp_seconds gauge +METRICS + echo "backup_latest_timestamp_seconds $(date +%s)" >> "$TEXTFILE_DIR/backup.prom" + cat >> "$TEXTFILE_DIR/backup.prom" < "$TEXTFILE_DIR/imagesync.prom" <