From cff8de14afe7b2c9a1324d1aba07c2cf0cf66c9c Mon Sep 17 00:00:00 2001 From: xah30 Date: Fri, 29 May 2026 20:02:35 +0300 Subject: [PATCH] =?UTF-8?q?fix(cli):=20v3.4.1=20=E2=80=94=20macOS=20defaul?= =?UTF-8?q?t-route=20override=20+=20admin=20sock=20chmod=200666?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two production-blocking bugs from the GUI's first end-to-end live test against the production server. ## 1) os_routes: macOS `0.0.0.0/0` does not override the existing default Empirical observation: client connects, server-side rx counter grows as we send packets (TCP/443 handshake + frames arrive), but server-side tx never ticks. From the Mac side `ping 10.7.0.1` returns 0/3, `curl https://1.0.0.1` returns empty. Tracing it: even with `[tunnel.split] default = "VPN"` the host's pre-existing default route (`default → 192.168.1.254 → en0`) was still winning routing decisions. Aura's `route add -net 0.0.0.0/0 -interface utunN` had exit-zero'd but the new entry never beat the original default — macOS happily accepts the route command, the kernel just doesn't use it for outgoing packets. This is a known macOS quirk that every long-lived VPN works around the same way: install two **half-Internet** routes (`0.0.0.0/1` and `128.0.0.0/1`) which are strictly more specific than `0.0.0.0/0` and so win by longest-prefix match. Tailscale, WireGuard, OpenVPN all do this. We now do too. Updated the macos_plan_default_vpn unit test to assert the new plan shape (4 steps for VPN + direct-cidr + direct-host instead of the old 3). The split has a known limitation: the server's own outer endpoint (e.g. 187.77.67.17:443) is now routed into the tunnel too. The dialer's already-established TCP source-IP keeps the *current* connection alive, but a redial after a route flap would loop. Documented in the source comment; v3.5 will add an explicit `SERVER_IP/32 via ORIGINAL_GATEWAY` bypass at install time. 
## 2) admin: chmod 0666 the freshly-bound Unix socket When `aura client` is spawned by `sudo` (the GUI does this on the user's behalf), the admin Unix socket ends up owned by root with the default 0755 mode. macOS's `connect()` requires write permission on the socket file, so the desktop-user GUI sees `Permission denied (os error 13)` and the status panel stays empty — even though the tunnel itself works. `transport::listen` now does `chmod 0666` on the socket immediately after `UnixListener::bind`. The socket lives under `/tmp` (laptop) or `/run` (systemd-managed server) so the directory permissions still gate access; making the socket world-RW just lets the per-machine apps that already have filesystem access actually use it. ## Verification - `cargo test -p aura-cli os_routes::tests::macos_plan_default_vpn` — ok - `cargo build --release -p aura-cli` — green - Bug repro: pre-fix, server admin shows `rx: 13 tx: 5` while client sends ICMP that never returns. Post-fix (manual test): the half-Internet routes appear in `netstat -rn`, ping 10.7.0.1 succeeds, curl through the tunnel works. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 --- crates/aura-cli/src/admin.rs | 20 ++++++++++- crates/aura-cli/src/os_routes.rs | 59 +++++++++++++++++++++----------- 2 files changed, 58 insertions(+), 21 deletions(-) diff --git a/crates/aura-cli/src/admin.rs b/crates/aura-cli/src/admin.rs index 8998e99..f28dae4 100644 --- a/crates/aura-cli/src/admin.rs +++ b/crates/aura-cli/src/admin.rs @@ -412,9 +412,27 @@ mod transport { use tokio::net::{UnixListener, UnixStream}; /// Bind a Unix domain socket at `path`, removing any stale socket file first. + /// + /// v3.4.1: chmod 0666 the freshly-bound socket so a non-root caller (e.g. the desktop + /// user's `aura-gui` process probing the GUI's root-spawned `aura client`) can + /// `connect()`. 
Without this, the default umask leaves the socket at 0755 — macOS + /// (like Linux) treats `connect()` as needing write permission, so the GUI sees + /// `Permission denied (os error 13)` and the status panel stays empty. We accept the + /// `0666` scope because the socket lives under `/tmp` (single-user laptops) or `/run` + /// (server, managed by systemd) — directory-level access is the real gate, not the + /// socket file mode. pub fn listen(path: &str) -> io::Result { + use std::os::unix::fs::PermissionsExt; let _ = std::fs::remove_file(path); - UnixListener::bind(path) + let listener = UnixListener::bind(path)?; + if let Err(e) = + std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o666)) + { + tracing::warn!(socket = path, error = %e, + "chmod 666 on admin socket failed (non-fatal; queries from non-root will \ fail with Permission denied)"); + } + Ok(listener) } /// Accept the next admin client. Returns the stream half on success. diff --git a/crates/aura-cli/src/os_routes.rs b/crates/aura-cli/src/os_routes.rs index 3a4e11c..dbd277c 100644 --- a/crates/aura-cli/src/os_routes.rs +++ b/crates/aura-cli/src/os_routes.rs @@ -777,19 +777,34 @@ fn macos_apply_plan(tun_name: &str, routes: &SplitRoutes, gateway: IpAddr) -> Ve let mut plan = Vec::new(); match routes.default { DefaultAction::Vpn => { - // Default-via-TUN. macOS allows multiple default routes; the most-recently-added - // generally wins by priority, which suits us here (the VPN default must override the - // host's pre-existing default for the lifetime of the session). - plan.push(PlannedCommand::new( - "route", - vec![ - "add".into(), - "-net".into(), - "0.0.0.0/0".into(), - "-interface".into(), - tun_name.into(), - ], - )); + // macOS `route add -net 0.0.0.0/0 -interface utunN` does NOT override the kernel's + // existing default route (the one DHCP installed via the LAN gateway). 
The kernel + // happily accepts the `route add` but the new entry never wins routing decisions, so + // outbound traffic keeps egressing through the original interface and the VPN looks + // "connected but inert" — exactly what bit us in the first GUI test (server-side + // rx counters grew but tx didn't, because the kernel never sent packets through + // utunN). WireGuard, OpenVPN, Tailscale all work around this by installing **two + // half-Internet routes** (`0.0.0.0/1` and `128.0.0.0/1`) which are strictly more + // specific than `0.0.0.0/0` and so beat the host default by longest-prefix match. + // We do the same. + for cidr in ["0.0.0.0/1", "128.0.0.0/1"] { + plan.push(PlannedCommand::new( + "route", + vec![ + "add".into(), + "-net".into(), + cidr.into(), + "-interface".into(), + tun_name.into(), + ], + )); + } + // The server's outer endpoint (UDP/TCP/QUIC to e.g. 187.77.67.17) MUST still egress + // via the original default route, otherwise we'd be tunnelling the tunnel — infinite + // recursion. The dialer's bound source IP keeps this working in practice for active + // connections, but a redial after a route flap would hit the new utunN. v3.5 fixes + // this by installing a `SERVER_IP/32 via ORIGINAL_GATEWAY` bypass at install time; + // for v3.4 we accept the risk and document it (see MIGRATION §10). for cidr in &routes.direct_cidrs { plan.push(PlannedCommand::new( "route", @@ -1139,18 +1154,22 @@ mod tests { ..Default::default() }; let plan = macos_apply_plan("utun4", &split, "10.0.0.1".parse().unwrap()); - assert_eq!(plan.len(), 3); - // Default first via -interface. + // v3.4.1: 2 half-Internet routes + 1 direct CIDR + 1 direct host = 4 steps. + // (We avoid `0.0.0.0/0` because macOS would silently keep the original default winning.) + assert_eq!(plan.len(), 4); + // Two half-Internet routes via -interface. 
assert_eq!(plan[0].prog, "route"); assert!(plan[0].args.contains(&"-interface".to_string())); assert!(plan[0].args.contains(&"utun4".to_string())); - assert!(plan[0].args.contains(&"0.0.0.0/0".to_string())); + assert!(plan[0].args.contains(&"0.0.0.0/1".to_string())); + assert!(plan[1].args.contains(&"128.0.0.0/1".to_string())); + assert!(plan[1].args.contains(&"utun4".to_string())); // CIDR via gateway. - assert!(plan[1].args.contains(&"192.168.0.0/16".to_string())); - assert!(plan[1].args.contains(&"10.0.0.1".to_string())); + assert!(plan[2].args.contains(&"192.168.0.0/16".to_string())); + assert!(plan[2].args.contains(&"10.0.0.1".to_string())); // Host via gateway (-host). - assert!(plan[2].args.contains(&"-host".to_string())); - assert!(plan[2].args.contains(&"1.2.3.4".to_string())); + assert!(plan[3].args.contains(&"-host".to_string())); + assert!(plan[3].args.contains(&"1.2.3.4".to_string())); } /// Undo flips `add` -> `del` on Linux and reuses the rest of the args (so the route is