From a974abdaa2a313be836fc7442dc47eb7dede99a0 Mon Sep 17 00:00:00 2001 From: xah30 Date: Fri, 29 May 2026 21:10:28 +0300 Subject: [PATCH] =?UTF-8?q?fix(cli):=20v3.4.3=20=E2=80=94=20install=20bypa?= =?UTF-8?q?ss=20routes=20BEFORE=20half-Internet=20routes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The v3.4.2 fix injected the server-IP bypass into `SplitRoutes::direct_hosts` but the macOS apply plan emitted the bypass commands AFTER the two half-Internet routes. There's a ~tens-of-ms race window during which: 1. `route add -net 0.0.0.0/1 -interface utunN` ← installed 2. `route add -net 128.0.0.0/1 -interface utunN` ← installed; 187.77.67.17 now matches `128.0.0.0/1` and routes to utunN 3. *kernel re-resolves routes for the live TCP socket Aura is using to talk to 187.77.67.17* — packets briefly enter utunN → infinite recursion → the socket sees a stall and the inner data plane collapses 4. `route add -host 187.77.67.17 192.168.1.254` ← finally bypasses, but too late — TCP is already in a bad state This matches the user's "Aura умирает через пару секунд после подключения" symptom verbatim. Server side saw `rx_packets` grow once (a few frames from the cover-traffic loop) and then `tx_packets` flatline at zero — exactly what happens when the upstream is dead. Fix: reorder `macos_apply_plan` for `DefaultAction::Vpn` so all bypasses (direct_cidrs + direct_hosts) install FIRST. When the half-Internet routes finally land, the kernel's longest-prefix-match already has the /32 bypass for the server IP ready, so the in-flight TCP socket keeps egressing via en0 throughout. 
Test updated to assert the new plan order: [0] direct CIDR via gateway [1] direct host via gateway (-host) [2] 0.0.0.0/1 via -interface utun [3] 128.0.0.0/1 via -interface utun 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 --- crates/aura-cli/src/os_routes.rs | 89 +++++++++++++++++--------------- 1 file changed, 46 insertions(+), 43 deletions(-) diff --git a/crates/aura-cli/src/os_routes.rs b/crates/aura-cli/src/os_routes.rs index dbd277c..2a27801 100644 --- a/crates/aura-cli/src/os_routes.rs +++ b/crates/aura-cli/src/os_routes.rs @@ -777,34 +777,18 @@ fn macos_apply_plan(tun_name: &str, routes: &SplitRoutes, gateway: IpAddr) -> Ve let mut plan = Vec::new(); match routes.default { DefaultAction::Vpn => { - // macOS `route add -net 0.0.0.0/0 -interface utunN` does NOT override the kernel's - // existing default route (the one DHCP installed via the LAN gateway). The kernel - // happily accepts the `route add` but the new entry never wins routing decisions, so - // outbound traffic keeps egressing through the original interface and the VPN looks - // "connected but inert" — exactly what bit us in the first GUI test (server-side - // rx counters grew but tx didn't, because the kernel never sent packets through - // utunN). WireGuard, OpenVPN, Tailscale all work around this by installing **two - // half-Internet routes** (`0.0.0.0/1` and `128.0.0.0/1`) which are strictly more - // specific than `0.0.0.0/0` and so beat the host default by longest-prefix match. - // We do the same. - for cidr in ["0.0.0.0/1", "128.0.0.0/1"] { - plan.push(PlannedCommand::new( - "route", - vec![ - "add".into(), - "-net".into(), - cidr.into(), - "-interface".into(), - tun_name.into(), - ], - )); - } - // The server's outer endpoint (UDP/TCP/QUIC to e.g. 187.77.67.17) MUST still egress - // via the original default route, otherwise we'd be tunnelling the tunnel — infinite - // recursion. 
The dialer's bound source IP keeps this working in practice for active - // connections, but a redial after a route flap would hit the new utunN. v3.5 fixes - // this by installing a `/32 via ` bypass at install time; - // for v3.4 we accept the risk and document it (see MIGRATION §10). + // ORDER MATTERS. We install bypasses FIRST so that when the half-Internet routes + // (which capture e.g. 187.77.67.17 inside `128.0.0.0/1`) land, the kernel's + // longest-prefix match already has a more-specific /32 bypass route that wins. If + // we did it the other way around there is a tens-of-ms race window during which the + // server-IP packets the dialer is sending to keep the encrypted tunnel alive get + // routed BACK INTO the TUN — infinite recursion — and the live TCP session collapses + // before the bypass install lands. That's what bit the v3.4.1 → v3.4.2 user report + // ("aura умирает через пару секунд"). + // + // direct_cidrs first (broad ranges like 192.168.0.0/16 the operator may have + // declared), then direct_hosts (the auto-injected server-endpoint bypasses from + // client.rs). for cidr in &routes.direct_cidrs { plan.push(PlannedCommand::new( "route", @@ -827,6 +811,24 @@ fn macos_apply_plan(tun_name: &str, routes: &SplitRoutes, gateway: IpAddr) -> Ve ], )); } + // THEN the half-Internet routes. macOS `route add -net 0.0.0.0/0 -interface utunN` + // does NOT override the kernel's existing default route (it accepts the add but the + // new entry never wins routing decisions). WireGuard / OpenVPN / Tailscale all work + // around this by installing two half-Internet routes (`0.0.0.0/1` + `128.0.0.0/1`), + // strictly more specific than `0.0.0.0/0` so they beat the host default by + // longest-prefix match. We do the same. 
+ for cidr in ["0.0.0.0/1", "128.0.0.0/1"] { + plan.push(PlannedCommand::new( + "route", + vec![ + "add".into(), + "-net".into(), + cidr.into(), + "-interface".into(), + tun_name.into(), + ], + )); + } } DefaultAction::Direct => { for cidr in &routes.vpn_cidrs { @@ -1154,22 +1156,23 @@ mod tests { ..Default::default() }; let plan = macos_apply_plan("utun4", &split, "10.0.0.1".parse().unwrap()); - // v3.4.1: 2 half-Internet routes + 1 direct CIDR + 1 direct host = 4 steps. - // (We avoid `0.0.0.0/0` because macOS would silently keep the original default winning.) + // v3.4.3: 1 direct CIDR + 1 direct host + 2 half-Internet routes = 4 steps. + // ORDER: bypasses first (so the kernel has them as more-specific routes BEFORE the + // half-Internet routes land), then the half-Internet routes. Avoids the race window + // where in-flight server-IP packets briefly route back into the TUN. assert_eq!(plan.len(), 4); - // Two half-Internet routes via -interface. - assert_eq!(plan[0].prog, "route"); - assert!(plan[0].args.contains(&"-interface".to_string())); - assert!(plan[0].args.contains(&"utun4".to_string())); - assert!(plan[0].args.contains(&"0.0.0.0/1".to_string())); - assert!(plan[1].args.contains(&"128.0.0.0/1".to_string())); - assert!(plan[1].args.contains(&"utun4".to_string())); - // CIDR via gateway. - assert!(plan[2].args.contains(&"192.168.0.0/16".to_string())); - assert!(plan[2].args.contains(&"10.0.0.1".to_string())); - // Host via gateway (-host). - assert!(plan[3].args.contains(&"-host".to_string())); - assert!(plan[3].args.contains(&"1.2.3.4".to_string())); + // Step 0: direct CIDR bypass via gateway. + assert!(plan[0].args.contains(&"192.168.0.0/16".to_string())); + assert!(plan[0].args.contains(&"10.0.0.1".to_string())); + // Step 1: direct host bypass via gateway (-host). + assert!(plan[1].args.contains(&"-host".to_string())); + assert!(plan[1].args.contains(&"1.2.3.4".to_string())); + // Steps 2-3: half-Internet routes via -interface. 
+ assert!(plan[2].args.contains(&"-interface".to_string())); + assert!(plan[2].args.contains(&"utun4".to_string())); + assert!(plan[2].args.contains(&"0.0.0.0/1".to_string())); + assert!(plan[3].args.contains(&"128.0.0.0/1".to_string())); + assert!(plan[3].args.contains(&"utun4".to_string())); } /// Undo flips `add` -> `del` on Linux and reuses the rest of the args (so the route is