From b904d40fbab02292bbfe63e881b48b6c529f35b6 Mon Sep 17 00:00:00 2001 From: xah30 Date: Fri, 29 May 2026 22:22:47 +0300 Subject: [PATCH] =?UTF-8?q?fix(cli):=20v3.4.5=20=E2=80=94=20static-reserva?= =?UTF-8?q?tion=20IpPool=20always=20succeeds=20(#1=20user=20retest)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Empirical observation from the v3.4.4 retest: the GUI's Connect button now goes through cleanly, but if the user did `Disconnect → Connect` after any prior ungraceful exit (SIGKILL, crash, network flap that hadn't yet timed out), the new handshake immediately died with: Error: router run loop Caused by: peer connection closed; router shutting down That's our v3.4 "peer connection broke" guard firing. Looking at the server log, the cause was: WARN aura_cli::server: refusing connection: ip pool denied an address (unknown id under static_only, duplicate static reservation, or pool exhausted) i.e. the static reservation `mac-v34 → 10.7.0.10` for the new connection was refused because the previous (now-dead) session never released 10.7.0.10 from the pool's `in_use` set. Without restarting the systemd unit, no reconnect was possible. This was the previously-documented v1 policy ("do not hand out the same IP twice"). For dynamic allocation that policy is correct — two different clients fighting for the same IP would corrupt routing. But for STATIC reservations there is no ambiguity: the static map says "this IP is reserved for THIS client id", so a reconnect with the same id is the rightful owner; the previous holder (same id) is by definition stale. Fix: in `IpPool::assign`, when a static reservation matches the requested client_id, always return it — skip the `in_use.contains(&ip)` check. The server's accept loop already runs `ServerRoutes::register(ip, new_conn)` which evicts any previously-registered conn under the same IP, drops its Arc, the transport closes, and the orphaned per-conn task ends and calls `pool.release` naturally. So the in_use marker is correctly cleared by the eviction cascade within milliseconds of the new assign. Test updated to match new behaviour: `static_reservation_refused_when_already_in_use` → `static_reservation_always_honoured_even_if_in_use` 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 --- crates/aura-cli/src/pool.rs | 38 +++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/crates/aura-cli/src/pool.rs b/crates/aura-cli/src/pool.rs index 71e9016..99a5b2f 100644 --- a/crates/aura-cli/src/pool.rs +++ b/crates/aura-cli/src/pool.rs @@ -146,8 +146,19 @@ impl IpPool { /// Assign an IP to a connecting client identified by `client_id`. /// - /// Returns `None` if the policy refuses the client (`StaticOnly` and unknown id; a static - /// reservation is already in use; pool exhausted on dynamic allocation). + /// Returns `None` if the policy refuses the client (`StaticOnly` and unknown id; pool + /// exhausted on dynamic allocation). + /// + /// **Static reservations always succeed** (v3.4.5 fix): a static map entry means "this IP + /// belongs to this client id, period". If the IP is already marked `in_use` it's because the + /// previous session's per-conn task never released it (typically: client was SIGKILL'd, the + /// underlying transport timed out but the server-side cleanup is still running, etc.). The + /// `ServerRoutes::register` path the caller invokes immediately after this method will see + /// any previously-registered connection under the same IP, log "evicting a previously- + /// registered connection", drop its Arc, and the transport closes — at which point the + /// per-conn task ends and would release the IP normally. By the time the new conn starts + /// dispatching, ownership is clean. This unblocks the reconnect-after-ungraceful-exit case + /// that previously locked clients out of the server until aura.service was restarted. pub async fn assign(&self, client_id: &str) -> Option { let mut in_use = self.in_use.lock().await; // Static-or-Dynamic + Static-only: try the static map first. @@ -156,11 +167,7 @@ impl IpPool { PoolStrategy::StaticOnly | PoolStrategy::StaticOrDynamic ) { if let Some(ip) = self.static_map.get(client_id).copied() { - if in_use.contains(&ip) { - // Refuse rather than serve duplicates: another live session is holding the - // static reservation. The caller logs the refusal. - return None; - } + // Always honour the static reservation, even if marked in_use. See doc above. in_use.insert(ip); return Some(ip); } @@ -404,7 +411,15 @@ mod tests { } #[tokio::test] - async fn static_reservation_refused_when_already_in_use() { + async fn static_reservation_always_honoured_even_if_in_use() { + // v3.4.5: static reservations always succeed. Previous behaviour ("refuse the second + // assign while the first is still in_use") locked clients out of the server after an + // ungraceful exit (SIGKILL, transport timeout that hadn't fired yet, etc.), since the + // per-conn cleanup hadn't run and the IP stayed marked in_use forever. The server's + // accept loop now relies on `ServerRoutes::register` to evict any previously-registered + // conn under the same IP; the eviction drops the prev Arc, the transport closes, and + // the orphaned per-conn task ends and calls `pool.release` naturally. So returning the + // same IP a second time is correct — ownership reconciles within milliseconds. let mut statics = HashMap::new(); statics.insert("alice".to_string(), ip("10.0.0.5")); let pool = IpPool::new( @@ -415,10 +430,9 @@ mod tests { ) .unwrap(); assert_eq!(pool.assign("alice").await, Some(ip("10.0.0.5"))); - // A second handshake from the same id while the first is still live is refused (the v1 - // policy: do not hand out the same IP twice; the caller logs a warning and drops the conn). - assert!(pool.assign("alice").await.is_none()); - // After release, the second handshake succeeds. + // Second assign for the same id returns the SAME IP — no refusal. + assert_eq!(pool.assign("alice").await, Some(ip("10.0.0.5"))); + // Even after release, idempotent. pool.release(ip("10.0.0.5")).await; assert_eq!(pool.assign("alice").await, Some(ip("10.0.0.5"))); }