diff --git a/aura-gui/src-tauri/src/admin.rs b/aura-gui/src-tauri/src/admin.rs index 69feafb..acf739f 100644 --- a/aura-gui/src-tauri/src/admin.rs +++ b/aura-gui/src-tauri/src/admin.rs @@ -30,15 +30,54 @@ pub struct StatusResponse { #[cfg(unix)] pub fn query_status(path: &str) -> Result { + let line = round_trip(path, b"{\"cmd\":\"status\"}\n", Duration::from_millis(1500))?; + let resp: StatusResponse = serde_json::from_str(&line) + .with_context(|| format!("parsing admin response: {line}"))?; + if !resp.ok { + return Err(anyhow!( + "admin returned error: {}", + resp.error + .clone() + .unwrap_or_else(|| "(no error string)".into()) + )); + } + Ok(resp) +} + +/// v3.4.4: send `{"cmd":"shutdown"}` over the admin socket. The running aura-cli sees the +/// notification, breaks its router select! loop, and exits after `OsRouteGuard::Drop` rolls +/// back the OS routes — no SIGTERM-through-sudo gymnastics needed (the admin socket is +/// chmod 0666 so the GUI's desktop-user process can write to it directly). +/// +/// Returns `Ok(())` on success; the caller is expected to wait briefly afterwards for the +/// process to actually exit. +#[cfg(unix)] +pub fn send_shutdown(path: &str) -> Result<()> { + let line = round_trip(path, b"{\"cmd\":\"shutdown\"}\n", Duration::from_millis(1500))?; + // Reuse the StatusResponse shape — it has the `ok` / `error` fields we need, the rest are + // None for a shutdown reply. 
+ let resp: StatusResponse = serde_json::from_str(&line) + .with_context(|| format!("parsing admin response: {line}"))?; + if !resp.ok { + return Err(anyhow!( + "shutdown rejected by admin: {}", + resp.error + .clone() + .unwrap_or_else(|| "(no error string)".into()) + )); + } + Ok(()) +} + +#[cfg(unix)] +fn round_trip(path: &str, request: &[u8], timeout: Duration) -> Result { use std::os::unix::net::UnixStream; let mut sock = UnixStream::connect(path).with_context(|| format!("connecting to admin socket {path}"))?; - sock.set_read_timeout(Some(Duration::from_millis(1500)))?; - sock.set_write_timeout(Some(Duration::from_millis(1500)))?; - sock.write_all(b"{\"cmd\":\"status\"}\n")?; + sock.set_read_timeout(Some(timeout))?; + sock.set_write_timeout(Some(timeout))?; + sock.write_all(request)?; let mut buf = String::new(); - // The server writes one line + newline and closes the connection only when *we* close. We - // need to read until newline. Use a small reader buffer. let mut tmp = [0u8; 1024]; loop { let n = sock.read(&mut tmp)?; @@ -53,18 +92,9 @@ pub fn query_status(path: &str) -> Result { let line = buf .lines() .next() - .ok_or_else(|| anyhow!("empty admin response"))?; - let resp: StatusResponse = - serde_json::from_str(line).with_context(|| format!("parsing admin response: {line}"))?; - if !resp.ok { - return Err(anyhow!( - "admin returned error: {}", - resp.error - .clone() - .unwrap_or_else(|| "(no error string)".into()) - )); - } - Ok(resp) + .ok_or_else(|| anyhow!("empty admin response"))? 
+ .to_string(); + Ok(line) } #[cfg(windows)] @@ -76,3 +106,10 @@ pub fn query_status(_path: &str) -> Result { "admin socket query is not yet implemented on Windows; GUI status is process-only" )) } + +#[cfg(windows)] +pub fn send_shutdown(_path: &str) -> Result<()> { + Err(anyhow!( + "admin shutdown is not yet implemented on Windows; the GUI falls back to SIGTERM" + )) +} diff --git a/aura-gui/src-tauri/src/cli_proc.rs b/aura-gui/src-tauri/src/cli_proc.rs index a0c2d9f..af89b74 100644 --- a/aura-gui/src-tauri/src/cli_proc.rs +++ b/aura-gui/src-tauri/src/cli_proc.rs @@ -60,27 +60,55 @@ impl ClientHandle { /// Kill the child and reap it. Idempotent. /// - /// Because we spawned via `sudo -n aura …`, our direct child is `sudo` (running as us; we - /// own it). The real aura process is sudo's child, running as root, so we can't signal it - /// directly. SIGTERM to the sudo PID is forwarded to aura by sudo's signal handler, which - /// lets aura's `OsRouteGuard::Drop` and TUN cleanup run before exit. After a 2 s grace - /// period we fall back to SIGKILL via `Child::kill`, which kills sudo immediately (aura - /// becomes orphaned, but the kernel reaps it via PID 1 — TUN may linger). + /// v3.4.4 path — graceful via admin socket first. The aura admin socket is chmod 0666 (a + /// fix from earlier in v3.4.x), so the GUI's desktop-user process can write to it without + /// sudo. We send `{"cmd":"shutdown"}`, the aura main loop's `tokio::select!` fires its + /// shutdown arm, `OsRouteGuard::Drop` rolls back system routes, then the process exits. + /// Typical exit is under 500 ms; we wait up to 3 s. + /// + /// Fall-back: if the admin send fails (socket missing, aura already wedged), or the admin + /// acked but the process is still alive after the 3 s wait, drop to the + /// old SIGTERM-to-sudo path. Because we spawned via `sudo -n aura …`, our direct child is + /// `sudo` running as us, and sudo forwards SIGTERM to the aura child by its own signal + /// handler. 
SIGKILL via `Child::kill` is the absolute last resort — it leaves aura + /// orphaned with the TUN still up. pub fn kill(self) -> Result<()> { let pid = { self.child.lock().id() }; - // SIGTERM to sudo — sudo forwards to aura. We own sudo so plain `kill` works. + let sock = self.admin_socket.clone(); + + // 1. Try the admin-socket shutdown. Quiet on failure — we'll fall through. + match crate::admin::send_shutdown(&sock) { + Ok(()) => { + // Poll for up to 3 s. Most exits land in well under 500 ms (the time + // OsRouteGuard::Drop spends running `route delete …`). + let mut guard = self.child.lock(); + for _ in 0..30 { + if matches!(guard.try_wait(), Ok(Some(_))) { + return Ok(()); + } + thread::sleep(Duration::from_millis(100)); + } + // Admin acked but the process is still alive — fall through to SIGTERM. + } + Err(_) => { + // No admin response. Could be a stale socket from a previous, already-dead + // session. Fall through. + } + } + + // 2. SIGTERM to sudo, sudo forwards to aura. let _ = Command::new("kill") .arg("-TERM") .arg(pid.to_string()) .output(); let mut guard = self.child.lock(); for _ in 0..20 { - match guard.try_wait() { - Ok(Some(_)) => return Ok(()), - _ => thread::sleep(Duration::from_millis(100)), + if matches!(guard.try_wait(), Ok(Some(_))) { + return Ok(()); } + thread::sleep(Duration::from_millis(100)); } - // Grace period elapsed — fall back to SIGKILL. + + // 3. SIGKILL — absolute last resort. Leaves aura orphaned but unblocks the UI. 
let _ = guard.kill(); let _ = guard.wait(); Ok(()) diff --git a/crates/aura-cli/src/admin.rs b/crates/aura-cli/src/admin.rs index f28dae4..f6de0c5 100644 --- a/crates/aura-cli/src/admin.rs +++ b/crates/aura-cli/src/admin.rs @@ -44,7 +44,7 @@ use aura_tunnel::{PacketCounters, RouteAction, RouteTable}; use ipnetwork::IpNetwork; use serde::{Deserialize, Serialize}; use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader}; -use tokio::sync::RwLock; +use tokio::sync::{Notify, RwLock}; use crate::config::parse_action; @@ -132,10 +132,20 @@ pub struct AdminState { pub mirror: Arc, /// Live tunnel statistics. pub stats: Arc, + /// Shutdown signal — when a `Shutdown` admin request arrives, the handler calls + /// `shutdown.notify_one()` and the main client / server loop's `tokio::select!` listening on + /// `shutdown.notified()` returns, letting `OsRouteGuard::Drop` run and the process exit + /// cleanly. This is the v3.4.4 fix for "GUI Disconnect button doesn't kill aura": sudo's + /// signal forwarding from a non-tty Tauri-spawned parent is unreliable, so instead of sending + /// SIGTERM through sudo we just talk to the already-chmod-666 admin socket the GUI process + /// can write to as its own user. + pub shutdown: Arc, } impl AdminState { - /// Construct admin state from a shared table and stats, seeding the mirror from the given rules. + /// Construct admin state from a shared table and stats, seeding the mirror from the given + /// rules. Creates a fresh `shutdown` signal; clone the resulting `AdminState::shutdown` into + /// the main loop's `tokio::select!` to listen for `Shutdown` admin requests. pub fn new( routes: Arc>, stats: Arc, @@ -146,6 +156,7 @@ impl AdminState { routes, mirror: Arc::new(RuleMirror::from_rules(cidrs, domains)), stats, + shutdown: Arc::new(Notify::new()), } } } @@ -176,6 +187,13 @@ pub enum Request { }, /// Query tunnel statistics. Status, + /// v3.4.4: Ask the running client/server to shut down gracefully. 
The handler signals the + /// main `tokio::select!` loop via [`AdminState::shutdown`] and returns OK immediately; the + /// process then exits after running `OsRouteGuard::Drop` etc. The GUI uses this instead of + /// sending SIGTERM through sudo (sudo's signal-forwarding from a non-tty Tauri-spawned + /// parent is unreliable and the previous kill path would leave the aura child orphaned with + /// the TUN still up). + Shutdown, } /// One CIDR rule in a `route_list` response. @@ -372,6 +390,16 @@ pub async fn handle_request(state: &AdminState, req: Request) -> Response { ..Response::ok() } } + Request::Shutdown => { + // v3.4.4: signal the main client/server loop via the shared `Notify`. We don't wait + // here — the request returns immediately so the GUI's send-Shutdown round-trip + // doesn't get stuck behind OsRouteGuard::Drop (which can take a second or two on + // macOS as it issues multiple `route delete` commands). The caller then watches the + // process pid: it exits as soon as that route rollback completes. + tracing::info!("shutdown requested via admin socket"); + state.shutdown.notify_one(); + Response::ok() + } } } @@ -760,4 +788,24 @@ mod tests { #[cfg(windows)] assert_eq!(DEFAULT_SOCKET, r"\\.\pipe\aura-admin"); } + + /// v3.4.4: `Request::Shutdown` signals the shared `Notify` so a caller listening on + /// `state.shutdown.notified()` can wake up and exit cleanly. Confirms the wire <-> shutdown + /// link is wired correctly; the actual select! in `client::run` / `server::run` exercises + /// the Notify in integration tests / live runs. + #[tokio::test] + async fn shutdown_request_fires_notify() { + let st = state(); + let notify = Arc::clone(&st.shutdown); + // Spawn a waiter — it should resolve as soon as the Shutdown handler fires. 
+ let waiter = tokio::spawn(async move { notify.notified().await }); + let resp = handle_request(&st, Request::Shutdown).await; + assert!(resp.ok, "shutdown returned !ok: {resp:?}"); + // Bounded timeout — the notify_one() in the handler should be immediate. + let res = tokio::time::timeout(std::time::Duration::from_millis(200), waiter).await; + assert!( + res.is_ok(), + "shutdown waiter did not wake within 200ms; Notify wasn't signalled" + ); + } } diff --git a/crates/aura-cli/src/client.rs b/crates/aura-cli/src/client.rs index 2fb0acd..1b4ed0b 100644 --- a/crates/aura-cli/src/client.rs +++ b/crates/aura-cli/src/client.rs @@ -308,6 +308,12 @@ pub async fn run(config_path: &Path, admin_socket: &str) -> anyhow::Result<()> { cidr_mirror, domains.clone(), ); + // v3.4.4: clone the shutdown signal so the main router-select below can listen for it. When + // the GUI sends `{"cmd":"shutdown"}` over the admin socket, the admin handler signals this + // Notify, the select! arm fires, router.run() future is dropped (releasing TUN, inbound + // tasks, etc), and then OsRouteGuard's Drop runs and rolls back the OS routes — all before + // process exit. No SIGTERM-through-sudo race. + let shutdown = Arc::clone(&admin_state.shutdown); let admin_path = admin_socket.to_string(); tokio::spawn(async move { if let Err(e) = admin::serve(&admin_path, admin_state).await { @@ -419,7 +425,18 @@ pub async fn run(config_path: &Path, admin_socket: &str) -> anyhow::Result<()> { // Wire the same atomic counters the admin socket reads (via the `Stats` clone above) into the // router so `aura status` shows live tx/rx numbers. let router = AuraRouter::with_stats(tun, routes, conn, Some(stats.counters())); - let run_result = router.run().await.context("router run loop"); + // v3.4.4: race the router loop against the admin shutdown notify. 
Whichever one finishes + // first ends the function; OsRouteGuard's Drop on the `_os_routes_guard` binding runs after + // this returns, rolling back the system routes. Graceful disconnect via admin is now a + // single round-trip: GUI posts `{"cmd":"shutdown"}`, admin handler notifies, select! fires + // the second arm, router future is dropped, routes are reverted, process exits cleanly. + let run_result = tokio::select! { + r = router.run() => r.context("router run loop"), + _ = shutdown.notified() => { + tracing::info!("graceful shutdown via admin socket; rolling back OS routes"); + Ok(()) + } + }; // _os_routes_guard drops here, rolling back any installed system routes. run_result } diff --git a/crates/aura-cli/src/main.rs b/crates/aura-cli/src/main.rs index 63be133..2183d99 100644 --- a/crates/aura-cli/src/main.rs +++ b/crates/aura-cli/src/main.rs @@ -50,6 +50,13 @@ enum Command { /// Query a running client/server for tunnel status via the admin socket. Status(AdminConnArgs), + /// v3.4.4: Ask a running client/server to shut down gracefully via the admin socket. The + /// process runs its `OsRouteGuard::Drop` to roll back installed system routes before + /// exiting; the kernel reaps the TUN device on close. Used by the GUI's Disconnect button + /// (talks to the chmod-666 admin socket without needing sudo) and useful from a terminal + /// when systemctl / launchctl aren't appropriate. + Shutdown(AdminConnArgs), + /// Quick crypto micro-benchmarks (KEM keygen/encaps/decaps, full handshake, AEAD). 
BenchCrypto, @@ -339,6 +346,7 @@ async fn main() -> anyhow::Result<()> { Command::Client(args) => client::run(&args.config, &args.admin_socket).await, Command::Route(cmd) => run_route(cmd).await, Command::Status(args) => run_status(&args.admin_socket).await, + Command::Shutdown(args) => run_shutdown(&args.admin_socket).await, Command::BenchCrypto => bench::run(), Command::ServerInit(args) => run_server_init(args), Command::ProvisionClient(args) => run_provision_client(args), @@ -580,6 +588,17 @@ async fn run_status(admin_socket: &str) -> anyhow::Result<()> { Ok(()) } +/// v3.4.4: dispatch `aura shutdown` over the admin socket. +async fn run_shutdown(admin_socket: &str) -> anyhow::Result<()> { + let resp = admin::request(admin_socket, &Request::Shutdown).await?; + if !resp.ok { + anyhow::bail!("shutdown failed: {}", resp.error.unwrap_or_default()); + } + println!("shutdown signal sent; the running client/server is rolling back its routes and \ + exiting (typically <500 ms)."); + Ok(()) +} + /// Print a generic admin response (ok / error, with optional `removed`). fn print_response(resp: admin::Response) { if resp.ok { diff --git a/crates/aura-cli/src/server.rs b/crates/aura-cli/src/server.rs index 680c509..7c9f908 100644 --- a/crates/aura-cli/src/server.rs +++ b/crates/aura-cli/src/server.rs @@ -280,6 +280,11 @@ pub async fn run(config_path: &Path, admin_socket: &str) -> anyhow::Result<()> { std::iter::empty(), std::iter::empty(), ); + // v3.4.4: clone the shutdown signal so the accept loop below can break out of accept() when + // an admin `Shutdown` request arrives. Lets operators stop the server gracefully via + // `aura shutdown --admin-socket /run/aura-admin.sock` instead of `systemctl stop aura.service` + // when they want to test on a live host without disturbing the unit file. 
+ let shutdown = Arc::clone(&admin_state.shutdown); let admin_path = admin_socket.to_string(); tokio::spawn(async move { if let Err(e) = admin::serve(&admin_path, admin_state).await { @@ -376,9 +381,18 @@ pub async fn run(config_path: &Path, admin_socket: &str) -> anyhow::Result<()> { // others on the same listening port. Non-UDP transports (TCP, QUIC) skip rendezvous in // v3.1; only UDP is supported as a hop transport. loop { - let next = { - let mut srv = server.lock().await; - srv.accept().await + let next = tokio::select! { + n = async { + let mut srv = server.lock().await; + srv.accept().await + } => n, + // v3.4.4: graceful shutdown via admin socket. Breaks out of the accept loop without + // waiting for the next connection. router_task.abort() + the NatGuard / mask-rotator + // Drop run on return. + _ = shutdown.notified() => { + tracing::info!("server shutdown requested via admin socket; exiting accept loop"); + break; + } }; let Some(accepted) = next else { break }; let peer_id = accepted.peer_id.clone();