diff --git a/Cargo.lock b/Cargo.lock index 918000e..99a7f0e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -198,6 +198,7 @@ dependencies = [ "aura-tunnel", "clap", "ipnetwork", + "nix 0.29.0", "rustls-pki-types", "serde", "serde_json", @@ -1659,6 +1660,18 @@ dependencies = [ "zeroize", ] +[[package]] +name = "nix" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + [[package]] name = "nix" version = "0.31.3" @@ -2799,7 +2812,7 @@ dependencies = [ "ipnet", "libc", "log", - "nix", + "nix 0.31.3", "thiserror 2.0.18", "tokio", "tokio-util", diff --git a/config/client.toml.example b/config/client.toml.example index 57dd8b3..a4505bb 100644 --- a/config/client.toml.example +++ b/config/client.toml.example @@ -9,6 +9,12 @@ server_addr = "203.0.113.10:443" # Outer-TLS SNI (camouflage hostname) presented to the server. Also the name verified # inside the Aura handshake against the server certificate's SAN. sni = "cdn.example.com" +# Optional: drop privileges to this non-root user AFTER the TUN device has been brought up. +# Recommended when `aura client` is launched via sudo so the long-running router loop runs +# as an ordinary user. Linux uses setresuid/setresgid; macOS uses setgid/setuid; Windows is a +# no-op (use a service account instead). When omitted (or already running as non-root) no +# privilege change happens. +# run_as = "nobody" [pki] # Trust anchor (the Aura CA) and this client's leaf cert/key, all PEM. diff --git a/config/server.toml.example b/config/server.toml.example index 104a025..97e92fb 100644 --- a/config/server.toml.example +++ b/config/server.toml.example @@ -8,6 +8,12 @@ name = "aura-edge-1" listen = "0.0.0.0:443" # Accept workers (advisory in v1). 
workers = 4 +# Optional: drop privileges to this non-root user AFTER the TUN, low-port sockets and any +# [server.nat] commands have been applied. Recommended on production hosts so the long-running +# accept loop does not stay as root. Linux uses setresuid/setresgid (full triple-drop); macOS +# uses setgid/setuid; Windows is a no-op (use a service account instead). When omitted (or +# already running as non-root) no privilege change happens. +# run_as = "nobody" [pki] # Trust anchor (the Aura CA) and this server's leaf cert/key, all PEM. @@ -49,6 +55,20 @@ strategy = "static_or_dynamic" # "phone-1" = "10.7.0.20" # "laptop-1" = "10.7.0.21" +# v2 auto-NAT: when `auto = true`, the server enables IPv4 forwarding at startup and adds a +# MASQUERADE / pf-NAT rule for the address pool on the given egress interface, and rolls every +# change back on shutdown (RAII guard inside `aura server`). Supported on Linux (sysctl + +# iptables) and macOS (sysctl + pfctl). Omit the whole [server.nat] section to keep the v1 +# behaviour where the operator configures forwarding by hand. There is no egress-interface +# auto-detection in v1 — `egress_iface` is required when `auto = true`. +# +# IPv6 forwarding / ip6tables / nftables are NOT configured in v1 (TODO for v3). +# +# [server.nat] +# auto = true +# egress_iface = "eth0" # required when auto = true +# dry_run = false # set to true to only log the planned commands without executing them + [mimicry] # Outer-TLS camouflage hostname the server presents/expects. 
sni = "cdn.example.com" diff --git a/crates/aura-cli/Cargo.toml b/crates/aura-cli/Cargo.toml index eea8300..464a6fd 100644 --- a/crates/aura-cli/Cargo.toml +++ b/crates/aura-cli/Cargo.toml @@ -32,6 +32,14 @@ tracing-subscriber.workspace = true anyhow.workspace = true uuid.workspace = true +# Unix-only: nix is used by the privilege-drop helper (`privdrop::drop_to_user`) to look up +# the target user via getpwnam and drop the real/effective/saved uid+gid after binding +# privileged sockets / creating the TUN. Linux uses setresuid/setresgid; macOS uses +# setgid/setuid (no setresuid in the BSD ABI). The "user" feature gates the User::from_name +# helper. No nix on Windows (privilege drop is a no-op there; see privdrop.rs). +[target.'cfg(unix)'.dependencies] +nix = { version = "0.29", default-features = false, features = ["user"] } + [dev-dependencies] tokio.workspace = true # Loopback + PKI-roundtrip tests build certificate chains for the verifier. diff --git a/crates/aura-cli/src/admin.rs b/crates/aura-cli/src/admin.rs index 26c9a6a..5078174 100644 --- a/crates/aura-cli/src/admin.rs +++ b/crates/aura-cli/src/admin.rs @@ -1,4 +1,4 @@ -//! Admin IPC: a tiny JSON line protocol over a Unix domain socket. +//! Admin IPC: a tiny JSON line protocol over a Unix domain socket (Unix) or a named pipe (Windows). //! //! A running `aura server` / `aura client` hosts a [`serve`] listener over a shared [`AdminState`] //! (the live `RouteTable`, a rule mirror, and tunnel [`Stats`]). The `aura route ...` and @@ -25,11 +25,16 @@ //! Every admin mutation touches both, so `route_list` can faithfully echo what is configured while //! `classify` still goes through the real table. //! -//! ## Platform note -//! The transport is `tokio::net::UnixListener` / `UnixStream`, available on Unix (the project's -//! Linux + macOS targets). On Windows this would be a named pipe; that path is a documented -//! 
`cfg`-gated stub ([`serve`] / [`request`] return an explanatory error) so the rest of the CLI -//! still compiles there. +//! ## Cross-platform transport +//! The wire protocol is identical; only the per-platform stream type differs: +//! +//! * **Unix**: `tokio::net::UnixListener` / `UnixStream` over `/tmp/aura-admin.sock`. +//! * **Windows**: `tokio::net::windows::named_pipe::{NamedPipeServer, NamedPipeClient}` over +//! `\\.\pipe\aura-admin`. The standard Tokio pattern is to rebuild a fresh `ServerOptions` +//! instance after every accept so subsequent clients can also connect. +//! +//! See [`transport`] for the platform-specific listen/connect glue. The handler ([`handle_request`]) +//! and the wire types are platform-agnostic. use std::collections::BTreeMap; use std::sync::atomic::{AtomicU64, Ordering}; @@ -38,12 +43,18 @@ use std::sync::{Arc, Mutex as StdMutex}; use aura_tunnel::{RouteAction, RouteTable}; use ipnetwork::IpNetwork; use serde::{Deserialize, Serialize}; +use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader}; use tokio::sync::RwLock; use crate::config::parse_action; -/// Default admin socket path used when a config / flag does not override it. +/// Default admin transport endpoint used when a config / flag does not override it. On Unix this +/// is a filesystem path under `/tmp`; on Windows it is a named pipe path under `\\.\pipe\`. +#[cfg(unix)] pub const DEFAULT_SOCKET: &str = "/tmp/aura-admin.sock"; +/// Default admin transport endpoint on Windows: a named pipe in the local pipe namespace. +#[cfg(windows)] +pub const DEFAULT_SOCKET: &str = r"\\.\pipe\aura-admin"; /// Live tunnel statistics shared between the data path and the admin listener. #[derive(Debug, Default)] @@ -368,70 +379,229 @@ async fn rebuild_table(state: &AdminState) { *state.routes.write().await = fresh; } -/// Run the admin listener until the task is cancelled. 
-/// -/// Removes any stale socket at `path`, binds a [`tokio::net::UnixListener`], and serves connections -/// (one request/response per accepted line) over the shared `state`. -#[cfg(unix)] -pub async fn serve(path: &str, state: AdminState) -> anyhow::Result<()> { - use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader}; - use tokio::net::UnixListener; +// ---- platform transport --------------------------------------------------------------------- - // Best-effort cleanup of a previous run's socket file. - let _ = std::fs::remove_file(path); - let listener = UnixListener::bind(path) - .map_err(|e| anyhow::anyhow!("binding admin socket {path}: {e}"))?; - tracing::info!(socket = path, "admin IPC listening"); +mod transport { + //! Platform glue for the admin transport. The Unix and Windows variants present the same + //! `listen` / `connect` interface so [`super::serve`] / [`super::request`] can be written + //! once over `AsyncRead + AsyncWrite` streams. + #[cfg(unix)] + pub use self::unix::{accept, connect, listen}; + #[cfg(windows)] + pub use self::windows::{accept, connect, listen}; - loop { - let (stream, _addr) = match listener.accept().await { - Ok(pair) => pair, - Err(e) => { - tracing::warn!(error = %e, "admin accept failed"); - continue; - } + #[cfg(unix)] + mod unix { + use std::io; + use tokio::net::{UnixListener, UnixStream}; + + /// Bind a Unix domain socket at `path`, removing any stale socket file first. + pub fn listen(path: &str) -> io::Result { + let _ = std::fs::remove_file(path); + UnixListener::bind(path) + } + + /// Accept the next admin client. Returns the stream half on success. + pub async fn accept(listener: &UnixListener) -> io::Result { + let (stream, _addr) = listener.accept().await?; + Ok(stream) + } + + /// Connect to a Unix domain socket at `path`. + pub async fn connect(path: &str) -> io::Result { + UnixStream::connect(path).await + } + } + + #[cfg(windows)] + mod windows { + //! 
Windows transport: named pipe in the local namespace (`\\.\pipe\`). + //! + //! Tokio's `NamedPipeServer` represents one already-bound endpoint. The standard accept + //! pattern is: + //! + //! 1. Build one endpoint with `ServerOptions::new().first_pipe_instance(true).create(path)`. + //! 2. `connect().await` to wait for a client to open the pipe. + //! 3. *Before* serving the request, build a fresh endpoint via the same options so the + //! next client has somewhere to connect — otherwise the namespace entry disappears + //! once we hand the current instance off to the request handler. + //! + //! We model that as a [`Listener`] wrapper that owns the latest "pending" instance plus + //! the `ServerOptions` template. + use std::io; + use tokio::net::windows::named_pipe::{ + ClientOptions, NamedPipeClient, NamedPipeServer, ServerOptions, }; - let state = state.clone(); - tokio::spawn(async move { - let (read_half, mut write_half) = stream.into_split(); - let mut lines = BufReader::new(read_half).lines(); - while let Ok(Some(line)) = lines.next_line().await { - if line.trim().is_empty() { - continue; - } - let resp = match serde_json::from_str::(&line) { - Ok(req) => handle_request(&state, req).await, - Err(e) => Response::err(format!("bad request: {e}")), - }; - let mut buf = serde_json::to_vec(&resp) - .unwrap_or_else(|_| b"{\"ok\":false,\"error\":\"serialize failed\"}".to_vec()); - buf.push(b'\n'); - if write_half.write_all(&buf).await.is_err() { - break; + use tokio::time::{sleep, Duration}; + + /// Named-pipe listener. Owns the next-to-be-connected instance. + pub struct Listener { + path: String, + pending: NamedPipeServer, + } + + /// Create the initial pipe instance and wrap it in a [`Listener`]. 
+ pub fn listen(path: &str) -> io::Result { + let pending = ServerOptions::new() + .first_pipe_instance(true) + .create(path)?; + Ok(Listener { + path: path.to_string(), + pending, + }) + } + + /// Wait for a client, then rebuild the pending instance so subsequent clients can also + /// connect; return the now-connected server endpoint. + pub async fn accept(listener: &mut Listener) -> io::Result { + listener.pending.connect().await?; + // Rotate: keep the connected instance to return, replace `pending` with a fresh one. + let next = ServerOptions::new().create(&listener.path)?; + let connected = std::mem::replace(&mut listener.pending, next); + Ok(connected) + } + + /// Connect to a named pipe at `path`. Retries briefly on `ERROR_PIPE_BUSY` (the kernel + /// returns this when every server instance is busy answering another client; a short + /// pause + retry is the documented idiom). + pub async fn connect(path: &str) -> io::Result { + // ERROR_PIPE_BUSY = 231. + const PIPE_BUSY: i32 = 231; + for _ in 0..50 { + match ClientOptions::new().open(path) { + Ok(c) => return Ok(c), + Err(e) if e.raw_os_error() == Some(PIPE_BUSY) => { + sleep(Duration::from_millis(20)).await; + } + Err(e) => return Err(e), } } - }); + // One last attempt; if it still fails surface the underlying error. + ClientOptions::new().open(path) + } } } -/// Windows stub: the admin socket uses Unix domain sockets; a named-pipe transport is future work. -#[cfg(not(unix))] -pub async fn serve(_path: &str, _state: AdminState) -> anyhow::Result<()> { - anyhow::bail!("admin IPC over Unix sockets is unavailable on this platform (Windows named-pipe transport is not yet implemented)") +/// Run the admin listener until the task is cancelled. +/// +/// Binds the platform listener at `path` and serves one request/response per accepted line over +/// the shared `state`. On Unix this is a Unix domain socket; on Windows this is a named pipe. 
+pub async fn serve(path: &str, state: AdminState) -> anyhow::Result<()> { + #[cfg(unix)] + { + let listener = transport::listen(path) + .map_err(|e| anyhow::anyhow!("binding admin socket {path}: {e}"))?; + tracing::info!(socket = path, "admin IPC listening"); + + loop { + let stream = match transport::accept(&listener).await { + Ok(s) => s, + Err(e) => { + tracing::warn!(error = %e, "admin accept failed"); + continue; + } + }; + let state_clone = state.clone(); + tokio::spawn(async move { + let (read_half, write_half) = stream.into_split(); + serve_connection(read_half, write_half, state_clone).await; + }); + } + } + #[cfg(windows)] + { + let mut listener = transport::listen(path) + .map_err(|e| anyhow::anyhow!("binding admin pipe {path}: {e}"))?; + tracing::info!(pipe = path, "admin IPC listening"); + + loop { + let stream = match transport::accept(&mut listener).await { + Ok(s) => s, + Err(e) => { + tracing::warn!(error = %e, "admin pipe accept failed"); + continue; + } + }; + let state_clone = state.clone(); + // The Tokio NamedPipeServer implements AsyncRead + AsyncWrite directly; we cannot + // `into_split` it the way we do with UnixStream, so wrap it in tokio::io::split. + tokio::spawn(async move { + let (read_half, write_half) = tokio::io::split(stream); + serve_connection(read_half, write_half, state_clone).await; + }); + } + } + #[cfg(not(any(unix, windows)))] + { + let _ = (path, state); + anyhow::bail!("admin IPC is not supported on this platform (need unix sockets or windows named pipes)") + } } -/// Connect to the admin socket, send one [`Request`], and return the [`Response`]. -#[cfg(unix)] -pub async fn request(path: &str, req: &Request) -> anyhow::Result { - use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader}; - use tokio::net::UnixStream; +/// Common per-connection loop: read one JSON-line request, write one JSON-line response, repeat +/// until the client disconnects. 
+async fn serve_connection(read_half: R, mut write_half: W, state: AdminState) +where + R: tokio::io::AsyncRead + Unpin, + W: tokio::io::AsyncWrite + Unpin, +{ + let mut lines = BufReader::new(read_half).lines(); + while let Ok(Some(line)) = lines.next_line().await { + if line.trim().is_empty() { + continue; + } + let resp = match serde_json::from_str::(&line) { + Ok(req) => handle_request(&state, req).await, + Err(e) => Response::err(format!("bad request: {e}")), + }; + let mut buf = serde_json::to_vec(&resp) + .unwrap_or_else(|_| b"{\"ok\":false,\"error\":\"serialize failed\"}".to_vec()); + buf.push(b'\n'); + if write_half.write_all(&buf).await.is_err() { + break; + } + } +} - let stream = UnixStream::connect(path).await.map_err(|e| { - anyhow::anyhow!( - "connecting to admin socket {path}: {e} (is `aura server`/`aura client` running?)" - ) - })?; - let (read_half, mut write_half) = stream.into_split(); +/// Connect to the admin transport, send one [`Request`], and return the [`Response`]. +pub async fn request(path: &str, req: &Request) -> anyhow::Result { + #[cfg(unix)] + { + let stream = transport::connect(path).await.map_err(|e| { + anyhow::anyhow!( + "connecting to admin socket {path}: {e} (is `aura server`/`aura client` running?)" + ) + })?; + let (read_half, write_half) = stream.into_split(); + return request_over(read_half, write_half, req).await; + } + #[cfg(windows)] + { + let stream = transport::connect(path).await.map_err(|e| { + anyhow::anyhow!( + "connecting to admin pipe {path}: {e} (is `aura server`/`aura client` running?)" + ) + })?; + let (read_half, write_half) = tokio::io::split(stream); + return request_over(read_half, write_half, req).await; + } + #[cfg(not(any(unix, windows)))] + { + let _ = (path, req); + anyhow::bail!("admin IPC is not supported on this platform") + } +} + +/// Generic request/response over any split stream. 
+async fn request_over( + read_half: R, + mut write_half: W, + req: &Request, +) -> anyhow::Result +where + R: tokio::io::AsyncRead + Unpin, + W: tokio::io::AsyncWrite + Unpin, +{ let mut buf = serde_json::to_vec(req)?; buf.push(b'\n'); write_half.write_all(&buf).await?; @@ -445,12 +615,6 @@ pub async fn request(path: &str, req: &Request) -> anyhow::Result { Ok(serde_json::from_str(&line)?) } -/// Windows stub mirroring [`serve`]. -#[cfg(not(unix))] -pub async fn request(_path: &str, _req: &Request) -> anyhow::Result { - anyhow::bail!("admin IPC over Unix sockets is unavailable on this platform (Windows named-pipe transport is not yet implemented)") -} - #[cfg(test)] mod tests { use super::*; @@ -552,4 +716,14 @@ mod tests { assert_eq!(resp.tx_packets, Some(5)); assert_eq!(resp.peer_id.as_deref(), Some("client-9")); } + + /// The platform-default endpoint is set correctly for each target. (Inspection-only on + /// non-host platforms; the cfg picks the right constant at compile time.) + #[test] + fn default_socket_const_is_platform_appropriate() { + #[cfg(unix)] + assert_eq!(DEFAULT_SOCKET, "/tmp/aura-admin.sock"); + #[cfg(windows)] + assert_eq!(DEFAULT_SOCKET, r"\\.\pipe\aura-admin"); + } } diff --git a/crates/aura-cli/src/client.rs b/crates/aura-cli/src/client.rs index b250d61..7b186c7 100644 --- a/crates/aura-cli/src/client.rs +++ b/crates/aura-cli/src/client.rs @@ -30,6 +30,7 @@ use tokio::sync::RwLock; use crate::admin::{self, AdminState, Stats}; use crate::config::ClientConfigFile; use crate::masks::MaskRotator; +use crate::privdrop; /// Entry point for `aura client --config ` (and optional `--admin-socket`). pub async fn run(config_path: &Path, admin_socket: &str) -> anyhow::Result<()> { @@ -152,6 +153,13 @@ pub async fn run(config_path: &Path, admin_socket: &str) -> anyhow::Result<()> { .context("creating TUN device (needs root)")?; tracing::info!(tun = %cfg.tunnel.tun_name, "TUN device up; routing traffic"); + // Privilege drop. 
The only operation requiring root on the client is the TUN open above + // (the dial used unprivileged outbound sockets); switch to the configured non-root user + // before entering the long-lived router loop. + if let Some(user) = cfg.client.run_as.as_deref() { + privdrop::drop_to_user(user).context("dropping client privileges per [client] run_as")?; + } + let router = AuraRouter::new(tun, routes, conn); router.run().await.context("router run loop")?; Ok(()) diff --git a/crates/aura-cli/src/config.rs b/crates/aura-cli/src/config.rs index 83a483d..206b2c2 100644 --- a/crates/aura-cli/src/config.rs +++ b/crates/aura-cli/src/config.rs @@ -104,6 +104,35 @@ pub struct ServerSection { /// fallback that interprets `[tunnel] pool_cidr` as a [`PoolStrategy::DynamicOnly`] pool. #[serde(default)] pub pool: ServerPoolSection, + /// `[server.nat]` sub-section: v2 auto-NAT (IP forward + MASQUERADE) applied at startup and + /// rolled back at shutdown. Omitting it (the default) leaves the host network untouched — + /// this is the v1 behaviour where the operator manually pre-configures forwarding. + #[serde(default)] + pub nat: Option, + /// Optional non-root user to drop privileges to **after** all startup work that needs root + /// (TUN open, low-port bind, NAT configuration). When omitted (or already non-root) the + /// server keeps its current credentials. + #[serde(default)] + pub run_as: Option, +} + +/// `[server.nat]` section: v2 auto-NAT configuration. See [`crate::nat`] for the apply / rollback +/// semantics. Optional — when the section is omitted the server makes no changes to the host's +/// IP forwarding state, matching v1 behaviour. +#[derive(Debug, Clone, Default, Deserialize)] +#[serde(default)] +pub struct ServerNatSection { + /// Master switch. When `false` (or the section is omitted) the server does NOT touch the + /// host network — the operator is expected to have configured forwarding by hand. 
When + /// `true` the server applies the platform-appropriate set of commands at startup and + /// rolls them back on shutdown. + pub auto: bool, + /// Name of the host interface traffic egresses through (e.g. `"eth0"` on Linux, `"en0"` on + /// macOS). REQUIRED when `auto = true` — there is no auto-detection in v1 (that is v3). + pub egress_iface: String, + /// When `true`, every command is only logged (`would run: ...`) and not executed. Useful + /// for verifying the plan without root privileges and for the unit tests. + pub dry_run: bool, } /// `[tunnel]` section of `server.toml`. @@ -158,6 +187,10 @@ pub struct ClientSection { pub server_addr: String, /// Outer-TLS SNI (camouflage hostname) presented to the server. pub sni: String, + /// Optional non-root user to drop privileges to **after** the TUN is open. When omitted + /// (or already non-root) the client keeps its current credentials. See [`crate::privdrop`]. + #[serde(default)] + pub run_as: Option, } /// `[tunnel]` section of `client.toml`. @@ -1177,6 +1210,83 @@ quic_port = 443 assert!(eps.quic.is_none()); } + /// `[server.nat]` parses end-to-end (auto + egress_iface + dry_run) and exposes the values + /// to the server startup path. + #[test] + fn parses_server_nat_section() { + let s = r#" +[server] +name = "edge" +[server.nat] +auto = true +egress_iface = "eth0" +dry_run = true +[pki] +ca_cert = "a" +cert = "b" +key = "c" +[tunnel] +pool_cidr = "10.7.0.0/24" +"#; + let cfg = ServerConfigFile::parse(s).expect("parse server.toml with [server.nat]"); + let nat = cfg.server.nat.as_ref().expect("nat section present"); + assert!(nat.auto, "auto = true"); + assert_eq!(nat.egress_iface, "eth0"); + assert!(nat.dry_run, "dry_run = true"); + } + + /// Backwards compat: an old server.toml without `[server.nat]` parses fine and exposes + /// `nat = None`. This preserves the v1 "operator configures NAT by hand" behaviour. 
+ #[test] + fn server_nat_section_optional() { + let s = r#" +[server] +name = "edge" +[pki] +ca_cert = "a" +cert = "b" +key = "c" +[tunnel] +pool_cidr = "10.7.0.0/24" +"#; + let cfg = ServerConfigFile::parse(s).expect("parse minimal v1 server.toml"); + assert!(cfg.server.nat.is_none(), "nat section absent by default"); + } + + /// `run_as` is parsed off both [server] and [client] sections and is optional. + #[test] + fn parses_run_as_on_both_configs() { + let s = r#" +[server] +name = "edge" +run_as = "nobody" +[pki] +ca_cert = "a" +cert = "b" +key = "c" +[tunnel] +pool_cidr = "10.7.0.0/24" +"#; + let cfg = ServerConfigFile::parse(s).expect("parse server.toml with run_as"); + assert_eq!(cfg.server.run_as.as_deref(), Some("nobody")); + + let c = r#" +[client] +name = "x" +server_addr = "1.2.3.4:443" +sni = "a" +run_as = "nobody" +[pki] +ca_cert = "a" +cert = "b" +key = "c" +[tunnel] +local_ip = "10.7.0.2" +"#; + let cfg = ClientConfigFile::parse(c).expect("parse client.toml with run_as"); + assert_eq!(cfg.client.run_as.as_deref(), Some("nobody")); + } + /// An unknown transport name in `order` is a hard error (not silently dropped). #[test] fn rejects_unknown_transport_name() { diff --git a/crates/aura-cli/src/lib.rs b/crates/aura-cli/src/lib.rs index e547b64..da2c5ee 100644 --- a/crates/aura-cli/src/lib.rs +++ b/crates/aura-cli/src/lib.rs @@ -17,7 +17,9 @@ pub mod bench; pub mod client; pub mod config; pub mod masks; +pub mod nat; pub mod pki; pub mod pool; +pub mod privdrop; pub mod server; pub mod server_router; diff --git a/crates/aura-cli/src/nat.rs b/crates/aura-cli/src/nat.rs new file mode 100644 index 0000000..f06fd48 --- /dev/null +++ b/crates/aura-cli/src/nat.rs @@ -0,0 +1,451 @@ +//! Auto-NAT: enable IP forwarding + masquerade for the VPN address pool on server start, and +//! roll the changes back when the server stops. +//! +//! This removes the v1 "manual step" of asking the operator to set `net.ipv4.ip_forward=1` and +//! 
add an `iptables`/`pfctl` MASQUERADE/NAT rule by hand. The [`NatGuard`] is RAII: it captures +//! the previous state on `enable()` and undoes its own changes in `Drop` (in reverse order). The +//! intended use is to bind the guard to the lifetime of `aura server`'s `run()`. +//! +//! ## Supported platforms (v1) +//! * **Linux**: `sysctl -w net.ipv4.ip_forward=1` + `iptables -t nat -A POSTROUTING -s -o -j MASQUERADE`. +//! * **macOS**: `sysctl -w net.inet.ip.forwarding=1` + a pf rule file at `/tmp/aura-nat.conf` +//! loaded with `pfctl -f -e`. +//! * **Other**: returns an error from [`NatGuard::enable`]; the server should treat auto-NAT as +//! unsupported on this platform. +//! +//! ## What is NOT done (v3+) +//! * IPv6 forwarding / `ip6tables` / `nftables` are not configured. (v1 = v4 only — TODO for v3.) +//! * Auto-detection of the egress interface — the operator must set `[server.nat] egress_iface`. +//! +//! ## dry_run +//! When `dry_run = true`, the guard logs every command it *would* run (with the same arguments) +//! but does not execute anything. This is what the cross-platform unit test exercises: on macOS +//! CI we cannot run real `iptables`, and on Linux CI we generally cannot bind privileged +//! interfaces either, so a dry-run smoke test is the most we can do without root. + +use std::process::Command; + +use anyhow::{anyhow, Result}; + +/// Plan / log / undo a single shell command issued by [`NatGuard`]. +/// +/// `args` does NOT include the program name (`prog`). `description` is a short human label used +/// in tracing logs (e.g. "enable ip forward", "add MASQUERADE rule"). +struct PlannedCommand { + prog: &'static str, + args: Vec, + /// Short human label used in tracing logs. Read indirectly via [`Self::render`] context in + /// log lines (allowed to be unused on platforms whose log macros don't reach for it). 
+ #[allow(dead_code)] + description: &'static str, +} + +impl PlannedCommand { + fn new(prog: &'static str, args: Vec, description: &'static str) -> Self { + Self { + prog, + args, + description, + } + } + + /// Render the command as a single shell-ish string, for logs only (no execution). + fn render(&self) -> String { + let mut s = String::from(self.prog); + for a in &self.args { + s.push(' '); + s.push_str(a); + } + s + } + + /// Run the command synchronously; on a non-zero exit, return an error including stderr. + fn run(&self) -> Result<()> { + let out = Command::new(self.prog) + .args(self.args.iter().map(String::as_str)) + .output() + .map_err(|e| anyhow!("spawning `{}`: {e}", self.prog))?; + if !out.status.success() { + let stderr = String::from_utf8_lossy(&out.stderr).trim().to_string(); + return Err(anyhow!( + "`{}` exited with {}: {stderr}", + self.render(), + out.status + )); + } + Ok(()) + } +} + +/// RAII handle that holds the NAT configuration on a server. Drop reverts every action. +pub struct NatGuard { + /// Stack of "undo" commands, applied in REVERSE order at drop time. + rollback: Vec, + /// If true, neither [`enable`](Self::enable) nor [`Drop`] runs commands — they only log. + dry_run: bool, + /// pf rule file we created on macOS (so Drop can also delete it). None on other platforms or + /// when dry-running. + pf_rule_file: Option, +} + +impl NatGuard { + /// Enable IP forwarding and a MASQUERADE rule for `pool_cidr` egressing on `egress_iface`. + /// + /// Returns Ok with a populated guard on success; the caller binds it to a longer-lived scope + /// (typically the server's `run()` future) so the rollback fires on shutdown. + /// + /// In `dry_run` mode every command is rendered into a tracing line as `would run: ` but + /// nothing is executed. dry_run works on every platform (incl. unsupported ones), which is + /// what the unit tests rely on. 
+ pub fn enable(pool_cidr: &str, egress_iface: &str, dry_run: bool) -> Result { + if dry_run { + // dry_run takes a fast path that does not require an OS-specific implementation: we + // just log what each platform WOULD run. This is what the unit test exercises on + // macOS/Linux/CI alike. + return Self::enable_dry_run(pool_cidr, egress_iface); + } + Self::enable_real(pool_cidr, egress_iface) + } + + /// Real (non-dry-run) enable: dispatched per target_os. Kept as a separate helper so the + /// public [`enable`](Self::enable) does not need overlapping cfg branches that confuse + /// `clippy::needless_return`. + fn enable_real(pool_cidr: &str, egress_iface: &str) -> Result { + #[cfg(target_os = "linux")] + { + Self::enable_linux(pool_cidr, egress_iface) + } + #[cfg(target_os = "macos")] + { + Self::enable_macos(pool_cidr, egress_iface) + } + #[cfg(not(any(target_os = "linux", target_os = "macos")))] + { + let _ = (pool_cidr, egress_iface); + Err(anyhow!("auto-NAT supported on linux/macos only in v1")) + } + } + + fn enable_dry_run(pool_cidr: &str, egress_iface: &str) -> Result { + // Emit both the Linux and macOS plans so the operator sees what would happen on either + // host. (We pick the host's set when actually running below.) + for cmd in linux_apply_plan(pool_cidr, egress_iface) { + tracing::info!(target: "aura::nat", "would run (linux): {}", cmd.render()); + } + for cmd in macos_apply_plan(pool_cidr, egress_iface, &dry_run_pf_path()) { + tracing::info!(target: "aura::nat", "would run (macos): {}", cmd.render()); + } + Ok(Self { + rollback: Vec::new(), + dry_run: true, + pf_rule_file: None, + }) + } + + #[cfg(target_os = "linux")] + fn enable_linux(pool_cidr: &str, egress_iface: &str) -> Result { + // Snapshot the current ip_forward sysctl so we can restore it on Drop. 
+ let prev = read_linux_ip_forward()?; + tracing::info!(target: "aura::nat", prev_ip_forward = prev, "snapshotted net.ipv4.ip_forward"); + + let plan = linux_apply_plan(pool_cidr, egress_iface); + let mut rollback: Vec = Vec::with_capacity(plan.len()); + for cmd in plan { + tracing::info!(target: "aura::nat", "running: {}", cmd.render()); + if let Err(e) = cmd.run() { + tracing::warn!(target: "aura::nat", error = %e, "NAT step failed; rolling back"); + // Undo what we already did before bubbling up. + let mut g = Self { + rollback, + dry_run: false, + pf_rule_file: None, + }; + g.rollback_now(); + return Err(e); + } + // Pair each apply step with its undo, in case we have to roll back mid-stream. + rollback.push(linux_undo_for(&cmd, pool_cidr, egress_iface, prev)); + } + tracing::info!( + target: "aura::nat", + pool = pool_cidr, + iface = egress_iface, + "auto-NAT applied (linux)" + ); + Ok(Self { + rollback, + dry_run: false, + pf_rule_file: None, + }) + } + + #[cfg(target_os = "macos")] + fn enable_macos(pool_cidr: &str, egress_iface: &str) -> Result { + let prev = read_macos_ip_forwarding()?; + tracing::info!(target: "aura::nat", prev_ip_forwarding = prev, "snapshotted net.inet.ip.forwarding"); + + let pf_path = std::path::PathBuf::from("/tmp/aura-nat.conf"); + let pf_rule = format!( + "nat on {iface} from {pool} to any -> ({iface})\n", + iface = egress_iface, + pool = pool_cidr + ); + std::fs::write(&pf_path, &pf_rule) + .map_err(|e| anyhow!("writing pf rule file {}: {e}", pf_path.display()))?; + tracing::info!(target: "aura::nat", pf_file = %pf_path.display(), "wrote pf rule file"); + + let plan = macos_apply_plan(pool_cidr, egress_iface, &pf_path); + let mut rollback: Vec = Vec::with_capacity(plan.len()); + for cmd in plan { + tracing::info!(target: "aura::nat", "running: {}", cmd.render()); + if let Err(e) = cmd.run() { + tracing::warn!(target: "aura::nat", error = %e, "NAT step failed; rolling back"); + let mut g = Self { + rollback, + dry_run: false, + 
pf_rule_file: Some(pf_path.clone()), + }; + g.rollback_now(); + return Err(e); + } + rollback.push(macos_undo_for(&cmd, prev)); + } + tracing::info!( + target: "aura::nat", + pool = pool_cidr, + iface = egress_iface, + "auto-NAT applied (macos)" + ); + Ok(Self { + rollback, + dry_run: false, + pf_rule_file: Some(pf_path), + }) + } + + /// Execute the rollback stack now, in REVERSE order, logging (not bubbling) any failures. + fn rollback_now(&mut self) { + if self.dry_run { + for cmd in self.rollback.drain(..).rev() { + tracing::info!(target: "aura::nat", "would undo: {}", cmd.render()); + } + return; + } + for cmd in self.rollback.drain(..).rev() { + tracing::info!(target: "aura::nat", "undo: {}", cmd.render()); + if let Err(e) = cmd.run() { + tracing::warn!(target: "aura::nat", error = %e, "NAT rollback step failed"); + } + } + if let Some(path) = self.pf_rule_file.take() { + if let Err(e) = std::fs::remove_file(&path) { + tracing::warn!(target: "aura::nat", error = %e, file = %path.display(), "could not remove pf rule file"); + } + } + } +} + +impl Drop for NatGuard { + fn drop(&mut self) { + self.rollback_now(); + } +} + +// ---- Linux helpers -------------------------------------------------------------------------- + +/// The apply plan on Linux: sysctl, then iptables MASQUERADE. +fn linux_apply_plan(pool_cidr: &str, egress_iface: &str) -> Vec { + vec![ + PlannedCommand::new( + "sysctl", + vec!["-w".into(), "net.ipv4.ip_forward=1".into()], + "enable ipv4 forwarding", + ), + PlannedCommand::new( + "iptables", + vec![ + "-t".into(), + "nat".into(), + "-A".into(), + "POSTROUTING".into(), + "-s".into(), + pool_cidr.to_string(), + "-o".into(), + egress_iface.to_string(), + "-j".into(), + "MASQUERADE".into(), + ], + "add MASQUERADE rule", + ), + // TODO(v3): IPv6 forwarding / ip6tables / nftables — not configured in v1. + ] +} + +/// Build the undo command for a Linux apply step. `prev` is the original `ip_forward` value. 
+#[cfg(target_os = "linux")]
+fn linux_undo_for(
+    applied: &PlannedCommand,
+    pool_cidr: &str,
+    egress_iface: &str,
+    prev: u8,
+) -> PlannedCommand {
+    // Dispatch on the program of the step that was applied; each program the Linux apply plan
+    // emits needs an arm here, anything else is a programming error.
+    match applied.prog {
+        "sysctl" => PlannedCommand::new(
+            "sysctl",
+            vec!["-w".into(), format!("net.ipv4.ip_forward={prev}")],
+            "restore ipv4 forwarding sysctl",
+        ),
+        // Same rule specification as the apply step, with `-D` (delete) instead of `-A`.
+        "iptables" => PlannedCommand::new(
+            "iptables",
+            vec![
+                "-t".into(),
+                "nat".into(),
+                "-D".into(),
+                "POSTROUTING".into(),
+                "-s".into(),
+                pool_cidr.to_string(),
+                "-o".into(),
+                egress_iface.to_string(),
+                "-j".into(),
+                "MASQUERADE".into(),
+            ],
+            "remove MASQUERADE rule",
+        ),
+        other => unreachable!("unknown linux apply program: {other}"),
+    }
+}
+
+/// Read the current `net.ipv4.ip_forward` value from procfs so it can be restored on rollback.
+///
+/// Errors if the file cannot be read or its trimmed content does not parse as a `u8`.
+#[cfg(target_os = "linux")]
+fn read_linux_ip_forward() -> Result<u8> {
+    let raw = std::fs::read_to_string("/proc/sys/net/ipv4/ip_forward")
+        .map_err(|e| anyhow!("reading /proc/sys/net/ipv4/ip_forward: {e}"))?;
+    // The file content ends with a newline; trim once and reuse the slice.
+    let value = raw.trim();
+    value
+        .parse::<u8>()
+        .map_err(|e| anyhow!("parsing ip_forward value '{value}': {e}"))
+}
+
+// ---- macOS helpers --------------------------------------------------------------------------
+
+/// The apply plan on macOS: sysctl forwarding, then `pfctl -f -e`.
+///
+/// `_pool_cidr` and `_egress_iface` are unused here: the plan only references the rule file at
+/// `pf_path`, which the caller is expected to have written.
+fn macos_apply_plan(
+    _pool_cidr: &str,
+    _egress_iface: &str,
+    pf_path: &std::path::Path,
+) -> Vec<PlannedCommand> {
+    vec![
+        PlannedCommand::new(
+            "sysctl",
+            vec!["-w".into(), "net.inet.ip.forwarding=1".into()],
+            "enable ipv4 forwarding",
+        ),
+        PlannedCommand::new(
+            "pfctl",
+            vec![
+                "-f".into(),
+                pf_path.to_string_lossy().into_owned(),
+                "-e".into(),
+            ],
+            "load pf NAT rules and enable pf",
+        ),
+    ]
+}
+
+/// Path the dry-run plan uses for the pf rule file (the file is not actually written).
+fn dry_run_pf_path() -> std::path::PathBuf {
+    std::path::PathBuf::from("/tmp/aura-nat.conf")
+}
+
+/// Build the undo command for a macOS apply step. `prev` is the original forwarding value.
+///
+/// NOTE(review): the pf undo is `pfctl -F nat`, which is not limited to the rule this guard
+/// loaded, and nothing disables pf again after the apply step's `-e` — confirm that is intended
+/// for hosts with their own pf configuration.
+#[cfg(target_os = "macos")]
+fn macos_undo_for(applied: &PlannedCommand, prev: u8) -> PlannedCommand {
+    match applied.prog {
+        "sysctl" => PlannedCommand::new(
+            "sysctl",
+            vec!["-w".into(), format!("net.inet.ip.forwarding={prev}")],
+            "restore ipv4 forwarding sysctl",
+        ),
+        "pfctl" => PlannedCommand::new(
+            "pfctl",
+            vec!["-F".into(), "nat".into()],
+            "flush pf NAT rules",
+        ),
+        other => unreachable!("unknown macos apply program: {other}"),
+    }
+}
+
+/// Read the current `net.inet.ip.forwarding` value via `sysctl -n` so it can be restored later.
+///
+/// Errors if `sysctl` cannot be spawned, exits unsuccessfully, or prints something that does
+/// not parse as a `u8`.
+#[cfg(target_os = "macos")]
+fn read_macos_ip_forwarding() -> Result<u8> {
+    let out = Command::new("sysctl")
+        .args(["-n", "net.inet.ip.forwarding"])
+        .output()
+        .map_err(|e| anyhow!("spawning sysctl: {e}"))?;
+    if !out.status.success() {
+        return Err(anyhow!(
+            "sysctl -n net.inet.ip.forwarding exited with {}: {}",
+            out.status,
+            String::from_utf8_lossy(&out.stderr).trim()
+        ));
+    }
+    // Borrow the trimmed output instead of allocating a second String for it.
+    let stdout = String::from_utf8_lossy(&out.stdout);
+    let s = stdout.trim();
+    s.parse::<u8>()
+        .map_err(|e| anyhow!("parsing forwarding sysctl '{s}': {e}"))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// dry_run on any platform succeeds, builds no rollback commands, and logs `would run: ...`
+    /// for both the linux and macos plans (we don't assert log content, just successful return).
+    #[test]
+    fn dry_run_succeeds_on_any_platform() {
+        let guard = NatGuard::enable("10.7.0.0/24", "eth0", true)
+            .expect("dry_run NatGuard::enable must succeed everywhere");
+        assert!(guard.dry_run, "dry_run flag preserved");
+        assert!(
+            guard.rollback.is_empty(),
+            "dry_run records no real rollback commands"
+        );
+        // With an empty rollback list the drop has nothing to undo; just ensure it cannot panic.
+        drop(guard);
+    }
+
+    /// The Linux plan contains exactly the two commands we expect, in order, with the right args.
+    #[test]
+    fn linux_plan_shape() {
+        let plan = linux_apply_plan("10.7.0.0/24", "eth0");
+        assert_eq!(plan.len(), 2);
+        assert_eq!(plan[0].prog, "sysctl");
+        assert_eq!(plan[0].args, vec!["-w", "net.ipv4.ip_forward=1"]);
+        assert_eq!(plan[1].prog, "iptables");
+        assert!(plan[1].args.contains(&"MASQUERADE".to_string()));
+        assert!(plan[1].args.contains(&"10.7.0.0/24".to_string()));
+        assert!(plan[1].args.contains(&"eth0".to_string()));
+        // POSTROUTING add, not delete.
+        assert!(plan[1].args.contains(&"-A".to_string()));
+        assert!(!plan[1].args.contains(&"-D".to_string()));
+    }
+
+    /// The macOS plan writes a pf rule file and loads it via `pfctl -f -e`.
+    #[test]
+    fn macos_plan_shape() {
+        let pf = std::path::PathBuf::from("/tmp/aura-nat.conf");
+        let plan = macos_apply_plan("10.7.0.0/24", "en0", &pf);
+        assert_eq!(plan.len(), 2);
+        assert_eq!(plan[0].prog, "sysctl");
+        assert_eq!(plan[0].args, vec!["-w", "net.inet.ip.forwarding=1"]);
+        assert_eq!(plan[1].prog, "pfctl");
+        assert!(plan[1].args.contains(&"/tmp/aura-nat.conf".to_string()));
+        assert!(plan[1].args.contains(&"-e".to_string()));
+    }
+
+    /// PlannedCommand::render returns a single shell-style string with prog + args.
+    #[test]
+    fn planned_command_render() {
+        let cmd = PlannedCommand::new(
+            "iptables",
+            vec!["-t".into(), "nat".into(), "-A".into(), "POSTROUTING".into()],
+            "x",
+        );
+        assert_eq!(cmd.render(), "iptables -t nat -A POSTROUTING");
+    }
+}
diff --git a/crates/aura-cli/src/privdrop.rs b/crates/aura-cli/src/privdrop.rs
new file mode 100644
index 0000000..5be5a05
--- /dev/null
+++ b/crates/aura-cli/src/privdrop.rs
@@ -0,0 +1,146 @@
+//! Privilege drop: switch the process's effective + real + saved UID/GID to a non-root user after
+//! all privileged startup work is done (binding the TUN, binding low ports, configuring NAT).
+//!
+//! v1 spec from project notes:
+//!
+//! * Linux uses `setgroups([g])` + `setresgid(g,g,g)` + `setresuid(u,u,u)` (this clears root's
+//!   supplementary groups and the saved set-uid, so the process can never `setuid(0)` back).
+//! * macOS does not expose `setresuid` in its BSD ABI — `nix` 0.29 provides `setgid` / `setuid`
+//!   instead, which on macOS perform a permanent drop when the calling process is root.
+//! * Windows is a no-op (named pipes + service accounts cover the analogous use case there).
+//!
+//! The drop is **best-effort**:
+//!
+//! * If the current euid is not 0 (e.g. dev running `cargo test` as themselves), [`drop_to_user`]
+//!   logs an info line and returns `Ok` without changing anything.
+//! * If the named user does not exist or the syscalls fail, the error bubbles up so `aura server`
+//!   exits rather than silently continuing as root.
+//!
+//! Callers **must** invoke [`drop_to_user`] after every privileged operation completes; doing it
+//! earlier means the TUN open or NAT command may fail with EPERM.
+
+use anyhow::Result;
+#[cfg(unix)]
+use anyhow::{anyhow, Context};
+
+/// Drop privileges to `username`. See module docs for platform behaviour.
+///
+/// Returns `Ok(())` on:
+/// * a successful drop (Linux/macOS, called as root),
+/// * a no-op on Windows,
+/// * a no-op when already running as a non-root user (Linux/macOS).
+///
+/// Returns `Err` only on Linux/macOS, when the user lookup or the syscalls themselves fail.
+#[cfg(unix)]
+pub fn drop_to_user(username: &str) -> Result<()> {
+    use nix::unistd::{geteuid, User};
+
+    // Check the *effective* uid — that is what carries root's powers (a set-uid-root binary
+    // has a non-root real uid but euid 0), and it is what the module docs promise.
+    if !geteuid().is_root() {
+        tracing::info!(
+            target: "aura::privdrop",
+            user = username,
+            "privilege drop skipped: already running as a non-root user"
+        );
+        return Ok(());
+    }
+
+    let user = User::from_name(username)
+        .with_context(|| format!("looking up user '{username}'"))?
+        .ok_or_else(|| anyhow!("user '{username}' not found in passwd database"))?;
+    let uid = user.uid;
+    let gid = user.gid;
+
+    // Order matters: drop GID first while we still have root, then UID. Doing UID first would
+    // leave us as a non-root user that cannot setgid anymore.
+    drop_uid_gid(uid, gid)?;
+
+    tracing::info!(
+        target: "aura::privdrop",
+        user = username,
+        uid = uid.as_raw(),
+        gid = gid.as_raw(),
+        "dropped privileges"
+    );
+    Ok(())
+}
+
+/// Windows stub: there is no analogous "drop to user" syscall sequence on Windows in v1. The
+/// server is expected to be run as a configured service account.
+#[cfg(windows)]
+pub fn drop_to_user(username: &str) -> Result<()> {
+    tracing::warn!(
+        target: "aura::privdrop",
+        user = username,
+        "privilege drop not implemented on Windows; run aura server as a low-privilege service account instead"
+    );
+    Ok(())
+}
+
+#[cfg(target_os = "linux")]
+fn drop_uid_gid(uid: nix::unistd::Uid, gid: nix::unistd::Gid) -> Result<()> {
+    use nix::unistd::{setgroups, setresgid, setresuid};
+    // Shed root's supplementary groups first: setresgid/setresuid do not touch them, so without
+    // this the "unprivileged" process would keep every group membership root had.
+    setgroups(&[gid]).with_context(|| format!("setgroups([{}])", gid.as_raw()))?;
+    // Full triple-drop on Linux: real + effective + saved. This guarantees the process cannot
+    // regain root via setuid(0).
+    setresgid(gid, gid, gid).with_context(|| format!("setresgid({})", gid.as_raw()))?;
+    setresuid(uid, uid, uid).with_context(|| format!("setresuid({})", uid.as_raw()))?;
+    Ok(())
+}
+
+#[cfg(target_os = "macos")]
+fn drop_uid_gid(uid: nix::unistd::Uid, gid: nix::unistd::Gid) -> Result<()> {
+    use nix::unistd::{setgid, setuid};
+    // macOS does not expose setresuid in its BSD ABI. setgid/setuid perform a permanent drop
+    // when invoked as root: the kernel sets the real, effective and saved ids together, so this
+    // is just as strong as setresuid here.
+    // NOTE(review): supplementary groups are not cleared on this path — confirm whether the
+    // groups inherited from root matter for the deployment.
+    setgid(gid).with_context(|| format!("setgid({})", gid.as_raw()))?;
+    setuid(uid).with_context(|| format!("setuid({})", uid.as_raw()))?;
+    Ok(())
+}
+
+// Other unix targets (BSDs, etc.) — fall back to setgid/setuid which exist everywhere POSIX.
+#[cfg(all(unix, not(any(target_os = "linux", target_os = "macos"))))]
+fn drop_uid_gid(uid: nix::unistd::Uid, gid: nix::unistd::Gid) -> Result<()> {
+    use nix::unistd::{setgid, setuid};
+    setgid(gid).with_context(|| format!("setgid({})", gid.as_raw()))?;
+    setuid(uid).with_context(|| format!("setuid({})", uid.as_raw()))?;
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// On a developer/CI host this test runs as a non-root user, so drop_to_user must be a
+    /// no-op (no panic, no error, no actual privilege change). Verifies the early-return path.
+    #[test]
+    #[cfg(unix)]
+    fn no_op_when_already_non_root() {
+        if nix::unistd::geteuid().is_root() {
+            // As root the call would really switch the whole test binary to another user.
+            return;
+        }
+        // Use "nobody" as the requested user — it exists on every Unix CI image but we should
+        // never actually call setuid on it because we are not root.
+        let res = drop_to_user("nobody");
+        // We are unprivileged here, so the call took the early-return path and must report Ok
+        // without having changed any ids.
+        assert!(
+            res.is_ok(),
+            "drop_to_user should return Ok on a non-root host, got: {res:?}"
+        );
+    }
+
+    #[test]
+    #[cfg(windows)]
+    fn windows_is_a_noop() {
+        // Windows: always Ok, regardless of username.
+        assert!(drop_to_user("any").is_ok());
+    }
+}
diff --git a/crates/aura-cli/src/server.rs b/crates/aura-cli/src/server.rs
index 801dba6..d322a30 100644
--- a/crates/aura-cli/src/server.rs
+++ b/crates/aura-cli/src/server.rs
@@ -39,7 +39,9 @@ use tokio::sync::RwLock;
 use crate::admin::{self, AdminState, Stats};
 use crate::config::ServerConfigFile;
 use crate::masks::MaskRotator;
+use crate::nat::NatGuard;
 use crate::pool::IpPool;
+use crate::privdrop;
 use crate::server_router::ServerRouter;
 
 /// Entry point for `aura server --config ` (and optional `--admin-socket`).
@@ -114,6 +116,33 @@ pub async fn run(config_path: &Path, admin_socket: &str) -> anyhow::Result<()> {
         "starting Aura server"
     );
 
+    // Auto-NAT: when [server.nat] auto = true, enable IP forwarding and add a MASQUERADE rule
+    // for the pool's CIDR through the configured egress interface. The returned guard is bound
+    // to the lifetime of `run()` so its Drop reverts the changes on shutdown / panic. When
+    // [server.nat] is omitted (the v1-compatible path) the operator is expected to have
+    // configured forwarding by hand and no guard is created.
+    let _nat_guard: Option<NatGuard> = if let Some(nat) = cfg.server.nat.as_ref() {
+        if nat.auto {
+            if nat.egress_iface.trim().is_empty() {
+                anyhow::bail!(
+                    "[server.nat] auto = true requires `egress_iface` to be set (no auto-detection in v1)"
+                );
+            }
+            Some(
+                NatGuard::enable(
+                    &resolved_pool.cidr.to_string(),
+                    &nat.egress_iface,
+                    nat.dry_run,
+                )
+                .context("enabling auto-NAT (see [server.nat] in server.toml)")?,
+            )
+        } else {
+            None
+        }
+    } else {
+        None
+    };
+
     // Bind every enabled transport at once. The QUIC outer (mimicry) cert reuses the Aura server
     // leaf inside `proto_cfg`, matching the transport's guidance.
     let server = MultiServer::bind(endpoints, proto_cfg.clone(), udp_opts, tcp_opts.clone())
@@ -182,6 +211,25 @@ pub async fn run(config_path: &Path, admin_socket: &str) -> anyhow::Result<()> {
     let tun = AuraTun::create("aura-srv0", server_tun_ip, prefix, mtu)
         .await
         .context("failed to create server TUN (needs root)")?;
+
+    // Privilege drop. All operations that need root (TUN open, low-port bind, NAT configure)
+    // have completed by this point — switch to the configured non-root user before entering the
+    // long-lived accept/serve loop. The NatGuard above stays alive, so its rollback runs later
+    // as the dropped user. The drop is permanent and sudo leaves no "saved" capabilities
+    // behind, so the root-only undo commands (`sysctl -w`, `iptables` / `pfctl`) are expected
+    // to fail and the forwarding / NAT changes then outlive the process. Warn about that
+    // combination; the operator can avoid it by disabling `run_as` or `[server.nat] auto`.
+    if let Some(user) = cfg.server.run_as.as_deref() {
+        let nat_applied = cfg.server.nat.as_ref().map_or(false, |nat| nat.auto && !nat.dry_run);
+        if nat_applied {
+            tracing::warn!(
+                user = user,
+                "[server] run_as is combined with [server.nat] auto: the NAT rollback will run without root and is expected to fail; remove the forwarding/NAT changes manually after shutdown"
+            );
+        }
+        privdrop::drop_to_user(user).context("dropping server privileges per [server] run_as")?;
+    }
+
     let router = ServerRouter::new(tun, Arc::clone(&pool));
     let server_routes = router.routes();
     let inbound_tx = router.inbound_sender();
diff --git a/crates/aura-cli/tests/admin_socket.rs b/crates/aura-cli/tests/admin_socket.rs
index 1bfe03f..7d54d31 100644
--- a/crates/aura-cli/tests/admin_socket.rs
+++ b/crates/aura-cli/tests/admin_socket.rs
@@ -1,11 +1,15 @@
-//! Admin socket roundtrip: start the admin listener on a temp Unix socket over a shared
-//! [`RouteTable`], connect a client, send `route_add` / `route_list` / `route_remove` / `status`,
-//! and assert the table changed and the responses are correct.
+//! Admin socket roundtrip: start the admin listener on a temp Unix socket or Windows named pipe
+//! over a shared [`RouteTable`], connect a client, send `route_add` / `route_list` /
+//! `route_remove` / `status`, and assert the table changed and the responses are correct.
 //!
-//! Runs without root or network (an `AF_UNIX` socket in the temp dir).
-
-#![cfg(unix)]
+//! Runs without root or network (an `AF_UNIX` socket on Unix, a per-pid named pipe on Windows).
+//!
+//! The `windows` half compiles only under `cfg(windows)` (the named-pipe types live in
+//! `tokio::net::windows::named_pipe`); on macOS/Linux the `unix` test runs and the windows
+//! version is excluded by cfg. The cross-platform `cargo build --workspace` still must succeed
+//! everywhere (`cfg(windows)` code is simply excluded on non-Windows hosts).
 
+#[cfg(unix)]
 use std::path::PathBuf;
 use std::sync::Arc;
 
@@ -15,6 +19,7 @@ use tokio::sync::RwLock;
 
 /// A unique socket path for this test (Unix socket paths are length-limited; temp dir keeps it
 /// short enough on macOS/Linux).
+#[cfg(unix)]
 fn socket_path() -> PathBuf {
     let mut p = std::env::temp_dir();
     p.push(format!(
@@ -28,6 +33,20 @@ fn socket_path() -> PathBuf {
     p
 }
 
+/// A unique named-pipe path for this test (Windows pipes live in `\\.\pipe\`).
+#[cfg(windows)]
+fn pipe_path() -> String {
+    format!(
+        r"\\.\pipe\aura-admin-test-{}-{}",
+        std::process::id(),
+        std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap()
+            .as_nanos()
+    )
+}
+
+#[cfg(unix)]
 #[tokio::test]
 async fn admin_socket_route_roundtrip() {
     let routes = Arc::new(RwLock::new(RouteTable::new(RouteAction::Vpn)));
@@ -145,3 +164,104 @@ async fn admin_socket_route_roundtrip() {
     listener.abort();
     let _ = std::fs::remove_file(&path);
 }
+
+/// Windows analogue of the Unix roundtrip: bind the admin listener on a unique named pipe,
+/// drive a sequence of route_add / route_list / status / route_remove requests through it, and
+/// assert the shared [`RouteTable`] mutated as expected. The wire protocol and `handle_request`
+/// path are identical to Unix; only the transport differs.
+///
+/// Compiled only on Windows (the `windows::named_pipe` module is not available on Unix), but
+/// the file as a whole compiles everywhere so `cargo build --workspace` on a macOS dev host
+/// still type-checks the cfg-gated code path that gets selected at compile time.
+#[cfg(windows)]
+#[tokio::test]
+async fn admin_pipe_route_roundtrip() {
+    let routes = Arc::new(RwLock::new(RouteTable::new(RouteAction::Vpn)));
+    let stats = Arc::new(Stats::new());
+    stats.set_peer_id(Some("client-test".to_string()));
+    let state = AdminState::new(
+        Arc::clone(&routes),
+        Arc::clone(&stats),
+        std::iter::empty(),
+        std::iter::empty(),
+    );
+
+    let path = pipe_path();
+
+    // Spawn the listener.
+    let serve_path = path.clone();
+    let listener = tokio::spawn(async move {
+        let _ = admin::serve(&serve_path, state).await;
+    });
+
+    // Give the listener a moment to bind the pipe (the named-pipe accept loop is async; a
+    // short retry loop in `request` would also catch this, but we keep the test symmetric
+    // with the Unix variant).
+    for _ in 0..200 {
+        // Best-effort: try to open the pipe; if it's not yet up the request will retry.
+        if tokio::net::windows::named_pipe::ClientOptions::new()
+            .open(&path)
+            .is_ok()
+        {
+            break;
+        }
+        tokio::time::sleep(std::time::Duration::from_millis(5)).await;
+    }
+
+    // route_add (cidr, direct).
+    let resp = admin::request(
+        &path,
+        &Request::RouteAdd {
+            cidr: Some("8.8.8.0/24".into()),
+            domain: None,
+            action: "direct".into(),
+        },
+    )
+    .await
+    .expect("route_add request");
+    assert!(resp.ok, "route_add ok: {:?}", resp.error);
+
+    // The shared table actually changed.
+    assert_eq!(
+        routes.read().await.classify("8.8.8.8".parse().unwrap()),
+        RouteAction::Direct
+    );
+
+    // route_list reflects the single CIDR rule and the default.
+    let resp = admin::request(&path, &Request::RouteList)
+        .await
+        .expect("route_list");
+    assert!(resp.ok);
+    assert_eq!(resp.default.as_deref(), Some("vpn"));
+    let cidrs = resp.cidrs.expect("cidrs present");
+    assert_eq!(cidrs.len(), 1);
+    assert_eq!(cidrs[0].cidr, "8.8.8.0/24");
+    assert_eq!(cidrs[0].action, "direct");
+
+    // status reflects peer id + default + rule count.
+    let resp = admin::request(&path, &Request::Status)
+        .await
+        .expect("status");
+    assert!(resp.ok);
+    assert_eq!(resp.peer_id.as_deref(), Some("client-test"));
+    assert_eq!(resp.default.as_deref(), Some("vpn"));
+    assert_eq!(resp.rules, Some(1));
+
+    // route_remove the CIDR; classification falls back to default VPN.
+    let resp = admin::request(
+        &path,
+        &Request::RouteRemove {
+            cidr: "8.8.8.0/24".into(),
+        },
+    )
+    .await
+    .expect("route_remove");
+    assert_eq!(resp.removed, Some(true));
+    assert_eq!(
+        routes.read().await.classify("8.8.8.8".parse().unwrap()),
+        RouteAction::Vpn
+    );
+
+    listener.abort();
+    // Named pipes are auto-released when the last handle is dropped; no explicit cleanup.
+}
diff --git a/crates/aura-cli/tests/nat.rs b/crates/aura-cli/tests/nat.rs
new file mode 100644
index 0000000..19a6066
--- /dev/null
+++ b/crates/aura-cli/tests/nat.rs
@@ -0,0 +1,27 @@
+//! Integration tests for the auto-NAT helper (`aura_cli::nat::NatGuard`).
+//!
+//! These tests only exercise the dry-run code path. Real NAT mutation needs root and a host with
+//! `iptables` (Linux) or `pfctl` (macOS), neither of which is appropriate for the unit test runner.
+//! The dry-run path is platform-portable: it logs `would run: ...` for both the Linux and macOS
+//! plans and never touches the host. The same code path is what the operator can use to inspect
+//! the apply plan with `cargo run -- server --config ...` when `[server.nat] dry_run = true`.
+
+use aura_cli::nat::NatGuard;
+
+/// Dry-run is supported on every host (Linux, macOS, Windows) and returns a guard with no
+/// recorded rollback commands, so dropping it has nothing to undo and must not panic.
+#[test]
+fn dry_run_enable_succeeds() {
+    let guard = NatGuard::enable("10.7.0.0/24", "eth0", true)
+        .expect("dry_run NatGuard::enable must succeed");
+    drop(guard);
+}
+
+/// The dry-run path tolerates arbitrary interface names — it never tries to look them up, just
+/// logs what it would do. Also exercises a different pool CIDR.
+#[test]
+fn dry_run_enable_accepts_any_iface_name() {
+    let guard = NatGuard::enable("192.168.99.0/24", "en0", true)
+        .expect("dry_run must succeed with any iface name");
+    drop(guard);
+}
diff --git a/crates/aura-cli/tests/privdrop.rs b/crates/aura-cli/tests/privdrop.rs
new file mode 100644
index 0000000..25dda42
--- /dev/null
+++ b/crates/aura-cli/tests/privdrop.rs
@@ -0,0 +1,46 @@
+//! Integration tests for `aura_cli::privdrop::drop_to_user`.
+//!
+//! These tests cover the unprivileged case: when not root, `drop_to_user` MUST take the
+//! "already non-root, skip" fast path and return Ok. When the runner itself is root (common in
+//! container CI) they skip, because the call would then really look up and switch users;
+//! exercising the syscalls for real is out of scope for a unit test.
+
+use aura_cli::privdrop::drop_to_user;
+
+/// True when the test process holds root. The no-op contract checked below only applies to an
+/// unprivileged caller, so the tests return early when this is true.
+fn running_as_root() -> bool {
+    #[cfg(unix)]
+    {
+        nix::unistd::geteuid().is_root()
+    }
+    #[cfg(not(unix))]
+    {
+        false
+    }
+}
+
+/// On a non-root host the call is a no-op: it logs a "skipped" line and returns Ok regardless of
+/// whether the requested user actually exists (we never reach the lookup path).
+#[test]
+fn drop_to_user_is_noop_when_not_root() {
+    if running_as_root() {
+        return; // as root this would really switch the test process to "nobody"
+    }
+    let res = drop_to_user("nobody");
+    assert!(
+        res.is_ok(),
+        "drop_to_user must be a no-op on a non-root host, got {res:?}"
+    );
+}
+
+/// A non-existent user is still tolerated when not root (because we never reach the lookup at
+/// all). This guarantees the dev/CI flow never blows up on a misconfigured `[server] run_as`.
+#[test]
+fn drop_to_user_does_not_lookup_user_when_not_root() {
+    if running_as_root() {
+        return; // as root the lookup does happen and is expected to fail for this name
+    }
+    let res = drop_to_user("this-user-definitely-does-not-exist-aura-12345");
+    assert!(res.is_ok(), "no lookup happens on a non-root host: {res:?}");
+}