//! v3.1 / v3.2 multi-hop / onion routing — the **client side** of an N-hop circuit //! `client → hop[0] → hop[1] → ... → hop[N-1]`. v3.1 supports `N = 2` (entry + exit); //! v3.2 supports `N = 2` OR `N = 3` (entry + middle + exit) plus **per-hop client //! certificates** so different hops cannot be linked by certificate CN. //! //! ## Wire dance (recursive) //! //! For each hop `i` from `0` to `N-1` the dialler: //! //! 1. **Outer handshake to `hop[i]`**: opens an Aura UDP transport connection to `hop[i].addr` //! (through any already-stacked proxy/forwarder chain) using `hop[i].proto_cfg`, which carries //! that hop's expected SAN as `server_name` AND the per-hop client cert/key — see [`HopConfig`]. //! 2. **ExtendBridge** (only if `i < N - 1`): sends one //! [`aura_proto::ControlKind::ExtendBridge`] envelope carrying `hop[i+1].addr` to ask the //! current hop to splice a bridge to the next downstream hop. Waits for //! [`aura_proto::ControlKind::CircuitReady`] (or [`aura_proto::ControlKind::CircuitFailed`]). //! 3. **Loopback proxy** (only if `i < N - 1`): binds a local UDP socket and spawns a forwarder //! that splices every datagram between that socket and the outer connection to `hop[i]`. The //! next iteration's outer handshake is addressed at this loopback socket — so the actual bytes //! on the wire travel through the existing tunnel to `hop[i]`, which forwards them through its //! bridge to `hop[i+1]`. //! 4. **Final hop** (`i == N - 1`): no ExtendBridge / loopback — the connection returned by step //! 1 is the innermost session and authenticates the *exit's* cert. Its `peer_id()` is the exit //! SAN; every subsequent send/recv on the resulting [`CircuitConnection`] is wrapped in //! `N` AEAD layers (one per hop). //! //! Result: every IP packet is encrypted N times — once per hop — so the exit knows the client's //! certificate CN but not the source IP; every intermediate hop knows the previous hop's address //! and the next hop's address but not the destination, and never sees a plaintext byte. //! //! ## Per-hop client identity (v3.2) //! //! The v3.1 dialler used a single `[pki]` cert/key for every hop, so the entry-relay and the exit //! both saw the *same* certificate CN — trivially linkable. v3.2 lets the caller pass a different //! [`aura_proto::ClientConfig`] for each hop via [`HopConfig`]. The CLI generates an indepedent //! UUID-v4 cert per hop with `aura provision-client --circuit-hops N`. With distinct CNs per hop //! the only thing that is linkable is the *temporal* correlation of one packet leaving the client //! and one packet leaving the exit — which the cell-padding wrapper (see [`crate::cells`]) is the //! companion mitigation for. use std::net::SocketAddr; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; use std::time::Duration; use anyhow::{anyhow, bail, Context}; use async_trait::async_trait; use aura_proto::{ decode_control_envelope, encode_control_envelope, encode_extend_bridge, ClientConfig, ControlKind, PacketConnection, }; use aura_transport::{UdpClient, UdpConnection, UdpOpts}; use tokio::net::UdpSocket; use tokio::sync::RwLock; use tokio::task::JoinHandle; /// How long the client waits for each hop to reply with [`ControlKind::CircuitReady`] after /// sending the [`ControlKind::ExtendBridge`] envelope. const READY_TIMEOUT_SECS: u64 = 5; /// Per-hop dial configuration. One instance per hop in the circuit; the order matches the wire /// order (`hops[0]` = entry, `hops[N-1]` = exit). /// /// `proto_cfg.server_name` is the SAN the verifier checks on **this hop's** certificate during the /// outer Aura handshake. `proto_cfg.client_cert_pem` / `proto_cfg.client_key_pem` is the client /// identity presented **to this hop** — different per hop in v3.2 so the entry and the exit cannot /// link the two handshakes by certificate CN. #[derive(Debug, Clone)] pub struct HopConfig { /// Wire address of this hop (already resolved to `IP:port`). pub addr: SocketAddr, /// Aura client config for the handshake to *this* hop. pub proto_cfg: ClientConfig, } impl HopConfig { /// Convenience: build a hop using the same client config as the rest of the circuit. Used by /// the v3.1 / `CircuitHop::Addr` back-compat path where the caller wants every hop to use the /// global `[pki]` cert/key (matching the v3.1 behaviour). pub fn from_shared(addr: SocketAddr, proto_cfg: ClientConfig) -> Self { Self { addr, proto_cfg } } } /// An established multi-hop circuit. The inner [`UdpConnection`]'s outgoing datagrams travel /// through a chain of loopback proxies + outer relay connections; from the inner handshake / data /// exchange's point of view nothing is special — it is talking to a normal Aura UDP server. /// /// The outer connections and forwarder tasks are owned here so dropping the circuit tears /// everything down in order. pub struct CircuitConnection { /// The innermost UDP connection (target of the final hop's handshake). All `send_packet` / /// `recv_packet` calls delegate to it; the forwarder chain splices its bytes onto the outer /// hops in order. inner: UdpConnection, /// Every outer hop connection, in order (`hop[0]` first). Pinned alive for the lifetime of the /// circuit; the per-hop forwarder tasks own clones, but holding the originals here means every /// outer is dropped at exactly the same time as `Self`. _outer_conns: Vec>, /// One forwarder task per intermediate hop (so `N - 1` tasks for an N-hop circuit). Aborted in /// [`Drop`] so dropping the circuit cleans them up. forwarders: Vec>, /// The chain of loopback proxy sockets (one per intermediate hop). Held here so they outlive /// the forwarders that read/write through them; the forwarder also holds an `Arc` /// clone, but this prevents a close-on-last-clone race during shutdown. _proxy_sockets: Vec>, } impl Drop for CircuitConnection { fn drop(&mut self) { for f in &self.forwarders { f.abort(); } } } impl CircuitConnection { /// The verified peer Common Name as learned during the **innermost** handshake. This is the /// **exit-server's** identity (NOT any intermediate hop) — the whole point of multi-hop is that /// the inner handshake authenticates the exit through every relay opaquely. #[must_use] pub fn peer_id(&self) -> Option<&str> { self.inner.peer_id() } /// Promote into a trait object so the router / dialer layer can treat the circuit the same way /// it treats a single-hop UDP / TCP / QUIC connection. #[must_use] pub fn into_dyn(self) -> Arc { Arc::new(self) } } #[async_trait] impl PacketConnection for CircuitConnection { async fn send_packet(&self, packet: &[u8]) -> anyhow::Result<()> { // Delegate to the inner UdpConnection — the proxy forwarder picks up its outgoing // datagrams from the innermost loopback proxy socket and tunnels them through the chain. self.inner.send_packet(packet).await } async fn recv_packet(&self) -> anyhow::Result> { self.inner.recv_packet().await } } /// Build an N-hop circuit `client → hops[0] → hops[1] → ... → hops[N-1]`. Returns the established /// [`CircuitConnection`]. /// /// `hops.len()` must be in `{2, 3}` — v3.1 accepted only 2; v3.2 extends to 3. Each entry's /// [`HopConfig::proto_cfg`] supplies: /// /// * The SAN expected on that hop's server certificate (`proto_cfg.server_name`). /// * The client cert/key presented **to that hop** (`proto_cfg.client_cert_pem` / /// `proto_cfg.client_key_pem`). Distinct per hop = identity-unlinkable v3.2 behaviour. /// /// # Errors /// * Any outer UDP connection failed. /// * Any intermediate hop refused (`CircuitFailed`) or did not reply within /// [`READY_TIMEOUT_SECS`] seconds. /// * The inner Aura handshake to the exit failed (bad exit cert chain, SAN mismatch, etc.). pub async fn dial_circuit( hops: &[HopConfig], udp_opts: UdpOpts, ) -> anyhow::Result { if hops.len() < 2 || hops.len() > 3 { bail!( "v3.2 multi-hop supports 2 or 3 hops (entry, [middle,] exit); got {}", hops.len() ); } // We build the chain iteratively. At each iteration the "current outer" is what we are // currently dialing through; for the first hop it is a literal `UdpClient::connect`, for every // subsequent hop it is a loopback proxy + forwarder splicing onto the previous outer. let mut outer_conns: Vec> = Vec::with_capacity(hops.len() - 1); let mut forwarders: Vec> = Vec::with_capacity(hops.len() - 1); let mut proxy_sockets: Vec> = Vec::with_capacity(hops.len() - 1); // Step 1: dial the very first hop directly via UDP. This is the only hop whose outer handshake // exits the client process as a real datagram on the OS network stack. let entry = &hops[0]; let first = UdpClient::connect(entry.addr, entry.proto_cfg.clone(), udp_opts) .await .with_context(|| format!("dial entry hop at {}", entry.addr))?; let mut current_outer: Arc = first.into_dyn(); // For every *intermediate* hop (every hop except the last) we: // - ask it to bridge to the next hop via ExtendBridge, // - wait for CircuitReady, // - bring up a loopback proxy + forwarder so the next outer handshake travels through // `current_outer`, // - then re-dial the *next* hop via that loopback proxy and update `current_outer`. // // After the loop, `current_outer` is the outer connection to `hops[N-2]` and the next dial // (step 6 below) is the inner handshake to `hops[N-1]` (the exit). We need to keep // `current_outer` itself in `outer_conns` too — it is the outermost of the inner-handshake's // pipe. for i in 0..hops.len() - 1 { let next = &hops[i + 1]; // 2. Tell the current hop to splice onto `next.addr`. let payload = encode_extend_bridge(next.addr); let envelope = encode_control_envelope(ControlKind::ExtendBridge, &payload); current_outer .send_packet(&envelope) .await .with_context(|| format!("send ExtendBridge to hop[{}] at {}", i, hops[i].addr))?; // 3. Wait for CircuitReady from this hop (or CircuitFailed = bail). The remote may send // unrelated envelopes (CRL pushes etc.) in front of ours; ignore until our envelope // arrives or the deadline elapses. let ready_deadline = tokio::time::Instant::now() + std::time::Duration::from_secs(READY_TIMEOUT_SECS); loop { let now = tokio::time::Instant::now(); if now >= ready_deadline { bail!( "timeout waiting for CircuitReady from hop[{}] at {}", i, hops[i].addr ); } let remaining = ready_deadline - now; let pkt = tokio::time::timeout(remaining, current_outer.recv_packet()) .await .map_err(|_| { anyhow!( "timeout waiting for CircuitReady from hop[{}] at {}", i, hops[i].addr ) })? .with_context(|| format!("recv from hop[{}] at {}", i, hops[i].addr))?; match decode_control_envelope(&pkt) { Ok(Some((ControlKind::CircuitReady, _))) => break, Ok(Some((ControlKind::CircuitFailed, reason))) => { let r = String::from_utf8_lossy(&reason); bail!("hop[{}] at {} refused circuit: {}", i, hops[i].addr, r); } Ok(Some((other, _))) => { tracing::debug!( hop = i, kind = ?other, "ignoring unexpected control envelope while waiting for CircuitReady" ); continue; } Ok(None) => { tracing::debug!( hop = i, "ignoring non-control packet from hop before CircuitReady" ); continue; } Err(e) => { tracing::debug!( hop = i, error = %e, "malformed envelope from hop before CircuitReady" ); continue; } } } // 4. Bring up the local proxy UDP socket. The next iteration's UdpClient::connect will // target this address; the forwarder below splices every datagram between the proxy // socket and the current outer connection. let proxy_socket = UdpSocket::bind("127.0.0.1:0") .await .with_context(|| format!("bind loopback proxy for hop[{}] -> hop[{}]", i, i + 1))?; let proxy_addr = proxy_socket .local_addr() .context("read local proxy address")?; let proxy_socket = Arc::new(proxy_socket); // 5. Spawn the forwarder BEFORE running the next outer handshake — the handshake's first // datagram must already be flowing while it is being written. let outer_for_send = Arc::clone(¤t_outer); let outer_for_recv = Arc::clone(¤t_outer); let proxy_for_send = Arc::clone(&proxy_socket); let proxy_for_recv = Arc::clone(&proxy_socket); let hop_idx = i; let forwarder = tokio::spawn(async move { // Source address of the next-hop UdpClient, learned from its first datagram on the // proxy socket. We need it to know where to deliver `outer.recv_packet` payloads back. let inner_peer: Arc>> = Arc::new(tokio::sync::Mutex::new(None)); // Task A: proxy.recv_from -> outer.send_packet let inner_peer_a = Arc::clone(&inner_peer); let to_outer = async move { let mut buf = vec![0u8; 4096]; loop { let (n, from) = match proxy_for_recv.recv_from(&mut buf).await { Ok(v) => v, Err(_) => break, }; { let mut latch = inner_peer_a.lock().await; if latch.is_none() { *latch = Some(from); } } if outer_for_send.send_packet(&buf[..n]).await.is_err() { break; } } }; // Task B: outer.recv_packet -> proxy.send_to(inner_peer_addr) let inner_peer_b = Arc::clone(&inner_peer); let from_outer = async move { loop { let pkt = match outer_for_recv.recv_packet().await { Ok(p) => p, Err(_) => break, }; let dest = { *inner_peer_b.lock().await }; if let Some(dest) = dest { if proxy_for_send.send_to(&pkt, dest).await.is_err() { break; } } // Else: next-hop UdpClient has not sent its first datagram yet; drop. The // reliable adapter will retransmit on its RTO timer. The race window is tiny. } }; tokio::select! { _ = to_outer => {} _ = from_outer => {} } tracing::debug!(hop = hop_idx, "circuit forwarder exited"); }); // 6. Move `current_outer` into our owned list, spawn the forwarder + socket into theirs, // then dial the *next* hop through the loopback proxy. The dial returns the new // `current_outer`. outer_conns.push(current_outer); forwarders.push(forwarder); proxy_sockets.push(Arc::clone(&proxy_socket)); // 7. Dial the next hop through the proxy. For an intermediate next hop this becomes the // new `current_outer`; for the final hop (last iteration) it is the *inner* connection // we return wrapped in `CircuitConnection`. let is_last = i == hops.len() - 2; let next_conn = UdpClient::connect(proxy_addr, next.proto_cfg.clone(), udp_opts) .await .with_context(|| { format!( "{} handshake to hop[{}] at {} through hop[{}]", if is_last { "inner" } else { "intermediate" }, i + 1, next.addr, i ) })?; if is_last { // The innermost session: wrap it in CircuitConnection along with every outer + proxy // we own. Note: we do NOT push next_conn into outer_conns — it becomes `inner`. return Ok(CircuitConnection { inner: next_conn, _outer_conns: outer_conns, forwarders, _proxy_sockets: proxy_sockets, }); } else { // Promote to dyn for the next loop iteration. current_outer = next_conn.into_dyn(); } } // Unreachable: the loop always returns when `is_last` is true (the last intermediate // iteration always produces the inner session for the exit). unreachable!("dial_circuit loop must return on the final hop") } /// v3.1 back-compat shim: build hops from a flat `[SocketAddr]` list using a shared /// [`ClientConfig`] for every hop and call [`dial_circuit`]. Useful for code paths that have a /// single proto_cfg (e.g. an old `[client] sni`). /// /// Behaviour matches v3.1 exactly when given exactly 2 hops; with 3 hops it now also works (every /// hop uses the same cert / key, i.e. NOT identity-unlinkable — use the per-hop variant for that). pub async fn dial_circuit_shared_cfg( hops: &[SocketAddr], proto_cfg: ClientConfig, udp_opts: UdpOpts, ) -> anyhow::Result { let hop_cfgs: Vec = hops .iter() .map(|a| HopConfig::from_shared(*a, proto_cfg.clone())) .collect(); dial_circuit(&hop_cfgs, udp_opts).await } /// Variant of [`dial_circuit_shared_cfg`] letting the caller override the SAN expected on the /// **first hop's** cert (the relay) independently of the exit's expected SAN /// (`proto_cfg.server_name`, used by the inner handshake). v3.1 kept this for the loopback test /// which uses a different SAN per role. /// /// Equivalent to v3.1 behaviour. For arbitrary per-hop overrides, build a `Vec` /// directly and call [`dial_circuit`]. pub async fn dial_circuit_with_relay_name( hops: &[SocketAddr], proto_cfg: ClientConfig, udp_opts: UdpOpts, relay_server_name: Option<&str>, ) -> anyhow::Result { if hops.len() != 2 { bail!( "dial_circuit_with_relay_name requires exactly 2 hops (entry, exit); got {}", hops.len() ); } let mut entry_cfg = proto_cfg.clone(); if let Some(name) = relay_server_name { entry_cfg.server_name = name.to_string(); } let hop_cfgs = vec![ HopConfig::from_shared(hops[0], entry_cfg), HopConfig::from_shared(hops[1], proto_cfg), ]; dial_circuit(&hop_cfgs, udp_opts).await } // ---- v3.3: RotatingCircuit --------------------------------------------------------------------- // // Every `interval` seconds the rotator silently rebuilds the entire N-hop circuit from scratch // (new outer handshakes, new ExtendBridge envelopes, a fresh inner handshake to the exit) and // atomically swaps the new [`CircuitConnection`] in for the old one. Any in-flight `send_packet` // / `recv_packet` calls on the previous instance keep running on their own `Arc` clones until // they complete or the OS-level socket dies; new sends/receives after the swap go through the // fresh circuit. The old circuit is dropped — closing every outer connection and aborting every // forwarder task — as soon as the last in-flight `Arc` is released. // // Identity rotation: because `dial_circuit` re-runs the full per-hop handshake every time, every // relay sees a brand-new TLS session (different ephemeral key, fresh AEAD nonces). With per-hop // client certs (v3.2) the certificate CN is also rotated. The exit only knows the client's // stable cert CN; the relay only knows the previous and next IP — neither side can correlate // activity across rotations to a single long-lived flow. /// Parameters captured at construction time so the background rotator can rebuild the circuit /// without re-reading the config. Immutable for the lifetime of the rotator. struct RebuildParams { /// Per-hop dial configs. The whole vector is cloned into every [`dial_circuit`] call so /// concurrent rebuild attempts cannot mutate each other's view. hops: Vec, /// UDP transport options applied to every outer hop's [`aura_transport::UdpClient::connect`]. udp_opts: UdpOpts, /// How long to wait between successful rebuilds. Failures do not reset the timer — the next /// tick is `interval` from the previous wakeup, regardless of outcome. interval: Duration, } /// A [`PacketConnection`] wrapper that periodically rebuilds the underlying [`CircuitConnection`] /// in the background. Every `send_packet` / `recv_packet` call delegates to the **currently active** /// inner [`CircuitConnection`]; when a rebuild completes, the new circuit atomically replaces the /// old one. /// /// ## Lifecycle /// /// * [`RotatingCircuit::new`] dials the initial circuit synchronously (so the caller can fail fast /// if the entry hop is unreachable) and then spawns the background rotator. /// * Every `interval` the rotator runs [`dial_circuit`] with the captured [`RebuildParams::hops`]. /// On success the new [`CircuitConnection`] replaces the previous one inside the [`RwLock`]; /// on failure the previous one is kept and the rotator logs a warning, then waits another /// `interval` before retrying. /// * [`Drop`] aborts the rotator task. The currently-active inner circuit is dropped through the /// `Arc` chain, tearing down its forwarders and outer sockets. /// /// ## Cell padding interaction /// /// The CLI wires [`RotatingCircuit`] **inside** any [`crate::cells::CellPaddingConn`] — the /// padding layer is applied to the rotator's `Arc`, not to each individual /// circuit. This means every rotation produces a circuit that carries cells of the **same** /// `cell_size`, keeping the on-wire signature stable across rotations. pub struct RotatingCircuit { /// The currently-active circuit. Replaced on each successful rebuild. /// /// `Arc<...>` so `send_packet` / `recv_packet` can grab a cheap clone, release the read-lock, /// then await on the snapshot — any in-flight call on a *previous* inner does not block the /// rotator's swap. current: Arc>>, /// Captured rebuild parameters. Wrapped in `Arc` so the rotator task can own a clone without /// holding `&self`. _rebuild: Arc, /// Number of *successful* rotations completed since construction. Tests use this to assert /// that the background rotator actually ran; production code does not depend on the value. rotation_count: Arc, /// Background rotator. Aborted on [`Drop`]. rotator_task: JoinHandle<()>, } impl Drop for RotatingCircuit { fn drop(&mut self) { // Stop the rotator first so it cannot replace `current` mid-drop. self.rotator_task.abort(); // `current`'s last `Arc` is released when `self` goes out of scope; that drops the // wrapped `CircuitConnection`, which in turn aborts every forwarder + closes every outer. } } impl RotatingCircuit { /// Dial the initial N-hop circuit and start the background rotator. /// /// `interval` MUST be greater than zero; the caller is expected to gate construction on a /// non-zero `rotation_interval_secs`. If `dial_circuit` fails synchronously, the error /// propagates and no background task is spawned. /// /// # Errors /// * The initial [`dial_circuit`] failed (entry hop unreachable, hop count invalid, etc.). pub async fn new( hops: Vec, udp_opts: UdpOpts, interval: Duration, ) -> anyhow::Result { let initial = dial_circuit(&hops, udp_opts) .await .context("RotatingCircuit: initial dial_circuit")?; let current = Arc::new(RwLock::new(Arc::new(initial))); let rebuild = Arc::new(RebuildParams { hops, udp_opts, interval, }); let rotation_count = Arc::new(AtomicU64::new(0)); let task_current = Arc::clone(¤t); let task_rebuild = Arc::clone(&rebuild); let task_counter = Arc::clone(&rotation_count); let rotator_task = tokio::spawn(async move { rotator_loop(task_current, task_rebuild, task_counter).await; }); Ok(Self { current, _rebuild: rebuild, rotation_count, rotator_task, }) } /// Number of successful rotations that have occurred since construction. Test-only helper — /// production code MUST not depend on the exact value because rotations are timer-driven. #[must_use] pub fn rotation_count(&self) -> u64 { self.rotation_count.load(Ordering::Relaxed) } /// The verified peer Common Name of the **currently-active** inner circuit's exit. This may /// change across rotations only if `hops[N-1].proto_cfg.server_name` was changed — under /// normal operation (immutable `RebuildParams`) it stays the same. pub async fn peer_id(&self) -> Option { let snap = { self.current.read().await.clone() }; snap.peer_id().map(str::to_owned) } } #[async_trait] impl PacketConnection for RotatingCircuit { async fn send_packet(&self, packet: &[u8]) -> anyhow::Result<()> { // Snapshot the current circuit (cheap `Arc` clone) and release the read-lock immediately // so the rotator's `write().await` can replace `current` while this send is in flight. let conn = { self.current.read().await.clone() }; conn.send_packet(packet).await } async fn recv_packet(&self) -> anyhow::Result> { let conn = { self.current.read().await.clone() }; conn.recv_packet().await } } /// Background rotator: every `interval` rebuild the circuit and atomically swap it in. /// /// Failure handling: a failed rebuild leaves the previous circuit in place and the rotator waits /// the full `interval` before retrying. This avoids tight-loop hammering an unreachable entry /// hop (a transient network glitch should not multiply the dial rate). async fn rotator_loop( current: Arc>>, rebuild: Arc, rotation_count: Arc, ) { loop { tokio::time::sleep(rebuild.interval).await; match dial_circuit(&rebuild.hops, rebuild.udp_opts).await { Ok(next) => { let new_arc = Arc::new(next); { let mut slot = current.write().await; // `std::mem::replace` returns the previous `Arc`. It drops // here at the end of this block — if no `send_packet`/`recv_packet` is still // holding a snapshot, the old `CircuitConnection`'s `Drop` runs immediately // (aborting forwarders, closing sockets). let _old = std::mem::replace(&mut *slot, new_arc); } let n = rotation_count.fetch_add(1, Ordering::Relaxed) + 1; tracing::info!(rotation = n, "circuit rotated successfully"); } Err(e) => { tracing::warn!( error = %e, "circuit rotation failed; keeping previous circuit active until next tick" ); } } } }