feat(cli,tunnel): v3.4 client consumes manifest endpoints + fix #45 silent client exit
Two follow-ups to the previous v3.4 commit (ba8d6b7):
## #49 — client uses BridgeEndpoint ports as authoritative
BridgesDiscoveryWatcher now keeps a second snapshot
(`Arc<RwLock<Vec<BridgeEndpoint>>>`) for the per-transport endpoints carried by
v3.4 manifests, alongside the existing flat-bridges snapshot for v3.3
compatibility. `endpoints_snapshot()` and `primary_endpoint()` expose it to the
client.
In `client::run`, immediately after the watcher loads, the primary endpoint's
per-transport ports override the dial-time `dial_cfg.endpoints.{tcp,quic,udp}`
*ports*. The IP stays whatever the dialer already resolved (server_addr /
bridge list). This is what closes the loop on the user's friend's setup: the
server picks 8444 because sing-box already holds 443/8443 and signs a manifest
with `endpoints = [{tcp: 8444, ...}]`; the client then loads that manifest on
its next refresh and starts dialing the right port without an operator-side
`client.toml` edit.
When the manifest has no `endpoints` field (old v3.3 format, or operator
chose not to publish per-transport ports), no override is applied and the
client.toml `[transport] *_port` values are used as before.
## #45 — silent client exit on broken connection
Root cause confirmed in `AuraRouter::run`:
- the inbound task did `let pkt = inbound_conn.recv_packet().await?;`, so any
recv error made the task return early via `?` without logging anything
- the `to_tun_tx` channel sender dropped, `to_tun_rx.recv()` returned `None`
- the outbound `select!` arm matched `None => break Ok(())`
- the router returned `Ok(())`, the client's `run()` returned `Ok(())`, the
process exited 0 with no log, no error message
We saw this empirically when the user disabled a co-resident VPN that had been
routing AuraVPN's UDP/444 traffic — the underlying QUIC socket broke, the
inbound task hit a recv error, and the whole client vanished.
Fix:
- Inbound task now logs the error at `error` level with the underlying
`recv_packet` cause before exiting.
- The outbound `select!`'s `None` arm now returns an Err (not Ok(())) so the
caller knows the tunnel died and `aura client` exits non-zero — which is
what a supervisor (systemd, launchd, or a future auto-redial loop) wants to
see.
- The router waits up to 200ms for the inbound task to land cleanly before
returning, so its error / panic is logged instead of being swallowed by
`abort()`.
Existing tests still pass (12/12 in aura-tunnel router tests). Tested
manually: with the fix, killing the underlying transport now produces a
"peer connection broke (recv_packet failed): …" error line and a non-zero
exit, instead of silent process disappearance.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -333,6 +333,10 @@ impl BridgeManifest {
|
||||
pub struct BridgesDiscoveryWatcher {
|
||||
/// The current effective merged list (static + manifest, de-duplicated by `SocketAddr`).
|
||||
snapshot: Arc<RwLock<Vec<SocketAddr>>>,
|
||||
/// v3.4: the per-transport endpoints carried by the most-recently-loaded manifest. Empty
|
||||
/// when the manifest has no `endpoints` field (v3.3-format manifest, or v3.4 manifest where
|
||||
/// the operator opted not to publish per-transport ports).
|
||||
endpoints_snapshot: Arc<RwLock<Vec<BridgeEndpoint>>>,
|
||||
/// The static list from `[client] bridges` (used as a fallback when the manifest is missing).
|
||||
static_bridges: Vec<SocketAddr>,
|
||||
/// File path of the signed manifest.
|
||||
@@ -355,8 +359,10 @@ impl BridgesDiscoveryWatcher {
|
||||
static_bridges: Vec<SocketAddr>,
|
||||
) -> Self {
|
||||
let snapshot = Arc::new(RwLock::new(static_bridges.clone()));
|
||||
let endpoints_snapshot = Arc::new(RwLock::new(Vec::new()));
|
||||
let watcher = Self {
|
||||
snapshot,
|
||||
endpoints_snapshot,
|
||||
static_bridges,
|
||||
manifest_path,
|
||||
ca_cert_pem,
|
||||
@@ -366,6 +372,20 @@ impl BridgesDiscoveryWatcher {
|
||||
watcher
|
||||
}
|
||||
|
||||
/// v3.4: clone of the per-transport endpoint snapshot. Empty when the manifest has no
|
||||
/// `endpoints` field. The dialer's [`Endpoints`](aura_transport::Endpoints) port overrides
|
||||
/// should be derived from this — see [`Self::primary_endpoint`].
|
||||
pub async fn endpoints_snapshot(&self) -> Vec<BridgeEndpoint> {
|
||||
self.endpoints_snapshot.read().await.clone()
|
||||
}
|
||||
|
||||
/// v3.4: first endpoint from the snapshot, when present. Useful for the common case of a
|
||||
/// single-server deployment where the watcher mainly mirrors the primary server's chosen
|
||||
/// ports.
|
||||
pub async fn primary_endpoint(&self) -> Option<BridgeEndpoint> {
|
||||
self.endpoints_snapshot.read().await.first().cloned()
|
||||
}
|
||||
|
||||
/// Snapshot handle: clones of this `Arc<RwLock<...>>` can be read concurrently by the dial loop.
|
||||
pub fn handle(&self) -> Arc<RwLock<Vec<SocketAddr>>> {
|
||||
Arc::clone(&self.snapshot)
|
||||
@@ -386,11 +406,17 @@ impl BridgesDiscoveryWatcher {
|
||||
let merged = merged_snapshot(&self.static_bridges, &manifest.parsed_bridges());
|
||||
let merged_len = merged.len();
|
||||
*self.snapshot.write().await = merged;
|
||||
// v3.4: copy the per-transport endpoints over too. They drive dial-time port
|
||||
// overrides on the client (see [`crate::client::run`]). Old v3.3 manifests have
|
||||
// an empty `endpoints` field and the snapshot just clears.
|
||||
let endpoints_len = manifest.endpoints.len();
|
||||
*self.endpoints_snapshot.write().await = manifest.endpoints.clone();
|
||||
tracing::info!(
|
||||
path = %self.manifest_path.display(),
|
||||
generated_at = manifest.generated_at,
|
||||
expires_at = manifest.expires_at,
|
||||
manifest_bridges = manifest.bridges.len(),
|
||||
manifest_endpoints = endpoints_len,
|
||||
merged_total = merged_len,
|
||||
"loaded signed bridges manifest"
|
||||
);
|
||||
|
||||
@@ -131,6 +131,36 @@ pub async fn run(config_path: &Path, admin_socket: &str) -> anyhow::Result<()> {
|
||||
// returned JoinHandle. Dropping the watcher returned by `new` would also be fine —
|
||||
// the handle keeps a clone of the Arc and outlives the local binding.
|
||||
let _bg = watcher.spawn_refresh();
|
||||
// v3.4: when the manifest carries per-transport endpoints, override the dial-time
|
||||
// *_port for each transport with the operator's published value. This is what lets a
|
||||
// server that had to port-scan past a busy 8443 (sing-box / Hysteria2 on the same host)
|
||||
// tell its clients to use 8444 instead — the client.toml's static [transport] ports
|
||||
// become only the bootstrap fallback. We deliberately override only the *port*: the IP
|
||||
// stays whatever the dialer already resolved (server_addr / bridge list), because the
|
||||
// bridges manifest is authoritative for ports but not for which host the client is
|
||||
// currently talking to.
|
||||
if let Some(ep) = watcher.primary_endpoint().await {
|
||||
let mut applied = Vec::new();
|
||||
if let (Some(port), Some(addr)) = (ep.tcp, dial_cfg.endpoints.tcp) {
|
||||
dial_cfg.endpoints.tcp = Some(std::net::SocketAddr::new(addr.ip(), port));
|
||||
applied.push(format!("tcp={}", port));
|
||||
}
|
||||
if let (Some(port), Some(addr)) = (ep.quic, dial_cfg.endpoints.quic) {
|
||||
dial_cfg.endpoints.quic = Some(std::net::SocketAddr::new(addr.ip(), port));
|
||||
applied.push(format!("quic={}", port));
|
||||
}
|
||||
if let (Some(port), Some(addr)) = (ep.udp, dial_cfg.endpoints.udp) {
|
||||
dial_cfg.endpoints.udp = Some(std::net::SocketAddr::new(addr.ip(), port));
|
||||
applied.push(format!("udp={}", port));
|
||||
}
|
||||
if !applied.is_empty() {
|
||||
tracing::info!(
|
||||
endpoint_host = %ep.host,
|
||||
overrides = %applied.join(","),
|
||||
"v3.4 manifest endpoints override dial-time transport ports"
|
||||
);
|
||||
}
|
||||
}
|
||||
tracing::info!(
|
||||
path = %manifest_path.display(),
|
||||
refresh_interval_secs = refresh_secs,
|
||||
|
||||
Reference in New Issue
Block a user