fix(server): v3.6 — implicit auto-NAT on Linux (root cause of full-VPN dying)
Symptoms: in full-VPN mode (`default = "VPN"`) external internet access was dead even though tunnel-internal ping (10.7.0.1) worked perfectly. The tunnel itself was assembled and AEAD-encrypted (see TEST_CASES.md), but packets sent through it died on the server side.

Root cause: the server's `[server.nat]` section was opt-in. On the production server (187.77.67.17), which was deployed before v2, the section is absent from /etc/aura/server.toml, so `aura server` never ran the iptables MASQUERADE plan. Packets egressed to the upstream router with src = 10.7.0.10 (RFC 1918), which the provider's reverse-path filter dropped — full-VPN clients saw "internet is dead". Tunnel-internal pool addresses worked because they don't need NAT.

Fix:
* `server.rs`: when `[server.nat]` is absent from server.toml AND we are on Linux, attempt auto-NAT with an auto-detected egress_iface. If detection or the iptables call fails we DON'T bail — we log a loud error and let the server come up so safe-mode clients keep working.
* `config.rs`: `ServerNatSection::default()` now defaults to `auto = true`. A bare `[server.nat]` header (no `auto =`) now means "yes, enable it" instead of the silent no-op it used to be.
* New tests for both the bare-header and the explicit `auto = false` opt-out paths.
* `docs/server_nat_fix.md`: step-by-step instructions for fixing the existing 187.77.67.17 server (binary upgrade vs. manual server.toml patch vs. fully-manual sysctl + iptables).
* `docs/deployment.md`: replaces the "manual mandatory step" wording with the new auto-NAT story.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -264,24 +264,42 @@ impl ServerOuterCertSection {
|
||||
}
|
||||
|
||||
/// `[server.nat]` section: v2 auto-NAT configuration. See [`crate::nat`] for the apply / rollback
|
||||
/// semantics. Optional — when the section is omitted the server makes no changes to the host's
|
||||
/// IP forwarding state, matching v1 behaviour.
|
||||
#[derive(Debug, Clone, Default, Deserialize)]
|
||||
/// semantics. Optional — when the section is omitted the server falls back to the v3.6
|
||||
/// **implicit auto-NAT** path on Linux (see [`crate::server`]): it tries `auto = true` with an
|
||||
/// auto-detected `egress_iface`, logging a clear notice. To opt out of the implicit path, either
|
||||
/// disable NAT with `[server.nat]\nauto = false`, or make it explicit with `[server.nat]`,
|
||||
/// `auto = true` and a pinned `egress_iface`.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct ServerNatSection {
|
||||
/// Master switch. When `false` (or the section is omitted) the server does NOT touch the
|
||||
/// host network — the operator is expected to have configured forwarding by hand. When
|
||||
/// `true` the server applies the platform-appropriate set of commands at startup and
|
||||
/// rolls them back on shutdown.
|
||||
/// Master switch. **Defaults to `true`** so that an operator who writes `[server.nat]` at all
|
||||
/// gets working NAT without having to also remember `auto = true`. Set it to `false`
|
||||
/// explicitly to disable auto-NAT while still keeping the section (e.g. only to pin
|
||||
/// `egress_iface` for documentation purposes).
|
||||
#[serde(default = "default_true")]
|
||||
pub auto: bool,
|
||||
/// Name of the host interface traffic egresses through (e.g. `"eth0"` on Linux, `"en0"` on
|
||||
/// macOS). REQUIRED when `auto = true` — there is no auto-detection in v1 (that is v3).
|
||||
/// macOS). Optional since v3 — when empty the server auto-detects from the host's default
|
||||
/// route via [`crate::os_routes::detect_default_egress_iface`]; only set this if the host
|
||||
/// has multiple egresses or auto-detection fails.
|
||||
#[serde(default)]
|
||||
pub egress_iface: String,
|
||||
/// When `true`, every command is only logged (`would run: ...`) and not executed. Useful
|
||||
/// for verifying the plan without root privileges and for the unit tests.
|
||||
#[serde(default)]
|
||||
pub dry_run: bool,
|
||||
}
|
||||
|
||||
impl Default for ServerNatSection {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
auto: true,
|
||||
egress_iface: String::new(),
|
||||
dry_run: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// `[tunnel]` section of `server.toml`.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct ServerTunnelSection {
|
||||
@@ -1952,7 +1970,8 @@ pool_cidr = "10.7.0.0/24"
|
||||
}
|
||||
|
||||
/// Backwards compat: an old server.toml without `[server.nat]` parses fine and exposes
|
||||
/// `nat = None`. This preserves the v1 "operator configures NAT by hand" behaviour.
|
||||
/// `nat = None`. v3.6 keeps the *type* the same (`Option<ServerNatSection>`) — the new
|
||||
/// implicit-auto-NAT behaviour lives in [`crate::server::run`], not in the parser.
|
||||
#[test]
|
||||
fn server_nat_section_optional() {
|
||||
let s = r#"
|
||||
@@ -1966,7 +1985,65 @@ key = "c"
|
||||
pool_cidr = "10.7.0.0/24"
|
||||
"#;
|
||||
let cfg = ServerConfigFile::parse(s).expect("parse minimal v1 server.toml");
|
||||
assert!(cfg.server.nat.is_none(), "nat section absent by default");
|
||||
assert!(cfg.server.nat.is_none(), "nat section absent in toml");
|
||||
}
|
||||
|
||||
/// v3.6: the `Default` value of `ServerNatSection` has auto-NAT enabled (it was disabled in
/// v1/v2), no egress interface pinned and dry-run off — the values a bare `[server.nat]`
/// header (no `auto =` key) is meant to produce.
#[test]
fn server_nat_section_default_is_auto_true() {
    // Pull every field out at once so each assertion reads on its own.
    let ServerNatSection {
        auto,
        egress_iface,
        dry_run,
    } = ServerNatSection::default();

    assert!(auto, "v3.6 default: auto = true");
    assert!(
        egress_iface.is_empty(),
        "v3.6 default: egress_iface empty (server.rs auto-detects)"
    );
    assert!(!dry_run, "v3.6 default: dry_run = false");
}
|
||||
|
||||
/// v3.6: a `[server.nat]` header with no keys underneath parses to a present section with
/// `auto = true`. `egress_iface` stays empty (left for the runtime to auto-detect) and
/// `dry_run` stays off.
#[test]
fn server_nat_section_bare_header_enables_auto() {
    let toml_src = r#"
[server]
name = "edge"
[server.nat]
[pki]
ca_cert = "a"
cert = "b"
key = "c"
[tunnel]
pool_cidr = "10.7.0.0/24"
"#;

    let parsed = ServerConfigFile::parse(toml_src)
        .expect("parse server.toml with bare [server.nat]");
    let section = parsed.server.nat.as_ref().expect("section present");

    assert!(section.auto, "v3.6: bare [server.nat] defaults to auto = true");
    assert!(
        section.egress_iface.is_empty(),
        "egress empty -> runtime auto-detect"
    );
    assert!(!section.dry_run);
}
|
||||
|
||||
/// v3.6 opt-out: an explicit `auto = false` under `[server.nat]` is honoured, keeping the
/// historical v1/v2 behaviour (server does not touch the host NAT). This is the escape hatch
/// for operators who have already configured iptables / nftables by hand.
#[test]
fn server_nat_section_explicit_opt_out() {
    let toml_src = r#"
[server]
name = "edge"
[server.nat]
auto = false
[pki]
ca_cert = "a"
cert = "b"
key = "c"
[tunnel]
pool_cidr = "10.7.0.0/24"
"#;

    let parsed = ServerConfigFile::parse(toml_src).expect("parse server.toml with auto = false");
    let section = parsed.server.nat.as_ref().expect("section present");

    // The section must exist, but with the master switch turned off.
    let auto_enabled = section.auto;
    assert!(!auto_enabled, "explicit auto = false is honoured");
}
|
||||
|
||||
/// v3.2: `[transport.masks] palette = "russian"` parses into [`MaskPalette::Russian`] and
|
||||
|
||||
Reference in New Issue
Block a user