feat(relay): log malformed datagram counts

PLAN.md calls for heavy diagnostics and better malformed-frame handling. The
relay already disconnected peers after repeated malformed datagrams, but overlay
or header-level malformed datagrams had no operator-facing count until the peer
was closed.

Log each malformed peer datagram with room, peer id, role, current count,
threshold, and whether this datagram reaches the threshold and disconnects the
peer. The existing threshold behavior is unchanged; this only makes the counter
visible.

Document the malformed-datagram count in the relay README section.

Test Plan:
- cargo fmt --check
- cargo test -p lanparty-relay
- cargo test --workspace
- cargo clippy --workspace --all-targets -- -D warnings
- git diff --check

Refs: PLAN.md
This commit is contained in:
2026-05-21 22:10:28 +02:00
parent d4cb119b19
commit 2c946ce9c2
2 changed files with 58 additions and 0 deletions
+2
View File
@@ -129,6 +129,8 @@ certificate handling remains future work. Ethernet forwarding decisions are
logged with room, peer, MAC, ethertype, action, drop reason, and target count.
Safety-policy rejects use the `filtered` action so they are distinguishable
from malformed/unknown-destination drops and rate limits.
The relay logs every malformed peer datagram with the peer's running count and
the disconnect threshold, including the datagram that reaches the threshold and
disconnects the peer.
Unknown unicast from a client is forwarded only to the gateway port; unknown
unicast from the gateway is dropped instead of flooded to every remote client.
When a peer joins or leaves, the relay sends a reliable lifecycle control event
+56
View File
@@ -111,6 +111,10 @@ impl MalformedDatagramTracker {
None
}
}
/// Returns the tracker's current malformed-datagram count.
///
/// Read-only accessor over the `count` field; it does not modify the tracker.
const fn count(&self) -> usize {
self.count
}
}
impl RelayServer {
@@ -321,9 +325,25 @@ async fn run_peer_io(
Ok(PeerDatagramOutcome::Accepted) => {}
Ok(PeerDatagramOutcome::Malformed) => {
if let Some(reason) = malformed_tracker.record_malformed() {
eprintln!(
"{}",
malformed_datagram_log_line(
accepted,
malformed_tracker.count(),
true,
)
);
connection.close(0_u32.into(), reason.as_bytes());
return PeerClose::protocol_error(reason);
}
eprintln!(
"{}",
malformed_datagram_log_line(
accepted,
malformed_tracker.count(),
false,
)
);
}
Err(error) => {
eprintln!(
@@ -554,6 +574,22 @@ fn peer_stats_log_line(accepted: &AcceptedPeer, stats: &TunnelStats) -> String {
)
}
/// Builds the log line emitted for one malformed datagram received from a peer.
///
/// The line carries the peer's room, id, and role, the malformed-datagram count
/// supplied by the caller, the `MAX_MALFORMED_DATAGRAMS_PER_PEER` threshold, and
/// the caller-supplied `disconnecting` flag.
fn malformed_datagram_log_line(
    accepted: &AcceptedPeer,
    malformed_count: usize,
    disconnecting: bool,
) -> String {
    // Resolve the peer-derived fields up front so the format call stays flat.
    let peer = &accepted.peer;
    let (peer_id, role) = (peer.peer_id(), peer.role());

    format!(
        "malformed peer datagram room={} peer_id={} role={:?} count={} threshold={} disconnecting={}",
        accepted.room,
        peer_id,
        role,
        malformed_count,
        MAX_MALFORMED_DATAGRAMS_PER_PEER,
        disconnecting
    )
}
async fn collect_target_sessions(
sessions: &Arc<Mutex<HashMap<PeerKey, PeerSession>>>,
room: &RoomCode,
@@ -967,13 +1003,33 @@ mod tests {
for _ in 1..MAX_MALFORMED_DATAGRAMS_PER_PEER {
assert_eq!(tracker.record_malformed(), None);
}
assert_eq!(tracker.count(), MAX_MALFORMED_DATAGRAMS_PER_PEER - 1);
let reason = tracker
.record_malformed()
.expect("threshold should disconnect peer");
assert_eq!(tracker.count(), MAX_MALFORMED_DATAGRAMS_PER_PEER);
assert!(reason.contains("malformed datagrams"));
}
#[tokio::test]
async fn formats_malformed_datagram_log_line() {
    // Verifies that the malformed-datagram log line exposes every field the
    // line is expected to carry, for both the below-threshold case and the
    // disconnecting case.
    let rooms = Arc::new(Mutex::new(RoomRegistry::default()));
    let accepted = accepted_client_for_forwarding(&rooms, client_mac(1)).await;

    // Below-threshold datagram: all identifying fields present, not disconnecting.
    let line = malformed_datagram_log_line(&accepted, 3, false);
    let expected_fragments = [
        "malformed peer datagram".to_owned(),
        "room=TESTROOM".to_owned(),
        format!("peer_id={}", accepted.peer.peer_id()),
        "role=Client".to_owned(),
        "count=3".to_owned(),
        format!("threshold={MAX_MALFORMED_DATAGRAMS_PER_PEER}"),
        "disconnecting=false".to_owned(),
    ];
    for fragment in &expected_fragments {
        // Include both the fragment and the full line so a failure is
        // diagnosable without re-running under a debugger.
        assert!(
            line.contains(fragment.as_str()),
            "expected {fragment:?} in log line {line:?}"
        );
    }

    // Threshold datagram: the line must report the threshold count and the disconnect.
    let line = malformed_datagram_log_line(&accepted, MAX_MALFORMED_DATAGRAMS_PER_PEER, true);
    let threshold_fragments = [
        format!("count={MAX_MALFORMED_DATAGRAMS_PER_PEER}"),
        "disconnecting=true".to_owned(),
    ];
    for fragment in &threshold_fragments {
        assert!(
            line.contains(fragment.as_str()),
            "expected {fragment:?} in log line {line:?}"
        );
    }
}
#[tokio::test]
async fn classifies_bad_peer_datagrams_as_malformed() {
let rooms = Arc::new(Mutex::new(RoomRegistry::default()));