fix(peer): bound outbound transfer drain waits
Update and remove-download operations must wait for existing outbound readers to release game files before mutating or deleting the game root. That wait was unbounded, so a stuck transfer guard could leave the game permanently marked as Updating or RemovingDownload and prevent the requested operation from ever starting. Return a structured begin-operation result and put a five-second timeout around the drain wait. If the transfer count does not reach zero, the operation start fails, the active-operation snapshot is cleared, and the caller emits the normal failure event for the attempted operation. The destructive mutation is not allowed to proceed after a timeout. Test Plan: - just test - just clippy - git diff --check Refs: Claude review finding #3
This commit is contained in:
@@ -6,6 +6,7 @@ use std::{
|
||||
net::SocketAddr,
|
||||
path::{Path, PathBuf},
|
||||
sync::Arc,
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use lanspread_db::db::{GameDB, GameFileDescription};
|
||||
@@ -38,6 +39,9 @@ use crate::{
|
||||
// Command handlers
|
||||
// =============================================================================
|
||||
|
||||
const OUTBOUND_TRANSFER_DRAIN_POLL_INTERVAL: Duration = Duration::from_millis(10);
|
||||
const OUTBOUND_TRANSFER_DRAIN_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
|
||||
/// Handles the `ListGames` command.
|
||||
pub async fn handle_list_games_command(ctx: &Ctx, tx_notify_ui: &UnboundedSender<PeerEvent>) {
|
||||
log::info!("ListGames command received");
|
||||
@@ -304,10 +308,18 @@ pub async fn handle_download_game_files_command(
|
||||
return;
|
||||
}
|
||||
|
||||
if !begin_operation(ctx, tx_notify_ui, &id, OperationKind::Downloading).await {
|
||||
match begin_operation(ctx, tx_notify_ui, &id, OperationKind::Downloading).await {
|
||||
BeginOperationResult::Started => {}
|
||||
BeginOperationResult::AlreadyActive => {
|
||||
log::warn!("Operation for {id} already in progress; ignoring new download request");
|
||||
return;
|
||||
}
|
||||
BeginOperationResult::DrainTimedOut => {
|
||||
log::error!("Timed out waiting for outbound transfers before downloading {id}");
|
||||
send_download_failed(tx_notify_ui, &id);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
let active_operations = ctx.active_operations.clone();
|
||||
let active_downloads = ctx.active_downloads.clone();
|
||||
@@ -491,10 +503,21 @@ async fn run_install_operation(ctx: &Ctx, tx_notify_ui: &UnboundedSender<PeerEve
|
||||
return;
|
||||
};
|
||||
|
||||
if !begin_operation(ctx, tx_notify_ui, &id, prepared.operation_kind).await {
|
||||
match begin_operation(ctx, tx_notify_ui, &id, prepared.operation_kind).await {
|
||||
BeginOperationResult::Started => {}
|
||||
BeginOperationResult::AlreadyActive => {
|
||||
log::warn!("Operation for {id} already in progress; ignoring install command");
|
||||
return;
|
||||
}
|
||||
BeginOperationResult::DrainTimedOut => {
|
||||
log::error!("Timed out waiting for outbound transfers before install/update of {id}");
|
||||
events::send(
|
||||
tx_notify_ui,
|
||||
PeerEvent::InstallGameFailed { id: id.clone() },
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
run_started_install_operation(ctx, tx_notify_ui, id, prepared).await;
|
||||
}
|
||||
@@ -616,10 +639,21 @@ async fn run_uninstall_operation(ctx: &Ctx, tx_notify_ui: &UnboundedSender<PeerE
|
||||
return;
|
||||
}
|
||||
|
||||
if !begin_operation(ctx, tx_notify_ui, &id, OperationKind::Uninstalling).await {
|
||||
match begin_operation(ctx, tx_notify_ui, &id, OperationKind::Uninstalling).await {
|
||||
BeginOperationResult::Started => {}
|
||||
BeginOperationResult::AlreadyActive => {
|
||||
log::warn!("Operation for {id} already in progress; ignoring uninstall command");
|
||||
return;
|
||||
}
|
||||
BeginOperationResult::DrainTimedOut => {
|
||||
log::error!("Timed out waiting for outbound transfers before uninstall of {id}");
|
||||
events::send(
|
||||
tx_notify_ui,
|
||||
PeerEvent::UninstallGameFailed { id: id.clone() },
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
let game_root = { ctx.game_dir.read().await.join(&id) };
|
||||
let operation_guard = OperationGuard::new(
|
||||
@@ -678,10 +712,21 @@ async fn run_remove_downloaded_operation(
|
||||
return;
|
||||
}
|
||||
|
||||
if !begin_operation(ctx, tx_notify_ui, &id, OperationKind::RemovingDownload).await {
|
||||
match begin_operation(ctx, tx_notify_ui, &id, OperationKind::RemovingDownload).await {
|
||||
BeginOperationResult::Started => {}
|
||||
BeginOperationResult::AlreadyActive => {
|
||||
log::warn!("Operation for {id} already in progress; ignoring downloaded-file removal");
|
||||
return;
|
||||
}
|
||||
BeginOperationResult::DrainTimedOut => {
|
||||
log::error!("Timed out waiting for outbound transfers before removal of {id}");
|
||||
events::send(
|
||||
tx_notify_ui,
|
||||
PeerEvent::RemoveDownloadedGameFailed { id: id.clone() },
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
let game_dir = { ctx.game_dir.read().await.clone() };
|
||||
let operation_guard = OperationGuard::new(
|
||||
@@ -730,12 +775,36 @@ async fn run_remove_downloaded_operation(
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
enum BeginOperationResult {
|
||||
Started,
|
||||
AlreadyActive,
|
||||
DrainTimedOut,
|
||||
}
|
||||
|
||||
async fn begin_operation(
|
||||
ctx: &Ctx,
|
||||
tx_notify_ui: &UnboundedSender<PeerEvent>,
|
||||
id: &str,
|
||||
operation: OperationKind,
|
||||
) -> bool {
|
||||
) -> BeginOperationResult {
|
||||
begin_operation_with_drain_timeout(
|
||||
ctx,
|
||||
tx_notify_ui,
|
||||
id,
|
||||
operation,
|
||||
OUTBOUND_TRANSFER_DRAIN_TIMEOUT,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn begin_operation_with_drain_timeout(
|
||||
ctx: &Ctx,
|
||||
tx_notify_ui: &UnboundedSender<PeerEvent>,
|
||||
id: &str,
|
||||
operation: OperationKind,
|
||||
drain_timeout: Duration,
|
||||
) -> BeginOperationResult {
|
||||
let started = {
|
||||
let mut active_operations = ctx.active_operations.write().await;
|
||||
match active_operations.entry(id.to_string()) {
|
||||
@@ -748,13 +817,30 @@ async fn begin_operation(
|
||||
};
|
||||
|
||||
if !started {
|
||||
return false;
|
||||
return BeginOperationResult::AlreadyActive;
|
||||
}
|
||||
|
||||
events::emit_active_operations(&ctx.active_operations, tx_notify_ui).await;
|
||||
|
||||
if operation == OperationKind::Updating || operation == OperationKind::RemovingDownload {
|
||||
// Cancel all active outbound transfers for this game
|
||||
if operation_requires_outbound_drain(operation)
|
||||
&& !cancel_and_wait_for_outbound_transfers(ctx, id, drain_timeout).await
|
||||
{
|
||||
end_operation(ctx, tx_notify_ui, id).await;
|
||||
return BeginOperationResult::DrainTimedOut;
|
||||
}
|
||||
|
||||
BeginOperationResult::Started
|
||||
}
|
||||
|
||||
fn operation_requires_outbound_drain(operation: OperationKind) -> bool {
|
||||
operation == OperationKind::Updating || operation == OperationKind::RemovingDownload
|
||||
}
|
||||
|
||||
async fn cancel_and_wait_for_outbound_transfers(
|
||||
ctx: &Ctx,
|
||||
id: &str,
|
||||
drain_timeout: Duration,
|
||||
) -> bool {
|
||||
let mut tokens_to_cancel = Vec::new();
|
||||
{
|
||||
let active = ctx.active_outbound_transfers.read().await;
|
||||
@@ -768,7 +854,7 @@ async fn begin_operation(
|
||||
token.cancel();
|
||||
}
|
||||
|
||||
// Wait until active outbound transfers drop to 0
|
||||
let drained = tokio::time::timeout(drain_timeout, async {
|
||||
loop {
|
||||
let count = {
|
||||
let active = ctx.active_outbound_transfers.read().await;
|
||||
@@ -777,11 +863,23 @@ async fn begin_operation(
|
||||
if count == 0 {
|
||||
break;
|
||||
}
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;
|
||||
tokio::time::sleep(OUTBOUND_TRANSFER_DRAIN_POLL_INTERVAL).await;
|
||||
}
|
||||
})
|
||||
.await
|
||||
.is_ok();
|
||||
|
||||
if !drained {
|
||||
let count = {
|
||||
let active = ctx.active_outbound_transfers.read().await;
|
||||
active.get(id).map_or(0, Vec::len)
|
||||
};
|
||||
log::error!(
|
||||
"Timed out after {drain_timeout:?} waiting for {count} outbound transfer(s) to drain for {id}"
|
||||
);
|
||||
}
|
||||
|
||||
true
|
||||
drained
|
||||
}
|
||||
|
||||
async fn transition_download_to_install(
|
||||
@@ -1495,7 +1593,10 @@ mod tests {
|
||||
let ctx = test_ctx(temp.path().to_path_buf());
|
||||
let (tx, mut rx) = mpsc::unbounded_channel();
|
||||
|
||||
assert!(begin_operation(&ctx, &tx, "game", OperationKind::Updating).await);
|
||||
assert_eq!(
|
||||
begin_operation(&ctx, &tx, "game", OperationKind::Updating).await,
|
||||
BeginOperationResult::Started
|
||||
);
|
||||
assert_active_update(
|
||||
recv_event(&mut rx).await,
|
||||
vec![ActiveOperation {
|
||||
@@ -1505,6 +1606,48 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn begin_operation_timeout_clears_active_operation_snapshot() {
|
||||
let temp = TempDir::new("lanspread-handler-active-drain-timeout");
|
||||
let root = temp.game_root();
|
||||
write_file(&root.join("version.ini"), b"20250101");
|
||||
write_file(&root.join("game.eti"), b"archive");
|
||||
|
||||
let ctx = test_ctx(temp.path().to_path_buf());
|
||||
let (tx, mut rx) = mpsc::unbounded_channel();
|
||||
let token = CancellationToken::new();
|
||||
ctx.active_outbound_transfers
|
||||
.write()
|
||||
.await
|
||||
.insert("game".to_string(), vec![(1, token.clone())]);
|
||||
|
||||
assert_eq!(
|
||||
begin_operation_with_drain_timeout(
|
||||
&ctx,
|
||||
&tx,
|
||||
"game",
|
||||
OperationKind::Updating,
|
||||
Duration::from_millis(1),
|
||||
)
|
||||
.await,
|
||||
BeginOperationResult::DrainTimedOut
|
||||
);
|
||||
|
||||
assert!(token.is_cancelled());
|
||||
assert_active_update(
|
||||
recv_event(&mut rx).await,
|
||||
vec![ActiveOperation {
|
||||
id: "game".to_string(),
|
||||
operation: ActiveOperationKind::Updating,
|
||||
}],
|
||||
);
|
||||
assert_active_update(recv_event(&mut rx).await, Vec::new());
|
||||
assert!(
|
||||
!ctx.active_operations.read().await.contains_key("game"),
|
||||
"timed-out drain should not leave the operation stuck active"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn unchanged_settled_scan_is_not_reemitted() {
|
||||
let temp = TempDir::new("lanspread-handler-settled-unchanged");
|
||||
|
||||
Reference in New Issue
Block a user