Files
upl/scripts/nginx-smoke.sh
ddidderr c072b93726 feat: write chunks directly to temp upload files
Completed uploads used to copy every staged chunk into a second file before
renaming the result into data/complete. That doubled write volume and required
peak disk space for both the chunk set and the final file.

Write each chunk directly into one private temp upload file at its final offset
instead. After a chunk write succeeds, record a tiny durable completion marker
for progress and resume scans. Completion now verifies the temp file length and
all markers, then renames the temp file into the completed upload directory.

Add UPL_TEMP_DIR and --temp-dir so operators can choose where upload metadata,
markers, and temp files live. The default remains data/staging, and docs call
out that the temp directory must be on the same filesystem as data/complete for
atomic promotion. The nginx example now aliases only the completed upload
directory, and the smoke test verifies that final-file alias.

This keeps the existing length-based validation model; it does not add per-chunk
hashing.

Test Plan:
- just check
- just nginx-smoke
- cargo clippy && cargo clippy --benches && cargo clippy --tests
- cargo +nightly fmt --all
- cargo clippy && cargo clippy --benches && cargo clippy --tests

Refs: none
2026-05-30 18:10:05 +02:00

170 lines
4.9 KiB
Bash
Executable File

#!/usr/bin/env bash
# Settings and scratch paths for the nginx smoke test.
set -euo pipefail

# Ports and the nginx image may be overridden from the environment.
backend_port="${UPL_SMOKE_BACKEND_PORT:-39123}"
proxy_port="${UPL_SMOKE_PROXY_PORT:-39124}"
nginx_image="${NGINX_IMAGE:-nginx:stable-alpine}"

# Every run gets its own scratch directory under <cwd>/target/nginx-smoke.
workspace_dir="$(pwd)"
run_root="${workspace_dir}/target/nginx-smoke"
mkdir -p "${run_root}"
tmp_dir="$(mktemp -d "${run_root}/run.XXXXXXXX")"

# Backend storage locations.
data_dir="${tmp_dir}/data"
complete_dir="${data_dir}/complete"
temp_dir="${tmp_dir}/upload-temp"

# Generated nginx configuration.
nginx_conf_dir="${tmp_dir}/nginx-conf.d"
nginx_conf="${nginx_conf_dir}/default.conf"

# Log file and test payload files.
backend_log="${tmp_dir}/backend.log"
source_file="${tmp_dir}/source.bin"
served_file="${tmp_dir}/served.bin"
chunk0="${tmp_dir}/chunk0.part"
chunk1="${tmp_dir}/chunk1.part"

# Process/container handles; backend_pid stays empty until a backend is started.
backend_pid=""
nginx_container="upl-nginx-smoke-$$"
cleanup() {
if [[ -n "$backend_pid" ]] && kill -0 "$backend_pid" 2>/dev/null; then
kill "$backend_pid" 2>/dev/null || true
wait "$backend_pid" 2>/dev/null || true
fi
docker rm -f "$nginx_container" >/dev/null 2>&1 || true
rm -rf "$tmp_dir"
}
trap cleanup EXIT
#######################################
# Launch the backend with `cargo run` in the background and block until its
# health endpoint answers.
# Globals:   backend_port, data_dir, temp_dir, backend_log (read)
#            backend_pid (written)
# Returns:   the status of wait_for on the backend /healthz URL
#######################################
start_backend() {
  local health_url="http://127.0.0.1:${backend_port}/healthz"

  # stdout and stderr of the backend both land in the backend log.
  UPL_BIND="0.0.0.0:${backend_port}" \
    UPL_DATA_DIR="${data_dir}" \
    UPL_TEMP_DIR="${temp_dir}" \
    cargo run --quiet >"${backend_log}" 2>&1 &
  backend_pid=$!

  wait_for "${health_url}"
}
#######################################
# Poll a URL until it answers successfully or the attempt budget runs out.
# Globals:   backend_pid (read), backend_log (read)
# Arguments: $1 - URL to probe with curl
#            $2 - optional number of attempts, 0.1s apart (default: 80)
# Outputs:   on failure, a reason plus the last 80 backend log lines on stderr
# Returns:   0 once the URL responds; 1 on timeout or when the backend
#            process has exited while waiting
#######################################
wait_for() {
  local url="$1"
  local attempts="${2:-80}"
  local reason="Timed out waiting for $url"
  local i

  for ((i = 0; i < attempts; i++)); do
    if curl -fsS "$url" >/dev/null 2>&1; then
      return 0
    fi
    # A backend that has already exited will never answer; stop polling and
    # report right away instead of sleeping through the remaining attempts.
    if [[ -n "${backend_pid:-}" ]] && ! kill -0 "$backend_pid" 2>/dev/null; then
      reason="Backend exited while waiting for $url"
      break
    fi
    sleep 0.1
  done

  echo "$reason" >&2
  if [[ -f "${backend_log:-}" ]]; then
    # Showing the log is best effort; the failure status below is what matters.
    tail -n 80 "$backend_log" >&2 || true
  fi
  return 1
}
# Extract one top-level field from a JSON document read on stdin.
# Arguments: $1 - name of the field to look up
# Outputs:   the field value, converted with String(), on stdout with no
#            trailing newline
# Returns:   node's exit status; the inline script exits with 2 when the
#            field is undefined
json_field() {
local field="$1"
# The field name is handed to node as an extra argument after the inline
# script, which reads it back as process.argv[1]. The script buffers all of
# stdin before parsing it.
node -e '
const field = process.argv[1];
let input = "";
process.stdin.setEncoding("utf8");
process.stdin.on("data", (chunk) => input += chunk);
process.stdin.on("end", () => {
const value = JSON.parse(input)[field];
if (value === undefined) process.exit(2);
process.stdout.write(String(value));
});
' "$field"
}
# Create the completed-upload directory, the upload temp directory and the
# nginx config directory up front.
mkdir -p "$complete_dir" "$temp_dir" "$nginx_conf_dir"
# Write the nginx server config. The heredoc delimiter is unquoted, so the
# shell expands $proxy_port and $backend_port here, while the backslash-escaped
# names (\$uri, \$host, ...) are written out literally as nginx variables.
# Requests under /files/ are served from /upl-complete/; everything else is
# proxied to the backend at host.docker.internal with request and response
# buffering turned off.
cat >"$nginx_conf" <<EOF
server {
listen $proxy_port;
client_max_body_size 64m;
location /files/ {
alias /upl-complete/;
autoindex off;
try_files \$uri =404;
}
location / {
proxy_pass http://host.docker.internal:$backend_port;
proxy_http_version 1.1;
proxy_request_buffering off;
proxy_buffering off;
proxy_read_timeout 3600s;
proxy_send_timeout 3600s;
proxy_set_header Host \$host;
proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto http;
proxy_set_header X-Real-IP \$remote_addr;
}
}
EOF
# Bring up the backend first, then an nginx container in front of it.
start_backend

nginx_run_args=(
  -d --rm
  --name "$nginx_container"
  # Map host.docker.internal to the host gateway; the generated config
  # proxies to that name.
  --add-host host.docker.internal:host-gateway
  # Publish the proxy port on loopback only.
  -p "127.0.0.1:${proxy_port}:${proxy_port}"
  # Generated config and the completed-upload directory, both read-only.
  -v "${nginx_conf_dir}:/etc/nginx/conf.d:ro"
  -v "${complete_dir}:/upl-complete:ro"
)
docker run "${nginx_run_args[@]}" "$nginx_image" >/dev/null

# The proxy counts as ready once /healthz answers through it.
wait_for "http://127.0.0.1:${proxy_port}/healthz"
# Build a 17 MiB random payload, then split it into its first 16 MiB and the
# remaining tail so it can be sent as two chunks.
dd if=/dev/urandom of="${source_file}" bs=1M count=17 status=none
dd if="${source_file}" of="${chunk0}" bs=1M count=16 status=none
dd if="${source_file}" of="${chunk1}" bs=1M skip=16 status=none

# Some wc implementations pad the count with spaces; strip them so the number
# can be embedded in the JSON body as-is.
size="$(wc -c <"${source_file}" | tr -d ' ')"

# Register the upload through the proxy and remember the id it returns.
printf -v create_payload \
  '{"name":"source.bin","size":%s,"last_modified":1760000000000}' "$size"
create_response="$(
  curl -fsS \
    --header "Content-Type: application/json" \
    --data "$create_payload" \
    "http://127.0.0.1:${proxy_port}/api/uploads"
)"
upload_id="$(json_field upload_id <<<"$create_response")"
# Upload the first chunk through the proxy.
curl -fsS -X PUT \
  -H "Content-Type: application/octet-stream" \
  --data-binary "@$chunk0" \
  "http://127.0.0.1:$proxy_port/api/uploads/$upload_id/chunks/0" >/dev/null

# The progress endpoint must now list chunk 0. The check is an explicit `if`
# fed by a here-string: a bare `printf | grep -q` would abort silently under
# `set -e`, and the pipe form can also fail spuriously under `pipefail` when
# grep exits before the writer has finished.
progress_before_restart="$(
  curl -fsS "http://127.0.0.1:$proxy_port/api/uploads/$upload_id"
)"
if ! grep -q '"completed_chunks":\[0\]' <<<"$progress_before_restart"; then
  echo "Chunk 0 not reported as completed before backend restart: $progress_before_restart" >&2
  exit 1
fi

# Restart the backend. backend_pid is cleared while no backend is running so
# the EXIT handler does not signal a stale pid.
kill "$backend_pid"
wait "$backend_pid" 2>/dev/null || true
backend_pid=""
start_backend

# After the restart the same upload must still report chunk 0 as completed.
progress_after_restart="$(
  curl -fsS "http://127.0.0.1:$proxy_port/api/uploads/$upload_id"
)"
if ! grep -q '"completed_chunks":\[0\]' <<<"$progress_after_restart"; then
  echo "Chunk 0 not reported as completed after backend restart: $progress_after_restart" >&2
  exit 1
fi

# Upload the remaining chunk and finish the upload.
curl -fsS -X PUT \
  -H "Content-Type: application/octet-stream" \
  --data-binary "@$chunk1" \
  "http://127.0.0.1:$proxy_port/api/uploads/$upload_id/chunks/1" >/dev/null
complete_response="$(
  curl -fsS -X POST "http://127.0.0.1:$proxy_port/api/uploads/$upload_id/complete"
)"
complete_path="$(printf '%s' "$complete_response" | json_field file_path)"
# Print the SHA-256 hex digest of the file named by $1.
sha256_of() {
  sha256sum "$1" | awk '{print $1}'
}

# Digest of the original payload and of the file the backend reported.
source_hash="$(sha256_of "$source_file")"
complete_hash="$(sha256_of "$complete_path")"

# Download the same file through the nginx /files/ location and hash it too.
curl -fsS "http://127.0.0.1:$proxy_port/files/source.bin" -o "$served_file"
served_hash="$(sha256_of "$served_file")"

[[ "$source_hash" == "$complete_hash" ]] || {
  echo "Checksum mismatch after nginx-proxied resume" >&2
  exit 1
}

[[ "$source_hash" == "$served_hash" ]] || {
  echo "Checksum mismatch through nginx completed-file alias" >&2
  exit 1
}

echo "nginx smoke ok: $upload_id"