magicciv/tools/ci-autoplay-smoke.sh

151 lines
6.1 KiB
Bash
Raw Normal View History

#!/usr/bin/env bash
# ci-autoplay-smoke.sh — Hang-regression smoke test for the autoplay pipeline.
#
# Runs one seeded autoplay (T50 by default) with a hard wall-clock budget and asserts the
# final `turn_stats.jsonl` entry has `outcome != "in_progress"`. Catches any
# class of hang — whether the root cause is in Godot (signal re-entry, main-
# loop stall), in Rust (MCTS deadlock, combat infinite loop), or in the
# shell harness (pkill substring collision, missing SAFETY timeout).
#
# Regression history:
# 2026-04-17 loop13 — PARALLEL=10 T300 hung all 10 seeds because
# `run_ap3.sh`'s cleanup `pkill -f "AUTO_PLAY_DIR=<path>"` substring-matched
# active sibling seeds whose paths shared a numeric prefix (seed1 → seed10).
# Fixed by switching to a unique per-run AP_RUN_ID token. This smoke test
# would have caught the hang immediately in `./run verify` because the
# victim game's `outcome` stays "in_progress" after SIGTERM.
#
# Usage:
# tools/ci-autoplay-smoke.sh # default seed=1, T50, 120s budget
# tools/ci-autoplay-smoke.sh <seed> <turns> # custom seed/turns
#
# Environment:
# AUTOPLAY_HOST — if set, run via SSH on that host (e.g. apricot)
# PROJECT_ROOT_REMOTE — repo path on RUN host (default: $HOME/Code/…)
# SMOKE_WALL_BUDGET_SEC — hard wall-clock budget (default: 120)
# SMOKE_KEEP_OUTPUT — "1" to keep .local/ci-smoke/ results dir after test
#
# Exit codes:
# 0 — game finished with a terminal outcome (victory | max_turns | defeat)
# 1 — game hung (outcome still "in_progress") OR no turn_stats produced
# 2 — bad arguments / SSH / environment failure
#
# Hook into ./run verify per p0-10 hang-regression mandate.
# Unset variables are errors and a pipeline reports its first failing stage.
# `-e` is not enabled: the script reads `$?` from ssh/timeout by hand.
set -uo pipefail

# Returns 0 when $1 is a non-empty string of decimal digits, 1 otherwise.
_smoke_is_uint() {
  [[ "${1:-}" =~ ^[0-9]+$ ]]
}

SEED="${1:-1}"
TURNS="${2:-50}"
BUDGET="${SMOKE_WALL_BUDGET_SEC:-120}"

# Validate inputs up front (documented exit code 2). These values end up in
# directory names and in the command line sent to the remote shell, so
# anything that is not a plain number is rejected rather than interpolated.
if ! _smoke_is_uint "${SEED#-}"; then
  echo "[ci-autoplay-smoke] ERROR: seed must be an integer, got '$SEED'" >&2
  exit 2
fi
if ! _smoke_is_uint "$TURNS"; then
  echo "[ci-autoplay-smoke] ERROR: turns must be a non-negative integer, got '$TURNS'" >&2
  exit 2
fi
# `timeout 0` disables the timeout altogether, which would defeat the purpose
# of a hang-regression test — require a strictly positive number of seconds.
if ! _smoke_is_uint "$BUDGET" || ! [[ "$BUDGET" =~ [1-9] ]]; then
  echo "[ci-autoplay-smoke] ERROR: SMOKE_WALL_BUDGET_SEC must be a positive integer, got '$BUDGET'" >&2
  exit 2
fi

# Resolve the project root as the parent of the directory holding this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# Per-run results directory, unique per (timestamp, seed).
STAMP="$(date +%Y%m%d_%H%M%S)"
RESULTS_DIR="$PROJECT_DIR/.local/ci-smoke/smoke_${STAMP}_seed${SEED}"
if ! mkdir -p -- "$RESULTS_DIR"; then
  echo "[ci-autoplay-smoke] ERROR: cannot create results dir $RESULTS_DIR" >&2
  exit 2
fi

echo "[ci-autoplay-smoke] seed=$SEED turns=$TURNS budget=${BUDGET}s"
echo "[ci-autoplay-smoke] results: $RESULTS_DIR"
# EXIT-trap handler: removes the per-run results directory.
#
# The directory is preserved when SMOKE_KEEP_OUTPUT=1, and also whenever the
# script is exiting with a non-zero status: the failure messages point at
# files inside it ("see <turn_stats.jsonl>"), so deleting it on failure would
# remove exactly the evidence the message refers to.
#
# Globals: RESULTS_DIR (read), SMOKE_KEEP_OUTPUT (read)
# Returns: always 0; the script's own exit status is not altered.
_cleanup() {
  # Must be the first statement: inside an EXIT trap `$?` is the status the
  # script is exiting with.
  local exit_status=$?
  if [ "${SMOKE_KEEP_OUTPUT:-0}" = "1" ]; then
    return 0
  fi
  if [ "$exit_status" -ne 0 ]; then
    echo "[ci-autoplay-smoke] keeping results for inspection: $RESULTS_DIR" >&2
    return 0
  fi
  # `:?` aborts instead of running `rm -rf` on an empty path.
  rm -rf -- "${RESULTS_DIR:?}" 2>/dev/null || true
}
trap _cleanup EXIT
# Report a failed smoke assertion and abort the script.
# Arguments: $* — message words, joined into one line
# Outputs:   "[ci-autoplay-smoke] FAIL: <message>" on stderr
# Exits:     always terminates with status 1
_fail() {
  printf '%s\n' "[ci-autoplay-smoke] FAIL: $*" 1>&2
  exit 1
}
# ── Run autoplay ─────────────────────────────────────────────────────────────
# Launches one autoplay game under `timeout "$BUDGET"`, either on
# $AUTOPLAY_HOST over SSH or locally through flatpak Godot. Output goes to
# $RESULTS_DIR/game.log; the remote path additionally copies turn_stats.jsonl
# and meta.json back into $RESULTS_DIR.
if [ -n "${AUTOPLAY_HOST:-}" ]; then
  # Remote path — use the same runner autoplay-batch.sh uses.
  # The defaults below contain a literal `$HOME` (escaped locally) that must
  # be expanded by the REMOTE shell. They are therefore embedded in remote
  # double quotes; single quotes would keep `$HOME` as a literal directory
  # name on the remote side.
  REMOTE_ROOT="${PROJECT_ROOT_REMOTE:-\$HOME/Code/@projects/@magic-civilization}"
  REMOTE_DIR="${REMOTE_ROOT}/.local/ci-smoke/smoke_${STAMP}_seed${SEED}"
  REMOTE_RUNNER="${REMOTE_RUNNER:-\$HOME/bin/run_ap3.sh}"
  RUN_ID="ci_smoke_${STAMP}_seed${SEED}"
  # $REMOTE_RUNNER stays unquoted on the remote side, as before, so the remote
  # shell expands `$HOME` in it.
  ssh "$AUTOPLAY_HOST" "
    set -uo pipefail
    mkdir -p \"$REMOTE_DIR\"
    AUTO_PLAY=true \
    AUTO_PLAY_SEED='$SEED' \
    AUTO_PLAY_TURN_LIMIT='$TURNS' \
    AUTO_PLAY_DIR=\"$REMOTE_DIR\" \
    AP_RUN_ID='$RUN_ID' \
    timeout '$BUDGET' bash $REMOTE_RUNNER
  " >"$RESULTS_DIR/game.log" 2>&1
  REMOTE_EXIT=$?
  # ssh reserves exit status 255 for its own errors (connection, auth, …).
  # That is an environment failure, not a game hang: report it as exit 2 and
  # show the captured output, since nothing useful can be pulled back.
  if [ "$REMOTE_EXIT" -eq 255 ]; then
    echo "[ci-autoplay-smoke] ERROR: ssh to '$AUTOPLAY_HOST' failed (exit 255)" >&2
    tail -n 20 -- "$RESULTS_DIR/game.log" >&2 || true
    exit 2
  fi
  # Pull turn_stats + meta back. The remote auto_play writes either into
  # the AUTO_PLAY_DIR directly (if the caller named it `game_<stamp>_seed<N>`)
  # or into a `game_*` subdir. ssh-cat handles both shapes — globbing via
  # scp's non-quoted path ran into login-shell variations.
  # Best effort: a missing file just leaves an empty local copy, which the
  # outcome assertion later reports.
  ssh "$AUTOPLAY_HOST" "find \"$REMOTE_DIR\" -maxdepth 3 -name turn_stats.jsonl -print0 | xargs -0 -I{} cat {}" \
    >"$RESULTS_DIR/turn_stats.jsonl" 2>/dev/null || true
  ssh "$AUTOPLAY_HOST" "find \"$REMOTE_DIR\" -maxdepth 3 -name meta.json -print0 | xargs -0 -I{} cat {}" \
    >"$RESULTS_DIR/meta.json" 2>/dev/null || true
  # 124 is what `timeout` returns when it had to stop the command.
  if [ "$REMOTE_EXIT" -eq 124 ]; then
    _fail "autoplay timed out after ${BUDGET}s — hang regression detected (SSH timeout path)"
  fi
else
  # Local path — flatpak Godot, Linux only.
  if ! command -v flatpak >/dev/null 2>&1; then
    echo "[ci-autoplay-smoke] SKIP: no flatpak locally and AUTOPLAY_HOST unset"
    exit 0
  fi
  # p1-45: Rebuild GDExtension before smoke so the .so is never stale.
  echo "[ci-autoplay-smoke] p1-45: rebuilding GDExtension before smoke run"
  MC_SMOKE_BUILD_STAMP="$(date +%s)"
  # A failed build must stop the run: continuing would smoke-test whatever
  # stale .so is lying around, which is what this step exists to prevent.
  if ! (cd "$PROJECT_DIR/src/simulator" && \
        CARGO_TARGET_DIR="/tmp/mc-build-${MC_SMOKE_BUILD_STAMP}/target" bash build-gdext.sh); then
    echo "[ci-autoplay-smoke] ERROR: GDExtension rebuild failed — not running smoke against a stale build" >&2
    exit 2
  fi
  echo "[ci-autoplay-smoke] GDExtension rebuild complete"
  if ! cd "$PROJECT_DIR/src/game"; then
    echo "[ci-autoplay-smoke] ERROR: cannot cd to $PROJECT_DIR/src/game" >&2
    exit 2
  fi
  timeout "$BUDGET" flatpak run --user \
    --filesystem=home \
    --env=AUTO_PLAY=true \
    --env=AUTO_PLAY_SEED="$SEED" \
    --env=AUTO_PLAY_TURN_LIMIT="$TURNS" \
    --env=AUTO_PLAY_DIR="$RESULTS_DIR" \
    --env=AP_RUN_ID="ci_smoke_${STAMP}_seed${SEED}" \
    org.godotengine.Godot --path . --rendering-method gl_compatibility --headless \
    >"$RESULTS_DIR/game.log" 2>&1
  LOCAL_EXIT=$?
  # 124 is what `timeout` returns when it had to stop the command.
  if [ "$LOCAL_EXIT" -eq 124 ]; then
    _fail "autoplay timed out after ${BUDGET}s — hang regression detected"
  fi
fi
# ── Assert terminal outcome ──────────────────────────────────────────────────
# Print the `outcome` field of the LAST line of a turn_stats.jsonl file.
# Arguments: $1 — path to the JSONL file
# Outputs:   the outcome value on stdout; "missing" when the last line is a
#            JSON object without an `outcome` key; "parse_error" when the
#            last line cannot be read as a JSON object.
_last_outcome() {
  tail -n 1 -- "$1" | python3 -c '
import json, sys
try:
    d = json.loads(sys.stdin.read())
    print(d.get("outcome", "missing"))
except Exception:
    print("parse_error")
'
}

# Take the first turn_stats.jsonl found under the results directory.
STATS_FILE="$(find "$RESULTS_DIR" -name 'turn_stats.jsonl' -type f 2>/dev/null | head -n 1)"
if [ -z "$STATS_FILE" ] || [ ! -s "$STATS_FILE" ]; then
  _fail "no turn_stats.jsonl produced (autoplay never wrote a turn line)"
fi

# Without python3 the outcome would come back empty and be misreported as a
# game failure; surface it as an environment failure (exit 2) instead.
if ! command -v python3 >/dev/null 2>&1; then
  echo "[ci-autoplay-smoke] ERROR: python3 is required to parse $STATS_FILE" >&2
  exit 2
fi

LAST_OUTCOME="$(_last_outcome "$STATS_FILE")"
case "$LAST_OUTCOME" in
  victory|max_turns|defeat)
    echo "[ci-autoplay-smoke] PASS — outcome=$LAST_OUTCOME"
    exit 0
    ;;
  in_progress)
    # The game never reached a terminal state: this is the hang signature.
    _fail "outcome=in_progress — game hung mid-run (see $STATS_FILE)"
    ;;
  *)
    # Includes "missing", "parse_error" and any outcome not listed above.
    _fail "outcome=$LAST_OUTCOME — unexpected terminal state (see $STATS_FILE)"
    ;;
esac