#!/usr/bin/env bash
# ci-autoplay-smoke.sh — Hang-regression smoke test for the autoplay pipeline.
#
# Runs one seeded autoplay (default: T50) with a hard wall-clock budget and
# asserts the final `turn_stats.jsonl` entry has `outcome != "in_progress"`.
# Catches any class of hang — whether the root cause is in Godot (signal
# re-entry, main-loop stall), in Rust (MCTS deadlock, combat infinite loop),
# or in the shell harness (pkill substring collision, missing SAFETY timeout).
#
# Regression history:
#   2026-04-17 loop13 — PARALLEL=10 T300 hung all 10 seeds because
#   `run_ap3.sh`'s cleanup `pkill -f "AUTO_PLAY_DIR=<path>"` substring-matched
#   active sibling seeds whose paths shared a numeric prefix (seed1 → seed10).
#   Fixed by switching to a unique per-run AP_RUN_ID token. This smoke test
#   would have caught the hang immediately in `./run verify` because the
#   victim game's `outcome` stays "in_progress" after SIGTERM.
#
# Usage:
#   tools/ci-autoplay-smoke.sh                  # default seed=1, T50, 120s budget
#   tools/ci-autoplay-smoke.sh <seed> <turns>   # custom seed/turns
#
# Environment:
#   AUTOPLAY_HOST         — if set, run via SSH on that host (e.g. apricot)
#   PROJECT_ROOT_REMOTE   — repo path on RUN host (default: $HOME/Code/…)
#   REMOTE_RUNNER         — runner script on RUN host (default: $HOME/bin/run_ap3.sh)
#   SMOKE_WALL_BUDGET_SEC — hard wall-clock budget (default: 120)
#   SMOKE_KEEP_OUTPUT     — "1" to keep .local/ci-smoke/ results dir after test
#
# Exit codes:
#   0 — game finished with a terminal outcome (victory | max_turns | defeat),
#       or the run was skipped (no local flatpak and AUTOPLAY_HOST unset)
#   1 — game hung (outcome still "in_progress") OR no turn_stats produced
#   2 — bad arguments / SSH / environment failure
#
# Hook into ./run verify per p0-10 hang-regression mandate.

# No `-e`: the exit statuses of the autoplay commands are captured and
# inspected explicitly further down.
set -uo pipefail

# _require_uint LABEL VALUE — abort with exit code 2 ("bad arguments") unless
# VALUE consists solely of decimal digits. These values are later spliced into
# a directory name and into a remote shell command, so anything other than a
# plain number is rejected up front.
_require_uint() {
  case "$2" in
    '' | *[!0-9]*)
      echo "[ci-autoplay-smoke] FAIL: $1 must be a non-negative integer (got '$2')" >&2
      exit 2
      ;;
  esac
}

# Positional arguments / environment, with defaults.
SEED="${1:-1}"
TURNS="${2:-50}"
BUDGET="${SMOKE_WALL_BUDGET_SEC:-120}"

_require_uint "seed" "$SEED"
_require_uint "turns" "$TURNS"
_require_uint "SMOKE_WALL_BUDGET_SEC" "$BUDGET"
# `timeout 0` disables the timeout entirely, which would defeat the purpose
# of a hang-regression test.
if [ "$BUDGET" -eq 0 ]; then
  echo "[ci-autoplay-smoke] FAIL: SMOKE_WALL_BUDGET_SEC must be greater than 0" >&2
  exit 2
fi

# Resolve the repo root from this script's own location (script lives one
# directory below the project root).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# Per-run results directory, named after the start time and seed.
STAMP="$(date +%Y%m%d_%H%M%S)"
RESULTS_DIR="$PROJECT_DIR/.local/ci-smoke/smoke_${STAMP}_seed${SEED}"
if ! mkdir -p "$RESULTS_DIR"; then
  echo "[ci-autoplay-smoke] FAIL: cannot create results dir $RESULTS_DIR" >&2
  exit 2
fi

echo "[ci-autoplay-smoke] seed=$SEED turns=$TURNS budget=${BUDGET}s"
echo "[ci-autoplay-smoke] results: $RESULTS_DIR"

# _cleanup — EXIT trap handler that removes the per-run results directory.
#
# The directory is kept when:
#   * SMOKE_KEEP_OUTPUT=1, or
#   * the script is exiting with a non-zero status — failure messages point
#     at files inside the results directory, so deleting it would destroy the
#     only evidence of the failure.
# Never changes the script's exit status.
_cleanup() {
  local rc=$? # must be the first statement: status the script is exiting with

  if [ "${SMOKE_KEEP_OUTPUT:-0}" = "1" ]; then
    return 0
  fi
  if [ "$rc" -ne 0 ]; then
    echo "[ci-autoplay-smoke] keeping results for inspection: ${RESULTS_DIR:-<unset>}" >&2
    return 0
  fi
  # Guard against an empty/unset path before a recursive delete.
  if [ -n "${RESULTS_DIR:-}" ]; then
    rm -rf -- "$RESULTS_DIR" 2>/dev/null || true
  fi
  return 0
}
trap _cleanup EXIT

# _fail MESSAGE... — report a smoke-test failure and abort.
#   Writes "[ci-autoplay-smoke] FAIL: <MESSAGE>" to stderr (all arguments are
#   joined with spaces) and exits the script with status 1.
_fail() {
  printf '[ci-autoplay-smoke] FAIL: %s\n' "$*" >&2
  exit 1
}

# ── Run autoplay ─────────────────────────────────────────────────────────────

if [ -n "${AUTOPLAY_HOST:-}" ]; then
  # Remote path — use the same runner autoplay-batch.sh uses.
  #
  # The defaults below contain a literal `$HOME` (note the `\$`) that is meant
  # to be expanded by the REMOTE shell. The paths therefore have to reach the
  # remote side inside double quotes: inside single quotes the remote shell
  # would leave `$HOME` unexpanded and operate on a directory literally named
  # `$HOME` relative to its working directory.
  REMOTE_ROOT="${PROJECT_ROOT_REMOTE:-\$HOME/Code/@projects/@magic-civilization}"
  REMOTE_DIR="${REMOTE_ROOT}/.local/ci-smoke/smoke_${STAMP}_seed${SEED}"
  REMOTE_RUNNER="${REMOTE_RUNNER:-\$HOME/bin/run_ap3.sh}"
  RUN_ID="ci_smoke_${STAMP}_seed${SEED}"

  # The trailing backslashes are consumed by the LOCAL shell (line
  # continuation inside double quotes), so the env assignments and the
  # `timeout` invocation arrive at the remote shell as a single command line.
  # $REMOTE_RUNNER is intentionally left unquoted on the remote side, as
  # before, so its `$HOME` is expanded there.
  ssh "$AUTOPLAY_HOST" "
    set -uo pipefail
    mkdir -p \"$REMOTE_DIR\"
    AUTO_PLAY=true \
    AUTO_PLAY_SEED='$SEED' \
    AUTO_PLAY_TURN_LIMIT='$TURNS' \
    AUTO_PLAY_DIR=\"$REMOTE_DIR\" \
    AP_RUN_ID='$RUN_ID' \
    timeout '$BUDGET' bash $REMOTE_RUNNER
  " >"$RESULTS_DIR/game.log" 2>&1
  REMOTE_EXIT=$?

  # ssh itself reports connection/authentication errors with status 255.
  # That is an environment failure (exit 2), not a hang regression.
  if [ "$REMOTE_EXIT" -eq 255 ]; then
    echo "[ci-autoplay-smoke] FAIL: ssh to $AUTOPLAY_HOST failed (exit 255); last log lines:" >&2
    tail -n 20 "$RESULTS_DIR/game.log" >&2 2>/dev/null || true
    exit 2
  fi

  # Pull turn_stats + meta back. The remote auto_play writes either into
  # the AUTO_PLAY_DIR directly (if the caller named it `game_<stamp>_seed<N>`)
  # or into a `game_*` subdir. ssh-cat handles both shapes — globbing via
  # scp's non-quoted path ran into login-shell variations.
  # Best effort: a missing file simply leaves an empty local copy, which the
  # outcome assertion later treats as "no turn_stats produced".
  ssh "$AUTOPLAY_HOST" "find \"$REMOTE_DIR\" -maxdepth 3 -name turn_stats.jsonl -exec cat {} +" \
    >"$RESULTS_DIR/turn_stats.jsonl" 2>/dev/null || true
  ssh "$AUTOPLAY_HOST" "find \"$REMOTE_DIR\" -maxdepth 3 -name meta.json -exec cat {} +" \
    >"$RESULTS_DIR/meta.json" 2>/dev/null || true

  # 124 is the status `timeout` returns when the budget expired.
  if [ "$REMOTE_EXIT" -eq 124 ]; then
    _fail "autoplay timed out after ${BUDGET}s — hang regression detected (SSH timeout path)"
  fi
else
  # Local path — flatpak Godot, Linux only.
  if ! command -v flatpak >/dev/null 2>&1; then
    echo "[ci-autoplay-smoke] SKIP: no flatpak locally and AUTOPLAY_HOST unset"
    exit 0
  fi

  # p1-45: Rebuild GDExtension before smoke so the .so is never stale.
  # A failed build aborts the run: continuing would smoke-test exactly the
  # stale library this step exists to prevent.
  echo "[ci-autoplay-smoke] p1-45: rebuilding GDExtension before smoke run"
  MC_SMOKE_BUILD_STAMP="$(date +%s)"
  if ! (cd "$PROJECT_DIR/src/simulator" &&
    CARGO_TARGET_DIR="/tmp/mc-build-${MC_SMOKE_BUILD_STAMP}/target" bash build-gdext.sh); then
    echo "[ci-autoplay-smoke] FAIL: GDExtension rebuild failed" >&2
    exit 2
  fi
  echo "[ci-autoplay-smoke] GDExtension rebuild complete"

  if ! cd "$PROJECT_DIR/src/game"; then
    echo "[ci-autoplay-smoke] FAIL: cannot cd to $PROJECT_DIR/src/game" >&2
    exit 2
  fi

  timeout "$BUDGET" flatpak run --user \
    --filesystem=home \
    --env=AUTO_PLAY=true \
    --env=AUTO_PLAY_SEED="$SEED" \
    --env=AUTO_PLAY_TURN_LIMIT="$TURNS" \
    --env=AUTO_PLAY_DIR="$RESULTS_DIR" \
    --env=AP_RUN_ID="ci_smoke_${STAMP}_seed${SEED}" \
    org.godotengine.Godot --path . --rendering-method gl_compatibility --headless \
    >"$RESULTS_DIR/game.log" 2>&1
  LOCAL_EXIT=$?

  # 124 is the status `timeout` returns when the budget expired.
  if [ "$LOCAL_EXIT" -eq 124 ]; then
    _fail "autoplay timed out after ${BUDGET}s — hang regression detected"
  fi
fi

# ── Assert terminal outcome ──────────────────────────────────────────────────

# _last_outcome FILE — print the `outcome` field of the last non-blank line of
# a JSON-lines file.
#   stdout: the outcome value; "missing" when the record has no `outcome` key;
#           "parse_error" when the file is unreadable, has no non-blank line,
#           or the last record is not a JSON object.
# Blank trailing lines are skipped so a stray newline at the end of the file
# is not mistaken for a corrupt record.
_last_outcome() {
  python3 - "$1" <<'PY'
import json, sys
try:
    with open(sys.argv[1], encoding="utf-8") as fh:
        records = [line for line in fh if line.strip()]
    print(json.loads(records[-1]).get("outcome", "missing"))
except Exception:
    print("parse_error")
PY
}

# Locate the stats file: either directly in the results dir or in a subdir.
STATS_FILE="$(find "$RESULTS_DIR" -name 'turn_stats.jsonl' -type f 2>/dev/null | head -1)"
if [ -z "$STATS_FILE" ] || [ ! -s "$STATS_FILE" ]; then
  _fail "no turn_stats.jsonl produced (autoplay never wrote a turn line)"
fi

# Without python3 the outcome cannot be read at all; report that as an
# environment failure (exit 2) rather than as a bogus game outcome.
if ! command -v python3 >/dev/null 2>&1; then
  echo "[ci-autoplay-smoke] FAIL: python3 is required to parse $STATS_FILE" >&2
  exit 2
fi

LAST_OUTCOME="$(_last_outcome "$STATS_FILE")"

case "$LAST_OUTCOME" in
  victory | max_turns | defeat)
    echo "[ci-autoplay-smoke] PASS — outcome=$LAST_OUTCOME"
    exit 0
    ;;
  in_progress)
    _fail "outcome=in_progress — game hung mid-run (see $STATS_FILE)"
    ;;
  *)
    _fail "outcome=$LAST_OUTCOME — unexpected terminal state (see $STATS_FILE)"
    ;;
esac