magicciv/tools/b5-aggregate.sh
Natalie c88e136469 fix(@projects): 🐛 update deployment and guide workflows
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-06-10 03:38:03 -07:00

119 lines
4.7 KiB
Bash
Executable file
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# b5-aggregate.sh — Run p0-02 B5 50-game sweep: 5 clans × 10 seeds, aggregate.
#
# TODO: re-run when RUN-host environment stabilizes (see task #5 blocker thread
# — Diplomacy class_name collision prevents game compile as of 2026-04-17).
#
# Per p0-02 acceptance and the ai-verify ↔ data-dev contract:
# - Each clan runs via AI_PIN_PERSONALITY=<id> + SEED_OFFSET=<clan_index × PER_CLAN_COUNT> (N*10 at the default of 10 seeds per clan)
# - All 50 games land under one parent dir for single-gate aggregation
# - Disjoint seed ranges (1..10, 11..20, ...) avoid find_game_dirs() collision
# - personality_win_balance gate must exit 0 (no clan >50%, all ≥1 win)
#
# Produces verdict JSON at .local/iter/b5-<stamp>/verdict.json with:
# - pass: bool
# - per_clan: {clan: {appearances, wins, win_rate_pct}}
# - reasons: [str, ...] when pass=false
# - supporting_metrics: captured from autoplay-report.py per-clan table
#
# Runs ON the RUN host via SSH. Results stay on RUN host; verdict JSON is the
# single artifact shipped back to EDIT host for team-lead review.
#
# Exit codes:
# 0 — gate passed, p0-02 acceptance can be cited
# 1 — gate failed, verdict.json carries the specific reason(s)
# 2 — usage / env error / sweep failure
set -euo pipefail

# PROJECT_DIR is the parent of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# Required environment. Checked explicitly instead of via ${VAR:?...} so that a
# missing value exits with status 2 (the documented "usage / env error" code)
# rather than bash's own parameter-expansion failure status, which would be
# indistinguishable from "gate failed".
if [[ -z "${AUTOPLAY_HOST:-}" ]]; then
  echo "AUTOPLAY_HOST must be set (e.g. lilith@apricot.lan)" >&2
  exit 2
fi
if [[ -z "${PROJECT_ROOT_REMOTE:-}" ]]; then
  echo "PROJECT_ROOT_REMOTE must be set (repo path on RUN host)" >&2
  exit 2
fi

# One timestamp names both the local artifact dir and the remote results dir.
STAMP="$(date +%Y%m%d_%H%M%S)"
LOCAL_DIR="$PROJECT_DIR/.local/iter/b5-$STAMP"
REMOTE_DIR="$PROJECT_ROOT_REMOTE/.local/iter/b5-$STAMP"

# Tunables, overridable from the environment.
TURN_LIMIT="${TURN_LIMIT:-300}"
PER_CLAN_COUNT="${PER_CLAN_COUNT:-10}"
PARALLEL="${PARALLEL:-10}"

# These values are used in shell arithmetic and interpolated unquoted into the
# command line sent to the RUN host, so reject anything that is not a plain
# decimal integer (no sign, no leading zeros that arithmetic would read as octal).
for knob in TURN_LIMIT PER_CLAN_COUNT PARALLEL; do
  if [[ ! "${!knob}" =~ ^(0|[1-9][0-9]*)$ ]]; then
    echo "ERROR: $knob must be a non-negative decimal integer (got '${!knob}')" >&2
    exit 2
  fi
done

mkdir -p "$LOCAL_DIR"

# Canonical clan order. Seed offsets are computed as (index × PER_CLAN_COUNT) so
# a 10-seed-per-clan sweep yields disjoint ranges 1..10 / 11..20 / 21..30 / ...
CLANS=(ironhold goldvein blackhammer deepforge runesmith)

echo "============================================================"
echo "B5 Aggregation — $STAMP"
echo "Host: $AUTOPLAY_HOST"
echo "Per-clan: $PER_CLAN_COUNT seeds, turn_limit=$TURN_LIMIT, PARALLEL=$PARALLEL"
echo "Total games: $(( ${#CLANS[@]} * PER_CLAN_COUNT )) across ${#CLANS[@]} clans"
echo "Remote results: $REMOTE_DIR"
echo "============================================================"
# ── Sweep each clan into the shared parent dir ──────────────────────────────
# One SSH invocation per clan, run sequentially. Every clan writes into the
# same $REMOTE_DIR; the per-clan SEED_OFFSET keeps the seed ranges disjoint.
# Any failure (remote step or SSH itself) aborts the whole run with exit 2
# after a best-effort copy of that clan's sweep log.
idx=0
for clan in "${CLANS[@]}"; do
  offset=$(( idx * PER_CLAN_COUNT ))
  echo ""
  echo "[$(date +%H:%M:%S)] Sweep $((idx + 1))/${#CLANS[@]}: clan=$clan seeds=$((offset + 1))..$((offset + PER_CLAN_COUNT))"
  # The command string is expanded locally before being sent. The steps are
  # chained with && so a failed mkdir/cd stops the remote command instead of
  # launching the batch from whatever directory the login shell started in.
  # Backslash-newlines inside the double quotes are removed locally, so the
  # remote side receives a single command line.
  ssh "$AUTOPLAY_HOST" "
    mkdir -p '$REMOTE_DIR' && \
    cd '$PROJECT_ROOT_REMOTE' && \
    AI_USE_MCTS=true \
    AI_PIN_PERSONALITY='$clan' \
    SEED_OFFSET=$offset \
    PARALLEL=$PARALLEL \
    bash tools/autoplay-batch.sh $PER_CLAN_COUNT $TURN_LIMIT '$REMOTE_DIR' \
    > '$REMOTE_DIR/sweep_${clan}.log' 2>&1
  " || {
    echo "ERROR: sweep $clan failed — see $REMOTE_DIR/sweep_${clan}.log on $AUTOPLAY_HOST" >&2
    # Best effort: the log may not exist if the failure happened before the batch started.
    scp "$AUTOPLAY_HOST:$REMOTE_DIR/sweep_${clan}.log" "$LOCAL_DIR/" 2>/dev/null || true
    exit 2
  }
  idx=$((idx + 1))
done
# ── Aggregate via autoplay-report (per-clan table) + gate ───────────────────
# Step 1: run autoplay-report.py over the shared results dir on the RUN host.
# Its stdout is stored as autoplay-report.csv and its stderr as
# autoplay-report.summary, both inside $REMOTE_DIR.
echo ""
# Report the real game count; it is only 50 at the default PER_CLAN_COUNT.
echo "[$(date +%H:%M:%S)] Aggregating $(( ${#CLANS[@]} * PER_CLAN_COUNT )) games — autoplay-report.py..."
ssh "$AUTOPLAY_HOST" "
  cd '$PROJECT_ROOT_REMOTE' && \
  python3 tools/autoplay-report.py '$REMOTE_DIR' \
  > '$REMOTE_DIR/autoplay-report.csv' \
  2> '$REMOTE_DIR/autoplay-report.summary'
" || {
  # Exit 2 explicitly: letting set -e propagate ssh's raw status could yield 1,
  # which this script reserves for "gate failed".
  echo "ERROR: autoplay-report.py failed — see $REMOTE_DIR/autoplay-report.summary on $AUTOPLAY_HOST" >&2
  scp "$AUTOPLAY_HOST:$REMOTE_DIR/autoplay-report.summary" "$LOCAL_DIR/" 2>/dev/null || true
  exit 2
}

# Step 2: run the personality_win_balance gate. Its stdout becomes verdict.json
# and its stderr gate.stderr. The exit status is captured in gate_status rather
# than aborting, because a non-zero gate result is an expected outcome that is
# reported after the artifacts have been fetched.
echo "[$(date +%H:%M:%S)] Running personality_win_balance gate..."
gate_status=0
ssh "$AUTOPLAY_HOST" "
  cd '$PROJECT_ROOT_REMOTE' && \
  python3 tools/checklist-report.py personality_win_balance '$REMOTE_DIR' \
  > '$REMOTE_DIR/verdict.json' \
  2> '$REMOTE_DIR/gate.stderr'
" || gate_status=$?

# ssh itself exits 255 when the connection/transport fails. That is an
# environment error, not a gate verdict, so do not let it be reported as FAIL.
if (( gate_status == 255 )); then
  echo "ERROR: ssh to $AUTOPLAY_HOST failed while running the gate (exit 255)" >&2
  exit 2
fi
# ── Fetch the small artifacts back ──────────────────────────────────────────
# Copy each result file from the RUN host into the local iteration dir. A file
# that cannot be copied only produces a warning; the run continues.
artifacts=(verdict.json gate.stderr autoplay-report.csv autoplay-report.summary)
for artifact in "${artifacts[@]}"; do
  if ! scp "$AUTOPLAY_HOST:$REMOTE_DIR/$artifact" "$LOCAL_DIR/" 2>/dev/null; then
    echo "WARN: could not fetch $artifact" >&2
  fi
done

# Final summary banner.
printf '\n'
echo "============================================================"
echo "Gate exit: $gate_status"
echo "Local verdict: $LOCAL_DIR/verdict.json"
echo "============================================================"

# Map the gate's exit status onto this script's own: 0 = pass, anything else = 1.
if (( gate_status != 0 )); then
  echo "B5 FAIL — see $LOCAL_DIR/verdict.json for reasons." >&2
  exit 1
fi
echo "B5 PASS — p0-02 acceptance citable from this run."
exit 0