feat(@projects/@magic-civilization): ✨ add smoke test script for Claude Player API
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
2bce49d1f3
commit
5c9800cb77
1 changed files with 108 additions and 0 deletions
108
scripts/claude-smoke-5endturn.sh
Executable file
108
scripts/claude-smoke-5endturn.sh
Executable file
|
|
@ -0,0 +1,108 @@
|
|||
#!/usr/bin/env bash
# p2-71 — EndTurn smoke driver for the Claude Player API harness.
#
# Spawns claude-player-server.sh, sends SMOKE_TURNS (default 5) act:end_turn
# requests followed by one shutdown request over stdin, parses the JSON-Lines
# responses, and prints a one-line verdict:
#
#   {"turns_observed": N, "ai_turn_completed_events": N,
#    "actions_applied_per_turn": [...], "passed": true|false,
#    "reasons": [...]}
#
# Passing requires at least SMOKE_TURNS responses that carry
# ai_turn_completed events, with `actions_applied > 0` for every slot
# reported in each of them.
# Exits 0 on pass, 1 on fail.
#
# Env: CP_SEED (default 42), CP_PLAYERS (default 3), CP_CLAUDE_SLOT (default 0),
#      CP_MAP_SIZE (default duel), SMOKE_TURNS (default 5),
#      SMOKE_TIMEOUT (harness timeout in seconds, default 90).

# Note: `-e` is not set, so a failing command does not abort the script on
# its own; failures are handled explicitly below.
set -uo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

: "${CP_PLAYERS:=3}"
: "${CP_CLAUDE_SLOT:=0}"
: "${CP_SEED:=42}"
: "${CP_MAP_SIZE:=duel}"
: "${SMOKE_TURNS:=5}"
: "${SMOKE_TIMEOUT:=90}"

# Only the CP_* settings are exported to the harness; SMOKE_* stay local.
export CP_PLAYERS CP_CLAUDE_SLOT CP_SEED CP_MAP_SIZE

# Bail out if the scratch directory cannot be created; otherwise the
# redirects below would target "/in.jsonl" and friends.
TMP=$(mktemp -d -t mc-smoke-XXXXXX) || {
    echo "error: could not create temporary directory" >&2
    exit 1
}
# Single quotes: expand $TMP when the trap fires, so any character in the
# path (including quotes) is handled safely.
trap 'rm -rf -- "$TMP"' EXIT

# Build the request stream — N end_turn acts followed by shutdown.
{
    for i in $(seq 1 "$SMOKE_TURNS"); do
        printf '{"type":"act","id":%d,"action":{"action":"end_turn"}}\n' "$i"
    done
    printf '{"type":"shutdown","id":999}\n'
} > "$TMP/in.jsonl"

# Run the harness under a timeout. A non-zero harness exit is tolerated here
# because the verdict is derived from whatever responses were captured.
HARNESS_RC=0
timeout "$SMOKE_TIMEOUT" "$SCRIPT_DIR/claude-player-server.sh" \
    < "$TMP/in.jsonl" > "$TMP/out.jsonl" 2> "$TMP/err.log" || HARNESS_RC=$?
if [[ $HARNESS_RC -eq 124 ]]; then
    # 124 is the exit status timeout(1) uses when the time limit was hit.
    echo "warning: harness timed out after ${SMOKE_TIMEOUT}s" >&2
fi
|
||||
|
||||
# Parse — for each turn-response, count `ai_turn_completed` events and
|
||||
# sum actions_applied across slots. Output one verdict line.
|
||||
python3 - "$TMP/out.jsonl" "$SMOKE_TURNS" <<'PY'
|
||||
import json
import sys


def parse_responses(lines):
    """Collect per-turn AI action counts from harness JSON-Lines output.

    ``lines`` is any iterable of text lines (an open file works). A line is
    ignored when it is blank, is not valid JSON, is not a JSON object, has
    ``ok`` other than ``true``, or has no non-empty ``events`` list.

    For each remaining envelope, every event whose ``event`` (or ``type``)
    field is ``"ai_turn_completed"`` is counted and its ``actions_applied``
    value is summed per ``player`` slot. A missing, null or non-numeric
    ``actions_applied`` counts as 0 so that it surfaces as a failed slot
    instead of a traceback. Envelopes without any such event are dropped.

    Returns ``(per_turn, total_ai_events)`` where ``per_turn`` is a list of
    ``{slot: actions_applied}`` dicts in input order.
    """
    per_turn = []
    total_ai_events = 0
    for raw in lines:
        text = raw.strip()
        if not text:
            continue
        try:
            obj = json.loads(text)
        except (ValueError, RecursionError):
            # Non-JSON noise on stdout (logs, banners) is expected; skip it.
            continue
        # Valid JSON that is not an object (null, list, number) has no .get().
        if not isinstance(obj, dict) or obj.get("ok") is not True:
            continue
        events = obj.get("events")
        if not isinstance(events, list) or not events:
            continue
        slot_actions = {}
        for ev in events:
            if not isinstance(ev, dict):
                continue
            ev_type = ev.get("event") or ev.get("type")
            if ev_type != "ai_turn_completed":
                continue
            total_ai_events += 1
            slot = ev.get("player")
            if isinstance(slot, (list, dict)):
                # Unhashable slot ids cannot be dict keys; use a stable string.
                slot = json.dumps(slot, sort_keys=True)
            try:
                applied = int(ev.get("actions_applied", 0))
            except (TypeError, ValueError, OverflowError):
                applied = 0
            slot_actions[slot] = slot_actions.get(slot, 0) + applied
        if slot_actions:
            per_turn.append(slot_actions)
    return per_turn, total_ai_events


def evaluate(per_turn, n_turns):
    """Decide pass/fail for the collected per-turn slot counts.

    The run passes only when at least ``n_turns`` turn responses were
    observed and every slot in every observed turn applied more than zero
    actions.

    Returns ``(passed, reasons)``; ``reasons`` lists one message per failed
    check and is empty exactly when ``passed`` is true.
    """
    reasons = []
    if len(per_turn) < n_turns:
        reasons.append(f"only {len(per_turn)} turn responses out of {n_turns}")
    for i, slot_actions in enumerate(per_turn, 1):
        if not slot_actions:
            reasons.append(f"turn {i}: no ai_turn_completed events")
        for slot, applied in slot_actions.items():
            if applied <= 0:
                reasons.append(f"turn {i} slot {slot}: actions_applied={applied}")
    return not reasons, reasons


def main(argv):
    """Read the output file, print the one-line JSON verdict, return 0 or 1.

    ``argv[1]`` is the path of the harness output file and ``argv[2]`` the
    number of turns that must be observed.
    """
    out_path = argv[1]
    n_turns = int(argv[2])
    # Decode leniently: a stray byte in harness output must not abort parsing.
    with open(out_path, encoding="utf-8", errors="replace") as f:
        per_turn, total_ai_events = parse_responses(f)
    passed, reasons = evaluate(per_turn, n_turns)
    print(json.dumps({
        "turns_observed": len(per_turn),
        "ai_turn_completed_events": total_ai_events,
        "actions_applied_per_turn": per_turn,
        "passed": passed,
        "reasons": reasons,
    }))
    return 0 if passed else 1


# A script fed to `python3 -` on stdin runs as __main__, so this still fires.
if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||
PY
|
||||
RC=$?
if [[ $RC -ne 0 ]]; then
    # On failure, surface the last lines of the harness stderr for diagnosis.
    echo "--- stderr ---" >&2
    # `-n 40` is the portable spelling; the bare `-40` form is obsolete.
    tail -n 40 "$TMP/err.log" >&2 || true
fi
exit "$RC"
|
||||
Loading…
Add table
Reference in a new issue