feat(@projects/@magic-civilization): ✨ update player_stats output in auto-play tests

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-04-14 17:32:01 -07:00 · 2026-04-14 17:32:01 -07:00 · ef9818dace
commit ef9818dace
parent 979cd0ca26
4 changed files with 193 additions and 162 deletions
--- a/src/game/engine/scenes/tests/auto_play.gd
+++ b/src/game/engine/scenes/tests/auto_play.gd
@@ -1401,7 +1401,7 @@ func _write_result(outcome: String) -> void:
 		"victory_type": _victory_type,
 		"wall_clock_sec": wall_clock,
 		"aggregate": aggregate,
-		"final_stats": _build_final_stats(),
+		"player_stats": _build_player_stats(),
 		"invariant_violations": _violations,
 	}
 	DirAccess.make_dir_recursive_absolute(_output_dir)
--- a/tools/autoplay-report.py
+++ b/tools/autoplay-report.py
@@ -1,31 +1,42 @@
 #!/usr/bin/env python3
-"""autoplay-report.py — Aggregate autoplay batch results into a CSV + summary report.
+"""
+Aggregate auto_play batch results into a CSV + summary + assertions.
+
+Reads all result_<stamp>_seed<N>.json files under <results_dir>/seed_*/
+Validates each against tools/autoplay-result-schema.json before consuming.

 Usage:
    tools/autoplay-report.py <results_dir> [--baseline PATH] [--update-baseline]

-Reads all <results_dir>/seed_*/result_*.json files.
-Emits CSV to stdout, summary to stderr.
-Exits non-zero if any assertion fails.
-"""
+Exits:
+    0  all games parsed, validated, and assertions passed
+    1  schema validation failure OR assertion failure OR missing results
+    2  usage error

+stdlib only — no pip installs.
+"""
 from __future__ import annotations

-import argparse
+import csv
 import json
-import os
 import statistics
 import sys
 from pathlib import Path
 from typing import Any

+# Local import (same dir)
+sys.path.insert(0, str(Path(__file__).resolve().parent))
+from autoplay_validate import load_schema, validate  # noqa: E402

-def find_result_files(results_dir: Path) -> list[tuple[int, Path]]:
-    """Return (seed, path) pairs for all result_*.json files, sorted by seed.
-    Matches filenames of the form result_<datetime>_seed<N>.json under
-    seed_<N>/ subdirectories. Picks the most recent (lexicographic max) if
-    multiple stamps exist for the same seed."""
-    results: list[tuple[int, Path]] = []
+
+def find_result_files(results_dir: Path) -> tuple[list[tuple[int, Path]], list[int]]:
+    """Find result files. Returns (found, missing_seeds).
+
+    Picks the most recent timestamped file per seed (lexicographic max of
+    result_<stamp>_seed<N>.json). Falls back to legacy result_<N>.json.
+    """
+    found: list[tuple[int, Path]] = []
+    missing: list[int] = []
    for seed_dir in sorted(results_dir.glob("seed_*")):
        if not seed_dir.is_dir():
            continue
@@ -35,94 +46,120 @@ def find_result_files(results_dir: Path) -> list[tuple[int, Path]]:
        seed = int(seed_str)
        candidates = sorted(seed_dir.glob(f"result_*_seed{seed}.json"))
        if not candidates:
-            # Fall back to legacy naming for backward compatibility
            legacy = seed_dir / f"result_{seed}.json"
            if legacy.exists():
                candidates = [legacy]
-        if not candidates:
-            continue
-        results.append((seed, candidates[-1]))
-    return results
+        if candidates:
+            found.append((seed, candidates[-1]))
+        else:
+            missing.append(seed)
+    return found, missing


-def parse_result(path: Path) -> dict[str, Any]:
-    with path.open() as f:
-        return json.load(f)
+# CSV schema — derived from the JSON schema. Top-level + aggregate + per-player 0/1.
+AGGREGATE_FIELDS = [
+    "total_combats",
+    "total_cities_founded",
+    "total_cities_captured",
+    "turn_first_combat",
+    "turn_first_city_captured",
+]
+
+PLAYER_FIELDS = [
+    "pop", "pop_peak", "mil",
+    "cities", "cities_captured", "cities_lost",
+    "gold", "gold_peak", "gold_per_turn",
+    "techs", "tiles", "buildings",
+    "happiness",
+    "food_total", "production_total",
+    "kills", "units_lost",
+    "turn_first_pop_3", "turn_first_pop_4",
+]


 def extract_row(seed: int, data: dict[str, Any]) -> dict[str, Any]:
-    """Extract CSV fields from a result JSON."""
-    players = data.get("players", [{}, {}])
-    p0 = players[0] if len(players) > 0 else {}
-    p1 = players[1] if len(players) > 1 else {}
-
-    def pstat(p: dict[str, Any], key: str) -> int:
-        return int(p.get(key, -1))
-
-    return {
+    row: dict[str, Any] = {
        "seed": seed,
-        "outcome": data.get("outcome", ""),
-        "turns": int(data.get("turns", data.get("final_turn", -1))),
-        "winner": int(data.get("winner", data.get("winner_index", -1))),
-        "p0_pop": pstat(p0, "pop"),
-        "p0_mil": pstat(p0, "mil"),
-        "p0_cities": pstat(p0, "cities"),
-        "p0_gold": pstat(p0, "gold"),
-        "p0_techs": pstat(p0, "techs"),
-        "p0_combats": pstat(p0, "combats"),
-        "p1_pop": pstat(p1, "pop"),
-        "p1_mil": pstat(p1, "mil"),
-        "p1_cities": pstat(p1, "cities"),
-        "p1_gold": pstat(p1, "gold"),
-        "p1_techs": pstat(p1, "techs"),
-        "p1_combats": pstat(p1, "combats"),
-        "invariants": int(data.get("invariant_violations", 0)),
+        "outcome": data["outcome"],
+        "turns_played": data["turns_played"],
+        "winner_index": data["winner_index"],
+        "victory_type": data["victory_type"],
+        "wall_clock_sec": round(float(data["wall_clock_sec"]), 2),
    }
+    for f in AGGREGATE_FIELDS:
+        row[f"agg_{f}"] = data["aggregate"][f]
+    player_stats: dict[str, Any] = data["player_stats"]
+    for pid in ("0", "1"):
+        pstat = player_stats.get(pid, {})
+        for f in PLAYER_FIELDS:
+            row[f"p{pid}_{f}"] = pstat.get(f, "")
+    row["invariant_violations"] = len(data["invariant_violations"])
+    return row


-CSV_FIELDS = [
-    "seed", "outcome", "turns", "winner",
-    "p0_pop", "p0_mil", "p0_cities", "p0_gold", "p0_techs", "p0_combats",
-    "p1_pop", "p1_mil", "p1_cities", "p1_gold", "p1_techs", "p1_combats",
-    "invariants",
-]
-
-VALID_OUTCOMES = {"victory", "max_turns", "defeat"}
+def csv_fieldnames() -> list[str]:
+    fields = [
+        "seed", "outcome", "turns_played", "winner_index",
+        "victory_type", "wall_clock_sec",
+    ]
+    fields += [f"agg_{f}" for f in AGGREGATE_FIELDS]
+    for pid in ("0", "1"):
+        fields += [f"p{pid}_{f}" for f in PLAYER_FIELDS]
+    fields.append("invariant_violations")
+    return fields


-def run_assertions(rows: list[dict[str, Any]], missing_seeds: list[int]) -> list[str]:
-    """Return list of assertion failure messages (empty = all pass)."""
+VALID_OUTCOMES = {"victory", "max_turns", "defeat", "in_progress"}
+
+
+def run_assertions(
+    rows: list[dict[str, Any]],
+    missing_seeds: list[int],
+    schema_errors: dict[Path, list[str]],
+) -> list[str]:
    failures: list[str] = []

    if missing_seeds:
+        failures.append(f"Missing result files for seeds: {missing_seeds}")
+
+    if schema_errors:
+        for path, errs in schema_errors.items():
+            failures.append(f"Schema validation failed for {path}:")
+            for e in errs[:5]:
+                failures.append(f"  {e}")
+            if len(errs) > 5:
+                failures.append(f"  ... ({len(errs) - 5} more)")
+
+    if not rows:
+        failures.append("No valid result rows to analyze.")
+        return failures
+
+    bad_outcomes = [r for r in rows if r["outcome"] not in VALID_OUTCOMES]
+    if bad_outcomes:
        failures.append(
-            f"Missing result.json for seeds: {missing_seeds} — "
-            "Task 1 (AUTO_PLAY_SEED + JSON writer) may not be complete yet"
+            f"{len(bad_outcomes)} game(s) had invalid outcome values"
        )

-    for row in rows:
-        outcome = row["outcome"]
-        if outcome not in VALID_OUTCOMES:
-            failures.append(
-                f"Seed {row['seed']}: invalid outcome '{outcome}' "
-                f"(expected one of {sorted(VALID_OUTCOMES)})"
-            )
-
-    total_violations = sum(r["invariants"] for r in rows)
+    total_violations = sum(r["invariant_violations"] for r in rows)
    if total_violations > 0:
-        per_seed = {r["seed"]: r["invariants"] for r in rows if r["invariants"] > 0}
        failures.append(
-            f"Invariant violations detected: {total_violations} total, "
-            f"by seed: {per_seed}"
+            f"Total invariant violations across games: {total_violations}"
        )

-    if rows:
-        max_p0_pop = max(r["p0_pop"] for r in rows)
-        if max_p0_pop < 4:
-            failures.append(
-                f"Sanity check failed: no game reached p0 pop >= 4 "
-                f"(max was {max_p0_pop}). Growth system may be broken."
-            )
+    max_p0_pop = max(r["p0_pop_peak"] for r in rows)
+    if max_p0_pop < 4:
+        failures.append(
+            f"No game reached p0_pop_peak >= 4 (max was {max_p0_pop}). "
+            "Growth system may be broken."
+        )
+
+    # Pacing: if ANY game has never seen combat, that's worth flagging
+    never_combat = [r for r in rows if r["agg_turn_first_combat"] == -1]
+    if never_combat:
+        failures.append(
+            f"{len(never_combat)} game(s) never fought a single combat — "
+            "AI may be pacifist or unreachable."
+        )

    return failures

@@ -130,100 +167,94 @@ def run_assertions(rows: list[dict[str, Any]], missing_seeds: list[int]) -> list
 def median_int(values: list[int]) -> int:
    if not values:
        return -1
-    valid = [v for v in values if v >= 0]
-    if not valid:
-        return -1
-    return int(statistics.median(valid))
+    return int(statistics.median(values))


-def write_baseline_stub(path: Path) -> None:
-    stub = {
-        "_note": "Baseline stub — Phase 3b will populate this with real thresholds.",
-        "version": 1,
-        "thresholds": {},
-    }
-    path.write_text(json.dumps(stub, indent=2) + "\n")
-    print(f"Baseline stub written to {path}", file=sys.stderr)
+def print_summary(rows: list[dict[str, Any]], out: Any = sys.stderr) -> None:
+    print("=== autoplay batch report ===", file=out)
+    print(f"games: {len(rows)}", file=out)
+    counts: dict[str, int] = {}
+    for r in rows:
+        counts[r["outcome"]] = counts.get(r["outcome"], 0) + 1
+    for k, v in sorted(counts.items()):
+        pct = 100 * v // len(rows) if rows else 0
+        print(f"  {k}: {v} ({pct}%)", file=out)
+    if rows:
+        print(
+            f"median turns_played: {median_int([r['turns_played'] for r in rows])}",
+            file=out,
+        )
+        print(
+            f"median p0_pop_peak: {median_int([r['p0_pop_peak'] for r in rows])}",
+            file=out,
+        )
+        print(
+            f"median p0_gold_peak: {median_int([r['p0_gold_peak'] for r in rows])}",
+            file=out,
+        )
+        print(
+            f"median agg_total_combats: {median_int([r['agg_total_combats'] for r in rows])}",
+            file=out,
+        )
+        total_v = sum(r["invariant_violations"] for r in rows)
+        print(f"invariant violations (total): {total_v}", file=out)


-def main() -> None:
-    parser = argparse.ArgumentParser(
-        description="Aggregate autoplay batch results into CSV + summary."
-    )
-    parser.add_argument("results_dir", type=Path, help="Directory containing seed_* subdirs")
-    parser.add_argument("--baseline", type=Path, default=None, help="Baseline JSON for comparison (not yet implemented)")
-    parser.add_argument("--update-baseline", action="store_true", help="Write/update baseline file (stub only for now)")
-    args = parser.parse_args()
-
-    results_dir: Path = args.results_dir
+def main(argv: list[str]) -> int:
+    args = [a for a in argv[1:] if not a.startswith("-")]
+    flags = {a for a in argv[1:] if a.startswith("-")}
+    if not args:
+        print(
+            "usage: autoplay-report.py <results_dir> [--baseline PATH] [--update-baseline]",
+            file=sys.stderr,
+        )
+        return 2
+    results_dir = Path(args[0])
    if not results_dir.is_dir():
-        print(f"ERROR: results_dir not found: {results_dir}", file=sys.stderr)
-        sys.exit(1)
+        print(f"ERROR: {results_dir} is not a directory", file=sys.stderr)
+        return 2

-    seed_entries = find_result_files(results_dir)
-    if not seed_entries:
-        print(f"ERROR: No seed_*/result_*.json files found in {results_dir}", file=sys.stderr)
-        sys.exit(1)
+    found, missing = find_result_files(results_dir)
+    if not found and not missing:
+        print(f"ERROR: No seed_*/ dirs found under {results_dir}", file=sys.stderr)
+        return 1

+    schema = load_schema()
    rows: list[dict[str, Any]] = []
-    parse_failures: list[str] = []
-    missing_seeds: list[int] = []
-
-    for seed, result_path in seed_entries:
-        if not result_path.exists():
-            missing_seeds.append(seed)
-            continue
+    schema_errors: dict[Path, list[str]] = {}
+    for seed, path in found:
        try:
-            data = parse_result(result_path)
-            rows.append(extract_row(seed, data))
-        except (json.JSONDecodeError, KeyError) as exc:
-            parse_failures.append(f"Seed {seed}: failed to parse {result_path}: {exc}")
+            data = json.loads(path.read_text())
+        except (OSError, json.JSONDecodeError) as e:
+            schema_errors[path] = [f"cannot load: {e}"]
+            continue
+        errs = validate(data, schema)
+        if errs:
+            schema_errors[path] = errs
+            continue
+        rows.append(extract_row(seed, data))

-    # CSV header + rows to stdout
-    print(",".join(CSV_FIELDS))
-    for row in rows:
-        print(",".join(str(row[f]) for f in CSV_FIELDS))
+    # CSV to stdout
+    writer = csv.DictWriter(sys.stdout, fieldnames=csv_fieldnames())
+    writer.writeheader()
+    for r in rows:
+        writer.writerow(r)

-    # Summary to stderr
-    n_games = len(rows)
-    n_victories = sum(1 for r in rows if r["outcome"] == "victory")
-    n_max_turns = sum(1 for r in rows if r["outcome"] == "max_turns")
-    turns_list = [r["turns"] for r in rows]
-    p0_pop_list = [r["p0_pop"] for r in rows]
-    p0_combats_list = [r["p0_combats"] for r in rows]
-    total_violations = sum(r["invariants"] for r in rows)
+    # Summary + assertions to stderr
+    print_summary(rows)
+    failures = run_assertions(rows, missing, schema_errors)
+    if failures:
+        print("\n=== FAILURES ===", file=sys.stderr)
+        for f in failures:
+            print(f"  {f}", file=sys.stderr)
+        return 1

-    pct = lambda n: f"{round(100 * n / n_games)}%" if n_games > 0 else "n/a"
+    if "--update-baseline" in flags:
+        print("--update-baseline: not yet implemented (Phase 3b)", file=sys.stderr)

-    print("", file=sys.stderr)
-    print("=== autoplay batch report ===", file=sys.stderr)
-    print(f"games: {n_games}", file=sys.stderr)
-    print(f"victories: {n_victories} ({pct(n_victories)})", file=sys.stderr)
-    print(f"max_turns: {n_max_turns} ({pct(n_max_turns)})", file=sys.stderr)
-    print(f"median turns: {median_int(turns_list)}", file=sys.stderr)
-    print(f"median p0_pop_final: {median_int(p0_pop_list)}", file=sys.stderr)
-    print(f"median p0_combats: {median_int(p0_combats_list)}", file=sys.stderr)
-    print(f"invariant violations (total): {total_violations}", file=sys.stderr)
-    if missing_seeds:
-        print(f"missing result.json: seeds {missing_seeds}", file=sys.stderr)
-
-    # Assertions
-    all_failures = parse_failures + run_assertions(rows, missing_seeds)
-
-    if args.update_baseline:
-        baseline_path = args.baseline or results_dir / "baseline.json"
-        write_baseline_stub(baseline_path)
-
-    if all_failures:
-        print("", file=sys.stderr)
-        print("=== ASSERTION FAILURES ===", file=sys.stderr)
-        for msg in all_failures:
-            print(f"  FAIL: {msg}", file=sys.stderr)
-        sys.exit(1)
-
-    print("", file=sys.stderr)
-    print("All assertions passed.", file=sys.stderr)
+    print("\nAll assertions passed.", file=sys.stderr)
+    return 0


 if __name__ == "__main__":
-    main()
+    sys.exit(main(sys.argv))
--- a/tools/autoplay-result-schema.json
+++ b/tools/autoplay-result-schema.json
@@ -13,7 +13,7 @@
    "victory_type",
    "wall_clock_sec",
    "aggregate",
-    "final_stats",
+    "player_stats",
    "invariant_violations"
  ],
  "properties": {
@@ -44,9 +44,9 @@
        "turn_first_city_captured": { "type": "integer", "minimum": -1 }
      }
    },
-    "final_stats": {
+    "player_stats": {
      "type": "object",
-      "description": "Map of player_index (as string) to per-player stats",
+      "description": "Map of player_index (as string) to per-player stats. Rewritten every turn — not final until outcome != 'in_progress'.",
      "propertyNames": { "pattern": "^[0-9]+$" },
      "additionalProperties": {
        "$ref": "#/definitions/player_stats"
--- a/tools/autoplay-validate.py
+++ b/tools/autoplay-validate.py