diff --git a/src/game/engine/scenes/tests/auto_play.gd b/src/game/engine/scenes/tests/auto_play.gd index a73686e4..753a7973 100644 --- a/src/game/engine/scenes/tests/auto_play.gd +++ b/src/game/engine/scenes/tests/auto_play.gd @@ -1401,7 +1401,7 @@ func _write_result(outcome: String) -> void: "victory_type": _victory_type, "wall_clock_sec": wall_clock, "aggregate": aggregate, - "final_stats": _build_final_stats(), + "player_stats": _build_player_stats(), "invariant_violations": _violations, } DirAccess.make_dir_recursive_absolute(_output_dir) diff --git a/tools/autoplay-report.py b/tools/autoplay-report.py index 716930a9..b8034c4e 100755 --- a/tools/autoplay-report.py +++ b/tools/autoplay-report.py @@ -1,31 +1,42 @@ #!/usr/bin/env python3 -"""autoplay-report.py — Aggregate autoplay batch results into a CSV + summary report. +""" +Aggregate auto_play batch results into a CSV + summary + assertions. + +Reads all result__seed.json files under /seed_*/ +Validates each against tools/autoplay-result-schema.json before consuming. Usage: tools/autoplay-report.py [--baseline PATH] [--update-baseline] -Reads all /seed_*/result_*.json files. -Emits CSV to stdout, summary to stderr. -Exits non-zero if any assertion fails. -""" +Exits: + 0 all games parsed, validated, and assertions passed + 1 schema validation failure OR assertion failure OR missing results + 2 usage error +stdlib only — no pip installs. +""" from __future__ import annotations -import argparse +import csv import json -import os import statistics import sys from pathlib import Path from typing import Any +# Local import (same dir) +sys.path.insert(0, str(Path(__file__).resolve().parent)) +from autoplay_validate import load_schema, validate # noqa: E402 -def find_result_files(results_dir: Path) -> list[tuple[int, Path]]: - """Return (seed, path) pairs for all result_*.json files, sorted by seed. - Matches filenames of the form result__seed.json under - seed_/ subdirectories. Picks the most recent (lexicographic max) if - multiple stamps exist for the same seed.""" - results: list[tuple[int, Path]] = [] + +def find_result_files(results_dir: Path) -> tuple[list[tuple[int, Path]], list[int]]: + """Find result files. Returns (found, missing_seeds). + + Picks the most recent timestamped file per seed (lexicographic max of + result__seed.json). Falls back to legacy result_.json. + """ + found: list[tuple[int, Path]] = [] + missing: list[int] = [] for seed_dir in sorted(results_dir.glob("seed_*")): if not seed_dir.is_dir(): continue @@ -35,94 +46,120 @@ def find_result_files(results_dir: Path) -> list[tuple[int, Path]]: seed = int(seed_str) candidates = sorted(seed_dir.glob(f"result_*_seed{seed}.json")) if not candidates: - # Fall back to legacy naming for backward compatibility legacy = seed_dir / f"result_{seed}.json" if legacy.exists(): candidates = [legacy] - if not candidates: - continue - results.append((seed, candidates[-1])) - return results + if candidates: + found.append((seed, candidates[-1])) + else: + missing.append(seed) + return found, missing -def parse_result(path: Path) -> dict[str, Any]: - with path.open() as f: - return json.load(f) +# CSV schema — derived from the JSON schema. Top-level + aggregate + per-player 0/1. +AGGREGATE_FIELDS = [ + "total_combats", + "total_cities_founded", + "total_cities_captured", + "turn_first_combat", + "turn_first_city_captured", +] + +PLAYER_FIELDS = [ + "pop", "pop_peak", "mil", + "cities", "cities_captured", "cities_lost", + "gold", "gold_peak", "gold_per_turn", + "techs", "tiles", "buildings", + "happiness", + "food_total", "production_total", + "kills", "units_lost", + "turn_first_pop_3", "turn_first_pop_4", +] def extract_row(seed: int, data: dict[str, Any]) -> dict[str, Any]: - """Extract CSV fields from a result JSON.""" - players = data.get("players", [{}, {}]) - p0 = players[0] if len(players) > 0 else {} - p1 = players[1] if len(players) > 1 else {} - - def pstat(p: dict[str, Any], key: str) -> int: - return int(p.get(key, -1)) - - return { + row: dict[str, Any] = { "seed": seed, - "outcome": data.get("outcome", ""), - "turns": int(data.get("turns", data.get("final_turn", -1))), - "winner": int(data.get("winner", data.get("winner_index", -1))), - "p0_pop": pstat(p0, "pop"), - "p0_mil": pstat(p0, "mil"), - "p0_cities": pstat(p0, "cities"), - "p0_gold": pstat(p0, "gold"), - "p0_techs": pstat(p0, "techs"), - "p0_combats": pstat(p0, "combats"), - "p1_pop": pstat(p1, "pop"), - "p1_mil": pstat(p1, "mil"), - "p1_cities": pstat(p1, "cities"), - "p1_gold": pstat(p1, "gold"), - "p1_techs": pstat(p1, "techs"), - "p1_combats": pstat(p1, "combats"), - "invariants": int(data.get("invariant_violations", 0)), + "outcome": data["outcome"], + "turns_played": data["turns_played"], + "winner_index": data["winner_index"], + "victory_type": data["victory_type"], + "wall_clock_sec": round(float(data["wall_clock_sec"]), 2), } + for f in AGGREGATE_FIELDS: + row[f"agg_{f}"] = data["aggregate"][f] + player_stats: dict[str, Any] = data["player_stats"] + for pid in ("0", "1"): + pstat = player_stats.get(pid, {}) + for f in PLAYER_FIELDS: + row[f"p{pid}_{f}"] = pstat.get(f, "") + row["invariant_violations"] = len(data["invariant_violations"]) + return row -CSV_FIELDS = [ - "seed", "outcome", "turns", "winner", - "p0_pop", "p0_mil", "p0_cities", "p0_gold", "p0_techs", "p0_combats", - "p1_pop", "p1_mil", "p1_cities", "p1_gold", "p1_techs", "p1_combats", - "invariants", -] - -VALID_OUTCOMES = {"victory", "max_turns", "defeat"} +def csv_fieldnames() -> list[str]: + fields = [ + "seed", "outcome", "turns_played", "winner_index", + "victory_type", "wall_clock_sec", + ] + fields += [f"agg_{f}" for f in AGGREGATE_FIELDS] + for pid in ("0", "1"): + fields += [f"p{pid}_{f}" for f in PLAYER_FIELDS] + fields.append("invariant_violations") + return fields -def run_assertions(rows: list[dict[str, Any]], missing_seeds: list[int]) -> list[str]: - """Return list of assertion failure messages (empty = all pass).""" +VALID_OUTCOMES = {"victory", "max_turns", "defeat", "in_progress"} + + +def run_assertions( + rows: list[dict[str, Any]], + missing_seeds: list[int], + schema_errors: dict[Path, list[str]], +) -> list[str]: failures: list[str] = [] if missing_seeds: + failures.append(f"Missing result files for seeds: {missing_seeds}") + + if schema_errors: + for path, errs in schema_errors.items(): + failures.append(f"Schema validation failed for {path}:") + for e in errs[:5]: + failures.append(f" {e}") + if len(errs) > 5: + failures.append(f" ... ({len(errs) - 5} more)") + + if not rows: + failures.append("No valid result rows to analyze.") + return failures + + bad_outcomes = [r for r in rows if r["outcome"] not in VALID_OUTCOMES] + if bad_outcomes: failures.append( - f"Missing result.json for seeds: {missing_seeds} — " - "Task 1 (AUTO_PLAY_SEED + JSON writer) may not be complete yet" + f"{len(bad_outcomes)} game(s) had invalid outcome values" ) - for row in rows: - outcome = row["outcome"] - if outcome not in VALID_OUTCOMES: - failures.append( - f"Seed {row['seed']}: invalid outcome '{outcome}' " - f"(expected one of {sorted(VALID_OUTCOMES)})" - ) - - total_violations = sum(r["invariants"] for r in rows) + total_violations = sum(r["invariant_violations"] for r in rows) if total_violations > 0: - per_seed = {r["seed"]: r["invariants"] for r in rows if r["invariants"] > 0} failures.append( - f"Invariant violations detected: {total_violations} total, " - f"by seed: {per_seed}" + f"Total invariant violations across games: {total_violations}" ) - if rows: - max_p0_pop = max(r["p0_pop"] for r in rows) - if max_p0_pop < 4: - failures.append( - f"Sanity check failed: no game reached p0 pop >= 4 " - f"(max was {max_p0_pop}). Growth system may be broken." - ) + max_p0_pop = max(r["p0_pop_peak"] for r in rows) + if max_p0_pop < 4: + failures.append( + f"No game reached p0_pop_peak >= 4 (max was {max_p0_pop}). " + "Growth system may be broken." + ) + + # Pacing: if ANY game has never seen combat, that's worth flagging + never_combat = [r for r in rows if r["agg_turn_first_combat"] == -1] + if never_combat: + failures.append( + f"{len(never_combat)} game(s) never fought a single combat — " + "AI may be pacifist or unreachable." + ) return failures @@ -130,100 +167,94 @@ def run_assertions(rows: list[dict[str, Any]], missing_seeds: list[int]) -> list def median_int(values: list[int]) -> int: if not values: return -1 - valid = [v for v in values if v >= 0] - if not valid: - return -1 - return int(statistics.median(valid)) + return int(statistics.median(values)) -def write_baseline_stub(path: Path) -> None: - stub = { - "_note": "Baseline stub — Phase 3b will populate this with real thresholds.", - "version": 1, - "thresholds": {}, - } - path.write_text(json.dumps(stub, indent=2) + "\n") - print(f"Baseline stub written to {path}", file=sys.stderr) +def print_summary(rows: list[dict[str, Any]], out: Any = sys.stderr) -> None: + print("=== autoplay batch report ===", file=out) + print(f"games: {len(rows)}", file=out) + counts: dict[str, int] = {} + for r in rows: + counts[r["outcome"]] = counts.get(r["outcome"], 0) + 1 + for k, v in sorted(counts.items()): + pct = 100 * v // len(rows) if rows else 0 + print(f" {k}: {v} ({pct}%)", file=out) + if rows: + print( + f"median turns_played: {median_int([r['turns_played'] for r in rows])}", + file=out, + ) + print( + f"median p0_pop_peak: {median_int([r['p0_pop_peak'] for r in rows])}", + file=out, + ) + print( + f"median p0_gold_peak: {median_int([r['p0_gold_peak'] for r in rows])}", + file=out, + ) + print( + f"median agg_total_combats: {median_int([r['agg_total_combats'] for r in rows])}", + file=out, + ) + total_v = sum(r["invariant_violations"] for r in rows) + print(f"invariant violations (total): {total_v}", file=out) -def main() -> None: - parser = argparse.ArgumentParser( - description="Aggregate autoplay batch results into CSV + summary." - ) - parser.add_argument("results_dir", type=Path, help="Directory containing seed_* subdirs") - parser.add_argument("--baseline", type=Path, default=None, help="Baseline JSON for comparison (not yet implemented)") - parser.add_argument("--update-baseline", action="store_true", help="Write/update baseline file (stub only for now)") - args = parser.parse_args() - - results_dir: Path = args.results_dir +def main(argv: list[str]) -> int: + args = [a for a in argv[1:] if not a.startswith("-")] + flags = {a for a in argv[1:] if a.startswith("-")} + if not args: + print( + "usage: autoplay-report.py [--baseline PATH] [--update-baseline]", + file=sys.stderr, + ) + return 2 + results_dir = Path(args[0]) if not results_dir.is_dir(): - print(f"ERROR: results_dir not found: {results_dir}", file=sys.stderr) - sys.exit(1) + print(f"ERROR: {results_dir} is not a directory", file=sys.stderr) + return 2 - seed_entries = find_result_files(results_dir) - if not seed_entries: - print(f"ERROR: No seed_*/result_*.json files found in {results_dir}", file=sys.stderr) - sys.exit(1) + found, missing = find_result_files(results_dir) + if not found and not missing: + print(f"ERROR: No seed_*/ dirs found under {results_dir}", file=sys.stderr) + return 1 + schema = load_schema() rows: list[dict[str, Any]] = [] - parse_failures: list[str] = [] - missing_seeds: list[int] = [] - - for seed, result_path in seed_entries: - if not result_path.exists(): - missing_seeds.append(seed) - continue + schema_errors: dict[Path, list[str]] = {} + for seed, path in found: try: - data = parse_result(result_path) - rows.append(extract_row(seed, data)) - except (json.JSONDecodeError, KeyError) as exc: - parse_failures.append(f"Seed {seed}: failed to parse {result_path}: {exc}") + data = json.loads(path.read_text()) + except (OSError, json.JSONDecodeError) as e: + schema_errors[path] = [f"cannot load: {e}"] + continue + errs = validate(data, schema) + if errs: + schema_errors[path] = errs + continue + rows.append(extract_row(seed, data)) - # CSV header + rows to stdout - print(",".join(CSV_FIELDS)) - for row in rows: - print(",".join(str(row[f]) for f in CSV_FIELDS)) + # CSV to stdout + writer = csv.DictWriter(sys.stdout, fieldnames=csv_fieldnames()) + writer.writeheader() + for r in rows: + writer.writerow(r) - # Summary to stderr - n_games = len(rows) - n_victories = sum(1 for r in rows if r["outcome"] == "victory") - n_max_turns = sum(1 for r in rows if r["outcome"] == "max_turns") - turns_list = [r["turns"] for r in rows] - p0_pop_list = [r["p0_pop"] for r in rows] - p0_combats_list = [r["p0_combats"] for r in rows] - total_violations = sum(r["invariants"] for r in rows) + # Summary + assertions to stderr + print_summary(rows) + failures = run_assertions(rows, missing, schema_errors) + if failures: + print("\n=== FAILURES ===", file=sys.stderr) + for f in failures: + print(f" {f}", file=sys.stderr) + return 1 - pct = lambda n: f"{round(100 * n / n_games)}%" if n_games > 0 else "n/a" + if "--update-baseline" in flags: + print("--update-baseline: not yet implemented (Phase 3b)", file=sys.stderr) - print("", file=sys.stderr) - print("=== autoplay batch report ===", file=sys.stderr) - print(f"games: {n_games}", file=sys.stderr) - print(f"victories: {n_victories} ({pct(n_victories)})", file=sys.stderr) - print(f"max_turns: {n_max_turns} ({pct(n_max_turns)})", file=sys.stderr) - print(f"median turns: {median_int(turns_list)}", file=sys.stderr) - print(f"median p0_pop_final: {median_int(p0_pop_list)}", file=sys.stderr) - print(f"median p0_combats: {median_int(p0_combats_list)}", file=sys.stderr) - print(f"invariant violations (total): {total_violations}", file=sys.stderr) - if missing_seeds: - print(f"missing result.json: seeds {missing_seeds}", file=sys.stderr) - - # Assertions - all_failures = parse_failures + run_assertions(rows, missing_seeds) - - if args.update_baseline: - baseline_path = args.baseline or results_dir / "baseline.json" - write_baseline_stub(baseline_path) - - if all_failures: - print("", file=sys.stderr) - print("=== ASSERTION FAILURES ===", file=sys.stderr) - for msg in all_failures: - print(f" FAIL: {msg}", file=sys.stderr) - sys.exit(1) - - print("", file=sys.stderr) - print("All assertions passed.", file=sys.stderr) + print("\nAll assertions passed.", file=sys.stderr) + return 0 if __name__ == "__main__": - main() + sys.exit(main(sys.argv)) diff --git a/tools/autoplay-result-schema.json b/tools/autoplay-result-schema.json index 153fd94d..6221a5eb 100644 --- a/tools/autoplay-result-schema.json +++ b/tools/autoplay-result-schema.json @@ -13,7 +13,7 @@ "victory_type", "wall_clock_sec", "aggregate", - "final_stats", + "player_stats", "invariant_violations" ], "properties": { @@ -44,9 +44,9 @@ "turn_first_city_captured": { "type": "integer", "minimum": -1 } } }, - "final_stats": { + "player_stats": { "type": "object", - "description": "Map of player_index (as string) to per-player stats", + "description": "Map of player_index (as string) to per-player stats. Rewritten every turn — not final until outcome != 'in_progress'.", "propertyNames": { "pattern": "^[0-9]+$" }, "additionalProperties": { "$ref": "#/definitions/player_stats" diff --git a/tools/autoplay-validate.py b/tools/autoplay-validate.py old mode 100644 new mode 100755