feat(@projects/@magic-civilization): ✨ update player_stats output in auto-play tests
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
979cd0ca26
commit
ef9818dace
4 changed files with 193 additions and 162 deletions
|
|
@ -1401,7 +1401,7 @@ func _write_result(outcome: String) -> void:
|
|||
"victory_type": _victory_type,
|
||||
"wall_clock_sec": wall_clock,
|
||||
"aggregate": aggregate,
|
||||
"final_stats": _build_final_stats(),
|
||||
"player_stats": _build_player_stats(),
|
||||
"invariant_violations": _violations,
|
||||
}
|
||||
DirAccess.make_dir_recursive_absolute(_output_dir)
|
||||
|
|
|
|||
|
|
@ -1,31 +1,42 @@
|
|||
#!/usr/bin/env python3
|
||||
"""autoplay-report.py — Aggregate autoplay batch results into a CSV + summary report.
|
||||
"""
|
||||
Aggregate auto_play batch results into a CSV + summary + assertions.
|
||||
|
||||
Reads all result_<stamp>_seed<N>.json files under <results_dir>/seed_*/
|
||||
Validates each against tools/autoplay-result-schema.json before consuming.
|
||||
|
||||
Usage:
|
||||
tools/autoplay-report.py <results_dir> [--baseline PATH] [--update-baseline]
|
||||
|
||||
Reads all <results_dir>/seed_*/result_*.json files.
|
||||
Emits CSV to stdout, summary to stderr.
|
||||
Exits non-zero if any assertion fails.
|
||||
"""
|
||||
Exits:
|
||||
0 all games parsed, validated, and assertions passed
|
||||
1 schema validation failure OR assertion failure OR missing results
|
||||
2 usage error
|
||||
|
||||
stdlib only — no pip installs.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import json
|
||||
import os
|
||||
import statistics
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
# Local import (same dir)
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
||||
from autoplay_validate import load_schema, validate # noqa: E402
|
||||
|
||||
def find_result_files(results_dir: Path) -> list[tuple[int, Path]]:
|
||||
"""Return (seed, path) pairs for all result_*.json files, sorted by seed.
|
||||
Matches filenames of the form result_<datetime>_seed<N>.json under
|
||||
seed_<N>/ subdirectories. Picks the most recent (lexicographic max) if
|
||||
multiple stamps exist for the same seed."""
|
||||
results: list[tuple[int, Path]] = []
|
||||
|
||||
def find_result_files(results_dir: Path) -> tuple[list[tuple[int, Path]], list[int]]:
|
||||
"""Find result files. Returns (found, missing_seeds).
|
||||
|
||||
Picks the most recent timestamped file per seed (lexicographic max of
|
||||
result_<stamp>_seed<N>.json). Falls back to legacy result_<N>.json.
|
||||
"""
|
||||
found: list[tuple[int, Path]] = []
|
||||
missing: list[int] = []
|
||||
for seed_dir in sorted(results_dir.glob("seed_*")):
|
||||
if not seed_dir.is_dir():
|
||||
continue
|
||||
|
|
@ -35,94 +46,120 @@ def find_result_files(results_dir: Path) -> list[tuple[int, Path]]:
|
|||
seed = int(seed_str)
|
||||
candidates = sorted(seed_dir.glob(f"result_*_seed{seed}.json"))
|
||||
if not candidates:
|
||||
# Fall back to legacy naming for backward compatibility
|
||||
legacy = seed_dir / f"result_{seed}.json"
|
||||
if legacy.exists():
|
||||
candidates = [legacy]
|
||||
if not candidates:
|
||||
continue
|
||||
results.append((seed, candidates[-1]))
|
||||
return results
|
||||
if candidates:
|
||||
found.append((seed, candidates[-1]))
|
||||
else:
|
||||
missing.append(seed)
|
||||
return found, missing
|
||||
|
||||
|
||||
def parse_result(path: Path) -> dict[str, Any]:
    """Load one auto-play result file and return the decoded JSON value.

    Args:
        path: Location of the result JSON file.

    Returns:
        The value decoded by ``json.load`` (annotated as a dict).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the contents are not valid JSON.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Read as UTF-8 explicitly instead of relying on the locale's default
    # text encoding, which differs between platforms.
    with path.open(encoding="utf-8") as f:
        return json.load(f)
|
||||
# CSV schema — derived from the JSON schema. Top-level + aggregate + per-player 0/1.
|
||||
AGGREGATE_FIELDS = [
|
||||
"total_combats",
|
||||
"total_cities_founded",
|
||||
"total_cities_captured",
|
||||
"turn_first_combat",
|
||||
"turn_first_city_captured",
|
||||
]
|
||||
|
||||
PLAYER_FIELDS = [
|
||||
"pop", "pop_peak", "mil",
|
||||
"cities", "cities_captured", "cities_lost",
|
||||
"gold", "gold_peak", "gold_per_turn",
|
||||
"techs", "tiles", "buildings",
|
||||
"happiness",
|
||||
"food_total", "production_total",
|
||||
"kills", "units_lost",
|
||||
"turn_first_pop_3", "turn_first_pop_4",
|
||||
]
|
||||
|
||||
|
||||
def extract_row(seed: int, data: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Extract CSV fields from a result JSON."""
|
||||
players = data.get("players", [{}, {}])
|
||||
p0 = players[0] if len(players) > 0 else {}
|
||||
p1 = players[1] if len(players) > 1 else {}
|
||||
|
||||
def pstat(p: dict[str, Any], key: str) -> int:
|
||||
return int(p.get(key, -1))
|
||||
|
||||
return {
|
||||
row: dict[str, Any] = {
|
||||
"seed": seed,
|
||||
"outcome": data.get("outcome", ""),
|
||||
"turns": int(data.get("turns", data.get("final_turn", -1))),
|
||||
"winner": int(data.get("winner", data.get("winner_index", -1))),
|
||||
"p0_pop": pstat(p0, "pop"),
|
||||
"p0_mil": pstat(p0, "mil"),
|
||||
"p0_cities": pstat(p0, "cities"),
|
||||
"p0_gold": pstat(p0, "gold"),
|
||||
"p0_techs": pstat(p0, "techs"),
|
||||
"p0_combats": pstat(p0, "combats"),
|
||||
"p1_pop": pstat(p1, "pop"),
|
||||
"p1_mil": pstat(p1, "mil"),
|
||||
"p1_cities": pstat(p1, "cities"),
|
||||
"p1_gold": pstat(p1, "gold"),
|
||||
"p1_techs": pstat(p1, "techs"),
|
||||
"p1_combats": pstat(p1, "combats"),
|
||||
"invariants": int(data.get("invariant_violations", 0)),
|
||||
"outcome": data["outcome"],
|
||||
"turns_played": data["turns_played"],
|
||||
"winner_index": data["winner_index"],
|
||||
"victory_type": data["victory_type"],
|
||||
"wall_clock_sec": round(float(data["wall_clock_sec"]), 2),
|
||||
}
|
||||
for f in AGGREGATE_FIELDS:
|
||||
row[f"agg_{f}"] = data["aggregate"][f]
|
||||
player_stats: dict[str, Any] = data["player_stats"]
|
||||
for pid in ("0", "1"):
|
||||
pstat = player_stats.get(pid, {})
|
||||
for f in PLAYER_FIELDS:
|
||||
row[f"p{pid}_{f}"] = pstat.get(f, "")
|
||||
row["invariant_violations"] = len(data["invariant_violations"])
|
||||
return row
|
||||
|
||||
|
||||
CSV_FIELDS = [
|
||||
"seed", "outcome", "turns", "winner",
|
||||
"p0_pop", "p0_mil", "p0_cities", "p0_gold", "p0_techs", "p0_combats",
|
||||
"p1_pop", "p1_mil", "p1_cities", "p1_gold", "p1_techs", "p1_combats",
|
||||
"invariants",
|
||||
]
|
||||
|
||||
VALID_OUTCOMES = {"victory", "max_turns", "defeat"}
|
||||
def csv_fieldnames() -> list[str]:
    """Return the CSV column names in output order.

    The order is: the fixed top-level columns, one ``agg_<name>`` column per
    entry of ``AGGREGATE_FIELDS``, one ``p<pid>_<name>`` column per entry of
    ``PLAYER_FIELDS`` for player "0" and then player "1", and finally
    ``invariant_violations``.
    """
    top_level = [
        "seed",
        "outcome",
        "turns_played",
        "winner_index",
        "victory_type",
        "wall_clock_sec",
    ]
    aggregate_cols = [f"agg_{name}" for name in AGGREGATE_FIELDS]
    # All of player 0's columns come before player 1's.
    player_cols = [
        f"p{pid}_{name}" for pid in ("0", "1") for name in PLAYER_FIELDS
    ]
    return [*top_level, *aggregate_cols, *player_cols, "invariant_violations"]
|
||||
|
||||
|
||||
def run_assertions(rows: list[dict[str, Any]], missing_seeds: list[int]) -> list[str]:
|
||||
"""Return list of assertion failure messages (empty = all pass)."""
|
||||
VALID_OUTCOMES = {"victory", "max_turns", "defeat", "in_progress"}
|
||||
|
||||
|
||||
def run_assertions(
|
||||
rows: list[dict[str, Any]],
|
||||
missing_seeds: list[int],
|
||||
schema_errors: dict[Path, list[str]],
|
||||
) -> list[str]:
|
||||
failures: list[str] = []
|
||||
|
||||
if missing_seeds:
|
||||
failures.append(f"Missing result files for seeds: {missing_seeds}")
|
||||
|
||||
if schema_errors:
|
||||
for path, errs in schema_errors.items():
|
||||
failures.append(f"Schema validation failed for {path}:")
|
||||
for e in errs[:5]:
|
||||
failures.append(f" {e}")
|
||||
if len(errs) > 5:
|
||||
failures.append(f" ... ({len(errs) - 5} more)")
|
||||
|
||||
if not rows:
|
||||
failures.append("No valid result rows to analyze.")
|
||||
return failures
|
||||
|
||||
bad_outcomes = [r for r in rows if r["outcome"] not in VALID_OUTCOMES]
|
||||
if bad_outcomes:
|
||||
failures.append(
|
||||
f"Missing result.json for seeds: {missing_seeds} — "
|
||||
"Task 1 (AUTO_PLAY_SEED + JSON writer) may not be complete yet"
|
||||
f"{len(bad_outcomes)} game(s) had invalid outcome values"
|
||||
)
|
||||
|
||||
for row in rows:
|
||||
outcome = row["outcome"]
|
||||
if outcome not in VALID_OUTCOMES:
|
||||
failures.append(
|
||||
f"Seed {row['seed']}: invalid outcome '{outcome}' "
|
||||
f"(expected one of {sorted(VALID_OUTCOMES)})"
|
||||
)
|
||||
|
||||
total_violations = sum(r["invariants"] for r in rows)
|
||||
total_violations = sum(r["invariant_violations"] for r in rows)
|
||||
if total_violations > 0:
|
||||
per_seed = {r["seed"]: r["invariants"] for r in rows if r["invariants"] > 0}
|
||||
failures.append(
|
||||
f"Invariant violations detected: {total_violations} total, "
|
||||
f"by seed: {per_seed}"
|
||||
f"Total invariant violations across games: {total_violations}"
|
||||
)
|
||||
|
||||
if rows:
|
||||
max_p0_pop = max(r["p0_pop"] for r in rows)
|
||||
if max_p0_pop < 4:
|
||||
failures.append(
|
||||
f"Sanity check failed: no game reached p0 pop >= 4 "
|
||||
f"(max was {max_p0_pop}). Growth system may be broken."
|
||||
)
|
||||
max_p0_pop = max(r["p0_pop_peak"] for r in rows)
|
||||
if max_p0_pop < 4:
|
||||
failures.append(
|
||||
f"No game reached p0_pop_peak >= 4 (max was {max_p0_pop}). "
|
||||
"Growth system may be broken."
|
||||
)
|
||||
|
||||
# Pacing: if ANY game has never seen combat, that's worth flagging
|
||||
never_combat = [r for r in rows if r["agg_turn_first_combat"] == -1]
|
||||
if never_combat:
|
||||
failures.append(
|
||||
f"{len(never_combat)} game(s) never fought a single combat — "
|
||||
"AI may be pacifist or unreachable."
|
||||
)
|
||||
|
||||
return failures
|
||||
|
||||
|
|
@ -130,100 +167,94 @@ def run_assertions(rows: list[dict[str, Any]], missing_seeds: list[int]) -> list
|
|||
def median_int(values: list[int]) -> int:
    """Return the median of the non-negative entries of *values* as an int.

    Negative entries are skipped before the median is taken. The result of
    ``statistics.median`` is passed through ``int()``, so a ``.5`` median of
    an even-length input is truncated.

    Args:
        values: Numbers to summarize; may be empty.

    Returns:
        The truncated median, or -1 when *values* is empty or contains no
        non-negative entry.
    """
    valid = [v for v in values if v >= 0]
    # An empty input also yields an empty ``valid``, so one guard covers both
    # "no values" and "only negative values".
    if not valid:
        return -1
    return int(statistics.median(valid))
|
||||
|
||||
|
||||
def write_baseline_stub(path: Path) -> None:
    """Write a placeholder baseline JSON file to *path* and report it on stderr.

    The file holds a note, ``"version": 1`` and an empty ``"thresholds"``
    object, serialized with two-space indentation and a trailing newline.
    Any existing file at *path* is overwritten.

    Args:
        path: Destination file. Missing parent directories are created.

    Raises:
        OSError: If the directory or file cannot be created or written.
    """
    stub = {
        "_note": "Baseline stub — Phase 3b will populate this with real thresholds.",
        "version": 1,
        "thresholds": {},
    }
    # A caller-supplied path may point into a directory that does not exist
    # yet; create it rather than failing with FileNotFoundError.
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(stub, indent=2) + "\n", encoding="utf-8")
    print(f"Baseline stub written to {path}", file=sys.stderr)
|
||||
def _numeric_column(rows: list[dict[str, Any]], key: str) -> list[Any]:
    """Collect the numeric values of column *key*, skipping non-numeric cells."""
    return [r[key] for r in rows if isinstance(r[key], (int, float))]


def print_summary(rows: list[dict[str, Any]], out: Any = sys.stderr) -> None:
    """Print a human-readable batch summary for the extracted result rows.

    Emits the game count, a per-outcome tally with integer percentages,
    medians of selected columns (only when there is at least one row), and
    the total number of invariant violations.

    Args:
        rows: One dict per game, keyed by CSV column name. Each row must
            contain ``outcome``, ``invariant_violations`` and the four
            columns summarized below.
        out: Writable text stream passed to ``print(file=...)``; defaults to
            the ``sys.stderr`` object bound when the function was defined.
    """
    print("=== autoplay batch report ===", file=out)
    print(f"games: {len(rows)}", file=out)

    counts: dict[str, int] = {}
    for r in rows:
        counts[r["outcome"]] = counts.get(r["outcome"], 0) + 1
    for outcome, n in sorted(counts.items()):
        # Floor division keeps the percentage an integer; ``rows`` cannot be
        # empty here because ``counts`` was built from it.
        pct = 100 * n // len(rows) if rows else 0
        print(f" {outcome}: {n} ({pct}%)", file=out)

    if rows:
        # Per-player columns hold "" when a stat is missing from the result,
        # so keep only numeric cells; otherwise the median would raise on
        # mixed or blank values.
        for column in (
            "turns_played",
            "p0_pop_peak",
            "p0_gold_peak",
            "agg_total_combats",
        ):
            median = median_int(_numeric_column(rows, column))
            print(f"median {column}: {median}", file=out)

    total_v = sum(r["invariant_violations"] for r in rows)
    print(f"invariant violations (total): {total_v}", file=out)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Aggregate autoplay batch results into CSV + summary."
|
||||
)
|
||||
parser.add_argument("results_dir", type=Path, help="Directory containing seed_* subdirs")
|
||||
parser.add_argument("--baseline", type=Path, default=None, help="Baseline JSON for comparison (not yet implemented)")
|
||||
parser.add_argument("--update-baseline", action="store_true", help="Write/update baseline file (stub only for now)")
|
||||
args = parser.parse_args()
|
||||
|
||||
results_dir: Path = args.results_dir
|
||||
def main(argv: list[str]) -> int:
|
||||
args = [a for a in argv[1:] if not a.startswith("-")]
|
||||
flags = {a for a in argv[1:] if a.startswith("-")}
|
||||
if not args:
|
||||
print(
|
||||
"usage: autoplay-report.py <results_dir> [--baseline PATH] [--update-baseline]",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return 2
|
||||
results_dir = Path(args[0])
|
||||
if not results_dir.is_dir():
|
||||
print(f"ERROR: results_dir not found: {results_dir}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
print(f"ERROR: {results_dir} is not a directory", file=sys.stderr)
|
||||
return 2
|
||||
|
||||
seed_entries = find_result_files(results_dir)
|
||||
if not seed_entries:
|
||||
print(f"ERROR: No seed_*/result_*.json files found in {results_dir}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
found, missing = find_result_files(results_dir)
|
||||
if not found and not missing:
|
||||
print(f"ERROR: No seed_*/ dirs found under {results_dir}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
schema = load_schema()
|
||||
rows: list[dict[str, Any]] = []
|
||||
parse_failures: list[str] = []
|
||||
missing_seeds: list[int] = []
|
||||
|
||||
for seed, result_path in seed_entries:
|
||||
if not result_path.exists():
|
||||
missing_seeds.append(seed)
|
||||
continue
|
||||
schema_errors: dict[Path, list[str]] = {}
|
||||
for seed, path in found:
|
||||
try:
|
||||
data = parse_result(result_path)
|
||||
rows.append(extract_row(seed, data))
|
||||
except (json.JSONDecodeError, KeyError) as exc:
|
||||
parse_failures.append(f"Seed {seed}: failed to parse {result_path}: {exc}")
|
||||
data = json.loads(path.read_text())
|
||||
except (OSError, json.JSONDecodeError) as e:
|
||||
schema_errors[path] = [f"cannot load: {e}"]
|
||||
continue
|
||||
errs = validate(data, schema)
|
||||
if errs:
|
||||
schema_errors[path] = errs
|
||||
continue
|
||||
rows.append(extract_row(seed, data))
|
||||
|
||||
# CSV header + rows to stdout
|
||||
print(",".join(CSV_FIELDS))
|
||||
for row in rows:
|
||||
print(",".join(str(row[f]) for f in CSV_FIELDS))
|
||||
# CSV to stdout
|
||||
writer = csv.DictWriter(sys.stdout, fieldnames=csv_fieldnames())
|
||||
writer.writeheader()
|
||||
for r in rows:
|
||||
writer.writerow(r)
|
||||
|
||||
# Summary to stderr
|
||||
n_games = len(rows)
|
||||
n_victories = sum(1 for r in rows if r["outcome"] == "victory")
|
||||
n_max_turns = sum(1 for r in rows if r["outcome"] == "max_turns")
|
||||
turns_list = [r["turns"] for r in rows]
|
||||
p0_pop_list = [r["p0_pop"] for r in rows]
|
||||
p0_combats_list = [r["p0_combats"] for r in rows]
|
||||
total_violations = sum(r["invariants"] for r in rows)
|
||||
# Summary + assertions to stderr
|
||||
print_summary(rows)
|
||||
failures = run_assertions(rows, missing, schema_errors)
|
||||
if failures:
|
||||
print("\n=== FAILURES ===", file=sys.stderr)
|
||||
for f in failures:
|
||||
print(f" {f}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
pct = lambda n: f"{round(100 * n / n_games)}%" if n_games > 0 else "n/a"
|
||||
if "--update-baseline" in flags:
|
||||
print("--update-baseline: not yet implemented (Phase 3b)", file=sys.stderr)
|
||||
|
||||
print("", file=sys.stderr)
|
||||
print("=== autoplay batch report ===", file=sys.stderr)
|
||||
print(f"games: {n_games}", file=sys.stderr)
|
||||
print(f"victories: {n_victories} ({pct(n_victories)})", file=sys.stderr)
|
||||
print(f"max_turns: {n_max_turns} ({pct(n_max_turns)})", file=sys.stderr)
|
||||
print(f"median turns: {median_int(turns_list)}", file=sys.stderr)
|
||||
print(f"median p0_pop_final: {median_int(p0_pop_list)}", file=sys.stderr)
|
||||
print(f"median p0_combats: {median_int(p0_combats_list)}", file=sys.stderr)
|
||||
print(f"invariant violations (total): {total_violations}", file=sys.stderr)
|
||||
if missing_seeds:
|
||||
print(f"missing result.json: seeds {missing_seeds}", file=sys.stderr)
|
||||
|
||||
# Assertions
|
||||
all_failures = parse_failures + run_assertions(rows, missing_seeds)
|
||||
|
||||
if args.update_baseline:
|
||||
baseline_path = args.baseline or results_dir / "baseline.json"
|
||||
write_baseline_stub(baseline_path)
|
||||
|
||||
if all_failures:
|
||||
print("", file=sys.stderr)
|
||||
print("=== ASSERTION FAILURES ===", file=sys.stderr)
|
||||
for msg in all_failures:
|
||||
print(f" FAIL: {msg}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
print("", file=sys.stderr)
|
||||
print("All assertions passed.", file=sys.stderr)
|
||||
print("\nAll assertions passed.", file=sys.stderr)
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
sys.exit(main(sys.argv))
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@
|
|||
"victory_type",
|
||||
"wall_clock_sec",
|
||||
"aggregate",
|
||||
"final_stats",
|
||||
"player_stats",
|
||||
"invariant_violations"
|
||||
],
|
||||
"properties": {
|
||||
|
|
@ -44,9 +44,9 @@
|
|||
"turn_first_city_captured": { "type": "integer", "minimum": -1 }
|
||||
}
|
||||
},
|
||||
"final_stats": {
|
||||
"player_stats": {
|
||||
"type": "object",
|
||||
"description": "Map of player_index (as string) to per-player stats",
|
||||
"description": "Map of player_index (as string) to per-player stats. Rewritten every turn — not final until outcome != 'in_progress'.",
|
||||
"propertyNames": { "pattern": "^[0-9]+$" },
|
||||
"additionalProperties": {
|
||||
"$ref": "#/definitions/player_stats"
|
||||
|
|
|
|||
0
tools/autoplay-validate.py
Normal file → Executable file
0
tools/autoplay-validate.py
Normal file → Executable file
Loading…
Add table
Reference in a new issue