magicciv/tooling/rl_self_play/tests/test_env_termination.py
Natalie 00e98329fa feat(@projects/@magic-civilization): update objectives dashboard and climate integration
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-06-09 01:07:07 -07:00

76 lines
2.8 KiB
Python

"""Unit tests for `MagicCivEnv._check_termination` — the multi-player
sole-survivor win logic added for the mp-v1 retrain.

`_check_termination` depends only on
`(view, recent_events, self._live_players)` (it also prunes `_live_players`
as eliminations arrive), so we exercise it on a bare instance (`__new__`,
no harness subprocess) with `_live_players` seeded directly. This pins:

* duel (2p) behaviour is byte-equivalent to the old 1v1 shortcut;
* a 4p game only wins when EVERY opponent is eliminated, not on the first;
* `game_over` and self-elimination still take priority.
"""
from __future__ import annotations
from tooling.rl_self_play.magic_civ_env import (
LOSS_REWARD,
WIN_BASE,
MagicCivEnv,
)
def _env(live: set[int]) -> MagicCivEnv:
    """Build a bare `MagicCivEnv` whose only assigned state is `_live_players`.

    The instance is allocated with `__new__`, so `__init__` never runs and
    nothing besides `_live_players` is set on it here.

    Args:
        live: Player ids to seed as the env's live-player set.

    Returns:
        The shell environment, holding its own copy of `live`.
    """
    shell = MagicCivEnv.__new__(MagicCivEnv)
    # Copy so changes the env makes to its live set never leak back into
    # the caller's set.
    shell._live_players = set(live)
    return shell
def _view(me: int = 0, city_count: int = 1) -> dict:
    """Build the minimal player-view dict passed to `_check_termination`.

    Args:
        me: Value stored under the `"player"` key.
        city_count: Value stored under `"score" -> "city_count"`.

    Returns:
        A fresh dict with `"player"`, `"score"` and an empty `"units"` list.
    """
    score = {"city_count": city_count}
    return {"player": me, "score": score, "units": []}
def test_duel_opponent_elimination_is_a_win() -> None:
    """With live players {0, 1}, player 1's elimination is a win for player 0."""
    env = _env({0, 1})
    events = [{"type": "player_eliminated", "player": 1}]
    terminated, reward, reason = env._check_termination(_view(), events)
    # One assert per field so a failure names exactly what regressed.
    assert terminated
    assert reason == "won"
    assert reward == WIN_BASE
def test_multiplayer_first_elimination_is_not_yet_a_win() -> None:
    """With four live players, one opponent's elimination must not terminate."""
    env = _env({0, 1, 2, 3})
    events = [{"type": "player_eliminated", "player": 2}]
    terminated, _reward, reason = env._check_termination(_view(), events)
    # One assert per field so a failure names exactly what regressed.
    assert not terminated
    assert reason is None
    # The live set was pruned, so a later elimination progresses toward win.
    assert env._live_players == {0, 1, 3}
def test_multiplayer_sole_survivor_wins() -> None:
    """Eliminating both remaining opponents in one batch of events is a win."""
    # Already down to the learner (0) plus two opponents (1 and 3).
    env = _env({0, 1, 3})
    events = [
        {"type": "player_eliminated", "player": opponent}
        for opponent in (1, 3)
    ]
    terminated, reward, reason = env._check_termination(_view(), events)
    # One assert per field so a failure names exactly what regressed.
    assert terminated
    assert reason == "won"
    assert reward == WIN_BASE
def test_self_elimination_is_a_loss() -> None:
    """A `player_eliminated` event for the viewing player (0) is a loss."""
    env = _env({0, 1, 2})
    events = [{"type": "player_eliminated", "player": 0}]
    terminated, reward, reason = env._check_termination(_view(), events)
    # One assert per field so a failure names exactly what regressed.
    assert terminated
    assert reason == "eliminated"
    assert reward == LOSS_REWARD
def test_game_over_event_takes_priority() -> None:
    """A `game_over` event terminates even though other players are still live.

    The outcome follows the event's `winner`: "won" when it is the viewing
    player, "eliminated" when it is someone else. Rewards are not checked.
    """
    env = _env({0, 1, 2, 3})

    # Winner is the viewing player.
    won_events = [{"type": "game_over", "winner": 0}]
    terminated, _reward, reason = env._check_termination(_view(me=0), won_events)
    assert terminated
    assert reason == "won"

    # Winner is another player.
    lost_events = [{"type": "game_over", "winner": 2}]
    terminated, _reward, reason = env._check_termination(_view(me=0), lost_events)
    assert terminated
    assert reason == "eliminated"
def test_defensive_fallback_no_cities_no_founder_is_loss() -> None:
    """With no events, a view with zero cities and no units is a loss."""
    env = _env({0, 1})
    # Same dict as the hand-written literal: player 0, city_count 0, no units.
    empty_view = _view(me=0, city_count=0)
    terminated, reward, reason = env._check_termination(empty_view, [])
    # One assert per field so a failure names exactly what regressed.
    assert terminated
    assert reason == "eliminated"
    assert reward == LOSS_REWARD