76 lines
2.8 KiB
Python
76 lines
2.8 KiB
Python
"""Unit tests for `MagicCivEnv._check_termination` — the multi-player
|
|
sole-survivor win logic added for the mp-v1 retrain.
|
|
|
|
`_check_termination` is pure over `(view, recent_events, self._live_players)`,
|
|
so we exercise it on a bare instance (`__new__`, no harness subprocess) with
|
|
`_live_players` seeded directly. This pins:
|
|
* duel (2p) behaviour is byte-equivalent to the old 1v1 shortcut;
|
|
* a 4p game only wins when EVERY opponent is eliminated, not on the first;
|
|
* `game_over` and self-elimination still take priority.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from tooling.rl_self_play.magic_civ_env import (
|
|
LOSS_REWARD,
|
|
WIN_BASE,
|
|
MagicCivEnv,
|
|
)
|
|
|
|
|
|
def _env(live: set[int]) -> MagicCivEnv:
    """Return a bare `MagicCivEnv` with only `_live_players` initialised.

    The instance is allocated through `__new__`, so `__init__` never runs
    and no harness is involved. `live` is copied into a new set, so the
    environment does not alias the caller's collection.
    """
    shell = MagicCivEnv.__new__(MagicCivEnv)
    # Seed the one attribute these tests rely on; nothing else is set up.
    shell._live_players = set(live)
    return shell
|
|
|
|
|
|
def _view(me: int = 0, city_count: int = 1) -> dict:
|
|
return {"player": me, "score": {"city_count": city_count}, "units": []}
|
|
|
|
|
|
def test_duel_opponent_elimination_is_a_win() -> None:
    """2-player game: the opponent's elimination event is an immediate win."""
    duel = _env({0, 1})
    events = [{"type": "player_eliminated", "player": 1}]

    terminated, reward, reason = duel._check_termination(_view(), events)

    # One condition per assert so a failure names the field that was wrong.
    assert terminated
    assert reason == "won"
    assert reward == WIN_BASE
|
|
|
|
|
|
def test_multiplayer_first_elimination_is_not_yet_a_win() -> None:
    """4-player game: losing one opponent must not end the episode."""
    four_player = _env({0, 1, 2, 3})
    events = [{"type": "player_eliminated", "player": 2}]

    terminated, _, reason = four_player._check_termination(_view(), events)

    assert not terminated
    assert reason is None
    # The eliminated player is dropped from the live set, so later
    # eliminations keep counting down toward the sole-survivor win.
    assert four_player._live_players == {0, 1, 3}
|
|
|
|
|
|
def test_multiplayer_sole_survivor_wins() -> None:
    """Both remaining opponents eliminated in one batch of events is a win."""
    # Start already down to the learner (0) plus two opponents (1 and 3).
    three_left = _env({0, 1, 3})
    events = [
        {"type": "player_eliminated", "player": opponent}
        for opponent in (1, 3)
    ]

    terminated, reward, reason = three_left._check_termination(_view(), events)

    assert terminated
    assert reason == "won"
    assert reward == WIN_BASE
|
|
|
|
|
|
def test_self_elimination_is_a_loss() -> None:
    """An elimination event naming the viewing player (0) ends the episode as a loss."""
    three_player = _env({0, 1, 2})
    events = [{"type": "player_eliminated", "player": 0}]

    terminated, reward, reason = three_player._check_termination(_view(), events)

    assert terminated
    assert reason == "eliminated"
    assert reward == LOSS_REWARD
|
|
|
|
|
|
def test_game_over_event_takes_priority() -> None:
    """A `game_over` event terminates even though all four players are still live.

    The reported reason is "won" when the winner is the viewing player (0)
    and "eliminated" when it is someone else.
    """
    four_player = _env({0, 1, 2, 3})

    # Same env instance for both cases, in the same order: win first, then loss.
    for winner, expected_reason in ((0, "won"), (2, "eliminated")):
        events = [{"type": "game_over", "winner": winner}]
        terminated, _, reason = four_player._check_termination(_view(me=0), events)
        assert terminated
        assert reason == expected_reason
|
|
|
|
|
|
def test_defensive_fallback_no_cities_no_founder_is_loss() -> None:
    """With no events at all, a view with zero cities and no units is a loss."""
    duel = _env({0, 1})
    # `_view` always supplies an empty `units` list; only the city count
    # needs overriding to get the "nothing left" view.
    barren_view = _view(city_count=0)

    terminated, reward, reason = duel._check_termination(barren_view, [])

    assert terminated
    assert reason == "eliminated"
    assert reward == LOSS_REWARD
|