# magicciv/tooling/rl_self_play/tests/test_env_termination.py

"""Unit tests for `MagicCivEnv._check_termination` — the multi-player
sole-survivor win logic added for the mp-v1 retrain.

`_check_termination` is pure over `(view, recent_events, self._live_players)`,
so we exercise it on a bare instance (`__new__`, no harness subprocess) with
`_live_players` seeded directly. This pins:
  * duel (2p) behaviour is byte-equivalent to the old 1v1 shortcut;
  * a 4p game only wins when EVERY opponent is eliminated, not on the first;
  * `game_over` and self-elimination still take priority.
"""
from __future__ import annotations

from tooling.rl_self_play.magic_civ_env import (
    LOSS_REWARD,
    WIN_BASE,
    MagicCivEnv,
)


def _env(live: set[int]) -> MagicCivEnv:
    """Build a bare `MagicCivEnv` whose only state is `_live_players`.

    The instance is allocated with `__new__`, so `__init__` never runs;
    `_live_players` is the single attribute assigned here.

    Args:
        live: Player ids to treat as still alive. The set is copied, so the
            caller's set is not shared with the returned instance.

    Returns:
        The uninitialised env with `_live_players` populated.
    """
    shell = MagicCivEnv.__new__(MagicCivEnv)
    # Copy so later changes to the env's set do not touch the caller's set.
    live_copy = set(live)
    shell._live_players = live_copy
    return shell


def _view(me: int = 0, city_count: int = 1) -> dict:
    return {"player": me, "score": {"city_count": city_count}, "units": []}


def test_duel_opponent_elimination_is_a_win() -> None:
    """In a two-player game, eliminating the lone opponent is a win."""
    duel = _env({0, 1})
    events = [{"type": "player_eliminated", "player": 1}]

    terminated, reward, reason = duel._check_termination(_view(), events)

    # One assert per field so a failure names the field that is wrong.
    assert terminated
    assert reason == "won"
    assert reward == WIN_BASE


def test_multiplayer_first_elimination_is_not_yet_a_win() -> None:
    """With four players, losing one opponent must not end the episode."""
    four_player = _env({0, 1, 2, 3})
    events = [{"type": "player_eliminated", "player": 2}]

    terminated, _, reason = four_player._check_termination(_view(), events)

    assert not terminated
    assert reason is None
    # The eliminated player is dropped from the live set, so subsequent
    # eliminations keep counting down toward the sole-survivor win.
    assert four_player._live_players == {0, 1, 3}


def test_multiplayer_sole_survivor_wins() -> None:
    """Eliminating every remaining opponent in one batch of events is a win."""
    # Start from a game already reduced to the learner plus two opponents.
    survivors = _env({0, 1, 3})
    events = [
        {"type": "player_eliminated", "player": opponent}
        for opponent in (1, 3)
    ]

    terminated, reward, reason = survivors._check_termination(_view(), events)

    assert terminated
    assert reason == "won"
    assert reward == WIN_BASE


def test_self_elimination_is_a_loss() -> None:
    """An elimination event naming the viewing player ends the episode as a loss."""
    three_player = _env({0, 1, 2})
    events = [{"type": "player_eliminated", "player": 0}]

    terminated, reward, reason = three_player._check_termination(_view(), events)

    assert terminated
    assert reason == "eliminated"
    assert reward == LOSS_REWARD


def test_game_over_event_takes_priority() -> None:
    """A `game_over` event decides the outcome while opponents are still live.

    Both scenarios start with four live players and no eliminations, so the
    sole-survivor rule cannot be what terminates the episode.
    """
    # Each scenario gets its own env: `_check_termination` can mutate
    # `_live_players`, so sharing one instance would let the first call
    # influence the second.
    win_events = [{"type": "game_over", "winner": 0}]
    won, _win_reward, win_reason = _env({0, 1, 2, 3})._check_termination(
        _view(me=0), win_events
    )
    assert won
    assert win_reason == "won"

    loss_events = [{"type": "game_over", "winner": 2}]
    lost, _loss_reward, loss_reason = _env({0, 1, 2, 3})._check_termination(
        _view(me=0), loss_events
    )
    assert lost
    assert loss_reason == "eliminated"


def test_defensive_fallback_no_cities_no_founder_is_loss() -> None:
    """With no events at all, a view with zero cities and no units is a loss."""
    env = _env({0, 1})
    # Reuse the shared fixture builder instead of hand-copying its dict shape.
    # `_view` always reports an empty unit list, so only the city count
    # needs overriding to describe "no cities, no founder".
    view = _view(city_count=0)

    terminated, reward, reason = env._check_termination(view, [])

    assert terminated
    assert reason == "eliminated"
    assert reward == LOSS_REWARD