feat(@projects/@magic-civilization): add learned slot turn execution logic

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
Natalie 2026-06-08 18:19:24 -07:00
parent a5de8ad517
commit 6a15d0a418
2 changed files with 193 additions and 22 deletions

View file

@ -152,12 +152,12 @@ static func run(player: RefCounted) -> int:
# default `scripted:default`) takes the existing scripted turn unchanged.
var controller_id: String = _controller_id_for_slot(int(player.index))
if controller_id.begins_with("learned:"):
# Increment 1 (this round): construct the faithful Rust GameState +
# run the non-empty-view PROBE, then fall through to the scripted
# turn so the game still advances (the policy loop + writeback is
# Increment 2 — deliberately NOT wired here). The probe result is
# logged for the Inc-1 gate; it never mutates game state.
_run_learned_probe(player, controller_id)
# Increment 2 — the learned slot decides its turn via the trained
# policy (in Rust, on the faithful state) and the chosen actions are
# REPLAYED through the authoritative GDScript dispatch. This RETURNS;
# the scripted path does NOT also run the slot (Inc-1 fell through;
# Inc-2 must not, or both AIs would drive the same slot).
return _run_learned_turn(player, controller_id)
_apply_mcts_strategic_override(player)
return _apply_tactical_actions(player)
@ -204,33 +204,121 @@ static func _parse_controller_map() -> void:
print("AiTurnBridge: p1-29k controller map = %s" % str(_controller_map))
## p1-29k Increment 2 — drive the learned slot's whole turn.
##
## 1. Build/refresh the faithful Rust GameState (grid + catalogs once, per-turn
##    unit/city snapshot every call).
## 2. (Diag) emit the per-class view diagnostics so the Inc-1 gate stays
##    observable across the run. Optional: skipped when the gdext build does
##    not expose `learned_view_diagnostics`.
## 3. Run the trained policy via `GdGameState.run_learned_slot` — it decides +
##    applies to the throwaway Rust scratch and returns the ordered
##    applied-action LOG.
## 4. REPLAY the log through the authoritative GDScript dispatch
##    (`ai_turn_bridge_dispatch.gd::dispatch_action`) so moves/attacks/queue
##    resolve under the SAME engine (GDScript `CombatResolver`) + EventBus
##    signals the scripted slots use. Founding is logged-only (Inc-3 boundary).
##
## Returns the number of actions actually applied to the GDScript world, or 0
## when the Rust state is unavailable, `run_learned_slot` is missing, or the
## one-time state initialisation fails.
static func _run_learned_turn(player: RefCounted, controller_id: String) -> int:
	var gd_state: RefCounted = GameState.get_gd_state()
	if gd_state == null:
		push_error("AiTurnBridge: p1-29k — GdGameState unavailable")
		return 0
	if not gd_state.has_method("run_learned_slot"):
		push_error(
			"AiTurnBridge: p1-29k — GdGameState.run_learned_slot missing "
			+ "(gdext build predates p1-29k Increment 2)"
		)
		return 0
	# Stamp grid + catalogs once per game; refresh per-player snapshot each call.
	if not _learned_state_initialized:
		if not _init_learned_state(gd_state):
			return 0
		_learned_state_initialized = true
	_refresh_learned_player_snapshot(gd_state)
	var slot: int = int(player.index)
	# Diagnostics (Inc-1 gate, kept observable).
	if gd_state.has_method("learned_view_diagnostics"):
		var diag: Dictionary = gd_state.learned_view_diagnostics(slot)
		print(
			"[p1-29k] learned_view turn=%d slot=%d controller=%s diag=%s"
			% [int(GameState.turn_number), slot, controller_id, str(diag)]
		)
	# Run the policy; get the applied-action log.
	var action_log: Array = gd_state.run_learned_slot(slot)
	print(
		"[p1-29k] learned_actions turn=%d slot=%d n=%d log=%s"
		% [int(GameState.turn_number), slot, action_log.size(), str(action_log)]
	)
	# Replay the log through the authoritative GDScript dispatch.
	return _replay_learned_actions(action_log, player)
## Replay the policy's applied-action log through the existing autoplay action
## dispatch (`ai_turn_bridge_dispatch.gd`). Translates each log dict into the
## `mc_ai::tactical::Action` JSON shape `dispatch_action` consumes, converting
## OFFSET target coords → AXIAL (the GDScript dispatch + entity positions are
## axial). Move and Attack both collapse to `MoveUnit`: `dispatch_move`
## auto-detects an enemy/city at the target and routes to
## `resolve_move_as_attack` → `CombatResolver`. Returns the applied count.
##
## `found_city_deferred` is logged but NOT routed — the founding-writeback seam
## is Inc-3 (p1-29j boundary). `fortify` and `other` (Skip/Sentry/Unfortify)
## have no dispatch case here and are reported as dropped variants (Inc-2
## coverage gap, acknowledged).
##
## Entries that cannot be replayed faithfully are skipped and reported rather
## than dispatched: a negative `unit_id` (the Rust side emits -1 when the id
## fails to parse), missing `to_col`/`to_row`, or a `city_id` that
## `_resolve_city_index` cannot resolve. Skipped entries never count as applied.
static func _replay_learned_actions(action_log: Array, player: RefCounted) -> int:
	var index_maps: Dictionary = StateScript.build_index_maps()
	var city_name: String = _generate_city_name(player)
	var applied: int = 0
	var deferred_founds: int = 0
	var dropped_variants: Array[String] = []
	var malformed: Array[String] = []
	for entry: Dictionary in action_log:
		var kind: String = String(entry.get("kind", ""))
		match kind:
			"move", "attack":
				var unit_id: int = int(entry.get("unit_id", -1))
				if unit_id < 0 or not entry.has("to_col") or not entry.has("to_row"):
					# Never forward the -1 sentinel or a defaulted (0, 0) target.
					malformed.append("%s(unit_id=%d)" % [kind, unit_id])
				else:
					# Offset → axial; emit a MoveUnit record (dispatch_move handles
					# the move-as-attack case at the target hex).
					var off: Vector2i = Vector2i(int(entry["to_col"]), int(entry["to_row"]))
					var ax: Vector2i = HexUtilsScript.offset_to_axial(off)
					var move_json: String = JSON.stringify({
						"MoveUnit": {
							"unit_id": unit_id,
							"to_hex": [ax.x, ax.y],
						}
					})
					if DispatchScript.dispatch_action(move_json, player, index_maps, city_name):
						applied += 1
			"queue":
				var rust_city_id: String = String(entry.get("city_id", ""))
				var city_index: int = _resolve_city_index(rust_city_id)
				if city_index < 0:
					# Unresolvable city id — do not dispatch an invalid key.
					malformed.append("queue(city_id=%s)" % rust_city_id)
				else:
					var queue_json: String = JSON.stringify({
						"SetProduction": {
							"city_id": city_index,
							"item_id": String(entry.get("item", "")),
						}
					})
					if DispatchScript.dispatch_action(queue_json, player, index_maps, city_name):
						applied += 1
			"found_city_deferred":
				deferred_founds += 1
			"fortify":
				# Emitted by the Rust log but not replayed here; surfaced as a
				# dropped variant exactly like any other uncovered kind.
				dropped_variants.append(kind)
			"other":
				dropped_variants.append(String(entry.get("variant", "?")))
			_:
				dropped_variants.append(kind)
	if deferred_founds > 0:
		print("[p1-29k] DEFERRED %d FoundCity action(s) to Inc-3 (p1-29j boundary)" % deferred_founds)
	if not dropped_variants.is_empty():
		print("[p1-29k] dropped %d uncovered variant(s): %s" % [dropped_variants.size(), str(dropped_variants)])
	if not malformed.is_empty():
		push_warning("[p1-29k] skipped %d malformed action(s): %s" % [malformed.size(), str(malformed)])
	return applied
## Resolve the Rust faithful-state city_id (`"<pi>_<ci>"`) to the GDScript
## index-maps city key (`pi*ID_STRIDE+ci`) that `dispatch_set_production`
## consumes via `resolve_city`. The Rust CityView id is `"{p_idx}_{c_idx}"`.
##
## Returns -1 when the id is not exactly two `_`-separated integers, when
## either component is negative (`is_valid_int` accepts a leading sign), or
## when the city index is `>= ID_STRIDE` (it would alias another player's key).
static func _resolve_city_index(rust_city_id: String) -> int:
	var parts: PackedStringArray = rust_city_id.split("_")
	if parts.size() != 2:
		return -1
	if not (parts[0].is_valid_int() and parts[1].is_valid_int()):
		return -1
	var player_idx: int = int(parts[0])
	var city_idx: int = int(parts[1])
	# Keep -1 as the only failure value and keep the key inside its own stride.
	if player_idx < 0 or city_idx < 0 or city_idx >= ID_STRIDE:
		return -1
	return player_idx * ID_STRIDE + city_idx
## Stamp the faithful-state inputs that survive the whole game onto `gd_state`:

View file

@ -4927,6 +4927,89 @@ impl GdGameState {
d
}
/// p1-29k Increment 2 — run the trained learned policy for `slot` against
/// the held faithful `GameState` and return the ordered applied-action LOG
/// as `Array<Dictionary>`. The policy decides + applies to `self.inner`
/// (the throwaway per-turn scratch), and we hand the chosen `PlayerAction`s
/// back so the GDScript bridge can REPLAY them through the authoritative
/// autoplay dispatch (`ai_turn_bridge_dispatch.gd`) — one combat engine
/// (GDScript `CombatResolver`, same as the scripted slots), EventBus
/// signals for free, no second combat-resolution path. See the p1-29k
/// Inc-2 design.
///
/// Reuses `mc_player_api::drive_learned_slot_recording` — the SAME loop the
/// player-API world runs (the non-recording `drive_learned_slot` is a thin
/// wrapper that discards the log), so there is no forked dispatch.
///
/// Each log dict carries:
/// - `kind` — `"move"` | `"attack"` | `"fortify"` | `"queue"` |
/// `"found_city_deferred"` | `"other"`.
/// - `unit_id` — the stable `MapUnit.id` (== `pi*ID_STRIDE+ui`, the
/// scheme `build_index_maps` keys on). Present for unit
/// verbs.
/// - `to_col`/`to_row` — OFFSET target coords for `move`/`attack` (the
/// GDScript replay converts offset→axial; `dispatch_move`
/// consumes axial and auto-detects an enemy at the target,
/// so move + attack both collapse to one MoveUnit replay).
/// - `city_id`/`item` — for `queue` (QueueProduction).
///
/// `found_city_deferred` is logged but NOT routed — the founding-writeback
/// seam is Inc-3 (p1-29j boundary), so GDScript skips it.
#[func]
fn run_learned_slot(&mut self, slot: i64) -> Array<Dictionary> {
use mc_player_api::action::PlayerAction;
let mut out: Array<Dictionary> = Array::new();
let pid = slot.max(0) as u8;
let (_applied, log) =
mc_player_api::drive_learned_slot_recording(&mut self.inner, pid);
for action in &log {
let mut d = Dictionary::new();
match action {
PlayerAction::Move { unit_id, to } => {
d.set("kind", GString::from("move"));
d.set("unit_id", unit_id.parse::<i64>().unwrap_or(-1));
d.set("to_col", to[0] as i64);
d.set("to_row", to[1] as i64);
}
PlayerAction::Attack { unit_id, target } => {
// Replayed as a MoveUnit onto the enemy hex — `dispatch_move`
// detects the occupant and routes to `resolve_move_as_attack`
// → CombatResolver.
d.set("kind", GString::from("attack"));
d.set("unit_id", unit_id.parse::<i64>().unwrap_or(-1));
d.set("to_col", target[0] as i64);
d.set("to_row", target[1] as i64);
}
PlayerAction::Fortify { unit_id } => {
d.set("kind", GString::from("fortify"));
d.set("unit_id", unit_id.parse::<i64>().unwrap_or(-1));
}
PlayerAction::QueueProduction { city_id, item, .. } => {
d.set("kind", GString::from("queue"));
d.set("city_id", GString::from(city_id.as_str()));
d.set("item", GString::from(item.as_str()));
}
PlayerAction::FoundCity { unit_id } => {
// Inc-3 boundary (p1-29j) — logged, NOT routed.
d.set("kind", GString::from("found_city_deferred"));
d.set("unit_id", unit_id.parse::<i64>().unwrap_or(-1));
}
other => {
// Skip/Sentry/Unfortify/etc. — no `dispatch_action` case;
// surfaced as `other` so the gate run can log dropped
// variants (coverage gap, acknowledged for Inc-2).
d.set("kind", GString::from("other"));
let dbg = format!("{other:?}");
let variant_name =
dbg.split('{').next().unwrap_or("?").trim().to_string();
d.set("variant", GString::from(variant_name.as_str()));
}
}
out.push(&d);
}
out
}
/// Read player `pi`'s units back as a Godot Array of Dictionaries with
/// the same key shape as `set_player_units_from_dicts`. Used by GDScript
/// adapters to detect which units survived a turn.