diff --git a/src/game/engine/src/modules/ai/ai_turn_bridge.gd b/src/game/engine/src/modules/ai/ai_turn_bridge.gd
index 820581c3..b0926377 100644
--- a/src/game/engine/src/modules/ai/ai_turn_bridge.gd
+++ b/src/game/engine/src/modules/ai/ai_turn_bridge.gd
@@ -152,12 +152,12 @@ static func run(player: RefCounted) -> int:
 	# default `scripted:default`) takes the existing scripted turn unchanged.
 	var controller_id: String = _controller_id_for_slot(int(player.index))
 	if controller_id.begins_with("learned:"):
-		# Increment 1 (this round): construct the faithful Rust GameState +
-		# run the non-empty-view PROBE, then fall through to the scripted
-		# turn so the game still advances (the policy loop + writeback is
-		# Increment 2 — deliberately NOT wired here). The probe result is
-		# logged for the Inc-1 gate; it never mutates game state.
-		_run_learned_probe(player, controller_id)
+		# Increment 2 — the learned slot decides its turn via the trained
+		# policy (in Rust, on the faithful state) and the chosen actions are
+		# REPLAYED through the authoritative GDScript dispatch. This RETURNS;
+		# the scripted path does NOT also run the slot (Inc-1 fell through;
+		# Inc-2 must not, or both AIs would drive the same slot).
+		return _run_learned_turn(player, controller_id)
 	_apply_mcts_strategic_override(player)
 	return _apply_tactical_actions(player)
 
@@ -204,33 +204,144 @@ static func _parse_controller_map() -> void:
 	print("AiTurnBridge: p1-29k controller map = %s" % str(_controller_map))
 
 
-## p1-29k Increment 1 — build the faithful Rust GameState for `player`'s slot
-## and run the non-empty-view PROBE through `GdGameState`. Logs the per-class
-## diagnostic dict. Does NOT run the policy loop and does NOT mutate state —
-## this is the Inc-1 soundness gate only; the learned turn itself falls through
-## to the scripted path in `run`.
-static func _run_learned_probe(player: RefCounted, controller_id: String) -> void:
+## p1-29k Increment 2 — drive the learned slot's whole turn.
+##
+## 1. Build/refresh the faithful Rust GameState (grid + catalogs once, per-turn
+##    unit/city snapshot every call).
+## 2. (Diag) emit the per-class view diagnostics so the Inc-1 gate stays
+##    observable across the run.
+## 3. Run the trained policy via `GdGameState.run_learned_slot` — it decides +
+##    applies to the throwaway Rust scratch and returns the ordered
+##    applied-action LOG.
+## 4. REPLAY the log through the authoritative GDScript dispatch
+##    (`ai_turn_bridge_dispatch.gd::dispatch_action`) so moves/attacks/queue
+##    resolve under the SAME engine (GDScript `CombatResolver`) + EventBus
+##    signals the scripted slots use. Founding is logged-only (Inc-3 boundary).
+##
+## Returns the number of actions actually applied to the GDScript world.
+static func _run_learned_turn(player: RefCounted, controller_id: String) -> int:
 	var gd_state: RefCounted = GameState.get_gd_state()
 	if gd_state == null:
-		push_error("AiTurnBridge: p1-29k probe — GdGameState unavailable")
-		return
-	if not gd_state.has_method("learned_view_diagnostics"):
+		push_error("AiTurnBridge: p1-29k — GdGameState unavailable")
+		return 0
+	if not gd_state.has_method("run_learned_slot"):
 		push_error(
-			"AiTurnBridge: p1-29k probe — GdGameState.learned_view_diagnostics missing "
-			+ "(gdext build predates p1-29k Increment 1)"
+			"AiTurnBridge: p1-29k — GdGameState.run_learned_slot missing "
+			+ "(gdext build predates p1-29k Increment 2)"
 		)
-		return
+		return 0
 	# Stamp grid + catalogs once per game; refresh per-player snapshot each call.
 	if not _learned_state_initialized:
 		if not _init_learned_state(gd_state):
-			return
+			return 0
 		_learned_state_initialized = true
 	_refresh_learned_player_snapshot(gd_state)
-	var diag: Dictionary = gd_state.learned_view_diagnostics(int(player.index))
+	var slot: int = int(player.index)
+	# Diagnostics (Inc-1 gate, kept observable).
+	if gd_state.has_method("learned_view_diagnostics"):
+		var diag: Dictionary = gd_state.learned_view_diagnostics(slot)
+		print(
+			"[p1-29k] learned_view turn=%d slot=%d controller=%s diag=%s"
+			% [int(GameState.turn_number), slot, controller_id, str(diag)]
+		)
+	# Run the policy; get the applied-action log.
+	var action_log: Array = gd_state.run_learned_slot(slot)
 	print(
-		"[p1-29k] learned_view turn=%d slot=%d controller=%s diag=%s"
-		% [int(GameState.turn_number), int(player.index), controller_id, str(diag)]
+		"[p1-29k] learned_actions turn=%d slot=%d n=%d log=%s"
+		% [int(GameState.turn_number), slot, action_log.size(), str(action_log)]
 	)
+	# Replay the log through the authoritative GDScript dispatch.
+	return _replay_learned_actions(action_log, player)
+
+
+## Replay the policy's applied-action log through the existing autoplay action
+## dispatch (`ai_turn_bridge_dispatch.gd`). Translates each log dict into the
+## `mc_ai::tactical::Action` JSON shape `dispatch_action` consumes, converting
+## OFFSET target coords → AXIAL (the GDScript dispatch + entity positions are
+## axial). Move and Attack both collapse to `MoveUnit`: `dispatch_move`
+## auto-detects an enemy/city at the target and routes to
+## `resolve_move_as_attack` → `CombatResolver`. Returns the applied count.
+##
+## `found_city_deferred` is logged but NOT routed — the founding-writeback seam
+## is Inc-3 (p1-29j boundary). `other` (Skip/Sentry/Unfortify) has no dispatch
+## case and is reported as a dropped variant (Inc-2 coverage gap, acknowledged);
+## so is any kind without a branch below, e.g. `fortify`.
+##
+## Entries that cannot be replayed faithfully are skipped and reported rather
+## than dispatched with placeholder values: a move/attack with a missing or
+## negative `unit_id` or without `to_col`/`to_row`, and a queue whose `city_id`
+## does not resolve to a non-negative city key.
+static func _replay_learned_actions(action_log: Array, player: RefCounted) -> int:
+	var index_maps: Dictionary = StateScript.build_index_maps()
+	# NOTE(review): presumably `city_name` only matters to a FoundCity dispatch,
+	# which this replay never routes — confirm `_generate_city_name` is free of
+	# side effects, otherwise this call is wasted work on every learned turn.
+	var city_name: String = _generate_city_name(player)
+	var applied: int = 0
+	var deferred_founds: int = 0
+	var dropped_variants: Array[String] = []
+	var malformed: Array[String] = []
+	for entry: Dictionary in action_log:
+		var kind: String = String(entry.get("kind", ""))
+		match kind:
+			"move", "attack":
+				var unit_id: int = int(entry.get("unit_id", -1))
+				if unit_id < 0 or not entry.has("to_col") or not entry.has("to_row"):
+					# Never dispatch the -1 sentinel or a defaulted (0, 0) target.
+					malformed.append(str(entry))
+				else:
+					# Offset → axial; emit a MoveUnit record (dispatch_move handles
+					# the move-as-attack case at the target hex).
+					var off: Vector2i = Vector2i(int(entry.get("to_col", 0)), int(entry.get("to_row", 0)))
+					var ax: Vector2i = HexUtilsScript.offset_to_axial(off)
+					var move_json: String = JSON.stringify({
+						"MoveUnit": {
+							"unit_id": unit_id,
+							"to_hex": [ax.x, ax.y],
+						}
+					})
+					if DispatchScript.dispatch_action(move_json, player, index_maps, city_name):
+						applied += 1
+			"queue":
+				var city_index: int = int(_resolve_city_index(String(entry.get("city_id", ""))))
+				if city_index < 0:
+					# The Rust city id did not resolve — nothing sensible to dispatch.
+					malformed.append(str(entry))
+				else:
+					var queue_json: String = JSON.stringify({
+						"SetProduction": {
+							"city_id": city_index,
+							"item_id": String(entry.get("item", "")),
+						}
+					})
+					if DispatchScript.dispatch_action(queue_json, player, index_maps, city_name):
+						applied += 1
+			"found_city_deferred":
+				deferred_founds += 1
+			"other":
+				dropped_variants.append(String(entry.get("variant", "?")))
+			_:
+				dropped_variants.append(kind)
+	if deferred_founds > 0:
+		print("[p1-29k] DEFERRED %d FoundCity action(s) to Inc-3 (p1-29j boundary)" % deferred_founds)
+	if not dropped_variants.is_empty():
+		print("[p1-29k] dropped %d uncovered variant(s): %s" % [dropped_variants.size(), str(dropped_variants)])
+	if not malformed.is_empty():
+		push_warning("[p1-29k] skipped %d malformed action(s): %s" % [malformed.size(), str(malformed)])
+	return applied
+
+
+## Resolve the Rust faithful-state city_id (`"{p_idx}_{c_idx}"`) to the GDScript
+## index-maps city key (`pi*ID_STRIDE+ci`) that `dispatch_set_production`
+## consumes via `resolve_city`. The Rust CityView id is `"{p_idx}_{c_idx}"`.
+## Returns -1 when the id is not exactly two `_`-separated integers.
+static func _resolve_city_index(rust_city_id: String) -> int:
+	var parts: PackedStringArray = rust_city_id.split("_")
+	if parts.size() != 2:
+		return -1
+	if not (parts[0].is_valid_int() and parts[1].is_valid_int()):
+		return -1
+	return int(parts[0]) * ID_STRIDE + int(parts[1])
 
 
 ## Stamp the faithful-state inputs that survive the whole game onto `gd_state`:
diff --git a/src/simulator/api-gdext/src/lib.rs b/src/simulator/api-gdext/src/lib.rs
index 39b1cfee..cd6dcb01 100644
--- a/src/simulator/api-gdext/src/lib.rs
+++ b/src/simulator/api-gdext/src/lib.rs
@@ -4927,6 +4927,110 @@ impl GdGameState {
         d
     }
 
+    /// p1-29k Increment 2 — run the trained learned policy for `slot` against
+    /// the held faithful `GameState` and return the ordered applied-action LOG
+    /// as `Array<Dictionary>`. The policy decides + applies to `self.inner`
+    /// (the throwaway per-turn scratch), and we hand the chosen `PlayerAction`s
+    /// back so the GDScript bridge can REPLAY them through the authoritative
+    /// autoplay dispatch (`ai_turn_bridge_dispatch.gd`) — one combat engine
+    /// (GDScript `CombatResolver`, same as the scripted slots), EventBus
+    /// signals for free, no second combat-resolution path. See the p1-29k
+    /// Inc-2 design.
+    ///
+    /// Reuses `mc_player_api::drive_learned_slot_recording` — the SAME loop the
+    /// player-API world runs (the non-recording `drive_learned_slot` is a thin
+    /// wrapper that discards the log), so there is no forked dispatch.
+    ///
+    /// `slot` must fit the `u8` player id the driver takes. An out-of-range
+    /// value logs a `godot_error!` and returns an empty log; it is never
+    /// clamped or truncated onto another player's slot.
+    ///
+    /// Each log dict carries:
+    /// - `kind` — `"move"` | `"attack"` | `"fortify"` | `"queue"` |
+    ///   `"found_city_deferred"` | `"other"`.
+    /// - `unit_id` — the stable `MapUnit.id` (== `pi*ID_STRIDE+ui`, the
+    ///   scheme `build_index_maps` keys on). Present for unit
+    ///   verbs; `-1` when the id string does not parse as an integer.
+    /// - `to_col`/`to_row` — OFFSET target coords for `move`/`attack` (the
+    ///   GDScript replay converts offset→axial; `dispatch_move`
+    ///   consumes axial and auto-detects an enemy at the target,
+    ///   so move + attack both collapse to one MoveUnit replay).
+    /// - `city_id`/`item` — for `queue` (QueueProduction).
+    /// - `variant` — for `other`: the variant name taken from the action's
+    ///   `Debug` output.
+    ///
+    /// `found_city_deferred` is logged but NOT routed — the founding-writeback
+    /// seam is Inc-3 (p1-29j boundary), so GDScript skips it.
+    #[func]
+    fn run_learned_slot(&mut self, slot: i64) -> Array<Dictionary> {
+        use mc_player_api::action::PlayerAction;
+        let mut out: Array<Dictionary> = Array::new();
+        // `slot.max(0) as u8` would run the policy for slot 0 on a negative
+        // index and wrap anything above 255, so reject instead of coercing.
+        if !(0..=i64::from(u8::MAX)).contains(&slot) {
+            godot_error!(
+                "run_learned_slot: slot {} is outside 0..=255; no actions run",
+                slot
+            );
+            return out;
+        }
+        let pid = slot as u8;
+        let (_applied, log) =
+            mc_player_api::drive_learned_slot_recording(&mut self.inner, pid);
+        for action in &log {
+            let mut d = Dictionary::new();
+            match action {
+                PlayerAction::Move { unit_id, to } => {
+                    d.set("kind", GString::from("move"));
+                    d.set("unit_id", unit_id.parse::<i64>().unwrap_or(-1));
+                    d.set("to_col", to[0] as i64);
+                    d.set("to_row", to[1] as i64);
+                }
+                PlayerAction::Attack { unit_id, target } => {
+                    // Replayed as a MoveUnit onto the enemy hex — `dispatch_move`
+                    // detects the occupant and routes to `resolve_move_as_attack`
+                    // → CombatResolver.
+                    d.set("kind", GString::from("attack"));
+                    d.set("unit_id", unit_id.parse::<i64>().unwrap_or(-1));
+                    d.set("to_col", target[0] as i64);
+                    d.set("to_row", target[1] as i64);
+                }
+                PlayerAction::Fortify { unit_id } => {
+                    // No replay case on the GDScript side yet: the bridge's
+                    // catch-all reports it as a dropped variant.
+                    d.set("kind", GString::from("fortify"));
+                    d.set("unit_id", unit_id.parse::<i64>().unwrap_or(-1));
+                }
+                PlayerAction::QueueProduction { city_id, item, .. } => {
+                    d.set("kind", GString::from("queue"));
+                    d.set("city_id", GString::from(city_id.as_str()));
+                    d.set("item", GString::from(item.as_str()));
+                }
+                PlayerAction::FoundCity { unit_id } => {
+                    // Inc-3 boundary (p1-29j) — logged, NOT routed.
+                    d.set("kind", GString::from("found_city_deferred"));
+                    d.set("unit_id", unit_id.parse::<i64>().unwrap_or(-1));
+                }
+                other => {
+                    // Skip/Sentry/Unfortify/etc. — no `dispatch_action` case;
+                    // surfaced as `other` so the gate run can log dropped
+                    // variants (coverage gap, acknowledged for Inc-2).
+                    d.set("kind", GString::from("other"));
+                    // Keep only the variant name: cut the `Debug` text at the
+                    // first `{`, `(` or space (struct, tuple and unit variants).
+                    let dbg = format!("{other:?}");
+                    let variant_name = dbg
+                        .split(|c: char| c == '{' || c == '(' || c.is_whitespace())
+                        .next()
+                        .unwrap_or("?");
+                    d.set("variant", GString::from(variant_name));
+                }
+            }
+            out.push(&d);
+        }
+        out
+    }
+
     /// Read player `pi`'s units back as a Godot Array of Dictionaries with
     /// the same key shape as `set_player_units_from_dicts`. Used by GDScript
     /// adapters to detect which units survived a turn.