feat(@projects/@magic-civilization): add recording variant for learned slot actions

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
Natalie 2026-06-08 18:05:47 -07:00
parent a804ceb430
commit a5de8ad517
2 changed files with 30 additions and 5 deletions

View file

@ -1054,15 +1054,37 @@ const LEARNED_MAX_ACTIONS_PER_TURN: u32 = 256;
/// behalf — we stop the loop instead of applying it here to avoid recursing
/// the AI driver).
fn drive_learned_slot(state: &mut GameState, ai_slot: u8) -> u32 {
    // p1-29k — all of the policy-loop logic lives in
    // `drive_learned_slot_recording`; this entry point only reports how many
    // actions were applied. The applied-action log is dropped here because
    // the player-API world has no use for it, which keeps this function
    // behaviour-identical to the loop as it was before the split. The
    // autoplay surface calls the recording variant directly so that its
    // GDScript reconciler can replay the actions the policy chose.
    let (applied_count, _discarded_log) = drive_learned_slot_recording(state, ai_slot);
    applied_count
}
/// p1-29k — the learned-slot policy loop, returning the ordered list of
/// `PlayerAction`s it actually applied (the terminal `end_turn`/`noop` that
/// stops the loop is NOT included — it is not an applied mutation). This is
/// the single source of truth for the loop body; `drive_learned_slot` calls
/// it and discards the log, so the player-API world is unchanged. The
/// autoplay surface (`GdGameState::run_learned_slot`) consumes the log to
/// reconcile the Rust post-turn state back into its GDScript entities.
///
/// `.0` is the count applied (== `log.len()`); `.1` is the log. Both are
/// returned so callers that only want the count avoid a `.len()`.
pub fn drive_learned_slot_recording(
state: &mut GameState,
ai_slot: u8,
) -> (u32, Vec<crate::action::PlayerAction>) {
let pi = ai_slot as usize;
if pi >= state.players.len() {
return 0;
return (0, Vec::new());
}
let net = match crate::learned::shared_learned_policy() {
Some(net) => net,
None => return 0, // artifact unavailable — slot passes its turn.
None => return (0, Vec::new()), // artifact unavailable — slot passes its turn.
};
let mut applied: u32 = 0;
let mut log: Vec<crate::action::PlayerAction> = Vec::new();
for _ in 0..LEARNED_MAX_ACTIONS_PER_TURN {
// Fog-aware projection, matching `drive_ai_slot` and the training
// harness default (`CP_OMNISCIENT=0`).
@ -1083,14 +1105,17 @@ fn drive_learned_slot(state: &mut GameState, ai_slot: u8) -> u32 {
crate::action::PlayerAction::EndTurn
| crate::action::PlayerAction::Noop => break,
action => match apply_action(state, ai_slot, &action) {
Ok(_) => applied += 1,
Ok(_) => {
applied += 1;
log.push(action);
}
// A rejected action with no state change would loop forever —
// stop the turn rather than spin.
Err(_) => break,
},
}
}
applied
(applied, log)
}
/// Derive a deterministic per-turn rng seed for `ai_slot`.

View file

@ -30,7 +30,7 @@ pub use controllers::{
register_controller, registered_ids, AiController, AiControllerIdent, SandboxKind,
ScriptedController, DEFAULT_CONTROLLER_ID,
};
pub use dispatch::{apply_action, apply_ai_action, suggest_actions};
pub use dispatch::{apply_action, apply_ai_action, drive_learned_slot_recording, suggest_actions};
pub use learned::{
decide_action, is_learned_controller, register_learned_controllers, Decision,
LearnedController, LEARNED_CONTROLLER_ID,