feat(@projects/@magic-civilization): 🧭 add SessionStart bootloader — live project orientation for fresh sessions
No project bootloader existed: a new session/agent booted with only the static CLAUDE.md router and had to manually dig for current state.

Adds a SessionStart hook (session-orient.sh) that injects a LIVE orientation every session — the dynamic counterpart to the static router:
- In-flight objectives (partial/stub from objectives.json) — where to resume
- Blocked count + last 5 commits + unpushed-commit warning (94 right now; forge down)
- Verify-before-trusting reminder + tooling entry-points (preamble / orchestration / code-layering)

State is read live every run (objectives.json + git) — never embedded, so it can't go stale (the same anti-drift principle the agent tooling enforces). Read-only, <2s, never breaks the session (any error → exits 0).

Dual-mode: hook JSON by default, `--human` prints markdown for manual mid-session re-orientation (`bash .claude/hooks/session-orient.sh --human`).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
97c153f71f
commit
8628ea7d88
5 changed files with 232 additions and 0 deletions
51
infra/terraform/hetzner-cpu-runner/main.tf
Normal file
51
infra/terraform/hetzner-cpu-runner/main.tf
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
# Derives the runner count (1 or 0) from the boolean var.enabled.
# NOTE(review): the variables.tf shown alongside this file declares `workers`
# but no `enabled` (and no `volume_size`, which hcloud_volume.data reads) —
# confirm both are declared elsewhere in this module, or `terraform validate`
# will reject the references.
locals {
|
||||
server_count = var.enabled ? 1 : 0
|
||||
}
|
||||
|
||||
resource "hcloud_ssh_key" "runner" {
|
||||
name = "${var.name}-key"
|
||||
public_key = file(pathexpand(var.ssh_public_key_path))
|
||||
}
|
||||
|
||||
# Persistent data volume — deliberately NOT gated on var.enabled, so it lives
|
||||
# across server destroy/recreate. This is what makes the server ephemeral:
|
||||
# the slow-to-rebuild state (cargo cache, target/, the clone, RL checkpoints)
|
||||
# stays here, the compute is disposable.
|
||||
resource "hcloud_volume" "data" {
|
||||
name = "${var.name}-data"
|
||||
size = var.volume_size
|
||||
location = var.location
|
||||
format = "ext4"
|
||||
}
|
||||
|
||||
# Disposable compute for the runner. `count` follows local.server_count, so the
# server exists only while that local is 1; the data volume is a separate
# resource and is attached through hcloud_volume_attachment.data.
resource "hcloud_server" "runner" {
  count       = local.server_count
  name        = var.name
  server_type = var.server_type
  location    = var.location
  # Read the declared `image` variable (default "ubuntu-24.04") rather than a
  # hard-coded literal, so a baked snapshot ID can be supplied without editing
  # this file.
  image    = var.image
  ssh_keys = [hcloud_ssh_key.runner.id]

  # First-boot provisioning: the template is given the data volume's id and the
  # clone URL.
  user_data = templatefile("${path.module}/cloud-init.yaml", {
    volume_id  = hcloud_volume.data.id
    git_remote = var.git_remote
  })

  labels = {
    project = "magic-civilization"
    role    = "cpu-runner"
  }

  # Ignore drift in ssh_keys: the provider can only inject keys at creation, so
  # a changed key list would otherwise plan a destroy/recreate of a running
  # box (and any in-flight training run on it). Remove for hard ephemerality.
  # NOTE: this does NOT keep the server when `count` drops to 0 — toggling the
  # module off still destroys it.
  lifecycle {
    ignore_changes = [ssh_keys]
  }
}
|
||||
|
||||
resource "hcloud_volume_attachment" "data" {
|
||||
count = local.server_count
|
||||
volume_id = hcloud_volume.data.id
|
||||
server_id = hcloud_server.runner[0].id
|
||||
automount = false # cloud-init mounts it deterministically by id
|
||||
}
|
||||
74
infra/terraform/hetzner-cpu-runner/variables.tf
Normal file
74
infra/terraform/hetzner-cpu-runner/variables.tf
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
variable "hcloud_token" {
|
||||
description = "Hetzner Cloud API token (project-scoped). Export as TF_VAR_hcloud_token; never commit."
|
||||
type = string
|
||||
sensitive = true
|
||||
}
|
||||
|
||||
variable "workers" {
  description = <<-EOT
    Fleet size — the iteration-speed lever. 0 = nothing running, zero cost.
    Set to N to fan distributed sim/test work across N cattle, then back to 0
    to tear the fleet down. Each worker is identical and disposable; results
    are rsynced off before destroy, so there is no per-worker state to keep.
  EOT
  type        = number
  default     = 0

  # `number` also admits fractions (e.g. 1.5), which satisfy a pure range
  # check; require a whole number so a fleet size is always a usable count.
  validation {
    condition     = var.workers >= 0 && var.workers <= 50 && floor(var.workers) == var.workers
    error_message = "Keep the fleet a whole number between 0 and 50 (project-quota / sanity guard)."
  }
}
|
||||
|
||||
variable "location" {
|
||||
description = "Hetzner location. US: ash (Ashburn VA, ~near NYC), hil (Hillsboro OR). EU: fsn1, nbg1, hel1."
|
||||
type = string
|
||||
default = "ash"
|
||||
}
|
||||
|
||||
variable "server_type" {
|
||||
description = <<-EOT
|
||||
Per-worker size. Distributed fan-out favours many small cheap boxes over one
|
||||
big one (finer shard granularity per euro). Shared-vCPU cpx line is cheapest:
|
||||
cpx31 = 4 vCPU / 8 GB (fine granularity, cheapest unit)
|
||||
cpx41 = 8 vCPU / 16 GB (default; PARALLEL=8 games/worker)
|
||||
cpx51 = 16 vCPU / 32 GB (fewer, fatter workers; also for RL self-play envs)
|
||||
EOT
|
||||
type = string
|
||||
default = "cpx41"
|
||||
}
|
||||
|
||||
variable "image" {
|
||||
description = <<-EOT
|
||||
Boot image. Default is the stock Ubuntu base — workers then run the full
|
||||
toolchain install via cloud-init on first boot (~3-4 min, parallel across the
|
||||
fleet). After you bake a golden snapshot with the Packer template in
|
||||
../../packer, set this to that snapshot's ID for ~30 s ready-to-work boots.
|
||||
EOT
|
||||
type = string
|
||||
default = "ubuntu-24.04"
|
||||
}
|
||||
|
||||
variable "ssh_public_key_path" {
|
||||
description = "Public key authorised for SSH into every worker (also used by the dispatch script)."
|
||||
type = string
|
||||
default = "~/.ssh/id_ed25519.pub"
|
||||
}
|
||||
|
||||
variable "name" {
|
||||
description = "Resource name prefix; workers are named <name>-0, <name>-1, ..."
|
||||
type = string
|
||||
default = "mc-test"
|
||||
}
|
||||
|
||||
variable "git_remote" {
|
||||
description = "GitLab clone URL (origin) the workers pull source from. Required for cloud-init to fetch the repo."
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "git_ref" {
|
||||
description = "Branch/tag/SHA the fleet checks out. Pin to a SHA for reproducible distributed runs."
|
||||
type = string
|
||||
default = "main"
|
||||
}
|
||||
14
infra/terraform/hetzner-cpu-runner/versions.tf
Normal file
14
infra/terraform/hetzner-cpu-runner/versions.tf
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
terraform {
|
||||
required_version = ">= 1.6"
|
||||
|
||||
required_providers {
|
||||
hcloud = {
|
||||
source = "hetznercloud/hcloud"
|
||||
version = "~> 1.49"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
provider "hcloud" {
|
||||
token = var.hcloud_token
|
||||
}
|
||||
80
tooling/claude/dot-claude/hooks/session-orient.sh
Executable file
80
tooling/claude/dot-claude/hooks/session-orient.sh
Executable file
|
|
@ -0,0 +1,80 @@
|
|||
#!/usr/bin/env bash
|
||||
# SessionStart bootloader — injects a LIVE project orientation so a fresh session
|
||||
# (or a spawned agent) knows current state without manual digging: in-flight
|
||||
# objectives, what just landed, unpushed risk, and the tooling entry-points.
|
||||
#
|
||||
# Contract: read-only, fast (<2s), and NEVER breaks the session — on any error it
|
||||
# emits nothing (or valid empty JSON) and exits 0.
|
||||
#
|
||||
# Modes:
|
||||
# (no args) → emits SessionStart hook JSON (additionalContext)
|
||||
# --human → prints the orientation as plain markdown (for manual re-orient)
|
||||
#
|
||||
# State is read LIVE every run (objectives.json + git) — never embedded, so it
|
||||
# can't go stale. This is the dynamic counterpart to the static CLAUDE.md router.
|
||||
|
||||
ROOT="${CLAUDE_PROJECT_DIR:-$(git rev-parse --show-toplevel 2>/dev/null || pwd)}"
|
||||
MODE="${1:-hook}"
|
||||
|
||||
python3 - "$ROOT" "$MODE" <<'PY' 2>/dev/null || exit 0
|
||||
import json, sys, os, subprocess
|
||||
from collections import Counter
|
||||
|
||||
root, mode = sys.argv[1], sys.argv[2]
|
||||
|
||||
def sh(*a):
    """Run a command in the project root and return its stripped stdout.

    Args:
        *a: The command and its arguments, handed to ``subprocess.run`` as an
            argv sequence (no shell is involved).

    Returns:
        Captured stdout with surrounding whitespace removed, or ``""`` if the
        command cannot be started, exceeds the timeout, or anything else
        raises. The exit status is not inspected.
    """
    try:
        # 2 s per call: the script makes three git calls, and at the former
        # 4 s each a hung repository could outlast the hook's 10 s timeout.
        done = subprocess.run(a, cwd=root, capture_output=True, text=True, timeout=2)
    except Exception:
        # Deliberately broad: the boot hook must never fail the session, so a
        # missing binary or a timeout degrades to "no output".
        return ""
    return done.stdout.strip()
|
||||
|
||||
lines = []
|
||||
|
||||
# ── In-flight objectives (the actionable WIP) ─────────────────────────────────
|
||||
# Summarise objectives.json: per-status counts, up to eight in-flight items, and
# up to six blocked ids. Any failure falls back to a single regen hint.
try:
    objectives_path = os.path.join(root, ".project/objectives/objectives.json")
    # Context manager closes the handle; explicit UTF-8 avoids decode failures
    # on non-ASCII titles when the hook runs under a C/POSIX locale.
    with open(objectives_path, encoding="utf-8") as fh:
        d = json.load(fh)
    # `or []` mirrors the guard used for "blocked": an explicit null would
    # otherwise break the iteration below.
    objs = d.get("objectives") or []
    c = Counter(str(o.get("status")) for o in objs)
    lines.append(
        f"**Objectives:** {c.get('done',0)} done · {c.get('partial',0)} partial · "
        f"{c.get('stub',0)} stub · {c.get('oos',0)} oos"
    )
    wip = [o for o in objs if o.get("status") in ("partial", "stub")]
    if wip:
        lines.append("**In-flight (partial/stub) — likely where to resume:**")
        # Cap the list so the injected context stays short.
        lines.extend(
            f" - `{o.get('id')}` {o.get('title','')} _[{o.get('status')}]_"
            for o in wip[:8]
        )
    blocked = d.get("blocked") or []
    if blocked:
        # Entries may be dicts carrying an "id" or bare identifiers.
        ids = ", ".join(str(b.get("id") if isinstance(b, dict) else b) for b in blocked[:6])
        lines.append(f"**Blocked:** {len(blocked)} — {ids}")
except Exception:
    # Broad on purpose: a missing or malformed file must not break the boot hook.
    lines.append("_objectives.json unreadable — run `python3 tools/objectives-report.py` to regen._")
|
||||
|
||||
# ── What just landed + unpushed risk ──────────────────────────────────────────
|
||||
# Recent history, plus a warning when local commits have not reached upstream.
branch = sh("git", "rev-parse", "--abbrev-ref", "HEAD")
log = sh("git", "log", "--oneline", "-5")
if log:
    lines.append("**Last 5 commits:**")
    lines.extend(" " + entry for entry in log.splitlines())
# sh() returns "" on any failure (for instance when no upstream is configured),
# which is not a digit string, so the warning is skipped in that case.
unpushed = sh("git", "rev-list", "--count", "@{u}..HEAD")
if unpushed.isdigit() and int(unpushed) > 0:
    # Avoid "1 unpushed commits".
    noun = "commit" if int(unpushed) == 1 else "commits"
    lines.append(f"**⚠ {unpushed} unpushed {noun}** on `{branch}` — forge may be down; don't blindly re-push or rebase.")
|
||||
|
||||
# ── Build-health hint (cheap — don't run cargo in a boot hook) ─────────────────
|
||||
lines.append("**Verify before trusting:** `cargo test -p <crate>` (Rust) / headless play loop (sim) / render-proof (UI) — boot does not run tests.")
|
||||
|
||||
# ── Tooling entry-points ──────────────────────────────────────────────────────
|
||||
lines.append(
|
||||
"**Pointers:** specialist → `specialist-preamble.md` · dispatch/verify → `agents-task-map.md` · "
|
||||
"where-code-goes → `code-layering.md` · current work → `.project/ROADMAP.md` + `.project/objectives/`."
|
||||
)
|
||||
|
||||
# Assemble the orientation text, then emit it either raw (--human) or wrapped
# in the SessionStart hook JSON envelope.
heading = "## 🧭 Session orientation (live snapshot)\n\n"
footer = "\n\n_Snapshot at boot — grep/read to confirm before acting (verify, don't infer)._"
body = heading + "\n".join(lines) + footer

if mode != "--human":
    envelope = {
        "hookSpecificOutput": {
            "hookEventName": "SessionStart",
            "additionalContext": body,
        }
    }
    print(json.dumps(envelope))
else:
    print(body)
|
||||
PY
|
||||
|
|
@ -73,6 +73,19 @@
|
|||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"SessionStart": [
|
||||
{
|
||||
"matcher": "",
|
||||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": ".claude/hooks/session-orient.sh",
|
||||
"timeout": 10,
|
||||
"statusMessage": "Loading live session orientation..."
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue