magicciv/scripts/run/heavy-prefix.sh

48 lines
1.6 KiB
Bash
Raw Permalink Normal View History

#!/usr/bin/env bash
# heavy-prefix.sh — containment helpers for batch/long-running workloads.
#
# Sourcing this file has no side effects beyond defining functions.
#
# Purpose: keep heavy godot/python workloads (RL training, autoplay batches,
# proof renders, claude-player workers) inside the heavy-tests.slice cgroup
# so they cannot starve sshd / interactive work on apricot. Background: on
# 2026-05-18 / 2026-05-19 the box wedged when ~3000 godot workers spawned
# outside any cgroup; CPUWeight=20 on the slice lets sshd preempt.
_heavy_have_systemd_run() {
  # Internal probe: exit status 0 only when `uname -s` reports Linux and a
  # systemd-run executable can be resolved; non-zero otherwise. Prints nothing.
  case "$(uname -s)" in
    Linux) ;;
    *) return 1 ;;
  esac
  command -v systemd-run >/dev/null 2>&1
}
# Replace the current shell with the given command, wrapped in a transient
# scope under heavy-tests.slice and run at reduced CPU priority (nice 10)
# and idle I/O class (ionice -c 3).
# Falls back to a plain `exec` of the command (no slice, no nice/ionice)
# when systemd-run is unavailable: non-Linux, or Linux without systemd-run.
# On success this function does not return, because the shell is replaced.
# Args: <unit-name> <cmd> [args...]
# Both <unit-name> and <cmd> are required and must be non-empty; otherwise
# the ${1:?} expansion prints an error and a non-interactive shell exits.
heavy_exec() {
  local unit="${1:?heavy_exec: unit name required}"
  shift
  # Without this guard a bare `exec` (no command) is a successful no-op and
  # the caller would carry on as if the workload had been launched.
  : "${1:?heavy_exec: command required}"
  if _heavy_have_systemd_run; then
    exec systemd-run --user \
      --slice=heavy-tests.slice \
      --scope --quiet --collect \
      --unit="${unit}" \
      -- nice -n 10 ionice -c 3 "$@"
  fi
  # Reached when systemd-run is unavailable (or if the exec above failed in a
  # shell that survives a failed exec): run the command uncontained.
  exec "$@"
}
# Start the given command as a detached transient .service under
# heavy-tests.slice. Returns once systemd-run has returned, with
# systemd-run's exit status; logs go to the journal
# (journalctl --user -u <unit>).
# When systemd-run is unavailable (non-Linux, or Linux without systemd-run)
# the command is instead started in the background via nohup with stdout and
# stderr discarded; the unit name is unused in that case.
# Args: <unit-name> <cmd> [args...]
# Both <unit-name> and <cmd> are required and must be non-empty; otherwise
# the ${1:?} expansion prints an error and a non-interactive shell exits.
heavy_service() {
  local unit="${1:?heavy_service: unit name required}"
  shift
  # Without this guard the fallback would background a bare `nohup`, which
  # fails silently (output is discarded) while this function reports success.
  : "${1:?heavy_service: command required}"
  if _heavy_have_systemd_run; then
    systemd-run --user \
      --slice=heavy-tests.slice \
      --unit="${unit}" \
      --collect --quiet \
      -- "$@"
    # Bare return propagates systemd-run's exit status to the caller.
    return
  fi
  nohup "$@" >/dev/null 2>&1 &
}