#!/usr/bin/env bash
#
# Installer script for the Tentacle worker container.
#
# Environment:
#   TENTACLE_TAG  Image tag to deploy (defaults to "latest").

# Strict mode: exit on unhandled errors, unset variables and failed pipeline stages.
set -euo pipefail

# -----------------------------
# Config
# -----------------------------
# Image tag; overridable through the TENTACLE_TAG environment variable.
readonly IMAGE_TAG="${TENTACLE_TAG:-latest}"

# Registry host used for docker login/logout.
readonly REGISTRY="hub.krkn.tech"

# Repository prefix for the images. Derived from REGISTRY so the host that is
# logged in to and the host that is pulled from cannot drift apart.
readonly IMAGE_BASE="${REGISTRY}/krkncli/cuda"

# Name given to the worker container.
readonly CONTAINER_NAME="tentacle"

# CUDA versions this installer accepts when selecting an image.
readonly SUPPORTED_CUDA=("12.4.1" "12.5.1" "12.6.3" "13.0.1" "13.1.0")
|
|
|
|
|
|
|
|
|
|
# -----------------------------
|
|
|
|
|
# Helpers
|
|
|
|
|
# -----------------------------
|
|
|
|
|
# Print an informational message to stdout with a green "[*]" prefix.
# All arguments are joined with spaces. printf '%s' emits the message text
# literally, so backslash sequences inside it are not interpreted.
log() { printf '\033[1;32m[*]\033[0m %s\n' "$*"; }
|
|
|
|
|
# Print a warning message to stdout with a yellow "[!]" prefix.
# All arguments are joined with spaces. printf '%s' emits the message text
# literally, so backslash sequences inside it are not interpreted.
warn() { printf '\033[1;33m[!]\033[0m %s\n' "$*"; }
|
|
|
|
|
# Print an error message to stderr with a red "[✗]" prefix.
# All arguments are joined with spaces. printf '%s' emits the message text
# literally, so backslash sequences inside it are not interpreted.
err() { printf '\033[0;31m[✗]\033[0m %s\n' "$*" >&2; }
|
|
|
|
|
|
|
|
|
|
# -----------------------------
# Root check
# -----------------------------
# Stop unless `id -u` reports user id 0.
if (( $(id -u) != 0 )); then
  err "Run as root."
  exit 1
fi
|
|
|
|
|
|
|
|
|
|
# -----------------------------
# OS check
# -----------------------------
# The presence of `apt` on PATH is used as the Debian/Ubuntu indicator.
command -v apt >/dev/null || {
  err "This installer currently supports Debian/Ubuntu only."
  exit 1
}
|
|
|
|
|
|
|
|
|
|
# -----------------------------
# Ensure Docker
# -----------------------------
# Install Docker through the get.docker.com script only when no `docker`
# binary is on PATH, then enable and start the service.
if command -v docker >/dev/null; then
  log "Docker already installed."
else
  log "Installing Docker..."
  curl -fsSL https://get.docker.com | sh
  systemctl enable --now docker
fi
|
|
|
|
|
|
|
|
|
|
# -----------------------------
# Ensure NVIDIA driver
# -----------------------------
# The driver is not installed by this script; a missing nvidia-smi is fatal.
command -v nvidia-smi >/dev/null || {
  err "NVIDIA driver not detected. Install driver first and reboot."
  exit 1
}

# Print the nvidia-smi report; a non-zero exit here is deliberately tolerated.
nvidia-smi || true
|
|
|
|
|
|
|
|
|
|
# -----------------------------
# Ensure NVIDIA container runtime
# -----------------------------
# Skip everything when nvidia-ctk is already on PATH; otherwise add NVIDIA's
# apt repository, install the toolkit and register it as a Docker runtime.
if command -v nvidia-ctk >/dev/null; then
  log "NVIDIA container toolkit already installed."
else
  log "Installing NVIDIA container toolkit..."

  toolkit_repo_url="https://nvidia.github.io/libnvidia-container"
  toolkit_keyring="/usr/share/keyrings/nvidia-toolkit.gpg"

  # Remove an existing repo definition and keyring before writing new ones.
  # NOTE(review): the list removed here (nvidia-container-toolkit.list) has a
  # different name from the one written below (nvidia-toolkit.list) — confirm
  # this is intentional.
  rm -f /etc/apt/sources.list.d/nvidia-container-toolkit.list
  rm -f "${toolkit_keyring}"

  # Store the repository signing key in binary (dearmored) form.
  curl -fsSL "${toolkit_repo_url}/gpgkey" \
    | gpg --dearmor -o "${toolkit_keyring}"

  # Fetch the repo list and pin every "deb" entry to the keyring above.
  curl -fsSL "${toolkit_repo_url}/stable/deb/nvidia-container-toolkit.list" \
    | sed "s#^deb #deb [signed-by=${toolkit_keyring}] #" \
      > /etc/apt/sources.list.d/nvidia-toolkit.list

  apt-get update
  apt-get install -y nvidia-container-toolkit

  # Register the runtime with Docker and restart the daemon.
  nvidia-ctk runtime configure --runtime=docker
  systemctl restart docker
fi
|
|
|
|
|
|
|
|
|
|
# -----------------------------
# Detect CUDA version
# -----------------------------
# Parse the "CUDA Version: X.Y" field out of nvidia-smi's output. The pipeline
# is guarded with `|| true` so that, under `set -euo pipefail`, a failing
# nvidia-smi or a missing field reaches the explicit check below instead of
# terminating the script without any message. The pattern requires a leading
# digit, so a non-numeric value yields an empty result rather than a bogus
# version string.
DETECTED_CUDA="$(
  nvidia-smi \
    | sed -n 's/.*CUDA Version: *\([0-9][0-9.]*\).*/\1/p' \
    | head -n 1
)" || true

if [ -z "${DETECTED_CUDA}" ]; then
  err "Could not determine the CUDA version from nvidia-smi output."
  exit 1
fi
log "Detected CUDA capability: ${DETECTED_CUDA}"

# Extract major.minor (e.g., 12.4)
DETECTED_MM="$(cut -d. -f1,2 <<<"${DETECTED_CUDA}")"
|
|
|
|
|
|
2026-01-22 11:57:19 -05:00
|
|
|
#######################################
# Pick the supported CUDA version matching the detected major.minor.
# Globals:
#   SUPPORTED_CUDA (read) - array of candidate versions
#   DETECTED_MM    (read) - major.minor to match against
# Outputs:
#   Writes the selected version to stdout; when several candidates share the
#   major.minor, the highest according to `sort -V` wins.
# Returns:
#   0 if a version was selected, 1 if no candidate matches.
#######################################
choose_cuda() {
  local best=""
  # Loop variables are local so they do not leak into the caller's scope.
  local v v_mm

  for v in "${SUPPORTED_CUDA[@]}"; do
    v_mm="$(echo "$v" | cut -d. -f1,2)"

    # Only consider versions with same major.minor
    [ "$v_mm" = "$DETECTED_MM" ] || continue

    # Pick highest patch of that minor
    if [ -z "$best" ] || [ "$(printf '%s\n%s\n' "$best" "$v" | sort -V | tail -n1)" = "$v" ]; then
      best="$v"
    fi
  done

  [ -n "$best" ] || return 1
  printf '%s\n' "$best"
}
|
|
|
|
|
|
2026-01-22 11:57:19 -05:00
|
|
|
# Resolve the image version; when nothing matches, report the detected
# major.minor together with the supported list and stop.
if ! CUDA_VERSION="$(choose_cuda)"; then
  err "No compatible CUDA image found for driver capability ${DETECTED_MM}.x"
  err "Supported versions: ${SUPPORTED_CUDA[*]}"
  exit 1
fi
log "Selected CUDA image version: ${CUDA_VERSION}"

# Full image reference: <base>/<cuda version>/tentacle:<tag>
IMAGE="${IMAGE_BASE}/${CUDA_VERSION}/tentacle:${IMAGE_TAG}"
|
|
|
|
|
|
|
|
|
|
# -----------------------------
# Registry login
# -----------------------------
# Log out of the registry first; output is discarded and failures are ignored.
docker logout "$REGISTRY" >/dev/null 2>&1 || true

REG_USER="krkn-registry"

# Read the token from /dev/tty without echoing it (-s), then end the prompt line.
read -rsp "Registry access token: " REG_PASS < /dev/tty
echo

# Fail early with a clear message instead of sending an empty password.
if [ -z "$REG_PASS" ]; then
  err "Registry access token is required."
  exit 1
fi

# printf '%s' passes the token through verbatim; `echo` would treat a value
# such as "-n" or "-e" as an option. The token goes over stdin so it never
# appears in the process argument list.
if ! printf '%s' "$REG_PASS" | docker login "$REGISTRY" -u "$REG_USER" --password-stdin; then
  err "Docker login failed. Invalid token or registry unreachable."
  exit 1
fi

# The token is not used after this point; do not keep it in the shell.
unset REG_PASS
|
2026-01-22 09:50:28 -05:00
|
|
|
|
|
|
|
|
# -----------------------------
# Prompt config
# -----------------------------
echo

# Both prompts read from /dev/tty rather than from the script's stdin.
read -rp "Orchestrator address (e.g. https://krkn.example.com): " ORCH_ADDR < /dev/tty
[ -n "$ORCH_ADDR" ] || {
  err "Orchestrator address is required."
  exit 1
}

# The worker id falls back to the machine's hostname when left blank.
DEFAULT_WORKER_ID="$(hostname)"
read -rp "Worker ID [${DEFAULT_WORKER_ID}]: " WORKER_ID < /dev/tty
: "${WORKER_ID:=$DEFAULT_WORKER_ID}"
|
|
|
|
|
|
|
|
|
|
# -----------------------------
# Pull image
# -----------------------------
log "Pulling image: ${IMAGE}"

# A failed pull is fatal and gets its own message.
if ! docker pull "${IMAGE}"; then
  err "Failed to pull image. Check token permissions or image availability."
  exit 1
fi
|
2026-01-22 09:50:28 -05:00
|
|
|
|
|
|
|
|
# -----------------------------
# Stop old container
# -----------------------------
# Stop, then remove, a container with the same name. Errors are suppressed
# and ignored for both steps.
for docker_action in stop rm; do
  docker "${docker_action}" "${CONTAINER_NAME}" 2>/dev/null || true
done
|
|
|
|
|
|
|
|
|
|
# -----------------------------
# Create dirs
# -----------------------------
# Host directories that are bind-mounted into the container.
for host_dir in /tmp/tentacle /tmp/tentacle-logs; do
  mkdir -p "${host_dir}"
done
|
|
|
|
|
|
|
|
|
|
# -----------------------------
# Run container
# -----------------------------
log "Starting Tentacle worker..."

# Options are collected in an array so each one sits on its own line without
# backslash continuations.
run_opts=(
  -d
  --name "${CONTAINER_NAME}"
  --network host
  --runtime=nvidia
  --gpus all
  --restart unless-stopped
  -e "CONDUCTOR_ADDR=${ORCH_ADDR}"
  -e "WORKER_ID=${WORKER_ID}"
  -v /tmp/tentacle:/tmp/tentacle
  -v /tmp/tentacle-logs:/opt/tentacle/logs
)

docker run "${run_opts[@]}" "${IMAGE}"
|
|
|
|
|
|
|
|
|
|
# -----------------------------
# Done
# -----------------------------
log "Tentacle worker installed and running."

# Summary block, framed by one blank line above and below.
cat <<EOF

 Container: ${CONTAINER_NAME}
 Image: ${IMAGE}
 Logs: docker logs -f ${CONTAINER_NAME}
 Restart: systemctl restart docker

EOF
|