#!/usr/bin/env bash
#
# Tentacle worker installer.
#
# Installs Docker and the NVIDIA container toolkit when they are missing,
# selects the CUDA image variant that fits the CUDA version reported by
# nvidia-smi, logs in to the registry, pulls the image and (re)creates the
# "tentacle" container.
#
# Usage: run as root on a Debian/Ubuntu host that already has the NVIDIA
#        driver installed. Prompts are read from /dev/tty.
# Env:   TENTACLE_TAG  image tag to deploy (default: latest)

set -euo pipefail

# -----------------------------
# Config
# -----------------------------
readonly REGISTRY="hub.krkn.tech"
readonly IMAGE_BASE="hub.krkn.tech/krkncli/cuda"
# Honour the TENTACLE_TAG override; it must not be reassigned afterwards.
readonly IMAGE_TAG="${TENTACLE_TAG:-latest}"
readonly CONTAINER_NAME="tentacle"
readonly REG_USER="krkn-registry"
readonly -a SUPPORTED_CUDA=("12.4.1" "12.5.1" "12.6.3" "13.0.1" "13.1.0")
# Image variant used when no supported version fits the detected driver.
readonly FALLBACK_CUDA="12.6.3"

readonly NVIDIA_KEYRING="/usr/share/keyrings/nvidia-toolkit.gpg"
readonly NVIDIA_APT_LIST="/etc/apt/sources.list.d/nvidia-toolkit.list"
# File name used by NVIDIA's own instructions; removed so the repository is
# not listed twice with a different signed-by keyring.
readonly NVIDIA_APT_LIST_UPSTREAM="/etc/apt/sources.list.d/nvidia-container-toolkit.list"

# -----------------------------
# Helpers
# -----------------------------
log()  { echo -e "\033[1;32m[*]\033[0m $*"; }
warn() { echo -e "\033[1;33m[!]\033[0m $*"; }
err()  { echo -e "\033[0;31m[✗]\033[0m $*" >&2; }

# Print an error to stderr and abort the installer.
die() {
  err "$@"
  exit 1
}

# Abort with a readable message when a tool we are about to call is missing.
# Arguments: $1 - command name
require_cmd() {
  command -v "$1" >/dev/null 2>&1 || die "Required command not found: $1"
}

#######################################
# Print the CUDA version reported in the nvidia-smi banner (e.g. "12.4").
# Prints nothing when nvidia-smi fails or reports no numeric version, so the
# caller can fall back instead of the script dying under `set -e`.
# Outputs: version string on stdout, or nothing
#######################################
detect_cuda() {
  local smi_out
  local re='CUDA Version: *([0-9]+(\.[0-9]+)*)'

  smi_out="$(nvidia-smi 2>/dev/null)" || smi_out=""
  if [[ "$smi_out" =~ $re ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  fi
}

#######################################
# Pick the newest supported CUDA image version whose major.minor does not
# exceed the detected driver capability.
# The comparison is on major.minor only: nvidia-smi reports "12.4" while the
# image versions carry a patch level ("12.4.1").
# Globals:   SUPPORTED_CUDA (read), DETECTED_CUDA (read when $1 is omitted)
# Arguments: $1 - detected CUDA version (optional)
# Outputs:   chosen version on stdout, or an empty line when nothing fits
#######################################
choose_cuda() {
  local detected="${1:-${DETECTED_CUDA:-}}"
  local re='^([0-9]+)\.([0-9]+)'
  local det_major det_minor v v_major v_minor

  if [[ ! "$detected" =~ $re ]]; then
    echo ""
    return 0
  fi
  # 10# forces base 10 so a component like "08" is not parsed as octal.
  det_major=$((10#${BASH_REMATCH[1]}))
  det_minor=$((10#${BASH_REMATCH[2]}))

  # Walk candidates newest-first; the first one that fits is the best one.
  while IFS= read -r v; do
    [[ "$v" =~ $re ]] || continue
    v_major=$((10#${BASH_REMATCH[1]}))
    v_minor=$((10#${BASH_REMATCH[2]}))
    if ((v_major < det_major || (v_major == det_major && v_minor <= det_minor))); then
      printf '%s\n' "$v"
      return 0
    fi
  done < <(printf '%s\n' "${SUPPORTED_CUDA[@]}" | sort -rV)

  echo ""
}

# Install Docker through the upstream convenience script when it is absent.
ensure_docker() {
  if command -v docker >/dev/null; then
    log "Docker already installed."
    return 0
  fi

  log "Installing Docker..."
  require_cmd curl
  curl -fsSL https://get.docker.com | sh
  systemctl enable --now docker
}

# Install the NVIDIA container toolkit from NVIDIA's apt repository and
# register it as a Docker runtime when nvidia-ctk is absent.
ensure_nvidia_toolkit() {
  if command -v nvidia-ctk >/dev/null; then
    log "NVIDIA container toolkit already installed."
    return 0
  fi

  log "Installing NVIDIA container toolkit..."
  require_cmd curl
  require_cmd gpg

  # Start from a clean slate so a re-run never leaves duplicate repo entries.
  rm -f -- "$NVIDIA_APT_LIST_UPSTREAM" "$NVIDIA_APT_LIST" "$NVIDIA_KEYRING"

  # --batch --yes keeps gpg non-interactive even if the keyring reappears.
  curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
    | gpg --batch --yes --dearmor -o "$NVIDIA_KEYRING"

  curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
    | sed "s#^deb #deb [signed-by=${NVIDIA_KEYRING}] #" \
      > "$NVIDIA_APT_LIST"

  apt-get update
  apt-get install -y nvidia-container-toolkit

  nvidia-ctk runtime configure --runtime=docker
  systemctl restart docker
}

# All top-level work lives in main so that a truncated download piped into
# bash cannot execute a partial script.
main() {
  local cuda_version image reg_pass="" orch_addr="" worker_id="" default_worker_id

  # -----------------------------
  # Root / OS checks
  # -----------------------------
  [[ "$(id -u)" -eq 0 ]] || die "Run as root."
  command -v apt-get >/dev/null \
    || die "This installer currently supports Debian/Ubuntu only."

  # -----------------------------
  # Ensure Docker, NVIDIA driver, NVIDIA container runtime
  # -----------------------------
  ensure_docker

  command -v nvidia-smi >/dev/null \
    || die "NVIDIA driver not detected. Install driver first and reboot."
  # Informational only: show the GPU table, but do not abort if it fails.
  nvidia-smi || true

  ensure_nvidia_toolkit

  # -----------------------------
  # Detect CUDA version
  # -----------------------------
  DETECTED_CUDA="$(detect_cuda)"
  log "Detected CUDA capability: ${DETECTED_CUDA:-unknown}"

  cuda_version="$(choose_cuda "$DETECTED_CUDA")"
  if [[ -z "$cuda_version" ]]; then
    warn "Could not auto-match CUDA version, defaulting to ${FALLBACK_CUDA}"
    cuda_version="$FALLBACK_CUDA"
  else
    log "Selected CUDA image version: ${cuda_version}"
  fi

  image="${IMAGE_BASE}/${cuda_version}/tentacle:${IMAGE_TAG}"

  # -----------------------------
  # Registry login
  # -----------------------------
  # Best effort: drop any stale credentials for this registry.
  docker logout "$REGISTRY" >/dev/null 2>&1 || true

  read -rsp "Registry access token: " reg_pass < /dev/tty \
    || die "Could not read registry access token from /dev/tty."
  echo
  [[ -n "$reg_pass" ]] || die "Registry access token is required."

  # The token goes through stdin so it never shows up in the process list.
  if ! printf '%s' "$reg_pass" \
    | docker login "$REGISTRY" -u "$REG_USER" --password-stdin; then
    die "Docker login failed. Invalid token or registry unreachable."
  fi
  reg_pass=""

  # -----------------------------
  # Prompt config
  # -----------------------------
  echo
  read -rp "Orchestrator address (e.g. https://krkn.example.com): " orch_addr < /dev/tty \
    || die "Could not read orchestrator address from /dev/tty."
  [[ -n "$orch_addr" ]] || die "Orchestrator address is required."

  default_worker_id="$(hostname)"
  read -rp "Worker ID [${default_worker_id}]: " worker_id < /dev/tty \
    || die "Could not read worker ID from /dev/tty."
  worker_id="${worker_id:-$default_worker_id}"

  # -----------------------------
  # Pull image
  # -----------------------------
  log "Pulling image: ${image}"
  docker pull "${image}" \
    || die "Failed to pull image. Check token permissions or image availability."

  # -----------------------------
  # Stop old container
  # -----------------------------
  # Best effort: the container may not exist on a first install.
  docker stop "${CONTAINER_NAME}" 2>/dev/null || true
  docker rm "${CONTAINER_NAME}" 2>/dev/null || true

  # -----------------------------
  # Create dirs
  # -----------------------------
  mkdir -p /tmp/tentacle /tmp/tentacle-logs

  # -----------------------------
  # Run container
  # -----------------------------
  log "Starting Tentacle worker..."
  docker run -d \
    --name "${CONTAINER_NAME}" \
    --network host \
    --gpus all \
    --restart unless-stopped \
    -e ORCHESTRATOR_ADDRESS="${orch_addr}" \
    -e WORKER_ID="${worker_id}" \
    -v /tmp/tentacle:/tmp/tentacle \
    -v /tmp/tentacle-logs:/opt/tentacle/logs \
    "${image}"

  # -----------------------------
  # Done
  # -----------------------------
  log "Tentacle worker installed and running."
  echo
  echo "  Container: ${CONTAINER_NAME}"
  echo "  Image:     ${image}"
  echo "  Logs:      docker logs -f ${CONTAINER_NAME}"
  echo "  Restart:   systemctl restart docker"
  echo
}

main "$@"