#!/usr/bin/env bash
#
# install.sh - install and start the "tentacle" GPU worker container.
#
# Provenance: recovered from the git patch "[PATCH] Add install.sh"
# (commit 6489706cf86286ed363b147b04cd0d5dbae5a512, Evan Hosinski,
# Thu, 22 Jan 2026 09:50:28 -0500). The mail/diff envelope and the leading
# "+" markers were dropped so that this file is directly executable.
#
# Steps, in order:
#   1. Require root and an apt-get based system.
#   2. Install Docker if the `docker` command is missing.
#   3. Require an NVIDIA driver (`nvidia-smi`); install the NVIDIA container
#      toolkit if `nvidia-ctk` is missing.
#   4. Read the CUDA version reported by `nvidia-smi` and pick the newest
#      supported image whose major.minor does not exceed it.
#   5. Prompt for registry credentials, orchestrator address and worker id.
#   6. Pull the image, replace any existing container, start the new one.
#
# Usage: run as root, interactively:  ./install.sh
set -euo pipefail

# -----------------------------
# Config
# -----------------------------
readonly REGISTRY="hub.krkn.tech"
readonly IMAGE_BASE="hub.krkn.tech/krkn/cuda"
readonly IMAGE_TAG="latest"
readonly CONTAINER_NAME="tentacle"

# CUDA versions for which an image path ${IMAGE_BASE}/<version>/tentacle exists.
readonly SUPPORTED_CUDA=("12.4.1" "12.5.1" "12.6.3" "13.0.1" "13.1.0")
# Used when detection fails or the driver is older than every supported image.
readonly DEFAULT_CUDA="12.6.3"

# Host directories bind-mounted into the container.
# NOTE(review): these live under world-writable /tmp with predictable names;
# consider moving them (e.g. under /var/lib and /var/log) in a follow-up.
readonly DATA_DIR="/tmp/tentacle"
readonly LOG_DIR="/tmp/tentacle-logs"

# -----------------------------
# Helpers
# -----------------------------
# printf is used instead of `echo -e` so the message text itself is never
# interpreted for backslash escapes or leading dashes.
log()  { printf '\033[1;32m[*]\033[0m %s\n' "$*"; }
warn() { printf '\033[1;33m[!]\033[0m %s\n' "$*"; }
err()  { printf '\033[0;31m[✗]\033[0m %s\n' "$*" >&2; }
die()  { err "$@"; exit 1; }

#######################################
# Install (via apt-get) any of the named tools that are not on PATH.
# Arguments: command names, e.g. `curl gpg`
# Returns:   0 when every tool is available afterwards; apt-get failures
#            abort the script through `set -e`.
#######################################
ensure_tools() {
  local tool
  local -a packages=()

  for tool in "$@"; do
    command -v "$tool" >/dev/null && continue
    case "$tool" in
      gpg) packages+=("gnupg") ;;   # the gpg binary ships in the gnupg package
      *)   packages+=("$tool") ;;
    esac
  done

  (( ${#packages[@]} > 0 )) || return 0

  log "Installing prerequisites: ${packages[*]}"
  apt-get update
  apt-get install -y ca-certificates "${packages[@]}"
}

#######################################
# Compare two dotted versions on their major.minor components only.
# Arguments: $1 - left version, $2 - right version (minor defaults to 0)
# Returns:   0 if major.minor($1) <= major.minor($2); 1 otherwise or when
#            either argument does not start with a number.
#######################################
cuda_mm_le() {
  local re='^([0-9]+)(\.([0-9]+))?'
  local lhs_major lhs_minor rhs_major rhs_minor

  [[ "$1" =~ $re ]] || return 1
  # 10# forces base 10 so a component such as "08" is not parsed as octal.
  lhs_major=$((10#${BASH_REMATCH[1]}))
  lhs_minor=$((10#${BASH_REMATCH[3]:-0}))

  [[ "$2" =~ $re ]] || return 1
  rhs_major=$((10#${BASH_REMATCH[1]}))
  rhs_minor=$((10#${BASH_REMATCH[3]:-0}))

  (( lhs_major < rhs_major || (lhs_major == rhs_major && lhs_minor <= rhs_minor) ))
}

#######################################
# Print the CUDA version shown in the `nvidia-smi` banner (e.g. "12.4").
# Outputs: the version, or an empty line when nvidia-smi fails or prints no
#          numeric "CUDA Version:" field.
# Returns: 0 always, so a detection failure reaches the caller's fallback
#          instead of aborting the script under `set -e` / `pipefail`.
#######################################
detect_cuda() {
  local smi_out version

  smi_out="$(nvidia-smi 2>/dev/null)" || smi_out=""
  version="$(sed -n 's/.*CUDA Version: *\([0-9][0-9.]*\).*/\1/p' <<<"$smi_out")"

  # Keep only the first match if the banner ever contains more than one.
  printf '%s\n' "${version%%$'\n'*}"
}

#######################################
# Pick the image version for a detected CUDA version.
# nvidia-smi reports MAJOR.MINOR while SUPPORTED_CUDA holds MAJOR.MINOR.PATCH,
# so the comparison is done on major.minor only: the result is the highest
# supported version whose major.minor is <= the detected one.
# Globals:   SUPPORTED_CUDA (read), DETECTED_CUDA (read when $1 is omitted)
# Arguments: $1 - detected version (optional)
# Outputs:   the chosen version on stdout; nothing when none qualifies
# Returns:   0 always
#######################################
choose_cuda() {
  local detected="${1:-${DETECTED_CUDA:-}}"
  local candidate best=""

  for candidate in "${SUPPORTED_CUDA[@]}"; do
    if cuda_mm_le "$candidate" "$detected" \
      && { [[ -z "$best" ]] || cuda_mm_le "$best" "$candidate"; }; then
      best="$candidate"
    fi
  done

  [[ -z "$best" ]] || printf '%s\n' "$best"
}

# -----------------------------
# Ensure Docker
# -----------------------------
ensure_docker() {
  if command -v docker >/dev/null; then
    log "Docker already installed."
    return 0
  fi

  log "Installing Docker..."
  ensure_tools curl
  # NOTE(review): this executes a remote script as root; acceptable only
  # because it is Docker's own convenience installer fetched over HTTPS.
  curl -fsSL https://get.docker.com | sh
  systemctl enable --now docker
}

# -----------------------------
# Ensure NVIDIA driver
# -----------------------------
ensure_nvidia_driver() {
  command -v nvidia-smi >/dev/null \
    || die "NVIDIA driver not detected. Install driver first and reboot."

  # Informational only: show the GPU status but do not abort if it errors.
  nvidia-smi || true
}

# -----------------------------
# Ensure NVIDIA container runtime
# -----------------------------
ensure_nvidia_toolkit() {
  local keyring="/usr/share/keyrings/nvidia-toolkit.gpg"
  local apt_list="/etc/apt/sources.list.d/nvidia-toolkit.list"

  if command -v nvidia-ctk >/dev/null; then
    log "NVIDIA container toolkit already installed."
    return 0
  fi

  log "Installing NVIDIA container toolkit..."
  ensure_tools curl gpg

  # Remove both the list this script writes and the differently named
  # nvidia-container-toolkit.list, so apt never sees two entries for the
  # same repository. The keyring is removed so gpg does not refuse to
  # overwrite it.
  rm -f -- /etc/apt/sources.list.d/nvidia-container-toolkit.list \
    "$apt_list" "$keyring"

  curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
    | gpg --dearmor -o "$keyring"

  curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
    | sed "s#^deb #deb [signed-by=${keyring}] #" \
    > "$apt_list"

  apt-get update
  apt-get install -y nvidia-container-toolkit

  nvidia-ctk runtime configure --runtime=docker
  systemctl restart docker
}

# -----------------------------
# Registry login
# -----------------------------
registry_login() {
  # Locals keep the secret out of the global scope once login returns.
  local reg_user reg_pass

  echo
  read -rp "Registry username: " reg_user
  read -rsp "Registry password or token: " reg_pass
  echo

  [[ -n "$reg_user" ]] || die "Registry username is required."

  # printf '%s' passes the secret verbatim; `echo` would mangle values such
  # as "-n" or "-e".
  printf '%s' "$reg_pass" \
    | docker login "$REGISTRY" -u "$reg_user" --password-stdin
}

# -----------------------------
# Main flow
# -----------------------------
main() {
  local detected_cuda cuda_version image
  local orch_addr default_worker_id worker_id

  # Root check
  [[ "$(id -u)" -eq 0 ]] || die "Run as root."

  # OS check (the script drives apt-get, so that is what is probed)
  command -v apt-get >/dev/null \
    || die "This installer currently supports Debian/Ubuntu only."

  ensure_docker
  ensure_nvidia_driver
  ensure_nvidia_toolkit

  # Detect CUDA version and map it to an image
  detected_cuda="$(detect_cuda)"
  log "Detected CUDA capability: ${detected_cuda:-unknown}"

  cuda_version="$(choose_cuda "$detected_cuda")"
  if [[ -z "$cuda_version" ]]; then
    warn "Could not auto-match CUDA version, defaulting to ${DEFAULT_CUDA}"
    cuda_version="$DEFAULT_CUDA"
  else
    log "Selected CUDA image version: ${cuda_version}"
  fi

  image="${IMAGE_BASE}/${cuda_version}/tentacle:${IMAGE_TAG}"

  registry_login

  # Prompt config
  echo
  read -rp "Orchestrator address (e.g. https://krkn.example.com): " orch_addr
  [[ -n "$orch_addr" ]] || die "Orchestrator address is required."

  default_worker_id="$(hostname)"
  read -rp "Worker ID [${default_worker_id}]: " worker_id
  worker_id="${worker_id:-$default_worker_id}"

  # Pull image
  log "Pulling image: ${image}"
  docker pull "${image}"

  # Stop old container. Errors are ignored on purpose: on a first install
  # there is no container to stop or remove.
  docker stop "${CONTAINER_NAME}" 2>/dev/null || true
  docker rm "${CONTAINER_NAME}" 2>/dev/null || true

  # Create dirs
  mkdir -p -- "$DATA_DIR" "$LOG_DIR"

  # Run container
  log "Starting Tentacle worker..."

  docker run -d \
    --name "${CONTAINER_NAME}" \
    --network host \
    --gpus all \
    --restart unless-stopped \
    -e ORCHESTRATOR_ADDRESS="${orch_addr}" \
    -e WORKER_ID="${worker_id}" \
    -v "${DATA_DIR}:/tmp/tentacle" \
    -v "${LOG_DIR}:/opt/tentacle/logs" \
    "${image}"

  # Done
  log "Tentacle worker installed and running."

  echo
  echo "  Container: ${CONTAINER_NAME}"
  echo "  Image:     ${image}"
  echo "  Logs:      docker logs -f ${CONTAINER_NAME}"
  # Restarting only this container avoids bouncing every container on the
  # host, which `systemctl restart docker` would do.
  echo "  Restart:   docker restart ${CONTAINER_NAME}"
  echo
}

main "$@"