#!/bin/bash
# =============================================================================
# Klyro Edge Installer
# https://get.klyro.security/install.sh
#
# Usage:
#   Production: curl -sfL https://get.klyro.security | bash -s -- <INSTALL_TOKEN>
#   Local:      ./install.sh inst_test --local
#
# Get your install token from: https://app.klyro.security/agents/new
#
# Local Mode (--local):
#   Sets up a complete local environment with:
#   - k3d cluster with local registry
#   - Mock Cloud gRPC server for agent registration
#   - Temporal for workflow orchestration
#   - Agent connected to mock cloud and Temporal
#   - Worker/Operator polling Temporal for tasks
#   - SQLite database for agent state
# =============================================================================
# Abort on the first unhandled command failure.
set -e

# This script uses bash-only features (BASH_SOURCE, [[ ]], ${var//a/b},
# $RANDOM). When it is piped into a plain POSIX sh such as dash, the first
# bash-only expansion dies with an opaque "Bad substitution". Detect that case
# up front and explain how to re-run. The check itself is POSIX-compatible.
if [ -z "${BASH_VERSION:-}" ]; then
    echo "[ERROR] This installer requires bash. Re-run it with 'bash' instead of 'sh'." >&2
    exit 1
fi

# API base URL; may be supplied through the environment, empty by default.
KLYRO_API_URL="${KLYRO_API_URL:-}"
# Kubernetes namespace and Helm release name used throughout the installer.
NAMESPACE="klyro-system"
RELEASE_NAME="klyro-edge"

# Script and project directories
# (BASH_SOURCE[0] is empty when the script is read from stdin, in which case
# SCRIPT_DIR resolves to the current working directory.)
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="${SCRIPT_DIR}/.."

# ANSI colour escape sequences, stored un-expanded (literal backslash form);
# they are turned into real escapes by printf / echo -e at print time.
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[0;33m"
BLUE="\033[0;34m"
CYAN="\033[0;36m"
# Reset ("no colour")
NC="\033[0m"

# Logging helpers. Each prints a coloured tag followed by the message ($1) and
# a newline. The colour variables are passed through %b so their backslash
# escapes are expanded; the message itself is always printed verbatim via %s.

# Informational message (stdout).
log_info() {
    printf '%b[INFO]%b %s\n' "${BLUE}" "${NC}" "$1"
}

# Success message (stdout).
log_success() {
    printf '%b[OK]%b %s\n' "${GREEN}" "${NC}" "$1"
}

# Warning message (stdout).
log_warn() {
    printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$1"
}

# Error message (stderr).
log_error() {
    printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$1" >&2
}

# Step heading: green ">>>" marker followed by the message in blue (stdout).
log_step() {
    printf '%b>>>%b %b%s%b\n' "${GREEN}" "${NC}" "${BLUE}" "$1" "${NC}"
}

# Message tagged as belonging to local mode (stdout).
log_local() {
    printf '%b[LOCAL]%b %s\n' "${CYAN}" "${NC}" "$1"
}

# Spinner for long-running operations.
#
# Arguments:
#   $1 - PID of a background job started by the current shell
#   $2 - message shown next to the spinner
# Outputs:
#   Redraws a single status line on stdout (using \r) every 0.1s while the
#   process is alive.
# Returns:
#   The exit status of the waited-for process.
spin() {
    local pid=$1
    local msg=$2
    # One frame per array element. The frames are multi-byte UTF-8 characters,
    # so slicing a string with `cut -c` (byte-based on GNU coreutils) would
    # print broken byte fragments; array indexing is locale-independent.
    local frames=('⠋' '⠙' '⠹' '⠸' '⠼' '⠴' '⠦' '⠧' '⠇' '⠏')
    local i=0
    printf "${BLUE}[...]${NC} %s " "$msg"
    while kill -0 "$pid" 2>/dev/null; do
        i=$(( (i + 1) % ${#frames[@]} ))
        printf "\r${BLUE}[%s]${NC} %s " "${frames[i]}" "$msg"
        sleep 0.1
    done
    # Reap the job; its exit status becomes this function's return value.
    wait "$pid"
}

# Run a command with a spinner.
#
# Arguments:
#   $1   - message describing the operation
#   $2.. - command and arguments to execute
# Behaviour:
#   - KLYRO_DEBUG=true: prints the message and runs the command in the
#     foreground with its output visible.
#   - otherwise: runs the command in the background with all output
#     discarded, animates the spinner, then prints [OK] or [FAIL].
# Returns:
#   The exit status of the command.
run_with_spinner() {
    local msg="$1"
    shift
    if [ "${KLYRO_DEBUG:-}" = "true" ]; then
        printf "${BLUE}[...]${NC} %s\n" "$msg"
        "$@"
        return $?
    fi

    "$@" >/dev/null 2>&1 &
    local pid=$!
    # Capture the status with `||` so that `set -e` does not abort the script
    # inside spin before the [FAIL] line can be printed.
    local status=0
    spin "$pid" "$msg" || status=$?
    if [ "$status" -eq 0 ]; then
        printf "\r${GREEN}[OK]${NC} %s    \n" "$msg"
    else
        printf "\r${RED}[FAIL]${NC} %s    \n" "$msg"
    fi
    return "$status"
}

# Print the installer banner: a blank line, a four-line boxed title, and a
# trailing blank line.
show_banner() {
    local top_rule="╔═══════════════════════════════════════════════════════════════╗"
    local bottom_rule="╚═══════════════════════════════════════════════════════════════╝"

    printf '\n'
    printf "${BLUE}%s${NC}\n" "$top_rule"
    printf "${BLUE}║${NC}   ${GREEN}Klyro Edge Agent Installer${NC}                                ${BLUE}║${NC}\n"
    printf "${BLUE}║${NC}   AI-powered identity provisioning for Kubernetes            ${BLUE}║${NC}\n"
    printf "${BLUE}%s${NC}\n" "$bottom_rule"
    printf '\n'
}

# Print the help text to stdout and exit with status 0.
#
# The piped examples invoke `bash` rather than `sh`: the installer uses
# bash-only syntax and fails on systems where `sh` is a different shell.
# The heredoc delimiter is quoted so the text is emitted literally.
usage() {
    cat <<'EOF'
Klyro Edge Installer

Usage:
  Production:  curl -sfL https://get.klyro.security | bash -s -- <INSTALL_TOKEN>
  Staging:     curl -sfL https://get.klyro.security | bash -s -- --env staging <INSTALL_TOKEN>
  Dev:         curl -sfL https://get.klyro.security | bash -s -- --env dev <INSTALL_TOKEN>
  Reinstall:   curl -sfL https://get.klyro.security | bash -s -- --reinstall
  Local:       ./install.sh <TOKEN> --local
  Local+Cloud: ./install.sh <TOKEN> --local --grpc-endpoint grpc.klyro.security:443

Arguments:
  INSTALL_TOKEN       Your one-time install token from the Klyro dashboard
  --env <env>         Target environment: dev, staging, or production (default)
  --local             Enable full local mode with mock cloud, Temporal, etc.
  --with-temporal     Deploy Temporal alongside the agent (for E2E testing)
  --grpc-endpoint     Custom gRPC endpoint (skips mock cloud setup)
  --api-endpoint      Custom API endpoint (optional, used with --grpc-endpoint)

Local Mode (--local):
  Sets up a complete local environment:
  - k3d cluster with local registry
  - Mock Cloud gRPC server for agent registration
  - Temporal for workflow orchestration
  - Agent + Worker/Operator fully connected
  - Anthropic credentials (dummy key for testing)

Local Mode with Custom Endpoints (--local --grpc-endpoint):
  Sets up local k3d cluster but connects to real/external cloud:
  - k3d cluster with local registry
  - Temporal for workflow orchestration
  - Agent connects to specified gRPC endpoint (no mock cloud)
  - Skips mock cloud connectivity tests

Production Mode:
  Requires the following environment variables:
  - ANTHROPIC_API_KEY (mandatory) - Anthropic API key for the AI worker

Get your install token:
  1. Log in to https://app.klyro.security
  2. Navigate to Agents → Add Agent
  3. Copy the install command

Environment Variables:
  ANTHROPIC_API_KEY (mandatory for prod) Anthropic API key for AI-powered provisioning
  KLYRO_API_URL    Override API endpoint (default: https://api.klyro.security)
  KLYRO_ENV        Same as --env flag (flag takes precedence)
  KLYRO_DEBUG      Enable debug output (set to 'true')

Examples:
  # Full local mode with mock cloud
  ./install.sh inst_test --local

  # Local k3d cluster connecting to real cloud
  ./install.sh inst_abc123 --local --grpc-endpoint grpc.klyro.security:443

  # Production installation
  ANTHROPIC_API_KEY=sk-ant-... ./install.sh inst_abc123

  # E2E testing with Temporal (deploys Temporal alongside agent)
  ANTHROPIC_API_KEY=sk-ant-... ./install.sh inst_abc123 --with-temporal

EOF
    exit 0
}

# =============================================================================
# LOCAL MODE FUNCTIONS
# =============================================================================

# Verify the tooling needed for local mode: docker, k3d and go must be on
# PATH, and the Docker daemon must answer `docker info`. All missing tools are
# reported before the script exits with status 1.
check_local_prerequisites() {
    log_local "Checking local mode prerequisites..."

    local tools_missing=0

    if ! check_command "docker" "brew install --cask docker"; then
        tools_missing=1
    fi
    if ! check_command "k3d" "brew install k3d"; then
        tools_missing=1
    fi
    if ! check_command "go" "brew install go"; then
        tools_missing=1
    fi

    if [ "$tools_missing" -ne 0 ]; then
        echo ""
        log_error "Missing local mode tools. Please install them and try again."
        exit 1
    fi

    # The binary being installed says nothing about whether the daemon is up.
    docker info >/dev/null 2>&1 || {
        log_error "Docker daemon is not running. Start Docker Desktop."
        exit 1
    }

    log_success "Local mode prerequisites verified"
}

# Ensure the local k3d cluster "klyro-local" exists and point KUBECONFIG at it.
#
# If the cluster is already listed by k3d, its kubeconfig is merged and
# exported and the function returns. Otherwise a cluster with a local registry
# (klyro-registry, port 5000), three load-balancer port mappings and one agent
# node is created, and the function waits for all nodes to become Ready.
#
# Globals written: KUBECONFIG (exported)
create_k3d_cluster() {
    local cluster_name="klyro-local"
    local kubeconfig_path

    log_local "Setting up k3d cluster..."

    # Compare the NAME column exactly; a substring grep would also match
    # clusters such as "klyro-local-2".
    if k3d cluster list 2>/dev/null \
        | awk -v name="$cluster_name" '$1 == name { found = 1 } END { exit !found }'; then
        log_info "Cluster '$cluster_name' already exists"
        k3d kubeconfig merge "$cluster_name" --kubeconfig-switch-context >/dev/null 2>&1
        # Assign before exporting so a failing `k3d kubeconfig write` is not
        # masked by `export` (which would leave KUBECONFIG empty and let
        # kubectl fall back to whatever the default context is).
        kubeconfig_path=$(k3d kubeconfig write "$cluster_name")
        export KUBECONFIG="$kubeconfig_path"
        return 0
    fi

    log_info "Creating k3d cluster with local registry..."
    k3d cluster create "$cluster_name" \
        --registry-create klyro-registry:0.0.0.0:5000 \
        --port "8090:80@loadbalancer" \
        --port "8443:443@loadbalancer" \
        --port "7233:7233@loadbalancer" \
        --agents 1 \
        --wait

    kubeconfig_path=$(k3d kubeconfig write "$cluster_name")
    export KUBECONFIG="$kubeconfig_path"

    log_info "Waiting for cluster to be ready..."
    kubectl wait --for=condition=Ready nodes --all --timeout=120s

    log_success "k3d cluster '$cluster_name' created"
}

# Build the agent, operator/worker and (unless a custom gRPC endpoint is in
# use) mock-cloud images, and push them to the local registry.
#
# Globals read: PROJECT_ROOT, LOCAL_REGISTRY (falls back to localhost:5000),
#               CUSTOM_GRPC_ENDPOINT
# Exits with status 1 if proto generation leaves no gen/go directory or if
# any build or push fails.
build_local_images() {
    log_local "Building Docker images..."

    local registry="${LOCAL_REGISTRY:-localhost:5000}"

    if [ ! -d "${PROJECT_ROOT}/gen/go" ]; then
        log_info "Generating proto files..."
        # Run make in a subshell so the caller's working directory is not
        # changed as a side effect.
        ( cd "${PROJECT_ROOT}" && make proto ) || {
            log_warn "Proto generation failed - checking if gen/ exists..."
            if [ ! -d "${PROJECT_ROOT}/gen/go" ]; then
                log_error "Proto files not found. Run 'make proto' first."
                exit 1
            fi
        }
    fi

    log_info "Building agent image..."
    docker build -t "${registry}/klyro-agent:local" \
        -f "${PROJECT_ROOT}/services/agent/Dockerfile" \
        "${PROJECT_ROOT}" || {
        log_error "Failed to build agent image"
        exit 1
    }
    docker push "${registry}/klyro-agent:local" || {
        log_error "Failed to push agent image"
        exit 1
    }
    log_success "Agent image built and pushed"

    # Note: the operator image uses services/operator as its build context,
    # unlike the other two images which use the project root.
    log_info "Building operator/worker image..."
    docker build -t "${registry}/klyro-operator:local" \
        -f "${PROJECT_ROOT}/services/operator/Dockerfile" \
        "${PROJECT_ROOT}/services/operator" || {
        log_error "Failed to build operator image"
        exit 1
    }
    docker push "${registry}/klyro-operator:local" || {
        log_error "Failed to push operator image"
        exit 1
    }
    log_success "Operator image built and pushed"

    if [ -z "${CUSTOM_GRPC_ENDPOINT:-}" ]; then
        log_info "Building mock-cloud image..."
        docker build -t "${registry}/mock-cloud:local" \
            -f "${PROJECT_ROOT}/services/agent/cmd/mock-cloud/Dockerfile" \
            "${PROJECT_ROOT}" || {
            log_error "Failed to build mock-cloud image"
            exit 1
        }
        docker push "${registry}/mock-cloud:local" || {
            log_error "Failed to push mock-cloud image"
            exit 1
        }
        log_success "Mock-cloud image built and pushed"
    else
        log_info "Skipping mock-cloud build (using custom gRPC endpoint)"
    fi

    log_success "All images built and pushed to local registry"
}

# Deploy the Mock Cloud server (Deployment + Service named "mock-cloud") into
# $NAMESPACE, unless a deployment with that name already exists there.
#
# The manifest is applied from an unquoted heredoc, so $NAMESPACE is expanded
# by the shell before kubectl sees it. The container exposes two ports, named
# grpc (50051) and http (50052), both published by the Service.
#
# A readiness timeout is only a warning: the function still reports success
# and lets the installer continue.
start_mock_cloud() {
    log_local "Starting Mock Cloud gRPC server..."

    # Skip if already deployed (the existing deployment is not updated).
    if kubectl get deployment mock-cloud -n "$NAMESPACE" >/dev/null 2>&1; then
        log_info "Mock Cloud already deployed"
        return 0
    fi

    # NOTE(review): the image is pulled via "klyro-registry:5000", presumably
    # the in-cluster name of the registry created with the k3d cluster - confirm.
    kubectl apply -f - <<EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  name: mock-cloud
  namespace: $NAMESPACE
spec:
  replicas: 1
  selector:
    matchLabels:
      app: mock-cloud
  template:
    metadata:
      labels:
        app: mock-cloud
    spec:
      containers:
      - name: mock-cloud
        image: klyro-registry:5000/mock-cloud:local
        imagePullPolicy: Always
        ports:
        - containerPort: 50051
          name: grpc
        - containerPort: 50052
          name: http
        resources:
          requests:
            cpu: 50m
            memory: 64Mi
          limits:
            cpu: 200m
            memory: 128Mi
---
apiVersion: v1
kind: Service
metadata:
  name: mock-cloud
  namespace: $NAMESPACE
spec:
  selector:
    app: mock-cloud
  ports:
  - name: grpc
    port: 50051
    targetPort: 50051
  - name: http
    port: 50052
    targetPort: 50052
EOF

    log_info "Waiting for Mock Cloud to be ready..."
    # Best effort: a timeout here is downgraded to a warning.
    kubectl wait --for=condition=Available deployment/mock-cloud -n "$NAMESPACE" --timeout=120s || {
        log_warn "Mock Cloud not ready yet, continuing..."
    }

    log_success "Mock Cloud deployed"
}

# Deploy Temporal backed by PostgreSQL into $NAMESPACE.
#
# Steps:
#   1. Return early if a pod labelled app=temporal is already Running.
#   2. Reuse the password stored in the secret temporal-postgresql-credentials,
#      or generate a random one and create that secret.
#   3. Apply one multi-document manifest containing: the PostgreSQL Service,
#      PVC and Deployment; the Temporal Deployment and its temporal-frontend
#      Service (port 7233); the Temporal UI Deployment and its temporal-web
#      Service (port 8088 -> container port 8080).
#   4. Wait for the PostgreSQL and Temporal deployments to become Available.
#      Timeouts are downgraded to warnings, so the function still ends by
#      logging success.
#
# The manifest heredoc is unquoted, so $NAMESPACE is expanded by the shell.
# The database credentials are never interpolated into the manifest; the pods
# read them from the secret through secretKeyRef.
start_temporal() {
    log_local "Starting Temporal..."

    if kubectl get pods -n "$NAMESPACE" -l app=temporal 2>/dev/null | grep -q "Running"; then
        log_info "Temporal already running"
        return 0
    fi

    log_info "Deploying Temporal with PostgreSQL..."

    # Generate random password for Temporal PostgreSQL (or reuse existing secret)
    # Declared separately from the assignment; a missing secret simply yields
    # an empty string because errors from both commands are discarded.
    local TEMPORAL_PG_PASSWORD
    TEMPORAL_PG_PASSWORD=$(kubectl get secret temporal-postgresql-credentials -n "$NAMESPACE" \
        -o jsonpath='{.data.password}' 2>/dev/null | base64 -d 2>/dev/null)
    if [ -z "$TEMPORAL_PG_PASSWORD" ]; then
        # Prefer openssl; fall back to reading /dev/urandom and hex-encoding it.
        TEMPORAL_PG_PASSWORD=$(openssl rand -hex 16 2>/dev/null || head -c 32 /dev/urandom | od -An -tx1 | tr -d ' \n')
        # create --dry-run | apply makes the secret creation idempotent.
        kubectl create secret generic temporal-postgresql-credentials \
            --from-literal=username=temporal \
            --from-literal=password="$TEMPORAL_PG_PASSWORD" \
            --from-literal=database=temporal \
            --namespace="$NAMESPACE" \
            --dry-run=client -o yaml | kubectl apply -f - >/dev/null
        printf "    ${GREEN}[OK]${NC} Secret: temporal-postgresql-credentials (generated)\n"
    else
        printf "    ${GREEN}[OK]${NC} Secret: temporal-postgresql-credentials (existing)\n"
    fi

    kubectl apply -f - <<EOF
---
apiVersion: v1
kind: Service
metadata:
  name: temporal-postgresql
  namespace: $NAMESPACE
spec:
  selector:
    app: temporal-postgresql
  ports:
  - port: 5432
    targetPort: 5432
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: temporal-postgresql-data
  namespace: $NAMESPACE
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: temporal-postgresql
  namespace: $NAMESPACE
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: temporal-postgresql
  template:
    metadata:
      labels:
        app: temporal-postgresql
    spec:
      containers:
      - name: postgresql
        image: postgres:15-alpine
        ports:
        - containerPort: 5432
        env:
        - name: POSTGRES_USER
          valueFrom:
            secretKeyRef:
              name: temporal-postgresql-credentials
              key: username
        - name: POSTGRES_PASSWORD
          valueFrom:
            secretKeyRef:
              name: temporal-postgresql-credentials
              key: password
        - name: POSTGRES_DB
          valueFrom:
            secretKeyRef:
              name: temporal-postgresql-credentials
              key: database
        - name: PGDATA
          value: /var/lib/postgresql/data/pgdata
        volumeMounts:
        - name: data
          mountPath: /var/lib/postgresql/data
        resources:
          requests:
            cpu: 100m
            memory: 256Mi
          limits:
            cpu: 500m
            memory: 512Mi
        readinessProbe:
          exec:
            command: ["pg_isready", "-U", "temporal"]
          initialDelaySeconds: 5
          periodSeconds: 10
        livenessProbe:
          exec:
            command: ["pg_isready", "-U", "temporal"]
          initialDelaySeconds: 15
          periodSeconds: 20
      volumes:
      - name: data
        persistentVolumeClaim:
          claimName: temporal-postgresql-data
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: temporal
  namespace: $NAMESPACE
spec:
  replicas: 1
  selector:
    matchLabels:
      app: temporal
  template:
    metadata:
      labels:
        app: temporal
    spec:
      containers:
      - name: temporal
        image: temporalio/auto-setup:1.24.2
        ports:
        - containerPort: 7233
          name: grpc
        env:
        - name: DB
          value: postgres12
        - name: DB_PORT
          value: "5432"
        - name: POSTGRES_USER
          valueFrom:
            secretKeyRef:
              name: temporal-postgresql-credentials
              key: username
        - name: POSTGRES_PWD
          valueFrom:
            secretKeyRef:
              name: temporal-postgresql-credentials
              key: password
        - name: POSTGRES_SEEDS
          value: temporal-postgresql
        resources:
          requests:
            cpu: 100m
            memory: 256Mi
          limits:
            cpu: 500m
            memory: 512Mi
---
apiVersion: v1
kind: Service
metadata:
  name: temporal-frontend
  namespace: $NAMESPACE
spec:
  selector:
    app: temporal
  ports:
  - name: grpc
    port: 7233
    targetPort: 7233
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: temporal-ui
  namespace: $NAMESPACE
spec:
  replicas: 1
  selector:
    matchLabels:
      app: temporal-ui
  template:
    metadata:
      labels:
        app: temporal-ui
    spec:
      containers:
      - name: temporal-ui
        image: temporalio/ui:2.26.2
        ports:
        - containerPort: 8080
        env:
        - name: TEMPORAL_ADDRESS
          value: temporal-frontend:7233
        resources:
          requests:
            cpu: 50m
            memory: 64Mi
          limits:
            cpu: 200m
            memory: 256Mi
---
apiVersion: v1
kind: Service
metadata:
  name: temporal-web
  namespace: $NAMESPACE
spec:
  selector:
    app: temporal-ui
  ports:
  - port: 8088
    targetPort: 8080
EOF

    log_info "Waiting for PostgreSQL..."
    # Best effort: a timeout only produces a warning.
    kubectl wait --for=condition=Available deployment/temporal-postgresql \
        -n "$NAMESPACE" --timeout=120s || {
        log_warn "PostgreSQL deployment not ready yet"
    }

    log_info "Waiting for Temporal (this may take a minute)..."
    sleep 10  # Give PostgreSQL time to initialize

    # Best effort again; on timeout the current pod list is printed to help
    # with debugging, and the installer continues.
    kubectl wait --for=condition=Available deployment/temporal \
        -n "$NAMESPACE" --timeout=300s || {
        log_warn "Temporal not ready yet, continuing..."
        kubectl get pods -n "$NAMESPACE" -l app=temporal
    }

    log_success "Temporal deployed"
}

# Obtain agent credentials for local mode.
#
# With CUSTOM_API_ENDPOINT set, the install token is exchanged at
# <endpoint>/api/v1/agents/install and the JSON response is parsed with
# json_value. "host.docker.internal" in the endpoint is rewritten to
# "localhost" because the request is made from the host. Any HTTP status other
# than 200/201, or a response missing the agent id or OAuth2 client
# credentials, terminates the script with status 1.
#
# Without CUSTOM_API_ENDPOINT, fixed mock credentials are used and the install
# token is prefixed with "inst_" if it does not already start with it.
#
# Secrets are kept out of the logs: the token is redacted from logged URLs and
# the raw response is only printed when KLYRO_DEBUG=true.
#
# Globals read:    CUSTOM_API_ENDPOINT, INSTALL_TOKEN, KLYRO_DEBUG
# Globals written: AGENT_ID, TENANT_ID, SITE_ID, CHART_VERSION,
#                  OAUTH2_CLIENT_ID, OAUTH2_CLIENT_SECRET, GHCR_TOKEN,
#                  DATADOG_API_KEY, ACCESS_TOKEN, ECR_TOKEN, INSTALL_TOKEN
fetch_credentials_local() {
    if [ -n "${CUSTOM_API_ENDPOINT:-}" ]; then
        log_local "Fetching credentials from platform API..."

        local host_api_endpoint="${CUSTOM_API_ENDPOINT//host.docker.internal/localhost}"
        local api_url="${host_api_endpoint}/api/v1/agents/install?token=${INSTALL_TOKEN}"
        # Same URL with the token masked, safe to print.
        local safe_url="${host_api_endpoint}/api/v1/agents/install?token=<redacted>"
        log_info "Calling: $safe_url"

        # Declare and assign separately so a failing mktemp is not masked.
        local RESPONSE_FILE
        RESPONSE_FILE=$(mktemp) || {
            log_error "Cannot create temporary file for API response"
            exit 1
        }
        local HTTP_CODE

        HTTP_CODE=$(curl -s -w "%{http_code}" -o "$RESPONSE_FILE" "$api_url" 2>/dev/null) || HTTP_CODE="000"

        case "$HTTP_CODE" in
            200|201)
                # 201 is accepted too, matching the production credential fetch.
                log_success "Credentials retrieved from platform"
                ;;
            401|403)
                log_error "Invalid or expired install token"
                rm -f "$RESPONSE_FILE"
                exit 1
                ;;
            404)
                log_error "Install endpoint not found at: $safe_url"
                rm -f "$RESPONSE_FILE"
                exit 1
                ;;
            409)
                log_error "Install token has already been used"
                rm -f "$RESPONSE_FILE"
                exit 1
                ;;
            000)
                log_error "Cannot connect to platform API at: $CUSTOM_API_ENDPOINT"
                rm -f "$RESPONSE_FILE"
                exit 1
                ;;
            *)
                log_error "API error (HTTP $HTTP_CODE)"
                cat "$RESPONSE_FILE" 2>/dev/null || true
                rm -f "$RESPONSE_FILE"
                exit 1
                ;;
        esac

        local RESPONSE
        RESPONSE=$(cat "$RESPONSE_FILE")
        rm -f "$RESPONSE_FILE"

        AGENT_ID=$(json_value "$RESPONSE" "agent_id")
        TENANT_ID=$(json_value "$RESPONSE" "tenant_id")
        SITE_ID=$(json_value "$RESPONSE" "site_id")
        CHART_VERSION=$(json_value "$RESPONSE" "chart_version")
        OAUTH2_CLIENT_ID=$(json_value "$RESPONSE" "oauth2_client_id")
        OAUTH2_CLIENT_SECRET=$(json_value "$RESPONSE" "oauth2_client_secret")
        GHCR_TOKEN=$(json_value "$RESPONSE" "ghcr_token")
        DATADOG_API_KEY=$(json_value "$RESPONSE" "datadog_api_key")
        ACCESS_TOKEN="$OAUTH2_CLIENT_SECRET"

        # Validate required fields
        if [ -z "$AGENT_ID" ] || [ -z "$OAUTH2_CLIENT_ID" ] || [ -z "$OAUTH2_CLIENT_SECRET" ]; then
            log_error "Incomplete response from API - missing required credentials"
            # The response can contain secrets (client secret, registry
            # token), so only show it when debugging was explicitly requested.
            if [ "${KLYRO_DEBUG:-}" = "true" ]; then
                log_info "Response: $RESPONSE"
            else
                log_info "Set KLYRO_DEBUG=true to print the raw API response"
            fi
            exit 1
        fi

        echo ""
        echo "    ┌─────────────────────────────────────────────────────┐"
        echo "    │  Agent ID:      $AGENT_ID"
        echo "    │  Tenant ID:     $TENANT_ID"
        echo "    │  Site ID:       $SITE_ID"
        echo "    │  OAuth Client:  $OAUTH2_CLIENT_ID"
        echo "    │  Mode:          LOCAL + Custom Endpoints"
        echo "    └─────────────────────────────────────────────────────┘"
        return 0
    fi

    log_local "Using local mock credentials..."

    if [[ ! "$INSTALL_TOKEN" =~ ^inst_ ]]; then
        INSTALL_TOKEN="inst_${INSTALL_TOKEN}"
        log_info "Prefixed token with 'inst_' for mock cloud compatibility"
    fi

    AGENT_ID="klyro_local_test"
    ACCESS_TOKEN="kat_local_test_token_${RANDOM}"
    CHART_VERSION="local"
    ECR_TOKEN=""
    DATADOG_API_KEY=""

    OAUTH2_CLIENT_ID="klyro-edge-local-test"
    OAUTH2_CLIENT_SECRET="local_test_secret_${RANDOM}"

    echo ""
    echo "    ┌─────────────────────────────────────────────────────┐"
    echo "    │  Install Token: $INSTALL_TOKEN"
    echo "    │  Agent ID:      $AGENT_ID"
    echo "    │  Mode:          LOCAL (mock cloud)"
    echo "    │  gRPC:          mock-cloud:50051"
    echo "    │  Temporal:      temporal-frontend:7233"
    echo "    └─────────────────────────────────────────────────────┘"
}

# Create (or update) the placeholder secret "openai-credentials" in $NAMESPACE
# for local testing. The manifest is rendered client-side and piped into
# `kubectl apply`, so repeated runs do not fail when the secret already exists.
create_local_secrets() {
    log_local "Creating local test secrets..."

    # Dummy OpenAI credentials (required by worker); the key is not a real one.
    local -a secret_cmd=(
        kubectl create secret generic openai-credentials
        --from-literal=api-key="sk-local-test-key-not-real"
        --namespace="$NAMESPACE"
        --dry-run=client -o yaml
    )
    "${secret_cmd[@]}" | kubectl apply -f - >/dev/null

    printf "    ${GREEN}[OK]${NC} Secret: openai-credentials (dummy for local testing)\n"
}

# Install or upgrade the klyro-edge chart from the local checkout with
# settings suited to local mode.
#
# Image source: the GHCR registry when GHCR_TOKEN is set, otherwise the k3d
# local registry. gRPC endpoint: CUSTOM_GRPC_ENDPOINT if set (treated as TLS
# when it contains ":443" or starts with "grpc."), otherwise the in-cluster
# mock cloud without TLS.
#
# The helm arguments are collected in a local array and expanded as
# "${helm_args[@]}", so every value reaches helm as exactly one argument:
# no word splitting on spaces and no glob expansion of keys such as
# "global.imagePullSecrets[0].name".
#
# Globals read: PROJECT_ROOT, NAMESPACE, RELEASE_NAME, CUSTOM_GRPC_ENDPOINT,
#               CUSTOM_API_ENDPOINT, GHCR_TOKEN, GHCR_REGISTRY, CHART_VERSION,
#               AGENT_ID
# Exits with status 1 if the chart directory is missing or helm fails.
install_helm_chart_local() {
    log_local "Installing Helm chart in local mode..."

    local CHART_PATH="${PROJECT_ROOT}/charts/klyro-edge"

    if [ ! -d "$CHART_PATH" ]; then
        log_error "Chart not found at: $CHART_PATH"
        exit 1
    fi

    # Determine gRPC endpoint and TLS settings
    local grpc_endpoint="mock-cloud:50051"
    local grpc_insecure="true"

    if [ -n "${CUSTOM_GRPC_ENDPOINT:-}" ]; then
        grpc_endpoint="$CUSTOM_GRPC_ENDPOINT"
        # Heuristic: port 443 or a "grpc." host name implies a TLS endpoint.
        if [[ "$grpc_endpoint" == *":443"* ]] || [[ "$grpc_endpoint" == "grpc."* ]]; then
            grpc_insecure="false"
        fi
        log_info "Using custom gRPC endpoint: $grpc_endpoint (insecure=$grpc_insecure)"
    fi

    # Determine API endpoint
    local api_endpoint="https://api.klyro.security"
    if [ -n "${CUSTOM_API_ENDPOINT:-}" ]; then
        api_endpoint="$CUSTOM_API_ENDPOINT"
        log_info "Using custom API endpoint: $api_endpoint"
    fi

    local -a helm_args=(--namespace "$NAMESPACE")

    if [ -n "${GHCR_TOKEN:-}" ]; then
        log_info "Using GHCR registry: ${GHCR_REGISTRY}"
        # Agent and worker images come from GHCR, pulled with the registry secret.
        helm_args+=(
            --set "global.imagePullSecrets[0].name=klyro-registry-secret"
            --set "agent.image.repository=${GHCR_REGISTRY}/klyro-agent"
            --set "agent.image.tag=${CHART_VERSION:-latest}"
            --set "worker.image.repository=${GHCR_REGISTRY}/klyro-operator"
            --set "worker.image.tag=${CHART_VERSION:-latest}"
        )
    else
        log_info "Using local k3d registry (no GHCR token)"
        # Agent and worker images use the full path to the local registry.
        helm_args+=(
            --set "global.imagePullSecrets=null"
            --set "agent.image.repository=klyro-registry:5000/klyro-agent"
            --set "agent.image.tag=local"
            --set "worker.image.repository=klyro-registry:5000/klyro-operator"
            --set "worker.image.tag=local"
        )
    fi

    # Agent
    helm_args+=(
        --set "agent.image.pullPolicy=Always"
        --set "agent.persistence.enabled=true"
        --set "agent.persistence.storageClass=local-path"
    )

    # Cloud connectivity
    helm_args+=(
        --set "klyroCloud.endpoint=$api_endpoint"
        --set "klyroCloud.grpc.endpoint=$grpc_endpoint"
        --set "klyroCloud.grpc.insecure=$grpc_insecure"
        --set "klyroCloud.gatewayHttpUrl=$api_endpoint"
        --set "klyroCloud.apiKeySecretName=klyro-cloud-credentials"
        --set "klyroCloud.apiKeySecretKey=access-token"
        --set "klyroCloud.agentIdSecretKey=agent-id"
        --set "klyroCloud.heartbeat.enabled=true"
        --set "klyroCloud.heartbeat.intervalSeconds=10"
    )

    # Worker, pointed at the temporal-frontend service
    helm_args+=(
        --set "worker.enabled=true"
        --set "worker.image.pullPolicy=Always"
        --set "worker.temporal.hostPort=temporal-frontend:7233"
        --set "worker.temporal.namespace=default"
        --set "worker.temporal.taskQueue=klyro-tasks"
    )

    # Chart-managed Temporal, telemetry and registry helpers are switched off.
    helm_args+=(
        --set "temporal.enabled=false"
        --set "temporal.mysql.enabled=false"
        --set "temporal-mysql.enabled=false"
        --set "opentelemetry.enabled=false"
        --set "opentelemetry.clusterName=$AGENT_ID"
        --set "opentelemetry.collector.enabled=false"
        --set "ecr.enabled=false"
        --set "imagePullSecrets.create=false"
    )

    helm_args+=(--wait --timeout 15m)

    log_info "Running Helm install..."
    helm upgrade --install "$RELEASE_NAME" "$CHART_PATH" "${helm_args[@]}" || {
        log_error "Helm installation failed"
        echo ""
        echo "Debug commands:"
        echo "  kubectl get pods -n $NAMESPACE"
        echo "  kubectl describe pods -n $NAMESPACE"
        exit 1
    }

    log_success "Helm chart installed"
}

# Return 0 when at least one pod in $NAMESPACE matching label selector $1 is
# listed with status Running.
_klyro_pods_running() {
    kubectl get pods -n "$NAMESPACE" -l "$1" 2>/dev/null | grep -q "Running"
}

# Print "    Checking <name>... OK" if any of the given label selectors
# ($2...) has a Running pod, otherwise "... PENDING". Always returns 0.
_klyro_report_component() {
    local component=$1
    shift
    local selector
    printf "    Checking %s... " "$component"
    for selector in "$@"; do
        if _klyro_pods_running "$selector"; then
            echo -e "${GREEN}OK${NC}"
            return 0
        fi
    done
    echo -e "${YELLOW}PENDING${NC}"
}

# Report the state of the local installation: per-component pod status, agent
# rollout, agent health endpoint, and hints found in the agent log about its
# gRPC and Temporal connections. Purely informational: nothing here fails the
# install.
verify_local_installation() {
    log_local "Verifying local installation..."

    echo ""

    _klyro_report_component "Mock Cloud" "app=mock-cloud"
    # Temporal deployed by start_temporal is labelled app=temporal; the
    # component=frontend selector is kept as an additional match.
    _klyro_report_component "Temporal" "app=temporal" "app.kubernetes.io/component=frontend"
    _klyro_report_component "Agent" "app.kubernetes.io/component=agent"
    _klyro_report_component "Worker" "app.kubernetes.io/component=worker"

    echo ""

    log_info "Waiting for agent to be ready..."
    kubectl rollout status "deployment/${RELEASE_NAME}-agent" \
        -n "$NAMESPACE" --timeout=120s 2>/dev/null || {
        log_warn "Agent deployment taking longer than expected"
    }

    sleep 3
    # `|| true`: a lookup failure (e.g. no agent pod yet) must not abort the
    # script under `set -e`; it just leaves POD_NAME empty.
    local POD_NAME
    POD_NAME=$(kubectl get pods -n "$NAMESPACE" \
        -l app.kubernetes.io/component=agent \
        -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) || true

    if [ -n "$POD_NAME" ]; then
        log_info "Testing agent health..."
        local HEALTH
        HEALTH=$(kubectl exec -n "$NAMESPACE" "$POD_NAME" -- \
            wget -qO- http://localhost:8080/health 2>/dev/null || echo "")

        if echo "$HEALTH" | grep -qi "ok"; then
            printf "    ${GREEN}[OK]${NC} Agent health check passed\n"
        else
            printf "    ${YELLOW}[WARN]${NC} Agent health check inconclusive\n"
        fi

        log_info "Checking agent connectivity..."
        local LOGS
        LOGS=$(kubectl logs -n "$NAMESPACE" "$POD_NAME" --tail=50 2>/dev/null) || true

        if echo "$LOGS" | grep -qi "connected.*grpc\|heartbeat.*started\|control stream"; then
            printf "    ${GREEN}[OK]${NC} Agent connected to gRPC cloud\n"
        else
            printf "    ${YELLOW}[INFO]${NC} Agent gRPC connection pending\n"
        fi

        if echo "$LOGS" | grep -qi "temporal\|connector.*initialized"; then
            printf "    ${GREEN}[OK]${NC} Agent connected to Temporal\n"
        else
            printf "    ${YELLOW}[INFO]${NC} Agent Temporal connection pending\n"
        fi
    fi

    echo ""
    log_info "Pod status:"
    kubectl get pods -n "$NAMESPACE" -o wide 2>/dev/null | sed 's/^/    /'
}

# Run five smoke tests from inside the agent pod and print one result line for
# each: agent health endpoint, agent status API, presence of the SQLite file,
# reachability of the Temporal frontend, and reachability of the mock cloud's
# HTTP port.
#
# Command output from inside the pod is discarded, so it cannot interleave
# with the "Test N: ... RESULT" lines.
#
# Returns: 1 when no agent pod is found (tests skipped), otherwise 0.
#          Individual test failures are reported but do not change the status.
run_connectivity_tests() {
    log_local "Running connectivity tests..."

    echo ""

    # `|| true`: a failed lookup leaves AGENT_POD empty rather than aborting
    # the script under `set -e`.
    local AGENT_POD
    AGENT_POD=$(kubectl get pods -n "$NAMESPACE" \
        -l app.kubernetes.io/component=agent \
        -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) || true

    if [ -z "$AGENT_POD" ]; then
        log_warn "Agent pod not found, skipping tests"
        return 1
    fi

    # Test 1: Agent Health
    printf "    Test 1: Agent Health... "
    if kubectl exec -n "$NAMESPACE" "$AGENT_POD" -- wget -qO- http://localhost:8080/health 2>/dev/null | grep -qi "ok"; then
        echo -e "${GREEN}PASS${NC}"
    else
        echo -e "${RED}FAIL${NC}"
    fi

    # Test 2: Agent Status API
    printf "    Test 2: Agent Status API... "
    if kubectl exec -n "$NAMESPACE" "$AGENT_POD" -- wget -qO- http://localhost:8080/api/v1/status 2>/dev/null | grep -qi "agent"; then
        echo -e "${GREEN}PASS${NC}"
    else
        echo -e "${RED}FAIL${NC}"
    fi

    # Test 3: SQLite Database (only the exit status of ls matters)
    printf "    Test 3: SQLite Database... "
    if kubectl exec -n "$NAMESPACE" "$AGENT_POD" -- ls /data/klyro.db >/dev/null 2>&1; then
        echo -e "${GREEN}PASS${NC}"
    else
        echo -e "${YELLOW}PENDING${NC} (DB will be created on first write)"
    fi

    # Test 4: Temporal Frontend Reachability
    # The port is probed with plain HTTP; only output mentioning "error" or
    # "refused" is treated as unreachable.
    printf "    Test 4: Temporal Frontend... "
    if kubectl exec -n "$NAMESPACE" "$AGENT_POD" -- \
        wget -qO- --timeout=5 http://temporal-frontend:7233 2>&1 | grep -qi "error\|refused"; then
        echo -e "${YELLOW}PENDING${NC}"
    else
        echo -e "${GREEN}REACHABLE${NC}"
    fi

    # Test 5: Mock Cloud Reachability (via its HTTP port; body is discarded)
    printf "    Test 5: Mock Cloud gRPC... "
    if kubectl exec -n "$NAMESPACE" "$AGENT_POD" -- \
        wget -qO- --timeout=5 http://mock-cloud:50052/agents >/dev/null 2>&1; then
        echo -e "${GREEN}REACHABLE${NC}"
    else
        echo -e "${YELLOW}PENDING${NC}"
    fi

    echo ""
    log_success "Connectivity tests completed"
}

# =============================================================================
# Prerequisites Check
# =============================================================================

# Check that an executable is available on PATH.
#
# Arguments:
#   $1 - command name
#   $2 - installation hint shown when the command is missing
# Returns:
#   0 if the command exists; otherwise logs an error, prints the hint on
#   stdout and returns 1.
check_command() {
    local tool_name=$1
    local install_hint=$2

    if command -v "$tool_name" >/dev/null 2>&1; then
        return 0
    fi

    log_error "$tool_name not found"
    echo "  Install: $install_hint"
    return 1
}

# Verify everything the installer needs before touching the cluster:
#   1. curl, kubectl and helm are on PATH
#   2. the current kubeconfig can reach a cluster
#   3. the user may create namespaces, and secrets in $NAMESPACE
#   4. Helm is version 3 or newer
# Any failed check logs an error and exits with status 1.
#
# Globals written: HELM_VERSION (e.g. "v3.14"), HELM_MAJOR (e.g. "3")
check_prerequisites() {
    log_info "Checking prerequisites..."

    local missing=0

    check_command "curl" "https://curl.se/download.html" || missing=1
    check_command "kubectl" "https://kubernetes.io/docs/tasks/tools/" || missing=1
    check_command "helm" "https://helm.sh/docs/intro/install/" || missing=1

    if [ "$missing" -eq 1 ]; then
        echo ""
        log_error "Missing required tools. Please install them and try again."
        exit 1
    fi
    log_success "Required tools installed"

    log_info "Checking Kubernetes connectivity..."
    if ! kubectl cluster-info >/dev/null 2>&1; then
        log_error "Cannot connect to Kubernetes cluster"
        echo ""
        echo "Please ensure:"
        echo "  - You have a valid kubeconfig (~/.kube/config)"
        echo "  - Your cluster is running and accessible"
        echo "  - Run: kubectl cluster-info"
        exit 1
    fi
    log_success "Kubernetes cluster accessible"

    log_info "Checking RBAC permissions..."
    if ! kubectl auth can-i create namespace >/dev/null 2>&1; then
        log_error "Insufficient permissions to create namespace"
        echo ""
        echo "The installer needs cluster-admin or equivalent permissions."
        exit 1
    fi

    if ! kubectl auth can-i create secret --namespace="$NAMESPACE" >/dev/null 2>&1; then
        log_error "Insufficient permissions to create secrets"
        exit 1
    fi
    log_success "RBAC permissions verified"

    log_info "Checking Helm version..."
    # Extract the first "vMAJOR.MINOR" token from the version string.
    HELM_VERSION=$(helm version --short 2>/dev/null | grep -oE 'v[0-9]+\.[0-9]+' | head -1)
    HELM_MAJOR=${HELM_VERSION#v}
    HELM_MAJOR=${HELM_MAJOR%%.*}

    # An empty or non-numeric major would make the numeric test below error
    # out, and inside `if` that error counts as "false", silently passing the
    # check. Reject it explicitly instead.
    case "$HELM_MAJOR" in
        '' | *[!0-9]*)
            log_error "Could not determine Helm version (Helm 3.x required)"
            exit 1
            ;;
    esac

    if [ "$HELM_MAJOR" -lt 3 ]; then
        log_error "Helm 3.x required (found: $HELM_VERSION)"
        exit 1
    fi
    log_success "Helm version: $HELM_VERSION"
}

# =============================================================================
# Token Validation & Credential Fetch
# =============================================================================

# Container registries and default endpoints.
# GitHub Container Registry namespace for published images.
GHCR_REGISTRY=ghcr.io/klyrohq
# Local registry address used when pushing from the host.
LOCAL_REGISTRY=localhost:5000
# Local registry address used in image references inside the k3d cluster.
K3D_REGISTRY=klyro-registry:5000
# Development agent image in the local registry.
LOCAL_IMAGE="${LOCAL_REGISTRY}/klyro-agent:dev"
# Default gRPC endpoint.
GRPC_ENDPOINT=grpc.klyro.security:443
# Extract a top-level-looking string field from a JSON document without jq.
#
# Arguments:
#   $1 - JSON text
#   $2 - key name (plain identifier; it is interpolated into a regex)
# Outputs:
#   The string value of the FIRST occurrence of "key": "value" on stdout, with the
#   JSON escapes \" \\ and \/ decoded. Prints nothing when the key is absent.
# Returns:
#   Always 0 (status of the final sed), so `VAR=$(json_value ...)` is safe under
#   `set -e` even when the key is missing.
#
# Limitations: only string values are supported; other escapes (\n, \uXXXX) are
# passed through undecoded.
json_value() {
    local json="$1"
    local key="$2"
    # Regex prefix matching:  "key"  :  "
    local prefix="\"${key}\"[[:space:]]*:[[:space:]]*\""

    # The value part accepts either a non-quote/non-backslash character or any
    # backslash-escaped character, so an escaped quote does not end the match.
    # head -n 1 keeps a single value even if the key re-appears in a nested object.
    printf '%s\n' "$json" \
        | grep -oE "${prefix}([^\"\\\\]|\\\\.)*\"" \
        | head -n 1 \
        | sed -E -e "s/^${prefix}//" -e 's/"$//' -e 's|\\(["\\/])|\1|g'
}

# Exchange the install token for agent credentials via the Klyro Cloud API.
#
# Arguments:
#   $1 - install token
# Globals read:
#   KLYRO_API_URL, KLYRO_DEBUG, GRPC_ENDPOINT (as default),
#   ANTHROPIC_API_KEY / BRAINTRUST_API_KEY / OPENAI_API_KEY (user overrides)
# Globals written:
#   INSTALL_TOKEN, RESPONSE_FILE, HTTP_CODE, RESPONSE, AGENT_ID, TENANT_ID, SITE_ID,
#   CHART_VERSION, OAUTH2_CLIENT_ID, OAUTH2_CLIENT_SECRET, GRPC_ENDPOINT,
#   AUTH_ENDPOINT, GHCR_TOKEN, DATADOG_API_KEY, ANTHROPIC_API_KEY,
#   BRAINTRUST_API_KEY, OPENAI_API_KEY, ACCESS_TOKEN
# Exits with status 1 on any HTTP error or when required fields are missing.
fetch_credentials() {
    INSTALL_TOKEN="$1"

    RESPONSE_FILE=$(mktemp)
    # Safety net for early exits; the file is also removed explicitly below because
    # a later `trap ... EXIT` elsewhere in the script replaces this handler.
    trap 'rm -f -- "$RESPONSE_FILE"' EXIT

    # No -f/--fail here: with --fail curl exits non-zero on HTTP >= 400, which made
    # the `|| HTTP_CODE="000"` fallback fire and hid every specific 4xx message
    # below behind "Cannot connect". Transport failures still yield a non-zero exit.
    HTTP_CODE=$(curl -sL -w "%{http_code}" \
        -o "$RESPONSE_FILE" \
        "${KLYRO_API_URL}/api/v1/agents/install?token=${INSTALL_TOKEN}" \
        -H "User-Agent: klyro-installer/1.0" \
        2>/dev/null) || HTTP_CODE="000"

    case "$HTTP_CODE" in
        200|201)
            log_success "Install token validated"
            ;;
        400)
            log_error "Invalid install token format"
            echo "Please copy the complete token from: https://app.klyro.security/agents/new"
            exit 1
            ;;
        401|403)
            log_error "Install token expired or invalid"
            echo "Generate a new token from: https://app.klyro.security/agents/new"
            exit 1
            ;;
        404)
            log_error "Install token not found"
            exit 1
            ;;
        409)
            log_error "Install token has already been used"
            echo "Generate a new token from: https://app.klyro.security/agents/new"
            exit 1
            ;;
        429)
            log_error "Rate limited - please wait and try again"
            exit 1
            ;;
        000)
            log_error "Cannot connect to Klyro Cloud API"
            echo "Endpoint: ${KLYRO_API_URL}"
            exit 1
            ;;
        *)
            log_error "API error (HTTP $HTTP_CODE)"
            [ "${KLYRO_DEBUG:-}" = "true" ] && cat "$RESPONSE_FILE"
            exit 1
            ;;
    esac

    RESPONSE=$(cat "$RESPONSE_FILE")
    # The response contains secrets; do not leave it on disk longer than needed.
    rm -f -- "$RESPONSE_FILE"

    AGENT_ID=$(json_value "$RESPONSE" "agent_id")
    TENANT_ID=$(json_value "$RESPONSE" "tenant_id")
    SITE_ID=$(json_value "$RESPONSE" "site_id")
    CHART_VERSION=$(json_value "$RESPONSE" "chart_version")
    OAUTH2_CLIENT_ID=$(json_value "$RESPONSE" "oauth2_client_id")
    OAUTH2_CLIENT_SECRET=$(json_value "$RESPONSE" "oauth2_client_secret")
    AUTH_ENDPOINT=$(json_value "$RESPONSE" "auth_endpoint")

    # Keep the pre-configured endpoint when the API does not return one, instead of
    # overwriting it with an empty string.
    local api_grpc_endpoint
    api_grpc_endpoint=$(json_value "$RESPONSE" "grpc_endpoint")
    GRPC_ENDPOINT="${api_grpc_endpoint:-${GRPC_ENDPOINT:-}}"

    GHCR_TOKEN=$(json_value "$RESPONSE" "ghcr_token")
    DATADOG_API_KEY=$(json_value "$RESPONSE" "datadog_api_key")

    # Operator keys — server-side default, user env var overrides if set
    local api_anthropic_key
    api_anthropic_key=$(json_value "$RESPONSE" "anthropic_api_key")
    if [ -n "${ANTHROPIC_API_KEY:-}" ] && [ -n "$api_anthropic_key" ] && [ "$ANTHROPIC_API_KEY" != "$api_anthropic_key" ]; then
        log_info "Using user-provided ANTHROPIC_API_KEY (overrides default)"
    fi
    ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-$api_anthropic_key}"
    BRAINTRUST_API_KEY="${BRAINTRUST_API_KEY:-$(json_value "$RESPONSE" "braintrust_api_key")}"
    OPENAI_API_KEY="${OPENAI_API_KEY:-$(json_value "$RESPONSE" "openai_api_key")}"

    # The OAuth2 client secret doubles as the access token stored in the cloud
    # credentials secret.
    ACCESS_TOKEN="$OAUTH2_CLIENT_SECRET"
    if [ -z "$AGENT_ID" ] || [ -z "$OAUTH2_CLIENT_ID" ] || [ -z "$OAUTH2_CLIENT_SECRET" ]; then
        log_error "Incomplete response from API - missing required credentials"
        [ "${KLYRO_DEBUG:-}" = "true" ] && echo "Response: $RESPONSE"
        exit 1
    fi

    local telemetry_status="Disabled"
    if [ -n "$DATADOG_API_KEY" ]; then
        telemetry_status="Datadog enabled"
    fi

    echo ""
    echo "    ┌─────────────────────────────────────────────────────┐"
    echo "    │  Agent ID:      $AGENT_ID"
    echo "    │  Tenant ID:     $TENANT_ID"
    echo "    │  Site ID:       $SITE_ID"
    echo "    │  Chart Version: $CHART_VERSION"
    echo "    │  Telemetry:     $telemetry_status"
    echo "    └─────────────────────────────────────────────────────┘"
}

# =============================================================================
# Cleanup Existing Installation
# =============================================================================

# Remove a previous Klyro Edge installation so the installer starts from a clean slate.
#
# Globals read:    RELEASE_NAME, NAMESPACE, colour variables
# Globals written: UNINSTALL_PID, UNINSTALL_STATUS, WAIT_PID
# Behaviour:
#   1. If the Helm release exists, uninstall it (with a spinner); exit 1 on failure.
#   2. If the namespace exists, delete the installer-managed secrets and all pods,
#      then wait up to 60s for the pods to disappear (best effort).
cleanup_existing_installation() {
    local spinstr='⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏'
    local i=0
    local secret_name

    if helm status "$RELEASE_NAME" -n "$NAMESPACE" >/dev/null 2>&1; then
        log_warn "Found existing Helm release: $RELEASE_NAME"

        helm uninstall "$RELEASE_NAME" -n "$NAMESPACE" --wait >/dev/null 2>&1 &
        UNINSTALL_PID=$!

        # ${spinstr:i:1} selects one (possibly multibyte) character; `cut -c` is
        # byte-based on GNU coreutils and would slice the glyphs apart.
        printf "    ${BLUE}[%s]${NC} Uninstalling existing release..." "${spinstr:0:1}"
        while kill -0 "$UNINSTALL_PID" 2>/dev/null; do
            i=$(( (i + 1) % 10 ))
            printf "\r    ${BLUE}[%s]${NC} Uninstalling existing release..." "${spinstr:i:1}"
            sleep 0.1
        done

        # A bare `wait` would trip `set -e` on failure and skip the message below.
        UNINSTALL_STATUS=0
        wait "$UNINSTALL_PID" || UNINSTALL_STATUS=$?

        if [ "$UNINSTALL_STATUS" -eq 0 ]; then
            printf "\r    ${GREEN}[OK]${NC} Existing Helm release uninstalled    \n"
        else
            printf "\r    ${RED}[FAIL]${NC} Failed to uninstall existing release\n"
            exit 1
        fi
    else
        printf "    ${GREEN}[OK]${NC} No existing Helm release found\n"
    fi

    if kubectl get namespace "$NAMESPACE" >/dev/null 2>&1; then
        log_info "Cleaning up namespace resources..."

        for secret_name in \
            klyro-cloud-credentials \
            klyro-registry-secret \
            klyro-edge-oauth-credentials \
            anthropic-credentials \
            datadog-secrets; do
            kubectl delete secret "$secret_name" -n "$NAMESPACE" --ignore-not-found >/dev/null 2>&1
        done
        printf "    ${GREEN}[OK]${NC} Secrets cleaned up\n"

        kubectl delete pods -n "$NAMESPACE" --all --ignore-not-found >/dev/null 2>&1

        kubectl wait --for=delete pod --all -n "$NAMESPACE" --timeout=60s >/dev/null 2>&1 &
        WAIT_PID=$!

        i=0
        printf "    ${BLUE}[%s]${NC} Waiting for pods to terminate..." "${spinstr:0:1}"
        while kill -0 "$WAIT_PID" 2>/dev/null; do
            i=$(( (i + 1) % 10 ))
            printf "\r    ${BLUE}[%s]${NC} Waiting for pods to terminate..." "${spinstr:i:1}"
            sleep 0.1
        done
        # Best effort: a timeout or "no matching resources" is not fatal here.
        wait "$WAIT_PID" || true
        printf "\r    ${GREEN}[OK]${NC} Pods terminated                  \n"
    else
        printf "    ${GREEN}[OK]${NC} No existing namespace found\n"
    fi
}

# =============================================================================
# Kubernetes Resources Creation
# =============================================================================

# Ensure $NAMESPACE exists. The manifest is rendered client-side and piped into
# `kubectl apply`, so the call succeeds whether or not the namespace already exists.
create_namespace() {
    kubectl create namespace "$NAMESPACE" --dry-run=client -o yaml \
        | kubectl apply -f - >/dev/null

    printf "    ${GREEN}[OK]${NC} Namespace: %s\n" "$NAMESPACE"
}

# Store the GHCR token in two secrets: an image pull secret (klyro-registry-secret)
# and a generic secret (ghcr-credentials). Does nothing but report a skip when
# $GHCR_TOKEN is empty.
create_registry_secret() {
    local registry_user="klyrohq"

    # Guard clause: no token, nothing to create.
    [ -n "$GHCR_TOKEN" ] || {
        log_warn "No GHCR token provided - skipping registry secret"
        printf "    ${YELLOW}[SKIP]${NC} Registry secret (no ghcr_token in API response)\n"
        return 0
    }

    # Pull secret referenced by the chart's imagePullSecrets.
    kubectl create secret docker-registry klyro-registry-secret \
        --docker-server="ghcr.io" \
        --docker-username="$registry_user" \
        --docker-password="$GHCR_TOKEN" \
        --namespace="$NAMESPACE" \
        --dry-run=client -o yaml \
        | kubectl apply -f - >/dev/null
    printf "    ${GREEN}[OK]${NC} Secret: klyro-registry-secret (GHCR)\n"

    # Same credentials as plain key/value pairs.
    kubectl create secret generic ghcr-credentials \
        --from-literal=username="$registry_user" \
        --from-literal=token="$GHCR_TOKEN" \
        --namespace="$NAMESPACE" \
        --dry-run=client -o yaml \
        | kubectl apply -f - >/dev/null
    printf "    ${GREEN}[OK]${NC} Secret: ghcr-credentials (OTA chart pulls)\n"
}


# Create or update the klyro-cloud-credentials secret holding the agent ID, the
# access token and the install token.
create_cloud_credentials_secret() {
    local secret_name="klyro-cloud-credentials"
    local -a literals=(
        --from-literal=agent-id="$AGENT_ID"
        --from-literal=access-token="$ACCESS_TOKEN"
        --from-literal=install-token="$INSTALL_TOKEN"
    )

    kubectl create secret generic "$secret_name" "${literals[@]}" \
        --namespace="$NAMESPACE" --dry-run=client -o yaml \
        | kubectl apply -f - >/dev/null

    printf "    ${GREEN}[OK]${NC} Secret: %s\n" "$secret_name"
}

# Create or update the klyro-edge-oauth-credentials secret from the OAuth2 client
# ID/secret pair.
create_oauth_credentials_secret() {
    local secret_name="klyro-edge-oauth-credentials"
    local -a literals=(
        --from-literal=client-id="$OAUTH2_CLIENT_ID"
        --from-literal=client-secret="$OAUTH2_CLIENT_SECRET"
    )

    kubectl create secret generic "$secret_name" "${literals[@]}" \
        --namespace="$NAMESPACE" --dry-run=client -o yaml \
        | kubectl apply -f - >/dev/null

    printf "    ${GREEN}[OK]${NC} Secret: %s\n" "$secret_name"
}

# Create or update the datadog-secrets secret. Skipped (with a note) when no
# Datadog API key is available.
create_datadog_secret() {
    local secret_name="datadog-secrets"

    if [ -n "$DATADOG_API_KEY" ]; then
        kubectl create secret generic "$secret_name" \
            --from-literal=api-key="$DATADOG_API_KEY" \
            --namespace="$NAMESPACE" \
            --dry-run=client -o yaml \
            | kubectl apply -f - >/dev/null

        printf "    ${GREEN}[OK]${NC} Secret: %s\n" "$secret_name"
        return 0
    fi

    printf "    ${BLUE}[SKIP]${NC} Datadog telemetry (no API key)\n"
    return 0
}

# Create or update the anthropic-credentials secret. The key is mandatory: when
# $ANTHROPIC_API_KEY is empty the installer prints guidance and exits with status 1.
create_anthropic_secret() {
    local secret_name="anthropic-credentials"

    [ -n "$ANTHROPIC_API_KEY" ] || {
        log_error "Anthropic API key not available"
        echo ""
        echo "The Anthropic API key was not returned by the Klyro Cloud API."
        echo "If this persists, contact support or provide it manually:"
        echo "  ANTHROPIC_API_KEY=sk-ant-... ./install.sh <TOKEN>"
        exit 1
    }

    kubectl create secret generic "$secret_name" \
        --from-literal=api-key="$ANTHROPIC_API_KEY" \
        --namespace="$NAMESPACE" \
        --dry-run=client -o yaml \
        | kubectl apply -f - >/dev/null

    printf "    ${GREEN}[OK]${NC} Secret: %s\n" "$secret_name"
}

# Create or update the openai-credentials secret. The secret is always written:
# with the real key when $OPENAI_API_KEY is set, otherwise with the placeholder
# value "not-configured".
create_openai_secret() {
    local key_value="not-configured"
    local tag_color="$YELLOW"
    local tag="INFO"
    local suffix=" (placeholder)"

    if [ -n "$OPENAI_API_KEY" ]; then
        key_value="$OPENAI_API_KEY"
        tag_color="$GREEN"
        tag="OK"
        suffix=""
    fi

    kubectl create secret generic openai-credentials \
        --from-literal=api-key="$key_value" \
        --namespace="$NAMESPACE" \
        --dry-run=client -o yaml \
        | kubectl apply -f - >/dev/null

    printf "    ${tag_color}[%s]${NC} Secret: openai-credentials%s\n" "$tag" "$suffix"
}

# Create or update the braintrust-credentials secret when $BRAINTRUST_API_KEY is
# set; otherwise just report that Braintrust is skipped.
create_braintrust_secret() {
    local secret_name="braintrust-credentials"

    # Guard clause: Braintrust is optional.
    if [ -z "$BRAINTRUST_API_KEY" ]; then
        printf "    ${BLUE}[SKIP]${NC} Braintrust (no API key provided)\n"
        return 0
    fi

    kubectl create secret generic "$secret_name" \
        --from-literal=api-key="$BRAINTRUST_API_KEY" \
        --namespace="$NAMESPACE" \
        --dry-run=client -o yaml \
        | kubectl apply -f - >/dev/null
    printf "    ${GREEN}[OK]${NC} Secret: %s\n" "$secret_name"
}

# =============================================================================
# Helm Installation
# =============================================================================

# Install or upgrade the klyro-edge Helm release.
#
# Chart source: ../charts/klyro-edge relative to the script if that directory
# exists, otherwise the OCI chart on GHCR (logging in first when a token is set).
#
# Globals read:
#   NAMESPACE, RELEASE_NAME, GHCR_TOKEN, GHCR_REGISTRY, K3D_REGISTRY, CHART_VERSION,
#   GRPC_ENDPOINT, AUTH_ENDPOINT, KLYRO_API_URL, AGENT_ID, TENANT_ID, SITE_ID,
#   BRAINTRUST_API_KEY, DATADOG_API_KEY, KLYRO_DEBUG, RESPONSE_FILE (cleanup only)
# Globals written:
#   SCRIPT_DIR, LOCAL_CHART_PATH, OCI_CHART, CHART_PATH, HELM_ARGS, HELM_OUTPUT,
#   HELM_PID, HELM_STATUS
# Returns 1 if GHCR login fails; exits 1 (after printing Helm's output and debug
# hints) if the Helm command fails.
install_helm_chart() {
    SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
    LOCAL_CHART_PATH="${SCRIPT_DIR}/../charts/klyro-edge"
    OCI_CHART="oci://ghcr.io/klyrohq/klyro-security/charts/klyro-edge"

    if [ -d "$LOCAL_CHART_PATH" ]; then
        CHART_PATH="$LOCAL_CHART_PATH"
        log_info "Using local chart: $CHART_PATH"
    else
        CHART_PATH="$OCI_CHART"
        log_info "Local chart not found, using OCI registry: $CHART_PATH"

        if [ -n "$GHCR_TOKEN" ]; then
            log_info "Authenticating to GHCR..."
            printf '%s\n' "$GHCR_TOKEN" | helm registry login ghcr.io -u klyrohq --password-stdin >/dev/null 2>&1 || {
                log_error "Failed to authenticate to GHCR"
                return 1
            }
        fi

        if [ -n "$CHART_VERSION" ]; then
            log_info "Chart version: $CHART_VERSION"
        fi
    fi

    # Arguments are collected in an array so that values are passed to helm verbatim
    # (no word splitting, and no glob expansion of keys such as "imagePullSecrets[0]").
    local -a helm_args=(--namespace "$NAMESPACE")

    # Determine image tag — "latest" by default. When a dotted chart version is set,
    # the chart's values.yaml already has the correct SHA-pinned image tags, so the
    # tags are not overridden.
    local IMAGE_TAG="latest"
    if [ -n "$CHART_VERSION" ] && [[ "$CHART_VERSION" == *"."* ]]; then
        IMAGE_TAG=""
    fi

    if [ -n "$GHCR_TOKEN" ]; then
        helm_args+=(
            --set "global.imagePullSecrets[0].name=klyro-registry-secret"
            --set "agent.image.repository=${GHCR_REGISTRY}/klyro-security/klyro-edge"
            --set "worker.image.repository=${GHCR_REGISTRY}/klyro-security/klyro-operator"
        )
        if [ -n "$IMAGE_TAG" ]; then
            helm_args+=(
                --set "agent.image.tag=${IMAGE_TAG}"
                --set "worker.image.tag=${IMAGE_TAG}"
            )
        fi
        log_info "Using GHCR registry: ${GHCR_REGISTRY}"
    else
        helm_args+=(
            --set "global.imagePullSecrets=null"
            --set "agent.image.repository=${K3D_REGISTRY}/klyro-agent"
            --set "agent.image.tag=dev"
            --set "worker.image.repository=${K3D_REGISTRY}/klyro-operator"
            --set "worker.image.tag=dev"
        )
        log_warn "No GHCR token - using local k3d registry (dev mode)"
    fi

    helm_args+=(
        --set "agent.image.pullPolicy=Always"
        --set "agent.persistence.enabled=true"
    )

    # Prefer the default StorageClass, fall back to the first one. Both lookups are
    # best effort: `|| DEFAULT_SC=""` keeps `set -e` from aborting on clusters that
    # have no StorageClass (the jsonpath index lookup fails there).
    local DEFAULT_SC=""
    DEFAULT_SC=$(kubectl get storageclass -o jsonpath='{.items[?(@.metadata.annotations.storageclass\.kubernetes\.io/is-default-class=="true")].metadata.name}' 2>/dev/null) || DEFAULT_SC=""
    if [ -z "$DEFAULT_SC" ]; then
        DEFAULT_SC=$(kubectl get storageclass -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) || DEFAULT_SC=""
    fi
    # Several classes may be flagged as default; jsonpath then prints them
    # space-separated. Use the first one.
    DEFAULT_SC="${DEFAULT_SC%% *}"
    if [ -n "$DEFAULT_SC" ]; then
        helm_args+=(--set "agent.persistence.storageClass=$DEFAULT_SC")
        log_info "Using storage class: $DEFAULT_SC"
    fi

    # Detect environment from the gRPC endpoint. It drives both the worker
    # environment (Braintrust project isolation) and the telemetry ingest host.
    local klyro_env="production"
    local otel_ingest_host="otel-ingest.klyro.security"
    if [[ "$GRPC_ENDPOINT" == *"dev."* ]]; then
        klyro_env="dev"
        otel_ingest_host="otel-ingest.dev.klyro.security"
    elif [[ "$GRPC_ENDPOINT" == *"staging."* ]]; then
        klyro_env="staging"
        otel_ingest_host="otel-ingest.staging.klyro.security"
    fi

    helm_args+=(
        --set "klyroCloud.grpc.endpoint=$GRPC_ENDPOINT"
        --set "klyroCloud.apiKeySecretName=klyro-cloud-credentials"
        --set "klyroCloud.apiKeySecretKey=access-token"
        --set "klyroCloud.agentIdSecretKey=agent-id"
        --set "klyroCloud.heartbeat.enabled=true"
        --set "klyroCloud.heartbeat.intervalSeconds=30"
        --set "klyroCloud.gatewayHttpUrl=${KLYRO_API_URL}"
        --set "worker.enabled=true"
        --set "worker.image.pullPolicy=Always"
        --set "worker.mode=edge"
        --set "worker.environment=${klyro_env}"
        --set "worker.anthropic.secretName=anthropic-credentials"
        --set "worker.anthropic.secretKey=api-key"
    )

    if [ -n "$BRAINTRUST_API_KEY" ]; then
        helm_args+=(
            --set "worker.braintrust.enabled=true"
            --set "worker.braintrust.secretName=braintrust-credentials"
            --set "worker.braintrust.secretKey=api-key"
        )
    else
        helm_args+=(--set "worker.braintrust.enabled=false")
    fi

    helm_args+=(
        --set "temporal.enabled=false"
        --set "worker.temporal.hostPort=temporal-frontend:7233"
        --set "worker.temporal.namespace=default"
        --set "worker.temporal.taskQueue=klyro-tasks"
        --set "opentelemetry.enabled=true"
        --set "opentelemetry.collector.enabled=true"
        --set "opentelemetry.clusterName=$AGENT_ID"
        --set "opentelemetry.tenantId=$TENANT_ID"
        --set "opentelemetry.siteId=$SITE_ID"
        --set "opentelemetry.collector.exporters.prometheus.enabled=true"
        # Remote OTLP export to Klyro central observability (always enabled)
        --set "opentelemetry.collector.exporters.remoteOtlp.enabled=true"
        --set "opentelemetry.collector.exporters.remoteOtlp.endpoint=${otel_ingest_host}:4320"
    )
    if [ -n "$AUTH_ENDPOINT" ]; then
        helm_args+=(--set "opentelemetry.collector.exporters.remoteOtlp.auth.tokenUrl=${AUTH_ENDPOINT}/protocol/openid-connect/token")
    fi

    # Datadog exporter (optional, in addition to Klyro remote OTLP)
    if [ -n "$DATADOG_API_KEY" ]; then
        helm_args+=(--set "opentelemetry.collector.exporters.datadog.enabled=true")
    fi

    helm_args+=(
        --set "ecr.enabled=false"
        --set "imagePullSecrets.create=false"
        --wait --timeout 15m
    )

    if [[ "$CHART_PATH" == oci://* ]] && [ -n "$CHART_VERSION" ]; then
        helm_args+=(--version "$CHART_VERSION")
    fi

    # Kept for backward compatibility with code that inspects the flat string.
    HELM_ARGS="${helm_args[*]}"

    HELM_OUTPUT=$(mktemp)
    # This replaces any earlier EXIT trap, so also remove the credentials response
    # file (if one was recorded) to avoid leaving secrets in the temp directory.
    trap 'rm -f -- "$HELM_OUTPUT" ${RESPONSE_FILE:+"$RESPONSE_FILE"}' EXIT

    log_info "Running Helm upgrade/install (this may take a few minutes)..."
    echo ""

    if [ "${KLYRO_DEBUG:-}" = "true" ]; then
        helm upgrade --install "$RELEASE_NAME" "$CHART_PATH" "${helm_args[@]}" 2>&1 | tee "$HELM_OUTPUT"
        HELM_STATUS=${PIPESTATUS[0]}
    else
        helm upgrade --install "$RELEASE_NAME" "$CHART_PATH" "${helm_args[@]}" >"$HELM_OUTPUT" 2>&1 &
        HELM_PID=$!

        # Show progress while helm runs. ${spinstr:i:1} picks one (multibyte)
        # glyph; `cut -c` is byte-based on GNU coreutils.
        local spinstr='⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏'
        local i=0
        printf "    ${BLUE}[%s]${NC} Deploying Klyro Edge..." "${spinstr:0:1}"
        while kill -0 "$HELM_PID" 2>/dev/null; do
            i=$(( (i + 1) % 10 ))
            printf "\r    ${BLUE}[%s]${NC} Deploying Klyro Edge..." "${spinstr:i:1}"
            sleep 0.1
        done

        # A bare `wait` would trip `set -e` on failure and exit before the Helm
        # output and debug hints below are printed.
        HELM_STATUS=0
        wait "$HELM_PID" || HELM_STATUS=$?

        if [ "$HELM_STATUS" -eq 0 ]; then
            printf "\r    ${GREEN}[OK]${NC} Deploying Klyro Edge      \n"
        else
            printf "\r    ${RED}[FAIL]${NC} Deploying Klyro Edge      \n"
        fi
    fi

    if [ "$HELM_STATUS" -ne 0 ]; then
        log_error "Helm installation failed"
        echo ""
        echo "Helm output:"
        cat "$HELM_OUTPUT"
        echo ""
        echo "Debug commands:"
        echo "  kubectl get pods -n $NAMESPACE"
        echo "  kubectl describe pods -n $NAMESPACE"
        echo "  kubectl logs -n $NAMESPACE -l app.kubernetes.io/name=klyro-agent"
        exit 1
    fi

    log_success "Helm chart installed successfully"
}

# =============================================================================
# Verification
# =============================================================================

# Verify that the freshly installed release is actually running.
#
# Steps:
#   1. Wait (up to 120s) for the agent deployment rollout — fatal on failure.
#   2. Poll the agent's /health endpoint via `kubectl exec` (12 attempts, 5s apart)
#      — fatal on failure.
#   3. Wait (up to 120s) for the worker deployment rollout — warning only.
#   4. Print the pod list.
#
# Globals read:    RELEASE_NAME, NAMESPACE, colour variables
# Globals written: ROLLOUT_PID, ROLLOUT_STATUS, POD_NAME, HEALTH, WORKER_PID,
#                  WORKER_STATUS
verify_installation() {
    echo ""

    local spinstr='⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏'
    local i=0

    # --- Agent deployment rollout (blocking) ---
    kubectl rollout status "deployment/${RELEASE_NAME}-agent" \
        -n "$NAMESPACE" --timeout=120s >/dev/null 2>&1 &
    ROLLOUT_PID=$!

    # ${spinstr:i:1} picks one (multibyte) glyph; `cut -c` is byte-based on GNU
    # coreutils.
    printf "    ${BLUE}[%s]${NC} Waiting for agent deployment..." "${spinstr:0:1}"
    while kill -0 "$ROLLOUT_PID" 2>/dev/null; do
        i=$(( (i + 1) % 10 ))
        printf "\r    ${BLUE}[%s]${NC} Waiting for agent deployment..." "${spinstr:i:1}"
        sleep 0.1
    done

    # A bare `wait` would trip `set -e` on failure and skip the diagnostics below.
    ROLLOUT_STATUS=0
    wait "$ROLLOUT_PID" || ROLLOUT_STATUS=$?

    if [ "$ROLLOUT_STATUS" -eq 0 ]; then
        printf "\r    ${GREEN}[OK]${NC} Agent deployment ready        \n"
    else
        printf "\r    ${RED}[FAIL]${NC} Agent deployment did not become ready in 120s\n"
        log_error "Agent pod failed to start. Debug with:"
        echo "  kubectl get pods -n $NAMESPACE"
        echo "  kubectl describe pod -n $NAMESPACE -l app.kubernetes.io/component=agent"
        echo "  kubectl logs -n $NAMESPACE -l app.kubernetes.io/component=agent"
        exit 1
    fi

    # --- Health check with retry (blocking, up to 60s) ---
    local max_retries=12
    local retry_interval=5
    local health_ok=false
    local attempt

    for (( attempt = 1; attempt <= max_retries; attempt++ )); do
        # The lookup fails (non-zero) while no pod exists yet; treat that as
        # "no pod" and retry instead of letting `set -e` abort the installer.
        POD_NAME=$(kubectl get pods -n "$NAMESPACE" \
            -l app.kubernetes.io/component=agent \
            -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) || POD_NAME=""

        if [ -z "$POD_NAME" ]; then
            printf "\r    ${BLUE}[%s]${NC} Waiting for agent pod... (attempt %d/%d)" \
                "${spinstr:$(( attempt % 10 )):1}" "$attempt" "$max_retries"
            sleep "$retry_interval"
            continue
        fi

        HEALTH=$(kubectl exec -n "$NAMESPACE" "$POD_NAME" -- \
            wget -qO- http://localhost:8080/health 2>/dev/null || echo "")

        if printf '%s\n' "$HEALTH" | grep -qi "ok"; then
            health_ok=true
            break
        fi

        printf "\r    ${BLUE}[%s]${NC} Health check... (attempt %d/%d)        " \
            "${spinstr:$(( attempt % 10 )):1}" "$attempt" "$max_retries"
        sleep "$retry_interval"
    done

    if [ "$health_ok" = "true" ]; then
        printf "\r    ${GREEN}[OK]${NC} Health check passed                    \n"
    else
        printf "\r    ${RED}[FAIL]${NC} Health check failed after ${max_retries} attempts\n"
        log_error "Agent is running but not healthy. Debug with:"
        echo "  kubectl logs -n $NAMESPACE $POD_NAME"
        exit 1
    fi

    # --- Worker/operator deployment check (non-fatal) ---
    kubectl rollout status "deployment/${RELEASE_NAME}-worker" \
        -n "$NAMESPACE" --timeout=120s >/dev/null 2>&1 &
    WORKER_PID=$!

    printf "    ${BLUE}[...]${NC} Waiting for worker deployment..."
    # Capture the status explicitly so a slow worker only produces the warning
    # below rather than terminating the script under `set -e`.
    WORKER_STATUS=0
    wait "$WORKER_PID" || WORKER_STATUS=$?

    if [ "$WORKER_STATUS" -eq 0 ]; then
        printf "\r    ${GREEN}[OK]${NC} Worker deployment ready         \n"
    else
        printf "\r    ${YELLOW}[WARN]${NC} Worker deployment taking longer than expected\n"
    fi

    echo ""
    log_info "Pod status:"
    kubectl get pods -n "$NAMESPACE" -o wide 2>/dev/null | sed 's/^/    /'
}

verify_rbac() {
    log_info "Verifying RBAC for OTA upgrades..."

    local SA_NAME="klyro-edge"
    local CR_NAME="${RELEASE_NAME}-helm-manager"
    # Supplementary ClusterRole/CRB — NOT managed by Helm, survives chart upgrades.
    # Required because helm apply can overwrite the helm-managed ClusterRole and remove
    # escalate+bind verbs, leaving the SA unable to restore its own RBAC permissions.
    local ESCALATOR_NAME="${RELEASE_NAME}-rbac-escalator"

    # Always ensure the supplementary escalator exists regardless of helm-managed RBAC state.
    kubectl apply -f - <<RBAC_EOF >/dev/null 2>&1
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: ${ESCALATOR_NAME}
  annotations:
    managed-by: "klyro-install"
rules:
  - apiGroups: ["rbac.authorization.k8s.io"]
    resources: ["roles", "rolebindings", "clusterroles", "clusterrolebindings"]
    verbs: ["get", "list", "watch", "create", "update", "patch", "delete", "bind", "escalate"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: ${ESCALATOR_NAME}
  annotations:
    managed-by: "klyro-install"
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: ${ESCALATOR_NAME}
subjects:
  - kind: ServiceAccount
    name: ${SA_NAME}
    namespace: ${NAMESPACE}
RBAC_EOF
    printf "    ${GREEN}[OK]${NC} RBAC escalator bootstrap applied\n"

    if kubectl auth can-i list secrets --namespace="$NAMESPACE" \
        --as "system:serviceaccount:${NAMESPACE}:${SA_NAME}" >/dev/null 2>&1; then
        printf "    ${GREEN}[OK]${NC} Agent can read Helm release secrets\n"
        return 0
    fi

    log_warn "Agent ServiceAccount cannot read secrets — applying RBAC fix..."

    if ! kubectl get clusterrole "$CR_NAME" >/dev/null 2>&1; then
        log_info "Creating ClusterRole $CR_NAME..."
        kubectl apply -f - <<RBAC_EOF
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: ${CR_NAME}
rules:
  - apiGroups: [""]
    resources: ["secrets", "configmaps", "serviceaccounts"]
    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
  - apiGroups: [""]
    resources: ["pods", "services", "endpoints", "persistentvolumeclaims"]
    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
  - apiGroups: ["rbac.authorization.k8s.io"]
    resources: ["roles", "rolebindings", "clusterroles", "clusterrolebindings"]
    verbs: ["get", "list", "watch", "create", "update", "patch", "delete", "bind", "escalate"]
  - apiGroups: ["apps"]
    resources: ["deployments", "daemonsets", "replicasets", "statefulsets"]
    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
  - apiGroups: ["batch"]
    resources: ["jobs", "cronjobs"]
    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["namespaces"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["pods", "namespaces", "nodes"]
    verbs: ["get", "watch", "list"]
  - apiGroups: ["apps"]
    resources: ["replicasets", "deployments", "daemonsets", "statefulsets"]
    verbs: ["get", "watch", "list"]
  - apiGroups: ["batch"]
    resources: ["jobs", "cronjobs"]
    verbs: ["get", "watch", "list"]
RBAC_EOF
    fi

    if ! kubectl get clusterrolebinding "$CR_NAME" >/dev/null 2>&1; then
        log_info "Creating ClusterRoleBinding $CR_NAME..."
        kubectl apply -f - <<RBAC_EOF
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: ${CR_NAME}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: ${CR_NAME}
subjects:
  - kind: ServiceAccount
    name: ${SA_NAME}
    namespace: ${NAMESPACE}
RBAC_EOF
    fi

    sleep 2
    if kubectl auth can-i list secrets --namespace="$NAMESPACE" \
        --as "system:serviceaccount:${NAMESPACE}:${SA_NAME}" >/dev/null 2>&1; then
        printf "    ${GREEN}[OK]${NC} RBAC fixed — agent can read Helm release secrets\n"
    else
        log_warn "RBAC still not working after applying fix"
        log_warn "OTA self-upgrade will not work until RBAC is resolved"
        log_warn "Debug: kubectl get clusterrole $CR_NAME -o yaml"
    fi
}

# =============================================================================
# Main
# =============================================================================

# Print the numbered step separator used by main_local, followed by the step title.
# Arguments: $1 - current step, $2 - total steps, $3 - step title
_local_step_banner() {
    echo ""
    printf "${CYAN}Step %s/%s${NC} ─────────────────────────────────────────────────────────\n" "$1" "$2"
    log_step "$3"
}

# Entry point for --local mode: builds a complete local environment, then installs
# and verifies Klyro Edge in it.
#
# When CUSTOM_GRPC_ENDPOINT is set, the mock cloud step and the connectivity tests
# are skipped (7 steps instead of 8).
#
# Globals read: CUSTOM_GRPC_ENDPOINT, CUSTOM_API_ENDPOINT, AGENT_ID, NAMESPACE,
#               RELEASE_NAME, colour variables
main_local() {
    show_banner
    printf "${CYAN}════════════════════════════════════════════════════════════════════${NC}\n"
    printf "${CYAN}                     LOCAL MODE INSTALLATION                        ${NC}\n"
    printf "${CYAN}════════════════════════════════════════════════════════════════════${NC}\n"
    echo ""

    local use_custom_endpoints=false
    if [ -n "$CUSTOM_GRPC_ENDPOINT" ]; then
        use_custom_endpoints=true
        log_info "Using custom gRPC endpoint: $CUSTOM_GRPC_ENDPOINT"
        if [ -n "$CUSTOM_API_ENDPOINT" ]; then
            log_info "Using custom API endpoint: $CUSTOM_API_ENDPOINT"
        fi
        echo ""
    fi

    local total_steps=8
    if [ "$use_custom_endpoints" = "true" ]; then
        total_steps=7  # Skip mock cloud step
    fi

    # step=$((step + 1)) is used instead of ((step++)): the latter returns a
    # non-zero status when the pre-increment value is 0, which is fragile under set -e.
    local step=1

    _local_step_banner "$step" "$total_steps" "Checking local prerequisites"
    check_local_prerequisites
    step=$((step + 1))

    _local_step_banner "$step" "$total_steps" "Setting up k3d cluster"
    create_k3d_cluster
    step=$((step + 1))

    _local_step_banner "$step" "$total_steps" "Building Docker images"
    build_local_images
    step=$((step + 1))

    _local_step_banner "$step" "$total_steps" "Creating Kubernetes namespace"
    create_namespace
    step=$((step + 1))

    _local_step_banner "$step" "$total_steps" "Starting Temporal"
    start_temporal
    step=$((step + 1))

    if [ "$use_custom_endpoints" = "false" ]; then
        _local_step_banner "$step" "$total_steps" "Starting Mock Cloud gRPC server"
        start_mock_cloud
        step=$((step + 1))
    fi

    _local_step_banner "$step" "$total_steps" "Setting up credentials"
    fetch_credentials_local
    create_registry_secret
    create_cloud_credentials_secret
    create_oauth_credentials_secret
    create_local_secrets
    step=$((step + 1))

    _local_step_banner "$step" "$total_steps" "Installing Klyro Edge"
    install_helm_chart_local

    echo ""
    printf "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}\n"
    log_step "Verifying installation"
    verify_local_installation
    verify_rbac

    if [ "$use_custom_endpoints" = "false" ]; then
        echo ""
        printf "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}\n"
        run_connectivity_tests
    fi

    echo ""
    echo ""
    printf "${GREEN}╔═══════════════════════════════════════════════════════════════════╗${NC}\n"
    printf "${GREEN}║${NC}                                                                   ${GREEN}║${NC}\n"
    printf "${GREEN}║${NC}   ${GREEN}✓ Klyro Edge LOCAL installation complete!${NC}                     ${GREEN}║${NC}\n"
    printf "${GREEN}║${NC}                                                                   ${GREEN}║${NC}\n"
    printf "${GREEN}╚═══════════════════════════════════════════════════════════════════╝${NC}\n"
    echo ""
    # The colour variables hold literal "\033[...m" sequences, which only printf
    # (in its format string) turns into escape codes; Bash's builtin echo would
    # print them verbatim.
    printf "  Agent ID:      ${GREEN}%s${NC}\n" "$AGENT_ID"
    printf "  Namespace:     ${GREEN}%s${NC}\n" "$NAMESPACE"
    printf "  Release:       ${GREEN}%s${NC}\n" "$RELEASE_NAME"
    printf "  Mode:          ${CYAN}LOCAL${NC}\n"
    if [ "$use_custom_endpoints" = "true" ]; then
        printf "  gRPC Endpoint: ${CYAN}%s${NC}\n" "$CUSTOM_GRPC_ENDPOINT"
        if [ -n "$CUSTOM_API_ENDPOINT" ]; then
            printf "  API Endpoint:  ${CYAN}%s${NC}\n" "$CUSTOM_API_ENDPOINT"
        fi
    fi
    echo ""
    printf "${BLUE}Components running:${NC}\n"
    if [ "$use_custom_endpoints" = "true" ]; then
        echo "  - Agent:       Connected to custom cloud endpoints"
    else
        echo "  - Agent:       Connected to mock cloud & Temporal"
        echo "  - Mock Cloud:  gRPC server for agent registration"
    fi
    echo "  - Worker:      Polling Temporal for tasks"
    echo "  - Temporal:    Workflow orchestration"
    echo ""
    printf "${BLUE}Useful commands:${NC}\n"
    echo "  kubectl get pods -n $NAMESPACE"
    echo "  kubectl logs -n $NAMESPACE -l app.kubernetes.io/component=agent -f"
    if [ "$use_custom_endpoints" = "false" ]; then
        echo "  kubectl logs -n $NAMESPACE -l app=mock-cloud -f"
    fi
    echo "  kubectl port-forward -n $NAMESPACE svc/temporal-web 8088:8088"
    echo ""
    printf "${BLUE}To clean up:${NC}\n"
    echo "  k3d cluster delete klyro-local"
    echo ""
}

# -----------------------------------------------------------------------------
# main_production — token-driven (non-local) installation flow.
#
# Arguments:
#   $1 - install token. A value that does not start with "inst_" only
#        triggers a warning; an empty value aborts.
# Globals written:
#   INSTALL_TOKEN
# Globals read:
#   NAMESPACE, RELEASE_NAME, AGENT_ID, TENANT_ID, SITE_ID, GRPC_ENDPOINT,
#   AUTH_ENDPOINT and the colour variables (GREEN, BLUE, NC).
#   NOTE(review): AGENT_ID/TENANT_ID/SITE_ID are presumably populated by
#   fetch_credentials — confirm against that function.
# Exit status:
#   Exits 1 when no token is supplied.
# -----------------------------------------------------------------------------
main_production() {
    show_banner

    INSTALL_TOKEN="$1"

    if [ -z "$INSTALL_TOKEN" ]; then
        log_error "Missing install token"
        echo ""
        usage
        exit 1
    fi

    case "$INSTALL_TOKEN" in
        inst_*)
            ;;
        *)
            log_warn "Token format looks unusual (expected: inst_xxxx...)"
            ;;
    esac

    local total_steps=7
    local current_step=1
    # Horizontal rule printed after every "Step N/M" heading.
    local rule="─────────────────────────────────────────────────────────"

    echo ""
    printf "${BLUE}Step ${current_step}/${total_steps}${NC} ${rule}\n"
    log_step "Checking prerequisites"
    check_prerequisites
    # Plain assignment instead of ((x++)): its exit status is always 0, so it
    # can never trip `set -e` regardless of the counter's value.
    current_step=$((current_step + 1))

    echo ""
    printf "${BLUE}Step ${current_step}/${total_steps}${NC} ${rule}\n"
    log_step "Cleaning up existing installation"
    cleanup_existing_installation
    current_step=$((current_step + 1))

    echo ""
    printf "${BLUE}Step ${current_step}/${total_steps}${NC} ${rule}\n"
    log_step "Fetching credentials"
    fetch_credentials "$INSTALL_TOKEN"
    current_step=$((current_step + 1))

    echo ""
    printf "${BLUE}Step ${current_step}/${total_steps}${NC} ${rule}\n"
    log_step "Creating Kubernetes resources"
    create_namespace
    create_registry_secret
    create_cloud_credentials_secret
    create_oauth_credentials_secret
    create_anthropic_secret
    create_openai_secret
    create_braintrust_secret
    create_datadog_secret

    # Store agent metadata for --reinstall recovery.
    # `create --dry-run=client -o yaml | apply` makes the write idempotent:
    # it updates the ConfigMap if it already exists instead of failing.
    kubectl create configmap klyro-agent-info \
        --from-literal=agent-id="$AGENT_ID" \
        --from-literal=tenant-id="$TENANT_ID" \
        --from-literal=site-id="$SITE_ID" \
        --from-literal=grpc-endpoint="$GRPC_ENDPOINT" \
        --from-literal=auth-endpoint="${AUTH_ENDPOINT:-}" \
        --namespace="$NAMESPACE" \
        --dry-run=client -o yaml | kubectl apply -f - >/dev/null
    printf "    ${GREEN}[OK]${NC} ConfigMap: klyro-agent-info (for reinstall recovery)\n"
    current_step=$((current_step + 1))

    echo ""
    printf "${BLUE}Step ${current_step}/${total_steps}${NC} ${rule}\n"
    log_step "Starting Temporal"
    start_temporal
    current_step=$((current_step + 1))


    echo ""
    printf "${BLUE}Step ${current_step}/${total_steps}${NC} ${rule}\n"
    log_step "Installing Helm chart"
    install_helm_chart
    current_step=$((current_step + 1))

    echo ""
    printf "${BLUE}Step ${current_step}/${total_steps}${NC} ${rule}\n"
    log_step "Verifying installation"
    verify_installation
    verify_rbac

    echo ""
    echo ""
    printf "${GREEN}╔═══════════════════════════════════════════════════════════════════╗${NC}\n"
    printf "${GREEN}║${NC}                                                                   ${GREEN}║${NC}\n"
    printf "${GREEN}║${NC}   ${GREEN}✓ Klyro Edge installed successfully!${NC}                          ${GREEN}║${NC}\n"
    printf "${GREEN}║${NC}                                                                   ${GREEN}║${NC}\n"
    printf "${GREEN}╚═══════════════════════════════════════════════════════════════════╝${NC}\n"
    echo ""
    # printf, not echo: the colour variables hold literal "\033[..." text that
    # bash's echo prints verbatim; printf interprets the escapes in its format
    # string. Values are passed via %s so they are never treated as format.
    printf "  Agent ID:   ${GREEN}%s${NC}\n" "$AGENT_ID"
    printf "  Namespace:  ${GREEN}%s${NC}\n" "$NAMESPACE"
    printf "  Release:    ${GREEN}%s${NC}\n" "$RELEASE_NAME"
    echo ""
    printf "${BLUE}Useful commands:${NC}\n"
    echo "  kubectl get pods -n $NAMESPACE"
    echo "  kubectl logs -n $NAMESPACE -l app.kubernetes.io/component=agent -f"
    echo "  helm status $RELEASE_NAME -n $NAMESPACE"
    echo ""
}

# =============================================================================
# Reinstall Mode — reuses existing agent credentials from K8s secrets
# =============================================================================

# Print the base64-decoded value of one data key of a Secret in $NAMESPACE.
# kubectl/base64 diagnostics are muted; a missing secret or key yields no output.
#   $1 - secret name
#   $2 - key under .data
_klyro_secret_value() {
    kubectl get secret "$1" -n "$NAMESPACE" \
        -o jsonpath="{.data.$2}" 2>/dev/null | base64 -d 2>/dev/null
}

# Print one data key of the klyro-agent-info ConfigMap in $NAMESPACE, or
# nothing (still returning success) when the ConfigMap cannot be read.
#   $1 - key under .data
_klyro_agent_info_value() {
    kubectl get configmap klyro-agent-info -n "$NAMESPACE" \
        -o jsonpath="{.data.$1}" 2>/dev/null || true
}

# -----------------------------------------------------------------------------
# recover_credentials_from_secrets — reload agent identity from the cluster.
#
# Globals written:
#   AGENT_ID, ACCESS_TOKEN, OAUTH2_CLIENT_ID, OAUTH2_CLIENT_SECRET,
#   TENANT_ID, SITE_ID, GRPC_ENDPOINT, AUTH_ENDPOINT, GHCR_TOKEN,
#   DATADOG_API_KEY
# Globals read:
#   NAMESPACE, plus any pre-existing GRPC_ENDPOINT / AUTH_ENDPOINT (used as
#   fallback when the ConfigMap does not provide them).
# Exit status:
#   Exits 1 when AGENT_ID or either OAuth2 credential cannot be recovered.
# -----------------------------------------------------------------------------
recover_credentials_from_secrets() {
    log_info "Recovering agent credentials from existing K8s secrets..."

    AGENT_ID=$(_klyro_secret_value klyro-cloud-credentials agent-id)
    ACCESS_TOKEN=$(_klyro_secret_value klyro-cloud-credentials access-token)
    OAUTH2_CLIENT_ID=$(_klyro_secret_value klyro-edge-oauth-credentials client-id)
    OAUTH2_CLIENT_SECRET=$(_klyro_secret_value klyro-edge-oauth-credentials client-secret)

    # Recover tenant/site/endpoints from agent-info configmap
    TENANT_ID=$(_klyro_agent_info_value tenant-id)
    SITE_ID=$(_klyro_agent_info_value site-id)

    # Endpoint precedence: value recorded in the ConfigMap, else whatever is
    # already set in this shell (e.g. derived from --env), else the built-in
    # production gRPC host. Previously a missing ConfigMap replaced an
    # already-set endpoint with the production default / an empty string.
    local recovered_grpc recovered_auth
    recovered_grpc=$(_klyro_agent_info_value grpc-endpoint)
    recovered_auth=$(_klyro_agent_info_value auth-endpoint)
    GRPC_ENDPOINT="${recovered_grpc:-${GRPC_ENDPOINT:-grpc.klyro.security:443}}"
    AUTH_ENDPOINT="${recovered_auth:-${AUTH_ENDPOINT:-}}"

    GHCR_TOKEN=$(_klyro_secret_value ghcr-credentials token)
    DATADOG_API_KEY=$(_klyro_secret_value datadog-secrets api-key)

    # Only the agent id and the OAuth2 pair are mandatory; the other values
    # may legitimately come back empty.
    if [ -z "$AGENT_ID" ] || [ -z "$OAUTH2_CLIENT_ID" ] || [ -z "$OAUTH2_CLIENT_SECRET" ]; then
        log_error "Could not recover credentials from existing secrets"
        echo ""
        echo "Required secrets not found in namespace $NAMESPACE."
        echo "Please use a fresh install token instead:"
        echo "  curl -sfL https://get.klyro.security | sh -s -- <NEW_TOKEN>"
        exit 1
    fi

    log_success "Recovered agent credentials"
    echo ""
    echo "    ┌─────────────────────────────────────────────────────┐"
    echo "    │  Agent ID:  $AGENT_ID"
    echo "    │  Mode:      reinstall (reusing existing credentials)"
    echo "    └─────────────────────────────────────────────────────┘"
}

# -----------------------------------------------------------------------------
# main_reinstall — upgrade an existing installation without a new token.
#
# Takes no arguments. Runs five steps: prerequisite check, credential
# recovery, refresh of the anthropic/openai/braintrust secrets, Helm chart
# install, and verification.
#
# Globals written:
#   GRPC_ENDPOINT, AUTH_ENDPOINT (filled with production defaults when still
#   empty after recovery), CHART_VERSION (cleared).
# Globals read:
#   AGENT_ID, NAMESPACE and the colour variables (YELLOW, BLUE, GREEN, NC).
# -----------------------------------------------------------------------------
main_reinstall() {
    show_banner
    printf "${YELLOW}════════════════════════════════════════════════════════════════════${NC}\n"
    printf "${YELLOW}                     REINSTALL MODE                                 ${NC}\n"
    printf "${YELLOW}════════════════════════════════════════════════════════════════════${NC}\n"
    echo ""

    local total_steps=5
    local current_step=1
    # Horizontal rule printed after every "Step N/M" heading.
    local rule="─────────────────────────────────────────────────────────"

    printf "${BLUE}Step ${current_step}/${total_steps}${NC} ${rule}\n"
    log_step "Checking prerequisites"
    check_prerequisites
    # Plain assignment instead of ((x++)): its exit status is always 0, so it
    # can never trip `set -e` regardless of the counter's value.
    current_step=$((current_step + 1))

    echo ""
    printf "${BLUE}Step ${current_step}/${total_steps}${NC} ${rule}\n"
    log_step "Recovering credentials from existing secrets"
    recover_credentials_from_secrets
    current_step=$((current_step + 1))

    # Re-create secrets that may have been lost (anthropic, openai, braintrust)
    echo ""
    printf "${BLUE}Step ${current_step}/${total_steps}${NC} ${rule}\n"
    log_step "Refreshing secrets"
    create_anthropic_secret
    create_openai_secret
    create_braintrust_secret
    current_step=$((current_step + 1))

    echo ""
    printf "${BLUE}Step ${current_step}/${total_steps}${NC} ${rule}\n"
    log_step "Upgrading Helm chart"
    # Recovery can leave either endpoint empty; fall back to production.
    GRPC_ENDPOINT="${GRPC_ENDPOINT:-grpc.klyro.security:443}"
    AUTH_ENDPOINT="${AUTH_ENDPOINT:-https://keycloak.prod.klyro.security/realms/klyro}"
    CHART_VERSION=""  # Use latest
    install_helm_chart
    current_step=$((current_step + 1))

    echo ""
    printf "${BLUE}Step ${current_step}/${total_steps}${NC} ${rule}\n"
    log_step "Verifying installation"
    verify_installation
    verify_rbac

    echo ""
    echo ""
    printf "${GREEN}╔═══════════════════════════════════════════════════════════════════╗${NC}\n"
    printf "${GREEN}║${NC}                                                                   ${GREEN}║${NC}\n"
    printf "${GREEN}║${NC}   ${GREEN}✓ Klyro Edge reinstalled successfully!${NC}                        ${GREEN}║${NC}\n"
    printf "${GREEN}║${NC}                                                                   ${GREEN}║${NC}\n"
    printf "${GREEN}╚═══════════════════════════════════════════════════════════════════╝${NC}\n"
    echo ""
    # printf, not echo: the colour variables hold literal "\033[..." text that
    # bash's echo prints verbatim; printf interprets the escapes in its format
    # string. Values are passed via %s so they are never treated as format.
    printf "  Agent ID:   ${GREEN}%s${NC}\n" "$AGENT_ID"
    printf "  Namespace:  ${GREEN}%s${NC}\n" "$NAMESPACE"
    printf "  Mode:       ${YELLOW}REINSTALL${NC} (reused existing credentials)\n"
    echo ""
}

# =============================================================================
# Environment URL Defaults
# =============================================================================

# Derive the API, gRPC and auth endpoints for the environment named by
# KLYRO_ENV (dev, staging, production; empty means production).
#
# KLYRO_API_URL and AUTH_ENDPOINT are only filled in when empty or unset;
# GRPC_ENDPOINT is always overwritten. An unrecognised KLYRO_ENV is reported
# through log_error and terminates the script with status 1.
apply_env_defaults() {
    # host_infix:  text inserted after "api"/"grpc" in the host name
    # auth_realm:  environment label used in the keycloak host name
    local host_infix auth_realm

    case "$KLYRO_ENV" in
        dev|staging)
            host_infix=".${KLYRO_ENV}"
            auth_realm="${KLYRO_ENV}"
            ;;
        production|"")
            host_infix=""
            auth_realm="prod"
            ;;
        *)
            log_error "Unknown environment: $KLYRO_ENV. Valid values: dev, staging, production"
            exit 1
            ;;
    esac

    # ":=" assigns only when the variable is unset or empty.
    : "${KLYRO_API_URL:=https://api${host_infix}.klyro.security}"
    : "${AUTH_ENDPOINT:=https://keycloak.${auth_realm}.klyro.security/realms/klyro}"
    GRPC_ENDPOINT="grpc${host_infix}.klyro.security:443"
}

# -----------------------------------------------------------------------------
# main — parse the command line and dispatch to the selected install mode.
#
# Arguments (any order):
#   <token>                       first non-flag word becomes INSTALL_TOKEN;
#                                 further non-flag words are ignored
#   --local | --local-dev         run main_local instead of main_production
#   --reinstall                   run main_reinstall (no token needed)
#   --with-temporal               sets with_temporal=true
#   --env <dev|staging|production>
#   --api-endpoint <url>          (also --api_endpoint)
#   --grpc-endpoint <host:port>   (also --grpc_endpoint)
#   --help | -h | help            print usage and exit 0
#
# Globals written:
#   is_local, is_reinstall, with_temporal, KLYRO_ENV, INSTALL_TOKEN,
#   CUSTOM_API_ENDPOINT, CUSTOM_GRPC_ENDPOINT (the last two and with_temporal
#   are also exported).
# Exit status:
#   Exits 1 on a flag with a missing value or when no token is supplied
#   outside reinstall mode; exits 0 after --help and after a reinstall.
# -----------------------------------------------------------------------------
main() {
    is_local=false
    is_reinstall=false
    with_temporal=false
    KLYRO_ENV="${KLYRO_ENV:-}"
    INSTALL_TOKEN=""
    CUSTOM_API_ENDPOINT=""
    CUSTOM_GRPC_ENDPOINT=""

    while [ $# -gt 0 ]; do
        case "$1" in
            --help|-h|help)
                usage
                # This arm never shifts, so if usage returns instead of
                # exiting the loop would spin forever. Stop here explicitly.
                exit 0
                ;;
            --local|--local-dev)
                is_local=true
                shift
                ;;
            --reinstall)
                is_reinstall=true
                shift
                ;;
            --with-temporal)
                with_temporal=true
                shift
                ;;
            --env)
                if [ -z "${2:-}" ] || [[ "$2" == --* ]]; then
                    log_error "--env requires a value: dev, staging, or production"
                    exit 1
                fi
                KLYRO_ENV="$2"
                shift 2
                ;;
            --api-endpoint|--api_endpoint)
                # `shift 2` fails when the flag is the last word, which would
                # abort silently under `set -e`; report the problem instead.
                # A following "--flag" is never a valid endpoint either.
                if [ $# -lt 2 ] || [[ "$2" == --* ]]; then
                    log_error "$1 requires a value"
                    exit 1
                fi
                CUSTOM_API_ENDPOINT="$2"
                shift 2
                ;;
            --grpc-endpoint|--grpc_endpoint)
                # Same guard as for --api-endpoint.
                if [ $# -lt 2 ] || [[ "$2" == --* ]]; then
                    log_error "$1 requires a value"
                    exit 1
                fi
                CUSTOM_GRPC_ENDPOINT="$2"
                shift 2
                ;;
            *)
                # Only the first positional word is kept as the token.
                if [ -z "$INSTALL_TOKEN" ]; then
                    INSTALL_TOKEN="$1"
                fi
                shift
                ;;
        esac
    done

    # Apply environment-specific URL defaults
    apply_env_defaults

    # Reinstall takes precedence over every other mode and needs no token.
    if [ "$is_reinstall" = "true" ]; then
        main_reinstall
        exit 0
    fi

    if [ -z "$INSTALL_TOKEN" ]; then
        log_error "Missing install token"
        echo ""
        usage
        exit 1
    fi

    export CUSTOM_API_ENDPOINT
    export CUSTOM_GRPC_ENDPOINT
    export with_temporal

    if [ "$is_local" = "true" ]; then
        main_local "$INSTALL_TOKEN"
    else
        main_production "$INSTALL_TOKEN"
    fi
}

# Script entry point: hand every command-line argument to main unchanged.
main "$@"
