#!/bin/bash
#
# Metrics aggregator launcher.
#
# Starts node_exporter, generates three helper scripts under /tmp
# (aggregate.sh, health.sh, metrics_handler.sh) and serves them over HTTP
# with socat:
#   GET /health     -> JSON produced by /tmp/health.sh
#   anything else   -> text metrics produced by /tmp/aggregate.sh
#
# Environment:
#   METRICS_PORT        port socat listens on          (default 9090)
#   NODE_EXPORTER_PORT  port node_exporter listens on  (default 9100)
#   SKILLS_DIR          read by health.sh              (default /skills)
#
# NOTE(review): the helper scripts live at fixed, predictable paths in /tmp.
# They are kept as-is because other components may reference them; confirm
# before moving them to a private directory.
set -euo pipefail

SCRIPT_DIR="$(dirname "$0")"
METRICS_PORT="${METRICS_PORT:-9090}"
NODE_EXPORTER_PORT="${NODE_EXPORTER_PORT:-9100}"
TARGETS_FILE="/tmp/metrics_targets.txt"

# PIDs of the background children; empty until the child has been started.
NODE_EXPORTER_PID=""
SOCAT_PID=""

#######################################
# Start node_exporter in the background and verify it survived startup.
# Globals:  NODE_EXPORTER_PORT (read), NODE_EXPORTER_PID (written)
# Outputs:  progress on stdout, failure message on stderr
# Exits:    1 if the process is no longer alive after 2 seconds
#######################################
start_node_exporter() {
  echo "Starting node_exporter on port $NODE_EXPORTER_PORT..."
  node_exporter --web.listen-address=":${NODE_EXPORTER_PORT}" &
  NODE_EXPORTER_PID=$!

  # Give the process a moment to fail fast (e.g. port already in use).
  sleep 2
  if ! kill -0 "$NODE_EXPORTER_PID" 2>/dev/null; then
    echo "Failed to start node_exporter" >&2
    exit 1
  fi
  echo "node_exporter running (PID $NODE_EXPORTER_PID)"
}

#######################################
# Write /tmp/aggregate.sh, which prints the aggregator's own gauge, the
# node_exporter metrics, and the metrics of every "<skill_name> <host:port>"
# entry listed in /tmp/metrics_targets.txt.
#######################################
create_aggregator_script() {
  # Quoted delimiter: the body is written literally, nothing expands here.
  cat > /tmp/aggregate.sh << 'SCRIPT'
#!/bin/bash
NODE_EXPORTER_PORT="${NODE_EXPORTER_PORT:-9100}"
TARGETS_FILE="/tmp/metrics_targets.txt"

# Collect all metrics
{
  # System metrics
  echo "# HELP vibestack_up Whether the metrics aggregator is up"
  echo "# TYPE vibestack_up gauge"
  echo "vibestack_up 1"
  echo ""

  # node_exporter metrics (best effort: an unreachable exporter is skipped)
  curl -s "http://localhost:$NODE_EXPORTER_PORT/metrics" 2>/dev/null || true

  # Skill metrics
  if [ -f "$TARGETS_FILE" ]; then
    # "|| [ -n ... ]" also processes a final line without trailing newline.
    while IFS=' ' read -r skill_name target || [ -n "$skill_name" ]; do
      [[ "$skill_name" =~ ^#.*$ ]] && continue   # comment line
      [ -z "$skill_name" ] && continue           # blank line
      [ -z "$target" ] && continue               # entry without a target

      echo ""
      echo "# Metrics from skill: $skill_name"
      # Best effort: a failing target must not abort the whole scrape.
      curl -s "http://${target}/metrics" 2>/dev/null || true
    done < "$TARGETS_FILE"
  fi
}
SCRIPT
  chmod +x /tmp/aggregate.sh
}

#######################################
# Serve HTTP on METRICS_PORT with socat, one handler process per connection.
# socat runs as a background child (not via exec) so that this shell stays
# alive and its traps can stop node_exporter on shutdown.
# Globals:  METRICS_PORT (read), SOCAT_PID (written)
# Exits:    with socat's exit status if socat terminates on its own
#######################################
serve_metrics() {
  echo "Starting metrics aggregator on port $METRICS_PORT..."

  if ! command -v socat &>/dev/null; then
    echo "Installing socat..."
    apt-get update && apt-get install -y socat
  fi

  # Use socat to handle HTTP requests
  socat "TCP-LISTEN:${METRICS_PORT},reuseaddr,fork" \
    "EXEC:/tmp/metrics_handler.sh" &
  SOCAT_PID=$!

  # A trapped signal interrupts `wait`; the trap handler then runs and exits.
  # If we get past this line socat ended by itself.
  local status=0
  wait "$SOCAT_PID" || status=$?
  exit "$status"
}

#######################################
# Write /tmp/health.sh, which probes every directory under SKILLS_DIR that
# has a known health port and prints a JSON summary:
#   {"status":"healthy|degraded","skills":{"<name>":"up|down",...}}
#######################################
create_health_script() {
  cat > /tmp/health.sh << 'SCRIPT'
#!/bin/bash
SKILLS_DIR="${SKILLS_DIR:-/skills}"

# Health check endpoints for known skills
declare -A HEALTH_PORTS=(
  ["caddy"]="2019"
  ["ttyd"]="7681"
  ["loki"]="3100"
  ["duckdb"]="8432"
  ["claude"]="8888"
  ["metrics"]="9090"
)

overall="healthy"
entries=""

for skill_dir in "$SKILLS_DIR"/*/; do
  # An unmatched glob yields the literal pattern; skip anything that is not
  # an existing directory.
  [ -d "$skill_dir" ] || continue

  skill_name=$(basename "$skill_dir")
  [ "$skill_name" = "supervisor" ] && continue
  [ "$skill_name" = "metrics" ] && continue

  port="${HEALTH_PORTS[$skill_name]:-}"
  [ -z "$port" ] && continue

  # Check health: try /health first, then fall back to /
  if curl -sf --max-time 2 "http://localhost:$port/health" >/dev/null 2>&1 ||
    curl -sf --max-time 2 "http://localhost:$port/" >/dev/null 2>&1; then
    status="up"
  else
    status="down"
    overall="degraded"   # any skill down degrades the overall status
  fi

  [ -n "$entries" ] && entries+=','
  entries+="\"$skill_name\":\"$status\""
done

printf '{"status":"%s","skills":{%s}}\n' "$overall" "$entries"
SCRIPT
  chmod +x /tmp/health.sh
}

#######################################
# Write /tmp/metrics_handler.sh, the per-connection HTTP handler that socat
# executes with the client socket on stdin/stdout.
#######################################
create_handler_script() {
  cat > /tmp/metrics_handler.sh << 'HANDLER'
#!/bin/bash

# Read HTTP request line: "<method> <path> <version>"
read -r request_line
path=$(echo "$request_line" | cut -d' ' -f2)

# Consume the headers up to the blank line. Accept CRLF as well as bare LF
# line endings so a LF-only client cannot keep the handler waiting for EOF.
while IFS= read -r header; do
  header=${header%$'\r'}
  [ -z "$header" ] && break
done

# send_response <content-type> <body>
# Writes a complete "200 OK" response to stdout.
send_response() {
  local content_type="$1"
  local body="$2"
  # Content-Length is a byte count; in the C locale ${#body} counts bytes
  # rather than (possibly multibyte) characters.
  local LC_ALL=C
  local body_length=${#body}

  printf 'HTTP/1.1 200 OK\r\n'
  printf 'Content-Type: %s\r\n' "$content_type"
  printf 'Content-Length: %d\r\n' "$body_length"
  printf 'Connection: close\r\n'
  printf '\r\n'
  printf '%s' "$body"
}

case "$path" in
  /health)
    health=$(/tmp/health.sh)
    send_response "application/json" "$health"
    ;;
  /metrics|*)
    metrics=$(/tmp/aggregate.sh)
    # $(...) strips trailing newlines; the text exposition format expects
    # the last line to end with a line feed, so put one back.
    send_response "text/plain; charset=utf-8" "${metrics}"$'\n'
    ;;
esac
HANDLER
  chmod +x /tmp/metrics_handler.sh
}

#######################################
# Send SIGTERM to whichever background children have been started.
# Safe to call more than once; already-dead PIDs are ignored.
# Globals:  SOCAT_PID, NODE_EXPORTER_PID (read)
#######################################
stop_children() {
  local pid
  for pid in "$SOCAT_PID" "$NODE_EXPORTER_PID"; do
    if [[ -n "$pid" ]]; then
      kill "$pid" 2>/dev/null || true
    fi
  done
}

#######################################
# Cleanup on exit: signal handler for SIGTERM / SIGINT.
#######################################
cleanup() {
  echo "Shutting down..."
  stop_children
  exit 0
}

trap cleanup SIGTERM SIGINT
# Also stop the children when the script exits for any other reason
# (socat died, a command failed under `set -e`, ...).
trap stop_children EXIT

start_node_exporter
create_aggregator_script
create_health_script
create_handler_script
serve_metrics