Files
metrics/scripts/run.sh

175 lines
4.2 KiB
Bash

#!/bin/bash
# Launcher for the metrics endpoint: starts node_exporter, writes helper
# scripts under /tmp and serves them over HTTP with socat.
set -e

# Directory part of the path this script was invoked with.
SCRIPT_DIR=$(dirname "$0")

# Listening ports. A non-empty value already present in the environment is
# kept; otherwise the default is assigned.
: "${METRICS_PORT:=9090}"
: "${NODE_EXPORTER_PORT:=9100}"

# Location of the skill scrape-target list.
TARGETS_FILE=/tmp/metrics_targets.txt
# Start node_exporter in the background and verify that it is still alive.
# Globals:
#   NODE_EXPORTER_PORT (read)    - port passed to --web.listen-address
#   NODE_EXPORTER_PID  (written) - PID of the background process
# Outputs:
#   Progress messages on stdout; the failure message on stderr.
# Exits:
#   With status 1 when the process no longer exists after the grace period.
start_node_exporter() {
  echo "Starting node_exporter on port $NODE_EXPORTER_PORT..."
  node_exporter --web.listen-address=":$NODE_EXPORTER_PORT" &
  NODE_EXPORTER_PID=$!

  # Grace period: a process that is going to fail at startup has time to exit.
  sleep 2

  # kill -0 sends no signal; it only tests that the PID still exists.
  if ! kill -0 "$NODE_EXPORTER_PID" 2>/dev/null; then
    echo "Failed to start node_exporter" >&2
    exit 1
  fi
  echo "node_exporter running (PID $NODE_EXPORTER_PID)"
}
# Create the aggregator CGI script.
# Writes /tmp/aggregate.sh and marks it executable. The generated script
# prints, on stdout, a vibestack_up gauge, then whatever node_exporter returns,
# then the /metrics output of every target listed in the targets file.
# Environment read by the generated script (at its own run time):
#   NODE_EXPORTER_PORT - node_exporter port (default 9100)
#   TARGETS_FILE       - targets list (default /tmp/metrics_targets.txt)
#   SCRAPE_TIMEOUT     - per-request limit in seconds (default 5)
create_aggregator_script() {
  # The delimiter is quoted, so nothing below is expanded while writing the
  # file; every variable is resolved when the generated script runs.
  cat > /tmp/aggregate.sh << 'SCRIPT'
#!/bin/bash
NODE_EXPORTER_PORT="${NODE_EXPORTER_PORT:-9100}"
TARGETS_FILE="${TARGETS_FILE:-/tmp/metrics_targets.txt}"
# Upper bound for each scrape so that one unresponsive endpoint cannot stall
# the whole response.
SCRAPE_TIMEOUT="${SCRAPE_TIMEOUT:-5}"
# Collect all metrics
{
  # System metrics
  echo "# HELP vibestack_up Whether the metrics aggregator is up"
  echo "# TYPE vibestack_up gauge"
  echo "vibestack_up 1"
  echo ""
  # node_exporter metrics (best effort: a failed scrape is skipped silently)
  curl -s --max-time "$SCRAPE_TIMEOUT" "http://localhost:$NODE_EXPORTER_PORT/metrics" 2>/dev/null || true
  # Skill metrics
  if [ -f "$TARGETS_FILE" ]; then
    # Line format: "<skill_name> <host:port>", separated by spaces or tabs.
    # Anything after the second field lands in "_" and is ignored, so it can
    # no longer end up inside the URL. The "|| [ -n ... ]" part keeps a final
    # line that has no trailing newline.
    while IFS=$' \t' read -r skill_name target _ || [ -n "$skill_name" ]; do
      [[ "$skill_name" =~ ^#.*$ ]] && continue
      [ -z "$skill_name" ] && continue
      [ -z "$target" ] && continue
      echo ""
      echo "# Metrics from skill: $skill_name"
      curl -s --max-time "$SCRAPE_TIMEOUT" "http://${target}/metrics" 2>/dev/null || true
    done < "$TARGETS_FILE"
  fi
}
SCRIPT
  chmod +x /tmp/aggregate.sh
}
# Serve metrics using socat.
# Runs socat on METRICS_PORT, handing each connection to
# /tmp/metrics_handler.sh, and blocks until socat ends or a signal arrives.
# Never returns: the function always ends the script.
# Globals:
#   METRICS_PORT      (read)    - TCP port to listen on
#   NODE_EXPORTER_PID (read)    - stopped when socat ends on its own
#   SOCAT_PID         (written) - PID of the socat child
# Exits:
#   With socat's exit status when socat ends on its own; through cleanup
#   when SIGTERM/SIGINT is received; with 1 when socat cannot be found.
serve_metrics() {
  echo "Starting metrics aggregator on port $METRICS_PORT..."
  if ! command -v socat &>/dev/null; then
    echo "Installing socat..."
    apt-get update && apt-get install -y socat
    if ! command -v socat &>/dev/null; then
      echo "socat is not available and could not be installed" >&2
      exit 1
    fi
  fi

  # Use socat to handle HTTP requests. It runs as a child instead of via
  # `exec`: exec replaces this shell, which discards the signal traps and
  # leaves node_exporter running after the script is told to stop.
  socat "TCP-LISTEN:${METRICS_PORT},reuseaddr,fork" EXEC:"/tmp/metrics_handler.sh" &
  SOCAT_PID=$!

  # On a termination signal stop socat first, then hand over to the script's
  # cleanup handler.
  trap 'kill "$SOCAT_PID" 2>/dev/null; cleanup' SIGTERM SIGINT

  local rc=0
  wait "$SOCAT_PID" || rc=$?

  # Reached only when socat ended on its own: stop node_exporter as well and
  # report socat's status to the caller of the script.
  if [ -n "${NODE_EXPORTER_PID:-}" ]; then
    kill "$NODE_EXPORTER_PID" 2>/dev/null || true
  fi
  exit "$rc"
}
# Create health check script.
# Writes /tmp/health.sh and marks it executable. The generated script prints
# one JSON object: an overall "status" plus an up/down entry for every
# directory under SKILLS_DIR whose name has a port in HEALTH_PORTS.
create_health_script() {
  cat > /tmp/health.sh << 'SCRIPT'
#!/bin/bash
SKILLS_DIR="${SKILLS_DIR:-/skills}"

# Health check endpoints for known skills
declare -A HEALTH_PORTS=(
  ["caddy"]="2019"
  ["ttyd"]="7681"
  ["loki"]="3100"
  ["duckdb"]="8432"
  ["claude"]="8888"
  ["metrics"]="9090"
)

# Succeeds when curl gets a non-error answer from the URL within 2 seconds.
probe() {
  curl -sf --max-time 2 "$1" >/dev/null 2>&1
}

overall="healthy"
entries=""
for skill_dir in "$SKILLS_DIR"/*/; do
  skill_name=$(basename "$skill_dir")

  # These two are never probed.
  case "$skill_name" in
    supervisor|metrics) continue ;;
  esac

  # Skills without a known port are left out of the report.
  port="${HEALTH_PORTS[$skill_name]}"
  [ -n "$port" ] || continue

  # Check health: try /health first, then fall back to the root path.
  if probe "http://localhost:$port/health" || probe "http://localhost:$port/"; then
    status="up"
  else
    status="down"
    # One skill down is enough to mark the whole report as degraded.
    overall="degraded"
  fi

  # Comma-join: the separator is emitted only once an entry already exists.
  entries+="${entries:+,}\"$skill_name\":\"$status\""
done

echo "{\"status\":\"$overall\",\"skills\":{$entries}}"
SCRIPT
  chmod +x /tmp/health.sh
}
# Create HTTP handler script.
# Writes /tmp/metrics_handler.sh and marks it executable. The generated
# script reads one HTTP request from stdin and writes one HTTP response to
# stdout: /health returns the output of /tmp/health.sh as JSON, every other
# path returns the output of /tmp/aggregate.sh as plain text.
create_handler_script() {
  cat > /tmp/metrics_handler.sh << 'HANDLER'
#!/bin/bash
# Read HTTP request line, e.g. "GET /metrics HTTP/1.1"; the second field is
# the request target.
read -r _ path _
# A request line without an HTTP version leaves the CR on the target.
path=${path%$'\r'}
# Route on the path only, so "/health?x=1" still reaches /health.
path=${path%%\?*}

# Skip the headers. The blank line that ends them may be CRLF or a bare LF;
# matching only CRLF would keep reading until EOF for LF-only clients.
while IFS= read -r header; do
  header=${header%$'\r'}
  [ -z "$header" ] && break
done

send_response() {
  local content_type="$1"
  local body="$2"
  # Content-Length is a byte count. In a UTF-8 locale ${#body} counts
  # characters, so switch to the C locale for the length computation.
  local LC_ALL=C
  local body_length=${#body}
  printf "HTTP/1.1 200 OK\r\n"
  printf "Content-Type: %s\r\n" "$content_type"
  printf "Content-Length: %d\r\n" "$body_length"
  printf "Connection: close\r\n"
  printf "\r\n"
  printf "%s" "$body"
}

case "$path" in
  /health)
    health=$(/tmp/health.sh)
    send_response "application/json" "$health"
    ;;
  /metrics|*)
    # Any path other than /health is answered with the metrics.
    metrics=$(/tmp/aggregate.sh)
    # Command substitution strips trailing newlines, but the Prometheus text
    # format requires the last line to end with a line feed: put it back.
    send_response "text/plain; charset=utf-8" "$metrics"$'\n'
    ;;
esac
HANDLER
  chmod +x /tmp/metrics_handler.sh
}
# Cleanup on exit: stop node_exporter if it was started, then exit 0.
# Globals:
#   NODE_EXPORTER_PID (read) - PID to terminate; may be unset or empty
cleanup() {
  echo "Shutting down..."
  # Written as an `if` with `|| true` on purpose: under `set -e` a failing
  # kill (the process has already exited) as the last command of an `&&` list
  # would abort the script with a non-zero status before `exit 0` is reached.
  if [ -n "${NODE_EXPORTER_PID:-}" ]; then
    kill "$NODE_EXPORTER_PID" 2>/dev/null || true
  fi
  exit 0
}
# Entry point: install the signal handler, then run the startup steps in
# order. Each step depends on the previous ones having completed.
main() {
  trap cleanup SIGTERM SIGINT

  start_node_exporter

  # The handler script calls the aggregator and health scripts by path, so
  # all three are written before serving starts.
  create_aggregator_script
  create_health_script
  create_handler_script

  serve_metrics
}

main