#!/bin/bash
#
# Metrics entry point: starts node_exporter, generates the helper scripts
# under /tmp and serves /metrics and /health over HTTP through socat.
#
# Environment:
#   METRICS_PORT        TCP port the HTTP endpoint listens on (default 9090)
#   NODE_EXPORTER_PORT  TCP port node_exporter listens on (default 9100)

# Abort on the first unhandled command failure.
set -e

# Directory this script was invoked from, as derived from $0.
SCRIPT_DIR="$(dirname "$0")"

METRICS_PORT="${METRICS_PORT:-9090}"
NODE_EXPORTER_PORT="${NODE_EXPORTER_PORT:-9100}"

# Skill targets list; the generated aggregator script uses this same path.
TARGETS_FILE="/tmp/metrics_targets.txt"

# Start node_exporter in background
#
# Globals:
#   NODE_EXPORTER_PORT          (read)    port passed to --web.listen-address
#   NODE_EXPORTER_STARTUP_WAIT  (read)    seconds to wait before the liveness
#                                         probe; defaults to 2
#   NODE_EXPORTER_PID           (written) PID of the background process
# Outputs:
#   Progress messages on stdout; the failure message on stderr.
# Exits:
#   Terminates the script with status 1 if the process is no longer alive
#   once the startup wait has elapsed.
start_node_exporter() {
  echo "Starting node_exporter on port $NODE_EXPORTER_PORT..."

  node_exporter --web.listen-address=":$NODE_EXPORTER_PORT" &
  NODE_EXPORTER_PID=$!

  # A background launch never fails synchronously, so pause briefly and then
  # probe the PID to catch an early exit.
  sleep "${NODE_EXPORTER_STARTUP_WAIT:-2}"

  # kill -0 delivers no signal; it only reports whether the PID still exists.
  if ! kill -0 "$NODE_EXPORTER_PID" 2>/dev/null; then
    echo "Failed to start node_exporter" >&2
    exit 1
  fi

  echo "node_exporter running (PID $NODE_EXPORTER_PID)"
}

# Create the aggregator CGI script
#
# Writes /tmp/aggregate.sh and marks it executable. The heredoc delimiter is
# quoted, so nothing below is expanded here; every variable is resolved when
# the generated script itself runs.
#
# The generated script prints, on stdout:
#   1. a constant vibestack_up gauge,
#   2. whatever node_exporter returns on localhost:$NODE_EXPORTER_PORT,
#   3. for each "<skill_name> <host:port>" line of the targets file, a comment
#      naming the skill followed by whatever http://<host:port>/metrics returns.
#
# Environment read by the generated script:
#   NODE_EXPORTER_PORT  default 9100
#   TARGETS_FILE        default /tmp/metrics_targets.txt
#   SCRAPE_TIMEOUT      per-request limit in seconds, default 5
create_aggregator_script() {
  cat > /tmp/aggregate.sh << 'SCRIPT'
#!/bin/bash
NODE_EXPORTER_PORT="${NODE_EXPORTER_PORT:-9100}"
TARGETS_FILE="${TARGETS_FILE:-/tmp/metrics_targets.txt}"
# Bound every upstream request so one unresponsive target cannot stall the
# whole scrape.
SCRAPE_TIMEOUT="${SCRAPE_TIMEOUT:-5}"

# Collect all metrics
{
  # System metrics
  echo "# HELP vibestack_up Whether the metrics aggregator is up"
  echo "# TYPE vibestack_up gauge"
  echo "vibestack_up 1"
  echo ""

  # node_exporter metrics (best effort: a failed fetch contributes nothing)
  curl -s --max-time "$SCRAPE_TIMEOUT" \
    "http://localhost:$NODE_EXPORTER_PORT/metrics" 2>/dev/null || true

  # Skill metrics
  if [ -f "$TARGETS_FILE" ]; then
    # The extra "|| [ -n ... ]" keeps a final line that lacks a newline.
    # Fields after the second are read into _extra and ignored so they
    # cannot end up inside the URL.
    while IFS=' ' read -r skill_name target _extra || [ -n "$skill_name" ]; do
      [[ "$skill_name" =~ ^#.*$ ]] && continue   # comment line
      [ -z "$skill_name" ] && continue           # blank line
      [ -z "$target" ] && continue               # name without a target

      echo ""
      echo "# Metrics from skill: $skill_name"
      # Best effort, same as above.
      curl -s --max-time "$SCRAPE_TIMEOUT" \
        "http://${target}/metrics" 2>/dev/null || true
    done < "$TARGETS_FILE"
  fi
}
SCRIPT
  chmod +x /tmp/aggregate.sh
}

# Serve metrics using socat
#
# Listens on METRICS_PORT and runs /tmp/metrics_handler.sh once per accepted
# connection. Blocks until socat exits or a SIGTERM/SIGINT arrives.
#
# Globals:
#   METRICS_PORT (read)
# Outputs:
#   Progress messages on stdout; a diagnostic on stderr if socat is missing.
# Returns:
#   1 if socat is unavailable after the install attempt, otherwise socat's
#   own exit status. On SIGTERM/SIGINT the script leaves through cleanup.
serve_metrics() {
  echo "Starting metrics aggregator on port $METRICS_PORT..."

  if ! command -v socat &>/dev/null; then
    echo "Installing socat..."
    apt-get update && apt-get install -y socat
  fi

  # A failing "apt-get update" on the left of && does not trip set -e, so
  # confirm the tool is really there before relying on it.
  if ! command -v socat &>/dev/null; then
    echo "socat is not available; cannot serve metrics" >&2
    return 1
  fi

  # Use socat to handle HTTP requests. It runs as a background child rather
  # than via exec: exec would replace this shell and discard its signal
  # traps, so cleanup would never run and node_exporter would be left behind.
  socat "TCP-LISTEN:${METRICS_PORT},reuseaddr,fork" EXEC:"/tmp/metrics_handler.sh" &
  local socat_pid=$!

  # Stop socat first, then hand over to the file's cleanup handler.
  # shellcheck disable=SC2064  # the PID is expanded now on purpose
  trap "kill $socat_pid 2>/dev/null; cleanup" SIGTERM SIGINT

  # wait is interruptible by trapped signals; "|| status=$?" keeps set -e
  # from aborting before the status can be returned.
  local status=0
  wait "$socat_pid" || status=$?
  return "$status"
}

# Create health check script
#
# Writes /tmp/health.sh and marks it executable. The heredoc delimiter is
# quoted, so the content is written verbatim.
#
# The generated script walks the sub-directories of SKILLS_DIR (default
# /skills), skips "supervisor", "metrics" and any name without an entry in
# HEALTH_PORTS, probes each remaining skill over HTTP on localhost, and prints
# one JSON object:
#   {"status":"healthy|degraded","skills":{"<name>":"up|down",...}}
# "degraded" is reported as soon as at least one probed skill is down.
create_health_script() {
  cat > /tmp/health.sh << 'SCRIPT'
#!/bin/bash
SKILLS_DIR="${SKILLS_DIR:-/skills}"

# Health check endpoints for known skills
declare -A HEALTH_PORTS=(
  ["caddy"]="2019"
  ["ttyd"]="7681"
  ["loki"]="3100"
  ["duckdb"]="8432"
  ["claude"]="8888"
  ["metrics"]="9090"
)

overall="healthy"
entries=""

for skill_dir in "$SKILLS_DIR"/*/; do
  # An unmatched glob is left as the literal pattern; skip it explicitly.
  [ -d "$skill_dir" ] || continue

  skill_name=$(basename "$skill_dir")
  [ "$skill_name" = "supervisor" ] && continue
  [ "$skill_name" = "metrics" ] && continue

  port="${HEALTH_PORTS[$skill_name]}"
  [ -z "$port" ] && continue

  # Check health: try /health first, then fall back to the root path.
  if curl -sf --max-time 2 "http://localhost:$port/health" >/dev/null 2>&1 ||
     curl -sf --max-time 2 "http://localhost:$port/" >/dev/null 2>&1; then
    status="up"
  else
    status="down"
    overall="degraded"
  fi

  # ${entries:+,} yields a comma only when an entry has already been added.
  entries+="${entries:+,}\"$skill_name\":\"$status\""
done

printf '{"status":"%s","skills":{%s}}\n' "$overall" "$entries"
SCRIPT
  chmod +x /tmp/health.sh
}

# Create HTTP handler script
#
# Writes /tmp/metrics_handler.sh and marks it executable. The heredoc
# delimiter is quoted, so the content is written verbatim.
#
# The generated script handles exactly one HTTP request on stdin/stdout:
#   /health         -> output of /tmp/health.sh as application/json
#   anything else   -> output of /tmp/aggregate.sh as text/plain
# Every response is sent with status 200 and "Connection: close".
create_handler_script() {
  cat > /tmp/metrics_handler.sh << 'HANDLER'
#!/bin/bash
# Read HTTP request line: "<method> <target> <version>"
read -r _method path _version
path=${path%$'\r'}      # a two-field request line leaves the CR on the target
path=${path%%\?*}       # route on the path only, ignoring any query string

# Consume headers up to the blank line, accepting both CRLF and bare LF.
while IFS= read -r header; do
  header=${header%$'\r'}
  [ -z "$header" ] && break
done

# send_response <content-type> <body>
# Emits a complete 200 response whose body is <body> plus one newline.
send_response() {
  local content_type="$1"
  # Command substitution stripped the trailing newline from the payload;
  # put it back so the final line is newline-terminated.
  local body="$2"$'\n'
  # Content-Length is a byte count; in the C locale ${#var} counts bytes
  # even when the body contains multi-byte characters.
  local LC_ALL=C
  local body_length=${#body}

  printf "HTTP/1.1 200 OK\r\n"
  printf "Content-Type: %s\r\n" "$content_type"
  printf "Content-Length: %d\r\n" "$body_length"
  printf "Connection: close\r\n"
  printf "\r\n"
  printf "%s" "$body"
}

case "$path" in
  /health)
    health=$(/tmp/health.sh)
    send_response "application/json" "$health"
    ;;
  /metrics|*)
    metrics=$(/tmp/aggregate.sh)
    send_response "text/plain; charset=utf-8" "$metrics"
    ;;
esac
HANDLER
  chmod +x /tmp/metrics_handler.sh
}

# Cleanup on exit
#
# Signal handler for SIGTERM/SIGINT: stops the background node_exporter if
# one was started, then ends the script with status 0.
#
# Globals:
#   NODE_EXPORTER_PID (read) PID to signal; may be unset or empty
# Outputs:
#   "Shutting down..." on stdout
cleanup() {
  echo "Shutting down..."
  if [ -n "${NODE_EXPORTER_PID:-}" ]; then
    # Best effort: the process may already be gone. Without "|| true" a
    # failing kill would, under set -e, end the handler with status 1
    # before the exit 0 below is reached.
    kill "$NODE_EXPORTER_PID" 2>/dev/null || true
  fi
  exit 0
}

trap cleanup SIGTERM SIGINT

# Entry point.
#
# The helper scripts are written before any process is launched: under
# set -e a failure while writing them aborts the script, and doing that
# first means no background node_exporter is left running in that case.
create_aggregator_script
create_health_script
create_handler_script
start_node_exporter
serve_metrics