Add /health endpoint for aggregated skill status
This commit is contained in:
@@ -68,27 +68,93 @@ serve_metrics() {
|
||||
exec socat TCP-LISTEN:$METRICS_PORT,reuseaddr,fork EXEC:"/tmp/metrics_handler.sh"
|
||||
}
|
||||
|
||||
# Create health check script
|
||||
#######################################
# Writes the aggregated skill health probe to /tmp/health.sh and marks it
# executable.
#
# The generated script prints one JSON document on stdout:
#   {"status":"healthy|degraded","skills":{"<name>":"up|down",...}}
# A skill is reported only when it has a directory under $SKILLS_DIR
# (default /skills) AND an entry in HEALTH_PORTS; "supervisor" and "metrics"
# are never reported. A skill is "up" when http://localhost:<port>/health or,
# failing that, http://localhost:<port>/ answers successfully within 2 seconds.
# "status" becomes "degraded" as soon as one reported skill is "down".
#
# Globals:   none (the generated script reads SKILLS_DIR at run time)
# Arguments: none
# Outputs:   overwrites /tmp/health.sh
#######################################
create_health_script() {
  cat > /tmp/health.sh << 'SCRIPT'
#!/bin/bash
# Aggregated skill health probe (generated by create_health_script).
SKILLS_DIR="${SKILLS_DIR:-/skills}"

# Port each known skill answers HTTP on; skills not listed here are not probed.
declare -A HEALTH_PORTS=(
  ["caddy"]="2019"
  ["ttyd"]="7681"
  ["loki"]="3100"
  ["duckdb"]="8432"
  ["claude"]="8888"
  ["metrics"]="9090"
)

# Succeeds when the service on port $1 answers /health or, failing that, /.
probe_port() {
  curl -sf --max-time 2 "http://localhost:$1/health" >/dev/null 2>&1 ||
    curl -sf --max-time 2 "http://localhost:$1/" >/dev/null 2>&1
}

overall="healthy"
entries=()

for skill_dir in "$SKILLS_DIR"/*/; do
  # When nothing matches, bash leaves the glob as the literal pattern; without
  # this guard a bogus "*" entry would be processed.
  [[ -d "$skill_dir" ]] || continue

  # Directory name without the trailing slash (no basename fork, and safe for
  # names starting with "-").
  skill_name=${skill_dir%/}
  skill_name=${skill_name##*/}

  case "$skill_name" in
    supervisor | metrics) continue ;;
  esac

  port="${HEALTH_PORTS[$skill_name]:-}"
  [[ -n "$port" ]] || continue

  if probe_port "$port"; then
    status="up"
  else
    status="down"
    # Track the overall state here instead of grepping the JSON afterwards.
    overall="degraded"
  fi

  entries+=("\"$skill_name\":\"$status\"")
done

# Join the per-skill entries with commas; the subshell keeps the IFS change local.
skills_json=$(IFS=,; printf '%s' "${entries[*]}")

printf '{"status":"%s","skills":{%s}}\n' "$overall" "$skills_json"
SCRIPT
  chmod +x /tmp/health.sh
}
|
||||
|
||||
# Create HTTP handler script
|
||||
#######################################
# Writes the per-connection HTTP handler to /tmp/metrics_handler.sh and marks
# it executable.
#
# The generated script reads one HTTP request from stdin and writes one
# "200 OK" response to stdout:
#   /health (optionally with a query string) -> output of /tmp/health.sh
#                                               as application/json
#   any other path                           -> output of /tmp/aggregate.sh
#                                               as text/plain; charset=utf-8
#
# Globals:   none
# Arguments: none
# Outputs:   overwrites /tmp/metrics_handler.sh
#######################################
create_handler_script() {
  cat > /tmp/metrics_handler.sh << 'HANDLER'
#!/bin/bash
# Per-connection HTTP handler (generated by create_handler_script).

# Request line, e.g. "GET /health HTTP/1.1\r": keep only the request target.
IFS= read -r request_line
request_line=${request_line%$'\r'}
read -r _ path _ <<< "$request_line"
# Route on the path alone so "/health?x=1" still reaches the health endpoint.
path=${path%%\?*}

# Skip the request headers. Stop at the blank line whether the client ends
# lines with CRLF or bare LF, so a bare-LF client cannot stall the handler.
while IFS= read -r header; do
  header=${header%$'\r'}
  [[ -z "$header" ]] && break
done

# Writes a complete HTTP/1.1 200 response.
#   $1 - Content-Type header value
#   $2 - response body
send_response() {
  local content_type="$1"
  local body="$2"
  # Content-Length is a byte count; in a UTF-8 locale ${#body} would count
  # characters, so measure under the C locale (scoped to this function).
  local LC_ALL=C
  local body_length=${#body}

  printf 'HTTP/1.1 200 OK\r\n'
  printf 'Content-Type: %s\r\n' "$content_type"
  printf 'Content-Length: %d\r\n' "$body_length"
  printf 'Connection: close\r\n'
  printf '\r\n'
  printf '%s' "$body"
}

case "$path" in
  /health)
    health=$(/tmp/health.sh)
    send_response "application/json" "$health"
    ;;
  *)
    # /metrics and every other path serve the aggregated metrics.
    metrics=$(/tmp/aggregate.sh)
    send_response "text/plain; charset=utf-8" "$metrics"
    ;;
esac
HANDLER
  chmod +x /tmp/metrics_handler.sh
}
|
||||
@@ -103,5 +169,6 @@ trap cleanup SIGTERM SIGINT
|
||||
|
||||
# Startup sequence.
# create_health_script and create_handler_script write /tmp/health.sh and
# /tmp/metrics_handler.sh; the handler invokes /tmp/health.sh and
# /tmp/aggregate.sh for each request, so the helper scripts are generated
# before the listener starts.
# NOTE(review): start_node_exporter and create_aggregator_script are defined
# outside this view; presumably the latter writes /tmp/aggregate.sh — confirm.
# serve_metrics is called last: the part of it visible here ends in
# `exec socat ... EXEC:"/tmp/metrics_handler.sh"`, which replaces this shell.
start_node_exporter
|
||||
create_aggregator_script
|
||||
create_health_script
|
||||
create_handler_script
|
||||
serve_metrics
|
||||
|
||||
Reference in New Issue
Block a user