Initial metrics skill with aggregation
This commit is contained in:
73
SKILL.md
Normal file
73
SKILL.md
Normal file
@@ -0,0 +1,73 @@
|
||||
---
|
||||
name: metrics
|
||||
description: Metrics aggregator with system and skill metrics
|
||||
metadata:
|
||||
version: "1.0.0"
|
||||
vibestack:
|
||||
main: false
|
||||
metrics-port: 9090
|
||||
---
|
||||
|
||||
# Metrics Skill
|
||||
|
||||
Aggregates metrics from system (node_exporter) and other skills into a single `/metrics` endpoint.
|
||||
|
||||
## Features
|
||||
|
||||
- System metrics via node_exporter (CPU, memory, disk, network)
|
||||
- Auto-discovers skills that expose metrics
|
||||
- Single aggregated endpoint for Prometheus scraping
|
||||
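- Writes a Caddy snippet if Caddy is present (a full site block when `METRICS_DOMAIN` is set, otherwise a commented example to add manually)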
|
||||
|
||||
## How Skills Expose Metrics
|
||||
|
||||
Skills can expose metrics by adding the following to the YAML front matter of their SKILL.md:
|
||||
|
||||
```yaml
|
||||
metadata:
|
||||
vibestack:
|
||||
metrics-port: 8080
|
||||
```
|
||||
|
||||
Or by setting an environment variable named `METRICS_PORT_<skill name>` (hyphens in the skill name become underscores):
|
||||
```bash
|
||||
METRICS_PORT_myskill=8080
|
||||
```
|
||||
|
||||
The aggregator will scrape `localhost:{port}/metrics` for each discovered skill.
|
||||
|
||||
## Configuration
|
||||
|
||||
| Variable | Description | Default |
|
||||
|----------|-------------|---------|
|
||||
| `METRICS_PORT` | Aggregator port | `9090` |
|
||||
| `NODE_EXPORTER_PORT` | node_exporter port | `9100` |
|
||||
| `METRICS_DOMAIN` | Domain for Caddy auto-config | (none) |
|
||||
| `METRICS_PATH` | URL path for metrics | `/metrics` |
|
||||
|
||||
## Collected Metrics
|
||||
|
||||
### System (node_exporter)
|
||||
- `node_cpu_*` - CPU usage
|
||||
- `node_memory_*` - Memory usage
|
||||
- `node_disk_*` - Disk I/O
|
||||
- `node_network_*` - Network I/O
|
||||
- `node_filesystem_*` - Filesystem usage
|
||||
|
||||
### Per-Skill
|
||||
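Each skill's metrics are passed through unchanged after a `# Metrics from skill: <name>` comment line, so skills should prefix their own metric names with the skill name: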
|
||||
```
|
||||
# From ttyd skill
|
||||
ttyd_connections_active 5
|
||||
ttyd_sessions_total 123
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
# Scrape aggregated metrics
|
||||
curl http://localhost:9090/metrics
|
||||
|
||||
# With Caddy proxy
|
||||
curl https://metrics.example.com/metrics
|
||||
```
|
||||
124
scripts/autorun.sh
Normal file
124
scripts/autorun.sh
Normal file
@@ -0,0 +1,124 @@
|
||||
#!/bin/bash
# Setup script for the metrics skill.
set -e

# node_exporter release to download when the binary is not already on PATH.
NODE_EXPORTER_VERSION="${NODE_EXPORTER_VERSION:-1.7.0}"
# Directory that holds one sub-directory per skill.
SKILLS_DIR="${SKILLS_DIR:-/skills}"
|
||||
|
||||
# Idempotent node_exporter installation.
#
# Globals:  NODE_EXPORTER_VERSION (read) - release version to download.
# Outputs:  progress messages on stdout, errors on stderr.
# Returns:  0 if node_exporter is already on PATH or was installed;
#           non-zero if the download, extraction or move fails.
#           Exits 1 on an unsupported CPU architecture.
install_node_exporter() {
    if command -v node_exporter &>/dev/null; then
        echo "node_exporter already installed"
        return 0
    fi

    echo "Installing node_exporter v${NODE_EXPORTER_VERSION}..."

    # Map the kernel's machine name to the suffix used in release archives.
    local arch
    case "$(uname -m)" in
        x86_64) arch="amd64" ;;
        aarch64) arch="arm64" ;;
        armv7l) arch="armv7" ;;
        *)
            echo "Unsupported architecture: $(uname -m)" >&2
            exit 1
            ;;
    esac

    local release="node_exporter-${NODE_EXPORTER_VERSION}.linux-${arch}"
    local url="https://github.com/prometheus/node_exporter/releases/download/v${NODE_EXPORTER_VERSION}/${release}.tar.gz"

    # Private scratch directory instead of predictable names in /tmp.
    local workdir
    workdir="$(mktemp -d)" || return 1
    local archive="$workdir/node_exporter.tar.gz"

    # -f makes curl fail on an HTTP error (e.g. 404 for an unknown version)
    # instead of saving the error page and handing it to tar.
    local status=0
    curl -fsSL "$url" -o "$archive" \
        && tar -xzf "$archive" -C "$workdir" \
        && mv "$workdir/${release}/node_exporter" /usr/local/bin/ \
        || status=$?

    rm -rf -- "$workdir"

    if [ "$status" -ne 0 ]; then
        echo "Failed to install node_exporter from $url" >&2
        return "$status"
    fi

    echo "node_exporter installed"
}
|
||||
|
||||
# Discover skills with metrics ports.
#
# Scans every sub-directory of $SKILLS_DIR except "metrics" and rewrites
# /tmp/metrics_targets.txt with one "<skill> localhost:<port>" line per skill.
# The port is taken from the METRICS_PORT_<skill> environment variable when
# it is set (characters that are not valid in a variable name are mapped to
# "_"), otherwise from metadata.vibestack.metrics-port in the front matter of
# the skill's SKILL.md. Each skill is listed at most once.
#
# Globals:  SKILLS_DIR (read), METRICS_PORT_* (read)
# Outputs:  one progress line per discovered skill on stdout, warnings on stderr.
discover_metrics_targets() {
    local targets_file="/tmp/metrics_targets.txt"
    echo "# Auto-discovered metrics targets" > "$targets_file"

    local have_yq=1
    if ! command -v yq &>/dev/null; then
        have_yq=0
        echo "Warning: yq not found - metrics-port entries in SKILL.md are ignored" >&2
    fi

    local skill_dir skill_name skill_md port origin env_var env_port
    for skill_dir in "$SKILLS_DIR"/*/; do
        # An unmatched glob stays literal; skip it instead of treating "*" as a skill.
        [ -d "$skill_dir" ] || continue

        skill_name="$(basename "$skill_dir")"
        skill_md="${skill_dir%/}/SKILL.md"

        # Skip self
        [ "$skill_name" = "metrics" ] && continue

        port=""
        origin=""

        # Check for metrics-port in SKILL.md
        if [ "$have_yq" -eq 1 ] && [ -f "$skill_md" ]; then
            # Feed yq only the first front-matter block: stop at the second
            # "---" so later horizontal rules in the Markdown body are ignored.
            port="$(awk '/^---[[:space:]]*$/ { if (seen) exit; seen = 1; next } seen { print }' "$skill_md" \
                | yq -r '.metadata.vibestack."metrics-port" // empty' 2>/dev/null)" || port=""
            if [ "$port" = "null" ]; then
                port=""
            fi
        fi

        # Check for METRICS_PORT_skillname env var; it overrides SKILL.md.
        env_var="METRICS_PORT_${skill_name//[^a-zA-Z0-9_]/_}"
        env_port="${!env_var:-}"
        if [ -n "$env_port" ]; then
            port="$env_port"
            origin=" (from env)"
        fi

        [ -n "$port" ] || continue

        if ! [[ "$port" =~ ^[0-9]+$ ]]; then
            echo "  $skill_name: ignoring invalid metrics port '$port'" >&2
            continue
        fi

        echo "$skill_name localhost:$port" >> "$targets_file"
        echo "  $skill_name: metrics on port $port$origin"
    done

    echo "Targets written to $targets_file"
}
|
||||
|
||||
# Configure Caddy if present.
#
# When $SKILLS_DIR/caddy exists, writes snippets.d/metrics.caddy inside it:
# a site block proxying METRICS_DOMAIN to the aggregator when METRICS_DOMAIN
# is set, otherwise a commented-out example to paste into an existing site
# block.
#
# Globals:  SKILLS_DIR, METRICS_PORT (default 9090), METRICS_DOMAIN,
#           METRICS_PATH (default /metrics, used in the commented example)
# Returns:  0, including when Caddy is not present.
configure_caddy() {
    local caddy_dir="$SKILLS_DIR/caddy"
    local metrics_port="${METRICS_PORT:-9090}"
    local metrics_domain="${METRICS_DOMAIN:-}"
    local metrics_path="${METRICS_PATH:-/metrics}"

    if [ ! -d "$caddy_dir" ]; then
        echo "Caddy not found - metrics on port $metrics_port"
        return 0
    fi

    echo "Caddy detected - configuring reverse proxy..."
    mkdir -p "$caddy_dir/snippets.d"

    local snippet="$caddy_dir/snippets.d/metrics.caddy"

    if [ -n "$metrics_domain" ]; then
        cat > "$snippet" << EOF
# Auto-generated by metrics skill
$metrics_domain {
    reverse_proxy localhost:$metrics_port
}
EOF
        echo "Caddy config: $metrics_domain -> localhost:$metrics_port"
    else
        # No domain to bind to: leave an example only, every line commented out.
        cat > "$snippet" << EOF
# Auto-generated by metrics skill
# Add to your site block:
#   handle $metrics_path {
#       reverse_proxy localhost:$metrics_port
#   }
EOF
        echo "Caddy snippet created (manual config needed)"
    fi
}
|
||||
|
||||
# Install socat for HTTP server.
#
# Does nothing when socat is already on PATH. Otherwise installs it with
# apt-get, which is the only package manager this script supports.
#
# Returns:  0 if socat is available afterwards, non-zero if apt-get is
#           missing or the installation fails.
install_socat() {
    if command -v socat &>/dev/null; then
        echo "socat already installed"
        return 0
    fi

    if ! command -v apt-get &>/dev/null; then
        echo "Cannot install socat: apt-get not found, please install socat manually" >&2
        return 1
    fi

    echo "Installing socat..."
    # Non-interactive frontend so package configuration never waits for input.
    DEBIAN_FRONTEND=noninteractive apt-get update
    DEBIAN_FRONTEND=noninteractive apt-get install -y socat
    echo "socat installed"
}
|
||||
|
||||
# Run the setup steps in order.
install_node_exporter
install_socat
discover_metrics_targets
configure_caddy

echo "Metrics setup complete"
|
||||
107
scripts/run.sh
Normal file
107
scripts/run.sh
Normal file
@@ -0,0 +1,107 @@
|
||||
#!/bin/bash
# Runtime script for the metrics skill.
set -e

SCRIPT_DIR="$(dirname "$0")"
# Port the aggregated endpoint listens on.
METRICS_PORT="${METRICS_PORT:-9090}"
# Port node_exporter listens on.
NODE_EXPORTER_PORT="${NODE_EXPORTER_PORT:-9100}"
# File listing "<skill> <host:port>" scrape targets.
TARGETS_FILE="/tmp/metrics_targets.txt"
|
||||
|
||||
# Start node_exporter in background.
#
# Globals:  NODE_EXPORTER_PORT (read)          - listen port.
#           NODE_EXPORTER_STARTUP_WAIT (read)  - seconds to wait before the
#                                                liveness check, default 2.
#           NODE_EXPORTER_PID (written)        - PID of the background process.
# Exits 1 if the process is no longer alive after the wait.
start_node_exporter() {
    echo "Starting node_exporter on port $NODE_EXPORTER_PORT..."
    node_exporter --web.listen-address=":$NODE_EXPORTER_PORT" &
    NODE_EXPORTER_PID=$!

    # Give the process time to either settle or crash before probing it.
    sleep "${NODE_EXPORTER_STARTUP_WAIT:-2}"

    # Signal 0 only tests that the PID still exists.
    if ! kill -0 "$NODE_EXPORTER_PID" 2>/dev/null; then
        echo "Failed to start node_exporter" >&2
        exit 1
    fi
    echo "node_exporter running (PID $NODE_EXPORTER_PID)"
}
|
||||
|
||||
# Create the aggregator script.
#
# Writes /tmp/aggregate.sh and marks it executable. When run, that script
# prints to stdout: a vibestack_up gauge, the node_exporter metrics, and the
# metrics of every target listed in the targets file. The here-document
# delimiter is quoted, so nothing below is expanded at creation time.
#
# Environment read by the generated script:
#   NODE_EXPORTER_PORT  node_exporter port (default 9100)
#   TARGETS_FILE        targets list (default /tmp/metrics_targets.txt)
#   SCRAPE_TIMEOUT      per-target time limit in seconds (default 5)
create_aggregator_script() {
    cat > /tmp/aggregate.sh << 'SCRIPT'
#!/bin/bash
NODE_EXPORTER_PORT="${NODE_EXPORTER_PORT:-9100}"
TARGETS_FILE="${TARGETS_FILE:-/tmp/metrics_targets.txt}"
SCRAPE_TIMEOUT="${SCRAPE_TIMEOUT:-5}"

# Print the body of URL $1, or nothing if the scrape fails.
# -f drops HTTP error pages instead of mixing them into the metrics output;
# --max-time keeps one unresponsive target from stalling the whole response.
scrape() {
    curl -fs --max-time "$SCRAPE_TIMEOUT" "$1" 2>/dev/null || true
}

# Collect all metrics
{
    # System metrics
    echo "# HELP vibestack_up Whether the metrics aggregator is up"
    echo "# TYPE vibestack_up gauge"
    echo "vibestack_up 1"
    echo ""

    # node_exporter metrics
    scrape "http://localhost:$NODE_EXPORTER_PORT/metrics"

    # Skill metrics
    if [ -f "$TARGETS_FILE" ]; then
        # `|| [ -n ... ]` also handles a last line without a trailing newline.
        while IFS=' ' read -r skill_name target || [ -n "$skill_name" ]; do
            [[ "$skill_name" =~ ^#.*$ ]] && continue
            [ -z "$skill_name" ] && continue
            [ -z "$target" ] && continue

            echo ""
            echo "# Metrics from skill: $skill_name"
            scrape "http://${target}/metrics"
        done < "$TARGETS_FILE"
    fi
}
SCRIPT
    chmod +x /tmp/aggregate.sh
}
|
||||
|
||||
# Serve metrics using socat.
#
# Listens on METRICS_PORT and runs /tmp/metrics_handler.sh for every
# connection. Blocks until socat exits.
#
# Globals:  METRICS_PORT (read), SOCAT_PID (written),
#           NODE_EXPORTER_PID (read by the EXIT trap)
# Returns:  socat's exit status.
serve_metrics() {
    echo "Starting metrics aggregator on port $METRICS_PORT..."

    if ! command -v socat &>/dev/null; then
        echo "Installing socat..."
        apt-get update && apt-get install -y socat
    fi

    # Use socat to handle HTTP requests.
    # socat runs as a child instead of via `exec`: exec would replace this
    # shell, dropping its signal traps and leaving node_exporter running
    # after the script is told to stop.
    socat "TCP-LISTEN:${METRICS_PORT},reuseaddr,fork" EXEC:"/tmp/metrics_handler.sh" &
    SOCAT_PID=$!

    # Whatever ends the script, take both background processes down with it.
    trap 'kill ${SOCAT_PID:+"$SOCAT_PID"} ${NODE_EXPORTER_PID:+"$NODE_EXPORTER_PID"} 2>/dev/null || true' EXIT

    # `wait` is interrupted by trapped signals, so signal handlers run promptly.
    wait "$SOCAT_PID"
}
|
||||
|
||||
# Create HTTP handler script
|
||||
create_handler_script() {
|
||||
cat > /tmp/metrics_handler.sh << 'HANDLER'
|
||||
#!/bin/bash
|
||||
# Read HTTP request (we ignore it, always serve /metrics)
|
||||
read -r request_line
|
||||
while read -r header; do
|
||||
[ "$header" = $'\r' ] && break
|
||||
done
|
||||
|
||||
# Get metrics
|
||||
metrics=$(/tmp/aggregate.sh)
|
||||
content_length=${#metrics}
|
||||
|
||||
# Send HTTP response
|
||||
printf "HTTP/1.1 200 OK\r\n"
|
||||
printf "Content-Type: text/plain; charset=utf-8\r\n"
|
||||
printf "Content-Length: %d\r\n" "$content_length"
|
||||
printf "Connection: close\r\n"
|
||||
printf "\r\n"
|
||||
printf "%s" "$metrics"
|
||||
HANDLER
|
||||
chmod +x /tmp/metrics_handler.sh
|
||||
}
|
||||
|
||||
# Cleanup on exit.
#
# Stops node_exporter if its PID was recorded, then exits with status 0.
#
# Globals:  NODE_EXPORTER_PID (read)
cleanup() {
    echo "Shutting down..."
    if [ -n "${NODE_EXPORTER_PID:-}" ]; then
        # The process may already be gone; a failed kill must not abort the
        # handler under `set -e` before the `exit 0` below is reached.
        kill "$NODE_EXPORTER_PID" 2>/dev/null || true
    fi
    exit 0
}
|
||||
# Run cleanup when the script is asked to stop.
trap cleanup SIGTERM SIGINT

# Start the exporter, generate the helper scripts, then serve.
start_node_exporter
create_aggregator_script
create_handler_script
serve_metrics
|
||||
Reference in New Issue
Block a user