commit 4f754c7234eb20d98c66317e9d08cb53e8f469a6 Author: Azat Date: Mon Feb 2 22:37:55 2026 +0100 Initial duckdb skill with HTTP API diff --git a/SKILL.md b/SKILL.md new file mode 100644 index 0000000..75b5b66 --- /dev/null +++ b/SKILL.md @@ -0,0 +1,120 @@ +--- +name: duckdb +description: DuckDB embedded analytical database with HTTP API +metadata: + version: "1.0.0" + vibestack: + main: false +--- + +# DuckDB Skill + +[DuckDB](https://duckdb.org/) - fast in-process analytical database with a simple HTTP API. + +## Features + +- Embedded OLAP database (no separate server process) +- Query CSV, Parquet, JSON files directly +- SQL interface via HTTP API +- Persistent storage option +- Auto-registers with Caddy if present + +## Configuration + +| Variable | Description | Default | +|----------|-------------|---------| +| `DUCKDB_PORT` | HTTP API port | `8432` | +| `DUCKDB_DATABASE` | Database file path (`:memory:` starts empty on every request) | `:memory:` | +| `DUCKDB_DATA_DIR` | Directory for data files | `/data/duckdb` | +| `DUCKDB_DOMAIN` | Domain for Caddy auto-config | (none) | +| `DUCKDB_READ_ONLY` | Read-only mode | `false` | + +## HTTP API + +### Execute Query + +```bash +# Simple query +curl -X POST http://localhost:8432/query \ + -H "Content-Type: application/json" \ + -d '{"sql": "SELECT 1 + 1 AS result"}' + +# Query CSV file +curl -X POST http://localhost:8432/query \ + -d '{"sql": "SELECT * FROM read_csv_auto(\"/data/duckdb/sales.csv\") LIMIT 10"}' + +# Query Parquet file +curl -X POST http://localhost:8432/query \ + -d '{"sql": "SELECT * FROM read_parquet(\"/data/duckdb/events.parquet\")"}' +``` + +### Response Format + +```json +{ + "success": true, + "columns": ["result"], + "rows": [[2]], + "row_count": 1, + "time_ms": 1 +} +``` + +## Use Cases + +### Analytics on Log Data + +```sql +-- Query JSON logs +SELECT + json_extract_string(line, '$.level') as level, + count(*) as count +FROM read_json_auto('/var/log/supervisor/*.log') +GROUP BY level; +``` + +### Query Remote Data + +```sql +-- 
Query remote Parquet (S3, HTTP)
+SELECT * FROM read_parquet('https://example.com/data.parquet');
+
+-- Query remote CSV
+SELECT * FROM read_csv_auto('https://example.com/data.csv');
+```
+
+### Create Persistent Tables
+
+```sql
+-- Create table
+CREATE TABLE events AS
+SELECT * FROM read_parquet('/data/duckdb/events.parquet');
+
+-- Query table
+SELECT date_trunc('hour', timestamp) as hour, count(*)
+FROM events
+GROUP BY 1 ORDER BY 1;
+```
+
+## CLI Access
+
+```bash
+# Interactive shell
+duckdb /data/duckdb/analytics.db
+
+# One-off query
+duckdb -c "SELECT * FROM 'data.csv' LIMIT 5"
+```
+
+## Extensions
+
+DuckDB supports extensions for additional functionality:
+
+```sql
+-- Install and load extensions
+INSTALL httpfs;
+LOAD httpfs;
+
+-- Now query S3/HTTP directly
+SELECT * FROM read_parquet('s3://bucket/data.parquet');
+```
diff --git a/scripts/autorun.sh b/scripts/autorun.sh
new file mode 100644
index 0000000..f1b8d0d
--- /dev/null
+++ b/scripts/autorun.sh
@@ -0,0 +1,149 @@
+#!/bin/bash
+#
+# One-time setup for the duckdb skill: installs the DuckDB CLI and the tools
+# used by the HTTP handler (socat, jq), creates the data directory and, when
+# the caddy skill directory exists, writes a reverse-proxy snippet.
+#
+# Environment:
+#   DUCKDB_VERSION   DuckDB release to download (default: 1.1.3)
+#   SKILLS_DIR       Root directory of installed skills (default: /skills)
+#   DUCKDB_DATA_DIR  Directory for data files (default: /data/duckdb)
+#   DUCKDB_PORT      API port used in the Caddy snippet (default: 8432)
+#   DUCKDB_DOMAIN    Domain for a dedicated Caddy site block (default: none)
+set -e
+
+DUCKDB_VERSION="${DUCKDB_VERSION:-1.1.3}"
+SKILLS_DIR="${SKILLS_DIR:-/skills}"
+DUCKDB_DATA_DIR="${DUCKDB_DATA_DIR:-/data/duckdb}"
+
+# Refresh the apt package index and install the given packages.
+# Arguments: one or more package names.
+apt_install() {
+    apt-get update
+    apt-get install -y "$@"
+}
+
+# Print the architecture suffix used in DuckDB CLI release asset names.
+# Returns non-zero, with a message on stderr, for unsupported machines.
+# NOTE(review): confirm the asset suffixes when overriding DUCKDB_VERSION.
+get_arch() {
+    local machine
+    machine=$(uname -m)
+    case "$machine" in
+        x86_64) echo "amd64" ;;
+        aarch64) echo "aarch64" ;;
+        *)
+            echo "Unsupported architecture: $machine" >&2
+            return 1
+            ;;
+    esac
+}
+
+# Download the DuckDB CLI release and install it to /usr/local/bin/duckdb.
+# Does nothing when a duckdb binary is already on PATH.
+install_duckdb() {
+    if command -v duckdb &>/dev/null; then
+        echo "duckdb already installed: $(duckdb --version)"
+        return 0
+    fi
+
+    echo "Installing DuckDB v${DUCKDB_VERSION}..."
+
+    # Declare and assign separately: "local arch=$(get_arch)" would hide a
+    # get_arch failure and carry on with an empty architecture in the URL.
+    local arch
+    arch=$(get_arch)
+    local url="https://github.com/duckdb/duckdb/releases/download/v${DUCKDB_VERSION}/duckdb_cli-linux-${arch}.zip"
+
+    # Only call apt for the download tools that are actually missing.
+    local missing=()
+    command -v curl &>/dev/null || missing+=(curl)
+    command -v unzip &>/dev/null || missing+=(unzip)
+    if [ "${#missing[@]}" -gt 0 ]; then
+        apt_install "${missing[@]}"
+    fi
+
+    # Private scratch directory instead of fixed, guessable names in /tmp.
+    local workdir
+    workdir=$(mktemp -d)
+
+    # curl -f fails on HTTP errors instead of saving an error page as the zip.
+    if ! { curl -fsSL "$url" -o "$workdir/duckdb.zip" &&
+        unzip -o "$workdir/duckdb.zip" -d "$workdir" &&
+        install -m 0755 "$workdir/duckdb" /usr/local/bin/duckdb; }; then
+        rm -rf -- "$workdir"
+        echo "Failed to download or install DuckDB from $url" >&2
+        return 1
+    fi
+    rm -rf -- "$workdir"
+
+    echo "DuckDB installed: $(duckdb --version)"
+}
+
+# Install the tools the HTTP handler needs: socat (listener) and jq (JSON).
+# Each tool is checked on its own so that an existing socat does not cause jq
+# to be skipped.
+install_socat() {
+    local missing=()
+    command -v socat &>/dev/null || missing+=(socat)
+    command -v jq &>/dev/null || missing+=(jq)
+
+    if [ "${#missing[@]}" -eq 0 ]; then
+        return 0
+    fi
+
+    echo "Installing ${missing[*]}..."
+    apt_install "${missing[@]}"
+}
+
+# Create the data directory.
+setup_dirs() {
+    mkdir -p "$DUCKDB_DATA_DIR"
+    echo "Data directory: $DUCKDB_DATA_DIR"
+}
+
+# Write a Caddy snippet for the API when the caddy skill directory exists.
+# With DUCKDB_DOMAIN set the snippet is a complete site block; otherwise it is
+# a commented-out example that has to be added to a site block by hand.
+configure_caddy() {
+    local caddy_dir="$SKILLS_DIR/caddy"
+    local duckdb_port="${DUCKDB_PORT:-8432}"
+    local duckdb_domain="${DUCKDB_DOMAIN:-}"
+
+    if [ ! -d "$caddy_dir" ]; then
+        echo "Caddy not found - DuckDB API on port $duckdb_port"
+        return 0
+    fi
+
+    echo "Caddy detected - configuring reverse proxy..."
+    mkdir -p "$caddy_dir/snippets.d"
+
+    local snippet="$caddy_dir/snippets.d/duckdb.caddy"
+
+    if [ -n "$duckdb_domain" ]; then
+        cat > "$snippet" << EOF
+# Auto-generated by duckdb skill
+$duckdb_domain {
+    reverse_proxy localhost:$duckdb_port
+}
+EOF
+        echo "Caddy config: $duckdb_domain -> localhost:$duckdb_port"
+    else
+        cat > "$snippet" << EOF
+# Auto-generated by duckdb skill
+# Add to your site block:
+# handle /duckdb/* {
+#     uri strip_prefix /duckdb
+#     reverse_proxy localhost:$duckdb_port
+# }
+EOF
+        echo "Caddy snippet created (manual config needed)"
+    fi
+}
+
+install_duckdb
+install_socat
+setup_dirs
+configure_caddy
+
+echo "DuckDB setup complete"
diff --git a/scripts/run.sh b/scripts/run.sh
new file mode 100644
index 0000000..5c2e524
--- /dev/null
+++ b/scripts/run.sh
@@ -0,0 +1,173 @@
+#!/bin/bash
+#
+# Serve DuckDB over a minimal HTTP API: socat accepts connections and runs a
+# generated handler script for each one.
+#
+# Environment:
+#   DUCKDB_PORT       Port to listen on (default: 8432)
+#   DUCKDB_DATABASE   Database file path (default: :memory:)
+#   DUCKDB_DATA_DIR   Directory for data files (default: /data/duckdb)
+#   DUCKDB_READ_ONLY  "true" passes -readonly to duckdb (default: false)
+set -e
+
+DUCKDB_PORT="${DUCKDB_PORT:-8432}"
+DUCKDB_DATABASE="${DUCKDB_DATABASE:-:memory:}"
+DUCKDB_DATA_DIR="${DUCKDB_DATA_DIR:-/data/duckdb}"
+DUCKDB_READ_ONLY="${DUCKDB_READ_ONLY:-false}"
+
+# Write the per-connection handler to /tmp/duckdb_handler.sh and export the
+# settings it reads. The heredoc delimiter is quoted, so nothing in the handler
+# text is expanded here.
+create_handler() {
+    cat > /tmp/duckdb_handler.sh << 'HANDLER'
+#!/bin/bash
+# Handles one HTTP request read from stdin and writes the response to stdout.
+# Every query runs in a new duckdb process.
+
+DUCKDB_DATABASE="${DUCKDB_DATABASE:-:memory:}"
+DUCKDB_READ_ONLY="${DUCKDB_READ_ONLY:-false}"
+
+# Write a complete HTTP/1.1 response to stdout.
+# Arguments: $1 - status (e.g. "200 OK"), $2 - content type, $3 - body.
+send_response() {
+    local status="$1"
+    local content_type="$2"
+    local body="$3"
+    # Content-Length is a byte count, but ${#body} counts characters in a
+    # multibyte locale; switch to the C locale before measuring.
+    local LC_ALL=C
+    local body_length=${#body}
+
+    printf "HTTP/1.1 %s\r\n" "$status"
+    printf "Content-Type: %s\r\n" "$content_type"
+    printf "Content-Length: %d\r\n" "$body_length"
+    printf "Connection: close\r\n"
+    printf "\r\n"
+    printf "%s" "$body"
+}
+
+# Return 0 when any line of $1 starts with "." (ignoring leading whitespace).
+has_dot_command() {
+    local line
+    while IFS= read -r line || [ -n "$line" ]; do
+        line=${line#"${line%%[![:space:]]*}"}
+        if [[ "$line" == .* ]]; then
+            return 0
+        fi
+    done <<< "$1"
+    return 1
+}
+
+# Read HTTP request line: "<method> <path> <version>"
+IFS=' ' read -r method path _
+
+# Read headers up to the blank line; only Content-Length is used
+content_length=0
+while IFS= read -r header; do
+    header=${header%$'\r'}
+    [ -z "$header" ] && break
+    # Header names are case-insensitive, so match on a lower-cased copy
+    if [[ "${header,,}" =~ ^content-length:[[:space:]]*([0-9]+) ]]; then
+        content_length="${BASH_REMATCH[1]}"
+    fi
+done
+
+# Read body
+body=""
+if [ "$content_length" -gt 0 ]; then
+    body=$(head -c "$content_length")
+fi
+
+# Health check
+if [ "$path" = "/health" ]; then
+    send_response "200 OK" "application/json" '{"status":"ok"}'
+    exit 0
+fi
+
+# Query endpoint
+if [ "$path" = "/query" ] && [ "$method" = "POST" ]; then
+    # Extract SQL from JSON body
+    sql=$(printf '%s' "$body" | jq -r '.sql // empty')
+
+    if [ -z "$sql" ]; then
+        send_response "400 Bad Request" "application/json" '{"error":"Missing sql field"}'
+        exit 0
+    fi
+
+    # The duckdb shell treats lines starting with "." as dot commands, and
+    # those include .shell/.system; never forward them from the network.
+    if has_dot_command "$sql"; then
+        send_response "400 Bad Request" "application/json" '{"success":false,"error":"Dot commands are not allowed"}'
+        exit 0
+    fi
+
+    # Build duckdb command
+    duckdb_args=()
+    if [ "$DUCKDB_READ_ONLY" = "true" ]; then
+        duckdb_args+=("-readonly")
+    fi
+    duckdb_args+=("-json")
+    duckdb_args+=("$DUCKDB_DATABASE")
+
+    # Execute query
+    start_time=$(date +%s%3N)
+    result=$(printf '%s\n' "$sql" | duckdb "${duckdb_args[@]}" 2>&1) || {
+        error_msg=$(printf '%s' "$result" | jq -Rs '.')
+        send_response "400 Bad Request" "application/json" "{\"success\":false,\"error\":$error_msg}"
+        exit 0
+    }
+    end_time=$(date +%s%3N)
+    time_ms=$((end_time - start_time))
+
+    # Parse result
+    if [ -z "$result" ] || [ "$result" = "[]" ]; then
+        send_response "200 OK" "application/json" "{\"success\":true,\"rows\":[],\"row_count\":0,\"time_ms\":$time_ms}"
+    else
+        # keys_unsorted keeps column names in the same order as the values
+        # produced by [.[]]; plain "keys" sorts them and mislabels columns.
+        # Input is slurped and the last document used, in case more than one
+        # statement printed a result set.
+        if response=$(printf '%s\n' "$result" | jq -c -s \
+            --argjson time_ms "$time_ms" '
+            .[-1] as $r
+            | {success: true,
+               columns: (($r[0] // {}) | keys_unsorted),
+               rows: [$r[] | [.[]]],
+               row_count: ($r | length),
+               time_ms: $time_ms}'); then
+            send_response "200 OK" "application/json" "$response"
+        else
+            send_response "500 Internal Server Error" "application/json" '{"success":false,"error":"Could not parse DuckDB output"}'
+        fi
+    fi
+    exit 0
+fi
+
+# Not found
+send_response "404 Not Found" "application/json" '{"error":"Not found. Use POST /query"}'
+HANDLER
+
+    chmod +x /tmp/duckdb_handler.sh
+
+    # Export env vars for handler
+    export DUCKDB_DATABASE
+    export DUCKDB_READ_ONLY
+}
+
+# Print the configuration, then replace this shell with socat, which forks
+# one handler process per connection.
+# NOTE: the listener is not bound to a specific address and the handler does
+# no authentication, so any client that can reach the port can run SQL.
+serve_api() {
+    echo "Starting DuckDB HTTP API on port $DUCKDB_PORT..."
+    echo "Database: $DUCKDB_DATABASE"
+    echo "Data directory: $DUCKDB_DATA_DIR"
+    if [ "$DUCKDB_READ_ONLY" = "true" ]; then
+        echo "Mode: read-only"
+    fi
+
+    exec socat "TCP-LISTEN:${DUCKDB_PORT},reuseaddr,fork" EXEC:"/tmp/duckdb_handler.sh"
+}
+
+create_handler
+serve_api