Initial duckdb skill with HTTP API

This commit is contained in:
Azat
2026-02-02 22:37:55 +01:00
commit 4f754c7234
3 changed files with 349 additions and 0 deletions

120
SKILL.md Normal file
View File

@@ -0,0 +1,120 @@
---
name: duckdb
description: DuckDB embedded analytical database with HTTP API
metadata:
version: "1.0.0"
vibestack:
main: false
---
# DuckDB Skill
[DuckDB](https://duckdb.org/) - fast in-process analytical database with a simple HTTP API.
## Features
- Embedded OLAP database (no separate server process)
- Query CSV, Parquet, JSON files directly
- SQL interface via HTTP API
- Persistent storage option
- Auto-registers with Caddy if present
## Configuration
| Variable | Description | Default |
|----------|-------------|---------|
| `DUCKDB_PORT` | HTTP API port | `8432` |
| `DUCKDB_DATABASE` | Database file path | `:memory:` |
| `DUCKDB_DATA_DIR` | Directory for data files | `/data/duckdb` |
| `DUCKDB_DOMAIN` | Domain for Caddy auto-config | (none) |
| `DUCKDB_READ_ONLY` | Read-only mode | `false` |
## HTTP API
### Execute Query
```bash
# Simple query
curl -X POST http://localhost:8432/query \
-H "Content-Type: application/json" \
-d '{"sql": "SELECT 1 + 1 AS result"}'
# Query CSV file (file paths are SQL string literals — single quotes)
curl -X POST http://localhost:8432/query \
  -d "{\"sql\": \"SELECT * FROM read_csv_auto('/data/duckdb/sales.csv') LIMIT 10\"}"
# Query Parquet file
curl -X POST http://localhost:8432/query \
  -d "{\"sql\": \"SELECT * FROM read_parquet('/data/duckdb/events.parquet')\"}"
```
### Response Format
```json
{
"success": true,
"columns": ["result"],
"rows": [[2]],
"row_count": 1,
"time_ms": 0.5
}
```
## Use Cases
### Analytics on Log Data
```sql
-- Query JSON logs
SELECT
json_extract_string(line, '$.level') as level,
count(*) as count
FROM read_json_auto('/var/log/supervisor/*.log')
GROUP BY level;
```
### Query Remote Data
```sql
-- Query remote Parquet (S3, HTTP)
SELECT * FROM read_parquet('https://example.com/data.parquet');
-- Query remote CSV
SELECT * FROM read_csv_auto('https://example.com/data.csv');
```
### Create Persistent Tables
```sql
-- Create table
CREATE TABLE events AS
SELECT * FROM read_parquet('/data/duckdb/events.parquet');
-- Query table
SELECT date_trunc('hour', timestamp) as hour, count(*)
FROM events
GROUP BY 1 ORDER BY 1;
```
## CLI Access
```bash
# Interactive shell
duckdb /data/duckdb/analytics.db
# One-off query
duckdb -c "SELECT * FROM 'data.csv' LIMIT 5"
```
## Extensions
DuckDB supports extensions for additional functionality:
```sql
-- Install and load extensions
INSTALL httpfs;
LOAD httpfs;
-- Now query S3/HTTP directly
SELECT * FROM read_parquet('s3://bucket/data.parquet');
```

100
scripts/autorun.sh Normal file
View File

@@ -0,0 +1,100 @@
#!/bin/bash
# Install script for the duckdb skill: installs the DuckDB CLI and socat,
# prepares the data directory, and registers with Caddy when present.
set -e
# Overridable via environment: DuckDB release to install, skills root
# (used to detect the caddy skill), and the data-file directory.
DUCKDB_VERSION="${DUCKDB_VERSION:-1.1.3}"
SKILLS_DIR="${SKILLS_DIR:-/skills}"
DUCKDB_DATA_DIR="${DUCKDB_DATA_DIR:-/data/duckdb}"
# Detect architecture
# Map the machine hardware name to the architecture token used in the
# DuckDB release asset filenames. Exits 1 for anything unsupported.
get_arch() {
  local machine
  machine=$(uname -m)
  case "$machine" in
    x86_64)  echo "amd64" ;;
    aarch64) echo "aarch64" ;;
    *)
      echo "Unsupported architecture: $machine" >&2
      exit 1
      ;;
  esac
}
# Install DuckDB CLI
# Install the DuckDB CLI from the official release zip.
# Idempotent: returns immediately if a duckdb binary is already on PATH.
# Globals: DUCKDB_VERSION (read). Requires network + apt.
install_duckdb() {
  if command -v duckdb &>/dev/null; then
    echo "duckdb already installed: $(duckdb --version)"
    return 0
  fi
  echo "Installing DuckDB v${DUCKDB_VERSION}..."
  # Split declaration from assignment: 'local arch=$(get_arch)' would
  # always return 0, masking a get_arch failure (unsupported arch).
  local arch
  arch=$(get_arch) || return 1
  local url="https://github.com/duckdb/duckdb/releases/download/v${DUCKDB_VERSION}/duckdb_cli-linux-${arch}.zip"
  apt-get update && apt-get install -y unzip
  # -f: fail on HTTP errors instead of saving an error page that would
  # only surface later as a cryptic unzip failure.
  curl -fsSL "$url" -o /tmp/duckdb.zip
  unzip -o /tmp/duckdb.zip -d /tmp
  mv /tmp/duckdb /usr/local/bin/duckdb
  chmod +x /usr/local/bin/duckdb
  rm /tmp/duckdb.zip
  echo "DuckDB installed: $(duckdb --version)"
}
# Install socat for HTTP server
# Ensure socat (TCP listener) and jq (JSON parsing) are available; the
# HTTP handler written by run.sh depends on both.
install_socat() {
  command -v socat &>/dev/null && return 0
  echo "Installing socat..."
  apt-get update && apt-get install -y socat jq
}
# Setup directories
# Create the directory DuckDB uses for data files and report it.
setup_dirs() {
  mkdir -p "$DUCKDB_DATA_DIR"
  printf 'Data directory: %s\n' "$DUCKDB_DATA_DIR"
}
# Configure Caddy if present
# Write a Caddy snippet so the API is reverse-proxied automatically when
# the caddy skill is installed; otherwise just report the raw port.
# Globals: SKILLS_DIR (read), DUCKDB_PORT / DUCKDB_DOMAIN (read, optional).
configure_caddy() {
  local caddy_dir="$SKILLS_DIR/caddy"
  local port="${DUCKDB_PORT:-8432}"
  local domain="${DUCKDB_DOMAIN:-}"

  # No caddy skill present: nothing to configure.
  if [ ! -d "$caddy_dir" ]; then
    echo "Caddy not found - DuckDB API on port $port"
    return 0
  fi

  echo "Caddy detected - configuring reverse proxy..."
  mkdir -p "$caddy_dir/snippets.d"
  local snippet="$caddy_dir/snippets.d/duckdb.caddy"

  if [ -n "$domain" ]; then
    # Full site block: Caddy terminates TLS for the domain and proxies
    # to the local API port.
    {
      echo "# Auto-generated by duckdb skill"
      echo "$domain {"
      echo "reverse_proxy localhost:$port"
      echo "}"
    } > "$snippet"
    echo "Caddy config: $domain -> localhost:$port"
  else
    # No domain configured: emit a commented template for manual setup.
    {
      echo "# Auto-generated by duckdb skill"
      echo "# Add to your site block:"
      echo "# handle /duckdb/* {"
      echo "# uri strip_prefix /duckdb"
      echo "# reverse_proxy localhost:$port"
      echo "# }"
    } > "$snippet"
    echo "Caddy snippet created (manual config needed)"
  fi
}
# Main sequence: install tooling, prepare storage, wire up the proxy.
install_duckdb
install_socat
setup_dirs
configure_caddy
echo "DuckDB setup complete"

129
scripts/run.sh Normal file
View File

@@ -0,0 +1,129 @@
#!/bin/bash
# Entry point for the DuckDB HTTP API: generates the per-connection
# request handler, then serves it over TCP via socat.
set -e
# Overridable via environment (documented in SKILL.md).
DUCKDB_PORT="${DUCKDB_PORT:-8432}"
DUCKDB_DATABASE="${DUCKDB_DATABASE:-:memory:}"
DUCKDB_DATA_DIR="${DUCKDB_DATA_DIR:-/data/duckdb}"
DUCKDB_READ_ONLY="${DUCKDB_READ_ONLY:-false}"
# Create query handler script
# Write the per-connection HTTP request handler that socat spawns, and
# export the settings it reads. The heredoc delimiter is quoted
# ('HANDLER') so the script body is written verbatim, unexpanded.
create_handler() {
  cat > /tmp/duckdb_handler.sh << 'HANDLER'
#!/bin/bash
# Per-connection handler: parses one HTTP request from stdin and writes
# one HTTP response to stdout (socat wires both to the TCP socket).
DUCKDB_DATABASE="${DUCKDB_DATABASE:-:memory:}"
DUCKDB_READ_ONLY="${DUCKDB_READ_ONLY:-false}"

# --- Request line ---
read -r request_line
method=$(echo "$request_line" | cut -d' ' -f1)
path=$(echo "$request_line" | cut -d' ' -f2)

# --- Headers (only Content-Length matters) ---
content_length=0
while read -r header; do
    header=$(echo "$header" | tr -d '\r')
    [ -z "$header" ] && break
    if [[ "$header" =~ ^[Cc]ontent-[Ll]ength:\ *([0-9]+) ]]; then
        content_length="${BASH_REMATCH[1]}"
    fi
done

# --- Body ---
body=""
if [ "$content_length" -gt 0 ]; then
    body=$(head -c "$content_length")
fi

# Emit a complete HTTP/1.1 response.
# Content-Length must be a BYTE count: ${#body} counts characters and
# undercounts multi-byte UTF-8 payloads, so measure with wc -c instead.
send_response() {
    local status="$1"
    local content_type="$2"
    local body="$3"
    local body_length
    body_length=$(printf '%s' "$body" | wc -c)
    printf "HTTP/1.1 %s\r\n" "$status"
    printf "Content-Type: %s\r\n" "$content_type"
    printf "Content-Length: %d\r\n" "$body_length"
    printf "Connection: close\r\n"
    printf "\r\n"
    printf "%s" "$body"
}

# Health check
if [ "$path" = "/health" ]; then
    send_response "200 OK" "application/json" '{"status":"ok"}'
    exit 0
fi

# Query endpoint
if [ "$path" = "/query" ] && [ "$method" = "POST" ]; then
    # Extract SQL from the JSON body; malformed JSON falls through to
    # the same 400 as a missing field (jq's stderr noise is suppressed).
    sql=$(printf '%s' "$body" | jq -r '.sql // empty' 2>/dev/null)
    if [ -z "$sql" ]; then
        send_response "400 Bad Request" "application/json" '{"error":"Missing sql field"}'
        exit 0
    fi
    # Build duckdb CLI arguments; flags must precede the database path.
    # NOTE(review): -readonly with :memory: is likely rejected by the
    # CLI — that surfaces as a query error below. Confirm if supported.
    duckdb_args=()
    if [ "$DUCKDB_READ_ONLY" = "true" ]; then
        duckdb_args+=("-readonly")
    fi
    duckdb_args+=("-json")
    duckdb_args+=("$DUCKDB_DATABASE")
    # Execute. On failure the assignment still captures the combined
    # stdout+stderr (2>&1), which becomes the JSON error payload.
    start_time=$(date +%s%3N)
    result=$(printf '%s\n' "$sql" | duckdb "${duckdb_args[@]}" 2>&1) || {
        error_msg=$(printf '%s' "$result" | jq -Rs '.')
        send_response "400 Bad Request" "application/json" "{\"success\":false,\"error\":$error_msg}"
        exit 0
    }
    end_time=$(date +%s%3N)
    time_ms=$((end_time - start_time))
    # Reshape duckdb's JSON (array of row objects) into columns + rows.
    if [ -z "$result" ] || [ "$result" = "[]" ]; then
        send_response "200 OK" "application/json" "{\"success\":true,\"rows\":[],\"row_count\":0,\"time_ms\":$time_ms}"
    else
        row_count=$(printf '%s' "$result" | jq 'length')
        # keys_unsorted preserves column order; plain 'keys' sorts
        # alphabetically and would misalign columns with the row
        # values, which are emitted in object insertion order.
        columns=$(printf '%s' "$result" | jq -c '.[0] | keys_unsorted')
        rows=$(printf '%s' "$result" | jq -c '[.[] | [.[]]]')
        response=$(jq -n \
            --argjson columns "$columns" \
            --argjson rows "$rows" \
            --argjson row_count "$row_count" \
            --argjson time_ms "$time_ms" \
            '{success:true, columns:$columns, rows:$rows, row_count:$row_count, time_ms:$time_ms}')
        send_response "200 OK" "application/json" "$response"
    fi
    exit 0
fi

# Not found
send_response "404 Not Found" "application/json" '{"error":"Not found. Use POST /query"}'
HANDLER
  chmod +x /tmp/duckdb_handler.sh
  # The handler runs as a fresh process under socat, so the settings it
  # reads must be exported here.
  export DUCKDB_DATABASE
  export DUCKDB_READ_ONLY
}
# Serve HTTP API
# Start the HTTP API: print the effective configuration, then replace
# this process with socat, which forks one handler per TCP connection.
serve_api() {
  echo "Starting DuckDB HTTP API on port $DUCKDB_PORT..."
  echo "Database: $DUCKDB_DATABASE"
  echo "Data directory: $DUCKDB_DATA_DIR"
  if [ "$DUCKDB_READ_ONLY" = "true" ]; then
    echo "Mode: read-only"
  fi
  exec socat TCP-LISTEN:$DUCKDB_PORT,reuseaddr,fork EXEC:"/tmp/duckdb_handler.sh"
}
# Generate the handler script, then replace this process with the listener.
create_handler
serve_api