Initial duckdb skill with HTTP API
This commit is contained in:
120
SKILL.md
Normal file
120
SKILL.md
Normal file
@@ -0,0 +1,120 @@
|
||||
---
|
||||
name: duckdb
|
||||
description: DuckDB embedded analytical database with HTTP API
|
||||
metadata:
|
||||
version: "1.0.0"
|
||||
vibestack:
|
||||
main: false
|
||||
---
|
||||
|
||||
# DuckDB Skill
|
||||
|
||||
[DuckDB](https://duckdb.org/) - fast in-process analytical database with a simple HTTP API.
|
||||
|
||||
## Features
|
||||
|
||||
- Embedded OLAP database (no separate server process)
|
||||
- Query CSV, Parquet, JSON files directly
|
||||
- SQL interface via HTTP API
|
||||
- Persistent storage option
|
||||
- Auto-registers with Caddy if present
|
||||
|
||||
## Configuration
|
||||
|
||||
| Variable | Description | Default |
|
||||
|----------|-------------|---------|
|
||||
| `DUCKDB_PORT` | HTTP API port | `8432` |
|
||||
| `DUCKDB_DATABASE` | Database file path | `:memory:` |
|
||||
| `DUCKDB_DATA_DIR` | Directory for data files | `/data/duckdb` |
|
||||
| `DUCKDB_DOMAIN` | Domain for Caddy auto-config | (none) |
|
||||
| `DUCKDB_READ_ONLY` | Read-only mode | `false` |
|
||||
|
||||
## HTTP API
|
||||
|
||||
### Execute Query
|
||||
|
||||
```bash
|
||||
# Simple query
|
||||
curl -X POST http://localhost:8432/query \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"sql": "SELECT 1 + 1 AS result"}'
|
||||
|
||||
# Query CSV file
|
||||
curl -X POST http://localhost:8432/query \
|
||||
-d "{\"sql\": \"SELECT * FROM read_csv_auto('/data/duckdb/sales.csv') LIMIT 10\"}"
|
||||
|
||||
# Query Parquet file
|
||||
curl -X POST http://localhost:8432/query \
|
||||
-d "{\"sql\": \"SELECT * FROM read_parquet('/data/duckdb/events.parquet')\"}"
|
||||
```
|
||||
|
||||
### Response Format
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"columns": ["result"],
|
||||
"rows": [[2]],
|
||||
"row_count": 1,
|
||||
"time_ms": 1
|
||||
}
|
||||
```
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Analytics on Log Data
|
||||
|
||||
```sql
|
||||
-- Query JSON logs
|
||||
SELECT
|
||||
json_extract_string(line, '$.level') as level,
|
||||
count(*) as count
|
||||
FROM read_json_auto('/var/log/supervisor/*.log')
|
||||
GROUP BY level;
|
||||
```
|
||||
|
||||
### Query Remote Data
|
||||
|
||||
```sql
|
||||
-- Query remote Parquet (S3, HTTP)
|
||||
SELECT * FROM read_parquet('https://example.com/data.parquet');
|
||||
|
||||
-- Query remote CSV
|
||||
SELECT * FROM read_csv_auto('https://example.com/data.csv');
|
||||
```
|
||||
|
||||
### Create Persistent Tables
|
||||
|
||||
```sql
|
||||
-- Create table
|
||||
CREATE TABLE events AS
|
||||
SELECT * FROM read_parquet('/data/duckdb/events.parquet');
|
||||
|
||||
-- Query table
|
||||
SELECT date_trunc('hour', timestamp) as hour, count(*)
|
||||
FROM events
|
||||
GROUP BY 1 ORDER BY 1;
|
||||
```
|
||||
|
||||
## CLI Access
|
||||
|
||||
```bash
|
||||
# Interactive shell
|
||||
duckdb /data/duckdb/analytics.db
|
||||
|
||||
# One-off query
|
||||
duckdb -c "SELECT * FROM 'data.csv' LIMIT 5"
|
||||
```
|
||||
|
||||
## Extensions
|
||||
|
||||
DuckDB supports extensions for additional functionality:
|
||||
|
||||
```sql
|
||||
-- Install and load extensions
|
||||
INSTALL httpfs;
|
||||
LOAD httpfs;
|
||||
|
||||
-- Now query S3/HTTP directly
|
||||
SELECT * FROM read_parquet('s3://bucket/data.parquet');
|
||||
```
|
||||
100
scripts/autorun.sh
Normal file
100
scripts/autorun.sh
Normal file
@@ -0,0 +1,100 @@
|
||||
#!/bin/bash
# Setup script for the duckdb skill. Each setting below keeps a value already
# present in the environment and only falls back to the default when the
# variable is unset or empty.
set -e

# DuckDB release to download (without the leading "v").
: "${DUCKDB_VERSION:=1.1.3}"
# Directory under which sibling skills such as caddy are looked up.
: "${SKILLS_DIR:=/skills}"
# Directory created for DuckDB data files.
: "${DUCKDB_DATA_DIR:=/data/duckdb}"
|
||||
|
||||
# Translate the machine name reported by `uname -m` into the architecture
# suffix used in the DuckDB CLI release asset name.
# Outputs: "amd64" or "aarch64" on stdout.
# Exits with status 1 (message on stderr) for any other machine type.
get_arch() {
  local machine
  machine="$(uname -m)"

  if [ "$machine" = "x86_64" ]; then
    echo "amd64"
  elif [ "$machine" = "aarch64" ]; then
    echo "aarch64"
  else
    echo "Unsupported architecture: $machine" >&2
    exit 1
  fi
}
|
||||
|
||||
# Install DuckDB CLI
#
# Downloads the release zip for this machine and installs the binary as
# /usr/local/bin/duckdb. Does nothing when a duckdb command is already on PATH.
# Globals:  DUCKDB_VERSION (read) - release to download, without the "v"
# Requires: get_arch, curl; apt-get is used only when unzip is missing
# Returns:  0 on success, 1 when the architecture is unsupported or any
#           download/extract/install step fails
install_duckdb() {
  if command -v duckdb &>/dev/null; then
    echo "duckdb already installed: $(duckdb --version)"
    return 0
  fi

  echo "Installing DuckDB v${DUCKDB_VERSION}..."

  # Declare and assign separately: `local arch=$(get_arch)` reports the status
  # of `local`, which would hide an unsupported-architecture failure and let
  # the download continue with an empty suffix.
  local arch
  arch=$(get_arch) || return 1
  local url="https://github.com/duckdb/duckdb/releases/download/v${DUCKDB_VERSION}/duckdb_cli-linux-${arch}.zip"

  if ! command -v unzip &>/dev/null; then
    # Separate statements so a failed `apt-get update` is not silently
    # skipped the way it is inside an `&&` list under `set -e`.
    apt-get update || return 1
    apt-get install -y unzip || return 1
  fi

  # Private scratch directory instead of fixed file names under /tmp.
  local workdir
  workdir=$(mktemp -d) || return 1

  # -f makes curl fail on an HTTP error instead of saving the error page.
  if curl -fsSL "$url" -o "$workdir/duckdb.zip" \
    && unzip -o "$workdir/duckdb.zip" -d "$workdir" \
    && mv "$workdir/duckdb" /usr/local/bin/duckdb \
    && chmod +x /usr/local/bin/duckdb; then
    rm -rf -- "$workdir"
  else
    rm -rf -- "$workdir"
    echo "Failed to install DuckDB from $url" >&2
    return 1
  fi

  echo "DuckDB installed: $(duckdb --version)"
}
|
||||
|
||||
# Install socat and jq for the HTTP server
#
# Each tool is probed on its own: checking only socat would skip jq on hosts
# where socat is already installed, leaving the request handler without the
# JSON tool it depends on.
# Returns: 0 when both tools are present or were installed; otherwise the
#          failing apt-get status
install_socat() {
  local -a missing=()
  local tool

  for tool in socat jq; do
    command -v "$tool" &>/dev/null || missing+=("$tool")
  done

  if [ "${#missing[@]}" -eq 0 ]; then
    return 0
  fi

  echo "Installing ${missing[*]}..."
  apt-get update || return
  apt-get install -y "${missing[@]}"
}
|
||||
|
||||
# Make sure the DuckDB data directory exists and report its location.
# Globals: DUCKDB_DATA_DIR (read)
# Outputs: one "Data directory: ..." line on stdout
setup_dirs() {
  local data_dir="$DUCKDB_DATA_DIR"

  mkdir -p "$data_dir"
  printf 'Data directory: %s\n' "$data_dir"
}
|
||||
|
||||
# Write a Caddy snippet for the DuckDB API when a caddy skill directory exists.
# Globals:
#   SKILLS_DIR    (read) - parent directory expected to contain "caddy"
#   DUCKDB_PORT   (read) - upstream port; 8432 when unset or empty
#   DUCKDB_DOMAIN (read) - site address; when empty only a commented-out
#                          template is written
# Outputs: progress messages on stdout; creates snippets.d/duckdb.caddy
configure_caddy() {
  local port="${DUCKDB_PORT:-8432}"
  local domain="${DUCKDB_DOMAIN:-}"
  local caddy_home="$SKILLS_DIR/caddy"

  # Without a caddy directory there is nothing to configure.
  if [ ! -d "$caddy_home" ]; then
    echo "Caddy not found - DuckDB API on port $port"
    return 0
  fi

  echo "Caddy detected - configuring reverse proxy..."
  mkdir -p "$caddy_home/snippets.d"
  local target="$caddy_home/snippets.d/duckdb.caddy"

  if [ -z "$domain" ]; then
    # No domain given: leave a template to be pasted into a site block by hand.
    printf '%s\n' \
      "# Auto-generated by duckdb skill" \
      "# Add to your site block:" \
      "# handle /duckdb/* {" \
      "# uri strip_prefix /duckdb" \
      "# reverse_proxy localhost:$port" \
      "# }" > "$target"
    echo "Caddy snippet created (manual config needed)"
    return 0
  fi

  # Domain given: emit a complete site block proxying to the API port.
  printf '%s\n' \
    "# Auto-generated by duckdb skill" \
    "$domain {" \
    "reverse_proxy localhost:$port" \
    "}" > "$target"
  echo "Caddy config: $domain -> localhost:$port"
}
|
||||
|
||||
# Entry point: run the setup steps in their original order, then report
# completion.
main() {
  install_duckdb
  install_socat
  setup_dirs
  configure_caddy

  echo "DuckDB setup complete"
}

main "$@"
|
||||
129
scripts/run.sh
Normal file
129
scripts/run.sh
Normal file
@@ -0,0 +1,129 @@
|
||||
#!/bin/bash
# Starts the DuckDB HTTP API. Each setting below keeps a value already present
# in the environment and only falls back to the default when the variable is
# unset or empty.
set -e

# TCP port the API listens on.
: "${DUCKDB_PORT:=8432}"
# Database passed to the duckdb CLI.
: "${DUCKDB_DATABASE:=:memory:}"
# Directory for data files (reported at startup).
: "${DUCKDB_DATA_DIR:=/data/duckdb}"
# "true" makes the handler pass -readonly to the duckdb CLI.
: "${DUCKDB_READ_ONLY:=false}"
|
||||
|
||||
# Create query handler script
#
# Writes the per-connection HTTP handler to /tmp/duckdb_handler.sh, marks it
# executable and exports the settings the handler reads from its environment.
# Globals: DUCKDB_DATABASE, DUCKDB_READ_ONLY (exported)
# NOTE(review): the fixed /tmp path is predictable; it is kept because
# serve_api launches the handler from this exact location.
create_handler() {
  cat > /tmp/duckdb_handler.sh << 'HANDLER'
#!/bin/bash
# Serves one HTTP request read from stdin and writes the response to stdout.
#   /health      -> {"status":"ok"}
#   POST /query  -> runs the "sql" field of the JSON body through the duckdb CLI
#   anything else -> 404
# Requires: jq, duckdb

DUCKDB_DATABASE="${DUCKDB_DATABASE:-:memory:}"
DUCKDB_READ_ONLY="${DUCKDB_READ_ONLY:-false}"

# Read HTTP request line: "<method> <path> <version>"
read -r method path _
path=${path%$'\r'}

# Read headers up to the blank line; only Content-Length is used.
content_length=0
while IFS= read -r header; do
  header=${header%$'\r'}
  [ -z "$header" ] && break
  # Header names are case-insensitive, so match on a lower-cased copy.
  if [[ "${header,,}" =~ ^content-length:[[:space:]]*([0-9]+) ]]; then
    content_length="${BASH_REMATCH[1]}"
  fi
done

# Read body
body=""
if [ "$content_length" -gt 0 ]; then
  body=$(head -c "$content_length")
fi

# Write a complete HTTP/1.1 response.
#   $1 - status line text, e.g. "200 OK"
#   $2 - Content-Type value
#   $3 - response body
send_response() {
  local status="$1"
  local content_type="$2"
  local body="$3"
  # Content-Length is a byte count; under a UTF-8 locale ${#body} would count
  # characters and cut off responses that contain multi-byte text.
  local LC_ALL=C
  local body_length=${#body}

  printf "HTTP/1.1 %s\r\n" "$status"
  printf "Content-Type: %s\r\n" "$content_type"
  printf "Content-Length: %d\r\n" "$body_length"
  printf "Connection: close\r\n"
  printf "\r\n"
  printf "%s" "$body"
}

# Health check
if [ "$path" = "/health" ]; then
  send_response "200 OK" "application/json" '{"status":"ok"}'
  exit 0
fi

# Query endpoint
if [ "$path" = "/query" ] && [ "$method" = "POST" ]; then
  # Extract SQL from JSON body (empty when the body is not valid JSON).
  sql=$(printf '%s' "$body" | jq -r '.sql // empty')

  if [ -z "$sql" ]; then
    send_response "400 Bad Request" "application/json" '{"error":"Missing sql field"}'
    exit 0
  fi

  # Build duckdb command
  duckdb_args=()
  if [ "$DUCKDB_READ_ONLY" = "true" ]; then
    duckdb_args+=("-readonly")
  fi
  duckdb_args+=("-json")
  duckdb_args+=("$DUCKDB_DATABASE")

  # Execute query; stderr is captured so the error text can be returned.
  start_time=$(date +%s%3N)
  result=$(printf '%s\n' "$sql" | duckdb "${duckdb_args[@]}" 2>&1) || {
    error_msg=$(printf '%s\n' "$result" | jq -Rs '.')
    send_response "400 Bad Request" "application/json" "{\"success\":false,\"error\":$error_msg}"
    exit 0
  }
  end_time=$(date +%s%3N)
  time_ms=$((end_time - start_time))

  # Parse result
  if [ -z "$result" ] || [ "$result" = "[]" ]; then
    send_response "200 OK" "application/json" "{\"success\":true,\"rows\":[],\"row_count\":0,\"time_ms\":$time_ms}"
  # keys_unsorted keeps each object's own key order, which is the order in
  # which [.[]] emits the values; plain `keys` sorts the names and would
  # attach column names to the wrong values.
  elif response=$(printf '%s\n' "$result" | jq --argjson time_ms "$time_ms" \
    '{success: true, columns: (.[0] | keys_unsorted), rows: [.[] | [.[]]], row_count: length, time_ms: $time_ms}'); then
    send_response "200 OK" "application/json" "$response"
  else
    # duckdb output that jq could not turn into rows: return it as an error
    # rather than an empty 200 response.
    error_msg=$(printf '%s\n' "$result" | jq -Rs '.')
    send_response "500 Internal Server Error" "application/json" "{\"success\":false,\"error\":$error_msg}"
  fi
  exit 0
fi

# Not found
send_response "404 Not Found" "application/json" '{"error":"Not found. Use POST /query"}'
HANDLER

  chmod +x /tmp/duckdb_handler.sh

  # Export env vars for handler
  export DUCKDB_DATABASE
  export DUCKDB_READ_ONLY
}
|
||||
|
||||
# Serve HTTP API
#
# Prints the effective configuration, then replaces this shell with socat
# listening on DUCKDB_PORT and running /tmp/duckdb_handler.sh for connections.
# Globals: DUCKDB_PORT, DUCKDB_DATABASE, DUCKDB_DATA_DIR, DUCKDB_READ_ONLY (read)
# Does not return when socat starts successfully (exec).
serve_api() {
  echo "Starting DuckDB HTTP API on port $DUCKDB_PORT..."
  echo "Database: $DUCKDB_DATABASE"
  echo "Data directory: $DUCKDB_DATA_DIR"
  if [ "$DUCKDB_READ_ONLY" = "true" ]; then
    echo "Mode: read-only"
  fi

  # Quote the address so the port value is never word-split or globbed.
  exec socat "TCP-LISTEN:${DUCKDB_PORT},reuseaddr,fork" "EXEC:/tmp/duckdb_handler.sh"
}
|
||||
|
||||
# Entry point: generate the request handler, then start serving.
main() {
  create_handler
  serve_api
}

main "$@"
|
||||
Reference in New Issue
Block a user