From bf5ba832ee676fb4e773d55866a0738a06af0ed9 Mon Sep 17 00:00:00 2001
From: Azat
Date: Mon, 2 Feb 2026 23:48:37 +0100
Subject: [PATCH] Initial memory skill - agent memory with pgvector semantic search

---
 SKILL.md           | 186 +++++++++++++++++++++
 scripts/autorun.sh | 142 ++++++++++++++++
 scripts/run.sh     |  22 +++
 src/api.py         | 394 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 744 insertions(+)
 create mode 100644 SKILL.md
 create mode 100644 scripts/autorun.sh
 create mode 100644 scripts/run.sh
 create mode 100644 src/api.py

diff --git a/SKILL.md b/SKILL.md
new file mode 100644
index 0000000..aeb7c04
--- /dev/null
+++ b/SKILL.md
@@ -0,0 +1,186 @@
+---
+name: memory
+description: Agent memory with semantic search via pgvector
+metadata:
+  version: "1.0.0"
+  vibestack:
+    main: false
+    requires:
+      - postgres
+---
+
+# Memory Skill
+
+Lightweight agent memory with semantic search using PostgreSQL + pgvector.
+
+## What Gets Stored
+
+| Type | Description | Example |
+|------|-------------|---------|
+| **conversation** | Chat messages with context | User questions, agent responses |
+| **execution** | Task runs and outcomes | Commands run, success/failure, duration |
+| **finding** | Discoveries and solutions | "Port 8080 was blocked by firewall" |
+| **memory** | Persistent agent knowledge | Preferences, learned patterns |
+
+## Features
+
+- Semantic search across all memory types
+- Automatic embedding on insert
+- Configurable retention policies
+- Simple REST API for CRUD + search
+- Lightweight local embeddings (~90MB model)
+
+## Configuration
+
+### Environment Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `MEMORY_PORT` | `8081` | API port |
+| `MEMORY_MODEL` | `all-MiniLM-L6-v2` | Sentence-transformers model |
+| `MEMORY_RETENTION_DAYS` | `30` | Memories older than N days are deleted each time the API starts (0 = keep forever) |
+| `MEMORY_MAX_RESULTS` | `10` | Default search results limit |
+
+## API
+
+### Store Memory
+
+```bash
+# Store a finding
+curl -X POST http://localhost:8081/memory \
+  -H "Content-Type: application/json" \
+  -d '{
+    "type": "finding",
+    "content": "Redis was OOM - increased REDIS_MAXMEMORY to 256mb",
+    "metadata": {"skill": "redis", "severity": "resolved"}
+  }'
+
+# Store conversation (the JSON Content-Type header is required)
+curl -X POST http://localhost:8081/memory -H "Content-Type: application/json" \
+  -d '{
+    "type": "conversation",
+    "content": "User asked how to check disk space. Showed df -h command.",
+    "metadata": {"session_id": "abc123"}
+  }'
+```
+
+### Search Memory
+
+```bash
+# Semantic search
+curl "http://localhost:8081/search?q=redis+memory+issue&limit=5"
+
+# Filter by type
+curl "http://localhost:8081/search?q=disk+space&type=conversation"
+
+# Filter by metadata (not implemented yet: /search currently ignores metadata.* parameters)
+curl "http://localhost:8081/search?q=error&metadata.skill=postgres"
+```
+
+### Response Format
+
+```json
+{
+  "results": [
+    {
+      "id": "01HQXYZ...",
+      "type": "finding",
+      "content": "Redis was OOM - increased REDIS_MAXMEMORY to 256mb",
+      "metadata": {"skill": "redis"},
+      "similarity": 0.87,
+      "created_at": "2024-01-15T10:30:00Z"
+    }
+  ]
+}
+```
+
+### List Recent
+
+```bash
+# Recent memories by type
+curl "http://localhost:8081/memory?type=finding&limit=20"
+
+# All recent
+curl "http://localhost:8081/memory?limit=50"
+```
+
+### Delete
+
+```bash
+# Delete specific memory
+curl -X DELETE "http://localhost:8081/memory/01HQXYZ..."
+
+# Bulk delete by type older than N days (units: h, d, w)
+curl -X DELETE "http://localhost:8081/memory?type=execution&older_than=7d"
+```
+
+## Database Schema
+
+```sql
+CREATE EXTENSION IF NOT EXISTS vector;
+
+CREATE TABLE memories (
+  id TEXT PRIMARY KEY,
+  type TEXT NOT NULL,
+  content TEXT NOT NULL,
+  embedding vector(384),
+  metadata JSONB DEFAULT '{}',
+  created_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+CREATE INDEX idx_memories_type ON memories(type);
+CREATE INDEX idx_memories_created ON memories(created_at DESC);
+CREATE INDEX idx_memories_embedding ON memories USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
+```
+
+## Usage Patterns
+
+### Agent Self-Reflection
+
+Before starting a task, search for relevant past experiences:
+
+```bash
+# "Have I done something like this before?"
+curl "http://localhost:8081/search?q=deploy+nodejs+application"
+```
+
+### Error Resolution
+
+When encountering an error, check whether it has been seen before:
+
+```bash
+curl "http://localhost:8081/search?q=connection+refused+port+5432&type=finding"
+```
+
+### Conversation Context
+
+Recall previous discussions with the user:
+
+```bash
+curl "http://localhost:8081/search?q=user+preferences+formatting&type=conversation"
+```
+
+## Embedding Model
+
+Uses `all-MiniLM-L6-v2` from sentence-transformers:
+- 384 dimensions
+- ~90MB download
+- Fast inference (~5ms per embedding)
+- Good quality for short texts
+
+For an even smaller footprint, set `MEMORY_MODEL=paraphrase-MiniLM-L3-v2` (~60MB, slightly lower quality). Any replacement model must also produce 384-dimensional embeddings, because the `embedding` column is declared as `vector(384)`.
+
+## Integration
+
+Other skills can store memories by POSTing to the API:
+
+```bash
+# In any skill's script ($1 and $2 must already be JSON-escaped; $3 must be valid JSON)
+store_memory() {
+  curl -s -X POST http://localhost:8081/memory \
+    -H "Content-Type: application/json" \
+    -d "{\"type\":\"$1\",\"content\":\"$2\",\"metadata\":$3}"
+}
+
+store_memory "execution" "Backup completed successfully" '{"skill":"backup","duration":45}'
+```
diff --git a/scripts/autorun.sh b/scripts/autorun.sh
new file mode 100644
index 0000000..bca59e6
--- /dev/null
+++ b/scripts/autorun.sh
@@ -0,0 +1,154 @@
+#!/bin/bash
+set -e
+
+POSTGRES_VERSION="${POSTGRES_VERSION:-16}"
+MEMORY_MODEL="${MEMORY_MODEL:-all-MiniLM-L6-v2}"
+
+# Install Python if not present
+install_python() {
+    if command -v python3 &>/dev/null; then
+        echo "Python already installed: $(python3 --version)"
+        return 0
+    fi
+
+    echo "Installing Python..."
+    apt-get update
+    apt-get install -y python3 python3-pip python3-venv
+
+    echo "Python installed: $(python3 --version)"
+}
+
+# Install pgvector extension (detected via its control file for POSTGRES_VERSION)
+install_pgvector() {
+    if [ -f "/usr/share/postgresql/${POSTGRES_VERSION}/extension/vector.control" ]; then
+        echo "pgvector already installed"
+        return 0
+    fi
+
+    echo "Installing pgvector..."
+    apt-get update
+    apt-get install -y postgresql-${POSTGRES_VERSION}-pgvector
+
+    echo "pgvector installed"
+}
+
+# Setup Python virtual environment and dependencies
+setup_python_env() {
+    local skill_dir="$(dirname "$(dirname "$0")")"
+    local venv_dir="$skill_dir/.venv"
+    local ready_marker="$venv_dir/.deps-installed"
+
+    # Skip only when a previous run finished installing dependencies; a venv
+    # directory left behind by an interrupted run is repaired below.
+    if [ -f "$ready_marker" ]; then
+        echo "Python venv already exists"
+        return 0
+    fi
+
+    echo "Creating Python virtual environment..."
+    python3 -m venv "$venv_dir"
+
+    echo "Installing Python dependencies..."
+    "$venv_dir/bin/pip" install --upgrade pip
+    "$venv_dir/bin/pip" install \
+        fastapi==0.109.0 \
+        uvicorn==0.27.0 \
+        sentence-transformers==2.3.1 \
+        psycopg2-binary==2.9.9 \
+        python-ulid==2.2.0
+    touch "$ready_marker"
+
+    echo "Python environment ready"
+}
+
+# Pre-download embedding model
+download_model() {
+    local skill_dir="$(dirname "$(dirname "$0")")"
+    local venv_dir="$skill_dir/.venv"
+
+    echo "Pre-downloading embedding model: $MEMORY_MODEL..."
+    # Pass the model name through the environment rather than splicing it
+    # into the Python source, so quotes in the value cannot break the code.
+    MEMORY_MODEL="$MEMORY_MODEL" "$venv_dir/bin/python" -c '
+import os
+from sentence_transformers import SentenceTransformer
+model = SentenceTransformer(os.environ["MEMORY_MODEL"])
+print(f"Model loaded: {model.get_sentence_embedding_dimension()} dimensions")
+'
+    echo "Model downloaded"
+}
+
+# Initialize database schema
+init_database() {
+    # Wait up to 30 seconds for postgres to be available
+    local retries=30
+    while [ $retries -gt 0 ]; do
+        if pg_isready -q 2>/dev/null; then
+            break
+        fi
+        echo "Waiting for PostgreSQL..."
+        sleep 1
+        retries=$((retries - 1))
+    done
+
+    if [ $retries -eq 0 ]; then
+        echo "PostgreSQL not available - schema will be created on first run"
+        return 0
+    fi
+
+    # Source postgres env if available
+    [ -f /run/vibestack/postgres.env ] && source /run/vibestack/postgres.env
+
+    local pg_user="${POSTGRES_USER:-vibestack}"
+    local pg_db="${POSTGRES_DB:-vibestack}"
+
+    echo "Initializing memory schema..."
+    # ON_ERROR_STOP makes psql exit non-zero when a statement fails; without
+    # it "Database schema ready" was printed even if nothing was created.
+    if psql -v ON_ERROR_STOP=1 -U "$pg_user" -d "$pg_db" << 'EOF'
+-- Enable pgvector
+CREATE EXTENSION IF NOT EXISTS vector;
+
+-- Memories table
+CREATE TABLE IF NOT EXISTS memories (
+    id TEXT PRIMARY KEY,
+    type TEXT NOT NULL,
+    content TEXT NOT NULL,
+    embedding vector(384),
+    metadata JSONB DEFAULT '{}',
+    created_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+-- Indexes
+CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(type);
+CREATE INDEX IF NOT EXISTS idx_memories_created ON memories(created_at DESC);
+
+-- Vector index (IVFFlat for approximate nearest neighbor)
+-- Created whenever it is missing; queries use exact search if it is absent
+DO $$
+BEGIN
+    IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_memories_embedding') THEN
+        CREATE INDEX idx_memories_embedding ON memories
+        USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
+    END IF;
+EXCEPTION WHEN others THEN
+    -- Best effort: if index creation fails for any reason, continue without it
+    RAISE NOTICE 'Vector index not created (will use exact search)';
+END $$;
+
+SELECT 'Memory schema initialized' as status;
+EOF
+    then
+        echo "Database schema ready"
+    else
+        echo "Schema initialization failed - the API will try to create the schema on startup" >&2
+    fi
+}
+
+install_python
+install_pgvector
+setup_python_env
+download_model
+init_database
+
+echo "Memory skill setup complete"
diff --git a/scripts/run.sh b/scripts/run.sh
new file mode 100644
index 0000000..5ed0156
--- /dev/null
+++ b/scripts/run.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+set -e
+
+MEMORY_PORT="${MEMORY_PORT:-8081}"
+SKILL_DIR="$(dirname "$(dirname "$0")")"
+VENV_DIR="$SKILL_DIR/.venv"
+
+# Source postgres connection
+[ -f /run/vibestack/postgres.env ] && source /run/vibestack/postgres.env
+
+# Export for Python
+export MEMORY_PORT
+export MEMORY_MODEL="${MEMORY_MODEL:-all-MiniLM-L6-v2}"
+export MEMORY_RETENTION_DAYS="${MEMORY_RETENTION_DAYS:-30}"
+export MEMORY_MAX_RESULTS="${MEMORY_MAX_RESULTS:-10}"
+export DATABASE_URL="${DATABASE_URL:-postgresql://vibestack:vibestack@localhost:5432/vibestack}"
+
+echo "Starting Memory API on port $MEMORY_PORT..."
+echo "Model: $MEMORY_MODEL"
+echo "Retention: ${MEMORY_RETENTION_DAYS} days"
+
+exec "$VENV_DIR/bin/python" "$SKILL_DIR/src/api.py"
diff --git a/src/api.py b/src/api.py
new file mode 100644
index 0000000..7801338
--- /dev/null
+++ b/src/api.py
@@ -0,0 +1,443 @@
+#!/usr/bin/env python3
+"""
+Memory API - Lightweight agent memory with semantic search
+"""
+
+import os
+import json
+from datetime import datetime, timedelta, timezone
+from typing import Optional
+from contextlib import asynccontextmanager, contextmanager
+
+import psycopg2
+from psycopg2.extras import RealDictCursor
+from fastapi import FastAPI, HTTPException, Query
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel
+from sentence_transformers import SentenceTransformer
+from ulid import ULID
+
+# Configuration
+MEMORY_PORT = int(os.environ.get("MEMORY_PORT", "8081"))
+MEMORY_MODEL = os.environ.get("MEMORY_MODEL", "all-MiniLM-L6-v2")
+MEMORY_RETENTION_DAYS = int(os.environ.get("MEMORY_RETENTION_DAYS", "30"))
+MEMORY_MAX_RESULTS = int(os.environ.get("MEMORY_MAX_RESULTS", "10"))
+DATABASE_URL = os.environ.get("DATABASE_URL", "postgresql://vibestack:vibestack@localhost:5432/vibestack")
+
+# Width of the "embedding vector(384)" column created by ensure_schema();
+# the configured model must produce vectors of exactly this size.
+EMBEDDING_DIM = 384
+
+# Global model instance (loaded once in lifespan)
+model: Optional[SentenceTransformer] = None
+db_pool = None
+
+
+@contextmanager
+def get_db():
+    """Yield a database connection and always close it afterwards.
+
+    psycopg2's own "with connection:" block only ends the transaction
+    (commit on success, rollback on error) and leaves the connection open,
+    so it is closed explicitly here instead of leaking one per request.
+    """
+    conn = psycopg2.connect(DATABASE_URL, cursor_factory=RealDictCursor)
+    try:
+        with conn:
+            yield conn
+    finally:
+        conn.close()
+
+
+def ensure_schema():
+    """Create the vector extension, memories table and basic indexes if missing."""
+    with get_db() as conn:
+        with conn.cursor() as cur:
+            cur.execute("""
+                CREATE EXTENSION IF NOT EXISTS vector;
+
+                CREATE TABLE IF NOT EXISTS memories (
+                    id TEXT PRIMARY KEY,
+                    type TEXT NOT NULL,
+                    content TEXT NOT NULL,
+                    embedding vector(384),
+                    metadata JSONB DEFAULT '{}',
+                    created_at TIMESTAMPTZ DEFAULT NOW()
+                );
+
+                CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(type);
+                CREATE INDEX IF NOT EXISTS idx_memories_created ON memories(created_at DESC);
+            """)
+        conn.commit()
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Load the model, check it fits the schema, then prepare the database."""
+    global model
+    print(f"Loading embedding model: {MEMORY_MODEL}")
+    model = SentenceTransformer(MEMORY_MODEL)
+    dimensions = model.get_sentence_embedding_dimension()
+    print(f"Model loaded: {dimensions} dimensions")
+
+    # Fail fast: a model with a different output size would make every
+    # insert and search fail against the fixed-width vector column.
+    if dimensions != EMBEDDING_DIM:
+        raise RuntimeError(
+            f"Model {MEMORY_MODEL} produces {dimensions}-dimensional embeddings, "
+            f"but the memories table stores vector({EMBEDDING_DIM})"
+        )
+
+    ensure_schema()
+    print("Database schema verified")
+
+    # Run retention cleanup on startup
+    if MEMORY_RETENTION_DAYS > 0:
+        cleanup_old_memories()
+
+    yield
+
+    print("Shutting down...")
+
+
+app = FastAPI(
+    title="Memory API",
+    description="Agent memory with semantic search",
+    version="1.0.0",
+    lifespan=lifespan
+)
+
+
+class MemoryCreate(BaseModel):
+    """Request body for POST /memory."""
+
+    type: str
+    content: str
+    metadata: Optional[dict] = {}
+
+
+class MemoryResponse(BaseModel):
+    """Response model for a single stored memory."""
+
+    id: str
+    type: str
+    content: str
+    metadata: dict
+    created_at: str
+    similarity: Optional[float] = None
+
+
+def embed(text: str) -> list[float]:
+    """Return the normalized embedding of text as a plain list of floats."""
+    return model.encode(text, normalize_embeddings=True).tolist()
+
+
+def cleanup_old_memories():
+    """Delete memories older than MEMORY_RETENTION_DAYS (no-op when <= 0)."""
+    if MEMORY_RETENTION_DAYS <= 0:
+        return
+
+    # created_at is TIMESTAMPTZ, so compare against an aware UTC timestamp; a
+    # naive one would be interpreted in the database session's time zone.
+    cutoff = datetime.now(timezone.utc) - timedelta(days=MEMORY_RETENTION_DAYS)
+    with get_db() as conn:
+        with conn.cursor() as cur:
+            cur.execute(
+                "DELETE FROM memories WHERE created_at < %s",
+                (cutoff,)
+            )
+            deleted = cur.rowcount
+        conn.commit()
+
+    if deleted > 0:
+        print(f"Cleaned up {deleted} old memories")
+
+
+@app.post("/memory", response_model=MemoryResponse)
+async def create_memory(memory: MemoryCreate):
+    """Store a new memory with automatic embedding."""
+    memory_id = str(ULID())
+    embedding = embed(memory.content)
+
+    with get_db() as conn:
+        with conn.cursor() as cur:
+            cur.execute("""
+                INSERT INTO memories (id, type, content, embedding, metadata)
+                VALUES (%s, %s, %s, %s, %s)
+                RETURNING id, type, content, metadata, created_at
+            """, (
+                memory_id,
+                memory.type,
+                memory.content,
+                embedding,
+                # An explicit "metadata": null is stored as {} so the row
+                # still satisfies MemoryResponse.metadata (a dict).
+                json.dumps(memory.metadata or {})
+            ))
+            row = cur.fetchone()
+        conn.commit()
+
+    return MemoryResponse(
+        id=row["id"],
+        type=row["type"],
+        content=row["content"],
+        metadata=row["metadata"],
+        created_at=row["created_at"].isoformat()
+    )
+
+
+@app.get("/memory")
+async def list_memories(
+    type: Optional[str] = None,
+    limit: int = Query(default=20, ge=0, le=100),
+    offset: int = Query(default=0, ge=0)
+):
+    """List recent memories, newest first, optionally filtered by type."""
+    with get_db() as conn:
+        with conn.cursor() as cur:
+            if type:
+                cur.execute("""
+                    SELECT id, type, content, metadata, created_at
+                    FROM memories
+                    WHERE type = %s
+                    ORDER BY created_at DESC
+                    LIMIT %s OFFSET %s
+                """, (type, limit, offset))
+            else:
+                cur.execute("""
+                    SELECT id, type, content, metadata, created_at
+                    FROM memories
+                    ORDER BY created_at DESC
+                    LIMIT %s OFFSET %s
+                """, (limit, offset))
+
+            rows = cur.fetchall()
+
+    return {
+        "results": [
+            {
+                "id": row["id"],
+                "type": row["type"],
+                "content": row["content"],
+                "metadata": row["metadata"],
+                "created_at": row["created_at"].isoformat()
+            }
+            for row in rows
+        ]
+    }
+
+
+@app.get("/memory/{memory_id}", response_model=MemoryResponse)
+async def get_memory(memory_id: str):
+    """Get a specific memory by ID; responds 404 when it does not exist."""
+    with get_db() as conn:
+        with conn.cursor() as cur:
+            cur.execute("""
+                SELECT id, type, content, metadata, created_at
+                FROM memories
+                WHERE id = %s
+            """, (memory_id,))
+            row = cur.fetchone()
+
+    if not row:
+        raise HTTPException(status_code=404, detail="Memory not found")
+
+    return MemoryResponse(
+        id=row["id"],
+        type=row["type"],
+        content=row["content"],
+        metadata=row["metadata"],
+        created_at=row["created_at"].isoformat()
+    )
+
+
+@app.delete("/memory/{memory_id}")
+async def delete_memory(memory_id: str):
+    """Delete a specific memory; responds 404 when no row was removed."""
+    with get_db() as conn:
+        with conn.cursor() as cur:
+            cur.execute("DELETE FROM memories WHERE id = %s", (memory_id,))
+            deleted = cur.rowcount
+        conn.commit()
+
+    if deleted == 0:
+        raise HTTPException(status_code=404, detail="Memory not found")
+
+    return {"deleted": memory_id}
+
+
+def _cutoff_from_age(older_than: str) -> datetime:
+    """Return the UTC time lying older_than (e.g. "7d", "4w", "24h") in the past.
+
+    Raises HTTPException(400) unless older_than is a non-negative integer
+    followed by one of the units d, w or h and the result is representable.
+    """
+    invalid = HTTPException(status_code=400, detail="Invalid duration format. Use: 7d, 4w, 24h")
+    units = {"d": "days", "w": "weeks", "h": "hours"}
+    amount, unit = older_than[:-1], older_than[-1:]
+    # isdigit() rejects empty, signed and non-numeric amounts, which would
+    # otherwise surface as a 500 (ValueError) or as a cutoff in the future.
+    if unit not in units or not amount.isdigit():
+        raise invalid
+    try:
+        return datetime.now(timezone.utc) - timedelta(**{units[unit]: int(amount)})
+    except (OverflowError, ValueError):
+        # Amount too large for int()/timedelta/datetime arithmetic.
+        raise invalid from None
+
+
+@app.delete("/memory")
+async def bulk_delete_memories(
+    type: Optional[str] = None,
+    older_than: Optional[str] = None
+):
+    """Bulk delete memories by type and/or age.
+
+    At least one filter is required, so a bare DELETE /memory cannot wipe
+    the whole table.
+    """
+    conditions = []
+    params = []
+
+    if type:
+        conditions.append("type = %s")
+        params.append(type)
+
+    if older_than:
+        conditions.append("created_at < %s")
+        params.append(_cutoff_from_age(older_than))
+
+    if not conditions:
+        raise HTTPException(status_code=400, detail="Specify type and/or older_than")
+
+    # Only fixed SQL fragments are interpolated; values stay parameterized.
+    where_clause = " AND ".join(conditions)
+
+    with get_db() as conn:
+        with conn.cursor() as cur:
+            cur.execute(f"DELETE FROM memories WHERE {where_clause}", params)
+            deleted = cur.rowcount
+        conn.commit()
+
+    return {"deleted_count": deleted}
+
+
+@app.get("/search")
+async def search_memories(
+    q: str,
+    type: Optional[str] = None,
+    limit: Optional[int] = Query(default=None, ge=0, le=50),
+    threshold: float = Query(default=0.3, ge=0, le=1)
+):
+    """Semantic search across memories.
+
+    Fetches up to limit nearest memories by cosine distance, then drops those
+    whose similarity is below threshold (so fewer than limit may be returned).
+    """
+    if limit is None:
+        limit = MEMORY_MAX_RESULTS
+
+    query_embedding = embed(q)
+
+    with get_db() as conn:
+        with conn.cursor() as cur:
+            if type:
+                cur.execute("""
+                    SELECT
+                        id, type, content, metadata, created_at,
+                        1 - (embedding <=> %s::vector) as similarity
+                    FROM memories
+                    WHERE type = %s
+                    ORDER BY embedding <=> %s::vector
+                    LIMIT %s
+                """, (query_embedding, type, query_embedding, limit))
+            else:
+                cur.execute("""
+                    SELECT
+                        id, type, content, metadata, created_at,
+                        1 - (embedding <=> %s::vector) as similarity
+                    FROM memories
+                    ORDER BY embedding <=> %s::vector
+                    LIMIT %s
+                """, (query_embedding, query_embedding, limit))
+
+            rows = cur.fetchall()
+
+    # Filter by similarity threshold; rows stored without an embedding have a
+    # NULL similarity and are skipped instead of crashing the comparison.
+    results = [
+        {
+            "id": row["id"],
+            "type": row["type"],
+            "content": row["content"],
+            "metadata": row["metadata"],
+            "similarity": round(row["similarity"], 4),
+            "created_at": row["created_at"].isoformat()
+        }
+        for row in rows
+        if row["similarity"] is not None and row["similarity"] >= threshold
+    ]
+
+    return {"results": results, "query": q}
+
+
+@app.get("/health")
+async def health_check():
+    """Report API health; responds 503 when the database query fails."""
+    try:
+        with get_db() as conn:
+            with conn.cursor() as cur:
+                cur.execute("SELECT COUNT(*) as count FROM memories")
+                count = cur.fetchone()["count"]
+
+        return {
+            "status": "healthy",
+            "model": MEMORY_MODEL,
+            "memory_count": count
+        }
+    except Exception as e:
+        return JSONResponse(
+            status_code=503,
+            content={"status": "unhealthy", "error": str(e)}
+        )
+
+
+@app.get("/stats")
+async def get_stats():
+    """Return the total memory count plus per-type counts and age ranges."""
+    with get_db() as conn:
+        with conn.cursor() as cur:
+            cur.execute("""
+                SELECT
+                    type,
+                    COUNT(*) as count,
+                    MIN(created_at) as oldest,
+                    MAX(created_at) as newest
+                FROM memories
+                GROUP BY type
+                ORDER BY count DESC
+            """)
+            type_stats = cur.fetchall()
+
+            cur.execute("SELECT COUNT(*) as total FROM memories")
+            total = cur.fetchone()["total"]
+
+    return {
+        "total": total,
+        "by_type": [
+            {
+                "type": row["type"],
+                "count": row["count"],
+                "oldest": row["oldest"].isoformat() if row["oldest"] else None,
+                "newest": row["newest"].isoformat() if row["newest"] else None
+            }
+            for row in type_stats
+        ],
+        "retention_days": MEMORY_RETENTION_DAYS,
+        "model": MEMORY_MODEL
+    }
+
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=MEMORY_PORT)