Initial memory skill - agent memory with pgvector semantic search

This commit is contained in:
Azat
2026-02-02 23:48:37 +01:00
commit bf5ba832ee
4 changed files with 744 additions and 0 deletions

186
SKILL.md Normal file
View File

@@ -0,0 +1,186 @@
---
name: memory
description: Agent memory with semantic search via pgvector
metadata:
version: "1.0.0"
vibestack:
main: false
requires:
- postgres
---
# Memory Skill
Lightweight agent memory with semantic search using PostgreSQL + pgvector.
## What Gets Stored
| Type | Description | Example |
|------|-------------|---------|
| **conversation** | Chat messages with context | User questions, agent responses |
| **execution** | Task runs and outcomes | Commands run, success/failure, duration |
| **finding** | Discoveries and solutions | "Port 8080 was blocked by firewall" |
| **memory** | Persistent agent knowledge | Preferences, learned patterns |
## Features
- Semantic search across all memory types
- Automatic embedding on insert
- Configurable retention policies
- Simple REST API for CRUD + search
- Lightweight local embeddings (~90MB model)
## Configuration
### Environment Variables
| Variable | Default | Description |
|----------|---------|-------------|
| `MEMORY_PORT` | `8081` | API port |
| `MEMORY_MODEL` | `all-MiniLM-L6-v2` | Sentence-transformers model |
| `MEMORY_RETENTION_DAYS` | `30` | Auto-delete after N days (0 = forever) |
| `MEMORY_MAX_RESULTS` | `10` | Default search results limit |
## API
### Store Memory
```bash
# Store a finding
curl -X POST http://localhost:8081/memory \
-H "Content-Type: application/json" \
-d '{
"type": "finding",
"content": "Redis was OOM - increased REDIS_MAXMEMORY to 256mb",
"metadata": {"skill": "redis", "severity": "resolved"}
}'
# Store conversation
curl -X POST http://localhost:8081/memory \
-d '{
"type": "conversation",
"content": "User asked how to check disk space. Showed df -h command.",
"metadata": {"session_id": "abc123"}
}'
```
### Search Memory
```bash
# Semantic search
curl "http://localhost:8081/search?q=redis+memory+issue&limit=5"
# Filter by type
curl "http://localhost:8081/search?q=disk+space&type=conversation"
# Filter by metadata (NOTE: not yet implemented in src/api.py — /search currently accepts only q, type, limit, threshold)
curl "http://localhost:8081/search?q=error&metadata.skill=postgres"
```
### Response Format
```json
{
"results": [
{
"id": "01HQXYZ...",
"type": "finding",
"content": "Redis was OOM - increased REDIS_MAXMEMORY to 256mb",
"metadata": {"skill": "redis"},
"similarity": 0.87,
"created_at": "2024-01-15T10:30:00Z"
}
]
}
```
### List Recent
```bash
# Recent memories by type
curl "http://localhost:8081/memory?type=finding&limit=20"
# All recent
curl "http://localhost:8081/memory?limit=50"
```
### Delete
```bash
# Delete specific memory
curl -X DELETE "http://localhost:8081/memory/01HQXYZ..."
# Bulk delete by type older than N days
curl -X DELETE "http://localhost:8081/memory?type=execution&older_than=7d"
```
## Database Schema
```sql
CREATE EXTENSION IF NOT EXISTS vector;
CREATE TABLE memories (
id TEXT PRIMARY KEY,
type TEXT NOT NULL,
content TEXT NOT NULL,
embedding vector(384),
metadata JSONB DEFAULT '{}',
created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_memories_type ON memories(type);
CREATE INDEX idx_memories_created ON memories(created_at);
CREATE INDEX idx_memories_embedding ON memories USING ivfflat (embedding vector_cosine_ops);
```
## Usage Patterns
### Agent Self-Reflection
Before starting a task, search for relevant past experiences:
```bash
# "Have I done something like this before?"
curl "http://localhost:8081/search?q=deploy+nodejs+application"
```
### Error Resolution
When encountering an error, check if it's been seen before:
```bash
curl "http://localhost:8081/search?q=connection+refused+port+5432&type=finding"
```
### Conversation Context
Recall previous discussions with user:
```bash
curl "http://localhost:8081/search?q=user+preferences+formatting&type=conversation"
```
## Embedding Model
Uses `all-MiniLM-L6-v2` from sentence-transformers:
- 384 dimensions
- ~90MB download
- Fast inference (~5ms per embedding)
- Good quality for short texts
For even smaller footprint, set `MEMORY_MODEL=all-MiniLM-L3-v2` (~60MB, slightly lower quality).
## Integration
Other skills can store memories by POSTing to the API:
```bash
# In any skill's script
store_memory() {
curl -s -X POST http://localhost:8081/memory \
-H "Content-Type: application/json" \
-d "{\"type\":\"$1\",\"content\":\"$2\",\"metadata\":$3}"
}
store_memory "execution" "Backup completed successfully" '{"skill":"backup","duration":45}'
```

142
scripts/autorun.sh Normal file
View File

@@ -0,0 +1,142 @@
#!/bin/bash
# One-time setup for the memory skill: install Python + pgvector, build the
# skill's virtualenv, pre-download the embedding model, and (best-effort)
# initialize the database schema.
set -e
# PostgreSQL major version — used to locate the pgvector extension package.
POSTGRES_VERSION="${POSTGRES_VERSION:-16}"
# Sentence-transformers model to pre-download (must match run-time MEMORY_MODEL).
MEMORY_MODEL="${MEMORY_MODEL:-all-MiniLM-L6-v2}"
# Ensure a python3 interpreter (plus pip and venv) is available, installing
# via apt when missing. Idempotent.
install_python() {
  if ! command -v python3 &>/dev/null; then
    echo "Installing Python..."
    apt-get update
    apt-get install -y python3 python3-pip python3-venv
    echo "Python installed: $(python3 --version)"
    return 0
  fi
  echo "Python already installed: $(python3 --version)"
  return 0
}
# Install the pgvector PostgreSQL extension package when its control file
# is not already present for the target PostgreSQL version. Idempotent.
install_pgvector() {
  local control="/usr/share/postgresql/${POSTGRES_VERSION}/extension/vector.control"
  if [ -f "$control" ]; then
    echo "pgvector already installed"
    return 0
  fi
  echo "Installing pgvector..."
  apt-get update
  apt-get install -y "postgresql-${POSTGRES_VERSION}-pgvector"
  echo "pgvector installed"
}
# Setup Python virtual environment and dependencies.
# Creates <skill>/.venv next to the scripts/ dir and installs the pinned API
# dependencies. Idempotent: skips everything if the venv directory exists.
setup_python_env() {
  local skill_dir venv_dir
  # Declaration split from assignment so a dirname failure is not masked
  # by `local`'s own exit status (which is always 0).
  skill_dir="$(dirname "$(dirname "$0")")" || return 1
  venv_dir="$skill_dir/.venv"
  if [ -d "$venv_dir" ]; then
    # NOTE(review): a half-built venv (pip install interrupted) is also
    # skipped here — delete .venv to force a rebuild.
    echo "Python venv already exists"
    return 0
  fi
  echo "Creating Python virtual environment..."
  python3 -m venv "$venv_dir"
  echo "Installing Python dependencies..."
  "$venv_dir/bin/pip" install --upgrade pip
  "$venv_dir/bin/pip" install \
    fastapi==0.109.0 \
    uvicorn==0.27.0 \
    sentence-transformers==2.3.1 \
    psycopg2-binary==2.9.9 \
    python-ulid==2.2.0
  echo "Python environment ready"
}
# Pre-download embedding model so the API's first startup does not block on
# a ~90MB network fetch.
download_model() {
  local skill_dir venv_dir
  skill_dir="$(dirname "$(dirname "$0")")" || return 1
  venv_dir="$skill_dir/.venv"
  echo "Pre-downloading embedding model: $MEMORY_MODEL..."
  # Pass the model name through the environment instead of interpolating it
  # into the Python source: a quote or backslash in MEMORY_MODEL would
  # otherwise break (or inject into) the snippet.
  MEMORY_MODEL="$MEMORY_MODEL" "$venv_dir/bin/python" -c "
import os
from sentence_transformers import SentenceTransformer
model = SentenceTransformer(os.environ['MEMORY_MODEL'])
print(f'Model loaded: {model.get_sentence_embedding_dimension()} dimensions')
"
  echo "Model downloaded"
}
# Initialize database schema
# Best-effort: waits up to ~30s for PostgreSQL; if unreachable, returns 0
# and relies on the API's ensure_schema() to create the schema on first run.
init_database() {
  # Wait for postgres to be available (30 x 1s polls)
  local retries=30
  while [ $retries -gt 0 ]; do
    if pg_isready -q 2>/dev/null; then
      break
    fi
    echo "Waiting for PostgreSQL..."
    sleep 1
    retries=$((retries - 1))
  done
  if [ $retries -eq 0 ]; then
    echo "PostgreSQL not available - schema will be created on first run"
    return 0
  fi
  # Source postgres env if available (may provide POSTGRES_USER/POSTGRES_DB)
  [ -f /run/vibestack/postgres.env ] && source /run/vibestack/postgres.env
  local pg_user="${POSTGRES_USER:-vibestack}"
  local pg_db="${POSTGRES_DB:-vibestack}"
  echo "Initializing memory schema..."
  # Quoted 'EOF' delimiter: the SQL below is passed to psql literally,
  # with no shell expansion.
  psql -U "$pg_user" -d "$pg_db" << 'EOF'
-- Enable pgvector
CREATE EXTENSION IF NOT EXISTS vector;
-- Memories table
CREATE TABLE IF NOT EXISTS memories (
id TEXT PRIMARY KEY,
type TEXT NOT NULL,
content TEXT NOT NULL,
embedding vector(384),
metadata JSONB DEFAULT '{}',
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(type);
CREATE INDEX IF NOT EXISTS idx_memories_created ON memories(created_at DESC);
-- Vector index (IVFFlat for approximate nearest neighbor)
-- Only create if we have enough rows, otherwise exact search is faster
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_memories_embedding') THEN
CREATE INDEX idx_memories_embedding ON memories
USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
END IF;
EXCEPTION WHEN others THEN
-- Index creation might fail if not enough rows, that's ok
RAISE NOTICE 'Vector index not created (will use exact search)';
END $$;
SELECT 'Memory schema initialized' as status;
EOF
  echo "Database schema ready"
}
# Run all setup steps in dependency order; set -e aborts on the first failure
# (except init_database, which degrades gracefully when postgres is down).
install_python
install_pgvector
setup_python_env
download_model
init_database
echo "Memory skill setup complete"

22
scripts/run.sh Normal file
View File

@@ -0,0 +1,22 @@
#!/bin/bash
# Launch the Memory API from the skill's private virtualenv.
set -e

MEMORY_PORT="${MEMORY_PORT:-8081}"
SKILL_DIR="$(dirname "$(dirname "$0")")"
VENV_DIR="$SKILL_DIR/.venv"

# Pull in postgres connection settings when the stack provides them.
if [ -f /run/vibestack/postgres.env ]; then
  source /run/vibestack/postgres.env
fi

# Hand configuration to the Python process via the environment.
export MEMORY_PORT
export MEMORY_MODEL="${MEMORY_MODEL:-all-MiniLM-L6-v2}"
export MEMORY_RETENTION_DAYS="${MEMORY_RETENTION_DAYS:-30}"
export MEMORY_MAX_RESULTS="${MEMORY_MAX_RESULTS:-10}"
export DATABASE_URL="${DATABASE_URL:-postgresql://vibestack:vibestack@localhost:5432/vibestack}"

echo "Starting Memory API on port $MEMORY_PORT..."
echo "Model: $MEMORY_MODEL"
echo "Retention: ${MEMORY_RETENTION_DAYS} days"

# Replace this shell with the API process so signals reach it directly.
exec "$VENV_DIR/bin/python" "$SKILL_DIR/src/api.py"

394
src/api.py Normal file
View File

@@ -0,0 +1,394 @@
#!/usr/bin/env python3
"""
Memory API - Lightweight agent memory with semantic search
"""
import json
import os
from contextlib import asynccontextmanager
from datetime import datetime, timedelta, timezone
from typing import Optional

import psycopg2
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import JSONResponse
from psycopg2.extras import RealDictCursor
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
from ulid import ULID
# Configuration — every value is overridable via the environment.
MEMORY_PORT = int(os.environ.get("MEMORY_PORT", "8081"))  # HTTP listen port
MEMORY_MODEL = os.environ.get("MEMORY_MODEL", "all-MiniLM-L6-v2")  # sentence-transformers model name
MEMORY_RETENTION_DAYS = int(os.environ.get("MEMORY_RETENTION_DAYS", "30"))  # 0 = keep forever
MEMORY_MAX_RESULTS = int(os.environ.get("MEMORY_MAX_RESULTS", "10"))  # default /search result count
DATABASE_URL = os.environ.get("DATABASE_URL", "postgresql://vibestack:vibestack@localhost:5432/vibestack")
# Global model instance, loaded once by lifespan() before requests are served.
model: Optional[SentenceTransformer] = None
# NOTE(review): db_pool is never assigned or read anywhere in this module —
# connections are opened per-request by get_db(). Remove or wire up a pool.
db_pool = None
def get_db():
    """Open a new database connection with dict-style rows (RealDictCursor).

    NOTE(review): a fresh connection is opened per call and nothing ever
    calls conn.close() — callers use `with conn`, which in psycopg2 only
    ends the transaction, not the connection. Connections are reclaimed by
    GC; consider an explicit close or a pool (the unused module-level
    `db_pool` suggests one was intended).
    """
    return psycopg2.connect(DATABASE_URL, cursor_factory=RealDictCursor)
def ensure_schema():
    """Create the pgvector extension, memories table, and b-tree indexes.

    Idempotent (IF NOT EXISTS everywhere); called once at startup from
    lifespan(). The ivfflat vector index is handled by scripts/autorun.sh,
    not here — queries fall back to exact search without it.
    """
    with get_db() as conn:
        with conn.cursor() as cur:
            cur.execute("""
                CREATE EXTENSION IF NOT EXISTS vector;
                CREATE TABLE IF NOT EXISTS memories (
                    id TEXT PRIMARY KEY,
                    type TEXT NOT NULL,
                    content TEXT NOT NULL,
                    embedding vector(384),
                    metadata JSONB DEFAULT '{}',
                    created_at TIMESTAMPTZ DEFAULT NOW()
                );
                CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(type);
                CREATE INDEX IF NOT EXISTS idx_memories_created ON memories(created_at DESC);
            """)
        conn.commit()
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: load the embedding model, verify the schema,
    and run one retention sweep before serving; print on shutdown."""
    global model
    print(f"Loading embedding model: {MEMORY_MODEL}")
    model = SentenceTransformer(MEMORY_MODEL)
    print(f"Model loaded: {model.get_sentence_embedding_dimension()} dimensions")
    ensure_schema()
    print("Database schema verified")
    # Run retention cleanup on startup (only once — there is no periodic timer)
    if MEMORY_RETENTION_DAYS > 0:
        cleanup_old_memories()
    yield
    print("Shutting down...")
# FastAPI application; lifespan() handles model loading and schema setup.
app = FastAPI(
    title="Memory API",
    description="Agent memory with semantic search",
    version="1.0.0",
    lifespan=lifespan
)
class MemoryCreate(BaseModel):
    """Request body for POST /memory."""
    # Memory category, e.g. conversation/execution/finding/memory (not validated here)
    type: str
    # Free text to store; embedded at insert time
    content: str
    # Arbitrary JSON object; Optional means an explicit null is also accepted
    metadata: Optional[dict] = {}
class MemoryResponse(BaseModel):
    """A stored memory as returned by the API (embedding omitted)."""
    # ULID string assigned at insert
    id: str
    type: str
    content: str
    metadata: dict
    # ISO-8601 timestamp string
    created_at: str
    # Cosine similarity in [0, 1]; only populated by /search
    similarity: Optional[float] = None
def embed(text: str) -> list[float]:
    """Generate a normalized embedding for text.

    Uses the global `model` loaded by lifespan() (raises AttributeError if
    called before startup). Normalized embeddings make cosine similarity
    equal to the dot product. Dimension is model-dependent — 384 for the
    default all-MiniLM-L6-v2, matching the vector(384) column.
    """
    return model.encode(text, normalize_embeddings=True).tolist()
def cleanup_old_memories():
    """Delete memories older than MEMORY_RETENTION_DAYS.

    No-op when retention is disabled (MEMORY_RETENTION_DAYS <= 0).
    Prints the number of rows removed when non-zero.
    """
    if MEMORY_RETENTION_DAYS <= 0:
        return
    # Use an *aware* UTC timestamp: created_at is TIMESTAMPTZ, and the old
    # naive datetime.utcnow() value was interpreted in the server session's
    # time zone, skewing the cutoff on non-UTC installations.
    cutoff = datetime.now(timezone.utc) - timedelta(days=MEMORY_RETENTION_DAYS)
    with get_db() as conn:
        with conn.cursor() as cur:
            cur.execute(
                "DELETE FROM memories WHERE created_at < %s",
                (cutoff,)
            )
            deleted = cur.rowcount
        conn.commit()
    if deleted > 0:
        print(f"Cleaned up {deleted} old memories")
@app.post("/memory", response_model=MemoryResponse)
async def create_memory(memory: MemoryCreate):
    """Store a new memory with automatic embedding.

    Assigns a ULID id, embeds the content, and inserts the row. Returns the
    stored memory (embedding omitted).
    """
    memory_id = str(ULID())
    embedding = embed(memory.content)
    # Coerce an explicit JSON `"metadata": null` to {} — otherwise the row
    # comes back with metadata None and MemoryResponse (metadata: dict)
    # fails validation, turning a valid request into a 500.
    metadata = memory.metadata or {}
    with get_db() as conn:
        with conn.cursor() as cur:
            # The Python list is sent as a float array; storage relies on
            # pgvector's array->vector assignment cast — TODO confirm the
            # cast is available in the deployed pgvector version.
            cur.execute("""
                INSERT INTO memories (id, type, content, embedding, metadata)
                VALUES (%s, %s, %s, %s, %s)
                RETURNING id, type, content, metadata, created_at
            """, (
                memory_id,
                memory.type,
                memory.content,
                embedding,
                json.dumps(metadata)
            ))
            row = cur.fetchone()
        conn.commit()
    return MemoryResponse(
        id=row["id"],
        type=row["type"],
        content=row["content"],
        metadata=row["metadata"],
        created_at=row["created_at"].isoformat()
    )
@app.get("/memory")
async def list_memories(
    type: Optional[str] = None,
    limit: int = Query(default=20, le=100),
    offset: int = 0
):
    """List recent memories (newest first), optionally filtered by type.

    Args:
        type: exact memory type to match; None lists all types.
        limit: page size, capped at 100 by the Query validator.
        offset: rows to skip, for pagination. Not validated — a negative
            value reaches SQL and errors; TODO consider ge=0.
    """
    with get_db() as conn:
        with conn.cursor() as cur:
            if type:
                cur.execute("""
                    SELECT id, type, content, metadata, created_at
                    FROM memories
                    WHERE type = %s
                    ORDER BY created_at DESC
                    LIMIT %s OFFSET %s
                """, (type, limit, offset))
            else:
                cur.execute("""
                    SELECT id, type, content, metadata, created_at
                    FROM memories
                    ORDER BY created_at DESC
                    LIMIT %s OFFSET %s
                """, (limit, offset))
            rows = cur.fetchall()
    return {
        "results": [
            {
                "id": row["id"],
                "type": row["type"],
                "content": row["content"],
                "metadata": row["metadata"],
                "created_at": row["created_at"].isoformat()
            }
            for row in rows
        ]
    }
@app.get("/memory/{memory_id}", response_model=MemoryResponse)
async def get_memory(memory_id: str):
    """Get a specific memory by its ULID; 404 when the id is unknown."""
    with get_db() as conn:
        with conn.cursor() as cur:
            cur.execute("""
                SELECT id, type, content, metadata, created_at
                FROM memories
                WHERE id = %s
            """, (memory_id,))
            row = cur.fetchone()
    if not row:
        raise HTTPException(status_code=404, detail="Memory not found")
    return MemoryResponse(
        id=row["id"],
        type=row["type"],
        content=row["content"],
        metadata=row["metadata"],
        created_at=row["created_at"].isoformat()
    )
@app.delete("/memory/{memory_id}")
async def delete_memory(memory_id: str):
    """Delete a single memory by id; 404 when no row matched."""
    with get_db() as conn:
        with conn.cursor() as cur:
            cur.execute("DELETE FROM memories WHERE id = %s", (memory_id,))
            removed = cur.rowcount
        conn.commit()
    if removed == 0:
        raise HTTPException(status_code=404, detail="Memory not found")
    return {"deleted": memory_id}
def _parse_duration(spec: str) -> timedelta:
    """Parse a duration like '7d', '4w', '24h' into a timedelta.

    Raises HTTPException(400) for any malformed spec — previously inputs
    like 'abc' or a bare 'd' raised ValueError and surfaced as a 500.
    """
    units = {"d": "days", "w": "weeks", "h": "hours"}
    bad = HTTPException(status_code=400, detail="Invalid duration format. Use: 7d, 4w, 24h")
    if len(spec) < 2 or spec[-1] not in units:
        raise bad
    try:
        value = int(spec[:-1])
    except ValueError:
        raise bad
    return timedelta(**{units[spec[-1]]: value})

@app.delete("/memory")
async def bulk_delete_memories(
    type: Optional[str] = None,
    older_than: Optional[str] = None
):
    """Bulk delete memories by type and/or age.

    Args:
        type: only delete rows of this memory type.
        older_than: duration like '7d', '4w', '24h'; deletes rows created
            before now minus that duration.

    Returns {"deleted_count": N}; 400 when neither filter is given or the
    duration is malformed.
    """
    conditions = []
    params = []
    if type:
        conditions.append("type = %s")
        params.append(type)
    if older_than:
        # Aware UTC cutoff: created_at is TIMESTAMPTZ, so a naive datetime
        # would be read in the session time zone.
        cutoff = datetime.now(timezone.utc) - _parse_duration(older_than)
        conditions.append("created_at < %s")
        params.append(cutoff)
    if not conditions:
        raise HTTPException(status_code=400, detail="Specify type and/or older_than")
    # Safe f-string: where_clause is built only from the fixed fragments
    # above; all user data goes through bound parameters.
    where_clause = " AND ".join(conditions)
    with get_db() as conn:
        with conn.cursor() as cur:
            cur.execute(f"DELETE FROM memories WHERE {where_clause}", params)
            deleted = cur.rowcount
        conn.commit()
    return {"deleted_count": deleted}
@app.get("/search")
async def search_memories(
    q: str,
    type: Optional[str] = None,
    limit: int = Query(default=None, le=50),
    threshold: float = Query(default=0.3, ge=0, le=1)
):
    """Semantic search across memories.

    Orders rows by cosine distance (pgvector `<=>`) to the query embedding
    and reports similarity = 1 - distance. The threshold is applied in
    Python AFTER the SQL LIMIT, so fewer than `limit` results may be
    returned. With the ivfflat index present, results are approximate.
    """
    # None default lets the env-configured MEMORY_MAX_RESULTS act as the limit.
    if limit is None:
        limit = MEMORY_MAX_RESULTS
    query_embedding = embed(q)
    with get_db() as conn:
        with conn.cursor() as cur:
            if type:
                cur.execute("""
                    SELECT
                        id, type, content, metadata, created_at,
                        1 - (embedding <=> %s::vector) as similarity
                    FROM memories
                    WHERE type = %s
                    ORDER BY embedding <=> %s::vector
                    LIMIT %s
                """, (query_embedding, type, query_embedding, limit))
            else:
                cur.execute("""
                    SELECT
                        id, type, content, metadata, created_at,
                        1 - (embedding <=> %s::vector) as similarity
                    FROM memories
                    ORDER BY embedding <=> %s::vector
                    LIMIT %s
                """, (query_embedding, query_embedding, limit))
            rows = cur.fetchall()
    # Filter by similarity threshold (post-LIMIT — see docstring)
    results = [
        {
            "id": row["id"],
            "type": row["type"],
            "content": row["content"],
            "metadata": row["metadata"],
            "similarity": round(row["similarity"], 4),
            "created_at": row["created_at"].isoformat()
        }
        for row in rows
        if row["similarity"] >= threshold
    ]
    return {"results": results, "query": q}
@app.get("/health")
async def health_check():
    """Health check: proves DB connectivity and reports the row count.

    Returns 200 with status info, or 503 when the database query fails.
    """
    try:
        with get_db() as conn:
            with conn.cursor() as cur:
                cur.execute("SELECT COUNT(*) as count FROM memories")
                count = cur.fetchone()["count"]
    except Exception as e:
        return JSONResponse(
            status_code=503,
            content={"status": "unhealthy", "error": str(e)}
        )
    return {
        "status": "healthy",
        "model": MEMORY_MODEL,
        "memory_count": count
    }
@app.get("/stats")
async def get_stats():
    """Get memory statistics: totals, per-type counts with age range,
    plus the configured retention and model."""
    with get_db() as conn:
        with conn.cursor() as cur:
            cur.execute("""
                SELECT
                    type,
                    COUNT(*) as count,
                    MIN(created_at) as oldest,
                    MAX(created_at) as newest
                FROM memories
                GROUP BY type
                ORDER BY count DESC
            """)
            type_stats = cur.fetchall()
            cur.execute("SELECT COUNT(*) as total FROM memories")
            total = cur.fetchone()["total"]
    return {
        "total": total,
        "by_type": [
            {
                "type": row["type"],
                "count": row["count"],
                # oldest/newest are NULL-safe for the empty-table case
                "oldest": row["oldest"].isoformat() if row["oldest"] else None,
                "newest": row["newest"].isoformat() if row["newest"] else None
            }
            for row in type_stats
        ],
        "retention_days": MEMORY_RETENTION_DAYS,
        "model": MEMORY_MODEL
    }
if __name__ == "__main__":
    # Run standalone under uvicorn; scripts/run.sh invokes this file directly.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=MEMORY_PORT)