Initial memory skill - agent memory with pgvector semantic search

This commit is contained in:
Azat
2026-02-02 23:48:37 +01:00
commit bf5ba832ee
4 changed files with 744 additions and 0 deletions

186
SKILL.md Normal file
View File

@@ -0,0 +1,186 @@
---
name: memory
description: Agent memory with semantic search via pgvector
metadata:
version: "1.0.0"
vibestack:
main: false
requires:
- postgres
---
# Memory Skill
Lightweight agent memory with semantic search using PostgreSQL + pgvector.
## What Gets Stored
| Type | Description | Example |
|------|-------------|---------|
| **conversation** | Chat messages with context | User questions, agent responses |
| **execution** | Task runs and outcomes | Commands run, success/failure, duration |
| **finding** | Discoveries and solutions | "Port 8080 was blocked by firewall" |
| **memory** | Persistent agent knowledge | Preferences, learned patterns |
## Features
- Semantic search across all memory types
- Automatic embedding on insert
- Configurable retention policies
- Simple REST API for CRUD + search
- Lightweight local embeddings (~90MB model)
## Configuration
### Environment Variables
| Variable | Default | Description |
|----------|---------|-------------|
| `MEMORY_PORT` | `8081` | API port |
| `MEMORY_MODEL` | `all-MiniLM-L6-v2` | Sentence-transformers model |
| `MEMORY_RETENTION_DAYS` | `30` | Auto-delete after N days (0 = forever) |
| `MEMORY_MAX_RESULTS` | `10` | Default search results limit |
## API
### Store Memory
```bash
# Store a finding
curl -X POST http://localhost:8081/memory \
-H "Content-Type: application/json" \
-d '{
"type": "finding",
"content": "Redis was OOM - increased REDIS_MAXMEMORY to 256mb",
"metadata": {"skill": "redis", "severity": "resolved"}
}'
# Store conversation
curl -X POST http://localhost:8081/memory \
-d '{
"type": "conversation",
"content": "User asked how to check disk space. Showed df -h command.",
"metadata": {"session_id": "abc123"}
}'
```
### Search Memory
```bash
# Semantic search
curl "http://localhost:8081/search?q=redis+memory+issue&limit=5"
# Filter by type
curl "http://localhost:8081/search?q=disk+space&type=conversation"
# Filter by metadata (NOTE: not yet implemented in src/api.py — /search currently accepts only q, type, limit, threshold)
curl "http://localhost:8081/search?q=error&metadata.skill=postgres"
```
### Response Format
```json
{
"results": [
{
"id": "01HQXYZ...",
"type": "finding",
"content": "Redis was OOM - increased REDIS_MAXMEMORY to 256mb",
"metadata": {"skill": "redis"},
"similarity": 0.87,
"created_at": "2024-01-15T10:30:00Z"
}
]
}
```
### List Recent
```bash
# Recent memories by type
curl "http://localhost:8081/memory?type=finding&limit=20"
# All recent
curl "http://localhost:8081/memory?limit=50"
```
### Delete
```bash
# Delete specific memory
curl -X DELETE "http://localhost:8081/memory/01HQXYZ..."
# Bulk delete by type older than N days
curl -X DELETE "http://localhost:8081/memory?type=execution&older_than=7d"
```
## Database Schema
```sql
CREATE EXTENSION IF NOT EXISTS vector;
CREATE TABLE memories (
id TEXT PRIMARY KEY,
type TEXT NOT NULL,
content TEXT NOT NULL,
embedding vector(384),
metadata JSONB DEFAULT '{}',
created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_memories_type ON memories(type);
CREATE INDEX idx_memories_created ON memories(created_at);
CREATE INDEX idx_memories_embedding ON memories USING ivfflat (embedding vector_cosine_ops);
```
## Usage Patterns
### Agent Self-Reflection
Before starting a task, search for relevant past experiences:
```bash
# "Have I done something like this before?"
curl "http://localhost:8081/search?q=deploy+nodejs+application"
```
### Error Resolution
When encountering an error, check if it's been seen before:
```bash
curl "http://localhost:8081/search?q=connection+refused+port+5432&type=finding"
```
### Conversation Context
Recall previous discussions with user:
```bash
curl "http://localhost:8081/search?q=user+preferences+formatting&type=conversation"
```
## Embedding Model
Uses `all-MiniLM-L6-v2` from sentence-transformers:
- 384 dimensions
- ~90MB download
- Fast inference (~5ms per embedding)
- Good quality for short texts
For even smaller footprint, set `MEMORY_MODEL=all-MiniLM-L3-v2` (~60MB, slightly lower quality).
## Integration
Other skills can store memories by POSTing to the API:
```bash
# In any skill's script
store_memory() {
curl -s -X POST http://localhost:8081/memory \
-H "Content-Type: application/json" \
-d "{\"type\":\"$1\",\"content\":\"$2\",\"metadata\":$3}"
}
store_memory "execution" "Backup completed successfully" '{"skill":"backup","duration":45}'
```

142
scripts/autorun.sh Normal file
View File

@@ -0,0 +1,142 @@
#!/bin/bash
# One-time setup for the memory skill: install Python + pgvector, build the
# skill's virtualenv, pre-download the embedding model, and (best-effort)
# initialize the database schema.
set -e
# PostgreSQL major version — used to locate the pgvector extension package.
POSTGRES_VERSION="${POSTGRES_VERSION:-16}"
# Sentence-transformers model to pre-download (must match run-time MEMORY_MODEL).
MEMORY_MODEL="${MEMORY_MODEL:-all-MiniLM-L6-v2}"
# Ensure a python3 interpreter (plus pip and venv) is available, installing
# via apt when missing. Idempotent.
install_python() {
  if ! command -v python3 &>/dev/null; then
    echo "Installing Python..."
    apt-get update
    apt-get install -y python3 python3-pip python3-venv
    echo "Python installed: $(python3 --version)"
    return 0
  fi
  echo "Python already installed: $(python3 --version)"
  return 0
}
# Install the pgvector PostgreSQL extension package when its control file
# is not already present for the target PostgreSQL version. Idempotent.
install_pgvector() {
  local control="/usr/share/postgresql/${POSTGRES_VERSION}/extension/vector.control"
  if [ -f "$control" ]; then
    echo "pgvector already installed"
    return 0
  fi
  echo "Installing pgvector..."
  apt-get update
  apt-get install -y "postgresql-${POSTGRES_VERSION}-pgvector"
  echo "pgvector installed"
}
# Setup Python virtual environment and dependencies.
# Creates <skill>/.venv next to the scripts/ dir and installs the pinned API
# dependencies. Idempotent: skips everything if the venv directory exists.
setup_python_env() {
  local skill_dir venv_dir
  # Declaration split from assignment so a dirname failure is not masked
  # by `local`'s own exit status (which is always 0).
  skill_dir="$(dirname "$(dirname "$0")")" || return 1
  venv_dir="$skill_dir/.venv"
  if [ -d "$venv_dir" ]; then
    # NOTE(review): a half-built venv (pip install interrupted) is also
    # skipped here — delete .venv to force a rebuild.
    echo "Python venv already exists"
    return 0
  fi
  echo "Creating Python virtual environment..."
  python3 -m venv "$venv_dir"
  echo "Installing Python dependencies..."
  "$venv_dir/bin/pip" install --upgrade pip
  "$venv_dir/bin/pip" install \
    fastapi==0.109.0 \
    uvicorn==0.27.0 \
    sentence-transformers==2.3.1 \
    psycopg2-binary==2.9.9 \
    python-ulid==2.2.0
  echo "Python environment ready"
}
# Pre-download embedding model so the API's first startup does not block on
# a ~90MB network fetch.
download_model() {
  local skill_dir venv_dir
  skill_dir="$(dirname "$(dirname "$0")")" || return 1
  venv_dir="$skill_dir/.venv"
  echo "Pre-downloading embedding model: $MEMORY_MODEL..."
  # Pass the model name through the environment instead of interpolating it
  # into the Python source: a quote or backslash in MEMORY_MODEL would
  # otherwise break (or inject into) the snippet.
  MEMORY_MODEL="$MEMORY_MODEL" "$venv_dir/bin/python" -c "
import os
from sentence_transformers import SentenceTransformer
model = SentenceTransformer(os.environ['MEMORY_MODEL'])
print(f'Model loaded: {model.get_sentence_embedding_dimension()} dimensions')
"
  echo "Model downloaded"
}
# Initialize database schema
# Best-effort: waits up to ~30s for PostgreSQL; if unreachable, returns 0
# and relies on the API's ensure_schema() to create the schema on first run.
init_database() {
  # Wait for postgres to be available (30 x 1s polls)
  local retries=30
  while [ $retries -gt 0 ]; do
    if pg_isready -q 2>/dev/null; then
      break
    fi
    echo "Waiting for PostgreSQL..."
    sleep 1
    retries=$((retries - 1))
  done
  if [ $retries -eq 0 ]; then
    echo "PostgreSQL not available - schema will be created on first run"
    return 0
  fi
  # Source postgres env if available (may provide POSTGRES_USER/POSTGRES_DB)
  [ -f /run/vibestack/postgres.env ] && source /run/vibestack/postgres.env
  local pg_user="${POSTGRES_USER:-vibestack}"
  local pg_db="${POSTGRES_DB:-vibestack}"
  echo "Initializing memory schema..."
  # Quoted 'EOF' delimiter: the SQL below is passed to psql literally,
  # with no shell expansion.
  psql -U "$pg_user" -d "$pg_db" << 'EOF'
-- Enable pgvector
CREATE EXTENSION IF NOT EXISTS vector;
-- Memories table
CREATE TABLE IF NOT EXISTS memories (
id TEXT PRIMARY KEY,
type TEXT NOT NULL,
content TEXT NOT NULL,
embedding vector(384),
metadata JSONB DEFAULT '{}',
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(type);
CREATE INDEX IF NOT EXISTS idx_memories_created ON memories(created_at DESC);
-- Vector index (IVFFlat for approximate nearest neighbor)
-- Only create if we have enough rows, otherwise exact search is faster
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_memories_embedding') THEN
CREATE INDEX idx_memories_embedding ON memories
USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
END IF;
EXCEPTION WHEN others THEN
-- Index creation might fail if not enough rows, that's ok
RAISE NOTICE 'Vector index not created (will use exact search)';
END $$;
SELECT 'Memory schema initialized' as status;
EOF
  echo "Database schema ready"
}
# Run all setup steps in dependency order; set -e aborts on the first failure
# (except init_database, which degrades gracefully when postgres is down).
install_python
install_pgvector
setup_python_env
download_model
init_database
echo "Memory skill setup complete"

22
scripts/run.sh Normal file
View File

@@ -0,0 +1,22 @@
#!/bin/bash
# Launch the Memory API from the skill's private virtualenv.
set -e

MEMORY_PORT="${MEMORY_PORT:-8081}"
SKILL_DIR="$(dirname "$(dirname "$0")")"
VENV_DIR="$SKILL_DIR/.venv"

# Pull in postgres connection settings when the stack provides them.
if [ -f /run/vibestack/postgres.env ]; then
  source /run/vibestack/postgres.env
fi

# Hand configuration to the Python process via the environment.
export MEMORY_PORT
export MEMORY_MODEL="${MEMORY_MODEL:-all-MiniLM-L6-v2}"
export MEMORY_RETENTION_DAYS="${MEMORY_RETENTION_DAYS:-30}"
export MEMORY_MAX_RESULTS="${MEMORY_MAX_RESULTS:-10}"
export DATABASE_URL="${DATABASE_URL:-postgresql://vibestack:vibestack@localhost:5432/vibestack}"

echo "Starting Memory API on port $MEMORY_PORT..."
echo "Model: $MEMORY_MODEL"
echo "Retention: ${MEMORY_RETENTION_DAYS} days"

# Replace this shell with the API process so signals reach it directly.
exec "$VENV_DIR/bin/python" "$SKILL_DIR/src/api.py"

394
src/api.py Normal file
View File

@@ -0,0 +1,394 @@
#!/usr/bin/env python3
"""
Memory API - Lightweight agent memory with semantic search
"""
import json
import os
from contextlib import asynccontextmanager
from datetime import datetime, timedelta, timezone
from typing import Optional

import psycopg2
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import JSONResponse
from psycopg2.extras import RealDictCursor
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
from ulid import ULID
# Configuration — every value is overridable via the environment.
MEMORY_PORT = int(os.environ.get("MEMORY_PORT", "8081"))  # HTTP listen port
MEMORY_MODEL = os.environ.get("MEMORY_MODEL", "all-MiniLM-L6-v2")  # sentence-transformers model name
MEMORY_RETENTION_DAYS = int(os.environ.get("MEMORY_RETENTION_DAYS", "30"))  # 0 = keep forever
MEMORY_MAX_RESULTS = int(os.environ.get("MEMORY_MAX_RESULTS", "10"))  # default /search result count
DATABASE_URL = os.environ.get("DATABASE_URL", "postgresql://vibestack:vibestack@localhost:5432/vibestack")
# Global model instance, loaded once by lifespan() before requests are served.
model: Optional[SentenceTransformer] = None
# NOTE(review): db_pool is never assigned or read anywhere in this module —
# connections are opened per-request by get_db(). Remove or wire up a pool.
db_pool = None
def get_db():
    """Open a new database connection with dict-style rows (RealDictCursor).

    NOTE(review): a fresh connection is opened per call and nothing ever
    calls conn.close() — callers use `with conn`, which in psycopg2 only
    ends the transaction, not the connection. Connections are reclaimed by
    GC; consider an explicit close or a pool (the unused module-level
    `db_pool` suggests one was intended).
    """
    return psycopg2.connect(DATABASE_URL, cursor_factory=RealDictCursor)
def ensure_schema():
    """Create the pgvector extension, memories table, and b-tree indexes.

    Idempotent (IF NOT EXISTS everywhere); called once at startup from
    lifespan(). The ivfflat vector index is handled by scripts/autorun.sh,
    not here — queries fall back to exact search without it.
    """
    with get_db() as conn:
        with conn.cursor() as cur:
            cur.execute("""
                CREATE EXTENSION IF NOT EXISTS vector;
                CREATE TABLE IF NOT EXISTS memories (
                    id TEXT PRIMARY KEY,
                    type TEXT NOT NULL,
                    content TEXT NOT NULL,
                    embedding vector(384),
                    metadata JSONB DEFAULT '{}',
                    created_at TIMESTAMPTZ DEFAULT NOW()
                );
                CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(type);
                CREATE INDEX IF NOT EXISTS idx_memories_created ON memories(created_at DESC);
            """)
        conn.commit()
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: load the embedding model, verify the schema,
    and run one retention sweep before serving; print on shutdown."""
    global model
    print(f"Loading embedding model: {MEMORY_MODEL}")
    model = SentenceTransformer(MEMORY_MODEL)
    print(f"Model loaded: {model.get_sentence_embedding_dimension()} dimensions")
    ensure_schema()
    print("Database schema verified")
    # Run retention cleanup on startup (only once — there is no periodic timer)
    if MEMORY_RETENTION_DAYS > 0:
        cleanup_old_memories()
    yield
    print("Shutting down...")
# FastAPI application; lifespan() handles model loading and schema setup.
app = FastAPI(
    title="Memory API",
    description="Agent memory with semantic search",
    version="1.0.0",
    lifespan=lifespan
)
class MemoryCreate(BaseModel):
    """Request body for POST /memory."""
    # Memory category, e.g. conversation/execution/finding/memory (not validated here)
    type: str
    # Free text to store; embedded at insert time
    content: str
    # Arbitrary JSON object; Optional means an explicit null is also accepted
    metadata: Optional[dict] = {}
class MemoryResponse(BaseModel):
    """A stored memory as returned by the API (embedding omitted)."""
    # ULID string assigned at insert
    id: str
    type: str
    content: str
    metadata: dict
    # ISO-8601 timestamp string
    created_at: str
    # Cosine similarity in [0, 1]; only populated by /search
    similarity: Optional[float] = None
def embed(text: str) -> list[float]:
    """Generate a normalized embedding for text.

    Uses the global `model` loaded by lifespan() (raises AttributeError if
    called before startup). Normalized embeddings make cosine similarity
    equal to the dot product. Dimension is model-dependent — 384 for the
    default all-MiniLM-L6-v2, matching the vector(384) column.
    """
    return model.encode(text, normalize_embeddings=True).tolist()
def cleanup_old_memories():
    """Delete memories older than MEMORY_RETENTION_DAYS.

    No-op when retention is disabled (MEMORY_RETENTION_DAYS <= 0).
    Prints the number of rows removed when non-zero.
    """
    if MEMORY_RETENTION_DAYS <= 0:
        return
    # Use an *aware* UTC timestamp: created_at is TIMESTAMPTZ, and the old
    # naive datetime.utcnow() value was interpreted in the server session's
    # time zone, skewing the cutoff on non-UTC installations.
    cutoff = datetime.now(timezone.utc) - timedelta(days=MEMORY_RETENTION_DAYS)
    with get_db() as conn:
        with conn.cursor() as cur:
            cur.execute(
                "DELETE FROM memories WHERE created_at < %s",
                (cutoff,)
            )
            deleted = cur.rowcount
        conn.commit()
    if deleted > 0:
        print(f"Cleaned up {deleted} old memories")
@app.post("/memory", response_model=MemoryResponse)
async def create_memory(memory: MemoryCreate):
    """Store a new memory with automatic embedding.

    Assigns a ULID id, embeds the content, and inserts the row. Returns the
    stored memory (embedding omitted).
    """
    memory_id = str(ULID())
    embedding = embed(memory.content)
    # Coerce an explicit JSON `"metadata": null` to {} — otherwise the row
    # comes back with metadata None and MemoryResponse (metadata: dict)
    # fails validation, turning a valid request into a 500.
    metadata = memory.metadata or {}
    with get_db() as conn:
        with conn.cursor() as cur:
            # The Python list is sent as a float array; storage relies on
            # pgvector's array->vector assignment cast — TODO confirm the
            # cast is available in the deployed pgvector version.
            cur.execute("""
                INSERT INTO memories (id, type, content, embedding, metadata)
                VALUES (%s, %s, %s, %s, %s)
                RETURNING id, type, content, metadata, created_at
            """, (
                memory_id,
                memory.type,
                memory.content,
                embedding,
                json.dumps(metadata)
            ))
            row = cur.fetchone()
        conn.commit()
    return MemoryResponse(
        id=row["id"],
        type=row["type"],
        content=row["content"],
        metadata=row["metadata"],
        created_at=row["created_at"].isoformat()
    )
@app.get("/memory")
async def list_memories(
    type: Optional[str] = None,
    limit: int = Query(default=20, le=100),
    offset: int = 0
):
    """List recent memories (newest first), optionally filtered by type.

    Args:
        type: exact memory type to match; None lists all types.
        limit: page size, capped at 100 by the Query validator.
        offset: rows to skip, for pagination. Not validated — a negative
            value reaches SQL and errors; TODO consider ge=0.
    """
    with get_db() as conn:
        with conn.cursor() as cur:
            if type:
                cur.execute("""
                    SELECT id, type, content, metadata, created_at
                    FROM memories
                    WHERE type = %s
                    ORDER BY created_at DESC
                    LIMIT %s OFFSET %s
                """, (type, limit, offset))
            else:
                cur.execute("""
                    SELECT id, type, content, metadata, created_at
                    FROM memories
                    ORDER BY created_at DESC
                    LIMIT %s OFFSET %s
                """, (limit, offset))
            rows = cur.fetchall()
    return {
        "results": [
            {
                "id": row["id"],
                "type": row["type"],
                "content": row["content"],
                "metadata": row["metadata"],
                "created_at": row["created_at"].isoformat()
            }
            for row in rows
        ]
    }
@app.get("/memory/{memory_id}", response_model=MemoryResponse)
async def get_memory(memory_id: str):
    """Get a specific memory by its ULID; 404 when the id is unknown."""
    with get_db() as conn:
        with conn.cursor() as cur:
            cur.execute("""
                SELECT id, type, content, metadata, created_at
                FROM memories
                WHERE id = %s
            """, (memory_id,))
            row = cur.fetchone()
    if not row:
        raise HTTPException(status_code=404, detail="Memory not found")
    return MemoryResponse(
        id=row["id"],
        type=row["type"],
        content=row["content"],
        metadata=row["metadata"],
        created_at=row["created_at"].isoformat()
    )
@app.delete("/memory/{memory_id}")
async def delete_memory(memory_id: str):
    """Delete a single memory by id; 404 when no row matched."""
    with get_db() as conn:
        with conn.cursor() as cur:
            cur.execute("DELETE FROM memories WHERE id = %s", (memory_id,))
            removed = cur.rowcount
        conn.commit()
    if removed == 0:
        raise HTTPException(status_code=404, detail="Memory not found")
    return {"deleted": memory_id}
def _parse_duration(spec: str) -> timedelta:
    """Parse a duration like '7d', '4w', '24h' into a timedelta.

    Raises HTTPException(400) for any malformed spec — previously inputs
    like 'abc' or a bare 'd' raised ValueError and surfaced as a 500.
    """
    units = {"d": "days", "w": "weeks", "h": "hours"}
    bad = HTTPException(status_code=400, detail="Invalid duration format. Use: 7d, 4w, 24h")
    if len(spec) < 2 or spec[-1] not in units:
        raise bad
    try:
        value = int(spec[:-1])
    except ValueError:
        raise bad
    return timedelta(**{units[spec[-1]]: value})

@app.delete("/memory")
async def bulk_delete_memories(
    type: Optional[str] = None,
    older_than: Optional[str] = None
):
    """Bulk delete memories by type and/or age.

    Args:
        type: only delete rows of this memory type.
        older_than: duration like '7d', '4w', '24h'; deletes rows created
            before now minus that duration.

    Returns {"deleted_count": N}; 400 when neither filter is given or the
    duration is malformed.
    """
    conditions = []
    params = []
    if type:
        conditions.append("type = %s")
        params.append(type)
    if older_than:
        # Aware UTC cutoff: created_at is TIMESTAMPTZ, so a naive datetime
        # would be read in the session time zone.
        cutoff = datetime.now(timezone.utc) - _parse_duration(older_than)
        conditions.append("created_at < %s")
        params.append(cutoff)
    if not conditions:
        raise HTTPException(status_code=400, detail="Specify type and/or older_than")
    # Safe f-string: where_clause is built only from the fixed fragments
    # above; all user data goes through bound parameters.
    where_clause = " AND ".join(conditions)
    with get_db() as conn:
        with conn.cursor() as cur:
            cur.execute(f"DELETE FROM memories WHERE {where_clause}", params)
            deleted = cur.rowcount
        conn.commit()
    return {"deleted_count": deleted}
@app.get("/search")
async def search_memories(
    q: str,
    type: Optional[str] = None,
    limit: int = Query(default=None, le=50),
    threshold: float = Query(default=0.3, ge=0, le=1)
):
    """Semantic search across memories.

    Orders rows by cosine distance (pgvector `<=>`) to the query embedding
    and reports similarity = 1 - distance. The threshold is applied in
    Python AFTER the SQL LIMIT, so fewer than `limit` results may be
    returned. With the ivfflat index present, results are approximate.
    """
    # None default lets the env-configured MEMORY_MAX_RESULTS act as the limit.
    if limit is None:
        limit = MEMORY_MAX_RESULTS
    query_embedding = embed(q)
    with get_db() as conn:
        with conn.cursor() as cur:
            if type:
                cur.execute("""
                    SELECT
                        id, type, content, metadata, created_at,
                        1 - (embedding <=> %s::vector) as similarity
                    FROM memories
                    WHERE type = %s
                    ORDER BY embedding <=> %s::vector
                    LIMIT %s
                """, (query_embedding, type, query_embedding, limit))
            else:
                cur.execute("""
                    SELECT
                        id, type, content, metadata, created_at,
                        1 - (embedding <=> %s::vector) as similarity
                    FROM memories
                    ORDER BY embedding <=> %s::vector
                    LIMIT %s
                """, (query_embedding, query_embedding, limit))
            rows = cur.fetchall()
    # Filter by similarity threshold (post-LIMIT — see docstring)
    results = [
        {
            "id": row["id"],
            "type": row["type"],
            "content": row["content"],
            "metadata": row["metadata"],
            "similarity": round(row["similarity"], 4),
            "created_at": row["created_at"].isoformat()
        }
        for row in rows
        if row["similarity"] >= threshold
    ]
    return {"results": results, "query": q}
@app.get("/health")
async def health_check():
    """Health check: proves DB connectivity and reports the row count.

    Returns 200 with status info, or 503 when the database query fails.
    """
    try:
        with get_db() as conn:
            with conn.cursor() as cur:
                cur.execute("SELECT COUNT(*) as count FROM memories")
                count = cur.fetchone()["count"]
    except Exception as e:
        return JSONResponse(
            status_code=503,
            content={"status": "unhealthy", "error": str(e)}
        )
    return {
        "status": "healthy",
        "model": MEMORY_MODEL,
        "memory_count": count
    }
@app.get("/stats")
async def get_stats():
    """Get memory statistics: totals, per-type counts with age range,
    plus the configured retention and model."""
    with get_db() as conn:
        with conn.cursor() as cur:
            cur.execute("""
                SELECT
                    type,
                    COUNT(*) as count,
                    MIN(created_at) as oldest,
                    MAX(created_at) as newest
                FROM memories
                GROUP BY type
                ORDER BY count DESC
            """)
            type_stats = cur.fetchall()
            cur.execute("SELECT COUNT(*) as total FROM memories")
            total = cur.fetchone()["total"]
    return {
        "total": total,
        "by_type": [
            {
                "type": row["type"],
                "count": row["count"],
                # oldest/newest are NULL-safe for the empty-table case
                "oldest": row["oldest"].isoformat() if row["oldest"] else None,
                "newest": row["newest"].isoformat() if row["newest"] else None
            }
            for row in type_stats
        ],
        "retention_days": MEMORY_RETENTION_DAYS,
        "model": MEMORY_MODEL
    }
if __name__ == "__main__":
    # Run standalone under uvicorn; scripts/run.sh invokes this file directly.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=MEMORY_PORT)