commit 1d9de9d77067ff3dd279766f92984a1c6181da6b
Author: Azat
Date:   Tue Feb 3 00:09:06 2026 +0100

    LLM router - proxies to provider skills (claude, openai, ollama)

diff --git a/SKILL.md b/SKILL.md
new file mode 100644
index 0000000..542ffbb
--- /dev/null
+++ b/SKILL.md
@@ -0,0 +1,141 @@
+---
+name: llm
+description: LLM router that proxies to provider skills (claude, openai, ollama)
+metadata:
+  version: "1.0.0"
+  vibestack:
+    main: false
+---
+
+# LLM Skill
+
+Unified LLM router that proxies requests to provider-specific skills, abstracting away which LLM backend is in use.
+
+## Architecture
+
+```
+┌─────────────┐      ┌─────────────┐
+│   client    │─────▶│     llm     │  (router)
+└─────────────┘      └──────┬──────┘
+                            │
+         ┌──────────────────┼──────────────────┐
+         ▼                  ▼                  ▼
+┌───────────────┐  ┌───────────────┐  ┌────────────────┐
+│ claude skill  │  │ openai skill  │  │ ollama skill   │
+│ localhost:8888│  │ localhost:8889│  │ localhost:11434│
+└───────────────┘  └───────────────┘  └────────────────┘
+```
+
+## Configuration
+
+### Environment Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `LLM_PORT` | `8082` | Router port |
+| `LLM_PROVIDER` | `claude` | Active provider: `claude`, `openai`, `ollama` |
+| `CLAUDE_URL` | `http://localhost:8888` | Claude skill URL |
+| `OPENAI_URL` | `http://localhost:8889` | OpenAI skill URL |
+| `OLLAMA_URL` | `http://localhost:11434` | Ollama URL |
+| `MEMORY_URL` | (none) | Memory skill URL for conversation persistence |
+
+## API
+
+### WebSocket Chat
+
+Connect to `ws://localhost:8082/chat` for the unified chat interface.
+
+**Send message:**
+```json
+{
+  "type": "message",
+  "content": "Hello!",
+  "session_id": "optional-session-id"
+}
+```
+
+**Receive:**
+```json
+{"type": "start", "session_id": "abc123"}
+{"type": "token", "content": "Hello"}
+{"type": "token", "content": "!"}
+{"type": "end"}
+```
+
+### REST API
+
+```bash
+# Chat (proxied to provider)
+curl http://localhost:8082/chat \
+  -H "Content-Type: application/json" \
+  -d '{"message": "Hello!"}'
+
+# Execute (one-shot, proxied to provider)
+curl http://localhost:8082/execute \
+  -H "Content-Type: application/json" \
+  -d '{"prompt": "List all files"}'
+
+# Health check
+curl http://localhost:8082/health
+
+# Get current provider
+curl http://localhost:8082/provider
+```
+
+## Provider Skills
+
+Each provider skill implements its own API; the LLM router translates requests into the appropriate format:
+
+### Claude Skill (port 8888)
+- `POST /chat` - `{"message": "...", "session_id": "..."}`
+- `POST /execute` - `{"prompt": "..."}`
+
+### OpenAI Skill (port 8889)
+- `POST /v1/chat/completions` - OpenAI format
+
+### Ollama (port 11434)
+- `POST /api/chat` - Ollama format
+
+## Switching Providers
+
+```bash
+# Use Claude (default)
+LLM_PROVIDER=claude
+
+# Use OpenAI
+LLM_PROVIDER=openai
+
+# Use Ollama
+LLM_PROVIDER=ollama
+```
+
+Clients always connect to `localhost:8082`; they do not need to know which provider is active.
+
+## Tool Calling (Pass-through)
+
+Tools are passed through to the provider skill. When the LLM wants to call a tool:
+
+1. LLM router sends tool definitions to provider
+2. Provider returns tool call request
+3. Router passes tool call to client via WebSocket
+4. Client executes tool, sends result back
+5. Router forwards result to provider
+6. Provider continues conversation
+
+```json
+// Client receives
+{"type": "tool_call", "name": "read_file", "arguments": {"path": "/etc/hosts"}}
+
+// Client sends back
+{"type": "tool_result", "name": "read_file", "result": "127.0.0.1 localhost..."}
+```
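+
+A minimal client-side sketch of this loop, assuming the `websockets` Python package; the `run_tool` helper is illustrative and not part of this skill:
+
+```python
+import asyncio
+import json
+
+import websockets
+
+
+def run_tool(name: str, arguments: dict) -> str:
+    """Hypothetical local tool executor - replace with real tool handling."""
+    return f"(ran {name} with {arguments})"
+
+
+async def chat(prompt: str) -> None:
+    async with websockets.connect("ws://localhost:8082/chat") as ws:
+        await ws.send(json.dumps({"type": "message", "content": prompt}))
+        async for raw in ws:
+            msg = json.loads(raw)
+            if msg["type"] == "token":
+                print(msg["content"], end="", flush=True)
+            elif msg["type"] == "tool_call":
+                # Execute the requested tool locally and send the result back.
+                result = run_tool(msg["name"], msg["arguments"])
+                await ws.send(json.dumps(
+                    {"type": "tool_result", "name": msg["name"], "result": result}
+                ))
+            elif msg["type"] == "error":
+                print(f"\n[error] {msg['message']}")
+            elif msg["type"] == "end":
+                break
+
+
+asyncio.run(chat("Hello!"))
+```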
+
+## Conversation Memory
+
+If `MEMORY_URL` is set, conversations are stored:
+
+```bash
+MEMORY_URL=http://localhost:8081
+```
+
+Each conversation is saved to the memory skill for later retrieval.
diff --git a/scripts/autorun.sh b/scripts/autorun.sh
new file mode 100644
index 0000000..560ad9f
--- /dev/null
+++ b/scripts/autorun.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+set -e
+
+SKILL_DIR="$(dirname "$(dirname "$0")")"
+
+# Install Python if not present
+install_python() {
+    if command -v python3 &>/dev/null; then
+        echo "Python already installed: $(python3 --version)"
+        return 0
+    fi
+
+    echo "Installing Python..."
+    apt-get update
+    apt-get install -y python3 python3-pip python3-venv
+
+    echo "Python installed: $(python3 --version)"
+}
+
+# Setup Python virtual environment and dependencies
+setup_python_env() {
+    local venv_dir="$SKILL_DIR/.venv"
+
+    if [ -d "$venv_dir" ]; then
+        echo "Python venv already exists"
+        return 0
+    fi
+
+    echo "Creating Python virtual environment..."
+    python3 -m venv "$venv_dir"
+
+    echo "Installing Python dependencies..."
+    "$venv_dir/bin/pip" install --upgrade pip
+    "$venv_dir/bin/pip" install \
+        fastapi==0.109.0 \
+        uvicorn==0.27.0 \
+        websockets==12.0 \
+        httpx==0.26.0 \
+        pydantic==2.5.0 \
+        python-ulid==2.2.0
+
+    echo "Python environment ready"
+}
+
+install_python
+setup_python_env
+
+echo "LLM router setup complete"
diff --git a/scripts/run.sh b/scripts/run.sh
new file mode 100644
index 0000000..95c0bde
--- /dev/null
+++ b/scripts/run.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+set -e
+
+LLM_PORT="${LLM_PORT:-8082}"
+SKILL_DIR="$(dirname "$(dirname "$0")")"
+VENV_DIR="$SKILL_DIR/.venv"
+
+# Export config for Python
+export LLM_PORT
+export LLM_PROVIDER="${LLM_PROVIDER:-claude}"
+export CLAUDE_URL="${CLAUDE_URL:-http://localhost:8888}"
+export OPENAI_URL="${OPENAI_URL:-http://localhost:8889}"
+export OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434}"
+export MEMORY_URL="${MEMORY_URL:-}"
+
+echo "Starting LLM Router on port $LLM_PORT..."
+echo "Provider: $LLM_PROVIDER"
+
+case "$LLM_PROVIDER" in
+  claude) echo "Backend: $CLAUDE_URL" ;;
+  openai) echo "Backend: $OPENAI_URL" ;;
+  ollama) echo "Backend: $OLLAMA_URL" ;;
+esac
+
+exec "$VENV_DIR/bin/python" "$SKILL_DIR/src/api.py"
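
A quick way to verify the router once `run.sh` is up is to hit the `/health` and `/provider` endpoints implemented in `src/api.py` below; a minimal smoke-test sketch using `httpx`, assuming the default port:

```python
import httpx

BASE = "http://localhost:8082"  # LLM_PORT default exported by run.sh

# /health reports "healthy" only when the active provider's own /health returns 200.
health = httpx.get(f"{BASE}/health", timeout=5).json()
print(health["status"], health["provider"], health["provider_healthy"])

# /provider shows which backend the router is currently proxying to.
provider = httpx.get(f"{BASE}/provider", timeout=5).json()
print(provider["provider"], "->", provider["url"])
```
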
diff --git a/src/api.py b/src/api.py
new file mode 100644
index 0000000..252484e
--- /dev/null
+++ b/src/api.py
@@ -0,0 +1,337 @@
+#!/usr/bin/env python3
+"""
+LLM Router - Proxies requests to provider skills (claude, openai, ollama)
+"""
+
+import os
+import json
+import asyncio
+from typing import Optional
+from contextlib import asynccontextmanager
+
+import httpx
+from fastapi import FastAPI, WebSocket, WebSocketDisconnect, HTTPException
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel
+from ulid import ULID
+
+# Configuration
+LLM_PORT = int(os.environ.get("LLM_PORT", "8082"))
+LLM_PROVIDER = os.environ.get("LLM_PROVIDER", "claude")
+
+# Provider skill URLs
+CLAUDE_URL = os.environ.get("CLAUDE_URL", "http://localhost:8888")
+OPENAI_URL = os.environ.get("OPENAI_URL", "http://localhost:8889")
+OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434")
+
+# Memory integration
+MEMORY_URL = os.environ.get("MEMORY_URL", "")
+
+
+def get_provider_url() -> str:
+    """Get URL for current provider."""
+    providers = {
+        "claude": CLAUDE_URL,
+        "openai": OPENAI_URL,
+        "ollama": OLLAMA_URL,
+    }
+    return providers.get(LLM_PROVIDER, CLAUDE_URL)
+
+
+class ChatRequest(BaseModel):
+    message: str
+    session_id: Optional[str] = None
+
+
+class ExecuteRequest(BaseModel):
+    prompt: str
+
+
+# Memory integration
+async def store_conversation(session_id: str, message: str, response: str):
+    """Store conversation in memory skill."""
+    if not MEMORY_URL:
+        return
+
+    content = f"User: {message}\nAssistant: {response}"
+
+    try:
+        async with httpx.AsyncClient() as client:
+            await client.post(
+                f"{MEMORY_URL}/memory",
+                json={
+                    "type": "conversation",
+                    "content": content,
+                    "metadata": {"session_id": session_id, "provider": LLM_PROVIDER},
+                },
+                timeout=5,
+            )
+    except Exception as e:
+        print(f"Failed to store conversation: {e}")
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    print(f"LLM Router starting on port {LLM_PORT}")
+    print(f"Provider: {LLM_PROVIDER} -> {get_provider_url()}")
+    yield
+    print("Shutting down...")
+
+
+app = FastAPI(
+    title="LLM Router",
+    description="Unified LLM interface routing to provider skills",
+    version="1.0.0",
+    lifespan=lifespan,
+)
+
+
+@app.get("/health")
+async def health():
+    """Health check - also checks provider health."""
+    provider_url = get_provider_url()
+    provider_healthy = False
+
+    try:
+        async with httpx.AsyncClient() as client:
+            resp = await client.get(f"{provider_url}/health", timeout=5)
+            provider_healthy = resp.status_code == 200
+    except Exception:
+        pass
+
+    return {
+        "status": "healthy" if provider_healthy else "degraded",
+        "provider": LLM_PROVIDER,
+        "provider_url": provider_url,
+        "provider_healthy": provider_healthy,
+    }
+
+
+@app.get("/provider")
+async def get_provider():
+    """Get current provider info."""
+    return {
+        "provider": LLM_PROVIDER,
+        "url": get_provider_url(),
+    }
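+
+
+# Provider translation summary (see SKILL.md, "Provider Skills"):
+#   claude -> POST {CLAUDE_URL}/chat with {"message", "session_id"};
+#             reply: {"success", "response"}
+#   ollama -> POST {OLLAMA_URL}/api/chat in Ollama chat format;
+#             reply text at message.content
+#   openai -> POST {OPENAI_URL}/v1/chat/completions in OpenAI format;
+#             reply text at choices[0].message.content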
"session_id": session_id}, + timeout=120, + ) + data = resp.json() + + if data.get("success"): + response_text = data.get("response", "") + await store_conversation(session_id, request.message, response_text) + return { + "success": True, + "response": response_text, + "session_id": session_id, + "provider": LLM_PROVIDER, + } + else: + return JSONResponse( + status_code=500, + content={"success": False, "error": data.get("error", "Unknown error")}, + ) + + elif LLM_PROVIDER == "ollama": + # Ollama format + resp = await client.post( + f"{provider_url}/api/chat", + json={ + "model": os.environ.get("OLLAMA_MODEL", "llama3.2"), + "messages": [{"role": "user", "content": request.message}], + "stream": False, + }, + timeout=120, + ) + data = resp.json() + response_text = data.get("message", {}).get("content", "") + await store_conversation(session_id, request.message, response_text) + return { + "success": True, + "response": response_text, + "session_id": session_id, + "provider": LLM_PROVIDER, + } + + elif LLM_PROVIDER == "openai": + # OpenAI skill format + resp = await client.post( + f"{provider_url}/v1/chat/completions", + json={ + "model": os.environ.get("OPENAI_MODEL", "gpt-4o"), + "messages": [{"role": "user", "content": request.message}], + }, + timeout=120, + ) + data = resp.json() + response_text = data.get("choices", [{}])[0].get("message", {}).get("content", "") + await store_conversation(session_id, request.message, response_text) + return { + "success": True, + "response": response_text, + "session_id": session_id, + "provider": LLM_PROVIDER, + } + + else: + raise HTTPException(status_code=400, detail=f"Unknown provider: {LLM_PROVIDER}") + + except httpx.RequestError as e: + return JSONResponse( + status_code=503, + content={"success": False, "error": f"Provider unavailable: {e}"}, + ) + + +@app.post("/execute") +async def execute(request: ExecuteRequest): + """Execute endpoint - proxies to provider skill.""" + provider_url = get_provider_url() + + try: + async with httpx.AsyncClient() as client: + if LLM_PROVIDER == "claude": + # Claude skill execute endpoint + resp = await client.post( + f"{provider_url}/execute", + json={"prompt": request.prompt}, + timeout=300, # Longer timeout for execution + ) + return resp.json() + + elif LLM_PROVIDER == "ollama": + # Use chat for ollama + resp = await client.post( + f"{provider_url}/api/chat", + json={ + "model": os.environ.get("OLLAMA_MODEL", "llama3.2"), + "messages": [{"role": "user", "content": request.prompt}], + "stream": False, + }, + timeout=300, + ) + data = resp.json() + return { + "success": True, + "result": data.get("message", {}).get("content", ""), + } + + else: + raise HTTPException(status_code=400, detail=f"Execute not supported for: {LLM_PROVIDER}") + + except httpx.RequestError as e: + return JSONResponse( + status_code=503, + content={"success": False, "error": f"Provider unavailable: {e}"}, + ) + + +@app.websocket("/chat") +async def websocket_chat(websocket: WebSocket): + """WebSocket chat endpoint with streaming proxy.""" + await websocket.accept() + + provider_url = get_provider_url() + session_id = str(ULID()) + + try: + while True: + data = await websocket.receive_json() + + if data.get("type") == "ping": + await websocket.send_json({"type": "pong"}) + continue + + if data.get("type") != "message": + continue + + content = data.get("content", "") + session_id = data.get("session_id") or session_id + + # Send start + await websocket.send_json({ + "type": "start", + "session_id": session_id, + "provider": 
+@app.websocket("/chat")
+async def websocket_chat(websocket: WebSocket):
+    """WebSocket chat endpoint with streaming proxy."""
+    await websocket.accept()
+
+    provider_url = get_provider_url()
+    session_id = str(ULID())
+
+    try:
+        while True:
+            data = await websocket.receive_json()
+
+            if data.get("type") == "ping":
+                await websocket.send_json({"type": "pong"})
+                continue
+
+            if data.get("type") != "message":
+                continue
+
+            content = data.get("content", "")
+            session_id = data.get("session_id") or session_id
+
+            # Send start
+            await websocket.send_json({
+                "type": "start",
+                "session_id": session_id,
+                "provider": LLM_PROVIDER,
+            })
+
+            try:
+                async with httpx.AsyncClient() as client:
+                    if LLM_PROVIDER == "claude":
+                        # Claude skill (non-streaming for now)
+                        resp = await client.post(
+                            f"{provider_url}/chat",
+                            json={"message": content, "session_id": session_id},
+                            timeout=120,
+                        )
+                        result = resp.json()
+
+                        if result.get("success"):
+                            response_text = result.get("response", "")
+                            # Send as single token (claude skill doesn't stream yet)
+                            await websocket.send_json({"type": "token", "content": response_text})
+                            await store_conversation(session_id, content, response_text)
+                        else:
+                            await websocket.send_json({"type": "error", "message": result.get("error", "Unknown error")})
+
+                    elif LLM_PROVIDER == "ollama":
+                        # Ollama streaming
+                        async with client.stream(
+                            "POST",
+                            f"{provider_url}/api/chat",
+                            json={
+                                "model": os.environ.get("OLLAMA_MODEL", "llama3.2"),
+                                "messages": [{"role": "user", "content": content}],
+                                "stream": True,
+                            },
+                            timeout=300,
+                        ) as resp:
+                            full_response = ""
+                            async for line in resp.aiter_lines():
+                                if line:
+                                    chunk = json.loads(line)
+                                    if "message" in chunk and chunk["message"].get("content"):
+                                        token = chunk["message"]["content"]
+                                        full_response += token
+                                        await websocket.send_json({"type": "token", "content": token})
+
+                            await store_conversation(session_id, content, full_response)
+
+                    else:
+                        await websocket.send_json({"type": "error", "message": f"Unknown provider: {LLM_PROVIDER}"})
+
+            except httpx.RequestError as e:
+                await websocket.send_json({"type": "error", "message": f"Provider unavailable: {e}"})
+
+            # Send end
+            await websocket.send_json({"type": "end"})
+
+    except WebSocketDisconnect:
+        print("WebSocket disconnected")
+    except Exception as e:
+        print(f"WebSocket error: {e}")
+        try:
+            await websocket.send_json({"type": "error", "message": str(e)})
+        except Exception:
+            pass
+
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=LLM_PORT)
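
For completeness, a minimal REST client for the router's `/chat` endpoint, sketched with `httpx` against the defaults above; it reuses the returned `session_id` for a follow-up message (whether history is actually threaded per session is up to the active provider skill):

```python
import httpx

BASE = "http://localhost:8082"  # LLM_PORT default from run.sh

# First message; the router generates a session_id when none is supplied.
first = httpx.post(f"{BASE}/chat", json={"message": "Hello!"}, timeout=120).json()
if not first.get("success"):
    raise SystemExit(f"chat failed: {first.get('error')}")
print(f"[{first['provider']}] {first['response']}")

# Follow-up in the same session; the router forwards session_id to the provider skill.
followup = httpx.post(
    f"{BASE}/chat",
    json={"message": "Summarize that in one sentence.", "session_id": first["session_id"]},
    timeout=120,
).json()
print(followup.get("response"))
```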