diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..b45bd85
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,28 @@
+# Git
+.git
+.gitignore
+
+# Build artifacts
+target/
+!target/release/picobot
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# Docs and references
+docs/
+reference/
+
+# Test files
+tests/
+
+# Misc
+*.md
+*.txt
+.opencode/
+CLAUDE.md
+AGENTS.md
+ARCHITECTURE_REVIEW.md
diff --git a/.gitignore b/.gitignore
index 4854d56..b48c856 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ reference/**
 *.env
 Cargo.lock
 .worktrees/
+design
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..c28c148
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,133 @@
+# syntax=docker/dockerfile:1
+# =============================================================================
+# PicoBot Docker Image
+# =============================================================================
+# Build binary on host:
+#   cargo build --release
+#
+# Build image (needs BuildKit, the default builder since Docker Engine 23.0):
+#   docker build -t picobot .
+#
+# Run gateway: docker run -d -v ~/.picobot:/app/.picobot -p 19876:19876 picobot gateway
+# Run chat:    docker run -it -v ~/.picobot:/app/.picobot picobot chat
+# =============================================================================
+
+FROM debian:trixie-slim
+
+LABEL org.opencontainers.image.title="PicoBot"
+LABEL org.opencontainers.image.description="AI agent gateway and chat client"
+LABEL org.opencontainers.image.source="https://github.com/your-repo/picobot"
+
+# Avoid interactive prompts during the build. ARG (not ENV) keeps the setting
+# out of the environment of running containers.
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Make RUN fail when any stage of a pipeline fails (curl | bash, curl | tar);
+# the default /bin/sh only reports the exit status of the last command.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+# Configure domestic mirrors for pip, uv, npm (China).
+# Set through ENV rather than per-user config files so the mirrors also apply
+# to the non-root runtime user, whose HOME is /app.
+ENV PIP_INDEX_URL=https://mirrors.aliyun.com/pypi/simple/ \
+    UV_INDEX_URL=https://mirrors.aliyun.com/pypi/simple/ \
+    NPM_CONFIG_REGISTRY=https://registry.npmmirror.com
+
+# Install base tools, Python, and uv in one layer to reduce duplication
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        ca-certificates \
+        curl \
+        dnsutils \
+        fonts-wqy-microhei \
+        fonts-wqy-zenhei \
+        git \
+        gnupg \
+        jq \
+        openssh-client \
+        poppler-utils \
+        python3 \
+        python3-pip \
+        python3-venv \
+        sqlite3 \
+        sshpass \
+        tini \
+        tree \
+        unzip \
+        zip \
+    && rm -rf /var/lib/apt/lists/* \
+    && pip3 install --no-cache-dir --break-system-packages uv
+
+# Install Node.js and npx.
+# NOTE(review): this runs an unpinned remote script as root; consider installing
+# from a pinned NodeSource apt repository entry instead.
+RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \
+    && apt-get install -y --no-install-recommends nodejs \
+    && npm cache clean --force \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install himalaya (CLI email client) from local file.
+# The archive is bind-mounted from the build context for this step only, so it
+# is never stored in an image layer.
+RUN --mount=type=bind,source=docker_build/himalaya.x86_64-linux.tgz,target=/tmp/himalaya.tgz \
+    tar -xzf /tmp/himalaya.tgz -C /usr/local/bin \
+    && chmod +x /usr/local/bin/himalaya
+
+# Install fd (alternative to find). Only the binary is extracted; the release
+# archive also ships docs and completions that do not belong in /usr/local/bin.
+ARG FD_VERSION=9.0.0
+RUN fd_pkg="fd-v${FD_VERSION}-x86_64-unknown-linux-gnu" \
+    && curl -fsSL "https://github.com/sharkdp/fd/releases/download/v${FD_VERSION}/${fd_pkg}.tar.gz" \
+        | tar -xz --strip-components=1 -C /usr/local/bin "${fd_pkg}/fd" \
+    && chmod +x /usr/local/bin/fd
+
+# Install ripgrep (rg), again extracting only the binary from the archive
+ARG RG_VERSION=14.1.0
+RUN rg_pkg="ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl" \
+    && curl -fsSL "https://github.com/BurntSushi/ripgrep/releases/download/${RG_VERSION}/${rg_pkg}.tar.gz" \
+        | tar -xz --strip-components=1 -C /usr/local/bin "${rg_pkg}/rg" \
+    && chmod +x /usr/local/bin/rg
+
+# Install Chromium and chromedriver for browser automation
+# Debian's chromium package is real (not a snap shim like Ubuntu 24.04)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        chromium \
+        chromium-driver \
+    && ln -sf /usr/bin/chromium /usr/local/bin/chrome \
+    && ln -sf /usr/bin/chromedriver /usr/local/bin/chromedriver \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create non-root user
+RUN useradd -m -s /bin/bash app
+
+WORKDIR /app
+
+# Create required directories. /app itself is chowned too because it is the
+# runtime HOME. This runs before the COPY steps so the recursive chown does not
+# duplicate the binary into another layer.
+RUN mkdir -p /app/.picobot/workspace /app/.picobot/media /app/.picobot/tmp \
+    && chown -R app:app /app
+
+# Copy config template
+COPY --chown=app:app resources/templates/config.example.json /app/config.json.example
+
+# Copy pre-built binary from host. Kept last among the filesystem steps because
+# it changes most often, so earlier layers stay cached.
+COPY --chown=app:app target/release/picobot /app/picobot
+
+USER app
+
+# Runtime environment; CHROME_BIN and TMPDIR are for Chromium in containers
+ENV HOME=/app \
+    CHROME_BIN=/usr/bin/chromium \
+    TMPDIR=/app/.picobot/tmp \
+    RUST_LOG=info
+
+# NOTE(review): the README says the gateway listens on 127.0.0.1 by default; a
+# published port is only reachable if gateway.host is set to 0.0.0.0 in the
+# mounted config.json - confirm before relying on -p 19876:19876.
+EXPOSE 19876
+
+# tini runs as PID 1 to forward signals and reap the child processes picobot
+# spawns (shell commands, Chromium, ...).
+ENTRYPOINT ["/usr/bin/tini", "--", "/app/picobot"]
+CMD ["gateway"]
diff --git a/README.md b/README.md index a63b415..a642fdb 100644 --- a/README.md +++ b/README.md @@ -1,143 +1,102 @@ # PicoBot -A multi-channel AI agent framework with a WebSocket gateway and TUI client, supporting OpenAI-compatible and Anthropic LLM providers, tool calling, session persistence, and cron-based scheduling. +PicoBot is a Rust-based personal AI assistant runtime. It runs a local gateway, connects chat channels such as the terminal TUI and Feishu/Lark, persists sessions in SQLite, and gives the agent a tool system for files, shell commands, web access, memory, scheduling, skills, MCP tools, and delegated sub-agents.
-## System Architecture +## What It Does -```mermaid -graph TB - subgraph Clients - TUI["🖥️ CLI Chat (TUI)"] - FS["📱 Feishu/Lark"] - end +- Runs as a gateway server on `127.0.0.1:19876` by default. +- Provides a Ratatui terminal client over WebSocket. +- Supports Feishu/Lark messages, reactions, file upload/download, and media references. +- Calls OpenAI-compatible providers and Anthropic Messages API providers. +- Persists conversations, messages, memories, scheduled jobs, LLM call metadata, and background sub-agent tasks in SQLite. +- Loads skills from workspace, user, and shared skill directories, with built-in skills installed on first use. +- Compresses long contexts and stores timeline summaries for later recall. +- Can register tools discovered from configured MCP servers. - subgraph Gateway["Gateway Server (127.0.0.1:19876)"] - HTTP["HTTP Endpoints
GET /health
GET /ws (WebSocket upgrade)"] - WS["WebSocket Handler"] - CD["ChannelManager"] - SP["SessionManager"] - AL["AgentLoop"] - end +## Architecture - subgraph Bus["MessageBus"] - IB["Inbound Channel"] - OB["Outbound Channel"] - CC["Control Channel"] - end +```text +Channel -> MessageBus -> SessionManager -> AgentLoop -> LLM Provider + | | + | v + | Tools + v + SQLite - subgraph Storage - SQLite[("SQLite
picobot.db")] - end - - subgraph AI["AI Providers"] - OpenAI["OpenAI / DashScope"] - Anthropic["Anthropic Claude"] - end - - TUI <-->|WebSocket| WS - FS <-->|Webhook| HTTP - - CD -->|InboundMessage| IB - IB -->|DialogEvent| SP - CC -->|ControlMessage| SP - SP <--> AL - AL -->|API Call| OpenAI - AL -->|API Call| Anthropic - AL -->|Tool Call| Tools - SP -->|OutboundMessage| OB - OB --> CD - SP --> SQLite - Tools --> SQLite - - subgraph Tools - Bash["Bash"] - FileIO["File Read/Write/Edit"] - Web["HTTP Request / Web Fetch"] - Calc["Calculator"] - Skill["Get Skill"] - Msg["Send Message"] - Cron["Cron Jobs"] - end +Control messages -> SessionManager -> MessageBus -> OutboundDispatcher -> Channel ``` -### Core Data Flow +The main runtime boundary is: -```mermaid -sequenceDiagram - participant Channel as Channel
(CLI/Feishu) - participant Bus as MessageBus - participant SM as SessionManager - participant AL as AgentLoop - participant LLM as LLM Provider - participant Tool as Tools - - Channel->>Bus: InboundMessage (user input) - Bus->>SM: DialogEvent - SM->>SM: Load/Resolve Session - SM->>AL: Process (session state) - AL->>LLM: ChatCompletionRequest - LLM-->>AL: response / tool_calls - alt Tool Calls - AL->>Tool: execute tool - Tool-->>AL: result - AL->>LLM: continue with tool result - end - AL-->>SM: AgentProcessResult (text + token count) - SM->>SM: Persist to SQLite - SM->>Bus: OutboundMessage - Bus->>Channel: response to user -``` +- `channels` only receive and send external messages. +- `bus` is an async queue, not a router. +- `session` owns dialog lifecycle, persistence, memory recall, prompt assembly, compression, and task cancellation. +- `agent` runs the stateless LLM/tool loop. +- `providers` are HTTP clients for model APIs. +- `tools` execute agent actions and return string results. +- `storage` owns SQLite schema and CRUD. +- `scheduler` polls due jobs and feeds prompts back into sessions. ## Features -### Multi-Channel Support -- **CLI Chat Client** — Full TUI with session management, Markdown rendering, slash commands -- **Feishu (Lark)** — Webhook-based integration with typing indicators and media support +### Channels -### Multi-Provider LLM -- OpenAI-compatible API (GPT-4, DashScope, Volcengine, etc.) -- Anthropic Messages API (Claude) -- Cross-provider JSON Schema normalization for tool calling compatibility +- `cli_chat`: terminal TUI client connected through `/ws`. +- `feishu`: Feishu/Lark channel with configurable allow list, media directory, and reaction emoji. 
-### Session Management -- Multi-session conversations per channel/chat -- Create, switch, rename, archive, delete dialogs via slash commands or WebSocket -- SQLite-persisted session history with automatic TTL-based cleanup -- Context compression for long conversations approaching token limits +### LLM Providers -### Tool System -| Tool | Description | -|------|-------------| -| `bash` | Execute shell commands in workspace | -| `file_read` | Read file contents | -| `file_write` | Create/overwrite files | -| `file_edit` | Precise string substitution in files | -| `http_request` | Make HTTP API requests | -| `web_fetch` | Fetch and parse web pages | -| `calculator` | Evaluate mathematical expressions | -| `get_skill` | Load agent skills from local skill files | -| `send_message` | Send messages to other channels | -| `cron_add/list/remove/enable/disable/update` | Manage scheduled jobs | +- OpenAI-compatible chat completions, including DashScope, Volcengine, and similar APIs. +- Anthropic Messages API. +- Model-specific `input_type` metadata for text/image capability checks. +- JSON Schema cleanup for cross-provider tool compatibility. -### Scheduling -- Cron-based recurring jobs with optional timezone support -- One-shot (`at`) and interval (`every`) schedules -- Jobs trigger agent processing via specified channel/chat +### Sessions And Memory -### Skills System -- Load Markdown skill files from `~/.picobot/skills` and `~/.agents/skills` -- Skills inject specialized system prompts for specific tasks -- Automatic hot-reload on file changes +- Session IDs use `::`. +- Each channel/chat can have multiple dialogs. +- Dialog operations include create, list, switch, rename, delete, compact, dump, info, and stop. +- Session history is persisted to SQLite and can be incrementally restored after compression. +- Knowledge memories are recalled into the system prompt each turn. +- Timeline memories are produced by context compression and can be searched later. 
-### Observability -- Observer pattern for agent and tool telemetry -- Events: `AgentStart`, `AgentEnd`, `ToolCallStart`, `ToolCall` -- Structured JSON logging with file rotation +### Tools + +Base tools registered for the agent: + +| Tool | Purpose | +|------|---------| +| `calculator` | Math expressions and statistics | +| `file_read` / `file_write` / `file_edit` | Workspace file operations | +| `file_search` / `content_search` | File and content search | +| `bash` | Run shell commands in the workspace | +| `http_request` | HTTP API requests | +| `web_fetch` | Fetch and extract web page text | +| `get_skill` | List or load local skills | +| `memory_store` / `memory_recall` / `timeline_recall` / `memory_forget` | Long-term memory operations | +| `delegate` | Run inline, background, or parallel sub-agents | +| `send_message` | Send outbound messages to configured channels | +| `chat_manager` | Inspect sessions, channels, and stored messages | +| `cron_add/list/remove/enable/disable/update` | Manage scheduled jobs when scheduler is enabled | +| `browser` | Optional WebDriver browser automation when enabled | +| MCP tools | Dynamically registered from configured MCP servers | + +### Skills + +Skills are directories containing `SKILL.md`. Load priority is: + +1. `{workspace}/skills` +2. `~/.picobot/skills` +3. `~/.agents/skills` + +Same-name skills in higher-priority locations override lower-priority ones. Built-in skills from `resources/skills` are embedded into the binary and installed into `~/.picobot/skills` if missing. ## Quick Start ### Prerequisites -- Rust nightly (edition 2024) — use `rustup` to install + +- Rust toolchain with edition 2024 support. +- A configured LLM provider API key. ### Build @@ -147,276 +106,186 @@ cargo build ### Configure -1. Create `config.json` (or `~/.picobot/config.json`): +PicoBot loads `~/.picobot/config.json` first, then falls back to `./config.json`. 
On gateway startup, a template is released to `~/.picobot/config.example.json` if it does not exist. The source template is [resources/templates/config.example.json](resources/templates/config.example.json). + +Minimal example: ```json { - "providers": { - "openai": { - "type": "openai", - "base_url": "https://api.openai.com/v1", - "api_key": "" - } - }, - "models": { - "gpt-4o": { - "model_id": "gpt-4o", - "temperature": 0.7, - "max_tokens": 4096 - } - }, - "agents": { - "default": { - "provider": "openai", - "model": "gpt-4o", - "max_tool_iterations": 99, - "token_limit": 128000 - } + "providers": { + "openai": { + "type": "openai", + "base_url": "https://api.openai.com/v1", + "api_key": "", + "extra_headers": {} } + }, + "models": { + "gpt-4o": { + "model_id": "gpt-4o", + "temperature": 0.7, + "max_tokens": 4096, + "input_type": ["text", "image"] + } + }, + "agents": { + "default": { + "provider": "openai", + "model": "gpt-4o", + "max_tool_iterations": 99, + "token_limit": 128000 + } + }, + "workspace_dir": "~/.picobot/workspace" } ``` -2. Set API keys via `.env` file (one `KEY=VALUE` per line): - -```env -OPENAI_API_KEY=sk-xxxxx -``` +The `.env` file in the current directory is parsed by PicoBot itself. Values like `` in JSON are replaced from the process environment after `.env` is loaded. ### Run -**Start gateway server:** - ```bash cargo run -- gateway ``` -Binds `127.0.0.1:19876` by default. Override with `--host` and `--port`. +The gateway switches the process working directory to `workspace_dir` and stores `picobot.db` there by default. -**Connect CLI client:** +In another terminal: ```bash cargo run -- chat ``` -Connects to `ws://127.0.0.1:19876/ws`. Override with `--gateway-url`. +The client connects to `ws://127.0.0.1:19876/ws` by default. Override with `--gateway-url`. -## Configuration Reference +## Configuration -Config load order: `~/.picobot/config.json` → `./config.json` (fallback). 
+Top-level config fields: -### Full Config Structure +| Field | Purpose | +|-------|---------| +| `providers` | Named LLM provider configs | +| `models` | Named model configs | +| `agents` | Agent-to-provider/model binding | +| `gateway` | Bind address, session DB path, cleanup, scheduler, background task limits | +| `client` | Default WebSocket URL for the TUI client | +| `channels` | Channel configs, currently Feishu/Lark | +| `memory` | Recall and consolidation settings | +| `mcp` | MCP server configs | +| `browser` | Optional WebDriver browser tool config | +| `workspace_dir` | Workspace used for file tools, shell commands, DB default, and workspace skills | -```mermaid -graph LR - Config["config.json"] - Config --> Providers["providers
ProviderConfig{}"] - Config --> Models["models
ModelConfig{}"] - Config --> Agents["agents
AgentConfig{}"] - Config --> Gateway["gateway
GatewayConfig"] - Config --> Client["client
ClientConfig"] - Config --> Channels["channels
ChannelConfig{}"] - Config --> Workspace["workspace_dir"] +Important defaults: - Providers --> PT["type (openai / anthropic)
base_url
api_key
extra_headers"] - Models --> MT["model_id
temperature
max_tokens"] - Agents --> AT["provider (ref)
model (ref)
max_tool_iterations
token_limit"] - Gateway --> GT["host / port
session_db_path
scheduler"] - Channels --> CT["feishu: app_id, app_secret
allow_from, agent, media_dir"] -``` +| Key | Default | +|-----|---------| +| `gateway.host` | `127.0.0.1` | +| `gateway.port` | `19876` | +| `gateway.max_concurrent_background_tasks` | `10` | +| `gateway.scheduler.enabled` | `true` if `scheduler` is omitted and defaulted | +| `client.gateway_url` | `ws://127.0.0.1:19876/ws` | +| `memory.recall_limit` | `5` | +| `memory.timeline_retention_days` | `90` | +| `mcp.tool_timeout_secs` | `180` | +| `browser.enabled` | `false` | -### Environment Variables - -The `.env` file in the working directory is loaded manually (not via dotenv crate). Placeholders in `config.json` written as `` are substituted at load time. - -### Gateway Config - -| Key | Type | Default | Description | -|-----|------|---------|-------------| -| `host` | string | `127.0.0.1` | Bind address | -| `port` | u16 | `19876` | Listen port | -| `session_db_path` | string | workspace `picobot.db` | SQLite database path | -| `scheduler.enabled` | bool | `false` | Enable cron scheduler | - -### Agent Config - -| Key | Type | Default | Description | -|-----|------|---------|-------------| -| `provider` | string | — | Provider name (key in `providers`) | -| `model` | string | — | Model name (key in `models`) | -| `max_tool_iterations` | number | `99` | Max tool call iterations per turn | -| `token_limit` | number | `128000` | Context window token limit | +MCP servers support `stdio`, `sse`, and `streamable-http` transports. Browser automation requires a compatible Chrome/Chromium and chromedriver/WebDriver endpoint. 
## Slash Commands -Available in CLI chat and Feishu: +Available from CLI chat and channel text messages: -| Command | Alias | Description | -|---------|-------|-------------| -| `/new` | `/刷新` | Create a new dialog | -| `/list` | `/对话列表` | List all dialogs | -| `/switch ` | — | Switch to a dialog | -| `/rename ` | — | Rename current dialog | -| `/archive` | — | Archive current dialog | -| `/delete` | — | Delete current dialog | -| `/clear` | `/清空` | Clear current dialog history | +| Command | Description | +|---------|-------------| +| `/new` | Create a new dialog | +| `/sessions` | List recent dialogs | +| `/switch <dialog_id>` | Switch dialog | +| `/rename <title>` | Rename current dialog | +| `/delete` | Delete current dialog | +| `/compact` | Manually trigger context compression | +| `/info` | Show current dialog information | +| `/dump` | Save current dialog as Markdown | +| `/?`, `/help` | Show help | +| `/mcp` | Show MCP server and tool status | +| `/stop` | Stop active tasks and clear queued messages | -## WebSocket Protocol +## WebSocket API -The gateway exposes a WebSocket endpoint at `/ws`. Messages use typed JSON with a `type` discriminator field. 
- -### Client → Server (WsInbound) - -| Type | Fields | -|------|--------| -| `user_input` | `content`, `channel?`, `chat_id?`, `sender_id?` | -| `create_session` | `title?` | -| `list_sessions` | `include_archived` | -| `load_session` | `session_id` | -| `rename_session` | `session_id?`, `title` | -| `archive_session` | `session_id?` | -| `delete_session` | `session_id?` | -| `clear_history` | `chat_id?`, `session_id?` | -| `get_slash_commands` | — | -| `ping` | — | - -### Server → Client (WsOutbound) - -| Type | Fields | -|------|--------| -| `assistant_response` | `session_id`, `response`, `tokens_used?`, `tool_calls?` | -| `session_list` | `sessions[]` | -| `session_loaded` | `session_id`, `messages[]` | -| `session_created` | `session_id`, `title` | -| `session_renamed` | `session_id`, `title` | -| `session_archived` | `session_id` | -| `session_deleted` | `session_id` | -| `slash_commands` | `commands[]` | -| `error` | `message` | -| `pong` | — | - -## HTTP Endpoints +The gateway exposes: | Method | Path | Description | |--------|------|-------------| -| `GET` | `/health` | Health check — returns `{"status":"ok","version":"x.y.z"}` | +| `GET` | `/health` | Returns service health and version | | `GET` | `/ws` | WebSocket upgrade for chat clients | +Inbound WebSocket message types: + +| Type | Main fields | +|------|-------------| +| `user_input` | `content`, optional `channel`, `chat_id`, `sender_id` | +| `clear_history` | optional `chat_id`, `session_id` | +| `create_session` | optional `title` | +| `list_sessions` | `include_archived` | +| `load_session` | `session_id` | +| `rename_session` | optional `session_id`, `title` | +| `archive_session` | optional `session_id` | +| `delete_session` | optional `session_id` | +| `get_slash_commands` | none | +| `ping` | none | + +Outbound WebSocket message types include `assistant_response`, `error`, `session_established`, `session_created`, `session_list`, `session_loaded`, `session_renamed`, `session_archived`, 
`session_deleted`, `history_cleared`, `slash_commands_list`, `pong`, `command_executed`, and `system_notification`. + ## Testing ```bash -# Unit tests (no external dependencies) +# Unit tests cargo test --lib -# Integration tests (require API keys) +# Integration tests require real API keys in tests/test.env cp tests/test.env.example tests/test.env -# Fill in your API keys in tests/test.env cargo test --test test_integration -- --ignored cargo test --test test_tool_calling -- --ignored cargo test --test test_request_format -- --ignored - -# Run all tests -cargo test -- --ignored ``` -Integration tests are `#[ignore]` by default because they make real API calls. +Integration tests are ignored by default because they make real provider calls. -## Project Structure +## Project Layout -``` -├── src/ -│ ├── main.rs # CLI entrypoint (clap-based subcommands) -│ ├── lib.rs # Module declarations -│ ├── gateway/ # HTTP/WS server, GatewayState initialization -│ │ ├── mod.rs -│ │ ├── http.rs # Health endpoint -│ │ └── ws.rs # WebSocket handler -│ ├── client/ # TUI chat client -│ │ ├── mod.rs -│ │ └── tui/ # Ratatui-based terminal UI -│ ├── channels/ # Channel integrations -│ │ ├── base.rs # Channel trait -│ │ ├── cli_chat.rs # CLI WebSocket channel -│ │ ├── feishu.rs # Feishu/Lark webhook channel -│ │ ├── manager.rs # ChannelManager -│ │ └── slash_command.rs # Slash command parser -│ ├── bus/ # Async message bus -│ │ ├── mod.rs # MessageBus (tokio mpsc channels) -│ │ ├── message.rs # Message types -│ │ └── dispatcher.rs # OutboundDispatcher -│ ├── session/ # Session & dialog management -│ │ ├── mod.rs -│ │ ├── session.rs # Session, SessionManager -│ │ ├── session_id.rs # UnifiedSessionId -│ │ ├── commands.rs # SessionCommand enum -│ │ └── events.rs # SessionEvent, DialogInfo -│ ├── agent/ # LLM interaction loop -│ │ ├── mod.rs -│ │ ├── agent_loop.rs # AgentLoop (stateless) -│ │ ├── context_compressor.rs # Token estimation & summarization -│ │ └── system_prompt.rs # System 
prompt builder -│ ├── providers/ # LLM API clients -│ │ ├── mod.rs # Factory: create_provider() -│ │ ├── traits.rs # LLMProvider trait -│ │ ├── openai.rs # OpenAI-compatible client -│ │ └── anthropic.rs # Anthropic Messages API client -│ ├── tools/ # Agent tools -│ │ ├── mod.rs # create_default_tools() -│ │ ├── registry.rs # ToolRegistry -│ │ ├── traits.rs # Tool trait, ToolResult -│ │ ├── schema.rs # Cross-provider JSON Schema cleaner -│ │ ├── bash.rs # Shell command execution -│ │ ├── calculator.rs # Math expression evaluator -│ │ ├── chat_manager.rs # Session management tool -│ │ ├── cron.rs # Cron job management tools -│ │ ├── file_read.rs # File reader -│ │ ├── file_write.rs # File writer -│ │ ├── file_edit.rs # File editor (string substitution) -│ │ ├── get_skill.rs # Skill loader tool -│ │ ├── http_request.rs # HTTP request tool -│ │ ├── send_message.rs # Cross-channel messaging -│ │ └── web_fetch.rs # Web page fetcher -│ ├── skills/ # Skills loading from markdown files -│ │ └── mod.rs # SkillsLoader, Skill -│ ├── storage/ # SQLite persistence -│ │ ├── mod.rs # Storage, schema init -│ │ ├── session.rs # Session CRUD operations -│ │ ├── message.rs # Message persistence -│ │ ├── scheduler.rs # ScheduledJob, JobRun storage -│ │ └── error.rs # StorageError -│ ├── scheduler/ # Cron scheduler runtime -│ │ ├── mod.rs # Scheduler, next_run_for_schedule() -│ │ └── types.rs # Schedule enum (At/Every/Cron) -│ ├── observability/ # Telemetry observer pattern -│ │ └── mod.rs # Observer trait, ObserverEvent, MultiObserver -│ ├── protocol.rs # WebSocket message types (WsInbound/WsOutbound) -│ ├── config/ # Config loading & env substitution -│ │ └── mod.rs # Config, LLMProviderConfig, load_env_file() -│ └── logging.rs # Tracing subscriber init with file rotation -├── tests/ -│ ├── test_integration.rs # LLM provider integration tests -│ ├── test_tool_calling.rs # Tool calling integration tests -│ ├── test_request_format.rs # Request format tests -│ ├── test_scheduler.rs # 
Scheduler unit tests -│ ├── test.env.example # Test environment template -│ └── test.env # Actual test keys (gitignored) -├── reference/ # Third-party reference code (do not modify) -├── resources/ # Assets embedded in binary -│ └── templates/ # Templates released to ~/.picobot/ on first run -├── config.example.json # Full config example -└── Cargo.toml +```text +src/ + agent/ LLM loop, context compression, system prompts, media handling, sub-agents + bus/ Inbound, outbound, and control message queues + channels/ CLI chat and Feishu/Lark integrations + client/ Ratatui terminal UI + config/ Config loading, env substitution, path expansion + gateway/ Axum HTTP/WebSocket server and GatewayState wiring + mcp/ MCP client connections and tool wrappers + memory/ Memory manager and memory types + observability/ Agent/tool telemetry observer interfaces + providers/ OpenAI-compatible and Anthropic clients + scheduler/ Scheduled job runtime + session/ Session lifecycle, dialog commands, persistence integration + skills/ Skill loading and embedded built-in skill installation + storage/ SQLite schema and CRUD + tools/ Agent tool implementations +resources/ + skills/ Built-in skills embedded at build time + templates/ Config, AGENTS.md, and USER.md templates released on first run +tests/ Unit and ignored integration tests +reference/ Third-party reference code; do not modify as project source ``` ## Key Dependencies | Crate | Purpose | |-------|---------| -| `axum` + `tokio-tungstenite` | HTTP server & WebSocket | -| `sqlx` (SQLite) | Session/Message/Job persistence | -| `reqwest` (rustls) | LLM API & external HTTP calls | -| `ratatui` + `crossterm` | Terminal UI | -| `clap` | CLI argument parsing | -| `tracing` + `tracing-subscriber` | Structured logging | -| `cron` + `chrono-tz` | Cron schedule parsing | -| `meval` | Mathematical expression evaluation | -| `uuid` | Session/Dialog ID generation | -| `dirs` | Platform config directory resolution | +| `axum`, `tokio`, 
`tokio-tungstenite` | Gateway and WebSocket runtime | +| `sqlx` | SQLite persistence | +| `reqwest` | LLM and HTTP clients | +| `ratatui`, `crossterm`, `termimad` | Terminal UI | +| `rmcp` | MCP client support | +| `fantoccini` | Optional browser automation | +| `cron`, `chrono-tz` | Scheduling | +| `jieba-rs` | Chinese tokenization for memory search | +| `zstd`, `tar` | Embedded built-in skill packaging |
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..3162df5
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,27 @@
+# PicoBot gateway service.
+# Build the binary and image first, e.g.:
+#   cargo build --release && docker compose up -d --build
+services:
+  picobot:
+    image: picobot:latest
+    # Lets `docker compose build` / `up --build` produce picobot:latest locally
+    # instead of depending on an image that already exists or can be pulled.
+    build: .
+    container_name: picobot
+    restart: unless-stopped
+    # NOTE(review): the README documents gateway.host defaulting to 127.0.0.1;
+    # the published port is only reachable if config.json sets it to 0.0.0.0.
+    # Confirm against the gateway's bind behavior.
+    ports:
+      - "19876:19876"
+    volumes:
+      # The host config file must exist before start; otherwise Docker creates
+      # a directory at that path.
+      - ~/.picobot/config.json:/app/.picobot/config.json:ro
+      - picobot_data:/app/.picobot
+    environment:
+      - RUST_LOG=info
+    command: gateway
+
+volumes:
+  picobot_data:
diff --git a/resources/skills/about-picobot/SKILL.md b/resources/skills/about-picobot/SKILL.md index c06e1da..f81bcf4 100644 --- a/resources/skills/about-picobot/SKILL.md +++ b/resources/skills/about-picobot/SKILL.md @@ -5,7 +5,7 @@ always: true --- # About PicoBot -PicoBot 是一个基于 Rust 的个人 AI 助手,支持多渠道(飞书、CLI)、长记忆、定时任务、Skill 系统等。 +PicoBot 是一个基于 Rust 的个人 AI 助手运行时,包含本地 Gateway、CLI TUI 客户端、飞书渠道、SQLite 会话持久化、长期记忆、定时任务、Skill 系统、MCP 工具接入和子 Agent 委托能力。 ## 目录索引 @@ -13,10 +13,10 @@ PicoBot 是一个基于 Rust 的个人 AI 助手,支持多渠道(飞书、CL | 文件 | 内容 | |------|------| -| `references/config.md` | 配置字段详解:providers、models、agents、gateway、memory、channels、mcp | -| `references/db-schema.md` | 数据库表结构:sessions、messages、memories、scheduled_jobs、llm_calls | -| `references/architecture.md` | 核心架构:数据流、会话系统、上下文压缩、记忆系统、Skill 优先级机制 | -| `references/faq.md` | 常见问题:模型切换、渠道添加、Skill 安装、历史查询、定时任务等 | +| `references/config.md` | 配置字段详解:providers、models、agents、gateway、client、channels、memory、mcp、browser | +| `references/db-schema.md` | 数据库表结构:sessions、messages、memories、scheduled_jobs、llm_calls、background_tasks | +| `references/architecture.md` | 
核心架构:数据流、会话系统、上下文压缩、记忆系统、Skill 优先级、MCP、子 Agent | +| `references/faq.md` | 常见问题:模型切换、渠道添加、Skill 安装、历史查询、定时任务、MCP 等 | | `references/commands.md` | 常用命令:编译、启动网关、启动客户端、运行测试 | | `assets/config.example.json` | config.json 完整示例 | diff --git a/resources/skills/about-picobot/assets/config.example.json b/resources/skills/about-picobot/assets/config.example.json index 92f247e..11e631a 100644 --- a/resources/skills/about-picobot/assets/config.example.json +++ b/resources/skills/about-picobot/assets/config.example.json @@ -72,5 +72,15 @@ "timeline_retention_days": 90, "max_failures_before_degrade": 3 }, + "mcp": { + "servers": [], + "tool_timeout_secs": 180 + }, + "browser": { + "enabled": false, + "webdriver_url": "http://127.0.0.1:9515", + "headless": true, + "chrome_path": null + }, "workspace_dir": "~/.picobot/workspace" } diff --git a/resources/skills/about-picobot/references/architecture.md b/resources/skills/about-picobot/references/architecture.md index a38f346..e3aa463 100644 --- a/resources/skills/about-picobot/references/architecture.md +++ b/resources/skills/about-picobot/references/architecture.md @@ -17,9 +17,9 @@ Channel → MessageBus → SessionManager → AgentLoop → (tools) → SessionM | `channels` | 外部集成(飞书、CLI),仅收发消息 | | `bus` | 异步消息队列,纯队列不路由 | | `session` | 会话生命周期管理、dialog 操作 | -| `agent` | LLM 调用循环、工具执行、上下文压缩 | +| `agent` | LLM 调用循环、工具执行、上下文压缩、媒体处理、子 Agent | | `providers` | LLM API 客户端(OpenAI 兼容、Anthropic) | -| `tools` | Agent 工具(bash、文件操作、HTTP、web、get_skill 等) | +| `tools` | Agent 工具(bash、文件操作、搜索、HTTP、web、browser、memory、delegate 等) | | `skills` | Skill 加载、管理和 prompt 构建 | | `storage` | SQLite 持久化 | | `scheduler` | Cron 作业调度 | @@ -37,6 +37,8 @@ Channel → MessageBus → SessionManager → AgentLoop → (tools) → SessionM - AgentLoop 无状态,接收 dialog 事件调用 LLM、执行工具 - Providers 是纯 HTTP 客户端,无 bus/session/channel 感知 - Tools 接收原始参数,返回字符串结果 +- MCP 工具在 Gateway 初始化时连接服务器、发现工具,并包装成普通 Tool 注册到 ToolRegistry +- 子 Agent 由 `delegate` 工具创建,复用 provider 配置和按需过滤后的工具集;后台任务结果通过 MessageBus 
发回原会话 ## 关键约束 @@ -45,6 +47,7 @@ Channel → MessageBus → SessionManager → AgentLoop → (tools) → SessionM - ChannelManager 持有 MessageBus 和所有 channel - OutboundDispatcher 通过 ChannelManager 路由出站消息 - Config `.env` 加载使用 `unsafe { env::set_var(...) }` +- `browser` 工具只有在 `browser.enabled=true` 时注册,依赖 Chrome/Chromium 与 WebDriver ## 上下文压缩 @@ -192,3 +195,48 @@ LLM 对话上下文接近 token 限制 (默认 128K × 70%) 时自动触发压 | 有压缩历史时 | `HistorySection` 提示 LLM 使用 `timeline_recall` | | 压缩完成后 | 摘要自动存储为 Timeline 记忆 | | 空闲时 | 可配置自动 consolidation(`idle_consolidation_minutes`) | + +--- + +## MCP 工具集成 + +Gateway 初始化时读取 `config.mcp.servers`: + +1. 按服务器配置连接 `stdio`、`sse` 或 `streamable-http` 传输 +2. 调用 MCP `list_tools` +3. 将每个 MCP tool 包装为 `McpToolWrapper` +4. 注册到当前 session 的 `ToolRegistry` + +`/mcp` 斜杠命令会显示 MCP 服务器连接状态和工具列表。 + +--- + +## 子 Agent / delegate + +`delegate` 工具用于把独立任务交给子 Agent: + +| 模式 | 行为 | +|------|------| +| `inline` | 当前轮阻塞等待子 Agent 返回 | +| `background` | 后台运行,完成后通过原 channel/chat 通知 | +| `parallel` | 多个子 Agent 并发执行并聚合结果 | + +默认工具集是只读工具:`file_read`、`file_search`、`content_search`、`web_fetch`、`http_request`、`calculator`。调用时可通过 `allowed_tools` 显式放开其他工具。后台任务会写入 `background_tasks` 表,默认 24 小时后清理。 + +--- + +## 当前斜杠命令 + +| 命令 | 说明 | +|------|------| +| `/new` | 创建新对话 | +| `/sessions` | 列出最近对话 | +| `/switch <dialog_id>` | 切换到指定对话 | +| `/rename <title>` | 重命名当前对话 | +| `/delete` | 删除当前对话 | +| `/compact` | 手动触发上下文压缩 | +| `/info` | 显示当前对话信息 | +| `/dump` | 保存当前对话为 markdown | +| `/?`, `/help` | 显示帮助 | +| `/mcp` | 显示 MCP 状态 | +| `/stop` | 停止当前任务并清空消息队列 | diff --git a/resources/skills/about-picobot/references/config.md b/resources/skills/about-picobot/references/config.md index e9c3c6a..522f0a9 100644 --- a/resources/skills/about-picobot/references/config.md +++ b/resources/skills/about-picobot/references/config.md @@ -14,8 +14,9 @@ "client": {}, // 客户端配置 "channels": {}, // 渠道配置 "memory": {}, // 记忆系统配置 - "workspace_dir": // 工作目录,默认 ~/.picobot/workspace - "mcp": {} // MCP 服务器配置 + "workspace_dir": "", // 工作目录,默认 
~/.picobot/workspace + "mcp": {}, // MCP 服务器配置 + "browser": {} // 可选浏览器自动化配置 } ``` @@ -57,8 +58,17 @@ | `session_ttl_hours` | int | - | 会话过期小时数 | | `session_db_path` | string | - | SQLite 数据库路径,默认在 workspace 下 | | `cleanup_interval_minutes` | int | - | 清理间隔 | +| `max_concurrent_background_tasks` | int | 10 | delegate 后台子任务最大并发数 | | `scheduler` | object | - | 调度器配置 | +### gateway.scheduler 字段 + +| 字段 | 类型 | 默认 | 说明 | +|------|------|------|------| +| `enabled` | bool | true | 是否启动调度器并注册 cron 工具 | +| `poll_interval_secs` | int | 60 | 检查到期任务的轮询间隔 | +| `max_concurrent` | int | 1 | 最大并发任务数(当前实现中为预留字段) | + ## memory 字段 | 字段 | 类型 | 默认 | 说明 | @@ -94,8 +104,21 @@ MCP 服务器单条配置: | 字段 | 说明 | |------|------| | `name` | 服务器名称 | -| `transport` | 传输方式: `Stdio`、`Sse`、`streamable-http` | -| `command` | 启动命令(Stdio 模式) | +| `transport` | 传输方式: `stdio`、`sse`、`streamable-http` | +| `command` | 启动命令(stdio 模式) | | `args` | 命令参数 | -| `url` | URL(Sse / streamable-http 模式) | +| `env` | 子进程环境变量 | +| `url` | URL(sse / streamable-http 模式) | +| `headers` | HTTP 传输额外请求头 | | `tool_timeout_secs` | 单独的超时设置 | + +## browser 字段 + +浏览器工具默认关闭,开启后注册 `browser` 工具。依赖 Chrome/Chromium 与 chromedriver/WebDriver。 + +| 字段 | 类型 | 默认 | 说明 | +|------|------|------|------| +| `enabled` | bool | false | 是否启用浏览器工具 | +| `webdriver_url` | string | http://127.0.0.1:9515 | WebDriver 服务地址 | +| `headless` | bool | true | 是否无头运行 | +| `chrome_path` | string | - | 自定义 Chrome/Chromium 路径 | diff --git a/resources/skills/about-picobot/references/db-schema.md b/resources/skills/about-picobot/references/db-schema.md index 39e1cc0..ec1b03b 100644 --- a/resources/skills/about-picobot/references/db-schema.md +++ b/resources/skills/about-picobot/references/db-schema.md @@ -36,6 +36,28 @@ | `tool_calls` | TEXT | 工具调用参数 JSON | | `source` | TEXT | 消息来源(跨会话消息时标记来源 session_id) | | `created_at` | INTEGER | 创建时间(unix 秒) | +| `reasoning_content` | TEXT | provider 返回的推理内容(如有) | + +## background_tasks 表 + +delegate 后台子任务表。`session_id` 不使用数据库外键,因为
session 使用软删除,关联关系由应用层维护。 + +| 字段 | 类型 | 说明 | +|------|------|------| +| `id` | TEXT PK | 后台任务 ID | +| `session_id` | TEXT | 所属会话 | +| `channel` | TEXT | 回传渠道 | +| `chat_id` | TEXT | 回传目标对话 | +| `prompt` | TEXT | 子任务提示 | +| `allowed_tools` | TEXT | 允许工具 JSON | +| `status` | TEXT | pending / running / completed / failed / cancelled | +| `result` | TEXT | 执行结果 | +| `error` | TEXT | 错误信息 | +| `tool_calls_count` | INTEGER | 工具调用次数 | +| `iterations` | INTEGER | Agent 迭代次数 | +| `started_at` | INTEGER | 开始时间 | +| `finished_at` | INTEGER | 结束时间 | +| `created_at` | INTEGER | 创建时间 | ## memories 表 diff --git a/resources/skills/about-picobot/references/tools.md b/resources/skills/about-picobot/references/tools.md index 76b1b4e..b1a97f6 100644 --- a/resources/skills/about-picobot/references/tools.md +++ b/resources/skills/about-picobot/references/tools.md @@ -124,9 +124,51 @@ --- -## file_read / file_write / file_edit / file_search — 文件操作 +## delegate — 子 Agent 委托 -工作目录内的文件读写编辑和搜索。详细的参数定义见各工具的 parameters_schema。 +创建子 Agent 处理独立任务。 + +| 参数 | 必填 | 说明 | +|------|------|------| +| `action` | 是 | `run`, `check_task`, `cancel_task`, `list_tasks` | +| `prompt` | run 必填 | 子任务描述 | +| `mode` | 否 | `inline`, `background`, `parallel`,默认 `inline` | +| `allowed_tools` | 否 | 子 Agent 可用工具列表;默认只读工具集 | +| `max_iterations` | 否 | 最大迭代次数,默认 99 | +| `timeout_secs` | 否 | 超时秒数,默认 3600 | +| `tasks` | parallel 必填 | 并行子任务数组 | +| `task_id` | 查询/取消必填 | 后台任务 ID | + +默认只读工具集:`file_read`、`file_search`、`content_search`、`web_fetch`、`http_request`、`calculator`。 + +--- + +## browser — 浏览器自动化 + +仅在 `browser.enabled=true` 时注册。底层使用 WebDriver/Chrome。 + +| action | 说明 | +|--------|------| +| `open` | 打开 URL | +| `snapshot` | 获取页面结构快照 | +| `click`, `click_at` | 点击元素或坐标 | +| `fill`, `type`, `press` | 输入文本或按键 | +| `get_text`, `get_title`, `get_url` | 读取页面信息 | +| `screenshot` | 截图,可写入文件或返回 base64 | +| `focus`, `hover`, `scroll`, `wait` | 常见交互和等待 | +| `close` | 关闭浏览器会话 | + +--- + +## MCP 工具 + +如果 `config.mcp.servers` 配置了 
MCP 服务器,Gateway 启动时会连接服务器、发现工具,并把 MCP 工具包装后注册到 ToolRegistry。使用 `/mcp` 查看当前连接状态和工具列表。 + +--- + +## file_read / file_write / file_edit / file_search / content_search — 文件操作和搜索 + +工作目录内的文件读写编辑、文件名搜索和内容搜索。详细的参数定义见各工具的 parameters_schema。 ## bash — 执行命令 diff --git a/resources/templates/config.example.json b/resources/templates/config.example.json index 92f247e..11e631a 100644 --- a/resources/templates/config.example.json +++ b/resources/templates/config.example.json @@ -72,5 +72,15 @@ "timeline_retention_days": 90, "max_failures_before_degrade": 3 }, + "mcp": { + "servers": [], + "tool_timeout_secs": 180 + }, + "browser": { + "enabled": false, + "webdriver_url": "http://127.0.0.1:9515", + "headless": true, + "chrome_path": null + }, "workspace_dir": "~/.picobot/workspace" }