diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..b45bd85
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,32 @@
+# Git
+.git
+.gitignore
+
+# Secrets: never send local env files (API keys) to the build daemon
+.env
+*.env
+
+# Build artifacts (keep only the release binary that the Dockerfile copies)
+target/
+!target/release/picobot
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# Docs and references
+docs/
+reference/
+
+# Test files
+tests/
+
+# Misc
+*.md
+*.txt
+.opencode/
+CLAUDE.md
+AGENTS.md
+ARCHITECTURE_REVIEW.md
diff --git a/.gitignore b/.gitignore
index 4854d56..b48c856 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ reference/**
*.env
Cargo.lock
.worktrees/
+design
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..c28c148
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,124 @@
+# =============================================================================
+# PicoBot Docker Image
+# =============================================================================
+# Build binary on host:
+#   cargo build --release
+#
+# Build image:
+#   docker build -t picobot .
+#
+# Run gateway: docker run -d -v ~/.picobot:/app/.picobot -p 19876:19876 picobot gateway
+# Run chat:    docker run -it -v ~/.picobot:/app/.picobot picobot chat
+#
+# NOTE: the gateway binds 127.0.0.1 by default. Set gateway.host to "0.0.0.0"
+# in config.json, otherwise the published port is not reachable from the host.
+# =============================================================================
+
+FROM debian:trixie-slim
+
+LABEL org.opencontainers.image.title="PicoBot" \
+      org.opencontainers.image.description="AI agent gateway and chat client" \
+      org.opencontainers.image.source="https://github.com/your-repo/picobot"
+
+# Build-time only: keeps apt non-interactive without leaking into the runtime env
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Pinned versions of the standalone CLI tools downloaded below
+ARG FD_VERSION=9.0.0
+ARG RG_VERSION=14.1.0
+
+# Fail a RUN step when any command in a pipeline fails (curl | tar, curl | bash)
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+# Domestic mirrors (China) for pip, uv and npm. Set via ENV so they also apply
+# to the non-root runtime user, not only to root during the build.
+ENV PIP_INDEX_URL=https://mirrors.aliyun.com/pypi/simple/ \
+    UV_INDEX_URL=https://mirrors.aliyun.com/pypi/simple/ \
+    NPM_CONFIG_REGISTRY=https://registry.npmmirror.com
+
+# Install base tools, Python, and uv in one layer to reduce duplication
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        ca-certificates \
+        curl \
+        dnsutils \
+        fonts-wqy-microhei \
+        fonts-wqy-zenhei \
+        git \
+        gnupg \
+        jq \
+        openssh-client \
+        poppler-utils \
+        python3 \
+        python3-pip \
+        python3-venv \
+        sqlite3 \
+        sshpass \
+        tini \
+        tree \
+        unzip \
+        zip \
+    && rm -rf /var/lib/apt/lists/* \
+    && pip3 install --no-cache-dir --break-system-packages uv
+
+# Install Node.js (npm and npx included) from the NodeSource repository
+RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \
+    && apt-get install -y --no-install-recommends nodejs \
+    && npm cache clean --force \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install himalaya (CLI email client) from local file
+COPY docker_build/himalaya.x86_64-linux.tgz /tmp/himalaya.tgz
+RUN tar -xzf /tmp/himalaya.tgz -C /usr/local/bin \
+    && chmod +x /usr/local/bin/himalaya \
+    && rm -f /tmp/himalaya.tgz
+
+# Install fd (alternative to find). Only the binary is extracted; the archive's
+# docs and shell completions do not belong in /usr/local/bin.
+RUN curl -fsSL "https://github.com/sharkdp/fd/releases/download/v${FD_VERSION}/fd-v${FD_VERSION}-x86_64-unknown-linux-gnu.tar.gz" \
+    | tar -xz --strip-components=1 -C /usr/local/bin "fd-v${FD_VERSION}-x86_64-unknown-linux-gnu/fd" \
+    && chmod +x /usr/local/bin/fd
+
+# Install ripgrep (rg), again extracting only the binary
+RUN curl -fsSL "https://github.com/BurntSushi/ripgrep/releases/download/${RG_VERSION}/ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl.tar.gz" \
+    | tar -xz --strip-components=1 -C /usr/local/bin "ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl/rg" \
+    && chmod +x /usr/local/bin/rg
+
+# Install Chromium and chromedriver for browser automation
+# Debian's chromium package is real (not a snap shim like Ubuntu 24.04)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        chromium \
+        chromium-driver \
+    && ln -sf /usr/bin/chromium /usr/local/bin/chrome \
+    && ln -sf /usr/bin/chromedriver /usr/local/bin/chromedriver \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create non-root user
+RUN useradd -m -s /bin/bash app
+
+WORKDIR /app
+
+# Create required directories and hand /app to the non-root user before the
+# binary is copied, so no recursive chown has to rewrite it in a later layer
+RUN mkdir -p /app/.picobot/workspace /app/.picobot/media /app/.picobot/tmp \
+    && chown -R app:app /app
+
+# Copy pre-built binary from host
+COPY --chown=app:app target/release/picobot /app/picobot
+
+# Copy config template
+COPY --chown=app:app resources/templates/config.example.json /app/config.json.example
+
+USER app
+
+# Runtime environment: home of the app user, Chromium location, temp dir, log level
+ENV HOME=/app \
+    CHROME_BIN=/usr/bin/chromium \
+    TMPDIR=/app/.picobot/tmp \
+    RUST_LOG=info
+
+EXPOSE 19876
+
+# tini (installed above) runs as PID 1 so that signals are forwarded and the
+# child processes started by the agent (shell commands, chromedriver, stdio MCP servers) are reaped
+ENTRYPOINT ["/usr/bin/tini", "--", "/app/picobot"]
+CMD ["gateway"]
diff --git a/README.md b/README.md
index a63b415..a642fdb 100644
--- a/README.md
+++ b/README.md
@@ -1,143 +1,102 @@
# PicoBot
-A multi-channel AI agent framework with a WebSocket gateway and TUI client, supporting OpenAI-compatible and Anthropic LLM providers, tool calling, session persistence, and cron-based scheduling.
+PicoBot is a Rust-based personal AI assistant runtime. It runs a local gateway, connects chat channels such as the terminal TUI and Feishu/Lark, persists sessions in SQLite, and gives the agent a tool system for files, shell commands, web access, memory, scheduling, skills, MCP tools, and delegated sub-agents.
-## System Architecture
+## What It Does
-```mermaid
-graph TB
- subgraph Clients
- TUI["🖥️ CLI Chat (TUI)"]
- FS["📱 Feishu/Lark"]
- end
+- Runs as a gateway server on `127.0.0.1:19876` by default.
+- Provides a Ratatui terminal client over WebSocket.
+- Supports Feishu/Lark messages, reactions, file upload/download, and media references.
+- Calls OpenAI-compatible providers and Anthropic Messages API providers.
+- Persists conversations, messages, memories, scheduled jobs, LLM call metadata, and background sub-agent tasks in SQLite.
+- Loads skills from workspace, user, and shared skill directories, with built-in skills installed on first use.
+- Compresses long contexts and stores timeline summaries for later recall.
+- Can register tools discovered from configured MCP servers.
- subgraph Gateway["Gateway Server (127.0.0.1:19876)"]
- HTTP["HTTP Endpoints
GET /health
GET /ws (WebSocket upgrade)"]
- WS["WebSocket Handler"]
- CD["ChannelManager"]
- SP["SessionManager"]
- AL["AgentLoop"]
- end
+## Architecture
- subgraph Bus["MessageBus"]
- IB["Inbound Channel"]
- OB["Outbound Channel"]
- CC["Control Channel"]
- end
+```text
+Channel -> MessageBus -> SessionManager -> AgentLoop -> LLM Provider
+ | |
+ | v
+ | Tools
+ v
+ SQLite
- subgraph Storage
- SQLite[("SQLite
picobot.db")]
- end
-
- subgraph AI["AI Providers"]
- OpenAI["OpenAI / DashScope"]
- Anthropic["Anthropic Claude"]
- end
-
- TUI <-->|WebSocket| WS
- FS <-->|Webhook| HTTP
-
- CD -->|InboundMessage| IB
- IB -->|DialogEvent| SP
- CC -->|ControlMessage| SP
- SP <--> AL
- AL -->|API Call| OpenAI
- AL -->|API Call| Anthropic
- AL -->|Tool Call| Tools
- SP -->|OutboundMessage| OB
- OB --> CD
- SP --> SQLite
- Tools --> SQLite
-
- subgraph Tools
- Bash["Bash"]
- FileIO["File Read/Write/Edit"]
- Web["HTTP Request / Web Fetch"]
- Calc["Calculator"]
- Skill["Get Skill"]
- Msg["Send Message"]
- Cron["Cron Jobs"]
- end
+Control messages -> SessionManager -> MessageBus -> OutboundDispatcher -> Channel
```
-### Core Data Flow
+The main runtime boundary is:
-```mermaid
-sequenceDiagram
- participant Channel as Channel
(CLI/Feishu)
- participant Bus as MessageBus
- participant SM as SessionManager
- participant AL as AgentLoop
- participant LLM as LLM Provider
- participant Tool as Tools
-
- Channel->>Bus: InboundMessage (user input)
- Bus->>SM: DialogEvent
- SM->>SM: Load/Resolve Session
- SM->>AL: Process (session state)
- AL->>LLM: ChatCompletionRequest
- LLM-->>AL: response / tool_calls
- alt Tool Calls
- AL->>Tool: execute tool
- Tool-->>AL: result
- AL->>LLM: continue with tool result
- end
- AL-->>SM: AgentProcessResult (text + token count)
- SM->>SM: Persist to SQLite
- SM->>Bus: OutboundMessage
- Bus->>Channel: response to user
-```
+- `channels` only receive and send external messages.
+- `bus` is an async queue, not a router.
+- `session` owns dialog lifecycle, persistence, memory recall, prompt assembly, compression, and task cancellation.
+- `agent` runs the stateless LLM/tool loop.
+- `providers` are HTTP clients for model APIs.
+- `tools` execute agent actions and return string results.
+- `storage` owns SQLite schema and CRUD.
+- `scheduler` polls due jobs and feeds prompts back into sessions.
## Features
-### Multi-Channel Support
-- **CLI Chat Client** — Full TUI with session management, Markdown rendering, slash commands
-- **Feishu (Lark)** — Webhook-based integration with typing indicators and media support
+### Channels
-### Multi-Provider LLM
-- OpenAI-compatible API (GPT-4, DashScope, Volcengine, etc.)
-- Anthropic Messages API (Claude)
-- Cross-provider JSON Schema normalization for tool calling compatibility
+- `cli_chat`: terminal TUI client connected through `/ws`.
+- `feishu`: Feishu/Lark channel with configurable allow list, media directory, and reaction emoji.
-### Session Management
-- Multi-session conversations per channel/chat
-- Create, switch, rename, archive, delete dialogs via slash commands or WebSocket
-- SQLite-persisted session history with automatic TTL-based cleanup
-- Context compression for long conversations approaching token limits
+### LLM Providers
-### Tool System
-| Tool | Description |
-|------|-------------|
-| `bash` | Execute shell commands in workspace |
-| `file_read` | Read file contents |
-| `file_write` | Create/overwrite files |
-| `file_edit` | Precise string substitution in files |
-| `http_request` | Make HTTP API requests |
-| `web_fetch` | Fetch and parse web pages |
-| `calculator` | Evaluate mathematical expressions |
-| `get_skill` | Load agent skills from local skill files |
-| `send_message` | Send messages to other channels |
-| `cron_add/list/remove/enable/disable/update` | Manage scheduled jobs |
+- OpenAI-compatible chat completions, including DashScope, Volcengine, and similar APIs.
+- Anthropic Messages API.
+- Model-specific `input_type` metadata for text/image capability checks.
+- JSON Schema cleanup for cross-provider tool compatibility.
-### Scheduling
-- Cron-based recurring jobs with optional timezone support
-- One-shot (`at`) and interval (`every`) schedules
-- Jobs trigger agent processing via specified channel/chat
+### Sessions And Memory
-### Skills System
-- Load Markdown skill files from `~/.picobot/skills` and `~/.agents/skills`
-- Skills inject specialized system prompts for specific tasks
-- Automatic hot-reload on file changes
+- Session IDs use the form `<channel>:<chat_id>:<dialog_id>`.
+- Each channel/chat can have multiple dialogs.
+- Dialog operations include create, list, switch, rename, delete, compact, dump, info, and stop.
+- Session history is persisted to SQLite and can be incrementally restored after compression.
+- Knowledge memories are recalled into the system prompt each turn.
+- Timeline memories are produced by context compression and can be searched later.
-### Observability
-- Observer pattern for agent and tool telemetry
-- Events: `AgentStart`, `AgentEnd`, `ToolCallStart`, `ToolCall`
-- Structured JSON logging with file rotation
+### Tools
+
+Base tools registered for the agent:
+
+| Tool | Purpose |
+|------|---------|
+| `calculator` | Math expressions and statistics |
+| `file_read` / `file_write` / `file_edit` | Workspace file operations |
+| `file_search` / `content_search` | File and content search |
+| `bash` | Run shell commands in the workspace |
+| `http_request` | HTTP API requests |
+| `web_fetch` | Fetch and extract web page text |
+| `get_skill` | List or load local skills |
+| `memory_store` / `memory_recall` / `timeline_recall` / `memory_forget` | Long-term memory operations |
+| `delegate` | Run inline, background, or parallel sub-agents |
+| `send_message` | Send outbound messages to configured channels |
+| `chat_manager` | Inspect sessions, channels, and stored messages |
+| `cron_add/list/remove/enable/disable/update` | Manage scheduled jobs when scheduler is enabled |
+| `browser` | Optional WebDriver browser automation when enabled |
+| MCP tools | Dynamically registered from configured MCP servers |
+
+### Skills
+
+Skills are directories containing `SKILL.md`. Load priority is:
+
+1. `{workspace}/skills`
+2. `~/.picobot/skills`
+3. `~/.agents/skills`
+
+Same-name skills in higher-priority locations override lower-priority ones. Built-in skills from `resources/skills` are embedded into the binary and installed into `~/.picobot/skills` if missing.
## Quick Start
### Prerequisites
-- Rust nightly (edition 2024) — use `rustup` to install
+
+- Rust toolchain with edition 2024 support.
+- A configured LLM provider API key.
### Build
@@ -147,276 +106,186 @@ cargo build
### Configure
-1. Create `config.json` (or `~/.picobot/config.json`):
+PicoBot loads `~/.picobot/config.json` first, then falls back to `./config.json`. On gateway startup, a template is released to `~/.picobot/config.example.json` if it does not exist. The source template is [resources/templates/config.example.json](resources/templates/config.example.json).
+
+Minimal example:
```json
{
- "providers": {
- "openai": {
- "type": "openai",
- "base_url": "https://api.openai.com/v1",
- "api_key": ""
- }
- },
- "models": {
- "gpt-4o": {
- "model_id": "gpt-4o",
- "temperature": 0.7,
- "max_tokens": 4096
- }
- },
- "agents": {
- "default": {
- "provider": "openai",
- "model": "gpt-4o",
- "max_tool_iterations": 99,
- "token_limit": 128000
- }
+ "providers": {
+ "openai": {
+ "type": "openai",
+ "base_url": "https://api.openai.com/v1",
+ "api_key": "",
+ "extra_headers": {}
}
+ },
+ "models": {
+ "gpt-4o": {
+ "model_id": "gpt-4o",
+ "temperature": 0.7,
+ "max_tokens": 4096,
+ "input_type": ["text", "image"]
+ }
+ },
+ "agents": {
+ "default": {
+ "provider": "openai",
+ "model": "gpt-4o",
+ "max_tool_iterations": 99,
+ "token_limit": 128000
+ }
+ },
+ "workspace_dir": "~/.picobot/workspace"
}
```
-2. Set API keys via `.env` file (one `KEY=VALUE` per line):
-
-```env
-OPENAI_API_KEY=sk-xxxxx
-```
+The `.env` file in the current directory is parsed by PicoBot itself. Placeholder values like `<ENV_VAR_NAME>` in the JSON config are replaced from the process environment after `.env` is loaded.
### Run
-**Start gateway server:**
-
```bash
cargo run -- gateway
```
-Binds `127.0.0.1:19876` by default. Override with `--host` and `--port`.
+The gateway switches the process working directory to `workspace_dir` and stores `picobot.db` there by default.
-**Connect CLI client:**
+In another terminal:
```bash
cargo run -- chat
```
-Connects to `ws://127.0.0.1:19876/ws`. Override with `--gateway-url`.
+The client connects to `ws://127.0.0.1:19876/ws` by default. Override with `--gateway-url`.
-## Configuration Reference
+## Configuration
-Config load order: `~/.picobot/config.json` → `./config.json` (fallback).
+Top-level config fields:
-### Full Config Structure
+| Field | Purpose |
+|-------|---------|
+| `providers` | Named LLM provider configs |
+| `models` | Named model configs |
+| `agents` | Agent-to-provider/model binding |
+| `gateway` | Bind address, session DB path, cleanup, scheduler, background task limits |
+| `client` | Default WebSocket URL for the TUI client |
+| `channels` | Channel configs, currently Feishu/Lark |
+| `memory` | Recall and consolidation settings |
+| `mcp` | MCP server configs |
+| `browser` | Optional WebDriver browser tool config |
+| `workspace_dir` | Workspace used for file tools, shell commands, DB default, and workspace skills |
-```mermaid
-graph LR
- Config["config.json"]
- Config --> Providers["providers
ProviderConfig{}"]
- Config --> Models["models
ModelConfig{}"]
- Config --> Agents["agents
AgentConfig{}"]
- Config --> Gateway["gateway
GatewayConfig"]
- Config --> Client["client
ClientConfig"]
- Config --> Channels["channels
ChannelConfig{}"]
- Config --> Workspace["workspace_dir"]
+Important defaults:
- Providers --> PT["type (openai / anthropic)
base_url
api_key
extra_headers"]
- Models --> MT["model_id
temperature
max_tokens"]
- Agents --> AT["provider (ref)
model (ref)
max_tool_iterations
token_limit"]
- Gateway --> GT["host / port
session_db_path
scheduler"]
- Channels --> CT["feishu: app_id, app_secret
allow_from, agent, media_dir"]
-```
+| Key | Default |
+|-----|---------|
+| `gateway.host` | `127.0.0.1` |
+| `gateway.port` | `19876` |
+| `gateway.max_concurrent_background_tasks` | `10` |
+| `gateway.scheduler.enabled` | `true` (also when the `scheduler` section is omitted) |
+| `client.gateway_url` | `ws://127.0.0.1:19876/ws` |
+| `memory.recall_limit` | `5` |
+| `memory.timeline_retention_days` | `90` |
+| `mcp.tool_timeout_secs` | `180` |
+| `browser.enabled` | `false` |
-### Environment Variables
-
-The `.env` file in the working directory is loaded manually (not via dotenv crate). Placeholders in `config.json` written as `` are substituted at load time.
-
-### Gateway Config
-
-| Key | Type | Default | Description |
-|-----|------|---------|-------------|
-| `host` | string | `127.0.0.1` | Bind address |
-| `port` | u16 | `19876` | Listen port |
-| `session_db_path` | string | workspace `picobot.db` | SQLite database path |
-| `scheduler.enabled` | bool | `false` | Enable cron scheduler |
-
-### Agent Config
-
-| Key | Type | Default | Description |
-|-----|------|---------|-------------|
-| `provider` | string | — | Provider name (key in `providers`) |
-| `model` | string | — | Model name (key in `models`) |
-| `max_tool_iterations` | number | `99` | Max tool call iterations per turn |
-| `token_limit` | number | `128000` | Context window token limit |
+MCP servers support `stdio`, `sse`, and `streamable-http` transports. Browser automation requires a compatible Chrome/Chromium and chromedriver/WebDriver endpoint.
## Slash Commands
-Available in CLI chat and Feishu:
+Available from CLI chat and channel text messages:
-| Command | Alias | Description |
-|---------|-------|-------------|
-| `/new` | `/刷新` | Create a new dialog |
-| `/list` | `/对话列表` | List all dialogs |
-| `/switch ` | — | Switch to a dialog |
-| `/rename ` | — | Rename current dialog |
-| `/archive` | — | Archive current dialog |
-| `/delete` | — | Delete current dialog |
-| `/clear` | `/清空` | Clear current dialog history |
+| Command | Description |
+|---------|-------------|
+| `/new` | Create a new dialog |
+| `/sessions` | List recent dialogs |
+| `/switch <id>` | Switch dialog |
+| `/rename <title>` | Rename current dialog |
+| `/delete` | Delete current dialog |
+| `/compact` | Manually trigger context compression |
+| `/info` | Show current dialog information |
+| `/dump` | Save current dialog as Markdown |
+| `/?`, `/help` | Show help |
+| `/mcp` | Show MCP server and tool status |
+| `/stop` | Stop active tasks and clear queued messages |
-## WebSocket Protocol
+## WebSocket API
-The gateway exposes a WebSocket endpoint at `/ws`. Messages use typed JSON with a `type` discriminator field.
-
-### Client → Server (WsInbound)
-
-| Type | Fields |
-|------|--------|
-| `user_input` | `content`, `channel?`, `chat_id?`, `sender_id?` |
-| `create_session` | `title?` |
-| `list_sessions` | `include_archived` |
-| `load_session` | `session_id` |
-| `rename_session` | `session_id?`, `title` |
-| `archive_session` | `session_id?` |
-| `delete_session` | `session_id?` |
-| `clear_history` | `chat_id?`, `session_id?` |
-| `get_slash_commands` | — |
-| `ping` | — |
-
-### Server → Client (WsOutbound)
-
-| Type | Fields |
-|------|--------|
-| `assistant_response` | `session_id`, `response`, `tokens_used?`, `tool_calls?` |
-| `session_list` | `sessions[]` |
-| `session_loaded` | `session_id`, `messages[]` |
-| `session_created` | `session_id`, `title` |
-| `session_renamed` | `session_id`, `title` |
-| `session_archived` | `session_id` |
-| `session_deleted` | `session_id` |
-| `slash_commands` | `commands[]` |
-| `error` | `message` |
-| `pong` | — |
-
-## HTTP Endpoints
+The gateway exposes:
| Method | Path | Description |
|--------|------|-------------|
-| `GET` | `/health` | Health check — returns `{"status":"ok","version":"x.y.z"}` |
+| `GET` | `/health` | Returns service health and version |
| `GET` | `/ws` | WebSocket upgrade for chat clients |
+Inbound WebSocket message types:
+
+| Type | Main fields |
+|------|-------------|
+| `user_input` | `content`, optional `channel`, `chat_id`, `sender_id` |
+| `clear_history` | optional `chat_id`, `session_id` |
+| `create_session` | optional `title` |
+| `list_sessions` | `include_archived` |
+| `load_session` | `session_id` |
+| `rename_session` | optional `session_id`, `title` |
+| `archive_session` | optional `session_id` |
+| `delete_session` | optional `session_id` |
+| `get_slash_commands` | none |
+| `ping` | none |
+
+Outbound WebSocket message types include `assistant_response`, `error`, `session_established`, `session_created`, `session_list`, `session_loaded`, `session_renamed`, `session_archived`, `session_deleted`, `history_cleared`, `slash_commands_list`, `pong`, `command_executed`, and `system_notification`.
+
## Testing
```bash
-# Unit tests (no external dependencies)
+# Unit tests
cargo test --lib
-# Integration tests (require API keys)
+# Integration tests require real API keys in tests/test.env
cp tests/test.env.example tests/test.env
-# Fill in your API keys in tests/test.env
cargo test --test test_integration -- --ignored
cargo test --test test_tool_calling -- --ignored
cargo test --test test_request_format -- --ignored
-
-# Run all tests
-cargo test -- --ignored
```
-Integration tests are `#[ignore]` by default because they make real API calls.
+Integration tests are ignored by default because they make real provider calls.
-## Project Structure
+## Project Layout
-```
-├── src/
-│ ├── main.rs # CLI entrypoint (clap-based subcommands)
-│ ├── lib.rs # Module declarations
-│ ├── gateway/ # HTTP/WS server, GatewayState initialization
-│ │ ├── mod.rs
-│ │ ├── http.rs # Health endpoint
-│ │ └── ws.rs # WebSocket handler
-│ ├── client/ # TUI chat client
-│ │ ├── mod.rs
-│ │ └── tui/ # Ratatui-based terminal UI
-│ ├── channels/ # Channel integrations
-│ │ ├── base.rs # Channel trait
-│ │ ├── cli_chat.rs # CLI WebSocket channel
-│ │ ├── feishu.rs # Feishu/Lark webhook channel
-│ │ ├── manager.rs # ChannelManager
-│ │ └── slash_command.rs # Slash command parser
-│ ├── bus/ # Async message bus
-│ │ ├── mod.rs # MessageBus (tokio mpsc channels)
-│ │ ├── message.rs # Message types
-│ │ └── dispatcher.rs # OutboundDispatcher
-│ ├── session/ # Session & dialog management
-│ │ ├── mod.rs
-│ │ ├── session.rs # Session, SessionManager
-│ │ ├── session_id.rs # UnifiedSessionId
-│ │ ├── commands.rs # SessionCommand enum
-│ │ └── events.rs # SessionEvent, DialogInfo
-│ ├── agent/ # LLM interaction loop
-│ │ ├── mod.rs
-│ │ ├── agent_loop.rs # AgentLoop (stateless)
-│ │ ├── context_compressor.rs # Token estimation & summarization
-│ │ └── system_prompt.rs # System prompt builder
-│ ├── providers/ # LLM API clients
-│ │ ├── mod.rs # Factory: create_provider()
-│ │ ├── traits.rs # LLMProvider trait
-│ │ ├── openai.rs # OpenAI-compatible client
-│ │ └── anthropic.rs # Anthropic Messages API client
-│ ├── tools/ # Agent tools
-│ │ ├── mod.rs # create_default_tools()
-│ │ ├── registry.rs # ToolRegistry
-│ │ ├── traits.rs # Tool trait, ToolResult
-│ │ ├── schema.rs # Cross-provider JSON Schema cleaner
-│ │ ├── bash.rs # Shell command execution
-│ │ ├── calculator.rs # Math expression evaluator
-│ │ ├── chat_manager.rs # Session management tool
-│ │ ├── cron.rs # Cron job management tools
-│ │ ├── file_read.rs # File reader
-│ │ ├── file_write.rs # File writer
-│ │ ├── file_edit.rs # File editor (string substitution)
-│ │ ├── get_skill.rs # Skill loader tool
-│ │ ├── http_request.rs # HTTP request tool
-│ │ ├── send_message.rs # Cross-channel messaging
-│ │ └── web_fetch.rs # Web page fetcher
-│ ├── skills/ # Skills loading from markdown files
-│ │ └── mod.rs # SkillsLoader, Skill
-│ ├── storage/ # SQLite persistence
-│ │ ├── mod.rs # Storage, schema init
-│ │ ├── session.rs # Session CRUD operations
-│ │ ├── message.rs # Message persistence
-│ │ ├── scheduler.rs # ScheduledJob, JobRun storage
-│ │ └── error.rs # StorageError
-│ ├── scheduler/ # Cron scheduler runtime
-│ │ ├── mod.rs # Scheduler, next_run_for_schedule()
-│ │ └── types.rs # Schedule enum (At/Every/Cron)
-│ ├── observability/ # Telemetry observer pattern
-│ │ └── mod.rs # Observer trait, ObserverEvent, MultiObserver
-│ ├── protocol.rs # WebSocket message types (WsInbound/WsOutbound)
-│ ├── config/ # Config loading & env substitution
-│ │ └── mod.rs # Config, LLMProviderConfig, load_env_file()
-│ └── logging.rs # Tracing subscriber init with file rotation
-├── tests/
-│ ├── test_integration.rs # LLM provider integration tests
-│ ├── test_tool_calling.rs # Tool calling integration tests
-│ ├── test_request_format.rs # Request format tests
-│ ├── test_scheduler.rs # Scheduler unit tests
-│ ├── test.env.example # Test environment template
-│ └── test.env # Actual test keys (gitignored)
-├── reference/ # Third-party reference code (do not modify)
-├── resources/ # Assets embedded in binary
-│ └── templates/ # Templates released to ~/.picobot/ on first run
-├── config.example.json # Full config example
-└── Cargo.toml
+```text
+src/
+ agent/ LLM loop, context compression, system prompts, media handling, sub-agents
+ bus/ Inbound, outbound, and control message queues
+ channels/ CLI chat and Feishu/Lark integrations
+ client/ Ratatui terminal UI
+ config/ Config loading, env substitution, path expansion
+ gateway/ Axum HTTP/WebSocket server and GatewayState wiring
+ mcp/ MCP client connections and tool wrappers
+ memory/ Memory manager and memory types
+ observability/ Agent/tool telemetry observer interfaces
+ providers/ OpenAI-compatible and Anthropic clients
+ scheduler/ Scheduled job runtime
+ session/ Session lifecycle, dialog commands, persistence integration
+ skills/ Skill loading and embedded built-in skill installation
+ storage/ SQLite schema and CRUD
+ tools/ Agent tool implementations
+resources/
+ skills/ Built-in skills embedded at build time
+ templates/ Config, AGENTS.md, and USER.md templates released on first run
+tests/ Unit and ignored integration tests
+reference/ Third-party reference code; do not modify as project source
```
## Key Dependencies
| Crate | Purpose |
|-------|---------|
-| `axum` + `tokio-tungstenite` | HTTP server & WebSocket |
-| `sqlx` (SQLite) | Session/Message/Job persistence |
-| `reqwest` (rustls) | LLM API & external HTTP calls |
-| `ratatui` + `crossterm` | Terminal UI |
-| `clap` | CLI argument parsing |
-| `tracing` + `tracing-subscriber` | Structured logging |
-| `cron` + `chrono-tz` | Cron schedule parsing |
-| `meval` | Mathematical expression evaluation |
-| `uuid` | Session/Dialog ID generation |
-| `dirs` | Platform config directory resolution |
+| `axum`, `tokio`, `tokio-tungstenite` | Gateway and WebSocket runtime |
+| `sqlx` | SQLite persistence |
+| `reqwest` | LLM and HTTP clients |
+| `ratatui`, `crossterm`, `termimad` | Terminal UI |
+| `rmcp` | MCP client support |
+| `fantoccini` | Optional browser automation |
+| `cron`, `chrono-tz` | Scheduling |
+| `jieba-rs` | Chinese tokenization for memory search |
+| `zstd`, `tar` | Embedded built-in skill packaging |
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..3162df5
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,19 @@
+# PicoBot gateway service. Expects a locally built image tagged picobot:latest.
+volumes:
+  picobot_data:
+
+services:
+  picobot:
+    container_name: picobot
+    image: picobot:latest
+    command: ["gateway"]
+    restart: unless-stopped
+    environment:
+      RUST_LOG: info
+    ports:
+      - "19876:19876"
+    volumes:
+      # Host config file, mounted read-only into the data directory
+      - ~/.picobot/config.json:/app/.picobot/config.json:ro
+      # Named volume backing the rest of /app/.picobot
+      - picobot_data:/app/.picobot
diff --git a/resources/skills/about-picobot/SKILL.md b/resources/skills/about-picobot/SKILL.md
index c06e1da..f81bcf4 100644
--- a/resources/skills/about-picobot/SKILL.md
+++ b/resources/skills/about-picobot/SKILL.md
@@ -5,7 +5,7 @@ always: true
---
# About PicoBot
-PicoBot 是一个基于 Rust 的个人 AI 助手,支持多渠道(飞书、CLI)、长记忆、定时任务、Skill 系统等。
+PicoBot 是一个基于 Rust 的个人 AI 助手运行时,包含本地 Gateway、CLI TUI 客户端、飞书渠道、SQLite 会话持久化、长期记忆、定时任务、Skill 系统、MCP 工具接入和子 Agent 委托能力。
## 目录索引
@@ -13,10 +13,10 @@ PicoBot 是一个基于 Rust 的个人 AI 助手,支持多渠道(飞书、CL
| 文件 | 内容 |
|------|------|
-| `references/config.md` | 配置字段详解:providers、models、agents、gateway、memory、channels、mcp |
-| `references/db-schema.md` | 数据库表结构:sessions、messages、memories、scheduled_jobs、llm_calls |
-| `references/architecture.md` | 核心架构:数据流、会话系统、上下文压缩、记忆系统、Skill 优先级机制 |
-| `references/faq.md` | 常见问题:模型切换、渠道添加、Skill 安装、历史查询、定时任务等 |
+| `references/config.md` | 配置字段详解:providers、models、agents、gateway、client、channels、memory、mcp、browser |
+| `references/db-schema.md` | 数据库表结构:sessions、messages、memories、scheduled_jobs、llm_calls、background_tasks |
+| `references/architecture.md` | 核心架构:数据流、会话系统、上下文压缩、记忆系统、Skill 优先级、MCP、子 Agent |
+| `references/faq.md` | 常见问题:模型切换、渠道添加、Skill 安装、历史查询、定时任务、MCP 等 |
| `references/commands.md` | 常用命令:编译、启动网关、启动客户端、运行测试 |
| `assets/config.example.json` | config.json 完整示例 |
diff --git a/resources/skills/about-picobot/assets/config.example.json b/resources/skills/about-picobot/assets/config.example.json
index 92f247e..11e631a 100644
--- a/resources/skills/about-picobot/assets/config.example.json
+++ b/resources/skills/about-picobot/assets/config.example.json
@@ -72,5 +72,15 @@
"timeline_retention_days": 90,
"max_failures_before_degrade": 3
},
+ "mcp": {
+ "servers": [],
+ "tool_timeout_secs": 180
+ },
+ "browser": {
+ "enabled": false,
+ "webdriver_url": "http://127.0.0.1:9515",
+ "headless": true,
+ "chrome_path": null
+ },
"workspace_dir": "~/.picobot/workspace"
}
diff --git a/resources/skills/about-picobot/references/architecture.md b/resources/skills/about-picobot/references/architecture.md
index a38f346..e3aa463 100644
--- a/resources/skills/about-picobot/references/architecture.md
+++ b/resources/skills/about-picobot/references/architecture.md
@@ -17,9 +17,9 @@ Channel → MessageBus → SessionManager → AgentLoop → (tools) → SessionM
| `channels` | 外部集成(飞书、CLI),仅收发消息 |
| `bus` | 异步消息队列,纯队列不路由 |
| `session` | 会话生命周期管理、dialog 操作 |
-| `agent` | LLM 调用循环、工具执行、上下文压缩 |
+| `agent` | LLM 调用循环、工具执行、上下文压缩、媒体处理、子 Agent |
| `providers` | LLM API 客户端(OpenAI 兼容、Anthropic) |
-| `tools` | Agent 工具(bash、文件操作、HTTP、web、get_skill 等) |
+| `tools` | Agent 工具(bash、文件操作、搜索、HTTP、web、browser、memory、delegate 等) |
| `skills` | Skill 加载、管理和 prompt 构建 |
| `storage` | SQLite 持久化 |
| `scheduler` | Cron 作业调度 |
@@ -37,6 +37,8 @@ Channel → MessageBus → SessionManager → AgentLoop → (tools) → SessionM
- AgentLoop 无状态,接收 dialog 事件调用 LLM、执行工具
- Providers 是纯 HTTP 客户端,无 bus/session/channel 感知
- Tools 接收原始参数,返回字符串结果
+- MCP 工具在 Gateway 初始化时连接服务器、发现工具,并包装成普通 Tool 注册到 ToolRegistry
+- 子 Agent 由 `delegate` 工具创建,复用 provider 配置和按需过滤后的工具集;后台任务结果通过 MessageBus 发回原会话
## 关键约束
@@ -45,6 +47,7 @@ Channel → MessageBus → SessionManager → AgentLoop → (tools) → SessionM
- ChannelManager 持有 MessageBus 和所有 channel
- OutboundDispatcher 通过 ChannelManager 路由出站消息
- Config `.env` 加载使用 `unsafe { env::set_var(...) }`
+- `browser` 工具只有在 `browser.enabled=true` 时注册,依赖 Chrome/Chromium 与 WebDriver
## 上下文压缩
@@ -192,3 +195,48 @@ LLM 对话上下文接近 token 限制 (默认 128K × 70%) 时自动触发压
| 有压缩历史时 | `HistorySection` 提示 LLM 使用 `timeline_recall` |
| 压缩完成后 | 摘要自动存储为 Timeline 记忆 |
| 空闲时 | 可配置自动 consolidation(`idle_consolidation_minutes`) |
+
+---
+
+## MCP 工具集成
+
+Gateway 初始化时读取 `config.mcp.servers`:
+
+1. 按服务器配置连接 `stdio`、`sse` 或 `streamable-http` 传输
+2. 调用 MCP `list_tools`
+3. 将每个 MCP tool 包装为 `McpToolWrapper`
+4. 注册到当前 session 的 `ToolRegistry`
+
+`/mcp` 斜杠命令会显示 MCP 服务器连接状态和工具列表。
+
+---
+
+## 子 Agent / delegate
+
+`delegate` 工具用于把独立任务交给子 Agent:
+
+| 模式 | 行为 |
+|------|------|
+| `inline` | 当前轮阻塞等待子 Agent 返回 |
+| `background` | 后台运行,完成后通过原 channel/chat 通知 |
+| `parallel` | 多个子 Agent 并发执行并聚合结果 |
+
+默认工具集是只读工具:`file_read`、`file_search`、`content_search`、`web_fetch`、`http_request`、`calculator`。调用时可通过 `allowed_tools` 显式放开其他工具。后台任务会写入 `background_tasks` 表,默认 24 小时后清理。
+
+---
+
+## 当前斜杠命令
+
+| 命令 | 说明 |
+|------|------|
+| `/new` | 创建新对话 |
+| `/sessions` | 列出最近对话 |
+| `/switch <id>` | 切换到指定对话 |
+| `/rename <title>` | 重命名当前对话 |
+| `/delete` | 删除当前对话 |
+| `/compact` | 手动触发上下文压缩 |
+| `/info` | 显示当前对话信息 |
+| `/dump` | 保存当前对话为 markdown |
+| `/?`, `/help` | 显示帮助 |
+| `/mcp` | 显示 MCP 状态 |
+| `/stop` | 停止当前任务并清空消息队列 |
diff --git a/resources/skills/about-picobot/references/config.md b/resources/skills/about-picobot/references/config.md
index e9c3c6a..522f0a9 100644
--- a/resources/skills/about-picobot/references/config.md
+++ b/resources/skills/about-picobot/references/config.md
@@ -14,8 +14,9 @@
"client": {}, // 客户端配置
"channels": {}, // 渠道配置
"memory": {}, // 记忆系统配置
- "workspace_dir": // 工作目录,默认 ~/.picobot/workspace
- "mcp": {} // MCP 服务器配置
+ "workspace_dir": "", // 工作目录,默认 ~/.picobot/workspace
+ "mcp": {}, // MCP 服务器配置
+ "browser": {} // 可选浏览器自动化配置
}
```
@@ -57,8 +58,17 @@
| `session_ttl_hours` | int | - | 会话过期小时数 |
| `session_db_path` | string | - | SQLite 数据库路径,默认在 workspace 下 |
| `cleanup_interval_minutes` | int | - | 清理间隔 |
+| `max_concurrent_background_tasks` | int | 10 | delegate 后台子任务最大并发数 |
| `scheduler` | object | - | 调度器配置 |
+### gateway.scheduler 字段
+
+| 字段 | 类型 | 默认 | 说明 |
+|------|------|------|------|
+| `enabled` | bool | true | 是否启动调度器并注册 cron 工具 |
+| `poll_interval_secs` | int | 60 | 检查到期任务的轮询间隔 |
+| `max_concurrent` | int | 1 | 最大并发任务数(预留字段,当前实现尚未使用) |
+
## memory 字段
| 字段 | 类型 | 默认 | 说明 |
@@ -94,8 +104,21 @@ MCP 服务器单条配置:
| 字段 | 说明 |
|------|------|
| `name` | 服务器名称 |
-| `transport` | 传输方式: `Stdio`、`Sse`、`streamable-http` |
-| `command` | 启动命令(Stdio 模式) |
+| `transport` | 传输方式: `stdio`、`sse`、`streamable-http` |
+| `command` | 启动命令(stdio 模式) |
| `args` | 命令参数 |
-| `url` | URL(Sse / streamable-http 模式) |
+| `env` | 子进程环境变量 |
+| `url` | URL(sse / streamable-http 模式) |
+| `headers` | HTTP 传输额外请求头 |
| `tool_timeout_secs` | 单独的超时设置 |
+
+## browser 字段
+
+浏览器工具默认关闭,开启后注册 `browser` 工具。依赖 Chrome/Chromium 与 chromedriver/WebDriver。
+
+| 字段 | 类型 | 默认 | 说明 |
+|------|------|------|------|
+| `enabled` | bool | false | 是否启用浏览器工具 |
+| `webdriver_url` | string | http://127.0.0.1:9515 | WebDriver 服务地址 |
+| `headless` | bool | true | 是否无头运行 |
+| `chrome_path` | string | - | 自定义 Chrome/Chromium 路径 |
diff --git a/resources/skills/about-picobot/references/db-schema.md b/resources/skills/about-picobot/references/db-schema.md
index 39e1cc0..ec1b03b 100644
--- a/resources/skills/about-picobot/references/db-schema.md
+++ b/resources/skills/about-picobot/references/db-schema.md
@@ -36,6 +36,28 @@
| `tool_calls` | TEXT | 工具调用参数 JSON |
| `source` | TEXT | 消息来源(跨会话消息时标记来源 session_id) |
| `created_at` | INTEGER | 创建时间(unix 秒) |
+| `reasoning_content` | TEXT | provider 返回的推理内容(如有) |
+
+## background_tasks 表
+
+delegate 后台子任务表。`session_id` 不使用数据库外键,因为 session 使用软删除,关联关系由应用层维护。
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `id` | TEXT PK | 后台任务 ID |
+| `session_id` | TEXT | 所属会话 |
+| `channel` | TEXT | 回传渠道 |
+| `chat_id` | TEXT | 回传目标对话 |
+| `prompt` | TEXT | 子任务提示 |
+| `allowed_tools` | TEXT | 允许工具 JSON |
+| `status` | TEXT | pending / running / completed / failed / cancelled |
+| `result` | TEXT | 执行结果 |
+| `error` | TEXT | 错误信息 |
+| `tool_calls_count` | INTEGER | 工具调用次数 |
+| `iterations` | INTEGER | Agent 迭代次数 |
+| `started_at` | INTEGER | 开始时间 |
+| `finished_at` | INTEGER | 结束时间 |
+| `created_at` | INTEGER | 创建时间 |
## memories 表
diff --git a/resources/skills/about-picobot/references/tools.md b/resources/skills/about-picobot/references/tools.md
index 76b1b4e..b1a97f6 100644
--- a/resources/skills/about-picobot/references/tools.md
+++ b/resources/skills/about-picobot/references/tools.md
@@ -124,9 +124,51 @@
---
-## file_read / file_write / file_edit / file_search — 文件操作
+## delegate — 子 Agent 委托
-工作目录内的文件读写编辑和搜索。详细的参数定义见各工具的 parameters_schema。
+创建子 Agent 处理独立任务。
+
+| 参数 | 必填 | 说明 |
+|------|------|------|
+| `action` | 是 | `run`, `check_task`, `cancel_task`, `list_tasks` |
+| `prompt` | run 必填 | 子任务描述 |
+| `mode` | 否 | `inline`, `background`, `parallel`,默认 `inline` |
+| `allowed_tools` | 否 | 子 Agent 可用工具列表;默认只读工具集 |
+| `max_iterations` | 否 | 最大迭代次数,默认 99 |
+| `timeout_secs` | 否 | 超时秒数,默认 3600 |
+| `tasks` | parallel 必填 | 并行子任务数组 |
+| `task_id` | `check_task` / `cancel_task` 必填 | 后台任务 ID |
+
+默认只读工具集:`file_read`、`file_search`、`content_search`、`web_fetch`、`http_request`、`calculator`。
+
+---
+
+## browser — 浏览器自动化
+
+仅在 `browser.enabled=true` 时注册。底层使用 WebDriver/Chrome。
+
+| action | 说明 |
+|--------|------|
+| `open` | 打开 URL |
+| `snapshot` | 获取页面结构快照 |
+| `click`, `click_at` | 点击元素或坐标 |
+| `fill`, `type`, `press` | 输入文本或按键 |
+| `get_text`, `get_title`, `get_url` | 读取页面信息 |
+| `screenshot` | 截图,可写入文件或返回 base64 |
+| `focus`, `hover`, `scroll`, `wait` | 常见交互和等待 |
+| `close` | 关闭浏览器会话 |
+
+---
+
+## MCP 工具
+
+如果 `config.mcp.servers` 配置了 MCP 服务器,Gateway 启动时会连接服务器、发现工具,并把 MCP 工具包装后注册到 ToolRegistry。使用 `/mcp` 查看当前连接状态和工具列表。
+
+---
+
+## file_read / file_write / file_edit / file_search / content_search — 文件操作和搜索
+
+工作目录内的文件读写编辑、文件名搜索和内容搜索。详细的参数定义见各工具的 parameters_schema。
## bash — 执行命令
diff --git a/resources/templates/config.example.json b/resources/templates/config.example.json
index 92f247e..11e631a 100644
--- a/resources/templates/config.example.json
+++ b/resources/templates/config.example.json
@@ -72,5 +72,15 @@
"timeline_retention_days": 90,
"max_failures_before_degrade": 3
},
+ "mcp": {
+ "servers": [],
+ "tool_timeout_secs": 180
+ },
+ "browser": {
+ "enabled": false,
+ "webdriver_url": "http://127.0.0.1:9515",
+ "headless": true,
+ "chrome_path": null
+ },
"workspace_dir": "~/.picobot/workspace"
}