系统提示词精简，拆分固定部分与可变部分，固化工具skill顺序，提升缓存命中率

2026-06-17 23:25:28 +08:00 · 2026-06-17 23:25:28 +08:00 · fdd7f47305
commit fdd7f47305
parent f08bf85b37
9 changed files with 432 additions and 235 deletions
--- a/src/agent/agent_loop.rs
+++ b/src/agent/agent_loop.rs
@ -427,14 +427,8 @@ impl AgentLoop {
        // Build and inject system prompt if not present
        let has_system = messages.first().is_some_and(|m| m.role == "system");
        if !has_system {
-            let system_prompt = build_system_prompt(
+            let system_prompt =
-                &self.workspace_dir,
+                build_system_prompt(&self.workspace_dir, &self.model_name, &self.tools);
                &self.model_name,
                &self.tools,
                None,
                None,
                false,
            );
            #[cfg(debug_assertions)]
            tracing::debug!("System prompt injected:\n{}", system_prompt);
            messages.insert(0, ChatMessage::system(system_prompt));
--- a/src/agent/context_compressor.rs
+++ b/src/agent/context_compressor.rs
@ -719,6 +719,9 @@ mod tests {
                    prompt_tokens: 0,
                    completion_tokens: 0,
                    total_tokens: 0,
                    cached_tokens: None,
                    cache_read_input_tokens: None,
                    cache_creation_input_tokens: None,
                },
            })
        }
--- a/src/agent/system_prompt.rs
+++ b/src/agent/system_prompt.rs
@ -16,11 +16,6 @@ pub struct PromptContext<'a> {
    pub workspace_dir: &'a Path,
    pub model_name: &'a str,
    pub tools: &'a ToolRegistry,
    pub session_id: Option<&'a str>,
    /// Pre-fetched memory context string to inject.
    pub memory_context: Option<&'a str>,
    /// Whether this session has compressed history available via timeline_recall.
    pub has_compressed_history: bool,
 }
 /// Trait for system prompt sections.
@ -43,14 +38,14 @@ impl SystemPromptBuilder {
                Box::new(AgentProfileSection),
                Box::new(UserProfileSection),
                Box::new(RuntimeSection),
                Box::new(DateTimeSection),
                Box::new(WorkspaceSection),
                Box::new(YourTaskSection),
                Box::new(DecisionOrderSection),
                Box::new(ToolHonestySection),
                Box::new(ToolUsageSection),
                Box::new(SafetySection),
                Box::new(CrossChannelSection),
                Box::new(MemorySection),
                Box::new(HistorySection),
                Box::new(DelegationSection),
            ],
        }
@ -72,7 +67,6 @@ impl SystemPromptBuilder {
            Box::new(SafetySection),
            Box::new(SubAgentToolsSection { http_get_only }),
            Box::new(WorkspaceSection),
            Box::new(DateTimeSection),
        ];
        if let Some(sp) = skills_prompt {
            sections.push(Box::new(SubAgentSkillsSection { skills_prompt: sp }));
@ -114,9 +108,27 @@ impl PromptSection for ToolHonestySection {
    fn build(&self, _ctx: &PromptContext<'_>) -> String {
        "## 关键规则：工具诚实性
- 绝对不要编造、虚构或猜测工具结果。如果工具返回空结果，说\"没有找到结果\"。
+- 绝对不要编造、虚构或猜测工具结果。
- 如果工具调用失败，报告错误——绝不要编造数据来填补空白。
+- 如果工具返回空结果，说\"没有找到结果\"；如果工具失败，直接报告错误。
- 当不确定工具调用是否成功时，询问用户而不是猜测。"
+- 不确定时先询问或再试一次，不要用猜测补空白。"
            .to_string()
    }
 }
 /// Prompt section instructing the model to keep tool invocations out of user-visible replies.
 ///
 /// The section text is a fixed literal: it tells the model not to narrate tool calls, to act
 /// directly and report only results, and to ask the user only when memory and context are
 /// insufficient.
 pub struct ToolUsageSection;
 impl PromptSection for ToolUsageSection {
    /// Identifier of this section: `"tool_usage"`.
    fn name(&self) -> &str {
        "tool_usage"
    }
    /// Returns the fixed section text; the context argument is not read.
    fn build(&self, _ctx: &PromptContext<'_>) -> String {
        "## 工具使用方式
 - 不要向用户解释你正在调用什么工具，也不要输出工具调用过程。
 - 需要行动时直接使用工具；完成后只给结果。
 - 只有在确实缺少信息、且记忆和上下文都不足时，才向用户提问。"
            .to_string()
    }
 }
@ -132,10 +144,29 @@ impl PromptSection for YourTaskSection {
    fn build(&self, _ctx: &PromptContext<'_>) -> String {
        "## 你的任务
-当用户发送消息时，立即行动。使用工具来完成他们的请求。尽你所有能力利用已有的工具或者skill来完成目标。
+当用户发送消息时，先判断能否直接回答；需要行动时立即使用工具或 skill。
-不要：总结此配置、描述你的能力、用元评论回复、或输出逐步指令。
+- 直接回答能答的问题，不要为了显得“在工作”而套流程。
-而是：在需要时直接使用工具，完成后给出最终答案。
+- 不要总结这份配置、描述能力、输出元评论，或把任务拆成教学步骤。
-如果任务执行的过程中缺少必要的信息，尝试检索记忆，找不到就询问用户，最好一次性问清楚所有需要的信息。"
+- 如果缺少关键信息，先查记忆和历史；仍然不足时，一次性把需要的信息问清楚。"
            .to_string()
    }
 }
 /// Prompt section spelling out the order in which the model should decide how to respond.
 ///
 /// The fixed text lists three steps: answer directly when possible, otherwise use the
 /// minimum necessary tools, and only then ask the user (ideally in a single round).
 pub struct DecisionOrderSection;
 impl PromptSection for DecisionOrderSection {
    /// Identifier of this section: `"decision_order"`.
    fn name(&self) -> &str {
        "decision_order"
    }
    /// Returns the fixed section text; the context argument is not read.
    fn build(&self, _ctx: &PromptContext<'_>) -> String {
        "## 决策顺序
 遇到真实用户请求时，按这个顺序判断：
 1. 直接回答：如果问题只需要你已有的对话上下文、已知规则或当前消息就能回答，直接答，不要调用工具。
 2. 使用工具：如果需要查文件、查记忆、查历史、联网、执行命令或调用其他外部能力，先用最少必要工具拿到结果。
 3. 追问用户：只有当缺少的信息会影响正确执行，且记忆/历史/工具都无法补足时，再问用户，而且尽量一次问全。"
            .to_string()
    }
 }
@ -229,24 +260,6 @@ impl PromptSection for AgentProfileSection {
    }
 }
 /// Prompt section that reports the local date, time and zone at the moment it is built.
 pub struct DateTimeSection;
 impl PromptSection for DateTimeSection {
    /// Identifier of this section: `"datetime"`.
    fn name(&self) -> &str {
        "datetime"
    }
    /// Renders the heading followed by `YYYY-MM-DD HH:MM:SS (zone)`; the context is not read.
    fn build(&self, _ctx: &PromptContext<'_>) -> String {
        let local_now = chrono::Local::now();
        let stamp = local_now.format("%Y-%m-%d %H:%M:%S");
        let zone = local_now.format("%Z");
        format!("## 当前日期与时间\n\n{} ({})", stamp, zone)
    }
 }
 /// Cross-channel messaging and system notification guidance for LLM.
 pub struct CrossChannelSection;
@ -255,49 +268,14 @@ impl PromptSection for CrossChannelSection {
        "cross_channel"
    }
-    fn build(&self, ctx: &PromptContext<'_>) -> String {
+    fn build(&self, _ctx: &PromptContext<'_>) -> String {
-        let session_line = if let Some(id) = ctx.session_id {
+        "## 关于会话和跨渠道消息
            format!("当前会话的 ID 是 `{}`。\n", id)
        } else {
            String::new()
        };
-        format!(
+- `[message from X]` 前缀表示消息来自其他会话或工具，不要当作当前用户的新意图。
-            r#"## 关于会话和跨渠道消息
+- 需要跨会话发送内容时，使用 `send_message`，`target_chat_id` 格式为 `<channel>:<chat_id>` 或 `<channel>:<chat_id>:<dialog_id>`。
-
+- 需要查看会话列表或更早历史时，使用 `chat_manager`，不要凭记忆猜测。
-### 会话 ID 格式
+- `chat_manager` 的 `list_messages` 支持数量和时间范围过滤。"
-每个会话都有唯一的 session ID，由三部分组成：<channel>:<chat_id>:<dialog_id>
+            .to_string()
 - channel: 消息渠道（如 "cli_chat"、"feishu"）
 - chat_id: 聊天/群组标识
 - dialog_id: 对话标识，同一 chat 下可以有多个 dialog
 {}### 跨会话消息
 对话历史中可能出现带有 `[message from X]` 前缀的 assistant 消息，
 表示此消息由 send_message 工具从别处发送过来。
 - X: 来源标识，可能是会话 ID、工具名或其他标识字符串；未指定时为 "unknown"
 收到此类消息时一般不需要主动处理，只需知晓。如果用户问及相关信息，
 可以尝试从来源处获取更多详情。
 ### send_message 工具
 向指定会话发送消息。参数：
 - target_chat_id: 格式 <channel>:<chat_id> 或 <channel>:<chat_id>:<dialog_id>
 - content: 消息内容
 ### chat_manager 工具
 管理会话和查看消息。参数：
 - action = "list_sessions" — 列出全部会话，支持通过 offset/count 翻页
 - action = "list_channels" — 列出所有可用渠道
 - action = "list_messages" — 查看指定 session 的历史消息，支持以下参数：
  - session_id (必填): 会话 ID
  - count (可选): 返回数量，默认 20，最大 100
  - offset (可选): 跳过前 N 条，用于翻页查看更早历史，默认 0
  - before_time (可选): Unix 时间戳（秒），只返回该时间之前的消息
  - after_time (可选): Unix 时间戳（秒），只返回该时间之后的消息
 当用户要求回顾历史、查找之前的消息、或你记不清之前的对话内容时，可以使用此工具的 list_messages 动作，通过调整 offset 或指定时间范围来查询具体的历史消息。"#,
            session_line
        )
    }
 }
@ -310,13 +288,8 @@ impl PromptSection for RuntimeSection {
    }
    fn build(&self, ctx: &PromptContext<'_>) -> String {
        let host = hostname::get()
            .map(|h| h.to_string_lossy().to_string())
            .unwrap_or_else(|_| "unknown".to_string());
        format!(
-            "## 运行环境\n\n主机: {} | 操作系统: {} | 模型: {}",
+            "## 运行环境\n\n使用的模型是 `{}`。所有文件操作都应默认针对当前工作目录。",
            host,
            std::env::consts::OS,
            ctx.model_name
        )
    }
@ -330,47 +303,16 @@ impl PromptSection for MemorySection {
        "memory"
    }
-    fn build(&self, ctx: &PromptContext<'_>) -> String {
+    fn build(&self, _ctx: &PromptContext<'_>) -> String {
        let guide = r#"## 记忆系统
-### 记忆类别
+- **Knowledge（知识）**：长期存储的事实、偏好、模式、洞察。
- **Knowledge（知识）**：长期存储的事实、偏好、模式、洞察。会被注入到每轮系统提示词中。
+- **Timeline（时间线）**：历史会话摘要，可通过 `timeline_recall` 主动召回。
- **Timeline（时间线）**：历史会话摘要，可通过 timeline_recall 工具主动召回。
+- **memory_recall**：查找知识记忆。
-
+- **timeline_recall**：查看历史会话摘要。
-### 记忆工具
+- 记忆只作为参考，不要覆盖当前用户输入或已确认的上下文。
- **memory_recall**：搜索知识记忆。参数 query 是关键词列表（空格分隔），返回相关事实、偏好、洞察。
+- 适合写入记忆的内容：稳定偏好、关键项目事实、重要决策、值得复用的经验。"#;
- **timeline_recall**：搜索历史会话摘要。可选 session_id 参数限定特定会话。
+        guide.to_string()
 ### 主动记忆
 遇到以下情况时应主动使用 `memory_store` 记忆：
 - 用户明确表达的偏好（如编程语言、工具选择）
 - 重要的项目事实（如使用的框架、架构决策）
 - 值得记录的经验和教训
 "#;
        match ctx.memory_context {
            Some(context) if !context.is_empty() => {
                format!("{}\n\n### 记忆上下文\n\n{}", guide, context)
            }
            _ => guide.to_string(),
        }
    }
 }
 /// Prompt section that points the model at `timeline_recall` when archived summaries exist.
 pub struct HistorySection;
 impl PromptSection for HistorySection {
    /// Identifier of this section: `"history"`.
    fn name(&self) -> &str {
        "history"
    }
    /// Returns the hint text when `ctx.has_compressed_history` is set, otherwise an empty string.
    fn build(&self, ctx: &PromptContext<'_>) -> String {
        if !ctx.has_compressed_history {
            return String::new();
        }
        String::from(
            "## 历史会话\n之前的对话摘要已归档。如需回顾历史上下文，使用 `timeline_recall` 工具搜索。",
        )
    }
 }
@ -384,37 +326,12 @@ impl PromptSection for DelegationSection {
    fn build(&self, _ctx: &PromptContext<'_>) -> String {
        "## 子 Agent 委托原则\n\n\
-         当任务复杂需要拆解时，使用 delegate 工具创建子 Agent：\n\
+         - 只有当任务可以拆成独立子任务时才委托。\n\
-         \n\
+         - 子 Agent 只拿完成任务所需的最小工具集。\n\
-         ### 何时委托\n\
+         - 永远不要把 delegate 工具再分给子 Agent。\n\
-         - 多个独立子任务可以并行处理时（使用 mode=\"parallel\"）\n\
+         - 子任务 prompt 要直接写清目标、输出格式和限制。\n\
-         - 长时间运行的任务需要后台执行时（使用 mode=\"background\"）\n\
+         - 并行任务彼此不能依赖，长期任务用 background。"
-         - 需要以不同权限（受限工具集）执行时\n\
+            .to_string()
         \n\
         ### 工具分配原则\n\
         - **最小权限**：只给子 Agent 完成其任务所需的最少工具\n\
         - **只读优先**：如果可以只用 file_read、file_search、web_fetch 完成，不要给写权限（bash、file_write、file_edit）\n\
         - **禁止递归**：永远不要把 delegate 工具分配给子 Agent\n\
         - **明确边界**：每个子 Agent 只负责一个清晰、独立的子任务\n\
         \n\
         ### Skill 分配原则\n\
         - 如果子任务的领域有对应的 skill，在 allowed_tools 中加入 get_skill\n\
         - 在任务 prompt 中明确告诉子 Agent 使用 get_skill 加载哪个技能\n\
         - 例如：\"使用 get_skill action='get' skill_name='pdf' 加载 PDF 处理技能后完成任务\"\n\
         \n\
         ### 任务描述\n\
         - 任务 prompt 要清晰、具体、有明确输出要求\n\
         - 如需额外约束，直接写在 prompt 中（例如：\"跳过 .tmp 文件\"）\n\
         - 明确说明期望的输出格式\n\
         \n\
         ### 并行模式\n\
         - 多个无依赖的子任务使用 mode=\"parallel\"，任务定义在 tasks 数组中\n\
         - 并行任务之间不应有数据依赖\n\
         - 并行任务数建议不超过 5 个\n\
         \n\
         ### 后台模式\n\
         - 预计执行时间超过 30s 的任务使用 mode=\"background\"\n\
         - 后台任务有全局并发上限，如果失败提示用户稍后重试".to_string()
    }
 }
@ -434,16 +351,16 @@ impl PromptSection for SubAgentIdentitySection {
    fn build(&self, _ctx: &PromptContext<'_>) -> String {
        format!(
            "## 子 Agent\n\n\
-             你是主 Agent 派出的子 Agent，负责完成一个具体任务。你的最终回复将汇报给主 Agent。\n\
+             你只负责完成一个具体任务，结果会汇报给主 Agent。\n\
             \n\
             ## 任务\n\n\
             {}\n\
             \n\
             ## 规则\n\
-             - 只专注于上述任务，不要探索无关话题\n\
+             - 只专注于这个任务，不要扩展到无关话题\n\
             - 只在必要时使用工具\n\
-             - 不要使用 delegate 工具（禁止递归委托）\n\
+             - 不要使用 delegate 工具\n\
-             - 如果任务无法完成，清楚说明原因\n\
+             - 无法完成时，直接说明原因\n\
             - 只返回最终结果，不要描述过程\n\
             - 超时：{}，接近时限时返回部分结果",
            self.task, self.timeout,
@ -525,25 +442,41 @@ fn load_file_from_dir(dir: &Path, filename: &str, max_chars: usize) -> Option<St
 }
 /// Build a complete system prompt with default configuration.
-pub fn build_system_prompt(
+pub fn build_system_prompt(workspace_dir: &Path, model_name: &str, tools: &ToolRegistry) -> String {
    workspace_dir: &Path,
    model_name: &str,
    tools: &ToolRegistry,
    session_id: Option<&str>,
    memory_context: Option<&str>,
    has_compressed_history: bool,
 ) -> String {
    let ctx = PromptContext {
        workspace_dir,
        model_name,
        tools,
        session_id,
        memory_context,
        has_compressed_history,
    };
    SystemPromptBuilder::with_defaults().build(&ctx)
 }
 /// Build the per-request context tail that is appended to the latest user message.
 ///
 /// The output always starts with a `## 运行时上下文` heading followed by the current local
 /// date/time and zone. A session-id line is added when `session_id` is `Some`, and a
 /// `### 记忆上下文` block is added when `memory_context` is `Some` and not whitespace-only.
 /// The parts are joined with a single newline.
 ///
 /// Because the date/time entry is unconditional, the returned string is never empty.
 pub fn build_runtime_context(session_id: Option<&str>, memory_context: Option<&str>) -> String {
    let now = chrono::Local::now();
    let mut sections = vec![format!(
        "## 运行时上下文\n\n- 当前日期与时间: {} ({})",
        now.format("%Y-%m-%d %H:%M:%S"),
        now.format("%Z")
    )];
    if let Some(id) = session_id {
        sections.push(format!("- 会话 ID: `{}`", id));
    }
    // Whitespace-only memory text is treated the same as no memory at all.
    if let Some(context) = memory_context.filter(|s| !s.trim().is_empty()) {
        sections.push(format!("### 记忆上下文\n\n{}", context));
    }
    sections.join("\n")
 }
 /// Build a system prompt for a sub-agent with all relevant operational sections.
 pub fn build_sub_agent_system_prompt(
    task: &str,
@ -558,9 +491,6 @@ pub fn build_sub_agent_system_prompt(
        workspace_dir,
        model_name,
        tools,
        session_id: None,
        memory_context: None,
        has_compressed_history: false,
    };
    SystemPromptBuilder::with_sub_agent_defaults(task, timeout_human, skills_prompt, http_get_only)
        .build(&ctx)
@ -579,9 +509,6 @@ mod tests {
            workspace_dir: &temp_dir,
            model_name: "test-model",
            tools: &tools,
            session_id: None,
            memory_context: None,
            has_compressed_history: false,
        };
        let prompt = SystemPromptBuilder::with_defaults().build(&ctx);
@ -589,7 +516,6 @@ mod tests {
        assert!(prompt.contains("## 关键规则：工具诚实性"));
        assert!(prompt.contains("## 安全规则"));
        assert!(prompt.contains("## 工作目录"));
        assert!(prompt.contains("## 当前日期与时间"));
        assert!(prompt.contains("## 运行环境"));
    }
@ -611,46 +537,58 @@ mod tests {
        let temp_dir = std::env::temp_dir();
        let tools = ToolRegistry::new();
-        let prompt = build_system_prompt(&temp_dir, "test-model", &tools, None, None, false);
+        let prompt = build_system_prompt(&temp_dir, "test-model", &tools);
        assert!(!prompt.is_empty());
        assert!(prompt.contains("test-model"));
    }
    #[test]
-    fn test_memory_section_with_context() {
+    fn test_prompt_contains_decision_order_section() {
        let temp_dir = std::env::temp_dir();
        let tools = ToolRegistry::new();
-        let ctx = PromptContext {
+        let prompt = build_system_prompt(&temp_dir, "test-model", &tools);
            workspace_dir: &temp_dir,
            model_name: "test",
            tools: &tools,
            session_id: None,
            memory_context: Some("- user_pref: Prefers Rust"),
            has_compressed_history: false,
        };
-        let prompt = SystemPromptBuilder::with_defaults().build(&ctx);
+        assert!(prompt.contains("## 决策顺序"));
-        assert!(prompt.contains("## 记忆上下文"));
+        assert!(prompt.contains("直接回答"));
        assert!(prompt.contains("使用工具"));
        assert!(prompt.contains("追问用户"));
    }
    /// Two builds with identical inputs must produce identical prompt text.
    #[test]
    fn test_build_system_prompt_is_stable_across_calls() {
        let workspace = std::env::temp_dir();
        let registry = ToolRegistry::new();
        let render = || build_system_prompt(&workspace, "test-model", &registry);
        let first = render();
        let second = render();
        assert_eq!(first, second);
    }
    /// The runtime tail must carry the heading, the session id and the supplied memory text.
    #[test]
    fn test_runtime_context_with_memory() {
        // `build_runtime_context` takes only the session id and memory text, so no
        // workspace directory or tool registry is set up here.
        let prompt = build_runtime_context(Some("session-123"), Some("- user_pref: Prefers Rust"));
        assert!(prompt.contains("## 运行时上下文"));
        assert!(prompt.contains("session-123"));
        assert!(prompt.contains("Prefers Rust"));
    }
    #[test]
-    fn test_memory_section_without_context() {
+    fn test_runtime_context_without_memory() {
        let temp_dir = std::env::temp_dir();
        let tools = ToolRegistry::new();
-        let ctx = PromptContext {
+        let _ = (temp_dir, tools);
            workspace_dir: &temp_dir,
            model_name: "test",
            tools: &tools,
            session_id: None,
            memory_context: None,
            has_compressed_history: false,
        };
-        let prompt = SystemPromptBuilder::with_defaults().build(&ctx);
+        let prompt = build_runtime_context(None, None);
-        assert!(!prompt.contains("## 记忆上下文"));
+        assert!(prompt.contains("## 运行时上下文"));
        assert!(prompt.contains("当前日期与时间"));
    }
 }
--- a/src/providers/anthropic.rs
+++ b/src/providers/anthropic.rs
@ -12,12 +12,34 @@ use std::sync::Arc;
 const LLM_REQUEST_TIMEOUT_SECS: u64 = 300;
-fn convert_content_blocks(blocks: &[ContentBlock]) -> Vec<serde_json::Value> {
+#[derive(Serialize)]
 struct CacheControl {
    #[serde(rename = "type")]
    cache_type: String,
 }
 impl CacheControl {
    fn ephemeral() -> Self {
        Self {
            cache_type: "ephemeral".to_string(),
        }
    }
 }
 fn convert_content_blocks(blocks: &[ContentBlock], cacheable: bool) -> Vec<serde_json::Value> {
    blocks
        .iter()
        .map(|b| match b {
            ContentBlock::Text { text } => {
-                serde_json::json!({ "type": "text", "text": text })
+                if cacheable {
                    serde_json::json!({
                        "type": "text",
                        "text": text,
                        "cache_control": CacheControl::ephemeral(),
                    })
                } else {
                    serde_json::json!({ "type": "text", "text": text })
                }
            }
            ContentBlock::ImageUrl { image_url } => convert_image_url_to_anthropic(&image_url.url),
        })
@ -120,6 +142,8 @@ struct AnthropicTool {
    name: String,
    description: String,
    input_schema: serde_json::Value,
    #[serde(skip_serializing_if = "Option::is_none")]
    cache_control: Option<CacheControl>,
 }
 #[derive(Deserialize)]
@ -161,6 +185,10 @@ struct AnthropicUsage {
    input_tokens: u32,
    #[serde(default)]
    output_tokens: u32,
    #[serde(default)]
    cache_read_input_tokens: Option<u32>,
    #[serde(default)]
    cache_creation_input_tokens: Option<u32>,
 }
 #[async_trait]
@ -180,6 +208,7 @@ impl LLMProvider for AnthropicProvider {
                    name: t.function.name.clone(),
                    description: t.function.description.clone(),
                    input_schema: t.function.parameters.clone(),
                    cache_control: Some(CacheControl::ephemeral()),
                })
                .collect()
        });
@ -213,7 +242,7 @@ impl LLMProvider for AnthropicProvider {
                            "content": output,
                        })]
                    } else {
-                        let mut blocks = convert_content_blocks(&m.content);
+                        let mut blocks = convert_content_blocks(&m.content, m.role == "system");
                        // Append tool_use blocks from assistant messages with tool calls
                        if let Some(tool_calls) = m.tool_calls.as_ref().filter(|c| !c.is_empty()) {
                            for tc in tool_calls {
@ -369,6 +398,18 @@ impl LLMProvider for AnthropicProvider {
                    .as_ref()
                    .map(|u| u.input_tokens + u.output_tokens)
                    .unwrap_or(0),
                cached_tokens: anthropic_resp
                    .usage
                    .as_ref()
                    .and_then(|u| u.cache_read_input_tokens),
                cache_read_input_tokens: anthropic_resp
                    .usage
                    .as_ref()
                    .and_then(|u| u.cache_read_input_tokens),
                cache_creation_input_tokens: anthropic_resp
                    .usage
                    .as_ref()
                    .and_then(|u| u.cache_creation_input_tokens),
            },
        };
@ -400,3 +441,39 @@ impl LLMProvider for AnthropicProvider {
        &self.model_id
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use serde_json::json;

    /// Text blocks converted with `cacheable = true` carry an ephemeral `cache_control`.
    #[test]
    fn test_convert_content_blocks_adds_cache_control_for_system_text() {
        let converted = convert_content_blocks(&[ContentBlock::text("hello")], true);
        let first = &converted[0];
        assert_eq!(first["type"], "text");
        assert_eq!(first["cache_control"]["type"], "ephemeral");
    }

    /// Text blocks converted with `cacheable = false` have no `cache_control` key.
    #[test]
    fn test_convert_content_blocks_leaves_user_text_uncached() {
        let converted = convert_content_blocks(&[ContentBlock::text("hello")], false);
        let first = &converted[0];
        assert!(first.get("cache_control").is_none());
    }

    /// A tool with `cache_control` set serializes it as `{"type": "ephemeral"}`.
    #[test]
    fn test_anthropic_tool_serializes_cache_control() {
        let encoded = serde_json::to_value(AnthropicTool {
            name: "alpha".to_string(),
            description: "desc".to_string(),
            input_schema: json!({}),
            cache_control: Some(CacheControl::ephemeral()),
        })
        .unwrap();
        assert_eq!(encoded["cache_control"]["type"], "ephemeral");
    }
 }
--- a/src/providers/openai.rs
+++ b/src/providers/openai.rs
@ -188,6 +188,16 @@ struct OpenAIUsage {
    completion_tokens: u32,
    #[serde(default)]
    total_tokens: u32,
    #[serde(default)]
    cached_tokens: Option<u32>,
    #[serde(default)]
    prompt_tokens_details: Option<OpenAIPromptTokensDetails>,
 }
 /// Nested `prompt_tokens_details` object of a usage payload.
 ///
 /// Only the `cached_tokens` counter is deserialized here.
 #[derive(Deserialize, Default)]
 struct OpenAIPromptTokensDetails {
    /// Cached prompt-token count; `None` when the key is absent from the payload.
    #[serde(default)]
    cached_tokens: Option<u32>,
 }
 #[async_trait]
@ -332,6 +342,12 @@ impl LLMProvider for OpenAIProvider {
            })
            .collect();
        let usage = openai_resp.usage;
        let nested_cached_tokens = usage
            .prompt_tokens_details
            .as_ref()
            .and_then(|d| d.cached_tokens);
        let cached_tokens = nested_cached_tokens.or(usage.cached_tokens);
        let response = ChatCompletionResponse {
            id: openai_resp.id,
            model: openai_resp.model,
@ -339,9 +355,12 @@ impl LLMProvider for OpenAIProvider {
            reasoning_content: first_choice.message.reasoning_content,
            tool_calls,
            usage: Usage {
-                prompt_tokens: openai_resp.usage.prompt_tokens,
+                prompt_tokens: usage.prompt_tokens,
-                completion_tokens: openai_resp.usage.completion_tokens,
+                completion_tokens: usage.completion_tokens,
-                total_tokens: openai_resp.usage.total_tokens,
+                total_tokens: usage.total_tokens,
                cached_tokens: cached_tokens,
                cache_read_input_tokens: None,
                cache_creation_input_tokens: None,
            },
        };
@ -463,4 +482,39 @@ mod tests {
        assert!(message.tool_calls.is_empty());
        assert_eq!(response.usage.total_tokens, 11806);
    }
    /// A response whose usage contains `prompt_tokens_details.cached_tokens` must
    /// deserialize with that nested counter available.
    ///
    /// NOTE(review): this checks only the deserialized `OpenAIResponse`; it does not exercise
    /// the mapping into the provider-agnostic `Usage` struct.
    #[test]
    fn test_decode_response_exposes_cached_tokens() {
        // Fixture: a chat completion payload with the cached-token count nested under
        // `usage.prompt_tokens_details`.
        let text = r#"{
            "id": "d21abaa6552741949e2aba76bde59359",
            "choices": [{
                "finish_reason": "stop",
                "index": 0,
                "message": {
                    "content": "你好！",
                    "role": "assistant",
                    "tool_calls": null
                }
            }],
            "created": 1781622889,
            "model": "mimo-v2.5",
            "object": "chat.completion",
            "usage": {
                "completion_tokens": 65,
                "prompt_tokens": 11741,
                "total_tokens": 11806,
                "prompt_tokens_details": {"cached_tokens": 1200}
            }
        }"#;
        let response: OpenAIResponse = serde_json::from_str(text).unwrap();
        // The nested counter must match the fixture value.
        assert_eq!(
            response
                .usage
                .prompt_tokens_details
                .as_ref()
                .and_then(|d| d.cached_tokens),
            Some(1200)
        );
    }
 }
--- a/src/providers/traits.rs
+++ b/src/providers/traits.rs
@ -121,6 +121,12 @@ pub struct Usage {
    pub prompt_tokens: u32,
    pub completion_tokens: u32,
    pub total_tokens: u32,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u32>,
 }
 #[async_trait]
--- a/src/session/session.rs
+++ b/src/session/session.rs
@ -29,7 +29,7 @@ pub enum HandleResult {
    AgentProcessing,
 }
 use crate::agent::context_compressor::ContextCompressionConfig;
-use crate::agent::system_prompt::build_system_prompt;
+use crate::agent::system_prompt::{build_runtime_context, build_system_prompt};
 use crate::agent::{AgentError, AgentLoop, ContextCompressor};
 use crate::channels::slash_command::parse_slash_command;
 use crate::config::BrowserConfig;
@ -472,6 +472,18 @@ impl Session {
        }
    }
    /// Attach `runtime_context` to the end of `message`.
    ///
    /// A whitespace-only `runtime_context` leaves the message untouched. When the message
    /// content is itself whitespace-only it is replaced by the context; otherwise the context
    /// is appended after a blank line.
    fn append_runtime_context_to_user_message(message: &mut ChatMessage, runtime_context: &str) {
        let context_is_blank = runtime_context.trim().is_empty();
        if context_is_blank {
            return;
        }
        let updated = if message.content.trim().is_empty() {
            runtime_context.to_string()
        } else {
            format!("{}\n\n{}", message.content, runtime_context)
        };
        message.content = updated;
    }
    pub fn create_user_message_with_source(
        &self,
        content: &str,
@ -615,14 +627,11 @@ impl Session {
    }
    /// 构建系统提示词（包含 AgentLoop 的基础提示词 + skills + memory）
-    pub fn build_system_prompt(&self, skills_prompt: &str, memory_context: Option<&str>) -> String {
+    pub fn build_system_prompt(&self, skills_prompt: &str) -> String {
        let base_prompt = build_system_prompt(
            &self.provider_config.workspace_dir,
            &self.provider_config.model_id,
            &self.tools,
            Some(&self.id.to_string()),
            memory_context,
            self.last_compressed_message_at.is_some(),
        );
        if skills_prompt.trim().is_empty() {
@ -1266,7 +1275,7 @@ impl SessionManager {
                    // Build the same system prompt that would be injected to the model
                    let skills_prompt = self.skills_loader.build_skills_prompt();
-                    let system_prompt = session_guard.build_system_prompt(&skills_prompt, None);
+                    let system_prompt = session_guard.build_system_prompt(&skills_prompt);
                    let filepath = session_guard
                        .dump_to_file(&system_prompt)
@ -1989,8 +1998,7 @@ fn spawn_agent_worker(
                    let media_refs: Vec<MediaRef> =
                        task.media.iter().map(|m| m.to_media_ref()).collect();
-                    let user_message =
+                    let user_message = guard.create_user_message(&task.content, media_refs);
                        guard.create_user_message(&task.content, media_refs);
                    let user_persist = guard.add_message_in_memory(user_message, true);
                    drop(guard);
                    if let Err(e) = persist_added_message(user_persist).await {
@ -2071,12 +2079,15 @@ fn spawn_agent_worker(
                    _ => None,
                };
                let runtime_context =
                    build_runtime_context(Some(unified_str.as_str()), memory_context.as_deref());
                let system_prompt_out = {
                    let guard = session.lock().await;
                    if guard.worker_generation != worker_gen {
                        return;
                    }
-                    guard.build_system_prompt(&skills_prompt, memory_context.as_deref())
+                    guard.build_system_prompt(&skills_prompt)
                };
                let compression_result = compressor.compress_if_needed(history_raw).await;
@ -2119,6 +2130,9 @@ fn spawn_agent_worker(
                    }
                };
                history_out.insert(0, ChatMessage::system(system_prompt_out.clone()));
                if let Some(last_msg) = history_out.iter_mut().rev().find(|m| m.role == "user") {
                    Session::append_runtime_context_to_user_message(last_msg, &runtime_context);
                }
                // Phase 2 + 3: LLM call with cancellation
                let session2 = session.clone();
@ -2205,6 +2219,13 @@ fn spawn_agent_worker(
                                    0,
                                    ChatMessage::system(system_prompt_out.clone()),
                                );
                                if let Some(last_msg) = retry.iter_mut().rev().find(|m| m.role == "user")
                                {
                                    Session::append_runtime_context_to_user_message(
                                        last_msg,
                                        &runtime_context,
                                    );
                                }
                                retry
                            };
@ -2312,9 +2333,6 @@ impl SessionManager {
            &self.provider_config.workspace_dir,
            &self.provider_config.model_id,
            &self.tools,
            Some(&format!("cron:{}:{}", job_name, job_id)),
            None,
            false,
        );
        let cron_context = format!(
            "## 定时任务执行\n\n\
--- a/src/skills/mod.rs
+++ b/src/skills/mod.rs
@ -252,19 +252,21 @@ impl SkillsLoader {
    pub fn get_loaded_skills(&self) -> Vec<Skill> {
        self.reload_if_changed();
        let state = self.state.lock().unwrap();
-        state.loaded_skills.clone()
+        Self::sort_skills(state.loaded_skills.clone())
    }
    /// Get skills marked as always (checks for changes first)
    pub fn get_always_skills(&self) -> Vec<Skill> {
        self.reload_if_changed();
        let state = self.state.lock().unwrap();
-        state
+        Self::sort_skills(
-            .loaded_skills
+            state
-            .iter()
+                .loaded_skills
-            .filter(|s| s.always)
+                .iter()
-            .cloned()
+                .filter(|s| s.always)
-            .collect()
+                .cloned()
                .collect(),
        )
    }
    /// Get a specific skill by name (checks for changes first)
@ -278,9 +280,8 @@ impl SkillsLoader {
    pub fn list_skills(&self) -> Vec<(String, String)> {
        self.reload_if_changed();
        let state = self.state.lock().unwrap();
-        state
+        Self::sort_skills(state.loaded_skills.clone())
-            .loaded_skills
+            .into_iter()
            .iter()
            .map(|s| (s.name.clone(), s.description.clone()))
            .collect()
    }
@ -294,6 +295,7 @@ impl SkillsLoader {
            return String::new();
        }
        let loaded_skills = Self::sort_skills(state.loaded_skills.clone());
        let mut prompt = String::from("## Skills\n\n");
        // Directory conventions
@ -308,7 +310,7 @@ impl SkillsLoader {
        );
        // Always skills summary
-        let always_skills: Vec<_> = state.loaded_skills.iter().filter(|s| s.always).collect();
+        let always_skills: Vec<_> = loaded_skills.iter().filter(|s| s.always).collect();
        if !always_skills.is_empty() {
            prompt.push_str("### 常用技能\n\n");
            for skill in &always_skills {
@ -348,6 +350,22 @@ impl SkillsLoader {
        prompt
    }
    /// Return `skills` in a deterministic order that does not depend on load order.
    ///
    /// Ordering keys, in priority: `always` skills first, then `name`, then `description`,
    /// then the lossy string form of `path` (a missing path sorts before any present path).
    fn sort_skills(mut skills: Vec<Skill>) -> Vec<Skill> {
        skills.sort_by(|a, b| {
            b.always
                .cmp(&a.always)
                .then_with(|| a.name.cmp(&b.name))
                .then_with(|| a.description.cmp(&b.description))
                .then_with(|| {
                    // Compare the `Cow<str>` values directly: this orders exactly like the
                    // owned `String`s would, but does not allocate for valid UTF-8 paths.
                    let left = a.path.as_ref().map(|p| p.to_string_lossy());
                    let right = b.path.as_ref().map(|p| p.to_string_lossy());
                    left.cmp(&right)
                })
        });
        skills
    }
    /// Load skills from a specific directory
    fn load_skills_from_dir(&self, dir: &Path) -> Vec<Skill> {
        let mut skills = Vec::new();
@ -529,4 +547,43 @@ This is the content.
        );
        assert_eq!(extract_description("# Title"), "No description");
    }
    /// With skills seeded out of order, `alpha` must appear before `beta` in the prompt.
    #[test]
    fn test_build_skills_prompt_is_sorted() {
        let loader = SkillsLoader::new_for_testing(
            PathBuf::from("/tmp/picobot"),
            PathBuf::from("/tmp/agents"),
        );
        // Fixture builder: description and content share the same short text.
        let make_skill = |name: &str, text: &str, always: bool| Skill {
            name: name.to_string(),
            description: text.to_string(),
            content: text.to_string(),
            always,
            path: None,
        };
        // The lock guard is a temporary, so it is released at the end of this statement.
        loader.state.lock().unwrap().loaded_skills = vec![
            make_skill("zeta", "Z", false),
            make_skill("beta", "B", true),
            make_skill("alpha", "A", true),
        ];
        let rendered = loader.build_skills_prompt();
        let alpha_at = rendered.find("**alpha**").unwrap();
        let beta_at = rendered.find("**beta**").unwrap();
        assert!(alpha_at < beta_at);
    }
 }
--- a/src/tools/registry.rs
+++ b/src/tools/registry.rs
@ -39,7 +39,8 @@ impl ToolRegistry {
    }
    pub fn get_definitions(&self) -> Vec<Tool> {
-        self.tools
+        let mut defs: Vec<Tool> = self
            .tools
            .lock()
            .unwrap()
            .values()
@ -51,7 +52,10 @@ impl ToolRegistry {
                    parameters: tool.parameters_schema(),
                },
            })
-            .collect()
+            .collect();
        defs.sort_by(|a, b| a.function.name.cmp(&b.function.name));
        defs
    }
    pub fn has_tools(&self) -> bool {
@ -88,3 +92,49 @@ impl Default for ToolRegistry {
        Self::new()
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use crate::tools::traits::ToolResult;
    use async_trait::async_trait;
    use serde_json::json;

    /// Minimal tool whose name and description are both the wrapped string.
    struct TestTool(&'static str);

    #[async_trait]
    impl ToolTrait for TestTool {
        fn name(&self) -> &str {
            self.0
        }
        fn description(&self) -> &str {
            self.0
        }
        fn parameters_schema(&self) -> serde_json::Value {
            json!({})
        }
        /// Always succeeds with the output `"ok"`.
        async fn execute(&self, _args: serde_json::Value) -> anyhow::Result<ToolResult> {
            let outcome = ToolResult {
                success: true,
                output: "ok".to_string(),
                error: None,
            };
            Ok(outcome)
        }
    }

    /// Definitions come back ordered by tool name, whatever the registration order was.
    #[test]
    fn test_get_definitions_sorted_by_name() {
        let registry = ToolRegistry::new();
        for tool_name in ["zeta", "alpha", "beta"] {
            registry.register(TestTool(tool_name));
        }
        let names: Vec<_> = registry
            .get_definitions()
            .into_iter()
            .map(|def| def.function.name)
            .collect();
        assert_eq!(names, vec!["alpha", "beta", "zeta"]);
    }
 }