8 changed files with 62 additions and 670 deletions
--- a/src/agent/agent_loop.rs
+++ b/src/agent/agent_loop.rs
@ -1,4 +1,3 @@
-use crate::agent::context_compressor::estimate_tokens;
 use crate::agent::system_prompt::build_system_prompt;
 use crate::bus::message::ContentBlock;
 use crate::bus::ChatMessage;
@ -227,7 +226,6 @@ pub struct AgentLoop {
    max_iterations: usize,
    workspace_dir: PathBuf,
    model_name: String,
-    context_window: usize,
    notify_tx: Option<tokio::sync::mpsc::UnboundedSender<String>>,
 }

@ -251,7 +249,6 @@ impl AgentLoop {
            tools: Arc::new(ToolRegistry::new()),
            observer: None,
            notify_tx: None,
-            context_window: 0,
            max_iterations,
            workspace_dir,
            model_name,
@ -271,7 +268,6 @@ impl AgentLoop {
            tools,
            observer: None,
            notify_tx: None,
-            context_window: 0,
            max_iterations,
            workspace_dir,
            model_name,
@ -285,7 +281,6 @@ impl AgentLoop {
            tools: Arc::new(ToolRegistry::new()),
            observer: None,
            notify_tx: None,
-            context_window: 0,
            max_iterations,
            workspace_dir,
            model_name,
@ -305,19 +300,12 @@ impl AgentLoop {
            tools,
            observer: None,
            notify_tx: None,
-            context_window: 0,
            max_iterations,
            workspace_dir,
            model_name,
        }
    }

-    /// Set the context window size for preemptive trimming.
-    pub fn with_context_window(mut self, window: usize) -> Self {
-        self.context_window = window;
-        self
-    }
-
    /// Set the workspace directory.
    pub fn with_workspace_dir(mut self, dir: PathBuf) -> Self {
        self.workspace_dir = dir;
@ -335,36 +323,6 @@ impl AgentLoop {
        self
    }

-    /// Preemptive trim: truncate old tool results in-place when history is
-    /// approaching the context window limit. Only trims tool messages with
-    /// content > TRIM_CHARS, preserving the most recent KEEP messages.
-    fn preemptive_trim_old_tool_results(
-        &self,
-        messages: &mut [ChatMessage],
-        max_chars: usize,
-        keep_recent: usize,
-    ) -> usize {
-        let end = messages.len().saturating_sub(keep_recent);
-        let start = 1; // protect system message at [0] if present
-        let mut modified = 0;
-        for i in start..end {
-            if messages[i].role != "tool" {
-                continue;
-            }
-            if messages[i].content.len() <= max_chars {
-                continue;
-            }
-            let removed = messages[i].content.len() - max_chars;
-            messages[i].content = format!(
-                "{}...\n\n[Output truncated - {} characters removed]",
-                &messages[i].content[..messages[i].content.ceil_char_boundary(max_chars)],
-                removed
-            );
-            modified += 1;
-        }
-        modified
-    }
-
    pub fn tools(&self) -> &Arc<ToolRegistry> {
        &self.tools
    }
@ -397,27 +355,6 @@ impl AgentLoop {
            #[cfg(debug_assertions)]
            tracing::debug!(iteration, "Agent iteration started");

-            // Preemptive context check: trim old tool results if token estimate
-            // exceeds 80% of context window to prevent mid-loop overflow.
-            if self.context_window > 0 {
-                let estimated = estimate_tokens(&messages);
-                let danger = (self.context_window as f64 * 0.8) as usize;
-                if estimated > danger {
-                    let trimmed = self.preemptive_trim_old_tool_results(
-                        &mut messages, 2000, 4,
-                    );
-                    if trimmed > 0 {
-                        #[cfg(debug_assertions)]
-                        tracing::debug!(
-                            estimated,
-                            danger,
-                            trimmed_msgs = trimmed,
-                            "Preemptive tool-result trim applied in loop"
-                        );
-                    }
-                }
-            }
-
            // Convert messages to LLM format
            let messages_for_llm: Vec<Message> = messages
                .iter()
--- a/src/agent/context_compressor.rs
+++ b/src/agent/context_compressor.rs
@ -15,19 +15,6 @@ pub fn estimate_tokens(messages: &[ChatMessage]) -> usize {
    (raw as f64 * 1.2) as usize
 }

-/// Extract the first number found within `max_len` characters of the start of `s`.
-/// Used by `parse_context_limit_from_error` to find token limits in error messages.
-fn find_number_nearby(s: &str, max_len: usize) -> Option<&str> {
-    let end = s.len().min(max_len);
-    let slice = &s[..end];
-    let start = slice.find(|c: char| c.is_ascii_digit())?;
-    let end = slice[start..]
-        .find(|c: char| !c.is_ascii_digit())
-        .map(|p| start + p)
-        .unwrap_or(end);
-    Some(&slice[start..end])
-}
-
 /// Configuration for context compression.
 #[derive(Debug, Clone)]
 pub struct ContextCompressionConfig {
@ -109,18 +96,13 @@ impl ContextCompressor {
        self.session_id = id;
    }

-    /// Update the context window size (e.g., after parsing actual limit from LLM error).
-    pub fn set_context_window(&mut self, window: usize) {
-        self.context_window = window;
-    }
-
    /// Always true — memory is always available (memory system is always on).
    pub fn has_memory(&self) -> bool {
        true
    }

    /// Get the compression threshold in tokens.
-    pub fn threshold(&self) -> usize {
+    fn threshold(&self) -> usize {
        (self.context_window as f64 * self.threshold_ratio) as usize
    }

@ -145,34 +127,10 @@ impl ContextCompressor {
        modified
    }

-    /// Remove orphan tool results whose declaring tool_calls have been compressed away.
-    /// Scans for tool messages with no preceding assistant tool_call, and removes them.
-    pub fn repair_tool_pairs(messages: &mut Vec<ChatMessage>) {
-        let mut declared: std::collections::HashSet<String> = std::collections::HashSet::new();
-        let mut i = 0;
-        while i < messages.len() {
-            if messages[i].role == "assistant" {
-                if let Some(ref tool_calls) = messages[i].tool_calls {
-                    for tc in tool_calls {
-                        declared.insert(tc.id.clone());
-                    }
-                }
-            } else if messages[i].role == "tool" {
-                if let Some(ref tid) = messages[i].tool_call_id {
-                    if !declared.contains(tid.as_str()) {
-                        messages.remove(i);
-                        continue;
-                    }
-                }
-            }
-            i += 1;
-        }
-    }
-
    /// Main entry point - compresses history if over threshold.
    pub async fn compress_if_needed(
        &self,
-        mut history: Vec<ChatMessage>,
+        history: Vec<ChatMessage>,
    ) -> Result<Vec<ChatMessage>, AgentError> {
        // Check if compression is needed
        let tokens = estimate_tokens(&history);
@ -188,20 +146,20 @@ impl ContextCompressor {
            "Starting context compression"
        );

-        // Fast trim pass first — modify history in place
-        let trimmed = self.fast_trim_tool_results(&mut history);
-        let tokens_after = estimate_tokens(&history);
+        let mut history = history; // fast trim in place: trimming a clone would discard the result
+        let trimmed = self.fast_trim_tool_results(&mut history);
        if trimmed > 0 {
+            let tokens_after = estimate_tokens(&history);
            #[cfg(debug_assertions)]
            tracing::debug!(
                trimmed_messages = trimmed,
                tokens_after = tokens_after,
                "Fast trim completed"
            );
-        }
            if tokens_after <= self.threshold() {
                return Ok(history);
            }
+        }

        // LLM summarization pass
        let mut current_history = history;
@ -233,36 +191,6 @@ impl ContextCompressor {
            }
        }

-        // Hard safety net: if still dangerously high after all passes,
-        // fall back to head+tail truncation so the LLM call doesn't overflow.
-        let final_tokens = estimate_tokens(&current_history);
-        let danger_threshold = (self.context_window as f64 * 0.9) as usize;
-        if final_tokens > danger_threshold
-            && current_history.len() > self.config.protect_first_n + self.config.protect_last_n
-        {
-            let head: Vec<_> = current_history[..self.config.protect_first_n].to_vec();
-            let tail_start = current_history.len() - self.config.protect_last_n;
-            let tail: Vec<_> = current_history[tail_start..].to_vec();
-            let dropped = current_history.len() - self.config.protect_first_n - self.config.protect_last_n;
-
-            let mut truncated = head;
-            truncated.push(ChatMessage::user(format!(
-                "[Context truncation — {} earlier messages dropped due to token limit]\n\
-                 Previous context could not be fully compressed. Continuing with most recent context.",
-                dropped
-            )));
-            truncated.extend(tail);
-
-            tracing::warn!(
-                final_tokens = final_tokens,
-                danger = danger_threshold,
-                dropped_msgs = dropped,
-                "Hard truncation fallback applied"
-            );
-
-            current_history = truncated;
-        }
-
        #[cfg(debug_assertions)]
        tracing::debug!(
            final_tokens = estimate_tokens(&current_history),
@ -273,48 +201,6 @@ impl ContextCompressor {
        Ok(current_history)
    }

-    /// Try to extract the actual context token limit from an LLM error message.
-    /// Recognizes patterns from OpenAI, Anthropic, and llama.cpp-style errors.
-    pub fn parse_context_limit_from_error(msg: &str) -> Option<usize> {
-        let lower = msg.to_lowercase();
-
-        // Common patterns: "maximum context length is 128000", "context window of 131072",
-        // "128000 token context", "available context size (8448 tokens)", "> 128000 maximum"
-        let markers = [
-            "maximum context length",
-            "context window",
-            "context length",
-            "available context size",
-        ];
-
-        for marker in &markers {
-            if let Some(pos) = lower.find(marker) {
-                let after = &lower[pos + marker.len()..];
-                // Look for a number in the vicinity (up to 10 chars after marker)
-                if let Some(num_str) = find_number_nearby(after, 50) {
-                    if let Ok(n) = num_str.parse::<usize>() {
-                        if (1024..=10_000_000).contains(&n) {
-                            return Some(n);
-                        }
-                    }
-                }
-            }
-        }
-
-        // Also try: "XXXX token context" or "XXXX limit"
-        if let Some(num_str) = find_number_nearby(&lower, lower.len()) {
-            if let Ok(n) = num_str.parse::<usize>() {
-                if (1024..=10_000_000).contains(&n)
-                    && (lower.contains("token") || lower.contains("context") || lower.contains("limit"))
-                {
-                    return Some(n);
-                }
-            }
-        }
-
-        None
-    }
-
    /// Single compression pass - summarize middle messages between user turns.
    /// Returns Some(compressed) if compression happened, None if nothing to compress.
    async fn compress_once(
@ -341,7 +227,7 @@ impl ContextCompressor {

        // Build segments: user -> (assistant turns) -> next user
        // We'll summarize the assistant turns between consecutive user messages
-        let mut new_messages = history[..user_indices[0]].to_vec();
+        let mut new_messages = history[..user_indices[0]].to_vec(); // exclusive: the segment loop pushes user_indices[0] itself

        for i in 0..user_indices.len() - 1 {
            let user_idx = user_indices[i];
@ -386,12 +272,12 @@ impl ContextCompressor {

        // Add last user and everything after (protected)
        let last_user_idx = user_indices[user_indices.len() - 1];
+        if last_user_idx < history.len() {
+            // Keep the last user message and everything after it, even when it is the final entry
            for i in last_user_idx..history.len() {
                new_messages.push(history[i].clone());
            }
-
-        // Remove orphan tool results whose declaring tool_calls were compressed away
-        Self::repair_tool_pairs(&mut new_messages);
+        }

        // If nothing changed, return None
        if new_messages.len() == history.len() {
@ -484,11 +370,8 @@ Be concise, aim for {} characters or less.
 #[cfg(test)]
 mod tests {
    use super::*;
-    use crate::memory::MemoryManager;
    use crate::providers::ChatCompletionResponse;
-    use crate::providers::Usage;
    use async_trait::async_trait;
-    use std::sync::Arc;
    use std::sync::OnceLock;

    /// Mock provider for testing - panics if actually used for LLM calls
@ -520,34 +403,6 @@ mod tests {
        Arc::new(MockProvider)
    }

-    /// Mock summarizer that returns a simple summary — used when compress_once
-    /// needs to call the LLM for summarization.
-    struct MockSummarizer;
-
-    #[async_trait]
-    impl LLMProvider for MockSummarizer {
-        async fn chat(
-            &self,
-            _request: ChatCompletionRequest,
-        ) -> Result<ChatCompletionResponse, Box<dyn std::error::Error + Send + Sync>> {
-            Ok(ChatCompletionResponse {
-                id: "mock".into(),
-                model: "mock".into(),
-                content: "[summarized]".into(),
-                tool_calls: vec![],
-                usage: Usage { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
-            })
-        }
-
-        fn ptype(&self) -> &str { "mock" }
-        fn name(&self) -> &str { "mock" }
-        fn model_id(&self) -> &str { "mock" }
-    }
-
-    fn mock_summarizer() -> Arc<dyn LLMProvider> {
-        Arc::new(MockSummarizer)
-    }
-
    fn test_memory_manager() -> Arc<MemoryManager> {
        static MM: OnceLock<Arc<MemoryManager>> = OnceLock::new();
        MM.get_or_init(|| {
@ -599,206 +454,4 @@ mod tests {
        let compressor = ContextCompressor::new(mock_provider(), 128_000, test_memory_manager());
        assert_eq!(compressor.threshold(), 64_000);
    }
-
-    #[tokio::test]
-    async fn test_compress_if_needed_fast_trims_tool_results() {
-        // context_window=200 → threshold=100.
-        // user "Hi" (~6 raw), tool(3000 x's) → ~760 raw*1.2=912 > 100 → triggers compression.
-        // fast_trim to 50 chars should bring tokens well under 100.
-        let tmp = std::env::temp_dir().join(format!("picobot_ctx_trim_{}.db", std::process::id()));
-        let storage = Arc::new(crate::storage::Storage::new(&tmp).await.unwrap());
-        let mm = Arc::new(MemoryManager::new(storage, "test".into(), "test".into()));
-
-        let config = ContextCompressionConfig {
-            tool_result_trim_chars: 50,
-            protect_first_n: 0,
-            protect_last_n: 10,
-            max_passes: 0,
-            ..Default::default()
-        };
-        let compressor = ContextCompressor::with_config(mock_provider(), 200, config, mm);
-
-        let messages = vec![
-            ChatMessage::user("Hi"),
-            ChatMessage::tool("call1", "bash", &"x".repeat(3000)),
-        ];
-
-        let result = compressor.compress_if_needed(messages).await.unwrap();
-
-        let tool_msg = result.iter().find(|m| m.role == "tool").unwrap();
-        assert!(
-            tool_msg.content.len() < 3000,
-            "tool result should be trimmed, got {} chars",
-            tool_msg.content.len()
-        );
-        assert!(
-            tool_msg.content.contains("[Output truncated"),
-            "trim marker missing from: {}",
-            tool_msg.content
-        );
-
-        let _ = std::fs::remove_file(&tmp);
-    }
-
-    #[tokio::test]
-    async fn test_compress_once_no_duplicate_and_no_lost_user() {
-        // Verifies two boundary bugs in compress_once:
-        //  - B2A (L230): first user message duplicated when protect_first_n > 0
-        //  - B2B (L275): last user message lost when it is the final history message
-        //
-        // context_window=200 → threshold=100. Large tool outputs force LLM summarization.
-        let tmp = std::env::temp_dir().join(format!("picobot_ctx_boundary_{}.db", std::process::id()));
-        let storage = Arc::new(crate::storage::Storage::new(&tmp).await.unwrap());
-        let mm = Arc::new(MemoryManager::new(storage, "test".into(), "test".into()));
-
-        let config = ContextCompressionConfig {
-            tool_result_trim_chars: 2000,
-            protect_first_n: 1,   // system/protected → B2A: first user (after skip) duplicated
-            protect_last_n: 2,
-            max_passes: 1,
-            ..Default::default()
-        };
-        let compressor = ContextCompressor::with_config(mock_summarizer(), 200, config, mm);
-
-        // History: 9 messages, last message is user Q4.
-        // user_indices (skip 1) = [1, 3, 6, 8]
-        // B2A: init history[..=1] includes Q1, then loop i=0 pushes Q1 again → duplicate
-        // B2B: last_user_idx=8, 8 < 8 → false → Q4 not pushed → lost
-        let big = "x".repeat(3000);
-        let messages = vec![
-            ChatMessage::system("You are a helper."), // 0: protected
-            ChatMessage::user("Q1"),                   // 1: first user
-            ChatMessage::tool("t1", "bash", &big),     // 2
-            ChatMessage::user("Q2"),                   // 3
-            ChatMessage::assistant("thinking"),         // 4
-            ChatMessage::tool("t2", "bash", &big),     // 5
-            ChatMessage::user("Q3"),                   // 6
-            ChatMessage::assistant("thinking"),         // 7
-            ChatMessage::user("Q4"),                   // 8: LAST, is user → B2B triggers
-        ];
-
-        let result = compressor.compress_if_needed(messages).await.unwrap();
-
-        // B2A: "Q1" must appear exactly once
-        let q1_count = result.iter().filter(|m| m.role == "user" && m.content == "Q1").count();
-        assert_eq!(q1_count, 1, "Q1 should appear exactly once, got {}", q1_count);
-
-        // B2B: "Q4" must NOT be lost
-        let q4_count = result.iter().filter(|m| m.role == "user" && m.content == "Q4").count();
-        assert_eq!(q4_count, 1, "Q4 should appear exactly once (not lost), got {}", q4_count);
-
-        let _ = std::fs::remove_file(&tmp);
-    }
-
-    #[tokio::test]
-    async fn test_compress_hard_truncation_fallback() {
-        // When LLM compression fails (or max_passes=0) and tokens are still
-        // above 90% of context_window, a head+tail truncation kicks in.
-        let tmp = std::env::temp_dir().join(format!("picobot_ctx_trunc_{}.db", std::process::id()));
-        let storage = Arc::new(crate::storage::Storage::new(&tmp).await.unwrap());
-        let mm = Arc::new(MemoryManager::new(storage, "test".into(), "test".into()));
-
-        let config = ContextCompressionConfig {
-            tool_result_trim_chars: 500,  // trim reduces but not enough
-            protect_first_n: 1,
-            protect_last_n: 2,
-            max_passes: 0,                // no LLM summarization → will exceed danger
-            ..Default::default()
-        };
-        // context_window=100, danger_threshold=90.
-        // Each trimmed tool (~500 chars): ceil(500/4)+4 = 129 raw. 3 tools = 387.
-        // Plus users (~5 each) + system (~15) = ~417 raw * 1.2 = 500 > 90.
-        let compressor = ContextCompressor::with_config(mock_provider(), 100, config, mm);
-
-        let big = "x".repeat(3000);
-        let messages = vec![
-            ChatMessage::system("sys"),
-            ChatMessage::user("Q1"),
-            ChatMessage::tool("t1", "bash", &big),
-            ChatMessage::user("Q2"),
-            ChatMessage::tool("t2", "bash", &big),
-            ChatMessage::user("Q3"),
-            ChatMessage::tool("t3", "bash", &big),
-        ];
-
-        let result = compressor.compress_if_needed(messages).await.unwrap();
-
-        // After hard truncation: head (1) + trunc_note (1) + tail (2) = 4 messages
-        assert!(result.len() < 7, "expected truncation reduction, got {} messages", result.len());
-
-        // Truncation notice should be present
-        let has_notice = result.iter().any(|m| m.content.contains("Context truncation"));
-        assert!(has_notice, "hard truncation notice missing");
-
-        let _ = std::fs::remove_file(&tmp);
-    }
-
-    #[test]
-    fn test_repair_tool_pairs_removes_orphans() {
-        use crate::providers::ToolCall;
-
-        // Simulate compressed output: summary replaced assistant(tool_call: tc1),
-        // leaving tool(tc1) as an orphan. Legitimate tool(tc2) should be kept.
-        let mut messages = vec![
-            ChatMessage::user("Q1"),
-            ChatMessage::user("[Context Summary]\n\nsummary of previous turn"),
-            ChatMessage::tool("tc1", "bash", "orphan result"),         // orphan — tc1 never declared
-            ChatMessage::assistant("done"),                            // declares tc2
-            ChatMessage::tool("tc2", "bash", "legitimate result"),     // legit
-        ];
-        // Set tool_call_id on tool messages and tool_calls on assistant
-        messages[2].tool_call_id = Some("tc1".into());
-        messages[4].tool_call_id = Some("tc2".into());
-        messages[3].tool_calls = Some(vec![ToolCall {
-            id: "tc2".into(),
-            name: "bash".into(),
-            arguments: serde_json::json!({"cmd": "echo ok"}),
-        }]);
-
-        ContextCompressor::repair_tool_pairs(&mut messages);
-
-        // orphan should be removed; legitimate should stay
-        assert_eq!(messages.len(), 4);
-        assert!(messages.iter().all(|m| m.tool_call_id != Some("tc1".into())));
-        assert!(messages.iter().any(|m| m.tool_call_id == Some("tc2".into())));
-    }
-
-    #[test]
-    fn test_parse_context_limit_from_error() {
-        // OpenAI: "maximum context length is 128000"
-        assert_eq!(
-            ContextCompressor::parse_context_limit_from_error(
-                "This model's maximum context length is 128000 tokens."
-            ),
-            Some(128000)
-        );
-
-        // Anthropic: "context window of 200000"
-        assert_eq!(
-            ContextCompressor::parse_context_limit_from_error(
-                "Your request exceeds the context window of 200000."
-            ),
-            Some(200000)
-        );
-
-        // llama.cpp: "available context size (8448 tokens)"
-        assert_eq!(
-            ContextCompressor::parse_context_limit_from_error(
-                "context size exceeded, available context size (8448 tokens)"
-            ),
-            Some(8448)
-        );
-
-        // Non-context error should return None
-        assert_eq!(
-            ContextCompressor::parse_context_limit_from_error("Internal server error"),
-            None
-        );
-
-        // Numbers too small should be rejected
-        assert_eq!(
-            ContextCompressor::parse_context_limit_from_error("context length is 500"),
-            None
-        );
-    }
 }
--- a/src/agent/mod.rs
+++ b/src/agent/mod.rs
@ -3,5 +3,5 @@ pub mod context_compressor;
 pub mod system_prompt;

 pub use agent_loop::{AgentLoop, AgentError, AgentProcessResult};
-pub use context_compressor::{ContextCompressor, estimate_tokens};
+pub use context_compressor::ContextCompressor;
 pub use system_prompt::{build_system_prompt, PromptContext, PromptSection, SystemPromptBuilder};
--- a/src/memory/mod.rs
+++ b/src/memory/mod.rs
@ -52,21 +52,18 @@ impl MemoryManager {
    }

    /// Search memories by keyword query. Returns entries sorted by relevance.
-    /// When `session_id` is provided, results are filtered to that session.
    pub async fn recall(
        &self,
        query: &str,
        limit: usize,
        category: Option<MemoryCategory>,
-        session_id: Option<&str>,
    ) -> Result<Vec<MemoryEntry>, crate::storage::StorageError> {
        self.storage
-            .search_memories(query, category.as_ref(), session_id, limit)
+            .search_memories(query, category.as_ref(), limit)
            .await
    }

    /// Search memories by time range (Unix milliseconds).
-    /// When `session_id` is provided, results are filtered to that session.
    pub async fn recall_by_time(
        &self,
        since: i64,
@ -74,10 +71,9 @@ impl MemoryManager {
        query: Option<&str>,
        limit: usize,
        category: Option<MemoryCategory>,
-        session_id: Option<&str>,
    ) -> Result<Vec<MemoryEntry>, crate::storage::StorageError> {
        self.storage
-            .search_memories_by_time(since, until, query, category.as_ref(), session_id, limit)
+            .search_memories_by_time(since, until, query, category.as_ref(), limit)
            .await
    }

@ -88,7 +84,7 @@ impl MemoryManager {

    /// Check if the memory system has any entries (for testing/health check).
    pub async fn is_empty(&self) -> Result<bool, crate::storage::StorageError> {
-        self.recall("*", 1, None, None).await.map(|r| r.is_empty())
+        self.recall("*", 1, None).await.map(|r| r.is_empty())
    }
 }

@ -120,7 +116,7 @@ mod tests {
        .await
        .unwrap();

-        let results = mm.recall("test memory", 10, None, None).await.unwrap();
+        let results = mm.recall("test memory", 10, None).await.unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].key, "test_key");
        assert_eq!(results[0].content, "This is a test memory");
@ -150,7 +146,7 @@ mod tests {
        .await
        .unwrap();

-        let results = mm.recall("updated", 10, None, None).await.unwrap();
+        let results = mm.recall("updated", 10, None).await.unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].content, "updated");
    }
@ -170,7 +166,7 @@ mod tests {
        .unwrap();
        mm.forget("to_delete").await.unwrap();

-        let results = mm.recall("deleted", 10, None, None).await.unwrap();
+        let results = mm.recall("deleted", 10, None).await.unwrap();
        assert!(results.is_empty());
    }

@ -198,60 +194,17 @@ mod tests {
        .unwrap();

        let know_results = mm
-            .recall("content", 10, Some(MemoryCategory::Knowledge), None)
+            .recall("content", 10, Some(MemoryCategory::Knowledge))
            .await
            .unwrap();
        assert_eq!(know_results.len(), 1);
        assert_eq!(know_results[0].key, "knowledge_1");

        let time_results = mm
-            .recall("content", 10, Some(MemoryCategory::Timeline), None)
+            .recall("content", 10, Some(MemoryCategory::Timeline))
            .await
            .unwrap();
        assert_eq!(time_results.len(), 1);
        assert_eq!(time_results[0].key, "timeline_1");
    }
-
-    #[tokio::test]
-    async fn test_session_id_filter() {
-        let (mm, _dir) = setup_memory_manager().await;
-
-        // Store a timeline entry for session A
-        mm.store(
-            "tl_a",
-            "summary from session A",
-            MemoryCategory::Timeline,
-            Some("chan:chat:dialog_a"),
-            Some(0.5),
-        )
-        .await
-        .unwrap();
-
-        // Store a timeline entry for session B
-        mm.store(
-            "tl_b",
-            "summary from session B",
-            MemoryCategory::Timeline,
-            Some("chan:chat:dialog_b"),
-            Some(0.5),
-        )
-        .await
-        .unwrap();
-
-        // Recall without session_id — should get both
-        let all = mm
-            .recall("summary", 10, Some(MemoryCategory::Timeline), None)
-            .await
-            .unwrap();
-        assert_eq!(all.len(), 2);
-
-        // Recall scoped to session A — should get only tl_a
-        let scoped = mm
-            .recall("summary", 10, Some(MemoryCategory::Timeline), Some("chan:chat:dialog_a"))
-            .await
-            .unwrap();
-        assert_eq!(scoped.len(), 1);
-        assert_eq!(scoped[0].key, "tl_a");
-        assert_eq!(scoped[0].session_id.as_deref(), Some("chan:chat:dialog_a"));
-    }
 }
--- a/src/session/session.rs
+++ b/src/session/session.rs
@ -21,18 +21,6 @@ use crate::config::LLMProviderConfig;
 use crate::agent::{AgentLoop, AgentError, ContextCompressor};
 use crate::agent::system_prompt::build_system_prompt;
 use crate::agent::context_compressor::ContextCompressionConfig;
-
-/// Check if an LLM error message indicates a context window overflow.
-fn is_context_overflow_error(msg: &str) -> bool {
-    let lower = msg.to_lowercase();
-    lower.contains("context length")
-        || lower.contains("context window")
-        || lower.contains("maximum context")
-        || lower.contains("too many tokens")
-        || lower.contains("token limit exceeded")
-        || lower.contains("prompt is too long")
-        || lower.contains("input is too long")
-}
 use crate::providers::{create_provider, LLMProvider};
 use crate::session::session_id::UnifiedSessionId;
 use crate::session::events::DialogInfo;
@ -384,11 +372,6 @@ impl Session {
        &self.compressor
    }

-    /// Get the compressor's current threshold for diagnostics/fallback.
-    pub fn compressor_threshold(&self) -> usize {
-        self.compressor.threshold()
-    }
-
    /// 创建一个临时的 AgentLoop 实例来处理消息
    pub fn create_agent(&self) -> Result<AgentLoop, AgentError> {
        Ok(AgentLoop::with_provider_and_tools(
@ -397,7 +380,7 @@ impl Session {
            self.provider_config.max_tool_iterations,
            self.provider_config.model_id.clone(),
            self.provider_config.workspace_dir.clone(),
-        ).with_context_window(self.provider_config.token_limit))
+        ))
    }

    /// 创建一个附通知通道的 AgentLoop 实例
@ -1322,7 +1305,7 @@ impl SessionManager {
            let skills_prompt = self.skills_loader.build_skills_prompt();

            // Fetch memory context
-            let memory_context = match self.memory_manager.recall(content, 5, Some(crate::memory::MemoryCategory::Knowledge), None).await {
+            let memory_context = match self.memory_manager.recall(content, 5, Some(crate::memory::MemoryCategory::Knowledge)).await {
                Ok(entries) if !entries.is_empty() => {
                    Some(entries.iter()
                        .map(|e| format!("- {}: {}", e.key, e.content))
@ -1336,17 +1319,15 @@ impl SessionManager {
                _ => None,
            };

-            // Build combined system prompt and inject at position 0 AFTER compression.
-            // This ensures AgentLoop.process() sees a system message without it participating
-            // in context compression (system prompt is dynamic and should not be persisted).
+            // Build combined system prompt and inject at position 0
+            // This ensures AgentLoop.process() sees a system message and doesn't inject its own
            let system_prompt = session_guard.build_system_prompt(&skills_prompt, memory_context.as_deref());
+            history.insert(0, ChatMessage::system(system_prompt));

-            let mut history = session_guard.compressor
+            let history = session_guard.compressor
                .compress_if_needed(history)
                .await?;

-            history.insert(0, ChatMessage::system(system_prompt.clone()));
-
            // Advance consolidation pointer — future compressions skip already-processed messages
            let now = chrono::Utc::now().timestamp_millis();
            session_guard.last_consolidated_at = Some(now);
@ -1355,28 +1336,7 @@ impl SessionManager {
            }

            let agent = session_guard.create_agent_with_notify(notify_tx)?;
-
-            // Try LLM call; on context overflow, re-compress with tighter window and retry once.
-            let result = match agent.process(history).await {
-                Ok(r) => r,
-                Err(AgentError::LlmError(ref msg))
-                    if is_context_overflow_error(msg) =>
-                {
-                    let new_window = crate::agent::ContextCompressor::parse_context_limit_from_error(msg)
-                        .unwrap_or(session_guard.compressor_threshold());
-                    tracing::warn!(
-                        new_window,
-                        error = %msg,
-                        "Context overflow in handle_message — retrying with tighter window"
-                    );
-                    session_guard.compressor.set_context_window(new_window);
-                    let raw = session_guard.get_history().to_vec();
-                    let mut retry = session_guard.compressor.compress_if_needed(raw).await?;
-                    retry.insert(0, ChatMessage::system(system_prompt));
-                    agent.process(retry).await?
-                }
-                Err(e) => return Err(e),
-            };
+            let result = agent.process(history).await?;

            for msg in result.emitted_messages {
                session_guard.add_message(msg, true).await
@ -1483,15 +1443,12 @@ impl SessionManager {
                job_name, job_id, channel, chat_id
            );
            let full_system_prompt = format!("{}{}", system_prompt, cron_context);
+            history.insert(0, ChatMessage::system(full_system_prompt));

-            // Inject system prompt AFTER compression so it doesn't participate
-            // in context compression (system prompt is dynamic and should not be persisted).
-            let mut history = session_guard.compressor
+            let history = session_guard.compressor
                .compress_if_needed(history)
                .await?;

-            history.insert(0, ChatMessage::system(full_system_prompt));
-
            let agent = session_guard.create_agent_with_notify(notify_tx)?;
            let result = agent.process(history).await?;

--- a/src/storage/memory.rs
+++ b/src/storage/memory.rs
@ -56,7 +56,6 @@ impl super::Storage {
        &self,
        query: &str,
        category: Option<&MemoryCategory>,
-        session_id: Option<&str>,
        limit: usize,
    ) -> Result<Vec<MemoryEntry>, StorageError> {
        // Build FTS5 query: segment with jieba, wrap each term in quotes, join with OR
@ -77,7 +76,7 @@ impl super::Storage {
                   m.session_id, m.created_at, m.updated_at
            FROM memory_fts f
            JOIN memories m ON f.rowid = m.rowid
-            WHERE memory_fts MATCH ? AND (? IS NULL OR m.category = ?) AND (? IS NULL OR m.session_id = ?)
+            WHERE memory_fts MATCH ? AND (? IS NULL OR m.category = ?)
            ORDER BY rank
            LIMIT ?
            "#,
@ -85,8 +84,6 @@ impl super::Storage {
        .bind(&fts_query)
        .bind(category_filter)
        .bind(category_filter)
-        .bind(session_id)
-        .bind(session_id)
        .bind(limit as i64)
        .fetch_all(self.pool())
        .await?;
@ -116,7 +113,6 @@ impl super::Storage {
                    FROM memories
                    WHERE ({})
                      AND (? IS NULL OR category = ?)
-                      AND (? IS NULL OR session_id = ?)
                    ORDER BY importance DESC, updated_at DESC
                    LIMIT ?
                    "#,
@ -131,8 +127,6 @@ impl super::Storage {
                query_builder = query_builder
                    .bind(category_filter)
                    .bind(category_filter)
-                    .bind(session_id)
-                    .bind(session_id)
                    .bind(limit as i64);

                let rows = query_builder.fetch_all(self.pool()).await?;
@ -150,7 +144,6 @@ impl super::Storage {
        until: i64,
        query: Option<&str>,
        category: Option<&MemoryCategory>,
-        session_id: Option<&str>,
        limit: usize,
    ) -> Result<Vec<MemoryEntry>, StorageError> {
        let category_filter = category.map(|c| c.as_str());
@ -187,7 +180,6 @@ impl super::Storage {
                WHERE ({})
                  AND created_at >= ? AND created_at <= ?
                  AND (? IS NULL OR category = ?)
-                  AND (? IS NULL OR session_id = ?)
                ORDER BY created_at DESC
                LIMIT ?
                "#,
@ -204,8 +196,6 @@ impl super::Storage {
                .bind(&until_dt)
                .bind(category_filter)
                .bind(category_filter)
-                .bind(session_id)
-                .bind(session_id)
                .bind(limit as i64);

            query_builder.fetch_all(self.pool()).await?
@ -217,7 +207,6 @@ impl super::Storage {
                FROM memories
                WHERE created_at >= ? AND created_at <= ?
                  AND (? IS NULL OR category = ?)
-                  AND (? IS NULL OR session_id = ?)
                ORDER BY created_at DESC
                LIMIT ?
                "#,
@ -226,8 +215,6 @@ impl super::Storage {
            .bind(&until_dt)
            .bind(category_filter)
            .bind(category_filter)
-            .bind(session_id)
-            .bind(session_id)
            .bind(limit as i64)
            .fetch_all(self.pool())
            .await?
--- a/src/tools/memory.rs
+++ b/src/tools/memory.rs
@ -24,7 +24,7 @@ impl Tool for MemoryStoreTool {
    }

    fn description(&self) -> &str {
-        "Store a fact, preference, or insight into long-term knowledge memory. \
+        "Store a fact, preference, or insight into long-term memory. \
         Use this when the user shares important information you should remember. \
         Provide a descriptive key (e.g., 'user_prefers_python', 'project_auth_approach') \
         and the full content to remember."
@ -46,6 +46,11 @@ impl Tool for MemoryStoreTool {
                    "type": "string",
                    "description": "The full content of the memory entry."
                },
+                "category": {
+                    "type": "string",
+                    "enum": ["knowledge", "timeline"],
+                    "description": "Memory category. Use 'knowledge' for facts/preferences/insights, 'timeline' for conversation summaries."
+                },
                "importance": {
                    "type": "number",
                    "description": "Importance score 0.0-1.0. Higher = more important. Use 0.8+ for critical facts, 0.5 for general info."
@ -66,10 +71,16 @@ impl Tool for MemoryStoreTool {
            .and_then(|v| v.as_str())
            .ok_or_else(|| anyhow::anyhow!("Missing required parameter: content"))?;

+        let category = args
+            .get("category")
+            .and_then(|v| v.as_str())
+            .and_then(MemoryCategory::from_str)
+            .unwrap_or(MemoryCategory::Knowledge);
+
        let importance = args.get("importance").and_then(|v| v.as_f64());

        self.memory
-            .store(key, content, MemoryCategory::Knowledge, None, importance)
+            .store(key, content, category, None, importance)
            .await?;

        Ok(ToolResult {
@ -99,8 +110,8 @@ impl Tool for MemoryRecallTool {
    }

    fn description(&self) -> &str {
-        "Search and retrieve entries from long-term knowledge memory using keyword matching. \
-         Use this to recall previously stored facts, preferences, or insights. \
+        "Search and retrieve entries from long-term memory using keyword matching. \
+         Use this to recall previously stored facts, preferences, or conversation history. \
         IMPORTANT: query must be a space-separated list of RELEVANT KEYWORDS (not a question or sentence). \
         Use multiple synonymous or related terms to increase recall. \
         Example: instead of 'what is the user location', use 'user location address city residence'. \
@ -119,6 +130,11 @@ impl Tool for MemoryRecallTool {
                    "type": "string",
                    "description": "Space-separated KEYWORDS for memory search (NOT a natural language question). Use multiple related terms for better recall, e.g. 'address city location residence'."
                },
+                "category": {
+                    "type": "string",
+                    "enum": ["knowledge", "timeline"],
+                    "description": "Filter by memory category. Omit to search all categories."
+                },
                "since": {
                    "type": "integer",
                    "description": "Start of time range (Unix milliseconds)."
@ -142,6 +158,11 @@ impl Tool for MemoryRecallTool {
            .and_then(|v| v.as_str())
            .ok_or_else(|| anyhow::anyhow!("Missing required parameter: query"))?;

+        let category = args
+            .get("category")
+            .and_then(|v| v.as_str())
+            .and_then(MemoryCategory::from_str);
+
        let limit = args.get("limit").and_then(|v| v.as_u64()).unwrap_or(10) as usize;

        let entries = if args.get("since").is_some() || args.get("until").is_some() {
@ -151,10 +172,10 @@ impl Tool for MemoryRecallTool {
                .and_then(|v| v.as_i64())
                .unwrap_or(chrono::Utc::now().timestamp_millis());
            self.memory
-                .recall_by_time(since, until, Some(query), limit, Some(MemoryCategory::Knowledge), None)
+                .recall_by_time(since, until, Some(query), limit, category)
                .await?
        } else {
-            self.memory.recall(query, limit, Some(MemoryCategory::Knowledge), None).await?
+            self.memory.recall(query, limit, category).await?
        };

        if entries.is_empty() {
@ -168,12 +189,10 @@ impl Tool for MemoryRecallTool {
        let formatted = entries
            .iter()
            .map(|e| {
-                let session = e.session_id.as_deref().map(|s| format!(" [session: {}]", s)).unwrap_or_default();
                format!(
-                    "- {} [{}]{} [importance: {:.1}]: {}",
+                    "- {} [{}] [importance: {:.1}]: {}",
                    e.key,
                    e.category.as_str(),
-                    session,
                    e.importance,
                    e.content
                )
@ -189,119 +208,6 @@ impl Tool for MemoryRecallTool {
    }
 }

-// ── TimelineRecallTool ────────────────────────────────────────────────
-
-pub struct TimelineRecallTool {
-    memory: Arc<MemoryManager>,
-}
-
-impl TimelineRecallTool {
-    pub fn new(memory: Arc<MemoryManager>) -> Self {
-        Self { memory }
-    }
-}
-
-#[async_trait]
-impl Tool for TimelineRecallTool {
-    fn name(&self) -> &str {
-        "timeline_recall"
-    }
-
-    fn description(&self) -> &str {
-        "Search and retrieve conversation summaries from timeline memory. \
-         Use this to recall what was discussed in past sessions or earlier in the current session. \
-         Optionally filter by session_id to scope to a specific conversation. \
-         IMPORTANT: query must be a space-separated list of RELEVANT KEYWORDS (not a question or sentence)."
-    }
-
-    fn read_only(&self) -> bool {
-        true
-    }
-
-    fn parameters_schema(&self) -> serde_json::Value {
-        json!({
-            "type": "object",
-            "properties": {
-                "query": {
-                    "type": "string",
-                    "description": "Space-separated KEYWORDS for timeline search (NOT a natural language question). Use multiple related terms for better recall."
-                },
-                "session_id": {
-                    "type": "string",
-                    "description": "Filter to a specific session (format: channel:chat_id:dialog_id). Omit to search across all sessions."
-                },
-                "since": {
-                    "type": "integer",
-                    "description": "Start of time range (Unix milliseconds)."
-                },
-                "until": {
-                    "type": "integer",
-                    "description": "End of time range (Unix milliseconds)."
-                },
-                "limit": {
-                    "type": "integer",
-                    "description": "Max results to return (default 10)."
-                }
-            },
-            "required": ["query"]
-        })
-    }
-
-    async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
-        let query = args
-            .get("query")
-            .and_then(|v| v.as_str())
-            .ok_or_else(|| anyhow::anyhow!("Missing required parameter: query"))?;
-
-        let session_id = args.get("session_id").and_then(|v| v.as_str());
-
-        let limit = args.get("limit").and_then(|v| v.as_u64()).unwrap_or(10) as usize;
-
-        let entries = if args.get("since").is_some() || args.get("until").is_some() {
-            let since = args.get("since").and_then(|v| v.as_i64()).unwrap_or(0);
-            let until = args
-                .get("until")
-                .and_then(|v| v.as_i64())
-                .unwrap_or(chrono::Utc::now().timestamp_millis());
-            self.memory
-                .recall_by_time(since, until, Some(query), limit, Some(MemoryCategory::Timeline), session_id)
-                .await?
-        } else {
-            self.memory.recall(query, limit, Some(MemoryCategory::Timeline), session_id).await?
-        };
-
-        if entries.is_empty() {
-            return Ok(ToolResult {
-                success: true,
-                output: "No matching timeline entries found.".to_string(),
-                error: None,
-            });
-        }
-
-        let formatted = entries
-            .iter()
-            .map(|e| {
-                let session = e.session_id.as_deref().map(|s| format!(" [session: {}]", s)).unwrap_or_default();
-                format!(
-                    "- {} [{}]{} [importance: {:.1}]: {}",
-                    e.key,
-                    e.category.as_str(),
-                    session,
-                    e.importance,
-                    e.content
-                )
-            })
-            .collect::<Vec<_>>()
-            .join("\n");
-
-        Ok(ToolResult {
-            success: true,
-            output: format!("Found {} timeline entries:\n{}", entries.len(), formatted),
-            error: None,
-        })
-    }
-}
-
 // ── MemoryForgetTool ─────────────────────────────────────────────────

 pub struct MemoryForgetTool {
--- a/src/tools/mod.rs
+++ b/src/tools/mod.rs
@ -22,7 +22,7 @@ pub use file_read::FileReadTool;
 pub use file_write::FileWriteTool;
 pub use get_skill::GetSkillTool;
 pub use http_request::HttpRequestTool;
-pub use memory::{MemoryForgetTool, MemoryRecallTool, MemoryStoreTool, TimelineRecallTool};
+pub use memory::{MemoryForgetTool, MemoryRecallTool, MemoryStoreTool};
 pub use registry::ToolRegistry;
 pub use schema::{CleaningStrategy, SchemaCleanr};
 pub use send_message::SendMessageTool;
@ -57,7 +57,6 @@ pub fn create_default_tools(

    registry.register(MemoryStoreTool::new(memory.clone()));
    registry.register(MemoryRecallTool::new(memory.clone()));
-    registry.register(TimelineRecallTool::new(memory.clone()));
    registry.register(MemoryForgetTool::new(memory.clone()));

    registry