diff --git a/src/agent/context_compressor.rs b/src/agent/context_compressor.rs index b3c88fe..a164489 100644 --- a/src/agent/context_compressor.rs +++ b/src/agent/context_compressor.rs @@ -11,31 +11,19 @@ use crate::agent::{AgentError, AgentRuntimeConfig}; const TOKEN_ESTIMATE_CHARS_PER_TOKEN: usize = 4; const TOKEN_ESTIMATE_SAFETY_MULTIPLIER: f64 = 1.2; -/// Token estimation using ~4 chars/token heuristic with 1.2x safety margin. +/// Token estimation using JSON serialization (matches actual request size) pub fn estimate_tokens(messages: &[ChatMessage]) -> usize { - let raw: usize = messages - .iter() - .map(|message| { - message - .content - .len() - .div_ceil(TOKEN_ESTIMATE_CHARS_PER_TOKEN) - + estimate_image_tokens(&message.media_refs) - + 4 - }) - .sum(); - (raw as f64 * TOKEN_ESTIMATE_SAFETY_MULTIPLIER) as usize -} + // Serialize to JSON to match actual request format sent to LLM + let serialized_len = serde_json::to_string(messages) + .map(|s| s.len()) + .unwrap_or_else(|_| { + // Fallback: use content length if serialization fails + messages.iter().map(|m| m.content.len()).sum() + }); -fn estimate_image_tokens(media_refs: &[String]) -> usize { - media_refs - .iter() - .filter_map(|path| std::fs::metadata(path).ok()) - .map(|metadata| { - let base64_chars = metadata.len().saturating_mul(4).div_ceil(3) as usize; - base64_chars.div_ceil(TOKEN_ESTIMATE_CHARS_PER_TOKEN) - }) - .sum() + // Apply safety margin for token estimation + ((serialized_len / TOKEN_ESTIMATE_CHARS_PER_TOKEN) as f64 + * TOKEN_ESTIMATE_SAFETY_MULTIPLIER) as usize } /// Configuration for context compression. @@ -502,13 +490,13 @@ mod tests { ]; let tokens = estimate_tokens(&messages); - // "Hello" (5) -> ceil(5/4)+4 = 2+4 = 6 - // "Hi there!" (8) -> ceil(8/4)+4 = 2+4 = 6 - // "How are you?" (11) -> ceil(11/4)+4 = 3+4 = 7 - // raw = 19, with 1.2x = ~23 + // JSON serialization includes: id, role, content, timestamp, etc. + // With 3 messages, the JSON overhead is significant + // Serialized JSON is typically 300-500 chars for 3 simple messages + // 500 / 4 * 1.2 = ~150 tokens assert!( - tokens > 18 && tokens < 30, - "Expected ~23 tokens, got {}", + tokens > 50 && tokens < 300, + "Expected ~100-200 tokens (JSON overhead), got {}", tokens ); } diff --git a/src/command/handlers/get_current.rs b/src/command/handlers/get_current.rs index 5c5be55..4d963b3 100644 --- a/src/command/handlers/get_current.rs +++ b/src/command/handlers/get_current.rs @@ -1,3 +1,4 @@ +use crate::agent::context_compressor::estimate_tokens; use crate::command::context::CommandContext; use crate::command::handler::{CommandHandler, CommandMetadata}; use crate::command::response::{CommandError, CommandResponse, MessageKind}; @@ -56,14 +57,22 @@ async fn handle_get_current_session( .map_err(|e| CommandError::new("GET_TOPIC_ERROR", e.to_string()))? .ok_or_else(|| CommandError::new("TOPIC_NOT_FOUND", format!("Topic not found: {}", topic_id)))?; + // Load messages and estimate tokens + let messages = handler + .store + .load_messages_for_topic(topic_id) + .map_err(|e| CommandError::new("LOAD_MESSAGES_ERROR", e.to_string()))?; + let estimated_tokens = estimate_tokens(&messages); + let last_active = format_time_ago(topic.last_active_at); let created_at = format_time_ago(topic.created_at); let message = format!( - "Current Topic:\n\n Topic ID: {}\n Title: {}\n Messages: {}\n Created: {}\n Last Active: {}", + "Current Topic:\n\n Topic ID: {}\n Title: {}\n Messages: {}\n Tokens: ~{}\n Created: {}\n Last Active: {}", topic.id, topic.title, topic.message_count, + estimated_tokens, created_at, last_active ); @@ -72,7 +81,8 @@ async fn handle_get_current_session( .with_message(MessageKind::Notification, &message) .with_metadata("topic_id", &topic.id) .with_metadata("title", &topic.title) - .with_metadata("message_count", &topic.message_count.to_string())) + .with_metadata("message_count", &topic.message_count.to_string()) + .with_metadata("estimated_tokens", &estimated_tokens.to_string())) } fn format_time_ago(timestamp_ms: i64) -> String {