feat(compressor): return CompressionResult with created_timelines flag, record marker in handle_message and /compact

This commit is contained in:
xiaoxixi 2026-05-10 14:40:13 +08:00
parent 29543444da
commit e65130450e
2 changed files with 35 additions and 14 deletions

View File

@ -70,6 +70,12 @@ pub struct ContextCompressor {
session_id: Option<String>, session_id: Option<String>,
} }
/// Result of context compression.
///
/// Returned by `ContextCompressor::compress_if_needed` in place of a bare
/// `Vec<ChatMessage>` so callers can tell whether the summarization pass
/// actually replaced any history.
pub struct CompressionResult {
/// The history to use going forward: the input messages unchanged when no
/// summarization was required, otherwise the compressed message list.
pub history: Vec<ChatMessage>,
/// `true` only if at least one summarization pass (`compress_once`)
/// produced a compressed history; `false` on every early-return path.
/// Callers use this to decide whether to update
/// `last_compressed_message_at`.
/// NOTE(review): presumably a successful pass is what creates timeline
/// entries in the memory manager — confirm against `compress_once`.
pub created_timelines: bool,
}
impl ContextCompressor { impl ContextCompressor {
/// Create a new compressor with the given provider, context window size, and memory manager. /// Create a new compressor with the given provider, context window size, and memory manager.
pub fn new( pub fn new(
@ -173,11 +179,11 @@ impl ContextCompressor {
pub async fn compress_if_needed( pub async fn compress_if_needed(
&self, &self,
mut history: Vec<ChatMessage>, mut history: Vec<ChatMessage>,
) -> Result<Vec<ChatMessage>, AgentError> { ) -> Result<CompressionResult, AgentError> {
// Check if compression is needed // Check if compression is needed
let tokens = estimate_tokens(&history); let tokens = estimate_tokens(&history);
if tokens <= self.threshold() { if tokens <= self.threshold() {
return Ok(history); return Ok(CompressionResult { history, created_timelines: false });
} }
#[cfg(debug_assertions)] #[cfg(debug_assertions)]
@ -200,11 +206,12 @@ impl ContextCompressor {
); );
} }
if tokens_after <= self.threshold() { if tokens_after <= self.threshold() {
return Ok(history); return Ok(CompressionResult { history, created_timelines: false });
} }
// LLM summarization pass // LLM summarization pass
let mut current_history = history; let mut current_history = history;
let mut created_timelines = false;
for pass in 0..self.config.max_passes { for pass in 0..self.config.max_passes {
let tokens = estimate_tokens(&current_history); let tokens = estimate_tokens(&current_history);
if tokens <= self.threshold() { if tokens <= self.threshold() {
@ -221,6 +228,7 @@ impl ContextCompressor {
match self.compress_once(&current_history).await { match self.compress_once(&current_history).await {
Ok(Some(compressed)) => { Ok(Some(compressed)) => {
current_history = compressed; current_history = compressed;
created_timelines = true;
} }
Ok(None) => { Ok(None) => {
// No more compressible content // No more compressible content
@ -270,7 +278,7 @@ impl ContextCompressor {
"Context compression completed" "Context compression completed"
); );
Ok(current_history) Ok(CompressionResult { history: current_history, created_timelines })
} }
/// Try to extract the actual context token limit from an LLM error message. /// Try to extract the actual context token limit from an LLM error message.
@ -623,7 +631,7 @@ mod tests {
ChatMessage::tool("call1", "bash", &"x".repeat(3000)), ChatMessage::tool("call1", "bash", &"x".repeat(3000)),
]; ];
let result = compressor.compress_if_needed(messages).await.unwrap(); let result = compressor.compress_if_needed(messages).await.unwrap().history;
let tool_msg = result.iter().find(|m| m.role == "tool").unwrap(); let tool_msg = result.iter().find(|m| m.role == "tool").unwrap();
assert!( assert!(
@ -677,7 +685,7 @@ mod tests {
ChatMessage::user("Q4"), // 8: LAST, is user → B2B triggers ChatMessage::user("Q4"), // 8: LAST, is user → B2B triggers
]; ];
let result = compressor.compress_if_needed(messages).await.unwrap(); let result = compressor.compress_if_needed(messages).await.unwrap().history;
// B2A: "Q1" must appear exactly once // B2A: "Q1" must appear exactly once
let q1_count = result.iter().filter(|m| m.role == "user" && m.content == "Q1").count(); let q1_count = result.iter().filter(|m| m.role == "user" && m.content == "Q1").count();
@ -721,7 +729,7 @@ mod tests {
ChatMessage::tool("t3", "bash", &big), ChatMessage::tool("t3", "bash", &big),
]; ];
let result = compressor.compress_if_needed(messages).await.unwrap(); let result = compressor.compress_if_needed(messages).await.unwrap().history;
// After hard truncation: head (1) + trunc_note (1) + tail (2) = 4 messages // After hard truncation: head (1) + trunc_note (1) + tail (2) = 4 messages
assert!(result.len() < 7, "expected truncation reduction, got {} messages", result.len()); assert!(result.len() < 7, "expected truncation reduction, got {} messages", result.len());

View File

@ -858,12 +858,16 @@ impl SessionManager {
let mut session_guard = session.lock().await; let mut session_guard = session.lock().await;
let original_count = session_guard.get_history().len(); let original_count = session_guard.get_history().len();
let history = session_guard.get_history().to_vec(); let history = session_guard.get_history().to_vec();
let compressed = session_guard.compressor let result = session_guard.compressor
.compress_if_needed(history) .compress_if_needed(history)
.await?; .await?;
let compressed_count = compressed.len(); let compressed_count = result.history.len();
if result.created_timelines {
session_guard.last_compressed_message_at = Some(chrono::Utc::now().timestamp_millis());
let _ = session_guard.persist_session_meta().await;
}
session_guard.clear_history(); session_guard.clear_history();
for msg in compressed { for msg in result.history {
session_guard.add_message(msg, false).await session_guard.add_message(msg, false).await
.map_err(|e| AgentError::Other(format!("persist error: {}", e)))?; .map_err(|e| AgentError::Other(format!("persist error: {}", e)))?;
} }
@ -1346,13 +1350,17 @@ impl SessionManager {
// in context compression (system prompt is dynamic and should not be persisted). // in context compression (system prompt is dynamic and should not be persisted).
let system_prompt = session_guard.build_system_prompt(&skills_prompt, memory_context.as_deref()); let system_prompt = session_guard.build_system_prompt(&skills_prompt, memory_context.as_deref());
let mut history = session_guard.compressor let result = session_guard.compressor
.compress_if_needed(history) .compress_if_needed(history)
.await?; .await?;
if result.created_timelines {
session_guard.last_compressed_message_at = Some(chrono::Utc::now().timestamp_millis());
}
let mut history = result.history;
history.insert(0, ChatMessage::system(system_prompt.clone())); history.insert(0, ChatMessage::system(system_prompt.clone()));
// Advance consolidation pointer — future compressions skip already-processed messages // Persist consolidation state
let now = chrono::Utc::now().timestamp_millis(); let now = chrono::Utc::now().timestamp_millis();
session_guard.last_consolidated_at = Some(now); session_guard.last_consolidated_at = Some(now);
if let Err(e) = session_guard.persist_session_meta().await { if let Err(e) = session_guard.persist_session_meta().await {
@ -1376,7 +1384,11 @@ impl SessionManager {
); );
session_guard.compressor.set_context_window(new_window); session_guard.compressor.set_context_window(new_window);
let raw = session_guard.get_history().to_vec(); let raw = session_guard.get_history().to_vec();
let mut retry = session_guard.compressor.compress_if_needed(raw).await?; let retry_result = session_guard.compressor.compress_if_needed(raw).await?;
if retry_result.created_timelines {
session_guard.last_compressed_message_at = Some(chrono::Utc::now().timestamp_millis());
}
let mut retry = retry_result.history;
retry.insert(0, ChatMessage::system(system_prompt)); retry.insert(0, ChatMessage::system(system_prompt));
agent.process(retry).await? agent.process(retry).await?
} }
@ -1493,7 +1505,8 @@ impl SessionManager {
// in context compression (system prompt is dynamic and should not be persisted). // in context compression (system prompt is dynamic and should not be persisted).
let mut history = session_guard.compressor let mut history = session_guard.compressor
.compress_if_needed(history) .compress_if_needed(history)
.await?; .await?
.history;
history.insert(0, ChatMessage::system(full_system_prompt)); history.insert(0, ChatMessage::system(full_system_prompt));