use std::io::Read; use std::path::{Path, PathBuf}; use std::sync::Arc; use anyhow::anyhow; use async_trait::async_trait; use base64::Engine; #[cfg(unix)] use encoding_rs::GBK; use serde_json::json; use crate::bus::MediaItem; use super::traits::{Tool, ToolContext, ToolResult}; #[derive(Debug, Clone)] pub struct SessionSendRequest { pub text: Option, pub attachments: Vec, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct SessionSendOutcome { pub published_messages: usize, pub text_sent: bool, pub attachment_count: usize, } #[async_trait] pub trait SessionMessageSender: Send + Sync + 'static { async fn send_to_current_session( &self, context: &ToolContext, request: SessionSendRequest, ) -> anyhow::Result; } pub struct NoopSessionMessageSender; #[async_trait] impl SessionMessageSender for NoopSessionMessageSender { async fn send_to_current_session( &self, _context: &ToolContext, _request: SessionSendRequest, ) -> anyhow::Result { Err(anyhow!( "session send tool is not configured with an outbound sender" )) } } pub struct SessionSendTool { sender: Arc, } impl SessionSendTool { pub fn new(sender: Arc) -> Self { Self { sender } } } #[async_trait] impl Tool for SessionSendTool { fn name(&self) -> &str { "send_session_message" } fn description(&self) -> &str { "Send a message to the current conversation through the normal channel reply path. You can send a text-only message, one or more local file attachments, or a text message followed by attachments. Use this when you need to proactively deliver content back to the current user." } fn parameters_schema(&self) -> serde_json::Value { json!({ "type": "object", "properties": { "text": { "type": "string", "description": "Optional text to send to the current conversation. This tool can send a text-only message even when no attachments are provided." }, "attachments": { "type": "array", "description": "Optional list of local file paths to send to the current conversation.", "items": { "type": "string", "description": "Absolute or workspace-relative local file path" } } }, "anyOf": [ { "required": ["text"] }, { "required": ["attachments"] } ] }) } async fn execute(&self, _args: serde_json::Value) -> anyhow::Result { Ok(error_result( "send_session_message requires tool context for the current conversation", )) } async fn execute_with_context( &self, context: &ToolContext, args: serde_json::Value, ) -> anyhow::Result { if let Err(err) = validate_context(context) { return Ok(error_result(&err.to_string())); } let text = args .get("text") .and_then(|value| value.as_str()) .map(str::trim) .filter(|value| !value.is_empty()) .map(ToOwned::to_owned); let attachments = match args.get("attachments") { Some(value) => match parse_attachments(value) { Ok(attachments) => attachments, Err(err) => return Ok(error_result(&err.to_string())), }, None => Vec::new(), }; if text.is_none() && attachments.is_empty() { return Ok(error_result( "send_session_message requires non-empty text, attachments, or both", )); } let outcome = match self .sender .send_to_current_session( context, SessionSendRequest { text, attachments, }, ) .await { Ok(outcome) => outcome, Err(err) => return Ok(error_result(&err.to_string())), }; Ok(ToolResult { success: true, output: format_success(outcome), error: None, }) } } fn validate_context(context: &ToolContext) -> anyhow::Result<()> { if context.channel_name.as_deref().unwrap_or_default().is_empty() { return Err(anyhow!( "send_session_message requires channel_name in tool context" )); } if context.chat_id.as_deref().unwrap_or_default().is_empty() { return Err(anyhow!( "send_session_message requires chat_id in tool context" )); } Ok(()) } /// 解析附件文件路径,支持非 UTF-8 编码的文件名回退。 /// /// 在 Linux 上,文件名可能是 GBK 等非 UTF-8 编码。当 LLM 通过 bash 工具 /// 的 `ls` 输出获取文件名时(bash 工具会将非 UTF-8 输出解码为 UTF-8), /// LLM 使用的是 UTF-8 字符串,与磁盘上的实际字节不匹配。 /// /// 此函数的策略是: /// 1. 先按 UTF-8 原样访问(最快路径) /// 2. 如果失败,列出父目录,用多种编码解码磁盘上的文件名字节, /// 与目标文件名比对,找到匹配项后返回实际磁盘路径 fn resolve_attachment_path(raw_path: &str) -> PathBuf { let path = Path::new(raw_path); tracing::debug!( raw_path = %raw_path, raw_path_bytes = ?raw_path.as_bytes(), "resolve_attachment_path: attempting to resolve path" ); // 1. 先按原样(UTF-8)尝试 if path.exists() { tracing::debug!( raw_path = %raw_path, "resolve_attachment_path: path exists as-is (UTF-8)" ); return path.to_path_buf(); } tracing::debug!( raw_path = %raw_path, "resolve_attachment_path: path not found as UTF-8, trying directory scan" ); // 2. 提取父目录和文件名,列出目录逐项比对 if let (Some(parent), Some(target_filename)) = (path.parent(), path.file_name()) { let target_str = target_filename.to_string_lossy(); tracing::debug!( parent = %parent.display(), target_filename = %target_str, target_filename_bytes = ?target_filename.as_encoded_bytes(), "resolve_attachment_path: scanning parent directory" ); match std::fs::read_dir(parent) { Ok(entries) => { let mut entry_count = 0; for entry in entries.flatten() { entry_count += 1; let entry_name = entry.file_name(); let is_match = filename_matches_target(&entry_name, &target_str); tracing::trace!( entry_path = %entry.path().display(), entry_name_lossy = %entry_name.to_string_lossy(), entry_name_bytes = ?entry_name.as_encoded_bytes(), is_match = is_match, "resolve_attachment_path: checking entry" ); if is_match { tracing::debug!( entry_path = %entry.path().display(), entry_name_lossy = %entry_name.to_string_lossy(), "resolve_attachment_path: MATCH FOUND via encoding fallback" ); return entry.path(); } } tracing::debug!( parent = %parent.display(), entry_count = entry_count, "resolve_attachment_path: directory scan complete, no match" ); } Err(e) => { tracing::warn!( parent = %parent.display(), error = %e, "resolve_attachment_path: failed to read directory" ); } } } else { tracing::debug!( raw_path = %raw_path, has_parent = path.parent().is_some(), has_filename = path.file_name().is_some(), "resolve_attachment_path: cannot extract parent or filename" ); } // 回退失败,返回原路径(让调用方报错) tracing::warn!( raw_path = %raw_path, "resolve_attachment_path: all resolution attempts failed, returning original path" ); path.to_path_buf() } /// 检查磁盘上的文件名(OsStr)是否与目标 UTF-8 文件名匹配。 /// /// 在 Unix 上,尝试用多种编码解码磁盘字节,与目标字符串比对。 #[cfg(unix)] fn filename_matches_target(on_disk_name: &std::ffi::OsStr, target: &str) -> bool { use std::os::unix::ffi::OsStrExt; let bytes = on_disk_name.as_bytes(); tracing::debug!( on_disk_bytes_hex = %format_bytes_hex(bytes), target = %target, target_bytes_hex = %format_bytes_hex(target.as_bytes()), "filename_matches_target: comparing on-disk bytes with target" ); // 直接 UTF-8 匹配 match std::str::from_utf8(bytes) { Ok(decoded) => { let matches = decoded == target; tracing::debug!( decoded_utf8 = %decoded, decoded_len = decoded.len(), target_len = target.len(), matches = matches, "filename_matches_target: UTF-8 decode result" ); if matches { return true; } // UTF-8 匹配失败,继续尝试其他编码 — 可能磁盘上是 GBK tracing::debug!( "filename_matches_target: UTF-8 matched but strings differ, trying GBK decode" ); } Err(e) => { tracing::debug!( utf8_error = %e, "filename_matches_target: not valid UTF-8, trying GBK decode" ); } } // 尝试 GBK 解码 let (gbk_decoded, _, had_errors) = GBK.decode(bytes); if !had_errors { let matches = gbk_decoded == target; tracing::debug!( gbk_decoded = %gbk_decoded, gbk_decoded_len = gbk_decoded.len(), target = %target, target_len = target.len(), matches = matches, "filename_matches_target: GBK decode result" ); if matches { return true; } // GBK 解码成功但不匹配,尝试归一化后比对 let normalized_disk = normalize_filename(&gbk_decoded); let normalized_target = normalize_filename(target); if normalized_disk == normalized_target { tracing::debug!( normalized_disk = %normalized_disk, normalized_target = %normalized_target, "filename_matches_target: matched after normalization" ); return true; } } else { tracing::debug!( gbk_decoded_lossy = %gbk_decoded, had_errors = had_errors, "filename_matches_target: GBK decode had errors" ); } // 回退:lossy 转换比对 let lossy = String::from_utf8_lossy(bytes); let matches = lossy == target; tracing::debug!( lossy = %lossy, target = %target, matches = matches, "filename_matches_target: lossy fallback result" ); if matches { return true; } // 最后尝试:lossy 归一化比对 let normalized_lossy = normalize_filename(&lossy); let normalized_target = normalize_filename(target); if normalized_lossy == normalized_target { tracing::debug!( normalized_lossy = %normalized_lossy, normalized_target = %normalized_target, "filename_matches_target: matched after lossy normalization" ); return true; } false } /// 对文件名做归一化处理:去除不可见字符和空白字符差异,便于模糊匹配。 /// /// LLM 有时会在中文文件名中插入空格(如 "139 邮箱" vs "139邮箱"), /// 此函数去掉所有空白字符和零宽字符,只比对有意义的文字部分。 #[cfg(unix)] fn normalize_filename(s: &str) -> String { s.chars() .filter(|c| match *c { // 过滤 ASCII 空白 ' ' | '\t' | '\n' | '\r' => false, // 过滤零宽字符 '\u{200B}' | '\u{200C}' | '\u{200D}' | '\u{FEFF}' | '\u{200E}' | '\u{200F}' => false, // 过滤 Unicode 空白字符 '\u{00A0}' | '\u{3000}' => false, // 保留其他所有字符 _ => true, }) .collect() } /// 将字节切片格式化为十六进制字符串,用于调试日志。 #[cfg(unix)] fn format_bytes_hex(bytes: &[u8]) -> String { bytes .iter() .map(|b| format!("{:02x}", b)) .collect::>() .join(" ") } #[cfg(not(unix))] fn filename_matches_target(on_disk_name: &std::ffi::OsStr, target: &str) -> bool { let matches = on_disk_name.to_string_lossy() == target; tracing::trace!( on_disk_lossy = %on_disk_name.to_string_lossy(), target = %target, matches = matches, "filename_matches_target (non-unix): comparing" ); matches } fn parse_attachments(value: &serde_json::Value) -> anyhow::Result> { // 支持两种格式:实际数组 或 字符串化的 JSON 数组 let paths = if let Some(arr) = value.as_array() { arr .iter() .filter_map(|v| v.as_str()) .map(str::trim) .filter(|v| !v.is_empty()) .map(ToOwned::to_owned) .collect::>() } else if let Some(s) = value.as_str() { // 尝试解析字符串化的 JSON 数组 serde_json::from_str::>(s) .ok() .map(|arr| { arr.iter() .filter_map(|v| v.as_str()) .map(str::trim) .filter(|v| !v.is_empty()) .map(ToOwned::to_owned) .collect::>() }) .unwrap_or_default() } else { vec![] }; if paths.is_empty() { return Err(anyhow!("attachments must be an array of local file paths")); } let mut attachments = Vec::with_capacity(paths.len()); for raw_path in paths { // 解析路径(含编码回退),确保能正确访问非 UTF-8 文件名的文件 let resolved_path = resolve_attachment_path(&raw_path); let resolved_path_str = resolved_path.to_string_lossy().to_string(); let metadata = std::fs::metadata(&resolved_path) .map_err(|err| anyhow!("failed to access attachment '{}': {}", raw_path, err))?; if !metadata.is_file() { return Err(anyhow!("attachment path is not a file: {}", raw_path)); } if metadata.len() == 0 { return Err(anyhow!("attachment file is empty: {}", raw_path)); } let content_base64 = (metadata.len() <= 50 * 1024 * 1024) .then(|| { let mut file = std::fs::File::open(&resolved_path)?; let mut buf = Vec::with_capacity(metadata.len() as usize); file.read_to_end(&mut buf)?; Ok::<_, anyhow::Error>(base64::engine::general_purpose::STANDARD.encode(&buf)) }) .transpose()?; let file_name = Path::new(&resolved_path) .file_name() .map(|n| n.to_string_lossy().to_string()); let media_type = infer_media_type(&resolved_path_str); let mut item = MediaItem::new(resolved_path_str, media_type); item.mime_type = mime_guess::from_path(&resolved_path) .first_raw() .map(ToOwned::to_owned); item.content_base64 = content_base64; item.file_name = file_name; attachments.push(item); } Ok(attachments) } fn infer_media_type(path: &str) -> &'static str { let mime = mime_guess::from_path(path).first_or_octet_stream(); if mime.essence_str().starts_with("image/") { return "image"; } match Path::new(path) .extension() .and_then(|ext| ext.to_str()) .map(|ext| ext.to_ascii_lowercase()) .as_deref() { Some("mp3") | Some("wav") | Some("ogg") | Some("m4a") | Some("opus") => "audio", Some("mp4") | Some("mov") | Some("avi") | Some("mkv") => "video", _ => "file", } } fn format_success(outcome: SessionSendOutcome) -> String { match (outcome.text_sent, outcome.attachment_count) { (true, 0) => "Sent 1 text message to the current conversation.".to_string(), (false, count) => format!("Sent {} attachment(s) to the current conversation.", count), (true, count) => format!( "Sent 1 text message and {} attachment(s) to the current conversation.", count ), } } fn error_result(message: &str) -> ToolResult { ToolResult { success: false, output: String::new(), error: Some(message.to_string()), } } #[cfg(test)] mod tests { use super::*; use tempfile::NamedTempFile; const TEST_CHANNEL: &str = "test-channel"; struct MockSender { outcome: SessionSendOutcome, } #[async_trait] impl SessionMessageSender for MockSender { async fn send_to_current_session( &self, _context: &ToolContext, _request: SessionSendRequest, ) -> anyhow::Result { Ok(self.outcome) } } fn context() -> ToolContext { ToolContext { channel_name: Some(TEST_CHANNEL.to_string()), chat_id: Some("chat-1".to_string()), ..ToolContext::default() } } #[tokio::test] async fn send_session_message_supports_text_only() { let tool = SessionSendTool::new(Arc::new(MockSender { outcome: SessionSendOutcome { published_messages: 1, text_sent: true, attachment_count: 0, }, })); let result = tool .execute_with_context(&context(), json!({ "text": "hello" })) .await .unwrap(); assert!(result.success); assert_eq!(result.output, "Sent 1 text message to the current conversation."); } #[tokio::test] async fn send_session_message_rejects_empty_request() { let tool = SessionSendTool::new(Arc::new(MockSender { outcome: SessionSendOutcome { published_messages: 0, text_sent: false, attachment_count: 0, }, })); let result = tool .execute_with_context(&context(), json!({})) .await .unwrap(); assert!(!result.success); assert_eq!( result.error.as_deref(), Some("send_session_message requires non-empty text, attachments, or both") ); } #[test] fn parse_attachments_infers_image_media_type() { let file = NamedTempFile::new().unwrap(); std::fs::write(file.path(), b"demo").unwrap(); let image_path = file.path().with_extension("png"); std::fs::rename(file.path(), &image_path).unwrap(); let attachments = parse_attachments(&json!([image_path.to_string_lossy().to_string()])) .unwrap(); assert_eq!(attachments.len(), 1); assert_eq!(attachments[0].media_type, "image"); } #[test] fn parse_attachments_handles_stringified_json_array() { let file = NamedTempFile::new().unwrap(); std::fs::write(file.path(), b"demo").unwrap(); let txt_path = file.path().with_extension("txt"); std::fs::rename(file.path(), &txt_path).unwrap(); // Test with stringified JSON array (like LLM might send) let path_str = txt_path.to_string_lossy().to_string().replace("\\", "\\\\"); let json_string = format!("[\"{}\"]", path_str); let attachments = parse_attachments(&json!(json_string)).unwrap(); assert_eq!(attachments.len(), 1); assert_eq!(attachments[0].media_type, "file"); } #[test] #[cfg(unix)] fn parse_attachments_resolves_gbk_encoded_filename() { use encoding_rs::GBK; use std::os::unix::ffi::OsStringExt; // UTF-8 中文文件名 let utf8_filename = "测试文件.txt"; // 编码为 GBK 字节 let (gbk_bytes, _, had_errors) = GBK.encode(utf8_filename); assert!(!had_errors); // 在临时目录中创建 GBK 编码文件名的文件 let tmp_dir = tempfile::tempdir().unwrap(); let gbk_osstring = std::ffi::OsString::from_vec(gbk_bytes.into_owned()); let gbk_path = tmp_dir.path().join(gbk_osstring); std::fs::write(&gbk_path, b"test content").unwrap(); // 用 UTF-8 路径调用 parse_attachments let utf8_path = tmp_dir.path().join(utf8_filename); let utf8_path_str = utf8_path.to_string_lossy().to_string(); let attachments = parse_attachments(&json!([utf8_path_str])).unwrap(); assert_eq!(attachments.len(), 1); assert_eq!(attachments[0].media_type, "file"); // 验证文件名能正确提取(用 lossy 方式,因为是 GBK 编码) assert!(attachments[0].file_name.is_some()); } }