use async_trait::async_trait; use encoding_rs::*; use serde_json::json; use crate::tools::path_utils; use crate::tools::traits::{Tool, ToolResult}; const MAX_CHARS: usize = 128_000; const MAX_FILE_BYTES: u64 = 5 * 1024 * 1024; const MAX_BINARY_BYTES: usize = 512 * 1024; const DEFAULT_LIMIT: usize = 2000; pub struct FileReadTool { allowed_dir: Option, } impl FileReadTool { pub fn new() -> Self { Self { allowed_dir: None } } pub fn with_allowed_dir(dir: String) -> Self { Self { allowed_dir: Some(dir), } } } impl Default for FileReadTool { fn default() -> Self { Self::new() } } #[async_trait] impl Tool for FileReadTool { fn name(&self) -> &str { "file_read" } fn description(&self) -> &str { "Read the contents of a file. Returns numbered lines. Use offset and limit to paginate through large files." } fn parameters_schema(&self) -> serde_json::Value { json!({ "type": "object", "properties": { "path": { "type": "string", "description": "The file path to read" }, "offset": { "type": "integer", "description": "Line number to start reading from (1-indexed, default 1)", "minimum": 1 }, "limit": { "type": "integer", "description": "Maximum number of lines to read (default 2000)", "minimum": 1 } }, "required": ["path"] }) } fn read_only(&self) -> bool { true } async fn execute(&self, args: serde_json::Value) -> anyhow::Result { let path = match args.get("path").and_then(|v| v.as_str()) { Some(p) => p, None => { return Ok(ToolResult { success: false, output: String::new(), error: Some("Missing required parameter: path".to_string()), }); } }; let offset = args .get("offset") .and_then(|v| v.as_u64()) .map(|v| v as usize) .unwrap_or(1); let limit = args .get("limit") .and_then(|v| v.as_u64()) .map(|v| v as usize) .unwrap_or(DEFAULT_LIMIT); let resolved = match path_utils::resolve_path(path, self.allowed_dir.as_deref()) { Ok(p) => p, Err(e) => { return Ok(ToolResult { success: false, output: String::new(), error: Some(e), }); } }; if !resolved.exists() { return Ok(ToolResult { success: false, output: String::new(), error: Some(format!("File not found: {}", path)), }); } if !resolved.is_file() { return Ok(ToolResult { success: false, output: String::new(), error: Some(format!("Not a file: {}", path)), }); } let metadata = match std::fs::metadata(&resolved) { Ok(m) => m, Err(e) => { return Ok(ToolResult { success: false, output: String::new(), error: Some(format!("Failed to inspect file: {}", e)), }); } }; if metadata.len() > MAX_FILE_BYTES { return Ok(ToolResult { success: false, output: String::new(), error: Some(format!( "File too large to read safely: {} bytes (max {} bytes). Use a narrower tool or inspect a smaller excerpt.", metadata.len(), MAX_FILE_BYTES )), }); } // Read raw bytes and try multiple encodings let bytes = match std::fs::read(&resolved) { Ok(b) => b, Err(e) => { return Ok(ToolResult { success: false, output: String::new(), error: Some(format!("Failed to read file: {}", e)), }); } }; let (content, encoding_label) = decode_text(&bytes); match content { Some(text) => { let all_lines: Vec<&str> = text.lines().collect(); let total = all_lines.len(); if offset < 1 { return Ok(ToolResult { success: false, output: String::new(), error: Some(format!("offset must be at least 1, got {}", offset)), }); } if offset > total { return Ok(ToolResult { success: false, output: String::new(), error: Some(format!( "offset {} is beyond end of file ({} lines)", offset, total )), }); } let start = offset - 1; let end = std::cmp::min(start + limit, total); let lines: Vec = all_lines[start..end] .iter() .enumerate() .map(|(i, line)| format!("{}| {}", start + i + 1, line)) .collect(); let mut result = lines.join("\n"); // Truncate if too long if result.len() > MAX_CHARS { let original_len = result.len(); let mut truncated_chars = 0; let mut end_idx = 0; for (i, line) in lines.iter().enumerate() { truncated_chars += line.len() + 1; if truncated_chars > MAX_CHARS { end_idx = i; break; } end_idx = i + 1; } result = lines[..end_idx].join("\n"); let truncated = original_len - result.len(); result.push_str(&format!("\n\n... ({} chars truncated) ...", truncated)); } if end < total { result.push_str(&format!( "\n\n(Showing lines {}-{} of {}. Use offset={} to continue.)", offset, end, total, end + 1 )); } else { result.push_str(&format!("\n\n(End of file — {} lines total)", total)); } if let Some(label) = encoding_label { result.insert_str(0, &format!("(įž–į : {})\n", label)); } Ok(ToolResult { success: true, output: result, error: None, }) } None => { // Truly binary file — base64 encode use base64::{Engine, engine::general_purpose::STANDARD}; if bytes.len() > MAX_BINARY_BYTES { let mime = mime_guess::from_path(&resolved) .first_or_octet_stream() .to_string(); return Ok(ToolResult { success: false, output: String::new(), error: Some(format!( "Binary file too large to inline: {}, {} bytes (max {} bytes).", mime, bytes.len(), MAX_BINARY_BYTES )), }); } let encoded = STANDARD.encode(&bytes); let mime = mime_guess::from_path(&resolved) .first_or_octet_stream() .to_string(); Ok(ToolResult { success: true, output: format!( "(Binary file: {}, {} bytes, base64 encoded)\n{}", mime, bytes.len(), encoded ), error: None, }) } } } } fn decode_text(bytes: &[u8]) -> (Option, Option<&'static str>) { if bytes.contains(&0) { return (None, None); } // Try UTF-8 first if let Ok(text) = std::str::from_utf8(bytes) { return (Some(text.to_string()), None); } // Try GB18030 let (cow, _, had_errors) = GB18030.decode(bytes); if !had_errors { return (Some(cow.into_owned()), Some("GB18030")); } // Try GBK let (cow, _, had_errors) = GBK.decode(bytes); if !had_errors { return (Some(cow.into_owned()), Some("GBK")); } // Try UTF-8 lossy as last resort let (cow, _, had_errors) = UTF_8.decode(bytes); if !had_errors { // UTF-8 decode succeeded via encoding_rs but std::str::from_utf8 // rejected it (maybe due to BOM or unpaired surrogates); still return it return (Some(cow.into_owned()), None); } // Check if content looks text-ish (>50% printable) for lossy UTF-8 fallback let printable_ratio = bytes .iter() .filter(|&&b| b.is_ascii_graphic() || b == b' ' || b == b'\n' || b == b'\r' || b == b'\t') .count() as f64 / bytes.len().max(1) as f64; if printable_ratio > 0.5 { let text = String::from_utf8_lossy(bytes); return (Some(text.into_owned()), Some("UTF-8(lossy)")); } (None, None) } #[cfg(test)] mod tests { use super::*; use std::io::Write; use tempfile::NamedTempFile; #[tokio::test] async fn test_read_simple_file() { let mut file = NamedTempFile::new().unwrap(); writeln!(file, "Line 1").unwrap(); writeln!(file, "Line 2").unwrap(); writeln!(file, "Line 3").unwrap(); let tool = FileReadTool::new(); let result = tool .execute(json!({ "path": file.path().to_str().unwrap() })) .await .unwrap(); assert!(result.success); assert!(result.output.contains("Line 1")); assert!(result.output.contains("Line 2")); assert!(result.output.contains("Line 3")); } #[tokio::test] async fn test_read_with_offset_limit() { let mut file = NamedTempFile::new().unwrap(); for i in 1..=10 { writeln!(file, "Line {}", i).unwrap(); } let tool = FileReadTool::new(); let result = tool .execute(json!({ "path": file.path().to_str().unwrap(), "offset": 3, "limit": 2 })) .await .unwrap(); assert!(result.success); assert!(result.output.contains("Line 3")); assert!(result.output.contains("Line 4")); assert!(!result.output.contains("Line 2")); } #[tokio::test] async fn test_file_not_found() { let tool = FileReadTool::new(); let result = tool .execute(json!({ "path": "/nonexistent/file.txt" })) .await .unwrap(); assert!(!result.success); assert!(result.error.unwrap().contains("not found")); } #[tokio::test] async fn test_is_directory() { let tool = FileReadTool::new(); let result = tool.execute(json!({ "path": "." })).await.unwrap(); assert!(!result.success); assert!(result.error.unwrap().contains("Not a file")); } #[tokio::test] async fn test_rejects_large_file_before_reading() { let mut file = NamedTempFile::new().unwrap(); file.as_file_mut() .set_len(MAX_FILE_BYTES + 1) .expect("set large file length"); let tool = FileReadTool::new(); let result = tool .execute(json!({ "path": file.path().to_str().unwrap() })) .await .unwrap(); assert!(!result.success); assert!(result.error.unwrap().contains("too large")); } #[tokio::test] async fn test_rejects_large_binary_inline() { let mut file = NamedTempFile::new().unwrap(); let bytes = vec![0_u8; MAX_BINARY_BYTES + 1]; file.write_all(&bytes).unwrap(); let tool = FileReadTool::new(); let result = tool .execute(json!({ "path": file.path().to_str().unwrap() })) .await .unwrap(); assert!(!result.success); assert!(result.error.unwrap().contains("Binary file too large")); } }