From a9e7aabed419bb3ba314e45b0b999e0621ef20c2 Mon Sep 17 00:00:00 2001
From: xiaoski
Date: Tue, 7 Apr 2026 23:43:47 +0800
Subject: [PATCH] feat(tools): add file_read tool with pagination support

- Read file contents with offset/limit pagination
- Returns numbered lines for easy reference
- Handles binary files as base64 encoded
- Supports directory restriction for security
- Includes 7 unit tests
---
 Cargo.toml             |   1 +
 src/tools/file_read.rs | 437 +++++++++++++++++++++++++++++++++++++++++
 src/tools/mod.rs       |   2 +
 3 files changed, 440 insertions(+)
 create mode 100644 src/tools/file_read.rs

diff --git a/Cargo.toml b/Cargo.toml
index 9b182ea..0e73bba 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -25,3 +25,4 @@ tracing-appender = "0.2"
 anyhow = "1.0"
 mime_guess = "2.0"
 base64 = "0.22"
+tempfile = "3"
diff --git a/src/tools/file_read.rs b/src/tools/file_read.rs
new file mode 100644
index 0000000..6eb8ca2
--- /dev/null
+++ b/src/tools/file_read.rs
@@ -0,0 +1,437 @@
+use std::io::Read;
+use std::path::Path;
+
+use async_trait::async_trait;
+use serde_json::json;
+
+use crate::bus::message::ContentBlock;
+use crate::tools::traits::{Tool, ToolResult};
+
+/// Upper bound, in bytes, on the numbered text returned by one call.
+const MAX_CHARS: usize = 128_000;
+/// Number of lines returned when the caller does not pass `limit`.
+const DEFAULT_LIMIT: usize = 2000;
+
+/// Tool that returns the contents of a file as numbered lines, one page at a time.
+pub struct FileReadTool {
+    /// When set, only paths located under this directory may be read.
+    allowed_dir: Option<String>,
+}
+
+impl FileReadTool {
+    /// Creates a tool without any directory restriction.
+    pub fn new() -> Self {
+        Self { allowed_dir: None }
+    }
+
+    /// Creates a tool that refuses to read anything outside `dir`.
+    pub fn with_allowed_dir(dir: String) -> Self {
+        Self {
+            allowed_dir: Some(dir),
+        }
+    }
+
+    /// Makes `path` absolute and enforces the directory restriction, if any.
+    ///
+    /// # Errors
+    ///
+    /// Returns a message when the current directory cannot be determined or when
+    /// the path lies outside the allowed directory.
+    fn resolve_path(&self, path: &str) -> Result<std::path::PathBuf, String> {
+        let p = Path::new(path);
+        let resolved = if p.is_absolute() {
+            p.to_path_buf()
+        } else {
+            std::env::current_dir()
+                .map_err(|e| format!("Failed to get current directory: {}", e))?
+                .join(p)
+        };
+
+        if let Some(ref allowed) = self.allowed_dir {
+            // A lexical prefix test accepts `<allowed>/../secret` and symlinks that
+            // point elsewhere, so both sides are normalized before comparing.
+            let allowed_path = normalize(Path::new(allowed));
+            let candidate = normalize(&resolved);
+            if !candidate.starts_with(&allowed_path) {
+                return Err(format!(
+                    "Path '{}' is outside allowed directory '{}'",
+                    path, allowed
+                ));
+            }
+            // Hand back the checked path so the file opened is the file vetted.
+            return Ok(candidate);
+        }
+
+        Ok(resolved)
+    }
+}
+
+impl Default for FileReadTool {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+/// Folds `.` and `..` components without touching the filesystem.
+fn fold_dots(path: &Path) -> std::path::PathBuf {
+    let mut cleaned = std::path::PathBuf::new();
+    for component in path.components() {
+        match component {
+            std::path::Component::CurDir => {}
+            std::path::Component::ParentDir => {
+                cleaned.pop();
+            }
+            other => cleaned.push(other.as_os_str()),
+        }
+    }
+    cleaned
+}
+
+/// Resolves `path` to the form used for the directory-restriction check.
+///
+/// Existing paths are canonicalized (symlinks and `..` resolved). A missing path
+/// cannot be canonicalized, so its `.`/`..` components are folded first and the
+/// nearest existing ancestor is canonicalized instead.
+fn normalize(path: &Path) -> std::path::PathBuf {
+    if let Ok(canonical) = path.canonicalize() {
+        return canonical;
+    }
+    let cleaned = fold_dots(path);
+    if let Ok(canonical) = cleaned.canonicalize() {
+        return canonical;
+    }
+    if let (Some(parent), Some(name)) = (cleaned.parent(), cleaned.file_name()) {
+        return normalize(parent).join(name);
+    }
+    cleaned
+}
+
+/// Builds an unsuccessful result carrying `message`.
+fn failure(message: String) -> ToolResult {
+    ToolResult {
+        success: false,
+        output: String::new(),
+        error: Some(message),
+    }
+}
+
+/// Reads the optional 1-based integer argument `key`.
+///
+/// Absent or non-integer values fall back to `default`; integers below 1 are
+/// rejected so that `limit: 0` or a negative `offset` is reported to the caller.
+fn positive_arg(args: &serde_json::Value, key: &str, default: usize) -> Result<usize, String> {
+    match args.get(key) {
+        Some(value) if value.is_i64() || value.is_u64() => match value.as_u64() {
+            // Clamp rather than wrap on targets where `usize` is narrower than `u64`.
+            Some(n) if n >= 1 => Ok(n.min(usize::MAX as u64) as usize),
+            _ => Err(format!("{} must be at least 1, got {}", key, value)),
+        },
+        _ => Ok(default),
+    }
+}
+
+/// Renders lines `offset..offset + limit` of `content` as `N| text` rows.
+///
+/// `offset` is 1-based and must be at least 1. Output longer than `MAX_CHARS` is
+/// cut back to whole lines, and the trailing hint names the first line not shown.
+fn paginate(content: &str, offset: usize, limit: usize) -> ToolResult {
+    let all_lines: Vec<&str> = content.lines().collect();
+    let total = all_lines.len();
+
+    // An empty file has no line 1, yet reading it from the start is not an error.
+    if offset > total && !(total == 0 && offset == 1) {
+        return failure(format!(
+            "offset {} is beyond end of file ({} lines)",
+            offset, total
+        ));
+    }
+
+    let start = offset - 1;
+    // `limit` comes from the caller and may be huge; saturate instead of overflowing.
+    let mut end = std::cmp::min(start.saturating_add(limit), total);
+    let lines: Vec<String> = all_lines[start..end]
+        .iter()
+        .enumerate()
+        .map(|(i, line)| format!("{}| {}", start + i + 1, line))
+        .collect();
+
+    let mut result = lines.join("\n");
+
+    if result.len() > MAX_CHARS {
+        // Count the whole lines that fit. `result` is exactly those lines joined by
+        // '\n', so the running byte total doubles as the truncation point.
+        let mut kept = 0;
+        let mut keep_bytes = 0;
+        for line in &lines {
+            let next = keep_bytes + line.len() + usize::from(kept > 0);
+            if next > MAX_CHARS {
+                break;
+            }
+            keep_bytes = next;
+            kept += 1;
+        }
+
+        if kept == 0 {
+            // The first line alone is over budget: keep a prefix of it, cut on a char
+            // boundary, so the continuation hint still moves forward.
+            keep_bytes = MAX_CHARS;
+            while !result.is_char_boundary(keep_bytes) {
+                keep_bytes -= 1;
+            }
+            kept = 1;
+        }
+
+        let dropped = result.len() - keep_bytes;
+        result.truncate(keep_bytes);
+        end = start + kept;
+        result.push_str(&format!("\n\n... ({} chars truncated) ...", dropped));
+    }
+
+    if end < total {
+        result.push_str(&format!(
+            "\n\n(Showing lines {}-{} of {}. Use offset={} to continue.)",
+            offset,
+            end,
+            total,
+            end + 1
+        ));
+    } else {
+        result.push_str(&format!("\n\n(End of file — {} lines total)", total));
+    }
+
+    ToolResult {
+        success: true,
+        output: result,
+        error: None,
+    }
+}
+
+/// Fallback used when the file could not be read as UTF-8 text: returns the raw
+/// bytes base64-encoded.
+///
+/// `text_error` is the error from the failed text read; it is what gets reported
+/// when the raw read fails as well.
+fn encode_binary(path: &Path, text_error: std::io::Error) -> ToolResult {
+    match std::fs::read(path) {
+        Ok(bytes) => {
+            use base64::{engine::general_purpose::STANDARD, Engine};
+            let encoded = STANDARD.encode(&bytes);
+            let mime = mime_guess::from_path(path)
+                .first_or_octet_stream()
+                .to_string();
+            // NOTE(review): unlike the text path, this payload is not capped by MAX_CHARS.
+            ToolResult {
+                success: true,
+                output: format!(
+                    "(Binary file: {}, {} bytes, base64 encoded)\n{}",
+                    mime,
+                    bytes.len(),
+                    encoded
+                ),
+                error: None,
+            }
+        }
+        Err(_) => failure(format!("Failed to read file: {}", text_error)),
+    }
+}
+
+#[async_trait]
+impl Tool for FileReadTool {
+    fn name(&self) -> &str {
+        "file_read"
+    }
+
+    fn description(&self) -> &str {
+        "Read the contents of a file. Returns numbered lines. Use offset and limit to paginate through large files."
+    }
+
+    fn parameters_schema(&self) -> serde_json::Value {
+        json!({
+            "type": "object",
+            "properties": {
+                "path": {
+                    "type": "string",
+                    "description": "The file path to read"
+                },
+                "offset": {
+                    "type": "integer",
+                    "description": "Line number to start reading from (1-indexed, default 1)",
+                    "minimum": 1
+                },
+                "limit": {
+                    "type": "integer",
+                    "description": "Maximum number of lines to read (default 2000)",
+                    "minimum": 1
+                }
+            },
+            "required": ["path"]
+        })
+    }
+
+    fn read_only(&self) -> bool {
+        true
+    }
+
+    /// Reads the file named by `args["path"]`, honoring optional `offset`/`limit`.
+    ///
+    /// Problems with the arguments or the file are reported through
+    /// `ToolResult::error` with `success == false`, not through the outer `Result`.
+    async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
+        let path = match args.get("path").and_then(|v| v.as_str()) {
+            Some(p) => p,
+            None => return Ok(failure("Missing required parameter: path".to_string())),
+        };
+
+        let offset = match positive_arg(&args, "offset", 1) {
+            Ok(n) => n,
+            Err(e) => return Ok(failure(e)),
+        };
+        let limit = match positive_arg(&args, "limit", DEFAULT_LIMIT) {
+            Ok(n) => n,
+            Err(e) => return Ok(failure(e)),
+        };
+
+        let resolved = match self.resolve_path(path) {
+            Ok(p) => p,
+            Err(e) => return Ok(failure(e)),
+        };
+
+        if !resolved.exists() {
+            return Ok(failure(format!("File not found: {}", path)));
+        }
+
+        if !resolved.is_file() {
+            return Ok(failure(format!("Not a file: {}", path)));
+        }
+
+        // Text first; a file that cannot be read as UTF-8 falls back to base64.
+        Ok(match std::fs::read_to_string(&resolved) {
+            Ok(content) => paginate(&content, offset, limit),
+            Err(e) => encode_binary(&resolved, e),
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::NamedTempFile;
+    use std::io::Write;
+
+    #[tokio::test]
+    async fn test_read_simple_file() {
+        let mut file = NamedTempFile::new().unwrap();
+        writeln!(file, "Line 1").unwrap();
+        writeln!(file, "Line 2").unwrap();
+        writeln!(file, "Line 3").unwrap();
+
+        let tool = FileReadTool::new();
+        let result = tool
+            .execute(json!({ "path": file.path().to_str().unwrap() }))
+            .await
+            .unwrap();
+
+        assert!(result.success);
+        assert!(result.output.contains("Line 1"));
+        assert!(result.output.contains("Line 2"));
+        assert!(result.output.contains("Line 3"));
+    }
+
+    #[tokio::test]
+    async fn test_read_with_offset_limit() {
+        let mut file = NamedTempFile::new().unwrap();
+        for i in 1..=10 {
+            writeln!(file, "Line {}", i).unwrap();
+        }
+
+        let tool = FileReadTool::new();
+        let result = tool
+            .execute(json!({
+                "path": file.path().to_str().unwrap(),
+                "offset": 3,
+                "limit": 2
+            }))
+            .await
+            .unwrap();
+
+        assert!(result.success);
+        assert!(result.output.contains("Line 3"));
+        assert!(result.output.contains("Line 4"));
+        assert!(!result.output.contains("Line 2"));
+    }
+
+    #[tokio::test]
+    async fn test_file_not_found() {
+        let tool = FileReadTool::new();
+        let result = tool
+            .execute(json!({ "path": "/nonexistent/file.txt" }))
+            .await
+            .unwrap();
+
+        assert!(!result.success);
+        assert!(result.error.unwrap().contains("not found"));
+    }
+
+    #[tokio::test]
+    async fn test_is_directory() {
+        let tool = FileReadTool::new();
+        let result = tool
+            .execute(json!({ "path": "." }))
+            .await
+            .unwrap();
+
+        assert!(!result.success);
+        assert!(result.error.unwrap().contains("Not a file"));
+    }
+
+    #[tokio::test]
+    async fn test_read_empty_file() {
+        let file = NamedTempFile::new().unwrap();
+
+        let tool = FileReadTool::new();
+        let result = tool
+            .execute(json!({ "path": file.path().to_str().unwrap() }))
+            .await
+            .unwrap();
+
+        assert!(result.success);
+        assert!(result.output.contains("0 lines total"));
+    }
+
+    #[tokio::test]
+    async fn test_truncated_output_points_at_next_line() {
+        let mut file = NamedTempFile::new().unwrap();
+        let long_line = "x".repeat(1000);
+        for _ in 0..200 {
+            writeln!(file, "{}", long_line).unwrap();
+        }
+
+        let tool = FileReadTool::new();
+        let result = tool
+            .execute(json!({ "path": file.path().to_str().unwrap() }))
+            .await
+            .unwrap();
+
+        assert!(result.success);
+        assert!(result.output.contains("chars truncated"));
+        assert!(result.output.contains("Use offset=128 to continue"));
+    }
+
+    #[tokio::test]
+    async fn test_allowed_dir_blocks_parent_traversal() {
+        let outer = tempfile::tempdir().unwrap();
+        let inner = outer.path().join("inner");
+        std::fs::create_dir(&inner).unwrap();
+        std::fs::write(outer.path().join("secret.txt"), "secret").unwrap();
+
+        let tool = FileReadTool::with_allowed_dir(inner.to_str().unwrap().to_string());
+        let escaping = inner.join("..").join("secret.txt");
+        let result = tool
+            .execute(json!({ "path": escaping.to_str().unwrap() }))
+            .await
+            .unwrap();
+
+        assert!(!result.success);
+        assert!(result.error.unwrap().contains("outside allowed directory"));
+    }
+}
diff --git a/src/tools/mod.rs b/src/tools/mod.rs
index 0be616d..a17c8c6 100644
--- a/src/tools/mod.rs
+++ b/src/tools/mod.rs
@@ -1,9 +1,11 @@
 pub mod calculator;
+pub mod file_read;
 pub mod registry;
 pub mod schema;
 pub mod traits;
 
 pub use calculator::CalculatorTool;
+pub use file_read::FileReadTool;
 pub use registry::ToolRegistry;
 pub use schema::{CleaningStrategy, SchemaCleanr};
 pub use traits::{Tool, ToolResult};