feat(tools): add file_read tool with pagination support

- Read file contents with offset/limit pagination
- Returns numbered lines for easy reference
- Handles binary files by returning them base64-encoded
- Supports restricting reads to an allowed directory for security
- Includes 4 unit tests
2026-04-07 23:43:47 +08:00 · 2026-04-07 23:43:47 +08:00 · a9e7aabed4
commit a9e7aabed4
parent d5b6cd24fc
3 changed files with 324 additions and 0 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -25,3 +25,4 @@ tracing-appender = "0.2"
 anyhow = "1.0"
 mime_guess = "2.0"
 base64 = "0.22"
+tempfile = "3"
--- a/src/tools/file_read.rs
+++ b/src/tools/file_read.rs
@ -0,0 +1,321 @@
+use std::io::Read;
+use std::path::Path;
+
+use async_trait::async_trait;
+use serde_json::json;
+
+use crate::bus::message::ContentBlock;
+use crate::tools::traits::{Tool, ToolResult};
+
+/// Size budget for the rendered text output before it is truncated.
+///
+/// Despite the name, this is compared against `String::len()`, i.e. a number
+/// of UTF-8 bytes rather than a number of characters.
+const MAX_CHARS: usize = 128_000;
+/// Number of lines returned when the caller does not supply `limit`.
+const DEFAULT_LIMIT: usize = 2000;
+
+/// Tool that reads a file and returns its contents to the caller.
+pub struct FileReadTool {
+    /// When `Some`, paths that do not lie under this directory are rejected
+    /// during path resolution; when `None`, any path is accepted.
+    allowed_dir: Option<String>,
+}
+
+impl FileReadTool {
+    /// Creates a tool with no directory restriction.
+    pub fn new() -> Self {
+        Self { allowed_dir: None }
+    }
+
+    /// Creates a tool that only accepts paths located inside `dir`.
+    pub fn with_allowed_dir(dir: String) -> Self {
+        Self {
+            allowed_dir: Some(dir),
+        }
+    }
+
+    /// Makes `path` absolute by joining it onto the current working directory
+    /// when it is relative.
+    fn absolutize(path: &Path) -> Result<std::path::PathBuf, String> {
+        if path.is_absolute() {
+            Ok(path.to_path_buf())
+        } else {
+            std::env::current_dir()
+                .map(|cwd| cwd.join(path))
+                .map_err(|e| format!("Failed to get current directory: {}", e))
+        }
+    }
+
+    /// Drops `.` components and resolves `..` components textually, without
+    /// touching the filesystem.
+    fn normalize_lexically(path: &Path) -> std::path::PathBuf {
+        use std::path::Component;
+
+        let mut normalized = std::path::PathBuf::new();
+        for component in path.components() {
+            match component {
+                Component::CurDir => {}
+                Component::ParentDir => {
+                    // `pop` is a no-op at the root, so `..` can never climb above it.
+                    normalized.pop();
+                }
+                other => normalized.push(other.as_os_str()),
+            }
+        }
+        normalized
+    }
+
+    /// Turns `path` into an absolute path and enforces the directory restriction.
+    ///
+    /// Relative paths (and a relative `allowed_dir`) are interpreted against the
+    /// current working directory.
+    ///
+    /// # Errors
+    ///
+    /// Returns a message when the current directory cannot be determined or
+    /// when the path lies outside the allowed directory.
+    fn resolve_path(&self, path: &str) -> Result<std::path::PathBuf, String> {
+        let resolved = Self::absolutize(Path::new(path))?;
+
+        // Check directory restriction
+        if let Some(ref allowed) = self.allowed_dir {
+            let allowed_abs = Self::absolutize(Path::new(allowed))?;
+
+            let inside = match resolved.canonicalize() {
+                // The target exists: compare real locations so that neither `..`
+                // segments nor symlinks can lead outside the allowed directory.
+                Ok(real) => match allowed_abs.canonicalize() {
+                    Ok(real_allowed) => real.starts_with(&real_allowed),
+                    // An existing target cannot live inside a directory that
+                    // cannot be resolved; fail closed.
+                    Err(_) => false,
+                },
+                // The target cannot be canonicalized (typically it does not
+                // exist): fall back to a purely textual check so `..` segments
+                // are still neutralised.
+                Err(_) => Self::normalize_lexically(&resolved)
+                    .starts_with(Self::normalize_lexically(&allowed_abs)),
+            };
+
+            if !inside {
+                return Err(format!(
+                    "Path '{}' is outside allowed directory '{}'",
+                    path, allowed
+                ));
+            }
+        }
+
+        Ok(resolved)
+    }
+}
+
+/// The default tool is unrestricted, exactly like [`FileReadTool::new`].
+impl Default for FileReadTool {
+    fn default() -> Self {
+        Self { allowed_dir: None }
+    }
+}
+
+#[async_trait]
+impl Tool for FileReadTool {
+    /// Identifier under which the tool is exposed.
+    fn name(&self) -> &str {
+        "file_read"
+    }
+
+    /// Human-readable summary of what the tool does.
+    fn description(&self) -> &str {
+        "Read the contents of a file. Returns numbered lines. Use offset and limit to paginate through large files."
+    }
+
+    /// JSON schema of the accepted arguments: a required `path` plus optional
+    /// 1-indexed `offset` and `limit`.
+    fn parameters_schema(&self) -> serde_json::Value {
+        json!({
+            "type": "object",
+            "properties": {
+                "path": {
+                    "type": "string",
+                    "description": "The file path to read"
+                },
+                "offset": {
+                    "type": "integer",
+                    "description": "Line number to start reading from (1-indexed, default 1)",
+                    "minimum": 1
+                },
+                "limit": {
+                    "type": "integer",
+                    "description": "Maximum number of lines to read (default 2000)",
+                    "minimum": 1
+                }
+            },
+            "required": ["path"]
+        })
+    }
+
+    /// This tool never modifies anything.
+    fn read_only(&self) -> bool {
+        true
+    }
+
+    /// Reads the requested file.
+    ///
+    /// UTF-8 files are returned as numbered lines, paginated by `offset` and
+    /// `limit`; any other content is returned base64-encoded together with a
+    /// guessed MIME type. All failures are reported through a `ToolResult`
+    /// with `success: false`; this method itself always returns `Ok`.
+    async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
+        let path = match args.get("path").and_then(|v| v.as_str()) {
+            Some(p) => p,
+            None => {
+                return Ok(read_failure(
+                    "Missing required parameter: path".to_string(),
+                ));
+            }
+        };
+
+        // Clamp before narrowing so a huge value cannot wrap on 32-bit targets.
+        let offset = args
+            .get("offset")
+            .and_then(|v| v.as_u64())
+            .map(|v| v.min(usize::MAX as u64) as usize)
+            .unwrap_or(1);
+
+        let limit = args
+            .get("limit")
+            .and_then(|v| v.as_u64())
+            .map(|v| v.min(usize::MAX as u64) as usize)
+            .unwrap_or(DEFAULT_LIMIT);
+
+        let resolved = match self.resolve_path(path) {
+            Ok(p) => p,
+            Err(e) => return Ok(read_failure(e)),
+        };
+
+        if !resolved.exists() {
+            return Ok(read_failure(format!("File not found: {}", path)));
+        }
+
+        if !resolved.is_file() {
+            return Ok(read_failure(format!("Not a file: {}", path)));
+        }
+
+        // Read the bytes once and decide afterwards whether they are text, so
+        // binary files are not read from disk a second time.
+        // NOTE(review): this is blocking `std::fs` I/O inside an async fn;
+        // consider `spawn_blocking` if the tool runs on a shared runtime.
+        let bytes = match std::fs::read(&resolved) {
+            Ok(b) => b,
+            Err(e) => return Ok(read_failure(format!("Failed to read file: {}", e))),
+        };
+
+        match String::from_utf8(bytes) {
+            Ok(content) => Ok(match render_text_page(&content, offset, limit) {
+                Ok(output) => ToolResult {
+                    success: true,
+                    output,
+                    error: None,
+                },
+                Err(message) => read_failure(message),
+            }),
+            Err(not_utf8) => {
+                // Not valid UTF-8: hand the raw bytes back base64-encoded.
+                // NOTE(review): the encoded payload is not size-limited.
+                use base64::{engine::general_purpose::STANDARD, Engine};
+                let bytes = not_utf8.into_bytes();
+                let encoded = STANDARD.encode(&bytes);
+                let mime = mime_guess::from_path(&resolved)
+                    .first_or_octet_stream()
+                    .to_string();
+                Ok(ToolResult {
+                    success: true,
+                    output: format!(
+                        "(Binary file: {}, {} bytes, base64 encoded)\n{}",
+                        mime,
+                        bytes.len(),
+                        encoded
+                    ),
+                    error: None,
+                })
+            }
+        }
+    }
+}
+
+/// Builds the unsuccessful `ToolResult` carrying `message`.
+fn read_failure(message: String) -> ToolResult {
+    ToolResult {
+        success: false,
+        output: String::new(),
+        error: Some(message),
+    }
+}
+
+/// Renders one page of `content` as `N| text` lines followed by a footer that
+/// says which lines were shown and how to continue.
+///
+/// `offset` is the 1-indexed first line and `limit` the maximum number of
+/// lines. The page is cut short when it would exceed `MAX_CHARS` bytes; the
+/// footer always reflects the lines that were actually emitted, so following
+/// its `offset=` hint never skips a line.
+///
+/// # Errors
+///
+/// Returns a message when `offset` or `limit` is zero, or when `offset` lies
+/// past the last line of a non-empty file.
+fn render_text_page(content: &str, offset: usize, limit: usize) -> Result<String, String> {
+    if offset < 1 {
+        return Err(format!("offset must be at least 1, got {}", offset));
+    }
+    if limit < 1 {
+        // A zero-line page would produce a footer that asks for the same offset again.
+        return Err(format!("limit must be at least 1, got {}", limit));
+    }
+
+    let all_lines: Vec<&str> = content.lines().collect();
+    let total = all_lines.len();
+
+    // An empty file has no line 1, yet reading it from the start is a valid request.
+    let empty_file_from_start = total == 0 && offset == 1;
+    if offset > total && !empty_file_from_start {
+        return Err(format!(
+            "offset {} is beyond end of file ({} lines)",
+            offset, total
+        ));
+    }
+
+    let start = offset - 1;
+    // Saturate so an enormous `limit` cannot overflow the addition.
+    let requested_end = std::cmp::min(start.saturating_add(limit), total);
+
+    let mut result = String::new();
+    // Exclusive index of the last line emitted so far.
+    let mut end = start;
+
+    for line in &all_lines[start..requested_end] {
+        let numbered = format!("{}| {}", end + 1, line);
+        let first = end == start;
+        let needed = numbered.len() + if first { 0 } else { 1 };
+
+        if result.len() + needed <= MAX_CHARS {
+            if !first {
+                result.push('\n');
+            }
+            result.push_str(&numbered);
+            end += 1;
+            continue;
+        }
+
+        if first {
+            // Not even one whole line fits: emit as much of it as the budget
+            // allows, cutting on a character boundary.
+            let mut cut = MAX_CHARS;
+            while !numbered.is_char_boundary(cut) {
+                cut -= 1;
+            }
+            result.push_str(&numbered[..cut]);
+            result.push_str(&format!(
+                "\n\n... ({} chars truncated) ...",
+                numbered.len() - cut
+            ));
+            end += 1;
+        } else {
+            // The remaining lines are not lost; the footer points at them.
+            result.push_str(&format!(
+                "\n\n... (output truncated at {} chars) ...",
+                MAX_CHARS
+            ));
+        }
+        break;
+    }
+
+    if end < total {
+        result.push_str(&format!(
+            "\n\n(Showing lines {}-{} of {}. Use offset={} to continue.)",
+            offset,
+            end,
+            total,
+            end + 1
+        ));
+    } else {
+        result.push_str(&format!("\n\n(End of file — {} lines total)", total));
+    }
+
+    Ok(result)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::io::Write;
+    use tempfile::NamedTempFile;
+
+    /// Creates a temporary file holding "Line 1" through "Line {count}", one per line.
+    fn numbered_fixture(count: usize) -> NamedTempFile {
+        let mut fixture = NamedTempFile::new().unwrap();
+        for n in 1..=count {
+            writeln!(fixture, "Line {}", n).unwrap();
+        }
+        fixture
+    }
+
+    /// Runs an unrestricted tool with `args` and unwraps the outer result.
+    async fn run(args: serde_json::Value) -> ToolResult {
+        FileReadTool::new().execute(args).await.unwrap()
+    }
+
+    /// A small file is returned in full.
+    #[tokio::test]
+    async fn test_read_simple_file() {
+        let fixture = numbered_fixture(3);
+        let outcome = run(json!({ "path": fixture.path().to_str().unwrap() })).await;
+
+        assert!(outcome.success);
+        assert!(["Line 1", "Line 2", "Line 3"]
+            .iter()
+            .all(|needle| outcome.output.contains(*needle)));
+    }
+
+    /// `offset` and `limit` select a window of lines.
+    #[tokio::test]
+    async fn test_read_with_offset_limit() {
+        let fixture = numbered_fixture(10);
+        let request = json!({
+            "path": fixture.path().to_str().unwrap(),
+            "offset": 3,
+            "limit": 2
+        });
+        let outcome = run(request).await;
+
+        assert!(outcome.success);
+        assert!(outcome.output.contains("Line 3"));
+        assert!(outcome.output.contains("Line 4"));
+        assert!(!outcome.output.contains("Line 2"));
+    }
+
+    /// A missing path is reported as a failure, not as an `Err`.
+    #[tokio::test]
+    async fn test_file_not_found() {
+        let outcome = run(json!({ "path": "/nonexistent/file.txt" })).await;
+
+        assert!(!outcome.success);
+        let message = outcome.error.unwrap();
+        assert!(message.contains("not found"));
+    }
+
+    /// Directories are rejected.
+    #[tokio::test]
+    async fn test_is_directory() {
+        let outcome = run(json!({ "path": "." })).await;
+
+        assert!(!outcome.success);
+        let message = outcome.error.unwrap();
+        assert!(message.contains("Not a file"));
+    }
+}
--- a/src/tools/mod.rs
+++ b/src/tools/mod.rs
@ -1,9 +1,11 @@
 pub mod calculator;
+pub mod file_read;
 pub mod registry;
 pub mod schema;
 pub mod traits;

 pub use calculator::CalculatorTool;
+pub use file_read::FileReadTool;
 pub use registry::ToolRegistry;
 pub use schema::{CleaningStrategy, SchemaCleanr};
 pub use traits::{Tool, ToolResult};