feat(tools): add file_read tool with pagination support

- Read file contents with offset/limit pagination
- Returns numbered lines for easy reference
- Returns non-UTF-8 (binary) files base64-encoded
- Supports directory restriction for security
- Includes 4 unit tests
This commit is contained in:
xiaoski 2026-04-07 23:43:47 +08:00
parent d5b6cd24fc
commit a9e7aabed4
3 changed files with 324 additions and 0 deletions

View File

@ -25,3 +25,4 @@ tracing-appender = "0.2"
anyhow = "1.0"
mime_guess = "2.0"
base64 = "0.22"
tempfile = "3"

321
src/tools/file_read.rs Normal file
View File

@ -0,0 +1,321 @@
use std::io::Read;
use std::path::Path;
use async_trait::async_trait;
use serde_json::json;
use crate::bus::message::ContentBlock;
use crate::tools::traits::{Tool, ToolResult};
const MAX_CHARS: usize = 128_000;
const DEFAULT_LIMIT: usize = 2000;
/// Tool that reads a file from disk and returns its contents.
///
/// When `allowed_dir` is set, only paths that resolve inside that directory
/// are accepted; everything else is rejected before any file is opened.
pub struct FileReadTool {
    /// Directory that reads are confined to; `None` disables the restriction.
    allowed_dir: Option<String>,
}

impl FileReadTool {
    /// Creates a tool with no directory restriction.
    pub fn new() -> Self {
        Self { allowed_dir: None }
    }

    /// Creates a tool that refuses any path resolving outside `dir`.
    pub fn with_allowed_dir(dir: String) -> Self {
        Self {
            allowed_dir: Some(dir),
        }
    }

    /// Turns `path` into an absolute path and enforces the directory restriction.
    ///
    /// Relative paths are resolved against the current working directory.
    /// Without a restriction the absolute path is returned as-is. With a
    /// restriction, both the allowed directory and the requested path are
    /// resolved (symlinks followed, `.`/`..` removed) before being compared,
    /// and the resolved path is returned so the file that was checked is the
    /// file that gets read.
    ///
    /// # Errors
    ///
    /// Returns a message when the current directory cannot be determined or
    /// when the path resolves outside the allowed directory.
    fn resolve_path(&self, path: &str) -> Result<std::path::PathBuf, String> {
        let requested = Path::new(path);
        let resolved = if requested.is_absolute() {
            requested.to_path_buf()
        } else {
            std::env::current_dir()
                .map_err(|e| format!("Failed to get current directory: {}", e))?
                .join(requested)
        };

        let allowed = match self.allowed_dir.as_deref() {
            Some(dir) => dir,
            None => return Ok(resolved),
        };

        // Comparing raw paths would let `<allowed>/../secret` or a symlink
        // inside the allowed directory escape the restriction, because
        // `Path::starts_with` only compares components textually.
        let allowed_root = Self::canonical_or_lexical(Path::new(allowed));
        let candidate = Self::canonical_or_lexical(&resolved);
        if !candidate.starts_with(&allowed_root) {
            return Err(format!(
                "Path '{}' is outside allowed directory '{}'",
                path, allowed
            ));
        }
        Ok(candidate)
    }

    /// Resolves `path` as far as the filesystem allows.
    ///
    /// Existing paths are fully canonicalized. For a path that does not exist
    /// (yet), `.`/`..` are removed lexically, the deepest existing ancestor is
    /// canonicalized, and the missing tail is re-attached, so a nonexistent
    /// file inside a symlinked directory still compares correctly.
    fn canonical_or_lexical(path: &Path) -> std::path::PathBuf {
        if let Ok(real) = std::fs::canonicalize(path) {
            return real;
        }
        let normalized = Self::lexical_normalize(path);
        // `ancestors()` yields the path itself first; that one already failed.
        for ancestor in normalized.ancestors().skip(1) {
            if let (Ok(real), Ok(tail)) = (
                std::fs::canonicalize(ancestor),
                normalized.strip_prefix(ancestor),
            ) {
                return real.join(tail);
            }
        }
        normalized
    }

    /// Removes `.` and `..` components without touching the filesystem.
    fn lexical_normalize(path: &Path) -> std::path::PathBuf {
        use std::path::Component;

        let mut normalized = std::path::PathBuf::new();
        for component in path.components() {
            match component {
                Component::CurDir => {}
                Component::ParentDir => {
                    // `pop` is a no-op at the root, matching how `/..` is `/`.
                    normalized.pop();
                }
                other => normalized.push(other.as_os_str()),
            }
        }
        normalized
    }
}
/// The default tool carries no directory restriction.
impl Default for FileReadTool {
    /// Builds a `FileReadTool` whose `allowed_dir` is `None`.
    fn default() -> Self {
        Self { allowed_dir: None }
    }
}
#[async_trait]
impl Tool for FileReadTool {
    /// Returns the tool's name, `file_read`.
    fn name(&self) -> &str {
        "file_read"
    }

    /// Returns the human-readable description of the tool.
    fn description(&self) -> &str {
        "Read the contents of a file. Returns numbered lines. Use offset and limit to paginate through large files."
    }

    /// Returns the JSON schema of the accepted arguments: a required `path`
    /// plus optional 1-indexed `offset` and `limit`.
    fn parameters_schema(&self) -> serde_json::Value {
        json!({
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "The file path to read"
                },
                "offset": {
                    "type": "integer",
                    "description": "Line number to start reading from (1-indexed, default 1)",
                    "minimum": 1
                },
                "limit": {
                    "type": "integer",
                    "description": "Maximum number of lines to read (default 2000)",
                    "minimum": 1
                }
            },
            "required": ["path"]
        })
    }

    /// Reports the tool as read-only.
    fn read_only(&self) -> bool {
        true
    }

    /// Reads the file named by `args["path"]`.
    ///
    /// Valid UTF-8 files are returned as numbered lines, paginated by
    /// `offset` (1-indexed, default 1) and `limit` (default `DEFAULT_LIMIT`)
    /// and capped at `MAX_CHARS` bytes of page text. Any other file is
    /// returned base64-encoded together with its guessed MIME type and size.
    ///
    /// Every failure (missing `path`, path rejected by `resolve_path`, file
    /// missing or not a regular file, unreadable file, invalid
    /// `offset`/`limit`) is reported as `Ok` with `success: false` and the
    /// reason in `error`; this method itself never returns `Err`.
    async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
        let path = match args.get("path").and_then(|v| v.as_str()) {
            Some(p) => p,
            None => return Ok(failure("Missing required parameter: path".to_string())),
        };
        let offset = usize_arg(&args, "offset", 1);
        let limit = usize_arg(&args, "limit", DEFAULT_LIMIT);

        let resolved = match self.resolve_path(path) {
            Ok(p) => p,
            Err(e) => return Ok(failure(e)),
        };
        if !resolved.exists() {
            return Ok(failure(format!("File not found: {}", path)));
        }
        if !resolved.is_file() {
            return Ok(failure(format!("Not a file: {}", path)));
        }

        // Read the file once; whether it is text or binary is decided from
        // the bytes instead of reading it a second time on the binary path.
        let bytes = match std::fs::read(&resolved) {
            Ok(b) => b,
            Err(e) => return Ok(failure(format!("Failed to read file: {}", e))),
        };

        match String::from_utf8(bytes) {
            Ok(content) => Ok(match render_page(&content, offset, limit) {
                Ok(output) => ToolResult {
                    success: true,
                    output,
                    error: None,
                },
                Err(message) => failure(message),
            }),
            Err(not_utf8) => {
                use base64::{engine::general_purpose::STANDARD, Engine};
                // Recover the original buffer without copying it.
                let bytes = not_utf8.into_bytes();
                let encoded = STANDARD.encode(&bytes);
                let mime = mime_guess::from_path(&resolved)
                    .first_or_octet_stream()
                    .to_string();
                Ok(ToolResult {
                    success: true,
                    output: format!(
                        "(Binary file: {}, {} bytes, base64 encoded)\n{}",
                        mime,
                        bytes.len(),
                        encoded
                    ),
                    error: None,
                })
            }
        }
    }
}

/// Builds an unsuccessful `ToolResult` carrying `message` as the error.
fn failure(message: String) -> ToolResult {
    ToolResult {
        success: false,
        output: String::new(),
        error: Some(message),
    }
}

/// Reads a non-negative integer argument, falling back to `default` when the
/// key is absent or not an unsigned integer. Values too large for `usize`
/// saturate instead of being silently truncated.
fn usize_arg(args: &serde_json::Value, key: &str, default: usize) -> usize {
    args.get(key)
        .and_then(|v| v.as_u64())
        .map(|v| usize::try_from(v).unwrap_or(usize::MAX))
        .unwrap_or(default)
}

/// Renders one page of `content` as `N| text` lines followed by a footer that
/// says either where to continue or that the end of the file was reached.
///
/// The page text is capped at `MAX_CHARS` bytes. When the cap is hit, only
/// whole lines that fit are kept and the footer's continuation offset points
/// at the first line that was dropped, so no line is skipped. A single line
/// longer than the cap is cut at a character boundary so the caller can still
/// advance past it.
///
/// Returns `Err` with a message when `offset` or `limit` is zero, or when
/// `offset` lies beyond the last line of a non-empty file.
fn render_page(content: &str, offset: usize, limit: usize) -> Result<String, String> {
    if offset < 1 {
        return Err(format!("offset must be at least 1, got {}", offset));
    }
    if limit < 1 {
        // A zero-line page would tell the caller to continue at the same
        // offset forever.
        return Err(format!("limit must be at least 1, got {}", limit));
    }

    let all_lines: Vec<&str> = content.lines().collect();
    let total = all_lines.len();
    // An empty file has no line 1, but reading it from the start is valid.
    let reading_empty_file = total == 0 && offset == 1;
    if offset > total && !reading_empty_file {
        return Err(format!(
            "offset {} is beyond end of file ({} lines)",
            offset, total
        ));
    }

    let start = offset - 1;
    // Saturate: `limit` comes from the caller and may be arbitrarily large.
    let requested_end = start.saturating_add(limit).min(total);

    let numbered: Vec<String> = all_lines[start..requested_end]
        .iter()
        .enumerate()
        .map(|(i, line)| format!("{}| {}", start + i + 1, line))
        .collect();
    // Length of `numbered.join("\n")` without building it.
    let full_len =
        numbered.iter().map(String::len).sum::<usize>() + numbered.len().saturating_sub(1);

    let mut end = requested_end;
    let mut result;
    if full_len <= MAX_CHARS {
        result = numbered.join("\n");
    } else {
        // Count how many whole lines fit into the budget.
        let mut used = 0usize;
        let mut kept = 0usize;
        for line in &numbered {
            let needed = line.len() + usize::from(kept > 0);
            if used + needed > MAX_CHARS {
                break;
            }
            used += needed;
            kept += 1;
        }

        if kept == 0 {
            // The first line alone exceeds the budget (`full_len > MAX_CHARS`
            // guarantees there is one). Emit a clipped version so the page is
            // not empty and pagination can move on.
            let first = &numbered[0];
            let mut cut = MAX_CHARS;
            while !first.is_char_boundary(cut) {
                cut -= 1;
            }
            result = first[..cut].to_string();
            kept = 1;
        } else {
            result = numbered[..kept].join("\n");
        }

        end = start + kept;
        let omitted = full_len - result.len();
        result.push_str(&format!("\n\n... ({} chars truncated) ...", omitted));
    }

    if end < total {
        result.push_str(&format!(
            "\n\n(Showing lines {}-{} of {}. Use offset={} to continue.)",
            offset,
            end,
            total,
            end + 1
        ));
    } else {
        result.push_str(&format!("\n\n(End of file — {} lines total)", total));
    }
    Ok(result)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a temporary file containing `Line 1` through `Line <count>`,
    /// one per line.
    fn numbered_fixture(count: usize) -> NamedTempFile {
        let mut fixture = NamedTempFile::new().unwrap();
        for n in 1..=count {
            writeln!(fixture, "Line {}", n).unwrap();
        }
        fixture
    }

    /// Runs an unrestricted `FileReadTool` with `args` and unwraps the result.
    async fn run(args: serde_json::Value) -> ToolResult {
        FileReadTool::new().execute(args).await.unwrap()
    }

    /// A small file is returned in full.
    #[tokio::test]
    async fn test_read_simple_file() {
        let fixture = numbered_fixture(3);
        let outcome = run(json!({ "path": fixture.path().to_str().unwrap() })).await;

        assert!(outcome.success);
        for expected in ["Line 1", "Line 2", "Line 3"] {
            assert!(outcome.output.contains(expected));
        }
    }

    /// `offset`/`limit` select a window and exclude the lines before it.
    #[tokio::test]
    async fn test_read_with_offset_limit() {
        let fixture = numbered_fixture(10);
        let outcome = run(json!({
            "path": fixture.path().to_str().unwrap(),
            "offset": 3,
            "limit": 2
        }))
        .await;

        assert!(outcome.success);
        assert!(outcome.output.contains("Line 3"));
        assert!(outcome.output.contains("Line 4"));
        assert!(!outcome.output.contains("Line 2"));
    }

    /// A missing path is reported as a failure, not a panic.
    #[tokio::test]
    async fn test_file_not_found() {
        let outcome = run(json!({ "path": "/nonexistent/file.txt" })).await;

        assert!(!outcome.success);
        assert!(outcome.error.unwrap().contains("not found"));
    }

    /// Pointing the tool at a directory is rejected.
    #[tokio::test]
    async fn test_is_directory() {
        let outcome = run(json!({ "path": "." })).await;

        assert!(!outcome.success);
        assert!(outcome.error.unwrap().contains("Not a file"));
    }
}

View File

@ -1,9 +1,11 @@
pub mod calculator;
pub mod file_read;
pub mod registry;
pub mod schema;
pub mod traits;
pub use calculator::CalculatorTool;
pub use file_read::FileReadTool;
pub use registry::ToolRegistry;
pub use schema::{CleaningStrategy, SchemaCleanr};
pub use traits::{Tool, ToolResult};