feat(tools): add file_read tool with pagination support
- Read file contents with offset/limit pagination
- Returns numbered lines for easy reference
- Handles binary files by returning them base64-encoded
- Supports directory restriction for security
- Includes 4 unit tests
This commit is contained in:
parent
d5b6cd24fc
commit
a9e7aabed4
@ -25,3 +25,4 @@ tracing-appender = "0.2"
|
||||
anyhow = "1.0"
|
||||
mime_guess = "2.0"
|
||||
base64 = "0.22"
|
||||
tempfile = "3"
|
||||
|
||||
321
src/tools/file_read.rs
Normal file
321
src/tools/file_read.rs
Normal file
@ -0,0 +1,321 @@
|
||||
use std::io::Read;
|
||||
use std::path::Path;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::bus::message::ContentBlock;
|
||||
use crate::tools::traits::{Tool, ToolResult};
|
||||
|
||||
/// Size cap for a rendered text page. Despite the name, it is compared against
/// the output string's `len()`, i.e. a byte count rather than a character count.
const MAX_CHARS: usize = 128_000;
|
||||
/// Number of lines returned per call when the caller does not supply `limit`.
const DEFAULT_LIMIT: usize = 2000;
|
||||
|
||||
/// Tool that reads a file from disk, optionally confined to one directory tree.
pub struct FileReadTool {
    /// When set, only paths that resolve inside this directory are accepted.
    allowed_dir: Option<String>,
}

impl FileReadTool {
    /// Creates a tool with no directory restriction.
    pub fn new() -> Self {
        Self { allowed_dir: None }
    }

    /// Creates a tool that rejects any path resolving outside `dir`.
    pub fn with_allowed_dir(dir: String) -> Self {
        Self {
            allowed_dir: Some(dir),
        }
    }

    /// Turns `path` into an absolute path, joining relative paths onto the
    /// current working directory.
    fn absolutize(path: &Path) -> Result<std::path::PathBuf, String> {
        if path.is_absolute() {
            Ok(path.to_path_buf())
        } else {
            std::env::current_dir()
                .map(|cwd| cwd.join(path))
                .map_err(|e| format!("Failed to get current directory: {}", e))
        }
    }

    /// Collapses `.` and `..` components lexically, without touching the
    /// filesystem. A `..` at the root stays at the root.
    fn normalize(path: &Path) -> std::path::PathBuf {
        let mut out = std::path::PathBuf::new();
        for component in path.components() {
            match component {
                std::path::Component::CurDir => {}
                std::path::Component::ParentDir => {
                    // `pop` is a no-op once only the root/prefix is left.
                    out.pop();
                }
                other => out.push(other.as_os_str()),
            }
        }
        out
    }

    /// Resolves symlinks and `..` through the filesystem where possible.
    ///
    /// If the full path cannot be canonicalized (typically because it does not
    /// exist), the deepest ancestor that can be is canonicalized and the
    /// remaining, lexically normalized, tail is appended. This keeps the result
    /// comparable with a canonicalized root even for not-yet-existing files.
    fn canonicalize_lenient(path: &Path) -> std::path::PathBuf {
        if let Ok(real) = std::fs::canonicalize(path) {
            return real;
        }

        let normalized = Self::normalize(path);
        for ancestor in normalized.ancestors() {
            let real = match std::fs::canonicalize(ancestor) {
                Ok(real) => real,
                Err(_) => continue,
            };
            return match normalized.strip_prefix(ancestor) {
                // Joining an empty tail would append a trailing separator.
                Ok(rest) if rest.as_os_str().is_empty() => real,
                Ok(rest) => real.join(rest),
                Err(_) => real,
            };
        }
        normalized
    }

    /// Resolves `path` to an absolute path and enforces the directory
    /// restriction, if one is configured.
    ///
    /// Without a restriction the absolute path is returned untouched. With a
    /// restriction, both the requested path and the allowed directory are
    /// resolved through the filesystem before comparing, so `..` segments and
    /// symlinks cannot be used to step outside the allowed tree; the resolved
    /// path (the one that was actually checked) is returned.
    ///
    /// # Errors
    ///
    /// Returns a message when the current directory cannot be determined or
    /// when the path lies outside the allowed directory.
    fn resolve_path(&self, path: &str) -> Result<std::path::PathBuf, String> {
        let resolved = Self::absolutize(Path::new(path))?;

        let allowed = match self.allowed_dir.as_deref() {
            Some(dir) => dir,
            None => return Ok(resolved),
        };

        // A relative allowed directory is interpreted against the working
        // directory, the same way a relative request path is.
        let root = Self::canonicalize_lenient(&Self::absolutize(Path::new(allowed))?);
        let target = Self::canonicalize_lenient(&resolved);

        if !target.starts_with(&root) {
            return Err(format!(
                "Path '{}' is outside allowed directory '{}'",
                path, allowed
            ));
        }

        Ok(target)
    }
}
|
||||
|
||||
impl Default for FileReadTool {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Tool for FileReadTool {
|
||||
fn name(&self) -> &str {
|
||||
"file_read"
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Read the contents of a file. Returns numbered lines. Use offset and limit to paginate through large files."
|
||||
}
|
||||
|
||||
fn parameters_schema(&self) -> serde_json::Value {
|
||||
json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {
|
||||
"type": "string",
|
||||
"description": "The file path to read"
|
||||
},
|
||||
"offset": {
|
||||
"type": "integer",
|
||||
"description": "Line number to start reading from (1-indexed, default 1)",
|
||||
"minimum": 1
|
||||
},
|
||||
"limit": {
|
||||
"type": "integer",
|
||||
"description": "Maximum number of lines to read (default 2000)",
|
||||
"minimum": 1
|
||||
}
|
||||
},
|
||||
"required": ["path"]
|
||||
})
|
||||
}
|
||||
|
||||
fn read_only(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
|
||||
let path = match args.get("path").and_then(|v| v.as_str()) {
|
||||
Some(p) => p,
|
||||
None => {
|
||||
return Ok(ToolResult {
|
||||
success: false,
|
||||
output: String::new(),
|
||||
error: Some("Missing required parameter: path".to_string()),
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let offset = args
|
||||
.get("offset")
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|v| v as usize)
|
||||
.unwrap_or(1);
|
||||
|
||||
let limit = args
|
||||
.get("limit")
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|v| v as usize)
|
||||
.unwrap_or(DEFAULT_LIMIT);
|
||||
|
||||
let resolved = match self.resolve_path(path) {
|
||||
Ok(p) => p,
|
||||
Err(e) => {
|
||||
return Ok(ToolResult {
|
||||
success: false,
|
||||
output: String::new(),
|
||||
error: Some(e),
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
if !resolved.exists() {
|
||||
return Ok(ToolResult {
|
||||
success: false,
|
||||
output: String::new(),
|
||||
error: Some(format!("File not found: {}", path)),
|
||||
});
|
||||
}
|
||||
|
||||
if !resolved.is_file() {
|
||||
return Ok(ToolResult {
|
||||
success: false,
|
||||
output: String::new(),
|
||||
error: Some(format!("Not a file: {}", path)),
|
||||
});
|
||||
}
|
||||
|
||||
// Try to read as text
|
||||
match std::fs::read_to_string(&resolved) {
|
||||
Ok(content) => {
|
||||
let all_lines: Vec<&str> = content.lines().collect();
|
||||
let total = all_lines.len();
|
||||
|
||||
if offset < 1 {
|
||||
return Ok(ToolResult {
|
||||
success: false,
|
||||
output: String::new(),
|
||||
error: Some(format!("offset must be at least 1, got {}", offset)),
|
||||
});
|
||||
}
|
||||
|
||||
if offset > total {
|
||||
return Ok(ToolResult {
|
||||
success: false,
|
||||
output: String::new(),
|
||||
error: Some(format!(
|
||||
"offset {} is beyond end of file ({} lines)",
|
||||
offset, total
|
||||
)),
|
||||
});
|
||||
}
|
||||
|
||||
let start = offset - 1;
|
||||
let end = std::cmp::min(start + limit, total);
|
||||
let lines: Vec<String> = all_lines[start..end]
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, line)| format!("{}| {}", start + i + 1, line))
|
||||
.collect();
|
||||
|
||||
let mut result = lines.join("\n");
|
||||
|
||||
// Truncate if too long
|
||||
if result.len() > MAX_CHARS {
|
||||
let mut truncated_chars = 0;
|
||||
let mut end_idx = 0;
|
||||
for (i, line) in lines.iter().enumerate() {
|
||||
truncated_chars += line.len() + 1;
|
||||
if truncated_chars > MAX_CHARS {
|
||||
end_idx = i;
|
||||
break;
|
||||
}
|
||||
end_idx = i + 1;
|
||||
}
|
||||
result = lines[..end_idx].join("\n");
|
||||
result.push_str(&format!(
|
||||
"\n\n... ({} chars truncated) ...",
|
||||
result.len() - MAX_CHARS
|
||||
));
|
||||
}
|
||||
|
||||
if end < total {
|
||||
result.push_str(&format!(
|
||||
"\n\n(Showing lines {}-{} of {}. Use offset={} to continue.)",
|
||||
offset,
|
||||
end,
|
||||
total,
|
||||
end + 1
|
||||
));
|
||||
} else {
|
||||
result.push_str(&format!("\n\n(End of file — {} lines total)", total));
|
||||
}
|
||||
|
||||
Ok(ToolResult {
|
||||
success: true,
|
||||
output: result,
|
||||
error: None,
|
||||
})
|
||||
}
|
||||
Err(e) => {
|
||||
// Try to read as binary and encode as base64
|
||||
match std::fs::read(&resolved) {
|
||||
Ok(bytes) => {
|
||||
use base64::{engine::general_purpose::STANDARD, Engine};
|
||||
let encoded = STANDARD.encode(&bytes);
|
||||
let mime = mime_guess::from_path(&resolved)
|
||||
.first_or_octet_stream()
|
||||
.to_string();
|
||||
Ok(ToolResult {
|
||||
success: true,
|
||||
output: format!(
|
||||
"(Binary file: {}, {} bytes, base64 encoded)\n{}",
|
||||
mime,
|
||||
bytes.len(),
|
||||
encoded
|
||||
),
|
||||
error: None,
|
||||
})
|
||||
}
|
||||
Err(_) => Ok(ToolResult {
|
||||
success: false,
|
||||
output: String::new(),
|
||||
error: Some(format!("Failed to read file: {}", e)),
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a temp file holding `Line 1` through `Line {count}`, one per line.
    fn numbered_fixture(count: usize) -> NamedTempFile {
        let mut fixture = NamedTempFile::new().unwrap();
        for n in 1..=count {
            writeln!(fixture, "Line {}", n).unwrap();
        }
        fixture
    }

    /// Executes an unrestricted tool with `args` and unwraps the outer result.
    async fn run(args: serde_json::Value) -> ToolResult {
        let tool = FileReadTool::new();
        tool.execute(args).await.unwrap()
    }

    // A plain read returns every line of a small file.
    #[tokio::test]
    async fn test_read_simple_file() {
        let fixture = numbered_fixture(3);
        let location = fixture.path().to_str().unwrap();

        let outcome = run(json!({ "path": location })).await;

        assert!(outcome.success);
        for expected in ["Line 1", "Line 2", "Line 3"].iter() {
            assert!(outcome.output.contains(expected));
        }
    }

    // `offset`/`limit` select a window and leave earlier lines out.
    #[tokio::test]
    async fn test_read_with_offset_limit() {
        let fixture = numbered_fixture(10);
        let request = json!({
            "path": fixture.path().to_str().unwrap(),
            "offset": 3,
            "limit": 2
        });

        let outcome = run(request).await;

        assert!(outcome.success);
        assert!(outcome.output.contains("Line 3"));
        assert!(outcome.output.contains("Line 4"));
        assert!(!outcome.output.contains("Line 2"));
    }

    // A missing path is reported as a failed result, not a panic.
    #[tokio::test]
    async fn test_file_not_found() {
        let outcome = run(json!({ "path": "/nonexistent/file.txt" })).await;

        assert!(!outcome.success);
        let message = outcome.error.unwrap();
        assert!(message.contains("not found"));
    }

    // Pointing the tool at a directory is rejected.
    #[tokio::test]
    async fn test_is_directory() {
        let outcome = run(json!({ "path": "." })).await;

        assert!(!outcome.success);
        let message = outcome.error.unwrap();
        assert!(message.contains("Not a file"));
    }
}
|
||||
@ -1,9 +1,11 @@
|
||||
pub mod calculator;
|
||||
pub mod file_read;
|
||||
pub mod registry;
|
||||
pub mod schema;
|
||||
pub mod traits;
|
||||
|
||||
pub use calculator::CalculatorTool;
|
||||
pub use file_read::FileReadTool;
|
||||
pub use registry::ToolRegistry;
|
||||
pub use schema::{CleaningStrategy, SchemaCleanr};
|
||||
pub use traits::{Tool, ToolResult};
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user