418 lines
13 KiB
Rust
418 lines
13 KiB
Rust
use async_trait::async_trait;
|
|
use encoding_rs::*;
|
|
use serde_json::json;
|
|
|
|
use crate::tools::path_utils;
|
|
use crate::tools::traits::{Tool, ToolResult};
|
|
|
|
/// Upper bound on the rendered text output; compared against the output's
/// length in bytes before the page is truncated.
const MAX_CHARS: usize = 128 * 1_000;

/// Files whose on-disk size exceeds this many bytes (5 MiB) are rejected
/// before their contents are read.
const MAX_FILE_BYTES: u64 = 5 << 20;

/// Largest binary payload (512 KiB) that is returned inline as base64.
const MAX_BINARY_BYTES: usize = 512 << 10;

/// Number of lines returned when the caller does not supply `limit`.
const DEFAULT_LIMIT: usize = 2_000;
|
|
|
|
/// Tool that reads a file and returns its contents as numbered lines, or as
/// base64 when the file is not decodable as text.
///
/// `Debug` and `Clone` are derived so the tool can be logged and duplicated
/// like any other plain configuration value.
#[derive(Debug, Clone)]
pub struct FileReadTool {
    /// Optional directory handed to the path resolver together with the
    /// requested path. `None` means no directory was configured.
    allowed_dir: Option<String>,
}
|
|
|
|
impl FileReadTool {
|
|
pub fn new() -> Self {
|
|
Self { allowed_dir: None }
|
|
}
|
|
|
|
pub fn with_allowed_dir(dir: String) -> Self {
|
|
Self {
|
|
allowed_dir: Some(dir),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Default for FileReadTool {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl Tool for FileReadTool {
|
|
fn name(&self) -> &str {
|
|
"file_read"
|
|
}
|
|
|
|
fn description(&self) -> &str {
|
|
"Read the contents of a file. Returns numbered lines. Use offset and limit to paginate through large files."
|
|
}
|
|
|
|
fn parameters_schema(&self) -> serde_json::Value {
|
|
json!({
|
|
"type": "object",
|
|
"properties": {
|
|
"path": {
|
|
"type": "string",
|
|
"description": "The file path to read"
|
|
},
|
|
"offset": {
|
|
"type": "integer",
|
|
"description": "Line number to start reading from (1-indexed, default 1)",
|
|
"minimum": 1
|
|
},
|
|
"limit": {
|
|
"type": "integer",
|
|
"description": "Maximum number of lines to read (default 2000)",
|
|
"minimum": 1
|
|
}
|
|
},
|
|
"required": ["path"]
|
|
})
|
|
}
|
|
|
|
fn read_only(&self) -> bool {
|
|
true
|
|
}
|
|
|
|
async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
|
|
let path = match args.get("path").and_then(|v| v.as_str()) {
|
|
Some(p) => p,
|
|
None => {
|
|
return Ok(ToolResult {
|
|
success: false,
|
|
output: String::new(),
|
|
error: Some("Missing required parameter: path".to_string()),
|
|
});
|
|
}
|
|
};
|
|
|
|
let offset = args
|
|
.get("offset")
|
|
.and_then(|v| v.as_u64())
|
|
.map(|v| v as usize)
|
|
.unwrap_or(1);
|
|
|
|
let limit = args
|
|
.get("limit")
|
|
.and_then(|v| v.as_u64())
|
|
.map(|v| v as usize)
|
|
.unwrap_or(DEFAULT_LIMIT);
|
|
|
|
let resolved = match path_utils::resolve_path(path, self.allowed_dir.as_deref()) {
|
|
Ok(p) => p,
|
|
Err(e) => {
|
|
return Ok(ToolResult {
|
|
success: false,
|
|
output: String::new(),
|
|
error: Some(e),
|
|
});
|
|
}
|
|
};
|
|
|
|
if !resolved.exists() {
|
|
return Ok(ToolResult {
|
|
success: false,
|
|
output: String::new(),
|
|
error: Some(format!("File not found: {}", path)),
|
|
});
|
|
}
|
|
|
|
if !resolved.is_file() {
|
|
return Ok(ToolResult {
|
|
success: false,
|
|
output: String::new(),
|
|
error: Some(format!("Not a file: {}", path)),
|
|
});
|
|
}
|
|
|
|
let metadata = match std::fs::metadata(&resolved) {
|
|
Ok(m) => m,
|
|
Err(e) => {
|
|
return Ok(ToolResult {
|
|
success: false,
|
|
output: String::new(),
|
|
error: Some(format!("Failed to inspect file: {}", e)),
|
|
});
|
|
}
|
|
};
|
|
|
|
if metadata.len() > MAX_FILE_BYTES {
|
|
return Ok(ToolResult {
|
|
success: false,
|
|
output: String::new(),
|
|
error: Some(format!(
|
|
"File too large to read safely: {} bytes (max {} bytes). Use a narrower tool or inspect a smaller excerpt.",
|
|
metadata.len(),
|
|
MAX_FILE_BYTES
|
|
)),
|
|
});
|
|
}
|
|
|
|
// Read raw bytes and try multiple encodings
|
|
let bytes = match std::fs::read(&resolved) {
|
|
Ok(b) => b,
|
|
Err(e) => {
|
|
return Ok(ToolResult {
|
|
success: false,
|
|
output: String::new(),
|
|
error: Some(format!("Failed to read file: {}", e)),
|
|
});
|
|
}
|
|
};
|
|
|
|
let (content, encoding_label) = decode_text(&bytes);
|
|
|
|
match content {
|
|
Some(text) => {
|
|
let all_lines: Vec<&str> = text.lines().collect();
|
|
let total = all_lines.len();
|
|
|
|
if offset < 1 {
|
|
return Ok(ToolResult {
|
|
success: false,
|
|
output: String::new(),
|
|
error: Some(format!("offset must be at least 1, got {}", offset)),
|
|
});
|
|
}
|
|
|
|
if offset > total {
|
|
return Ok(ToolResult {
|
|
success: false,
|
|
output: String::new(),
|
|
error: Some(format!(
|
|
"offset {} is beyond end of file ({} lines)",
|
|
offset, total
|
|
)),
|
|
});
|
|
}
|
|
|
|
let start = offset - 1;
|
|
let end = std::cmp::min(start + limit, total);
|
|
let lines: Vec<String> = all_lines[start..end]
|
|
.iter()
|
|
.enumerate()
|
|
.map(|(i, line)| format!("{}| {}", start + i + 1, line))
|
|
.collect();
|
|
|
|
let mut result = lines.join("\n");
|
|
|
|
// Truncate if too long
|
|
if result.len() > MAX_CHARS {
|
|
let original_len = result.len();
|
|
let mut truncated_chars = 0;
|
|
let mut end_idx = 0;
|
|
for (i, line) in lines.iter().enumerate() {
|
|
truncated_chars += line.len() + 1;
|
|
if truncated_chars > MAX_CHARS {
|
|
end_idx = i;
|
|
break;
|
|
}
|
|
end_idx = i + 1;
|
|
}
|
|
result = lines[..end_idx].join("\n");
|
|
let truncated = original_len - result.len();
|
|
result.push_str(&format!("\n\n... ({} chars truncated) ...", truncated));
|
|
}
|
|
|
|
if end < total {
|
|
result.push_str(&format!(
|
|
"\n\n(Showing lines {}-{} of {}. Use offset={} to continue.)",
|
|
offset,
|
|
end,
|
|
total,
|
|
end + 1
|
|
));
|
|
} else {
|
|
result.push_str(&format!("\n\n(End of file — {} lines total)", total));
|
|
}
|
|
|
|
if let Some(label) = encoding_label {
|
|
result.insert_str(0, &format!("(编码: {})\n", label));
|
|
}
|
|
|
|
Ok(ToolResult {
|
|
success: true,
|
|
output: result,
|
|
error: None,
|
|
})
|
|
}
|
|
None => {
|
|
// Truly binary file — base64 encode
|
|
use base64::{Engine, engine::general_purpose::STANDARD};
|
|
if bytes.len() > MAX_BINARY_BYTES {
|
|
let mime = mime_guess::from_path(&resolved)
|
|
.first_or_octet_stream()
|
|
.to_string();
|
|
return Ok(ToolResult {
|
|
success: false,
|
|
output: String::new(),
|
|
error: Some(format!(
|
|
"Binary file too large to inline: {}, {} bytes (max {} bytes).",
|
|
mime,
|
|
bytes.len(),
|
|
MAX_BINARY_BYTES
|
|
)),
|
|
});
|
|
}
|
|
let encoded = STANDARD.encode(&bytes);
|
|
let mime = mime_guess::from_path(&resolved)
|
|
.first_or_octet_stream()
|
|
.to_string();
|
|
Ok(ToolResult {
|
|
success: true,
|
|
output: format!(
|
|
"(Binary file: {}, {} bytes, base64 encoded)\n{}",
|
|
mime,
|
|
bytes.len(),
|
|
encoded
|
|
),
|
|
error: None,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn decode_text(bytes: &[u8]) -> (Option<String>, Option<&'static str>) {
|
|
if bytes.contains(&0) {
|
|
return (None, None);
|
|
}
|
|
|
|
// Try UTF-8 first
|
|
if let Ok(text) = std::str::from_utf8(bytes) {
|
|
return (Some(text.to_string()), None);
|
|
}
|
|
|
|
// Try GB18030
|
|
let (cow, _, had_errors) = GB18030.decode(bytes);
|
|
if !had_errors {
|
|
return (Some(cow.into_owned()), Some("GB18030"));
|
|
}
|
|
|
|
// Try GBK
|
|
let (cow, _, had_errors) = GBK.decode(bytes);
|
|
if !had_errors {
|
|
return (Some(cow.into_owned()), Some("GBK"));
|
|
}
|
|
|
|
// Try UTF-8 lossy as last resort
|
|
let (cow, _, had_errors) = UTF_8.decode(bytes);
|
|
if !had_errors {
|
|
// UTF-8 decode succeeded via encoding_rs but std::str::from_utf8
|
|
// rejected it (maybe due to BOM or unpaired surrogates); still return it
|
|
return (Some(cow.into_owned()), None);
|
|
}
|
|
|
|
// Check if content looks text-ish (>50% printable) for lossy UTF-8 fallback
|
|
let printable_ratio = bytes
|
|
.iter()
|
|
.filter(|&&b| b.is_ascii_graphic() || b == b' ' || b == b'\n' || b == b'\r' || b == b'\t')
|
|
.count() as f64
|
|
/ bytes.len().max(1) as f64;
|
|
|
|
if printable_ratio > 0.5 {
|
|
let text = String::from_utf8_lossy(bytes);
|
|
return (Some(text.into_owned()), Some("UTF-8(lossy)"));
|
|
}
|
|
|
|
(None, None)
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a temporary file holding exactly `contents`.
    fn temp_file_with(contents: &[u8]) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(contents).unwrap();
        file
    }

    /// Returns the path of `file` as a `&str`.
    fn path_of(file: &NamedTempFile) -> &str {
        file.path().to_str().unwrap()
    }

    /// Executes a default-configured tool with `args`.
    async fn run(args: serde_json::Value) -> ToolResult {
        let tool = FileReadTool::new();
        tool.execute(args).await.unwrap()
    }

    #[tokio::test]
    async fn test_read_simple_file() {
        let file = temp_file_with(b"Line 1\nLine 2\nLine 3\n");

        let result = run(json!({ "path": path_of(&file) })).await;

        assert!(result.success);
        for expected in ["Line 1", "Line 2", "Line 3"] {
            assert!(result.output.contains(expected));
        }
    }

    #[tokio::test]
    async fn test_read_with_offset_limit() {
        let contents: String = (1..=10).map(|i| format!("Line {}\n", i)).collect();
        let file = temp_file_with(contents.as_bytes());

        let result = run(json!({
            "path": path_of(&file),
            "offset": 3,
            "limit": 2
        }))
        .await;

        assert!(result.success);
        assert!(result.output.contains("Line 3"));
        assert!(result.output.contains("Line 4"));
        assert!(!result.output.contains("Line 2"));
    }

    #[tokio::test]
    async fn test_file_not_found() {
        let result = run(json!({ "path": "/nonexistent/file.txt" })).await;

        assert!(!result.success);
        assert!(result.error.unwrap().contains("not found"));
    }

    #[tokio::test]
    async fn test_is_directory() {
        let result = run(json!({ "path": "." })).await;

        assert!(!result.success);
        assert!(result.error.unwrap().contains("Not a file"));
    }

    #[tokio::test]
    async fn test_rejects_large_file_before_reading() {
        // Only the length is extended, so no large payload is ever written.
        let mut file = NamedTempFile::new().unwrap();
        file.as_file_mut()
            .set_len(MAX_FILE_BYTES + 1)
            .expect("set large file length");

        let result = run(json!({ "path": path_of(&file) })).await;

        assert!(!result.success);
        assert!(result.error.unwrap().contains("too large"));
    }

    #[tokio::test]
    async fn test_rejects_large_binary_inline() {
        // All-zero content is classified as binary by the NUL-byte check.
        let payload = vec![0_u8; MAX_BINARY_BYTES + 1];
        let file = temp_file_with(&payload);

        let result = run(json!({ "path": path_of(&file) })).await;

        assert!(!result.success);
        assert!(result.error.unwrap().contains("Binary file too large"));
    }
}
|