feat(file_read): enhance file reading with encoding detection and support for binary files

This commit is contained in:
xiaoxixi 2026-05-13 22:43:03 +08:00
parent 1a3cfbb0af
commit db24d42076
2 changed files with 85 additions and 31 deletions

View File

@ -45,3 +45,4 @@ rmcp = { version = "1.6", default-features = false, features = [
"which-command",
] }
http = "1"
encoding_rs = "0.8"

View File

@ -1,4 +1,5 @@
use async_trait::async_trait;
use encoding_rs::*;
use serde_json::json;
use crate::tools::path_utils;
@ -117,10 +118,23 @@ impl Tool for FileReadTool {
});
}
// Try to read as text
match std::fs::read_to_string(&resolved) {
Ok(content) => {
let all_lines: Vec<&str> = content.lines().collect();
// Read raw bytes and try multiple encodings
let bytes = match std::fs::read(&resolved) {
Ok(b) => b,
Err(e) => {
return Ok(ToolResult {
success: false,
output: String::new(),
error: Some(format!("Failed to read file: {}", e)),
});
}
};
let (content, encoding_label) = decode_text(&bytes);
match content {
Some(text) => {
let all_lines: Vec<&str> = text.lines().collect();
let total = all_lines.len();
if offset < 1 {
@ -182,7 +196,14 @@ impl Tool for FileReadTool {
end + 1
));
} else {
result.push_str(&format!("\n\n(End of file — {} lines total)", total));
result.push_str(&format!(
"\n\n(End of file — {} lines total)",
total
));
}
if let Some(label) = encoding_label {
result.insert_str(0, &format!("(编码: {})\n", label));
}
Ok(ToolResult {
@ -191,10 +212,8 @@ impl Tool for FileReadTool {
error: None,
})
}
Err(e) => {
// Try to read as binary and encode as base64
match std::fs::read(&resolved) {
Ok(bytes) => {
None => {
// Truly binary file — base64 encode
use base64::{engine::general_purpose::STANDARD, Engine};
let encoded = STANDARD.encode(&bytes);
let mime = mime_guess::from_path(&resolved)
@ -211,15 +230,49 @@ impl Tool for FileReadTool {
error: None,
})
}
Err(_) => Ok(ToolResult {
success: false,
output: String::new(),
error: Some(format!("Failed to read file: {}", e)),
}),
}
}
}
/// Decodes raw file contents into text, guessing the encoding.
///
/// Returns `(text, label)`:
/// - `(Some(text), None)` when `bytes` is strictly valid UTF-8 (no label is
///   reported for the default encoding).
/// - `(Some(text), Some(label))` when another decoding was used; `label` names
///   it (`"GB18030"`, a BOM-detected encoding such as `"UTF-16LE"`, or
///   `"UTF-8(lossy)"` when invalid sequences were replaced with U+FFFD).
/// - `(None, None)` when the content does not look like text at all.
fn decode_text(bytes: &[u8]) -> (Option<String>, Option<&'static str>) {
    // Fast path: strict UTF-8.
    if let Ok(text) = std::str::from_utf8(bytes) {
        return (Some(text.to_owned()), None);
    }

    // `Encoding::decode` performs BOM sniffing: when the input starts with a
    // UTF-8/UTF-16LE/UTF-16BE BOM, that encoding is used instead of GB18030 and
    // is handed back as the second tuple element. Derive the label from the
    // encoding actually used so BOM-marked UTF-16 files are not reported as
    // GB18030.
    let (decoded, used, had_errors) = GB18030.decode(bytes);
    if !had_errors {
        // Keep the historical upper-case spelling for the plain GB18030 case.
        let label = if used == GB18030 { "GB18030" } else { used.name() };
        return (Some(decoded.into_owned()), Some(label));
    }

    // No separate GBK or `UTF_8.decode` attempt is made here:
    // - GBK shares the gb18030 decoder, so it fails on exactly the same input.
    // - `UTF_8.decode` can only succeed where `from_utf8` failed via the same
    //   BOM sniffing that the GB18030 attempt above has already performed.

    // Last resort: if more than half of the bytes are printable ASCII or common
    // whitespace, treat the content as text and replace invalid sequences.
    let printable = bytes
        .iter()
        .filter(|&&b| b.is_ascii_graphic() || matches!(b, b' ' | b'\n' | b'\r' | b'\t'))
        .count();
    // `max(1)` guards the division; empty input already returned on the fast path.
    let printable_ratio = printable as f64 / bytes.len().max(1) as f64;
    if printable_ratio > 0.5 {
        let text = String::from_utf8_lossy(bytes).into_owned();
        return (Some(text), Some("UTF-8(lossy)"));
    }

    (None, None)
}
#[cfg(test)]