feat(file_read): enhance file reading with encoding detection and support for binary files
This commit is contained in:
parent
1a3cfbb0af
commit
db24d42076
@ -45,3 +45,4 @@ rmcp = { version = "1.6", default-features = false, features = [
|
|||||||
"which-command",
|
"which-command",
|
||||||
] }
|
] }
|
||||||
http = "1"
|
http = "1"
|
||||||
|
encoding_rs = "0.8"
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
|
use encoding_rs::*;
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
|
|
||||||
use crate::tools::path_utils;
|
use crate::tools::path_utils;
|
||||||
@ -117,10 +118,23 @@ impl Tool for FileReadTool {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try to read as text
|
// Read raw bytes and try multiple encodings
|
||||||
match std::fs::read_to_string(&resolved) {
|
let bytes = match std::fs::read(&resolved) {
|
||||||
Ok(content) => {
|
Ok(b) => b,
|
||||||
let all_lines: Vec<&str> = content.lines().collect();
|
Err(e) => {
|
||||||
|
return Ok(ToolResult {
|
||||||
|
success: false,
|
||||||
|
output: String::new(),
|
||||||
|
error: Some(format!("Failed to read file: {}", e)),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let (content, encoding_label) = decode_text(&bytes);
|
||||||
|
|
||||||
|
match content {
|
||||||
|
Some(text) => {
|
||||||
|
let all_lines: Vec<&str> = text.lines().collect();
|
||||||
let total = all_lines.len();
|
let total = all_lines.len();
|
||||||
|
|
||||||
if offset < 1 {
|
if offset < 1 {
|
||||||
@ -182,7 +196,14 @@ impl Tool for FileReadTool {
|
|||||||
end + 1
|
end + 1
|
||||||
));
|
));
|
||||||
} else {
|
} else {
|
||||||
result.push_str(&format!("\n\n(End of file — {} lines total)", total));
|
result.push_str(&format!(
|
||||||
|
"\n\n(End of file — {} lines total)",
|
||||||
|
total
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(label) = encoding_label {
|
||||||
|
result.insert_str(0, &format!("(编码: {})\n", label));
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(ToolResult {
|
Ok(ToolResult {
|
||||||
@ -191,10 +212,8 @@ impl Tool for FileReadTool {
|
|||||||
error: None,
|
error: None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
Err(e) => {
|
None => {
|
||||||
// Try to read as binary and encode as base64
|
// Truly binary file — base64 encode
|
||||||
match std::fs::read(&resolved) {
|
|
||||||
Ok(bytes) => {
|
|
||||||
use base64::{engine::general_purpose::STANDARD, Engine};
|
use base64::{engine::general_purpose::STANDARD, Engine};
|
||||||
let encoded = STANDARD.encode(&bytes);
|
let encoded = STANDARD.encode(&bytes);
|
||||||
let mime = mime_guess::from_path(&resolved)
|
let mime = mime_guess::from_path(&resolved)
|
||||||
@ -211,15 +230,49 @@ impl Tool for FileReadTool {
|
|||||||
error: None,
|
error: None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
Err(_) => Ok(ToolResult {
|
|
||||||
success: false,
|
|
||||||
output: String::new(),
|
|
||||||
error: Some(format!("Failed to read file: {}", e)),
|
|
||||||
}),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn decode_text(bytes: &[u8]) -> (Option<String>, Option<&'static str>) {
|
||||||
|
// Try UTF-8 first
|
||||||
|
if let Ok(text) = std::str::from_utf8(bytes) {
|
||||||
|
return (Some(text.to_string()), None);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Try GB18030
|
||||||
|
let (cow, _, had_errors) = GB18030.decode(bytes);
|
||||||
|
if !had_errors {
|
||||||
|
return (Some(cow.into_owned()), Some("GB18030"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Try GBK
|
||||||
|
let (cow, _, had_errors) = GBK.decode(bytes);
|
||||||
|
if !had_errors {
|
||||||
|
return (Some(cow.into_owned()), Some("GBK"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try UTF-8 lossy as last resort
|
||||||
|
let (cow, _, had_errors) = UTF_8.decode(bytes);
|
||||||
|
if !had_errors {
|
||||||
|
// UTF-8 decode succeeded via encoding_rs but std::str::from_utf8
|
||||||
|
// rejected it (maybe due to BOM or unpaired surrogates); still return it
|
||||||
|
return (Some(cow.into_owned()), None);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if content looks text-ish (>50% printable) for lossy UTF-8 fallback
|
||||||
|
let printable_ratio = bytes
|
||||||
|
.iter()
|
||||||
|
.filter(|&&b| b.is_ascii_graphic() || b == b' ' || b == b'\n' || b == b'\r' || b == b'\t')
|
||||||
|
.count() as f64
|
||||||
|
/ bytes.len().max(1) as f64;
|
||||||
|
|
||||||
|
if printable_ratio > 0.5 {
|
||||||
|
let text = String::from_utf8_lossy(bytes);
|
||||||
|
return (Some(text.into_owned()), Some("UTF-8(lossy)"));
|
||||||
|
}
|
||||||
|
|
||||||
|
(None, None)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user