diff --git a/Cargo.toml b/Cargo.toml
index dbfe05a..32592a4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -45,3 +45,4 @@ rmcp = { version = "1.6", default-features = false, features = [
     "which-command",
 ] }
 http = "1"
+encoding_rs = "0.8"
diff --git a/src/tools/file_read.rs b/src/tools/file_read.rs
index 1553ad3..4daecbd 100644
--- a/src/tools/file_read.rs
+++ b/src/tools/file_read.rs
@@ -1,4 +1,5 @@
 use async_trait::async_trait;
+use encoding_rs::GB18030;
 use serde_json::json;
 
 use crate::tools::path_utils;
@@ -117,10 +118,23 @@ impl Tool for FileReadTool {
             });
         }
 
-        // Try to read as text
-        match std::fs::read_to_string(&resolved) {
-            Ok(content) => {
-                let all_lines: Vec<&str> = content.lines().collect();
+        // Read raw bytes and try multiple encodings
+        let bytes = match std::fs::read(&resolved) {
+            Ok(b) => b,
+            Err(e) => {
+                return Ok(ToolResult {
+                    success: false,
+                    output: String::new(),
+                    error: Some(format!("Failed to read file: {}", e)),
+                });
+            }
+        };
+
+        let (content, encoding_label) = decode_text(&bytes);
+
+        match content {
+            Some(text) => {
+                let all_lines: Vec<&str> = text.lines().collect();
                 let total = all_lines.len();
 
                 if offset < 1 {
@@ -182,7 +196,14 @@ impl Tool for FileReadTool {
                         end + 1
                     ));
                 } else {
-                    result.push_str(&format!("\n\n(End of file — {} lines total)", total));
+                    result.push_str(&format!(
+                        "\n\n(End of file — {} lines total)",
+                        total
+                    ));
+                }
+
+                if let Some(label) = encoding_label {
+                    result.insert_str(0, &format!("(编码: {})\n", label));
                 }
 
                 Ok(ToolResult {
@@ -191,37 +212,66 @@ impl Tool for FileReadTool {
                     error: None,
                 })
             }
-            Err(e) => {
-                // Try to read as binary and encode as base64
-                match std::fs::read(&resolved) {
-                    Ok(bytes) => {
-                        use base64::{engine::general_purpose::STANDARD, Engine};
-                        let encoded = STANDARD.encode(&bytes);
-                        let mime = mime_guess::from_path(&resolved)
-                            .first_or_octet_stream()
-                            .to_string();
-                        Ok(ToolResult {
-                            success: true,
-                            output: format!(
-                                "(Binary file: {}, {} bytes, base64 encoded)\n{}",
-                                mime,
-                                bytes.len(),
-                                encoded
-                            ),
-                            error: None,
-                        })
-                    }
-                    Err(_) => Ok(ToolResult {
-                        success: false,
-                        output: String::new(),
-                        error: Some(format!("Failed to read file: {}", e)),
-                    }),
-                }
+            None => {
+                // Truly binary file — base64 encode
+                use base64::{engine::general_purpose::STANDARD, Engine};
+                let encoded = STANDARD.encode(&bytes);
+                let mime = mime_guess::from_path(&resolved)
+                    .first_or_octet_stream()
+                    .to_string();
+                Ok(ToolResult {
+                    success: true,
+                    output: format!(
+                        "(Binary file: {}, {} bytes, base64 encoded)\n{}",
+                        mime,
+                        bytes.len(),
+                        encoded
+                    ),
+                    error: None,
+                })
             }
         }
     }
 }
 
+/// Decodes raw file bytes into text, trying progressively looser strategies.
+///
+/// Returns `(text, label)`. `text` is `None` when the bytes look binary.
+/// `label` names the decoding that was used, and is `None` for valid UTF-8.
+///
+/// Strategies, in order:
+/// 1. Strict UTF-8.
+/// 2. GB18030. `Encoding::decode` sniffs a leading BOM first, so the label is
+///    taken from the encoding it reports rather than assumed. GBK is not
+///    tried separately: encoding_rs decodes GBK with the GB18030 decoder, so
+///    it cannot succeed where GB18030 failed.
+/// 3. Lossy UTF-8, only when more than half of the bytes are printable ASCII
+///    or space/tab/CR/LF.
+fn decode_text(bytes: &[u8]) -> (Option<String>, Option<&'static str>) {
+    if let Ok(text) = std::str::from_utf8(bytes) {
+        return (Some(text.to_owned()), None);
+    }
+
+    let (decoded, used, had_errors) = GB18030.decode(bytes);
+    if !had_errors {
+        // A BOM can redirect decoding (e.g. to UTF-16), so label what was used.
+        let label = if used == GB18030 { "GB18030" } else { used.name() };
+        return (Some(decoded.into_owned()), Some(label));
+    }
+
+    let printable = bytes
+        .iter()
+        .filter(|&&b| b.is_ascii_graphic() || matches!(b, b' ' | b'\n' | b'\r' | b'\t'))
+        .count();
+    // Integer form of `printable / len > 0.5`; avoids float rounding.
+    if printable * 2 > bytes.len() {
+        let text = String::from_utf8_lossy(bytes).into_owned();
+        return (Some(text), Some("UTF-8(lossy)"));
+    }
+
+    (None, None)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;