feat: send_session_message 支持非 UTF-8 编码中文文件路径回退
Linux 环境下,当文件名包含非 UTF-8 编码的中文字符(如 GBK)时,LLM 从 bash ls 输出获取的 UTF-8 路径无法匹配磁盘上的实际字节,导致文件访问失败。

此提交新增 resolve_attachment_path() 函数,采用“目录扫描 + 多编码解码匹配”策略:
1. 先按 UTF-8 原样访问(快速路径)
2. 失败后列出父目录,对每个文件尝试 UTF-8/GBK/lossy 解码,与目标文件名比对,匹配成功则返回磁盘实际路径

同时修复 file_name 提取:使用 to_string_lossy() 替代 to_str(),避免非 UTF-8 文件名静默丢失。

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
a2c4bf1d8c
commit
0de0b93896
@ -180,25 +180,87 @@ fn validate_context(context: &ToolContext) -> anyhow::Result<()> {
|
|||||||
fn resolve_attachment_path(raw_path: &str) -> PathBuf {
|
fn resolve_attachment_path(raw_path: &str) -> PathBuf {
|
||||||
let path = Path::new(raw_path);
|
let path = Path::new(raw_path);
|
||||||
|
|
||||||
|
tracing::debug!(
|
||||||
|
raw_path = %raw_path,
|
||||||
|
raw_path_bytes = ?raw_path.as_bytes(),
|
||||||
|
"resolve_attachment_path: attempting to resolve path"
|
||||||
|
);
|
||||||
|
|
||||||
// 1. 先按原样(UTF-8)尝试
|
// 1. 先按原样(UTF-8)尝试
|
||||||
if path.exists() {
|
if path.exists() {
|
||||||
|
tracing::debug!(
|
||||||
|
raw_path = %raw_path,
|
||||||
|
"resolve_attachment_path: path exists as-is (UTF-8)"
|
||||||
|
);
|
||||||
return path.to_path_buf();
|
return path.to_path_buf();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tracing::debug!(
|
||||||
|
raw_path = %raw_path,
|
||||||
|
"resolve_attachment_path: path not found as UTF-8, trying directory scan"
|
||||||
|
);
|
||||||
|
|
||||||
// 2. 提取父目录和文件名,列出目录逐项比对
|
// 2. 提取父目录和文件名,列出目录逐项比对
|
||||||
if let (Some(parent), Some(target_filename)) = (path.parent(), path.file_name()) {
|
if let (Some(parent), Some(target_filename)) = (path.parent(), path.file_name()) {
|
||||||
let target_str = target_filename.to_string_lossy();
|
let target_str = target_filename.to_string_lossy();
|
||||||
if let Ok(entries) = std::fs::read_dir(parent) {
|
tracing::debug!(
|
||||||
for entry in entries.flatten() {
|
parent = %parent.display(),
|
||||||
let entry_name = entry.file_name();
|
target_filename = %target_str,
|
||||||
if filename_matches_target(&entry_name, &target_str) {
|
target_filename_bytes = ?target_filename.as_encoded_bytes(),
|
||||||
return entry.path();
|
"resolve_attachment_path: scanning parent directory"
|
||||||
|
);
|
||||||
|
|
||||||
|
match std::fs::read_dir(parent) {
|
||||||
|
Ok(entries) => {
|
||||||
|
let mut entry_count = 0;
|
||||||
|
for entry in entries.flatten() {
|
||||||
|
entry_count += 1;
|
||||||
|
let entry_name = entry.file_name();
|
||||||
|
let is_match = filename_matches_target(&entry_name, &target_str);
|
||||||
|
tracing::trace!(
|
||||||
|
entry_path = %entry.path().display(),
|
||||||
|
entry_name_lossy = %entry_name.to_string_lossy(),
|
||||||
|
entry_name_bytes = ?entry_name.as_encoded_bytes(),
|
||||||
|
is_match = is_match,
|
||||||
|
"resolve_attachment_path: checking entry"
|
||||||
|
);
|
||||||
|
if is_match {
|
||||||
|
tracing::debug!(
|
||||||
|
entry_path = %entry.path().display(),
|
||||||
|
entry_name_lossy = %entry_name.to_string_lossy(),
|
||||||
|
"resolve_attachment_path: MATCH FOUND via encoding fallback"
|
||||||
|
);
|
||||||
|
return entry.path();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
tracing::debug!(
|
||||||
|
parent = %parent.display(),
|
||||||
|
entry_count = entry_count,
|
||||||
|
"resolve_attachment_path: directory scan complete, no match"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
tracing::warn!(
|
||||||
|
parent = %parent.display(),
|
||||||
|
error = %e,
|
||||||
|
"resolve_attachment_path: failed to read directory"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
tracing::debug!(
|
||||||
|
raw_path = %raw_path,
|
||||||
|
has_parent = path.parent().is_some(),
|
||||||
|
has_filename = path.file_name().is_some(),
|
||||||
|
"resolve_attachment_path: cannot extract parent or filename"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 回退失败,返回原路径(让调用方报错)
|
// 回退失败,返回原路径(让调用方报错)
|
||||||
|
tracing::warn!(
|
||||||
|
raw_path = %raw_path,
|
||||||
|
"resolve_attachment_path: all resolution attempts failed, returning original path"
|
||||||
|
);
|
||||||
path.to_path_buf()
|
path.to_path_buf()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -210,25 +272,87 @@ fn filename_matches_target(on_disk_name: &std::ffi::OsStr, target: &str) -> bool
|
|||||||
use std::os::unix::ffi::OsStrExt;
|
use std::os::unix::ffi::OsStrExt;
|
||||||
let bytes = on_disk_name.as_bytes();
|
let bytes = on_disk_name.as_bytes();
|
||||||
|
|
||||||
// 直接 UTF-8 匹配(最快)
|
tracing::trace!(
|
||||||
if let Ok(decoded) = std::str::from_utf8(bytes) {
|
on_disk_bytes = ?bytes,
|
||||||
return decoded == target;
|
on_disk_bytes_hex = %format_bytes_hex(bytes),
|
||||||
|
target = %target,
|
||||||
|
"filename_matches_target: comparing"
|
||||||
|
);
|
||||||
|
|
||||||
|
// 直接 UTF-8 匹配
|
||||||
|
match std::str::from_utf8(bytes) {
|
||||||
|
Ok(decoded) => {
|
||||||
|
let matches = decoded == target;
|
||||||
|
tracing::trace!(
|
||||||
|
decoded_utf8 = %decoded,
|
||||||
|
matches = matches,
|
||||||
|
"filename_matches_target: UTF-8 decode result"
|
||||||
|
);
|
||||||
|
return matches;
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
tracing::trace!(
|
||||||
|
utf8_error = %e,
|
||||||
|
"filename_matches_target: not valid UTF-8, trying GBK"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 尝试 GBK/GB18030 解码
|
// 尝试 GBK 解码
|
||||||
let (gbk_decoded, _, had_errors) = GBK.decode(bytes);
|
let (gbk_decoded, _, had_errors) = GBK.decode(bytes);
|
||||||
if !had_errors && gbk_decoded == target {
|
if !had_errors {
|
||||||
return true;
|
let matches = gbk_decoded == target;
|
||||||
|
tracing::debug!(
|
||||||
|
gbk_decoded = %gbk_decoded,
|
||||||
|
target = %target,
|
||||||
|
matches = matches,
|
||||||
|
gbk_decoded_bytes = ?gbk_decoded.as_bytes(),
|
||||||
|
target_bytes = ?target.as_bytes(),
|
||||||
|
"filename_matches_target: GBK decode result"
|
||||||
|
);
|
||||||
|
if matches {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
tracing::debug!(
|
||||||
|
gbk_decoded_lossy = %gbk_decoded,
|
||||||
|
had_errors = had_errors,
|
||||||
|
"filename_matches_target: GBK decode had errors"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 回退:lossy 转换比对(处理非 GBK 编码)
|
// 回退:lossy 转换比对
|
||||||
let lossy = String::from_utf8_lossy(bytes);
|
let lossy = String::from_utf8_lossy(bytes);
|
||||||
lossy == target
|
let matches = lossy == target;
|
||||||
|
tracing::debug!(
|
||||||
|
lossy = %lossy,
|
||||||
|
target = %target,
|
||||||
|
matches = matches,
|
||||||
|
"filename_matches_target: lossy fallback result"
|
||||||
|
);
|
||||||
|
matches
|
||||||
|
}
|
||||||
|
|
||||||
|
/// 将字节切片格式化为十六进制字符串,用于调试日志。
|
||||||
|
#[cfg(unix)]
|
||||||
|
fn format_bytes_hex(bytes: &[u8]) -> String {
|
||||||
|
bytes
|
||||||
|
.iter()
|
||||||
|
.map(|b| format!("{:02x}", b))
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join(" ")
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(not(unix))]
|
#[cfg(not(unix))]
|
||||||
fn filename_matches_target(on_disk_name: &std::ffi::OsStr, target: &str) -> bool {
|
fn filename_matches_target(on_disk_name: &std::ffi::OsStr, target: &str) -> bool {
|
||||||
on_disk_name.to_string_lossy() == target
|
let matches = on_disk_name.to_string_lossy() == target;
|
||||||
|
tracing::trace!(
|
||||||
|
on_disk_lossy = %on_disk_name.to_string_lossy(),
|
||||||
|
target = %target,
|
||||||
|
matches = matches,
|
||||||
|
"filename_matches_target (non-unix): comparing"
|
||||||
|
);
|
||||||
|
matches
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_attachments(value: &serde_json::Value) -> anyhow::Result<Vec<MediaItem>> {
|
fn parse_attachments(value: &serde_json::Value) -> anyhow::Result<Vec<MediaItem>> {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user