fix: send_session_message 增加详细 debug 日志和文件名归一化匹配
- filename_matches_target 关键日志从 trace 升级到 debug
- 增加 on-disk bytes hex dump 输出,便于定位编码差异
- UTF-8 解码成功但不匹配时继续尝试 GBK 解码
- 新增 normalize_filename() 去除空白/零宽字符后模糊比对,解决 LLM 在中文文件名中多插空格的问题

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
0de0b93896
commit
694b3ce0e0
@ -272,28 +272,36 @@ fn filename_matches_target(on_disk_name: &std::ffi::OsStr, target: &str) -> bool
|
|||||||
use std::os::unix::ffi::OsStrExt;
|
use std::os::unix::ffi::OsStrExt;
|
||||||
let bytes = on_disk_name.as_bytes();
|
let bytes = on_disk_name.as_bytes();
|
||||||
|
|
||||||
tracing::trace!(
|
tracing::debug!(
|
||||||
on_disk_bytes = ?bytes,
|
|
||||||
on_disk_bytes_hex = %format_bytes_hex(bytes),
|
on_disk_bytes_hex = %format_bytes_hex(bytes),
|
||||||
target = %target,
|
target = %target,
|
||||||
"filename_matches_target: comparing"
|
target_bytes_hex = %format_bytes_hex(target.as_bytes()),
|
||||||
|
"filename_matches_target: comparing on-disk bytes with target"
|
||||||
);
|
);
|
||||||
|
|
||||||
// 直接 UTF-8 匹配
|
// 直接 UTF-8 匹配
|
||||||
match std::str::from_utf8(bytes) {
|
match std::str::from_utf8(bytes) {
|
||||||
Ok(decoded) => {
|
Ok(decoded) => {
|
||||||
let matches = decoded == target;
|
let matches = decoded == target;
|
||||||
tracing::trace!(
|
tracing::debug!(
|
||||||
decoded_utf8 = %decoded,
|
decoded_utf8 = %decoded,
|
||||||
|
decoded_len = decoded.len(),
|
||||||
|
target_len = target.len(),
|
||||||
matches = matches,
|
matches = matches,
|
||||||
"filename_matches_target: UTF-8 decode result"
|
"filename_matches_target: UTF-8 decode result"
|
||||||
);
|
);
|
||||||
return matches;
|
if matches {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// UTF-8 匹配失败,继续尝试其他编码 — 可能磁盘上是 GBK
|
||||||
|
tracing::debug!(
|
||||||
|
"filename_matches_target: UTF-8 matched but strings differ, trying GBK decode"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
tracing::trace!(
|
tracing::debug!(
|
||||||
utf8_error = %e,
|
utf8_error = %e,
|
||||||
"filename_matches_target: not valid UTF-8, trying GBK"
|
"filename_matches_target: not valid UTF-8, trying GBK decode"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -304,15 +312,26 @@ fn filename_matches_target(on_disk_name: &std::ffi::OsStr, target: &str) -> bool
|
|||||||
let matches = gbk_decoded == target;
|
let matches = gbk_decoded == target;
|
||||||
tracing::debug!(
|
tracing::debug!(
|
||||||
gbk_decoded = %gbk_decoded,
|
gbk_decoded = %gbk_decoded,
|
||||||
|
gbk_decoded_len = gbk_decoded.len(),
|
||||||
target = %target,
|
target = %target,
|
||||||
|
target_len = target.len(),
|
||||||
matches = matches,
|
matches = matches,
|
||||||
gbk_decoded_bytes = ?gbk_decoded.as_bytes(),
|
|
||||||
target_bytes = ?target.as_bytes(),
|
|
||||||
"filename_matches_target: GBK decode result"
|
"filename_matches_target: GBK decode result"
|
||||||
);
|
);
|
||||||
if matches {
|
if matches {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
// GBK 解码成功但不匹配,尝试归一化后比对
|
||||||
|
let normalized_disk = normalize_filename(&gbk_decoded);
|
||||||
|
let normalized_target = normalize_filename(target);
|
||||||
|
if normalized_disk == normalized_target {
|
||||||
|
tracing::debug!(
|
||||||
|
normalized_disk = %normalized_disk,
|
||||||
|
normalized_target = %normalized_target,
|
||||||
|
"filename_matches_target: matched after normalization"
|
||||||
|
);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
tracing::debug!(
|
tracing::debug!(
|
||||||
gbk_decoded_lossy = %gbk_decoded,
|
gbk_decoded_lossy = %gbk_decoded,
|
||||||
@ -330,7 +349,43 @@ fn filename_matches_target(on_disk_name: &std::ffi::OsStr, target: &str) -> bool
|
|||||||
matches = matches,
|
matches = matches,
|
||||||
"filename_matches_target: lossy fallback result"
|
"filename_matches_target: lossy fallback result"
|
||||||
);
|
);
|
||||||
matches
|
if matches {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 最后尝试:lossy 归一化比对
|
||||||
|
let normalized_lossy = normalize_filename(&lossy);
|
||||||
|
let normalized_target = normalize_filename(target);
|
||||||
|
if normalized_lossy == normalized_target {
|
||||||
|
tracing::debug!(
|
||||||
|
normalized_lossy = %normalized_lossy,
|
||||||
|
normalized_target = %normalized_target,
|
||||||
|
"filename_matches_target: matched after lossy normalization"
|
||||||
|
);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Normalizes a file name for fuzzy comparison by dropping every whitespace
/// character and every zero-width / invisible formatting character.
///
/// LLMs sometimes insert spaces into Chinese file names (e.g. "139 邮箱" vs
/// "139邮箱"). Comparing the normalized forms of two names ignores such
/// differences and only looks at the meaningful characters.
///
/// Characters removed:
/// - anything with the Unicode `White_Space` property (ASCII space, tab, CR,
///   LF, NBSP U+00A0, ideographic space U+3000, en/em spaces U+2000–U+200A,
///   narrow no-break space U+202F, line/paragraph separators, ...);
/// - zero-width and directional marks U+200B–U+200F, the word joiner U+2060
///   and the BOM / zero-width no-break space U+FEFF. These are not classified
///   as whitespace by Unicode, so they are listed explicitly.
///
/// All other characters are kept, in their original order.
#[cfg(unix)]
fn normalize_filename(s: &str) -> String {
    s.chars()
        .filter(|&c| {
            let is_invisible =
                matches!(c, '\u{200B}'..='\u{200F}' | '\u{2060}' | '\u{FEFF}');
            !c.is_whitespace() && !is_invisible
        })
        .collect()
}
|
||||||
|
|
||||||
/// 将字节切片格式化为十六进制字符串,用于调试日志。
|
/// 将字节切片格式化为十六进制字符串,用于调试日志。
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user