From 694b3ce0e01fb0d349851cf9e5ce5aa132d317aa Mon Sep 17 00:00:00 2001 From: oudecheng <13802883547@139.com> Date: Thu, 11 Jun 2026 09:44:47 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20send=5Fsession=5Fmessage=20=E5=A2=9E?= =?UTF-8?q?=E5=8A=A0=E8=AF=A6=E7=BB=86=20debug=20=E6=97=A5=E5=BF=97?= =?UTF-8?q?=E5=92=8C=E6=96=87=E4=BB=B6=E5=90=8D=E5=BD=92=E4=B8=80=E5=8C=96?= =?UTF-8?q?=E5=8C=B9=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - filename_matches_target 关键日志从 trace 升级到 debug - 增加 on-disk bytes hex dump 输出,便于定位编码差异 - UTF-8 解码成功但不匹配时继续尝试 GBK 解码 - 新增 normalize_filename() 去除空白/零宽字符后模糊比对 解决 LLM 在中文文件名中多插空格的问题 Co-Authored-By: Claude Opus 4.8 --- src/tools/session_send.rs | 75 +++++++++++++++++++++++++++++++++------ 1 file changed, 65 insertions(+), 10 deletions(-) diff --git a/src/tools/session_send.rs b/src/tools/session_send.rs index e16fbeb..ce4573e 100644 --- a/src/tools/session_send.rs +++ b/src/tools/session_send.rs @@ -272,28 +272,36 @@ fn filename_matches_target(on_disk_name: &std::ffi::OsStr, target: &str) -> bool use std::os::unix::ffi::OsStrExt; let bytes = on_disk_name.as_bytes(); - tracing::trace!( - on_disk_bytes = ?bytes, + tracing::debug!( on_disk_bytes_hex = %format_bytes_hex(bytes), target = %target, - "filename_matches_target: comparing" + target_bytes_hex = %format_bytes_hex(target.as_bytes()), + "filename_matches_target: comparing on-disk bytes with target" ); // 直接 UTF-8 匹配 match std::str::from_utf8(bytes) { Ok(decoded) => { let matches = decoded == target; - tracing::trace!( + tracing::debug!( decoded_utf8 = %decoded, + decoded_len = decoded.len(), + target_len = target.len(), matches = matches, "filename_matches_target: UTF-8 decode result" ); - return matches; + if matches { + return true; + } + // UTF-8 匹配失败,继续尝试其他编码 — 可能磁盘上是 GBK + tracing::debug!( + "filename_matches_target: UTF-8 matched but strings differ, trying GBK decode" + ); } Err(e) => { - tracing::trace!( + tracing::debug!( utf8_error = %e, - "filename_matches_target: not valid UTF-8, trying GBK" + "filename_matches_target: not valid UTF-8, trying GBK decode" ); } } @@ -304,15 +312,26 @@ fn filename_matches_target(on_disk_name: &std::ffi::OsStr, target: &str) -> bool let matches = gbk_decoded == target; tracing::debug!( gbk_decoded = %gbk_decoded, + gbk_decoded_len = gbk_decoded.len(), target = %target, + target_len = target.len(), matches = matches, - gbk_decoded_bytes = ?gbk_decoded.as_bytes(), - target_bytes = ?target.as_bytes(), "filename_matches_target: GBK decode result" ); if matches { return true; } + // GBK 解码成功但不匹配,尝试归一化后比对 + let normalized_disk = normalize_filename(&gbk_decoded); + let normalized_target = normalize_filename(target); + if normalized_disk == normalized_target { + tracing::debug!( + normalized_disk = %normalized_disk, + normalized_target = %normalized_target, + "filename_matches_target: matched after normalization" + ); + return true; + } } else { tracing::debug!( gbk_decoded_lossy = %gbk_decoded, @@ -330,7 +349,43 @@ fn filename_matches_target(on_disk_name: &std::ffi::OsStr, target: &str) -> bool matches = matches, "filename_matches_target: lossy fallback result" ); - matches + if matches { + return true; + } + + // 最后尝试:lossy 归一化比对 + let normalized_lossy = normalize_filename(&lossy); + let normalized_target = normalize_filename(target); + if normalized_lossy == normalized_target { + tracing::debug!( + normalized_lossy = %normalized_lossy, + normalized_target = %normalized_target, + "filename_matches_target: matched after lossy normalization" + ); + return true; + } + + false +} + +/// 对文件名做归一化处理:去除不可见字符和空白字符差异,便于模糊匹配。 +/// +/// LLM 有时会在中文文件名中插入空格(如 "139 邮箱" vs "139邮箱"), +/// 此函数去掉所有空白字符和零宽字符,只比对有意义的文字部分。 +#[cfg(unix)] +fn normalize_filename(s: &str) -> String { + s.chars() + .filter(|c| match *c { + // 过滤 ASCII 空白 + ' ' | '\t' | '\n' | '\r' => false, + // 过滤零宽字符 + '\u{200B}' | '\u{200C}' | '\u{200D}' | '\u{FEFF}' | '\u{200E}' | '\u{200F}' => false, + // 过滤 Unicode 空白字符 + '\u{00A0}' | '\u{3000}' => false, + // 保留其他所有字符 + _ => true, + }) + .collect() } /// 将字节切片格式化为十六进制字符串,用于调试日志。