fix: send_session_message 增加详细 debug 日志和文件名归一化匹配

- filename_matches_target 关键日志从 trace 升级到 debug
- 增加 on-disk bytes hex dump 输出，便于定位编码差异
- UTF-8 解码成功但不匹配时继续尝试 GBK 解码
- 新增 normalize_filename() 去除空白/零宽字符后模糊比对，解决 LLM 在中文文件名中多插空格的问题

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-11 09:44:47 +08:00 · 2026-06-11 09:44:47 +08:00 · 694b3ce0e0
commit 694b3ce0e0
parent 0de0b93896
1 changed files with 65 additions and 10 deletions
--- a/src/tools/session_send.rs
+++ b/src/tools/session_send.rs
@ -272,28 +272,36 @@ fn filename_matches_target(on_disk_name: &std::ffi::OsStr, target: &str) -> bool
    use std::os::unix::ffi::OsStrExt;
    let bytes = on_disk_name.as_bytes();
-    tracing::trace!(
+    tracing::debug!(
        on_disk_bytes = ?bytes,
        on_disk_bytes_hex = %format_bytes_hex(bytes),
        target = %target,
-        "filename_matches_target: comparing"
+        target_bytes_hex = %format_bytes_hex(target.as_bytes()),
        "filename_matches_target: comparing on-disk bytes with target"
    );
    // 直接 UTF-8 匹配
    match std::str::from_utf8(bytes) {
        Ok(decoded) => {
            let matches = decoded == target;
-            tracing::trace!(
+            tracing::debug!(
                decoded_utf8 = %decoded,
                decoded_len = decoded.len(),
                target_len = target.len(),
                matches = matches,
                "filename_matches_target: UTF-8 decode result"
            );
-            return matches;
+            if matches {
                return true;
            }
            // UTF-8 匹配失败，继续尝试其他编码 — 可能磁盘上是 GBK
            tracing::debug!(
                "filename_matches_target: UTF-8 matched but strings differ, trying GBK decode"
            );
        }
        Err(e) => {
-            tracing::trace!(
+            tracing::debug!(
                utf8_error = %e,
-                "filename_matches_target: not valid UTF-8, trying GBK"
+                "filename_matches_target: not valid UTF-8, trying GBK decode"
            );
        }
    }
@ -304,15 +312,26 @@ fn filename_matches_target(on_disk_name: &std::ffi::OsStr, target: &str) -> bool
        let matches = gbk_decoded == target;
        tracing::debug!(
            gbk_decoded = %gbk_decoded,
            gbk_decoded_len = gbk_decoded.len(),
            target = %target,
            target_len = target.len(),
            matches = matches,
            gbk_decoded_bytes = ?gbk_decoded.as_bytes(),
            target_bytes = ?target.as_bytes(),
            "filename_matches_target: GBK decode result"
        );
        if matches {
            return true;
        }
        // GBK 解码成功但不匹配，尝试归一化后比对
        let normalized_disk = normalize_filename(&gbk_decoded);
        let normalized_target = normalize_filename(target);
        if normalized_disk == normalized_target {
            tracing::debug!(
                normalized_disk = %normalized_disk,
                normalized_target = %normalized_target,
                "filename_matches_target: matched after normalization"
            );
            return true;
        }
    } else {
        tracing::debug!(
            gbk_decoded_lossy = %gbk_decoded,
@ -330,7 +349,43 @@ fn filename_matches_target(on_disk_name: &std::ffi::OsStr, target: &str) -> bool
        matches = matches,
        "filename_matches_target: lossy fallback result"
    );
-    matches
+    if matches {
        return true;
    }
    // 最后尝试：lossy 归一化比对
    let normalized_lossy = normalize_filename(&lossy);
    let normalized_target = normalize_filename(target);
    if normalized_lossy == normalized_target {
        tracing::debug!(
            normalized_lossy = %normalized_lossy,
            normalized_target = %normalized_target,
            "filename_matches_target: matched after lossy normalization"
        );
        return true;
    }
    false
 }
 /// Normalizes a file name for fuzzy comparison by removing every whitespace
 /// character and every invisible formatting character.
 ///
 /// LLMs sometimes insert spaces into Chinese file names (e.g. "139 邮箱"
 /// instead of "139邮箱"). Stripping these characters from both sides lets the
 /// caller compare only the visible text of the two names.
 ///
 /// Removed characters:
 /// - anything with the Unicode `White_Space` property (ASCII space, tab,
 ///   CR/LF, U+00A0, U+2000..=U+200A, U+202F, U+3000, ...), via
 ///   [`char::is_whitespace`];
 /// - zero-width and directional marks U+200B..=U+200F, the word joiner
 ///   U+2060, and the BOM / zero-width no-break space U+FEFF.
 ///
 /// All other characters are kept in their original order. No case folding
 /// or Unicode (NFC/NFD) normalization is performed.
 #[cfg(unix)]
 fn normalize_filename(s: &str) -> String {
    s.chars()
        .filter(|&c| {
            // These invisible characters are not classified as whitespace by
            // Unicode, so they need an explicit check.
            let is_invisible = matches!(
                c,
                '\u{200B}'..='\u{200F}' | '\u{2060}' | '\u{FEFF}'
            );
            !c.is_whitespace() && !is_invisible
        })
        .collect()
 }
 /// 将字节切片格式化为十六进制字符串，用于调试日志。