fix: send_session_message 增加详细 debug 日志和文件名归一化匹配

- filename_matches_target 关键日志从 trace 升级到 debug
- 增加 on-disk bytes hex dump 输出，便于定位编码差异
- UTF-8 解码成功但不匹配时继续尝试 GBK 解码
- 新增 normalize_filename()，去除空白/零宽字符后做模糊比对，解决 LLM 在中文文件名中多插空格的问题

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-11 09:44:47 +08:00 · 2026-06-11 09:44:47 +08:00 · 694b3ce0e0
commit 694b3ce0e0
parent 0de0b93896
1 changed files with 65 additions and 10 deletions
--- a/src/tools/session_send.rs
+++ b/src/tools/session_send.rs
@ -272,28 +272,36 @@ fn filename_matches_target(on_disk_name: &std::ffi::OsStr, target: &str) -> bool
    use std::os::unix::ffi::OsStrExt;
    let bytes = on_disk_name.as_bytes();

-    tracing::trace!(
-        on_disk_bytes = ?bytes,
+    tracing::debug!(
        on_disk_bytes_hex = %format_bytes_hex(bytes),
        target = %target,
-        "filename_matches_target: comparing"
+        target_bytes_hex = %format_bytes_hex(target.as_bytes()),
+        "filename_matches_target: comparing on-disk bytes with target"
    );

    // 直接 UTF-8 匹配
    match std::str::from_utf8(bytes) {
        Ok(decoded) => {
            let matches = decoded == target;
-            tracing::trace!(
+            tracing::debug!(
                decoded_utf8 = %decoded,
+                decoded_len = decoded.len(),
+                target_len = target.len(),
                matches = matches,
                "filename_matches_target: UTF-8 decode result"
            );
-            return matches;
+            if matches {
+                return true;
+            }
+            // UTF-8 匹配失败，继续尝试其他编码 — 可能磁盘上是 GBK
+            tracing::debug!(
+                "filename_matches_target: UTF-8 matched but strings differ, trying GBK decode"
+            );
        }
        Err(e) => {
-            tracing::trace!(
+            tracing::debug!(
                utf8_error = %e,
-                "filename_matches_target: not valid UTF-8, trying GBK"
+                "filename_matches_target: not valid UTF-8, trying GBK decode"
            );
        }
    }
@ -304,15 +312,26 @@ fn filename_matches_target(on_disk_name: &std::ffi::OsStr, target: &str) -> bool
        let matches = gbk_decoded == target;
        tracing::debug!(
            gbk_decoded = %gbk_decoded,
+            gbk_decoded_len = gbk_decoded.len(),
            target = %target,
+            target_len = target.len(),
            matches = matches,
-            gbk_decoded_bytes = ?gbk_decoded.as_bytes(),
-            target_bytes = ?target.as_bytes(),
            "filename_matches_target: GBK decode result"
        );
        if matches {
            return true;
        }
+        // GBK 解码成功但不匹配，尝试归一化后比对
+        let normalized_disk = normalize_filename(&gbk_decoded);
+        let normalized_target = normalize_filename(target);
+        if normalized_disk == normalized_target {
+            tracing::debug!(
+                normalized_disk = %normalized_disk,
+                normalized_target = %normalized_target,
+                "filename_matches_target: matched after normalization"
+            );
+            return true;
+        }
    } else {
        tracing::debug!(
            gbk_decoded_lossy = %gbk_decoded,
@ -330,7 +349,43 @@ fn filename_matches_target(on_disk_name: &std::ffi::OsStr, target: &str) -> bool
        matches = matches,
        "filename_matches_target: lossy fallback result"
    );
-    matches
+    if matches {
+        return true;
+    }
+
+    // 最后尝试：lossy 归一化比对
+    let normalized_lossy = normalize_filename(&lossy);
+    let normalized_target = normalize_filename(target);
+    if normalized_lossy == normalized_target {
+        tracing::debug!(
+            normalized_lossy = %normalized_lossy,
+            normalized_target = %normalized_target,
+            "filename_matches_target: matched after lossy normalization"
+        );
+        return true;
+    }
+
+    false
+}
+
+/// Normalizes a file name for fuzzy comparison by dropping a fixed set of
+/// whitespace and invisible characters.
+///
+/// LLMs sometimes insert spaces into Chinese file names (e.g. "139 邮箱" vs
+/// "139邮箱"). Stripping those characters from both sides before comparing
+/// lets such names match on their meaningful text only.
+///
+/// Characters removed:
+/// - ASCII space, tab, line feed and carriage return;
+/// - U+200B, U+200C, U+200D, U+FEFF, U+200E and U+200F (zero-width and
+///   directional-mark code points);
+/// - U+00A0 (no-break space) and U+3000 (ideographic space).
+///
+/// Every other character is kept unchanged and in its original order. Note
+/// that this is an explicit list: Unicode whitespace not named above (for
+/// example U+2003 EM SPACE) is NOT removed.
+///
+/// Returns a newly allocated `String` containing the retained characters.
+#[cfg(unix)]
+fn normalize_filename(s: &str) -> String {
+    s.chars()
+        .filter(|c| match *c {
+            // Drop ASCII whitespace.
+            ' ' | '\t' | '\n' | '\r' => false,
+            // Drop zero-width / invisible formatting characters.
+            '\u{200B}' | '\u{200C}' | '\u{200D}' | '\u{FEFF}' | '\u{200E}' | '\u{200F}' => false,
+            // Drop the two non-ASCII space characters handled explicitly:
+            // no-break space and ideographic (full-width) space.
+            '\u{00A0}' | '\u{3000}' => false,
+            // Keep every other character.
+            _ => true,
+        })
+        .collect()
 }

 /// 将字节切片格式化为十六进制字符串，用于调试日志。