From 0646a17073b2ff6037e4e0668b36da69f8cfbe44 Mon Sep 17 00:00:00 2001 From: oudecheng <13802883547@139.com> Date: Wed, 10 Jun 2026 18:07:17 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E9=99=84=E4=BB=B6?= =?UTF-8?q?=E8=B7=AF=E5=BE=84=E8=A7=A3=E6=9E=90=E5=8A=9F=E8=83=BD=EF=BC=8C?= =?UTF-8?q?=E6=94=AF=E6=8C=81=E9=9D=9E=20UTF-8=20=E7=BC=96=E7=A0=81?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E5=90=8D=E7=9A=84=E5=9B=9E=E9=80=80=E6=9C=BA?= =?UTF-8?q?=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tools/session_send.rs | 93 +++++++++++++++++++++++++++++++++++---- 1 file changed, 84 insertions(+), 9 deletions(-) diff --git a/src/tools/session_send.rs b/src/tools/session_send.rs index 1a22441..cb5df65 100644 --- a/src/tools/session_send.rs +++ b/src/tools/session_send.rs @@ -1,16 +1,20 @@ use std::io::Read; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::sync::Arc; use anyhow::anyhow; use async_trait::async_trait; use base64::Engine; +use encoding_rs::GBK; use serde_json::json; use crate::bus::MediaItem; use super::traits::{Tool, ToolContext, ToolResult}; +#[cfg(unix)] +use std::os::unix::ffi::OsStringExt; + #[derive(Debug, Clone)] pub struct SessionSendRequest { pub text: Option, @@ -165,6 +169,42 @@ fn validate_context(context: &ToolContext) -> anyhow::Result<()> { Ok(()) } +/// 解析附件文件路径,支持非 UTF-8 编码的文件名回退。 +/// +/// 在 Linux 上,文件名可能是 GBK 等非 UTF-8 编码。当 LLM 用 UTF-8 字符串 +/// 引用这些文件时,直接访问会失败。此函数先按原样(UTF-8)尝试访问, +/// 如果文件不存在,则将文件名部分用 GBK 重新编码后再尝试。 +#[cfg_attr(not(unix), allow(unused_variables))] +fn resolve_attachment_path(raw_path: &str) -> PathBuf { + let path = Path::new(raw_path); + + // 先按原样(UTF-8)尝试 + if path.exists() { + return path.to_path_buf(); + } + + // 提取父目录和文件名,只对文件名做编码回退 + if let (Some(parent), Some(filename_os)) = (path.parent(), path.file_name()) { + let filename_str = filename_os.to_string_lossy(); + + // 尝试 GBK 编码回退 + let (gbk_bytes, _, had_errors) = GBK.encode(&filename_str); + if !had_errors { + #[cfg(unix)] + { + let os_filename = std::ffi::OsString::from_vec(gbk_bytes.into_owned()); + let resolved = parent.join(os_filename); + if resolved.exists() { + return resolved; + } + } + } + } + + // 回退失败,返回原路径(让调用方报错) + path.to_path_buf() +} + fn parse_attachments(value: &serde_json::Value) -> anyhow::Result> { // 支持两种格式:实际数组 或 字符串化的 JSON 数组 let paths = if let Some(arr) = value.as_array() { @@ -198,7 +238,11 @@ fn parse_attachments(value: &serde_json::Value) -> anyhow::Result let mut attachments = Vec::with_capacity(paths.len()); for raw_path in paths { - let metadata = std::fs::metadata(&raw_path) + // 解析路径(含编码回退),确保能正确访问非 UTF-8 文件名的文件 + let resolved_path = resolve_attachment_path(&raw_path); + let resolved_path_str = resolved_path.to_string_lossy().to_string(); + + let metadata = std::fs::metadata(&resolved_path) .map_err(|err| anyhow!("failed to access attachment '{}': {}", raw_path, err))?; if !metadata.is_file() { return Err(anyhow!("attachment path is not a file: {}", raw_path)); @@ -209,21 +253,20 @@ fn parse_attachments(value: &serde_json::Value) -> anyhow::Result let content_base64 = (metadata.len() <= 50 * 1024 * 1024) .then(|| { - let mut file = std::fs::File::open(&raw_path)?; + let mut file = std::fs::File::open(&resolved_path)?; let mut buf = Vec::with_capacity(metadata.len() as usize); file.read_to_end(&mut buf)?; Ok::<_, anyhow::Error>(base64::engine::general_purpose::STANDARD.encode(&buf)) }) .transpose()?; - let file_name = Path::new(&raw_path) + let file_name = Path::new(&resolved_path) .file_name() - .and_then(|n| n.to_str()) - .map(ToOwned::to_owned); + .map(|n| n.to_string_lossy().to_string()); - let media_type = infer_media_type(&raw_path); - let mut item = MediaItem::new(raw_path.to_string(), media_type); - item.mime_type = mime_guess::from_path(&raw_path) + let media_type = infer_media_type(&resolved_path_str); + let mut item = MediaItem::new(resolved_path_str, media_type); + item.mime_type = mime_guess::from_path(&resolved_path) .first_raw() .map(ToOwned::to_owned); item.content_base64 = content_base64; @@ -371,4 +414,36 @@ mod tests { assert_eq!(attachments.len(), 1); assert_eq!(attachments[0].media_type, "file"); } + + #[test] + #[cfg(unix)] + fn parse_attachments_resolves_gbk_encoded_filename() { + use encoding_rs::GBK; + use std::os::unix::ffi::OsStringExt; + + // UTF-8 中文文件名 + let utf8_filename = "测试文件.txt"; + + // 编码为 GBK 字节 + let (gbk_bytes, _, had_errors) = GBK.encode(utf8_filename); + assert!(!had_errors); + + // 在临时目录中创建 GBK 编码文件名的文件 + let tmp_dir = tempfile::tempdir().unwrap(); + let gbk_osstring = std::ffi::OsString::from_vec(gbk_bytes.into_owned()); + let gbk_path = tmp_dir.path().join(gbk_osstring); + + std::fs::write(&gbk_path, b"test content").unwrap(); + + // 用 UTF-8 路径调用 parse_attachments + let utf8_path = tmp_dir.path().join(utf8_filename); + let utf8_path_str = utf8_path.to_string_lossy().to_string(); + + let attachments = parse_attachments(&json!([utf8_path_str])).unwrap(); + + assert_eq!(attachments.len(), 1); + assert_eq!(attachments[0].media_type, "file"); + // 验证文件名能正确提取(用 lossy 方式,因为是 GBK 编码) + assert!(attachments[0].file_name.is_some()); + } } \ No newline at end of file