feat: 添加附件路径解析功能,支持非 UTF-8 编码文件名的回退机制
This commit is contained in:
parent
a4cdb31ba0
commit
0646a17073
@ -1,16 +1,20 @@
|
||||
use std::io::Read;
|
||||
use std::path::Path;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::anyhow;
|
||||
use async_trait::async_trait;
|
||||
use base64::Engine;
|
||||
use encoding_rs::GBK;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::bus::MediaItem;
|
||||
|
||||
use super::traits::{Tool, ToolContext, ToolResult};
|
||||
|
||||
#[cfg(unix)]
|
||||
use std::os::unix::ffi::OsStringExt;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SessionSendRequest {
|
||||
pub text: Option<String>,
|
||||
@ -165,6 +169,42 @@ fn validate_context(context: &ToolContext) -> anyhow::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// 解析附件文件路径,支持非 UTF-8 编码的文件名回退。
|
||||
///
|
||||
/// 在 Linux 上,文件名可能是 GBK 等非 UTF-8 编码。当 LLM 用 UTF-8 字符串
|
||||
/// 引用这些文件时,直接访问会失败。此函数先按原样(UTF-8)尝试访问,
|
||||
/// 如果文件不存在,则将文件名部分用 GBK 重新编码后再尝试。
|
||||
#[cfg_attr(not(unix), allow(unused_variables))]
|
||||
fn resolve_attachment_path(raw_path: &str) -> PathBuf {
|
||||
let path = Path::new(raw_path);
|
||||
|
||||
// 先按原样(UTF-8)尝试
|
||||
if path.exists() {
|
||||
return path.to_path_buf();
|
||||
}
|
||||
|
||||
// 提取父目录和文件名,只对文件名做编码回退
|
||||
if let (Some(parent), Some(filename_os)) = (path.parent(), path.file_name()) {
|
||||
let filename_str = filename_os.to_string_lossy();
|
||||
|
||||
// 尝试 GBK 编码回退
|
||||
let (gbk_bytes, _, had_errors) = GBK.encode(&filename_str);
|
||||
if !had_errors {
|
||||
#[cfg(unix)]
|
||||
{
|
||||
let os_filename = std::ffi::OsString::from_vec(gbk_bytes.into_owned());
|
||||
let resolved = parent.join(os_filename);
|
||||
if resolved.exists() {
|
||||
return resolved;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 回退失败,返回原路径(让调用方报错)
|
||||
path.to_path_buf()
|
||||
}
|
||||
|
||||
fn parse_attachments(value: &serde_json::Value) -> anyhow::Result<Vec<MediaItem>> {
|
||||
// 支持两种格式:实际数组 或 字符串化的 JSON 数组
|
||||
let paths = if let Some(arr) = value.as_array() {
|
||||
@ -198,7 +238,11 @@ fn parse_attachments(value: &serde_json::Value) -> anyhow::Result<Vec<MediaItem>
|
||||
|
||||
let mut attachments = Vec::with_capacity(paths.len());
|
||||
for raw_path in paths {
|
||||
let metadata = std::fs::metadata(&raw_path)
|
||||
// 解析路径(含编码回退),确保能正确访问非 UTF-8 文件名的文件
|
||||
let resolved_path = resolve_attachment_path(&raw_path);
|
||||
let resolved_path_str = resolved_path.to_string_lossy().to_string();
|
||||
|
||||
let metadata = std::fs::metadata(&resolved_path)
|
||||
.map_err(|err| anyhow!("failed to access attachment '{}': {}", raw_path, err))?;
|
||||
if !metadata.is_file() {
|
||||
return Err(anyhow!("attachment path is not a file: {}", raw_path));
|
||||
@ -209,21 +253,20 @@ fn parse_attachments(value: &serde_json::Value) -> anyhow::Result<Vec<MediaItem>
|
||||
|
||||
let content_base64 = (metadata.len() <= 50 * 1024 * 1024)
|
||||
.then(|| {
|
||||
let mut file = std::fs::File::open(&raw_path)?;
|
||||
let mut file = std::fs::File::open(&resolved_path)?;
|
||||
let mut buf = Vec::with_capacity(metadata.len() as usize);
|
||||
file.read_to_end(&mut buf)?;
|
||||
Ok::<_, anyhow::Error>(base64::engine::general_purpose::STANDARD.encode(&buf))
|
||||
})
|
||||
.transpose()?;
|
||||
|
||||
let file_name = Path::new(&raw_path)
|
||||
let file_name = Path::new(&resolved_path)
|
||||
.file_name()
|
||||
.and_then(|n| n.to_str())
|
||||
.map(ToOwned::to_owned);
|
||||
.map(|n| n.to_string_lossy().to_string());
|
||||
|
||||
let media_type = infer_media_type(&raw_path);
|
||||
let mut item = MediaItem::new(raw_path.to_string(), media_type);
|
||||
item.mime_type = mime_guess::from_path(&raw_path)
|
||||
let media_type = infer_media_type(&resolved_path_str);
|
||||
let mut item = MediaItem::new(resolved_path_str, media_type);
|
||||
item.mime_type = mime_guess::from_path(&resolved_path)
|
||||
.first_raw()
|
||||
.map(ToOwned::to_owned);
|
||||
item.content_base64 = content_base64;
|
||||
@ -371,4 +414,36 @@ mod tests {
|
||||
assert_eq!(attachments.len(), 1);
|
||||
assert_eq!(attachments[0].media_type, "file");
|
||||
}
|
||||
|
||||
/// A file whose on-disk name is GBK-encoded must be found and read when the
/// caller supplies the same name spelled in UTF-8.
#[test]
#[cfg(unix)]
fn parse_attachments_resolves_gbk_encoded_filename() {
    use encoding_rs::GBK;
    use std::os::unix::ffi::OsStringExt;

    // The name as the caller spells it (UTF-8).
    let utf8_filename = "测试文件.txt";

    // The same name as GBK bytes, which is how it is stored on disk here.
    let (gbk_bytes, _, had_errors) = GBK.encode(utf8_filename);
    assert!(!had_errors);

    // Create the file under its GBK-encoded name inside a temporary directory.
    let tmp_dir = tempfile::tempdir().unwrap();
    let gbk_osstring = std::ffi::OsString::from_vec(gbk_bytes.into_owned());
    let gbk_path = tmp_dir.path().join(gbk_osstring);
    std::fs::write(&gbk_path, b"test content").unwrap();

    // Precondition: the UTF-8 spelling must not exist, otherwise the direct
    // lookup would succeed and the encoding fallback would never be exercised.
    let utf8_path = tmp_dir.path().join(utf8_filename);
    assert!(!utf8_path.exists());

    // Call parse_attachments with the UTF-8 path.
    let utf8_path_str = utf8_path.to_string_lossy().to_string();
    let attachments = parse_attachments(&json!([utf8_path_str])).unwrap();

    assert_eq!(attachments.len(), 1);
    assert_eq!(attachments[0].media_type, "file");
    // The name is derived from the GBK-encoded path, so only its presence is
    // checked, not its exact text.
    assert!(attachments[0].file_name.is_some());
    // The payload must be the content of the GBK-named file
    // (standard base64 of b"test content").
    assert_eq!(
        attachments[0].content_base64.as_deref(),
        Some("dGVzdCBjb250ZW50")
    );
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user