feat: 添加附件路径解析功能,支持非 UTF-8 编码文件名的回退机制
This commit is contained in:
parent
a4cdb31ba0
commit
0646a17073
@ -1,16 +1,20 @@
|
|||||||
use std::io::Read;
|
use std::io::Read;
|
||||||
use std::path::Path;
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use anyhow::anyhow;
|
use anyhow::anyhow;
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use base64::Engine;
|
use base64::Engine;
|
||||||
|
use encoding_rs::GBK;
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
|
|
||||||
use crate::bus::MediaItem;
|
use crate::bus::MediaItem;
|
||||||
|
|
||||||
use super::traits::{Tool, ToolContext, ToolResult};
|
use super::traits::{Tool, ToolContext, ToolResult};
|
||||||
|
|
||||||
|
#[cfg(unix)]
|
||||||
|
use std::os::unix::ffi::OsStringExt;
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct SessionSendRequest {
|
pub struct SessionSendRequest {
|
||||||
pub text: Option<String>,
|
pub text: Option<String>,
|
||||||
@ -165,6 +169,42 @@ fn validate_context(context: &ToolContext) -> anyhow::Result<()> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// 解析附件文件路径,支持非 UTF-8 编码的文件名回退。
|
||||||
|
///
|
||||||
|
/// 在 Linux 上,文件名可能是 GBK 等非 UTF-8 编码。当 LLM 用 UTF-8 字符串
|
||||||
|
/// 引用这些文件时,直接访问会失败。此函数先按原样(UTF-8)尝试访问,
|
||||||
|
/// 如果文件不存在,则将文件名部分用 GBK 重新编码后再尝试。
|
||||||
|
#[cfg_attr(not(unix), allow(unused_variables))]
|
||||||
|
fn resolve_attachment_path(raw_path: &str) -> PathBuf {
|
||||||
|
let path = Path::new(raw_path);
|
||||||
|
|
||||||
|
// 先按原样(UTF-8)尝试
|
||||||
|
if path.exists() {
|
||||||
|
return path.to_path_buf();
|
||||||
|
}
|
||||||
|
|
||||||
|
// 提取父目录和文件名,只对文件名做编码回退
|
||||||
|
if let (Some(parent), Some(filename_os)) = (path.parent(), path.file_name()) {
|
||||||
|
let filename_str = filename_os.to_string_lossy();
|
||||||
|
|
||||||
|
// 尝试 GBK 编码回退
|
||||||
|
let (gbk_bytes, _, had_errors) = GBK.encode(&filename_str);
|
||||||
|
if !had_errors {
|
||||||
|
#[cfg(unix)]
|
||||||
|
{
|
||||||
|
let os_filename = std::ffi::OsString::from_vec(gbk_bytes.into_owned());
|
||||||
|
let resolved = parent.join(os_filename);
|
||||||
|
if resolved.exists() {
|
||||||
|
return resolved;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 回退失败,返回原路径(让调用方报错)
|
||||||
|
path.to_path_buf()
|
||||||
|
}
|
||||||
|
|
||||||
fn parse_attachments(value: &serde_json::Value) -> anyhow::Result<Vec<MediaItem>> {
|
fn parse_attachments(value: &serde_json::Value) -> anyhow::Result<Vec<MediaItem>> {
|
||||||
// 支持两种格式:实际数组 或 字符串化的 JSON 数组
|
// 支持两种格式:实际数组 或 字符串化的 JSON 数组
|
||||||
let paths = if let Some(arr) = value.as_array() {
|
let paths = if let Some(arr) = value.as_array() {
|
||||||
@ -198,7 +238,11 @@ fn parse_attachments(value: &serde_json::Value) -> anyhow::Result<Vec<MediaItem>
|
|||||||
|
|
||||||
let mut attachments = Vec::with_capacity(paths.len());
|
let mut attachments = Vec::with_capacity(paths.len());
|
||||||
for raw_path in paths {
|
for raw_path in paths {
|
||||||
let metadata = std::fs::metadata(&raw_path)
|
// 解析路径(含编码回退),确保能正确访问非 UTF-8 文件名的文件
|
||||||
|
let resolved_path = resolve_attachment_path(&raw_path);
|
||||||
|
let resolved_path_str = resolved_path.to_string_lossy().to_string();
|
||||||
|
|
||||||
|
let metadata = std::fs::metadata(&resolved_path)
|
||||||
.map_err(|err| anyhow!("failed to access attachment '{}': {}", raw_path, err))?;
|
.map_err(|err| anyhow!("failed to access attachment '{}': {}", raw_path, err))?;
|
||||||
if !metadata.is_file() {
|
if !metadata.is_file() {
|
||||||
return Err(anyhow!("attachment path is not a file: {}", raw_path));
|
return Err(anyhow!("attachment path is not a file: {}", raw_path));
|
||||||
@ -209,21 +253,20 @@ fn parse_attachments(value: &serde_json::Value) -> anyhow::Result<Vec<MediaItem>
|
|||||||
|
|
||||||
let content_base64 = (metadata.len() <= 50 * 1024 * 1024)
|
let content_base64 = (metadata.len() <= 50 * 1024 * 1024)
|
||||||
.then(|| {
|
.then(|| {
|
||||||
let mut file = std::fs::File::open(&raw_path)?;
|
let mut file = std::fs::File::open(&resolved_path)?;
|
||||||
let mut buf = Vec::with_capacity(metadata.len() as usize);
|
let mut buf = Vec::with_capacity(metadata.len() as usize);
|
||||||
file.read_to_end(&mut buf)?;
|
file.read_to_end(&mut buf)?;
|
||||||
Ok::<_, anyhow::Error>(base64::engine::general_purpose::STANDARD.encode(&buf))
|
Ok::<_, anyhow::Error>(base64::engine::general_purpose::STANDARD.encode(&buf))
|
||||||
})
|
})
|
||||||
.transpose()?;
|
.transpose()?;
|
||||||
|
|
||||||
let file_name = Path::new(&raw_path)
|
let file_name = Path::new(&resolved_path)
|
||||||
.file_name()
|
.file_name()
|
||||||
.and_then(|n| n.to_str())
|
.map(|n| n.to_string_lossy().to_string());
|
||||||
.map(ToOwned::to_owned);
|
|
||||||
|
|
||||||
let media_type = infer_media_type(&raw_path);
|
let media_type = infer_media_type(&resolved_path_str);
|
||||||
let mut item = MediaItem::new(raw_path.to_string(), media_type);
|
let mut item = MediaItem::new(resolved_path_str, media_type);
|
||||||
item.mime_type = mime_guess::from_path(&raw_path)
|
item.mime_type = mime_guess::from_path(&resolved_path)
|
||||||
.first_raw()
|
.first_raw()
|
||||||
.map(ToOwned::to_owned);
|
.map(ToOwned::to_owned);
|
||||||
item.content_base64 = content_base64;
|
item.content_base64 = content_base64;
|
||||||
@ -371,4 +414,36 @@ mod tests {
|
|||||||
assert_eq!(attachments.len(), 1);
|
assert_eq!(attachments.len(), 1);
|
||||||
assert_eq!(attachments[0].media_type, "file");
|
assert_eq!(attachments[0].media_type, "file");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checks that an attachment requested by its UTF-8 path is resolved when the
/// file on disk is named with the GBK-encoded bytes of the same name.
#[test]
#[cfg(unix)]
fn parse_attachments_resolves_gbk_encoded_filename() {
    use encoding_rs::GBK;
    use std::os::unix::ffi::OsStringExt;

    // Chinese file name as a UTF-8 string.
    let requested_name = "测试文件.txt";

    // The same name encoded to GBK bytes.
    let (encoded, _, unmappable) = GBK.encode(requested_name);
    assert!(!unmappable);

    // Create a file with the GBK-encoded name inside a temporary directory.
    let scratch = tempfile::tempdir().unwrap();
    let on_disk = scratch
        .path()
        .join(std::ffi::OsString::from_vec(encoded.into_owned()));
    std::fs::write(&on_disk, b"test content").unwrap();

    // Call parse_attachments with the UTF-8 spelling of the path.
    let requested = scratch
        .path()
        .join(requested_name)
        .to_string_lossy()
        .to_string();
    let parsed = parse_attachments(&json!([requested])).unwrap();

    assert_eq!(parsed.len(), 1);
    assert_eq!(parsed[0].media_type, "file");
    // Only checks that a file name was extracted (it is converted lossily
    // because the on-disk name is GBK-encoded).
    assert!(parsed[0].file_name.is_some());
}
|
||||||
}
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user