feat: 优化附件路径解析,增加文件名匹配逻辑以支持非 UTF-8 编码
This commit is contained in:
parent
0646a17073
commit
a2c4bf1d8c
@ -5,6 +5,7 @@ use std::sync::Arc;
|
|||||||
use anyhow::anyhow;
|
use anyhow::anyhow;
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use base64::Engine;
|
use base64::Engine;
|
||||||
|
#[cfg(unix)]
|
||||||
use encoding_rs::GBK;
|
use encoding_rs::GBK;
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
|
|
||||||
@ -12,9 +13,6 @@ use crate::bus::MediaItem;
|
|||||||
|
|
||||||
use super::traits::{Tool, ToolContext, ToolResult};
|
use super::traits::{Tool, ToolContext, ToolResult};
|
||||||
|
|
||||||
#[cfg(unix)]
|
|
||||||
use std::os::unix::ffi::OsStringExt;
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct SessionSendRequest {
|
pub struct SessionSendRequest {
|
||||||
pub text: Option<String>,
|
pub text: Option<String>,
|
||||||
@ -171,31 +169,30 @@ fn validate_context(context: &ToolContext) -> anyhow::Result<()> {
|
|||||||
|
|
||||||
/// 解析附件文件路径,支持非 UTF-8 编码的文件名回退。
|
/// 解析附件文件路径,支持非 UTF-8 编码的文件名回退。
|
||||||
///
|
///
|
||||||
/// 在 Linux 上,文件名可能是 GBK 等非 UTF-8 编码。当 LLM 用 UTF-8 字符串
|
/// 在 Linux 上,文件名可能是 GBK 等非 UTF-8 编码。当 LLM 通过 bash 工具
|
||||||
/// 引用这些文件时,直接访问会失败。此函数先按原样(UTF-8)尝试访问,
|
/// 的 `ls` 输出获取文件名时(bash 工具会将非 UTF-8 输出解码为 UTF-8),
|
||||||
/// 如果文件不存在,则将文件名部分用 GBK 重新编码后再尝试。
|
/// LLM 使用的是 UTF-8 字符串,与磁盘上的实际字节不匹配。
|
||||||
#[cfg_attr(not(unix), allow(unused_variables))]
|
///
|
||||||
|
/// 此函数的策略是:
|
||||||
|
/// 1. 先按 UTF-8 原样访问(最快路径)
|
||||||
|
/// 2. 如果失败,列出父目录,用多种编码解码磁盘上的文件名字节,
|
||||||
|
/// 与目标文件名比对,找到匹配项后返回实际磁盘路径
|
||||||
fn resolve_attachment_path(raw_path: &str) -> PathBuf {
|
fn resolve_attachment_path(raw_path: &str) -> PathBuf {
|
||||||
let path = Path::new(raw_path);
|
let path = Path::new(raw_path);
|
||||||
|
|
||||||
// 先按原样(UTF-8)尝试
|
// 1. 先按原样(UTF-8)尝试
|
||||||
if path.exists() {
|
if path.exists() {
|
||||||
return path.to_path_buf();
|
return path.to_path_buf();
|
||||||
}
|
}
|
||||||
|
|
||||||
// 提取父目录和文件名,只对文件名做编码回退
|
// 2. 提取父目录和文件名,列出目录逐项比对
|
||||||
if let (Some(parent), Some(filename_os)) = (path.parent(), path.file_name()) {
|
if let (Some(parent), Some(target_filename)) = (path.parent(), path.file_name()) {
|
||||||
let filename_str = filename_os.to_string_lossy();
|
let target_str = target_filename.to_string_lossy();
|
||||||
|
if let Ok(entries) = std::fs::read_dir(parent) {
|
||||||
// 尝试 GBK 编码回退
|
for entry in entries.flatten() {
|
||||||
let (gbk_bytes, _, had_errors) = GBK.encode(&filename_str);
|
let entry_name = entry.file_name();
|
||||||
if !had_errors {
|
if filename_matches_target(&entry_name, &target_str) {
|
||||||
#[cfg(unix)]
|
return entry.path();
|
||||||
{
|
|
||||||
let os_filename = std::ffi::OsString::from_vec(gbk_bytes.into_owned());
|
|
||||||
let resolved = parent.join(os_filename);
|
|
||||||
if resolved.exists() {
|
|
||||||
return resolved;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -205,6 +202,35 @@ fn resolve_attachment_path(raw_path: &str) -> PathBuf {
|
|||||||
path.to_path_buf()
|
path.to_path_buf()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// 检查磁盘上的文件名(OsStr)是否与目标 UTF-8 文件名匹配。
|
||||||
|
///
|
||||||
|
/// 在 Unix 上,尝试用多种编码解码磁盘字节,与目标字符串比对。
|
||||||
|
#[cfg(unix)]
|
||||||
|
fn filename_matches_target(on_disk_name: &std::ffi::OsStr, target: &str) -> bool {
|
||||||
|
use std::os::unix::ffi::OsStrExt;
|
||||||
|
let bytes = on_disk_name.as_bytes();
|
||||||
|
|
||||||
|
// 直接 UTF-8 匹配(最快)
|
||||||
|
if let Ok(decoded) = std::str::from_utf8(bytes) {
|
||||||
|
return decoded == target;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 尝试 GBK/GB18030 解码
|
||||||
|
let (gbk_decoded, _, had_errors) = GBK.decode(bytes);
|
||||||
|
if !had_errors && gbk_decoded == target {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 回退:lossy 转换比对(处理非 GBK 编码)
|
||||||
|
let lossy = String::from_utf8_lossy(bytes);
|
||||||
|
lossy == target
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(unix))]
|
||||||
|
/// Checks whether an on-disk file name matches the target file name.
///
/// The on-disk name is converted with `to_string_lossy` (anything that is
/// not valid Unicode becomes U+FFFD) and the result is compared with
/// `target` for exact equality.
fn filename_matches_target(on_disk_name: &std::ffi::OsStr, target: &str) -> bool {
    let rendered = on_disk_name.to_string_lossy();
    rendered.as_ref() == target
}
|
||||||
|
|
||||||
fn parse_attachments(value: &serde_json::Value) -> anyhow::Result<Vec<MediaItem>> {
|
fn parse_attachments(value: &serde_json::Value) -> anyhow::Result<Vec<MediaItem>> {
|
||||||
// 支持两种格式:实际数组 或 字符串化的 JSON 数组
|
// 支持两种格式:实际数组 或 字符串化的 JSON 数组
|
||||||
let paths = if let Some(arr) = value.as_array() {
|
let paths = if let Some(arr) = value.as_array() {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user