PicoBot/src/providers/anthropic.rs
xiaoski 2dada36bc6 feat: introduce multimodal content handling with media support
- Added ContentBlock enum for multimodal content representation (text, image).
- Enhanced ChatMessage struct to include media references.
- Updated InboundMessage and OutboundMessage to use MediaItem for media handling.
- Implemented media download and upload functionality in FeishuChannel.
- Modified message processing in the gateway to handle media items.
- Improved logging for message processing and media handling in debug mode.
- Refactored message serialization for LLM providers to support content blocks.
2026-04-07 23:09:31 +08:00

246 lines
6.8 KiB
Rust

use async_trait::async_trait;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use crate::bus::message::ContentBlock;
use super::{ChatCompletionRequest, ChatCompletionResponse, LLMProvider, Tool, ToolCall};
use super::traits::Usage;
/// Serializes a message's content blocks as a JSON array.
///
/// Intended for use through `#[serde(serialize_with = "...")]`. The blocks are
/// written as a real sequence rather than as a JSON-encoded string: the Anthropic
/// Messages API interprets a string `content` as plain text, so a stringified
/// array would reach the model as literal JSON text and image blocks would never
/// be treated as images. Serialization errors are propagated to the caller
/// instead of being replaced by an empty list.
fn serialize_content_blocks<S>(
    blocks: &[serde_json::Value],
    serializer: S,
) -> Result<S::Ok, S::Error>
where
    S: serde::Serializer,
{
    serializer.collect_seq(blocks)
}
/// Translates internal content blocks into Anthropic content-block JSON values.
///
/// Text blocks become `{"type": "text", "text": ...}`; image blocks are delegated
/// to `convert_image_url_to_anthropic`. The output preserves the input order.
fn convert_content_blocks(blocks: &[ContentBlock]) -> Vec<serde_json::Value> {
    let mut converted = Vec::with_capacity(blocks.len());
    for block in blocks {
        let value = match block {
            ContentBlock::Text { text } => serde_json::json!({ "type": "text", "text": text }),
            ContentBlock::ImageUrl { image_url } => convert_image_url_to_anthropic(&image_url.url),
        };
        converted.push(value);
    }
    converted
}
/// Splits a `data:image/<subtype>;base64,<payload>` URL into `(media_type, payload)`.
///
/// Returns `None` unless the input starts with `data:`, the media type is
/// `image/` followed by one or more word characters, and the payload is non-empty.
fn split_image_data_url(url: &str) -> Option<(&str, &str)> {
    let (media_type, payload) = url.strip_prefix("data:")?.split_once(";base64,")?;
    let subtype = media_type.strip_prefix("image/")?;
    let subtype_is_word =
        !subtype.is_empty() && subtype.chars().all(|c| c.is_alphanumeric() || c == '_');
    if subtype_is_word && !payload.is_empty() {
        Some((media_type, payload))
    } else {
        None
    }
}

/// Builds an Anthropic `image` content block from an image URL.
///
/// A base64 image data URL becomes a `base64` source carrying its media type and
/// payload; any other input is passed through unchanged as a `url` source.
///
/// The data URL is recognised with plain string operations anchored at the start
/// of the input. This avoids compiling a regex on every call, and it means an
/// ordinary URL that merely contains `data:image/...;base64,` somewhere inside it
/// is not mistaken for inline image data.
fn convert_image_url_to_anthropic(url: &str) -> serde_json::Value {
    match split_image_data_url(url) {
        // data:image/png;base64,... -> Anthropic image block with inline data
        Some((media_type, data)) => serde_json::json!({
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": media_type,
                "data": data
            }
        }),
        // Regular URL -> Anthropic image block with url source
        None => serde_json::json!({
            "type": "image",
            "source": {
                "type": "url",
                "url": url
            }
        }),
    }
}
/// LLM provider that talks to an Anthropic-style `/v1/messages` HTTP endpoint.
pub struct AnthropicProvider {
/// HTTP client used to send requests.
client: Client,
/// Name of this provider instance, returned by `name()`.
name: String,
/// API key sent in the `x-api-key` request header.
api_key: String,
/// Base URL that `/v1/messages` is appended to.
base_url: String,
/// Additional headers added to every request.
extra_headers: HashMap<String, String>,
/// Model identifier sent as the request's `model` field.
model_id: String,
/// Temperature used when the incoming request does not specify one.
temperature: Option<f32>,
/// `max_tokens` used when the incoming request does not specify one.
max_tokens: Option<u32>,
/// Extra key/value pairs flattened into the top level of the request body.
model_extra: HashMap<String, serde_json::Value>,
}
impl AnthropicProvider {
    /// Creates a provider from its configuration values.
    ///
    /// A fresh HTTP client is constructed for the provider; every argument is
    /// stored as given, without validation.
    pub fn new(
        name: String,
        api_key: String,
        base_url: String,
        extra_headers: HashMap<String, String>,
        model_id: String,
        temperature: Option<f32>,
        max_tokens: Option<u32>,
        model_extra: HashMap<String, serde_json::Value>,
    ) -> Self {
        let client = Client::new();
        Self {
            client,
            name,
            api_key,
            base_url,
            extra_headers,
            model_id,
            temperature,
            max_tokens,
            model_extra,
        }
    }
}
/// JSON body sent to the `/v1/messages` endpoint.
#[derive(Serialize)]
struct AnthropicRequest {
    /// Model identifier.
    model: String,
    /// Conversation messages, in order.
    messages: Vec<AnthropicMessage>,
    /// Upper bound on the number of tokens to generate.
    max_tokens: u32,
    /// Sampling temperature. Omitted from the body when unset, consistent with
    /// `tools`, so the server default applies instead of an explicit
    /// `"temperature": null`.
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f32>,
    /// Tool definitions offered to the model; omitted from the body when unset.
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<Vec<AnthropicTool>>,
    /// Extra parameters merged into the top level of the body.
    #[serde(flatten)]
    extra: HashMap<String, serde_json::Value>,
}
/// A single conversation message in the request body.
#[derive(Serialize)]
struct AnthropicMessage {
/// Role of the message author, copied from the incoming chat message.
role: String,
/// Content blocks as JSON values; serialized through `serialize_content_blocks`.
#[serde(serialize_with = "serialize_content_blocks")]
content: Vec<serde_json::Value>,
}
/// Tool definition as sent in the request's `tools` list.
#[derive(Serialize)]
struct AnthropicTool {
/// Tool name, taken from the tool's function name.
name: String,
/// Tool description, taken from the tool's function description.
description: String,
/// Schema for the tool's input, taken from the tool's function parameters.
input_schema: serde_json::Value,
}
/// Subset of the `/v1/messages` response body that this provider reads.
#[derive(Deserialize)]
struct AnthropicResponse {
/// Response identifier, forwarded as the completion's `id`.
id: String,
/// Model name reported by the server, forwarded as the completion's `model`.
model: String,
/// Content blocks produced by the model.
content: Vec<AnthropicContent>,
/// Token usage counters.
usage: AnthropicUsage,
}
/// One content block of a response, selected by the block's `type` field
/// (`text`, `thinking` or `tool_use`).
// NOTE(review): there is no catch-all variant, so a block with any other `type`
// will presumably make deserialization of the whole response fail — confirm
// whether that is intended.
#[derive(Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum AnthropicContent {
/// Plain text output.
Text { text: String },
/// Thinking output; deserialized but not used by `chat`.
Thinking { thinking: String },
/// A tool invocation requested by the model.
#[serde(rename = "tool_use")]
ToolUse {
/// Identifier of this tool call.
id: String,
/// Name of the tool to invoke.
name: String,
/// Arguments for the tool, as arbitrary JSON.
input: serde_json::Value,
},
}
/// Token counters reported in the response.
#[derive(Deserialize)]
struct AnthropicUsage {
/// Tokens consumed by the input; mapped to `prompt_tokens`.
input_tokens: u32,
/// Tokens generated in the output; mapped to `completion_tokens`.
output_tokens: u32,
}
#[async_trait]
impl LLMProvider for AnthropicProvider {
    /// Sends `request` to `{base_url}/v1/messages` and maps the reply into a
    /// `ChatCompletionResponse`.
    ///
    /// `max_tokens` and `temperature` from the request take precedence over the
    /// provider's configured values; `max_tokens` falls back to 1024 when neither
    /// is set. In the reply, non-empty text blocks are joined with newlines,
    /// thinking blocks are ignored, and tool-use blocks are returned as tool calls.
    ///
    /// # Errors
    ///
    /// Returns an error when the HTTP request fails, when the server answers with
    /// a non-success status (the status and response body are included in the
    /// message), or when a successful response body cannot be deserialized.
    async fn chat(
        &self,
        request: ChatCompletionRequest,
    ) -> Result<ChatCompletionResponse, Box<dyn std::error::Error + Send + Sync>> {
        // Tolerate a configured base URL that ends with a slash.
        let url = format!("{}/v1/messages", self.base_url.trim_end_matches('/'));
        let max_tokens = request.max_tokens.or(self.max_tokens).unwrap_or(1024);

        let tools = request.tools.map(|tools| {
            tools
                .iter()
                .map(|t: &Tool| AnthropicTool {
                    name: t.function.name.clone(),
                    description: t.function.description.clone(),
                    input_schema: t.function.parameters.clone(),
                })
                .collect()
        });

        // NOTE(review): roles are forwarded unchanged. The Messages API takes the
        // system prompt as a top-level parameter rather than as a message role —
        // confirm callers never pass a `system`-role message here.
        let body = AnthropicRequest {
            model: self.model_id.clone(),
            messages: request
                .messages
                .iter()
                .map(|m| AnthropicMessage {
                    role: m.role.clone(),
                    content: convert_content_blocks(&m.content),
                })
                .collect(),
            max_tokens,
            temperature: request.temperature.or(self.temperature),
            tools,
            extra: self.model_extra.clone(),
        };

        let mut req_builder = self
            .client
            .post(&url)
            .header("x-api-key", &self.api_key)
            .header("anthropic-version", "2023-06-01")
            .header("Content-Type", "application/json");
        for (key, value) in &self.extra_headers {
            req_builder = req_builder.header(key.as_str(), value.as_str());
        }

        let resp = req_builder.json(&body).send().await?;

        // Surface API failures explicitly. Without this check an error payload is
        // fed to the success-shaped deserializer and comes back as an opaque
        // "missing field" error that hides the real cause.
        let status = resp.status();
        if !status.is_success() {
            // Best effort: the body is only used to enrich the error message.
            let detail = resp.text().await.unwrap_or_default();
            return Err(format!(
                "Anthropic API request failed with status {}: {}",
                status, detail
            )
            .into());
        }

        let anthropic_resp: AnthropicResponse = resp.json().await?;

        let mut content = String::new();
        let mut tool_calls = Vec::new();
        // Consume the blocks by value so ids, names and inputs move without cloning.
        for block in anthropic_resp.content {
            match block {
                AnthropicContent::Text { text } => {
                    if !text.is_empty() {
                        if !content.is_empty() {
                            content.push('\n');
                        }
                        content.push_str(&text);
                    }
                }
                AnthropicContent::Thinking { .. } => {}
                AnthropicContent::ToolUse { id, name, input } => {
                    tool_calls.push(ToolCall {
                        id,
                        name,
                        arguments: input,
                    });
                }
            }
        }

        Ok(ChatCompletionResponse {
            id: anthropic_resp.id,
            model: anthropic_resp.model,
            content,
            tool_calls,
            usage: Usage {
                prompt_tokens: anthropic_resp.usage.input_tokens,
                completion_tokens: anthropic_resp.usage.output_tokens,
                total_tokens: anthropic_resp.usage.input_tokens
                    + anthropic_resp.usage.output_tokens,
            },
        })
    }

    /// Returns the provider type identifier, always `"anthropic"`.
    fn ptype(&self) -> &str {
        "anthropic"
    }

    /// Returns the configured name of this provider instance.
    fn name(&self) -> &str {
        &self.name
    }

    /// Returns the model identifier sent with each request.
    fn model_id(&self) -> &str {
        &self.model_id
    }
}