Compare commits
No commits in common. "b84c6f85dbaf5efc19510b66ab5047ae013c2138" and "b85578a7d23c0a2e9bc46a45084c809d8037c1ca" have entirely different histories.
b84c6f85db
...
b85578a7d2
@ -1,156 +0,0 @@
|
||||
# PicoBot 工具说明
|
||||
|
||||
## send_message — 向指定渠道发送消息
|
||||
|
||||
向指定会话发送消息,可附带文件或图片。
|
||||
|
||||
### 参数
|
||||
|
||||
| 参数 | 必填 | 说明 |
|
||||
|------|------|------|
|
||||
| `target_chat_id` | 是 | 目标会话 ID,格式 `<channel>:<chat_id>` 或 `<channel>:<chat_id>:<dialog_id>` |
|
||||
| `content` | 是 | 消息文本内容 |
|
||||
| `files` | 否 | 文件路径列表 |
|
||||
| `origin` | 否 | 消息来源标识,不填则自动使用当前 session_id |
|
||||
| `file_types` | 否 | 指定文件发送类型,`{"路径": "audio"\|"file"}`。未指定则自动判断 |
|
||||
|
||||
### file_types 说明
|
||||
|
||||
控制文件以何种消息类型发送,主要用于飞书渠道:
|
||||
|
||||
- `"audio"`:作为语音消息发送(仅支持 opus 格式)
|
||||
- `"file"`:作为文件附件发送
|
||||
|
||||
飞书渠道限制:上传类型和消息类型必须一致。opus 文件以 `"audio"` 发送,其他音频(mp3、wav 等)只能以 `"file"` 发送。
|
||||
|
||||
### 示例
|
||||
|
||||
```json
|
||||
{
|
||||
"target_chat_id": "feishu:oc_abc123",
|
||||
"content": "这是生成的音乐文件",
|
||||
"files": ["/workspace/music.mp3"],
|
||||
"file_types": {"/workspace/music.mp3": "file"}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## chat_manager — 会话管理
|
||||
|
||||
查看和管理会话及消息。
|
||||
|
||||
### 参数
|
||||
|
||||
| 参数 | 必填 | 说明 |
|
||||
|------|------|------|
|
||||
| `action` | 是 | 操作: `list_sessions`, `list_channels`, `list_messages` |
|
||||
| `session_id` | 部分 | `list_messages` 时必填 |
|
||||
| `count` | 否 | 返回数量(默认 20,最大 100) |
|
||||
| `offset` | 否 | 跳过前 N 条,用于翻页 |
|
||||
| `before_time` | 否 | Unix 时间戳(秒),返回该时间之前的消息 |
|
||||
| `after_time` | 否 | Unix 时间戳(秒),返回该时间之后的消息 |
|
||||
|
||||
---
|
||||
|
||||
## cron — 定时任务管理
|
||||
|
||||
管理 cron 定时任务。
|
||||
|
||||
### 参数
|
||||
|
||||
| 参数 | 必填 | 说明 |
|
||||
|------|------|------|
|
||||
| `action` | 是 | 操作: `add`, `list`, `update`, `remove`, `enable`, `disable` |
|
||||
| `name` | add必须 | 任务名称 |
|
||||
| `schedule` | add必须 | 调度规则: `once`(时间戳), `every`(间隔秒), `cron`(表达式) |
|
||||
| `prompt` | add必须 | 任务提示词 |
|
||||
| `channel` | add必须 | 执行渠道 |
|
||||
| `chat_id` | add必须 | 目标对话 |
|
||||
|
||||
---
|
||||
|
||||
## memory_store — 存储记忆
|
||||
|
||||
写入长期记忆(Knowledge 类别)。
|
||||
|
||||
| 参数 | 必填 | 说明 |
|
||||
|------|------|------|
|
||||
| `key` | 是 | 记忆唯一键,同 key 覆盖旧值 |
|
||||
| `content` | 是 | 记忆内容 |
|
||||
| `importance` | 否 | 重要性 (0.0–1.0) |
|
||||
|
||||
## memory_recall — 搜索知识记忆
|
||||
|
||||
关键词全文搜索 Knowledge 记忆。
|
||||
|
||||
| 参数 | 必填 | 说明 |
|
||||
|------|------|------|
|
||||
| `query` | 是 | 空格分隔的关键词列表 |
|
||||
| `since` | 否 | 起始时间戳(unix 毫秒) |
|
||||
| `until` | 否 | 结束时间戳 |
|
||||
| `limit` | 否 | 返回数量(默认 10) |
|
||||
|
||||
## timeline_recall — 搜索时间线
|
||||
|
||||
搜索压缩后的历史会话摘要。
|
||||
|
||||
| 参数 | 必填 | 说明 |
|
||||
|------|------|------|
|
||||
| `query` | 是 | 关键词 |
|
||||
| `session_id` | 否 | 限定会话 |
|
||||
| `since` | 否 | 起始时间 |
|
||||
| `until` | 否 | 结束时间 |
|
||||
| `limit` | 否 | 返回数量 |
|
||||
|
||||
## memory_forget — 删除记忆
|
||||
|
||||
按 key 删除记忆。
|
||||
|
||||
| 参数 | 必填 | 说明 |
|
||||
|------|------|------|
|
||||
| `key` | 是 | 要删除的记忆键 |
|
||||
|
||||
---
|
||||
|
||||
## get_skill — 获取 Skill
|
||||
|
||||
查询 skill 内容。
|
||||
|
||||
| 参数 | 必填 | 说明 |
|
||||
|------|------|------|
|
||||
| `action` | 否 | 操作: `get`(默认), `list` |
|
||||
| `skill_name` | get时必填 | Skill 名称 |
|
||||
|
||||
---
|
||||
|
||||
## file_read / file_write / file_edit / file_search — 文件操作
|
||||
|
||||
工作目录内的文件读写编辑和搜索。详细的参数定义见各工具的 parameters_schema。
|
||||
|
||||
## bash — 执行命令
|
||||
|
||||
在本地环境执行 bash 命令,有超时限制和安全检查(阻止 `rm -rf /`、fork bomb 等)。
|
||||
|
||||
## http_request / web_fetch — HTTP 和 Web 工具
|
||||
|
||||
发送 HTTP 请求和获取网页内容,有 URL 安全校验(阻止内网/本地访问)。
|
||||
|
||||
## calculator — 计算器
|
||||
|
||||
数学表达式计算和统计函数。
|
||||
|
||||
| action | 说明 |
|
||||
|--------|------|
|
||||
| `evaluate` | 计算表达式 |
|
||||
| `sum` | 求和 |
|
||||
| `average` | 平均值 |
|
||||
| `median` | 中位数 |
|
||||
| `mode` | 众数 |
|
||||
| `stdev` / `variance` | 标准差/方差 |
|
||||
| `min` / `max` | 最小值/最大值 |
|
||||
| `log` | 对数 |
|
||||
| `factorial` | 阶乘 |
|
||||
| `round` | 四舍五入 |
|
||||
| `percentage_change` | 变化百分比 |
|
||||
| `percentile` | 百分位数 |
|
||||
@ -5,7 +5,7 @@ use std::time::Duration;
|
||||
use anyhow::Context;
|
||||
use async_trait::async_trait;
|
||||
use base64::Engine;
|
||||
use fantoccini::actions::{InputSource, MouseActions, PointerAction, MOUSE_BUTTON_LEFT};
|
||||
use fantoccini::actions::{InputSource, MouseActions, PointerAction};
|
||||
use fantoccini::key::Key;
|
||||
use fantoccini::{Client, ClientBuilder, Locator};
|
||||
use serde::{Deserialize, Serialize};
|
||||
@ -70,11 +70,11 @@ pub enum BrowserAction {
|
||||
#[serde(default)]
|
||||
compact: bool,
|
||||
#[serde(default)]
|
||||
depth: Option<i64>,
|
||||
depth: Option<u32>,
|
||||
},
|
||||
Click { selector: String },
|
||||
Fill { selector: String, value: String },
|
||||
Type { selector: Option<String>, text: String },
|
||||
Type { selector: String, text: String },
|
||||
GetText { selector: String },
|
||||
GetTitle,
|
||||
GetUrl,
|
||||
@ -82,9 +82,8 @@ pub enum BrowserAction {
|
||||
#[serde(default)]
|
||||
path: Option<String>,
|
||||
#[serde(default)]
|
||||
return_base64: bool,
|
||||
full_page: bool,
|
||||
},
|
||||
Focus { selector: String },
|
||||
Wait {
|
||||
#[serde(default)]
|
||||
selector: Option<String>,
|
||||
@ -95,7 +94,6 @@ pub enum BrowserAction {
|
||||
},
|
||||
Press { key: String },
|
||||
Hover { selector: String },
|
||||
ClickAt { x: u32, y: u32 },
|
||||
Scroll {
|
||||
direction: String,
|
||||
#[serde(default)]
|
||||
@ -126,7 +124,8 @@ fn parse_browser_action(action_str: &str, args: &Value) -> anyhow::Result<Browse
|
||||
.unwrap_or(true),
|
||||
depth: args
|
||||
.get("depth")
|
||||
.and_then(|v| v.as_i64()),
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|d| u32::try_from(d).unwrap_or(u32::MAX)),
|
||||
}),
|
||||
"click" => {
|
||||
let selector = args
|
||||
@ -155,13 +154,13 @@ fn parse_browser_action(action_str: &str, args: &Value) -> anyhow::Result<Browse
|
||||
let selector = args
|
||||
.get("selector")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| s.to_string());
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'selector' for type"))?;
|
||||
let text = args
|
||||
.get("text")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'text' for type"))?;
|
||||
Ok(BrowserAction::Type {
|
||||
selector,
|
||||
selector: selector.to_string(),
|
||||
text: text.to_string(),
|
||||
})
|
||||
}
|
||||
@ -178,20 +177,11 @@ fn parse_browser_action(action_str: &str, args: &Value) -> anyhow::Result<Browse
|
||||
"get_url" => Ok(BrowserAction::GetUrl),
|
||||
"screenshot" => Ok(BrowserAction::Screenshot {
|
||||
path: args.get("path").and_then(|v| v.as_str()).map(String::from),
|
||||
return_base64: args
|
||||
.get("return_base64")
|
||||
full_page: args
|
||||
.get("full_page")
|
||||
.and_then(Value::as_bool)
|
||||
.unwrap_or(false),
|
||||
}),
|
||||
"focus" => {
|
||||
let selector = args
|
||||
.get("selector")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'selector' for focus"))?;
|
||||
Ok(BrowserAction::Focus {
|
||||
selector: selector.to_string(),
|
||||
})
|
||||
}
|
||||
"wait" => Ok(BrowserAction::Wait {
|
||||
selector: args
|
||||
.get("selector")
|
||||
@ -235,17 +225,6 @@ fn parse_browser_action(action_str: &str, args: &Value) -> anyhow::Result<Browse
|
||||
})
|
||||
}
|
||||
"close" => Ok(BrowserAction::Close),
|
||||
"click_at" => {
|
||||
let x = args
|
||||
.get("x")
|
||||
.and_then(|v| v.as_u64())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'x' for click_at"))? as u32;
|
||||
let y = args
|
||||
.get("y")
|
||||
.and_then(|v| v.as_u64())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing 'y' for click_at"))? as u32;
|
||||
Ok(BrowserAction::ClickAt { x, y })
|
||||
}
|
||||
other => anyhow::bail!("Unsupported browser action: {}", other),
|
||||
}
|
||||
}
|
||||
@ -257,18 +236,10 @@ impl Tool for BrowserTool {
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Automate browser interactions via WebDriver. \
|
||||
Actions: open, snapshot, click, fill, type, get_text, get_title, \
|
||||
get_url, screenshot, wait, press, hover, scroll, close, focus, click_at. \
|
||||
Each session holds a single page; calling open again navigates \
|
||||
the current page (does not open a new tab). \
|
||||
Selectors: CSS, @e1 refs (from snapshot), text=... for text content, \
|
||||
label=... for <label> association. \
|
||||
Use focus to set focus on an element before typing or filling. \
|
||||
After click_at focuses an element, use type without selector \
|
||||
to type into the active element. \
|
||||
Limitations: press supports single keys only (no Ctrl+/Shift+ combos); \
|
||||
screenshot captures the visible viewport only."
|
||||
"Automate browser interactions using WebDriver. \
|
||||
First call open to navigate to a URL, then use other actions. \
|
||||
Use snapshot to get an accessibility tree of the page. \
|
||||
Selectors can be CSS, @e1 refs (from snapshot), text=..., or label=..."
|
||||
}
|
||||
|
||||
fn parameters_schema(&self) -> Value {
|
||||
@ -281,69 +252,61 @@ impl Tool for BrowserTool {
|
||||
"enum": [
|
||||
"open", "snapshot", "click", "fill", "type",
|
||||
"get_text", "get_title", "get_url", "screenshot",
|
||||
"wait", "press", "hover", "scroll", "close", "focus", "click_at"
|
||||
"wait", "press", "hover", "scroll", "close"
|
||||
]
|
||||
},
|
||||
"url": {
|
||||
"type": "string",
|
||||
"description": "(open) URL to navigate to"
|
||||
"description": "URL to navigate to (required for open)"
|
||||
},
|
||||
"selector": {
|
||||
"type": "string",
|
||||
"description": "(click/fill/type/get_text/hover/wait/focus) CSS selector, @e1 ref, text=..., or label=... Omit for type to target active element after click_at."
|
||||
"description": "CSS selector, @e1 ref, text=..., or label=..."
|
||||
},
|
||||
"value": {
|
||||
"type": "string",
|
||||
"description": "(fill) Value to fill. On dynamic sites, try click or focus first to make element interactable."
|
||||
"description": "Value to fill into a form field"
|
||||
},
|
||||
"text": {
|
||||
"type": "string",
|
||||
"description": "(type/wait) Text to append or wait for. For type, selector optional — if omitted, types into active element."
|
||||
"description": "Text to type or wait for"
|
||||
},
|
||||
"key": {
|
||||
"type": "string",
|
||||
"description": "(press) Key to press. Single keys only: Enter, Tab, Escape, Backspace, Delete, ArrowUp, ArrowDown, Space, etc."
|
||||
"description": "Key to press (Enter, Tab, Escape, Backspace, Delete, ArrowUp, ArrowDown, etc.)"
|
||||
},
|
||||
"direction": {
|
||||
"type": "string",
|
||||
"description": "(scroll) Scroll direction",
|
||||
"description": "Scroll direction",
|
||||
"enum": ["up", "down", "left", "right"]
|
||||
},
|
||||
"pixels": {
|
||||
"type": "integer",
|
||||
"description": "(scroll) Pixels to scroll (default 600)"
|
||||
"description": "Pixels to scroll (default 600)"
|
||||
},
|
||||
"ms": {
|
||||
"type": "integer",
|
||||
"description": "(wait) Milliseconds to wait"
|
||||
"description": "Milliseconds to wait (for wait action)"
|
||||
},
|
||||
"path": {
|
||||
"type": "string",
|
||||
"description": "(screenshot) File path to save screenshot. Omit to auto-save to /tmp/picobot_screenshot_<ts>.png"
|
||||
"description": "File path to save screenshot to. If omitted, returns base64."
|
||||
},
|
||||
"return_base64": {
|
||||
"full_page": {
|
||||
"type": "boolean",
|
||||
"description": "(screenshot) Also return image as base64 data URI"
|
||||
"description": "Take full-page screenshot"
|
||||
},
|
||||
"interactive_only": {
|
||||
"type": "boolean",
|
||||
"description": "(snapshot) Only show interactive elements (default true)"
|
||||
"description": "Only show interactive elements in snapshot (default true)"
|
||||
},
|
||||
"compact": {
|
||||
"type": "boolean",
|
||||
"description": "(snapshot) Compact output mode (default true)"
|
||||
"description": "Compact snapshot output (default true)"
|
||||
},
|
||||
"depth": {
|
||||
"type": "integer",
|
||||
"description": "(snapshot) Max DOM depth for traversal"
|
||||
},
|
||||
"x": {
|
||||
"type": "integer",
|
||||
"description": "(click_at) X coordinate in viewport pixels"
|
||||
},
|
||||
"y": {
|
||||
"type": "integer",
|
||||
"description": "(click_at) Y coordinate in viewport pixels"
|
||||
"description": "Max depth for snapshot traversal"
|
||||
}
|
||||
},
|
||||
"required": ["action"]
|
||||
@ -419,12 +382,6 @@ impl BrowserState {
|
||||
.await?;
|
||||
let client = self.active_client()?;
|
||||
client.goto(&url).await?;
|
||||
let _ = client
|
||||
.execute(
|
||||
"Object.defineProperty(navigator,'webdriver',{get:()=>undefined})",
|
||||
Vec::<Value>::new(),
|
||||
)
|
||||
.await;
|
||||
let current = client.current_url().await?;
|
||||
Ok(ToolResult {
|
||||
success: true,
|
||||
@ -463,32 +420,8 @@ impl BrowserState {
|
||||
BrowserAction::Fill { selector, value } => {
|
||||
let client = self.active_client()?;
|
||||
let el = find_element(client, &selector).await?;
|
||||
let fill_result = async {
|
||||
let _ = el.clear().await;
|
||||
tokio::time::sleep(Duration::from_millis(300)).await;
|
||||
el.send_keys(&value).await
|
||||
}
|
||||
.await;
|
||||
match fill_result {
|
||||
Ok(()) => {}
|
||||
Err(ref e) if format!("{e}").to_lowercase().contains("not interactable") => {
|
||||
tracing::debug!("Fill via WebDriver failed, falling back to JS: {e}");
|
||||
let escaped = value
|
||||
.replace('\\', "\\\\")
|
||||
.replace('\'', "\\'")
|
||||
.replace('\n', "\\n");
|
||||
client
|
||||
.execute(
|
||||
&format!(
|
||||
"arguments[0].value='{escaped}';arguments[0].dispatchEvent(new Event('input',{{bubbles:true}}));arguments[0].dispatchEvent(new Event('change',{{bubbles:true}}));"
|
||||
),
|
||||
vec![serde_json::to_value(&el)?],
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
Err(e) => return Err(e.into()),
|
||||
}
|
||||
tracing::debug!(action = "fill", output_len = value.len(), "Browser action completed");
|
||||
el.send_keys(&value).await?;
|
||||
Ok(ToolResult {
|
||||
success: true,
|
||||
output: format!("Filled {} with {}", selector, value),
|
||||
@ -497,52 +430,13 @@ impl BrowserState {
|
||||
}
|
||||
BrowserAction::Type { selector, text } => {
|
||||
let client = self.active_client()?;
|
||||
let type_result = match selector {
|
||||
Some(ref sel) => {
|
||||
let el = find_element(client, sel).await?;
|
||||
el.send_keys(&text).await
|
||||
}
|
||||
None => {
|
||||
// Type into active element (useful after click_at)
|
||||
let el = client.active_element().await?;
|
||||
el.send_keys(&text).await
|
||||
}
|
||||
};
|
||||
match type_result {
|
||||
Ok(()) => {}
|
||||
Err(ref e) if format!("{e}").to_lowercase().contains("not interactable") => {
|
||||
tracing::debug!("Type via WebDriver failed, falling back to JS: {e}");
|
||||
let escaped = text
|
||||
.replace('\\', "\\\\")
|
||||
.replace('\'', "\\'")
|
||||
.replace('\n', "\\n");
|
||||
if let Some(ref sel) = selector {
|
||||
let el = find_element(client, sel).await?;
|
||||
client
|
||||
.execute(
|
||||
&format!(
|
||||
"arguments[0].value='{escaped}';arguments[0].dispatchEvent(new Event('input',{{bubbles:true}}));arguments[0].dispatchEvent(new Event('change',{{bubbles:true}}));"
|
||||
),
|
||||
vec![serde_json::to_value(&el)?],
|
||||
)
|
||||
find_element(client, &selector)
|
||||
.await?
|
||||
.send_keys(&text)
|
||||
.await?;
|
||||
} else {
|
||||
client
|
||||
.execute(
|
||||
&format!(
|
||||
"var el=document.activeElement;el.value='{escaped}';el.dispatchEvent(new Event('input',{{bubbles:true}}));el.dispatchEvent(new Event('change',{{bubbles:true}}));"
|
||||
),
|
||||
Vec::<Value>::new(),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
Err(e) => return Err(e.into()),
|
||||
}
|
||||
let target = selector.as_deref().unwrap_or("activeElement");
|
||||
Ok(ToolResult {
|
||||
success: true,
|
||||
output: format!("Typed {} chars into {}", text.len(), target),
|
||||
output: format!("Typed {} chars into {}", text.len(), selector),
|
||||
error: None,
|
||||
})
|
||||
}
|
||||
@ -573,50 +467,29 @@ impl BrowserState {
|
||||
error: None,
|
||||
})
|
||||
}
|
||||
BrowserAction::Screenshot { path, return_base64 } => {
|
||||
BrowserAction::Screenshot { path, full_page } => {
|
||||
let client = self.active_client()?;
|
||||
let png = client.screenshot().await?;
|
||||
let save_path = path.unwrap_or_else(|| {
|
||||
format!(
|
||||
"/tmp/picobot_screenshot_{}.png",
|
||||
std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_secs()
|
||||
)
|
||||
});
|
||||
tokio::fs::write(&save_path, &png).await?;
|
||||
if return_base64 {
|
||||
let _ = full_page;
|
||||
|
||||
match path {
|
||||
Some(p) => {
|
||||
tokio::fs::write(&p, &png).await?;
|
||||
Ok(ToolResult {
|
||||
success: true,
|
||||
output: format!("Screenshot saved to {}", p),
|
||||
error: None,
|
||||
})
|
||||
}
|
||||
None => {
|
||||
let b64 = base64::engine::general_purpose::STANDARD.encode(&png);
|
||||
tracing::debug!(action = "screenshot", output_len = b64.len(), "Browser action completed");
|
||||
return Ok(ToolResult {
|
||||
success: true,
|
||||
output: format!("Screenshot saved to {}. Base64: data:image/png;base64,{}", save_path, b64),
|
||||
error: None,
|
||||
});
|
||||
}
|
||||
tracing::debug!(action = "screenshot", output_len = save_path.len(), "Browser action completed");
|
||||
Ok(ToolResult {
|
||||
success: true,
|
||||
output: format!("Screenshot saved to {}", save_path),
|
||||
output: format!("data:image/png;base64,{}", b64),
|
||||
error: None,
|
||||
})
|
||||
}
|
||||
BrowserAction::Focus { selector } => {
|
||||
let client = self.active_client()?;
|
||||
let el = find_element(client, &selector).await?;
|
||||
client
|
||||
.execute(
|
||||
"arguments[0].focus(); arguments[0].scrollIntoView({block:'center'});",
|
||||
vec![serde_json::to_value(el)?],
|
||||
)
|
||||
.await?;
|
||||
tracing::debug!(action = "focus", output_len = selector.len(), "Browser action completed");
|
||||
Ok(ToolResult {
|
||||
success: true,
|
||||
output: format!("Focused {}", selector),
|
||||
error: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
BrowserAction::Wait {
|
||||
selector,
|
||||
@ -686,52 +559,6 @@ impl BrowserState {
|
||||
error: None,
|
||||
})
|
||||
}
|
||||
BrowserAction::ClickAt { x, y } => {
|
||||
let client = self.active_client()?;
|
||||
let actions = MouseActions::new("mouse".to_string())
|
||||
.then(PointerAction::MoveTo {
|
||||
duration: Some(Duration::from_millis(100)),
|
||||
x: x as f64,
|
||||
y: y as f64,
|
||||
})
|
||||
.then(PointerAction::Down {
|
||||
button: MOUSE_BUTTON_LEFT,
|
||||
})
|
||||
.then(PointerAction::Up {
|
||||
button: MOUSE_BUTTON_LEFT,
|
||||
});
|
||||
client.perform_actions(actions).await?;
|
||||
let _ = client.release_actions().await;
|
||||
// Get info about the clicked element
|
||||
let element_info: Value = client
|
||||
.execute(
|
||||
&format!(
|
||||
"(()=>{{var e=document.elementFromPoint({},{x});if(!e)return null;return{{tag:e.tagName.toLowerCase(),id:e.id||null,type:e.type||null,text:(e.value||e.placeholder||e.innerText||'').slice(0,80)}}}})()",
|
||||
x
|
||||
),
|
||||
Vec::<Value>::new(),
|
||||
)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
let desc = match element_info.as_object() {
|
||||
Some(info) => {
|
||||
let tag = info.get("tag").and_then(|v| v.as_str()).unwrap_or("?");
|
||||
let id = info.get("id").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let el_type = info.get("type").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let text = info.get("text").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let id_str = if id.is_empty() { String::new() } else { format!("#{id}") };
|
||||
let type_str = if el_type.is_empty() { String::new() } else { format!("[type={el_type}]") };
|
||||
let text_str = if text.is_empty() { String::new() } else { format!(" ({text})") };
|
||||
format!("Clicked at ({x},{y}) on <{tag}{id_str}{type_str}>{text_str}")
|
||||
}
|
||||
None => format!("Clicked at ({}, {})", x, y),
|
||||
};
|
||||
Ok(ToolResult {
|
||||
success: true,
|
||||
output: desc,
|
||||
error: None,
|
||||
})
|
||||
}
|
||||
BrowserAction::Scroll { direction, pixels } => {
|
||||
let client = self.active_client()?;
|
||||
let amount = i64::from(pixels.unwrap_or(600));
|
||||
@ -844,15 +671,7 @@ impl BrowserState {
|
||||
args.push(Value::String("--disable-dev-shm-usage".to_string()));
|
||||
}
|
||||
|
||||
args.push(Value::String("--window-size=1920,1080".to_string()));
|
||||
|
||||
// Anti-bot detection
|
||||
args.push(Value::String(
|
||||
"--disable-blink-features=AutomationControlled".to_string(),
|
||||
));
|
||||
args.push(Value::String(
|
||||
"--user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/147.0.0.0 Safari/537.36".to_string(),
|
||||
));
|
||||
args.push(Value::String("--window-size=1280,720".to_string()));
|
||||
|
||||
if let Some(ref binary) = chrome_binary {
|
||||
chrome_options.insert("binary".to_string(), Value::String(binary.clone()));
|
||||
@ -862,11 +681,6 @@ impl BrowserState {
|
||||
chrome_options.insert("args".to_string(), Value::Array(args));
|
||||
}
|
||||
|
||||
chrome_options.insert(
|
||||
"excludeSwitches".to_string(),
|
||||
serde_json::json!(["enable-automation"]),
|
||||
);
|
||||
|
||||
capabilities.insert(
|
||||
"goog:chromeOptions".to_string(),
|
||||
Value::Object(chrome_options),
|
||||
@ -1139,10 +953,7 @@ fn webdriver_key(key: &str) -> String {
|
||||
"pageup" => Key::PageUp.to_string(),
|
||||
"pagedown" => Key::PageDown.to_string(),
|
||||
"space" => " ".to_string(),
|
||||
other => {
|
||||
tracing::warn!("Unrecognized key '{}', this will have no effect (press only supports single named keys)", other);
|
||||
other.to_string()
|
||||
}
|
||||
other => other.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
@ -1170,8 +981,6 @@ fn snapshot_script(interactive_only: bool, compact: bool, depth: Option<i64>) ->
|
||||
}};
|
||||
|
||||
const isInteractive = (el) => {{
|
||||
if (el.disabled || el.readOnly) return false;
|
||||
if (!isVisible(el)) return false;
|
||||
if (el.matches('a,button,input,select,textarea,summary,[role],*[tabindex]')) return true;
|
||||
return typeof el.onclick === 'function';
|
||||
}};
|
||||
@ -1184,7 +993,6 @@ fn snapshot_script(interactive_only: bool, compact: bool, depth: Option<i64>) ->
|
||||
|
||||
const ref = '@e' + (++counter);
|
||||
el.setAttribute('data-zc-ref', ref);
|
||||
const rect = el.getBoundingClientRect();
|
||||
nodes.push({{
|
||||
ref,
|
||||
depth,
|
||||
@ -1193,10 +1001,6 @@ fn snapshot_script(interactive_only: bool, compact: bool, depth: Option<i64>) ->
|
||||
role: el.getAttribute('role'),
|
||||
text,
|
||||
interactive,
|
||||
x: Math.round(rect.x),
|
||||
y: Math.round(rect.y),
|
||||
w: Math.round(rect.width),
|
||||
h: Math.round(rect.height),
|
||||
}});
|
||||
}};
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user