From b84c6f85dbaf5efc19510b66ab5047ae013c2138 Mon Sep 17 00:00:00 2001 From: xiaoski Date: Sun, 24 May 2026 16:36:11 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=B5=8F=E8=A7=88=E5=99=A8?= =?UTF-8?q?=E5=B7=A5=E5=85=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tools/browser.rs | 314 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 255 insertions(+), 59 deletions(-) diff --git a/src/tools/browser.rs b/src/tools/browser.rs index b1c25f2..d037370 100644 --- a/src/tools/browser.rs +++ b/src/tools/browser.rs @@ -5,7 +5,7 @@ use std::time::Duration; use anyhow::Context; use async_trait::async_trait; use base64::Engine; -use fantoccini::actions::{InputSource, MouseActions, PointerAction}; +use fantoccini::actions::{InputSource, MouseActions, PointerAction, MOUSE_BUTTON_LEFT}; use fantoccini::key::Key; use fantoccini::{Client, ClientBuilder, Locator}; use serde::{Deserialize, Serialize}; @@ -70,11 +70,11 @@ pub enum BrowserAction { #[serde(default)] compact: bool, #[serde(default)] - depth: Option, + depth: Option, }, Click { selector: String }, Fill { selector: String, value: String }, - Type { selector: String, text: String }, + Type { selector: Option, text: String }, GetText { selector: String }, GetTitle, GetUrl, @@ -82,8 +82,9 @@ pub enum BrowserAction { #[serde(default)] path: Option, #[serde(default)] - full_page: bool, + return_base64: bool, }, + Focus { selector: String }, Wait { #[serde(default)] selector: Option, @@ -94,6 +95,7 @@ pub enum BrowserAction { }, Press { key: String }, Hover { selector: String }, + ClickAt { x: u32, y: u32 }, Scroll { direction: String, #[serde(default)] @@ -124,8 +126,7 @@ fn parse_browser_action(action_str: &str, args: &Value) -> anyhow::Result { let selector = args @@ -154,13 +155,13 @@ fn parse_browser_action(action_str: &str, args: &Value) -> anyhow::Result anyhow::Result Ok(BrowserAction::GetUrl), "screenshot" => Ok(BrowserAction::Screenshot { path: args.get("path").and_then(|v| v.as_str()).map(String::from), - full_page: args - .get("full_page") + return_base64: args + .get("return_base64") .and_then(Value::as_bool) .unwrap_or(false), }), + "focus" => { + let selector = args + .get("selector") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing 'selector' for focus"))?; + Ok(BrowserAction::Focus { + selector: selector.to_string(), + }) + } "wait" => Ok(BrowserAction::Wait { selector: args .get("selector") @@ -225,6 +235,17 @@ fn parse_browser_action(action_str: &str, args: &Value) -> anyhow::Result Ok(BrowserAction::Close), + "click_at" => { + let x = args + .get("x") + .and_then(|v| v.as_u64()) + .ok_or_else(|| anyhow::anyhow!("Missing 'x' for click_at"))? as u32; + let y = args + .get("y") + .and_then(|v| v.as_u64()) + .ok_or_else(|| anyhow::anyhow!("Missing 'y' for click_at"))? as u32; + Ok(BrowserAction::ClickAt { x, y }) + } other => anyhow::bail!("Unsupported browser action: {}", other), } } @@ -236,10 +257,18 @@ impl Tool for BrowserTool { } fn description(&self) -> &str { - "Automate browser interactions using WebDriver. \ - First call open to navigate to a URL, then use other actions. \ - Use snapshot to get an accessibility tree of the page. \ - Selectors can be CSS, @e1 refs (from snapshot), text=..., or label=..." + "Automate browser interactions via WebDriver. \ + Actions: open, snapshot, click, fill, type, get_text, get_title, \ + get_url, screenshot, wait, press, hover, scroll, close, focus, click_at. \ + Each session holds a single page; calling open again navigates \ + the current page (does not open a new tab). \ + Selectors: CSS, @e1 refs (from snapshot), text=... for text content, \ + label=... for