A-Mahla
committed on
improve document related shortcut (#15)
Browse files
cua2-core/src/cua2_core/services/agent_utils/desktop_agent.py
CHANGED
|
@@ -172,15 +172,15 @@ class E2BVisionAgent(CodeAgent):
|
|
| 172 |
return f"Typed text: '{clean_text}'"
|
| 173 |
|
| 174 |
@tool
|
| 175 |
-
def press(
|
| 176 |
"""
|
| 177 |
Presses a keyboard key
|
| 178 |
Args:
|
| 179 |
-
|
| 180 |
"""
|
| 181 |
-
self.desktop.press(
|
| 182 |
-
self.logger.log(f"Pressed
|
| 183 |
-
return f"Pressed
|
| 184 |
|
| 185 |
@tool
|
| 186 |
def go_back() -> str:
|
|
|
|
| 172 |
return f"Typed text: '{clean_text}'"
|
| 173 |
|
| 174 |
@tool
|
| 175 |
+
def press(keys: list[str]) -> str:
|
| 176 |
"""
|
| 177 |
Presses a keyboard key
|
| 178 |
Args:
|
| 179 |
+
keys: The keys to press (e.g. ["enter", "space", "backspace", etc.]).
|
| 180 |
"""
|
| 181 |
+
self.desktop.press(keys)
|
| 182 |
+
self.logger.log(f"Pressed keys: {keys}")
|
| 183 |
+
return f"Pressed keys: {keys}"
|
| 184 |
|
| 185 |
@tool
|
| 186 |
def go_back() -> str:
|
cua2-core/src/cua2_core/services/agent_utils/prompt.py
CHANGED
|
@@ -97,6 +97,16 @@ Never manually click the browser icon — use `open_url()` directly for web page
|
|
| 97 |
- For websites: `open_url("https://google.com")`
|
| 98 |
- For applications: `launch("app_name")`
|
| 99 |
- Never manually navigate to apps via clicking icons—use the open tools directly.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
- Complete one atomic action per step: e.g., **click**, **type**, or **wait**.
|
| 101 |
- Never combine multiple tool calls in one step.
|
| 102 |
- Validate that your previous action succeeded before continuing.
|
|
@@ -155,13 +165,14 @@ final_answer("The task is complete and the text 'Hello World' is visible in the
|
|
| 155 |
|
| 156 |
<core_principles>
|
| 157 |
- Think visually and spatially.
|
| 158 |
-
- Always ground your reasoning in what
|
| 159 |
-
- Never assume what
|
| 160 |
- Always check the result of your last action.
|
| 161 |
- Be deliberate, consistent, and patient.
|
| 162 |
- **ALWAYS START** by analyzing if the task requires opening an application or URL. If so, your **first action** must be:
|
| 163 |
- For websites: `open_url("https://google.com")`
|
| 164 |
- For applications: `open("app_name")`
|
| 165 |
- **NEVER** manually navigate to apps via clicking icons—use the open tools directly.
|
|
|
|
| 166 |
</core_principles>
|
| 167 |
""".replace("<<current_date>>", datetime.now().strftime("%A, %d-%B-%Y"))
|
|
|
|
| 97 |
- For websites: `open_url("https://google.com")`
|
| 98 |
- For applications: `launch("app_name")`
|
| 99 |
- Never manually navigate to apps via clicking icons—use the open tools directly.
|
| 100 |
+
- **For document handling**, prioritize using keyboard shortcuts for common operations instead of clicking menu items:
|
| 101 |
+
- Save document: `press(['ctrl', 's'])`
|
| 102 |
+
- Copy: `press(['ctrl', 'c'])`
|
| 103 |
+
- Paste: `press(['ctrl', 'v'])`
|
| 104 |
+
- Undo: `press(['ctrl', 'z'])`
|
| 105 |
+
- Select all: `press(['ctrl', 'a'])`
|
| 106 |
+
- Find: `press(['ctrl', 'f'])`
|
| 107 |
+
- New document: `press(['ctrl', 'n'])`
|
| 108 |
+
- Open file: `press(['ctrl', 'o'])`
|
| 109 |
+
- These shortcuts are faster, more reliable, and work across most applications.
|
| 110 |
- Complete one atomic action per step: e.g., **click**, **type**, or **wait**.
|
| 111 |
- Never combine multiple tool calls in one step.
|
| 112 |
- Validate that your previous action succeeded before continuing.
|
|
|
|
| 165 |
|
| 166 |
<core_principles>
|
| 167 |
- Think visually and spatially.
|
| 168 |
+
- Always ground your reasoning in what's visible in the screenshot.
|
| 169 |
+
- Never assume what's on the next screen.
|
| 170 |
- Always check the result of your last action.
|
| 171 |
- Be deliberate, consistent, and patient.
|
| 172 |
- **ALWAYS START** by analyzing if the task requires opening an application or URL. If so, your **first action** must be:
|
| 173 |
- For websites: `open_url("https://google.com")`
|
| 174 |
- For applications: `open("app_name")`
|
| 175 |
- **NEVER** manually navigate to apps via clicking icons—use the open tools directly.
|
| 176 |
+
|
| 177 |
</core_principles>
|
| 178 |
""".replace("<<current_date>>", datetime.now().strftime("%A, %d-%B-%Y"))
|