Launch a ComputerBox with a full GUI desktop, take screenshots, control the mouse and keyboard, and automate a complete workflow — all inside an isolated VM.
ComputerBox runs a full Linux desktop (XFCE) inside an isolated VM and gives you programmatic control over the screen, mouse, and keyboard. Use it when you need GUI automation — filling out forms, testing desktop apps, or driving tools that don’t have a CLI.
Need browser automation only? Use BrowserBox instead — it’s lighter and gives you direct Playwright/Puppeteer access without a full desktop.
Create a ComputerBox, wait for the desktop to be ready, and capture a screenshot.
Python
Node.js
screenshot.py
import asyncio
import base64

from boxlite import ComputerBox


async def main():
    """Boot a ComputerBox desktop, wait for it to load, and save a screenshot."""
    async with ComputerBox() as desktop:
        # Block until the XFCE desktop environment is fully up and responsive.
        await desktop.wait_until_ready()
        print("Desktop is ready")

        # Grab the current screen contents as a base64-encoded PNG.
        screenshot = await desktop.screenshot()
        print(f"Screen size: {screenshot['width']}x{screenshot['height']}")

        # Decode the PNG payload and persist it to disk.
        with open("desktop.png", "wb") as f:
            f.write(base64.b64decode(screenshot["data"]))
        print("Screenshot saved to desktop.png")


if __name__ == "__main__":
    asyncio.run(main())
screenshot.js
import { ComputerBox } from '@boxlite-ai/boxlite';import { writeFileSync } from 'fs';async function main() { const desktop = new ComputerBox(); try { // Wait for the GUI desktop to fully load await desktop.waitUntilReady(); console.log('Desktop is ready'); // Take a screenshot const screenshot = await desktop.screenshot(); console.log(`Screen size: ${screenshot.width}x${screenshot.height}`); // Save the screenshot to a file writeFileSync('desktop.png', Buffer.from(screenshot.data, 'base64')); console.log('Screenshot saved to desktop.png'); } finally { await desktop.stop(); }}main();
What’s happening:
ComputerBox boots a VM with the lscr.io/linuxserver/webtop:ubuntu-xfce image — a full Ubuntu desktop with XFCE
wait_until_ready() blocks until the desktop environment is loaded and responsive
screenshot() returns a base64-encoded PNG with the current screen contents
import asyncio

from boxlite import ComputerBox


async def main():
    """Walk through every mouse control exposed by ComputerBox."""
    async with ComputerBox() as desktop:
        await desktop.wait_until_ready()

        # Position the pointer at an absolute screen coordinate.
        await desktop.mouse_move(512, 384)

        # Read back where the pointer currently sits.
        x, y = await desktop.cursor_position()
        print(f"Cursor is at ({x}, {y})")

        # Single left click at the current position.
        await desktop.left_click()

        # Double-click, e.g. to open an item.
        await desktop.double_click()

        # Right-click to bring up a context menu.
        await desktop.right_click()

        # Press at (100, 100), drag, and release at (400, 400).
        await desktop.left_click_drag(100, 100, 400, 400)

        print("Mouse interactions complete")


if __name__ == "__main__":
    asyncio.run(main())
mouse.js
import { ComputerBox } from '@boxlite-ai/boxlite';

async function main() {
  const desktop = new ComputerBox();
  try {
    await desktop.waitUntilReady();

    // Position the pointer at an absolute screen coordinate.
    await desktop.mouseMove(512, 384);

    // Read back where the pointer currently sits.
    const [x, y] = await desktop.cursorPosition();
    console.log(`Cursor is at (${x}, ${y})`);

    // Single left click at the current position.
    await desktop.leftClick();

    // Double-click, e.g. to open an item.
    await desktop.doubleClick();

    // Right-click to bring up a context menu.
    await desktop.rightClick();

    // Press at (100, 100), drag, and release at (400, 400).
    await desktop.leftClickDrag(100, 100, 400, 400);

    console.log('Mouse interactions complete');
  } finally {
    // Always shut the VM down, even if a step above throws.
    await desktop.stop();
  }
}

main();
Put it all together — open a text editor, type content, save the file, and verify with a screenshot.
Python
Node.js
workflow.py
import asyncio
import base64

from boxlite import ComputerBox


async def main():
    """Open a text editor in the VM, type notes, save, and verify via screenshot.

    End-to-end GUI workflow: launch a terminal, start Mousepad on a scratch
    file, type multi-line content, save with Ctrl+S, and capture the screen
    to confirm the result.
    """
    # Give the desktop extra headroom (2 vCPUs, 2 GiB) for the GUI workflow.
    async with ComputerBox(cpu=2, memory=2048) as desktop:
        await desktop.wait_until_ready()

        # Open a terminal with the XFCE shortcut, then launch the Mousepad
        # text editor (bundled with XFCE) on /tmp/notes.txt in the background.
        # NOTE: the original comment claimed this double-clicks the desktop
        # and uses the app menu — it never did; everything goes through the
        # terminal. Comment corrected to match the code.
        await desktop.key("ctrl+alt+t")  # Open terminal
        await asyncio.sleep(2)  # Wait for the terminal to appear and take focus
        await desktop.type("mousepad /tmp/notes.txt &")
        await desktop.key("Return")
        await asyncio.sleep(3)  # Wait for Mousepad to open and grab focus

        # Type content into the editor
        await desktop.type("Meeting Notes")
        await desktop.key("Return")
        await desktop.key("Return")
        await desktop.type("1. Review Q4 targets")
        await desktop.key("Return")
        await desktop.type("2. Plan sprint backlog")
        await desktop.key("Return")
        await desktop.type("3. Assign action items")

        # Save the file with Ctrl+S (no dialog: the path was given at launch)
        await desktop.key("ctrl+s")
        await asyncio.sleep(1)

        # Take a screenshot to verify the typed content is on screen
        screenshot = await desktop.screenshot()
        with open("workflow_result.png", "wb") as f:
            f.write(base64.b64decode(screenshot["data"]))
        print("Workflow complete — screenshot saved to workflow_result.png")


if __name__ == "__main__":
    asyncio.run(main())
workflow.js
import { ComputerBox } from '@boxlite-ai/boxlite';import { writeFileSync } from 'fs';function sleep(ms) { return new Promise((resolve) => setTimeout(resolve, ms));}async function main() { const desktop = new ComputerBox({ cpus: 2, memoryMib: 2048 }); try { await desktop.waitUntilReady(); // Open the Mousepad text editor (comes with XFCE) await desktop.key('ctrl+alt+t'); // Open terminal await sleep(2000); await desktop.type('mousepad /tmp/notes.txt &'); await desktop.key('Return'); await sleep(3000); // Type content into the editor await desktop.type('Meeting Notes'); await desktop.key('Return'); await desktop.key('Return'); await desktop.type('1. Review Q4 targets'); await desktop.key('Return'); await desktop.type('2. Plan sprint backlog'); await desktop.key('Return'); await desktop.type('3. Assign action items'); // Save the file with Ctrl+S await desktop.key('ctrl+s'); await sleep(1000); // Take a screenshot to verify const screenshot = await desktop.screenshot(); writeFileSync('workflow_result.png', Buffer.from(screenshot.data, 'base64')); console.log('Workflow complete — screenshot saved to workflow_result.png'); } finally { await desktop.stop(); }}main();
You can also watch the desktop live in your browser at http://localhost:3000 (or the port you configured with gui_http_port). This is useful for debugging automation scripts — you can see exactly what is happening on screen in real time while your code runs.