// Collect every element that could plausibly be interacted with: native
// controls, ARIA buttons, and anything focusable via a non-negative tabindex.
const elements = document.querySelectorAll('button, a, input, textarea, select, [role="button"], [tabindex]:not([tabindex="-1"])');
let validElementIndex = 0; // Consecutive index, advanced only for elements that pass every filter

// Returns the trimmed text of the <label> associated with `input`: an explicit
// label[for=<id>] is tried first, then an enclosing <label>. Returns '' if
// neither yields any text.
const findLabelText = (input) => {
  if (input.id) {
    // CSS.escape keeps ids containing quotes, brackets, etc. from producing
    // an invalid selector (querySelector would throw a SyntaxError).
    const label = document.querySelector(`label[for="${CSS.escape(input.id)}"]`);
    const explicitText = label ? label.innerText.trim() : '';
    if (explicitText) {
      return explicitText;
    }
  }
  const parentLabel = input.closest('label');
  if (!parentLabel) {
    return '';
  }
  const wrappedText = parentLabel.innerText.trim();
  // Drop the first occurrence of the input's current value from the wrapping
  // label's text so the value is not repeated in the final description.
  return input.value ? wrappedText.replace(input.value, '').trim() : wrappedText;
};

// Decorates `baseText` for an <input> with its label (prefix) and its
// placeholder (suffix) when either is available.
const describeInput = (input, baseText) => {
  let described = baseText;
  const labelText = findLabelText(input);
  if (labelText) {
    described = `[Label: ${labelText}] ${described}`;
  }
  if (input.placeholder) {
    described = `${described} [Placeholder: ${input.placeholder}]`;
  }
  return described;
};

// NOTE(review): viewportHeight, viewportWidth and interactiveElements are not
// declared in this block; they are expected to exist in the enclosing scope.
for (const element of elements) {
  // Skip elements that occupy no space on the page.
  const rect = element.getBoundingClientRect();
  if (rect.width === 0 || rect.height === 0) {
    continue;
  }

  // Skip elements that lie entirely outside the viewport.
  const isOffscreen =
    rect.bottom < 0 ||
    rect.top > viewportHeight ||
    rect.right < 0 ||
    rect.left > viewportWidth;
  if (isOffscreen) {
    continue;
  }

  // Skip elements hidden through CSS.
  const style = window.getComputedStyle(element);
  if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') {
    continue;
  }

  // Derive a short human-readable description, trying the most specific
  // source of text first and falling back step by step.
  const tagName = element.tagName.toLowerCase();
  let text = '';
  if (element.value && ['input', 'textarea', 'select'].includes(tagName)) {
    text = element.value;
    if (tagName === 'input') {
      text = describeInput(element, text);
    }
  } else if (element.innerText) {
    // Collapse every run of whitespace (including newlines) into one space.
    text = element.innerText.trim().replace(/\s+/g, ' ');
  } else if (element.alt) { // Image buttons
    text = element.alt;
  } else if (element.title) { // Elements that only carry a title
    text = element.title;
  } else if (element.placeholder) { // Empty fields that only carry a placeholder
    text = `[Placeholder: ${element.placeholder}]`;
  } else if (element.type) { // Fall back to the control type, e.g. [checkbox]
    text = `[${element.type}]`;
    if (tagName === 'input') {
      text = describeInput(element, text);
    }
  } else {
    text = '[No text]';
  }

  // Cap the description at 100 characters (97 + ellipsis) to keep it readable.
  if (text.length > 100) {
    text = `${text.substring(0, 97)}...`;
  }

  // Tag only the elements that passed every filter, so each one can later be
  // addressed through a selector built solely from data-manus-id.
  element.setAttribute('data-manus-id', `manus-element-${validElementIndex}`);
  const selector = `[data-manus-id="manus-element-${validElementIndex}"]`;

  interactiveElements.push({
    index: validElementIndex,
    tag: tagName,
    text: text,
    selector: selector
  });
  validElementIndex++;
}
return interactiveElements;
这样大模型就可以根据 ID 号来操作元素了。
还要进行网页信息提取。目前主流的做法是先去掉不可见元素,再转成 Markdown,然后交给大模型进行提取,以节省 Token,如下:
# Convert to Markdown
markdown_content=markdownify(visible_content)
max_content_length=min(50000,len(markdown_content))
response=awaitself.llm.ask([{
"role":"system",
"content":"You are a professional web page information extraction assistant. Please extract all information from the current page content and convert it to Markdown format."
},
{
"role":"user",
"content":markdown_content[:max_content_length]
}
])
至此,大模型就可以与网页交互并阅读网页内容了。
05
Web UI 设计
Web UI 编写虽然是我的软肋,却是 Cursor 的强项;再结合对正版 Manus 的模仿,也可以搞得七七八八,页面比较简单。
backend:
  # Backend service image; it is started after the sandbox service.
  image: simpleyyt/manus-backend
  depends_on:
    - sandbox
  restart: unless-stopped
  volumes:
    # Docker socket mounted read-only into the backend container.
    - /var/run/docker.sock:/var/run/docker.sock:ro
  networks:
    - manus-network
  environment:
    # OpenAI API base URL
    - API_BASE=https://api.openai.com/v1
    # OpenAI API key, replace with your own
    - API_KEY=sk-xxxx
    # LLM model name
    - MODEL_NAME=gpt-4o
    # LLM temperature parameter, controls randomness
    - TEMPERATURE=0.7
    # Maximum tokens for LLM response
    - MAX_TOKENS=2000
    # Google Search API key for web search capability
    #- GOOGLE_SEARCH_API_KEY=
    # Google Custom Search Engine ID
    #- GOOGLE_SEARCH_ENGINE_ID=
    # Application log level
    - LOG_LEVEL=INFO
    # Docker image used for the sandbox
    - SANDBOX_IMAGE=simpleyyt/manus-sandbox
    # Prefix for sandbox container names
    - SANDBOX_NAME_PREFIX=sandbox
    # Time-to-live for sandbox containers in minutes
    - SANDBOX_TTL_MINUTES=30
    # Docker network for sandbox containers
    - SANDBOX_NETWORK=manus-network
sandbox:
  # Same image name the backend is given through SANDBOX_IMAGE.
  image: simpleyyt/manus-sandbox
  command: /bin/sh -c "exit 0" # prevent sandbox from starting, ensure image is pulled
  # Quoted so YAML reads it as the string "no" rather than boolean false.
  restart: "no"
  networks:
    - manus-network