OpenManus 核心架构解析

mInr · 发表于 2025-4-9 16:17:44

OpenManus 介绍

2025年3月5日晚，Manus 的 Demo 引爆媒体；紧接着 1 天后，3月7日，国内 DeepWisdom MetaGPT 团队和 CAMEL AI 团队分别推出并开源了项目 OpenManus 和 OWL，复刻 Manus，持续在网络及 GitHub 社区引发广泛讨论。

GitHub 地址：https://github.com/mannaandpoem/OpenManus

写在前面

OpenManus 团队只用 1 小时就实现了核心系统，整体只用了 3 小时完成最终上线，收获 GitHub 万星。如此低的成本，完全可以基于这套框架实现自己的智能体，先研究一下实现。

整体运行流程

先屏蔽掉架构实现，平铺整体执行框架：

整体的执行框架还是比较简单且明了的，接下来分析 OpenManus 的架构

具体架构

Agent

OpenManus 做了基础 Agent 的分层设计，使得每层的职责相对明确，保证了可扩展性

BaseAgent

BaseAgent 负责一些非常基础的工作，如状态管理、记忆管理和执行流程 run loop

ReActAgent

# Agent layer that splits each step into a think phase and an act phase.
# Subclasses provide think() and act(); step() wires the two together.
class ReActAgent(BaseAgent):
    ...

    @abstractmethod
    async def think(self) -> bool:
        """Process current state and decide next action.

        Returns:
            The flag that step() checks; a falsy value skips act().
        """

    @abstractmethod
    async def act(self) -> str:
        """Execute decided actions and return the outcome as a string."""

    async def step(self) -> str:
        """Run a single think/act cycle.

        Returns:
            The output of act() when think() asks for an action, otherwise
            a fixed "no action needed" notice.
        """
        if await self.think():
            return await self.act()
        return "Thinking complete - no action needed"

ToolCallAgent

class ToolCallAgent(ReActAgent):
  """Base agent class for handling tool/function calls with enhanced abstraction"""
  ...
  # 可用的东西散
  available_tools: ToolCollection = ToolCollection(
      CreateChatCompletion(), Terminate()
)
  # 待施行的东西挪用
  tool_calls: List[ToolCall] = Field(default_factory=list)
  ...
  async def think(self) -> bool:
...
# Get response with tool options
response = await self.llm.ask_tool(
      messages=self.messages,
      system_msgs=(
         [Message.system_message(self.system_prompt)]
         if self.system_prompt
         else None
      ),
      tools=self.available_tools.to_params(),
      tool_choice=self.tool_choices,
)
...
self.tool_calls = tool_calls = (
         response.tool_calls if response and response.tool_calls else []
      )
content = response.content if response and response.content else ""
...
assistant_msg = (
      Message.from_tool_calls(content=content, tool_calls=self.tool_calls)
      if self.tool_calls
      else Message.assistant_message(content)
)
self.memory.add_message(assistant_msg)
...
return bool(self.tool_calls)
  ...
  async def act(self) -> str:
results = []
for co妹妹and in self.tool_calls:
      ...
      result = await self.execute_tool(co妹妹and)

      if self.max_observe:
         result = result[: self.max_observe]

      logger.info(
         f"🎯 Tool '{co妹妹and.function.name}' completed its mission! Result: {result}"
      )

      # Add tool response to memory
      tool_msg = Message.tool_message(
         content=result,
         tool_call_id=co妹妹and.id,
         name=co妹妹and.function.name,
         ...
      )
      self.memory.add_message(tool_msg)
      results.append(result)

return "\n\n".join(results)Manus那里指具体场景下的Agent完毕，承袭自ToolCallAgent，从头界说了 tools 空间，增强了 think办法

class Manus(ToolCallAgent):
  """A versatile general-purpose agent."""
  ...
  # Add general-purpose tools to the tool collection
  available_tools: ToolCollection = Field(
   default_factory=lambda: ToolCollection(
      PythonExecute(), BrowserUseTool(), StrWordStrEditor(), Terminate()
   )
  )
  # 浏览器操纵情况
  browser_context_helper: Optional[BrowserContextHelper] = None
  ...
  #增强 think办法
  async def think(self) -> bool:
"""Process current state and decide next actions with appropriate context."""
original_prompt = self.next_step_prompt
recent_messages = self.memory.messages[-3:] if self.memory.messages else []
browser_in_use = any(
      tc.function.name == BrowserUseTool().name
      for msg in recent_messages
      if msg.tool_calls
      for tc in msg.tool_calls
)

if browser_in_use:
      self.next_step_prompt = (
         await self.browser_context_helper.format_next_step_prompt()
      )

result = await super().think()

# Restore original prompt
self.next_step_prompt = original_prompt

return resultTools正在以前的代码中能够瞅到，ToolCallAgent 颠末挪用 execute_tool办法去施行 tools 挪用

ToolCollection

class ToolCollection:
    """A collection of defined tools."""

    ...

    def __init__(self, *tools: BaseTool):
        """Keep the given tools and index them by their ``name``."""
        self.tools = tools
        self.tool_map = {tool.name: tool for tool in tools}

    ...

    # Execute one tool from the collection
    async def execute(
        self, *, name: str, tool_input: Dict[str, Any] = None
    ) -> ToolResult:
        """Run the tool registered under ``name``.

        Args:
            name: Name of the tool to run.
            tool_input: Keyword arguments passed to the tool; ``None`` is
                treated as no arguments.

        Returns:
            The tool's result, or a ``ToolFailure`` when the name is not in
            the collection or the tool raises ``ToolError``.
        """
        tool = self.tool_map.get(name)
        if not tool:
            return ToolFailure(error=f"Tool {name} is invalid")
        try:
            # tool_input defaults to None, and ``**None`` raises a TypeError
            # that the handler below would not catch.
            return await tool(**(tool_input or {}))
        except ToolError as e:
            return ToolFailure(error=e.message)

BaseTool

class BaseTool(ABC, BaseModel):
name: str
...
async def __call__(self, **kwargs) -> Any:
      """Execute the tool with given parameters."""
      return await self.execute(**kwargs)

@abstractmethod
async def execute(self, **kwargs) -> Any:
      """Execute the tool with given parameters."""CreateChatCompletion以CreateChatCompletion为例，那里具体完毕便没有干深入，差别的东西纷歧样class CreateChatCompletion(BaseTool):
  name: str = "create_chat_completion"
  ...
  async def execute(self, required: list | None = None, **kwargs) -> Any:
   """Execute the chat completion with type conversion.

   Args:
      required: List of required field names or None
      **kwargs: Response data

   Returns:
      Converted response based on response_type
   """
   required = required or self.required

   # Handle case when required is a list
   if isinstance(required, list) and len(required) > 0:
      if len(required) == 1:
            required_field = required[0]
            result = kwargs.get(required_field, "")
      else:
            # Return multiple fields as a dictionary
            return {field: kwargs.get(field, "") for field in required}
   else:
      required_field = "response"
      result = kwargs.get(required_field, "")

   # Type conversion logic
   if self.response_type == str:
      return result

   if isinstance(self.response_type, type) and issubclass(
      self.response_type, BaseModel
   ):
      return self.response_type(**kwargs)

   if get_origin(self.response_type) in (list, dict):
      return result  # Assuming result is already in correct format

   try:
      return self.response_type(result)
   except (ValueError, TypeError):
         return resultLLMLLM 对于中屏障模子之间的差别，那里只会商跟支流程相干的中心办法战属性

class LLM:
  ...
  # Send a chat completion request that carries tool definitions.
  # Returns the message of the first choice, or None when the response has
  # no choices or the first choice has no message.
  # Raises TokenLimitExceeded when check_token_limit rejects the input size.
  async def ask_tool(
      self,
      messages: List[Union[dict, Message]],
      system_msgs: Optional[List[Union[dict, Message]]] = None,
      tools: Optional[List[dict]] = None,
      ...
      **kwargs,
) -> ChatCompletionMessage | None:
...
# Convert the Message list into the message format the LLM accepts;
# system messages, when given, are placed in front.
supports_images = self.model in MULTIMODAL_MODELS
if system_msgs:
      system_msgs = self.format_messages(system_msgs, supports_images)
      messages = system_msgs + self.format_messages(messages, supports_images)
else:
      messages = self.format_messages(messages, supports_images)

# Check whether the token count exceeds the limit;
# tool definitions are counted from their str() form.
input_tokens = self.count_message_tokens(messages)
tools_tokens = 0
if tools:
      for tool in tools:
         tools_tokens += self.count_tokens(str(tool))
input_tokens += tools_tokens

if not self.check_token_limit(input_tokens):
      error_message = self.get_limit_error_message(input_tokens)
      raise TokenLimitExceeded(error_message)
...

# Send the request to the LLM
params = {
      "model": self.model,
      "messages": messages,
      "tools": tools,
      ...
      **kwargs,
}
response: ChatCompletion = await self.client.chat.completions.create(
      **params
)

# Check if response is valid
if not response.choices or not response.choices[0].message:
      # Invalid responses are printed and reported as None rather than raised
      print(response)
      # raise ValueError("Invalid or empty response from LLM")
      return None

# Update token counts
self.update_token_count(
      response.usage.prompt_tokens, response.usage.completion_tokens
)

return response.choices[0].message

关于 Function Call 在这里就不再赘述，想了解的可以看 OpenAI function-calling 文档

最后

OpenManus 后续会基于目前稳定的基础架构做下面几件事情