Agent原理、构建模式(附视频链接)
对应的 B 站视频链接:Agent 的概念、原理与构建模式 —— 从零打造一个简化版的 Claude Code_哔哩哔哩_bilibili
一、概念
背景
大模型无法感知或改变外界环境
- 在 IDE 中,需要手动把代码全部复制给大模型
- 大模型无法把输出的代码直接写入 IDE 中的文件
定义
智能体是能够感知环境、进行决策并采取行动以实现特定目标的自主实体。它具有感知能力、决策能力和执行能力。
二、构建模式
ReAct模式
Reasoning and Acting(思考和行动)
该模式的智能体包含:
- 大模型
- 工具(函数)
- Agent主程序(下文会给出)
该模式下的agent主程序:
import ast
import inspect
import os
import re
from string import Template
from typing import List, Callable, Tuple

import click
from dotenv import load_dotenv
from openai import OpenAI
import platform

from prompt_template import react_system_prompt_template


class ReActAgent:
    """Minimal ReAct (Reasoning and Acting) agent loop.

    The agent repeatedly asks the model for a reply, prints the
    ``<thought>`` section, and then either returns the ``<final_answer>``
    or executes the tool call found in ``<action>`` and feeds the result
    back to the model as an ``<observation>`` message.
    """

    # Escape sequences decoded inside quoted action arguments. Any other
    # backslash sequence is left untouched.
    _ESCAPE_MAP = {
        '"': '"',
        "'": "'",
        "n": "\n",
        "t": "\t",
        "r": "\r",
        "\\": "\\",
    }

    def __init__(self, tools: List[Callable], model: str, project_directory: str):
        """Store the tools (keyed by function name), model id and project path.

        Raises:
            ValueError: if the OpenRouter API key cannot be found.
        """
        self.tools = {func.__name__: func for func in tools}
        self.model = model
        self.project_directory = project_directory
        self.client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=ReActAgent.get_api_key(),
        )

    def run(self, user_input: str):
        """Run the thought/action/observation loop until the model answers.

        Args:
            user_input: The task, sent to the model wrapped in ``<question>``.

        Returns:
            The text of ``<final_answer>``, or a fixed message when the user
            declines to run a terminal command.

        Raises:
            RuntimeError: if a reply has neither a final answer nor an action.
            ValueError: if the action is not a valid function call.
        """
        messages = [
            {"role": "system", "content": self.render_system_prompt(react_system_prompt_template)},
            {"role": "user", "content": f"<question>{user_input}</question>"},
        ]

        while True:
            # Ask the model; call_model also appends the reply to `messages`.
            content = self.call_model(messages)

            # Show the reasoning, if the model produced any.
            thought_match = re.search(r"<thought>(.*?)</thought>", content, re.DOTALL)
            if thought_match:
                thought = thought_match.group(1)
                print(f"\n\n💭 Thought: {thought}")

            # A final answer ends the loop.
            if "<final_answer>" in content:
                final_answer = re.search(r"<final_answer>(.*?)</final_answer>", content, re.DOTALL)
                if final_answer:
                    return final_answer.group(1)
                # The closing tag is missing (e.g. truncated output): return
                # everything after the opening tag instead of crashing.
                return content.split("<final_answer>", 1)[1]

            # Otherwise the reply must contain a tool call.
            action_match = re.search(r"<action>(.*?)</action>", content, re.DOTALL)
            if not action_match:
                raise RuntimeError("模型未输出 <action>")
            action = action_match.group(1)
            tool_name, args = self.parse_action(action)

            # Arguments may be non-strings (numbers, lists), so stringify
            # them for display.
            print(f"\n\n🔧 Action: {tool_name}({', '.join(str(arg) for arg in args)})")

            # Only terminal commands need user confirmation; other tools run
            # directly.
            if tool_name == "run_terminal_command":
                should_continue = input("\n\n是否继续?(Y/N)")
            else:
                should_continue = "y"
            if should_continue.strip().lower() != 'y':
                print("\n\n操作已取消。")
                return "操作被用户取消"

            # Tool failures (including an unknown tool name) are reported to
            # the model as the observation rather than aborting the run.
            try:
                observation = self.tools[tool_name](*args)
            except Exception as e:
                observation = f"工具执行错误:{str(e)}"
            print(f"\n\n🔍 Observation:{observation}")
            obs_msg = f"<observation>{observation}</observation>"
            messages.append({"role": "user", "content": obs_msg})

    def get_tool_list(self) -> str:
        """Return one ``- name(signature): docstring`` line per registered tool."""
        return "\n".join(
            f"- {func.__name__}{inspect.signature(func)}: {inspect.getdoc(func)}"
            for func in self.tools.values()
        )

    def render_system_prompt(self, system_prompt_template: str) -> str:
        """Fill the system prompt template.

        Substitutes ``operating_system``, ``tool_list`` and ``file_list``
        (absolute paths of the entries directly inside the project directory).
        """
        tool_list = self.get_tool_list()
        file_list = ", ".join(
            os.path.abspath(os.path.join(self.project_directory, f))
            for f in os.listdir(self.project_directory)
        )
        return Template(system_prompt_template).substitute(
            operating_system=self.get_operating_system_name(),
            tool_list=tool_list,
            file_list=file_list,
        )

    @staticmethod
    def get_api_key() -> str:
        """Load the API key from an environment variable.

        Raises:
            ValueError: if ``OPENROUTER_API_KEY`` is unset or empty.
        """
        load_dotenv()
        api_key = os.getenv("OPENROUTER_API_KEY")
        if not api_key:
            raise ValueError("未找到 OPENROUTER_API_KEY 环境变量,请在 .env 文件中设置。")
        return api_key

    def call_model(self, messages):
        """Send ``messages`` to the model and return the reply text.

        The reply is also appended to ``messages`` as an assistant message.
        """
        print("\n\n正在请求模型,请稍等...")
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
        )
        # The API may return no text content; normalise to "" so the regex
        # searches in run() do not fail on None.
        content = response.choices[0].message.content or ""
        messages.append({"role": "assistant", "content": content})
        return content

    def parse_action(self, code_str: str) -> Tuple[str, List[str]]:
        """Parse ``name(arg1, arg2, ...)`` into the name and its arguments.

        Arguments are split on top-level commas only: commas inside string
        literals or inside ``()``, ``[]`` and ``{}`` do not split. Each
        argument is converted by ``_parse_single_arg``, so values are not
        necessarily strings.

        Raises:
            ValueError: if ``code_str`` does not look like a function call.
        """
        # Models often put the call on its own line inside <action>, so
        # surrounding whitespace must not defeat the anchored match.
        match = re.match(r'(\w+)\((.*)\)', code_str.strip(), re.DOTALL)
        if not match:
            raise ValueError("Invalid function call syntax")

        func_name = match.group(1)
        args_str = match.group(2).strip()

        args = []
        current = []
        quote = None      # the quote character of the string we are inside
        escaped = False   # True when the previous in-string char was a backslash
        depth = 0         # nesting depth of (), [] and {}

        for char in args_str:
            if quote is not None:
                current.append(char)
                if escaped:
                    # This character is escaped and cannot close the string.
                    escaped = False
                elif char == "\\":
                    escaped = True
                elif char == quote:
                    quote = None
            elif char in ('"', "'"):
                quote = char
                current.append(char)
            elif char in "([{":
                depth += 1
                current.append(char)
            elif char in ")]}":
                depth -= 1
                current.append(char)
            elif char == "," and depth == 0:
                # Top-level comma: the current argument is complete.
                args.append(self._parse_single_arg("".join(current).strip()))
                current = []
            else:
                current.append(char)

        # Add the last argument, if any.
        last_arg = "".join(current).strip()
        if last_arg:
            args.append(self._parse_single_arg(last_arg))

        return func_name, args

    def _parse_single_arg(self, arg_str: str):
        """Convert one argument's source text into a Python value.

        Quoted strings are unquoted and their common escapes decoded. Other
        text is passed to ``ast.literal_eval``; if that fails, the raw text
        is returned.
        """
        arg_str = arg_str.strip()

        # String literal: strip the outer quotes and decode escapes.
        if (arg_str.startswith('"') and arg_str.endswith('"')) or \
                (arg_str.startswith("'") and arg_str.endswith("'")):
            inner_str = arg_str[1:-1]
            # Decode in a single left-to-right pass so that an escaped
            # backslash followed by "n" stays a backslash plus "n" instead
            # of turning into a newline.
            return re.sub(
                r"\\(.)",
                lambda m: self._ESCAPE_MAP.get(m.group(1), m.group(0)),
                inner_str,
            )

        # Anything else: try to read it as a Python literal.
        try:
            return ast.literal_eval(arg_str)
        except (SyntaxError, ValueError):
            # Not a literal; return the raw text.
            return arg_str

    def get_operating_system_name(self):
        """Return "macOS", "Windows", "Linux" or "Unknown" for this platform."""
        os_map = {
            "Darwin": "macOS",
            "Windows": "Windows",
            "Linux": "Linux",
        }
        return os_map.get(platform.system(), "Unknown")


# NOTE: the docstrings of the tool functions below are inserted into the
# system prompt by ReActAgent.get_tool_list, so they are runtime text and are
# kept exactly as written.

def read_file(file_path):
    """用于读取文件内容"""
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()


def write_to_file(file_path, content):
    """将指定内容写入指定文件"""
    with open(file_path, "w", encoding="utf-8") as f:
        # NOTE(review): this turns every literal backslash-n in the content
        # into a real newline, which also rewrites "\n" inside source code
        # being written. Behaviour kept as-is; confirm it is intended.
        f.write(content.replace("\\n", "\n"))
    return "写入成功"


def run_terminal_command(command):
    """用于执行终端命令"""
    import subprocess

    # NOTE(review): the command comes from model output and runs through the
    # shell. The agent loop asks the user for confirmation before calling
    # this tool; do not call it on unreviewed input.
    run_result = subprocess.run(command, shell=True, capture_output=True, text=True)
    return "执行成功" if run_result.returncode == 0 else run_result.stderr


@click.command()
@click.argument('project_directory',
                type=click.Path(exists=True, file_okay=False, dir_okay=True))
def main(project_directory):
    """Ask for a task on stdin, run the agent on it and print the answer."""
    project_dir = os.path.abspath(project_directory)

    tools = [read_file, write_to_file, run_terminal_command]
    agent = ReActAgent(tools=tools, model="openai/gpt-4o", project_directory=project_dir)

    task = input("请输入任务:")

    final_answer = agent.run(task)

    print(f"\n\n✅ Final Answer:{final_answer}")


if __name__ == "__main__":
    main()
Plan-And-Execute模式
该模式的智能体包含:
- Plan模型
- Re-Plan模型
- agent主程序
- 二级智能体(例如 ReAct 模式的智能体,与主程序是嵌套关系)