AI Agent开发实战(二):实现你的第一个Agent
一、开场:Hello Agent
大家好,我是老金。
环境搭建好了,今天我们来写第一个AI Agent!
不要被“Agent”这个词吓到,本质上Agent就是一个能够:
- 接收用户输入
- 调用LLM思考
- 执行动作(可选)
- 返回结果
今天我们实现一个最简单的Agent——对话Agent。
二、Agent核心抽象
2.1 基础Agent类
# src/agents/base.py
from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional
from pydantic import BaseModel
from ..utils.llm_client import LLMClient
from ..utils.logger import logger
class Message(BaseModel):
    """One chat message in the conversation history."""
    # One of: system | user | assistant | tool
    role: str
    content: str
    # Optional sender name, used for tool messages.
    name: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to the dict shape expected by chat-completion APIs."""
        payload: Dict[str, Any] = {"role": self.role, "content": self.content}
        if self.name:
            payload["name"] = self.name
        return payload
class AgentState(BaseModel):
    """Mutable run state carried by an agent."""
    messages: List[Message] = []   # full conversation history
    current_step: str = "idle"     # "idle" or "running"
    iteration_count: int = 0       # think/act iterations used in the current run
    max_iterations: int = 10       # safety cap for the run loop
class BaseAgent(ABC):
    """Abstract base class for agents.

    Owns the message history (via AgentState) and drives a simple
    think/act loop. Subclasses implement `think` (call the LLM) and
    `act` (perform an action based on the thought).
    """

    def __init__(
        self,
        llm_client: LLMClient,
        system_prompt: str = "你是一个友好的AI助手。",
        max_iterations: int = 10
    ):
        """
        Args:
            llm_client: wrapper used to talk to the LLM backend.
            system_prompt: system message seeded as the first history entry.
            max_iterations: cap on the think/act loop in `run`.
        """
        self.llm = llm_client
        self.system_prompt = system_prompt
        self.state = AgentState(max_iterations=max_iterations)
        # Seed the conversation with the system prompt.
        self._add_message("system", system_prompt)

    def _add_message(self, role: str, content: str, name: Optional[str] = None):
        """Append one message to the conversation history."""
        message = Message(role=role, content=content, name=name)
        self.state.messages.append(message)
        logger.info(f"Added message: {role} - {content[:50]}...")

    def get_history(self) -> List[Dict[str, str]]:
        """Return the history as plain dicts suitable for an LLM API call."""
        return [msg.to_dict() for msg in self.state.messages]

    @abstractmethod
    async def think(self) -> str:
        """Think: call the LLM and return its response."""
        pass

    @abstractmethod
    async def act(self, thought: str) -> Any:
        """Act on a thought (tool call, side effect, ...)."""
        pass

    async def run(self, user_input: str) -> str:
        """Run the agent on one user input and return the final response.

        NOTE(review): the original text was garbled here (the loop
        condition was cut off by formatting loss); this reconstructs the
        intended think/act loop bounded by `max_iterations`.
        """
        # Reset per-run state.
        self.state.current_step = "running"
        self.state.iteration_count = 0
        # Record the user's input.
        self._add_message("user", user_input)
        # Think/act loop, bounded to avoid infinite iteration.
        response = ""
        while self.state.iteration_count < self.state.max_iterations:
            self.state.iteration_count += 1
            response = await self.think()
            if not self._should_act():
                break
            await self.act(response)
        self.state.current_step = "idle"
        return response

    def _should_act(self) -> bool:
        """Decide whether `act` should run after a thought."""
        # Simple default: conversational agents never need an action.
        return False
三、对话Agent实现
3.1 简单对话Agent
# src/agents/chat_agent.py
from .base import BaseAgent, Message
from ..utils.llm_client import LLMClient
from typing import Optional
class ChatAgent(BaseAgent):
    """Minimal conversational agent: each turn is a single LLM call."""

    def __init__(
        self,
        llm_client: LLMClient,
        personality: str = "友好、专业、乐于助人",
        **kwargs
    ):
        """Build the system prompt from *personality* and delegate to BaseAgent."""
        system_prompt = f"""你是一个AI助手。
性格特点:{personality}
回答要求:
1. 回答准确、有帮助
2. 语气友好、自然
3. 如果不确定,诚实说明
4. 可以适当追问澄清问题
"""
        super().__init__(llm_client, system_prompt=system_prompt, **kwargs)

    async def think(self) -> str:
        """One LLM round-trip over the full history; records the reply."""
        reply = await self.llm.chat(self.get_history())
        self._add_message("assistant", reply)
        return reply

    async def act(self, thought: str) -> None:
        """Chat agents perform no external actions."""
        pass

    async def chat(self, user_input: str) -> str:
        """Public conversation entry point; forwards to the base run loop."""
        return await self.run(user_input)
3.2 使用示例
# examples/chat_demo.py
import asyncio
from src.utils.llm_client import LLMClient
from src.agents.chat_agent import ChatAgent
async def main():
    """Interactive console demo for ChatAgent; type 'quit' to exit."""
    # Create the LLM client.
    llm = LLMClient(provider="openai", model="gpt-4-turbo-preview")
    # Create the agent.
    agent = ChatAgent(
        llm_client=llm,
        personality="技术专家,擅长Python和AI"
    )
    # BUG FIX: the article lost the backslashes ("===n" printed a literal
    # "n"); restored to "\n" as the sample output shows.
    print("=== 开始对话(输入 'quit' 退出)===\n")
    while True:
        user_input = input("用户: ")
        if user_input.lower() == 'quit':
            break
        response = await agent.chat(user_input)
        print(f"Agent: {response}\n")

if __name__ == "__main__":
    asyncio.run(main())
3.3 运行效果
=== 开始对话(输入 'quit' 退出)===
用户: 你好,介绍一下自己
Agent: 你好!我是一个AI助手,专注于技术领域,特别是Python和AI相关的话题。我可以帮你解答编程问题、解释技术概念,或者讨论AI的最新发展。有什么我可以帮你的吗?
用户: Python中的装饰器是什么?
Agent: 装饰器是Python的一个强大特性,它本质上是一个函数,用于修改其他函数的行为。
简单来说,装饰器允许你在不修改原函数代码的情况下,给函数添加额外的功能。
基本语法:
```python
@decorator
def my_function():
pass
举个例子:
def log_decorator(func):
def wrapper(*args, **kwargs):
print(f"Calling {func.__name__}")
result = func(*args, **kwargs)
print(f"Finished {func.__name__}")
return result
return wrapper
@log_decorator
def say_hello(name):
print(f"Hello, {name}!")
# 等价于 say_hello = log_decorator(say_hello)
需要更详细的解释吗?
用户: quit
四、带记忆的Agent
4.1 会话管理
# src/memory/session.py
import json
from datetime import datetime
from typing import Any, Dict, List, Optional

from pydantic import BaseModel
class Session(BaseModel):
    """A single conversation session: ordered messages plus metadata."""
    session_id: str
    created_at: datetime
    # Each entry carries role/content plus an ISO-8601 "timestamp".
    messages: List[Dict[str, str]] = []
    metadata: Dict[str, Any] = {}

    def add_message(self, role: str, content: str):
        """Append one message, stamped with the current time."""
        entry = {
            "role": role,
            "content": content,
            "timestamp": datetime.now().isoformat()
        }
        self.messages.append(entry)

    def get_context(self, max_messages: int = 20) -> List[Dict[str, str]]:
        """Return the newest *max_messages* messages as LLM-ready
        role/content dicts (timestamps are stripped)."""
        window = self.messages[-max_messages:]
        return [{"role": m["role"], "content": m["content"]} for m in window]
class SessionManager:
    """In-memory registry of Session objects keyed by session id."""

    def __init__(self):
        # session_id -> Session
        self.sessions: Dict[str, Session] = {}

    def create_session(self, session_id: str = None) -> Session:
        """Create and register a session; generates a UUID when no id is given."""
        import uuid
        sid = session_id if session_id else str(uuid.uuid4())
        new_session = Session(session_id=sid, created_at=datetime.now())
        self.sessions[sid] = new_session
        return new_session

    def get_session(self, session_id: str) -> Optional[Session]:
        """Look up a session by id; None when unknown."""
        return self.sessions.get(session_id)

    def clear_session(self, session_id: str):
        """Empty a session's message list (the session itself survives)."""
        target = self.sessions.get(session_id)
        if target is not None:
            target.messages = []

    def delete_session(self, session_id: str):
        """Remove a session entirely; no-op when unknown."""
        self.sessions.pop(session_id, None)
4.2 带会话的Agent
# src/agents/session_chat_agent.py
from .chat_agent import ChatAgent
from ..memory.session import SessionManager, Session
from ..utils.llm_client import LLMClient
from typing import Optional
class SessionChatAgent:
    """Chat agent with per-session history kept by a SessionManager.

    Unlike ChatAgent it does not inherit BaseAgent: each turn rebuilds
    the message list from the session's stored context.
    """

    def __init__(
        self,
        llm_client: LLMClient,
        personality: str = "友好、专业",
        max_history: int = 20
    ):
        """
        Args:
            llm_client: LLM backend wrapper.
            personality: interpolated into the system prompt.
            max_history: max number of past messages sent per request.
        """
        self.llm = llm_client
        self.personality = personality
        self.max_history = max_history
        self.session_manager = SessionManager()
        self._init_system_prompt()

    def _init_system_prompt(self):
        """Build the system prompt from the configured personality."""
        self.system_prompt = f"""你是一个AI助手。
性格特点:{self.personality}
你有访问之前对话的能力,可以基于上下文回答问题。
"""

    def create_session(self, session_id: Optional[str] = None) -> str:
        """Create a session (optionally with a caller-chosen id) and return its id."""
        session = self.session_manager.create_session(session_id)
        return session.session_id

    async def chat(
        self,
        user_input: str,
        session_id: Optional[str] = None
    ) -> dict:
        """Run one chat turn inside a session.

        Returns:
            dict with "session_id", "response" and "message_count".
        """
        # Resolve the session: reuse, recreate by id, or start fresh.
        if session_id:
            session = self.session_manager.get_session(session_id)
            if not session:
                session = self.session_manager.create_session(session_id)
        else:
            session = self.session_manager.create_session()
        # Record the user message.
        session.add_message("user", user_input)
        # System prompt + the most recent context window.
        messages = [
            {"role": "system", "content": self.system_prompt}
        ] + session.get_context(self.max_history)
        response = await self.llm.chat(messages)
        session.add_message("assistant", response)
        return {
            "session_id": session.session_id,
            "response": response,
            "message_count": len(session.messages)
        }

    # BUG FIX: the original annotated this as List[Dict[str, str]] but the
    # module only imports Optional from typing, raising NameError at import
    # time; builtin generics (PEP 585, Python 3.9+) need no extra import.
    def get_history(self, session_id: str) -> list[dict[str, str]]:
        """Return the raw stored messages for a session ([] when unknown)."""
        session = self.session_manager.get_session(session_id)
        if session:
            return session.messages
        return []

    def clear_history(self, session_id: str):
        """Clear all messages in a session."""
        self.session_manager.clear_session(session_id)
4.3 使用示例
# examples/session_chat_demo.py
import asyncio
from src.utils.llm_client import LLMClient
from src.agents.session_chat_agent import SessionChatAgent
async def main():
    """Scripted multi-turn demo showing session memory across turns."""
    llm = LLMClient(provider="openai", model="gpt-4-turbo-preview")
    agent = SessionChatAgent(llm_client=llm)
    # Create a session so all turns share one context.
    session_id = agent.create_session()
    # BUG FIX: restored "\n" — the article's backslashes were lost,
    # printing a literal trailing "n".
    print(f"会话ID: {session_id}\n")
    # Multi-turn conversation exercising memory of earlier turns.
    inputs = [
        "我叫张三",
        "我喜欢Python",
        "你记得我的名字吗?",
        "我喜欢什么编程语言?"
    ]
    for user_input in inputs:
        print(f"用户: {user_input}")
        result = await agent.chat(user_input, session_id)
        print(f"Agent: {result['response']}\n")

if __name__ == "__main__":
    asyncio.run(main())
4.4 运行效果
会话ID: a1b2c3d4-e5f6-7890-abcd-ef1234567890
用户: 我叫张三
Agent: 你好,张三!很高兴认识你。有什么我可以帮你的吗?
用户: 我喜欢Python
Agent: Python是一门很棒的语言!简洁、易读,而且生态系统非常丰富。你用Python做哪方面的开发呢?
用户: 你记得我的名字吗?
Agent: 当然记得,你叫张三!
用户: 我喜欢什么编程语言?
Agent: 你告诉我你喜欢Python。
五、流式输出Agent
5.1 实现流式输出
# src/agents/streaming_agent.py
from .base import BaseAgent
from ..utils.llm_client import LLMClient
from typing import AsyncGenerator
class StreamingChatAgent(BaseAgent):
    """Chat agent that can stream tokens as they arrive from the LLM."""

    async def think_stream(self) -> AsyncGenerator[str, None]:
        """Yield response fragments as the LLM produces them.

        Only the OpenAI backend is handled; other providers yield nothing.
        """
        messages = self.get_history()
        if self.llm.provider == "openai":
            # stream=True makes the API return an async iterator of chunks.
            stream = await self.llm.client.chat.completions.create(
                model=self.llm.model,
                messages=messages,
                stream=True
            )
            async for chunk in stream:
                delta = chunk.choices[0].delta.content
                if delta:
                    yield delta

    async def think(self) -> str:
        """Non-streaming fallback: one blocking LLM call."""
        messages = self.get_history()
        return await self.llm.chat(messages)

    # BUG FIX: BaseAgent declares `act` as @abstractmethod; without this
    # override StreamingChatAgent cannot be instantiated (TypeError), yet
    # the demo below does instantiate it.
    async def act(self, thought: str) -> None:
        """Streaming chat agent performs no external actions."""
        pass

    async def chat_stream(self, user_input: str) -> AsyncGenerator[str, None]:
        """Stream a reply chunk by chunk; store the full text at the end."""
        self._add_message("user", user_input)
        full_response = ""
        async for chunk in self.think_stream():
            full_response += chunk
            yield chunk
        # Persist the assembled reply once streaming completes.
        self._add_message("assistant", full_response)
5.2 使用示例
# examples/streaming_demo.py
import asyncio
from src.utils.llm_client import LLMClient
from src.agents.streaming_agent import StreamingChatAgent
async def main():
    """Demo: print a streamed reply token by token."""
    llm = LLMClient(provider="openai", model="gpt-4-turbo-preview")
    agent = StreamingChatAgent(
        llm_client=llm,
        system_prompt="你是一个友好的AI助手。"
    )
    print("用户: 给我讲一个故事")
    print("Agent: ", end="", flush=True)
    # Flush each chunk immediately so output appears as it streams.
    async for chunk in agent.chat_stream("给我讲一个故事"):
        print(chunk, end="", flush=True)
    # BUG FIX: restored "\n" (the article printed a literal "n").
    print("\n")

if __name__ == "__main__":
    asyncio.run(main())
六、Agent测试
6.1 单元测试
# tests/test_chat_agent.py
import pytest
from unittest.mock import AsyncMock, MagicMock
from src.agents.chat_agent import ChatAgent
@pytest.mark.asyncio
async def test_chat_agent():
    """ChatAgent returns the mocked LLM reply and records three messages."""
    # Stub out the LLM client so no network call happens.
    fake_llm = MagicMock()
    fake_llm.chat = AsyncMock(return_value="这是一个测试回复")
    fake_llm.provider = "openai"
    fake_llm.model = "gpt-4"

    agent = ChatAgent(llm_client=fake_llm, personality="测试")

    reply = await agent.chat("测试问题")

    assert reply == "这是一个测试回复"
    # History holds system prompt + user turn + assistant reply.
    assert len(agent.state.messages) == 3
@pytest.mark.asyncio
async def test_session_agent():
    """SessionChatAgent echoes the mocked reply and keeps the session id."""
    fake_llm = MagicMock()
    fake_llm.chat = AsyncMock(return_value="回复")

    from src.agents.session_chat_agent import SessionChatAgent
    agent = SessionChatAgent(llm_client=fake_llm)
    sid = agent.create_session()

    outcome = await agent.chat("问题", sid)

    assert outcome["session_id"] == sid
    assert outcome["response"] == "回复"
七、最佳实践
7.1 Agent设计原则
| 原则 | 说明 |
|---|---|
| 单一职责 | 每个Agent专注一个任务 |
| 状态管理 | 清晰的状态定义和转换 |
| 错误处理 | 优雅处理异常情况 |
| 可观测性 | 记录关键日志和指标 |
7.2 性能考虑
# 上下文管理
class ContextManager:
    """Trims conversation history to fit a token budget.

    System messages are always kept; other messages are retained
    newest-first until the budget is exhausted, then re-emitted in
    chronological order.
    """

    def __init__(self, max_tokens: int = 4000):
        """
        Args:
            max_tokens: token budget for the whole message list.
        """
        self.max_tokens = max_tokens
        # NOTE(review): TokenCounter is not defined anywhere in this
        # article — presumably a tiktoken-based helper; confirm its API.
        self.token_counter = TokenCounter()

    def truncate_messages(
        self,
        messages: list[dict[str, str]]
    ) -> list[dict[str, str]]:
        """Drop oldest non-system messages until the list fits the budget.

        BUG FIX: the original used `result.insert(-1, msg)` while walking
        newest-to-oldest, which scrambles chronological order once three
        or more non-system messages are kept (insert(-1) always lands just
        before the LAST element, not after the system block). Collecting
        the kept messages and reversing them preserves order.
        """
        # System messages are always preserved at the front.
        system_msgs = [m for m in messages if m["role"] == "system"]
        other_msgs = [m for m in messages if m["role"] != "system"]

        total_tokens = self.token_counter.count_messages(system_msgs)
        kept: list[dict[str, str]] = []
        # Walk newest-to-oldest so the most recent context survives.
        for msg in reversed(other_msgs):
            msg_tokens = self.token_counter.count_messages([msg])
            if total_tokens + msg_tokens > self.max_tokens:
                break
            kept.append(msg)
            total_tokens += msg_tokens
        kept.reverse()  # back to chronological order
        return system_msgs + kept
八、总结
今天学到了什么
- Agent核心抽象:BaseAgent类定义了Agent的基本骨架
- 简单对话Agent:实现了最基本的对话功能
- 会话管理:支持多轮对话的上下文保持
- 流式输出:提升用户体验的流式响应
Agent核心要素
| 要素 | 作用 |
|---|---|
| 消息历史 | 存储对话上下文 |
| 系统提示 | 定义Agent性格和能力 |
| 思考方法 | 调用LLM生成回复 |
| 状态管理 | 跟踪Agent执行状态 |
下期预告
下一篇:为Agent添加工具调用能力——让Agent真正能动起来!
往期回顾
正文完