Agent Memory Systems: Short-Term, Long-Term, Episodic, Semantic Memory
Introduction
Memory is what separates stateless LLM calls from true autonomous agents. Without memory, an agent cannot learn from past interactions, maintain context across sessions, or build a model of the world. Drawing from cognitive science, agent memory can be structured into four types: short-term (working memory), long-term (persistent storage), episodic (specific experiences), and semantic (general knowledge).
Short-Term Memory (Working Memory)
Short-term memory holds the current conversation context and immediate state:
from collections import deque
from typing import Any
class ShortTermMemory:
    """Token-aware working memory holding the current conversation window.

    Evicts oldest messages when either the estimated token budget or the
    message-count budget is exceeded.
    """

    # Rough tokens-per-word ratio used for estimation without a real tokenizer.
    _TOKENS_PER_WORD = 1.3

    def __init__(self, max_tokens: int = 4096, max_messages: int = 50):
        self.max_messages = max_messages
        self.messages: deque[dict] = deque(maxlen=max_messages)
        self.max_tokens = max_tokens
        self.current_tokens = 0.0

    def _estimate_tokens(self, content: str) -> float:
        """Estimate the token cost of a message body (word count * ratio)."""
        return len(content.split()) * self._TOKENS_PER_WORD

    def add(self, role: str, content: str):
        """Append a message, evicting oldest messages to stay under limits."""
        message = {"role": role, "content": content}
        estimated_tokens = self._estimate_tokens(content)
        # Evict for the token budget. The emptiness guard prevents an
        # IndexError when a single message alone exceeds max_tokens.
        while self.messages and self.current_tokens + estimated_tokens > self.max_tokens:
            removed = self.messages.popleft()
            self.current_tokens -= self._estimate_tokens(removed["content"])
        # Evict for the message-count budget explicitly: letting deque's
        # maxlen drop the oldest silently would corrupt current_tokens.
        while len(self.messages) >= self.messages.maxlen:
            removed = self.messages.popleft()
            self.current_tokens -= self._estimate_tokens(removed["content"])
        self.messages.append(message)
        self.current_tokens += estimated_tokens

    def get_context(self) -> list[dict]:
        """Return the current window as a list of role/content dicts."""
        return list(self.messages)

    def summarize_and_compress(self, llm_fn) -> str:
        """When context is full, summarize old messages to make room.

        Returns the summary text, or "" when no compression was needed.
        """
        if len(self.messages) > 30:
            old_messages = list(self.messages)[:-20]
            summary = llm_fn(f"Summarize these conversation messages: {old_messages}")
            kept = list(self.messages)[-20:]
            # Preserve the configured capacity instead of hard-coding 50.
            self.messages = deque(kept, maxlen=self.max_messages)
            # Recompute the token count for the retained window so the
            # accounting reflects the dropped messages.
            self.current_tokens = sum(self._estimate_tokens(m["content"]) for m in kept)
            return summary
        return ""
Long-Term Memory
Long-term memory persists across sessions and is typically backed by a vector store:
import json
import uuid
from dataclasses import dataclass
from datetime import datetime

import numpy as np
class LongTermMemory:
def __init__(self, vector_store, embedding_fn):
self.vector_store = vector_store
self.embedding_fn = embedding_fn
self.collection = "agent_memory"
def remember(self, content: str, importance: float = 0.5, metadata: dict = None):
"""Store a memory with importance score for selective recall."""
memory_id = str(uuid.uuid4())
embedding = self.embedding_fn(content)
self.vector_store.add(
ids=[memory_id],
embeddings=[embedding],
metadatas=[{
"content": content,
"importance": importance,
"timestamp": datetime.now().isoformat(),
**(metadata or {}),
}],
)
return memory_id
def recall(self, query: str, k: int = 5, min_importance: float = 0.0) -> list[dict]:
"""Retrieve the most relevant memories."""
query_emb = self.embedding_fn(query)
results = self.vector_store.query(
query_embeddings=[query_emb],
n_results=k,
where={"importance": {"$gte": min_importance}},
)
memories = []
for i, mem_id in enumerate(results["ids"][0]):
metadata = results["metadatas"][0][i]
memories.append({
"id": mem_id,
"content": metadata["content"],
"importance": metadata["importance"],
"timestamp": metadata["timestamp"],
"distance": results["distances"][0][i],
})
return memories
def forget(self, memory_id: str):
"""Delete a specific memory."""
self.vector_store.delete(ids=[memory_id])
def consolidate(self, llm_fn):
"""Periodically merge similar memories."""
all_memories = self.vector_store.get()
# Group similar memories and create consolidated summaries
# This runs as a background task
Episodic Memory
Episodic memory stores specific experiences: what happened, when, and what the outcome was:
@dataclass
class Episode:
    """A single recorded experience: what was attempted, what actions were
    taken, and what the outcome and reward were."""
    id: str                      # unique episode identifier (uuid4 string)
    timestamp: datetime          # when the episode was recorded
    task: str                    # description of the task attempted
    action_sequence: list[dict]  # ordered actions taken during the episode
    outcome: str                 # what ultimately happened
    reward: float                # scalar score for the episode's result
    context: dict                # extra situational data ({} in record_episode)
class EpisodicMemory:
    """Records specific experiences (Episode objects) and retrieves past
    episodes similar to a new task."""

    def __init__(self, storage_backend, embedding_fn=None):
        """storage_backend must expose save(key, dict) and load_all(pattern).

        embedding_fn is optional and backward-compatible: without it, task
        similarity falls back to word-overlap (Jaccard) scoring.
        """
        self.storage = storage_backend
        self.embedding_fn = embedding_fn

    def record_episode(self, task: str, actions: list, outcome: str, reward: float) -> str:
        """Persist one episode and return its id."""
        episode = Episode(
            id=str(uuid.uuid4()),
            timestamp=datetime.now(),
            task=task,
            action_sequence=actions,
            outcome=outcome,
            reward=reward,
            context={},
        )
        self.storage.save(f"episode_{episode.id}", episode.__dict__)
        return episode.id

    def retrieve_similar_episodes(self, task: str, k: int = 3) -> "list[Episode]":
        """Find up to k past episodes most similar to the current task."""
        all_episodes = self.storage.load_all("episode_*")
        scored = [(ep, self._task_similarity(task, ep["task"])) for ep in all_episodes]
        scored.sort(key=lambda pair: pair[1], reverse=True)
        return [Episode(**ep) for ep, _ in scored[:k]]

    def _task_similarity(self, task_a: str, task_b: str) -> float:
        """Similarity in [0, 1] between two task descriptions.

        Uses cosine similarity of embeddings when an embedding_fn was
        supplied, else Jaccard overlap of lowercased words. (Fixes the
        original's reference to undefined module-level embedding_fn /
        cosine_similarity.)
        """
        if self.embedding_fn is not None:
            a = np.asarray(self.embedding_fn(task_a), dtype=float)
            b = np.asarray(self.embedding_fn(task_b), dtype=float)
            denom = float(np.linalg.norm(a) * np.linalg.norm(b))
            # Zero-norm embeddings carry no information: define similarity 0.
            return float(np.dot(a, b) / denom) if denom else 0.0
        words_a = set(task_a.lower().split())
        words_b = set(task_b.lower().split())
        if not words_a or not words_b:
            return 0.0
        return len(words_a & words_b) / len(words_a | words_b)
Semantic Memory
Semantic memory stores factual knowledge extracted from experiences:
class SemanticMemory:
def __init__(self):
self.facts: dict[str, list[dict]] = {}
self.confidence_threshold = 0.7
def add_fact(self, subject: str, predicate: str, object_: str, confidence: float):
if subject not in self.facts:
self.facts[subject] = []
self.facts[subject].append({
"predicate": predicate,
"object": object_,
"confidence": confidence,
"timestamp": datetime.now(),
})
def query_fact(self, subject: str, predicate: str = None) -> list[str]:
if subject not in self.facts:
return []
results = []
for fact in self.facts[subject]:
if predicate is None or fact["predicate"] == predicate:
if fact["confidence"] >= self.confidence_threshold:
results.append(fact["object"])
return results
def extract_facts_from_experience(self, episode: Episode, llm_fn):
"""Extract general knowledge from a specific experience."""
extraction = llm_fn(f"""
Extract factual statements from this experience.
Output as JSON array of {{"subject", "predicate", "object"}}.
Task: {episode.task}
Outcome: {episode.outcome}
""")
facts = json.loads(extraction)
for fact in facts:
self.add_fact(fact["subject"], fact["predicate"], fact["object"], confidence=0.5)
Integrated Agent Memory
Bring all four types together in a unified memory system:
class AgentMemory:
    """Unified facade combining short-term, long-term, episodic and
    semantic memory into one prompt-building interface."""

    def __init__(self, short_term_capacity=4096, long_term_store=None):
        # long_term_store must supply "vector_db", "embed_fn" and "kv_store";
        # fail with a clear message instead of a bare subscript TypeError.
        if long_term_store is None:
            raise ValueError(
                "long_term_store with vector_db/embed_fn/kv_store is required"
            )
        self.short_term = ShortTermMemory(max_tokens=short_term_capacity)
        self.long_term = LongTermMemory(
            long_term_store["vector_db"], long_term_store["embed_fn"]
        )
        self.episodic = EpisodicMemory(long_term_store["kv_store"])
        self.semantic = SemanticMemory()

    def build_prompt_context(self, query: str) -> str:
        """Assemble a single prompt-context string from all four memory types."""
        context_parts = []
        # Recent conversation — format message dicts as "role: content" lines;
        # the original extended with raw dicts, which makes str.join raise
        # TypeError.
        context_parts.append("=== Recent Context ===\n")
        context_parts.extend(
            f"{msg['role']}: {msg['content']}" for msg in self.short_term.get_context()
        )
        # Relevant long-term memories
        memories = self.long_term.recall(query, k=3)
        if memories:
            context_parts.append("\n=== Related Memories ===\n")
            context_parts.extend(m["content"] for m in memories)
        # Similar past episodes
        episodes = self.episodic.retrieve_similar_episodes(query, k=2)
        if episodes:
            context_parts.append("\n=== Similar Past Experiences ===\n")
            for ep in episodes:
                context_parts.append(f"Task: {ep.task}, Outcome: {ep.outcome}")
        # Relevant semantic facts.
        # NOTE(review): extract_entities is not defined anywhere in this file —
        # presumably supplied by the surrounding module; confirm before running.
        entities = extract_entities(query)
        for entity in entities:
            facts = self.semantic.query_fact(entity)
            if facts:
                context_parts.append(f"\n=== Facts about {entity} ===\n")
                context_parts.extend(facts)
        return "\n".join(context_parts)
Conclusion
Agent memory systems mirror human cognitive architecture. Short-term memory maintains immediate conversation context with token-aware eviction. Long-term memory persistently stores important information with vector-based retrieval. Episodic memory records specific experiences for future reference. Semantic memory extracts and stores general knowledge from experiences. An integrated memory system combines all four types, giving agents both the immediate context and the accumulated wisdom needed for complex, long-running tasks.