Agent Memory Systems: Short-Term, Long-Term, Episodic, Semantic Memory


Introduction





Memory is what separates stateless LLM calls from true autonomous agents. Without memory, an agent cannot learn from past interactions, maintain context across sessions, or build a model of the world. Drawing from cognitive science, agent memory can be structured into four types: short-term (working memory), long-term (persistent storage), episodic (specific experiences), and semantic (general knowledge).





Short-Term Memory (Working Memory)





Short-term memory holds the current conversation context and immediate state:






from collections import deque


from typing import Any




class ShortTermMemory:
    """Working memory: the current conversation window.

    Holds at most ``max_messages`` messages and evicts the oldest ones
    whenever the estimated token total would exceed ``max_tokens``.
    """

    # Rough words -> tokens conversion factor (avoids a tokenizer dependency).
    _TOKENS_PER_WORD = 1.3

    def __init__(self, max_tokens: int = 4096, max_messages: int = 50):
        self.messages: deque[dict] = deque(maxlen=max_messages)
        self.max_tokens = max_tokens
        self.current_tokens = 0.0

    def _estimate_tokens(self, content: str) -> float:
        """Cheap whitespace-based token estimate for ``content``."""
        return len(content.split()) * self._TOKENS_PER_WORD

    def add(self, role: str, content: str):
        """Append a message, evicting oldest messages to respect the token budget."""
        estimated_tokens = self._estimate_tokens(content)

        # Evict oldest messages while over the token limit.  Stop when the
        # deque is empty so a single oversized message cannot raise
        # IndexError (bug in the original: popleft() on an empty deque).
        while self.messages and self.current_tokens + estimated_tokens > self.max_tokens:
            removed = self.messages.popleft()
            self.current_tokens -= self._estimate_tokens(removed["content"])

        # A full bounded deque silently drops its leftmost element on append;
        # account for that dropped message so current_tokens stays consistent
        # (original bug: the counter drifted upward once maxlen was reached).
        if self.messages.maxlen is not None and len(self.messages) == self.messages.maxlen:
            dropped = self.messages[0]
            self.current_tokens -= self._estimate_tokens(dropped["content"])

        self.messages.append({"role": role, "content": content})
        self.current_tokens += estimated_tokens

    def get_context(self) -> list[dict]:
        """Return the current window as a plain list (oldest first)."""
        return list(self.messages)

    def summarize_and_compress(self, llm_fn) -> str:
        """When the window is large, summarize older messages to make room.

        Keeps the 20 most recent messages; returns the summary string, or
        "" when nothing was compressed.
        """
        if len(self.messages) <= 30:
            return ""
        old_messages = list(self.messages)[:-20]
        summary = llm_fn(f"Summarize these conversation messages: {old_messages}")
        kept = list(self.messages)[-20:]
        # Preserve the configured capacity instead of hard-coding 50
        # (original bug), and recompute the token counter for the kept
        # messages (original bug: current_tokens was left stale).
        self.messages = deque(kept, maxlen=self.messages.maxlen)
        self.current_tokens = sum(self._estimate_tokens(m["content"]) for m in kept)
        return summary







Long-Term Memory





Long-term memory persists across sessions and is typically backed by a vector store:






import uuid
from dataclasses import dataclass
from datetime import datetime

import numpy as np




class LongTermMemory:
    """Persistent memory backed by a vector store.

    ``vector_store`` is expected to expose a Chroma-style API
    (``add`` / ``query`` / ``get`` / ``delete`` with list-of-batches
    results) -- TODO confirm against the concrete backend.
    """

    def __init__(self, vector_store, embedding_fn):
        self.vector_store = vector_store
        self.embedding_fn = embedding_fn  # str -> embedding vector
        self.collection = "agent_memory"

    def remember(self, content: str, importance: float = 0.5, metadata: dict | None = None) -> str:
        """Store ``content`` with an importance score for selective recall.

        Returns the generated memory id (uuid4 string).
        """
        memory_id = str(uuid.uuid4())
        embedding = self.embedding_fn(content)

        self.vector_store.add(
            ids=[memory_id],
            embeddings=[embedding],
            metadatas=[{
                "content": content,
                "importance": importance,
                "timestamp": datetime.now().isoformat(),
                **(metadata or {}),
            }],
        )
        return memory_id

    def recall(self, query: str, k: int = 5, min_importance: float = 0.0) -> list[dict]:
        """Retrieve up to ``k`` memories relevant to ``query``.

        Only memories with importance >= ``min_importance`` are considered.
        """
        query_emb = self.embedding_fn(query)
        results = self.vector_store.query(
            query_embeddings=[query_emb],
            n_results=k,
            where={"importance": {"$gte": min_importance}},
        )

        # Guard against an empty result set (original code assumed at least
        # one non-empty batch and raised IndexError otherwise).
        id_batches = results["ids"]
        if not id_batches or not id_batches[0]:
            return []

        memories = []
        for i, mem_id in enumerate(id_batches[0]):
            md = results["metadatas"][0][i]
            memories.append({
                "id": mem_id,
                "content": md["content"],
                "importance": md["importance"],
                "timestamp": md["timestamp"],
                "distance": results["distances"][0][i],
            })
        return memories

    def forget(self, memory_id: str):
        """Delete a specific memory by id."""
        self.vector_store.delete(ids=[memory_id])

    def consolidate(self, llm_fn):
        """Periodically merge similar memories (background maintenance task).

        Currently a stub: it fetches all memories but the grouping and
        summarization steps are not implemented yet.
        """
        all_memories = self.vector_store.get()
        # TODO: group similar memories and write consolidated summaries
        # back to the store; intended to run as a background task.







Episodic Memory





Episodic memory stores specific experiences: what happened, when, and what the outcome was:






# NOTE(review): `dataclass` is used here but never imported in this file --
# needs `from dataclasses import dataclass` at the top to run.
@dataclass
class Episode:
    """One recorded experience: the task, the actions taken, and the outcome."""

    id: str                      # unique identifier (uuid4 string, see record_episode)
    timestamp: datetime          # when the episode was recorded
    task: str                    # natural-language task description
    action_sequence: list[dict]  # ordered actions the agent took
    outcome: str                 # description of how the task ended
    reward: float                # scalar score for the outcome
    context: dict                # extra context; record_episode stores {} here




class EpisodicMemory:
    """Stores and retrieves specific past experiences (episodes)."""

    def __init__(self, storage_backend, embedding_fn=None):
        # ``embedding_fn`` is optional and backward-compatible: when absent,
        # task similarity falls back to word-set overlap.  (The original
        # `_task_similarity` referenced undefined globals `embedding_fn` and
        # `cosine_similarity`, raising NameError on first retrieval.)
        self.storage = storage_backend
        self.embedding_fn = embedding_fn

    def record_episode(self, task: str, actions: list, outcome: str, reward: float) -> str:
        """Persist one episode and return its id."""
        episode = Episode(
            id=str(uuid.uuid4()),
            timestamp=datetime.now(),
            task=task,
            action_sequence=actions,
            outcome=outcome,
            reward=reward,
            context={},
        )
        self.storage.save(f"episode_{episode.id}", episode.__dict__)
        return episode.id

    def retrieve_similar_episodes(self, task: str, k: int = 3) -> list[Episode]:
        """Find up to ``k`` past episodes most similar to ``task``."""
        all_episodes = self.storage.load_all("episode_*")
        scored = [(ep, self._task_similarity(task, ep["task"])) for ep in all_episodes]
        scored.sort(key=lambda pair: pair[1], reverse=True)
        return [Episode(**ep) for ep, _ in scored[:k]]

    def _task_similarity(self, task_a: str, task_b: str) -> float:
        """Similarity in [0, 1] between two task descriptions."""
        if self.embedding_fn is not None:
            emb_a = self.embedding_fn(task_a)
            emb_b = self.embedding_fn(task_b)
            dot = sum(a * b for a, b in zip(emb_a, emb_b))
            norm = (sum(a * a for a in emb_a) ** 0.5) * (sum(b * b for b in emb_b) ** 0.5)
            # Zero-norm embeddings are treated as dissimilar.
            return dot / norm if norm else 0.0
        # Fallback: Jaccard overlap of lowercased word sets.
        words_a = set(task_a.lower().split())
        words_b = set(task_b.lower().split())
        if not words_a or not words_b:
            return 0.0
        return len(words_a & words_b) / len(words_a | words_b)







Semantic Memory





Semantic memory stores factual knowledge extracted from experiences:






class SemanticMemory:
    """Stores general knowledge as (subject, predicate, object) triples."""

    def __init__(self):
        # subject -> list of fact dicts (predicate/object/confidence/timestamp)
        self.facts: dict[str, list[dict]] = {}
        # Facts below this confidence are ignored by query_fact.
        self.confidence_threshold = 0.7

    def add_fact(self, subject: str, predicate: str, object_: str, confidence: float):
        """Record one triple about ``subject`` with a confidence score."""
        self.facts.setdefault(subject, []).append({
            "predicate": predicate,
            "object": object_,
            "confidence": confidence,
            "timestamp": datetime.now(),
        })

    def query_fact(self, subject: str, predicate: str | None = None) -> list[str]:
        """Return known objects for ``subject``, optionally filtered by predicate.

        Only facts at or above ``confidence_threshold`` are returned.
        """
        return [
            fact["object"]
            for fact in self.facts.get(subject, [])
            if (predicate is None or fact["predicate"] == predicate)
            and fact["confidence"] >= self.confidence_threshold
        ]

    def extract_facts_from_experience(self, episode: Episode, llm_fn):
        """Extract general triples from one episode via the LLM and store them.

        New facts start at confidence 0.5.  Propagates json.JSONDecodeError
        if the LLM output is not valid JSON.
        """
        # Local import: `json` is not imported at the top of this file.
        import json

        extraction = llm_fn(f"""
Extract factual statements from this experience.
Output as JSON array of {{"subject", "predicate", "object"}}.

Task: {episode.task}
Outcome: {episode.outcome}
""")
        facts = json.loads(extraction)
        for fact in facts:
            self.add_fact(fact["subject"], fact["predicate"], fact["object"], confidence=0.5)







Integrated Agent Memory





Bring all four types together in a unified memory system:






class AgentMemory:
    """Unified facade combining short-term, long-term, episodic, and semantic memory."""

    def __init__(self, short_term_capacity=4096, long_term_store=None):
        self.short_term = ShortTermMemory(max_tokens=short_term_capacity)
        if long_term_store is not None:
            self.long_term = LongTermMemory(long_term_store["vector_db"], long_term_store["embed_fn"])
            self.episodic = EpisodicMemory(long_term_store["kv_store"])
        else:
            # Original code unconditionally subscripted the default None and
            # crashed; allow running with short-term + semantic memory only.
            self.long_term = None
            self.episodic = None
        self.semantic = SemanticMemory()

    def build_prompt_context(self, query: str) -> str:
        """Assemble a single prompt-context string from all memory types."""
        context_parts = []

        # Recent conversation.  Messages are dicts; render them as text
        # (original bug: dicts were joined directly, so "\n".join raised
        # TypeError).
        context_parts.append("=== Recent Context ===\n")
        for msg in self.short_term.get_context():
            context_parts.append(f"{msg['role']}: {msg['content']}")

        # Relevant long-term memories
        if self.long_term is not None:
            memories = self.long_term.recall(query, k=3)
            if memories:
                context_parts.append("\n=== Related Memories ===\n")
                context_parts.extend(m["content"] for m in memories)

        # Similar past episodes
        if self.episodic is not None:
            episodes = self.episodic.retrieve_similar_episodes(query, k=2)
            if episodes:
                context_parts.append("\n=== Similar Past Experiences ===\n")
                for ep in episodes:
                    context_parts.append(f"Task: {ep.task}, Outcome: {ep.outcome}")

        # Relevant semantic facts.
        # NOTE(review): assumes a module-level extract_entities() helper
        # exists elsewhere in the project -- it is not defined in this file.
        entities = extract_entities(query)
        for entity in entities:
            facts = self.semantic.query_fact(entity)
            if facts:
                context_parts.append(f"\n=== Facts about {entity} ===\n")
                context_parts.extend(facts)

        return "\n".join(context_parts)







Conclusion





Agent memory systems mirror human cognitive architecture. Short-term memory maintains immediate conversation context with token-aware eviction. Long-term memory persistently stores important information with vector-based retrieval. Episodic memory records specific experiences for future reference. Semantic memory extracts and stores general knowledge from experiences. An integrated memory system combines all four types, giving agents both the immediate context and the accumulated wisdom needed for complex, long-running tasks.