diff --git a/.gitignore b/.gitignore
index ba948a5..b0ba7c2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,4 +8,10 @@ __pycache__
 .python-version
 *.pt
 wandb/
-./Code 2. Cartpole/6. A3C/Cartpole_A3C.pgy
\ No newline at end of file
+logs/
+./Code 2. Cartpole/6. A3C/Cartpole_A3C.pgy
+# Local scratch scripts
+scripts/
+
+# Local-only docs (not for github)
+docs/
diff --git a/3-atari/1-dqn.py b/3-atari/1-dqn.py
index 2ed9132..9db4fce 100644
--- a/3-atari/1-dqn.py
+++ b/3-atari/1-dqn.py
@@ -11,7 +11,6 @@
 serious training.
 """
 import random
-import sys
 from collections import deque
 
 import numpy as np
@@ -23,17 +22,17 @@
 
 
 SAVE_PATH = "atari_dqn.pt"
-TOTAL_FRAMES = 1_000_000        # bump to ~10M for paper-quality results
-BUFFER_CAPACITY = 100_000       # bump to 1M with enough RAM
-BATCH_SIZE = 64
+TOTAL_FRAMES = 10_000_000       # Nature uses 50M agent steps; 10M is laptop-friendly
+BUFFER_CAPACITY = 500_000       # ~3.5GB RAM (uint8, single frames stacked at sample time); sized for 8GB Macs
+BATCH_SIZE = 32
 GAMMA = 0.99
 LR = 1e-4
-LEARN_START = 10_000            # frames of pure exploration before training begins
+LEARN_START = 80_000            # frames of pure exploration before training begins
 TRAIN_EVERY = 4
-TARGET_UPDATE_EVERY = 1_000     # in training steps, not env steps
+TARGET_UPDATE_EVERY = 250       # in training steps, not env steps (~1k env frames)
 EPSILON_START = 1.0
-EPSILON_END = 0.05
-EPSILON_DECAY_FRAMES = 250_000  # linear decay from start to end over this many frames
+EPSILON_END = 0.01
+EPSILON_DECAY_FRAMES = 1_000_000  # linear decay from start to end over this many frames
 
 
 # Standard Nature CNN.
@@ -57,34 +56,59 @@ def forward(self, x):
 
 
 class ReplayBuffer:
-    """Uint8 replay buffer — far more memory-efficient than storing floats."""
+    """Single-frame uint8 buffer — stacks of 4 are reconstructed at sample time,
+    cutting RAM ~4x vs. storing the full stack per slot."""
 
-    def __init__(self, capacity, obs_shape):
+    def __init__(self, capacity, frame_shape=(84, 84), stack=4):  # frame_shape: shape of ONE frame, not of a stack
         self.capacity = capacity
-        self.obs      = np.zeros((capacity, *obs_shape), dtype=np.uint8)
-        self.next_obs = np.zeros((capacity, *obs_shape), dtype=np.uint8)
-        self.actions  = np.zeros(capacity, dtype=np.int64)
-        self.rewards  = np.zeros(capacity, dtype=np.float32)
-        self.dones    = np.zeros(capacity, dtype=np.float32)
+        self.stack = stack  # number of consecutive frames _stack() joins into one state
+        self.frames  = np.zeros((capacity, *frame_shape), dtype=np.uint8)  # frames[i]: the single frame push() wrote to slot i
+        self.actions = np.zeros(capacity, dtype=np.int64)
+        self.rewards = np.zeros(capacity, dtype=np.float32)
+        self.dones   = np.zeros(capacity, dtype=np.float32)  # float(done) of the transition stored in each slot
         self.idx = 0
         self.size = 0
 
-    def push(self, obs, action, reward, next_obs, done):
-        self.obs[self.idx] = obs
+    def push(self, frame, action, reward, done):  # store one transition in slot self.idx, then advance the head
+        self.frames[self.idx] = frame  # no next frame is stored here; sample() reads it from slot idx + 1
         self.actions[self.idx] = action
         self.rewards[self.idx] = reward
-        self.next_obs[self.idx] = next_obs
         self.dones[self.idx] = float(done)
         self.idx = (self.idx + 1) % self.capacity
         self.size = min(self.size + 1, self.capacity)
 
+    def _stack(self, idx):
+        # For each entry of the 1-D index array idx, gather frames[idx-stack+1 .. idx] (mod capacity); newest is last.
+        offsets = np.arange(self.stack)
+        gather = (idx[:, None] - (self.stack - 1) + offsets[None, :]) % self.capacity  # (batch, stack) slot indices
+        out = self.frames[gather]  # (batch, stack, *frame_shape)
+        # Zero frames left over from a previous episode: a done at an older slot marks where that episode ended.
+        # NOTE(review): confirm the env's frame-stack padding at reset matches this zero fill.
+        older = self.dones[gather[:, :-1]].astype(bool)  # dones of the stack-1 older slots; the newest slot's done is ignored
+        # Once we cross any done walking newest→oldest, everything older is invalid.
+        invalid = np.cumsum(older[:, ::-1], axis=1)[:, ::-1] > 0  # reversed cumsum = "any done from this slot up to idx-1"
+        mask = np.concatenate([~invalid, np.ones((idx.shape[0], 1), dtype=bool)], axis=1)  # newest frame is always kept
+        return out * mask[:, :, None, None]  # the two trailing None axes broadcast over a 2-D frame
+
     def sample(self, batch_size, device):
-        idx = np.random.randint(0, self.size, size=batch_size)
+        # Pick idx whose frames idx-stack+1 .. idx+1 are contiguous in time, i.e. never cross the write head.
+        while True:  # full buffer: redraw the whole batch until every idx is valid
+            if self.size < self.capacity:
+                if self.size < self.stack + 2:
+                    raise RuntimeError("buffer too small to sample yet")
+                idx = np.random.randint(self.stack - 1, self.size - 1, size=batch_size)  # high is exclusive: idx+1 <= size-1 exists
+                break
+            idx = np.random.randint(0, self.capacity, size=batch_size)
+            dist = (self.idx - 1 - idx) % self.capacity  # age of idx: 0 = newest slot, capacity-1 = oldest slot
+            if np.all((dist >= 1) & (dist <= self.capacity - self.stack)):  # idx+1 is written; stack does not wrap past the oldest slot
+                break
+        states      = self._stack(idx)
+        next_states = self._stack((idx + 1) % self.capacity)
         return (
-            torch.as_tensor(self.obs[idx], device=device),
+            torch.as_tensor(states, device=device),
             torch.as_tensor(self.actions[idx], device=device),
             torch.as_tensor(self.rewards[idx], device=device),
-            torch.as_tensor(self.next_obs[idx], device=device),
+            torch.as_tensor(next_states, device=device),
             torch.as_tensor(self.dones[idx], device=device),
         )
 
@@ -130,10 +154,12 @@ def greedy_action(obs):
 
     print(f"device: {device},  env: {args.env},  actions: {n_actions}")
 
-    buffer = ReplayBuffer(BUFFER_CAPACITY, env.observation_space.shape)
+    buffer = ReplayBuffer(BUFFER_CAPACITY)
     obs, _ = env.reset()
-    ep_return = 0.0
+    ep_return = 0.0       # accumulates within one life (LifeLossTerminalEnv ends an "episode" per life)
+    game_return = 0.0     # accumulates across all 5 lives until real game-over
     recent_returns = deque(maxlen=20)
+    recent_game_returns = deque(maxlen=20)
     train_step = 0
     last_loss = 0.0
 
@@ -146,18 +172,23 @@ def greedy_action(obs):
         else:
             action = greedy_action(obs)
 
-        next_obs, reward, terminated, truncated, _ = env.step(action)
+        next_obs, reward, terminated, truncated, info = env.step(action)
         done = terminated or truncated
         # Reward clipping (DeepMind standard) — keeps Q-values from blowing up
         # when one game has rewards in tens and another in hundreds.
         clipped = np.sign(reward)
-        buffer.push(np.asarray(obs), action, clipped, np.asarray(next_obs), done)
+        # FrameStack gives (4, 84, 84); store just the newest frame and stack at sample time.
+        buffer.push(np.asarray(obs)[-1], action, clipped, done)
 
         ep_return += reward
+        game_return += reward
         obs = next_obs
         if done:
             recent_returns.append(ep_return)
             ep_return = 0.0
+            if info.get("game_over", True):
+                recent_game_returns.append(game_return)
+                game_return = 0.0
             obs, _ = env.reset()
 
         # Training.
@@ -182,12 +213,14 @@ def greedy_action(obs):
         # Logging.
         if frame % 10_000 == 0:
             mean = float(np.mean(recent_returns)) if recent_returns else 0.0
+            game_mean = float(np.mean(recent_game_returns)) if recent_game_returns else 0.0
             print(f"frame: {frame:>8}  eps: {epsilon(frame):.3f}  "
-                  f"recent_mean_return: {mean:.1f}  buffer: {buffer.size}")
+                  f"per_life: {mean:.1f}  per_game: {game_mean:.1f}  buffer: {buffer.size}")
             if args.wandb:
                 wandb.log({
                     "global_step": frame,
                     "recent_mean_return": mean,
+                    "recent_mean_game_return": game_mean,
                     "epsilon": epsilon(frame),
                     "loss": last_loss,
                     "buffer_size": buffer.size,
diff --git a/3-atari/2-ppo.py b/3-atari/2-ppo.py
index 2542f29..9082858 100644
--- a/3-atari/2-ppo.py
+++ b/3-atari/2-ppo.py
@@ -17,7 +17,7 @@
 
 
 SAVE_PATH = "atari_ppo.pt"
-TOTAL_FRAMES = 5_000_000
+TOTAL_FRAMES = 10_000_000
 N_ENVS = 8
 ROLLOUT_STEPS = 128            # batch = N_ENVS * ROLLOUT_STEPS = 1024
 EPOCHS = 4
@@ -109,10 +109,17 @@ def policy_action(obs):
     frames_per_update = batch_size
     n_updates = TOTAL_FRAMES // frames_per_update
     obs, _ = envs.reset()
-    ep_returns_per_env = np.zeros(N_ENVS, dtype=np.float32)
+    ep_returns_per_env = np.zeros(N_ENVS, dtype=np.float32)    # per-life (resets every life loss)
+    game_returns_per_env = np.zeros(N_ENVS, dtype=np.float32)  # per-game (resets only on real game-over)
     ep_returns = []
+    game_returns = []
 
     for update in range(1, n_updates + 1):
+        # Linear LR anneal from LR -> 0 over the run (CleanRL convention).
+        lr_now = LR * (1.0 - (update - 1) / n_updates)
+        for g in optimizer.param_groups:
+            g["lr"] = lr_now
+
         obs_buf  = np.zeros((ROLLOUT_STEPS, N_ENVS, *obs_shape), dtype=np.uint8)
         act_buf  = np.zeros((ROLLOUT_STEPS, N_ENVS), dtype=np.int64)
         logp_buf = np.zeros((ROLLOUT_STEPS, N_ENVS), dtype=np.float32)
@@ -134,16 +141,22 @@ def policy_action(obs):
             logp_buf[t] = logp.cpu().numpy()
             val_buf[t]  = value.cpu().numpy()
 
-            next_obs, reward, terminated, truncated, _ = envs.step(act_buf[t])
+            next_obs, reward, terminated, truncated, info = envs.step(act_buf[t])
             done = np.logical_or(terminated, truncated)
             ep_returns_per_env += reward
+            game_returns_per_env += reward
             rew_buf[t]  = np.sign(reward).astype(np.float32)  # DeepMind reward clipping
             done_buf[t] = done.astype(np.float32)
 
+            # LifeLossTerminalEnv tags each step's info with game_over (True only on real game-over).
+            game_over = info.get("game_over", done)
             for i in range(N_ENVS):
                 if done[i]:
                     ep_returns.append(float(ep_returns_per_env[i]))
                     ep_returns_per_env[i] = 0.0
+                    if bool(game_over[i]):
+                        game_returns.append(float(game_returns_per_env[i]))
+                        game_returns_per_env[i] = 0.0
             obs = next_obs
 
         # --- GAE ---
@@ -151,12 +164,12 @@ def policy_action(obs):
             obs_t = torch.as_tensor(np.asarray(obs), device=device)
             _, last_value = model(obs_t)
         advantages, returns = compute_gae(rew_buf, val_buf, done_buf, last_value.cpu().numpy())
-        advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
 
         # Flatten (T, N_ENVS, ...) -> (T*N_ENVS, ...)
         obs_t      = torch.as_tensor(obs_buf.reshape(batch_size, *obs_shape), device=device)
         act_t      = torch.as_tensor(act_buf.reshape(batch_size), device=device)
         old_logp_t = torch.as_tensor(logp_buf.reshape(batch_size), device=device)
+        old_val_t  = torch.as_tensor(val_buf.reshape(batch_size), device=device)
         adv_t      = torch.as_tensor(advantages.reshape(batch_size), device=device)
         ret_t      = torch.as_tensor(returns.reshape(batch_size), device=device)
 
@@ -173,11 +186,22 @@ def policy_action(obs):
                 new_logp = dist.log_prob(act_t[mb])
                 entropy = dist.entropy().mean()
 
+                # Advantage normalization per minibatch (CleanRL convention).
+                mb_adv = adv_t[mb]
+                mb_adv = (mb_adv - mb_adv.mean()) / (mb_adv.std() + 1e-8)
+
                 ratio = (new_logp - old_logp_t[mb]).exp()
-                unclipped = ratio * adv_t[mb]
-                clipped = torch.clamp(ratio, 1 - CLIP_COEF, 1 + CLIP_COEF) * adv_t[mb]
+                unclipped = ratio * mb_adv
+                clipped = torch.clamp(ratio, 1 - CLIP_COEF, 1 + CLIP_COEF) * mb_adv
                 policy_loss = -torch.min(unclipped, clipped).mean()
-                value_loss = (values - ret_t[mb]).pow(2).mean()
+
+                # Value loss with clipping around the old value prediction.
+                v_clipped = old_val_t[mb] + torch.clamp(
+                    values - old_val_t[mb], -CLIP_COEF, CLIP_COEF)
+                vl_unclipped = (values - ret_t[mb]).pow(2)
+                vl_clipped   = (v_clipped - ret_t[mb]).pow(2)
+                value_loss = 0.5 * torch.max(vl_unclipped, vl_clipped).mean()
+
                 loss = policy_loss + VALUE_COEF * value_loss - ENTROPY_COEF * entropy
 
                 optimizer.zero_grad()
@@ -192,18 +216,23 @@ def policy_action(obs):
 
         global_step = update * frames_per_update
         if ep_returns:
-            recent = ep_returns[-20:]
+            life_mean = float(np.mean(ep_returns[-20:]))
+            game_mean = float(np.mean(game_returns[-20:])) if game_returns else 0.0
             print(f"update: {update:>4}  frames: {global_step:>8}  "
-                  f"recent_mean_return: {np.mean(recent):.1f}  episodes: {len(ep_returns)}")
+                  f"per_life: {life_mean:.1f}  per_game: {game_mean:.1f}  "
+                  f"lives: {len(ep_returns)}  games: {len(game_returns)}")
         if args.wandb:
             log = {
                 "global_step": global_step,
                 "policy_loss": pl_sum / n_mb,
                 "value_loss": vl_sum / n_mb,
                 "entropy": ent_sum / n_mb,
+                "lr": lr_now,
             }
             if ep_returns:
                 log["recent_mean_return"] = float(np.mean(ep_returns[-20:]))
+            if game_returns:
+                log["recent_mean_game_return"] = float(np.mean(game_returns[-20:]))
             wandb.log(log, step=global_step)
 
     torch.save(model.state_dict(), SAVE_PATH)
diff --git a/3-atari/env.py b/3-atari/env.py
index 2f3d1b9..1fb8513 100644
--- a/3-atari/env.py
+++ b/3-atari/env.py
@@ -29,6 +29,39 @@ def reset(self, **kwargs):
             obs, _ = self.env.reset(**kwargs)
         return obs, {}
 
+
+# Reports each life loss as `terminated=True` so value bootstrapping (Q-targets / GAE)
+# stops at a death, but only calls the wrapped env's reset() once the real game is
+# over. Without this, every life loss triggers a full env.reset() — burning
+# frames on noop_max + FIRE and breaking long-horizon credit assignment.
+class LifeLossTerminalEnv(gym.Wrapper):
+    def __init__(self, env):
+        super().__init__(env)
+        self.lives = 0  # info["lives"] from the latest step/reset (0 when the key is absent)
+        self.game_over = True  # True => the next reset() performs a real env.reset()
+
+    def step(self, action):
+        obs, reward, terminated, truncated, info = self.env.step(action)
+        self.game_over = terminated or truncated  # real end of game, recorded before the life-loss override below
+        lives = info.get("lives", 0)
+        if 0 < lives < self.lives:  # a life was lost but some remain
+            terminated = True
+        self.lives = lives
+        info["game_over"] = self.game_over  # lets callers tell a real game-over from a life loss
+        return obs, reward, terminated, truncated, info
+
+    def reset(self, **kwargs):
+        if self.game_over:
+            obs, info = self.env.reset(**kwargs)
+        else:
+            # Fake terminal from a life loss — advance one frame instead of
+            # resetting so the game keeps its remaining lives.
+            obs, _, terminated, truncated, info = self.env.step(0)  # assumes action 0 is NOOP — TODO confirm; this step's reward is dropped
+            if terminated or truncated:  # the extra step itself ended the game
+                obs, info = self.env.reset(**kwargs)
+        self.lives = info.get("lives", 0)
+        return obs, info
+
 ENV_IDS = {
     "breakout": "ALE/Breakout-v5",
     "pong":     "ALE/Pong-v5",
@@ -61,12 +94,13 @@ def make_env(args):
         noop_max=30,
         frame_skip=4,
         screen_size=84,
-        terminal_on_life_loss=True,
+        terminal_on_life_loss=False,  # handled by LifeLossTerminalEnv below
         grayscale_obs=True,
         scale_obs=False,        # keep uint8; we normalize in the model
     )
     if "FIRE" in env.unwrapped.get_action_meanings():
         env = FireResetEnv(env)
+    env = LifeLossTerminalEnv(env)
     env = gym.wrappers.FrameStackObservation(env, stack_size=4)
     return env
 
diff --git a/README.md b/README.md
index f20c8fa..38e6f50 100644
--- a/README.md
+++ b/README.md
@@ -19,6 +19,26 @@ From the basics to deep reinforcement learning, this repo provides easy-to-read
 8. A2C  — [`2-a2c.py`](./2-cartpole/2-a2c.py)
 9. PPO  — [`3-ppo.py`](./2-cartpole/3-ppo.py)
 
+**Atari** ([`3-atari/`](./3-atari))
+
+10. DQN  — [`1-dqn.py`](./3-atari/1-dqn.py)
+11. PPO  — [`2-ppo.py`](./3-atari/2-ppo.py)
+
+## Benchmarks
+
+Trained on a **MacBook Pro 14" (Apple M3, 8 GB unified memory)**, macOS 26.2, Python 3.11, PyTorch 2.11 with the MPS backend. CPU / GPU figures are read from Activity Monitor on the `python3.11` process after the run has stabilized (~5 min in); peak RAM is the process's real memory at its high-water mark. Final score is the mean per-game return over the last 20 completed games of training (a game ends only when all lives are lost, not on each life loss).
+
+### Atari — Breakout (10M agent steps, `ALE/Breakout-v5` with sticky actions)
+
+| Algorithm | Params | Train time | Final mean (per-game) | Peak RAM | CPU% | GPU% | W&B |
+|-----------|--------|------------|-----------------------|----------|------|------|-----|
+| DQN       | 1.69M  | ~9h        | 93.5 ± 9.6            | 5.27 GB  | ~60  | ~55  | [report](https://api.wandb.ai/links/rlcode/ljkn7ahp) |
+| PPO       | 1.69M  | ~3.5h      | _TBD_¹                | 1.98 GB  | ~62  | ~55  | [report](https://api.wandb.ai/links/rlcode/jbdsbn6t) |
+
+> Single seed per row, mean ± std over the final 20 logged episodes. `Params` counts only trainable network weights. `CPU%` is the single-process value reported by Activity Monitor (sum across cores, so >100% means multi-core use); `GPU%` is the same column for the Apple GPU. Sticky actions (`repeat_action_probability=0.25`) make absolute scores lower than the deterministic `*-v4` environments often cited in older papers.
+>
+> ¹ Most recent PPO run predates the `LifeLossTerminalEnv` fix and reports only per-life return (final 20: 27.2 ± 3.2). Per-game number will be filled in after the next training run.
+
 ## Setup
 
 Requires Python 3.11 and [uv](https://docs.astral.sh/uv/).
diff --git a/pyproject.toml b/pyproject.toml
index c23a6a5..b55ebf4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,4 +13,5 @@ dependencies = [
     "pygame>=2.6,<3",
     "opencv-python-headless>=4.13,<4.14",
     "wandb>=0.27.0",
+    "moviepy>=2.2.1",
 ]
diff --git a/uv.lock b/uv.lock
index 3406824..5209fae 100644
--- a/uv.lock
+++ b/uv.lock
@@ -190,6 +190,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321 },
 ]
 
+[[package]]
+name = "decorator"
+version = "5.3.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/60/8b/32f9823da46cde7df2087faa08cd98d01b908f8dcab982cdba9c84e85355/decorator-5.3.1.tar.gz", hash = "sha256:4cbcdd55a6efadb9dbea26b858f4fb3264567b52d69ca0d25b721b553f60ea82", size = 58084 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/05/7f/798705f5296a58ca505d600456748d1be48078eac8a7050d8a98bc9edb89/decorator-5.3.1-py3-none-any.whl", hash = "sha256:f47fe6fdbd2edd623ecfe36875d37aba411624e2670dd395dddae1358689bb3c", size = 10365 },
+]
+
 [[package]]
 name = "farama-notifications"
 version = "0.0.6"
@@ -287,6 +296,33 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/d2/23/408243171aa9aaba178d3e2559159c24c1171a641aa83b67bdd3394ead8e/idna-3.15-py3-none-any.whl", hash = "sha256:048adeaf8c2d788c40fee287673ccaa74c24ffd8dcf09ffa555a2fbb59f10ac8", size = 72340 },
 ]
 
+[[package]]
+name = "imageio"
+version = "2.37.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "numpy" },
+    { name = "pillow" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b1/84/93bcd1300216ea50811cee96873b84a1bebf8d0489ffaf7f2a3756bab866/imageio-2.37.3.tar.gz", hash = "sha256:bbb37efbfc4c400fcd534b367b91fcd66d5da639aaa138034431a1c5e0a41451", size = 389673 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/49/fa/391e437a34e55095173dca5f24070d89cbc233ff85bf1c29c93248c6588d/imageio-2.37.3-py3-none-any.whl", hash = "sha256:46f5bb8522cd421c0f5ae104d8268f569d856b29eb1a13b92829d1970f32c9f0", size = 317646 },
+]
+
+[[package]]
+name = "imageio-ffmpeg"
+version = "0.6.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/44/bd/c3343c721f2a1b0c9fc71c1aebf1966a3b7f08c2eea8ed5437a2865611d6/imageio_ffmpeg-0.6.0.tar.gz", hash = "sha256:e2556bed8e005564a9f925bb7afa4002d82770d6b08825078b7697ab88ba1755", size = 25210 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/da/58/87ef68ac83f4c7690961bce288fd8e382bc5f1513860fc7f90a9c1c1c6bf/imageio_ffmpeg-0.6.0-py3-none-macosx_10_9_intel.macosx_10_9_x86_64.whl", hash = "sha256:9d2baaf867088508d4a3458e61eeb30e945c4ad8016025545f66c4b5aaef0a61", size = 24932969 },
+    { url = "https://files.pythonhosted.org/packages/40/5c/f3d8a657d362cc93b81aab8feda487317da5b5d31c0e1fdfd5e986e55d17/imageio_ffmpeg-0.6.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:b1ae3173414b5fc5f538a726c4e48ea97edc0d2cdc11f103afee655c463fa742", size = 21113891 },
+    { url = "https://files.pythonhosted.org/packages/33/e7/1925bfbc563c39c1d2e82501d8372734a5c725e53ac3b31b4c2d081e895b/imageio_ffmpeg-0.6.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1d47bebd83d2c5fc770720d211855f208af8a596c82d17730aa51e815cdee6dc", size = 25632706 },
+    { url = "https://files.pythonhosted.org/packages/a0/2d/43c8522a2038e9d0e7dbdf3a61195ecc31ca576fb1527a528c877e87d973/imageio_ffmpeg-0.6.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:c7e46fcec401dd990405049d2e2f475e2b397779df2519b544b8aab515195282", size = 29498237 },
+    { url = "https://files.pythonhosted.org/packages/a0/13/59da54728351883c3c1d9fca1710ab8eee82c7beba585df8f25ca925f08f/imageio_ffmpeg-0.6.0-py3-none-win32.whl", hash = "sha256:196faa79366b4a82f95c0f4053191d2013f4714a715780f0ad2a68ff37483cc2", size = 19652251 },
+    { url = "https://files.pythonhosted.org/packages/2c/c6/fa760e12a2483469e2bf5058c5faff664acf66cadb4df2ad6205b016a73d/imageio_ffmpeg-0.6.0-py3-none-win_amd64.whl", hash = "sha256:02fa47c83703c37df6bfe4896aab339013f62bf02c5ebf2dce6da56af04ffc0a", size = 31246824 },
+]
+
 [[package]]
 name = "jinja2"
 version = "3.1.6"
@@ -375,6 +411,24 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/6f/87/afead29192170917537934c6aff4b008c805fff7b1ccea0c79120d96beda/matplotlib-3.10.9-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3fc0364dfbe1d07f6d15c5ebd0c5bf89e126916e5a8667dd4a7a6e84c36653d4", size = 8774002 },
 ]
 
+[[package]]
+name = "moviepy"
+version = "2.2.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "decorator" },
+    { name = "imageio" },
+    { name = "imageio-ffmpeg" },
+    { name = "numpy" },
+    { name = "pillow" },
+    { name = "proglog" },
+    { name = "python-dotenv" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/de/61/15f9476e270f64c78a834e7459ca045d669f869cec24eed26807b8cd479d/moviepy-2.2.1.tar.gz", hash = "sha256:c80cb56815ece94e5e3e2d361aa40070eeb30a09d23a24c4e684d03e16deacb1", size = 58431438 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9a/73/7d3b2010baa0b5eb1e4dfa9e4385e89b6716be76f2fa21a6c0fe34b68e5a/moviepy-2.2.1-py3-none-any.whl", hash = "sha256:6b56803fec2ac54b557404126ac1160e65448e03798fa282bd23e8fab3795060", size = 129871 },
+]
+
 [[package]]
 name = "mpmath"
 version = "1.3.0"
@@ -623,6 +677,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/75/a6/a0a304dc33b49145b21f4808d763822111e67d1c3a32b524a1baf947b6e1/platformdirs-4.9.6-py3-none-any.whl", hash = "sha256:e61adb1d5e5cb3441b4b7710bea7e4c12250ca49439228cc1021c00dcfac0917", size = 21348 },
 ]
 
+[[package]]
+name = "proglog"
+version = "0.1.12"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "tqdm" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c2/af/c108866c452eda1132f3d6b3cb6be2ae8430c97e9309f38ca9dbd430af37/proglog-0.1.12.tar.gz", hash = "sha256:361ee074721c277b89b75c061336cb8c5f287c92b043efa562ccf7866cda931c", size = 8794 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c1/1b/f7ea6cde25621cd9236541c66ff018f4268012a534ec31032bcb187dc5e7/proglog-0.1.12-py3-none-any.whl", hash = "sha256:ccaafce51e80a81c65dc907a460c07ccb8ec1f78dc660cfd8f9ec3a22f01b84c", size = 6337 },
+]
+
 [[package]]
 name = "protobuf"
 version = "7.34.1"
@@ -727,6 +793,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 },
 ]
 
+[[package]]
+name = "python-dotenv"
+version = "1.2.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101 },
+]
+
 [[package]]
 name = "pyyaml"
 version = "6.0.3"
@@ -752,6 +827,7 @@ dependencies = [
     { name = "ale-py" },
     { name = "gymnasium", extra = ["atari"] },
     { name = "matplotlib" },
+    { name = "moviepy" },
     { name = "numpy" },
     { name = "opencv-python-headless" },
     { name = "pygame" },
@@ -765,6 +841,7 @@ requires-dist = [
     { name = "ale-py", specifier = ">=0.11,<0.12" },
     { name = "gymnasium", extras = ["atari"], specifier = ">=1.2,<1.3" },
     { name = "matplotlib", specifier = ">=3.10,<3.11" },
+    { name = "moviepy", specifier = ">=2.2.1" },
     { name = "numpy", specifier = ">=2.3,<2.4" },
     { name = "opencv-python-headless", specifier = ">=4.13,<4.14" },
     { name = "pygame", specifier = ">=2.6,<3" },
@@ -883,6 +960,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/10/58/ed8f7754299f3e91d6414b6dc09f62b3fa7c6e5d63dfe48d69ab81498a37/torchvision-0.26.0-cp311-cp311-win_amd64.whl", hash = "sha256:de6424b12887ad884f39a0ee446994ae3cd3b6a00a9cafe1bead85a031132af0", size = 3983834 },
 ]
 
+[[package]]
+name = "tqdm"
+version = "4.67.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374 },
+]
+
 [[package]]
 name = "triton"
 version = "3.6.0"