diff --git a/.gitignore b/.gitignore index ba948a5..b0ba7c2 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,10 @@ __pycache__ .python-version *.pt wandb/ -./Code 2. Cartpole/6. A3C/Cartpole_A3C.pgy \ No newline at end of file +logs/ +./Code 2. Cartpole/6. A3C/Cartpole_A3C.pgy +# Local scratch scripts +scripts/ + +# Local-only docs (not for github) +docs/ diff --git a/3-atari/1-dqn.py b/3-atari/1-dqn.py index 2ed9132..9db4fce 100644 --- a/3-atari/1-dqn.py +++ b/3-atari/1-dqn.py @@ -11,7 +11,6 @@ serious training. """ import random -import sys from collections import deque import numpy as np @@ -23,17 +22,17 @@ SAVE_PATH = "atari_dqn.pt" -TOTAL_FRAMES = 1_000_000 # bump to ~10M for paper-quality results -BUFFER_CAPACITY = 100_000 # bump to 1M with enough RAM -BATCH_SIZE = 64 +TOTAL_FRAMES = 10_000_000 # Nature uses 50M agent steps; 10M is laptop-friendly +BUFFER_CAPACITY = 500_000 # ~3.5GB RAM (uint8, single frames stacked at sample time); sized for 8GB Macs +BATCH_SIZE = 32 GAMMA = 0.99 LR = 1e-4 -LEARN_START = 10_000 # frames of pure exploration before training begins +LEARN_START = 80_000 # frames of pure exploration before training begins TRAIN_EVERY = 4 -TARGET_UPDATE_EVERY = 1_000 # in training steps, not env steps +TARGET_UPDATE_EVERY = 250 # in training steps, not env steps (~1k env frames) EPSILON_START = 1.0 -EPSILON_END = 0.05 -EPSILON_DECAY_FRAMES = 250_000 # linear decay from start to end over this many frames +EPSILON_END = 0.01 +EPSILON_DECAY_FRAMES = 1_000_000 # linear decay from start to end over this many frames # Standard Nature CNN. @@ -57,34 +56,59 @@ def forward(self, x): class ReplayBuffer: - """Uint8 replay buffer — far more memory-efficient than storing floats.""" + """Single-frame uint8 buffer — stacks of 4 are reconstructed at sample time, + cutting RAM ~4x vs. 
storing the full stack per slot.""" - def __init__(self, capacity, obs_shape): + def __init__(self, capacity, frame_shape=(84, 84), stack=4): self.capacity = capacity - self.obs = np.zeros((capacity, *obs_shape), dtype=np.uint8) - self.next_obs = np.zeros((capacity, *obs_shape), dtype=np.uint8) - self.actions = np.zeros(capacity, dtype=np.int64) - self.rewards = np.zeros(capacity, dtype=np.float32) - self.dones = np.zeros(capacity, dtype=np.float32) + self.stack = stack + self.frames = np.zeros((capacity, *frame_shape), dtype=np.uint8) + self.actions = np.zeros(capacity, dtype=np.int64) + self.rewards = np.zeros(capacity, dtype=np.float32) + self.dones = np.zeros(capacity, dtype=np.float32) self.idx = 0 self.size = 0 - def push(self, obs, action, reward, next_obs, done): - self.obs[self.idx] = obs + def push(self, frame, action, reward, done): + self.frames[self.idx] = frame self.actions[self.idx] = action self.rewards[self.idx] = reward - self.next_obs[self.idx] = next_obs self.dones[self.idx] = float(done) self.idx = (self.idx + 1) % self.capacity self.size = min(self.size + 1, self.capacity) + def _stack(self, idx): + # Gather frames[idx-stack+1 .. idx]; newest at last channel. + offsets = np.arange(self.stack) + gather = (idx[:, None] - (self.stack - 1) + offsets[None, :]) % self.capacity + out = self.frames[gather] + # Zero out frames sitting before an episode boundary inside the stack. + # dones at the (stack-1) older positions mark where a prior episode ended. + older = self.dones[gather[:, :-1]].astype(bool) + # Once we cross any done walking newest→oldest, everything older is invalid. + invalid = np.cumsum(older[:, ::-1], axis=1)[:, ::-1] > 0 + mask = np.concatenate([~invalid, np.ones((idx.shape[0], 1), dtype=bool)], axis=1) + return out * mask[:, :, None, None] + def sample(self, batch_size, device): - idx = np.random.randint(0, self.size, size=batch_size) + # Reject indices whose stack would straddle the write head (stale frames). 
+ while True: + if self.size < self.capacity: + if self.size < self.stack + 2: + raise RuntimeError("buffer too small to sample yet") + idx = np.random.randint(self.stack - 1, self.size - 1, size=batch_size) + break + idx = np.random.randint(0, self.capacity, size=batch_size) + dist = (self.idx - 1 - idx) % self.capacity + if np.all(dist >= self.stack): + break + states = self._stack(idx) + next_states = self._stack((idx + 1) % self.capacity) return ( - torch.as_tensor(self.obs[idx], device=device), + torch.as_tensor(states, device=device), torch.as_tensor(self.actions[idx], device=device), torch.as_tensor(self.rewards[idx], device=device), - torch.as_tensor(self.next_obs[idx], device=device), + torch.as_tensor(next_states, device=device), torch.as_tensor(self.dones[idx], device=device), ) @@ -130,10 +154,12 @@ def greedy_action(obs): print(f"device: {device}, env: {args.env}, actions: {n_actions}") - buffer = ReplayBuffer(BUFFER_CAPACITY, env.observation_space.shape) + buffer = ReplayBuffer(BUFFER_CAPACITY) obs, _ = env.reset() - ep_return = 0.0 + ep_return = 0.0 # accumulates within one life (LifeLossTerminalEnv ends an "episode" per life) + game_return = 0.0 # accumulates across all 5 lives until real game-over recent_returns = deque(maxlen=20) + recent_game_returns = deque(maxlen=20) train_step = 0 last_loss = 0.0 @@ -146,18 +172,23 @@ def greedy_action(obs): else: action = greedy_action(obs) - next_obs, reward, terminated, truncated, _ = env.step(action) + next_obs, reward, terminated, truncated, info = env.step(action) done = terminated or truncated # Reward clipping (DeepMind standard) — keeps Q-values from blowing up # when one game has rewards in tens and another in hundreds. clipped = np.sign(reward) - buffer.push(np.asarray(obs), action, clipped, np.asarray(next_obs), done) + # FrameStack gives (4, 84, 84); store just the newest frame and stack at sample time. 
+ buffer.push(np.asarray(obs)[-1], action, clipped, done) ep_return += reward + game_return += reward obs = next_obs if done: recent_returns.append(ep_return) ep_return = 0.0 + if info.get("game_over", True): + recent_game_returns.append(game_return) + game_return = 0.0 obs, _ = env.reset() # Training. @@ -182,12 +213,14 @@ def greedy_action(obs): # Logging. if frame % 10_000 == 0: mean = float(np.mean(recent_returns)) if recent_returns else 0.0 + game_mean = float(np.mean(recent_game_returns)) if recent_game_returns else 0.0 print(f"frame: {frame:>8} eps: {epsilon(frame):.3f} " - f"recent_mean_return: {mean:.1f} buffer: {buffer.size}") + f"per_life: {mean:.1f} per_game: {game_mean:.1f} buffer: {buffer.size}") if args.wandb: wandb.log({ "global_step": frame, "recent_mean_return": mean, + "recent_mean_game_return": game_mean, "epsilon": epsilon(frame), "loss": last_loss, "buffer_size": buffer.size, diff --git a/3-atari/2-ppo.py b/3-atari/2-ppo.py index 2542f29..9082858 100644 --- a/3-atari/2-ppo.py +++ b/3-atari/2-ppo.py @@ -17,7 +17,7 @@ SAVE_PATH = "atari_ppo.pt" -TOTAL_FRAMES = 5_000_000 +TOTAL_FRAMES = 10_000_000 N_ENVS = 8 ROLLOUT_STEPS = 128 # batch = N_ENVS * ROLLOUT_STEPS = 1024 EPOCHS = 4 @@ -109,10 +109,17 @@ def policy_action(obs): frames_per_update = batch_size n_updates = TOTAL_FRAMES // frames_per_update obs, _ = envs.reset() - ep_returns_per_env = np.zeros(N_ENVS, dtype=np.float32) + ep_returns_per_env = np.zeros(N_ENVS, dtype=np.float32) # per-life (resets every life loss) + game_returns_per_env = np.zeros(N_ENVS, dtype=np.float32) # per-game (resets only on real game-over) ep_returns = [] + game_returns = [] for update in range(1, n_updates + 1): + # Linear LR anneal from LR -> 0 over the run (CleanRL convention). 
+ lr_now = LR * (1.0 - (update - 1) / n_updates) + for g in optimizer.param_groups: + g["lr"] = lr_now + obs_buf = np.zeros((ROLLOUT_STEPS, N_ENVS, *obs_shape), dtype=np.uint8) act_buf = np.zeros((ROLLOUT_STEPS, N_ENVS), dtype=np.int64) logp_buf = np.zeros((ROLLOUT_STEPS, N_ENVS), dtype=np.float32) @@ -134,16 +141,22 @@ def policy_action(obs): logp_buf[t] = logp.cpu().numpy() val_buf[t] = value.cpu().numpy() - next_obs, reward, terminated, truncated, _ = envs.step(act_buf[t]) + next_obs, reward, terminated, truncated, info = envs.step(act_buf[t]) done = np.logical_or(terminated, truncated) ep_returns_per_env += reward + game_returns_per_env += reward rew_buf[t] = np.sign(reward).astype(np.float32) # DeepMind reward clipping done_buf[t] = done.astype(np.float32) + # LifeLossTerminalEnv tags each step's info with game_over (True only on real game-over). + game_over = info.get("game_over", done) for i in range(N_ENVS): if done[i]: ep_returns.append(float(ep_returns_per_env[i])) ep_returns_per_env[i] = 0.0 + if bool(game_over[i]): + game_returns.append(float(game_returns_per_env[i])) + game_returns_per_env[i] = 0.0 obs = next_obs # --- GAE --- @@ -151,12 +164,12 @@ def policy_action(obs): obs_t = torch.as_tensor(np.asarray(obs), device=device) _, last_value = model(obs_t) advantages, returns = compute_gae(rew_buf, val_buf, done_buf, last_value.cpu().numpy()) - advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8) # Flatten (T, N_ENVS, ...) -> (T*N_ENVS, ...) 
obs_t = torch.as_tensor(obs_buf.reshape(batch_size, *obs_shape), device=device) act_t = torch.as_tensor(act_buf.reshape(batch_size), device=device) old_logp_t = torch.as_tensor(logp_buf.reshape(batch_size), device=device) + old_val_t = torch.as_tensor(val_buf.reshape(batch_size), device=device) adv_t = torch.as_tensor(advantages.reshape(batch_size), device=device) ret_t = torch.as_tensor(returns.reshape(batch_size), device=device) @@ -173,11 +186,22 @@ def policy_action(obs): new_logp = dist.log_prob(act_t[mb]) entropy = dist.entropy().mean() + # Advantage normalization per minibatch (CleanRL convention). + mb_adv = adv_t[mb] + mb_adv = (mb_adv - mb_adv.mean()) / (mb_adv.std() + 1e-8) + ratio = (new_logp - old_logp_t[mb]).exp() - unclipped = ratio * adv_t[mb] - clipped = torch.clamp(ratio, 1 - CLIP_COEF, 1 + CLIP_COEF) * adv_t[mb] + unclipped = ratio * mb_adv + clipped = torch.clamp(ratio, 1 - CLIP_COEF, 1 + CLIP_COEF) * mb_adv policy_loss = -torch.min(unclipped, clipped).mean() - value_loss = (values - ret_t[mb]).pow(2).mean() + + # Value loss with clipping around the old value prediction. 
+ v_clipped = old_val_t[mb] + torch.clamp( + values - old_val_t[mb], -CLIP_COEF, CLIP_COEF) + vl_unclipped = (values - ret_t[mb]).pow(2) + vl_clipped = (v_clipped - ret_t[mb]).pow(2) + value_loss = 0.5 * torch.max(vl_unclipped, vl_clipped).mean() + loss = policy_loss + VALUE_COEF * value_loss - ENTROPY_COEF * entropy optimizer.zero_grad() @@ -192,18 +216,23 @@ def policy_action(obs): global_step = update * frames_per_update if ep_returns: - recent = ep_returns[-20:] + life_mean = float(np.mean(ep_returns[-20:])) + game_mean = float(np.mean(game_returns[-20:])) if game_returns else 0.0 print(f"update: {update:>4} frames: {global_step:>8} " - f"recent_mean_return: {np.mean(recent):.1f} episodes: {len(ep_returns)}") + f"per_life: {life_mean:.1f} per_game: {game_mean:.1f} " + f"lives: {len(ep_returns)} games: {len(game_returns)}") if args.wandb: log = { "global_step": global_step, "policy_loss": pl_sum / n_mb, "value_loss": vl_sum / n_mb, "entropy": ent_sum / n_mb, + "lr": lr_now, } if ep_returns: log["recent_mean_return"] = float(np.mean(ep_returns[-20:])) + if game_returns: + log["recent_mean_game_return"] = float(np.mean(game_returns[-20:])) wandb.log(log, step=global_step) torch.save(model.state_dict(), SAVE_PATH) diff --git a/3-atari/env.py b/3-atari/env.py index 2f3d1b9..1fb8513 100644 --- a/3-atari/env.py +++ b/3-atari/env.py @@ -29,6 +29,39 @@ def reset(self, **kwargs): obs, _ = self.env.reset(**kwargs) return obs, {} + +# Treats each life as its own episode for bootstrapping (so Q-targets / GAE don't +# value-chain across deaths) but only resets the real game when all lives are +# gone. Without this, every life loss triggers a full env.reset() — burning +# frames on noop_max + FIRE and breaking long-horizon credit assignment. 
+class LifeLossTerminalEnv(gym.Wrapper): + def __init__(self, env): + super().__init__(env) + self.lives = 0 + self.game_over = True + + def step(self, action): + obs, reward, terminated, truncated, info = self.env.step(action) + self.game_over = terminated or truncated + lives = info.get("lives", 0) + if 0 < lives < self.lives: + terminated = True + self.lives = lives + info["game_over"] = self.game_over + return obs, reward, terminated, truncated, info + + def reset(self, **kwargs): + if self.game_over: + obs, info = self.env.reset(**kwargs) + else: + # Fake terminal from a life loss — advance one frame instead of + # resetting so the game keeps its remaining lives. + obs, _, terminated, truncated, info = self.env.step(0) + if terminated or truncated: + obs, info = self.env.reset(**kwargs) + self.lives = info.get("lives", 0) + return obs, info + ENV_IDS = { "breakout": "ALE/Breakout-v5", "pong": "ALE/Pong-v5", @@ -61,12 +94,13 @@ def make_env(args): noop_max=30, frame_skip=4, screen_size=84, - terminal_on_life_loss=True, + terminal_on_life_loss=False, # handled by LifeLossTerminalEnv below grayscale_obs=True, scale_obs=False, # keep uint8; we normalize in the model ) if "FIRE" in env.unwrapped.get_action_meanings(): env = FireResetEnv(env) + env = LifeLossTerminalEnv(env) env = gym.wrappers.FrameStackObservation(env, stack_size=4) return env diff --git a/README.md b/README.md index f20c8fa..38e6f50 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,26 @@ From the basics to deep reinforcement learning, this repo provides easy-to-read 8. A2C — [`2-a2c.py`](./2-cartpole/2-a2c.py) 9. PPO — [`3-ppo.py`](./2-cartpole/3-ppo.py) +**Atari** ([`3-atari/`](./3-atari)) + +10. DQN — [`1-dqn.py`](./3-atari/1-dqn.py) +11. PPO — [`2-ppo.py`](./3-atari/2-ppo.py) + +## Benchmarks + +Trained on a **MacBook Pro 14" (Apple M3, 8 GB unified memory)**, macOS 26.2, Python 3.11, PyTorch 2.11 with the MPS backend. 
CPU / GPU figures are read from Activity Monitor on the `python3.11` process after the run has stabilized (~5 min in); peak RAM is the process's real memory at its high-water mark. Final score is the mean per-game return over the last 20 completed games of training (a game ends when all lives are lost, not on each life loss). + +### Atari — Breakout (10M agent steps, `ALE/Breakout-v5` with sticky actions) + +| Algorithm | Params | Train time | Final mean (per-game) | Peak RAM | CPU% | GPU% | W&B | +|-----------|--------|------------|-----------------------|----------|------|------|-----| +| DQN | 1.69M | ~9h | 93.5 ± 9.6 | 5.27 GB | ~60 | ~55 | [report](https://api.wandb.ai/links/rlcode/ljkn7ahp) | +| PPO | 1.69M | ~3.5h | _TBD_¹ | 1.98 GB | ~62 | ~55 | [report](https://api.wandb.ai/links/rlcode/jbdsbn6t) | + +> Single seed per row, mean ± std over the final 20 completed games. `Params` counts only trainable network weights. `CPU%` is the single-process value reported by Activity Monitor (sum across cores, so >100% means multi-core use); `GPU%` is the same column for the Apple GPU. Sticky actions (`repeat_action_probability=0.25`) make absolute scores lower than the deterministic `*-v4` environments often cited in older papers. +> +> ¹ Most recent PPO run predates the `LifeLossTerminalEnv` fix and reports only per-life return (final 20: 27.2 ± 3.2). Per-game number will be filled in after the next training run. + ## Setup Requires Python 3.11 and [uv](https://docs.astral.sh/uv/). 
diff --git a/pyproject.toml b/pyproject.toml index c23a6a5..b55ebf4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,4 +13,5 @@ dependencies = [ "pygame>=2.6,<3", "opencv-python-headless>=4.13,<4.14", "wandb>=0.27.0", + "moviepy>=2.2.1", ] diff --git a/uv.lock b/uv.lock index 3406824..5209fae 100644 --- a/uv.lock +++ b/uv.lock @@ -190,6 +190,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321 }, ] +[[package]] +name = "decorator" +version = "5.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/60/8b/32f9823da46cde7df2087faa08cd98d01b908f8dcab982cdba9c84e85355/decorator-5.3.1.tar.gz", hash = "sha256:4cbcdd55a6efadb9dbea26b858f4fb3264567b52d69ca0d25b721b553f60ea82", size = 58084 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/7f/798705f5296a58ca505d600456748d1be48078eac8a7050d8a98bc9edb89/decorator-5.3.1-py3-none-any.whl", hash = "sha256:f47fe6fdbd2edd623ecfe36875d37aba411624e2670dd395dddae1358689bb3c", size = 10365 }, +] + [[package]] name = "farama-notifications" version = "0.0.6" @@ -287,6 +296,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d2/23/408243171aa9aaba178d3e2559159c24c1171a641aa83b67bdd3394ead8e/idna-3.15-py3-none-any.whl", hash = "sha256:048adeaf8c2d788c40fee287673ccaa74c24ffd8dcf09ffa555a2fbb59f10ac8", size = 72340 }, ] +[[package]] +name = "imageio" +version = "2.37.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "pillow" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/84/93bcd1300216ea50811cee96873b84a1bebf8d0489ffaf7f2a3756bab866/imageio-2.37.3.tar.gz", hash = "sha256:bbb37efbfc4c400fcd534b367b91fcd66d5da639aaa138034431a1c5e0a41451", size = 389673 } +wheels = [ + 
{ url = "https://files.pythonhosted.org/packages/49/fa/391e437a34e55095173dca5f24070d89cbc233ff85bf1c29c93248c6588d/imageio-2.37.3-py3-none-any.whl", hash = "sha256:46f5bb8522cd421c0f5ae104d8268f569d856b29eb1a13b92829d1970f32c9f0", size = 317646 }, +] + +[[package]] +name = "imageio-ffmpeg" +version = "0.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/44/bd/c3343c721f2a1b0c9fc71c1aebf1966a3b7f08c2eea8ed5437a2865611d6/imageio_ffmpeg-0.6.0.tar.gz", hash = "sha256:e2556bed8e005564a9f925bb7afa4002d82770d6b08825078b7697ab88ba1755", size = 25210 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/da/58/87ef68ac83f4c7690961bce288fd8e382bc5f1513860fc7f90a9c1c1c6bf/imageio_ffmpeg-0.6.0-py3-none-macosx_10_9_intel.macosx_10_9_x86_64.whl", hash = "sha256:9d2baaf867088508d4a3458e61eeb30e945c4ad8016025545f66c4b5aaef0a61", size = 24932969 }, + { url = "https://files.pythonhosted.org/packages/40/5c/f3d8a657d362cc93b81aab8feda487317da5b5d31c0e1fdfd5e986e55d17/imageio_ffmpeg-0.6.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:b1ae3173414b5fc5f538a726c4e48ea97edc0d2cdc11f103afee655c463fa742", size = 21113891 }, + { url = "https://files.pythonhosted.org/packages/33/e7/1925bfbc563c39c1d2e82501d8372734a5c725e53ac3b31b4c2d081e895b/imageio_ffmpeg-0.6.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1d47bebd83d2c5fc770720d211855f208af8a596c82d17730aa51e815cdee6dc", size = 25632706 }, + { url = "https://files.pythonhosted.org/packages/a0/2d/43c8522a2038e9d0e7dbdf3a61195ecc31ca576fb1527a528c877e87d973/imageio_ffmpeg-0.6.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:c7e46fcec401dd990405049d2e2f475e2b397779df2519b544b8aab515195282", size = 29498237 }, + { url = "https://files.pythonhosted.org/packages/a0/13/59da54728351883c3c1d9fca1710ab8eee82c7beba585df8f25ca925f08f/imageio_ffmpeg-0.6.0-py3-none-win32.whl", hash = "sha256:196faa79366b4a82f95c0f4053191d2013f4714a715780f0ad2a68ff37483cc2", size = 
19652251 }, + { url = "https://files.pythonhosted.org/packages/2c/c6/fa760e12a2483469e2bf5058c5faff664acf66cadb4df2ad6205b016a73d/imageio_ffmpeg-0.6.0-py3-none-win_amd64.whl", hash = "sha256:02fa47c83703c37df6bfe4896aab339013f62bf02c5ebf2dce6da56af04ffc0a", size = 31246824 }, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -375,6 +411,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6f/87/afead29192170917537934c6aff4b008c805fff7b1ccea0c79120d96beda/matplotlib-3.10.9-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3fc0364dfbe1d07f6d15c5ebd0c5bf89e126916e5a8667dd4a7a6e84c36653d4", size = 8774002 }, ] +[[package]] +name = "moviepy" +version = "2.2.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "decorator" }, + { name = "imageio" }, + { name = "imageio-ffmpeg" }, + { name = "numpy" }, + { name = "pillow" }, + { name = "proglog" }, + { name = "python-dotenv" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/de/61/15f9476e270f64c78a834e7459ca045d669f869cec24eed26807b8cd479d/moviepy-2.2.1.tar.gz", hash = "sha256:c80cb56815ece94e5e3e2d361aa40070eeb30a09d23a24c4e684d03e16deacb1", size = 58431438 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/73/7d3b2010baa0b5eb1e4dfa9e4385e89b6716be76f2fa21a6c0fe34b68e5a/moviepy-2.2.1-py3-none-any.whl", hash = "sha256:6b56803fec2ac54b557404126ac1160e65448e03798fa282bd23e8fab3795060", size = 129871 }, +] + [[package]] name = "mpmath" version = "1.3.0" @@ -623,6 +677,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/75/a6/a0a304dc33b49145b21f4808d763822111e67d1c3a32b524a1baf947b6e1/platformdirs-4.9.6-py3-none-any.whl", hash = "sha256:e61adb1d5e5cb3441b4b7710bea7e4c12250ca49439228cc1021c00dcfac0917", size = 21348 }, ] +[[package]] +name = "proglog" +version = "0.1.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tqdm" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/c2/af/c108866c452eda1132f3d6b3cb6be2ae8430c97e9309f38ca9dbd430af37/proglog-0.1.12.tar.gz", hash = "sha256:361ee074721c277b89b75c061336cb8c5f287c92b043efa562ccf7866cda931c", size = 8794 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/1b/f7ea6cde25621cd9236541c66ff018f4268012a534ec31032bcb187dc5e7/proglog-0.1.12-py3-none-any.whl", hash = "sha256:ccaafce51e80a81c65dc907a460c07ccb8ec1f78dc660cfd8f9ec3a22f01b84c", size = 6337 }, +] + [[package]] name = "protobuf" version = "7.34.1" @@ -727,6 +793,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 }, ] +[[package]] +name = "python-dotenv" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101 }, +] + [[package]] name = "pyyaml" version = "6.0.3" @@ -752,6 +827,7 @@ dependencies = [ { name = "ale-py" }, { name = "gymnasium", extra = ["atari"] }, { name = "matplotlib" }, + { name = "moviepy" }, { name = "numpy" }, { name = "opencv-python-headless" }, { name = "pygame" }, @@ -765,6 +841,7 @@ requires-dist = [ { name = "ale-py", specifier = ">=0.11,<0.12" }, { name = "gymnasium", extras = ["atari"], specifier = ">=1.2,<1.3" }, { name = "matplotlib", specifier = ">=3.10,<3.11" }, + { name = "moviepy", specifier = 
">=2.2.1" }, { name = "numpy", specifier = ">=2.3,<2.4" }, { name = "opencv-python-headless", specifier = ">=4.13,<4.14" }, { name = "pygame", specifier = ">=2.6,<3" }, @@ -883,6 +960,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/10/58/ed8f7754299f3e91d6414b6dc09f62b3fa7c6e5d63dfe48d69ab81498a37/torchvision-0.26.0-cp311-cp311-win_amd64.whl", hash = "sha256:de6424b12887ad884f39a0ee446994ae3cd3b6a00a9cafe1bead85a031132af0", size = 3983834 }, ] +[[package]] +name = "tqdm" +version = "4.67.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374 }, +] + [[package]] name = "triton" version = "3.6.0"