From efcf563e4af762393f3c747da3f791a133789fe8 Mon Sep 17 00:00:00 2001
From: aman-17 <amanrangapur@gmail.com>
Date: Wed, 12 Feb 2025 13:43:04 -0800
Subject: [PATCH 1/9] Updated `memmap_dtype` to uint32, added discord and
 playground to readme

---
 README.md                   | 12 +++++++++---
 olmo/config.py              |  2 +-
 olmo/data/memmap_dataset.py |  2 +-
 3 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/README.md b/README.md
index 76538ecaa..dc4d2934a 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,12 @@
   <a href="https://arxiv.org/pdf/2501.00656.pdf">
     <img alt="Paper URL" src="https://img.shields.io/badge/arxiv-2402.00838-blue">
   </a>
+  <a href="https://playground.allenai.org">
+    <img alt="Playground" src="https://img.shields.io/badge/Ai2-Playground-F0529C">
+  </a>
+  <a href="https://discord.gg/sZq3jTNVNG">
+    <img alt="Discord" src="https://img.shields.io/badge/Discord%20-%20blue?style=flat&logo=discord&label=Ai2&color=%235B65E9">
+  </a>
 </p>
 
 OLMo is a repository for training and using AI2's state-of-the-art open language models. It is designed by scientists, for scientists.
@@ -46,8 +52,8 @@ You can find *all* the checkpoints, at minimum every 1000 training steps in OLMo
 
 | Variant          | OLMo Format                                                                                          | Hugging Face Format                                                               |
 |------------------|-----------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------|
-| **OLMo 7B**      | [OLMo 7B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-7B.csv)       | [Hugging Face for the 7B variant](https://huggingface.co/allenai/OLMo-2-1124-7B)  |
-| **OLMo 13B**     | [OLMo 13B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-13B.csv)     | [Hugging Face for the 13B variant](https://huggingface.co/allenai/OLMo-2-1124-13B) |
+| **OLMo-2 7B**      | [OLMo-2 7B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-7B.csv)       | [Hugging Face for the 7B variant](https://huggingface.co/allenai/OLMo-2-1124-7B)  |
+| **OLMo-2 13B**     | [OLMo-2 13B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-13B.csv)     | [Hugging Face for the 13B variant](https://huggingface.co/allenai/OLMo-2-1124-13B) |
 
 ### Steps to reproduce
 
@@ -81,7 +87,7 @@ Example:
 ```bash
 python scripts/train.py configs/tiny/OLMo-20M.yaml --save_overwrite
 ```
-Note: You need to upgrade PyTorch to 2.5.x to run.
+Note: You need to upgrade PyTorch to 2.5.x to run. OLMo-2-1124 uses `uint32` for `memmap_dtype`, whereas OLMo-0724 uses `uint16`.
 
 ### Stage 1
 
diff --git a/olmo/config.py b/olmo/config.py
index 6da7dc03d..e67455d96 100644
--- a/olmo/config.py
+++ b/olmo/config.py
@@ -608,7 +608,7 @@ class InstanceFilterConfig(BaseConfig):
 @dataclass
 class DataConfig(BaseConfig):
     paths: Optional[List[str]] = None
-    memmap_dtype: str = "uint16"
+    memmap_dtype: str = "uint32"
     datasets: Optional[Dict[str, List[str]]] = None
     label_mask_paths: Optional[List[str]] = None
     pad_direction: PaddingDirection = PaddingDirection.right
diff --git a/olmo/data/memmap_dataset.py b/olmo/data/memmap_dataset.py
index 901bc5703..28d6b887b 100644
--- a/olmo/data/memmap_dataset.py
+++ b/olmo/data/memmap_dataset.py
@@ -47,7 +47,7 @@ def __init__(
         self,
         *paths: PathOrStr,
         chunk_size: int = 1024,
-        memmap_dtype: Union[Type[np.uint8], Type[np.uint16], Type[np.uint32], Type[np.uint64]] = np.uint16,
+        memmap_dtype: Union[Type[np.uint8], Type[np.uint16], Type[np.uint32], Type[np.uint64]] = np.uint32,
         metadata: Optional[Union[List[Dict[str, Any]], Dict[str, Any]]] = None,
         include_instance_metadata: bool = True,
         generate_attention_mask: bool = False,

From 4d84e59ba20814e148cd738980f3a573166ff2d6 Mon Sep 17 00:00:00 2001
From: aman-17 <amanrangapur@gmail.com>
Date: Wed, 12 Feb 2025 14:00:54 -0800
Subject: [PATCH 2/9] fixed changelog and pytest

---
 CHANGELOG.md                      |  4 ++++
 tests/data/memmap_dataset_test.py | 22 +++++++++++-----------
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index dc77f10c4..6ba69bc19 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - MPS support
 
+### Changed
+
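+- `build_memmap_dataset` now selects the memmap dtype from the model's vocab size (`uint32` when `vocab_size > 2**16`, otherwise `uint16`) for compatibility with OLMo-2-1124.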
+
 ## [v0.6.0](https://github.com/allenai/OLMo/releases/tag/v0.6.0) - 2024-12-17
 
 ### Added
diff --git a/tests/data/memmap_dataset_test.py b/tests/data/memmap_dataset_test.py
index e267043ee..ead2e43cf 100644
--- a/tests/data/memmap_dataset_test.py
+++ b/tests/data/memmap_dataset_test.py
@@ -8,12 +8,12 @@
 
 
 def test_mmap_dataset(tmp_path: Path):
-    mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint16, shape=(16,))
-    mmap1[:] = np.array(list(range(16)), dtype=np.uint16)
+    mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint32, shape=(16,))
+    mmap1[:] = np.array(list(range(16)), dtype=np.uint32)
     mmap1.flush()
 
-    mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint16, shape=(16,))
-    mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint16)
+    mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint32, shape=(16,))
+    mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint32)
     mmap2.flush()
 
     ds = MemMapDataset(tmp_path / "mmap1.npy", tmp_path / "mmap2.npy", chunk_size=4)
@@ -23,8 +23,8 @@ def test_mmap_dataset(tmp_path: Path):
 
 
 def test_mmap_dataset_with_label_mask(tmp_path: Path):
-    mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint16, shape=(16,))
-    mmap1[:] = np.array(list(range(16)), dtype=np.uint16)
+    mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint32, shape=(16,))
+    mmap1[:] = np.array(list(range(16)), dtype=np.uint32)
     mmap1.flush()
 
     mask1 = [True] * 16
@@ -33,8 +33,8 @@ def test_mmap_dataset_with_label_mask(tmp_path: Path):
     mask_mmap1[:] = np.array(mask1, dtype=np.bool_)
     mask_mmap1.flush()
 
-    mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint16, shape=(16,))
-    mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint16)
+    mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint32, shape=(16,))
+    mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint32)
     mmap2.flush()
 
     mask2 = [True] * 16
@@ -66,7 +66,7 @@ def test_mmap_dataset_with_metadata(tokenizer: Tokenizer, tmp_path: Path, lorem_
 
     # Write tokens to memory-mapped array.
     tokens_fname = tmp_path / "tokens.npy"
-    mmap = np.memmap(tokens_fname, dtype=np.uint16, mode="w+", shape=(len(all_token_ids),))
+    mmap = np.memmap(tokens_fname, dtype=np.uint32, mode="w+", shape=(len(all_token_ids),))
     mmap[:] = all_token_ids
     mmap.flush()
     del mmap
@@ -83,10 +83,10 @@ def test_mmap_dataset_with_metadata(tokenizer: Tokenizer, tmp_path: Path, lorem_
 
 def test_concat_mmap_datasets(tmp_path: Path):
     # Write some data to disk.
-    mmap1 = np.memmap(tmp_path / "tokens1.npy", dtype=np.uint16, mode="w+", shape=(16,))
+    mmap1 = np.memmap(tmp_path / "tokens1.npy", dtype=np.uint32, mode="w+", shape=(16,))
     mmap1[:] = list(range(16))
     mmap1.flush()
-    mmap2 = np.memmap(tmp_path / "tokens2.npy", dtype=np.uint16, mode="w+", shape=(8,))
+    mmap2 = np.memmap(tmp_path / "tokens2.npy", dtype=np.uint32, mode="w+", shape=(8,))
     mmap2[:] = list(range(8))
     mmap2.flush()
     del mmap1, mmap2

From 376eb297f3fe8d1610eb3d603f5d947e93167261 Mon Sep 17 00:00:00 2001
From: aman-17 <amanrangapur@gmail.com>
Date: Fri, 14 Feb 2025 15:54:08 -0800
Subject: [PATCH 3/9] restored old changes

---
 olmo/config.py                    |  2 +-
 tests/data/memmap_dataset_test.py | 22 +++++++++++-----------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/olmo/config.py b/olmo/config.py
index e67455d96..6da7dc03d 100644
--- a/olmo/config.py
+++ b/olmo/config.py
@@ -608,7 +608,7 @@ class InstanceFilterConfig(BaseConfig):
 @dataclass
 class DataConfig(BaseConfig):
     paths: Optional[List[str]] = None
-    memmap_dtype: str = "uint32"
+    memmap_dtype: str = "uint16"
     datasets: Optional[Dict[str, List[str]]] = None
     label_mask_paths: Optional[List[str]] = None
     pad_direction: PaddingDirection = PaddingDirection.right
diff --git a/tests/data/memmap_dataset_test.py b/tests/data/memmap_dataset_test.py
index ead2e43cf..e267043ee 100644
--- a/tests/data/memmap_dataset_test.py
+++ b/tests/data/memmap_dataset_test.py
@@ -8,12 +8,12 @@
 
 
 def test_mmap_dataset(tmp_path: Path):
-    mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint32, shape=(16,))
-    mmap1[:] = np.array(list(range(16)), dtype=np.uint32)
+    mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint16, shape=(16,))
+    mmap1[:] = np.array(list(range(16)), dtype=np.uint16)
     mmap1.flush()
 
-    mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint32, shape=(16,))
-    mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint32)
+    mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint16, shape=(16,))
+    mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint16)
     mmap2.flush()
 
     ds = MemMapDataset(tmp_path / "mmap1.npy", tmp_path / "mmap2.npy", chunk_size=4)
@@ -23,8 +23,8 @@ def test_mmap_dataset(tmp_path: Path):
 
 
 def test_mmap_dataset_with_label_mask(tmp_path: Path):
-    mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint32, shape=(16,))
-    mmap1[:] = np.array(list(range(16)), dtype=np.uint32)
+    mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint16, shape=(16,))
+    mmap1[:] = np.array(list(range(16)), dtype=np.uint16)
     mmap1.flush()
 
     mask1 = [True] * 16
@@ -33,8 +33,8 @@ def test_mmap_dataset_with_label_mask(tmp_path: Path):
     mask_mmap1[:] = np.array(mask1, dtype=np.bool_)
     mask_mmap1.flush()
 
-    mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint32, shape=(16,))
-    mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint32)
+    mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint16, shape=(16,))
+    mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint16)
     mmap2.flush()
 
     mask2 = [True] * 16
@@ -66,7 +66,7 @@ def test_mmap_dataset_with_metadata(tokenizer: Tokenizer, tmp_path: Path, lorem_
 
     # Write tokens to memory-mapped array.
     tokens_fname = tmp_path / "tokens.npy"
-    mmap = np.memmap(tokens_fname, dtype=np.uint32, mode="w+", shape=(len(all_token_ids),))
+    mmap = np.memmap(tokens_fname, dtype=np.uint16, mode="w+", shape=(len(all_token_ids),))
     mmap[:] = all_token_ids
     mmap.flush()
     del mmap
@@ -83,10 +83,10 @@ def test_mmap_dataset_with_metadata(tokenizer: Tokenizer, tmp_path: Path, lorem_
 
 def test_concat_mmap_datasets(tmp_path: Path):
     # Write some data to disk.
-    mmap1 = np.memmap(tmp_path / "tokens1.npy", dtype=np.uint32, mode="w+", shape=(16,))
+    mmap1 = np.memmap(tmp_path / "tokens1.npy", dtype=np.uint16, mode="w+", shape=(16,))
     mmap1[:] = list(range(16))
     mmap1.flush()
-    mmap2 = np.memmap(tmp_path / "tokens2.npy", dtype=np.uint32, mode="w+", shape=(8,))
+    mmap2 = np.memmap(tmp_path / "tokens2.npy", dtype=np.uint16, mode="w+", shape=(8,))
     mmap2[:] = list(range(8))
     mmap2.flush()
     del mmap1, mmap2

From 9aa7ebf92343c86956b55ce4ebd0608903bf3f4a Mon Sep 17 00:00:00 2001
From: aman-17 <amanrangapur@gmail.com>
Date: Fri, 14 Feb 2025 17:28:35 -0800
Subject: [PATCH 4/9] select memmap dtype from vocab size, restore uint16 default

---
 olmo/data/__init__.py       | 8 +++++++-
 olmo/data/memmap_dataset.py | 6 ++++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/olmo/data/__init__.py b/olmo/data/__init__.py
index 5bc68670c..4c5211cc6 100644
--- a/olmo/data/__init__.py
+++ b/olmo/data/__init__.py
@@ -3,6 +3,7 @@
 from pathlib import Path
 from typing import Any, Dict, List, Optional, cast
 
+import numpy as np
 from torch.utils.data import DataLoader, DistributedSampler
 
 from ..aliases import PathOrStr
@@ -38,10 +39,15 @@ def build_memmap_dataset(
             metadata.extend([{"label": label}] * len(label_paths))
     else:
         raise OLMoConfigurationError("One of DataConfig.paths or DataConfig.datasets is required")
+
+    if train_config.model.vocab_size >= 2**16:
+        optimal_memmap_dtype = np.uint32
+    else:
+        optimal_memmap_dtype = np.uint16
     return MemMapDataset(
         *paths,
         chunk_size=train_config.model.max_sequence_length,
-        memmap_dtype=data_config.effective_memmap_dtype,
+        memmap_dtype=optimal_memmap_dtype,
         metadata=metadata,
         include_instance_metadata=include_instance_metadata,
         pad_token_id=train_config.model.pad_token_id,
diff --git a/olmo/data/memmap_dataset.py b/olmo/data/memmap_dataset.py
index 28d6b887b..e27b5b754 100644
--- a/olmo/data/memmap_dataset.py
+++ b/olmo/data/memmap_dataset.py
@@ -1,6 +1,8 @@
 from __future__ import annotations
 
+import os
 from copy import deepcopy
+from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple, Type, Union
 
 import numpy as np
@@ -10,7 +12,7 @@
 from olmo.exceptions import OLMoEnvironmentError
 
 from ..aliases import PathOrStr
-from ..config import InstanceFilterConfig
+from ..config import InstanceFilterConfig, TrainConfig
 from ..util import _get_s3_client, file_size, get_bytes_range
 from .util import find_periodic_sequences, get_document_lengths
 
@@ -47,7 +49,7 @@ def __init__(
         self,
         *paths: PathOrStr,
         chunk_size: int = 1024,
-        memmap_dtype: Union[Type[np.uint8], Type[np.uint16], Type[np.uint32], Type[np.uint64]] = np.uint32,
+        memmap_dtype: Union[Type[np.uint8], Type[np.uint16], Type[np.uint32], Type[np.uint64]] = np.uint16,
         metadata: Optional[Union[List[Dict[str, Any]], Dict[str, Any]]] = None,
         include_instance_metadata: bool = True,
         generate_attention_mask: bool = False,

From 23effe41f8684a854647fdb506df983406f62f0e Mon Sep 17 00:00:00 2001
From: aman-17 <amanrangapur@gmail.com>
Date: Fri, 14 Feb 2025 17:46:38 -0800
Subject: [PATCH 5/9] fixed lint

---
 olmo/data/__init__.py       | 5 +----
 olmo/data/memmap_dataset.py | 4 +---
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/olmo/data/__init__.py b/olmo/data/__init__.py
index 4c5211cc6..39d692e18 100644
--- a/olmo/data/__init__.py
+++ b/olmo/data/__init__.py
@@ -40,10 +40,7 @@ def build_memmap_dataset(
     else:
         raise OLMoConfigurationError("One of DataConfig.paths or DataConfig.datasets is required")
 
-    if train_config.model.vocab_size >= 2**16:
-        optimal_memmap_dtype = np.uint32
-    else:
-        optimal_memmap_dtype = np.uint16
+    optimal_memmap_dtype = np.uint32 if train_config.model.vocab_size >= 2**16 else np.uint16
     return MemMapDataset(
         *paths,
         chunk_size=train_config.model.max_sequence_length,
diff --git a/olmo/data/memmap_dataset.py b/olmo/data/memmap_dataset.py
index e27b5b754..901bc5703 100644
--- a/olmo/data/memmap_dataset.py
+++ b/olmo/data/memmap_dataset.py
@@ -1,8 +1,6 @@
 from __future__ import annotations
 
-import os
 from copy import deepcopy
-from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple, Type, Union
 
 import numpy as np
@@ -12,7 +10,7 @@
 from olmo.exceptions import OLMoEnvironmentError
 
 from ..aliases import PathOrStr
-from ..config import InstanceFilterConfig, TrainConfig
+from ..config import InstanceFilterConfig
 from ..util import _get_s3_client, file_size, get_bytes_range
 from .util import find_periodic_sequences, get_document_lengths
 

From 803c963345137e8cf98bf22524c03a6a069026e7 Mon Sep 17 00:00:00 2001
From: aman-17 <amanrangapur@gmail.com>
Date: Fri, 21 Feb 2025 15:14:30 -0800
Subject: [PATCH 6/9] fixed tests

---
 tests/grad_norm_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/grad_norm_test.py b/tests/grad_norm_test.py
index 17aa27032..ff5a358c7 100644
--- a/tests/grad_norm_test.py
+++ b/tests/grad_norm_test.py
@@ -90,7 +90,7 @@ def _patch_config(cfg, max_norm):
         "test_fixtures/c4-sample.02.json.gz",
         "test_fixtures/c4-sample.03.json.gz",
     ]
-    cfg.model.vocab_size = 2**16  # some tokens in sample files are upto 65k
+    cfg.model.vocab_size = 2**32  # some tokens in sample files are upto 65k
     cfg.model.embedding_size = cfg.model.vocab_size  # this gives an error without this
     cfg.model.weight_tying = False
     cfg.model.rope = True

From 00c6befb669e5ea594a5eda29566637f4cc15be4 Mon Sep 17 00:00:00 2001
From: aman-17 <amanrangapur@gmail.com>
Date: Fri, 21 Feb 2025 15:22:09 -0800
Subject: [PATCH 7/9] forced input_ids to be in range of uint16

---
 olmo/data/memmap_dataset.py | 1 +
 tests/grad_norm_test.py     | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/olmo/data/memmap_dataset.py b/olmo/data/memmap_dataset.py
index 901bc5703..93be14840 100644
--- a/olmo/data/memmap_dataset.py
+++ b/olmo/data/memmap_dataset.py
@@ -194,6 +194,7 @@ def __getitem__(self, index: int) -> Dict[str, Any]:
 
         # Read the data from file.
         input_ids = self._read_chunk_from_memmap(self._memmap_paths[memmap_index], memmap_local_index)
+        input_ids = input_ids % (2**16)
         out: Dict[str, Any] = {"input_ids": input_ids}
         if self.instance_filter_config is not None:
             out["instance_mask"] = self._validate_instance(input_ids)
diff --git a/tests/grad_norm_test.py b/tests/grad_norm_test.py
index ff5a358c7..17aa27032 100644
--- a/tests/grad_norm_test.py
+++ b/tests/grad_norm_test.py
@@ -90,7 +90,7 @@ def _patch_config(cfg, max_norm):
         "test_fixtures/c4-sample.02.json.gz",
         "test_fixtures/c4-sample.03.json.gz",
     ]
-    cfg.model.vocab_size = 2**32  # some tokens in sample files are upto 65k
+    cfg.model.vocab_size = 2**16  # some tokens in sample files are upto 65k
     cfg.model.embedding_size = cfg.model.vocab_size  # this gives an error without this
     cfg.model.weight_tying = False
     cfg.model.rope = True

From 167a7ac80d350b085fcb4c6ee517e2ba6a1a4167 Mon Sep 17 00:00:00 2001
From: aman-17 <amanrangapur@gmail.com>
Date: Fri, 28 Feb 2025 10:30:21 -0800
Subject: [PATCH 8/9] fixed uint32 issue

---
 olmo/data/__init__.py       | 2 +-
 olmo/data/memmap_dataset.py | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/olmo/data/__init__.py b/olmo/data/__init__.py
index 39d692e18..464786935 100644
--- a/olmo/data/__init__.py
+++ b/olmo/data/__init__.py
@@ -40,7 +40,7 @@ def build_memmap_dataset(
     else:
         raise OLMoConfigurationError("One of DataConfig.paths or DataConfig.datasets is required")
 
-    optimal_memmap_dtype = np.uint32 if train_config.model.vocab_size >= 2**16 else np.uint16
+    optimal_memmap_dtype = np.uint32 if train_config.model.vocab_size > 2**16 else np.uint16
     return MemMapDataset(
         *paths,
         chunk_size=train_config.model.max_sequence_length,
diff --git a/olmo/data/memmap_dataset.py b/olmo/data/memmap_dataset.py
index 93be14840..901bc5703 100644
--- a/olmo/data/memmap_dataset.py
+++ b/olmo/data/memmap_dataset.py
@@ -194,7 +194,6 @@ def __getitem__(self, index: int) -> Dict[str, Any]:
 
         # Read the data from file.
         input_ids = self._read_chunk_from_memmap(self._memmap_paths[memmap_index], memmap_local_index)
-        input_ids = input_ids % (2**16)
         out: Dict[str, Any] = {"input_ids": input_ids}
         if self.instance_filter_config is not None:
             out["instance_mask"] = self._validate_instance(input_ids)

From 355bd874fdf40ed77c5397c94f1c3540a076b10e Mon Sep 17 00:00:00 2001
From: aman-17 <amanrangapur@gmail.com>
Date: Fri, 28 Feb 2025 10:37:38 -0800
Subject: [PATCH 9/9] resolved README conflict

---
 README.md | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index dc4d2934a..22b3bd0b3 100644
--- a/README.md
+++ b/README.md
@@ -50,11 +50,10 @@ In the second stage, we train on a smaller amount of high-quality, targeted data
 You can find *all* the checkpoints, at minimum every 1000 training steps in OLMo core and Hugging Face format:
 
 
-| Variant          | OLMo Format                                                                                          | Hugging Face Format                                                               |
-|------------------|-----------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------|
-| **OLMo-2 7B**      | [OLMo-2 7B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-7B.csv)       | [Hugging Face for the 7B variant](https://huggingface.co/allenai/OLMo-2-1124-7B)  |
-| **OLMo-2 13B**     | [OLMo-2 13B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-13B.csv)     | [Hugging Face for the 13B variant](https://huggingface.co/allenai/OLMo-2-1124-13B) |
-
+| Variant         | OLMo Format (Stage 1)                                                                                         | OLMo Format (Stage 2) | Hugging Face Format                                                               |
+|----------------|-----------------------------------------------------------------------------------------------------|--------|----------------------------------------------------------------------------------|
+| **OLMo-2 7B**  | [OLMo-2 7B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-7B.csv)     | [OLMo-2 7B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-7B-stage2.csv)      | [Hugging Face for the 7B variant](https://huggingface.co/allenai/OLMo-2-1124-7B)  |
+| **OLMo-2 13B** | [OLMo-2 13B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-13B.csv)   | [OLMo-2 13B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-13B-stage2.csv)       | [Hugging Face for the 13B variant](https://huggingface.co/allenai/OLMo-2-1124-13B) |
 ### Steps to reproduce
 
 To reproduce any of the training processes described below, run this:
@@ -87,7 +86,7 @@ Example:
 ```bash
 python scripts/train.py configs/tiny/OLMo-20M.yaml --save_overwrite
 ```
-Note: You need to upgrade PyTorch to 2.5.x to run. OLMo-2-1124 uses `uint32` for `memmap_dtype`, whereas OLMo-0724 uses `uint16`.
+Note: You need to upgrade PyTorch to 2.5.x to run.
 
 ### Stage 1