From efcf563e4af762393f3c747da3f791a133789fe8 Mon Sep 17 00:00:00 2001
From: aman-17
Date: Wed, 12 Feb 2025 13:43:04 -0800
Subject: [PATCH 1/9] Updated `memmap_dtype` to uint32, added discord and
playground to readme
---
README.md | 12 +++++++++---
olmo/config.py | 2 +-
olmo/data/memmap_dataset.py | 2 +-
3 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/README.md b/README.md
index 76538ecaa..dc4d2934a 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,12 @@
+
+
+
+
+
+
OLMo is a repository for training and using AI2's state-of-the-art open language models. It is designed by scientists, for scientists.
@@ -46,8 +52,8 @@ You can find *all* the checkpoints, at minimum every 1000 training steps in OLMo
| Variant | OLMo Format | Hugging Face Format |
|------------------|-----------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------|
-| **OLMo 7B** | [OLMo 7B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-7B.csv) | [Hugging Face for the 7B variant](https://huggingface.co/allenai/OLMo-2-1124-7B) |
-| **OLMo 13B** | [OLMo 13B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-13B.csv) | [Hugging Face for the 13B variant](https://huggingface.co/allenai/OLMo-2-1124-13B) |
+| **OLMo-2 7B** | [OLMo-2 7B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-7B.csv) | [Hugging Face for the 7B variant](https://huggingface.co/allenai/OLMo-2-1124-7B) |
+| **OLMo-2 13B** | [OLMo-2 13B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-13B.csv) | [Hugging Face for the 13B variant](https://huggingface.co/allenai/OLMo-2-1124-13B) |
### Steps to reproduce
@@ -81,7 +87,7 @@ Example:
```bash
python scripts/train.py configs/tiny/OLMo-20M.yaml --save_overwrite
```
-Note: You need to upgrade PyTorch to 2.5.x to run.
+Note: You need to upgrade PyTorch to 2.5.x to run. OLMo-2-1124 uses `uint32` for `memmap_dtype`, whereas OLMo-0724 uses `uint16`.
### Stage 1
diff --git a/olmo/config.py b/olmo/config.py
index 6da7dc03d..e67455d96 100644
--- a/olmo/config.py
+++ b/olmo/config.py
@@ -608,7 +608,7 @@ class InstanceFilterConfig(BaseConfig):
@dataclass
class DataConfig(BaseConfig):
paths: Optional[List[str]] = None
- memmap_dtype: str = "uint16"
+ memmap_dtype: str = "uint32"
datasets: Optional[Dict[str, List[str]]] = None
label_mask_paths: Optional[List[str]] = None
pad_direction: PaddingDirection = PaddingDirection.right
diff --git a/olmo/data/memmap_dataset.py b/olmo/data/memmap_dataset.py
index 901bc5703..28d6b887b 100644
--- a/olmo/data/memmap_dataset.py
+++ b/olmo/data/memmap_dataset.py
@@ -47,7 +47,7 @@ def __init__(
self,
*paths: PathOrStr,
chunk_size: int = 1024,
- memmap_dtype: Union[Type[np.uint8], Type[np.uint16], Type[np.uint32], Type[np.uint64]] = np.uint16,
+ memmap_dtype: Union[Type[np.uint8], Type[np.uint16], Type[np.uint32], Type[np.uint64]] = np.uint32,
metadata: Optional[Union[List[Dict[str, Any]], Dict[str, Any]]] = None,
include_instance_metadata: bool = True,
generate_attention_mask: bool = False,
From 4d84e59ba20814e148cd738980f3a573166ff2d6 Mon Sep 17 00:00:00 2001
From: aman-17
Date: Wed, 12 Feb 2025 14:00:54 -0800
Subject: [PATCH 2/9] fixed changelog and pytest
---
CHANGELOG.md | 4 ++++
tests/data/memmap_dataset_test.py | 22 +++++++++++-----------
2 files changed, 15 insertions(+), 11 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index dc77f10c4..6ba69bc19 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- MPS support
+### Changed
+
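+- `memmap_dtype` for training data is now selected automatically from the model's vocabulary size (`uint32` when `vocab_size > 2**16`, otherwise `uint16`) for compatibility with OLMo-2-1124.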
+
## [v0.6.0](https://github.com/allenai/OLMo/releases/tag/v0.6.0) - 2024-12-17
### Added
diff --git a/tests/data/memmap_dataset_test.py b/tests/data/memmap_dataset_test.py
index e267043ee..ead2e43cf 100644
--- a/tests/data/memmap_dataset_test.py
+++ b/tests/data/memmap_dataset_test.py
@@ -8,12 +8,12 @@
def test_mmap_dataset(tmp_path: Path):
- mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint16, shape=(16,))
- mmap1[:] = np.array(list(range(16)), dtype=np.uint16)
+ mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint32, shape=(16,))
+ mmap1[:] = np.array(list(range(16)), dtype=np.uint32)
mmap1.flush()
- mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint16, shape=(16,))
- mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint16)
+ mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint32, shape=(16,))
+ mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint32)
mmap2.flush()
ds = MemMapDataset(tmp_path / "mmap1.npy", tmp_path / "mmap2.npy", chunk_size=4)
@@ -23,8 +23,8 @@ def test_mmap_dataset(tmp_path: Path):
def test_mmap_dataset_with_label_mask(tmp_path: Path):
- mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint16, shape=(16,))
- mmap1[:] = np.array(list(range(16)), dtype=np.uint16)
+ mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint32, shape=(16,))
+ mmap1[:] = np.array(list(range(16)), dtype=np.uint32)
mmap1.flush()
mask1 = [True] * 16
@@ -33,8 +33,8 @@ def test_mmap_dataset_with_label_mask(tmp_path: Path):
mask_mmap1[:] = np.array(mask1, dtype=np.bool_)
mask_mmap1.flush()
- mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint16, shape=(16,))
- mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint16)
+ mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint32, shape=(16,))
+ mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint32)
mmap2.flush()
mask2 = [True] * 16
@@ -66,7 +66,7 @@ def test_mmap_dataset_with_metadata(tokenizer: Tokenizer, tmp_path: Path, lorem_
# Write tokens to memory-mapped array.
tokens_fname = tmp_path / "tokens.npy"
- mmap = np.memmap(tokens_fname, dtype=np.uint16, mode="w+", shape=(len(all_token_ids),))
+ mmap = np.memmap(tokens_fname, dtype=np.uint32, mode="w+", shape=(len(all_token_ids),))
mmap[:] = all_token_ids
mmap.flush()
del mmap
@@ -83,10 +83,10 @@ def test_mmap_dataset_with_metadata(tokenizer: Tokenizer, tmp_path: Path, lorem_
def test_concat_mmap_datasets(tmp_path: Path):
# Write some data to disk.
- mmap1 = np.memmap(tmp_path / "tokens1.npy", dtype=np.uint16, mode="w+", shape=(16,))
+ mmap1 = np.memmap(tmp_path / "tokens1.npy", dtype=np.uint32, mode="w+", shape=(16,))
mmap1[:] = list(range(16))
mmap1.flush()
- mmap2 = np.memmap(tmp_path / "tokens2.npy", dtype=np.uint16, mode="w+", shape=(8,))
+ mmap2 = np.memmap(tmp_path / "tokens2.npy", dtype=np.uint32, mode="w+", shape=(8,))
mmap2[:] = list(range(8))
mmap2.flush()
del mmap1, mmap2
From 376eb297f3fe8d1610eb3d603f5d947e93167261 Mon Sep 17 00:00:00 2001
From: aman-17
Date: Fri, 14 Feb 2025 15:54:08 -0800
Subject: [PATCH 3/9] restored old changes
---
olmo/config.py | 2 +-
tests/data/memmap_dataset_test.py | 22 +++++++++++-----------
2 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/olmo/config.py b/olmo/config.py
index e67455d96..6da7dc03d 100644
--- a/olmo/config.py
+++ b/olmo/config.py
@@ -608,7 +608,7 @@ class InstanceFilterConfig(BaseConfig):
@dataclass
class DataConfig(BaseConfig):
paths: Optional[List[str]] = None
- memmap_dtype: str = "uint32"
+ memmap_dtype: str = "uint16"
datasets: Optional[Dict[str, List[str]]] = None
label_mask_paths: Optional[List[str]] = None
pad_direction: PaddingDirection = PaddingDirection.right
diff --git a/tests/data/memmap_dataset_test.py b/tests/data/memmap_dataset_test.py
index ead2e43cf..e267043ee 100644
--- a/tests/data/memmap_dataset_test.py
+++ b/tests/data/memmap_dataset_test.py
@@ -8,12 +8,12 @@
def test_mmap_dataset(tmp_path: Path):
- mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint32, shape=(16,))
- mmap1[:] = np.array(list(range(16)), dtype=np.uint32)
+ mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint16, shape=(16,))
+ mmap1[:] = np.array(list(range(16)), dtype=np.uint16)
mmap1.flush()
- mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint32, shape=(16,))
- mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint32)
+ mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint16, shape=(16,))
+ mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint16)
mmap2.flush()
ds = MemMapDataset(tmp_path / "mmap1.npy", tmp_path / "mmap2.npy", chunk_size=4)
@@ -23,8 +23,8 @@ def test_mmap_dataset(tmp_path: Path):
def test_mmap_dataset_with_label_mask(tmp_path: Path):
- mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint32, shape=(16,))
- mmap1[:] = np.array(list(range(16)), dtype=np.uint32)
+ mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint16, shape=(16,))
+ mmap1[:] = np.array(list(range(16)), dtype=np.uint16)
mmap1.flush()
mask1 = [True] * 16
@@ -33,8 +33,8 @@ def test_mmap_dataset_with_label_mask(tmp_path: Path):
mask_mmap1[:] = np.array(mask1, dtype=np.bool_)
mask_mmap1.flush()
- mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint32, shape=(16,))
- mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint32)
+ mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint16, shape=(16,))
+ mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint16)
mmap2.flush()
mask2 = [True] * 16
@@ -66,7 +66,7 @@ def test_mmap_dataset_with_metadata(tokenizer: Tokenizer, tmp_path: Path, lorem_
# Write tokens to memory-mapped array.
tokens_fname = tmp_path / "tokens.npy"
- mmap = np.memmap(tokens_fname, dtype=np.uint32, mode="w+", shape=(len(all_token_ids),))
+ mmap = np.memmap(tokens_fname, dtype=np.uint16, mode="w+", shape=(len(all_token_ids),))
mmap[:] = all_token_ids
mmap.flush()
del mmap
@@ -83,10 +83,10 @@ def test_mmap_dataset_with_metadata(tokenizer: Tokenizer, tmp_path: Path, lorem_
def test_concat_mmap_datasets(tmp_path: Path):
# Write some data to disk.
- mmap1 = np.memmap(tmp_path / "tokens1.npy", dtype=np.uint32, mode="w+", shape=(16,))
+ mmap1 = np.memmap(tmp_path / "tokens1.npy", dtype=np.uint16, mode="w+", shape=(16,))
mmap1[:] = list(range(16))
mmap1.flush()
- mmap2 = np.memmap(tmp_path / "tokens2.npy", dtype=np.uint32, mode="w+", shape=(8,))
+ mmap2 = np.memmap(tmp_path / "tokens2.npy", dtype=np.uint16, mode="w+", shape=(8,))
mmap2[:] = list(range(8))
mmap2.flush()
del mmap1, mmap2
From 9aa7ebf92343c86956b55ce4ebd0608903bf3f4a Mon Sep 17 00:00:00 2001
From: aman-17
Date: Fri, 14 Feb 2025 17:28:35 -0800
Subject: [PATCH 4/9] select memmap dtype from model vocab size
---
olmo/data/__init__.py | 8 +++++++-
olmo/data/memmap_dataset.py | 6 ++++--
2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/olmo/data/__init__.py b/olmo/data/__init__.py
index 5bc68670c..4c5211cc6 100644
--- a/olmo/data/__init__.py
+++ b/olmo/data/__init__.py
@@ -3,6 +3,7 @@
from pathlib import Path
from typing import Any, Dict, List, Optional, cast
+import numpy as np
from torch.utils.data import DataLoader, DistributedSampler
from ..aliases import PathOrStr
@@ -38,10 +39,15 @@ def build_memmap_dataset(
metadata.extend([{"label": label}] * len(label_paths))
else:
raise OLMoConfigurationError("One of DataConfig.paths or DataConfig.datasets is required")
+
+ if train_config.model.vocab_size >= 2**16:
+ optimal_memmap_dtype = np.uint32
+ else:
+ optimal_memmap_dtype = np.uint16
return MemMapDataset(
*paths,
chunk_size=train_config.model.max_sequence_length,
- memmap_dtype=data_config.effective_memmap_dtype,
+ memmap_dtype=optimal_memmap_dtype,
metadata=metadata,
include_instance_metadata=include_instance_metadata,
pad_token_id=train_config.model.pad_token_id,
diff --git a/olmo/data/memmap_dataset.py b/olmo/data/memmap_dataset.py
index 28d6b887b..e27b5b754 100644
--- a/olmo/data/memmap_dataset.py
+++ b/olmo/data/memmap_dataset.py
@@ -1,6 +1,8 @@
from __future__ import annotations
+import os
from copy import deepcopy
+from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Type, Union
import numpy as np
@@ -10,7 +12,7 @@
from olmo.exceptions import OLMoEnvironmentError
from ..aliases import PathOrStr
-from ..config import InstanceFilterConfig
+from ..config import InstanceFilterConfig, TrainConfig
from ..util import _get_s3_client, file_size, get_bytes_range
from .util import find_periodic_sequences, get_document_lengths
@@ -47,7 +49,7 @@ def __init__(
self,
*paths: PathOrStr,
chunk_size: int = 1024,
- memmap_dtype: Union[Type[np.uint8], Type[np.uint16], Type[np.uint32], Type[np.uint64]] = np.uint32,
+ memmap_dtype: Union[Type[np.uint8], Type[np.uint16], Type[np.uint32], Type[np.uint64]] = np.uint16,
metadata: Optional[Union[List[Dict[str, Any]], Dict[str, Any]]] = None,
include_instance_metadata: bool = True,
generate_attention_mask: bool = False,
From 23effe41f8684a854647fdb506df983406f62f0e Mon Sep 17 00:00:00 2001
From: aman-17
Date: Fri, 14 Feb 2025 17:46:38 -0800
Subject: [PATCH 5/9] fixed lint
---
olmo/data/__init__.py | 5 +----
olmo/data/memmap_dataset.py | 4 +---
2 files changed, 2 insertions(+), 7 deletions(-)
diff --git a/olmo/data/__init__.py b/olmo/data/__init__.py
index 4c5211cc6..39d692e18 100644
--- a/olmo/data/__init__.py
+++ b/olmo/data/__init__.py
@@ -40,10 +40,7 @@ def build_memmap_dataset(
else:
raise OLMoConfigurationError("One of DataConfig.paths or DataConfig.datasets is required")
- if train_config.model.vocab_size >= 2**16:
- optimal_memmap_dtype = np.uint32
- else:
- optimal_memmap_dtype = np.uint16
+ optimal_memmap_dtype = np.uint32 if train_config.model.vocab_size >= 2**16 else np.uint16
return MemMapDataset(
*paths,
chunk_size=train_config.model.max_sequence_length,
diff --git a/olmo/data/memmap_dataset.py b/olmo/data/memmap_dataset.py
index e27b5b754..901bc5703 100644
--- a/olmo/data/memmap_dataset.py
+++ b/olmo/data/memmap_dataset.py
@@ -1,8 +1,6 @@
from __future__ import annotations
-import os
from copy import deepcopy
-from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Type, Union
import numpy as np
@@ -12,7 +10,7 @@
from olmo.exceptions import OLMoEnvironmentError
from ..aliases import PathOrStr
-from ..config import InstanceFilterConfig, TrainConfig
+from ..config import InstanceFilterConfig
from ..util import _get_s3_client, file_size, get_bytes_range
from .util import find_periodic_sequences, get_document_lengths
From 803c963345137e8cf98bf22524c03a6a069026e7 Mon Sep 17 00:00:00 2001
From: aman-17
Date: Fri, 21 Feb 2025 15:14:30 -0800
Subject: [PATCH 6/9] fixed tests
---
tests/grad_norm_test.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/grad_norm_test.py b/tests/grad_norm_test.py
index 17aa27032..ff5a358c7 100644
--- a/tests/grad_norm_test.py
+++ b/tests/grad_norm_test.py
@@ -90,7 +90,7 @@ def _patch_config(cfg, max_norm):
"test_fixtures/c4-sample.02.json.gz",
"test_fixtures/c4-sample.03.json.gz",
]
- cfg.model.vocab_size = 2**16 # some tokens in sample files are upto 65k
+ cfg.model.vocab_size = 2**32 # some tokens in sample files are upto 65k
cfg.model.embedding_size = cfg.model.vocab_size # this gives an error without this
cfg.model.weight_tying = False
cfg.model.rope = True
From 00c6befb669e5ea594a5eda29566637f4cc15be4 Mon Sep 17 00:00:00 2001
From: aman-17
Date: Fri, 21 Feb 2025 15:22:09 -0800
Subject: [PATCH 7/9] forced input_ids to be in range of uint16
---
olmo/data/memmap_dataset.py | 1 +
tests/grad_norm_test.py | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/olmo/data/memmap_dataset.py b/olmo/data/memmap_dataset.py
index 901bc5703..93be14840 100644
--- a/olmo/data/memmap_dataset.py
+++ b/olmo/data/memmap_dataset.py
@@ -194,6 +194,7 @@ def __getitem__(self, index: int) -> Dict[str, Any]:
# Read the data from file.
input_ids = self._read_chunk_from_memmap(self._memmap_paths[memmap_index], memmap_local_index)
+ input_ids = input_ids % (2**16)
out: Dict[str, Any] = {"input_ids": input_ids}
if self.instance_filter_config is not None:
out["instance_mask"] = self._validate_instance(input_ids)
diff --git a/tests/grad_norm_test.py b/tests/grad_norm_test.py
index ff5a358c7..17aa27032 100644
--- a/tests/grad_norm_test.py
+++ b/tests/grad_norm_test.py
@@ -90,7 +90,7 @@ def _patch_config(cfg, max_norm):
"test_fixtures/c4-sample.02.json.gz",
"test_fixtures/c4-sample.03.json.gz",
]
- cfg.model.vocab_size = 2**32 # some tokens in sample files are upto 65k
+ cfg.model.vocab_size = 2**16 # some tokens in sample files are upto 65k
cfg.model.embedding_size = cfg.model.vocab_size # this gives an error without this
cfg.model.weight_tying = False
cfg.model.rope = True
From 167a7ac80d350b085fcb4c6ee517e2ba6a1a4167 Mon Sep 17 00:00:00 2001
From: aman-17
Date: Fri, 28 Feb 2025 10:30:21 -0800
Subject: [PATCH 8/9] fixed uint32 issue
---
olmo/data/__init__.py | 2 +-
olmo/data/memmap_dataset.py | 1 -
2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/olmo/data/__init__.py b/olmo/data/__init__.py
index 39d692e18..464786935 100644
--- a/olmo/data/__init__.py
+++ b/olmo/data/__init__.py
@@ -40,7 +40,7 @@ def build_memmap_dataset(
else:
raise OLMoConfigurationError("One of DataConfig.paths or DataConfig.datasets is required")
- optimal_memmap_dtype = np.uint32 if train_config.model.vocab_size >= 2**16 else np.uint16
+ optimal_memmap_dtype = np.uint32 if train_config.model.vocab_size > 2**16 else np.uint16
return MemMapDataset(
*paths,
chunk_size=train_config.model.max_sequence_length,
diff --git a/olmo/data/memmap_dataset.py b/olmo/data/memmap_dataset.py
index 93be14840..901bc5703 100644
--- a/olmo/data/memmap_dataset.py
+++ b/olmo/data/memmap_dataset.py
@@ -194,7 +194,6 @@ def __getitem__(self, index: int) -> Dict[str, Any]:
# Read the data from file.
input_ids = self._read_chunk_from_memmap(self._memmap_paths[memmap_index], memmap_local_index)
- input_ids = input_ids % (2**16)
out: Dict[str, Any] = {"input_ids": input_ids}
if self.instance_filter_config is not None:
out["instance_mask"] = self._validate_instance(input_ids)
From 355bd874fdf40ed77c5397c94f1c3540a076b10e Mon Sep 17 00:00:00 2001
From: aman-17
Date: Fri, 28 Feb 2025 10:37:38 -0800
Subject: [PATCH 9/9] resolved README conflict
---
README.md | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/README.md b/README.md
index dc4d2934a..22b3bd0b3 100644
--- a/README.md
+++ b/README.md
@@ -50,11 +50,10 @@ In the second stage, we train on a smaller amount of high-quality, targeted data
You can find *all* the checkpoints, at minimum every 1000 training steps in OLMo core and Hugging Face format:
-| Variant | OLMo Format | Hugging Face Format |
-|------------------|-----------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------|
-| **OLMo-2 7B** | [OLMo-2 7B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-7B.csv) | [Hugging Face for the 7B variant](https://huggingface.co/allenai/OLMo-2-1124-7B) |
-| **OLMo-2 13B** | [OLMo-2 13B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-13B.csv) | [Hugging Face for the 13B variant](https://huggingface.co/allenai/OLMo-2-1124-13B) |
-
+| Variant | OLMo Format (Stage 1) | OLMo Format (Stage 2) | Hugging Face Format |
+|----------------|-----------------------------------------------------------------------------------------------------|--------|----------------------------------------------------------------------------------|
+| **OLMo-2 7B** | [OLMo-2 7B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-7B.csv) | [OLMo-2 7B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-7B-stage2.csv) | [Hugging Face for the 7B variant](https://huggingface.co/allenai/OLMo-2-1124-7B) |
+| **OLMo-2 13B** | [OLMo-2 13B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-13B.csv) | [OLMo-2 13B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-13B-stage2.csv) | [Hugging Face for the 13B variant](https://huggingface.co/allenai/OLMo-2-1124-13B) |
### Steps to reproduce
To reproduce any of the training processes described below, run this:
@@ -87,7 +86,7 @@ Example:
```bash
python scripts/train.py configs/tiny/OLMo-20M.yaml --save_overwrite
```
-Note: You need to upgrade PyTorch to 2.5.x to run. OLMo-2-1124 uses `uint32` for `memmap_dtype`, whereas OLMo-0724 uses `uint16`.
+Note: You need to upgrade PyTorch to 2.5.x to run.
### Stage 1