From efcf563e4af762393f3c747da3f791a133789fe8 Mon Sep 17 00:00:00 2001 From: aman-17 Date: Wed, 12 Feb 2025 13:43:04 -0800 Subject: [PATCH 1/9] Updated `memmap_dtype` to uint32, added discord and playground to readme --- README.md | 12 +++++++++--- olmo/config.py | 2 +- olmo/data/memmap_dataset.py | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 76538ecaa..dc4d2934a 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,12 @@ Paper URL + + Playground + + + Discord +

OLMo is a repository for training and using AI2's state-of-the-art open language models. It is designed by scientists, for scientists. @@ -46,8 +52,8 @@ You can find *all* the checkpoints, at minimum every 1000 training steps in OLMo | Variant | OLMo Format | Hugging Face Format | |------------------|-----------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------| -| **OLMo 7B** | [OLMo 7B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-7B.csv) | [Hugging Face for the 7B variant](https://huggingface.co/allenai/OLMo-2-1124-7B) | -| **OLMo 13B** | [OLMo 13B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-13B.csv) | [Hugging Face for the 13B variant](https://huggingface.co/allenai/OLMo-2-1124-13B) | +| **OLMo-2 7B** | [OLMo-2 7B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-7B.csv) | [Hugging Face for the 7B variant](https://huggingface.co/allenai/OLMo-2-1124-7B) | +| **OLMo-2 13B** | [OLMo-2 13B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-13B.csv) | [Hugging Face for the 13B variant](https://huggingface.co/allenai/OLMo-2-1124-13B) | ### Steps to reproduce @@ -81,7 +87,7 @@ Example: ```bash python scripts/train.py configs/tiny/OLMo-20M.yaml --save_overwrite ``` -Note: You need to upgrade PyTorch to 2.5.x to run. +Note: You need to upgrade PyTorch to 2.5.x to run. OLMo-2-1124 uses `uint32` for `memmap_dtype`, whereas OLMo-0724 uses `uint16`. 
### Stage 1 diff --git a/olmo/config.py b/olmo/config.py index 6da7dc03d..e67455d96 100644 --- a/olmo/config.py +++ b/olmo/config.py @@ -608,7 +608,7 @@ class InstanceFilterConfig(BaseConfig): @dataclass class DataConfig(BaseConfig): paths: Optional[List[str]] = None - memmap_dtype: str = "uint16" + memmap_dtype: str = "uint32" datasets: Optional[Dict[str, List[str]]] = None label_mask_paths: Optional[List[str]] = None pad_direction: PaddingDirection = PaddingDirection.right diff --git a/olmo/data/memmap_dataset.py b/olmo/data/memmap_dataset.py index 901bc5703..28d6b887b 100644 --- a/olmo/data/memmap_dataset.py +++ b/olmo/data/memmap_dataset.py @@ -47,7 +47,7 @@ def __init__( self, *paths: PathOrStr, chunk_size: int = 1024, - memmap_dtype: Union[Type[np.uint8], Type[np.uint16], Type[np.uint32], Type[np.uint64]] = np.uint16, + memmap_dtype: Union[Type[np.uint8], Type[np.uint16], Type[np.uint32], Type[np.uint64]] = np.uint32, metadata: Optional[Union[List[Dict[str, Any]], Dict[str, Any]]] = None, include_instance_metadata: bool = True, generate_attention_mask: bool = False, From 4d84e59ba20814e148cd738980f3a573166ff2d6 Mon Sep 17 00:00:00 2001 From: aman-17 Date: Wed, 12 Feb 2025 14:00:54 -0800 Subject: [PATCH 2/9] fixed changelog and pytest --- CHANGELOG.md | 4 ++++ tests/data/memmap_dataset_test.py | 22 +++++++++++----------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dc77f10c4..6ba69bc19 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - MPS support +### Changed + +- Updated `memmap_dtype` to `uint32` for compatibility with OLMo-2-1124. 
+ ## [v0.6.0](https://github.com/allenai/OLMo/releases/tag/v0.6.0) - 2024-12-17 ### Added diff --git a/tests/data/memmap_dataset_test.py b/tests/data/memmap_dataset_test.py index e267043ee..ead2e43cf 100644 --- a/tests/data/memmap_dataset_test.py +++ b/tests/data/memmap_dataset_test.py @@ -8,12 +8,12 @@ def test_mmap_dataset(tmp_path: Path): - mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint16, shape=(16,)) - mmap1[:] = np.array(list(range(16)), dtype=np.uint16) + mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint32, shape=(16,)) + mmap1[:] = np.array(list(range(16)), dtype=np.uint32) mmap1.flush() - mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint16, shape=(16,)) - mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint16) + mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint32, shape=(16,)) + mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint32) mmap2.flush() ds = MemMapDataset(tmp_path / "mmap1.npy", tmp_path / "mmap2.npy", chunk_size=4) @@ -23,8 +23,8 @@ def test_mmap_dataset(tmp_path: Path): def test_mmap_dataset_with_label_mask(tmp_path: Path): - mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint16, shape=(16,)) - mmap1[:] = np.array(list(range(16)), dtype=np.uint16) + mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint32, shape=(16,)) + mmap1[:] = np.array(list(range(16)), dtype=np.uint32) mmap1.flush() mask1 = [True] * 16 @@ -33,8 +33,8 @@ def test_mmap_dataset_with_label_mask(tmp_path: Path): mask_mmap1[:] = np.array(mask1, dtype=np.bool_) mask_mmap1.flush() - mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint16, shape=(16,)) - mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint16) + mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint32, shape=(16,)) + mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint32) mmap2.flush() mask2 = [True] * 16 @@ -66,7 +66,7 @@ def test_mmap_dataset_with_metadata(tokenizer: Tokenizer, 
tmp_path: Path, lorem_ # Write tokens to memory-mapped array. tokens_fname = tmp_path / "tokens.npy" - mmap = np.memmap(tokens_fname, dtype=np.uint16, mode="w+", shape=(len(all_token_ids),)) + mmap = np.memmap(tokens_fname, dtype=np.uint32, mode="w+", shape=(len(all_token_ids),)) mmap[:] = all_token_ids mmap.flush() del mmap @@ -83,10 +83,10 @@ def test_mmap_dataset_with_metadata(tokenizer: Tokenizer, tmp_path: Path, lorem_ def test_concat_mmap_datasets(tmp_path: Path): # Write some data to disk. - mmap1 = np.memmap(tmp_path / "tokens1.npy", dtype=np.uint16, mode="w+", shape=(16,)) + mmap1 = np.memmap(tmp_path / "tokens1.npy", dtype=np.uint32, mode="w+", shape=(16,)) mmap1[:] = list(range(16)) mmap1.flush() - mmap2 = np.memmap(tmp_path / "tokens2.npy", dtype=np.uint16, mode="w+", shape=(8,)) + mmap2 = np.memmap(tmp_path / "tokens2.npy", dtype=np.uint32, mode="w+", shape=(8,)) mmap2[:] = list(range(8)) mmap2.flush() del mmap1, mmap2 From 376eb297f3fe8d1610eb3d603f5d947e93167261 Mon Sep 17 00:00:00 2001 From: aman-17 Date: Fri, 14 Feb 2025 15:54:08 -0800 Subject: [PATCH 3/9] restored old changes --- olmo/config.py | 2 +- tests/data/memmap_dataset_test.py | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/olmo/config.py b/olmo/config.py index e67455d96..6da7dc03d 100644 --- a/olmo/config.py +++ b/olmo/config.py @@ -608,7 +608,7 @@ class InstanceFilterConfig(BaseConfig): @dataclass class DataConfig(BaseConfig): paths: Optional[List[str]] = None - memmap_dtype: str = "uint32" + memmap_dtype: str = "uint16" datasets: Optional[Dict[str, List[str]]] = None label_mask_paths: Optional[List[str]] = None pad_direction: PaddingDirection = PaddingDirection.right diff --git a/tests/data/memmap_dataset_test.py b/tests/data/memmap_dataset_test.py index ead2e43cf..e267043ee 100644 --- a/tests/data/memmap_dataset_test.py +++ b/tests/data/memmap_dataset_test.py @@ -8,12 +8,12 @@ def test_mmap_dataset(tmp_path: Path): - mmap1 = 
np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint32, shape=(16,)) - mmap1[:] = np.array(list(range(16)), dtype=np.uint32) + mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint16, shape=(16,)) + mmap1[:] = np.array(list(range(16)), dtype=np.uint16) mmap1.flush() - mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint32, shape=(16,)) - mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint32) + mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint16, shape=(16,)) + mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint16) mmap2.flush() ds = MemMapDataset(tmp_path / "mmap1.npy", tmp_path / "mmap2.npy", chunk_size=4) @@ -23,8 +23,8 @@ def test_mmap_dataset(tmp_path: Path): def test_mmap_dataset_with_label_mask(tmp_path: Path): - mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint32, shape=(16,)) - mmap1[:] = np.array(list(range(16)), dtype=np.uint32) + mmap1 = np.memmap(tmp_path / "mmap1.npy", mode="w+", dtype=np.uint16, shape=(16,)) + mmap1[:] = np.array(list(range(16)), dtype=np.uint16) mmap1.flush() mask1 = [True] * 16 @@ -33,8 +33,8 @@ def test_mmap_dataset_with_label_mask(tmp_path: Path): mask_mmap1[:] = np.array(mask1, dtype=np.bool_) mask_mmap1.flush() - mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint32, shape=(16,)) - mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint32) + mmap2 = np.memmap(tmp_path / "mmap2.npy", mode="w+", dtype=np.uint16, shape=(16,)) + mmap2[:] = np.array(list(range(16, 32)), dtype=np.uint16) mmap2.flush() mask2 = [True] * 16 @@ -66,7 +66,7 @@ def test_mmap_dataset_with_metadata(tokenizer: Tokenizer, tmp_path: Path, lorem_ # Write tokens to memory-mapped array. 
tokens_fname = tmp_path / "tokens.npy" - mmap = np.memmap(tokens_fname, dtype=np.uint32, mode="w+", shape=(len(all_token_ids),)) + mmap = np.memmap(tokens_fname, dtype=np.uint16, mode="w+", shape=(len(all_token_ids),)) mmap[:] = all_token_ids mmap.flush() del mmap @@ -83,10 +83,10 @@ def test_mmap_dataset_with_metadata(tokenizer: Tokenizer, tmp_path: Path, lorem_ def test_concat_mmap_datasets(tmp_path: Path): # Write some data to disk. - mmap1 = np.memmap(tmp_path / "tokens1.npy", dtype=np.uint32, mode="w+", shape=(16,)) + mmap1 = np.memmap(tmp_path / "tokens1.npy", dtype=np.uint16, mode="w+", shape=(16,)) mmap1[:] = list(range(16)) mmap1.flush() - mmap2 = np.memmap(tmp_path / "tokens2.npy", dtype=np.uint32, mode="w+", shape=(8,)) + mmap2 = np.memmap(tmp_path / "tokens2.npy", dtype=np.uint16, mode="w+", shape=(8,)) mmap2[:] = list(range(8)) mmap2.flush() del mmap1, mmap2 From 9aa7ebf92343c86956b55ce4ebd0608903bf3f4a Mon Sep 17 00:00:00 2001 From: aman-17 Date: Fri, 14 Feb 2025 17:28:35 -0800 Subject: [PATCH 4/9] update --- olmo/data/__init__.py | 8 +++++++- olmo/data/memmap_dataset.py | 6 ++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/olmo/data/__init__.py b/olmo/data/__init__.py index 5bc68670c..4c5211cc6 100644 --- a/olmo/data/__init__.py +++ b/olmo/data/__init__.py @@ -3,6 +3,7 @@ from pathlib import Path from typing import Any, Dict, List, Optional, cast +import numpy as np from torch.utils.data import DataLoader, DistributedSampler from ..aliases import PathOrStr @@ -38,10 +39,15 @@ def build_memmap_dataset( metadata.extend([{"label": label}] * len(label_paths)) else: raise OLMoConfigurationError("One of DataConfig.paths or DataConfig.datasets is required") + + if train_config.model.vocab_size >= 2**16: + optimal_memmap_dtype = np.uint32 + else: + optimal_memmap_dtype = np.uint16 return MemMapDataset( *paths, chunk_size=train_config.model.max_sequence_length, - memmap_dtype=data_config.effective_memmap_dtype, + 
memmap_dtype=optimal_memmap_dtype, metadata=metadata, include_instance_metadata=include_instance_metadata, pad_token_id=train_config.model.pad_token_id, diff --git a/olmo/data/memmap_dataset.py b/olmo/data/memmap_dataset.py index 28d6b887b..e27b5b754 100644 --- a/olmo/data/memmap_dataset.py +++ b/olmo/data/memmap_dataset.py @@ -1,6 +1,8 @@ from __future__ import annotations +import os from copy import deepcopy +from pathlib import Path from typing import Any, Dict, List, Optional, Tuple, Type, Union import numpy as np @@ -10,7 +12,7 @@ from olmo.exceptions import OLMoEnvironmentError from ..aliases import PathOrStr -from ..config import InstanceFilterConfig +from ..config import InstanceFilterConfig, TrainConfig from ..util import _get_s3_client, file_size, get_bytes_range from .util import find_periodic_sequences, get_document_lengths @@ -47,7 +49,7 @@ def __init__( self, *paths: PathOrStr, chunk_size: int = 1024, - memmap_dtype: Union[Type[np.uint8], Type[np.uint16], Type[np.uint32], Type[np.uint64]] = np.uint32, + memmap_dtype: Union[Type[np.uint8], Type[np.uint16], Type[np.uint32], Type[np.uint64]] = np.uint16, metadata: Optional[Union[List[Dict[str, Any]], Dict[str, Any]]] = None, include_instance_metadata: bool = True, generate_attention_mask: bool = False, From 23effe41f8684a854647fdb506df983406f62f0e Mon Sep 17 00:00:00 2001 From: aman-17 Date: Fri, 14 Feb 2025 17:46:38 -0800 Subject: [PATCH 5/9] fixed lint --- olmo/data/__init__.py | 5 +---- olmo/data/memmap_dataset.py | 4 +--- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/olmo/data/__init__.py b/olmo/data/__init__.py index 4c5211cc6..39d692e18 100644 --- a/olmo/data/__init__.py +++ b/olmo/data/__init__.py @@ -40,10 +40,7 @@ def build_memmap_dataset( else: raise OLMoConfigurationError("One of DataConfig.paths or DataConfig.datasets is required") - if train_config.model.vocab_size >= 2**16: - optimal_memmap_dtype = np.uint32 - else: - optimal_memmap_dtype = np.uint16 + optimal_memmap_dtype = 
np.uint32 if train_config.model.vocab_size >= 2**16 else np.uint16 return MemMapDataset( *paths, chunk_size=train_config.model.max_sequence_length, diff --git a/olmo/data/memmap_dataset.py b/olmo/data/memmap_dataset.py index e27b5b754..901bc5703 100644 --- a/olmo/data/memmap_dataset.py +++ b/olmo/data/memmap_dataset.py @@ -1,8 +1,6 @@ from __future__ import annotations -import os from copy import deepcopy -from pathlib import Path from typing import Any, Dict, List, Optional, Tuple, Type, Union import numpy as np @@ -12,7 +10,7 @@ from olmo.exceptions import OLMoEnvironmentError from ..aliases import PathOrStr -from ..config import InstanceFilterConfig, TrainConfig +from ..config import InstanceFilterConfig from ..util import _get_s3_client, file_size, get_bytes_range from .util import find_periodic_sequences, get_document_lengths From 803c963345137e8cf98bf22524c03a6a069026e7 Mon Sep 17 00:00:00 2001 From: aman-17 Date: Fri, 21 Feb 2025 15:14:30 -0800 Subject: [PATCH 6/9] fixed tests --- tests/grad_norm_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/grad_norm_test.py b/tests/grad_norm_test.py index 17aa27032..ff5a358c7 100644 --- a/tests/grad_norm_test.py +++ b/tests/grad_norm_test.py @@ -90,7 +90,7 @@ def _patch_config(cfg, max_norm): "test_fixtures/c4-sample.02.json.gz", "test_fixtures/c4-sample.03.json.gz", ] - cfg.model.vocab_size = 2**16 # some tokens in sample files are upto 65k + cfg.model.vocab_size = 2**32 # some tokens in sample files are upto 65k cfg.model.embedding_size = cfg.model.vocab_size # this gives an error without this cfg.model.weight_tying = False cfg.model.rope = True From 00c6befb669e5ea594a5eda29566637f4cc15be4 Mon Sep 17 00:00:00 2001 From: aman-17 Date: Fri, 21 Feb 2025 15:22:09 -0800 Subject: [PATCH 7/9] forced input_ids to be in range of uint16 --- olmo/data/memmap_dataset.py | 1 + tests/grad_norm_test.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/olmo/data/memmap_dataset.py 
b/olmo/data/memmap_dataset.py index 901bc5703..93be14840 100644 --- a/olmo/data/memmap_dataset.py +++ b/olmo/data/memmap_dataset.py @@ -194,6 +194,7 @@ def __getitem__(self, index: int) -> Dict[str, Any]: # Read the data from file. input_ids = self._read_chunk_from_memmap(self._memmap_paths[memmap_index], memmap_local_index) + input_ids = input_ids % (2**16) out: Dict[str, Any] = {"input_ids": input_ids} if self.instance_filter_config is not None: out["instance_mask"] = self._validate_instance(input_ids) diff --git a/tests/grad_norm_test.py b/tests/grad_norm_test.py index ff5a358c7..17aa27032 100644 --- a/tests/grad_norm_test.py +++ b/tests/grad_norm_test.py @@ -90,7 +90,7 @@ def _patch_config(cfg, max_norm): "test_fixtures/c4-sample.02.json.gz", "test_fixtures/c4-sample.03.json.gz", ] - cfg.model.vocab_size = 2**32 # some tokens in sample files are upto 65k + cfg.model.vocab_size = 2**16 # some tokens in sample files are upto 65k cfg.model.embedding_size = cfg.model.vocab_size # this gives an error without this cfg.model.weight_tying = False cfg.model.rope = True From 167a7ac80d350b085fcb4c6ee517e2ba6a1a4167 Mon Sep 17 00:00:00 2001 From: aman-17 Date: Fri, 28 Feb 2025 10:30:21 -0800 Subject: [PATCH 8/9] fixed uint32 issue --- olmo/data/__init__.py | 2 +- olmo/data/memmap_dataset.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/olmo/data/__init__.py b/olmo/data/__init__.py index 39d692e18..464786935 100644 --- a/olmo/data/__init__.py +++ b/olmo/data/__init__.py @@ -40,7 +40,7 @@ def build_memmap_dataset( else: raise OLMoConfigurationError("One of DataConfig.paths or DataConfig.datasets is required") - optimal_memmap_dtype = np.uint32 if train_config.model.vocab_size >= 2**16 else np.uint16 + optimal_memmap_dtype = np.uint32 if train_config.model.vocab_size > 2**16 else np.uint16 return MemMapDataset( *paths, chunk_size=train_config.model.max_sequence_length, diff --git a/olmo/data/memmap_dataset.py b/olmo/data/memmap_dataset.py index 
93be14840..901bc5703 100644 --- a/olmo/data/memmap_dataset.py +++ b/olmo/data/memmap_dataset.py @@ -194,7 +194,6 @@ def __getitem__(self, index: int) -> Dict[str, Any]: # Read the data from file. input_ids = self._read_chunk_from_memmap(self._memmap_paths[memmap_index], memmap_local_index) - input_ids = input_ids % (2**16) out: Dict[str, Any] = {"input_ids": input_ids} if self.instance_filter_config is not None: out["instance_mask"] = self._validate_instance(input_ids) From 355bd874fdf40ed77c5397c94f1c3540a076b10e Mon Sep 17 00:00:00 2001 From: aman-17 Date: Fri, 28 Feb 2025 10:37:38 -0800 Subject: [PATCH 9/9] resolved README conflict --- README.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index dc4d2934a..22b3bd0b3 100644 --- a/README.md +++ b/README.md @@ -50,11 +50,10 @@ In the second stage, we train on a smaller amount of high-quality, targeted data You can find *all* the checkpoints, at minimum every 1000 training steps in OLMo core and Hugging Face format: -| Variant | OLMo Format | Hugging Face Format | -|------------------|-----------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------| -| **OLMo-2 7B** | [OLMo-2 7B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-7B.csv) | [Hugging Face for the 7B variant](https://huggingface.co/allenai/OLMo-2-1124-7B) | -| **OLMo-2 13B** | [OLMo-2 13B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-13B.csv) | [Hugging Face for the 13B variant](https://huggingface.co/allenai/OLMo-2-1124-13B) | - +| Variant | OLMo Format (Stage 1) | OLMo Format (Stage 2) | Hugging Face Format | +|----------------|-----------------------------------------------------------------------------------------------------|--------|----------------------------------------------------------------------------------| +| 
**OLMo-2 7B** | [OLMo-2 7B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-7B.csv) | [OLMo-2 7B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-7B-stage2.csv) | [Hugging Face for the 7B variant](https://huggingface.co/allenai/OLMo-2-1124-7B) | +| **OLMo-2 13B** | [OLMo-2 13B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-13B.csv) | [OLMo-2 13B](https://github.com/allenai/OLMo/blob/main/configs/official-1124/OLMo-2-1124-13B-stage2.csv) | [Hugging Face for the 13B variant](https://huggingface.co/allenai/OLMo-2-1124-13B) | ### Steps to reproduce To reproduce any of the training processes described below, run this: @@ -87,7 +86,7 @@ Example: ```bash python scripts/train.py configs/tiny/OLMo-20M.yaml --save_overwrite ``` -Note: You need to upgrade PyTorch to 2.5.x to run. OLMo-2-1124 uses `uint32` for `memmap_dtype`, whereas OLMo-0724 uses `uint16`. +Note: You need to upgrade PyTorch to 2.5.x to run. ### Stage 1