1 change: 1 addition & 0 deletions CONTRIBUTORS.md
@@ -74,6 +74,7 @@ Guidelines for modifications:
* Gary Lvov
* Giulio Romualdi
* Grzegorz Malczyk
+* Haixuan Xavier Tao
* Haoran Zhou
* Harsh Patel
* HoJin Jeon
2 changes: 1 addition & 1 deletion source/isaaclab/config/extension.toml
@@ -1,7 +1,7 @@
[package]

# Note: Semantic Versioning is used: https://semver.org/
-version = "0.54.3"
+version = "0.54.4"

# Description
title = "Isaac Lab framework for Robot Learning"
20 changes: 20 additions & 0 deletions source/isaaclab/docs/CHANGELOG.rst
@@ -1,6 +1,26 @@
Changelog
---------

0.54.4 (2026-04-24)
~~~~~~~~~~~~~~~~~~~

Changed
^^^^^^^

* Removed per-call GPU→CPU synchronizations from
:class:`~isaaclab.utils.buffers.CircularBuffer` by replacing the
``torch.any(...)`` probes in ``append`` and ``__getitem__`` with a
CPU-side flag maintained by :meth:`reset`. Also removed a redundant
``.clone()`` from :meth:`~isaaclab.utils.buffers.DelayBuffer.compute`
(the underlying advanced-indexing gather already allocates fresh
storage). Public API and first-push replication semantics are
unchanged; on CUDA at large ``num_envs`` this yields a meaningful
speedup for consumers that call the delay buffer every physics step
(e.g. :class:`~isaaclab.actuators.DelayedPDActuator`,
:class:`~isaaclab.actuators.RemotizedPDActuator`, and observation
history buffers).


0.54.3 (2026-02-04)
~~~~~~~~~~~~~~~~~~~

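The core of the change above is that branching on a CUDA tensor forces a host-device synchronization: ``torch.any(...)`` produces a 0-dim tensor on the device, and using it in an ``if`` invokes ``Tensor.__bool__``, which copies the value to the host and waits for all queued GPU work. A minimal toy sketch of the trade (the ``SyncFreeFlagDemo`` class is illustrative only, not part of Isaac Lab):

```python
import torch


class SyncFreeFlagDemo:
    """Toy model of trading a per-call GPU probe for a CPU-side flag."""

    def __init__(self, batch_size: int, device: str = "cpu"):
        self._num_pushes = torch.zeros(batch_size, dtype=torch.long, device=device)
        # CPU bool mirroring ``(self._num_pushes == 0).any()``.
        self._any_first_push_pending = True

    def guard_with_sync(self) -> None:
        # ``torch.any`` returns a 0-dim tensor; evaluating it as a Python
        # bool calls Tensor.__bool__(), which on CUDA copies the value to
        # the host and stalls until the stream catches up.
        if torch.any(self._num_pushes == 0):
            raise RuntimeError("buffer empty")

    def guard_sync_free(self) -> None:
        # Plain Python bool: no device round-trip on the hot path.
        if self._any_first_push_pending:
            raise RuntimeError("buffer empty")

    def append(self) -> None:
        self._num_pushes += 1
        self._any_first_push_pending = False
```

The flag only has to be conservative in one direction: it may not stay ``True`` when no index actually needs replication (hence the empty-``batch_ids`` guard added to ``reset`` below), and it must never be ``False`` while some index still does.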
46 changes: 40 additions & 6 deletions source/isaaclab/isaaclab/utils/buffers/circular_buffer.py
@@ -47,6 +47,13 @@ def __init__(self, max_len: int, batch_size: int, device: str):
# the actual buffer for data storage
# note: this is initialized on the first call to :meth:`append`
self._buffer: torch.Tensor = None # type: ignore
+# CPU-side flag that mirrors ``any(self._num_pushes == 0)`` without
+# requiring a GPU→CPU synchronization in the hot path. It flips True
+# whenever :meth:`reset` marks any batch index for first-push
+# replication, and clears when the next :meth:`append` performs that
+# replication. Reads in :meth:`__getitem__` use this to raise the same
+# "buffer empty" error as before but without a per-call sync.
+self._any_first_push_pending: bool = False

"""
Properties.
@@ -99,6 +106,12 @@ def reset(self, batch_ids: Sequence[int] | None = None):
Args:
batch_ids: Elements to reset in the batch dimension. Default is None, which resets all the batch indices.
"""
+# A zero-length ``batch_ids`` is a no-op: nothing to zero, and raising
+# ``_any_first_push_pending`` would spuriously block the next read
+# (the previous ``torch.any(num_pushes == 0)`` guard was immune to
+# this because it probed the actual tensor state).
+if batch_ids is not None and len(batch_ids) == 0:
+return
# resolve all indices
if batch_ids is None:
batch_ids = slice(None)
@@ -108,6 +121,10 @@ def append(self, data: torch.Tensor):
# set buffer at batch_id reset indices to 0.0 so that the buffer()
# getter returns the cleared circular buffer after reset.
self._buffer[:, batch_ids, :] = 0.0
+# mark that at least one batch index now has ``num_pushes == 0`` so
+# the next :meth:`append` performs the first-push history replication
+# and :meth:`__getitem__` rejects reads until that append happens.
+self._any_first_push_pending = True

def append(self, data: torch.Tensor):
"""Append the data to the circular buffer.
@@ -129,14 +146,28 @@ def append(self, data: torch.Tensor):
if self._buffer is None:
self._pointer = -1
self._buffer = torch.empty((self.max_length, *data.shape), dtype=data.dtype, device=self._device)
+# the buffer was just created, so every batch index starts with
+# ``num_pushes == 0`` and must be replicated on this first append
+self._any_first_push_pending = True
# move the head to the next slot
self._pointer = (self._pointer + 1) % self.max_length
# add the new data to the last layer
self._buffer[self._pointer] = data
-# Check for batches with zero pushes and initialize all values in batch to first append
-is_first_push = self._num_pushes == 0
-if torch.any(is_first_push):
-self._buffer[:, is_first_push] = data[is_first_push]
+# Check for batches with zero pushes and initialize all values in
+# batch to first append. The CPU flag ``_any_first_push_pending``
+# mirrors ``torch.any(num_pushes == 0)`` but is maintained by
+# :meth:`reset` and cleared here, so we avoid a GPU→CPU sync every
+# append in the common case where no batch just reset.
+if self._any_first_push_pending:
+is_first_push = self._num_pushes == 0
+# Broadcast-safe write that works for arbitrary trailing data
+# shape. Equivalent to ``self._buffer[:, is_first_push] =
+# data[is_first_push]`` but without materializing the dynamic
+# boolean index (which would reintroduce a sync on some torch
+# versions via shape inference).
+mask = is_first_push.view(1, -1, *([1] * (data.ndim - 1)))
+self._buffer = torch.where(mask, data.unsqueeze(0), self._buffer)
+self._any_first_push_pending = False
# increment the number of pushes for all batches
self._num_pushes += 1

@@ -160,8 +191,11 @@ def __getitem__(self, key: torch.Tensor) -> torch.Tensor:
# check the batch size
if len(key) != self.batch_size:
raise ValueError(f"The argument 'key' has length {key.shape[0]}, while expecting {self.batch_size}")
-# check if the buffer is empty
-if torch.any(self._num_pushes == 0) or self._buffer is None:
+# check if the buffer is empty — equivalent to
+# ``torch.any(self._num_pushes == 0)`` but sync-free: the CPU flag
+# flips True in :meth:`reset` (or on buffer construction) and back to
+# False when :meth:`append` has filled every reset index's history.
+if self._any_first_push_pending or self._buffer is None:
raise RuntimeError("Attempting to retrieve data on an empty circular buffer. Please append data first.")

# admissible lag
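For readers checking the broadcast rewrite in ``append``: a standalone equivalence sketch (not part of the PR; the shapes are arbitrary) comparing the old boolean-index assignment with the new ``torch.where`` masked write:

```python
import torch

# Shapes mirror CircularBuffer's internal layout: (max_len, batch, *feature).
max_len, batch, feature = 4, 3, (2, 5)
buffer = torch.randn(max_len, batch, *feature)
data = torch.randn(batch, *feature)
is_first_push = torch.tensor([True, False, True])

# Old path: dynamic boolean indexing (the result's shape depends on the
# mask's content, which is what can force a device sync).
expected = buffer.clone()
expected[:, is_first_push] = data[is_first_push]

# New path: broadcast mask + torch.where (all shapes are static).
mask = is_first_push.view(1, -1, *([1] * (data.ndim - 1)))
result = torch.where(mask, data.unsqueeze(0), buffer)

torch.testing.assert_close(result, expected)
```

The ``torch.where`` path reads the whole buffer and writes a full-size result, so it trades a little extra bandwidth for data-independent shapes, which is what keeps the kernel launch asynchronous.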
9 changes: 6 additions & 3 deletions source/isaaclab/isaaclab/utils/buffers/delay_buffer.py
@@ -173,6 +173,9 @@ def compute(self, data: torch.Tensor) -> torch.Tensor:
"""
# add the new data to the last layer
self._circular_buffer.append(data)
-# return output
-delayed_data = self._circular_buffer[self._time_lags]
-return delayed_data.clone()
+# ``CircularBuffer.__getitem__`` uses advanced indexing
+# (``self._buffer[index_in_buffer, self._ALL_INDICES]``), which
+# already allocates fresh storage. Returning the gather result
+# directly is safe — consumers that mutate it in place won't touch
+# the internal buffer — and skips one ``(batch, *feat)`` copy per call.
+return self._circular_buffer[self._time_lags]
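The justification for dropping the ``.clone()`` is easy to verify in isolation: integer-tensor (advanced) indexing in PyTorch gathers into fresh storage, whereas basic slicing returns a view. A standalone check with plain tensors, independent of Isaac Lab:

```python
import torch

buf = torch.arange(12.0).reshape(4, 3)  # stand-in for the internal buffer
rows = torch.tensor([0, 1, 2])
cols = torch.tensor([0, 1, 2])

gathered = buf[rows, cols]        # advanced indexing -> new storage
gathered += 100.0                 # in-place edit of the result...
assert buf[0, 0].item() == 0.0    # ...leaves the source untouched

view = buf[0]                     # basic slicing -> a view of the same storage
view += 100.0
assert buf[0, 0].item() == 100.0  # the source IS modified through a view
```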
70 changes: 70 additions & 0 deletions source/isaaclab/test/utils/test_circular_buffer.py
@@ -154,6 +154,76 @@ def test_key_greater_than_pushes(circular_buffer):
assert torch.equal(retrieved_data, data1)


def test_empty_batch_ids_reset_is_noop(circular_buffer):
"""``reset(batch_ids=[])`` must leave the buffer readable.

Regression test for the CPU-flag path: a zero-length ``batch_ids`` must
not flip ``_any_first_push_pending`` (nothing was actually zeroed). The
previous ``torch.any(num_pushes == 0)`` probe was immune because it read
the real tensor state; the flag-based guard must match that behavior to
avoid spuriously raising on callers that pass empty done-env lists.
"""
data = torch.ones((circular_buffer.batch_size, 2), device=circular_buffer.device)
circular_buffer.append(data)
circular_buffer.append(data)
circular_buffer.reset(batch_ids=[])
# Must not raise — all batch indices still have num_pushes > 0.
retrieved = circular_buffer[torch.tensor([0, 0, 0], device=circular_buffer.device)]
torch.testing.assert_close(retrieved, data)
# Also accept a zero-length tensor (common when built from a termination mask).
circular_buffer.reset(batch_ids=torch.tensor([], dtype=torch.long, device=circular_buffer.device))
retrieved = circular_buffer[torch.tensor([0, 0, 0], device=circular_buffer.device)]
torch.testing.assert_close(retrieved, data)


def test_partial_reset_then_read_raises(circular_buffer):
"""``__getitem__`` must still raise after a partial reset without a follow-up append.

Guards the CPU-side ``_any_first_push_pending`` flag that replaces the
previous per-call ``torch.any(num_pushes == 0)`` probe: the flag must be
set by :meth:`reset` and only cleared once :meth:`append` replicates the
first push, otherwise a reader could silently observe uninitialised slots.
"""
data = torch.ones((circular_buffer.batch_size, 2), device=circular_buffer.device)
circular_buffer.append(data)
circular_buffer.append(data)
circular_buffer.reset(batch_ids=[0])
with pytest.raises(RuntimeError):
circular_buffer[torch.tensor([0, 0, 0], device=circular_buffer.device)]


def test_interleaved_partial_reset_and_append(circular_buffer):
"""Cycle several partial resets + appends and verify each reset env's
history is fully replicated with its first post-reset sample (first-push
invariant) while untouched envs keep their real history."""
d1 = torch.tensor([[1, 1], [2, 2], [3, 3]], device=circular_buffer.device)
d2 = 10 * d1
d3 = 100 * d1

circular_buffer.append(d1)
circular_buffer.append(d2)

# Partial reset env 0 then append d3. Env 0's entire history should now
# be d3[0]; envs 1, 2 should have history including d1, d2, d3.
circular_buffer.reset(batch_ids=[0])
circular_buffer.append(d3)
for i in range(circular_buffer.max_length):
torch.testing.assert_close(circular_buffer.buffer[0, 0], circular_buffer.buffer[0, i])
torch.testing.assert_close(circular_buffer.buffer[1, -1], d3[1])
torch.testing.assert_close(circular_buffer.buffer[1, -2], d2[1])

# Now partial reset env 1 and append d1 again. Env 1's history should
# now be d1[1] everywhere; env 0 still keeps its d3 history (plus the new
# d1 append at the head), env 2 shifts normally.
circular_buffer.reset(batch_ids=[1])
circular_buffer.append(d1)
for i in range(circular_buffer.max_length):
torch.testing.assert_close(circular_buffer.buffer[1, 0], circular_buffer.buffer[1, i])
torch.testing.assert_close(circular_buffer.buffer[1, -1], d1[1])
torch.testing.assert_close(circular_buffer.buffer[0, -1], d1[0])
torch.testing.assert_close(circular_buffer.buffer[2, -1], d1[2])


def test_return_buffer_prop(circular_buffer):
"""Test retrieving the whole buffer for correct size and contents.
Returning the whole buffer should have shape [batch_size, max_len, *data.shape[1:]]
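The ``circular_buffer`` fixture itself sits above the diff context, but the new tests index with three-element keys and three-row data, so it presumably constructs a buffer with ``batch_size=3``. A plausible reconstruction, hypothetical since the real fixture is not shown:

```python
import pytest

from isaaclab.utils.buffers import CircularBuffer


@pytest.fixture
def circular_buffer():
    # Hypothetical stand-in: max_len >= 2 and batch_size == 3 match the
    # indexing used by the tests above; the real fixture may also be
    # parametrized over devices (e.g. "cpu" and "cuda:0").
    return CircularBuffer(max_len=5, batch_size=3, device="cpu")
```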