From b0b0f28127d20b8ee1fb849bfc7cbce0397df449 Mon Sep 17 00:00:00 2001
From: wcwxy <26245345+ChaoWao@users.noreply.github.com>
Date: Mon, 20 Apr 2026 20:41:08 +0800
Subject: [PATCH] Add: ChipBootstrapChannel for per-chip bootstrap handshake (L2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce a one-shot cross-process mailbox class for parent-child
bootstrap communication, independent of the task-mailbox protocol.
Includes C++ implementation, nanobind Python bindings, and 7 UT cases
covering in-process and fork-based cross-process scenarios.

Design decisions:
- Mailbox size: 4096 B (one page). HEADER_SIZE=64, ERROR_MSG_SIZE=1024,
  PTR_CAPACITY=376 — sufficient for all foreseeable chip buffer counts.
- State machine: IDLE/SUCCESS/ERROR three states. Values 0/1/2 leave
  headroom for future intermediate states without serialization
  migration.
- Memory ordering: aarch64 ldar/stlr inline asm (first, per codestyle
  #6), x86_64 compiler barrier, __atomic_load/store fallback — same
  pattern as WorkerThread mailbox in worker_manager.cpp.
- Error message: bounded memcpy of at most ERROR_MSG_SIZE-1 bytes,
  followed by an explicit null terminator right after the copied bytes;
  compatible with the L4 task-mailbox error message convention.

Cross-process read hardening:
- Ctor rejects max_buffer_count > CHIP_BOOTSTRAP_PTR_CAPACITY so the
  clamp invariant holds for every subsequent read.
- buffer_ptrs() clamps the shared-memory count against max_buffer_count_
  so a corrupted or premature read cannot overrun the pointer region.
- error_message() uses strnlen(CHIP_BOOTSTRAP_ERROR_MSG_SIZE) instead of
  trusting the null-terminator in shared memory.

Co-Authored-By: Claude Opus 4.7 (1M context) --- python/bindings/CMakeLists.txt | 1 + python/bindings/worker_bind.h | 41 +++++ .../hierarchical/chip_bootstrap_channel.cpp | 164 ++++++++++++++++++ .../hierarchical/chip_bootstrap_channel.h | 81 +++++++++ .../py/test_worker/test_bootstrap_channel.py | 153 ++++++++++++++++ 5 files changed, 440 insertions(+) create mode 100644 src/common/hierarchical/chip_bootstrap_channel.cpp create mode 100644 src/common/hierarchical/chip_bootstrap_channel.h create mode 100644 tests/ut/py/test_worker/test_bootstrap_channel.py diff --git a/python/bindings/CMakeLists.txt b/python/bindings/CMakeLists.txt index 6b9af179c..ae392526e 100644 --- a/python/bindings/CMakeLists.txt +++ b/python/bindings/CMakeLists.txt @@ -26,6 +26,7 @@ set(HIERARCHICAL_SOURCES ${HIERARCHICAL_SRC}/worker_manager.cpp ${HIERARCHICAL_SRC}/scheduler.cpp ${HIERARCHICAL_SRC}/worker.cpp + ${HIERARCHICAL_SRC}/chip_bootstrap_channel.cpp ) nanobind_add_module(_task_interface ${BINDING_SOURCES} ${HIERARCHICAL_SOURCES}) diff --git a/python/bindings/worker_bind.h b/python/bindings/worker_bind.h index bc9e0cf5e..6b00dd142 100644 --- a/python/bindings/worker_bind.h +++ b/python/bindings/worker_bind.h @@ -30,6 +30,7 @@ #include +#include "chip_bootstrap_channel.h" #include "chip_worker.h" #include "ring.h" #include "orchestrator.h" @@ -238,4 +239,44 @@ inline void bind_worker(nb::module_ &m) { m.attr("MAILBOX_ERROR_MSG_SIZE") = static_cast(MAILBOX_ERROR_MSG_SIZE); m.attr("MAX_RING_DEPTH") = static_cast(MAX_RING_DEPTH); m.attr("MAX_SCOPE_DEPTH") = static_cast(MAX_SCOPE_DEPTH); + + // --- ChipBootstrapChannel --- + m.attr("CHIP_BOOTSTRAP_MAILBOX_SIZE") = static_cast(CHIP_BOOTSTRAP_MAILBOX_SIZE); + + nb::enum_(m, "ChipBootstrapMailboxState") + .value("IDLE", ChipBootstrapMailboxState::IDLE) + .value("SUCCESS", ChipBootstrapMailboxState::SUCCESS) + .value("ERROR", ChipBootstrapMailboxState::ERROR); + + nb::class_(m, "ChipBootstrapChannel") + .def( + "__init__", + 
[](ChipBootstrapChannel *self, uint64_t mailbox_ptr, size_t max_buffer_count) { + new (self) ChipBootstrapChannel(reinterpret_cast(mailbox_ptr), max_buffer_count); + }, + nb::arg("mailbox_ptr"), nb::arg("max_buffer_count") + ) + .def("reset", &ChipBootstrapChannel::reset) + .def( + "write_success", + [](ChipBootstrapChannel &self, uint64_t device_ctx, uint64_t local_window_base, uint64_t actual_window_size, + const std::vector &buffer_ptrs) { + self.write_success(device_ctx, local_window_base, actual_window_size, buffer_ptrs); + }, + nb::arg("device_ctx"), nb::arg("local_window_base"), nb::arg("actual_window_size"), nb::arg("buffer_ptrs") + ) + .def( + "write_error", + [](ChipBootstrapChannel &self, int32_t error_code, const std::string &message) { + self.write_error(error_code, message); + }, + nb::arg("error_code"), nb::arg("message") + ) + .def_prop_ro("state", &ChipBootstrapChannel::state) + .def_prop_ro("error_code", &ChipBootstrapChannel::error_code) + .def_prop_ro("device_ctx", &ChipBootstrapChannel::device_ctx) + .def_prop_ro("local_window_base", &ChipBootstrapChannel::local_window_base) + .def_prop_ro("actual_window_size", &ChipBootstrapChannel::actual_window_size) + .def_prop_ro("buffer_ptrs", &ChipBootstrapChannel::buffer_ptrs) + .def_prop_ro("error_message", &ChipBootstrapChannel::error_message); } diff --git a/src/common/hierarchical/chip_bootstrap_channel.cpp b/src/common/hierarchical/chip_bootstrap_channel.cpp new file mode 100644 index 000000000..a4843c49b --- /dev/null +++ b/src/common/hierarchical/chip_bootstrap_channel.cpp @@ -0,0 +1,164 @@ +/* + * Copyright (c) PyPTO Contributors. + * This program is free software, you can redistribute it and/or modify it under the terms and conditions of + * CANN Open Software License Agreement Version 2.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. 
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See LICENSE in the root of the software repository for the full text of the License.
+ * -----------------------------------------------------------------------------------------------------------
+ */
+
+#include "chip_bootstrap_channel.h"
+
+#include <cstring>
+#include <stdexcept>
+
+// =============================================================================
+// Internal helpers: release-store / acquire-load of the int32 state word
+// =============================================================================
+
+namespace {
+
+void write_state(void *mailbox, ChipBootstrapMailboxState s) {
+    auto *ptr = reinterpret_cast<volatile int32_t *>(static_cast<char *>(mailbox) + CHIP_BOOTSTRAP_OFF_STATE);
+    int32_t v = static_cast<int32_t>(s);
+#if defined(__aarch64__)
+    __asm__ volatile("stlr %w0, [%1]" : : "r"(v), "r"(ptr) : "memory");  // store-release instruction
+#elif defined(__x86_64__)
+    __asm__ volatile("" ::: "memory");  // compiler barrier: keeps earlier payload stores ahead of the state store
+    *ptr = v;
+#else
+    __atomic_store(ptr, &v, __ATOMIC_RELEASE);
+#endif
+}
+
+ChipBootstrapMailboxState read_state(void *mailbox) {
+    auto *ptr = reinterpret_cast<volatile int32_t *>(static_cast<char *>(mailbox) + CHIP_BOOTSTRAP_OFF_STATE);
+    int32_t v;
+#if defined(__aarch64__)
+    __asm__ volatile("ldar %w0, [%1]" : "=r"(v) : "r"(ptr) : "memory");  // load-acquire instruction
+#elif defined(__x86_64__)
+    v = *ptr;
+    __asm__ volatile("" ::: "memory");  // compiler barrier: keeps later payload loads behind the state load
+#else
+    __atomic_load(ptr, &v, __ATOMIC_ACQUIRE);
+#endif
+    return static_cast<ChipBootstrapMailboxState>(v);
+}
+
+}  // namespace
+
+// =============================================================================
+// ChipBootstrapChannel
+// =============================================================================
+
+ChipBootstrapChannel::ChipBootstrapChannel(void *mailbox, size_t max_buffer_count) :
+    mailbox_(mailbox),
+    max_buffer_count_(max_buffer_count) {
+    if (mailbox_ == nullptr) {
+        throw std::invalid_argument("mailbox must not be null");
+    }
+    if (max_buffer_count_ > CHIP_BOOTSTRAP_PTR_CAPACITY) {  // buffer_ptrs() relies on this bound when clamping
+        throw std::invalid_argument("max_buffer_count exceeds CHIP_BOOTSTRAP_PTR_CAPACITY");
+    }
+}
+
+void ChipBootstrapChannel::reset() {
+    std::memset(mailbox_, 0, CHIP_BOOTSTRAP_MAILBOX_SIZE);  // clears header, pointer slots and error text
+    write_state(mailbox_, ChipBootstrapMailboxState::IDLE);
+}
+
+void ChipBootstrapChannel::write_success(
+    uint64_t device_ctx, uint64_t local_window_base, uint64_t actual_window_size,
+    const std::vector<uint64_t> &buffer_ptrs
+) {
+    if (buffer_ptrs.size() > max_buffer_count_) {
+        throw std::invalid_argument("buffer_ptrs exceeds max_buffer_count");
+    }
+
+    auto *base = static_cast<char *>(mailbox_);
+
+    int32_t count = static_cast<int32_t>(buffer_ptrs.size());  // cannot narrow: size <= CHIP_BOOTSTRAP_PTR_CAPACITY
+    std::memcpy(base + CHIP_BOOTSTRAP_OFF_BUFFER_COUNT, &count, sizeof(count));
+    std::memcpy(base + CHIP_BOOTSTRAP_OFF_DEVICE_CTX, &device_ctx, sizeof(device_ctx));
+    std::memcpy(base + CHIP_BOOTSTRAP_OFF_LOCAL_WINDOW_BASE, &local_window_base, sizeof(local_window_base));
+    std::memcpy(base + CHIP_BOOTSTRAP_OFF_ACTUAL_WINDOW_SIZE, &actual_window_size, sizeof(actual_window_size));
+
+    if (!buffer_ptrs.empty()) {
+        std::memcpy(base + CHIP_BOOTSTRAP_OFF_BUFFER_PTRS, buffer_ptrs.data(), buffer_ptrs.size() * sizeof(uint64_t));
+    }
+
+    write_state(mailbox_, ChipBootstrapMailboxState::SUCCESS);  // published last, after every payload field
+}
+
+void ChipBootstrapChannel::write_error(int32_t error_code, const std::string &message) {
+    auto *base = static_cast<char *>(mailbox_);
+
+    std::memcpy(base + CHIP_BOOTSTRAP_OFF_ERROR_CODE, &error_code, sizeof(error_code));
+
+    size_t max_len = CHIP_BOOTSTRAP_ERROR_MSG_SIZE - 1;  // keep one byte for the terminator
+    size_t copy_len = message.size() < max_len ? message.size() : max_len;
+    std::memcpy(base + CHIP_BOOTSTRAP_OFF_ERROR_MSG, message.data(), copy_len);
+    base[CHIP_BOOTSTRAP_OFF_ERROR_MSG + copy_len] = '\0';
+
+    write_state(mailbox_, ChipBootstrapMailboxState::ERROR);  // published last, after code and message
+}
+
+ChipBootstrapMailboxState ChipBootstrapChannel::state() const { return read_state(mailbox_); }
+
+int32_t ChipBootstrapChannel::error_code() const {
+    auto *base = static_cast<const char *>(mailbox_);
+    int32_t v;
+    std::memcpy(&v, base + CHIP_BOOTSTRAP_OFF_ERROR_CODE, sizeof(v));
+    return v;
+}
+
+uint64_t ChipBootstrapChannel::device_ctx() const {
+    auto *base = static_cast<const char *>(mailbox_);
+    uint64_t v;
+    std::memcpy(&v, base + CHIP_BOOTSTRAP_OFF_DEVICE_CTX, sizeof(v));
+    return v;
+}
+
+uint64_t ChipBootstrapChannel::local_window_base() const {
+    auto *base = static_cast<const char *>(mailbox_);
+    uint64_t v;
+    std::memcpy(&v, base + CHIP_BOOTSTRAP_OFF_LOCAL_WINDOW_BASE, sizeof(v));
+    return v;
+}
+
+uint64_t ChipBootstrapChannel::actual_window_size() const {
+    auto *base = static_cast<const char *>(mailbox_);
+    uint64_t v;
+    std::memcpy(&v, base + CHIP_BOOTSTRAP_OFF_ACTUAL_WINDOW_SIZE, sizeof(v));
+    return v;
+}
+
+std::vector<uint64_t> ChipBootstrapChannel::buffer_ptrs() const {
+    auto *base = static_cast<const char *>(mailbox_);
+    int32_t raw_count;
+    std::memcpy(&raw_count, base + CHIP_BOOTSTRAP_OFF_BUFFER_COUNT, sizeof(raw_count));
+
+    // Ctor guarantees max_buffer_count_ <= CHIP_BOOTSTRAP_PTR_CAPACITY, so clamping
+    // count against max_buffer_count_ alone is sufficient to keep the read bounded.
+    size_t count =
+        raw_count <= 0 ?
+            0 :
+            (static_cast<size_t>(raw_count) < max_buffer_count_ ? static_cast<size_t>(raw_count) : max_buffer_count_);
+
+    std::vector<uint64_t> ptrs(count);
+    if (count > 0) {
+        std::memcpy(ptrs.data(), base + CHIP_BOOTSTRAP_OFF_BUFFER_PTRS, count * sizeof(uint64_t));
+    }
+    return ptrs;
+}
+
+std::string ChipBootstrapChannel::error_message() const {
+    auto *base = static_cast<const char *>(mailbox_);
+    const char *msg_ptr = base + CHIP_BOOTSTRAP_OFF_ERROR_MSG;
+    // Bound the read against the layout size so a missing null-terminator in
+    // shared memory (corrupt producer, premature read) can't walk off the page.
+    size_t len = strnlen(msg_ptr, CHIP_BOOTSTRAP_ERROR_MSG_SIZE);
+    return std::string(msg_ptr, len);
+}
diff --git a/src/common/hierarchical/chip_bootstrap_channel.h b/src/common/hierarchical/chip_bootstrap_channel.h
new file mode 100644
index 000000000..fb56736d0
--- /dev/null
+++ b/src/common/hierarchical/chip_bootstrap_channel.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) PyPTO Contributors.
+ * This program is free software, you can redistribute it and/or modify it under the terms and conditions of
+ * CANN Open Software License Agreement Version 2.0 (the "License").
+ * Please refer to the License for details. You may not use this file except in compliance with the License.
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See LICENSE in the root of the software repository for the full text of the License.
+ * -----------------------------------------------------------------------------------------------------------
+ */
+
+/**
+ * ChipBootstrapChannel — one-shot cross-process mailbox for per-chip bootstrap.
+ *
+ * Lifecycle: parent allocates a CHIP_BOOTSTRAP_MAILBOX_SIZE shared-memory region,
+ * child writes SUCCESS/ERROR once, parent polls state() until done.
+ * Not a general-purpose mailbox — independent of the task-mailbox protocol.
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <vector>
+
+static constexpr size_t CHIP_BOOTSTRAP_MAILBOX_SIZE = 4096;
+static constexpr size_t CHIP_BOOTSTRAP_HEADER_SIZE = 64;
+static constexpr size_t CHIP_BOOTSTRAP_ERROR_MSG_SIZE = 1024;
+static constexpr size_t CHIP_BOOTSTRAP_PTR_CAPACITY =
+    (CHIP_BOOTSTRAP_MAILBOX_SIZE - CHIP_BOOTSTRAP_HEADER_SIZE - CHIP_BOOTSTRAP_ERROR_MSG_SIZE) / sizeof(uint64_t);
+
+// Fixed byte offsets within the mailbox region: [header | uint64 pointer slots | error message].
+static constexpr ptrdiff_t CHIP_BOOTSTRAP_OFF_STATE = 0;         // int32, ChipBootstrapMailboxState
+static constexpr ptrdiff_t CHIP_BOOTSTRAP_OFF_ERROR_CODE = 4;    // int32
+static constexpr ptrdiff_t CHIP_BOOTSTRAP_OFF_BUFFER_COUNT = 8;  // int32, number of valid pointer slots
+// 4 bytes implicit padding for uint64 alignment
+static constexpr ptrdiff_t CHIP_BOOTSTRAP_OFF_DEVICE_CTX = 16;          // uint64
+static constexpr ptrdiff_t CHIP_BOOTSTRAP_OFF_LOCAL_WINDOW_BASE = 24;   // uint64
+static constexpr ptrdiff_t CHIP_BOOTSTRAP_OFF_ACTUAL_WINDOW_SIZE = 32;  // uint64
+static constexpr ptrdiff_t CHIP_BOOTSTRAP_OFF_BUFFER_PTRS = static_cast<ptrdiff_t>(CHIP_BOOTSTRAP_HEADER_SIZE);
+static constexpr ptrdiff_t CHIP_BOOTSTRAP_OFF_ERROR_MSG =
+    CHIP_BOOTSTRAP_OFF_BUFFER_PTRS + static_cast<ptrdiff_t>(CHIP_BOOTSTRAP_PTR_CAPACITY * sizeof(uint64_t));
+
+static_assert(
+    CHIP_BOOTSTRAP_OFF_ERROR_MSG + static_cast<ptrdiff_t>(CHIP_BOOTSTRAP_ERROR_MSG_SIZE) ==
+        static_cast<ptrdiff_t>(CHIP_BOOTSTRAP_MAILBOX_SIZE),
+    "mailbox layout must sum to 4096"
+);
+
+enum class ChipBootstrapMailboxState : int32_t {
+    IDLE = 0,     // nothing published yet (also the value of a zero-filled mailbox)
+    SUCCESS = 1,  // write_success() payload is available
+    ERROR = 2,    // write_error() code and message are available
+};
+
+class ChipBootstrapChannel {
+public:
+    ChipBootstrapChannel(void *mailbox, size_t max_buffer_count);  // throws std::invalid_argument on bad args
+
+    // Write side (child process).
+    void reset();  // zero-fills the whole mailbox, then publishes IDLE
+    void write_success(
+        uint64_t device_ctx, uint64_t local_window_base, uint64_t actual_window_size,
+        const std::vector<uint64_t> &buffer_ptrs
+    );
+    void write_error(int32_t error_code, const std::string &message);  // message cut to ERROR_MSG_SIZE - 1 bytes
+
+    // Read side (parent process).
+    ChipBootstrapMailboxState state() const;
+    int32_t error_code() const;
+    uint64_t device_ctx() const;
+    uint64_t local_window_base() const;
+    uint64_t actual_window_size() const;
+    std::vector<uint64_t> buffer_ptrs() const;  // stored count is clamped to max_buffer_count
+    std::string error_message() const;          // read is bounded by CHIP_BOOTSTRAP_ERROR_MSG_SIZE
+
+private:
+    void *mailbox_;
+    size_t max_buffer_count_;
+};
diff --git a/tests/ut/py/test_worker/test_bootstrap_channel.py b/tests/ut/py/test_worker/test_bootstrap_channel.py
new file mode 100644
index 000000000..1712c14c7
--- /dev/null
+++ b/tests/ut/py/test_worker/test_bootstrap_channel.py
@@ -0,0 +1,153 @@
+# Copyright (c) PyPTO Contributors.
+# This program is free software, you can redistribute it and/or modify it under the terms and conditions of
+# CANN Open Software License Agreement Version 2.0 (the "License").
+# Please refer to the License for details. You may not use this file except in compliance with the License.
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+# See LICENSE in the root of the software repository for the full text of the License.
+# -----------------------------------------------------------------------------------------------------------
+"""Unit tests for ChipBootstrapChannel (L2 bootstrap mailbox).
+
+All tests run without hardware — pure shared memory, in-process plus one fork-based cross-process case.
+"""
+
+import ctypes
+import os
+from multiprocessing.shared_memory import SharedMemory
+
+import pytest
+from _task_interface import (  # pyright: ignore[reportMissingImports]
+    CHIP_BOOTSTRAP_MAILBOX_SIZE,
+    ChipBootstrapChannel,
+    ChipBootstrapMailboxState,
+)
+
+
+def _mailbox_addr(shm: SharedMemory) -> int:
+    """Return the raw memory address of a SharedMemory buffer."""
+    buf = shm.buf
+    assert buf is not None
+    return ctypes.addressof(ctypes.c_char.from_buffer(buf))
+
+
+class TestBootstrapChannel:
+    def test_fresh_channel_state_idle(self):
+        """Freshly allocated (zero-filled) mailbox reads as IDLE without an explicit reset."""
+        shm = SharedMemory(create=True, size=CHIP_BOOTSTRAP_MAILBOX_SIZE)
+        try:
+            ch = ChipBootstrapChannel(_mailbox_addr(shm), max_buffer_count=376)
+            # buf is zeroed by SharedMemory — state at offset 0 is 0 (IDLE)
+            assert ch.state == ChipBootstrapMailboxState.IDLE
+        finally:
+            shm.close()
+            shm.unlink()
+
+    def test_write_success_fields(self):
+        """write_success stores all fields and parent reads them back."""
+        shm = SharedMemory(create=True, size=CHIP_BOOTSTRAP_MAILBOX_SIZE)
+        try:
+            ch = ChipBootstrapChannel(_mailbox_addr(shm), max_buffer_count=376)
+            ch.reset()
+            ch.write_success(
+                device_ctx=0xDEADBEEFCAFE1234,
+                local_window_base=0xAABBCCDD00112233,
+                actual_window_size=65536,
+                buffer_ptrs=[0x1000, 0x2000, 0x3000],
+            )
+            assert ch.state == ChipBootstrapMailboxState.SUCCESS
+            assert ch.device_ctx == 0xDEADBEEFCAFE1234
+            assert ch.local_window_base == 0xAABBCCDD00112233
+            assert ch.actual_window_size == 65536
+            assert ch.buffer_ptrs == [0x1000, 0x2000, 0x3000]
+        finally:
+            shm.close()
+            shm.unlink()
+
+    def test_write_error_fields(self):
+        """write_error stores error_code and message."""
+        shm = SharedMemory(create=True, size=CHIP_BOOTSTRAP_MAILBOX_SIZE)
+        try:
+            ch = ChipBootstrapChannel(_mailbox_addr(shm), max_buffer_count=376)
+            ch.reset()
+            ch.write_error(42, "device not found")
+            assert ch.state == ChipBootstrapMailboxState.ERROR
+            assert ch.error_code == 42
+            assert ch.error_message == "device not found"
+        finally:
+            shm.close()
+            shm.unlink()
+
+    def test_state_machine_reset(self):
+        """write_success -> SUCCESS, reset -> IDLE."""
+        shm = SharedMemory(create=True, size=CHIP_BOOTSTRAP_MAILBOX_SIZE)
+        try:
+            ch = ChipBootstrapChannel(_mailbox_addr(shm), max_buffer_count=376)
+            ch.reset()
+            ch.write_success(0, 0, 0, [])
+            assert ch.state == ChipBootstrapMailboxState.SUCCESS
+            ch.reset()
+            assert ch.state == ChipBootstrapMailboxState.IDLE
+        finally:
+            shm.close()
+            shm.unlink()
+
+    def test_cross_process_fork(self):
+        """Parent allocates shm, forks, child writes, parent polls and reads the result."""
+        shm = SharedMemory(create=True, size=CHIP_BOOTSTRAP_MAILBOX_SIZE)
+        try:
+            addr = _mailbox_addr(shm)
+            pid = os.fork()
+            if pid == 0:
+                # Child: always leave through os._exit so a failure never falls back into the pytest run.
+                try:
+                    ch = ChipBootstrapChannel(addr, max_buffer_count=376)
+                    ch.reset()
+                    ch.write_success(0x1111222233334444, 0x5555666677778888, 128, [0xA, 0xB, 0xC, 0xD])
+                    os._exit(0)
+                except BaseException:
+                    os._exit(1)
+            else:
+                # Parent: poll while IDLE, but stop once the child has exited so a dead child cannot hang us.
+                ch = ChipBootstrapChannel(addr, max_buffer_count=376)
+                reaped = False
+                while ch.state == ChipBootstrapMailboxState.IDLE and not reaped:
+                    reaped = os.waitpid(pid, os.WNOHANG)[0] == pid
+                if not reaped:
+                    os.waitpid(pid, 0)
+                assert ch.state == ChipBootstrapMailboxState.SUCCESS
+                assert ch.device_ctx == 0x1111222233334444
+                assert ch.local_window_base == 0x5555666677778888
+                assert ch.actual_window_size == 128
+                assert ch.buffer_ptrs == [0xA, 0xB, 0xC, 0xD]
+        finally:
+            shm.close()
+            shm.unlink()
+
+    def test_buffer_ptrs_overflow(self):
+        """write_success with too many ptrs throws."""
+        shm = SharedMemory(create=True, size=CHIP_BOOTSTRAP_MAILBOX_SIZE)
+        try:
+            ch = ChipBootstrapChannel(_mailbox_addr(shm), max_buffer_count=376)
+            ch.reset()
+            too_many = list(range(377))
+            with pytest.raises(ValueError, match="buffer_ptrs exceeds max_buffer_count"):
+                ch.write_success(0, 0, 0, too_many)
+        finally:
+            shm.close()
+            shm.unlink()
+
+    def test_error_message_truncation(self):
+        """write_error with >1024 byte message truncates to 1023 + null."""
+        shm = SharedMemory(create=True, size=CHIP_BOOTSTRAP_MAILBOX_SIZE)
+        try:
+            ch = ChipBootstrapChannel(_mailbox_addr(shm), max_buffer_count=376)
+            ch.reset()
+            long_msg = "x" * 2000
+            ch.write_error(-1, long_msg)
+            assert ch.state == ChipBootstrapMailboxState.ERROR
+            msg = ch.error_message
+            assert len(msg) == 1023
+            assert msg == "x" * 1023
+        finally:
+            shm.close()
+            shm.unlink()