Skip to content
1 change: 1 addition & 0 deletions cpp/arcticdb/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,7 @@ set(arcticdb_srcs
processing/operation_dispatch_binary_operator_minus.cpp
processing/operation_dispatch_binary_operator_times.cpp
processing/operation_dispatch_binary_operator_divide.cpp
processing/operation_dispatch_binary_operator_mod.cpp
processing/operation_dispatch_ternary.cpp
processing/query_planner.cpp
processing/sorted_aggregation.cpp
Expand Down
9 changes: 9 additions & 0 deletions cpp/arcticdb/processing/expression_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ std::variant<BitSetTag, DataType> ExpressionNode::compute(
case OperationType::SUB:
case OperationType::MUL:
case OperationType::DIV:
case OperationType::MOD:
user_input::check<ErrorCode::E_INVALID_USER_ARGUMENT>(
std::holds_alternative<DataType>(left_type),
"Unexpected bitset input as left operand to {}",
Expand Down Expand Up @@ -226,6 +227,14 @@ std::variant<BitSetTag, DataType> ExpressionNode::compute(
res = data_type_from_raw_type<TargetType>();
break;
}
case OperationType::MOD: {
using TargetType = typename binary_operation_promoted_type<
Comment thread
academy-codex marked this conversation as resolved.
typename left_type_info::RawType,
typename right_type_info::RawType,
std::remove_reference_t<ModOperator>>::type;
res = data_type_from_raw_type<TargetType>();
break;
}
default:
internal::raise<ErrorCode::E_ASSERTION_FAILURE>("Unexpected binary operator");
}
Expand Down
2 changes: 2 additions & 0 deletions cpp/arcticdb/processing/operation_dispatch_binary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@ VariantData dispatch_binary(const VariantData& left, const VariantData& right, O
return visit_binary_operator(left, right, TimesOperator{});
case OperationType::DIV:
return visit_binary_operator(left, right, DivideOperator{});
case OperationType::MOD:
return visit_binary_operator(left, right, ModOperator{});
case OperationType::EQ:
return visit_binary_comparator(left, right, EqualsOperator{});
case OperationType::NE:
Expand Down
2 changes: 2 additions & 0 deletions cpp/arcticdb/processing/operation_dispatch_binary.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,8 @@ extern template VariantData visit_binary_operator<
arcticdb::TimesOperator>(const VariantData&, const VariantData&, TimesOperator&&);
extern template VariantData visit_binary_operator<
arcticdb::DivideOperator>(const VariantData&, const VariantData&, DivideOperator&&);
extern template VariantData visit_binary_operator<
arcticdb::ModOperator>(const VariantData&, const VariantData&, ModOperator&&);

// instantiated in operation_dispatch_binary_comparator.cpp to reduce compilation memory use
extern template VariantData visit_binary_comparator<
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/*
* Copyright 2026 Man Group Operations Limited
*
* Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt.
*
* As of the Change Date specified in that file, in accordance with the Business Source License, use of this software
* will be governed by the Apache License, version 2.0.
*/
#include <arcticdb/processing/operation_dispatch_binary.hpp>

namespace arcticdb {
// Explicit instantiation of visit_binary_operator for ModOperator. This provides the definition that backs the
// matching `extern template` declaration in operation_dispatch_binary.hpp.
template VariantData visit_binary_operator<ModOperator>(const VariantData&, const VariantData&, ModOperator&&);
} // namespace arcticdb
62 changes: 62 additions & 0 deletions cpp/arcticdb/processing/operation_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include <unordered_set>
#include <optional>
#include <cmath>

#include <arcticdb/processing/signed_unsigned_comparison.hpp>
#include <arcticdb/util/constants.hpp>
Expand Down Expand Up @@ -37,6 +38,7 @@ enum class OperationType : uint8_t {
SUB,
MUL,
DIV,
MOD,
// Comparison
EQ,
NE,
Expand Down Expand Up @@ -70,6 +72,7 @@ inline std::string_view operation_type_to_str(const OperationType ot) {
TO_STR(SUB)
TO_STR(MUL)
TO_STR(DIV)
TO_STR(MOD)
TO_STR(EQ)
TO_STR(NE)
TO_STR(LT)
Expand Down Expand Up @@ -103,6 +106,7 @@ struct PlusOperator;
struct MinusOperator;
struct TimesOperator;
struct DivideOperator;
struct ModOperator;
struct MembershipOperator;

namespace arithmetic_promoted_type::details {
Expand Down Expand Up @@ -230,6 +234,24 @@ struct binary_operation_promoted_type {
2 * max_width>>>>>>>>;
};

// Modulo cannot overflow, so no width-doubling is needed (unlike +/-/*).
// For mixed signed/unsigned integers, always use a signed type so that Python/Pandas
// sign semantics (result sign follows divisor) can produce negative results.
template<class LHS, class RHS>
struct binary_operation_promoted_type<LHS, RHS, ModOperator> {
static constexpr size_t max_width = arithmetic_promoted_type::details::max_width_v<LHS, RHS>;
using type = std::conditional_t<
std::is_floating_point_v<LHS> || std::is_floating_point_v<RHS>,
std::conditional_t<
std::is_floating_point_v<LHS> && std::is_floating_point_v<RHS>,
std::conditional_t<max_width == 8, double, float>,
double>,
std::conditional_t<
std::is_unsigned_v<LHS> && std::is_unsigned_v<RHS>,
arithmetic_promoted_type::details::unsigned_width_t<max_width>,
arithmetic_promoted_type::details::signed_width_t<max_width>>>;
};

template<class LHS, class RHS>
struct ternary_operation_promoted_type {
static constexpr size_t max_width = arithmetic_promoted_type::details::max_width_v<LHS, RHS>;
Expand Down Expand Up @@ -356,6 +378,33 @@ struct DivideOperator {
}
};

// Binary modulo with Python/Pandas (floored) semantics: a non-zero result takes the sign of the divisor, unlike
// the C++ built-in `%` and std::fmod, whose result takes the sign of the dividend.
struct ModOperator {
    // Computes `t % u` after converting both operands to the promoted type V.
    //
    // Floating-point V:
    //   * the remainder comes from std::fmod and is shifted by the divisor when the signs disagree;
    //   * a zero remainder is returned with the sign of the divisor (as Python and NumPy do);
    //   * a zero divisor yields NaN, as std::fmod does.
    // Integer V:
    //   * a zero divisor yields 0 (NumPy's integer remainder convention) instead of invoking undefined behaviour;
    //   * a divisor of -1 yields 0 without evaluating `%`, because `min % -1` is undefined behaviour for signed
    //     types even though the mathematical result is 0.
    template<typename T, typename U, typename V = typename binary_operation_promoted_type<T, U, ModOperator>::type>
    V apply(T t, U u) {
        const auto lhs = static_cast<V>(t);
        const auto rhs = static_cast<V>(u);
        if constexpr (std::is_floating_point_v<V>) {
            auto result = std::fmod(lhs, rhs);
            if (result == V{0}) {
                // std::fmod gives a zero result the sign of the dividend; Python/NumPy use the divisor's sign.
                return std::copysign(V{0}, rhs);
            }
            // NaN compares false everywhere, so a NaN result passes through unchanged.
            if ((rhs < V{0}) != (result < V{0})) {
                result += rhs;
            }
            return result;
        } else {
            if (rhs == V{0}) {
                // Integer modulo by zero is undefined behaviour in C++ (typically a fatal SIGFPE).
                return V{0};
            }
            if constexpr (std::is_signed_v<V>) {
                if (rhs == V{-1}) {
                    // x % -1 is always 0; evaluating it for x == min() overflows and is undefined behaviour.
                    return V{0};
                }
            }
            V result = static_cast<V>(lhs % rhs);
            if constexpr (std::is_signed_v<V>) {
                // Built-in % truncates towards zero; shift into the divisor's sign to match Python/Pandas.
                // result and rhs have opposite signs here, so the addition cannot overflow.
                if (result != V{0} && ((rhs < V{0}) != (result < V{0}))) {
                    result += rhs;
                }
            }
            return result;
        }
    }
};

struct EqualsOperator {
template<typename T, typename U>
bool operator()(T t, U u) const {
Expand Down Expand Up @@ -715,6 +764,19 @@ struct formatter<arcticdb::DivideOperator> {
}
};

// fmt formatter for ModOperator: takes no format specification and always writes the operator symbol "%".
template<>
struct formatter<arcticdb::ModOperator> {
    // Nothing is parsed; the start of the format specification is handed straight back.
    template<typename ParseCtx>
    constexpr auto parse(ParseCtx& parse_ctx) {
        return parse_ctx.begin();
    }

    // Writes "%" to the context's output iterator. The operator instance carries no state and is ignored.
    template<typename FormatCtx>
    constexpr auto format(arcticdb::ModOperator, FormatCtx& format_ctx) const {
        return fmt::format_to(format_ctx.out(), "%");
    }
};

template<>
struct formatter<arcticdb::EqualsOperator> {
template<typename ParseContext>
Expand Down
48 changes: 47 additions & 1 deletion cpp/arcticdb/processing/test/test_operation_dispatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,52 @@ TEST(OperationDispatch, binary_operator) {
EXPECT_THROW(visit_binary_operator(empty_column, value, PlusOperator{}), SchemaException);
// val + empty col
EXPECT_THROW(visit_binary_operator(value, empty_column, PlusOperator{}), SchemaException);

// int col % val
auto modulo_value = std::make_shared<Value>(static_cast<int64_t>(7), DataType::INT64);
auto variant_data = visit_binary_operator(int_column, modulo_value, ModOperator{});
ASSERT_TRUE(std::holds_alternative<ColumnWithStrings>(variant_data));
auto results_column = std::get<ColumnWithStrings>(variant_data).column_;
for (size_t idx = 0; idx < num_rows; idx++) {
ASSERT_EQ(static_cast<int64_t>(idx) % 7, results_column->scalar_at<int64_t>(idx));
}

auto one = std::make_shared<Value>(static_cast<int64_t>(1), DataType::INT64);
auto nonzero_column_variant = visit_binary_operator(int_column, one, PlusOperator{});
ASSERT_TRUE(std::holds_alternative<ColumnWithStrings>(nonzero_column_variant));
auto nonzero_column = std::get<ColumnWithStrings>(nonzero_column_variant);

// int col % int col (with non-zero divisor column)
auto variant_data_col_col = visit_binary_operator(int_column, nonzero_column, ModOperator{});
ASSERT_TRUE(std::holds_alternative<ColumnWithStrings>(variant_data_col_col));
auto results_column_col_col = std::get<ColumnWithStrings>(variant_data_col_col).column_;
for (size_t idx = 0; idx < num_rows; idx++) {
ASSERT_EQ(static_cast<int64_t>(idx), results_column_col_col->scalar_at<int64_t>(idx));
}

// val % int col.
auto variant_data_val_col = visit_binary_operator(modulo_value, nonzero_column, ModOperator{});
ASSERT_TRUE(std::holds_alternative<ColumnWithStrings>(variant_data_val_col));
auto results_column_val_col = std::get<ColumnWithStrings>(variant_data_val_col).column_;
for (size_t idx = 0; idx < num_rows; idx++) {
ASSERT_EQ(
static_cast<int64_t>(7) % static_cast<int64_t>(idx + 1),
results_column_val_col->scalar_at<int64_t>(idx)
);
}

// Match Python/Pandas behavior for negative floating-point values.
auto minus_three = std::make_shared<Value>(-3.0, DataType::FLOAT64);
auto plus_two = std::make_shared<Value>(2.0, DataType::FLOAT64);
auto variant_data_float = visit_binary_operator(minus_three, plus_two, ModOperator{});
ASSERT_TRUE(std::holds_alternative<std::shared_ptr<Value>>(variant_data_float));
ASSERT_DOUBLE_EQ(std::get<std::shared_ptr<Value>>(variant_data_float)->get<double>(), 1.0);

auto minus_two = std::make_shared<Value>(-2.0, DataType::FLOAT64);
auto variant_data_float_neg_divisor = visit_binary_operator(minus_three, minus_two, ModOperator{});
ASSERT_TRUE(std::holds_alternative<std::shared_ptr<Value>>(variant_data_float_neg_divisor));
ASSERT_DOUBLE_EQ(std::get<std::shared_ptr<Value>>(variant_data_float_neg_divisor)->get<double>(), -1.0);

}

TEST(OperationDispatch, binary_comparator) {
Expand Down Expand Up @@ -145,4 +191,4 @@ TEST(OperationDispatch, binary_membership) {
// empty col isnotin set
ASSERT_TRUE(std::holds_alternative<FullResult>(visit_binary_membership(empty_column, value_set, IsNotInOperator{}))
);
}
}
1 change: 1 addition & 0 deletions cpp/arcticdb/version/python_bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,7 @@ void register_bindings(py::module& version, py::exception<arcticdb::ArcticExcept
.value("SUB", OperationType::SUB)
.value("MUL", OperationType::MUL)
.value("DIV", OperationType::DIV)
.value("MOD", OperationType::MOD)
.value("EQ", OperationType::EQ)
.value("NE", OperationType::NE)
.value("LT", OperationType::LT)
Expand Down
10 changes: 9 additions & 1 deletion docs/mkdocs/docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,15 @@ _output (the data filtered by date range, columns and the query which filters ba
2000-01-01 13:00:00 18 8
```

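To filter on a component of a timestamp index (for example, selecting rows where the minute of the index is `10`), take the index modulo a `pd.Timedelta`. The result is each timestamp's offset within that period, which can then be compared against other `pd.Timedelta` values: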

```python
q = adb.QueryBuilder()
minute_in_hour = q["index"] % pd.Timedelta(hours=1)
q = q[(minute_in_hour >= pd.Timedelta(minutes=10)) & (minute_in_hour < pd.Timedelta(minutes=11))]
library.read("test_frame", query_builder=q).data
```

### Modifications, Versioning (aka Time Travel)

ArcticDB fully supports modifying stored data via two primitives: _update_ and _append_.
Expand Down Expand Up @@ -509,4 +518,3 @@ For concurrent access to a local backend, we recommend LMDB connected to tmpfs,
- So why pay the cost of transactions when they are often not needed?
- ArcticDB doesn't have transactions because it is designed for high throughput analytical workloads


8 changes: 7 additions & 1 deletion python/arcticdb/version_store/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@ def __mul__(self, right):
def __truediv__(self, right):
    """Handle ``self / right`` by delegating to ``self._apply`` with ``_OperationType.DIV``."""
    return self._apply(right, _OperationType.DIV)

def __mod__(self, right):
    """Handle ``self % right`` by delegating to ``self._apply`` with ``_OperationType.MOD``."""
    return self._apply(right, _OperationType.MOD)

def __eq__(self, right):
if is_supported_sequence(right):
return self.isin(right)
Expand Down Expand Up @@ -186,6 +189,9 @@ def __rmul__(self, left):
def __rtruediv__(self, left):
    """Handle the reflected ``left / self`` by delegating to ``self._rapply`` with ``_OperationType.DIV``."""
    return self._rapply(left, _OperationType.DIV)

def __rmod__(self, left):
    """Handle the reflected ``left % self`` by delegating to ``self._rapply`` with ``_OperationType.MOD``."""
    return self._rapply(left, _OperationType.MOD)

def __rand__(self, left):
if left is True:
return self
Expand Down Expand Up @@ -435,7 +441,7 @@ class QueryBuilder:

Supported arithmetic operations when projection or filtering:

* Binary arithmetic: +, -, *, /
* Binary arithmetic: +, -, *, /, %
* Unary arithmetic: -, abs

Supported filtering operations:
Expand Down
16 changes: 16 additions & 0 deletions python/tests/unit/arcticdb/version_store/test_filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,22 @@ def test_filter_datetime_timedelta(lmdb_version_store_v1, any_output_format):
assert True


def test_filter_datetime_index_by_minute_with_modulo(lmdb_version_store_tiny_segment, any_output_format):
    """Keeping rows where ``index % 1 hour`` lies in [10 min, 11 min) must select the rows whose index minute is 10."""
    lib = lmdb_version_store_tiny_segment
    lib._set_output_format_for_pipeline_tests(any_output_format)
    sym = "test_filter_datetime_index_by_minute_with_modulo"

    # 180 one-minute steps starting at midnight.
    timestamps = pd.date_range("2024-01-01", periods=180, freq="min")
    frame = pd.DataFrame({"col": np.arange(len(timestamps), dtype=np.int64)}, index=timestamps)
    lib.write(sym, frame)

    query = QueryBuilder()
    offset_in_hour = query["index"] % pd.Timedelta(hours=1)
    at_or_after_ten = offset_in_hour >= pd.Timedelta(minutes=10)
    before_eleven = offset_in_hour < pd.Timedelta(minutes=11)
    query = query[at_or_after_ten & before_eleven]

    expected = frame[frame.index.minute == 10]
    generic_filter_test(lib, sym, query, expected)


def test_filter_datetime_timezone_aware(lmdb_version_store_v1, any_output_format):
lib = lmdb_version_store_v1
lib._set_output_format_for_pipeline_tests(any_output_format)
Expand Down
Loading