Skip to content
Open
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 46 additions & 12 deletions linopy/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -793,7 +793,7 @@ def sum(

res = self.__class__(self._sum(self, dim=dim), self.model)

if drop_zeros:
if drop_zeros and isinstance(res, LinearExpression):
res = res.densify_terms()

return res
Expand Down Expand Up @@ -1099,32 +1099,66 @@ def empty(self) -> EmptyDeprecationWrapper:

def densify_terms(self: GenericExpression) -> GenericExpression:
"""
Move all non-zero term entries to the front and cut off all-zero
Move all active term entries to the front and cut off dead
entries in the term-axis.

A term is considered active if its variable label is not -1
(masked/missing) and its coefficient is not 0.
"""
data = self.data.transpose(..., TERM_DIM)

vdata = data.vars.data
cdata = data.coeffs.data
axis = cdata.ndim - 1
nnz = np.nonzero(cdata)
nterm = (cdata != 0).sum(axis).max()

# A term is alive if it has a valid variable AND nonzero coefficient
alive = (vdata != -1) & (cdata != 0)
nterm_per_cell = alive.sum(axis)
Comment on lines +1114 to +1116
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

NaN coefficients may be incorrectly treated as alive terms.

The condition cdata != 0 evaluates to True for NaN values in NumPy:

>>> import numpy as np
>>> np.nan != 0
True

If a coefficient is NaN (indicating a masked/invalid term), the term would still be considered "alive" as long as vdata != -1. This could lead to keeping invalid terms during densification.

🛡️ Proposed fix to explicitly exclude NaN coefficients
         # A term is alive if it has a valid variable AND nonzero coefficient
-        alive = (vdata != -1) & (cdata != 0)
+        alive = (vdata != -1) & (cdata != 0) & ~np.isnan(cdata)
🤖 Prompt for AI Agents
In `@linopy/expressions.py` around lines 1114 - 1116, The logic that computes
alive terms treats NaN coefficients as nonzero (so NaNs are considered alive);
update the alive condition in the block that defines alive, vdata, cdata and
nterm_per_cell to explicitly exclude NaNs (e.g., require coefficients to be
finite/non-NaN in addition to being != 0), using numpy isfinite/isnan helpers to
combine with the existing (vdata != -1) and (cdata != 0) tests so NaN
coefficients are not counted as alive.

if nterm_per_cell.size == 0 or nterm_per_cell.max() == 0:
return self.__class__(data.sel({TERM_DIM: slice(0, 1)}), self.model)
nterm = int(nterm_per_cell.max())

# Nothing to compact if all term slots are already used
if nterm == cdata.shape[axis]:
return self

nnz = np.nonzero(alive)
mod_nnz = list(nnz)
mod_nnz.pop(axis)

if not mod_nnz:
# Scalar case (only _term dimension)
new_index = np.arange(len(nnz[0]))
mod_nnz.insert(axis, new_index)
new_vdata = np.full_like(vdata, -1)
new_vdata[tuple(mod_nnz)] = vdata[nnz]
data.vars.data = new_vdata
new_cdata = np.zeros_like(cdata)
new_cdata[tuple(mod_nnz)] = cdata[nnz]
data.coeffs.data = new_cdata
return self.__class__(data.sel({TERM_DIM: slice(0, nterm)}), self.model)

remaining_axes = np.vstack(mod_nnz).T
_, idx_ = np.unique(remaining_axes, axis=0, return_inverse=True)
idx = list(idx_)
new_index = np.array([idx[:i].count(j) for i, j in enumerate(idx)])
# Vectorized cumulative count within groups (O(n log n) vs O(n²))
order = np.argsort(idx_, kind="mergesort")
sorted_idx = idx_[order]
group_pos = np.ones(len(sorted_idx), dtype=np.intp)
group_pos[0] = 0
group_pos[1:] = (sorted_idx[1:] != sorted_idx[:-1]).astype(np.intp)
np.cumsum(group_pos, out=group_pos)
group_pos -= group_pos[np.searchsorted(sorted_idx, sorted_idx, side="left")]
new_index = np.empty_like(group_pos)
new_index[order] = group_pos
Comment on lines +1143 to +1152
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# First, verify the cumcount logic bug with the test script

python3 << 'EOF'
import numpy as np

# Simulate the current implementation
idx_ = np.array([0, 0, 0, 1, 1, 2])  # Already sorted for simplicity
order = np.argsort(idx_, kind="mergesort")
sorted_idx = idx_[order]

# Current buggy implementation
group_pos = np.ones(len(sorted_idx), dtype=np.intp)
group_pos[0] = 0
group_pos[1:] = (sorted_idx[1:] != sorted_idx[:-1]).astype(np.intp)
np.cumsum(group_pos, out=group_pos)
group_pos -= group_pos[np.searchsorted(sorted_idx, sorted_idx, side="left")]
new_index_buggy = np.empty_like(group_pos)
new_index_buggy[order] = group_pos

# Correct implementation
positions = np.arange(len(sorted_idx), dtype=np.intp)
group_starts = np.searchsorted(sorted_idx, sorted_idx, side="left")
group_pos_correct = positions - group_starts
new_index_correct = np.empty_like(group_pos_correct)
new_index_correct[order] = group_pos_correct

expected = np.array([0, 1, 2, 0, 1, 0])

print(f"Input groups: {sorted_idx}")
print(f"Expected positions: {expected}")
print(f"Current impl result: {new_index_buggy}")
print(f"Fixed impl result: {new_index_correct}")
print(f"Current impl correct: {np.array_equal(new_index_buggy, expected)}")
print(f"Fixed impl correct: {np.array_equal(new_index_correct, expected)}")
EOF

Repository: FBumann/linopy

Length of output: 175


🏁 Script executed:

cat -n linopy/expressions.py | sed -n '1140,1165p'

Repository: FBumann/linopy

Length of output: 1329


🏁 Script executed:

fd -e py -path "*test*" | xargs rg -l "densify_terms" 2>/dev/null

Repository: FBumann/linopy

Length of output: 287


🏁 Script executed:

fd -type f -name "*.py" | xargs rg "densify_terms" -A 10 -B 2 | head -100

Repository: FBumann/linopy

Length of output: 3158


🏁 Script executed:

fd -type f -name "*.py" -path "*test*" | head -20

Repository: FBumann/linopy

Length of output: 228


🏁 Script executed:

rg "def test.*densify" -A 10 -B 2

Repository: FBumann/linopy

Length of output: 40


🏁 Script executed:

cat -n linopy/expressions.py | sed -n '1100,1155p'

Repository: FBumann/linopy

Length of output: 2740


🏁 Script executed:

# Install numpy to run the verification script
pip install numpy -q

# Run the verification script
python3 << 'EOF'
import numpy as np

# Simulate the current implementation
idx_ = np.array([0, 0, 0, 1, 1, 2])  # Already sorted for simplicity
order = np.argsort(idx_, kind="mergesort")
sorted_idx = idx_[order]

print("="*60)
print("TRACING THE CURRENT IMPLEMENTATION:")
print("="*60)
print(f"Input groups (sorted_idx): {sorted_idx}")

# Current buggy implementation - step by step
group_pos = np.ones(len(sorted_idx), dtype=np.intp)
print(f"Step 1 - ones: {group_pos}")

group_pos[0] = 0
print(f"Step 2 - set [0]=0: {group_pos}")

group_pos[1:] = (sorted_idx[1:] != sorted_idx[:-1]).astype(np.intp)
print(f"Step 3 - group boundaries: {group_pos}")

np.cumsum(group_pos, out=group_pos)
print(f"Step 4 - after cumsum: {group_pos}")

search_indices = np.searchsorted(sorted_idx, sorted_idx, side="left")
print(f"Step 5 - searchsorted indices: {search_indices}")

group_pos_before = group_pos.copy()
group_pos -= group_pos[search_indices]
print(f"Step 6 - after subtraction: {group_pos}")
print(f"         (subtracted {group_pos_before[search_indices]} from {group_pos_before})")

new_index_buggy = np.empty_like(group_pos)
new_index_buggy[order] = group_pos
print(f"Step 7 - after unsort: {new_index_buggy}")

print("\n" + "="*60)
print("CORRECT IMPLEMENTATION:")
print("="*60)

positions = np.arange(len(sorted_idx), dtype=np.intp)
print(f"Positions: {positions}")

group_starts = np.searchsorted(sorted_idx, sorted_idx, side="left")
print(f"Group starts: {group_starts}")

group_pos_correct = positions - group_starts
print(f"Group positions (correct): {group_pos_correct}")

new_index_correct = np.empty_like(group_pos_correct)
new_index_correct[order] = group_pos_correct
print(f"After unsort: {new_index_correct}")

print("\n" + "="*60)
print("COMPARISON:")
print("="*60)
expected = np.array([0, 1, 2, 0, 1, 0])
print(f"Expected:            {expected}")
print(f"Current impl result: {new_index_buggy}")
print(f"Fixed impl result:   {new_index_correct}")
print(f"\nCurrent impl correct: {np.array_equal(new_index_buggy, expected)}")
print(f"Fixed impl correct:   {np.array_equal(new_index_correct, expected)}")
EOF

Repository: FBumann/linopy

Length of output: 1386


🏁 Script executed:

fd -type f -name "*.py" | xargs grep -l "densify_terms" 2>/dev/null

Repository: FBumann/linopy

Length of output: 230


🏁 Script executed:

rg "densify_terms" -B 3 -A 15 --type py

Repository: FBumann/linopy

Length of output: 2070


🏁 Script executed:

# Check for test files that test densify_terms
fd -type f -name "*.py" | xargs grep -l "test.*densify_terms\|densify_terms.*test" 2>/dev/null | head -5

Repository: FBumann/linopy

Length of output: 228


🏁 Script executed:

python3 << 'EOF'
import numpy as np

# Test with a more realistic example where groups are not in order
print("="*60)
print("TEST 2: Unsorted groups (realistic scenario)")
print("="*60)

# Groups: [0, 1, 0, 2, 1, 1]
idx_ = np.array([0, 1, 0, 2, 1, 1])
order = np.argsort(idx_, kind="mergesort")
sorted_idx = idx_[order]

print(f"Input (unsorted): {idx_}")
print(f"Sort order: {order}")
print(f"Sorted groups: {sorted_idx}")

# Current implementation
group_pos = np.ones(len(sorted_idx), dtype=np.intp)
group_pos[0] = 0
group_pos[1:] = (sorted_idx[1:] != sorted_idx[:-1]).astype(np.intp)
np.cumsum(group_pos, out=group_pos)
group_pos -= group_pos[np.searchsorted(sorted_idx, sorted_idx, side="left")]
new_index_buggy = np.empty_like(group_pos)
new_index_buggy[order] = group_pos

# Correct implementation
positions = np.arange(len(sorted_idx), dtype=np.intp)
group_starts = np.searchsorted(sorted_idx, sorted_idx, side="left")
group_pos_correct = positions - group_starts
new_index_correct = np.empty_like(group_pos_correct)
new_index_correct[order] = group_pos_correct

# Expected: cumulative count within groups
# Groups after sort: [0, 0, 1, 1, 1, 2]
# Positions: [0, 1, 0, 1, 2, 0]
expected = np.array([0, 1, 0, 1, 2, 0])

print(f"\nExpected result (unsorted):   {expected}")
print(f"Current impl result (unsorted): {new_index_buggy}")
print(f"Fixed impl result (unsorted):   {new_index_correct}")
print(f"\nCurrent impl matches expected: {np.array_equal(new_index_buggy, expected)}")
print(f"Fixed impl matches expected:   {np.array_equal(new_index_correct, expected)}")
EOF

Repository: FBumann/linopy

Length of output: 523


🏁 Script executed:

# Let me verify the expected result calculation for TEST 2
python3 << 'EOF'
import numpy as np

idx_ = np.array([0, 1, 0, 2, 1, 1])
order = np.argsort(idx_, kind="mergesort")
sorted_idx = idx_[order]

print("Tracing TEST 2 calculation:")
print(f"Original (unsorted) idx_: {idx_}")
print(f"Sort indices: {order}")
print(f"Sorted idx_: {sorted_idx}")

print("\nPositions in sorted order:")
for i, (original_pos, group_id) in enumerate(zip(order, sorted_idx)):
    print(f"  Sorted pos {i}: came from original pos {original_pos}, group {group_id}")

print("\nExpected positions within each group (sorted order):")
sorted_positions = np.array([0, 1, 0, 1, 2, 0])
print(f"  {sorted_positions}")

print("\nMapping back to original order using new_index[order] = group_pos:")
new_index_correct = np.empty(len(order), dtype=np.intp)
new_index_correct[order] = sorted_positions
print(f"  Original order result: {new_index_correct}")

# This is what pandas.groupby().cumcount() should produce
from scipy import stats
groups_df = idx_
cumcount_expected = np.zeros(len(idx_), dtype=np.intp)
for group_id in np.unique(idx_):
    mask = idx_ == group_id
    cumcount_expected[mask] = np.arange(np.sum(mask))

print(f"\nExpected (cumcount per group): {cumcount_expected}")
print(f"Fixed impl produces:           {new_index_correct}")
print(f"Match: {np.array_equal(new_index_correct, cumcount_expected)}")
EOF

Repository: FBumann/linopy

Length of output: 821


Fix critical bug in vectorized cumulative count logic (lines 1143-1152).

The algorithm to compute position-within-group produces incorrect results. Tracing with sorted_idx = [0, 0, 0, 1, 1, 2] (expected positions [0, 1, 2, 0, 1, 0]):

  1. After step-by-step execution, cumsum yields [0, 0, 0, 1, 1, 2] (group IDs, not positions)
  2. searchsorted returns [0, 0, 0, 3, 3, 5]
  3. Subtraction yields [0, 0, 0, 0, 0, 0] ❌ (all zeros—incorrect)

The correct approach uses position arithmetic:

-        # Vectorized cumulative count within groups (O(n log n) vs O(n²))
-        order = np.argsort(idx_, kind="mergesort")
-        sorted_idx = idx_[order]
-        group_pos = np.ones(len(sorted_idx), dtype=np.intp)
-        group_pos[0] = 0
-        group_pos[1:] = (sorted_idx[1:] != sorted_idx[:-1]).astype(np.intp)
-        np.cumsum(group_pos, out=group_pos)
-        group_pos -= group_pos[np.searchsorted(sorted_idx, sorted_idx, side="left")]
-        new_index = np.empty_like(group_pos)
-        new_index[order] = group_pos
+        # Vectorized cumulative count within groups (O(n log n) vs O(n²))
+        order = np.argsort(idx_, kind="mergesort")
+        sorted_idx = idx_[order]
+        positions = np.arange(len(sorted_idx), dtype=np.intp)
+        group_starts = np.searchsorted(sorted_idx, sorted_idx, side="left")
+        group_pos = positions - group_starts
+        new_index = np.empty_like(group_pos)
+        new_index[order] = group_pos

This produces [0, 1, 2, 0, 1, 0] as expected. All downstream index assignments in mod_nnz will be incorrect without this fix.

🤖 Prompt for AI Agents
In `@linopy/expressions.py` around lines 1143 - 1152, The current vectorized
cumulative-count produces group IDs instead of positions because you subtract
group_pos at searchsorted(sorted_idx, sorted_idx) rather than the index of the
first occurrence per group; fix by computing group_ids =
np.cumsum((sorted_idx[1:] != sorted_idx[:-1]).astype(np.intp), prepend=0) (use
the existing group_pos/breaks logic to get group_ids), compute first_indices =
np.searchsorted(sorted_idx, np.unique(sorted_idx), side="left")[group_ids], then
set positions = np.arange(len(sorted_idx), dtype=group_pos.dtype) -
first_indices and finally assign new_index[order] = positions so new_index (used
later in mod_nnz) contains correct within-group positions for idx_, order,
sorted_idx, group_pos, new_index.

mod_nnz.insert(axis, new_index)

vdata = np.full_like(cdata, -1)
vdata[tuple(mod_nnz)] = data.vars.data[nnz]
data.vars.data = vdata
new_vdata = np.full_like(vdata, -1)
new_vdata[tuple(mod_nnz)] = vdata[nnz]
data.vars.data = new_vdata

cdata = np.zeros_like(cdata)
cdata[tuple(mod_nnz)] = data.coeffs.data[nnz]
data.coeffs.data = cdata
new_cdata = np.zeros_like(cdata)
new_cdata[tuple(mod_nnz)] = cdata[nnz]
data.coeffs.data = new_cdata

return self.__class__(data.sel({TERM_DIM: slice(0, nterm)}), self.model)

Expand Down
Loading