diff --git a/src/JuMP.jl b/src/JuMP.jl index 98a37e60aa8..aa48f6ebc44 100644 --- a/src/JuMP.jl +++ b/src/JuMP.jl @@ -138,6 +138,8 @@ mutable struct GenericModel{T<:Real} <: AbstractModel # A dictionary to store timing information from the JuMP macros. enable_macro_timing::Bool macro_times::Dict{Tuple{LineNumberNode,String},Float64} + # We use `Any` as key because we haven't defined `GenericNonlinearExpr` yet + subexpressions::Dict{Any,MOI.ScalarNonlinearFunction} end value_type(::Type{GenericModel{T}}) where {T} = T @@ -251,6 +253,7 @@ function direct_generic_model( Dict{Any,MOI.ConstraintIndex}(), false, Dict{Tuple{LineNumberNode,String},Float64}(), + Dict{Any,MOI.ScalarNonlinearFunction}(), ) end diff --git a/src/constraints.jl b/src/constraints.jl index 157907ccd5e..1f9561612f5 100644 --- a/src/constraints.jl +++ b/src/constraints.jl @@ -760,6 +760,10 @@ function moi_function(constraint::AbstractConstraint) return moi_function(jump_function(constraint)) end +function moi_function(constraint::AbstractConstraint, model) + return moi_function(jump_function(constraint), model) +end + """ moi_set(constraint::AbstractConstraint) @@ -1016,6 +1020,17 @@ function _moi_add_constraint( return MOI.add_constraint(model, f, s) end +function check_belongs_to_model(f::Vector, model) + for func in f + check_belongs_to_model(func, model) + end +end + +function moi_function(f, model) + check_belongs_to_model(f, model) + return moi_function(f) +end + """ add_constraint( model::GenericModel, @@ -1032,10 +1047,9 @@ function add_constraint( name::String = "", ) con = model_convert(model, con) + func, set = moi_function(con, model), moi_set(con) # The type of backend(model) is unknown so we directly redirect to another # function. 
- check_belongs_to_model(con, model) - func, set = moi_function(con), moi_set(con) cindex = _moi_add_constraint( backend(model), func, diff --git a/src/nlp_expr.jl b/src/nlp_expr.jl index 31e243f2f12..c4ef7ceec0a 100644 --- a/src/nlp_expr.jl +++ b/src/nlp_expr.jl @@ -569,18 +569,31 @@ end moi_function(x::Number) = x -function moi_function(f::GenericNonlinearExpr{V}) where {V} +function moi_function( + f::GenericNonlinearExpr{V}, + model::JuMP.GenericModel, +) where {V} + cache = model.subexpressions + if haskey(cache, f) + return cache[f] + end ret = MOI.ScalarNonlinearFunction(f.head, similar(f.args)) stack = Tuple{MOI.ScalarNonlinearFunction,Int,GenericNonlinearExpr{V}}[] for i in length(f.args):-1:1 if f.args[i] isa GenericNonlinearExpr{V} push!(stack, (ret, i, f.args[i])) + elseif f.args[i] isa AbstractJuMPScalar + ret.args[i] = moi_function(f.args[i], model) else ret.args[i] = moi_function(f.args[i]) end end while !isempty(stack) parent, i, arg = pop!(stack) + if haskey(cache, arg) + parent.args[i] = cache[arg] + continue + end child = MOI.ScalarNonlinearFunction(arg.head, similar(arg.args)) parent.args[i] = child for j in length(arg.args):-1:1 @@ -590,7 +603,9 @@ function moi_function(f::GenericNonlinearExpr{V}) where {V} child.args[j] = moi_function(arg.args[j]) end end + cache[arg] = child end + cache[f] = ret return ret end diff --git a/src/objective.jl b/src/objective.jl index c9dcbbde3ac..6e493045889 100644 --- a/src/objective.jl +++ b/src/objective.jl @@ -277,7 +277,7 @@ end function set_objective_function(model::GenericModel, func::AbstractJuMPScalar) check_belongs_to_model(func, model) - set_objective_function(model, moi_function(func)) + set_objective_function(model, moi_function(func, model)) return end @@ -296,7 +296,7 @@ function set_objective_function( for f in func check_belongs_to_model(f, model) end - set_objective_function(model, moi_function(func)) + set_objective_function(model, moi_function(func, model)) return end diff --git 
a/test/perf/benchmark_cache.jl b/test/perf/benchmark_cache.jl new file mode 100644 index 00000000000..c930ca8cdf2 --- /dev/null +++ b/test/perf/benchmark_cache.jl @@ -0,0 +1,331 @@ +# Benchmark for PR #4032: caching strategies for moi_function on +# GenericNonlinearExpr. +# +# Four modes are compared: +# :none — master behaviour: NO cache. `add_constraint` runs +# `check_belongs_to_model` and then `moi_function`, both +# of which traverse the expression independently. +# :per_call_oid — odow's actual PR snippet: a fresh `Dict{UInt64, ...}` +# allocated INSIDE every call to `moi_function`, keyed by +# `objectid(arg)` (so cache lookups are O(1)). +# `check_belongs_to_model` still runs as a separate walk. +# :model_struct — this branch as-is: cache stored on the model, keyed by +# the JuMP `GenericNonlinearExpr` itself. Default `==`/ +# `hash` on that struct is structural and walks the whole +# sub-tree, so a cache hit still costs O(size) — which +# can defeat the cache for deeply aliased DAGs. +# :model_oid — model-level cache, but keyed by `objectid` like odow's +# snippet. Shows what the branch could evolve to once +# blegat's "use hash as keys" suggestion is applied. +# +# Run from the repo root, in an environment that has BenchmarkTools added on +# top of dev'd JuMP (do NOT add BenchmarkTools to JuMP's own Project.toml): +# +# julia> using Pkg +# julia> Pkg.activate(temp = true) +# julia> Pkg.develop(path = ".") +# julia> Pkg.add("BenchmarkTools") +# julia> include("test/perf/benchmark_cache.jl"); run_all() +# +# The script monkey-patches `JuMP.moi_function(::GenericNonlinearExpr, +# ::GenericModel)` so the same JuMP build can be exercised under each mode +# without rebuilding the package. +# +# Sample numbers from this branch (Julia 1.12, K=14, etc.): +# +# A: aliased tree (one big DAG, K=14) ~46 ms — all four modes within +# 1% (MOI-tree alloc cost is +# small vs. the rest of +# add_constraint here).
+# B: many independent constraints (N=5000) none 84 / per_call_oid 86 / +# model_struct 91 / model_oid 86 +# → the *current branch* is the +# slowest; struct keys hurt the +# common case. Switching to +# objectid keys fixes it. +# C: shared big subexpr (N=200, M=200) model_struct 171 / model_oid 170 +# vs none 191 / per_call_oid 195 +# → 12–14% win for the model- +# level cache. Per-call cannot +# see the cross-constraint +# sharing. +# D: many aliased trees (M=200, K=8) all ~140 ms (per-constraint +# sharing is small and JuMP's +# `+` would flatten without the +# explicit GenericNonlinearExpr +# construction we use here). +# +# Takeaways: +# * The model-level scope is the right call when subexpressions are shared +# across constraints (scenario C). +# * Using the JuMP struct as the cache key (current branch) is what costs +# in scenario B — the deep `==`/`hash` adds overhead per node. Switching +# to `objectid` keys (blegat's "use hash as keys" suggestion) recovers +# most of it. +# * Scenario A's exponential blow-up is more visible in MEMORY than in +# TIME at K=14; bump K higher (or use the bench.jl example with +# |R|*|S| ≈ 3600) to make the time gap obvious. + +using Printf +using JuMP +using BenchmarkTools +import MathOptInterface as MOI + +const _G = JuMP.GenericNonlinearExpr + +# --------------------------------------------------------------------------- +# Three implementations of the GenericNonlinearExpr → ScalarNonlinearFunction +# walk. They share the same skeleton; only the cache differs. 
+# --------------------------------------------------------------------------- + +function _moi_no_cache(f::_G{V}) where {V} + ret = MOI.ScalarNonlinearFunction(f.head, similar(f.args)) + stack = Tuple{MOI.ScalarNonlinearFunction,Int,_G{V}}[] + for i in length(f.args):-1:1 + if f.args[i] isa _G{V} + push!(stack, (ret, i, f.args[i])) + else + ret.args[i] = JuMP.moi_function(f.args[i]) + end + end + while !isempty(stack) + parent, i, arg = pop!(stack) + child = MOI.ScalarNonlinearFunction(arg.head, similar(arg.args)) + parent.args[i] = child + for j in length(arg.args):-1:1 + if arg.args[j] isa _G{V} + push!(stack, (child, j, arg.args[j])) + else + child.args[j] = JuMP.moi_function(arg.args[j]) + end + end + end + return ret +end + +# Cache walk parameterised by the `keyfn` that maps a JuMP NL expression to +# the dict key. `keyfn = identity` matches the branch (struct keys, deep +# hash). `keyfn = objectid` matches odow's snippet (UInt64 keys, O(1) hash). +function _moi_with_cache(f::_G{V}, cache, keyfn::F) where {V,F} + fk = keyfn(f) + if haskey(cache, fk) + return cache[fk] + end + ret = MOI.ScalarNonlinearFunction(f.head, similar(f.args)) + stack = Tuple{MOI.ScalarNonlinearFunction,Int,_G{V}}[] + for i in length(f.args):-1:1 + if f.args[i] isa _G{V} + push!(stack, (ret, i, f.args[i])) + else + ret.args[i] = JuMP.moi_function(f.args[i]) + end + end + while !isempty(stack) + parent, i, arg = pop!(stack) + argk = keyfn(arg) + if haskey(cache, argk) + parent.args[i] = cache[argk] + continue + end + child = MOI.ScalarNonlinearFunction(arg.head, similar(arg.args)) + parent.args[i] = child + for j in length(arg.args):-1:1 + if arg.args[j] isa _G{V} + push!(stack, (child, j, arg.args[j])) + else + child.args[j] = JuMP.moi_function(arg.args[j]) + end + end + cache[argk] = child + end + cache[fk] = ret + return ret +end + +# --------------------------------------------------------------------------- +# Switching mechanism. 
We re-define JuMP's `moi_function(::GenericNonlinearExpr, +# ::GenericModel)` so that `add_constraint` dispatches differently per mode. +# +# Modes :none and :per_call mirror what those PR options would actually +# look like in production: a separate `check_belongs_to_model` walk, then +# the (cached or uncached) build walk. +# Mode :model_level matches the branch: a single integrated walk. +# --------------------------------------------------------------------------- + +const BENCH_MODE = Ref(:model_struct) + +# Per-mode caches that need a UInt64 (objectid) key type rather than the +# branch's `Dict{Any,...}`. We store them on the model as a side table so +# they survive across constraints inside one build but are cleared between +# builds (because each scenario builds a fresh `Model()`). +const _OID_CACHE = IdDict{JuMP.GenericModel,Dict{UInt64,MOI.ScalarNonlinearFunction}}() + +function _oid_cache(model) + get!(_OID_CACHE, model) do + Dict{UInt64,MOI.ScalarNonlinearFunction}() + end +end + +@eval JuMP function moi_function( + f::JuMP.GenericNonlinearExpr{V}, + model::JuMP.GenericModel, +) where {V} + mode = Main.BENCH_MODE[] + if mode === :none + JuMP.check_belongs_to_model(f, model) + return Main._moi_no_cache(f) + elseif mode === :per_call_oid + JuMP.check_belongs_to_model(f, model) + cache = Dict{UInt64,$(MOI.ScalarNonlinearFunction)}() + return Main._moi_with_cache(f, cache, objectid) + elseif mode === :model_struct + # branch behaviour: belongs-check is integrated; struct keys (deep hash) + return Main._moi_with_cache(f, model.subexpressions, identity) + else # :model_oid + return Main._moi_with_cache(f, Main._oid_cache(model), objectid) + end +end + +# --------------------------------------------------------------------------- +# Scenarios. Each `scenario_*` returns a 0-arg closure that builds a fresh +# model from scratch, so each BenchmarkTools sample sees an empty +# `model.subexpressions` cache. 
+# --------------------------------------------------------------------------- + +# Scenario A — odow's case: ONE constraint with a deeply aliased binary tree. +# `e_k = e_{k-1} + e_{k-1}`. With no cache this expands to 2^K leaves; with +# either cache it stays linear in K. +# +# We build the `+` node directly via the GenericNonlinearExpr constructor so +# JuMP's `+` operator overload does NOT flatten the n-ary sum. Without this, +# `e + e` returns a flat `:+(args...)` of 2^K identical leaf references and +# the master walk only iterates 2^K times instead of doing 2^K *recursive* +# descents through K levels — which would mask the worst case the PR is +# meant to fix. +function scenario_aliased_tree(; K::Int = 14) + V = JuMP.VariableRef + return function () + model = Model() + @variable(model, x) + e = sin(x) + for _ in 1:K + e = JuMP.GenericNonlinearExpr{V}(:+, e, e) + end + @constraint(model, e <= 1) + return model + end +end + +# Scenario B — many small INDEPENDENT NL constraints. No subexpression +# sharing within or across constraints. The model-level cache pays an +# insertion + hash cost for every new node; the per-call cache pays an +# extra Dict allocation per constraint; master pays nothing. +# +# This is the case blegat warned about in the PR thread: "we start +# creating many small dictionaries if there are a lot of constraints, +# but that's probably negligible". The benchmark tells us whether it +# really is negligible. +function scenario_many_independent(; N::Int = 5_000) + return function () + model = Model() + @variable(model, x[1:N]) + for i in 1:N + @constraint(model, sin(x[i]) + cos(x[i]) * exp(x[i]) <= 1) + end + return model + end +end + +# Scenario C — many constraints sharing one big subexpression. +# `big = sum(sin(x[i]) for i in 1:N)` appears once in each of M constraints. +# The per-call cache cannot help here (each call sees `big` only once); only +# the model-level cache deduplicates `big` across constraints. 
This is the +# case that genuinely motivates the model-level cache. +function scenario_shared_big(; N::Int = 200, M::Int = 200) + return function () + model = Model() + @variable(model, x[1:N]) + big = sum(sin(x[i]) for i in 1:N) + for j in 1:M + @constraint(model, big * x[j] <= 1) + end + return model + end +end + +# Scenario D — many constraints, each with internal aliasing (a smaller +# binary tree per constraint, distinct leaf variable per constraint). +# Both per-call and model-level caches help WITHIN each constraint; +# across constraints there is nothing to share. This is the natural +# territory of odow's per-call dict. +function scenario_many_aliased(; M::Int = 200, K::Int = 8) + V = JuMP.VariableRef + return function () + model = Model() + @variable(model, x[1:M]) + for i in 1:M + e = sin(x[i]) + for _ in 1:K + e = JuMP.GenericNonlinearExpr{V}(:+, e, e) + end + @constraint(model, e <= 1) + end + return model + end +end + +# --------------------------------------------------------------------------- +# Runner +# --------------------------------------------------------------------------- + +const SCENARIOS = [ + ("A: aliased tree (one big DAG, K=14)", scenario_aliased_tree(K = 14)), + ("B: many independent constraints (N=5000)", scenario_many_independent(N = 5_000)), + ("C: shared big subexpr (N=200, M=200)", scenario_shared_big(N = 200, M = 200)), + ("D: many aliased trees (M=200, K=8)", scenario_many_aliased(M = 200, K = 8)), +] + +const MODES = [:none, :per_call_oid, :model_struct, :model_oid] + +function run_all() + println("Benchmarking PR #4032 cache strategies\n") + for (label, build) in SCENARIOS + println("="^72) + println(label) + println("="^72) + # Warm up each mode (compile its method specializations) before + # timing, so the first sample isn't biased by JIT. 
+ for m in MODES + BENCH_MODE[] = m + try + build() + catch err + @warn "warm-up failed for mode $m" exception = err + end + end + results = Dict{Symbol,Any}() + for m in MODES + BENCH_MODE[] = m + b = @benchmark $build() samples = 5 evals = 1 seconds = 30 + results[m] = b + t_ms = minimum(b).time / 1e6 + mem_mb = minimum(b).memory / 1024^2 + allocs = minimum(b).allocs + @printf(" %-13s %10.2f ms %9.2f MiB %12d allocs\n", + string(m), t_ms, mem_mb, allocs) + end + # Ratios versus the branch's model_struct baseline so we can see + # where each strategy wins, ties, or loses. + base = minimum(results[:model_struct]).time + println() + for m in MODES + r = minimum(results[m]).time / base + @printf(" ratio(%-13s / model_struct) = %.2f\n", string(m), r) + end + println() + end +end + +if abspath(PROGRAM_FILE) == @__FILE__ + run_all() +end