diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index ee0c29235cd..9defa4771b5 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -3493,11 +3493,11 @@ void llama_perf_context_reset(llama_context * ctx) {
     ctx->perf_reset();
 }
 
-void llama_memory_breakdown_print(const struct llama_context * ctx) {
-    const auto & devices = ctx->get_model().devices;
-
-    std::map<ggml_backend_buffer_type_t, llama_memory_breakdown_data> memory_breakdown = ctx->memory_breakdown();
+void llama_memory_breakdown_print_impl(
+        const struct llama_context * ctx,
+        const std::map<ggml_backend_buffer_type_t, llama_memory_breakdown_data> & memory_breakdown) {
+    const auto & devices = ctx->get_model().devices;
 
     std::vector<std::array<std::string, 9>> table_data;
     table_data.reserve(devices.size());
     const std::string template_header = "%s: | %s | %s %s %s %s %s %s %s |\n";
@@ -3629,6 +3629,11 @@ void llama_memory_breakdown_print(const struct llama_context * ctx) {
     }
 }
 
+void llama_memory_breakdown_print(const struct llama_context * ctx) {
+    const auto memory_breakdown = ctx->memory_breakdown();
+    llama_memory_breakdown_print_impl(ctx, memory_breakdown);
+}
+
 //
 // training
 //
diff --git a/src/llama-context.h b/src/llama-context.h
index e0d0085c1c3..d65b275f4da 100644
--- a/src/llama-context.h
+++ b/src/llama-context.h
@@ -357,3 +357,7 @@ struct llama_context {
 
     mutable int32_t n_reused = 0; // number of times the previous graph was reused
 };
+
+void llama_memory_breakdown_print_impl(
+    const struct llama_context * ctx,
+    const std::map<ggml_backend_buffer_type_t, llama_memory_breakdown_data> & memory_breakdown);
diff --git a/src/llama.cpp b/src/llama.cpp
index 484372d8d10..754e9e42bd8 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -137,7 +137,7 @@ static std::vector<llama_device_memory_data> llama_get_device_memory_data(
     hp_n_ctx_train = model->hparams.n_ctx_train;
     hp_n_expert    = model->hparams.n_expert;
 
-    llama_memory_breakdown_print(ctx); // goes to debug log
+    llama_memory_breakdown_print_impl(ctx, memory_breakdown); // goes to debug log
 
     llama_free(ctx);
     llama_model_free(model);