Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions src/llama-context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3493,11 +3493,11 @@ void llama_perf_context_reset(llama_context * ctx) {
ctx->perf_reset();
}

void llama_memory_breakdown_print(const struct llama_context * ctx) {
const auto & devices = ctx->get_model().devices;

std::map<ggml_backend_buffer_type_t, llama_memory_breakdown_data> memory_breakdown = ctx->memory_breakdown();

void llama_memory_breakdown_print_impl(
const struct llama_context * ctx,
const std::map<ggml_backend_buffer_type_t, llama_memory_breakdown_data> & memory_breakdown) {
const auto & devices = ctx->get_model().devices;
std::vector<std::array<std::string, 9>> table_data;
table_data.reserve(devices.size());
const std::string template_header = "%s: | %s | %s %s %s %s %s %s %s |\n";
Expand Down Expand Up @@ -3629,6 +3629,11 @@ void llama_memory_breakdown_print(const struct llama_context * ctx) {
}
}

// Public entry point: computes the per-buffer-type memory breakdown for the
// context and forwards it to the shared printing implementation.
void llama_memory_breakdown_print(const struct llama_context * ctx) {
    llama_memory_breakdown_print_impl(ctx, ctx->memory_breakdown());
}

//
// training
//
Expand Down
4 changes: 4 additions & 0 deletions src/llama-context.h
Original file line number Diff line number Diff line change
Expand Up @@ -357,3 +357,7 @@ struct llama_context {

mutable int32_t n_reused = 0; // number of times the previous graph was reused
};

// Prints a memory-breakdown table for `ctx` using an already-computed
// per-buffer-type breakdown, so callers that have the data on hand
// (e.g. llama_get_device_memory_data in llama.cpp) can print it without
// querying the context again; llama_memory_breakdown_print() is the
// convenience wrapper that computes the breakdown itself.
void llama_memory_breakdown_print_impl(
    const struct llama_context * ctx,
    const std::map<ggml_backend_buffer_type_t, llama_memory_breakdown_data> & memory_breakdown);
2 changes: 1 addition & 1 deletion src/llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ static std::vector<llama_device_memory_data> llama_get_device_memory_data(
hp_n_ctx_train = model->hparams.n_ctx_train;
hp_n_expert = model->hparams.n_expert;

llama_memory_breakdown_print(ctx); // goes to debug log
llama_memory_breakdown_print_impl(ctx, memory_breakdown); // goes to debug log

llama_free(ctx);
llama_model_free(model);
Expand Down