From c192d4d9b1ec7fe73e4d8977e45cb40c30a61054 Mon Sep 17 00:00:00 2001 From: Dmitrii Kuvaiskii Date: Wed, 10 Jul 2024 03:15:25 -0700 Subject: [PATCH 1/2] [LibOS] Add `shared_cpu_list` file to sysfs cache info This is e.g. required by the gemm-common Rust crate, see `gemm-common/src/cache.rs`. Without this file, the crate logic incorrectly calculates shared-cpu count as zero and leads to a division-by-zero exception. Signed-off-by: Dmitrii Kuvaiskii --- libos/src/fs/sys/cache_info.c | 9 ++++++++- libos/src/fs/sys/fs.c | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/libos/src/fs/sys/cache_info.c b/libos/src/fs/sys/cache_info.c index 6a58cf5e76..7cb513de48 100644 --- a/libos/src/fs/sys/cache_info.c +++ b/libos/src/fs/sys/cache_info.c @@ -44,7 +44,7 @@ int sys_cache_load(struct libos_dentry* dent, char** out_data, size_t* out_size) const struct pal_topo_info* topo = &g_pal_public_state->topo_info; size_t cache_idx = topo->threads[thread_id].ids_of_caches[cache_class]; const struct pal_cache_info* cache = &topo->caches[cache_idx]; - char str[PAL_SYSFS_MAP_FILESZ] = {'\0'}; + char str[PAL_SYSFS_BUF_FILESZ] = {'\0'}; if (strcmp(name, "shared_cpu_map") == 0) { struct callback_arg callback_arg = { .cache_id_to_match = cache_idx, @@ -52,6 +52,13 @@ int sys_cache_load(struct libos_dentry* dent, char** out_data, size_t* out_size) }; ret = sys_print_as_bitmask(str, sizeof(str), topo->threads_cnt, is_same_cache, &callback_arg); + } else if (strcmp(name, "shared_cpu_list") == 0) { + struct callback_arg callback_arg = { + .cache_id_to_match = cache_idx, + .cache_class = cache_class, + }; + ret = sys_print_as_ranges(str, sizeof(str), topo->threads_cnt, + is_same_cache, &callback_arg); } else if (strcmp(name, "level") == 0) { ret = snprintf(str, sizeof(str), "%zu\n", cache->level); } else if (strcmp(name, "type") == 0) { diff --git a/libos/src/fs/sys/fs.c b/libos/src/fs/sys/fs.c index b941f60234..2ef0eb1faf 100644 --- a/libos/src/fs/sys/fs.c +++ 
b/libos/src/fs/sys/fs.c @@ -269,6 +269,7 @@ static void init_cpu_dir(struct pseudo_node* cpu) { indexX->list_names = &sys_resource_list_names; pseudo_add_str(indexX, "shared_cpu_map", &sys_cache_load); + pseudo_add_str(indexX, "shared_cpu_list", &sys_cache_load); pseudo_add_str(indexX, "level", &sys_cache_load); pseudo_add_str(indexX, "type", &sys_cache_load); pseudo_add_str(indexX, "size", &sys_cache_load); From e52efcdb4812a1c0fdf48b7da4e1c0217d56b118 Mon Sep 17 00:00:00 2001 From: Dmitrii Kuvaiskii Date: Wed, 10 Jul 2024 07:57:17 -0700 Subject: [PATCH 2/2] [CI-Examples] Add Candle ML framework example Candle is a minimalist ML framework for Rust with a focus on performance and ease of use. This commit adds two examples with Candle: simple matrix multiplication (to quickly test functionality) and Quantized LLaMA (to test performance). Signed-off-by: Dmitrii Kuvaiskii --- CI-Examples/candle/.gitignore | 7 ++ CI-Examples/candle/Makefile | 88 +++++++++++++++++++ CI-Examples/candle/README.md | 33 +++++++ .../candle/candle_matmul.manifest.template | 25 ++++++ .../candle/candle_quantized.manifest.template | 37 ++++++++ .../candle/prepared_matmul_src/main.rs | 14 +++ 6 files changed, 204 insertions(+) create mode 100644 CI-Examples/candle/.gitignore create mode 100644 CI-Examples/candle/Makefile create mode 100644 CI-Examples/candle/README.md create mode 100644 CI-Examples/candle/candle_matmul.manifest.template create mode 100644 CI-Examples/candle/candle_quantized.manifest.template create mode 100644 CI-Examples/candle/prepared_matmul_src/main.rs diff --git a/CI-Examples/candle/.gitignore b/CI-Examples/candle/.gitignore new file mode 100644 index 0000000000..7407aa29c7 --- /dev/null +++ b/CI-Examples/candle/.gitignore @@ -0,0 +1,7 @@ +/candle_matmul +/candle_quantized +/src + +# model +/*.bin +/*.json diff --git a/CI-Examples/candle/Makefile b/CI-Examples/candle/Makefile new file mode 100644 index 0000000000..ed27c33c49 --- /dev/null +++ b/CI-Examples/candle/Makefile 
@@ -0,0 +1,88 @@ +# Copyright (C) 2024 Gramine contributors +# SPDX-License-Identifier: BSD-3-Clause + +ARCH_LIBDIR ?= /lib/$(shell $(CC) -dumpmachine) + +ifeq ($(DEBUG),1) +GRAMINE_LOG_LEVEL = debug +else +GRAMINE_LOG_LEVEL = error +endif + +SRCDIR = src + +.PHONY: all +all: candle_matmul candle_matmul.manifest candle_quantized candle_quantized.manifest +ifeq ($(SGX),1) +all: candle_matmul.manifest.sgx candle_matmul.sig candle_quantized.manifest.sgx candle_quantized.sig +endif + +######################### Simple Matrix Multiplication ######################### + +$(SRCDIR)/candle_matmul/target/debug/candle_matmul: + mkdir -p $(SRCDIR) && cd $(SRCDIR) && \ + cargo new candle_matmul && cd candle_matmul && \ + cargo add --git https://github.com/huggingface/candle.git candle-core && \ + cp ../../prepared_matmul_src/main.rs ./src/main.rs && \ + cargo build + +candle_matmul: $(SRCDIR)/candle_matmul/target/debug/candle_matmul + cp $< $@ + +candle_matmul.manifest: candle_matmul.manifest.template + gramine-manifest \ + -Dlog_level=$(GRAMINE_LOG_LEVEL) \ + -Darch_libdir=$(ARCH_LIBDIR) \ + $< > $@ + +candle_matmul.manifest.sgx candle_matmul.sig: candle_matmul_sgx_sign + @: + +.INTERMEDIATE: candle_matmul_sgx_sign +candle_matmul_sgx_sign: candle_matmul.manifest candle_matmul + gramine-sgx-sign \ + --manifest $< \ + --output $<.sgx + +############################## Quantized LLaMA ################################# + +llama-2-7b.ggmlv3.q4_0.bin: + ../common_tools/download --output $@ \ + --sha256 bfa26d855e44629c4cf919985e90bd7fa03b77eea1676791519e39a4d45fd4d5 \ + --url https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/$@ + +tokenizer.json: + ../common_tools/download --output $@ \ + --sha256 8eea70c4866c4f1320ba096fc986ac82038a8374dbe135212ba7628835b4a6f1 \ + --url https://huggingface.co/hf-internal-testing/llama-tokenizer/raw/main/$@ + +$(SRCDIR)/candle_quantized/target/release/examples/quantized: llama-2-7b.ggmlv3.q4_0.bin tokenizer.json + mkdir -p $(SRCDIR) && cd 
$(SRCDIR) && \ + git clone https://github.com/huggingface/candle.git candle_quantized && \ + cd candle_quantized && \ + cargo build --example quantized --release + +candle_quantized: $(SRCDIR)/candle_quantized/target/release/examples/quantized + cp $< $@ + +candle_quantized.manifest: candle_quantized.manifest.template + gramine-manifest \ + -Dlog_level=$(GRAMINE_LOG_LEVEL) \ + -Darch_libdir=$(ARCH_LIBDIR) \ + $< > $@ + +candle_quantized.manifest.sgx candle_quantized.sig: candle_quantized_sgx_sign + @: + +.INTERMEDIATE: candle_quantized_sgx_sign +candle_quantized_sgx_sign: candle_quantized.manifest candle_quantized + gramine-sgx-sign \ + --manifest $< \ + --output $<.sgx +.PHONY: clean +clean: + $(RM) *.token *.sig *.manifest.sgx *.manifest candle_matmul candle_quantized + +.PHONY: distclean +distclean: clean + $(RM) -r $(SRCDIR) *.tar.gz *.bin *.json diff --git a/CI-Examples/candle/README.md b/CI-Examples/candle/README.md new file mode 100644 index 0000000000..35fc41eaf7 --- /dev/null +++ b/CI-Examples/candle/README.md @@ -0,0 +1,33 @@ +# Candle + +Candle is a minimalist ML framework for Rust with a focus on performance +(including GPU support) and ease of use: https://github.com/huggingface/candle + +This directory contains the Makefile and the template manifest for the most +recent version of Candle as of this writing (v0.6.0). + +# Warning + +The `candle_quantized` app will download ~4GB of data (model + tokenizer). This +happens automatically in the Makefile. 
+ +# Quick Start + +```sh +# build Candle (uses Rust Cargo) and the final manifest +make SGX=1 + +# run simple matrix multiplication +# example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#get-started +./candle_matmul +gramine-direct ./candle_matmul +gramine-sgx ./candle_matmul + +# run Quantized LLaMA (quantized version of the LLaMA model) +# note that for Gramine, the cmdline args are already defined in the manifest file +# example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#check-out-our-examples +RAYON_NUM_THREADS=36 ./candle_quantized \ + --model llama-2-7b.ggmlv3.q4_0.bin --tokenizer tokenizer.json --sample-len 200 +RAYON_NUM_THREADS=36 gramine-direct ./candle_quantized +RAYON_NUM_THREADS=36 gramine-sgx ./candle_quantized +``` diff --git a/CI-Examples/candle/candle_matmul.manifest.template b/CI-Examples/candle/candle_matmul.manifest.template new file mode 100644 index 0000000000..62bd36b018 --- /dev/null +++ b/CI-Examples/candle/candle_matmul.manifest.template @@ -0,0 +1,25 @@ +# Copyright (C) 2024 Gramine contributors +# SPDX-License-Identifier: BSD-3-Clause + +libos.entrypoint = "/candle_matmul" + +loader.log_level = "{{ log_level }}" + +loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}" + +fs.mounts = [ + { path = "/candle_matmul", uri = "file:candle_matmul" }, + { path = "/lib", uri = "file:{{ gramine.runtimedir() }}" }, + { path = "{{ arch_libdir }}/libgcc_s.so.1", uri = "file:{{ arch_libdir }}/libgcc_s.so.1" }, +] + +sgx.debug = true +sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }} +sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '16' }} +sgx.enclave_size = "1G" + +sgx.trusted_files = [ + "file:candle_matmul", + "file:{{ gramine.runtimedir() }}/", + "file:{{ arch_libdir }}/libgcc_s.so.1", +] diff --git a/CI-Examples/candle/candle_quantized.manifest.template b/CI-Examples/candle/candle_quantized.manifest.template new file mode 100644 index 
0000000000..364ac79a0a --- /dev/null +++ b/CI-Examples/candle/candle_quantized.manifest.template @@ -0,0 +1,37 @@ +# Copyright (C) 2024 Gramine contributors +# SPDX-License-Identifier: BSD-3-Clause + +libos.entrypoint = "/candle_quantized" + +loader.log_level = "{{ log_level }}" + +loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}" +loader.env.RAYON_NUM_THREADS = { passthrough = true } + +loader.argv = [ "candle_quantized", "--model", "llama-2-7b.ggmlv3.q4_0.bin", + "--tokenizer", "tokenizer.json", "--sample-len", "200" ] + +fs.mounts = [ + { path = "/candle_quantized", uri = "file:candle_quantized" }, + { path = "/lib", uri = "file:{{ gramine.runtimedir() }}" }, + { path = "{{ arch_libdir }}", uri = "file:{{ arch_libdir }}" }, + + { path = "/llama-2-7b.ggmlv3.q4_0.bin", uri = "file:llama-2-7b.ggmlv3.q4_0.bin" }, + { path = "/tokenizer.json", uri = "file:tokenizer.json" }, +] + +sgx.debug = true +sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }} +sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '256' }} +sgx.enclave_size = "32G" + +sgx.trusted_files = [ + "file:candle_quantized", + "file:{{ gramine.runtimedir() }}/", + "file:{{ arch_libdir }}/libcrypto.so.3", + "file:{{ arch_libdir }}/libgcc_s.so.1", + "file:{{ arch_libdir }}/libssl.so.3", + + "file:llama-2-7b.ggmlv3.q4_0.bin", + "file:tokenizer.json", +] diff --git a/CI-Examples/candle/prepared_matmul_src/main.rs b/CI-Examples/candle/prepared_matmul_src/main.rs new file mode 100644 index 0000000000..f40ca0020d --- /dev/null +++ b/CI-Examples/candle/prepared_matmul_src/main.rs @@ -0,0 +1,14 @@ +// example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#get-started + +use candle_core::{Device, Tensor}; + +fn main() -> Result<(), Box<dyn std::error::Error>> { + let device = Device::Cpu; + + let a = Tensor::randn(0f32, 1., (2, 3), &device)?; + let b = Tensor::randn(0f32, 1., (3, 4), &device)?; + + let c = a.matmul(&b)?; + println!("{c}"); + Ok(()) +}