diff --git a/.github/workflows/small-cilkapps.yml b/.github/workflows/small-cilkapps.yml new file mode 100644 index 0000000..7d3b31f --- /dev/null +++ b/.github/workflows/small-cilkapps.yml @@ -0,0 +1,99 @@ +name: Small Cilk application tests + +permissions: + contents: read + +on: + workflow_dispatch: + push: + pull_request: + +concurrency: + # Skip intermediate builds: always. + # Cancel intermediate builds: only if it is a pull request build. + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} + +jobs: + small-apps: + runs-on: ${{ matrix.os }} + container: + image: ${{(startsWith(matrix.os, 'ubuntu') && 'ghcr.io/llvm/ci-ubuntu-22.04:latest') || null}} + volumes: + - /mnt/:/mnt/ + options: --user root + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macOS-13, macOS-latest] + steps: + - name: checkout + uses: actions/checkout@v4 + - name: Setup OpenCilk compiler + id: build-opencilk + uses: OpenCilk/actions/build-opencilk-project@main + with: + projects: clang + os_list: '${{ matrix.os }}' + extra_cmake_args: -DLLVM_TARGETS_TO_BUILD=host + - name: Build cheetah + id: build-cheetah + uses: OpenCilk/actions/build-cheetah@main + with: + opencilk_install: '${{ steps.build-opencilk.outputs.opencilk-installdir }}' + opencilk_build: '${{ steps.build-opencilk.outputs.opencilk-builddir }}' + - name: Build cilktools + id: build-cilktools + shell: bash + run: | + builddir="$(pwd)"/build + opencilkdir='${{ steps.build-opencilk.outputs.opencilk-installdir }}' + opencilkbuilddir='${{ steps.build-opencilk.outputs.opencilk-builddir }}' + clangversion="$($opencilkdir/bin/llvm-config --version | cut -d '.' 
-f 1)" + mkdir -p $builddir + cmake -G Ninja \ + -B "$builddir" \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_C_COMPILER=$opencilkdir/bin/clang \ + -DCMAKE_CXX_COMPILER=$opencilkdir/bin/clang++ \ + -DLLVM_CMAKE_DIR=$opencilkdir \ + -DCMAKE_INSTALL_PREFIX=$opencilkdir \ + -DLLVM_COMMON_CMAKE_UTILS=$opencilkbuilddir/../cmake \ + -DCILKTOOLS_OUTPUT_DIR="$(pwd)"/lib/clang/$clangversion \ + -DCILKTOOLS_INSTALL_PATH=$opencilkdir/lib/clang/$clangversion + ninja -C "$builddir" install + - name: Checkout small application tests + uses: actions/checkout@v4 + with: + repository: OpenCilk/smallapps + path: smallapps + - name: Check Cilkscale + shell: bash + run: | + cheetahdir="$(pwd)"/build + opencilkdir='${{ steps.build-opencilk.outputs.opencilk-installdir }}' + make_prefix="" + if [ "${{ runner.os }}" == "macOS" ]; then + # Use xcrun to build benchmarks on macOS. + make_prefix="xcrun" + fi + $make_prefix make -C smallapps -B check \ + CC=$opencilkdir/bin/clang \ + CXX=$opencilkdir/bin/clang++ \ + EXTRA_CFLAGS="-fcilktool=cilkscale" \ + EXTRA_LDFLAGS="-fcilktool=cilkscale" + - name: Check Cilksan + shell: bash + run: | + cheetahdir="$(pwd)"/build + opencilkdir='${{ steps.build-opencilk.outputs.opencilk-installdir }}' + make_prefix="" + if [ "${{ runner.os }}" == "macOS" ]; then + # Use xcrun to build benchmarks on macOS. + make_prefix="xcrun" + fi + $make_prefix make -C smallapps -B one-check \ + CC=$opencilkdir/bin/clang \ + CXX=$opencilkdir/bin/clang++ \ + EXTRA_CFLAGS="-fsanitize=cilk -g" \ + EXTRA_LDFLAGS="-fsanitize=cilk" \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..6050e59 --- /dev/null +++ b/README.md @@ -0,0 +1,37 @@ +# Building a standalone copy of the OpenCilk tools + +These instructions assume that you are building the OpenCilk tools +using the OpenCilk compiler. + +## Building with CMake + +1. Make a build directory at the top level and enter it: + + ```console + mkdir build + cd build + ``` + +2. 
Configure CMake. Make sure to specify `CMAKE_C_COMPILER`, + `CMAKE_CXX_COMPILER`, and `LLVM_CMAKE_DIR` to point to the + corresponding build or installation of the OpenCilk compiler + binaries. In addition, set `CMAKE_BUILD_TYPE` to specify the build + type, such as `Debug`, for an unoptimized build with all + assertions enabled; `Release`, for a fully optimized build with + assertions disabled; or `RelWithDebInfo`, to enable some + optimizations and assertions. (The default build type is `Debug`.) + + Example configuration: + + ```console + cmake -DCMAKE_C_COMPILER=/path/to/opencilk-project/build/bin/clang -DCMAKE_CXX_COMPILER=/path/to/opencilk-project/build/bin/clang++ -DCMAKE_BUILD_TYPE=Release -DLLVM_CMAKE_DIR=/path/to/opencilk-project/build ../ + ``` + +3. Build the tools: + + ```console + cmake --build . -- -j + ``` + +To clean the build, run `cmake --build . --target clean` from the build +directory. diff --git a/cilksan/CMakeLists.txt b/cilksan/CMakeLists.txt index 6fa44be..f8a3922 100644 --- a/cilksan/CMakeLists.txt +++ b/cilksan/CMakeLists.txt @@ -12,7 +12,8 @@ set(CILKSAN_SOURCES set(CILKSAN_BITCODE_SOURCE driver.cpp - libhooks.cpp) + libhooks.cpp + reducers.cpp) set(CILKSAN_RR_FILES rr_commands.py) diff --git a/cilksan/cilksan.cpp b/cilksan/cilksan.cpp index bfb204d..81b3184 100644 --- a/cilksan/cilksan.cpp +++ b/cilksan/cilksan.cpp @@ -1,8 +1,3 @@ -#include -#include -#include -#include - #include "cilksan_internal.h" #include "debug_util.h" #include "disjointset.h" @@ -12,6 +7,9 @@ #include "simple_shadow_mem.h" #include "spbag.h" #include "stack.h" +#include +#include +#include // FILE io used to print error messages FILE *err_io = stderr; diff --git a/cilksan/cilksan_internal.h b/cilksan/cilksan_internal.h index 7f65d93..7b6da63 100644 --- a/cilksan/cilksan_internal.h +++ b/cilksan/cilksan_internal.h @@ -2,10 +2,6 @@ #ifndef __CILKSAN_INTERNAL_H__ #define __CILKSAN_INTERNAL_H__ -#include -#include -#include - #include "addrmap.h" #include "csan.h" 
#include "dictionary.h" @@ -15,6 +11,8 @@ #include "locksets.h" #include "shadow_mem_allocator.h" #include "stack.h" +#include +#include extern bool CILKSAN_INITIALIZED; diff --git a/cilksan/csanrt.cpp b/cilksan/csanrt.cpp index 19a3240..19b9d58 100644 --- a/cilksan/csanrt.cpp +++ b/cilksan/csanrt.cpp @@ -1,8 +1,7 @@ -#include -#include -#include -#include #include "csan.h" +#include +#include +#include #define CSIRT_API __attribute__((visibility("default"))) diff --git a/cilksan/debug_util.cpp b/cilksan/debug_util.cpp index cbeaa37..8df9a0b 100644 --- a/cilksan/debug_util.cpp +++ b/cilksan/debug_util.cpp @@ -1,10 +1,10 @@ +#include "debug_util.h" +#include "driver.h" #include #include #include -#include +#include #include -#include "debug_util.h" -#include "driver.h" /* static void print_bt(FILE *f) { diff --git a/cilksan/debug_util.h b/cilksan/debug_util.h index aa0c903..8dc6662 100644 --- a/cilksan/debug_util.h +++ b/cilksan/debug_util.h @@ -1,7 +1,7 @@ #ifndef __DEBUG_UTIL_H__ #define __DEBUG_UTIL_H__ -#include +#include #ifndef CILKSAN_DEBUG #ifdef _DEBUG diff --git a/cilksan/dictionary.h b/cilksan/dictionary.h index 745876d..47c67d9 100644 --- a/cilksan/dictionary.h +++ b/cilksan/dictionary.h @@ -2,16 +2,14 @@ #ifndef __DICTIONARY__ #define __DICTIONARY__ -#include -#include -#include -#include - #include "debug_util.h" #include "disjointset.h" #include "frame_data.h" -#include "race_info.h" #include "spbag.h" +#include +#include +#include +#include using DS_t = DisjointSet_t; diff --git a/cilksan/disjointset.h b/cilksan/disjointset.h index 964a368..e50583a 100644 --- a/cilksan/disjointset.h +++ b/cilksan/disjointset.h @@ -3,13 +3,13 @@ #ifndef _DISJOINTSET_H #define _DISJOINTSET_H -#include -#include -#include - #include "aligned_alloc.h" #include "debug_util.h" #include "race_info.h" +#include +#include +#include +#include #if DISJOINTSET_DEBUG #define WHEN_DISJOINTSET_DEBUG(stmt) do { stmt; } while(0) diff --git a/cilksan/driver.cpp b/cilksan/driver.cpp 
index 468f024..e748965 100644 --- a/cilksan/driver.cpp +++ b/cilksan/driver.cpp @@ -1,21 +1,18 @@ +#include "checking.h" +#include "cilksan_internal.h" +#include "debug_util.h" +#include "driver.h" +#include "race_info.h" +#include "stack.h" +#include #include #include #include #include -#include #include -#include #include #include -#include "checking.h" -#include "cilksan_internal.h" -#include "debug_util.h" -#include "driver.h" -#include "race_detect_update.h" -#include "simple_shadow_mem.h" -#include "stack.h" - // FILE io used to print error messages extern FILE *err_io; @@ -1100,7 +1097,23 @@ CILKSAN_API void __cilksan_record_alloc(void *addr, size_t size) { CILKSAN_API void __cilksan_record_free(void *ptr) { CheckingRAII nocheck; - CilkSanImpl.mark_free(ptr); + if (!should_check()) { + CilkSanImpl.mark_free(ptr); + return; + } + const size_t *size = CilkSanImpl.malloc_sizes.get((uintptr_t)ptr); + if (CilkSanImpl.malloc_sizes.contains((uintptr_t)ptr)) { + if (!is_execution_parallel()) { + CilkSanImpl.clear_alloc((size_t)ptr, *size); + CilkSanImpl.clear_shadow_memory((size_t)ptr, *size); + } else { + // Treat a free as a write to all freed addresses. This way the tool will + // report a race if an operation tries to access a location that was freed + // in parallel. 
+ CilkSanImpl.record_free((uintptr_t)ptr, *size, UNKNOWN_CSI_ID, MAType_t::FREE); + } + CilkSanImpl.malloc_sizes.remove((uintptr_t)ptr); + } } // FIXME: Currently these dynamic interposers are never used, because common diff --git a/cilksan/driver.h b/cilksan/driver.h index f6138d9..bd2c84c 100644 --- a/cilksan/driver.h +++ b/cilksan/driver.h @@ -2,14 +2,12 @@ #ifndef __DRIVER_H__ #define __DRIVER_H__ -#include -#include -#include - -#include "addrmap.h" #include "cilksan_internal.h" #include "locksets.h" #include "stack.h" +#include +#include +#include #ifndef CILKSAN_VIS #define CILKSAN_VIS __attribute__((visibility("default"))) diff --git a/cilksan/frame_data.h b/cilksan/frame_data.h index 798cb39..8145d6a 100644 --- a/cilksan/frame_data.h +++ b/cilksan/frame_data.h @@ -2,8 +2,6 @@ #ifndef __FRAME_DATA_H__ #define __FRAME_DATA_H__ -#include "cilksan_internal.h" -#include "csan.h" #include "disjointset.h" #include "hypertable.h" #include "spbag.h" diff --git a/cilksan/hook_format.inc b/cilksan/hook_format.inc index eac8afc..2b3b866 100644 --- a/cilksan/hook_format.inc +++ b/cilksan/hook_format.inc @@ -10,9 +10,13 @@ // and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html // with a few common GNU extensions. -#include - #include "debug_util.h" +#include "driver.h" +#include +#include +#include +#include +#include #define CHECK(p) cilksan_assert(p) #define CHECK_GT(n, v) cilksan_assert(n > 0) @@ -149,7 +153,7 @@ static int format_get_value_size(char convSpecifier, case 'z': return sizeof(size_t); case 't': - return sizeof(ptrdiff_t); + return sizeof(std::ptrdiff_t); case 0: return sizeof(int); default: diff --git a/cilksan/hyperobject_base.h b/cilksan/hyperobject_base.h index 3ac250b..6b934c6 100644 --- a/cilksan/hyperobject_base.h +++ b/cilksan/hyperobject_base.h @@ -2,11 +2,8 @@ #ifndef _HYPEROBJECT_BASE #define _HYPEROBJECT_BASE -#include -#include -#include - #include +#include // Reducer data. 
// diff --git a/cilksan/hypertable.h b/cilksan/hypertable.h index 68270e5..9d0ca49 100644 --- a/cilksan/hypertable.h +++ b/cilksan/hypertable.h @@ -2,8 +2,9 @@ #ifndef _HYPERTABLE_H #define _HYPERTABLE_H -#include #include "hyperobject_base.h" +#include +#include // Helper methods for testing and setting keys. static const uintptr_t KEY_EMPTY = 0UL; @@ -26,7 +27,7 @@ class hyper_table { // An entry in the hash table. struct bucket { uintptr_t key = KEY_EMPTY; /* EMPTY, DELETED, or a user-provided pointer. */ - index_t hash; /* hash of the key when inserted into the table. */ + index_t hash = 0; /* hash of the key when inserted into the table. */ reducer_base value; void make_tombstone() { key = KEY_DELETED; } diff --git a/cilksan/libhooks.cpp b/cilksan/libhooks.cpp index 921c381..8a5940d 100644 --- a/cilksan/libhooks.cpp +++ b/cilksan/libhooks.cpp @@ -1,3 +1,7 @@ +#include "cilksan_internal.h" +#include "debug_util.h" +#include "driver.h" +#include #include #include #include @@ -5,10 +9,6 @@ #include #include -#include "cilksan_internal.h" -#include "debug_util.h" -#include "driver.h" - CILKSAN_API void __csan_default_libhook(const csi_id_t call_id, const csi_id_t func_id, unsigned MAAP_count) { @@ -59,6 +59,7 @@ using v4ptrs = vec_t; using v8f32 = vec_t; using v8f64 = vec_t; using v8i8 = vec_t; +using v8i16 = vec_t; using v8i32 = vec_t; using v8ptrs = vec_t; @@ -476,6 +477,18 @@ CILKSAN_API void __csan_llvm_aarch64_neon_ld1x2_v4f32_p0( generic_aarch64_neon_ld(call_id, MAAP_count, prop, val, ptr); } +CILKSAN_API void __csan_llvm_aarch64_neon_ld1x2_v8i16_p0( + const csi_id_t call_id, const csi_id_t func_id, unsigned MAAP_count, + const call_prop_t prop, void *val, int8_t *ptr) { + generic_aarch64_neon_ld(call_id, MAAP_count, prop, val, ptr); +} + +CILKSAN_API void __csan_llvm_aarch64_neon_ld1x2_v16i8_p0( + const csi_id_t call_id, const csi_id_t func_id, unsigned MAAP_count, + const call_prop_t prop, void *val, int8_t *ptr) { + generic_aarch64_neon_ld(call_id, 
MAAP_count, prop, val, ptr); +} + CILKSAN_API void __csan_llvm_aarch64_neon_ld1x3_v4f32_p0( const csi_id_t call_id, const csi_id_t func_id, unsigned MAAP_count, const call_prop_t prop, void *val, float *ptr) { @@ -625,10 +638,10 @@ CILKSAN_API void __csan_llvm_clear_cache(const csi_id_t call_id, return; } -CILKSAN_API void __csan_llvm_stacksave(const csi_id_t call_id, - const csi_id_t func_id, - unsigned MAAP_count, - const call_prop_t prop, void *sp) { +CILKSAN_API void __csan_llvm_stacksave_p0(const csi_id_t call_id, + const csi_id_t func_id, + unsigned MAAP_count, + const call_prop_t prop, void *sp) { if (!CILKSAN_INITIALIZED) return; @@ -644,10 +657,10 @@ CILKSAN_API void __csan_llvm_stacksave(const csi_id_t call_id, CilkSanImpl.advance_stack_frame((uintptr_t)sp); } -CILKSAN_API void __csan_llvm_stackrestore(const csi_id_t call_id, - const csi_id_t func_id, - unsigned MAAP_count, - const call_prop_t prop, void *sp) { +CILKSAN_API void __csan_llvm_stackrestore_p0(const csi_id_t call_id, + const csi_id_t func_id, + unsigned MAAP_count, + const call_prop_t prop, void *sp) { START_HOOK(call_id); for (unsigned i = 0; i < MAAP_count; ++i) @@ -659,6 +672,42 @@ CILKSAN_API void __csan_llvm_stackrestore(const csi_id_t call_id, CilkSanImpl.restore_stack(call_id, (uintptr_t)sp); } +CILKSAN_API void __csan_llvm_get_dynamic_area_offset_i64(const csi_id_t call_id, + const csi_id_t func_id, + unsigned MAAP_count, + const call_prop_t prop, + int64_t result) { + if (!CILKSAN_INITIALIZED) + return; + + if (!should_check()) + return; + + for (unsigned i = 0; i < MAAP_count; ++i) + MAAPs.pop(); + + if (!is_execution_parallel()) + return; +} + +CILKSAN_API void __csan_llvm_get_dynamic_area_offset_i32(const csi_id_t call_id, + const csi_id_t func_id, + unsigned MAAP_count, + const call_prop_t prop, + int32_t result) { + if (!CILKSAN_INITIALIZED) + return; + + if (!should_check()) + return; + + for (unsigned i = 0; i < MAAP_count; ++i) + MAAPs.pop(); + + if (!is_execution_parallel()) 
+ return; +} + CILKSAN_API void __csan_llvm_prefetch_p0(const csi_id_t call_id, const csi_id_t func_id, unsigned MAAP_count, const call_prop_t prop, void *addr, @@ -701,10 +750,10 @@ CILKSAN_API void __csan_llvm_trap(const csi_id_t call_id, MAAPs.pop(); } -CILKSAN_API void __csan_llvm_va_start(const csi_id_t call_id, - const csi_id_t func_id, - unsigned MAAP_count, - const call_prop_t prop, va_list ap) { +CILKSAN_API void __csan_llvm_va_start_p0(const csi_id_t call_id, + const csi_id_t func_id, + unsigned MAAP_count, + const call_prop_t prop, va_list ap) { if (!CILKSAN_INITIALIZED) return; @@ -715,9 +764,10 @@ CILKSAN_API void __csan_llvm_va_start(const csi_id_t call_id, MAAPs.pop(); } -CILKSAN_API void __csan_llvm_va_end(const csi_id_t call_id, - const csi_id_t func_id, unsigned MAAP_count, - const call_prop_t prop, va_list ap) { +CILKSAN_API void __csan_llvm_va_end_p0(const csi_id_t call_id, + const csi_id_t func_id, + unsigned MAAP_count, + const call_prop_t prop, va_list ap) { if (!CILKSAN_INITIALIZED) return; @@ -1108,6 +1158,24 @@ CILKSAN_API void __csan_bcmp(const csi_id_t call_id, const csi_id_t func_id, check_read_bytes(call_id, s2_MAAPVal, s2, n); } +CILKSAN_API void __csan_cabs(const csi_id_t call_id, const csi_id_t func_id, + unsigned MAAP_count, const call_prop_t prop, + double result, std::complex z) { + return; +} + +CILKSAN_API void __csan_cabsf(const csi_id_t call_id, const csi_id_t func_id, + unsigned MAAP_count, const call_prop_t prop, + float result, std::complex z) { + return; +} + +CILKSAN_API void __csan_cabsl(const csi_id_t call_id, const csi_id_t func_id, + unsigned MAAP_count, const call_prop_t prop, + long double result, std::complex z) { + return; +} + CILKSAN_API void __csan_calloc(const csi_id_t call_id, const csi_id_t func_id, unsigned MAAP_count, const call_prop_t prop, void *result, size_t num, size_t size) { @@ -1245,6 +1313,24 @@ CILKSAN_API void __csan_lldiv(const csi_id_t call_id, const csi_id_t func_id, return; } 
+CILKSAN_API void 
__csan_erff(const csi_id_t call_id, const csi_id_t func_id, + unsigned MAAP_count, const call_prop_t prop, + float result, float arg) { + return; +} + +CILKSAN_API void __csan_erf(const csi_id_t call_id, const csi_id_t func_id, + unsigned MAAP_count, const call_prop_t prop, + double result, double arg) { + return; +} + +CILKSAN_API void __csan_erfl(const csi_id_t call_id, const csi_id_t func_id, + unsigned MAAP_count, const call_prop_t prop, + long double result, long double arg) { + return; +} + CILKSAN_API void __csan_execl(const csi_id_t call_id, const csi_id_t func_id, unsigned MAAP_count, const call_prop_t prop, int result, const char *filename, const char *arg, @@ -2165,6 +2251,25 @@ CILKSAN_API void __csan_fstat(const csi_id_t call_id, const csi_id_t func_id, check_write_bytes(call_id, buf_MAAPVal, buf, sizeof(struct stat)); } +#if defined(__linux__) +CILKSAN_API void __csan_fstat64(const csi_id_t call_id, const csi_id_t func_id, + unsigned MAAP_count, const call_prop_t prop, + int result, int fd, struct stat64 *buf) { + START_HOOK(call_id); + + MAAP_t buf_MAAPVal = MAAP_t::ModRef; + if (MAAP_count > 0) { + buf_MAAPVal = MAAPs.back().second; + MAAPs.pop(); + } + + if (!is_execution_parallel()) + return; + + check_write_bytes(call_id, buf_MAAPVal, buf, sizeof(struct stat64)); +} +#endif + CILKSAN_API void __csan_ftell(const csi_id_t call_id, const csi_id_t func_id, unsigned MAAP_count, const call_prop_t prop, long result, FILE *stream) { @@ -2619,7 +2724,7 @@ CILKSAN_API void __csan_memcpy(const csi_id_t call_id, const csi_id_t func_id, CILKSAN_API void __csan___memcpy_chk(const csi_id_t call_id, const csi_id_t func_id, unsigned MAAP_count, const call_prop_t prop, void *result, void *dst, const void *src, size_t len, - size_t count) { + size_t destlen) { START_HOOK(call_id); MAAP_t dest_MAAPVal = MAAP_t::ModRef, src_MAAPVal = MAAP_t::ModRef; @@ -2630,14 +2735,14 @@ CILKSAN_API void __csan___memcpy_chk(const csi_id_t call_id, const csi_id_t func MAAPs.pop(); } 
- if (!is_execution_parallel() || len > count) + if (!is_execution_parallel() || len > destlen) return; if (nullptr == dst || nullptr == src) return; check_read_bytes(call_id, src_MAAPVal, src, len); - check_write_bytes(call_id, dest_MAAPVal, dst, count); + check_write_bytes(call_id, dest_MAAPVal, dst, len); } CILKSAN_API void __csan_memmove(const csi_id_t call_id, const csi_id_t func_id, diff --git a/cilksan/locksets.h b/cilksan/locksets.h index f246e79..4c9c984 100644 --- a/cilksan/locksets.h +++ b/cilksan/locksets.h @@ -3,12 +3,12 @@ #ifndef _LOCKSETS_H #define _LOCKSETS_H -#include +#include "debug_util.h" +#include "dictionary.h" +#include +#include #include #include -#include - -#include "debug_util.h" enum IntersectionResult_t : uint8_t { EMPTY = 0x0, diff --git a/cilksan/print_addr.cpp b/cilksan/print_addr.cpp index 6a14811..b8012fe 100644 --- a/cilksan/print_addr.cpp +++ b/cilksan/print_addr.cpp @@ -1,18 +1,13 @@ +#include "csan.h" +#include "cilksan_internal.h" #include #include #include -#include -#include -#include #include - -#include -#include #include - -#include "csan.h" -#include "cilksan_internal.h" -#include "debug_util.h" +#include +#include +#include extern bool is_running_under_rr; diff --git a/cilksan/race_detect_update.h b/cilksan/race_detect_update.h index 015aa76..266d5c4 100644 --- a/cilksan/race_detect_update.h +++ b/cilksan/race_detect_update.h @@ -2,8 +2,8 @@ #ifndef __RACE_DETECT_UPDATE__ #define __RACE_DETECT_UPDATE__ -#include "csan.h" #include "simple_shadow_mem.h" +#include // Check races on memory [addr, addr+mem_size) with this read access. Once done // checking, update shadow_memory with this new read access. 
diff --git a/cilksan/race_info.h b/cilksan/race_info.h index b6ae7a3..1391ed0 100644 --- a/cilksan/race_info.h +++ b/cilksan/race_info.h @@ -2,6 +2,10 @@ #ifndef __RACE_INFO_H__ #define __RACE_INFO_H__ +#include "debug_util.h" +#include +#include + #ifndef CHECK_EQUIVALENT_STACKS #define CHECK_EQUIVALENT_STACKS false #endif diff --git a/cilksan/reducers.cpp b/cilksan/reducers.cpp index 3516940..a0e8a9f 100644 --- a/cilksan/reducers.cpp +++ b/cilksan/reducers.cpp @@ -1,10 +1,10 @@ -#include -#include -#include - #include "cilksan_internal.h" #include "debug_util.h" #include "driver.h" +#include "vector.h" +#include +#include +#include // Hooks for handling reducer hyperobjects. @@ -18,6 +18,9 @@ static void reducer_register(const csi_id_t call_id, unsigned MAAP_count, reducer_views->insert((hyper_table::bucket){ .key = (uintptr_t)key, .value = {.view = key, .reduce_fn = (__cilk_reduce_fn)reduce_ptr}}); + DBG_TRACE(REDUCER, + "reducer_register: registered %p, reducer_views %p, occupancy %d\n", + key, reducer_views, reducer_views->occupancy); } if (!is_execution_parallel()) @@ -59,6 +62,10 @@ CILKSAN_API void __csan_llvm_reducer_unregister(const csi_id_t call_id, // Remove this reducer from the table. if (hyper_table *reducer_views = CilkSanImpl.get_reducer_views()) { + DBG_TRACE( + REDUCER, + "reducer_unregister: unregistering %p, reducer_views %p, occupancy %d\n", + key, reducer_views, reducer_views->occupancy); reducer_views->remove((uintptr_t)key); } @@ -140,10 +147,16 @@ void CilkSanImpl_t::reduce_local_views() { // Reduce every reducer view in the table with its leftmost view. 
int32_t capacity = reducer_views->capacity; hyper_table::bucket *buckets = reducer_views->buckets; + bool holdsLeftmostViews = false; + Vector_t keysToRemove; for (int32_t i = 0; i < capacity; ++i) { hyper_table::bucket b = buckets[i]; if (!is_valid(b.key)) continue; + if (b.key == (uintptr_t)(b.value.view)) { + holdsLeftmostViews = true; + continue; + } DBG_TRACE(REDUCER, "reduce_local_views: found view to reduce at %d: %p -> %p\n", i, @@ -155,14 +168,20 @@ void CilkSanImpl_t::reduce_local_views() { // Delete the right view. free(rb.view); mark_free(rb.view); + keysToRemove.push_back(b.key); } enable_checking(); - // Delete the table of local reducer views - DBG_TRACE(REDUCER, "reduce_local_views: delete reducer_views %p\n", - reducer_views); - delete reducer_views; - f->reducer_views = nullptr; + if (!holdsLeftmostViews) { + // Delete the table of local reducer views + DBG_TRACE(REDUCER, "reduce_local_views: delete reducer_views %p\n", + reducer_views); + delete reducer_views; + f->reducer_views = nullptr; + } else { + for (int32_t i = 0; i < keysToRemove.size(); ++i) + reducer_views->remove(keysToRemove[i]); // entries are keys, not bucket indices + } } hyper_table * diff --git a/cilksan/shadow_mem_allocator.h b/cilksan/shadow_mem_allocator.h index 227b4a0..20743c5 100644 --- a/cilksan/shadow_mem_allocator.h +++ b/cilksan/shadow_mem_allocator.h @@ -2,14 +2,12 @@ #ifndef __SHADOW_MEM_ALLOCATOR_H__ #define __SHADOW_MEM_ALLOCATOR_H__ +#include "aligned_alloc.h" +#include "dictionary.h" #include #include -#include #include -#include "aligned_alloc.h" -#include "dictionary.h" - // The memory-access-line allocator is dedicated to allocating specific // fixed-size arrays of MemoryAccess_t objects, e.g., MemoryAccess_t[1], // MemoryAccess_t[2], MemoryAccess_t[4], MemoryAccess_t[8], etc. 
For each array diff --git a/cilksan/simple_shadow_mem.h b/cilksan/simple_shadow_mem.h index 6217fa8..fc07de2 100644 --- a/cilksan/simple_shadow_mem.h +++ b/cilksan/simple_shadow_mem.h @@ -2,11 +2,6 @@ #ifndef __SIMPLE_SHADOW_MEM__ #define __SIMPLE_SHADOW_MEM__ -#include -#include -#include -#include - #include "checking.h" #include "cilksan_internal.h" #include "debug_util.h" @@ -14,6 +9,8 @@ #include "locksets.h" #include "shadow_mem_allocator.h" #include "vector.h" +#include +#include class SimpleShadowMem; diff --git a/cilksan/spbag.h b/cilksan/spbag.h index 43e220d..aa68540 100644 --- a/cilksan/spbag.h +++ b/cilksan/spbag.h @@ -3,15 +3,13 @@ #ifndef _SPBAG_H #define _SPBAG_H -#include -#include -#include -#include -#include - #include "debug_util.h" #include "disjointset.h" #include "race_info.h" +#include +#include +#include +#include #define UNINIT_STACK_PTR ((uintptr_t)0LL) diff --git a/cilksan/stack.h b/cilksan/stack.h index bcbe588..0fa41ac 100644 --- a/cilksan/stack.h +++ b/cilksan/stack.h @@ -3,12 +3,12 @@ #ifndef _STACK_H #define _STACK_H -#include +#include "debug_util.h" +#include +#include #include #include -#include - -#include "debug_util.h" +#include /* * Stack data structure for storing and maintaining data diff --git a/cilksan/vector.h b/cilksan/vector.h index 55d54a6..bf3052b 100644 --- a/cilksan/vector.h +++ b/cilksan/vector.h @@ -3,12 +3,10 @@ #ifndef _VECTOR_H #define _VECTOR_H -#include +#include "debug_util.h" +#include #include #include -#include - -#include "debug_util.h" // Vector data structure for storing and maintaining data // associated with the call vector. diff --git a/cilkscale/benchmark.cpp b/cilkscale/benchmark.cpp index fcadc66..1d1a8af 100644 --- a/cilkscale/benchmark.cpp +++ b/cilkscale/benchmark.cpp @@ -1,9 +1,3 @@ -#include -#include -#include -#include -#include - // Ensure that __cilkscale__ is defined, so we can provide a nontrivial // definition of getworkspan(). 
#ifndef __cilkscale__ @@ -11,11 +5,16 @@ #endif #include "cilkscale_timer.h" +#include #include -#include +#include +#include +#include #include +#include -#define CILKTOOL_API extern "C" __attribute__((visibility("default"))) +#define CILKTOOL_VISIBLE __attribute__((visibility("default"))) +#define CILKTOOL_API extern "C" CILKTOOL_VISIBLE #ifndef SERIAL_TOOL #define SERIAL_TOOL 1 @@ -25,30 +24,32 @@ #define TRACE_CALLS 0 #endif -#include -#if !SERIAL_TOOL +#if SERIAL_TOOL +#include +#else // !SERIAL_TOOL +#include #include using out_reducer = cilk::ostream_reducer; -#endif +#endif // SERIAL_TOOL /////////////////////////////////////////////////////////////////////////// // Data structures for timing. -#if !SERIAL_TOOL +#if SERIAL_TOOL +using cilkscale_timer_reducer = cilkscale_timer_t; +#else // Simple reducer for a cilkscale_timer. // // This reducer ensures that each stolen subcomputation gets a separate // cilkscale_timer object for probing the computation. -static void timer_identity(void *view) { - new (view) cilkscale_timer_t(); -} -static void timer_reduce(void *left, void *right) { - static_cast(right)->~cilkscale_timer_t(); +static void timerIdentity(void *View) { new (View) cilkscale_timer_t(); } +static void timerReduce(void *Left, void *Right) { + static_cast(Right)->~cilkscale_timer_t(); } -using cilkscale_timer_reducer = - cilkscale_timer_t _Hyperobject(timer_identity, timer_reduce); - +using cilkscale_timer_reducer = cilkscale_timer_t cilk_reducer(timerIdentity, + timerReduce); + #endif // Suppress diagnostic warning that reducer callbacks are not implemented for @@ -61,89 +62,78 @@ using cilkscale_timer_reducer = // std::ostream and a std::ofstream, only after the standard libraries they rely // on have been initialized, and to destroy those structures before those // libraries are deinitialized. -class BenchmarkImpl_t { +class BenchmarkImplT { public: // Timer for tracking execution time. 
- cilkscale_timer_t start, stop; + cilkscale_timer_t Start, Stop; #if SERIAL_TOOL - cilkscale_timer_t timer; + cilkscale_timer_t Timer; #else #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wcilk-ignored" - cilkscale_timer_reducer timer; + cilkscale_timer_reducer Timer; #pragma clang diagnostic pop #endif - std::ostream &outs = std::cout; - std::ofstream outf; + std::ostream &Outs = std::cout; + std::ofstream Outf; #if !SERIAL_TOOL - out_reducer *outf_red = nullptr; + out_reducer OutfRed; #endif - std::basic_ostream *out_view() { + std::basic_ostream &outView() { #if !SERIAL_TOOL - // TODO: The compiler does not correctly bind the hyperobject - // type to a reference, otherwise a reference return value would - // be more conventional C++. - if (outf_red) - return &*outf_red; + return OutfRed; #endif - if (outf.is_open()) - return &outf; - return &outs; + if (Outf.is_open()) + return Outf; + return Outs; } - BenchmarkImpl_t(); - ~BenchmarkImpl_t(); + BenchmarkImplT(const char *OutputFilename = nullptr); + ~BenchmarkImplT(); }; #pragma clang diagnostic pop // Top-level benchmarking tool. -static BenchmarkImpl_t *create_tool(void) { - if (!__cilkrts_is_initialized()) - // If the OpenCilk runtime is not yet initialized, then csi_init will - // register a call to init_tool to initialize the tool after the runtime is - // initialized. - return nullptr; - - // Otherwise, ordered dynamic initalization should ensure that it's safe to - // create the tool. - return new BenchmarkImpl_t(); +static BenchmarkImplT *createTool(void) { + // Ordered dynamic initalization should ensure that it's safe to create the + // tool. 
+ return new BenchmarkImplT(getenv("CILKSCALE_OUT")); } -static BenchmarkImpl_t *tool = create_tool(); +static BenchmarkImplT *Tool = createTool(); -static bool TOOL_INITIALIZED = false; +static bool CILKSCALE_BENCHMARK_INITIALIZED = false; /////////////////////////////////////////////////////////////////////////// // Routines to results // Ensure that a proper header has been emitted to OS. -template -static void ensure_header(Out &OS) { - static bool PRINT_STARTED = false; - if (PRINT_STARTED) +template static void ensureHeader(Out &OS) { + static bool PrintStarted = false; + if (PrintStarted) return; OS << "tag,time (" << cilk_time_t::units << ")\n"; - PRINT_STARTED = true; + PrintStarted = true; } // Emit the given results to OS. -template -static void print_results(Out &OS, const char *tag, cilk_time_t time) { - OS << tag << "," << time << "\n"; +template +static void printResults(Out &OS, const char *Tag, cilk_time_t Time) { + OS << Tag << "," << Time << "\n"; } // Emit the results from the overall program execution to the proper output // stream. 
-static void print_analysis(void) { - assert(TOOL_INITIALIZED); +static void printAnalysis(void) { + assert(CILKSCALE_BENCHMARK_INITIALIZED); - std::basic_ostream &output = *tool->out_view(); - ensure_header(output); - print_results(output, "", elapsed_time(&tool->stop, &tool->start)); + std::basic_ostream &OS = Tool->outView(); + ensureHeader(OS); + printResults(OS, "", elapsed_time(&Tool->Stop, &Tool->Start)); } /////////////////////////////////////////////////////////////////////////// @@ -152,38 +142,22 @@ static void print_analysis(void) { #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdeprecated-declarations" -BenchmarkImpl_t::BenchmarkImpl_t() { - const char *envstr = getenv("CILKSCALE_OUT"); - if (envstr) - outf.open(envstr); - +BenchmarkImplT::BenchmarkImplT(const char *OutputFilename) + : Outf(OutputFilename ? OutputFilename : "") // may be nullptr #if !SERIAL_TOOL - __cilkrts_reducer_register - (&timer, sizeof timer, timer_identity, timer_reduce); - - outf_red = new out_reducer((outf.is_open() ? outf : outs)); - __cilkrts_reducer_register - (outf_red, sizeof *outf_red, - &cilk::ostream_view>::identity, - &cilk::ostream_view>::reduce); + , + OutfRed(Outf.is_open() ? Outf : Outs) #endif - - start.gettime(); +{ + Start.gettime(); } -BenchmarkImpl_t::~BenchmarkImpl_t() { - stop.gettime(); - print_analysis(); - - if (outf.is_open()) - outf.close(); +BenchmarkImplT::~BenchmarkImplT() { + Stop.gettime(); + printAnalysis(); -#if !SERIAL_TOOL - __cilkrts_reducer_unregister(outf_red); - delete outf_red; - outf_red = nullptr; - __cilkrts_reducer_unregister(&timer); -#endif + if (Outf.is_open()) + Outf.close(); } #pragma clang diagnostic pop @@ -191,19 +165,13 @@ BenchmarkImpl_t::~BenchmarkImpl_t() { /////////////////////////////////////////////////////////////////////////// // Hooks for operating the tool. -// Custom function to intialize tool after the OpenCilk runtime is initialized. 
-static void init_tool(void) { - assert(nullptr == tool && "Tool already initialized"); - tool = new BenchmarkImpl_t(); -} - -static void destroy_tool(void) { - if (tool) { - delete tool; - tool = nullptr; +static void destroyTool(void) { + if (Tool) { + delete Tool; + Tool = nullptr; } - TOOL_INITIALIZED = false; + CILKSCALE_BENCHMARK_INITIALIZED = false; } CILKTOOL_API void __csi_init() { @@ -211,16 +179,13 @@ CILKTOOL_API void __csi_init() { fprintf(stderr, "__csi_init()\n"); #endif - if (!__cilkrts_is_initialized()) - __cilkrts_atinit(init_tool); - - __cilkrts_atexit(destroy_tool); + atexit(destroyTool); - TOOL_INITIALIZED = true; + CILKSCALE_BENCHMARK_INITIALIZED = true; } -CILKTOOL_API void __csi_unit_init(const char *const file_name, - const instrumentation_counts_t counts) { +CILKTOOL_API void __csi_unit_init(const char *const FileName, + const instrumentation_counts_t Counts) { return; } @@ -228,52 +193,48 @@ CILKTOOL_API void __csi_unit_init(const char *const file_name, // Probes and associated routines CILKTOOL_API wsp_t wsp_getworkspan() CILKSCALE_NOTHROW { - if (!tool) + if (!Tool) return wsp_zero(); - tool->timer.gettime(); - duration_t time_since_start = elapsed_time(&tool->timer, &tool->start); - wsp_t result = {cilk_time_t(time_since_start).get_raw_duration(), 0, 0}; + Tool->Timer.gettime(); + duration_t TimeSinceStart = elapsed_time(&Tool->Timer, &Tool->Start); + wsp_t Result = {cilk_time_t(TimeSinceStart).get_raw_duration(), 0, 0}; - return result; + return Result; } -__attribute__((visibility("default"))) wsp_t & -operator+=(wsp_t &lhs, const wsp_t &rhs) noexcept { - lhs.work += rhs.work; - return lhs; +CILKTOOL_VISIBLE wsp_t &operator+=(wsp_t &Lhs, const wsp_t &Rhs) noexcept { + Lhs.work += Rhs.work; + return Lhs; } -__attribute__((visibility("default"))) wsp_t & -operator-=(wsp_t &lhs, const wsp_t &rhs) noexcept { - lhs.work -= rhs.work; - return lhs; +CILKTOOL_VISIBLE wsp_t &operator-=(wsp_t &Lhs, const wsp_t &Rhs) noexcept { + Lhs.work -= 
Rhs.work; + return Lhs; } -__attribute__((visibility("default"))) std::ostream & -operator<<(std::ostream &OS, const wsp_t &pt) { - OS << cilk_time_t(pt.work); +CILKTOOL_VISIBLE std::ostream &operator<<(std::ostream &OS, const wsp_t &Pt) { + OS << cilk_time_t(Pt.work); return OS; } -__attribute__((visibility("default"))) std::ofstream & -operator<<(std::ofstream &OS, const wsp_t &pt) { - OS << cilk_time_t(pt.work); +CILKTOOL_VISIBLE std::ofstream &operator<<(std::ofstream &OS, const wsp_t &Pt) { + OS << cilk_time_t(Pt.work); return OS; } -CILKTOOL_API wsp_t wsp_add(wsp_t lhs, wsp_t rhs) CILKSCALE_NOTHROW { - lhs.work += rhs.work; - return lhs; +CILKTOOL_API wsp_t wsp_add(wsp_t Lhs, wsp_t Rhs) CILKSCALE_NOTHROW { + Lhs.work += Rhs.work; + return Lhs; } -CILKTOOL_API wsp_t wsp_sub(wsp_t lhs, wsp_t rhs) CILKSCALE_NOTHROW { - lhs.work -= rhs.work; - return lhs; +CILKTOOL_API wsp_t wsp_sub(wsp_t Lhs, wsp_t Rhs) CILKSCALE_NOTHROW { + Lhs.work -= Rhs.work; + return Lhs; } -CILKTOOL_API void wsp_dump(wsp_t wsp, const char *tag) { - std::basic_ostream &output = *tool->out_view(); - ensure_header(output); - print_results(output, tag, cilk_time_t(wsp.work)); +CILKTOOL_API void wsp_dump(wsp_t Wsp, const char *Tag) { + std::basic_ostream &Output = Tool->outView(); + ensureHeader(Output); + printResults(Output, Tag, cilk_time_t(Wsp.work)); } diff --git a/cilkscale/cilkscale.cpp b/cilkscale/cilkscale.cpp index 5bd3b95..7178b90 100644 --- a/cilkscale/cilkscale.cpp +++ b/cilkscale/cilkscale.cpp @@ -1,11 +1,3 @@ -#include -#include -#include -#include -#include -#include -#include - // Ensure that __cilkscale__ is defined, so we can provide a nontrivial // definition of getworkspan(). 
#ifndef __cilkscale__ @@ -13,12 +5,17 @@ #endif #include "shadow_stack.h" +#include #include #include -#include +#include +#include +#include #include +#include -#define CILKTOOL_API extern "C" __attribute__((visibility("default"))) +#define CILKTOOL_VISIBLE __attribute__((visibility("default"))) +#define CILKTOOL_API extern "C" CILKTOOL_VISIBLE #ifndef SERIAL_TOOL #define SERIAL_TOOL 1 @@ -30,12 +27,11 @@ #if SERIAL_TOOL FILE *err_io = stderr; -#else +#else // !SERIAL_TOOL #include #include - using out_reducer = cilk::ostream_reducer; -#endif +#endif // SERIAL_TOOL // defined in libopencilk extern "C" int __cilkrts_is_initialized(void); @@ -49,52 +45,38 @@ extern "C" void __cilkrts_internal_set_nworkers(unsigned int nworkers); // std::ostream and a std::ofstream, only after the standard libraries they rely // on have been initialized, and to destroy those structures before those // libraries are deinitialized. -class CilkscaleImpl_t { +class CilkscaleImplT { public: // Shadow-stack data structure, for managing work-span variables. -#if SERIAL_TOOL - shadow_stack_t *shadow_stack = nullptr; -#else - shadow_stack_reducer *shadow_stack = nullptr; -#endif + shadow_stack_reducer ShadowStack; // Output stream for printing results. - std::ostream &outs = std::cout; - std::ofstream outf; + std::ostream &Outs = std::cout; + std::ofstream Outf; #if !SERIAL_TOOL - out_reducer *outf_red = nullptr; + out_reducer OutfRed; #endif - std::basic_ostream *out_view() { + std::basic_ostream &out_view() { #if !SERIAL_TOOL - // TODO: The compiler does not correctly bind the hyperobject - // type to a reference, otherwise a reference return value would - // be more conventional C++. 
- if (outf_red) - return &*outf_red; + return OutfRed; #endif - if (outf.is_open()) - return &outf; - return &outs; + if (Outf.is_open()) + return Outf; + return Outs; } - CilkscaleImpl_t(); - ~CilkscaleImpl_t(); + CilkscaleImplT(const char *OutputFilename = nullptr); + ~CilkscaleImplT(); }; // Top-level Cilkscale tool. -static CilkscaleImpl_t *create_tool(void) { - if (!__cilkrts_is_initialized()) - // If the OpenCilk runtime is not yet initialized, then csi_init will - // register a call to init_tool to initialize the tool after the runtime is - // initialized. - return nullptr; - - // Otherwise, ordered dynamic initalization should ensure that it's safe to - // create the tool. - return new CilkscaleImpl_t(); +static CilkscaleImplT *createTool(void) { + // Ordered dynamic initialization should ensure that it's safe to create the + // tool. + return new CilkscaleImplT(getenv("CILKSCALE_OUT")); } -static CilkscaleImpl_t *tool = create_tool(); +static CilkscaleImplT *Tool = createTool(); bool CILKSCALE_INITIALIZED = false; @@ -102,10 +84,9 @@ bool CILKSCALE_INITIALIZED = false; // Utilities for printing analysis results // Ensure that a proper header has been emitted to OS. -template -static void ensure_header(Out &OS) { - static bool PRINT_STARTED = false; - if (PRINT_STARTED) +template static void ensureHeader(Out &OS) { + static bool PrintStarted = false; + if (PrintStarted) return; OS << "tag,work (" << cilk_time_t::units << ")" @@ -114,33 +95,33 @@ static void ensure_header(Out &OS) { << ",burdened_span (" << cilk_time_t::units << ")" << ",burdened_parallelism\n"; - PRINT_STARTED = true; + PrintStarted = true; } // Emit the given results to OS.
-template -static void print_results(Out &OS, const char *tag, cilk_time_t work, - cilk_time_t span, cilk_time_t bspan) { - OS << tag - << "," << work << "," << span << "," << work.get_val_d() / span.get_val_d() - << "," << bspan << "," << work.get_val_d() / bspan.get_val_d() << "\n"; +template +static void printResults(Out &OS, const char *Tag, cilk_time_t Work, + cilk_time_t Span, cilk_time_t BSpan) { + OS << Tag << "," << Work << "," << Span << "," + << Work.get_val_d() / Span.get_val_d() << "," << BSpan << "," + << Work.get_val_d() / BSpan.get_val_d() << "\n"; } // Emit the results from the overall program execution to the proper output // stream. -static void print_analysis(void) { +static void printAnalysis(void) { assert(CILKSCALE_INITIALIZED); - shadow_stack_frame_t &bottom = tool->shadow_stack->peek_bot(); + shadow_stack_frame_t &Bottom = Tool->ShadowStack.peek_bot(); - assert(frame_type::NONE != bottom.type); + assert(frame_type::NONE != Bottom.type); - cilk_time_t work = bottom.contin_work; - cilk_time_t span = bottom.contin_span; - cilk_time_t bspan = bottom.contin_bspan; + cilk_time_t Work = Bottom.contin_work; + cilk_time_t Span = Bottom.contin_span; + cilk_time_t BSpan = Bottom.contin_bspan; - std::basic_ostream &output = *tool->out_view(); - ensure_header(output); - print_results(output, "", work, span, bspan); + std::basic_ostream &OS = Tool->out_view(); + ensureHeader(OS); + printResults(OS, "", Work, Span, BSpan); } /////////////////////////////////////////////////////////////////////////// @@ -168,57 +149,31 @@ static inline void ensure_serial_tool(void) { #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdeprecated-declarations" -CilkscaleImpl_t::CilkscaleImpl_t() { -#if SERIAL_TOOL - shadow_stack = new shadow_stack_t(frame_type::MAIN); -#else - shadow_stack = new shadow_stack_reducer(); - __cilkrts_reducer_register(shadow_stack, sizeof(*shadow_stack), - &shadow_stack_t::identity, - &shadow_stack_t::reduce); -#endif - - const 
char *envstr = getenv("CILKSCALE_OUT"); - if (envstr) - outf.open(envstr); - +CilkscaleImplT::CilkscaleImplT(const char *OutputFilename) + : Outf(OutputFilename) #if !SERIAL_TOOL - outf_red = new out_reducer((outf.is_open() ? outf : outs)); - __cilkrts_reducer_register( - outf_red, sizeof(*outf_red), - &cilk::ostream_view>::identity, - &cilk::ostream_view>::reduce); + , + OutfRed(Outf.is_open() ? Outf : Outs) #endif - - shadow_stack->push(frame_type::SPAWNER); - shadow_stack->start.gettime(); +{ + // TODO: Verify that this push() is not necessary. + // shadow_stack.push(frame_type::SPAWNER); + ShadowStack.start.gettime(); } -CilkscaleImpl_t::~CilkscaleImpl_t() { - tool->shadow_stack->stop.gettime(); - shadow_stack_frame_t &bottom = tool->shadow_stack->peek_bot(); +CilkscaleImplT::~CilkscaleImplT() { + ShadowStack.stop.gettime(); + shadow_stack_frame_t &Bottom = ShadowStack.peek_bot(); - duration_t strand_time = tool->shadow_stack->elapsed_time(); - bottom.contin_work += strand_time; - bottom.contin_span += strand_time; - bottom.contin_bspan += strand_time; + duration_t StrandTime = ShadowStack.elapsed_time(); + Bottom.contin_work += StrandTime; + Bottom.contin_span += StrandTime; + Bottom.contin_bspan += StrandTime; - print_analysis(); + printAnalysis(); - if (outf.is_open()) - outf.close(); - -#if !SERIAL_TOOL - __cilkrts_reducer_unregister(shadow_stack); -#endif - delete shadow_stack; - shadow_stack = nullptr; - -#if !SERIAL_TOOL - __cilkrts_reducer_unregister(outf_red); - delete outf_red; - outf_red = nullptr; -#endif + if (Outf.is_open()) + Outf.close(); } #pragma clang diagnostic pop @@ -226,16 +181,10 @@ CilkscaleImpl_t::~CilkscaleImpl_t() { /////////////////////////////////////////////////////////////////////////// // Hooks for operating the tool. -// Custom function to intialize tool after the OpenCilk runtime is initialized. 
-static void init_tool(void) { - assert(nullptr == tool && "Tool already initialized"); - tool = new CilkscaleImpl_t(); -} - -static void destroy_tool(void) { - if (tool) { - delete tool; - tool = nullptr; +static void destroyTool(void) { + if (Tool) { + delete Tool; + Tool = nullptr; } CILKSCALE_INITIALIZED = false; @@ -246,10 +195,7 @@ CILKTOOL_API void __csi_init() { fprintf(stderr, "__csi_init()\n"); #endif - if (!__cilkrts_is_initialized()) - __cilkrts_atinit(init_tool); - - __cilkrts_atexit(destroy_tool); + atexit(destroyTool); #if SERIAL_TOOL ensure_serial_tool(); @@ -258,8 +204,8 @@ CILKTOOL_API void __csi_init() { CILKSCALE_INITIALIZED = true; } -CILKTOOL_API void __csi_unit_init(const char *const file_name, - const instrumentation_counts_t counts) { +CILKTOOL_API void __csi_unit_init(const char *const FileName, + const instrumentation_counts_t Counts) { return; } @@ -268,8 +214,8 @@ void __csi_bb_entry(const csi_id_t bb_id, const bb_prop_t prop) { if (!CILKSCALE_INITIALIZED) return; - shadow_stack_frame_t &bottom = tool->shadow_stack->peek_bot(); - get_bb_time(&bottom.contin_work, &bottom.contin_span, &bottom.contin_bspan, + shadow_stack_frame_t &Bottom = Tool->ShadowStack.peek_bot(); + get_bb_time(&Bottom.contin_work, &Bottom.contin_span, &Bottom.contin_bspan, bb_id); return; } @@ -284,38 +230,36 @@ void __csi_func_entry(const csi_id_t func_id, const func_prop_t prop) { if (!prop.may_spawn) return; - tool->shadow_stack->stop.gettime(); + Tool->ShadowStack.stop.gettime(); #if TRACE_CALLS fprintf(stderr, "[W%d] func_entry(%ld)\n", __cilkrts_get_worker_number(), func_id); #endif - shadow_stack_frame_t &bottom = tool->shadow_stack->peek_bot(); + shadow_stack_frame_t &Bottom = Tool->ShadowStack.peek_bot(); - duration_t strand_time = tool->shadow_stack->elapsed_time(); - bottom.contin_work += strand_time; - bottom.contin_span += strand_time; - bottom.contin_bspan += strand_time; + duration_t StrandTime = Tool->ShadowStack.elapsed_time(); + Bottom.contin_work 
+= StrandTime; + Bottom.contin_span += StrandTime; + Bottom.contin_bspan += StrandTime; - shadow_stack_frame_t &p_bottom = tool->shadow_stack->peek_bot(); - cilk_time_t p_contin_work = p_bottom.contin_work; - cilk_time_t p_contin_span = p_bottom.contin_span; - cilk_time_t p_contin_bspan = p_bottom.contin_bspan; + shadow_stack_frame_t &PBottom = Tool->ShadowStack.peek_bot(); + cilk_time_t PContinWork = PBottom.contin_work; + cilk_time_t PContinSpan = PBottom.contin_span; + cilk_time_t PContinBSpan = PBottom.contin_bspan; // Push new frame onto the stack - shadow_stack_frame_t &c_bottom = - tool->shadow_stack->push(frame_type::SPAWNER); - c_bottom.contin_work = p_contin_work; - c_bottom.contin_span = p_contin_span; - c_bottom.contin_bspan = p_contin_bspan; + shadow_stack_frame_t &CBottom = Tool->ShadowStack.push(frame_type::SPAWNER); + CBottom.contin_work = PContinWork; + CBottom.contin_span = PContinSpan; + CBottom.contin_bspan = PContinBSpan; - // stack.start.gettime(); // Because of the high overhead of calling gettime(), especially compared to // the running time of the operations in this hook, the work and span // measurements appear more stable if we simply use the recorded time as the // new start time. 
- tool->shadow_stack->start = tool->shadow_stack->stop; + Tool->ShadowStack.start = Tool->ShadowStack.stop; } CILKTOOL_API @@ -326,49 +270,49 @@ void __csi_func_exit(const csi_id_t func_exit_id, const csi_id_t func_id, if (!prop.may_spawn) return; - tool->shadow_stack->stop.gettime(); + Tool->ShadowStack.stop.gettime(); #if TRACE_CALLS fprintf(stderr, "[W%d] func_exit(%ld)\n", __cilkrts_get_worker_number(), func_id); #endif - duration_t strand_time = tool->shadow_stack->elapsed_time(); + duration_t StrandTime = Tool->ShadowStack.elapsed_time(); - assert(cilk_time_t::zero() == tool->shadow_stack->peek_bot().lchild_span); + assert(cilk_time_t::zero() == Tool->ShadowStack.peek_bot().lchild_span); + assert(cilk_time_t::zero() == Tool->ShadowStack.peek_bot().achild_work); // Pop the stack - shadow_stack_frame_t &c_bottom = tool->shadow_stack->pop(); - shadow_stack_frame_t &p_bottom = tool->shadow_stack->peek_bot(); + shadow_stack_frame_t &CBottom = Tool->ShadowStack.pop(); + shadow_stack_frame_t &PBottom = Tool->ShadowStack.peek_bot(); - p_bottom.contin_work = c_bottom.contin_work + strand_time; - p_bottom.contin_span = c_bottom.contin_span + strand_time; - p_bottom.contin_bspan = c_bottom.contin_bspan + strand_time; + PBottom.contin_work = CBottom.contin_work + StrandTime; + PBottom.contin_span = CBottom.contin_span + StrandTime; + PBottom.contin_bspan = CBottom.contin_bspan + StrandTime; - // stack.start.gettime(); // Because of the high overhead of calling gettime(), especially compared to // the running time of the operations in this hook, the work and span // measurements appear more stable if we simply use the recorded time as the // new start time. 
- tool->shadow_stack->start = tool->shadow_stack->stop; + Tool->ShadowStack.start = Tool->ShadowStack.stop; } CILKTOOL_API void __csi_detach(const csi_id_t detach_id, const unsigned sync_reg, const detach_prop_t prop) { - tool->shadow_stack->stop.gettime(); + Tool->ShadowStack.stop.gettime(); #if TRACE_CALLS fprintf(stderr, "[W%d] detach(%ld)\n", __cilkrts_get_worker_number(), detach_id); #endif - shadow_stack_frame_t &bottom = tool->shadow_stack->peek_bot(); + shadow_stack_frame_t &Bottom = Tool->ShadowStack.peek_bot(); - duration_t strand_time = tool->shadow_stack->elapsed_time(); - bottom.contin_work += strand_time; - bottom.contin_span += strand_time; - bottom.contin_bspan += strand_time; + duration_t StrandTime = Tool->ShadowStack.elapsed_time(); + Bottom.contin_work += StrandTime; + Bottom.contin_span += StrandTime; + Bottom.contin_bspan += StrandTime; } CILKTOOL_API @@ -379,50 +323,49 @@ void __csi_task(const csi_id_t task_id, const csi_id_t detach_id, task_id, detach_id); #endif - shadow_stack_frame_t &p_bottom = tool->shadow_stack->peek_bot(); - cilk_time_t p_contin_work = p_bottom.contin_work; - cilk_time_t p_contin_span = p_bottom.contin_span; - cilk_time_t p_contin_bspan = p_bottom.contin_bspan; + shadow_stack_frame_t &PBottom = Tool->ShadowStack.peek_bot(); + cilk_time_t PContinWork = PBottom.contin_work; + cilk_time_t PContinSpan = PBottom.contin_span; + cilk_time_t PContinBSpan = PBottom.contin_bspan; // Push new frame onto the stack. 
- shadow_stack_frame_t &c_bottom = tool->shadow_stack->push(frame_type::HELPER); - c_bottom.contin_work = p_contin_work; - c_bottom.contin_span = p_contin_span; - c_bottom.contin_bspan = p_contin_bspan; + shadow_stack_frame_t &CBottom = Tool->ShadowStack.push(frame_type::HELPER); + CBottom.contin_work = PContinWork; + CBottom.contin_span = PContinSpan; + CBottom.contin_bspan = PContinBSpan; - tool->shadow_stack->start.gettime(); + Tool->ShadowStack.start.gettime(); } CILKTOOL_API void __csi_task_exit(const csi_id_t task_exit_id, const csi_id_t task_id, const csi_id_t detach_id, const unsigned sync_reg, const task_exit_prop_t prop) { - tool->shadow_stack->stop.gettime(); + Tool->ShadowStack.stop.gettime(); #if TRACE_CALLS fprintf(stderr, "[W%d] task_exit(%ld, %ld, %ld)\n", __cilkrts_get_worker_number(), task_exit_id, task_id, detach_id); #endif - shadow_stack_frame_t &bottom = tool->shadow_stack->peek_bot(); + shadow_stack_frame_t &Bottom = Tool->ShadowStack.peek_bot(); - duration_t strand_time = tool->shadow_stack->elapsed_time(); - bottom.contin_work += strand_time; - bottom.contin_span += strand_time; - bottom.contin_bspan += strand_time; + duration_t StrandTime = Tool->ShadowStack.elapsed_time(); + Bottom.contin_work += StrandTime; + Bottom.contin_span += StrandTime; + Bottom.contin_bspan += StrandTime; - assert(cilk_time_t::zero() == bottom.lchild_span); + assert(cilk_time_t::zero() == Bottom.lchild_span); // Pop the stack - shadow_stack_frame_t &c_bottom = tool->shadow_stack->pop(); - shadow_stack_frame_t &p_bottom = tool->shadow_stack->peek_bot(); - p_bottom.achild_work += c_bottom.contin_work - p_bottom.contin_work; + shadow_stack_frame_t &CBottom = Tool->ShadowStack.pop(); + shadow_stack_frame_t &PBottom = Tool->ShadowStack.peek_bot(); + PBottom.achild_work += CBottom.contin_work - PBottom.contin_work; // Check if the span of c_bottom exceeds that of the previous longest child. 
- if (c_bottom.contin_span > p_bottom.lchild_span) - p_bottom.lchild_span = c_bottom.contin_span; - if (c_bottom.contin_bspan + cilkscale_timer_t::burden - > p_bottom.lchild_bspan) - p_bottom.lchild_bspan = c_bottom.contin_bspan + cilkscale_timer_t::burden; + if (CBottom.contin_span > PBottom.lchild_span) + PBottom.lchild_span = CBottom.contin_span; + if (CBottom.contin_bspan + cilkscale_timer_t::burden > PBottom.lchild_bspan) + PBottom.lchild_bspan = CBottom.contin_bspan + cilkscale_timer_t::burden; } CILKTOOL_API @@ -435,7 +378,7 @@ void __csi_detach_continue(const csi_id_t detach_continue_id, __cilkrts_get_worker_number(), detach_continue_id, detach_id, prop); #endif - shadow_stack_frame_t &bottom = tool->shadow_stack->peek_bot(); + shadow_stack_frame_t &Bottom = Tool->ShadowStack.peek_bot(); if (prop.is_unwind) { // In opencilk, upon reaching the unwind destination of a detach, all @@ -443,184 +386,180 @@ void __csi_detach_continue(const csi_id_t detach_continue_id, // logic from after_sync here to compute work and span. // Add achild_work to contin_work, and reset contin_work. - bottom.contin_work += bottom.achild_work; - bottom.achild_work = cilk_time_t::zero(); + Bottom.contin_work += Bottom.achild_work; + Bottom.achild_work = cilk_time_t::zero(); // Select the largest of lchild_span and contin_span, and then reset // lchild_span. 
- if (bottom.lchild_span > bottom.contin_span) - bottom.contin_span = bottom.lchild_span; - bottom.lchild_span = cilk_time_t::zero(); + if (Bottom.lchild_span > Bottom.contin_span) + Bottom.contin_span = Bottom.lchild_span; + Bottom.lchild_span = cilk_time_t::zero(); - if (bottom.lchild_bspan > bottom.contin_bspan) - bottom.contin_bspan = bottom.lchild_bspan; - bottom.lchild_bspan = cilk_time_t::zero(); + if (Bottom.lchild_bspan > Bottom.contin_bspan) + Bottom.contin_bspan = Bottom.lchild_bspan; + Bottom.lchild_bspan = cilk_time_t::zero(); } else { - bottom.contin_bspan += cilkscale_timer_t::burden; + Bottom.contin_bspan += cilkscale_timer_t::burden; } - tool->shadow_stack->start.gettime(); + Tool->ShadowStack.start.gettime(); } CILKTOOL_API void __csi_before_sync(const csi_id_t sync_id, const unsigned sync_reg) { - tool->shadow_stack->stop.gettime(); + Tool->ShadowStack.stop.gettime(); #if TRACE_CALLS - fprintf(stderr, "[W%d] before_sync(%ld)\n", __cilkrts_get_worker_number(), - sync_id); + fprintf(stderr, "[W%d] before_sync(%ld, %d)\n", __cilkrts_get_worker_number(), + sync_id, sync_reg); #endif - shadow_stack_frame_t &bottom = tool->shadow_stack->peek_bot(); + shadow_stack_frame_t &Bottom = Tool->ShadowStack.peek_bot(); - duration_t strand_time = tool->shadow_stack->elapsed_time(); - bottom.contin_work += strand_time; - bottom.contin_span += strand_time; - bottom.contin_bspan += strand_time; + duration_t StrandTime = Tool->ShadowStack.elapsed_time(); + Bottom.contin_work += StrandTime; + Bottom.contin_span += StrandTime; + Bottom.contin_bspan += StrandTime; } CILKTOOL_API void __csi_after_sync(const csi_id_t sync_id, const unsigned sync_reg) { #if TRACE_CALLS - fprintf(stderr, "[W%d] after_sync(%ld)\n", __cilkrts_get_worker_number(), - sync_id); + fprintf(stderr, "[W%d] after_sync(%ld, %d)\n", __cilkrts_get_worker_number(), + sync_id, sync_reg); #endif - shadow_stack_frame_t &bottom = tool->shadow_stack->peek_bot(); + shadow_stack_frame_t &Bottom = 
Tool->ShadowStack.peek_bot(); // Update the work and span recorded for the bottom-most frame on the stack. - // Add achild_work to contin_work, and reset contin_work. - bottom.contin_work += bottom.achild_work; - bottom.achild_work = cilk_time_t::zero(); + // Add achild_work to contin_work, and reset achild_work. + Bottom.contin_work += Bottom.achild_work; + Bottom.achild_work = cilk_time_t::zero(); // Select the largest of lchild_span and contin_span, and then reset // lchild_span. - if (bottom.lchild_span > bottom.contin_span) - bottom.contin_span = bottom.lchild_span; - bottom.lchild_span = cilk_time_t::zero(); + if (Bottom.lchild_span > Bottom.contin_span) + Bottom.contin_span = Bottom.lchild_span; + Bottom.lchild_span = cilk_time_t::zero(); - if (bottom.lchild_bspan > bottom.contin_bspan) - bottom.contin_bspan = bottom.lchild_bspan; - bottom.lchild_bspan = cilk_time_t::zero(); + if (Bottom.lchild_bspan > Bottom.contin_bspan) + Bottom.contin_bspan = Bottom.lchild_bspan; + Bottom.lchild_bspan = cilk_time_t::zero(); - tool->shadow_stack->start.gettime(); + Tool->ShadowStack.start.gettime(); } /////////////////////////////////////////////////////////////////////////// // Probes and associated routines CILKTOOL_API wsp_t wsp_getworkspan() CILKSCALE_NOTHROW { - tool->shadow_stack->stop.gettime(); + Tool->ShadowStack.stop.gettime(); #if TRACE_CALLS fprintf(stderr, "getworkspan()\n"); #endif - shadow_stack_frame_t &bottom = tool->shadow_stack->peek_bot(); + shadow_stack_frame_t &Bottom = Tool->ShadowStack.peek_bot(); - duration_t strand_time = tool->shadow_stack->elapsed_time(); - bottom.contin_work += strand_time; - bottom.contin_span += strand_time; - bottom.contin_bspan += strand_time; + duration_t StrandTime = Tool->ShadowStack.elapsed_time(); + Bottom.contin_work += StrandTime; + Bottom.contin_span += StrandTime; + Bottom.contin_bspan += StrandTime; - wsp_t result = {tool->shadow_stack->peek_bot().contin_work.get_raw_duration(), - 
tool->shadow_stack->peek_bot().contin_span.get_raw_duration(), - tool->shadow_stack->peek_bot().contin_bspan.get_raw_duration()}; + wsp_t Result = {Tool->ShadowStack.peek_bot().contin_work.get_raw_duration(), + Tool->ShadowStack.peek_bot().contin_span.get_raw_duration(), + Tool->ShadowStack.peek_bot().contin_bspan.get_raw_duration()}; // Because of the high overhead of calling gettime(), especially compared to // the running time of the operations in this hook, the work and span // measurements appear more stable if we simply use the recorded time as the // new start time. - tool->shadow_stack->start = tool->shadow_stack->stop; + Tool->ShadowStack.start = Tool->ShadowStack.stop; - return result; + return Result; } -__attribute__((visibility("default"))) wsp_t & -operator+=(wsp_t &lhs, const wsp_t &rhs) noexcept { - lhs.work += rhs.work; - lhs.span += rhs.span; - lhs.bspan += rhs.bspan; - return lhs; +CILKTOOL_VISIBLE wsp_t &operator+=(wsp_t &Lhs, const wsp_t &Rhs) noexcept { + Lhs.work += Rhs.work; + Lhs.span += Rhs.span; + Lhs.bspan += Rhs.bspan; + return Lhs; } -__attribute__((visibility("default"))) wsp_t & -operator-=(wsp_t &lhs, const wsp_t &rhs) noexcept { - lhs.work -= rhs.work; - lhs.span -= rhs.span; - lhs.bspan -= rhs.bspan; - return lhs; +CILKTOOL_VISIBLE wsp_t &operator-=(wsp_t &Lhs, const wsp_t &Rhs) noexcept { + Lhs.work -= Rhs.work; + Lhs.span -= Rhs.span; + Lhs.bspan -= Rhs.bspan; + return Lhs; } -__attribute__((visibility("default"))) std::ostream & -operator<<(std::ostream &OS, const wsp_t &pt) { - tool->shadow_stack->stop.gettime(); +CILKTOOL_VISIBLE std::ostream &operator<<(std::ostream &OS, const wsp_t &pt) { + Tool->ShadowStack.stop.gettime(); - shadow_stack_frame_t &bottom = tool->shadow_stack->peek_bot(); + shadow_stack_frame_t &Bottom = Tool->ShadowStack.peek_bot(); - duration_t strand_time = tool->shadow_stack->elapsed_time(); - bottom.contin_work += strand_time; - bottom.contin_span += strand_time; - bottom.contin_bspan += strand_time; + 
duration_t StrandTime = Tool->ShadowStack.elapsed_time(); + Bottom.contin_work += StrandTime; + Bottom.contin_span += StrandTime; + Bottom.contin_bspan += StrandTime; - cilk_time_t work = cilk_time_t(pt.work); - cilk_time_t span = cilk_time_t(pt.span); - cilk_time_t bspan = cilk_time_t(pt.bspan); - OS << work << ", " << span << ", " << work.get_val_d() / span.get_val_d() - << ", " << bspan << ", " << work.get_val_d() / bspan.get_val_d(); + cilk_time_t Work = cilk_time_t(pt.work); + cilk_time_t Span = cilk_time_t(pt.span); + cilk_time_t BSpan = cilk_time_t(pt.bspan); + OS << Work << ", " << Span << ", " << Work.get_val_d() / Span.get_val_d() + << ", " << BSpan << ", " << Work.get_val_d() / BSpan.get_val_d(); - tool->shadow_stack->start.gettime(); + Tool->ShadowStack.start.gettime(); return OS; } -__attribute__((visibility("default"))) std::ofstream & -operator<<(std::ofstream &OS, const wsp_t &pt) { - tool->shadow_stack->stop.gettime(); +CILKTOOL_VISIBLE std::ofstream &operator<<(std::ofstream &OS, const wsp_t &pt) { + Tool->ShadowStack.stop.gettime(); - shadow_stack_frame_t &bottom = tool->shadow_stack->peek_bot(); + shadow_stack_frame_t &Bottom = Tool->ShadowStack.peek_bot(); - duration_t strand_time = tool->shadow_stack->elapsed_time(); - bottom.contin_work += strand_time; - bottom.contin_span += strand_time; - bottom.contin_bspan += strand_time; + duration_t StrandTime = Tool->ShadowStack.elapsed_time(); + Bottom.contin_work += StrandTime; + Bottom.contin_span += StrandTime; + Bottom.contin_bspan += StrandTime; - cilk_time_t work = cilk_time_t(pt.work); - cilk_time_t span = cilk_time_t(pt.span); - cilk_time_t bspan = cilk_time_t(pt.bspan); - OS << work << ", " << span << ", " << work.get_val_d() / span.get_val_d() - << ", " << bspan << ", " << work.get_val_d() / bspan.get_val_d(); + cilk_time_t Work = cilk_time_t(pt.work); + cilk_time_t Span = cilk_time_t(pt.span); + cilk_time_t BSpan = cilk_time_t(pt.bspan); + OS << Work << ", " << Span << ", " << 
Work.get_val_d() / Span.get_val_d() + << ", " << BSpan << ", " << Work.get_val_d() / BSpan.get_val_d(); - tool->shadow_stack->start.gettime(); + Tool->ShadowStack.start.gettime(); return OS; } -CILKTOOL_API wsp_t wsp_add(wsp_t lhs, wsp_t rhs) CILKSCALE_NOTHROW { - lhs.work += rhs.work; - lhs.span += rhs.span; - lhs.bspan += rhs.bspan; - return lhs; +CILKTOOL_API wsp_t wsp_add(wsp_t Lhs, wsp_t Rhs) CILKSCALE_NOTHROW { + Lhs.work += Rhs.work; + Lhs.span += Rhs.span; + Lhs.bspan += Rhs.bspan; + return Lhs; } -CILKTOOL_API wsp_t wsp_sub(wsp_t lhs, wsp_t rhs) CILKSCALE_NOTHROW { - lhs.work -= rhs.work; - lhs.span -= rhs.span; - lhs.bspan -= rhs.bspan; - return lhs; +CILKTOOL_API wsp_t wsp_sub(wsp_t Lhs, wsp_t Rhs) CILKSCALE_NOTHROW { + Lhs.work -= Rhs.work; + Lhs.span -= Rhs.span; + Lhs.bspan -= Rhs.bspan; + return Lhs; } -CILKTOOL_API void wsp_dump(wsp_t wsp, const char *tag) { - tool->shadow_stack->stop.gettime(); +CILKTOOL_API void wsp_dump(wsp_t Wsp, const char *Tag) { + Tool->ShadowStack.stop.gettime(); - shadow_stack_frame_t &bottom = tool->shadow_stack->peek_bot(); + shadow_stack_frame_t &Bottom = Tool->ShadowStack.peek_bot(); - duration_t strand_time = tool->shadow_stack->elapsed_time(); - bottom.contin_work += strand_time; - bottom.contin_span += strand_time; - bottom.contin_bspan += strand_time; + duration_t StrandTime = Tool->ShadowStack.elapsed_time(); + Bottom.contin_work += StrandTime; + Bottom.contin_span += StrandTime; + Bottom.contin_bspan += StrandTime; - std::basic_ostream &output = *tool->out_view(); - ensure_header(output); - print_results(output, tag, cilk_time_t(wsp.work), cilk_time_t(wsp.span), - cilk_time_t(wsp.bspan)); + std::basic_ostream &OS = Tool->out_view(); + ensureHeader(OS); + printResults(OS, Tag, cilk_time_t(Wsp.work), cilk_time_t(Wsp.span), + cilk_time_t(Wsp.bspan)); - tool->shadow_stack->start.gettime(); + Tool->ShadowStack.start.gettime(); } diff --git a/cilkscale/cilkscale_timer.h b/cilkscale/cilkscale_timer.h index 
74dd47c..8df04d9 100644 --- a/cilkscale/cilkscale_timer.h +++ b/cilkscale/cilkscale_timer.h @@ -4,8 +4,8 @@ #include #include -#include #include +#include #define RDTSC 1 #define CLOCK 2 @@ -23,7 +23,7 @@ #endif /////////////////////////////////////////////////////////////////////////// -// Data structures and helper methods for time of user strands. +// Data structures and helper methods to time user strands. #if CSCALETIMER == RDTSC || CSCALETIMER == INST using duration_t = raw_duration_t; #else // CSCALETIMER == CLOCK @@ -32,6 +32,8 @@ using duration_t = std::chrono::nanoseconds; static_assert(sizeof(duration_t) == sizeof(raw_duration_t), "Mistmatched sizes for time values."); +// Class representing time for a user strand. This class is designed to hide +// the specific details of which timer is used. class cilk_time_t { duration_t val; @@ -50,12 +52,14 @@ class cilk_time_t { #endif // CSCALETIMER } + // Comparison operators between cilk_time_t's and duration_t's. friend bool operator==(const cilk_time_t &lhs, const duration_t &rhs) { return lhs.val == rhs; } friend bool operator>(const cilk_time_t &lhs, const duration_t &rhs) { return lhs.val > rhs; } + // Arithmetic operators between cilk_time_t's and duration_t's. friend cilk_time_t operator+(cilk_time_t lhs, const duration_t &rhs) { lhs.val += rhs; return lhs; @@ -73,13 +77,14 @@ class cilk_time_t { return *this; } + // Comparison operators between cilk_time_t's. friend bool operator==(const cilk_time_t &lhs, const cilk_time_t &rhs) { return lhs.val == rhs.val; } friend bool operator>(const cilk_time_t &lhs, const cilk_time_t &rhs) { return lhs.val > rhs.val; } - + // Arithmetic operators between cilk_time_t's. 
friend cilk_time_t operator+(cilk_time_t lhs, const cilk_time_t &rhs) { lhs.val += rhs.val; return lhs; diff --git a/cilkscale/shadow_stack.h b/cilkscale/shadow_stack.h index e92f0f7..485bc09 100644 --- a/cilkscale/shadow_stack.h +++ b/cilkscale/shadow_stack.h @@ -3,11 +3,17 @@ #define INCLUDED_SHADOW_STACK_H #include "cilkscale_timer.h" +#include +#include #ifndef SERIAL_TOOL #define SERIAL_TOOL 1 #endif +#if SERIAL_TOOL +#include +#endif + #ifndef TRACE_CALLS #define TRACE_CALLS 0 #endif @@ -199,7 +205,7 @@ struct shadow_stack_t { } }; -typedef shadow_stack_t _Hyperobject(shadow_stack_t::identity, +typedef shadow_stack_t cilk_reducer(shadow_stack_t::identity, shadow_stack_t::reduce) shadow_stack_reducer; diff --git a/test/cilksan/TestCases/alloctypes.cpp b/test/cilksan/TestCases/alloctypes.cpp index 11b30a9..cd5a098 100644 --- a/test/cilksan/TestCases/alloctypes.cpp +++ b/test/cilksan/TestCases/alloctypes.cpp @@ -93,7 +93,7 @@ void global_test() { // CHECK-GLOBAL: Race detected on location [[GLOBAL]] // CHECK-GLOBAL: * Write {{[0-9a-f]+}} global_test -// CHECK-GLOBALOB: * Write {{[0-9a-f]+}} global_test +// CHECK-GLOBAL: * Write {{[0-9a-f]+}} global_test // CHECK-GLOBAL: Common calling context // CHECK-GLOBAL-NEXT: Parfor diff --git a/test/cilksan/TestCases/cabs-fstat64.cpp b/test/cilksan/TestCases/cabs-fstat64.cpp new file mode 100644 index 0000000..414ad67 --- /dev/null +++ b/test/cilksan/TestCases/cabs-fstat64.cpp @@ -0,0 +1,18 @@ +// RUN: %clangxx_cilksan -fopencilk -Og %s -o %t -g +// RUN: %run %t 2>&1 | FileCheck %s +// UNSUPPORTED: darwin + +#include +#include +#include + +int main() { + std::cout << std::abs(std::complex(1.0, 2.0)) << std::endl; + struct stat64 buf; + ::fstat64(0, &buf); + std::cout << buf.st_dev << std::endl; + return 0; +} + +// CHECK: Cilksan detected 0 distinct races. +// CHECK-NEXT: Cilksan suppressed 0 duplicate race reports. 
diff --git a/test/cilksan/TestCases/call-once.cpp b/test/cilksan/TestCases/call-once.cpp index a37907b..572360b 100644 --- a/test/cilksan/TestCases/call-once.cpp +++ b/test/cilksan/TestCases/call-once.cpp @@ -2,6 +2,8 @@ // RUN: %run %t 2>&1 | FileCheck %s // RUN: %clangxx_cilksan -fopencilk -Og %s -o %t -mllvm -cilksan-maap-checks=false // RUN: %run %t 2>&1 | FileCheck %s +// TODO: Figure out how to support this case on Darwin. +// UNSUPPORTED: darwin #include #include diff --git a/test/cilksan/TestCases/nested-loops-with-reducers.cpp b/test/cilksan/TestCases/nested-loops-with-reducers.cpp new file mode 100644 index 0000000..e6f8e75 --- /dev/null +++ b/test/cilksan/TestCases/nested-loops-with-reducers.cpp @@ -0,0 +1,201 @@ +// RUN: %clangxx_cilksan -std=c++20 -fopencilk -O3 -g %s -o %t +// RUN: %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK,CILKSAN +#include +#include +#include +#include +#include + +#include + +struct Scalar { + constexpr static uint64_t PRIME = 0xffffffff00000001ull; + + // Scalar() { /* Deliberately skip initialization of _raw to allow more + // aggresive optimization */ + // } + + Scalar() = default; + + explicit Scalar(uint64_t raw) : _raw{raw} {} + + explicit operator uint64_t() const { return _raw; } + + auto operator+(Scalar other) const -> Scalar { + const uint64_t sum = _raw + other._raw; + const Scalar ret{ + sum < _raw || sum < other._raw || sum >= PRIME ? sum - PRIME : sum}; + return ret; + } + + auto operator-(Scalar other) const -> Scalar { + const uint64_t diff = _raw - other._raw; + const Scalar ret{(diff > _raw) ? 
diff + PRIME : diff}; + return ret; + } + + auto operator*(Scalar other) const -> Scalar { + // Start by carrying out an ordinary 64x64->128 bit multiplication + const uint32_t a0 = _raw, a1 = _raw >> 32; + const uint32_t b0 = other._raw, b1 = other._raw >> 32; + const uint64_t p0 = static_cast(a0) * b0, + p1 = static_cast(a0) * b1, + p2 = static_cast(a1) * b0, + p3 = static_cast(a1) * b1; + const uint32_t cy = ((p0 >> 32) + static_cast(p1) + + static_cast(p2)) >> + 32; + const uint64_t x = p0 + (p1 << 32) + (p2 << 32), + y = p3 + (p1 >> 32) + (p2 >> 32) + cy; + // Store result in 4 32-bit words + const uint32_t c0 = x, c1 = x >> 32, c2 = y, c3 = y >> 32; + // Now perform reduction: modulus is phi^2 - phi + 1 where phi = 2^32 + // ab = c0 + c1*phi + c2*phi^2 + c3*phi^3 + // Exploit phi^2 = phi-1 and phi^3 = phi * (phi-1) = (phi-1) - phi = -1 + // ab = c0 + c1*phi + c2*(phi-1) - c3 + // = (c0-c2-c3) + (c1+c2)*phi + const Scalar ret = (Scalar(c0) - Scalar(c2) - Scalar(c3)) + + (Scalar(static_cast(c1) << 32) + + Scalar(static_cast(c2) << 32)); + return ret; + } + + auto operator==(Scalar other) const -> bool { return _raw == other._raw; } + + auto operator!=(Scalar other) const -> bool { return _raw != other._raw; } + + // No comparison operators as they make no sense for finite fields + + auto operator+=(Scalar other) -> Scalar & { return *this = *this + other; } + + auto operator-=(Scalar other) -> Scalar & { return *this = *this - other; } + + auto operator*=(Scalar other) -> Scalar & { return *this = *this * other; } + + template inline static auto random(RNG &rng) -> Scalar { + static std::uniform_int_distribution dist(0, PRIME - 1); + return Scalar{dist(rng)}; + } + + auto is_valid() const -> bool { return _raw < PRIME; } + + private: + uint64_t _raw; +}; + +static inline void zero_scalar(void *view) { + *reinterpret_cast(view) = Scalar{0}; +} + +static inline void add_scalar(void *left, void *right) { + *reinterpret_cast(left) += *reinterpret_cast(right); +} + 
+using ScalarAddReducer = Scalar cilk_reducer(zero_scalar, add_scalar); + +auto reduce_with_cilk(Scalar **as, Scalar **bs, Scalar *c, Scalar *coeffs, + size_t n, size_t m) -> std::array { + ScalarAddReducer p0{0}, p2{0}, p3{0}; + cilk_for (size_t i = 0; i < n; i++) { + // Obtain dense representations of the polynomials + const Scalar *a = as[i]; + const Scalar *b = bs[i]; + const size_t half = m / 2; + + ScalarAddReducer lp0{0}, lp2{0}, lp3{0}; + cilk_for (size_t j = 0; j < half; j++) { + lp0 += a[j] * b[j] * c[j]; + const Scalar a2 = a[j + half] + a[j + half] - a[j], + b2 = b[j + half] + b[j + half] - b[j], + c2 = c[j + half] + c[j + half] - c[j]; + lp2 += a2 * b2 * c2; + const Scalar a3 = a2 + a[j + half] - a[j], + b3 = b2 + b[j + half] - b[j], + c3 = c2 + c[j + half] - c[j]; + lp3 += a3 * b3 * c3; + } + p0 += coeffs[i] * lp0; + p2 += coeffs[i] * lp2; + p3 += coeffs[i] * lp3; + } + return {p0, p2, p3}; +} + +auto reduce_serial(Scalar **as, Scalar **bs, Scalar *c, Scalar *coeffs, + size_t n, size_t m) -> std::array { + Scalar p0{0}, p2{0}, p3{0}; + for (size_t i = 0; i < n; i++) { + // Obtain dense representations of the polynomials + const Scalar *a = as[i]; + const Scalar *b = bs[i]; + const size_t half = m / 2; + + Scalar lp0{0}, lp2{0}, lp3{0}; + for (size_t j = 0; j < half; j++) { + lp0 += a[j] * b[j] * c[j]; + const Scalar a2 = a[j + half] + a[j + half] - a[j], + b2 = b[j + half] + b[j + half] - b[j], + c2 = c[j + half] + c[j + half] - c[j]; + lp2 += a2 * b2 * c2; + const Scalar a3 = a2 + a[j + half] - a[j], + b3 = b2 + b[j + half] - b[j], + c3 = c2 + c[j + half] - c[j]; + lp3 += a3 * b3 * c3; + } + + p0 += coeffs[i] * lp0; + p2 += coeffs[i] * lp2; + p3 += coeffs[i] * lp3; + } + return {p0, p2, p3}; +} + +auto main() -> int { + const size_t N = 12; + const size_t M = 128; + + std::mt19937_64 rng{42}; + + Scalar *as[N]; + Scalar *bs[N]; + Scalar c[M]; + Scalar coeffs[N]; + + for (size_t i = 0; i < N; i++) { + as[i] = new Scalar[M]; + bs[i] = new Scalar[M]; + 
coeffs[i] = Scalar::random(rng); + for (size_t j = 0; j < M; j++) { + as[i][j] = Scalar::random(rng); + bs[i][j] = Scalar::random(rng); + } + } + for (size_t i = 0; i < M; i++) { + c[i] = Scalar::random(rng); + } + + const auto res_cilk = reduce_with_cilk(as, bs, c, coeffs, N, M); + const auto res_serial = reduce_serial(as, bs, c, coeffs, N, M); + if (res_cilk != res_serial) { + printf("res_cilk = %lu, %lu, %lu\n", static_cast(res_cilk[0]), + static_cast(res_cilk[1]), + static_cast(res_cilk[2])); + printf("res_serial = %lu, %lu, %lu\n", + static_cast(res_serial[0]), + static_cast(res_serial[1]), + static_cast(res_serial[2])); + } + for (size_t i = 0; i < N; i++) { + delete[] as[i]; + delete[] bs[i]; + } + return 0; +} + +// NOLINTEND + +// CHECK-NOT: res_cilk = +// CHECK-NOT: res_serial = + +// CILKSAN: Cilksan detected 0 distinct races. +// CILKSAN-NEXT: Cilksan suppressed 0 duplicate race reports. diff --git a/test/cilksan/TestCases/write-free-race.cpp b/test/cilksan/TestCases/write-free-race.cpp new file mode 100755 index 0000000..671b5f0 --- /dev/null +++ b/test/cilksan/TestCases/write-free-race.cpp @@ -0,0 +1,35 @@ +// RUN: %clangxx_cilksan -fopencilk -O0 -g %s -o %t +// RUN: %run %t 2>&1 | FileCheck %s +// RUN: %clangxx_cilksan -fopencilk -Og -g %s -o %t +// RUN: %run %t 2>&1 | FileCheck %s + +#include + +int constexpr size = 1 << 10; + +__attribute__((noinline)) +void f(int* ptr) { + free(ptr); +} + +__attribute__((noinline)) +void g(int * ptr) { + ptr[0] = 7; +} + +int main() { + int* arr = (int*)malloc(sizeof(int) * size); + cilk_spawn g(arr); + cilk_spawn f(arr); + return 0; +} + +// CHECK: Race detected +// CHECK-NEXT: * Write {{[0-9a-f]+}} g +// CHECK-NEXT: to variable +// CHECK-NEXT: Spawn {{[0-9a-f]+}} main +// CHECK-NEXT: * Free +// CHECK-NEXT: Call {{[0-9a-f]+}} main + +// CHECK: Cilksan detected 1 distinct races. +// CHECK-NEXT: Cilksan suppressed 0 duplicate race reports. 
diff --git a/test/lit.common.cfg.py b/test/lit.common.cfg.py index 8b7facd..c3227d2 100644 --- a/test/lit.common.cfg.py +++ b/test/lit.common.cfg.py @@ -597,13 +597,13 @@ def is_windows_lto_supported(): config.substitutions.append( ("%ld_flags_rpath_exe" + postfix, '-Wl,-rpath,@executable_path/ %dynamiclib' + postfix) ) config.substitutions.append( ("%ld_flags_rpath_so" + postfix, '-install_name @rpath/`basename %dynamiclib{}`'.format(postfix)) ) elif config.host_os in ('FreeBSD', 'NetBSD', 'OpenBSD'): - config.substitutions.append( ("%ld_flags_rpath_exe" + postfix, "-Wl,-z,origin -Wl,-rpath,\$ORIGIN -L%T -l%xdynamiclib_namespec" + postfix) ) + config.substitutions.append( ("%ld_flags_rpath_exe" + postfix, r"-Wl,-z,origin -Wl,-rpath,\$ORIGIN -L%T -l%xdynamiclib_namespec" + postfix) ) config.substitutions.append( ("%ld_flags_rpath_so" + postfix, '') ) elif config.host_os == 'Linux': - config.substitutions.append( ("%ld_flags_rpath_exe" + postfix, "-Wl,-rpath,\$ORIGIN -L%T -l%xdynamiclib_namespec" + postfix) ) + config.substitutions.append( ("%ld_flags_rpath_exe" + postfix, r"-Wl,-rpath,\$ORIGIN -L%T -l%xdynamiclib_namespec" + postfix) ) config.substitutions.append( ("%ld_flags_rpath_so" + postfix, '') ) elif config.host_os == 'SunOS': - config.substitutions.append( ("%ld_flags_rpath_exe" + postfix, "-Wl,-R\$ORIGIN -L%T -l%xdynamiclib_namespec" + postfix) ) + config.substitutions.append( ("%ld_flags_rpath_exe" + postfix, r"-Wl,-R\$ORIGIN -L%T -l%xdynamiclib_namespec" + postfix) ) config.substitutions.append( ("%ld_flags_rpath_so" + postfix, '') ) # Must be defined after the substitutions that use %dynamiclib.