diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 9491f51b7..72961fe0b 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -175,6 +175,30 @@ jobs: - name: test run: cd build && ctest -V + build-ubuntu-static-dispatch: + name: Build static dispatch on ubuntu-24.04 + runs-on: ubuntu-24.04 + + steps: + - uses: actions/checkout@v4 + with: + submodules: "recursive" + - name: dependencies + run: sudo apt install python3-mako libgtest-dev libfmt-dev + - name: configure + run: mkdir build && cd build && cmake -DVOLK_STATIC_DISPATCH=generic -DCMAKE_C_FLAGS="-Werror" -DCMAKE_CXX_FLAGS="-Werror" -DBUILD_EXECUTABLE=ON .. + - name: build + run: cmake --build build -j$(nproc) + - name: Print info + run: | + ./build/apps/volk-config-info --alignment + ./build/apps/volk-config-info --avail-machines + ./build/apps/volk-config-info --all-machines + ./build/apps/volk-config-info --malloc + ./build/apps/volk-config-info --cc + - name: test + run: cd build && ctest -V + build-windows: runs-on: windows-latest diff --git a/CMakeLists.txt b/CMakeLists.txt index bcf324614..325fd534f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -103,15 +103,52 @@ if(MSVC) endif(MSVC) ######################################################################## -# Dependencies setup +# Build options ######################################################################## +set(VOLK_STATIC_DISPATCH "" CACHE STRING + "Static dispatch to a specific machine (e.g. neonv8). 
Empty means dynamic dispatch.") +cmake_dependent_option(ENABLE_UTILITY_APPS "Enable utility apps" ON "NOT CMAKE_CROSSCOMPILING" OFF) +cmake_dependent_option(ENABLE_TESTING "Enable QA testing" ON "NOT CMAKE_CROSSCOMPILING" OFF) +cmake_dependent_option(ENABLE_MODTOOL "Enable volk_modtool python utility" ON "NOT CMAKE_CROSSCOMPILING" OFF) +cmake_dependent_option(ENABLE_PROFILING "Launch system profiler after build" OFF "ENABLE_UTILITY_APPS;NOT CMAKE_CROSSCOMPILING" OFF) -# cpu_features - sensible defaults, user settable option -if(CMAKE_SYSTEM_PROCESSOR MATCHES - "(^mips)|(^arm)|(^aarch64)|(x86_64)|(AMD64|amd64)|(^i.86$)|(^powerpc)|(^ppc)|(^riscv)") - option(VOLK_CPU_FEATURES "Volk uses cpu_features" ON) +######################################################################## +# Print summary of options +######################################################################## +if (VOLK_STATIC_DISPATCH) + message(STATUS "Using static dispatch for: ${VOLK_STATIC_DISPATCH}") else() - option(VOLK_CPU_FEATURES "Volk uses cpu_features" OFF) + message(STATUS "Using dynamic dispatch.") +endif() + +if(ENABLE_UTILITY_APPS) + message(STATUS "Utility apps enabled.") +endif() + +if (ENABLE_TESTING) + message(STATUS "Testing enabled.") +endif() + +if (ENABLE_MODTOOL) + message(STATUS "volk_modtool python utility enabled.") +endif() + +if (ENABLE_PROFILING) + message(STATUS "Launch system profiler after build enabled.") +endif() + +######################################################################## +# Dependencies setup +######################################################################## + +# cpu_features - only needed for dynamic dispatch (runtime CPU detection) +if(NOT VOLK_STATIC_DISPATCH) + if(CMAKE_SYSTEM_PROCESSOR MATCHES + "(^mips)|(^arm)|(^aarch64)|(x86_64)|(AMD64|amd64)|(^i.86$)|(^powerpc)|(^ppc)|(^riscv)") + option(VOLK_CPU_FEATURES "Volk uses cpu_features" ON) + else() + option(VOLK_CPU_FEATURES "Volk uses cpu_features" OFF) + endif() endif() 
if(VOLK_CPU_FEATURES) @@ -183,7 +220,7 @@ endif() ######################################################################## # Check if Orc is available ######################################################################## -option(ENABLE_ORC "Enable Orc" True) +option(ENABLE_ORC "Enable Orc" ON) if(ENABLE_ORC) find_package(ORC) else(ENABLE_ORC) @@ -225,27 +262,35 @@ install( FILES_MATCHING PATTERN "*.h") +set(VOLK_INSTALL_HEADERS + ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_alloc.hh + ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_complex.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_common.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_mathematical_functions.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/saturation_arithmetic.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_avx_intrinsics.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_avx2_intrinsics.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_avx2_fma_intrinsics.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_avx512_intrinsics.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_sse_intrinsics.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_sse3_intrinsics.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_neon_intrinsics.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_malloc.h + ${CMAKE_CURRENT_BINARY_DIR}/include/volk/volk_dispatch.h + ${CMAKE_CURRENT_BINARY_DIR}/include/volk/volk_config_fixed.h + ${CMAKE_CURRENT_BINARY_DIR}/include/volk/volk_typedefs.h + ${CMAKE_CURRENT_BINARY_DIR}/include/volk/volk_version.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/constants.h) + +if(NOT VOLK_STATIC_DISPATCH) + list(APPEND VOLK_INSTALL_HEADERS + ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_prefs.h + ${CMAKE_CURRENT_BINARY_DIR}/include/volk/volk_cpu.h) +endif() + install( - FILES ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_prefs.h - ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_alloc.hh - ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_complex.h - 
${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_common.h - ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_mathematical_functions.h - ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/saturation_arithmetic.h - ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_avx_intrinsics.h - ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_avx2_intrinsics.h - ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_avx2_fma_intrinsics.h - ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_avx512_intrinsics.h - ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_sse_intrinsics.h - ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_sse3_intrinsics.h - ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_neon_intrinsics.h - ${CMAKE_CURRENT_BINARY_DIR}/include/volk/volk.h - ${CMAKE_CURRENT_BINARY_DIR}/include/volk/volk_cpu.h - ${CMAKE_CURRENT_BINARY_DIR}/include/volk/volk_config_fixed.h - ${CMAKE_CURRENT_BINARY_DIR}/include/volk/volk_typedefs.h - ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_malloc.h - ${CMAKE_CURRENT_BINARY_DIR}/include/volk/volk_version.h - ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/constants.h + FILES ${VOLK_INSTALL_HEADERS} DESTINATION include/volk COMPONENT "volk_devel") @@ -315,26 +360,6 @@ install( DESTINATION ${CMAKE_MODULES_DIR}/volk) ######################################################################## -# Option to enable QA testing, on by default -######################################################################## -option(ENABLE_TESTING "Enable QA testing" ON) -if(ENABLE_TESTING) - message(STATUS "QA Testing is enabled.") -else() - message(STATUS "QA Testing is disabled.") -endif() -message(STATUS " Modify using: -DENABLE_TESTING=ON/OFF") - -######################################################################## -# Utility apps (rely on an OS being present instead of a bare-metal target) -######################################################################## -option(ENABLE_UTILITY_APPS "Enable utility apps" ON) -if(ENABLE_UTILITY_APPS) - message(STATUS "Utility apps are enabled.") -else() - 
message(STATUS "Utility apps are disabled.") -endif() - # fmt - required for qa_utils table printing (tests and utility apps) # For static builds, always use FetchContent to ensure we get a static library if(ENABLE_TESTING OR ENABLE_UTILITY_APPS) @@ -343,8 +368,7 @@ if(ENABLE_TESTING OR ENABLE_UTILITY_APPS) endif() if(NOT fmt_FOUND) if(ENABLE_STATIC_LIBS) - message( - STATUS "Static build: using FetchContent to build fmt as static library ...") + message(STATUS "Static build: using FetchContent to build fmt as static library ...") else() message(STATUS "fmt package not found. Using FetchContent to download ...") endif() @@ -364,8 +388,6 @@ endif() ######################################################################## # Option to enable post-build profiling using volk_profile, off by default ######################################################################## -cmake_dependent_option(ENABLE_PROFILING "Launch system profiler after build" OFF - "ENABLE_UTILITY_APPS" OFF) if(ENABLE_PROFILING) if(DEFINED VOLK_CONFIGPATH) get_filename_component(VOLK_CONFIGPATH ${VOLK_CONFIGPATH} ABSOLUTE) @@ -386,7 +408,6 @@ if(ENABLE_PROFILING) else() message(STATUS "System profiling is disabled.") endif() -message(STATUS " Modify using: -DENABLE_PROFILING=ON/OFF") ######################################################################## # Setup the library @@ -396,7 +417,9 @@ add_subdirectory(lib) ######################################################################## # Add tests ######################################################################## -add_subdirectory(tests) +if(ENABLE_TESTING) + add_subdirectory(tests) +endif() ######################################################################## # Utility apps @@ -405,7 +428,6 @@ if(ENABLE_UTILITY_APPS) add_subdirectory(apps) endif() -option(ENABLE_MODTOOL "Enable volk_modtool python utility" True) if(ENABLE_MODTOOL) add_subdirectory(python/volk_modtool) endif() diff --git a/include/volk/volk.h b/include/volk/volk.h new file 
mode 100644 index 000000000..2d192c3a2 --- /dev/null +++ b/include/volk/volk.h @@ -0,0 +1,38 @@ +/* -*- c++ -*- */ +/* + * Copyright 2011-2020 Free Software Foundation, Inc. + * + * This file is part of VOLK + * + * SPDX-License-Identifier: LGPL-3.0-or-later + */ + +#ifndef INCLUDED_VOLK_RUNTIME +#define INCLUDED_VOLK_RUNTIME + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +__VOLK_DECL_BEGIN + +/*! + * The VOLK_OR_PTR macro is a convenience macro + * for checking the alignment of a set of pointers. + * Example usage: + * volk_is_aligned(VOLK_OR_PTR(VOLK_OR_PTR(p0, p1), p2)) + */ +#define VOLK_OR_PTR(ptr0, ptr1) (const void*)(((intptr_t)(ptr0)) | ((intptr_t)(ptr1))) + +#include + +__VOLK_DECL_END + +#endif /*INCLUDED_VOLK_RUNTIME*/ diff --git a/kernels/volk/volk_32f_expfast_32f.h b/kernels/volk/volk_32f_expfast_32f.h index 48174db01..55a52e236 100644 --- a/kernels/volk/volk_32f_expfast_32f.h +++ b/kernels/volk/volk_32f_expfast_32f.h @@ -53,10 +53,10 @@ #include #include -#define Mln2 0.6931471805f -#define A 8388608.0f -#define B 1065353216.0f -#define C 60801.0f +#define VOLK_EXPFAST_Mln2 0.6931471805f +#define VOLK_EXPFAST_A 8388608.0f +#define VOLK_EXPFAST_B 1065353216.0f +#define VOLK_EXPFAST_C 60801.0f #ifndef INCLUDED_volk_32f_expfast_32f_a_H @@ -78,8 +78,8 @@ static inline void volk_32f_expfast_32f_a_avx_fma(float* bVector, __m256 aVal, bVal, a, b; __m256i exp; - a = _mm256_set1_ps(A / Mln2); - b = _mm256_set1_ps(B - C); + a = _mm256_set1_ps(VOLK_EXPFAST_A / VOLK_EXPFAST_Mln2); + b = _mm256_set1_ps(VOLK_EXPFAST_B - VOLK_EXPFAST_C); for (; number < eighthPoints; number++) { aVal = _mm256_load_ps(aPtr); @@ -114,8 +114,8 @@ volk_32f_expfast_32f_a_avx(float* bVector, const float* aVector, unsigned int nu __m256 aVal, bVal, a, b; __m256i exp; - a = _mm256_set1_ps(A / Mln2); - b = _mm256_set1_ps(B - C); + a = _mm256_set1_ps(VOLK_EXPFAST_A / VOLK_EXPFAST_Mln2); + b = _mm256_set1_ps(VOLK_EXPFAST_B - VOLK_EXPFAST_C); for (; number < eighthPoints; number++) { aVal = _mm256_load_ps(aPtr); @@ -150,8 +150,8 @@ static inline void volk_32f_expfast_32f_a_sse4_1(float* bVector, __m128 aVal, bVal, a, b; __m128i 
exp; - a = _mm_set1_ps(A / Mln2); - b = _mm_set1_ps(B - C); + a = _mm_set1_ps(VOLK_EXPFAST_A / VOLK_EXPFAST_Mln2); + b = _mm_set1_ps(VOLK_EXPFAST_B - VOLK_EXPFAST_C); for (; number < quarterPoints; number++) { aVal = _mm_load_ps(aPtr); @@ -191,8 +191,8 @@ static inline void volk_32f_expfast_32f_u_avx_fma(float* bVector, __m256 aVal, bVal, a, b; __m256i exp; - a = _mm256_set1_ps(A / Mln2); - b = _mm256_set1_ps(B - C); + a = _mm256_set1_ps(VOLK_EXPFAST_A / VOLK_EXPFAST_Mln2); + b = _mm256_set1_ps(VOLK_EXPFAST_B - VOLK_EXPFAST_C); for (; number < eighthPoints; number++) { aVal = _mm256_loadu_ps(aPtr); @@ -226,8 +226,8 @@ volk_32f_expfast_32f_u_avx(float* bVector, const float* aVector, unsigned int nu __m256 aVal, bVal, a, b; __m256i exp; - a = _mm256_set1_ps(A / Mln2); - b = _mm256_set1_ps(B - C); + a = _mm256_set1_ps(VOLK_EXPFAST_A / VOLK_EXPFAST_Mln2); + b = _mm256_set1_ps(VOLK_EXPFAST_B - VOLK_EXPFAST_C); for (; number < eighthPoints; number++) { aVal = _mm256_loadu_ps(aPtr); @@ -263,8 +263,8 @@ static inline void volk_32f_expfast_32f_u_sse4_1(float* bVector, __m128 aVal, bVal, a, b; __m128i exp; - a = _mm_set1_ps(A / Mln2); - b = _mm_set1_ps(B - C); + a = _mm_set1_ps(VOLK_EXPFAST_A / VOLK_EXPFAST_Mln2); + b = _mm_set1_ps(VOLK_EXPFAST_B - VOLK_EXPFAST_C); for (; number < quarterPoints; number++) { aVal = _mm_loadu_ps(aPtr); @@ -313,8 +313,8 @@ volk_32f_expfast_32f_neon(float* bVector, const float* aVector, unsigned int num unsigned int number = 0; const unsigned int quarterPoints = num_points / 4; - float32x4_t a = vdupq_n_f32(A / Mln2); - float32x4_t b = vdupq_n_f32(B - C); + float32x4_t a = vdupq_n_f32(VOLK_EXPFAST_A / VOLK_EXPFAST_Mln2); + float32x4_t b = vdupq_n_f32(VOLK_EXPFAST_B - VOLK_EXPFAST_C); for (; number < quarterPoints; number++) { float32x4_t aVal = vld1q_f32(aPtr); @@ -346,8 +346,8 @@ volk_32f_expfast_32f_neonv8(float* bVector, const float* aVector, unsigned int n unsigned int number = 0; const unsigned int eighthPoints = num_points / 8; - 
float32x4_t a = vdupq_n_f32(A / Mln2); - float32x4_t b = vdupq_n_f32(B - C); + float32x4_t a = vdupq_n_f32(VOLK_EXPFAST_A / VOLK_EXPFAST_Mln2); + float32x4_t b = vdupq_n_f32(VOLK_EXPFAST_B - VOLK_EXPFAST_C); for (; number < eighthPoints; number++) { __VOLK_PREFETCH(aPtr + 16); @@ -383,8 +383,10 @@ static inline void volk_32f_expfast_32f_rvv(float* bVector, const float* aVector, unsigned int num_points) { size_t vlmax = __riscv_vsetvlmax_e32m8(); - const vfloat32m8_t ca = __riscv_vfmv_v_f_f32m8(A / Mln2, vlmax); - const vfloat32m8_t cb = __riscv_vfmv_v_f_f32m8(B - C, vlmax); + const vfloat32m8_t ca = + __riscv_vfmv_v_f_f32m8(VOLK_EXPFAST_A / VOLK_EXPFAST_Mln2, vlmax); + const vfloat32m8_t cb = + __riscv_vfmv_v_f_f32m8(VOLK_EXPFAST_B - VOLK_EXPFAST_C, vlmax); size_t n = num_points; for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl) { @@ -397,4 +399,10 @@ volk_32f_expfast_32f_rvv(float* bVector, const float* aVector, unsigned int num_ } #endif /*LV_HAVE_RVV*/ #endif /* INCLUDED_volk_32f_expfast_32f_u_H */ + +/* Kept outside the include guards: the #defines above are re-made on every inclusion. */ +#undef VOLK_EXPFAST_Mln2 +#undef VOLK_EXPFAST_A +#undef VOLK_EXPFAST_B +#undef VOLK_EXPFAST_C diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 7127641bb..38ade588d 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -25,10 +25,12 @@ if(HAVE_FENV_H) add_definitions(-DHAVE_FENV_H) endif() -check_include_file(dlfcn.h HAVE_DLFCN_H) -if(HAVE_DLFCN_H) - add_definitions(-DHAVE_DLFCN_H) - list(APPEND volk_libraries ${CMAKE_DL_LIBS}) +if(NOT VOLK_STATIC_DISPATCH) + check_include_file(dlfcn.h HAVE_DLFCN_H) + if(HAVE_DLFCN_H) + add_definitions(-DHAVE_DLFCN_H) + list(APPEND volk_libraries ${CMAKE_DL_LIBS}) + endif() endif() ######################################################################## @@ -337,6 +339,20 @@ endforeach(arch) ######################################################################## message(STATUS "Available machines: ${available_machines}") +######################################################################## +# Static 
dispatch: validate machine name +######################################################################## +if(VOLK_STATIC_DISPATCH) + list(FIND available_machines "${VOLK_STATIC_DISPATCH}" _sd_idx) + if(_sd_idx EQUAL -1) + message(FATAL_ERROR + "VOLK_STATIC_DISPATCH='${VOLK_STATIC_DISPATCH}' is not available.\n" + "Available machines: ${available_machines}") + endif() + message(STATUS "Static dispatch: ${VOLK_STATIC_DISPATCH}") + set(available_machines ${VOLK_STATIC_DISPATCH}) +endif() + ######################################################################## # Create rules to run the volk generator ######################################################################## @@ -347,7 +363,11 @@ file(GLOB py_files ${PROJECT_SOURCE_DIR}/gen/*.py) file(GLOB h_files ${PROJECT_SOURCE_DIR}/kernels/volk/*.h) macro(gen_template tmpl output) - list(APPEND volk_gen_sources ${output}) + if(${output} MATCHES "\\.h$") + list(APPEND volk_gen_headers ${output}) + else() + list(APPEND volk_gen_sources ${output}) + endif() add_custom_command( OUTPUT ${output} DEPENDS ${xml_files} ${py_files} ${h_files} ${tmpl} @@ -359,21 +379,28 @@ endmacro(gen_template) make_directory(${PROJECT_BINARY_DIR}/include/volk) -gen_template(${PROJECT_SOURCE_DIR}/tmpl/volk.tmpl.h - ${PROJECT_BINARY_DIR}/include/volk/volk.h) -gen_template(${PROJECT_SOURCE_DIR}/tmpl/volk.tmpl.c ${PROJECT_BINARY_DIR}/lib/volk.c) gen_template(${PROJECT_SOURCE_DIR}/tmpl/volk_typedefs.tmpl.h ${PROJECT_BINARY_DIR}/include/volk/volk_typedefs.h) -gen_template(${PROJECT_SOURCE_DIR}/tmpl/volk_cpu.tmpl.h - ${PROJECT_BINARY_DIR}/include/volk/volk_cpu.h) -gen_template(${PROJECT_SOURCE_DIR}/tmpl/volk_cpu.tmpl.c - ${PROJECT_BINARY_DIR}/lib/volk_cpu.c) gen_template(${PROJECT_SOURCE_DIR}/tmpl/volk_config_fixed.tmpl.h ${PROJECT_BINARY_DIR}/include/volk/volk_config_fixed.h) -gen_template(${PROJECT_SOURCE_DIR}/tmpl/volk_machines.tmpl.h - ${PROJECT_BINARY_DIR}/lib/volk_machines.h) -gen_template(${PROJECT_SOURCE_DIR}/tmpl/volk_machines.tmpl.c - 
${PROJECT_BINARY_DIR}/lib/volk_machines.c) + +if(VOLK_STATIC_DISPATCH) + gen_template(${PROJECT_SOURCE_DIR}/tmpl/volk_static_dispatch.tmpl.h + ${PROJECT_BINARY_DIR}/include/volk/volk_dispatch.h ${VOLK_STATIC_DISPATCH}) +else() + gen_template(${PROJECT_SOURCE_DIR}/tmpl/volk_dynamic_dispatch.tmpl.h + ${PROJECT_BINARY_DIR}/include/volk/volk_dispatch.h) + gen_template(${PROJECT_SOURCE_DIR}/tmpl/volk_dynamic_dispatch.tmpl.c + ${PROJECT_BINARY_DIR}/lib/volk.c) + gen_template(${PROJECT_SOURCE_DIR}/tmpl/volk_cpu.tmpl.h + ${PROJECT_BINARY_DIR}/include/volk/volk_cpu.h) + gen_template(${PROJECT_SOURCE_DIR}/tmpl/volk_cpu.tmpl.c + ${PROJECT_BINARY_DIR}/lib/volk_cpu.c) + gen_template(${PROJECT_SOURCE_DIR}/tmpl/volk_machines.tmpl.h + ${PROJECT_BINARY_DIR}/lib/volk_machines.h) + gen_template(${PROJECT_SOURCE_DIR}/tmpl/volk_machines.tmpl.c + ${PROJECT_BINARY_DIR}/lib/volk_machines.c) +endif() set(BASE_CFLAGS NONE) string(TOUPPER ${CMAKE_BUILD_TYPE} CBTU) @@ -400,37 +427,39 @@ set(COMPILER_INFO "${CMAKE_C_COMPILER}:::${CMAKE_C_FLAGS_${GRCBTU}} ${CMAKE_C_FLAGS}\n${CMAKE_CXX_COMPILER}:::${CMAKE_CXX_FLAGS_${GRCBTU}} ${CMAKE_CXX_FLAGS}\n" ) -foreach(machine_name ${available_machines}) - #generate machine source - set(machine_source ${CMAKE_CURRENT_BINARY_DIR}/volk_machine_${machine_name}.c) - gen_template(${PROJECT_SOURCE_DIR}/tmpl/volk_machine_xxx.tmpl.c ${machine_source} - ${machine_name}) - - #determine machine flags - execute_process( - COMMAND - ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B} - ${PROJECT_SOURCE_DIR}/gen/volk_compile_utils.py --mode "machine_flags" - --machine "${machine_name}" --compiler "${COMPILER_NAME}" - OUTPUT_VARIABLE ${machine_name}_flags - OUTPUT_STRIP_TRAILING_WHITESPACE) +if(NOT VOLK_STATIC_DISPATCH) + foreach(machine_name ${available_machines}) + #generate machine source + set(machine_source ${CMAKE_CURRENT_BINARY_DIR}/volk_machine_${machine_name}.c) + gen_template(${PROJECT_SOURCE_DIR}/tmpl/volk_machine_xxx.tmpl.c ${machine_source} + ${machine_name}) - message( - 
STATUS - "BUILD INFO ::: ${machine_name} ::: ${COMPILER_NAME} ::: ${CMAKE_C_FLAGS_${CBTU}} ${CMAKE_C_FLAGS} ${${machine_name}_flags}" - ) - set(COMPILER_INFO - "${COMPILER_INFO}${machine_name}:::${COMPILER_NAME}:::${CMAKE_C_FLAGS_${CBTU}} ${CMAKE_C_FLAGS} ${${machine_name}_flags}\n" - ) - if(${machine_name}_flags AND NOT MSVC) - set_source_files_properties(${machine_source} - PROPERTIES COMPILE_FLAGS "${${machine_name}_flags}") - endif() + #determine machine flags + execute_process( + COMMAND + ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B} + ${PROJECT_SOURCE_DIR}/gen/volk_compile_utils.py --mode "machine_flags" + --machine "${machine_name}" --compiler "${COMPILER_NAME}" + OUTPUT_VARIABLE ${machine_name}_flags + OUTPUT_STRIP_TRAILING_WHITESPACE) + + message( + STATUS + "BUILD INFO ::: ${machine_name} ::: ${COMPILER_NAME} ::: ${CMAKE_C_FLAGS_${CBTU}} ${CMAKE_C_FLAGS} ${${machine_name}_flags}" + ) + set(COMPILER_INFO + "${COMPILER_INFO}${machine_name}:::${COMPILER_NAME}:::${CMAKE_C_FLAGS_${CBTU}} ${CMAKE_C_FLAGS} ${${machine_name}_flags}\n" + ) + if(${machine_name}_flags AND NOT MSVC) + set_source_files_properties(${machine_source} + PROPERTIES COMPILE_FLAGS "${${machine_name}_flags}") + endif() - #add to available machine defs - string(TOUPPER LV_MACHINE_${machine_name} machine_def) - list(APPEND machine_defs ${machine_def}) -endforeach(machine_name) + #add to available machine defs + string(TOUPPER LV_MACHINE_${machine_name} machine_def) + list(APPEND machine_defs ${machine_def}) + endforeach(machine_name) +endif() # Convert to a C string to compile and display properly string(STRIP "${cmake_c_compiler_version}" cmake_c_compiler_version) @@ -472,7 +501,7 @@ endforeach(ARCH) ######################################################################## # Handle orc support ######################################################################## -if(ORC_FOUND) +if(ORC_FOUND AND NOT VOLK_STATIC_DISPATCH) #setup orc library usage include_directories(${ORC_INCLUDE_DIRS}) 
link_directories(${ORC_LIBRARY_DIRS}) @@ -494,14 +523,13 @@ if(ORC_FOUND) list(APPEND volk_sources ${orcc_gen}) endforeach(orc_file) -else() +elseif(NOT ORC_FOUND) message(STATUS "Did not find liborc and orcc, disabling orc support...") endif() ######################################################################## # Handle the generated constants ######################################################################## - message(STATUS "Loading version ${VERSION} into constants...") #double escape for windows backslash path separators @@ -521,14 +549,22 @@ if(NOT WIN32) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden -Wno-deprecated-declarations") endif() -list(APPEND volk_sources ${CMAKE_CURRENT_SOURCE_DIR}/volk_prefs.c - ${CMAKE_CURRENT_SOURCE_DIR}/volk_rank_archs.c - ${CMAKE_CURRENT_SOURCE_DIR}/volk_malloc.c ${volk_gen_sources}) - -#set the machine definitions where applicable -set_source_files_properties( - ${CMAKE_CURRENT_BINARY_DIR}/volk.c ${CMAKE_CURRENT_BINARY_DIR}/volk_machines.c - PROPERTIES COMPILE_DEFINITIONS "${machine_defs}") +if(VOLK_STATIC_DISPATCH) + list(APPEND volk_sources + ${CMAKE_CURRENT_SOURCE_DIR}/volk_malloc.c + ${volk_gen_sources}) +else() + list(APPEND volk_sources + ${CMAKE_CURRENT_SOURCE_DIR}/volk_prefs.c + ${CMAKE_CURRENT_SOURCE_DIR}/volk_rank_archs.c + ${CMAKE_CURRENT_SOURCE_DIR}/volk_malloc.c + ${volk_gen_sources}) + + #set the machine definitions where applicable + set_source_files_properties( + ${CMAKE_CURRENT_BINARY_DIR}/volk.c ${CMAKE_CURRENT_BINARY_DIR}/volk_machines.c + PROPERTIES COMPILE_DEFINITIONS "${machine_defs}") +endif() if(MSVC) #add compatibility includes for stdint types @@ -544,7 +580,7 @@ endif() #library linkage, but we have to define them here for compiling to #work. There are options starting with CMake 3.13 for using the OBJECT #to propagate this information. 
-add_library(volk_obj OBJECT ${volk_sources}) +add_library(volk_obj OBJECT ${volk_sources} ${volk_gen_headers}) target_include_directories( volk_obj PRIVATE $ @@ -612,7 +648,7 @@ target_include_directories( PUBLIC $) #Configure target properties -if(ORC_FOUND) +if(ORC_FOUND AND NOT VOLK_STATIC_DISPATCH) target_link_libraries(volk PRIVATE ${ORC_LIBRARIES}) endif() if(NOT MSVC) @@ -646,7 +682,7 @@ if(ENABLE_STATIC_LIBS) target_link_libraries(volk_static PRIVATE $) endif() - if(ORC_FOUND) + if(ORC_FOUND AND NOT VOLK_STATIC_DISPATCH) target_link_libraries(volk_static PUBLIC ${ORC_LIBRARIES_STATIC}) endif() if(NOT ANDROID) diff --git a/lib/constants.c.in b/lib/constants.c.in index 049bc04a4..9753ae6ae 100644 --- a/lib/constants.c.in +++ b/lib/constants.c.in @@ -11,14 +11,20 @@ #include #endif +#cmakedefine VOLK_STATIC_DISPATCH + +#ifndef VOLK_STATIC_DISPATCH #include +#endif #include const char* volk_prefix() { +#ifndef VOLK_STATIC_DISPATCH const char *prefix = getenv("VOLK_PREFIX"); if (prefix != NULL) return prefix; +#endif return "@prefix@"; } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 42dbeca0f..233449d91 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,10 +6,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later # -if(NOT ENABLE_TESTING) - return() -endif(NOT ENABLE_TESTING) - find_package(fmt) find_package(GTest) diff --git a/tmpl/volk.tmpl.c b/tmpl/volk_dynamic_dispatch.tmpl.c similarity index 100% rename from tmpl/volk.tmpl.c rename to tmpl/volk_dynamic_dispatch.tmpl.c diff --git a/tmpl/volk.tmpl.h b/tmpl/volk_dynamic_dispatch.tmpl.h similarity index 74% rename from tmpl/volk.tmpl.h rename to tmpl/volk_dynamic_dispatch.tmpl.h index b26c542cf..418800a9b 100644 --- a/tmpl/volk.tmpl.h +++ b/tmpl/volk_dynamic_dispatch.tmpl.h @@ -7,21 +7,6 @@ * SPDX-License-Identifier: LGPL-3.0-or-later */ -#ifndef INCLUDED_VOLK_RUNTIME -#define INCLUDED_VOLK_RUNTIME - -#include -#include -#include -#include -#include -#include - -#include -#include - 
-__VOLK_DECL_BEGIN typedef struct volk_func_desc { const char **impl_names; @@ -39,25 +24,16 @@ VOLK_API const char* volk_get_machine(void); //! Get the machine alignment in bytes VOLK_API size_t volk_get_alignment(void); -/*! - * The VOLK_OR_PTR macro is a convenience macro - * for checking the alignment of a set of pointers. - * Example usage: - * volk_is_aligned(VOLK_OR_PTR((VOLK_OR_PTR(p0, p1), p2))) - */ -#define VOLK_OR_PTR(ptr0, ptr1) \ - (const void *)(((intptr_t)(ptr0)) | ((intptr_t)(ptr1))) - /*! * Is the pointer on a machine alignment boundary? * * Note: for performance reasons, this function * is not usable until another volk API call is made * which will perform certain initialization tasks. * * \param ptr the pointer to some memory buffer * \return 1 for alignment boundary, else 0 */ VOLK_API bool volk_is_aligned(const void *ptr); // Just drop the deprecated attribute in case we are on Windows. Clang and GCC support `__attribute__`. @@ -107,7 +83,3 @@ extern VOLK_API volk_func_desc_t ${kern.name}_get_func_desc(void); % endif %endfor - -__VOLK_DECL_END - -#endif /*INCLUDED_VOLK_RUNTIME*/ diff --git a/tmpl/volk_static_dispatch.tmpl.h b/tmpl/volk_static_dispatch.tmpl.h new file mode 100644 index 000000000..4ea2950ac --- /dev/null +++ b/tmpl/volk_static_dispatch.tmpl.h @@ -0,0 +1,82 @@ +/* -*- c++ -*- */ +/* + * Copyright 2011-2020 Free Software Foundation, Inc. 
+ * + * This file is part of VOLK + * + * SPDX-License-Identifier: LGPL-3.0-or-later + */ + +<% + sd_machine = machine_dict[args[0]] + sd_arch_names = sd_machine.arch_names + arch_idx = {arch.name: i for i, arch in enumerate(archs)} + + def dep_score(impl): + return sum(1 << arch_idx[d] for d in impl.deps) if impl.deps else 0 + + def pick_best(impls, want_aligned): + candidates = [i for i in impls if i.is_aligned == want_aligned] + if not candidates: + candidates = [i for i in impls if i.is_aligned != want_aligned] + return max(candidates, key=dep_score) if candidates else None +%> + +//! Returns the name of the machine this instance will use +static inline const char* volk_get_machine(void) +{ + return "${sd_machine.name}"; +} + +//! Get the machine alignment in bytes +static inline size_t volk_get_alignment(void) +{ + return ${sd_machine.alignment}; +} + +//! Is the pointer on a machine alignment boundary? +static inline bool volk_is_aligned(const void* ptr) +{ + return ((intptr_t)(ptr) & (intptr_t)${sd_machine.alignment - 1}) == 0; +} + +%for arch in sd_machine.archs: +#define LV_HAVE_${arch.name.upper()} 1 +%endfor + +/* Forward-declare all kernel dispatchers so that cross-kernel calls inside + * implementation headers can resolve to the generic name without requiring + * the full definition to already be visible. */ +%for kern in kernels: +static inline void ${kern.name}(${kern.arglist_full}); +%endfor + +%for kern in kernels: +#include +%endfor + +/* Define _a/_u aliases and the alignment-dispatching inline for each kernel. 
*/ +%for kern in kernels: +<% + impls = kern.get_impls(sd_arch_names) + best_a = pick_best(impls, True) + best_u = pick_best(impls, False) +%>\ +#define ${kern.name}_a ${kern.name}_${best_a.name} +#define ${kern.name}_u ${kern.name}_${best_u.name} + +static inline void ${kern.name}(${kern.arglist_full}) +{ + if (volk_is_aligned(<% num_open_parens = 0 %>\ +%for arg_type, arg_name in kern.args: +%if '*' in arg_type: +VOLK_OR_PTR(${arg_name},<% num_open_parens += 1 %>\ +%endif +%endfor +0<% end_open_parens = ')'*num_open_parens %>${end_open_parens})) + ${kern.name}_a(${kern.arglist_names}); + else + ${kern.name}_u(${kern.arglist_names}); +} + +%endfor