Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,30 @@ jobs:
- name: test
run: cd build && ctest -V

# CI job for the static-dispatch build: configures with
# VOLK_STATIC_DISPATCH=generic, builds, prints the volk-config-info report
# and runs the test suite through ctest.
build-ubuntu-static-dispatch:
name: Build static dispatch on ubuntu-24.04
runs-on: ubuntu-24.04

steps:
# Submodules are checked out recursively together with the sources.
- uses: actions/checkout@v4
with:
submodules: "recursive"
- name: dependencies
run: sudo apt install python3-mako libgtest-dev libfmt-dev
# -Werror is passed for both C and C++, so any compiler warning fails the job.
- name: configure
run: mkdir build && cd build && cmake -DVOLK_STATIC_DISPATCH=generic -DCMAKE_C_FLAGS="-Werror" -DCMAKE_CXX_FLAGS="-Werror" -DBUILD_EXECUTABLE=ON ..
- name: build
run: cmake --build build -j$(nproc)
- name: Print info
run: |
./build/apps/volk-config-info --alignment
./build/apps/volk-config-info --avail-machines
./build/apps/volk-config-info --all-machines
./build/apps/volk-config-info --malloc
./build/apps/volk-config-info --cc
- name: test
run: cd build && ctest -V

build-windows:
runs-on: windows-latest

Expand Down
130 changes: 76 additions & 54 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -103,15 +103,52 @@ if(MSVC)
endif(MSVC)

########################################################################
# Dependencies setup
# Build options
########################################################################
# Name of the single machine to dispatch to at build time.
# The default (empty string) keeps dynamic dispatch.
set(VOLK_STATIC_DISPATCH
    ""
    CACHE
      STRING
      "Static dispatch to a specific machine (e.g. neonv8). Empty means dynamic dispatch."
)

# User-facing feature switches. Each one is presented as an option with the
# listed default only while its dependency condition holds; otherwise it is
# forced to the trailing value (OFF).
cmake_dependent_option(
  ENABLE_UTILITY_APPS
  "Enable utility apps"
  ON
  "NOT CMAKE_CROSSCOMPILING"
  OFF
)
cmake_dependent_option(
  ENABLE_TESTING
  "Enable QA testing"
  ON
  "NOT CMAKE_CROSSCOMPILING"
  OFF
)
cmake_dependent_option(
  ENABLE_MODTOOL
  "Enable volk_modtool python utility"
  ON
  "NOT CMAKE_CROSSCOMPILING"
  OFF
)
# Profiling additionally requires the utility apps to be enabled.
cmake_dependent_option(
  ENABLE_PROFILING
  "Launch system profiler after build"
  OFF
  "ENABLE_UTILITY_APPS;NOT CMAKE_CROSSCOMPILING"
  OFF
)

# cpu_features - sensible defaults, user settable option
if(CMAKE_SYSTEM_PROCESSOR MATCHES
"(^mips)|(^arm)|(^aarch64)|(x86_64)|(AMD64|amd64)|(^i.86$)|(^powerpc)|(^ppc)|(^riscv)")
option(VOLK_CPU_FEATURES "Volk uses cpu_features" ON)
########################################################################
# Print summary of options
########################################################################
if (VOLK_STATIC_DISPATCH)
message(STATUS "Using static dispatch for: ${VOLK_STATIC_DISPATCH}")
else()
option(VOLK_CPU_FEATURES "Volk uses cpu_features" OFF)
message(STATUS "Using dynamic dispatch.")
endif()

# One status line per optional component that is switched on.
# Components that are switched off print nothing here.
if(ENABLE_UTILITY_APPS)
  message(STATUS "Utility apps enabled.")
endif()

if(ENABLE_TESTING)
  message(STATUS "Testing enabled.")
endif()

if(ENABLE_MODTOOL)
  message(STATUS "volk_modtool python utility enabled.")
endif()

if(ENABLE_PROFILING)
  message(STATUS "Launch system profiler after build enabled.")
endif()

########################################################################
# Dependencies setup
########################################################################

# cpu_features - only needed for dynamic dispatch (runtime CPU detection).
# The option is declared only when no static dispatch machine is selected.
# It defaults to ON for the processor families matched below and to OFF for
# every other processor.
if(NOT VOLK_STATIC_DISPATCH)
  set(volk_cpu_features_default OFF)
  if(CMAKE_SYSTEM_PROCESSOR
     MATCHES
     "(^mips)|(^arm)|(^aarch64)|(x86_64)|(AMD64|amd64)|(^i.86$)|(^powerpc)|(^ppc)|(^riscv)"
  )
    set(volk_cpu_features_default ON)
  endif()
  option(VOLK_CPU_FEATURES "Volk uses cpu_features" ${volk_cpu_features_default})
  # The helper is only needed to pick the default; do not leave it in scope.
  unset(volk_cpu_features_default)
endif()

if(VOLK_CPU_FEATURES)
Expand Down Expand Up @@ -183,7 +220,7 @@ endif()
########################################################################
# Check if Orc is available
########################################################################
option(ENABLE_ORC "Enable Orc" True)
option(ENABLE_ORC "Enable Orc" ON)
if(ENABLE_ORC)
find_package(ORC)
else(ENABLE_ORC)
Expand Down Expand Up @@ -225,27 +262,35 @@ install(
FILES_MATCHING
PATTERN "*.h")

# Headers installed for every build. Entries under CMAKE_CURRENT_SOURCE_DIR
# come from the source tree; entries under CMAKE_CURRENT_BINARY_DIR are taken
# from the build tree.
set(VOLK_INSTALL_HEADERS
    "${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk.h"
    "${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_alloc.hh"
    "${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_complex.h"
    "${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_common.h"
    "${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_mathematical_functions.h"
    "${CMAKE_CURRENT_SOURCE_DIR}/include/volk/saturation_arithmetic.h"
    "${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_avx_intrinsics.h"
    "${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_avx2_intrinsics.h"
    "${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_avx2_fma_intrinsics.h"
    "${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_avx512_intrinsics.h"
    "${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_sse_intrinsics.h"
    "${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_sse3_intrinsics.h"
    "${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_neon_intrinsics.h"
    "${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_malloc.h"
    "${CMAKE_CURRENT_BINARY_DIR}/include/volk/volk_dispatch.h"
    "${CMAKE_CURRENT_BINARY_DIR}/include/volk/volk_config_fixed.h"
    "${CMAKE_CURRENT_BINARY_DIR}/include/volk/volk_typedefs.h"
    "${CMAKE_CURRENT_BINARY_DIR}/include/volk/volk_version.h"
    "${CMAKE_CURRENT_SOURCE_DIR}/include/volk/constants.h"
)

# volk_prefs.h and volk_cpu.h are added to the install list only when no
# static dispatch machine is selected (i.e. for dynamic dispatch).
if(NOT VOLK_STATIC_DISPATCH)
  list(
    APPEND
    VOLK_INSTALL_HEADERS
    "${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_prefs.h"
    "${CMAKE_CURRENT_BINARY_DIR}/include/volk/volk_cpu.h"
  )
endif()

install(
FILES ${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_prefs.h
${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_alloc.hh
${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_complex.h
${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_common.h
${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_mathematical_functions.h
${CMAKE_CURRENT_SOURCE_DIR}/include/volk/saturation_arithmetic.h
${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_avx_intrinsics.h
${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_avx2_intrinsics.h
${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_avx2_fma_intrinsics.h
${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_avx512_intrinsics.h
${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_sse_intrinsics.h
${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_sse3_intrinsics.h
${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_neon_intrinsics.h
${CMAKE_CURRENT_BINARY_DIR}/include/volk/volk.h
${CMAKE_CURRENT_BINARY_DIR}/include/volk/volk_cpu.h
${CMAKE_CURRENT_BINARY_DIR}/include/volk/volk_config_fixed.h
${CMAKE_CURRENT_BINARY_DIR}/include/volk/volk_typedefs.h
${CMAKE_CURRENT_SOURCE_DIR}/include/volk/volk_malloc.h
${CMAKE_CURRENT_BINARY_DIR}/include/volk/volk_version.h
${CMAKE_CURRENT_SOURCE_DIR}/include/volk/constants.h
FILES ${VOLK_INSTALL_HEADERS}
DESTINATION include/volk
COMPONENT "volk_devel")

Expand Down Expand Up @@ -315,26 +360,6 @@ install(
DESTINATION ${CMAKE_MODULES_DIR}/volk)

########################################################################
# Option to enable QA testing, on by default
########################################################################
option(ENABLE_TESTING "Enable QA testing" ON)
if(ENABLE_TESTING)
message(STATUS "QA Testing is enabled.")
else()
message(STATUS "QA Testing is disabled.")
endif()
message(STATUS " Modify using: -DENABLE_TESTING=ON/OFF")

########################################################################
# Utility apps (rely on an OS being present instead of a bare-metal target)
########################################################################
option(ENABLE_UTILITY_APPS "Enable utility apps" ON)
if(ENABLE_UTILITY_APPS)
message(STATUS "Utility apps are enabled.")
else()
message(STATUS "Utility apps are disabled.")
endif()

# fmt - required for qa_utils table printing (tests and utility apps)
# For static builds, always use FetchContent to ensure we get a static library
if(ENABLE_TESTING OR ENABLE_UTILITY_APPS)
Expand All @@ -343,8 +368,7 @@ if(ENABLE_TESTING OR ENABLE_UTILITY_APPS)
endif()
if(NOT fmt_FOUND)
if(ENABLE_STATIC_LIBS)
message(
STATUS "Static build: using FetchContent to build fmt as static library ...")
message(STATUS "Static build: using FetchContent to build fmt as static library ...")
else()
message(STATUS "fmt package not found. Using FetchContent to download ...")
endif()
Expand All @@ -364,8 +388,6 @@ endif()
########################################################################
# Option to enable post-build profiling using volk_profile, off by default
########################################################################
cmake_dependent_option(ENABLE_PROFILING "Launch system profiler after build" OFF
"ENABLE_UTILITY_APPS" OFF)
if(ENABLE_PROFILING)
if(DEFINED VOLK_CONFIGPATH)
get_filename_component(VOLK_CONFIGPATH ${VOLK_CONFIGPATH} ABSOLUTE)
Expand All @@ -386,7 +408,6 @@ if(ENABLE_PROFILING)
else()
message(STATUS "System profiling is disabled.")
endif()
message(STATUS " Modify using: -DENABLE_PROFILING=ON/OFF")

########################################################################
# Setup the library
Expand All @@ -396,7 +417,9 @@ add_subdirectory(lib)
########################################################################
# Add tests
########################################################################
add_subdirectory(tests)
if(ENABLE_TESTING)
add_subdirectory(tests)
endif()

########################################################################
# Utility apps
Expand All @@ -405,7 +428,6 @@ if(ENABLE_UTILITY_APPS)
add_subdirectory(apps)
endif()

option(ENABLE_MODTOOL "Enable volk_modtool python utility" True)
if(ENABLE_MODTOOL)
add_subdirectory(python/volk_modtool)
endif()
Expand Down
32 changes: 32 additions & 0 deletions include/volk/volk.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/* -*- c++ -*- */
/*
* Copyright 2011-2020 Free Software Foundation, Inc.
*
* This file is part of VOLK
*
* SPDX-License-Identifier: LGPL-3.0-or-later
*/

/*
 * Top-level public VOLK header. It pulls in the common VOLK headers and the
 * standard bool/size/fixed-width integer headers, defines VOLK_OR_PTR, and
 * then includes the dispatch declarations from volk_dispatch.h.
 */
#ifndef INCLUDED_VOLK_RUNTIME
#define INCLUDED_VOLK_RUNTIME

#include <volk/volk_common.h>
#include <volk/volk_complex.h>
#include <volk/volk_config_fixed.h>
#include <volk/volk_malloc.h>
#include <volk/volk_typedefs.h>
#include <volk/volk_version.h>

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

__VOLK_DECL_BEGIN

/*
 * Bitwise-ORs the integer values of two pointers (via intptr_t) and yields
 * the result as a const void*.
 * NOTE(review): presumably lets a single alignment test cover both pointers
 * at once -- confirm against the code in volk_dispatch.h.
 */
#define VOLK_OR_PTR(ptr0, ptr1) (const void*)(((intptr_t)(ptr0)) | ((intptr_t)(ptr1)))

/*
 * Included inside the __VOLK_DECL_BEGIN / __VOLK_DECL_END pair and after
 * VOLK_OR_PTR is defined, so the macro is visible to it.
 */
#include <volk/volk_dispatch.h>

__VOLK_DECL_END

#endif /*INCLUDED_VOLK_RUNTIME*/
51 changes: 29 additions & 22 deletions kernels/volk/volk_32f_expfast_32f.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,10 @@
#include <math.h>
#include <stdio.h>

#define Mln2 0.6931471805f
#define A 8388608.0f
#define B 1065353216.0f
#define C 60801.0f
#define VOLK_EXPFAST_Mln2 0.6931471805f
#define VOLK_EXPFAST_A 8388608.0f
#define VOLK_EXPFAST_B 1065353216.0f
#define VOLK_EXPFAST_C 60801.0f


#ifndef INCLUDED_volk_32f_expfast_32f_a_H
Expand All @@ -78,8 +78,8 @@ static inline void volk_32f_expfast_32f_a_avx_fma(float* bVector,

__m256 aVal, bVal, a, b;
__m256i exp;
a = _mm256_set1_ps(A / Mln2);
b = _mm256_set1_ps(B - C);
a = _mm256_set1_ps(VOLK_EXPFAST_A / VOLK_EXPFAST_Mln2);
b = _mm256_set1_ps(VOLK_EXPFAST_B - VOLK_EXPFAST_C);

for (; number < eighthPoints; number++) {
aVal = _mm256_load_ps(aPtr);
Expand Down Expand Up @@ -114,8 +114,8 @@ volk_32f_expfast_32f_a_avx(float* bVector, const float* aVector, unsigned int nu

__m256 aVal, bVal, a, b;
__m256i exp;
a = _mm256_set1_ps(A / Mln2);
b = _mm256_set1_ps(B - C);
a = _mm256_set1_ps(VOLK_EXPFAST_A / VOLK_EXPFAST_Mln2);
b = _mm256_set1_ps(VOLK_EXPFAST_B - VOLK_EXPFAST_C);

for (; number < eighthPoints; number++) {
aVal = _mm256_load_ps(aPtr);
Expand Down Expand Up @@ -150,8 +150,8 @@ static inline void volk_32f_expfast_32f_a_sse4_1(float* bVector,

__m128 aVal, bVal, a, b;
__m128i exp;
a = _mm_set1_ps(A / Mln2);
b = _mm_set1_ps(B - C);
a = _mm_set1_ps(VOLK_EXPFAST_A / VOLK_EXPFAST_Mln2);
b = _mm_set1_ps(VOLK_EXPFAST_B - VOLK_EXPFAST_C);

for (; number < quarterPoints; number++) {
aVal = _mm_load_ps(aPtr);
Expand Down Expand Up @@ -191,8 +191,8 @@ static inline void volk_32f_expfast_32f_u_avx_fma(float* bVector,

__m256 aVal, bVal, a, b;
__m256i exp;
a = _mm256_set1_ps(A / Mln2);
b = _mm256_set1_ps(B - C);
a = _mm256_set1_ps(VOLK_EXPFAST_A / VOLK_EXPFAST_Mln2);
b = _mm256_set1_ps(VOLK_EXPFAST_B - VOLK_EXPFAST_C);

for (; number < eighthPoints; number++) {
aVal = _mm256_loadu_ps(aPtr);
Expand Down Expand Up @@ -226,8 +226,8 @@ volk_32f_expfast_32f_u_avx(float* bVector, const float* aVector, unsigned int nu

__m256 aVal, bVal, a, b;
__m256i exp;
a = _mm256_set1_ps(A / Mln2);
b = _mm256_set1_ps(B - C);
a = _mm256_set1_ps(VOLK_EXPFAST_A / VOLK_EXPFAST_Mln2);
b = _mm256_set1_ps(VOLK_EXPFAST_B - VOLK_EXPFAST_C);

for (; number < eighthPoints; number++) {
aVal = _mm256_loadu_ps(aPtr);
Expand Down Expand Up @@ -263,8 +263,8 @@ static inline void volk_32f_expfast_32f_u_sse4_1(float* bVector,

__m128 aVal, bVal, a, b;
__m128i exp;
a = _mm_set1_ps(A / Mln2);
b = _mm_set1_ps(B - C);
a = _mm_set1_ps(VOLK_EXPFAST_A / VOLK_EXPFAST_Mln2);
b = _mm_set1_ps(VOLK_EXPFAST_B - VOLK_EXPFAST_C);

for (; number < quarterPoints; number++) {
aVal = _mm_loadu_ps(aPtr);
Expand Down Expand Up @@ -313,8 +313,8 @@ volk_32f_expfast_32f_neon(float* bVector, const float* aVector, unsigned int num
unsigned int number = 0;
const unsigned int quarterPoints = num_points / 4;

float32x4_t a = vdupq_n_f32(A / Mln2);
float32x4_t b = vdupq_n_f32(B - C);
float32x4_t a = vdupq_n_f32(VOLK_EXPFAST_A / VOLK_EXPFAST_Mln2);
float32x4_t b = vdupq_n_f32(VOLK_EXPFAST_B - VOLK_EXPFAST_C);

for (; number < quarterPoints; number++) {
float32x4_t aVal = vld1q_f32(aPtr);
Expand Down Expand Up @@ -346,8 +346,8 @@ volk_32f_expfast_32f_neonv8(float* bVector, const float* aVector, unsigned int n
unsigned int number = 0;
const unsigned int eighthPoints = num_points / 8;

float32x4_t a = vdupq_n_f32(A / Mln2);
float32x4_t b = vdupq_n_f32(B - C);
float32x4_t a = vdupq_n_f32(VOLK_EXPFAST_A / VOLK_EXPFAST_Mln2);
float32x4_t b = vdupq_n_f32(VOLK_EXPFAST_B - VOLK_EXPFAST_C);

for (; number < eighthPoints; number++) {
__VOLK_PREFETCH(aPtr + 16);
Expand Down Expand Up @@ -383,8 +383,10 @@ static inline void
volk_32f_expfast_32f_rvv(float* bVector, const float* aVector, unsigned int num_points)
{
size_t vlmax = __riscv_vsetvlmax_e32m8();
const vfloat32m8_t ca = __riscv_vfmv_v_f_f32m8(A / Mln2, vlmax);
const vfloat32m8_t cb = __riscv_vfmv_v_f_f32m8(B - C, vlmax);
const vfloat32m8_t ca =
__riscv_vfmv_v_f_f32m8(VOLK_EXPFAST_A / VOLK_EXPFAST_Mln2, vlmax);
const vfloat32m8_t cb =
__riscv_vfmv_v_f_f32m8(VOLK_EXPFAST_B - VOLK_EXPFAST_C, vlmax);

size_t n = num_points;
for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl) {
Expand All @@ -397,4 +399,9 @@ volk_32f_expfast_32f_rvv(float* bVector, const float* aVector, unsigned int num_
}
#endif /*LV_HAVE_RVV*/

#undef VOLK_EXPFAST_Mln2
#undef VOLK_EXPFAST_A
#undef VOLK_EXPFAST_B
#undef VOLK_EXPFAST_C

#endif /* INCLUDED_volk_32f_expfast_32f_u_H */
Loading
Loading