Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ jobs:
submodules: recursive
- name: Install Linux deps
if: runner.os == 'Linux'
run: sudo apt-get update && sudo apt-get -y --no-install-recommends install git g++ cmake ninja-build llvm-15-dev zlib1g-dev flex bison libfl-dev libzstd-dev
run: sudo apt-get update && sudo apt-get -y --no-install-recommends install cmake ninja-build libfl-dev
- name: Install macOS deps
if: runner.os == 'macOS'
run: brew install cmake ninja llvm@15 zlib flex bison boost zstd ncurses
run: brew install flex bison boost
- uses: actions/checkout@v6
if: runner.os == 'Linux'
with:
Expand All @@ -43,7 +43,7 @@ jobs:
./b2 install link=static runtime-link=shared threading=multi variant=release cxxflags=-fPIC
- name: Configure (Linux)
if: runner.os == 'Linux'
run: cmake -S ocelot -B ocelot/build -G Ninja -DBUILD_TESTS=ON -DBUILD_TESTS_CUDA=OFF -DLLVM_DIR=$(llvm-config-15 --cmakedir) -DBOOST_ROOT=${{ github.workspace }}/boost-fpic
run: cmake -S ocelot -B ocelot/build -G Ninja -DBUILD_TESTS=ON -DBUILD_TESTS_CUDA=OFF -DBOOST_ROOT=${{ github.workspace }}/boost-fpic
- name: Configure (macOS)
if: runner.os == 'macOS'
run: cmake -S ocelot -B ocelot/build -G Ninja -DBUILD_TESTS=ON -DBUILD_TESTS_CUDA=OFF -DCMAKE_POLICY_VERSION_MINIMUM=3.5
Expand Down
90 changes: 63 additions & 27 deletions ocelot/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ option(BUILD_TESTS "Build tests: default OFF" OFF)
option(BUILD_TESTS_CUDA "Build CUDA tests: default ON" ON)
option(BUILD_TOOLS "Build tool executables: default ON" ON)
option(ENABLE_OPENGL "Build OpenGL interop support" OFF)
option(ENABLE_LLVM "Build LLVM backend support" OFF)

if (NOT APPLE AND BUILD_TESTS AND BUILD_TESTS_CUDA)
project(gpuocelot C CXX CUDA ASM)
Expand Down Expand Up @@ -37,14 +38,16 @@ set(Boost_USE_MULTITHREADED ON)
find_package(Boost COMPONENTS filesystem thread REQUIRED)
find_package(FLEX 2.5 REQUIRED)
find_package(BISON 2.5 REQUIRED)
find_package(ZLIB REQUIRED)
find_library(ZSTD_LIB NAMES zstd libzstd)
find_package(Curses REQUIRED)

if (ENABLE_OPENGL)
find_package(GLEW REQUIRED)
endif()

if (ENABLE_LLVM)
find_package(ZLIB REQUIRED)
find_library(ZSTD_LIB NAMES zstd libzstd)
find_package(Curses REQUIRED)

if ("x${BUILD_LLVM}" STREQUAL "xOFF")
find_package(LLVM REQUIRED CONFIG)

Expand Down Expand Up @@ -232,6 +235,7 @@ set(LLVM_LIBRARIES
add_library(llvm INTERFACE)
add_dependencies(llvm llvm-project)
endif()
endif()

set(BUILD_SHARED_LIBS OFF) # ThirdParty/res_embed option
add_subdirectory(ThirdParty/hydrazine)
Expand All @@ -242,17 +246,13 @@ include(ResEmbed)
separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${PROJECT_NAME}_DEFINITIONS)

set(${PROJECT_NAME}_INCLUDE_DIRS
${LLVM_INCLUDE_DIRS}
${CMAKE_CURRENT_SOURCE_DIR}/ThirdParty/cuda-fatbin-decompression
${CMAKE_CURRENT_SOURCE_DIR}/include
${CMAKE_CURRENT_BINARY_DIR})

set(${PROJECT_NAME}_LINK_LIBRARIES
hydrazine
Boost::filesystem
ZLIB::ZLIB
${CURSES_LIBRARIES}
${ZSTD_LIB}
${CMAKE_DL_LIBS})

if (ENABLE_OPENGL)
Expand All @@ -263,16 +263,28 @@ else()
list(APPEND ${PROJECT_NAME}_DEFINITIONS ENABLE_OPENGL=0)
endif()

if (ENABLE_LLVM)
list(APPEND ${PROJECT_NAME}_INCLUDE_DIRS ${LLVM_INCLUDE_DIRS})
list(APPEND ${PROJECT_NAME}_LINK_LIBRARIES ZLIB::ZLIB ${CURSES_LIBRARIES} ${ZSTD_LIB})
set(${PROJECT_NAME}_LIBRARY_DIRS ${LLVM_LIBRARY_DIR})

if ("x${BUILD_LLVM}" STREQUAL "xON")
list(APPEND ${PROJECT_NAME}_LINK_LIBRARIES llvm)
endif()

set(${PROJECT_NAME}_LIBRARY_DIRS ${LLVM_LIBRARY_DIR})
list(APPEND ${PROJECT_NAME}_DEFINITIONS ENABLE_LLVM=1)
else()
list(APPEND ${PROJECT_NAME}_DEFINITIONS ENABLE_LLVM=0)
endif()


include(CTest)

function(ocelot_add_tests name)
file(GLOB TESTS "src/${name}/test/Test*.cpp" "src/${name}/test/Test*.cu")
if (NOT ENABLE_LLVM)
list(FILTER TESTS EXCLUDE REGEX "LLVM|TestExternalFunctions")
endif()
foreach(TEST ${TESTS})
get_filename_component(TEST_WE ${TEST} NAME_WE)
set(TEST_NAME ${PROJECT_NAME}_${name}_${TEST_WE})
Expand Down Expand Up @@ -375,7 +387,7 @@ target_include_directories(${PROJECT_NAME}_cuda PRIVATE ${${PROJECT_NAME}_INCLUD
target_link_libraries(${PROJECT_NAME}_cuda PUBLIC ${${PROJECT_NAME}_LINK_LIBRARIES})
target_link_directories(${PROJECT_NAME}_cuda PUBLIC ${${PROJECT_NAME}_LIBRARY_DIRS})

add_library(${PROJECT_NAME}_executive STATIC
set(${PROJECT_NAME}_executive_sources
src/executive/ATIExecutableKernel.cpp
src/executive/ATIGPUDevice.cpp
src/executive/CTAContext.cpp
Expand All @@ -387,20 +399,27 @@ add_library(${PROJECT_NAME}_executive STATIC
src/executive/EmulatorDevice.cpp
src/executive/ExecutableKernel.cpp
src/executive/FrameInfo.cpp
src/executive/LLVMContext.cpp
src/executive/LLVMCooperativeThreadArray.cpp
src/executive/LLVMExecutableKernel.cpp
src/executive/LLVMExecutionManager.cpp
src/executive/LLVMFunctionCallStack.cpp
src/executive/LLVMWorkerThread.cpp
src/executive/MulticoreCPUDevice.cpp
src/executive/NVIDIAExecutableKernel.cpp
src/executive/NVIDIAGPUDevice.cpp
src/executive/PassThroughDevice.cpp
src/executive/ReconvergenceMechanism.cpp
src/executive/RemoteDevice.cpp
src/executive/RuntimeException.cpp
src/executive/TextureOperations.cpp)

if (ENABLE_LLVM)
list(APPEND ${PROJECT_NAME}_executive_sources
src/executive/LLVMContext.cpp
src/executive/LLVMCooperativeThreadArray.cpp
src/executive/LLVMExecutableKernel.cpp
src/executive/LLVMExecutionManager.cpp
src/executive/LLVMFunctionCallStack.cpp
src/executive/LLVMWorkerThread.cpp
src/executive/MulticoreCPUDevice.cpp)
endif()

add_library(${PROJECT_NAME}_executive STATIC ${${PROJECT_NAME}_executive_sources})

set_property(TARGET ${PROJECT_NAME}_executive PROPERTY CXX_STANDARD 14)
set_property(TARGET ${PROJECT_NAME}_executive PROPERTY POSITION_INDEPENDENT_CODE ON)
target_compile_definitions(${PROJECT_NAME}_executive PRIVATE ${${PROJECT_NAME}_DEFINITIONS})
Expand All @@ -409,7 +428,7 @@ target_include_directories(${PROJECT_NAME}_executive PRIVATE ${${PROJECT_NAME}_I
target_link_libraries(${PROJECT_NAME}_executive PUBLIC ${${PROJECT_NAME}_LINK_LIBRARIES})
target_link_directories(${PROJECT_NAME}_executive PUBLIC ${${PROJECT_NAME}_LIBRARY_DIRS})

add_library(${PROJECT_NAME}_ir STATIC
set(${PROJECT_NAME}_ir_sources
src/ir/ControlFlowGraph.cpp
src/ir/Dim3.cpp
src/ir/ExternalFunctionSet.cpp
Expand All @@ -421,11 +440,6 @@ add_library(${PROJECT_NAME}_ir STATIC
src/ir/IRKernel.cpp
src/ir/Instruction.cpp
src/ir/Kernel.cpp
src/ir/LLVMInstruction.cpp
src/ir/LLVMKernel.cpp
src/ir/LLVMModuleManager.cpp
src/ir/LLVMState.cpp
src/ir/LLVMStatement.cpp
src/ir/Local.cpp
src/ir/Module.cpp
src/ir/PTXInstruction.cpp
Expand All @@ -434,6 +448,18 @@ add_library(${PROJECT_NAME}_ir STATIC
src/ir/PTXStatement.cpp
src/ir/Parameter.cpp
src/ir/Texture.cpp)

if (ENABLE_LLVM)
list(APPEND ${PROJECT_NAME}_ir_sources
src/ir/LLVMInstruction.cpp
src/ir/LLVMKernel.cpp
src/ir/LLVMModuleManager.cpp
src/ir/LLVMState.cpp
src/ir/LLVMStatement.cpp)
endif()

add_library(${PROJECT_NAME}_ir STATIC ${${PROJECT_NAME}_ir_sources})

set_property(TARGET ${PROJECT_NAME}_ir PROPERTY CXX_STANDARD 14)
set_property(TARGET ${PROJECT_NAME}_ir PROPERTY POSITION_INDEPENDENT_CODE ON)
target_compile_definitions(${PROJECT_NAME}_ir PRIVATE ${${PROJECT_NAME}_DEFINITIONS})
Expand Down Expand Up @@ -525,10 +551,16 @@ target_include_directories(${PROJECT_NAME}_transforms PRIVATE ${${PROJECT_NAME}_
target_link_libraries(${PROJECT_NAME}_transforms PUBLIC ${${PROJECT_NAME}_LINK_LIBRARIES})
target_link_directories(${PROJECT_NAME}_transforms PUBLIC ${${PROJECT_NAME}_LIBRARY_DIRS})

add_library(${PROJECT_NAME}_translator STATIC
set(${PROJECT_NAME}_translator_sources
src/translator/PTXToILTranslator.cpp
src/translator/PTXToLLVMTranslator.cpp
src/translator/Translator.cpp)

if (ENABLE_LLVM)
list(APPEND ${PROJECT_NAME}_translator_sources src/translator/PTXToLLVMTranslator.cpp)
endif()

add_library(${PROJECT_NAME}_translator STATIC ${${PROJECT_NAME}_translator_sources})

set_property(TARGET ${PROJECT_NAME}_translator PROPERTY CXX_STANDARD 14)
set_property(TARGET ${PROJECT_NAME}_translator PROPERTY POSITION_INDEPENDENT_CODE ON)
target_compile_definitions(${PROJECT_NAME}_translator PRIVATE ${${PROJECT_NAME}_DEFINITIONS})
Expand Down Expand Up @@ -583,14 +615,16 @@ endif()
target_link_libraries(${PROJECT_NAME} PUBLIC ${${PROJECT_NAME}_LINK_LIBRARIES})

# link llvm and remove unused sections
if (ENABLE_LLVM)
target_link_libraries(${PROJECT_NAME} PUBLIC ${ZLIB_LIBRARIES} ${CURSES_LIBRARIES})
if(APPLE)
target_link_libraries(${PROJECT_NAME} PRIVATE -Wl,-dead_strip ${LLVM_LIBRARIES})
else()
target_link_libraries(${PROJECT_NAME} PRIVATE -Wl,--gc-sections -Wl,--start-group ${LLVM_LIBRARIES} -Wl,--end-group)
endif()
endif()

target_link_directories(${PROJECT_NAME} PUBLIC ${${PROJECT_NAME}_LIBRARY_DIRS})
target_link_libraries(${PROJECT_NAME} PUBLIC ${ZLIB_LIBRARIES} ${CURSES_LIBRARIES})
install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION $<IF:$<PLATFORM_ID:Darwin>,/usr/local/lib,/usr/lib>)
install(CODE "execute_process(COMMAND ldconfig)")

Expand Down Expand Up @@ -637,8 +671,10 @@ if(BUILD_TESTS)
res_embed(TARGET ${PROJECT_NAME}_executive_TestKernels NAME "TestKernels_ptx"
PATH ${CMAKE_CURRENT_SOURCE_DIR}/src/executive/test/TestKernels.ptx KEYWORD)

res_embed(TARGET ${PROJECT_NAME}_executive_TestLLVMKernels NAME "TestLLVMKernels_ptx"
PATH ${CMAKE_CURRENT_SOURCE_DIR}/src/executive/test/TestLLVMKernels.ptx KEYWORD)
if (ENABLE_LLVM)
res_embed(TARGET ${PROJECT_NAME}_executive_TestLLVMKernels NAME "TestLLVMKernels_ptx"
PATH ${CMAKE_CURRENT_SOURCE_DIR}/src/executive/test/TestLLVMKernels.ptx KEYWORD)
endif()

if(BUILD_TESTS_CUDA)
ocelot_add_tests(cuda)
Expand Down
24 changes: 16 additions & 8 deletions ocelot/include/ocelot/ir/ExternalFunctionSet.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,19 @@
#include <ocelot/ir/PTXKernel.h>

// Forward Declarations
#if ENABLE_LLVM
namespace llvm { class Module; }
#endif

namespace ir
{

#if ENABLE_LLVM
typedef llvm::Module ExternalFunctionModule;
#else
struct ExternalFunctionModule;
#endif

/*! \brief Holds a collection of external functions

The idea here is to allow arbitrary PTX functions to call into external
Expand All @@ -33,7 +41,7 @@ class ExternalFunctionSet
{
public:
ExternalFunction(const std::string& identifier = "",
void* functionPointer = 0, llvm::Module* m = 0);
void* functionPointer = 0, ExternalFunctionModule* m = 0);

public:
void call(void* parameters, const ir::PTXKernel::Prototype& p);
Expand All @@ -45,10 +53,10 @@ class ExternalFunctionSet
typedef void (*ExternalCallType)(void*);

private:
std::string _name;
void* _functionPointer;
llvm::Module* _module;
ExternalCallType _externalFunctionPointer;
std::string _name;
void* _functionPointer;
ExternalFunctionModule* _module;
ExternalCallType _externalFunctionPointer;
};

typedef std::map<std::string, ExternalFunction> FunctionSet;
Expand All @@ -71,10 +79,10 @@ class ExternalFunctionSet
ExternalFunction* find(const std::string& name) const;

private:
FunctionSet _functions;
llvm::Module* module;
FunctionSet _functions;
ExternalFunctionModule* module;

llvm::Module* _module();
ExternalFunctionModule* _module();

};

Expand Down
8 changes: 8 additions & 0 deletions ocelot/src/executive/Device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
#include <ocelot/executive/NVIDIAGPUDevice.h>
#include <ocelot/executive/ATIGPUDevice.h>
#include <ocelot/executive/EmulatorDevice.h>
#if ENABLE_LLVM
#include <ocelot/executive/MulticoreCPUDevice.h>
#endif
#include <ocelot/executive/RemoteDevice.h>
#include <ocelot/executive/PassThroughDevice.h>
#include <ocelot/api/OcelotConfiguration.h>
Expand Down Expand Up @@ -86,7 +88,9 @@ executive::DeviceVector executive::Device::createDevices(
break;
case ir::Instruction::LLVM:
{
#if ENABLE_LLVM
devices.push_back(new MulticoreCPUDevice(flags));
#endif
}
break;
case ir::Instruction::CAL:
Expand Down Expand Up @@ -135,7 +139,11 @@ unsigned int executive::Device::deviceCount(ir::Instruction::Architecture isa,
break;
case ir::Instruction::LLVM:
{
#if ENABLE_LLVM
return 1;
#else
return 0;
#endif
}
break;
case ir::Instruction::CAL:
Expand Down
Loading