diff options
| author | Fangrui Song <i@maskray.me> | 2025-06-20 20:13:04 -0700 |
|---|---|---|
| committer | Fangrui Song <i@maskray.me> | 2025-06-20 20:13:04 -0700 |
| commit | 95fbfc9be5d2842a945c04a20fe6244df9b10e18 (patch) | |
| tree | 00f80558d11aa5805e6d6f290663c6da44e5e6ef /offload | |
| parent | a9ba028b98ffd53d9c7d00ca7563d74810fcf6e7 (diff) | |
| parent | 17e8465a3eb0cae48b9f62d27fd26f2b070f1f9b (diff) | |
[𝘀𝗽𝗿] changes introduced through rebase
users/MaskRay/spr/main.move-relocation-specifier-constants-to-aarch64
Created using spr 1.3.5-bogner
[skip ci]
Diffstat (limited to 'offload')
| -rw-r--r-- | offload/liboffload/API/Common.td | 1 | ||||
| -rw-r--r-- | offload/liboffload/include/OffloadImpl.hpp | 13 | ||||
| -rw-r--r-- | offload/liboffload/src/OffloadImpl.cpp | 101 | ||||
| -rw-r--r-- | offload/liboffload/src/OffloadLib.cpp | 5 | ||||
| -rw-r--r-- | offload/plugins-nextgen/common/include/PluginInterface.h | 41 | ||||
| -rw-r--r-- | offload/tools/offload-tblgen/EntryPointGen.cpp | 49 | ||||
| -rw-r--r-- | offload/unittests/CMakeLists.txt | 73 | ||||
| -rw-r--r-- | offload/unittests/OffloadAPI/CMakeLists.txt | 4 | ||||
| -rw-r--r-- | offload/unittests/OffloadAPI/common/Environment.cpp | 2 | ||||
| -rw-r--r-- | offload/unittests/OffloadAPI/device_code/CMakeLists.txt | 69 | ||||
| -rw-r--r-- | offload/unittests/OffloadAPI/init/olInit.cpp | 22 |
11 files changed, 235 insertions, 145 deletions
diff --git a/offload/liboffload/API/Common.td b/offload/liboffload/API/Common.td index 8a2ecd6c6e8f..cd8c3c63fde8 100644 --- a/offload/liboffload/API/Common.td +++ b/offload/liboffload/API/Common.td @@ -106,6 +106,7 @@ def ErrorCode : Enum { Etor<"ASSEMBLE_FAILURE", "assembler failure while processing binary image">, Etor<"LINK_FAILURE", "linker failure while processing binary image">, Etor<"BACKEND_FAILURE", "the plugin backend is in an invalid or unsupported state">, + Etor<"UNINITIALIZED", "not initialized">, // Handle related errors - only makes sense for liboffload Etor<"INVALID_NULL_HANDLE", "a handle argument is null when it should not be">, diff --git a/offload/liboffload/include/OffloadImpl.hpp b/offload/liboffload/include/OffloadImpl.hpp index 9b0a21cb9ae1..f98164d5e178 100644 --- a/offload/liboffload/include/OffloadImpl.hpp +++ b/offload/liboffload/include/OffloadImpl.hpp @@ -22,12 +22,13 @@ #include "llvm/ADT/StringSet.h" #include "llvm/Support/Error.h" -struct OffloadConfig { - bool TracingEnabled = false; - bool ValidationEnabled = true; -}; - -OffloadConfig &offloadConfig(); +namespace llvm { +namespace offload { +bool isTracingEnabled(); +bool isValidationEnabled(); +bool isOffloadInitialized(); +} // namespace offload +} // namespace llvm // Use the StringSet container to efficiently deduplicate repeated error // strings (e.g. 
if the same error is hit constantly in a long running program) diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp index 770c212d804d..eba8e91ed688 100644 --- a/offload/liboffload/src/OffloadImpl.cpp +++ b/offload/liboffload/src/OffloadImpl.cpp @@ -93,22 +93,37 @@ struct AllocInfo { ol_alloc_type_t Type; }; -using AllocInfoMapT = DenseMap<void *, AllocInfo>; -AllocInfoMapT &allocInfoMap() { - static AllocInfoMapT AllocInfoMap{}; - return AllocInfoMap; -} +// Global shared state for liboffload +struct OffloadContext; +static OffloadContext *OffloadContextVal; +struct OffloadContext { + OffloadContext(OffloadContext &) = delete; + OffloadContext(OffloadContext &&) = delete; + OffloadContext &operator=(OffloadContext &) = delete; + OffloadContext &operator=(OffloadContext &&) = delete; + + bool TracingEnabled = false; + bool ValidationEnabled = true; + DenseMap<void *, AllocInfo> AllocInfoMap{}; + SmallVector<ol_platform_impl_t, 4> Platforms{}; + + ol_device_handle_t HostDevice() { + // The host platform is always inserted last + return &Platforms.back().Devices[0]; + } -using PlatformVecT = SmallVector<ol_platform_impl_t, 4>; -PlatformVecT &Platforms() { - static PlatformVecT Platforms; - return Platforms; -} + static OffloadContext &get() { + assert(OffloadContextVal); + return *OffloadContextVal; + } +}; -ol_device_handle_t HostDevice() { - // The host platform is always inserted last - return &Platforms().back().Devices[0]; +// If the context is uninited, then we assume tracing is disabled +bool isTracingEnabled() { + return isOffloadInitialized() && OffloadContext::get().TracingEnabled; } +bool isValidationEnabled() { return OffloadContext::get().ValidationEnabled; } +bool isOffloadInitialized() { return OffloadContextVal != nullptr; } template <typename HandleT> Error olDestroy(HandleT Handle) { delete Handle; @@ -130,10 +145,12 @@ constexpr ol_platform_backend_t pluginNameToBackend(StringRef Name) { #include 
"Shared/Targets.def" void initPlugins() { + auto *Context = new OffloadContext{}; + // Attempt to create an instance of each supported plugin. #define PLUGIN_TARGET(Name) \ do { \ - Platforms().emplace_back(ol_platform_impl_t{ \ + Context->Platforms.emplace_back(ol_platform_impl_t{ \ std::unique_ptr<GenericPluginTy>(createPlugin_##Name()), \ {}, \ pluginNameToBackend(#Name)}); \ @@ -141,7 +158,7 @@ void initPlugins() { #include "Shared/Targets.def" // Preemptively initialize all devices in the plugin - for (auto &Platform : Platforms()) { + for (auto &Platform : Context->Platforms) { // Do not use the host plugin - it isn't supported. if (Platform.BackendType == OL_PLATFORM_BACKEND_UNKNOWN) continue; @@ -157,15 +174,16 @@ void initPlugins() { } // Add the special host device - auto &HostPlatform = Platforms().emplace_back( + auto &HostPlatform = Context->Platforms.emplace_back( ol_platform_impl_t{nullptr, {ol_device_impl_t{-1, nullptr, nullptr}}, OL_PLATFORM_BACKEND_HOST}); - HostDevice()->Platform = &HostPlatform; + Context->HostDevice()->Platform = &HostPlatform; + + Context->TracingEnabled = std::getenv("OFFLOAD_TRACE"); + Context->ValidationEnabled = !std::getenv("OFFLOAD_DISABLE_VALIDATION"); - offloadConfig().TracingEnabled = std::getenv("OFFLOAD_TRACE"); - offloadConfig().ValidationEnabled = - !std::getenv("OFFLOAD_DISABLE_VALIDATION"); + OffloadContextVal = Context; } // TODO: We can properly reference count here and manage the resources in a more @@ -228,38 +246,39 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device, ReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); // Find the info if it exists under any of the given names - auto GetInfo = [&](std::vector<std::string> Names) { - if (Device == HostDevice()) - return std::string("Host"); + auto GetInfoString = [&](std::vector<std::string> Names) { + if (Device == OffloadContext::get().HostDevice()) + return "Host"; if (!Device->Device) - return std::string(""); + return ""; auto Info = 
Device->Device->obtainInfoImpl(); if (auto Err = Info.takeError()) - return std::string(""); + return ""; for (auto Name : Names) { if (auto Entry = Info->get(Name)) - return (*Entry)->Value; + return std::get<std::string>((*Entry)->Value).c_str(); } - return std::string(""); + return ""; }; switch (PropName) { case OL_DEVICE_INFO_PLATFORM: return ReturnValue(Device->Platform); case OL_DEVICE_INFO_TYPE: - return Device == HostDevice() ? ReturnValue(OL_DEVICE_TYPE_HOST) - : ReturnValue(OL_DEVICE_TYPE_GPU); + return Device == OffloadContext::get().HostDevice() + ? ReturnValue(OL_DEVICE_TYPE_HOST) + : ReturnValue(OL_DEVICE_TYPE_GPU); case OL_DEVICE_INFO_NAME: - return ReturnValue(GetInfo({"Device Name"}).c_str()); + return ReturnValue(GetInfoString({"Device Name"})); case OL_DEVICE_INFO_VENDOR: - return ReturnValue(GetInfo({"Vendor Name"}).c_str()); + return ReturnValue(GetInfoString({"Vendor Name"})); case OL_DEVICE_INFO_DRIVER_VERSION: return ReturnValue( - GetInfo({"CUDA Driver Version", "HSA Runtime Version"}).c_str()); + GetInfoString({"CUDA Driver Version", "HSA Runtime Version"})); default: return createOffloadError(ErrorCode::INVALID_ENUMERATION, "getDeviceInfo enum '%i' is invalid", PropName); @@ -280,7 +299,7 @@ Error olGetDeviceInfoSize_impl(ol_device_handle_t Device, } Error olIterateDevices_impl(ol_device_iterate_cb_t Callback, void *UserData) { - for (auto &Platform : Platforms()) { + for (auto &Platform : OffloadContext::get().Platforms) { for (auto &Device : Platform.Devices) { if (!Callback(&Device, UserData)) { break; @@ -311,16 +330,17 @@ Error olMemAlloc_impl(ol_device_handle_t Device, ol_alloc_type_t Type, return Alloc.takeError(); *AllocationOut = *Alloc; - allocInfoMap().insert_or_assign(*Alloc, AllocInfo{Device, Type}); + OffloadContext::get().AllocInfoMap.insert_or_assign(*Alloc, + AllocInfo{Device, Type}); return Error::success(); } Error olMemFree_impl(void *Address) { - if (!allocInfoMap().contains(Address)) + if 
(!OffloadContext::get().AllocInfoMap.contains(Address)) return createOffloadError(ErrorCode::INVALID_ARGUMENT, "address is not a known allocation"); - auto AllocInfo = allocInfoMap().at(Address); + auto AllocInfo = OffloadContext::get().AllocInfoMap.at(Address); auto Device = AllocInfo.Device; auto Type = AllocInfo.Type; @@ -328,7 +348,7 @@ Error olMemFree_impl(void *Address) { Device->Device->dataDelete(Address, convertOlToPluginAllocTy(Type))) return Res; - allocInfoMap().erase(Address); + OffloadContext::get().AllocInfoMap.erase(Address); return Error::success(); } @@ -395,7 +415,8 @@ Error olMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr, ol_device_handle_t DstDevice, const void *SrcPtr, ol_device_handle_t SrcDevice, size_t Size, ol_event_handle_t *EventOut) { - if (DstDevice == HostDevice() && SrcDevice == HostDevice()) { + auto Host = OffloadContext::get().HostDevice(); + if (DstDevice == Host && SrcDevice == Host) { if (!Queue) { std::memcpy(DstPtr, SrcPtr, Size); return Error::success(); @@ -410,11 +431,11 @@ Error olMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr, // If no queue is given the memcpy will be synchronous auto QueueImpl = Queue ? 
Queue->AsyncInfo : nullptr; - if (DstDevice == HostDevice()) { + if (DstDevice == Host) { if (auto Res = SrcDevice->Device->dataRetrieve(DstPtr, SrcPtr, Size, QueueImpl)) return Res; - } else if (SrcDevice == HostDevice()) { + } else if (SrcDevice == Host) { if (auto Res = DstDevice->Device->dataSubmit(DstPtr, SrcPtr, Size, QueueImpl)) return Res; diff --git a/offload/liboffload/src/OffloadLib.cpp b/offload/liboffload/src/OffloadLib.cpp index 8662d3a44124..0a65815e5969 100644 --- a/offload/liboffload/src/OffloadLib.cpp +++ b/offload/liboffload/src/OffloadLib.cpp @@ -30,11 +30,6 @@ ol_code_location_t *&currentCodeLocation() { return CodeLoc; } -OffloadConfig &offloadConfig() { - static OffloadConfig Config{}; - return Config; -} - namespace llvm { namespace offload { // Pull in the declarations for the implementation functions. The actual entry diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index f5d995532b7a..91df80030437 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -17,6 +17,7 @@ #include <list> #include <map> #include <shared_mutex> +#include <variant> #include <vector> #include "ExclusiveAccess.h" @@ -122,7 +123,8 @@ struct InfoTreeNode { static constexpr uint64_t IndentSize = 4; std::string Key; - std::string Value; + using VariantType = std::variant<uint64_t, std::string, bool, std::monostate>; + VariantType Value; std::string Units; // Need to specify a default value number of elements here as `InfoTreeNode`'s // size is unknown. 
This is a vector (rather than a Key->Value map) since: @@ -131,15 +133,15 @@ struct InfoTreeNode { // * The same key can appear multiple times std::unique_ptr<llvm::SmallVector<InfoTreeNode, 8>> Children; - InfoTreeNode() : InfoTreeNode("", "", "") {} - InfoTreeNode(std::string Key, std::string Value, std::string Units) + InfoTreeNode() : InfoTreeNode("", std::monostate{}, "") {} + InfoTreeNode(std::string Key, VariantType Value, std::string Units) : Key(Key), Value(Value), Units(Units) {} /// Add a new info entry as a child of this node. The entry requires at least /// a key string in \p Key. The value in \p Value is optional and can be any /// type that is representable as a string. The units in \p Units is optional /// and must be a string. - template <typename T = std::string> + template <typename T = std::monostate> InfoTreeNode *add(std::string Key, T Value = T(), const std::string &Units = std::string()) { assert(!Key.empty() && "Invalid info key"); @@ -147,15 +149,15 @@ struct InfoTreeNode { if (!Children) Children = std::make_unique<llvm::SmallVector<InfoTreeNode, 8>>(); - std::string ValueStr; - if constexpr (std::is_same_v<T, bool>) - ValueStr = Value ? "Yes" : "No"; + VariantType ValueVariant; + if constexpr (std::is_same_v<T, bool> || std::is_same_v<T, std::monostate>) + ValueVariant = Value; else if constexpr (std::is_arithmetic_v<T>) - ValueStr = std::to_string(Value); + ValueVariant = static_cast<uint64_t>(Value); else - ValueStr = Value; + ValueVariant = std::string{Value}; - return &Children->emplace_back(Key, ValueStr, Units); + return &Children->emplace_back(Key, ValueVariant, Units); } std::optional<InfoTreeNode *> get(StringRef Key) { @@ -184,8 +186,23 @@ private: MaxKeySize - (Key.size() + KeyIndentSize) + IndentSize; llvm::outs() << std::string(KeyIndentSize, ' ') << Key - << std::string(ValIndentSize, ' ') << Value - << (Units.empty() ? 
"" : " ") << Units << "\n"; + << std::string(ValIndentSize, ' '); + std::visit( + [](auto &&V) { + using T = std::decay_t<decltype(V)>; + if constexpr (std::is_same_v<T, std::string>) + llvm::outs() << V; + else if constexpr (std::is_same_v<T, bool>) + llvm::outs() << (V ? "Yes" : "No"); + else if constexpr (std::is_same_v<T, uint64_t>) + llvm::outs() << V; + else if constexpr (std::is_same_v<T, std::monostate>) { + // Do nothing + } else + static_assert(false, "doPrint visit not exhaustive"); + }, + Value); + llvm::outs() << (Units.empty() ? "" : " ") << Units << "\n"; } // Print children diff --git a/offload/tools/offload-tblgen/EntryPointGen.cpp b/offload/tools/offload-tblgen/EntryPointGen.cpp index 85c5c50bf2f2..4e42e4905b99 100644 --- a/offload/tools/offload-tblgen/EntryPointGen.cpp +++ b/offload/tools/offload-tblgen/EntryPointGen.cpp @@ -35,21 +35,30 @@ static void EmitValidationFunc(const FunctionRec &F, raw_ostream &OS) { } OS << ") {\n"; - OS << TAB_1 "if (offloadConfig().ValidationEnabled) {\n"; - // Emit validation checks - for (const auto &Return : F.getReturns()) { - for (auto &Condition : Return.getConditions()) { - if (Condition.starts_with("`") && Condition.ends_with("`")) { - auto ConditionString = Condition.substr(1, Condition.size() - 2); - OS << formatv(TAB_2 "if ({0}) {{\n", ConditionString); - OS << formatv(TAB_3 "return createOffloadError(error::ErrorCode::{0}, " - "\"validation failure: {1}\");\n", - Return.getUnprefixedValue(), ConditionString); - OS << TAB_2 "}\n\n"; + bool HasValidation = llvm::any_of(F.getReturns(), [](auto &R) { + return llvm::any_of(R.getConditions(), [](auto &C) { + return C.starts_with("`") && C.ends_with("`"); + }); + }); + + if (HasValidation) { + OS << TAB_1 "if (llvm::offload::isValidationEnabled()) {\n"; + // Emit validation checks + for (const auto &Return : F.getReturns()) { + for (auto &Condition : Return.getConditions()) { + if (Condition.starts_with("`") && Condition.ends_with("`")) { + auto ConditionString 
= Condition.substr(1, Condition.size() - 2); + OS << formatv(TAB_2 "if ({0}) {{\n", ConditionString); + OS << formatv(TAB_3 + "return createOffloadError(error::ErrorCode::{0}, " + "\"validation failure: {1}\");\n", + Return.getUnprefixedValue(), ConditionString); + OS << TAB_2 "}\n\n"; + } } } + OS << TAB_1 "}\n\n"; } - OS << TAB_1 "}\n\n"; // Perform actual function call to the implementation ParamNameList = ParamNameList.substr(0, ParamNameList.size() - 2); @@ -73,8 +82,12 @@ static void EmitEntryPointFunc(const FunctionRec &F, raw_ostream &OS) { } OS << ") {\n"; + // Check offload is initialized + if (F.getName() != "olInit") + OS << "if (!llvm::offload::isOffloadInitialized()) return &UninitError;"; + // Emit pre-call prints - OS << TAB_1 "if (offloadConfig().TracingEnabled) {\n"; + OS << TAB_1 "if (llvm::offload::isTracingEnabled()) {\n"; OS << formatv(TAB_2 "llvm::errs() << \"---> {0}\";\n", F.getName()); OS << TAB_1 "}\n\n"; @@ -85,7 +98,7 @@ static void EmitEntryPointFunc(const FunctionRec &F, raw_ostream &OS) { PrefixLower, F.getName(), ParamNameList); // Emit post-call prints - OS << TAB_1 "if (offloadConfig().TracingEnabled) {\n"; + OS << TAB_1 "if (llvm::offload::isTracingEnabled()) {\n"; if (F.getParams().size() > 0) { OS << formatv(TAB_2 "{0} Params = {{", F.getParamStructName()); for (const auto &Param : F.getParams()) { @@ -134,6 +147,14 @@ static void EmitCodeLocWrapper(const FunctionRec &F, raw_ostream &OS) { void EmitOffloadEntryPoints(const RecordKeeper &Records, raw_ostream &OS) { OS << GenericHeader; + + constexpr const char *UninitMessage = + "liboffload has not been initialized - please call olInit before using " + "this API"; + OS << formatv("static {0}_error_struct_t UninitError = " + "{{{1}_ERRC_UNINITIALIZED, \"{2}\"};", + PrefixLower, PrefixUpper, UninitMessage); + for (auto *R : Records.getAllDerivedDefinitions("Function")) { EmitValidationFunc(FunctionRec{R}, OS); EmitEntryPointFunc(FunctionRec{R}, OS); diff --git 
a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt index 985dd892d804..7cd41e1dcdaf 100644 --- a/offload/unittests/CMakeLists.txt +++ b/offload/unittests/CMakeLists.txt @@ -1,6 +1,72 @@ add_custom_target(OffloadUnitTests) set_target_properties(OffloadUnitTests PROPERTIES FOLDER "Tests/UnitTests") +function(add_offload_test_device_code test_filename test_name) + set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename}) + set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) + + # Try to build with support for NVPTX devices. + if("cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD) + find_package(CUDAToolkit QUIET) + if(CUDAToolkit_FOUND) + get_filename_component(cuda_path "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE) + endif() + check_cxx_compiler_flag( + "--target=nvptx64-nvidia-cuda -march=native --cuda-path=${cuda_path}" PLATFORM_HAS_NVPTX) + + if(OFFLOAD_TESTS_FORCE_NVPTX_ARCH) + set(nvptx_arch "${OFFLOAD_TESTS_FORCE_NVPTX_ARCH}") + elseif(PLATFORM_HAS_NVPTX) + set(nvptx_arch "native") + endif() + + if(nvptx_arch AND CUDAToolkit_FOUND) + set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin") + add_custom_command( + OUTPUT ${output_file} + COMMAND ${CMAKE_C_COMPILER} + --target=nvptx64-nvidia-cuda -march=${nvptx_arch} + -nogpulib --cuda-path=${CUDA_ROOT} -flto ${ARGN} + -c ${SRC_PATH} -o ${output_file} + DEPENDS ${SRC_PATH} + ) + add_custom_target(${test_name}.nvptx64 DEPENDS ${output_file}) + endif() + endif() + + # Try to build with support for AMDGPU devices. 
+ if("amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD) + check_cxx_compiler_flag("--target=amdgcn-amd-amdhsa -mcpu=native" PLATFORM_HAS_AMDGPU) + + if(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH) + set(amdgpu_arch "${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH}") + elseif(PLATFORM_HAS_AMDGPU) + set(amdgpu_arch "native") + endif() + + if(amdgpu_arch) + set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin") + add_custom_command( + OUTPUT ${output_file} + COMMAND ${CMAKE_C_COMPILER} + --target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch} + -nogpulib -flto ${ARGN} -c ${SRC_PATH} -o ${output_file} + DEPENDS ${SRC_PATH} + ) + add_custom_target(${test_name}.amdgpu DEPENDS ${output_file}) + endif() + endif() + + # Create a single dependency target for the device code. + add_custom_target(${test_name}.bin) + if(TARGET ${test_name}.amdgpu) + add_dependencies(${test_name}.bin ${test_name}.amdgpu) + endif() + if(TARGET ${test_name}.nvptx64) + add_dependencies(${test_name}.bin ${test_name}.nvptx64) + endif() +endfunction() + function(add_offload_unittest test_dirname) set(target_name "${test_dirname}.unittests") @@ -9,10 +75,15 @@ function(add_offload_unittest test_dirname) add_unittest(OffloadUnitTests "${target_name}" ${CMAKE_CURRENT_SOURCE_DIR}/common/Environment.cpp ${files}) - add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} OffloadUnitTestsDeviceBins) + add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} offload_device_binaries) target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}") target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON}) target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE}) endfunction() +set(OFFLOAD_TESTS_FORCE_NVPTX_ARCH "" CACHE STRING + "Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61") +set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING + "Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. 
gfx1030") + add_subdirectory(OffloadAPI) diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt index 2844b675e5de..05e862865ed3 100644 --- a/offload/unittests/OffloadAPI/CMakeLists.txt +++ b/offload/unittests/OffloadAPI/CMakeLists.txt @@ -12,6 +12,10 @@ add_offload_unittest("event" event/olDestroyEvent.cpp event/olWaitEvent.cpp) +add_offload_unittest("init" + init/olInit.cpp) +target_compile_definitions("init.unittests" PRIVATE DISABLE_WRAPPER) + add_offload_unittest("kernel" kernel/olGetKernel.cpp kernel/olLaunchKernel.cpp) diff --git a/offload/unittests/OffloadAPI/common/Environment.cpp b/offload/unittests/OffloadAPI/common/Environment.cpp index 943347246b6d..ef092cd4187d 100644 --- a/offload/unittests/OffloadAPI/common/Environment.cpp +++ b/offload/unittests/OffloadAPI/common/Environment.cpp @@ -17,11 +17,13 @@ using namespace llvm; // Wrapper so we don't have to constantly init and shutdown Offload in every // test, while having sensible lifetime for the platform environment +#ifndef DISABLE_WRAPPER struct OffloadInitWrapper { OffloadInitWrapper() { olInit(); } ~OffloadInitWrapper() { olShutDown(); } }; static OffloadInitWrapper Wrapper{}; +#endif static cl::opt<std::string> SelectedPlatform("platform", cl::desc("Only test the specified platform"), diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt index c2e4d0cb24e6..132c7a7c51fb 100644 --- a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt +++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt @@ -1,72 +1,7 @@ -macro(add_offload_test_device_code test_filename test_name) - set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename}) - - # Build for NVPTX - if(OFFLOAD_TEST_TARGET_NVIDIA) - set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin) - add_custom_command(OUTPUT ${BIN_PATH} - COMMAND - ${CMAKE_C_COMPILER} --target=nvptx64-nvidia-cuda - ${ARGN} - 
-march=${LIBOMPTARGET_DEP_CUDA_ARCH} - --cuda-path=${CUDA_ROOT} - ${SRC_PATH} -o ${BIN_PATH} - DEPENDS ${SRC_PATH} - ) - list(APPEND BIN_PATHS ${BIN_PATH}) - endif() - - # Build for AMDGPU - if(OFFLOAD_TEST_TARGET_AMDGPU) - set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin) - add_custom_command(OUTPUT ${BIN_PATH} - COMMAND - ${CMAKE_C_COMPILER} --target=amdgcn-amd-amdhsa -nogpulib - ${ARGN} - -mcpu=${LIBOMPTARGET_DEP_AMDGPU_ARCH} - ${SRC_PATH} -o ${BIN_PATH} - DEPENDS ${SRC_PATH} - ) - list(APPEND BIN_PATHS ${BIN_PATH}) - endif() - - # TODO: Build for host CPU -endmacro() - - -# Decide what device targets to build for. LibomptargetGetDependencies is -# included at the top-level so the GPUs present on the system are already -# detected. -set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING - "Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61") -set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING - "Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. 
gfx1030") - -find_package(CUDAToolkit QUIET) -if(CUDAToolkit_FOUND) - get_filename_component(CUDA_ROOT "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE) -endif() -if (OFFLOAD_TESTS_FORCE_NVIDIA_ARCH) - set(LIBOMPTARGET_DEP_CUDA_ARCH ${OFFLOAD_TESTS_FORCE_NVIDIA_ARCH}) - set(OFFLOAD_TEST_TARGET_NVIDIA ON) -elseif (LIBOMPTARGET_FOUND_NVIDIA_GPU AND CUDA_ROOT AND "cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD) - set(OFFLOAD_TEST_TARGET_NVIDIA ON) -endif() - -if (OFFLOAD_TESTS_FORCE_AMDGPU_ARCH) - set(LIBOMPTARGET_DEP_AMDGPU_ARCH ${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH}) - set(OFFLOAD_TEST_TARGET_AMDGPU ON) -elseif (LIBOMPTARGET_FOUND_AMDGPU_GPU AND "amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD) - list(GET LIBOMPTARGET_AMDGPU_DETECTED_ARCH_LIST 0 LIBOMPTARGET_DEP_AMDGPU_ARCH) - set(OFFLOAD_TEST_TARGET_AMDGPU ON) -endif() - add_offload_test_device_code(foo.c foo) add_offload_test_device_code(bar.c bar) -# By default, amdhsa will add a number of "hidden" arguments to the kernel defintion -# O3 disables this, and results in a kernel function with actually no arguments as seen by liboffload +# Compile with optimizations to eliminate AMDGPU implicit arguments. add_offload_test_device_code(noargs.c noargs -O3) -add_custom_target(OffloadUnitTestsDeviceBins DEPENDS ${BIN_PATHS}) - +add_custom_target(offload_device_binaries DEPENDS foo.bin bar.bin noargs.bin) set(OFFLOAD_TEST_DEVICE_CODE_PATH ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) diff --git a/offload/unittests/OffloadAPI/init/olInit.cpp b/offload/unittests/OffloadAPI/init/olInit.cpp new file mode 100644 index 000000000000..8e27e77cd0fb --- /dev/null +++ b/offload/unittests/OffloadAPI/init/olInit.cpp @@ -0,0 +1,22 @@ +//===------- Offload API tests - olInit -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// NOTE: For this test suite, the implicit olInit/olShutDown doesn't happen, so +// tests have to do it themselves + +#include "../common/Fixtures.hpp" +#include <OffloadAPI.h> +#include <gtest/gtest.h> + +struct olInitTest : ::testing::Test {}; + +TEST_F(olInitTest, Uninitialized) { + ASSERT_ERROR(OL_ERRC_UNINITIALIZED, + olIterateDevices( + [](ol_device_handle_t, void *) { return false; }, nullptr)); +} |
