diff options
| author | modiking <mmo@nvidia.com> | 2025-10-03 09:48:59 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-10-03 09:48:59 -0700 |
| commit | 74180eb024f3e45c4e0ebeb5dd07f34f85ff6539 (patch) | |
| tree | 2a9c76c9225e184b7da17b754c4f7e4c4c49bb9e /flang-rt | |
| parent | d0e98909d28be377408b1e52fa35423a2236036c (diff) | |
[flang][rt] Add noinline attributes for CUDA compile path for successful compilation (#161760)
NVCC inlines more aggressively than Clang/GCC, causing the exported
functions in extrema.cpp and findloc.cpp to become extremely large from
function specializations, which leads to compilation timeouts. Marking the 2
functions in this change as noinline for NVCC alleviates this problem, as
it removes the worst of the cross-matrix argument specializations.
Also remove the workaround in
https://github.com/llvm/llvm-project/pull/156542 that excluded
findloc.cpp from the CUDA flang-rt build.
Testing:
ninja flang-rt builds in ~30 minutes; these 2 files build in ~3 minutes.
Diffstat (limited to 'flang-rt')
| -rw-r--r-- | flang-rt/lib/runtime/CMakeLists.txt | 3 | ||||
| -rw-r--r-- | flang-rt/lib/runtime/extrema.cpp | 9 | ||||
| -rw-r--r-- | flang-rt/lib/runtime/findloc.cpp | 11 |
3 files changed, 13 insertions, 10 deletions
diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 6548ec955b2b..e8f70bd544e0 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -178,9 +178,6 @@ endif () if ("${LLVM_RUNTIMES_TARGET}" MATCHES "^amdgcn|^nvptx") set(sources ${gpu_sources}) elseif(FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "CUDA") - # findloc.cpp has some issues with higher compute capability. Remove it - # from CUDA build until we can lower its memory footprint. - list(REMOVE_ITEM supported_sources findloc.cpp) set(sources ${supported_sources}) else () set(sources ${supported_sources} ${host_sources} ${f128_sources}) diff --git a/flang-rt/lib/runtime/extrema.cpp b/flang-rt/lib/runtime/extrema.cpp index 9846529665e8..c4575cced901 100644 --- a/flang-rt/lib/runtime/extrema.cpp +++ b/flang-rt/lib/runtime/extrema.cpp @@ -397,9 +397,12 @@ template <TypeCategory CAT, bool IS_MAX, template <typename, bool, bool> class COMPARE> struct DoPartialMaxOrMinLocHelper { template <int KIND> struct Functor { - RT_API_ATTRS void operator()(const char *intrinsic, Descriptor &result, - const Descriptor &x, int kind, int dim, const Descriptor *mask, - bool back, Terminator &terminator) const { + // NVCC inlines more aggressively which causes too many specializations of + // this function to be inlined causing compiler timeouts. Set as + // noinline to allow compilation to complete. 
+ RT_API_ATTRS RT_DEVICE_NOINLINE void operator()(const char *intrinsic, + Descriptor &result, const Descriptor &x, int kind, int dim, + const Descriptor *mask, bool back, Terminator &terminator) const { DoPartialMaxOrMinLoc<CAT, KIND, IS_MAX, COMPARE>( intrinsic, result, x, kind, dim, mask, back, terminator); } diff --git a/flang-rt/lib/runtime/findloc.cpp b/flang-rt/lib/runtime/findloc.cpp index 5485f4b97bd2..b5031ec95508 100644 --- a/flang-rt/lib/runtime/findloc.cpp +++ b/flang-rt/lib/runtime/findloc.cpp @@ -153,10 +153,13 @@ template <TypeCategory CAT, class HELPER> struct NumericFindlocHelper { template <int KIND> struct Functor { - RT_API_ATTRS void operator()(TypeCategory targetCat, int targetKind, - Descriptor &result, const Descriptor &x, const Descriptor &target, - int kind, int dim, const Descriptor *mask, bool back, - Terminator &terminator) const { + // NVCC inlines more aggressively which causes too many specializations of + // this function to be inlined causing compiler timeouts. Set as + // noinline to allow compilation to complete. + RT_API_ATTRS RT_DEVICE_NOINLINE void operator()(TypeCategory targetCat, + int targetKind, Descriptor &result, const Descriptor &x, + const Descriptor &target, int kind, int dim, const Descriptor *mask, + bool back, Terminator &terminator) const { switch (targetCat) { case TypeCategory::Integer: case TypeCategory::Unsigned: |
