diff options
| author | Aiden Grossman <aidengrossman@google.com> | 2025-11-18 15:26:23 +0000 |
|---|---|---|
| committer | Aiden Grossman <aidengrossman@google.com> | 2025-11-18 15:26:23 +0000 |
| commit | 6cb05cad372eb58e05af3afaca7730bc0cee5a88 (patch) | |
| tree | 18c2c9b1e581c62dcccf11f67c8d698f5cef83ea | |
| parent | c23d02ddcd8f62f2225dd6afb066542444ab7626 (diff) | |
| parent | 2ede6afff07ad26419f22e00967120dbfc9e5617 (diff) | |
[𝘀𝗽𝗿] changes introduced through rebaseusers/boomanaiden154/main.asan-make-most-tests-run-under-internal-shell-on-darwin
Created using spr 1.3.7
[skip ci]
49 files changed, 1172 insertions, 524 deletions
diff --git a/clang/test/CIR/CodeGen/call.c b/clang/test/CIR/CodeGen/call.c index d780e37f3d15..99ae4506b1f1 100644 --- a/clang/test/CIR/CodeGen/call.c +++ b/clang/test/CIR/CodeGen/call.c @@ -130,7 +130,7 @@ int f12(void) { // OGCG: %{{.+}} = call i32 @f10(i32 noundef 1) #[[ATTR0:.+]] // OGCG-NEXT: %{{.+}} = call i32 @f11(i32 noundef 2) #[[ATTR1:.+]] -// LLVM: attributes #[[ATTR0]] = { nounwind willreturn memory(read, errnomem: none) } +// LLVM: attributes #[[ATTR0]] = { nounwind willreturn memory(read, errnomem: none, target_mem0: none, target_mem1: none) } // LLVM: attributes #[[ATTR1]] = { nounwind willreturn memory(none) } // OGCG: attributes #[[ATTR0]] = { nounwind willreturn memory(read) } diff --git a/libcxx/include/__config b/libcxx/include/__config index 8f461599ffd5..d79ace0cbb89 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -546,6 +546,12 @@ typedef __char32_t char32_t; # define _LIBCPP_DEPRECATED_(m) # endif +# if defined(__DEPRECATED) && __DEPRECATED && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS) +# define _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS 1 +# else +# define _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS 0 +# endif + # if !defined(_LIBCPP_CXX03_LANG) # define _LIBCPP_DEPRECATED_IN_CXX11 _LIBCPP_DEPRECATED # else diff --git a/libcxx/include/ccomplex b/libcxx/include/ccomplex index ee7e088aac54..c1cb039f83a5 100644 --- a/libcxx/include/ccomplex +++ b/libcxx/include/ccomplex @@ -26,18 +26,10 @@ # pragma GCC system_header # endif -# if _LIBCPP_STD_VER >= 20 - -using __standard_header_ccomplex - _LIBCPP_DEPRECATED_("removed in C++20. Include <complex> instead.") _LIBCPP_NODEBUG = void; -using __use_standard_header_ccomplex _LIBCPP_NODEBUG = __standard_header_ccomplex; - -# elif _LIBCPP_STD_VER >= 17 - -using __standard_header_ccomplex _LIBCPP_DEPRECATED_("Include <complex> instead.") _LIBCPP_NODEBUG = void; -using __use_standard_header_ccomplex _LIBCPP_NODEBUG = __standard_header_ccomplex; - +# if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS +# warning <ccomplex> is deprecated in C++17 and removed in C++20. Include <complex> instead. # endif + #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) #endif // _LIBCPP_CCOMPLEX diff --git a/libcxx/include/ciso646 b/libcxx/include/ciso646 index 34164362dc10..d9eae4129102 100644 --- a/libcxx/include/ciso646 +++ b/libcxx/include/ciso646 @@ -24,13 +24,10 @@ # pragma GCC system_header # endif -# if _LIBCPP_STD_VER >= 20 - -using __standard_header_ciso646 - _LIBCPP_DEPRECATED_("removed in C++20. Include <version> instead.") _LIBCPP_NODEBUG = void; -using __use_standard_header_ciso646 _LIBCPP_NODEBUG = __standard_header_ciso646; - +# if _LIBCPP_STD_VER >= 20 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS +# warning <ciso646> is removed in C++20. Include <version> instead. # endif + #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) #endif // _LIBCPP_CISO646 diff --git a/libcxx/include/cstdalign b/libcxx/include/cstdalign index 7f8dd1e1fbaf..7aa8cc81ad14 100644 --- a/libcxx/include/cstdalign +++ b/libcxx/include/cstdalign @@ -43,17 +43,10 @@ Macros: # undef __alignof_is_defined # define __alignof_is_defined 1 -# if _LIBCPP_STD_VER >= 20 - -using __standard_header_cstdalign _LIBCPP_DEPRECATED_("removed in C++20.") _LIBCPP_NODEBUG = void; -using __use_standard_header_cstdalign _LIBCPP_NODEBUG = __standard_header_cstdalign; - -# elif _LIBCPP_STD_VER >= 17 - -using __standard_header_cstdalign _LIBCPP_DEPRECATED _LIBCPP_NODEBUG = void; -using __use_standard_header_cstdalign _LIBCPP_NODEBUG = __standard_header_cstdalign; - +# if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS +# warning <cstdalign> is deprecated in C++17 and removed in C++20. # endif + #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) #endif // _LIBCPP_CSTDALIGN diff --git a/libcxx/include/cstdbool b/libcxx/include/cstdbool index a432d5f08b9a..805a287bd762 100644 --- a/libcxx/include/cstdbool +++ b/libcxx/include/cstdbool @@ -31,17 +31,10 @@ Macros: # undef __bool_true_false_are_defined # define __bool_true_false_are_defined 1 -# if _LIBCPP_STD_VER >= 20 - -using __standard_header_cstdbool _LIBCPP_DEPRECATED_("removed in C++20.") _LIBCPP_NODEBUG = void; -using __use_standard_header_cstdbool _LIBCPP_NODEBUG = __standard_header_cstdbool; - -# elif _LIBCPP_STD_VER >= 17 - -using __standard_header_cstdbool _LIBCPP_DEPRECATED _LIBCPP_NODEBUG = void; -using __use_standard_header_cstdbool _LIBCPP_NODEBUG = __standard_header_cstdbool; - +# if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS +# warning <cstdbool> is deprecated in C++17 and removed in C++20. # endif + #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) #endif // _LIBCPP_CSTDBOOL diff --git a/libcxx/include/ctgmath b/libcxx/include/ctgmath index db0786f1e2c4..13b7a96e4d8f 100644 --- a/libcxx/include/ctgmath +++ b/libcxx/include/ctgmath @@ -28,17 +28,8 @@ # pragma GCC system_header # endif -# if _LIBCPP_STD_VER >= 20 - -using __standard_header_ctgmath - _LIBCPP_DEPRECATED_("removed in C++20. Include <cmath> and <complex> instead.") _LIBCPP_NODEBUG = void; -using __use_standard_header_ctgmath _LIBCPP_NODEBUG = __standard_header_ctgmath; - -# elif _LIBCPP_STD_VER >= 17 - -using __standard_header_ctgmath _LIBCPP_DEPRECATED_("Include <cmath> and <complex> instead.") _LIBCPP_NODEBUG = void; -using __use_standard_header_ctgmath _LIBCPP_NODEBUG = __standard_header_ctgmath; - +# if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS +# warning <ctgmath> is deprecated in C++17 and removed in C++20. Include <cmath> and <complex> instead. # endif #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) diff --git a/libcxx/test/libcxx/transitive_includes.gen.py b/libcxx/test/libcxx/transitive_includes.gen.py index 6ed35af7e275..2b643e1f2ad4 100644 --- a/libcxx/test/libcxx/transitive_includes.gen.py +++ b/libcxx/test/libcxx/transitive_includes.gen.py @@ -89,7 +89,7 @@ else: // UNSUPPORTED: LIBCXX-FREEBSD-FIXME // RUN: mkdir %t -// RUN: %{{cxx}} %s %{{flags}} %{{compile_flags}} --trace-includes -fshow-skipped-includes --preprocess > /dev/null 2> %t/trace-includes.txt +// RUN: %{{cxx}} %s %{{flags}} %{{compile_flags}} -Wno-deprecated --trace-includes -fshow-skipped-includes --preprocess > /dev/null 2> %t/trace-includes.txt // RUN: %{{python}} %{{libcxx-dir}}/test/libcxx/transitive_includes/to_csv.py %t/trace-includes.txt > %t/actual_transitive_includes.csv // RUN: cat %{{libcxx-dir}}/test/libcxx/transitive_includes/%{{cxx_std}}.csv | awk '/^{escaped_header} / {{ print }}' > %t/expected_transitive_includes.csv // RUN: diff -w %t/expected_transitive_includes.csv %t/actual_transitive_includes.csv diff --git a/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp index 0eaf82ce5cef..8df89d0ba920 100644 --- a/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp +++ b/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp @@ -14,12 +14,6 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: clang-modules-build -#include "test_macros.h" - #include <ccomplex> -#if TEST_STD_VER >= 20 -// expected-warning@ccomplex:* {{'__standard_header_ccomplex' is deprecated: removed in C++20. Include <complex> instead.}} -#else -// expected-warning@ccomplex:* {{'__standard_header_ccomplex' is deprecated: Include <complex> instead.}} -#endif +// expected-warning@ccomplex:* {{<ccomplex> is deprecated in C++17 and removed in C++20. Include <complex> instead.}} diff --git a/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp index 04acd1008154..32b57033331c 100644 --- a/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp +++ b/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp @@ -15,4 +15,5 @@ // UNSUPPORTED: clang-modules-build #include <ciso646> -// expected-warning@ciso646:* {{'__standard_header_ciso646' is deprecated: removed in C++20. Include <version> instead.}} + +// expected-warning@ciso646:* {{<ciso646> is removed in C++20. Include <version> instead.}} diff --git a/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp index dc9f1af55b3f..23a7709a9d65 100644 --- a/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp +++ b/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp @@ -14,12 +14,6 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: clang-modules-build -#include "test_macros.h" - #include <cstdalign> -#if TEST_STD_VER >= 20 -// expected-warning@cstdalign:* {{'__standard_header_cstdalign' is deprecated: removed in C++20.}} -#else -// expected-warning@cstdalign:* {{'__standard_header_cstdalign' is deprecated}} -#endif +// expected-warning@cstdalign:* {{<cstdalign> is deprecated in C++17 and removed in C++20.}} diff --git a/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp index eddefe14d35e..c2c0f03c52d3 100644 --- a/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp +++ b/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp @@ -14,12 +14,6 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: clang-modules-build -#include "test_macros.h" - #include <cstdbool> -#if TEST_STD_VER >= 20 -// expected-warning@cstdbool:* {{'__standard_header_cstdbool' is deprecated: removed in C++20.}} -#else -// expected-warning@cstdbool:* {{'__standard_header_cstdbool' is deprecated}} -#endif +// expected-warning@cstdbool:* {{<cstdbool> is deprecated in C++17 and removed in C++20.}} diff --git a/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp index 097ab1643d15..4f5564915443 100644 --- a/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp +++ b/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp @@ -14,12 +14,6 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: clang-modules-build -#include "test_macros.h" - #include <ctgmath> -#if TEST_STD_VER >= 20 -// expected-warning@ctgmath:* {{'__standard_header_ctgmath' is deprecated: removed in C++20. Include <cmath> and <complex> instead.}} -#else -// expected-warning@ctgmath:* {{'__standard_header_ctgmath' is deprecated: Include <cmath> and <complex> instead.}} -#endif +// expected-warning@ctgmath:* {{<ctgmath> is deprecated in C++17 and removed in C++20. Include <cmath> and <complex> instead.}} diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.verify.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.verify.cpp index 12d778408d5e..e58e760a5ce8 100644 --- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.verify.cpp +++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.verify.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// REQUIRES: std-at-least-c++23 +// REQUIRES: std-at-least-c++26 // <tuple> @@ -21,11 +21,6 @@ void test() { // expected-error@*:* {{static assertion failed}} - // Turns to an error since C++26 (Disallow Binding a Returned Glvalue to a Temporary https://wg21.link/P2748R5). -#if TEST_STD_VER >= 26 // expected-error@tuple:* {{returning reference to local temporary object}} -#else - // expected-warning@tuple:* {{returning reference to local temporary object}} -#endif std::ignore = std::make_from_tuple<const int&>(std::tuple<char>{}); } diff --git a/libcxx/utils/libcxx/test/format.py b/libcxx/utils/libcxx/test/format.py index 975209c273f8..76e9115295b9 100644 --- a/libcxx/utils/libcxx/test/format.py +++ b/libcxx/utils/libcxx/test/format.py @@ -99,7 +99,7 @@ def parseScript(test, preamble): substitutions.append( ( "%{verify}", - "%{cxx} %s %{flags} %{compile_flags} -fsyntax-only -Wno-error -Xclang -verify -Xclang -verify-ignore-unexpected=note -ferror-limit=0", + "%{cxx} %s %{flags} %{compile_flags} -U_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER -fsyntax-only -Wno-error -Xclang -verify -Xclang -verify-ignore-unexpected=note -ferror-limit=0", ) ) substitutions.append(("%{run}", "%{exec} %t.exe")) diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 2a97df4785ec..b0dc79729251 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -762,7 +762,7 @@ void AArch64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, relocateNoSym(loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val); break; default: - llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); + llvm_unreachable("unsupported relocation for TLS GD to IE relaxation"); } } diff --git a/llvm/include/llvm/IR/Constant.h b/llvm/include/llvm/IR/Constant.h index 0be1fc172ebd..e8ce453559ed 100644 --- a/llvm/include/llvm/IR/Constant.h +++ b/llvm/include/llvm/IR/Constant.h @@ -79,6 +79,9 @@ public: /// Return true if the value is the smallest signed value. LLVM_ABI bool isMinSignedValue() const; + /// Return true if the value is the largest signed value. + LLVM_ABI bool isMaxSignedValue() const; + /// Return true if this is a finite and non-zero floating-point scalar /// constant or a fixed width vector constant with all finite and non-zero /// elements. diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 6f44713bd22c..8968f6b934d7 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -41,6 +41,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicsAArch64.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Statepoint.h" @@ -6676,6 +6677,62 @@ static MinMaxOptResult OptimizeConstMinMax(const Constant *RHSConst, return MinMaxOptResult::CannotOptimize; } +static Value *simplifySVEIntReduction(Intrinsic::ID IID, Type *ReturnType, + Value *Op0, Value *Op1) { + Constant *C0 = dyn_cast<Constant>(Op0); + Constant *C1 = dyn_cast<Constant>(Op1); + unsigned Width = ReturnType->getPrimitiveSizeInBits(); + + // All false predicate or reduction of neutral values ==> neutral result. + switch (IID) { + case Intrinsic::aarch64_sve_eorv: + case Intrinsic::aarch64_sve_orv: + case Intrinsic::aarch64_sve_saddv: + case Intrinsic::aarch64_sve_uaddv: + case Intrinsic::aarch64_sve_umaxv: + if ((C0 && C0->isNullValue()) || (C1 && C1->isNullValue())) + return ConstantInt::get(ReturnType, 0); + break; + case Intrinsic::aarch64_sve_andv: + case Intrinsic::aarch64_sve_uminv: + if ((C0 && C0->isNullValue()) || (C1 && C1->isAllOnesValue())) + return ConstantInt::get(ReturnType, APInt::getMaxValue(Width)); + break; + case Intrinsic::aarch64_sve_smaxv: + if ((C0 && C0->isNullValue()) || (C1 && C1->isMinSignedValue())) + return ConstantInt::get(ReturnType, APInt::getSignedMinValue(Width)); + break; + case Intrinsic::aarch64_sve_sminv: + if ((C0 && C0->isNullValue()) || (C1 && C1->isMaxSignedValue())) + return ConstantInt::get(ReturnType, APInt::getSignedMaxValue(Width)); + break; + } + + switch (IID) { + case Intrinsic::aarch64_sve_andv: + case Intrinsic::aarch64_sve_orv: + case Intrinsic::aarch64_sve_smaxv: + case Intrinsic::aarch64_sve_sminv: + case Intrinsic::aarch64_sve_umaxv: + case Intrinsic::aarch64_sve_uminv: + // sve_reduce_##(all, splat(X)) ==> X + if (C0 && C0->isAllOnesValue()) { + if (Value *SplatVal = getSplatValue(Op1)) { + assert(SplatVal->getType() == ReturnType && "Unexpected result type!"); + return SplatVal; + } + } + break; + case Intrinsic::aarch64_sve_eorv: + // sve_reduce_xor(all, splat(X)) ==> 0 + if (C0 && C0->isAllOnesValue()) + return ConstantInt::get(ReturnType, 0); + break; + } + + return nullptr; +} + Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType, Value *Op0, Value *Op1, const SimplifyQuery &Q, @@ -7037,6 +7094,17 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType, break; } + + case Intrinsic::aarch64_sve_andv: + case Intrinsic::aarch64_sve_eorv: + case Intrinsic::aarch64_sve_orv: + case Intrinsic::aarch64_sve_saddv: + case Intrinsic::aarch64_sve_smaxv: + case Intrinsic::aarch64_sve_sminv: + case Intrinsic::aarch64_sve_uaddv: + case Intrinsic::aarch64_sve_umaxv: + case Intrinsic::aarch64_sve_uminv: + return simplifySVEIntReduction(IID, ReturnType, Op0, Op1); default: break; } diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp index f497c574ee75..36d0d35d024c 100644 --- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -616,9 +616,8 @@ void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) { /// EnumerateValueSymbolTable - Insert all of the values in the specified symbol /// table into the values table. void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) { - for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end(); - VI != VE; ++VI) - EnumerateValue(VI->getValue()); + for (const auto &VI : VST) + EnumerateValue(VI.getValue()); } /// Insert all of the values referenced by named metadata in the specified diff --git a/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp b/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp index bca820fa807c..4acc064dbc21 100644 --- a/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp +++ b/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp @@ -64,7 +64,6 @@ dwarf::CFIProgram DWARFCFIState::convert(MCCFIInstruction Directive) { /* CodeAlignmentFactor */ 1, /* DataAlignmentFactor */ 1, Context->getTargetTriple().getArch()); - auto MaybeCurrentRow = getCurrentUnwindRow(); switch (Directive.getOperation()) { case MCCFIInstruction::OpSameValue: CFIP.addInstruction(dwarf::DW_CFA_same_value, Directive.getRegister()); diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index cbce8bd73610..a3aa5e957165 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -183,6 +183,23 @@ bool Constant::isMinSignedValue() const { return false; } +bool Constant::isMaxSignedValue() const { + // Check for INT_MAX integers + if (const ConstantInt *CI = dyn_cast<ConstantInt>(this)) + return CI->isMaxValue(/*isSigned=*/true); + + // Check for FP which are bitcasted from INT_MAX integers + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this)) + return CFP->getValueAPF().bitcastToAPInt().isMaxSignedValue(); + + // Check for splats of INT_MAX values. + if (getType()->isVectorTy()) + if (const auto *SplatVal = getSplatValue()) + return SplatVal->isMaxSignedValue(); + + return false; +} + bool Constant::isNotMinSignedValue() const { // Check for INT_MIN integers if (const ConstantInt *CI = dyn_cast<ConstantInt>(this)) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp index 9af812960542..b7078825928b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp @@ -314,9 +314,7 @@ public: #endif bool empty() const { return Nodes.empty(); } - const iterator_range<nodes_iterator> nodes() const { - return {Nodes.begin(), Nodes.end()}; - } + iterator_range<nodes_iterator> nodes() const { return Nodes; } const Node &getNode(unsigned ID) const { return *Nodes[ID]; } unsigned getNumNodes() const { return Nodes.size(); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index f533a47150a7..741392247c0d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -152,11 +152,12 @@ public: /// its underlying Instruction. VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands, Instruction *Inst = nullptr, + const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {}, DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") { VPInstruction *NewVPInst = tryInsertInstruction( - new VPInstruction(Opcode, Operands, {}, MD, DL, Name)); + new VPInstruction(Opcode, Operands, Flags, MD, DL, Name)); NewVPInst->setUnderlyingValue(Inst); return NewVPInst; } @@ -329,7 +330,7 @@ public: else if (Opcode == Instruction::ZExt) Flags = VPIRFlags::NonNegFlagsTy(false); return tryInsertInstruction( - new VPWidenCastRecipe(Opcode, Op, ResultTy, Flags)); + new VPWidenCastRecipe(Opcode, Op, ResultTy, nullptr, Flags)); } VPScalarIVStepsRecipe * diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 356d759b9479..c680b6fca84c 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7750,7 +7750,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(VPInstruction *VPI, }, Range); if (ShouldUseVectorIntrinsic) - return new VPWidenIntrinsicRecipe(*CI, ID, Ops, CI->getType(), *VPI, + return new VPWidenIntrinsicRecipe(*CI, ID, Ops, CI->getType(), *VPI, *VPI, VPI->getDebugLoc()); Function *Variant = nullptr; @@ -7804,7 +7804,8 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(VPInstruction *VPI, } Ops.push_back(VPI->getOperand(VPI->getNumOperands() - 1)); - return new VPWidenCallRecipe(CI, Variant, Ops, VPI->getDebugLoc()); + return new VPWidenCallRecipe(CI, Variant, Ops, *VPI, *VPI, + VPI->getDebugLoc()); } return nullptr; @@ -7842,7 +7843,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) { auto *SafeRHS = Builder.createSelect(Mask, Ops[1], One, VPI->getDebugLoc()); Ops[1] = SafeRHS; - return new VPWidenRecipe(*I, Ops, *VPI, VPI->getDebugLoc()); + return new VPWidenRecipe(*I, Ops, *VPI, *VPI, VPI->getDebugLoc()); } [[fallthrough]]; } @@ -7888,7 +7889,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) { // For other binops, the legacy cost model only checks the second operand. NewOps[1] = GetConstantViaSCEV(NewOps[1]); } - return new VPWidenRecipe(*I, NewOps, *VPI, VPI->getDebugLoc()); + return new VPWidenRecipe(*I, NewOps, *VPI, *VPI, VPI->getDebugLoc()); } case Instruction::ExtractValue: { SmallVector<VPValue *> NewOps(VPI->operands()); @@ -7896,7 +7897,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) { assert(EVI->getNumIndices() == 1 && "Expected one extractvalue index"); unsigned Idx = EVI->getIndices()[0]; NewOps.push_back(Plan.getConstantInt(32, Idx)); - return new VPWidenRecipe(*I, NewOps, *VPI, VPI->getDebugLoc()); + return new VPWidenRecipe(*I, NewOps, *VPI, *VPI, VPI->getDebugLoc()); } }; } @@ -7981,7 +7982,8 @@ VPReplicateRecipe *VPRecipeBuilder::handleReplication(VPInstruction *VPI, (Range.Start.isScalable() && isa<IntrinsicInst>(I))) && "Should not predicate a uniform recipe"); auto *Recipe = - new VPReplicateRecipe(I, VPI->operands(), IsUniform, BlockInMask, *VPI); + new VPReplicateRecipe(I, VPI->operands(), IsUniform, BlockInMask, *VPI, + *VPI, VPI->getDebugLoc()); return Recipe; } @@ -8231,17 +8233,19 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R, return nullptr; if (VPI->getOpcode() == Instruction::GetElementPtr) - return new VPWidenGEPRecipe(cast<GetElementPtrInst>(Instr), R->operands()); + return new VPWidenGEPRecipe(cast<GetElementPtrInst>(Instr), R->operands(), + *VPI, VPI->getDebugLoc()); if (VPI->getOpcode() == Instruction::Select) - return new VPWidenSelectRecipe(*cast<SelectInst>(Instr), R->operands(), - *VPI); + return new VPWidenSelectRecipe(cast<SelectInst>(Instr), R->operands(), *VPI, + *VPI, VPI->getDebugLoc()); if (Instruction::isCast(VPI->getOpcode())) { - auto *CastR = cast<VPInstructionWithType>(R); auto *CI = cast<CastInst>(Instr); + auto *CastR = cast<VPInstructionWithType>(VPI); return new VPWidenCastRecipe(CI->getOpcode(), VPI->getOperand(0), - CastR->getResultType(), *CI, *VPI); + CastR->getResultType(), CI, *VPI, *VPI, + VPI->getDebugLoc()); } return tryToWiden(VPI); @@ -8269,8 +8273,8 @@ VPRecipeBuilder::tryToCreatePartialReduction(VPInstruction *Reduction, SmallVector<VPValue *, 2> Ops; Ops.push_back(Plan.getOrAddLiveIn(Zero)); Ops.push_back(BinOp); - BinOp = new VPWidenRecipe(*ReductionI, Ops, VPIRMetadata(), - ReductionI->getDebugLoc()); + BinOp = new VPWidenRecipe(*ReductionI, Ops, VPIRFlags(*ReductionI), + VPIRMetadata(), ReductionI->getDebugLoc()); Builder.insert(BinOp->getDefiningRecipe()); ReductionOpcode = Instruction::Add; } @@ -8454,9 +8458,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) { // Only create recipe for the final invariant store of the reduction. if (Legal->isInvariantStoreOfReduction(SI)) { + auto *VPI = cast<VPInstruction>(SingleDef); auto *Recipe = new VPReplicateRecipe( - SI, R.operands(), true /* IsUniform */, nullptr /*Mask*/, - *cast<VPInstruction>(SingleDef)); + SI, R.operands(), true /* IsUniform */, nullptr /*Mask*/, *VPI, + *VPI, VPI->getDebugLoc()); Recipe->insertBefore(*MiddleVPBB, MBIP); } R.eraseFromParent(); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index fc29ab0c8409..fedbcfb6bd32 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -883,14 +883,6 @@ public: /// using IR flags. struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags { VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands, - DebugLoc DL = DebugLoc::getUnknown()) - : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags() {} - - VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands, - Instruction &I) - : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()), VPIRFlags(I) {} - - VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands, const VPIRFlags &Flags, DebugLoc DL = DebugLoc::getUnknown()) : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {} @@ -1474,9 +1466,12 @@ public: VPIRMetadata(Metadata), Opcode(Opcode) {} VPWidenRecipe(Instruction &I, ArrayRef<VPValue *> Operands, - const VPIRMetadata &Metadata, DebugLoc DL) - : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), - VPIRMetadata(Metadata), Opcode(I.getOpcode()) {} + const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {}, + DebugLoc DL = {}) + : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL), + VPIRMetadata(Metadata), Opcode(I.getOpcode()) { + setUnderlyingValue(&I); + } ~VPWidenRecipe() override = default; @@ -1517,30 +1512,22 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { public: VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, - CastInst &UI, const VPIRMetadata &Metadata) - : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), - VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) { - assert(UI.getOpcode() == Opcode && - "opcode of underlying cast doesn't match"); - } - VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, - const VPIRFlags &Flags = {}, + CastInst *CI = nullptr, const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {}, DebugLoc DL = DebugLoc::getUnknown()) : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL), VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) { assert(flagsValidForOpcode(Opcode) && "Set flags not supported for the provided opcode"); + setUnderlyingValue(CI); } ~VPWidenCastRecipe() override = default; VPWidenCastRecipe *clone() override { - auto *New = new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy, *this, - *this, getDebugLoc()); - if (auto *UV = getUnderlyingValue()) - New->setUnderlyingValue(UV); - return New; + return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy, + cast_or_null<CastInst>(getUnderlyingValue()), + *this, *this, getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPWidenCastSC) @@ -1585,13 +1572,17 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { public: VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef<VPValue *> CallArguments, Type *Ty, + const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {}, DebugLoc DL = DebugLoc::getUnknown()) - : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI), + : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags, + DL), VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty), MayReadFromMemory(CI.mayReadFromMemory()), MayWriteToMemory(CI.mayWriteToMemory()), - MayHaveSideEffects(CI.mayHaveSideEffects()) {} + MayHaveSideEffects(CI.mayHaveSideEffects()) { + setUnderlyingValue(&CI); + } VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef<VPValue *> CallArguments, Type *Ty, @@ -1617,7 +1608,7 @@ public: VPWidenIntrinsicRecipe *clone() override { if (Value *CI = getUnderlyingValue()) return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID, - operands(), ResultTy, *this, + operands(), ResultTy, *this, *this, getDebugLoc()); return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy, *this, *this, getDebugLoc()); @@ -1671,10 +1662,11 @@ class LLVM_ABI_FOR_TEST VPWidenCallRecipe : public VPRecipeWithIRFlags, public: VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef<VPValue *> CallArguments, - DebugLoc DL = DebugLoc::getUnknown()) - : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, - *cast<Instruction>(UV)), - VPIRMetadata(*cast<Instruction>(UV)), Variant(Variant) { + const VPIRFlags &Flags = {}, + const VPIRMetadata &Metadata = {}, DebugLoc DL = {}) + : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, Flags, DL), + VPIRMetadata(Metadata), Variant(Variant) { + setUnderlyingValue(UV); assert( isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) && "last operand must be the called function"); @@ -1684,7 +1676,7 @@ public: VPWidenCallRecipe *clone() override { return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(), - getDebugLoc()); + *this, *this, getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPWidenCallSC) @@ -1761,16 +1753,19 @@ protected: /// instruction. struct LLVM_ABI_FOR_TEST VPWidenSelectRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { - VPWidenSelectRecipe(SelectInst &I, ArrayRef<VPValue *> Operands, - const VPIRMetadata &MD = {}) - : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, I), - VPIRMetadata(MD) {} + VPWidenSelectRecipe(SelectInst *SI, ArrayRef<VPValue *> Operands, + const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {}, + DebugLoc DL = {}) + : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, Flags, DL), + VPIRMetadata(MD) { + setUnderlyingValue(SI); + } ~VPWidenSelectRecipe() override = default; VPWidenSelectRecipe *clone() override { - return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()), - operands(), *this); + return new VPWidenSelectRecipe(cast<SelectInst>(getUnderlyingInstr()), + operands(), *this, *this, getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC) @@ -1822,9 +1817,12 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags { } public: - VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef<VPValue *> Operands) - : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP), + VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef<VPValue *> Operands, + const VPIRFlags &Flags = {}, + DebugLoc DL = DebugLoc::getUnknown()) + : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, Flags, DL), SourceElementTy(GEP->getSourceElementType()) { + setUnderlyingValue(GEP); SmallVector<std::pair<unsigned, MDNode *>> Metadata; (void)Metadata; getMetadataToPropagate(GEP, Metadata); @@ -1835,7 +1833,7 @@ public: VPWidenGEPRecipe *clone() override { return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()), - operands()); + operands(), *this, getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC) @@ -2929,10 +2927,12 @@ class LLVM_ABI_FOR_TEST VPReplicateRecipe : public VPRecipeWithIRFlags, public: VPReplicateRecipe(Instruction *I, ArrayRef<VPValue *> Operands, bool IsSingleScalar, VPValue *Mask = nullptr, - VPIRMetadata Metadata = {}) - : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I), + const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {}, + DebugLoc DL = DebugLoc::getUnknown()) + : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, Flags, DL), VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar), IsPredicated(Mask) { + setUnderlyingValue(I); if (Mask) addOperand(Mask); } @@ -2940,9 +2940,9 @@ public: ~VPReplicateRecipe() override = default; VPReplicateRecipe *clone() override { - auto *Copy = - new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsSingleScalar, - isPredicated() ? getMask() : nullptr, *this); + auto *Copy = new VPReplicateRecipe( + getUnderlyingInstr(), operands(), IsSingleScalar, + isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc()); Copy->transferFlags(*this); return Copy; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 612202d04977..dbbde1cafa9f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -190,7 +190,7 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, // recipes. if (Br->isConditional()) { VPValue *Cond = getOrCreateVPOperand(Br->getCondition()); - VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst, + VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst, {}, VPIRMetadata(*Inst), Inst->getDebugLoc()); } @@ -205,7 +205,7 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, SmallVector<VPValue *> Ops = {getOrCreateVPOperand(SI->getCondition())}; for (auto Case : SI->cases()) Ops.push_back(getOrCreateVPOperand(Case.getCaseValue())); - VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst, + VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst, {}, VPIRMetadata(*Inst), Inst->getDebugLoc()); continue; } @@ -255,13 +255,14 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, if (auto *CI = dyn_cast<CastInst>(Inst)) { NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0], CI->getType(), CI->getDebugLoc(), - {}, MD); + VPIRFlags(*CI), MD); NewR->setUnderlyingValue(CI); } else { // Build VPInstruction for any arbitrary Instruction without specific // representation in VPlan. - NewR = VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst, MD, - Inst->getDebugLoc()); + NewR = + VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst, + VPIRFlags(*Inst), MD, Inst->getDebugLoc()); } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index fca6554ad77c..ef36e29aaa5c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2056,24 +2056,26 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const { switch (OpType) { case OperationType::OverflowingBinOp: return Opcode == Instruction::Add || Opcode == Instruction::Sub || - Opcode == Instruction::Mul || + Opcode == Instruction::Mul || Opcode == Instruction::Shl || Opcode == VPInstruction::VPInstruction::CanonicalIVIncrementForPart; case OperationType::Trunc: return Opcode == Instruction::Trunc; case OperationType::DisjointOp: return Opcode == Instruction::Or; case OperationType::PossiblyExactOp: - return Opcode == Instruction::AShr; + return Opcode == Instruction::AShr || Opcode == Instruction::LShr || + Opcode == Instruction::UDiv || Opcode == Instruction::SDiv; case OperationType::GEPOp: return Opcode == Instruction::GetElementPtr || Opcode == VPInstruction::PtrAdd || Opcode == VPInstruction::WidePtrAdd; case OperationType::FPMathOp: - return Opcode == Instruction::FAdd || Opcode == Instruction::FMul || - Opcode == Instruction::FSub || Opcode == Instruction::FNeg || - Opcode == Instruction::FDiv || Opcode == Instruction::FRem || - Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc || - Opcode == Instruction::FCmp || Opcode == Instruction::Select || + return Opcode == Instruction::Call || Opcode == Instruction::FAdd || + Opcode == Instruction::FMul || Opcode == Instruction::FSub || + Opcode == Instruction::FNeg || Opcode == Instruction::FDiv || + Opcode == Instruction::FRem || Opcode == Instruction::FPExt || + Opcode == Instruction::FPTrunc || Opcode == Instruction::FCmp || + Opcode == Instruction::Select || Opcode == VPInstruction::WideIVStep || Opcode == VPInstruction::ReductionStartVector || Opcode == VPInstruction::ComputeReductionResult; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 26563242de28..25557f1d5d65 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -104,24 +104,26 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/, *VPI, Ingredient.getDebugLoc()); } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { - NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands()); + NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands(), *VPI, + Ingredient.getDebugLoc()); } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) { Intrinsic::ID VectorID = getVectorIntrinsicIDForCall(CI, &TLI); if (VectorID == Intrinsic::not_intrinsic) return false; NewRecipe = new VPWidenIntrinsicRecipe( *CI, getVectorIntrinsicIDForCall(CI, &TLI), - drop_end(Ingredient.operands()), CI->getType(), *VPI, - CI->getDebugLoc()); + drop_end(Ingredient.operands()), CI->getType(), VPIRFlags(*CI), + *VPI, CI->getDebugLoc()); } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) { - NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands(), *VPI); + NewRecipe = new VPWidenSelectRecipe(SI, Ingredient.operands(), *VPI, + *VPI, Ingredient.getDebugLoc()); } else if (auto *CI = dyn_cast<CastInst>(Inst)) { - NewRecipe = - new VPWidenCastRecipe(CI->getOpcode(), Ingredient.getOperand(0), - CI->getType(), *CI, *VPI); + NewRecipe = new VPWidenCastRecipe( + CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI, + VPIRFlags(*CI), VPIRMetadata(*CI)); } else { NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands(), *VPI, - Ingredient.getDebugLoc()); + *VPI, Ingredient.getDebugLoc()); } } @@ -226,7 +228,8 @@ static bool sinkScalarOperands(VPlan &Plan) { // then cloning should be sufficient here. Instruction *I = SinkCandidate->getUnderlyingInstr(); Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true, - nullptr /*Mask*/, *SinkCandidateRepR); + nullptr /*Mask*/, *SinkCandidateRepR, + *SinkCandidateRepR); // TODO: add ".cloned" suffix to name of Clone's VPValue. } else { Clone = SinkCandidate->clone(); @@ -385,7 +388,8 @@ static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe, // mask but in the replicate region. auto *RecipeWithoutMask = new VPReplicateRecipe( PredRecipe->getUnderlyingInstr(), drop_end(PredRecipe->operands()), - PredRecipe->isSingleScalar(), nullptr /*Mask*/, *PredRecipe); + PredRecipe->isSingleScalar(), nullptr /*Mask*/, *PredRecipe, *PredRecipe, + PredRecipe->getDebugLoc()); auto *Pred = Plan.createVPBasicBlock(Twine(RegionName) + ".if", RecipeWithoutMask); @@ -691,7 +695,7 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) { // analysis. auto Users = collectUsersRecursively(PhiR); for (VPUser *U : reverse(Users)) { - auto *Def = dyn_cast<VPSingleDefRecipe>(U); + auto *Def = dyn_cast<VPRecipeWithIRFlags>(U); auto *RepR = dyn_cast<VPReplicateRecipe>(U); // Skip recipes that shouldn't be narrowed. if (!Def || !isa<VPReplicateRecipe, VPWidenRecipe>(Def) || @@ -704,7 +708,8 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) { continue; auto *Clone = new VPReplicateRecipe(Def->getUnderlyingInstr(), - Def->operands(), /*IsUniform*/ true); + Def->operands(), /*IsUniform*/ true, + /*Mask*/ nullptr, /*Flags*/ *Def); Clone->insertAfter(Def); Def->replaceAllUsesWith(Clone); } @@ -1423,12 +1428,13 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { if (RepR && (RepR->isSingleScalar() || RepR->isPredicated())) continue; - auto *RepOrWidenR = cast<VPSingleDefRecipe>(&R); + auto *RepOrWidenR = cast<VPRecipeWithIRFlags>(&R); if (RepR && isa<StoreInst>(RepR->getUnderlyingInstr()) && vputils::isSingleScalar(RepR->getOperand(1))) { auto *Clone = new VPReplicateRecipe( RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(), - true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Metadata*/); + true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Flags*/, + *RepR /*Metadata*/, RepR->getDebugLoc()); Clone->insertBefore(RepOrWidenR); unsigned ExtractOpc = vputils::isUniformAcrossVFsAndUFs(RepR->getOperand(1)) @@ -1469,9 +1475,9 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { })) continue; - auto *Clone = new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(), - RepOrWidenR->operands(), - true /*IsSingleScalar*/); + auto *Clone = new VPReplicateRecipe( + RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(), + true /*IsSingleScalar*/, nullptr, *RepOrWidenR); Clone->insertBefore(RepOrWidenR); RepOrWidenR->replaceAllUsesWith(Clone); if (isDeadRecipe(*RepOrWidenR)) @@ -3824,15 +3830,15 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, Ext0->getOpcode() == Ext1->getOpcode() && IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) { auto *NewExt0 = new VPWidenCastRecipe( - Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), *Ext0, - *Ext0, Ext0->getDebugLoc()); + Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), nullptr, + *Ext0, *Ext0, Ext0->getDebugLoc()); NewExt0->insertBefore(Ext0); VPWidenCastRecipe *NewExt1 = NewExt0; if (Ext0 != Ext1) { NewExt1 = new VPWidenCastRecipe(Ext1->getOpcode(), Ext1->getOperand(0), - Ext->getResultType(), *Ext1, *Ext1, - Ext1->getDebugLoc()); + Ext->getResultType(), nullptr, *Ext1, + *Ext1, Ext1->getDebugLoc()); NewExt1->insertBefore(Ext1); } Mul->setOperand(0, NewExt0); @@ -4353,7 +4359,7 @@ narrowInterleaveGroupOp(VPValue *V, SmallPtrSetImpl<VPValue *> &NarrowedOps) { // process one original iteration. auto *N = new VPReplicateRecipe(&WideLoad->getIngredient(), {PtrOp}, /*IsUniform*/ true, - /*Mask*/ nullptr, *WideLoad); + /*Mask*/ nullptr, {}, *WideLoad); N->insertBefore(WideLoad); NarrowedOps.insert(N); return N; diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index d4b8b72beb94..d76d2ed5f1c7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -518,9 +518,9 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, // TODO: have cloning of replicate recipes also provide the desired result // coupled with setting its operands to NewOps (deriving IsSingleScalar and // Mask from the operands?) - New = - new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps, - /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR); + New = new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps, + /*IsSingleScalar=*/true, /*Mask=*/nullptr, + *RepR, *RepR, RepR->getDebugLoc()); } else { assert(isa<VPInstruction>(DefR) && "DefR must be a VPReplicateRecipe or VPInstruction"); diff --git a/llvm/test/Transforms/InstSimplify/AArch64/aarch64-sve-reductions.ll b/llvm/test/Transforms/InstSimplify/AArch64/aarch64-sve-reductions.ll new file mode 100644 index 000000000000..a54d6044d04b --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/AArch64/aarch64-sve-reductions.ll @@ -0,0 +1,912 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes=instsimplify < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +; +; ANDV +; + +define i8 @andv_i8_no_active(<vscale x 16 x i8> %a) #0 { +; CHECK-LABEL: define i8 @andv_i8_no_active( +; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: ret i8 -1 +; + %out = call i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a) + ret i8 %out +} + +define i8 @andv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 { +; CHECK-LABEL: define i8 @andv_i8_splat_neutral_val( +; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 -1 +; + %out = call i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 -1)) + ret i8 %out +} + +define i8 @andv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 { +; CHECK-LABEL: define i8 @andv_i8_splat_non_neutral_val( +; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> zeroinitializer) +; CHECK-NEXT: ret i8 [[OUT]] +; + %out = call i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer) + ret i8 %out +} + +define i8 @andv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i8 @andv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 [[A]] +; + %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0 + %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %out = call i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat) + ret i8 %out +} + +define i16 @andv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 { +; CHECK-LABEL: define i16 @andv_i16_splat_neutral_val( +; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i16 -1 +; + %out = call i16 @llvm.aarch64.sve.andv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 -1)) + ret i16 %out +} + +define i16 @andv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 { +; CHECK-LABEL: define i16 @andv_i16_splat_non_neutral_val( +; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i16 @llvm.aarch64.sve.andv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> zeroinitializer) +; CHECK-NEXT: ret i16 [[OUT]] +; + %out = call i16 @llvm.aarch64.sve.andv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer) + ret i16 %out +} + +define i32 @andv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: define i32 @andv_i32_splat_neutral_val( +; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i32 -1 +; + %out = call i32 @llvm.aarch64.sve.andv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 -1)) + ret i32 %out +} + +define i32 @andv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: define i32 @andv_i32_splat_non_neutral_val( +; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i32 @llvm.aarch64.sve.andv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: ret i32 [[OUT]] +; + %out = call i32 @llvm.aarch64.sve.andv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer) + ret i32 %out +} + +define i64 @andv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @andv_i64_splat_neutral_val( +; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 -1 +; + %out = call i64 @llvm.aarch64.sve.andv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 -1)) + ret i64 %out +} + +define i64 @andv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @andv_i64_splat_non_neutral_val( +; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.andv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> zeroinitializer) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.andv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer) + ret i64 %out +} + +; +; EORV +; + +define i8 @eorv_i8_no_active(<vscale x 16 x i8> %a) #0 { +; CHECK-LABEL: define i8 @eorv_i8_no_active( +; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 0 +; + %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a) + ret i8 %out +} + +define i8 @eorv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 { +; CHECK-LABEL: define i8 @eorv_i8_splat_neutral_val( +; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 0 +; + %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer) + ret i8 %out +} + +define i8 @eorv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 { +; CHECK-LABEL: define i8 @eorv_i8_splat_non_neutral_val( +; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1)) +; CHECK-NEXT: ret i8 [[OUT]] +; + %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 1)) + ret i8 %out +} + +define i8 @eorv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i8 @eorv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 0 +; + %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0 + %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat) + ret i8 %out +} + +define i16 @eorv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 { +; CHECK-LABEL: define i16 @eorv_i16_splat_neutral_val( +; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i16 0 +; + %out = call i16 @llvm.aarch64.sve.eorv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer) + ret i16 %out +} + +define i16 @eorv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 { +; CHECK-LABEL: define i16 @eorv_i16_splat_non_neutral_val( +; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i16 @llvm.aarch64.sve.eorv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1)) +; CHECK-NEXT: ret i16 [[OUT]] +; + %out = call i16 @llvm.aarch64.sve.eorv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 1)) + ret i16 %out +} + +define i32 @eorv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: define i32 @eorv_i32_splat_neutral_val( +; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i32 0 +; + %out = call i32 @llvm.aarch64.sve.eorv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer) + ret i32 %out +} + +define i32 @eorv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: define i32 @eorv_i32_splat_non_neutral_val( +; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i32 @llvm.aarch64.sve.eorv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1)) +; CHECK-NEXT: ret i32 [[OUT]] +; + %out = call i32 @llvm.aarch64.sve.eorv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 1)) + ret i32 %out +} + +define i64 @eorv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @eorv_i64_splat_neutral_val( +; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.eorv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer) + ret i64 %out +} + +define i64 @eorv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @eorv_i64_splat_non_neutral_val( +; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.eorv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.eorv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 1)) + ret i64 %out +} + +; +; ORV +; + +define i8 @orv_i8_no_active(<vscale x 16 x i8> %a) #0 { +; CHECK-LABEL: define i8 @orv_i8_no_active( +; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 0 +; + %out = call i8 @llvm.aarch64.sve.orv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a) + ret i8 %out +} + +define i8 @orv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 { +; CHECK-LABEL: define i8 @orv_i8_splat_neutral_val( +; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 0 +; + %out = call i8 @llvm.aarch64.sve.orv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer) + ret i8 %out +} + +define i8 @orv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 { +; CHECK-LABEL: define i8 @orv_i8_splat_non_neutral_val( +; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i8 @llvm.aarch64.sve.orv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1)) +; CHECK-NEXT: ret i8 [[OUT]] +; + %out = call i8 @llvm.aarch64.sve.orv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 1)) + ret i8 %out +} + +define i8 @orv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i8 @orv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 [[A]] +; + %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0 + %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %out = call i8 @llvm.aarch64.sve.orv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat) + ret i8 %out +} + +define i16 @orv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 { +; CHECK-LABEL: define i16 @orv_i16_splat_neutral_val( +; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i16 0 +; + %out = call i16 @llvm.aarch64.sve.orv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer) + ret i16 %out +} + +define i16 @orv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 { +; CHECK-LABEL: define i16 @orv_i16_splat_non_neutral_val( +; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i16 @llvm.aarch64.sve.orv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1)) +; CHECK-NEXT: ret i16 [[OUT]] +; + %out = call i16 @llvm.aarch64.sve.orv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 1)) + ret i16 %out +} + +define i32 @orv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: define i32 @orv_i32_splat_neutral_val( +; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i32 0 +; + %out = call i32 @llvm.aarch64.sve.orv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer) + ret i32 %out +} + +define i32 @orv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: define i32 @orv_i32_splat_non_neutral_val( +; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i32 @llvm.aarch64.sve.orv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1)) +; CHECK-NEXT: ret i32 [[OUT]] +; + %out = call i32 @llvm.aarch64.sve.orv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 1)) + ret i32 %out +} + +define i64 @orv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @orv_i64_splat_neutral_val( +; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.orv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer) + ret i64 %out +} + +define i64 @orv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @orv_i64_splat_non_neutral_val( +; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.orv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.orv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 1)) + ret i64 %out +} + +; +; SADDV +; + +define i64 @saddv_i8_no_active(<vscale x 16 x i8> %a) #0 { +; CHECK-LABEL: define i64 @saddv_i8_no_active( +; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a) + ret i64 %out +} + +define i64 @saddv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i8_splat_neutral_val( +; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer) + ret i64 %out +} + +define i64 @saddv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i8_splat_non_neutral_val( +; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 1)) + ret i64 %out +} + +define i64 @saddv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i64 @saddv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[A_INSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i8 0 +; CHECK-NEXT: [[A_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[A_INSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A_SPLAT]]) +; CHECK-NEXT: ret i64 [[OUT]] +; + %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0 + %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat) + ret i64 %out +} + +define i64 @saddv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i16_splat_neutral_val( +; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer) + ret i64 %out +} + +define i64 @saddv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i16_splat_non_neutral_val( +; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 1)) + ret i64 %out +} + +define i64 @saddv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i32_splat_neutral_val( +; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer) + ret i64 %out +} + +define i64 @saddv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i32_splat_non_neutral_val( +; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 1)) + ret i64 %out +} + +define i64 @saddv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i64_splat_neutral_val( +; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer) + ret i64 %out +} + +define i64 @saddv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i64_splat_non_neutral_val( +; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 1)) + ret i64 %out +} + +; +; SMAXV +; + +define i8 @smaxv_i8_no_active(<vscale x 16 x i8> %a) #0 { +; CHECK-LABEL: define i8 @smaxv_i8_no_active( +; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 -128 +; + %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a) + ret i8 %out +} + +define i8 @smaxv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 { +; CHECK-LABEL: define i8 @smaxv_i8_splat_neutral_val( +; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 -128 +; + %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 -128)) + ret i8 %out +} + +define i8 @smaxv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 { +; CHECK-LABEL: define i8 @smaxv_i8_splat_non_neutral_val( +; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i8 @llvm.aarch64.sve.smaxv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> zeroinitializer) +; CHECK-NEXT: ret i8 [[OUT]] +; + %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer) + ret i8 %out +} + +define i8 @smaxv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i8 @smaxv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 [[A]] +; + %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0 + %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat) + ret i8 %out +} + +define i16 @smaxv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 { +; CHECK-LABEL: define i16 @smaxv_i16_splat_neutral_val( +; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i16 -32768 +; + %out = call i16 @llvm.aarch64.sve.smaxv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 -32768)) + ret i16 %out +} + +define i16 @smaxv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 { +; CHECK-LABEL: define i16 @smaxv_i16_splat_non_neutral_val( +; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i16 @llvm.aarch64.sve.smaxv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> zeroinitializer) +; CHECK-NEXT: ret i16 [[OUT]] +; + %out = call i16 @llvm.aarch64.sve.smaxv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer) + ret i16 %out +} + +define i32 @smaxv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: define i32 @smaxv_i32_splat_neutral_val( +; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i32 -2147483648 +; + %out = call i32 @llvm.aarch64.sve.smaxv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 -2147483648)) + ret i32 %out +} + +define i32 @smaxv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: define i32 @smaxv_i32_splat_non_neutral_val( +; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i32 @llvm.aarch64.sve.smaxv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: ret i32 [[OUT]] +; + %out = call i32 @llvm.aarch64.sve.smaxv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer) + ret i32 %out +} + +define i64 @smaxv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @smaxv_i64_splat_neutral_val( +; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 -9223372036854775808 +; + %out = call i64 @llvm.aarch64.sve.smaxv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 -9223372036854775808)) + ret i64 %out +} + +define i64 @smaxv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @smaxv_i64_splat_non_neutral_val( +; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.smaxv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> zeroinitializer) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.smaxv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer) + ret i64 %out +} + +; +; SMINV +; + +define i8 @sminv_i8_no_active(<vscale x 16 x i8> %a) #0 { +; CHECK-LABEL: define i8 @sminv_i8_no_active( +; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 127 +; + %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a) + ret i8 %out +} + +define i8 @sminv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 { +; CHECK-LABEL: define i8 @sminv_i8_splat_neutral_val( +; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 127 +; + %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 127)) + ret i8 %out +} + +define i8 @sminv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 { +; CHECK-LABEL: define i8 @sminv_i8_splat_non_neutral_val( +; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i8 @llvm.aarch64.sve.sminv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> zeroinitializer) +; CHECK-NEXT: ret i8 [[OUT]] +; + %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer) + ret i8 %out +} + +define i8 @sminv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i8 @sminv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 [[A]] +; + %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0 + %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat) + ret i8 %out +} + +define i16 @sminv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 { +; CHECK-LABEL: define i16 @sminv_i16_splat_neutral_val( +; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i16 32767 +; + %out = call i16 @llvm.aarch64.sve.sminv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 32767)) + ret i16 %out +} + +define i16 @sminv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 { +; CHECK-LABEL: define i16 @sminv_i16_splat_non_neutral_val( +; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i16 @llvm.aarch64.sve.sminv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> zeroinitializer) +; CHECK-NEXT: ret i16 [[OUT]] +; + %out = call i16 @llvm.aarch64.sve.sminv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer) + ret i16 %out +} + +define i32 @sminv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: define i32 @sminv_i32_splat_neutral_val( +; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i32 2147483647 +; + %out = call i32 @llvm.aarch64.sve.sminv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 2147483647)) + ret i32 %out +} + +define i32 @sminv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: define i32 @sminv_i32_splat_non_neutral_val( +; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i32 @llvm.aarch64.sve.sminv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: ret i32 [[OUT]] +; + %out = call i32 @llvm.aarch64.sve.sminv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer) + ret i32 %out +} + +define i64 @sminv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @sminv_i64_splat_neutral_val( +; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 9223372036854775807 +; + %out = call i64 @llvm.aarch64.sve.sminv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 9223372036854775807)) + ret i64 %out +} + +define i64 @sminv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @sminv_i64_splat_non_neutral_val( +; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.sminv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> zeroinitializer) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.sminv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer) + ret i64 %out +} + +; +; UADDV +; + +define i64 @uaddv_i8_no_active(<vscale x 16 x i8> %a) #0 { +; CHECK-LABEL: define i64 @uaddv_i8_no_active( +; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a) + ret i64 %out +} + +define i64 @uaddv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i8_splat_neutral_val( +; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer) + ret i64 %out +} + +define i64 @uaddv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i8_splat_non_neutral_val( +; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 1)) + ret i64 %out +} + +define i64 @uaddv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i64 @uaddv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[A_INSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i8 0 +; CHECK-NEXT: [[A_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[A_INSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A_SPLAT]]) +; CHECK-NEXT: ret i64 [[OUT]] +; + %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0 + %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat) + ret i64 %out +} + +define i64 @uaddv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i16_splat_neutral_val( +; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer) + ret i64 %out +} + +define i64 @uaddv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i16_splat_non_neutral_val( +; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 1)) + ret i64 %out +} + +define i64 @uaddv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i32_splat_neutral_val( +; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer) + ret i64 %out +} + +define i64 @uaddv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i32_splat_non_neutral_val( +; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 1)) + ret i64 %out +} + +define i64 @uaddv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i64_splat_neutral_val( +; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer) + ret i64 %out +} + +define i64 @uaddv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i64_splat_non_neutral_val( +; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 1)) + ret i64 %out +} + +; +; UMAXV +; + +define i8 @umaxv_i8_no_active(<vscale x 16 x i8> %a) #0 { +; CHECK-LABEL: define i8 @umaxv_i8_no_active( +; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 0 +; + %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a) + ret i8 %out +} + +define i8 @umaxv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 { +; CHECK-LABEL: define i8 @umaxv_i8_splat_neutral_val( +; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 0 +; + %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer) + ret i8 %out +} + +define i8 @umaxv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 { +; CHECK-LABEL: define i8 @umaxv_i8_splat_non_neutral_val( +; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1)) +; CHECK-NEXT: ret i8 [[OUT]] +; + %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 1)) + ret i8 %out +} + +define i8 @umaxv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i8 @umaxv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 [[A]] +; + %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0 + %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat) + ret i8 %out +} + +define i16 @umaxv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 { +; CHECK-LABEL: define i16 @umaxv_i16_splat_neutral_val( +; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i16 0 +; + %out = call i16 @llvm.aarch64.sve.umaxv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer) + ret i16 %out +} + +define i16 @umaxv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 { +; CHECK-LABEL: define i16 @umaxv_i16_splat_non_neutral_val( +; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i16 @llvm.aarch64.sve.umaxv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1)) +; CHECK-NEXT: ret i16 [[OUT]] +; + %out = call i16 @llvm.aarch64.sve.umaxv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 1)) + ret i16 %out +} + +define i32 @umaxv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: define i32 @umaxv_i32_splat_neutral_val( +; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i32 0 +; + %out = call i32 @llvm.aarch64.sve.umaxv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer) + ret i32 %out +} + +define i32 @umaxv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: define i32 @umaxv_i32_splat_non_neutral_val( +; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i32 @llvm.aarch64.sve.umaxv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1)) +; CHECK-NEXT: ret i32 [[OUT]] +; + %out = call i32 @llvm.aarch64.sve.umaxv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 1)) + ret i32 %out +} + +define i64 @umaxv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @umaxv_i64_splat_neutral_val( +; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.umaxv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer) + ret i64 %out +} + +define i64 @umaxv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @umaxv_i64_splat_non_neutral_val( +; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.umaxv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.umaxv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 1)) + ret i64 %out +} + +; +; UMINV +; + +define i8 @uminv_i8_no_active(<vscale x 16 x i8> %a) #0 { +; CHECK-LABEL: define i8 @uminv_i8_no_active( +; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 -1 +; + %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a) + ret i8 %out +} + +define i8 @uminv_i8_splat_neutral_val(<vscale x 16 x i1> %pg) #0 { +; CHECK-LABEL: define i8 @uminv_i8_splat_neutral_val( +; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 -1 +; + %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat(i8 -1)) + ret i8 %out +} + +define i8 @uminv_i8_splat_non_neutral_val(<vscale x 16 x i1> %pg) #0 { +; CHECK-LABEL: define i8 @uminv_i8_splat_non_neutral_val( +; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i8 @llvm.aarch64.sve.uminv.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> zeroinitializer) +; CHECK-NEXT: ret i8 [[OUT]] +; + %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer) + ret i8 %out +} + +define i8 @uminv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i8 @uminv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 [[A]] +; + %a.insert = insertelement <vscale x 16 x i8> poison, i8 %a, i8 0 + %a.splat = shufflevector <vscale x 16 x i8> %a.insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> %a.splat) + ret i8 %out +} + +define i16 @uminv_i16_splat_neutral_val(<vscale x 8 x i1> %pg) #0 { +; CHECK-LABEL: define i16 @uminv_i16_splat_neutral_val( +; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i16 -1 +; + %out = call i16 @llvm.aarch64.sve.uminv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat(i16 -1)) + ret i16 %out +} + +define i16 @uminv_i16_splat_non_neutral_val(<vscale x 8 x i1> %pg) #0 { +; CHECK-LABEL: define i16 @uminv_i16_splat_non_neutral_val( +; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i16 @llvm.aarch64.sve.uminv.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> zeroinitializer) +; CHECK-NEXT: ret i16 [[OUT]] +; + %out = call i16 @llvm.aarch64.sve.uminv.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> zeroinitializer) + ret i16 %out +} + +define i32 @uminv_i32_splat_neutral_val(<vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: define i32 @uminv_i32_splat_neutral_val( +; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i32 -1 +; + %out = call i32 @llvm.aarch64.sve.uminv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 -1)) + ret i32 %out +} + +define i32 @uminv_i32_splat_non_neutral_val(<vscale x 4 x i1> %pg) #0 { +; CHECK-LABEL: define i32 @uminv_i32_splat_non_neutral_val( +; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i32 @llvm.aarch64.sve.uminv.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: ret i32 [[OUT]] +; + %out = call i32 @llvm.aarch64.sve.uminv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer) + ret i32 %out +} + +define i64 @uminv_i64_splat_neutral_val(<vscale x 2 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @uminv_i64_splat_neutral_val( +; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 -1 +; + %out = call i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat(i64 -1)) + ret i64 %out +} + +define i64 @uminv_i64_splat_non_neutral_val(<vscale x 2 x i1> %pg) #0 { +; CHECK-LABEL: define i64 @uminv_i64_splat_non_neutral_val( +; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> zeroinitializer) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> zeroinitializer) + ret i64 %out +} + +attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/Transforms/InstSimplify/AArch64/lit.local.cfg b/llvm/test/Transforms/InstSimplify/AArch64/lit.local.cfg new file mode 100644 index 000000000000..10d4a0e953ed --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/AArch64/lit.local.cfg @@ -0,0 +1,2 @@ +if not "AArch64" in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll index 20676f370229..10c265519952 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll @@ -14,23 +14,23 @@ define void @foo(i64 %n) { ; CHECK-EMPTY: ; CHECK-NEXT: outer.header: ; CHECK-NEXT: EMIT-SCALAR ir<%outer.iv> = phi [ ir<%outer.iv.next>, outer.latch ], [ ir<0>, ir-bb<entry> ] -; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr ir<@arr2>, ir<0>, ir<%outer.iv> +; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr inbounds ir<@arr2>, ir<0>, ir<%outer.iv> ; CHECK-NEXT: EMIT store ir<%outer.iv>, ir<%gep.1> -; CHECK-NEXT: EMIT ir<%add> = add ir<%outer.iv>, ir<%n> +; CHECK-NEXT: EMIT ir<%add> = add nsw ir<%outer.iv>, ir<%n> ; CHECK-NEXT: Successor(s): inner ; CHECK-EMPTY: ; CHECK-NEXT: inner: ; CHECK-NEXT: EMIT-SCALAR ir<%inner.iv> = phi [ ir<%inner.iv.next>, inner ], [ ir<0>, outer.header ] -; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv> +; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr inbounds ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv> ; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2> -; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1> -; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8> +; CHECK-NEXT: EMIT ir<%inner.iv.next> = add nuw nsw ir<%inner.iv>, ir<1> +; CHECK-NEXT: EMIT ir<%inner.ec> = icmp eq ir<%inner.iv.next>, ir<8> ; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec> ; CHECK-NEXT: Successor(s): outer.latch, inner ; CHECK-EMPTY: ; CHECK-NEXT: outer.latch: -; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1> -; CHECK-NEXT: EMIT ir<%outer.ec> = icmp ir<%outer.iv.next>, ir<8> +; CHECK-NEXT: EMIT ir<%outer.iv.next> = add nuw nsw ir<%outer.iv>, ir<1> +; CHECK-NEXT: EMIT ir<%outer.ec> = icmp eq ir<%outer.iv.next>, ir<8> ; CHECK-NEXT: EMIT branch-on-cond ir<%outer.ec> ; CHECK-NEXT: Successor(s): ir-bb<exit>, outer.header ; CHECK-EMPTY: diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp index b99d656c5c50..5742df2aa3c5 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -139,12 +139,12 @@ compound=true "vector.body:\l" + " EMIT vp\<%2\> = CANONICAL-INDUCTION ir\<0\>, vp\<%index.next\>\l" + " EMIT-SCALAR ir\<%indvars.iv\> = phi [ ir\<0\>, vector.ph ], [ ir\<%indvars.iv.next\>, vector.body ]\l" + - " EMIT ir\<%arr.idx\> = getelementptr ir\<%A\>, ir\<%indvars.iv\>\l" + + " EMIT ir\<%arr.idx\> = getelementptr inbounds ir\<%A\>, ir\<%indvars.iv\>\l" + " EMIT ir\<%l1\> = load ir\<%arr.idx\>\l" + " EMIT ir\<%res\> = add ir\<%l1\>, ir\<10\>\l" + " EMIT store ir\<%res\>, ir\<%arr.idx\>\l" + " EMIT ir\<%indvars.iv.next\> = add ir\<%indvars.iv\>, ir\<1\>\l" + - " EMIT ir\<%exitcond\> = icmp ir\<%indvars.iv.next\>, ir\<%N\>\l" + + " EMIT ir\<%exitcond\> = icmp ne ir\<%indvars.iv.next\>, ir\<%N\>\l" + " EMIT vp\<%3\> = not ir\<%exitcond\>\l" + " EMIT vp\<%index.next\> = add nuw vp\<%2\>, vp\<%0\>\l" + " EMIT branch-on-count vp\<%index.next\>, vp\<%1\>\l" + @@ -305,9 +305,9 @@ compound=true "vector.body:\l" + " EMIT vp\<%2\> = CANONICAL-INDUCTION ir\<0\>, vp\<%index.next\>\l" + " EMIT-SCALAR ir\<%iv\> = phi [ ir\<0\>, vector.ph ], [ ir\<%iv.next\>, loop.latch ]\l" + - " EMIT ir\<%arr.idx\> = getelementptr ir\<%A\>, ir\<%iv\>\l" + + " EMIT ir\<%arr.idx\> = getelementptr inbounds ir\<%A\>, ir\<%iv\>\l" + " EMIT ir\<%l1\> = load ir\<%arr.idx\>\l" + - " EMIT ir\<%c\> = icmp ir\<%l1\>, ir\<0\>\l" + + " EMIT ir\<%c\> = icmp eq ir\<%l1\>, ir\<0\>\l" + "Successor(s): loop.latch\l" ] N4 -> N6 [ label=""] @@ -316,7 +316,7 @@ compound=true " EMIT ir\<%res\> = add ir\<%l1\>, ir\<10\>\l" + " EMIT store ir\<%res\>, ir\<%arr.idx\>\l" + " EMIT ir\<%iv.next\> = add ir\<%iv\>, ir\<1\>\l" + - " EMIT ir\<%exitcond\> = icmp ir\<%iv.next\>, ir\<%N\>\l" + + " EMIT ir\<%exitcond\> = icmp ne ir\<%iv.next\>, ir\<%N\>\l" + " EMIT vp\<%3\> = not ir\<%exitcond\>\l" + " EMIT vp\<%index.next\> = add nuw vp\<%2\>, vp\<%0\>\l" + " EMIT branch-on-count vp\<%index.next\>, vp\<%1\>\l" + diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index 3842ba235ead..63776b78a208 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -1009,7 +1009,7 @@ TEST_F(VPRecipeTest, CastVPWidenRecipeToVPUser) { SmallVector<VPValue *, 2> Args; Args.push_back(Op1); Args.push_back(Op2); - VPWidenRecipe WidenR(*AI, Args, VPIRMetadata(), DebugLoc()); + VPWidenRecipe WidenR(*AI, Args); checkVPRecipeCastImpl<VPWidenRecipe, VPUser, VPIRMetadata>(&WidenR); delete AI; @@ -1053,7 +1053,7 @@ TEST_F(VPRecipeTest, CastVPWidenSelectRecipeToVPUserAndVPDef) { Args.push_back(Op1); Args.push_back(Op2); Args.push_back(Op3); - VPWidenSelectRecipe WidenSelectR(*SelectI, + VPWidenSelectRecipe WidenSelectR(SelectI, make_range(Args.begin(), Args.end())); checkVPRecipeCastImpl<VPWidenSelectRecipe, VPUser, VPIRMetadata>( @@ -1093,7 +1093,7 @@ TEST_F(VPRecipeTest, CastVPWidenCastRecipeToVPUser) { IntegerType *Int64 = IntegerType::get(C, 64); auto *Cast = CastInst::CreateZExtOrBitCast(PoisonValue::get(Int32), Int64); VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); - VPWidenCastRecipe Recipe(Instruction::ZExt, Op1, Int64, *Cast, {}); + VPWidenCastRecipe Recipe(Instruction::ZExt, Op1, Int64, Cast); checkVPRecipeCastImpl<VPWidenCastRecipe, VPUser, VPIRMetadata>(&Recipe); delete Cast; @@ -1264,7 +1264,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { SmallVector<VPValue *, 2> Args; Args.push_back(Op1); Args.push_back(Op2); - VPWidenRecipe Recipe(*AI, Args, VPIRMetadata(), DebugLoc()); + VPWidenRecipe Recipe(*AI, Args); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); @@ -1283,7 +1283,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { Args.push_back(Op1); Args.push_back(Op2); Args.push_back(Op3); - VPWidenSelectRecipe Recipe(*SelectI, make_range(Args.begin(), Args.end())); + VPWidenSelectRecipe Recipe(SelectI, make_range(Args.begin(), Args.end())); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); @@ -1412,7 +1412,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { Args.push_back(Op1); Args.push_back(Op2); Args.push_back(CalledFn); - VPWidenCallRecipe Recipe(Call, TheFn, Args); + VPWidenCallRecipe Recipe(Call, TheFn, Args, VPIRFlags(), VPIRMetadata()); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); @@ -1468,8 +1468,7 @@ TEST_F(VPRecipeTest, dumpRecipeInPlan) { VPValue *ExtVPV2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); Args.push_back(ExtVPV1); Args.push_back(ExtVPV2); - VPWidenRecipe *WidenR = - new VPWidenRecipe(*AI, Args, VPIRMetadata(), DebugLoc()); + VPWidenRecipe *WidenR = new VPWidenRecipe(*AI, Args); VPBB1->appendRecipe(WidenR); { diff --git a/offload/include/OpenMP/omp.h b/offload/include/OpenMP/omp.h index d92c7e450c67..768ca46a9bed 100644 --- a/offload/include/OpenMP/omp.h +++ b/offload/include/OpenMP/omp.h @@ -30,13 +30,6 @@ extern "C" { -/// Definitions -///{ - -#define omp_invalid_device -2 - -///} - /// Type declarations ///{ diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h index 00910704a979..fbb4a06accf8 100644 --- a/offload/include/omptarget.h +++ b/offload/include/omptarget.h @@ -270,8 +270,6 @@ extern "C" { void ompx_dump_mapping_tables(void); int omp_get_num_devices(void); int omp_get_device_num(void); -int omp_get_device_from_uid(const char *DeviceUid); -const char *omp_get_uid_from_device(int DeviceNum); int omp_get_initial_device(void); void *omp_target_alloc(size_t Size, int DeviceNum); void omp_target_free(void *DevicePtr, int DeviceNum); diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index 6e85e5764449..dd83a3ccd08e 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -40,8 +40,6 @@ EXTERN void ompx_dump_mapping_tables() { using namespace llvm::omp::target::ompt; #endif -using GenericDeviceTy = llvm::omp::target::plugin::GenericDeviceTy; - void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind, const char *Name); void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind, @@ -70,62 +68,6 @@ EXTERN int omp_get_device_num(void) { return HostDevice; } -static inline bool is_initial_device_uid(const char *DeviceUid) { - return strcmp(DeviceUid, GenericPluginTy::getHostDeviceUid()) == 0; -} - -EXTERN int omp_get_device_from_uid(const char *DeviceUid) { - TIMESCOPE(); - OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); - - if (!DeviceUid) { - DP("Call to omp_get_device_from_uid returning omp_invalid_device\n"); - return omp_invalid_device; - } - if (is_initial_device_uid(DeviceUid)) { - DP("Call to omp_get_device_from_uid returning initial device number %d\n", - omp_get_initial_device()); - return omp_get_initial_device(); - } - - int DeviceNum = omp_invalid_device; - - auto ExclusiveDevicesAccessor = PM->getExclusiveDevicesAccessor(); - for (const DeviceTy &Device : PM->devices(ExclusiveDevicesAccessor)) { - const char *Uid = Device.RTL->getDevice(Device.RTLDeviceID).getDeviceUid(); - if (Uid && strcmp(DeviceUid, Uid) == 0) { - DeviceNum = Device.DeviceID; - break; - } - } - - DP("Call to omp_get_device_from_uid returning %d\n", DeviceNum); - return DeviceNum; -} - -EXTERN const char *omp_get_uid_from_device(int DeviceNum) { - TIMESCOPE(); - OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); - - if (DeviceNum == omp_invalid_device) { - DP("Call to omp_get_uid_from_device returning nullptr\n"); - return nullptr; - } - if (DeviceNum == omp_get_initial_device()) { - DP("Call to omp_get_uid_from_device returning initial device UID\n"); - return GenericPluginTy::getHostDeviceUid(); - } - - auto DeviceOrErr = PM->getDevice(DeviceNum); - if (!DeviceOrErr) - FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str()); - - const char *Uid = - DeviceOrErr->RTL->getDevice(DeviceOrErr->RTLDeviceID).getDeviceUid(); - DP("Call to omp_get_uid_from_device returning %s\n", Uid); - return Uid; -} - EXTERN int omp_get_initial_device(void) { TIMESCOPE(); OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index 2ebc23e3cf60..910a5b6c827a 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -40,8 +40,6 @@ VERS1.0 { omp_get_mapped_ptr; omp_get_num_devices; omp_get_device_num; - omp_get_device_from_uid; - omp_get_uid_from_device; omp_get_initial_device; omp_target_alloc; omp_target_free; diff --git a/offload/test/api/omp_device_uid.c b/offload/test/api/omp_device_uid.c deleted file mode 100644 index 2a41d8d04ef8..000000000000 --- a/offload/test/api/omp_device_uid.c +++ /dev/null @@ -1,76 +0,0 @@ -// RUN: %libomptarget-compile-run-and-check-generic - -#include <omp.h> -#include <stdio.h> -#include <string.h> - -int test_omp_device_uid(int device_num) { - const char *device_uid = omp_get_uid_from_device(device_num); - if (device_uid == NULL) { - printf("FAIL for device %d: omp_get_uid_from_device returned NULL\n", - device_num); - return 0; - } - - int device_num_from_uid = omp_get_device_from_uid(device_uid); - if (device_num_from_uid != device_num) { - printf( - "FAIL for device %d: omp_get_device_from_uid returned %d (UID: %s)\n", - device_num, device_num_from_uid, device_uid); - return 0; - } - - if (device_num == omp_get_initial_device()) - return 1; - - int success = 1; - -// Note that the following code may be executed on the host if the host is the -// device -#pragma omp target map(tofrom : success) device(device_num) - { - int device_num = omp_get_device_num(); - - // omp_get_uid_from_device() in the device runtime is a dummy function - // returning NULL - const char *device_uid = omp_get_uid_from_device(device_num); - - // omp_get_device_from_uid() in the device runtime is a dummy function - // returning omp_invalid_device. - int device_num_from_uid = omp_get_device_from_uid(device_uid); - - // Depending on whether we're executing on the device or the host, we either - // got NULL as the device UID or the correct device UID. Consequently, - // omp_get_device_from_uid() either returned omp_invalid_device or the - // correct device number (aka omp_get_initial_device()). - if (device_uid ? device_num_from_uid != device_num - : device_num_from_uid != omp_invalid_device) { - printf("FAIL for device %d (target): omp_get_device_from_uid returned %d " - "(UID: %s)\n", - device_num, device_num_from_uid, device_uid); - success = 0; - } - } - - return success; -} - -int main() { - int num_devices = omp_get_num_devices(); - int num_failed = 0; - // (also test initial device aka num_devices) - for (int i = 0; i < num_devices + 1; i++) { - if (!test_omp_device_uid(i)) { - printf("FAIL for device %d\n", i); - num_failed++; - } - } - if (num_failed) { - printf("FAIL\n"); - return 1; - } - printf("PASS\n"); - return 0; -} - -// CHECK: PASS diff --git a/openmp/device/include/DeviceTypes.h b/openmp/device/include/DeviceTypes.h index 213ccfe58b4f..2e5d92380f04 100644 --- a/openmp/device/include/DeviceTypes.h +++ b/openmp/device/include/DeviceTypes.h @@ -21,9 +21,6 @@ template <typename T> using Constant = __gpu_constant T; template <typename T> using Local = __gpu_local T; template <typename T> using Global = __gpu_local T; -// See definition in OpenMP (omp.h.var/omp_lib.(F90|h).var) -#define omp_invalid_device -2 - enum omp_proc_bind_t { omp_proc_bind_false = 0, omp_proc_bind_true = 1, diff --git a/openmp/device/include/Interface.h b/openmp/device/include/Interface.h index 71c3b1fc06d4..c4bfaaa2404b 100644 --- a/openmp/device/include/Interface.h +++ b/openmp/device/include/Interface.h @@ -130,10 +130,6 @@ int omp_get_num_devices(void); int omp_get_device_num(void); -int omp_get_device_from_uid(const char *DeviceUid); - -const char *omp_get_uid_from_device(int DeviceNum); - int omp_get_num_teams(void); int omp_get_team_num(); diff --git a/openmp/device/src/State.cpp b/openmp/device/src/State.cpp index 985e6b169137..9f38cf26f8c6 100644 --- a/openmp/device/src/State.cpp +++ b/openmp/device/src/State.cpp @@ -403,12 +403,6 @@ int omp_get_num_devices(void) { return config::getNumDevices(); } int omp_get_device_num(void) { return config::getDeviceNum(); } -int omp_get_device_from_uid(const char *DeviceUid) { - return omp_invalid_device; -} - -const char *omp_get_uid_from_device(int DeviceNum) { return nullptr; } - int omp_get_num_teams(void) { return mapping::getNumberOfBlocksInKernel(); } int omp_get_team_num() { return mapping::getBlockIdInKernel(); } diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports index 00becd1a657f..3983dae80c9f 100644 --- a/openmp/runtime/src/dllexports +++ b/openmp/runtime/src/dllexports @@ -544,8 +544,6 @@ kmp_set_disp_num_buffers 890 omp_get_devices_all_allocator 819 omp_get_memspace_num_resources 820 omp_get_submemspace 821 - omp_get_device_from_uid 822 - omp_get_uid_from_device 823 %ifndef stub __kmpc_set_default_allocator __kmpc_get_default_allocator diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var index e98df731ad88..74f385feb3ea 100644 --- a/openmp/runtime/src/include/omp.h.var +++ b/openmp/runtime/src/include/omp.h.var @@ -536,11 +536,6 @@ /* OpenMP 5.2 */ extern int __KAI_KMPC_CONVENTION omp_in_explicit_task(void); - #define omp_invalid_device -2 - - /* OpenMP 6.0 */ - extern int __KAI_KMPC_CONVENTION omp_get_device_from_uid(const char *DeviceUid); - extern const char * __KAI_KMPC_CONVENTION omp_get_uid_from_device(int DeviceNum); /* LLVM Extensions */ extern void *llvm_omp_target_dynamic_shared_alloc(void); diff --git a/openmp/runtime/src/include/omp_lib.F90.var b/openmp/runtime/src/include/omp_lib.F90.var index 159b42ab5b5c..90d7e49ebf54 100644 --- a/openmp/runtime/src/include/omp_lib.F90.var +++ b/openmp/runtime/src/include/omp_lib.F90.var @@ -215,8 +215,6 @@ integer (kind=omp_interop_kind), parameter, public :: omp_interop_none = 0 - integer (kind=omp_integer_kind), parameter, public :: omp_invalid_device = -2 - interface ! *** @@ -419,18 +417,6 @@ integer (kind=omp_integer_kind) omp_get_device_num end function omp_get_device_num - function omp_get_uid_from_device(device_num) bind(c) - use omp_lib_kinds - integer (kind=omp_integer_kind), value :: device_num - character (len=*) omp_get_uid_from_device - end function omp_get_uid_from_device - - function omp_get_device_from_uid(device_uid) bind(c) - use omp_lib_kinds - character (len=*), value :: device_uid - integer (kind=omp_integer_kind) omp_get_device_from_uid - end function omp_get_device_from_uid - function omp_pause_resource(kind, device_num) bind(c) use omp_lib_kinds integer (kind=omp_pause_resource_kind), value :: kind diff --git a/openmp/runtime/src/include/omp_lib.h.var b/openmp/runtime/src/include/omp_lib.h.var index 468eb03e99ef..a50bb018c7cc 100644 --- a/openmp/runtime/src/include/omp_lib.h.var +++ b/openmp/runtime/src/include/omp_lib.h.var @@ -291,9 +291,6 @@ integer(kind=omp_interop_kind)omp_interop_none parameter(omp_interop_none=0) - integer(kind=omp_integer_kind)omp_invalid_device - parameter(omp_invalid_device=-2) - interface ! *** @@ -489,18 +486,6 @@ integer (kind=omp_integer_kind) omp_get_device_num end function omp_get_device_num - function omp_get_uid_from_device(device_num) bind(c) - import - integer (kind=omp_integer_kind), value :: device_num - character (len=*) omp_get_uid_from_device - end function omp_get_uid_from_device - - function omp_get_device_from_uid(device_uid) bind(c) - import - character (len=*), value :: device_uid - integer (kind=omp_integer_kind) omp_get_device_from_uid - end function omp_get_device_from_uid - function omp_pause_resource(kind, device_num) bind(c) import integer (kind=omp_pause_resource_kind), value :: kind @@ -1174,8 +1159,6 @@ !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_initial_device !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_num_devices !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_device_num -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_uid_from_device -!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_device_from_uid !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_pause_resource !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_pause_resource_all !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_supported_active_levels @@ -1259,8 +1242,6 @@ !$omp declare target(omp_get_initial_device ) !$omp declare target(omp_get_num_devices ) !$omp declare target(omp_get_device_num ) -!$omp declare target(omp_get_uid_from_device ) -!$omp declare target(omp_get_device_from_uid ) !$omp declare target(omp_pause_resource ) !$omp declare target(omp_pause_resource_all ) !$omp declare target(omp_get_supported_active_levels ) diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h index 49c56d2b9a76..2b0063eb23a0 100644 --- a/openmp/runtime/src/kmp_ftn_entry.h +++ b/openmp/runtime/src/kmp_ftn_entry.h @@ -1543,38 +1543,13 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_MAX_TASK_PRIORITY)(void) { #endif } -// These functions will be defined in libomptarget. When libomptarget is not -// loaded, we assume we are on the host. +// This function will be defined in libomptarget. When libomptarget is not +// loaded, we assume we are on the host and return KMP_HOST_DEVICE. // Compiler/libomptarget will handle this if called inside target. int FTN_STDCALL FTN_GET_DEVICE_NUM(void) KMP_WEAK_ATTRIBUTE_EXTERNAL; int FTN_STDCALL FTN_GET_DEVICE_NUM(void) { return KMP_EXPAND_NAME(FTN_GET_INITIAL_DEVICE)(); } -const char *FTN_STDCALL FTN_GET_UID_FROM_DEVICE(int device_num) - KMP_WEAK_ATTRIBUTE_EXTERNAL; -const char *FTN_STDCALL FTN_GET_UID_FROM_DEVICE(int device_num) { -#if KMP_OS_DARWIN || KMP_OS_WASI || defined(KMP_STUB) - return nullptr; -#else - const char *(*fptr)(int); - if ((*(void **)(&fptr) = KMP_DLSYM_NEXT("omp_get_uid_from_device"))) - return (*fptr)(device_num); - // Returns the same string as used by libomptarget - return "HOST"; -#endif -} -int FTN_STDCALL FTN_GET_DEVICE_FROM_UID(const char *device_uid) - KMP_WEAK_ATTRIBUTE_EXTERNAL; -int FTN_STDCALL FTN_GET_DEVICE_FROM_UID(const char *device_uid) { -#if KMP_OS_DARWIN || KMP_OS_WASI || defined(KMP_STUB) - return omp_invalid_device; -#else - int (*fptr)(const char *); - if ((*(void **)(&fptr) = KMP_DLSYM_NEXT("omp_get_device_from_uid"))) - return (*fptr)(device_uid); - return KMP_EXPAND_NAME(FTN_GET_INITIAL_DEVICE)(); -#endif -} // Compiler will ensure that this is only called from host in sequential region int FTN_STDCALL KMP_EXPAND_NAME(FTN_PAUSE_RESOURCE)(kmp_pause_status_t kind, diff --git a/openmp/runtime/src/kmp_ftn_os.h b/openmp/runtime/src/kmp_ftn_os.h index c439a058f22b..ae0ed067235e 100644 --- a/openmp/runtime/src/kmp_ftn_os.h +++ b/openmp/runtime/src/kmp_ftn_os.h @@ -140,8 +140,6 @@ #define FTN_GET_MEMSPACE_NUM_RESOURCES omp_get_memspace_num_resources #define FTN_GET_SUBMEMSPACE omp_get_submemspace #define FTN_GET_DEVICE_NUM omp_get_device_num -#define FTN_GET_UID_FROM_DEVICE omp_get_uid_from_device -#define FTN_GET_DEVICE_FROM_UID omp_get_device_from_uid #define FTN_SET_AFFINITY_FORMAT omp_set_affinity_format #define FTN_GET_AFFINITY_FORMAT omp_get_affinity_format #define FTN_DISPLAY_AFFINITY omp_display_affinity @@ -291,8 +289,6 @@ #define FTN_ALLOC omp_alloc_ #define FTN_FREE omp_free_ #define FTN_GET_DEVICE_NUM omp_get_device_num_ -#define FTN_GET_UID_FROM_DEVICE omp_get_uid_from_device_ -#define FTN_GET_DEVICE_FROM_UID omp_get_device_from_uid_ #define FTN_SET_AFFINITY_FORMAT omp_set_affinity_format_ #define FTN_GET_AFFINITY_FORMAT omp_get_affinity_format_ #define FTN_DISPLAY_AFFINITY omp_display_affinity_ @@ -440,8 +436,6 @@ #define FTN_GET_MEMSPACE_NUM_RESOURCES OMP_GET_MEMSPACE_NUM_RESOURCES #define FTN_GET_SUBMEMSPACE OMP_GET_SUBMEMSPACE #define FTN_GET_DEVICE_NUM OMP_GET_DEVICE_NUM -#define FTN_GET_UID_FROM_DEVICE OMP_GET_UID_FROM_DEVICE -#define FTN_GET_DEVICE_FROM_UID OMP_GET_DEVICE_FROM_UID #define FTN_SET_AFFINITY_FORMAT OMP_SET_AFFINITY_FORMAT #define FTN_GET_AFFINITY_FORMAT OMP_GET_AFFINITY_FORMAT #define FTN_DISPLAY_AFFINITY OMP_DISPLAY_AFFINITY @@ -591,8 +585,6 @@ #define FTN_ALLOC OMP_ALLOC_ #define FTN_FREE OMP_FREE_ #define FTN_GET_DEVICE_NUM OMP_GET_DEVICE_NUM_ -#define FTN_GET_UID_FROM_DEVICE OMP_GET_UID_FROM_DEVICE_ -#define FTN_GET_DEVICE_FROM_UID OMP_GET_DEVICE_FROM_UID_ #define FTN_SET_AFFINITY_FORMAT OMP_SET_AFFINITY_FORMAT_ #define FTN_GET_AFFINITY_FORMAT OMP_GET_AFFINITY_FORMAT_ #define FTN_DISPLAY_AFFINITY OMP_DISPLAY_AFFINITY_ diff --git a/openmp/runtime/test/api/omp_device_uid.c b/openmp/runtime/test/api/omp_device_uid.c deleted file mode 100644 index 40a1cbb644c7..000000000000 --- a/openmp/runtime/test/api/omp_device_uid.c +++ /dev/null @@ -1,77 +0,0 @@ -// RUN: %libomp-compile-and-run 2>&1 | FileCheck %s -// Linking fails for icc 18 -// UNSUPPORTED: icc-18 - -#include <omp_testsuite.h> -#include <string.h> - -int test_omp_device_uid(int device_num) { - const char *device_uid = omp_get_uid_from_device(device_num); - if (device_uid == NULL) { - printf("FAIL for device %d: omp_get_uid_from_device returned NULL\n", - device_num); - return 0; - } - - int device_num_from_uid = omp_get_device_from_uid(device_uid); - if (device_num_from_uid != device_num) { - printf( - "FAIL for device %d: omp_get_device_from_uid returned %d (UID: %s)\n", - device_num, device_num_from_uid, device_uid); - return 0; - } - - if (device_num == omp_get_initial_device()) - return 1; - - int success = 1; - -// Note that the following code may be executed on the host if the host is the -// device -#pragma omp target map(tofrom : success) device(device_num) - { - int device_num = omp_get_device_num(); - - // omp_get_uid_from_device() in the device runtime is a dummy function - // returning NULL - const char *device_uid = omp_get_uid_from_device(device_num); - - // omp_get_device_from_uid() in the device runtime is a dummy function - // returning omp_invalid_device. - int device_num_from_uid = omp_get_device_from_uid(device_uid); - - // Depending on whether we're executing on the device or the host, we either - // got NULL as the device UID or the correct device UID. Consequently, - // omp_get_device_from_uid() either returned omp_invalid_device or the - // correct device number (aka omp_get_initial_device()). - if (device_uid ? device_num_from_uid != device_num - : device_num_from_uid != omp_invalid_device) { - printf("FAIL for device %d (target): omp_get_device_from_uid returned %d " - "(UID: %s)\n", - device_num, device_num_from_uid, device_uid); - success = 0; - } - } - - return success; -} - -int main() { - int num_devices = omp_get_num_devices(); - int num_failed = 0; - // (also test initial device aka num_devices) - for (int i = 0; i < num_devices + 1; i++) { - if (!test_omp_device_uid(i)) { - printf("FAIL for device %d\n", i); - num_failed++; - } - } - if (num_failed) { - printf("FAIL\n"); - return 1; - } - printf("PASS\n"); - return 0; -} - -// CHECK: PASS |
