summaryrefslogtreecommitdiff
path: root/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp')
-rw-r--r--clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp58
1 files changed, 58 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 87a46287c402..07cf08c54985 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -295,11 +295,69 @@ void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
}
+/// Translate a wave-reduction builtin ID into the ID of the LLVM intrinsic
+/// that implements it.
+///
+/// The 32-bit and 64-bit flavours of each builtin share one intrinsic ID, so
+/// the operand width is not encoded in the result. The signed min/max
+/// builtins select the `min`/`max` intrinsics, while the unsigned ones select
+/// `umin`/`umax`.
+///
+/// \param BuiltinID one of the `__builtin_amdgcn_wave_reduce_*` builtin IDs.
+/// \returns the matching `Intrinsic::amdgcn_wave_reduce_*` ID.
+///
+/// Any other builtin ID is a programming error and reaches llvm_unreachable.
+static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
+  switch (BuiltinID) {
+  // Arithmetic reductions.
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
+    return Intrinsic::amdgcn_wave_reduce_add;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
+    return Intrinsic::amdgcn_wave_reduce_sub;
+
+  // Signed minimum / maximum.
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
+    return Intrinsic::amdgcn_wave_reduce_min;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
+    return Intrinsic::amdgcn_wave_reduce_max;
+
+  // Unsigned minimum / maximum.
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
+    return Intrinsic::amdgcn_wave_reduce_umin;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
+    return Intrinsic::amdgcn_wave_reduce_umax;
+
+  // Bitwise reductions.
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
+    return Intrinsic::amdgcn_wave_reduce_and;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
+    return Intrinsic::amdgcn_wave_reduce_or;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:
+    return Intrinsic::amdgcn_wave_reduce_xor;
+
+  // Callers must only pass wave-reduction builtins.
+  default:
+    llvm_unreachable("Unknown BuiltinID for wave reduction");
+  }
+}
+
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
llvm::SyncScope::ID SSID;
switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: {
+ Intrinsic::ID IID = getIntrinsicIDforWaveReduction(BuiltinID);
+ llvm::Value *Value = EmitScalarExpr(E->getArg(0));
+ llvm::Value *Strategy = EmitScalarExpr(E->getArg(1));
+ llvm::Function *F = CGM.getIntrinsic(IID, {Value->getType()});
+ return Builder.CreateCall(F, {Value, Strategy});
+ }
case AMDGPU::BI__builtin_amdgcn_div_scale:
case AMDGPU::BI__builtin_amdgcn_div_scalef: {
// Translate from the intrinsics's struct return to the builtin's out