diff options
Diffstat (limited to 'clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp')
| -rw-r--r-- | clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 58 |
1 files changed, 58 insertions, 0 deletions
# Patch summary (explanatory text placed before the first "diff --git" header,
# where patch tools skip leading non-diff lines):
#   * Adds the file-local helper getIntrinsicIDforWaveReduction(), which maps
#     each __builtin_amdgcn_wave_reduce_* builtin ID to an
#     Intrinsic::amdgcn_wave_reduce_* ID. The 32-bit and 64-bit variants of an
#     operation share one intrinsic ID; the unsigned min/max builtins map to
#     the umin/umax intrinsics. Any other builtin ID reaches llvm_unreachable.
#   * Adds a case group to CodeGenFunction::EmitAMDGPUBuiltinExpr() that emits
#     call arguments 0 and 1 as scalars, requests the intrinsic declaration
#     using argument 0's LLVM type, and returns a call passing both values.
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 87a46287c402..07cf08c54985 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -295,11 +295,69 @@ void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
   Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
 }
 
+static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
+  switch (BuiltinID) {
+  default:
+    llvm_unreachable("Unknown BuiltinID for wave reduction");
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
+    return Intrinsic::amdgcn_wave_reduce_add;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
+    return Intrinsic::amdgcn_wave_reduce_sub;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
+    return Intrinsic::amdgcn_wave_reduce_min;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
+    return Intrinsic::amdgcn_wave_reduce_umin;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
+    return Intrinsic::amdgcn_wave_reduce_max;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
+    return Intrinsic::amdgcn_wave_reduce_umax;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
+    return Intrinsic::amdgcn_wave_reduce_and;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
+    return Intrinsic::amdgcn_wave_reduce_or;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:
+    return Intrinsic::amdgcn_wave_reduce_xor;
+  }
+}
+
 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
   llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
   llvm::SyncScope::ID SSID;
   switch (BuiltinID) {
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: {
+    Intrinsic::ID IID = getIntrinsicIDforWaveReduction(BuiltinID);
+    llvm::Value *Value = EmitScalarExpr(E->getArg(0));
+    llvm::Value *Strategy = EmitScalarExpr(E->getArg(1));
+    llvm::Function *F = CGM.getIntrinsic(IID, {Value->getType()});
+    return Builder.CreateCall(F, {Value, Strategy});
+  }
   case AMDGPU::BI__builtin_amdgcn_div_scale:
   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
     // Translate from the intrinsics's struct return to the builtin's out
