diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2024-05-21 07:57:04 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-05-21 07:57:04 +0200 |
| commit | 66b76faffb211b3cb2d58e3ab9401e6396447de9 (patch) | |
| tree | 0bf9898fc3d49c2a6eb05c34f5bc24d4f2049394 /llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | |
| parent | e411c88b7223d87520af819fdc012c9dbb46e575 (diff) | |
AMDGPU: Directly emit sqrt intrinsic when folding rootn(x, 2) (#92598)
This avoids depending on pre/post link runs.
Depends on #92595
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | 36 |
1 file changed, 25 insertions, 11 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp index 47de1791dae3..aab79ceb57f2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/InitializePasses.h" #include <cmath> @@ -1175,17 +1176,30 @@ bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B, return true; } - Module *M = Parent->getParent(); - if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x) - if (FunctionCallee FPExpr = - getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) { - LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> sqrt(" << *opr0 - << ")\n"); - Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt"); - replaceCall(FPOp, nval); - return true; - } - } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x) + Module *M = B.GetInsertBlock()->getModule(); + + CallInst *CI = cast<CallInst>(FPOp); + if (ci_opr1 == 2 && + shouldReplaceLibcallWithIntrinsic(CI, + /*AllowMinSizeF32=*/true, + /*AllowF64=*/true)) { + // rootn(x, 2) = sqrt(x) + LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> sqrt(" << *opr0 << ")\n"); + + CallInst *NewCall = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI); + NewCall->takeName(CI); + + // OpenCL rootn has a looser ulp of 2 requirement than sqrt, so add some + // metadata. + MDBuilder MDHelper(M->getContext()); + MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->getFPAccuracy(), 2.0f)); + NewCall->setMetadata(LLVMContext::MD_fpmath, FPMD); + + replaceCall(CI, NewCall); + return true; + } + + if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x) if (FunctionCallee FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) { LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> cbrt(" << *opr0 |
