summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
diff options
context:
space:
mode:
author Matt Arsenault <Matthew.Arsenault@amd.com> 2024-05-21 07:57:04 +0200
committer GitHub <noreply@github.com> 2024-05-21 07:57:04 +0200
commit 66b76faffb211b3cb2d58e3ab9401e6396447de9 (patch)
tree 0bf9898fc3d49c2a6eb05c34f5bc24d4f2049394 /llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
parent e411c88b7223d87520af819fdc012c9dbb46e575 (diff)
AMDGPU: Directly emit sqrt intrinsic when folding rootn(x, 2) (#92598)
This avoids depending on pre/post link runs. Depends on #92595.
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp 36
1 files changed, 25 insertions, 11 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 47de1791dae3..aab79ceb57f2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include <cmath>
@@ -1175,17 +1176,30 @@ bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B,
return true;
}
- Module *M = Parent->getParent();
- if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
- if (FunctionCallee FPExpr =
- getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
- LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> sqrt(" << *opr0
- << ")\n");
- Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
- replaceCall(FPOp, nval);
- return true;
- }
- } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
+ Module *M = B.GetInsertBlock()->getModule();
+
+ CallInst *CI = cast<CallInst>(FPOp);
+ if (ci_opr1 == 2 &&
+ shouldReplaceLibcallWithIntrinsic(CI,
+ /*AllowMinSizeF32=*/true,
+ /*AllowF64=*/true)) {
+ // rootn(x, 2) = sqrt(x)
+ LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> sqrt(" << *opr0 << ")\n");
+
+ CallInst *NewCall = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
+ NewCall->takeName(CI);
+
+ // OpenCL rootn has a looser ulp of 2 requirement than sqrt, so add some
+ // metadata.
+ MDBuilder MDHelper(M->getContext());
+ MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->getFPAccuracy(), 2.0f));
+ NewCall->setMetadata(LLVMContext::MD_fpmath, FPMD);
+
+ replaceCall(CI, NewCall);
+ return true;
+ }
+
+ if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
if (FunctionCallee FPExpr =
getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> cbrt(" << *opr0