summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2024-07-01 20:16:22 +0200
committer: GitHub <noreply@github.com> 2024-07-01 20:16:22 +0200
commit: d3e7c4ce7a3d7f08cea02cba8f34c590a349688b (patch)
tree: badf067481bc0909ca88841e7471fc9296246030 /llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
parent: 658c03d5879fb78c8d6e0e43b97bc0e0027826d5 (diff)
AMDGPU: Use real copysign in fast pow (#97152)
Previously this would introduce some codegen regressions, but those have been avoided by simplifying demanded bits on copysign operations.
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp 7
1 files changed, 4 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 456f3cb332cf..27fa67ce5b45 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1131,17 +1131,18 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
if (needcopysign) {
Type* nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits());
Type *nTy = FPOp->getType()->getWithNewType(nTyS);
- unsigned size = nTy->getScalarSizeInBits();
Value *opr_n = FPOp->getOperand(1);
if (opr_n->getType()->getScalarType()->isIntegerTy())
opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou");
else
opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
+ unsigned size = nTy->getScalarSizeInBits();
Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
- nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
- nval = B.CreateBitCast(nval, opr0->getType());
+
+ nval = B.CreateCopySign(nval, B.CreateBitCast(sign, nval->getType()),
+ nullptr, "__pow_sign");
}
LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "