diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2024-07-01 20:16:22 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-07-01 20:16:22 +0200 |
| commit | d3e7c4ce7a3d7f08cea02cba8f34c590a349688b (patch) | |
| tree | badf067481bc0909ca88841e7471fc9296246030 /llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | |
| parent | 658c03d5879fb78c8d6e0e43b97bc0e0027826d5 (diff) | |
AMDGPU: Use real copysign in fast pow (#97152)
Previously this would introduce some codegen regressions, but
those have been avoided by simplifying demanded bits on copysign
operations.
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | 7 |
1 file changed, 4 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 456f3cb332cf..27fa67ce5b45 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1131,17 +1131,18 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
   if (needcopysign) {
     Type* nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits());
     Type *nTy = FPOp->getType()->getWithNewType(nTyS);
-    unsigned size = nTy->getScalarSizeInBits();
     Value *opr_n = FPOp->getOperand(1);
     if (opr_n->getType()->getScalarType()->isIntegerTy())
       opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou");
     else
       opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
 
+    unsigned size = nTy->getScalarSizeInBits();
     Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
     sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
-    nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
-    nval = B.CreateBitCast(nval, opr0->getType());
+
+    nval = B.CreateCopySign(nval, B.CreateBitCast(sign, nval->getType()),
+                            nullptr, "__pow_sign");
   }
 
   LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
