From 7314565281ec28b745502c3f429fd431e16673eb Mon Sep 17 00:00:00 2001 From: Frederik Harwath Date: Mon, 13 Oct 2025 09:18:03 +0200 Subject: [AMDGPU] expand-fp: unify scalarization (NFC) (#158588) Extend the existing "scalarize" function which is used for the fp-integer conversion instruction expansion to BinaryOperator instructions and reuse it for the frem expansion; a similar function for scalarizing BinaryOperator instructions exists in the ExpandLargeDivRem pass and this change is a step towards merging that pass with ExpandFp. Further refactoring: Scalarize directly instead of using the "ReplaceVector" as a worklist, rename "Replace" vector to "Worklist", and hoist a check for unsupported scalable vectors to the top of the instruction visiting loop. --- llvm/lib/CodeGen/ExpandFp.cpp | 135 +++++++++++++++++------------------------- 1 file changed, 55 insertions(+), 80 deletions(-) (limited to 'llvm/lib/CodeGen/ExpandFp.cpp') diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp index 9cc6c6a706c5..c500357c396c 100644 --- a/llvm/lib/CodeGen/ExpandFp.cpp +++ b/llvm/lib/CodeGen/ExpandFp.cpp @@ -82,7 +82,7 @@ public: } static FRemExpander create(IRBuilder<> &B, Type *Ty) { - assert(canExpandType(Ty)); + assert(canExpandType(Ty) && "Expected supported floating point type"); // The type to use for the computation of the remainder. This may be // wider than the input/result type which affects the ... @@ -356,8 +356,9 @@ Value *FRemExpander::buildFRem(Value *X, Value *Y, static bool expandFRem(BinaryOperator &I, std::optional &SQ) { LLVM_DEBUG(dbgs() << "Expanding instruction: " << I << '\n'); - Type *ReturnTy = I.getType(); - assert(FRemExpander::canExpandType(ReturnTy->getScalarType())); + Type *Ty = I.getType(); + assert(FRemExpander::canExpandType(Ty) && + "Expected supported floating point type"); FastMathFlags FMF = I.getFastMathFlags(); // TODO Make use of those flags for optimization? @@ -368,32 +369,10 @@ static bool expandFRem(BinaryOperator &I, std::optional &SQ) { B.setFastMathFlags(FMF); B.SetCurrentDebugLocation(I.getDebugLoc()); - Type *ElemTy = ReturnTy->getScalarType(); - const FRemExpander Expander = FRemExpander::create(B, ElemTy); - - Value *Ret; - if (ReturnTy->isFloatingPointTy()) - Ret = FMF.approxFunc() - ? Expander.buildApproxFRem(I.getOperand(0), I.getOperand(1)) - : Expander.buildFRem(I.getOperand(0), I.getOperand(1), SQ); - else { - auto *VecTy = cast(ReturnTy); - - // This could use SplitBlockAndInsertForEachLane but the interface - // is a bit awkward for a constant number of elements and it will - // boil down to the same code. - // TODO Expand the FRem instruction only once and reuse the code. - Value *Nums = I.getOperand(0); - Value *Denums = I.getOperand(1); - Ret = PoisonValue::get(I.getType()); - for (int I = 0, E = VecTy->getNumElements(); I != E; ++I) { - Value *Num = B.CreateExtractElement(Nums, I); - Value *Denum = B.CreateExtractElement(Denums, I); - Value *Rem = FMF.approxFunc() ? Expander.buildApproxFRem(Num, Denum) - : Expander.buildFRem(Num, Denum, SQ); - Ret = B.CreateInsertElement(Ret, Rem, I); - } - } + const FRemExpander Expander = FRemExpander::create(B, Ty); + Value *Ret = FMF.approxFunc() + ? Expander.buildApproxFRem(I.getOperand(0), I.getOperand(1)) + : Expander.buildFRem(I.getOperand(0), I.getOperand(1), SQ); I.replaceAllUsesWith(Ret); Ret->takeName(&I); @@ -939,7 +918,8 @@ static void expandIToFP(Instruction *IToFP) { IToFP->eraseFromParent(); } -static void scalarize(Instruction *I, SmallVectorImpl &Replace) { +static void scalarize(Instruction *I, + SmallVectorImpl &Worklist) { VectorType *VTy = cast(I->getType()); IRBuilder<> Builder(I); @@ -948,12 +928,25 @@ static void scalarize(Instruction *I, SmallVectorImpl &Replace) { Value *Result = PoisonValue::get(VTy); for (unsigned Idx = 0; Idx < NumElements; ++Idx) { Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx); - Value *Cast = Builder.CreateCast(cast(I)->getOpcode(), Ext, - I->getType()->getScalarType()); - Result = Builder.CreateInsertElement(Result, Cast, Idx); - if (isa(Cast)) - Replace.push_back(cast(Cast)); + + Value *NewOp = nullptr; + if (auto *BinOp = dyn_cast(I)) + NewOp = Builder.CreateBinOp( + BinOp->getOpcode(), Ext, + Builder.CreateExtractElement(I->getOperand(1), Idx)); + else if (auto *CastI = dyn_cast(I)) + NewOp = Builder.CreateCast(CastI->getOpcode(), Ext, + I->getType()->getScalarType()); + else + llvm_unreachable("Unsupported instruction type"); + + Result = Builder.CreateInsertElement(Result, NewOp, Idx); + if (auto *ScalarizedI = dyn_cast(NewOp)) { + ScalarizedI->copyIRFlags(I, true); + Worklist.push_back(ScalarizedI); + } } + I->replaceAllUsesWith(Result); I->dropAllReferences(); I->eraseFromParent(); @@ -989,10 +982,17 @@ static bool targetSupportsFrem(const TargetLowering &TLI, Type *Ty) { return TLI.getLibcallName(fremToLibcall(Ty->getScalarType())); } +static void addToWorklist(Instruction &I, + SmallVector &Worklist) { + if (I.getOperand(0)->getType()->isVectorTy()) + scalarize(&I, Worklist); + else + Worklist.push_back(&I); +} + static bool runImpl(Function &F, const TargetLowering &TLI, AssumptionCache *AC) { - SmallVector Replace; - SmallVector ReplaceVector; + SmallVector Worklist; bool Modified = false; unsigned MaxLegalFpConvertBitWidth = @@ -1003,56 +1003,39 @@ static bool runImpl(Function &F, const TargetLowering &TLI, if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS) return false; - for (auto &I : instructions(F)) { - switch (I.getOpcode()) { - case Instruction::FRem: { - Type *Ty = I.getType(); - // TODO: This pass doesn't handle scalable vectors. - if (Ty->isScalableTy()) - continue; - - if (targetSupportsFrem(TLI, Ty) || - !FRemExpander::canExpandType(Ty->getScalarType())) - continue; - - Replace.push_back(&I); - Modified = true; + for (auto It = inst_begin(&F), End = inst_end(F); It != End;) { + Instruction &I = *It++; + Type *Ty = I.getType(); + // TODO: This pass doesn't handle scalable vectors. + if (Ty->isScalableTy()) + continue; + switch (I.getOpcode()) { + case Instruction::FRem: + if (!targetSupportsFrem(TLI, Ty) && + FRemExpander::canExpandType(Ty->getScalarType())) { + addToWorklist(I, Worklist); + Modified = true; + } break; - } case Instruction::FPToUI: case Instruction::FPToSI: { - // TODO: This pass doesn't handle scalable vectors. - if (I.getOperand(0)->getType()->isScalableTy()) - continue; - - auto *IntTy = cast(I.getType()->getScalarType()); + auto *IntTy = cast(Ty->getScalarType()); if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth) continue; - if (I.getOperand(0)->getType()->isVectorTy()) - ReplaceVector.push_back(&I); - else - Replace.push_back(&I); + addToWorklist(I, Worklist); Modified = true; break; } case Instruction::UIToFP: case Instruction::SIToFP: { - // TODO: This pass doesn't handle scalable vectors. - if (I.getOperand(0)->getType()->isScalableTy()) - continue; - auto *IntTy = cast(I.getOperand(0)->getType()->getScalarType()); if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth) continue; - if (I.getOperand(0)->getType()->isVectorTy()) - ReplaceVector.push_back(&I); - else - Replace.push_back(&I); - Modified = true; + addToWorklist(I, Worklist); break; } default: @@ -1060,16 +1043,8 @@ static bool runImpl(Function &F, const TargetLowering &TLI, } } - while (!ReplaceVector.empty()) { - Instruction *I = ReplaceVector.pop_back_val(); - scalarize(I, Replace); - } - - if (Replace.empty()) - return false; - - while (!Replace.empty()) { - Instruction *I = Replace.pop_back_val(); + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); if (I->getOpcode() == Instruction::FRem) { auto SQ = [&]() -> std::optional { if (AC) { -- cgit v1.2.3 From 8cc862ce3bd51d1fe8ff84c35aee03457077ac54 Mon Sep 17 00:00:00 2001 From: Frederik Harwath Date: Mon, 13 Oct 2025 17:21:02 +0200 Subject: [AMDGPU] expand-fp: always report modifications (#163153) The last change to the pass in PR #158588 lost the assignment to the "Modified" variable for one of the pass optimizations. Add it back. This fixes the test failure in `CodeGen/AMDGPU/itofp.i128.bf.ll` (in a `LLVM_ENABLE_EXPENSIVE_CHECKS=ON` build). --- llvm/lib/CodeGen/ExpandFp.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'llvm/lib/CodeGen/ExpandFp.cpp') diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp index c500357c396c..04c700869cd6 100644 --- a/llvm/lib/CodeGen/ExpandFp.cpp +++ b/llvm/lib/CodeGen/ExpandFp.cpp @@ -1036,6 +1036,7 @@ static bool runImpl(Function &F, const TargetLowering &TLI, continue; addToWorklist(I, Worklist); + Modified = true; break; } default: -- cgit v1.2.3