diff options
| author | NagaChaitanya Vellanki <pnagato@protonmail.com> | 2025-10-24 04:09:38 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-10-24 11:09:38 +0000 |
| commit | 2db482d4ea97d4b2a690775655534c2b48695319 (patch) | |
| tree | 81916778a8c5f1f6934d91d411aa24639380d13a /clang/lib/AST/ExprConstant.cpp | |
| parent | 9ef60ff7ff187f5d80e745d3047d0f0b1e684cac (diff) | |
[Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow shufps/pd shuffles intrinsics to be used in constexpr (#164078)
Resolves #161208
Diffstat (limited to 'clang/lib/AST/ExprConstant.cpp')
| -rw-r--r-- | clang/lib/AST/ExprConstant.cpp | 84 |
1 files changed, 83 insertions, 1 deletions
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index ae698788aaa2..29ee08950512 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11619,6 +11619,39 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result, return true; } +static bool evalShuffleGeneric( + EvalInfo &Info, const CallExpr *Call, APValue &Out, + llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)> + GetSourceIndex) { + + const auto *VT = Call->getType()->getAs<VectorType>(); + if (!VT) + return false; + + APSInt MaskImm; + if (!EvaluateInteger(Call->getArg(2), MaskImm, Info)) + return false; + unsigned ShuffleMask = static_cast<unsigned>(MaskImm.getZExtValue()); + + APValue A, B; + if (!EvaluateAsRValue(Info, Call->getArg(0), A) || + !EvaluateAsRValue(Info, Call->getArg(1), B)) + return false; + + unsigned NumElts = VT->getNumElements(); + SmallVector<APValue, 16> ResultElements; + ResultElements.reserve(NumElts); + + for (unsigned DstIdx = 0; DstIdx != NumElts; ++DstIdx) { + auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); + const APValue &Src = (SrcVecIdx == 0) ? A : B; + ResultElements.push_back(Src.getVectorElt(SrcIdx)); + } + + Out = APValue(ResultElements.data(), ResultElements.size()); + return true; +} + static bool evalPshufbBuiltin(EvalInfo &Info, const CallExpr *Call, APValue &Out) { APValue SrcVec, ControlVec; @@ -12398,7 +12431,56 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } - + case X86::BI__builtin_ia32_shufps: + case X86::BI__builtin_ia32_shufps256: + case X86::BI__builtin_ia32_shufps512: { + APValue R; + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, + unsigned ShuffleMask) -> std::pair<unsigned, unsigned> { + constexpr unsigned LaneBits = 128u; + unsigned NumElemPerLane = LaneBits / 32; + unsigned NumSelectableElems = NumElemPerLane / 2; + unsigned BitsPerElem = 2; + unsigned IndexMask = (1u << BitsPerElem) - 1; + unsigned MaskBits = 8; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned ElemInLane = DstIdx % NumElemPerLane; + unsigned LaneOffset = Lane * NumElemPerLane; + unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; + unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 0 : 1; + unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; + return {SrcIdx, LaneOffset + Index}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_shufpd: + case X86::BI__builtin_ia32_shufpd256: + case X86::BI__builtin_ia32_shufpd512: { + APValue R; + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, + unsigned ShuffleMask) -> std::pair<unsigned, unsigned> { + constexpr unsigned LaneBits = 128u; + unsigned NumElemPerLane = LaneBits / 64; + unsigned NumSelectableElems = NumElemPerLane / 2; + unsigned BitsPerElem = 1; + unsigned IndexMask = (1u << BitsPerElem) - 1; + unsigned MaskBits = 8; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned ElemInLane = DstIdx % NumElemPerLane; + unsigned LaneOffset = Lane * NumElemPerLane; + unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; + unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 0 : 1; + unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; + return {SrcIdx, LaneOffset + Index}; + })) + return false; + return Success(R, E); + } case X86::BI__builtin_ia32_pshufb128: case X86::BI__builtin_ia32_pshufb256: case X86::BI__builtin_ia32_pshufb512: { |
