diff options
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 50 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/store-float-conversion.ll | 117 |
2 files changed, 167 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4f13a14d2464..3cbba25f7729 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -24016,6 +24016,54 @@ static SDValue combineBoolVectorAndTruncateStore(SelectionDAG &DAG,
                       Store->getMemOperand());
 }
 
+// Combine store (fp_to_int X), looking through single-use extensions,
+// truncations and assertions, so that the conversion is performed on a
+// one-element vector when NEON is available. The conversion is only replaced
+// when it has a single use, its source is a scalar f32/f64 and its result is
+// the integer type of the same width; otherwise the DAG is left unchanged.
+// (f32/f64 are the source types exercised by store-float-conversion.ll.)
+static void combineFPToIntStore(StoreSDNode *ST,
+                                TargetLowering::DAGCombinerInfo &DCI,
+                                SelectionDAG &DAG,
+                                const AArch64Subtarget *Subtarget) {
+  if (!Subtarget->isNeonAvailable())
+    return;
+
+  // Peel single-use extensions, truncations and assertions off the stored
+  // value to reach the conversion that feeds the store.
+  SDValue Value = ST->getValue();
+  while (Value->hasOneUse() &&
+         (ISD::isExtOpcode(Value.getOpcode()) ||
+          Value.getOpcode() == ISD::TRUNCATE || Value->isAssert()))
+    Value = Value.getOperand(0);
+
+  if ((Value.getOpcode() != ISD::FP_TO_UINT &&
+       Value.getOpcode() != ISD::FP_TO_SINT) ||
+      !Value.hasOneUse())
+    return;
+
+  SDValue FPSrc = Value.getOperand(0);
+  EVT SrcVT = FPSrc.getValueType();
+  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
+    return;
+
+  // CombineTo needs a replacement of the same type as the node it replaces,
+  // so skip width-changing conversions such as i64 = fp_to_sint f32.
+  EVT DstVT = MVT::getIntegerVT(SrcVT.getScalarSizeInBits());
+  if (Value.getValueType() != DstVT)
+    return;
+
+  EVT VecSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, 1);
+  EVT VecDstVT = EVT::getVectorVT(*DAG.getContext(), DstVT, 1);
+  SDLoc DL(ST);
+  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+  SDValue VecFP = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecSrcVT,
+                              DAG.getUNDEF(VecSrcVT), FPSrc, Zero);
+  SDValue VecConv = DAG.getNode(Value.getOpcode(), DL, VecDstVT, VecFP);
+  DCI.CombineTo(Value.getNode(), DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DstVT,
+                                             VecConv, Zero));
+}
+
 bool isHalvingTruncateOfLegalScalableType(EVT SrcVT, EVT DstVT) {
   return (SrcVT == MVT::nxv8i16 && DstVT == MVT::nxv8i8) ||
          (SrcVT == MVT::nxv4i32 && DstVT == MVT::nxv4i16) ||
@@ -24098,6 +24146,8
@@ static SDValue performSTORECombine(SDNode *N,
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDLoc DL(ST);
 
+  combineFPToIntStore(ST, DCI, DAG, Subtarget);
+
   auto hasValidElementTypeForFPTruncStore = [](EVT VT) {
     EVT EltVT = VT.getVectorElementType();
     return EltVT == MVT::f32 || EltVT == MVT::f64;
diff --git a/llvm/test/CodeGen/AArch64/store-float-conversion.ll b/llvm/test/CodeGen/AArch64/store-float-conversion.ll
new file mode 100644
index 000000000000..ca12fcb1dcc1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/store-float-conversion.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=aarch64 < %s | FileCheck %s
+; Purpose: stores of scalar fptoui/fptosi results (optionally truncated) should convert in an FP/SIMD register and store straight from it.
+define void @f32_to_u8(float %f, ptr %dst) { ; fptoui f32 -> i32, truncated to i8: expects fcvtzu s0 and a byte store from b0
+; CHECK-LABEL: f32_to_u8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu s0, s0
+; CHECK-NEXT: str b0, [x0]
+; CHECK-NEXT: ret
+entry:
+  %conv = fptoui float %f to i32
+  %trunc = trunc i32 %conv to i8
+  store i8 %trunc, ptr %dst
+  ret void
+}
+
+define void @f32_to_s8(float %f, ptr %dst) { ; fptosi f32 -> i32, truncated to i8: expects fcvtzs s0 and a byte store from b0
+; CHECK-LABEL: f32_to_s8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: str b0, [x0]
+; CHECK-NEXT: ret
+entry:
+  %conv = fptosi float %f to i32
+  %trunc = trunc i32 %conv to i8
+  store i8 %trunc, ptr %dst
+  ret void
+}
+
+define void @f32_to_u16(float %f, ptr %dst) { ; fptoui f32 -> i32, truncated to i16: expects fcvtzu s0 and a halfword store from h0
+; CHECK-LABEL: f32_to_u16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu s0, s0
+; CHECK-NEXT: str h0, [x0]
+; CHECK-NEXT: ret
+entry:
+  %conv = fptoui float %f to i32
+  %trunc = trunc i32 %conv to i16
+  store i16 %trunc, ptr %dst
+  ret void
+}
+
+define void @f32_to_s16(float %f, ptr %dst) { ; fptosi f32 -> i32, truncated to i16: expects fcvtzs s0 and a halfword store from h0
+; CHECK-LABEL: f32_to_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: str h0, [x0]
+; CHECK-NEXT: ret
+entry:
+  %conv = fptosi float %f to i32
+  %trunc = trunc i32 %conv to i16
+  store i16 %trunc, ptr %dst
+  ret void
+}
+
+define void @f32_to_u32(float %f, ptr %dst) { ; fptoui f32 -> i32 stored directly: expects fcvtzu s0 and a word store from s0
+; CHECK-LABEL: f32_to_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu s0, s0
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+  %conv = fptoui float %f to i32
+  store i32 %conv, ptr %dst
+  ret void
+}
+
+define void @f32_to_s32(float %f, ptr %dst) { ; fptosi f32 -> i32 stored directly: expects fcvtzs s0 and a word store from s0
+; CHECK-LABEL: f32_to_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+  %conv = fptosi float %f to i32
+  store i32 %conv, ptr %dst
+  ret void
+}
+
+define void @f64_to_u64(double %d, ptr %dst) { ; fptoui f64 -> i64 stored directly: expects fcvtzu d0 and a doubleword store from d0
+; CHECK-LABEL: f64_to_u64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu d0, d0
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+entry:
+  %conv = fptoui double %d to i64
+  store i64 %conv, ptr %dst
+  ret void
+}
+
+define void @f64_to_s64(double %d, ptr %dst) { ; fptosi f64 -> i64 stored directly: expects fcvtzs d0 and a doubleword store from d0
+; CHECK-LABEL: f64_to_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, d0
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+entry:
+  %conv = fptosi double %d to i64
+  store i64 %conv, ptr %dst
+  ret void
+}
+
+define i32 @f32_to_i32_multiple_uses(float %f, ptr %dst) { ; negative case: %conv is also returned, so the GPR form (fcvtzs w8 + strb) is expected
+; CHECK-LABEL: f32_to_i32_multiple_uses:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs w8, s0
+; CHECK-NEXT: mov x9, x0
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: strb w8, [x9]
+; CHECK-NEXT: ret
+entry:
+  %conv = fptosi float %f to i32
+  %trunc = trunc i32 %conv to i8
+  store i8 %trunc, ptr %dst
+  ret i32 %conv
+} |
