summaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/X86/combine-gfni.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86/combine-gfni.ll')
-rw-r--r-- llvm/test/CodeGen/X86/combine-gfni.ll 101
1 files changed, 101 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/combine-gfni.ll b/llvm/test/CodeGen/X86/combine-gfni.ll
new file mode 100644
index 000000000000..b105cdf7ea89
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-gfni.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+gfni | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+gfni,+avx | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+gfni,+avx512bw | FileCheck %s --check-prefixes=AVX512
+
+define <16 x i8> @gf2p8affineqb_freeze(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { ; per byte lane: select(%a2 < 0, freeze(gf2p8affineqb(%a1, %a1, 11)), %a0)
+; SSE-LABEL: gf2p8affineqb_freeze:
+; SSE: # %bb.0:
+; SSE-NEXT: pxor %xmm3, %xmm3
+; SSE-NEXT: pcmpgtb %xmm2, %xmm3
+; SSE-NEXT: gf2p8affineqb $11, %xmm1, %xmm1
+; SSE-NEXT: pand %xmm3, %xmm1
+; SSE-NEXT: pandn %xmm0, %xmm3
+; SSE-NEXT: por %xmm1, %xmm3
+; SSE-NEXT: movdqa %xmm3, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: gf2p8affineqb_freeze:
+; AVX: # %bb.0:
+; AVX-NEXT: vgf2p8affineqb $11, %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: gf2p8affineqb_freeze:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovb2m %xmm2, %k1
+; AVX512-NEXT: vgf2p8affineqb $11, %xmm1, %xmm1, %xmm0 {%k1}
+; AVX512-NEXT: retq
+ %i = icmp slt <16 x i8> %a2, zeroinitializer ; lane mask: true where the byte of %a2 is signed-less-than zero
+ %g = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %a1, <16 x i8> %a1, i8 11) ; %a1 is passed as both vector operands; the immediate operand is 11
+ %f = freeze <16 x i8> %g ; freeze sits between the intrinsic result and the select. NOTE(review): presumably the point of the test is that it does not block folding the select - confirm against the commit message
+ %r = select <16 x i1> %i, <16 x i8> %f, <16 x i8> %a0 ; masked lanes take the frozen result, other lanes keep %a0; the AVX512 expectations above show this as one {%k1} merge-masked instruction writing %xmm0
+ ret <16 x i8> %r
+}
+
+define <16 x i8> @gf2p8affineinvqb_freeze(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { ; per byte lane: select(%a2 < 0, freeze(gf2p8affineinvqb(%a1, %a1, 11)), %a0)
+; SSE-LABEL: gf2p8affineinvqb_freeze:
+; SSE: # %bb.0:
+; SSE-NEXT: pxor %xmm3, %xmm3
+; SSE-NEXT: pcmpgtb %xmm2, %xmm3
+; SSE-NEXT: gf2p8affineinvqb $11, %xmm1, %xmm1
+; SSE-NEXT: pand %xmm3, %xmm1
+; SSE-NEXT: pandn %xmm0, %xmm3
+; SSE-NEXT: por %xmm1, %xmm3
+; SSE-NEXT: movdqa %xmm3, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: gf2p8affineinvqb_freeze:
+; AVX: # %bb.0:
+; AVX-NEXT: vgf2p8affineinvqb $11, %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: gf2p8affineinvqb_freeze:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovb2m %xmm2, %k1
+; AVX512-NEXT: vgf2p8affineinvqb $11, %xmm1, %xmm1, %xmm0 {%k1}
+; AVX512-NEXT: retq
+ %i = icmp slt <16 x i8> %a2, zeroinitializer ; lane mask: true where the byte of %a2 is signed-less-than zero
+ %g = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> %a1, <16 x i8> %a1, i8 11) ; %a1 is passed as both vector operands; the immediate operand is 11
+ %f = freeze <16 x i8> %g ; freeze sits between the intrinsic result and the select. NOTE(review): presumably the point of the test is that it does not block folding the select - confirm against the commit message
+ %r = select <16 x i1> %i, <16 x i8> %f, <16 x i8> %a0 ; masked lanes take the frozen result, other lanes keep %a0; the AVX512 expectations above show this as one {%k1} merge-masked instruction writing %xmm0
+ ret <16 x i8> %r
+}
+
+define <16 x i8> @gf2p8mulb_freeze(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { ; per byte lane: select(%a2 < 0, freeze(gf2p8mulb(%a1, %a1)), %a0)
+; SSE-LABEL: gf2p8mulb_freeze:
+; SSE: # %bb.0:
+; SSE-NEXT: pxor %xmm3, %xmm3
+; SSE-NEXT: pcmpgtb %xmm2, %xmm3
+; SSE-NEXT: gf2p8mulb %xmm1, %xmm1
+; SSE-NEXT: pand %xmm3, %xmm1
+; SSE-NEXT: pandn %xmm0, %xmm3
+; SSE-NEXT: por %xmm1, %xmm3
+; SSE-NEXT: movdqa %xmm3, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: gf2p8mulb_freeze:
+; AVX: # %bb.0:
+; AVX-NEXT: vgf2p8mulb %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: gf2p8mulb_freeze:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovb2m %xmm2, %k1
+; AVX512-NEXT: vgf2p8mulb %xmm1, %xmm1, %xmm0 {%k1}
+; AVX512-NEXT: retq
+ %i = icmp slt <16 x i8> %a2, zeroinitializer ; lane mask: true where the byte of %a2 is signed-less-than zero
+ %g = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> %a1, <16 x i8> %a1) ; %a1 is passed as both operands; this intrinsic takes no immediate
+ %f = freeze <16 x i8> %g ; freeze sits between the intrinsic result and the select. NOTE(review): presumably the point of the test is that it does not block folding the select - confirm against the commit message
+ %r = select <16 x i1> %i, <16 x i8> %f, <16 x i8> %a0 ; masked lanes take the frozen result, other lanes keep %a0; the AVX512 expectations above show this as one {%k1} merge-masked instruction writing %xmm0
+ ret <16 x i8> %r
+}
+
+declare <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8>, <16 x i8>, i8) ; called by @gf2p8affineqb_freeze
+declare <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8>, <32 x i8>, i8) ; <32 x i8> form; declared but not called by any function in this file
+declare <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8>, <16 x i8>, i8) ; called by @gf2p8affineinvqb_freeze
+declare <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8>, <32 x i8>, i8) ; <32 x i8> form; declared but not called by any function in this file
+declare <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8>, <16 x i8>) ; called by @gf2p8mulb_freeze
+declare <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8>, <32 x i8>) ; <32 x i8> form; declared but not called by any function in this file