summary | refs | log | tree | commit | diff
path: root/llvm/test/Analysis/CostModel/ARM
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/Analysis/CostModel/ARM')
-rw-r--r-- llvm/test/Analysis/CostModel/ARM/mve-abs.ll 76
-rw-r--r-- llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll 14
-rw-r--r-- llvm/test/Analysis/CostModel/ARM/mve-cmp.ll 90
-rw-r--r-- llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll 436
-rw-r--r-- llvm/test/Analysis/CostModel/ARM/mve-minmax.ll 508
-rw-r--r-- llvm/test/Analysis/CostModel/ARM/mve-shuffle-loadstore.ll 824
-rw-r--r-- llvm/test/Analysis/CostModel/ARM/mve-vecreduce-add.ll 858
7 files changed, 1303 insertions, 1503 deletions
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-abs.ll b/llvm/test/Analysis/CostModel/ARM/mve-abs.ll
index cc8f2da57f07..254c191569f8 100644
--- a/llvm/test/Analysis/CostModel/ARM/mve-abs.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-abs.ll
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-RECIP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-SIZE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefix=MVE
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -31,55 +30,30 @@ declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1)
declare <64 x i8> @llvm.abs.v64i8(<64 x i8>, i1)
define i32 @abs(i32 %arg) {
-; MVE-RECIP-LABEL: 'abs'
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> undef, i1 false)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; MVE-SIZE-LABEL: 'abs'
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> undef, i1 false)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-LABEL: 'abs'
+; MVE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:5 SizeLat:5 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:74 CodeSize:55 Lat:74 SizeLat:74 for: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:148 CodeSize:110 Lat:148 SizeLat:148 for: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:296 CodeSize:220 Lat:296 SizeLat:296 for: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I8 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
%I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
%V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll b/llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll
index 664b828fa9fd..a12fd00589b6 100644
--- a/llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -S -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -9,8 +9,8 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
define void @v4i32(i32 %index, i32 %TC) {
; CHECK-LABEL: 'v4i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
ret void
@@ -18,8 +18,8 @@ define void @v4i32(i32 %index, i32 %TC) {
define void @v8i16(i32 %index, i32 %TC) {
; CHECK-LABEL: 'v8i16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
ret void
@@ -27,8 +27,8 @@ define void @v8i16(i32 %index, i32 %TC) {
define void @v16i8(i32 %index, i32 %TC) {
; CHECK-LABEL: 'v16i8'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
ret void
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll b/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll
index fa1cf17b9174..4e6ebbc59396 100644
--- a/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll
@@ -1,30 +1,30 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MVE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MVEFP
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MVE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MVEFP
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
define void @icmp() {
; CHECK-LABEL: 'icmp'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v2i8 = icmp slt <2 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = icmp slt <4 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = icmp slt <8 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = icmp slt <16 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %v32i8 = icmp slt <32 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v2i16 = icmp slt <2 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = icmp slt <4 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = icmp slt <8 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v16i16 = icmp slt <16 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v2i32 = icmp slt <2 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = icmp slt <4 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v8i32 = icmp slt <8 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v16i32 = icmp slt <16 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v2i64 = icmp slt <2 x i64> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v4i64 = icmp slt <4 x i64> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v8i64 = icmp slt <8 x i64> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v2i128 = icmp slt <2 x i128> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v4i128 = icmp slt <4 x i128> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:10 Lat:20 SizeLat:20 for: %v2i8 = icmp slt <2 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i8 = icmp slt <4 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i8 = icmp slt <8 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v16i8 = icmp slt <16 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:132 CodeSize:130 Lat:132 SizeLat:132 for: %v32i8 = icmp slt <32 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:10 Lat:20 SizeLat:20 for: %v2i16 = icmp slt <2 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i16 = icmp slt <4 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i16 = icmp slt <8 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:68 CodeSize:66 Lat:68 SizeLat:68 for: %v16i16 = icmp slt <16 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:10 Lat:20 SizeLat:20 for: %v2i32 = icmp slt <2 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i32 = icmp slt <4 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:36 CodeSize:34 Lat:36 SizeLat:36 for: %v8i32 = icmp slt <8 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:72 CodeSize:68 Lat:72 SizeLat:72 for: %v16i32 = icmp slt <16 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:36 CodeSize:18 Lat:36 SizeLat:36 for: %v2i64 = icmp slt <2 x i64> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:72 CodeSize:36 Lat:72 SizeLat:72 for: %v4i64 = icmp slt <4 x i64> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:144 CodeSize:72 Lat:144 SizeLat:144 for: %v8i64 = icmp slt <8 x i64> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:68 CodeSize:34 Lat:68 SizeLat:68 for: %v2i128 = icmp slt <2 x i128> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:136 CodeSize:68 Lat:136 SizeLat:136 for: %v4i128 = icmp slt <4 x i128> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%v2i8 = icmp slt <2 x i8> undef, undef
%v4i8 = icmp slt <4 x i8> undef, undef
@@ -54,32 +54,32 @@ define void @icmp() {
define void @fcmp() {
; CHECK-MVE-LABEL: 'fcmp'
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f16 = fcmp olt <2 x half> undef, undef
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4f16 = fcmp olt <4 x half> undef, undef
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8f16 = fcmp olt <8 x half> undef, undef
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v16f16 = fcmp olt <16 x half> undef, undef
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f32 = fcmp olt <2 x float> undef, undef
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4f32 = fcmp olt <4 x float> undef, undef
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8f32 = fcmp olt <8 x float> undef, undef
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v16f32 = fcmp olt <16 x float> undef, undef
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f64 = fcmp olt <2 x double> undef, undef
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4f64 = fcmp olt <4 x double> undef, undef
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8f64 = fcmp olt <8 x double> undef, undef
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVE-NEXT: Cost Model: Found costs of 12 for: %v2f16 = fcmp olt <2 x half> undef, undef
+; CHECK-MVE-NEXT: Cost Model: Found costs of 24 for: %v4f16 = fcmp olt <4 x half> undef, undef
+; CHECK-MVE-NEXT: Cost Model: Found costs of 48 for: %v8f16 = fcmp olt <8 x half> undef, undef
+; CHECK-MVE-NEXT: Cost Model: Found costs of 96 for: %v16f16 = fcmp olt <16 x half> undef, undef
+; CHECK-MVE-NEXT: Cost Model: Found costs of 12 for: %v2f32 = fcmp olt <2 x float> undef, undef
+; CHECK-MVE-NEXT: Cost Model: Found costs of 24 for: %v4f32 = fcmp olt <4 x float> undef, undef
+; CHECK-MVE-NEXT: Cost Model: Found costs of 48 for: %v8f32 = fcmp olt <8 x float> undef, undef
+; CHECK-MVE-NEXT: Cost Model: Found costs of 96 for: %v16f32 = fcmp olt <16 x float> undef, undef
+; CHECK-MVE-NEXT: Cost Model: Found costs of 12 for: %v2f64 = fcmp olt <2 x double> undef, undef
+; CHECK-MVE-NEXT: Cost Model: Found costs of 24 for: %v4f64 = fcmp olt <4 x double> undef, undef
+; CHECK-MVE-NEXT: Cost Model: Found costs of 48 for: %v8f64 = fcmp olt <8 x double> undef, undef
+; CHECK-MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-MVEFP-LABEL: 'fcmp'
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fcmp olt <2 x half> undef, undef
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fcmp olt <4 x half> undef, undef
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = fcmp olt <8 x half> undef, undef
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v16f16 = fcmp olt <16 x half> undef, undef
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fcmp olt <2 x float> undef, undef
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fcmp olt <4 x float> undef, undef
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v8f32 = fcmp olt <8 x float> undef, undef
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v16f32 = fcmp olt <16 x float> undef, undef
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fcmp olt <2 x double> undef, undef
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = fcmp olt <4 x double> undef, undef
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f64 = fcmp olt <8 x double> undef, undef
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2f16 = fcmp olt <2 x half> undef, undef
+; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4f16 = fcmp olt <4 x half> undef, undef
+; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8f16 = fcmp olt <8 x half> undef, undef
+; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:68 CodeSize:66 Lat:68 SizeLat:68 for: %v16f16 = fcmp olt <16 x half> undef, undef
+; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v2f32 = fcmp olt <2 x float> undef, undef
+; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4f32 = fcmp olt <4 x float> undef, undef
+; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:36 CodeSize:34 Lat:36 SizeLat:36 for: %v8f32 = fcmp olt <8 x float> undef, undef
+; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:72 CodeSize:68 Lat:72 SizeLat:72 for: %v16f32 = fcmp olt <16 x float> undef, undef
+; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v2f64 = fcmp olt <2 x double> undef, undef
+; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v4f64 = fcmp olt <4 x double> undef, undef
+; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v8f64 = fcmp olt <8 x double> undef, undef
+; CHECK-MVEFP-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%v2f16 = fcmp olt <2 x half> undef, undef
%v4f16 = fcmp olt <4 x half> undef, undef
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll b/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll
index fa18f4724c39..5a23ebf0c2b8 100644
--- a/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll
@@ -1,37 +1,37 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -S -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
define i32 @masked_gather() {
; CHECK-LABEL: 'masked_gather'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 4, <16 x i1> undef, <16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 4, <8 x i1> undef, <8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16F16 = call <16 x half> @llvm.masked.gather.v16f16.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F16 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F16 = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F16 = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 4, <16 x i1> undef, <16 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 4, <8 x i1> undef, <8 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I16 = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I8 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I8 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32p = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x ptr> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
+; CHECK-NEXT: Cost Model: Found costs of 16 for: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x double> undef)
+; CHECK-NEXT: Cost Model: Found costs of 6 for: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x double> undef)
+; CHECK-NEXT: Cost Model: Found costs of 96 for: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 4, <16 x i1> undef, <16 x float> undef)
+; CHECK-NEXT: Cost Model: Found costs of 32 for: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 4, <8 x i1> undef, <8 x float> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found costs of 6 for: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x float> undef)
+; CHECK-NEXT: Cost Model: Found costs of 64 for: %V16F16 = call <16 x half> @llvm.masked.gather.v16f16.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x half> undef)
+; CHECK-NEXT: Cost Model: Found costs of 24 for: %V8F16 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found costs of 12 for: %V4F16 = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x half> undef)
+; CHECK-NEXT: Cost Model: Found costs of 6 for: %V2F16 = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x half> undef)
+; CHECK-NEXT: Cost Model: Found costs of 72 for: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found costs of 34 for: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found costs of 192 for: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 4, <16 x i1> undef, <16 x i32> undef)
+; CHECK-NEXT: Cost Model: Found costs of 80 for: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 4, <8 x i1> undef, <8 x i32> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found costs of 18 for: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x i32> undef)
+; CHECK-NEXT: Cost Model: Found costs of 160 for: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of 72 for: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of 36 for: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of 18 for: %V2I16 = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of 320 for: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of 144 for: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of 72 for: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of 36 for: %V4I8 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of 18 for: %V2I8 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of 12 for: %V4I32p = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x ptr> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0
;
%V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x double> undef)
%V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x double> undef)
@@ -72,32 +72,32 @@ define i32 @masked_gather() {
define i32 @masked_scatter() {
; CHECK-LABEL: 'masked_scatter'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 4, <16 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 4, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16f16.v16p0(<16 x half> undef, <16 x ptr> undef, i32 2, <16 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 4, <16 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 4, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 2, <16 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
+; CHECK-NEXT: Cost Model: Found costs of 16 for: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 6 for: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 96 for: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 4, <16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 32 for: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 4, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 6 for: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 64 for: call void @llvm.masked.scatter.v16f16.v16p0(<16 x half> undef, <16 x ptr> undef, i32 2, <16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 24 for: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 12 for: call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 6 for: call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 72 for: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 34 for: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 192 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 4, <16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 80 for: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 4, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 18 for: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 160 for: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 2, <16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 72 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 36 for: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 18 for: call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 320 for: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 144 for: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 72 for: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 36 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of 18 for: call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0
;
call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
@@ -136,29 +136,29 @@ define i32 @masked_scatter() {
define void @gep_v4i32(ptr %base, ptr %base16, ptr %base8, <4 x i32> %ind32, <4 x i16> %ind16, <4 x i1> %mask) {
; CHECK-LABEL: 'gep_v4i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep1 = getelementptr i32, ptr %base, <4 x i32> %ind32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res1 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep1, i32 4, <4 x i1> %mask, <4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %res1, <4 x ptr> %gep1, i32 4, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %indzext = zext <4 x i16> %ind16 to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep2 = getelementptr i32, ptr %base, <4 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res2 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep2, i32 4, <4 x i1> %mask, <4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %res2, <4 x ptr> %gep2, i32 4, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %indsext = sext <4 x i16> %ind16 to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep3 = getelementptr i32, ptr %base, <4 x i32> %indsext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res3 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep3, i32 4, <4 x i1> %mask, <4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %res3, <4 x ptr> %gep3, i32 4, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepu = getelementptr i32, ptr %base, <4 x i32> %ind32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %resu = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gepu, i32 1, <4 x i1> %mask, <4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %resu, <4 x ptr> %gepu, i32 1, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepos = getelementptr i8, ptr %base8, <4 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %geposb = bitcast <4 x ptr> %gepos to <4 x ptr>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %resos = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %geposb, i32 4, <4 x i1> %mask, <4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %resos, <4 x ptr> %geposb, i32 4, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i16, ptr %base16, <4 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <4 x ptr> %gepbs to <4 x ptr>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %resbs = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gepbsb, i32 4, <4 x i1> %mask, <4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %resbs, <4 x ptr> %gepbsb, i32 4, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gep1 = getelementptr i32, ptr %base, <4 x i32> %ind32
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %res1 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep1, i32 4, <4 x i1> %mask, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %res1, <4 x ptr> %gep1, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %indzext = zext <4 x i16> %ind16 to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gep2 = getelementptr i32, ptr %base, <4 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %res2 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep2, i32 4, <4 x i1> %mask, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %res2, <4 x ptr> %gep2, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %indsext = sext <4 x i16> %ind16 to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gep3 = getelementptr i32, ptr %base, <4 x i32> %indsext
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %res3 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep3, i32 4, <4 x i1> %mask, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %res3, <4 x ptr> %gep3, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gepu = getelementptr i32, ptr %base, <4 x i32> %ind32
+; CHECK-NEXT: Cost Model: Found costs of 56 for: %resu = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gepu, i32 1, <4 x i1> %mask, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found costs of 56 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %resu, <4 x ptr> %gepu, i32 1, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gepos = getelementptr i8, ptr %base8, <4 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %geposb = bitcast <4 x ptr> %gepos to <4 x ptr>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %resos = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %geposb, i32 4, <4 x i1> %mask, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %resos, <4 x ptr> %geposb, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gepbs = getelementptr i16, ptr %base16, <4 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gepbsb = bitcast <4 x ptr> %gepbs to <4 x ptr>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %resbs = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gepbsb, i32 4, <4 x i1> %mask, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %resbs, <4 x ptr> %gepbsb, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%gep1 = getelementptr i32, ptr %base, <4 x i32> %ind32
%res1 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep1, i32 4, <4 x i1> %mask, <4 x i32> undef)
@@ -195,29 +195,29 @@ define void @gep_v4i32(ptr %base, ptr %base16, ptr %base8, <4 x i32> %ind32, <4
define void @gep_v4f32(ptr %base, ptr %base16, ptr %base8, <4 x i32> %ind32, <4 x i16> %ind16, <4 x i1> %mask) {
; CHECK-LABEL: 'gep_v4f32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep1 = getelementptr float, ptr %base, <4 x i32> %ind32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res1 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep1, i32 4, <4 x i1> %mask, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %res1, <4 x ptr> %gep1, i32 4, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %indzext = zext <4 x i16> %ind16 to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep2 = getelementptr float, ptr %base, <4 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res2 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep2, i32 4, <4 x i1> %mask, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %res2, <4 x ptr> %gep2, i32 4, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %indsext = sext <4 x i16> %ind16 to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep3 = getelementptr float, ptr %base, <4 x i32> %indsext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res3 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep3, i32 4, <4 x i1> %mask, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %res3, <4 x ptr> %gep3, i32 4, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gepu = getelementptr float, ptr %base, <4 x i32> %ind32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %resu = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gepu, i32 1, <4 x i1> %mask, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %resu, <4 x ptr> %gepu, i32 1, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepos = getelementptr i8, ptr %base8, <4 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %geposb = bitcast <4 x ptr> %gepos to <4 x ptr>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %resos = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %geposb, i32 4, <4 x i1> %mask, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %resos, <4 x ptr> %geposb, i32 4, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i16, ptr %base16, <4 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <4 x ptr> %gepbs to <4 x ptr>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %resbs = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gepbsb, i32 4, <4 x i1> %mask, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %resbs, <4 x ptr> %gepbsb, i32 4, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %gep1 = getelementptr float, ptr %base, <4 x i32> %ind32
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %res1 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep1, i32 4, <4 x i1> %mask, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %res1, <4 x ptr> %gep1, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %indzext = zext <4 x i16> %ind16 to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %gep2 = getelementptr float, ptr %base, <4 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %res2 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep2, i32 4, <4 x i1> %mask, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %res2, <4 x ptr> %gep2, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %indsext = sext <4 x i16> %ind16 to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %gep3 = getelementptr float, ptr %base, <4 x i32> %indsext
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %res3 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep3, i32 4, <4 x i1> %mask, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %res3, <4 x ptr> %gep3, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %gepu = getelementptr float, ptr %base, <4 x i32> %ind32
+; CHECK-NEXT: Cost Model: Found costs of 32 for: %resu = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gepu, i32 1, <4 x i1> %mask, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found costs of 32 for: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %resu, <4 x ptr> %gepu, i32 1, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gepos = getelementptr i8, ptr %base8, <4 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %geposb = bitcast <4 x ptr> %gepos to <4 x ptr>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %resos = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %geposb, i32 4, <4 x i1> %mask, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %resos, <4 x ptr> %geposb, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gepbs = getelementptr i16, ptr %base16, <4 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gepbsb = bitcast <4 x ptr> %gepbs to <4 x ptr>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %resbs = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gepbsb, i32 4, <4 x i1> %mask, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %resbs, <4 x ptr> %gepbsb, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%gep1 = getelementptr float, ptr %base, <4 x i32> %ind32
%res1 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep1, i32 4, <4 x i1> %mask, <4 x float> undef)
@@ -254,27 +254,27 @@ define void @gep_v4f32(ptr %base, ptr %base16, ptr %base8, <4 x i32> %ind32, <4
define void @gep_v4i16(ptr %base, <4 x i32> %ind32, <4 x i16> %ind16, <4 x i1> %mask) {
; CHECK-LABEL: 'gep_v4i16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep1 = getelementptr i16, ptr %base, <4 x i32> %ind32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res1 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep1, i32 2, <4 x i1> %mask, <4 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res1, <4 x ptr> %gep1, i32 2, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %indzext = zext <4 x i16> %ind16 to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep2 = getelementptr i16, ptr %base, <4 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res2 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep2, i32 2, <4 x i1> %mask, <4 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res2, <4 x ptr> %gep2, i32 2, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %indsext = sext <4 x i16> %ind16 to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep3 = getelementptr i16, ptr %base, <4 x i32> %indsext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res3 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep3, i32 2, <4 x i1> %mask, <4 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res3, <4 x ptr> %gep3, i32 2, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep5 = getelementptr i16, ptr %base, <4 x i16> %ind16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res5 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res5zext = zext <4 x i16> %res5 to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res5trunc = trunc <4 x i32> %res5zext to <4 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res5trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res6 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res6sext = sext <4 x i16> %res6 to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res6trunc = trunc <4 x i32> %res6sext to <4 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res6trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gep1 = getelementptr i16, ptr %base, <4 x i32> %ind32
+; CHECK-NEXT: Cost Model: Found costs of 56 for: %res1 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep1, i32 2, <4 x i1> %mask, <4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of 56 for: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res1, <4 x ptr> %gep1, i32 2, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %indzext = zext <4 x i16> %ind16 to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gep2 = getelementptr i16, ptr %base, <4 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found costs of 56 for: %res2 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep2, i32 2, <4 x i1> %mask, <4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of 56 for: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res2, <4 x ptr> %gep2, i32 2, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %indsext = sext <4 x i16> %ind16 to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gep3 = getelementptr i16, ptr %base, <4 x i32> %indsext
+; CHECK-NEXT: Cost Model: Found costs of 56 for: %res3 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep3, i32 2, <4 x i1> %mask, <4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of 56 for: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res3, <4 x ptr> %gep3, i32 2, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gep5 = getelementptr i16, ptr %base, <4 x i16> %ind16
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %res5 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %res5zext = zext <4 x i16> %res5 to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %res5trunc = trunc <4 x i32> %res5zext to <4 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res5trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %res6 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %res6sext = sext <4 x i16> %res6 to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %res6trunc = trunc <4 x i32> %res6sext to <4 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res6trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%gep1 = getelementptr i16, ptr %base, <4 x i32> %ind32
%res1 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep1, i32 2, <4 x i1> %mask, <4 x i16> undef)
@@ -308,16 +308,16 @@ define void @gep_v4i16(ptr %base, <4 x i32> %ind32, <4 x i16> %ind16, <4 x i1> %
define void @gep_v4i8(ptr %base, <4 x i8> %ind8, <4 x i1> %mask) {
; CHECK-LABEL: 'gep_v4i8'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep5 = getelementptr i8, ptr %base, <4 x i8> %ind8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res5 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res5zext = zext <4 x i8> %res5 to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res5trunc = trunc <4 x i32> %res5zext to <4 x i8>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %res5trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res6 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res6sext = sext <4 x i8> %res6 to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res6trunc = trunc <4 x i32> %res6sext to <4 x i8>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %res6trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gep5 = getelementptr i8, ptr %base, <4 x i8> %ind8
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %res5 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %res5zext = zext <4 x i8> %res5 to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %res5trunc = trunc <4 x i32> %res5zext to <4 x i8>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %res5trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %res6 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %res6sext = sext <4 x i8> %res6 to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %res6trunc = trunc <4 x i32> %res6sext to <4 x i8>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %res6trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; result zext
%gep5 = getelementptr i8, ptr %base, <4 x i8> %ind8
@@ -337,36 +337,36 @@ define void @gep_v4i8(ptr %base, <4 x i8> %ind8, <4 x i1> %mask) {
define void @gep_v8i16(ptr %base, ptr %base8, ptr %base32, <8 x i32> %ind32, <8 x i16> %ind16, <8 x i8> %ind8, <8 x i1> %mask) {
; CHECK-LABEL: 'gep_v8i16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep1 = getelementptr i16, ptr %base, <8 x i32> %ind32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %res1 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep1, i32 2, <8 x i1> %mask, <8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %res1, <8 x ptr> %gep1, i32 2, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %indzext = zext <8 x i16> %ind16 to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep2 = getelementptr i16, ptr %base, <8 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res2 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep2, i32 2, <8 x i1> %mask, <8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %res2, <8 x ptr> %gep2, i32 2, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %indsext = sext <8 x i16> %ind16 to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep3 = getelementptr i16, ptr %base, <8 x i32> %indsext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %res3 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep3, i32 2, <8 x i1> %mask, <8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %res3, <8 x ptr> %gep3, i32 2, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %resu = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep2, i32 1, <8 x i1> %mask, <8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %resu, <8 x ptr> %gep2, i32 1, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepos = getelementptr i8, ptr %base8, <8 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %geposb = bitcast <8 x ptr> %gepos to <8 x ptr>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %resos = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %geposb, i32 2, <8 x i1> %mask, <8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %resos, <8 x ptr> %geposb, i32 2, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i32, ptr %base32, <8 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <8 x ptr> %gepbs to <8 x ptr>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %resbs = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gepbsb, i32 2, <8 x i1> %mask, <8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %resbs, <8 x ptr> %gepbsb, i32 2, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %indzext4 = zext <8 x i16> %ind16 to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep4 = getelementptr i16, ptr %base, <8 x i32> %indzext4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %indtrunc = trunc <8 x i32> %ind32 to <8 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %indtrunc, <8 x ptr> %gep4, i32 2, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep4, i32 2, <8 x i1> %mask, <8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ressext = sext <8 x i16> %res to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %restrunc = trunc <8 x i32> %ressext to <8 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %restrunc, <8 x ptr> %gep4, i32 4, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gep1 = getelementptr i16, ptr %base, <8 x i32> %ind32
+; CHECK-NEXT: Cost Model: Found costs of 112 for: %res1 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep1, i32 2, <8 x i1> %mask, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of 112 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %res1, <8 x ptr> %gep1, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %indzext = zext <8 x i16> %ind16 to <8 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gep2 = getelementptr i16, ptr %base, <8 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %res2 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep2, i32 2, <8 x i1> %mask, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %res2, <8 x ptr> %gep2, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %indsext = sext <8 x i16> %ind16 to <8 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gep3 = getelementptr i16, ptr %base, <8 x i32> %indsext
+; CHECK-NEXT: Cost Model: Found costs of 112 for: %res3 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep3, i32 2, <8 x i1> %mask, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of 112 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %res3, <8 x ptr> %gep3, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of 112 for: %resu = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep2, i32 1, <8 x i1> %mask, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of 112 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %resu, <8 x ptr> %gep2, i32 1, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gepos = getelementptr i8, ptr %base8, <8 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %geposb = bitcast <8 x ptr> %gepos to <8 x ptr>
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %resos = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %geposb, i32 2, <8 x i1> %mask, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %resos, <8 x ptr> %geposb, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gepbs = getelementptr i32, ptr %base32, <8 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gepbsb = bitcast <8 x ptr> %gepbs to <8 x ptr>
+; CHECK-NEXT: Cost Model: Found costs of 112 for: %resbs = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gepbsb, i32 2, <8 x i1> %mask, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of 112 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %resbs, <8 x ptr> %gepbsb, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %indzext4 = zext <8 x i16> %ind16 to <8 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gep4 = getelementptr i16, ptr %base, <8 x i32> %indzext4
+; CHECK-NEXT: Cost Model: Found costs of 16 for: %indtrunc = trunc <8 x i32> %ind32 to <8 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %indtrunc, <8 x ptr> %gep4, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %res = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep4, i32 2, <8 x i1> %mask, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %ressext = sext <8 x i16> %res to <8 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 16 for: %restrunc = trunc <8 x i32> %ressext to <8 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %restrunc, <8 x ptr> %gep4, i32 4, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; no offset ext
%gep1 = getelementptr i16, ptr %base, <8 x i32> %ind32
@@ -418,28 +418,28 @@ define void @gep_v8i16(ptr %base, ptr %base8, ptr %base32, <8 x i32> %ind32, <8
define void @gep_v8f16(ptr %base, ptr %base8, ptr %base32, <8 x i32> %ind32, <8 x i16> %ind16, <8 x i1> %mask) {
; CHECK-LABEL: 'gep_v8f16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep1 = getelementptr half, ptr %base, <8 x i32> %ind32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %res1 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep1, i32 2, <8 x i1> %mask, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %res1, <8 x ptr> %gep1, i32 2, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %indzext = zext <8 x i16> %ind16 to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep2 = getelementptr half, ptr %base, <8 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res2 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep2, i32 2, <8 x i1> %mask, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %res2, <8 x ptr> %gep2, i32 2, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %indsext = sext <8 x i16> %ind16 to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep3 = getelementptr half, ptr %base, <8 x i32> %indsext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %res3 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep3, i32 2, <8 x i1> %mask, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %res3, <8 x ptr> %gep3, i32 2, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %resu = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep2, i32 1, <8 x i1> %mask, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %resu, <8 x ptr> %gep2, i32 1, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepos = getelementptr i8, ptr %base8, <8 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %geposb = bitcast <8 x ptr> %gepos to <8 x ptr>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %resos = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %geposb, i32 2, <8 x i1> %mask, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %resos, <8 x ptr> %geposb, i32 2, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i32, ptr %base32, <8 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <8 x ptr> %gepbs to <8 x ptr>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %resbs = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gepbsb, i32 2, <8 x i1> %mask, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %resbs, <8 x ptr> %gepbsb, i32 2, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %gep1 = getelementptr half, ptr %base, <8 x i32> %ind32
+; CHECK-NEXT: Cost Model: Found costs of 64 for: %res1 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep1, i32 2, <8 x i1> %mask, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found costs of 64 for: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %res1, <8 x ptr> %gep1, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %indzext = zext <8 x i16> %ind16 to <8 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %gep2 = getelementptr half, ptr %base, <8 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %res2 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep2, i32 2, <8 x i1> %mask, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %res2, <8 x ptr> %gep2, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %indsext = sext <8 x i16> %ind16 to <8 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %gep3 = getelementptr half, ptr %base, <8 x i32> %indsext
+; CHECK-NEXT: Cost Model: Found costs of 64 for: %res3 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep3, i32 2, <8 x i1> %mask, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found costs of 64 for: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %res3, <8 x ptr> %gep3, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of 64 for: %resu = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep2, i32 1, <8 x i1> %mask, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found costs of 64 for: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %resu, <8 x ptr> %gep2, i32 1, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gepos = getelementptr i8, ptr %base8, <8 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %geposb = bitcast <8 x ptr> %gepos to <8 x ptr>
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %resos = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %geposb, i32 2, <8 x i1> %mask, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %resos, <8 x ptr> %geposb, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gepbs = getelementptr i32, ptr %base32, <8 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gepbsb = bitcast <8 x ptr> %gepbs to <8 x ptr>
+; CHECK-NEXT: Cost Model: Found costs of 64 for: %resbs = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gepbsb, i32 2, <8 x i1> %mask, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found costs of 64 for: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %resbs, <8 x ptr> %gepbsb, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; no offset ext
%gep1 = getelementptr half, ptr %base, <8 x i32> %ind32
@@ -479,17 +479,17 @@ define void @gep_v8f16(ptr %base, ptr %base8, ptr %base32, <8 x i32> %ind32, <8
define void @gep_v8i8(ptr %base, <8 x i8> %ind8, <8 x i1> %mask) {
; CHECK-LABEL: 'gep_v8i8'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %indzext = zext <8 x i8> %ind8 to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep5 = getelementptr i8, ptr %base, <8 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res5 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %gep5, i32 2, <8 x i1> %mask, <8 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res5zext = zext <8 x i8> %res5 to <8 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res5trunc = trunc <8 x i16> %res5zext to <8 x i8>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %res5trunc, <8 x ptr> %gep5, i32 4, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res6 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %gep5, i32 2, <8 x i1> %mask, <8 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res6sext = sext <8 x i8> %res6 to <8 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res6trunc = trunc <8 x i16> %res6sext to <8 x i8>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %res6trunc, <8 x ptr> %gep5, i32 4, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %indzext = zext <8 x i8> %ind8 to <8 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gep5 = getelementptr i8, ptr %base, <8 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %res5 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %gep5, i32 2, <8 x i1> %mask, <8 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %res5zext = zext <8 x i8> %res5 to <8 x i16>
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %res5trunc = trunc <8 x i16> %res5zext to <8 x i8>
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %res5trunc, <8 x ptr> %gep5, i32 4, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %res6 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %gep5, i32 2, <8 x i1> %mask, <8 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %res6sext = sext <8 x i8> %res6 to <8 x i16>
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %res6trunc = trunc <8 x i16> %res6sext to <8 x i8>
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %res6trunc, <8 x ptr> %gep5, i32 4, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; result zext
%indzext = zext <8 x i8> %ind8 to <8 x i32>
@@ -510,26 +510,26 @@ define void @gep_v8i8(ptr %base, <8 x i8> %ind8, <8 x i1> %mask) {
define void @gep_v16i8(ptr %base, ptr %base16, <16 x i8> %ind8, <16 x i32> %ind32, <16 x i1> %mask) {
; CHECK-LABEL: 'gep_v16i8'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep1 = getelementptr i8, ptr %base, <16 x i32> %ind32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %res1 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gep1, i32 1, <16 x i1> %mask, <16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %res1, <16 x ptr> %gep1, i32 2, <16 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %indzext = zext <16 x i8> %ind8 to <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep2 = getelementptr i8, ptr %base, <16 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res2 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gep2, i32 2, <16 x i1> %mask, <16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %res2, <16 x ptr> %gep2, i32 2, <16 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %indsext = sext <16 x i8> %ind8 to <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep3 = getelementptr i8, ptr %base, <16 x i32> %indsext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %res3 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gep3, i32 2, <16 x i1> %mask, <16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %res3, <16 x ptr> %gep3, i32 2, <16 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i16, ptr %base16, <16 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <16 x ptr> %gepbs to <16 x ptr>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gepbsb, i32 2, <16 x i1> %mask, <16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %resbs, <16 x ptr> %gepbsb, i32 2, <16 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %indzext4 = zext <16 x i8> %ind8 to <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep4 = getelementptr i8, ptr %base, <16 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %indtrunc = trunc <16 x i32> %ind32 to <16 x i8>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %indtrunc, <16 x ptr> %gep4, i32 2, <16 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gep1 = getelementptr i8, ptr %base, <16 x i32> %ind32
+; CHECK-NEXT: Cost Model: Found costs of 224 for: %res1 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gep1, i32 1, <16 x i1> %mask, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of 224 for: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %res1, <16 x ptr> %gep1, i32 2, <16 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %indzext = zext <16 x i8> %ind8 to <16 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gep2 = getelementptr i8, ptr %base, <16 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %res2 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gep2, i32 2, <16 x i1> %mask, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %res2, <16 x ptr> %gep2, i32 2, <16 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %indsext = sext <16 x i8> %ind8 to <16 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gep3 = getelementptr i8, ptr %base, <16 x i32> %indsext
+; CHECK-NEXT: Cost Model: Found costs of 224 for: %res3 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gep3, i32 2, <16 x i1> %mask, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of 224 for: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %res3, <16 x ptr> %gep3, i32 2, <16 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gepbs = getelementptr i16, ptr %base16, <16 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gepbsb = bitcast <16 x ptr> %gepbs to <16 x ptr>
+; CHECK-NEXT: Cost Model: Found costs of 224 for: %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gepbsb, i32 2, <16 x i1> %mask, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of 224 for: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %resbs, <16 x ptr> %gepbsb, i32 2, <16 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %indzext4 = zext <16 x i8> %ind8 to <16 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %gep4 = getelementptr i8, ptr %base, <16 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found costs of 32 for: %indtrunc = trunc <16 x i32> %ind32 to <16 x i8>
+; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %indtrunc, <16 x ptr> %gep4, i32 2, <16 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; no offset ext
%gep1 = getelementptr i8, ptr %base, <16 x i32> %ind32
@@ -565,10 +565,10 @@ define void @gep_v16i8(ptr %base, ptr %base16, <16 x i8> %ind8, <16 x i32> %ind3
define void @gep_v16i8p(<16 x ptr> %base, i32 %off, <16 x i1> %mask) {
; CHECK-LABEL: 'gep_v16i8p'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i8, <16 x ptr> %base, i32 %off
-; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gepbs, i32 2, <16 x i1> %mask, <16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %resbs, <16 x ptr> %gepbs, i32 2, <16 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of 0 for: %gepbs = getelementptr i8, <16 x ptr> %base, i32 %off
+; CHECK-NEXT: Cost Model: Found costs of 224 for: %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gepbs, i32 2, <16 x i1> %mask, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of 224 for: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %resbs, <16 x ptr> %gepbs, i32 2, <16 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%gepbs = getelementptr i8, <16 x ptr> %base, i32 %off
%resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gepbs, i32 2, <16 x i1> %mask, <16 x i8> undef)
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll b/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll
index 01341e4dcb64..e4cc8fed5052 100644
--- a/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefixes=MVE-RECIP,MVEI-RECIP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefixes=MVE-SIZE,MVEI-SIZE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=MVE-RECIP,MVEF-RECIP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=MVE-SIZE,MVEF-SIZE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefixes=MVE,MVEI
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=MVE,MVEF
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -33,55 +31,30 @@ declare <32 x i8> @llvm.smin.v32i8(<32 x i8>, <32 x i8>)
declare <64 x i8> @llvm.smin.v64i8(<64 x i8>, <64 x i8>)
define i32 @smin(i32 %arg) {
-; MVE-RECIP-LABEL: 'smin'
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; MVE-SIZE-LABEL: 'smin'
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-LABEL: 'smin'
+; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:38 CodeSize:19 Lat:38 SizeLat:38 for: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:76 CodeSize:38 Lat:76 SizeLat:76 for: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:152 CodeSize:76 Lat:152 SizeLat:152 for: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
%I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef)
%V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
@@ -140,55 +113,30 @@ declare <32 x i8> @llvm.smax.v32i8(<32 x i8>, <32 x i8>)
declare <64 x i8> @llvm.smax.v64i8(<64 x i8>, <64 x i8>)
define i32 @smax(i32 %arg) {
-; MVE-RECIP-LABEL: 'smax'
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; MVE-SIZE-LABEL: 'smax'
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-LABEL: 'smax'
+; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:38 CodeSize:19 Lat:38 SizeLat:38 for: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:76 CodeSize:38 Lat:76 SizeLat:76 for: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:152 CodeSize:76 Lat:152 SizeLat:152 for: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
%I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef)
%V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
@@ -248,55 +196,30 @@ declare <32 x i8> @llvm.umin.v32i8(<32 x i8>, <32 x i8>)
declare <64 x i8> @llvm.umin.v64i8(<64 x i8>, <64 x i8>)
define i32 @umin(i32 %arg) {
-; MVE-RECIP-LABEL: 'umin'
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; MVE-SIZE-LABEL: 'umin'
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-LABEL: 'umin'
+; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:38 CodeSize:19 Lat:38 SizeLat:38 for: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:76 CodeSize:38 Lat:76 SizeLat:76 for: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:152 CodeSize:76 Lat:152 SizeLat:152 for: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
%I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef)
%V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
@@ -354,56 +277,31 @@ declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.umax.v32i8(<32 x i8>, <32 x i8>)
declare <64 x i8> @llvm.umax.v64i8(<64 x i8>, <64 x i8>)
-define i32 @sub(i32 %arg) {
-; MVE-RECIP-LABEL: 'sub'
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; MVE-SIZE-LABEL: 'sub'
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
-; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+define i32 @umax(i32 %arg) {
+; MVE-LABEL: 'umax'
+; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:38 CodeSize:19 Lat:38 SizeLat:38 for: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:76 CodeSize:38 Lat:76 SizeLat:76 for: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:152 CodeSize:76 Lat:152 SizeLat:152 for: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:2 SizeLat:2 for: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:22 CodeSize:11 Lat:22 SizeLat:22 for: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
%I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef)
%V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
@@ -455,77 +353,41 @@ declare <16 x half> @llvm.minnum.v16f16(<16 x half>, <16 x half>)
declare <32 x half> @llvm.minnum.v32f16(<32 x half>, <32 x half>)
define float @minnum(float %arg) {
-; MVEI-RECIP-LABEL: 'minnum'
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.minnum.f64(double undef, double undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F16 = call half @llvm.minnum.f16(half undef, half undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32F16 = call <32 x half> @llvm.minnum.v32f16(<32 x half> undef, <32 x half> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
+; MVEI-LABEL: 'minnum'
+; MVEI-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F64 = call double @llvm.minnum.f64(double undef, double undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:22 CodeSize:4 Lat:22 SizeLat:22 for: %V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:44 CodeSize:8 Lat:44 SizeLat:44 for: %V4F64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:88 CodeSize:16 Lat:88 SizeLat:88 for: %V8F64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F32 = call float @llvm.minnum.f32(float undef, float undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:22 CodeSize:4 Lat:22 SizeLat:22 for: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:44 CodeSize:8 Lat:44 SizeLat:44 for: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:88 CodeSize:16 Lat:88 SizeLat:88 for: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:176 CodeSize:32 Lat:176 SizeLat:176 for: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F16 = call half @llvm.minnum.f16(half undef, half undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:22 CodeSize:4 Lat:22 SizeLat:22 for: %V2F16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:44 CodeSize:8 Lat:44 SizeLat:44 for: %V4F16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:88 CodeSize:16 Lat:88 SizeLat:88 for: %V8F16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:176 CodeSize:32 Lat:176 SizeLat:176 for: %V16F16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:352 CodeSize:64 Lat:352 SizeLat:352 for: %V32F16 = call <32 x half> @llvm.minnum.v32f16(<32 x half> undef, <32 x half> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret float undef
;
-; MVEI-SIZE-LABEL: 'minnum'
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.minnum.f64(double undef, double undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.minnum.f16(half undef, half undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <32 x half> @llvm.minnum.v32f16(<32 x half> undef, <32 x half> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
-;
-; MVEF-RECIP-LABEL: 'minnum'
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.minnum.f64(double undef, double undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.minnum.f16(half undef, half undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = call <32 x half> @llvm.minnum.v32f16(<32 x half> undef, <32 x half> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
-;
-; MVEF-SIZE-LABEL: 'minnum'
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.minnum.f64(double undef, double undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.minnum.f16(half undef, half undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32F16 = call <32 x half> @llvm.minnum.v32f16(<32 x half> undef, <32 x half> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
+; MVEF-LABEL: 'minnum'
+; MVEF-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F64 = call double @llvm.minnum.f64(double undef, double undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:22 CodeSize:4 Lat:22 SizeLat:22 for: %V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:44 CodeSize:8 Lat:44 SizeLat:44 for: %V4F64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:88 CodeSize:16 Lat:88 SizeLat:88 for: %V8F64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
+; MVEF-NEXT: Cost Model: Found costs of 1 for: %F32 = call float @llvm.minnum.f32(float undef, float undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
+; MVEF-NEXT: Cost Model: Found costs of 1 for: %F16 = call half @llvm.minnum.f16(half undef, half undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V2F16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4F16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8F16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16F16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32F16 = call <32 x half> @llvm.minnum.v32f16(<32 x half> undef, <32 x half> undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret float undef
;
%F64 = call double @llvm.minnum.f64(double undef, double undef)
%V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
@@ -567,77 +429,41 @@ declare <16 x half> @llvm.maxnum.v16f16(<16 x half>, <16 x half>)
declare <32 x half> @llvm.maxnum.v32f16(<32 x half>, <32 x half>)
define float @maxnum(float %arg) {
-; MVEI-RECIP-LABEL: 'maxnum'
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.maxnum.f64(double undef, double undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F16 = call half @llvm.maxnum.f16(half undef, half undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32F16 = call <32 x half> @llvm.maxnum.v32f16(<32 x half> undef, <32 x half> undef)
-; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
-;
-; MVEI-SIZE-LABEL: 'maxnum'
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.maxnum.f64(double undef, double undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.maxnum.f16(half undef, half undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <32 x half> @llvm.maxnum.v32f16(<32 x half> undef, <32 x half> undef)
-; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
-;
-; MVEF-RECIP-LABEL: 'maxnum'
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.maxnum.f64(double undef, double undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.maxnum.f16(half undef, half undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = call <32 x half> @llvm.maxnum.v32f16(<32 x half> undef, <32 x half> undef)
-; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
+; MVEI-LABEL: 'maxnum'
+; MVEI-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F64 = call double @llvm.maxnum.f64(double undef, double undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:22 CodeSize:4 Lat:22 SizeLat:22 for: %V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:44 CodeSize:8 Lat:44 SizeLat:44 for: %V4F64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:88 CodeSize:16 Lat:88 SizeLat:88 for: %V8F64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F32 = call float @llvm.maxnum.f32(float undef, float undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:22 CodeSize:4 Lat:22 SizeLat:22 for: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:44 CodeSize:8 Lat:44 SizeLat:44 for: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:88 CodeSize:16 Lat:88 SizeLat:88 for: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:176 CodeSize:32 Lat:176 SizeLat:176 for: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F16 = call half @llvm.maxnum.f16(half undef, half undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:22 CodeSize:4 Lat:22 SizeLat:22 for: %V2F16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:44 CodeSize:8 Lat:44 SizeLat:44 for: %V4F16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:88 CodeSize:16 Lat:88 SizeLat:88 for: %V8F16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:176 CodeSize:32 Lat:176 SizeLat:176 for: %V16F16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:352 CodeSize:64 Lat:352 SizeLat:352 for: %V32F16 = call <32 x half> @llvm.maxnum.v32f16(<32 x half> undef, <32 x half> undef)
+; MVEI-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret float undef
;
-; MVEF-SIZE-LABEL: 'maxnum'
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.maxnum.f64(double undef, double undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.maxnum.f16(half undef, half undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32F16 = call <32 x half> @llvm.maxnum.v32f16(<32 x half> undef, <32 x half> undef)
-; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
+; MVEF-LABEL: 'maxnum'
+; MVEF-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %F64 = call double @llvm.maxnum.f64(double undef, double undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:22 CodeSize:4 Lat:22 SizeLat:22 for: %V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:44 CodeSize:8 Lat:44 SizeLat:44 for: %V4F64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:88 CodeSize:16 Lat:88 SizeLat:88 for: %V8F64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
+; MVEF-NEXT: Cost Model: Found costs of 1 for: %F32 = call float @llvm.maxnum.f32(float undef, float undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
+; MVEF-NEXT: Cost Model: Found costs of 1 for: %F16 = call half @llvm.maxnum.f16(half undef, half undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V2F16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4F16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8F16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16F16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32F16 = call <32 x half> @llvm.maxnum.v32f16(<32 x half> undef, <32 x half> undef)
+; MVEF-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret float undef
;
%F64 = call double @llvm.maxnum.f64(double undef, double undef)
%V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-shuffle-loadstore.ll b/llvm/test/Analysis/CostModel/ARM/mve-shuffle-loadstore.ll
index ef0b28ea2604..bd854611080d 100644
--- a/llvm/test/Analysis/CostModel/ARM/mve-shuffle-loadstore.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-shuffle-loadstore.ll
@@ -1,60 +1,60 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UF2
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UF2
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -mve-max-interleave-factor=4 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UF4
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -mve-max-interleave-factor=4 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UF4
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UF2
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UF2
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -mve-max-interleave-factor=4 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UF4
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -mve-max-interleave-factor=4 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UF4
define void @vld2(ptr %p) {
; CHECK-LABEL: 'vld2'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = load <4 x i8>, ptr %p, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v4i8_0 = shufflevector <4 x i8> %v4i8, <4 x i8> undef, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v4i8_1 = shufflevector <4 x i8> %v4i8, <4 x i8> undef, <2 x i32> <i32 1, i32 3>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = load <8 x i8>, ptr %p, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v8i8_0 = shufflevector <8 x i8> %v8i8, <8 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v8i8_1 = shufflevector <8 x i8> %v8i8, <8 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = load <16 x i8>, ptr %p, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v16i8_0 = shufflevector <16 x i8> %v16i8, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v16i8_1 = shufflevector <16 x i8> %v16i8, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8 = load <32 x i8>, ptr %p, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %v32i8_0 = shufflevector <32 x i8> %v32i8, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %v32i8_1 = shufflevector <32 x i8> %v32i8, <32 x i8> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = load <4 x i16>, ptr %p, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v4i16_0 = shufflevector <4 x i16> %v4i16, <4 x i16> undef, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v4i16_1 = shufflevector <4 x i16> %v4i16, <4 x i16> undef, <2 x i32> <i32 1, i32 3>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = load <8 x i16>, ptr %p, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v8i16_0 = shufflevector <8 x i16> %v8i16, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v8i16_1 = shufflevector <8 x i16> %v8i16, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16 = load <16 x i16>, ptr %p, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v16i16_0 = shufflevector <16 x i16> %v16i16, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v16i16_1 = shufflevector <16 x i16> %v16i16, <16 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i16 = load <32 x i16>, ptr %p, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %v32i16_0 = shufflevector <32 x i16> %v32i16, <32 x i16> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %v32i16_1 = shufflevector <32 x i16> %v32i16, <32 x i16> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = load <4 x i32>, ptr %p, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v4i32_0 = shufflevector <4 x i32> %v4i32, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v4i32_1 = shufflevector <4 x i32> %v4i32, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = load <8 x i32>, ptr %p, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v8i32_0 = shufflevector <8 x i32> %v8i32, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v8i32_1 = shufflevector <8 x i32> %v8i32, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i32 = load <16 x i32>, ptr %p, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v16i32_0 = shufflevector <16 x i32> %v16i32, <16 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v16i32_1 = shufflevector <16 x i32> %v16i32, <16 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i32 = load <32 x i32>, ptr %p, align 128
-; CHECK-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %v32i32_0 = shufflevector <32 x i32> %v32i32, <32 x i32> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %v32i32_1 = shufflevector <32 x i32> %v32i32, <32 x i32> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = load <4 x i64>, ptr %p, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v2i64_0 = shufflevector <4 x i64> %v2i64, <4 x i64> undef, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v2i64_1 = shufflevector <4 x i64> %v2i64, <4 x i64> undef, <2 x i32> <i32 1, i32 3>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = load <8 x i64>, ptr %p, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %v4i64_0 = shufflevector <8 x i64> %v4i64, <8 x i64> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %v4i64_1 = shufflevector <8 x i64> %v4i64, <8 x i64> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i64 = load <16 x i64>, ptr %p, align 128
-; CHECK-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %v8i64_0 = shufflevector <16 x i64> %v8i64, <16 x i64> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %v8i64_1 = shufflevector <16 x i64> %v8i64, <16 x i64> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i64 = load <32 x i64>, ptr %p, align 256
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1536 for instruction: %v16i64_0 = shufflevector <32 x i64> %v16i64, <32 x i64> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1536 for instruction: %v16i64_1 = shufflevector <32 x i64> %v16i64, <32 x i64> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v4i8 = load <4 x i8>, ptr %p, align 4
+; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v4i8_0 = shufflevector <4 x i8> %v4i8, <4 x i8> undef, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v4i8_1 = shufflevector <4 x i8> %v4i8, <4 x i8> undef, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v8i8 = load <8 x i8>, ptr %p, align 8
+; CHECK-NEXT: Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v8i8_0 = shufflevector <8 x i8> %v8i8, <8 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v8i8_1 = shufflevector <8 x i8> %v8i8, <8 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v16i8 = load <16 x i8>, ptr %p, align 8
+; CHECK-NEXT: Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v16i8_0 = shufflevector <16 x i8> %v16i8, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT: Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v16i8_1 = shufflevector <16 x i8> %v16i8, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v32i8 = load <32 x i8>, ptr %p, align 32
+; CHECK-NEXT: Cost Model: Found costs of RThru:258 CodeSize:129 Lat:258 SizeLat:258 for: %v32i8_0 = shufflevector <32 x i8> %v32i8, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+; CHECK-NEXT: Cost Model: Found costs of RThru:258 CodeSize:129 Lat:258 SizeLat:258 for: %v32i8_1 = shufflevector <32 x i8> %v32i8, <32 x i8> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v4i16 = load <4 x i16>, ptr %p, align 8
+; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v4i16_0 = shufflevector <4 x i16> %v4i16, <4 x i16> undef, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v4i16_1 = shufflevector <4 x i16> %v4i16, <4 x i16> undef, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v8i16 = load <8 x i16>, ptr %p, align 8
+; CHECK-NEXT: Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v8i16_0 = shufflevector <8 x i16> %v8i16, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v8i16_1 = shufflevector <8 x i16> %v8i16, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v16i16 = load <16 x i16>, ptr %p, align 32
+; CHECK-NEXT: Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v16i16_0 = shufflevector <16 x i16> %v16i16, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT: Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v16i16_1 = shufflevector <16 x i16> %v16i16, <16 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v32i16 = load <32 x i16>, ptr %p, align 64
+; CHECK-NEXT: Cost Model: Found costs of RThru:258 CodeSize:129 Lat:258 SizeLat:258 for: %v32i16_0 = shufflevector <32 x i16> %v32i16, <32 x i16> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+; CHECK-NEXT: Cost Model: Found costs of RThru:258 CodeSize:129 Lat:258 SizeLat:258 for: %v32i16_1 = shufflevector <32 x i16> %v32i16, <32 x i16> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v4i32 = load <4 x i32>, ptr %p, align 8
+; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v4i32_0 = shufflevector <4 x i32> %v4i32, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v4i32_1 = shufflevector <4 x i32> %v4i32, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v8i32 = load <8 x i32>, ptr %p, align 32
+; CHECK-NEXT: Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v8i32_0 = shufflevector <8 x i32> %v8i32, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v8i32_1 = shufflevector <8 x i32> %v8i32, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v16i32 = load <16 x i32>, ptr %p, align 64
+; CHECK-NEXT: Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v16i32_0 = shufflevector <16 x i32> %v16i32, <16 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT: Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v16i32_1 = shufflevector <16 x i32> %v16i32, <16 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v32i32 = load <32 x i32>, ptr %p, align 128
+; CHECK-NEXT: Cost Model: Found costs of RThru:260 CodeSize:130 Lat:260 SizeLat:260 for: %v32i32_0 = shufflevector <32 x i32> %v32i32, <32 x i32> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+; CHECK-NEXT: Cost Model: Found costs of RThru:260 CodeSize:130 Lat:260 SizeLat:260 for: %v32i32_1 = shufflevector <32 x i32> %v32i32, <32 x i32> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v2i64 = load <4 x i64>, ptr %p, align 32
+; CHECK-NEXT: Cost Model: Found costs of RThru:192 CodeSize:96 Lat:192 SizeLat:192 for: %v2i64_0 = shufflevector <4 x i64> %v2i64, <4 x i64> undef, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: Cost Model: Found costs of RThru:192 CodeSize:96 Lat:192 SizeLat:192 for: %v2i64_1 = shufflevector <4 x i64> %v2i64, <4 x i64> undef, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v4i64 = load <8 x i64>, ptr %p, align 64
+; CHECK-NEXT: Cost Model: Found costs of RThru:384 CodeSize:192 Lat:384 SizeLat:384 for: %v4i64_0 = shufflevector <8 x i64> %v4i64, <8 x i64> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: Cost Model: Found costs of RThru:384 CodeSize:192 Lat:384 SizeLat:384 for: %v4i64_1 = shufflevector <8 x i64> %v4i64, <8 x i64> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v8i64 = load <16 x i64>, ptr %p, align 128
+; CHECK-NEXT: Cost Model: Found costs of RThru:768 CodeSize:384 Lat:768 SizeLat:768 for: %v8i64_0 = shufflevector <16 x i64> %v8i64, <16 x i64> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT: Cost Model: Found costs of RThru:768 CodeSize:384 Lat:768 SizeLat:768 for: %v8i64_1 = shufflevector <16 x i64> %v8i64, <16 x i64> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:1 Lat:4 SizeLat:1 for: %v16i64 = load <32 x i64>, ptr %p, align 256
+; CHECK-NEXT: Cost Model: Found costs of RThru:1536 CodeSize:768 Lat:1536 SizeLat:1536 for: %v16i64_0 = shufflevector <32 x i64> %v16i64, <32 x i64> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+; CHECK-NEXT: Cost Model: Found costs of RThru:1536 CodeSize:768 Lat:1536 SizeLat:1536 for: %v16i64_1 = shufflevector <32 x i64> %v16i64, <32 x i64> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%v4i8 = load <4 x i8>, ptr %p
%v4i8_0 = shufflevector <4 x i8> %v4i8, <4 x i8> undef, <2 x i32> <i32 0, i32 2>
@@ -114,71 +114,71 @@ define void @vld2(ptr %p) {
define void @vld3(ptr %p) {
; CHECK-LABEL: 'vld3'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v2i8 = load <6 x i8>, ptr %p, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v2i8_0 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v2i8_1 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 1, i32 4>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v2i8_2 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 2, i32 5>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %v4i8 = load <12 x i8>, ptr %p, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v4i8_0 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v4i8_1 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v4i8_2 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8 = load <24 x i8>, ptr %p, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v8i8_0 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v8i8_1 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v8i8_2 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8 = load <48 x i8>, ptr %p, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i8_0 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i8_1 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i8_2 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v2i16 = load <6 x i16>, ptr %p, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v2i16_0 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v2i16_1 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> <i32 1, i32 4>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v2i16_2 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> <i32 2, i32 5>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = load <12 x i16>, ptr %p, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v4i16_0 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v4i16_1 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v4i16_2 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = load <24 x i16>, ptr %p, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v8i16_0 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v8i16_1 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v8i16_2 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = load <48 x i16>, ptr %p, align 128
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i16_0 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i16_1 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i16_2 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i32 = load <6 x i32>, ptr %p, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v2i32_0 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v2i32_1 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> <i32 1, i32 4>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v2i32_2 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> <i32 2, i32 5>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i32 = load <12 x i32>, ptr %p, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v4i32_0 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v4i32_1 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v4i32_2 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i32 = load <24 x i32>, ptr %p, align 128
-; CHECK-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v8i32_0 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v8i32_1 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v8i32_2 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i32 = load <48 x i32>, ptr %p, align 256
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i32_0 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i32_1 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i32_2 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = load <6 x i64>, ptr %p, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v2i64_0 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v2i64_1 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> <i32 1, i32 4>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v2i64_2 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> <i32 2, i32 5>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = load <12 x i64>, ptr %p, align 128
-; CHECK-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v4i64_0 = shufflevector <12 x i64> %v4i64, <12 x i64> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v4i64_1 = shufflevector <12 x i64> %v4i64, <12 x i64> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v4i64_2 = shufflevector <12 x i64> %v4i64, <12 x i64> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = load <24 x i64>, ptr %p, align 256
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v8i64_0 = shufflevector <24 x i64> %v8i64, <24 x i64> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v8i64_1 = shufflevector <24 x i64> %v8i64, <24 x i64> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v8i64_2 = shufflevector <24 x i64> %v8i64, <24 x i64> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16i64 = load <48 x i64>, ptr %p, align 512
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %v16i64_0 = shufflevector <48 x i64> %v16i64, <48 x i64> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %v16i64_1 = shufflevector <48 x i64> %v16i64, <48 x i64> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %v16i64_2 = shufflevector <48 x i64> %v16i64, <48 x i64> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of RThru:50 CodeSize:1 Lat:4 SizeLat:1 for: %v2i8 = load <6 x i8>, ptr %p, align 8
+; CHECK-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v2i8_0 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v2i8_1 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 1, i32 4>
+; CHECK-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v2i8_2 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 2, i32 5>
+; CHECK-NEXT: Cost Model: Found costs of RThru:98 CodeSize:1 Lat:4 SizeLat:1 for: %v4i8 = load <12 x i8>, ptr %p, align 16
+; CHECK-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v4i8_0 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+; CHECK-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v4i8_1 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+; CHECK-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v4i8_2 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v8i8 = load <24 x i8>, ptr %p, align 32
+; CHECK-NEXT: Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v8i8_0 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
+; CHECK-NEXT: Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v8i8_1 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
+; CHECK-NEXT: Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v8i8_2 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v16i8 = load <48 x i8>, ptr %p, align 64
+; CHECK-NEXT: Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v16i8_0 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
+; CHECK-NEXT: Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v16i8_1 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46>
+; CHECK-NEXT: Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v16i8_2 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47>
+; CHECK-NEXT: Cost Model: Found costs of RThru:50 CodeSize:1 Lat:4 SizeLat:1 for: %v2i16 = load <6 x i16>, ptr %p, align 16
+; CHECK-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v2i16_0 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v2i16_1 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> <i32 1, i32 4>
+; CHECK-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v2i16_2 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> <i32 2, i32 5>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v4i16 = load <12 x i16>, ptr %p, align 32
+; CHECK-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v4i16_0 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+; CHECK-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v4i16_1 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+; CHECK-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v4i16_2 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v8i16 = load <24 x i16>, ptr %p, align 64
+; CHECK-NEXT: Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v8i16_0 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
+; CHECK-NEXT: Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v8i16_1 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
+; CHECK-NEXT: Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v8i16_2 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v16i16 = load <48 x i16>, ptr %p, align 128
+; CHECK-NEXT: Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v16i16_0 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
+; CHECK-NEXT: Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v16i16_1 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46>
+; CHECK-NEXT: Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v16i16_2 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v2i32 = load <6 x i32>, ptr %p, align 32
+; CHECK-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v2i32_0 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v2i32_1 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> <i32 1, i32 4>
+; CHECK-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v2i32_2 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> <i32 2, i32 5>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v4i32 = load <12 x i32>, ptr %p, align 64
+; CHECK-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v4i32_0 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+; CHECK-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v4i32_1 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+; CHECK-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v4i32_2 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v8i32 = load <24 x i32>, ptr %p, align 128
+; CHECK-NEXT: Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v8i32_0 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
+; CHECK-NEXT: Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v8i32_1 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
+; CHECK-NEXT: Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v8i32_2 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
+; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:1 Lat:4 SizeLat:1 for: %v16i32 = load <48 x i32>, ptr %p, align 256
+; CHECK-NEXT: Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v16i32_0 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
+; CHECK-NEXT: Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v16i32_1 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46>
+; CHECK-NEXT: Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v16i32_2 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v2i64 = load <6 x i64>, ptr %p, align 64
+; CHECK-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v2i64_0 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v2i64_1 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> <i32 1, i32 4>
+; CHECK-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v2i64_2 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> <i32 2, i32 5>
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v4i64 = load <12 x i64>, ptr %p, align 128
+; CHECK-NEXT: Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v4i64_0 = shufflevector <12 x i64> %v4i64, <12 x i64> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+; CHECK-NEXT: Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v4i64_1 = shufflevector <12 x i64> %v4i64, <12 x i64> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+; CHECK-NEXT: Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v4i64_2 = shufflevector <12 x i64> %v4i64, <12 x i64> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:1 Lat:4 SizeLat:1 for: %v8i64 = load <24 x i64>, ptr %p, align 256
+; CHECK-NEXT: Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v8i64_0 = shufflevector <24 x i64> %v8i64, <24 x i64> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
+; CHECK-NEXT: Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v8i64_1 = shufflevector <24 x i64> %v8i64, <24 x i64> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
+; CHECK-NEXT: Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v8i64_2 = shufflevector <24 x i64> %v8i64, <24 x i64> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
+; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:1 Lat:4 SizeLat:1 for: %v16i64 = load <48 x i64>, ptr %p, align 512
+; CHECK-NEXT: Cost Model: Found costs of RThru:2048 CodeSize:1024 Lat:2048 SizeLat:2048 for: %v16i64_0 = shufflevector <48 x i64> %v16i64, <48 x i64> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2048 CodeSize:1024 Lat:2048 SizeLat:2048 for: %v16i64_1 = shufflevector <48 x i64> %v16i64, <48 x i64> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2048 CodeSize:1024 Lat:2048 SizeLat:2048 for: %v16i64_2 = shufflevector <48 x i64> %v16i64, <48 x i64> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47>
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%v2i8 = load <6 x i8>, ptr %p
%v2i8_0 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 0, i32 3>
@@ -253,170 +253,170 @@ define void @vld3(ptr %p) {
define void @vld4(ptr %p) {
; CHECK-UF2-LABEL: 'vld4'
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = load <8 x i8>, ptr %p, align 8
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i8_0 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 0, i32 4>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i8_1 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 1, i32 5>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i8_2 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 2, i32 6>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i8_3 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 3, i32 7>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = load <16 x i8>, ptr %p, align 8
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i8_0 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i8_1 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i8_2 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i8_3 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8 = load <32 x i8>, ptr %p, align 32
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i8_0 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i8_1 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i8_2 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i8_3 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8 = load <64 x i8>, ptr %p, align 64
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i8_0 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i8_1 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i8_2 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i8_3 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = load <8 x i16>, ptr %p, align 8
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i16_0 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 0, i32 4>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i16_1 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 1, i32 5>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i16_2 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 2, i32 6>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i16_3 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 3, i32 7>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = load <16 x i16>, ptr %p, align 32
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i16_0 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i16_1 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i16_2 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i16_3 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = load <32 x i16>, ptr %p, align 64
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i16_0 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i16_1 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i16_2 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i16_3 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = load <64 x i16>, ptr %p, align 128
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i16_0 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i16_1 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i16_2 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i16_3 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i32 = load <8 x i32>, ptr %p, align 32
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i32_0 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 0, i32 4>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i32_1 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 1, i32 5>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i32_2 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 2, i32 6>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i32_3 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 3, i32 7>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i32 = load <16 x i32>, ptr %p, align 64
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i32_0 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i32_1 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i32_2 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i32_3 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i32 = load <32 x i32>, ptr %p, align 128
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i32_0 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i32_1 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i32_2 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i32_3 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i32 = load <64 x i32>, ptr %p, align 256
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i32_0 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i32_1 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i32_2 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i32_3 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = load <8 x i64>, ptr %p, align 64
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v2i64_0 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 0, i32 4>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v2i64_1 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 1, i32 5>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v2i64_2 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 2, i32 6>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v2i64_3 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 3, i32 7>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = load <16 x i64>, ptr %p, align 128
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v4i64_0 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v4i64_1 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v4i64_2 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v4i64_3 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = load <32 x i64>, ptr %p, align 256
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_0 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_1 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_2 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_3 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16i64 = load <64 x i64>, ptr %p, align 512
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_0 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_1 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_2 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_3 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v2i8 = load <8 x i8>, ptr %p, align 8
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i8_0 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 0, i32 4>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i8_1 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 1, i32 5>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i8_2 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 2, i32 6>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i8_3 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 3, i32 7>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v4i8 = load <16 x i8>, ptr %p, align 8
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i8_0 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i8_1 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i8_2 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i8_3 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v8i8 = load <32 x i8>, ptr %p, align 32
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i8_0 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i8_1 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i8_2 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i8_3 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v16i8 = load <64 x i8>, ptr %p, align 64
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i8_0 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i8_1 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i8_2 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i8_3 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v2i16 = load <8 x i16>, ptr %p, align 8
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i16_0 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 0, i32 4>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i16_1 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 1, i32 5>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i16_2 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 2, i32 6>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i16_3 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 3, i32 7>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v4i16 = load <16 x i16>, ptr %p, align 32
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i16_0 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i16_1 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i16_2 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i16_3 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v8i16 = load <32 x i16>, ptr %p, align 64
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i16_0 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i16_1 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i16_2 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i16_3 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v16i16 = load <64 x i16>, ptr %p, align 128
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i16_0 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i16_1 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i16_2 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i16_3 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v2i32 = load <8 x i32>, ptr %p, align 32
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i32_0 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 0, i32 4>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i32_1 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 1, i32 5>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i32_2 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 2, i32 6>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:160 CodeSize:80 Lat:160 SizeLat:160 for: %v2i32_3 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 3, i32 7>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v4i32 = load <16 x i32>, ptr %p, align 64
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i32_0 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i32_1 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i32_2 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v4i32_3 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v8i32 = load <32 x i32>, ptr %p, align 128
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i32_0 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i32_1 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i32_2 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v8i32_3 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:1 Lat:4 SizeLat:1 for: %v16i32 = load <64 x i32>, ptr %p, align 256
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i32_0 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i32_1 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i32_2 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v16i32_3 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v2i64 = load <8 x i64>, ptr %p, align 64
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v2i64_0 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 0, i32 4>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v2i64_1 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 1, i32 5>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v2i64_2 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 2, i32 6>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v2i64_3 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 3, i32 7>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v4i64 = load <16 x i64>, ptr %p, align 128
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v4i64_0 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v4i64_1 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v4i64_2 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v4i64_3 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:1 Lat:4 SizeLat:1 for: %v8i64 = load <32 x i64>, ptr %p, align 256
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v8i64_0 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v8i64_1 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v8i64_2 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v8i64_3 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:64 CodeSize:1 Lat:4 SizeLat:1 for: %v16i64 = load <64 x i64>, ptr %p, align 512
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:2560 CodeSize:1280 Lat:2560 SizeLat:2560 for: %v16i64_0 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:2560 CodeSize:1280 Lat:2560 SizeLat:2560 for: %v16i64_1 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:2560 CodeSize:1280 Lat:2560 SizeLat:2560 for: %v16i64_2 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:2560 CodeSize:1280 Lat:2560 SizeLat:2560 for: %v16i64_3 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-UF4-LABEL: 'vld4'
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = load <8 x i8>, ptr %p, align 8
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i8_0 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 0, i32 4>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i8_1 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 1, i32 5>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i8_2 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 2, i32 6>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i8_3 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 3, i32 7>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = load <16 x i8>, ptr %p, align 8
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i8_0 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i8_1 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i8_2 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i8_3 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8 = load <32 x i8>, ptr %p, align 32
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v8i8_0 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v8i8_1 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v8i8_2 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v8i8_3 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8 = load <64 x i8>, ptr %p, align 64
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %v16i8_0 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %v16i8_1 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %v16i8_2 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %v16i8_3 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = load <8 x i16>, ptr %p, align 8
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i16_0 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 0, i32 4>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i16_1 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 1, i32 5>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i16_2 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 2, i32 6>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i16_3 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 3, i32 7>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = load <16 x i16>, ptr %p, align 32
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i16_0 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i16_1 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i16_2 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i16_3 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = load <32 x i16>, ptr %p, align 64
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v8i16_0 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v8i16_1 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v8i16_2 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v8i16_3 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = load <64 x i16>, ptr %p, align 128
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %v16i16_0 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %v16i16_1 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %v16i16_2 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %v16i16_3 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i32 = load <8 x i32>, ptr %p, align 32
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i32_0 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 0, i32 4>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i32_1 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 1, i32 5>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i32_2 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 2, i32 6>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i32_3 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 3, i32 7>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i32 = load <16 x i32>, ptr %p, align 64
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i32_0 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i32_1 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i32_2 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i32_3 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i32 = load <32 x i32>, ptr %p, align 128
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %v8i32_0 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %v8i32_1 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %v8i32_2 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %v8i32_3 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i32 = load <64 x i32>, ptr %p, align 256
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %v16i32_0 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %v16i32_1 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %v16i32_2 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %v16i32_3 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = load <8 x i64>, ptr %p, align 64
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v2i64_0 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 0, i32 4>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v2i64_1 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 1, i32 5>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v2i64_2 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 2, i32 6>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v2i64_3 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 3, i32 7>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = load <16 x i64>, ptr %p, align 128
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v4i64_0 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v4i64_1 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v4i64_2 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v4i64_3 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = load <32 x i64>, ptr %p, align 256
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_0 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_1 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_2 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_3 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16i64 = load <64 x i64>, ptr %p, align 512
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_0 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_1 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_2 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_3 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v2i8 = load <8 x i8>, ptr %p, align 8
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i8_0 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 0, i32 4>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i8_1 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 1, i32 5>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i8_2 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 2, i32 6>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i8_3 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 3, i32 7>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v4i8 = load <16 x i8>, ptr %p, align 8
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i8_0 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i8_1 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i8_2 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i8_3 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v8i8 = load <32 x i8>, ptr %p, align 32
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v8i8_0 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v8i8_1 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v8i8_2 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v8i8_3 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v16i8 = load <64 x i8>, ptr %p, align 64
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:258 CodeSize:129 Lat:258 SizeLat:258 for: %v16i8_0 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:258 CodeSize:129 Lat:258 SizeLat:258 for: %v16i8_1 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:258 CodeSize:129 Lat:258 SizeLat:258 for: %v16i8_2 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:258 CodeSize:129 Lat:258 SizeLat:258 for: %v16i8_3 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:4 SizeLat:1 for: %v2i16 = load <8 x i16>, ptr %p, align 8
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i16_0 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 0, i32 4>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i16_1 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 1, i32 5>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i16_2 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 2, i32 6>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i16_3 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 3, i32 7>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v4i16 = load <16 x i16>, ptr %p, align 32
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i16_0 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i16_1 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i16_2 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i16_3 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v8i16 = load <32 x i16>, ptr %p, align 64
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v8i16_0 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v8i16_1 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v8i16_2 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:130 CodeSize:65 Lat:130 SizeLat:130 for: %v8i16_3 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v16i16 = load <64 x i16>, ptr %p, align 128
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:260 CodeSize:130 Lat:260 SizeLat:260 for: %v16i16_0 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:260 CodeSize:130 Lat:260 SizeLat:260 for: %v16i16_1 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:260 CodeSize:130 Lat:260 SizeLat:260 for: %v16i16_2 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:260 CodeSize:130 Lat:260 SizeLat:260 for: %v16i16_3 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:1 for: %v2i32 = load <8 x i32>, ptr %p, align 32
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i32_0 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 0, i32 4>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i32_1 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 1, i32 5>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i32_2 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 2, i32 6>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:34 CodeSize:17 Lat:34 SizeLat:34 for: %v2i32_3 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 3, i32 7>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v4i32 = load <16 x i32>, ptr %p, align 64
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i32_0 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i32_1 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i32_2 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:66 CodeSize:33 Lat:66 SizeLat:66 for: %v4i32_3 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v8i32 = load <32 x i32>, ptr %p, align 128
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:132 CodeSize:66 Lat:132 SizeLat:132 for: %v8i32_0 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:132 CodeSize:66 Lat:132 SizeLat:132 for: %v8i32_1 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:132 CodeSize:66 Lat:132 SizeLat:132 for: %v8i32_2 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:132 CodeSize:66 Lat:132 SizeLat:132 for: %v8i32_3 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:32 CodeSize:1 Lat:4 SizeLat:1 for: %v16i32 = load <64 x i32>, ptr %p, align 256
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:264 CodeSize:132 Lat:264 SizeLat:264 for: %v16i32_0 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:264 CodeSize:132 Lat:264 SizeLat:264 for: %v16i32_1 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:264 CodeSize:132 Lat:264 SizeLat:264 for: %v16i32_2 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:264 CodeSize:132 Lat:264 SizeLat:264 for: %v16i32_3 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:4 SizeLat:1 for: %v2i64 = load <8 x i64>, ptr %p, align 64
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v2i64_0 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 0, i32 4>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v2i64_1 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 1, i32 5>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v2i64_2 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 2, i32 6>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:320 CodeSize:160 Lat:320 SizeLat:320 for: %v2i64_3 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 3, i32 7>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:4 SizeLat:1 for: %v4i64 = load <16 x i64>, ptr %p, align 128
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v4i64_0 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v4i64_1 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v4i64_2 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:640 CodeSize:320 Lat:640 SizeLat:640 for: %v4i64_3 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:32 CodeSize:1 Lat:4 SizeLat:1 for: %v8i64 = load <32 x i64>, ptr %p, align 256
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v8i64_0 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v8i64_1 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v8i64_2 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:1280 CodeSize:640 Lat:1280 SizeLat:1280 for: %v8i64_3 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:64 CodeSize:1 Lat:4 SizeLat:1 for: %v16i64 = load <64 x i64>, ptr %p, align 512
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:2560 CodeSize:1280 Lat:2560 SizeLat:2560 for: %v16i64_0 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:2560 CodeSize:1280 Lat:2560 SizeLat:2560 for: %v16i64_1 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:2560 CodeSize:1280 Lat:2560 SizeLat:2560 for: %v16i64_2 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:2560 CodeSize:1280 Lat:2560 SizeLat:2560 for: %v16i64_3 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%v2i8 = load <8 x i8>, ptr %p
%v2i8_0 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 0, i32 4>
@@ -507,39 +507,39 @@ define void @vld4(ptr %p) {
define void @vst2(ptr %p) {
; CHECK-LABEL: 'vst2'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> %v4i8, ptr %p, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8> %v8i8, ptr %p, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16i8, ptr %p, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i8> %v32i8, ptr %p, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i16> %v4i16, ptr %p, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v8i16, ptr %p, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i16> %v16i16, ptr %p, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <32 x i16> %v32i16, ptr %p, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> %v4i32, ptr %p, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i32> %v8i32, ptr %p, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <16 x i32> %v16i32, ptr %p, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <32 x i32> %v32i32, ptr %p, align 128
-; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i64> %v4i64, ptr %p, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i64> %v8i64, ptr %p, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v16i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i64> %v16i64, ptr %p, align 128
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v32i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <32 x i64> %v32i64, ptr %p, align 256
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x i8> %v4i8, ptr %p, align 4
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i8> %v8i8, ptr %p, align 8
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i8> %v16i8, ptr %p, align 8
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v32i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i8> %v32i8, ptr %p, align 32
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x i16> %v4i16, ptr %p, align 8
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i16> %v8i16, ptr %p, align 8
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i16> %v16i16, ptr %p, align 32
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v32i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i16> %v32i16, ptr %p, align 64
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x i32> %v4i32, ptr %p, align 8
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i32> %v8i32, ptr %p, align 32
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v16i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i32> %v16i32, ptr %p, align 64
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v32i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i32> %v32i32, ptr %p, align 128
+; CHECK-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <4 x i64> %v4i64, ptr %p, align 32
+; CHECK-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v8i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i64> %v8i64, ptr %p, align 64
+; CHECK-NEXT: Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v16i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i64> %v16i64, ptr %p, align 128
+; CHECK-NEXT: Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v32i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i64> %v32i64, ptr %p, align 256
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
store <4 x i8> %v4i8, ptr %p
@@ -583,39 +583,39 @@ define void @vst2(ptr %p) {
define void @vst3(ptr %p) {
; CHECK-LABEL: 'vst3'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 50 for instruction: store <6 x i8> %v8i8, ptr %p, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 98 for instruction: store <12 x i8> %v16i8, ptr %p, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %v32i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <24 x i8> %v32i8, ptr %p, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %v64i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <48 x i32> <i32 0, i32 16, i32 32, i32 1, i32 17, i32 33, i32 2, i32 18, i32 34, i32 3, i32 19, i32 35, i32 4, i32 20, i32 36, i32 5, i32 21, i32 37, i32 6, i32 22, i32 38, i32 7, i32 23, i32 39, i32 8, i32 24, i32 40, i32 9, i32 25, i32 41, i32 10, i32 26, i32 42, i32 11, i32 27, i32 43, i32 12, i32 28, i32 44, i32 13, i32 29, i32 45, i32 14, i32 30, i32 46, i32 15, i32 31, i32 47>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <48 x i8> %v64i8, ptr %p, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 50 for instruction: store <6 x i16> %v8i16, ptr %p, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <12 x i16> %v16i16, ptr %p, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %v32i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <24 x i16> %v32i16, ptr %p, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %v64i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <48 x i32> <i32 0, i32 16, i32 32, i32 1, i32 17, i32 33, i32 2, i32 18, i32 34, i32 3, i32 19, i32 35, i32 4, i32 20, i32 36, i32 5, i32 21, i32 37, i32 6, i32 22, i32 38, i32 7, i32 23, i32 39, i32 8, i32 24, i32 40, i32 9, i32 25, i32 41, i32 10, i32 26, i32 42, i32 11, i32 27, i32 43, i32 12, i32 28, i32 44, i32 13, i32 29, i32 45, i32 14, i32 30, i32 46, i32 15, i32 31, i32 47>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <48 x i16> %v64i16, ptr %p, align 128
-; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <6 x i32> %v8i32, ptr %p, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v16i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <12 x i32> %v16i32, ptr %p, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %v32i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <24 x i32> %v32i32, ptr %p, align 128
-; CHECK-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %v64i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <48 x i32> <i32 0, i32 16, i32 32, i32 1, i32 17, i32 33, i32 2, i32 18, i32 34, i32 3, i32 19, i32 35, i32 4, i32 20, i32 36, i32 5, i32 21, i32 37, i32 6, i32 22, i32 38, i32 7, i32 23, i32 39, i32 8, i32 24, i32 40, i32 9, i32 25, i32 41, i32 10, i32 26, i32 42, i32 11, i32 27, i32 43, i32 12, i32 28, i32 44, i32 13, i32 29, i32 45, i32 14, i32 30, i32 46, i32 15, i32 31, i32 47>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <48 x i32> %v64i32, ptr %p, align 256
-; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v8i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <6 x i64> %v8i64, ptr %p, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %v16i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <12 x i64> %v16i64, ptr %p, align 128
-; CHECK-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %v32i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <24 x i64> %v32i64, ptr %p, align 256
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1536 for instruction: %v64i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <48 x i32> <i32 0, i32 16, i32 32, i32 1, i32 17, i32 33, i32 2, i32 18, i32 34, i32 3, i32 19, i32 35, i32 4, i32 20, i32 36, i32 5, i32 21, i32 37, i32 6, i32 22, i32 38, i32 7, i32 23, i32 39, i32 8, i32 24, i32 40, i32 9, i32 25, i32 41, i32 10, i32 26, i32 42, i32 11, i32 27, i32 43, i32 12, i32 28, i32 44, i32 13, i32 29, i32 45, i32 14, i32 30, i32 46, i32 15, i32 31, i32 47>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: store <48 x i64> %v64i64, ptr %p, align 512
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of RThru:96 CodeSize:48 Lat:96 SizeLat:96 for: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
+; CHECK-NEXT: Cost Model: Found costs of RThru:50 CodeSize:1 Lat:1 SizeLat:1 for: store <6 x i8> %v8i8, ptr %p, align 8
+; CHECK-NEXT: Cost Model: Found costs of RThru:192 CodeSize:96 Lat:192 SizeLat:192 for: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+; CHECK-NEXT: Cost Model: Found costs of RThru:98 CodeSize:1 Lat:1 SizeLat:1 for: store <12 x i8> %v16i8, ptr %p, align 16
+; CHECK-NEXT: Cost Model: Found costs of RThru:384 CodeSize:192 Lat:384 SizeLat:384 for: %v32i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <24 x i8> %v32i8, ptr %p, align 32
+; CHECK-NEXT: Cost Model: Found costs of RThru:768 CodeSize:384 Lat:768 SizeLat:768 for: %v64i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <48 x i32> <i32 0, i32 16, i32 32, i32 1, i32 17, i32 33, i32 2, i32 18, i32 34, i32 3, i32 19, i32 35, i32 4, i32 20, i32 36, i32 5, i32 21, i32 37, i32 6, i32 22, i32 38, i32 7, i32 23, i32 39, i32 8, i32 24, i32 40, i32 9, i32 25, i32 41, i32 10, i32 26, i32 42, i32 11, i32 27, i32 43, i32 12, i32 28, i32 44, i32 13, i32 29, i32 45, i32 14, i32 30, i32 46, i32 15, i32 31, i32 47>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <48 x i8> %v64i8, ptr %p, align 64
+; CHECK-NEXT: Cost Model: Found costs of RThru:96 CodeSize:48 Lat:96 SizeLat:96 for: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
+; CHECK-NEXT: Cost Model: Found costs of RThru:50 CodeSize:1 Lat:1 SizeLat:1 for: store <6 x i16> %v8i16, ptr %p, align 16
+; CHECK-NEXT: Cost Model: Found costs of RThru:192 CodeSize:96 Lat:192 SizeLat:192 for: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <12 x i16> %v16i16, ptr %p, align 32
+; CHECK-NEXT: Cost Model: Found costs of RThru:384 CodeSize:192 Lat:384 SizeLat:384 for: %v32i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <24 x i16> %v32i16, ptr %p, align 64
+; CHECK-NEXT: Cost Model: Found costs of RThru:768 CodeSize:384 Lat:768 SizeLat:768 for: %v64i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <48 x i32> <i32 0, i32 16, i32 32, i32 1, i32 17, i32 33, i32 2, i32 18, i32 34, i32 3, i32 19, i32 35, i32 4, i32 20, i32 36, i32 5, i32 21, i32 37, i32 6, i32 22, i32 38, i32 7, i32 23, i32 39, i32 8, i32 24, i32 40, i32 9, i32 25, i32 41, i32 10, i32 26, i32 42, i32 11, i32 27, i32 43, i32 12, i32 28, i32 44, i32 13, i32 29, i32 45, i32 14, i32 30, i32 46, i32 15, i32 31, i32 47>
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <48 x i16> %v64i16, ptr %p, align 128
+; CHECK-NEXT: Cost Model: Found costs of RThru:96 CodeSize:48 Lat:96 SizeLat:96 for: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <6 x i32> %v8i32, ptr %p, align 32
+; CHECK-NEXT: Cost Model: Found costs of RThru:192 CodeSize:96 Lat:192 SizeLat:192 for: %v16i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <12 x i32> %v16i32, ptr %p, align 64
+; CHECK-NEXT: Cost Model: Found costs of RThru:384 CodeSize:192 Lat:384 SizeLat:384 for: %v32i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <24 x i32> %v32i32, ptr %p, align 128
+; CHECK-NEXT: Cost Model: Found costs of RThru:768 CodeSize:384 Lat:768 SizeLat:768 for: %v64i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <48 x i32> <i32 0, i32 16, i32 32, i32 1, i32 17, i32 33, i32 2, i32 18, i32 34, i32 3, i32 19, i32 35, i32 4, i32 20, i32 36, i32 5, i32 21, i32 37, i32 6, i32 22, i32 38, i32 7, i32 23, i32 39, i32 8, i32 24, i32 40, i32 9, i32 25, i32 41, i32 10, i32 26, i32 42, i32 11, i32 27, i32 43, i32 12, i32 28, i32 44, i32 13, i32 29, i32 45, i32 14, i32 30, i32 46, i32 15, i32 31, i32 47>
+; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:1 Lat:1 SizeLat:1 for: store <48 x i32> %v64i32, ptr %p, align 256
+; CHECK-NEXT: Cost Model: Found costs of RThru:192 CodeSize:96 Lat:192 SizeLat:192 for: %v8i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <6 x i64> %v8i64, ptr %p, align 64
+; CHECK-NEXT: Cost Model: Found costs of RThru:384 CodeSize:192 Lat:384 SizeLat:384 for: %v16i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <12 x i64> %v16i64, ptr %p, align 128
+; CHECK-NEXT: Cost Model: Found costs of RThru:768 CodeSize:384 Lat:768 SizeLat:768 for: %v32i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
+; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:1 Lat:1 SizeLat:1 for: store <24 x i64> %v32i64, ptr %p, align 256
+; CHECK-NEXT: Cost Model: Found costs of RThru:1536 CodeSize:768 Lat:1536 SizeLat:1536 for: %v64i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <48 x i32> <i32 0, i32 16, i32 32, i32 1, i32 17, i32 33, i32 2, i32 18, i32 34, i32 3, i32 19, i32 35, i32 4, i32 20, i32 36, i32 5, i32 21, i32 37, i32 6, i32 22, i32 38, i32 7, i32 23, i32 39, i32 8, i32 24, i32 40, i32 9, i32 25, i32 41, i32 10, i32 26, i32 42, i32 11, i32 27, i32 43, i32 12, i32 28, i32 44, i32 13, i32 29, i32 45, i32 14, i32 30, i32 46, i32 15, i32 31, i32 47>
+; CHECK-NEXT: Cost Model: Found costs of RThru:64 CodeSize:1 Lat:1 SizeLat:1 for: store <48 x i64> %v64i64, ptr %p, align 512
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
store <6 x i8> %v8i8, ptr %p
@@ -659,74 +659,74 @@ define void @vst3(ptr %p) {
define void @vst4(ptr %p) {
; CHECK-UF2-LABEL: 'vst4'
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8> %v8i8, ptr %p, align 8
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16i8, ptr %p, align 8
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v32i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i8> %v32i8, ptr %p, align 32
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v64i8 = shufflevector <64 x i8> undef, <64 x i8> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <64 x i8> %v64i8, ptr %p, align 64
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v8i16, ptr %p, align 8
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i16> %v16i16, ptr %p, align 32
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v32i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <32 x i16> %v32i16, ptr %p, align 64
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v64i16 = shufflevector <64 x i16> undef, <64 x i16> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <64 x i16> %v64i16, ptr %p, align 128
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i32> %v8i32, ptr %p, align 32
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <16 x i32> %v16i32, ptr %p, align 64
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v32i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <32 x i32> %v32i32, ptr %p, align 128
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v64i32 = shufflevector <64 x i32> undef, <64 x i32> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <64 x i32> %v64i32, ptr %p, align 256
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i64> %v8i64, ptr %p, align 64
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i64> %v16i64, ptr %p, align 128
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v32i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <32 x i64> %v32i64, ptr %p, align 256
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %v64i64 = shufflevector <64 x i64> undef, <64 x i64> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: store <64 x i64> %v64i64, ptr %p, align 512
-; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i8> %v8i8, ptr %p, align 8
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i8> %v16i8, ptr %p, align 8
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v32i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i8> %v32i8, ptr %p, align 32
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v64i8 = shufflevector <64 x i8> undef, <64 x i8> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <64 x i8> %v64i8, ptr %p, align 64
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i16> %v8i16, ptr %p, align 8
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i16> %v16i16, ptr %p, align 32
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v32i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i16> %v32i16, ptr %p, align 64
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v64i16 = shufflevector <64 x i16> undef, <64 x i16> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <64 x i16> %v64i16, ptr %p, align 128
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i32> %v8i32, ptr %p, align 32
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i32> %v16i32, ptr %p, align 64
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v32i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i32> %v32i32, ptr %p, align 128
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v64i32 = shufflevector <64 x i32> undef, <64 x i32> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:1 Lat:1 SizeLat:1 for: store <64 x i32> %v64i32, ptr %p, align 256
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i64> %v8i64, ptr %p, align 64
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i64> %v16i64, ptr %p, align 128
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v32i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i64> %v32i64, ptr %p, align 256
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:2048 CodeSize:1024 Lat:2048 SizeLat:2048 for: %v64i64 = shufflevector <64 x i64> undef, <64 x i64> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:64 CodeSize:1 Lat:1 SizeLat:1 for: store <64 x i64> %v64i64, ptr %p, align 512
+; CHECK-UF2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-UF4-LABEL: 'vst4'
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8> %v8i8, ptr %p, align 8
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16i8, ptr %p, align 8
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i8> %v32i8, ptr %p, align 32
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i8 = shufflevector <64 x i8> undef, <64 x i8> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <64 x i8> %v64i8, ptr %p, align 64
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v8i16, ptr %p, align 8
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i16> %v16i16, ptr %p, align 32
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <32 x i16> %v32i16, ptr %p, align 64
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i16 = shufflevector <64 x i16> undef, <64 x i16> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <64 x i16> %v64i16, ptr %p, align 128
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i32> %v8i32, ptr %p, align 32
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <16 x i32> %v16i32, ptr %p, align 64
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <32 x i32> %v32i32, ptr %p, align 128
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v64i32 = shufflevector <64 x i32> undef, <64 x i32> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <64 x i32> %v64i32, ptr %p, align 256
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i64> %v8i64, ptr %p, align 64
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i64> %v16i64, ptr %p, align 128
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v32i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <32 x i64> %v32i64, ptr %p, align 256
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %v64i64 = shufflevector <64 x i64> undef, <64 x i64> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 64 for instruction: store <64 x i64> %v64i64, ptr %p, align 512
-; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i8> %v8i8, ptr %p, align 8
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i8> %v16i8, ptr %p, align 8
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v32i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i8> %v32i8, ptr %p, align 32
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v64i8 = shufflevector <64 x i8> undef, <64 x i8> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <64 x i8> %v64i8, ptr %p, align 64
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i16> %v8i16, ptr %p, align 8
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i16> %v16i16, ptr %p, align 32
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v32i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i16> %v32i16, ptr %p, align 64
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v64i16 = shufflevector <64 x i16> undef, <64 x i16> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <64 x i16> %v64i16, ptr %p, align 128
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i32> %v8i32, ptr %p, align 32
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i32> %v16i32, ptr %p, align 64
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v32i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i32> %v32i32, ptr %p, align 128
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v64i32 = shufflevector <64 x i32> undef, <64 x i32> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:32 CodeSize:1 Lat:1 SizeLat:1 for: store <64 x i32> %v64i32, ptr %p, align 256
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:256 CodeSize:128 Lat:256 SizeLat:256 for: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: store <8 x i64> %v8i64, ptr %p, align 64
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:512 CodeSize:256 Lat:512 SizeLat:512 for: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: store <16 x i64> %v16i64, ptr %p, align 128
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:1024 CodeSize:512 Lat:1024 SizeLat:1024 for: %v32i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:32 CodeSize:1 Lat:1 SizeLat:1 for: store <32 x i64> %v32i64, ptr %p, align 256
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:2048 CodeSize:1024 Lat:2048 SizeLat:2048 for: %v64i64 = shufflevector <64 x i64> undef, <64 x i64> undef, <64 x i32> <i32 0, i32 16, i32 32, i32 48, i32 1, i32 17, i32 33, i32 49, i32 2, i32 18, i32 34, i32 50, i32 3, i32 19, i32 35, i32 51, i32 4, i32 20, i32 36, i32 52, i32 5, i32 21, i32 37, i32 53, i32 6, i32 22, i32 38, i32 54, i32 7, i32 23, i32 39, i32 55, i32 8, i32 24, i32 40, i32 56, i32 9, i32 25, i32 41, i32 57, i32 10, i32 26, i32 42, i32 58, i32 11, i32 27, i32 43, i32 59, i32 12, i32 28, i32 44, i32 60, i32 13, i32 29, i32 45, i32 61, i32 14, i32 30, i32 46, i32 62, i32 15, i32 31, i32 47, i32 63>
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:64 CodeSize:1 Lat:1 SizeLat:1 for: store <64 x i64> %v64i64, ptr %p, align 512
+; CHECK-UF4-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
store <8 x i8> %v8i8, ptr %p
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-vecreduce-add.ll b/llvm/test/Analysis/CostModel/ARM/mve-vecreduce-add.ll
index 8edc2e6fc156..5f1bce902b93 100644
--- a/llvm/test/Analysis/CostModel/ARM/mve-vecreduce-add.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-vecreduce-add.ll
@@ -1,16 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -S -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve < %s | FileCheck %s
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
define void @add_i8() {
; CHECK-LABEL: 'add_i8'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
@@ -27,32 +27,32 @@ define void @add_i8() {
define void @add_i16() {
; CHECK-LABEL: 'add_i16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2za = zext <4 x i8> undef to <4 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sa = sext <4 x i8> undef to <4 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3za = zext <8 x i8> undef to <8 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3sa = sext <8 x i8> undef to <8 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4za = zext <16 x i8> undef to <16 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4sa = sext <16 x i8> undef to <16 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0za = zext <1 x i8> undef to <1 x i16>
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0sa = sext <1 x i8> undef to <1 x i16>
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a1za = zext <2 x i8> undef to <2 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %a1sa = sext <2 x i8> undef to <2 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a2za = zext <4 x i8> undef to <4 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a2sa = sext <4 x i8> undef to <4 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3za = zext <8 x i8> undef to <8 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3sa = sext <8 x i8> undef to <8 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a4za = zext <16 x i8> undef to <16 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a4sa = sext <16 x i8> undef to <16 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sa)
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%a0za = zext <1 x i8> undef to <1 x i16>
%a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0za)
@@ -99,52 +99,52 @@ define void @add_i16() {
define void @add_i32() {
; CHECK-LABEL: 'add_i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2za = zext <4 x i8> undef to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2sa = sext <4 x i8> undef to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3za = zext <8 x i8> undef to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3sa = sext <8 x i8> undef to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4za = zext <16 x i8> undef to <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4sa = sext <16 x i8> undef to <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5za = zext <1 x i16> undef to <1 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5sa = sext <1 x i16> undef to <1 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a6za = zext <2 x i16> undef to <2 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6sa = sext <2 x i16> undef to <2 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7za = zext <4 x i16> undef to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7sa = sext <4 x i16> undef to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8za = zext <8 x i16> undef to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8sa = sext <8 x i16> undef to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9za = zext <16 x i16> undef to <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9sa = sext <16 x i16> undef to <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0za = zext <1 x i8> undef to <1 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0sa = sext <1 x i8> undef to <1 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a1za = zext <2 x i8> undef to <2 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %a1sa = sext <2 x i8> undef to <2 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a2za = zext <4 x i8> undef to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a2sa = sext <4 x i8> undef to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a3za = zext <8 x i8> undef to <8 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a3sa = sext <8 x i8> undef to <8 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a4za = zext <16 x i8> undef to <16 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a4sa = sext <16 x i8> undef to <16 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a5za = zext <1 x i16> undef to <1 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a5sa = sext <1 x i16> undef to <1 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a6za = zext <2 x i16> undef to <2 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %a6sa = sext <2 x i16> undef to <2 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7za = zext <4 x i16> undef to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7sa = sext <4 x i16> undef to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a8za = zext <8 x i16> undef to <8 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a8sa = sext <8 x i16> undef to <8 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %a9za = zext <16 x i16> undef to <16 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %a9sa = sext <16 x i16> undef to <16 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sa)
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%a0za = zext <1 x i8> undef to <1 x i32>
%a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0za)
@@ -221,72 +221,72 @@ define void @add_i32() {
define void @add_i64() {
; CHECK-LABEL: 'add_i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0za = zext <1 x i8> undef to <1 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0sa = sext <1 x i8> undef to <1 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1za = zext <2 x i8> undef to <2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a1sa = sext <2 x i8> undef to <2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a2za = zext <4 x i8> undef to <4 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a2sa = sext <4 x i8> undef to <4 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a3za = zext <8 x i8> undef to <8 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a3sa = sext <8 x i8> undef to <8 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %a4za = zext <16 x i8> undef to <16 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %a4sa = sext <16 x i8> undef to <16 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a5za = zext <1 x i16> undef to <1 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %a5sa = sext <1 x i16> undef to <1 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6za = zext <2 x i16> undef to <2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a6sa = sext <2 x i16> undef to <2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a7za = zext <4 x i16> undef to <4 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a7sa = sext <4 x i16> undef to <4 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a8za = zext <8 x i16> undef to <8 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a8sa = sext <8 x i16> undef to <8 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %a9za = zext <16 x i16> undef to <16 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %a9sa = sext <16 x i16> undef to <16 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10za = zext <1 x i32> undef to <1 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10sa = sext <1 x i32> undef to <1 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a11za = zext <2 x i32> undef to <2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a11sa = sext <2 x i32> undef to <2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a12za = zext <4 x i32> undef to <4 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12sa = sext <4 x i32> undef to <4 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %a13za = zext <8 x i32> undef to <8 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %a13sa = sext <8 x i32> undef to <8 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %a14za = zext <16 x i32> undef to <16 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14za)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %a14sa = sext <16 x i32> undef to <16 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sa)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a0za = zext <1 x i8> undef to <1 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 8 for: %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a0sa = sext <1 x i8> undef to <1 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 8 for: %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a1za = zext <2 x i8> undef to <2 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:10 Lat:20 SizeLat:20 for: %a1sa = sext <2 x i8> undef to <2 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a2za = zext <4 x i8> undef to <4 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:82 CodeSize:1 Lat:1 SizeLat:1 for: %a2sa = sext <4 x i8> undef to <4 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a3za = zext <8 x i8> undef to <8 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:330 CodeSize:1 Lat:1 SizeLat:1 for: %a3sa = sext <8 x i8> undef to <8 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:298 CodeSize:1 Lat:1 SizeLat:1 for: %a4za = zext <16 x i8> undef to <16 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:1322 CodeSize:1 Lat:1 SizeLat:1 for: %a4sa = sext <16 x i8> undef to <16 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a5za = zext <1 x i16> undef to <1 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 8 for: %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %a5sa = sext <1 x i16> undef to <1 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 8 for: %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a6za = zext <2 x i16> undef to <2 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:10 Lat:20 SizeLat:20 for: %a6sa = sext <2 x i16> undef to <2 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a7za = zext <4 x i16> undef to <4 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:82 CodeSize:1 Lat:1 SizeLat:1 for: %a7sa = sext <4 x i16> undef to <4 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a8za = zext <8 x i16> undef to <8 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:330 CodeSize:1 Lat:1 SizeLat:1 for: %a8sa = sext <8 x i16> undef to <8 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:296 CodeSize:1 Lat:1 SizeLat:1 for: %a9za = zext <16 x i16> undef to <16 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:1320 CodeSize:1 Lat:1 SizeLat:1 for: %a9sa = sext <16 x i16> undef to <16 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a10za = zext <1 x i32> undef to <1 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 8 for: %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a10sa = sext <1 x i32> undef to <1 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 8 for: %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a11za = zext <2 x i32> undef to <2 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %a11sa = sext <2 x i32> undef to <2 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a12za = zext <4 x i32> undef to <4 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:66 CodeSize:1 Lat:1 SizeLat:1 for: %a12sa = sext <4 x i32> undef to <4 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %a13za = zext <8 x i32> undef to <8 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:264 CodeSize:1 Lat:1 SizeLat:1 for: %a13sa = sext <8 x i32> undef to <8 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sa)
+; CHECK-NEXT: Cost Model: Found costs of RThru:288 CodeSize:1 Lat:1 SizeLat:1 for: %a14za = zext <16 x i32> undef to <16 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14za)
+; CHECK-NEXT: Cost Model: Found costs of RThru:1056 CodeSize:1 Lat:1 SizeLat:1 for: %a14sa = sext <16 x i32> undef to <16 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sa)
+; CHECK-NEXT: Cost Model: Found costs of 8 for: %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%a0za = zext <1 x i8> undef to <1 x i64>
%a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0za)
@@ -393,17 +393,17 @@ define void @add_i64() {
define void @mla_i8() {
; CHECK-LABEL: 'mla_i8'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0m = mul <1 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a0m)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a1m = mul <2 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a1m)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2m = mul <4 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a2m)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3m = mul <8 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a3m)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a4m = mul <16 x i8> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a4m)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a0m = mul <1 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a0m)
+; CHECK-NEXT: Cost Model: Found costs of 10 for: %a1m = mul <2 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a1m)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2m = mul <4 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a2m)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3m = mul <8 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a3m)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a4m = mul <16 x i8> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a4m)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%a0m = mul <1 x i8> undef, undef
%a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a0m)
@@ -425,57 +425,57 @@ define void @mla_i8() {
define void @mla_i16() {
; CHECK-LABEL: 'mla_i16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0zb = zext <1 x i8> undef to <1 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0zm = mul <1 x i16> %a0za, %a0zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sb = sext <1 x i8> undef to <1 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sm = mul <1 x i16> %a0sa, %a0sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1zb = zext <2 x i8> undef to <2 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a1zm = mul <2 x i16> %a1za, %a1zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sb = sext <2 x i8> undef to <2 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a1sm = mul <2 x i16> %a1sa, %a1sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2za = zext <4 x i8> undef to <4 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2zb = zext <4 x i8> undef to <4 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2zm = mul <4 x i16> %a2za, %a2zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sa = sext <4 x i8> undef to <4 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sb = sext <4 x i8> undef to <4 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sm = mul <4 x i16> %a2sa, %a2sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3za = zext <8 x i8> undef to <8 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3zb = zext <8 x i8> undef to <8 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3zm = mul <8 x i16> %a3za, %a3zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3sa = sext <8 x i8> undef to <8 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3sb = sext <8 x i8> undef to <8 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3sm = mul <8 x i16> %a3sa, %a3sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4za = zext <16 x i8> undef to <16 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4zb = zext <16 x i8> undef to <16 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4zm = mul <16 x i16> %a4za, %a4zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4sa = sext <16 x i8> undef to <16 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4sb = sext <16 x i8> undef to <16 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4sm = mul <16 x i16> %a4sa, %a4sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5m = mul <1 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a5m)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a6m = mul <2 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a6m)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7m = mul <4 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a7m)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a8m = mul <8 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a8m)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a9m = mul <16 x i16> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a9m)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0za = zext <1 x i8> undef to <1 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0zb = zext <1 x i8> undef to <1 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a0zm = mul <1 x i16> %a0za, %a0zb
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0sa = sext <1 x i8> undef to <1 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0sb = sext <1 x i8> undef to <1 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a0sm = mul <1 x i16> %a0sa, %a0sb
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a1za = zext <2 x i8> undef to <2 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a1zb = zext <2 x i8> undef to <2 x i16>
+; CHECK-NEXT: Cost Model: Found costs of 26 for: %a1zm = mul <2 x i16> %a1za, %a1zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %a1sa = sext <2 x i8> undef to <2 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %a1sb = sext <2 x i8> undef to <2 x i16>
+; CHECK-NEXT: Cost Model: Found costs of 26 for: %a1sm = mul <2 x i16> %a1sa, %a1sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a2za = zext <4 x i8> undef to <4 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a2zb = zext <4 x i8> undef to <4 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2zm = mul <4 x i16> %a2za, %a2zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a2sa = sext <4 x i8> undef to <4 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a2sb = sext <4 x i8> undef to <4 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2sm = mul <4 x i16> %a2sa, %a2sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3za = zext <8 x i8> undef to <8 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3zb = zext <8 x i8> undef to <8 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3zm = mul <8 x i16> %a3za, %a3zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3sa = sext <8 x i8> undef to <8 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3sb = sext <8 x i8> undef to <8 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3sm = mul <8 x i16> %a3sa, %a3sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a4za = zext <16 x i8> undef to <16 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a4zb = zext <16 x i8> undef to <16 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a4zm = mul <16 x i16> %a4za, %a4zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a4sa = sext <16 x i8> undef to <16 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a4sb = sext <16 x i8> undef to <16 x i16>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a4sm = mul <16 x i16> %a4sa, %a4sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a5m = mul <1 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a5m)
+; CHECK-NEXT: Cost Model: Found costs of 10 for: %a6m = mul <2 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a6m)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7m = mul <4 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a7m)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a8m = mul <8 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a8m)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a9m = mul <16 x i16> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a9m)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%a0za = zext <1 x i8> undef to <1 x i16>
%a0zb = zext <1 x i8> undef to <1 x i16>
@@ -547,97 +547,97 @@ define void @mla_i16() {
define void @mla_i32() {
; CHECK-LABEL: 'mla_i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0zb = zext <1 x i8> undef to <1 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0zm = mul <1 x i32> %a0za, %a0zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sb = sext <1 x i8> undef to <1 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sm = mul <1 x i32> %a0sa, %a0sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1zb = zext <2 x i8> undef to <2 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a1zm = mul <2 x i32> %a1za, %a1zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sb = sext <2 x i8> undef to <2 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a1sm = mul <2 x i32> %a1sa, %a1sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2za = zext <4 x i8> undef to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2zb = zext <4 x i8> undef to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2zm = mul <4 x i32> %a2za, %a2zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2sa = sext <4 x i8> undef to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2sb = sext <4 x i8> undef to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sm = mul <4 x i32> %a2sa, %a2sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3za = zext <8 x i8> undef to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3zb = zext <8 x i8> undef to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3zm = mul <8 x i32> %a3za, %a3zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3sa = sext <8 x i8> undef to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3sb = sext <8 x i8> undef to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3sm = mul <8 x i32> %a3sa, %a3sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4za = zext <16 x i8> undef to <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4zb = zext <16 x i8> undef to <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4zm = mul <16 x i32> %a4za, %a4zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4sa = sext <16 x i8> undef to <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4sb = sext <16 x i8> undef to <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4sm = mul <16 x i32> %a4sa, %a4sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5za = zext <1 x i16> undef to <1 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5zb = zext <1 x i16> undef to <1 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5zm = mul <1 x i32> %a5za, %a5zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5sa = sext <1 x i16> undef to <1 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5sb = sext <1 x i16> undef to <1 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5sm = mul <1 x i32> %a5sa, %a5sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a6za = zext <2 x i16> undef to <2 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a6zb = zext <2 x i16> undef to <2 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a6zm = mul <2 x i32> %a6za, %a6zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6sa = sext <2 x i16> undef to <2 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6sb = sext <2 x i16> undef to <2 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a6sm = mul <2 x i32> %a6sa, %a6sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7za = zext <4 x i16> undef to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7zb = zext <4 x i16> undef to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7zm = mul <4 x i32> %a7za, %a7zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7sa = sext <4 x i16> undef to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7sb = sext <4 x i16> undef to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7sm = mul <4 x i32> %a7sa, %a7sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8za = zext <8 x i16> undef to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8zb = zext <8 x i16> undef to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8zm = mul <8 x i32> %a8za, %a8zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8sa = sext <8 x i16> undef to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8sb = sext <8 x i16> undef to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8sm = mul <8 x i32> %a8sa, %a8sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9za = zext <16 x i16> undef to <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9zb = zext <16 x i16> undef to <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9zm = mul <16 x i32> %a9za, %a9zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9sa = sext <16 x i16> undef to <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9sb = sext <16 x i16> undef to <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9sm = mul <16 x i32> %a9sa, %a9sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a10m = mul <1 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a10m)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a11m = mul <2 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a11m)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a12m = mul <4 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a12m)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a13m = mul <8 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a13m)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a14m = mul <16 x i32> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a14m)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0za = zext <1 x i8> undef to <1 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0zb = zext <1 x i8> undef to <1 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a0zm = mul <1 x i32> %a0za, %a0zb
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0sa = sext <1 x i8> undef to <1 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a0sb = sext <1 x i8> undef to <1 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a0sm = mul <1 x i32> %a0sa, %a0sb
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a1za = zext <2 x i8> undef to <2 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a1zb = zext <2 x i8> undef to <2 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 26 for: %a1zm = mul <2 x i32> %a1za, %a1zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %a1sa = sext <2 x i8> undef to <2 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %a1sb = sext <2 x i8> undef to <2 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 26 for: %a1sm = mul <2 x i32> %a1sa, %a1sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a2za = zext <4 x i8> undef to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a2zb = zext <4 x i8> undef to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2zm = mul <4 x i32> %a2za, %a2zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a2sa = sext <4 x i8> undef to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a2sb = sext <4 x i8> undef to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2sm = mul <4 x i32> %a2sa, %a2sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a3za = zext <8 x i8> undef to <8 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a3zb = zext <8 x i8> undef to <8 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a3zm = mul <8 x i32> %a3za, %a3zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a3sa = sext <8 x i8> undef to <8 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a3sb = sext <8 x i8> undef to <8 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a3sm = mul <8 x i32> %a3sa, %a3sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a4za = zext <16 x i8> undef to <16 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a4zb = zext <16 x i8> undef to <16 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a4zm = mul <16 x i32> %a4za, %a4zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a4sa = sext <16 x i8> undef to <16 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a4sb = sext <16 x i8> undef to <16 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a4sm = mul <16 x i32> %a4sa, %a4sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a5za = zext <1 x i16> undef to <1 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a5zb = zext <1 x i16> undef to <1 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a5zm = mul <1 x i32> %a5za, %a5zb
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a5sa = sext <1 x i16> undef to <1 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a5sb = sext <1 x i16> undef to <1 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a5sm = mul <1 x i32> %a5sa, %a5sb
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a6za = zext <2 x i16> undef to <2 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a6zb = zext <2 x i16> undef to <2 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 26 for: %a6zm = mul <2 x i32> %a6za, %a6zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %a6sa = sext <2 x i16> undef to <2 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %a6sb = sext <2 x i16> undef to <2 x i32>
+; CHECK-NEXT: Cost Model: Found costs of 26 for: %a6sm = mul <2 x i32> %a6sa, %a6sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7za = zext <4 x i16> undef to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7zb = zext <4 x i16> undef to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7zm = mul <4 x i32> %a7za, %a7zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7sa = sext <4 x i16> undef to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7sb = sext <4 x i16> undef to <4 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7sm = mul <4 x i32> %a7sa, %a7sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a8za = zext <8 x i16> undef to <8 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a8zb = zext <8 x i16> undef to <8 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a8zm = mul <8 x i32> %a8za, %a8zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a8sa = sext <8 x i16> undef to <8 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %a8sb = sext <8 x i16> undef to <8 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a8sm = mul <8 x i32> %a8sa, %a8sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %a9za = zext <16 x i16> undef to <16 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %a9zb = zext <16 x i16> undef to <16 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a9zm = mul <16 x i32> %a9za, %a9zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %a9sa = sext <16 x i16> undef to <16 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %a9sb = sext <16 x i16> undef to <16 x i32>
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a9sm = mul <16 x i32> %a9sa, %a9sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a10m = mul <1 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 4 for: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a10m)
+; CHECK-NEXT: Cost Model: Found costs of 10 for: %a11m = mul <2 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a11m)
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a12m = mul <4 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a12m)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a13m = mul <8 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a13m)
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a14m = mul <16 x i32> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a14m)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%a0za = zext <1 x i8> undef to <1 x i32>
%a0zb = zext <1 x i8> undef to <1 x i32>
@@ -759,137 +759,137 @@ define void @mla_i32() {
define void @mla_i64() {
; CHECK-LABEL: 'mla_i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0za = zext <1 x i8> undef to <1 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0zb = zext <1 x i8> undef to <1 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0zm = mul <1 x i64> %a0za, %a0zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0sa = sext <1 x i8> undef to <1 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0sb = sext <1 x i8> undef to <1 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0sm = mul <1 x i64> %a0sa, %a0sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1za = zext <2 x i8> undef to <2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1zb = zext <2 x i8> undef to <2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a1zm = mul <2 x i64> %a1za, %a1zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a1sa = sext <2 x i8> undef to <2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a1sb = sext <2 x i8> undef to <2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a1sm = mul <2 x i64> %a1sa, %a1sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a2za = zext <4 x i8> undef to <4 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a2zb = zext <4 x i8> undef to <4 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a2zm = mul <4 x i64> %a2za, %a2zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a2sa = sext <4 x i8> undef to <4 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a2sb = sext <4 x i8> undef to <4 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a2sm = mul <4 x i64> %a2sa, %a2sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a3za = zext <8 x i8> undef to <8 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a3zb = zext <8 x i8> undef to <8 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a3zm = mul <8 x i64> %a3za, %a3zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a3sa = sext <8 x i8> undef to <8 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a3sb = sext <8 x i8> undef to <8 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a3sm = mul <8 x i64> %a3sa, %a3sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %a4za = zext <16 x i8> undef to <16 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %a4zb = zext <16 x i8> undef to <16 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a4zm = mul <16 x i64> %a4za, %a4zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %a4sa = sext <16 x i8> undef to <16 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %a4sb = sext <16 x i8> undef to <16 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a4sm = mul <16 x i64> %a4sa, %a4sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a5za = zext <1 x i16> undef to <1 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a5zb = zext <1 x i16> undef to <1 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5zm = mul <1 x i64> %a5za, %a5zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %a5sa = sext <1 x i16> undef to <1 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %a5sb = sext <1 x i16> undef to <1 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5sm = mul <1 x i64> %a5sa, %a5sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6za = zext <2 x i16> undef to <2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6zb = zext <2 x i16> undef to <2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a6zm = mul <2 x i64> %a6za, %a6zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a6sa = sext <2 x i16> undef to <2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a6sb = sext <2 x i16> undef to <2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a6sm = mul <2 x i64> %a6sa, %a6sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a7za = zext <4 x i16> undef to <4 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a7zb = zext <4 x i16> undef to <4 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a7zm = mul <4 x i64> %a7za, %a7zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a7sa = sext <4 x i16> undef to <4 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a7sb = sext <4 x i16> undef to <4 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a7sm = mul <4 x i64> %a7sa, %a7sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a8za = zext <8 x i16> undef to <8 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a8zb = zext <8 x i16> undef to <8 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a8zm = mul <8 x i64> %a8za, %a8zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a8sa = sext <8 x i16> undef to <8 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a8sb = sext <8 x i16> undef to <8 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a8sm = mul <8 x i64> %a8sa, %a8sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %a9za = zext <16 x i16> undef to <16 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %a9zb = zext <16 x i16> undef to <16 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a9zm = mul <16 x i64> %a9za, %a9zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %a9sa = sext <16 x i16> undef to <16 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %a9sb = sext <16 x i16> undef to <16 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a9sm = mul <16 x i64> %a9sa, %a9sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10za = zext <1 x i32> undef to <1 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10zb = zext <1 x i32> undef to <1 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a10zm = mul <1 x i64> %a10za, %a10zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10sa = sext <1 x i32> undef to <1 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10sb = sext <1 x i32> undef to <1 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a10sm = mul <1 x i64> %a10sa, %a10sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a11za = zext <2 x i32> undef to <2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a11zb = zext <2 x i32> undef to <2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a11zm = mul <2 x i64> %a11za, %a11zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a11sa = sext <2 x i32> undef to <2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a11sb = sext <2 x i32> undef to <2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a11sm = mul <2 x i64> %a11sa, %a11sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a12za = zext <4 x i32> undef to <4 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a12zb = zext <4 x i32> undef to <4 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a12zm = mul <4 x i64> %a12za, %a12zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12sa = sext <4 x i32> undef to <4 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12sb = sext <4 x i32> undef to <4 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a12sm = mul <4 x i64> %a12sa, %a12sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %a13za = zext <8 x i32> undef to <8 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %a13zb = zext <8 x i32> undef to <8 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a13zm = mul <8 x i64> %a13za, %a13zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %a13sa = sext <8 x i32> undef to <8 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %a13sb = sext <8 x i32> undef to <8 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a13sm = mul <8 x i64> %a13sa, %a13sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %a14za = zext <16 x i32> undef to <16 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %a14zb = zext <16 x i32> undef to <16 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a14zm = mul <16 x i64> %a14za, %a14zb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14zm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %a14sa = sext <16 x i32> undef to <16 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %a14sb = sext <16 x i32> undef to <16 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a14sm = mul <16 x i64> %a14sa, %a14sb
-; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sm)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a15m = mul <1 x i64> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a15m)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a16m = mul <2 x i64> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a16m)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a17m = mul <4 x i64> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a17m)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %a18m = mul <8 x i64> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a18m)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %a19m = mul <16 x i64> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a19m)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a0za = zext <1 x i8> undef to <1 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a0zb = zext <1 x i8> undef to <1 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a0zm = mul <1 x i64> %a0za, %a0zb
+; CHECK-NEXT: Cost Model: Found costs of 8 for: %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a0sa = sext <1 x i8> undef to <1 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a0sb = sext <1 x i8> undef to <1 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a0sm = mul <1 x i64> %a0sa, %a0sb
+; CHECK-NEXT: Cost Model: Found costs of 8 for: %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a1za = zext <2 x i8> undef to <2 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a1zb = zext <2 x i8> undef to <2 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 52 for: %a1zm = mul <2 x i64> %a1za, %a1zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:10 Lat:20 SizeLat:20 for: %a1sa = sext <2 x i8> undef to <2 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:10 Lat:20 SizeLat:20 for: %a1sb = sext <2 x i8> undef to <2 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 52 for: %a1sm = mul <2 x i64> %a1sa, %a1sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a2za = zext <4 x i8> undef to <4 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a2zb = zext <4 x i8> undef to <4 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 104 for: %a2zm = mul <4 x i64> %a2za, %a2zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:82 CodeSize:1 Lat:1 SizeLat:1 for: %a2sa = sext <4 x i8> undef to <4 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:82 CodeSize:1 Lat:1 SizeLat:1 for: %a2sb = sext <4 x i8> undef to <4 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 104 for: %a2sm = mul <4 x i64> %a2sa, %a2sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a3za = zext <8 x i8> undef to <8 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a3zb = zext <8 x i8> undef to <8 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 208 for: %a3zm = mul <8 x i64> %a3za, %a3zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:330 CodeSize:1 Lat:1 SizeLat:1 for: %a3sa = sext <8 x i8> undef to <8 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:330 CodeSize:1 Lat:1 SizeLat:1 for: %a3sb = sext <8 x i8> undef to <8 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 208 for: %a3sm = mul <8 x i64> %a3sa, %a3sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:298 CodeSize:1 Lat:1 SizeLat:1 for: %a4za = zext <16 x i8> undef to <16 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:298 CodeSize:1 Lat:1 SizeLat:1 for: %a4zb = zext <16 x i8> undef to <16 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 416 for: %a4zm = mul <16 x i64> %a4za, %a4zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:1322 CodeSize:1 Lat:1 SizeLat:1 for: %a4sa = sext <16 x i8> undef to <16 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:1322 CodeSize:1 Lat:1 SizeLat:1 for: %a4sb = sext <16 x i8> undef to <16 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 416 for: %a4sm = mul <16 x i64> %a4sa, %a4sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a5za = zext <1 x i16> undef to <1 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a5zb = zext <1 x i16> undef to <1 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a5zm = mul <1 x i64> %a5za, %a5zb
+; CHECK-NEXT: Cost Model: Found costs of 8 for: %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %a5sa = sext <1 x i16> undef to <1 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %a5sb = sext <1 x i16> undef to <1 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a5sm = mul <1 x i64> %a5sa, %a5sb
+; CHECK-NEXT: Cost Model: Found costs of 8 for: %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a6za = zext <2 x i16> undef to <2 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a6zb = zext <2 x i16> undef to <2 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 52 for: %a6zm = mul <2 x i64> %a6za, %a6zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:10 Lat:20 SizeLat:20 for: %a6sa = sext <2 x i16> undef to <2 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:10 Lat:20 SizeLat:20 for: %a6sb = sext <2 x i16> undef to <2 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 52 for: %a6sm = mul <2 x i64> %a6sa, %a6sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a7za = zext <4 x i16> undef to <4 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a7zb = zext <4 x i16> undef to <4 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 104 for: %a7zm = mul <4 x i64> %a7za, %a7zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:82 CodeSize:1 Lat:1 SizeLat:1 for: %a7sa = sext <4 x i16> undef to <4 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:82 CodeSize:1 Lat:1 SizeLat:1 for: %a7sb = sext <4 x i16> undef to <4 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 104 for: %a7sm = mul <4 x i64> %a7sa, %a7sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a8za = zext <8 x i16> undef to <8 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:74 CodeSize:1 Lat:1 SizeLat:1 for: %a8zb = zext <8 x i16> undef to <8 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 208 for: %a8zm = mul <8 x i64> %a8za, %a8zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:330 CodeSize:1 Lat:1 SizeLat:1 for: %a8sa = sext <8 x i16> undef to <8 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:330 CodeSize:1 Lat:1 SizeLat:1 for: %a8sb = sext <8 x i16> undef to <8 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 208 for: %a8sm = mul <8 x i64> %a8sa, %a8sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:296 CodeSize:1 Lat:1 SizeLat:1 for: %a9za = zext <16 x i16> undef to <16 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:296 CodeSize:1 Lat:1 SizeLat:1 for: %a9zb = zext <16 x i16> undef to <16 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 416 for: %a9zm = mul <16 x i64> %a9za, %a9zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:1320 CodeSize:1 Lat:1 SizeLat:1 for: %a9sa = sext <16 x i16> undef to <16 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:1320 CodeSize:1 Lat:1 SizeLat:1 for: %a9sb = sext <16 x i16> undef to <16 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 416 for: %a9sm = mul <16 x i64> %a9sa, %a9sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a10za = zext <1 x i32> undef to <1 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a10zb = zext <1 x i32> undef to <1 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a10zm = mul <1 x i64> %a10za, %a10zb
+; CHECK-NEXT: Cost Model: Found costs of 8 for: %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a10sa = sext <1 x i32> undef to <1 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:1 Lat:1 SizeLat:1 for: %a10sb = sext <1 x i32> undef to <1 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a10sm = mul <1 x i64> %a10sa, %a10sb
+; CHECK-NEXT: Cost Model: Found costs of 8 for: %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a11za = zext <2 x i32> undef to <2 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a11zb = zext <2 x i32> undef to <2 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 52 for: %a11zm = mul <2 x i64> %a11za, %a11zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %a11sa = sext <2 x i32> undef to <2 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %a11sb = sext <2 x i32> undef to <2 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 52 for: %a11sm = mul <2 x i64> %a11sa, %a11sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a12za = zext <4 x i32> undef to <4 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %a12zb = zext <4 x i32> undef to <4 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 104 for: %a12zm = mul <4 x i64> %a12za, %a12zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:66 CodeSize:1 Lat:1 SizeLat:1 for: %a12sa = sext <4 x i32> undef to <4 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:66 CodeSize:1 Lat:1 SizeLat:1 for: %a12sb = sext <4 x i32> undef to <4 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 104 for: %a12sm = mul <4 x i64> %a12sa, %a12sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %a13za = zext <8 x i32> undef to <8 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %a13zb = zext <8 x i32> undef to <8 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 208 for: %a13zm = mul <8 x i64> %a13za, %a13zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:264 CodeSize:1 Lat:1 SizeLat:1 for: %a13sa = sext <8 x i32> undef to <8 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:264 CodeSize:1 Lat:1 SizeLat:1 for: %a13sb = sext <8 x i32> undef to <8 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 208 for: %a13sm = mul <8 x i64> %a13sa, %a13sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:288 CodeSize:1 Lat:1 SizeLat:1 for: %a14za = zext <16 x i32> undef to <16 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:288 CodeSize:1 Lat:1 SizeLat:1 for: %a14zb = zext <16 x i32> undef to <16 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 416 for: %a14zm = mul <16 x i64> %a14za, %a14zb
+; CHECK-NEXT: Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14zm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:1056 CodeSize:1 Lat:1 SizeLat:1 for: %a14sa = sext <16 x i32> undef to <16 x i64>
+; CHECK-NEXT: Cost Model: Found costs of RThru:1056 CodeSize:1 Lat:1 SizeLat:1 for: %a14sb = sext <16 x i32> undef to <16 x i64>
+; CHECK-NEXT: Cost Model: Found costs of 416 for: %a14sm = mul <16 x i64> %a14sa, %a14sb
+; CHECK-NEXT: Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sm)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %a15m = mul <1 x i64> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of 8 for: %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a15m)
+; CHECK-NEXT: Cost Model: Found costs of 20 for: %a16m = mul <2 x i64> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a16m)
+; CHECK-NEXT: Cost Model: Found costs of 40 for: %a17m = mul <4 x i64> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a17m)
+; CHECK-NEXT: Cost Model: Found costs of 80 for: %a18m = mul <8 x i64> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a18m)
+; CHECK-NEXT: Cost Model: Found costs of 160 for: %a19m = mul <16 x i64> undef, undef
+; CHECK-NEXT: Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a19m)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%a0za = zext <1 x i8> undef to <1 x i64>
%a0zb = zext <1 x i8> undef to <1 x i64>