// RUN: %clang_cc1 -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,NOOPT %s // RUN: %clang_cc1 -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,OPT %s typedef double dx5x5_t __attribute__((matrix_type(5, 5))); typedef float fx2x3_t __attribute__((matrix_type(2, 3))); typedef int ix9x3_t __attribute__((matrix_type(9, 3))); typedef unsigned long long ullx4x2_t __attribute__((matrix_type(4, 2))); // Floating point matrix/scalar additions. void add_matrix_matrix_double(dx5x5_t a, dx5x5_t b, dx5x5_t c) { // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b, <25 x double> noundef %c) // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // NOOPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[B]], [[C]] // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 a = b + c; } void add_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) { // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b) // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[A]], [[B]] // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 a += b; } void subtract_compound_assign_matrix_double(dx5x5_t a, dx5x5_t b) { // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_double(<25 x double> noundef %a, <25 x double> noundef %b) // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[RES:%.*]] = fsub <25 x double> [[A]], [[B]] // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 a -= b; } void add_matrix_matrix_float(fx2x3_t a, fx2x3_t b, fx2x3_t c) { // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b, <6 x float> noundef %c) // NOOPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} // NOOPT-NEXT: [[C:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} // OPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[C:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[B]], [[C]] // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 a = b + c; } void add_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) { // CHECK-LABEL: define{{.*}} void @add_compound_assign_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b) // NOOPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} // NOOPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} // OPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[A]], [[B]] // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 a += b; } void subtract_compound_assign_matrix_float(fx2x3_t a, fx2x3_t b) { // CHECK-LABEL: define{{.*}} void @subtract_compound_assign_matrix_float(<6 x float> noundef %a, <6 x float> noundef %b) // NOOPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} // NOOPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} // OPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[A]], [[B]] // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 a -= b; } void add_matrix_scalar_double_float(dx5x5_t a, float vf) { // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf) // NOOPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // NOOPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}} // OPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 a = a + vf; } void add_compound_matrix_scalar_double_float(dx5x5_t a, float vf) { // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf) // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}} // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 a += vf; } void subtract_compound_matrix_scalar_double_float(dx5x5_t a, float vf) { // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_float(<25 x double> noundef %a, float noundef %vf) // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}} // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 a -= vf; } void add_matrix_scalar_double_double(dx5x5_t a, double vd) { // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd) // NOOPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // NOOPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}} // OPT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 a = a + vd; } void add_compound_matrix_scalar_double_double(dx5x5_t a, double vd) { // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd) // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}} // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]] // store <25 x double> [[RES]], ptr {{.*}}, align 8 a += vd; } void subtract_compound_matrix_scalar_double_double(dx5x5_t a, double vd) { // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_double_double(<25 x double> noundef %a, double noundef %vd) // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}} // NOOPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]] // store <25 x double> [[RES]], ptr {{.*}}, align 8 a -= vd; } void add_matrix_scalar_float_float(fx2x3_t b, float vf) { // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf) // NOOPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} // NOOPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}} // OPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 b = b + vf; } void add_compound_matrix_scalar_float_float(fx2x3_t b, float vf) { // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf) // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}} // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4{{$}} // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 b += vf; } void subtract_compound_matrix_scalar_float_float(fx2x3_t b, float vf) { // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_float(<6 x float> noundef %b, float noundef %vf) // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}} // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4{{$}} // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 b -= vf; } void add_matrix_scalar_float_double(fx2x3_t b, double vd) { // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd) // NOOPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} // NOOPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}} // OPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 b = b + vd; } void add_compound_matrix_scalar_float_double(fx2x3_t b, double vd) { // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd) // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}} // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 b += vd; } void subtract_compound_matrix_scalar_float_double(fx2x3_t b, double vd) { // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_float_double(<6 x float> noundef %b, double noundef %vd) // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}} // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 b -= vd; } // Integer matrix/scalar additions void add_matrix_matrix_int(ix9x3_t a, ix9x3_t b, ix9x3_t c) { // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b, <27 x i32> noundef %c) // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}} // NOOPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}} // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[B]], [[C]] // CHECK-NEXT: store <27 x i32> [[RES]], ptr {{.*}}, align 4 a = b + c; } void add_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) { // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b) // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}} // NOOPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}} // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[A]], [[B]] // CHECK-NEXT: store <27 x i32> [[RES]], ptr {{.*}}, align 4 a += b; } void subtract_compound_matrix_matrix_int(ix9x3_t a, ix9x3_t b) { // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_int(<27 x i32> noundef %a, <27 x i32> noundef %b) // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}} // NOOPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}} // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[A]], [[B]] // CHECK-NEXT: store <27 x i32> [[RES]], ptr {{.*}}, align 4 a -= b; } void add_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b, ullx4x2_t c) { // CHECK-LABEL: define{{.*}} void @add_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b, <8 x i64> noundef %c) // NOOPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} // NOOPT-NEXT: [[C:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} // OPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[C:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[B]], [[C]] // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 a = b + c; } void add_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) { // CHECK-LABEL: define{{.*}} void @add_compound_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b) // NOOPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} // NOOPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} // OPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[A]], [[B]] // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 a += b; } void subtract_compound_matrix_matrix_unsigned_long_long(ullx4x2_t a, ullx4x2_t b) { // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_matrix_unsigned_long_long(<8 x i64> noundef %a, <8 x i64> noundef %b) // NOOPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} // OPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // NOOPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} // OPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[A]], [[B]] // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 a -= b; } void add_matrix_scalar_int_short(ix9x3_t a, short vs) { // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs) // NOOPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} // NOOPT-NEXT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}} // OPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 a = a + vs; } void add_compound_matrix_scalar_int_short(ix9x3_t a, short vs) { // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs) // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}} // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT:%.*]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 a += vs; } void subtract_compound_matrix_scalar_int_short(ix9x3_t a, short vs) { // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_short(<27 x i32> noundef %a, i16 noundef signext %vs) // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}} // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT:%.*]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 a -= vs; } void add_matrix_scalar_int_long_int(ix9x3_t a, long int vli) { // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli) // NOOPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} // NOOPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}} // OPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 a = a + vli; } void add_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) { // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli) // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}} // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 a += vli; } void subtract_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) { // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_long_int(<27 x i32> noundef %a, i64 noundef %vli) // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}} // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 a -= vli; } void add_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) { // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli) // NOOPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} // NOOPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}} // OPT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32 // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 a = a + vulli; } void add_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) { // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli) // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}} // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 a += vulli; } void subtract_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int vulli) { // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_int_unsigned_long_long(<27 x i32> noundef %a, i64 noundef %vulli) // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}} // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32 // NOOPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <27 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 a -= vulli; } void add_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) { // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs) // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}} // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]] // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 b = vs + b; } void add_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) { // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs) // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}} // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 b += vs; } void subtract_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) { // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_short(<8 x i64> noundef %b, i16 noundef signext %vs) // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}} // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64 // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 b -= vs; } void add_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) { // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli) // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}} // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]] // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 b = vli + b; } void add_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) { // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli) // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}} // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 b += vli; } void subtract_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) { // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_int(<8 x i64> noundef %b, i64 noundef %vli) // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}} // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 b -= vli; } void add_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) { // CHECK-LABEL: define{{.*}} void @add_matrix_scalar_long_long_int_unsigned_long_long // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}} // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}} // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]] // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 b = vulli + b; } void add_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) { // CHECK-LABEL: define{{.*}} void @add_compound_matrix_scalar_long_long_int_unsigned_long_long // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}} // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}} // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 b += vulli; } void subtract_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned long long int vulli) { // CHECK-LABEL: define{{.*}} void @subtract_compound_matrix_scalar_long_long_int_unsigned_long_long // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}} // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}} // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0 // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]] // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 b -= vulli; } // Tests for matrix multiplication. void multiply_matrix_matrix_double(dx5x5_t b, dx5x5_t c) { // CHECK-LABEL: @multiply_matrix_matrix_double( // NOOPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // NOOPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // OPT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5) // CHECK-NEXT: store <25 x double> [[RES]], ptr %a, align 8 // CHECK: ret void // dx5x5_t a; a = b * c; } void multiply_compound_matrix_matrix_double(dx5x5_t b, dx5x5_t c) { // CHECK-LABEL: @multiply_compound_matrix_matrix_double( // NOOPT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // NOOPT-NEXT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // OPT: [[C:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[B:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5) // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 // CHECK-NEXT: ret void b *= c; } typedef int ix3x9_t __attribute__((matrix_type(3, 9))); typedef int ix9x9_t __attribute__((matrix_type(9, 9))); // CHECK-LABEL: @multiply_matrix_matrix_int( // NOOPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}} // NOOPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}} // OPT: [[B:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[C:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[RES:%.*]] = call <81 x i32> @llvm.matrix.multiply.v81i32.v27i32.v27i32(<27 x i32> [[B]], <27 x i32> [[C]], i32 9, i32 3, i32 9) // CHECK-NEXT: store <81 x i32> [[RES]], ptr %a, align 4 // CHECK: ret void // void multiply_matrix_matrix_int(ix9x3_t b, ix3x9_t c) { ix9x9_t a; a = b * c; } // CHECK-LABEL: @multiply_double_matrix_scalar_float( // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // NOOPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4{{$}} // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]] // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 // CHECK-NEXT: ret void // void multiply_double_matrix_scalar_float(dx5x5_t a, float s) { a = a * s; } // CHECK-LABEL: @multiply_compound_double_matrix_scalar_float // NOOPT: [[S:%.*]] = load float, ptr %s.addr, align 4{{$}} // OPT: [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]] // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 // CHECK-NEXT: ret void // void multiply_compound_double_matrix_scalar_float(dx5x5_t a, float s) { a *= s; } // CHECK-LABEL: @multiply_double_matrix_scalar_double( // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // NOOPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}} // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]] // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 // CHECK-NEXT: ret void // void multiply_double_matrix_scalar_double(dx5x5_t a, double s) { a = a * s; } // CHECK-LABEL: @multiply_compound_double_matrix_scalar_double( // NOOPT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}} // NOOPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // OPT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]] // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 // CHECK-NEXT: ret void void multiply_compound_double_matrix_scalar_double(dx5x5_t a, double s) { a *= s; } // CHECK-LABEL: @multiply_float_matrix_scalar_double( // NOOPT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}} // OPT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}} // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[VECSPLAT]], [[MAT]] // CHECK-NEXT: store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4 // CHECK-NEXT: ret void // void multiply_float_matrix_scalar_double(fx2x3_t b, double s) { b = s * b; } // CHECK-LABEL: @multiply_compound_float_matrix_scalar_double( // NOOPT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}} // OPT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}} // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], [[VECSPLAT]] // store <6 x float> %3, ptr [[MAT_ADDR]], align 4 // ret void void multiply_compound_float_matrix_scalar_double(fx2x3_t b, double s) { b *= s; } // CHECK-LABEL: @multiply_int_matrix_scalar_short( // NOOPT: [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}} // OPT: [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[VECSPLAT]], [[MAT]] // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 // CHECK-NEXT: ret void // void multiply_int_matrix_scalar_short(ix9x3_t b, short s) { b = s * b; } // CHECK-LABEL: @multiply_compound_int_matrix_scalar_short( // NOOPT: [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}} // OPT: [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]] // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 // CHECK-NEXT: ret void // void multiply_compound_int_matrix_scalar_short(ix9x3_t b, short s) { b *= s; } // CHECK-LABEL: @multiply_int_matrix_scalar_ull( // NOOPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} // OPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} // NOOPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}} // OPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]] // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 // CHECK-NEXT: ret void // void multiply_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) { b = b * s; } void multiply_compound_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) { // CHECK-LABEL: @multiply_compound_int_matrix_scalar_ull( // NOOPT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}} // OPT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]] // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 // CHECK-NEXT: ret void b *= s; } // CHECK-LABEL: @multiply_float_matrix_constant( // CHECK-NEXT: entry: // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4 // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}} // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], splat (float 2.500000e+00) // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: ret void // void multiply_float_matrix_constant(fx2x3_t a) { a = a * 2.5; } // CHECK-LABEL: @multiply_compound_float_matrix_constant( // CHECK-NEXT: entry: // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4 // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}} // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], splat (float 2.500000e+00) // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: ret void void multiply_compound_float_matrix_constant(fx2x3_t a) { a *= 2.5; } // CHECK-LABEL: @multiply_int_matrix_constant( // CHECK-NEXT: entry: // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [27 x i32], align 4 // CHECK-NEXT: store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}} // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> splat (i32 5), [[MAT]] // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: ret void // void multiply_int_matrix_constant(ix9x3_t a) { a = 5 * a; } // CHECK-LABEL: @multiply_compound_int_matrix_constant( // CHECK-NEXT: entry: // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [27 x i32], align 4 // CHECK-NEXT: store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}} // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], splat (i32 5) // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: ret void // void multiply_compound_int_matrix_constant(ix9x3_t a) { a *= 5; } // CHECK-LABEL: @divide_double_matrix_scalar_float( // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // NOOPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4{{$}} // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]] // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 // CHECK-NEXT: ret void // void divide_double_matrix_scalar_float(dx5x5_t a, float s) { a = a / s; } // CHECK-LABEL: @divide_double_matrix_scalar_double( // NOOPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // NOOPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}} // OPT: [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]] // CHECK-NEXT: store <25 x double> [[RES]], ptr {{.*}}, align 8 // CHECK-NEXT: ret void // void divide_double_matrix_scalar_double(dx5x5_t a, double s) { a = a / s; } // CHECK-LABEL: @divide_float_matrix_scalar_double( // NOOPT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}} // NOOPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}} // OPT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], [[VECSPLAT]] // CHECK-NEXT: store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4 // CHECK-NEXT: ret void // void divide_float_matrix_scalar_double(fx2x3_t b, double s) { b = b / s; } // CHECK-LABEL: @divide_int_matrix_scalar_short( // NOOPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} // NOOPT-NEXT: [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}} // OPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]] // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 // CHECK-NEXT: ret void // void divide_int_matrix_scalar_short(ix9x3_t b, short s) { b = b / s; } // CHECK-LABEL: @divide_int_matrix_scalar_ull( // NOOPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} // NOOPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}} // OPT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32 // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]] // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 // CHECK-NEXT: ret void // void divide_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) { b = b / s; } // CHECK-LABEL: @divide_ull_matrix_scalar_ull( // NOOPT: [[MAT:%.*]] = load <8 x i64>, ptr [[MAT_ADDR:%.*]], align 8{{$}} // NOOPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}} // OPT: [[MAT:%.*]] = load <8 x i64>, ptr [[MAT_ADDR:%.*]], align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[S]], i64 0 // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <8 x i64> [[VECINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer // CHECK-NEXT: [[RES:%.*]] = udiv <8 x i64> [[MAT]], [[VECSPLAT]] // CHECK-NEXT: store <8 x i64> [[RES]], ptr [[MAT_ADDR]], align 8 // CHECK-NEXT: ret void // void divide_ull_matrix_scalar_ull(ullx4x2_t b, unsigned long long s) { b = b / s; } // CHECK-LABEL: @divide_float_matrix_constant( // CHECK-NEXT: entry: // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4 // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}} // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], splat (float 2.500000e+00) // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: ret void // void divide_float_matrix_constant(fx2x3_t a) { a = a / 2.5; } // Tests for the matrix type operators. typedef double dx5x5_t __attribute__((matrix_type(5, 5))); typedef float fx2x3_t __attribute__((matrix_type(2, 3))); // Check that we can use matrix index expression on different floating point // matrixes and indices. void insert_double_matrix_const_idx_ll_u_double(dx5x5_t a, double d, fx2x3_t b, float e, int j, unsigned k) { // CHECK-LABEL: @insert_double_matrix_const_idx_ll_u_double( // NOOPT: [[D:%.*]] = load double, ptr %d.addr, align 8{{$}} // OPT: [[D:%.*]] = load double, ptr %d.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 5 // CHECK-NEXT: store <25 x double> [[MATINS]], ptr {{.*}}, align 8 // CHECK-NEXT: ret void a[0ll][1u] = d; } void insert_double_matrix_const_idx_i_u_double(dx5x5_t a, double d) { // CHECK-LABEL: @insert_double_matrix_const_idx_i_u_double( // NOOPT: [[D:%.*]] = load double, ptr %d.addr, align 8{{$}} // OPT: [[D:%.*]] = load double, ptr %d.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[MAT:%.*]] = load <25 x double>, ptr [[MAT_ADDR:%.*]], align 8{{$}} // CHECK-NEXT: [[MATINS:%.*]] = insertelement <25 x double> [[MAT]], double [[D]], i64 21 // CHECK-NEXT: store <25 x double> [[MATINS]], ptr [[MAT_ADDR]], align 8 // CHECK-NEXT: ret void a[1][4u] = d; } void insert_float_matrix_const_idx_ull_i_float(fx2x3_t b, float e) { // CHECK-LABEL: @insert_float_matrix_const_idx_ull_i_float( // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}} // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}} // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 3 // CHECK-NEXT: store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4 // CHECK-NEXT: ret void b[1ull][1] = e; } void insert_float_matrix_idx_i_u_float(fx2x3_t b, float e, int j, unsigned k) { // CHECK-LABEL: @insert_float_matrix_idx_i_u_float( // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}} // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}} // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[J_EXT:%.*]] = sext i32 [[J]] to i64 // NOOPT-NEXT: [[K:%.*]] = load i32, ptr %k.addr, align 4{{$}} // OPT-NEXT: [[K:%.*]] = load i32, ptr %k.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[K_EXT:%.*]] = zext i32 [[K]] to i64 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K_EXT]], 2 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]] // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}} // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]] // CHECK-NEXT: store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4 // CHECK-NEXT: ret void b[j][k] = e; } void insert_float_matrix_idx_s_ull_float(fx2x3_t b, float e, short j, unsigned long long k) { // CHECK-LABEL: @insert_float_matrix_idx_s_ull_float( // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}} // NOOPT-NEXT: [[J:%.*]] = load i16, ptr %j.addr, align 2{{$}} // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[J:%.*]] = load i16, ptr %j.addr, align 2, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[J_EXT:%.*]] = sext i16 [[J]] to i64 // NOOPT-NEXT: [[K:%.*]] = load i64, ptr %k.addr, align 8{{$}} // OPT-NEXT: [[K:%.*]] = load i64, ptr %k.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K]], 2 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]] // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}} // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]] // CHECK-NEXT: store <6 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4 // CHECK-NEXT: ret void (b)[j][k] = e; } // Check that we can can use matrix index expressions on integer matrixes. typedef int ix9x3_t __attribute__((matrix_type(9, 3))); void insert_int_idx_expr(ix9x3_t a, int i) { // CHECK-LABEL: @insert_int_idx_expr( // NOOPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4{{$}} // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4{{$}} // OPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[I2_ADD:%.*]] = add nsw i32 4, [[I2]] // CHECK-NEXT: [[ADD_EXT:%.*]] = sext i32 [[I2_ADD]] to i64 // CHECK-NEXT: [[IDX2:%.*]] = add i64 18, [[ADD_EXT]] // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} // CHECK-NEXT: [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I1]], i64 [[IDX2]] // CHECK-NEXT: store <27 x i32> [[MATINS]], ptr [[MAT_ADDR]], align 4 // CHECK-NEXT: ret void a[4 + i][1 + 1u] = i; } // Check that we can can use matrix index expressions on FP and integer // matrixes. typedef int ix9x3_t __attribute__((matrix_type(9, 3))); void insert_float_into_int_matrix(ix9x3_t *a, int i) { // CHECK-LABEL: @insert_float_into_int_matrix( // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}} // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // NOOPT-NEXT: [[MAT_ADDR1:%.*]] = load ptr, ptr %a.addr, align 8{{$}} // OPT-NEXT: [[MAT_ADDR1:%.*]] = load ptr, ptr %a.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[MAT_ADDR1]], align 4{{$}} // CHECK-NEXT: [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I]], i64 13 // CHECK-NEXT: store <27 x i32> [[MATINS]], ptr [[MAT_ADDR1]], align 4 // CHECK-NEXT: ret void (*a)[4][1] = i; } // Check that we can use overloaded matrix index expressions on matrixes with // matching dimensions, but different element types. typedef double dx3x3_t __attribute__((matrix_type(3, 3))); typedef float fx3x3_t __attribute__((matrix_type(3, 3))); void insert_matching_dimensions1(dx3x3_t a, double i) { // CHECK-LABEL: @insert_matching_dimensions1( // NOOPT: [[I:%.*]] = load double, ptr %i.addr, align 8{{$}} // OPT: [[I:%.*]] = load double, ptr %i.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[MAT:%.*]] = load <9 x double>, ptr [[MAT_ADDR:%.*]], align 8{{$}} // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x double> [[MAT]], double [[I]], i64 5 // CHECK-NEXT: store <9 x double> [[MATINS]], ptr [[MAT_ADDR]], align 8 // CHECK-NEXT: ret void a[2u][1u] = i; } void insert_matching_dimensions(fx3x3_t b, float e) { // CHECK-LABEL: @insert_matching_dimensions( // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}} // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}} // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x float> [[MAT]], float [[E]], i64 7 // CHECK-NEXT: store <9 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4 // CHECK-NEXT: ret void b[1u][2u] = e; } double extract_double(dx5x5_t a) { // CHECK-LABEL: @extract_double( // NOOPT: [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8{{$}} // OPT: [[MAT:%.*]] = load <25 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <25 x double> [[MAT]], i64 12 // CHECK-NEXT: ret double [[MATEXT]] return a[2][3 - 1u]; } double extract_float(fx3x3_t b) { // CHECK-LABEL: @extract_float( // NOOPT: [[MAT:%.*]] = load <9 x float>, ptr {{.*}}, align 4{{$}} // OPT: [[MAT:%.*]] = load <9 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 5 // CHECK-NEXT: [[TO_DOUBLE:%.*]] = fpext float [[MATEXT]] to double // CHECK-NEXT: ret double [[TO_DOUBLE]] return b[2][1]; } int extract_int(ix9x3_t c, unsigned long j) { // CHECK-LABEL: @extract_int( // NOOPT: [[J1:%.*]] = load i64, ptr %j.addr, align 8{{$}} // NOOPT-NEXT: [[J2:%.*]] = load i64, ptr %j.addr, align 8{{$}} // OPT: [[J1:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // OPT-NEXT: [[J2:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J2]], 9 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J1]] // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}} // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <27 x i32> [[MAT]], i64 [[IDX2]] // CHECK-NEXT: ret i32 [[MATEXT]] return c[j][j]; } typedef double dx3x2_t __attribute__((matrix_type(3, 2))); double test_extract_matrix_pointer1(dx3x2_t **ptr, unsigned j) { // CHECK-LABEL: @test_extract_matrix_pointer1( // NOOPT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}} // OPT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64 // CHECK-NEXT: [[IDX:%.*]] = add i64 3, [[J_EXT]] // NOOPT-NEXT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8{{$}} // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 6 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) // OPT-NEXT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[PTR_IDX:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 1 // NOOPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8{{$}} // OPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], ptr [[PTR2]], i64 2 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8{{$}} // OPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 [[IDX]] // CHECK-NEXT: ret double [[MATEXT]] return ptr[1][2][j][1]; } double test_extract_matrix_pointer2(dx3x2_t **ptr) { // CHECK-LABEL: @test_extract_matrix_pointer2( // CHECK-NEXT: entry: // NOOPT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8{{$}} // OPT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[PTR_IDX:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 4 // NOOPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8{{$}} // OPT-NEXT: [[PTR2:%.*]] = load ptr, ptr [[PTR_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], ptr [[PTR2]], i64 6 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8{{$}} // OPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR2_IDX]], align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 5 // CHECK-NEXT: ret double [[MATEXT]] return (*(*(ptr + 4) + 6))[2][1 * 3 - 2]; } void insert_extract(dx5x5_t a, fx3x3_t b, unsigned long j, short k) { // CHECK-LABEL: @insert_extract( // NOOPT: [[K:%.*]] = load i16, ptr %k.addr, align 2{{$}} // OPT: [[K:%.*]] = load i16, ptr %k.addr, align 2, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[K_EXT:%.*]] = sext i16 [[K]] to i64 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K_EXT]], 3 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], 0 // NOOPT-NEXT: [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}} // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 9 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) // OPT-NEXT: [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 [[IDX2]] // NOOPT-NEXT: [[J:%.*]] = load i64, ptr %j.addr, align 8{{$}} // OPT-NEXT: [[J:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[IDX3:%.*]] = mul i64 [[J]], 3 // CHECK-NEXT: [[IDX4:%.*]] = add i64 [[IDX3]], 2 // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX4]], 9 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) // CHECK-NEXT: [[MAT2:%.*]] = load <9 x float>, ptr [[MAT_ADDR]], align 4{{$}} // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x float> [[MAT2]], float [[MATEXT]], i64 [[IDX4]] // CHECK-NEXT: store <9 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4 // CHECK-NEXT: ret void b[2][j] = b[0][k]; } void insert_compound_stmt(dx5x5_t a) { // CHECK-LABEL: define{{.*}} void @insert_compound_stmt(<25 x double> noundef %a) // CHECK: [[A:%.*]] = load <25 x double>, ptr [[A_PTR:%.*]], align 8{{$}} // CHECK-NEXT: [[EXT:%.*]] = extractelement <25 x double> [[A]], i64 17 // CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT]], 1.000000e+00 // CHECK-NEXT: [[A2:%.*]] = load <25 x double>, ptr [[A_PTR]], align 8{{$}} // CHECK-NEXT: [[INS:%.*]] = insertelement <25 x double> [[A2]], double [[SUB]], i64 17 // CHECK-NEXT: store <25 x double> [[INS]], ptr [[A_PTR]], align 8 // CHECK-NEXT: ret void a[2][3] -= 1.0; } struct Foo { fx2x3_t mat; }; void insert_compound_stmt_field(struct Foo *a, float f, unsigned i, unsigned j) { // CHECK-LABEL: define{{.*}} void @insert_compound_stmt_field(ptr noundef %a, float noundef %f, i32 noundef %i, i32 noundef %j) // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}} // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[I_EXT:%.*]] = zext i32 [[I]] to i64 // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}} // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64 // CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 2 // CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]] // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr %mat, align 4{{$}} // CHECK-NEXT: [[EXT:%.*]] = extractelement <6 x float> [[MAT]], i64 [[IDX2]] // CHECK-NEXT: [[SUM:%.*]] = fadd float [[EXT]], {{.*}} // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) // CHECK-NEXT: [[MAT2:%.*]] = load <6 x float>, ptr %mat, align 4{{$}} // CHECK-NEXT: [[INS:%.*]] = insertelement <6 x float> [[MAT2]], float [[SUM]], i64 [[IDX2]] // CHECK-NEXT: store <6 x float> [[INS]], ptr %mat, align 4 // CHECK-NEXT: ret void a->mat[i][j] += f; } void matrix_as_idx(ix9x3_t a, int i, int j, dx5x5_t b) { // CHECK-LABEL: define{{.*}} void @matrix_as_idx(<27 x i32> noundef %a, i32 noundef %i, i32 noundef %j, <25 x double> noundef %b) // NOOPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4{{$}} // OPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[I1_EXT:%.*]] = sext i32 [[I1]] to i64 // NOOPT-NEXT: [[J1:%.*]] = load i32, ptr %j.addr, align 4{{$}} // OPT-NEXT: [[J1:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[J1_EXT:%.*]] = sext i32 [[J1]] to i64 // CHECK-NEXT: [[IDX1_1:%.*]] = mul i64 [[J1_EXT]], 9 // CHECK-NEXT: [[IDX1_2:%.*]] = add i64 [[IDX1_1]], [[I1_EXT]] // NOOPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr %a.addr, align 4{{$}} // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX1_2]], 27 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) // OPT-NEXT: [[A:%.*]] = load <27 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[MI1:%.*]] = extractelement <27 x i32> [[A]], i64 [[IDX1_2]] // CHECK-NEXT: [[MI1_EXT:%.*]] = sext i32 [[MI1]] to i64 // NOOPT-NEXT: [[J2:%.*]] = load i32, ptr %j.addr, align 4{{$}} // OPT-NEXT: [[J2:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[J2_EXT:%.*]] = sext i32 [[J2]] to i64 // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4{{$}} // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[I2_EXT:%.*]] = sext i32 [[I2]] to i64 // CHECK-NEXT: [[IDX2_1:%.*]] = mul i64 [[I2_EXT]], 9 // CHECK-NEXT: [[IDX2_2:%.*]] = add i64 [[IDX2_1]], [[J2_EXT]] // NOOPT-NEXT: [[A2:%.*]] = load <27 x i32>, ptr {{.*}}, align 4{{$}} // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2_2]], 27 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) // OPT-NEXT: [[A2:%.*]] = load <27 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[MI2:%.*]] = extractelement <27 x i32> [[A2]], i64 [[IDX2_2]] // CHECK-NEXT: [[MI3:%.*]] = add nsw i32 [[MI2]], 2 // CHECK-NEXT: [[MI3_EXT:%.*]] = sext i32 [[MI3]] to i64 // CHECK-NEXT: [[IDX3_1:%.*]] = mul i64 [[MI3_EXT]], 5 // CHECK-NEXT: [[IDX3_2:%.*]] = add i64 [[IDX3_1]], [[MI1_EXT]] // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX3_2]], 25 // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) // CHECK-NEXT: [[B:%.*]] = load <25 x double>, ptr [[B_PTR:%.*]], align 8{{$}} // CHECK-NEXT: [[INS:%.*]] = insertelement <25 x double> [[B]], double 1.500000e+00, i64 [[IDX3_2]] // CHECK-NEXT: store <25 x double> [[INS]], ptr [[B_PTR]], align 8 b[a[i][j]][a[j][i] + 2] = 1.5; }