; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static < %s \
; RUN:   | FileCheck --check-prefixes=O32,O32-BE %s
; RUN: llc -mtriple=mipsel-linux-gnu -relocation-model=static < %s \
; RUN:   | FileCheck --check-prefixes=O32,O32-LE %s

; RUN-TODO: llc -mtriple=mips64-linux-gnu -relocation-model=static -target-abi o32 < %s \
; RUN-TODO:   | FileCheck --check-prefixes=O32 %s
; RUN-TODO: llc -mtriple=mips64el-linux-gnu -relocation-model=static -target-abi o32 < %s \
; RUN-TODO:   | FileCheck --check-prefixes=O32 %s

; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -target-abi n32 < %s \
; RUN:   | FileCheck --check-prefixes=N32,N32-BE %s
; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -target-abi n32 < %s \
; RUN:   | FileCheck --check-prefixes=N32,N32-LE %s

; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -target-abi n64 < %s \
; RUN:   | FileCheck --check-prefixes=N64,N64-BE %s
; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -target-abi n64 < %s \
; RUN:   | FileCheck --check-prefixes=N64,N64-LE %s

; Test struct returns for all ABIs and byte orders.
; Zero-initialized globals that act as the source data for each struct-return
; test in this file.
@struct_byte    = global {i8} zeroinitializer
@struct_2byte   = global {i8,i8} zeroinitializer
@struct_3xi16   = global {[3 x i16]} zeroinitializer
@struct_6xi32   = global {[6 x i32]} zeroinitializer
@struct_128xi16 = global {[128 x i16]} zeroinitializer

declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)

; Return a single-byte struct in a register. The load is volatile so it cannot
; be folded away. The big-endian N32/N64 checks expect the byte to be moved
; into the top of the 64-bit return register (dsll by 56); the little-endian
; checks and the 32-bit ABI checks expect no shift.
define inreg {i8} @ret_struct_i8() nounwind {
; O32-LABEL: ret_struct_i8:
; O32:       # %bb.0: # %entry
; O32-NEXT:    lui $1, %hi(struct_byte)
; O32-NEXT:    jr $ra
; O32-NEXT:    lbu $2, %lo(struct_byte)($1)
;
; N32-BE-LABEL: ret_struct_i8:
; N32-BE:       # %bb.0: # %entry
; N32-BE-NEXT:    lui $1, %hi(struct_byte)
; N32-BE-NEXT:    lb $1, %lo(struct_byte)($1)
; N32-BE-NEXT:    jr $ra
; N32-BE-NEXT:    dsll $2, $1, 56
;
; N32-LE-LABEL: ret_struct_i8:
; N32-LE:       # %bb.0: # %entry
; N32-LE-NEXT:    lui $1, %hi(struct_byte)
; N32-LE-NEXT:    jr $ra
; N32-LE-NEXT:    lb $2, %lo(struct_byte)($1)
;
; N64-BE-LABEL: ret_struct_i8:
; N64-BE:       # %bb.0: # %entry
; N64-BE-NEXT:    lui $1, %highest(struct_byte)
; N64-BE-NEXT:    daddiu $1, $1, %higher(struct_byte)
; N64-BE-NEXT:    dsll $1, $1, 16
; N64-BE-NEXT:    daddiu $1, $1, %hi(struct_byte)
; N64-BE-NEXT:    dsll $1, $1, 16
; N64-BE-NEXT:    lb $1, %lo(struct_byte)($1)
; N64-BE-NEXT:    jr $ra
; N64-BE-NEXT:    dsll $2, $1, 56
;
; N64-LE-LABEL: ret_struct_i8:
; N64-LE:       # %bb.0: # %entry
; N64-LE-NEXT:    lui $1, %highest(struct_byte)
; N64-LE-NEXT:    daddiu $1, $1, %higher(struct_byte)
; N64-LE-NEXT:    dsll $1, $1, 16
; N64-LE-NEXT:    daddiu $1, $1, %hi(struct_byte)
; N64-LE-NEXT:    dsll $1, $1, 16
; N64-LE-NEXT:    jr $ra
; N64-LE-NEXT:    lb $2, %lo(struct_byte)($1)
entry:
  %val = load volatile {i8}, ptr @struct_byte
  ret {i8} %val
}

; This test is based on the way clang currently lowers {i8,i8} to {i16}.
; FIXME: It should probably work without any lowering too, but this doesn't
;        work as expected. Each member gets mapped to a register rather than
;        being packed into a single register.
define inreg {i16} @ret_struct_i16() nounwind {
; O32-LABEL: ret_struct_i16:
; O32:       # %bb.0: # %entry
; O32-NEXT:    addiu $sp, $sp, -8
; O32-NEXT:    lui $1, %hi(struct_2byte)
; O32-NEXT:    lhu $1, %lo(struct_2byte)($1)
; O32-NEXT:    sh $1, 0($sp)
; O32-NEXT:    lhu $2, 0($sp)
; O32-NEXT:    jr $ra
; O32-NEXT:    addiu $sp, $sp, 8
;
; N32-BE-LABEL: ret_struct_i16:
; N32-BE:       # %bb.0: # %entry
; N32-BE-NEXT:    addiu $sp, $sp, -16
; N32-BE-NEXT:    lui $1, %hi(struct_2byte)
; N32-BE-NEXT:    lhu $1, %lo(struct_2byte)($1)
; N32-BE-NEXT:    sh $1, 8($sp)
; N32-BE-NEXT:    lh $1, 8($sp)
; N32-BE-NEXT:    dsll $2, $1, 48
; N32-BE-NEXT:    jr $ra
; N32-BE-NEXT:    addiu $sp, $sp, 16
;
; N32-LE-LABEL: ret_struct_i16:
; N32-LE:       # %bb.0: # %entry
; N32-LE-NEXT:    addiu $sp, $sp, -16
; N32-LE-NEXT:    lui $1, %hi(struct_2byte)
; N32-LE-NEXT:    lhu $1, %lo(struct_2byte)($1)
; N32-LE-NEXT:    sh $1, 8($sp)
; N32-LE-NEXT:    lh $2, 8($sp)
; N32-LE-NEXT:    jr $ra
; N32-LE-NEXT:    addiu $sp, $sp, 16
;
; N64-BE-LABEL: ret_struct_i16:
; N64-BE:       # %bb.0: # %entry
; N64-BE-NEXT:    daddiu $sp, $sp, -16
; N64-BE-NEXT:    lui $1, %highest(struct_2byte)
; N64-BE-NEXT:    daddiu $1, $1, %higher(struct_2byte)
; N64-BE-NEXT:    dsll $1, $1, 16
; N64-BE-NEXT:    daddiu $1, $1, %hi(struct_2byte)
; N64-BE-NEXT:    dsll $1, $1, 16
; N64-BE-NEXT:    lhu $1, %lo(struct_2byte)($1)
; N64-BE-NEXT:    sh $1, 8($sp)
; N64-BE-NEXT:    lh $1, 8($sp)
; N64-BE-NEXT:    dsll $2, $1, 48
; N64-BE-NEXT:    jr $ra
; N64-BE-NEXT:    daddiu $sp, $sp, 16
;
; N64-LE-LABEL: ret_struct_i16:
; N64-LE:       # %bb.0: # %entry
; N64-LE-NEXT:    daddiu $sp, $sp, -16
; N64-LE-NEXT:    lui $1, %highest(struct_2byte)
; N64-LE-NEXT:    daddiu $1, $1, %higher(struct_2byte)
; N64-LE-NEXT:    dsll $1, $1, 16
; N64-LE-NEXT:    daddiu $1, $1, %hi(struct_2byte)
; N64-LE-NEXT:    dsll $1, $1, 16
; N64-LE-NEXT:    lhu $1, %lo(struct_2byte)($1)
; N64-LE-NEXT:    sh $1, 8($sp)
; N64-LE-NEXT:    lh $2, 8($sp)
; N64-LE-NEXT:    jr $ra
; N64-LE-NEXT:    daddiu $sp, $sp, 16
entry:
  ; Copy the two-byte global into a stack temporary, then reload the temporary
  ; as a single {i16} so both members travel in one return register.
  %retval = alloca {i8,i8}, align 8
  call void @llvm.memcpy.p0.p0.i64(ptr %retval, ptr @struct_2byte, i64 2, i1 false)
  %packed = load volatile {i16}, ptr %retval
  ret {i16} %packed
}

; Ensure that structures bigger than 32 bits but smaller than 64 bits are
; also returned in the upper bits on big-endian targets. Previously, these were
; missed by the CCPromoteToType and the shift didn't happen.
define inreg {i48} @ret_struct_3xi16() nounwind {
; O32-BE-LABEL: ret_struct_3xi16:
; O32-BE:       # %bb.0: # %entry
; O32-BE-NEXT:    lui $1, %hi(struct_3xi16)
; O32-BE-NEXT:    lw $2, %lo(struct_3xi16)($1)
; O32-BE-NEXT:    sll $3, $2, 16
; O32-BE-NEXT:    addiu $1, $1, %lo(struct_3xi16)
; O32-BE-NEXT:    lhu $1, 4($1)
; O32-BE-NEXT:    or $3, $1, $3
; O32-BE-NEXT:    jr $ra
; O32-BE-NEXT:    srl $2, $2, 16
;
; O32-LE-LABEL: ret_struct_3xi16:
; O32-LE:       # %bb.0: # %entry
; O32-LE-NEXT:    lui $1, %hi(struct_3xi16)
; O32-LE-NEXT:    lw $2, %lo(struct_3xi16)($1)
; O32-LE-NEXT:    addiu $1, $1, %lo(struct_3xi16)
; O32-LE-NEXT:    jr $ra
; O32-LE-NEXT:    lhu $3, 4($1)
;
; N32-BE-LABEL: ret_struct_3xi16:
; N32-BE:       # %bb.0: # %entry
; N32-BE-NEXT:    lui $1, %hi(struct_3xi16)
; N32-BE-NEXT:    lw $2, %lo(struct_3xi16)($1)
; N32-BE-NEXT:    dsll $2, $2, 32
; N32-BE-NEXT:    addiu $1, $1, %lo(struct_3xi16)
; N32-BE-NEXT:    lhu $1, 4($1)
; N32-BE-NEXT:    dsll $1, $1, 16
; N32-BE-NEXT:    jr $ra
; N32-BE-NEXT:    or $2, $2, $1
;
; N32-LE-LABEL: ret_struct_3xi16:
; N32-LE:       # %bb.0: # %entry
; N32-LE-NEXT:    lui $1, %hi(struct_3xi16)
; N32-LE-NEXT:    lwu $2, %lo(struct_3xi16)($1)
; N32-LE-NEXT:    addiu $1, $1, %lo(struct_3xi16)
; N32-LE-NEXT:    lh $1, 4($1)
; N32-LE-NEXT:    dsll $1, $1, 32
; N32-LE-NEXT:    jr $ra
; N32-LE-NEXT:    or $2, $2, $1
;
; N64-BE-LABEL: ret_struct_3xi16:
; N64-BE:       # %bb.0: # %entry
; N64-BE-NEXT:    lui $1, %highest(struct_3xi16)
; N64-BE-NEXT:    daddiu $1, $1, %higher(struct_3xi16)
; N64-BE-NEXT:    dsll $1, $1, 16
; N64-BE-NEXT:    daddiu $1, $1, %hi(struct_3xi16)
; N64-BE-NEXT:    dsll $1, $1, 16
; N64-BE-NEXT:    lw $2, %lo(struct_3xi16)($1)
; N64-BE-NEXT:    dsll $2, $2, 32
; N64-BE-NEXT:    daddiu $1, $1, %lo(struct_3xi16)
; N64-BE-NEXT:    lhu $1, 4($1)
; N64-BE-NEXT:    dsll $1, $1, 16
; N64-BE-NEXT:    jr $ra
; N64-BE-NEXT:    or $2, $2, $1
;
; N64-LE-LABEL: ret_struct_3xi16:
; N64-LE:       # %bb.0: # %entry
; N64-LE-NEXT:    lui $1, %highest(struct_3xi16)
; N64-LE-NEXT:    daddiu $1, $1, %higher(struct_3xi16)
; N64-LE-NEXT:    dsll $1, $1, 16
; N64-LE-NEXT:    daddiu $1, $1, %hi(struct_3xi16)
; N64-LE-NEXT:    dsll $1, $1, 16
; N64-LE-NEXT:    lwu $2, %lo(struct_3xi16)($1)
; N64-LE-NEXT:    daddiu $1, $1, %lo(struct_3xi16)
; N64-LE-NEXT:    lh $1, 4($1)
; N64-LE-NEXT:    dsll $1, $1, 32
; N64-LE-NEXT:    jr $ra
; N64-LE-NEXT:    or $2, $2, $1
entry:
  %bits = load volatile i48, ptr @struct_3xi16, align 2
  ; Use poison rather than the deprecated undef as the aggregate placeholder:
  ; the struct has a single field and it is overwritten right here, so the
  ; placeholder value is never observable.
  %wrapped = insertvalue {i48} poison, i48 %bits, 0
  ret {i48} %wrapped
}

; Ensure that large structures (>128-bit) are returned indirectly.
; We pick an extremely large structure so we don't have to match inlined
; memcpys; the checks expect a call to memcpy with a length of 256 instead.
define void @ret_struct_128xi16(ptr sret({[128 x i16]}) %returnval) {
; O32-LABEL: ret_struct_128xi16:
; O32:       # %bb.0: # %entry
; O32-NEXT:    addiu $sp, $sp, -24
; O32-NEXT:    .cfi_def_cfa_offset 24
; O32-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
; O32-NEXT:    sw $16, 16($sp) # 4-byte Folded Spill
; O32-NEXT:    .cfi_offset 31, -4
; O32-NEXT:    .cfi_offset 16, -8
; O32-NEXT:    move $16, $4
; O32-NEXT:    lui $1, %hi(struct_128xi16)
; O32-NEXT:    addiu $5, $1, %lo(struct_128xi16)
; O32-NEXT:    jal memcpy
; O32-NEXT:    addiu $6, $zero, 256
; O32-NEXT:    move $2, $16
; O32-NEXT:    lw $16, 16($sp) # 4-byte Folded Reload
; O32-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
; O32-NEXT:    jr $ra
; O32-NEXT:    addiu $sp, $sp, 24
;
; N32-LABEL: ret_struct_128xi16:
; N32:       # %bb.0: # %entry
; N32-NEXT:    addiu $sp, $sp, -16
; N32-NEXT:    .cfi_def_cfa_offset 16
; N32-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
; N32-NEXT:    sd $16, 0($sp) # 8-byte Folded Spill
; N32-NEXT:    .cfi_offset 31, -8
; N32-NEXT:    .cfi_offset 16, -16
; N32-NEXT:    lui $1, %hi(struct_128xi16)
; N32-NEXT:    addiu $5, $1, %lo(struct_128xi16)
; N32-NEXT:    sll $16, $4, 0
; N32-NEXT:    jal memcpy
; N32-NEXT:    daddiu $6, $zero, 256
; N32-NEXT:    move $2, $16
; N32-NEXT:    ld $16, 0($sp) # 8-byte Folded Reload
; N32-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
; N32-NEXT:    jr $ra
; N32-NEXT:    addiu $sp, $sp, 16
;
; N64-LABEL: ret_struct_128xi16:
; N64:       # %bb.0: # %entry
; N64-NEXT:    daddiu $sp, $sp, -16
; N64-NEXT:    .cfi_def_cfa_offset 16
; N64-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
; N64-NEXT:    sd $16, 0($sp) # 8-byte Folded Spill
; N64-NEXT:    .cfi_offset 31, -8
; N64-NEXT:    .cfi_offset 16, -16
; N64-NEXT:    move $16, $4
; N64-NEXT:    lui $1, %highest(struct_128xi16)
; N64-NEXT:    daddiu $1, $1, %higher(struct_128xi16)
; N64-NEXT:    dsll $1, $1, 16
; N64-NEXT:    daddiu $1, $1, %hi(struct_128xi16)
; N64-NEXT:    dsll $1, $1, 16
; N64-NEXT:    daddiu $5, $1, %lo(struct_128xi16)
; N64-NEXT:    jal memcpy
; N64-NEXT:    daddiu $6, $zero, 256
; N64-NEXT:    move $2, $16
; N64-NEXT:    ld $16, 0($sp) # 8-byte Folded Reload
; N64-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
; N64-NEXT:    jr $ra
; N64-NEXT:    daddiu $sp, $sp, 16
entry:
  call void @llvm.memcpy.p0.p0.i64(ptr align 2 %returnval, ptr align 2 @struct_128xi16, i64 256, i1 false)
  ret void
}

; Ensure that large structures (>128-bit) are returned indirectly.
; This will generate inlined memcpys anyway, so pick the smallest large
; structure.
; This time we let the backend lower the sret argument.
; Return a six-word struct by value with no explicit sret parameter in the IR.
; The checks expect six word loads from the global followed by six word stores
; through the pointer held in $4 (copied into $1 first in the N32 checks).
define {[6 x i32]} @ret_struct_6xi32() {
; O32-LABEL: ret_struct_6xi32:
; O32:       # %bb.0: # %entry
; O32-NEXT:    lui $1, %hi(struct_6xi32)
; O32-NEXT:    lw $2, %lo(struct_6xi32)($1)
; O32-NEXT:    addiu $1, $1, %lo(struct_6xi32)
; O32-NEXT:    lw $3, 4($1)
; O32-NEXT:    lw $5, 8($1)
; O32-NEXT:    lw $6, 12($1)
; O32-NEXT:    lw $7, 16($1)
; O32-NEXT:    lw $1, 20($1)
; O32-NEXT:    sw $1, 20($4)
; O32-NEXT:    sw $7, 16($4)
; O32-NEXT:    sw $6, 12($4)
; O32-NEXT:    sw $5, 8($4)
; O32-NEXT:    sw $3, 4($4)
; O32-NEXT:    jr $ra
; O32-NEXT:    sw $2, 0($4)
;
; N32-LABEL: ret_struct_6xi32:
; N32:       # %bb.0: # %entry
; N32-NEXT:    sll $1, $4, 0
; N32-NEXT:    lui $2, %hi(struct_6xi32)
; N32-NEXT:    lw $3, %lo(struct_6xi32)($2)
; N32-NEXT:    addiu $2, $2, %lo(struct_6xi32)
; N32-NEXT:    lw $4, 4($2)
; N32-NEXT:    lw $5, 8($2)
; N32-NEXT:    lw $6, 12($2)
; N32-NEXT:    lw $7, 16($2)
; N32-NEXT:    lw $2, 20($2)
; N32-NEXT:    sw $2, 20($1)
; N32-NEXT:    sw $7, 16($1)
; N32-NEXT:    sw $6, 12($1)
; N32-NEXT:    sw $5, 8($1)
; N32-NEXT:    sw $4, 4($1)
; N32-NEXT:    jr $ra
; N32-NEXT:    sw $3, 0($1)
;
; N64-LABEL: ret_struct_6xi32:
; N64:       # %bb.0: # %entry
; N64-NEXT:    lui $1, %highest(struct_6xi32)
; N64-NEXT:    daddiu $1, $1, %higher(struct_6xi32)
; N64-NEXT:    dsll $1, $1, 16
; N64-NEXT:    daddiu $1, $1, %hi(struct_6xi32)
; N64-NEXT:    dsll $1, $1, 16
; N64-NEXT:    lw $2, %lo(struct_6xi32)($1)
; N64-NEXT:    daddiu $1, $1, %lo(struct_6xi32)
; N64-NEXT:    lw $3, 4($1)
; N64-NEXT:    lw $5, 8($1)
; N64-NEXT:    lw $6, 12($1)
; N64-NEXT:    lw $7, 16($1)
; N64-NEXT:    lw $1, 20($1)
; N64-NEXT:    sw $1, 20($4)
; N64-NEXT:    sw $7, 16($4)
; N64-NEXT:    sw $6, 12($4)
; N64-NEXT:    sw $5, 8($4)
; N64-NEXT:    sw $3, 4($4)
; N64-NEXT:    jr $ra
; N64-NEXT:    sw $2, 0($4)
entry:
  %agg = load volatile {[6 x i32]}, ptr @struct_6xi32, align 2
  ret {[6 x i32]} %agg
}