; RUN: llc < %s -stack-symbol-ordering=0 -mcpu=generic -mattr=+avx -mtriple=x86_64-apple-darwin10 | FileCheck %s
; rdar://11496434

; no VLAs or dynamic alignment
define i32 @t1() nounwind uwtable ssp {
entry:
  %a = alloca i32, align 4
  call void @t1_helper(i32* %a) nounwind
  %0 = load i32, i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; CHECK: _t1
; CHECK-NOT: andq $-{{[0-9]+}}, %rsp
; CHECK: leaq [[OFFSET:[0-9]*]](%rsp), %rdi
; CHECK: callq _t1_helper
; CHECK: movl [[OFFSET]](%rsp), %eax
; CHECK: addl $13, %eax
}

declare void @t1_helper(i32*)

; dynamic realignment
define i32 @t2() nounwind uwtable ssp {
entry:
  %a = alloca i32, align 4
  %v = alloca <8 x float>, align 32
  call void @t2_helper(i32* %a, <8 x float>* %v) nounwind
  %0 = load i32, i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; CHECK: _t2
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
; CHECK: andq $-32, %rsp
; CHECK: subq ${{[0-9]+}}, %rsp
;
; CHECK: leaq {{[0-9]*}}(%rsp), %rdi
; CHECK: movq %rsp, %rsi
; CHECK: callq _t2_helper
;
; CHECK: movq %rbp, %rsp
; CHECK: popq %rbp
}

declare void @t2_helper(i32*, <8 x float>*)

; VLAs
define i32 @t3(i64 %sz) nounwind uwtable ssp {
entry:
  %a = alloca i32, align 4
  %vla = alloca i32, i64 %sz, align 16
  call void @t3_helper(i32* %a, i32* %vla) nounwind
  %0 = load i32, i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; CHECK: _t3
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
; CHECK-NOT: andq $-{{[0-9]+}}, %rsp
; CHECK: subq ${{[0-9]+}}, %rsp
;
; CHECK: movq %rbp, %rsp
; CHECK: popq %rbp
}

declare void @t3_helper(i32*, i32*)

; VLAs + Dynamic realignment
define i32 @t4(i64 %sz) nounwind uwtable ssp {
entry:
  %a = alloca i32, align 4
  %v = alloca <8 x float>, align 32
  %vla = alloca i32, i64 %sz, align 16
  call void @t4_helper(i32* %a, i32* %vla, <8 x float>* %v) nounwind
  %0 = load i32, i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; CHECK: _t4
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
; CHECK: pushq %rbx
; CHECK: andq $-32, %rsp
; CHECK: subq ${{[0-9]+}}, %rsp
; CHECK: movq %rsp, %rbx
;
; CHECK: leaq {{[0-9]*}}(%rbx), %rdi
; CHECK: movq %rbx, %rdx
; CHECK: callq _t4_helper
;
; CHECK: leaq -{{[0-9]+}}(%rbp), %rsp
; CHECK: popq %rbx
; CHECK: popq %rbp
}

declare void @t4_helper(i32*, i32*, <8 x float>*)

; Spilling an AVX register shouldn't cause dynamic realignment
define i32 @t5(float* nocapture %f) nounwind uwtable ssp {
entry:
  %a = alloca i32, align 4
  %0 = bitcast float* %f to <8 x float>*
  %1 = load <8 x float>, <8 x float>* %0, align 32
  call void @t5_helper1(i32* %a) nounwind
  call void @t5_helper2(<8 x float> %1) nounwind
  %2 = load i32, i32* %a, align 4
  %add = add nsw i32 %2, 13
  ret i32 %add

; CHECK: _t5
; CHECK: subq ${{[0-9]+}}, %rsp
;
; CHECK: vmovaps (%rdi), [[AVXREG:%ymm[0-9]+]]
; CHECK: vmovups [[AVXREG]], (%rsp)
; CHECK: leaq {{[0-9]+}}(%rsp), %rdi
; CHECK: callq _t5_helper1
; CHECK: vmovups (%rsp), %ymm0
; CHECK: callq _t5_helper2
; CHECK: movl {{[0-9]+}}(%rsp), %eax
}

declare void @t5_helper1(i32*)

declare void @t5_helper2(<8 x float>)

; VLAs + Dynamic realignment + Spill
; FIXME: RA has already reserved RBX, so we can't do dynamic realignment.
define i32 @t6(i64 %sz, float* nocapture %f) nounwind uwtable ssp {
entry:
; CHECK: _t6
  %a = alloca i32, align 4
  %0 = bitcast float* %f to <8 x float>*
  %1 = load <8 x float>, <8 x float>* %0, align 32
  %vla = alloca i32, i64 %sz, align 16
  call void @t6_helper1(i32* %a, i32* %vla) nounwind
  call void @t6_helper2(<8 x float> %1) nounwind
  %2 = load i32, i32* %a, align 4
  %add = add nsw i32 %2, 13
  ret i32 %add
}

declare void @t6_helper1(i32*, i32*)

declare void @t6_helper2(<8 x float>)

; VLAs + Dynamic realignment + byval
; The byval adjusts the sp after the prologue, but if we're restoring the sp
; from the base pointer we use the original adjustment.
%struct.struct_t = type { [5 x i32] }

define void @t7(i32 %size, %struct.struct_t* byval(%struct.struct_t) align 8 %arg1) nounwind uwtable {
entry:
  %x = alloca i32, align 32
  store i32 0, i32* %x, align 32
  %0 = zext i32 %size to i64
  %vla = alloca i32, i64 %0, align 16
  %1 = load i32, i32* %x, align 32
  call void @bar(i32 %1, i32* %vla, %struct.struct_t* byval(%struct.struct_t) align 8 %arg1)
  ret void

; CHECK: _t7
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
; CHECK: pushq %rbx
; CHECK: andq $-32, %rsp
; CHECK: subq ${{[0-9]+}}, %rsp
; CHECK: movq %rsp, %rbx

; Stack adjustment for byval
; CHECK: subq {{.*}}, %rsp
; CHECK: callq _bar
; CHECK-NOT: addq {{.*}}, %rsp
; CHECK: leaq -8(%rbp), %rsp
; CHECK: popq %rbx
; CHECK: popq %rbp
}

declare i8* @llvm.stacksave() nounwind

declare void @bar(i32, i32*, %struct.struct_t* byval(%struct.struct_t) align 8)

declare void @llvm.stackrestore(i8*) nounwind