; Test the alignment that llc emits for loops on PowerPC.
; The invocations below cover the a2, pwr8 and pwr9 CPUs; pwr8 and pwr9 are run
; for both the powerpc64le and the powerpc64 triples. Every invocation combines
; the common check prefix with a CPU-specific one: a2 uses the GENERIC family,
; while pwr8 and pwr9 share the PWR family.
; RUN: llc -verify-machineinstrs -mcpu=a2 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,GENERIC
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR

; Test the loop alignment again with the option
; -disable-ppc-innermost-loop-align32 passed to llc. These invocations mirror
; the ones above but use their own CPU-specific prefixes, so the expected
; alignment can differ from the default configuration.
; RUN: llc -verify-machineinstrs -mcpu=a2 -disable-ppc-innermost-loop-align32 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,GENERIC-DISABLE-PPC-INNERMOST-LOOP-ALIGN32
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -disable-ppc-innermost-loop-align32 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -disable-ppc-innermost-loop-align32 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -disable-ppc-innermost-loop-align32 -mtriple powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -disable-ppc-innermost-loop-align32 -mtriple powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32


; Argument record read by @big_loop and @big_loop_cold_innerloop: an i32*
; followed by two i32 fields.
%struct.parm = type { i32*, i32, i32 }

; Test the loop alignment when the innermost hot loop has more than 8 instructions.
define void @big_loop(%struct.parm* %arg) {
entry:
  ; Load the three fields of %arg: the i32* base pointer (field 0), the i32
  ; that seeds the outer counter %m.0 (field 1), and the i32 that, once
  ; sign-extended into %0, seeds the inner induction variable (field 2).
  %localArg.sroa.0.0..sroa_idx = getelementptr inbounds %struct.parm, %struct.parm* %arg, i64 0, i32 0
  %localArg.sroa.0.0.copyload = load i32*, i32** %localArg.sroa.0.0..sroa_idx, align 8
  %localArg.sroa.4.0..sroa_idx56 = getelementptr inbounds %struct.parm, %struct.parm* %arg, i64 0, i32 1
  %localArg.sroa.4.0.copyload = load i32, i32* %localArg.sroa.4.0..sroa_idx56, align 8
  %localArg.sroa.5.0..sroa_idx58 = getelementptr inbounds %struct.parm, %struct.parm* %arg, i64 0, i32 2
  %localArg.sroa.5.0.copyload = load i32, i32* %localArg.sroa.5.0..sroa_idx58, align 4
  %0 = sext i32 %localArg.sroa.5.0.copyload to i64
  br label %do.body

; Outer loop header: %m.0 is the outer counter, decremented in %do.end.
do.body:                                          ; preds = %do.end, %entry
  %m.0 = phi i32 [ %localArg.sroa.4.0.copyload, %entry ], [ %dec24, %do.end ]
  br label %do.body3

; Innermost loop: each iteration performs four i32 stores, at element offsets
; iv+3, iv+4, iv+2 and iv from the base pointer, then counts iv down by one.
; The loop is left after the iteration in which iv was 0.
do.body3:                                         ; preds = %do.body3, %do.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %do.body3 ], [ %0, %do.body ]
  %1 = add nsw i64 %indvars.iv, 2
  %arrayidx = getelementptr inbounds i32, i32* %localArg.sroa.0.0.copyload, i64 %1
  %2 = add nsw i64 %indvars.iv, 3
  %3 = trunc i64 %1 to i32
  %4 = add nsw i64 %indvars.iv, 4
  %arrayidx10 = getelementptr inbounds i32, i32* %localArg.sroa.0.0.copyload, i64 %2
  %5 = trunc i64 %2 to i32
  store i32 %5, i32* %arrayidx10, align 4
  %arrayidx12 = getelementptr inbounds i32, i32* %localArg.sroa.0.0.copyload, i64 %4
  %6 = trunc i64 %4 to i32
  store i32 %6, i32* %arrayidx12, align 4
  store i32 %3, i32* %arrayidx, align 4
  %arrayidx21 = getelementptr inbounds i32, i32* %localArg.sroa.0.0.copyload, i64 %indvars.iv
  %7 = trunc i64 %indvars.iv to i32
  %8 = add i32 %7, 1
  store i32 %8, i32* %arrayidx21, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %9 = icmp eq i64 %indvars.iv, 0
  br i1 %9, label %do.end, label %do.body3

; Outer loop latch: the exit test uses the value of %m.0 before the decrement.
do.end:                                           ; preds = %do.body3
  %dec24 = add nsw i32 %m.0, -1
  %tobool25 = icmp eq i32 %m.0, 0
  br i1 %tobool25, label %do.end26, label %do.body

; After both loops: store 0 at element offset %0 from the base pointer.
do.end26:                                         ; preds = %do.end
  %arrayidx28 = getelementptr inbounds i32, i32* %localArg.sroa.0.0.copyload, i64 %0
  store i32 0, i32* %arrayidx28, align 4
  ret void


; Expected output, in order: mtctr, one alignment directive, then bdnz.
; Only the default pwr8/pwr9 invocations expect .p2align 5 (32 bytes); the a2
; invocations and every invocation that passes
; -disable-ppc-innermost-loop-align32 expect .p2align 4 (16 bytes).
; CHECK-LABEL: @big_loop
; CHECK: mtctr
; GENERIC: .p2align 4
; PWR: .p2align 5
; GENERIC-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align 4
; PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align 4
; CHECK: bdnz
}

; Test the loop alignment when the innermost hot loop has 5-8 instructions.
define void @general_loop(i32* %s, i64 %m) {
entry:
  ; Skip both loops entirely when %m is 0.
  %tobool40 = icmp eq i64 %m, 0
  br i1 %tobool40, label %while.end18, label %while.body3.lr.ph

; Outer loop exit test: leave once the decremented outer counter %dec is 0.
while.cond.loopexit:                              ; preds = %while.body3
  %tobool = icmp eq i64 %dec, 0
  br i1 %tobool, label %while.end18, label %while.body3.lr.ph

; Inner loop preheader: computes the values that do not change inside the
; inner loop (the decremented outer counter and the two truncated i32 values
; that the inner loop stores).
while.body3.lr.ph:                                ; preds = %entry, %while.cond.loopexit
  %m.addr.041 = phi i64 [ %dec, %while.cond.loopexit ], [ %m, %entry ]
  %dec = add nsw i64 %m.addr.041, -1
  %conv = trunc i64 %m.addr.041 to i32
  %conv11 = trunc i64 %dec to i32
  br label %while.body3

; Innermost loop: three i32 stores per iteration, at element offsets n+2, n+1
; and n from %s; n counts down by one and the loop exits when it reaches 0.
while.body3:                                      ; preds = %while.body3.lr.ph, %while.body3
  %n.039 = phi i64 [ %m.addr.041, %while.body3.lr.ph ], [ %dec16, %while.body3 ]
  %inc = add nsw i64 %n.039, 1
  %arrayidx = getelementptr inbounds i32, i32* %s, i64 %n.039
  %inc5 = add nsw i64 %n.039, 2
  %arrayidx6 = getelementptr inbounds i32, i32* %s, i64 %inc
  %sub = sub nsw i64 %dec, %inc5
  %conv7 = trunc i64 %sub to i32
  %arrayidx9 = getelementptr inbounds i32, i32* %s, i64 %inc5
  store i32 %conv7, i32* %arrayidx9, align 4
  store i32 %conv11, i32* %arrayidx6, align 4
  store i32 %conv, i32* %arrayidx, align 4
  %dec16 = add nsw i64 %n.039, -1
  %tobool2 = icmp eq i64 %dec16, 0
  br i1 %tobool2, label %while.cond.loopexit, label %while.body3

while.end18:                                      ; preds = %while.cond.loopexit, %entry
  ret void


; Expected output, in order: mtctr, one alignment directive, then bdnz.
; Unlike @big_loop, the pwr8/pwr9 expectation stays at .p2align 5 even when
; -disable-ppc-innermost-loop-align32 is passed; the a2 invocations expect
; .p2align 4 in both configurations.
; CHECK-LABEL: @general_loop
; CHECK: mtctr
; GENERIC: .p2align 4
; PWR: .p2align 5
; GENERIC-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align 4
; PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align 5
; CHECK: bne
}

; Test the loop alignment when the innermost cold loop has more than 8 instructions.
; NOTE(review): the body below is textually identical to @big_loop, and no
; branch-weight metadata is visible on either conditional branch. Confirm what
; is supposed to make the inner loop cold for this test.
define void @big_loop_cold_innerloop(%struct.parm* %arg) {
entry:
  ; Load the three fields of %arg: the i32* base pointer (field 0), the i32
  ; that seeds the outer counter %m.0 (field 1), and the i32 that, once
  ; sign-extended into %0, seeds the inner induction variable (field 2).
  %localArg.sroa.0.0..sroa_idx = getelementptr inbounds %struct.parm, %struct.parm* %arg, i64 0, i32 0
  %localArg.sroa.0.0.copyload = load i32*, i32** %localArg.sroa.0.0..sroa_idx, align 8
  %localArg.sroa.4.0..sroa_idx56 = getelementptr inbounds %struct.parm, %struct.parm* %arg, i64 0, i32 1
  %localArg.sroa.4.0.copyload = load i32, i32* %localArg.sroa.4.0..sroa_idx56, align 8
  %localArg.sroa.5.0..sroa_idx58 = getelementptr inbounds %struct.parm, %struct.parm* %arg, i64 0, i32 2
  %localArg.sroa.5.0.copyload = load i32, i32* %localArg.sroa.5.0..sroa_idx58, align 4
  %0 = sext i32 %localArg.sroa.5.0.copyload to i64
  br label %do.body

; Outer loop header: %m.0 is the outer counter, decremented in %do.end.
do.body:                                          ; preds = %do.end, %entry
  %m.0 = phi i32 [ %localArg.sroa.4.0.copyload, %entry ], [ %dec24, %do.end ]
  br label %do.body3

; Innermost loop: each iteration performs four i32 stores, at element offsets
; iv+3, iv+4, iv+2 and iv from the base pointer, then counts iv down by one.
; The loop is left after the iteration in which iv was 0.
do.body3:                                         ; preds = %do.body3, %do.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %do.body3 ], [ %0, %do.body ]
  %1 = add nsw i64 %indvars.iv, 2
  %arrayidx = getelementptr inbounds i32, i32* %localArg.sroa.0.0.copyload, i64 %1
  %2 = add nsw i64 %indvars.iv, 3
  %3 = trunc i64 %1 to i32
  %4 = add nsw i64 %indvars.iv, 4
  %arrayidx10 = getelementptr inbounds i32, i32* %localArg.sroa.0.0.copyload, i64 %2
  %5 = trunc i64 %2 to i32
  store i32 %5, i32* %arrayidx10, align 4
  %arrayidx12 = getelementptr inbounds i32, i32* %localArg.sroa.0.0.copyload, i64 %4
  %6 = trunc i64 %4 to i32
  store i32 %6, i32* %arrayidx12, align 4
  store i32 %3, i32* %arrayidx, align 4
  %arrayidx21 = getelementptr inbounds i32, i32* %localArg.sroa.0.0.copyload, i64 %indvars.iv
  %7 = trunc i64 %indvars.iv to i32
  %8 = add i32 %7, 1
  store i32 %8, i32* %arrayidx21, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %9 = icmp eq i64 %indvars.iv, 0
  br i1 %9, label %do.end, label %do.body3

; Outer loop latch: the exit test uses the value of %m.0 before the decrement.
do.end:                                           ; preds = %do.body3
  %dec24 = add nsw i32 %m.0, -1
  %tobool25 = icmp eq i32 %m.0, 0
  br i1 %tobool25, label %do.end26, label %do.body

; After both loops: store 0 at element offset %0 from the base pointer.
do.end26:                                         ; preds = %do.end
  %arrayidx28 = getelementptr inbounds i32, i32* %localArg.sroa.0.0.copyload, i64 %0
  store i32 0, i32* %arrayidx28, align 4
  ret void


; Expected output: mtctr, then (default pwr8/pwr9 invocations only) a single
; .p2align 5, and on every invocation no further .p2align 5 before bdnz.
; CHECK-LABEL: @big_loop_cold_innerloop
; CHECK: mtctr
; PWR: .p2align 5
; CHECK-NOT: .p2align 5
; CHECK: bdnz
}