; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN:   FileCheck %s --check-prefix=CHECK-LE
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:   -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN:   FileCheck %s --check-prefix=CHECK-BE
; RUN: opt --passes=sroa,loop-vectorize,loop-unroll,instcombine -S \
; RUN:   -vectorizer-maximize-bandwidth --mtriple=powerpc64le-- -mcpu=pwr10 < %s | \
; RUN:   FileCheck %s --check-prefix=CHECK-OPT

; Each function below places an object on the stack, hands its address to an
; external callee, and (for the vector cases) reads element 0 back. The llc
; runs pin the complete little-endian and big-endian code sequences; the opt
; run only inspects @test_Array.
;
; The data layout declares 256-bit and 512-bit vectors with matching ABI and
; preferred alignment (v256:256:256, v512:512:512) and a 128-bit stack
; alignment (S128).

target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512"

; <8 x i32> local with `align 32`: initialised with <1..8>, passed to @test,
; then element 0 is reloaded and returned sign-extended.
define dso_local signext i32 @test_32byte_vector() nounwind {
; CHECK-LE-LABEL: test_32byte_vector:
; CHECK-LE:       # %bb.0: # %entry
; CHECK-LE-NEXT:    mflr r0
; CHECK-LE-NEXT:    std r30, -16(r1)
; CHECK-LE-NEXT:    mr r30, r1
; CHECK-LE-NEXT:    std r0, 16(r1)
; CHECK-LE-NEXT:    clrldi r0, r1, 59
; CHECK-LE-NEXT:    subfic r0, r0, -96
; CHECK-LE-NEXT:    stdux r1, r1, r0
; CHECK-LE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
; CHECK-LE-NEXT:    addis r4, r2, .LCPI0_1@toc@ha
; CHECK-LE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
; CHECK-LE-NEXT:    addi r4, r4, .LCPI0_1@toc@l
; CHECK-LE-NEXT:    lxvd2x vs0, 0, r3
; CHECK-LE-NEXT:    lxvd2x vs1, 0, r4
; CHECK-LE-NEXT:    addi r4, r1, 48
; CHECK-LE-NEXT:    addi r3, r1, 32
; CHECK-LE-NEXT:    stxvd2x vs0, 0, r4
; CHECK-LE-NEXT:    stxvd2x vs1, 0, r3
; CHECK-LE-NEXT:    bl test
; CHECK-LE-NEXT:    nop
; CHECK-LE-NEXT:    lwa r3, 32(r1)
; CHECK-LE-NEXT:    mr r1, r30
; CHECK-LE-NEXT:    ld r0, 16(r1)
; CHECK-LE-NEXT:    ld r30, -16(r1)
; CHECK-LE-NEXT:    mtlr r0
; CHECK-LE-NEXT:    blr
;
; CHECK-BE-LABEL: test_32byte_vector:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    mflr r0
; CHECK-BE-NEXT:    std r30, -16(r1)
; CHECK-BE-NEXT:    std r0, 16(r1)
; CHECK-BE-NEXT:    clrldi r0, r1, 59
; CHECK-BE-NEXT:    mr r30, r1
; CHECK-BE-NEXT:    subfic r0, r0, -192
; CHECK-BE-NEXT:    stdux r1, r1, r0
; CHECK-BE-NEXT:    lis r3, -8192
; CHECK-BE-NEXT:    li r4, 5
; CHECK-BE-NEXT:    lis r5, -16384
; CHECK-BE-NEXT:    lis r6, -32768
; CHECK-BE-NEXT:    ori r3, r3, 1
; CHECK-BE-NEXT:    rldic r4, r4, 32, 29
; CHECK-BE-NEXT:    ori r5, r5, 1
; CHECK-BE-NEXT:    ori r6, r6, 1
; CHECK-BE-NEXT:    rldic r3, r3, 3, 29
; CHECK-BE-NEXT:    ori r4, r4, 6
; CHECK-BE-NEXT:    rldic r5, r5, 2, 30
; CHECK-BE-NEXT:    rldic r6, r6, 1, 31
; CHECK-BE-NEXT:    std r3, 152(r1)
; CHECK-BE-NEXT:    addi r3, r1, 128
; CHECK-BE-NEXT:    std r4, 144(r1)
; CHECK-BE-NEXT:    std r5, 136(r1)
; CHECK-BE-NEXT:    std r6, 128(r1)
; CHECK-BE-NEXT:    bl test
; CHECK-BE-NEXT:    nop
; CHECK-BE-NEXT:    lwa r3, 128(r1)
; CHECK-BE-NEXT:    mr r1, r30
; CHECK-BE-NEXT:    ld r0, 16(r1)
; CHECK-BE-NEXT:    ld r30, -16(r1)
; CHECK-BE-NEXT:    mtlr r0
; CHECK-BE-NEXT:    blr
entry:
  %a = alloca <8 x i32>, align 32
  %0 = bitcast <8 x i32>* %a to i8*
  call void @llvm.lifetime.start.p0i8(i64 32, i8* %0)
  store <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <8 x i32>* %a, align 32
  call void @test(<8 x i32>* %a)
  %1 = load <8 x i32>, <8 x i32>* %a, align 32
  %vecext = extractelement <8 x i32> %1, i32 0
  %2 = bitcast <8 x i32>* %a to i8*
  call void @llvm.lifetime.end.p0i8(i64 32, i8* %2)
  ret i32 %vecext
}

; <4 x i32> local (16-byte lifetime markers) that is nevertheless requested
; with `align 32`: initialised with <1..4>, passed to @test1, then element 0
; is reloaded and returned sign-extended.
define dso_local signext i32 @test_32byte_aligned_vector() nounwind {
; CHECK-LE-LABEL: test_32byte_aligned_vector:
; CHECK-LE:       # %bb.0: # %entry
; CHECK-LE-NEXT:    mflr r0
; CHECK-LE-NEXT:    std r30, -16(r1)
; CHECK-LE-NEXT:    mr r30, r1
; CHECK-LE-NEXT:    std r0, 16(r1)
; CHECK-LE-NEXT:    clrldi r0, r1, 59
; CHECK-LE-NEXT:    subfic r0, r0, -64
; CHECK-LE-NEXT:    stdux r1, r1, r0
; CHECK-LE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
; CHECK-LE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
; CHECK-LE-NEXT:    lxvd2x vs0, 0, r3
; CHECK-LE-NEXT:    addi r3, r1, 32
; CHECK-LE-NEXT:    stxvd2x vs0, 0, r3
; CHECK-LE-NEXT:    bl test1
; CHECK-LE-NEXT:    nop
; CHECK-LE-NEXT:    lwa r3, 32(r1)
; CHECK-LE-NEXT:    mr r1, r30
; CHECK-LE-NEXT:    ld r0, 16(r1)
; CHECK-LE-NEXT:    ld r30, -16(r1)
; CHECK-LE-NEXT:    mtlr r0
; CHECK-LE-NEXT:    blr
;
; CHECK-BE-LABEL: test_32byte_aligned_vector:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    mflr r0
; CHECK-BE-NEXT:    std r30, -16(r1)
; CHECK-BE-NEXT:    std r0, 16(r1)
; CHECK-BE-NEXT:    clrldi r0, r1, 59
; CHECK-BE-NEXT:    mr r30, r1
; CHECK-BE-NEXT:    subfic r0, r0, -160
; CHECK-BE-NEXT:    stdux r1, r1, r0
; CHECK-BE-NEXT:    lis r3, -16384
; CHECK-BE-NEXT:    lis r4, -32768
; CHECK-BE-NEXT:    ori r3, r3, 1
; CHECK-BE-NEXT:    ori r4, r4, 1
; CHECK-BE-NEXT:    rldic r3, r3, 2, 30
; CHECK-BE-NEXT:    rldic r4, r4, 1, 31
; CHECK-BE-NEXT:    std r3, 136(r1)
; CHECK-BE-NEXT:    addi r3, r1, 128
; CHECK-BE-NEXT:    std r4, 128(r1)
; CHECK-BE-NEXT:    bl test1
; CHECK-BE-NEXT:    nop
; CHECK-BE-NEXT:    lwa r3, 128(r1)
; CHECK-BE-NEXT:    mr r1, r30
; CHECK-BE-NEXT:    ld r0, 16(r1)
; CHECK-BE-NEXT:    ld r30, -16(r1)
; CHECK-BE-NEXT:    mtlr r0
; CHECK-BE-NEXT:    blr
entry:
  %a = alloca <4 x i32>, align 32
  %0 = bitcast <4 x i32>* %a to i8*
  call void @llvm.lifetime.start.p0i8(i64 16, i8* %0)
  store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %a, align 32
  call void @test1(<4 x i32>* %a)
  %1 = load <4 x i32>, <4 x i32>* %a, align 32
  %vecext = extractelement <4 x i32> %1, i32 0
  %2 = bitcast <4 x i32>* %a to i8*
  call void @llvm.lifetime.end.p0i8(i64 16, i8* %2)
  ret i32 %vecext
}


; Byte source for the widening copy loop in @test_Array.
@Arr1 = dso_local global [64 x i8] zeroinitializer, align 1

; Copies the 64 bytes of @Arr1, zero-extended to i16, into the 2-byte-aligned
; local %Arr2 and passes %Arr2 to @test_arr. The opt run checks that the alloca
; keeps `align 2` and that a <16 x i16> store with `align 2` appears. The two
; operands of that store are captured under separate FileCheck names (VAL, PTR)
; because they are different SSA values.
define dso_local void @test_Array() nounwind {
; CHECK-OPT-LABEL: @test_Array(
; CHECK-OPT-NEXT: entry:
; CHECK-OPT-NEXT: %Arr2 = alloca [64 x i16], align 2
; CHECK-OPT: store <16 x i16> [[VAL:%.*]], <16 x i16>* [[PTR:%.*]], align 2
; CHECK-LE-LABEL: test_Array:
; CHECK-LE:       # %bb.0: # %entry
; CHECK-LE-NEXT:    mflr r0
; CHECK-LE-NEXT:    std r0, 16(r1)
; CHECK-LE-NEXT:    stdu r1, -176(r1)
; CHECK-LE-NEXT:    addis r4, r2, Arr1@toc@ha
; CHECK-LE-NEXT:    li r3, 0
; CHECK-LE-NEXT:    li r6, 65
; CHECK-LE-NEXT:    addi r5, r1, 46
; CHECK-LE-NEXT:    addi r4, r4, Arr1@toc@l
; CHECK-LE-NEXT:    stw r3, 44(r1)
; CHECK-LE-NEXT:    addi r4, r4, -1
; CHECK-LE-NEXT:    mtctr r6
; CHECK-LE-NEXT:    bdz .LBB2_2
; CHECK-LE-NEXT:    .p2align 5
; CHECK-LE-NEXT:  .LBB2_1: # %for.body
; CHECK-LE-NEXT:    #
; CHECK-LE-NEXT:    lbz r6, 1(r4)
; CHECK-LE-NEXT:    addi r7, r5, 2
; CHECK-LE-NEXT:    addi r4, r4, 1
; CHECK-LE-NEXT:    addi r3, r3, 1
; CHECK-LE-NEXT:    sth r6, 2(r5)
; CHECK-LE-NEXT:    mr r5, r7
; CHECK-LE-NEXT:    bdnz .LBB2_1
; CHECK-LE-NEXT:  .LBB2_2: # %for.cond.cleanup
; CHECK-LE-NEXT:    addi r3, r1, 48
; CHECK-LE-NEXT:    bl test_arr
; CHECK-LE-NEXT:    nop
; CHECK-LE-NEXT:    addi r1, r1, 176
; CHECK-LE-NEXT:    ld r0, 16(r1)
; CHECK-LE-NEXT:    mtlr r0
; CHECK-LE-NEXT:    blr
;
; CHECK-BE-LABEL: test_Array:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    mflr r0
; CHECK-BE-NEXT:    std r0, 16(r1)
; CHECK-BE-NEXT:    stdu r1, -256(r1)
; CHECK-BE-NEXT:    addis r5, r2, Arr1@toc@ha
; CHECK-BE-NEXT:    li r3, 0
; CHECK-BE-NEXT:    addi r5, r5, Arr1@toc@l
; CHECK-BE-NEXT:    addi r4, r1, 126
; CHECK-BE-NEXT:    li r6, 65
; CHECK-BE-NEXT:    stw r3, 124(r1)
; CHECK-BE-NEXT:    addi r5, r5, -1
; CHECK-BE-NEXT:    mtctr r6
; CHECK-BE-NEXT:    bdz .LBB2_2
; CHECK-BE-NEXT:  .LBB2_1: # %for.body
; CHECK-BE-NEXT:    #
; CHECK-BE-NEXT:    lbz r6, 1(r5)
; CHECK-BE-NEXT:    addi r5, r5, 1
; CHECK-BE-NEXT:    addi r3, r3, 1
; CHECK-BE-NEXT:    sth r6, 2(r4)
; CHECK-BE-NEXT:    addi r4, r4, 2
; CHECK-BE-NEXT:    bdnz .LBB2_1
; CHECK-BE-NEXT:  .LBB2_2: # %for.cond.cleanup
; CHECK-BE-NEXT:    addi r3, r1, 128
; CHECK-BE-NEXT:    bl test_arr
; CHECK-BE-NEXT:    nop
; CHECK-BE-NEXT:    addi r1, r1, 256
; CHECK-BE-NEXT:    ld r0, 16(r1)
; CHECK-BE-NEXT:    mtlr r0
; CHECK-BE-NEXT:    blr
entry:
  %Arr2 = alloca [64 x i16], align 2
  %i = alloca i32, align 4
  %0 = bitcast [64 x i16]* %Arr2 to i8*
  call void @llvm.lifetime.start.p0i8(i64 128, i8* %0)
  %1 = bitcast i32* %i to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* %1)
  store i32 0, i32* %i, align 4
  br label %for.cond

for.cond:                                         ; preds = %for.inc, %entry
  %2 = load i32, i32* %i, align 4
  %cmp = icmp slt i32 %2, 64
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond
  %3 = bitcast i32* %i to i8*
  call void @llvm.lifetime.end.p0i8(i64 4, i8* %3)
  br label %for.end

for.body:                                         ; preds = %for.cond
  %4 = load i32, i32* %i, align 4
  %idxprom = sext i32 %4 to i64
  %arrayidx = getelementptr inbounds [64 x i8], [64 x i8]* @Arr1, i64 0, i64 %idxprom
  %5 = load i8, i8* %arrayidx, align 1
  %conv = zext i8 %5 to i16
  %6 = load i32, i32* %i, align 4
  %idxprom1 = sext i32 %6 to i64
  %arrayidx2 = getelementptr inbounds [64 x i16], [64 x i16]* %Arr2, i64 0, i64 %idxprom1
  store i16 %conv, i16* %arrayidx2, align 2
  br label %for.inc

for.inc:                                          ; preds = %for.body
  %7 = load i32, i32* %i, align 4
  %inc = add nsw i32 %7, 1
  store i32 %inc, i32* %i, align 4
  br label %for.cond

for.end:                                          ; preds = %for.cond.cleanup
  %arraydecay = getelementptr inbounds [64 x i16], [64 x i16]* %Arr2, i64 0, i64 0
  call void @test_arr(i16* %arraydecay)
  %8 = bitcast [64 x i16]* %Arr2 to i8*
  call void @llvm.lifetime.end.p0i8(i64 128, i8* %8)
  ret void
}

declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) nounwind

; External callees that receive the address of each stack object.
declare void @test(<8 x i32>*) nounwind
declare void @test1(<4 x i32>*) nounwind
declare void @test_arr(i16*)

declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) nounwind