; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=CHECK

; Exercise tail folding on RISCV w/scalable vectors.
;
; Every loop in this file runs a constant 1024 iterations (the exit compare is
; always against 1024); the %n argument of each function is never referenced.
; NOTE(review): the scalar-only assertions on several functions below are
; presumably a consequence of the predicate-dont-vectorize setting leaving
; loops alone when they cannot be tail-folded -- confirm against the
; vectorizer's option documentation.

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"

; a[i] += v for i in [0, 1024): consecutive load, add, consecutive store.
; The assertions expect a vector loop over <vscale x 1 x i64> whose load and
; store are the masked intrinsics, with the mask computed as
; (index + stepvector) ule 1023, and a middle block that branches
; unconditionally (br i1 true) to the exit.
define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-LABEL: @vector_add(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]]
; CHECK-NEXT:    br i1 [[TMP1]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[INDEX]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
; CHECK-NEXT:    [[TMP7:%.*]] = add <vscale x 1 x i64> zeroinitializer, [[TMP6]]
; CHECK-NEXT:    [[VEC_IV:%.*]] = add <vscale x 1 x i64> [[BROADCAST_SPLAT]], [[TMP7]]
; CHECK-NEXT:    [[TMP8:%.*]] = icmp ule <vscale x 1 x i64> [[VEC_IV]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1023, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i32 0
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr [[TMP10]], i32 8, <vscale x 1 x i1> [[TMP8]], <vscale x 1 x i64> poison)
; CHECK-NEXT:    [[TMP11:%.*]] = add <vscale x 1 x i64> [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT2]]
; CHECK-NEXT:    call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[TMP11]], ptr [[TMP10]], i32 8, <vscale x 1 x i1> [[TMP8]])
; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP12]]
; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT:    [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[ELEM]], [[V]]
; CHECK-NEXT:    store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  %elem = load i64, ptr %arrayidx
  %add = add i64 %elem, %v
  store i64 %add, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}


; a[b[i]] = v, exercise scatter support
; The assertions expect only the original scalar loop (no vector blocks).
define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; CHECK-LABEL: @indexed_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]]
; CHECK-NEXT:    [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; CHECK-NEXT:    [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]]
; CHECK-NEXT:    store i64 [[V:%.*]], ptr [[AADDR]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  store i64 %v, ptr %aaddr
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

; sum += a[b[i]]: the load address is indexed through b (the load-side
; counterpart of @indexed_store), and the running sum is returned.
; The %v argument is not referenced. The assertions expect only the original
; scalar loop (no vector blocks).
define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; CHECK-LABEL: @indexed_load(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]]
; CHECK-NEXT:    [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; CHECK-NEXT:    [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]]
; CHECK-NEXT:    [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]]
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i64 [[SUM_NEXT_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum = phi i64 [0, %entry], [%sum.next, %for.body]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  %elem = load i64, ptr %aaddr
  %iv.next = add nuw nsw i64 %iv, 1
  %sum.next = add i64 %sum, %elem
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i64 %sum.next
}

; a[i] = v for i in [0, 1024): store a loop-invariant value to consecutive
; elements. The assertions expect a vector loop that broadcasts v once in the
; vector preheader and writes it with a masked store, using the same
; (index + stepvector) ule 1023 mask as @vector_add.
define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-LABEL: @splat_int(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]]
; CHECK-NEXT:    br i1 [[TMP1]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[INDEX]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
; CHECK-NEXT:    [[TMP7:%.*]] = add <vscale x 1 x i64> zeroinitializer, [[TMP6]]
; CHECK-NEXT:    [[VEC_IV:%.*]] = add <vscale x 1 x i64> [[BROADCAST_SPLAT]], [[TMP7]]
; CHECK-NEXT:    [[TMP8:%.*]] = icmp ule <vscale x 1 x i64> [[VEC_IV]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1023, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i32 0
; CHECK-NEXT:    call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP10]], i32 8, <vscale x 1 x i1> [[TMP8]])
; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP11]]
; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[V]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

; Each iteration stores v to the loop-invariant address b and also performs
; a[i] = v. The assertions expect only the original scalar loop (no vector
; blocks).
define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; CHECK-LABEL: @uniform_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    store i64 [[V:%.*]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[V]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  store i64 %v, ptr %b, align 8
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

; Each iteration loads from the loop-invariant address b and stores the loaded
; value to a[i]; the value loaded in the final iteration is returned (it is
; used after the loop). The assertions expect only the original scalar loop
; (no vector blocks).
define i64 @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %n) {
; CHECK-LABEL: @uniform_load(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[V:%.*]] = load i64, ptr [[B:%.*]], align 8
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
; CHECK-NEXT:    store i64 [[V]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i64 [[V_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %v = load i64, ptr %b, align 8
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i64 %v
}