1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -S -loop-vectorize -instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true -runtime-memory-check-threshold=24 < %s | FileCheck %s 3 4target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" 5 6; Check vectorization on an interleaved load group of factor 2 and an interleaved 7; store group of factor 2. 8 9; int AB[1024]; 10; int CD[1024]; 11; void test_array_load2_store2(int C, int D) { 12; for (int i = 0; i < 1024; i+=2) { 13; int A = AB[i]; 14; int B = AB[i+1]; 15; CD[i] = A + C; 16; CD[i+1] = B * D; 17; } 18; } 19 20 21@AB = common global [1024 x i32] zeroinitializer, align 4 22@CD = common global [1024 x i32] zeroinitializer, align 4 23 24define void @test_array_load2_store2(i32 %C, i32 %D) { 25; CHECK-LABEL: @test_array_load2_store2( 26; CHECK-NEXT: entry: 27; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 28; CHECK: vector.ph: 29; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0 30; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 31; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[D:%.*]], i32 0 32; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer 33; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 34; CHECK: vector.body: 35; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 36; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 37; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 [[OFFSET_IDX]] 38; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <8 x i32>* 39; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP1]], align 4 40; CHECK-NEXT: 
[[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 41; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 42; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[OFFSET_IDX]], 1 43; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] 44; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[STRIDED_VEC1]], [[BROADCAST_SPLAT3]] 45; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 [[TMP2]] 46; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i64 -1 47; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <8 x i32>* 48; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> 49; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], <8 x i32>* [[TMP7]], align 4 50; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 51; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 52; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 53; CHECK: middle.block: 54; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 55; CHECK: scalar.ph: 56; CHECK-NEXT: br label [[FOR_BODY:%.*]] 57; CHECK: for.body: 58; CHECK-NEXT: br i1 undef, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP2:![0-9]+]] 59; CHECK: for.end: 60; CHECK-NEXT: ret void 61; 62entry: 63 br label %for.body 64 65for.body: ; preds = %for.body, %entry 66 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 67 %arrayidx0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %indvars.iv 68 %tmp = load i32, i32* %arrayidx0, align 4 69 %tmp1 = or i64 %indvars.iv, 1 70 %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %tmp1 71 %tmp2 = load i32, i32* %arrayidx1, align 4 72 
%add = add nsw i32 %tmp, %C 73 %mul = mul nsw i32 %tmp2, %D 74 %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %indvars.iv 75 store i32 %add, i32* %arrayidx2, align 4 76 %arrayidx3 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %tmp1 77 store i32 %mul, i32* %arrayidx3, align 4 78 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 79 %cmp = icmp slt i64 %indvars.iv.next, 1024 80 br i1 %cmp, label %for.body, label %for.end 81 82for.end: ; preds = %for.body 83 ret void 84} 85 86; int A[3072]; 87; struct ST S[1024]; 88; void test_struct_st3() { 89; int *ptr = A; 90; for (int i = 0; i < 1024; i++) { 91; int X1 = *ptr++; 92; int X2 = *ptr++; 93; int X3 = *ptr++; 94; T[i].x = X1 + 1; 95; T[i].y = X2 + 2; 96; T[i].z = X3 + 3; 97; } 98; } 99 100 101%struct.ST3 = type { i32, i32, i32 } 102@A = common global [3072 x i32] zeroinitializer, align 4 103@S = common global [1024 x %struct.ST3] zeroinitializer, align 4 104 105define void @test_struct_array_load3_store3() { 106; CHECK-LABEL: @test_struct_array_load3_store3( 107; CHECK-NEXT: entry: 108; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 109; CHECK: vector.ph: 110; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 111; CHECK: vector.body: 112; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 113; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 114; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr [3072 x i32], [3072 x i32]* @A, i64 0, i64 [[TMP0]] 115; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[NEXT_GEP]] to <12 x i32>* 116; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, <12 x i32>* [[TMP1]], align 4 117; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9> 118; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10> 119; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = 
shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11> 120; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[STRIDED_VEC]], <i32 1, i32 1, i32 1, i32 1> 121; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[STRIDED_VEC2]], <i32 2, i32 2, i32 2, i32 2> 122; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[STRIDED_VEC3]], <i32 3, i32 3, i32 3, i32 3> 123; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 [[INDEX]], i32 2 124; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i64 -2 125; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <12 x i32>* 126; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 127; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 128; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11> 129; CHECK-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[TMP7]], align 4 130; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 131; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 132; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 133; CHECK: middle.block: 134; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 135; CHECK: scalar.ph: 136; CHECK-NEXT: br label [[FOR_BODY:%.*]] 137; CHECK: for.body: 138; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 139; CHECK: for.end: 140; CHECK-NEXT: ret void 141; 142entry: 143 br label %for.body 144 145for.body: ; preds = %for.body, %entry 146 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 147 
%ptr.016 = phi i32* [ getelementptr inbounds ([3072 x i32], [3072 x i32]* @A, i64 0, i64 0), %entry ], [ %incdec.ptr2, %for.body ] 148 %incdec.ptr = getelementptr inbounds i32, i32* %ptr.016, i64 1 149 %tmp = load i32, i32* %ptr.016, align 4 150 %incdec.ptr1 = getelementptr inbounds i32, i32* %ptr.016, i64 2 151 %tmp1 = load i32, i32* %incdec.ptr, align 4 152 %incdec.ptr2 = getelementptr inbounds i32, i32* %ptr.016, i64 3 153 %tmp2 = load i32, i32* %incdec.ptr1, align 4 154 %add = add nsw i32 %tmp, 1 155 %x = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 0 156 store i32 %add, i32* %x, align 4 157 %add3 = add nsw i32 %tmp1, 2 158 %y = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 1 159 store i32 %add3, i32* %y, align 4 160 %add6 = add nsw i32 %tmp2, 3 161 %z = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 2 162 store i32 %add6, i32* %z, align 4 163 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 164 %exitcond = icmp eq i64 %indvars.iv.next, 1024 165 br i1 %exitcond, label %for.end, label %for.body 166 167for.end: ; preds = %for.body 168 ret void 169} 170 171; Check vectorization on an interleaved load group of factor 4. 
172 173; struct ST4{ 174; int x; 175; int y; 176; int z; 177; int w; 178; }; 179; int test_struct_load4(struct ST4 *S) { 180; int r = 0; 181; for (int i = 0; i < 1024; i++) { 182; r += S[i].x; 183; r -= S[i].y; 184; r += S[i].z; 185; r -= S[i].w; 186; } 187; return r; 188; } 189 190%struct.ST4 = type { i32, i32, i32, i32 } 191 192define i32 @test_struct_load4(%struct.ST4* nocapture readonly %S) { 193; 194; CHECK-LABEL: @test_struct_load4( 195; CHECK-NEXT: entry: 196; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 197; CHECK: vector.ph: 198; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 199; CHECK: vector.body: 200; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 201; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] 202; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST4:%.*]], %struct.ST4* [[S:%.*]], i64 [[INDEX]], i32 0 203; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>* 204; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 4 205; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12> 206; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13> 207; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14> 208; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15> 209; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[VEC_PHI]] 210; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[STRIDED_VEC2]] 211; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[STRIDED_VEC1]], [[STRIDED_VEC3]] 212; CHECK-NEXT: [[TMP5]] = sub <4 x i32> [[TMP3]], [[TMP4]] 213; CHECK-NEXT: 
[[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 214; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 215; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 216; CHECK: middle.block: 217; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) 218; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 219; CHECK: scalar.ph: 220; CHECK-NEXT: br label [[FOR_BODY:%.*]] 221; CHECK: for.body: 222; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 223; CHECK: for.end: 224; CHECK-NEXT: [[SUB8_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] 225; CHECK-NEXT: ret i32 [[SUB8_LCSSA]] 226; 227entry: 228 br label %for.body 229 230for.body: ; preds = %for.body, %entry 231 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 232 %r.022 = phi i32 [ 0, %entry ], [ %sub8, %for.body ] 233 %x = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 0 234 %tmp = load i32, i32* %x, align 4 235 %add = add nsw i32 %tmp, %r.022 236 %y = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 1 237 %tmp1 = load i32, i32* %y, align 4 238 %sub = sub i32 %add, %tmp1 239 %z = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 2 240 %tmp2 = load i32, i32* %z, align 4 241 %add5 = add nsw i32 %sub, %tmp2 242 %w = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 3 243 %tmp3 = load i32, i32* %w, align 4 244 %sub8 = sub i32 %add5, %tmp3 245 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 246 %exitcond = icmp eq i64 %indvars.iv.next, 1024 247 br i1 %exitcond, label %for.end, label %for.body 248 249for.end: ; preds = %for.body 250 ret i32 %sub8 251} 252 253; Check vectorization on an interleaved store group of factor 4. 
254 255; void test_struct_store4(int *A, struct ST4 *B) { 256; int *ptr = A; 257; for (int i = 0; i < 1024; i++) { 258; int X = *ptr++; 259; B[i].x = X + 1; 260; B[i].y = X * 2; 261; B[i].z = X + 3; 262; B[i].w = X + 4; 263; } 264; } 265 266 267define void @test_struct_store4(i32* noalias nocapture readonly %A, %struct.ST4* noalias nocapture %B) { 268; CHECK-LABEL: @test_struct_store4( 269; CHECK-NEXT: entry: 270; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 271; CHECK: vector.ph: 272; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 273; CHECK: vector.body: 274; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 275; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[INDEX]] 276; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[NEXT_GEP]] to <4 x i32>* 277; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 278; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1> 279; CHECK-NEXT: [[TMP2:%.*]] = shl nsw <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1> 280; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], <i32 3, i32 3, i32 3, i32 3> 281; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], <i32 4, i32 4, i32 4, i32 4> 282; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ST4:%.*]], %struct.ST4* [[B:%.*]], i64 [[INDEX]], i32 3 283; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i64 -3 284; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <16 x i32>* 285; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 286; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 287; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <16 x i32> <i32 0, 
i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15> 288; CHECK-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], <16 x i32>* [[TMP7]], align 4 289; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 290; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 291; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 292; CHECK: middle.block: 293; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] 294; CHECK: scalar.ph: 295; CHECK-NEXT: br label [[FOR_BODY:%.*]] 296; CHECK: for.cond.cleanup: 297; CHECK-NEXT: ret void 298; CHECK: for.body: 299; CHECK-NEXT: br i1 undef, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 300; 301entry: 302 br label %for.body 303 304for.cond.cleanup: ; preds = %for.body 305 ret void 306 307for.body: ; preds = %for.body, %entry 308 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 309 %ptr.024 = phi i32* [ %A, %entry ], [ %incdec.ptr, %for.body ] 310 %incdec.ptr = getelementptr inbounds i32, i32* %ptr.024, i64 1 311 %tmp = load i32, i32* %ptr.024, align 4 312 %add = add nsw i32 %tmp, 1 313 %x = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 0 314 store i32 %add, i32* %x, align 4 315 %mul = shl nsw i32 %tmp, 1 316 %y = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 1 317 store i32 %mul, i32* %y, align 4 318 %add3 = add nsw i32 %tmp, 3 319 %z = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 2 320 store i32 %add3, i32* %z, align 4 321 %add6 = add nsw i32 %tmp, 4 322 %w = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 3 323 store i32 %add6, i32* %w, align 4 324 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 325 %exitcond = icmp eq i64 %indvars.iv.next, 1024 326 br i1 %exitcond, label %for.cond.cleanup, label %for.body 327} 328 329; Check 
vectorization on a reverse interleaved load group of factor 2 and 330; a reverse interleaved store group of factor 2. 331 332; struct ST2 { 333; int x; 334; int y; 335; }; 336; 337; void test_reversed_load2_store2(struct ST2 *A, struct ST2 *B) { 338; for (int i = 1023; i >= 0; i--) { 339; int a = A[i].x + i; // interleaved load of index 0 340; int b = A[i].y - i; // interleaved load of index 1 341; B[i].x = a; // interleaved store of index 0 342; B[i].y = b; // interleaved store of index 1 343; } 344; } 345 346 347%struct.ST2 = type { i32, i32 } 348 349define void @test_reversed_load2_store2(%struct.ST2* noalias nocapture readonly %A, %struct.ST2* noalias nocapture %B) { 350; CHECK-LABEL: @test_reversed_load2_store2( 351; CHECK-NEXT: entry: 352; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 353; CHECK: vector.ph: 354; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 355; CHECK: vector.body: 356; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 357; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <4 x i32> [ <i32 1023, i32 1022, i32 1021, i32 1020>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ] 358; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] 359; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.ST2* [[A:%.*]], i64 [[OFFSET_IDX]], i32 0 360; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 -6 361; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <8 x i32>* 362; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4 363; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 364; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 365; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 
7> 366; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC1]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 367; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[REVERSE]], [[VEC_IND3]] 368; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[REVERSE2]], [[VEC_IND3]] 369; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ST2]], %struct.ST2* [[B:%.*]], i64 [[OFFSET_IDX]], i32 1 370; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i64 -7 371; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <8 x i32>* 372; CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 373; CHECK-NEXT: [[REVERSE6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 374; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[REVERSE5]], <4 x i32> [[REVERSE6]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> 375; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], <8 x i32>* [[TMP7]], align 4 376; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 377; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <4 x i32> [[VEC_IND3]], <i32 -4, i32 -4, i32 -4, i32 -4> 378; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 379; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 380; CHECK: middle.block: 381; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] 382; CHECK: scalar.ph: 383; CHECK-NEXT: br label [[FOR_BODY:%.*]] 384; CHECK: for.cond.cleanup: 385; CHECK-NEXT: ret void 386; CHECK: for.body: 387; CHECK-NEXT: br i1 undef, label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP11:![0-9]+]] 388; 389entry: 390 br label %for.body 391 392for.cond.cleanup: ; preds = %for.body 393 ret void 394 395for.body: ; preds = %for.body, %entry 396 %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %for.body ] 397 %x = 
getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 0 398 %tmp = load i32, i32* %x, align 4 399 %tmp1 = trunc i64 %indvars.iv to i32 400 %add = add nsw i32 %tmp, %tmp1 401 %y = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 1 402 %tmp2 = load i32, i32* %y, align 4 403 %sub = sub nsw i32 %tmp2, %tmp1 404 %x5 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 0 405 store i32 %add, i32* %x5, align 4 406 %y8 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 1 407 store i32 %sub, i32* %y8, align 4 408 %indvars.iv.next = add nsw i64 %indvars.iv, -1 409 %cmp = icmp sgt i64 %indvars.iv, 0 410 br i1 %cmp, label %for.body, label %for.cond.cleanup 411} 412 413; Check vectorization on an interleaved load group of factor 2 with 1 gap 414; (missing the load of odd elements). Because the vectorized loop would 415; speculatively access memory out-of-bounds, we must execute at least one 416; iteration of the scalar loop. 
417 418; void even_load_static_tc(int *A, int *B) { 419; for (unsigned i = 0; i < 1024; i+=2) 420; B[i/2] = A[i] * 2; 421; } 422 423 424define void @even_load_static_tc(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) { 425; CHECK-LABEL: @even_load_static_tc( 426; CHECK-NEXT: entry: 427; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 428; CHECK: vector.ph: 429; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 430; CHECK: vector.body: 431; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 432; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 433; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[OFFSET_IDX]] 434; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <8 x i32>* 435; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP1]], align 4 436; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 437; CHECK-NEXT: [[TMP2:%.*]] = shl nsw <4 x i32> [[STRIDED_VEC]], <i32 1, i32 1, i32 1, i32 1> 438; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[INDEX]], 9223372036854775804 439; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP3]] 440; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>* 441; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP5]], align 4 442; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 443; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 508 444; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 445; CHECK: middle.block: 446; CHECK-NEXT: br label [[SCALAR_PH]] 447; CHECK: scalar.ph: 448; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1016, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 449; CHECK-NEXT: br label [[FOR_BODY:%.*]] 450; CHECK: for.cond.cleanup: 451; CHECK-NEXT: ret void 452; CHECK: for.body: 453; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 
[[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 454; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] 455; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 456; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP]], 1 457; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i64 [[INDVARS_IV]], 1 458; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]] 459; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX2]], align 4 460; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 2 461; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV]], 1022 462; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP13:![0-9]+]] 463; 464entry: 465 br label %for.body 466 467for.cond.cleanup: ; preds = %for.body 468 ret void 469 470for.body: ; preds = %for.body, %entry 471 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 472 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv 473 %tmp = load i32, i32* %arrayidx, align 4 474 %mul = shl nsw i32 %tmp, 1 475 %tmp1 = lshr exact i64 %indvars.iv, 1 476 %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %tmp1 477 store i32 %mul, i32* %arrayidx2, align 4 478 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 479 %cmp = icmp ult i64 %indvars.iv.next, 1024 480 br i1 %cmp, label %for.body, label %for.cond.cleanup 481} 482 483; Check vectorization on an interleaved load group of factor 2 with 1 gap 484; (missing the load of odd elements). Because the vectorized loop would 485; speculatively access memory out-of-bounds, we must execute at least one 486; iteration of the scalar loop. 
487 488; void even_load_dynamic_tc(int *A, int *B, unsigned N) { 489; for (unsigned i = 0; i < N; i+=2) 490; B[i/2] = A[i] * 2; 491; } 492 493 494define void @even_load_dynamic_tc(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i64 %N) { 495; CHECK-LABEL: @even_load_dynamic_tc( 496; CHECK-NEXT: entry: 497; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 2) 498; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1 499; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 500; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1 501; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8 502; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 503; CHECK: vector.ph: 504; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP2]], 3 505; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 506; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 4, i64 [[N_MOD_VF]] 507; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP4]] 508; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1 509; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 510; CHECK: vector.body: 511; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 512; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 513; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[OFFSET_IDX]] 514; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>* 515; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP6]], align 4 516; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 517; CHECK-NEXT: [[TMP7:%.*]] = shl nsw <4 x i32> [[STRIDED_VEC]], <i32 1, i32 1, i32 1, i32 1> 518; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[INDEX]], 9223372036854775804 519; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP8]] 520; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* 521; 
CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP10]], align 4 522; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 523; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 524; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] 525; CHECK: middle.block: 526; CHECK-NEXT: br label [[SCALAR_PH]] 527; CHECK: scalar.ph: 528; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 529; CHECK-NEXT: br label [[FOR_BODY:%.*]] 530; CHECK: for.cond.cleanup: 531; CHECK-NEXT: ret void 532; CHECK: for.body: 533; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 534; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] 535; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 536; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP]], 1 537; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i64 [[INDVARS_IV]], 1 538; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]] 539; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX2]], align 4 540; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 2 541; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], [[N]] 542; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP15:![0-9]+]] 543; 544entry: 545 br label %for.body 546 547for.cond.cleanup: ; preds = %for.body 548 ret void 549 550for.body: ; preds = %for.body, %entry 551 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 552 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv 553 %tmp = load i32, i32* %arrayidx, align 4 554 %mul = shl nsw i32 %tmp, 1 555 %tmp1 = lshr exact i64 %indvars.iv, 1 556 %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %tmp1 557 store i32 %mul, i32* %arrayidx2, align 4 558 %indvars.iv.next = add nuw nsw i64 
%indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, %N
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; Check vectorization on a reverse interleaved load group of factor 2 with 1
; gap and a reverse interleaved store group of factor 2. The interleaved load
; group should be removed since it has a gap and is reverse.

; NOTE: the pair members are i64, and the accessed arrays are the parameters
; P1/P2 (the original pseudocode mistakenly referenced A/B and int).
;
; struct pair {
;   long x;
;   long y;
; };
;
; void load_gap_reverse(struct pair *P1, struct pair *P2, long X) {
;   for (int i = 1023; i >= 0; i--) {
;     long a = X + i;
;     long b = P2[i].y - i;
;     P1[i].x = a;
;     P2[i].y = b;
;   }
; }


%pair = type { i64, i64 }
define void @load_gap_reverse(%pair* noalias nocapture readonly %P1, %pair* noalias nocapture readonly %P2, i64 %X) {
; CHECK-LABEL: @load_gap_reverse(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1023, i64 1022, i64 1021, i64 1020>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 1022, [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 1021, [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 1020, [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i64> [[BROADCAST_SPLAT]], [[VEC_IND]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR:%.*]], %pair* [[P1:%.*]], i64 [[OFFSET_IDX]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[P1]], i64 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[P1]], i64 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[P1]], i64 [[TMP2]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[P2:%.*]], i64 [[OFFSET_IDX]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[P2]], i64 [[TMP0]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[P2]], i64 [[TMP1]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[P2]], i64 [[TMP2]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP8]], align 8
; CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP9]], align 8
; CHECK-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP10]], align 8
; CHECK-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP11]], align 8
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3
; CHECK-NEXT: [[TMP20:%.*]] = sub nsw <4 x i64> [[TMP19]], [[VEC_IND]]
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; CHECK-NEXT: store i64 [[TMP21]], i64* [[TMP4]], align 8
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; CHECK-NEXT: store i64 [[TMP22]], i64* [[TMP5]], align 8
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; CHECK-NEXT: store i64 [[TMP23]], i64* [[TMP6]], align 8
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; CHECK-NEXT: store i64 [[TMP24]], i64* [[TMP7]], align 8
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP20]], i32 0
; CHECK-NEXT: store i64 [[TMP25]], i64* [[TMP8]], align 8
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP20]], i32 1
; CHECK-NEXT: store i64 [[TMP26]], i64* [[TMP9]], align 8
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP20]], i32 2
; CHECK-NEXT: store i64 [[TMP27]], i64* [[TMP10]], align 8
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP20]], i32 3
; CHECK-NEXT: store i64 [[TMP28]], i64* [[TMP11]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 -4, i64 -4, i64 -4, i64 -4>
; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 undef, label [[FOR_BODY]], label [[FOR_EXIT]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: for.exit:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %i = phi i64 [ 1023, %entry ], [ %i.next, %for.body ]
  %0 = add nsw i64 %X, %i
  %1 = getelementptr inbounds %pair, %pair* %P1, i64 %i, i32 0
  %2 = getelementptr inbounds %pair, %pair* %P2, i64 %i, i32 1
  %3 = load i64, i64* %2, align 8
  %4 = sub nsw i64 %3, %i
  store i64 %0, i64* %1, align 8
  store i64 %4, i64* %2, align 8
  %i.next = add nsw i64 %i, -1
  %cond = icmp sgt i64 %i, 0
  br i1 %cond, label %for.body, label %for.exit

for.exit:
  ret void
}

; Check vectorization on interleaved access groups identified from mixed
; loads/stores.
; void mixed_load2_store2(int *A, int *B) {
;   for (unsigned i = 0; i < 1024; i+=2)  {
;     B[i] = A[i] * A[i+1];
;     B[i+1] = A[i] + A[i+1];
;   }
; }


define void @mixed_load2_store2(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
; CHECK-LABEL: @mixed_load2_store2(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <8 x i32>*
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = mul nsw <4 x i32> [[STRIDED_VEC1]], [[STRIDED_VEC]]
; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC3]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <8 x i32>*
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], <8 x i32>* [[TMP5]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: br i1 undef, label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP19:![0-9]+]]
;
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %tmp = load i32, i32* %arrayidx, align 4
  %tmp1 = or i64 %indvars.iv, 1
  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %tmp1
  %tmp2 = load i32, i32* %arrayidx2, align 4
  %mul = mul nsw i32 %tmp2, %tmp
  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  store i32 %mul, i32* %arrayidx4, align 4
  %tmp3 = load i32, i32* %arrayidx, align 4
  %tmp4 = load i32, i32* %arrayidx2, align 4
  %add10 = add nsw i32 %tmp4, %tmp3
  %arrayidx13 = getelementptr inbounds i32, i32* %B, i64 %tmp1
  store i32 %add10, i32* %arrayidx13, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; Check vectorization on interleaved access groups identified from mixed
; loads/stores.
; void mixed_load3_store3(int *A) {
;   for (unsigned i = 0; i < 1024; i++) {
;     *A++ += i;
;     *A++ += i;
;     *A++ += i;
;   }
; }


define void @mixed_load3_store3(i32* nocapture %A) {
; CHECK-LABEL: @mixed_load3_store3(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[NEXT_GEP]] to <12 x i32>*
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, <12 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[VEC_IND]]
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_IND]]
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[STRIDED_VEC3]], [[VEC_IND]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[NEXT_GEP]] to <12 x i32>*
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
; CHECK-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[TMP5]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: br i1 undef, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
;
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %i.013 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %A.addr.012 = phi i32* [ %A, %entry ], [ %incdec.ptr3, %for.body ]
  %incdec.ptr = getelementptr inbounds i32, i32* %A.addr.012, i64 1
  %tmp = load i32, i32* %A.addr.012, align 4
  %add = add i32 %tmp, %i.013
  store i32 %add, i32* %A.addr.012, align 4
  %incdec.ptr1 = getelementptr inbounds i32, i32* %A.addr.012, i64 2
  %tmp1 = load i32, i32* %incdec.ptr, align 4
  %add2 = add i32 %tmp1, %i.013
  store i32 %add2, i32* %incdec.ptr, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %A.addr.012, i64 3
  %tmp2 = load i32, i32* %incdec.ptr1, align 4
  %add4 = add i32 %tmp2, %i.013
  store i32 %add4, i32* %incdec.ptr1, align 4
  %inc = add nuw nsw i32 %i.013, 1
  %exitcond = icmp eq i32 %inc, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
; Check vectorization on interleaved access groups with members having different
; kinds of type.

; struct IntFloat {
;   int a;
;   float b;
; };
;
; int SA;
; float SB;
;
; void int_float_struct(struct IntFloat *A) {
;   int SumA;
;   float SumB;
;   for (unsigned i = 0; i < 1024; i++) {
;     SumA += A[i].a;
;     SumB += A[i].b;
;   }
;   SA = SumA;
;   SB = SumB;
; }


%struct.IntFloat = type { i32, float }

@SA = common global i32 0, align 4
@SB = common global float 0.000000e+00, align 4

define void @int_float_struct(%struct.IntFloat* nocapture readonly %A) #0 {
; CHECK-LABEL: @int_float_struct(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ <i32 undef, i32 0, i32 0, i32 0>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_INTFLOAT:%.*]], %struct.IntFloat* [[A:%.*]], i64 [[INDEX]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <8 x i32>*
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[STRIDED_VEC2]] to <4 x float>
; CHECK-NEXT: [[TMP3]] = add <4 x i32> [[STRIDED_VEC]], [[VEC_PHI1]]
; CHECK-NEXT: [[TMP4]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP2]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]])
; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP4]])
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[ADD3_LCSSA:%.*]] = phi float [ undef, [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: store i32 [[ADD_LCSSA]], i32* @SA, align 4
; CHECK-NEXT: store float [[ADD3_LCSSA]], float* @SB, align 4
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: br i1 undef, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
;
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  store i32 %add, i32* @SA, align 4
  store float %add3, float* @SB, align 4
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %SumB.014 = phi float [ undef, %entry ], [ %add3, %for.body ]
  %SumA.013 = phi i32 [ undef, %entry ], [ %add, %for.body ]
  %a = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 0
  %tmp = load i32, i32* %a, align 4
  %add = add nsw i32 %tmp, %SumA.013
  %b = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 1
  %tmp1 = load float, float* %b, align 4
  %add3 = fadd fast float %SumB.014, %tmp1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Check vectorization of interleaved access groups in the presence of
; dependences (PR27626). The following tests check that we don't reorder
; dependent loads and stores when generating code for interleaved access
; groups. Stores should be scalarized because the required code motion would
; break dependences, and the remaining interleaved load groups should have
; gaps.

; PR27626_0: Ensure a strided store is not moved after a dependent (zero
; distance) strided load.

; void PR27626_0(struct pair *p, int z, int n) {
;   for (int i = 0; i < n; i++) {
;     p[i].x = z;
;     p[i].y = p[i].x;
;   }
; }


%pair.i32 = type { i32, i32 }
define void @PR27626_0(%pair.i32 *%p, i32 %z, i64 %n) {
; CHECK-LABEL: @PR27626_0(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 5
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[SMAX]], 3
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 4, i64 [[N_MOD_VF]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP1]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[INDEX]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP2]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP3]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP4]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[INDEX]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP2]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP3]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP4]], i32 1
; CHECK-NEXT: store i32 [[Z:%.*]], i32* [[TMP5]], align 4
; CHECK-NEXT: store i32 [[Z]], i32* [[TMP6]], align 4
; CHECK-NEXT: store i32 [[Z]], i32* [[TMP7]], align 4
; CHECK-NEXT: store i32 [[Z]], i32* [[TMP8]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>*
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 0
; CHECK-NEXT: store i32 [[TMP14]], i32* [[TMP9]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 2
; CHECK-NEXT: store i32 [[TMP15]], i32* [[TMP10]], align 4
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 4
; CHECK-NEXT: store i32 [[TMP16]], i32* [[TMP11]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 6
; CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP12]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[P_I_X:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 0
; CHECK-NEXT: [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
; CHECK-NEXT: store i32 [[Z]], i32* [[P_I_X]], align 4
; CHECK-NEXT: store i32 [[Z]], i32* [[P_I_Y]], align 4
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP25:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %p_i.x = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 0
  %p_i.y = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
  store i32 %z, i32* %p_i.x, align 4
  %0 = load i32, i32* %p_i.x, align 4
  store i32 %0, i32 *%p_i.y, align 4
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; PR27626_1: Ensure a strided load is not moved before a dependent (zero
; distance) strided store.
; void PR27626_1(struct pair *p, int n) {
;   int s = 0;
;   for (int i = 0; i < n; i++) {
;     p[i].y = p[i].x;
;     s += p[i].y;
;   }
; }


define i32 @PR27626_1(%pair.i32 *%p, i64 %n) {
; CHECK-LABEL: @PR27626_1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 5
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[SMAX]], 3
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 4, i64 [[N_MOD_VF]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP1]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[INDEX]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[INDEX]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP2]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP3]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP4]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>*
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP10]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 0
; CHECK-NEXT: store i32 [[TMP11]], i32* [[TMP6]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 2
; CHECK-NEXT: store i32 [[TMP12]], i32* [[TMP7]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 4
; CHECK-NEXT: store i32 [[TMP13]], i32* [[TMP8]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 6
; CHECK-NEXT: store i32 [[TMP14]], i32* [[TMP9]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP6]] to <8 x i32>*
; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP15]], align 4
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[TMP16]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP16]])
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[S:%.*]] = phi i32 [ [[TMP20:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[P_I_X:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 0
; CHECK-NEXT: [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[P_I_X]], align 4
; CHECK-NEXT: store i32 [[TMP19]], i32* [[P_I_Y]], align 4
; CHECK-NEXT: [[TMP20]] = add nsw i32 [[TMP19]], [[S]]
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP27:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret i32 [[TMP20]]
;
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %s = phi i32 [ %2, %for.body ], [ 0, %entry ]
  %p_i.x = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 0
  %p_i.y = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
  %0 = load i32, i32* %p_i.x, align 4
  store i32 %0, i32* %p_i.y, align 4
  %1 = load i32, i32* %p_i.y, align 4
  %2 = add nsw i32 %1, %s
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  %3 = phi i32 [ %2, %for.body ]
  ret i32 %3
}

; PR27626_2: Ensure a strided store is not moved after a dependent (negative
; distance) strided load.
; void PR27626_2(struct pair *p, int z, int n) {
;   for (int i = 0; i < n; i++) {
;     p[i].x = z;
;     p[i].y = p[i - 1].x;
;   }
; }

; NOTE(review): the checks below expect the p[i-1].x address to be folded to a
; constant "i64 -1" index (a consequence of the nuw/nsw flags on the i-1
; computation under -instcombine) — presumably intentional autogenerated
; output; regenerate with update_test_checks.py rather than hand-editing.

define void @PR27626_2(%pair.i32 *%p, i64 %n, i32 %z) {
; CHECK-LABEL: @PR27626_2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 5
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[SMAX]], 3
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 4, i64 [[N_MOD_VF]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP1]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[INDEX]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP2]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP3]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP4]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 -1, i32 0
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[INDEX]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP2]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP3]], i32 1
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP4]], i32 1
; CHECK-NEXT: store i32 [[Z:%.*]], i32* [[TMP5]], align 4
; CHECK-NEXT: store i32 [[Z]], i32* [[TMP6]], align 4
; CHECK-NEXT: store i32 [[Z]], i32* [[TMP7]], align 4
; CHECK-NEXT: store i32 [[Z]], i32* [[TMP8]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP9]] to <8 x i32>*
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP14]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 0
; CHECK-NEXT: store i32 [[TMP15]], i32* [[TMP10]], align 4
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 2
; CHECK-NEXT: store i32 [[TMP16]], i32* [[TMP11]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 4
; CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP12]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 6
; CHECK-NEXT: store i32 [[TMP18]], i32* [[TMP13]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[P_I_X:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 0
; CHECK-NEXT: [[P_I_MINUS_1_X:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 -1, i32 0
; CHECK-NEXT: [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
; CHECK-NEXT: store i32 [[Z]], i32* [[P_I_X]], align 4
; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[P_I_MINUS_1_X]], align 4
; CHECK-NEXT: store i32 [[TMP20]], i32* [[P_I_Y]], align 4
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP29:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %i_minus_1 = add nuw nsw i64 %i, -1
  %p_i.x = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 0
  %p_i_minus_1.x = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i_minus_1, i32 0
  %p_i.y = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
  store i32 %z, i32* %p_i.x, align 4
  %0 = load i32, i32* %p_i_minus_1.x, align 4
  store i32 %0, i32 *%p_i.y, align 4
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; PR27626_3: Ensure a strided load is not moved before a dependent (negative
; distance) strided store.

; void PR27626_3(struct pair *p, int z, int n) {
;   for (int i = 0; i < n; i++) {
;     p[i + 1].y = p[i].x;
;     s += p[i].y;
;   }
; }


define i32 @PR27626_3(%pair.i32 *%p, i64 %n, i32 %z) {
; CHECK-LABEL: @PR27626_3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 5
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = and i64 [[SMAX]], 3
; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[TMP0]], i64 4, i64 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP1]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[INDEX]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[INDEX]], i32 1
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]], i32 1
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP2]], i32 1
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP7]], i32 1
; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i64> [[TMP2]], i32 2
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP9]], i32 1
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i64> [[TMP2]], i32 3
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP11]], i32 1
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP3]] to <8 x i32>*
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4
; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 0
; CHECK-NEXT:    store i32 [[TMP14]], i32* [[TMP6]], align 4
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 2
; CHECK-NEXT:    store i32 [[TMP15]], i32* [[TMP8]], align 4
; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 4
; CHECK-NEXT:    store i32 [[TMP16]], i32* [[TMP10]], align 4
; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i32 6
; CHECK-NEXT:    store i32 [[TMP17]], i32* [[TMP12]], align 4
; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i32* [[TMP4]] to <8 x i32>*
; CHECK-NEXT:    [[WIDE_VEC1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP18]], align 4
; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT:    [[TMP19]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP19]])
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[S:%.*]] = phi i32 [ [[TMP24:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[I_PLUS_1:%.*]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT:    [[P_I_X:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 0
; CHECK-NEXT:    [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
; CHECK-NEXT:    [[P_I_PLUS_1_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I_PLUS_1]], i32 1
; CHECK-NEXT:    [[TMP22:%.*]] = load i32, i32* [[P_I_X]], align 4
; CHECK-NEXT:    store i32 [[TMP22]], i32* [[P_I_PLUS_1_Y]], align 4
; CHECK-NEXT:    [[TMP23:%.*]] = load i32, i32* [[P_I_Y]], align 4
; CHECK-NEXT:    [[TMP24]] = add nsw i32 [[TMP23]], [[S]]
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP31:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret i32 [[TMP24]]
;
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %s = phi i32 [ %2, %for.body ], [ 0, %entry ]
  %i_plus_1 = add nuw nsw i64 %i, 1
  %p_i.x = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 0
  %p_i.y = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
  %p_i_plus_1.y = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i_plus_1, i32 1
  %0 = load i32, i32* %p_i.x, align 4
  store i32 %0, i32* %p_i_plus_1.y, align 4
  %1 = load i32, i32* %p_i.y, align 4
  %2 = add nsw i32 %1, %s
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  %3 = phi i32 [ %2, %for.body ]
  ret i32 %3
}

; PR27626_4: Ensure we form an interleaved group for strided stores in the
;            presence of a write-after-write dependence. We create a group for
;            (2) and (3) while excluding (1).

; void PR27626_4(int *a, int x, int y, int z, int n) {
;   for (int i = 0; i < n; i += 2) {
;     a[i] = x;      // (1)
;     a[i] = y;      // (2)
;     a[i + 1] = z;  // (3)
;   }
; }


define void @PR27626_4(i32 *%a, i32 %x, i32 %y, i32 %z, i64 %n) {
; CHECK-LABEL: @PR27626_4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 2)
; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 6
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775804
; CHECK-NEXT:    [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[Z:%.*]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP3:%.*]] = or i64 [[OFFSET_IDX]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[OFFSET_IDX]], 4
; CHECK-NEXT:    [[TMP5:%.*]] = or i64 [[OFFSET_IDX]], 6
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]]
; CHECK-NEXT:    store i32 [[X:%.*]], i32* [[TMP6]], align 4
; CHECK-NEXT:    store i32 [[X]], i32* [[TMP7]], align 4
; CHECK-NEXT:    store i32 [[X]], i32* [[TMP8]], align 4
; CHECK-NEXT:    store i32 [[X]], i32* [[TMP9]], align 4
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <8 x i32>*
; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT2]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; CHECK-NEXT:    store <8 x i32> [[INTERLEAVED_VEC]], <8 x i32>* [[TMP11]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[I_PLUS_1:%.*]] = or i64 [[I]], 1
; CHECK-NEXT:    [[A_I:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I]]
; CHECK-NEXT:    [[A_I_PLUS_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_PLUS_1]]
; CHECK-NEXT:    store i32 [[Y]], i32* [[A_I]], align 4
; CHECK-NEXT:    store i32 [[Z]], i32* [[A_I_PLUS_1]], align 4
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 2
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP33:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %i_plus_1 = add i64 %i, 1
  %a_i = getelementptr inbounds i32, i32* %a, i64 %i
  %a_i_plus_1 = getelementptr inbounds i32, i32* %a, i64 %i_plus_1
  store i32 %x, i32* %a_i, align 4
  store i32 %y, i32* %a_i, align 4
  store i32 %z, i32* %a_i_plus_1, align 4
  %i.next = add nuw nsw i64 %i, 2
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; PR27626_5: Ensure we do not form an interleaved group for strided stores in
;            the presence of a write-after-write dependence.

; void PR27626_5(int *a, int x, int y, int z, int n) {
;   for (int i = 3; i < n; i += 2) {
;     a[i - 1] = x;
;     a[i - 3] = y;
;     a[i] = z;
;   }
; }


define void @PR27626_5(i32 *%a, i32 %x, i32 %y, i32 %z, i64 %n) {
; CHECK-LABEL: @PR27626_5(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 5)
; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i64 [[SMAX]], -4
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 6
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775804
; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[N_VEC]], 1
; CHECK-NEXT:    [[IND_END:%.*]] = or i64 [[TMP3]], 3
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 5, i64 7, i64 9>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP4:%.*]] = shl i64 [[INDEX]], 1
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = or i64 [[TMP4]], 3
; CHECK-NEXT:    [[TMP5:%.*]] = or i64 [[TMP4]], 5
; CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP4]], 7
; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP4]], 9
; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i64> [[VEC_IND]], <i64 -1, i64 -1, i64 -1, i64 -1>
; CHECK-NEXT:    [[TMP9:%.*]] = add <4 x i64> [[VEC_IND]], <i64 -3, i64 -3, i64 -3, i64 -3>
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i64> [[TMP8]], i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i64> [[TMP8]], i32 1
; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP16]]
; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i64> [[TMP8]], i32 2
; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP18]]
; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i64> [[TMP8]], i32 3
; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]]
; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <4 x i64> [[TMP9]], i32 0
; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP22]]
; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i64> [[TMP9]], i32 1
; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP24]]
; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <4 x i64> [[TMP9]], i32 2
; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP26]]
; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <4 x i64> [[TMP9]], i32 3
; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP28]]
; CHECK-NEXT:    store i32 [[X:%.*]], i32* [[TMP15]], align 4
; CHECK-NEXT:    store i32 [[X]], i32* [[TMP17]], align 4
; CHECK-NEXT:    store i32 [[X]], i32* [[TMP19]], align 4
; CHECK-NEXT:    store i32 [[X]], i32* [[TMP21]], align 4
; CHECK-NEXT:    store i32 [[Y:%.*]], i32* [[TMP23]], align 4
; CHECK-NEXT:    store i32 [[Y]], i32* [[TMP25]], align 4
; CHECK-NEXT:    store i32 [[Y]], i32* [[TMP27]], align 4
; CHECK-NEXT:    store i32 [[Y]], i32* [[TMP29]], align 4
; CHECK-NEXT:    store i32 [[Z:%.*]], i32* [[TMP10]], align 4
; CHECK-NEXT:    store i32 [[Z]], i32* [[TMP11]], align 4
; CHECK-NEXT:    store i32 [[Z]], i32* [[TMP12]], align 4
; CHECK-NEXT:    store i32 [[Z]], i32* [[TMP13]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8>
; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[I_MINUS_1:%.*]] = add i64 [[I]], -1
; CHECK-NEXT:    [[I_MINUS_3:%.*]] = add i64 [[I]], -3
; CHECK-NEXT:    [[A_I:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I]]
; CHECK-NEXT:    [[A_I_MINUS_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_MINUS_1]]
; CHECK-NEXT:    [[A_I_MINUS_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_MINUS_3]]
; CHECK-NEXT:    store i32 [[X]], i32* [[A_I_MINUS_1]], align 4
; CHECK-NEXT:    store i32 [[Y]], i32* [[A_I_MINUS_3]], align 4
; CHECK-NEXT:    store i32 [[Z]], i32* [[A_I]], align 4
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 2
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP35:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 3, %entry ]
  %i_minus_1 = sub i64 %i, 1
  %i_minus_3 = sub i64 %i_minus_1, 2
  %a_i = getelementptr inbounds i32, i32* %a, i64 %i
  %a_i_minus_1 = getelementptr inbounds i32, i32* %a, i64 %i_minus_1
  %a_i_minus_3 = getelementptr inbounds i32, i32* %a, i64 %i_minus_3
  store i32 %x, i32* %a_i_minus_1, align 4
  store i32 %y, i32* %a_i_minus_3, align 4
  store i32 %z, i32* %a_i, align 4
  %i.next = add nuw nsw i64 %i, 2
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; PR34743: Ensure that a cast which needs to sink after a load that belongs to
;          an interleaved group, indeed gets sunk.

; void PR34743(short *a, int *b, int n) {
;   for (int i = 0, iv = 0; iv < n; i++, iv += 2) {
;     b[i] = a[iv] * a[iv+1] * a[iv+2];
;   }
; }


define void @PR34743(i16* %a, i32* %b, i64 %n) {
; CHECK-LABEL: @PR34743(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[N:%.*]], 1
; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i64 [[TMP0]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 6
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK:       vector.memcheck:
; CHECK-NEXT:    [[TMP2:%.*]] = lshr i64 [[N]], 1
; CHECK-NEXT:    [[TMP3:%.*]] = add nuw i64 [[TMP2]], 1
; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[TMP3]]
; CHECK-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
; CHECK-NEXT:    [[TMP4:%.*]] = and i64 [[N]], -2
; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP4]], 3
; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16* [[SCEVGEP5]] to i32*
; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt i32* [[TMP6]], [[B]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[SCEVGEP]] to i16*
; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult i16* [[SCEVGEP3]], [[TMP7]]
; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP1]], -4
; CHECK-NEXT:    [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[STRIDED_VEC8:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[OFFSET_IDX]], 1
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <8 x i16>*
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i16>, <8 x i16>* [[TMP10]], align 4
; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT:    [[STRIDED_VEC8]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT:    [[TMP11:%.*]] = sext <4 x i16> [[STRIDED_VEC]] to <4 x i32>
; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[STRIDED_VEC8]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT:    [[TMP13:%.*]] = sext <4 x i16> [[TMP12]] to <4 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = sext <4 x i16> [[STRIDED_VEC8]] to <4 x i32>
; CHECK-NEXT:    [[TMP15:%.*]] = mul nsw <4 x i32> [[TMP13]], [[TMP11]]
; CHECK-NEXT:    [[TMP16:%.*]] = mul nsw <4 x i32> [[TMP15]], [[TMP14]]
; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <4 x i32>*
; CHECK-NEXT:    store <4 x i32> [[TMP16]], <4 x i32>* [[TMP18]], align 4, !alias.scope !36, !noalias !39
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i16> [[WIDE_VEC]], i32 7
; CHECK-NEXT:    br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    [[BC_RESUME_VAL7:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD2:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV2:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL7]], [[SCALAR_PH]] ], [ [[I1:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
; CHECK-NEXT:    [[I1]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT:    [[IV1:%.*]] = or i64 [[IV]], 1
; CHECK-NEXT:    [[IV2]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[IV1]]
; CHECK-NEXT:    [[LOAD1:%.*]] = load i16, i16* [[GEP1]], align 4
; CHECK-NEXT:    [[CONV1:%.*]] = sext i16 [[LOAD1]] to i32
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[IV2]]
; CHECK-NEXT:    [[LOAD2]] = load i16, i16* [[GEP2]], align 4
; CHECK-NEXT:    [[CONV2:%.*]] = sext i16 [[LOAD2]] to i32
; CHECK-NEXT:    [[MUL01:%.*]] = mul nsw i32 [[CONV]], [[CONV1]]
; CHECK-NEXT:    [[MUL012:%.*]] = mul nsw i32 [[MUL01]], [[CONV2]]
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[I]]
; CHECK-NEXT:    store i32 [[MUL012]], i32* [[ARRAYIDX5]], align 4
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END]], label [[LOOP]], !llvm.loop [[LOOP42:![0-9]+]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  %.pre = load i16, i16* %a
  br label %loop

loop:
  %0 = phi i16 [ %.pre, %entry ], [ %load2, %loop ]
  %iv = phi i64 [ 0, %entry ], [ %iv2, %loop ]
  %i = phi i64 [ 0, %entry ], [ %i1, %loop ]
  %conv = sext i16 %0 to i32
  %i1 = add nuw nsw i64 %i, 1
  %iv1 = add nuw nsw i64 %iv, 1
  %iv2 = add nuw nsw i64 %iv, 2
  %gep1 = getelementptr inbounds i16, i16* %a, i64 %iv1
  %load1 = load i16, i16* %gep1, align 4
  %conv1 = sext i16 %load1 to i32
  %gep2 = getelementptr inbounds i16, i16* %a, i64 %iv2
  %load2 = load i16, i16* %gep2, align 4
  %conv2 = sext i16 %load2 to i32
  %mul01 = mul nsw i32 %conv, %conv1
  %mul012 = mul nsw i32 %mul01, %conv2
  %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %i
  store i32 %mul012, i32* %arrayidx5
  %exitcond = icmp eq i64 %iv, %n
  br i1 %exitcond, label %end, label %loop

end:
  ret void
}

attributes #0 = { "unsafe-fp-math"="true" }