; RUN: opt -S -loop-vectorize -instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true -runtime-memory-check-threshold=24 < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; Check vectorization on an interleaved load group of factor 2 and an interleaved
; store group of factor 2.

; int AB[1024];
; int CD[1024];
; void test_array_load2_store2(int C, int D) {
;   for (int i = 0; i < 1024; i+=2) {
;     int A = AB[i];
;     int B = AB[i+1];
;     CD[i] = A + C;
;     CD[i+1] = B * D;
;   }
; }

; CHECK-LABEL: @test_array_load2_store2(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: add nsw <4 x i32>
; CHECK: mul nsw <4 x i32>
; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %{{.*}}, align 4

@AB = common global [1024 x i32] zeroinitializer, align 4
@CD = common global [1024 x i32] zeroinitializer, align 4

define void @test_array_load2_store2(i32 %C, i32 %D) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %indvars.iv
  %tmp = load i32, i32* %arrayidx0, align 4
  %tmp1 = or i64 %indvars.iv, 1
  %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %tmp1
  %tmp2 = load i32, i32* %arrayidx1, align 4
  %add = add nsw i32 %tmp, %C
  %mul = mul nsw i32 %tmp2, %D
  %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %indvars.iv
  store i32 %add, i32* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %tmp1
  store i32 %mul, i32* %arrayidx3, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp slt i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret void
}

; Check vectorization on an interleaved load group of factor 3 and an
; interleaved store group of factor 3.

; int A[3072];
; struct ST3 S[1024];
; void test_struct_array_load3_store3() {
;   int *ptr = A;
;   for (int i = 0; i < 1024; i++) {
;     int X1 = *ptr++;
;     int X2 = *ptr++;
;     int X3 = *ptr++;
;     S[i].x = X1 + 1;
;     S[i].y = X2 + 2;
;     S[i].z = X3 + 3;
;   }
; }

; CHECK-LABEL: @test_struct_array_load3_store3(
; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
; CHECK: add nsw <4 x i32> {{.*}}, <i32 1, i32 1, i32 1, i32 1>
; CHECK: add nsw <4 x i32> {{.*}}, <i32 2, i32 2, i32 2, i32 2>
; CHECK: add nsw <4 x i32> {{.*}}, <i32 3, i32 3, i32 3, i32 3>
; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK: %interleaved.vec = shufflevector <8 x i32> {{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* {{.*}}, align 4

%struct.ST3 = type { i32, i32, i32 }
@A = common global [3072 x i32] zeroinitializer, align 4
@S = common global [1024 x %struct.ST3] zeroinitializer, align 4

define void @test_struct_array_load3_store3() {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %ptr.016 = phi i32* [ getelementptr inbounds ([3072 x i32], [3072 x i32]* @A, i64 0, i64 0), %entry ], [ %incdec.ptr2, %for.body ]
  %incdec.ptr = getelementptr inbounds i32, i32* %ptr.016, i64 1
  %tmp = load i32, i32* %ptr.016, align 4
  %incdec.ptr1 = getelementptr inbounds i32, i32* %ptr.016, i64 2
  %tmp1 = load i32, i32* %incdec.ptr, align 4
  %incdec.ptr2 = getelementptr inbounds i32, i32* %ptr.016, i64 3
  %tmp2 = load i32, i32* %incdec.ptr1, align 4
  %add = add nsw i32 %tmp, 1
  %x = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 0
  store i32 %add, i32* %x, align 4
  %add3 = add nsw i32 %tmp1, 2
  %y = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 1
  store i32 %add3, i32* %y, align 4
  %add6 = add nsw i32 %tmp2, 3
  %z = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 2
  store i32 %add6, i32* %z, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; Check vectorization on an interleaved load group of factor 4.

; struct ST4{
;   int x;
;   int y;
;   int z;
;   int w;
; };
; int test_struct_load4(struct ST4 *S) {
;   int r = 0;
;   for (int i = 0; i < 1024; i++) {
;     r += S[i].x;
;     r -= S[i].y;
;     r += S[i].z;
;     r -= S[i].w;
;   }
;   return r;
; }

; CHECK-LABEL: @test_struct_load4(
; CHECK: %wide.vec = load <16 x i32>, <16 x i32>* {{.*}}, align 4
; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
; CHECK: add nsw <4 x i32>
; CHECK: sub <4 x i32>
; CHECK: add nsw <4 x i32>
; CHECK: sub <4 x i32>

%struct.ST4 = type { i32, i32, i32, i32 }

define i32 @test_struct_load4(%struct.ST4* nocapture readonly %S) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %r.022 = phi i32 [ 0, %entry ], [ %sub8, %for.body ]
  %x = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 0
  %tmp = load i32, i32* %x, align 4
  %add = add nsw i32 %tmp, %r.022
  %y = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 1
  %tmp1 = load i32, i32* %y, align 4
  %sub = sub i32 %add, %tmp1
  %z = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 2
  %tmp2 = load i32, i32* %z, align 4
  %add5 = add nsw i32 %sub, %tmp2
  %w = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 3
  %tmp3 = load i32, i32* %w, align 4
  %sub8 = sub i32 %add5, %tmp3
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret i32 %sub8
}

; Check vectorization on an interleaved store group of factor 4.

; void test_struct_store4(int *A, struct ST4 *B) {
;   int *ptr = A;
;   for (int i = 0; i < 1024; i++) {
;     int X = *ptr++;
;     B[i].x = X + 1;
;     B[i].y = X * 2;
;     B[i].z = X + 3;
;     B[i].w = X + 4;
;   }
; }

; CHECK-LABEL: @test_struct_store4(
; CHECK: %[[LD:.*]] = load <4 x i32>, <4 x i32>*
; CHECK: add nsw <4 x i32> %[[LD]], <i32 1, i32 1, i32 1, i32 1>
; CHECK: shl nsw <4 x i32> %[[LD]], <i32 1, i32 1, i32 1, i32 1>
; CHECK: add nsw <4 x i32> %[[LD]], <i32 3, i32 3, i32 3, i32 3>
; CHECK: add nsw <4 x i32> %[[LD]], <i32 4, i32 4, i32 4, i32 4>
; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK: %interleaved.vec = shufflevector <8 x i32> {{.*}}, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
; CHECK: store <16 x i32> %interleaved.vec, <16 x i32>* {{.*}}, align 4

define void @test_struct_store4(i32* noalias nocapture readonly %A, %struct.ST4* noalias nocapture %B) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %ptr.024 = phi i32* [ %A, %entry ], [ %incdec.ptr, %for.body ]
  %incdec.ptr = getelementptr inbounds i32, i32* %ptr.024, i64 1
  %tmp = load i32, i32* %ptr.024, align 4
  %add = add nsw i32 %tmp, 1
  %x = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 0
  store i32 %add, i32* %x, align 4
  %mul = shl nsw i32 %tmp, 1
  %y = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 1
  store i32 %mul, i32* %y, align 4
  %add3 = add nsw i32 %tmp, 3
  %z = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 2
  store i32 %add3, i32* %z, align 4
  %add6 = add nsw i32 %tmp, 4
  %w = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 3
  store i32 %add6, i32* %w, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Check vectorization on a reverse interleaved load group of factor 2 and
; a reverse interleaved store group of factor 2.

; struct ST2 {
;   int x;
;   int y;
; };
;
; void test_reversed_load2_store2(struct ST2 *A, struct ST2 *B) {
;   for (int i = 1023; i >= 0; i--) {
;     int a = A[i].x + i;  // interleaved load of index 0
;     int b = A[i].y - i;  // interleaved load of index 1
;     B[i].x = a;          // interleaved store of index 0
;     B[i].y = b;          // interleaved store of index 1
;   }
; }

; CHECK-LABEL: @test_reversed_load2_store2(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK: add nsw <4 x i32>
; CHECK: sub nsw <4 x i32>
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %{{.*}}, align 4

%struct.ST2 = type { i32, i32 }

define void @test_reversed_load2_store2(%struct.ST2* noalias nocapture readonly %A, %struct.ST2* noalias nocapture %B) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %for.body ]
  %x = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 0
  %tmp = load i32, i32* %x, align 4
  %tmp1 = trunc i64 %indvars.iv to i32
  %add = add nsw i32 %tmp, %tmp1
  %y = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 1
  %tmp2 = load i32, i32* %y, align 4
  %sub = sub nsw i32 %tmp2, %tmp1
  %x5 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 0
  store i32 %add, i32* %x5, align 4
  %y8 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 1
  store i32 %sub, i32* %y8, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp = icmp sgt i64 %indvars.iv, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; Check vectorization on an interleaved load group of factor 2 with 1 gap
; (missing the load of odd elements). Because the vectorized loop would
; speculatively access memory out-of-bounds, we must execute at least one
; iteration of the scalar loop.

; void even_load_static_tc(int *A, int *B) {
;   for (unsigned i = 0; i < 1024; i+=2)
;     B[i/2] = A[i] * 2;
; }

; CHECK-LABEL: @even_load_static_tc(
; CHECK: vector.body:
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: icmp eq i64 %index.next, 508
; CHECK: middle.block:
; CHECK: br i1 false, label %for.cond.cleanup, label %scalar.ph

define void @even_load_static_tc(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %tmp = load i32, i32* %arrayidx, align 4
  %mul = shl nsw i32 %tmp, 1
  %tmp1 = lshr exact i64 %indvars.iv, 1
  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %tmp1
  store i32 %mul, i32* %arrayidx2, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; Check vectorization on an interleaved load group of factor 2 with 1 gap
; (missing the load of odd elements). Because the vectorized loop would
; speculatively access memory out-of-bounds, we must execute at least one
; iteration of the scalar loop.

; void even_load_dynamic_tc(int *A, int *B, unsigned N) {
;   for (unsigned i = 0; i < N; i+=2)
;     B[i/2] = A[i] * 2;
; }

; CHECK-LABEL: @even_load_dynamic_tc(
; CHECK: min.iters.checked:
; CHECK: %n.mod.vf = and i64 %[[N:[a-zA-Z0-9]+]], 3
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
; CHECK: vector.body:
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: icmp eq i64 %index.next, %n.vec
; CHECK: middle.block:
; CHECK: br i1 false, label %for.cond.cleanup, label %scalar.ph

define void @even_load_dynamic_tc(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i64 %N) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %tmp = load i32, i32* %arrayidx, align 4
  %mul = shl nsw i32 %tmp, 1
  %tmp1 = lshr exact i64 %indvars.iv, 1
  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %tmp1
  store i32 %mul, i32* %arrayidx2, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, %N
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; Check vectorization on a reverse interleaved load group of factor 2 with 1
; gap and a reverse interleaved store group of factor 2. The interleaved load
; group should be removed since it has a gap and is reverse.

; struct pair {
;   int64_t x;
;   int64_t y;
; };
;
; void load_gap_reverse(struct pair *P1, struct pair *P2, int64_t X) {
;   for (int i = 1023; i >= 0; i--) {
;     int64_t a = X + i;
;     int64_t b = P2[i].y - i;
;     P1[i].x = a;
;     P2[i].y = b;
;   }
; }

; CHECK-LABEL: @load_gap_reverse(
; CHECK-NOT: %wide.vec = load <8 x i64>, <8 x i64>* %{{.*}}, align 8
; CHECK-NOT: %strided.vec = shufflevector <8 x i64> %wide.vec, <8 x i64> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>

%pair = type { i64, i64 }
; NOTE(review): both %P1 and %P2 are marked readonly but are stored through in
; the loop body below — the attributes look stale; confirm against upstream.
define void @load_gap_reverse(%pair* noalias nocapture readonly %P1, %pair* noalias nocapture readonly %P2, i64 %X) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ 1023, %entry ], [ %i.next, %for.body ]
  %0 = add nsw i64 %X, %i
  %1 = getelementptr inbounds %pair, %pair* %P1, i64 %i, i32 0
  %2 = getelementptr inbounds %pair, %pair* %P2, i64 %i, i32 1
  %3 = load i64, i64* %2, align 8
  %4 = sub nsw i64 %3, %i
  store i64 %0, i64* %1, align 8
  store i64 %4, i64* %2, align 8
  %i.next = add nsw i64 %i, -1
  %cond = icmp sgt i64 %i, 0
  br i1 %cond, label %for.body, label %for.exit

for.exit:
  ret void
}

; Check vectorization on interleaved access groups identified from mixed
; loads/stores.
; void mixed_load2_store2(int *A, int *B) {
;   for (unsigned i = 0; i < 1024; i+=2) {
;     B[i] = A[i] * A[i+1];
;     B[i+1] = A[i] + A[i+1];
;   }
; }

; CHECK-LABEL: @mixed_load2_store2(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: %interleaved.vec = shufflevector <4 x i32> %{{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; CHECK: store <8 x i32> %interleaved.vec

define void @mixed_load2_store2(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %tmp = load i32, i32* %arrayidx, align 4
  %tmp1 = or i64 %indvars.iv, 1
  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %tmp1
  %tmp2 = load i32, i32* %arrayidx2, align 4
  %mul = mul nsw i32 %tmp2, %tmp
  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  store i32 %mul, i32* %arrayidx4, align 4
  %tmp3 = load i32, i32* %arrayidx, align 4
  %tmp4 = load i32, i32* %arrayidx2, align 4
  %add10 = add nsw i32 %tmp4, %tmp3
  %arrayidx13 = getelementptr inbounds i32, i32* %B, i64 %tmp1
  store i32 %add10, i32* %arrayidx13, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; Check vectorization on interleaved access groups identified from mixed
; loads/stores.
; void mixed_load3_store3(int *A) {
;   for (unsigned i = 0; i < 1024; i++) {
;     *A++ += i;
;     *A++ += i;
;     *A++ += i;
;   }
; }

; CHECK-LABEL: @mixed_load3_store3(
; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
; CHECK: %interleaved.vec = shufflevector <8 x i32> %{{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* %{{.*}}, align 4

define void @mixed_load3_store3(i32* nocapture %A) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %i.013 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %A.addr.012 = phi i32* [ %A, %entry ], [ %incdec.ptr3, %for.body ]
  %incdec.ptr = getelementptr inbounds i32, i32* %A.addr.012, i64 1
  %tmp = load i32, i32* %A.addr.012, align 4
  %add = add i32 %tmp, %i.013
  store i32 %add, i32* %A.addr.012, align 4
  %incdec.ptr1 = getelementptr inbounds i32, i32* %A.addr.012, i64 2
  %tmp1 = load i32, i32* %incdec.ptr, align 4
  %add2 = add i32 %tmp1, %i.013
  store i32 %add2, i32* %incdec.ptr, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %A.addr.012, i64 3
  %tmp2 = load i32, i32* %incdec.ptr1, align 4
  %add4 = add i32 %tmp2, %i.013
  store i32 %add4, i32* %incdec.ptr1, align 4
  %inc = add nuw nsw i32 %i.013, 1
  %exitcond = icmp eq i32 %inc, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Check vectorization on interleaved access groups with members having different
; kinds of type.

; struct IntFloat {
;   int a;
;   float b;
; };
;
; int SA;
; float SB;
;
; void int_float_struct(struct IntFloat *A) {
;   int SumA;
;   float SumB;
;   for (unsigned i = 0; i < 1024; i++) {
;     SumA += A[i].a;
;     SumB += A[i].b;
;   }
;   SA = SumA;
;   SB = SumB;
; }

; CHECK-LABEL: @int_float_struct(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK: %[[V0:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: %[[V1:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: bitcast <4 x i32> %[[V1]] to <4 x float>
; CHECK: add nsw <4 x i32>
; CHECK: fadd fast <4 x float>

%struct.IntFloat = type { i32, float }

@SA = common global i32 0, align 4
@SB = common global float 0.000000e+00, align 4

define void @int_float_struct(%struct.IntFloat* nocapture readonly %A) #0 {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  store i32 %add, i32* @SA, align 4
  store float %add3, float* @SB, align 4
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %SumB.014 = phi float [ undef, %entry ], [ %add3, %for.body ]
  %SumA.013 = phi i32 [ undef, %entry ], [ %add, %for.body ]
  %a = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 0
  %tmp = load i32, i32* %a, align 4
  %add = add nsw i32 %tmp, %SumA.013
  %b = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 1
  %tmp1 = load float, float* %b, align 4
  %add3 = fadd fast float %SumB.014, %tmp1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

attributes #0 = { "unsafe-fp-math"="true" }