; RUN: opt -S -loop-vectorize -instcombine -force-vector-width=2 -force-vector-interleave=1 -enable-interleaved-mem-accesses < %s | FileCheck %s
; RUN: opt -S -loop-vectorize -instcombine -force-vector-width=2 -force-vector-interleave=1 -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
%pair = type { i64, i64 }

; Ensure that we vectorize the interleaved load group even though the loop
; contains a conditional store. The store group contains gaps and is not
; vectorized.
;
; The vector.ph expectations pin the remainder computation: when the value
; captured as N is even, the remainder is forced to 2 instead of 0, so n.vec is
; always strictly smaller than N.
; NOTE(review): N is presumably the loop trip count -- confirm against the
; generated IR.
;
; CHECK-LABEL: @interleaved_with_cond_store_0(
;
; CHECK: vector.ph
; CHECK:   %n.mod.vf = and i64 %[[N:.+]], 1
; CHECK:   %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK:   %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
; CHECK:   %n.vec = sub nsw i64 %[[N]], %[[R]]
;
; CHECK: vector.body:
; CHECK:   %wide.vec = load <4 x i64>, <4 x i64>* %{{.*}}
; CHECK:   %strided.vec = shufflevector <4 x i64> %wide.vec, <4 x i64> poison, <2 x i32> <i32 0, i32 2>
;
; CHECK: pred.store.if
; CHECK:   %[[X1:.+]] = extractelement <4 x i64> %wide.vec, i32 0
; CHECK:   store i64 %[[X1]], {{.*}}
;
; CHECK: pred.store.if
; CHECK:   %[[X2:.+]] = extractelement <4 x i64> %wide.vec, i32 2
; CHECK:   store i64 %[[X2]], {{.*}}

define void @interleaved_with_cond_store_0(%pair *%p, i64 %x, i64 %n) {
entry:
  br label %for.body

for.body:
  ; Only field 1 of each %pair is accessed, so field 0 is a gap in the group.
  %i = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
  %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
  %0 = load i64, i64* %p.1, align 8
  %1 = icmp eq i64 %0, %x
  br i1 %1, label %if.then, label %if.merge

if.then:
  ; Conditional store: executed only when the loaded value equals %x.
  store i64 %0, i64* %p.1, align 8
  br label %if.merge

if.merge:
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; Ensure that we don't form a single interleaved group for the two
; loads. The conditional store prevents the second load from being hoisted.
; The two load groups are separately vectorized. The store group contains gaps
; and is not vectorized.
;
; The first wide load is captured as L1 and the second as L2; every later
; expectation refers to the captures rather than to a hard-coded value name.
;
; CHECK-LABEL: @interleaved_with_cond_store_1(
;
; CHECK: vector.ph
; CHECK:   %n.mod.vf = and i64 %[[N:.+]], 1
; CHECK:   %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK:   %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
; CHECK:   %n.vec = sub nsw i64 %[[N]], %[[R]]
;
; CHECK: vector.body:
; CHECK:   %[[L1:.+]] = load <4 x i64>, <4 x i64>* %{{.*}}
; CHECK:   %strided.vec = shufflevector <4 x i64> %[[L1]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
;
; CHECK: pred.store.if
; CHECK:   %[[X1:.+]] = extractelement <4 x i64> %[[L1]], i32 0
; CHECK:   store i64 %[[X1]], {{.*}}
;
; CHECK: pred.store.if
; CHECK:   %[[X2:.+]] = extractelement <4 x i64> %[[L1]], i32 2
; CHECK:   store i64 %[[X2]], {{.*}}
;
; CHECK: pred.store.continue
; CHECK:   %[[L2:.+]] = load <4 x i64>, <4 x i64>* {{.*}}
; CHECK:   %[[X3:.+]] = extractelement <4 x i64> %[[L2]], i32 0
; CHECK:   store i64 %[[X3]], {{.*}}
; CHECK:   %[[X4:.+]] = extractelement <4 x i64> %[[L2]], i32 2
; CHECK:   store i64 %[[X4]], {{.*}}

define void @interleaved_with_cond_store_1(%pair *%p, i64 %x, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
  %p.0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
  %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
  ; First load: field 1 of the current pair.
  %0 = load i64, i64* %p.1, align 8
  %1 = icmp eq i64 %0, %x
  br i1 %1, label %if.then, label %if.merge

if.then:
  ; Conditional store to field 0, the same address the load in %if.merge reads.
  store i64 %0, i64* %p.0, align 8
  br label %if.merge

if.merge:
  ; Second load: field 0, read after the conditional store above, then copied
  ; unconditionally into field 1.
  %2 = load i64, i64* %p.0, align 8
  store i64 %2, i64 *%p.1, align 8
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; Ensure that we
; don't create a single interleaved group for the two stores.
; The second store is conditional and we can't sink the first store inside the
; predicated block. The load group is vectorized, and the store groups contain
; gaps and are not vectorized.
;
; The wide load is captured as L1 and the predicated-store expectations refer
; to that capture rather than to a hard-coded value name.
;
; CHECK-LABEL: @interleaved_with_cond_store_2(
;
; CHECK: vector.ph
; CHECK:   %n.mod.vf = and i64 %[[N:.+]], 1
; CHECK:   %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK:   %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
; CHECK:   %n.vec = sub nsw i64 %[[N]], %[[R]]
;
; CHECK: vector.body:
; CHECK:   %[[L1:.+]] = load <4 x i64>, <4 x i64>* %{{.*}}
; CHECK:   %strided.vec = shufflevector <4 x i64> %[[L1]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
; CHECK:   store i64 %x, {{.*}}
; CHECK:   store i64 %x, {{.*}}
;
; CHECK: pred.store.if
; CHECK:   %[[X1:.+]] = extractelement <4 x i64> %[[L1]], i32 0
; CHECK:   store i64 %[[X1]], {{.*}}
;
; CHECK: pred.store.if
; CHECK:   %[[X2:.+]] = extractelement <4 x i64> %[[L1]], i32 2
; CHECK:   store i64 %[[X2]], {{.*}}

define void @interleaved_with_cond_store_2(%pair *%p, i64 %x, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
  %p.0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
  %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
  %0 = load i64, i64* %p.1, align 8
  ; First store: unconditional write of %x to field 0.
  store i64 %x, i64* %p.0, align 8
  %1 = icmp eq i64 %0, %x
  br i1 %1, label %if.then, label %if.merge

if.then:
  ; Second store: conditional write to field 1, only when the loaded value
  ; equals %x.
  store i64 %0, i64* %p.1, align 8
  br label %if.merge

if.merge:
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}