; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx -mattr=+avx2 | FileCheck %s

;void Distance(float *p1, int p2, unsigned long p3[], float p4[]) {
;  long a = p3[0] = 5;
;  p1 += p2;
;  p4[3] += p1[a];
;  p3[0] >>= 5;
;  p3[1] >>= 5;
;  p3[2] >>= 5;
;  p3[3] >>= 5;
;  p1 += p2;
;  p4[0] += p1[p3[0] & a];
;}

define void @_Z8DistanceIlLi5EEvPfiPmS0_(float* %p1, i32 %p2, i64* %p3, float* %p4) {
; CHECK-LABEL: @_Z8DistanceIlLi5EEvPfiPmS0_(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i64 5, i64* [[P3:%.*]], align 8
; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[P2:%.*]] to i64
; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[P1:%.*]], i64 [[IDX_EXT]]
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 5
; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX1]], align 4
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[P4:%.*]], i64 3
; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]]
; CHECK-NEXT:    store float [[ADD]], float* [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
; CHECK-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* [[TMP5]], align 8
; CHECK-NEXT:    [[ADD_PTR11:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 [[IDX_EXT]]
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[AND:%.*]] = and i64 [[TMP6]], 5
; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[ADD_PTR11]], i64 [[AND]]
; CHECK-NEXT:    [[TMP7:%.*]] = load float, float* [[ARRAYIDX13]], align 4
; CHECK-NEXT:    [[TMP8:%.*]] = load float, float* [[P4]], align 4
; CHECK-NEXT:    [[ADD15:%.*]] = fadd float [[TMP7]], [[TMP8]]
; CHECK-NEXT:    store float [[ADD15]], float* [[P4]], align 4
; CHECK-NEXT:    ret void
;
entry:
  store i64 5, i64* %p3, align 8
  %idx.ext = sext i32 %p2 to i64
  %add.ptr = getelementptr inbounds float, float* %p1, i64 %idx.ext
  %arrayidx1 = getelementptr inbounds float, float* %add.ptr, i64 5
  %0 = load float, float* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds float, float* %p4, i64 3
  %1 = load float, float* %arrayidx2, align 4
  %add = fadd float %0, %1
  store float %add, float* %arrayidx2, align 4
  %2 = load i64, i64* %p3, align 8
  %shr = lshr i64 %2, 5
  store i64 %shr, i64* %p3, align 8
  %arrayidx4 = getelementptr inbounds i64, i64* %p3, i64 1
  %3 = load i64, i64* %arrayidx4, align 8
  %shr5 = lshr i64 %3, 5
  store i64 %shr5, i64* %arrayidx4, align 8
  %arrayidx6 = getelementptr inbounds i64, i64* %p3, i64 2
  %4 = load i64, i64* %arrayidx6, align 8
  %shr7 = lshr i64 %4, 5
  store i64 %shr7, i64* %arrayidx6, align 8
  %arrayidx8 = getelementptr inbounds i64, i64* %p3, i64 3
  %5 = load i64, i64* %arrayidx8, align 8
  %shr9 = lshr i64 %5, 5
  store i64 %shr9, i64* %arrayidx8, align 8
  %add.ptr11 = getelementptr inbounds float, float* %add.ptr, i64 %idx.ext
  %and = and i64 %shr, 5
  %arrayidx13 = getelementptr inbounds float, float* %add.ptr11, i64 %and
  %6 = load float, float* %arrayidx13, align 4
  %7 = load float, float* %p4, align 4
  %add15 = fadd float %6, %7
  store float %add15, float* %p4, align 4
  ret void
}

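; store_reverse loads p3[0..3] and p3[8..11], shifts them pairwise, and stores
; the results to p3[7]..p3[4], i.e. in the reverse of the load order. The
; expected output keeps both <4 x i64> loads and the vector shl, and reverses
; the lanes with a single shufflevector before one <4 x i64> store.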
define void @store_reverse(i64* %p3) {
; CHECK-LABEL: @store_reverse(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[P3:%.*]], i64 8
; CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 4
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* [[TMP0]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[ARRAYIDX1]] to <4 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = shl <4 x i64> [[TMP1]], [[TMP3]]
; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[ARRAYIDX14]] to <4 x i64>*
; CHECK-NEXT:    store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP5]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %0 = load i64, i64* %p3, align 8
  %arrayidx1 = getelementptr inbounds i64, i64* %p3, i64 8
  %1 = load i64, i64* %arrayidx1, align 8
  %shl = shl i64 %0, %1
  %arrayidx2 = getelementptr inbounds i64, i64* %p3, i64 7
  store i64 %shl, i64* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds i64, i64* %p3, i64 1
  %2 = load i64, i64* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds i64, i64* %p3, i64 9
  %3 = load i64, i64* %arrayidx4, align 8
  %shl5 = shl i64 %2, %3
  %arrayidx6 = getelementptr inbounds i64, i64* %p3, i64 6
  store i64 %shl5, i64* %arrayidx6, align 8
  %arrayidx7 = getelementptr inbounds i64, i64* %p3, i64 2
  %4 = load i64, i64* %arrayidx7, align 8
  %arrayidx8 = getelementptr inbounds i64, i64* %p3, i64 10
  %5 = load i64, i64* %arrayidx8, align 8
  %shl9 = shl i64 %4, %5
  %arrayidx10 = getelementptr inbounds i64, i64* %p3, i64 5
  store i64 %shl9, i64* %arrayidx10, align 8
  %arrayidx11 = getelementptr inbounds i64, i64* %p3, i64 3
  %6 = load i64, i64* %arrayidx11, align 8
  %arrayidx12 = getelementptr inbounds i64, i64* %p3, i64 11
  %7 = load i64, i64* %arrayidx12, align 8
  %shl13 = shl i64 %6, %7
  %arrayidx14 = getelementptr inbounds i64, i64* %p3, i64 4
  store i64 %shl13, i64* %arrayidx14, align 8
  ret void
}

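; store15 repeats the p3[0..3] >>= 5 pattern from Distance, but places 15
; redundant 'store i64 5' instructions to p3[5] between the last scalar lshr
; and its store. p3[5] does not alias p3[0..3], so the single <4 x i64>
; lshr/store group is still expected to form; together with store16 below
; this presumably probes how many intervening instructions the SLP scheduler
; will look across.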
define void @store15(float* %p1, i32 %p2, i64* %p3, float* %p4) {
; CHECK-LABEL: @store15(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i64 5, i64* [[P3:%.*]], align 8
; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[P2:%.*]] to i64
; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[P1:%.*]], i64 [[IDX_EXT]]
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 5
; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX1]], align 4
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[P4:%.*]], i64 3
; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]]
; CHECK-NEXT:    store float [[ADD]], float* [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 5
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
; CHECK-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* [[TMP5]], align 8
; CHECK-NEXT:    ret void
;
entry:
  store i64 5, i64* %p3, align 8
  %idx.ext = sext i32 %p2 to i64
  %add.ptr = getelementptr inbounds float, float* %p1, i64 %idx.ext
  %arrayidx1 = getelementptr inbounds float, float* %add.ptr, i64 5
  %0 = load float, float* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds float, float* %p4, i64 3
  %1 = load float, float* %arrayidx2, align 4
  %add = fadd float %0, %1
  store float %add, float* %arrayidx2, align 4
  %2 = load i64, i64* %p3, align 8
  %shr = lshr i64 %2, 5
  store i64 %shr, i64* %p3, align 8
  %arrayidx4 = getelementptr inbounds i64, i64* %p3, i64 1
  %3 = load i64, i64* %arrayidx4, align 8
  %shr5 = lshr i64 %3, 5
  store i64 %shr5, i64* %arrayidx4, align 8
  %arrayidx6 = getelementptr inbounds i64, i64* %p3, i64 2
  %4 = load i64, i64* %arrayidx6, align 8
  %shr7 = lshr i64 %4, 5
  store i64 %shr7, i64* %arrayidx6, align 8
  %arrayidx8 = getelementptr inbounds i64, i64* %p3, i64 3
  %5 = load i64, i64* %arrayidx8, align 8
  %shr9 = lshr i64 %5, 5
  %arrayidx9 = getelementptr inbounds i64, i64* %p3, i64 5
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 %shr9, i64* %arrayidx8, align 8
  ret void
}

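; store16 is identical to store15 except for one extra redundant store (16 in
; total); the <4 x i64> lshr group is still expected to vectorize.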
define void @store16(float* %p1, i32 %p2, i64* %p3, float* %p4) {
; CHECK-LABEL: @store16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i64 5, i64* [[P3:%.*]], align 8
; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[P2:%.*]] to i64
; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[P1:%.*]], i64 [[IDX_EXT]]
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 5
; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX1]], align 4
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[P4:%.*]], i64 3
; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]]
; CHECK-NEXT:    store float [[ADD]], float* [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 5
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
; CHECK-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* [[TMP5]], align 8
; CHECK-NEXT:    ret void
;
entry:
  store i64 5, i64* %p3, align 8
  %idx.ext = sext i32 %p2 to i64
  %add.ptr = getelementptr inbounds float, float* %p1, i64 %idx.ext
  %arrayidx1 = getelementptr inbounds float, float* %add.ptr, i64 5
  %0 = load float, float* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds float, float* %p4, i64 3
  %1 = load float, float* %arrayidx2, align 4
  %add = fadd float %0, %1
  store float %add, float* %arrayidx2, align 4
  %2 = load i64, i64* %p3, align 8
  %shr = lshr i64 %2, 5
  store i64 %shr, i64* %p3, align 8
  %arrayidx4 = getelementptr inbounds i64, i64* %p3, i64 1
  %3 = load i64, i64* %arrayidx4, align 8
  %shr5 = lshr i64 %3, 5
  store i64 %shr5, i64* %arrayidx4, align 8
  %arrayidx6 = getelementptr inbounds i64, i64* %p3, i64 2
  %4 = load i64, i64* %arrayidx6, align 8
  %shr7 = lshr i64 %4, 5
  store i64 %shr7, i64* %arrayidx6, align 8
  %arrayidx8 = getelementptr inbounds i64, i64* %p3, i64 3
  %5 = load i64, i64* %arrayidx8, align 8
  %shr9 = lshr i64 %5, 5
  %arrayidx9 = getelementptr inbounds i64, i64* %p3, i64 5
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 5, i64* %arrayidx9, align 8
  store i64 %shr9, i64* %arrayidx8, align 8
  ret void
}