; REQUIRES: asserts
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -instcombine -debug-only=loop-vectorize -disable-output -print-after=instcombine -enable-new-pm=0 2>&1 | FileCheck %s
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -instcombine -debug-only=loop-vectorize -disable-output -print-after=instcombine -enable-new-pm=0 2>&1 | FileCheck %s --check-prefix=INTER
; RUN: opt < %s -passes=loop-vectorize,instcombine -force-vector-width=4 -force-vector-interleave=1 -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s
; RUN: opt < %s -passes=loop-vectorize,instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s --check-prefix=INTER

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

%pair = type { i32, i32 }

; CHECK-LABEL: consecutive_ptr_forward
;
; Check that a forward consecutive pointer is recognized as uniform and remains
; uniform after vectorization.
;
; CHECK: LV: Found uniform instruction: %tmp1 = getelementptr inbounds i32, i32* %a, i64 %i
; CHECK: vector.body
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK-NOT: getelementptr
; CHECK: getelementptr inbounds i32, i32* %a, i64 %index
; CHECK-NOT: getelementptr
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
define i32 @consecutive_ptr_forward(i32* %a, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %tmp0 = phi i32 [ %tmp3, %for.body ], [ 0, %entry ]
  %tmp1 = getelementptr inbounds i32, i32* %a, i64 %i
  %tmp2 = load i32, i32* %tmp1, align 8
  %tmp3 = add i32 %tmp0, %tmp2
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  %tmp4 = phi i32 [ %tmp3, %for.body ]
  ret i32 %tmp4
}

; CHECK-LABEL: consecutive_ptr_reverse
;
; Check that a reverse consecutive pointer is recognized as uniform and remains
; uniform after vectorization.
;
; CHECK: LV: Found uniform instruction: %tmp1 = getelementptr inbounds i32, i32* %a, i64 %i
; CHECK: vector.body
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %offset.idx = sub i64 %n, %index
; CHECK-NOT: getelementptr
; CHECK: %[[G0:.+]] = getelementptr i32, i32* %a, i64 -3
; CHECK: getelementptr i32, i32* %[[G0]], i64 %offset.idx
; CHECK-NOT: getelementptr
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
define i32 @consecutive_ptr_reverse(i32* %a, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ %n, %entry ]
  %tmp0 = phi i32 [ %tmp3, %for.body ], [ 0, %entry ]
  %tmp1 = getelementptr inbounds i32, i32* %a, i64 %i
  %tmp2 = load i32, i32* %tmp1, align 8
  %tmp3 = add i32 %tmp0, %tmp2
  %i.next = add nsw i64 %i, -1
  %cond = icmp sgt i64 %i.next, 0
  br i1 %cond, label %for.body, label %for.end

for.end:
  %tmp4 = phi i32 [ %tmp3, %for.body ]
  ret i32 %tmp4
}

; CHECK-LABEL: interleaved_access_forward
; INTER-LABEL: interleaved_access_forward
;
; Check that a consecutive-like pointer used by a forward interleaved group is
; recognized as uniform and remains uniform after vectorization. When
; interleaved memory accesses aren't enabled, the pointer should not be
; recognized as uniform, and it should not be uniform after vectorization.
;
; CHECK-NOT: LV: Found uniform instruction: %tmp1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
; CHECK-NOT: LV: Found uniform instruction: %tmp2 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
; CHECK: vector.body
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %[[I1:.+]] = or i64 %index, 1
; CHECK: %[[I2:.+]] = or i64 %index, 2
; CHECK: %[[I3:.+]] = or i64 %index, 3
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %index, i32 0
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I1]], i32 0
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I2]], i32 0
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I3]], i32 0
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %index, i32 1
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I1]], i32 1
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I2]], i32 1
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I3]], i32 1
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
; INTER: LV: Found uniform instruction: %tmp1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
; INTER: LV: Found uniform instruction: %tmp2 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
; INTER: vector.body
; INTER: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; INTER-NOT: getelementptr
; INTER: getelementptr inbounds %pair, %pair* %p, i64 %index, i32 0
; INTER-NOT: getelementptr
; INTER: br i1 {{.*}}, label %middle.block, label %vector.body
;
define i32 @interleaved_access_forward(%pair* %p, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %tmp0 = phi i32 [ %tmp6, %for.body ], [ 0, %entry ]
  %tmp1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
  %tmp2 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
  %tmp3 = load i32, i32* %tmp1, align 8
  %tmp4 = load i32, i32* %tmp2, align 8
  %tmp5 = add i32 %tmp3, %tmp4
  %tmp6 = add i32 %tmp0, %tmp5
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  %tmp14 = phi i32 [ %tmp6, %for.body ]
  ret i32 %tmp14
}

; CHECK-LABEL: interleaved_access_reverse
; INTER-LABEL: interleaved_access_reverse
;
; Check that a consecutive-like pointer used by a reverse interleaved group is
; recognized as uniform and remains uniform after vectorization. When
; interleaved memory accesses aren't enabled, the pointer should not be
; recognized as uniform, and it should not be uniform after vectorization.
;
; CHECK-NOT: LV: Found uniform instruction: %tmp1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
; CHECK-NOT: LV: Found uniform instruction: %tmp2 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
; CHECK: vector.body
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %offset.idx = sub i64 %n, %index
; CHECK: %[[I1:.+]] = add i64 %offset.idx, -1
; CHECK: %[[I2:.+]] = add i64 %offset.idx, -2
; CHECK: %[[I3:.+]] = add i64 %offset.idx, -3
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %offset.idx, i32 0
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I1]], i32 0
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I2]], i32 0
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I3]], i32 0
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %offset.idx, i32 1
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I1]], i32 1
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I2]], i32 1
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I3]], i32 1
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
; INTER: LV: Found uniform instruction: %tmp1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
; INTER: LV: Found uniform instruction: %tmp2 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
; INTER: vector.body
; INTER: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; INTER: %offset.idx = sub i64 %n, %index
; INTER-NOT: getelementptr
; INTER: %[[G0:.+]] = getelementptr inbounds %pair, %pair* %p, i64 %offset.idx, i32 0
; INTER: getelementptr inbounds i32, i32* %[[G0]], i64 -6
; INTER-NOT: getelementptr
; INTER: br i1 {{.*}}, label %middle.block, label %vector.body
;
define i32 @interleaved_access_reverse(%pair* %p, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ %n, %entry ]
  %tmp0 = phi i32 [ %tmp6, %for.body ], [ 0, %entry ]
  %tmp1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
  %tmp2 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
  %tmp3 = load i32, i32* %tmp1, align 8
  %tmp4 = load i32, i32* %tmp2, align 8
  %tmp5 = add i32 %tmp3, %tmp4
  %tmp6 = add i32 %tmp0, %tmp5
  %i.next = add nsw i64 %i, -1
  %cond = icmp sgt i64 %i.next, 0
  br i1 %cond, label %for.body, label %for.end

for.end:
  %tmp14 = phi i32 [ %tmp6, %for.body ]
  ret i32 %tmp14
}

; INTER-LABEL: predicated_store
;
; Check that a consecutive-like pointer used by a forward interleaved group and
; scalarized store is not recognized as uniform and is not uniform after
; vectorization. The store is scalarized because it's in a predicated block.
; Even though the load in this example is vectorized and only uses the pointer
; as if it were uniform, the store is scalarized, making the pointer
; non-uniform.
;
; INTER-NOT: LV: Found uniform instruction: %tmp0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
; INTER: vector.body
; INTER: %index = phi i64 [ 0, %vector.ph ], [ %index.next, {{.*}} ]
; INTER: %[[G0:.+]] = getelementptr inbounds %pair, %pair* %p, i64 %index, i32 0
; INTER: %[[B0:.+]] = bitcast i32* %[[G0]] to <8 x i32>*
; INTER: %wide.vec = load <8 x i32>, <8 x i32>* %[[B0]], align 8
; INTER: %[[I1:.+]] = or i64 %index, 1
; INTER: getelementptr inbounds %pair, %pair* %p, i64 %[[I1]], i32 0
; INTER: %[[I2:.+]] = or i64 %index, 2
; INTER: getelementptr inbounds %pair, %pair* %p, i64 %[[I2]], i32 0
; INTER: %[[I3:.+]] = or i64 %index, 3
; INTER: getelementptr inbounds %pair, %pair* %p, i64 %[[I3]], i32 0
; INTER: br i1 {{.*}}, label %middle.block, label %vector.body
;
define void @predicated_store(%pair *%p, i32 %x, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
  %tmp0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
  %tmp1 = load i32, i32* %tmp0, align 8
  %tmp2 = icmp eq i32 %tmp1, %x
  br i1 %tmp2, label %if.then, label %if.merge

if.then:
  store i32 %tmp1, i32* %tmp0, align 8
  br label %if.merge

if.merge:
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; CHECK-LABEL: irregular_type
;
; Check that a consecutive pointer used by a scalarized store is not recognized
; as uniform and is not uniform after vectorization. The store is scalarized
; because the stored type may required padding.
;
; CHECK-NOT: LV: Found uniform instruction: %tmp1 = getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %i
; CHECK: vector.body
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %[[I1:.+]] = or i64 %index, 1
; CHECK: %[[I2:.+]] = or i64 %index, 2
; CHECK: %[[I3:.+]] = or i64 %index, 3
; CHECK: getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %index
; CHECK: getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %[[I1]]
; CHECK: getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %[[I2]]
; CHECK: getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %[[I3]]
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
define void @irregular_type(x86_fp80* %a, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  %tmp0 = sitofp i32 1 to x86_fp80
  %tmp1 = getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %i
  store x86_fp80 %tmp0, x86_fp80* %tmp1, align 16
  %i.next = add i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; CHECK-LABEL: pointer_iv_uniform
;
; Check that a pointer induction variable is recognized as uniform and remains
; uniform after vectorization.
;
; CHECK: LV: Found uniform instruction: %p = phi i32* [ %tmp03, %for.body ], [ %a, %entry ]
; CHECK: vector.body
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK-NOT: getelementptr
; CHECK: %next.gep = getelementptr i32, i32* %a, i64 %index
; CHECK-NOT: getelementptr
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
define void @pointer_iv_uniform(i32* %a, i32 %x, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %p = phi i32* [ %tmp03, %for.body ], [ %a, %entry ]
  store i32 %x, i32* %p, align 8
  %tmp03 = getelementptr inbounds i32, i32* %p, i32 1
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; INTER-LABEL: pointer_iv_non_uniform_0
;
; Check that a pointer induction variable with a non-uniform user is not
; recognized as uniform and is not uniform after vectorization. The pointer
; induction variable is used by getelementptr instructions that are non-uniform
; due to scalarization of the stores.
;
; INTER-NOT: LV: Found uniform instruction: %p = phi i32* [ %tmp03, %for.body ], [ %a, %entry ]
; INTER: vector.body
; INTER: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; INTER: %[[I0:.+]] = shl i64 %index, 2
; INTER: %next.gep = getelementptr i32, i32* %a, i64 %[[I0]]
; INTER: %[[S1:.+]] = shl i64 %index, 2
; INTER: %[[I1:.+]] = or i64 %[[S1]], 4
; INTER: %next.gep2 = getelementptr i32, i32* %a, i64 %[[I1]]
; INTER: %[[S2:.+]] = shl i64 %index, 2
; INTER: %[[I2:.+]] = or i64 %[[S2]], 8
; INTER: %next.gep3 = getelementptr i32, i32* %a, i64 %[[I2]]
; INTER: %[[S3:.+]] = shl i64 %index, 2
; INTER: %[[I3:.+]] = or i64 %[[S3]], 12
; INTER: %next.gep4 = getelementptr i32, i32* %a, i64 %[[I3]]
; INTER: br i1 {{.*}}, label %middle.block, label %vector.body
;
define void @pointer_iv_non_uniform_0(i32* %a, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %p = phi i32* [ %tmp03, %for.body ], [ %a, %entry ]
  %tmp00 = load i32, i32* %p, align 8
  %tmp01 = getelementptr inbounds i32, i32* %p, i32 1
  %tmp02 = load i32, i32* %tmp01, align 8
  %tmp03 = getelementptr inbounds i32, i32* %p, i32 4
  %tmp04 = load i32, i32* %tmp03, align 8
  %tmp05 = getelementptr inbounds i32, i32* %p, i32 5
  %tmp06 = load i32, i32* %tmp05, align 8
  %tmp07 = sub i32 %tmp04, %tmp00
  %tmp08 = sub i32 %tmp02, %tmp02
  %tmp09 = getelementptr inbounds i32, i32* %p, i32 2
  store i32 %tmp07, i32* %tmp09, align 8
  %tmp10 = getelementptr inbounds i32, i32* %p, i32 3
  store i32 %tmp08, i32* %tmp10, align 8
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; CHECK-LABEL: pointer_iv_non_uniform_1
;
; Check that a pointer induction variable with a non-uniform user is not
; recognized as uniform and is not uniform after vectorization. The pointer
; induction variable is used by a store that will be scalarized.
;
; CHECK-NOT: LV: Found uniform instruction: %p = phi x86_fp80* [%tmp1, %for.body], [%a, %entry]
; CHECK: vector.body
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %next.gep = getelementptr x86_fp80, x86_fp80* %a, i64 %index
; CHECK: %[[I1:.+]] = or i64 %index, 1
; CHECK: %next.gep2 = getelementptr x86_fp80, x86_fp80* %a, i64 %[[I1]]
; CHECK: %[[I2:.+]] = or i64 %index, 2
; CHECK: %next.gep3 = getelementptr x86_fp80, x86_fp80* %a, i64 %[[I2]]
; CHECK: %[[I3:.+]] = or i64 %index, 3
; CHECK: %next.gep4 = getelementptr x86_fp80, x86_fp80* %a, i64 %[[I3]]
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
define void @pointer_iv_non_uniform_1(x86_fp80* %a, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %p = phi x86_fp80* [%tmp1, %for.body], [%a, %entry]
  %tmp0 = sitofp i32 1 to x86_fp80
  store x86_fp80 %tmp0, x86_fp80* %p, align 16
  %tmp1 = getelementptr inbounds x86_fp80, x86_fp80* %p, i32 1
  %i.next = add i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; CHECK-LABEL: pointer_iv_mixed
;
; Check multiple pointer induction variables where only one is recognized as
; uniform and remains uniform after vectorization. The other pointer induction
; variable is not recognized as uniform and is not uniform after vectorization
; because it is stored to memory.
;
; CHECK-NOT: LV: Found uniform instruction: %p = phi i32* [ %tmp3, %for.body ], [ %a, %entry ]
; CHECK: LV: Found uniform instruction: %q = phi i32** [ %tmp4, %for.body ], [ %b, %entry ]
; CHECK: vector.body
; CHECK: %pointer.phi = phi i32* [ %a, %vector.ph ], [ %ptr.ind, %vector.body ]
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %[[PTRVEC:.+]] = getelementptr i32, i32* %pointer.phi, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
; CHECK: %next.gep = getelementptr i32*, i32** %b, i64 %index
; CHECK: %[[NEXTGEPBC:.+]] = bitcast i32** %next.gep to <4 x i32*>*
; CHECK: store <4 x i32*> %[[PTRVEC]], <4 x i32*>* %[[NEXTGEPBC]], align 8
; CHECK: %ptr.ind = getelementptr i32, i32* %pointer.phi, i64 4
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
define i32 @pointer_iv_mixed(i32* %a, i32** %b, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %p = phi i32* [ %tmp3, %for.body ], [ %a, %entry ]
  %q = phi i32** [ %tmp4, %for.body ], [ %b, %entry ]
  %tmp0 = phi i32 [ %tmp2, %for.body ], [ 0, %entry ]
  %tmp1 = load i32, i32* %p, align 8
  %tmp2 = add i32 %tmp1, %tmp0
  store i32* %p, i32** %q, align 8
  %tmp3 = getelementptr inbounds i32, i32* %p, i32 1
  %tmp4 = getelementptr inbounds i32*, i32** %q, i32 1
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  %tmp5 = phi i32 [ %tmp2, %for.body ]
  ret i32 %tmp5
}

; INTER-LABEL: bitcast_pointer_operand
;
; Check that a pointer operand having a user other than a memory access is
; recognized as uniform after vectorization. In this test case, %tmp1 is a
; bitcast that is used by a load and a getelementptr instruction (%tmp2). Once
; %tmp2 is marked uniform, %tmp1 should be marked uniform as well.
;
; INTER: LV: Found uniform instruction: %cond = icmp slt i64 %i.next, %n
; INTER-NEXT: LV: Found uniform instruction: %tmp2 = getelementptr inbounds i8, i8* %tmp1, i64 3
; INTER-NEXT: LV: Found uniform instruction: %tmp6 = getelementptr inbounds i8, i8* %B, i64 %i
; INTER-NEXT: LV: Found uniform instruction: %tmp1 = bitcast i64* %tmp0 to i8*
; INTER-NEXT: LV: Found uniform instruction: %tmp0 = getelementptr inbounds i64, i64* %A, i64 %i
; INTER-NEXT: LV: Found uniform instruction: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
; INTER-NEXT: LV: Found uniform instruction: %i.next = add nuw nsw i64 %i, 1
; INTER: define void @bitcast_pointer_operand(
; INTER: vector.body:
; INTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, i64* %A, i64 [[INDEX]]
; INTER-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <32 x i8>*
; INTER-NEXT: [[WIDE_VEC:%.*]] = load <32 x i8>, <32 x i8>* [[TMP5]], align 1
; INTER-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
; INTER-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <4 x i32> <i32 3, i32 11, i32 19, i32 27>
; INTER-NEXT: [[TMP6:%.*]] = xor <4 x i8> [[STRIDED_VEC5]], [[STRIDED_VEC]]
; INTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* %B, i64 [[INDEX]]
; INTER-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <4 x i8>*
; INTER-NEXT: store <4 x i8> [[TMP6]], <4 x i8>* [[TMP8]], align 1
; INTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; INTER: br i1 {{.*}}, label %middle.block, label %vector.body
;
define void @bitcast_pointer_operand(i64* %A, i8* %B, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  %tmp0 = getelementptr inbounds i64, i64* %A, i64 %i
  %tmp1 = bitcast i64* %tmp0 to i8*
  %tmp2 = getelementptr inbounds i8, i8* %tmp1, i64 3
  %tmp3 = load i8, i8* %tmp2, align 1
  %tmp4 = load i8, i8* %tmp1, align 1
  %tmp5 = xor i8 %tmp3, %tmp4
  %tmp6 = getelementptr inbounds i8, i8* %B, i64 %i
  store i8 %tmp5, i8* %tmp6
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}