; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -slp-threshold=-999 -S -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck %s

declare i64 @may_inf_loop_ro() nounwind readonly
declare i64 @may_inf_loop_rw() nounwind
declare i64 @may_throw() willreturn

; Base case with no interesting control dependencies
define void @test_no_control(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test_no_control(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4
; CHECK-NEXT:    ret void
;
  %v1 = load i64, i64* %a
  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2

  %c1 = load i64, i64* %c
  %ca2 = getelementptr i64, i64* %c, i32 1
  %c2 = load i64, i64* %ca2
  %add1 = add i64 %v1, %c1
  %add2 = add i64 %v2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

define void @test1(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:    [[C1:%.*]] = load i64, i64* [[C:%.*]], align 4
; CHECK-NEXT:    [[C2:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[C2]], i32 1
; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4
; CHECK-NEXT:    ret void
;
  %v1 = load i64, i64* %a
  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2

  %c1 = load i64, i64* %c
  %c2 = call i64 @may_inf_loop_ro()
  %add1 = add i64 %v1, %c1
  %add2 = add i64 %v2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

define void @test2(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:    [[C1:%.*]] = load i64, i64* [[C:%.*]], align 4
; CHECK-NEXT:    [[C2:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[C2]], i32 1
; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4
; CHECK-NEXT:    ret void
;
  %c1 = load i64, i64* %c
  %c2 = call i64 @may_inf_loop_ro()

  %v1 = load i64, i64* %a
  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2

  %add1 = add i64 %v1, %c1
  %add2 = add i64 %v2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

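; test3 through test5 permute the order of the scalar loads, the adds, and the
; readonly call which may not return; as the CHECK lines in each test show, the
; loads from %a and the stores to %b are still vectorized in every ordering.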
define void @test3(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test3(
; CHECK-NEXT:    [[C1:%.*]] = load i64, i64* [[C:%.*]], align 4
; CHECK-NEXT:    [[C2:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[C2]], i32 1
; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4
; CHECK-NEXT:    ret void
;
  %v1 = load i64, i64* %a
  %c1 = load i64, i64* %c
  %add1 = add i64 %v1, %c1

  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2
  %c2 = call i64 @may_inf_loop_ro()
  %add2 = add i64 %v2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

define void @test4(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test4(
; CHECK-NEXT:    [[C1:%.*]] = load i64, i64* [[C:%.*]], align 4
; CHECK-NEXT:    [[C2:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[C2]], i32 1
; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4
; CHECK-NEXT:    ret void
;
  %v1 = load i64, i64* %a
  %c1 = load i64, i64* %c
  %add1 = add i64 %v1, %c1

  %c2 = call i64 @may_inf_loop_ro()
  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2
  %add2 = add i64 %v2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

define void @test5(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test5(
; CHECK-NEXT:    [[C2:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT:    [[C1:%.*]] = load i64, i64* [[C:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[C2]], i32 1
; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4
; CHECK-NEXT:    ret void
;
  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2
  %c2 = call i64 @may_inf_loop_ro()
  %add2 = add i64 %v2, %c2

  %v1 = load i64, i64* %a
  %c1 = load i64, i64* %c
  %add1 = add i64 %v1, %c1

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

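; Here the readonly call which may not return sits between the two loads from
; %a; as the CHECK lines show, the loads are still vectorized, with the vector
; load scheduled after the call.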
define void @test6(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test6(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 4
; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP3]], [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
; CHECK-NEXT:    ret void
;
  %v1 = load i64, i64* %a
  call i64 @may_inf_loop_ro()
  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2

  %c1 = load i64, i64* %c
  %ca2 = getelementptr i64, i64* %c, i32 1
  %c2 = load i64, i64* %ca2
  %add1 = add i64 %v1, %c1
  %add2 = add i64 %v2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

; In this case, we can't vectorize the load pair because there's no valid
; scheduling point which respects both memory and control dependence. If we
; scheduled the second load before the store (keeping the first load in place),
; we'd have hoisted a potentially faulting load above a potentially infinite
; call and thus introduced a possible fault into a program which didn't
; previously have one.
define void @test7(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test7(
; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, i64* [[A:%.*]], i32 1
; CHECK-NEXT:    [[V1:%.*]] = load i64, i64* [[A]], align 4
; CHECK-NEXT:    store i64 0, i64* [[A]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT:    [[V2:%.*]] = load i64, i64* [[A2]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[V2]], i32 1
; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP5]], [[TMP3]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
; CHECK-NEXT:    ret void
;
  %v1 = load i64, i64* %a
  store i64 0, i64* %a
  call i64 @may_inf_loop_ro()
  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2

  %c1 = load i64, i64* %c
  %ca2 = getelementptr i64, i64* %c, i32 1
  %c2 = load i64, i64* %ca2
  %add1 = add i64 %v1, %c1
  %add2 = add i64 %v2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

; Same as test7, but with a throwing call
define void @test8(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test8(
; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, i64* [[A:%.*]], i32 1
; CHECK-NEXT:    [[V1:%.*]] = load i64, i64* [[A]], align 4
; CHECK-NEXT:    store i64 0, i64* [[A]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_throw() #[[ATTR4:[0-9]+]]
; CHECK-NEXT:    [[V2:%.*]] = load i64, i64* [[A2]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[V2]], i32 1
; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP5]], [[TMP3]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
; CHECK-NEXT:    ret void
;
  %v1 = load i64, i64* %a
  store i64 0, i64* %a
  call i64 @may_throw() readonly
  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2

  %c1 = load i64, i64* %c
  %ca2 = getelementptr i64, i64* %c, i32 1
  %c2 = load i64, i64* %ca2
  %add1 = add i64 %v1, %c1
  %add2 = add i64 %v2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

; Same as test8, but with a read/write call which may throw
define void @test9(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test9(
; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, i64* [[A:%.*]], i32 1
; CHECK-NEXT:    [[V1:%.*]] = load i64, i64* [[A]], align 4
; CHECK-NEXT:    store i64 0, i64* [[A]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_throw()
; CHECK-NEXT:    [[V2:%.*]] = load i64, i64* [[A2]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[V2]], i32 1
; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP5]], [[TMP3]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
; CHECK-NEXT:    ret void
;
  %v1 = load i64, i64* %a
  store i64 0, i64* %a
  call i64 @may_throw()
  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2

  %c1 = load i64, i64* %c
  %ca2 = getelementptr i64, i64* %c, i32 1
  %c2 = load i64, i64* %ca2
  %add1 = add i64 %v1, %c1
  %add2 = add i64 %v2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

; A variant of test7 which shows the same problem with a non-load instruction
define void @test10(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @test10(
; CHECK-NEXT:    [[V1:%.*]] = load i64, i64* [[A:%.*]], align 4
; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, i64* [[A]], i32 1
; CHECK-NEXT:    [[V2:%.*]] = load i64, i64* [[A2]], align 4
; CHECK-NEXT:    [[U1:%.*]] = udiv i64 200, [[V1]]
; CHECK-NEXT:    store i64 [[U1]], i64* [[A]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT:    [[U2:%.*]] = udiv i64 200, [[V2]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[U1]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[U2]], i32 1
; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP5]], [[TMP3]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
; CHECK-NEXT:    ret void
;
  %v1 = load i64, i64* %a
  %a2 = getelementptr i64, i64* %a, i32 1
  %v2 = load i64, i64* %a2

  %u1 = udiv i64 200, %v1
  store i64 %u1, i64* %a
  call i64 @may_inf_loop_ro()
  %u2 = udiv i64 200, %v2

  %c1 = load i64, i64* %c
  %ca2 = getelementptr i64, i64* %c, i32 1
  %c2 = load i64, i64* %ca2
  %add1 = add i64 %u1, %c1
  %add2 = add i64 %u2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}

; Variant of test10 with block-invariant operands to the udivs
; FIXME: This is wrong; we're hoisting a potentially faulting udiv above a
; potentially infinite call.
define void @test11(i64 %x, i64 %y, i64* %b, i64* %c) {
; CHECK-LABEL: @test11(
; CHECK-NEXT:    [[U1:%.*]] = udiv i64 200, [[X:%.*]]
; CHECK-NEXT:    store i64 [[U1]], i64* [[B:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT:    [[U2:%.*]] = udiv i64 200, [[Y:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[U1]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[U2]], i32 1
; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP5]], [[TMP3]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[B]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
; CHECK-NEXT:    ret void
;
  %u1 = udiv i64 200, %x
  store i64 %u1, i64* %b
  call i64 @may_inf_loop_ro()
  %u2 = udiv i64 200, %y

  %c1 = load i64, i64* %c
  %ca2 = getelementptr i64, i64* %c, i32 1
  %c2 = load i64, i64* %ca2
  %add1 = add i64 %u1, %c1
  %add2 = add i64 %u2, %c2

  store i64 %add1, i64* %b
  %b2 = getelementptr i64, i64* %b, i32 1
  store i64 %add2, i64* %b2
  ret void
}