1; RUN: opt < %s -O1 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s 2 3target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" 4 5%struct.anon = type { [100 x i32], i32, [100 x i32] } 6%struct.anon.0 = type { [100 x [100 x i32]], i32, [100 x [100 x i32]] } 7 8@Foo = common global %struct.anon zeroinitializer, align 4 9@Bar = common global %struct.anon.0 zeroinitializer, align 4 10 11@PB = external global i32* 12@PA = external global i32* 13 14 15;; === First, the tests that should always vectorize, wither statically or by adding run-time checks === 16 17 18; /// Different objects, positive induction, constant distance 19; int noAlias01 (int a) { 20; int i; 21; for (i=0; i<SIZE; i++) 22; Foo.A[i] = Foo.B[i] + a; 23; return Foo.A[a]; 24; } 25; CHECK-LABEL: define i32 @noAlias01( 26; CHECK: add nsw <4 x i32> 27; CHECK: ret 28 29define i32 @noAlias01(i32 %a) nounwind { 30entry: 31 %a.addr = alloca i32, align 4 32 %i = alloca i32, align 4 33 store i32 %a, i32* %a.addr, align 4 34 store i32 0, i32* %i, align 4 35 br label %for.cond 36 37for.cond: ; preds = %for.inc, %entry 38 %0 = load i32* %i, align 4 39 %cmp = icmp slt i32 %0, 100 40 br i1 %cmp, label %for.body, label %for.end 41 42for.body: ; preds = %for.cond 43 %1 = load i32* %i, align 4 44 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1 45 %2 = load i32* %arrayidx, align 4 46 %3 = load i32* %a.addr, align 4 47 %add = add nsw i32 %2, %3 48 %4 = load i32* %i, align 4 49 %arrayidx1 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4 50 store i32 %add, i32* %arrayidx1, align 4 51 br label %for.inc 52 53for.inc: ; preds = %for.body 54 %5 = load i32* %i, align 4 55 %inc = add nsw i32 %5, 1 56 store i32 %inc, i32* %i, align 4 57 br label %for.cond 58 59for.end: ; preds = %for.cond 60 %6 = load i32* %a.addr, align 4 61 %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 62 %7 = load i32* %arrayidx2, align 4 63 ret i32 %7 64} 65 66; /// Different objects, positive induction with widening slide 67; int noAlias02 (int a) { 68; int i; 69; for (i=0; i<SIZE-10; i++) 70; Foo.A[i] = Foo.B[i+10] + a; 71; return Foo.A[a]; 72; } 73; CHECK-LABEL: define i32 @noAlias02( 74; CHECK: add nsw <4 x i32> 75; CHECK: ret 76 77define i32 @noAlias02(i32 %a) { 78entry: 79 %a.addr = alloca i32, align 4 80 %i = alloca i32, align 4 81 store i32 %a, i32* %a.addr, align 4 82 store i32 0, i32* %i, align 4 83 br label %for.cond 84 85for.cond: ; preds = %for.inc, %entry 86 %0 = load i32* %i, align 4 87 %cmp = icmp slt i32 %0, 90 88 br i1 %cmp, label %for.body, label %for.end 89 90for.body: ; preds = %for.cond 91 %1 = load i32* %i, align 4 92 %add = add nsw i32 %1, 10 93 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %add 94 %2 = load i32* %arrayidx, align 4 95 %3 = load i32* %a.addr, align 4 96 %add1 = add nsw i32 %2, %3 97 %4 = load i32* %i, align 4 98 %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4 99 store i32 %add1, i32* %arrayidx2, align 4 100 br label %for.inc 101 102for.inc: ; preds = %for.body 103 %5 = load i32* %i, align 4 104 %inc = add nsw i32 %5, 1 105 store i32 %inc, i32* %i, align 4 106 br label %for.cond 107 108for.end: ; preds = %for.cond 109 %6 = load i32* %a.addr, align 4 110 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 111 %7 = load i32* %arrayidx3, align 4 112 ret i32 %7 113} 114 115; /// Different objects, positive induction with shortening slide 116; int noAlias03 (int a) { 117; int i; 118; for (i=0; i<SIZE; i++) 119; Foo.A[i+10] = Foo.B[i] + a; 120; return Foo.A[a]; 121; } 122; CHECK-LABEL: define i32 @noAlias03( 123; CHECK: add nsw <4 x i32> 124; CHECK: ret 125 126define i32 @noAlias03(i32 %a) { 127entry: 128 %a.addr = alloca i32, align 4 129 %i = alloca i32, align 4 130 store i32 %a, i32* %a.addr, align 4 131 store i32 0, i32* %i, align 4 132 br label %for.cond 133 134for.cond: ; preds = %for.inc, %entry 135 %0 = load i32* %i, align 4 136 %cmp = icmp slt i32 %0, 100 137 br i1 %cmp, label %for.body, label %for.end 138 139for.body: ; preds = %for.cond 140 %1 = load i32* %i, align 4 141 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1 142 %2 = load i32* %arrayidx, align 4 143 %3 = load i32* %a.addr, align 4 144 %add = add nsw i32 %2, %3 145 %4 = load i32* %i, align 4 146 %add1 = add nsw i32 %4, 10 147 %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add1 148 store i32 %add, i32* %arrayidx2, align 4 149 br label %for.inc 150 151for.inc: ; preds = %for.body 152 %5 = load i32* %i, align 4 153 %inc = add nsw i32 %5, 1 154 store i32 %inc, i32* %i, align 4 155 br label %for.cond 156 157for.end: ; preds = %for.cond 158 %6 = load i32* %a.addr, align 4 159 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 160 %7 = load i32* %arrayidx3, align 4 161 ret i32 %7 162} 163 164; /// Pointer access, positive stride, run-time check added 165; int noAlias04 (int a) { 166; int i; 167; for (i=0; i<SIZE; i++) 168; *(PA+i) = *(PB+i) + a; 169; return *(PA+a); 170; } 171; CHECK-LABEL: define i32 @noAlias04( 172; CHECK-NOT: add nsw <4 x i32> 173; CHECK: ret 174; 175; TODO: This test vectorizes (with run-time check) on real targets with -O3) 176; Check why it's not being vectorized even when forcing vectorization 177 178define i32 @noAlias04(i32 %a) #0 { 179entry: 180 %a.addr = alloca i32, align 4 181 %i = alloca i32, align 4 182 store i32 %a, i32* %a.addr, align 4 183 store i32 0, i32* %i, align 4 184 br label %for.cond 185 186for.cond: ; preds = %for.inc, %entry 187 %0 = load i32* %i, align 4 188 %cmp = icmp slt i32 %0, 100 189 br i1 %cmp, label %for.body, label %for.end 190 191for.body: ; preds = %for.cond 192 %1 = load i32** @PB, align 4 193 %2 = load i32* %i, align 4 194 %add.ptr = getelementptr inbounds i32* %1, i32 %2 195 %3 = load i32* %add.ptr, align 4 196 %4 = load i32* %a.addr, align 4 197 %add = add nsw i32 %3, %4 198 %5 = load i32** @PA, align 4 199 %6 = load i32* %i, align 4 200 %add.ptr1 = getelementptr inbounds i32* %5, i32 %6 201 store i32 %add, i32* %add.ptr1, align 4 202 br label %for.inc 203 204for.inc: ; preds = %for.body 205 %7 = load i32* %i, align 4 206 %inc = add nsw i32 %7, 1 207 store i32 %inc, i32* %i, align 4 208 br label %for.cond 209 210for.end: ; preds = %for.cond 211 %8 = load i32** @PA, align 4 212 %9 = load i32* %a.addr, align 4 213 %add.ptr2 = getelementptr inbounds i32* %8, i32 %9 214 %10 = load i32* %add.ptr2, align 4 215 ret i32 %10 216} 217 218; /// Different objects, positive induction, multi-array 219; int noAlias05 (int a) { 220; int i, N=10; 221; for (i=0; i<SIZE; i++) 222; Bar.A[N][i] = Bar.B[N][i] + a; 223; return Bar.A[N][a]; 224; } 225; CHECK-LABEL: define i32 @noAlias05( 226; CHECK: add nsw <4 x i32> 227; CHECK: ret 228 229define i32 @noAlias05(i32 %a) #0 { 230entry: 231 %a.addr = alloca i32, align 4 232 %i = alloca i32, align 4 233 %N = alloca i32, align 4 234 store i32 %a, i32* %a.addr, align 4 235 store i32 10, i32* %N, align 4 236 store i32 0, i32* %i, align 4 237 br label %for.cond 238 239for.cond: ; preds = %for.inc, %entry 240 %0 = load i32* %i, align 4 241 %cmp = icmp slt i32 %0, 100 242 br i1 %cmp, label %for.body, label %for.end 243 244for.body: ; preds = %for.cond 245 %1 = load i32* %i, align 4 246 %2 = load i32* %N, align 4 247 %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2 248 %arrayidx1 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %1 249 %3 = load i32* %arrayidx1, align 4 250 %4 = load i32* %a.addr, align 4 251 %add = add nsw i32 %3, %4 252 %5 = load i32* %i, align 4 253 %6 = load i32* %N, align 4 254 %arrayidx2 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6 255 %arrayidx3 = getelementptr inbounds [100 x i32]* %arrayidx2, i32 0, i32 %5 256 store i32 %add, i32* %arrayidx3, align 4 257 br label %for.inc 258 259for.inc: ; preds = %for.body 260 %7 = load i32* %i, align 4 261 %inc = add nsw i32 %7, 1 262 store i32 %inc, i32* %i, align 4 263 br label %for.cond 264 265for.end: ; preds = %for.cond 266 %8 = load i32* %a.addr, align 4 267 %9 = load i32* %N, align 4 268 %arrayidx4 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9 269 %arrayidx5 = getelementptr inbounds [100 x i32]* %arrayidx4, i32 0, i32 %8 270 %10 = load i32* %arrayidx5, align 4 271 ret i32 %10 272} 273 274; /// Same objects, positive induction, multi-array, different sub-elements 275; int noAlias06 (int a) { 276; int i, N=10; 277; for (i=0; i<SIZE; i++) 278; Bar.A[N][i] = Bar.A[N+1][i] + a; 279; return Bar.A[N][a]; 280; } 281; CHECK-LABEL: define i32 @noAlias06( 282; CHECK: add nsw <4 x i32> 283; CHECK: ret 284 285define i32 @noAlias06(i32 %a) #0 { 286entry: 287 %a.addr = alloca i32, align 4 288 %i = alloca i32, align 4 289 %N = alloca i32, align 4 290 store i32 %a, i32* %a.addr, align 4 291 store i32 10, i32* %N, align 4 292 store i32 0, i32* %i, align 4 293 br label %for.cond 294 295for.cond: ; preds = %for.inc, %entry 296 %0 = load i32* %i, align 4 297 %cmp = icmp slt i32 %0, 100 298 br i1 %cmp, label %for.body, label %for.end 299 300for.body: ; preds = %for.cond 301 %1 = load i32* %i, align 4 302 %2 = load i32* %N, align 4 303 %add = add nsw i32 %2, 1 304 %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add 305 %arrayidx1 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %1 306 %3 = load i32* %arrayidx1, align 4 307 %4 = load i32* %a.addr, align 4 308 %add2 = add nsw i32 %3, %4 309 %5 = load i32* %i, align 4 310 %6 = load i32* %N, align 4 311 %arrayidx3 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6 312 %arrayidx4 = getelementptr inbounds [100 x i32]* %arrayidx3, i32 0, i32 %5 313 store i32 %add2, i32* %arrayidx4, align 4 314 br label %for.inc 315 316for.inc: ; preds = %for.body 317 %7 = load i32* %i, align 4 318 %inc = add nsw i32 %7, 1 319 store i32 %inc, i32* %i, align 4 320 br label %for.cond 321 322for.end: ; preds = %for.cond 323 %8 = load i32* %a.addr, align 4 324 %9 = load i32* %N, align 4 325 %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9 326 %arrayidx6 = getelementptr inbounds [100 x i32]* %arrayidx5, i32 0, i32 %8 327 %10 = load i32* %arrayidx6, align 4 328 ret i32 %10 329} 330 331; /// Different objects, negative induction, constant distance 332; int noAlias07 (int a) { 333; int i; 334; for (i=0; i<SIZE; i++) 335; Foo.A[SIZE-i-1] = Foo.B[SIZE-i-1] + a; 336; return Foo.A[a]; 337; } 338; CHECK-LABEL: define i32 @noAlias07( 339; CHECK: sub nsw <4 x i32> 340; CHECK: ret 341 342define i32 @noAlias07(i32 %a) #0 { 343entry: 344 %a.addr = alloca i32, align 4 345 %i = alloca i32, align 4 346 store i32 %a, i32* %a.addr, align 4 347 store i32 0, i32* %i, align 4 348 br label %for.cond 349 350for.cond: ; preds = %for.inc, %entry 351 %0 = load i32* %i, align 4 352 %cmp = icmp slt i32 %0, 100 353 br i1 %cmp, label %for.body, label %for.end 354 355for.body: ; preds = %for.cond 356 %1 = load i32* %i, align 4 357 %sub = sub nsw i32 100, %1 358 %sub1 = sub nsw i32 %sub, 1 359 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1 360 %2 = load i32* %arrayidx, align 4 361 %3 = load i32* %a.addr, align 4 362 %add = add nsw i32 %2, %3 363 %4 = load i32* %i, align 4 364 %sub2 = sub nsw i32 100, %4 365 %sub3 = sub nsw i32 %sub2, 1 366 %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3 367 store i32 %add, i32* %arrayidx4, align 4 368 br label %for.inc 369 370for.inc: ; preds = %for.body 371 %5 = load i32* %i, align 4 372 %inc = add nsw i32 %5, 1 373 store i32 %inc, i32* %i, align 4 374 br label %for.cond 375 376for.end: ; preds = %for.cond 377 %6 = load i32* %a.addr, align 4 378 %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 379 %7 = load i32* %arrayidx5, align 4 380 ret i32 %7 381} 382 383; /// Different objects, negative induction, shortening slide 384; int noAlias08 (int a) { 385; int i; 386; for (i=0; i<SIZE-10; i++) 387; Foo.A[SIZE-i-1] = Foo.B[SIZE-i-10] + a; 388; return Foo.A[a]; 389; } 390; CHECK-LABEL: define i32 @noAlias08( 391; CHECK: sub nsw <4 x i32> 392; CHECK: ret 393 394define i32 @noAlias08(i32 %a) #0 { 395entry: 396 %a.addr = alloca i32, align 4 397 %i = alloca i32, align 4 398 store i32 %a, i32* %a.addr, align 4 399 store i32 0, i32* %i, align 4 400 br label %for.cond 401 402for.cond: ; preds = %for.inc, %entry 403 %0 = load i32* %i, align 4 404 %cmp = icmp slt i32 %0, 90 405 br i1 %cmp, label %for.body, label %for.end 406 407for.body: ; preds = %for.cond 408 %1 = load i32* %i, align 4 409 %sub = sub nsw i32 100, %1 410 %sub1 = sub nsw i32 %sub, 10 411 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1 412 %2 = load i32* %arrayidx, align 4 413 %3 = load i32* %a.addr, align 4 414 %add = add nsw i32 %2, %3 415 %4 = load i32* %i, align 4 416 %sub2 = sub nsw i32 100, %4 417 %sub3 = sub nsw i32 %sub2, 1 418 %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3 419 store i32 %add, i32* %arrayidx4, align 4 420 br label %for.inc 421 422for.inc: ; preds = %for.body 423 %5 = load i32* %i, align 4 424 %inc = add nsw i32 %5, 1 425 store i32 %inc, i32* %i, align 4 426 br label %for.cond 427 428for.end: ; preds = %for.cond 429 %6 = load i32* %a.addr, align 4 430 %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 431 %7 = load i32* %arrayidx5, align 4 432 ret i32 %7 433} 434 435; /// Different objects, negative induction, widening slide 436; int noAlias09 (int a) { 437; int i; 438; for (i=0; i<SIZE; i++) 439; Foo.A[SIZE-i-10] = Foo.B[SIZE-i-1] + a; 440; return Foo.A[a]; 441; } 442; CHECK-LABEL: define i32 @noAlias09( 443; CHECK: sub nsw <4 x i32> 444; CHECK: ret 445 446define i32 @noAlias09(i32 %a) #0 { 447entry: 448 %a.addr = alloca i32, align 4 449 %i = alloca i32, align 4 450 store i32 %a, i32* %a.addr, align 4 451 store i32 0, i32* %i, align 4 452 br label %for.cond 453 454for.cond: ; preds = %for.inc, %entry 455 %0 = load i32* %i, align 4 456 %cmp = icmp slt i32 %0, 100 457 br i1 %cmp, label %for.body, label %for.end 458 459for.body: ; preds = %for.cond 460 %1 = load i32* %i, align 4 461 %sub = sub nsw i32 100, %1 462 %sub1 = sub nsw i32 %sub, 1 463 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1 464 %2 = load i32* %arrayidx, align 4 465 %3 = load i32* %a.addr, align 4 466 %add = add nsw i32 %2, %3 467 %4 = load i32* %i, align 4 468 %sub2 = sub nsw i32 100, %4 469 %sub3 = sub nsw i32 %sub2, 10 470 %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3 471 store i32 %add, i32* %arrayidx4, align 4 472 br label %for.inc 473 474for.inc: ; preds = %for.body 475 %5 = load i32* %i, align 4 476 %inc = add nsw i32 %5, 1 477 store i32 %inc, i32* %i, align 4 478 br label %for.cond 479 480for.end: ; preds = %for.cond 481 %6 = load i32* %a.addr, align 4 482 %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 483 %7 = load i32* %arrayidx5, align 4 484 ret i32 %7 485} 486 487; /// Pointer access, negative stride, run-time check added 488; int noAlias10 (int a) { 489; int i; 490; for (i=0; i<SIZE; i++) 491; *(PA+SIZE-i-1) = *(PB+SIZE-i-1) + a; 492; return *(PA+a); 493; } 494; CHECK-LABEL: define i32 @noAlias10( 495; CHECK-NOT: sub nsw <4 x i32> 496; CHECK: ret 497; 498; TODO: This test vectorizes (with run-time check) on real targets with -O3) 499; Check why it's not being vectorized even when forcing vectorization 500 501define i32 @noAlias10(i32 %a) #0 { 502entry: 503 %a.addr = alloca i32, align 4 504 %i = alloca i32, align 4 505 store i32 %a, i32* %a.addr, align 4 506 store i32 0, i32* %i, align 4 507 br label %for.cond 508 509for.cond: ; preds = %for.inc, %entry 510 %0 = load i32* %i, align 4 511 %cmp = icmp slt i32 %0, 100 512 br i1 %cmp, label %for.body, label %for.end 513 514for.body: ; preds = %for.cond 515 %1 = load i32** @PB, align 4 516 %add.ptr = getelementptr inbounds i32* %1, i32 100 517 %2 = load i32* %i, align 4 518 %idx.neg = sub i32 0, %2 519 %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 %idx.neg 520 %add.ptr2 = getelementptr inbounds i32* %add.ptr1, i32 -1 521 %3 = load i32* %add.ptr2, align 4 522 %4 = load i32* %a.addr, align 4 523 %add = add nsw i32 %3, %4 524 %5 = load i32** @PA, align 4 525 %add.ptr3 = getelementptr inbounds i32* %5, i32 100 526 %6 = load i32* %i, align 4 527 %idx.neg4 = sub i32 0, %6 528 %add.ptr5 = getelementptr inbounds i32* %add.ptr3, i32 %idx.neg4 529 %add.ptr6 = getelementptr inbounds i32* %add.ptr5, i32 -1 530 store i32 %add, i32* %add.ptr6, align 4 531 br label %for.inc 532 533for.inc: ; preds = %for.body 534 %7 = load i32* %i, align 4 535 %inc = add nsw i32 %7, 1 536 store i32 %inc, i32* %i, align 4 537 br label %for.cond 538 539for.end: ; preds = %for.cond 540 %8 = load i32** @PA, align 4 541 %9 = load i32* %a.addr, align 4 542 %add.ptr7 = getelementptr inbounds i32* %8, i32 %9 543 %10 = load i32* %add.ptr7, align 4 544 ret i32 %10 545} 546 547; /// Different objects, negative induction, multi-array 548; int noAlias11 (int a) { 549; int i, N=10; 550; for (i=0; i<SIZE; i++) 551; Bar.A[N][SIZE-i-1] = Bar.B[N][SIZE-i-1] + a; 552; return Bar.A[N][a]; 553; } 554; CHECK-LABEL: define i32 @noAlias11( 555; CHECK: sub nsw <4 x i32> 556; CHECK: ret 557 558define i32 @noAlias11(i32 %a) #0 { 559entry: 560 %a.addr = alloca i32, align 4 561 %i = alloca i32, align 4 562 %N = alloca i32, align 4 563 store i32 %a, i32* %a.addr, align 4 564 store i32 10, i32* %N, align 4 565 store i32 0, i32* %i, align 4 566 br label %for.cond 567 568for.cond: ; preds = %for.inc, %entry 569 %0 = load i32* %i, align 4 570 %cmp = icmp slt i32 %0, 100 571 br i1 %cmp, label %for.body, label %for.end 572 573for.body: ; preds = %for.cond 574 %1 = load i32* %i, align 4 575 %sub = sub nsw i32 100, %1 576 %sub1 = sub nsw i32 %sub, 1 577 %2 = load i32* %N, align 4 578 %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2 579 %arrayidx2 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %sub1 580 %3 = load i32* %arrayidx2, align 4 581 %4 = load i32* %a.addr, align 4 582 %add = add nsw i32 %3, %4 583 %5 = load i32* %i, align 4 584 %sub3 = sub nsw i32 100, %5 585 %sub4 = sub nsw i32 %sub3, 1 586 %6 = load i32* %N, align 4 587 %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6 588 %arrayidx6 = getelementptr inbounds [100 x i32]* %arrayidx5, i32 0, i32 %sub4 589 store i32 %add, i32* %arrayidx6, align 4 590 br label %for.inc 591 592for.inc: ; preds = %for.body 593 %7 = load i32* %i, align 4 594 %inc = add nsw i32 %7, 1 595 store i32 %inc, i32* %i, align 4 596 br label %for.cond 597 598for.end: ; preds = %for.cond 599 %8 = load i32* %a.addr, align 4 600 %9 = load i32* %N, align 4 601 %arrayidx7 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9 602 %arrayidx8 = getelementptr inbounds [100 x i32]* %arrayidx7, i32 0, i32 %8 603 %10 = load i32* %arrayidx8, align 4 604 ret i32 %10 605} 606 607; /// Same objects, negative induction, multi-array, different sub-elements 608; int noAlias12 (int a) { 609; int i, N=10; 610; for (i=0; i<SIZE; i++) 611; Bar.A[N][SIZE-i-1] = Bar.A[N+1][SIZE-i-1] + a; 612; return Bar.A[N][a]; 613; } 614; CHECK-LABEL: define i32 @noAlias12( 615; CHECK: sub nsw <4 x i32> 616; CHECK: ret 617 618define i32 @noAlias12(i32 %a) #0 { 619entry: 620 %a.addr = alloca i32, align 4 621 %i = alloca i32, align 4 622 %N = alloca i32, align 4 623 store i32 %a, i32* %a.addr, align 4 624 store i32 10, i32* %N, align 4 625 store i32 0, i32* %i, align 4 626 br label %for.cond 627 628for.cond: ; preds = %for.inc, %entry 629 %0 = load i32* %i, align 4 630 %cmp = icmp slt i32 %0, 100 631 br i1 %cmp, label %for.body, label %for.end 632 633for.body: ; preds = %for.cond 634 %1 = load i32* %i, align 4 635 %sub = sub nsw i32 100, %1 636 %sub1 = sub nsw i32 %sub, 1 637 %2 = load i32* %N, align 4 638 %add = add nsw i32 %2, 1 639 %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add 640 %arrayidx2 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %sub1 641 %3 = load i32* %arrayidx2, align 4 642 %4 = load i32* %a.addr, align 4 643 %add3 = add nsw i32 %3, %4 644 %5 = load i32* %i, align 4 645 %sub4 = sub nsw i32 100, %5 646 %sub5 = sub nsw i32 %sub4, 1 647 %6 = load i32* %N, align 4 648 %arrayidx6 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6 649 %arrayidx7 = getelementptr inbounds [100 x i32]* %arrayidx6, i32 0, i32 %sub5 650 store i32 %add3, i32* %arrayidx7, align 4 651 br label %for.inc 652 653for.inc: ; preds = %for.body 654 %7 = load i32* %i, align 4 655 %inc = add nsw i32 %7, 1 656 store i32 %inc, i32* %i, align 4 657 br label %for.cond 658 659for.end: ; preds = %for.cond 660 %8 = load i32* %a.addr, align 4 661 %9 = load i32* %N, align 4 662 %arrayidx8 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9 663 %arrayidx9 = getelementptr inbounds [100 x i32]* %arrayidx8, i32 0, i32 %8 664 %10 = load i32* %arrayidx9, align 4 665 ret i32 %10 666} 667 668; /// Same objects, positive induction, constant distance, just enough for vector size 669; int noAlias13 (int a) { 670; int i; 671; for (i=0; i<SIZE; i++) 672; Foo.A[i] = Foo.A[i+4] + a; 673; return Foo.A[a]; 674; } 675; CHECK-LABEL: define i32 @noAlias13( 676; CHECK: add nsw <4 x i32> 677; CHECK: ret 678 679define i32 @noAlias13(i32 %a) #0 { 680entry: 681 %a.addr = alloca i32, align 4 682 %i = alloca i32, align 4 683 store i32 %a, i32* %a.addr, align 4 684 store i32 0, i32* %i, align 4 685 br label %for.cond 686 687for.cond: ; preds = %for.inc, %entry 688 %0 = load i32* %i, align 4 689 %cmp = icmp slt i32 %0, 100 690 br i1 %cmp, label %for.body, label %for.end 691 692for.body: ; preds = %for.cond 693 %1 = load i32* %i, align 4 694 %add = add nsw i32 %1, 4 695 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add 696 %2 = load i32* %arrayidx, align 4 697 %3 = load i32* %a.addr, align 4 698 %add1 = add nsw i32 %2, %3 699 %4 = load i32* %i, align 4 700 %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4 701 store i32 %add1, i32* %arrayidx2, align 4 702 br label %for.inc 703 704for.inc: ; preds = %for.body 705 %5 = load i32* %i, align 4 706 %inc = add nsw i32 %5, 1 707 store i32 %inc, i32* %i, align 4 708 br label %for.cond 709 710for.end: ; preds = %for.cond 711 %6 = load i32* %a.addr, align 4 712 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 713 %7 = load i32* %arrayidx3, align 4 714 ret i32 %7 715} 716 717; /// Same objects, negative induction, constant distance, just enough for vector size 718; int noAlias14 (int a) { 719; int i; 720; for (i=0; i<SIZE; i++) 721; Foo.A[SIZE-i-1] = Foo.A[SIZE-i-5] + a; 722; return Foo.A[a]; 723; } 724; CHECK-LABEL: define i32 @noAlias14( 725; CHECK: sub nsw <4 x i32> 726; CHECK: ret 727 728define i32 @noAlias14(i32 %a) #0 { 729entry: 730 %a.addr = alloca i32, align 4 731 %i = alloca i32, align 4 732 store i32 %a, i32* %a.addr, align 4 733 store i32 0, i32* %i, align 4 734 br label %for.cond 735 736for.cond: ; preds = %for.inc, %entry 737 %0 = load i32* %i, align 4 738 %cmp = icmp slt i32 %0, 100 739 br i1 %cmp, label %for.body, label %for.end 740 741for.body: ; preds = %for.cond 742 %1 = load i32* %i, align 4 743 %sub = sub nsw i32 100, %1 744 %sub1 = sub nsw i32 %sub, 5 745 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1 746 %2 = load i32* %arrayidx, align 4 747 %3 = load i32* %a.addr, align 4 748 %add = add nsw i32 %2, %3 749 %4 = load i32* %i, align 4 750 %sub2 = sub nsw i32 100, %4 751 %sub3 = sub nsw i32 %sub2, 1 752 %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3 753 store i32 %add, i32* %arrayidx4, align 4 754 br label %for.inc 755 756for.inc: ; preds = %for.body 757 %5 = load i32* %i, align 4 758 %inc = add nsw i32 %5, 1 759 store i32 %inc, i32* %i, align 4 760 br label %for.cond 761 762for.end: ; preds = %for.cond 763 %6 = load i32* %a.addr, align 4 764 %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 765 %7 = load i32* %arrayidx5, align 4 766 ret i32 %7 767} 768 769 770;; === Now, the tests that we could vectorize with induction changes or run-time checks === 771 772 773; /// Different objects, swapped induction, alias at the end 774; int mayAlias01 (int a) { 775; int i; 776; for (i=0; i<SIZE; i++) 777; Foo.A[i] = Foo.B[SIZE-i-1] + a; 778; return Foo.A[a]; 779; } 780; CHECK-LABEL: define i32 @mayAlias01( 781; CHECK-NOT: add nsw <4 x i32> 782; CHECK: ret 783 784define i32 @mayAlias01(i32 %a) nounwind { 785entry: 786 %a.addr = alloca i32, align 4 787 %i = alloca i32, align 4 788 store i32 %a, i32* %a.addr, align 4 789 store i32 0, i32* %i, align 4 790 br label %for.cond 791 792for.cond: ; preds = %for.inc, %entry 793 %0 = load i32* %i, align 4 794 %cmp = icmp slt i32 %0, 100 795 br i1 %cmp, label %for.body, label %for.end 796 797for.body: ; preds = %for.cond 798 %1 = load i32* %i, align 4 799 %sub = sub nsw i32 100, %1 800 %sub1 = sub nsw i32 %sub, 1 801 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1 802 %2 = load i32* %arrayidx, align 4 803 %3 = load i32* %a.addr, align 4 804 %add = add nsw i32 %2, %3 805 %4 = load i32* %i, align 4 806 %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4 807 store i32 %add, i32* %arrayidx2, align 4 808 br label %for.inc 809 810for.inc: ; preds = %for.body 811 %5 = load i32* %i, align 4 812 %inc = add nsw i32 %5, 1 813 store i32 %inc, i32* %i, align 4 814 br label %for.cond 815 816for.end: ; preds = %for.cond 817 %6 = load i32* %a.addr, align 4 818 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 819 %7 = load i32* %arrayidx3, align 4 820 ret i32 %7 821} 822 823; /// Different objects, swapped induction, alias at the beginning 824; int mayAlias02 (int a) { 825; int i; 826; for (i=0; i<SIZE; i++) 827; Foo.A[SIZE-i-1] = Foo.B[i] + a; 828; return Foo.A[a]; 829; } 830; CHECK-LABEL: define i32 @mayAlias02( 831; CHECK-NOT: add nsw <4 x i32> 832; CHECK: ret 833 834define i32 @mayAlias02(i32 %a) nounwind { 835entry: 836 %a.addr = alloca i32, align 4 837 %i = alloca i32, align 4 838 store i32 %a, i32* %a.addr, align 4 839 store i32 0, i32* %i, align 4 840 br label %for.cond 841 842for.cond: ; preds = %for.inc, %entry 843 %0 = load i32* %i, align 4 844 %cmp = icmp slt i32 %0, 100 845 br i1 %cmp, label %for.body, label %for.end 846 847for.body: ; preds = %for.cond 848 %1 = load i32* %i, align 4 849 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1 850 %2 = load i32* %arrayidx, align 4 851 %3 = load i32* %a.addr, align 4 852 %add = add nsw i32 %2, %3 853 %4 = load i32* %i, align 4 854 %sub = sub nsw i32 100, %4 855 %sub1 = sub nsw i32 %sub, 1 856 %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1 857 store i32 %add, i32* %arrayidx2, align 4 858 br label %for.inc 859 860for.inc: ; preds = %for.body 861 %5 = load i32* %i, align 4 862 %inc = add nsw i32 %5, 1 863 store i32 %inc, i32* %i, align 4 864 br label %for.cond 865 866for.end: ; preds = %for.cond 867 %6 = load i32* %a.addr, align 4 868 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 869 %7 = load i32* %arrayidx3, align 4 870 ret i32 %7 871} 872 873; /// Pointer access, run-time check added 874; int mayAlias03 (int a) { 875; int i; 876; for (i=0; i<SIZE; i++) 877; *(PA+i) = *(PB+SIZE-i-1) + a; 878; return *(PA+a); 879; } 880; CHECK-LABEL: define i32 @mayAlias03( 881; CHECK-NOT: add nsw <4 x i32> 882; CHECK: ret 883 884define i32 @mayAlias03(i32 %a) nounwind { 885entry: 886 %a.addr = alloca i32, align 4 887 %i = alloca i32, align 4 888 store i32 %a, i32* %a.addr, align 4 889 store i32 0, i32* %i, align 4 890 br label %for.cond 891 892for.cond: ; preds = %for.inc, %entry 893 %0 = load i32* %i, align 4 894 %cmp = icmp slt i32 %0, 100 895 br i1 %cmp, label %for.body, label %for.end 896 897for.body: ; preds = %for.cond 898 %1 = load i32** @PB, align 4 899 %add.ptr = getelementptr inbounds i32* %1, i32 100 900 %2 = load i32* %i, align 4 901 %idx.neg = sub i32 0, %2 902 %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 %idx.neg 903 %add.ptr2 = getelementptr inbounds i32* %add.ptr1, i32 -1 904 %3 = load i32* %add.ptr2, align 4 905 %4 = load i32* %a.addr, align 4 906 %add = add nsw i32 %3, %4 907 %5 = load i32** @PA, align 4 908 %6 = load i32* %i, align 4 909 %add.ptr3 = getelementptr inbounds i32* %5, i32 %6 910 store i32 %add, i32* %add.ptr3, align 4 911 br label %for.inc 912 913for.inc: ; preds = %for.body 914 %7 = load i32* %i, align 4 915 %inc = add nsw i32 %7, 1 916 store i32 %inc, i32* %i, align 4 917 br label %for.cond 918 919for.end: ; preds = %for.cond 920 %8 = load i32** @PA, align 4 921 %9 = load i32* %a.addr, align 4 922 %add.ptr4 = getelementptr inbounds i32* %8, i32 %9 923 %10 = load i32* %add.ptr4, align 4 924 ret i32 %10 925} 926 927 928;; === Finally, the tests that should only vectorize with care (or if we ignore undefined behaviour at all) === 929 930 931; int mustAlias01 (int a) { 932; int i; 933; for (i=0; i<SIZE; i++) 934; Foo.A[i+10] = Foo.B[SIZE-i-1] + a; 935; return Foo.A[a]; 936; } 937; CHECK-LABEL: define i32 @mustAlias01( 938; CHECK-NOT: add nsw <4 x i32> 939; CHECK: ret 940 941define i32 @mustAlias01(i32 %a) nounwind { 942entry: 943 %a.addr = alloca i32, align 4 944 %i = alloca i32, align 4 945 store i32 %a, i32* %a.addr, align 4 946 store i32 0, i32* %i, align 4 947 br label %for.cond 948 949for.cond: ; preds = %for.inc, %entry 950 %0 = load i32* %i, align 4 951 %cmp = icmp slt i32 %0, 100 952 br i1 %cmp, label %for.body, label %for.end 953 954for.body: ; preds = %for.cond 955 %1 = load i32* %i, align 4 956 %sub = sub nsw i32 100, %1 957 %sub1 = sub nsw i32 %sub, 1 958 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1 959 %2 = load i32* %arrayidx, align 4 960 %3 = load i32* %a.addr, align 4 961 %add = add nsw i32 %2, %3 962 %4 = load i32* %i, align 4 963 %add2 = add nsw i32 %4, 10 964 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2 965 store i32 %add, i32* %arrayidx3, align 4 966 br label %for.inc 967 968for.inc: ; preds = %for.body 969 %5 = load i32* %i, align 4 970 %inc = add nsw i32 %5, 1 971 store i32 %inc, i32* %i, align 4 972 br label %for.cond 973 974for.end: ; preds = %for.cond 975 %6 = load i32* %a.addr, align 4 976 %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 977 %7 = load i32* %arrayidx4, align 4 978 ret i32 %7 979} 980 981; int mustAlias02 (int a) { 982; int i; 983; for (i=0; i<SIZE; i++) 984; Foo.A[i] = Foo.B[SIZE-i-10] + a; 985; return Foo.A[a]; 986; } 987; CHECK-LABEL: define i32 @mustAlias02( 988; CHECK-NOT: add nsw <4 x i32> 989; CHECK: ret 990 991define i32 @mustAlias02(i32 %a) nounwind { 992entry: 993 %a.addr = alloca i32, align 4 994 %i = alloca i32, align 4 995 store i32 %a, i32* %a.addr, align 4 996 store i32 0, i32* %i, align 4 997 br label %for.cond 998 999for.cond: ; preds = %for.inc, %entry 1000 %0 = load i32* %i, align 4 1001 %cmp = icmp slt i32 %0, 100 1002 br i1 %cmp, label %for.body, label %for.end 1003 1004for.body: ; preds = %for.cond 1005 %1 = load i32* %i, align 4 1006 %sub = sub nsw i32 100, %1 1007 %sub1 = sub nsw i32 %sub, 10 1008 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1 1009 %2 = load i32* %arrayidx, align 4 1010 %3 = load i32* %a.addr, align 4 1011 %add = add nsw i32 %2, %3 1012 %4 = load i32* %i, align 4 1013 %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4 1014 store i32 %add, i32* %arrayidx2, align 4 1015 br label %for.inc 1016 1017for.inc: ; preds = %for.body 1018 %5 = load i32* %i, align 4 1019 %inc = add nsw i32 %5, 1 1020 store i32 %inc, i32* %i, align 4 1021 br label %for.cond 1022 1023for.end: ; preds = %for.cond 1024 %6 = load i32* %a.addr, align 4 1025 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 1026 %7 = load i32* %arrayidx3, align 4 1027 ret i32 %7 1028} 1029 1030; int mustAlias03 (int a) { 1031; int i; 1032; for (i=0; i<SIZE; i++) 1033; Foo.A[i+10] = Foo.B[SIZE-i-10] + a; 1034; return Foo.A[a]; 1035; } 1036; CHECK-LABEL: define i32 @mustAlias03( 1037; CHECK-NOT: add nsw <4 x i32> 1038; CHECK: ret 1039 1040define i32 @mustAlias03(i32 %a) nounwind { 1041entry: 1042 %a.addr = alloca i32, align 4 1043 %i = alloca i32, align 4 1044 store i32 %a, i32* %a.addr, align 4 1045 store i32 0, i32* %i, align 4 1046 br label %for.cond 1047 1048for.cond: ; preds = %for.inc, %entry 1049 %0 = load i32* %i, align 4 1050 %cmp = icmp slt i32 %0, 100 1051 br i1 %cmp, label %for.body, label %for.end 1052 1053for.body: ; preds = %for.cond 1054 %1 = load i32* %i, align 4 1055 %sub = sub nsw i32 100, %1 1056 %sub1 = sub nsw i32 %sub, 10 1057 %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1 1058 %2 = load i32* %arrayidx, align 4 1059 %3 = load i32* %a.addr, align 4 1060 %add = add nsw i32 %2, %3 1061 %4 = load i32* %i, align 4 1062 %add2 = add nsw i32 %4, 10 1063 %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2 1064 store i32 %add, i32* %arrayidx3, align 4 1065 br label %for.inc 1066 1067for.inc: ; preds = %for.body 1068 %5 = load i32* %i, align 4 1069 %inc = add nsw i32 %5, 1 1070 store i32 %inc, i32* %i, align 4 1071 br label %for.cond 1072 1073for.end: ; preds = %for.cond 1074 %6 = load i32* %a.addr, align 4 1075 %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 1076 %7 = load i32* %arrayidx4, align 4 1077 ret i32 %7 1078} 1079