; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
; Regression tests for the loop-idiom pass: each function below contains a
; simple store (or load+store) loop, and the FileCheck directives state whether
; the loop is expected to be replaced by a memset / memcpy / memset_pattern
; call or to be left alone.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

; For @test11_pattern
; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 1, i32 1, i32 1, i32 1]

; For @test13_pattern
; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x i32*] [i32* @G, i32* @G]

target triple = "x86_64-apple-darwin10.0.0"

; Simplest case: a loop storing a zero byte to Base[0], Base[1], ... until the
; induction variable reaches %Size. The checks expect a single llvm.memset of
; %Size bytes on %Base and no remaining store.
define void @test1(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; function entry block (no predecessors)
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test1(
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 %Size, i1 false)
; CHECK-NOT: store
}

; This is a loop that was rotated but where the blocks weren't merged.  This
; shouldn't perturb us.
define void @test1a(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; function entry block (no predecessors)
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body.cont
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  br label %for.body.cont
for.body.cont:                                    ; preds = %for.body
  ; The exit test lives in its own block; the store stays in for.body.
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body.cont
  ret void
; CHECK-LABEL: @test1a(
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 %Size, i1 false)
; CHECK-NOT: store
}


; A loop storing the i32 constant 16843009 (0x01010101, i.e. every byte is 1)
; to consecutive i32 slots, guarded by a %Size == 0 check in the entry block.
; The checks expect the guard branch to remain, the byte count to be computed
; as %Size << 2, and a memset of byte value 1 with that length.
define void @test2(i32* %Base, i64 %Size) nounwind ssp {
entry:
  %cmp10 = icmp eq i64 %Size, 0
  br i1 %cmp10, label %for.end, label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %add.ptr.i = getelementptr i32, i32* %Base, i64 %i.011
  store i32 16843009, i32* %add.ptr.i, align 4
  %inc = add nsw i64 %i.011, 1
  %exitcond = icmp eq i64 %inc, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
; CHECK-LABEL: @test2(
; CHECK: br i1 %cmp10,
; CHECK: %0 = shl i64 %Size, 2
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %Base1, i8 1, i64 %0, i1 false)
; CHECK-NOT: store
}

; This is a case where there is an extra may-aliased store in the loop, we can't
; promote the memset.
define void @test3(i32* %Base, i64 %Size, i8 *%MayAlias) nounwind ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %add.ptr.i = getelementptr i32, i32* %Base, i64 %i.011
  store i32 16843009, i32* %add.ptr.i, align 4

  ; Second store through an unrelated pointer argument; nothing in this
  ; function says it cannot point into the %Base range.
  store i8 42, i8* %MayAlias
  %inc = add nsw i64 %i.011, 1
  %exitcond = icmp eq i64 %inc, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test3(
; CHECK-NOT: memset
; CHECK: ret void
}

; Make sure the first store in the loop is turned into a memset.
define void @test4(i8* %Base) nounwind ssp {
bb.nph:                                           ; function entry block (no predecessors)
  ; Note: despite its name, %Base100 is %Base + 1000.
  %Base100 = getelementptr i8, i8* %Base, i64 1000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1

  ;; This store is beyond the range covered by the memset (the loop writes
  ;; offsets 0..99, this writes offset 1000), so promotion should be safe.
  store i8 42, i8* %Base100

  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 100
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test4(
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 100, i1 false)
}

; This can't be promoted: the memset is a store of a loop variant value.
define void @test5(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; function entry block (no predecessors)
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar

  ; The stored value is the truncated induction variable, so it changes on
  ; every iteration.
  %V = trunc i64 %indvar to i8
  store i8 %V, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test5(
; CHECK-NOT: memset
; CHECK: ret void
}


;; memcpy formation
;; Copies %Size bytes, one at a time, between two distinct 10000-byte allocas.
;; The checks expect a single llvm.memcpy from %Base to %Dest and no
;; remaining store.
define void @test6(i64 %Size) nounwind ssp {
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load i8, i8* %I.0.014, align 1
  store i8 %V, i8* %DestI, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test6(
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}


; This is a loop that was rotated but where the blocks weren't merged.  This
; shouldn't perturb us.
define void @test7(i8* %Base, i64 %Size) nounwind ssp {
bb.nph:                                           ; function entry block (no predecessors)
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body.cont
  ; Here the header holds only the phi; the store and the exit test are both
  ; in for.body.cont.
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
  br label %for.body.cont
for.body.cont:                                    ; preds = %for.body
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  store i8 0, i8* %I.0.014, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body.cont
  ret void
; CHECK-LABEL: @test7(
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 %Size, i1 false)
; CHECK-NOT: store
}

; This loop should not be transformed: it only executes one iteration (the
; exit condition compares the incremented induction variable against 1).
define void @test8(i64* %Ptr, i64 %Size) nounwind ssp {
bb.nph:                                           ; function entry block (no predecessors)
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %PI = getelementptr i64, i64* %Ptr, i64 %indvar
  store i64 0, i64 *%PI
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 1
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test8(
; CHECK: store i64 0, i64* %PI
}

; Opaque external function; @test9 passes it a pointer and uses the returned
; pointer as a potential alias.
declare i8* @external(i8*)

;; This cannot be transformed into a memcpy, because the read-from location is
;; mutated by the loop.
define void @test9(i64 %Size) nounwind ssp {
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000

  ; %Base is passed to an opaque call, and the pointer it returns is written
  ; to inside the loop below.
  %BaseAlias = call i8* @external(i8* %Base)
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load i8, i8* %I.0.014, align 1
  store i8 %V, i8* %DestI, align 1

  ;; This store can clobber the input.
  store i8 4, i8* %BaseAlias

  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test9(
; CHECK-NOT: llvm.memcpy
; CHECK: ret void
}

; Two dimensional nested loop should be promoted to one big memset.
; Both loops run 100 iterations and the store index is i*100 + j, so the
; expected memset covers 100 * 100 = 10000 bytes and is placed in the entry
; block.
define void @test10(i8* %X) nounwind ssp {
entry:
  br label %bb.nph

bb.nph:                                           ; preds = %entry, %for.inc10
  %i.04 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ]
  br label %for.body5

for.body5:                                        ; preds = %for.body5, %bb.nph
  %j.02 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body5 ]
  %mul = mul nsw i32 %i.04, 100
  %add = add nsw i32 %j.02, %mul
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i8, i8* %X, i64 %idxprom
  store i8 0, i8* %arrayidx, align 1
  %inc = add nsw i32 %j.02, 1
  %cmp4 = icmp eq i32 %inc, 100
  br i1 %cmp4, label %for.inc10, label %for.body5

for.inc10:                                        ; preds = %for.body5
  %inc12 = add nsw i32 %i.04, 1
  %cmp = icmp eq i32 %inc12, 100
  br i1 %cmp, label %for.end13, label %bb.nph

for.end13:                                        ; preds = %for.inc10
  ret void
; CHECK-LABEL: @test10(
; CHECK: entry:
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 %X, i8 0, i64 10000, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

; On darwin10 (which is the triple in this .ll file) this loop can be turned
; into a memset_pattern call.
; The stored i32 value 1 is not a repeated single byte; the expected pattern
; global is checked near the top of the file.
; rdar://9009151
define void @test11_pattern(i32* nocapture %P) nounwind ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr i32, i32* %P, i64 %indvar
  store i32 1, i32* %arrayidx, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test11_pattern(
; CHECK-NEXT: entry:
; CHECK-NEXT: bitcast
; CHECK-NEXT: memset_pattern
; CHECK-NOT: store
; CHECK: ret void
}

; Store of null should turn into memset of zero.
; 10000 pointer-sized (64-bit per the datalayout) elements give the expected
; length of 80000 bytes.
define void @test12(i32** nocapture %P) nounwind ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr i32*, i32** %P, i64 %indvar
  store i32* null, i32** %arrayidx, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test12(
; CHECK-NEXT: entry:
; CHECK-NEXT: bitcast
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 %P1, i8 0, i64 80000, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

; Global whose address is the value stored by @test13_pattern.
@G = global i32 5

; This store-of-address loop can be turned into a memset_pattern call.
; rdar://9009151
define void @test13_pattern(i32** nocapture %P) nounwind ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr i32*, i32** %P, i64 %indvar
  ; The stored value is the address of the global @G, a constant that is not
  ; a repeated byte.
  store i32* @G, i32** %arrayidx, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
; CHECK-LABEL: @test13_pattern(
; CHECK-NEXT: entry:
; CHECK-NEXT: bitcast
; CHECK-NEXT: memset_pattern
; CHECK-NOT: store
; CHECK: ret void
}



; PR9815 - This is a partial overlap case that cannot be safely transformed
; into a memcpy.
@g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16

; The loop runs for %tmp5 = 0 and 1; each iteration loads g_50[%tmp5 + 4] and
; stores it to g_50[%tmp5 + 5], so the second iteration reads the element the
; first iteration wrote. The checks expect the load/store loop to survive.
define i32 @test14() nounwind {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %add = add nsw i32 %tmp5, 4
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom
  %tmp2 = load i32, i32* %arrayidx, align 4
  %add4 = add nsw i32 %tmp5, 5
  %idxprom5 = sext i32 %add4 to i64
  %arrayidx6 = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom5
  store i32 %tmp2, i32* %arrayidx6, align 4
  %inc = add nsw i32 %tmp5, 1
  %cmp = icmp slt i32 %inc, 2
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  %tmp8 = load i32, i32* getelementptr inbounds ([7 x i32], [7 x i32]* @g_50, i32 0, i64 6), align 4
  ret i32 %tmp8
; CHECK-LABEL: @test14(
; CHECK: for.body:
; CHECK: load i32
; CHECK: store i32
; CHECK: br i1 %cmp

}

define void @PR14241(i32* %s, i64 %size) {
; Ensure that we don't form a memcpy for strided loops. Briefly, when we taught
; LoopIdiom about memmove and strided loops, this got miscompiled into a memcpy
; instead of a memmove. If we get the memmove transform back, this will catch
; regressions.
;
; CHECK-LABEL: @PR14241(

entry:
  %end.idx = add i64 %size, -1
  %end.ptr = getelementptr inbounds i32, i32* %s, i64 %end.idx
  br label %while.body
; CHECK-NOT: memcpy
;
; FIXME: When we regain the ability to form a memmove here, this test should be
; reversed and turned into a positive assertion.
; CHECK-NOT: memmove

while.body:
  ; Each iteration copies the element at %phi.ptr + 1 down to %phi.ptr, so the
  ; source and destination ranges overlap.
  %phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ]
  %src.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
  %val = load i32, i32* %src.ptr, align 4
; CHECK: load
  %dst.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 0
  store i32 %val, i32* %dst.ptr, align 4
; CHECK: store
  %next.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
  %cmp = icmp eq i32* %next.ptr, %end.ptr
  br i1 %cmp, label %exit, label %while.body

exit:
  ret void
; CHECK: ret void
}

; Recognize loops with a negative stride.
; The index counts down from 65536 to 0 inclusive, i.e. 65537 i32 stores,
; which matches the expected length of 65537 * 4 = 262148 bytes.
define void @test15(i32* nocapture %f) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
  store i32 0, i32* %arrayidx, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp = icmp sgt i64 %indvars.iv, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  ret void
; CHECK-LABEL: @test15(
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %f1, i8 0, i64 262148, i1 false)
; CHECK-NOT: store
; CHECK: ret void
}

; Loop with a negative stride.  Verify an aliasing write to f[65536] prevents
; the creation of a memset.
define void @test16(i32* nocapture %f) {
entry:
  ; Address of f[65536], which is also the first element the loop zeroes.
  %arrayidx1 = getelementptr inbounds i32, i32* %f, i64 65536
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
  store i32 0, i32* %arrayidx, align 4
  ; Writes 1 into f[65536] on every iteration, inside the zeroed range.
  store i32 1, i32* %arrayidx1, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp = icmp sgt i64 %indvars.iv, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body
  ret void
; CHECK-LABEL: @test16(
; CHECK-NOT: call void @llvm.memset.p0i8.i64
; CHECK: ret void
}

; Handle memcpy-able loops with negative stride.
; Allocates 4 * %c bytes with @malloc and copies %a into the new buffer from
; index %c - 1 down to 0; the loop is skipped entirely when %c is 0.
define noalias i32* @test17(i32* nocapture readonly %a, i32 %c) {
entry:
  %conv = sext i32 %c to i64
  %mul = shl nsw i64 %conv, 2
  %call = tail call noalias i8* @malloc(i64 %mul)
  %0 = bitcast i8* %call to i32*
  %tobool.9 = icmp eq i32 %c, 0
  br i1 %tobool.9, label %while.end, label %while.body.preheader

while.body.preheader:                             ; preds = %entry
  br label %while.body

while.body:                                       ; preds = %while.body.preheader, %while.body
  %dec10.in = phi i32 [ %dec10, %while.body ], [ %c, %while.body.preheader ]
  %dec10 = add nsw i32 %dec10.in, -1
  %idxprom = sext i32 %dec10 to i64
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
  %1 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %0, i64 %idxprom
  store i32 %1, i32* %arrayidx2, align 4
  %tobool = icmp eq i32 %dec10, 0
  br i1 %tobool, label %while.end.loopexit, label %while.body

while.end.loopexit:                               ; preds = %while.body
  br label %while.end

while.end:                                        ; preds = %while.end.loopexit, %entry
  ret i32* %0
; CHECK-LABEL: @test17(
; CHECK: call void @llvm.memcpy
; CHECK: ret i32*
}

; Allocator used by @test17; its result is marked noalias.
declare noalias i8* @malloc(i64)

; Handle memcpy-able loops with negative stride.
; void test18(unsigned *__restrict__ a, unsigned *__restrict__ b) {
;   for (int i = 2047; i >= 0; --i) {
;     a[i] = b[i];
;   }
; }
; NOTE(review): attribute group #0 is referenced below, but no definition of
; it is visible in this part of the file.
define void @test18(i32* noalias nocapture %a, i32* noalias nocapture readonly %b) #0 {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 2047, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  store i32 %0, i32* %arrayidx2, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp = icmp sgt i64 %indvars.iv, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body
  ret void
; CHECK-LABEL: @test18(
; CHECK: call void @llvm.memcpy
; CHECK: ret
}

; Two dimensional nested loop with negative stride should be promoted to one big memset.
define void @test19(i8* nocapture %X) {
entry:
  br label %for.cond1.preheader

for.cond1.preheader:                              ; preds = %entry, %for.inc4
  ; Outer index counts down from 99 to 0; the row offset i*100 is computed
  ; once per outer iteration.
  %i.06 = phi i32 [ 99, %entry ], [ %dec5, %for.inc4 ]
  %mul = mul nsw i32 %i.06, 100
  br label %for.body3

for.body3:                                        ; preds = %for.cond1.preheader, %for.body3
  ; Inner index also counts down from 99 to 0; the store hits X[i*100 + j].
  %j.05 = phi i32 [ 99, %for.cond1.preheader ], [ %dec, %for.body3 ]
  %add = add nsw i32 %j.05, %mul
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i8, i8* %X, i64 %idxprom
  store i8 0, i8* %arrayidx, align 1
  %dec = add nsw i32 %j.05, -1
  %cmp2 = icmp sgt i32 %j.05, 0
  br i1 %cmp2, label %for.body3, label %for.inc4

for.inc4:                                         ; preds = %for.body3
  %dec5 = add nsw i32 %i.06, -1
  %cmp = icmp sgt i32 %i.06, 0
  br i1 %cmp, label %for.cond1.preheader, label %for.end6

for.end6:                                         ; preds = %for.inc4
  ret void
; CHECK-LABEL: @test19(
; CHECK: entry:
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 %X, i8 0, i64 10000, i1 false)
; CHECK: ret void
}

; Handle loops where the trip count is a narrow integer that needs to be
; extended.
define void @form_memset_narrow_size(i64* %ptr, i32 %size) {
; CHECK-LABEL: @form_memset_narrow_size(
entry:
  ; Guard: the loop is only entered when the i32 %size is positive.
  %cmp1 = icmp sgt i32 %size, 0
  br i1 %cmp1, label %loop.ph, label %exit
; CHECK: entry:
; CHECK: %[[C1:.*]] = icmp sgt i32 %size, 0
; CHECK-NEXT: br i1 %[[C1]], label %loop.ph, label %exit

; The checks below expect the byte count to be built in the preheader by
; zero-extending %size to i64 and shifting left by 3 (8 bytes per i64).
loop.ph:
  br label %loop.body
; CHECK: loop.ph:
; CHECK-NEXT: %[[ZEXT_SIZE:.*]] = zext i32 %size to i64
; CHECK-NEXT: %[[SCALED_SIZE:.*]] = shl i64 %[[ZEXT_SIZE]], 3
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %{{.*}}, i8 0, i64 %[[SCALED_SIZE]], i1 false)

loop.body:
  %storemerge4 = phi i32 [ 0, %loop.ph ], [ %inc, %loop.body ]
  %idxprom = sext i32 %storemerge4 to i64
  %arrayidx = getelementptr inbounds i64, i64* %ptr, i64 %idxprom
  store i64 0, i64* %arrayidx, align 8
  %inc = add nsw i32 %storemerge4, 1
  %cmp2 = icmp slt i32 %inc, %size
  br i1 %cmp2, label %loop.body, label %loop.exit

loop.exit:
  br label %exit

exit:
  ret void
}

; Same narrow (i32) trip count as above, but for a load+store copy between two
; noalias i64 arrays; the checks expect the same zext/shl size computation
; followed by an llvm.memcpy in the preheader.
define void @form_memcpy_narrow_size(i64* noalias %dst, i64* noalias %src, i32 %size) {
; CHECK-LABEL: @form_memcpy_narrow_size(
entry:
  %cmp1 = icmp sgt i32 %size, 0
  br i1 %cmp1, label %loop.ph, label %exit
; CHECK: entry:
; CHECK: %[[C1:.*]] = icmp sgt i32 %size, 0
; CHECK-NEXT: br i1 %[[C1]], label %loop.ph, label %exit

loop.ph:
  br label %loop.body
; CHECK: loop.ph:
; CHECK-NEXT: %[[ZEXT_SIZE:.*]] = zext i32 %size to i64
; CHECK-NEXT: %[[SCALED_SIZE:.*]] = shl i64 %[[ZEXT_SIZE]], 3
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %{{.*}}, i8* align 8 %{{.*}}, i64 %[[SCALED_SIZE]], i1 false)

loop.body:
  %storemerge4 = phi i32 [ 0, %loop.ph ], [ %inc, %loop.body ]
  %idxprom1 = sext i32 %storemerge4 to i64
  %arrayidx1 = getelementptr inbounds i64, i64* %src, i64 %idxprom1
  %v = load i64, i64* %arrayidx1, align 8
  %idxprom2 = sext i32 %storemerge4 to i64
  %arrayidx2 = getelementptr inbounds i64, i64* %dst, i64 %idxprom2
  store i64 %v, i64* %arrayidx2, align 8
  %inc = add nsw i32 %storemerge4, 1
  %cmp2 = icmp slt i32 %inc, %size
  br i1 %cmp2, label %loop.body, label %loop.exit

loop.exit:
  br label %exit

exit:
  ret void
}

; Validate that "memset_pattern" has the proper attributes.
; The first check captures the attribute group number attached to the
; declaration; the second requires that group to be exactly { argmemonly }.
; CHECK: declare void @memset_pattern16(i8* nocapture, i8* nocapture readonly, i64) [[ATTRS:#[0-9]+]]
; CHECK: [[ATTRS]] = { argmemonly }