; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s

; LSR should be able to eliminate the max computations by
; making the loops use slt/ult comparisons instead of ne comparisons.
;
; How the test is built:
;   * Several counted loops below start their induction variable at 0 and
;     leave the loop through `icmp ne` against a bound computed as
;     `select (icmp [su]gt 1, N), 1, N`, i.e. max(1, N).
;   * @foo is the signed variant (sdiv / srem / icmp sgt / icmp slt) and its
;     expected assembly closes those loops with cmpl followed by jl.
;   * @bar is the same control-flow graph using the unsigned counterparts
;     (udiv / urem / icmp ugt / icmp ult) and its expected assembly closes
;     those loops with cmpl followed by jb.
;   * The expected assembly is generated from this exact IR, and basic-block
;     names show up in it as "## %name" comments, so regenerate the
;     assertions with the script named in the NOTE line after any IR change.

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9"

; Signed variant. The loops headed by bb6, bb14 and bb13 use a max(1, N) bound
; (%smax9, %smax and %smax3 respectively); the loops headed by bb23 and bb30
; compare directly against %x and only call memcpy.
define void @foo(ptr %r, i32 %s, i32 %w, i32 %x, ptr %j, i32 %d) nounwind {
; CHECK-LABEL: foo:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    pushl %ebx
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    subl $28, %esp
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl %edi, %ebx
; CHECK-NEXT:    imull %ebp, %ebx
; CHECK-NEXT:    cmpl $1, {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %ebx, (%esp) ## 4-byte Spill
; CHECK-NEXT:    je LBB0_19
; CHECK-NEXT:  ## %bb.1: ## %bb10.preheader
; CHECK-NEXT:    movl %ebx, %ecx
; CHECK-NEXT:    sarl $31, %ecx
; CHECK-NEXT:    shrl $30, %ecx
; CHECK-NEXT:    addl %ebx, %ecx
; CHECK-NEXT:    sarl $2, %ecx
; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    jle LBB0_12
; CHECK-NEXT:  ## %bb.2: ## %bb.nph9
; CHECK-NEXT:    testl %ebp, %ebp
; CHECK-NEXT:    jle LBB0_12
; CHECK-NEXT:  ## %bb.3: ## %bb.nph9.split
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    incl %eax
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    xorl %esi, %esi
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB0_4: ## %bb6
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movzbl (%eax,%esi,2), %ebx
; CHECK-NEXT:    movb %bl, (%edx,%esi)
; CHECK-NEXT:    incl %esi
; CHECK-NEXT:    cmpl %ebp, %esi
; CHECK-NEXT:    jl LBB0_4
; CHECK-NEXT:  ## %bb.5: ## %bb9
; CHECK-NEXT:    ## in Loop: Header=BB0_4 Depth=1
; CHECK-NEXT:    incl %ecx
; CHECK-NEXT:    addl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    addl %ebp, %edx
; CHECK-NEXT:    cmpl %edi, %ecx
; CHECK-NEXT:    je LBB0_12
; CHECK-NEXT:  ## %bb.6: ## %bb7.preheader
; CHECK-NEXT:    ## in Loop: Header=BB0_4 Depth=1
; CHECK-NEXT:    xorl %esi, %esi
; CHECK-NEXT:    jmp LBB0_4
; CHECK-NEXT:  LBB0_12: ## %bb18.loopexit
; CHECK-NEXT:    movl (%esp), %eax ## 4-byte Reload
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT:    cmpl $1, %edi
; CHECK-NEXT:    jle LBB0_13
; CHECK-NEXT:  ## %bb.7: ## %bb.nph5
; CHECK-NEXT:    cmpl $2, %ebp
; CHECK-NEXT:    jl LBB0_13
; CHECK-NEXT:  ## %bb.8: ## %bb.nph5.split
; CHECK-NEXT:    movl %ebp, %edx
; CHECK-NEXT:    shrl $31, %edx
; CHECK-NEXT:    addl %ebp, %edx
; CHECK-NEXT:    sarl %edx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl %eax, %ecx
; CHECK-NEXT:    shrl $31, %ecx
; CHECK-NEXT:    addl %eax, %ecx
; CHECK-NEXT:    sarl %ecx
; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    addl $2, %esi
; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT:    movl (%esp), %esi ## 4-byte Reload
; CHECK-NEXT:    addl %esi, %ecx
; CHECK-NEXT:    xorl %esi, %esi
; CHECK-NEXT:    xorl %edi, %edi
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB0_9: ## %bb13
; CHECK-NEXT:    ## =>This Loop Header: Depth=1
; CHECK-NEXT:    ## Child Loop BB0_10 Depth 2
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT:    addl %esi, %edi
; CHECK-NEXT:    imull {{[0-9]+}}(%esp), %edi
; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
; CHECK-NEXT:    xorl %esi, %esi
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB0_10: ## %bb14
; CHECK-NEXT:    ## Parent Loop BB0_9 Depth=1
; CHECK-NEXT:    ## => This Inner Loop Header: Depth=2
; CHECK-NEXT:    movzbl -2(%edi,%esi,4), %ebx
; CHECK-NEXT:    movb %bl, (%ecx,%esi)
; CHECK-NEXT:    movzbl (%edi,%esi,4), %ebx
; CHECK-NEXT:    movb %bl, (%eax,%esi)
; CHECK-NEXT:    incl %esi
; CHECK-NEXT:    cmpl %edx, %esi
; CHECK-NEXT:    jl LBB0_10
; CHECK-NEXT:  ## %bb.11: ## %bb17
; CHECK-NEXT:    ## in Loop: Header=BB0_9 Depth=1
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
; CHECK-NEXT:    incl %edi
; CHECK-NEXT:    addl %edx, %eax
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
; CHECK-NEXT:    addl $2, %esi
; CHECK-NEXT:    addl %edx, %ecx
; CHECK-NEXT:    cmpl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
; CHECK-NEXT:    jl LBB0_9
; CHECK-NEXT:  LBB0_13: ## %bb20
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    cmpl $1, %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    je LBB0_19
; CHECK-NEXT:  ## %bb.14: ## %bb20
; CHECK-NEXT:    cmpl $3, %eax
; CHECK-NEXT:    jne LBB0_24
; CHECK-NEXT:  ## %bb.15: ## %bb22
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
; CHECK-NEXT:    addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    jle LBB0_18
; CHECK-NEXT:  ## %bb.16: ## %bb.nph
; CHECK-NEXT:    leal 15(%edi), %eax
; CHECK-NEXT:    andl $-16, %eax
; CHECK-NEXT:    imull {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    addl %eax, %ecx
; CHECK-NEXT:    leal 15(%ebp), %eax
; CHECK-NEXT:    andl $-16, %eax
; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT:    addl %esi, %esi
; CHECK-NEXT:    addl (%esp), %esi ## 4-byte Folded Reload
; CHECK-NEXT:    addl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB0_17: ## %bb23
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    subl $4, %esp
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    pushl %ecx
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    movl %ecx, %ebx
; CHECK-NEXT:    calll _memcpy
; CHECK-NEXT:    movl %ebx, %ecx
; CHECK-NEXT:    addl $16, %esp
; CHECK-NEXT:    addl %ebp, %esi
; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
; CHECK-NEXT:    decl %edi
; CHECK-NEXT:    jne LBB0_17
; CHECK-NEXT:  LBB0_18: ## %bb26
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
; CHECK-NEXT:    movl (%esp), %edx ## 4-byte Reload
; CHECK-NEXT:    addl %edx, %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    addl %eax, %ecx
; CHECK-NEXT:    jmp LBB0_23
; CHECK-NEXT:  LBB0_19: ## %bb29
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    jle LBB0_22
; CHECK-NEXT:  ## %bb.20: ## %bb.nph11
; CHECK-NEXT:    movl %edi, %esi
; CHECK-NEXT:    leal 15(%ebp), %eax
; CHECK-NEXT:    andl $-16, %eax
; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB0_21: ## %bb30
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    subl $4, %esp
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    pushl %ecx
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    movl %ecx, %ebx
; CHECK-NEXT:    calll _memcpy
; CHECK-NEXT:    movl %ebx, %ecx
; CHECK-NEXT:    addl $16, %esp
; CHECK-NEXT:    addl %ebp, %edi
; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
; CHECK-NEXT:    decl %esi
; CHECK-NEXT:    jne LBB0_21
; CHECK-NEXT:  LBB0_22: ## %bb33
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl (%esp), %edx ## 4-byte Reload
; CHECK-NEXT:    addl %edx, %ecx
; CHECK-NEXT:  LBB0_23: ## %bb33
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    shrl $31, %eax
; CHECK-NEXT:    addl %edx, %eax
; CHECK-NEXT:    sarl %eax
; CHECK-NEXT:    subl $4, %esp
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    pushl $128
; CHECK-NEXT:    pushl %ecx
; CHECK-NEXT:    calll _memset
; CHECK-NEXT:    addl $44, %esp
; CHECK-NEXT:  LBB0_25: ## %return
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    popl %edi
; CHECK-NEXT:    popl %ebx
; CHECK-NEXT:    popl %ebp
; CHECK-NEXT:    retl
; CHECK-NEXT:  LBB0_24: ## %return
; CHECK-NEXT:    addl $28, %esp
; CHECK-NEXT:    jmp LBB0_25
entry:
  %0 = mul i32 %x, %w
  %1 = mul i32 %x, %w
  %2 = sdiv i32 %1, 4
  %.sum2 = add i32 %2, %0
  %cond = icmp eq i32 %d, 1
  br i1 %cond, label %bb29, label %bb10.preheader

bb10.preheader:                                   ; preds = %entry
  %3 = icmp sgt i32 %x, 0
  br i1 %3, label %bb.nph9, label %bb18.loopexit

bb.nph7:                                          ; preds = %bb7.preheader
  %4 = mul i32 %y.08, %w
  %5 = mul i32 %y.08, %s
  %6 = add i32 %5, 1
  ; %smax9 = signed max(1, %w); it is the `icmp ne` bound of the bb6 loop.
  %tmp8 = icmp sgt i32 1, %w
  %smax9 = select i1 %tmp8, i32 1, i32 %w
  br label %bb6

bb6:                                              ; preds = %bb7, %bb.nph7
  %x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ]
  %7 = add i32 %x.06, %4
  %8 = shl i32 %x.06, 1
  %9 = add i32 %6, %8
  %10 = getelementptr i8, ptr %r, i32 %9
  %11 = load i8, ptr %10, align 1
  %12 = getelementptr i8, ptr %j, i32 %7
  store i8 %11, ptr %12, align 1
  br label %bb7

bb7:                                              ; preds = %bb6
  %indvar.next7 = add i32 %x.06, 1
  %exitcond10 = icmp ne i32 %indvar.next7, %smax9
  br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge

bb7.bb9_crit_edge:                                ; preds = %bb7
  br label %bb9

bb9:                                              ; preds = %bb7.preheader, %bb7.bb9_crit_edge
  br label %bb10

bb10:                                             ; preds = %bb9
  %indvar.next11 = add i32 %y.08, 1
  %exitcond12 = icmp ne i32 %indvar.next11, %x
  br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge

bb10.bb18.loopexit_crit_edge:                     ; preds = %bb10
  br label %bb10.bb18.loopexit_crit_edge.split

bb10.bb18.loopexit_crit_edge.split:               ; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge
  br label %bb18.loopexit

bb.nph9:                                          ; preds = %bb10.preheader
  %13 = icmp sgt i32 %w, 0
  br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split

bb.nph9.split:                                    ; preds = %bb.nph9
  br label %bb7.preheader

bb7.preheader:                                    ; preds = %bb.nph9.split, %bb10
  %y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ]
  br i1 true, label %bb.nph7, label %bb9

bb.nph5:                                          ; preds = %bb18.loopexit
  %14 = sdiv i32 %w, 2
  %15 = icmp slt i32 %w, 2
  %16 = sdiv i32 %x, 2
  br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split

bb.nph5.split:                                    ; preds = %bb.nph5
  ; %smax3 = signed max(1, %x / 2); it is the `icmp ne` bound of the bb13 loop.
  %tmp2 = icmp sgt i32 1, %16
  %smax3 = select i1 %tmp2, i32 1, i32 %16
  br label %bb13

bb13:                                             ; preds = %bb18, %bb.nph5.split
  %y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ]
  %17 = mul i32 %14, %y.14
  %18 = shl i32 %y.14, 1
  %19 = srem i32 %y.14, 2
  %20 = add i32 %19, %18
  %21 = mul i32 %20, %s
  br i1 true, label %bb.nph3, label %bb17

bb.nph3:                                          ; preds = %bb13
  %22 = add i32 %17, %0
  %23 = add i32 %17, %.sum2
  %24 = sdiv i32 %w, 2
  ; %smax = signed max(1, %w / 2); it is the `icmp ne` bound of the bb14 loop.
  %tmp = icmp sgt i32 1, %24
  %smax = select i1 %tmp, i32 1, i32 %24
  br label %bb14

bb14:                                             ; preds = %bb15, %bb.nph3
  %x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ]
  %25 = shl i32 %x.12, 2
  %26 = add i32 %25, %21
  %27 = getelementptr i8, ptr %r, i32 %26
  %28 = load i8, ptr %27, align 1
  %.sum = add i32 %22, %x.12
  %29 = getelementptr i8, ptr %j, i32 %.sum
  store i8 %28, ptr %29, align 1
  %30 = shl i32 %x.12, 2
  %31 = or i32 %30, 2
  %32 = add i32 %31, %21
  %33 = getelementptr i8, ptr %r, i32 %32
  %34 = load i8, ptr %33, align 1
  %.sum6 = add i32 %23, %x.12
  %35 = getelementptr i8, ptr %j, i32 %.sum6
  store i8 %34, ptr %35, align 1
  br label %bb15

bb15:                                             ; preds = %bb14
  %indvar.next = add i32 %x.12, 1
  %exitcond = icmp ne i32 %indvar.next, %smax
  br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge

bb15.bb17_crit_edge:                              ; preds = %bb15
  br label %bb17

bb17:                                             ; preds = %bb15.bb17_crit_edge, %bb13
  br label %bb18

bb18.loopexit:                                    ; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader
  %36 = icmp slt i32 %x, 2
  br i1 %36, label %bb20, label %bb.nph5

bb18:                                             ; preds = %bb17
  %indvar.next1 = add i32 %y.14, 1
  %exitcond4 = icmp ne i32 %indvar.next1, %smax3
  br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge

bb18.bb20_crit_edge:                              ; preds = %bb18
  br label %bb18.bb20_crit_edge.split

bb18.bb20_crit_edge.split:                        ; preds = %bb18.bb20_crit_edge, %bb.nph5
  br label %bb20

bb20:                                             ; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit
  switch i32 %d, label %return [
    i32 3, label %bb22
    i32 1, label %bb29
  ]

bb22:                                             ; preds = %bb20
  %37 = mul i32 %x, %w
  %38 = sdiv i32 %37, 4
  %.sum3 = add i32 %38, %.sum2
  %39 = add i32 %x, 15
  %40 = and i32 %39, -16
  %41 = add i32 %w, 15
  %42 = and i32 %41, -16
  %43 = mul i32 %40, %s
  %44 = icmp sgt i32 %x, 0
  br i1 %44, label %bb.nph, label %bb26

bb.nph:                                           ; preds = %bb22
  br label %bb23

bb23:                                             ; preds = %bb24, %bb.nph
  %y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ]
  %45 = mul i32 %y.21, %42
  %.sum1 = add i32 %45, %43
  %46 = getelementptr i8, ptr %r, i32 %.sum1
  %47 = mul i32 %y.21, %w
  %.sum5 = add i32 %47, %.sum3
  %48 = getelementptr i8, ptr %j, i32 %.sum5
  tail call void @llvm.memcpy.p0.p0.i32(ptr %48, ptr %46, i32 %w, i1 false)
  br label %bb24

bb24:                                             ; preds = %bb23
  %indvar.next5 = add i32 %y.21, 1
  %exitcond6 = icmp ne i32 %indvar.next5, %x
  br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge

bb24.bb26_crit_edge:                              ; preds = %bb24
  br label %bb26

bb26:                                             ; preds = %bb24.bb26_crit_edge, %bb22
  %49 = mul i32 %x, %w
  %.sum4 = add i32 %.sum3, %49
  %50 = getelementptr i8, ptr %j, i32 %.sum4
  %51 = mul i32 %x, %w
  %52 = sdiv i32 %51, 2
  tail call void @llvm.memset.p0.i32(ptr %50, i8 -128, i32 %52, i1 false)
  ret void

bb29:                                             ; preds = %bb20, %entry
  %53 = add i32 %w, 15
  %54 = and i32 %53, -16
  %55 = icmp sgt i32 %x, 0
  br i1 %55, label %bb.nph11, label %bb33

bb.nph11:                                         ; preds = %bb29
  br label %bb30

bb30:                                             ; preds = %bb31, %bb.nph11
  %y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ]
  %56 = mul i32 %y.310, %54
  %57 = getelementptr i8, ptr %r, i32 %56
  %58 = mul i32 %y.310, %w
  %59 = getelementptr i8, ptr %j, i32 %58
  tail call void @llvm.memcpy.p0.p0.i32(ptr %59, ptr %57, i32 %w, i1 false)
  br label %bb31

bb31:                                             ; preds = %bb30
  %indvar.next13 = add i32 %y.310, 1
  %exitcond14 = icmp ne i32 %indvar.next13, %x
  br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge

bb31.bb33_crit_edge:                              ; preds = %bb31
  br label %bb33

bb33:                                             ; preds = %bb31.bb33_crit_edge, %bb29
  %60 = mul i32 %x, %w
  %61 = getelementptr i8, ptr %j, i32 %60
  %62 = mul i32 %x, %w
  %63 = sdiv i32 %62, 2
  tail call void @llvm.memset.p0.i32(ptr %61, i8 -128, i32 %63, i1 false)
  ret void

return:                                           ; preds = %bb20
  ret void
}

; Unsigned variant of @foo: same block structure, with udiv / urem / icmp ugt /
; icmp ult in place of the signed operations. The value names %smax, %smax3 and
; %smax9 are carried over unchanged, but in this function each one holds an
; unsigned maximum because its select is driven by `icmp ugt`.
define void @bar(ptr %r, i32 %s, i32 %w, i32 %x, ptr %j, i32 %d) nounwind {
; CHECK-LABEL: bar:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    pushl %ebx
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    subl $28, %esp
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl %ebp, %edx
; CHECK-NEXT:    imull %eax, %edx
; CHECK-NEXT:    cmpl $1, {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT:    je LBB1_19
; CHECK-NEXT:  ## %bb.1: ## %bb10.preheader
; CHECK-NEXT:    movl %edx, %ecx
; CHECK-NEXT:    shrl $2, %ecx
; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT:    testl %ebp, %ebp
; CHECK-NEXT:    movl %eax, %edi
; CHECK-NEXT:    je LBB1_12
; CHECK-NEXT:  ## %bb.2: ## %bb.nph9
; CHECK-NEXT:    testl %eax, %eax
; CHECK-NEXT:    je LBB1_12
; CHECK-NEXT:  ## %bb.3: ## %bb.nph9.split
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    incl %eax
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    movl %esi, %edx
; CHECK-NEXT:    xorl %esi, %esi
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB1_4: ## %bb6
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movzbl (%eax,%esi,2), %ebx
; CHECK-NEXT:    movb %bl, (%edx,%esi)
; CHECK-NEXT:    incl %esi
; CHECK-NEXT:    cmpl %edi, %esi
; CHECK-NEXT:    jb LBB1_4
; CHECK-NEXT:  ## %bb.5: ## %bb9
; CHECK-NEXT:    ## in Loop: Header=BB1_4 Depth=1
; CHECK-NEXT:    movl %edi, %ebx
; CHECK-NEXT:    incl %ecx
; CHECK-NEXT:    addl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    addl %edi, %edx
; CHECK-NEXT:    cmpl %ebp, %ecx
; CHECK-NEXT:    je LBB1_12
; CHECK-NEXT:  ## %bb.6: ## %bb7.preheader
; CHECK-NEXT:    ## in Loop: Header=BB1_4 Depth=1
; CHECK-NEXT:    xorl %esi, %esi
; CHECK-NEXT:    jmp LBB1_4
; CHECK-NEXT:  LBB1_12: ## %bb18.loopexit
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT:    cmpl $1, %ebp
; CHECK-NEXT:    jbe LBB1_13
; CHECK-NEXT:  ## %bb.7: ## %bb.nph5
; CHECK-NEXT:    cmpl $2, %edi
; CHECK-NEXT:    jb LBB1_13
; CHECK-NEXT:  ## %bb.8: ## %bb.nph5.split
; CHECK-NEXT:    movl %edi, %ebp
; CHECK-NEXT:    shrl %ebp
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    shrl %eax
; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
; CHECK-NEXT:    addl %eax, %ecx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    addl $2, %edx
; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
; CHECK-NEXT:    addl %edx, %eax
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    xorl %ebx, %ebx
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB1_9: ## %bb13
; CHECK-NEXT:    ## =>This Loop Header: Depth=1
; CHECK-NEXT:    ## Child Loop BB1_10 Depth 2
; CHECK-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT:    andl $1, %ebx
; CHECK-NEXT:    movl %edx, (%esp) ## 4-byte Spill
; CHECK-NEXT:    addl %edx, %ebx
; CHECK-NEXT:    imull {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
; CHECK-NEXT:    xorl %esi, %esi
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB1_10: ## %bb14
; CHECK-NEXT:    ## Parent Loop BB1_9 Depth=1
; CHECK-NEXT:    ## => This Inner Loop Header: Depth=2
; CHECK-NEXT:    movzbl -2(%ebx,%esi,4), %edx
; CHECK-NEXT:    movb %dl, (%eax,%esi)
; CHECK-NEXT:    movzbl (%ebx,%esi,4), %edx
; CHECK-NEXT:    movb %dl, (%ecx,%esi)
; CHECK-NEXT:    incl %esi
; CHECK-NEXT:    cmpl %ebp, %esi
; CHECK-NEXT:    jb LBB1_10
; CHECK-NEXT:  ## %bb.11: ## %bb17
; CHECK-NEXT:    ## in Loop: Header=BB1_9 Depth=1
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
; CHECK-NEXT:    incl %ebx
; CHECK-NEXT:    addl %ebp, %ecx
; CHECK-NEXT:    movl (%esp), %edx ## 4-byte Reload
; CHECK-NEXT:    addl $2, %edx
; CHECK-NEXT:    addl %ebp, %eax
; CHECK-NEXT:    cmpl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
; CHECK-NEXT:    jb LBB1_9
; CHECK-NEXT:  LBB1_13: ## %bb20
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    cmpl $1, %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    je LBB1_19
; CHECK-NEXT:  ## %bb.14: ## %bb20
; CHECK-NEXT:    cmpl $3, %esi
; CHECK-NEXT:    jne LBB1_24
; CHECK-NEXT:  ## %bb.15: ## %bb22
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
; CHECK-NEXT:    addl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; CHECK-NEXT:    testl %ebp, %ebp
; CHECK-NEXT:    je LBB1_18
; CHECK-NEXT:  ## %bb.16: ## %bb.nph
; CHECK-NEXT:    movl %ebp, %esi
; CHECK-NEXT:    leal 15(%ebp), %eax
; CHECK-NEXT:    andl $-16, %eax
; CHECK-NEXT:    imull {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    addl $15, %edx
; CHECK-NEXT:    andl $-16, %edx
; CHECK-NEXT:    movl %edx, (%esp) ## 4-byte Spill
; CHECK-NEXT:    addl %eax, %ecx
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    leal (%edx,%eax), %ebp
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB1_17: ## %bb23
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    subl $4, %esp
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT:    pushl %ebx
; CHECK-NEXT:    pushl %ecx
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    movl %ecx, %edi
; CHECK-NEXT:    calll _memcpy
; CHECK-NEXT:    movl %edi, %ecx
; CHECK-NEXT:    addl $16, %esp
; CHECK-NEXT:    addl %ebx, %ebp
; CHECK-NEXT:    addl (%esp), %ecx ## 4-byte Folded Reload
; CHECK-NEXT:    decl %esi
; CHECK-NEXT:    jne LBB1_17
; CHECK-NEXT:  LBB1_18: ## %bb26
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    addl %eax, %edx
; CHECK-NEXT:    shrl %ecx
; CHECK-NEXT:    subl $4, %esp
; CHECK-NEXT:    pushl %ecx
; CHECK-NEXT:    pushl $128
; CHECK-NEXT:    pushl %edx
; CHECK-NEXT:    jmp LBB1_23
; CHECK-NEXT:  LBB1_19: ## %bb29
; CHECK-NEXT:    testl %ebp, %ebp
; CHECK-NEXT:    je LBB1_22
; CHECK-NEXT:  ## %bb.20: ## %bb.nph11
; CHECK-NEXT:    movl %ebp, %esi
; CHECK-NEXT:    movl %eax, %edi
; CHECK-NEXT:    addl $15, %eax
; CHECK-NEXT:    andl $-16, %eax
; CHECK-NEXT:    movl %eax, (%esp) ## 4-byte Spill
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB1_21: ## %bb30
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    subl $4, %esp
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    pushl %ecx
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    movl %ecx, %ebx
; CHECK-NEXT:    calll _memcpy
; CHECK-NEXT:    movl %ebx, %ecx
; CHECK-NEXT:    addl $16, %esp
; CHECK-NEXT:    addl %edi, %ebp
; CHECK-NEXT:    addl (%esp), %ecx ## 4-byte Folded Reload
; CHECK-NEXT:    decl %esi
; CHECK-NEXT:    jne LBB1_21
; CHECK-NEXT:  LBB1_22: ## %bb33
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    addl %eax, %ecx
; CHECK-NEXT:    shrl %eax
; CHECK-NEXT:    subl $4, %esp
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    pushl $128
; CHECK-NEXT:    pushl %ecx
; CHECK-NEXT:  LBB1_23: ## %bb33
; CHECK-NEXT:    calll _memset
; CHECK-NEXT:    addl $44, %esp
; CHECK-NEXT:  LBB1_25: ## %return
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    popl %edi
; CHECK-NEXT:    popl %ebx
; CHECK-NEXT:    popl %ebp
; CHECK-NEXT:    retl
; CHECK-NEXT:  LBB1_24: ## %return
; CHECK-NEXT:    addl $28, %esp
; CHECK-NEXT:    jmp LBB1_25
entry:
  %0 = mul i32 %x, %w
  %1 = mul i32 %x, %w
  %2 = udiv i32 %1, 4
  %.sum2 = add i32 %2, %0
  %cond = icmp eq i32 %d, 1
  br i1 %cond, label %bb29, label %bb10.preheader

bb10.preheader:                                   ; preds = %entry
  %3 = icmp ne i32 %x, 0
  br i1 %3, label %bb.nph9, label %bb18.loopexit

bb.nph7:                                          ; preds = %bb7.preheader
  %4 = mul i32 %y.08, %w
  %5 = mul i32 %y.08, %s
  %6 = add i32 %5, 1
  ; Despite the name, %smax9 is unsigned max(1, %w); it bounds the bb6 loop.
  %tmp8 = icmp ugt i32 1, %w
  %smax9 = select i1 %tmp8, i32 1, i32 %w
  br label %bb6

bb6:                                              ; preds = %bb7, %bb.nph7
  %x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ]
  %7 = add i32 %x.06, %4
  %8 = shl i32 %x.06, 1
  %9 = add i32 %6, %8
  %10 = getelementptr i8, ptr %r, i32 %9
  %11 = load i8, ptr %10, align 1
  %12 = getelementptr i8, ptr %j, i32 %7
  store i8 %11, ptr %12, align 1
  br label %bb7

bb7:                                              ; preds = %bb6
  %indvar.next7 = add i32 %x.06, 1
  %exitcond10 = icmp ne i32 %indvar.next7, %smax9
  br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge

bb7.bb9_crit_edge:                                ; preds = %bb7
  br label %bb9

bb9:                                              ; preds = %bb7.preheader, %bb7.bb9_crit_edge
  br label %bb10

bb10:                                             ; preds = %bb9
  %indvar.next11 = add i32 %y.08, 1
  %exitcond12 = icmp ne i32 %indvar.next11, %x
  br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge

bb10.bb18.loopexit_crit_edge:                     ; preds = %bb10
  br label %bb10.bb18.loopexit_crit_edge.split

bb10.bb18.loopexit_crit_edge.split:               ; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge
  br label %bb18.loopexit

bb.nph9:                                          ; preds = %bb10.preheader
  %13 = icmp ugt i32 %w, 0
  br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split

bb.nph9.split:                                    ; preds = %bb.nph9
  br label %bb7.preheader

bb7.preheader:                                    ; preds = %bb.nph9.split, %bb10
  %y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ]
  br i1 true, label %bb.nph7, label %bb9

bb.nph5:                                          ; preds = %bb18.loopexit
  %14 = udiv i32 %w, 2
  %15 = icmp ult i32 %w, 2
  %16 = udiv i32 %x, 2
  br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split

bb.nph5.split:                                    ; preds = %bb.nph5
  ; Despite the name, %smax3 is unsigned max(1, %x / 2); it bounds the bb13 loop.
  %tmp2 = icmp ugt i32 1, %16
  %smax3 = select i1 %tmp2, i32 1, i32 %16
  br label %bb13

bb13:                                             ; preds = %bb18, %bb.nph5.split
  %y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ]
  %17 = mul i32 %14, %y.14
  %18 = shl i32 %y.14, 1
  %19 = urem i32 %y.14, 2
  %20 = add i32 %19, %18
  %21 = mul i32 %20, %s
  br i1 true, label %bb.nph3, label %bb17

bb.nph3:                                          ; preds = %bb13
  %22 = add i32 %17, %0
  %23 = add i32 %17, %.sum2
  %24 = udiv i32 %w, 2
  ; Despite the name, %smax is unsigned max(1, %w / 2); it bounds the bb14 loop.
  %tmp = icmp ugt i32 1, %24
  %smax = select i1 %tmp, i32 1, i32 %24
  br label %bb14

bb14:                                             ; preds = %bb15, %bb.nph3
  %x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ]
  %25 = shl i32 %x.12, 2
  %26 = add i32 %25, %21
  %27 = getelementptr i8, ptr %r, i32 %26
  %28 = load i8, ptr %27, align 1
  %.sum = add i32 %22, %x.12
  %29 = getelementptr i8, ptr %j, i32 %.sum
  store i8 %28, ptr %29, align 1
  %30 = shl i32 %x.12, 2
  %31 = or i32 %30, 2
  %32 = add i32 %31, %21
  %33 = getelementptr i8, ptr %r, i32 %32
  %34 = load i8, ptr %33, align 1
  %.sum6 = add i32 %23, %x.12
  %35 = getelementptr i8, ptr %j, i32 %.sum6
  store i8 %34, ptr %35, align 1
  br label %bb15

bb15:                                             ; preds = %bb14
  %indvar.next = add i32 %x.12, 1
  %exitcond = icmp ne i32 %indvar.next, %smax
  br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge

bb15.bb17_crit_edge:                              ; preds = %bb15
  br label %bb17

bb17:                                             ; preds = %bb15.bb17_crit_edge, %bb13
  br label %bb18

bb18.loopexit:                                    ; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader
  %36 = icmp ult i32 %x, 2
  br i1 %36, label %bb20, label %bb.nph5

bb18:                                             ; preds = %bb17
  %indvar.next1 = add i32 %y.14, 1
  %exitcond4 = icmp ne i32 %indvar.next1, %smax3
  br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge

bb18.bb20_crit_edge:                              ; preds = %bb18
  br label %bb18.bb20_crit_edge.split

bb18.bb20_crit_edge.split:                        ; preds = %bb18.bb20_crit_edge, %bb.nph5
  br label %bb20

bb20:                                             ; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit
  switch i32 %d, label %return [
    i32 3, label %bb22
    i32 1, label %bb29
  ]

bb22:                                             ; preds = %bb20
  %37 = mul i32 %x, %w
  %38 = udiv i32 %37, 4
  %.sum3 = add i32 %38, %.sum2
  %39 = add i32 %x, 15
  %40 = and i32 %39, -16
  %41 = add i32 %w, 15
  %42 = and i32 %41, -16
  %43 = mul i32 %40, %s
  %44 = icmp ugt i32 %x, 0
  br i1 %44, label %bb.nph, label %bb26

bb.nph:                                           ; preds = %bb22
  br label %bb23

bb23:                                             ; preds = %bb24, %bb.nph
  %y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ]
  %45 = mul i32 %y.21, %42
  %.sum1 = add i32 %45, %43
  %46 = getelementptr i8, ptr %r, i32 %.sum1
  %47 = mul i32 %y.21, %w
  %.sum5 = add i32 %47, %.sum3
  %48 = getelementptr i8, ptr %j, i32 %.sum5
  tail call void @llvm.memcpy.p0.p0.i32(ptr %48, ptr %46, i32 %w, i1 false)
  br label %bb24

bb24:                                             ; preds = %bb23
  %indvar.next5 = add i32 %y.21, 1
  %exitcond6 = icmp ne i32 %indvar.next5, %x
  br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge

bb24.bb26_crit_edge:                              ; preds = %bb24
  br label %bb26

bb26:                                             ; preds = %bb24.bb26_crit_edge, %bb22
  %49 = mul i32 %x, %w
  %.sum4 = add i32 %.sum3, %49
  %50 = getelementptr i8, ptr %j, i32 %.sum4
  %51 = mul i32 %x, %w
  %52 = udiv i32 %51, 2
  tail call void @llvm.memset.p0.i32(ptr %50, i8 -128, i32 %52, i1 false)
  ret void

bb29:                                             ; preds = %bb20, %entry
  %53 = add i32 %w, 15
  %54 = and i32 %53, -16
  %55 = icmp ugt i32 %x, 0
  br i1 %55, label %bb.nph11, label %bb33

bb.nph11:                                         ; preds = %bb29
  br label %bb30

bb30:                                             ; preds = %bb31, %bb.nph11
  %y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ]
  %56 = mul i32 %y.310, %54
  %57 = getelementptr i8, ptr %r, i32 %56
  %58 = mul i32 %y.310, %w
  %59 = getelementptr i8, ptr %j, i32 %58
  tail call void @llvm.memcpy.p0.p0.i32(ptr %59, ptr %57, i32 %w, i1 false)
  br label %bb31

bb31:                                             ; preds = %bb30
  %indvar.next13 = add i32 %y.310, 1
  %exitcond14 = icmp ne i32 %indvar.next13, %x
  br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge

bb31.bb33_crit_edge:                              ; preds = %bb31
  br label %bb33

bb33:                                             ; preds = %bb31.bb33_crit_edge, %bb29
  %60 = mul i32 %x, %w
  %61 = getelementptr i8, ptr %j, i32 %60
  %62 = mul i32 %x, %w
  %63 = udiv i32 %62, 2
  tail call void @llvm.memset.p0.i32(ptr %61, i8 -128, i32 %63, i1 false)
  ret void

return:                                           ; preds = %bb20
  ret void
}

; Intrinsics called by both functions; the expected assembly shows them as
; calls to _memcpy and _memset.
declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind

declare void @llvm.memset.p0.i32(ptr nocapture, i8, i32, i1) nounwind