; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=0 | FileCheck --check-prefixes=CHECK,CHECK-O0,CHECK-O0-CUR %s
; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=0 | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-CUR %s
; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=1 | FileCheck --check-prefixes=CHECK,CHECK-O0,CHECK-O0-EX %s
; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=1 | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-EX %s

define i8 @load_i8(i8* %ptr) {
; CHECK-O0-LABEL: load_i8:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movb (%rdi), %al
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_i8:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzbl (%rdi), %eax
; CHECK-O3-NEXT: retq
  %v = load atomic i8, i8* %ptr unordered, align 1
  ret i8 %v
}

define void @store_i8(i8* %ptr, i8 %v) {
; CHECK-O0-LABEL: store_i8:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movb %sil, %al
; CHECK-O0-NEXT: movb %al, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: store_i8:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movb %sil, (%rdi)
; CHECK-O3-NEXT: retq
  store atomic i8 %v, i8* %ptr unordered, align 1
  ret void
}

define i16 @load_i16(i16* %ptr) {
; CHECK-O0-LABEL: load_i16:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movw (%rdi), %ax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_i16:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzwl (%rdi), %eax
; CHECK-O3-NEXT: retq
  %v = load atomic i16, i16* %ptr unordered, align 2
  ret i16 %v
}

define void @store_i16(i16* %ptr, i16 %v) {
; CHECK-O0-LABEL: store_i16:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movw %si, %ax
; CHECK-O0-NEXT: movw %ax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: store_i16:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movw %si, (%rdi)
; CHECK-O3-NEXT: retq
  store atomic i16 %v, i16* %ptr unordered, align 2
  ret void
}

define i32 @load_i32(i32* %ptr) {
; CHECK-LABEL: load_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: retq
  %v = load atomic i32, i32* %ptr unordered, align 4
  ret i32 %v
}

define void @store_i32(i32* %ptr, i32 %v) {
; CHECK-LABEL: store_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, (%rdi)
; CHECK-NEXT: retq
  store atomic i32 %v, i32* %ptr unordered, align 4
  ret void
}

define i64 @load_i64(i64* %ptr) {
; CHECK-LABEL: load_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: retq
  %v = load atomic i64, i64* %ptr unordered, align 8
  ret i64 %v
}

define void @store_i64(i64* %ptr, i64 %v) {
; CHECK-LABEL: store_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, (%rdi)
; CHECK-NEXT: retq
  store atomic i64 %v, i64* %ptr unordered, align 8
  ret void
}

;; The tests in the rest of this file are intended to show transforms which we
;; either *can't* do for legality reasons, or don't currently implement. The
;; latter are noted carefully where relevant.

;; Start with some clearly illegal ones.
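;; A quick sketch of why the narrowing below is illegal (the interleaving and
;; register choices are illustrative, not part of the checked tests):
;; unordered semantics require every atomic load to observe a value written
;; by some single store. If the i64 writeback in @narrow_writeback_or below
;; were narrowed to a byte op, a racing full-width store could slip in
;; between the load and the narrowed store:
;;
;;   thread 1 (narrowed, illegal)          thread 2
;;   movq (%rdi), %rax
;;                                         movq %rsi, (%rdi)
;;   orb $7, (%rdi)
;;
;; A subsequent atomic i64 load would then observe thread 2's value with its
;; low byte rewritten, a combination that neither thread's store wrote.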

; Must use a full width op, not a byte op
define void @narrow_writeback_or(i64* %ptr) {
; CHECK-O0-LABEL: narrow_writeback_or:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: orq $7, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: narrow_writeback_or:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: orq $7, (%rdi)
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* %ptr unordered, align 8
  %v.new = or i64 %v, 7
  store atomic i64 %v.new, i64* %ptr unordered, align 8
  ret void
}

; Must use a full width op, not a byte op
define void @narrow_writeback_and(i64* %ptr) {
; CHECK-O0-LABEL: narrow_writeback_and:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT: andl $-256, %eax
; CHECK-O0-NEXT: # kill: def $rax killed $eax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: narrow_writeback_and:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movl $4294967040, %eax # imm = 0xFFFFFF00
; CHECK-O3-NEXT: andq %rax, (%rdi)
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* %ptr unordered, align 8
  %v.new = and i64 %v, 4294967040 ;; 0xFFFF_FF00
  store atomic i64 %v.new, i64* %ptr unordered, align 8
  ret void
}

; Must use a full width op, not a byte op
define void @narrow_writeback_xor(i64* %ptr) {
; CHECK-O0-LABEL: narrow_writeback_xor:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: xorq $7, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: narrow_writeback_xor:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: xorq $7, (%rdi)
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* %ptr unordered, align 8
  %v.new = xor i64 %v, 7
  store atomic i64 %v.new, i64* %ptr unordered, align 8
  ret void
}

;; The next batch of tests exercises cases where store widening would
;; improve code generation. Note that widening is only legal if the
;; resulting type would still be atomic. Each test has a well-aligned and
;; an unaligned variant to ensure we get correct codegen here.
;; Note: it's not a legality issue, but there's a gotcha here to be aware
;; of. Once we widen a pair of atomic stores, we lose the information
;; that the original atomicity requirement was half the width. Given that,
;; we can't then split the store again. This challenges our usual iterative
;; approach to incremental improvement.

; Legal if wider type is also atomic (TODO)
define void @widen_store(i32* %p0, i32 %v1, i32 %v2) {
; CHECK-LABEL: widen_store:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, (%rdi)
; CHECK-NEXT: movl %edx, 4(%rdi)
; CHECK-NEXT: retq
  %p1 = getelementptr i32, i32* %p0, i64 1
  store atomic i32 %v1, i32* %p0 unordered, align 8
  store atomic i32 %v2, i32* %p1 unordered, align 4
  ret void
}

; This one is *NOT* legal to widen. With the weaker alignment,
; the wider store might cross a cache line and violate the
; atomicity requirement.
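; For contrast, a hedged sketch of what legally widening @widen_store above
; could look like; %lo, %e2, %hi, %wide, and %pw are hypothetical names, and
; this is not what the compiler currently emits:
;
;   %lo = zext i32 %v1 to i64
;   %e2 = zext i32 %v2 to i64
;   %hi = shl i64 %e2, 32
;   %wide = or i64 %lo, %hi
;   %pw = bitcast i32* %p0 to i64*
;   store atomic i64 %wide, i64* %pw unordered, align 8
;
; With only `align 4`, as in @widen_store_unaligned below, %p0 may sit four
; bytes before a cache line boundary, so a combined i64 store could span two
; cache lines and the hardware would no longer perform it as a single atomic
; access.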
194define void @widen_store_unaligned(i32* %p0, i32 %v1, i32 %v2) { 195; CHECK-LABEL: widen_store_unaligned: 196; CHECK: # %bb.0: 197; CHECK-NEXT: movl %esi, (%rdi) 198; CHECK-NEXT: movl %edx, 4(%rdi) 199; CHECK-NEXT: retq 200 %p1 = getelementptr i32, i32* %p0, i64 1 201 store atomic i32 %v1, i32* %p0 unordered, align 4 202 store atomic i32 %v2, i32* %p1 unordered, align 4 203 ret void 204} 205 206; Legal if wider type is also atomic (TODO) 207define void @widen_broadcast(i32* %p0, i32 %v) { 208; CHECK-LABEL: widen_broadcast: 209; CHECK: # %bb.0: 210; CHECK-NEXT: movl %esi, (%rdi) 211; CHECK-NEXT: movl %esi, 4(%rdi) 212; CHECK-NEXT: retq 213 %p1 = getelementptr i32, i32* %p0, i64 1 214 store atomic i32 %v, i32* %p0 unordered, align 8 215 store atomic i32 %v, i32* %p1 unordered, align 4 216 ret void 217} 218 219; Not legal to widen due to alignment restriction 220define void @widen_broadcast_unaligned(i32* %p0, i32 %v) { 221; CHECK-LABEL: widen_broadcast_unaligned: 222; CHECK: # %bb.0: 223; CHECK-NEXT: movl %esi, (%rdi) 224; CHECK-NEXT: movl %esi, 4(%rdi) 225; CHECK-NEXT: retq 226 %p1 = getelementptr i32, i32* %p0, i64 1 227 store atomic i32 %v, i32* %p0 unordered, align 4 228 store atomic i32 %v, i32* %p1 unordered, align 4 229 ret void 230} 231 232define i128 @load_i128(i128* %ptr) { 233; CHECK-O0-LABEL: load_i128: 234; CHECK-O0: # %bb.0: 235; CHECK-O0-NEXT: pushq %rbx 236; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 237; CHECK-O0-NEXT: .cfi_offset %rbx, -16 238; CHECK-O0-NEXT: xorl %eax, %eax 239; CHECK-O0-NEXT: movl %eax, %ebx 240; CHECK-O0-NEXT: movq %rbx, %rax 241; CHECK-O0-NEXT: movq %rbx, %rdx 242; CHECK-O0-NEXT: movq %rbx, %rcx 243; CHECK-O0-NEXT: lock cmpxchg16b (%rdi) 244; CHECK-O0-NEXT: popq %rbx 245; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 246; CHECK-O0-NEXT: retq 247; 248; CHECK-O3-LABEL: load_i128: 249; CHECK-O3: # %bb.0: 250; CHECK-O3-NEXT: pushq %rbx 251; CHECK-O3-NEXT: .cfi_def_cfa_offset 16 252; CHECK-O3-NEXT: .cfi_offset %rbx, -16 253; CHECK-O3-NEXT: xorl %eax, %eax 254; CHECK-O3-NEXT: xorl %edx, %edx 255; CHECK-O3-NEXT: xorl %ecx, %ecx 256; CHECK-O3-NEXT: xorl %ebx, %ebx 257; CHECK-O3-NEXT: lock cmpxchg16b (%rdi) 258; CHECK-O3-NEXT: popq %rbx 259; CHECK-O3-NEXT: .cfi_def_cfa_offset 8 260; CHECK-O3-NEXT: retq 261 %v = load atomic i128, i128* %ptr unordered, align 16 262 ret i128 %v 263} 264 265define void @store_i128(i128* %ptr, i128 %v) { 266; CHECK-O0-LABEL: store_i128: 267; CHECK-O0: # %bb.0: 268; CHECK-O0-NEXT: pushq %rbx 269; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 270; CHECK-O0-NEXT: .cfi_offset %rbx, -16 271; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 272; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 273; CHECK-O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 274; CHECK-O0-NEXT: movq (%rdi), %rax 275; CHECK-O0-NEXT: movq 8(%rdi), %rdx 276; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 277; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 278; CHECK-O0-NEXT: jmp .LBB16_1 279; CHECK-O0-NEXT: .LBB16_1: # %atomicrmw.start 280; CHECK-O0-NEXT: # =>This Inner Loop Header: Depth=1 281; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload 282; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload 283; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload 284; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload 285; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload 286; CHECK-O0-NEXT: lock cmpxchg16b (%rsi) 287; 
CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 288; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 289; CHECK-O0-NEXT: jne .LBB16_1 290; CHECK-O0-NEXT: jmp .LBB16_2 291; CHECK-O0-NEXT: .LBB16_2: # %atomicrmw.end 292; CHECK-O0-NEXT: popq %rbx 293; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 294; CHECK-O0-NEXT: retq 295; 296; CHECK-O3-LABEL: store_i128: 297; CHECK-O3: # %bb.0: 298; CHECK-O3-NEXT: pushq %rbx 299; CHECK-O3-NEXT: .cfi_def_cfa_offset 16 300; CHECK-O3-NEXT: .cfi_offset %rbx, -16 301; CHECK-O3-NEXT: movq %rdx, %rcx 302; CHECK-O3-NEXT: movq %rsi, %rbx 303; CHECK-O3-NEXT: movq (%rdi), %rax 304; CHECK-O3-NEXT: movq 8(%rdi), %rdx 305; CHECK-O3-NEXT: .p2align 4, 0x90 306; CHECK-O3-NEXT: .LBB16_1: # %atomicrmw.start 307; CHECK-O3-NEXT: # =>This Inner Loop Header: Depth=1 308; CHECK-O3-NEXT: lock cmpxchg16b (%rdi) 309; CHECK-O3-NEXT: jne .LBB16_1 310; CHECK-O3-NEXT: # %bb.2: # %atomicrmw.end 311; CHECK-O3-NEXT: popq %rbx 312; CHECK-O3-NEXT: .cfi_def_cfa_offset 8 313; CHECK-O3-NEXT: retq 314 store atomic i128 %v, i128* %ptr unordered, align 16 315 ret void 316} 317 318define i256 @load_i256(i256* %ptr) { 319; CHECK-O0-LABEL: load_i256: 320; CHECK-O0: # %bb.0: 321; CHECK-O0-NEXT: subq $56, %rsp 322; CHECK-O0-NEXT: .cfi_def_cfa_offset 64 323; CHECK-O0-NEXT: movq %rdi, %rax 324; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 325; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 326; CHECK-O0-NEXT: movl $32, %edi 327; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx 328; CHECK-O0-NEXT: xorl %ecx, %ecx 329; CHECK-O0-NEXT: callq __atomic_load@PLT 330; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload 331; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload 332; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rcx 333; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdx 334; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rsi 335; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %r8 336; CHECK-O0-NEXT: movq %r8, 24(%rdi) 337; CHECK-O0-NEXT: movq %rsi, 16(%rdi) 338; CHECK-O0-NEXT: movq %rdx, 8(%rdi) 339; CHECK-O0-NEXT: movq %rcx, (%rdi) 340; CHECK-O0-NEXT: addq $56, %rsp 341; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 342; CHECK-O0-NEXT: retq 343; 344; CHECK-O3-LABEL: load_i256: 345; CHECK-O3: # %bb.0: 346; CHECK-O3-NEXT: pushq %rbx 347; CHECK-O3-NEXT: .cfi_def_cfa_offset 16 348; CHECK-O3-NEXT: subq $32, %rsp 349; CHECK-O3-NEXT: .cfi_def_cfa_offset 48 350; CHECK-O3-NEXT: .cfi_offset %rbx, -16 351; CHECK-O3-NEXT: movq %rdi, %rbx 352; CHECK-O3-NEXT: movq %rsp, %rdx 353; CHECK-O3-NEXT: movl $32, %edi 354; CHECK-O3-NEXT: xorl %ecx, %ecx 355; CHECK-O3-NEXT: callq __atomic_load@PLT 356; CHECK-O3-NEXT: vmovups (%rsp), %ymm0 357; CHECK-O3-NEXT: vmovups %ymm0, (%rbx) 358; CHECK-O3-NEXT: movq %rbx, %rax 359; CHECK-O3-NEXT: addq $32, %rsp 360; CHECK-O3-NEXT: .cfi_def_cfa_offset 16 361; CHECK-O3-NEXT: popq %rbx 362; CHECK-O3-NEXT: .cfi_def_cfa_offset 8 363; CHECK-O3-NEXT: vzeroupper 364; CHECK-O3-NEXT: retq 365 %v = load atomic i256, i256* %ptr unordered, align 16 366 ret i256 %v 367} 368 369define void @store_i256(i256* %ptr, i256 %v) { 370; CHECK-O0-LABEL: store_i256: 371; CHECK-O0: # %bb.0: 372; CHECK-O0-NEXT: subq $40, %rsp 373; CHECK-O0-NEXT: .cfi_def_cfa_offset 48 374; CHECK-O0-NEXT: movq %rdx, %rax 375; CHECK-O0-NEXT: movq %rsi, (%rsp) # 8-byte Spill 376; CHECK-O0-NEXT: movq %rdi, %rsi 377; CHECK-O0-NEXT: movq (%rsp), %rdi # 8-byte Reload 378; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx 379; CHECK-O0-NEXT: movq %rdi, {{[0-9]+}}(%rsp) 380; CHECK-O0-NEXT: 
movq %rax, {{[0-9]+}}(%rsp) 381; CHECK-O0-NEXT: movq %rcx, {{[0-9]+}}(%rsp) 382; CHECK-O0-NEXT: movq %r8, {{[0-9]+}}(%rsp) 383; CHECK-O0-NEXT: movl $32, %edi 384; CHECK-O0-NEXT: xorl %ecx, %ecx 385; CHECK-O0-NEXT: callq __atomic_store@PLT 386; CHECK-O0-NEXT: addq $40, %rsp 387; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 388; CHECK-O0-NEXT: retq 389; 390; CHECK-O3-LABEL: store_i256: 391; CHECK-O3: # %bb.0: 392; CHECK-O3-NEXT: subq $40, %rsp 393; CHECK-O3-NEXT: .cfi_def_cfa_offset 48 394; CHECK-O3-NEXT: movq %rdi, %rax 395; CHECK-O3-NEXT: movq %r8, {{[0-9]+}}(%rsp) 396; CHECK-O3-NEXT: movq %rcx, {{[0-9]+}}(%rsp) 397; CHECK-O3-NEXT: movq %rdx, {{[0-9]+}}(%rsp) 398; CHECK-O3-NEXT: movq %rsi, {{[0-9]+}}(%rsp) 399; CHECK-O3-NEXT: leaq {{[0-9]+}}(%rsp), %rdx 400; CHECK-O3-NEXT: movl $32, %edi 401; CHECK-O3-NEXT: movq %rax, %rsi 402; CHECK-O3-NEXT: xorl %ecx, %ecx 403; CHECK-O3-NEXT: callq __atomic_store@PLT 404; CHECK-O3-NEXT: addq $40, %rsp 405; CHECK-O3-NEXT: .cfi_def_cfa_offset 8 406; CHECK-O3-NEXT: retq 407 store atomic i256 %v, i256* %ptr unordered, align 16 408 ret void 409} 410 411; Legal if wider type is also atomic (TODO) 412define void @vec_store(i32* %p0, <2 x i32> %vec) { 413; CHECK-O0-CUR-LABEL: vec_store: 414; CHECK-O0-CUR: # %bb.0: 415; CHECK-O0-CUR-NEXT: vmovd %xmm0, %ecx 416; CHECK-O0-CUR-NEXT: vpextrd $1, %xmm0, %eax 417; CHECK-O0-CUR-NEXT: movl %ecx, (%rdi) 418; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi) 419; CHECK-O0-CUR-NEXT: retq 420; 421; CHECK-O3-CUR-LABEL: vec_store: 422; CHECK-O3-CUR: # %bb.0: 423; CHECK-O3-CUR-NEXT: vmovd %xmm0, %eax 424; CHECK-O3-CUR-NEXT: vpextrd $1, %xmm0, %ecx 425; CHECK-O3-CUR-NEXT: movl %eax, (%rdi) 426; CHECK-O3-CUR-NEXT: movl %ecx, 4(%rdi) 427; CHECK-O3-CUR-NEXT: retq 428; 429; CHECK-O0-EX-LABEL: vec_store: 430; CHECK-O0-EX: # %bb.0: 431; CHECK-O0-EX-NEXT: vmovd %xmm0, (%rdi) 432; CHECK-O0-EX-NEXT: vpextrd $1, %xmm0, 4(%rdi) 433; CHECK-O0-EX-NEXT: retq 434; 435; CHECK-O3-EX-LABEL: vec_store: 436; CHECK-O3-EX: # %bb.0: 437; CHECK-O3-EX-NEXT: vmovss %xmm0, (%rdi) 438; CHECK-O3-EX-NEXT: vextractps $1, %xmm0, 4(%rdi) 439; CHECK-O3-EX-NEXT: retq 440 %v1 = extractelement <2 x i32> %vec, i32 0 441 %v2 = extractelement <2 x i32> %vec, i32 1 442 %p1 = getelementptr i32, i32* %p0, i64 1 443 store atomic i32 %v1, i32* %p0 unordered, align 8 444 store atomic i32 %v2, i32* %p1 unordered, align 4 445 ret void 446} 447 448; Not legal to widen due to alignment restriction 449define void @vec_store_unaligned(i32* %p0, <2 x i32> %vec) { 450; CHECK-O0-CUR-LABEL: vec_store_unaligned: 451; CHECK-O0-CUR: # %bb.0: 452; CHECK-O0-CUR-NEXT: vmovd %xmm0, %ecx 453; CHECK-O0-CUR-NEXT: vpextrd $1, %xmm0, %eax 454; CHECK-O0-CUR-NEXT: movl %ecx, (%rdi) 455; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi) 456; CHECK-O0-CUR-NEXT: retq 457; 458; CHECK-O3-CUR-LABEL: vec_store_unaligned: 459; CHECK-O3-CUR: # %bb.0: 460; CHECK-O3-CUR-NEXT: vmovd %xmm0, %eax 461; CHECK-O3-CUR-NEXT: vpextrd $1, %xmm0, %ecx 462; CHECK-O3-CUR-NEXT: movl %eax, (%rdi) 463; CHECK-O3-CUR-NEXT: movl %ecx, 4(%rdi) 464; CHECK-O3-CUR-NEXT: retq 465; 466; CHECK-O0-EX-LABEL: vec_store_unaligned: 467; CHECK-O0-EX: # %bb.0: 468; CHECK-O0-EX-NEXT: vmovd %xmm0, (%rdi) 469; CHECK-O0-EX-NEXT: vpextrd $1, %xmm0, 4(%rdi) 470; CHECK-O0-EX-NEXT: retq 471; 472; CHECK-O3-EX-LABEL: vec_store_unaligned: 473; CHECK-O3-EX: # %bb.0: 474; CHECK-O3-EX-NEXT: vmovss %xmm0, (%rdi) 475; CHECK-O3-EX-NEXT: vextractps $1, %xmm0, 4(%rdi) 476; CHECK-O3-EX-NEXT: retq 477 %v1 = extractelement <2 x i32> %vec, i32 0 478 %v2 = extractelement <2 x i32> %vec, i32 1 
479 %p1 = getelementptr i32, i32* %p0, i64 1 480 store atomic i32 %v1, i32* %p0 unordered, align 4 481 store atomic i32 %v2, i32* %p1 unordered, align 4 482 ret void 483} 484 485 486 487; Legal if wider type is also atomic (TODO) 488; Also, can avoid register move from xmm to eax (TODO) 489define void @widen_broadcast2(i32* %p0, <2 x i32> %vec) { 490; CHECK-O0-CUR-LABEL: widen_broadcast2: 491; CHECK-O0-CUR: # %bb.0: 492; CHECK-O0-CUR-NEXT: vmovd %xmm0, %eax 493; CHECK-O0-CUR-NEXT: movl %eax, (%rdi) 494; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi) 495; CHECK-O0-CUR-NEXT: retq 496; 497; CHECK-O3-CUR-LABEL: widen_broadcast2: 498; CHECK-O3-CUR: # %bb.0: 499; CHECK-O3-CUR-NEXT: vmovd %xmm0, %eax 500; CHECK-O3-CUR-NEXT: movl %eax, (%rdi) 501; CHECK-O3-CUR-NEXT: movl %eax, 4(%rdi) 502; CHECK-O3-CUR-NEXT: retq 503; 504; CHECK-O0-EX-LABEL: widen_broadcast2: 505; CHECK-O0-EX: # %bb.0: 506; CHECK-O0-EX-NEXT: vmovd %xmm0, (%rdi) 507; CHECK-O0-EX-NEXT: vmovd %xmm0, 4(%rdi) 508; CHECK-O0-EX-NEXT: retq 509; 510; CHECK-O3-EX-LABEL: widen_broadcast2: 511; CHECK-O3-EX: # %bb.0: 512; CHECK-O3-EX-NEXT: vmovss %xmm0, (%rdi) 513; CHECK-O3-EX-NEXT: vmovss %xmm0, 4(%rdi) 514; CHECK-O3-EX-NEXT: retq 515 %v1 = extractelement <2 x i32> %vec, i32 0 516 %p1 = getelementptr i32, i32* %p0, i64 1 517 store atomic i32 %v1, i32* %p0 unordered, align 8 518 store atomic i32 %v1, i32* %p1 unordered, align 4 519 ret void 520} 521 522; Not legal to widen due to alignment restriction 523define void @widen_broadcast2_unaligned(i32* %p0, <2 x i32> %vec) { 524; CHECK-O0-CUR-LABEL: widen_broadcast2_unaligned: 525; CHECK-O0-CUR: # %bb.0: 526; CHECK-O0-CUR-NEXT: vmovd %xmm0, %eax 527; CHECK-O0-CUR-NEXT: movl %eax, (%rdi) 528; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi) 529; CHECK-O0-CUR-NEXT: retq 530; 531; CHECK-O3-CUR-LABEL: widen_broadcast2_unaligned: 532; CHECK-O3-CUR: # %bb.0: 533; CHECK-O3-CUR-NEXT: vmovd %xmm0, %eax 534; CHECK-O3-CUR-NEXT: movl %eax, (%rdi) 535; CHECK-O3-CUR-NEXT: movl %eax, 4(%rdi) 536; CHECK-O3-CUR-NEXT: retq 537; 538; CHECK-O0-EX-LABEL: widen_broadcast2_unaligned: 539; CHECK-O0-EX: # %bb.0: 540; CHECK-O0-EX-NEXT: vmovd %xmm0, (%rdi) 541; CHECK-O0-EX-NEXT: vmovd %xmm0, 4(%rdi) 542; CHECK-O0-EX-NEXT: retq 543; 544; CHECK-O3-EX-LABEL: widen_broadcast2_unaligned: 545; CHECK-O3-EX: # %bb.0: 546; CHECK-O3-EX-NEXT: vmovss %xmm0, (%rdi) 547; CHECK-O3-EX-NEXT: vmovss %xmm0, 4(%rdi) 548; CHECK-O3-EX-NEXT: retq 549 %v1 = extractelement <2 x i32> %vec, i32 0 550 %p1 = getelementptr i32, i32* %p0, i64 1 551 store atomic i32 %v1, i32* %p0 unordered, align 4 552 store atomic i32 %v1, i32* %p1 unordered, align 4 553 ret void 554} 555 556; Legal if wider type is also atomic (TODO) 557define void @widen_zero_init(i32* %p0, i32 %v1, i32 %v2) { 558; CHECK-LABEL: widen_zero_init: 559; CHECK: # %bb.0: 560; CHECK-NEXT: movl $0, (%rdi) 561; CHECK-NEXT: movl $0, 4(%rdi) 562; CHECK-NEXT: retq 563 %p1 = getelementptr i32, i32* %p0, i64 1 564 store atomic i32 0, i32* %p0 unordered, align 8 565 store atomic i32 0, i32* %p1 unordered, align 4 566 ret void 567} 568 569; Not legal to widen due to alignment restriction 570define void @widen_zero_init_unaligned(i32* %p0, i32 %v1, i32 %v2) { 571; CHECK-LABEL: widen_zero_init_unaligned: 572; CHECK: # %bb.0: 573; CHECK-NEXT: movl $0, (%rdi) 574; CHECK-NEXT: movl $0, 4(%rdi) 575; CHECK-NEXT: retq 576 %p1 = getelementptr i32, i32* %p0, i64 1 577 store atomic i32 0, i32* %p0 unordered, align 4 578 store atomic i32 0, i32* %p1 unordered, align 4 579 ret void 580} 581 582;; The next batch of tests are stressing 
load folding. Folding is legal 583;; on x86, so these are simply checking optimization quality. 584 585; Legal, as expected 586define i64 @load_fold_add1(i64* %p) { 587; CHECK-LABEL: load_fold_add1: 588; CHECK: # %bb.0: 589; CHECK-NEXT: movq (%rdi), %rax 590; CHECK-NEXT: addq $15, %rax 591; CHECK-NEXT: retq 592 %v = load atomic i64, i64* %p unordered, align 8 593 %ret = add i64 %v, 15 594 ret i64 %ret 595} 596 597define i64 @load_fold_add2(i64* %p, i64 %v2) { 598; CHECK-LABEL: load_fold_add2: 599; CHECK: # %bb.0: 600; CHECK-NEXT: movq %rsi, %rax 601; CHECK-NEXT: addq (%rdi), %rax 602; CHECK-NEXT: retq 603 %v = load atomic i64, i64* %p unordered, align 8 604 %ret = add i64 %v, %v2 605 ret i64 %ret 606} 607 608define i64 @load_fold_add3(i64* %p1, i64* %p2) { 609; CHECK-O0-LABEL: load_fold_add3: 610; CHECK-O0: # %bb.0: 611; CHECK-O0-NEXT: movq (%rdi), %rax 612; CHECK-O0-NEXT: addq (%rsi), %rax 613; CHECK-O0-NEXT: retq 614; 615; CHECK-O3-CUR-LABEL: load_fold_add3: 616; CHECK-O3-CUR: # %bb.0: 617; CHECK-O3-CUR-NEXT: movq (%rsi), %rax 618; CHECK-O3-CUR-NEXT: addq (%rdi), %rax 619; CHECK-O3-CUR-NEXT: retq 620; 621; CHECK-O3-EX-LABEL: load_fold_add3: 622; CHECK-O3-EX: # %bb.0: 623; CHECK-O3-EX-NEXT: movq (%rdi), %rax 624; CHECK-O3-EX-NEXT: addq (%rsi), %rax 625; CHECK-O3-EX-NEXT: retq 626 %v = load atomic i64, i64* %p1 unordered, align 8 627 %v2 = load atomic i64, i64* %p2 unordered, align 8 628 %ret = add i64 %v, %v2 629 ret i64 %ret 630} 631 632; Legal, as expected 633define i64 @load_fold_sub1(i64* %p) { 634; CHECK-O0-LABEL: load_fold_sub1: 635; CHECK-O0: # %bb.0: 636; CHECK-O0-NEXT: movq (%rdi), %rax 637; CHECK-O0-NEXT: subq $15, %rax 638; CHECK-O0-NEXT: retq 639; 640; CHECK-O3-LABEL: load_fold_sub1: 641; CHECK-O3: # %bb.0: 642; CHECK-O3-NEXT: movq (%rdi), %rax 643; CHECK-O3-NEXT: addq $-15, %rax 644; CHECK-O3-NEXT: retq 645 %v = load atomic i64, i64* %p unordered, align 8 646 %ret = sub i64 %v, 15 647 ret i64 %ret 648} 649 650define i64 @load_fold_sub2(i64* %p, i64 %v2) { 651; CHECK-LABEL: load_fold_sub2: 652; CHECK: # %bb.0: 653; CHECK-NEXT: movq (%rdi), %rax 654; CHECK-NEXT: subq %rsi, %rax 655; CHECK-NEXT: retq 656 %v = load atomic i64, i64* %p unordered, align 8 657 %ret = sub i64 %v, %v2 658 ret i64 %ret 659} 660 661define i64 @load_fold_sub3(i64* %p1, i64* %p2) { 662; CHECK-LABEL: load_fold_sub3: 663; CHECK: # %bb.0: 664; CHECK-NEXT: movq (%rdi), %rax 665; CHECK-NEXT: subq (%rsi), %rax 666; CHECK-NEXT: retq 667 %v = load atomic i64, i64* %p1 unordered, align 8 668 %v2 = load atomic i64, i64* %p2 unordered, align 8 669 %ret = sub i64 %v, %v2 670 ret i64 %ret 671} 672 673; Legal, as expected 674define i64 @load_fold_mul1(i64* %p) { 675; CHECK-O0-LABEL: load_fold_mul1: 676; CHECK-O0: # %bb.0: 677; CHECK-O0-NEXT: imulq $15, (%rdi), %rax 678; CHECK-O0-NEXT: retq 679; 680; CHECK-O3-LABEL: load_fold_mul1: 681; CHECK-O3: # %bb.0: 682; CHECK-O3-NEXT: movq (%rdi), %rax 683; CHECK-O3-NEXT: leaq (%rax,%rax,4), %rax 684; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax 685; CHECK-O3-NEXT: retq 686 %v = load atomic i64, i64* %p unordered, align 8 687 %ret = mul i64 %v, 15 688 ret i64 %ret 689} 690 691define i64 @load_fold_mul2(i64* %p, i64 %v2) { 692; CHECK-LABEL: load_fold_mul2: 693; CHECK: # %bb.0: 694; CHECK-NEXT: movq %rsi, %rax 695; CHECK-NEXT: imulq (%rdi), %rax 696; CHECK-NEXT: retq 697 %v = load atomic i64, i64* %p unordered, align 8 698 %ret = mul i64 %v, %v2 699 ret i64 %ret 700} 701 702define i64 @load_fold_mul3(i64* %p1, i64* %p2) { 703; CHECK-O0-LABEL: load_fold_mul3: 704; CHECK-O0: # %bb.0: 705; 
CHECK-O0-NEXT: movq (%rdi), %rax 706; CHECK-O0-NEXT: imulq (%rsi), %rax 707; CHECK-O0-NEXT: retq 708; 709; CHECK-O3-CUR-LABEL: load_fold_mul3: 710; CHECK-O3-CUR: # %bb.0: 711; CHECK-O3-CUR-NEXT: movq (%rsi), %rax 712; CHECK-O3-CUR-NEXT: imulq (%rdi), %rax 713; CHECK-O3-CUR-NEXT: retq 714; 715; CHECK-O3-EX-LABEL: load_fold_mul3: 716; CHECK-O3-EX: # %bb.0: 717; CHECK-O3-EX-NEXT: movq (%rdi), %rax 718; CHECK-O3-EX-NEXT: imulq (%rsi), %rax 719; CHECK-O3-EX-NEXT: retq 720 %v = load atomic i64, i64* %p1 unordered, align 8 721 %v2 = load atomic i64, i64* %p2 unordered, align 8 722 %ret = mul i64 %v, %v2 723 ret i64 %ret 724} 725 726; Legal to fold (TODO) 727define i64 @load_fold_sdiv1(i64* %p) { 728; CHECK-O0-LABEL: load_fold_sdiv1: 729; CHECK-O0: # %bb.0: 730; CHECK-O0-NEXT: movq (%rdi), %rax 731; CHECK-O0-NEXT: movl $15, %ecx 732; CHECK-O0-NEXT: cqto 733; CHECK-O0-NEXT: idivq %rcx 734; CHECK-O0-NEXT: retq 735; 736; CHECK-O3-LABEL: load_fold_sdiv1: 737; CHECK-O3: # %bb.0: 738; CHECK-O3-NEXT: movq (%rdi), %rcx 739; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 740; CHECK-O3-NEXT: movq %rcx, %rax 741; CHECK-O3-NEXT: imulq %rdx 742; CHECK-O3-NEXT: addq %rdx, %rcx 743; CHECK-O3-NEXT: movq %rcx, %rax 744; CHECK-O3-NEXT: shrq $63, %rax 745; CHECK-O3-NEXT: sarq $3, %rcx 746; CHECK-O3-NEXT: addq %rax, %rcx 747; CHECK-O3-NEXT: movq %rcx, %rax 748; CHECK-O3-NEXT: retq 749 %v = load atomic i64, i64* %p unordered, align 8 750 %ret = sdiv i64 %v, 15 751 ret i64 %ret 752} 753 754; Legal to fold (TODO) 755define i64 @load_fold_sdiv2(i64* %p, i64 %v2) { 756; CHECK-O0-LABEL: load_fold_sdiv2: 757; CHECK-O0: # %bb.0: 758; CHECK-O0-NEXT: movq (%rdi), %rax 759; CHECK-O0-NEXT: cqto 760; CHECK-O0-NEXT: idivq %rsi 761; CHECK-O0-NEXT: retq 762; 763; CHECK-O3-LABEL: load_fold_sdiv2: 764; CHECK-O3: # %bb.0: 765; CHECK-O3-NEXT: movq (%rdi), %rax 766; CHECK-O3-NEXT: movq %rax, %rcx 767; CHECK-O3-NEXT: orq %rsi, %rcx 768; CHECK-O3-NEXT: shrq $32, %rcx 769; CHECK-O3-NEXT: je .LBB35_1 770; CHECK-O3-NEXT: # %bb.2: 771; CHECK-O3-NEXT: cqto 772; CHECK-O3-NEXT: idivq %rsi 773; CHECK-O3-NEXT: retq 774; CHECK-O3-NEXT: .LBB35_1: 775; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax 776; CHECK-O3-NEXT: xorl %edx, %edx 777; CHECK-O3-NEXT: divl %esi 778; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax 779; CHECK-O3-NEXT: retq 780 %v = load atomic i64, i64* %p unordered, align 8 781 %ret = sdiv i64 %v, %v2 782 ret i64 %ret 783} 784 785define i64 @load_fold_sdiv3(i64* %p1, i64* %p2) { 786; CHECK-O0-LABEL: load_fold_sdiv3: 787; CHECK-O0: # %bb.0: 788; CHECK-O0-NEXT: movq (%rdi), %rax 789; CHECK-O0-NEXT: cqto 790; CHECK-O0-NEXT: idivq (%rsi) 791; CHECK-O0-NEXT: retq 792; 793; CHECK-O3-LABEL: load_fold_sdiv3: 794; CHECK-O3: # %bb.0: 795; CHECK-O3-NEXT: movq (%rdi), %rax 796; CHECK-O3-NEXT: movq (%rsi), %rcx 797; CHECK-O3-NEXT: movq %rax, %rdx 798; CHECK-O3-NEXT: orq %rcx, %rdx 799; CHECK-O3-NEXT: shrq $32, %rdx 800; CHECK-O3-NEXT: je .LBB36_1 801; CHECK-O3-NEXT: # %bb.2: 802; CHECK-O3-NEXT: cqto 803; CHECK-O3-NEXT: idivq %rcx 804; CHECK-O3-NEXT: retq 805; CHECK-O3-NEXT: .LBB36_1: 806; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax 807; CHECK-O3-NEXT: xorl %edx, %edx 808; CHECK-O3-NEXT: divl %ecx 809; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax 810; CHECK-O3-NEXT: retq 811 %v = load atomic i64, i64* %p1 unordered, align 8 812 %v2 = load atomic i64, i64* %p2 unordered, align 8 813 %ret = sdiv i64 %v, %v2 814 ret i64 %ret 815} 816 817; Legal to fold (TODO) 818define i64 
@load_fold_udiv1(i64* %p) { 819; CHECK-O0-LABEL: load_fold_udiv1: 820; CHECK-O0: # %bb.0: 821; CHECK-O0-NEXT: movq (%rdi), %rax 822; CHECK-O0-NEXT: movl $15, %ecx 823; CHECK-O0-NEXT: xorl %edx, %edx 824; CHECK-O0-NEXT: # kill: def $rdx killed $edx 825; CHECK-O0-NEXT: divq %rcx 826; CHECK-O0-NEXT: retq 827; 828; CHECK-O3-CUR-LABEL: load_fold_udiv1: 829; CHECK-O3-CUR: # %bb.0: 830; CHECK-O3-CUR-NEXT: movq (%rdi), %rdx 831; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 832; CHECK-O3-CUR-NEXT: mulxq %rax, %rax, %rax 833; CHECK-O3-CUR-NEXT: shrq $3, %rax 834; CHECK-O3-CUR-NEXT: retq 835; 836; CHECK-O3-EX-LABEL: load_fold_udiv1: 837; CHECK-O3-EX: # %bb.0: 838; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 839; CHECK-O3-EX-NEXT: mulxq (%rdi), %rax, %rax 840; CHECK-O3-EX-NEXT: shrq $3, %rax 841; CHECK-O3-EX-NEXT: retq 842 %v = load atomic i64, i64* %p unordered, align 8 843 %ret = udiv i64 %v, 15 844 ret i64 %ret 845} 846 847define i64 @load_fold_udiv2(i64* %p, i64 %v2) { 848; CHECK-O0-LABEL: load_fold_udiv2: 849; CHECK-O0: # %bb.0: 850; CHECK-O0-NEXT: movq (%rdi), %rax 851; CHECK-O0-NEXT: xorl %ecx, %ecx 852; CHECK-O0-NEXT: movl %ecx, %edx 853; CHECK-O0-NEXT: divq %rsi 854; CHECK-O0-NEXT: retq 855; 856; CHECK-O3-LABEL: load_fold_udiv2: 857; CHECK-O3: # %bb.0: 858; CHECK-O3-NEXT: movq (%rdi), %rax 859; CHECK-O3-NEXT: movq %rax, %rcx 860; CHECK-O3-NEXT: orq %rsi, %rcx 861; CHECK-O3-NEXT: shrq $32, %rcx 862; CHECK-O3-NEXT: je .LBB38_1 863; CHECK-O3-NEXT: # %bb.2: 864; CHECK-O3-NEXT: xorl %edx, %edx 865; CHECK-O3-NEXT: divq %rsi 866; CHECK-O3-NEXT: retq 867; CHECK-O3-NEXT: .LBB38_1: 868; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax 869; CHECK-O3-NEXT: xorl %edx, %edx 870; CHECK-O3-NEXT: divl %esi 871; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax 872; CHECK-O3-NEXT: retq 873 %v = load atomic i64, i64* %p unordered, align 8 874 %ret = udiv i64 %v, %v2 875 ret i64 %ret 876} 877 878define i64 @load_fold_udiv3(i64* %p1, i64* %p2) { 879; CHECK-O0-LABEL: load_fold_udiv3: 880; CHECK-O0: # %bb.0: 881; CHECK-O0-NEXT: movq (%rdi), %rax 882; CHECK-O0-NEXT: xorl %ecx, %ecx 883; CHECK-O0-NEXT: movl %ecx, %edx 884; CHECK-O0-NEXT: divq (%rsi) 885; CHECK-O0-NEXT: retq 886; 887; CHECK-O3-LABEL: load_fold_udiv3: 888; CHECK-O3: # %bb.0: 889; CHECK-O3-NEXT: movq (%rdi), %rax 890; CHECK-O3-NEXT: movq (%rsi), %rcx 891; CHECK-O3-NEXT: movq %rax, %rdx 892; CHECK-O3-NEXT: orq %rcx, %rdx 893; CHECK-O3-NEXT: shrq $32, %rdx 894; CHECK-O3-NEXT: je .LBB39_1 895; CHECK-O3-NEXT: # %bb.2: 896; CHECK-O3-NEXT: xorl %edx, %edx 897; CHECK-O3-NEXT: divq %rcx 898; CHECK-O3-NEXT: retq 899; CHECK-O3-NEXT: .LBB39_1: 900; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax 901; CHECK-O3-NEXT: xorl %edx, %edx 902; CHECK-O3-NEXT: divl %ecx 903; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax 904; CHECK-O3-NEXT: retq 905 %v = load atomic i64, i64* %p1 unordered, align 8 906 %v2 = load atomic i64, i64* %p2 unordered, align 8 907 %ret = udiv i64 %v, %v2 908 ret i64 %ret 909} 910 911; Legal to fold (TODO) 912define i64 @load_fold_srem1(i64* %p) { 913; CHECK-O0-LABEL: load_fold_srem1: 914; CHECK-O0: # %bb.0: 915; CHECK-O0-NEXT: movq (%rdi), %rax 916; CHECK-O0-NEXT: movl $15, %ecx 917; CHECK-O0-NEXT: cqto 918; CHECK-O0-NEXT: idivq %rcx 919; CHECK-O0-NEXT: movq %rdx, %rax 920; CHECK-O0-NEXT: retq 921; 922; CHECK-O3-LABEL: load_fold_srem1: 923; CHECK-O3: # %bb.0: 924; CHECK-O3-NEXT: movq (%rdi), %rcx 925; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx 
# imm = 0x8888888888888889 926; CHECK-O3-NEXT: movq %rcx, %rax 927; CHECK-O3-NEXT: imulq %rdx 928; CHECK-O3-NEXT: addq %rcx, %rdx 929; CHECK-O3-NEXT: movq %rdx, %rax 930; CHECK-O3-NEXT: shrq $63, %rax 931; CHECK-O3-NEXT: sarq $3, %rdx 932; CHECK-O3-NEXT: addq %rax, %rdx 933; CHECK-O3-NEXT: leaq (%rdx,%rdx,4), %rax 934; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax 935; CHECK-O3-NEXT: subq %rax, %rcx 936; CHECK-O3-NEXT: movq %rcx, %rax 937; CHECK-O3-NEXT: retq 938 %v = load atomic i64, i64* %p unordered, align 8 939 %ret = srem i64 %v, 15 940 ret i64 %ret 941} 942 943; Legal, as expected 944define i64 @load_fold_srem2(i64* %p, i64 %v2) { 945; CHECK-O0-LABEL: load_fold_srem2: 946; CHECK-O0: # %bb.0: 947; CHECK-O0-NEXT: movq (%rdi), %rax 948; CHECK-O0-NEXT: cqto 949; CHECK-O0-NEXT: idivq %rsi 950; CHECK-O0-NEXT: movq %rdx, %rax 951; CHECK-O0-NEXT: retq 952; 953; CHECK-O3-LABEL: load_fold_srem2: 954; CHECK-O3: # %bb.0: 955; CHECK-O3-NEXT: movq (%rdi), %rax 956; CHECK-O3-NEXT: movq %rax, %rcx 957; CHECK-O3-NEXT: orq %rsi, %rcx 958; CHECK-O3-NEXT: shrq $32, %rcx 959; CHECK-O3-NEXT: je .LBB41_1 960; CHECK-O3-NEXT: # %bb.2: 961; CHECK-O3-NEXT: cqto 962; CHECK-O3-NEXT: idivq %rsi 963; CHECK-O3-NEXT: movq %rdx, %rax 964; CHECK-O3-NEXT: retq 965; CHECK-O3-NEXT: .LBB41_1: 966; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax 967; CHECK-O3-NEXT: xorl %edx, %edx 968; CHECK-O3-NEXT: divl %esi 969; CHECK-O3-NEXT: movl %edx, %eax 970; CHECK-O3-NEXT: retq 971 %v = load atomic i64, i64* %p unordered, align 8 972 %ret = srem i64 %v, %v2 973 ret i64 %ret 974} 975 976define i64 @load_fold_srem3(i64* %p1, i64* %p2) { 977; CHECK-O0-LABEL: load_fold_srem3: 978; CHECK-O0: # %bb.0: 979; CHECK-O0-NEXT: movq (%rdi), %rax 980; CHECK-O0-NEXT: cqto 981; CHECK-O0-NEXT: idivq (%rsi) 982; CHECK-O0-NEXT: movq %rdx, %rax 983; CHECK-O0-NEXT: retq 984; 985; CHECK-O3-LABEL: load_fold_srem3: 986; CHECK-O3: # %bb.0: 987; CHECK-O3-NEXT: movq (%rdi), %rax 988; CHECK-O3-NEXT: movq (%rsi), %rcx 989; CHECK-O3-NEXT: movq %rax, %rdx 990; CHECK-O3-NEXT: orq %rcx, %rdx 991; CHECK-O3-NEXT: shrq $32, %rdx 992; CHECK-O3-NEXT: je .LBB42_1 993; CHECK-O3-NEXT: # %bb.2: 994; CHECK-O3-NEXT: cqto 995; CHECK-O3-NEXT: idivq %rcx 996; CHECK-O3-NEXT: movq %rdx, %rax 997; CHECK-O3-NEXT: retq 998; CHECK-O3-NEXT: .LBB42_1: 999; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax 1000; CHECK-O3-NEXT: xorl %edx, %edx 1001; CHECK-O3-NEXT: divl %ecx 1002; CHECK-O3-NEXT: movl %edx, %eax 1003; CHECK-O3-NEXT: retq 1004 %v = load atomic i64, i64* %p1 unordered, align 8 1005 %v2 = load atomic i64, i64* %p2 unordered, align 8 1006 %ret = srem i64 %v, %v2 1007 ret i64 %ret 1008} 1009 1010; Legal to fold (TODO) 1011define i64 @load_fold_urem1(i64* %p) { 1012; CHECK-O0-LABEL: load_fold_urem1: 1013; CHECK-O0: # %bb.0: 1014; CHECK-O0-NEXT: movq (%rdi), %rax 1015; CHECK-O0-NEXT: movl $15, %ecx 1016; CHECK-O0-NEXT: xorl %edx, %edx 1017; CHECK-O0-NEXT: # kill: def $rdx killed $edx 1018; CHECK-O0-NEXT: divq %rcx 1019; CHECK-O0-NEXT: movq %rdx, %rax 1020; CHECK-O0-NEXT: retq 1021; 1022; CHECK-O3-LABEL: load_fold_urem1: 1023; CHECK-O3: # %bb.0: 1024; CHECK-O3-NEXT: movq (%rdi), %rax 1025; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 1026; CHECK-O3-NEXT: movq %rax, %rdx 1027; CHECK-O3-NEXT: mulxq %rcx, %rcx, %rcx 1028; CHECK-O3-NEXT: shrq $3, %rcx 1029; CHECK-O3-NEXT: leaq (%rcx,%rcx,4), %rcx 1030; CHECK-O3-NEXT: leaq (%rcx,%rcx,2), %rcx 1031; CHECK-O3-NEXT: subq %rcx, %rax 1032; CHECK-O3-NEXT: retq 1033 %v = load atomic i64, i64* %p 
unordered, align 8 1034 %ret = urem i64 %v, 15 1035 ret i64 %ret 1036} 1037 1038; Legal, as expected 1039define i64 @load_fold_urem2(i64* %p, i64 %v2) { 1040; CHECK-O0-LABEL: load_fold_urem2: 1041; CHECK-O0: # %bb.0: 1042; CHECK-O0-NEXT: movq (%rdi), %rax 1043; CHECK-O0-NEXT: xorl %ecx, %ecx 1044; CHECK-O0-NEXT: movl %ecx, %edx 1045; CHECK-O0-NEXT: divq %rsi 1046; CHECK-O0-NEXT: movq %rdx, %rax 1047; CHECK-O0-NEXT: retq 1048; 1049; CHECK-O3-LABEL: load_fold_urem2: 1050; CHECK-O3: # %bb.0: 1051; CHECK-O3-NEXT: movq (%rdi), %rax 1052; CHECK-O3-NEXT: movq %rax, %rcx 1053; CHECK-O3-NEXT: orq %rsi, %rcx 1054; CHECK-O3-NEXT: shrq $32, %rcx 1055; CHECK-O3-NEXT: je .LBB44_1 1056; CHECK-O3-NEXT: # %bb.2: 1057; CHECK-O3-NEXT: xorl %edx, %edx 1058; CHECK-O3-NEXT: divq %rsi 1059; CHECK-O3-NEXT: movq %rdx, %rax 1060; CHECK-O3-NEXT: retq 1061; CHECK-O3-NEXT: .LBB44_1: 1062; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax 1063; CHECK-O3-NEXT: xorl %edx, %edx 1064; CHECK-O3-NEXT: divl %esi 1065; CHECK-O3-NEXT: movl %edx, %eax 1066; CHECK-O3-NEXT: retq 1067 %v = load atomic i64, i64* %p unordered, align 8 1068 %ret = urem i64 %v, %v2 1069 ret i64 %ret 1070} 1071 1072define i64 @load_fold_urem3(i64* %p1, i64* %p2) { 1073; CHECK-O0-LABEL: load_fold_urem3: 1074; CHECK-O0: # %bb.0: 1075; CHECK-O0-NEXT: movq (%rdi), %rax 1076; CHECK-O0-NEXT: xorl %ecx, %ecx 1077; CHECK-O0-NEXT: movl %ecx, %edx 1078; CHECK-O0-NEXT: divq (%rsi) 1079; CHECK-O0-NEXT: movq %rdx, %rax 1080; CHECK-O0-NEXT: retq 1081; 1082; CHECK-O3-LABEL: load_fold_urem3: 1083; CHECK-O3: # %bb.0: 1084; CHECK-O3-NEXT: movq (%rdi), %rax 1085; CHECK-O3-NEXT: movq (%rsi), %rcx 1086; CHECK-O3-NEXT: movq %rax, %rdx 1087; CHECK-O3-NEXT: orq %rcx, %rdx 1088; CHECK-O3-NEXT: shrq $32, %rdx 1089; CHECK-O3-NEXT: je .LBB45_1 1090; CHECK-O3-NEXT: # %bb.2: 1091; CHECK-O3-NEXT: xorl %edx, %edx 1092; CHECK-O3-NEXT: divq %rcx 1093; CHECK-O3-NEXT: movq %rdx, %rax 1094; CHECK-O3-NEXT: retq 1095; CHECK-O3-NEXT: .LBB45_1: 1096; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax 1097; CHECK-O3-NEXT: xorl %edx, %edx 1098; CHECK-O3-NEXT: divl %ecx 1099; CHECK-O3-NEXT: movl %edx, %eax 1100; CHECK-O3-NEXT: retq 1101 %v = load atomic i64, i64* %p1 unordered, align 8 1102 %v2 = load atomic i64, i64* %p2 unordered, align 8 1103 %ret = urem i64 %v, %v2 1104 ret i64 %ret 1105} 1106 1107; Legal, as expected 1108define i64 @load_fold_shl1(i64* %p) { 1109; CHECK-LABEL: load_fold_shl1: 1110; CHECK: # %bb.0: 1111; CHECK-NEXT: movq (%rdi), %rax 1112; CHECK-NEXT: shlq $15, %rax 1113; CHECK-NEXT: retq 1114 %v = load atomic i64, i64* %p unordered, align 8 1115 %ret = shl i64 %v, 15 1116 ret i64 %ret 1117} 1118 1119define i64 @load_fold_shl2(i64* %p, i64 %v2) { 1120; CHECK-O0-LABEL: load_fold_shl2: 1121; CHECK-O0: # %bb.0: 1122; CHECK-O0-NEXT: movq %rsi, %rcx 1123; CHECK-O0-NEXT: movq (%rdi), %rax 1124; CHECK-O0-NEXT: # kill: def $cl killed $rcx 1125; CHECK-O0-NEXT: shlq %cl, %rax 1126; CHECK-O0-NEXT: retq 1127; 1128; CHECK-O3-LABEL: load_fold_shl2: 1129; CHECK-O3: # %bb.0: 1130; CHECK-O3-NEXT: shlxq %rsi, (%rdi), %rax 1131; CHECK-O3-NEXT: retq 1132 %v = load atomic i64, i64* %p unordered, align 8 1133 %ret = shl i64 %v, %v2 1134 ret i64 %ret 1135} 1136 1137define i64 @load_fold_shl3(i64* %p1, i64* %p2) { 1138; CHECK-O0-LABEL: load_fold_shl3: 1139; CHECK-O0: # %bb.0: 1140; CHECK-O0-NEXT: movq (%rdi), %rax 1141; CHECK-O0-NEXT: movq (%rsi), %rcx 1142; CHECK-O0-NEXT: # kill: def $cl killed $rcx 1143; CHECK-O0-NEXT: shlq %cl, %rax 1144; CHECK-O0-NEXT: retq 1145; 1146; CHECK-O3-LABEL: 
load_fold_shl3: 1147; CHECK-O3: # %bb.0: 1148; CHECK-O3-NEXT: movq (%rsi), %rax 1149; CHECK-O3-NEXT: shlxq %rax, (%rdi), %rax 1150; CHECK-O3-NEXT: retq 1151 %v = load atomic i64, i64* %p1 unordered, align 8 1152 %v2 = load atomic i64, i64* %p2 unordered, align 8 1153 %ret = shl i64 %v, %v2 1154 ret i64 %ret 1155} 1156 1157; Legal, as expected 1158define i64 @load_fold_lshr1(i64* %p) { 1159; CHECK-LABEL: load_fold_lshr1: 1160; CHECK: # %bb.0: 1161; CHECK-NEXT: movq (%rdi), %rax 1162; CHECK-NEXT: shrq $15, %rax 1163; CHECK-NEXT: retq 1164 %v = load atomic i64, i64* %p unordered, align 8 1165 %ret = lshr i64 %v, 15 1166 ret i64 %ret 1167} 1168 1169define i64 @load_fold_lshr2(i64* %p, i64 %v2) { 1170; CHECK-O0-LABEL: load_fold_lshr2: 1171; CHECK-O0: # %bb.0: 1172; CHECK-O0-NEXT: movq %rsi, %rcx 1173; CHECK-O0-NEXT: movq (%rdi), %rax 1174; CHECK-O0-NEXT: # kill: def $cl killed $rcx 1175; CHECK-O0-NEXT: shrq %cl, %rax 1176; CHECK-O0-NEXT: retq 1177; 1178; CHECK-O3-LABEL: load_fold_lshr2: 1179; CHECK-O3: # %bb.0: 1180; CHECK-O3-NEXT: shrxq %rsi, (%rdi), %rax 1181; CHECK-O3-NEXT: retq 1182 %v = load atomic i64, i64* %p unordered, align 8 1183 %ret = lshr i64 %v, %v2 1184 ret i64 %ret 1185} 1186 1187define i64 @load_fold_lshr3(i64* %p1, i64* %p2) { 1188; CHECK-O0-LABEL: load_fold_lshr3: 1189; CHECK-O0: # %bb.0: 1190; CHECK-O0-NEXT: movq (%rdi), %rax 1191; CHECK-O0-NEXT: movq (%rsi), %rcx 1192; CHECK-O0-NEXT: # kill: def $cl killed $rcx 1193; CHECK-O0-NEXT: shrq %cl, %rax 1194; CHECK-O0-NEXT: retq 1195; 1196; CHECK-O3-LABEL: load_fold_lshr3: 1197; CHECK-O3: # %bb.0: 1198; CHECK-O3-NEXT: movq (%rsi), %rax 1199; CHECK-O3-NEXT: shrxq %rax, (%rdi), %rax 1200; CHECK-O3-NEXT: retq 1201 %v = load atomic i64, i64* %p1 unordered, align 8 1202 %v2 = load atomic i64, i64* %p2 unordered, align 8 1203 %ret = lshr i64 %v, %v2 1204 ret i64 %ret 1205} 1206 1207; Legal, as expected 1208define i64 @load_fold_ashr1(i64* %p) { 1209; CHECK-LABEL: load_fold_ashr1: 1210; CHECK: # %bb.0: 1211; CHECK-NEXT: movq (%rdi), %rax 1212; CHECK-NEXT: sarq $15, %rax 1213; CHECK-NEXT: retq 1214 %v = load atomic i64, i64* %p unordered, align 8 1215 %ret = ashr i64 %v, 15 1216 ret i64 %ret 1217} 1218 1219define i64 @load_fold_ashr2(i64* %p, i64 %v2) { 1220; CHECK-O0-LABEL: load_fold_ashr2: 1221; CHECK-O0: # %bb.0: 1222; CHECK-O0-NEXT: movq %rsi, %rcx 1223; CHECK-O0-NEXT: movq (%rdi), %rax 1224; CHECK-O0-NEXT: # kill: def $cl killed $rcx 1225; CHECK-O0-NEXT: sarq %cl, %rax 1226; CHECK-O0-NEXT: retq 1227; 1228; CHECK-O3-LABEL: load_fold_ashr2: 1229; CHECK-O3: # %bb.0: 1230; CHECK-O3-NEXT: sarxq %rsi, (%rdi), %rax 1231; CHECK-O3-NEXT: retq 1232 %v = load atomic i64, i64* %p unordered, align 8 1233 %ret = ashr i64 %v, %v2 1234 ret i64 %ret 1235} 1236 1237define i64 @load_fold_ashr3(i64* %p1, i64* %p2) { 1238; CHECK-O0-LABEL: load_fold_ashr3: 1239; CHECK-O0: # %bb.0: 1240; CHECK-O0-NEXT: movq (%rdi), %rax 1241; CHECK-O0-NEXT: movq (%rsi), %rcx 1242; CHECK-O0-NEXT: # kill: def $cl killed $rcx 1243; CHECK-O0-NEXT: sarq %cl, %rax 1244; CHECK-O0-NEXT: retq 1245; 1246; CHECK-O3-LABEL: load_fold_ashr3: 1247; CHECK-O3: # %bb.0: 1248; CHECK-O3-NEXT: movq (%rsi), %rax 1249; CHECK-O3-NEXT: sarxq %rax, (%rdi), %rax 1250; CHECK-O3-NEXT: retq 1251 %v = load atomic i64, i64* %p1 unordered, align 8 1252 %v2 = load atomic i64, i64* %p2 unordered, align 8 1253 %ret = ashr i64 %v, %v2 1254 ret i64 %ret 1255} 1256 1257; Legal, as expected 1258define i64 @load_fold_and1(i64* %p) { 1259; CHECK-O0-LABEL: load_fold_and1: 1260; CHECK-O0: # %bb.0: 1261; 
CHECK-O0-NEXT: movq (%rdi), %rax 1262; CHECK-O0-NEXT: andq $15, %rax 1263; CHECK-O0-NEXT: retq 1264; 1265; CHECK-O3-LABEL: load_fold_and1: 1266; CHECK-O3: # %bb.0: 1267; CHECK-O3-NEXT: movq (%rdi), %rax 1268; CHECK-O3-NEXT: andl $15, %eax 1269; CHECK-O3-NEXT: retq 1270 %v = load atomic i64, i64* %p unordered, align 8 1271 %ret = and i64 %v, 15 1272 ret i64 %ret 1273} 1274 1275define i64 @load_fold_and2(i64* %p, i64 %v2) { 1276; CHECK-LABEL: load_fold_and2: 1277; CHECK: # %bb.0: 1278; CHECK-NEXT: movq %rsi, %rax 1279; CHECK-NEXT: andq (%rdi), %rax 1280; CHECK-NEXT: retq 1281 %v = load atomic i64, i64* %p unordered, align 8 1282 %ret = and i64 %v, %v2 1283 ret i64 %ret 1284} 1285 1286define i64 @load_fold_and3(i64* %p1, i64* %p2) { 1287; CHECK-O0-LABEL: load_fold_and3: 1288; CHECK-O0: # %bb.0: 1289; CHECK-O0-NEXT: movq (%rdi), %rax 1290; CHECK-O0-NEXT: andq (%rsi), %rax 1291; CHECK-O0-NEXT: retq 1292; 1293; CHECK-O3-CUR-LABEL: load_fold_and3: 1294; CHECK-O3-CUR: # %bb.0: 1295; CHECK-O3-CUR-NEXT: movq (%rsi), %rax 1296; CHECK-O3-CUR-NEXT: andq (%rdi), %rax 1297; CHECK-O3-CUR-NEXT: retq 1298; 1299; CHECK-O3-EX-LABEL: load_fold_and3: 1300; CHECK-O3-EX: # %bb.0: 1301; CHECK-O3-EX-NEXT: movq (%rdi), %rax 1302; CHECK-O3-EX-NEXT: andq (%rsi), %rax 1303; CHECK-O3-EX-NEXT: retq 1304 %v = load atomic i64, i64* %p1 unordered, align 8 1305 %v2 = load atomic i64, i64* %p2 unordered, align 8 1306 %ret = and i64 %v, %v2 1307 ret i64 %ret 1308} 1309 1310; Legal, as expected 1311define i64 @load_fold_or1(i64* %p) { 1312; CHECK-LABEL: load_fold_or1: 1313; CHECK: # %bb.0: 1314; CHECK-NEXT: movq (%rdi), %rax 1315; CHECK-NEXT: orq $15, %rax 1316; CHECK-NEXT: retq 1317 %v = load atomic i64, i64* %p unordered, align 8 1318 %ret = or i64 %v, 15 1319 ret i64 %ret 1320} 1321 1322define i64 @load_fold_or2(i64* %p, i64 %v2) { 1323; CHECK-LABEL: load_fold_or2: 1324; CHECK: # %bb.0: 1325; CHECK-NEXT: movq %rsi, %rax 1326; CHECK-NEXT: orq (%rdi), %rax 1327; CHECK-NEXT: retq 1328 %v = load atomic i64, i64* %p unordered, align 8 1329 %ret = or i64 %v, %v2 1330 ret i64 %ret 1331} 1332 1333define i64 @load_fold_or3(i64* %p1, i64* %p2) { 1334; CHECK-O0-LABEL: load_fold_or3: 1335; CHECK-O0: # %bb.0: 1336; CHECK-O0-NEXT: movq (%rdi), %rax 1337; CHECK-O0-NEXT: orq (%rsi), %rax 1338; CHECK-O0-NEXT: retq 1339; 1340; CHECK-O3-CUR-LABEL: load_fold_or3: 1341; CHECK-O3-CUR: # %bb.0: 1342; CHECK-O3-CUR-NEXT: movq (%rsi), %rax 1343; CHECK-O3-CUR-NEXT: orq (%rdi), %rax 1344; CHECK-O3-CUR-NEXT: retq 1345; 1346; CHECK-O3-EX-LABEL: load_fold_or3: 1347; CHECK-O3-EX: # %bb.0: 1348; CHECK-O3-EX-NEXT: movq (%rdi), %rax 1349; CHECK-O3-EX-NEXT: orq (%rsi), %rax 1350; CHECK-O3-EX-NEXT: retq 1351 %v = load atomic i64, i64* %p1 unordered, align 8 1352 %v2 = load atomic i64, i64* %p2 unordered, align 8 1353 %ret = or i64 %v, %v2 1354 ret i64 %ret 1355} 1356 1357; Legal, as expected 1358define i64 @load_fold_xor1(i64* %p) { 1359; CHECK-LABEL: load_fold_xor1: 1360; CHECK: # %bb.0: 1361; CHECK-NEXT: movq (%rdi), %rax 1362; CHECK-NEXT: xorq $15, %rax 1363; CHECK-NEXT: retq 1364 %v = load atomic i64, i64* %p unordered, align 8 1365 %ret = xor i64 %v, 15 1366 ret i64 %ret 1367} 1368 1369define i64 @load_fold_xor2(i64* %p, i64 %v2) { 1370; CHECK-LABEL: load_fold_xor2: 1371; CHECK: # %bb.0: 1372; CHECK-NEXT: movq %rsi, %rax 1373; CHECK-NEXT: xorq (%rdi), %rax 1374; CHECK-NEXT: retq 1375 %v = load atomic i64, i64* %p unordered, align 8 1376 %ret = xor i64 %v, %v2 1377 ret i64 %ret 1378} 1379 1380define i64 @load_fold_xor3(i64* %p1, i64* %p2) { 1381; 
CHECK-O0-LABEL: load_fold_xor3: 1382; CHECK-O0: # %bb.0: 1383; CHECK-O0-NEXT: movq (%rdi), %rax 1384; CHECK-O0-NEXT: xorq (%rsi), %rax 1385; CHECK-O0-NEXT: retq 1386; 1387; CHECK-O3-CUR-LABEL: load_fold_xor3: 1388; CHECK-O3-CUR: # %bb.0: 1389; CHECK-O3-CUR-NEXT: movq (%rsi), %rax 1390; CHECK-O3-CUR-NEXT: xorq (%rdi), %rax 1391; CHECK-O3-CUR-NEXT: retq 1392; 1393; CHECK-O3-EX-LABEL: load_fold_xor3: 1394; CHECK-O3-EX: # %bb.0: 1395; CHECK-O3-EX-NEXT: movq (%rdi), %rax 1396; CHECK-O3-EX-NEXT: xorq (%rsi), %rax 1397; CHECK-O3-EX-NEXT: retq 1398 %v = load atomic i64, i64* %p1 unordered, align 8 1399 %v2 = load atomic i64, i64* %p2 unordered, align 8 1400 %ret = xor i64 %v, %v2 1401 ret i64 %ret 1402} 1403 1404define i1 @load_fold_icmp1(i64* %p) { 1405; CHECK-O0-LABEL: load_fold_icmp1: 1406; CHECK-O0: # %bb.0: 1407; CHECK-O0-NEXT: movq (%rdi), %rax 1408; CHECK-O0-NEXT: subq $15, %rax 1409; CHECK-O0-NEXT: sete %al 1410; CHECK-O0-NEXT: retq 1411; 1412; CHECK-O3-LABEL: load_fold_icmp1: 1413; CHECK-O3: # %bb.0: 1414; CHECK-O3-NEXT: cmpq $15, (%rdi) 1415; CHECK-O3-NEXT: sete %al 1416; CHECK-O3-NEXT: retq 1417 %v = load atomic i64, i64* %p unordered, align 8 1418 %ret = icmp eq i64 %v, 15 1419 ret i1 %ret 1420} 1421 1422define i1 @load_fold_icmp2(i64* %p, i64 %v2) { 1423; CHECK-O0-LABEL: load_fold_icmp2: 1424; CHECK-O0: # %bb.0: 1425; CHECK-O0-NEXT: movq (%rdi), %rax 1426; CHECK-O0-NEXT: subq %rsi, %rax 1427; CHECK-O0-NEXT: sete %al 1428; CHECK-O0-NEXT: retq 1429; 1430; CHECK-O3-LABEL: load_fold_icmp2: 1431; CHECK-O3: # %bb.0: 1432; CHECK-O3-NEXT: cmpq %rsi, (%rdi) 1433; CHECK-O3-NEXT: sete %al 1434; CHECK-O3-NEXT: retq 1435 %v = load atomic i64, i64* %p unordered, align 8 1436 %ret = icmp eq i64 %v, %v2 1437 ret i1 %ret 1438} 1439 1440define i1 @load_fold_icmp3(i64* %p1, i64* %p2) { 1441; CHECK-O0-LABEL: load_fold_icmp3: 1442; CHECK-O0: # %bb.0: 1443; CHECK-O0-NEXT: movq (%rdi), %rax 1444; CHECK-O0-NEXT: movq (%rsi), %rcx 1445; CHECK-O0-NEXT: subq %rcx, %rax 1446; CHECK-O0-NEXT: sete %al 1447; CHECK-O0-NEXT: retq 1448; 1449; CHECK-O3-CUR-LABEL: load_fold_icmp3: 1450; CHECK-O3-CUR: # %bb.0: 1451; CHECK-O3-CUR-NEXT: movq (%rsi), %rax 1452; CHECK-O3-CUR-NEXT: cmpq %rax, (%rdi) 1453; CHECK-O3-CUR-NEXT: sete %al 1454; CHECK-O3-CUR-NEXT: retq 1455; 1456; CHECK-O3-EX-LABEL: load_fold_icmp3: 1457; CHECK-O3-EX: # %bb.0: 1458; CHECK-O3-EX-NEXT: movq (%rdi), %rax 1459; CHECK-O3-EX-NEXT: cmpq (%rsi), %rax 1460; CHECK-O3-EX-NEXT: sete %al 1461; CHECK-O3-EX-NEXT: retq 1462 %v = load atomic i64, i64* %p1 unordered, align 8 1463 %v2 = load atomic i64, i64* %p2 unordered, align 8 1464 %ret = icmp eq i64 %v, %v2 1465 ret i1 %ret 1466} 1467 1468 1469;; The next batch of tests check for read-modify-write patterns 1470;; Legally, it's okay to use a memory operand here as long as the operand 1471;; is well aligned (i.e. doesn't cross a cache line boundary). We are 1472;; required not to narrow the store though! 
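;; As a concrete illustration, mirroring @rmw_fold_add1 just below, the
;; unordered sequence
;;
;;   %prev = load atomic i64, i64* %p unordered, align 8
;;   %val = add i64 %prev, 15
;;   store atomic i64 %val, i64* %p unordered, align 8
;;
;; may legally fold to a single `addq $15, (%rdi)`. The folded form still
;; performs a separate load and store at the hardware level; that is fine
;; because only the individual load and the individual store must be atomic,
;; not the read-modify-write as a unit. What we must not do is emit a store
;; narrower than the original i64.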
1473 1474; Legal, as expected 1475define void @rmw_fold_add1(i64* %p, i64 %v) { 1476; CHECK-O0-LABEL: rmw_fold_add1: 1477; CHECK-O0: # %bb.0: 1478; CHECK-O0-NEXT: movq (%rdi), %rax 1479; CHECK-O0-NEXT: addq $15, %rax 1480; CHECK-O0-NEXT: movq %rax, (%rdi) 1481; CHECK-O0-NEXT: retq 1482; 1483; CHECK-O3-LABEL: rmw_fold_add1: 1484; CHECK-O3: # %bb.0: 1485; CHECK-O3-NEXT: addq $15, (%rdi) 1486; CHECK-O3-NEXT: retq 1487 %prev = load atomic i64, i64* %p unordered, align 8 1488 %val = add i64 %prev, 15 1489 store atomic i64 %val, i64* %p unordered, align 8 1490 ret void 1491} 1492 1493; Legal, as expected 1494define void @rmw_fold_add2(i64* %p, i64 %v) { 1495; CHECK-O0-LABEL: rmw_fold_add2: 1496; CHECK-O0: # %bb.0: 1497; CHECK-O0-NEXT: movq (%rdi), %rax 1498; CHECK-O0-NEXT: addq %rsi, %rax 1499; CHECK-O0-NEXT: movq %rax, (%rdi) 1500; CHECK-O0-NEXT: retq 1501; 1502; CHECK-O3-LABEL: rmw_fold_add2: 1503; CHECK-O3: # %bb.0: 1504; CHECK-O3-NEXT: addq %rsi, (%rdi) 1505; CHECK-O3-NEXT: retq 1506 %prev = load atomic i64, i64* %p unordered, align 8 1507 %val = add i64 %prev, %v 1508 store atomic i64 %val, i64* %p unordered, align 8 1509 ret void 1510} 1511 1512; Legal, as expected 1513define void @rmw_fold_sub1(i64* %p, i64 %v) { 1514; CHECK-O0-LABEL: rmw_fold_sub1: 1515; CHECK-O0: # %bb.0: 1516; CHECK-O0-NEXT: movq (%rdi), %rax 1517; CHECK-O0-NEXT: addq $-15, %rax 1518; CHECK-O0-NEXT: movq %rax, (%rdi) 1519; CHECK-O0-NEXT: retq 1520; 1521; CHECK-O3-LABEL: rmw_fold_sub1: 1522; CHECK-O3: # %bb.0: 1523; CHECK-O3-NEXT: addq $-15, (%rdi) 1524; CHECK-O3-NEXT: retq 1525 %prev = load atomic i64, i64* %p unordered, align 8 1526 %val = sub i64 %prev, 15 1527 store atomic i64 %val, i64* %p unordered, align 8 1528 ret void 1529} 1530 1531; Legal, as expected 1532define void @rmw_fold_sub2(i64* %p, i64 %v) { 1533; CHECK-O0-LABEL: rmw_fold_sub2: 1534; CHECK-O0: # %bb.0: 1535; CHECK-O0-NEXT: movq (%rdi), %rax 1536; CHECK-O0-NEXT: subq %rsi, %rax 1537; CHECK-O0-NEXT: movq %rax, (%rdi) 1538; CHECK-O0-NEXT: retq 1539; 1540; CHECK-O3-LABEL: rmw_fold_sub2: 1541; CHECK-O3: # %bb.0: 1542; CHECK-O3-NEXT: subq %rsi, (%rdi) 1543; CHECK-O3-NEXT: retq 1544 %prev = load atomic i64, i64* %p unordered, align 8 1545 %val = sub i64 %prev, %v 1546 store atomic i64 %val, i64* %p unordered, align 8 1547 ret void 1548} 1549 1550; Legal, as expected 1551define void @rmw_fold_mul1(i64* %p, i64 %v) { 1552; CHECK-LABEL: rmw_fold_mul1: 1553; CHECK: # %bb.0: 1554; CHECK-NEXT: movq (%rdi), %rax 1555; CHECK-NEXT: leaq (%rax,%rax,4), %rax 1556; CHECK-NEXT: leaq (%rax,%rax,2), %rax 1557; CHECK-NEXT: movq %rax, (%rdi) 1558; CHECK-NEXT: retq 1559 %prev = load atomic i64, i64* %p unordered, align 8 1560 %val = mul i64 %prev, 15 1561 store atomic i64 %val, i64* %p unordered, align 8 1562 ret void 1563} 1564 1565; Legal to fold (TODO) 1566define void @rmw_fold_mul2(i64* %p, i64 %v) { 1567; CHECK-O0-LABEL: rmw_fold_mul2: 1568; CHECK-O0: # %bb.0: 1569; CHECK-O0-NEXT: movq (%rdi), %rax 1570; CHECK-O0-NEXT: imulq %rsi, %rax 1571; CHECK-O0-NEXT: movq %rax, (%rdi) 1572; CHECK-O0-NEXT: retq 1573; 1574; CHECK-O3-LABEL: rmw_fold_mul2: 1575; CHECK-O3: # %bb.0: 1576; CHECK-O3-NEXT: imulq (%rdi), %rsi 1577; CHECK-O3-NEXT: movq %rsi, (%rdi) 1578; CHECK-O3-NEXT: retq 1579 %prev = load atomic i64, i64* %p unordered, align 8 1580 %val = mul i64 %prev, %v 1581 store atomic i64 %val, i64* %p unordered, align 8 1582 ret void 1583} 1584 1585; Legal, as expected 1586define void @rmw_fold_sdiv1(i64* %p, i64 %v) { 1587; CHECK-O0-LABEL: rmw_fold_sdiv1: 1588; CHECK-O0: # %bb.0: 
1589; CHECK-O0-NEXT: movq (%rdi), %rcx 1590; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 1591; CHECK-O0-NEXT: movq %rcx, %rax 1592; CHECK-O0-NEXT: imulq %rdx 1593; CHECK-O0-NEXT: movq %rdx, %rax 1594; CHECK-O0-NEXT: addq %rcx, %rax 1595; CHECK-O0-NEXT: movq %rax, %rcx 1596; CHECK-O0-NEXT: shrq $63, %rcx 1597; CHECK-O0-NEXT: sarq $3, %rax 1598; CHECK-O0-NEXT: addq %rcx, %rax 1599; CHECK-O0-NEXT: movq %rax, (%rdi) 1600; CHECK-O0-NEXT: retq 1601; 1602; CHECK-O3-LABEL: rmw_fold_sdiv1: 1603; CHECK-O3: # %bb.0: 1604; CHECK-O3-NEXT: movq (%rdi), %rcx 1605; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 1606; CHECK-O3-NEXT: movq %rcx, %rax 1607; CHECK-O3-NEXT: imulq %rdx 1608; CHECK-O3-NEXT: addq %rcx, %rdx 1609; CHECK-O3-NEXT: movq %rdx, %rax 1610; CHECK-O3-NEXT: shrq $63, %rax 1611; CHECK-O3-NEXT: sarq $3, %rdx 1612; CHECK-O3-NEXT: addq %rax, %rdx 1613; CHECK-O3-NEXT: movq %rdx, (%rdi) 1614; CHECK-O3-NEXT: retq 1615 %prev = load atomic i64, i64* %p unordered, align 8 1616 %val = sdiv i64 %prev, 15 1617 store atomic i64 %val, i64* %p unordered, align 8 1618 ret void 1619} 1620 1621; Legal, as expected 1622define void @rmw_fold_sdiv2(i64* %p, i64 %v) { 1623; CHECK-O0-LABEL: rmw_fold_sdiv2: 1624; CHECK-O0: # %bb.0: 1625; CHECK-O0-NEXT: movq (%rdi), %rax 1626; CHECK-O0-NEXT: cqto 1627; CHECK-O0-NEXT: idivq %rsi 1628; CHECK-O0-NEXT: movq %rax, (%rdi) 1629; CHECK-O0-NEXT: retq 1630; 1631; CHECK-O3-LABEL: rmw_fold_sdiv2: 1632; CHECK-O3: # %bb.0: 1633; CHECK-O3-NEXT: movq (%rdi), %rax 1634; CHECK-O3-NEXT: movq %rax, %rcx 1635; CHECK-O3-NEXT: orq %rsi, %rcx 1636; CHECK-O3-NEXT: shrq $32, %rcx 1637; CHECK-O3-NEXT: je .LBB74_1 1638; CHECK-O3-NEXT: # %bb.2: 1639; CHECK-O3-NEXT: cqto 1640; CHECK-O3-NEXT: idivq %rsi 1641; CHECK-O3-NEXT: movq %rax, (%rdi) 1642; CHECK-O3-NEXT: retq 1643; CHECK-O3-NEXT: .LBB74_1: 1644; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax 1645; CHECK-O3-NEXT: xorl %edx, %edx 1646; CHECK-O3-NEXT: divl %esi 1647; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax 1648; CHECK-O3-NEXT: movq %rax, (%rdi) 1649; CHECK-O3-NEXT: retq 1650 %prev = load atomic i64, i64* %p unordered, align 8 1651 %val = sdiv i64 %prev, %v 1652 store atomic i64 %val, i64* %p unordered, align 8 1653 ret void 1654} 1655 1656; Legal, as expected 1657define void @rmw_fold_udiv1(i64* %p, i64 %v) { 1658; CHECK-O0-LABEL: rmw_fold_udiv1: 1659; CHECK-O0: # %bb.0: 1660; CHECK-O0-NEXT: movq (%rdi), %rdx 1661; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 1662; CHECK-O0-NEXT: mulxq %rax, %rax, %rax 1663; CHECK-O0-NEXT: shrq $3, %rax 1664; CHECK-O0-NEXT: movq %rax, (%rdi) 1665; CHECK-O0-NEXT: retq 1666; 1667; CHECK-O3-CUR-LABEL: rmw_fold_udiv1: 1668; CHECK-O3-CUR: # %bb.0: 1669; CHECK-O3-CUR-NEXT: movq (%rdi), %rdx 1670; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 1671; CHECK-O3-CUR-NEXT: mulxq %rax, %rax, %rax 1672; CHECK-O3-CUR-NEXT: shrq $3, %rax 1673; CHECK-O3-CUR-NEXT: movq %rax, (%rdi) 1674; CHECK-O3-CUR-NEXT: retq 1675; 1676; CHECK-O3-EX-LABEL: rmw_fold_udiv1: 1677; CHECK-O3-EX: # %bb.0: 1678; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 1679; CHECK-O3-EX-NEXT: mulxq (%rdi), %rax, %rax 1680; CHECK-O3-EX-NEXT: shrq $3, %rax 1681; CHECK-O3-EX-NEXT: movq %rax, (%rdi) 1682; CHECK-O3-EX-NEXT: retq 1683 %prev = load atomic i64, i64* %p unordered, align 8 1684 %val = udiv i64 %prev, 15 1685 store atomic i64 %val, i64* %p unordered, align 8 1686 ret 
void 1687} 1688 1689; Legal, as expected 1690define void @rmw_fold_udiv2(i64* %p, i64 %v) { 1691; CHECK-O0-LABEL: rmw_fold_udiv2: 1692; CHECK-O0: # %bb.0: 1693; CHECK-O0-NEXT: movq (%rdi), %rax 1694; CHECK-O0-NEXT: xorl %ecx, %ecx 1695; CHECK-O0-NEXT: movl %ecx, %edx 1696; CHECK-O0-NEXT: divq %rsi 1697; CHECK-O0-NEXT: movq %rax, (%rdi) 1698; CHECK-O0-NEXT: retq 1699; 1700; CHECK-O3-LABEL: rmw_fold_udiv2: 1701; CHECK-O3: # %bb.0: 1702; CHECK-O3-NEXT: movq (%rdi), %rax 1703; CHECK-O3-NEXT: movq %rax, %rcx 1704; CHECK-O3-NEXT: orq %rsi, %rcx 1705; CHECK-O3-NEXT: shrq $32, %rcx 1706; CHECK-O3-NEXT: je .LBB76_1 1707; CHECK-O3-NEXT: # %bb.2: 1708; CHECK-O3-NEXT: xorl %edx, %edx 1709; CHECK-O3-NEXT: divq %rsi 1710; CHECK-O3-NEXT: movq %rax, (%rdi) 1711; CHECK-O3-NEXT: retq 1712; CHECK-O3-NEXT: .LBB76_1: 1713; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax 1714; CHECK-O3-NEXT: xorl %edx, %edx 1715; CHECK-O3-NEXT: divl %esi 1716; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax 1717; CHECK-O3-NEXT: movq %rax, (%rdi) 1718; CHECK-O3-NEXT: retq 1719 %prev = load atomic i64, i64* %p unordered, align 8 1720 %val = udiv i64 %prev, %v 1721 store atomic i64 %val, i64* %p unordered, align 8 1722 ret void 1723} 1724 1725; Legal, as expected 1726define void @rmw_fold_srem1(i64* %p, i64 %v) { 1727; CHECK-O0-LABEL: rmw_fold_srem1: 1728; CHECK-O0: # %bb.0: 1729; CHECK-O0-NEXT: movq (%rdi), %rax 1730; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1731; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 1732; CHECK-O0-NEXT: imulq %rcx 1733; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload 1734; CHECK-O0-NEXT: movq %rdx, %rcx 1735; CHECK-O0-NEXT: addq %rax, %rcx 1736; CHECK-O0-NEXT: movq %rcx, %rdx 1737; CHECK-O0-NEXT: shrq $63, %rdx 1738; CHECK-O0-NEXT: sarq $3, %rcx 1739; CHECK-O0-NEXT: addq %rdx, %rcx 1740; CHECK-O0-NEXT: leaq (%rcx,%rcx,4), %rcx 1741; CHECK-O0-NEXT: leaq (%rcx,%rcx,2), %rcx 1742; CHECK-O0-NEXT: subq %rcx, %rax 1743; CHECK-O0-NEXT: movq %rax, (%rdi) 1744; CHECK-O0-NEXT: retq 1745; 1746; CHECK-O3-LABEL: rmw_fold_srem1: 1747; CHECK-O3: # %bb.0: 1748; CHECK-O3-NEXT: movq (%rdi), %rcx 1749; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 1750; CHECK-O3-NEXT: movq %rcx, %rax 1751; CHECK-O3-NEXT: imulq %rdx 1752; CHECK-O3-NEXT: addq %rcx, %rdx 1753; CHECK-O3-NEXT: movq %rdx, %rax 1754; CHECK-O3-NEXT: shrq $63, %rax 1755; CHECK-O3-NEXT: sarq $3, %rdx 1756; CHECK-O3-NEXT: addq %rax, %rdx 1757; CHECK-O3-NEXT: leaq (%rdx,%rdx,4), %rax 1758; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax 1759; CHECK-O3-NEXT: subq %rax, %rcx 1760; CHECK-O3-NEXT: movq %rcx, (%rdi) 1761; CHECK-O3-NEXT: retq 1762 %prev = load atomic i64, i64* %p unordered, align 8 1763 %val = srem i64 %prev, 15 1764 store atomic i64 %val, i64* %p unordered, align 8 1765 ret void 1766} 1767 1768; Legal, as expected 1769define void @rmw_fold_srem2(i64* %p, i64 %v) { 1770; CHECK-O0-LABEL: rmw_fold_srem2: 1771; CHECK-O0: # %bb.0: 1772; CHECK-O0-NEXT: movq (%rdi), %rax 1773; CHECK-O0-NEXT: cqto 1774; CHECK-O0-NEXT: idivq %rsi 1775; CHECK-O0-NEXT: movq %rdx, (%rdi) 1776; CHECK-O0-NEXT: retq 1777; 1778; CHECK-O3-LABEL: rmw_fold_srem2: 1779; CHECK-O3: # %bb.0: 1780; CHECK-O3-NEXT: movq (%rdi), %rax 1781; CHECK-O3-NEXT: movq %rax, %rcx 1782; CHECK-O3-NEXT: orq %rsi, %rcx 1783; CHECK-O3-NEXT: shrq $32, %rcx 1784; CHECK-O3-NEXT: je .LBB78_1 1785; CHECK-O3-NEXT: # %bb.2: 1786; CHECK-O3-NEXT: cqto 1787; CHECK-O3-NEXT: idivq %rsi 1788; CHECK-O3-NEXT: movq 

; Legal, as expected
define void @rmw_fold_srem2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_srem2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    cqto
; CHECK-O0-NEXT:    idivq %rsi
; CHECK-O0-NEXT:    movq %rdx, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_srem2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq %rax, %rcx
; CHECK-O3-NEXT:    orq %rsi, %rcx
; CHECK-O3-NEXT:    shrq $32, %rcx
; CHECK-O3-NEXT:    je .LBB78_1
; CHECK-O3-NEXT:  # %bb.2:
; CHECK-O3-NEXT:    cqto
; CHECK-O3-NEXT:    idivq %rsi
; CHECK-O3-NEXT:    movq %rdx, (%rdi)
; CHECK-O3-NEXT:    retq
; CHECK-O3-NEXT:  .LBB78_1:
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divl %esi
; CHECK-O3-NEXT:    # kill: def $edx killed $edx def $rdx
; CHECK-O3-NEXT:    movq %rdx, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = srem i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_urem1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_urem1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
; CHECK-O0-NEXT:    movq %rax, %rdx
; CHECK-O0-NEXT:    mulxq %rcx, %rcx, %rcx
; CHECK-O0-NEXT:    shrq $3, %rcx
; CHECK-O0-NEXT:    leaq (%rcx,%rcx,4), %rcx
; CHECK-O0-NEXT:    leaq (%rcx,%rcx,2), %rcx
; CHECK-O0-NEXT:    subq %rcx, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_urem1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rdx
; CHECK-O3-NEXT:    movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
; CHECK-O3-NEXT:    mulxq %rax, %rax, %rax
; CHECK-O3-NEXT:    shrq $3, %rax
; CHECK-O3-NEXT:    leaq (%rax,%rax,4), %rax
; CHECK-O3-NEXT:    leaq (%rax,%rax,2), %rax
; CHECK-O3-NEXT:    subq %rax, %rdx
; CHECK-O3-NEXT:    movq %rdx, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = urem i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_urem2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_urem2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    xorl %ecx, %ecx
; CHECK-O0-NEXT:    movl %ecx, %edx
; CHECK-O0-NEXT:    divq %rsi
; CHECK-O0-NEXT:    movq %rdx, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_urem2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq %rax, %rcx
; CHECK-O3-NEXT:    orq %rsi, %rcx
; CHECK-O3-NEXT:    shrq $32, %rcx
; CHECK-O3-NEXT:    je .LBB80_1
; CHECK-O3-NEXT:  # %bb.2:
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divq %rsi
; CHECK-O3-NEXT:    movq %rdx, (%rdi)
; CHECK-O3-NEXT:    retq
; CHECK-O3-NEXT:  .LBB80_1:
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divl %esi
; CHECK-O3-NEXT:    # kill: def $edx killed $edx def $rdx
; CHECK-O3-NEXT:    movq %rdx, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = urem i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}
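
;; Note: In the variable-divisor tests above, the O3 output first tests
;; whether both operands fit in 32 bits (orq/shrq $32/je) and uses the
;; cheaper divl when they do. This "bypass slow division" fast path comes
;; from tuning for -mcpu=skylake and is unrelated to the atomic lowering.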

; Legal to fold (TODO)
define void @rmw_fold_shl1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_shl1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    shlq $15, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-CUR-LABEL: rmw_fold_shl1:
; CHECK-O3-CUR:       # %bb.0:
; CHECK-O3-CUR-NEXT:    movq (%rdi), %rax
; CHECK-O3-CUR-NEXT:    shlq $15, %rax
; CHECK-O3-CUR-NEXT:    movq %rax, (%rdi)
; CHECK-O3-CUR-NEXT:    retq
;
; CHECK-O3-EX-LABEL: rmw_fold_shl1:
; CHECK-O3-EX:       # %bb.0:
; CHECK-O3-EX-NEXT:    shlq $15, (%rdi)
; CHECK-O3-EX-NEXT:    retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = shl i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
define void @rmw_fold_shl2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_shl2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movb %sil, %dl
; CHECK-O0-NEXT:    # implicit-def: $rcx
; CHECK-O0-NEXT:    movb %dl, %cl
; CHECK-O0-NEXT:    shlxq %rcx, %rax, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-CUR-LABEL: rmw_fold_shl2:
; CHECK-O3-CUR:       # %bb.0:
; CHECK-O3-CUR-NEXT:    shlxq %rsi, (%rdi), %rax
; CHECK-O3-CUR-NEXT:    movq %rax, (%rdi)
; CHECK-O3-CUR-NEXT:    retq
;
; CHECK-O3-EX-LABEL: rmw_fold_shl2:
; CHECK-O3-EX:       # %bb.0:
; CHECK-O3-EX-NEXT:    movq %rsi, %rcx
; CHECK-O3-EX-NEXT:    # kill: def $cl killed $cl killed $rcx
; CHECK-O3-EX-NEXT:    shlq %cl, (%rdi)
; CHECK-O3-EX-NEXT:    retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = shl i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
define void @rmw_fold_lshr1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_lshr1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    shrq $15, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-CUR-LABEL: rmw_fold_lshr1:
; CHECK-O3-CUR:       # %bb.0:
; CHECK-O3-CUR-NEXT:    movq (%rdi), %rax
; CHECK-O3-CUR-NEXT:    shrq $15, %rax
; CHECK-O3-CUR-NEXT:    movq %rax, (%rdi)
; CHECK-O3-CUR-NEXT:    retq
;
; CHECK-O3-EX-LABEL: rmw_fold_lshr1:
; CHECK-O3-EX:       # %bb.0:
; CHECK-O3-EX-NEXT:    shrq $15, (%rdi)
; CHECK-O3-EX-NEXT:    retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = lshr i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
define void @rmw_fold_lshr2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_lshr2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movb %sil, %dl
; CHECK-O0-NEXT:    # implicit-def: $rcx
; CHECK-O0-NEXT:    movb %dl, %cl
; CHECK-O0-NEXT:    shrxq %rcx, %rax, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-CUR-LABEL: rmw_fold_lshr2:
; CHECK-O3-CUR:       # %bb.0:
; CHECK-O3-CUR-NEXT:    shrxq %rsi, (%rdi), %rax
; CHECK-O3-CUR-NEXT:    movq %rax, (%rdi)
; CHECK-O3-CUR-NEXT:    retq
;
; CHECK-O3-EX-LABEL: rmw_fold_lshr2:
; CHECK-O3-EX:       # %bb.0:
; CHECK-O3-EX-NEXT:    movq %rsi, %rcx
; CHECK-O3-EX-NEXT:    # kill: def $cl killed $cl killed $rcx
; CHECK-O3-EX-NEXT:    shrq %cl, (%rdi)
; CHECK-O3-EX-NEXT:    retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = lshr i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}
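
;; Note: For the variable-count shifts, the BMI2 forms (shlxq/shrxq/sarxq)
;; accept the count in any register but have no memory-destination form,
;; while the classic memory-destination shifts require the count in %cl.
;; That is why the EX output above moves %rsi into %rcx before shifting.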

; Legal to fold (TODO)
define void @rmw_fold_ashr1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_ashr1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    sarq $15, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-CUR-LABEL: rmw_fold_ashr1:
; CHECK-O3-CUR:       # %bb.0:
; CHECK-O3-CUR-NEXT:    movq (%rdi), %rax
; CHECK-O3-CUR-NEXT:    sarq $15, %rax
; CHECK-O3-CUR-NEXT:    movq %rax, (%rdi)
; CHECK-O3-CUR-NEXT:    retq
;
; CHECK-O3-EX-LABEL: rmw_fold_ashr1:
; CHECK-O3-EX:       # %bb.0:
; CHECK-O3-EX-NEXT:    sarq $15, (%rdi)
; CHECK-O3-EX-NEXT:    retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = ashr i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
define void @rmw_fold_ashr2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_ashr2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movb %sil, %dl
; CHECK-O0-NEXT:    # implicit-def: $rcx
; CHECK-O0-NEXT:    movb %dl, %cl
; CHECK-O0-NEXT:    sarxq %rcx, %rax, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-CUR-LABEL: rmw_fold_ashr2:
; CHECK-O3-CUR:       # %bb.0:
; CHECK-O3-CUR-NEXT:    sarxq %rsi, (%rdi), %rax
; CHECK-O3-CUR-NEXT:    movq %rax, (%rdi)
; CHECK-O3-CUR-NEXT:    retq
;
; CHECK-O3-EX-LABEL: rmw_fold_ashr2:
; CHECK-O3-EX:       # %bb.0:
; CHECK-O3-EX-NEXT:    movq %rsi, %rcx
; CHECK-O3-EX-NEXT:    # kill: def $cl killed $cl killed $rcx
; CHECK-O3-EX-NEXT:    sarq %cl, (%rdi)
; CHECK-O3-EX-NEXT:    retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = ashr i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_and1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_and1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT:    andl $15, %eax
; CHECK-O0-NEXT:    # kill: def $rax killed $eax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_and1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    andq $15, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = and i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}
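
;; Note: The "# kill:" lines in the checks are not instructions; they are
;; sub-register liveness annotations (e.g. when a value narrows from %rax
;; to %eax) that the backend prints as assembly comments, and FileCheck
;; simply matches them like any other line.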

; Legal, as expected
define void @rmw_fold_and2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_and2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    andq %rsi, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_and2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    andq %rsi, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = and i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_or1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_or1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    orq $15, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_or1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    orq $15, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = or i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_or2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_or2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    orq %rsi, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_or2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    orq %rsi, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = or i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_xor1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_xor1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    xorq $15, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_xor1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    xorq $15, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = xor i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_xor2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_xor2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    xorq %rsi, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_xor2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    xorq %rsi, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = xor i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

;; The next batch tests truncations, in combination w/operations which could
;; be folded against the memory operation.

; Legal to reduce the load width (TODO)
define i32 @fold_trunc(i64* %p) {
; CHECK-LABEL: fold_trunc:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = trunc i64 %v to i32
  ret i32 %ret
}
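
;; Note: Unlike splitting a load (see split_load below), shrinking one is
;; sound for unordered atomics: a single narrower, naturally aligned load
;; still observes one consistent value, so the i64 load above could become
;; an i32 load of the low half without losing atomicity.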

; Legal to reduce the load width and fold the load (TODO)
define i32 @fold_trunc_add(i64* %p, i32 %v2) {
; CHECK-O0-LABEL: fold_trunc_add:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT:    addl %esi, %eax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: fold_trunc_add:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    addl %esi, %eax
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %trunc = trunc i64 %v to i32
  %ret = add i32 %trunc, %v2
  ret i32 %ret
}

; Legal to reduce the load width and fold the load (TODO)
define i32 @fold_trunc_and(i64* %p, i32 %v2) {
; CHECK-O0-LABEL: fold_trunc_and:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT:    andl %esi, %eax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: fold_trunc_and:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    andl %esi, %eax
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %trunc = trunc i64 %v to i32
  %ret = and i32 %trunc, %v2
  ret i32 %ret
}

; Legal to reduce the load width and fold the load (TODO)
define i32 @fold_trunc_or(i64* %p, i32 %v2) {
; CHECK-O0-LABEL: fold_trunc_or:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT:    orl %esi, %eax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: fold_trunc_or:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    orl %esi, %eax
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %trunc = trunc i64 %v to i32
  %ret = or i32 %trunc, %v2
  ret i32 %ret
}

; It's tempting to split the wide load into two smaller byte loads
; to reduce memory traffic, but this would be illegal for an atomic load.
define i32 @split_load(i64* %p) {
; CHECK-O0-LABEL: split_load:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rcx
; CHECK-O0-NEXT:    movb %cl, %al
; CHECK-O0-NEXT:    shrq $32, %rcx
; CHECK-O0-NEXT:    # kill: def $cl killed $cl killed $rcx
; CHECK-O0-NEXT:    orb %cl, %al
; CHECK-O0-NEXT:    movzbl %al, %eax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: split_load:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq %rax, %rcx
; CHECK-O3-NEXT:    shrq $32, %rcx
; CHECK-O3-NEXT:    orl %eax, %ecx
; CHECK-O3-NEXT:    movzbl %cl, %eax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %b1 = trunc i64 %v to i8
  %v.shift = lshr i64 %v, 32
  %b2 = trunc i64 %v.shift to i8
  %or = or i8 %b1, %b2
  %ret = zext i8 %or to i32
  ret i32 %ret
}
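
;; Note: The split is illegal because two separate narrow loads can be torn
;; by a concurrent store: each byte could then come from a different value
;; of the i64, an outcome a single unordered atomic load must never expose.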

;; A collection of simple memory forwarding tests. Nothing particularly
;; interesting semantics-wise, just demonstrating obvious missed transforms.

@Zero = constant i64 0

; TODO: should return constant
define i64 @constant_folding(i64* %p) {
; CHECK-LABEL: constant_folding:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  ret i64 %v
}

; Legal to forward and fold (TODO)
define i64 @load_forwarding(i64* %p) {
; CHECK-LABEL: load_forwarding:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    orq (%rdi), %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  %v2 = load atomic i64, i64* %p unordered, align 8
  %ret = or i64 %v, %v2
  ret i64 %ret
}

; Legal to forward (TODO)
define i64 @store_forward(i64* %p, i64 %v) {
; CHECK-LABEL: store_forward:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, (%rdi)
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    retq
  store atomic i64 %v, i64* %p unordered, align 8
  %ret = load atomic i64, i64* %p unordered, align 8
  ret i64 %ret
}

; Legal to kill (TODO)
define void @dead_writeback(i64* %p) {
; CHECK-LABEL: dead_writeback:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq %rax, (%rdi)
; CHECK-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  store atomic i64 %v, i64* %p unordered, align 8
  ret void
}

; Legal to kill (TODO)
define void @dead_store(i64* %p, i64 %v) {
; CHECK-LABEL: dead_store:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq $0, (%rdi)
; CHECK-NEXT:    movq %rsi, (%rdi)
; CHECK-NEXT:    retq
  store atomic i64 0, i64* %p unordered, align 8
  store atomic i64 %v, i64* %p unordered, align 8
  ret void
}

;; The next batch of tests ensure that we don't try to fold a load into a
;; use where the code motion implied for the load is prevented by a fence.
;; Note: We're checking that the load doesn't get moved below the fence as
;; part of folding, but it is technically legal to lift the add above the
;; fence. If that were to happen, please rewrite the tests so that the
;; no-load-movement property is still being checked.

define i64 @nofold_fence(i64* %p) {
; CHECK-LABEL: nofold_fence:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    mfence
; CHECK-NEXT:    addq $15, %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  fence seq_cst
  %ret = add i64 %v, 15
  ret i64 %ret
}

define i64 @nofold_fence_acquire(i64* %p) {
; CHECK-LABEL: nofold_fence_acquire:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    #MEMBARRIER
; CHECK-NEXT:    addq $15, %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  fence acquire
  %ret = add i64 %v, 15
  ret i64 %ret
}

define i64 @nofold_stfence(i64* %p) {
; CHECK-LABEL: nofold_stfence:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    #MEMBARRIER
; CHECK-NEXT:    addq $15, %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8
  fence syncscope("singlethread") seq_cst
  %ret = add i64 %v, 15
  ret i64 %ret
}
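
;; Note: #MEMBARRIER in the two tests above is a compiler-only barrier. No
;; instruction is needed (x86 already gives these accesses the required
;; hardware ordering), but the pseudo still blocks compiler reordering, so
;; the load cannot be folded past it.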

;; Next, test how well we can fold invariant loads.

@Constant = external dso_local constant i64

define i64 @fold_constant(i64 %arg) {
; CHECK-O0-LABEL: fold_constant:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq %rdi, %rax
; CHECK-O0-NEXT:    addq Constant, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: fold_constant:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq %rdi, %rax
; CHECK-O3-NEXT:    addq Constant(%rip), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, i64* @Constant unordered, align 8
  %ret = add i64 %v, %arg
  ret i64 %ret
}

define i64 @fold_constant_clobber(i64* %p, i64 %arg) {
; CHECK-O0-LABEL: fold_constant_clobber:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq Constant(%rip), %rax
; CHECK-O0-NEXT:    movq $5, (%rdi)
; CHECK-O0-NEXT:    addq %rsi, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-CUR-LABEL: fold_constant_clobber:
; CHECK-O3-CUR:       # %bb.0:
; CHECK-O3-CUR-NEXT:    movq Constant(%rip), %rax
; CHECK-O3-CUR-NEXT:    movq $5, (%rdi)
; CHECK-O3-CUR-NEXT:    addq %rsi, %rax
; CHECK-O3-CUR-NEXT:    retq
;
; CHECK-O3-EX-LABEL: fold_constant_clobber:
; CHECK-O3-EX:       # %bb.0:
; CHECK-O3-EX-NEXT:    movq %rsi, %rax
; CHECK-O3-EX-NEXT:    addq Constant(%rip), %rax
; CHECK-O3-EX-NEXT:    movq $5, (%rdi)
; CHECK-O3-EX-NEXT:    retq
  %v = load atomic i64, i64* @Constant unordered, align 8
  store i64 5, i64* %p
  %ret = add i64 %v, %arg
  ret i64 %ret
}

define i64 @fold_constant_fence(i64 %arg) {
; CHECK-O0-LABEL: fold_constant_fence:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq Constant(%rip), %rax
; CHECK-O0-NEXT:    mfence
; CHECK-O0-NEXT:    addq %rdi, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-CUR-LABEL: fold_constant_fence:
; CHECK-O3-CUR:       # %bb.0:
; CHECK-O3-CUR-NEXT:    movq Constant(%rip), %rax
; CHECK-O3-CUR-NEXT:    mfence
; CHECK-O3-CUR-NEXT:    addq %rdi, %rax
; CHECK-O3-CUR-NEXT:    retq
;
; CHECK-O3-EX-LABEL: fold_constant_fence:
; CHECK-O3-EX:       # %bb.0:
; CHECK-O3-EX-NEXT:    movq %rdi, %rax
; CHECK-O3-EX-NEXT:    addq Constant(%rip), %rax
; CHECK-O3-EX-NEXT:    mfence
; CHECK-O3-EX-NEXT:    retq
  %v = load atomic i64, i64* @Constant unordered, align 8
  fence seq_cst
  %ret = add i64 %v, %arg
  ret i64 %ret
}

define i64 @fold_invariant_clobber(i64* dereferenceable(8) %p, i64 %arg) {
; CHECK-O0-LABEL: fold_invariant_clobber:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movq $5, (%rdi)
; CHECK-O0-NEXT:    addq %rsi, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-CUR-LABEL: fold_invariant_clobber:
; CHECK-O3-CUR:       # %bb.0:
; CHECK-O3-CUR-NEXT:    movq (%rdi), %rax
; CHECK-O3-CUR-NEXT:    movq $5, (%rdi)
; CHECK-O3-CUR-NEXT:    addq %rsi, %rax
; CHECK-O3-CUR-NEXT:    retq
;
; CHECK-O3-EX-LABEL: fold_invariant_clobber:
; CHECK-O3-EX:       # %bb.0:
; CHECK-O3-EX-NEXT:    movq %rsi, %rax
; CHECK-O3-EX-NEXT:    addq (%rdi), %rax
; CHECK-O3-EX-NEXT:    movq $5, (%rdi)
; CHECK-O3-EX-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{}
  store i64 5, i64* %p
  %ret = add i64 %v, %arg
  ret i64 %ret
}

define i64 @fold_invariant_fence(i64* dereferenceable(8) %p, i64 %arg) {
; CHECK-O0-LABEL: fold_invariant_fence:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    mfence
; CHECK-O0-NEXT:    addq %rsi, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-CUR-LABEL: fold_invariant_fence:
; CHECK-O3-CUR:       # %bb.0:
; CHECK-O3-CUR-NEXT:    movq (%rdi), %rax
; CHECK-O3-CUR-NEXT:    mfence
; CHECK-O3-CUR-NEXT:    addq %rsi, %rax
; CHECK-O3-CUR-NEXT:    retq
;
; CHECK-O3-EX-LABEL: fold_invariant_fence:
; CHECK-O3-EX:       # %bb.0:
; CHECK-O3-EX-NEXT:    movq %rsi, %rax
; CHECK-O3-EX-NEXT:    addq (%rdi), %rax
; CHECK-O3-EX-NEXT:    mfence
; CHECK-O3-EX-NEXT:    retq
  %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{}
  fence seq_cst
  %ret = add i64 %v, %arg
  ret i64 %ret
}
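
;; Note: !invariant.load asserts that the location holds the same value
;; whenever it is dereferenceable, which is what permits the EX output to
;; fold the load into the add even past the clobbering store or the fence.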

; Exercise a few cases involving any-extend idioms

define i16 @load_i8_anyext_i16(i8* %ptr) {
; CHECK-O0-CUR-LABEL: load_i8_anyext_i16:
; CHECK-O0-CUR:       # %bb.0:
; CHECK-O0-CUR-NEXT:    movb (%rdi), %al
; CHECK-O0-CUR-NEXT:    movzbl %al, %eax
; CHECK-O0-CUR-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-O0-CUR-NEXT:    retq
;
; CHECK-O3-CUR-LABEL: load_i8_anyext_i16:
; CHECK-O3-CUR:       # %bb.0:
; CHECK-O3-CUR-NEXT:    movzbl (%rdi), %eax
; CHECK-O3-CUR-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-O3-CUR-NEXT:    retq
;
; CHECK-O0-EX-LABEL: load_i8_anyext_i16:
; CHECK-O0-EX:       # %bb.0:
; CHECK-O0-EX-NEXT:    vpbroadcastb (%rdi), %xmm0
; CHECK-O0-EX-NEXT:    vmovd %xmm0, %eax
; CHECK-O0-EX-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-O0-EX-NEXT:    retq
;
; CHECK-O3-EX-LABEL: load_i8_anyext_i16:
; CHECK-O3-EX:       # %bb.0:
; CHECK-O3-EX-NEXT:    vpbroadcastb (%rdi), %xmm0
; CHECK-O3-EX-NEXT:    vmovd %xmm0, %eax
; CHECK-O3-EX-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-O3-EX-NEXT:    retq
  %v = load atomic i8, i8* %ptr unordered, align 2
  %vec = insertelement <2 x i8> undef, i8 %v, i32 0
  %res = bitcast <2 x i8> %vec to i16
  ret i16 %res
}

define i32 @load_i8_anyext_i32(i8* %ptr) {
; CHECK-O0-CUR-LABEL: load_i8_anyext_i32:
; CHECK-O0-CUR:       # %bb.0:
; CHECK-O0-CUR-NEXT:    movb (%rdi), %al
; CHECK-O0-CUR-NEXT:    movzbl %al, %eax
; CHECK-O0-CUR-NEXT:    retq
;
; CHECK-O3-CUR-LABEL: load_i8_anyext_i32:
; CHECK-O3-CUR:       # %bb.0:
; CHECK-O3-CUR-NEXT:    movzbl (%rdi), %eax
; CHECK-O3-CUR-NEXT:    retq
;
; CHECK-O0-EX-LABEL: load_i8_anyext_i32:
; CHECK-O0-EX:       # %bb.0:
; CHECK-O0-EX-NEXT:    vpbroadcastb (%rdi), %xmm0
; CHECK-O0-EX-NEXT:    vmovd %xmm0, %eax
; CHECK-O0-EX-NEXT:    retq
;
; CHECK-O3-EX-LABEL: load_i8_anyext_i32:
; CHECK-O3-EX:       # %bb.0:
; CHECK-O3-EX-NEXT:    vpbroadcastb (%rdi), %xmm0
; CHECK-O3-EX-NEXT:    vmovd %xmm0, %eax
; CHECK-O3-EX-NEXT:    retq
  %v = load atomic i8, i8* %ptr unordered, align 4
  %vec = insertelement <4 x i8> undef, i8 %v, i32 0
  %res = bitcast <4 x i8> %vec to i32
  ret i32 %res
}

define i32 @load_i16_anyext_i32(i16* %ptr) {
; CHECK-O0-CUR-LABEL: load_i16_anyext_i32:
; CHECK-O0-CUR:       # %bb.0:
; CHECK-O0-CUR-NEXT:    movw (%rdi), %cx
; CHECK-O0-CUR-NEXT:    # implicit-def: $eax
; CHECK-O0-CUR-NEXT:    movw %cx, %ax
; CHECK-O0-CUR-NEXT:    retq
;
; CHECK-O3-CUR-LABEL: load_i16_anyext_i32:
; CHECK-O3-CUR:       # %bb.0:
; CHECK-O3-CUR-NEXT:    movzwl (%rdi), %eax
; CHECK-O3-CUR-NEXT:    retq
;
; CHECK-O0-EX-LABEL: load_i16_anyext_i32:
; CHECK-O0-EX:       # %bb.0:
; CHECK-O0-EX-NEXT:    vpbroadcastw (%rdi), %xmm0
; CHECK-O0-EX-NEXT:    vmovd %xmm0, %eax
; CHECK-O0-EX-NEXT:    retq
;
; CHECK-O3-EX-LABEL: load_i16_anyext_i32:
; CHECK-O3-EX:       # %bb.0:
; CHECK-O3-EX-NEXT:    vpbroadcastw (%rdi), %xmm0
; CHECK-O3-EX-NEXT:    vmovd %xmm0, %eax
; CHECK-O3-EX-NEXT:    retq
  %v = load atomic i16, i16* %ptr unordered, align 4
  %vec = insertelement <2 x i16> undef, i16 %v, i64 0
  %res = bitcast <2 x i16> %vec to i32
  ret i32 %res
}

define i64 @load_i16_anyext_i64(i16* %ptr) {
; CHECK-O0-CUR-LABEL: load_i16_anyext_i64:
; CHECK-O0-CUR:       # %bb.0:
; CHECK-O0-CUR-NEXT:    movw (%rdi), %cx
; CHECK-O0-CUR-NEXT:    # implicit-def: $eax
; CHECK-O0-CUR-NEXT:    movw %cx, %ax
; CHECK-O0-CUR-NEXT:    vmovd %eax, %xmm0
; CHECK-O0-CUR-NEXT:    vmovq %xmm0, %rax
; CHECK-O0-CUR-NEXT:    retq
;
; CHECK-O3-CUR-LABEL: load_i16_anyext_i64:
; CHECK-O3-CUR:       # %bb.0:
; CHECK-O3-CUR-NEXT:    movzwl (%rdi), %eax
; CHECK-O3-CUR-NEXT:    vmovd %eax, %xmm0
; CHECK-O3-CUR-NEXT:    vmovq %xmm0, %rax
; CHECK-O3-CUR-NEXT:    retq
;
; CHECK-O0-EX-LABEL: load_i16_anyext_i64:
; CHECK-O0-EX:       # %bb.0:
; CHECK-O0-EX-NEXT:    vpbroadcastw (%rdi), %xmm0
; CHECK-O0-EX-NEXT:    vmovq %xmm0, %rax
; CHECK-O0-EX-NEXT:    retq
;
; CHECK-O3-EX-LABEL: load_i16_anyext_i64:
; CHECK-O3-EX:       # %bb.0:
; CHECK-O3-EX-NEXT:    vpbroadcastw (%rdi), %xmm0
; CHECK-O3-EX-NEXT:    vmovq %xmm0, %rax
; CHECK-O3-EX-NEXT:    retq
  %v = load atomic i16, i16* %ptr unordered, align 8
  %vec = insertelement <4 x i16> undef, i16 %v, i64 0
  %res = bitcast <4 x i16> %vec to i64
  ret i64 %res
}
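
;; Note: Only lane 0 of each vector above is defined; the remaining lanes of
;; the bitcast result are undef, so any lowering that loads at least the
;; requested element (movzbl/movzwl, or the broadcast forms in the EX runs)
;; is acceptable.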

; TODO: Would be legal to combine these into a single wider load, provided
; the wider load type is itself atomic.
define i16 @load_combine(i8* %p) {
; CHECK-O0-LABEL: load_combine:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movb (%rdi), %al
; CHECK-O0-NEXT:    movb 1(%rdi), %cl
; CHECK-O0-NEXT:    movzbl %al, %eax
; CHECK-O0-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-O0-NEXT:    movzbl %cl, %ecx
; CHECK-O0-NEXT:    # kill: def $cx killed $cx killed $ecx
; CHECK-O0-NEXT:    shlw $8, %cx
; CHECK-O0-NEXT:    orw %cx, %ax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_combine:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movzbl (%rdi), %ecx
; CHECK-O3-NEXT:    movzbl 1(%rdi), %eax
; CHECK-O3-NEXT:    shll $8, %eax
; CHECK-O3-NEXT:    orl %ecx, %eax
; CHECK-O3-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-O3-NEXT:    retq
  %v1 = load atomic i8, i8* %p unordered, align 2
  %p2 = getelementptr i8, i8* %p, i64 1
  %v2 = load atomic i8, i8* %p2 unordered, align 1
  %v1.ext = zext i8 %v1 to i16
  %v2.ext = zext i8 %v2 to i16
  %v2.sht = shl i16 %v2.ext, 8
  %res = or i16 %v1.ext, %v2.sht
  ret i16 %res
}

define i1 @fold_cmp_over_fence(i32* %p, i32 %v1) {
; CHECK-O0-LABEL: fold_cmp_over_fence:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movl (%rdi), %eax
; CHECK-O0-NEXT:    mfence
; CHECK-O0-NEXT:    cmpl %eax, %esi
; CHECK-O0-NEXT:    jne .LBB116_2
; CHECK-O0-NEXT:  # %bb.1: # %taken
; CHECK-O0-NEXT:    movb $1, %al
; CHECK-O0-NEXT:    retq
; CHECK-O0-NEXT:  .LBB116_2: # %untaken
; CHECK-O0-NEXT:    xorl %eax, %eax
; CHECK-O0-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-CUR-LABEL: fold_cmp_over_fence:
; CHECK-O3-CUR:       # %bb.0:
; CHECK-O3-CUR-NEXT:    movl (%rdi), %eax
; CHECK-O3-CUR-NEXT:    mfence
; CHECK-O3-CUR-NEXT:    cmpl %eax, %esi
; CHECK-O3-CUR-NEXT:    jne .LBB116_2
; CHECK-O3-CUR-NEXT:  # %bb.1: # %taken
; CHECK-O3-CUR-NEXT:    movb $1, %al
; CHECK-O3-CUR-NEXT:    retq
; CHECK-O3-CUR-NEXT:  .LBB116_2: # %untaken
; CHECK-O3-CUR-NEXT:    xorl %eax, %eax
; CHECK-O3-CUR-NEXT:    retq
;
; CHECK-O3-EX-LABEL: fold_cmp_over_fence:
; CHECK-O3-EX:       # %bb.0:
; CHECK-O3-EX-NEXT:    cmpl (%rdi), %esi
; CHECK-O3-EX-NEXT:    mfence
; CHECK-O3-EX-NEXT:    jne .LBB116_2
; CHECK-O3-EX-NEXT:  # %bb.1: # %taken
; CHECK-O3-EX-NEXT:    movb $1, %al
; CHECK-O3-EX-NEXT:    retq
; CHECK-O3-EX-NEXT:  .LBB116_2: # %untaken
; CHECK-O3-EX-NEXT:    xorl %eax, %eax
; CHECK-O3-EX-NEXT:    retq
  %v2 = load atomic i32, i32* %p unordered, align 4
  fence seq_cst
  %cmp = icmp eq i32 %v1, %v2
  br i1 %cmp, label %taken, label %untaken
taken:
  ret i1 true
untaken:
  ret i1 false
}
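
;; Note: In fold_cmp_over_fence the EX run folds the load into cmpl but
;; still performs the load before the mfence, so the property checked by
;; the nofold tests above (no load motion below a fence) is preserved.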