1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=0 | FileCheck --check-prefixes=CHECK,CHECK-O0,CHECK-O0-CUR %s 3; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=0 | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-CUR %s 4; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=1 | FileCheck --check-prefixes=CHECK,CHECK-O0,CHECK-O0-EX %s 5; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=1 | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-EX %s 6 7define i8 @load_i8(i8* %ptr) { 8; CHECK-LABEL: load_i8: 9; CHECK: # %bb.0: 10; CHECK-NEXT: movb (%rdi), %al 11; CHECK-NEXT: retq 12 %v = load atomic i8, i8* %ptr unordered, align 1 13 ret i8 %v 14} 15 16define void @store_i8(i8* %ptr, i8 %v) { 17; CHECK-O0-LABEL: store_i8: 18; CHECK-O0: # %bb.0: 19; CHECK-O0-NEXT: movb %sil, %al 20; CHECK-O0-NEXT: movb %al, (%rdi) 21; CHECK-O0-NEXT: retq 22; 23; CHECK-O3-LABEL: store_i8: 24; CHECK-O3: # %bb.0: 25; CHECK-O3-NEXT: movb %sil, (%rdi) 26; CHECK-O3-NEXT: retq 27 store atomic i8 %v, i8* %ptr unordered, align 1 28 ret void 29} 30 31define i16 @load_i16(i16* %ptr) { 32; CHECK-O0-LABEL: load_i16: 33; CHECK-O0: # %bb.0: 34; CHECK-O0-NEXT: movw (%rdi), %ax 35; CHECK-O0-NEXT: retq 36; 37; CHECK-O3-LABEL: load_i16: 38; CHECK-O3: # %bb.0: 39; CHECK-O3-NEXT: movzwl (%rdi), %eax 40; CHECK-O3-NEXT: retq 41 %v = load atomic i16, i16* %ptr unordered, align 2 42 ret i16 %v 43} 44 45 46define void @store_i16(i16* %ptr, i16 %v) { 47; CHECK-O0-LABEL: store_i16: 48; CHECK-O0: # %bb.0: 49; CHECK-O0-NEXT: movw %si, %ax 50; CHECK-O0-NEXT: movw %ax, (%rdi) 51; CHECK-O0-NEXT: retq 52; 53; CHECK-O3-LABEL: store_i16: 54; 
CHECK-O3: # %bb.0: 55; CHECK-O3-NEXT: movw %si, (%rdi) 56; CHECK-O3-NEXT: retq 57 store atomic i16 %v, i16* %ptr unordered, align 2 58 ret void 59} 60 61define i32 @load_i32(i32* %ptr) { 62; CHECK-LABEL: load_i32: 63; CHECK: # %bb.0: 64; CHECK-NEXT: movl (%rdi), %eax 65; CHECK-NEXT: retq 66 %v = load atomic i32, i32* %ptr unordered, align 4 67 ret i32 %v 68} 69 70define void @store_i32(i32* %ptr, i32 %v) { 71; CHECK-LABEL: store_i32: 72; CHECK: # %bb.0: 73; CHECK-NEXT: movl %esi, (%rdi) 74; CHECK-NEXT: retq 75 store atomic i32 %v, i32* %ptr unordered, align 4 76 ret void 77} 78 79define i64 @load_i64(i64* %ptr) { 80; CHECK-LABEL: load_i64: 81; CHECK: # %bb.0: 82; CHECK-NEXT: movq (%rdi), %rax 83; CHECK-NEXT: retq 84 %v = load atomic i64, i64* %ptr unordered, align 8 85 ret i64 %v 86} 87 88define void @store_i64(i64* %ptr, i64 %v) { 89; CHECK-LABEL: store_i64: 90; CHECK: # %bb.0: 91; CHECK-NEXT: movq %rsi, (%rdi) 92; CHECK-NEXT: retq 93 store atomic i64 %v, i64* %ptr unordered, align 8 94 ret void 95} 96 97;; The tests in the rest of this file are intended to show transforms which we 98;; either *can't* do for legality, or don't currently implement. The later 99;; are noted carefully where relevant. 100 101;; Start w/some clearly illegal ones. 
102 103; Must use a full width op, not a byte op 104define void @narrow_writeback_or(i64* %ptr) { 105; CHECK-O0-LABEL: narrow_writeback_or: 106; CHECK-O0: # %bb.0: 107; CHECK-O0-NEXT: movq (%rdi), %rax 108; CHECK-O0-NEXT: orq $7, %rax 109; CHECK-O0-NEXT: movq %rax, (%rdi) 110; CHECK-O0-NEXT: retq 111; 112; CHECK-O3-LABEL: narrow_writeback_or: 113; CHECK-O3: # %bb.0: 114; CHECK-O3-NEXT: orq $7, (%rdi) 115; CHECK-O3-NEXT: retq 116 %v = load atomic i64, i64* %ptr unordered, align 8 117 %v.new = or i64 %v, 7 118 store atomic i64 %v.new, i64* %ptr unordered, align 8 119 ret void 120} 121 122; Must use a full width op, not a byte op 123define void @narrow_writeback_and(i64* %ptr) { 124; CHECK-O0-LABEL: narrow_writeback_and: 125; CHECK-O0: # %bb.0: 126; CHECK-O0-NEXT: movq (%rdi), %rax 127; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax 128; CHECK-O0-NEXT: andl $-256, %eax 129; CHECK-O0-NEXT: # kill: def $rax killed $eax 130; CHECK-O0-NEXT: movq %rax, (%rdi) 131; CHECK-O0-NEXT: retq 132; 133; CHECK-O3-LABEL: narrow_writeback_and: 134; CHECK-O3: # %bb.0: 135; CHECK-O3-NEXT: movl $4294967040, %eax # imm = 0xFFFFFF00 136; CHECK-O3-NEXT: andq %rax, (%rdi) 137; CHECK-O3-NEXT: retq 138 %v = load atomic i64, i64* %ptr unordered, align 8 139 %v.new = and i64 %v, 4294967040 ;; 0xFFFF_FF00 140 store atomic i64 %v.new, i64* %ptr unordered, align 8 141 ret void 142} 143 144; Must use a full width op, not a byte op 145define void @narrow_writeback_xor(i64* %ptr) { 146; CHECK-O0-LABEL: narrow_writeback_xor: 147; CHECK-O0: # %bb.0: 148; CHECK-O0-NEXT: movq (%rdi), %rax 149; CHECK-O0-NEXT: xorq $7, %rax 150; CHECK-O0-NEXT: movq %rax, (%rdi) 151; CHECK-O0-NEXT: retq 152; 153; CHECK-O3-LABEL: narrow_writeback_xor: 154; CHECK-O3: # %bb.0: 155; CHECK-O3-NEXT: xorq $7, (%rdi) 156; CHECK-O3-NEXT: retq 157 %v = load atomic i64, i64* %ptr unordered, align 8 158 %v.new = xor i64 %v, 7 159 store atomic i64 %v.new, i64* %ptr unordered, align 8 160 ret void 161} 162 163;; Next batch of 
tests are exercising cases where store widening would 164;; improve code generation. Note that widening is only legal if the 165;; resulting type would be atomic. Each test has a well aligned, and 166;; unaligned variant to ensure we get correct codegen here. 167;; Note: It's not a legality issue, but there's a gotcha here to be aware 168;; of. Once we widen a pair of atomic stores, we lose the information 169;; that the original atomicity requirement was half the width. Given that, 170;; we can't then split the store again. This challenges our usual iterative 171;; approach to incremental improvement. 172 173; Legal if wider type is also atomic (TODO) 174define void @widen_store(i32* %p0, i32 %v1, i32 %v2) { 175; CHECK-LABEL: widen_store: 176; CHECK: # %bb.0: 177; CHECK-NEXT: movl %esi, (%rdi) 178; CHECK-NEXT: movl %edx, 4(%rdi) 179; CHECK-NEXT: retq 180 %p1 = getelementptr i32, i32* %p0, i64 1 181 store atomic i32 %v1, i32* %p0 unordered, align 8 182 store atomic i32 %v2, i32* %p1 unordered, align 4 183 ret void 184} 185 186; This one is *NOT* legal to widen. With weaker alignment, 187; the wider type might cross a cache line and violate the 188; atomicity requirement.
189define void @widen_store_unaligned(i32* %p0, i32 %v1, i32 %v2) { 190; CHECK-LABEL: widen_store_unaligned: 191; CHECK: # %bb.0: 192; CHECK-NEXT: movl %esi, (%rdi) 193; CHECK-NEXT: movl %edx, 4(%rdi) 194; CHECK-NEXT: retq 195 %p1 = getelementptr i32, i32* %p0, i64 1 196 store atomic i32 %v1, i32* %p0 unordered, align 4 197 store atomic i32 %v2, i32* %p1 unordered, align 4 198 ret void 199} 200 201; Legal if wider type is also atomic (TODO) 202define void @widen_broadcast(i32* %p0, i32 %v) { 203; CHECK-LABEL: widen_broadcast: 204; CHECK: # %bb.0: 205; CHECK-NEXT: movl %esi, (%rdi) 206; CHECK-NEXT: movl %esi, 4(%rdi) 207; CHECK-NEXT: retq 208 %p1 = getelementptr i32, i32* %p0, i64 1 209 store atomic i32 %v, i32* %p0 unordered, align 8 210 store atomic i32 %v, i32* %p1 unordered, align 4 211 ret void 212} 213 214; Not legal to widen due to alignment restriction 215define void @widen_broadcast_unaligned(i32* %p0, i32 %v) { 216; CHECK-LABEL: widen_broadcast_unaligned: 217; CHECK: # %bb.0: 218; CHECK-NEXT: movl %esi, (%rdi) 219; CHECK-NEXT: movl %esi, 4(%rdi) 220; CHECK-NEXT: retq 221 %p1 = getelementptr i32, i32* %p0, i64 1 222 store atomic i32 %v, i32* %p0 unordered, align 4 223 store atomic i32 %v, i32* %p1 unordered, align 4 224 ret void 225} 226 227define i128 @load_i128(i128* %ptr) { 228; CHECK-O0-LABEL: load_i128: 229; CHECK-O0: # %bb.0: 230; CHECK-O0-NEXT: pushq %rbx 231; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 232; CHECK-O0-NEXT: .cfi_offset %rbx, -16 233; CHECK-O0-NEXT: xorl %eax, %eax 234; CHECK-O0-NEXT: movl %eax, %ebx 235; CHECK-O0-NEXT: movq %rbx, %rax 236; CHECK-O0-NEXT: movq %rbx, %rdx 237; CHECK-O0-NEXT: movq %rbx, %rcx 238; CHECK-O0-NEXT: lock cmpxchg16b (%rdi) 239; CHECK-O0-NEXT: popq %rbx 240; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 241; CHECK-O0-NEXT: retq 242; 243; CHECK-O3-LABEL: load_i128: 244; CHECK-O3: # %bb.0: 245; CHECK-O3-NEXT: pushq %rbx 246; CHECK-O3-NEXT: .cfi_def_cfa_offset 16 247; CHECK-O3-NEXT: .cfi_offset %rbx, -16 248; CHECK-O3-NEXT: xorl 
%eax, %eax 249; CHECK-O3-NEXT: xorl %edx, %edx 250; CHECK-O3-NEXT: xorl %ecx, %ecx 251; CHECK-O3-NEXT: xorl %ebx, %ebx 252; CHECK-O3-NEXT: lock cmpxchg16b (%rdi) 253; CHECK-O3-NEXT: popq %rbx 254; CHECK-O3-NEXT: .cfi_def_cfa_offset 8 255; CHECK-O3-NEXT: retq 256 %v = load atomic i128, i128* %ptr unordered, align 16 257 ret i128 %v 258} 259 260define void @store_i128(i128* %ptr, i128 %v) { 261; CHECK-O0-LABEL: store_i128: 262; CHECK-O0: # %bb.0: 263; CHECK-O0-NEXT: pushq %rbx 264; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 265; CHECK-O0-NEXT: .cfi_offset %rbx, -16 266; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 267; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 268; CHECK-O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 269; CHECK-O0-NEXT: movq (%rdi), %rax 270; CHECK-O0-NEXT: movq 8(%rdi), %rdx 271; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 272; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 273; CHECK-O0-NEXT: jmp .LBB16_1 274; CHECK-O0-NEXT: .LBB16_1: # %atomicrmw.start 275; CHECK-O0-NEXT: # =>This Inner Loop Header: Depth=1 276; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload 277; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload 278; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload 279; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload 280; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload 281; CHECK-O0-NEXT: lock cmpxchg16b (%rsi) 282; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 283; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 284; CHECK-O0-NEXT: jne .LBB16_1 285; CHECK-O0-NEXT: jmp .LBB16_2 286; CHECK-O0-NEXT: .LBB16_2: # %atomicrmw.end 287; CHECK-O0-NEXT: popq %rbx 288; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 289; CHECK-O0-NEXT: retq 290; 291; CHECK-O3-LABEL: store_i128: 292; CHECK-O3: # %bb.0: 293; CHECK-O3-NEXT: pushq %rbx 294; 
CHECK-O3-NEXT: .cfi_def_cfa_offset 16 295; CHECK-O3-NEXT: .cfi_offset %rbx, -16 296; CHECK-O3-NEXT: movq %rdx, %rcx 297; CHECK-O3-NEXT: movq %rsi, %rbx 298; CHECK-O3-NEXT: movq (%rdi), %rax 299; CHECK-O3-NEXT: movq 8(%rdi), %rdx 300; CHECK-O3-NEXT: .p2align 4, 0x90 301; CHECK-O3-NEXT: .LBB16_1: # %atomicrmw.start 302; CHECK-O3-NEXT: # =>This Inner Loop Header: Depth=1 303; CHECK-O3-NEXT: lock cmpxchg16b (%rdi) 304; CHECK-O3-NEXT: jne .LBB16_1 305; CHECK-O3-NEXT: # %bb.2: # %atomicrmw.end 306; CHECK-O3-NEXT: popq %rbx 307; CHECK-O3-NEXT: .cfi_def_cfa_offset 8 308; CHECK-O3-NEXT: retq 309 store atomic i128 %v, i128* %ptr unordered, align 16 310 ret void 311} 312 313define i256 @load_i256(i256* %ptr) { 314; CHECK-O0-LABEL: load_i256: 315; CHECK-O0: # %bb.0: 316; CHECK-O0-NEXT: subq $56, %rsp 317; CHECK-O0-NEXT: .cfi_def_cfa_offset 64 318; CHECK-O0-NEXT: movq %rdi, %rax 319; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 320; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 321; CHECK-O0-NEXT: movl $32, %edi 322; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx 323; CHECK-O0-NEXT: xorl %ecx, %ecx 324; CHECK-O0-NEXT: callq __atomic_load@PLT 325; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload 326; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload 327; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rcx 328; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdx 329; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rsi 330; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %r8 331; CHECK-O0-NEXT: movq %r8, 24(%rdi) 332; CHECK-O0-NEXT: movq %rsi, 16(%rdi) 333; CHECK-O0-NEXT: movq %rdx, 8(%rdi) 334; CHECK-O0-NEXT: movq %rcx, (%rdi) 335; CHECK-O0-NEXT: addq $56, %rsp 336; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 337; CHECK-O0-NEXT: retq 338; 339; CHECK-O3-LABEL: load_i256: 340; CHECK-O3: # %bb.0: 341; CHECK-O3-NEXT: pushq %rbx 342; CHECK-O3-NEXT: .cfi_def_cfa_offset 16 343; CHECK-O3-NEXT: subq $32, %rsp 344; CHECK-O3-NEXT: .cfi_def_cfa_offset 48 345; 
CHECK-O3-NEXT: .cfi_offset %rbx, -16 346; CHECK-O3-NEXT: movq %rdi, %rbx 347; CHECK-O3-NEXT: movq %rsp, %rdx 348; CHECK-O3-NEXT: movl $32, %edi 349; CHECK-O3-NEXT: xorl %ecx, %ecx 350; CHECK-O3-NEXT: callq __atomic_load@PLT 351; CHECK-O3-NEXT: vmovups (%rsp), %ymm0 352; CHECK-O3-NEXT: vmovups %ymm0, (%rbx) 353; CHECK-O3-NEXT: movq %rbx, %rax 354; CHECK-O3-NEXT: addq $32, %rsp 355; CHECK-O3-NEXT: .cfi_def_cfa_offset 16 356; CHECK-O3-NEXT: popq %rbx 357; CHECK-O3-NEXT: .cfi_def_cfa_offset 8 358; CHECK-O3-NEXT: vzeroupper 359; CHECK-O3-NEXT: retq 360 %v = load atomic i256, i256* %ptr unordered, align 16 361 ret i256 %v 362} 363 364define void @store_i256(i256* %ptr, i256 %v) { 365; CHECK-O0-LABEL: store_i256: 366; CHECK-O0: # %bb.0: 367; CHECK-O0-NEXT: subq $40, %rsp 368; CHECK-O0-NEXT: .cfi_def_cfa_offset 48 369; CHECK-O0-NEXT: movq %rdx, %rax 370; CHECK-O0-NEXT: movq %rsi, (%rsp) # 8-byte Spill 371; CHECK-O0-NEXT: movq %rdi, %rsi 372; CHECK-O0-NEXT: movq (%rsp), %rdi # 8-byte Reload 373; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx 374; CHECK-O0-NEXT: movq %rdi, {{[0-9]+}}(%rsp) 375; CHECK-O0-NEXT: movq %rax, {{[0-9]+}}(%rsp) 376; CHECK-O0-NEXT: movq %rcx, {{[0-9]+}}(%rsp) 377; CHECK-O0-NEXT: movq %r8, {{[0-9]+}}(%rsp) 378; CHECK-O0-NEXT: movl $32, %edi 379; CHECK-O0-NEXT: xorl %ecx, %ecx 380; CHECK-O0-NEXT: callq __atomic_store@PLT 381; CHECK-O0-NEXT: addq $40, %rsp 382; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 383; CHECK-O0-NEXT: retq 384; 385; CHECK-O3-LABEL: store_i256: 386; CHECK-O3: # %bb.0: 387; CHECK-O3-NEXT: subq $40, %rsp 388; CHECK-O3-NEXT: .cfi_def_cfa_offset 48 389; CHECK-O3-NEXT: movq %rdi, %rax 390; CHECK-O3-NEXT: movq %r8, {{[0-9]+}}(%rsp) 391; CHECK-O3-NEXT: movq %rcx, {{[0-9]+}}(%rsp) 392; CHECK-O3-NEXT: movq %rdx, {{[0-9]+}}(%rsp) 393; CHECK-O3-NEXT: movq %rsi, {{[0-9]+}}(%rsp) 394; CHECK-O3-NEXT: leaq {{[0-9]+}}(%rsp), %rdx 395; CHECK-O3-NEXT: movl $32, %edi 396; CHECK-O3-NEXT: movq %rax, %rsi 397; CHECK-O3-NEXT: xorl %ecx, %ecx 398; CHECK-O3-NEXT: 
callq __atomic_store@PLT 399; CHECK-O3-NEXT: addq $40, %rsp 400; CHECK-O3-NEXT: .cfi_def_cfa_offset 8 401; CHECK-O3-NEXT: retq 402 store atomic i256 %v, i256* %ptr unordered, align 16 403 ret void 404} 405 406; Legal if wider type is also atomic (TODO) 407define void @vec_store(i32* %p0, <2 x i32> %vec) { 408; CHECK-O0-CUR-LABEL: vec_store: 409; CHECK-O0-CUR: # %bb.0: 410; CHECK-O0-CUR-NEXT: vmovd %xmm0, %ecx 411; CHECK-O0-CUR-NEXT: vpextrd $1, %xmm0, %eax 412; CHECK-O0-CUR-NEXT: movl %ecx, (%rdi) 413; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi) 414; CHECK-O0-CUR-NEXT: retq 415; 416; CHECK-O3-CUR-LABEL: vec_store: 417; CHECK-O3-CUR: # %bb.0: 418; CHECK-O3-CUR-NEXT: vmovd %xmm0, %eax 419; CHECK-O3-CUR-NEXT: vpextrd $1, %xmm0, %ecx 420; CHECK-O3-CUR-NEXT: movl %eax, (%rdi) 421; CHECK-O3-CUR-NEXT: movl %ecx, 4(%rdi) 422; CHECK-O3-CUR-NEXT: retq 423; 424; CHECK-O0-EX-LABEL: vec_store: 425; CHECK-O0-EX: # %bb.0: 426; CHECK-O0-EX-NEXT: vmovd %xmm0, (%rdi) 427; CHECK-O0-EX-NEXT: vpextrd $1, %xmm0, 4(%rdi) 428; CHECK-O0-EX-NEXT: retq 429; 430; CHECK-O3-EX-LABEL: vec_store: 431; CHECK-O3-EX: # %bb.0: 432; CHECK-O3-EX-NEXT: vmovss %xmm0, (%rdi) 433; CHECK-O3-EX-NEXT: vextractps $1, %xmm0, 4(%rdi) 434; CHECK-O3-EX-NEXT: retq 435 %v1 = extractelement <2 x i32> %vec, i32 0 436 %v2 = extractelement <2 x i32> %vec, i32 1 437 %p1 = getelementptr i32, i32* %p0, i64 1 438 store atomic i32 %v1, i32* %p0 unordered, align 8 439 store atomic i32 %v2, i32* %p1 unordered, align 4 440 ret void 441} 442 443; Not legal to widen due to alignment restriction 444define void @vec_store_unaligned(i32* %p0, <2 x i32> %vec) { 445; CHECK-O0-CUR-LABEL: vec_store_unaligned: 446; CHECK-O0-CUR: # %bb.0: 447; CHECK-O0-CUR-NEXT: vmovd %xmm0, %ecx 448; CHECK-O0-CUR-NEXT: vpextrd $1, %xmm0, %eax 449; CHECK-O0-CUR-NEXT: movl %ecx, (%rdi) 450; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi) 451; CHECK-O0-CUR-NEXT: retq 452; 453; CHECK-O3-CUR-LABEL: vec_store_unaligned: 454; CHECK-O3-CUR: # %bb.0: 455; CHECK-O3-CUR-NEXT: 
vmovd %xmm0, %eax 456; CHECK-O3-CUR-NEXT: vpextrd $1, %xmm0, %ecx 457; CHECK-O3-CUR-NEXT: movl %eax, (%rdi) 458; CHECK-O3-CUR-NEXT: movl %ecx, 4(%rdi) 459; CHECK-O3-CUR-NEXT: retq 460; 461; CHECK-O0-EX-LABEL: vec_store_unaligned: 462; CHECK-O0-EX: # %bb.0: 463; CHECK-O0-EX-NEXT: vmovd %xmm0, (%rdi) 464; CHECK-O0-EX-NEXT: vpextrd $1, %xmm0, 4(%rdi) 465; CHECK-O0-EX-NEXT: retq 466; 467; CHECK-O3-EX-LABEL: vec_store_unaligned: 468; CHECK-O3-EX: # %bb.0: 469; CHECK-O3-EX-NEXT: vmovss %xmm0, (%rdi) 470; CHECK-O3-EX-NEXT: vextractps $1, %xmm0, 4(%rdi) 471; CHECK-O3-EX-NEXT: retq 472 %v1 = extractelement <2 x i32> %vec, i32 0 473 %v2 = extractelement <2 x i32> %vec, i32 1 474 %p1 = getelementptr i32, i32* %p0, i64 1 475 store atomic i32 %v1, i32* %p0 unordered, align 4 476 store atomic i32 %v2, i32* %p1 unordered, align 4 477 ret void 478} 479 480 481 482; Legal if wider type is also atomic (TODO) 483; Also, can avoid register move from xmm to eax (TODO) 484define void @widen_broadcast2(i32* %p0, <2 x i32> %vec) { 485; CHECK-O0-CUR-LABEL: widen_broadcast2: 486; CHECK-O0-CUR: # %bb.0: 487; CHECK-O0-CUR-NEXT: vmovd %xmm0, %eax 488; CHECK-O0-CUR-NEXT: movl %eax, (%rdi) 489; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi) 490; CHECK-O0-CUR-NEXT: retq 491; 492; CHECK-O3-CUR-LABEL: widen_broadcast2: 493; CHECK-O3-CUR: # %bb.0: 494; CHECK-O3-CUR-NEXT: vmovd %xmm0, %eax 495; CHECK-O3-CUR-NEXT: movl %eax, (%rdi) 496; CHECK-O3-CUR-NEXT: movl %eax, 4(%rdi) 497; CHECK-O3-CUR-NEXT: retq 498; 499; CHECK-O0-EX-LABEL: widen_broadcast2: 500; CHECK-O0-EX: # %bb.0: 501; CHECK-O0-EX-NEXT: vmovd %xmm0, (%rdi) 502; CHECK-O0-EX-NEXT: vmovd %xmm0, 4(%rdi) 503; CHECK-O0-EX-NEXT: retq 504; 505; CHECK-O3-EX-LABEL: widen_broadcast2: 506; CHECK-O3-EX: # %bb.0: 507; CHECK-O3-EX-NEXT: vmovss %xmm0, (%rdi) 508; CHECK-O3-EX-NEXT: vmovss %xmm0, 4(%rdi) 509; CHECK-O3-EX-NEXT: retq 510 %v1 = extractelement <2 x i32> %vec, i32 0 511 %p1 = getelementptr i32, i32* %p0, i64 1 512 store atomic i32 %v1, i32* %p0 unordered, 
align 8 513 store atomic i32 %v1, i32* %p1 unordered, align 4 514 ret void 515} 516 517; Not legal to widen due to alignment restriction 518define void @widen_broadcast2_unaligned(i32* %p0, <2 x i32> %vec) { 519; CHECK-O0-CUR-LABEL: widen_broadcast2_unaligned: 520; CHECK-O0-CUR: # %bb.0: 521; CHECK-O0-CUR-NEXT: vmovd %xmm0, %eax 522; CHECK-O0-CUR-NEXT: movl %eax, (%rdi) 523; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi) 524; CHECK-O0-CUR-NEXT: retq 525; 526; CHECK-O3-CUR-LABEL: widen_broadcast2_unaligned: 527; CHECK-O3-CUR: # %bb.0: 528; CHECK-O3-CUR-NEXT: vmovd %xmm0, %eax 529; CHECK-O3-CUR-NEXT: movl %eax, (%rdi) 530; CHECK-O3-CUR-NEXT: movl %eax, 4(%rdi) 531; CHECK-O3-CUR-NEXT: retq 532; 533; CHECK-O0-EX-LABEL: widen_broadcast2_unaligned: 534; CHECK-O0-EX: # %bb.0: 535; CHECK-O0-EX-NEXT: vmovd %xmm0, (%rdi) 536; CHECK-O0-EX-NEXT: vmovd %xmm0, 4(%rdi) 537; CHECK-O0-EX-NEXT: retq 538; 539; CHECK-O3-EX-LABEL: widen_broadcast2_unaligned: 540; CHECK-O3-EX: # %bb.0: 541; CHECK-O3-EX-NEXT: vmovss %xmm0, (%rdi) 542; CHECK-O3-EX-NEXT: vmovss %xmm0, 4(%rdi) 543; CHECK-O3-EX-NEXT: retq 544 %v1 = extractelement <2 x i32> %vec, i32 0 545 %p1 = getelementptr i32, i32* %p0, i64 1 546 store atomic i32 %v1, i32* %p0 unordered, align 4 547 store atomic i32 %v1, i32* %p1 unordered, align 4 548 ret void 549} 550 551; Legal if wider type is also atomic (TODO) 552define void @widen_zero_init(i32* %p0, i32 %v1, i32 %v2) { 553; CHECK-LABEL: widen_zero_init: 554; CHECK: # %bb.0: 555; CHECK-NEXT: movl $0, (%rdi) 556; CHECK-NEXT: movl $0, 4(%rdi) 557; CHECK-NEXT: retq 558 %p1 = getelementptr i32, i32* %p0, i64 1 559 store atomic i32 0, i32* %p0 unordered, align 8 560 store atomic i32 0, i32* %p1 unordered, align 4 561 ret void 562} 563 564; Not legal to widen due to alignment restriction 565define void @widen_zero_init_unaligned(i32* %p0, i32 %v1, i32 %v2) { 566; CHECK-LABEL: widen_zero_init_unaligned: 567; CHECK: # %bb.0: 568; CHECK-NEXT: movl $0, (%rdi) 569; CHECK-NEXT: movl $0, 4(%rdi) 570; 
CHECK-NEXT: retq 571 %p1 = getelementptr i32, i32* %p0, i64 1 572 store atomic i32 0, i32* %p0 unordered, align 4 573 store atomic i32 0, i32* %p1 unordered, align 4 574 ret void 575} 576 577;; The next batch of tests are stressing load folding. Folding is legal 578;; on x86, so these are simply checking optimization quality. 579 580; Legal, as expected 581define i64 @load_fold_add1(i64* %p) { 582; CHECK-LABEL: load_fold_add1: 583; CHECK: # %bb.0: 584; CHECK-NEXT: movq (%rdi), %rax 585; CHECK-NEXT: addq $15, %rax 586; CHECK-NEXT: retq 587 %v = load atomic i64, i64* %p unordered, align 8 588 %ret = add i64 %v, 15 589 ret i64 %ret 590} 591 592define i64 @load_fold_add2(i64* %p, i64 %v2) { 593; CHECK-LABEL: load_fold_add2: 594; CHECK: # %bb.0: 595; CHECK-NEXT: movq %rsi, %rax 596; CHECK-NEXT: addq (%rdi), %rax 597; CHECK-NEXT: retq 598 %v = load atomic i64, i64* %p unordered, align 8 599 %ret = add i64 %v, %v2 600 ret i64 %ret 601} 602 603define i64 @load_fold_add3(i64* %p1, i64* %p2) { 604; CHECK-O0-LABEL: load_fold_add3: 605; CHECK-O0: # %bb.0: 606; CHECK-O0-NEXT: movq (%rdi), %rax 607; CHECK-O0-NEXT: addq (%rsi), %rax 608; CHECK-O0-NEXT: retq 609; 610; CHECK-O3-CUR-LABEL: load_fold_add3: 611; CHECK-O3-CUR: # %bb.0: 612; CHECK-O3-CUR-NEXT: movq (%rsi), %rax 613; CHECK-O3-CUR-NEXT: addq (%rdi), %rax 614; CHECK-O3-CUR-NEXT: retq 615; 616; CHECK-O3-EX-LABEL: load_fold_add3: 617; CHECK-O3-EX: # %bb.0: 618; CHECK-O3-EX-NEXT: movq (%rdi), %rax 619; CHECK-O3-EX-NEXT: addq (%rsi), %rax 620; CHECK-O3-EX-NEXT: retq 621 %v = load atomic i64, i64* %p1 unordered, align 8 622 %v2 = load atomic i64, i64* %p2 unordered, align 8 623 %ret = add i64 %v, %v2 624 ret i64 %ret 625} 626 627; Legal, as expected 628define i64 @load_fold_sub1(i64* %p) { 629; CHECK-O0-LABEL: load_fold_sub1: 630; CHECK-O0: # %bb.0: 631; CHECK-O0-NEXT: movq (%rdi), %rax 632; CHECK-O0-NEXT: subq $15, %rax 633; CHECK-O0-NEXT: retq 634; 635; CHECK-O3-LABEL: load_fold_sub1: 636; CHECK-O3: # %bb.0: 637; 
CHECK-O3-NEXT: movq (%rdi), %rax 638; CHECK-O3-NEXT: addq $-15, %rax 639; CHECK-O3-NEXT: retq 640 %v = load atomic i64, i64* %p unordered, align 8 641 %ret = sub i64 %v, 15 642 ret i64 %ret 643} 644 645define i64 @load_fold_sub2(i64* %p, i64 %v2) { 646; CHECK-LABEL: load_fold_sub2: 647; CHECK: # %bb.0: 648; CHECK-NEXT: movq (%rdi), %rax 649; CHECK-NEXT: subq %rsi, %rax 650; CHECK-NEXT: retq 651 %v = load atomic i64, i64* %p unordered, align 8 652 %ret = sub i64 %v, %v2 653 ret i64 %ret 654} 655 656define i64 @load_fold_sub3(i64* %p1, i64* %p2) { 657; CHECK-LABEL: load_fold_sub3: 658; CHECK: # %bb.0: 659; CHECK-NEXT: movq (%rdi), %rax 660; CHECK-NEXT: subq (%rsi), %rax 661; CHECK-NEXT: retq 662 %v = load atomic i64, i64* %p1 unordered, align 8 663 %v2 = load atomic i64, i64* %p2 unordered, align 8 664 %ret = sub i64 %v, %v2 665 ret i64 %ret 666} 667 668; Legal, as expected 669define i64 @load_fold_mul1(i64* %p) { 670; CHECK-O0-LABEL: load_fold_mul1: 671; CHECK-O0: # %bb.0: 672; CHECK-O0-NEXT: imulq $15, (%rdi), %rax 673; CHECK-O0-NEXT: retq 674; 675; CHECK-O3-LABEL: load_fold_mul1: 676; CHECK-O3: # %bb.0: 677; CHECK-O3-NEXT: movq (%rdi), %rax 678; CHECK-O3-NEXT: leaq (%rax,%rax,4), %rax 679; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax 680; CHECK-O3-NEXT: retq 681 %v = load atomic i64, i64* %p unordered, align 8 682 %ret = mul i64 %v, 15 683 ret i64 %ret 684} 685 686define i64 @load_fold_mul2(i64* %p, i64 %v2) { 687; CHECK-LABEL: load_fold_mul2: 688; CHECK: # %bb.0: 689; CHECK-NEXT: movq %rsi, %rax 690; CHECK-NEXT: imulq (%rdi), %rax 691; CHECK-NEXT: retq 692 %v = load atomic i64, i64* %p unordered, align 8 693 %ret = mul i64 %v, %v2 694 ret i64 %ret 695} 696 697define i64 @load_fold_mul3(i64* %p1, i64* %p2) { 698; CHECK-O0-LABEL: load_fold_mul3: 699; CHECK-O0: # %bb.0: 700; CHECK-O0-NEXT: movq (%rdi), %rax 701; CHECK-O0-NEXT: imulq (%rsi), %rax 702; CHECK-O0-NEXT: retq 703; 704; CHECK-O3-CUR-LABEL: load_fold_mul3: 705; CHECK-O3-CUR: # %bb.0: 706; CHECK-O3-CUR-NEXT: movq 
(%rsi), %rax 707; CHECK-O3-CUR-NEXT: imulq (%rdi), %rax 708; CHECK-O3-CUR-NEXT: retq 709; 710; CHECK-O3-EX-LABEL: load_fold_mul3: 711; CHECK-O3-EX: # %bb.0: 712; CHECK-O3-EX-NEXT: movq (%rdi), %rax 713; CHECK-O3-EX-NEXT: imulq (%rsi), %rax 714; CHECK-O3-EX-NEXT: retq 715 %v = load atomic i64, i64* %p1 unordered, align 8 716 %v2 = load atomic i64, i64* %p2 unordered, align 8 717 %ret = mul i64 %v, %v2 718 ret i64 %ret 719} 720 721; Legal to fold (TODO) 722define i64 @load_fold_sdiv1(i64* %p) { 723; CHECK-O0-LABEL: load_fold_sdiv1: 724; CHECK-O0: # %bb.0: 725; CHECK-O0-NEXT: movq (%rdi), %rax 726; CHECK-O0-NEXT: movl $15, %ecx 727; CHECK-O0-NEXT: cqto 728; CHECK-O0-NEXT: idivq %rcx 729; CHECK-O0-NEXT: retq 730; 731; CHECK-O3-LABEL: load_fold_sdiv1: 732; CHECK-O3: # %bb.0: 733; CHECK-O3-NEXT: movq (%rdi), %rcx 734; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 735; CHECK-O3-NEXT: movq %rcx, %rax 736; CHECK-O3-NEXT: imulq %rdx 737; CHECK-O3-NEXT: addq %rdx, %rcx 738; CHECK-O3-NEXT: movq %rcx, %rax 739; CHECK-O3-NEXT: shrq $63, %rax 740; CHECK-O3-NEXT: sarq $3, %rcx 741; CHECK-O3-NEXT: addq %rax, %rcx 742; CHECK-O3-NEXT: movq %rcx, %rax 743; CHECK-O3-NEXT: retq 744 %v = load atomic i64, i64* %p unordered, align 8 745 %ret = sdiv i64 %v, 15 746 ret i64 %ret 747} 748 749; Legal to fold (TODO) 750define i64 @load_fold_sdiv2(i64* %p, i64 %v2) { 751; CHECK-O0-LABEL: load_fold_sdiv2: 752; CHECK-O0: # %bb.0: 753; CHECK-O0-NEXT: movq (%rdi), %rax 754; CHECK-O0-NEXT: cqto 755; CHECK-O0-NEXT: idivq %rsi 756; CHECK-O0-NEXT: retq 757; 758; CHECK-O3-LABEL: load_fold_sdiv2: 759; CHECK-O3: # %bb.0: 760; CHECK-O3-NEXT: movq (%rdi), %rax 761; CHECK-O3-NEXT: movq %rax, %rcx 762; CHECK-O3-NEXT: orq %rsi, %rcx 763; CHECK-O3-NEXT: shrq $32, %rcx 764; CHECK-O3-NEXT: je .LBB35_1 765; CHECK-O3-NEXT: # %bb.2: 766; CHECK-O3-NEXT: cqto 767; CHECK-O3-NEXT: idivq %rsi 768; CHECK-O3-NEXT: retq 769; CHECK-O3-NEXT: .LBB35_1: 770; CHECK-O3-NEXT: # kill: def $eax killed 
$eax killed $rax 771; CHECK-O3-NEXT: xorl %edx, %edx 772; CHECK-O3-NEXT: divl %esi 773; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax 774; CHECK-O3-NEXT: retq 775 %v = load atomic i64, i64* %p unordered, align 8 776 %ret = sdiv i64 %v, %v2 777 ret i64 %ret 778} 779 780define i64 @load_fold_sdiv3(i64* %p1, i64* %p2) { 781; CHECK-O0-LABEL: load_fold_sdiv3: 782; CHECK-O0: # %bb.0: 783; CHECK-O0-NEXT: movq (%rdi), %rax 784; CHECK-O0-NEXT: cqto 785; CHECK-O0-NEXT: idivq (%rsi) 786; CHECK-O0-NEXT: retq 787; 788; CHECK-O3-LABEL: load_fold_sdiv3: 789; CHECK-O3: # %bb.0: 790; CHECK-O3-NEXT: movq (%rdi), %rax 791; CHECK-O3-NEXT: movq (%rsi), %rcx 792; CHECK-O3-NEXT: movq %rax, %rdx 793; CHECK-O3-NEXT: orq %rcx, %rdx 794; CHECK-O3-NEXT: shrq $32, %rdx 795; CHECK-O3-NEXT: je .LBB36_1 796; CHECK-O3-NEXT: # %bb.2: 797; CHECK-O3-NEXT: cqto 798; CHECK-O3-NEXT: idivq %rcx 799; CHECK-O3-NEXT: retq 800; CHECK-O3-NEXT: .LBB36_1: 801; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax 802; CHECK-O3-NEXT: xorl %edx, %edx 803; CHECK-O3-NEXT: divl %ecx 804; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax 805; CHECK-O3-NEXT: retq 806 %v = load atomic i64, i64* %p1 unordered, align 8 807 %v2 = load atomic i64, i64* %p2 unordered, align 8 808 %ret = sdiv i64 %v, %v2 809 ret i64 %ret 810} 811 812; Legal to fold (TODO) 813define i64 @load_fold_udiv1(i64* %p) { 814; CHECK-O0-LABEL: load_fold_udiv1: 815; CHECK-O0: # %bb.0: 816; CHECK-O0-NEXT: movq (%rdi), %rax 817; CHECK-O0-NEXT: movl $15, %ecx 818; CHECK-O0-NEXT: xorl %edx, %edx 819; CHECK-O0-NEXT: # kill: def $rdx killed $edx 820; CHECK-O0-NEXT: divq %rcx 821; CHECK-O0-NEXT: retq 822; 823; CHECK-O3-CUR-LABEL: load_fold_udiv1: 824; CHECK-O3-CUR: # %bb.0: 825; CHECK-O3-CUR-NEXT: movq (%rdi), %rdx 826; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 827; CHECK-O3-CUR-NEXT: mulxq %rax, %rax, %rax 828; CHECK-O3-CUR-NEXT: shrq $3, %rax 829; CHECK-O3-CUR-NEXT: retq 830; 831; CHECK-O3-EX-LABEL: 
load_fold_udiv1: 832; CHECK-O3-EX: # %bb.0: 833; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 834; CHECK-O3-EX-NEXT: mulxq (%rdi), %rax, %rax 835; CHECK-O3-EX-NEXT: shrq $3, %rax 836; CHECK-O3-EX-NEXT: retq 837 %v = load atomic i64, i64* %p unordered, align 8 838 %ret = udiv i64 %v, 15 839 ret i64 %ret 840} 841 842define i64 @load_fold_udiv2(i64* %p, i64 %v2) { 843; CHECK-O0-LABEL: load_fold_udiv2: 844; CHECK-O0: # %bb.0: 845; CHECK-O0-NEXT: movq (%rdi), %rax 846; CHECK-O0-NEXT: xorl %ecx, %ecx 847; CHECK-O0-NEXT: movl %ecx, %edx 848; CHECK-O0-NEXT: divq %rsi 849; CHECK-O0-NEXT: retq 850; 851; CHECK-O3-LABEL: load_fold_udiv2: 852; CHECK-O3: # %bb.0: 853; CHECK-O3-NEXT: movq (%rdi), %rax 854; CHECK-O3-NEXT: movq %rax, %rcx 855; CHECK-O3-NEXT: orq %rsi, %rcx 856; CHECK-O3-NEXT: shrq $32, %rcx 857; CHECK-O3-NEXT: je .LBB38_1 858; CHECK-O3-NEXT: # %bb.2: 859; CHECK-O3-NEXT: xorl %edx, %edx 860; CHECK-O3-NEXT: divq %rsi 861; CHECK-O3-NEXT: retq 862; CHECK-O3-NEXT: .LBB38_1: 863; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax 864; CHECK-O3-NEXT: xorl %edx, %edx 865; CHECK-O3-NEXT: divl %esi 866; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax 867; CHECK-O3-NEXT: retq 868 %v = load atomic i64, i64* %p unordered, align 8 869 %ret = udiv i64 %v, %v2 870 ret i64 %ret 871} 872 873define i64 @load_fold_udiv3(i64* %p1, i64* %p2) { 874; CHECK-O0-LABEL: load_fold_udiv3: 875; CHECK-O0: # %bb.0: 876; CHECK-O0-NEXT: movq (%rdi), %rax 877; CHECK-O0-NEXT: xorl %ecx, %ecx 878; CHECK-O0-NEXT: movl %ecx, %edx 879; CHECK-O0-NEXT: divq (%rsi) 880; CHECK-O0-NEXT: retq 881; 882; CHECK-O3-LABEL: load_fold_udiv3: 883; CHECK-O3: # %bb.0: 884; CHECK-O3-NEXT: movq (%rdi), %rax 885; CHECK-O3-NEXT: movq (%rsi), %rcx 886; CHECK-O3-NEXT: movq %rax, %rdx 887; CHECK-O3-NEXT: orq %rcx, %rdx 888; CHECK-O3-NEXT: shrq $32, %rdx 889; CHECK-O3-NEXT: je .LBB39_1 890; CHECK-O3-NEXT: # %bb.2: 891; CHECK-O3-NEXT: xorl %edx, %edx 892; CHECK-O3-NEXT: divq %rcx 893; 
CHECK-O3-NEXT: retq 894; CHECK-O3-NEXT: .LBB39_1: 895; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax 896; CHECK-O3-NEXT: xorl %edx, %edx 897; CHECK-O3-NEXT: divl %ecx 898; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax 899; CHECK-O3-NEXT: retq 900 %v = load atomic i64, i64* %p1 unordered, align 8 901 %v2 = load atomic i64, i64* %p2 unordered, align 8 902 %ret = udiv i64 %v, %v2 903 ret i64 %ret 904} 905 906; Legal to fold (TODO) 907define i64 @load_fold_srem1(i64* %p) { 908; CHECK-O0-LABEL: load_fold_srem1: 909; CHECK-O0: # %bb.0: 910; CHECK-O0-NEXT: movq (%rdi), %rax 911; CHECK-O0-NEXT: movl $15, %ecx 912; CHECK-O0-NEXT: cqto 913; CHECK-O0-NEXT: idivq %rcx 914; CHECK-O0-NEXT: movq %rdx, %rax 915; CHECK-O0-NEXT: retq 916; 917; CHECK-O3-LABEL: load_fold_srem1: 918; CHECK-O3: # %bb.0: 919; CHECK-O3-NEXT: movq (%rdi), %rcx 920; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 921; CHECK-O3-NEXT: movq %rcx, %rax 922; CHECK-O3-NEXT: imulq %rdx 923; CHECK-O3-NEXT: addq %rcx, %rdx 924; CHECK-O3-NEXT: movq %rdx, %rax 925; CHECK-O3-NEXT: shrq $63, %rax 926; CHECK-O3-NEXT: sarq $3, %rdx 927; CHECK-O3-NEXT: addq %rax, %rdx 928; CHECK-O3-NEXT: leaq (%rdx,%rdx,4), %rax 929; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax 930; CHECK-O3-NEXT: subq %rax, %rcx 931; CHECK-O3-NEXT: movq %rcx, %rax 932; CHECK-O3-NEXT: retq 933 %v = load atomic i64, i64* %p unordered, align 8 934 %ret = srem i64 %v, 15 935 ret i64 %ret 936} 937 938; Legal, as expected 939define i64 @load_fold_srem2(i64* %p, i64 %v2) { 940; CHECK-O0-LABEL: load_fold_srem2: 941; CHECK-O0: # %bb.0: 942; CHECK-O0-NEXT: movq (%rdi), %rax 943; CHECK-O0-NEXT: cqto 944; CHECK-O0-NEXT: idivq %rsi 945; CHECK-O0-NEXT: movq %rdx, %rax 946; CHECK-O0-NEXT: retq 947; 948; CHECK-O3-LABEL: load_fold_srem2: 949; CHECK-O3: # %bb.0: 950; CHECK-O3-NEXT: movq (%rdi), %rax 951; CHECK-O3-NEXT: movq %rax, %rcx 952; CHECK-O3-NEXT: orq %rsi, %rcx 953; CHECK-O3-NEXT: shrq $32, %rcx 954; CHECK-O3-NEXT: je 
.LBB41_1 955; CHECK-O3-NEXT: # %bb.2: 956; CHECK-O3-NEXT: cqto 957; CHECK-O3-NEXT: idivq %rsi 958; CHECK-O3-NEXT: movq %rdx, %rax 959; CHECK-O3-NEXT: retq 960; CHECK-O3-NEXT: .LBB41_1: 961; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax 962; CHECK-O3-NEXT: xorl %edx, %edx 963; CHECK-O3-NEXT: divl %esi 964; CHECK-O3-NEXT: movl %edx, %eax 965; CHECK-O3-NEXT: retq 966 %v = load atomic i64, i64* %p unordered, align 8 967 %ret = srem i64 %v, %v2 968 ret i64 %ret 969} 970 ; Same as srem2, but both operands are unordered atomic loads. 971define i64 @load_fold_srem3(i64* %p1, i64* %p2) { 972; CHECK-O0-LABEL: load_fold_srem3: 973; CHECK-O0: # %bb.0: 974; CHECK-O0-NEXT: movq (%rdi), %rax 975; CHECK-O0-NEXT: cqto 976; CHECK-O0-NEXT: idivq (%rsi) 977; CHECK-O0-NEXT: movq %rdx, %rax 978; CHECK-O0-NEXT: retq 979; 980; CHECK-O3-LABEL: load_fold_srem3: 981; CHECK-O3: # %bb.0: 982; CHECK-O3-NEXT: movq (%rdi), %rax 983; CHECK-O3-NEXT: movq (%rsi), %rcx 984; CHECK-O3-NEXT: movq %rax, %rdx 985; CHECK-O3-NEXT: orq %rcx, %rdx 986; CHECK-O3-NEXT: shrq $32, %rdx 987; CHECK-O3-NEXT: je .LBB42_1 988; CHECK-O3-NEXT: # %bb.2: 989; CHECK-O3-NEXT: cqto 990; CHECK-O3-NEXT: idivq %rcx 991; CHECK-O3-NEXT: movq %rdx, %rax 992; CHECK-O3-NEXT: retq 993; CHECK-O3-NEXT: .LBB42_1: 994; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax 995; CHECK-O3-NEXT: xorl %edx, %edx 996; CHECK-O3-NEXT: divl %ecx 997; CHECK-O3-NEXT: movl %edx, %eax 998; CHECK-O3-NEXT: retq 999 %v = load atomic i64, i64* %p1 unordered, align 8 1000 %v2 = load atomic i64, i64* %p2 unordered, align 8 1001 %ret = srem i64 %v, %v2 1002 ret i64 %ret 1003} 1004 1005; Legal to fold (TODO) 1006define i64 @load_fold_urem1(i64* %p) { 1007; CHECK-O0-LABEL: load_fold_urem1: 1008; CHECK-O0: # %bb.0: 1009; CHECK-O0-NEXT: movq (%rdi), %rax 1010; CHECK-O0-NEXT: movl $15, %ecx 1011; CHECK-O0-NEXT: xorl %edx, %edx 1012; CHECK-O0-NEXT: # kill: def $rdx killed $edx 1013; CHECK-O0-NEXT: divq %rcx 1014; CHECK-O0-NEXT: movq %rdx, %rax 1015; CHECK-O0-NEXT: retq 1016; 1017; CHECK-O3-LABEL: load_fold_urem1: 
1018; CHECK-O3: # %bb.0: 1019; CHECK-O3-NEXT: movq (%rdi), %rax 1020; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 1021; CHECK-O3-NEXT: movq %rax, %rdx 1022; CHECK-O3-NEXT: mulxq %rcx, %rcx, %rcx 1023; CHECK-O3-NEXT: shrq $3, %rcx 1024; CHECK-O3-NEXT: leaq (%rcx,%rcx,4), %rcx 1025; CHECK-O3-NEXT: leaq (%rcx,%rcx,2), %rcx 1026; CHECK-O3-NEXT: subq %rcx, %rax 1027; CHECK-O3-NEXT: retq 1028 %v = load atomic i64, i64* %p unordered, align 8 1029 %ret = urem i64 %v, 15 1030 ret i64 %ret 1031} 1032 1033; Legal, as expected 1034define i64 @load_fold_urem2(i64* %p, i64 %v2) { 1035; CHECK-O0-LABEL: load_fold_urem2: 1036; CHECK-O0: # %bb.0: 1037; CHECK-O0-NEXT: movq (%rdi), %rax 1038; CHECK-O0-NEXT: xorl %ecx, %ecx 1039; CHECK-O0-NEXT: movl %ecx, %edx 1040; CHECK-O0-NEXT: divq %rsi 1041; CHECK-O0-NEXT: movq %rdx, %rax 1042; CHECK-O0-NEXT: retq 1043; 1044; CHECK-O3-LABEL: load_fold_urem2: 1045; CHECK-O3: # %bb.0: 1046; CHECK-O3-NEXT: movq (%rdi), %rax 1047; CHECK-O3-NEXT: movq %rax, %rcx 1048; CHECK-O3-NEXT: orq %rsi, %rcx 1049; CHECK-O3-NEXT: shrq $32, %rcx 1050; CHECK-O3-NEXT: je .LBB44_1 1051; CHECK-O3-NEXT: # %bb.2: 1052; CHECK-O3-NEXT: xorl %edx, %edx 1053; CHECK-O3-NEXT: divq %rsi 1054; CHECK-O3-NEXT: movq %rdx, %rax 1055; CHECK-O3-NEXT: retq 1056; CHECK-O3-NEXT: .LBB44_1: 1057; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax 1058; CHECK-O3-NEXT: xorl %edx, %edx 1059; CHECK-O3-NEXT: divl %esi 1060; CHECK-O3-NEXT: movl %edx, %eax 1061; CHECK-O3-NEXT: retq 1062 %v = load atomic i64, i64* %p unordered, align 8 1063 %ret = urem i64 %v, %v2 1064 ret i64 %ret 1065} 1066 1067define i64 @load_fold_urem3(i64* %p1, i64* %p2) { 1068; CHECK-O0-LABEL: load_fold_urem3: 1069; CHECK-O0: # %bb.0: 1070; CHECK-O0-NEXT: movq (%rdi), %rax 1071; CHECK-O0-NEXT: xorl %ecx, %ecx 1072; CHECK-O0-NEXT: movl %ecx, %edx 1073; CHECK-O0-NEXT: divq (%rsi) 1074; CHECK-O0-NEXT: movq %rdx, %rax 1075; CHECK-O0-NEXT: retq 1076; 1077; CHECK-O3-LABEL: load_fold_urem3: 
1078; CHECK-O3: # %bb.0: 1079; CHECK-O3-NEXT: movq (%rdi), %rax 1080; CHECK-O3-NEXT: movq (%rsi), %rcx 1081; CHECK-O3-NEXT: movq %rax, %rdx 1082; CHECK-O3-NEXT: orq %rcx, %rdx 1083; CHECK-O3-NEXT: shrq $32, %rdx 1084; CHECK-O3-NEXT: je .LBB45_1 1085; CHECK-O3-NEXT: # %bb.2: 1086; CHECK-O3-NEXT: xorl %edx, %edx 1087; CHECK-O3-NEXT: divq %rcx 1088; CHECK-O3-NEXT: movq %rdx, %rax 1089; CHECK-O3-NEXT: retq 1090; CHECK-O3-NEXT: .LBB45_1: 1091; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax 1092; CHECK-O3-NEXT: xorl %edx, %edx 1093; CHECK-O3-NEXT: divl %ecx 1094; CHECK-O3-NEXT: movl %edx, %eax 1095; CHECK-O3-NEXT: retq 1096 %v = load atomic i64, i64* %p1 unordered, align 8 1097 %v2 = load atomic i64, i64* %p2 unordered, align 8 1098 %ret = urem i64 %v, %v2 1099 ret i64 %ret 1100} 1101 1102; Legal, as expected 1103define i64 @load_fold_shl1(i64* %p) { 1104; CHECK-LABEL: load_fold_shl1: 1105; CHECK: # %bb.0: 1106; CHECK-NEXT: movq (%rdi), %rax 1107; CHECK-NEXT: shlq $15, %rax 1108; CHECK-NEXT: retq 1109 %v = load atomic i64, i64* %p unordered, align 8 1110 %ret = shl i64 %v, 15 1111 ret i64 %ret 1112} 1113 ; Variable shift amount; O3 folds the atomic load into BMI2 shlxq. 1114define i64 @load_fold_shl2(i64* %p, i64 %v2) { 1115; CHECK-O0-LABEL: load_fold_shl2: 1116; CHECK-O0: # %bb.0: 1117; CHECK-O0-NEXT: movq %rsi, %rcx 1118; CHECK-O0-NEXT: movq (%rdi), %rax 1119; CHECK-O0-NEXT: # kill: def $cl killed $rcx 1120; CHECK-O0-NEXT: shlq %cl, %rax 1121; CHECK-O0-NEXT: retq 1122; 1123; CHECK-O3-LABEL: load_fold_shl2: 1124; CHECK-O3: # %bb.0: 1125; CHECK-O3-NEXT: shlxq %rsi, (%rdi), %rax 1126; CHECK-O3-NEXT: retq 1127 %v = load atomic i64, i64* %p unordered, align 8 1128 %ret = shl i64 %v, %v2 1129 ret i64 %ret 1130} 1131 ; Shift amount is itself an unordered atomic load. 1132define i64 @load_fold_shl3(i64* %p1, i64* %p2) { 1133; CHECK-O0-LABEL: load_fold_shl3: 1134; CHECK-O0: # %bb.0: 1135; CHECK-O0-NEXT: movq (%rdi), %rax 1136; CHECK-O0-NEXT: movq (%rsi), %rcx 1137; CHECK-O0-NEXT: # kill: def $cl killed $rcx 1138; CHECK-O0-NEXT: shlq %cl, %rax 1139; CHECK-O0-NEXT: retq 1140; 1141; 
CHECK-O3-LABEL: load_fold_shl3: 1142; CHECK-O3: # %bb.0: 1143; CHECK-O3-NEXT: movq (%rsi), %rax 1144; CHECK-O3-NEXT: shlxq %rax, (%rdi), %rax 1145; CHECK-O3-NEXT: retq 1146 %v = load atomic i64, i64* %p1 unordered, align 8 1147 %v2 = load atomic i64, i64* %p2 unordered, align 8 1148 %ret = shl i64 %v, %v2 1149 ret i64 %ret 1150} 1151 1152; Legal, as expected 1153define i64 @load_fold_lshr1(i64* %p) { 1154; CHECK-LABEL: load_fold_lshr1: 1155; CHECK: # %bb.0: 1156; CHECK-NEXT: movq (%rdi), %rax 1157; CHECK-NEXT: shrq $15, %rax 1158; CHECK-NEXT: retq 1159 %v = load atomic i64, i64* %p unordered, align 8 1160 %ret = lshr i64 %v, 15 1161 ret i64 %ret 1162} 1163 ; Variable shift amount; O3 folds the atomic load into BMI2 shrxq. 1164define i64 @load_fold_lshr2(i64* %p, i64 %v2) { 1165; CHECK-O0-LABEL: load_fold_lshr2: 1166; CHECK-O0: # %bb.0: 1167; CHECK-O0-NEXT: movq %rsi, %rcx 1168; CHECK-O0-NEXT: movq (%rdi), %rax 1169; CHECK-O0-NEXT: # kill: def $cl killed $rcx 1170; CHECK-O0-NEXT: shrq %cl, %rax 1171; CHECK-O0-NEXT: retq 1172; 1173; CHECK-O3-LABEL: load_fold_lshr2: 1174; CHECK-O3: # %bb.0: 1175; CHECK-O3-NEXT: shrxq %rsi, (%rdi), %rax 1176; CHECK-O3-NEXT: retq 1177 %v = load atomic i64, i64* %p unordered, align 8 1178 %ret = lshr i64 %v, %v2 1179 ret i64 %ret 1180} 1181 ; Shift amount is itself an unordered atomic load. 1182define i64 @load_fold_lshr3(i64* %p1, i64* %p2) { 1183; CHECK-O0-LABEL: load_fold_lshr3: 1184; CHECK-O0: # %bb.0: 1185; CHECK-O0-NEXT: movq (%rdi), %rax 1186; CHECK-O0-NEXT: movq (%rsi), %rcx 1187; CHECK-O0-NEXT: # kill: def $cl killed $rcx 1188; CHECK-O0-NEXT: shrq %cl, %rax 1189; CHECK-O0-NEXT: retq 1190; 1191; CHECK-O3-LABEL: load_fold_lshr3: 1192; CHECK-O3: # %bb.0: 1193; CHECK-O3-NEXT: movq (%rsi), %rax 1194; CHECK-O3-NEXT: shrxq %rax, (%rdi), %rax 1195; CHECK-O3-NEXT: retq 1196 %v = load atomic i64, i64* %p1 unordered, align 8 1197 %v2 = load atomic i64, i64* %p2 unordered, align 8 1198 %ret = lshr i64 %v, %v2 1199 ret i64 %ret 1200} 1201 1202; Legal, as expected 1203define i64 @load_fold_ashr1(i64* %p) { 1204; CHECK-LABEL: load_fold_ashr1: 1205; CHECK: # 
%bb.0: 1206; CHECK-NEXT: movq (%rdi), %rax 1207; CHECK-NEXT: sarq $15, %rax 1208; CHECK-NEXT: retq 1209 %v = load atomic i64, i64* %p unordered, align 8 1210 %ret = ashr i64 %v, 15 1211 ret i64 %ret 1212} 1213 ; Variable shift amount; O3 folds the atomic load into BMI2 sarxq. 1214define i64 @load_fold_ashr2(i64* %p, i64 %v2) { 1215; CHECK-O0-LABEL: load_fold_ashr2: 1216; CHECK-O0: # %bb.0: 1217; CHECK-O0-NEXT: movq %rsi, %rcx 1218; CHECK-O0-NEXT: movq (%rdi), %rax 1219; CHECK-O0-NEXT: # kill: def $cl killed $rcx 1220; CHECK-O0-NEXT: sarq %cl, %rax 1221; CHECK-O0-NEXT: retq 1222; 1223; CHECK-O3-LABEL: load_fold_ashr2: 1224; CHECK-O3: # %bb.0: 1225; CHECK-O3-NEXT: sarxq %rsi, (%rdi), %rax 1226; CHECK-O3-NEXT: retq 1227 %v = load atomic i64, i64* %p unordered, align 8 1228 %ret = ashr i64 %v, %v2 1229 ret i64 %ret 1230} 1231 ; Shift amount is itself an unordered atomic load. 1232define i64 @load_fold_ashr3(i64* %p1, i64* %p2) { 1233; CHECK-O0-LABEL: load_fold_ashr3: 1234; CHECK-O0: # %bb.0: 1235; CHECK-O0-NEXT: movq (%rdi), %rax 1236; CHECK-O0-NEXT: movq (%rsi), %rcx 1237; CHECK-O0-NEXT: # kill: def $cl killed $rcx 1238; CHECK-O0-NEXT: sarq %cl, %rax 1239; CHECK-O0-NEXT: retq 1240; 1241; CHECK-O3-LABEL: load_fold_ashr3: 1242; CHECK-O3: # %bb.0: 1243; CHECK-O3-NEXT: movq (%rsi), %rax 1244; CHECK-O3-NEXT: sarxq %rax, (%rdi), %rax 1245; CHECK-O3-NEXT: retq 1246 %v = load atomic i64, i64* %p1 unordered, align 8 1247 %v2 = load atomic i64, i64* %p2 unordered, align 8 1248 %ret = ashr i64 %v, %v2 1249 ret i64 %ret 1250} 1251 1252; Legal, as expected 1253define i64 @load_fold_and1(i64* %p) { 1254; CHECK-O0-LABEL: load_fold_and1: 1255; CHECK-O0: # %bb.0: 1256; CHECK-O0-NEXT: movq (%rdi), %rax 1257; CHECK-O0-NEXT: andq $15, %rax 1258; CHECK-O0-NEXT: retq 1259; 1260; CHECK-O3-LABEL: load_fold_and1: 1261; CHECK-O3: # %bb.0: 1262; CHECK-O3-NEXT: movq (%rdi), %rax 1263; CHECK-O3-NEXT: andl $15, %eax 1264; CHECK-O3-NEXT: retq 1265 %v = load atomic i64, i64* %p unordered, align 8 1266 %ret = and i64 %v, 15 1267 ret i64 %ret 1268} 1269 ; Variable mask; the atomic load folds into andq's memory operand. 1270define i64 @load_fold_and2(i64* %p, i64 %v2) { 1271; 
CHECK-LABEL: load_fold_and2: 1272; CHECK: # %bb.0: 1273; CHECK-NEXT: movq %rsi, %rax 1274; CHECK-NEXT: andq (%rdi), %rax 1275; CHECK-NEXT: retq 1276 %v = load atomic i64, i64* %p unordered, align 8 1277 %ret = and i64 %v, %v2 1278 ret i64 %ret 1279} 1280 ; Both operands loaded atomically; -CUR and -EX isel fold opposite operands into andq. 1281define i64 @load_fold_and3(i64* %p1, i64* %p2) { 1282; CHECK-O0-LABEL: load_fold_and3: 1283; CHECK-O0: # %bb.0: 1284; CHECK-O0-NEXT: movq (%rdi), %rax 1285; CHECK-O0-NEXT: andq (%rsi), %rax 1286; CHECK-O0-NEXT: retq 1287; 1288; CHECK-O3-CUR-LABEL: load_fold_and3: 1289; CHECK-O3-CUR: # %bb.0: 1290; CHECK-O3-CUR-NEXT: movq (%rsi), %rax 1291; CHECK-O3-CUR-NEXT: andq (%rdi), %rax 1292; CHECK-O3-CUR-NEXT: retq 1293; 1294; CHECK-O3-EX-LABEL: load_fold_and3: 1295; CHECK-O3-EX: # %bb.0: 1296; CHECK-O3-EX-NEXT: movq (%rdi), %rax 1297; CHECK-O3-EX-NEXT: andq (%rsi), %rax 1298; CHECK-O3-EX-NEXT: retq 1299 %v = load atomic i64, i64* %p1 unordered, align 8 1300 %v2 = load atomic i64, i64* %p2 unordered, align 8 1301 %ret = and i64 %v, %v2 1302 ret i64 %ret 1303} 1304 1305; Legal, as expected 1306define i64 @load_fold_or1(i64* %p) { 1307; CHECK-LABEL: load_fold_or1: 1308; CHECK: # %bb.0: 1309; CHECK-NEXT: movq (%rdi), %rax 1310; CHECK-NEXT: orq $15, %rax 1311; CHECK-NEXT: retq 1312 %v = load atomic i64, i64* %p unordered, align 8 1313 %ret = or i64 %v, 15 1314 ret i64 %ret 1315} 1316 ; Variable operand; the atomic load folds into orq's memory operand. 1317define i64 @load_fold_or2(i64* %p, i64 %v2) { 1318; CHECK-LABEL: load_fold_or2: 1319; CHECK: # %bb.0: 1320; CHECK-NEXT: movq %rsi, %rax 1321; CHECK-NEXT: orq (%rdi), %rax 1322; CHECK-NEXT: retq 1323 %v = load atomic i64, i64* %p unordered, align 8 1324 %ret = or i64 %v, %v2 1325 ret i64 %ret 1326} 1327 ; Both operands loaded atomically; -CUR and -EX isel fold opposite operands into orq. 1328define i64 @load_fold_or3(i64* %p1, i64* %p2) { 1329; CHECK-O0-LABEL: load_fold_or3: 1330; CHECK-O0: # %bb.0: 1331; CHECK-O0-NEXT: movq (%rdi), %rax 1332; CHECK-O0-NEXT: orq (%rsi), %rax 1333; CHECK-O0-NEXT: retq 1334; 1335; CHECK-O3-CUR-LABEL: load_fold_or3: 1336; CHECK-O3-CUR: # %bb.0: 1337; CHECK-O3-CUR-NEXT: movq (%rsi), %rax 1338; 
CHECK-O3-CUR-NEXT: orq (%rdi), %rax 1339; CHECK-O3-CUR-NEXT: retq 1340; 1341; CHECK-O3-EX-LABEL: load_fold_or3: 1342; CHECK-O3-EX: # %bb.0: 1343; CHECK-O3-EX-NEXT: movq (%rdi), %rax 1344; CHECK-O3-EX-NEXT: orq (%rsi), %rax 1345; CHECK-O3-EX-NEXT: retq 1346 %v = load atomic i64, i64* %p1 unordered, align 8 1347 %v2 = load atomic i64, i64* %p2 unordered, align 8 1348 %ret = or i64 %v, %v2 1349 ret i64 %ret 1350} 1351 1352; Legal, as expected 1353define i64 @load_fold_xor1(i64* %p) { 1354; CHECK-LABEL: load_fold_xor1: 1355; CHECK: # %bb.0: 1356; CHECK-NEXT: movq (%rdi), %rax 1357; CHECK-NEXT: xorq $15, %rax 1358; CHECK-NEXT: retq 1359 %v = load atomic i64, i64* %p unordered, align 8 1360 %ret = xor i64 %v, 15 1361 ret i64 %ret 1362} 1363 ; Variable operand; the atomic load folds into xorq's memory operand. 1364define i64 @load_fold_xor2(i64* %p, i64 %v2) { 1365; CHECK-LABEL: load_fold_xor2: 1366; CHECK: # %bb.0: 1367; CHECK-NEXT: movq %rsi, %rax 1368; CHECK-NEXT: xorq (%rdi), %rax 1369; CHECK-NEXT: retq 1370 %v = load atomic i64, i64* %p unordered, align 8 1371 %ret = xor i64 %v, %v2 1372 ret i64 %ret 1373} 1374 ; Both operands loaded atomically; -CUR and -EX isel fold opposite operands into xorq. 1375define i64 @load_fold_xor3(i64* %p1, i64* %p2) { 1376; CHECK-O0-LABEL: load_fold_xor3: 1377; CHECK-O0: # %bb.0: 1378; CHECK-O0-NEXT: movq (%rdi), %rax 1379; CHECK-O0-NEXT: xorq (%rsi), %rax 1380; CHECK-O0-NEXT: retq 1381; 1382; CHECK-O3-CUR-LABEL: load_fold_xor3: 1383; CHECK-O3-CUR: # %bb.0: 1384; CHECK-O3-CUR-NEXT: movq (%rsi), %rax 1385; CHECK-O3-CUR-NEXT: xorq (%rdi), %rax 1386; CHECK-O3-CUR-NEXT: retq 1387; 1388; CHECK-O3-EX-LABEL: load_fold_xor3: 1389; CHECK-O3-EX: # %bb.0: 1390; CHECK-O3-EX-NEXT: movq (%rdi), %rax 1391; CHECK-O3-EX-NEXT: xorq (%rsi), %rax 1392; CHECK-O3-EX-NEXT: retq 1393 %v = load atomic i64, i64* %p1 unordered, align 8 1394 %v2 = load atomic i64, i64* %p2 unordered, align 8 1395 %ret = xor i64 %v, %v2 1396 ret i64 %ret 1397} 1398 ; Comparison against a constant; O3 folds the atomic load into cmpq. 1399define i1 @load_fold_icmp1(i64* %p) { 1400; CHECK-O0-LABEL: load_fold_icmp1: 1401; CHECK-O0: # %bb.0: 1402; CHECK-O0-NEXT: movq (%rdi), %rax 1403; CHECK-O0-NEXT: subq 
$15, %rax 1404; CHECK-O0-NEXT: sete %al 1405; CHECK-O0-NEXT: retq 1406; 1407; CHECK-O3-LABEL: load_fold_icmp1: 1408; CHECK-O3: # %bb.0: 1409; CHECK-O3-NEXT: cmpq $15, (%rdi) 1410; CHECK-O3-NEXT: sete %al 1411; CHECK-O3-NEXT: retq 1412 %v = load atomic i64, i64* %p unordered, align 8 1413 %ret = icmp eq i64 %v, 15 1414 ret i1 %ret 1415} 1416 1417define i1 @load_fold_icmp2(i64* %p, i64 %v2) { 1418; CHECK-O0-LABEL: load_fold_icmp2: 1419; CHECK-O0: # %bb.0: 1420; CHECK-O0-NEXT: movq (%rdi), %rax 1421; CHECK-O0-NEXT: subq %rsi, %rax 1422; CHECK-O0-NEXT: sete %al 1423; CHECK-O0-NEXT: retq 1424; 1425; CHECK-O3-LABEL: load_fold_icmp2: 1426; CHECK-O3: # %bb.0: 1427; CHECK-O3-NEXT: cmpq %rsi, (%rdi) 1428; CHECK-O3-NEXT: sete %al 1429; CHECK-O3-NEXT: retq 1430 %v = load atomic i64, i64* %p unordered, align 8 1431 %ret = icmp eq i64 %v, %v2 1432 ret i1 %ret 1433} 1434 1435define i1 @load_fold_icmp3(i64* %p1, i64* %p2) { 1436; CHECK-O0-LABEL: load_fold_icmp3: 1437; CHECK-O0: # %bb.0: 1438; CHECK-O0-NEXT: movq (%rdi), %rax 1439; CHECK-O0-NEXT: movq (%rsi), %rcx 1440; CHECK-O0-NEXT: subq %rcx, %rax 1441; CHECK-O0-NEXT: sete %al 1442; CHECK-O0-NEXT: retq 1443; 1444; CHECK-O3-CUR-LABEL: load_fold_icmp3: 1445; CHECK-O3-CUR: # %bb.0: 1446; CHECK-O3-CUR-NEXT: movq (%rsi), %rax 1447; CHECK-O3-CUR-NEXT: cmpq %rax, (%rdi) 1448; CHECK-O3-CUR-NEXT: sete %al 1449; CHECK-O3-CUR-NEXT: retq 1450; 1451; CHECK-O3-EX-LABEL: load_fold_icmp3: 1452; CHECK-O3-EX: # %bb.0: 1453; CHECK-O3-EX-NEXT: movq (%rdi), %rax 1454; CHECK-O3-EX-NEXT: cmpq (%rsi), %rax 1455; CHECK-O3-EX-NEXT: sete %al 1456; CHECK-O3-EX-NEXT: retq 1457 %v = load atomic i64, i64* %p1 unordered, align 8 1458 %v2 = load atomic i64, i64* %p2 unordered, align 8 1459 %ret = icmp eq i64 %v, %v2 1460 ret i1 %ret 1461} 1462 1463 1464;; The next batch of tests check for read-modify-write patterns 1465;; Legally, it's okay to use a memory operand here as long as the operand 1466;; is well aligned (i.e. doesn't cross a cache line boundary). 
We are 1467;; required not to narrow the store though! 1468 1469; Legal, as expected 1470define void @rmw_fold_add1(i64* %p, i64 %v) { 1471; CHECK-O0-LABEL: rmw_fold_add1: 1472; CHECK-O0: # %bb.0: 1473; CHECK-O0-NEXT: movq (%rdi), %rax 1474; CHECK-O0-NEXT: addq $15, %rax 1475; CHECK-O0-NEXT: movq %rax, (%rdi) 1476; CHECK-O0-NEXT: retq 1477; 1478; CHECK-O3-LABEL: rmw_fold_add1: 1479; CHECK-O3: # %bb.0: 1480; CHECK-O3-NEXT: addq $15, (%rdi) 1481; CHECK-O3-NEXT: retq 1482 %prev = load atomic i64, i64* %p unordered, align 8 1483 %val = add i64 %prev, 15 1484 store atomic i64 %val, i64* %p unordered, align 8 1485 ret void 1486} 1487 1488; Legal, as expected 1489define void @rmw_fold_add2(i64* %p, i64 %v) { 1490; CHECK-O0-LABEL: rmw_fold_add2: 1491; CHECK-O0: # %bb.0: 1492; CHECK-O0-NEXT: movq (%rdi), %rax 1493; CHECK-O0-NEXT: addq %rsi, %rax 1494; CHECK-O0-NEXT: movq %rax, (%rdi) 1495; CHECK-O0-NEXT: retq 1496; 1497; CHECK-O3-LABEL: rmw_fold_add2: 1498; CHECK-O3: # %bb.0: 1499; CHECK-O3-NEXT: addq %rsi, (%rdi) 1500; CHECK-O3-NEXT: retq 1501 %prev = load atomic i64, i64* %p unordered, align 8 1502 %val = add i64 %prev, %v 1503 store atomic i64 %val, i64* %p unordered, align 8 1504 ret void 1505} 1506 1507; Legal, as expected 1508define void @rmw_fold_sub1(i64* %p, i64 %v) { 1509; CHECK-O0-LABEL: rmw_fold_sub1: 1510; CHECK-O0: # %bb.0: 1511; CHECK-O0-NEXT: movq (%rdi), %rax 1512; CHECK-O0-NEXT: addq $-15, %rax 1513; CHECK-O0-NEXT: movq %rax, (%rdi) 1514; CHECK-O0-NEXT: retq 1515; 1516; CHECK-O3-LABEL: rmw_fold_sub1: 1517; CHECK-O3: # %bb.0: 1518; CHECK-O3-NEXT: addq $-15, (%rdi) 1519; CHECK-O3-NEXT: retq 1520 %prev = load atomic i64, i64* %p unordered, align 8 1521 %val = sub i64 %prev, 15 1522 store atomic i64 %val, i64* %p unordered, align 8 1523 ret void 1524} 1525 1526; Legal, as expected 1527define void @rmw_fold_sub2(i64* %p, i64 %v) { 1528; CHECK-O0-LABEL: rmw_fold_sub2: 1529; CHECK-O0: # %bb.0: 1530; CHECK-O0-NEXT: movq (%rdi), %rax 1531; CHECK-O0-NEXT: subq %rsi, 
%rax 1532; CHECK-O0-NEXT: movq %rax, (%rdi) 1533; CHECK-O0-NEXT: retq 1534; 1535; CHECK-O3-LABEL: rmw_fold_sub2: 1536; CHECK-O3: # %bb.0: 1537; CHECK-O3-NEXT: subq %rsi, (%rdi) 1538; CHECK-O3-NEXT: retq 1539 %prev = load atomic i64, i64* %p unordered, align 8 1540 %val = sub i64 %prev, %v 1541 store atomic i64 %val, i64* %p unordered, align 8 1542 ret void 1543} 1544 1545; Legal, as expected 1546define void @rmw_fold_mul1(i64* %p, i64 %v) { 1547; CHECK-LABEL: rmw_fold_mul1: 1548; CHECK: # %bb.0: 1549; CHECK-NEXT: movq (%rdi), %rax 1550; CHECK-NEXT: leaq (%rax,%rax,4), %rax 1551; CHECK-NEXT: leaq (%rax,%rax,2), %rax 1552; CHECK-NEXT: movq %rax, (%rdi) 1553; CHECK-NEXT: retq 1554 %prev = load atomic i64, i64* %p unordered, align 8 1555 %val = mul i64 %prev, 15 1556 store atomic i64 %val, i64* %p unordered, align 8 1557 ret void 1558} 1559 1560; Legal to fold (TODO) 1561define void @rmw_fold_mul2(i64* %p, i64 %v) { 1562; CHECK-O0-LABEL: rmw_fold_mul2: 1563; CHECK-O0: # %bb.0: 1564; CHECK-O0-NEXT: movq (%rdi), %rax 1565; CHECK-O0-NEXT: imulq %rsi, %rax 1566; CHECK-O0-NEXT: movq %rax, (%rdi) 1567; CHECK-O0-NEXT: retq 1568; 1569; CHECK-O3-LABEL: rmw_fold_mul2: 1570; CHECK-O3: # %bb.0: 1571; CHECK-O3-NEXT: imulq (%rdi), %rsi 1572; CHECK-O3-NEXT: movq %rsi, (%rdi) 1573; CHECK-O3-NEXT: retq 1574 %prev = load atomic i64, i64* %p unordered, align 8 1575 %val = mul i64 %prev, %v 1576 store atomic i64 %val, i64* %p unordered, align 8 1577 ret void 1578} 1579 1580; Legal, as expected 1581define void @rmw_fold_sdiv1(i64* %p, i64 %v) { 1582; CHECK-O0-LABEL: rmw_fold_sdiv1: 1583; CHECK-O0: # %bb.0: 1584; CHECK-O0-NEXT: movq (%rdi), %rcx 1585; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 1586; CHECK-O0-NEXT: movq %rcx, %rax 1587; CHECK-O0-NEXT: imulq %rdx 1588; CHECK-O0-NEXT: movq %rdx, %rax 1589; CHECK-O0-NEXT: addq %rcx, %rax 1590; CHECK-O0-NEXT: movq %rax, %rcx 1591; CHECK-O0-NEXT: shrq $63, %rcx 1592; CHECK-O0-NEXT: sarq $3, %rax 1593; 
CHECK-O0-NEXT: addq %rcx, %rax 1594; CHECK-O0-NEXT: movq %rax, (%rdi) 1595; CHECK-O0-NEXT: retq 1596; 1597; CHECK-O3-LABEL: rmw_fold_sdiv1: 1598; CHECK-O3: # %bb.0: 1599; CHECK-O3-NEXT: movq (%rdi), %rcx 1600; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 1601; CHECK-O3-NEXT: movq %rcx, %rax 1602; CHECK-O3-NEXT: imulq %rdx 1603; CHECK-O3-NEXT: addq %rcx, %rdx 1604; CHECK-O3-NEXT: movq %rdx, %rax 1605; CHECK-O3-NEXT: shrq $63, %rax 1606; CHECK-O3-NEXT: sarq $3, %rdx 1607; CHECK-O3-NEXT: addq %rax, %rdx 1608; CHECK-O3-NEXT: movq %rdx, (%rdi) 1609; CHECK-O3-NEXT: retq 1610 %prev = load atomic i64, i64* %p unordered, align 8 1611 %val = sdiv i64 %prev, 15 1612 store atomic i64 %val, i64* %p unordered, align 8 1613 ret void 1614} 1615 1616; Legal, as expected 1617define void @rmw_fold_sdiv2(i64* %p, i64 %v) { 1618; CHECK-O0-LABEL: rmw_fold_sdiv2: 1619; CHECK-O0: # %bb.0: 1620; CHECK-O0-NEXT: movq (%rdi), %rax 1621; CHECK-O0-NEXT: cqto 1622; CHECK-O0-NEXT: idivq %rsi 1623; CHECK-O0-NEXT: movq %rax, (%rdi) 1624; CHECK-O0-NEXT: retq 1625; 1626; CHECK-O3-LABEL: rmw_fold_sdiv2: 1627; CHECK-O3: # %bb.0: 1628; CHECK-O3-NEXT: movq (%rdi), %rax 1629; CHECK-O3-NEXT: movq %rax, %rcx 1630; CHECK-O3-NEXT: orq %rsi, %rcx 1631; CHECK-O3-NEXT: shrq $32, %rcx 1632; CHECK-O3-NEXT: je .LBB74_1 1633; CHECK-O3-NEXT: # %bb.2: 1634; CHECK-O3-NEXT: cqto 1635; CHECK-O3-NEXT: idivq %rsi 1636; CHECK-O3-NEXT: movq %rax, (%rdi) 1637; CHECK-O3-NEXT: retq 1638; CHECK-O3-NEXT: .LBB74_1: 1639; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax 1640; CHECK-O3-NEXT: xorl %edx, %edx 1641; CHECK-O3-NEXT: divl %esi 1642; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax 1643; CHECK-O3-NEXT: movq %rax, (%rdi) 1644; CHECK-O3-NEXT: retq 1645 %prev = load atomic i64, i64* %p unordered, align 8 1646 %val = sdiv i64 %prev, %v 1647 store atomic i64 %val, i64* %p unordered, align 8 1648 ret void 1649} 1650 1651; Legal, as expected 1652define void @rmw_fold_udiv1(i64* %p, i64 
%v) { 1653; CHECK-O0-LABEL: rmw_fold_udiv1: 1654; CHECK-O0: # %bb.0: 1655; CHECK-O0-NEXT: movq (%rdi), %rdx 1656; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 1657; CHECK-O0-NEXT: mulxq %rax, %rax, %rax 1658; CHECK-O0-NEXT: shrq $3, %rax 1659; CHECK-O0-NEXT: movq %rax, (%rdi) 1660; CHECK-O0-NEXT: retq 1661; 1662; CHECK-O3-CUR-LABEL: rmw_fold_udiv1: 1663; CHECK-O3-CUR: # %bb.0: 1664; CHECK-O3-CUR-NEXT: movq (%rdi), %rdx 1665; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 1666; CHECK-O3-CUR-NEXT: mulxq %rax, %rax, %rax 1667; CHECK-O3-CUR-NEXT: shrq $3, %rax 1668; CHECK-O3-CUR-NEXT: movq %rax, (%rdi) 1669; CHECK-O3-CUR-NEXT: retq 1670; 1671; CHECK-O3-EX-LABEL: rmw_fold_udiv1: 1672; CHECK-O3-EX: # %bb.0: 1673; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 1674; CHECK-O3-EX-NEXT: mulxq (%rdi), %rax, %rax 1675; CHECK-O3-EX-NEXT: shrq $3, %rax 1676; CHECK-O3-EX-NEXT: movq %rax, (%rdi) 1677; CHECK-O3-EX-NEXT: retq 1678 %prev = load atomic i64, i64* %p unordered, align 8 1679 %val = udiv i64 %prev, 15 1680 store atomic i64 %val, i64* %p unordered, align 8 1681 ret void 1682} 1683 1684; Legal, as expected 1685define void @rmw_fold_udiv2(i64* %p, i64 %v) { 1686; CHECK-O0-LABEL: rmw_fold_udiv2: 1687; CHECK-O0: # %bb.0: 1688; CHECK-O0-NEXT: movq (%rdi), %rax 1689; CHECK-O0-NEXT: xorl %ecx, %ecx 1690; CHECK-O0-NEXT: movl %ecx, %edx 1691; CHECK-O0-NEXT: divq %rsi 1692; CHECK-O0-NEXT: movq %rax, (%rdi) 1693; CHECK-O0-NEXT: retq 1694; 1695; CHECK-O3-LABEL: rmw_fold_udiv2: 1696; CHECK-O3: # %bb.0: 1697; CHECK-O3-NEXT: movq (%rdi), %rax 1698; CHECK-O3-NEXT: movq %rax, %rcx 1699; CHECK-O3-NEXT: orq %rsi, %rcx 1700; CHECK-O3-NEXT: shrq $32, %rcx 1701; CHECK-O3-NEXT: je .LBB76_1 1702; CHECK-O3-NEXT: # %bb.2: 1703; CHECK-O3-NEXT: xorl %edx, %edx 1704; CHECK-O3-NEXT: divq %rsi 1705; CHECK-O3-NEXT: movq %rax, (%rdi) 1706; CHECK-O3-NEXT: retq 1707; CHECK-O3-NEXT: .LBB76_1: 1708; 
CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax 1709; CHECK-O3-NEXT: xorl %edx, %edx 1710; CHECK-O3-NEXT: divl %esi 1711; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax 1712; CHECK-O3-NEXT: movq %rax, (%rdi) 1713; CHECK-O3-NEXT: retq 1714 %prev = load atomic i64, i64* %p unordered, align 8 1715 %val = udiv i64 %prev, %v 1716 store atomic i64 %val, i64* %p unordered, align 8 1717 ret void 1718} 1719 1720; Legal, as expected 1721define void @rmw_fold_srem1(i64* %p, i64 %v) { 1722; CHECK-O0-LABEL: rmw_fold_srem1: 1723; CHECK-O0: # %bb.0: 1724; CHECK-O0-NEXT: movq (%rdi), %rax 1725; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1726; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 1727; CHECK-O0-NEXT: imulq %rcx 1728; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload 1729; CHECK-O0-NEXT: movq %rdx, %rcx 1730; CHECK-O0-NEXT: addq %rax, %rcx 1731; CHECK-O0-NEXT: movq %rcx, %rdx 1732; CHECK-O0-NEXT: shrq $63, %rdx 1733; CHECK-O0-NEXT: sarq $3, %rcx 1734; CHECK-O0-NEXT: addq %rdx, %rcx 1735; CHECK-O0-NEXT: leaq (%rcx,%rcx,4), %rcx 1736; CHECK-O0-NEXT: leaq (%rcx,%rcx,2), %rcx 1737; CHECK-O0-NEXT: subq %rcx, %rax 1738; CHECK-O0-NEXT: movq %rax, (%rdi) 1739; CHECK-O0-NEXT: retq 1740; 1741; CHECK-O3-LABEL: rmw_fold_srem1: 1742; CHECK-O3: # %bb.0: 1743; CHECK-O3-NEXT: movq (%rdi), %rcx 1744; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 1745; CHECK-O3-NEXT: movq %rcx, %rax 1746; CHECK-O3-NEXT: imulq %rdx 1747; CHECK-O3-NEXT: addq %rcx, %rdx 1748; CHECK-O3-NEXT: movq %rdx, %rax 1749; CHECK-O3-NEXT: shrq $63, %rax 1750; CHECK-O3-NEXT: sarq $3, %rdx 1751; CHECK-O3-NEXT: addq %rax, %rdx 1752; CHECK-O3-NEXT: leaq (%rdx,%rdx,4), %rax 1753; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax 1754; CHECK-O3-NEXT: subq %rax, %rcx 1755; CHECK-O3-NEXT: movq %rcx, (%rdi) 1756; CHECK-O3-NEXT: retq 1757 %prev = load atomic i64, i64* %p unordered, align 8 1758 %val = srem i64 %prev, 15 1759 
store atomic i64 %val, i64* %p unordered, align 8 1760 ret void 1761} 1762 1763; Legal, as expected 1764define void @rmw_fold_srem2(i64* %p, i64 %v) { 1765; CHECK-O0-LABEL: rmw_fold_srem2: 1766; CHECK-O0: # %bb.0: 1767; CHECK-O0-NEXT: movq (%rdi), %rax 1768; CHECK-O0-NEXT: cqto 1769; CHECK-O0-NEXT: idivq %rsi 1770; CHECK-O0-NEXT: movq %rdx, (%rdi) 1771; CHECK-O0-NEXT: retq 1772; 1773; CHECK-O3-LABEL: rmw_fold_srem2: 1774; CHECK-O3: # %bb.0: 1775; CHECK-O3-NEXT: movq (%rdi), %rax 1776; CHECK-O3-NEXT: movq %rax, %rcx 1777; CHECK-O3-NEXT: orq %rsi, %rcx 1778; CHECK-O3-NEXT: shrq $32, %rcx 1779; CHECK-O3-NEXT: je .LBB78_1 1780; CHECK-O3-NEXT: # %bb.2: 1781; CHECK-O3-NEXT: cqto 1782; CHECK-O3-NEXT: idivq %rsi 1783; CHECK-O3-NEXT: movq %rdx, (%rdi) 1784; CHECK-O3-NEXT: retq 1785; CHECK-O3-NEXT: .LBB78_1: 1786; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax 1787; CHECK-O3-NEXT: xorl %edx, %edx 1788; CHECK-O3-NEXT: divl %esi 1789; CHECK-O3-NEXT: # kill: def $edx killed $edx def $rdx 1790; CHECK-O3-NEXT: movq %rdx, (%rdi) 1791; CHECK-O3-NEXT: retq 1792 %prev = load atomic i64, i64* %p unordered, align 8 1793 %val = srem i64 %prev, %v 1794 store atomic i64 %val, i64* %p unordered, align 8 1795 ret void 1796} 1797 1798; Legal, as expected 1799define void @rmw_fold_urem1(i64* %p, i64 %v) { 1800; CHECK-O0-LABEL: rmw_fold_urem1: 1801; CHECK-O0: # %bb.0: 1802; CHECK-O0-NEXT: movq (%rdi), %rax 1803; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 1804; CHECK-O0-NEXT: movq %rax, %rdx 1805; CHECK-O0-NEXT: mulxq %rcx, %rcx, %rcx 1806; CHECK-O0-NEXT: shrq $3, %rcx 1807; CHECK-O0-NEXT: leaq (%rcx,%rcx,4), %rcx 1808; CHECK-O0-NEXT: leaq (%rcx,%rcx,2), %rcx 1809; CHECK-O0-NEXT: subq %rcx, %rax 1810; CHECK-O0-NEXT: movq %rax, (%rdi) 1811; CHECK-O0-NEXT: retq 1812; 1813; CHECK-O3-LABEL: rmw_fold_urem1: 1814; CHECK-O3: # %bb.0: 1815; CHECK-O3-NEXT: movq (%rdi), %rdx 1816; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 
1817; CHECK-O3-NEXT: mulxq %rax, %rax, %rax 1818; CHECK-O3-NEXT: shrq $3, %rax 1819; CHECK-O3-NEXT: leaq (%rax,%rax,4), %rax 1820; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax 1821; CHECK-O3-NEXT: subq %rax, %rdx 1822; CHECK-O3-NEXT: movq %rdx, (%rdi) 1823; CHECK-O3-NEXT: retq 1824 %prev = load atomic i64, i64* %p unordered, align 8 1825 %val = urem i64 %prev, 15 1826 store atomic i64 %val, i64* %p unordered, align 8 1827 ret void 1828} 1829 1830; Legal, as expected 1831define void @rmw_fold_urem2(i64* %p, i64 %v) { 1832; CHECK-O0-LABEL: rmw_fold_urem2: 1833; CHECK-O0: # %bb.0: 1834; CHECK-O0-NEXT: movq (%rdi), %rax 1835; CHECK-O0-NEXT: xorl %ecx, %ecx 1836; CHECK-O0-NEXT: movl %ecx, %edx 1837; CHECK-O0-NEXT: divq %rsi 1838; CHECK-O0-NEXT: movq %rdx, (%rdi) 1839; CHECK-O0-NEXT: retq 1840; 1841; CHECK-O3-LABEL: rmw_fold_urem2: 1842; CHECK-O3: # %bb.0: 1843; CHECK-O3-NEXT: movq (%rdi), %rax 1844; CHECK-O3-NEXT: movq %rax, %rcx 1845; CHECK-O3-NEXT: orq %rsi, %rcx 1846; CHECK-O3-NEXT: shrq $32, %rcx 1847; CHECK-O3-NEXT: je .LBB80_1 1848; CHECK-O3-NEXT: # %bb.2: 1849; CHECK-O3-NEXT: xorl %edx, %edx 1850; CHECK-O3-NEXT: divq %rsi 1851; CHECK-O3-NEXT: movq %rdx, (%rdi) 1852; CHECK-O3-NEXT: retq 1853; CHECK-O3-NEXT: .LBB80_1: 1854; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax 1855; CHECK-O3-NEXT: xorl %edx, %edx 1856; CHECK-O3-NEXT: divl %esi 1857; CHECK-O3-NEXT: # kill: def $edx killed $edx def $rdx 1858; CHECK-O3-NEXT: movq %rdx, (%rdi) 1859; CHECK-O3-NEXT: retq 1860 %prev = load atomic i64, i64* %p unordered, align 8 1861 %val = urem i64 %prev, %v 1862 store atomic i64 %val, i64* %p unordered, align 8 1863 ret void 1864} 1865 1866; Legal to fold (TODO) 1867define void @rmw_fold_shl1(i64* %p, i64 %v) { 1868; CHECK-O0-LABEL: rmw_fold_shl1: 1869; CHECK-O0: # %bb.0: 1870; CHECK-O0-NEXT: movq (%rdi), %rax 1871; CHECK-O0-NEXT: shlq $15, %rax 1872; CHECK-O0-NEXT: movq %rax, (%rdi) 1873; CHECK-O0-NEXT: retq 1874; 1875; CHECK-O3-CUR-LABEL: rmw_fold_shl1: 1876; 
CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq (%rdi), %rax
; CHECK-O3-CUR-NEXT: shlq $15, %rax
; CHECK-O3-CUR-NEXT: movq %rax, (%rdi)
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: rmw_fold_shl1:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: shlq $15, (%rdi)
; CHECK-O3-EX-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = shl i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
define void @rmw_fold_shl2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_shl2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movb %sil, %dl
; CHECK-O0-NEXT: # implicit-def: $rcx
; CHECK-O0-NEXT: movb %dl, %cl
; CHECK-O0-NEXT: shlxq %rcx, %rax, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: rmw_fold_shl2:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: shlxq %rsi, (%rdi), %rax
; CHECK-O3-CUR-NEXT: movq %rax, (%rdi)
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: rmw_fold_shl2:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movq %rsi, %rcx
; CHECK-O3-EX-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-O3-EX-NEXT: shlq %cl, (%rdi)
; CHECK-O3-EX-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = shl i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
define void @rmw_fold_lshr1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_lshr1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: shrq $15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: rmw_fold_lshr1:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq (%rdi), %rax
; CHECK-O3-CUR-NEXT: shrq $15, %rax
; CHECK-O3-CUR-NEXT: movq %rax, (%rdi)
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: rmw_fold_lshr1:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: shrq $15, (%rdi)
; CHECK-O3-EX-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = lshr i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
define void @rmw_fold_lshr2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_lshr2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movb %sil, %dl
; CHECK-O0-NEXT: # implicit-def: $rcx
; CHECK-O0-NEXT: movb %dl, %cl
; CHECK-O0-NEXT: shrxq %rcx, %rax, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: rmw_fold_lshr2:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: shrxq %rsi, (%rdi), %rax
; CHECK-O3-CUR-NEXT: movq %rax, (%rdi)
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: rmw_fold_lshr2:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movq %rsi, %rcx
; CHECK-O3-EX-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-O3-EX-NEXT: shrq %cl, (%rdi)
; CHECK-O3-EX-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = lshr i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
define void @rmw_fold_ashr1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_ashr1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: sarq $15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: rmw_fold_ashr1:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq (%rdi), %rax
; CHECK-O3-CUR-NEXT: sarq $15, %rax
; CHECK-O3-CUR-NEXT: movq %rax, (%rdi)
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: rmw_fold_ashr1:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: sarq $15, (%rdi)
; CHECK-O3-EX-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = ashr i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
define void @rmw_fold_ashr2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_ashr2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movb %sil, %dl
; CHECK-O0-NEXT: # implicit-def: $rcx
; CHECK-O0-NEXT: movb %dl, %cl
; CHECK-O0-NEXT: sarxq %rcx, %rax, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: rmw_fold_ashr2:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: sarxq %rsi, (%rdi), %rax
; CHECK-O3-CUR-NEXT: movq %rax, (%rdi)
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: rmw_fold_ashr2:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movq %rsi, %rcx
; CHECK-O3-EX-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-O3-EX-NEXT: sarq %cl, (%rdi)
; CHECK-O3-EX-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = ashr i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_and1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_and1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT: andl $15, %eax
; CHECK-O0-NEXT: # kill: def $rax killed $eax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_and1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: andq $15, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = and i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_and2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_and2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: andq %rsi, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_and2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: andq %rsi, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = and i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_or1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_or1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: orq $15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_or1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: orq $15, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = or i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_or2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_or2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: orq %rsi, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_or2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: orq %rsi, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = or i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_xor1(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_xor1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: xorq $15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_xor1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: xorq $15, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = xor i64 %prev, 15
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_xor2(i64* %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_xor2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: xorq %rsi, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_xor2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: xorq %rsi, (%rdi)
; CHECK-O3-NEXT: retq
  %prev = load atomic i64, i64* %p unordered, align 8
  %val = xor i64 %prev, %v
  store atomic i64 %val, i64* %p unordered, align 8
  ret void
}

;; The next batch tests truncations, in combination w/operations which could
;; be folded against the memory operation.

; Legal to reduce the load width (TODO)
define i32 @fold_trunc(i64* %p) {
; CHECK-LABEL: fold_trunc:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  %ret = trunc i64 %v to i32
  ret i32 %ret
}

; Legal to reduce the load width and fold the load (TODO)
define i32 @fold_trunc_add(i64* %p, i32 %v2) {
; CHECK-O0-LABEL: fold_trunc_add:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT: addl %esi, %eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_trunc_add:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: addl %esi, %eax
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  %trunc = trunc i64 %v to i32
  %ret = add i32 %trunc, %v2
  ret i32 %ret
}

; Legal to reduce the load width and fold the load (TODO)
define i32 @fold_trunc_and(i64* %p, i32 %v2) {
; CHECK-O0-LABEL: fold_trunc_and:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT: andl %esi, %eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_trunc_and:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: andl %esi, %eax
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  %trunc = trunc i64 %v to i32
  %ret = and i32 %trunc, %v2
  ret i32 %ret
}

; Legal to reduce the load width and fold the load (TODO)
define i32 @fold_trunc_or(i64* %p, i32 %v2) {
; CHECK-O0-LABEL: fold_trunc_or:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT: orl %esi, %eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_trunc_or:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: orl %esi, %eax
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  %trunc = trunc i64 %v to i32
  %ret = or i32 %trunc, %v2
  ret i32 %ret
}

; It's tempting to split the wide load into two smaller byte loads
; to reduce memory traffic, but this would be illegal for an atomic load
define i32 @split_load(i64* %p) {
; CHECK-O0-LABEL: split_load:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rcx
; CHECK-O0-NEXT: movb %cl, %al
; CHECK-O0-NEXT: shrq $32, %rcx
; CHECK-O0-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-O0-NEXT: orb %cl, %al
; CHECK-O0-NEXT: movzbl %al, %eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: split_load:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, %rcx
; CHECK-O3-NEXT: shrq $32, %rcx
; CHECK-O3-NEXT: orl %eax, %ecx
; CHECK-O3-NEXT: movzbl %cl, %eax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  %b1 = trunc i64 %v to i8
  %v.shift = lshr i64 %v, 32
  %b2 = trunc i64 %v.shift to i8
  %or = or i8 %b1, %b2
  %ret = zext i8 %or to i32
  ret i32 %ret
}

;; A collection of simple memory forwarding tests. Nothing particularly
;; interesting semantics-wise, just demonstrating obvious missed transforms.

@Zero = constant i64 0

; TODO: should return constant
define i64 @constant_folding(i64* %p) {
; CHECK-LABEL: constant_folding:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  ret i64 %v
}

; Legal to forward and fold (TODO)
define i64 @load_forwarding(i64* %p) {
; CHECK-LABEL: load_forwarding:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: orq (%rdi), %rax
; CHECK-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  %v2 = load atomic i64, i64* %p unordered, align 8
  %ret = or i64 %v, %v2
  ret i64 %ret
}

; Legal to forward (TODO)
define i64 @store_forward(i64* %p, i64 %v) {
; CHECK-LABEL: store_forward:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, (%rdi)
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: retq
  store atomic i64 %v, i64* %p unordered, align 8
  %ret = load atomic i64, i64* %p unordered, align 8
  ret i64 %ret
}

; Legal to kill (TODO)
define void @dead_writeback(i64* %p) {
; CHECK-LABEL: dead_writeback:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq %rax, (%rdi)
; CHECK-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  store atomic i64 %v, i64* %p unordered, align 8
  ret void
}

; Legal to kill (TODO)
define void @dead_store(i64* %p, i64 %v) {
; CHECK-LABEL: dead_store:
; CHECK: # %bb.0:
; CHECK-NEXT: movq $0, (%rdi)
; CHECK-NEXT: movq %rsi, (%rdi)
; CHECK-NEXT: retq
  store atomic i64 0, i64* %p unordered, align 8
  store atomic i64 %v, i64* %p unordered, align 8
  ret void
}

;; The next batch of tests ensure that we don't try to fold a load into a
;; use where the code motion implied for the load is prevented by a fence.
;; Note: We're checking that the load doesn't get moved below the fence as
;; part of folding, but is technically legal to lift the add above the fence.
;; If that were to happen, please rewrite the test to ensure load movement
;; isn't violated.

define i64 @nofold_fence(i64* %p) {
; CHECK-LABEL: nofold_fence:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: mfence
; CHECK-NEXT: addq $15, %rax
; CHECK-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  fence seq_cst
  %ret = add i64 %v, 15
  ret i64 %ret
}

define i64 @nofold_fence_acquire(i64* %p) {
; CHECK-LABEL: nofold_fence_acquire:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: #MEMBARRIER
; CHECK-NEXT: addq $15, %rax
; CHECK-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  fence acquire
  %ret = add i64 %v, 15
  ret i64 %ret
}


define i64 @nofold_stfence(i64* %p) {
; CHECK-LABEL: nofold_stfence:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: #MEMBARRIER
; CHECK-NEXT: addq $15, %rax
; CHECK-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8
  fence syncscope("singlethread") seq_cst
  %ret = add i64 %v, 15
  ret i64 %ret
}

;; Next, test how well we can fold invariant loads.

@Constant = external dso_local constant i64

define i64 @fold_constant(i64 %arg) {
; CHECK-O0-LABEL: fold_constant:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq %rdi, %rax
; CHECK-O0-NEXT: addq Constant, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: fold_constant:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq %rdi, %rax
; CHECK-O3-NEXT: addq Constant(%rip), %rax
; CHECK-O3-NEXT: retq
  %v = load atomic i64, i64* @Constant unordered, align 8
  %ret = add i64 %v, %arg
  ret i64 %ret
}

define i64 @fold_constant_clobber(i64* %p, i64 %arg) {
; CHECK-O0-LABEL: fold_constant_clobber:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq Constant(%rip), %rax
; CHECK-O0-NEXT: movq $5, (%rdi)
; CHECK-O0-NEXT: addq %rsi, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: fold_constant_clobber:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq Constant(%rip), %rax
; CHECK-O3-CUR-NEXT: movq $5, (%rdi)
; CHECK-O3-CUR-NEXT: addq %rsi, %rax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: fold_constant_clobber:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movq %rsi, %rax
; CHECK-O3-EX-NEXT: addq Constant(%rip), %rax
; CHECK-O3-EX-NEXT: movq $5, (%rdi)
; CHECK-O3-EX-NEXT: retq
  %v = load atomic i64, i64* @Constant unordered, align 8
  store i64 5, i64* %p
  %ret = add i64 %v, %arg
  ret i64 %ret
}

define i64 @fold_constant_fence(i64 %arg) {
; CHECK-O0-LABEL: fold_constant_fence:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq Constant(%rip), %rax
; CHECK-O0-NEXT: mfence
; CHECK-O0-NEXT: addq %rdi, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: fold_constant_fence:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq Constant(%rip), %rax
; CHECK-O3-CUR-NEXT: mfence
; CHECK-O3-CUR-NEXT: addq %rdi, %rax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: fold_constant_fence:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movq %rdi, %rax
; CHECK-O3-EX-NEXT: addq Constant(%rip), %rax
; CHECK-O3-EX-NEXT: mfence
; CHECK-O3-EX-NEXT: retq
  %v = load atomic i64, i64* @Constant unordered, align 8
  fence seq_cst
  %ret = add i64 %v, %arg
  ret i64 %ret
}

define i64 @fold_invariant_clobber(i64* dereferenceable(8) %p, i64 %arg) {
; CHECK-O0-LABEL: fold_invariant_clobber:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movq $5, (%rdi)
; CHECK-O0-NEXT: addq %rsi, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: fold_invariant_clobber:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq (%rdi), %rax
; CHECK-O3-CUR-NEXT: movq $5, (%rdi)
; CHECK-O3-CUR-NEXT: addq %rsi, %rax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: fold_invariant_clobber:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movq %rsi, %rax
; CHECK-O3-EX-NEXT: addq (%rdi), %rax
; CHECK-O3-EX-NEXT: movq $5, (%rdi)
; CHECK-O3-EX-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{}
  store i64 5, i64* %p
  %ret = add i64 %v, %arg
  ret i64 %ret
}


define i64 @fold_invariant_fence(i64* dereferenceable(8) %p, i64 %arg) {
; CHECK-O0-LABEL: fold_invariant_fence:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: mfence
; CHECK-O0-NEXT: addq %rsi, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: fold_invariant_fence:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq (%rdi), %rax
; CHECK-O3-CUR-NEXT: mfence
; CHECK-O3-CUR-NEXT: addq %rsi, %rax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: fold_invariant_fence:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movq %rsi, %rax
; CHECK-O3-EX-NEXT: addq (%rdi), %rax
; CHECK-O3-EX-NEXT: mfence
; CHECK-O3-EX-NEXT: retq
  %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{}
  fence seq_cst
  %ret = add i64 %v, %arg
  ret i64 %ret
}


; Exercise a few cases involving any extend idioms

define i16 @load_i8_anyext_i16(i8* %ptr) {
; CHECK-O0-CUR-LABEL: load_i8_anyext_i16:
; CHECK-O0-CUR: # %bb.0:
; CHECK-O0-CUR-NEXT: movb (%rdi), %al
; CHECK-O0-CUR-NEXT: movzbl %al, %eax
; CHECK-O0-CUR-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-O0-CUR-NEXT: retq
;
; CHECK-O3-CUR-LABEL: load_i8_anyext_i16:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movzbl (%rdi), %eax
; CHECK-O3-CUR-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O0-EX-LABEL: load_i8_anyext_i16:
; CHECK-O0-EX: # %bb.0:
; CHECK-O0-EX-NEXT: vpbroadcastb (%rdi), %xmm0
; CHECK-O0-EX-NEXT: vmovd %xmm0, %eax
; CHECK-O0-EX-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-O0-EX-NEXT: retq
;
; CHECK-O3-EX-LABEL: load_i8_anyext_i16:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: vpbroadcastb (%rdi), %xmm0
; CHECK-O3-EX-NEXT: vmovd %xmm0, %eax
; CHECK-O3-EX-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-O3-EX-NEXT: retq
  %v = load atomic i8, i8* %ptr unordered, align 2
  %vec = insertelement <2 x i8> undef, i8 %v, i32 0
  %res = bitcast <2 x i8> %vec to i16
  ret i16 %res
}

define i32 @load_i8_anyext_i32(i8* %ptr) {
; CHECK-O0-CUR-LABEL: load_i8_anyext_i32:
; CHECK-O0-CUR: # %bb.0:
; CHECK-O0-CUR-NEXT: movb (%rdi), %al
; CHECK-O0-CUR-NEXT: movzbl %al, %eax
; CHECK-O0-CUR-NEXT: retq
;
; CHECK-O3-CUR-LABEL: load_i8_anyext_i32:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movzbl (%rdi), %eax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O0-EX-LABEL: load_i8_anyext_i32:
; CHECK-O0-EX: # %bb.0:
; CHECK-O0-EX-NEXT: vpbroadcastb (%rdi), %xmm0
; CHECK-O0-EX-NEXT: vmovd %xmm0, %eax
; CHECK-O0-EX-NEXT: retq
;
; CHECK-O3-EX-LABEL: load_i8_anyext_i32:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: vpbroadcastb (%rdi), %xmm0
; CHECK-O3-EX-NEXT: vmovd %xmm0, %eax
; CHECK-O3-EX-NEXT: retq
  %v = load atomic i8, i8* %ptr unordered, align 4
  %vec = insertelement <4 x i8> undef, i8 %v, i32 0
  %res = bitcast <4 x i8> %vec to i32
  ret i32 %res
}

define i32 @load_i16_anyext_i32(i16* %ptr) {
; CHECK-O0-CUR-LABEL: load_i16_anyext_i32:
; CHECK-O0-CUR: # %bb.0:
; CHECK-O0-CUR-NEXT: movw (%rdi), %cx
; CHECK-O0-CUR-NEXT: # implicit-def: $eax
; CHECK-O0-CUR-NEXT: movw %cx, %ax
; CHECK-O0-CUR-NEXT: retq
;
; CHECK-O3-CUR-LABEL: load_i16_anyext_i32:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movzwl (%rdi), %eax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O0-EX-LABEL: load_i16_anyext_i32:
; CHECK-O0-EX: # %bb.0:
; CHECK-O0-EX-NEXT: vpbroadcastw (%rdi), %xmm0
; CHECK-O0-EX-NEXT: vmovd %xmm0, %eax
; CHECK-O0-EX-NEXT: retq
;
; CHECK-O3-EX-LABEL: load_i16_anyext_i32:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: vpbroadcastw (%rdi), %xmm0
; CHECK-O3-EX-NEXT: vmovd %xmm0, %eax
; CHECK-O3-EX-NEXT: retq
  %v = load atomic i16, i16* %ptr unordered, align 4
  %vec = insertelement <2 x i16> undef, i16 %v, i64 0
  %res = bitcast <2 x i16> %vec to i32
  ret i32 %res
}

define i64 @load_i16_anyext_i64(i16* %ptr) {
; CHECK-O0-CUR-LABEL: load_i16_anyext_i64:
; CHECK-O0-CUR: # %bb.0:
; CHECK-O0-CUR-NEXT: movw (%rdi), %cx
; CHECK-O0-CUR-NEXT: # implicit-def: $eax
; CHECK-O0-CUR-NEXT: movw %cx, %ax
; CHECK-O0-CUR-NEXT: vmovd %eax, %xmm0
; CHECK-O0-CUR-NEXT: vmovq %xmm0, %rax
; CHECK-O0-CUR-NEXT: retq
;
; CHECK-O3-CUR-LABEL: load_i16_anyext_i64:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movzwl (%rdi), %eax
; CHECK-O3-CUR-NEXT: vmovd %eax, %xmm0
; CHECK-O3-CUR-NEXT: vmovq %xmm0, %rax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O0-EX-LABEL: load_i16_anyext_i64:
; CHECK-O0-EX: # %bb.0:
; CHECK-O0-EX-NEXT: vpbroadcastw (%rdi), %xmm0
; CHECK-O0-EX-NEXT: vmovq %xmm0, %rax
; CHECK-O0-EX-NEXT: retq
;
; CHECK-O3-EX-LABEL: load_i16_anyext_i64:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: vpbroadcastw (%rdi), %xmm0
; CHECK-O3-EX-NEXT: vmovq %xmm0, %rax
; CHECK-O3-EX-NEXT: retq
  %v = load atomic i16, i16* %ptr unordered, align 8
  %vec = insertelement <4 x i16> undef, i16 %v, i64 0
  %res = bitcast <4 x i16> %vec to i64
  ret i64 %res
}

; TODO: Would be legal to combine for legal atomic wider types
define i16 @load_combine(i8* %p) {
; CHECK-O0-LABEL: load_combine:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movb (%rdi), %al
; CHECK-O0-NEXT: movb 1(%rdi), %cl
; CHECK-O0-NEXT: movzbl %al, %eax
; CHECK-O0-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-O0-NEXT: movzbl %cl, %ecx
; CHECK-O0-NEXT: # kill: def $cx killed $cx killed $ecx
; CHECK-O0-NEXT: shlw $8, %cx
; CHECK-O0-NEXT: orw %cx, %ax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: load_combine:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzbl (%rdi), %ecx
; CHECK-O3-NEXT: movzbl 1(%rdi), %eax
; CHECK-O3-NEXT: shll $8, %eax
; CHECK-O3-NEXT: orl %ecx, %eax
; CHECK-O3-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-O3-NEXT: retq
  %v1 = load atomic i8, i8* %p unordered, align 2
  %p2 = getelementptr i8, i8* %p, i64 1
  %v2 = load atomic i8, i8* %p2 unordered, align 1
  %v1.ext = zext i8 %v1 to i16
  %v2.ext = zext i8 %v2 to i16
  %v2.sht = shl i16 %v2.ext, 8
  %res = or i16 %v1.ext, %v2.sht
  ret i16 %res
}

define i1 @fold_cmp_over_fence(i32* %p, i32 %v1) {
; CHECK-O0-LABEL: fold_cmp_over_fence:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movl (%rdi), %eax
; CHECK-O0-NEXT: mfence
; CHECK-O0-NEXT: cmpl %eax, %esi
; CHECK-O0-NEXT: jne .LBB116_2
; CHECK-O0-NEXT: # %bb.1: # %taken
; CHECK-O0-NEXT: movb $1, %al
; CHECK-O0-NEXT: retq
; CHECK-O0-NEXT: .LBB116_2: # %untaken
; CHECK-O0-NEXT: xorl %eax, %eax
; CHECK-O0-NEXT: # kill: def $al killed $al killed $eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: fold_cmp_over_fence:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movl (%rdi), %eax
; CHECK-O3-CUR-NEXT: mfence
; CHECK-O3-CUR-NEXT: cmpl %eax, %esi
; CHECK-O3-CUR-NEXT: jne .LBB116_2
; CHECK-O3-CUR-NEXT: # %bb.1: # %taken
; CHECK-O3-CUR-NEXT: movb $1, %al
; CHECK-O3-CUR-NEXT: retq
; CHECK-O3-CUR-NEXT: .LBB116_2: # %untaken
; CHECK-O3-CUR-NEXT: xorl %eax, %eax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: fold_cmp_over_fence:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: cmpl (%rdi), %esi
; CHECK-O3-EX-NEXT: mfence
; CHECK-O3-EX-NEXT: jne .LBB116_2
; CHECK-O3-EX-NEXT: # %bb.1: # %taken
; CHECK-O3-EX-NEXT: movb $1, %al
; CHECK-O3-EX-NEXT: retq
; CHECK-O3-EX-NEXT: .LBB116_2: # %untaken
; CHECK-O3-EX-NEXT: xorl %eax, %eax
; CHECK-O3-EX-NEXT: retq
  %v2 = load atomic i32, i32* %p unordered, align 4
  fence seq_cst
  %cmp = icmp eq i32 %v1, %v2
  br i1 %cmp, label %taken, label %untaken
taken:
  ret i1 true
untaken:
  ret i1 false
}