; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s

; Codegen checks for llvm.experimental.constrained.fsub on double and on
; vectors of double, compiled for gfx900 and for gfx1010/gfx1100 (the latter
; two share one check prefix). Every call uses round.tonearest and varies only
; the exception-behavior metadata. In all cases below the expected selection
; is v_add_f64 with the second source operand negated.

; Scalar f64 operands in VGPRs, exception behavior fpexcept.strict.
define double @v_constained_fsub_f64_fpexcept_strict(double %x, double %y) #0 {
; GCN-LABEL: v_constained_fsub_f64_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_f64_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %diff = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret double %diff
}

; Scalar f64 operands in VGPRs, exception behavior fpexcept.ignore.
define double @v_constained_fsub_f64_fpexcept_ignore(double %x, double %y) #0 {
; GCN-LABEL: v_constained_fsub_f64_fpexcept_ignore:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_f64_fpexcept_ignore:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %diff = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret double %diff
}

; Scalar f64 operands in VGPRs, exception behavior fpexcept.maytrap.
define double @v_constained_fsub_f64_fpexcept_maytrap(double %x, double %y) #0 {
; GCN-LABEL: v_constained_fsub_f64_fpexcept_maytrap:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_f64_fpexcept_maytrap:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %diff = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret double %diff
}

; <2 x double> operands, fpexcept.strict: one v_add_f64 is expected per element.
define <2 x double> @v_constained_fsub_v2f64_fpexcept_strict(<2 x double> %x, <2 x double> %y) #0 {
; GCN-LABEL: v_constained_fsub_v2f64_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
; GCN-NEXT:    v_add_f64 v[2:3], v[2:3], -v[6:7]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f64_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], -v[6:7]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %diff = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x double> %diff
}

; <2 x double> operands, fpexcept.ignore.
define <2 x double> @v_constained_fsub_v2f64_fpexcept_ignore(<2 x double> %x, <2 x double> %y) #0 {
; GCN-LABEL: v_constained_fsub_v2f64_fpexcept_ignore:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
; GCN-NEXT:    v_add_f64 v[2:3], v[2:3], -v[6:7]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f64_fpexcept_ignore:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], -v[6:7]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %diff = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret <2 x double> %diff
}

; <2 x double> operands, fpexcept.maytrap.
define <2 x double> @v_constained_fsub_v2f64_fpexcept_maytrap(<2 x double> %x, <2 x double> %y) #0 {
; GCN-LABEL: v_constained_fsub_v2f64_fpexcept_maytrap:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
; GCN-NEXT:    v_add_f64 v[2:3], v[2:3], -v[6:7]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f64_fpexcept_maytrap:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], -v[6:7]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %diff = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret <2 x double> %diff
}

; <3 x double> operands, fpexcept.strict: three v_add_f64 instructions are
; expected, one per element.
define <3 x double> @v_constained_fsub_v3f64_fpexcept_strict(<3 x double> %x, <3 x double> %y) #0 {
; GCN-LABEL: v_constained_fsub_v3f64_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], -v[6:7]
; GCN-NEXT:    v_add_f64 v[2:3], v[2:3], -v[8:9]
; GCN-NEXT:    v_add_f64 v[4:5], v[4:5], -v[10:11]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v3f64_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[6:7]
; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], -v[8:9]
; GFX10-NEXT:    v_add_f64 v[4:5], v[4:5], -v[10:11]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %diff = call <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double> %x, <3 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <3 x double> %diff
}

; amdgpu_ps entry with both f64 operands marked inreg, fpexcept.strict. The
; gfx900 checks expect s[4:5] to be copied into v[0:1] before the add, while
; the gfx1010/gfx1100 checks expect both SGPR pairs to be used directly as
; sources. The f64 result is bitcast to <2 x float> for the return value.
define amdgpu_ps <2 x float> @s_constained_fsub_f64_fpexcept_strict(double inreg %x, double inreg %y) #0 {
; GCN-LABEL: s_constained_fsub_f64_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NEXT:    v_mov_b32_e32 v1, s5
; GCN-NEXT:    v_add_f64 v[0:1], s[2:3], -v[0:1]
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_constained_fsub_f64_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_add_f64 v[0:1], s[2:3], -s[4:5]
; GFX10-NEXT:    ; return to shader part epilog
  %diff = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  %bits = bitcast double %diff to <2 x float>
  ret <2 x float> %bits
}

; Constrained fsub intrinsic declarations for the three operand types used above.
declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata) #1
declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata) #1
declare <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double>, <3 x double>, metadata, metadata) #1

; #0 is applied to every test function; #1 is applied to the intrinsic declarations.
attributes #0 = { strictfp }
attributes #1 = { inaccessiblememonly nounwind willreturn }