; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s

; Codegen test for the llvm.experimental.constrained.fsub intrinsic on f32 and
; small f32 vectors, run through llc for gfx900 and gfx1010 (see the two llc
; invocations above). Every function below carries the strictfp attribute (#0)
; and every constrained call uses the "round.tonearest" rounding mode; the
; tests vary the exception-behavior metadata, the vector width, the calling
; convention and the source modifiers on the operands.
;
; The check lines are generated by the script named on the first line; do not
; edit them by hand, regenerate them instead.
;
; NOTE(review): the function names spell "constained" (sic). Renaming them
; would change the label checks, so the assertions would need regenerating.

; Scalar f32, "fpexcept.strict". Both targets are expected to emit a single
; v_sub_f32_e32 on the incoming v0/v1.
define float @v_constained_fsub_f32_fpexcept_strict(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sub_f32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; Scalar f32, "fpexcept.ignore". The expected instruction sequence is the same
; as for the strict variant.
define float @v_constained_fsub_f32_fpexcept_ignore(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_ignore:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sub_f32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_f32_fpexcept_ignore:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret float %val
}

; Scalar f32, "fpexcept.maytrap". The expected instruction sequence is the same
; as for the strict variant.
define float @v_constained_fsub_f32_fpexcept_maytrap(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_maytrap:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sub_f32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_f32_fpexcept_maytrap:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret float %val
}

; <2 x float>, "fpexcept.strict". The checks expect one v_sub_f32_e32 per
; element: (v0, v2) and (v1, v3).
define <2 x float> @v_constained_fsub_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sub_f32_e32 v0, v0, v2
; GCN-NEXT: v_sub_f32_e32 v1, v1, v3
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: v_sub_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x float> %val
}

; <2 x float>, "fpexcept.ignore". Same expected per-element subtracts as the
; strict v2f32 variant.
define <2 x float> @v_constained_fsub_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sub_f32_e32 v0, v0, v2
; GCN-NEXT: v_sub_f32_e32 v1, v1, v3
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: v_sub_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret <2 x float> %val
}

; <2 x float>, "fpexcept.maytrap". Same expected per-element subtracts as the
; strict v2f32 variant.
define <2 x float> @v_constained_fsub_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sub_f32_e32 v0, v0, v2
; GCN-NEXT: v_sub_f32_e32 v1, v1, v3
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: v_sub_f32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret <2 x float> %val
}

; <3 x float>, "fpexcept.strict". The checks expect three scalar subtracts:
; (v0, v3), (v1, v4) and (v2, v5). Only the strict variant of the v3f32 case
; is present in this file as shown.
define <3 x float> @v_constained_fsub_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y) #0 {
; GCN-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sub_f32_e32 v0, v0, v3
; GCN-NEXT: v_sub_f32_e32 v1, v1, v4
; GCN-NEXT: v_sub_f32_e32 v2, v2, v5
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v3
; GFX10-NEXT: v_sub_f32_e32 v1, v1, v4
; GFX10-NEXT: v_sub_f32_e32 v2, v2, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %val = call <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float> %x, <3 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <3 x float> %val
}

; amdgpu_ps calling convention with inreg arguments; the expected code reads
; the operands from s2 and s3. The gfx900 checks expect s3 to be copied into
; v0 before the subtract, while the gfx1010 checks expect v_sub_f32_e64 to
; take both scalar registers directly.
define amdgpu_ps float @s_constained_fsub_f32_fpexcept_strict(float inreg %x, float inreg %y) #0 {
; GCN-LABEL: s_constained_fsub_f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b32_e32 v0, s3
; GCN-NEXT: v_sub_f32_e32 v0, s2, v0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_constained_fsub_f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_sub_f32_e64 v0, s2, s3
; GFX10-NEXT: ; return to shader part epilog
  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; llvm.fabs applied to the left-hand operand. The checks expect it to appear
; as the |v0| source modifier on a single v_sub_f32_e64 rather than as a
; separate instruction.
define float @v_constained_fsub_f32_fpexcept_strict_fabs_lhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_lhs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sub_f32_e64 v0, |v0|, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_f32_e64 v0, |v0|, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %val = call float @llvm.experimental.constrained.fsub.f32(float %fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; llvm.fabs applied to the right-hand operand. The checks expect it to appear
; as the |v1| source modifier on a single v_sub_f32_e64.
define float @v_constained_fsub_f32_fpexcept_strict_fabs_rhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_rhs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sub_f32_e64 v0, v0, |v1|
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_f32_e64 v0, v0, |v1|
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %fabs.y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; fneg of llvm.fabs applied to the left-hand operand. The checks expect both
; to appear together as the -|v0| source modifier on a single v_sub_f32_e64.
define float @v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sub_f32_e64 v0, -|v0|, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_f32_e64 v0, -|v0|, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %neg.fabs.x = fneg float %fabs.x
  %val = call float @llvm.experimental.constrained.fsub.f32(float %neg.fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; Intrinsic declarations used by the tests above.
declare float @llvm.fabs.f32(float) #1
declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) #1
declare <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float>, <2 x float>, metadata, metadata) #1
declare <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float>, <3 x float>, metadata, metadata) #1

; #0 is applied to every test function; #1 is applied to the intrinsic
; declarations.
attributes #0 = { strictfp }
attributes #1 = { inaccessiblememonly nounwind willreturn }