; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

; Codegen test for llvm.experimental.constrained.fsub on f32 and small f32
; vectors. Every function is marked strictfp and uses "round.tonearest"; the
; tests vary the exception-behavior operand, the vector width, the calling
; convention, and fabs/fneg applied to the inputs.
;
; The gfx1010 and gfx1100 runs share one check prefix wherever their expected
; output is the same, and use their own prefixes where it differs.
;
; The assertions are autogenerated: regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.

; Scalar f32, one function per exception-behavior operand (strict, ignore,
; maytrap). All three expect a single v_sub_f32.

define float @v_constained_fsub_f32_fpexcept_strict(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT:    v_sub_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

define float @v_constained_fsub_f32_fpexcept_ignore(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_ignore:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_ignore:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT:    v_sub_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret float %val
}

define float @v_constained_fsub_f32_fpexcept_maytrap(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_maytrap:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_maytrap:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT:    v_sub_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret float %val
}

; <2 x float>, one function per exception-behavior operand. gfx900 and gfx1010
; expect one v_sub_f32 per element; gfx1100 expects the pair emitted as a
; single v_dual_sub_f32.

define <2 x float> @v_constained_fsub_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e32 v0, v0, v2
; GCN-NEXT:    v_sub_f32_e32 v1, v1, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT:    v_sub_f32_e32 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x float> %val
}

define <2 x float> @v_constained_fsub_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e32 v0, v0, v2
; GCN-NEXT:    v_sub_f32_e32 v1, v1, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT:    v_sub_f32_e32 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret <2 x float> %val
}

define <2 x float> @v_constained_fsub_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e32 v0, v0, v2
; GCN-NEXT:    v_sub_f32_e32 v1, v1, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT:    v_sub_f32_e32 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret <2 x float> %val
}

; <3 x float>, strict exceptions only. gfx1100 expects the first two elements
; as one v_dual_sub_f32 and the third as a plain v_sub_f32.

define <3 x float> @v_constained_fsub_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y) #0 {
; GCN-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e32 v0, v0, v3
; GCN-NEXT:    v_sub_f32_e32 v1, v1, v4
; GCN-NEXT:    v_sub_f32_e32 v2, v2, v5
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_sub_f32_e32 v0, v0, v3
; GFX10-NEXT:    v_sub_f32_e32 v1, v1, v4
; GFX10-NEXT:    v_sub_f32_e32 v2, v2, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_dual_sub_f32 v0, v0, v3 :: v_dual_sub_f32 v1, v1, v4
; GFX11-NEXT:    v_sub_f32_e32 v2, v2, v5
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float> %x, <3 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <3 x float> %val
}

; amdgpu_ps entry with both operands passed inreg. gfx900 expects s3 to be
; copied into a VGPR before the subtract; gfx1010/gfx1100 expect one
; v_sub_f32_e64 reading s2 and s3 directly.

define amdgpu_ps float @s_constained_fsub_f32_fpexcept_strict(float inreg %x, float inreg %y) #0 {
; GCN-LABEL: s_constained_fsub_f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, s3
; GCN-NEXT:    v_sub_f32_e32 v0, s2, v0
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_constained_fsub_f32_fpexcept_strict:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_sub_f32_e64 v0, s2, s3
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; fabs / fneg(fabs) feeding a strict fsub. The expected output has no separate
; fabs or fneg instruction: the operation appears as a |v| or -|v| operand of
; v_sub_f32_e64.

define float @v_constained_fsub_f32_fpexcept_strict_fabs_lhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_lhs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e64 v0, |v0|, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_lhs:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT:    v_sub_f32_e64 v0, |v0|, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %val = call float @llvm.experimental.constrained.fsub.f32(float %fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

define float @v_constained_fsub_f32_fpexcept_strict_fabs_rhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_rhs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e64 v0, v0, |v1|
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_rhs:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT:    v_sub_f32_e64 v0, v0, |v1|
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %fabs.y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

define float @v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e64 v0, -|v0|, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT:    v_sub_f32_e64 v0, -|v0|, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %neg.fabs.x = fneg float %fabs.x
  %val = call float @llvm.experimental.constrained.fsub.f32(float %neg.fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

declare float @llvm.fabs.f32(float) #1
declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) #1
declare <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float>, <2 x float>, metadata, metadata) #1
declare <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float>, <3 x float>, metadata, metadata) #1

attributes #0 = { strictfp }
attributes #1 = { inaccessiblememonly nounwind willreturn }