; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10 %s

; Instruction selection of llvm.experimental.constrained.fma for half and
; half vectors (round.tonearest, fpexcept.strict) on gfx900, fiji and gfx1010.
; Every test function carries the strictfp attribute (#0).

; Scalar half: all three run lines expect a single v_fma_f16.
define half @v_constained_fma_f16_fpexcept_strict(half %x, half %y, half %z) #0 {
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f16 v0, v0, v1, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f16_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_fma_f16 v0, v0, v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call half @llvm.experimental.constrained.fma.f16(half %x, half %y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret half %val
}

; <2 x half>: gfx900 and gfx1010 expect one packed v_pk_fma_f16; fiji expects
; the high halves to be shifted out, two v_fma_f16, and a shift/or repack.
define <2 x half> @v_constained_fma_v2f16_fpexcept_strict(<2 x half> %x, <2 x half> %y, <2 x half> %z) #0 {
; GFX9-LABEL: v_constained_fma_v2f16_fpexcept_strict:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_fma_f16 v0, v0, v1, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_constained_fma_v2f16_fpexcept_strict:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
; GFX8-NEXT:    v_fma_f16 v3, v5, v4, v3
; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX8-NEXT:    v_fma_f16 v0, v0, v1, v2
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v2f16_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_pk_fma_f16 v0, v0, v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x half> @llvm.experimental.constrained.fma.v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x half> %val
}

; <3 x half>: gfx900 and gfx1010 expect v_pk_fma_f16 for the first register
; pair plus one v_fma_f16 for the remaining element; fiji expects three
; v_fma_f16 with a shift/or repack of the first two results.
define <3 x half> @v_constained_fma_v3f16_fpexcept_strict(<3 x half> %x, <3 x half> %y, <3 x half> %z) #0 {
; GFX9-LABEL: v_constained_fma_v3f16_fpexcept_strict:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
; GFX9-NEXT:    v_fma_f16 v1, v1, v3, v5
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_constained_fma_v3f16_fpexcept_strict:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v4
; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v2
; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 16, v0
; GFX8-NEXT:    v_fma_f16 v6, v8, v7, v6
; GFX8-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
; GFX8-NEXT:    v_fma_f16 v0, v0, v2, v4
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v6
; GFX8-NEXT:    v_fma_f16 v1, v1, v3, v5
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v3f16_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
; GFX10-NEXT:    v_fma_f16 v1, v1, v3, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call <3 x half> @llvm.experimental.constrained.fma.v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <3 x half> %val
}

; <4 x half>: the recorded output for all three targets uses four scalar FMAs
; (v_fma_f16, or v_fmac_f16 on gfx1010) with manual repacking; no packed FMA
; is expected here, unlike the <2 x half> case above.
; NOTE(review): possibly a missed use of v_pk_fma_f16 on gfx900/gfx1010 - confirm.
define <4 x half> @v_constained_fma_v4f16_fpexcept_strict(<4 x half> %x, <4 x half> %y, <4 x half> %z) #0 {
; GFX9-LABEL: v_constained_fma_v4f16_fpexcept_strict:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
; GFX9-NEXT:    v_fma_f16 v6, v8, v7, v6
; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
; GFX9-NEXT:    v_fma_f16 v1, v1, v3, v5
; GFX9-NEXT:    v_fma_f16 v0, v0, v2, v4
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT:    v_fma_f16 v7, v9, v8, v7
; GFX9-NEXT:    v_and_b32_e32 v0, v2, v0
; GFX9-NEXT:    v_and_b32_e32 v1, v2, v1
; GFX9-NEXT:    v_lshl_or_b32 v0, v7, 16, v0
; GFX9-NEXT:    v_lshl_or_b32 v1, v6, 16, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_constained_fma_v4f16_fpexcept_strict:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
; GFX8-NEXT:    v_fma_f16 v6, v8, v7, v6
; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
; GFX8-NEXT:    v_fma_f16 v7, v9, v8, v7
; GFX8-NEXT:    v_fma_f16 v0, v0, v2, v4
; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v7
; GFX8-NEXT:    v_fma_f16 v1, v1, v3, v5
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v6
; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v4f16_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
; GFX10-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 16, v4
; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
; GFX10-NEXT:    v_lshrrev_b32_e32 v11, 16, v0
; GFX10-NEXT:    v_fmac_f16_e32 v4, v0, v2
; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff
; GFX10-NEXT:    v_fmac_f16_e32 v5, v1, v3
; GFX10-NEXT:    v_fmac_f16_e32 v6, v8, v7
; GFX10-NEXT:    v_fmac_f16_e32 v9, v11, v10
; GFX10-NEXT:    v_and_b32_e32 v1, v0, v4
; GFX10-NEXT:    v_and_b32_e32 v2, v0, v5
; GFX10-NEXT:    v_lshl_or_b32 v0, v9, 16, v1
; GFX10-NEXT:    v_lshl_or_b32 v1, v6, 16, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %x, <4 x half> %y, <4 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <4 x half> %val
}

; fneg of the addend: expected to fold into the third source operand (-v2).
define half @v_constained_fma_f16_fpexcept_strict_fneg(half %x, half %y, half %z) #0 {
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict_fneg:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f16 v0, v0, v1, -v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f16_fpexcept_strict_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_fma_f16 v0, v0, v1, -v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.z = fneg half %z
  %val = call half @llvm.experimental.constrained.fma.f16(half %x, half %y, half %neg.z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret half %val
}

; fneg of both multiplicands: expected to fold into the first two source
; operands (-v0, -v1).
define half @v_constained_fma_f16_fpexcept_strict_fneg_fneg(half %x, half %y, half %z) #0 {
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict_fneg_fneg:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f16 v0, -v0, -v1, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f16_fpexcept_strict_fneg_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_fma_f16 v0, -v0, -v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.x = fneg half %x
  %neg.y = fneg half %y
  %val = call half @llvm.experimental.constrained.fma.f16(half %neg.x, half %neg.y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret half %val
}

; llvm.fabs of both multiplicands: expected to fold into the first two source
; operands (|v0|, |v1|).
define half @v_constained_fma_f16_fpexcept_strict_fabs_fabs(half %x, half %y, half %z) #0 {
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict_fabs_fabs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f16 v0, |v0|, |v1|, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f16_fpexcept_strict_fabs_fabs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_fma_f16 v0, |v0|, |v1|, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %abs.x = call half @llvm.fabs.f16(half %x)
  %abs.y = call half @llvm.fabs.f16(half %y)
  %val = call half @llvm.experimental.constrained.fma.f16(half %abs.x, half %abs.y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret half %val
}

; <2 x half> with fneg of both multiplicands: gfx900 and gfx1010 expect the
; negations as neg_lo/neg_hi modifiers on v_pk_fma_f16; fiji expects them as
; per-operand negations on each of the two v_fma_f16.
define <2 x half> @v_constained_fma_v2f16_fpexcept_strict_fneg_fneg(<2 x half> %x, <2 x half> %y, <2 x half> %z) #0 {
; GFX9-LABEL: v_constained_fma_v2f16_fpexcept_strict_fneg_fneg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,1,0] neg_hi:[1,1,0]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_constained_fma_v2f16_fpexcept_strict_fneg_fneg:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
; GFX8-NEXT:    v_fma_f16 v3, -v5, -v4, v3
; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX8-NEXT:    v_fma_f16 v0, -v0, -v1, v2
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v2f16_fpexcept_strict_fneg_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,1,0] neg_hi:[1,1,0]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.x = fneg <2 x half> %x
  %neg.y = fneg <2 x half> %y
  %val = call <2 x half> @llvm.experimental.constrained.fma.v2f16(<2 x half> %neg.x, <2 x half> %neg.y, <2 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x half> %val
}

; Intrinsic declarations used by the tests above.
declare half @llvm.fabs.f16(half) #1
declare half @llvm.experimental.constrained.fma.f16(half, half, half, metadata, metadata) #1
declare <2 x half> @llvm.experimental.constrained.fma.v2f16(<2 x half>, <2 x half>, <2 x half>, metadata, metadata) #1
declare <3 x half> @llvm.experimental.constrained.fma.v3f16(<3 x half>, <3 x half>, <3 x half>, metadata, metadata) #1
declare <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half>, <4 x half>, <4 x half>, metadata, metadata) #1

; #0 marks the test functions strictfp; #1 is the attribute set on the
; intrinsic declarations.
attributes #0 = { strictfp }
attributes #1 = { inaccessiblememonly nounwind willreturn }