; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; FIXME: promotion not handled without f16 insts

; Scalar half constrained fsub with round.tonearest and fpexcept.strict.
; Every run line expects a single v_sub_f16_e32 on the two VGPR arguments.
define half @v_constained_fsub_f16_fpexcept_strict(half %x, half %y) #0 {
; GCN-LABEL: v_constained_fsub_f16_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f16_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fsub_f16_fpexcept_strict:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT:    v_sub_f16_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret half %val
}

; Same operation as above with fpexcept.ignore; the expected code is the
; same single v_sub_f16_e32.
define half @v_constained_fsub_f16_fpexcept_ignore(half %x, half %y) #0 {
; GCN-LABEL: v_constained_fsub_f16_fpexcept_ignore:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f16_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fsub_f16_fpexcept_ignore:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT:    v_sub_f16_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret half %val
}

; Scalar half constrained fsub with round.tonearest and fpexcept.maytrap.
; Every run line expects a single v_sub_f16_e32 on the two VGPR arguments.
define half @v_constained_fsub_f16_fpexcept_maytrap(half %x, half %y) #0 {
; GCN-LABEL: v_constained_fsub_f16_fpexcept_maytrap:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f16_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fsub_f16_fpexcept_maytrap:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT:    v_sub_f16_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret half %val
}

; <2 x half> constrained fsub with fpexcept.strict. The expected code on every
; subtarget subtracts the low and high halves with two separate v_sub_f16
; instructions and then repacks them into one 32-bit register.
define <2 x half> @v_constained_fsub_v2f16_fpexcept_strict(<2 x half> %x, <2 x half> %y) #0 {
; GFX9-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT:    v_sub_f16_e32 v0, v0, v1
; GFX9-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_sub_f16_e32 v0, v0, v1
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_sub_f16_e32 v2, v0, v1
; GFX10-NEXT:    v_sub_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v2
; GFX10-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
; GFX11-NEXT:    v_sub_f16_e32 v0, v0, v1
; GFX11-NEXT:    v_sub_f16_e32 v1, v3, v2
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x half> @llvm.experimental.constrained.fsub.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x half> %val
}

; <2 x half> constrained fsub with fpexcept.ignore. The expected sequences
; match the fpexcept.strict variant: two v_sub_f16 plus a repack.
define <2 x half> @v_constained_fsub_v2f16_fpexcept_ignore(<2 x half> %x, <2 x half> %y) #0 {
; GFX9-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT:    v_sub_f16_e32 v0, v0, v1
; GFX9-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_sub_f16_e32 v0, v0, v1
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_sub_f16_e32 v2, v0, v1
; GFX10-NEXT:    v_sub_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v2
; GFX10-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
; GFX11-NEXT:    v_sub_f16_e32 v0, v0, v1
; GFX11-NEXT:    v_sub_f16_e32 v1, v3, v2
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x half> @llvm.experimental.constrained.fsub.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret <2 x half> %val
}

; <2 x half> constrained fsub with fpexcept.maytrap. The expected sequences
; match the fpexcept.strict variant: two v_sub_f16 plus a repack.
define <2 x half> @v_constained_fsub_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x half> %y) #0 {
; GFX9-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT:    v_sub_f16_e32 v0, v0, v1
; GFX9-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_sub_f16_e32 v0, v0, v1
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_sub_f16_e32 v2, v0, v1
; GFX10-NEXT:    v_sub_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v2
; GFX10-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
; GFX11-NEXT:    v_sub_f16_e32 v0, v0, v1
; GFX11-NEXT:    v_sub_f16_e32 v1, v3, v2
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x half> @llvm.experimental.constrained.fsub.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret <2 x half> %val
}

; <3 x half> constrained fsub with fpexcept.strict. The expected code uses
; three v_sub_f16 instructions: two for the packed low register pair and one
; for the third element held in v1/v3.
define <3 x half> @v_constained_fsub_v3f16_fpexcept_strict(<3 x half> %x, <3 x half> %y) #0 {
; GFX9-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_f16_sdwa v4, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT:    v_sub_f16_e32 v0, v0, v2
; GFX9-NEXT:    v_lshl_or_b32 v0, v4, 16, v0
; GFX9-NEXT:    v_sub_f16_e32 v1, v1, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_sub_f16_e32 v0, v0, v2
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
; GFX8-NEXT:    v_sub_f16_e32 v1, v1, v3
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_sub_f16_e32 v4, v0, v2
; GFX10-NEXT:    v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT:    v_sub_f16_e32 v1, v1, v3
; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v4
; GFX10-NEXT:    v_lshl_or_b32 v0, v0, 16, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
; GFX11-NEXT:    v_sub_f16_e32 v0, v0, v2
; GFX11-NEXT:    v_sub_f16_e32 v1, v1, v3
; GFX11-NEXT:    v_sub_f16_e32 v2, v5, v4
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <3 x half> @llvm.experimental.constrained.fsub.v3f16(<3 x half> %x, <3 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <3 x half> %val
}

; FIXME: Scalarized
; <4 x half> constrained fsub with fpexcept.strict. The expected code uses
; four v_sub_f16 instructions and repacks each register pair.
define <4 x half> @v_constained_fsub_v4f16_fpexcept_strict(<4 x half> %x, <4 x half> %y) #0 {
; GFX9-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT:    v_sub_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT:    v_sub_f16_e32 v1, v1, v3
; GFX9-NEXT:    v_sub_f16_e32 v0, v0, v2
; GFX9-NEXT:    v_lshl_or_b32 v0, v5, 16, v0
; GFX9-NEXT:    v_lshl_or_b32 v1, v4, 16, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_sub_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_sub_f16_e32 v1, v1, v3
; GFX8-NEXT:    v_sub_f16_e32 v0, v0, v2
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v5
; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_sub_f16_e32 v4, v0, v2
; GFX10-NEXT:    v_sub_f16_e32 v5, v1, v3
; GFX10-NEXT:    v_sub_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT:    v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v4
; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v5
; GFX10-NEXT:    v_lshl_or_b32 v0, v0, 16, v2
; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v0
; GFX11-NEXT:    v_sub_f16_e32 v0, v0, v2
; GFX11-NEXT:    v_sub_f16_e32 v1, v1, v3
; GFX11-NEXT:    v_sub_f16_e32 v2, v5, v4
; GFX11-NEXT:    v_sub_f16_e32 v3, v7, v6
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX11-NEXT:    v_lshl_or_b32 v0, v3, 16, v0
; GFX11-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <4 x half> @llvm.experimental.constrained.fsub.v4f16(<4 x half> %x, <4 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <4 x half> %val
}

; amdgpu_ps variant with both half operands marked inreg. The expected code
; reads the operands from s2 and s3; the gfx900/fiji runs first copy s3 into a
; VGPR, while the gfx1010/gfx1100 runs use both SGPRs directly in the e64 form.
define amdgpu_ps half @s_constained_fsub_f16_fpexcept_strict(half inreg %x, half inreg %y) #0 {
; GCN-LABEL: s_constained_fsub_f16_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, s3
; GCN-NEXT:    v_sub_f16_e32 v0, s2, v0
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_constained_fsub_f16_fpexcept_strict:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_sub_f16_e64 v0, s2, s3
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret half %val
}

; amdgpu_ps variant with both <2 x half> operands marked inreg. The expected
; code extracts the high halves with s_lshr_b32, performs two v_sub_f16
; operations and repacks the result.
define amdgpu_ps <2 x half> @s_constained_fsub_v2f16_fpexcept_strict(<2 x half> inreg %x, <2 x half> inreg %y) #0 {
; GFX9-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshr_b32 s0, s3, 16
; GFX9-NEXT:    s_lshr_b32 s1, s2, 16
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s3
; GFX9-NEXT:    v_sub_f16_e32 v0, s1, v0
; GFX9-NEXT:    v_sub_f16_e32 v1, s2, v1
; GFX9-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshr_b32 s0, s3, 16
; GFX8-NEXT:    s_lshr_b32 s1, s2, 16
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    v_sub_f16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NEXT:    v_sub_f16_e32 v1, s2, v1
; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_sub_f16_e64 v0, s2, s3
; GFX10PLUS-NEXT:    s_lshr_b32 s0, s3, 16
; GFX10PLUS-NEXT:    s_lshr_b32 s1, s2, 16
; GFX10PLUS-NEXT:    v_sub_f16_e64 v1, s1, s0
; GFX10PLUS-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10PLUS-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %val = call <2 x half> @llvm.experimental.constrained.fsub.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x half> %val
}

; Declarations of the constrained fsub intrinsics called by the tests above.
declare half @llvm.experimental.constrained.fsub.f16(half, half, metadata, metadata) #1
declare <2 x half> @llvm.experimental.constrained.fsub.v2f16(<2 x half>, <2 x half>, metadata, metadata) #1
declare <3 x half> @llvm.experimental.constrained.fsub.v3f16(<3 x half>, <3 x half>, metadata, metadata) #1
declare <4 x half> @llvm.experimental.constrained.fsub.v4f16(<4 x half>, <4 x half>, metadata, metadata) #1

; Attribute group #0 is applied to every test function; #1 to the intrinsic
; declarations.
attributes #0 = { strictfp }
attributes #1 = { inaccessiblememonly nounwind willreturn }