; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-SAFE %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-NNAN %s

; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VI-SAFE %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-NNAN %s

; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI-SAFE %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI-NNAN %s

; Code generation test for the "unordered or greater than" maximum idiom
;   %cmp = fcmp ugt %a, %b
;   %val = select %cmp, %a, %b
; applied to half and to 2-, 3-, 4- and 8-element half vectors.
;
; Every target is compiled twice: once with default floating-point options
; and once with -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math.
; As recorded in the assertions below:
;   * gfx900 and fiji, default options: one v_cmp_nle_f16 + v_cndmask_b32
;     pair per element, followed by repacking of the 16-bit halves.
;   * gfx900 and fiji, no-NaNs options: v_max_f16 (v_pk_max_f16 for the
;     vector cases on gfx900, SDWA forms plus v_or_b32 on fiji).
;   * no -mcpu given: every element goes through v_cvt_f16_f32 and
;     v_cvt_f32_f16 and is combined with v_max_legacy_f32 (default options)
;     or v_max_f32 (no-NaNs options).
;
; The assertion lines are generated output; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Scalar half: yields %a when %a is greater than or unordered with %b,
; otherwise %b.
define half @test_fmax_legacy_ugt_f16(half %a, half %b) #0 {
; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_f16:
; GFX9-SAFE:       ; %bb.0:
; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v1
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_f16:
; GFX9-NNAN:       ; %bb.0:
; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT:    v_max_f16_e32 v0, v0, v1
; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmax_legacy_ugt_f16:
; VI-SAFE:       ; %bb.0:
; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v1
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmax_legacy_ugt_f16:
; VI-NNAN:       ; %bb.0:
; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v1
; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_f16:
; SI-SAFE:       ; %bb.0:
; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v1, v0
; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmax_legacy_ugt_f16:
; SI-NNAN:       ; %bb.0:
; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v1
; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
  %cmp = fcmp ugt half %a, %b
  %val = select i1 %cmp, half %a, half %b
  ret half %val
}

; Same idiom applied element-wise to <2 x half>.
define <2 x half> @test_fmax_legacy_ugt_v2f16(<2 x half> %a, <2 x half> %b) #0 {
; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
; GFX9-SAFE:       ; %bb.0:
; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v3, v2
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v1
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-SAFE-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-SAFE-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
; GFX9-NNAN:       ; %bb.0:
; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT:    v_pk_max_f16 v0, v0, v1
; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
; VI-SAFE:       ; %bb.0:
; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v3, v2
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v1
; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
; VI-NNAN:       ; %bb.0:
; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT:    v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v1
; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v2
; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
; SI-SAFE:       ; %bb.0:
; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v2, v0
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v3, v1
; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
; SI-NNAN:       ; %bb.0:
; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v2
; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v3
; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
  %cmp = fcmp ugt <2 x half> %a, %b
  %val = select <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
  ret <2 x half> %val
}

; Same idiom applied element-wise to <3 x half> (odd element count).
define <3 x half> @test_fmax_legacy_ugt_v3f16(<3 x half> %a, <3 x half> %b) #0 {
; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
; GFX9-SAFE:       ; %bb.0:
; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v5, v4
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v3
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v2
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX9-SAFE-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-SAFE-NEXT:    v_lshl_or_b32 v0, v4, 16, v0
; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
; GFX9-NNAN:       ; %bb.0:
; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT:    v_pk_max_f16 v1, v1, v3
; GFX9-NNAN-NEXT:    v_pk_max_f16 v0, v0, v2
; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
; VI-SAFE:       ; %bb.0:
; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v5, v4
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v3
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v2
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
; VI-NNAN:       ; %bb.0:
; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT:    v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v2
; VI-NNAN-NEXT:    v_max_f16_e32 v1, v1, v3
; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v4
; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
; SI-SAFE:       ; %bb.0:
; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v3, v0
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v4, v1
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v2, v5, v2
; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
; SI-NNAN:       ; %bb.0:
; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v3
; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v4
; SI-NNAN-NEXT:    v_max_f32_e32 v2, v2, v5
; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
  %cmp = fcmp ugt <3 x half> %a, %b
  %val = select <3 x i1> %cmp, <3 x half> %a, <3 x half> %b
  ret <3 x half> %val
}

; Same idiom applied element-wise to <4 x half>.
define <4 x half> @test_fmax_legacy_ugt_v4f16(<4 x half> %a, <4 x half> %b) #0 {
; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
; GFX9-SAFE:       ; %bb.0:
; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v7, v6
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v5, v4
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v3
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v2
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX9-SAFE-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-SAFE-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX9-SAFE-NEXT:    v_lshl_or_b32 v0, v4, 16, v0
; GFX9-SAFE-NEXT:    v_lshl_or_b32 v1, v6, 16, v1
; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
; GFX9-NNAN:       ; %bb.0:
; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT:    v_pk_max_f16 v0, v0, v2
; GFX9-NNAN-NEXT:    v_pk_max_f16 v1, v1, v3
; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
; VI-SAFE:       ; %bb.0:
; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v7, v6
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v5, v4
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v3
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v2
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v6
; VI-SAFE-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
; VI-NNAN:       ; %bb.0:
; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT:    v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT:    v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT:    v_max_f16_e32 v1, v1, v3
; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v2
; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v5
; VI-NNAN-NEXT:    v_or_b32_e32 v1, v1, v4
; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
; SI-SAFE:       ; %bb.0:
; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v7, v7
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v6, v6
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v7, v7
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v6, v6
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v4, v0
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v5, v1
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v2, v6, v2
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v3, v7, v3
; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
; SI-NNAN:       ; %bb.0:
; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v7, v7
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v6, v6
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v7, v7
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v6, v6
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v4
; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v5
; SI-NNAN-NEXT:    v_max_f32_e32 v2, v2, v6
; SI-NNAN-NEXT:    v_max_f32_e32 v3, v3, v7
; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
  %cmp = fcmp ugt <4 x half> %a, %b
  %val = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b
  ret <4 x half> %val
}

; Same idiom applied element-wise to <8 x half>.
define <8 x half> @test_fmax_legacy_ugt_v8f16(<8 x half> %a, <8 x half> %b) #0 {
; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
; GFX9-SAFE:       ; %bb.0:
; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v14, 16, v7
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v15, 16, v3
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v12, 16, v6
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v13, 16, v2
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v15, v14
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v10, 16, v5
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v14, v14, v15, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v13, v12
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v11, v10
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v9, v8
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v3, v7
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v2, v6
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v5
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v4
; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX9-SAFE-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-SAFE-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX9-SAFE-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX9-SAFE-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX9-SAFE-NEXT:    v_lshl_or_b32 v0, v8, 16, v0
; GFX9-SAFE-NEXT:    v_lshl_or_b32 v1, v10, 16, v1
; GFX9-SAFE-NEXT:    v_lshl_or_b32 v2, v12, 16, v2
; GFX9-SAFE-NEXT:    v_lshl_or_b32 v3, v14, 16, v3
; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
; GFX9-NNAN:       ; %bb.0:
; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT:    v_pk_max_f16 v0, v0, v4
; GFX9-NNAN-NEXT:    v_pk_max_f16 v1, v1, v5
; GFX9-NNAN-NEXT:    v_pk_max_f16 v2, v2, v6
; GFX9-NNAN-NEXT:    v_pk_max_f16 v3, v3, v7
; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
; VI-SAFE:       ; %bb.0:
; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v14, 16, v7
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v15, 16, v3
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v12, 16, v6
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v13, 16, v2
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v15, v14
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v10, 16, v5
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v14, v14, v15, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v13, v12
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v11, v10
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v9, v8
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v3, v7
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v2, v6
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v5
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v4
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v4, 16, v8
; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v4, 16, v10
; VI-SAFE-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v4, 16, v12
; VI-SAFE-NEXT:    v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v4, 16, v14
; VI-SAFE-NEXT:    v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
; VI-NNAN:       ; %bb.0:
; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT:    v_max_f16_sdwa v8, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT:    v_max_f16_sdwa v9, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT:    v_max_f16_sdwa v10, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT:    v_max_f16_sdwa v11, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT:    v_max_f16_e32 v3, v3, v7
; VI-NNAN-NEXT:    v_max_f16_e32 v2, v2, v6
; VI-NNAN-NEXT:    v_max_f16_e32 v1, v1, v5
; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v4
; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v11
; VI-NNAN-NEXT:    v_or_b32_e32 v1, v1, v10
; VI-NNAN-NEXT:    v_or_b32_e32 v2, v2, v9
; VI-NNAN-NEXT:    v_or_b32_e32 v3, v3, v8
; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
; SI-SAFE:       ; %bb.0:
; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v7, v7
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v15, v15
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v6, v6
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v14, v14
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v13, v13
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v12, v12
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v11, v11
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v10, v10
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v9, v9
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v8, v8
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v7, v7
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v15, v15
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v6, v6
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v14, v14
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v13, v13
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v12, v12
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v11, v11
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v10, v10
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v9, v9
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v8, v8
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v8, v0
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v9, v1
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v2, v10, v2
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v3, v11, v3
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v4, v12, v4
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v5, v13, v5
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v6, v14, v6
; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v7, v15, v7
; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
; SI-NNAN:       ; %bb.0:
; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v15, v15
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v7, v7
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v14, v14
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v6, v6
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v13, v13
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v12, v12
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v11, v11
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v10, v10
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v9, v9
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v8, v8
; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v15, v15
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v7, v7
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v14, v14
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v6, v6
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v13, v13
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v12, v12
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v11, v11
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v10, v10
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v9, v9
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v8, v8
; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v8
; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v9
; SI-NNAN-NEXT:    v_max_f32_e32 v2, v2, v10
; SI-NNAN-NEXT:    v_max_f32_e32 v3, v3, v11
; SI-NNAN-NEXT:    v_max_f32_e32 v4, v4, v12
; SI-NNAN-NEXT:    v_max_f32_e32 v5, v5, v13
; SI-NNAN-NEXT:    v_max_f32_e32 v6, v6, v14
; SI-NNAN-NEXT:    v_max_f32_e32 v7, v7, v15
; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
  %cmp = fcmp ugt <8 x half> %a, %b
  %val = select <8 x i1> %cmp, <8 x half> %a, <8 x half> %b
  ret <8 x half> %val
}

; Attribute group shared by every function in this file.
attributes #0 = { nounwind }