; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Half-precision fcmp tests: on SI the f16 operands are promoted to f32
; before the compare; on VI the native v_cmp_*_f16 instructions are used.
; Each label ends with ':' so FileCheck cannot alias a label onto a longer
; name sharing the same prefix (e.g. fcmp_f16_lt vs. fcmp_f16_lt_abs).

; GCN-LABEL: {{^}}fcmp_f16_lt:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_lt(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp olt half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_lt_abs:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]

; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]

; The fabs is folded into the compare as a source modifier (|src|).
; SI: v_cmp_lt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, |v[[A_F32]]|, |v[[B_F32]]|
; VI: v_cmp_lt_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, |v[[A_F16]]|, |v[[B_F16]]|

; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_lt_abs(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %a.abs = call half @llvm.fabs.f16(half %a.val)
  %b.abs = call half @llvm.fabs.f16(half %b.val)
  %r.val = fcmp olt half %a.abs, %b.abs
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_eq:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_eq_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_eq_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_eq(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp oeq half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_le:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_le_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_le_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_le(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ole half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_gt:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_gt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_gt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_gt(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ogt half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_lg:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_lg_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_lg_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_lg(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp one half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_ge:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_ge_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_ge_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_ge(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp oge half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_o:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_o_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_o_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_o(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ord half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_u:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_u_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_u_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_u(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp uno half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; Unordered predicates select the inverted compare (nge, nlg, ...).
; GCN-LABEL: {{^}}fcmp_f16_nge:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_nge_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_nge_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_nge(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ult half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_nlg:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_nlg_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_nlg_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_nlg(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ueq half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_ngt:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_ngt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_ngt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_ngt(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ule half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_nle:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_nle_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_nle_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_nle(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ugt half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_neq:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_neq_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_neq_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_neq(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp une half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_f16_nlt:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm
define void @fcmp_f16_nlt(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp uge half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}

; <2 x half> variants: low element compared in place, high element
; extracted with a 16-bit shift and compared separately.
; GCN-LABEL: {{^}}fcmp_v2f16_lt:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_lt_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_lt_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_lt_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_lt(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp olt <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_eq:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_eq_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_eq_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_eq_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_eq_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_eq(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp oeq <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_le:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_le_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_le_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_le_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_le_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_le(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ole <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_gt:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_gt_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_gt_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_gt_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_gt_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_gt(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ogt <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_lg:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_lg_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_lg_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_lg_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_lg_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_lg(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp one <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_ge:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_ge_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_ge_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_ge_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_ge_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_ge(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp oge <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_o:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_o_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_o_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_o_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_o_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_o(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ord <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_u:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_u_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_u_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_u_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_u_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_u(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp uno <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_nge:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_nge_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_nge_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_nge_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_nge_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_nge(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ult <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_nlg:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_nlg_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_nlg_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_nlg_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_nlg_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_nlg(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ueq <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_ngt:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_ngt_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_ngt_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_ngt_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_ngt_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_ngt(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ule <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_nle:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_nle_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_nle_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_nle_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_nle_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_nle(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ugt <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_neq:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_neq_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_neq_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_neq_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_neq_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_neq(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp une <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fcmp_v2f16_nlt:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI: v_cmp_nlt_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI: v_cmp_nlt_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI: v_cmp_nlt_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm
define void @fcmp_v2f16_nlt(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp uge <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}

declare half @llvm.fabs.f16(half) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }