; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Codegen tests for 'select' on f16 (half) and <2 x half> operands.
; Per the SI check lines below, SI has no f16 compare/select: operands are
; extended to f32 (v_cvt_f32_f16), compared/selected in f32, and the result
; truncated back (v_cvt_f16_f32).  VI (fiji) compares f16 directly
; (v_cmp_*_f16) and selects the 16-bit values without conversion.
; 0xH3800 is half 0.5 (the VI checks match it as the inline constant 0.5);
; 0xH3900 is half 0.625.

; Scalar select with all four operands loaded from memory.
; GCN-LABEL: {{^}}select_f16
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @select_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c,
    half addrspace(1)* %d) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %d.val = load half, half addrspace(1)* %d
  %fcmp = fcmp olt half %a.val, %b.val
  %r.val = select i1 %fcmp, half %c.val, half %d.val
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Immediate 0.5 as the LHS of the compare; the compare is commuted
; (lt -> gt on SI) so the constant becomes the second operand.
; GCN-LABEL: {{^}}select_f16_imm_a
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], 0x3800{{$}}
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_gt_f32_e32 vcc, v[[B_F32]], v[[A_F32]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_cmp_lt_f16_e32 vcc, 0.5, v[[B_F16]]
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @select_f16_imm_a(
    half addrspace(1)* %r,
    half addrspace(1)* %b,
    half addrspace(1)* %c,
    half addrspace(1)* %d) {
entry:
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %d.val = load half, half addrspace(1)* %d
  %fcmp = fcmp olt half 0xH3800, %b.val
  %r.val = select i1 %fcmp, half %c.val, half %d.val
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Immediate 0.5 as the RHS of the compare.  VI commutes (lt -> gt) to put
; the constant first; SI keeps lt with the constant materialized.
; GCN-LABEL: {{^}}select_f16_imm_b
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], 0x3800{{$}}
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_cmp_gt_f16_e32 vcc, 0.5, v[[A_F16]]
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @select_f16_imm_b(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %c,
    half addrspace(1)* %d) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %c.val = load half, half addrspace(1)* %c
  %d.val = load half, half addrspace(1)* %d
  %fcmp = fcmp olt half %a.val, 0xH3800
  %r.val = select i1 %fcmp, half %c.val, half %d.val
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Immediate 0.5 as the true (c) operand of the select; the condition is
; inverted (lt -> nlt) so the select operands swap and the constant comes
; from a register.
; GCN-LABEL: {{^}}select_f16_imm_c
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], 0x3800{{$}}
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[C_F32]], v[[D_F32]], vcc
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; VI: v_mov_b32_e32 v[[C_F16:[0-9]+]], 0x3800{{$}}
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[C_F16]], v[[D_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @select_f16_imm_c(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %d) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %d.val = load half, half addrspace(1)* %d
  %fcmp = fcmp olt half %a.val, %b.val
  %r.val = select i1 %fcmp, half 0xH3800, half %d.val
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Immediate 0.5 as the false (d) operand of the select.
; GCN-LABEL: {{^}}select_f16_imm_d
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], 0x3800{{$}}
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; VI: v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; VI: v_mov_b32_e32 v[[D_F16:[0-9]+]], 0x3800{{$}}
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @select_f16_imm_d(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %fcmp = fcmp olt half %a.val, %b.val
  %r.val = select i1 %fcmp, half %c.val, half 0xH3800
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Vector (<2 x half>) select: each lane gets its own compare/select
; (the _e32 form writes vcc, the _e64 form a separate SGPR pair).
; GCN-LABEL: {{^}}select_v2f16
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_lt_f32_e64
; SI: v_cmp_lt_f32_e32
; VI: v_cmp_lt_f16_e32
; VI: v_cmp_lt_f16_e64
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e64
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define void @select_v2f16(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c,
    <2 x half> addrspace(1)* %d) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  %fcmp = fcmp olt <2 x half> %a.val, %b.val
  %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> %d.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; Vector compare LHS is the constant <0.5, 0.625>; compares are commuted
; (lt -> gt) on SI.
; GCN-LABEL: {{^}}select_v2f16_imm_a
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_gt_f32_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_gt_f32_e64
; VI: v_cmp_lt_f16_e32
; VI: v_cmp_lt_f16_e64
; GCN: v_cndmask_b32_e32
; SI: v_cvt_f16_f32_e32
; GCN: v_cndmask_b32_e64
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define void @select_v2f16_imm_a(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c,
    <2 x half> addrspace(1)* %d) {
entry:
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  %fcmp = fcmp olt <2 x half> <half 0xH3800, half 0xH3900>, %b.val
  %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> %d.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; Vector compare RHS is the constant <0.5, 0.625>; VI commutes (lt -> gt).
; GCN-LABEL: {{^}}select_v2f16_imm_b
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_lt_f32_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_lt_f32_e64
; VI: v_cmp_gt_f16_e32
; VI: v_cmp_gt_f16_e64
; GCN: v_cndmask_b32_e32
; SI: v_cvt_f16_f32_e32
; GCN: v_cndmask_b32_e64
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define void @select_v2f16_imm_b(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %c,
    <2 x half> addrspace(1)* %d) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  %fcmp = fcmp olt <2 x half> %a.val, <half 0xH3800, half 0xH3900>
  %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> %d.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; Constant <0.5, 0.625> as the true operand; on VI the condition is
; inverted (nlt) per lane so the select operands swap.
; GCN-LABEL: {{^}}select_v2f16_imm_c
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32

; SI: v_cmp_lt_f32_e32
; SI: v_cmp_lt_f32_e64
; SI: v_cndmask_b32_e32
; SI: v_cndmask_b32_e64

; VI: v_cmp_nlt_f16_e32
; VI: v_cndmask_b32_e32

; VI: v_cmp_nlt_f16_e32
; VI: v_cndmask_b32_e32

; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define void @select_v2f16_imm_c(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %d) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  %fcmp = fcmp olt <2 x half> %a.val, %b.val
  %r.val = select <2 x i1> %fcmp, <2 x half> <half 0xH3800, half 0xH3900>, <2 x half> %d.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; Constant <0.5, 0.625> as the false operand.
; GCN-LABEL: {{^}}select_v2f16_imm_d
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI: v_cmp_lt_f32_e32
; SI: v_cmp_lt_f32_e64
; VI: v_cmp_lt_f16_e32
; VI: v_cmp_lt_f16_e64
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e64
; SI: v_cvt_f16_f32_e32
; SI: v_cvt_f16_f32_e32
; GCN: s_endpgm
define void @select_v2f16_imm_d(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %fcmp = fcmp olt <2 x half> %a.val, %b.val
  %r.val = select <2 x i1> %fcmp, <2 x half> %c.val, <2 x half> <half 0xH3800, half 0xH3900>
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}