; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Tests formation of v_mac_f16 / v_mad_f16 (VI) and the promoted
; v_mac_f32 / v_mad_f32 sequence (SI, which has no f16 instructions and
; legalizes half via f32 conversions) from fmul+fadd on half and <2 x half>.
; The *_safe_fp_math variants use a (-0.0 - x)-free fsub (0.0 - x), which may
; only fold the negation into the multiply-add when unsafe-fp-math is enabled
; (attribute #1); the plain neg variants use fsub -0.0, x which always folds.

; GCN-LABEL: {{^}}mac_f16
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_mac_f32_e32 v[[C_F32]], v[[B_F32]], v[[A_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[C_F32]]
; SI: buffer_store_short v[[R_F16]]
; VI: v_mac_f16_e32 v[[C_F16]], v[[B_F16]], v[[A_F16]]
; VI: buffer_store_short v[[C_F16]]
; GCN: s_endpgm
define void @mac_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  %t.val = fmul half %a.val, %b.val
  %r.val = fadd half %t.val, %c.val

  store half %r.val, half addrspace(1)* %r
  ret void
}

; Two multiply-adds sharing one addend: only one use of %c.val can be folded
; into a mac (which clobbers its addend operand); the other becomes a mad.
; GCN-LABEL: {{^}}mac_f16_same_add
; SI: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]]
; SI: v_mac_f32_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_mad_f16 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]]
; VI: v_mac_f16_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define void @mac_f16_same_add(
    half addrspace(1)* %r0,
    half addrspace(1)* %r1,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c,
    half addrspace(1)* %d,
    half addrspace(1)* %e) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %d.val = load half, half addrspace(1)* %d
  %e.val = load half, half addrspace(1)* %e

  %t0.val = fmul half %a.val, %b.val
  %r0.val = fadd half %t0.val, %c.val

  %t1.val = fmul half %d.val, %e.val
  %r1.val = fadd half %t1.val, %c.val

  store half %r0.val, half addrspace(1)* %r0
  store half %r1.val, half addrspace(1)* %r1
  ret void
}

; fneg (fsub -0.0, x) on a source operand folds into mad as a negated operand,
; so no mac may be formed.
; GCN-LABEL: {{^}}mac_f16_neg_a
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define void @mac_f16_neg_a(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  %a.neg = fsub half -0.0, %a.val
  %t.val = fmul half %a.neg, %b.val
  %r.val = fadd half %t.val, %c.val

  store half %r.val, half addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}mac_f16_neg_b
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define void @mac_f16_neg_b(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  %b.neg = fsub half -0.0, %b.val
  %t.val = fmul half %a.val, %b.neg
  %r.val = fadd half %t.val, %c.val

  store half %r.val, half addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}mac_f16_neg_c
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; GCN: s_endpgm
define void @mac_f16_neg_c(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  %c.neg = fsub half -0.0, %c.val
  %t.val = fmul half %a.val, %b.val
  %r.val = fadd half %t.val, %c.neg

  store half %r.val, half addrspace(1)* %r
  ret void
}

; With safe fp math (attribute #0), fsub 0.0, x is NOT an fneg, so an explicit
; subtract from zero must be emitted and the mac is still formed.
; GCN-LABEL: {{^}}mac_f16_neg_a_safe_fp_math
; SI: v_cvt_f32_f16_e32 v[[ZERO:[0-9]+]], 0{{$}}
; SI: v_subrev_f32_e32 v[[NEG_A:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A]]
; VI: v_sub_f16_e32 v[[NEG_A:[0-9]+]], 0, v{{[0-9]+}}
; VI: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A]]
; GCN: s_endpgm
define void @mac_f16_neg_a_safe_fp_math(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  %a.neg = fsub half 0.0, %a.val
  %t.val = fmul half %a.neg, %b.val
  %r.val = fadd half %t.val, %c.val

  store half %r.val, half addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}mac_f16_neg_b_safe_fp_math
; SI: v_cvt_f32_f16_e32 v[[ZERO:[0-9]+]], 0{{$}}
; SI: v_subrev_f32_e32 v[[NEG_A:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI: v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A]], v{{[0-9]+}}
; VI: v_sub_f16_e32 v[[NEG_A:[0-9]+]], 0, v{{[0-9]+}}
; VI: v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_A]], v{{[0-9]+}}
; GCN: s_endpgm
define void @mac_f16_neg_b_safe_fp_math(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  %b.neg = fsub half 0.0, %b.val
  %t.val = fmul half %a.val, %b.neg
  %r.val = fadd half %t.val, %c.val

  store half %r.val, half addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}mac_f16_neg_c_safe_fp_math
; SI: v_cvt_f32_f16_e32 v[[ZERO:[0-9]+]], 0{{$}}
; SI: v_subrev_f32_e32 v[[NEG_A:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI: v_mac_f32_e32 v[[NEG_A]], v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_sub_f16_e32 v[[NEG_A:[0-9]+]], 0, v{{[0-9]+}}
; VI: v_mac_f16_e32 v[[NEG_A]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define void @mac_f16_neg_c_safe_fp_math(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  %c.neg = fsub half 0.0, %c.val
  %t.val = fmul half %a.val, %b.val
  %r.val = fadd half %t.val, %c.neg

  store half %r.val, half addrspace(1)* %r
  ret void
}

; With unsafe fp math (attribute #1), fsub 0.0, x IS treated as fneg and
; folds into a mad operand, so no mac is formed.
; GCN-LABEL: {{^}}mac_f16_neg_a_unsafe_fp_math
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
; GCN: s_endpgm
define void @mac_f16_neg_a_unsafe_fp_math(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #1 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  %a.neg = fsub half 0.0, %a.val
  %t.val = fmul half %a.neg, %b.val
  %r.val = fadd half %t.val, %c.val

  store half %r.val, half addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}mac_f16_neg_b_unsafe_fp_math
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
; GCN: s_endpgm
define void @mac_f16_neg_b_unsafe_fp_math(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #1 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  %b.neg = fsub half 0.0, %b.val
  %t.val = fmul half %a.val, %b.neg
  %r.val = fadd half %t.val, %c.val

  store half %r.val, half addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}mac_f16_neg_c_unsafe_fp_math
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]}}
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]}}
; GCN: s_endpgm
define void @mac_f16_neg_c_unsafe_fp_math(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #1 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  %c.neg = fsub half 0.0, %c.val
  %t.val = fmul half %a.val, %b.val
  %r.val = fadd half %t.val, %c.neg

  store half %r.val, half addrspace(1)* %r
  ret void
}

; <2 x half> versions: the vector is legalized as two scalar f16 ops on the
; low 16 bits and the lshr'd high 16 bits, then repacked with and/lshl/or.
; GCN-LABEL: {{^}}mac_v2f16
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[C_V2_F16:[0-9]+]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[C_F32_0:[0-9]+]], v[[C_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[C_F32_1:[0-9]+]], v[[C_F16_1]]
; SI: v_mac_f32_e32 v[[C_F32_0]], v[[B_F32_0]], v[[A_F32_0]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[C_F32_0]]
; SI: v_mac_f32_e32 v[[C_F32_1]], v[[B_F32_1]], v[[A_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[C_F32_1]]
; SI: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; SI: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; VI: v_mac_f16_e32 v[[C_V2_F16]], v[[B_V2_F16]], v[[A_V2_F16]]
; VI: v_mac_f16_e32 v[[C_F16_1]], v[[B_F16_1]], v[[A_F16_1]]
; VI: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[C_V2_F16]]
; VI: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[C_F16_1]]
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define void @mac_v2f16(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  %t.val = fmul <2 x half> %a.val, %b.val
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; Vector analogue of mac_f16_same_add: each lane of the shared addend gets
; one mad and one mac.
; GCN-LABEL: {{^}}mac_v2f16_same_add
; SI: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD0:v[0-9]+]]
; SI: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD1:v[0-9]+]]
; SI: v_mac_f32_e32 [[ADD0]], v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_mac_f32_e32 [[ADD1]], v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_mad_f16 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD0:v[0-9]+]]
; VI: v_mad_f16 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD1:v[0-9]+]]
; VI: v_mac_f16_e32 [[ADD0]], v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_mac_f16_e32 [[ADD1]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define void @mac_v2f16_same_add(
    <2 x half> addrspace(1)* %r0,
    <2 x half> addrspace(1)* %r1,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c,
    <2 x half> addrspace(1)* %d,
    <2 x half> addrspace(1)* %e) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  %e.val = load <2 x half>, <2 x half> addrspace(1)* %e

  %t0.val = fmul <2 x half> %a.val, %b.val
  %r0.val = fadd <2 x half> %t0.val, %c.val

  %t1.val = fmul <2 x half> %d.val, %e.val
  %r1.val = fadd <2 x half> %t1.val, %c.val

  store <2 x half> %r0.val, <2 x half> addrspace(1)* %r0
  store <2 x half> %r1.val, <2 x half> addrspace(1)* %r1
  ret void
}

; GCN-LABEL: {{^}}mac_v2f16_neg_a
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define void @mac_v2f16_neg_a(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  %a.neg = fsub <2 x half> <half -0.0, half -0.0>, %a.val
  %t.val = fmul <2 x half> %a.neg, %b.val
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}mac_v2f16_neg_b
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define void @mac_v2f16_neg_b(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  %b.neg = fsub <2 x half> <half -0.0, half -0.0>, %b.val
  %t.val = fmul <2 x half> %a.val, %b.neg
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}mac_v2f16_neg_c
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; GCN: s_endpgm
define void @mac_v2f16_neg_c(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  %c.neg = fsub <2 x half> <half -0.0, half -0.0>, %c.val
  %t.val = fmul <2 x half> %a.val, %b.val
  %r.val = fadd <2 x half> %t.val, %c.neg

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}mac_v2f16_neg_a_safe_fp_math
; SI: v_cvt_f32_f16_e32 v[[ZERO:[0-9]+]], 0{{$}}
; SI: v_subrev_f32_e32 v[[NEG_A0:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI: v_subrev_f32_e32 v[[NEG_A1:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]]
; SI: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A1]]
; VI: v_sub_f16_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}}
; VI: v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
; VI: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]]
; VI: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A1]]
; GCN: s_endpgm
define void @mac_v2f16_neg_a_safe_fp_math(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  %a.neg = fsub <2 x half> <half 0.0, half 0.0>, %a.val
  %t.val = fmul <2 x half> %a.neg, %b.val
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}mac_v2f16_neg_b_safe_fp_math
; SI: v_cvt_f32_f16_e32 v[[ZERO:[0-9]+]], 0{{$}}
; SI: v_subrev_f32_e32 v[[NEG_A0:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI: v_subrev_f32_e32 v[[NEG_A1:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI: v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}}
; SI: v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A1]], v{{[0-9]+}}
; VI: v_sub_f16_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}}
; VI: v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
; VI: v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}}
; VI: v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_A1]], v{{[0-9]+}}
; GCN: s_endpgm
define void @mac_v2f16_neg_b_safe_fp_math(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  %b.neg = fsub <2 x half> <half 0.0, half 0.0>, %b.val
  %t.val = fmul <2 x half> %a.val, %b.neg
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}mac_v2f16_neg_c_safe_fp_math
; SI: v_cvt_f32_f16_e32 v[[ZERO:[0-9]+]], 0{{$}}
; SI: v_subrev_f32_e32 v[[NEG_A0:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI: v_subrev_f32_e32 v[[NEG_A1:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI: v_mac_f32_e32 v[[NEG_A0]], v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_mac_f32_e32 v[[NEG_A1]], v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_sub_f16_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}}
; VI: v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
; VI: v_mac_f16_e32 v[[NEG_A0]], v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_mac_f16_e32 v[[NEG_A1]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define void @mac_v2f16_neg_c_safe_fp_math(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  %c.neg = fsub <2 x half> <half 0.0, half 0.0>, %c.val
  %t.val = fmul <2 x half> %a.val, %b.val
  %r.val = fadd <2 x half> %t.val, %c.neg

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}mac_v2f16_neg_a_unsafe_fp_math
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; GCN: s_endpgm
define void @mac_v2f16_neg_a_unsafe_fp_math(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #1 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  %a.neg = fsub <2 x half> <half 0.0, half 0.0>, %a.val
  %t.val = fmul <2 x half> %a.neg, %b.val
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}mac_v2f16_neg_b_unsafe_fp_math
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; GCN: s_endpgm
define void @mac_v2f16_neg_b_unsafe_fp_math(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #1 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  %b.neg = fsub <2 x half> <half 0.0, half 0.0>, %b.val
  %t.val = fmul <2 x half> %a.val, %b.neg
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}mac_v2f16_neg_c_unsafe_fp_math
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
; GCN: s_endpgm
define void @mac_v2f16_neg_c_unsafe_fp_math(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #1 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  %c.neg = fsub <2 x half> <half 0.0, half 0.0>, %c.val
  %t.val = fmul <2 x half> %a.val, %b.val
  %r.val = fadd <2 x half> %t.val, %c.neg

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

attributes #0 = {"unsafe-fp-math"="false"}
attributes #1 = {"unsafe-fp-math"="true"}