; Codegen tests for f16 llvm.fmuladd and fmul/fadd/fsub contraction patterns
; on VI (Fiji, gfx8).
;
; The RUN matrix crosses fp16 denormal handling (-fp16-denormals flushes,
; +fp16-denormals preserves) with -fp-contract=on vs. -fp-contract=fast.
; Check-prefix meanings, written here without trailing colons so FileCheck
; does not treat this comment as a directive --
;   GCN                 common to every run
;   GCN-STRICT          -fp-contract=on runs
;   GCN-CONTRACT        -fp-contract=fast runs
;   VI-FLUSH            denormals flushed; v_mac/v_mad forms are checked
;   VI-DENORM           denormals preserved; v_fma_f16 is checked
;   VI-DENORM-STRICT / VI-DENORM-CONTRACT   denormal runs split by contraction
;
; NOTE(review): every RUN line below appears twice verbatim; presumably
; redundant duplicates -- confirm intent before cleaning up.
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s

; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s

; Intrinsics used by the tests below.
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare half @llvm.fmuladd.f16(half, half, half) #1
declare half @llvm.fabs.f16(half) #1

; fmuladd with all-register operands.
; GCN-LABEL: {{^}}fmuladd_f16:
; VI-FLUSH: v_mac_f16_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}

; VI-DENORM: v_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
define void @fmuladd_f16(half addrspace(1)* %out, half addrspace(1)* %in1,
                         half addrspace(1)* %in2, half addrspace(1)* %in3) #0 {
  %r0 = load half, half addrspace(1)* %in1
  %r1 = load half, half addrspace(1)* %in2
  %r2 = load half, half addrspace(1)* %in3
  %r3 = tail call half @llvm.fmuladd.f16(half %r0, half %r1, half %r2)
  store half %r3, half addrspace(1)* %out
  ret void
}

; fmuladd(2.0, a, b) -- the 2.0 should become an inline constant operand.
; GCN-LABEL: {{^}}fmuladd_2.0_a_b_f16
; GCN: {{buffer|flat}}_load_ushort [[R1:v[0-9]+]],
; GCN: {{buffer|flat}}_load_ushort [[R2:v[0-9]+]],
; VI-FLUSH: v_mac_f16_e32 [[R2]], 2.0, [[R1]]
; VI-FLUSH: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[R2]]

; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
; VI-DENORM: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @fmuladd_2.0_a_b_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.0 = getelementptr half, half addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr half, half addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr half, half addrspace(1)* %out, i32 %tid

  %r1 = load volatile half, half addrspace(1)* %gep.0
  %r2 = load volatile half, half addrspace(1)* %gep.1

  %r3 = tail call half @llvm.fmuladd.f16(half 2.0, half %r1, half %r2)
  store half %r3, half addrspace(1)* %gep.out
  ret void
}

; Same as above with the constant commuted to the second operand; expected
; output is identical.
; GCN-LABEL: {{^}}fmuladd_a_2.0_b_f16
; GCN: {{buffer|flat}}_load_ushort [[R1:v[0-9]+]],
; GCN: {{buffer|flat}}_load_ushort [[R2:v[0-9]+]],
; VI-FLUSH: v_mac_f16_e32 [[R2]], 2.0, [[R1]]
; VI-FLUSH: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[R2]]

; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
; VI-DENORM: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @fmuladd_a_2.0_b_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.0 = getelementptr half, half addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr half, half addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr half, half addrspace(1)* %out, i32 %tid

  %r1 = load volatile half, half addrspace(1)* %gep.0
  %r2 = load volatile half, half addrspace(1)* %gep.1

  %r3 = tail call half @llvm.fmuladd.f16(half %r1, half 2.0, half %r2)
  store half %r3, half addrspace(1)* %gep.out
  ret void
}

; (a + a) + b -- only contractable to a*2+b under -fp-contract=fast; strict
; runs with preserved denormals must keep the two adds.
; GCN-LABEL: {{^}}fadd_a_a_b_f16:
; GCN: {{buffer|flat}}_load_ushort [[R1:v[0-9]+]],
; GCN: {{buffer|flat}}_load_ushort [[R2:v[0-9]+]],
; VI-FLUSH: v_mac_f16_e32 [[R2]], 2.0, [[R1]]
; VI-FLUSH: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[R2]]

; VI-DENORM-CONTRACT: v_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]

; VI-DENORM-STRICT: v_add_f16_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
; VI-DENORM-STRICT: v_add_f16_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]

; VI-DENORM: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @fadd_a_a_b_f16(half addrspace(1)* %out,
                            half addrspace(1)* %in1,
                            half addrspace(1)* %in2) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.0 = getelementptr half, half addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr half, half addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr half, half addrspace(1)* %out, i32 %tid

  %r0 = load volatile half, half addrspace(1)* %gep.0
  %r1 = load volatile half, half addrspace(1)* %gep.1

  %add.0 = fadd half %r0, %r0
  %add.1 = fadd half %add.0, %r1
  store half %add.1, half addrspace(1)* %gep.out
  ret void
}

; b + (a + a) -- commuted form of the previous test.
; GCN-LABEL: {{^}}fadd_b_a_a_f16:
; GCN: {{buffer|flat}}_load_ushort [[R1:v[0-9]+]],
; GCN: {{buffer|flat}}_load_ushort [[R2:v[0-9]+]],
; VI-FLUSH: v_mac_f16_e32 [[R2]], 2.0, [[R1]]
; VI-FLUSH: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[R2]]

; VI-DENORM-CONTRACT: v_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]

; VI-DENORM-STRICT: v_add_f16_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
; VI-DENORM-STRICT: v_add_f16_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]

; VI-DENORM: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @fadd_b_a_a_f16(half addrspace(1)* %out,
                            half addrspace(1)* %in1,
                            half addrspace(1)* %in2) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.0 = getelementptr half, half addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr half, half addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr half, half addrspace(1)* %out, i32 %tid

  %r0 = load volatile half, half addrspace(1)* %gep.0
  %r1 = load volatile half, half addrspace(1)* %gep.1

  %add.0 = fadd half %r0, %r0
  %add.1 = fadd half %r1, %add.0
  store half %add.1, half addrspace(1)* %gep.out
  ret void
}

; fmuladd(-2.0, a, b) -- negative inline constant.
; GCN-LABEL: {{^}}fmuladd_neg_2.0_a_b_f16
; GCN: {{buffer|flat}}_load_ushort [[R1:v[0-9]+]],
; GCN: {{buffer|flat}}_load_ushort [[R2:v[0-9]+]],
; VI-FLUSH: v_mac_f16_e32 [[R2]], -2.0, [[R1]]
; VI-DENORM: v_fma_f16 [[R2:v[0-9]+]], [[R1]], -2.0, [[R2]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
define void @fmuladd_neg_2.0_a_b_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.0 = getelementptr half, half addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr half, half addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr half, half addrspace(1)* %out, i32 %tid

  %r1 = load volatile half, half addrspace(1)* %gep.0
  %r2 = load volatile half, half addrspace(1)* %gep.1

  %r3 = tail call half @llvm.fmuladd.f16(half -2.0, half %r1, half %r2)
  store half %r3, half addrspace(1)* %gep.out
  ret void
}

; fmuladd(-2.0, -a, b) -- in the flush case the two negations are expected to
; cancel into a plain mac with 2.0; the denormal case keeps both source
; modifiers on the fma.
; GCN-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f16
; GCN: {{buffer|flat}}_load_ushort [[R1:v[0-9]+]],
; GCN: {{buffer|flat}}_load_ushort [[R2:v[0-9]+]],
; VI-FLUSH: v_mac_f16_e32 [[R2]], 2.0, [[R1]]
; VI-FLUSH: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[R2]]

; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], -[[R1]], -2.0, [[R2]]
; VI-DENORM: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @fmuladd_neg_2.0_neg_a_b_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.0 = getelementptr half, half addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr half, half addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr half, half addrspace(1)* %out, i32 %tid

  %r1 = load volatile half, half addrspace(1)* %gep.0
  %r2 = load volatile half, half addrspace(1)* %gep.1

  %r1.fneg = fsub half -0.000000e+00, %r1

  %r3 = tail call half @llvm.fmuladd.f16(half -2.0, half %r1.fneg, half %r2)
  store half %r3, half addrspace(1)* %gep.out
  ret void
}

; fmuladd(2.0, -a, b) -- single negation folds into the constant (flush) or
; becomes a source modifier (denorm).
; GCN-LABEL: {{^}}fmuladd_2.0_neg_a_b_f16
; GCN: {{buffer|flat}}_load_ushort [[R1:v[0-9]+]],
; GCN: {{buffer|flat}}_load_ushort [[R2:v[0-9]+]],
; VI-FLUSH: v_mac_f16_e32 [[R2]], -2.0, [[R1]]
; VI-FLUSH: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[R2]]

; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], -[[R1]], 2.0, [[R2]]
; VI-DENORM: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @fmuladd_2.0_neg_a_b_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.0 = getelementptr half, half addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr half, half addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr half, half addrspace(1)* %out, i32 %tid

  %r1 = load volatile half, half addrspace(1)* %gep.0
  %r2 = load volatile half, half addrspace(1)* %gep.1

  %r1.fneg = fsub half -0.000000e+00, %r1

  %r3 = tail call half @llvm.fmuladd.f16(half 2.0, half %r1.fneg, half %r2)
  store half %r3, half addrspace(1)* %gep.out
  ret void
}

; fmuladd(2.0, a, -b) -- the negated addend forces mad/fma (mac cannot negate
; its accumulator).
; GCN-LABEL: {{^}}fmuladd_2.0_a_neg_b_f16
; GCN: {{buffer|flat}}_load_ushort [[R1:v[0-9]+]],
; GCN: {{buffer|flat}}_load_ushort [[R2:v[0-9]+]],
; VI-FLUSH: v_mad_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @fmuladd_2.0_a_neg_b_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.0 = getelementptr half, half addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr half, half addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr half, half addrspace(1)* %out, i32 %tid

  %r1 = load volatile half, half addrspace(1)* %gep.0
  %r2 = load volatile half, half addrspace(1)* %gep.1

  %r2.fneg = fsub half -0.000000e+00, %r2

  %r3 = tail call half @llvm.fmuladd.f16(half 2.0, half %r1, half %r2.fneg)
  store half %r3, half addrspace(1)* %gep.out
  ret void
}

; a*b - c and its variants below: separate fmul/fsub, contractable only under
; -fp-contract=fast (or with flushed denormals, where mad is allowed).
; NOTE(review): the mad_sub* and related functions below are defined with
; attribute group #1 (nounwind readnone) yet perform volatile loads and a
; store; presumably #0 was intended -- confirm.
; GCN-LABEL: {{^}}mad_sub_f16:
; GCN: {{buffer|flat}}_load_ushort [[REGA:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort [[REGB:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort [[REGC:v[0-9]+]]

; VI-FLUSH: v_mad_f16 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -[[REGC]]

; VI-DENORM-CONTRACT: v_fma_f16 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -[[REGC]]

; VI-DENORM-STRICT: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGB]], [[REGA]]
; VI-DENORM-STRICT: v_subrev_f16_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]

; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @mad_sub_f16(half addrspace(1)* noalias nocapture %out, half addrspace(1)* noalias nocapture readonly %ptr) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %tid.ext = sext i32 %tid to i64
  %gep0 = getelementptr half, half addrspace(1)* %ptr, i64 %tid.ext
  %add1 = add i64 %tid.ext, 1
  %gep1 = getelementptr half, half addrspace(1)* %ptr, i64 %add1
  %add2 = add i64 %tid.ext, 2
  %gep2 = getelementptr half, half addrspace(1)* %ptr, i64 %add2
  %outgep = getelementptr half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile half, half addrspace(1)* %gep0, align 2
  %b = load volatile half, half addrspace(1)* %gep1, align 2
  %c = load volatile half, half addrspace(1)* %gep2, align 2
  %mul = fmul half %a, %b
  %sub = fsub half %mul, %c
  store half %sub, half addrspace(1)* %outgep, align 2
  ret void
}

; c - a*b.
; GCN-LABEL: {{^}}mad_sub_inv_f16:
; GCN: {{buffer|flat}}_load_ushort [[REGA:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort [[REGB:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort [[REGC:v[0-9]+]]
; VI-FLUSH: v_mad_f16 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], [[REGC]]

; VI-DENORM-CONTRACT: v_fma_f16 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], [[REGC]]

; VI-DENORM-STRICT: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGB]], [[REGA]]
; VI-DENORM-STRICT: v_subrev_f16_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]

; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @mad_sub_inv_f16(half addrspace(1)* noalias nocapture %out, half addrspace(1)* noalias nocapture readonly %ptr) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %tid.ext = sext i32 %tid to i64
  %gep0 = getelementptr half, half addrspace(1)* %ptr, i64 %tid.ext
  %add1 = add i64 %tid.ext, 1
  %gep1 = getelementptr half, half addrspace(1)* %ptr, i64 %add1
  %add2 = add i64 %tid.ext, 2
  %gep2 = getelementptr half, half addrspace(1)* %ptr, i64 %add2
  %outgep = getelementptr half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile half, half addrspace(1)* %gep0, align 2
  %b = load volatile half, half addrspace(1)* %gep1, align 2
  %c = load volatile half, half addrspace(1)* %gep2, align 2
  %mul = fmul half %a, %b
  %sub = fsub half %c, %mul
  store half %sub, half addrspace(1)* %outgep, align 2
  ret void
}

; a*b - |c| -- fabs on the subtracted addend becomes a -| | modifier.
; GCN-LABEL: {{^}}mad_sub_fabs_f16:
; GCN: {{buffer|flat}}_load_ushort [[REGA:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort [[REGB:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort [[REGC:v[0-9]+]]
; VI-FLUSH: v_mad_f16 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -|[[REGC]]|

; VI-DENORM-CONTRACT: v_fma_f16 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -|[[REGC]]|

; VI-DENORM-STRICT: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGB]], [[REGA]]
; VI-DENORM-STRICT: v_sub_f16_e64 [[RESULT:v[0-9]+]], [[TMP]], |[[REGC]]|

; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @mad_sub_fabs_f16(half addrspace(1)* noalias nocapture %out, half addrspace(1)* noalias nocapture readonly %ptr) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %tid.ext = sext i32 %tid to i64
  %gep0 = getelementptr half, half addrspace(1)* %ptr, i64 %tid.ext
  %add1 = add i64 %tid.ext, 1
  %gep1 = getelementptr half, half addrspace(1)* %ptr, i64 %add1
  %add2 = add i64 %tid.ext, 2
  %gep2 = getelementptr half, half addrspace(1)* %ptr, i64 %add2
  %outgep = getelementptr half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile half, half addrspace(1)* %gep0, align 2
  %b = load volatile half, half addrspace(1)* %gep1, align 2
  %c = load volatile half, half addrspace(1)* %gep2, align 2
  %c.abs = call half @llvm.fabs.f16(half %c) #0
  %mul = fmul half %a, %b
  %sub = fsub half %mul, %c.abs
  store half %sub, half addrspace(1)* %outgep, align 2
  ret void
}

; |c| - a*b.
; GCN-LABEL: {{^}}mad_sub_fabs_inv_f16:
; GCN: {{buffer|flat}}_load_ushort [[REGA:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort [[REGB:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort [[REGC:v[0-9]+]]

; VI-FLUSH: v_mad_f16 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|

; VI-DENORM-CONTRACT: v_fma_f16 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|

; VI-DENORM-STRICT: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGB]], [[REGA]]
; VI-DENORM-STRICT: v_sub_f16_e64 [[RESULT:v[0-9]+]], |[[REGC]]|, [[TMP]]

; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @mad_sub_fabs_inv_f16(half addrspace(1)* noalias nocapture %out, half addrspace(1)* noalias nocapture readonly %ptr) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %tid.ext = sext i32 %tid to i64
  %gep0 = getelementptr half, half addrspace(1)* %ptr, i64 %tid.ext
  %add1 = add i64 %tid.ext, 1
  %gep1 = getelementptr half, half addrspace(1)* %ptr, i64 %add1
  %add2 = add i64 %tid.ext, 2
  %gep2 = getelementptr half, half addrspace(1)* %ptr, i64 %add2
  %outgep = getelementptr half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile half, half addrspace(1)* %gep0, align 2
  %b = load volatile half, half addrspace(1)* %gep1, align 2
  %c = load volatile half, half addrspace(1)* %gep2, align 2
  %c.abs = call half @llvm.fabs.f16(half %c) #0
  %mul = fmul half %a, %b
  %sub = fsub half %c.abs, %mul
  store half %sub, half addrspace(1)* %outgep, align 2
  ret void
}

; (-a)*(-b) + c -- the two negations should cancel to a plain mac/fma/mul+add.
; GCN-LABEL: {{^}}neg_neg_mad_f16:
; GCN: {{buffer|flat}}_load_ushort [[REGA:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort [[REGB:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort [[REGC:v[0-9]+]]

; VI-FLUSH: v_mac_f16_e32 [[REGC]], [[REGB]], [[REGA]]
; VI-FLUSH: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REGC]]

; VI-DENORM-CONTRACT: v_fma_f16 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], [[REGC]]

; VI-DENORM-STRICT: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGB]], [[REGA]]
; VI-DENORM-STRICT: v_add_f16_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
; VI-DENORM: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @neg_neg_mad_f16(half addrspace(1)* noalias nocapture %out, half addrspace(1)* noalias nocapture readonly %ptr) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %tid.ext = sext i32 %tid to i64
  %gep0 = getelementptr half, half addrspace(1)* %ptr, i64 %tid.ext
  %add1 = add i64 %tid.ext, 1
  %gep1 = getelementptr half, half addrspace(1)* %ptr, i64 %add1
  %add2 = add i64 %tid.ext, 2
  %gep2 = getelementptr half, half addrspace(1)* %ptr, i64 %add2
  %outgep = getelementptr half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile half, half addrspace(1)* %gep0, align 2
  %b = load volatile half, half addrspace(1)* %gep1, align 2
  %c = load volatile half, half addrspace(1)* %gep2, align 2
  %nega = fsub half -0.000000e+00, %a
  %negb = fsub half -0.000000e+00, %b
  %mul = fmul half %nega, %negb
  %sub = fadd half %mul, %c
  store half %sub, half addrspace(1)* %outgep, align 2
  ret void
}

; a*|b| - c -- fabs on a multiplicand.
; GCN-LABEL: {{^}}mad_fabs_sub_f16:
; GCN: {{buffer|flat}}_load_ushort [[REGA:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort [[REGB:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort [[REGC:v[0-9]+]]

; VI-FLUSH: v_mad_f16 [[RESULT:v[0-9]+]], [[REGA]], |[[REGB]]|, -[[REGC]]

; VI-DENORM-CONTRACT: v_fma_f16 [[RESULT:v[0-9]+]], [[REGA]], |[[REGB]]|, -[[REGC]]

; VI-DENORM-STRICT: v_mul_f16_e64 [[TMP:v[0-9]+]], [[REGA]], |[[REGB]]|
; VI-DENORM-STRICT: v_subrev_f16_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]

; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @mad_fabs_sub_f16(half addrspace(1)* noalias nocapture %out, half addrspace(1)* noalias nocapture readonly %ptr) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %tid.ext = sext i32 %tid to i64
  %gep0 = getelementptr half, half addrspace(1)* %ptr, i64 %tid.ext
  %add1 = add i64 %tid.ext, 1
  %gep1 = getelementptr half, half addrspace(1)* %ptr, i64 %add1
  %add2 = add i64 %tid.ext, 2
  %gep2 = getelementptr half, half addrspace(1)* %ptr, i64 %add2
  %outgep = getelementptr half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile half, half addrspace(1)* %gep0, align 2
  %b = load volatile half, half addrspace(1)* %gep1, align 2
  %c = load volatile half, half addrspace(1)* %gep2, align 2
  %b.abs = call half @llvm.fabs.f16(half %b) #0
  %mul = fmul half %a, %b.abs
  %sub = fsub half %mul, %c
  store half %sub, half addrspace(1)* %outgep, align 2
  ret void
}

; c - (a + a) -- contracts to a*(-2.0) + c.
; GCN-LABEL: {{^}}fsub_c_fadd_a_a_f16:
; GCN: {{buffer|flat}}_load_ushort [[R1:v[0-9]+]],
; GCN: {{buffer|flat}}_load_ushort [[R2:v[0-9]+]],
; VI-FLUSH: v_mac_f16_e32 [[R2]], -2.0, [[R1]]
; VI-FLUSH: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[R2]]

; VI-DENORM-CONTRACT: v_fma_f16 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]

; VI-DENORM-STRICT: v_add_f16_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
; VI-DENORM-STRICT: v_subrev_f16_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]

; VI-DENORM: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @fsub_c_fadd_a_a_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.0 = getelementptr half, half addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr half, half addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr half, half addrspace(1)* %out, i32 %tid

  %r1 = load volatile half, half addrspace(1)* %gep.0
  %r2 = load volatile half, half addrspace(1)* %gep.1

  %add = fadd half %r1, %r1
  %r3 = fsub half %r2, %add

  store half %r3, half addrspace(1)* %gep.out
  ret void
}

; (a + a) - c -- contracts to a*2.0 - c.
; NOTE(review): in the VI-DENORM-CONTRACT runs the fma check below binds
; [[R2]], not [[RESULT]], yet the final store check consumes [[RESULT]];
; verify this configuration actually defines RESULT before relying on it.
; GCN-LABEL: {{^}}fsub_fadd_a_a_c_f16:
; GCN: {{buffer|flat}}_load_ushort [[R1:v[0-9]+]],
; GCN: {{buffer|flat}}_load_ushort [[R2:v[0-9]+]],

; VI-FLUSH: v_mad_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]

; VI-DENORM-CONTRACT: v_fma_f16 [[R2]], [[R1]], 2.0, -[[R2]]

; VI-DENORM-STRICT: v_add_f16_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
; VI-DENORM-STRICT: v_subrev_f16_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]

; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @fsub_fadd_a_a_c_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.0 = getelementptr half, half addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr half, half addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr half, half addrspace(1)* %out, i32 %tid

  %r1 = load volatile half, half addrspace(1)* %gep.0
  %r2 = load volatile half, half addrspace(1)* %gep.1

  %add = fadd half %r1, %r1
  %r3 = fsub half %add, %r2

  store half %r3, half addrspace(1)* %gep.out
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }