; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX900,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX906,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s

; Tests selection of the mixed-precision multiply-add (v_mad_mix_f32 on
; gfx900, v_fma_mix_f32 on gfx906) from llvm.fmuladd calls whose f32
; operands are extended from f16. The fiji and hawaii runs expect a plain
; f32 mad/mac instead.

; All three sources are scalar halves extended to f32.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo:
; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x00,0x03,0x0a,0x1c]
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x00,0x03,0x0a,0x1c]
; VI: v_mac_f32
; CI: v_mad_f32
define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

; Each source is the upper 16 bits of an i32 argument, obtained with
; lshr 16 + trunc + bitcast to half.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16hi_f16hi_f16hi_int:
; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding
; CIVI: v_mac_f32
define float @v_mad_mix_f32_f16hi_f16hi_f16hi_int(i32 %src0, i32 %src1, i32 %src2) #0 {
  %src0.hi = lshr i32 %src0, 16
  %src1.hi = lshr i32 %src1, 16
  %src2.hi = lshr i32 %src2, 16
  %src0.i16 = trunc i32 %src0.hi to i16
  %src1.i16 = trunc i32 %src1.hi to i16
  %src2.i16 = trunc i32 %src2.hi to i16
  %src0.fp16 = bitcast i16 %src0.i16 to half
  %src1.fp16 = bitcast i16 %src1.i16 to half
  %src2.fp16 = bitcast i16 %src2.i16 to half
  %src0.ext = fpext half %src0.fp16 to float
  %src1.ext = fpext half %src1.fp16 to float
  %src2.ext = fpext half %src2.fp16 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

; Each source is element 1 of a <2 x half> argument.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding
; VI: v_mac_f32
; CI: v_mad_f32
define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.hi = extractelement <2 x half> %src0, i32 1
  %src1.hi = extractelement <2 x half> %src1, i32 1
  %src2.hi = extractelement <2 x half> %src2, i32 1
  %src0.ext = fpext half %src0.hi to float
  %src1.ext = fpext half %src1.hi to float
  %src2.ext = fpext half %src2.hi to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

; Vector form: <2 x half> sources extended to <2 x float>. The gfx9 checks
; expect one mix instruction per lane followed by a register move.
; GCN-LABEL: {{^}}v_mad_mix_v2f32:
; GFX900: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX900-NEXT: v_mov_b32_e32 v1, v3

; GFX906: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: v_mov_b32_e32 v1, v3

; CIVI: v_mac_f32
define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %src2.ext = fpext <2 x half> %src2 to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
  ret <2 x float> %result
}

; Same as above, but each source is shuffled before the extension
; (masks <1,0>, <0,1> and <1,1> for src0, src1 and src2 respectively).
; GCN-LABEL: {{^}}v_mad_mix_v2f32_shuffle:
; GCN: s_waitcnt
; GFX900: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
; GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: s_setpc_b64

; GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
; GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
; GFX906-NEXT: v_mov_b32_e32 v0, v3
; GFX906-NEXT: s_setpc_b64

; CIVI: v_mac_f32
define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.shuf = shufflevector <2 x half> %src0, <2 x half> undef, <2 x i32> <i32 1, i32 0>
  %src1.shuf = shufflevector <2 x half> %src1, <2 x half> undef, <2 x i32> <i32 0, i32 1>
  %src2.shuf = shufflevector <2 x half> %src2, <2 x half> undef, <2 x i32> <i32 1, i32 1>
  %src0.ext = fpext <2 x half> %src0.shuf to <2 x float>
  %src1.ext = fpext <2 x half> %src1.shuf to <2 x float>
  %src2.ext = fpext <2 x half> %src2.shuf to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
  ret <2 x float> %result
}

; fneg applied to the extended src0.
; GCN-LABEL: {{^}}v_mad_mix_f32_negf16lo_f16lo_f16lo:
; GFX900: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
; GFX900-NEXT: s_setpc_b64

; GFX906: s_waitcnt
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
; GFX906-NEXT: s_setpc_b64

; CIVI: v_mad_f32
define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %src0.ext.neg = fneg float %src0.ext
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg, float %src1.ext, float %src2.ext)
  ret float %result
}

; fabs applied to the extended src0.
; GCN-LABEL: {{^}}v_mad_mix_f32_absf16lo_f16lo_f16lo:
; GFX900: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX906: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]

; CIVI: v_mad_f32
define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext)
  ret float %result
}

; fneg(fabs(x)) applied to the extended src0.
; GCN-LABEL: {{^}}v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
; GFX900: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX900-NEXT: s_setpc_b64

; GFX906: s_waitcnt
; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64

; CIVI: v_mad_f32
define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
  %src0.ext.neg.abs = fneg float %src0.ext.abs
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg.abs, float %src1.ext, float %src2.ext)
  ret float %result
}

; Mixed operand widths: src0/src1 are extended halves, src2 is a native f32.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32:
; GCN: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mad_f32
define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
  ret float %result
}

; Native f32 src2 with fneg.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_negf32:
; GCN: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] ; encoding
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] ; encoding
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mad_f32
define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.neg = fneg float %src2
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg)
  ret float %result
}

; Native f32 src2 with fabs.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_absf32:
; GCN: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0] ; encoding
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0] ; encoding
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mad_f32
define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.abs = call float @llvm.fabs.f32(float %src2)
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.abs)
  ret float %result
}

; Native f32 src2 with fneg(fabs(x)).
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_negabsf32:
; GCN: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0] ; encoding
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0] ; encoding
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mad_f32
define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.abs = call float @llvm.fabs.f32(float %src2)
  %src2.neg.abs = fneg float %src2.abs
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg.abs)
  ret float %result
}

; TODO: Fold inline immediates. Need to be careful because it is an
; f16 inline immediate that may be converted to f32, not an actual f32
; inline immediate.

; The addend is the f32 constant 1.0. The gfx9 checks expect it to be
; materialized in an SGPR rather than folded into the mix instruction.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32imm1:
; GCN: s_waitcnt
; GFX9: s_mov_b32 [[SREG:s[0-9]+]], 1.0
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding

; CIVI: v_mad_f32 v0, v0, v1, 1.0
; GCN-NEXT: s_setpc_b64
define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 1.0)
  ret float %result
}

; The addend is the f32 constant printed as 0.15915494 in the checks below.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
; GCN: s_waitcnt
; GFX9: s_mov_b32 [[SREG:s[0-9]+]], 0.15915494
; GFX900: v_mad_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding
; GFX906: v_fma_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding
; VI: v_mad_f32 v0, v0, v1, 0.15915494
define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 0x3FC45F3060000000)
  ret float %result
}

; Attempt to break inline immediate folding. If the operand is
; interpreted as f32, the inline immediate is really the f16 inline
; imm value converted to f32.
; fpext f16 1/2pi = 0x3e230000
; f32 1/2pi = 0x3e22f983
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; GFX9: s_mov_b32 [[SREG:s[0-9]+]], 0x3e230000
; GFX900: v_mad_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding
; GFX906: v_fma_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding

; CIVI: v_madak_f32 v0, v0, v1, 0x3e230000
define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2 = fpext half 0xH3118 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
  ret float %result
}

; The addend is the half constant 0xH003F extended to f32.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; GFX9: s_mov_b32 [[SREG:s[0-9]+]], 0x367c0000
; GFX900: v_mad_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding
; GFX906: v_fma_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding

; CIVI: v_madak_f32 v0, v0, v1, 0x367c0000
define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2 = fpext half 0xH003F to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
  ret float %result
}

; Vector form with an f32 1.0 splat as the addend.
; GCN-LABEL: {{^}}v_mad_mix_v2f32_f32imm1:
; GFX9: s_mov_b32 [[SREG:s[0-9]+]], 1.0
; GFX900: v_mad_mix_f32 v2, v0, v1, [[SREG]] op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
; GFX900: v_mad_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding
; GFX900: v_mov_b32_e32 v1, v2

; GFX906: v_fma_mix_f32 v2, v0, v1, [[SREG]] op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
; GFX906: v_fma_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding
; GFX906: v_mov_b32_e32 v1, v2
define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 1.0, float 1.0>)
  ret <2 x float> %result
}

; Vector form with a splat of half 0xH3118 extended to f32 as the addend.
; GCN-LABEL: {{^}}v_mad_mix_v2f32_cvtf16imminv2pi:
; GFX9: s_mov_b32 [[SREG:s[0-9]+]], 0x3e230000

; GFX900: v_mad_mix_f32 v2, v0, v1, [[SREG]] op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
; GFX900: v_mad_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding
; GFX900: v_mov_b32_e32 v1, v2

; GFX906: v_fma_mix_f32 v2, v0, v1, [[SREG]] op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
; GFX906: v_fma_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding
; GFX906: v_mov_b32_e32 v1, v2
define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %src2 = fpext <2 x half> <half 0xH3118, half 0xH3118> to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2)
  ret <2 x float> %result
}

; Vector form with an f32 constant splat (printed as 0.15915494) as the
; addend.
; GCN-LABEL: {{^}}v_mad_mix_v2f32_f32imminv2pi:
; GFX9: s_mov_b32 [[SREG:s[0-9]+]], 0.15915494

; GFX900: v_mad_mix_f32 v2, v0, v1, [[SREG]] op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
; GFX900: v_mad_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding
; GFX900: v_mov_b32_e32 v1, v2

; GFX906: v_fma_mix_f32 v2, v0, v1, [[SREG]] op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
; GFX906: v_fma_mix_f32 v0, v0, v1, [[SREG]] op_sel_hi:[1,1,0] ; encoding
; GFX906: v_mov_b32_e32 v1, v2
define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 0x3FC45F3060000000, float 0x3FC45F3060000000>)
  ret <2 x float> %result
}

; The fmuladd result goes through maxnum(x, 0.0) then minnum(x, 1.0); the
; checks expect this to become the clamp modifier.
; GCN-LABEL: {{^}}v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; encoding
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; encoding
; CIVI: v_mad_f32 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}}
define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.hi = extractelement <2 x half> %src0, i32 1
  %src1.hi = extractelement <2 x half> %src1, i32 1
  %src2.hi = extractelement <2 x half> %src2, i32 1
  %src0.ext = fpext half %src0.hi to float
  %src1.ext = fpext half %src1.hi to float
  %src2.ext = fpext half %src2.hi to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %max = call float @llvm.maxnum.f32(float %result, float 0.0)
  %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
  ret float %clamp
}

; All operands are already f32, so no mix instruction is expected.
; GCN-LABEL: {{^}}no_mix_simple:
; GCN: s_waitcnt
; GCN-NEXT: v_{{mad|fma}}_f32 v0, v0, v1, v2
; GCN-NEXT: s_setpc_b64
define float @no_mix_simple(float %src0, float %src1, float %src2) #0 {
  %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2)
  ret float %result
}

; As above, with fabs on the f32 src0.
; GCN-LABEL: {{^}}no_mix_simple_fabs:
; GCN: s_waitcnt
; CIVI-NEXT: v_mad_f32 v0, |v0|, v1, v2
; GFX900-NEXT: v_mad_f32 v0, |v0|, v1, v2
; GFX906-NEXT: v_fma_f32 v0, |v0|, v1, v2
; GCN-NEXT: s_setpc_b64
define float @no_mix_simple_fabs(float %src0, float %src1, float %src2) #0 {
  %src0.fabs = call float @llvm.fabs.f32(float %src0)
  %result = call float @llvm.fmuladd.f32(float %src0.fabs, float %src1, float %src2)
  ret float %result
}

; FIXME: Should be able to select in this case.
; All sources are converted from f16, so it doesn't matter that
; v_mad_mix_f32 flushes.

; Uses attribute group #1 (ieee f32 denormal mode).
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
; GFX900: v_cvt_f32_f16
; GFX900: v_cvt_f32_f16
; GFX900: v_cvt_f32_f16
; GFX900: v_fma_f32
define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %src1, half %src2) #1 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

; ieee f32 denormal mode with a native f32 addend: gfx900 is expected to
; convert and use v_fma_f32, gfx906 to select v_fma_mix_f32 directly.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32_denormals:
; GFX900: v_cvt_f32_f16
; GFX900: v_cvt_f32_f16
; GFX900: v_fma_f32

; GFX906-NOT: v_cvt_f32_f16
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, float %src2) #1 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
  ret float %result
}

; Separate fmul/fadd without the contract flag under ieee f32 denormal
; mode: the checks expect them to stay unfused.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
; GFX9: v_cvt_f32_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_mul_f32
; GFX9: v_add_f32
define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, half %src2) #1 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %mul = fmul float %src0.ext, %src1.ext
  %result = fadd float %mul, %src2.ext
  ret float %result
}

; Same as the previous test, with a native f32 addend.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
; GFX9: v_cvt_f32_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_mul_f32
; GFX9: v_add_f32
define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, float %src2) #1 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %mul = fmul float %src0.ext, %src1.ext
  %result = fadd float %mul, %src2
  ret float %result
}

; fmul/fadd carrying the contract flag under attribute group #0
; (preserve-sign f32 denormal mode): the checks expect a mix instruction.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
; GCN: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
; GFX9-NEXT: s_setpc_b64
define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %mul = fmul contract float %src0.ext, %src1.ext
  %result = fadd contract float %mul, %src2.ext
  ret float %result
}

; Same as the previous test, with a native f32 addend.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
; GCN: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
; GFX9-NEXT: s_setpc_b64
define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, float %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %mul = fmul contract float %src0.ext, %src1.ext
  %result = fadd contract float %mul, %src2
  ret float %result
}

; The negation of src0 is done on the half value, before the fpext.
; GCN-LABEL: {{^}}v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
; GFX9: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mad_f32
define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
  %src0 = extractelement <2 x half> %src0.arg.bc, i32 0
  %src0.neg = fsub half -0.0, %src0
  %src0.ext = fpext half %src0.neg to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
; %src0.ext.neg = fsub float -0.0, %src0.ext
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

; Make sure we don't fold pre-cvt fneg if we already have a fabs
; GCN-LABEL: {{^}}v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
; GFX900: s_waitcnt
define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
  %src0 = extractelement <2 x half> %src0.arg.bc, i32 1
  %src0.neg = fsub half -0.0, %src0
  %src0.ext = fpext half %src0.neg to float
  %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext)
  ret float %result
}

; fabs is applied to the extracted high half, before the fpext.
; GCN-LABEL: {{^}}v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
; GFX9: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX9-NEXT: s_setpc_b64
define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
  %src0 = extractelement <2 x half> %src0.arg.bc, i32 1
  %src0.abs = call half @llvm.fabs.f16(half %src0)
  %src0.ext = fpext half %src0.abs to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

; The negation is applied to the whole <2 x half> vector, before element 1
; is extracted.
; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
; GFX9: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX9-NEXT: s_setpc_b64
define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
  %fneg = fsub <2 x half> <half -0.0, half -0.0>, %src0.arg.bc
  %src0 = extractelement <2 x half> %fneg, i32 1
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

; fabs is applied to the whole <2 x half> vector, before element 1 is
; extracted.
; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
; GFX9: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX9-NEXT: s_setpc_b64
define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc)
  %src0 = extractelement <2 x half> %fabs, i32 1
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

; fabs followed by negation, both on the whole <2 x half> vector, before
; element 1 is extracted.
; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
; GFX9: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX9-NEXT: s_setpc_b64
define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc)
  %fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs
  %src0 = extractelement <2 x half> %fneg.fabs, i32 1
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

declare half @llvm.fabs.f16(half) #2
declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
declare float @llvm.fabs.f32(float) #2
declare float @llvm.minnum.f32(float, float) #2
declare float @llvm.maxnum.f32(float, float) #2
declare float @llvm.fmuladd.f32(float, float, float) #2
declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #2

; #0 is the preserve-sign f32 denormal mode, #1 the ieee f32 denormal mode,
; #2 is shared by the intrinsic declarations.
attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
attributes #2 = { nounwind readnone speculatable }