; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx90a -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX90A %s

declare double @llvm.fma.f64(double, double, double) nounwind readnone
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
declare double @llvm.fabs.f64(double) nounwind readnone

; FUNC-LABEL: {{^}}fma_f64:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                                   double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2)
  store double %r3, double addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_v2f64:
; SI: v_fma_f64
; SI: v_fma_f64
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
                                     <2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) {
  %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1
  %r1 = load <2 x double>, <2 x double> addrspace(1)* %in2
  %r2 = load <2 x double>, <2 x double> addrspace(1)* %in3
  %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2)
  store <2 x double> %r3, <2 x double> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_v4f64:
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
                                     <4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) {
  %r0 = load <4 x double>, <4 x double> addrspace(1)* %in1
  %r1 = load <4 x double>, <4 x double> addrspace(1)* %in2
  %r2 = load <4 x double>, <4 x double> addrspace(1)* %in3
  %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2)
  store <4 x double> %r3, <4 x double> addrspace(1)* %out
  ret void
}

; The fabs should be folded into the fma as an |abs| source modifier.
; FUNC-LABEL: {{^}}fma_f64_abs_src0:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_src0(double addrspace(1)* %out, double addrspace(1)* %in1,
                                            double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %fabs = call double @llvm.fabs.f64(double %r0)
  %r3 = tail call double @llvm.fma.f64(double %fabs, double %r1, double %r2)
  store double %r3, double addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_src1:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_src1(double addrspace(1)* %out, double addrspace(1)* %in1,
                                            double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %fabs = call double @llvm.fabs.f64(double %r1)
  %r3 = tail call double @llvm.fma.f64(double %r0, double %fabs, double %r2)
  store double %r3, double addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_src2:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|}}
; GFX90A: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|}}
define amdgpu_kernel void @fma_f64_abs_src2(double addrspace(1)* %out, double addrspace(1)* %in1,
                                            double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %fabs = call double @llvm.fabs.f64(double %r2)
  %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %fabs)
  store double %r3, double addrspace(1)* %out
  ret void
}

; The fneg should be folded into the fma as a negate source modifier.
; FUNC-LABEL: {{^}}fma_f64_neg_src0:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_neg_src0(double addrspace(1)* %out, double addrspace(1)* %in1,
                                            double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %fsub = fsub double -0.000000e+00, %r0
  %r3 = tail call double @llvm.fma.f64(double %fsub, double %r1, double %r2)
  store double %r3, double addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_neg_src1:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_neg_src1(double addrspace(1)* %out, double addrspace(1)* %in1,
                                            double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %fsub = fsub double -0.000000e+00, %r1
  %r3 = tail call double @llvm.fma.f64(double %r0, double %fsub, double %r2)
  store double %r3, double addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_neg_src2:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_neg_src2(double addrspace(1)* %out, double addrspace(1)* %in1,
                                            double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %fsub = fsub double -0.000000e+00, %r2
  %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %fsub)
  store double %r3, double addrspace(1)* %out
  ret void
}

; A combined fneg(fabs()) should be folded into the fma as a -|abs| source modifier.
; FUNC-LABEL: {{^}}fma_f64_abs_neg_src0:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_neg_src0(double addrspace(1)* %out, double addrspace(1)* %in1,
                                                double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %fabs = call double @llvm.fabs.f64(double %r0)
  %fsub = fsub double -0.000000e+00, %fabs
  %r3 = tail call double @llvm.fma.f64(double %fsub, double %r1, double %r2)
  store double %r3, double addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_neg_src1:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_neg_src1(double addrspace(1)* %out, double addrspace(1)* %in1,
                                                double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %fabs = call double @llvm.fabs.f64(double %r1)
  %fsub = fsub double -0.000000e+00, %fabs
  %r3 = tail call double @llvm.fma.f64(double %r0, double %fsub, double %r2)
  store double %r3, double addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_neg_src2:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|}}
; GFX90A: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|}}
define amdgpu_kernel void @fma_f64_abs_neg_src2(double addrspace(1)* %out, double addrspace(1)* %in1,
                                                double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %fabs = call double @llvm.fabs.f64(double %r2)
  %fsub = fsub double -0.000000e+00, %fabs
  %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %fsub)
  store double %r3, double addrspace(1)* %out
  ret void
}

; 2.0 is an inline immediate, so it should be used directly as an operand.
; FUNC-LABEL: {{^}}fma_f64_lit_src0:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_lit_src0(double addrspace(1)* %out,
                                            double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %r3 = tail call double @llvm.fma.f64(double +2.0, double %r1, double %r2)
  store double %r3, double addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_lit_src1:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_lit_src1(double addrspace(1)* %out, double addrspace(1)* %in1,
                                            double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r2 = load double, double addrspace(1)* %in3
  %r3 = tail call double @llvm.fma.f64(double %r0, double +2.0, double %r2)
  store double %r3, double addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_lit_src2:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0}}
; GFX90A: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0}}
define amdgpu_kernel void @fma_f64_lit_src2(double addrspace(1)* %out, double addrspace(1)* %in1,
                                            double addrspace(1)* %in2) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double +2.0)
  store double %r3, double addrspace(1)* %out
  ret void
}