; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s

; --------------------------------------------------------------------------------
; fadd tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[RESULT]]
define void @v_fneg_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %add = fadd float %a, %b
  %fneg = fsub float -0.000000e+00, %add
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_store_use_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: buffer_store_dword [[NEG_ADD]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define void @v_fneg_add_store_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %add = fadd float %a, %b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %add, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_multi_use_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]]
; GCN-NEXT: buffer_store_dword [[NEG_ADD]]
; GCN-NEXT: buffer_store_dword [[MUL]]
define void @v_fneg_add_multi_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %add = fadd float %a, %b
  %fneg = fsub float -0.000000e+00, %add
  %use1 = fmul float %add, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_subrev_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define void @v_fneg_add_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %add = fadd float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_subrev_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.b = fsub float -0.000000e+00, %b
  %add = fadd float %a, %fneg.b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.b = fsub float -0.000000e+00, %b
  %add = fadd float %fneg.a, %fneg.b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_store_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-DAG: v_subrev_f32_e32 [[NEG_ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NEXT: buffer_store_dword [[NEG_ADD]]
; GCN-NEXT: buffer_store_dword [[NEG_A]]
define void @v_fneg_add_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %add = fadd float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_subrev_f32_e32 [[NEG_ADD:v[0-9]+]], [[B]], [[A]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: buffer_store_dword [[NEG_ADD]]
; GCN-NEXT: buffer_store_dword [[MUL]]
define void @v_fneg_add_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %add = fadd float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %add
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; fmul tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: buffer_store_dword [[RESULT]]
define void @v_fneg_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.000000e+00, %mul
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_store_use_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_MUL:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: buffer_store_dword [[NEG_MUL]]
; GCN: buffer_store_dword [[ADD]]
define void @v_fneg_mul_store_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %mul, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_multi_use_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_MUL:v[0-9]+]], 0x80000000, [[ADD]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]]
; GCN-NEXT: buffer_store_dword [[NEG_MUL]]
; GCN: buffer_store_dword [[MUL]]
define void @v_fneg_mul_multi_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.000000e+00, %mul
  %use1 = fmul float %mul, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define void @v_fneg_mul_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = fmul float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define void @v_fneg_mul_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.b = fsub float -0.000000e+00, %b
  %mul = fmul float %a, %fneg.b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define void @v_fneg_mul_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.b = fsub float -0.000000e+00, %b
  %mul = fmul float %fneg.a, %fneg.b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_store_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[B]], [[A]]
; GCN-NEXT: buffer_store_dword [[NEG_MUL]]
; GCN: buffer_store_dword [[NEG_A]]
define void @v_fneg_mul_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = fmul float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[B]], [[A]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: buffer_store_dword [[NEG_MUL]]
; GCN: buffer_store_dword [[MUL]]
define void @v_fneg_mul_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = fmul float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %mul
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; fma tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NEXT: buffer_store_dword [[RESULT]]
define void @v_fneg_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_store_use_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-DAG: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
; GCN-NEXT: buffer_store_dword [[NEG_FMA]]
; GCN-NEXT: buffer_store_dword [[FMA]]
define void @v_fneg_fma_store_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fma, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-DAG: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[FMA]]
; GCN-NEXT: buffer_store_dword [[NEG_FMA]]
; GCN-NEXT: buffer_store_dword [[MUL]]
define void @v_fneg_fma_multi_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  %use1 = fmul float %fma, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NEXT: buffer_store_dword [[FMA]]
define void @v_fneg_fma_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_x_fneg_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NEXT: buffer_store_dword [[FMA]]
define void @v_fneg_fma_x_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.b = fsub float -0.000000e+00, %b
  %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_fneg_fneg_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NEXT: buffer_store_dword [[FMA]]
define void @v_fneg_fma_fneg_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.b = fsub float -0.000000e+00, %b
  %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-NEXT: buffer_store_dword [[FMA]]
define void @v_fneg_fma_fneg_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.c = fsub float -0.000000e+00, %c
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_x_y_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
; GCN-NEXT: buffer_store_dword [[FMA]]
define void @v_fneg_fma_x_y_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.c = fsub float -0.000000e+00, %c
  %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_store_use_fneg_x_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NEXT: buffer_store_dword [[FMA]]
; GCN-NEXT: buffer_store_dword [[NEG_A]]
define void @v_fneg_fma_store_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fneg_x_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-DAG: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NEXT: buffer_store_dword [[NEG_FMA]]
; GCN-NEXT: buffer_store_dword [[MUL]]
define void @v_fneg_fma_multi_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float %d) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  %use1 = fmul float %fneg.a, %d
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; fmad tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_fmad_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NEXT: buffer_store_dword [[RESULT]]
define void @v_fneg_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fmad_multi_use_fmad_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-DAG: v_mac_f32_e32 [[C]], [[B]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_C:v[0-9]+]], 0x80000000, [[C]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[C]]
; GCN-NEXT: buffer_store_dword [[NEG_C]]
; GCN-NEXT: buffer_store_dword [[MUL]]
define void @v_fneg_fmad_multi_use_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  %use1 = fmul float %fma, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; fp_extend tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_fp_extend_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f64_f32_e64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]]
; GCN: buffer_store_dwordx2 [[RESULT]]
define void @v_fneg_fp_extend_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fpext = fpext float %a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store double %fneg, double addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
; GCN: buffer_store_dwordx2 [[RESULT]]
define void @v_fneg_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fpext = fpext float %fneg.a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store double %fneg, double addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_extend_store_use_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[FNEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN: buffer_store_dwordx2 [[RESULT]]
; GCN: buffer_store_dword [[FNEG_A]]
define void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fpext = fpext float %fneg.a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store volatile double %fneg, double addrspace(1)* %out.gep
  store volatile float %fneg.a, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
; GCN: buffer_store_dwordx2 v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
; GCN: buffer_store_dwordx2 v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}
define void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fpext = fpext float %a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store volatile double %fneg, double addrspace(1)* %out.gep
  store volatile double %fpext, double addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}, 4.0
; GCN: buffer_store_dwordx2 v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
; GCN: buffer_store_dwordx2 [[MUL]]
define void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fpext = fpext
float %a to double 748 %fneg = fsub double -0.000000e+00, %fpext 749 %mul = fmul double %fpext, 4.0 750 store volatile double %fneg, double addrspace(1)* %out.gep 751 store volatile double %mul, double addrspace(1)* %out.gep 752 ret void 753} 754 755; FIXME: Source modifiers not folded for f16->f32 756; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f16_to_f32: 757define void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 { 758 %tid = call i32 @llvm.amdgcn.workitem.id.x() 759 %tid.ext = sext i32 %tid to i64 760 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext 761 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 762 %a = load volatile half, half addrspace(1)* %a.gep 763 %fpext = fpext half %a to float 764 %fneg = fsub float -0.000000e+00, %fpext 765 store volatile float %fneg, float addrspace(1)* %out.gep 766 store volatile float %fpext, float addrspace(1)* %out.gep 767 ret void 768} 769 770; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32: 771define void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 { 772 %tid = call i32 @llvm.amdgcn.workitem.id.x() 773 %tid.ext = sext i32 %tid to i64 774 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext 775 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 776 %a = load volatile half, half addrspace(1)* %a.gep 777 %fpext = fpext half %a to float 778 %fneg = fsub float -0.000000e+00, %fpext 779 %mul = fmul float %fpext, 4.0 780 store volatile float %fneg, float addrspace(1)* %out.gep 781 store volatile float %mul, float addrspace(1)* %out.gep 782 ret void 783} 784 785; -------------------------------------------------------------------------------- 786; fp_round tests 787; -------------------------------------------------------------------------------- 788 789; GCN-LABEL: 
{{^}}v_fneg_fp_round_f64_to_f32: 790; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]] 791; GCN: v_cvt_f32_f64_e64 [[RESULT:v[0-9]+]], -[[A]] 792; GCN: buffer_store_dword [[RESULT]] 793define void @v_fneg_fp_round_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 { 794 %tid = call i32 @llvm.amdgcn.workitem.id.x() 795 %tid.ext = sext i32 %tid to i64 796 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext 797 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 798 %a = load volatile double, double addrspace(1)* %a.gep 799 %fpround = fptrunc double %a to float 800 %fneg = fsub float -0.000000e+00, %fpround 801 store float %fneg, float addrspace(1)* %out.gep 802 ret void 803} 804 805; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f64_to_f32: 806; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]] 807; GCN: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]] 808; GCN: buffer_store_dword [[RESULT]] 809define void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 { 810 %tid = call i32 @llvm.amdgcn.workitem.id.x() 811 %tid.ext = sext i32 %tid to i64 812 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext 813 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 814 %a = load volatile double, double addrspace(1)* %a.gep 815 %fneg.a = fsub double -0.000000e+00, %a 816 %fpround = fptrunc double %fneg.a to float 817 %fneg = fsub float -0.000000e+00, %fpround 818 store float %fneg, float addrspace(1)* %out.gep 819 ret void 820} 821 822; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f64_to_f32: 823; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}} 824; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}} 825; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]] 826; GCN-DAG: v_mov_b32_e32 v[[NEG_A_LO:[0-9]+]], v[[A_LO]] 827; GCN: 
buffer_store_dword [[RESULT]] 828; GCN: buffer_store_dwordx2 v{{\[}}[[NEG_A_LO]]:[[NEG_A_HI]]{{\]}} 829define void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 { 830 %tid = call i32 @llvm.amdgcn.workitem.id.x() 831 %tid.ext = sext i32 %tid to i64 832 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext 833 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 834 %a = load volatile double, double addrspace(1)* %a.gep 835 %fneg.a = fsub double -0.000000e+00, %a 836 %fpround = fptrunc double %fneg.a to float 837 %fneg = fsub float -0.000000e+00, %fpround 838 store volatile float %fneg, float addrspace(1)* %out.gep 839 store volatile double %fneg.a, double addrspace(1)* undef 840 ret void 841} 842 843; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f64_to_f32: 844; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]] 845; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]] 846; GCN-DAG: v_mul_f64 [[USE1:v\[[0-9]+:[0-9]+\]]], -[[A]], s{{\[}} 847; GCN: buffer_store_dword [[RESULT]] 848; GCN: buffer_store_dwordx2 [[USE1]] 849define void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr, double %c) #0 { 850 %tid = call i32 @llvm.amdgcn.workitem.id.x() 851 %tid.ext = sext i32 %tid to i64 852 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext 853 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 854 %a = load volatile double, double addrspace(1)* %a.gep 855 %fneg.a = fsub double -0.000000e+00, %a 856 %fpround = fptrunc double %fneg.a to float 857 %fneg = fsub float -0.000000e+00, %fpround 858 %use1 = fmul double %fneg.a, %c 859 store volatile float %fneg, float addrspace(1)* %out.gep 860 store volatile double %use1, double addrspace(1)* undef 861 ret void 862} 863 864; GCN-LABEL: {{^}}v_fneg_fp_round_f32_to_f16: 865; GCN: {{buffer|flat}}_load_dword 
[[A:v[0-9]+]] 866; GCN: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], -[[A]] 867; GCN: buffer_store_short [[RESULT]] 868define void @v_fneg_fp_round_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 869 %tid = call i32 @llvm.amdgcn.workitem.id.x() 870 %tid.ext = sext i32 %tid to i64 871 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 872 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext 873 %a = load volatile float, float addrspace(1)* %a.gep 874 %fpround = fptrunc float %a to half 875 %fneg = fsub half -0.000000e+00, %fpround 876 store half %fneg, half addrspace(1)* %out.gep 877 ret void 878} 879 880; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f32_to_f16: 881; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 882; GCN: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]] 883; GCN: buffer_store_short [[RESULT]] 884define void @v_fneg_fp_round_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 885 %tid = call i32 @llvm.amdgcn.workitem.id.x() 886 %tid.ext = sext i32 %tid to i64 887 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 888 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext 889 %a = load volatile float, float addrspace(1)* %a.gep 890 %fneg.a = fsub float -0.000000e+00, %a 891 %fpround = fptrunc float %fneg.a to half 892 %fneg = fsub half -0.000000e+00, %fpround 893 store half %fneg, half addrspace(1)* %out.gep 894 ret void 895} 896 897; GCN-LABEL: {{^}}v_fneg_multi_use_fp_round_fneg_f64_to_f32: 898; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]] 899; GCN-DAG: v_cvt_f32_f64_e32 [[CVT:v[0-9]+]], [[A]] 900; GCN-DAG: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[CVT]] 901; GCN: buffer_store_dword [[NEG]] 902; GCN: buffer_store_dword [[CVT]] 903define void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 { 904 %tid = call i32 @llvm.amdgcn.workitem.id.x() 905 %tid.ext = 
sext i32 %tid to i64 906 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext 907 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 908 %a = load volatile double, double addrspace(1)* %a.gep 909 %fpround = fptrunc double %a to float 910 %fneg = fsub float -0.000000e+00, %fpround 911 store volatile float %fneg, float addrspace(1)* %out.gep 912 store volatile float %fpround, float addrspace(1)* %out.gep 913 ret void 914} 915 916; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f32_to_f16: 917; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 918; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]] 919; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]] 920; GCN: buffer_store_short [[RESULT]] 921; GCN: buffer_store_dword [[NEG_A]] 922define void @v_fneg_fp_round_store_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 923 %tid = call i32 @llvm.amdgcn.workitem.id.x() 924 %tid.ext = sext i32 %tid to i64 925 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 926 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext 927 %a = load volatile float, float addrspace(1)* %a.gep 928 %fneg.a = fsub float -0.000000e+00, %a 929 %fpround = fptrunc float %fneg.a to half 930 %fneg = fsub half -0.000000e+00, %fpround 931 store volatile half %fneg, half addrspace(1)* %out.gep 932 store volatile float %fneg.a, float addrspace(1)* undef 933 ret void 934} 935 936; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f32_to_f16: 937; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 938; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]] 939; GCN-DAG: v_mul_f32_e64 [[USE1:v[0-9]+]], -[[A]], s 940; GCN: buffer_store_short [[RESULT]] 941; GCN: buffer_store_dword [[USE1]] 942define void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 { 943 %tid = call i32 @llvm.amdgcn.workitem.id.x() 944 %tid.ext = 
sext i32 %tid to i64 945 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 946 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext 947 %a = load volatile float, float addrspace(1)* %a.gep 948 %fneg.a = fsub float -0.000000e+00, %a 949 %fpround = fptrunc float %fneg.a to half 950 %fneg = fsub half -0.000000e+00, %fpround 951 %use1 = fmul float %fneg.a, %c 952 store volatile half %fneg, half addrspace(1)* %out.gep 953 store volatile float %use1, float addrspace(1)* undef 954 ret void 955} 956 957; -------------------------------------------------------------------------------- 958; rcp tests 959; -------------------------------------------------------------------------------- 960 961; GCN-LABEL: {{^}}v_fneg_rcp_f32: 962; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 963; GCN: v_rcp_f32_e64 [[RESULT:v[0-9]+]], -[[A]] 964; GCN: buffer_store_dword [[RESULT]] 965define void @v_fneg_rcp_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 966 %tid = call i32 @llvm.amdgcn.workitem.id.x() 967 %tid.ext = sext i32 %tid to i64 968 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 969 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 970 %a = load volatile float, float addrspace(1)* %a.gep 971 %rcp = call float @llvm.amdgcn.rcp.f32(float %a) 972 %fneg = fsub float -0.000000e+00, %rcp 973 store float %fneg, float addrspace(1)* %out.gep 974 ret void 975} 976 977; GCN-LABEL: {{^}}v_fneg_rcp_fneg_f32: 978; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 979; GCN: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]] 980; GCN: buffer_store_dword [[RESULT]] 981define void @v_fneg_rcp_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 982 %tid = call i32 @llvm.amdgcn.workitem.id.x() 983 %tid.ext = sext i32 %tid to i64 984 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 985 %out.gep = getelementptr inbounds float, float 
addrspace(1)* %out, i64 %tid.ext 986 %a = load volatile float, float addrspace(1)* %a.gep 987 %fneg.a = fsub float -0.000000e+00, %a 988 %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a) 989 %fneg = fsub float -0.000000e+00, %rcp 990 store float %fneg, float addrspace(1)* %out.gep 991 ret void 992} 993 994; GCN-LABEL: {{^}}v_fneg_rcp_store_use_fneg_f32: 995; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 996; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]] 997; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]] 998; GCN: buffer_store_dword [[RESULT]] 999; GCN: buffer_store_dword [[NEG_A]] 1000define void @v_fneg_rcp_store_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 1001 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1002 %tid.ext = sext i32 %tid to i64 1003 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1004 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1005 %a = load volatile float, float addrspace(1)* %a.gep 1006 %fneg.a = fsub float -0.000000e+00, %a 1007 %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a) 1008 %fneg = fsub float -0.000000e+00, %rcp 1009 store volatile float %fneg, float addrspace(1)* %out.gep 1010 store volatile float %fneg.a, float addrspace(1)* undef 1011 ret void 1012} 1013 1014; GCN-LABEL: {{^}}v_fneg_rcp_multi_use_fneg_f32: 1015; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1016; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]] 1017; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}} 1018; GCN: buffer_store_dword [[RESULT]] 1019; GCN: buffer_store_dword [[MUL]] 1020define void @v_fneg_rcp_multi_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 { 1021 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1022 %tid.ext = sext i32 %tid to i64 1023 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1024 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 
%tid.ext 1025 %a = load volatile float, float addrspace(1)* %a.gep 1026 %fneg.a = fsub float -0.000000e+00, %a 1027 %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a) 1028 %fneg = fsub float -0.000000e+00, %rcp 1029 %use1 = fmul float %fneg.a, %c 1030 store volatile float %fneg, float addrspace(1)* %out.gep 1031 store volatile float %use1, float addrspace(1)* undef 1032 ret void 1033} 1034 1035; -------------------------------------------------------------------------------- 1036; rcp_legacy tests 1037; -------------------------------------------------------------------------------- 1038 1039; GCN-LABEL: {{^}}v_fneg_rcp_legacy_f32: 1040; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1041; GCN: v_rcp_legacy_f32_e64 [[RESULT:v[0-9]+]], -[[A]] 1042; GCN: buffer_store_dword [[RESULT]] 1043define void @v_fneg_rcp_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 1044 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1045 %tid.ext = sext i32 %tid to i64 1046 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1047 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1048 %a = load volatile float, float addrspace(1)* %a.gep 1049 %rcp = call float @llvm.amdgcn.rcp.legacy(float %a) 1050 %fneg = fsub float -0.000000e+00, %rcp 1051 store float %fneg, float addrspace(1)* %out.gep 1052 ret void 1053} 1054 1055; -------------------------------------------------------------------------------- 1056; fmul_legacy tests 1057; -------------------------------------------------------------------------------- 1058 1059; GCN-LABEL: {{^}}v_fneg_mul_legacy_f32: 1060; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1061; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1062; GCN: v_mul_legacy_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]] 1063; GCN-NEXT: buffer_store_dword [[RESULT]] 1064define void @v_fneg_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 1065 %tid = call i32 
@llvm.amdgcn.workitem.id.x() 1066 %tid.ext = sext i32 %tid to i64 1067 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1068 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1069 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1070 %a = load volatile float, float addrspace(1)* %a.gep 1071 %b = load volatile float, float addrspace(1)* %b.gep 1072 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) 1073 %fneg = fsub float -0.000000e+00, %mul 1074 store float %fneg, float addrspace(1)* %out.gep 1075 ret void 1076} 1077 1078; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_mul_legacy_f32: 1079; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1080; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1081; GCN-DAG: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]] 1082; GCN-DAG: v_xor_b32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], 0x80000000, [[ADD]] 1083; GCN-NEXT: buffer_store_dword [[NEG_MUL_LEGACY]] 1084; GCN: buffer_store_dword [[ADD]] 1085define void @v_fneg_mul_legacy_store_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 1086 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1087 %tid.ext = sext i32 %tid to i64 1088 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1089 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1090 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1091 %a = load volatile float, float addrspace(1)* %a.gep 1092 %b = load volatile float, float addrspace(1)* %b.gep 1093 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) 1094 %fneg = fsub float -0.000000e+00, %mul 1095 store volatile float %fneg, float addrspace(1)* %out 1096 store volatile float %mul, float addrspace(1)* %out 1097 ret void 1098} 1099 1100; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_mul_legacy_f32: 1101; GCN: {{buffer|flat}}_load_dword 
[[A:v[0-9]+]] 1102; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1103; GCN-DAG: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]] 1104; GCN-DAG: v_xor_b32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], 0x80000000, [[ADD]] 1105; GCN: v_mul_legacy_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]] 1106; GCN-NEXT: buffer_store_dword [[NEG_MUL_LEGACY]] 1107; GCN: buffer_store_dword [[MUL]] 1108define void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 1109 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1110 %tid.ext = sext i32 %tid to i64 1111 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1112 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1113 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1114 %a = load volatile float, float addrspace(1)* %a.gep 1115 %b = load volatile float, float addrspace(1)* %b.gep 1116 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) 1117 %fneg = fsub float -0.000000e+00, %mul 1118 %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0) 1119 store volatile float %fneg, float addrspace(1)* %out 1120 store volatile float %use1, float addrspace(1)* %out 1121 ret void 1122} 1123 1124; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_x_f32: 1125; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1126; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1127; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]] 1128; GCN-NEXT: buffer_store_dword [[ADD]] 1129define void @v_fneg_mul_legacy_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 1130 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1131 %tid.ext = sext i32 %tid to i64 1132 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1133 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1134 %out.gep = getelementptr inbounds float, float 
addrspace(1)* %out, i64 %tid.ext 1135 %a = load volatile float, float addrspace(1)* %a.gep 1136 %b = load volatile float, float addrspace(1)* %b.gep 1137 %fneg.a = fsub float -0.000000e+00, %a 1138 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b) 1139 %fneg = fsub float -0.000000e+00, %mul 1140 store volatile float %fneg, float addrspace(1)* %out 1141 ret void 1142} 1143 1144; GCN-LABEL: {{^}}v_fneg_mul_legacy_x_fneg_f32: 1145; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1146; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1147; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]] 1148; GCN-NEXT: buffer_store_dword [[ADD]] 1149define void @v_fneg_mul_legacy_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 1150 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1151 %tid.ext = sext i32 %tid to i64 1152 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1153 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1154 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1155 %a = load volatile float, float addrspace(1)* %a.gep 1156 %b = load volatile float, float addrspace(1)* %b.gep 1157 %fneg.b = fsub float -0.000000e+00, %b 1158 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b) 1159 %fneg = fsub float -0.000000e+00, %mul 1160 store volatile float %fneg, float addrspace(1)* %out 1161 ret void 1162} 1163 1164; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_fneg_f32: 1165; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1166; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1167; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]] 1168; GCN-NEXT: buffer_store_dword [[ADD]] 1169define void @v_fneg_mul_legacy_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 1170 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1171 %tid.ext = sext i32 %tid to i64 1172 %a.gep = 
getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1173 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1174 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1175 %a = load volatile float, float addrspace(1)* %a.gep 1176 %b = load volatile float, float addrspace(1)* %b.gep 1177 %fneg.a = fsub float -0.000000e+00, %a 1178 %fneg.b = fsub float -0.000000e+00, %b 1179 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b) 1180 %fneg = fsub float -0.000000e+00, %mul 1181 store volatile float %fneg, float addrspace(1)* %out 1182 ret void 1183} 1184 1185; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_fneg_x_f32: 1186; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1187; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1188; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]] 1189; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[B]], [[A]] 1190; GCN-NEXT: buffer_store_dword [[NEG_MUL_LEGACY]] 1191; GCN: buffer_store_dword [[NEG_A]] 1192define void @v_fneg_mul_legacy_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 1193 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1194 %tid.ext = sext i32 %tid to i64 1195 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1196 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1197 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1198 %a = load volatile float, float addrspace(1)* %a.gep 1199 %b = load volatile float, float addrspace(1)* %b.gep 1200 %fneg.a = fsub float -0.000000e+00, %a 1201 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b) 1202 %fneg = fsub float -0.000000e+00, %mul 1203 store volatile float %fneg, float addrspace(1)* %out 1204 store volatile float %fneg.a, float addrspace(1)* %out 1205 ret void 1206} 1207 1208; GCN-LABEL: 
{{^}}v_fneg_mul_legacy_multi_use_fneg_x_f32: 1209; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1210; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1211; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[B]], [[A]] 1212; GCN-DAG: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}} 1213; GCN-NEXT: buffer_store_dword [[NEG_MUL_LEGACY]] 1214; GCN: buffer_store_dword [[MUL]] 1215define void @v_fneg_mul_legacy_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 { 1216 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1217 %tid.ext = sext i32 %tid to i64 1218 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1219 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1220 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1221 %a = load volatile float, float addrspace(1)* %a.gep 1222 %b = load volatile float, float addrspace(1)* %b.gep 1223 %fneg.a = fsub float -0.000000e+00, %a 1224 %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b) 1225 %fneg = fsub float -0.000000e+00, %mul 1226 %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c) 1227 store volatile float %fneg, float addrspace(1)* %out 1228 store volatile float %use1, float addrspace(1)* %out 1229 ret void 1230} 1231 1232; -------------------------------------------------------------------------------- 1233; sin tests 1234; -------------------------------------------------------------------------------- 1235 1236; GCN-LABEL: {{^}}v_fneg_sin_f32: 1237; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1238; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983 1239; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[K]], -[[A]] 1240; GCN: v_fract_f32_e32 [[FRACT:v[0-9]+]], [[MUL]] 1241; GCN: v_sin_f32_e32 [[RESULT:v[0-9]+]], [[FRACT]] 1242; GCN: buffer_store_dword [[RESULT]] 1243define void @v_fneg_sin_f32(float addrspace(1)* %out, float addrspace(1)* 
%a.ptr) #0 { 1244 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1245 %tid.ext = sext i32 %tid to i64 1246 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1247 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1248 %a = load volatile float, float addrspace(1)* %a.gep 1249 %sin = call float @llvm.sin.f32(float %a) 1250 %fneg = fsub float -0.000000e+00, %sin 1251 store float %fneg, float addrspace(1)* %out.gep 1252 ret void 1253} 1254 1255; GCN-LABEL: {{^}}v_fneg_amdgcn_sin_f32: 1256; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1257; GCN: v_sin_f32_e64 [[RESULT:v[0-9]+]], -[[A]] 1258; GCN: buffer_store_dword [[RESULT]] 1259define void @v_fneg_amdgcn_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 1260 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1261 %tid.ext = sext i32 %tid to i64 1262 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1263 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1264 %a = load volatile float, float addrspace(1)* %a.gep 1265 %sin = call float @llvm.amdgcn.sin.f32(float %a) 1266 %fneg = fsub float -0.000000e+00, %sin 1267 store float %fneg, float addrspace(1)* %out.gep 1268 ret void 1269} 1270 1271declare i32 @llvm.amdgcn.workitem.id.x() #1 1272declare float @llvm.fma.f32(float, float, float) #1 1273declare float @llvm.fmuladd.f32(float, float, float) #1 1274declare float @llvm.sin.f32(float) #1 1275 1276declare float @llvm.amdgcn.sin.f32(float) #1 1277declare float @llvm.amdgcn.rcp.f32(float) #1 1278declare float @llvm.amdgcn.rcp.legacy(float) #1 1279declare float @llvm.amdgcn.fmul.legacy(float, float) #1 1280 1281attributes #0 = { nounwind } 1282attributes #1 = { nounwind readnone } 1283