; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s

; Instruction-selection test for basic vector floating-point operations with
; the MSA feature enabled. Every operation is exercised twice: once on
; <4 x float> (expected ".w" instructions) and once on <2 x double> (expected
; ".d" instructions).
;
; Shape shared by all functions: load the operand vector(s) through the
; pointer arguments, apply one IR operation or intrinsic, and store the result
; through the first pointer argument. The patterns expect the pointer
; arguments in $4, $5, $6, $7 in declaration order, so the destination is
; always addressed as 0($4).

; ---------------------------------------------------------------------------
; fadd -> fadd.w / fadd.d
; ---------------------------------------------------------------------------

define void @add_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: add_v4f32:

  %lhs = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %rhs = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %sum = fadd <4 x float> %lhs, %rhs
  ; CHECK-DAG: fadd.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %sum, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v4f32
}

define void @add_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: add_v2f64:

  %lhs = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %rhs = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %sum = fadd <2 x double> %lhs, %rhs
  ; CHECK-DAG: fadd.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %sum, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v2f64
}

; ---------------------------------------------------------------------------
; fsub -> fsub.w / fsub.d
; ---------------------------------------------------------------------------

define void @sub_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: sub_v4f32:

  %lhs = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %rhs = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %diff = fsub <4 x float> %lhs, %rhs
  ; CHECK-DAG: fsub.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %diff, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v4f32
}

define void @sub_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: sub_v2f64:

  %lhs = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %rhs = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %diff = fsub <2 x double> %lhs, %rhs
  ; CHECK-DAG: fsub.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %diff, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v2f64
}

; ---------------------------------------------------------------------------
; fmul -> fmul.w / fmul.d
; ---------------------------------------------------------------------------

define void @mul_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: mul_v4f32:

  %lhs = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %rhs = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %prod = fmul <4 x float> %lhs, %rhs
  ; CHECK-DAG: fmul.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %prod, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v4f32
}

define void @mul_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: mul_v2f64:

  %lhs = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %rhs = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %prod = fmul <2 x double> %lhs, %rhs
  ; CHECK-DAG: fmul.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %prod, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v2f64
}

; ---------------------------------------------------------------------------
; llvm.fma -> fmadd.w / fmadd.d
; The expected fmadd writes its result into the register that was loaded from
; %a, and the store is expected to read that same register.
; ---------------------------------------------------------------------------

define void @fma_v4f32(<4 x float>* %d, <4 x float>* %a,
                       <4 x float>* %b, <4 x float>* %c) nounwind {
  ; CHECK: fma_v4f32:

  %a.val = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %b.val = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %c.val = load <4 x float>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %fma = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a.val,
                                               <4 x float> %b.val,
                                               <4 x float> %c.val)
  ; CHECK-DAG: fmadd.w [[R1]], [[R2]], [[R3]]
  store <4 x float> %fma, <4 x float>* %d
  ; CHECK-DAG: st.w [[R1]], 0($4)

  ret void
  ; CHECK: .size fma_v4f32
}

define void @fma_v2f64(<2 x double>* %d, <2 x double>* %a,
                       <2 x double>* %b, <2 x double>* %c) nounwind {
  ; CHECK: fma_v2f64:

  %a.val = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %b.val = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %c.val = load <2 x double>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %fma = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a.val,
                                                <2 x double> %b.val,
                                                <2 x double> %c.val)
  ; CHECK-DAG: fmadd.d [[R1]], [[R2]], [[R3]]
  store <2 x double> %fma, <2 x double>* %d
  ; CHECK-DAG: st.d [[R1]], 0($4)

  ret void
  ; CHECK: .size fma_v2f64
}

; ---------------------------------------------------------------------------
; a - (b * c) -> fmsub.w / fmsub.d
; The IR spells the operation as a separate fmul followed by fsub; the
; patterns expect the pair to be selected as a single fmsub that accumulates
; into the register loaded from %a.
; ---------------------------------------------------------------------------

define void @fmsub_v4f32(<4 x float>* %d, <4 x float>* %a,
                         <4 x float>* %b, <4 x float>* %c) nounwind {
  ; CHECK: fmsub_v4f32:

  %a.val = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %b.val = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %c.val = load <4 x float>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %prod = fmul <4 x float> %b.val, %c.val
  %diff = fsub <4 x float> %a.val, %prod
  ; CHECK-DAG: fmsub.w [[R1]], [[R2]], [[R3]]
  store <4 x float> %diff, <4 x float>* %d
  ; CHECK-DAG: st.w [[R1]], 0($4)

  ret void
  ; CHECK: .size fmsub_v4f32
}

define void @fmsub_v2f64(<2 x double>* %d, <2 x double>* %a,
                         <2 x double>* %b, <2 x double>* %c) nounwind {
  ; CHECK: fmsub_v2f64:

  %a.val = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %b.val = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %c.val = load <2 x double>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %prod = fmul <2 x double> %b.val, %c.val
  %diff = fsub <2 x double> %a.val, %prod
  ; CHECK-DAG: fmsub.d [[R1]], [[R2]], [[R3]]
  store <2 x double> %diff, <2 x double>* %d
  ; CHECK-DAG: st.d [[R1]], 0($4)

  ret void
  ; CHECK: .size fmsub_v2f64
}

; ---------------------------------------------------------------------------
; fdiv -> fdiv.w / fdiv.d
; ---------------------------------------------------------------------------

define void @fdiv_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: fdiv_v4f32:

  %lhs = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %rhs = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %quot = fdiv <4 x float> %lhs, %rhs
  ; CHECK-DAG: fdiv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %quot, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fdiv_v4f32
}

define void @fdiv_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: fdiv_v2f64:

  %lhs = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %rhs = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %quot = fdiv <2 x double> %lhs, %rhs
  ; CHECK-DAG: fdiv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %quot, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fdiv_v2f64
}

; ---------------------------------------------------------------------------
; llvm.fabs -> fmax_a.w / fmax_a.d
; The patterns expect fmax_a with the loaded value supplied as both source
; operands.
; ---------------------------------------------------------------------------

define void @fabs_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fabs_v4f32:

  %src = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %abs = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %src)
  ; CHECK-DAG: fmax_a.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <4 x float> %abs, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fabs_v4f32
}

define void @fabs_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fabs_v2f64:

  %src = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %abs = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %src)
  ; CHECK-DAG: fmax_a.d [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <2 x double> %abs, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fabs_v2f64
}

; ---------------------------------------------------------------------------
; llvm.exp2 -> fexp2.w / fexp2.d
; Expected sequence: ldi of the immediate 1, ffint_u on that register, then
; fexp2 taking the loaded value as its last operand.
; NOTE(review): the fexp2 pattern names R3 (the ldi result) as its source and
; rebinds R4 instead of consuming the ffint_u result; presumably this only
; matches when those registers coincide - confirm against actual llc output.
; ---------------------------------------------------------------------------

define void @fexp2_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fexp2_v4f32:

  %src = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %exp2 = tail call <4 x float> @llvm.exp2.v4f32(<4 x float> %src)
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.w [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.w [[R4:\$w[0-9]+]], [[R3]], [[R1]]
  store <4 x float> %exp2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size fexp2_v4f32
}

define void @fexp2_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fexp2_v2f64:

  %src = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %exp2 = tail call <2 x double> @llvm.exp2.v2f64(<2 x double> %src)
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.d [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.d [[R4:\$w[0-9]+]], [[R3]], [[R1]]
  store <2 x double> %exp2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size fexp2_v2f64
}

; ---------------------------------------------------------------------------
; 2.0 * llvm.exp2(x) -> fexp2 with a splat of 2.0 as the first source
; No separate multiply is listed in the patterns: the constant splat is
; expected to feed fexp2 directly. For v4f32 the splat is expected to be built
; with lui 16384 (0x40000000, the bit pattern of 2.0f) and fill.w; for v2f64
; it is expected to be loaded from memory via a %lo() relocation, with two
; .8byte 4611686018427387904 (0x4000000000000000, i.e. 2.0) entries emitted
; ahead of the function label.
; ---------------------------------------------------------------------------

define void @fexp2_v4f32_2(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fexp2_v4f32_2:

  %src = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %exp2 = tail call <4 x float> @llvm.exp2.v4f32(<4 x float> %src)
  %scaled = fmul <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, %exp2
  ; CHECK-DAG: lui [[R3:\$[0-9]+]], 16384
  ; CHECK-DAG: fill.w [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <4 x float> %scaled, <4 x float>* %c
  ; CHECK-DAG: st.w [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v4f32_2
}

define void @fexp2_v2f64_2(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: .8byte 4611686018427387904
  ; CHECK-NEXT: .8byte 4611686018427387904
  ; CHECK: fexp2_v2f64_2:

  %src = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %exp2 = tail call <2 x double> @llvm.exp2.v2f64(<2 x double> %src)
  %scaled = fmul <2 x double> <double 2.0, double 2.0>, %exp2
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo(
  ; CHECK-DAG: fexp2.d [[R4:\$w[0-9]+]], [[R3]], [[R1]]
  store <2 x double> %scaled, <2 x double>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size fexp2_v2f64_2
}

; ---------------------------------------------------------------------------
; llvm.sqrt -> fsqrt.w / fsqrt.d
; ---------------------------------------------------------------------------

define void @fsqrt_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fsqrt_v4f32:

  %src = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %root = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %src)
  ; CHECK-DAG: fsqrt.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %root, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v4f32
}

define void @fsqrt_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fsqrt_v2f64:

  %src = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %root = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %src)
  ; CHECK-DAG: fsqrt.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %root, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v2f64
}

; ---------------------------------------------------------------------------
; uitofp -> ffint_u.w / ffint_u.d
; ---------------------------------------------------------------------------

define void @ffint_u_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ffint_u_v4f32:

  %ints = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %floats = uitofp <4 x i32> %ints to <4 x float>
  ; CHECK-DAG: ffint_u.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %floats, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_u_v4f32
}

define void @ffint_u_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ffint_u_v2f64:

  %ints = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %floats = uitofp <2 x i64> %ints to <2 x double>
  ; CHECK-DAG: ffint_u.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %floats, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_u_v2f64
}

; ---------------------------------------------------------------------------
; sitofp -> ffint_s.w / ffint_s.d
; ---------------------------------------------------------------------------

define void @ffint_s_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ffint_s_v4f32:

  %ints = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %floats = sitofp <4 x i32> %ints to <4 x float>
  ; CHECK-DAG: ffint_s.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %floats, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_s_v4f32
}

define void @ffint_s_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ffint_s_v2f64:

  %ints = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %floats = sitofp <2 x i64> %ints to <2 x double>
  ; CHECK-DAG: ffint_s.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %floats, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_s_v2f64
}

; ---------------------------------------------------------------------------
; fptoui -> ftrunc_u.w / ftrunc_u.d
; ---------------------------------------------------------------------------

define void @ftrunc_u_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
  ; CHECK: ftrunc_u_v4f32:

  %floats = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %ints = fptoui <4 x float> %floats to <4 x i32>
  ; CHECK-DAG: ftrunc_u.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %ints, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_u_v4f32
}

define void @ftrunc_u_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
  ; CHECK: ftrunc_u_v2f64:

  %floats = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %ints = fptoui <2 x double> %floats to <2 x i64>
  ; CHECK-DAG: ftrunc_u.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %ints, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_u_v2f64
}

; ---------------------------------------------------------------------------
; fptosi -> ftrunc_s.w / ftrunc_s.d
; ---------------------------------------------------------------------------

define void @ftrunc_s_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
  ; CHECK: ftrunc_s_v4f32:

  %floats = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %ints = fptosi <4 x float> %floats to <4 x i32>
  ; CHECK-DAG: ftrunc_s.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %ints, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_s_v4f32
}

define void @ftrunc_s_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
  ; CHECK: ftrunc_s_v2f64:

  %floats = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %ints = fptosi <2 x double> %floats to <2 x i64>
  ; CHECK-DAG: ftrunc_s.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %ints, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_s_v2f64
}

; ---------------------------------------------------------------------------
; Intrinsics referenced by the functions above.
; ---------------------------------------------------------------------------

declare <4 x float>  @llvm.fabs.v4f32(<4 x float> %Val)
declare <2 x double> @llvm.fabs.v2f64(<2 x double> %Val)
declare <4 x float>  @llvm.exp2.v4f32(<4 x float> %val)
declare <2 x double> @llvm.exp2.v2f64(<2 x double> %val)
declare <4 x float>  @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b,
                                     <4 x float> %c)
declare <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b,
                                     <2 x double> %c)
declare <4 x float>  @llvm.sqrt.v4f32(<4 x float> %Val)
declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %Val)