1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This tablegen backend is responsible for emitting arm_neon.h, which includes 11 // a declaration and definition of each function specified by the ARM NEON 12 // compiler interface. See ARM document DUI0348B. 13 // 14 // Each NEON instruction is implemented in terms of 1 or more functions which 15 // are suffixed with the element type of the input vectors. Functions may be 16 // implemented in terms of generic vector operations such as +, *, -, etc. or 17 // by calling a __builtin_-prefixed function which will be handled by clang's 18 // CodeGen library. 19 // 20 // Additional validation code can be generated by this file when runHeader() is 21 // called, rather than the normal run() entry point. A complete set of tests 22 // for Neon intrinsics can be generated by calling the runTests() entry point. 
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <string>
using namespace llvm;

/// OpKind - The operation used to implement an intrinsic. Each enumerator
/// corresponds to an "OP_*" record name in arm_neon.td; the mapping from
/// record name to enumerator is built in the NeonEmitter constructor below,
/// so the two lists must be kept in sync.
enum OpKind {
  OpNone,
  OpUnavailable,
  OpAdd,
  OpAddl,
  OpAddlHi,
  OpAddw,
  OpAddwHi,
  OpSub,
  OpSubl,
  OpSublHi,
  OpSubw,
  OpSubwHi,
  OpMul,
  OpMla,
  OpMlal,
  OpMullHi,
  OpMlalHi,
  OpMls,
  OpMlsl,
  OpMlslHi,
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpMlalN,
  OpMlslN,
  OpMulLane,
  OpMulXLane,
  OpMullLane,
  OpMullHiLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlalHiLane,
  OpMlslLane,
  OpMlslHiLane,
  OpQDMullLane,
  OpQDMullHiLane,
  OpQDMlalLane,
  OpQDMlalHiLane,
  OpQDMlslLane,
  OpQDMlslHiLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  OpFMSLane,
  OpFMSLaneQ,
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpReinterpret,
  OpAddhnHi,
  OpRAddhnHi,
  OpSubhnHi,
  OpRSubhnHi,
  OpAbdl,
  OpAbdlHi,
  OpAba,
  OpAbal,
  OpAbalHi,
  OpQDMullHi,
  OpQDMlalHi,
  OpQDMlslHi,
  OpDiv,
  OpLongHi,
  OpNarrowHi,
  OpMovlHi
};

/// ClassKind - How an intrinsic's name suffix is derived from its type, and
/// how (or whether) the intrinsic is tested.
enum ClassKind {
  ClassNone,
  ClassI,     // generic integer instruction, e.g., "i8" suffix
  ClassS,     // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,     // width-specific instruction, e.g., "8" suffix
  ClassB,     // bitcast arguments with enum argument to specify type
  ClassL,     // Logical instructions which are op instructions
              // but we need to not emit any suffix for in our
              // tests.
  ClassNoTest // Instructions which we do not test since they are
              // not TRUE instructions.
};

/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins.  These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  // Bit layout of Flags: low nibble is the EltType, plus two modifier bits.
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };
  uint32_t Flags;

public:
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Float16,
    Float32,
    Float64
  };

  NeonTypeFlags(unsigned F) : Flags(F) {}
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
    if (IsUnsigned)
      Flags |= UnsignedFlag;
    if (IsQuad)
      Flags |= QuadFlag;
  }

  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace

namespace {
/// NeonEmitter - The TableGen backend. Builds lookup tables from the "OP_*"
/// record names and instruction-class records in arm_neon.td, then drives
/// header/builtin/test generation through the run* entry points.
class NeonEmitter {
  RecordKeeper &Records;
  // Maps an arm_neon.td "OP_*" record name to its OpKind enumerator.
  StringMap<OpKind> OpMap;
  // Maps an instruction-class record (SInst, IInst, ...) to its ClassKind.
  DenseMap<Record*, ClassKind> ClassMap;

public:
  NeonEmitter(RecordKeeper &R) : Records(R) {
    // Keep this table in sync with the OpKind enum above and with the OP_*
    // definitions in arm_neon.td.
    OpMap["OP_NONE"]  = OpNone;
    OpMap["OP_UNAVAILABLE"] = OpUnavailable;
    OpMap["OP_ADD"]   = OpAdd;
    OpMap["OP_ADDL"]  = OpAddl;
    OpMap["OP_ADDLHi"] = OpAddlHi;
    OpMap["OP_ADDW"]  = OpAddw;
    OpMap["OP_ADDWHi"] = OpAddwHi;
    OpMap["OP_SUB"]   = OpSub;
    OpMap["OP_SUBL"]  = OpSubl;
    OpMap["OP_SUBLHi"] = OpSublHi;
    OpMap["OP_SUBW"]  = OpSubw;
    OpMap["OP_SUBWHi"] = OpSubwHi;
    OpMap["OP_MUL"]   = OpMul;
    OpMap["OP_MLA"]   = OpMla;
    OpMap["OP_MLAL"]  = OpMlal;
    OpMap["OP_MULLHi"] = OpMullHi;
    OpMap["OP_MLALHi"] = OpMlalHi;
    OpMap["OP_MLS"]   = OpMls;
    OpMap["OP_MLSL"]  = OpMlsl;
    OpMap["OP_MLSLHi"] = OpMlslHi;
    OpMap["OP_MUL_N"] = OpMulN;
    OpMap["OP_MLA_N"] = OpMlaN;
    OpMap["OP_MLS_N"] = OpMlsN;
    OpMap["OP_MLAL_N"] = OpMlalN;
    OpMap["OP_MLSL_N"] = OpMlslN;
    OpMap["OP_MUL_LN"]= OpMulLane;
    OpMap["OP_MULX_LN"]= OpMulXLane;
    OpMap["OP_MULL_LN"] = OpMullLane;
    OpMap["OP_MULLHi_LN"] = OpMullHiLane;
    OpMap["OP_MLA_LN"]= OpMlaLane;
    OpMap["OP_MLS_LN"]= OpMlsLane;
    OpMap["OP_MLAL_LN"] = OpMlalLane;
    OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
    OpMap["OP_MLSL_LN"] = OpMlslLane;
    OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
    OpMap["OP_QDMULL_LN"] = OpQDMullLane;
    OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
    OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
    OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
    OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
    OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
    OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
    OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
    OpMap["OP_FMS_LN"] = OpFMSLane;
    OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
    OpMap["OP_EQ"]    = OpEq;
    OpMap["OP_GE"]    = OpGe;
    OpMap["OP_LE"]    = OpLe;
    OpMap["OP_GT"]    = OpGt;
    OpMap["OP_LT"]    = OpLt;
    OpMap["OP_NEG"]   = OpNeg;
    OpMap["OP_NOT"]   = OpNot;
    OpMap["OP_AND"]   = OpAnd;
    OpMap["OP_OR"]    = OpOr;
    OpMap["OP_XOR"]   = OpXor;
    OpMap["OP_ANDN"]  = OpAndNot;
    OpMap["OP_ORN"]   = OpOrNot;
    OpMap["OP_CAST"]  = OpCast;
    OpMap["OP_CONC"]  = OpConcat;
    OpMap["OP_HI"]    = OpHi;
    OpMap["OP_LO"]    = OpLo;
    OpMap["OP_DUP"]   = OpDup;
    OpMap["OP_DUP_LN"] = OpDupLane;
    OpMap["OP_SEL"]   = OpSelect;
    OpMap["OP_REV16"] = OpRev16;
    OpMap["OP_REV32"] = OpRev32;
    OpMap["OP_REV64"] = OpRev64;
    OpMap["OP_REINT"] = OpReinterpret;
    OpMap["OP_ADDHNHi"] = OpAddhnHi;
    OpMap["OP_RADDHNHi"] = OpRAddhnHi;
    OpMap["OP_SUBHNHi"] = OpSubhnHi;
    OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
    OpMap["OP_ABDL"]  = OpAbdl;
    OpMap["OP_ABDLHi"] = OpAbdlHi;
    OpMap["OP_ABA"]   = OpAba;
    OpMap["OP_ABAL"]  = OpAbal;
    OpMap["OP_ABALHi"] = OpAbalHi;
    OpMap["OP_QDMULLHi"] = OpQDMullHi;
    OpMap["OP_QDMLALHi"] = OpQDMlalHi;
    OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
    OpMap["OP_DIV"] = OpDiv;
    OpMap["OP_LONG_HI"] = OpLongHi;
    OpMap["OP_NARROW_HI"] = OpNarrowHi;
    OpMap["OP_MOVL_HI"] = OpMovlHi;

    // Resolve the instruction-class records once up front so ClassMap can be
    // keyed on Record pointers.
    Record *SI = R.getClass("SInst");
    Record *II = R.getClass("IInst");
    Record *WI = R.getClass("WInst");
    Record *SOpI = R.getClass("SOpInst");
    Record *IOpI = R.getClass("IOpInst");
    Record *WOpI = R.getClass("WOpInst");
    Record *LOpI = R.getClass("LOpInst");
    Record *NoTestOpI = R.getClass("NoTestOpInst");

    ClassMap[SI] = ClassS;
    ClassMap[II] = ClassI;
    ClassMap[WI] = ClassW;
    ClassMap[SOpI] = ClassS;
    ClassMap[IOpI] = ClassI;
    ClassMap[WOpI] = ClassW;
    ClassMap[LOpI] = ClassL;
    ClassMap[NoTestOpI] = ClassNoTest;
  }

  // run - Emit arm_neon.h.inc
  void run(raw_ostream &o);

  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
  void runHeader(raw_ostream &o);

  // runTests - Emit tests for all the Neon intrinsics.
  void runTests(raw_ostream &o);

private:
  // emitIntrinsic - Emit the declaration/definition of one intrinsic,
  // recording what was emitted in EmittedMap.
  void emitIntrinsic(raw_ostream &OS, Record *R,
                     StringMap<ClassKind> &EmittedMap);
  // genBuiltinsDef - Emit BuiltinsARM.def entries; isA64GenBuiltinDef
  // selects the AArch64 variant.
  void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
                      bool isA64GenBuiltinDef);
  // genOverloadTypeCheckCode - Emit Sema overload type-checking code.
  void genOverloadTypeCheckCode(raw_ostream &OS,
                                StringMap<ClassKind> &A64IntrinsicMap,
                                bool isA64TypeCheck);
  // genIntrinsicRangeCheckCode - Emit Sema immediate-range checking code.
  void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                  StringMap<ClassKind> &A64IntrinsicMap,
                                  bool isA64RangeCheck);
  // genTargetTest - Emit the per-target intrinsic tests.
  void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
                     bool isA64TestGen);
};
} // end anonymous namespace

/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
/// which each StringRef representing a single type declared in the string.
/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
/// 2xfloat and 4xfloat respectively.
316 static void ParseTypes(Record *r, std::string &s, 317 SmallVectorImpl<StringRef> &TV) { 318 const char *data = s.data(); 319 int len = 0; 320 321 for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) { 322 if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U' 323 || data[len] == 'H' || data[len] == 'S') 324 continue; 325 326 switch (data[len]) { 327 case 'c': 328 case 's': 329 case 'i': 330 case 'l': 331 case 'h': 332 case 'f': 333 case 'd': 334 break; 335 default: 336 PrintFatalError(r->getLoc(), 337 "Unexpected letter: " + std::string(data + len, 1)); 338 } 339 TV.push_back(StringRef(data, len + 1)); 340 data += len + 1; 341 len = -1; 342 } 343 } 344 345 /// Widen - Convert a type code into the next wider type. char -> short, 346 /// short -> int, etc. 347 static char Widen(const char t) { 348 switch (t) { 349 case 'c': 350 return 's'; 351 case 's': 352 return 'i'; 353 case 'i': 354 return 'l'; 355 case 'h': 356 return 'f'; 357 default: 358 PrintFatalError("unhandled type in widen!"); 359 } 360 } 361 362 /// Narrow - Convert a type code into the next smaller type. short -> char, 363 /// float -> half float, etc. 364 static char Narrow(const char t) { 365 switch (t) { 366 case 's': 367 return 'c'; 368 case 'i': 369 return 's'; 370 case 'l': 371 return 'i'; 372 case 'f': 373 return 'h'; 374 default: 375 PrintFatalError("unhandled type in narrow!"); 376 } 377 } 378 379 static std::string GetNarrowTypestr(StringRef ty) 380 { 381 std::string s; 382 for (size_t i = 0, end = ty.size(); i < end; i++) { 383 switch (ty[i]) { 384 case 's': 385 s += 'c'; 386 break; 387 case 'i': 388 s += 's'; 389 break; 390 case 'l': 391 s += 'i'; 392 break; 393 default: 394 s += ty[i]; 395 break; 396 } 397 } 398 399 return s; 400 } 401 402 /// For a particular StringRef, return the base type code, and whether it has 403 /// the quad-vector, polynomial, or unsigned modifiers set. 
static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
  // NOTE: the modifier flags are only ever set to true here; the caller is
  // expected to initialize them to false before calling.
  unsigned off = 0;
  // ignore scalar.
  if (ty[off] == 'S') {
    ++off;
  }
  // remember quad.
  if (ty[off] == 'Q' || ty[off] == 'H') {
    quad = true;
    ++off;
  }

  // remember poly.
  if (ty[off] == 'P') {
    poly = true;
    ++off;
  }

  // remember unsigned.
  if (ty[off] == 'U') {
    usgn = true;
    ++off;
  }

  // base type to get the type string for.
  return ty[off];
}

/// ModType - Transform a type code and its modifiers based on a mod code. The
/// mod code definitions may be found at the top of arm_neon.td.
/// Returns the (possibly changed) base type code and mutates the six flag
/// parameters in place. Several cases below deliberately fall through.
static char ModType(const char mod, char type, bool &quad, bool &poly,
                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
  switch (mod) {
  case 't':
    // Poly types become the equivalent unsigned integer type.
    if (poly) {
      poly = false;
      usgn = true;
    }
    break;
  case 'u':
    // Force unsigned integer; FP types map to same-width integers.
    usgn = true;
    poly = false;
    if (type == 'f')
      type = 'i';
    if (type == 'd')
      type = 'l';
    break;
  case 'x':
    // Force signed integer; FP types map to same-width integers.
    usgn = false;
    poly = false;
    if (type == 'f')
      type = 'i';
    if (type == 'd')
      type = 'l';
    break;
  case 'o':
    scal = true;
    type = 'd';
    usgn = false;
    break;
  case 'y':
    scal = true;
    // FALL THROUGH: 'y' is the scalar variant of 'f'.
  case 'f':
    if (type == 'h')
      quad = true;
    type = 'f';
    usgn = false;
    break;
  case 'g':
    quad = false;
    break;
  case 'j':
    quad = true;
    break;
  case 'w':
    type = Widen(type);
    quad = true;
    break;
  case 'n':
    type = Widen(type);
    break;
  case 'i':
    type = 'i';
    scal = true;
    break;
  case 'l':
    type = 'l';
    scal = true;
    usgn = true;
    break;
  case 'r':
    type = Widen(type);
    // FALL THROUGH: the widened type is also scalar.
  case 's':
  case 'a':
    scal = true;
    break;
  case 'k':
    quad = true;
    break;
  case 'c':
    cnst = true;
    // FALL THROUGH: a const pointer is still a scalar pointer.
  case 'p':
    pntr = true;
    scal = true;
    break;
  case 'h':
    type = Narrow(type);
    // Narrowing to half-float halves the element width, so drop quad.
    if (type == 'h')
      quad = false;
    break;
  case 'q':
    type = Narrow(type);
    quad = true;
    break;
  case 'e':
    type = Narrow(type);
    usgn = true;
    break;
  case 'm':
    type = Narrow(type);
    quad = false;
    break;
  default:
    break;
  }
  return type;
}

/// TypeString - for a modifier and type, generate the name of the typedef for
/// that type. QUc -> uint8x8_t.
static std::string TypeString(const char mod, StringRef typestr) {
  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  if (mod == 'v')
    return "void";
  if (mod == 'i')
    return "int";

  // base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  SmallString<128> s;

  if (usgn)
    s.push_back('u');

  // Build "<elt><lanes>"; scalar types stop after the element name. The lane
  // count is chosen so that quad types are 128 bits and non-quad 64 bits.
  switch (type) {
  case 'c':
    s += poly ? "poly8" : "int8";
    if (scal)
      break;
    s += quad ? "x16" : "x8";
    break;
  case 's':
    s += poly ? "poly16" : "int16";
    if (scal)
      break;
    s += quad ? "x8" : "x4";
    break;
  case 'i':
    s += "int32";
    if (scal)
      break;
    s += quad ? "x4" : "x2";
    break;
  case 'l':
    s += "int64";
    if (scal)
      break;
    s += quad ? "x2" : "x1";
    break;
  case 'h':
    s += "float16";
    if (scal)
      break;
    s += quad ? "x8" : "x4";
    break;
  case 'f':
    s += "float32";
    if (scal)
      break;
    s += quad ? "x4" : "x2";
    break;
  case 'd':
    s += "float64";
    if (scal)
      break;
    s += quad ? "x2" : "x1";
    break;

  default:
    PrintFatalError("unhandled type!");
  }

  // Multi-vector (struct) types get an extra xN suffix.
  if (mod == '2')
    s += "x2";
  if (mod == '3')
    s += "x3";
  if (mod == '4')
    s += "x4";

  // Append _t, finishing the type string typedef type.
  s += "_t";

  if (cnst)
    s += " const";

  if (pntr)
    s += " *";

  return s.str();
}

/// BuiltinTypeString - for a modifier and type, generate the clang
/// BuiltinsARM.def prototype code for the function. See the top of clang's
/// Builtins.def for a description of the type strings.
static std::string BuiltinTypeString(const char mod, StringRef typestr,
                                     ClassKind ck, bool ret) {
  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  if (mod == 'v')
    return "v"; // void
  if (mod == 'i')
    return "i"; // int

  // base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  // All pointers are void* pointers. Change type to 'v' now.
  if (pntr) {
    usgn = false;
    poly = false;
    type = 'v';
  }
  // Treat half-float ('h') types as unsigned short ('s') types.
  if (type == 'h') {
    type = 's';
    usgn = true;
  }
  // Poly types, and scalar integers of class I/W, are encoded as unsigned.
  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
                        scal && type != 'f' && type != 'd');

  if (scal) {
    SmallString<128> s;

    if (usgn)
      s.push_back('U');
    else if (type == 'c')
      s.push_back('S'); // make chars explicitly signed

    if (type == 'l') // 64-bit long
      s += "LLi";
    else
      s.push_back(type);

    if (cnst)
      s.push_back('C');
    if (pntr)
      s.push_back('*');
    return s.str();
  }

  // Since the return value must be one type, return a vector type of the
  // appropriate width which we will bitcast. An exception is made for
  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
  // fashion, storing them to a pointer arg.
  if (ret) {
    if (mod >= '2' && mod <= '4')
      return "vv*"; // void result with void* first argument
    if (mod == 'f' || (ck != ClassB && type == 'f'))
      return quad ? "V4f" : "V2f";
    if (ck != ClassB && type == 'd')
      return quad ? "V2d" : "V1d";
    if (ck != ClassB && type == 's')
      return quad ? "V8s" : "V4s";
    if (ck != ClassB && type == 'i')
      return quad ? "V4i" : "V2i";
    if (ck != ClassB && type == 'l')
      return quad ? "V2LLi" : "V1LLi";

    // ClassB (and anything unhandled) is passed as a vector of signed chars.
    return quad ? "V16Sc" : "V8Sc";
  }

  // Non-return array types are passed as individual vectors.
  if (mod == '2')
    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
  if (mod == '3')
    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
  if (mod == '4')
    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";

  if (mod == 'f' || (ck != ClassB && type == 'f'))
    return quad ? "V4f" : "V2f";
  if (ck != ClassB && type == 'd')
    return quad ? "V2d" : "V1d";
  if (ck != ClassB && type == 's')
    return quad ? "V8s" : "V4s";
  if (ck != ClassB && type == 'i')
    return quad ? "V4i" : "V2i";
  if (ck != ClassB && type == 'l')
    return quad ? "V2LLi" : "V1LLi";

  return quad ? "V16Sc" : "V8Sc";
}

/// InstructionTypeCode - Computes the ARM argument character code and
/// quad status for a specific type string and ClassKind.
/// Note: quad is set (via ClassifyType) even when no type code is produced;
/// typeCode is left untouched for ClassKinds without a suffix.
static void InstructionTypeCode(const StringRef &typeStr,
                                const ClassKind ck,
                                bool &quad,
                                std::string &typeCode) {
  bool poly = false;
  bool usgn = false;
  char type = ClassifyType(typeStr, quad, poly, usgn);

  switch (type) {
  case 'c':
    switch (ck) {
    case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
    case ClassI: typeCode = "i8"; break;
    case ClassW: typeCode = "8"; break;
    default: break;
    }
    break;
  case 's':
    switch (ck) {
    case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
    case ClassI: typeCode = "i16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'i':
    switch (ck) {
    case ClassS: typeCode = usgn ? "u32" : "s32"; break;
    case ClassI: typeCode = "i32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'l':
    switch (ck) {
    case ClassS: typeCode = usgn ? "u64" : "s64"; break;
    case ClassI: typeCode = "i64"; break;
    case ClassW: typeCode = "64"; break;
    default: break;
    }
    break;
  case 'h':
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'f':
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'd':
    switch (ck) {
    case ClassS:
    case ClassI:
      typeCode += "f64";
      break;
    case ClassW:
      // There is no width-suffixed f64 form.
      PrintFatalError("unhandled type!");
    default:
      break;
    }
    break;
  default:
    PrintFatalError("unhandled type!");
  }
}

/// Insert_BHSD_Suffix - For a scalar ('S'-prefixed) type string, return the
/// AArch64 scalar register-width suffix character ('b', 'h', 's' or 'd'),
/// or 0 when the type string is not scalar or the base type is unrecognized.
static char Insert_BHSD_Suffix(StringRef typestr) {
  unsigned off = 0;
  if (typestr[off++] == 'S') {
    // Skip any modifier characters to reach the base type.
    while (typestr[off] == 'Q' || typestr[off] == 'H' ||
           typestr[off] == 'P' || typestr[off] == 'U')
      ++off;
    switch (typestr[off]) {
    default: break;
    case 'c': return 'b';
    case 's': return 'h';
    case 'i':
    case 'f': return 's';
    case 'l':
    case 'd': return 'd';
    }
  }
  return 0;
}

/// MangleName - Append a type or width suffix to a base neon function name,
/// and insert a 'q' in the appropriate location if type string starts with 'Q'.
/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
/// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
static std::string MangleName(const std::string &name, StringRef typestr,
                              ClassKind ck) {
  // vcvt_f32_f16 is already fully mangled in the .td file.
  if (name == "vcvt_f32_f16")
    return name;

  bool quad = false;
  std::string typeCode = "";

  InstructionTypeCode(typestr, ck, quad, typeCode);

  std::string s = name;

  if (typeCode.size() > 0) {
    s += "_" + typeCode;
  }

  // ClassB intrinsics are polymorphic: they all get a generic "_v" suffix.
  if (ck == ClassB)
    s += "_v";

  // Insert a 'q' before the first '_' character so that it ends up before
  // _lane or _n on vector-scalar operations.
  if (typestr.find("Q") != StringRef::npos) {
    size_t pos = s.find('_');
    s = s.insert(pos, "q");
  }
  // Likewise insert the scalar 'b'/'h'/'s'/'d' suffix before the first '_'.
  char ins = Insert_BHSD_Suffix(typestr);
  if (ins) {
    size_t pos = s.find('_');
    s = s.insert(pos, &ins, 1);
  }

  return s;
}

/// PreprocessInstruction - Split an intrinsic name into its instruction
/// Prefix and classify its postfixes (_n / _lane / _dup / vcvt special-case).
/// For vtblN/vtbxN, TBNumber is set to N; otherwise it is left untouched.
static void PreprocessInstruction(const StringRef &Name,
                                  const std::string &InstName,
                                  std::string &Prefix,
                                  bool &HasNPostfix,
                                  bool &HasLanePostfix,
                                  bool &HasDupPostfix,
                                  bool &IsSpecialVCvt,
                                  size_t &TBNumber) {
  // All of our instruction name fields from arm_neon.td are of the form
  //   <instructionname>_...
  // Thus we grab our instruction name via computation of said Prefix.
  const size_t PrefixEnd = Name.find_first_of('_');
  // If InstName is passed in, we use that instead of our name Prefix.
  Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;

  const StringRef Postfix = Name.slice(PrefixEnd, Name.size());

  HasNPostfix = Postfix.count("_n");
  HasLanePostfix = Postfix.count("_lane");
  HasDupPostfix = Postfix.count("_dup");
  IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");

  if (InstName.compare("vtbl") == 0 ||
      InstName.compare("vtbx") == 0) {
    // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
    // encoding to get its true value (48 == '0').
    TBNumber = Name[Name.size()-1] - 48;
  }
}

/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
/// extracted, generate a FileCheck pattern for a Load Or Store
static void
GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
                                          const std::string& OutTypeCode,
                                          const bool &IsQuad,
                                          const bool &HasDupPostfix,
                                          const bool &HasLanePostfix,
                                          const size_t Count,
                                          std::string &RegisterSuffix) {
  const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
  // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
  // will output a series of v{ld,st}1s, so we have to handle it specially.
  if ((Count == 3 || Count == 4) && IsQuad) {
    // Emit only the register list: {dN[, dN...]} with optional [] / [lane].
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}";
  } else {

    // Handle normal loads and stores: register list plus address operand.
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      if (IsQuad && !HasLanePostfix) {
        // A quad register is a pair of d registers.
        RegisterSuffix += ", d{{[0-9]+}}";
        if (HasDupPostfix) {
          RegisterSuffix += "[]";
        }
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}, [r{{[0-9]+}}";

    // We only include the alignment hint if we have a vld1.*64 or
    // a dup/lane instruction.
    if (IsLDSTOne) {
      if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
        RegisterSuffix += ":" + OutTypeCode;
      }
    }

    RegisterSuffix += "]";
  }
}

/// HasNPostfixAndScalarArgs - True for the multiply-style intrinsics whose
/// _n variants take a scalar argument.
static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
                                     const bool &HasNPostfix) {
  return (NameRef.count("vmla") ||
          NameRef.count("vmlal") ||
          NameRef.count("vmlsl") ||
          NameRef.count("vmull") ||
          NameRef.count("vqdmlal") ||
          NameRef.count("vqdmlsl") ||
          NameRef.count("vqdmulh") ||
          NameRef.count("vqdmull") ||
          NameRef.count("vqrdmulh")) && HasNPostfix;
}

/// IsFiveOperandLaneAccumulator - True for _lane accumulator intrinsics that
/// take five operands (the NameRef.size()==3 check matches exactly "mul",
/// excluding longer vmul* names).
static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
                                         const bool &HasLanePostfix) {
  return (NameRef.count("vmla") ||
          NameRef.count("vmls") ||
          NameRef.count("vmlal") ||
          NameRef.count("vmlsl") ||
          (NameRef.count("vmul") && NameRef.size() == 3)||
          NameRef.count("vqdmlal") ||
          NameRef.count("vqdmlsl") ||
          NameRef.count("vqdmulh") ||
          NameRef.count("vqrdmulh")) && HasLanePostfix;
}

/// IsSpecialLaneMultiply - True for the quad mul/mulh and non-quad mull
/// _lane intrinsics, which need their own proto normalization below.
static bool IsSpecialLaneMultiply(const StringRef &NameRef,
                                  const bool &HasLanePostfix,
                                  const bool &IsQuad) {
  const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
                             && IsQuad;
  const bool IsVMull = NameRef.count("mull") && !IsQuad;
  return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
}

/// NormalizeProtoForRegisterPatternCreation - Reduce a prototype string to a
/// canonical operand-kind string ('q' = quad reg, 'd' = double reg, 'i' =
/// immediate, 'a' = lane access) used to build FileCheck register patterns.
static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
                                                     const std::string &Proto,
                                                     const bool &HasNPostfix,
                                                     const bool &IsQuad,
                                                     const bool &HasLanePostfix,
                                                     const bool &HasDupPostfix,
                                                     std::string &NormedProto) {
  // Handle generic case.
  const StringRef NameRef(Name);
  for (size_t i = 0, end = Proto.size(); i < end; i++) {
    switch (Proto[i]) {
    case 'u':
    case 'f':
    case 'd':
    case 's':
    case 'x':
    case 't':
    case 'n':
      NormedProto += IsQuad? 'q' : 'd';
      break;
    case 'w':
    case 'k':
      NormedProto += 'q';
      break;
    case 'g':
    case 'j':
    case 'h':
    case 'e':
      NormedProto += 'd';
      break;
    case 'i':
      NormedProto += HasLanePostfix? 'a' : 'i';
      break;
    case 'a':
      if (HasLanePostfix) {
        NormedProto += 'a';
      } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
        NormedProto += IsQuad? 'q' : 'd';
      } else {
        NormedProto += 'i';
      }
      break;
    }
  }

  // Handle Special Cases.
  const bool IsNotVExt = !NameRef.count("vext");
  const bool IsVPADAL = NameRef.count("vpadal");
  const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
                                                           HasLanePostfix);
  const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
                                                      IsQuad);

  if (IsSpecialLaneMul) {
    // Keep characters 0, 1 and 3: overwrite the third with the fourth, then
    // drop everything from index 3 on.
    NormedProto[2] = NormedProto[3];
    NormedProto.erase(3);
  } else if (NormedProto.size() == 4 &&
             NormedProto[0] == NormedProto[1] &&
             IsNotVExt) {
    // If NormedProto.size() == 4 and the first two proto characters are the
    // same, ignore the first.
    NormedProto = NormedProto.substr(1, 3);
  } else if (Is5OpLaneAccum) {
    // If we have a 5 op lane accumulator operation, we take characters 1,2,4
    std::string tmp = NormedProto.substr(1,2);
    tmp += NormedProto[4];
    NormedProto = tmp;
  } else if (IsVPADAL) {
    // If we have VPADAL, keep only the first two characters.
    NormedProto = NormedProto.substr(0, 2);
  } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
    // If our instruction is a dup instruction, keep only the first and
    // last characters.
    std::string tmp = "";
    tmp += NormedProto[0];
    tmp += NormedProto[NormedProto.size()-1];
    NormedProto = tmp;
  }
}

/// GenerateRegisterCheckPatterns - Given a bunch of data we have
/// extracted, generate a FileCheck pattern to check that an
/// instruction's arguments are correct.
static void GenerateRegisterCheckPattern(const std::string &Name,
                                         const std::string &Proto,
                                         const std::string &OutTypeCode,
                                         const bool &HasNPostfix,
                                         const bool &IsQuad,
                                         const bool &HasLanePostfix,
                                         const bool &HasDupPostfix,
                                         const size_t &TBNumber,
                                         std::string &RegisterSuffix) {

  RegisterSuffix = "";

  const StringRef NameRef(Name);
  const StringRef ProtoRef(Proto);

  // vdup_n/vmov_n have no single canonical register pattern; emit none.
  if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
    return;
  }

  const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
  const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");

  if (IsLoadStore) {
    // Grab N value from v{ld,st}N using its ascii representation (48 == '0').
    const size_t Count = NameRef[3] - 48;

    GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
                                              HasDupPostfix, HasLanePostfix,
                                              Count, RegisterSuffix);
  } else if (IsTBXOrTBL) {
    // vtblN/vtbxN: destination, N-entry table list, index register.
    RegisterSuffix += "d{{[0-9]+}}, {";
    for (size_t i = 0; i < TBNumber-1; i++) {
      RegisterSuffix += "d{{[0-9]+}}, ";
    }
    RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
  } else {
    // Handle a normal instruction.
    if (NameRef.count("vget") || NameRef.count("vset"))
      return;

    // We first normalize our proto, since we only need to emit 4
    // different types of checks, yet have more than 4 proto types
    // that map onto those 4 patterns.
    std::string NormalizedProto("");
    NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
                                             HasLanePostfix, HasDupPostfix,
                                             NormalizedProto);

    for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
      const char &c = NormalizedProto[i];
      switch (c) {
      case 'q':
        RegisterSuffix += "q{{[0-9]+}}, ";
        break;

      case 'd':
        RegisterSuffix += "d{{[0-9]+}}, ";
        break;

      case 'i':
        RegisterSuffix += "#{{[0-9]+}}, ";
        break;

      case 'a':
        RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
        break;
      }
    }

    // Remove extra ", ".
    RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
  }
}

/// GenerateChecksForIntrinsic - Given a specific instruction name +
/// typestr + class kind, generate the proper set of FileCheck
/// Patterns to check for. We could just return a string, but instead
/// use a vector since it provides us with the extra flexibility of
/// emitting multiple checks, which comes in handy for certain cases
/// like mla where we want to check for 2 different instructions.
static void GenerateChecksForIntrinsic(const std::string &Name,
                                       const std::string &Proto,
                                       StringRef &OutTypeStr,
                                       StringRef &InTypeStr,
                                       ClassKind Ck,
                                       const std::string &InstName,
                                       bool IsHiddenLOp,
                                       std::vector<std::string>& Result) {

  // If Ck is a ClassNoTest instruction, just return so no test is
  // emitted.
  if(Ck == ClassNoTest)
    return;

  if (Name == "vcvt_f32_f16") {
    Result.push_back("vcvt.f32.f16");
    return;
  }


  // Now we preprocess our instruction given the data we have to get the
  // data that we need.
  // Create a StringRef for String Manipulation of our Name.
  const StringRef NameRef(Name);
  // Instruction Prefix.
  std::string Prefix;
  // The type code for our out type string.
  std::string OutTypeCode;
  // To handle our different cases, we need to check for different postfixes.
  // Is our instruction a quad instruction.
  bool IsQuad = false;
  // Our instruction is of the form <instructionname>_n.
  bool HasNPostfix = false;
  // Our instruction is of the form <instructionname>_lane.
  bool HasLanePostfix = false;
  // Our instruction is of the form <instructionname>_dup.
  bool HasDupPostfix  = false;
  // Our instruction is a vcvt instruction which requires special handling.
  bool IsSpecialVCvt = false;
  // If we have a vtbxN or vtblN instruction, this is set to N.
  size_t TBNumber = -1;
  // Register Suffix
  std::string RegisterSuffix;

  PreprocessInstruction(NameRef, InstName, Prefix,
                        HasNPostfix, HasLanePostfix, HasDupPostfix,
                        IsSpecialVCvt, TBNumber);

  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
                               HasLanePostfix, HasDupPostfix, TBNumber,
                               RegisterSuffix);

  // In the following section, we handle a bunch of special cases. You can tell
  // a special case by the fact we are returning early.

  // If our instruction is a logical instruction without postfix or a
  // hidden LOp just return the current Prefix.
  if (Ck == ClassL || IsHiddenLOp) {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // If we have a vmov, due to the many different cases, some of which
  // vary within the different intrinsics generated for a single
  // instruction type, just output a vmov. (e.g. given an instruction
  // A, A.u32 might be vmov and A.u8 might be vmov.8).
  //
  // FIXME: Maybe something can be done about this. The two cases that we care
  // about are vmov as an LType and vmov as a WType.
  if (Prefix == "vmov") {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // In the following section, we handle special cases.

  if (OutTypeCode == "64") {
    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
    // type, the intrinsic will be optimized away, so just return
    // nothing.  On the other hand if we are handling an uint64x2_t
    // (i.e. quad instruction), vdup/vmov instructions should be
    // emitted.
    if (Prefix == "vdup" || Prefix == "vext") {
      if (IsQuad) {
        Result.push_back("{{vmov|vdup}}");
      }
      return;
    }

    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
    // multiple register operands.
    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
                            || Prefix == "vld4";
    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
                            || Prefix == "vst4";
    if (MultiLoadPrefix || MultiStorePrefix) {
      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
      return;
    }

    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
    // emitting said instructions. So return a check for
    // vldr/vstr/vmov/str instead.
    if (HasLanePostfix || HasDupPostfix) {
      if (Prefix == "vst1") {
        Result.push_back("{{str|vstr|vmov}}");
        return;
      } else if (Prefix == "vld1") {
        Result.push_back("{{ldr|vldr|vmov}}");
        return;
      }
    }
  }

  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
  // sometimes disassembled as vtrn.32. We use a regex to handle both
  // cases.
  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
    return;
  }

  // Currently on most ARM processors, we do not use vmla/vmls for
  // quad floating point operations. Instead we output vmul + vadd. So
  // check if we have one of those instructions and just output a
  // check for vmul.
  if (OutTypeCode == "f32") {
    if (Prefix == "vmls") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vsub." + OutTypeCode);
      return;
    } else if (Prefix == "vmla") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vadd." + OutTypeCode);
      return;
    }
  }

  // If we have vcvt, get the input type from the instruction name
  // (which should be of the form instname_inputtype) and append it
  // before the output type.
  if (Prefix == "vcvt") {
    const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
    Prefix += "." + inTypeCode;
  }

  // Append output type code to get our final mangled instruction.
  Prefix += "." + OutTypeCode;

  Result.push_back(Prefix + " " + RegisterSuffix);
}

/// UseMacro - Examine the prototype string to determine if the intrinsic
/// should be defined as a preprocessor macro instead of an inline function.
static bool UseMacro(const std::string &proto) {
  // If this builtin takes an immediate argument, we need to #define it rather
  // than use a standard declaration, so that SemaChecking can range check
  // the immediate passed by the user.
  if (proto.find('i') != std::string::npos)
    return true;

  // Pointer arguments need to use macros to avoid hiding aligned attributes
  // from the pointer type.
  if (proto.find('p') != std::string::npos ||
      proto.find('c') != std::string::npos)
    return true;

  return false;
}

/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is
/// defined as a macro should be accessed directly instead of being first
/// assigned to a local temporary.
1325 static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) { 1326 // True for constant ints (i), pointers (p) and const pointers (c). 1327 return (proto[i] == 'i' || proto[i] == 'p' || proto[i] == 'c'); 1328 } 1329 1330 // Generate the string "(argtype a, argtype b, ...)" 1331 static std::string GenArgs(const std::string &proto, StringRef typestr) { 1332 bool define = UseMacro(proto); 1333 char arg = 'a'; 1334 1335 std::string s; 1336 s += "("; 1337 1338 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1339 if (define) { 1340 // Some macro arguments are used directly instead of being assigned 1341 // to local temporaries; prepend an underscore prefix to make their 1342 // names consistent with the local temporaries. 1343 if (MacroArgUsedDirectly(proto, i)) 1344 s += "__"; 1345 } else { 1346 s += TypeString(proto[i], typestr) + " __"; 1347 } 1348 s.push_back(arg); 1349 if ((i + 1) < e) 1350 s += ", "; 1351 } 1352 1353 s += ")"; 1354 return s; 1355 } 1356 1357 // Macro arguments are not type-checked like inline function arguments, so 1358 // assign them to local temporaries to get the right type checking. 1359 static std::string GenMacroLocals(const std::string &proto, StringRef typestr) { 1360 char arg = 'a'; 1361 std::string s; 1362 bool generatedLocal = false; 1363 1364 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1365 // Do not create a temporary for an immediate argument. 1366 // That would defeat the whole point of using a macro! 1367 if (MacroArgUsedDirectly(proto, i)) 1368 continue; 1369 generatedLocal = true; 1370 1371 s += TypeString(proto[i], typestr) + " __"; 1372 s.push_back(arg); 1373 s += " = ("; 1374 s.push_back(arg); 1375 s += "); "; 1376 } 1377 1378 if (generatedLocal) 1379 s += "\\\n "; 1380 return s; 1381 } 1382 1383 // Use the vmovl builtin to sign-extend or zero-extend a vector. 
1384 static std::string Extend(StringRef typestr, const std::string &a, bool h=0) { 1385 std::string s, high; 1386 high = h ? "_high" : ""; 1387 s = MangleName("vmovl" + high, typestr, ClassS); 1388 s += "(" + a + ")"; 1389 return s; 1390 } 1391 1392 // Get the high 64-bit part of a vector 1393 static std::string GetHigh(const std::string &a, StringRef typestr) { 1394 std::string s; 1395 s = MangleName("vget_high", typestr, ClassS); 1396 s += "(" + a + ")"; 1397 return s; 1398 } 1399 1400 // Gen operation with two operands and get high 64-bit for both of two operands. 1401 static std::string Gen2OpWith2High(StringRef typestr, 1402 const std::string &op, 1403 const std::string &a, 1404 const std::string &b) { 1405 std::string s; 1406 std::string Op1 = GetHigh(a, typestr); 1407 std::string Op2 = GetHigh(b, typestr); 1408 s = MangleName(op, typestr, ClassS); 1409 s += "(" + Op1 + ", " + Op2 + ");"; 1410 return s; 1411 } 1412 1413 // Gen operation with three operands and get high 64-bit of the latter 1414 // two operands. 1415 static std::string Gen3OpWith2High(StringRef typestr, 1416 const std::string &op, 1417 const std::string &a, 1418 const std::string &b, 1419 const std::string &c) { 1420 std::string s; 1421 std::string Op1 = GetHigh(b, typestr); 1422 std::string Op2 = GetHigh(c, typestr); 1423 s = MangleName(op, typestr, ClassS); 1424 s += "(" + a + ", " + Op1 + ", " + Op2 + ");"; 1425 return s; 1426 } 1427 1428 // Gen combine operation by putting a on low 64-bit, and b on high 64-bit. 
1429 static std::string GenCombine(std::string typestr, 1430 const std::string &a, 1431 const std::string &b) { 1432 std::string s; 1433 s = MangleName("vcombine", typestr, ClassS); 1434 s += "(" + a + ", " + b + ")"; 1435 return s; 1436 } 1437 1438 static std::string Duplicate(unsigned nElts, StringRef typestr, 1439 const std::string &a) { 1440 std::string s; 1441 1442 s = "(" + TypeString('d', typestr) + "){ "; 1443 for (unsigned i = 0; i != nElts; ++i) { 1444 s += a; 1445 if ((i + 1) < nElts) 1446 s += ", "; 1447 } 1448 s += " }"; 1449 1450 return s; 1451 } 1452 1453 static std::string SplatLane(unsigned nElts, const std::string &vec, 1454 const std::string &lane) { 1455 std::string s = "__builtin_shufflevector(" + vec + ", " + vec; 1456 for (unsigned i = 0; i < nElts; ++i) 1457 s += ", " + lane; 1458 s += ")"; 1459 return s; 1460 } 1461 1462 static std::string RemoveHigh(const std::string &name) { 1463 std::string s = name; 1464 std::size_t found = s.find("_high_"); 1465 if (found == std::string::npos) 1466 PrintFatalError("name should contain \"_high_\" for high intrinsics"); 1467 s.replace(found, 5, ""); 1468 return s; 1469 } 1470 1471 static unsigned GetNumElements(StringRef typestr, bool &quad) { 1472 quad = false; 1473 bool dummy = false; 1474 char type = ClassifyType(typestr, quad, dummy, dummy); 1475 unsigned nElts = 0; 1476 switch (type) { 1477 case 'c': nElts = 8; break; 1478 case 's': nElts = 4; break; 1479 case 'i': nElts = 2; break; 1480 case 'l': nElts = 1; break; 1481 case 'h': nElts = 4; break; 1482 case 'f': nElts = 2; break; 1483 case 'd': 1484 nElts = 1; 1485 break; 1486 default: 1487 PrintFatalError("unhandled type!"); 1488 } 1489 if (quad) nElts <<= 1; 1490 return nElts; 1491 } 1492 1493 // Generate the definition for this intrinsic, e.g. "a + b" for OpAdd. 
1494 static std::string GenOpString(const std::string &name, OpKind op, 1495 const std::string &proto, StringRef typestr) { 1496 bool quad; 1497 unsigned nElts = GetNumElements(typestr, quad); 1498 bool define = UseMacro(proto); 1499 1500 std::string ts = TypeString(proto[0], typestr); 1501 std::string s; 1502 if (!define) { 1503 s = "return "; 1504 } 1505 1506 switch(op) { 1507 case OpAdd: 1508 s += "__a + __b;"; 1509 break; 1510 case OpAddl: 1511 s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";"; 1512 break; 1513 case OpAddlHi: 1514 s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";"; 1515 break; 1516 case OpAddw: 1517 s += "__a + " + Extend(typestr, "__b") + ";"; 1518 break; 1519 case OpAddwHi: 1520 s += "__a + " + Extend(typestr, "__b", 1) + ";"; 1521 break; 1522 case OpSub: 1523 s += "__a - __b;"; 1524 break; 1525 case OpSubl: 1526 s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";"; 1527 break; 1528 case OpSublHi: 1529 s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";"; 1530 break; 1531 case OpSubw: 1532 s += "__a - " + Extend(typestr, "__b") + ";"; 1533 break; 1534 case OpSubwHi: 1535 s += "__a - " + Extend(typestr, "__b", 1) + ";"; 1536 break; 1537 case OpMulN: 1538 s += "__a * " + Duplicate(nElts, typestr, "__b") + ";"; 1539 break; 1540 case OpMulLane: 1541 s += "__a * " + SplatLane(nElts, "__b", "__c") + ";"; 1542 break; 1543 case OpMulXLane: 1544 s += MangleName("vmulx", typestr, ClassS) + "(__a, " + 1545 SplatLane(nElts, "__b", "__c") + ");"; 1546 break; 1547 case OpMul: 1548 s += "__a * __b;"; 1549 break; 1550 case OpMullLane: 1551 s += MangleName("vmull", typestr, ClassS) + "(__a, " + 1552 SplatLane(nElts, "__b", "__c") + ");"; 1553 break; 1554 case OpMullHiLane: 1555 s += MangleName("vmull", typestr, ClassS) + "(" + 1556 GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");"; 1557 break; 1558 case OpMlaN: 1559 s += "__a + (__b * " + Duplicate(nElts, typestr, 
"__c") + ");"; 1560 break; 1561 case OpMlaLane: 1562 s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");"; 1563 break; 1564 case OpMla: 1565 s += "__a + (__b * __c);"; 1566 break; 1567 case OpMlalN: 1568 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " + 1569 Duplicate(nElts, typestr, "__c") + ");"; 1570 break; 1571 case OpMlalLane: 1572 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " + 1573 SplatLane(nElts, "__c", "__d") + ");"; 1574 break; 1575 case OpMlalHiLane: 1576 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" + 1577 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; 1578 break; 1579 case OpMlal: 1580 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);"; 1581 break; 1582 case OpMullHi: 1583 s += Gen2OpWith2High(typestr, "vmull", "__a", "__b"); 1584 break; 1585 case OpMlalHi: 1586 s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c"); 1587 break; 1588 case OpMlsN: 1589 s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");"; 1590 break; 1591 case OpMlsLane: 1592 s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");"; 1593 break; 1594 case OpFMSLane: 1595 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 1596 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; 1597 s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n "; 1598 s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);"; 1599 break; 1600 case OpFMSLaneQ: 1601 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 1602 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; 1603 s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n "; 1604 s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);"; 1605 break; 1606 case OpMls: 1607 s += "__a - (__b * __c);"; 1608 break; 1609 case OpMlslN: 1610 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " + 1611 Duplicate(nElts, typestr, "__c") + ");"; 1612 break; 1613 case 
OpMlslLane: 1614 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " + 1615 SplatLane(nElts, "__c", "__d") + ");"; 1616 break; 1617 case OpMlslHiLane: 1618 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" + 1619 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; 1620 break; 1621 case OpMlsl: 1622 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);"; 1623 break; 1624 case OpMlslHi: 1625 s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c"); 1626 break; 1627 case OpQDMullLane: 1628 s += MangleName("vqdmull", typestr, ClassS) + "(__a, " + 1629 SplatLane(nElts, "__b", "__c") + ");"; 1630 break; 1631 case OpQDMullHiLane: 1632 s += MangleName("vqdmull", typestr, ClassS) + "(" + 1633 GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");"; 1634 break; 1635 case OpQDMlalLane: 1636 s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " + 1637 SplatLane(nElts, "__c", "__d") + ");"; 1638 break; 1639 case OpQDMlalHiLane: 1640 s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " + 1641 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; 1642 break; 1643 case OpQDMlslLane: 1644 s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " + 1645 SplatLane(nElts, "__c", "__d") + ");"; 1646 break; 1647 case OpQDMlslHiLane: 1648 s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " + 1649 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; 1650 break; 1651 case OpQDMulhLane: 1652 s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " + 1653 SplatLane(nElts, "__b", "__c") + ");"; 1654 break; 1655 case OpQRDMulhLane: 1656 s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " + 1657 SplatLane(nElts, "__b", "__c") + ");"; 1658 break; 1659 case OpEq: 1660 s += "(" + ts + ")(__a == __b);"; 1661 break; 1662 case OpGe: 1663 s += "(" + ts + ")(__a >= __b);"; 1664 break; 1665 case OpLe: 1666 s += "(" + ts + ")(__a <= __b);"; 1667 break; 1668 case OpGt: 1669 s += "(" 
+ ts + ")(__a > __b);"; 1670 break; 1671 case OpLt: 1672 s += "(" + ts + ")(__a < __b);"; 1673 break; 1674 case OpNeg: 1675 s += " -__a;"; 1676 break; 1677 case OpNot: 1678 s += " ~__a;"; 1679 break; 1680 case OpAnd: 1681 s += "__a & __b;"; 1682 break; 1683 case OpOr: 1684 s += "__a | __b;"; 1685 break; 1686 case OpXor: 1687 s += "__a ^ __b;"; 1688 break; 1689 case OpAndNot: 1690 s += "__a & ~__b;"; 1691 break; 1692 case OpOrNot: 1693 s += "__a | ~__b;"; 1694 break; 1695 case OpCast: 1696 s += "(" + ts + ")__a;"; 1697 break; 1698 case OpConcat: 1699 s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a"; 1700 s += ", (int64x1_t)__b, 0, 1);"; 1701 break; 1702 case OpHi: 1703 // nElts is for the result vector, so the source is twice that number. 1704 s += "__builtin_shufflevector(__a, __a"; 1705 for (unsigned i = nElts; i < nElts * 2; ++i) 1706 s += ", " + utostr(i); 1707 s+= ");"; 1708 break; 1709 case OpLo: 1710 s += "__builtin_shufflevector(__a, __a"; 1711 for (unsigned i = 0; i < nElts; ++i) 1712 s += ", " + utostr(i); 1713 s+= ");"; 1714 break; 1715 case OpDup: 1716 s += Duplicate(nElts, typestr, "__a") + ";"; 1717 break; 1718 case OpDupLane: 1719 s += SplatLane(nElts, "__a", "__b") + ";"; 1720 break; 1721 case OpSelect: 1722 // ((0 & 1) | (~0 & 2)) 1723 s += "(" + ts + ")"; 1724 ts = TypeString(proto[1], typestr); 1725 s += "((__a & (" + ts + ")__b) | "; 1726 s += "(~__a & (" + ts + ")__c));"; 1727 break; 1728 case OpRev16: 1729 s += "__builtin_shufflevector(__a, __a"; 1730 for (unsigned i = 2; i <= nElts; i += 2) 1731 for (unsigned j = 0; j != 2; ++j) 1732 s += ", " + utostr(i - j - 1); 1733 s += ");"; 1734 break; 1735 case OpRev32: { 1736 unsigned WordElts = nElts >> (1 + (int)quad); 1737 s += "__builtin_shufflevector(__a, __a"; 1738 for (unsigned i = WordElts; i <= nElts; i += WordElts) 1739 for (unsigned j = 0; j != WordElts; ++j) 1740 s += ", " + utostr(i - j - 1); 1741 s += ");"; 1742 break; 1743 } 1744 case OpRev64: { 1745 unsigned DblWordElts = nElts 
>> (int)quad; 1746 s += "__builtin_shufflevector(__a, __a"; 1747 for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts) 1748 for (unsigned j = 0; j != DblWordElts; ++j) 1749 s += ", " + utostr(i - j - 1); 1750 s += ");"; 1751 break; 1752 } 1753 case OpAbdl: { 1754 std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)"; 1755 if (typestr[0] != 'U') { 1756 // vabd results are always unsigned and must be zero-extended. 1757 std::string utype = "U" + typestr.str(); 1758 s += "(" + TypeString(proto[0], typestr) + ")"; 1759 abd = "(" + TypeString('d', utype) + ")" + abd; 1760 s += Extend(utype, abd) + ";"; 1761 } else { 1762 s += Extend(typestr, abd) + ";"; 1763 } 1764 break; 1765 } 1766 case OpAbdlHi: 1767 s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b"); 1768 break; 1769 case OpAddhnHi: { 1770 std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)"; 1771 s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn); 1772 s += ";"; 1773 break; 1774 } 1775 case OpRAddhnHi: { 1776 std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)"; 1777 s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn); 1778 s += ";"; 1779 break; 1780 } 1781 case OpSubhnHi: { 1782 std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)"; 1783 s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn); 1784 s += ";"; 1785 break; 1786 } 1787 case OpRSubhnHi: { 1788 std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)"; 1789 s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn); 1790 s += ";"; 1791 break; 1792 } 1793 case OpAba: 1794 s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);"; 1795 break; 1796 case OpAbal: 1797 s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);"; 1798 break; 1799 case OpAbalHi: 1800 s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c"); 1801 break; 1802 case OpQDMullHi: 1803 s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b"); 1804 
break; 1805 case OpQDMlalHi: 1806 s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c"); 1807 break; 1808 case OpQDMlslHi: 1809 s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c"); 1810 break; 1811 case OpDiv: 1812 s += "__a / __b;"; 1813 break; 1814 case OpMovlHi: { 1815 s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " + 1816 MangleName("vget_high", typestr, ClassS) + "(__a);\n " + s; 1817 s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS); 1818 s += "(__a1, 0);"; 1819 break; 1820 } 1821 case OpLongHi: { 1822 // Another local variable __a1 is needed for calling a Macro, 1823 // or using __a will have naming conflict when Macro expanding. 1824 s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " + 1825 MangleName("vget_high", typestr, ClassS) + "(__a); \\\n"; 1826 s += " (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) + 1827 "(__a1, __b);"; 1828 break; 1829 } 1830 case OpNarrowHi: { 1831 s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " + 1832 MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));"; 1833 break; 1834 } 1835 default: 1836 PrintFatalError("unknown OpKind!"); 1837 } 1838 return s; 1839 } 1840 1841 static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) { 1842 unsigned mod = proto[0]; 1843 1844 if (mod == 'v' || mod == 'f') 1845 mod = proto[1]; 1846 1847 bool quad = false; 1848 bool poly = false; 1849 bool usgn = false; 1850 bool scal = false; 1851 bool cnst = false; 1852 bool pntr = false; 1853 1854 // Base type to get the type string for. 1855 char type = ClassifyType(typestr, quad, poly, usgn); 1856 1857 // Based on the modifying character, change the type and width if necessary. 1858 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 1859 1860 NeonTypeFlags::EltType ET; 1861 switch (type) { 1862 case 'c': 1863 ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8; 1864 break; 1865 case 's': 1866 ET = poly ? 
NeonTypeFlags::Poly16 : NeonTypeFlags::Int16; 1867 break; 1868 case 'i': 1869 ET = NeonTypeFlags::Int32; 1870 break; 1871 case 'l': 1872 ET = NeonTypeFlags::Int64; 1873 break; 1874 case 'h': 1875 ET = NeonTypeFlags::Float16; 1876 break; 1877 case 'f': 1878 ET = NeonTypeFlags::Float32; 1879 break; 1880 case 'd': 1881 ET = NeonTypeFlags::Float64; 1882 break; 1883 default: 1884 PrintFatalError("unhandled type!"); 1885 } 1886 NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g'); 1887 return Flags.getFlags(); 1888 } 1889 1890 static bool ProtoHasScalar(const std::string proto) 1891 { 1892 return (proto.find('s') != std::string::npos 1893 || proto.find('r') != std::string::npos); 1894 } 1895 1896 // Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a) 1897 static std::string GenBuiltin(const std::string &name, const std::string &proto, 1898 StringRef typestr, ClassKind ck) { 1899 std::string s; 1900 1901 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit 1902 // sret-like argument. 1903 bool sret = (proto[0] >= '2' && proto[0] <= '4'); 1904 1905 bool define = UseMacro(proto); 1906 1907 // Check if the prototype has a scalar operand with the type of the vector 1908 // elements. If not, bitcasting the args will take care of arg checking. 1909 // The actual signedness etc. will be taken care of with special enums. 1910 if (!ProtoHasScalar(proto)) 1911 ck = ClassB; 1912 1913 if (proto[0] != 'v') { 1914 std::string ts = TypeString(proto[0], typestr); 1915 1916 if (define) { 1917 if (sret) 1918 s += ts + " r; "; 1919 else 1920 s += "(" + ts + ")"; 1921 } else if (sret) { 1922 s += ts + " r; "; 1923 } else { 1924 s += "return (" + ts + ")"; 1925 } 1926 } 1927 1928 bool splat = proto.find('a') != std::string::npos; 1929 1930 s += "__builtin_neon_"; 1931 if (splat) { 1932 // Call the non-splat builtin: chop off the "_n" suffix from the name. 
1933 std::string vname(name, 0, name.size()-2); 1934 s += MangleName(vname, typestr, ck); 1935 } else { 1936 s += MangleName(name, typestr, ck); 1937 } 1938 s += "("; 1939 1940 // Pass the address of the return variable as the first argument to sret-like 1941 // builtins. 1942 if (sret) 1943 s += "&r, "; 1944 1945 char arg = 'a'; 1946 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1947 std::string args = std::string(&arg, 1); 1948 1949 // Use the local temporaries instead of the macro arguments. 1950 args = "__" + args; 1951 1952 bool argQuad = false; 1953 bool argPoly = false; 1954 bool argUsgn = false; 1955 bool argScalar = false; 1956 bool dummy = false; 1957 char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn); 1958 argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar, 1959 dummy, dummy); 1960 1961 // Handle multiple-vector values specially, emitting each subvector as an 1962 // argument to the __builtin. 1963 if (proto[i] >= '2' && proto[i] <= '4') { 1964 // Check if an explicit cast is needed. 1965 if (argType != 'c' || argPoly || argUsgn) 1966 args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args; 1967 1968 for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) { 1969 s += args + ".val[" + utostr(vi) + "]"; 1970 if ((vi + 1) < ve) 1971 s += ", "; 1972 } 1973 if ((i + 1) < e) 1974 s += ", "; 1975 1976 continue; 1977 } 1978 1979 if (splat && (i + 1) == e) 1980 args = Duplicate(GetNumElements(typestr, argQuad), typestr, args); 1981 1982 // Check if an explicit cast is needed. 
1983 if ((splat || !argScalar) && 1984 ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) { 1985 std::string argTypeStr = "c"; 1986 if (ck != ClassB) 1987 argTypeStr = argType; 1988 if (argQuad) 1989 argTypeStr = "Q" + argTypeStr; 1990 args = "(" + TypeString('d', argTypeStr) + ")" + args; 1991 } 1992 1993 s += args; 1994 if ((i + 1) < e) 1995 s += ", "; 1996 } 1997 1998 // Extra constant integer to hold type class enum for this function, e.g. s8 1999 if (ck == ClassB) 2000 s += ", " + utostr(GetNeonEnum(proto, typestr)); 2001 2002 s += ");"; 2003 2004 if (proto[0] != 'v' && sret) { 2005 if (define) 2006 s += " r;"; 2007 else 2008 s += " return r;"; 2009 } 2010 return s; 2011 } 2012 2013 static std::string GenBuiltinDef(const std::string &name, 2014 const std::string &proto, 2015 StringRef typestr, ClassKind ck) { 2016 std::string s("BUILTIN(__builtin_neon_"); 2017 2018 // If all types are the same size, bitcasting the args will take care 2019 // of arg checking. The actual signedness etc. will be taken care of with 2020 // special enums. 2021 if (!ProtoHasScalar(proto)) 2022 ck = ClassB; 2023 2024 s += MangleName(name, typestr, ck); 2025 s += ", \""; 2026 2027 for (unsigned i = 0, e = proto.size(); i != e; ++i) 2028 s += BuiltinTypeString(proto[i], typestr, ck, i == 0); 2029 2030 // Extra constant integer to hold type class enum for this function, e.g. 
s8 2031 if (ck == ClassB) 2032 s += "i"; 2033 2034 s += "\", \"n\")"; 2035 return s; 2036 } 2037 2038 static std::string GenIntrinsic(const std::string &name, 2039 const std::string &proto, 2040 StringRef outTypeStr, StringRef inTypeStr, 2041 OpKind kind, ClassKind classKind) { 2042 assert(!proto.empty() && ""); 2043 bool define = UseMacro(proto) && kind != OpUnavailable; 2044 std::string s; 2045 2046 // static always inline + return type 2047 if (define) 2048 s += "#define "; 2049 else 2050 s += "__ai " + TypeString(proto[0], outTypeStr) + " "; 2051 2052 // Function name with type suffix 2053 std::string mangledName = MangleName(name, outTypeStr, ClassS); 2054 if (outTypeStr != inTypeStr) { 2055 // If the input type is different (e.g., for vreinterpret), append a suffix 2056 // for the input type. String off a "Q" (quad) prefix so that MangleName 2057 // does not insert another "q" in the name. 2058 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0); 2059 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff); 2060 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS); 2061 } 2062 s += mangledName; 2063 2064 // Function arguments 2065 s += GenArgs(proto, inTypeStr); 2066 2067 // Definition. 2068 if (define) { 2069 s += " __extension__ ({ \\\n "; 2070 s += GenMacroLocals(proto, inTypeStr); 2071 } else if (kind == OpUnavailable) { 2072 s += " __attribute__((unavailable));\n"; 2073 return s; 2074 } else 2075 s += " {\n "; 2076 2077 if (kind != OpNone) 2078 s += GenOpString(name, kind, proto, outTypeStr); 2079 else 2080 s += GenBuiltin(name, proto, outTypeStr, classKind); 2081 if (define) 2082 s += " })"; 2083 else 2084 s += " }"; 2085 s += "\n"; 2086 return s; 2087 } 2088 2089 /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h 2090 /// is comprised of type definitions and function declarations. 
void NeonEmitter::run(raw_ostream &OS) {
  // Emit the MIT-style license header verbatim into the generated header.
  OS <<
    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
    "---===\n"
    " *\n"
    " * Permission is hereby granted, free of charge, to any person obtaining "
    "a copy\n"
    " * of this software and associated documentation files (the \"Software\"),"
    " to deal\n"
    " * in the Software without restriction, including without limitation the "
    "rights\n"
    " * to use, copy, modify, merge, publish, distribute, sublicense, "
    "and/or sell\n"
    " * copies of the Software, and to permit persons to whom the Software is\n"
    " * furnished to do so, subject to the following conditions:\n"
    " *\n"
    " * The above copyright notice and this permission notice shall be "
    "included in\n"
    " * all copies or substantial portions of the Software.\n"
    " *\n"
    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
    "EXPRESS OR\n"
    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
    "MERCHANTABILITY,\n"
    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
    "SHALL THE\n"
    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
    "OTHER\n"
    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
    "ARISING FROM,\n"
    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
    "DEALINGS IN\n"
    " * THE SOFTWARE.\n"
    " *\n"
    " *===--------------------------------------------------------------------"
    "---===\n"
    " */\n\n";

  OS << "#ifndef __ARM_NEON_H\n";
  OS << "#define __ARM_NEON_H\n\n";

  // NOTE(review): "__AARCH_FEATURE_ADVSIMD" looks like a typo for an
  // __ARM_FEATURE_* / AArch64 feature macro — confirm against the ACLE
  // spec before changing the emitted string.
  OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n";
  OS << "#error \"NEON support not enabled\"\n";
  OS << "#endif\n\n";

  OS << "#include <stdint.h>\n\n";

  // Emit NEON-specific scalar typedefs.
  OS << "typedef float float32_t;\n";
  OS << "typedef __fp16 float16_t;\n";

  // float64_t only exists on AArch64.
  OS << "#ifdef __aarch64__\n";
  OS << "typedef double float64_t;\n";
  OS << "#endif\n\n";

  // For now, signedness of polynomial types depends on target
  OS << "#ifdef __aarch64__\n";
  OS << "typedef uint8_t poly8_t;\n";
  OS << "typedef uint16_t poly16_t;\n";
  OS << "#else\n";
  OS << "typedef int8_t poly8_t;\n";
  OS << "typedef int16_t poly16_t;\n";
  OS << "#endif\n";

  // Emit Neon vector typedefs.
  std::string TypedefTypes(
      "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPs");
  SmallVector<StringRef, 24> TDTypeVec;
  ParseTypes(0, TypedefTypes, TDTypeVec);

  // Emit vector typedefs.
  // isA64 tracks whether we are currently inside an "#ifdef __aarch64__"
  // region: 'd' (float64) types open one, any other type closes it.
  bool isA64 = false;
  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
    bool dummy, quad = false, poly = false;
    char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
    bool preinsert = false;
    bool postinsert = false;

    if (type == 'd') {
      preinsert = isA64? false: true;
      isA64 = true;
    } else {
      postinsert = isA64? true: false;
      isA64 = false;
    }
    if (postinsert)
      OS << "#endif\n";
    if (preinsert)
      OS << "#ifdef __aarch64__\n";

    if (poly)
      OS << "typedef __attribute__((neon_polyvector_type(";
    else
      OS << "typedef __attribute__((neon_vector_type(";

    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
    OS << utostr(nElts) << "))) ";
    // Pad single-digit lane counts so the typedef columns line up.
    if (nElts < 10)
      OS << " ";

    OS << TypeString('s', TDTypeVec[i]);
    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";

  }
  OS << "\n";

  // Emit struct typedefs (the 2-, 3- and 4-vector aggregate types).
  isA64 = false;
  for (unsigned vi = 2; vi != 5; ++vi) {
    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
      bool dummy, quad = false, poly = false;
      char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
      bool preinsert = false;
      bool postinsert = false;

      if (type == 'd') {
        preinsert = isA64? false: true;
        isA64 = true;
      } else {
        postinsert = isA64? true: false;
        isA64 = false;
      }
      if (postinsert)
        OS << "#endif\n";
      if (preinsert)
        OS << "#ifdef __aarch64__\n";

      std::string ts = TypeString('d', TDTypeVec[i]);
      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
      OS << "typedef struct " << vs << " {\n";
      OS << "  " << ts << " val";
      OS << "[" << utostr(vi) << "]";
      OS << ";\n} ";
      OS << vs << ";\n";
      OS << "\n";
    }
  }

  OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";

  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");

  StringMap<ClassKind> EmittedMap;

  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
  // intrinsics.  (Some of the saturating multiply instructions are also
  // used to implement the corresponding "_lane" variants, but tablegen
  // sorts the records into alphabetical order so that the "_lane" variants
  // come after the intrinsics they use.)
  emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);

  // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
  // common intrinsics appear only once in the output stream.
  // The check for uniqueness is done in emitIntrinsic.
  // Emit ARM intrinsics.
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip AArch64 intrinsics; they will be emitted at the end.
    bool isA64 = R->getValueAsBit("isA64");
    if (isA64)
      continue;

    if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
        R->getName() != "VABD")
      emitIntrinsic(OS, R, EmittedMap);
  }

  // Emit AArch64-specific intrinsics.
  OS << "#ifdef __aarch64__\n";

  emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip ARM intrinsics already included above.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64)
      continue;

    emitIntrinsic(OS, R, EmittedMap);
  }

  OS << "#endif\n\n";

  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_NEON_H */\n";
}

/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
/// intrinsics specified by record R checking for intrinsic uniqueness.
2288 void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R, 2289 StringMap<ClassKind> &EmittedMap) { 2290 std::string name = R->getValueAsString("Name"); 2291 std::string Proto = R->getValueAsString("Prototype"); 2292 std::string Types = R->getValueAsString("Types"); 2293 2294 SmallVector<StringRef, 16> TypeVec; 2295 ParseTypes(R, Types, TypeVec); 2296 2297 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()]; 2298 2299 ClassKind classKind = ClassNone; 2300 if (R->getSuperClasses().size() >= 2) 2301 classKind = ClassMap[R->getSuperClasses()[1]]; 2302 if (classKind == ClassNone && kind == OpNone) 2303 PrintFatalError(R->getLoc(), "Builtin has no class kind"); 2304 2305 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 2306 if (kind == OpReinterpret) { 2307 bool outQuad = false; 2308 bool dummy = false; 2309 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy); 2310 for (unsigned srcti = 0, srcte = TypeVec.size(); 2311 srcti != srcte; ++srcti) { 2312 bool inQuad = false; 2313 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); 2314 if (srcti == ti || inQuad != outQuad) 2315 continue; 2316 std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti], 2317 OpCast, ClassS); 2318 if (EmittedMap.count(s)) 2319 continue; 2320 EmittedMap[s] = ClassS; 2321 OS << s; 2322 } 2323 } else { 2324 std::string s = 2325 GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind); 2326 if (EmittedMap.count(s)) 2327 continue; 2328 EmittedMap[s] = classKind; 2329 OS << s; 2330 } 2331 } 2332 OS << "\n"; 2333 } 2334 2335 static unsigned RangeFromType(const char mod, StringRef typestr) { 2336 // base type to get the type string for. 
2337 bool quad = false, dummy = false; 2338 char type = ClassifyType(typestr, quad, dummy, dummy); 2339 type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy); 2340 2341 switch (type) { 2342 case 'c': 2343 return (8 << (int)quad) - 1; 2344 case 'h': 2345 case 's': 2346 return (4 << (int)quad) - 1; 2347 case 'f': 2348 case 'i': 2349 return (2 << (int)quad) - 1; 2350 case 'd': 2351 case 'l': 2352 return (1 << (int)quad) - 1; 2353 default: 2354 PrintFatalError("unhandled type!"); 2355 } 2356 } 2357 2358 /// Generate the ARM and AArch64 intrinsic range checking code for 2359 /// shift/lane immediates, checking for unique declarations. 2360 void 2361 NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, 2362 StringMap<ClassKind> &A64IntrinsicMap, 2363 bool isA64RangeCheck) { 2364 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2365 StringMap<OpKind> EmittedMap; 2366 2367 // Generate the intrinsic range checking code for shift/lane immediates. 2368 if (isA64RangeCheck) 2369 OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n"; 2370 else 2371 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; 2372 2373 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2374 Record *R = RV[i]; 2375 2376 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; 2377 if (k != OpNone) 2378 continue; 2379 2380 std::string name = R->getValueAsString("Name"); 2381 std::string Proto = R->getValueAsString("Prototype"); 2382 std::string Types = R->getValueAsString("Types"); 2383 std::string Rename = name + "@" + Proto; 2384 2385 // Functions with 'a' (the splat code) in the type prototype should not get 2386 // their own builtin as they use the non-splat variant. 2387 if (Proto.find('a') != std::string::npos) 2388 continue; 2389 2390 // Functions which do not have an immediate do not need to have range 2391 // checking code emitted. 
2392 size_t immPos = Proto.find('i'); 2393 if (immPos == std::string::npos) 2394 continue; 2395 2396 SmallVector<StringRef, 16> TypeVec; 2397 ParseTypes(R, Types, TypeVec); 2398 2399 if (R->getSuperClasses().size() < 2) 2400 PrintFatalError(R->getLoc(), "Builtin has no class kind"); 2401 2402 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 2403 2404 // Do not include AArch64 range checks if not generating code for AArch64. 2405 bool isA64 = R->getValueAsBit("isA64"); 2406 if (!isA64RangeCheck && isA64) 2407 continue; 2408 2409 // Include ARM range checks in AArch64 but only if ARM intrinsics are not 2410 // redefined by AArch64 to handle new types. 2411 if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) { 2412 ClassKind &A64CK = A64IntrinsicMap[Rename]; 2413 if (A64CK == ck && ck != ClassNone) 2414 continue; 2415 } 2416 2417 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 2418 std::string namestr, shiftstr, rangestr; 2419 2420 if (R->getValueAsBit("isVCVT_N")) { 2421 // VCVT between floating- and fixed-point values takes an immediate 2422 // in the range [1, 32] for f32, or [1, 64] for f64. 2423 ck = ClassB; 2424 if (name.find("32") != std::string::npos) 2425 rangestr = "l = 1; u = 31"; // upper bound = l + u 2426 else if (name.find("64") != std::string::npos) 2427 rangestr = "l = 1; u = 63"; 2428 else 2429 PrintFatalError(R->getLoc(), 2430 "Fixed point convert name should contains \"32\" or \"64\""); 2431 } else if (!ProtoHasScalar(Proto)) { 2432 // Builtins which are overloaded by type will need to have their upper 2433 // bound computed at Sema time based on the type constant. 2434 ck = ClassB; 2435 if (R->getValueAsBit("isShift")) { 2436 shiftstr = ", true"; 2437 2438 // Right shifts have an 'r' in the name, left shifts do not. 
2439 if (name.find('r') != std::string::npos) 2440 rangestr = "l = 1; "; 2441 } 2442 rangestr += "u = RFT(TV" + shiftstr + ")"; 2443 } else { 2444 // The immediate generally refers to a lane in the preceding argument. 2445 assert(immPos > 0 && "unexpected immediate operand"); 2446 rangestr = 2447 "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti])); 2448 } 2449 // Make sure cases appear only once by uniquing them in a string map. 2450 namestr = MangleName(name, TypeVec[ti], ck); 2451 if (EmittedMap.count(namestr)) 2452 continue; 2453 EmittedMap[namestr] = OpNone; 2454 2455 // Calculate the index of the immediate that should be range checked. 2456 unsigned immidx = 0; 2457 2458 // Builtins that return a struct of multiple vectors have an extra 2459 // leading arg for the struct return. 2460 if (Proto[0] >= '2' && Proto[0] <= '4') 2461 ++immidx; 2462 2463 // Add one to the index for each argument until we reach the immediate 2464 // to be checked. Structs of vectors are passed as multiple arguments. 2465 for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) { 2466 switch (Proto[ii]) { 2467 default: 2468 immidx += 1; 2469 break; 2470 case '2': 2471 immidx += 2; 2472 break; 2473 case '3': 2474 immidx += 3; 2475 break; 2476 case '4': 2477 immidx += 4; 2478 break; 2479 case 'i': 2480 ie = ii + 1; 2481 break; 2482 } 2483 } 2484 if (isA64RangeCheck) 2485 OS << "case AArch64::BI__builtin_neon_"; 2486 else 2487 OS << "case ARM::BI__builtin_neon_"; 2488 OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; " 2489 << rangestr << "; break;\n"; 2490 } 2491 } 2492 OS << "#endif\n\n"; 2493 } 2494 2495 /// Generate the ARM and AArch64 overloaded type checking code for 2496 /// SemaChecking.cpp, checking for unique builtin declarations. 
void
NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
                                      StringMap<ClassKind> &A64IntrinsicMap,
                                      bool isA64TypeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate the overloaded type checking code for SemaChecking.cpp
  if (isA64TypeCheck)
    OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
  else
    OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only records backed by a __builtin (OpNone) are type-checked here;
    // operation-based intrinsics never reach Sema as builtins.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    std::string name = R->getValueAsString("Name");
    // "Name@Prototype" key used to match against the AArch64 intrinsic map.
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which have a scalar argument cannot be overloaded, no need to
    // check them if we are emitting the type checking code.
    if (ProtoHasScalar(Proto))
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    // Do not include AArch64 type checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64TypeCheck && isA64)
      continue;

    // Include ARM type check in AArch64 but only if ARM intrinsics
    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
    // redefined in AArch64 to handle an additional 2 x f64 type.
    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
    if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    // si/qi remember the index of the last non-quad and quad type seen;
    // mask/qmask accumulate one bit per legal NEON type enum value for the
    // non-quad and quad variants respectively.
    int si = -1, qi = -1;
    uint64_t mask = 0, qmask = 0;
    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the switch case(s) for this builtin for the type validation.
      bool quad = false, poly = false, usgn = false;
      (void) ClassifyType(TypeVec[ti], quad, poly, usgn);

      if (quad) {
        qi = ti;
        qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      } else {
        si = ti;
        mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      }
    }

    // Check if the builtin function has a pointer or const pointer argument.
    // 'c' is a const pointer, 'p' a mutable pointer; only the first such
    // argument is recorded.
    int PtrArgNum = -1;
    bool HasConstPtr = false;
    for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
      char ArgType = Proto[arg];
      if (ArgType == 'c') {
        HasConstPtr = true;
        PtrArgNum = arg - 1;
        break;
      }
      if (ArgType == 'p') {
        PtrArgNum = arg - 1;
        break;
      }
    }
    // For sret builtins (return codes '2'..'4'), adjust the pointer argument
    // index to account for the extra leading struct-return argument.
    if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
      PtrArgNum += 1;

    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
    // and vst1_lane intrinsics.  Using a pointer to the vector element
    // type with one of those operations causes codegen to select an aligned
    // load/store instruction.  If you want an unaligned operation,
    // the pointer argument needs to have less alignment than element type,
    // so just accept any pointer type.
    if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
      PtrArgNum = -1;
      HasConstPtr = false;
    }

    // Emit one case for the non-quad variant (if any) and one for the quad
    // variant (if any), each carrying its accumulated legal-type bitmask.
    if (mask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
         << "0x" << utohexstr(mask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
    if (qmask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
         << "0x" << utohexstr(qmask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}

/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def
/// declaration of builtins, checking for unique builtin declarations.
void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
                                 StringMap<ClassKind> &A64IntrinsicMap,
                                 bool isA64GenBuiltinDef) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate BuiltinsARM.def and BuiltinsAArch64.def
  if (isA64GenBuiltinDef)
    OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
  else
    OS << "#ifdef GET_NEON_BUILTINS\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only records backed by a __builtin (OpNone) get BUILTIN() macros;
    // operation-based intrinsics are expanded inline in arm_neon.h.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string name = R->getValueAsString("Name");
    // "Name@Prototype" key used to match against the AArch64 intrinsic map.
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    std::string Types = R->getValueAsString("Types");
    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    // Do not include AArch64 BUILTIN() macros if not generating
    // code for AArch64
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64GenBuiltinDef && isA64)
      continue;

    // Include ARM BUILTIN() macros in AArch64 but only if ARM intrinsics
    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
    // redefined in AArch64 to handle an additional 2 x f64 type.
    if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the declaration for this builtin, ensuring
      // that each unique BUILTIN() macro appears only once in the output
      // stream.
      std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
      if (EmittedMap.count(bd))
        continue;

      EmittedMap[bd] = OpNone;
      OS << bd << "\n";
    }
  }
  OS << "#endif\n\n";
}

/// runHeader - Emit a file with sections defining:
/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
/// 2. the SemaChecking code for the type overload checking.
/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
void NeonEmitter::runHeader(raw_ostream &OS) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");

  // Build a map of AArch64 intrinsics to be used in uniqueness checks.
  // Keyed by "Name@Prototype"; the first definition seen wins.
  StringMap<ClassKind> A64IntrinsicMap;
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64)
      continue;

    ClassKind CK = ClassNone;
    if (R->getSuperClasses().size() >= 2)
      CK = ClassMap[R->getSuperClasses()[1]];

    std::string Name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    std::string Rename = Name + "@" + Proto;
    if (A64IntrinsicMap.count(Rename))
      continue;
    A64IntrinsicMap[Rename] = CK;
  }

  // Each section is emitted twice: once for ARM, once for AArch64, with the
  // map above used to suppress ARM entries that AArch64 redefines.

  // Generate BuiltinsARM.def for ARM
  genBuiltinsDef(OS, A64IntrinsicMap, false);

  // Generate BuiltinsAArch64.def for AArch64
  genBuiltinsDef(OS, A64IntrinsicMap, true);

  // Generate ARM overloaded type checking code for SemaChecking.cpp
  genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);

  // Generate AArch64 overloaded type checking code for SemaChecking.cpp
  genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);

  // Generate ARM range checking code for shift/lane immediates.
  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);

  // Generate the AArch64 range checking code for shift/lane immediates.
  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
}

/// GenTest - Write out a test for the intrinsic specified by the name and
/// type strings, including the embedded patterns for FileCheck to match.
2745 static std::string GenTest(const std::string &name, 2746 const std::string &proto, 2747 StringRef outTypeStr, StringRef inTypeStr, 2748 bool isShift, bool isHiddenLOp, 2749 ClassKind ck, const std::string &InstName, 2750 bool isA64, 2751 std::string & testFuncProto) { 2752 assert(!proto.empty() && ""); 2753 std::string s; 2754 2755 // Function name with type suffix 2756 std::string mangledName = MangleName(name, outTypeStr, ClassS); 2757 if (outTypeStr != inTypeStr) { 2758 // If the input type is different (e.g., for vreinterpret), append a suffix 2759 // for the input type. String off a "Q" (quad) prefix so that MangleName 2760 // does not insert another "q" in the name. 2761 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0); 2762 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff); 2763 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS); 2764 } 2765 2766 // todo: GenerateChecksForIntrinsic does not generate CHECK 2767 // for aarch64 instructions yet 2768 std::vector<std::string> FileCheckPatterns; 2769 if (!isA64) { 2770 GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName, 2771 isHiddenLOp, FileCheckPatterns); 2772 s+= "// CHECK_ARM: test_" + mangledName + "\n"; 2773 } 2774 s += "// CHECK_AARCH64: test_" + mangledName + "\n"; 2775 2776 // Emit the FileCheck patterns. 2777 // If for any reason we do not want to emit a check, mangledInst 2778 // will be the empty string. 2779 if (FileCheckPatterns.size()) { 2780 for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(), 2781 e = FileCheckPatterns.end(); 2782 i != e; 2783 ++i) { 2784 s += "// CHECK_ARM: " + *i + "\n"; 2785 } 2786 } 2787 2788 // Emit the start of the test function. 2789 2790 testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "("; 2791 char arg = 'a'; 2792 std::string comma; 2793 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 2794 // Do not create arguments for values that must be immediate constants. 
2795 if (proto[i] == 'i') 2796 continue; 2797 testFuncProto += comma + TypeString(proto[i], inTypeStr) + " "; 2798 testFuncProto.push_back(arg); 2799 comma = ", "; 2800 } 2801 testFuncProto += ")"; 2802 2803 s+= testFuncProto; 2804 s+= " {\n "; 2805 2806 if (proto[0] != 'v') 2807 s += "return "; 2808 s += mangledName + "("; 2809 arg = 'a'; 2810 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 2811 if (proto[i] == 'i') { 2812 // For immediate operands, test the maximum value. 2813 if (isShift) 2814 s += "1"; // FIXME 2815 else 2816 // The immediate generally refers to a lane in the preceding argument. 2817 s += utostr(RangeFromType(proto[i-1], inTypeStr)); 2818 } else { 2819 s.push_back(arg); 2820 } 2821 if ((i + 1) < e) 2822 s += ", "; 2823 } 2824 s += ");\n}\n\n"; 2825 return s; 2826 } 2827 2828 /// Write out all intrinsic tests for the specified target, checking 2829 /// for intrinsic test uniqueness. 2830 void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap, 2831 bool isA64GenTest) { 2832 if (isA64GenTest) 2833 OS << "#ifdef __aarch64__\n"; 2834 2835 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2836 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2837 Record *R = RV[i]; 2838 std::string name = R->getValueAsString("Name"); 2839 std::string Proto = R->getValueAsString("Prototype"); 2840 std::string Types = R->getValueAsString("Types"); 2841 bool isShift = R->getValueAsBit("isShift"); 2842 std::string InstName = R->getValueAsString("InstName"); 2843 bool isHiddenLOp = R->getValueAsBit("isHiddenLInst"); 2844 bool isA64 = R->getValueAsBit("isA64"); 2845 2846 // do not include AArch64 intrinsic test if not generating 2847 // code for AArch64 2848 if (!isA64GenTest && isA64) 2849 continue; 2850 2851 SmallVector<StringRef, 16> TypeVec; 2852 ParseTypes(R, Types, TypeVec); 2853 2854 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 2855 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()]; 2856 if 
(kind == OpUnavailable) 2857 continue; 2858 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 2859 if (kind == OpReinterpret) { 2860 bool outQuad = false; 2861 bool dummy = false; 2862 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy); 2863 for (unsigned srcti = 0, srcte = TypeVec.size(); 2864 srcti != srcte; ++srcti) { 2865 bool inQuad = false; 2866 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); 2867 if (srcti == ti || inQuad != outQuad) 2868 continue; 2869 std::string testFuncProto; 2870 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], 2871 isShift, isHiddenLOp, ck, InstName, isA64, 2872 testFuncProto); 2873 if (EmittedMap.count(testFuncProto)) 2874 continue; 2875 EmittedMap[testFuncProto] = kind; 2876 OS << s << "\n"; 2877 } 2878 } else { 2879 std::string testFuncProto; 2880 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift, 2881 isHiddenLOp, ck, InstName, isA64, testFuncProto); 2882 if (EmittedMap.count(testFuncProto)) 2883 continue; 2884 EmittedMap[testFuncProto] = kind; 2885 OS << s << "\n"; 2886 } 2887 } 2888 } 2889 2890 if (isA64GenTest) 2891 OS << "#endif\n"; 2892 } 2893 /// runTests - Write out a complete set of tests for all of the Neon 2894 /// intrinsics. 2895 void NeonEmitter::runTests(raw_ostream &OS) { 2896 OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi " 2897 "apcs-gnu\\\n" 2898 "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n" 2899 "// RUN: | FileCheck %s -check-prefix=CHECK_ARM\n" 2900 "\n" 2901 "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n" 2902 "// RUN -target-feature +neon -ffreestanding -S -o - %s \\\n" 2903 "// RUN: | FileCheck %s -check-prefix=CHECK_AARCH64\n" 2904 "\n" 2905 "// REQUIRES: long_tests\n" 2906 "\n" 2907 "#include <arm_neon.h>\n" 2908 "\n"; 2909 2910 // ARM tests must be emitted before AArch64 tests to ensure 2911 // tests for intrinsics that are common to ARM and AArch64 2912 // appear only once in the output stream. 
2913 // The check for uniqueness is done in genTargetTest. 2914 StringMap<OpKind> EmittedMap; 2915 2916 genTargetTest(OS, EmittedMap, false); 2917 2918 genTargetTest(OS, EmittedMap, true); 2919 } 2920 2921 namespace clang { 2922 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 2923 NeonEmitter(Records).run(OS); 2924 } 2925 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 2926 NeonEmitter(Records).runHeader(OS); 2927 } 2928 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 2929 NeonEmitter(Records).runTests(OS); 2930 } 2931 } // End namespace clang 2932