1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This tablegen backend is responsible for emitting arm_neon.h, which includes 11 // a declaration and definition of each function specified by the ARM NEON 12 // compiler interface. See ARM document DUI0348B. 13 // 14 // Each NEON instruction is implemented in terms of 1 or more functions which 15 // are suffixed with the element type of the input vectors. Functions may be 16 // implemented in terms of generic vector operations such as +, *, -, etc. or 17 // by calling a __builtin_-prefixed function which will be handled by clang's 18 // CodeGen library. 19 // 20 // Additional validation code can be generated by this file when runHeader() is 21 // called, rather than the normal run() entry point. A complete set of tests 22 // for Neon intrinsics can be generated by calling the runTests() entry point. 
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <string>
using namespace llvm;

/// OpKind - The set of operations an intrinsic definition may be implemented
/// in terms of (generic vector ops such as add/sub/mul, lane/scalar variants,
/// widening/narrowing "Hi" forms, etc.). Each value corresponds to an OP_*
/// record name in arm_neon.td; see the OpMap initialization below.
enum OpKind {
  OpNone,
  OpUnavailable,
  OpAdd,
  OpAddl,
  OpAddlHi,
  OpAddw,
  OpAddwHi,
  OpSub,
  OpSubl,
  OpSublHi,
  OpSubw,
  OpSubwHi,
  OpMul,
  OpMla,
  OpMlal,
  OpMullHi,
  OpMlalHi,
  OpMls,
  OpMlsl,
  OpMlslHi,
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpMlalN,
  OpMlslN,
  OpMulLane,
  OpMulXLane,
  OpMullLane,
  OpMullHiLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlalHiLane,
  OpMlslLane,
  OpMlslHiLane,
  OpQDMullLane,
  OpQDMullHiLane,
  OpQDMlalLane,
  OpQDMlalHiLane,
  OpQDMlslLane,
  OpQDMlslHiLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  OpFMSLane,
  OpFMSLaneQ,
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpReinterpret,
  OpAddhnHi,
  OpRAddhnHi,
  OpSubhnHi,
  OpRSubhnHi,
  OpAbdl,
  OpAbdlHi,
  OpAba,
  OpAbal,
  OpAbalHi,
  OpQDMullHi,
  OpQDMlalHi,
  OpQDMlslHi,
  OpDiv,
  OpLongHi,
  OpNarrowHi,
  OpMovlHi,
  OpCopy
};

/// ClassKind - How an instruction's name is suffixed with its element type,
/// which also controls how it is tested and type-checked.
enum ClassKind {
  ClassNone,
  ClassI,     // generic integer instruction, e.g., "i8" suffix
  ClassS,     // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,     // width-specific instruction, e.g., "8" suffix
  ClassB,     // bitcast arguments with enum argument to specify type
  ClassL,     // Logical instructions which are op instructions
              // but we need to not emit any suffix for in our
              // tests.
  ClassNoTest // Instructions which we do not test since they are
              // not TRUE instructions.
};

/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins. These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  // Bit layout of Flags: low 4 bits hold the EltType, plus two modifier bits.
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };
  uint32_t Flags;

public:
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Float16,
    Float32,
    Float64
  };

  NeonTypeFlags(unsigned F) : Flags(F) {}
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
    if (IsUnsigned)
      Flags |= UnsignedFlag;
    if (IsQuad)
      Flags |= QuadFlag;
  }

  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace

namespace {
/// NeonEmitter - The tablegen backend itself. The constructor builds two
/// lookup tables from the records in arm_neon.td: OpMap translates OP_*
/// record names to OpKind values, and ClassMap translates instruction class
/// records (SInst, IInst, ...) to ClassKind values.
class NeonEmitter {
  RecordKeeper &Records;
  StringMap<OpKind> OpMap;
  DenseMap<Record*, ClassKind> ClassMap;

public:
  NeonEmitter(RecordKeeper &R) : Records(R) {
    OpMap["OP_NONE"] = OpNone;
    OpMap["OP_UNAVAILABLE"] = OpUnavailable;
    OpMap["OP_ADD"] = OpAdd;
    OpMap["OP_ADDL"] = OpAddl;
    OpMap["OP_ADDLHi"] = OpAddlHi;
    OpMap["OP_ADDW"] = OpAddw;
    OpMap["OP_ADDWHi"] = OpAddwHi;
    OpMap["OP_SUB"] = OpSub;
    OpMap["OP_SUBL"] = OpSubl;
    OpMap["OP_SUBLHi"] = OpSublHi;
    OpMap["OP_SUBW"] = OpSubw;
    OpMap["OP_SUBWHi"] = OpSubwHi;
    OpMap["OP_MUL"] = OpMul;
    OpMap["OP_MLA"] = OpMla;
    OpMap["OP_MLAL"] = OpMlal;
    OpMap["OP_MULLHi"] = OpMullHi;
    OpMap["OP_MLALHi"] = OpMlalHi;
    OpMap["OP_MLS"] = OpMls;
    OpMap["OP_MLSL"] = OpMlsl;
    OpMap["OP_MLSLHi"] = OpMlslHi;
    OpMap["OP_MUL_N"] = OpMulN;
    OpMap["OP_MLA_N"] = OpMlaN;
    OpMap["OP_MLS_N"] = OpMlsN;
    OpMap["OP_MLAL_N"] = OpMlalN;
    OpMap["OP_MLSL_N"] = OpMlslN;
    OpMap["OP_MUL_LN"] = OpMulLane;
    OpMap["OP_MULX_LN"] = OpMulXLane;
    OpMap["OP_MULL_LN"] = OpMullLane;
    OpMap["OP_MULLHi_LN"] = OpMullHiLane;
    OpMap["OP_MLA_LN"] = OpMlaLane;
    OpMap["OP_MLS_LN"] = OpMlsLane;
    OpMap["OP_MLAL_LN"] = OpMlalLane;
    OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
    OpMap["OP_MLSL_LN"] = OpMlslLane;
    OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
    OpMap["OP_QDMULL_LN"] = OpQDMullLane;
    OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
    OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
    OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
    OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
    OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
    OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
    OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
    OpMap["OP_FMS_LN"] = OpFMSLane;
    OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
    OpMap["OP_EQ"] = OpEq;
    OpMap["OP_GE"] = OpGe;
    OpMap["OP_LE"] = OpLe;
    OpMap["OP_GT"] = OpGt;
    OpMap["OP_LT"] = OpLt;
    OpMap["OP_NEG"] = OpNeg;
    OpMap["OP_NOT"] = OpNot;
    OpMap["OP_AND"] = OpAnd;
    OpMap["OP_OR"] = OpOr;
    OpMap["OP_XOR"] = OpXor;
    OpMap["OP_ANDN"] = OpAndNot;
    OpMap["OP_ORN"] = OpOrNot;
    OpMap["OP_CAST"] = OpCast;
    OpMap["OP_CONC"] = OpConcat;
    OpMap["OP_HI"] = OpHi;
    OpMap["OP_LO"] = OpLo;
    OpMap["OP_DUP"] = OpDup;
    OpMap["OP_DUP_LN"] = OpDupLane;
    OpMap["OP_SEL"] = OpSelect;
    OpMap["OP_REV16"] = OpRev16;
    OpMap["OP_REV32"] = OpRev32;
    OpMap["OP_REV64"] = OpRev64;
    OpMap["OP_REINT"] = OpReinterpret;
    OpMap["OP_ADDHNHi"] = OpAddhnHi;
    OpMap["OP_RADDHNHi"] = OpRAddhnHi;
    OpMap["OP_SUBHNHi"] = OpSubhnHi;
    OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
    OpMap["OP_ABDL"] = OpAbdl;
    OpMap["OP_ABDLHi"] = OpAbdlHi;
    OpMap["OP_ABA"] = OpAba;
    OpMap["OP_ABAL"] = OpAbal;
    OpMap["OP_ABALHi"] = OpAbalHi;
    OpMap["OP_QDMULLHi"] = OpQDMullHi;
    OpMap["OP_QDMLALHi"] = OpQDMlalHi;
    OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
    OpMap["OP_DIV"] = OpDiv;
    OpMap["OP_LONG_HI"] = OpLongHi;
    OpMap["OP_NARROW_HI"] = OpNarrowHi;
    OpMap["OP_MOVL_HI"] = OpMovlHi;
    OpMap["OP_COPY"] = OpCopy;

    // Map each instruction-class record from arm_neon.td onto its ClassKind.
    Record *SI = R.getClass("SInst");
    Record *II = R.getClass("IInst");
    Record *WI = R.getClass("WInst");
    Record *SOpI = R.getClass("SOpInst");
    Record *IOpI = R.getClass("IOpInst");
    Record *WOpI = R.getClass("WOpInst");
    Record *LOpI = R.getClass("LOpInst");
    Record *NoTestOpI = R.getClass("NoTestOpInst");

    ClassMap[SI] = ClassS;
    ClassMap[II] = ClassI;
    ClassMap[WI] = ClassW;
    ClassMap[SOpI] = ClassS;
    ClassMap[IOpI] = ClassI;
    ClassMap[WOpI] = ClassW;
    ClassMap[LOpI] = ClassL;
    ClassMap[NoTestOpI] = ClassNoTest;
  }

  // run - Emit arm_neon.h.inc
  void run(raw_ostream &o);

  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
  void runHeader(raw_ostream &o);

  // runTests - Emit tests for all the Neon intrinsics.
  void runTests(raw_ostream &o);

private:
  void emitIntrinsic(raw_ostream &OS, Record *R,
                     StringMap<ClassKind> &EmittedMap);
  void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
                      bool isA64GenBuiltinDef);
  void genOverloadTypeCheckCode(raw_ostream &OS,
                                StringMap<ClassKind> &A64IntrinsicMap,
                                bool isA64TypeCheck);
  void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                  StringMap<ClassKind> &A64IntrinsicMap,
                                  bool isA64RangeCheck);
  void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
                     bool isA64TestGen);
};
} // end anonymous namespace

/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
/// which each StringRef representing a single type declared in the string.
/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
/// 2xfloat and 4xfloat respectively.
318 static void ParseTypes(Record *r, std::string &s, 319 SmallVectorImpl<StringRef> &TV) { 320 const char *data = s.data(); 321 int len = 0; 322 323 for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) { 324 if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U' 325 || data[len] == 'H' || data[len] == 'S') 326 continue; 327 328 switch (data[len]) { 329 case 'c': 330 case 's': 331 case 'i': 332 case 'l': 333 case 'h': 334 case 'f': 335 case 'd': 336 break; 337 default: 338 PrintFatalError(r->getLoc(), 339 "Unexpected letter: " + std::string(data + len, 1)); 340 } 341 TV.push_back(StringRef(data, len + 1)); 342 data += len + 1; 343 len = -1; 344 } 345 } 346 347 /// Widen - Convert a type code into the next wider type. char -> short, 348 /// short -> int, etc. 349 static char Widen(const char t) { 350 switch (t) { 351 case 'c': 352 return 's'; 353 case 's': 354 return 'i'; 355 case 'i': 356 return 'l'; 357 case 'h': 358 return 'f'; 359 default: 360 PrintFatalError("unhandled type in widen!"); 361 } 362 } 363 364 /// Narrow - Convert a type code into the next smaller type. short -> char, 365 /// float -> half float, etc. 366 static char Narrow(const char t) { 367 switch (t) { 368 case 's': 369 return 'c'; 370 case 'i': 371 return 's'; 372 case 'l': 373 return 'i'; 374 case 'f': 375 return 'h'; 376 default: 377 PrintFatalError("unhandled type in narrow!"); 378 } 379 } 380 381 static std::string GetNarrowTypestr(StringRef ty) 382 { 383 std::string s; 384 for (size_t i = 0, end = ty.size(); i < end; i++) { 385 switch (ty[i]) { 386 case 's': 387 s += 'c'; 388 break; 389 case 'i': 390 s += 's'; 391 break; 392 case 'l': 393 s += 'i'; 394 break; 395 default: 396 s += ty[i]; 397 break; 398 } 399 } 400 401 return s; 402 } 403 404 /// For a particular StringRef, return the base type code, and whether it has 405 /// the quad-vector, polynomial, or unsigned modifiers set. 
static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
  unsigned off = 0;
  // ignore scalar.
  if (ty[off] == 'S') {
    ++off;
  }
  // remember quad.
  if (ty[off] == 'Q' || ty[off] == 'H') {
    quad = true;
    ++off;
  }

  // remember poly.
  if (ty[off] == 'P') {
    poly = true;
    ++off;
  }

  // remember unsigned.
  if (ty[off] == 'U') {
    usgn = true;
    ++off;
  }

  // base type to get the type string for.
  return ty[off];
}

/// ModType - Transform a type code and its modifiers based on a mod code. The
/// mod code definitions may be found at the top of arm_neon.td.
/// Note: only the modifier flags that a given mod code touches are changed;
/// the callers are responsible for initializing all flags to false.
static char ModType(const char mod, char type, bool &quad, bool &poly,
                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
  switch (mod) {
  case 't':
    if (poly) {
      poly = false;
      usgn = true;
    }
    break;
  case 'u':
    // Force unsigned; floating types map to same-width integers.
    usgn = true;
    poly = false;
    if (type == 'f')
      type = 'i';
    if (type == 'd')
      type = 'l';
    break;
  case 'x':
    // Force signed; floating types map to same-width integers.
    usgn = false;
    poly = false;
    if (type == 'f')
      type = 'i';
    if (type == 'd')
      type = 'l';
    break;
  case 'o':
    scal = true;
    type = 'd';
    usgn = false;
    break;
  case 'y':
    scal = true;
    // FALLTHROUGH: 'y' is the scalar variant of 'f'.
  case 'f':
    if (type == 'h')
      quad = true;
    type = 'f';
    usgn = false;
    break;
  case 'g':
    quad = false;
    break;
  case 'j':
    quad = true;
    break;
  case 'w':
    type = Widen(type);
    quad = true;
    break;
  case 'n':
    type = Widen(type);
    break;
  case 'i':
    type = 'i';
    scal = true;
    break;
  case 'l':
    type = 'l';
    scal = true;
    usgn = true;
    break;
  case 'z':
    type = Narrow(type);
    scal = true;
    break;
  case 'r':
    type = Widen(type);
    scal = true;
    break;
  case 's':
  case 'a':
    scal = true;
    break;
  case 'k':
    quad = true;
    break;
  case 'c':
    cnst = true;
    // FALLTHROUGH: 'c' is the const variant of 'p'.
  case 'p':
    pntr = true;
    scal = true;
    break;
  case 'h':
    type = Narrow(type);
    if (type == 'h')
      quad = false;
    break;
  case 'q':
    type = Narrow(type);
    quad = true;
    break;
  case 'e':
    type = Narrow(type);
    usgn = true;
    break;
  case 'm':
    type = Narrow(type);
    quad = false;
    break;
  default:
    break;
  }
  return type;
}

/// TypeString - for a modifier and type, generate the name of the typedef for
/// that type. QUc -> uint8x8_t.
static std::string TypeString(const char mod, StringRef typestr) {
  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  if (mod == 'v')
    return "void";
  if (mod == 'i')
    return "int";

  // base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  SmallString<128> s;

  if (usgn)
    s.push_back('u');

  // Emit the element-type name, then (for vectors) the lane count, which
  // depends on whether this is a 64-bit or 128-bit (quad) vector.
  switch (type) {
  case 'c':
    s += poly ? "poly8" : "int8";
    if (scal)
      break;
    s += quad ? "x16" : "x8";
    break;
  case 's':
    s += poly ? "poly16" : "int16";
    if (scal)
      break;
    s += quad ? "x8" : "x4";
    break;
  case 'i':
    s += "int32";
    if (scal)
      break;
    s += quad ? "x4" : "x2";
    break;
  case 'l':
    s += "int64";
    if (scal)
      break;
    s += quad ? "x2" : "x1";
    break;
  case 'h':
    s += "float16";
    if (scal)
      break;
    s += quad ? "x8" : "x4";
    break;
  case 'f':
    s += "float32";
    if (scal)
      break;
    s += quad ? "x4" : "x2";
    break;
  case 'd':
    s += "float64";
    if (scal)
      break;
    s += quad ? "x2" : "x1";
    break;

  default:
    PrintFatalError("unhandled type!");
  }

  // Mod codes '2'..'4' denote the multi-vector struct types (e.g. int8x8x2_t).
  if (mod == '2')
    s += "x2";
  if (mod == '3')
    s += "x3";
  if (mod == '4')
    s += "x4";

  // Append _t, finishing the type string typedef type.
  s += "_t";

  if (cnst)
    s += " const";

  if (pntr)
    s += " *";

  return s.str();
}

/// BuiltinTypeString - for a modifier and type, generate the clang
/// BuiltinsARM.def prototype code for the function. See the top of clang's
/// Builtins.def for a description of the type strings.
static std::string BuiltinTypeString(const char mod, StringRef typestr,
                                     ClassKind ck, bool ret) {
  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  if (mod == 'v')
    return "v"; // void
  if (mod == 'i')
    return "i"; // int

  // base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  // All pointers are void* pointers.  Change type to 'v' now.
  if (pntr) {
    usgn = false;
    poly = false;
    type = 'v';
  }
  // Treat half-float ('h') types as unsigned short ('s') types.
  if (type == 'h') {
    type = 's';
    usgn = true;
  }
  // Polys and generic/width-specific scalar integers are emitted as unsigned.
  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
                        scal && type != 'f' && type != 'd');

  if (scal) {
    SmallString<128> s;

    if (usgn)
      s.push_back('U');
    else if (type == 'c')
      s.push_back('S'); // make chars explicitly signed

    if (type == 'l') // 64-bit long
      s += "LLi";
    else
      s.push_back(type);

    if (cnst)
      s.push_back('C');
    if (pntr)
      s.push_back('*');
    return s.str();
  }

  // Since the return value must be one type, return a vector type of the
  // appropriate width which we will bitcast.  An exception is made for
  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
  // fashion, storing them to a pointer arg.
  if (ret) {
    if (mod >= '2' && mod <= '4')
      return "vv*"; // void result with void* first argument
    if (mod == 'f' || (ck != ClassB && type == 'f'))
      return quad ? "V4f" : "V2f";
    if (ck != ClassB && type == 'd')
      return quad ? "V2d" : "V1d";
    if (ck != ClassB && type == 's')
      return quad ? "V8s" : "V4s";
    if (ck != ClassB && type == 'i')
      return quad ? "V4i" : "V2i";
    if (ck != ClassB && type == 'l')
      return quad ? "V2LLi" : "V1LLi";

    // ClassB and 8-bit elements: a vector of signed chars.
    return quad ? "V16Sc" : "V8Sc";
  }

  // Non-return array types are passed as individual vectors.
  if (mod == '2')
    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
  if (mod == '3')
    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
  if (mod == '4')
    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";

  if (mod == 'f' || (ck != ClassB && type == 'f'))
    return quad ? "V4f" : "V2f";
  if (ck != ClassB && type == 'd')
    return quad ? "V2d" : "V1d";
  if (ck != ClassB && type == 's')
    return quad ? "V8s" : "V4s";
  if (ck != ClassB && type == 'i')
    return quad ? "V4i" : "V2i";
  if (ck != ClassB && type == 'l')
    return quad ? "V2LLi" : "V1LLi";

  return quad ? "V16Sc" : "V8Sc";
}

/// InstructionTypeCode - Computes the ARM argument character code and
/// quad status for a specific type string and ClassKind.
static void InstructionTypeCode(const StringRef &typeStr,
                                const ClassKind ck,
                                bool &quad,
                                std::string &typeCode) {
  bool poly = false;
  bool usgn = false;
  char type = ClassifyType(typeStr, quad, poly, usgn);

  switch (type) {
  case 'c':
    switch (ck) {
    case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
    case ClassI: typeCode = "i8"; break;
    case ClassW: typeCode = "8"; break;
    default: break;
    }
    break;
  case 's':
    switch (ck) {
    case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
    case ClassI: typeCode = "i16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'i':
    switch (ck) {
    case ClassS: typeCode = usgn ? "u32" : "s32"; break;
    case ClassI: typeCode = "i32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'l':
    switch (ck) {
    case ClassS: typeCode = usgn ? "u64" : "s64"; break;
    case ClassI: typeCode = "i64"; break;
    case ClassW: typeCode = "64"; break;
    default: break;
    }
    break;
  case 'h':
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'f':
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'd':
    switch (ck) {
    case ClassS:
    case ClassI:
      typeCode += "f64";
      break;
    case ClassW:
      // There is no width-specific form for 64-bit float.
      PrintFatalError("unhandled type!");
    default:
      break;
    }
    break;
  default:
    PrintFatalError("unhandled type!");
  }
}

/// Insert_BHSD_Suffix - For a scalar type string (leading 'S'), return the
/// AArch64 scalar register-size suffix letter ('b', 'h', 's' or 'd') matching
/// the base element type, or 0 if no suffix applies.
static char Insert_BHSD_Suffix(StringRef typestr) {
  unsigned off = 0;
  if (typestr[off++] == 'S') {
    // Skip any remaining modifier letters to find the base type.
    while (typestr[off] == 'Q' || typestr[off] == 'H' ||
           typestr[off] == 'P' || typestr[off] == 'U')
      ++off;
    switch (typestr[off]) {
    default: break;
    case 'c': return 'b';
    case 's': return 'h';
    case 'i':
    case 'f': return 's';
    case 'l':
    case 'd': return 'd';
    }
  }
  return 0;
}

/// MangleName - Append a type or width suffix to a base neon function name,
/// and insert a 'q' in the appropriate location if type string starts with 'Q'.
/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
/// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
static std::string MangleName(const std::string &name, StringRef typestr,
                              ClassKind ck) {
  // vcvt_f32_f16 is already fully mangled in the .td file.
  if (name == "vcvt_f32_f16")
    return name;

  bool quad = false;
  std::string typeCode = "";

  InstructionTypeCode(typestr, ck, quad, typeCode);

  std::string s = name;

  if (typeCode.size() > 0) {
    s += "_" + typeCode;
  }

  if (ck == ClassB)
    s += "_v";

  // Insert a 'q' before the first '_' character so that it ends up before
  // _lane or _n on vector-scalar operations.
  if (typestr.find("Q") != StringRef::npos) {
    size_t pos = s.find('_');
    s = s.insert(pos, "q");
  }
  // Likewise insert the scalar 'b'/'h'/'s'/'d' suffix before the first '_'.
  char ins = Insert_BHSD_Suffix(typestr);
  if (ins) {
    size_t pos = s.find('_');
    s = s.insert(pos, &ins, 1);
  }

  return s;
}

/// PreprocessInstruction - Extract the instruction prefix and the postfix
/// properties (_n/_lane/_dup, special vcvt, vtbl/vtbx N) that the FileCheck
/// pattern generators below key off of.
static void PreprocessInstruction(const StringRef &Name,
                                  const std::string &InstName,
                                  std::string &Prefix,
                                  bool &HasNPostfix,
                                  bool &HasLanePostfix,
                                  bool &HasDupPostfix,
                                  bool &IsSpecialVCvt,
                                  size_t &TBNumber) {
  // All of our instruction name fields from arm_neon.td are of the form
  //   <instructionname>_...
  // Thus we grab our instruction name via computation of said Prefix.
  const size_t PrefixEnd = Name.find_first_of('_');
  // If InstName is passed in, we use that instead of our name Prefix.
  Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;

  const StringRef Postfix = Name.slice(PrefixEnd, Name.size());

  HasNPostfix = Postfix.count("_n");
  HasLanePostfix = Postfix.count("_lane");
  HasDupPostfix = Postfix.count("_dup");
  IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");

  if (InstName.compare("vtbl") == 0 ||
      InstName.compare("vtbx") == 0) {
    // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
    // encoding to get its true value (48 == ASCII '0').
    TBNumber = Name[Name.size()-1] - 48;
  }
}

/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
/// extracted, generate a FileCheck pattern for a Load Or Store
static void
GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
                                          const std::string& OutTypeCode,
                                          const bool &IsQuad,
                                          const bool &HasDupPostfix,
                                          const bool &HasLanePostfix,
                                          const size_t Count,
                                          std::string &RegisterSuffix) {
  const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
  // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
  // will output a series of v{ld,st}1s, so we have to handle it specially.
  if ((Count == 3 || Count == 4) && IsQuad) {
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}";
  } else {

    // Handle normal loads and stores.
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      if (IsQuad && !HasLanePostfix) {
        // A quad register is printed as a pair of d registers.
        RegisterSuffix += ", d{{[0-9]+}}";
        if (HasDupPostfix) {
          RegisterSuffix += "[]";
        }
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}, [r{{[0-9]+}}";

    // We only include the alignment hint if we have a vld1.*64 or
    // a dup/lane instruction.
    if (IsLDSTOne) {
      if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
        RegisterSuffix += ":" + OutTypeCode;
      }
    }

    RegisterSuffix += "]";
  }
}

/// HasNPostfixAndScalarArgs - True for the multiply-style intrinsics whose
/// _n variants take a scalar argument.
static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
                                     const bool &HasNPostfix) {
  return (NameRef.count("vmla") ||
          NameRef.count("vmlal") ||
          NameRef.count("vmlsl") ||
          NameRef.count("vmull") ||
          NameRef.count("vqdmlal") ||
          NameRef.count("vqdmlsl") ||
          NameRef.count("vqdmulh") ||
          NameRef.count("vqdmull") ||
          NameRef.count("vqrdmulh")) && HasNPostfix;
}

/// IsFiveOperandLaneAccumulator - True for the _lane accumulate intrinsics
/// whose checked instruction takes five operands.
static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
                                         const bool &HasLanePostfix) {
  return (NameRef.count("vmla") ||
          NameRef.count("vmls") ||
          NameRef.count("vmlal") ||
          NameRef.count("vmlsl") ||
          (NameRef.count("vmul") && NameRef.size() == 3)||
          NameRef.count("vqdmlal") ||
          NameRef.count("vqdmlsl") ||
          NameRef.count("vqdmulh") ||
          NameRef.count("vqrdmulh")) && HasLanePostfix;
}

/// IsSpecialLaneMultiply - True for quad vmul/*mulh _lane and non-quad
/// *mull _lane intrinsics, which need their normalized proto adjusted.
static bool IsSpecialLaneMultiply(const StringRef &NameRef,
                                  const bool &HasLanePostfix,
                                  const bool &IsQuad) {
  const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
                            && IsQuad;
  const bool IsVMull = NameRef.count("mull") && !IsQuad;
  return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
}

/// NormalizeProtoForRegisterPatternCreation - Reduce a prototype string to a
/// canonical form over the alphabet {'q' = quad reg, 'd' = double reg,
/// 'i' = immediate, 'a' = lane accumulator}, then apply per-instruction
/// special cases, so GenerateRegisterCheckPattern only has to handle four
/// operand kinds.
static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
                                                     const std::string &Proto,
                                                     const bool &HasNPostfix,
                                                     const bool &IsQuad,
                                                     const bool &HasLanePostfix,
                                                     const bool &HasDupPostfix,
                                                     std::string &NormedProto) {
  // Handle generic case.
  const StringRef NameRef(Name);
  for (size_t i = 0, end = Proto.size(); i < end; i++) {
    switch (Proto[i]) {
    case 'u':
    case 'f':
    case 'd':
    case 's':
    case 'x':
    case 't':
    case 'n':
      NormedProto += IsQuad? 'q' : 'd';
      break;
    case 'w':
    case 'k':
      NormedProto += 'q';
      break;
    case 'g':
    case 'j':
    case 'h':
    case 'e':
      NormedProto += 'd';
      break;
    case 'i':
      NormedProto += HasLanePostfix? 'a' : 'i';
      break;
    case 'a':
      if (HasLanePostfix) {
        NormedProto += 'a';
      } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
        NormedProto += IsQuad? 'q' : 'd';
      } else {
        NormedProto += 'i';
      }
      break;
    }
  }

  // Handle Special Cases.
  const bool IsNotVExt = !NameRef.count("vext");
  const bool IsVPADAL = NameRef.count("vpadal");
  const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
                                                          HasLanePostfix);
  const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
                                                      IsQuad);

  if (IsSpecialLaneMul) {
    // If we have a special lane multiply, replace the third character with
    // the fourth and drop everything after it (keep characters 0, 1, 3).
    NormedProto[2] = NormedProto[3];
    NormedProto.erase(3);
  } else if (NormedProto.size() == 4 &&
             NormedProto[0] == NormedProto[1] &&
             IsNotVExt) {
    // If NormedProto.size() == 4 and the first two proto characters are the
    // same, ignore the first.
    NormedProto = NormedProto.substr(1, 3);
  } else if (Is5OpLaneAccum) {
    // If we have a 5 op lane accumulator operation, we take characters 1,2,4
    std::string tmp = NormedProto.substr(1,2);
    tmp += NormedProto[4];
    NormedProto = tmp;
  } else if (IsVPADAL) {
    // If we have VPADAL, ignore the first character.
    NormedProto = NormedProto.substr(0, 2);
  } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
    // If our instruction is a dup instruction, keep only the first and
    // last characters.
    std::string tmp = "";
    tmp += NormedProto[0];
    tmp += NormedProto[NormedProto.size()-1];
    NormedProto = tmp;
  }
}

/// GenerateRegisterCheckPatterns - Given a bunch of data we have
/// extracted, generate a FileCheck pattern to check that an
/// instruction's arguments are correct.
static void GenerateRegisterCheckPattern(const std::string &Name,
                                         const std::string &Proto,
                                         const std::string &OutTypeCode,
                                         const bool &HasNPostfix,
                                         const bool &IsQuad,
                                         const bool &HasLanePostfix,
                                         const bool &HasDupPostfix,
                                         const size_t &TBNumber,
                                         std::string &RegisterSuffix) {

  RegisterSuffix = "";

  const StringRef NameRef(Name);
  const StringRef ProtoRef(Proto);

  // vdup_n/vmov_n get no register pattern.
  if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
    return;
  }

  const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
  const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");

  if (IsLoadStore) {
    // Grab N value from v{ld,st}N using its ascii representation
    // (48 == ASCII '0').
    const size_t Count = NameRef[3] - 48;

    GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
                                              HasDupPostfix, HasLanePostfix,
                                              Count, RegisterSuffix);
  } else if (IsTBXOrTBL) {
    RegisterSuffix += "d{{[0-9]+}}, {";
    for (size_t i = 0; i < TBNumber-1; i++) {
      RegisterSuffix += "d{{[0-9]+}}, ";
    }
    RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
  } else {
    // Handle a normal instruction.
    if (NameRef.count("vget") || NameRef.count("vset"))
      return;

    // We first normalize our proto, since we only need to emit 4
    // different types of checks, yet have more than 4 proto types
    // that map onto those 4 patterns.
    std::string NormalizedProto("");
    NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
                                             HasLanePostfix, HasDupPostfix,
                                             NormalizedProto);

    for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
      const char &c = NormalizedProto[i];
      switch (c) {
      case 'q':
        RegisterSuffix += "q{{[0-9]+}}, ";
        break;

      case 'd':
        RegisterSuffix += "d{{[0-9]+}}, ";
        break;

      case 'i':
        RegisterSuffix += "#{{[0-9]+}}, ";
        break;

      case 'a':
        RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
        break;
      }
    }

    // Remove extra ", ".
    RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
  }
}

/// GenerateChecksForIntrinsic - Given a specific instruction name +
/// typestr + class kind, generate the proper set of FileCheck
/// Patterns to check for. We could just return a string, but instead
/// use a vector since it provides us with the extra flexibility of
/// emitting multiple checks, which comes in handy for certain cases
/// like mla where we want to check for 2 different instructions.
static void GenerateChecksForIntrinsic(const std::string &Name,
                                       const std::string &Proto,
                                       StringRef &OutTypeStr,
                                       StringRef &InTypeStr,
                                       ClassKind Ck,
                                       const std::string &InstName,
                                       bool IsHiddenLOp,
                                       std::vector<std::string>& Result) {

  // If Ck is a ClassNoTest instruction, just return so no test is
  // emitted.
  if(Ck == ClassNoTest)
    return;

  if (Name == "vcvt_f32_f16") {
    Result.push_back("vcvt.f32.f16");
    return;
  }


  // Now we preprocess our instruction given the data we have to get the
  // data that we need.
  // Create a StringRef for String Manipulation of our Name.
  const StringRef NameRef(Name);
  // Instruction Prefix.
  std::string Prefix;
  // The type code for our out type string.
  std::string OutTypeCode;
  // To handle our different cases, we need to check for different postfixes.
  // Is our instruction a quad instruction.
  bool IsQuad = false;
  // Our instruction is of the form <instructionname>_n.
  bool HasNPostfix = false;
  // Our instruction is of the form <instructionname>_lane.
  bool HasLanePostfix = false;
  // Our instruction is of the form <instructionname>_dup.
  bool HasDupPostfix  = false;
  // Our instruction is a vcvt instruction which requires special handling.
  bool IsSpecialVCvt = false;
  // If we have a vtbxN or vtblN instruction, this is set to N.
  size_t TBNumber = -1;
  // Register Suffix
  std::string RegisterSuffix;

  PreprocessInstruction(NameRef, InstName, Prefix,
                        HasNPostfix, HasLanePostfix, HasDupPostfix,
                        IsSpecialVCvt, TBNumber);

  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
                               HasLanePostfix, HasDupPostfix, TBNumber,
                               RegisterSuffix);

  // In the following section, we handle a bunch of special cases. You can tell
  // a special case by the fact we are returning early.

  // If our instruction is a logical instruction without postfix or a
  // hidden LOp just return the current Prefix.
  if (Ck == ClassL || IsHiddenLOp) {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // If we have a vmov, due to the many different cases, some of which
  // vary within the different intrinsics generated for a single
  // instruction type, just output a vmov. (e.g. given an instruction
  // A, A.u32 might be vmov and A.u8 might be vmov.8).
  //
  // FIXME: Maybe something can be done about this. The two cases that we care
  // about are vmov as an LType and vmov as a WType.
  if (Prefix == "vmov") {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // In the following section, we handle special cases.

  if (OutTypeCode == "64") {
    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
    // type, the intrinsic will be optimized away, so just return
    // nothing.  On the other hand if we are handling an uint64x2_t
    // (i.e. quad instruction), vdup/vmov instructions should be
    // emitted.
    if (Prefix == "vdup" || Prefix == "vext") {
      if (IsQuad) {
        Result.push_back("{{vmov|vdup}}");
      }
      return;
    }

    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
    // multiple register operands.
    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
                            || Prefix == "vld4";
    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
                            || Prefix == "vst4";
    if (MultiLoadPrefix || MultiStorePrefix) {
      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
      return;
    }

    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
    // emitting said instructions. So return a check for
    // vldr/vstr/vmov/str instead.
    if (HasLanePostfix || HasDupPostfix) {
      if (Prefix == "vst1") {
        Result.push_back("{{str|vstr|vmov}}");
        return;
      } else if (Prefix == "vld1") {
        Result.push_back("{{ldr|vldr|vmov}}");
        return;
      }
    }
  }

  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
  // sometimes disassembled as vtrn.32. We use a regex to handle both
  // cases.
  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
    return;
  }

  // Currently on most ARM processors, we do not use vmla/vmls for
  // quad floating point operations. Instead we output vmul + vadd. So
  // check if we have one of those instructions and just output a
  // check for vmul.
  if (OutTypeCode == "f32") {
    if (Prefix == "vmls") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vsub." + OutTypeCode);
      return;
    } else if (Prefix == "vmla") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vadd." + OutTypeCode);
      return;
    }
  }

  // If we have vcvt, get the input type from the instruction name
  // (which should be of the form instname_inputtype) and append it
  // before the output type.
  if (Prefix == "vcvt") {
    const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
    Prefix += "." + inTypeCode;
  }

  // Append output type code to get our final mangled instruction.
  Prefix += "." + OutTypeCode;

  Result.push_back(Prefix + " " + RegisterSuffix);
}

/// UseMacro - Examine the prototype string to determine if the intrinsic
/// should be defined as a preprocessor macro instead of an inline function.
static bool UseMacro(const std::string &proto) {
  // If this builtin takes an immediate argument, we need to #define it rather
  // than use a standard declaration, so that SemaChecking can range check
  // the immediate passed by the user.
  if (proto.find('i') != std::string::npos)
    return true;

  // Pointer arguments need to use macros to avoid hiding aligned attributes
  // from the pointer type.
  if (proto.find('p') != std::string::npos ||
      proto.find('c') != std::string::npos)
    return true;

  return false;
}

/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is
/// defined as a macro should be accessed directly instead of being first
/// assigned to a local temporary.
1333 static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) { 1334 // True for constant ints (i), pointers (p) and const pointers (c). 1335 return (proto[i] == 'i' || proto[i] == 'p' || proto[i] == 'c'); 1336 } 1337 1338 // Generate the string "(argtype a, argtype b, ...)" 1339 static std::string GenArgs(const std::string &proto, StringRef typestr, 1340 const std::string &name) { 1341 bool define = UseMacro(proto); 1342 char arg = 'a'; 1343 1344 std::string s; 1345 s += "("; 1346 1347 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1348 if (define) { 1349 // Some macro arguments are used directly instead of being assigned 1350 // to local temporaries; prepend an underscore prefix to make their 1351 // names consistent with the local temporaries. 1352 if (MacroArgUsedDirectly(proto, i)) 1353 s += "__"; 1354 } else { 1355 s += TypeString(proto[i], typestr) + " __"; 1356 } 1357 s.push_back(arg); 1358 //To avoid argument being multiple defined, add extra number for renaming. 1359 if (name == "vcopy_lane") 1360 s.push_back('1'); 1361 if ((i + 1) < e) 1362 s += ", "; 1363 } 1364 1365 s += ")"; 1366 return s; 1367 } 1368 1369 // Macro arguments are not type-checked like inline function arguments, so 1370 // assign them to local temporaries to get the right type checking. 1371 static std::string GenMacroLocals(const std::string &proto, StringRef typestr, 1372 const std::string &name ) { 1373 char arg = 'a'; 1374 std::string s; 1375 bool generatedLocal = false; 1376 1377 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1378 // Do not create a temporary for an immediate argument. 1379 // That would defeat the whole point of using a macro! 
1380 if (MacroArgUsedDirectly(proto, i)) 1381 continue; 1382 generatedLocal = true; 1383 bool extranumber = false; 1384 if(name == "vcopy_lane") 1385 extranumber = true; 1386 1387 s += TypeString(proto[i], typestr) + " __"; 1388 s.push_back(arg); 1389 if(extranumber) 1390 s.push_back('1'); 1391 s += " = ("; 1392 s.push_back(arg); 1393 if(extranumber) 1394 s.push_back('1'); 1395 s += "); "; 1396 } 1397 1398 if (generatedLocal) 1399 s += "\\\n "; 1400 return s; 1401 } 1402 1403 // Use the vmovl builtin to sign-extend or zero-extend a vector. 1404 static std::string Extend(StringRef typestr, const std::string &a, bool h=0) { 1405 std::string s, high; 1406 high = h ? "_high" : ""; 1407 s = MangleName("vmovl" + high, typestr, ClassS); 1408 s += "(" + a + ")"; 1409 return s; 1410 } 1411 1412 // Get the high 64-bit part of a vector 1413 static std::string GetHigh(const std::string &a, StringRef typestr) { 1414 std::string s; 1415 s = MangleName("vget_high", typestr, ClassS); 1416 s += "(" + a + ")"; 1417 return s; 1418 } 1419 1420 // Gen operation with two operands and get high 64-bit for both of two operands. 1421 static std::string Gen2OpWith2High(StringRef typestr, 1422 const std::string &op, 1423 const std::string &a, 1424 const std::string &b) { 1425 std::string s; 1426 std::string Op1 = GetHigh(a, typestr); 1427 std::string Op2 = GetHigh(b, typestr); 1428 s = MangleName(op, typestr, ClassS); 1429 s += "(" + Op1 + ", " + Op2 + ");"; 1430 return s; 1431 } 1432 1433 // Gen operation with three operands and get high 64-bit of the latter 1434 // two operands. 
1435 static std::string Gen3OpWith2High(StringRef typestr, 1436 const std::string &op, 1437 const std::string &a, 1438 const std::string &b, 1439 const std::string &c) { 1440 std::string s; 1441 std::string Op1 = GetHigh(b, typestr); 1442 std::string Op2 = GetHigh(c, typestr); 1443 s = MangleName(op, typestr, ClassS); 1444 s += "(" + a + ", " + Op1 + ", " + Op2 + ");"; 1445 return s; 1446 } 1447 1448 // Gen combine operation by putting a on low 64-bit, and b on high 64-bit. 1449 static std::string GenCombine(std::string typestr, 1450 const std::string &a, 1451 const std::string &b) { 1452 std::string s; 1453 s = MangleName("vcombine", typestr, ClassS); 1454 s += "(" + a + ", " + b + ")"; 1455 return s; 1456 } 1457 1458 static std::string Duplicate(unsigned nElts, StringRef typestr, 1459 const std::string &a) { 1460 std::string s; 1461 1462 s = "(" + TypeString('d', typestr) + "){ "; 1463 for (unsigned i = 0; i != nElts; ++i) { 1464 s += a; 1465 if ((i + 1) < nElts) 1466 s += ", "; 1467 } 1468 s += " }"; 1469 1470 return s; 1471 } 1472 1473 static std::string SplatLane(unsigned nElts, const std::string &vec, 1474 const std::string &lane) { 1475 std::string s = "__builtin_shufflevector(" + vec + ", " + vec; 1476 for (unsigned i = 0; i < nElts; ++i) 1477 s += ", " + lane; 1478 s += ")"; 1479 return s; 1480 } 1481 1482 static std::string RemoveHigh(const std::string &name) { 1483 std::string s = name; 1484 std::size_t found = s.find("_high_"); 1485 if (found == std::string::npos) 1486 PrintFatalError("name should contain \"_high_\" for high intrinsics"); 1487 s.replace(found, 5, ""); 1488 return s; 1489 } 1490 1491 static unsigned GetNumElements(StringRef typestr, bool &quad) { 1492 quad = false; 1493 bool dummy = false; 1494 char type = ClassifyType(typestr, quad, dummy, dummy); 1495 unsigned nElts = 0; 1496 switch (type) { 1497 case 'c': nElts = 8; break; 1498 case 's': nElts = 4; break; 1499 case 'i': nElts = 2; break; 1500 case 'l': nElts = 1; break; 1501 case 'h': 
nElts = 4; break; 1502 case 'f': nElts = 2; break; 1503 case 'd': 1504 nElts = 1; 1505 break; 1506 default: 1507 PrintFatalError("unhandled type!"); 1508 } 1509 if (quad) nElts <<= 1; 1510 return nElts; 1511 } 1512 1513 // Generate the definition for this intrinsic, e.g. "a + b" for OpAdd. 1514 static std::string GenOpString(const std::string &name, OpKind op, 1515 const std::string &proto, StringRef typestr) { 1516 bool quad; 1517 unsigned nElts = GetNumElements(typestr, quad); 1518 bool define = UseMacro(proto); 1519 1520 std::string ts = TypeString(proto[0], typestr); 1521 std::string s; 1522 if (!define) { 1523 s = "return "; 1524 } 1525 1526 switch(op) { 1527 case OpAdd: 1528 s += "__a + __b;"; 1529 break; 1530 case OpAddl: 1531 s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";"; 1532 break; 1533 case OpAddlHi: 1534 s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";"; 1535 break; 1536 case OpAddw: 1537 s += "__a + " + Extend(typestr, "__b") + ";"; 1538 break; 1539 case OpAddwHi: 1540 s += "__a + " + Extend(typestr, "__b", 1) + ";"; 1541 break; 1542 case OpSub: 1543 s += "__a - __b;"; 1544 break; 1545 case OpSubl: 1546 s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";"; 1547 break; 1548 case OpSublHi: 1549 s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";"; 1550 break; 1551 case OpSubw: 1552 s += "__a - " + Extend(typestr, "__b") + ";"; 1553 break; 1554 case OpSubwHi: 1555 s += "__a - " + Extend(typestr, "__b", 1) + ";"; 1556 break; 1557 case OpMulN: 1558 s += "__a * " + Duplicate(nElts, typestr, "__b") + ";"; 1559 break; 1560 case OpMulLane: 1561 s += "__a * " + SplatLane(nElts, "__b", "__c") + ";"; 1562 break; 1563 case OpMulXLane: 1564 s += MangleName("vmulx", typestr, ClassS) + "(__a, " + 1565 SplatLane(nElts, "__b", "__c") + ");"; 1566 break; 1567 case OpMul: 1568 s += "__a * __b;"; 1569 break; 1570 case OpMullLane: 1571 s += MangleName("vmull", typestr, ClassS) + "(__a, " + 1572 
SplatLane(nElts, "__b", "__c") + ");"; 1573 break; 1574 case OpMullHiLane: 1575 s += MangleName("vmull", typestr, ClassS) + "(" + 1576 GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");"; 1577 break; 1578 case OpMlaN: 1579 s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");"; 1580 break; 1581 case OpMlaLane: 1582 s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");"; 1583 break; 1584 case OpMla: 1585 s += "__a + (__b * __c);"; 1586 break; 1587 case OpMlalN: 1588 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " + 1589 Duplicate(nElts, typestr, "__c") + ");"; 1590 break; 1591 case OpMlalLane: 1592 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " + 1593 SplatLane(nElts, "__c", "__d") + ");"; 1594 break; 1595 case OpMlalHiLane: 1596 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" + 1597 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; 1598 break; 1599 case OpMlal: 1600 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);"; 1601 break; 1602 case OpMullHi: 1603 s += Gen2OpWith2High(typestr, "vmull", "__a", "__b"); 1604 break; 1605 case OpMlalHi: 1606 s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c"); 1607 break; 1608 case OpMlsN: 1609 s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");"; 1610 break; 1611 case OpMlsLane: 1612 s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");"; 1613 break; 1614 case OpFMSLane: 1615 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 1616 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; 1617 s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n "; 1618 s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);"; 1619 break; 1620 case OpFMSLaneQ: 1621 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 1622 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; 1623 s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n "; 1624 s += 
MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);"; 1625 break; 1626 case OpMls: 1627 s += "__a - (__b * __c);"; 1628 break; 1629 case OpMlslN: 1630 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " + 1631 Duplicate(nElts, typestr, "__c") + ");"; 1632 break; 1633 case OpMlslLane: 1634 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " + 1635 SplatLane(nElts, "__c", "__d") + ");"; 1636 break; 1637 case OpMlslHiLane: 1638 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" + 1639 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; 1640 break; 1641 case OpMlsl: 1642 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);"; 1643 break; 1644 case OpMlslHi: 1645 s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c"); 1646 break; 1647 case OpQDMullLane: 1648 s += MangleName("vqdmull", typestr, ClassS) + "(__a, " + 1649 SplatLane(nElts, "__b", "__c") + ");"; 1650 break; 1651 case OpQDMullHiLane: 1652 s += MangleName("vqdmull", typestr, ClassS) + "(" + 1653 GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");"; 1654 break; 1655 case OpQDMlalLane: 1656 s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " + 1657 SplatLane(nElts, "__c", "__d") + ");"; 1658 break; 1659 case OpQDMlalHiLane: 1660 s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " + 1661 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; 1662 break; 1663 case OpQDMlslLane: 1664 s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " + 1665 SplatLane(nElts, "__c", "__d") + ");"; 1666 break; 1667 case OpQDMlslHiLane: 1668 s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " + 1669 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; 1670 break; 1671 case OpQDMulhLane: 1672 s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " + 1673 SplatLane(nElts, "__b", "__c") + ");"; 1674 break; 1675 case OpQRDMulhLane: 1676 s += MangleName("vqrdmulh", typestr, 
ClassS) + "(__a, " + 1677 SplatLane(nElts, "__b", "__c") + ");"; 1678 break; 1679 case OpEq: 1680 s += "(" + ts + ")(__a == __b);"; 1681 break; 1682 case OpGe: 1683 s += "(" + ts + ")(__a >= __b);"; 1684 break; 1685 case OpLe: 1686 s += "(" + ts + ")(__a <= __b);"; 1687 break; 1688 case OpGt: 1689 s += "(" + ts + ")(__a > __b);"; 1690 break; 1691 case OpLt: 1692 s += "(" + ts + ")(__a < __b);"; 1693 break; 1694 case OpNeg: 1695 s += " -__a;"; 1696 break; 1697 case OpNot: 1698 s += " ~__a;"; 1699 break; 1700 case OpAnd: 1701 s += "__a & __b;"; 1702 break; 1703 case OpOr: 1704 s += "__a | __b;"; 1705 break; 1706 case OpXor: 1707 s += "__a ^ __b;"; 1708 break; 1709 case OpAndNot: 1710 s += "__a & ~__b;"; 1711 break; 1712 case OpOrNot: 1713 s += "__a | ~__b;"; 1714 break; 1715 case OpCast: 1716 s += "(" + ts + ")__a;"; 1717 break; 1718 case OpConcat: 1719 s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a"; 1720 s += ", (int64x1_t)__b, 0, 1);"; 1721 break; 1722 case OpHi: 1723 // nElts is for the result vector, so the source is twice that number. 
1724 s += "__builtin_shufflevector(__a, __a"; 1725 for (unsigned i = nElts; i < nElts * 2; ++i) 1726 s += ", " + utostr(i); 1727 s+= ");"; 1728 break; 1729 case OpLo: 1730 s += "__builtin_shufflevector(__a, __a"; 1731 for (unsigned i = 0; i < nElts; ++i) 1732 s += ", " + utostr(i); 1733 s+= ");"; 1734 break; 1735 case OpDup: 1736 s += Duplicate(nElts, typestr, "__a") + ";"; 1737 break; 1738 case OpDupLane: 1739 s += SplatLane(nElts, "__a", "__b") + ";"; 1740 break; 1741 case OpSelect: 1742 // ((0 & 1) | (~0 & 2)) 1743 s += "(" + ts + ")"; 1744 ts = TypeString(proto[1], typestr); 1745 s += "((__a & (" + ts + ")__b) | "; 1746 s += "(~__a & (" + ts + ")__c));"; 1747 break; 1748 case OpRev16: 1749 s += "__builtin_shufflevector(__a, __a"; 1750 for (unsigned i = 2; i <= nElts; i += 2) 1751 for (unsigned j = 0; j != 2; ++j) 1752 s += ", " + utostr(i - j - 1); 1753 s += ");"; 1754 break; 1755 case OpRev32: { 1756 unsigned WordElts = nElts >> (1 + (int)quad); 1757 s += "__builtin_shufflevector(__a, __a"; 1758 for (unsigned i = WordElts; i <= nElts; i += WordElts) 1759 for (unsigned j = 0; j != WordElts; ++j) 1760 s += ", " + utostr(i - j - 1); 1761 s += ");"; 1762 break; 1763 } 1764 case OpRev64: { 1765 unsigned DblWordElts = nElts >> (int)quad; 1766 s += "__builtin_shufflevector(__a, __a"; 1767 for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts) 1768 for (unsigned j = 0; j != DblWordElts; ++j) 1769 s += ", " + utostr(i - j - 1); 1770 s += ");"; 1771 break; 1772 } 1773 case OpAbdl: { 1774 std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)"; 1775 if (typestr[0] != 'U') { 1776 // vabd results are always unsigned and must be zero-extended. 
1777 std::string utype = "U" + typestr.str(); 1778 s += "(" + TypeString(proto[0], typestr) + ")"; 1779 abd = "(" + TypeString('d', utype) + ")" + abd; 1780 s += Extend(utype, abd) + ";"; 1781 } else { 1782 s += Extend(typestr, abd) + ";"; 1783 } 1784 break; 1785 } 1786 case OpAbdlHi: 1787 s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b"); 1788 break; 1789 case OpAddhnHi: { 1790 std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)"; 1791 s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn); 1792 s += ";"; 1793 break; 1794 } 1795 case OpRAddhnHi: { 1796 std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)"; 1797 s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn); 1798 s += ";"; 1799 break; 1800 } 1801 case OpSubhnHi: { 1802 std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)"; 1803 s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn); 1804 s += ";"; 1805 break; 1806 } 1807 case OpRSubhnHi: { 1808 std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)"; 1809 s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn); 1810 s += ";"; 1811 break; 1812 } 1813 case OpAba: 1814 s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);"; 1815 break; 1816 case OpAbal: 1817 s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);"; 1818 break; 1819 case OpAbalHi: 1820 s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c"); 1821 break; 1822 case OpQDMullHi: 1823 s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b"); 1824 break; 1825 case OpQDMlalHi: 1826 s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c"); 1827 break; 1828 case OpQDMlslHi: 1829 s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c"); 1830 break; 1831 case OpDiv: 1832 s += "__a / __b;"; 1833 break; 1834 case OpMovlHi: { 1835 s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " + 1836 MangleName("vget_high", typestr, ClassS) + "(__a);\n " + s; 1837 s += "(" + ts + ")" + 
MangleName("vshll_n", typestr, ClassS); 1838 s += "(__a1, 0);"; 1839 break; 1840 } 1841 case OpLongHi: { 1842 // Another local variable __a1 is needed for calling a Macro, 1843 // or using __a will have naming conflict when Macro expanding. 1844 s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " + 1845 MangleName("vget_high", typestr, ClassS) + "(__a); \\\n"; 1846 s += " (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) + 1847 "(__a1, __b);"; 1848 break; 1849 } 1850 case OpNarrowHi: { 1851 s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " + 1852 MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));"; 1853 break; 1854 } 1855 case OpCopy: { 1856 s += TypeString('s', typestr) + " __c2 = " + 1857 MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n " + 1858 MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);"; 1859 break; 1860 } 1861 default: 1862 PrintFatalError("unknown OpKind!"); 1863 } 1864 return s; 1865 } 1866 1867 static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) { 1868 unsigned mod = proto[0]; 1869 1870 if (mod == 'v' || mod == 'f') 1871 mod = proto[1]; 1872 1873 bool quad = false; 1874 bool poly = false; 1875 bool usgn = false; 1876 bool scal = false; 1877 bool cnst = false; 1878 bool pntr = false; 1879 1880 // Base type to get the type string for. 1881 char type = ClassifyType(typestr, quad, poly, usgn); 1882 1883 // Based on the modifying character, change the type and width if necessary. 1884 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 1885 1886 NeonTypeFlags::EltType ET; 1887 switch (type) { 1888 case 'c': 1889 ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8; 1890 break; 1891 case 's': 1892 ET = poly ? 
NeonTypeFlags::Poly16 : NeonTypeFlags::Int16; 1893 break; 1894 case 'i': 1895 ET = NeonTypeFlags::Int32; 1896 break; 1897 case 'l': 1898 ET = NeonTypeFlags::Int64; 1899 break; 1900 case 'h': 1901 ET = NeonTypeFlags::Float16; 1902 break; 1903 case 'f': 1904 ET = NeonTypeFlags::Float32; 1905 break; 1906 case 'd': 1907 ET = NeonTypeFlags::Float64; 1908 break; 1909 default: 1910 PrintFatalError("unhandled type!"); 1911 } 1912 NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g'); 1913 return Flags.getFlags(); 1914 } 1915 1916 static bool ProtoHasScalar(const std::string proto) 1917 { 1918 return (proto.find('s') != std::string::npos 1919 || proto.find('r') != std::string::npos); 1920 } 1921 1922 // Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a) 1923 static std::string GenBuiltin(const std::string &name, const std::string &proto, 1924 StringRef typestr, ClassKind ck) { 1925 std::string s; 1926 1927 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit 1928 // sret-like argument. 1929 bool sret = (proto[0] >= '2' && proto[0] <= '4'); 1930 1931 bool define = UseMacro(proto); 1932 1933 // Check if the prototype has a scalar operand with the type of the vector 1934 // elements. If not, bitcasting the args will take care of arg checking. 1935 // The actual signedness etc. will be taken care of with special enums. 1936 if (!ProtoHasScalar(proto)) 1937 ck = ClassB; 1938 1939 if (proto[0] != 'v') { 1940 std::string ts = TypeString(proto[0], typestr); 1941 1942 if (define) { 1943 if (sret) 1944 s += ts + " r; "; 1945 else 1946 s += "(" + ts + ")"; 1947 } else if (sret) { 1948 s += ts + " r; "; 1949 } else { 1950 s += "return (" + ts + ")"; 1951 } 1952 } 1953 1954 bool splat = proto.find('a') != std::string::npos; 1955 1956 s += "__builtin_neon_"; 1957 if (splat) { 1958 // Call the non-splat builtin: chop off the "_n" suffix from the name. 
1959 std::string vname(name, 0, name.size()-2); 1960 s += MangleName(vname, typestr, ck); 1961 } else { 1962 s += MangleName(name, typestr, ck); 1963 } 1964 s += "("; 1965 1966 // Pass the address of the return variable as the first argument to sret-like 1967 // builtins. 1968 if (sret) 1969 s += "&r, "; 1970 1971 char arg = 'a'; 1972 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1973 std::string args = std::string(&arg, 1); 1974 1975 // Use the local temporaries instead of the macro arguments. 1976 args = "__" + args; 1977 1978 bool argQuad = false; 1979 bool argPoly = false; 1980 bool argUsgn = false; 1981 bool argScalar = false; 1982 bool dummy = false; 1983 char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn); 1984 argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar, 1985 dummy, dummy); 1986 1987 // Handle multiple-vector values specially, emitting each subvector as an 1988 // argument to the __builtin. 1989 if (proto[i] >= '2' && proto[i] <= '4') { 1990 // Check if an explicit cast is needed. 1991 if (argType != 'c' || argPoly || argUsgn) 1992 args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args; 1993 1994 for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) { 1995 s += args + ".val[" + utostr(vi) + "]"; 1996 if ((vi + 1) < ve) 1997 s += ", "; 1998 } 1999 if ((i + 1) < e) 2000 s += ", "; 2001 2002 continue; 2003 } 2004 2005 if (splat && (i + 1) == e) 2006 args = Duplicate(GetNumElements(typestr, argQuad), typestr, args); 2007 2008 // Check if an explicit cast is needed. 
2009 if ((splat || !argScalar) && 2010 ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) { 2011 std::string argTypeStr = "c"; 2012 if (ck != ClassB) 2013 argTypeStr = argType; 2014 if (argQuad) 2015 argTypeStr = "Q" + argTypeStr; 2016 args = "(" + TypeString('d', argTypeStr) + ")" + args; 2017 } 2018 2019 s += args; 2020 if ((i + 1) < e) 2021 s += ", "; 2022 } 2023 2024 // Extra constant integer to hold type class enum for this function, e.g. s8 2025 if (ck == ClassB) 2026 s += ", " + utostr(GetNeonEnum(proto, typestr)); 2027 2028 s += ");"; 2029 2030 if (proto[0] != 'v' && sret) { 2031 if (define) 2032 s += " r;"; 2033 else 2034 s += " return r;"; 2035 } 2036 return s; 2037 } 2038 2039 static std::string GenBuiltinDef(const std::string &name, 2040 const std::string &proto, 2041 StringRef typestr, ClassKind ck) { 2042 std::string s("BUILTIN(__builtin_neon_"); 2043 2044 // If all types are the same size, bitcasting the args will take care 2045 // of arg checking. The actual signedness etc. will be taken care of with 2046 // special enums. 2047 if (!ProtoHasScalar(proto)) 2048 ck = ClassB; 2049 2050 s += MangleName(name, typestr, ck); 2051 s += ", \""; 2052 2053 for (unsigned i = 0, e = proto.size(); i != e; ++i) 2054 s += BuiltinTypeString(proto[i], typestr, ck, i == 0); 2055 2056 // Extra constant integer to hold type class enum for this function, e.g. 
s8 2057 if (ck == ClassB) 2058 s += "i"; 2059 2060 s += "\", \"n\")"; 2061 return s; 2062 } 2063 2064 static std::string GenIntrinsic(const std::string &name, 2065 const std::string &proto, 2066 StringRef outTypeStr, StringRef inTypeStr, 2067 OpKind kind, ClassKind classKind) { 2068 assert(!proto.empty() && ""); 2069 bool define = UseMacro(proto) && kind != OpUnavailable; 2070 std::string s; 2071 2072 // static always inline + return type 2073 if (define) 2074 s += "#define "; 2075 else 2076 s += "__ai " + TypeString(proto[0], outTypeStr) + " "; 2077 2078 // Function name with type suffix 2079 std::string mangledName = MangleName(name, outTypeStr, ClassS); 2080 if (outTypeStr != inTypeStr) { 2081 // If the input type is different (e.g., for vreinterpret), append a suffix 2082 // for the input type. String off a "Q" (quad) prefix so that MangleName 2083 // does not insert another "q" in the name. 2084 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0); 2085 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff); 2086 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS); 2087 } 2088 s += mangledName; 2089 2090 // Function arguments 2091 s += GenArgs(proto, inTypeStr, name); 2092 2093 // Definition. 2094 if (define) { 2095 s += " __extension__ ({ \\\n "; 2096 s += GenMacroLocals(proto, inTypeStr, name); 2097 } else if (kind == OpUnavailable) { 2098 s += " __attribute__((unavailable));\n"; 2099 return s; 2100 } else 2101 s += " {\n "; 2102 2103 if (kind != OpNone) 2104 s += GenOpString(name, kind, proto, outTypeStr); 2105 else 2106 s += GenBuiltin(name, proto, outTypeStr, classKind); 2107 if (define) 2108 s += " })"; 2109 else 2110 s += " }"; 2111 s += "\n"; 2112 return s; 2113 } 2114 2115 /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h 2116 /// is comprised of type definitions and function declarations. 
void NeonEmitter::run(raw_ostream &OS) {
  // Emit the MIT license banner verbatim at the top of arm_neon.h.
  OS <<
    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
    "---===\n"
    " *\n"
    " * Permission is hereby granted, free of charge, to any person obtaining "
    "a copy\n"
    " * of this software and associated documentation files (the \"Software\"),"
    " to deal\n"
    " * in the Software without restriction, including without limitation the "
    "rights\n"
    " * to use, copy, modify, merge, publish, distribute, sublicense, "
    "and/or sell\n"
    " * copies of the Software, and to permit persons to whom the Software is\n"
    " * furnished to do so, subject to the following conditions:\n"
    " *\n"
    " * The above copyright notice and this permission notice shall be "
    "included in\n"
    " * all copies or substantial portions of the Software.\n"
    " *\n"
    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
    "EXPRESS OR\n"
    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
    "MERCHANTABILITY,\n"
    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
    "SHALL THE\n"
    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
    "OTHER\n"
    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
    "ARISING FROM,\n"
    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
    "DEALINGS IN\n"
    " * THE SOFTWARE.\n"
    " *\n"
    " *===--------------------------------------------------------------------"
    "---===\n"
    " */\n\n";

  OS << "#ifndef __ARM_NEON_H\n";
  OS << "#define __ARM_NEON_H\n\n";

  // NOTE(review): "__AARCH_FEATURE_ADVSIMD" looks misspelled -- no known
  // compiler predefines a macro of that name (ACLE uses __ARM_NEON /
  // __ARM_FEATURE_* spellings).  Confirm the intended macro before changing
  // the emitted text.
  OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n";
  OS << "#error \"NEON support not enabled\"\n";
  OS << "#endif\n\n";

  OS << "#include <stdint.h>\n\n";

  // Emit NEON-specific scalar typedefs.
  OS << "typedef float float32_t;\n";
  OS << "typedef __fp16 float16_t;\n";

  // float64_t only exists on AArch64.
  OS << "#ifdef __aarch64__\n";
  OS << "typedef double float64_t;\n";
  OS << "#endif\n\n";

  // For now, signedness of polynomial types depends on target
  OS << "#ifdef __aarch64__\n";
  OS << "typedef uint8_t poly8_t;\n";
  OS << "typedef uint16_t poly16_t;\n";
  OS << "#else\n";
  OS << "typedef int8_t poly8_t;\n";
  OS << "typedef int16_t poly16_t;\n";
  OS << "#endif\n";

  // Emit Neon vector typedefs.
  std::string TypedefTypes(
      "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPs");
  SmallVector<StringRef, 24> TDTypeVec;
  ParseTypes(0, TypedefTypes, TDTypeVec);

  // Emit vector typedefs.
  bool isA64 = false;
  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
    bool dummy, quad = false, poly = false;
    char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
    bool preinsert = false;
    bool postinsert = false;

    // 'd' (float64) typedefs are AArch64-only: open an #ifdef __aarch64__
    // block before the first one and close it when a non-'d' type follows.
    if (type == 'd') {
      preinsert = isA64? false: true;
      isA64 = true;
    } else {
      postinsert = isA64? true: false;
      isA64 = false;
    }
    if (postinsert)
      OS << "#endif\n";
    if (preinsert)
      OS << "#ifdef __aarch64__\n";

    if (poly)
      OS << "typedef __attribute__((neon_polyvector_type(";
    else
      OS << "typedef __attribute__((neon_vector_type(";

    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
    OS << utostr(nElts) << "))) ";
    // Pad single-digit lane counts so the typedef columns line up.
    if (nElts < 10)
      OS << " ";

    OS << TypeString('s', TDTypeVec[i]);
    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";

  }
  OS << "\n";

  // Emit struct typedefs (the multi-vector xN_t types, N = 2..4).
  isA64 = false;
  for (unsigned vi = 2; vi != 5; ++vi) {
    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
      bool dummy, quad = false, poly = false;
      char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
      bool preinsert = false;
      bool postinsert = false;

      // Same AArch64 guard dance as for the plain vector typedefs above.
      if (type == 'd') {
        preinsert = isA64? false: true;
        isA64 = true;
      } else {
        postinsert = isA64? true: false;
        isA64 = false;
      }
      if (postinsert)
        OS << "#endif\n";
      if (preinsert)
        OS << "#ifdef __aarch64__\n";

      std::string ts = TypeString('d', TDTypeVec[i]);
      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
      OS << "typedef struct " << vs << " {\n";
      OS << "  " << ts << " val";
      OS << "[" << utostr(vi) << "]";
      OS << ";\n} ";
      OS << vs << ";\n";
      OS << "\n";
    }
  }

  OS << "#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";

  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");

  StringMap<ClassKind> EmittedMap;

  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
  // intrinsics.  (Some of the saturating multiply instructions are also
  // used to implement the corresponding "_lane" variants, but tablegen
  // sorts the records into alphabetical order so that the "_lane" variants
  // come after the intrinsics they use.)
  emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);

  // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
  // common intrinsics appear only once in the output stream.
  // The check for uniqueness is done in emitIntrinsic.
  // Emit ARM intrinsics.
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip AArch64 intrinsics; they will be emitted at the end.
    bool isA64 = R->getValueAsBit("isA64");
    if (isA64)
      continue;

    // VABDL is not re-listed here; emitIntrinsic's EmittedMap already
    // suppresses a duplicate emission.
    if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
        R->getName() != "VABD")
      emitIntrinsic(OS, R, EmittedMap);
  }

  // Emit AArch64-specific intrinsics.
  OS << "#ifdef __aarch64__\n";

  // Pre-emit the "_high" helpers other AArch64 intrinsics expand to.
  emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip ARM intrinsics already included above.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64)
      continue;

    emitIntrinsic(OS, R, EmittedMap);
  }

  OS << "#endif\n\n";

  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_NEON_H */\n";
}

/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
/// intrinsics specified by record R checking for intrinsic uniqueness.
/// emitIntrinsic - Emit the arm_neon.h definitions for the intrinsic
/// described by record R, one definition per element type in the record's
/// "Types" string.  For OpReinterpret, a cast is emitted for every
/// (dst, src) type pair of matching vector width.  EmittedMap is used to
/// suppress duplicate definitions across records.
void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
                                StringMap<ClassKind> &EmittedMap) {
  std::string name = R->getValueAsString("Name");
  std::string Proto = R->getValueAsString("Prototype");
  std::string Types = R->getValueAsString("Types");

  SmallVector<StringRef, 16> TypeVec;
  ParseTypes(R, Types, TypeVec);

  OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];

  // The class kind lives on the record's second superclass (the first is the
  // generic "Inst" class).
  ClassKind classKind = ClassNone;
  if (R->getSuperClasses().size() >= 2)
    classKind = ClassMap[R->getSuperClasses()[1]];
  if (classKind == ClassNone && kind == OpNone)
    PrintFatalError(R->getLoc(), "Builtin has no class kind");

  for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
    if (kind == OpReinterpret) {
      // Emit a cast from every other type of the same width (quad vs non-quad)
      // to this type.
      bool outQuad = false;
      bool dummy = false;
      (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
      for (unsigned srcti = 0, srcte = TypeVec.size();
           srcti != srcte; ++srcti) {
        bool inQuad = false;
        (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
        if (srcti == ti || inQuad != outQuad)
          continue;
        std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
                                     OpCast, ClassS);
        if (EmittedMap.count(s))
          continue;
        EmittedMap[s] = ClassS;
        OS << s;
      }
    } else {
      std::string s =
          GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
      if (EmittedMap.count(s))
        continue;
      EmittedMap[s] = classKind;
      OS << s;
    }
  }
  OS << "\n";
}

/// RangeFromType - Return the maximum valid lane index (nElts - 1) for the
/// vector formed from 'typestr' after applying prototype modifier 'mod',
/// i.e. the upper bound for a lane immediate.
static unsigned RangeFromType(const char mod, StringRef typestr) {
  // base type to get the type string for.
  bool quad = false, dummy = false;
  char type = ClassifyType(typestr, quad, dummy, dummy);
  type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);

  switch (type) {
  case 'c':
    // 8 (or 16 for quad) byte lanes.
    return (8 << (int)quad) - 1;
  case 'h':
  case 's':
    return (4 << (int)quad) - 1;
  case 'f':
  case 'i':
    return (2 << (int)quad) - 1;
  case 'd':
  case 'l':
    return (1 << (int)quad) - 1;
  default:
    PrintFatalError("unhandled type!");
  }
}

/// Generate the ARM and AArch64 intrinsic range checking code for
/// shift/lane immediates, checking for unique declarations.
///
/// Emits "case <target>::BI__builtin_neon_<mangled>: i = <idx>; <range>;
/// break;" lines under GET_NEON_IMMEDIATE_CHECK (ARM) or
/// GET_NEON_AARCH64_IMMEDIATE_CHECK (AArch64) for SemaChecking.cpp.
/// A64IntrinsicMap maps "name@proto" for AArch64 records and is used to
/// skip ARM cases that AArch64 redefines with the same class kind.
void
NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
                                        StringMap<ClassKind> &A64IntrinsicMap,
                                        bool isA64RangeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate the intrinsic range checking code for shift/lane immediates.
  if (isA64RangeCheck)
    OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
  else
    OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Only builtins (OpNone) get range checks; expanded ops do not.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which do not have an immediate do not need to have range
    // checking code emitted.
    size_t immPos = Proto.find('i');
    if (immPos == std::string::npos)
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    // Do not include AArch64 range checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64RangeCheck && isA64)
      continue;

    // Include ARM range checks in AArch64 but only if ARM intrinsics are not
    // redefined by AArch64 to handle new types.
    if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      std::string namestr, shiftstr, rangestr;

      if (R->getValueAsBit("isVCVT_N")) {
        // VCVT between floating- and fixed-point values takes an immediate
        // in the range [1, 32] for f32, or [1, 64] for f64.
        ck = ClassB;
        if (name.find("32") != std::string::npos)
          rangestr = "l = 1; u = 31"; // upper bound = l + u
        else if (name.find("64") != std::string::npos)
          rangestr = "l = 1; u = 63";
        else
          PrintFatalError(R->getLoc(),
              "Fixed point convert name should contains \"32\" or \"64\"");
      } else if (!ProtoHasScalar(Proto)) {
        // Builtins which are overloaded by type will need to have their upper
        // bound computed at Sema time based on the type constant.
        ck = ClassB;
        if (R->getValueAsBit("isShift")) {
          shiftstr = ", true";

          // Right shifts have an 'r' in the name, left shifts do not.
          if (name.find('r') != std::string::npos)
            rangestr = "l = 1; ";
        }
        // RFT(TV) computes the upper bound from the type constant at Sema
        // time.
        rangestr += "u = RFT(TV" + shiftstr + ")";
      } else {
        // The immediate generally refers to a lane in the preceding argument.
        assert(immPos > 0 && "unexpected immediate operand");
        rangestr =
            "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
      }
      // Make sure cases appear only once by uniquing them in a string map.
      namestr = MangleName(name, TypeVec[ti], ck);
      if (EmittedMap.count(namestr))
        continue;
      EmittedMap[namestr] = OpNone;

      // Calculate the index of the immediate that should be range checked.
      unsigned immidx = 0;

      // Builtins that return a struct of multiple vectors have an extra
      // leading arg for the struct return.
      if (Proto[0] >= '2' && Proto[0] <= '4')
        ++immidx;

      // Add one to the index for each argument until we reach the immediate
      // to be checked.  Structs of vectors are passed as multiple arguments.
      for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
        switch (Proto[ii]) {
        default:
          immidx += 1;
          break;
        case '2':
          immidx += 2;
          break;
        case '3':
          immidx += 3;
          break;
        case '4':
          immidx += 4;
          break;
        case 'i':
          // Found the immediate; terminate the loop after this iteration.
          ie = ii + 1;
          break;
        }
      }
      if (isA64RangeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
         << rangestr << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}

/// Generate the ARM and AArch64 overloaded type checking code for
/// SemaChecking.cpp, checking for unique builtin declarations.
/// genOverloadTypeCheckCode - Emit "case <target>::BI__builtin_neon_<name>:
/// mask = 0x...ULL ..." lines under GET_NEON_OVERLOAD_CHECK (ARM) or
/// GET_NEON_AARCH64_OVERLOAD_CHECK (AArch64).  The mask is a bitset of the
/// NEON type-enum values valid for each overloaded builtin; separate cases
/// are emitted for the 64-bit and quad (128-bit) variants.  A64IntrinsicMap
/// maps "name@proto" for AArch64 records and is used to skip ARM cases that
/// AArch64 redefines with the same class kind.
void
NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
                                      StringMap<ClassKind> &A64IntrinsicMap,
                                      bool isA64TypeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate the overloaded type checking code for SemaChecking.cpp
  if (isA64TypeCheck)
    OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
  else
    OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only builtins (OpNone) need overload checking.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    std::string name = R->getValueAsString("Name");
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which have a scalar argument cannot be overloaded, no need to
    // check them if we are emitting the type checking code.
    if (ProtoHasScalar(Proto))
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    // Do not include AArch64 type checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64TypeCheck && isA64)
      continue;

    // Include ARM type check in AArch64 but only if ARM intrinsics
    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
    // redefined in AArch64 to handle an additional 2 x f64 type.
    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
    if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    // si/qi remember a representative non-quad/quad type index so the
    // mangled case label can be built; mask/qmask accumulate the valid
    // type-enum bits for each width.
    int si = -1, qi = -1;
    uint64_t mask = 0, qmask = 0;
    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the switch case(s) for this builtin for the type validation.
      bool quad = false, poly = false, usgn = false;
      (void) ClassifyType(TypeVec[ti], quad, poly, usgn);

      if (quad) {
        qi = ti;
        qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      } else {
        si = ti;
        mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      }
    }

    // Check if the builtin function has a pointer or const pointer argument.
    int PtrArgNum = -1;
    bool HasConstPtr = false;
    for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
      char ArgType = Proto[arg];
      if (ArgType == 'c') {
        HasConstPtr = true;
        PtrArgNum = arg - 1;
        break;
      }
      if (ArgType == 'p') {
        PtrArgNum = arg - 1;
        break;
      }
    }
    // For sret builtins, adjust the pointer argument index.
    if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
      PtrArgNum += 1;

    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
    // and vst1_lane intrinsics. Using a pointer to the vector element
    // type with one of those operations causes codegen to select an aligned
    // load/store instruction. If you want an unaligned operation,
    // the pointer argument needs to have less alignment than element type,
    // so just accept any pointer type.
    if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
      PtrArgNum = -1;
      HasConstPtr = false;
    }

    // Emit one case for the 64-bit variant (if any)...
    if (mask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
         << "0x" << utohexstr(mask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
    // ...and one for the quad (128-bit) variant (if any).
    if (qmask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
         << "0x" << utohexstr(qmask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}

/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def
/// declaration of builtins, checking for unique builtin declarations.
/// genBuiltinsDef - Emit one BUILTIN() macro per unique builtin under
/// GET_NEON_BUILTINS (ARM) or GET_NEON_AARCH64_BUILTINS (AArch64), for
/// inclusion in BuiltinsARM.def / BuiltinsAArch64.def.  A64IntrinsicMap maps
/// "name@proto" for AArch64 records and is used to skip ARM builtins that
/// AArch64 redefines with the same class kind.
void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
                                 StringMap<ClassKind> &A64IntrinsicMap,
                                 bool isA64GenBuiltinDef) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate BuiltinsARM.def and BuiltinsAArch64.def
  if (isA64GenBuiltinDef)
    OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
  else
    OS << "#ifdef GET_NEON_BUILTINS\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only records implemented as builtins (OpNone) get a BUILTIN() entry.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string name = R->getValueAsString("Name");
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    std::string Types = R->getValueAsString("Types");
    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    // Do not include AArch64 BUILTIN() macros if not generating
    // code for AArch64
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64GenBuiltinDef && isA64)
      continue;

    // Include ARM BUILTIN() macros in AArch64 but only if ARM intrinsics
    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
    // redefined in AArch64 to handle an additional 2 x f64 type.
    if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the declaration for this builtin, ensuring
      // that each unique BUILTIN() macro appears only once in the output
      // stream.
      std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
      if (EmittedMap.count(bd))
        continue;

      EmittedMap[bd] = OpNone;
      OS << bd << "\n";
    }
  }
  OS << "#endif\n\n";
}

/// runHeader - Emit a file with sections defining:
/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
/// 2. the SemaChecking code for the type overload checking.
/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
void NeonEmitter::runHeader(raw_ostream &OS) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");

  // Build a map of AArch64 intrinsics to be used in uniqueness checks.
  // Keyed by "name@proto" so ARM and AArch64 variants with identical
  // signatures collide; the value is the AArch64 record's class kind.
  StringMap<ClassKind> A64IntrinsicMap;
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64)
      continue;

    ClassKind CK = ClassNone;
    if (R->getSuperClasses().size() >= 2)
      CK = ClassMap[R->getSuperClasses()[1]];

    std::string Name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    std::string Rename = Name + "@" + Proto;
    if (A64IntrinsicMap.count(Rename))
      continue;
    A64IntrinsicMap[Rename] = CK;
  }

  // Generate BuiltinsARM.def for ARM
  genBuiltinsDef(OS, A64IntrinsicMap, false);

  // Generate BuiltinsAArch64.def for AArch64
  genBuiltinsDef(OS, A64IntrinsicMap, true);

  // Generate ARM overloaded type checking code for SemaChecking.cpp
  genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);

  // Generate AArch64 overloaded type checking code for SemaChecking.cpp
  genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);

  // Generate ARM range checking code for shift/lane immediates.
  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);

  // Generate the AArch64 range checking code for shift/lane immediates.
  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
}

/// GenTest - Write out a test for the intrinsic specified by the name and
/// type strings, including the embedded patterns for FileCheck to match.
2771 static std::string GenTest(const std::string &name, 2772 const std::string &proto, 2773 StringRef outTypeStr, StringRef inTypeStr, 2774 bool isShift, bool isHiddenLOp, 2775 ClassKind ck, const std::string &InstName, 2776 bool isA64, 2777 std::string & testFuncProto) { 2778 assert(!proto.empty() && ""); 2779 std::string s; 2780 2781 // Function name with type suffix 2782 std::string mangledName = MangleName(name, outTypeStr, ClassS); 2783 if (outTypeStr != inTypeStr) { 2784 // If the input type is different (e.g., for vreinterpret), append a suffix 2785 // for the input type. String off a "Q" (quad) prefix so that MangleName 2786 // does not insert another "q" in the name. 2787 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0); 2788 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff); 2789 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS); 2790 } 2791 2792 // todo: GenerateChecksForIntrinsic does not generate CHECK 2793 // for aarch64 instructions yet 2794 std::vector<std::string> FileCheckPatterns; 2795 if (!isA64) { 2796 GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName, 2797 isHiddenLOp, FileCheckPatterns); 2798 s+= "// CHECK_ARM: test_" + mangledName + "\n"; 2799 } 2800 s += "// CHECK_AARCH64: test_" + mangledName + "\n"; 2801 2802 // Emit the FileCheck patterns. 2803 // If for any reason we do not want to emit a check, mangledInst 2804 // will be the empty string. 2805 if (FileCheckPatterns.size()) { 2806 for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(), 2807 e = FileCheckPatterns.end(); 2808 i != e; 2809 ++i) { 2810 s += "// CHECK_ARM: " + *i + "\n"; 2811 } 2812 } 2813 2814 // Emit the start of the test function. 2815 2816 testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "("; 2817 char arg = 'a'; 2818 std::string comma; 2819 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 2820 // Do not create arguments for values that must be immediate constants. 
2821 if (proto[i] == 'i') 2822 continue; 2823 testFuncProto += comma + TypeString(proto[i], inTypeStr) + " "; 2824 testFuncProto.push_back(arg); 2825 comma = ", "; 2826 } 2827 testFuncProto += ")"; 2828 2829 s+= testFuncProto; 2830 s+= " {\n "; 2831 2832 if (proto[0] != 'v') 2833 s += "return "; 2834 s += mangledName + "("; 2835 arg = 'a'; 2836 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 2837 if (proto[i] == 'i') { 2838 // For immediate operands, test the maximum value. 2839 if (isShift) 2840 s += "1"; // FIXME 2841 else 2842 // The immediate generally refers to a lane in the preceding argument. 2843 s += utostr(RangeFromType(proto[i-1], inTypeStr)); 2844 } else { 2845 s.push_back(arg); 2846 } 2847 if ((i + 1) < e) 2848 s += ", "; 2849 } 2850 s += ");\n}\n\n"; 2851 return s; 2852 } 2853 2854 /// Write out all intrinsic tests for the specified target, checking 2855 /// for intrinsic test uniqueness. 2856 void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap, 2857 bool isA64GenTest) { 2858 if (isA64GenTest) 2859 OS << "#ifdef __aarch64__\n"; 2860 2861 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2862 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2863 Record *R = RV[i]; 2864 std::string name = R->getValueAsString("Name"); 2865 std::string Proto = R->getValueAsString("Prototype"); 2866 std::string Types = R->getValueAsString("Types"); 2867 bool isShift = R->getValueAsBit("isShift"); 2868 std::string InstName = R->getValueAsString("InstName"); 2869 bool isHiddenLOp = R->getValueAsBit("isHiddenLInst"); 2870 bool isA64 = R->getValueAsBit("isA64"); 2871 2872 // do not include AArch64 intrinsic test if not generating 2873 // code for AArch64 2874 if (!isA64GenTest && isA64) 2875 continue; 2876 2877 SmallVector<StringRef, 16> TypeVec; 2878 ParseTypes(R, Types, TypeVec); 2879 2880 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 2881 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()]; 2882 if 
(kind == OpUnavailable) 2883 continue; 2884 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 2885 if (kind == OpReinterpret) { 2886 bool outQuad = false; 2887 bool dummy = false; 2888 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy); 2889 for (unsigned srcti = 0, srcte = TypeVec.size(); 2890 srcti != srcte; ++srcti) { 2891 bool inQuad = false; 2892 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); 2893 if (srcti == ti || inQuad != outQuad) 2894 continue; 2895 std::string testFuncProto; 2896 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], 2897 isShift, isHiddenLOp, ck, InstName, isA64, 2898 testFuncProto); 2899 if (EmittedMap.count(testFuncProto)) 2900 continue; 2901 EmittedMap[testFuncProto] = kind; 2902 OS << s << "\n"; 2903 } 2904 } else { 2905 std::string testFuncProto; 2906 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift, 2907 isHiddenLOp, ck, InstName, isA64, testFuncProto); 2908 if (EmittedMap.count(testFuncProto)) 2909 continue; 2910 EmittedMap[testFuncProto] = kind; 2911 OS << s << "\n"; 2912 } 2913 } 2914 } 2915 2916 if (isA64GenTest) 2917 OS << "#endif\n"; 2918 } 2919 /// runTests - Write out a complete set of tests for all of the Neon 2920 /// intrinsics. 2921 void NeonEmitter::runTests(raw_ostream &OS) { 2922 OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi " 2923 "apcs-gnu\\\n" 2924 "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n" 2925 "// RUN: | FileCheck %s -check-prefix=CHECK_ARM\n" 2926 "\n" 2927 "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n" 2928 "// RUN -target-feature +neon -ffreestanding -S -o - %s \\\n" 2929 "// RUN: | FileCheck %s -check-prefix=CHECK_AARCH64\n" 2930 "\n" 2931 "// REQUIRES: long_tests\n" 2932 "\n" 2933 "#include <arm_neon.h>\n" 2934 "\n"; 2935 2936 // ARM tests must be emitted before AArch64 tests to ensure 2937 // tests for intrinsics that are common to ARM and AArch64 2938 // appear only once in the output stream. 
2939 // The check for uniqueness is done in genTargetTest. 2940 StringMap<OpKind> EmittedMap; 2941 2942 genTargetTest(OS, EmittedMap, false); 2943 2944 genTargetTest(OS, EmittedMap, true); 2945 } 2946 2947 namespace clang { 2948 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 2949 NeonEmitter(Records).run(OS); 2950 } 2951 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 2952 NeonEmitter(Records).runHeader(OS); 2953 } 2954 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 2955 NeonEmitter(Records).runTests(OS); 2956 } 2957 } // End namespace clang 2958