1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This tablegen backend is responsible for emitting arm_neon.h, which includes 10 // a declaration and definition of each function specified by the ARM NEON 11 // compiler interface. See ARM document DUI0348B. 12 // 13 // Each NEON instruction is implemented in terms of 1 or more functions which 14 // are suffixed with the element type of the input vectors. Functions may be 15 // implemented in terms of generic vector operations such as +, *, -, etc. or 16 // by calling a __builtin_-prefixed function which will be handled by clang's 17 // CodeGen library. 18 // 19 // Additional validation code can be generated by this file when runHeader() is 20 // called, rather than the normal run() entry point. 21 // 22 // See also the documentation in include/clang/Basic/arm_neon.td. 
23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "TableGenBackends.h" 27 #include "llvm/ADT/ArrayRef.h" 28 #include "llvm/ADT/DenseMap.h" 29 #include "llvm/ADT/None.h" 30 #include "llvm/ADT/Optional.h" 31 #include "llvm/ADT/STLExtras.h" 32 #include "llvm/ADT/SmallVector.h" 33 #include "llvm/ADT/StringExtras.h" 34 #include "llvm/ADT/StringRef.h" 35 #include "llvm/Support/Casting.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include "llvm/TableGen/Error.h" 39 #include "llvm/TableGen/Record.h" 40 #include "llvm/TableGen/SetTheory.h" 41 #include <algorithm> 42 #include <cassert> 43 #include <cctype> 44 #include <cstddef> 45 #include <cstdint> 46 #include <deque> 47 #include <map> 48 #include <set> 49 #include <sstream> 50 #include <string> 51 #include <utility> 52 #include <vector> 53 54 using namespace llvm; 55 56 namespace { 57 58 // While globals are generally bad, this one allows us to perform assertions 59 // liberally and somehow still trace them back to the def they indirectly 60 // came from. 61 static Record *CurrentRecord = nullptr; 62 static void assert_with_loc(bool Assertion, const std::string &Str) { 63 if (!Assertion) { 64 if (CurrentRecord) 65 PrintFatalError(CurrentRecord->getLoc(), Str); 66 else 67 PrintFatalError(Str); 68 } 69 } 70 71 enum ClassKind { 72 ClassNone, 73 ClassI, // generic integer instruction, e.g., "i8" suffix 74 ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix 75 ClassW, // width-specific instruction, e.g., "8" suffix 76 ClassB, // bitcast arguments with enum argument to specify type 77 ClassL, // Logical instructions which are op instructions 78 // but we need to not emit any suffix for in our 79 // tests. 80 ClassNoTest // Instructions which we do not test since they are 81 // not TRUE instructions. 82 }; 83 84 /// NeonTypeFlags - Flags to identify the types for overloaded Neon 85 /// builtins. 
These must be kept in sync with the flags in 86 /// include/clang/Basic/TargetBuiltins.h. 87 namespace NeonTypeFlags { 88 89 enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 }; 90 91 enum EltType { 92 Int8, 93 Int16, 94 Int32, 95 Int64, 96 Poly8, 97 Poly16, 98 Poly64, 99 Poly128, 100 Float16, 101 Float32, 102 Float64 103 }; 104 105 } // end namespace NeonTypeFlags 106 107 class NeonEmitter; 108 109 //===----------------------------------------------------------------------===// 110 // TypeSpec 111 //===----------------------------------------------------------------------===// 112 113 /// A TypeSpec is just a simple wrapper around a string, but gets its own type 114 /// for strong typing purposes. 115 /// 116 /// A TypeSpec can be used to create a type. 117 class TypeSpec : public std::string { 118 public: 119 static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) { 120 std::vector<TypeSpec> Ret; 121 TypeSpec Acc; 122 for (char I : Str.str()) { 123 if (islower(I)) { 124 Acc.push_back(I); 125 Ret.push_back(TypeSpec(Acc)); 126 Acc.clear(); 127 } else { 128 Acc.push_back(I); 129 } 130 } 131 return Ret; 132 } 133 }; 134 135 //===----------------------------------------------------------------------===// 136 // Type 137 //===----------------------------------------------------------------------===// 138 139 /// A Type. Not much more to say here. 140 class Type { 141 private: 142 TypeSpec TS; 143 144 enum TypeKind { 145 Void, 146 Float, 147 SInt, 148 UInt, 149 Poly, 150 }; 151 TypeKind Kind; 152 bool Immediate, Constant, Pointer; 153 // ScalarForMangling and NoManglingQ are really not suited to live here as 154 // they are not related to the type. But they live in the TypeSpec (not the 155 // prototype), so this is really the only place to store them. 
bool ScalarForMangling, NoManglingQ;
  // Total width in bits, width of one element in bits, and the number of
  // vectors (0 is used for scalars, see isScalar()).
  unsigned Bitwidth, ElementBitwidth, NumVectors;

public:
  /// Construct the void type: every flag cleared and every width zero.
  Type()
      : Kind(Void), Immediate(false), Constant(false),
        Pointer(false), ScalarForMangling(false), NoManglingQ(false),
        Bitwidth(0), ElementBitwidth(0), NumVectors(0) {}

  /// Construct a type from a typespec and a string of prototype modifiers.
  /// All fields start out as for void and are then filled in by
  /// applyModifiers().
  Type(TypeSpec TS, StringRef CharMods)
      : TS(std::move(TS)), Kind(Void), Immediate(false),
        Constant(false), Pointer(false), ScalarForMangling(false),
        NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {
    applyModifiers(CharMods);
  }

  /// Returns a type representing "void".
  static Type getVoid() { return Type(); }

  // Two types compare equal when their C type names (str()) are equal.
  bool operator==(const Type &Other) const { return str() == Other.str(); }
  bool operator!=(const Type &Other) const { return !operator==(Other); }

  //
  // Query functions
  //
  bool isScalarForMangling() const { return ScalarForMangling; }
  bool noManglingQ() const { return NoManglingQ; }

  bool isPointer() const { return Pointer; }
  // A "value" is anything that is neither void nor a pointer.
  bool isValue() const { return !isVoid() && !isPointer(); }
  bool isScalar() const { return isValue() && NumVectors == 0; }
  bool isVector() const { return isValue() && NumVectors > 0; }
  bool isConstPointer() const { return Constant; }
  bool isFloating() const { return Kind == Float; }
  bool isInteger() const { return Kind == SInt || Kind == UInt; }
  bool isPoly() const { return Kind == Poly; }
  bool isSigned() const { return Kind == SInt; }
  bool isImmediate() const { return Immediate; }
  bool isFloat() const { return isFloating() && ElementBitwidth == 32; }
  bool isDouble() const { return isFloating() && ElementBitwidth == 64; }
  bool isHalf() const { return isFloating() && ElementBitwidth == 16; }
  // NOTE(review): unlike isShort()/isInt()/isLong(), this does not check
  // isInteger(), so it is also true for 8-bit poly types.
  bool isChar() const { return ElementBitwidth == 8; }
  bool isShort() const { return isInteger() && ElementBitwidth == 16; }
  bool isInt() const { return isInteger() && ElementBitwidth == 32; }
  bool isLong() const { return isInteger() && ElementBitwidth == 64; }
  bool isVoid() const { return Kind == Void; }
  // Divides by ElementBitwidth, which is 0 for a default-constructed (void)
  // Type; only call this on types whose element width has been set.
  unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
  unsigned getSizeInBits() const { return Bitwidth; }
  unsigned getElementSizeInBits() const { return ElementBitwidth; }
  unsigned getNumVectors() const { return NumVectors; }

  //
  // Mutator functions
  //
  void makeUnsigned() {
    assert(!isVoid() && "not a potentially signed type");
    Kind = UInt;
  }
  void makeSigned() {
    assert(!isVoid() && "not a potentially signed type");
    Kind = SInt;
  }

  /// Turn the type into a (non-immediate) integer with the given element
  /// width and signedness. Bitwidth and NumVectors are left untouched.
  void makeInteger(unsigned ElemWidth, bool Sign) {
    assert(!isVoid() && "converting void to int probably not useful");
    Kind = Sign ? SInt : UInt;
    Immediate = false;
    ElementBitwidth = ElemWidth;
  }

  /// Turn the type into a signed immediate with the given element width.
  void makeImmediate(unsigned ElemWidth) {
    Kind = SInt;
    Immediate = true;
    ElementBitwidth = ElemWidth;
  }

  /// Collapse the type to a single element: the total width becomes the
  /// element width and the vector count becomes 0.
  void makeScalar() {
    Bitwidth = ElementBitwidth;
    NumVectors = 0;
  }

  void makeOneVector() {
    assert(isVector());
    NumVectors = 1;
  }

  void make32BitElement() {
    assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!");
    ElementBitwidth = 32;
  }

  /// Widen the type to 128 bits; a fatal error if it is already 128 bits.
  void doubleLanes() {
    assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");
    Bitwidth = 128;
  }

  /// Narrow the type to 64 bits; a fatal error if it is already 64 bits.
  void halveLanes() {
    assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!");
    Bitwidth = 64;
  }

  /// Return the C string representation of a type, which is the typename
  /// defined in stdint.h or arm_neon.h.
  std::string str() const;

  /// Return the string representation of a type, which is an encoded
  /// string for passing to the BUILTIN() macro in Builtins.def.
  std::string builtin_str() const;

  /// Return the value in NeonTypeFlags for this type.
unsigned getNeonEnum() const;

  /// Parse a type from a stdint.h or arm_neon.h typedef name,
  /// for example uint32x2_t or int64_t.
  static Type fromTypedefName(StringRef Name);

private:
  /// Creates the type based on the typespec string in TS.
  /// Sets "Quad" to true if the "Q" or "H" modifiers were
  /// seen. This is needed by applyModifier as some modifiers
  /// only take effect if the type size was changed by "Q" or "H".
  /// NOTE(review): applyModifiers() in this file passes a flag here but does
  /// not read it afterwards, so the last sentence looks stale - confirm.
  void applyTypespec(bool &Quad);
  /// Applies prototype modifiers to the type.
  void applyModifiers(StringRef Mods);
};

//===----------------------------------------------------------------------===//
// Variable
//===----------------------------------------------------------------------===//

/// A variable is a simple class that just has a type and a name.
class Variable {
  Type T;
  std::string N;

public:
  /// A default variable has the void type and an empty base name.
  Variable() : T(Type::getVoid()), N("") {}
  Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {}

  Type getType() const { return T; }
  /// The emitted identifier: the base name prefixed with "__".
  std::string getName() const { return "__" + N; }
};

//===----------------------------------------------------------------------===//
// Intrinsic
//===----------------------------------------------------------------------===//

/// The main grunt class. This represents an instantiation of an intrinsic with
/// a particular typespec and prototype.
class Intrinsic {
  friend class DagEmitter;

  /// The Record this intrinsic was created from.
  Record *R;
  /// The unmangled name.
  std::string Name;
  /// The input and output typespecs. InTS == OutTS except when
  /// CartesianProductOfTypes is 1 - this is the case for vreinterpret.
  TypeSpec OutTS, InTS;
  /// The base class kind. Most intrinsics use ClassS, which has full type
  /// info for integers (s32/u32).
/// Some use ClassI, which doesn't care about
  /// signedness (i32), while some (ClassB) have no type at all, only a width
  /// (32).
  /// NOTE(review): the ClassKind enum documents ClassW as the width-specific
  /// kind and ClassB as "bitcast arguments", and getInstTypeCode() returns an
  /// empty code for ClassB - the sentence above may be stale; confirm.
  ClassKind CK;
  /// The list of DAGs for the body. May be empty, in which case we should
  /// emit a builtin call.
  ListInit *Body;
  /// The architectural #ifdef guard.
  std::string Guard;
  /// Set if the Unavailable bit is 1. This means we don't generate a body,
  /// just an "unavailable" attribute on a declaration.
  bool IsUnavailable;
  /// Is this intrinsic safe for big-endian? or does it need its arguments
  /// reversing?
  bool BigEndianSafe;

  /// The types of return value [0] and parameters [1..].
  std::vector<Type> Types;
  /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls.
  /// This is an index into Types (so 0 refers to the return type).
  int PolymorphicKeyType;
  /// The local variables defined.
  std::map<std::string, Variable> Variables;
  /// NeededEarly - set if any other intrinsic depends on this intrinsic.
  bool NeededEarly;
  /// UseMacro - set if we should implement using a macro or unset for a
  /// function.
  bool UseMacro;
  /// The set of intrinsics that this intrinsic uses/requires.
  std::set<Intrinsic *> Dependencies;
  /// The "base type", which is Type(OutTS, ".") - i.e. the output typespec
  /// with no modifier applied. InBaseType is only different if
  /// CartesianProductOfTypes = 1 (for vreinterpret).
  Type BaseType, InBaseType;
  /// The return variable.
  Variable RetVar;
  /// A postfix to apply to every variable. Defaults to "".
351 std::string VariablePostfix; 352 353 NeonEmitter &Emitter; 354 std::stringstream OS; 355 356 bool isBigEndianSafe() const { 357 if (BigEndianSafe) 358 return true; 359 360 for (const auto &T : Types){ 361 if (T.isVector() && T.getNumElements() > 1) 362 return false; 363 } 364 return true; 365 } 366 367 public: 368 Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS, 369 TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter, 370 StringRef Guard, bool IsUnavailable, bool BigEndianSafe) 371 : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body), 372 Guard(Guard.str()), IsUnavailable(IsUnavailable), 373 BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false), 374 UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."), 375 Emitter(Emitter) { 376 // Modify the TypeSpec per-argument to get a concrete Type, and create 377 // known variables for each. 378 // Types[0] is the return value. 379 unsigned Pos = 0; 380 Types.emplace_back(OutTS, getNextModifiers(Proto, Pos)); 381 StringRef Mods = getNextModifiers(Proto, Pos); 382 while (!Mods.empty()) { 383 Types.emplace_back(InTS, Mods); 384 if (Mods.find("!") != StringRef::npos) 385 PolymorphicKeyType = Types.size() - 1; 386 387 Mods = getNextModifiers(Proto, Pos); 388 } 389 390 for (auto Type : Types) { 391 // If this builtin takes an immediate argument, we need to #define it rather 392 // than use a standard declaration, so that SemaChecking can range check 393 // the immediate passed by the user. 394 395 // Pointer arguments need to use macros to avoid hiding aligned attributes 396 // from the pointer type. 397 398 // It is not permitted to pass or return an __fp16 by value, so intrinsics 399 // taking a scalar float16_t must be implemented as macros. 400 if (Type.isImmediate() || Type.isPointer() || 401 (Type.isScalar() && Type.isHalf())) 402 UseMacro = true; 403 } 404 } 405 406 /// Get the Record that this intrinsic is based off. 
407 Record *getRecord() const { return R; } 408 /// Get the set of Intrinsics that this intrinsic calls. 409 /// this is the set of immediate dependencies, NOT the 410 /// transitive closure. 411 const std::set<Intrinsic *> &getDependencies() const { return Dependencies; } 412 /// Get the architectural guard string (#ifdef). 413 std::string getGuard() const { return Guard; } 414 /// Get the non-mangled name. 415 std::string getName() const { return Name; } 416 417 /// Return true if the intrinsic takes an immediate operand. 418 bool hasImmediate() const { 419 return std::any_of(Types.begin(), Types.end(), 420 [](const Type &T) { return T.isImmediate(); }); 421 } 422 423 /// Return the parameter index of the immediate operand. 424 unsigned getImmediateIdx() const { 425 for (unsigned Idx = 0; Idx < Types.size(); ++Idx) 426 if (Types[Idx].isImmediate()) 427 return Idx - 1; 428 llvm_unreachable("Intrinsic has no immediate"); 429 } 430 431 432 unsigned getNumParams() const { return Types.size() - 1; } 433 Type getReturnType() const { return Types[0]; } 434 Type getParamType(unsigned I) const { return Types[I + 1]; } 435 Type getBaseType() const { return BaseType; } 436 Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; } 437 438 /// Return true if the prototype has a scalar argument. 439 bool protoHasScalar() const; 440 441 /// Return the index that parameter PIndex will sit at 442 /// in a generated function call. This is often just PIndex, 443 /// but may not be as things such as multiple-vector operands 444 /// and sret parameters need to be taken into accont. 445 unsigned getGeneratedParamIdx(unsigned PIndex) { 446 unsigned Idx = 0; 447 if (getReturnType().getNumVectors() > 1) 448 // Multiple vectors are passed as sret. 
449 ++Idx; 450 451 for (unsigned I = 0; I < PIndex; ++I) 452 Idx += std::max(1U, getParamType(I).getNumVectors()); 453 454 return Idx; 455 } 456 457 bool hasBody() const { return Body && !Body->getValues().empty(); } 458 459 void setNeededEarly() { NeededEarly = true; } 460 461 bool operator<(const Intrinsic &Other) const { 462 // Sort lexicographically on a two-tuple (Guard, Name) 463 if (Guard != Other.Guard) 464 return Guard < Other.Guard; 465 return Name < Other.Name; 466 } 467 468 ClassKind getClassKind(bool UseClassBIfScalar = false) { 469 if (UseClassBIfScalar && !protoHasScalar()) 470 return ClassB; 471 return CK; 472 } 473 474 /// Return the name, mangled with type information. 475 /// If ForceClassS is true, use ClassS (u32/s32) instead 476 /// of the intrinsic's own type class. 477 std::string getMangledName(bool ForceClassS = false) const; 478 /// Return the type code for a builtin function call. 479 std::string getInstTypeCode(Type T, ClassKind CK) const; 480 /// Return the type string for a BUILTIN() macro in Builtins.def. 481 std::string getBuiltinTypeStr(); 482 483 /// Generate the intrinsic, returning code. 484 std::string generate(); 485 /// Perform type checking and populate the dependency graph, but 486 /// don't generate code yet. 
487 void indexBody(); 488 489 private: 490 StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const; 491 492 std::string mangleName(std::string Name, ClassKind CK) const; 493 494 void initVariables(); 495 std::string replaceParamsIn(std::string S); 496 497 void emitBodyAsBuiltinCall(); 498 499 void generateImpl(bool ReverseArguments, 500 StringRef NamePrefix, StringRef CallPrefix); 501 void emitReturn(); 502 void emitBody(StringRef CallPrefix); 503 void emitShadowedArgs(); 504 void emitArgumentReversal(); 505 void emitReturnReversal(); 506 void emitReverseVariable(Variable &Dest, Variable &Src); 507 void emitNewLine(); 508 void emitClosingBrace(); 509 void emitOpeningBrace(); 510 void emitPrototype(StringRef NamePrefix); 511 512 class DagEmitter { 513 Intrinsic &Intr; 514 StringRef CallPrefix; 515 516 public: 517 DagEmitter(Intrinsic &Intr, StringRef CallPrefix) : 518 Intr(Intr), CallPrefix(CallPrefix) { 519 } 520 std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName); 521 std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI); 522 std::pair<Type, std::string> emitDagSplat(DagInit *DI); 523 std::pair<Type, std::string> emitDagDup(DagInit *DI); 524 std::pair<Type, std::string> emitDagDupTyped(DagInit *DI); 525 std::pair<Type, std::string> emitDagShuffle(DagInit *DI); 526 std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast); 527 std::pair<Type, std::string> emitDagCall(DagInit *DI, 528 bool MatchMangledName); 529 std::pair<Type, std::string> emitDagNameReplace(DagInit *DI); 530 std::pair<Type, std::string> emitDagLiteral(DagInit *DI); 531 std::pair<Type, std::string> emitDagOp(DagInit *DI); 532 std::pair<Type, std::string> emitDag(DagInit *DI); 533 }; 534 }; 535 536 //===----------------------------------------------------------------------===// 537 // NeonEmitter 538 //===----------------------------------------------------------------------===// 539 540 class NeonEmitter { 541 RecordKeeper &Records; 542 DenseMap<Record 
*, ClassKind> ClassMap; 543 std::map<std::string, std::deque<Intrinsic>> IntrinsicMap; 544 unsigned UniqueNumber; 545 546 void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out); 547 void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs); 548 void genOverloadTypeCheckCode(raw_ostream &OS, 549 SmallVectorImpl<Intrinsic *> &Defs); 550 void genIntrinsicRangeCheckCode(raw_ostream &OS, 551 SmallVectorImpl<Intrinsic *> &Defs); 552 553 public: 554 /// Called by Intrinsic - this attempts to get an intrinsic that takes 555 /// the given types as arguments. 556 Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types, 557 Optional<std::string> MangledName); 558 559 /// Called by Intrinsic - returns a globally-unique number. 560 unsigned getUniqueNumber() { return UniqueNumber++; } 561 562 NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) { 563 Record *SI = R.getClass("SInst"); 564 Record *II = R.getClass("IInst"); 565 Record *WI = R.getClass("WInst"); 566 Record *SOpI = R.getClass("SOpInst"); 567 Record *IOpI = R.getClass("IOpInst"); 568 Record *WOpI = R.getClass("WOpInst"); 569 Record *LOpI = R.getClass("LOpInst"); 570 Record *NoTestOpI = R.getClass("NoTestOpInst"); 571 572 ClassMap[SI] = ClassS; 573 ClassMap[II] = ClassI; 574 ClassMap[WI] = ClassW; 575 ClassMap[SOpI] = ClassS; 576 ClassMap[IOpI] = ClassI; 577 ClassMap[WOpI] = ClassW; 578 ClassMap[LOpI] = ClassL; 579 ClassMap[NoTestOpI] = ClassNoTest; 580 } 581 582 // run - Emit arm_neon.h.inc 583 void run(raw_ostream &o); 584 585 // runFP16 - Emit arm_fp16.h.inc 586 void runFP16(raw_ostream &o); 587 588 // runHeader - Emit all the __builtin prototypes used in arm_neon.h 589 // and arm_fp16.h 590 void runHeader(raw_ostream &o); 591 592 // runTests - Emit tests for all the Neon intrinsics. 
593 void runTests(raw_ostream &o); 594 }; 595 596 } // end anonymous namespace 597 598 //===----------------------------------------------------------------------===// 599 // Type implementation 600 //===----------------------------------------------------------------------===// 601 602 std::string Type::str() const { 603 if (isVoid()) 604 return "void"; 605 std::string S; 606 607 if (isInteger() && !isSigned()) 608 S += "u"; 609 610 if (isPoly()) 611 S += "poly"; 612 else if (isFloating()) 613 S += "float"; 614 else 615 S += "int"; 616 617 S += utostr(ElementBitwidth); 618 if (isVector()) 619 S += "x" + utostr(getNumElements()); 620 if (NumVectors > 1) 621 S += "x" + utostr(NumVectors); 622 S += "_t"; 623 624 if (Constant) 625 S += " const"; 626 if (Pointer) 627 S += " *"; 628 629 return S; 630 } 631 632 std::string Type::builtin_str() const { 633 std::string S; 634 if (isVoid()) 635 return "v"; 636 637 if (isPointer()) { 638 // All pointers are void pointers. 639 S = "v"; 640 if (isConstPointer()) 641 S += "C"; 642 S += "*"; 643 return S; 644 } else if (isInteger()) 645 switch (ElementBitwidth) { 646 case 8: S += "c"; break; 647 case 16: S += "s"; break; 648 case 32: S += "i"; break; 649 case 64: S += "Wi"; break; 650 case 128: S += "LLLi"; break; 651 default: llvm_unreachable("Unhandled case!"); 652 } 653 else 654 switch (ElementBitwidth) { 655 case 16: S += "h"; break; 656 case 32: S += "f"; break; 657 case 64: S += "d"; break; 658 default: llvm_unreachable("Unhandled case!"); 659 } 660 661 // FIXME: NECESSARY??????????????????????????????????????????????????????????????????????? 662 if (isChar() && !isPointer() && isSigned()) 663 // Make chars explicitly signed. 664 S = "S" + S; 665 else if (isInteger() && !isSigned()) 666 S = "U" + S; 667 668 // Constant indices are "int", but have the "constant expression" modifier. 
669 if (isImmediate()) { 670 assert(isInteger() && isSigned()); 671 S = "I" + S; 672 } 673 674 if (isScalar()) 675 return S; 676 677 std::string Ret; 678 for (unsigned I = 0; I < NumVectors; ++I) 679 Ret += "V" + utostr(getNumElements()) + S; 680 681 return Ret; 682 } 683 684 unsigned Type::getNeonEnum() const { 685 unsigned Addend; 686 switch (ElementBitwidth) { 687 case 8: Addend = 0; break; 688 case 16: Addend = 1; break; 689 case 32: Addend = 2; break; 690 case 64: Addend = 3; break; 691 case 128: Addend = 4; break; 692 default: llvm_unreachable("Unhandled element bitwidth!"); 693 } 694 695 unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend; 696 if (isPoly()) { 697 // Adjustment needed because Poly32 doesn't exist. 698 if (Addend >= 2) 699 --Addend; 700 Base = (unsigned)NeonTypeFlags::Poly8 + Addend; 701 } 702 if (isFloating()) { 703 assert(Addend != 0 && "Float8 doesn't exist!"); 704 Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1); 705 } 706 707 if (Bitwidth == 128) 708 Base |= (unsigned)NeonTypeFlags::QuadFlag; 709 if (isInteger() && !isSigned()) 710 Base |= (unsigned)NeonTypeFlags::UnsignedFlag; 711 712 return Base; 713 } 714 715 Type Type::fromTypedefName(StringRef Name) { 716 Type T; 717 T.Kind = SInt; 718 719 if (Name.front() == 'u') { 720 T.Kind = UInt; 721 Name = Name.drop_front(); 722 } 723 724 if (Name.startswith("float")) { 725 T.Kind = Float; 726 Name = Name.drop_front(5); 727 } else if (Name.startswith("poly")) { 728 T.Kind = Poly; 729 Name = Name.drop_front(4); 730 } else { 731 assert(Name.startswith("int")); 732 Name = Name.drop_front(3); 733 } 734 735 unsigned I = 0; 736 for (I = 0; I < Name.size(); ++I) { 737 if (!isdigit(Name[I])) 738 break; 739 } 740 Name.substr(0, I).getAsInteger(10, T.ElementBitwidth); 741 Name = Name.drop_front(I); 742 743 T.Bitwidth = T.ElementBitwidth; 744 T.NumVectors = 1; 745 746 if (Name.front() == 'x') { 747 Name = Name.drop_front(); 748 unsigned I = 0; 749 for (I = 0; I < Name.size(); ++I) { 750 if 
(!isdigit(Name[I])) 751 break; 752 } 753 unsigned NumLanes; 754 Name.substr(0, I).getAsInteger(10, NumLanes); 755 Name = Name.drop_front(I); 756 T.Bitwidth = T.ElementBitwidth * NumLanes; 757 } else { 758 // Was scalar. 759 T.NumVectors = 0; 760 } 761 if (Name.front() == 'x') { 762 Name = Name.drop_front(); 763 unsigned I = 0; 764 for (I = 0; I < Name.size(); ++I) { 765 if (!isdigit(Name[I])) 766 break; 767 } 768 Name.substr(0, I).getAsInteger(10, T.NumVectors); 769 Name = Name.drop_front(I); 770 } 771 772 assert(Name.startswith("_t") && "Malformed typedef!"); 773 return T; 774 } 775 776 void Type::applyTypespec(bool &Quad) { 777 std::string S = TS; 778 ScalarForMangling = false; 779 Kind = SInt; 780 ElementBitwidth = ~0U; 781 NumVectors = 1; 782 783 for (char I : S) { 784 switch (I) { 785 case 'S': 786 ScalarForMangling = true; 787 break; 788 case 'H': 789 NoManglingQ = true; 790 Quad = true; 791 break; 792 case 'Q': 793 Quad = true; 794 break; 795 case 'P': 796 Kind = Poly; 797 break; 798 case 'U': 799 Kind = UInt; 800 break; 801 case 'c': 802 ElementBitwidth = 8; 803 break; 804 case 'h': 805 Kind = Float; 806 LLVM_FALLTHROUGH; 807 case 's': 808 ElementBitwidth = 16; 809 break; 810 case 'f': 811 Kind = Float; 812 LLVM_FALLTHROUGH; 813 case 'i': 814 ElementBitwidth = 32; 815 break; 816 case 'd': 817 Kind = Float; 818 LLVM_FALLTHROUGH; 819 case 'l': 820 ElementBitwidth = 64; 821 break; 822 case 'k': 823 ElementBitwidth = 128; 824 // Poly doesn't have a 128x1 type. 825 if (isPoly()) 826 NumVectors = 0; 827 break; 828 default: 829 llvm_unreachable("Unhandled type code!"); 830 } 831 } 832 assert(ElementBitwidth != ~0U && "Bad element bitwidth!"); 833 834 Bitwidth = Quad ? 
128 : 64; 835 } 836 837 void Type::applyModifiers(StringRef Mods) { 838 bool AppliedQuad = false; 839 applyTypespec(AppliedQuad); 840 841 for (char Mod : Mods) { 842 switch (Mod) { 843 case '.': 844 break; 845 case 'v': 846 Kind = Void; 847 break; 848 case 'S': 849 Kind = SInt; 850 break; 851 case 'U': 852 Kind = UInt; 853 break; 854 case 'F': 855 Kind = Float; 856 break; 857 case 'P': 858 Kind = Poly; 859 break; 860 case '>': 861 assert(ElementBitwidth < 128); 862 ElementBitwidth *= 2; 863 break; 864 case '<': 865 assert(ElementBitwidth > 8); 866 ElementBitwidth /= 2; 867 break; 868 case '1': 869 NumVectors = 0; 870 break; 871 case '2': 872 NumVectors = 2; 873 break; 874 case '3': 875 NumVectors = 3; 876 break; 877 case '4': 878 NumVectors = 4; 879 break; 880 case '*': 881 Pointer = true; 882 break; 883 case 'c': 884 Constant = true; 885 break; 886 case 'Q': 887 Bitwidth = 128; 888 break; 889 case 'q': 890 Bitwidth = 64; 891 break; 892 case 'I': 893 Kind = SInt; 894 ElementBitwidth = Bitwidth = 32; 895 NumVectors = 0; 896 Immediate = true; 897 break; 898 case 'p': 899 if (isPoly()) 900 Kind = UInt; 901 break; 902 case '!': 903 // Key type, handled elsewhere. 
904 break; 905 default: 906 llvm_unreachable("Unhandled character!"); 907 } 908 } 909 } 910 911 //===----------------------------------------------------------------------===// 912 // Intrinsic implementation 913 //===----------------------------------------------------------------------===// 914 915 StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const { 916 if (Proto.size() == Pos) 917 return StringRef(); 918 else if (Proto[Pos] != '(') 919 return Proto.substr(Pos++, 1); 920 921 size_t Start = Pos + 1; 922 size_t End = Proto.find(')', Start); 923 assert_with_loc(End != StringRef::npos, "unmatched modifier group paren"); 924 Pos = End + 1; 925 return Proto.slice(Start, End); 926 } 927 928 std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const { 929 char typeCode = '\0'; 930 bool printNumber = true; 931 932 if (CK == ClassB) 933 return ""; 934 935 if (T.isPoly()) 936 typeCode = 'p'; 937 else if (T.isInteger()) 938 typeCode = T.isSigned() ? 's' : 'u'; 939 else 940 typeCode = 'f'; 941 942 if (CK == ClassI) { 943 switch (typeCode) { 944 default: 945 break; 946 case 's': 947 case 'u': 948 case 'p': 949 typeCode = 'i'; 950 break; 951 } 952 } 953 if (CK == ClassB) { 954 typeCode = '\0'; 955 } 956 957 std::string S; 958 if (typeCode != '\0') 959 S.push_back(typeCode); 960 if (printNumber) 961 S += utostr(T.getElementSizeInBits()); 962 963 return S; 964 } 965 966 std::string Intrinsic::getBuiltinTypeStr() { 967 ClassKind LocalCK = getClassKind(true); 968 std::string S; 969 970 Type RetT = getReturnType(); 971 if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() && 972 !RetT.isFloating()) 973 RetT.makeInteger(RetT.getElementSizeInBits(), false); 974 975 // Since the return value must be one type, return a vector type of the 976 // appropriate width which we will bitcast. An exception is made for 977 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like 978 // fashion, storing them to a pointer arg. 
979 if (RetT.getNumVectors() > 1) { 980 S += "vv*"; // void result with void* first argument 981 } else { 982 if (RetT.isPoly()) 983 RetT.makeInteger(RetT.getElementSizeInBits(), false); 984 if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned()) 985 RetT.makeSigned(); 986 987 if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar()) 988 // Cast to vector of 8-bit elements. 989 RetT.makeInteger(8, true); 990 991 S += RetT.builtin_str(); 992 } 993 994 for (unsigned I = 0; I < getNumParams(); ++I) { 995 Type T = getParamType(I); 996 if (T.isPoly()) 997 T.makeInteger(T.getElementSizeInBits(), false); 998 999 if (LocalCK == ClassB && !T.isScalar()) 1000 T.makeInteger(8, true); 1001 // Halves always get converted to 8-bit elements. 1002 if (T.isHalf() && T.isVector() && !T.isScalarForMangling()) 1003 T.makeInteger(8, true); 1004 1005 if (LocalCK == ClassI && T.isInteger()) 1006 T.makeSigned(); 1007 1008 if (hasImmediate() && getImmediateIdx() == I) 1009 T.makeImmediate(32); 1010 1011 S += T.builtin_str(); 1012 } 1013 1014 // Extra constant integer to hold type class enum for this function, e.g. s8 1015 if (LocalCK == ClassB) 1016 S += "i"; 1017 1018 return S; 1019 } 1020 1021 std::string Intrinsic::getMangledName(bool ForceClassS) const { 1022 // Check if the prototype has a scalar operand with the type of the vector 1023 // elements. If not, bitcasting the args will take care of arg checking. 1024 // The actual signedness etc. will be taken care of with special enums. 1025 ClassKind LocalCK = CK; 1026 if (!protoHasScalar()) 1027 LocalCK = ClassB; 1028 1029 return mangleName(Name, ForceClassS ? 
ClassS : LocalCK); 1030 } 1031 1032 std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const { 1033 std::string typeCode = getInstTypeCode(BaseType, LocalCK); 1034 std::string S = Name; 1035 1036 if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" || 1037 Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32") 1038 return Name; 1039 1040 if (!typeCode.empty()) { 1041 // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN. 1042 if (Name.size() >= 3 && isdigit(Name.back()) && 1043 Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_') 1044 S.insert(S.length() - 3, "_" + typeCode); 1045 else 1046 S += "_" + typeCode; 1047 } 1048 1049 if (BaseType != InBaseType) { 1050 // A reinterpret - out the input base type at the end. 1051 S += "_" + getInstTypeCode(InBaseType, LocalCK); 1052 } 1053 1054 if (LocalCK == ClassB) 1055 S += "_v"; 1056 1057 // Insert a 'q' before the first '_' character so that it ends up before 1058 // _lane or _n on vector-scalar operations. 
1059 if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) { 1060 size_t Pos = S.find('_'); 1061 S.insert(Pos, "q"); 1062 } 1063 1064 char Suffix = '\0'; 1065 if (BaseType.isScalarForMangling()) { 1066 switch (BaseType.getElementSizeInBits()) { 1067 case 8: Suffix = 'b'; break; 1068 case 16: Suffix = 'h'; break; 1069 case 32: Suffix = 's'; break; 1070 case 64: Suffix = 'd'; break; 1071 default: llvm_unreachable("Bad suffix!"); 1072 } 1073 } 1074 if (Suffix != '\0') { 1075 size_t Pos = S.find('_'); 1076 S.insert(Pos, &Suffix, 1); 1077 } 1078 1079 return S; 1080 } 1081 1082 std::string Intrinsic::replaceParamsIn(std::string S) { 1083 while (S.find('$') != std::string::npos) { 1084 size_t Pos = S.find('$'); 1085 size_t End = Pos + 1; 1086 while (isalpha(S[End])) 1087 ++End; 1088 1089 std::string VarName = S.substr(Pos + 1, End - Pos - 1); 1090 assert_with_loc(Variables.find(VarName) != Variables.end(), 1091 "Variable not defined!"); 1092 S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName()); 1093 } 1094 1095 return S; 1096 } 1097 1098 void Intrinsic::initVariables() { 1099 Variables.clear(); 1100 1101 // Modify the TypeSpec per-argument to get a concrete Type, and create 1102 // known variables for each. 
1103 for (unsigned I = 1; I < Types.size(); ++I) { 1104 char NameC = '0' + (I - 1); 1105 std::string Name = "p"; 1106 Name.push_back(NameC); 1107 1108 Variables[Name] = Variable(Types[I], Name + VariablePostfix); 1109 } 1110 RetVar = Variable(Types[0], "ret" + VariablePostfix); 1111 } 1112 1113 void Intrinsic::emitPrototype(StringRef NamePrefix) { 1114 if (UseMacro) 1115 OS << "#define "; 1116 else 1117 OS << "__ai " << Types[0].str() << " "; 1118 1119 OS << NamePrefix.str() << mangleName(Name, ClassS) << "("; 1120 1121 for (unsigned I = 0; I < getNumParams(); ++I) { 1122 if (I != 0) 1123 OS << ", "; 1124 1125 char NameC = '0' + I; 1126 std::string Name = "p"; 1127 Name.push_back(NameC); 1128 assert(Variables.find(Name) != Variables.end()); 1129 Variable &V = Variables[Name]; 1130 1131 if (!UseMacro) 1132 OS << V.getType().str() << " "; 1133 OS << V.getName(); 1134 } 1135 1136 OS << ")"; 1137 } 1138 1139 void Intrinsic::emitOpeningBrace() { 1140 if (UseMacro) 1141 OS << " __extension__ ({"; 1142 else 1143 OS << " {"; 1144 emitNewLine(); 1145 } 1146 1147 void Intrinsic::emitClosingBrace() { 1148 if (UseMacro) 1149 OS << "})"; 1150 else 1151 OS << "}"; 1152 } 1153 1154 void Intrinsic::emitNewLine() { 1155 if (UseMacro) 1156 OS << " \\\n"; 1157 else 1158 OS << "\n"; 1159 } 1160 1161 void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) { 1162 if (Dest.getType().getNumVectors() > 1) { 1163 emitNewLine(); 1164 1165 for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) { 1166 OS << " " << Dest.getName() << ".val[" << K << "] = " 1167 << "__builtin_shufflevector(" 1168 << Src.getName() << ".val[" << K << "], " 1169 << Src.getName() << ".val[" << K << "]"; 1170 for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J) 1171 OS << ", " << J; 1172 OS << ");"; 1173 emitNewLine(); 1174 } 1175 } else { 1176 OS << " " << Dest.getName() 1177 << " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName(); 1178 for (int J = 
Dest.getType().getNumElements() - 1; J >= 0; --J) 1179 OS << ", " << J; 1180 OS << ");"; 1181 emitNewLine(); 1182 } 1183 } 1184 1185 void Intrinsic::emitArgumentReversal() { 1186 if (isBigEndianSafe()) 1187 return; 1188 1189 // Reverse all vector arguments. 1190 for (unsigned I = 0; I < getNumParams(); ++I) { 1191 std::string Name = "p" + utostr(I); 1192 std::string NewName = "rev" + utostr(I); 1193 1194 Variable &V = Variables[Name]; 1195 Variable NewV(V.getType(), NewName + VariablePostfix); 1196 1197 if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1) 1198 continue; 1199 1200 OS << " " << NewV.getType().str() << " " << NewV.getName() << ";"; 1201 emitReverseVariable(NewV, V); 1202 V = NewV; 1203 } 1204 } 1205 1206 void Intrinsic::emitReturnReversal() { 1207 if (isBigEndianSafe()) 1208 return; 1209 if (!getReturnType().isVector() || getReturnType().isVoid() || 1210 getReturnType().getNumElements() == 1) 1211 return; 1212 emitReverseVariable(RetVar, RetVar); 1213 } 1214 1215 void Intrinsic::emitShadowedArgs() { 1216 // Macro arguments are not type-checked like inline function arguments, 1217 // so assign them to local temporaries to get the right type checking. 1218 if (!UseMacro) 1219 return; 1220 1221 for (unsigned I = 0; I < getNumParams(); ++I) { 1222 // Do not create a temporary for an immediate argument. 1223 // That would defeat the whole point of using a macro! 1224 if (getParamType(I).isImmediate()) 1225 continue; 1226 // Do not create a temporary for pointer arguments. The input 1227 // pointer may have an alignment hint. 
1228 if (getParamType(I).isPointer()) 1229 continue; 1230 1231 std::string Name = "p" + utostr(I); 1232 1233 assert(Variables.find(Name) != Variables.end()); 1234 Variable &V = Variables[Name]; 1235 1236 std::string NewName = "s" + utostr(I); 1237 Variable V2(V.getType(), NewName + VariablePostfix); 1238 1239 OS << " " << V2.getType().str() << " " << V2.getName() << " = " 1240 << V.getName() << ";"; 1241 emitNewLine(); 1242 1243 V = V2; 1244 } 1245 } 1246 1247 bool Intrinsic::protoHasScalar() const { 1248 return std::any_of(Types.begin(), Types.end(), [](const Type &T) { 1249 return T.isScalar() && !T.isImmediate(); 1250 }); 1251 } 1252 1253 void Intrinsic::emitBodyAsBuiltinCall() { 1254 std::string S; 1255 1256 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit 1257 // sret-like argument. 1258 bool SRet = getReturnType().getNumVectors() >= 2; 1259 1260 StringRef N = Name; 1261 ClassKind LocalCK = CK; 1262 if (!protoHasScalar()) 1263 LocalCK = ClassB; 1264 1265 if (!getReturnType().isVoid() && !SRet) 1266 S += "(" + RetVar.getType().str() + ") "; 1267 1268 S += "__builtin_neon_" + mangleName(std::string(N), LocalCK) + "("; 1269 1270 if (SRet) 1271 S += "&" + RetVar.getName() + ", "; 1272 1273 for (unsigned I = 0; I < getNumParams(); ++I) { 1274 Variable &V = Variables["p" + utostr(I)]; 1275 Type T = V.getType(); 1276 1277 // Handle multiple-vector values specially, emitting each subvector as an 1278 // argument to the builtin. 1279 if (T.getNumVectors() > 1) { 1280 // Check if an explicit cast is needed. 1281 std::string Cast; 1282 if (LocalCK == ClassB) { 1283 Type T2 = T; 1284 T2.makeOneVector(); 1285 T2.makeInteger(8, /*Signed=*/true); 1286 Cast = "(" + T2.str() + ")"; 1287 } 1288 1289 for (unsigned J = 0; J < T.getNumVectors(); ++J) 1290 S += Cast + V.getName() + ".val[" + utostr(J) + "], "; 1291 continue; 1292 } 1293 1294 std::string Arg = V.getName(); 1295 Type CastToType = T; 1296 1297 // Check if an explicit cast is needed. 
1298 if (CastToType.isVector() && 1299 (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling()))) { 1300 CastToType.makeInteger(8, true); 1301 Arg = "(" + CastToType.str() + ")" + Arg; 1302 } else if (CastToType.isVector() && LocalCK == ClassI) { 1303 if (CastToType.isInteger()) 1304 CastToType.makeSigned(); 1305 Arg = "(" + CastToType.str() + ")" + Arg; 1306 } 1307 1308 S += Arg + ", "; 1309 } 1310 1311 // Extra constant integer to hold type class enum for this function, e.g. s8 1312 if (getClassKind(true) == ClassB) { 1313 S += utostr(getPolymorphicKeyType().getNeonEnum()); 1314 } else { 1315 // Remove extraneous ", ". 1316 S.pop_back(); 1317 S.pop_back(); 1318 } 1319 S += ");"; 1320 1321 std::string RetExpr; 1322 if (!SRet && !RetVar.getType().isVoid()) 1323 RetExpr = RetVar.getName() + " = "; 1324 1325 OS << " " << RetExpr << S; 1326 emitNewLine(); 1327 } 1328 1329 void Intrinsic::emitBody(StringRef CallPrefix) { 1330 std::vector<std::string> Lines; 1331 1332 assert(RetVar.getType() == Types[0]); 1333 // Create a return variable, if we're not void. 1334 if (!RetVar.getType().isVoid()) { 1335 OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";"; 1336 emitNewLine(); 1337 } 1338 1339 if (!Body || Body->getValues().empty()) { 1340 // Nothing specific to output - must output a builtin. 1341 emitBodyAsBuiltinCall(); 1342 return; 1343 } 1344 1345 // We have a list of "things to output". The last should be returned. 
1346 for (auto *I : Body->getValues()) { 1347 if (StringInit *SI = dyn_cast<StringInit>(I)) { 1348 Lines.push_back(replaceParamsIn(SI->getAsString())); 1349 } else if (DagInit *DI = dyn_cast<DagInit>(I)) { 1350 DagEmitter DE(*this, CallPrefix); 1351 Lines.push_back(DE.emitDag(DI).second + ";"); 1352 } 1353 } 1354 1355 assert(!Lines.empty() && "Empty def?"); 1356 if (!RetVar.getType().isVoid()) 1357 Lines.back().insert(0, RetVar.getName() + " = "); 1358 1359 for (auto &L : Lines) { 1360 OS << " " << L; 1361 emitNewLine(); 1362 } 1363 } 1364 1365 void Intrinsic::emitReturn() { 1366 if (RetVar.getType().isVoid()) 1367 return; 1368 if (UseMacro) 1369 OS << " " << RetVar.getName() << ";"; 1370 else 1371 OS << " return " << RetVar.getName() << ";"; 1372 emitNewLine(); 1373 } 1374 1375 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) { 1376 // At this point we should only be seeing a def. 1377 DefInit *DefI = cast<DefInit>(DI->getOperator()); 1378 std::string Op = DefI->getAsString(); 1379 1380 if (Op == "cast" || Op == "bitcast") 1381 return emitDagCast(DI, Op == "bitcast"); 1382 if (Op == "shuffle") 1383 return emitDagShuffle(DI); 1384 if (Op == "dup") 1385 return emitDagDup(DI); 1386 if (Op == "dup_typed") 1387 return emitDagDupTyped(DI); 1388 if (Op == "splat") 1389 return emitDagSplat(DI); 1390 if (Op == "save_temp") 1391 return emitDagSaveTemp(DI); 1392 if (Op == "op") 1393 return emitDagOp(DI); 1394 if (Op == "call" || Op == "call_mangled") 1395 return emitDagCall(DI, Op == "call_mangled"); 1396 if (Op == "name_replace") 1397 return emitDagNameReplace(DI); 1398 if (Op == "literal") 1399 return emitDagLiteral(DI); 1400 assert_with_loc(false, "Unknown operation!"); 1401 return std::make_pair(Type::getVoid(), ""); 1402 } 1403 1404 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) { 1405 std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1406 if (DI->getNumArgs() == 2) { 1407 // Unary op. 
1408 std::pair<Type, std::string> R = 1409 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1410 return std::make_pair(R.first, Op + R.second); 1411 } else { 1412 assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!"); 1413 std::pair<Type, std::string> R1 = 1414 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1415 std::pair<Type, std::string> R2 = 1416 emitDagArg(DI->getArg(2), std::string(DI->getArgNameStr(2))); 1417 assert_with_loc(R1.first == R2.first, "Argument type mismatch!"); 1418 return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second); 1419 } 1420 } 1421 1422 std::pair<Type, std::string> 1423 Intrinsic::DagEmitter::emitDagCall(DagInit *DI, bool MatchMangledName) { 1424 std::vector<Type> Types; 1425 std::vector<std::string> Values; 1426 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { 1427 std::pair<Type, std::string> R = 1428 emitDagArg(DI->getArg(I + 1), std::string(DI->getArgNameStr(I + 1))); 1429 Types.push_back(R.first); 1430 Values.push_back(R.second); 1431 } 1432 1433 // Look up the called intrinsic. 1434 std::string N; 1435 if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0))) 1436 N = SI->getAsUnquotedString(); 1437 else 1438 N = emitDagArg(DI->getArg(0), "").second; 1439 Optional<std::string> MangledName; 1440 if (MatchMangledName) { 1441 if (Intr.getRecord()->getValueAsBit("isLaneQ")) 1442 N += "q"; 1443 MangledName = Intr.mangleName(N, ClassS); 1444 } 1445 Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types, MangledName); 1446 1447 // Make sure the callee is known as an early def. 1448 Callee.setNeededEarly(); 1449 Intr.Dependencies.insert(&Callee); 1450 1451 // Now create the call itself. 
1452 std::string S = ""; 1453 if (!Callee.isBigEndianSafe()) 1454 S += CallPrefix.str(); 1455 S += Callee.getMangledName(true) + "("; 1456 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { 1457 if (I != 0) 1458 S += ", "; 1459 S += Values[I]; 1460 } 1461 S += ")"; 1462 1463 return std::make_pair(Callee.getReturnType(), S); 1464 } 1465 1466 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI, 1467 bool IsBitCast){ 1468 // (cast MOD* VAL) -> cast VAL to type given by MOD. 1469 std::pair<Type, std::string> R = 1470 emitDagArg(DI->getArg(DI->getNumArgs() - 1), 1471 std::string(DI->getArgNameStr(DI->getNumArgs() - 1))); 1472 Type castToType = R.first; 1473 for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) { 1474 1475 // MOD can take several forms: 1476 // 1. $X - take the type of parameter / variable X. 1477 // 2. The value "R" - take the type of the return type. 1478 // 3. a type string 1479 // 4. The value "U" or "S" to switch the signedness. 1480 // 5. The value "H" or "D" to half or double the bitwidth. 1481 // 6. The value "8" to convert to 8-bit (signed) integer lanes. 
1482 if (!DI->getArgNameStr(ArgIdx).empty()) { 1483 assert_with_loc(Intr.Variables.find(std::string( 1484 DI->getArgNameStr(ArgIdx))) != Intr.Variables.end(), 1485 "Variable not found"); 1486 castToType = 1487 Intr.Variables[std::string(DI->getArgNameStr(ArgIdx))].getType(); 1488 } else { 1489 StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx)); 1490 assert_with_loc(SI, "Expected string type or $Name for cast type"); 1491 1492 if (SI->getAsUnquotedString() == "R") { 1493 castToType = Intr.getReturnType(); 1494 } else if (SI->getAsUnquotedString() == "U") { 1495 castToType.makeUnsigned(); 1496 } else if (SI->getAsUnquotedString() == "S") { 1497 castToType.makeSigned(); 1498 } else if (SI->getAsUnquotedString() == "H") { 1499 castToType.halveLanes(); 1500 } else if (SI->getAsUnquotedString() == "D") { 1501 castToType.doubleLanes(); 1502 } else if (SI->getAsUnquotedString() == "8") { 1503 castToType.makeInteger(8, true); 1504 } else if (SI->getAsUnquotedString() == "32") { 1505 castToType.make32BitElement(); 1506 } else { 1507 castToType = Type::fromTypedefName(SI->getAsUnquotedString()); 1508 assert_with_loc(!castToType.isVoid(), "Unknown typedef"); 1509 } 1510 } 1511 } 1512 1513 std::string S; 1514 if (IsBitCast) { 1515 // Emit a reinterpret cast. The second operand must be an lvalue, so create 1516 // a temporary. 1517 std::string N = "reint"; 1518 unsigned I = 0; 1519 while (Intr.Variables.find(N) != Intr.Variables.end()) 1520 N = "reint" + utostr(++I); 1521 Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix); 1522 1523 Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = " 1524 << R.second << ";"; 1525 Intr.emitNewLine(); 1526 1527 S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + ""; 1528 } else { 1529 // Emit a normal (static) cast. 
1530 S = "(" + castToType.str() + ")(" + R.second + ")"; 1531 } 1532 1533 return std::make_pair(castToType, S); 1534 } 1535 1536 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){ 1537 // See the documentation in arm_neon.td for a description of these operators. 1538 class LowHalf : public SetTheory::Operator { 1539 public: 1540 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1541 ArrayRef<SMLoc> Loc) override { 1542 SetTheory::RecSet Elts2; 1543 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); 1544 Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2)); 1545 } 1546 }; 1547 1548 class HighHalf : public SetTheory::Operator { 1549 public: 1550 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1551 ArrayRef<SMLoc> Loc) override { 1552 SetTheory::RecSet Elts2; 1553 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); 1554 Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end()); 1555 } 1556 }; 1557 1558 class Rev : public SetTheory::Operator { 1559 unsigned ElementSize; 1560 1561 public: 1562 Rev(unsigned ElementSize) : ElementSize(ElementSize) {} 1563 1564 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1565 ArrayRef<SMLoc> Loc) override { 1566 SetTheory::RecSet Elts2; 1567 ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc); 1568 1569 int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue(); 1570 VectorSize /= ElementSize; 1571 1572 std::vector<Record *> Revved; 1573 for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) { 1574 for (int LI = VectorSize - 1; LI >= 0; --LI) { 1575 Revved.push_back(Elts2[VI + LI]); 1576 } 1577 } 1578 1579 Elts.insert(Revved.begin(), Revved.end()); 1580 } 1581 }; 1582 1583 class MaskExpander : public SetTheory::Expander { 1584 unsigned N; 1585 1586 public: 1587 MaskExpander(unsigned N) : N(N) {} 1588 1589 void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) override { 1590 unsigned Addend = 0; 
1591 if (R->getName() == "mask0") 1592 Addend = 0; 1593 else if (R->getName() == "mask1") 1594 Addend = N; 1595 else 1596 return; 1597 for (unsigned I = 0; I < N; ++I) 1598 Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend))); 1599 } 1600 }; 1601 1602 // (shuffle arg1, arg2, sequence) 1603 std::pair<Type, std::string> Arg1 = 1604 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1605 std::pair<Type, std::string> Arg2 = 1606 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1607 assert_with_loc(Arg1.first == Arg2.first, 1608 "Different types in arguments to shuffle!"); 1609 1610 SetTheory ST; 1611 SetTheory::RecSet Elts; 1612 ST.addOperator("lowhalf", std::make_unique<LowHalf>()); 1613 ST.addOperator("highhalf", std::make_unique<HighHalf>()); 1614 ST.addOperator("rev", 1615 std::make_unique<Rev>(Arg1.first.getElementSizeInBits())); 1616 ST.addExpander("MaskExpand", 1617 std::make_unique<MaskExpander>(Arg1.first.getNumElements())); 1618 ST.evaluate(DI->getArg(2), Elts, None); 1619 1620 std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second; 1621 for (auto &E : Elts) { 1622 StringRef Name = E->getName(); 1623 assert_with_loc(Name.startswith("sv"), 1624 "Incorrect element kind in shuffle mask!"); 1625 S += ", " + Name.drop_front(2).str(); 1626 } 1627 S += ")"; 1628 1629 // Recalculate the return type - the shuffle may have halved or doubled it. 
1630 Type T(Arg1.first); 1631 if (Elts.size() > T.getNumElements()) { 1632 assert_with_loc( 1633 Elts.size() == T.getNumElements() * 2, 1634 "Can only double or half the number of elements in a shuffle!"); 1635 T.doubleLanes(); 1636 } else if (Elts.size() < T.getNumElements()) { 1637 assert_with_loc( 1638 Elts.size() == T.getNumElements() / 2, 1639 "Can only double or half the number of elements in a shuffle!"); 1640 T.halveLanes(); 1641 } 1642 1643 return std::make_pair(T, S); 1644 } 1645 1646 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) { 1647 assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument"); 1648 std::pair<Type, std::string> A = 1649 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1650 assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument"); 1651 1652 Type T = Intr.getBaseType(); 1653 assert_with_loc(T.isVector(), "dup() used but default type is scalar!"); 1654 std::string S = "(" + T.str() + ") {"; 1655 for (unsigned I = 0; I < T.getNumElements(); ++I) { 1656 if (I != 0) 1657 S += ", "; 1658 S += A.second; 1659 } 1660 S += "}"; 1661 1662 return std::make_pair(T, S); 1663 } 1664 1665 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDupTyped(DagInit *DI) { 1666 assert_with_loc(DI->getNumArgs() == 2, "dup_typed() expects two arguments"); 1667 std::pair<Type, std::string> A = 1668 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1669 std::pair<Type, std::string> B = 1670 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1671 assert_with_loc(B.first.isScalar(), 1672 "dup_typed() requires a scalar as the second argument"); 1673 1674 Type T = A.first; 1675 assert_with_loc(T.isVector(), "dup_typed() used but target type is scalar!"); 1676 std::string S = "(" + T.str() + ") {"; 1677 for (unsigned I = 0; I < T.getNumElements(); ++I) { 1678 if (I != 0) 1679 S += ", "; 1680 S += B.second; 1681 } 1682 S += "}"; 1683 1684 return std::make_pair(T, S); 1685 } 1686 
1687 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) { 1688 assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments"); 1689 std::pair<Type, std::string> A = 1690 emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))); 1691 std::pair<Type, std::string> B = 1692 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1693 1694 assert_with_loc(B.first.isScalar(), 1695 "splat() requires a scalar int as the second argument"); 1696 1697 std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second; 1698 for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) { 1699 S += ", " + B.second; 1700 } 1701 S += ")"; 1702 1703 return std::make_pair(Intr.getBaseType(), S); 1704 } 1705 1706 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) { 1707 assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments"); 1708 std::pair<Type, std::string> A = 1709 emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1))); 1710 1711 assert_with_loc(!A.first.isVoid(), 1712 "Argument to save_temp() must have non-void type!"); 1713 1714 std::string N = std::string(DI->getArgNameStr(0)); 1715 assert_with_loc(!N.empty(), 1716 "save_temp() expects a name as the first argument"); 1717 1718 assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(), 1719 "Variable already defined!"); 1720 Intr.Variables[N] = Variable(A.first, N + Intr.VariablePostfix); 1721 1722 std::string S = 1723 A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second; 1724 1725 return std::make_pair(Type::getVoid(), S); 1726 } 1727 1728 std::pair<Type, std::string> 1729 Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) { 1730 std::string S = Intr.Name; 1731 1732 assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!"); 1733 std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1734 std::string ReplaceWith = 
cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1735 1736 size_t Idx = S.find(ToReplace); 1737 1738 assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!"); 1739 S.replace(Idx, ToReplace.size(), ReplaceWith); 1740 1741 return std::make_pair(Type::getVoid(), S); 1742 } 1743 1744 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){ 1745 std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1746 std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1747 return std::make_pair(Type::fromTypedefName(Ty), Value); 1748 } 1749 1750 std::pair<Type, std::string> 1751 Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) { 1752 if (!ArgName.empty()) { 1753 assert_with_loc(!Arg->isComplete(), 1754 "Arguments must either be DAGs or names, not both!"); 1755 assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(), 1756 "Variable not defined!"); 1757 Variable &V = Intr.Variables[ArgName]; 1758 return std::make_pair(V.getType(), V.getName()); 1759 } 1760 1761 assert(Arg && "Neither ArgName nor Arg?!"); 1762 DagInit *DI = dyn_cast<DagInit>(Arg); 1763 assert_with_loc(DI, "Arguments must either be DAGs or names!"); 1764 1765 return emitDag(DI); 1766 } 1767 1768 std::string Intrinsic::generate() { 1769 // Avoid duplicated code for big and little endian 1770 if (isBigEndianSafe()) { 1771 generateImpl(false, "", ""); 1772 return OS.str(); 1773 } 1774 // Little endian intrinsics are simple and don't require any argument 1775 // swapping. 1776 OS << "#ifdef __LITTLE_ENDIAN__\n"; 1777 1778 generateImpl(false, "", ""); 1779 1780 OS << "#else\n"; 1781 1782 // Big endian intrinsics are more complex. The user intended these 1783 // intrinsics to operate on a vector "as-if" loaded by (V)LDR, 1784 // but we load as-if (V)LD1. So we should swap all arguments and 1785 // swap the return value too. 
1786 // 1787 // If we call sub-intrinsics, we should call a version that does 1788 // not re-swap the arguments! 1789 generateImpl(true, "", "__noswap_"); 1790 1791 // If we're needed early, create a non-swapping variant for 1792 // big-endian. 1793 if (NeededEarly) { 1794 generateImpl(false, "__noswap_", "__noswap_"); 1795 } 1796 OS << "#endif\n\n"; 1797 1798 return OS.str(); 1799 } 1800 1801 void Intrinsic::generateImpl(bool ReverseArguments, 1802 StringRef NamePrefix, StringRef CallPrefix) { 1803 CurrentRecord = R; 1804 1805 // If we call a macro, our local variables may be corrupted due to 1806 // lack of proper lexical scoping. So, add a globally unique postfix 1807 // to every variable. 1808 // 1809 // indexBody() should have set up the Dependencies set by now. 1810 for (auto *I : Dependencies) 1811 if (I->UseMacro) { 1812 VariablePostfix = "_" + utostr(Emitter.getUniqueNumber()); 1813 break; 1814 } 1815 1816 initVariables(); 1817 1818 emitPrototype(NamePrefix); 1819 1820 if (IsUnavailable) { 1821 OS << " __attribute__((unavailable));"; 1822 } else { 1823 emitOpeningBrace(); 1824 emitShadowedArgs(); 1825 if (ReverseArguments) 1826 emitArgumentReversal(); 1827 emitBody(CallPrefix); 1828 if (ReverseArguments) 1829 emitReturnReversal(); 1830 emitReturn(); 1831 emitClosingBrace(); 1832 } 1833 OS << "\n"; 1834 1835 CurrentRecord = nullptr; 1836 } 1837 1838 void Intrinsic::indexBody() { 1839 CurrentRecord = R; 1840 1841 initVariables(); 1842 emitBody(""); 1843 OS.str(""); 1844 1845 CurrentRecord = nullptr; 1846 } 1847 1848 //===----------------------------------------------------------------------===// 1849 // NeonEmitter implementation 1850 //===----------------------------------------------------------------------===// 1851 1852 Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types, 1853 Optional<std::string> MangledName) { 1854 // First, look up the name in the intrinsic map. 
1855 assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(), 1856 ("Intrinsic '" + Name + "' not found!").str()); 1857 auto &V = IntrinsicMap.find(Name.str())->second; 1858 std::vector<Intrinsic *> GoodVec; 1859 1860 // Create a string to print if we end up failing. 1861 std::string ErrMsg = "looking up intrinsic '" + Name.str() + "("; 1862 for (unsigned I = 0; I < Types.size(); ++I) { 1863 if (I != 0) 1864 ErrMsg += ", "; 1865 ErrMsg += Types[I].str(); 1866 } 1867 ErrMsg += ")'\n"; 1868 ErrMsg += "Available overloads:\n"; 1869 1870 // Now, look through each intrinsic implementation and see if the types are 1871 // compatible. 1872 for (auto &I : V) { 1873 ErrMsg += " - " + I.getReturnType().str() + " " + I.getMangledName(); 1874 ErrMsg += "("; 1875 for (unsigned A = 0; A < I.getNumParams(); ++A) { 1876 if (A != 0) 1877 ErrMsg += ", "; 1878 ErrMsg += I.getParamType(A).str(); 1879 } 1880 ErrMsg += ")\n"; 1881 1882 if (MangledName && MangledName != I.getMangledName(true)) 1883 continue; 1884 1885 if (I.getNumParams() != Types.size()) 1886 continue; 1887 1888 unsigned ArgNum = 0; 1889 bool MatchingArgumentTypes = 1890 std::all_of(Types.begin(), Types.end(), [&](const auto &Type) { 1891 return Type == I.getParamType(ArgNum++); 1892 }); 1893 1894 if (MatchingArgumentTypes) 1895 GoodVec.push_back(&I); 1896 } 1897 1898 assert_with_loc(!GoodVec.empty(), 1899 "No compatible intrinsic found - " + ErrMsg); 1900 assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg); 1901 1902 return *GoodVec.front(); 1903 } 1904 1905 void NeonEmitter::createIntrinsic(Record *R, 1906 SmallVectorImpl<Intrinsic *> &Out) { 1907 std::string Name = std::string(R->getValueAsString("Name")); 1908 std::string Proto = std::string(R->getValueAsString("Prototype")); 1909 std::string Types = std::string(R->getValueAsString("Types")); 1910 Record *OperationRec = R->getValueAsDef("Operation"); 1911 bool CartesianProductOfTypes = R->getValueAsBit("CartesianProductOfTypes"); 
1912 bool BigEndianSafe = R->getValueAsBit("BigEndianSafe"); 1913 std::string Guard = std::string(R->getValueAsString("ArchGuard")); 1914 bool IsUnavailable = OperationRec->getValueAsBit("Unavailable"); 1915 1916 // Set the global current record. This allows assert_with_loc to produce 1917 // decent location information even when highly nested. 1918 CurrentRecord = R; 1919 1920 ListInit *Body = OperationRec->getValueAsListInit("Ops"); 1921 1922 std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types); 1923 1924 ClassKind CK = ClassNone; 1925 if (R->getSuperClasses().size() >= 2) 1926 CK = ClassMap[R->getSuperClasses()[1].first]; 1927 1928 std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs; 1929 for (auto TS : TypeSpecs) { 1930 if (CartesianProductOfTypes) { 1931 Type DefaultT(TS, "."); 1932 for (auto SrcTS : TypeSpecs) { 1933 Type DefaultSrcT(SrcTS, "."); 1934 if (TS == SrcTS || 1935 DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits()) 1936 continue; 1937 NewTypeSpecs.push_back(std::make_pair(TS, SrcTS)); 1938 } 1939 } else { 1940 NewTypeSpecs.push_back(std::make_pair(TS, TS)); 1941 } 1942 } 1943 1944 llvm::sort(NewTypeSpecs); 1945 NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()), 1946 NewTypeSpecs.end()); 1947 auto &Entry = IntrinsicMap[Name]; 1948 1949 for (auto &I : NewTypeSpecs) { 1950 Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this, 1951 Guard, IsUnavailable, BigEndianSafe); 1952 Out.push_back(&Entry.back()); 1953 } 1954 1955 CurrentRecord = nullptr; 1956 } 1957 1958 /// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def 1959 /// declaration of builtins, checking for unique builtin declarations. 1960 void NeonEmitter::genBuiltinsDef(raw_ostream &OS, 1961 SmallVectorImpl<Intrinsic *> &Defs) { 1962 OS << "#ifdef GET_NEON_BUILTINS\n"; 1963 1964 // We only want to emit a builtin once, and we want to emit them in 1965 // alphabetical order, so use a std::set. 
1966 std::set<std::string> Builtins; 1967 1968 for (auto *Def : Defs) { 1969 if (Def->hasBody()) 1970 continue; 1971 1972 std::string S = "BUILTIN(__builtin_neon_" + Def->getMangledName() + ", \""; 1973 1974 S += Def->getBuiltinTypeStr(); 1975 S += "\", \"n\")"; 1976 1977 Builtins.insert(S); 1978 } 1979 1980 for (auto &S : Builtins) 1981 OS << S << "\n"; 1982 OS << "#endif\n\n"; 1983 } 1984 1985 /// Generate the ARM and AArch64 overloaded type checking code for 1986 /// SemaChecking.cpp, checking for unique builtin declarations. 1987 void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, 1988 SmallVectorImpl<Intrinsic *> &Defs) { 1989 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; 1990 1991 // We record each overload check line before emitting because subsequent Inst 1992 // definitions may extend the number of permitted types (i.e. augment the 1993 // Mask). Use std::map to avoid sorting the table by hash number. 1994 struct OverloadInfo { 1995 uint64_t Mask; 1996 int PtrArgNum; 1997 bool HasConstPtr; 1998 OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {} 1999 }; 2000 std::map<std::string, OverloadInfo> OverloadMap; 2001 2002 for (auto *Def : Defs) { 2003 // If the def has a body (that is, it has Operation DAGs), it won't call 2004 // __builtin_neon_* so we don't need to generate a definition for it. 2005 if (Def->hasBody()) 2006 continue; 2007 // Functions which have a scalar argument cannot be overloaded, no need to 2008 // check them if we are emitting the type checking code. 2009 if (Def->protoHasScalar()) 2010 continue; 2011 2012 uint64_t Mask = 0ULL; 2013 Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum(); 2014 2015 // Check if the function has a pointer or const pointer argument. 
2016 int PtrArgNum = -1; 2017 bool HasConstPtr = false; 2018 for (unsigned I = 0; I < Def->getNumParams(); ++I) { 2019 const auto &Type = Def->getParamType(I); 2020 if (Type.isPointer()) { 2021 PtrArgNum = I; 2022 HasConstPtr = Type.isConstPointer(); 2023 } 2024 } 2025 2026 // For sret builtins, adjust the pointer argument index. 2027 if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1) 2028 PtrArgNum += 1; 2029 2030 std::string Name = Def->getName(); 2031 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup, 2032 // and vst1_lane intrinsics. Using a pointer to the vector element 2033 // type with one of those operations causes codegen to select an aligned 2034 // load/store instruction. If you want an unaligned operation, 2035 // the pointer argument needs to have less alignment than element type, 2036 // so just accept any pointer type. 2037 if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane") { 2038 PtrArgNum = -1; 2039 HasConstPtr = false; 2040 } 2041 2042 if (Mask) { 2043 std::string Name = Def->getMangledName(); 2044 OverloadMap.insert(std::make_pair(Name, OverloadInfo())); 2045 OverloadInfo &OI = OverloadMap[Name]; 2046 OI.Mask |= Mask; 2047 OI.PtrArgNum |= PtrArgNum; 2048 OI.HasConstPtr = HasConstPtr; 2049 } 2050 } 2051 2052 for (auto &I : OverloadMap) { 2053 OverloadInfo &OI = I.second; 2054 2055 OS << "case NEON::BI__builtin_neon_" << I.first << ": "; 2056 OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL"; 2057 if (OI.PtrArgNum >= 0) 2058 OS << "; PtrArgNum = " << OI.PtrArgNum; 2059 if (OI.HasConstPtr) 2060 OS << "; HasConstPtr = true"; 2061 OS << "; break;\n"; 2062 } 2063 OS << "#endif\n\n"; 2064 } 2065 2066 void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, 2067 SmallVectorImpl<Intrinsic *> &Defs) { 2068 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; 2069 2070 std::set<std::string> Emitted; 2071 2072 for (auto *Def : Defs) { 2073 if (Def->hasBody()) 2074 continue; 2075 // Functions which do not 
have an immediate do not need to have range 2076 // checking code emitted. 2077 if (!Def->hasImmediate()) 2078 continue; 2079 if (Emitted.find(Def->getMangledName()) != Emitted.end()) 2080 continue; 2081 2082 std::string LowerBound, UpperBound; 2083 2084 Record *R = Def->getRecord(); 2085 if (R->getValueAsBit("isVCVT_N")) { 2086 // VCVT between floating- and fixed-point values takes an immediate 2087 // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16. 2088 LowerBound = "1"; 2089 if (Def->getBaseType().getElementSizeInBits() == 16 || 2090 Def->getName().find('h') != std::string::npos) 2091 // VCVTh operating on FP16 intrinsics in range [1, 16) 2092 UpperBound = "15"; 2093 else if (Def->getBaseType().getElementSizeInBits() == 32) 2094 UpperBound = "31"; 2095 else 2096 UpperBound = "63"; 2097 } else if (R->getValueAsBit("isScalarShift")) { 2098 // Right shifts have an 'r' in the name, left shifts do not. Convert 2099 // instructions have the same bounds and right shifts. 2100 if (Def->getName().find('r') != std::string::npos || 2101 Def->getName().find("cvt") != std::string::npos) 2102 LowerBound = "1"; 2103 2104 UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1); 2105 } else if (R->getValueAsBit("isShift")) { 2106 // Builtins which are overloaded by type will need to have their upper 2107 // bound computed at Sema time based on the type constant. 2108 2109 // Right shifts have an 'r' in the name, left shifts do not. 2110 if (Def->getName().find('r') != std::string::npos) 2111 LowerBound = "1"; 2112 UpperBound = "RFT(TV, true)"; 2113 } else if (Def->getClassKind(true) == ClassB) { 2114 // ClassB intrinsics have a type (and hence lane number) that is only 2115 // known at runtime. 2116 if (R->getValueAsBit("isLaneQ")) 2117 UpperBound = "RFT(TV, false, true)"; 2118 else 2119 UpperBound = "RFT(TV, false, false)"; 2120 } else { 2121 // The immediate generally refers to a lane in the preceding argument. 
2122 assert(Def->getImmediateIdx() > 0); 2123 Type T = Def->getParamType(Def->getImmediateIdx() - 1); 2124 UpperBound = utostr(T.getNumElements() - 1); 2125 } 2126 2127 // Calculate the index of the immediate that should be range checked. 2128 unsigned Idx = Def->getNumParams(); 2129 if (Def->hasImmediate()) 2130 Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx()); 2131 2132 OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": " 2133 << "i = " << Idx << ";"; 2134 if (!LowerBound.empty()) 2135 OS << " l = " << LowerBound << ";"; 2136 if (!UpperBound.empty()) 2137 OS << " u = " << UpperBound << ";"; 2138 OS << " break;\n"; 2139 2140 Emitted.insert(Def->getMangledName()); 2141 } 2142 2143 OS << "#endif\n\n"; 2144 } 2145 2146 /// runHeader - Emit a file with sections defining: 2147 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def. 2148 /// 2. the SemaChecking code for the type overload checking. 2149 /// 3. the SemaChecking code for validation of intrinsic immediate arguments. 2150 void NeonEmitter::runHeader(raw_ostream &OS) { 2151 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2152 2153 SmallVector<Intrinsic *, 128> Defs; 2154 for (auto *R : RV) 2155 createIntrinsic(R, Defs); 2156 2157 // Generate shared BuiltinsXXX.def 2158 genBuiltinsDef(OS, Defs); 2159 2160 // Generate ARM overloaded type checking code for SemaChecking.cpp 2161 genOverloadTypeCheckCode(OS, Defs); 2162 2163 // Generate ARM range checking code for shift/lane immediates. 2164 genIntrinsicRangeCheckCode(OS, Defs); 2165 } 2166 2167 /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h 2168 /// is comprised of type definitions and function declarations. 
2169 void NeonEmitter::run(raw_ostream &OS) { 2170 OS << "/*===---- arm_neon.h - ARM Neon intrinsics " 2171 "------------------------------" 2172 "---===\n" 2173 " *\n" 2174 " * Permission is hereby granted, free of charge, to any person " 2175 "obtaining " 2176 "a copy\n" 2177 " * of this software and associated documentation files (the " 2178 "\"Software\")," 2179 " to deal\n" 2180 " * in the Software without restriction, including without limitation " 2181 "the " 2182 "rights\n" 2183 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2184 "and/or sell\n" 2185 " * copies of the Software, and to permit persons to whom the Software " 2186 "is\n" 2187 " * furnished to do so, subject to the following conditions:\n" 2188 " *\n" 2189 " * The above copyright notice and this permission notice shall be " 2190 "included in\n" 2191 " * all copies or substantial portions of the Software.\n" 2192 " *\n" 2193 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2194 "EXPRESS OR\n" 2195 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2196 "MERCHANTABILITY,\n" 2197 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT " 2198 "SHALL THE\n" 2199 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2200 "OTHER\n" 2201 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2202 "ARISING FROM,\n" 2203 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2204 "DEALINGS IN\n" 2205 " * THE SOFTWARE.\n" 2206 " *\n" 2207 " *===-----------------------------------------------------------------" 2208 "---" 2209 "---===\n" 2210 " */\n\n"; 2211 2212 OS << "#ifndef __ARM_NEON_H\n"; 2213 OS << "#define __ARM_NEON_H\n\n"; 2214 2215 OS << "#if !defined(__ARM_NEON)\n"; 2216 OS << "#error \"NEON support not enabled\"\n"; 2217 OS << "#endif\n\n"; 2218 2219 OS << "#include <stdint.h>\n\n"; 2220 2221 // Emit NEON-specific scalar typedefs. 
2222 OS << "typedef float float32_t;\n"; 2223 OS << "typedef __fp16 float16_t;\n"; 2224 2225 OS << "#ifdef __aarch64__\n"; 2226 OS << "typedef double float64_t;\n"; 2227 OS << "#endif\n\n"; 2228 2229 // For now, signedness of polynomial types depends on target 2230 OS << "#ifdef __aarch64__\n"; 2231 OS << "typedef uint8_t poly8_t;\n"; 2232 OS << "typedef uint16_t poly16_t;\n"; 2233 OS << "typedef uint64_t poly64_t;\n"; 2234 OS << "typedef __uint128_t poly128_t;\n"; 2235 OS << "#else\n"; 2236 OS << "typedef int8_t poly8_t;\n"; 2237 OS << "typedef int16_t poly16_t;\n"; 2238 OS << "#endif\n"; 2239 2240 // Emit Neon vector typedefs. 2241 std::string TypedefTypes( 2242 "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl"); 2243 std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes); 2244 2245 // Emit vector typedefs. 2246 bool InIfdef = false; 2247 for (auto &TS : TDTypeVec) { 2248 bool IsA64 = false; 2249 Type T(TS, "."); 2250 if (T.isDouble() || (T.isPoly() && T.getElementSizeInBits() == 64)) 2251 IsA64 = true; 2252 2253 if (InIfdef && !IsA64) { 2254 OS << "#endif\n"; 2255 InIfdef = false; 2256 } 2257 if (!InIfdef && IsA64) { 2258 OS << "#ifdef __aarch64__\n"; 2259 InIfdef = true; 2260 } 2261 2262 if (T.isPoly()) 2263 OS << "typedef __attribute__((neon_polyvector_type("; 2264 else 2265 OS << "typedef __attribute__((neon_vector_type("; 2266 2267 Type T2 = T; 2268 T2.makeScalar(); 2269 OS << T.getNumElements() << "))) "; 2270 OS << T2.str(); 2271 OS << " " << T.str() << ";\n"; 2272 } 2273 if (InIfdef) 2274 OS << "#endif\n"; 2275 OS << "\n"; 2276 2277 // Emit struct typedefs. 
2278 InIfdef = false; 2279 for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) { 2280 for (auto &TS : TDTypeVec) { 2281 bool IsA64 = false; 2282 Type T(TS, "."); 2283 if (T.isDouble() || (T.isPoly() && T.getElementSizeInBits() == 64)) 2284 IsA64 = true; 2285 2286 if (InIfdef && !IsA64) { 2287 OS << "#endif\n"; 2288 InIfdef = false; 2289 } 2290 if (!InIfdef && IsA64) { 2291 OS << "#ifdef __aarch64__\n"; 2292 InIfdef = true; 2293 } 2294 2295 const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0}; 2296 Type VT(TS, Mods); 2297 OS << "typedef struct " << VT.str() << " {\n"; 2298 OS << " " << T.str() << " val"; 2299 OS << "[" << NumMembers << "]"; 2300 OS << ";\n} "; 2301 OS << VT.str() << ";\n"; 2302 OS << "\n"; 2303 } 2304 } 2305 if (InIfdef) 2306 OS << "#endif\n"; 2307 OS << "\n"; 2308 2309 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2310 "__nodebug__))\n\n"; 2311 2312 SmallVector<Intrinsic *, 128> Defs; 2313 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2314 for (auto *R : RV) 2315 createIntrinsic(R, Defs); 2316 2317 for (auto *I : Defs) 2318 I->indexBody(); 2319 2320 llvm::stable_sort(Defs, llvm::deref<std::less<>>()); 2321 2322 // Only emit a def when its requirements have been met. 2323 // FIXME: This loop could be made faster, but it's fast enough for now. 2324 bool MadeProgress = true; 2325 std::string InGuard; 2326 while (!Defs.empty() && MadeProgress) { 2327 MadeProgress = false; 2328 2329 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2330 I != Defs.end(); /*No step*/) { 2331 bool DependenciesSatisfied = true; 2332 for (auto *II : (*I)->getDependencies()) { 2333 if (llvm::is_contained(Defs, II)) 2334 DependenciesSatisfied = false; 2335 } 2336 if (!DependenciesSatisfied) { 2337 // Try the next one. 2338 ++I; 2339 continue; 2340 } 2341 2342 // Emit #endif/#if pair if needed. 
2343 if ((*I)->getGuard() != InGuard) { 2344 if (!InGuard.empty()) 2345 OS << "#endif\n"; 2346 InGuard = (*I)->getGuard(); 2347 if (!InGuard.empty()) 2348 OS << "#if " << InGuard << "\n"; 2349 } 2350 2351 // Actually generate the intrinsic code. 2352 OS << (*I)->generate(); 2353 2354 MadeProgress = true; 2355 I = Defs.erase(I); 2356 } 2357 } 2358 assert(Defs.empty() && "Some requirements were not satisfied!"); 2359 if (!InGuard.empty()) 2360 OS << "#endif\n"; 2361 2362 OS << "\n"; 2363 OS << "#undef __ai\n\n"; 2364 OS << "#endif /* __ARM_NEON_H */\n"; 2365 } 2366 2367 /// run - Read the records in arm_fp16.td and output arm_fp16.h. arm_fp16.h 2368 /// is comprised of type definitions and function declarations. 2369 void NeonEmitter::runFP16(raw_ostream &OS) { 2370 OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics " 2371 "------------------------------" 2372 "---===\n" 2373 " *\n" 2374 " * Permission is hereby granted, free of charge, to any person " 2375 "obtaining a copy\n" 2376 " * of this software and associated documentation files (the " 2377 "\"Software\"), to deal\n" 2378 " * in the Software without restriction, including without limitation " 2379 "the rights\n" 2380 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2381 "and/or sell\n" 2382 " * copies of the Software, and to permit persons to whom the Software " 2383 "is\n" 2384 " * furnished to do so, subject to the following conditions:\n" 2385 " *\n" 2386 " * The above copyright notice and this permission notice shall be " 2387 "included in\n" 2388 " * all copies or substantial portions of the Software.\n" 2389 " *\n" 2390 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2391 "EXPRESS OR\n" 2392 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2393 "MERCHANTABILITY,\n" 2394 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT " 2395 "SHALL THE\n" 2396 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2397 "OTHER\n" 2398 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2399 "ARISING FROM,\n" 2400 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2401 "DEALINGS IN\n" 2402 " * THE SOFTWARE.\n" 2403 " *\n" 2404 " *===-----------------------------------------------------------------" 2405 "---" 2406 "---===\n" 2407 " */\n\n"; 2408 2409 OS << "#ifndef __ARM_FP16_H\n"; 2410 OS << "#define __ARM_FP16_H\n\n"; 2411 2412 OS << "#include <stdint.h>\n\n"; 2413 2414 OS << "typedef __fp16 float16_t;\n"; 2415 2416 OS << "#define __ai static __inline__ __attribute__((__always_inline__, " 2417 "__nodebug__))\n\n"; 2418 2419 SmallVector<Intrinsic *, 128> Defs; 2420 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2421 for (auto *R : RV) 2422 createIntrinsic(R, Defs); 2423 2424 for (auto *I : Defs) 2425 I->indexBody(); 2426 2427 llvm::stable_sort(Defs, llvm::deref<std::less<>>()); 2428 2429 // Only emit a def when its requirements have been met. 2430 // FIXME: This loop could be made faster, but it's fast enough for now. 2431 bool MadeProgress = true; 2432 std::string InGuard; 2433 while (!Defs.empty() && MadeProgress) { 2434 MadeProgress = false; 2435 2436 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2437 I != Defs.end(); /*No step*/) { 2438 bool DependenciesSatisfied = true; 2439 for (auto *II : (*I)->getDependencies()) { 2440 if (llvm::is_contained(Defs, II)) 2441 DependenciesSatisfied = false; 2442 } 2443 if (!DependenciesSatisfied) { 2444 // Try the next one. 2445 ++I; 2446 continue; 2447 } 2448 2449 // Emit #endif/#if pair if needed. 2450 if ((*I)->getGuard() != InGuard) { 2451 if (!InGuard.empty()) 2452 OS << "#endif\n"; 2453 InGuard = (*I)->getGuard(); 2454 if (!InGuard.empty()) 2455 OS << "#if " << InGuard << "\n"; 2456 } 2457 2458 // Actually generate the intrinsic code. 
2459 OS << (*I)->generate(); 2460 2461 MadeProgress = true; 2462 I = Defs.erase(I); 2463 } 2464 } 2465 assert(Defs.empty() && "Some requirements were not satisfied!"); 2466 if (!InGuard.empty()) 2467 OS << "#endif\n"; 2468 2469 OS << "\n"; 2470 OS << "#undef __ai\n\n"; 2471 OS << "#endif /* __ARM_FP16_H */\n"; 2472 } 2473 2474 void clang::EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 2475 NeonEmitter(Records).run(OS); 2476 } 2477 2478 void clang::EmitFP16(RecordKeeper &Records, raw_ostream &OS) { 2479 NeonEmitter(Records).runFP16(OS); 2480 } 2481 2482 void clang::EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 2483 NeonEmitter(Records).runHeader(OS); 2484 } 2485 2486 void clang::EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 2487 llvm_unreachable("Neon test generation no longer implemented!"); 2488 } 2489