//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend is responsible for emitting arm_neon.h, which includes
// a declaration and definition of each function specified by the ARM NEON
// compiler interface. See ARM document DUI0348B.
//
// Each NEON instruction is implemented in terms of 1 or more functions which
// are suffixed with the element type of the input vectors. Functions may be
// implemented in terms of generic vector operations such as +, *, -, etc. or
// by calling a __builtin_-prefixed function which will be handled by clang's
// CodeGen library.
//
// Additional validation code can be generated by this file when runHeader() is
// called, rather than the normal run() entry point.
//
// See also the documentation in include/clang/Basic/arm_neon.td.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/SetTheory.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <algorithm>
#include <cassert>
#include <cctype>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
using namespace llvm;

namespace {

// While globals are generally bad, this one allows us to perform assertions
// liberally and somehow still trace them back to the def they indirectly
// came from.
50 static Record *CurrentRecord = nullptr; 51 static void assert_with_loc(bool Assertion, const std::string &Str) { 52 if (!Assertion) { 53 if (CurrentRecord) 54 PrintFatalError(CurrentRecord->getLoc(), Str); 55 else 56 PrintFatalError(Str); 57 } 58 } 59 60 enum ClassKind { 61 ClassNone, 62 ClassI, // generic integer instruction, e.g., "i8" suffix 63 ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix 64 ClassW, // width-specific instruction, e.g., "8" suffix 65 ClassB, // bitcast arguments with enum argument to specify type 66 ClassL, // Logical instructions which are op instructions 67 // but we need to not emit any suffix for in our 68 // tests. 69 ClassNoTest // Instructions which we do not test since they are 70 // not TRUE instructions. 71 }; 72 73 /// NeonTypeFlags - Flags to identify the types for overloaded Neon 74 /// builtins. These must be kept in sync with the flags in 75 /// include/clang/Basic/TargetBuiltins.h. 76 namespace NeonTypeFlags { 77 enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 }; 78 79 enum EltType { 80 Int8, 81 Int16, 82 Int32, 83 Int64, 84 Poly8, 85 Poly16, 86 Poly64, 87 Poly128, 88 Float16, 89 Float32, 90 Float64 91 }; 92 } 93 94 class Intrinsic; 95 class NeonEmitter; 96 class Type; 97 class Variable; 98 99 //===----------------------------------------------------------------------===// 100 // TypeSpec 101 //===----------------------------------------------------------------------===// 102 103 /// A TypeSpec is just a simple wrapper around a string, but gets its own type 104 /// for strong typing purposes. 105 /// 106 /// A TypeSpec can be used to create a type. 
107 class TypeSpec : public std::string { 108 public: 109 static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) { 110 std::vector<TypeSpec> Ret; 111 TypeSpec Acc; 112 for (char I : Str.str()) { 113 if (islower(I)) { 114 Acc.push_back(I); 115 Ret.push_back(TypeSpec(Acc)); 116 Acc.clear(); 117 } else { 118 Acc.push_back(I); 119 } 120 } 121 return Ret; 122 } 123 }; 124 125 //===----------------------------------------------------------------------===// 126 // Type 127 //===----------------------------------------------------------------------===// 128 129 /// A Type. Not much more to say here. 130 class Type { 131 private: 132 TypeSpec TS; 133 134 bool Float, Signed, Immediate, Void, Poly, Constant, Pointer; 135 // ScalarForMangling and NoManglingQ are really not suited to live here as 136 // they are not related to the type. But they live in the TypeSpec (not the 137 // prototype), so this is really the only place to store them. 138 bool ScalarForMangling, NoManglingQ; 139 unsigned Bitwidth, ElementBitwidth, NumVectors; 140 141 public: 142 Type() 143 : Float(false), Signed(false), Immediate(false), Void(true), Poly(false), 144 Constant(false), Pointer(false), ScalarForMangling(false), 145 NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {} 146 147 Type(TypeSpec TS, char CharMod) 148 : TS(TS), Float(false), Signed(false), Immediate(false), Void(false), 149 Poly(false), Constant(false), Pointer(false), ScalarForMangling(false), 150 NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) { 151 applyModifier(CharMod); 152 } 153 154 /// Returns a type representing "void". 
155 static Type getVoid() { return Type(); } 156 157 bool operator==(const Type &Other) const { return str() == Other.str(); } 158 bool operator!=(const Type &Other) const { return !operator==(Other); } 159 160 // 161 // Query functions 162 // 163 bool isScalarForMangling() const { return ScalarForMangling; } 164 bool noManglingQ() const { return NoManglingQ; } 165 166 bool isPointer() const { return Pointer; } 167 bool isFloating() const { return Float; } 168 bool isInteger() const { return !Float && !Poly; } 169 bool isSigned() const { return Signed; } 170 bool isImmediate() const { return Immediate; } 171 bool isScalar() const { return NumVectors == 0; } 172 bool isVector() const { return NumVectors > 0; } 173 bool isFloat() const { return Float && ElementBitwidth == 32; } 174 bool isDouble() const { return Float && ElementBitwidth == 64; } 175 bool isHalf() const { return Float && ElementBitwidth == 16; } 176 bool isPoly() const { return Poly; } 177 bool isChar() const { return ElementBitwidth == 8; } 178 bool isShort() const { return !Float && ElementBitwidth == 16; } 179 bool isInt() const { return !Float && ElementBitwidth == 32; } 180 bool isLong() const { return !Float && ElementBitwidth == 64; } 181 bool isVoid() const { return Void; } 182 unsigned getNumElements() const { return Bitwidth / ElementBitwidth; } 183 unsigned getSizeInBits() const { return Bitwidth; } 184 unsigned getElementSizeInBits() const { return ElementBitwidth; } 185 unsigned getNumVectors() const { return NumVectors; } 186 187 // 188 // Mutator functions 189 // 190 void makeUnsigned() { Signed = false; } 191 void makeSigned() { Signed = true; } 192 void makeInteger(unsigned ElemWidth, bool Sign) { 193 Float = false; 194 Poly = false; 195 Signed = Sign; 196 Immediate = false; 197 ElementBitwidth = ElemWidth; 198 } 199 void makeImmediate(unsigned ElemWidth) { 200 Float = false; 201 Poly = false; 202 Signed = true; 203 Immediate = true; 204 ElementBitwidth = ElemWidth; 205 } 206 void 
makeScalar() { 207 Bitwidth = ElementBitwidth; 208 NumVectors = 0; 209 } 210 void makeOneVector() { 211 assert(isVector()); 212 NumVectors = 1; 213 } 214 void doubleLanes() { 215 assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!"); 216 Bitwidth = 128; 217 } 218 void halveLanes() { 219 assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!"); 220 Bitwidth = 64; 221 } 222 223 /// Return the C string representation of a type, which is the typename 224 /// defined in stdint.h or arm_neon.h. 225 std::string str() const; 226 227 /// Return the string representation of a type, which is an encoded 228 /// string for passing to the BUILTIN() macro in Builtins.def. 229 std::string builtin_str() const; 230 231 /// Return the value in NeonTypeFlags for this type. 232 unsigned getNeonEnum() const; 233 234 /// Parse a type from a stdint.h or arm_neon.h typedef name, 235 /// for example uint32x2_t or int64_t. 236 static Type fromTypedefName(StringRef Name); 237 238 private: 239 /// Creates the type based on the typespec string in TS. 240 /// Sets "Quad" to true if the "Q" or "H" modifiers were 241 /// seen. This is needed by applyModifier as some modifiers 242 /// only take effect if the type size was changed by "Q" or "H". 243 void applyTypespec(bool &Quad); 244 /// Applies a prototype modifier to the type. 245 void applyModifier(char Mod); 246 }; 247 248 //===----------------------------------------------------------------------===// 249 // Variable 250 //===----------------------------------------------------------------------===// 251 252 /// A variable is a simple class that just has a type and a name. 
253 class Variable { 254 Type T; 255 std::string N; 256 257 public: 258 Variable() : T(Type::getVoid()), N("") {} 259 Variable(Type T, std::string N) : T(T), N(N) {} 260 261 Type getType() const { return T; } 262 std::string getName() const { return "__" + N; } 263 }; 264 265 //===----------------------------------------------------------------------===// 266 // Intrinsic 267 //===----------------------------------------------------------------------===// 268 269 /// The main grunt class. This represents an instantiation of an intrinsic with 270 /// a particular typespec and prototype. 271 class Intrinsic { 272 friend class DagEmitter; 273 274 /// The Record this intrinsic was created from. 275 Record *R; 276 /// The unmangled name and prototype. 277 std::string Name, Proto; 278 /// The input and output typespecs. InTS == OutTS except when 279 /// CartesianProductOfTypes is 1 - this is the case for vreinterpret. 280 TypeSpec OutTS, InTS; 281 /// The base class kind. Most intrinsics use ClassS, which has full type 282 /// info for integers (s32/u32). Some use ClassI, which doesn't care about 283 /// signedness (i32), while some (ClassB) have no type at all, only a width 284 /// (32). 285 ClassKind CK; 286 /// The list of DAGs for the body. May be empty, in which case we should 287 /// emit a builtin call. 288 ListInit *Body; 289 /// The architectural #ifdef guard. 290 std::string Guard; 291 /// Set if the Unvailable bit is 1. This means we don't generate a body, 292 /// just an "unavailable" attribute on a declaration. 293 bool IsUnavailable; 294 /// Is this intrinsic safe for big-endian? or does it need its arguments 295 /// reversing? 296 bool BigEndianSafe; 297 298 /// The types of return value [0] and parameters [1..]. 299 std::vector<Type> Types; 300 /// The local variables defined. 301 std::map<std::string, Variable> Variables; 302 /// NeededEarly - set if any other intrinsic depends on this intrinsic. 
303 bool NeededEarly; 304 /// UseMacro - set if we should implement using a macro or unset for a 305 /// function. 306 bool UseMacro; 307 /// The set of intrinsics that this intrinsic uses/requires. 308 std::set<Intrinsic *> Dependencies; 309 /// The "base type", which is Type('d', OutTS). InBaseType is only 310 /// different if CartesianProductOfTypes = 1 (for vreinterpret). 311 Type BaseType, InBaseType; 312 /// The return variable. 313 Variable RetVar; 314 /// A postfix to apply to every variable. Defaults to "". 315 std::string VariablePostfix; 316 317 NeonEmitter &Emitter; 318 std::stringstream OS; 319 320 public: 321 Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS, 322 TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter, 323 StringRef Guard, bool IsUnavailable, bool BigEndianSafe) 324 : R(R), Name(Name.str()), Proto(Proto.str()), OutTS(OutTS), InTS(InTS), 325 CK(CK), Body(Body), Guard(Guard.str()), IsUnavailable(IsUnavailable), 326 BigEndianSafe(BigEndianSafe), NeededEarly(false), UseMacro(false), 327 BaseType(OutTS, 'd'), InBaseType(InTS, 'd'), Emitter(Emitter) { 328 // If this builtin takes an immediate argument, we need to #define it rather 329 // than use a standard declaration, so that SemaChecking can range check 330 // the immediate passed by the user. 331 if (Proto.find('i') != std::string::npos) 332 UseMacro = true; 333 334 // Pointer arguments need to use macros to avoid hiding aligned attributes 335 // from the pointer type. 336 if (Proto.find('p') != std::string::npos || 337 Proto.find('c') != std::string::npos) 338 UseMacro = true; 339 340 // It is not permitted to pass or return an __fp16 by value, so intrinsics 341 // taking a scalar float16_t must be implemented as macros. 342 if (OutTS.find('h') != std::string::npos && 343 Proto.find('s') != std::string::npos) 344 UseMacro = true; 345 346 // Modify the TypeSpec per-argument to get a concrete Type, and create 347 // known variables for each. 
348 // Types[0] is the return value. 349 Types.emplace_back(OutTS, Proto[0]); 350 for (unsigned I = 1; I < Proto.size(); ++I) 351 Types.emplace_back(InTS, Proto[I]); 352 } 353 354 /// Get the Record that this intrinsic is based off. 355 Record *getRecord() const { return R; } 356 /// Get the set of Intrinsics that this intrinsic calls. 357 /// this is the set of immediate dependencies, NOT the 358 /// transitive closure. 359 const std::set<Intrinsic *> &getDependencies() const { return Dependencies; } 360 /// Get the architectural guard string (#ifdef). 361 std::string getGuard() const { return Guard; } 362 /// Get the non-mangled name. 363 std::string getName() const { return Name; } 364 365 /// Return true if the intrinsic takes an immediate operand. 366 bool hasImmediate() const { 367 return Proto.find('i') != std::string::npos; 368 } 369 /// Return the parameter index of the immediate operand. 370 unsigned getImmediateIdx() const { 371 assert(hasImmediate()); 372 unsigned Idx = Proto.find('i'); 373 assert(Idx > 0 && "Can't return an immediate!"); 374 return Idx - 1; 375 } 376 377 /// Return true if the intrinsic takes an splat operand. 378 bool hasSplat() const { return Proto.find('a') != std::string::npos; } 379 /// Return the parameter index of the splat operand. 380 unsigned getSplatIdx() const { 381 assert(hasSplat()); 382 unsigned Idx = Proto.find('a'); 383 assert(Idx > 0 && "Can't return a splat!"); 384 return Idx - 1; 385 } 386 387 unsigned getNumParams() const { return Proto.size() - 1; } 388 Type getReturnType() const { return Types[0]; } 389 Type getParamType(unsigned I) const { return Types[I + 1]; } 390 Type getBaseType() const { return BaseType; } 391 /// Return the raw prototype string. 392 std::string getProto() const { return Proto; } 393 394 /// Return true if the prototype has a scalar argument. 395 /// This does not return true for the "splat" code ('a'). 
396 bool protoHasScalar(); 397 398 /// Return the index that parameter PIndex will sit at 399 /// in a generated function call. This is often just PIndex, 400 /// but may not be as things such as multiple-vector operands 401 /// and sret parameters need to be taken into accont. 402 unsigned getGeneratedParamIdx(unsigned PIndex) { 403 unsigned Idx = 0; 404 if (getReturnType().getNumVectors() > 1) 405 // Multiple vectors are passed as sret. 406 ++Idx; 407 408 for (unsigned I = 0; I < PIndex; ++I) 409 Idx += std::max(1U, getParamType(I).getNumVectors()); 410 411 return Idx; 412 } 413 414 bool hasBody() const { return Body && Body->getValues().size() > 0; } 415 416 void setNeededEarly() { NeededEarly = true; } 417 418 bool operator<(const Intrinsic &Other) const { 419 // Sort lexicographically on a two-tuple (Guard, Name) 420 if (Guard != Other.Guard) 421 return Guard < Other.Guard; 422 return Name < Other.Name; 423 } 424 425 ClassKind getClassKind(bool UseClassBIfScalar = false) { 426 if (UseClassBIfScalar && !protoHasScalar()) 427 return ClassB; 428 return CK; 429 } 430 431 /// Return the name, mangled with type information. 432 /// If ForceClassS is true, use ClassS (u32/s32) instead 433 /// of the intrinsic's own type class. 434 std::string getMangledName(bool ForceClassS = false); 435 /// Return the type code for a builtin function call. 436 std::string getInstTypeCode(Type T, ClassKind CK); 437 /// Return the type string for a BUILTIN() macro in Builtins.def. 438 std::string getBuiltinTypeStr(); 439 440 /// Generate the intrinsic, returning code. 441 std::string generate(); 442 /// Perform type checking and populate the dependency graph, but 443 /// don't generate code yet. 
444 void indexBody(); 445 446 private: 447 std::string mangleName(std::string Name, ClassKind CK); 448 449 void initVariables(); 450 std::string replaceParamsIn(std::string S); 451 452 void emitBodyAsBuiltinCall(); 453 454 void generateImpl(bool ReverseArguments, 455 StringRef NamePrefix, StringRef CallPrefix); 456 void emitReturn(); 457 void emitBody(StringRef CallPrefix); 458 void emitShadowedArgs(); 459 void emitArgumentReversal(); 460 void emitReturnReversal(); 461 void emitReverseVariable(Variable &Dest, Variable &Src); 462 void emitNewLine(); 463 void emitClosingBrace(); 464 void emitOpeningBrace(); 465 void emitPrototype(StringRef NamePrefix); 466 467 class DagEmitter { 468 Intrinsic &Intr; 469 StringRef CallPrefix; 470 471 public: 472 DagEmitter(Intrinsic &Intr, StringRef CallPrefix) : 473 Intr(Intr), CallPrefix(CallPrefix) { 474 } 475 std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName); 476 std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI); 477 std::pair<Type, std::string> emitDagSplat(DagInit *DI); 478 std::pair<Type, std::string> emitDagDup(DagInit *DI); 479 std::pair<Type, std::string> emitDagShuffle(DagInit *DI); 480 std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast); 481 std::pair<Type, std::string> emitDagCall(DagInit *DI); 482 std::pair<Type, std::string> emitDagNameReplace(DagInit *DI); 483 std::pair<Type, std::string> emitDagLiteral(DagInit *DI); 484 std::pair<Type, std::string> emitDagOp(DagInit *DI); 485 std::pair<Type, std::string> emitDag(DagInit *DI); 486 }; 487 488 }; 489 490 //===----------------------------------------------------------------------===// 491 // NeonEmitter 492 //===----------------------------------------------------------------------===// 493 494 class NeonEmitter { 495 RecordKeeper &Records; 496 DenseMap<Record *, ClassKind> ClassMap; 497 std::map<std::string, std::vector<Intrinsic *>> IntrinsicMap; 498 unsigned UniqueNumber; 499 500 void createIntrinsic(Record *R, 
SmallVectorImpl<Intrinsic *> &Out); 501 void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs); 502 void genOverloadTypeCheckCode(raw_ostream &OS, 503 SmallVectorImpl<Intrinsic *> &Defs); 504 void genIntrinsicRangeCheckCode(raw_ostream &OS, 505 SmallVectorImpl<Intrinsic *> &Defs); 506 507 public: 508 /// Called by Intrinsic - this attempts to get an intrinsic that takes 509 /// the given types as arguments. 510 Intrinsic *getIntrinsic(StringRef Name, ArrayRef<Type> Types); 511 512 /// Called by Intrinsic - returns a globally-unique number. 513 unsigned getUniqueNumber() { return UniqueNumber++; } 514 515 NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) { 516 Record *SI = R.getClass("SInst"); 517 Record *II = R.getClass("IInst"); 518 Record *WI = R.getClass("WInst"); 519 Record *SOpI = R.getClass("SOpInst"); 520 Record *IOpI = R.getClass("IOpInst"); 521 Record *WOpI = R.getClass("WOpInst"); 522 Record *LOpI = R.getClass("LOpInst"); 523 Record *NoTestOpI = R.getClass("NoTestOpInst"); 524 525 ClassMap[SI] = ClassS; 526 ClassMap[II] = ClassI; 527 ClassMap[WI] = ClassW; 528 ClassMap[SOpI] = ClassS; 529 ClassMap[IOpI] = ClassI; 530 ClassMap[WOpI] = ClassW; 531 ClassMap[LOpI] = ClassL; 532 ClassMap[NoTestOpI] = ClassNoTest; 533 } 534 535 // run - Emit arm_neon.h.inc 536 void run(raw_ostream &o); 537 538 // runHeader - Emit all the __builtin prototypes used in arm_neon.h 539 void runHeader(raw_ostream &o); 540 541 // runTests - Emit tests for all the Neon intrinsics. 
542 void runTests(raw_ostream &o); 543 }; 544 545 } // end anonymous namespace 546 547 //===----------------------------------------------------------------------===// 548 // Type implementation 549 //===----------------------------------------------------------------------===// 550 551 std::string Type::str() const { 552 if (Void) 553 return "void"; 554 std::string S; 555 556 if (!Signed && isInteger()) 557 S += "u"; 558 559 if (Poly) 560 S += "poly"; 561 else if (Float) 562 S += "float"; 563 else 564 S += "int"; 565 566 S += utostr(ElementBitwidth); 567 if (isVector()) 568 S += "x" + utostr(getNumElements()); 569 if (NumVectors > 1) 570 S += "x" + utostr(NumVectors); 571 S += "_t"; 572 573 if (Constant) 574 S += " const"; 575 if (Pointer) 576 S += " *"; 577 578 return S; 579 } 580 581 std::string Type::builtin_str() const { 582 std::string S; 583 if (isVoid()) 584 return "v"; 585 586 if (Pointer) 587 // All pointers are void pointers. 588 S += "v"; 589 else if (isInteger()) 590 switch (ElementBitwidth) { 591 case 8: S += "c"; break; 592 case 16: S += "s"; break; 593 case 32: S += "i"; break; 594 case 64: S += "Wi"; break; 595 case 128: S += "LLLi"; break; 596 default: llvm_unreachable("Unhandled case!"); 597 } 598 else 599 switch (ElementBitwidth) { 600 case 16: S += "h"; break; 601 case 32: S += "f"; break; 602 case 64: S += "d"; break; 603 default: llvm_unreachable("Unhandled case!"); 604 } 605 606 if (isChar() && !Pointer) 607 // Make chars explicitly signed. 608 S = "S" + S; 609 else if (isInteger() && !Pointer && !Signed) 610 S = "U" + S; 611 612 // Constant indices are "int", but have the "constant expression" modifier. 
613 if (isImmediate()) { 614 assert(isInteger() && isSigned()); 615 S = "I" + S; 616 } 617 618 if (isScalar()) { 619 if (Constant) S += "C"; 620 if (Pointer) S += "*"; 621 return S; 622 } 623 624 std::string Ret; 625 for (unsigned I = 0; I < NumVectors; ++I) 626 Ret += "V" + utostr(getNumElements()) + S; 627 628 return Ret; 629 } 630 631 unsigned Type::getNeonEnum() const { 632 unsigned Addend; 633 switch (ElementBitwidth) { 634 case 8: Addend = 0; break; 635 case 16: Addend = 1; break; 636 case 32: Addend = 2; break; 637 case 64: Addend = 3; break; 638 case 128: Addend = 4; break; 639 default: llvm_unreachable("Unhandled element bitwidth!"); 640 } 641 642 unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend; 643 if (Poly) { 644 // Adjustment needed because Poly32 doesn't exist. 645 if (Addend >= 2) 646 --Addend; 647 Base = (unsigned)NeonTypeFlags::Poly8 + Addend; 648 } 649 if (Float) { 650 assert(Addend != 0 && "Float8 doesn't exist!"); 651 Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1); 652 } 653 654 if (Bitwidth == 128) 655 Base |= (unsigned)NeonTypeFlags::QuadFlag; 656 if (isInteger() && !Signed) 657 Base |= (unsigned)NeonTypeFlags::UnsignedFlag; 658 659 return Base; 660 } 661 662 Type Type::fromTypedefName(StringRef Name) { 663 Type T; 664 T.Void = false; 665 T.Float = false; 666 T.Poly = false; 667 668 if (Name.front() == 'u') { 669 T.Signed = false; 670 Name = Name.drop_front(); 671 } else { 672 T.Signed = true; 673 } 674 675 if (Name.startswith("float")) { 676 T.Float = true; 677 Name = Name.drop_front(5); 678 } else if (Name.startswith("poly")) { 679 T.Poly = true; 680 Name = Name.drop_front(4); 681 } else { 682 assert(Name.startswith("int")); 683 Name = Name.drop_front(3); 684 } 685 686 unsigned I = 0; 687 for (I = 0; I < Name.size(); ++I) { 688 if (!isdigit(Name[I])) 689 break; 690 } 691 Name.substr(0, I).getAsInteger(10, T.ElementBitwidth); 692 Name = Name.drop_front(I); 693 694 T.Bitwidth = T.ElementBitwidth; 695 T.NumVectors = 1; 696 697 if 
(Name.front() == 'x') { 698 Name = Name.drop_front(); 699 unsigned I = 0; 700 for (I = 0; I < Name.size(); ++I) { 701 if (!isdigit(Name[I])) 702 break; 703 } 704 unsigned NumLanes; 705 Name.substr(0, I).getAsInteger(10, NumLanes); 706 Name = Name.drop_front(I); 707 T.Bitwidth = T.ElementBitwidth * NumLanes; 708 } else { 709 // Was scalar. 710 T.NumVectors = 0; 711 } 712 if (Name.front() == 'x') { 713 Name = Name.drop_front(); 714 unsigned I = 0; 715 for (I = 0; I < Name.size(); ++I) { 716 if (!isdigit(Name[I])) 717 break; 718 } 719 Name.substr(0, I).getAsInteger(10, T.NumVectors); 720 Name = Name.drop_front(I); 721 } 722 723 assert(Name.startswith("_t") && "Malformed typedef!"); 724 return T; 725 } 726 727 void Type::applyTypespec(bool &Quad) { 728 std::string S = TS; 729 ScalarForMangling = false; 730 Void = false; 731 Poly = Float = false; 732 ElementBitwidth = ~0U; 733 Signed = true; 734 NumVectors = 1; 735 736 for (char I : S) { 737 switch (I) { 738 case 'S': 739 ScalarForMangling = true; 740 break; 741 case 'H': 742 NoManglingQ = true; 743 Quad = true; 744 break; 745 case 'Q': 746 Quad = true; 747 break; 748 case 'P': 749 Poly = true; 750 break; 751 case 'U': 752 Signed = false; 753 break; 754 case 'c': 755 ElementBitwidth = 8; 756 break; 757 case 'h': 758 Float = true; 759 // Fall through 760 case 's': 761 ElementBitwidth = 16; 762 break; 763 case 'f': 764 Float = true; 765 // Fall through 766 case 'i': 767 ElementBitwidth = 32; 768 break; 769 case 'd': 770 Float = true; 771 // Fall through 772 case 'l': 773 ElementBitwidth = 64; 774 break; 775 case 'k': 776 ElementBitwidth = 128; 777 // Poly doesn't have a 128x1 type. 778 if (Poly) 779 NumVectors = 0; 780 break; 781 default: 782 llvm_unreachable("Unhandled type code!"); 783 } 784 } 785 assert(ElementBitwidth != ~0U && "Bad element bitwidth!"); 786 787 Bitwidth = Quad ? 
128 : 64; 788 } 789 790 void Type::applyModifier(char Mod) { 791 bool AppliedQuad = false; 792 applyTypespec(AppliedQuad); 793 794 switch (Mod) { 795 case 'v': 796 Void = true; 797 break; 798 case 't': 799 if (Poly) { 800 Poly = false; 801 Signed = false; 802 } 803 break; 804 case 'b': 805 Signed = false; 806 Float = false; 807 Poly = false; 808 NumVectors = 0; 809 Bitwidth = ElementBitwidth; 810 break; 811 case '$': 812 Signed = true; 813 Float = false; 814 Poly = false; 815 NumVectors = 0; 816 Bitwidth = ElementBitwidth; 817 break; 818 case 'u': 819 Signed = false; 820 Poly = false; 821 Float = false; 822 break; 823 case 'x': 824 Signed = true; 825 assert(!Poly && "'u' can't be used with poly types!"); 826 Float = false; 827 break; 828 case 'o': 829 Bitwidth = ElementBitwidth = 64; 830 NumVectors = 0; 831 Float = true; 832 break; 833 case 'y': 834 Bitwidth = ElementBitwidth = 32; 835 NumVectors = 0; 836 Float = true; 837 break; 838 case 'f': 839 // Special case - if we're half-precision, a floating 840 // point argument needs to be 128-bits (double size). 
841 if (isHalf()) 842 Bitwidth = 128; 843 Float = true; 844 ElementBitwidth = 32; 845 break; 846 case 'F': 847 Float = true; 848 ElementBitwidth = 64; 849 break; 850 case 'g': 851 if (AppliedQuad) 852 Bitwidth /= 2; 853 break; 854 case 'j': 855 if (!AppliedQuad) 856 Bitwidth *= 2; 857 break; 858 case 'w': 859 ElementBitwidth *= 2; 860 Bitwidth *= 2; 861 break; 862 case 'n': 863 ElementBitwidth *= 2; 864 break; 865 case 'i': 866 Float = false; 867 Poly = false; 868 ElementBitwidth = Bitwidth = 32; 869 NumVectors = 0; 870 Signed = true; 871 Immediate = true; 872 break; 873 case 'l': 874 Float = false; 875 Poly = false; 876 ElementBitwidth = Bitwidth = 64; 877 NumVectors = 0; 878 Signed = false; 879 Immediate = true; 880 break; 881 case 'z': 882 ElementBitwidth /= 2; 883 Bitwidth = ElementBitwidth; 884 NumVectors = 0; 885 break; 886 case 'r': 887 ElementBitwidth *= 2; 888 Bitwidth = ElementBitwidth; 889 NumVectors = 0; 890 break; 891 case 's': 892 case 'a': 893 Bitwidth = ElementBitwidth; 894 NumVectors = 0; 895 break; 896 case 'k': 897 Bitwidth *= 2; 898 break; 899 case 'c': 900 Constant = true; 901 // Fall through 902 case 'p': 903 Pointer = true; 904 Bitwidth = ElementBitwidth; 905 NumVectors = 0; 906 break; 907 case 'h': 908 ElementBitwidth /= 2; 909 break; 910 case 'q': 911 ElementBitwidth /= 2; 912 Bitwidth *= 2; 913 break; 914 case 'e': 915 ElementBitwidth /= 2; 916 Signed = false; 917 break; 918 case 'm': 919 ElementBitwidth /= 2; 920 Bitwidth /= 2; 921 break; 922 case 'd': 923 break; 924 case '2': 925 NumVectors = 2; 926 break; 927 case '3': 928 NumVectors = 3; 929 break; 930 case '4': 931 NumVectors = 4; 932 break; 933 case 'B': 934 NumVectors = 2; 935 if (!AppliedQuad) 936 Bitwidth *= 2; 937 break; 938 case 'C': 939 NumVectors = 3; 940 if (!AppliedQuad) 941 Bitwidth *= 2; 942 break; 943 case 'D': 944 NumVectors = 4; 945 if (!AppliedQuad) 946 Bitwidth *= 2; 947 break; 948 default: 949 llvm_unreachable("Unhandled character!"); 950 } 951 } 952 953 
//===----------------------------------------------------------------------===// 954 // Intrinsic implementation 955 //===----------------------------------------------------------------------===// 956 957 std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) { 958 char typeCode = '\0'; 959 bool printNumber = true; 960 961 if (CK == ClassB) 962 return ""; 963 964 if (T.isPoly()) 965 typeCode = 'p'; 966 else if (T.isInteger()) 967 typeCode = T.isSigned() ? 's' : 'u'; 968 else 969 typeCode = 'f'; 970 971 if (CK == ClassI) { 972 switch (typeCode) { 973 default: 974 break; 975 case 's': 976 case 'u': 977 case 'p': 978 typeCode = 'i'; 979 break; 980 } 981 } 982 if (CK == ClassB) { 983 typeCode = '\0'; 984 } 985 986 std::string S; 987 if (typeCode != '\0') 988 S.push_back(typeCode); 989 if (printNumber) 990 S += utostr(T.getElementSizeInBits()); 991 992 return S; 993 } 994 995 std::string Intrinsic::getBuiltinTypeStr() { 996 ClassKind LocalCK = getClassKind(true); 997 std::string S; 998 999 Type RetT = getReturnType(); 1000 if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() && 1001 !RetT.isFloating()) 1002 RetT.makeInteger(RetT.getElementSizeInBits(), false); 1003 1004 // Since the return value must be one type, return a vector type of the 1005 // appropriate width which we will bitcast. An exception is made for 1006 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like 1007 // fashion, storing them to a pointer arg. 1008 if (RetT.getNumVectors() > 1) { 1009 S += "vv*"; // void result with void* first argument 1010 } else { 1011 if (RetT.isPoly()) 1012 RetT.makeInteger(RetT.getElementSizeInBits(), false); 1013 if (!RetT.isScalar() && !RetT.isSigned()) 1014 RetT.makeSigned(); 1015 1016 bool ForcedVectorFloatingType = Proto[0] == 'F' || Proto[0] == 'f'; 1017 if (LocalCK == ClassB && !RetT.isScalar() && !ForcedVectorFloatingType) 1018 // Cast to vector of 8-bit elements. 
1019 RetT.makeInteger(8, true); 1020 1021 S += RetT.builtin_str(); 1022 } 1023 1024 for (unsigned I = 0; I < getNumParams(); ++I) { 1025 Type T = getParamType(I); 1026 if (T.isPoly()) 1027 T.makeInteger(T.getElementSizeInBits(), false); 1028 1029 bool ForcedFloatingType = Proto[I + 1] == 'F' || Proto[I + 1] == 'f'; 1030 if (LocalCK == ClassB && !T.isScalar() && !ForcedFloatingType) 1031 T.makeInteger(8, true); 1032 // Halves always get converted to 8-bit elements. 1033 if (T.isHalf() && T.isVector() && !T.isScalarForMangling()) 1034 T.makeInteger(8, true); 1035 1036 if (LocalCK == ClassI) 1037 T.makeSigned(); 1038 1039 if (hasImmediate() && getImmediateIdx() == I) 1040 T.makeImmediate(32); 1041 1042 S += T.builtin_str(); 1043 } 1044 1045 // Extra constant integer to hold type class enum for this function, e.g. s8 1046 if (LocalCK == ClassB) 1047 S += "i"; 1048 1049 return S; 1050 } 1051 1052 std::string Intrinsic::getMangledName(bool ForceClassS) { 1053 // Check if the prototype has a scalar operand with the type of the vector 1054 // elements. If not, bitcasting the args will take care of arg checking. 1055 // The actual signedness etc. will be taken care of with special enums. 1056 ClassKind LocalCK = CK; 1057 if (!protoHasScalar()) 1058 LocalCK = ClassB; 1059 1060 return mangleName(Name, ForceClassS ? ClassS : LocalCK); 1061 } 1062 1063 std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) { 1064 std::string typeCode = getInstTypeCode(BaseType, LocalCK); 1065 std::string S = Name; 1066 1067 if (Name == "vcvt_f32_f16" || Name == "vcvt_f32_f64" || 1068 Name == "vcvt_f64_f32") 1069 return Name; 1070 1071 if (typeCode.size() > 0) { 1072 // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN. 
1073 if (Name.size() >= 3 && isdigit(Name.back()) && 1074 Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_') 1075 S.insert(S.length() - 3, "_" + typeCode); 1076 else 1077 S += "_" + typeCode; 1078 } 1079 1080 if (BaseType != InBaseType) { 1081 // A reinterpret - out the input base type at the end. 1082 S += "_" + getInstTypeCode(InBaseType, LocalCK); 1083 } 1084 1085 if (LocalCK == ClassB) 1086 S += "_v"; 1087 1088 // Insert a 'q' before the first '_' character so that it ends up before 1089 // _lane or _n on vector-scalar operations. 1090 if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) { 1091 size_t Pos = S.find('_'); 1092 S.insert(Pos, "q"); 1093 } 1094 1095 char Suffix = '\0'; 1096 if (BaseType.isScalarForMangling()) { 1097 switch (BaseType.getElementSizeInBits()) { 1098 case 8: Suffix = 'b'; break; 1099 case 16: Suffix = 'h'; break; 1100 case 32: Suffix = 's'; break; 1101 case 64: Suffix = 'd'; break; 1102 default: llvm_unreachable("Bad suffix!"); 1103 } 1104 } 1105 if (Suffix != '\0') { 1106 size_t Pos = S.find('_'); 1107 S.insert(Pos, &Suffix, 1); 1108 } 1109 1110 return S; 1111 } 1112 1113 std::string Intrinsic::replaceParamsIn(std::string S) { 1114 while (S.find('$') != std::string::npos) { 1115 size_t Pos = S.find('$'); 1116 size_t End = Pos + 1; 1117 while (isalpha(S[End])) 1118 ++End; 1119 1120 std::string VarName = S.substr(Pos + 1, End - Pos - 1); 1121 assert_with_loc(Variables.find(VarName) != Variables.end(), 1122 "Variable not defined!"); 1123 S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName()); 1124 } 1125 1126 return S; 1127 } 1128 1129 void Intrinsic::initVariables() { 1130 Variables.clear(); 1131 1132 // Modify the TypeSpec per-argument to get a concrete Type, and create 1133 // known variables for each. 
1134 for (unsigned I = 1; I < Proto.size(); ++I) { 1135 char NameC = '0' + (I - 1); 1136 std::string Name = "p"; 1137 Name.push_back(NameC); 1138 1139 Variables[Name] = Variable(Types[I], Name + VariablePostfix); 1140 } 1141 RetVar = Variable(Types[0], "ret" + VariablePostfix); 1142 } 1143 1144 void Intrinsic::emitPrototype(StringRef NamePrefix) { 1145 if (UseMacro) 1146 OS << "#define "; 1147 else 1148 OS << "__ai " << Types[0].str() << " "; 1149 1150 OS << NamePrefix.str() << mangleName(Name, ClassS) << "("; 1151 1152 for (unsigned I = 0; I < getNumParams(); ++I) { 1153 if (I != 0) 1154 OS << ", "; 1155 1156 char NameC = '0' + I; 1157 std::string Name = "p"; 1158 Name.push_back(NameC); 1159 assert(Variables.find(Name) != Variables.end()); 1160 Variable &V = Variables[Name]; 1161 1162 if (!UseMacro) 1163 OS << V.getType().str() << " "; 1164 OS << V.getName(); 1165 } 1166 1167 OS << ")"; 1168 } 1169 1170 void Intrinsic::emitOpeningBrace() { 1171 if (UseMacro) 1172 OS << " __extension__ ({"; 1173 else 1174 OS << " {"; 1175 emitNewLine(); 1176 } 1177 1178 void Intrinsic::emitClosingBrace() { 1179 if (UseMacro) 1180 OS << "})"; 1181 else 1182 OS << "}"; 1183 } 1184 1185 void Intrinsic::emitNewLine() { 1186 if (UseMacro) 1187 OS << " \\\n"; 1188 else 1189 OS << "\n"; 1190 } 1191 1192 void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) { 1193 if (Dest.getType().getNumVectors() > 1) { 1194 emitNewLine(); 1195 1196 for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) { 1197 OS << " " << Dest.getName() << ".val[" << utostr(K) << "] = " 1198 << "__builtin_shufflevector(" 1199 << Src.getName() << ".val[" << utostr(K) << "], " 1200 << Src.getName() << ".val[" << utostr(K) << "]"; 1201 for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J) 1202 OS << ", " << utostr(J); 1203 OS << ");"; 1204 emitNewLine(); 1205 } 1206 } else { 1207 OS << " " << Dest.getName() 1208 << " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName(); 1209 
for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J) 1210 OS << ", " << utostr(J); 1211 OS << ");"; 1212 emitNewLine(); 1213 } 1214 } 1215 1216 void Intrinsic::emitArgumentReversal() { 1217 if (BigEndianSafe) 1218 return; 1219 1220 // Reverse all vector arguments. 1221 for (unsigned I = 0; I < getNumParams(); ++I) { 1222 std::string Name = "p" + utostr(I); 1223 std::string NewName = "rev" + utostr(I); 1224 1225 Variable &V = Variables[Name]; 1226 Variable NewV(V.getType(), NewName + VariablePostfix); 1227 1228 if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1) 1229 continue; 1230 1231 OS << " " << NewV.getType().str() << " " << NewV.getName() << ";"; 1232 emitReverseVariable(NewV, V); 1233 V = NewV; 1234 } 1235 } 1236 1237 void Intrinsic::emitReturnReversal() { 1238 if (BigEndianSafe) 1239 return; 1240 if (!getReturnType().isVector() || getReturnType().isVoid() || 1241 getReturnType().getNumElements() == 1) 1242 return; 1243 emitReverseVariable(RetVar, RetVar); 1244 } 1245 1246 1247 void Intrinsic::emitShadowedArgs() { 1248 // Macro arguments are not type-checked like inline function arguments, 1249 // so assign them to local temporaries to get the right type checking. 1250 if (!UseMacro) 1251 return; 1252 1253 for (unsigned I = 0; I < getNumParams(); ++I) { 1254 // Do not create a temporary for an immediate argument. 1255 // That would defeat the whole point of using a macro! 1256 if (hasImmediate() && Proto[I+1] == 'i') 1257 continue; 1258 // Do not create a temporary for pointer arguments. The input 1259 // pointer may have an alignment hint. 
1260 if (getParamType(I).isPointer()) 1261 continue; 1262 1263 std::string Name = "p" + utostr(I); 1264 1265 assert(Variables.find(Name) != Variables.end()); 1266 Variable &V = Variables[Name]; 1267 1268 std::string NewName = "s" + utostr(I); 1269 Variable V2(V.getType(), NewName + VariablePostfix); 1270 1271 OS << " " << V2.getType().str() << " " << V2.getName() << " = " 1272 << V.getName() << ";"; 1273 emitNewLine(); 1274 1275 V = V2; 1276 } 1277 } 1278 1279 // We don't check 'a' in this function, because for builtin function the 1280 // argument matching to 'a' uses a vector type splatted from a scalar type. 1281 bool Intrinsic::protoHasScalar() { 1282 return (Proto.find('s') != std::string::npos || 1283 Proto.find('z') != std::string::npos || 1284 Proto.find('r') != std::string::npos || 1285 Proto.find('b') != std::string::npos || 1286 Proto.find('$') != std::string::npos || 1287 Proto.find('y') != std::string::npos || 1288 Proto.find('o') != std::string::npos); 1289 } 1290 1291 void Intrinsic::emitBodyAsBuiltinCall() { 1292 std::string S; 1293 1294 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit 1295 // sret-like argument. 1296 bool SRet = getReturnType().getNumVectors() >= 2; 1297 1298 StringRef N = Name; 1299 if (hasSplat()) { 1300 // Call the non-splat builtin: chop off the "_n" suffix from the name. 1301 assert(N.endswith("_n")); 1302 N = N.drop_back(2); 1303 } 1304 1305 ClassKind LocalCK = CK; 1306 if (!protoHasScalar()) 1307 LocalCK = ClassB; 1308 1309 if (!getReturnType().isVoid() && !SRet) 1310 S += "(" + RetVar.getType().str() + ") "; 1311 1312 S += "__builtin_neon_" + mangleName(N, LocalCK) + "("; 1313 1314 if (SRet) 1315 S += "&" + RetVar.getName() + ", "; 1316 1317 for (unsigned I = 0; I < getNumParams(); ++I) { 1318 Variable &V = Variables["p" + utostr(I)]; 1319 Type T = V.getType(); 1320 1321 // Handle multiple-vector values specially, emitting each subvector as an 1322 // argument to the builtin. 
1323 if (T.getNumVectors() > 1) { 1324 // Check if an explicit cast is needed. 1325 std::string Cast; 1326 if (T.isChar() || T.isPoly() || !T.isSigned()) { 1327 Type T2 = T; 1328 T2.makeOneVector(); 1329 T2.makeInteger(8, /*Signed=*/true); 1330 Cast = "(" + T2.str() + ")"; 1331 } 1332 1333 for (unsigned J = 0; J < T.getNumVectors(); ++J) 1334 S += Cast + V.getName() + ".val[" + utostr(J) + "], "; 1335 continue; 1336 } 1337 1338 std::string Arg; 1339 Type CastToType = T; 1340 if (hasSplat() && I == getSplatIdx()) { 1341 Arg = "(" + BaseType.str() + ") {"; 1342 for (unsigned J = 0; J < BaseType.getNumElements(); ++J) { 1343 if (J != 0) 1344 Arg += ", "; 1345 Arg += V.getName(); 1346 } 1347 Arg += "}"; 1348 1349 CastToType = BaseType; 1350 } else { 1351 Arg = V.getName(); 1352 } 1353 1354 // Check if an explicit cast is needed. 1355 if (CastToType.isVector()) { 1356 CastToType.makeInteger(8, true); 1357 Arg = "(" + CastToType.str() + ")" + Arg; 1358 } 1359 1360 S += Arg + ", "; 1361 } 1362 1363 // Extra constant integer to hold type class enum for this function, e.g. s8 1364 if (getClassKind(true) == ClassB) { 1365 Type ThisTy = getReturnType(); 1366 if (Proto[0] == 'v' || Proto[0] == 'f' || Proto[0] == 'F') 1367 ThisTy = getParamType(0); 1368 if (ThisTy.isPointer()) 1369 ThisTy = getParamType(1); 1370 1371 S += utostr(ThisTy.getNeonEnum()); 1372 } else { 1373 // Remove extraneous ", ". 1374 S.pop_back(); 1375 S.pop_back(); 1376 } 1377 S += ");"; 1378 1379 std::string RetExpr; 1380 if (!SRet && !RetVar.getType().isVoid()) 1381 RetExpr = RetVar.getName() + " = "; 1382 1383 OS << " " << RetExpr << S; 1384 emitNewLine(); 1385 } 1386 1387 void Intrinsic::emitBody(StringRef CallPrefix) { 1388 std::vector<std::string> Lines; 1389 1390 assert(RetVar.getType() == Types[0]); 1391 // Create a return variable, if we're not void. 
1392 if (!RetVar.getType().isVoid()) { 1393 OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";"; 1394 emitNewLine(); 1395 } 1396 1397 if (!Body || Body->getValues().size() == 0) { 1398 // Nothing specific to output - must output a builtin. 1399 emitBodyAsBuiltinCall(); 1400 return; 1401 } 1402 1403 // We have a list of "things to output". The last should be returned. 1404 for (auto *I : Body->getValues()) { 1405 if (StringInit *SI = dyn_cast<StringInit>(I)) { 1406 Lines.push_back(replaceParamsIn(SI->getAsString())); 1407 } else if (DagInit *DI = dyn_cast<DagInit>(I)) { 1408 DagEmitter DE(*this, CallPrefix); 1409 Lines.push_back(DE.emitDag(DI).second + ";"); 1410 } 1411 } 1412 1413 assert(!Lines.empty() && "Empty def?"); 1414 if (!RetVar.getType().isVoid()) 1415 Lines.back().insert(0, RetVar.getName() + " = "); 1416 1417 for (auto &L : Lines) { 1418 OS << " " << L; 1419 emitNewLine(); 1420 } 1421 } 1422 1423 void Intrinsic::emitReturn() { 1424 if (RetVar.getType().isVoid()) 1425 return; 1426 if (UseMacro) 1427 OS << " " << RetVar.getName() << ";"; 1428 else 1429 OS << " return " << RetVar.getName() << ";"; 1430 emitNewLine(); 1431 } 1432 1433 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) { 1434 // At this point we should only be seeing a def. 
1435 DefInit *DefI = cast<DefInit>(DI->getOperator()); 1436 std::string Op = DefI->getAsString(); 1437 1438 if (Op == "cast" || Op == "bitcast") 1439 return emitDagCast(DI, Op == "bitcast"); 1440 if (Op == "shuffle") 1441 return emitDagShuffle(DI); 1442 if (Op == "dup") 1443 return emitDagDup(DI); 1444 if (Op == "splat") 1445 return emitDagSplat(DI); 1446 if (Op == "save_temp") 1447 return emitDagSaveTemp(DI); 1448 if (Op == "op") 1449 return emitDagOp(DI); 1450 if (Op == "call") 1451 return emitDagCall(DI); 1452 if (Op == "name_replace") 1453 return emitDagNameReplace(DI); 1454 if (Op == "literal") 1455 return emitDagLiteral(DI); 1456 assert_with_loc(false, "Unknown operation!"); 1457 return std::make_pair(Type::getVoid(), ""); 1458 } 1459 1460 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) { 1461 std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1462 if (DI->getNumArgs() == 2) { 1463 // Unary op. 1464 std::pair<Type, std::string> R = 1465 emitDagArg(DI->getArg(1), DI->getArgName(1)); 1466 return std::make_pair(R.first, Op + R.second); 1467 } else { 1468 assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!"); 1469 std::pair<Type, std::string> R1 = 1470 emitDagArg(DI->getArg(1), DI->getArgName(1)); 1471 std::pair<Type, std::string> R2 = 1472 emitDagArg(DI->getArg(2), DI->getArgName(2)); 1473 assert_with_loc(R1.first == R2.first, "Argument type mismatch!"); 1474 return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second); 1475 } 1476 } 1477 1478 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCall(DagInit *DI) { 1479 std::vector<Type> Types; 1480 std::vector<std::string> Values; 1481 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { 1482 std::pair<Type, std::string> R = 1483 emitDagArg(DI->getArg(I + 1), DI->getArgName(I + 1)); 1484 Types.push_back(R.first); 1485 Values.push_back(R.second); 1486 } 1487 1488 // Look up the called intrinsic. 
1489 std::string N; 1490 if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0))) 1491 N = SI->getAsUnquotedString(); 1492 else 1493 N = emitDagArg(DI->getArg(0), "").second; 1494 Intrinsic *Callee = Intr.Emitter.getIntrinsic(N, Types); 1495 assert(Callee && "getIntrinsic should not return us nullptr!"); 1496 1497 // Make sure the callee is known as an early def. 1498 Callee->setNeededEarly(); 1499 Intr.Dependencies.insert(Callee); 1500 1501 // Now create the call itself. 1502 std::string S = CallPrefix.str() + Callee->getMangledName(true) + "("; 1503 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { 1504 if (I != 0) 1505 S += ", "; 1506 S += Values[I]; 1507 } 1508 S += ")"; 1509 1510 return std::make_pair(Callee->getReturnType(), S); 1511 } 1512 1513 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI, 1514 bool IsBitCast){ 1515 // (cast MOD* VAL) -> cast VAL to type given by MOD. 1516 std::pair<Type, std::string> R = emitDagArg( 1517 DI->getArg(DI->getNumArgs() - 1), DI->getArgName(DI->getNumArgs() - 1)); 1518 Type castToType = R.first; 1519 for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) { 1520 1521 // MOD can take several forms: 1522 // 1. $X - take the type of parameter / variable X. 1523 // 2. The value "R" - take the type of the return type. 1524 // 3. a type string 1525 // 4. The value "U" or "S" to switch the signedness. 1526 // 5. The value "H" or "D" to half or double the bitwidth. 1527 // 6. The value "8" to convert to 8-bit (signed) integer lanes. 
1528 if (DI->getArgName(ArgIdx).size()) { 1529 assert_with_loc(Intr.Variables.find(DI->getArgName(ArgIdx)) != 1530 Intr.Variables.end(), 1531 "Variable not found"); 1532 castToType = Intr.Variables[DI->getArgName(ArgIdx)].getType(); 1533 } else { 1534 StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx)); 1535 assert_with_loc(SI, "Expected string type or $Name for cast type"); 1536 1537 if (SI->getAsUnquotedString() == "R") { 1538 castToType = Intr.getReturnType(); 1539 } else if (SI->getAsUnquotedString() == "U") { 1540 castToType.makeUnsigned(); 1541 } else if (SI->getAsUnquotedString() == "S") { 1542 castToType.makeSigned(); 1543 } else if (SI->getAsUnquotedString() == "H") { 1544 castToType.halveLanes(); 1545 } else if (SI->getAsUnquotedString() == "D") { 1546 castToType.doubleLanes(); 1547 } else if (SI->getAsUnquotedString() == "8") { 1548 castToType.makeInteger(8, true); 1549 } else { 1550 castToType = Type::fromTypedefName(SI->getAsUnquotedString()); 1551 assert_with_loc(!castToType.isVoid(), "Unknown typedef"); 1552 } 1553 } 1554 } 1555 1556 std::string S; 1557 if (IsBitCast) { 1558 // Emit a reinterpret cast. The second operand must be an lvalue, so create 1559 // a temporary. 1560 std::string N = "reint"; 1561 unsigned I = 0; 1562 while (Intr.Variables.find(N) != Intr.Variables.end()) 1563 N = "reint" + utostr(++I); 1564 Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix); 1565 1566 Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = " 1567 << R.second << ";"; 1568 Intr.emitNewLine(); 1569 1570 S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + ""; 1571 } else { 1572 // Emit a normal (static) cast. 1573 S = "(" + castToType.str() + ")(" + R.second + ")"; 1574 } 1575 1576 return std::make_pair(castToType, S); 1577 } 1578 1579 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){ 1580 // See the documentation in arm_neon.td for a description of these operators. 
1581 class LowHalf : public SetTheory::Operator { 1582 public: 1583 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1584 ArrayRef<SMLoc> Loc) override { 1585 SetTheory::RecSet Elts2; 1586 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); 1587 Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2)); 1588 } 1589 }; 1590 class HighHalf : public SetTheory::Operator { 1591 public: 1592 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1593 ArrayRef<SMLoc> Loc) override { 1594 SetTheory::RecSet Elts2; 1595 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); 1596 Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end()); 1597 } 1598 }; 1599 class Rev : public SetTheory::Operator { 1600 unsigned ElementSize; 1601 1602 public: 1603 Rev(unsigned ElementSize) : ElementSize(ElementSize) {} 1604 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1605 ArrayRef<SMLoc> Loc) override { 1606 SetTheory::RecSet Elts2; 1607 ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc); 1608 1609 int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue(); 1610 VectorSize /= ElementSize; 1611 1612 std::vector<Record *> Revved; 1613 for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) { 1614 for (int LI = VectorSize - 1; LI >= 0; --LI) { 1615 Revved.push_back(Elts2[VI + LI]); 1616 } 1617 } 1618 1619 Elts.insert(Revved.begin(), Revved.end()); 1620 } 1621 }; 1622 class MaskExpander : public SetTheory::Expander { 1623 unsigned N; 1624 1625 public: 1626 MaskExpander(unsigned N) : N(N) {} 1627 void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) override { 1628 unsigned Addend = 0; 1629 if (R->getName() == "mask0") 1630 Addend = 0; 1631 else if (R->getName() == "mask1") 1632 Addend = N; 1633 else 1634 return; 1635 for (unsigned I = 0; I < N; ++I) 1636 Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend))); 1637 } 1638 }; 1639 1640 // (shuffle arg1, arg2, sequence) 1641 std::pair<Type, 
std::string> Arg1 = 1642 emitDagArg(DI->getArg(0), DI->getArgName(0)); 1643 std::pair<Type, std::string> Arg2 = 1644 emitDagArg(DI->getArg(1), DI->getArgName(1)); 1645 assert_with_loc(Arg1.first == Arg2.first, 1646 "Different types in arguments to shuffle!"); 1647 1648 SetTheory ST; 1649 SetTheory::RecSet Elts; 1650 ST.addOperator("lowhalf", llvm::make_unique<LowHalf>()); 1651 ST.addOperator("highhalf", llvm::make_unique<HighHalf>()); 1652 ST.addOperator("rev", 1653 llvm::make_unique<Rev>(Arg1.first.getElementSizeInBits())); 1654 ST.addExpander("MaskExpand", 1655 llvm::make_unique<MaskExpander>(Arg1.first.getNumElements())); 1656 ST.evaluate(DI->getArg(2), Elts, None); 1657 1658 std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second; 1659 for (auto &E : Elts) { 1660 StringRef Name = E->getName(); 1661 assert_with_loc(Name.startswith("sv"), 1662 "Incorrect element kind in shuffle mask!"); 1663 S += ", " + Name.drop_front(2).str(); 1664 } 1665 S += ")"; 1666 1667 // Recalculate the return type - the shuffle may have halved or doubled it. 
1668 Type T(Arg1.first); 1669 if (Elts.size() > T.getNumElements()) { 1670 assert_with_loc( 1671 Elts.size() == T.getNumElements() * 2, 1672 "Can only double or half the number of elements in a shuffle!"); 1673 T.doubleLanes(); 1674 } else if (Elts.size() < T.getNumElements()) { 1675 assert_with_loc( 1676 Elts.size() == T.getNumElements() / 2, 1677 "Can only double or half the number of elements in a shuffle!"); 1678 T.halveLanes(); 1679 } 1680 1681 return std::make_pair(T, S); 1682 } 1683 1684 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) { 1685 assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument"); 1686 std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), DI->getArgName(0)); 1687 assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument"); 1688 1689 Type T = Intr.getBaseType(); 1690 assert_with_loc(T.isVector(), "dup() used but default type is scalar!"); 1691 std::string S = "(" + T.str() + ") {"; 1692 for (unsigned I = 0; I < T.getNumElements(); ++I) { 1693 if (I != 0) 1694 S += ", "; 1695 S += A.second; 1696 } 1697 S += "}"; 1698 1699 return std::make_pair(T, S); 1700 } 1701 1702 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) { 1703 assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments"); 1704 std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), DI->getArgName(0)); 1705 std::pair<Type, std::string> B = emitDagArg(DI->getArg(1), DI->getArgName(1)); 1706 1707 assert_with_loc(B.first.isScalar(), 1708 "splat() requires a scalar int as the second argument"); 1709 1710 std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second; 1711 for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) { 1712 S += ", " + B.second; 1713 } 1714 S += ")"; 1715 1716 return std::make_pair(Intr.getBaseType(), S); 1717 } 1718 1719 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) { 1720 assert_with_loc(DI->getNumArgs() == 
2, "save_temp() expects two arguments"); 1721 std::pair<Type, std::string> A = emitDagArg(DI->getArg(1), DI->getArgName(1)); 1722 1723 assert_with_loc(!A.first.isVoid(), 1724 "Argument to save_temp() must have non-void type!"); 1725 1726 std::string N = DI->getArgName(0); 1727 assert_with_loc(N.size(), "save_temp() expects a name as the first argument"); 1728 1729 assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(), 1730 "Variable already defined!"); 1731 Intr.Variables[N] = Variable(A.first, N + Intr.VariablePostfix); 1732 1733 std::string S = 1734 A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second; 1735 1736 return std::make_pair(Type::getVoid(), S); 1737 } 1738 1739 std::pair<Type, std::string> 1740 Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) { 1741 std::string S = Intr.Name; 1742 1743 assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!"); 1744 std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1745 std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1746 1747 size_t Idx = S.find(ToReplace); 1748 1749 assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!"); 1750 S.replace(Idx, ToReplace.size(), ReplaceWith); 1751 1752 return std::make_pair(Type::getVoid(), S); 1753 } 1754 1755 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){ 1756 std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1757 std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1758 return std::make_pair(Type::fromTypedefName(Ty), Value); 1759 } 1760 1761 std::pair<Type, std::string> 1762 Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) { 1763 if (ArgName.size()) { 1764 assert_with_loc(!Arg->isComplete(), 1765 "Arguments must either be DAGs or names, not both!"); 1766 assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(), 1767 "Variable not 
defined!"); 1768 Variable &V = Intr.Variables[ArgName]; 1769 return std::make_pair(V.getType(), V.getName()); 1770 } 1771 1772 assert(Arg && "Neither ArgName nor Arg?!"); 1773 DagInit *DI = dyn_cast<DagInit>(Arg); 1774 assert_with_loc(DI, "Arguments must either be DAGs or names!"); 1775 1776 return emitDag(DI); 1777 } 1778 1779 std::string Intrinsic::generate() { 1780 // Little endian intrinsics are simple and don't require any argument 1781 // swapping. 1782 OS << "#ifdef __LITTLE_ENDIAN__\n"; 1783 1784 generateImpl(false, "", ""); 1785 1786 OS << "#else\n"; 1787 1788 // Big endian intrinsics are more complex. The user intended these 1789 // intrinsics to operate on a vector "as-if" loaded by (V)LDR, 1790 // but we load as-if (V)LD1. So we should swap all arguments and 1791 // swap the return value too. 1792 // 1793 // If we call sub-intrinsics, we should call a version that does 1794 // not re-swap the arguments! 1795 generateImpl(true, "", "__noswap_"); 1796 1797 // If we're needed early, create a non-swapping variant for 1798 // big-endian. 1799 if (NeededEarly) { 1800 generateImpl(false, "__noswap_", "__noswap_"); 1801 } 1802 OS << "#endif\n\n"; 1803 1804 return OS.str(); 1805 } 1806 1807 void Intrinsic::generateImpl(bool ReverseArguments, 1808 StringRef NamePrefix, StringRef CallPrefix) { 1809 CurrentRecord = R; 1810 1811 // If we call a macro, our local variables may be corrupted due to 1812 // lack of proper lexical scoping. So, add a globally unique postfix 1813 // to every variable. 1814 // 1815 // indexBody() should have set up the Dependencies set by now. 
1816 for (auto *I : Dependencies) 1817 if (I->UseMacro) { 1818 VariablePostfix = "_" + utostr(Emitter.getUniqueNumber()); 1819 break; 1820 } 1821 1822 initVariables(); 1823 1824 emitPrototype(NamePrefix); 1825 1826 if (IsUnavailable) { 1827 OS << " __attribute__((unavailable));"; 1828 } else { 1829 emitOpeningBrace(); 1830 emitShadowedArgs(); 1831 if (ReverseArguments) 1832 emitArgumentReversal(); 1833 emitBody(CallPrefix); 1834 if (ReverseArguments) 1835 emitReturnReversal(); 1836 emitReturn(); 1837 emitClosingBrace(); 1838 } 1839 OS << "\n"; 1840 1841 CurrentRecord = nullptr; 1842 } 1843 1844 void Intrinsic::indexBody() { 1845 CurrentRecord = R; 1846 1847 initVariables(); 1848 emitBody(""); 1849 OS.str(""); 1850 1851 CurrentRecord = nullptr; 1852 } 1853 1854 //===----------------------------------------------------------------------===// 1855 // NeonEmitter implementation 1856 //===----------------------------------------------------------------------===// 1857 1858 Intrinsic *NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types) { 1859 // First, look up the name in the intrinsic map. 1860 assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(), 1861 ("Intrinsic '" + Name + "' not found!").str()); 1862 std::vector<Intrinsic *> &V = IntrinsicMap[Name.str()]; 1863 std::vector<Intrinsic *> GoodVec; 1864 1865 // Create a string to print if we end up failing. 1866 std::string ErrMsg = "looking up intrinsic '" + Name.str() + "("; 1867 for (unsigned I = 0; I < Types.size(); ++I) { 1868 if (I != 0) 1869 ErrMsg += ", "; 1870 ErrMsg += Types[I].str(); 1871 } 1872 ErrMsg += ")'\n"; 1873 ErrMsg += "Available overloads:\n"; 1874 1875 // Now, look through each intrinsic implementation and see if the types are 1876 // compatible. 
1877 for (auto *I : V) { 1878 ErrMsg += " - " + I->getReturnType().str() + " " + I->getMangledName(); 1879 ErrMsg += "("; 1880 for (unsigned A = 0; A < I->getNumParams(); ++A) { 1881 if (A != 0) 1882 ErrMsg += ", "; 1883 ErrMsg += I->getParamType(A).str(); 1884 } 1885 ErrMsg += ")\n"; 1886 1887 if (I->getNumParams() != Types.size()) 1888 continue; 1889 1890 bool Good = true; 1891 for (unsigned Arg = 0; Arg < Types.size(); ++Arg) { 1892 if (I->getParamType(Arg) != Types[Arg]) { 1893 Good = false; 1894 break; 1895 } 1896 } 1897 if (Good) 1898 GoodVec.push_back(I); 1899 } 1900 1901 assert_with_loc(GoodVec.size() > 0, 1902 "No compatible intrinsic found - " + ErrMsg); 1903 assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg); 1904 1905 return GoodVec.front(); 1906 } 1907 1908 void NeonEmitter::createIntrinsic(Record *R, 1909 SmallVectorImpl<Intrinsic *> &Out) { 1910 std::string Name = R->getValueAsString("Name"); 1911 std::string Proto = R->getValueAsString("Prototype"); 1912 std::string Types = R->getValueAsString("Types"); 1913 Record *OperationRec = R->getValueAsDef("Operation"); 1914 bool CartesianProductOfTypes = R->getValueAsBit("CartesianProductOfTypes"); 1915 bool BigEndianSafe = R->getValueAsBit("BigEndianSafe"); 1916 std::string Guard = R->getValueAsString("ArchGuard"); 1917 bool IsUnavailable = OperationRec->getValueAsBit("Unavailable"); 1918 1919 // Set the global current record. This allows assert_with_loc to produce 1920 // decent location information even when highly nested. 
1921 CurrentRecord = R; 1922 1923 ListInit *Body = OperationRec->getValueAsListInit("Ops"); 1924 1925 std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types); 1926 1927 ClassKind CK = ClassNone; 1928 if (R->getSuperClasses().size() >= 2) 1929 CK = ClassMap[R->getSuperClasses()[1]]; 1930 1931 std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs; 1932 for (auto TS : TypeSpecs) { 1933 if (CartesianProductOfTypes) { 1934 Type DefaultT(TS, 'd'); 1935 for (auto SrcTS : TypeSpecs) { 1936 Type DefaultSrcT(SrcTS, 'd'); 1937 if (TS == SrcTS || 1938 DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits()) 1939 continue; 1940 NewTypeSpecs.push_back(std::make_pair(TS, SrcTS)); 1941 } 1942 } else { 1943 NewTypeSpecs.push_back(std::make_pair(TS, TS)); 1944 } 1945 } 1946 1947 std::sort(NewTypeSpecs.begin(), NewTypeSpecs.end()); 1948 NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()), 1949 NewTypeSpecs.end()); 1950 1951 for (auto &I : NewTypeSpecs) { 1952 Intrinsic *IT = new Intrinsic(R, Name, Proto, I.first, I.second, CK, Body, 1953 *this, Guard, IsUnavailable, BigEndianSafe); 1954 1955 IntrinsicMap[Name].push_back(IT); 1956 Out.push_back(IT); 1957 } 1958 1959 CurrentRecord = nullptr; 1960 } 1961 1962 /// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def 1963 /// declaration of builtins, checking for unique builtin declarations. 1964 void NeonEmitter::genBuiltinsDef(raw_ostream &OS, 1965 SmallVectorImpl<Intrinsic *> &Defs) { 1966 OS << "#ifdef GET_NEON_BUILTINS\n"; 1967 1968 // We only want to emit a builtin once, and we want to emit them in 1969 // alphabetical order, so use a std::set. 1970 std::set<std::string> Builtins; 1971 1972 for (auto *Def : Defs) { 1973 if (Def->hasBody()) 1974 continue; 1975 // Functions with 'a' (the splat code) in the type prototype should not get 1976 // their own builtin as they use the non-splat variant. 
1977 if (Def->hasSplat()) 1978 continue; 1979 1980 std::string S = "BUILTIN(__builtin_neon_" + Def->getMangledName() + ", \""; 1981 1982 S += Def->getBuiltinTypeStr(); 1983 S += "\", \"n\")"; 1984 1985 Builtins.insert(S); 1986 } 1987 1988 for (auto &S : Builtins) 1989 OS << S << "\n"; 1990 OS << "#endif\n\n"; 1991 } 1992 1993 /// Generate the ARM and AArch64 overloaded type checking code for 1994 /// SemaChecking.cpp, checking for unique builtin declarations. 1995 void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, 1996 SmallVectorImpl<Intrinsic *> &Defs) { 1997 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; 1998 1999 // We record each overload check line before emitting because subsequent Inst 2000 // definitions may extend the number of permitted types (i.e. augment the 2001 // Mask). Use std::map to avoid sorting the table by hash number. 2002 struct OverloadInfo { 2003 uint64_t Mask; 2004 int PtrArgNum; 2005 bool HasConstPtr; 2006 OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {} 2007 }; 2008 std::map<std::string, OverloadInfo> OverloadMap; 2009 2010 for (auto *Def : Defs) { 2011 // If the def has a body (that is, it has Operation DAGs), it won't call 2012 // __builtin_neon_* so we don't need to generate a definition for it. 2013 if (Def->hasBody()) 2014 continue; 2015 // Functions with 'a' (the splat code) in the type prototype should not get 2016 // their own builtin as they use the non-splat variant. 2017 if (Def->hasSplat()) 2018 continue; 2019 // Functions which have a scalar argument cannot be overloaded, no need to 2020 // check them if we are emitting the type checking code. 
2021 if (Def->protoHasScalar()) 2022 continue; 2023 2024 uint64_t Mask = 0ULL; 2025 Type Ty = Def->getReturnType(); 2026 if (Def->getProto()[0] == 'v' || Def->getProto()[0] == 'f' || 2027 Def->getProto()[0] == 'F') 2028 Ty = Def->getParamType(0); 2029 if (Ty.isPointer()) 2030 Ty = Def->getParamType(1); 2031 2032 Mask |= 1ULL << Ty.getNeonEnum(); 2033 2034 // Check if the function has a pointer or const pointer argument. 2035 std::string Proto = Def->getProto(); 2036 int PtrArgNum = -1; 2037 bool HasConstPtr = false; 2038 for (unsigned I = 0; I < Def->getNumParams(); ++I) { 2039 char ArgType = Proto[I + 1]; 2040 if (ArgType == 'c') { 2041 HasConstPtr = true; 2042 PtrArgNum = I; 2043 break; 2044 } 2045 if (ArgType == 'p') { 2046 PtrArgNum = I; 2047 break; 2048 } 2049 } 2050 // For sret builtins, adjust the pointer argument index. 2051 if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1) 2052 PtrArgNum += 1; 2053 2054 std::string Name = Def->getName(); 2055 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup, 2056 // and vst1_lane intrinsics. Using a pointer to the vector element 2057 // type with one of those operations causes codegen to select an aligned 2058 // load/store instruction. If you want an unaligned operation, 2059 // the pointer argument needs to have less alignment than element type, 2060 // so just accept any pointer type. 
2061 if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane") { 2062 PtrArgNum = -1; 2063 HasConstPtr = false; 2064 } 2065 2066 if (Mask) { 2067 std::string Name = Def->getMangledName(); 2068 OverloadMap.insert(std::make_pair(Name, OverloadInfo())); 2069 OverloadInfo &OI = OverloadMap[Name]; 2070 OI.Mask |= Mask; 2071 OI.PtrArgNum |= PtrArgNum; 2072 OI.HasConstPtr = HasConstPtr; 2073 } 2074 } 2075 2076 for (auto &I : OverloadMap) { 2077 OverloadInfo &OI = I.second; 2078 2079 OS << "case NEON::BI__builtin_neon_" << I.first << ": "; 2080 OS << "mask = 0x" << utohexstr(OI.Mask) << "ULL"; 2081 if (OI.PtrArgNum >= 0) 2082 OS << "; PtrArgNum = " << OI.PtrArgNum; 2083 if (OI.HasConstPtr) 2084 OS << "; HasConstPtr = true"; 2085 OS << "; break;\n"; 2086 } 2087 OS << "#endif\n\n"; 2088 } 2089 2090 void 2091 NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, 2092 SmallVectorImpl<Intrinsic *> &Defs) { 2093 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; 2094 2095 std::set<std::string> Emitted; 2096 2097 for (auto *Def : Defs) { 2098 if (Def->hasBody()) 2099 continue; 2100 // Functions with 'a' (the splat code) in the type prototype should not get 2101 // their own builtin as they use the non-splat variant. 2102 if (Def->hasSplat()) 2103 continue; 2104 // Functions which do not have an immediate do not need to have range 2105 // checking code emitted. 2106 if (!Def->hasImmediate()) 2107 continue; 2108 if (Emitted.find(Def->getMangledName()) != Emitted.end()) 2109 continue; 2110 2111 std::string LowerBound, UpperBound; 2112 2113 Record *R = Def->getRecord(); 2114 if (R->getValueAsBit("isVCVT_N")) { 2115 // VCVT between floating- and fixed-point values takes an immediate 2116 // in the range [1, 32) for f32 or [1, 64) for f64. 
2117 LowerBound = "1"; 2118 if (Def->getBaseType().getElementSizeInBits() == 32) 2119 UpperBound = "31"; 2120 else 2121 UpperBound = "63"; 2122 } else if (R->getValueAsBit("isScalarShift")) { 2123 // Right shifts have an 'r' in the name, left shifts do not. Convert 2124 // instructions have the same bounds and right shifts. 2125 if (Def->getName().find('r') != std::string::npos || 2126 Def->getName().find("cvt") != std::string::npos) 2127 LowerBound = "1"; 2128 2129 UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1); 2130 } else if (R->getValueAsBit("isShift")) { 2131 // Builtins which are overloaded by type will need to have their upper 2132 // bound computed at Sema time based on the type constant. 2133 2134 // Right shifts have an 'r' in the name, left shifts do not. 2135 if (Def->getName().find('r') != std::string::npos) 2136 LowerBound = "1"; 2137 UpperBound = "RFT(TV, true)"; 2138 } else if (Def->getClassKind(true) == ClassB) { 2139 // ClassB intrinsics have a type (and hence lane number) that is only 2140 // known at runtime. 2141 if (R->getValueAsBit("isLaneQ")) 2142 UpperBound = "RFT(TV, false, true)"; 2143 else 2144 UpperBound = "RFT(TV, false, false)"; 2145 } else { 2146 // The immediate generally refers to a lane in the preceding argument. 2147 assert(Def->getImmediateIdx() > 0); 2148 Type T = Def->getParamType(Def->getImmediateIdx() - 1); 2149 UpperBound = utostr(T.getNumElements() - 1); 2150 } 2151 2152 // Calculate the index of the immediate that should be range checked. 
2153 unsigned Idx = Def->getNumParams(); 2154 if (Def->hasImmediate()) 2155 Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx()); 2156 2157 OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": " 2158 << "i = " << Idx << ";"; 2159 if (LowerBound.size()) 2160 OS << " l = " << LowerBound << ";"; 2161 if (UpperBound.size()) 2162 OS << " u = " << UpperBound << ";"; 2163 OS << " break;\n"; 2164 2165 Emitted.insert(Def->getMangledName()); 2166 } 2167 2168 OS << "#endif\n\n"; 2169 } 2170 2171 /// runHeader - Emit a file with sections defining: 2172 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def. 2173 /// 2. the SemaChecking code for the type overload checking. 2174 /// 3. the SemaChecking code for validation of intrinsic immediate arguments. 2175 void NeonEmitter::runHeader(raw_ostream &OS) { 2176 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2177 2178 SmallVector<Intrinsic *, 128> Defs; 2179 for (auto *R : RV) 2180 createIntrinsic(R, Defs); 2181 2182 // Generate shared BuiltinsXXX.def 2183 genBuiltinsDef(OS, Defs); 2184 2185 // Generate ARM overloaded type checking code for SemaChecking.cpp 2186 genOverloadTypeCheckCode(OS, Defs); 2187 2188 // Generate ARM range checking code for shift/lane immediates. 2189 genIntrinsicRangeCheckCode(OS, Defs); 2190 } 2191 2192 /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h 2193 /// is comprised of type definitions and function declarations. 
2194 void NeonEmitter::run(raw_ostream &OS) { 2195 OS << "/*===---- arm_neon.h - ARM Neon intrinsics " 2196 "------------------------------" 2197 "---===\n" 2198 " *\n" 2199 " * Permission is hereby granted, free of charge, to any person " 2200 "obtaining " 2201 "a copy\n" 2202 " * of this software and associated documentation files (the " 2203 "\"Software\")," 2204 " to deal\n" 2205 " * in the Software without restriction, including without limitation " 2206 "the " 2207 "rights\n" 2208 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2209 "and/or sell\n" 2210 " * copies of the Software, and to permit persons to whom the Software " 2211 "is\n" 2212 " * furnished to do so, subject to the following conditions:\n" 2213 " *\n" 2214 " * The above copyright notice and this permission notice shall be " 2215 "included in\n" 2216 " * all copies or substantial portions of the Software.\n" 2217 " *\n" 2218 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2219 "EXPRESS OR\n" 2220 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2221 "MERCHANTABILITY,\n" 2222 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT " 2223 "SHALL THE\n" 2224 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2225 "OTHER\n" 2226 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2227 "ARISING FROM,\n" 2228 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2229 "DEALINGS IN\n" 2230 " * THE SOFTWARE.\n" 2231 " *\n" 2232 " *===-----------------------------------------------------------------" 2233 "---" 2234 "---===\n" 2235 " */\n\n"; 2236 2237 OS << "#ifndef __ARM_NEON_H\n"; 2238 OS << "#define __ARM_NEON_H\n\n"; 2239 2240 OS << "#if !defined(__ARM_NEON)\n"; 2241 OS << "#error \"NEON support not enabled\"\n"; 2242 OS << "#endif\n\n"; 2243 2244 OS << "#include <stdint.h>\n\n"; 2245 2246 // Emit NEON-specific scalar typedefs. 
2247 OS << "typedef float float32_t;\n"; 2248 OS << "typedef __fp16 float16_t;\n"; 2249 2250 OS << "#ifdef __aarch64__\n"; 2251 OS << "typedef double float64_t;\n"; 2252 OS << "#endif\n\n"; 2253 2254 // For now, signedness of polynomial types depends on target 2255 OS << "#ifdef __aarch64__\n"; 2256 OS << "typedef uint8_t poly8_t;\n"; 2257 OS << "typedef uint16_t poly16_t;\n"; 2258 OS << "typedef uint64_t poly64_t;\n"; 2259 OS << "typedef __uint128_t poly128_t;\n"; 2260 OS << "#else\n"; 2261 OS << "typedef int8_t poly8_t;\n"; 2262 OS << "typedef int16_t poly16_t;\n"; 2263 OS << "#endif\n"; 2264 2265 // Emit Neon vector typedefs. 2266 std::string TypedefTypes( 2267 "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl"); 2268 std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes); 2269 2270 // Emit vector typedefs. 2271 bool InIfdef = false; 2272 for (auto &TS : TDTypeVec) { 2273 bool IsA64 = false; 2274 Type T(TS, 'd'); 2275 if (T.isDouble() || (T.isPoly() && T.isLong())) 2276 IsA64 = true; 2277 2278 if (InIfdef && !IsA64) { 2279 OS << "#endif\n"; 2280 InIfdef = false; 2281 } 2282 if (!InIfdef && IsA64) { 2283 OS << "#ifdef __aarch64__\n"; 2284 InIfdef = true; 2285 } 2286 2287 if (T.isPoly()) 2288 OS << "typedef __attribute__((neon_polyvector_type("; 2289 else 2290 OS << "typedef __attribute__((neon_vector_type("; 2291 2292 Type T2 = T; 2293 T2.makeScalar(); 2294 OS << utostr(T.getNumElements()) << "))) "; 2295 OS << T2.str(); 2296 OS << " " << T.str() << ";\n"; 2297 } 2298 if (InIfdef) 2299 OS << "#endif\n"; 2300 OS << "\n"; 2301 2302 // Emit struct typedefs. 
2303 InIfdef = false; 2304 for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) { 2305 for (auto &TS : TDTypeVec) { 2306 bool IsA64 = false; 2307 Type T(TS, 'd'); 2308 if (T.isDouble() || (T.isPoly() && T.isLong())) 2309 IsA64 = true; 2310 2311 if (InIfdef && !IsA64) { 2312 OS << "#endif\n"; 2313 InIfdef = false; 2314 } 2315 if (!InIfdef && IsA64) { 2316 OS << "#ifdef __aarch64__\n"; 2317 InIfdef = true; 2318 } 2319 2320 char M = '2' + (NumMembers - 2); 2321 Type VT(TS, M); 2322 OS << "typedef struct " << VT.str() << " {\n"; 2323 OS << " " << T.str() << " val"; 2324 OS << "[" << utostr(NumMembers) << "]"; 2325 OS << ";\n} "; 2326 OS << VT.str() << ";\n"; 2327 OS << "\n"; 2328 } 2329 } 2330 if (InIfdef) 2331 OS << "#endif\n"; 2332 OS << "\n"; 2333 2334 OS << "#define __ai static inline __attribute__((__always_inline__, " 2335 "__nodebug__))\n\n"; 2336 2337 SmallVector<Intrinsic *, 128> Defs; 2338 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2339 for (auto *R : RV) 2340 createIntrinsic(R, Defs); 2341 2342 for (auto *I : Defs) 2343 I->indexBody(); 2344 2345 std::stable_sort( 2346 Defs.begin(), Defs.end(), 2347 [](const Intrinsic *A, const Intrinsic *B) { return *A < *B; }); 2348 2349 // Only emit a def when its requirements have been met. 2350 // FIXME: This loop could be made faster, but it's fast enough for now. 2351 bool MadeProgress = true; 2352 std::string InGuard = ""; 2353 while (!Defs.empty() && MadeProgress) { 2354 MadeProgress = false; 2355 2356 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2357 I != Defs.end(); /*No step*/) { 2358 bool DependenciesSatisfied = true; 2359 for (auto *II : (*I)->getDependencies()) { 2360 if (std::find(Defs.begin(), Defs.end(), II) != Defs.end()) 2361 DependenciesSatisfied = false; 2362 } 2363 if (!DependenciesSatisfied) { 2364 // Try the next one. 2365 ++I; 2366 continue; 2367 } 2368 2369 // Emit #endif/#if pair if needed. 
2370 if ((*I)->getGuard() != InGuard) { 2371 if (!InGuard.empty()) 2372 OS << "#endif\n"; 2373 InGuard = (*I)->getGuard(); 2374 if (!InGuard.empty()) 2375 OS << "#if " << InGuard << "\n"; 2376 } 2377 2378 // Actually generate the intrinsic code. 2379 OS << (*I)->generate(); 2380 2381 MadeProgress = true; 2382 I = Defs.erase(I); 2383 } 2384 } 2385 assert(Defs.empty() && "Some requirements were not satisfied!"); 2386 if (!InGuard.empty()) 2387 OS << "#endif\n"; 2388 2389 OS << "\n"; 2390 OS << "#undef __ai\n\n"; 2391 OS << "#endif /* __ARM_NEON_H */\n"; 2392 } 2393 2394 namespace clang { 2395 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 2396 NeonEmitter(Records).run(OS); 2397 } 2398 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 2399 NeonEmitter(Records).runHeader(OS); 2400 } 2401 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 2402 llvm_unreachable("Neon test generation no longer implemented!"); 2403 } 2404 } // End namespace clang 2405