1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This tablegen backend is responsible for emitting arm_neon.h, which includes 11 // a declaration and definition of each function specified by the ARM NEON 12 // compiler interface. See ARM document DUI0348B. 13 // 14 // Each NEON instruction is implemented in terms of 1 or more functions which 15 // are suffixed with the element type of the input vectors. Functions may be 16 // implemented in terms of generic vector operations such as +, *, -, etc. or 17 // by calling a __builtin_-prefixed function which will be handled by clang's 18 // CodeGen library. 19 // 20 // Additional validation code can be generated by this file when runHeader() is 21 // called, rather than the normal run() entry point. 22 // 23 // See also the documentation in include/clang/Basic/arm_neon.td. 24 // 25 //===----------------------------------------------------------------------===// 26 27 #include "llvm/ADT/DenseMap.h" 28 #include "llvm/ADT/STLExtras.h" 29 #include "llvm/ADT/SmallString.h" 30 #include "llvm/ADT/SmallVector.h" 31 #include "llvm/ADT/StringExtras.h" 32 #include "llvm/ADT/StringMap.h" 33 #include "llvm/Support/ErrorHandling.h" 34 #include "llvm/TableGen/Error.h" 35 #include "llvm/TableGen/Record.h" 36 #include "llvm/TableGen/SetTheory.h" 37 #include "llvm/TableGen/TableGenBackend.h" 38 #include <algorithm> 39 #include <map> 40 #include <sstream> 41 #include <string> 42 #include <vector> 43 using namespace llvm; 44 45 namespace { 46 47 // While globals are generally bad, this one allows us to perform assertions 48 // liberally and somehow still trace them back to the def they indirectly 49 // came from. 50 static Record *CurrentRecord = nullptr; 51 static void assert_with_loc(bool Assertion, const std::string &Str) { 52 if (!Assertion) { 53 if (CurrentRecord) 54 PrintFatalError(CurrentRecord->getLoc(), Str); 55 else 56 PrintFatalError(Str); 57 } 58 } 59 60 enum ClassKind { 61 ClassNone, 62 ClassI, // generic integer instruction, e.g., "i8" suffix 63 ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix 64 ClassW, // width-specific instruction, e.g., "8" suffix 65 ClassB, // bitcast arguments with enum argument to specify type 66 ClassL, // Logical instructions which are op instructions 67 // but we need to not emit any suffix for in our 68 // tests. 69 ClassNoTest // Instructions which we do not test since they are 70 // not TRUE instructions. 71 }; 72 73 /// NeonTypeFlags - Flags to identify the types for overloaded Neon 74 /// builtins. These must be kept in sync with the flags in 75 /// include/clang/Basic/TargetBuiltins.h. 76 namespace NeonTypeFlags { 77 enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 }; 78 79 enum EltType { 80 Int8, 81 Int16, 82 Int32, 83 Int64, 84 Poly8, 85 Poly16, 86 Poly64, 87 Poly128, 88 Float16, 89 Float32, 90 Float64 91 }; 92 } 93 94 class Intrinsic; 95 class NeonEmitter; 96 class Type; 97 class Variable; 98 99 //===----------------------------------------------------------------------===// 100 // TypeSpec 101 //===----------------------------------------------------------------------===// 102 103 /// A TypeSpec is just a simple wrapper around a string, but gets its own type 104 /// for strong typing purposes. 105 /// 106 /// A TypeSpec can be used to create a type. 107 class TypeSpec : public std::string { 108 public: 109 static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) { 110 std::vector<TypeSpec> Ret; 111 TypeSpec Acc; 112 for (char I : Str.str()) { 113 if (islower(I)) { 114 Acc.push_back(I); 115 Ret.push_back(TypeSpec(Acc)); 116 Acc.clear(); 117 } else { 118 Acc.push_back(I); 119 } 120 } 121 return Ret; 122 } 123 }; 124 125 //===----------------------------------------------------------------------===// 126 // Type 127 //===----------------------------------------------------------------------===// 128 129 /// A Type. Not much more to say here. 130 class Type { 131 private: 132 TypeSpec TS; 133 134 bool Float, Signed, Void, Poly, Constant, Pointer; 135 // ScalarForMangling and NoManglingQ are really not suited to live here as 136 // they are not related to the type. But they live in the TypeSpec (not the 137 // prototype), so this is really the only place to store them. 138 bool ScalarForMangling, NoManglingQ; 139 unsigned Bitwidth, ElementBitwidth, NumVectors; 140 141 public: 142 Type() 143 : Float(false), Signed(false), Void(true), Poly(false), Constant(false), 144 Pointer(false), ScalarForMangling(false), NoManglingQ(false), 145 Bitwidth(0), ElementBitwidth(0), NumVectors(0) {} 146 147 Type(TypeSpec TS, char CharMod) 148 : TS(TS), Float(false), Signed(false), Void(false), Poly(false), 149 Constant(false), Pointer(false), ScalarForMangling(false), 150 NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) { 151 applyModifier(CharMod); 152 } 153 154 /// Returns a type representing "void". 155 static Type getVoid() { return Type(); } 156 157 bool operator==(const Type &Other) const { return str() == Other.str(); } 158 bool operator!=(const Type &Other) const { return !operator==(Other); } 159 160 // 161 // Query functions 162 // 163 bool isScalarForMangling() const { return ScalarForMangling; } 164 bool noManglingQ() const { return NoManglingQ; } 165 166 bool isPointer() const { return Pointer; } 167 bool isFloating() const { return Float; } 168 bool isInteger() const { return !Float && !Poly; } 169 bool isSigned() const { return Signed; } 170 bool isScalar() const { return NumVectors == 0; } 171 bool isVector() const { return NumVectors > 0; } 172 bool isFloat() const { return Float && ElementBitwidth == 32; } 173 bool isDouble() const { return Float && ElementBitwidth == 64; } 174 bool isHalf() const { return Float && ElementBitwidth == 16; } 175 bool isPoly() const { return Poly; } 176 bool isChar() const { return ElementBitwidth == 8; } 177 bool isShort() const { return !Float && ElementBitwidth == 16; } 178 bool isInt() const { return !Float && ElementBitwidth == 32; } 179 bool isLong() const { return !Float && ElementBitwidth == 64; } 180 bool isVoid() const { return Void; } 181 unsigned getNumElements() const { return Bitwidth / ElementBitwidth; } 182 unsigned getSizeInBits() const { return Bitwidth; } 183 unsigned getElementSizeInBits() const { return ElementBitwidth; } 184 unsigned getNumVectors() const { return NumVectors; } 185 186 // 187 // Mutator functions 188 // 189 void makeUnsigned() { Signed = false; } 190 void makeSigned() { Signed = true; } 191 void makeInteger(unsigned ElemWidth, bool Sign) { 192 Float = false; 193 Poly = false; 194 Signed = Sign; 195 ElementBitwidth = ElemWidth; 196 } 197 void makeScalar() { 198 Bitwidth = ElementBitwidth; 199 NumVectors = 0; 200 } 201 void makeOneVector() { 202 assert(isVector()); 203 NumVectors = 1; 204 } 205 void doubleLanes() { 206 assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!"); 207 Bitwidth = 128; 208 } 209 void halveLanes() { 210 assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!"); 211 Bitwidth = 64; 212 } 213 214 /// Return the C string representation of a type, which is the typename 215 /// defined in stdint.h or arm_neon.h. 216 std::string str() const; 217 218 /// Return the string representation of a type, which is an encoded 219 /// string for passing to the BUILTIN() macro in Builtins.def. 220 std::string builtin_str() const; 221 222 /// Return the value in NeonTypeFlags for this type. 223 unsigned getNeonEnum() const; 224 225 /// Parse a type from a stdint.h or arm_neon.h typedef name, 226 /// for example uint32x2_t or int64_t. 227 static Type fromTypedefName(StringRef Name); 228 229 private: 230 /// Creates the type based on the typespec string in TS. 231 /// Sets "Quad" to true if the "Q" or "H" modifiers were 232 /// seen. This is needed by applyModifier as some modifiers 233 /// only take effect if the type size was changed by "Q" or "H". 234 void applyTypespec(bool &Quad); 235 /// Applies a prototype modifier to the type. 236 void applyModifier(char Mod); 237 }; 238 239 //===----------------------------------------------------------------------===// 240 // Variable 241 //===----------------------------------------------------------------------===// 242 243 /// A variable is a simple class that just has a type and a name. 244 class Variable { 245 Type T; 246 std::string N; 247 248 public: 249 Variable() : T(Type::getVoid()), N("") {} 250 Variable(Type T, std::string N) : T(T), N(N) {} 251 252 Type getType() const { return T; } 253 std::string getName() const { return "__" + N; } 254 }; 255 256 //===----------------------------------------------------------------------===// 257 // Intrinsic 258 //===----------------------------------------------------------------------===// 259 260 /// The main grunt class. This represents an instantiation of an intrinsic with 261 /// a particular typespec and prototype. 262 class Intrinsic { 263 friend class DagEmitter; 264 265 /// The Record this intrinsic was created from. 266 Record *R; 267 /// The unmangled name and prototype. 268 std::string Name, Proto; 269 /// The input and output typespecs. InTS == OutTS except when 270 /// CartesianProductOfTypes is 1 - this is the case for vreinterpret. 271 TypeSpec OutTS, InTS; 272 /// The base class kind. Most intrinsics use ClassS, which has full type 273 /// info for integers (s32/u32). Some use ClassI, which doesn't care about 274 /// signedness (i32), while some (ClassB) have no type at all, only a width 275 /// (32). 276 ClassKind CK; 277 /// The list of DAGs for the body. May be empty, in which case we should 278 /// emit a builtin call. 279 ListInit *Body; 280 /// The architectural #ifdef guard. 281 std::string Guard; 282 /// Set if the Unvailable bit is 1. This means we don't generate a body, 283 /// just an "unavailable" attribute on a declaration. 284 bool IsUnavailable; 285 /// Is this intrinsic safe for big-endian? or does it need its arguments 286 /// reversing? 287 bool BigEndianSafe; 288 289 /// The types of return value [0] and parameters [1..]. 290 std::vector<Type> Types; 291 /// The local variables defined. 292 std::map<std::string, Variable> Variables; 293 /// NeededEarly - set if any other intrinsic depends on this intrinsic. 294 bool NeededEarly; 295 /// UseMacro - set if we should implement using a macro or unset for a 296 /// function. 297 bool UseMacro; 298 /// The set of intrinsics that this intrinsic uses/requires. 299 std::set<Intrinsic *> Dependencies; 300 /// The "base type", which is Type('d', OutTS). InBaseType is only 301 /// different if CartesianProductOfTypes = 1 (for vreinterpret). 302 Type BaseType, InBaseType; 303 /// The return variable. 304 Variable RetVar; 305 /// A postfix to apply to every variable. Defaults to "". 306 std::string VariablePostfix; 307 308 NeonEmitter &Emitter; 309 std::stringstream OS; 310 311 public: 312 Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS, 313 TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter, 314 StringRef Guard, bool IsUnavailable, bool BigEndianSafe) 315 : R(R), Name(Name.str()), Proto(Proto.str()), OutTS(OutTS), InTS(InTS), 316 CK(CK), Body(Body), Guard(Guard.str()), IsUnavailable(IsUnavailable), 317 BigEndianSafe(BigEndianSafe), NeededEarly(false), UseMacro(false), 318 BaseType(OutTS, 'd'), InBaseType(InTS, 'd'), Emitter(Emitter) { 319 // If this builtin takes an immediate argument, we need to #define it rather 320 // than use a standard declaration, so that SemaChecking can range check 321 // the immediate passed by the user. 322 if (Proto.find('i') != std::string::npos) 323 UseMacro = true; 324 325 // Pointer arguments need to use macros to avoid hiding aligned attributes 326 // from the pointer type. 327 if (Proto.find('p') != std::string::npos || 328 Proto.find('c') != std::string::npos) 329 UseMacro = true; 330 331 // It is not permitted to pass or return an __fp16 by value, so intrinsics 332 // taking a scalar float16_t must be implemented as macros. 333 if (OutTS.find('h') != std::string::npos && 334 Proto.find('s') != std::string::npos) 335 UseMacro = true; 336 337 // Modify the TypeSpec per-argument to get a concrete Type, and create 338 // known variables for each. 339 // Types[0] is the return value. 340 Types.push_back(Type(OutTS, Proto[0])); 341 for (unsigned I = 1; I < Proto.size(); ++I) 342 Types.push_back(Type(InTS, Proto[I])); 343 } 344 345 /// Get the Record that this intrinsic is based off. 346 Record *getRecord() const { return R; } 347 /// Get the set of Intrinsics that this intrinsic calls. 348 /// this is the set of immediate dependencies, NOT the 349 /// transitive closure. 350 const std::set<Intrinsic *> &getDependencies() const { return Dependencies; } 351 /// Get the architectural guard string (#ifdef). 352 std::string getGuard() const { return Guard; } 353 /// Get the non-mangled name. 354 std::string getName() const { return Name; } 355 356 /// Return true if the intrinsic takes an immediate operand. 357 bool hasImmediate() const { 358 return Proto.find('i') != std::string::npos; 359 } 360 /// Return the parameter index of the immediate operand. 361 unsigned getImmediateIdx() const { 362 assert(hasImmediate()); 363 unsigned Idx = Proto.find('i'); 364 assert(Idx > 0 && "Can't return an immediate!"); 365 return Idx - 1; 366 } 367 368 /// Return true if the intrinsic takes an splat operand. 369 bool hasSplat() const { return Proto.find('a') != std::string::npos; } 370 /// Return the parameter index of the splat operand. 371 unsigned getSplatIdx() const { 372 assert(hasSplat()); 373 unsigned Idx = Proto.find('a'); 374 assert(Idx > 0 && "Can't return a splat!"); 375 return Idx - 1; 376 } 377 378 unsigned getNumParams() const { return Proto.size() - 1; } 379 Type getReturnType() const { return Types[0]; } 380 Type getParamType(unsigned I) const { return Types[I + 1]; } 381 Type getBaseType() const { return BaseType; } 382 /// Return the raw prototype string. 383 std::string getProto() const { return Proto; } 384 385 /// Return true if the prototype has a scalar argument. 386 /// This does not return true for the "splat" code ('a'). 387 bool protoHasScalar(); 388 389 /// Return the index that parameter PIndex will sit at 390 /// in a generated function call. This is often just PIndex, 391 /// but may not be as things such as multiple-vector operands 392 /// and sret parameters need to be taken into accont. 393 unsigned getGeneratedParamIdx(unsigned PIndex) { 394 unsigned Idx = 0; 395 if (getReturnType().getNumVectors() > 1) 396 // Multiple vectors are passed as sret. 397 ++Idx; 398 399 for (unsigned I = 0; I < PIndex; ++I) 400 Idx += std::max(1U, getParamType(I).getNumVectors()); 401 402 return Idx; 403 } 404 405 bool hasBody() const { return Body && Body->getValues().size() > 0; } 406 407 void setNeededEarly() { NeededEarly = true; } 408 409 bool operator<(const Intrinsic &Other) const { 410 // Sort lexicographically on a two-tuple (Guard, Name) 411 if (Guard != Other.Guard) 412 return Guard < Other.Guard; 413 return Name < Other.Name; 414 } 415 416 ClassKind getClassKind(bool UseClassBIfScalar = false) { 417 if (UseClassBIfScalar && !protoHasScalar()) 418 return ClassB; 419 return CK; 420 } 421 422 /// Return the name, mangled with type information. 423 /// If ForceClassS is true, use ClassS (u32/s32) instead 424 /// of the intrinsic's own type class. 425 std::string getMangledName(bool ForceClassS = false); 426 /// Return the type code for a builtin function call. 427 std::string getInstTypeCode(Type T, ClassKind CK); 428 /// Return the type string for a BUILTIN() macro in Builtins.def. 429 std::string getBuiltinTypeStr(); 430 431 /// Generate the intrinsic, returning code. 432 std::string generate(); 433 /// Perform type checking and populate the dependency graph, but 434 /// don't generate code yet. 435 void indexBody(); 436 437 private: 438 std::string mangleName(std::string Name, ClassKind CK); 439 440 void initVariables(); 441 std::string replaceParamsIn(std::string S); 442 443 void emitBodyAsBuiltinCall(); 444 445 void generateImpl(bool ReverseArguments, 446 StringRef NamePrefix, StringRef CallPrefix); 447 void emitReturn(); 448 void emitBody(StringRef CallPrefix); 449 void emitShadowedArgs(); 450 void emitArgumentReversal(); 451 void emitReturnReversal(); 452 void emitReverseVariable(Variable &Dest, Variable &Src); 453 void emitNewLine(); 454 void emitClosingBrace(); 455 void emitOpeningBrace(); 456 void emitPrototype(StringRef NamePrefix); 457 458 class DagEmitter { 459 Intrinsic &Intr; 460 StringRef CallPrefix; 461 462 public: 463 DagEmitter(Intrinsic &Intr, StringRef CallPrefix) : 464 Intr(Intr), CallPrefix(CallPrefix) { 465 } 466 std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName); 467 std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI); 468 std::pair<Type, std::string> emitDagSplat(DagInit *DI); 469 std::pair<Type, std::string> emitDagDup(DagInit *DI); 470 std::pair<Type, std::string> emitDagShuffle(DagInit *DI); 471 std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast); 472 std::pair<Type, std::string> emitDagCall(DagInit *DI); 473 std::pair<Type, std::string> emitDagNameReplace(DagInit *DI); 474 std::pair<Type, std::string> emitDagLiteral(DagInit *DI); 475 std::pair<Type, std::string> emitDagOp(DagInit *DI); 476 std::pair<Type, std::string> emitDag(DagInit *DI); 477 }; 478 479 }; 480 481 //===----------------------------------------------------------------------===// 482 // NeonEmitter 483 //===----------------------------------------------------------------------===// 484 485 class NeonEmitter { 486 RecordKeeper &Records; 487 DenseMap<Record *, ClassKind> ClassMap; 488 std::map<std::string, std::vector<Intrinsic *>> IntrinsicMap; 489 unsigned UniqueNumber; 490 491 void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out); 492 void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs); 493 void genOverloadTypeCheckCode(raw_ostream &OS, 494 SmallVectorImpl<Intrinsic *> &Defs); 495 void genIntrinsicRangeCheckCode(raw_ostream &OS, 496 SmallVectorImpl<Intrinsic *> &Defs); 497 498 public: 499 /// Called by Intrinsic - this attempts to get an intrinsic that takes 500 /// the given types as arguments. 501 Intrinsic *getIntrinsic(StringRef Name, ArrayRef<Type> Types); 502 503 /// Called by Intrinsic - returns a globally-unique number. 504 unsigned getUniqueNumber() { return UniqueNumber++; } 505 506 NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) { 507 Record *SI = R.getClass("SInst"); 508 Record *II = R.getClass("IInst"); 509 Record *WI = R.getClass("WInst"); 510 Record *SOpI = R.getClass("SOpInst"); 511 Record *IOpI = R.getClass("IOpInst"); 512 Record *WOpI = R.getClass("WOpInst"); 513 Record *LOpI = R.getClass("LOpInst"); 514 Record *NoTestOpI = R.getClass("NoTestOpInst"); 515 516 ClassMap[SI] = ClassS; 517 ClassMap[II] = ClassI; 518 ClassMap[WI] = ClassW; 519 ClassMap[SOpI] = ClassS; 520 ClassMap[IOpI] = ClassI; 521 ClassMap[WOpI] = ClassW; 522 ClassMap[LOpI] = ClassL; 523 ClassMap[NoTestOpI] = ClassNoTest; 524 } 525 526 // run - Emit arm_neon.h.inc 527 void run(raw_ostream &o); 528 529 // runHeader - Emit all the __builtin prototypes used in arm_neon.h 530 void runHeader(raw_ostream &o); 531 532 // runTests - Emit tests for all the Neon intrinsics. 533 void runTests(raw_ostream &o); 534 }; 535 536 } // end anonymous namespace 537 538 //===----------------------------------------------------------------------===// 539 // Type implementation 540 //===----------------------------------------------------------------------===// 541 542 std::string Type::str() const { 543 if (Void) 544 return "void"; 545 std::string S; 546 547 if (!Signed && isInteger()) 548 S += "u"; 549 550 if (Poly) 551 S += "poly"; 552 else if (Float) 553 S += "float"; 554 else 555 S += "int"; 556 557 S += utostr(ElementBitwidth); 558 if (isVector()) 559 S += "x" + utostr(getNumElements()); 560 if (NumVectors > 1) 561 S += "x" + utostr(NumVectors); 562 S += "_t"; 563 564 if (Constant) 565 S += " const"; 566 if (Pointer) 567 S += " *"; 568 569 return S; 570 } 571 572 std::string Type::builtin_str() const { 573 std::string S; 574 if (isVoid()) 575 return "v"; 576 577 if (Pointer) 578 // All pointers are void pointers. 579 S += "v"; 580 else if (isInteger()) 581 switch (ElementBitwidth) { 582 case 8: S += "c"; break; 583 case 16: S += "s"; break; 584 case 32: S += "i"; break; 585 case 64: S += "Wi"; break; 586 case 128: S += "LLLi"; break; 587 default: llvm_unreachable("Unhandled case!"); 588 } 589 else 590 switch (ElementBitwidth) { 591 case 16: S += "h"; break; 592 case 32: S += "f"; break; 593 case 64: S += "d"; break; 594 default: llvm_unreachable("Unhandled case!"); 595 } 596 597 if (isChar() && !Pointer) 598 // Make chars explicitly signed. 599 S = "S" + S; 600 else if (isInteger() && !Pointer && !Signed) 601 S = "U" + S; 602 603 if (isScalar()) { 604 if (Constant) S += "C"; 605 if (Pointer) S += "*"; 606 return S; 607 } 608 609 std::string Ret; 610 for (unsigned I = 0; I < NumVectors; ++I) 611 Ret += "V" + utostr(getNumElements()) + S; 612 613 return Ret; 614 } 615 616 unsigned Type::getNeonEnum() const { 617 unsigned Addend; 618 switch (ElementBitwidth) { 619 case 8: Addend = 0; break; 620 case 16: Addend = 1; break; 621 case 32: Addend = 2; break; 622 case 64: Addend = 3; break; 623 case 128: Addend = 4; break; 624 default: llvm_unreachable("Unhandled element bitwidth!"); 625 } 626 627 unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend; 628 if (Poly) { 629 // Adjustment needed because Poly32 doesn't exist. 630 if (Addend >= 2) 631 --Addend; 632 Base = (unsigned)NeonTypeFlags::Poly8 + Addend; 633 } 634 if (Float) { 635 assert(Addend != 0 && "Float8 doesn't exist!"); 636 Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1); 637 } 638 639 if (Bitwidth == 128) 640 Base |= (unsigned)NeonTypeFlags::QuadFlag; 641 if (isInteger() && !Signed) 642 Base |= (unsigned)NeonTypeFlags::UnsignedFlag; 643 644 return Base; 645 } 646 647 Type Type::fromTypedefName(StringRef Name) { 648 Type T; 649 T.Void = false; 650 T.Float = false; 651 T.Poly = false; 652 653 if (Name.front() == 'u') { 654 T.Signed = false; 655 Name = Name.drop_front(); 656 } else { 657 T.Signed = true; 658 } 659 660 if (Name.startswith("float")) { 661 T.Float = true; 662 Name = Name.drop_front(5); 663 } else if (Name.startswith("poly")) { 664 T.Poly = true; 665 Name = Name.drop_front(4); 666 } else { 667 assert(Name.startswith("int")); 668 Name = Name.drop_front(3); 669 } 670 671 unsigned I = 0; 672 for (I = 0; I < Name.size(); ++I) { 673 if (!isdigit(Name[I])) 674 break; 675 } 676 Name.substr(0, I).getAsInteger(10, T.ElementBitwidth); 677 Name = Name.drop_front(I); 678 679 T.Bitwidth = T.ElementBitwidth; 680 T.NumVectors = 1; 681 682 if (Name.front() == 'x') { 683 Name = Name.drop_front(); 684 unsigned I = 0; 685 for (I = 0; I < Name.size(); ++I) { 686 if (!isdigit(Name[I])) 687 break; 688 } 689 unsigned NumLanes; 690 Name.substr(0, I).getAsInteger(10, NumLanes); 691 Name = Name.drop_front(I); 692 T.Bitwidth = T.ElementBitwidth * NumLanes; 693 } else { 694 // Was scalar. 695 T.NumVectors = 0; 696 } 697 if (Name.front() == 'x') { 698 Name = Name.drop_front(); 699 unsigned I = 0; 700 for (I = 0; I < Name.size(); ++I) { 701 if (!isdigit(Name[I])) 702 break; 703 } 704 Name.substr(0, I).getAsInteger(10, T.NumVectors); 705 Name = Name.drop_front(I); 706 } 707 708 assert(Name.startswith("_t") && "Malformed typedef!"); 709 return T; 710 } 711 712 void Type::applyTypespec(bool &Quad) { 713 std::string S = TS; 714 ScalarForMangling = false; 715 Void = false; 716 Poly = Float = false; 717 ElementBitwidth = ~0U; 718 Signed = true; 719 NumVectors = 1; 720 721 for (char I : S) { 722 switch (I) { 723 case 'S': 724 ScalarForMangling = true; 725 break; 726 case 'H': 727 NoManglingQ = true; 728 Quad = true; 729 break; 730 case 'Q': 731 Quad = true; 732 break; 733 case 'P': 734 Poly = true; 735 break; 736 case 'U': 737 Signed = false; 738 break; 739 case 'c': 740 ElementBitwidth = 8; 741 break; 742 case 'h': 743 Float = true; 744 // Fall through 745 case 's': 746 ElementBitwidth = 16; 747 break; 748 case 'f': 749 Float = true; 750 // Fall through 751 case 'i': 752 ElementBitwidth = 32; 753 break; 754 case 'd': 755 Float = true; 756 // Fall through 757 case 'l': 758 ElementBitwidth = 64; 759 break; 760 case 'k': 761 ElementBitwidth = 128; 762 // Poly doesn't have a 128x1 type. 763 if (Poly) 764 NumVectors = 0; 765 break; 766 default: 767 llvm_unreachable("Unhandled type code!"); 768 } 769 } 770 assert(ElementBitwidth != ~0U && "Bad element bitwidth!"); 771 772 Bitwidth = Quad ? 128 : 64; 773 } 774 775 void Type::applyModifier(char Mod) { 776 bool AppliedQuad = false; 777 applyTypespec(AppliedQuad); 778 779 switch (Mod) { 780 case 'v': 781 Void = true; 782 break; 783 case 't': 784 if (Poly) { 785 Poly = false; 786 Signed = false; 787 } 788 break; 789 case 'b': 790 Signed = false; 791 Float = false; 792 Poly = false; 793 NumVectors = 0; 794 Bitwidth = ElementBitwidth; 795 break; 796 case '$': 797 Signed = true; 798 Float = false; 799 Poly = false; 800 NumVectors = 0; 801 Bitwidth = ElementBitwidth; 802 break; 803 case 'u': 804 Signed = false; 805 Poly = false; 806 Float = false; 807 break; 808 case 'x': 809 Signed = true; 810 assert(!Poly && "'u' can't be used with poly types!"); 811 Float = false; 812 break; 813 case 'o': 814 Bitwidth = ElementBitwidth = 64; 815 NumVectors = 0; 816 Float = true; 817 break; 818 case 'y': 819 Bitwidth = ElementBitwidth = 32; 820 NumVectors = 0; 821 Float = true; 822 break; 823 case 'f': 824 // Special case - if we're half-precision, a floating 825 // point argument needs to be 128-bits (double size). 826 if (isHalf()) 827 Bitwidth = 128; 828 Float = true; 829 ElementBitwidth = 32; 830 break; 831 case 'F': 832 Float = true; 833 ElementBitwidth = 64; 834 break; 835 case 'g': 836 if (AppliedQuad) 837 Bitwidth /= 2; 838 break; 839 case 'j': 840 if (!AppliedQuad) 841 Bitwidth *= 2; 842 break; 843 case 'w': 844 ElementBitwidth *= 2; 845 Bitwidth *= 2; 846 break; 847 case 'n': 848 ElementBitwidth *= 2; 849 break; 850 case 'i': 851 Float = false; 852 Poly = false; 853 ElementBitwidth = Bitwidth = 32; 854 NumVectors = 0; 855 Signed = true; 856 break; 857 case 'l': 858 Float = false; 859 Poly = false; 860 ElementBitwidth = Bitwidth = 64; 861 NumVectors = 0; 862 Signed = false; 863 break; 864 case 'z': 865 ElementBitwidth /= 2; 866 Bitwidth = ElementBitwidth; 867 NumVectors = 0; 868 break; 869 case 'r': 870 ElementBitwidth *= 2; 871 Bitwidth = ElementBitwidth; 872 NumVectors = 0; 873 break; 874 case 's': 875 case 'a': 876 Bitwidth = ElementBitwidth; 877 NumVectors = 0; 878 break; 879 case 'k': 880 Bitwidth *= 2; 881 break; 882 case 'c': 883 Constant = true; 884 // Fall through 885 case 'p': 886 Pointer = true; 887 Bitwidth = ElementBitwidth; 888 NumVectors = 0; 889 break; 890 case 'h': 891 ElementBitwidth /= 2; 892 break; 893 case 'q': 894 ElementBitwidth /= 2; 895 Bitwidth *= 2; 896 break; 897 case 'e': 898 ElementBitwidth /= 2; 899 Signed = false; 900 break; 901 case 'm': 902 ElementBitwidth /= 2; 903 Bitwidth /= 2; 904 break; 905 case 'd': 906 break; 907 case '2': 908 NumVectors = 2; 909 break; 910 case '3': 911 NumVectors = 3; 912 break; 913 case '4': 914 NumVectors = 4; 915 break; 916 case 'B': 917 NumVectors = 2; 918 if (!AppliedQuad) 919 Bitwidth *= 2; 920 break; 921 case 'C': 922 NumVectors = 3; 923 if (!AppliedQuad) 924 Bitwidth *= 2; 925 break; 926 case 'D': 927 NumVectors = 4; 928 if (!AppliedQuad) 929 Bitwidth *= 2; 930 break; 931 default: 932 llvm_unreachable("Unhandled character!"); 933 } 934 } 935 936 //===----------------------------------------------------------------------===// 937 // Intrinsic implementation 938 //===----------------------------------------------------------------------===// 939 940 std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) { 941 char typeCode = '\0'; 942 bool printNumber = true; 943 944 if (CK == ClassB) 945 return ""; 946 947 if (T.isPoly()) 948 typeCode = 'p'; 949 else if (T.isInteger()) 950 typeCode = T.isSigned() ? 's' : 'u'; 951 else 952 typeCode = 'f'; 953 954 if (CK == ClassI) { 955 switch (typeCode) { 956 default: 957 break; 958 case 's': 959 case 'u': 960 case 'p': 961 typeCode = 'i'; 962 break; 963 } 964 } 965 if (CK == ClassB) { 966 typeCode = '\0'; 967 } 968 969 std::string S; 970 if (typeCode != '\0') 971 S.push_back(typeCode); 972 if (printNumber) 973 S += utostr(T.getElementSizeInBits()); 974 975 return S; 976 } 977 978 std::string Intrinsic::getBuiltinTypeStr() { 979 ClassKind LocalCK = getClassKind(true); 980 std::string S; 981 982 Type RetT = getReturnType(); 983 if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() && 984 !RetT.isFloating()) 985 RetT.makeInteger(RetT.getElementSizeInBits(), false); 986 987 // Since the return value must be one type, return a vector type of the 988 // appropriate width which we will bitcast. An exception is made for 989 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like 990 // fashion, storing them to a pointer arg. 991 if (RetT.getNumVectors() > 1) { 992 S += "vv*"; // void result with void* first argument 993 } else { 994 if (RetT.isPoly()) 995 RetT.makeInteger(RetT.getElementSizeInBits(), false); 996 if (!RetT.isScalar() && !RetT.isSigned()) 997 RetT.makeSigned(); 998 999 bool ForcedVectorFloatingType = Proto[0] == 'F' || Proto[0] == 'f'; 1000 if (LocalCK == ClassB && !RetT.isScalar() && !ForcedVectorFloatingType) 1001 // Cast to vector of 8-bit elements. 1002 RetT.makeInteger(8, true); 1003 1004 S += RetT.builtin_str(); 1005 } 1006 1007 for (unsigned I = 0; I < getNumParams(); ++I) { 1008 Type T = getParamType(I); 1009 if (T.isPoly()) 1010 T.makeInteger(T.getElementSizeInBits(), false); 1011 1012 bool ForcedFloatingType = Proto[I + 1] == 'F' || Proto[I + 1] == 'f'; 1013 if (LocalCK == ClassB && !T.isScalar() && !ForcedFloatingType) 1014 T.makeInteger(8, true); 1015 // Halves always get converted to 8-bit elements. 1016 if (T.isHalf() && T.isVector() && !T.isScalarForMangling()) 1017 T.makeInteger(8, true); 1018 1019 if (LocalCK == ClassI) 1020 T.makeSigned(); 1021 1022 // Constant indices are always just "int". 1023 if (hasImmediate() && getImmediateIdx() == I) 1024 T.makeInteger(32, true); 1025 1026 S += T.builtin_str(); 1027 } 1028 1029 // Extra constant integer to hold type class enum for this function, e.g. s8 1030 if (LocalCK == ClassB) 1031 S += "i"; 1032 1033 return S; 1034 } 1035 1036 std::string Intrinsic::getMangledName(bool ForceClassS) { 1037 // Check if the prototype has a scalar operand with the type of the vector 1038 // elements. If not, bitcasting the args will take care of arg checking. 1039 // The actual signedness etc. will be taken care of with special enums. 1040 ClassKind LocalCK = CK; 1041 if (!protoHasScalar()) 1042 LocalCK = ClassB; 1043 1044 return mangleName(Name, ForceClassS ? ClassS : LocalCK); 1045 } 1046 1047 std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) { 1048 std::string typeCode = getInstTypeCode(BaseType, LocalCK); 1049 std::string S = Name; 1050 1051 if (Name == "vcvt_f32_f16" || Name == "vcvt_f32_f64" || 1052 Name == "vcvt_f64_f32") 1053 return Name; 1054 1055 if (typeCode.size() > 0) { 1056 // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN. 1057 if (Name.size() >= 3 && isdigit(Name.back()) && 1058 Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_') 1059 S.insert(S.length() - 3, "_" + typeCode); 1060 else 1061 S += "_" + typeCode; 1062 } 1063 1064 if (BaseType != InBaseType) { 1065 // A reinterpret - out the input base type at the end. 1066 S += "_" + getInstTypeCode(InBaseType, LocalCK); 1067 } 1068 1069 if (LocalCK == ClassB) 1070 S += "_v"; 1071 1072 // Insert a 'q' before the first '_' character so that it ends up before 1073 // _lane or _n on vector-scalar operations. 1074 if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) { 1075 size_t Pos = S.find('_'); 1076 S.insert(Pos, "q"); 1077 } 1078 1079 char Suffix = '\0'; 1080 if (BaseType.isScalarForMangling()) { 1081 switch (BaseType.getElementSizeInBits()) { 1082 case 8: Suffix = 'b'; break; 1083 case 16: Suffix = 'h'; break; 1084 case 32: Suffix = 's'; break; 1085 case 64: Suffix = 'd'; break; 1086 default: llvm_unreachable("Bad suffix!"); 1087 } 1088 } 1089 if (Suffix != '\0') { 1090 size_t Pos = S.find('_'); 1091 S.insert(Pos, &Suffix, 1); 1092 } 1093 1094 return S; 1095 } 1096 1097 std::string Intrinsic::replaceParamsIn(std::string S) { 1098 while (S.find('$') != std::string::npos) { 1099 size_t Pos = S.find('$'); 1100 size_t End = Pos + 1; 1101 while (isalpha(S[End])) 1102 ++End; 1103 1104 std::string VarName = S.substr(Pos + 1, End - Pos - 1); 1105 assert_with_loc(Variables.find(VarName) != Variables.end(), 1106 "Variable not defined!"); 1107 S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName()); 1108 } 1109 1110 return S; 1111 } 1112 1113 void Intrinsic::initVariables() { 1114 Variables.clear(); 1115 1116 // Modify the TypeSpec per-argument to get a concrete Type, and create 1117 // known variables for each. 1118 for (unsigned I = 1; I < Proto.size(); ++I) { 1119 char NameC = '0' + (I - 1); 1120 std::string Name = "p"; 1121 Name.push_back(NameC); 1122 1123 Variables[Name] = Variable(Types[I], Name + VariablePostfix); 1124 } 1125 RetVar = Variable(Types[0], "ret" + VariablePostfix); 1126 } 1127 1128 void Intrinsic::emitPrototype(StringRef NamePrefix) { 1129 if (UseMacro) 1130 OS << "#define "; 1131 else 1132 OS << "__ai " << Types[0].str() << " "; 1133 1134 OS << NamePrefix.str() << mangleName(Name, ClassS) << "("; 1135 1136 for (unsigned I = 0; I < getNumParams(); ++I) { 1137 if (I != 0) 1138 OS << ", "; 1139 1140 char NameC = '0' + I; 1141 std::string Name = "p"; 1142 Name.push_back(NameC); 1143 assert(Variables.find(Name) != Variables.end()); 1144 Variable &V = Variables[Name]; 1145 1146 if (!UseMacro) 1147 OS << V.getType().str() << " "; 1148 OS << V.getName(); 1149 } 1150 1151 OS << ")"; 1152 } 1153 1154 void Intrinsic::emitOpeningBrace() { 1155 if (UseMacro) 1156 OS << " __extension__ ({"; 1157 else 1158 OS << " {"; 1159 emitNewLine(); 1160 } 1161 1162 void Intrinsic::emitClosingBrace() { 1163 if (UseMacro) 1164 OS << "})"; 1165 else 1166 OS << "}"; 1167 } 1168 1169 void Intrinsic::emitNewLine() { 1170 if (UseMacro) 1171 OS << " \\\n"; 1172 else 1173 OS << "\n"; 1174 } 1175 1176 void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) { 1177 if (Dest.getType().getNumVectors() > 1) { 1178 emitNewLine(); 1179 1180 for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) { 1181 OS << " " << Dest.getName() << ".val[" << utostr(K) << "] = " 1182 << "__builtin_shufflevector(" 1183 << Src.getName() << ".val[" << utostr(K) << "], " 1184 << Src.getName() << ".val[" << utostr(K) << "]"; 1185 for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J) 1186 OS << ", " << utostr(J); 1187 OS << ");"; 1188 emitNewLine(); 1189 } 1190 } else { 1191 OS << " " << Dest.getName() 1192 << " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName(); 1193 for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J) 1194 OS << ", " << utostr(J); 1195 OS << ");"; 1196 emitNewLine(); 1197 } 1198 } 1199 1200 void Intrinsic::emitArgumentReversal() { 1201 if (BigEndianSafe) 1202 return; 1203 1204 // Reverse all vector arguments. 1205 for (unsigned I = 0; I < getNumParams(); ++I) { 1206 std::string Name = "p" + utostr(I); 1207 std::string NewName = "rev" + utostr(I); 1208 1209 Variable &V = Variables[Name]; 1210 Variable NewV(V.getType(), NewName + VariablePostfix); 1211 1212 if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1) 1213 continue; 1214 1215 OS << " " << NewV.getType().str() << " " << NewV.getName() << ";"; 1216 emitReverseVariable(NewV, V); 1217 V = NewV; 1218 } 1219 } 1220 1221 void Intrinsic::emitReturnReversal() { 1222 if (BigEndianSafe) 1223 return; 1224 if (!getReturnType().isVector() || getReturnType().isVoid() || 1225 getReturnType().getNumElements() == 1) 1226 return; 1227 emitReverseVariable(RetVar, RetVar); 1228 } 1229 1230 1231 void Intrinsic::emitShadowedArgs() { 1232 // Macro arguments are not type-checked like inline function arguments, 1233 // so assign them to local temporaries to get the right type checking. 1234 if (!UseMacro) 1235 return; 1236 1237 for (unsigned I = 0; I < getNumParams(); ++I) { 1238 // Do not create a temporary for an immediate argument. 1239 // That would defeat the whole point of using a macro! 1240 if (hasImmediate() && Proto[I+1] == 'i') 1241 continue; 1242 // Do not create a temporary for pointer arguments. The input 1243 // pointer may have an alignment hint. 1244 if (getParamType(I).isPointer()) 1245 continue; 1246 1247 std::string Name = "p" + utostr(I); 1248 1249 assert(Variables.find(Name) != Variables.end()); 1250 Variable &V = Variables[Name]; 1251 1252 std::string NewName = "s" + utostr(I); 1253 Variable V2(V.getType(), NewName + VariablePostfix); 1254 1255 OS << " " << V2.getType().str() << " " << V2.getName() << " = " 1256 << V.getName() << ";"; 1257 emitNewLine(); 1258 1259 V = V2; 1260 } 1261 } 1262 1263 // We don't check 'a' in this function, because for builtin function the 1264 // argument matching to 'a' uses a vector type splatted from a scalar type. 1265 bool Intrinsic::protoHasScalar() { 1266 return (Proto.find('s') != std::string::npos || 1267 Proto.find('z') != std::string::npos || 1268 Proto.find('r') != std::string::npos || 1269 Proto.find('b') != std::string::npos || 1270 Proto.find('$') != std::string::npos || 1271 Proto.find('y') != std::string::npos || 1272 Proto.find('o') != std::string::npos); 1273 } 1274 1275 void Intrinsic::emitBodyAsBuiltinCall() { 1276 std::string S; 1277 1278 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit 1279 // sret-like argument. 1280 bool SRet = getReturnType().getNumVectors() >= 2; 1281 1282 StringRef N = Name; 1283 if (hasSplat()) { 1284 // Call the non-splat builtin: chop off the "_n" suffix from the name. 1285 assert(N.endswith("_n")); 1286 N = N.drop_back(2); 1287 } 1288 1289 ClassKind LocalCK = CK; 1290 if (!protoHasScalar()) 1291 LocalCK = ClassB; 1292 1293 if (!getReturnType().isVoid() && !SRet) 1294 S += "(" + RetVar.getType().str() + ") "; 1295 1296 S += "__builtin_neon_" + mangleName(N, LocalCK) + "("; 1297 1298 if (SRet) 1299 S += "&" + RetVar.getName() + ", "; 1300 1301 for (unsigned I = 0; I < getNumParams(); ++I) { 1302 Variable &V = Variables["p" + utostr(I)]; 1303 Type T = V.getType(); 1304 1305 // Handle multiple-vector values specially, emitting each subvector as an 1306 // argument to the builtin. 1307 if (T.getNumVectors() > 1) { 1308 // Check if an explicit cast is needed. 1309 std::string Cast; 1310 if (T.isChar() || T.isPoly() || !T.isSigned()) { 1311 Type T2 = T; 1312 T2.makeOneVector(); 1313 T2.makeInteger(8, /*Signed=*/true); 1314 Cast = "(" + T2.str() + ")"; 1315 } 1316 1317 for (unsigned J = 0; J < T.getNumVectors(); ++J) 1318 S += Cast + V.getName() + ".val[" + utostr(J) + "], "; 1319 continue; 1320 } 1321 1322 std::string Arg; 1323 Type CastToType = T; 1324 if (hasSplat() && I == getSplatIdx()) { 1325 Arg = "(" + BaseType.str() + ") {"; 1326 for (unsigned J = 0; J < BaseType.getNumElements(); ++J) { 1327 if (J != 0) 1328 Arg += ", "; 1329 Arg += V.getName(); 1330 } 1331 Arg += "}"; 1332 1333 CastToType = BaseType; 1334 } else { 1335 Arg = V.getName(); 1336 } 1337 1338 // Check if an explicit cast is needed. 1339 if (CastToType.isVector()) { 1340 CastToType.makeInteger(8, true); 1341 Arg = "(" + CastToType.str() + ")" + Arg; 1342 } 1343 1344 S += Arg + ", "; 1345 } 1346 1347 // Extra constant integer to hold type class enum for this function, e.g. s8 1348 if (getClassKind(true) == ClassB) { 1349 Type ThisTy = getReturnType(); 1350 if (Proto[0] == 'v' || Proto[0] == 'f' || Proto[0] == 'F') 1351 ThisTy = getParamType(0); 1352 if (ThisTy.isPointer()) 1353 ThisTy = getParamType(1); 1354 1355 S += utostr(ThisTy.getNeonEnum()); 1356 } else { 1357 // Remove extraneous ", ". 1358 S.pop_back(); 1359 S.pop_back(); 1360 } 1361 S += ");"; 1362 1363 std::string RetExpr; 1364 if (!SRet && !RetVar.getType().isVoid()) 1365 RetExpr = RetVar.getName() + " = "; 1366 1367 OS << " " << RetExpr << S; 1368 emitNewLine(); 1369 } 1370 1371 void Intrinsic::emitBody(StringRef CallPrefix) { 1372 std::vector<std::string> Lines; 1373 1374 assert(RetVar.getType() == Types[0]); 1375 // Create a return variable, if we're not void. 1376 if (!RetVar.getType().isVoid()) { 1377 OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";"; 1378 emitNewLine(); 1379 } 1380 1381 if (!Body || Body->getValues().size() == 0) { 1382 // Nothing specific to output - must output a builtin. 1383 emitBodyAsBuiltinCall(); 1384 return; 1385 } 1386 1387 // We have a list of "things to output". The last should be returned. 1388 for (auto *I : Body->getValues()) { 1389 if (StringInit *SI = dyn_cast<StringInit>(I)) { 1390 Lines.push_back(replaceParamsIn(SI->getAsString())); 1391 } else if (DagInit *DI = dyn_cast<DagInit>(I)) { 1392 DagEmitter DE(*this, CallPrefix); 1393 Lines.push_back(DE.emitDag(DI).second + ";"); 1394 } 1395 } 1396 1397 assert(!Lines.empty() && "Empty def?"); 1398 if (!RetVar.getType().isVoid()) 1399 Lines.back().insert(0, RetVar.getName() + " = "); 1400 1401 for (auto &L : Lines) { 1402 OS << " " << L; 1403 emitNewLine(); 1404 } 1405 } 1406 1407 void Intrinsic::emitReturn() { 1408 if (RetVar.getType().isVoid()) 1409 return; 1410 if (UseMacro) 1411 OS << " " << RetVar.getName() << ";"; 1412 else 1413 OS << " return " << RetVar.getName() << ";"; 1414 emitNewLine(); 1415 } 1416 1417 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) { 1418 // At this point we should only be seeing a def. 1419 DefInit *DefI = cast<DefInit>(DI->getOperator()); 1420 std::string Op = DefI->getAsString(); 1421 1422 if (Op == "cast" || Op == "bitcast") 1423 return emitDagCast(DI, Op == "bitcast"); 1424 if (Op == "shuffle") 1425 return emitDagShuffle(DI); 1426 if (Op == "dup") 1427 return emitDagDup(DI); 1428 if (Op == "splat") 1429 return emitDagSplat(DI); 1430 if (Op == "save_temp") 1431 return emitDagSaveTemp(DI); 1432 if (Op == "op") 1433 return emitDagOp(DI); 1434 if (Op == "call") 1435 return emitDagCall(DI); 1436 if (Op == "name_replace") 1437 return emitDagNameReplace(DI); 1438 if (Op == "literal") 1439 return emitDagLiteral(DI); 1440 assert_with_loc(false, "Unknown operation!"); 1441 return std::make_pair(Type::getVoid(), ""); 1442 } 1443 1444 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) { 1445 std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1446 if (DI->getNumArgs() == 2) { 1447 // Unary op. 1448 std::pair<Type, std::string> R = 1449 emitDagArg(DI->getArg(1), DI->getArgName(1)); 1450 return std::make_pair(R.first, Op + R.second); 1451 } else { 1452 assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!"); 1453 std::pair<Type, std::string> R1 = 1454 emitDagArg(DI->getArg(1), DI->getArgName(1)); 1455 std::pair<Type, std::string> R2 = 1456 emitDagArg(DI->getArg(2), DI->getArgName(2)); 1457 assert_with_loc(R1.first == R2.first, "Argument type mismatch!"); 1458 return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second); 1459 } 1460 } 1461 1462 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCall(DagInit *DI) { 1463 std::vector<Type> Types; 1464 std::vector<std::string> Values; 1465 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { 1466 std::pair<Type, std::string> R = 1467 emitDagArg(DI->getArg(I + 1), DI->getArgName(I + 1)); 1468 Types.push_back(R.first); 1469 Values.push_back(R.second); 1470 } 1471 1472 // Look up the called intrinsic. 1473 std::string N; 1474 if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0))) 1475 N = SI->getAsUnquotedString(); 1476 else 1477 N = emitDagArg(DI->getArg(0), "").second; 1478 Intrinsic *Callee = Intr.Emitter.getIntrinsic(N, Types); 1479 assert(Callee && "getIntrinsic should not return us nullptr!"); 1480 1481 // Make sure the callee is known as an early def. 1482 Callee->setNeededEarly(); 1483 Intr.Dependencies.insert(Callee); 1484 1485 // Now create the call itself. 1486 std::string S = CallPrefix.str() + Callee->getMangledName(true) + "("; 1487 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { 1488 if (I != 0) 1489 S += ", "; 1490 S += Values[I]; 1491 } 1492 S += ")"; 1493 1494 return std::make_pair(Callee->getReturnType(), S); 1495 } 1496 1497 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI, 1498 bool IsBitCast){ 1499 // (cast MOD* VAL) -> cast VAL to type given by MOD. 1500 std::pair<Type, std::string> R = emitDagArg( 1501 DI->getArg(DI->getNumArgs() - 1), DI->getArgName(DI->getNumArgs() - 1)); 1502 Type castToType = R.first; 1503 for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) { 1504 1505 // MOD can take several forms: 1506 // 1. $X - take the type of parameter / variable X. 1507 // 2. The value "R" - take the type of the return type. 1508 // 3. a type string 1509 // 4. The value "U" or "S" to switch the signedness. 1510 // 5. The value "H" or "D" to half or double the bitwidth. 1511 // 6. The value "8" to convert to 8-bit (signed) integer lanes. 1512 if (DI->getArgName(ArgIdx).size()) { 1513 assert_with_loc(Intr.Variables.find(DI->getArgName(ArgIdx)) != 1514 Intr.Variables.end(), 1515 "Variable not found"); 1516 castToType = Intr.Variables[DI->getArgName(ArgIdx)].getType(); 1517 } else { 1518 StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx)); 1519 assert_with_loc(SI, "Expected string type or $Name for cast type"); 1520 1521 if (SI->getAsUnquotedString() == "R") { 1522 castToType = Intr.getReturnType(); 1523 } else if (SI->getAsUnquotedString() == "U") { 1524 castToType.makeUnsigned(); 1525 } else if (SI->getAsUnquotedString() == "S") { 1526 castToType.makeSigned(); 1527 } else if (SI->getAsUnquotedString() == "H") { 1528 castToType.halveLanes(); 1529 } else if (SI->getAsUnquotedString() == "D") { 1530 castToType.doubleLanes(); 1531 } else if (SI->getAsUnquotedString() == "8") { 1532 castToType.makeInteger(8, true); 1533 } else { 1534 castToType = Type::fromTypedefName(SI->getAsUnquotedString()); 1535 assert_with_loc(!castToType.isVoid(), "Unknown typedef"); 1536 } 1537 } 1538 } 1539 1540 std::string S; 1541 if (IsBitCast) { 1542 // Emit a reinterpret cast. The second operand must be an lvalue, so create 1543 // a temporary. 1544 std::string N = "reint"; 1545 unsigned I = 0; 1546 while (Intr.Variables.find(N) != Intr.Variables.end()) 1547 N = "reint" + utostr(++I); 1548 Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix); 1549 1550 Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = " 1551 << R.second << ";"; 1552 Intr.emitNewLine(); 1553 1554 S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + ""; 1555 } else { 1556 // Emit a normal (static) cast. 1557 S = "(" + castToType.str() + ")(" + R.second + ")"; 1558 } 1559 1560 return std::make_pair(castToType, S); 1561 } 1562 1563 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){ 1564 // See the documentation in arm_neon.td for a description of these operators. 1565 class LowHalf : public SetTheory::Operator { 1566 public: 1567 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1568 ArrayRef<SMLoc> Loc) override { 1569 SetTheory::RecSet Elts2; 1570 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); 1571 Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2)); 1572 } 1573 }; 1574 class HighHalf : public SetTheory::Operator { 1575 public: 1576 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1577 ArrayRef<SMLoc> Loc) override { 1578 SetTheory::RecSet Elts2; 1579 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); 1580 Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end()); 1581 } 1582 }; 1583 class Rev : public SetTheory::Operator { 1584 unsigned ElementSize; 1585 1586 public: 1587 Rev(unsigned ElementSize) : ElementSize(ElementSize) {} 1588 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, 1589 ArrayRef<SMLoc> Loc) override { 1590 SetTheory::RecSet Elts2; 1591 ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc); 1592 1593 int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue(); 1594 VectorSize /= ElementSize; 1595 1596 std::vector<Record *> Revved; 1597 for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) { 1598 for (int LI = VectorSize - 1; LI >= 0; --LI) { 1599 Revved.push_back(Elts2[VI + LI]); 1600 } 1601 } 1602 1603 Elts.insert(Revved.begin(), Revved.end()); 1604 } 1605 }; 1606 class MaskExpander : public SetTheory::Expander { 1607 unsigned N; 1608 1609 public: 1610 MaskExpander(unsigned N) : N(N) {} 1611 void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) override { 1612 unsigned Addend = 0; 1613 if (R->getName() == "mask0") 1614 Addend = 0; 1615 else if (R->getName() == "mask1") 1616 Addend = N; 1617 else 1618 return; 1619 for (unsigned I = 0; I < N; ++I) 1620 Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend))); 1621 } 1622 }; 1623 1624 // (shuffle arg1, arg2, sequence) 1625 std::pair<Type, std::string> Arg1 = 1626 emitDagArg(DI->getArg(0), DI->getArgName(0)); 1627 std::pair<Type, std::string> Arg2 = 1628 emitDagArg(DI->getArg(1), DI->getArgName(1)); 1629 assert_with_loc(Arg1.first == Arg2.first, 1630 "Different types in arguments to shuffle!"); 1631 1632 SetTheory ST; 1633 SetTheory::RecSet Elts; 1634 ST.addOperator("lowhalf", llvm::make_unique<LowHalf>()); 1635 ST.addOperator("highhalf", llvm::make_unique<HighHalf>()); 1636 ST.addOperator("rev", 1637 llvm::make_unique<Rev>(Arg1.first.getElementSizeInBits())); 1638 ST.addExpander("MaskExpand", 1639 llvm::make_unique<MaskExpander>(Arg1.first.getNumElements())); 1640 ST.evaluate(DI->getArg(2), Elts, None); 1641 1642 std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second; 1643 for (auto &E : Elts) { 1644 StringRef Name = E->getName(); 1645 assert_with_loc(Name.startswith("sv"), 1646 "Incorrect element kind in shuffle mask!"); 1647 S += ", " + Name.drop_front(2).str(); 1648 } 1649 S += ")"; 1650 1651 // Recalculate the return type - the shuffle may have halved or doubled it. 1652 Type T(Arg1.first); 1653 if (Elts.size() > T.getNumElements()) { 1654 assert_with_loc( 1655 Elts.size() == T.getNumElements() * 2, 1656 "Can only double or half the number of elements in a shuffle!"); 1657 T.doubleLanes(); 1658 } else if (Elts.size() < T.getNumElements()) { 1659 assert_with_loc( 1660 Elts.size() == T.getNumElements() / 2, 1661 "Can only double or half the number of elements in a shuffle!"); 1662 T.halveLanes(); 1663 } 1664 1665 return std::make_pair(T, S); 1666 } 1667 1668 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) { 1669 assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument"); 1670 std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), DI->getArgName(0)); 1671 assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument"); 1672 1673 Type T = Intr.getBaseType(); 1674 assert_with_loc(T.isVector(), "dup() used but default type is scalar!"); 1675 std::string S = "(" + T.str() + ") {"; 1676 for (unsigned I = 0; I < T.getNumElements(); ++I) { 1677 if (I != 0) 1678 S += ", "; 1679 S += A.second; 1680 } 1681 S += "}"; 1682 1683 return std::make_pair(T, S); 1684 } 1685 1686 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) { 1687 assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments"); 1688 std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), DI->getArgName(0)); 1689 std::pair<Type, std::string> B = emitDagArg(DI->getArg(1), DI->getArgName(1)); 1690 1691 assert_with_loc(B.first.isScalar(), 1692 "splat() requires a scalar int as the second argument"); 1693 1694 std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second; 1695 for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) { 1696 S += ", " + B.second; 1697 } 1698 S += ")"; 1699 1700 return std::make_pair(Intr.getBaseType(), S); 1701 } 1702 1703 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) { 1704 assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments"); 1705 std::pair<Type, std::string> A = emitDagArg(DI->getArg(1), DI->getArgName(1)); 1706 1707 assert_with_loc(!A.first.isVoid(), 1708 "Argument to save_temp() must have non-void type!"); 1709 1710 std::string N = DI->getArgName(0); 1711 assert_with_loc(N.size(), "save_temp() expects a name as the first argument"); 1712 1713 assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(), 1714 "Variable already defined!"); 1715 Intr.Variables[N] = Variable(A.first, N + Intr.VariablePostfix); 1716 1717 std::string S = 1718 A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second; 1719 1720 return std::make_pair(Type::getVoid(), S); 1721 } 1722 1723 std::pair<Type, std::string> 1724 Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) { 1725 std::string S = Intr.Name; 1726 1727 assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!"); 1728 std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1729 std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1730 1731 size_t Idx = S.find(ToReplace); 1732 1733 assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!"); 1734 S.replace(Idx, ToReplace.size(), ReplaceWith); 1735 1736 return std::make_pair(Type::getVoid(), S); 1737 } 1738 1739 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){ 1740 std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); 1741 std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); 1742 return std::make_pair(Type::fromTypedefName(Ty), Value); 1743 } 1744 1745 std::pair<Type, std::string> 1746 Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) { 1747 if (ArgName.size()) { 1748 assert_with_loc(!Arg->isComplete(), 1749 "Arguments must either be DAGs or names, not both!"); 1750 assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(), 1751 "Variable not defined!"); 1752 Variable &V = Intr.Variables[ArgName]; 1753 return std::make_pair(V.getType(), V.getName()); 1754 } 1755 1756 assert(Arg && "Neither ArgName nor Arg?!"); 1757 DagInit *DI = dyn_cast<DagInit>(Arg); 1758 assert_with_loc(DI, "Arguments must either be DAGs or names!"); 1759 1760 return emitDag(DI); 1761 } 1762 1763 std::string Intrinsic::generate() { 1764 // Little endian intrinsics are simple and don't require any argument 1765 // swapping. 1766 OS << "#ifdef __LITTLE_ENDIAN__\n"; 1767 1768 generateImpl(false, "", ""); 1769 1770 OS << "#else\n"; 1771 1772 // Big endian intrinsics are more complex. The user intended these 1773 // intrinsics to operate on a vector "as-if" loaded by (V)LDR, 1774 // but we load as-if (V)LD1. So we should swap all arguments and 1775 // swap the return value too. 1776 // 1777 // If we call sub-intrinsics, we should call a version that does 1778 // not re-swap the arguments! 1779 generateImpl(true, "", "__noswap_"); 1780 1781 // If we're needed early, create a non-swapping variant for 1782 // big-endian. 1783 if (NeededEarly) { 1784 generateImpl(false, "__noswap_", "__noswap_"); 1785 } 1786 OS << "#endif\n\n"; 1787 1788 return OS.str(); 1789 } 1790 1791 void Intrinsic::generateImpl(bool ReverseArguments, 1792 StringRef NamePrefix, StringRef CallPrefix) { 1793 CurrentRecord = R; 1794 1795 // If we call a macro, our local variables may be corrupted due to 1796 // lack of proper lexical scoping. So, add a globally unique postfix 1797 // to every variable. 1798 // 1799 // indexBody() should have set up the Dependencies set by now. 1800 for (auto *I : Dependencies) 1801 if (I->UseMacro) { 1802 VariablePostfix = "_" + utostr(Emitter.getUniqueNumber()); 1803 break; 1804 } 1805 1806 initVariables(); 1807 1808 emitPrototype(NamePrefix); 1809 1810 if (IsUnavailable) { 1811 OS << " __attribute__((unavailable));"; 1812 } else { 1813 emitOpeningBrace(); 1814 emitShadowedArgs(); 1815 if (ReverseArguments) 1816 emitArgumentReversal(); 1817 emitBody(CallPrefix); 1818 if (ReverseArguments) 1819 emitReturnReversal(); 1820 emitReturn(); 1821 emitClosingBrace(); 1822 } 1823 OS << "\n"; 1824 1825 CurrentRecord = nullptr; 1826 } 1827 1828 void Intrinsic::indexBody() { 1829 CurrentRecord = R; 1830 1831 initVariables(); 1832 emitBody(""); 1833 OS.str(""); 1834 1835 CurrentRecord = nullptr; 1836 } 1837 1838 //===----------------------------------------------------------------------===// 1839 // NeonEmitter implementation 1840 //===----------------------------------------------------------------------===// 1841 1842 Intrinsic *NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types) { 1843 // First, look up the name in the intrinsic map. 1844 assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(), 1845 ("Intrinsic '" + Name + "' not found!").str()); 1846 std::vector<Intrinsic *> &V = IntrinsicMap[Name.str()]; 1847 std::vector<Intrinsic *> GoodVec; 1848 1849 // Create a string to print if we end up failing. 1850 std::string ErrMsg = "looking up intrinsic '" + Name.str() + "("; 1851 for (unsigned I = 0; I < Types.size(); ++I) { 1852 if (I != 0) 1853 ErrMsg += ", "; 1854 ErrMsg += Types[I].str(); 1855 } 1856 ErrMsg += ")'\n"; 1857 ErrMsg += "Available overloads:\n"; 1858 1859 // Now, look through each intrinsic implementation and see if the types are 1860 // compatible. 1861 for (auto *I : V) { 1862 ErrMsg += " - " + I->getReturnType().str() + " " + I->getMangledName(); 1863 ErrMsg += "("; 1864 for (unsigned A = 0; A < I->getNumParams(); ++A) { 1865 if (A != 0) 1866 ErrMsg += ", "; 1867 ErrMsg += I->getParamType(A).str(); 1868 } 1869 ErrMsg += ")\n"; 1870 1871 if (I->getNumParams() != Types.size()) 1872 continue; 1873 1874 bool Good = true; 1875 for (unsigned Arg = 0; Arg < Types.size(); ++Arg) { 1876 if (I->getParamType(Arg) != Types[Arg]) { 1877 Good = false; 1878 break; 1879 } 1880 } 1881 if (Good) 1882 GoodVec.push_back(I); 1883 } 1884 1885 assert_with_loc(GoodVec.size() > 0, 1886 "No compatible intrinsic found - " + ErrMsg); 1887 assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg); 1888 1889 return GoodVec.front(); 1890 } 1891 1892 void NeonEmitter::createIntrinsic(Record *R, 1893 SmallVectorImpl<Intrinsic *> &Out) { 1894 std::string Name = R->getValueAsString("Name"); 1895 std::string Proto = R->getValueAsString("Prototype"); 1896 std::string Types = R->getValueAsString("Types"); 1897 Record *OperationRec = R->getValueAsDef("Operation"); 1898 bool CartesianProductOfTypes = R->getValueAsBit("CartesianProductOfTypes"); 1899 bool BigEndianSafe = R->getValueAsBit("BigEndianSafe"); 1900 std::string Guard = R->getValueAsString("ArchGuard"); 1901 bool IsUnavailable = OperationRec->getValueAsBit("Unavailable"); 1902 1903 // Set the global current record. This allows assert_with_loc to produce 1904 // decent location information even when highly nested. 1905 CurrentRecord = R; 1906 1907 ListInit *Body = OperationRec->getValueAsListInit("Ops"); 1908 1909 std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types); 1910 1911 ClassKind CK = ClassNone; 1912 if (R->getSuperClasses().size() >= 2) 1913 CK = ClassMap[R->getSuperClasses()[1]]; 1914 1915 std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs; 1916 for (auto TS : TypeSpecs) { 1917 if (CartesianProductOfTypes) { 1918 Type DefaultT(TS, 'd'); 1919 for (auto SrcTS : TypeSpecs) { 1920 Type DefaultSrcT(SrcTS, 'd'); 1921 if (TS == SrcTS || 1922 DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits()) 1923 continue; 1924 NewTypeSpecs.push_back(std::make_pair(TS, SrcTS)); 1925 } 1926 } else { 1927 NewTypeSpecs.push_back(std::make_pair(TS, TS)); 1928 } 1929 } 1930 1931 std::sort(NewTypeSpecs.begin(), NewTypeSpecs.end()); 1932 NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()), 1933 NewTypeSpecs.end()); 1934 1935 for (auto &I : NewTypeSpecs) { 1936 Intrinsic *IT = new Intrinsic(R, Name, Proto, I.first, I.second, CK, Body, 1937 *this, Guard, IsUnavailable, BigEndianSafe); 1938 1939 IntrinsicMap[Name].push_back(IT); 1940 Out.push_back(IT); 1941 } 1942 1943 CurrentRecord = nullptr; 1944 } 1945 1946 /// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def 1947 /// declaration of builtins, checking for unique builtin declarations. 1948 void NeonEmitter::genBuiltinsDef(raw_ostream &OS, 1949 SmallVectorImpl<Intrinsic *> &Defs) { 1950 OS << "#ifdef GET_NEON_BUILTINS\n"; 1951 1952 // We only want to emit a builtin once, and we want to emit them in 1953 // alphabetical order, so use a std::set. 1954 std::set<std::string> Builtins; 1955 1956 for (auto *Def : Defs) { 1957 if (Def->hasBody()) 1958 continue; 1959 // Functions with 'a' (the splat code) in the type prototype should not get 1960 // their own builtin as they use the non-splat variant. 1961 if (Def->hasSplat()) 1962 continue; 1963 1964 std::string S = "BUILTIN(__builtin_neon_" + Def->getMangledName() + ", \""; 1965 1966 S += Def->getBuiltinTypeStr(); 1967 S += "\", \"n\")"; 1968 1969 Builtins.insert(S); 1970 } 1971 1972 for (auto &S : Builtins) 1973 OS << S << "\n"; 1974 OS << "#endif\n\n"; 1975 } 1976 1977 /// Generate the ARM and AArch64 overloaded type checking code for 1978 /// SemaChecking.cpp, checking for unique builtin declarations. 1979 void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, 1980 SmallVectorImpl<Intrinsic *> &Defs) { 1981 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; 1982 1983 // We record each overload check line before emitting because subsequent Inst 1984 // definitions may extend the number of permitted types (i.e. augment the 1985 // Mask). Use std::map to avoid sorting the table by hash number. 1986 struct OverloadInfo { 1987 uint64_t Mask; 1988 int PtrArgNum; 1989 bool HasConstPtr; 1990 OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {} 1991 }; 1992 std::map<std::string, OverloadInfo> OverloadMap; 1993 1994 for (auto *Def : Defs) { 1995 // If the def has a body (that is, it has Operation DAGs), it won't call 1996 // __builtin_neon_* so we don't need to generate a definition for it. 1997 if (Def->hasBody()) 1998 continue; 1999 // Functions with 'a' (the splat code) in the type prototype should not get 2000 // their own builtin as they use the non-splat variant. 2001 if (Def->hasSplat()) 2002 continue; 2003 // Functions which have a scalar argument cannot be overloaded, no need to 2004 // check them if we are emitting the type checking code. 2005 if (Def->protoHasScalar()) 2006 continue; 2007 2008 uint64_t Mask = 0ULL; 2009 Type Ty = Def->getReturnType(); 2010 if (Def->getProto()[0] == 'v' || Def->getProto()[0] == 'f' || 2011 Def->getProto()[0] == 'F') 2012 Ty = Def->getParamType(0); 2013 if (Ty.isPointer()) 2014 Ty = Def->getParamType(1); 2015 2016 Mask |= 1ULL << Ty.getNeonEnum(); 2017 2018 // Check if the function has a pointer or const pointer argument. 2019 std::string Proto = Def->getProto(); 2020 int PtrArgNum = -1; 2021 bool HasConstPtr = false; 2022 for (unsigned I = 0; I < Def->getNumParams(); ++I) { 2023 char ArgType = Proto[I + 1]; 2024 if (ArgType == 'c') { 2025 HasConstPtr = true; 2026 PtrArgNum = I; 2027 break; 2028 } 2029 if (ArgType == 'p') { 2030 PtrArgNum = I; 2031 break; 2032 } 2033 } 2034 // For sret builtins, adjust the pointer argument index. 2035 if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1) 2036 PtrArgNum += 1; 2037 2038 std::string Name = Def->getName(); 2039 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup, 2040 // and vst1_lane intrinsics. Using a pointer to the vector element 2041 // type with one of those operations causes codegen to select an aligned 2042 // load/store instruction. If you want an unaligned operation, 2043 // the pointer argument needs to have less alignment than element type, 2044 // so just accept any pointer type. 2045 if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane") { 2046 PtrArgNum = -1; 2047 HasConstPtr = false; 2048 } 2049 2050 if (Mask) { 2051 std::string Name = Def->getMangledName(); 2052 OverloadMap.insert(std::make_pair(Name, OverloadInfo())); 2053 OverloadInfo &OI = OverloadMap[Name]; 2054 OI.Mask |= Mask; 2055 OI.PtrArgNum |= PtrArgNum; 2056 OI.HasConstPtr = HasConstPtr; 2057 } 2058 } 2059 2060 for (auto &I : OverloadMap) { 2061 OverloadInfo &OI = I.second; 2062 2063 OS << "case NEON::BI__builtin_neon_" << I.first << ": "; 2064 OS << "mask = 0x" << utohexstr(OI.Mask) << "ULL"; 2065 if (OI.PtrArgNum >= 0) 2066 OS << "; PtrArgNum = " << OI.PtrArgNum; 2067 if (OI.HasConstPtr) 2068 OS << "; HasConstPtr = true"; 2069 OS << "; break;\n"; 2070 } 2071 OS << "#endif\n\n"; 2072 } 2073 2074 void 2075 NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, 2076 SmallVectorImpl<Intrinsic *> &Defs) { 2077 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; 2078 2079 std::set<std::string> Emitted; 2080 2081 for (auto *Def : Defs) { 2082 if (Def->hasBody()) 2083 continue; 2084 // Functions with 'a' (the splat code) in the type prototype should not get 2085 // their own builtin as they use the non-splat variant. 2086 if (Def->hasSplat()) 2087 continue; 2088 // Functions which do not have an immediate do not need to have range 2089 // checking code emitted. 2090 if (!Def->hasImmediate()) 2091 continue; 2092 if (Emitted.find(Def->getMangledName()) != Emitted.end()) 2093 continue; 2094 2095 std::string LowerBound, UpperBound; 2096 2097 Record *R = Def->getRecord(); 2098 if (R->getValueAsBit("isVCVT_N")) { 2099 // VCVT between floating- and fixed-point values takes an immediate 2100 // in the range [1, 32) for f32 or [1, 64) for f64. 2101 LowerBound = "1"; 2102 if (Def->getBaseType().getElementSizeInBits() == 32) 2103 UpperBound = "31"; 2104 else 2105 UpperBound = "63"; 2106 } else if (R->getValueAsBit("isScalarShift")) { 2107 // Right shifts have an 'r' in the name, left shifts do not. Convert 2108 // instructions have the same bounds and right shifts. 2109 if (Def->getName().find('r') != std::string::npos || 2110 Def->getName().find("cvt") != std::string::npos) 2111 LowerBound = "1"; 2112 2113 UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1); 2114 } else if (R->getValueAsBit("isShift")) { 2115 // Builtins which are overloaded by type will need to have their upper 2116 // bound computed at Sema time based on the type constant. 2117 2118 // Right shifts have an 'r' in the name, left shifts do not. 2119 if (Def->getName().find('r') != std::string::npos) 2120 LowerBound = "1"; 2121 UpperBound = "RFT(TV, true)"; 2122 } else if (Def->getClassKind(true) == ClassB) { 2123 // ClassB intrinsics have a type (and hence lane number) that is only 2124 // known at runtime. 2125 if (R->getValueAsBit("isLaneQ")) 2126 UpperBound = "RFT(TV, false, true)"; 2127 else 2128 UpperBound = "RFT(TV, false, false)"; 2129 } else { 2130 // The immediate generally refers to a lane in the preceding argument. 2131 assert(Def->getImmediateIdx() > 0); 2132 Type T = Def->getParamType(Def->getImmediateIdx() - 1); 2133 UpperBound = utostr(T.getNumElements() - 1); 2134 } 2135 2136 // Calculate the index of the immediate that should be range checked. 2137 unsigned Idx = Def->getNumParams(); 2138 if (Def->hasImmediate()) 2139 Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx()); 2140 2141 OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": " 2142 << "i = " << Idx << ";"; 2143 if (LowerBound.size()) 2144 OS << " l = " << LowerBound << ";"; 2145 if (UpperBound.size()) 2146 OS << " u = " << UpperBound << ";"; 2147 OS << " break;\n"; 2148 2149 Emitted.insert(Def->getMangledName()); 2150 } 2151 2152 OS << "#endif\n\n"; 2153 } 2154 2155 /// runHeader - Emit a file with sections defining: 2156 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def. 2157 /// 2. the SemaChecking code for the type overload checking. 2158 /// 3. the SemaChecking code for validation of intrinsic immediate arguments. 2159 void NeonEmitter::runHeader(raw_ostream &OS) { 2160 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2161 2162 SmallVector<Intrinsic *, 128> Defs; 2163 for (auto *R : RV) 2164 createIntrinsic(R, Defs); 2165 2166 // Generate shared BuiltinsXXX.def 2167 genBuiltinsDef(OS, Defs); 2168 2169 // Generate ARM overloaded type checking code for SemaChecking.cpp 2170 genOverloadTypeCheckCode(OS, Defs); 2171 2172 // Generate ARM range checking code for shift/lane immediates. 2173 genIntrinsicRangeCheckCode(OS, Defs); 2174 } 2175 2176 /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h 2177 /// is comprised of type definitions and function declarations. 2178 void NeonEmitter::run(raw_ostream &OS) { 2179 OS << "/*===---- arm_neon.h - ARM Neon intrinsics " 2180 "------------------------------" 2181 "---===\n" 2182 " *\n" 2183 " * Permission is hereby granted, free of charge, to any person " 2184 "obtaining " 2185 "a copy\n" 2186 " * of this software and associated documentation files (the " 2187 "\"Software\")," 2188 " to deal\n" 2189 " * in the Software without restriction, including without limitation " 2190 "the " 2191 "rights\n" 2192 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2193 "and/or sell\n" 2194 " * copies of the Software, and to permit persons to whom the Software " 2195 "is\n" 2196 " * furnished to do so, subject to the following conditions:\n" 2197 " *\n" 2198 " * The above copyright notice and this permission notice shall be " 2199 "included in\n" 2200 " * all copies or substantial portions of the Software.\n" 2201 " *\n" 2202 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2203 "EXPRESS OR\n" 2204 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2205 "MERCHANTABILITY,\n" 2206 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT " 2207 "SHALL THE\n" 2208 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2209 "OTHER\n" 2210 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2211 "ARISING FROM,\n" 2212 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2213 "DEALINGS IN\n" 2214 " * THE SOFTWARE.\n" 2215 " *\n" 2216 " *===-----------------------------------------------------------------" 2217 "---" 2218 "---===\n" 2219 " */\n\n"; 2220 2221 OS << "#ifndef __ARM_NEON_H\n"; 2222 OS << "#define __ARM_NEON_H\n\n"; 2223 2224 OS << "#if !defined(__ARM_NEON)\n"; 2225 OS << "#error \"NEON support not enabled\"\n"; 2226 OS << "#endif\n\n"; 2227 2228 OS << "#include <stdint.h>\n\n"; 2229 2230 // Emit NEON-specific scalar typedefs. 2231 OS << "typedef float float32_t;\n"; 2232 OS << "typedef __fp16 float16_t;\n"; 2233 2234 OS << "#ifdef __aarch64__\n"; 2235 OS << "typedef double float64_t;\n"; 2236 OS << "#endif\n\n"; 2237 2238 // For now, signedness of polynomial types depends on target 2239 OS << "#ifdef __aarch64__\n"; 2240 OS << "typedef uint8_t poly8_t;\n"; 2241 OS << "typedef uint16_t poly16_t;\n"; 2242 OS << "typedef uint64_t poly64_t;\n"; 2243 OS << "typedef __uint128_t poly128_t;\n"; 2244 OS << "#else\n"; 2245 OS << "typedef int8_t poly8_t;\n"; 2246 OS << "typedef int16_t poly16_t;\n"; 2247 OS << "#endif\n"; 2248 2249 // Emit Neon vector typedefs. 2250 std::string TypedefTypes( 2251 "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl"); 2252 std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes); 2253 2254 // Emit vector typedefs. 2255 bool InIfdef = false; 2256 for (auto &TS : TDTypeVec) { 2257 bool IsA64 = false; 2258 Type T(TS, 'd'); 2259 if (T.isDouble() || (T.isPoly() && T.isLong())) 2260 IsA64 = true; 2261 2262 if (InIfdef && !IsA64) { 2263 OS << "#endif\n"; 2264 InIfdef = false; 2265 } 2266 if (!InIfdef && IsA64) { 2267 OS << "#ifdef __aarch64__\n"; 2268 InIfdef = true; 2269 } 2270 2271 if (T.isPoly()) 2272 OS << "typedef __attribute__((neon_polyvector_type("; 2273 else 2274 OS << "typedef __attribute__((neon_vector_type("; 2275 2276 Type T2 = T; 2277 T2.makeScalar(); 2278 OS << utostr(T.getNumElements()) << "))) "; 2279 OS << T2.str(); 2280 OS << " " << T.str() << ";\n"; 2281 } 2282 if (InIfdef) 2283 OS << "#endif\n"; 2284 OS << "\n"; 2285 2286 // Emit struct typedefs. 2287 InIfdef = false; 2288 for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) { 2289 for (auto &TS : TDTypeVec) { 2290 bool IsA64 = false; 2291 Type T(TS, 'd'); 2292 if (T.isDouble() || (T.isPoly() && T.isLong())) 2293 IsA64 = true; 2294 2295 if (InIfdef && !IsA64) { 2296 OS << "#endif\n"; 2297 InIfdef = false; 2298 } 2299 if (!InIfdef && IsA64) { 2300 OS << "#ifdef __aarch64__\n"; 2301 InIfdef = true; 2302 } 2303 2304 char M = '2' + (NumMembers - 2); 2305 Type VT(TS, M); 2306 OS << "typedef struct " << VT.str() << " {\n"; 2307 OS << " " << T.str() << " val"; 2308 OS << "[" << utostr(NumMembers) << "]"; 2309 OS << ";\n} "; 2310 OS << VT.str() << ";\n"; 2311 OS << "\n"; 2312 } 2313 } 2314 if (InIfdef) 2315 OS << "#endif\n"; 2316 OS << "\n"; 2317 2318 OS << "#define __ai static inline __attribute__((__always_inline__, " 2319 "__nodebug__))\n\n"; 2320 2321 SmallVector<Intrinsic *, 128> Defs; 2322 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 2323 for (auto *R : RV) 2324 createIntrinsic(R, Defs); 2325 2326 for (auto *I : Defs) 2327 I->indexBody(); 2328 2329 std::stable_sort( 2330 Defs.begin(), Defs.end(), 2331 [](const Intrinsic *A, const Intrinsic *B) { return *A < *B; }); 2332 2333 // Only emit a def when its requirements have been met. 2334 // FIXME: This loop could be made faster, but it's fast enough for now. 2335 bool MadeProgress = true; 2336 std::string InGuard = ""; 2337 while (!Defs.empty() && MadeProgress) { 2338 MadeProgress = false; 2339 2340 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); 2341 I != Defs.end(); /*No step*/) { 2342 bool DependenciesSatisfied = true; 2343 for (auto *II : (*I)->getDependencies()) { 2344 if (std::find(Defs.begin(), Defs.end(), II) != Defs.end()) 2345 DependenciesSatisfied = false; 2346 } 2347 if (!DependenciesSatisfied) { 2348 // Try the next one. 2349 ++I; 2350 continue; 2351 } 2352 2353 // Emit #endif/#if pair if needed. 2354 if ((*I)->getGuard() != InGuard) { 2355 if (!InGuard.empty()) 2356 OS << "#endif\n"; 2357 InGuard = (*I)->getGuard(); 2358 if (!InGuard.empty()) 2359 OS << "#if " << InGuard << "\n"; 2360 } 2361 2362 // Actually generate the intrinsic code. 2363 OS << (*I)->generate(); 2364 2365 MadeProgress = true; 2366 I = Defs.erase(I); 2367 } 2368 } 2369 assert(Defs.empty() && "Some requirements were not satisfied!"); 2370 if (!InGuard.empty()) 2371 OS << "#endif\n"; 2372 2373 OS << "\n"; 2374 OS << "#undef __ai\n\n"; 2375 OS << "#endif /* __ARM_NEON_H */\n"; 2376 } 2377 2378 namespace clang { 2379 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 2380 NeonEmitter(Records).run(OS); 2381 } 2382 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 2383 NeonEmitter(Records).runHeader(OS); 2384 } 2385 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 2386 llvm_unreachable("Neon test generation no longer implemented!"); 2387 } 2388 } // End namespace clang 2389