1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This tablegen backend is responsible for emitting arm_neon.h, which includes
10 // a declaration and definition of each function specified by the ARM NEON
11 // compiler interface.  See ARM document DUI0348B.
12 //
13 // Each NEON instruction is implemented in terms of 1 or more functions which
14 // are suffixed with the element type of the input vectors.  Functions may be
15 // implemented in terms of generic vector operations such as +, *, -, etc. or
16 // by calling a __builtin_-prefixed function which will be handled by clang's
17 // CodeGen library.
18 //
19 // Additional validation code can be generated by this file when runHeader() is
20 // called, rather than the normal run() entry point.
21 //
22 // See also the documentation in include/clang/Basic/arm_neon.td.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/None.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/StringExtras.h"
32 #include "llvm/ADT/StringRef.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include "llvm/TableGen/Error.h"
37 #include "llvm/TableGen/Record.h"
38 #include "llvm/TableGen/SetTheory.h"
39 #include <algorithm>
40 #include <cassert>
41 #include <cctype>
42 #include <cstddef>
43 #include <cstdint>
44 #include <deque>
45 #include <map>
46 #include <set>
47 #include <sstream>
48 #include <string>
49 #include <utility>
50 #include <vector>
51 
52 using namespace llvm;
53 
54 namespace {
55 
56 // While globals are generally bad, this one allows us to perform assertions
57 // liberally and somehow still trace them back to the def they indirectly
58 // came from.
59 static Record *CurrentRecord = nullptr;
60 static void assert_with_loc(bool Assertion, const std::string &Str) {
61   if (!Assertion) {
62     if (CurrentRecord)
63       PrintFatalError(CurrentRecord->getLoc(), Str);
64     else
65       PrintFatalError(Str);
66   }
67 }
68 
enum ClassKind {
  ClassNone,
  // Generic integer instruction, e.g., "i8" suffix.
  ClassI,
  // Signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix.
  ClassS,
  // Width-specific instruction, e.g., "8" suffix.
  ClassW,
  // Bitcast arguments with enum argument to specify type.
  ClassB,
  // Logical instructions which are op instructions but for which we must not
  // emit any suffix in our tests.
  ClassL,
  // Instructions which we do not test since they are not TRUE instructions.
  ClassNoTest
};
81 
/// NeonTypeFlags - Flags used to identify the types of overloaded Neon
/// builtins. The values here must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace NeonTypeFlags {

// Bit layout of a type flag word: the low four bits hold an EltType, the
// remaining two bits are independent modifiers.
enum {
  EltTypeMask = 0xf,
  UnsignedFlag = 0x10,
  QuadFlag = 0x20
};

// Element type codes. The numeric values are spelled out so that they can be
// compared against TargetBuiltins.h at a glance.
enum EltType {
  Int8 = 0,
  Int16 = 1,
  Int32 = 2,
  Int64 = 3,
  Poly8 = 4,
  Poly16 = 5,
  Poly64 = 6,
  Poly128 = 7,
  Float16 = 8,
  Float32 = 9,
  Float64 = 10
};

} // end namespace NeonTypeFlags
104 
105 class NeonEmitter;
106 
107 //===----------------------------------------------------------------------===//
108 // TypeSpec
109 //===----------------------------------------------------------------------===//
110 
111 /// A TypeSpec is just a simple wrapper around a string, but gets its own type
112 /// for strong typing purposes.
113 ///
114 /// A TypeSpec can be used to create a type.
115 class TypeSpec : public std::string {
116 public:
117   static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) {
118     std::vector<TypeSpec> Ret;
119     TypeSpec Acc;
120     for (char I : Str.str()) {
121       if (islower(I)) {
122         Acc.push_back(I);
123         Ret.push_back(TypeSpec(Acc));
124         Acc.clear();
125       } else {
126         Acc.push_back(I);
127       }
128     }
129     return Ret;
130   }
131 };
132 
133 //===----------------------------------------------------------------------===//
134 // Type
135 //===----------------------------------------------------------------------===//
136 
137 /// A Type. Not much more to say here.
138 class Type {
139 private:
140   TypeSpec TS;
141 
142   bool Float, Signed, Immediate, Void, Poly, Constant, Pointer;
143   // ScalarForMangling and NoManglingQ are really not suited to live here as
144   // they are not related to the type. But they live in the TypeSpec (not the
145   // prototype), so this is really the only place to store them.
146   bool ScalarForMangling, NoManglingQ;
147   unsigned Bitwidth, ElementBitwidth, NumVectors;
148 
149 public:
150   Type()
151       : Float(false), Signed(false), Immediate(false), Void(true), Poly(false),
152         Constant(false), Pointer(false), ScalarForMangling(false),
153         NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {}
154 
155   Type(TypeSpec TS, char CharMod)
156       : TS(std::move(TS)), Float(false), Signed(false), Immediate(false),
157         Void(false), Poly(false), Constant(false), Pointer(false),
158         ScalarForMangling(false), NoManglingQ(false), Bitwidth(0),
159         ElementBitwidth(0), NumVectors(0) {
160     applyModifier(CharMod);
161   }
162 
163   /// Returns a type representing "void".
164   static Type getVoid() { return Type(); }
165 
166   bool operator==(const Type &Other) const { return str() == Other.str(); }
167   bool operator!=(const Type &Other) const { return !operator==(Other); }
168 
169   //
170   // Query functions
171   //
172   bool isScalarForMangling() const { return ScalarForMangling; }
173   bool noManglingQ() const { return NoManglingQ; }
174 
175   bool isPointer() const { return Pointer; }
176   bool isFloating() const { return Float; }
177   bool isInteger() const { return !Float && !Poly; }
178   bool isSigned() const { return Signed; }
179   bool isImmediate() const { return Immediate; }
180   bool isScalar() const { return NumVectors == 0; }
181   bool isVector() const { return NumVectors > 0; }
182   bool isFloat() const { return Float && ElementBitwidth == 32; }
183   bool isDouble() const { return Float && ElementBitwidth == 64; }
184   bool isHalf() const { return Float && ElementBitwidth == 16; }
185   bool isPoly() const { return Poly; }
186   bool isChar() const { return ElementBitwidth == 8; }
187   bool isShort() const { return !Float && ElementBitwidth == 16; }
188   bool isInt() const { return !Float && ElementBitwidth == 32; }
189   bool isLong() const { return !Float && ElementBitwidth == 64; }
190   bool isVoid() const { return Void; }
191   unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
192   unsigned getSizeInBits() const { return Bitwidth; }
193   unsigned getElementSizeInBits() const { return ElementBitwidth; }
194   unsigned getNumVectors() const { return NumVectors; }
195 
196   //
197   // Mutator functions
198   //
199   void makeUnsigned() { Signed = false; }
200   void makeSigned() { Signed = true; }
201 
202   void makeInteger(unsigned ElemWidth, bool Sign) {
203     Float = false;
204     Poly = false;
205     Signed = Sign;
206     Immediate = false;
207     ElementBitwidth = ElemWidth;
208   }
209 
210   void makeImmediate(unsigned ElemWidth) {
211     Float = false;
212     Poly = false;
213     Signed = true;
214     Immediate = true;
215     ElementBitwidth = ElemWidth;
216   }
217 
218   void makeScalar() {
219     Bitwidth = ElementBitwidth;
220     NumVectors = 0;
221   }
222 
223   void makeOneVector() {
224     assert(isVector());
225     NumVectors = 1;
226   }
227 
228   void doubleLanes() {
229     assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");
230     Bitwidth = 128;
231   }
232 
233   void halveLanes() {
234     assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!");
235     Bitwidth = 64;
236   }
237 
238   /// Return the C string representation of a type, which is the typename
239   /// defined in stdint.h or arm_neon.h.
240   std::string str() const;
241 
242   /// Return the string representation of a type, which is an encoded
243   /// string for passing to the BUILTIN() macro in Builtins.def.
244   std::string builtin_str() const;
245 
246   /// Return the value in NeonTypeFlags for this type.
247   unsigned getNeonEnum() const;
248 
249   /// Parse a type from a stdint.h or arm_neon.h typedef name,
250   /// for example uint32x2_t or int64_t.
251   static Type fromTypedefName(StringRef Name);
252 
253 private:
254   /// Creates the type based on the typespec string in TS.
255   /// Sets "Quad" to true if the "Q" or "H" modifiers were
256   /// seen. This is needed by applyModifier as some modifiers
257   /// only take effect if the type size was changed by "Q" or "H".
258   void applyTypespec(bool &Quad);
259   /// Applies a prototype modifier to the type.
260   void applyModifier(char Mod);
261 };
262 
263 //===----------------------------------------------------------------------===//
264 // Variable
265 //===----------------------------------------------------------------------===//
266 
267 /// A variable is a simple class that just has a type and a name.
268 class Variable {
269   Type T;
270   std::string N;
271 
272 public:
273   Variable() : T(Type::getVoid()), N("") {}
274   Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {}
275 
276   Type getType() const { return T; }
277   std::string getName() const { return "__" + N; }
278 };
279 
280 //===----------------------------------------------------------------------===//
281 // Intrinsic
282 //===----------------------------------------------------------------------===//
283 
284 /// The main grunt class. This represents an instantiation of an intrinsic with
285 /// a particular typespec and prototype.
286 class Intrinsic {
287   friend class DagEmitter;
288 
289   /// The Record this intrinsic was created from.
290   Record *R;
291   /// The unmangled name and prototype.
292   std::string Name, Proto;
293   /// The input and output typespecs. InTS == OutTS except when
294   /// CartesianProductOfTypes is 1 - this is the case for vreinterpret.
295   TypeSpec OutTS, InTS;
296   /// The base class kind. Most intrinsics use ClassS, which has full type
297   /// info for integers (s32/u32). Some use ClassI, which doesn't care about
298   /// signedness (i32), while some (ClassB) have no type at all, only a width
299   /// (32).
300   ClassKind CK;
301   /// The list of DAGs for the body. May be empty, in which case we should
302   /// emit a builtin call.
303   ListInit *Body;
304   /// The architectural #ifdef guard.
305   std::string Guard;
306   /// Set if the Unavailable bit is 1. This means we don't generate a body,
307   /// just an "unavailable" attribute on a declaration.
308   bool IsUnavailable;
309   /// Is this intrinsic safe for big-endian? or does it need its arguments
310   /// reversing?
311   bool BigEndianSafe;
312 
313   /// The types of return value [0] and parameters [1..].
314   std::vector<Type> Types;
315   /// The local variables defined.
316   std::map<std::string, Variable> Variables;
317   /// NeededEarly - set if any other intrinsic depends on this intrinsic.
318   bool NeededEarly;
319   /// UseMacro - set if we should implement using a macro or unset for a
320   ///            function.
321   bool UseMacro;
322   /// The set of intrinsics that this intrinsic uses/requires.
323   std::set<Intrinsic *> Dependencies;
324   /// The "base type", which is Type('d', OutTS). InBaseType is only
325   /// different if CartesianProductOfTypes = 1 (for vreinterpret).
326   Type BaseType, InBaseType;
327   /// The return variable.
328   Variable RetVar;
329   /// A postfix to apply to every variable. Defaults to "".
330   std::string VariablePostfix;
331 
332   NeonEmitter &Emitter;
333   std::stringstream OS;
334 
335   bool isBigEndianSafe() const {
336     if (BigEndianSafe)
337       return true;
338 
339     for (const auto &T : Types){
340       if (T.isVector() && T.getNumElements() > 1)
341         return false;
342     }
343     return true;
344   }
345 
346 public:
347   Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
348             TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
349             StringRef Guard, bool IsUnavailable, bool BigEndianSafe)
350       : R(R), Name(Name.str()), Proto(Proto.str()), OutTS(OutTS), InTS(InTS),
351         CK(CK), Body(Body), Guard(Guard.str()), IsUnavailable(IsUnavailable),
352         BigEndianSafe(BigEndianSafe), NeededEarly(false), UseMacro(false),
353         BaseType(OutTS, 'd'), InBaseType(InTS, 'd'), Emitter(Emitter) {
354     // If this builtin takes an immediate argument, we need to #define it rather
355     // than use a standard declaration, so that SemaChecking can range check
356     // the immediate passed by the user.
357     if (Proto.find('i') != std::string::npos)
358       UseMacro = true;
359 
360     // Pointer arguments need to use macros to avoid hiding aligned attributes
361     // from the pointer type.
362     if (Proto.find('p') != std::string::npos ||
363         Proto.find('c') != std::string::npos)
364       UseMacro = true;
365 
366     // It is not permitted to pass or return an __fp16 by value, so intrinsics
367     // taking a scalar float16_t must be implemented as macros.
368     if (OutTS.find('h') != std::string::npos &&
369         Proto.find('s') != std::string::npos)
370       UseMacro = true;
371 
372     // Modify the TypeSpec per-argument to get a concrete Type, and create
373     // known variables for each.
374     // Types[0] is the return value.
375     Types.emplace_back(OutTS, Proto[0]);
376     for (unsigned I = 1; I < Proto.size(); ++I)
377       Types.emplace_back(InTS, Proto[I]);
378   }
379 
380   /// Get the Record that this intrinsic is based off.
381   Record *getRecord() const { return R; }
382   /// Get the set of Intrinsics that this intrinsic calls.
383   /// this is the set of immediate dependencies, NOT the
384   /// transitive closure.
385   const std::set<Intrinsic *> &getDependencies() const { return Dependencies; }
386   /// Get the architectural guard string (#ifdef).
387   std::string getGuard() const { return Guard; }
388   /// Get the non-mangled name.
389   std::string getName() const { return Name; }
390 
391   /// Return true if the intrinsic takes an immediate operand.
392   bool hasImmediate() const {
393     return Proto.find('i') != std::string::npos;
394   }
395 
396   /// Return the parameter index of the immediate operand.
397   unsigned getImmediateIdx() const {
398     assert(hasImmediate());
399     unsigned Idx = Proto.find('i');
400     assert(Idx > 0 && "Can't return an immediate!");
401     return Idx - 1;
402   }
403 
404   /// Return true if the intrinsic takes an splat operand.
405   bool hasSplat() const { return Proto.find('a') != std::string::npos; }
406 
407   /// Return the parameter index of the splat operand.
408   unsigned getSplatIdx() const {
409     assert(hasSplat());
410     unsigned Idx = Proto.find('a');
411     assert(Idx > 0 && "Can't return a splat!");
412     return Idx - 1;
413   }
414 
415   unsigned getNumParams() const { return Proto.size() - 1; }
416   Type getReturnType() const { return Types[0]; }
417   Type getParamType(unsigned I) const { return Types[I + 1]; }
418   Type getBaseType() const { return BaseType; }
419   /// Return the raw prototype string.
420   std::string getProto() const { return Proto; }
421 
422   /// Return true if the prototype has a scalar argument.
423   /// This does not return true for the "splat" code ('a').
424   bool protoHasScalar() const;
425 
426   /// Return the index that parameter PIndex will sit at
427   /// in a generated function call. This is often just PIndex,
428   /// but may not be as things such as multiple-vector operands
429   /// and sret parameters need to be taken into accont.
430   unsigned getGeneratedParamIdx(unsigned PIndex) {
431     unsigned Idx = 0;
432     if (getReturnType().getNumVectors() > 1)
433       // Multiple vectors are passed as sret.
434       ++Idx;
435 
436     for (unsigned I = 0; I < PIndex; ++I)
437       Idx += std::max(1U, getParamType(I).getNumVectors());
438 
439     return Idx;
440   }
441 
442   bool hasBody() const { return Body && !Body->getValues().empty(); }
443 
444   void setNeededEarly() { NeededEarly = true; }
445 
446   bool operator<(const Intrinsic &Other) const {
447     // Sort lexicographically on a two-tuple (Guard, Name)
448     if (Guard != Other.Guard)
449       return Guard < Other.Guard;
450     return Name < Other.Name;
451   }
452 
453   ClassKind getClassKind(bool UseClassBIfScalar = false) {
454     if (UseClassBIfScalar && !protoHasScalar())
455       return ClassB;
456     return CK;
457   }
458 
459   /// Return the name, mangled with type information.
460   /// If ForceClassS is true, use ClassS (u32/s32) instead
461   /// of the intrinsic's own type class.
462   std::string getMangledName(bool ForceClassS = false) const;
463   /// Return the type code for a builtin function call.
464   std::string getInstTypeCode(Type T, ClassKind CK) const;
465   /// Return the type string for a BUILTIN() macro in Builtins.def.
466   std::string getBuiltinTypeStr();
467 
468   /// Generate the intrinsic, returning code.
469   std::string generate();
470   /// Perform type checking and populate the dependency graph, but
471   /// don't generate code yet.
472   void indexBody();
473 
474 private:
475   std::string mangleName(std::string Name, ClassKind CK) const;
476 
477   void initVariables();
478   std::string replaceParamsIn(std::string S);
479 
480   void emitBodyAsBuiltinCall();
481 
482   void generateImpl(bool ReverseArguments,
483                     StringRef NamePrefix, StringRef CallPrefix);
484   void emitReturn();
485   void emitBody(StringRef CallPrefix);
486   void emitShadowedArgs();
487   void emitArgumentReversal();
488   void emitReturnReversal();
489   void emitReverseVariable(Variable &Dest, Variable &Src);
490   void emitNewLine();
491   void emitClosingBrace();
492   void emitOpeningBrace();
493   void emitPrototype(StringRef NamePrefix);
494 
495   class DagEmitter {
496     Intrinsic &Intr;
497     StringRef CallPrefix;
498 
499   public:
500     DagEmitter(Intrinsic &Intr, StringRef CallPrefix) :
501       Intr(Intr), CallPrefix(CallPrefix) {
502     }
503     std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName);
504     std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI);
505     std::pair<Type, std::string> emitDagSplat(DagInit *DI);
506     std::pair<Type, std::string> emitDagDup(DagInit *DI);
507     std::pair<Type, std::string> emitDagDupTyped(DagInit *DI);
508     std::pair<Type, std::string> emitDagShuffle(DagInit *DI);
509     std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast);
510     std::pair<Type, std::string> emitDagCall(DagInit *DI);
511     std::pair<Type, std::string> emitDagNameReplace(DagInit *DI);
512     std::pair<Type, std::string> emitDagLiteral(DagInit *DI);
513     std::pair<Type, std::string> emitDagOp(DagInit *DI);
514     std::pair<Type, std::string> emitDag(DagInit *DI);
515   };
516 };
517 
518 //===----------------------------------------------------------------------===//
519 // NeonEmitter
520 //===----------------------------------------------------------------------===//
521 
522 class NeonEmitter {
523   RecordKeeper &Records;
524   DenseMap<Record *, ClassKind> ClassMap;
525   std::map<std::string, std::deque<Intrinsic>> IntrinsicMap;
526   unsigned UniqueNumber;
527 
528   void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out);
529   void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs);
530   void genOverloadTypeCheckCode(raw_ostream &OS,
531                                 SmallVectorImpl<Intrinsic *> &Defs);
532   void genIntrinsicRangeCheckCode(raw_ostream &OS,
533                                   SmallVectorImpl<Intrinsic *> &Defs);
534 
535 public:
536   /// Called by Intrinsic - this attempts to get an intrinsic that takes
537   /// the given types as arguments.
538   Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types);
539 
540   /// Called by Intrinsic - returns a globally-unique number.
541   unsigned getUniqueNumber() { return UniqueNumber++; }
542 
543   NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) {
544     Record *SI = R.getClass("SInst");
545     Record *II = R.getClass("IInst");
546     Record *WI = R.getClass("WInst");
547     Record *SOpI = R.getClass("SOpInst");
548     Record *IOpI = R.getClass("IOpInst");
549     Record *WOpI = R.getClass("WOpInst");
550     Record *LOpI = R.getClass("LOpInst");
551     Record *NoTestOpI = R.getClass("NoTestOpInst");
552 
553     ClassMap[SI] = ClassS;
554     ClassMap[II] = ClassI;
555     ClassMap[WI] = ClassW;
556     ClassMap[SOpI] = ClassS;
557     ClassMap[IOpI] = ClassI;
558     ClassMap[WOpI] = ClassW;
559     ClassMap[LOpI] = ClassL;
560     ClassMap[NoTestOpI] = ClassNoTest;
561   }
562 
563   // run - Emit arm_neon.h.inc
564   void run(raw_ostream &o);
565 
566   // runFP16 - Emit arm_fp16.h.inc
567   void runFP16(raw_ostream &o);
568 
569   // runHeader - Emit all the __builtin prototypes used in arm_neon.h
570 	// and arm_fp16.h
571   void runHeader(raw_ostream &o);
572 
573   // runTests - Emit tests for all the Neon intrinsics.
574   void runTests(raw_ostream &o);
575 };
576 
577 } // end anonymous namespace
578 
579 //===----------------------------------------------------------------------===//
580 // Type implementation
581 //===----------------------------------------------------------------------===//
582 
583 std::string Type::str() const {
584   if (Void)
585     return "void";
586   std::string S;
587 
588   if (!Signed && isInteger())
589     S += "u";
590 
591   if (Poly)
592     S += "poly";
593   else if (Float)
594     S += "float";
595   else
596     S += "int";
597 
598   S += utostr(ElementBitwidth);
599   if (isVector())
600     S += "x" + utostr(getNumElements());
601   if (NumVectors > 1)
602     S += "x" + utostr(NumVectors);
603   S += "_t";
604 
605   if (Constant)
606     S += " const";
607   if (Pointer)
608     S += " *";
609 
610   return S;
611 }
612 
613 std::string Type::builtin_str() const {
614   std::string S;
615   if (isVoid())
616     return "v";
617 
618   if (Pointer)
619     // All pointers are void pointers.
620     S += "v";
621   else if (isInteger())
622     switch (ElementBitwidth) {
623     case 8: S += "c"; break;
624     case 16: S += "s"; break;
625     case 32: S += "i"; break;
626     case 64: S += "Wi"; break;
627     case 128: S += "LLLi"; break;
628     default: llvm_unreachable("Unhandled case!");
629     }
630   else
631     switch (ElementBitwidth) {
632     case 16: S += "h"; break;
633     case 32: S += "f"; break;
634     case 64: S += "d"; break;
635     default: llvm_unreachable("Unhandled case!");
636     }
637 
638   if (isChar() && !Pointer)
639     // Make chars explicitly signed.
640     S = "S" + S;
641   else if (isInteger() && !Pointer && !Signed)
642     S = "U" + S;
643 
644   // Constant indices are "int", but have the "constant expression" modifier.
645   if (isImmediate()) {
646     assert(isInteger() && isSigned());
647     S = "I" + S;
648   }
649 
650   if (isScalar()) {
651     if (Constant) S += "C";
652     if (Pointer) S += "*";
653     return S;
654   }
655 
656   std::string Ret;
657   for (unsigned I = 0; I < NumVectors; ++I)
658     Ret += "V" + utostr(getNumElements()) + S;
659 
660   return Ret;
661 }
662 
663 unsigned Type::getNeonEnum() const {
664   unsigned Addend;
665   switch (ElementBitwidth) {
666   case 8: Addend = 0; break;
667   case 16: Addend = 1; break;
668   case 32: Addend = 2; break;
669   case 64: Addend = 3; break;
670   case 128: Addend = 4; break;
671   default: llvm_unreachable("Unhandled element bitwidth!");
672   }
673 
674   unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend;
675   if (Poly) {
676     // Adjustment needed because Poly32 doesn't exist.
677     if (Addend >= 2)
678       --Addend;
679     Base = (unsigned)NeonTypeFlags::Poly8 + Addend;
680   }
681   if (Float) {
682     assert(Addend != 0 && "Float8 doesn't exist!");
683     Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1);
684   }
685 
686   if (Bitwidth == 128)
687     Base |= (unsigned)NeonTypeFlags::QuadFlag;
688   if (isInteger() && !Signed)
689     Base |= (unsigned)NeonTypeFlags::UnsignedFlag;
690 
691   return Base;
692 }
693 
694 Type Type::fromTypedefName(StringRef Name) {
695   Type T;
696   T.Void = false;
697   T.Float = false;
698   T.Poly = false;
699 
700   if (Name.front() == 'u') {
701     T.Signed = false;
702     Name = Name.drop_front();
703   } else {
704     T.Signed = true;
705   }
706 
707   if (Name.startswith("float")) {
708     T.Float = true;
709     Name = Name.drop_front(5);
710   } else if (Name.startswith("poly")) {
711     T.Poly = true;
712     Name = Name.drop_front(4);
713   } else {
714     assert(Name.startswith("int"));
715     Name = Name.drop_front(3);
716   }
717 
718   unsigned I = 0;
719   for (I = 0; I < Name.size(); ++I) {
720     if (!isdigit(Name[I]))
721       break;
722   }
723   Name.substr(0, I).getAsInteger(10, T.ElementBitwidth);
724   Name = Name.drop_front(I);
725 
726   T.Bitwidth = T.ElementBitwidth;
727   T.NumVectors = 1;
728 
729   if (Name.front() == 'x') {
730     Name = Name.drop_front();
731     unsigned I = 0;
732     for (I = 0; I < Name.size(); ++I) {
733       if (!isdigit(Name[I]))
734         break;
735     }
736     unsigned NumLanes;
737     Name.substr(0, I).getAsInteger(10, NumLanes);
738     Name = Name.drop_front(I);
739     T.Bitwidth = T.ElementBitwidth * NumLanes;
740   } else {
741     // Was scalar.
742     T.NumVectors = 0;
743   }
744   if (Name.front() == 'x') {
745     Name = Name.drop_front();
746     unsigned I = 0;
747     for (I = 0; I < Name.size(); ++I) {
748       if (!isdigit(Name[I]))
749         break;
750     }
751     Name.substr(0, I).getAsInteger(10, T.NumVectors);
752     Name = Name.drop_front(I);
753   }
754 
755   assert(Name.startswith("_t") && "Malformed typedef!");
756   return T;
757 }
758 
759 void Type::applyTypespec(bool &Quad) {
760   std::string S = TS;
761   ScalarForMangling = false;
762   Void = false;
763   Poly = Float = false;
764   ElementBitwidth = ~0U;
765   Signed = true;
766   NumVectors = 1;
767 
768   for (char I : S) {
769     switch (I) {
770     case 'S':
771       ScalarForMangling = true;
772       break;
773     case 'H':
774       NoManglingQ = true;
775       Quad = true;
776       break;
777     case 'Q':
778       Quad = true;
779       break;
780     case 'P':
781       Poly = true;
782       break;
783     case 'U':
784       Signed = false;
785       break;
786     case 'c':
787       ElementBitwidth = 8;
788       break;
789     case 'h':
790       Float = true;
791       LLVM_FALLTHROUGH;
792     case 's':
793       ElementBitwidth = 16;
794       break;
795     case 'f':
796       Float = true;
797       LLVM_FALLTHROUGH;
798     case 'i':
799       ElementBitwidth = 32;
800       break;
801     case 'd':
802       Float = true;
803       LLVM_FALLTHROUGH;
804     case 'l':
805       ElementBitwidth = 64;
806       break;
807     case 'k':
808       ElementBitwidth = 128;
809       // Poly doesn't have a 128x1 type.
810       if (Poly)
811         NumVectors = 0;
812       break;
813     default:
814       llvm_unreachable("Unhandled type code!");
815     }
816   }
817   assert(ElementBitwidth != ~0U && "Bad element bitwidth!");
818 
819   Bitwidth = Quad ? 128 : 64;
820 }
821 
822 void Type::applyModifier(char Mod) {
823   bool AppliedQuad = false;
824   applyTypespec(AppliedQuad);
825 
826   switch (Mod) {
827   case 'v':
828     Void = true;
829     break;
830   case 't':
831     if (Poly) {
832       Poly = false;
833       Signed = false;
834     }
835     break;
836   case 'b':
837     Signed = false;
838     Float = false;
839     Poly = false;
840     NumVectors = 0;
841     Bitwidth = ElementBitwidth;
842     break;
843   case '$':
844     Signed = true;
845     Float = false;
846     Poly = false;
847     NumVectors = 0;
848     Bitwidth = ElementBitwidth;
849     break;
850   case 'u':
851     Signed = false;
852     Poly = false;
853     Float = false;
854     break;
855   case 'x':
856     Signed = true;
857     assert(!Poly && "'u' can't be used with poly types!");
858     Float = false;
859     break;
860   case 'o':
861     Bitwidth = ElementBitwidth = 64;
862     NumVectors = 0;
863     Float = true;
864     break;
865   case 'y':
866     Bitwidth = ElementBitwidth = 32;
867     NumVectors = 0;
868     Float = true;
869     break;
870   case 'Y':
871     Bitwidth = ElementBitwidth = 16;
872     NumVectors = 0;
873     Float = true;
874     break;
875   case 'I':
876     Bitwidth = ElementBitwidth = 32;
877     NumVectors = 0;
878     Float = false;
879     Signed = true;
880     break;
881   case 'L':
882     Bitwidth = ElementBitwidth = 64;
883     NumVectors = 0;
884     Float = false;
885     Signed = true;
886     break;
887   case 'U':
888     Bitwidth = ElementBitwidth = 32;
889     NumVectors = 0;
890     Float = false;
891     Signed = false;
892     break;
893   case 'O':
894     Bitwidth = ElementBitwidth = 64;
895     NumVectors = 0;
896     Float = false;
897     Signed = false;
898     break;
899   case 'f':
900     Float = true;
901     ElementBitwidth = 32;
902     break;
903   case 'F':
904     Float = true;
905     ElementBitwidth = 64;
906     break;
907   case 'H':
908     Float = true;
909     ElementBitwidth = 16;
910     break;
911   case '0':
912     Float = true;
913     if (AppliedQuad)
914       Bitwidth /= 2;
915     ElementBitwidth = 16;
916     break;
917   case '1':
918     Float = true;
919     if (!AppliedQuad)
920       Bitwidth *= 2;
921     ElementBitwidth = 16;
922     break;
923   case 'g':
924     if (AppliedQuad)
925       Bitwidth /= 2;
926     break;
927   case 'j':
928     if (!AppliedQuad)
929       Bitwidth *= 2;
930     break;
931   case 'w':
932     ElementBitwidth *= 2;
933     Bitwidth *= 2;
934     break;
935   case 'n':
936     ElementBitwidth *= 2;
937     break;
938   case 'i':
939     Float = false;
940     Poly = false;
941     ElementBitwidth = Bitwidth = 32;
942     NumVectors = 0;
943     Signed = true;
944     Immediate = true;
945     break;
946   case 'l':
947     Float = false;
948     Poly = false;
949     ElementBitwidth = Bitwidth = 64;
950     NumVectors = 0;
951     Signed = false;
952     Immediate = true;
953     break;
954   case 'z':
955     ElementBitwidth /= 2;
956     Bitwidth = ElementBitwidth;
957     NumVectors = 0;
958     break;
959   case 'r':
960     ElementBitwidth *= 2;
961     Bitwidth = ElementBitwidth;
962     NumVectors = 0;
963     break;
964   case 's':
965   case 'a':
966     Bitwidth = ElementBitwidth;
967     NumVectors = 0;
968     break;
969   case 'k':
970     Bitwidth *= 2;
971     break;
972   case 'c':
973     Constant = true;
974     LLVM_FALLTHROUGH;
975   case 'p':
976     Pointer = true;
977     Bitwidth = ElementBitwidth;
978     NumVectors = 0;
979     break;
980   case 'h':
981     ElementBitwidth /= 2;
982     break;
983   case 'q':
984     ElementBitwidth /= 2;
985     Bitwidth *= 2;
986     break;
987   case 'e':
988     ElementBitwidth /= 2;
989     Signed = false;
990     break;
991   case 'm':
992     ElementBitwidth /= 2;
993     Bitwidth /= 2;
994     break;
995   case 'd':
996     break;
997   case '2':
998     NumVectors = 2;
999     break;
1000   case '3':
1001     NumVectors = 3;
1002     break;
1003   case '4':
1004     NumVectors = 4;
1005     break;
1006   case 'B':
1007     NumVectors = 2;
1008     if (!AppliedQuad)
1009       Bitwidth *= 2;
1010     break;
1011   case 'C':
1012     NumVectors = 3;
1013     if (!AppliedQuad)
1014       Bitwidth *= 2;
1015     break;
1016   case 'D':
1017     NumVectors = 4;
1018     if (!AppliedQuad)
1019       Bitwidth *= 2;
1020     break;
1021   case '7':
1022     if (AppliedQuad)
1023       Bitwidth /= 2;
1024     ElementBitwidth = 8;
1025     break;
1026   case '8':
1027     ElementBitwidth = 8;
1028     break;
1029   case '9':
1030     if (!AppliedQuad)
1031       Bitwidth *= 2;
1032     ElementBitwidth = 8;
1033     break;
1034   default:
1035     llvm_unreachable("Unhandled character!");
1036   }
1037 }
1038 
1039 //===----------------------------------------------------------------------===//
1040 // Intrinsic implementation
1041 //===----------------------------------------------------------------------===//
1042 
1043 std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
1044   char typeCode = '\0';
1045   bool printNumber = true;
1046 
1047   if (CK == ClassB)
1048     return "";
1049 
1050   if (T.isPoly())
1051     typeCode = 'p';
1052   else if (T.isInteger())
1053     typeCode = T.isSigned() ? 's' : 'u';
1054   else
1055     typeCode = 'f';
1056 
1057   if (CK == ClassI) {
1058     switch (typeCode) {
1059     default:
1060       break;
1061     case 's':
1062     case 'u':
1063     case 'p':
1064       typeCode = 'i';
1065       break;
1066     }
1067   }
1068   if (CK == ClassB) {
1069     typeCode = '\0';
1070   }
1071 
1072   std::string S;
1073   if (typeCode != '\0')
1074     S.push_back(typeCode);
1075   if (printNumber)
1076     S += utostr(T.getElementSizeInBits());
1077 
1078   return S;
1079 }
1080 
/// Returns true if \p Mod is one of the prototype modifier characters
/// 'F', 'f', 'H', 'Y' or 'I'.
static bool isFloatingPointProtoModifier(char Mod) {
  switch (Mod) {
  case 'F':
  case 'f':
  case 'H':
  case 'Y':
  case 'I':
    return true;
  default:
    return false;
  }
}
1084 
1085 std::string Intrinsic::getBuiltinTypeStr() {
1086   ClassKind LocalCK = getClassKind(true);
1087   std::string S;
1088 
1089   Type RetT = getReturnType();
1090   if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() &&
1091       !RetT.isFloating())
1092     RetT.makeInteger(RetT.getElementSizeInBits(), false);
1093 
1094   // Since the return value must be one type, return a vector type of the
1095   // appropriate width which we will bitcast.  An exception is made for
1096   // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
1097   // fashion, storing them to a pointer arg.
1098   if (RetT.getNumVectors() > 1) {
1099     S += "vv*"; // void result with void* first argument
1100   } else {
1101     if (RetT.isPoly())
1102       RetT.makeInteger(RetT.getElementSizeInBits(), false);
1103     if (!RetT.isScalar() && !RetT.isSigned())
1104       RetT.makeSigned();
1105 
1106     bool ForcedVectorFloatingType = isFloatingPointProtoModifier(Proto[0]);
1107     if (LocalCK == ClassB && !RetT.isScalar() && !ForcedVectorFloatingType)
1108       // Cast to vector of 8-bit elements.
1109       RetT.makeInteger(8, true);
1110 
1111     S += RetT.builtin_str();
1112   }
1113 
1114   for (unsigned I = 0; I < getNumParams(); ++I) {
1115     Type T = getParamType(I);
1116     if (T.isPoly())
1117       T.makeInteger(T.getElementSizeInBits(), false);
1118 
1119     bool ForcedFloatingType = isFloatingPointProtoModifier(Proto[I + 1]);
1120     if (LocalCK == ClassB && !T.isScalar() && !ForcedFloatingType)
1121       T.makeInteger(8, true);
1122     // Halves always get converted to 8-bit elements.
1123     if (T.isHalf() && T.isVector() && !T.isScalarForMangling())
1124       T.makeInteger(8, true);
1125 
1126     if (LocalCK == ClassI)
1127       T.makeSigned();
1128 
1129     if (hasImmediate() && getImmediateIdx() == I)
1130       T.makeImmediate(32);
1131 
1132     S += T.builtin_str();
1133   }
1134 
1135   // Extra constant integer to hold type class enum for this function, e.g. s8
1136   if (LocalCK == ClassB)
1137     S += "i";
1138 
1139   return S;
1140 }
1141 
1142 std::string Intrinsic::getMangledName(bool ForceClassS) const {
1143   // Check if the prototype has a scalar operand with the type of the vector
1144   // elements.  If not, bitcasting the args will take care of arg checking.
1145   // The actual signedness etc. will be taken care of with special enums.
1146   ClassKind LocalCK = CK;
1147   if (!protoHasScalar())
1148     LocalCK = ClassB;
1149 
1150   return mangleName(Name, ForceClassS ? ClassS : LocalCK);
1151 }
1152 
1153 std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const {
1154   std::string typeCode = getInstTypeCode(BaseType, LocalCK);
1155   std::string S = Name;
1156 
1157   if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" ||
1158       Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32")
1159     return Name;
1160 
1161   if (!typeCode.empty()) {
1162     // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN.
1163     if (Name.size() >= 3 && isdigit(Name.back()) &&
1164         Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_')
1165       S.insert(S.length() - 3, "_" + typeCode);
1166     else
1167       S += "_" + typeCode;
1168   }
1169 
1170   if (BaseType != InBaseType) {
1171     // A reinterpret - out the input base type at the end.
1172     S += "_" + getInstTypeCode(InBaseType, LocalCK);
1173   }
1174 
1175   if (LocalCK == ClassB)
1176     S += "_v";
1177 
1178   // Insert a 'q' before the first '_' character so that it ends up before
1179   // _lane or _n on vector-scalar operations.
1180   if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) {
1181     size_t Pos = S.find('_');
1182     S.insert(Pos, "q");
1183   }
1184 
1185   char Suffix = '\0';
1186   if (BaseType.isScalarForMangling()) {
1187     switch (BaseType.getElementSizeInBits()) {
1188     case 8: Suffix = 'b'; break;
1189     case 16: Suffix = 'h'; break;
1190     case 32: Suffix = 's'; break;
1191     case 64: Suffix = 'd'; break;
1192     default: llvm_unreachable("Bad suffix!");
1193     }
1194   }
1195   if (Suffix != '\0') {
1196     size_t Pos = S.find('_');
1197     S.insert(Pos, &Suffix, 1);
1198   }
1199 
1200   return S;
1201 }
1202 
1203 std::string Intrinsic::replaceParamsIn(std::string S) {
1204   while (S.find('$') != std::string::npos) {
1205     size_t Pos = S.find('$');
1206     size_t End = Pos + 1;
1207     while (isalpha(S[End]))
1208       ++End;
1209 
1210     std::string VarName = S.substr(Pos + 1, End - Pos - 1);
1211     assert_with_loc(Variables.find(VarName) != Variables.end(),
1212                     "Variable not defined!");
1213     S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName());
1214   }
1215 
1216   return S;
1217 }
1218 
1219 void Intrinsic::initVariables() {
1220   Variables.clear();
1221 
1222   // Modify the TypeSpec per-argument to get a concrete Type, and create
1223   // known variables for each.
1224   for (unsigned I = 1; I < Proto.size(); ++I) {
1225     char NameC = '0' + (I - 1);
1226     std::string Name = "p";
1227     Name.push_back(NameC);
1228 
1229     Variables[Name] = Variable(Types[I], Name + VariablePostfix);
1230   }
1231   RetVar = Variable(Types[0], "ret" + VariablePostfix);
1232 }
1233 
1234 void Intrinsic::emitPrototype(StringRef NamePrefix) {
1235   if (UseMacro)
1236     OS << "#define ";
1237   else
1238     OS << "__ai " << Types[0].str() << " ";
1239 
1240   OS << NamePrefix.str() << mangleName(Name, ClassS) << "(";
1241 
1242   for (unsigned I = 0; I < getNumParams(); ++I) {
1243     if (I != 0)
1244       OS << ", ";
1245 
1246     char NameC = '0' + I;
1247     std::string Name = "p";
1248     Name.push_back(NameC);
1249     assert(Variables.find(Name) != Variables.end());
1250     Variable &V = Variables[Name];
1251 
1252     if (!UseMacro)
1253       OS << V.getType().str() << " ";
1254     OS << V.getName();
1255   }
1256 
1257   OS << ")";
1258 }
1259 
1260 void Intrinsic::emitOpeningBrace() {
1261   if (UseMacro)
1262     OS << " __extension__ ({";
1263   else
1264     OS << " {";
1265   emitNewLine();
1266 }
1267 
1268 void Intrinsic::emitClosingBrace() {
1269   if (UseMacro)
1270     OS << "})";
1271   else
1272     OS << "}";
1273 }
1274 
1275 void Intrinsic::emitNewLine() {
1276   if (UseMacro)
1277     OS << " \\\n";
1278   else
1279     OS << "\n";
1280 }
1281 
1282 void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) {
1283   if (Dest.getType().getNumVectors() > 1) {
1284     emitNewLine();
1285 
1286     for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) {
1287       OS << "  " << Dest.getName() << ".val[" << K << "] = "
1288          << "__builtin_shufflevector("
1289          << Src.getName() << ".val[" << K << "], "
1290          << Src.getName() << ".val[" << K << "]";
1291       for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J)
1292         OS << ", " << J;
1293       OS << ");";
1294       emitNewLine();
1295     }
1296   } else {
1297     OS << "  " << Dest.getName()
1298        << " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName();
1299     for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J)
1300       OS << ", " << J;
1301     OS << ");";
1302     emitNewLine();
1303   }
1304 }
1305 
1306 void Intrinsic::emitArgumentReversal() {
1307   if (isBigEndianSafe())
1308     return;
1309 
1310   // Reverse all vector arguments.
1311   for (unsigned I = 0; I < getNumParams(); ++I) {
1312     std::string Name = "p" + utostr(I);
1313     std::string NewName = "rev" + utostr(I);
1314 
1315     Variable &V = Variables[Name];
1316     Variable NewV(V.getType(), NewName + VariablePostfix);
1317 
1318     if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1)
1319       continue;
1320 
1321     OS << "  " << NewV.getType().str() << " " << NewV.getName() << ";";
1322     emitReverseVariable(NewV, V);
1323     V = NewV;
1324   }
1325 }
1326 
1327 void Intrinsic::emitReturnReversal() {
1328   if (isBigEndianSafe())
1329     return;
1330   if (!getReturnType().isVector() || getReturnType().isVoid() ||
1331       getReturnType().getNumElements() == 1)
1332     return;
1333   emitReverseVariable(RetVar, RetVar);
1334 }
1335 
1336 void Intrinsic::emitShadowedArgs() {
1337   // Macro arguments are not type-checked like inline function arguments,
1338   // so assign them to local temporaries to get the right type checking.
1339   if (!UseMacro)
1340     return;
1341 
1342   for (unsigned I = 0; I < getNumParams(); ++I) {
1343     // Do not create a temporary for an immediate argument.
1344     // That would defeat the whole point of using a macro!
1345     if (hasImmediate() && Proto[I+1] == 'i')
1346       continue;
1347     // Do not create a temporary for pointer arguments. The input
1348     // pointer may have an alignment hint.
1349     if (getParamType(I).isPointer())
1350       continue;
1351 
1352     std::string Name = "p" + utostr(I);
1353 
1354     assert(Variables.find(Name) != Variables.end());
1355     Variable &V = Variables[Name];
1356 
1357     std::string NewName = "s" + utostr(I);
1358     Variable V2(V.getType(), NewName + VariablePostfix);
1359 
1360     OS << "  " << V2.getType().str() << " " << V2.getName() << " = "
1361        << V.getName() << ";";
1362     emitNewLine();
1363 
1364     V = V2;
1365   }
1366 }
1367 
1368 // We don't check 'a' in this function, because for builtin function the
1369 // argument matching to 'a' uses a vector type splatted from a scalar type.
1370 bool Intrinsic::protoHasScalar() const {
1371   return (Proto.find('s') != std::string::npos ||
1372           Proto.find('z') != std::string::npos ||
1373           Proto.find('r') != std::string::npos ||
1374           Proto.find('b') != std::string::npos ||
1375           Proto.find('$') != std::string::npos ||
1376           Proto.find('y') != std::string::npos ||
1377           Proto.find('o') != std::string::npos);
1378 }
1379 
1380 void Intrinsic::emitBodyAsBuiltinCall() {
1381   std::string S;
1382 
1383   // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
1384   // sret-like argument.
1385   bool SRet = getReturnType().getNumVectors() >= 2;
1386 
1387   StringRef N = Name;
1388   if (hasSplat()) {
1389     // Call the non-splat builtin: chop off the "_n" suffix from the name.
1390     assert(N.endswith("_n"));
1391     N = N.drop_back(2);
1392   }
1393 
1394   ClassKind LocalCK = CK;
1395   if (!protoHasScalar())
1396     LocalCK = ClassB;
1397 
1398   if (!getReturnType().isVoid() && !SRet)
1399     S += "(" + RetVar.getType().str() + ") ";
1400 
1401   S += "__builtin_neon_" + mangleName(N, LocalCK) + "(";
1402 
1403   if (SRet)
1404     S += "&" + RetVar.getName() + ", ";
1405 
1406   for (unsigned I = 0; I < getNumParams(); ++I) {
1407     Variable &V = Variables["p" + utostr(I)];
1408     Type T = V.getType();
1409 
1410     // Handle multiple-vector values specially, emitting each subvector as an
1411     // argument to the builtin.
1412     if (T.getNumVectors() > 1) {
1413       // Check if an explicit cast is needed.
1414       std::string Cast;
1415       if (T.isChar() || T.isPoly() || !T.isSigned()) {
1416         Type T2 = T;
1417         T2.makeOneVector();
1418         T2.makeInteger(8, /*Signed=*/true);
1419         Cast = "(" + T2.str() + ")";
1420       }
1421 
1422       for (unsigned J = 0; J < T.getNumVectors(); ++J)
1423         S += Cast + V.getName() + ".val[" + utostr(J) + "], ";
1424       continue;
1425     }
1426 
1427     std::string Arg;
1428     Type CastToType = T;
1429     if (hasSplat() && I == getSplatIdx()) {
1430       Arg = "(" + BaseType.str() + ") {";
1431       for (unsigned J = 0; J < BaseType.getNumElements(); ++J) {
1432         if (J != 0)
1433           Arg += ", ";
1434         Arg += V.getName();
1435       }
1436       Arg += "}";
1437 
1438       CastToType = BaseType;
1439     } else {
1440       Arg = V.getName();
1441     }
1442 
1443     // Check if an explicit cast is needed.
1444     if (CastToType.isVector()) {
1445       CastToType.makeInteger(8, true);
1446       Arg = "(" + CastToType.str() + ")" + Arg;
1447     }
1448 
1449     S += Arg + ", ";
1450   }
1451 
1452   // Extra constant integer to hold type class enum for this function, e.g. s8
1453   if (getClassKind(true) == ClassB) {
1454     Type ThisTy = getReturnType();
1455     if (Proto[0] == 'v' || isFloatingPointProtoModifier(Proto[0]))
1456       ThisTy = getParamType(0);
1457     if (ThisTy.isPointer())
1458       ThisTy = getParamType(1);
1459 
1460     S += utostr(ThisTy.getNeonEnum());
1461   } else {
1462     // Remove extraneous ", ".
1463     S.pop_back();
1464     S.pop_back();
1465   }
1466   S += ");";
1467 
1468   std::string RetExpr;
1469   if (!SRet && !RetVar.getType().isVoid())
1470     RetExpr = RetVar.getName() + " = ";
1471 
1472   OS << "  " << RetExpr << S;
1473   emitNewLine();
1474 }
1475 
1476 void Intrinsic::emitBody(StringRef CallPrefix) {
1477   std::vector<std::string> Lines;
1478 
1479   assert(RetVar.getType() == Types[0]);
1480   // Create a return variable, if we're not void.
1481   if (!RetVar.getType().isVoid()) {
1482     OS << "  " << RetVar.getType().str() << " " << RetVar.getName() << ";";
1483     emitNewLine();
1484   }
1485 
1486   if (!Body || Body->getValues().empty()) {
1487     // Nothing specific to output - must output a builtin.
1488     emitBodyAsBuiltinCall();
1489     return;
1490   }
1491 
1492   // We have a list of "things to output". The last should be returned.
1493   for (auto *I : Body->getValues()) {
1494     if (StringInit *SI = dyn_cast<StringInit>(I)) {
1495       Lines.push_back(replaceParamsIn(SI->getAsString()));
1496     } else if (DagInit *DI = dyn_cast<DagInit>(I)) {
1497       DagEmitter DE(*this, CallPrefix);
1498       Lines.push_back(DE.emitDag(DI).second + ";");
1499     }
1500   }
1501 
1502   assert(!Lines.empty() && "Empty def?");
1503   if (!RetVar.getType().isVoid())
1504     Lines.back().insert(0, RetVar.getName() + " = ");
1505 
1506   for (auto &L : Lines) {
1507     OS << "  " << L;
1508     emitNewLine();
1509   }
1510 }
1511 
1512 void Intrinsic::emitReturn() {
1513   if (RetVar.getType().isVoid())
1514     return;
1515   if (UseMacro)
1516     OS << "  " << RetVar.getName() << ";";
1517   else
1518     OS << "  return " << RetVar.getName() << ";";
1519   emitNewLine();
1520 }
1521 
1522 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) {
1523   // At this point we should only be seeing a def.
1524   DefInit *DefI = cast<DefInit>(DI->getOperator());
1525   std::string Op = DefI->getAsString();
1526 
1527   if (Op == "cast" || Op == "bitcast")
1528     return emitDagCast(DI, Op == "bitcast");
1529   if (Op == "shuffle")
1530     return emitDagShuffle(DI);
1531   if (Op == "dup")
1532     return emitDagDup(DI);
1533   if (Op == "dup_typed")
1534     return emitDagDupTyped(DI);
1535   if (Op == "splat")
1536     return emitDagSplat(DI);
1537   if (Op == "save_temp")
1538     return emitDagSaveTemp(DI);
1539   if (Op == "op")
1540     return emitDagOp(DI);
1541   if (Op == "call")
1542     return emitDagCall(DI);
1543   if (Op == "name_replace")
1544     return emitDagNameReplace(DI);
1545   if (Op == "literal")
1546     return emitDagLiteral(DI);
1547   assert_with_loc(false, "Unknown operation!");
1548   return std::make_pair(Type::getVoid(), "");
1549 }
1550 
1551 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) {
1552   std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
1553   if (DI->getNumArgs() == 2) {
1554     // Unary op.
1555     std::pair<Type, std::string> R =
1556         emitDagArg(DI->getArg(1), DI->getArgNameStr(1));
1557     return std::make_pair(R.first, Op + R.second);
1558   } else {
1559     assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!");
1560     std::pair<Type, std::string> R1 =
1561         emitDagArg(DI->getArg(1), DI->getArgNameStr(1));
1562     std::pair<Type, std::string> R2 =
1563         emitDagArg(DI->getArg(2), DI->getArgNameStr(2));
1564     assert_with_loc(R1.first == R2.first, "Argument type mismatch!");
1565     return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second);
1566   }
1567 }
1568 
1569 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCall(DagInit *DI) {
1570   std::vector<Type> Types;
1571   std::vector<std::string> Values;
1572   for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
1573     std::pair<Type, std::string> R =
1574         emitDagArg(DI->getArg(I + 1), DI->getArgNameStr(I + 1));
1575     Types.push_back(R.first);
1576     Values.push_back(R.second);
1577   }
1578 
1579   // Look up the called intrinsic.
1580   std::string N;
1581   if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0)))
1582     N = SI->getAsUnquotedString();
1583   else
1584     N = emitDagArg(DI->getArg(0), "").second;
1585   Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types);
1586 
1587   // Make sure the callee is known as an early def.
1588   Callee.setNeededEarly();
1589   Intr.Dependencies.insert(&Callee);
1590 
1591   // Now create the call itself.
1592   std::string S = "";
1593   if (!Callee.isBigEndianSafe())
1594     S += CallPrefix.str();
1595   S += Callee.getMangledName(true) + "(";
1596   for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
1597     if (I != 0)
1598       S += ", ";
1599     S += Values[I];
1600   }
1601   S += ")";
1602 
1603   return std::make_pair(Callee.getReturnType(), S);
1604 }
1605 
1606 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI,
1607                                                                 bool IsBitCast){
1608   // (cast MOD* VAL) -> cast VAL to type given by MOD.
1609   std::pair<Type, std::string> R = emitDagArg(
1610       DI->getArg(DI->getNumArgs() - 1),
1611       DI->getArgNameStr(DI->getNumArgs() - 1));
1612   Type castToType = R.first;
1613   for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) {
1614 
1615     // MOD can take several forms:
1616     //   1. $X - take the type of parameter / variable X.
1617     //   2. The value "R" - take the type of the return type.
1618     //   3. a type string
1619     //   4. The value "U" or "S" to switch the signedness.
1620     //   5. The value "H" or "D" to half or double the bitwidth.
1621     //   6. The value "8" to convert to 8-bit (signed) integer lanes.
1622     if (!DI->getArgNameStr(ArgIdx).empty()) {
1623       assert_with_loc(Intr.Variables.find(DI->getArgNameStr(ArgIdx)) !=
1624                       Intr.Variables.end(),
1625                       "Variable not found");
1626       castToType = Intr.Variables[DI->getArgNameStr(ArgIdx)].getType();
1627     } else {
1628       StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx));
1629       assert_with_loc(SI, "Expected string type or $Name for cast type");
1630 
1631       if (SI->getAsUnquotedString() == "R") {
1632         castToType = Intr.getReturnType();
1633       } else if (SI->getAsUnquotedString() == "U") {
1634         castToType.makeUnsigned();
1635       } else if (SI->getAsUnquotedString() == "S") {
1636         castToType.makeSigned();
1637       } else if (SI->getAsUnquotedString() == "H") {
1638         castToType.halveLanes();
1639       } else if (SI->getAsUnquotedString() == "D") {
1640         castToType.doubleLanes();
1641       } else if (SI->getAsUnquotedString() == "8") {
1642         castToType.makeInteger(8, true);
1643       } else {
1644         castToType = Type::fromTypedefName(SI->getAsUnquotedString());
1645         assert_with_loc(!castToType.isVoid(), "Unknown typedef");
1646       }
1647     }
1648   }
1649 
1650   std::string S;
1651   if (IsBitCast) {
1652     // Emit a reinterpret cast. The second operand must be an lvalue, so create
1653     // a temporary.
1654     std::string N = "reint";
1655     unsigned I = 0;
1656     while (Intr.Variables.find(N) != Intr.Variables.end())
1657       N = "reint" + utostr(++I);
1658     Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix);
1659 
1660     Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = "
1661             << R.second << ";";
1662     Intr.emitNewLine();
1663 
1664     S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + "";
1665   } else {
1666     // Emit a normal (static) cast.
1667     S = "(" + castToType.str() + ")(" + R.second + ")";
1668   }
1669 
1670   return std::make_pair(castToType, S);
1671 }
1672 
/// Emit a __builtin_shufflevector call for a (shuffle arg1, arg2, sequence)
/// DAG.
///
/// The two vector operands must have the same type. The third argument is a
/// set expression that is evaluated with SetTheory, extended by the local
/// operators and expander defined below, into an ordered list of "svN"
/// records; the numeric part of each record name becomes one shuffle index.
/// The returned type is the operand type, with its lanes doubled or halved
/// when the mask has twice or half as many entries as the operand has
/// elements.
std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){
  // See the documentation in arm_neon.td for a description of these operators.

  // "lowhalf": evaluates its arguments and keeps the first size/2 elements.
  class LowHalf : public SetTheory::Operator {
  public:
    void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
               ArrayRef<SMLoc> Loc) override {
      SetTheory::RecSet Elts2;
      ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
      Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2));
    }
  };

  // "highhalf": evaluates its arguments and keeps the elements from index
  // size/2 to the end.
  class HighHalf : public SetTheory::Operator {
  public:
    void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
               ArrayRef<SMLoc> Loc) override {
      SetTheory::RecSet Elts2;
      ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
      Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end());
    }
  };

  // "rev": the first argument is an integer which, divided by ElementSize,
  // gives a group length; the remaining arguments are evaluated and the
  // order of the elements inside each consecutive group is reversed.
  class Rev : public SetTheory::Operator {
    unsigned ElementSize;

  public:
    Rev(unsigned ElementSize) : ElementSize(ElementSize) {}

    void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
               ArrayRef<SMLoc> Loc) override {
      SetTheory::RecSet Elts2;
      // Skip argument 0: it is the integer read below, not a set element.
      ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc);

      int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue();
      VectorSize /= ElementSize;

      // Walk the elements group by group, pushing each group back to front.
      // NOTE(review): assumes VectorSize is non-zero and divides
      // Elts2.size(); neither is checked here.
      std::vector<Record *> Revved;
      for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) {
        for (int LI = VectorSize - 1; LI >= 0; --LI) {
          Revved.push_back(Elts2[VI + LI]);
        }
      }

      Elts.insert(Revved.begin(), Revved.end());
    }
  };

  // Expands the records named "mask0" and "mask1" into N "sv" records each:
  // mask0 -> sv0 .. sv(N-1), mask1 -> svN .. sv(2N-1). Records with any other
  // name contribute nothing.
  class MaskExpander : public SetTheory::Expander {
    unsigned N;

  public:
    MaskExpander(unsigned N) : N(N) {}

    void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) override {
      unsigned Addend = 0;
      if (R->getName() == "mask0")
        Addend = 0;
      else if (R->getName() == "mask1")
        Addend = N;
      else
        return;
      for (unsigned I = 0; I < N; ++I)
        Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend)));
    }
  };

  // (shuffle arg1, arg2, sequence)
  std::pair<Type, std::string> Arg1 =
      emitDagArg(DI->getArg(0), DI->getArgNameStr(0));
  std::pair<Type, std::string> Arg2 =
      emitDagArg(DI->getArg(1), DI->getArgNameStr(1));
  assert_with_loc(Arg1.first == Arg2.first,
                  "Different types in arguments to shuffle!");

  // Evaluate the mask expression. "rev" is parameterised with the element
  // size of the operands and the mask expander with their element count.
  SetTheory ST;
  SetTheory::RecSet Elts;
  ST.addOperator("lowhalf", std::make_unique<LowHalf>());
  ST.addOperator("highhalf", std::make_unique<HighHalf>());
  ST.addOperator("rev",
                 std::make_unique<Rev>(Arg1.first.getElementSizeInBits()));
  ST.addExpander("MaskExpand",
                 std::make_unique<MaskExpander>(Arg1.first.getNumElements()));
  ST.evaluate(DI->getArg(2), Elts, None);

  // Each mask element is a record named "sv<index>"; strip the "sv" prefix to
  // obtain the shuffle index.
  std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second;
  for (auto &E : Elts) {
    StringRef Name = E->getName();
    assert_with_loc(Name.startswith("sv"),
                    "Incorrect element kind in shuffle mask!");
    S += ", " + Name.drop_front(2).str();
  }
  S += ")";

  // Recalculate the return type - the shuffle may have halved or doubled it.
  Type T(Arg1.first);
  if (Elts.size() > T.getNumElements()) {
    assert_with_loc(
        Elts.size() == T.getNumElements() * 2,
        "Can only double or half the number of elements in a shuffle!");
    T.doubleLanes();
  } else if (Elts.size() < T.getNumElements()) {
    assert_with_loc(
        Elts.size() == T.getNumElements() / 2,
        "Can only double or half the number of elements in a shuffle!");
    T.halveLanes();
  }

  return std::make_pair(T, S);
}
1782 
1783 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) {
1784   assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument");
1785   std::pair<Type, std::string> A = emitDagArg(DI->getArg(0),
1786                                               DI->getArgNameStr(0));
1787   assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument");
1788 
1789   Type T = Intr.getBaseType();
1790   assert_with_loc(T.isVector(), "dup() used but default type is scalar!");
1791   std::string S = "(" + T.str() + ") {";
1792   for (unsigned I = 0; I < T.getNumElements(); ++I) {
1793     if (I != 0)
1794       S += ", ";
1795     S += A.second;
1796   }
1797   S += "}";
1798 
1799   return std::make_pair(T, S);
1800 }
1801 
1802 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDupTyped(DagInit *DI) {
1803   assert_with_loc(DI->getNumArgs() == 2, "dup_typed() expects two arguments");
1804   std::pair<Type, std::string> A = emitDagArg(DI->getArg(0),
1805                                               DI->getArgNameStr(0));
1806   std::pair<Type, std::string> B = emitDagArg(DI->getArg(1),
1807                                               DI->getArgNameStr(1));
1808   assert_with_loc(B.first.isScalar(),
1809                   "dup_typed() requires a scalar as the second argument");
1810 
1811   Type T = A.first;
1812   assert_with_loc(T.isVector(), "dup_typed() used but target type is scalar!");
1813   std::string S = "(" + T.str() + ") {";
1814   for (unsigned I = 0; I < T.getNumElements(); ++I) {
1815     if (I != 0)
1816       S += ", ";
1817     S += B.second;
1818   }
1819   S += "}";
1820 
1821   return std::make_pair(T, S);
1822 }
1823 
1824 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) {
1825   assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments");
1826   std::pair<Type, std::string> A = emitDagArg(DI->getArg(0),
1827                                               DI->getArgNameStr(0));
1828   std::pair<Type, std::string> B = emitDagArg(DI->getArg(1),
1829                                               DI->getArgNameStr(1));
1830 
1831   assert_with_loc(B.first.isScalar(),
1832                   "splat() requires a scalar int as the second argument");
1833 
1834   std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second;
1835   for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) {
1836     S += ", " + B.second;
1837   }
1838   S += ")";
1839 
1840   return std::make_pair(Intr.getBaseType(), S);
1841 }
1842 
1843 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) {
1844   assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments");
1845   std::pair<Type, std::string> A = emitDagArg(DI->getArg(1),
1846                                               DI->getArgNameStr(1));
1847 
1848   assert_with_loc(!A.first.isVoid(),
1849                   "Argument to save_temp() must have non-void type!");
1850 
1851   std::string N = DI->getArgNameStr(0);
1852   assert_with_loc(!N.empty(),
1853                   "save_temp() expects a name as the first argument");
1854 
1855   assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(),
1856                   "Variable already defined!");
1857   Intr.Variables[N] = Variable(A.first, N + Intr.VariablePostfix);
1858 
1859   std::string S =
1860       A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second;
1861 
1862   return std::make_pair(Type::getVoid(), S);
1863 }
1864 
1865 std::pair<Type, std::string>
1866 Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) {
1867   std::string S = Intr.Name;
1868 
1869   assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!");
1870   std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
1871   std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
1872 
1873   size_t Idx = S.find(ToReplace);
1874 
1875   assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!");
1876   S.replace(Idx, ToReplace.size(), ReplaceWith);
1877 
1878   return std::make_pair(Type::getVoid(), S);
1879 }
1880 
1881 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){
1882   std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
1883   std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
1884   return std::make_pair(Type::fromTypedefName(Ty), Value);
1885 }
1886 
1887 std::pair<Type, std::string>
1888 Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) {
1889   if (!ArgName.empty()) {
1890     assert_with_loc(!Arg->isComplete(),
1891                     "Arguments must either be DAGs or names, not both!");
1892     assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(),
1893                     "Variable not defined!");
1894     Variable &V = Intr.Variables[ArgName];
1895     return std::make_pair(V.getType(), V.getName());
1896   }
1897 
1898   assert(Arg && "Neither ArgName nor Arg?!");
1899   DagInit *DI = dyn_cast<DagInit>(Arg);
1900   assert_with_loc(DI, "Arguments must either be DAGs or names!");
1901 
1902   return emitDag(DI);
1903 }
1904 
1905 std::string Intrinsic::generate() {
1906   // Avoid duplicated code for big and little endian
1907   if (isBigEndianSafe()) {
1908     generateImpl(false, "", "");
1909     return OS.str();
1910   }
1911   // Little endian intrinsics are simple and don't require any argument
1912   // swapping.
1913   OS << "#ifdef __LITTLE_ENDIAN__\n";
1914 
1915   generateImpl(false, "", "");
1916 
1917   OS << "#else\n";
1918 
1919   // Big endian intrinsics are more complex. The user intended these
1920   // intrinsics to operate on a vector "as-if" loaded by (V)LDR,
1921   // but we load as-if (V)LD1. So we should swap all arguments and
1922   // swap the return value too.
1923   //
1924   // If we call sub-intrinsics, we should call a version that does
1925   // not re-swap the arguments!
1926   generateImpl(true, "", "__noswap_");
1927 
1928   // If we're needed early, create a non-swapping variant for
1929   // big-endian.
1930   if (NeededEarly) {
1931     generateImpl(false, "__noswap_", "__noswap_");
1932   }
1933   OS << "#endif\n\n";
1934 
1935   return OS.str();
1936 }
1937 
1938 void Intrinsic::generateImpl(bool ReverseArguments,
1939                              StringRef NamePrefix, StringRef CallPrefix) {
1940   CurrentRecord = R;
1941 
1942   // If we call a macro, our local variables may be corrupted due to
1943   // lack of proper lexical scoping. So, add a globally unique postfix
1944   // to every variable.
1945   //
1946   // indexBody() should have set up the Dependencies set by now.
1947   for (auto *I : Dependencies)
1948     if (I->UseMacro) {
1949       VariablePostfix = "_" + utostr(Emitter.getUniqueNumber());
1950       break;
1951     }
1952 
1953   initVariables();
1954 
1955   emitPrototype(NamePrefix);
1956 
1957   if (IsUnavailable) {
1958     OS << " __attribute__((unavailable));";
1959   } else {
1960     emitOpeningBrace();
1961     emitShadowedArgs();
1962     if (ReverseArguments)
1963       emitArgumentReversal();
1964     emitBody(CallPrefix);
1965     if (ReverseArguments)
1966       emitReturnReversal();
1967     emitReturn();
1968     emitClosingBrace();
1969   }
1970   OS << "\n";
1971 
1972   CurrentRecord = nullptr;
1973 }
1974 
1975 void Intrinsic::indexBody() {
1976   CurrentRecord = R;
1977 
1978   initVariables();
1979   emitBody("");
1980   OS.str("");
1981 
1982   CurrentRecord = nullptr;
1983 }
1984 
1985 //===----------------------------------------------------------------------===//
1986 // NeonEmitter implementation
1987 //===----------------------------------------------------------------------===//
1988 
1989 Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types) {
1990   // First, look up the name in the intrinsic map.
1991   assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(),
1992                   ("Intrinsic '" + Name + "' not found!").str());
1993   auto &V = IntrinsicMap.find(Name.str())->second;
1994   std::vector<Intrinsic *> GoodVec;
1995 
1996   // Create a string to print if we end up failing.
1997   std::string ErrMsg = "looking up intrinsic '" + Name.str() + "(";
1998   for (unsigned I = 0; I < Types.size(); ++I) {
1999     if (I != 0)
2000       ErrMsg += ", ";
2001     ErrMsg += Types[I].str();
2002   }
2003   ErrMsg += ")'\n";
2004   ErrMsg += "Available overloads:\n";
2005 
2006   // Now, look through each intrinsic implementation and see if the types are
2007   // compatible.
2008   for (auto &I : V) {
2009     ErrMsg += "  - " + I.getReturnType().str() + " " + I.getMangledName();
2010     ErrMsg += "(";
2011     for (unsigned A = 0; A < I.getNumParams(); ++A) {
2012       if (A != 0)
2013         ErrMsg += ", ";
2014       ErrMsg += I.getParamType(A).str();
2015     }
2016     ErrMsg += ")\n";
2017 
2018     if (I.getNumParams() != Types.size())
2019       continue;
2020 
2021     bool Good = true;
2022     for (unsigned Arg = 0; Arg < Types.size(); ++Arg) {
2023       if (I.getParamType(Arg) != Types[Arg]) {
2024         Good = false;
2025         break;
2026       }
2027     }
2028     if (Good)
2029       GoodVec.push_back(&I);
2030   }
2031 
2032   assert_with_loc(!GoodVec.empty(),
2033                   "No compatible intrinsic found - " + ErrMsg);
2034   assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg);
2035 
2036   return *GoodVec.front();
2037 }
2038 
2039 void NeonEmitter::createIntrinsic(Record *R,
2040                                   SmallVectorImpl<Intrinsic *> &Out) {
2041   std::string Name = R->getValueAsString("Name");
2042   std::string Proto = R->getValueAsString("Prototype");
2043   std::string Types = R->getValueAsString("Types");
2044   Record *OperationRec = R->getValueAsDef("Operation");
2045   bool CartesianProductOfTypes = R->getValueAsBit("CartesianProductOfTypes");
2046   bool BigEndianSafe  = R->getValueAsBit("BigEndianSafe");
2047   std::string Guard = R->getValueAsString("ArchGuard");
2048   bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");
2049 
2050   // Set the global current record. This allows assert_with_loc to produce
2051   // decent location information even when highly nested.
2052   CurrentRecord = R;
2053 
2054   ListInit *Body = OperationRec->getValueAsListInit("Ops");
2055 
2056   std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types);
2057 
2058   ClassKind CK = ClassNone;
2059   if (R->getSuperClasses().size() >= 2)
2060     CK = ClassMap[R->getSuperClasses()[1].first];
2061 
2062   std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;
2063   for (auto TS : TypeSpecs) {
2064     if (CartesianProductOfTypes) {
2065       Type DefaultT(TS, 'd');
2066       for (auto SrcTS : TypeSpecs) {
2067         Type DefaultSrcT(SrcTS, 'd');
2068         if (TS == SrcTS ||
2069             DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())
2070           continue;
2071         NewTypeSpecs.push_back(std::make_pair(TS, SrcTS));
2072       }
2073     } else {
2074       NewTypeSpecs.push_back(std::make_pair(TS, TS));
2075     }
2076   }
2077 
2078   llvm::sort(NewTypeSpecs);
2079   NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()),
2080 		     NewTypeSpecs.end());
2081   auto &Entry = IntrinsicMap[Name];
2082 
2083   for (auto &I : NewTypeSpecs) {
2084     Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this,
2085                        Guard, IsUnavailable, BigEndianSafe);
2086     Out.push_back(&Entry.back());
2087   }
2088 
2089   CurrentRecord = nullptr;
2090 }
2091 
2092 /// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
2093 /// declaration of builtins, checking for unique builtin declarations.
2094 void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
2095                                  SmallVectorImpl<Intrinsic *> &Defs) {
2096   OS << "#ifdef GET_NEON_BUILTINS\n";
2097 
2098   // We only want to emit a builtin once, and we want to emit them in
2099   // alphabetical order, so use a std::set.
2100   std::set<std::string> Builtins;
2101 
2102   for (auto *Def : Defs) {
2103     if (Def->hasBody())
2104       continue;
2105     // Functions with 'a' (the splat code) in the type prototype should not get
2106     // their own builtin as they use the non-splat variant.
2107     if (Def->hasSplat())
2108       continue;
2109 
2110     std::string S = "BUILTIN(__builtin_neon_" + Def->getMangledName() + ", \"";
2111 
2112     S += Def->getBuiltinTypeStr();
2113     S += "\", \"n\")";
2114 
2115     Builtins.insert(S);
2116   }
2117 
2118   for (auto &S : Builtins)
2119     OS << S << "\n";
2120   OS << "#endif\n\n";
2121 }
2122 
2123 /// Generate the ARM and AArch64 overloaded type checking code for
2124 /// SemaChecking.cpp, checking for unique builtin declarations.
2125 void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
2126                                            SmallVectorImpl<Intrinsic *> &Defs) {
2127   OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
2128 
2129   // We record each overload check line before emitting because subsequent Inst
2130   // definitions may extend the number of permitted types (i.e. augment the
2131   // Mask). Use std::map to avoid sorting the table by hash number.
2132   struct OverloadInfo {
2133     uint64_t Mask;
2134     int PtrArgNum;
2135     bool HasConstPtr;
2136     OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {}
2137   };
2138   std::map<std::string, OverloadInfo> OverloadMap;
2139 
2140   for (auto *Def : Defs) {
2141     // If the def has a body (that is, it has Operation DAGs), it won't call
2142     // __builtin_neon_* so we don't need to generate a definition for it.
2143     if (Def->hasBody())
2144       continue;
2145     // Functions with 'a' (the splat code) in the type prototype should not get
2146     // their own builtin as they use the non-splat variant.
2147     if (Def->hasSplat())
2148       continue;
2149     // Functions which have a scalar argument cannot be overloaded, no need to
2150     // check them if we are emitting the type checking code.
2151     if (Def->protoHasScalar())
2152       continue;
2153 
2154     uint64_t Mask = 0ULL;
2155     Type Ty = Def->getReturnType();
2156     if (Def->getProto()[0] == 'v' ||
2157         isFloatingPointProtoModifier(Def->getProto()[0]))
2158       Ty = Def->getParamType(0);
2159     if (Ty.isPointer())
2160       Ty = Def->getParamType(1);
2161 
2162     Mask |= 1ULL << Ty.getNeonEnum();
2163 
2164     // Check if the function has a pointer or const pointer argument.
2165     std::string Proto = Def->getProto();
2166     int PtrArgNum = -1;
2167     bool HasConstPtr = false;
2168     for (unsigned I = 0; I < Def->getNumParams(); ++I) {
2169       char ArgType = Proto[I + 1];
2170       if (ArgType == 'c') {
2171         HasConstPtr = true;
2172         PtrArgNum = I;
2173         break;
2174       }
2175       if (ArgType == 'p') {
2176         PtrArgNum = I;
2177         break;
2178       }
2179     }
2180     // For sret builtins, adjust the pointer argument index.
2181     if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1)
2182       PtrArgNum += 1;
2183 
2184     std::string Name = Def->getName();
2185     // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
2186     // and vst1_lane intrinsics.  Using a pointer to the vector element
2187     // type with one of those operations causes codegen to select an aligned
2188     // load/store instruction.  If you want an unaligned operation,
2189     // the pointer argument needs to have less alignment than element type,
2190     // so just accept any pointer type.
2191     if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane") {
2192       PtrArgNum = -1;
2193       HasConstPtr = false;
2194     }
2195 
2196     if (Mask) {
2197       std::string Name = Def->getMangledName();
2198       OverloadMap.insert(std::make_pair(Name, OverloadInfo()));
2199       OverloadInfo &OI = OverloadMap[Name];
2200       OI.Mask |= Mask;
2201       OI.PtrArgNum |= PtrArgNum;
2202       OI.HasConstPtr = HasConstPtr;
2203     }
2204   }
2205 
2206   for (auto &I : OverloadMap) {
2207     OverloadInfo &OI = I.second;
2208 
2209     OS << "case NEON::BI__builtin_neon_" << I.first << ": ";
2210     OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL";
2211     if (OI.PtrArgNum >= 0)
2212       OS << "; PtrArgNum = " << OI.PtrArgNum;
2213     if (OI.HasConstPtr)
2214       OS << "; HasConstPtr = true";
2215     OS << "; break;\n";
2216   }
2217   OS << "#endif\n\n";
2218 }
2219 
2220 void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
2221                                         SmallVectorImpl<Intrinsic *> &Defs) {
2222   OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2223 
2224   std::set<std::string> Emitted;
2225 
2226   for (auto *Def : Defs) {
2227     if (Def->hasBody())
2228       continue;
2229     // Functions with 'a' (the splat code) in the type prototype should not get
2230     // their own builtin as they use the non-splat variant.
2231     if (Def->hasSplat())
2232       continue;
2233     // Functions which do not have an immediate do not need to have range
2234     // checking code emitted.
2235     if (!Def->hasImmediate())
2236       continue;
2237     if (Emitted.find(Def->getMangledName()) != Emitted.end())
2238       continue;
2239 
2240     std::string LowerBound, UpperBound;
2241 
2242     Record *R = Def->getRecord();
2243     if (R->getValueAsBit("isVCVT_N")) {
2244       // VCVT between floating- and fixed-point values takes an immediate
2245       // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16.
2246       LowerBound = "1";
2247 	  if (Def->getBaseType().getElementSizeInBits() == 16 ||
2248 		  Def->getName().find('h') != std::string::npos)
2249 		// VCVTh operating on FP16 intrinsics in range [1, 16)
2250 		UpperBound = "15";
2251 	  else if (Def->getBaseType().getElementSizeInBits() == 32)
2252         UpperBound = "31";
2253 	  else
2254         UpperBound = "63";
2255     } else if (R->getValueAsBit("isScalarShift")) {
2256       // Right shifts have an 'r' in the name, left shifts do not. Convert
2257       // instructions have the same bounds and right shifts.
2258       if (Def->getName().find('r') != std::string::npos ||
2259           Def->getName().find("cvt") != std::string::npos)
2260         LowerBound = "1";
2261 
2262       UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1);
2263     } else if (R->getValueAsBit("isShift")) {
2264       // Builtins which are overloaded by type will need to have their upper
2265       // bound computed at Sema time based on the type constant.
2266 
2267       // Right shifts have an 'r' in the name, left shifts do not.
2268       if (Def->getName().find('r') != std::string::npos)
2269         LowerBound = "1";
2270       UpperBound = "RFT(TV, true)";
2271     } else if (Def->getClassKind(true) == ClassB) {
2272       // ClassB intrinsics have a type (and hence lane number) that is only
2273       // known at runtime.
2274       if (R->getValueAsBit("isLaneQ"))
2275         UpperBound = "RFT(TV, false, true)";
2276       else
2277         UpperBound = "RFT(TV, false, false)";
2278     } else {
2279       // The immediate generally refers to a lane in the preceding argument.
2280       assert(Def->getImmediateIdx() > 0);
2281       Type T = Def->getParamType(Def->getImmediateIdx() - 1);
2282       UpperBound = utostr(T.getNumElements() - 1);
2283     }
2284 
2285     // Calculate the index of the immediate that should be range checked.
2286     unsigned Idx = Def->getNumParams();
2287     if (Def->hasImmediate())
2288       Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx());
2289 
2290     OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": "
2291        << "i = " << Idx << ";";
2292     if (!LowerBound.empty())
2293       OS << " l = " << LowerBound << ";";
2294     if (!UpperBound.empty())
2295       OS << " u = " << UpperBound << ";";
2296     OS << " break;\n";
2297 
2298     Emitted.insert(Def->getMangledName());
2299   }
2300 
2301   OS << "#endif\n\n";
2302 }
2303 
2304 /// runHeader - Emit a file with sections defining:
2305 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
2306 /// 2. the SemaChecking code for the type overload checking.
2307 /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
2308 void NeonEmitter::runHeader(raw_ostream &OS) {
2309   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2310 
2311   SmallVector<Intrinsic *, 128> Defs;
2312   for (auto *R : RV)
2313     createIntrinsic(R, Defs);
2314 
2315   // Generate shared BuiltinsXXX.def
2316   genBuiltinsDef(OS, Defs);
2317 
2318   // Generate ARM overloaded type checking code for SemaChecking.cpp
2319   genOverloadTypeCheckCode(OS, Defs);
2320 
2321   // Generate ARM range checking code for shift/lane immediates.
2322   genIntrinsicRangeCheckCode(OS, Defs);
2323 }
2324 
2325 /// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
2326 /// is comprised of type definitions and function declarations.
2327 void NeonEmitter::run(raw_ostream &OS) {
2328   OS << "/*===---- arm_neon.h - ARM Neon intrinsics "
2329         "------------------------------"
2330         "---===\n"
2331         " *\n"
2332         " * Permission is hereby granted, free of charge, to any person "
2333         "obtaining "
2334         "a copy\n"
2335         " * of this software and associated documentation files (the "
2336         "\"Software\"),"
2337         " to deal\n"
2338         " * in the Software without restriction, including without limitation "
2339         "the "
2340         "rights\n"
2341         " * to use, copy, modify, merge, publish, distribute, sublicense, "
2342         "and/or sell\n"
2343         " * copies of the Software, and to permit persons to whom the Software "
2344         "is\n"
2345         " * furnished to do so, subject to the following conditions:\n"
2346         " *\n"
2347         " * The above copyright notice and this permission notice shall be "
2348         "included in\n"
2349         " * all copies or substantial portions of the Software.\n"
2350         " *\n"
2351         " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2352         "EXPRESS OR\n"
2353         " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2354         "MERCHANTABILITY,\n"
2355         " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2356         "SHALL THE\n"
2357         " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2358         "OTHER\n"
2359         " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2360         "ARISING FROM,\n"
2361         " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2362         "DEALINGS IN\n"
2363         " * THE SOFTWARE.\n"
2364         " *\n"
2365         " *===-----------------------------------------------------------------"
2366         "---"
2367         "---===\n"
2368         " */\n\n";
2369 
2370   OS << "#ifndef __ARM_NEON_H\n";
2371   OS << "#define __ARM_NEON_H\n\n";
2372 
2373   OS << "#if !defined(__ARM_NEON)\n";
2374   OS << "#error \"NEON support not enabled\"\n";
2375   OS << "#endif\n\n";
2376 
2377   OS << "#include <stdint.h>\n\n";
2378 
2379   // Emit NEON-specific scalar typedefs.
2380   OS << "typedef float float32_t;\n";
2381   OS << "typedef __fp16 float16_t;\n";
2382 
2383   OS << "#ifdef __aarch64__\n";
2384   OS << "typedef double float64_t;\n";
2385   OS << "#endif\n\n";
2386 
2387   // For now, signedness of polynomial types depends on target
2388   OS << "#ifdef __aarch64__\n";
2389   OS << "typedef uint8_t poly8_t;\n";
2390   OS << "typedef uint16_t poly16_t;\n";
2391   OS << "typedef uint64_t poly64_t;\n";
2392   OS << "typedef __uint128_t poly128_t;\n";
2393   OS << "#else\n";
2394   OS << "typedef int8_t poly8_t;\n";
2395   OS << "typedef int16_t poly16_t;\n";
2396   OS << "#endif\n";
2397 
2398   // Emit Neon vector typedefs.
2399   std::string TypedefTypes(
2400       "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
2401   std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);
2402 
2403   // Emit vector typedefs.
2404   bool InIfdef = false;
2405   for (auto &TS : TDTypeVec) {
2406     bool IsA64 = false;
2407     Type T(TS, 'd');
2408     if (T.isDouble() || (T.isPoly() && T.isLong()))
2409       IsA64 = true;
2410 
2411     if (InIfdef && !IsA64) {
2412       OS << "#endif\n";
2413       InIfdef = false;
2414     }
2415     if (!InIfdef && IsA64) {
2416       OS << "#ifdef __aarch64__\n";
2417       InIfdef = true;
2418     }
2419 
2420     if (T.isPoly())
2421       OS << "typedef __attribute__((neon_polyvector_type(";
2422     else
2423       OS << "typedef __attribute__((neon_vector_type(";
2424 
2425     Type T2 = T;
2426     T2.makeScalar();
2427     OS << T.getNumElements() << "))) ";
2428     OS << T2.str();
2429     OS << " " << T.str() << ";\n";
2430   }
2431   if (InIfdef)
2432     OS << "#endif\n";
2433   OS << "\n";
2434 
2435   // Emit struct typedefs.
2436   InIfdef = false;
2437   for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
2438     for (auto &TS : TDTypeVec) {
2439       bool IsA64 = false;
2440       Type T(TS, 'd');
2441       if (T.isDouble() || (T.isPoly() && T.isLong()))
2442         IsA64 = true;
2443 
2444       if (InIfdef && !IsA64) {
2445         OS << "#endif\n";
2446         InIfdef = false;
2447       }
2448       if (!InIfdef && IsA64) {
2449         OS << "#ifdef __aarch64__\n";
2450         InIfdef = true;
2451       }
2452 
2453       char M = '2' + (NumMembers - 2);
2454       Type VT(TS, M);
2455       OS << "typedef struct " << VT.str() << " {\n";
2456       OS << "  " << T.str() << " val";
2457       OS << "[" << NumMembers << "]";
2458       OS << ";\n} ";
2459       OS << VT.str() << ";\n";
2460       OS << "\n";
2461     }
2462   }
2463   if (InIfdef)
2464     OS << "#endif\n";
2465   OS << "\n";
2466 
2467   OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
2468         "__nodebug__))\n\n";
2469 
2470   SmallVector<Intrinsic *, 128> Defs;
2471   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2472   for (auto *R : RV)
2473     createIntrinsic(R, Defs);
2474 
2475   for (auto *I : Defs)
2476     I->indexBody();
2477 
2478   llvm::stable_sort(Defs, llvm::deref<std::less<>>());
2479 
2480   // Only emit a def when its requirements have been met.
2481   // FIXME: This loop could be made faster, but it's fast enough for now.
2482   bool MadeProgress = true;
2483   std::string InGuard;
2484   while (!Defs.empty() && MadeProgress) {
2485     MadeProgress = false;
2486 
2487     for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
2488          I != Defs.end(); /*No step*/) {
2489       bool DependenciesSatisfied = true;
2490       for (auto *II : (*I)->getDependencies()) {
2491         if (llvm::is_contained(Defs, II))
2492           DependenciesSatisfied = false;
2493       }
2494       if (!DependenciesSatisfied) {
2495         // Try the next one.
2496         ++I;
2497         continue;
2498       }
2499 
2500       // Emit #endif/#if pair if needed.
2501       if ((*I)->getGuard() != InGuard) {
2502         if (!InGuard.empty())
2503           OS << "#endif\n";
2504         InGuard = (*I)->getGuard();
2505         if (!InGuard.empty())
2506           OS << "#if " << InGuard << "\n";
2507       }
2508 
2509       // Actually generate the intrinsic code.
2510       OS << (*I)->generate();
2511 
2512       MadeProgress = true;
2513       I = Defs.erase(I);
2514     }
2515   }
2516   assert(Defs.empty() && "Some requirements were not satisfied!");
2517   if (!InGuard.empty())
2518     OS << "#endif\n";
2519 
2520   OS << "\n";
2521   OS << "#undef __ai\n\n";
2522   OS << "#endif /* __ARM_NEON_H */\n";
2523 }
2524 
2525 /// run - Read the records in arm_fp16.td and output arm_fp16.h.  arm_fp16.h
2526 /// is comprised of type definitions and function declarations.
2527 void NeonEmitter::runFP16(raw_ostream &OS) {
2528   OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics "
2529         "------------------------------"
2530         "---===\n"
2531         " *\n"
2532         " * Permission is hereby granted, free of charge, to any person "
2533         "obtaining a copy\n"
2534         " * of this software and associated documentation files (the "
2535 				"\"Software\"), to deal\n"
2536         " * in the Software without restriction, including without limitation "
2537 				"the rights\n"
2538         " * to use, copy, modify, merge, publish, distribute, sublicense, "
2539 				"and/or sell\n"
2540         " * copies of the Software, and to permit persons to whom the Software "
2541 				"is\n"
2542         " * furnished to do so, subject to the following conditions:\n"
2543         " *\n"
2544         " * The above copyright notice and this permission notice shall be "
2545         "included in\n"
2546         " * all copies or substantial portions of the Software.\n"
2547         " *\n"
2548         " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2549         "EXPRESS OR\n"
2550         " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2551         "MERCHANTABILITY,\n"
2552         " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2553         "SHALL THE\n"
2554         " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2555         "OTHER\n"
2556         " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2557         "ARISING FROM,\n"
2558         " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2559         "DEALINGS IN\n"
2560         " * THE SOFTWARE.\n"
2561         " *\n"
2562         " *===-----------------------------------------------------------------"
2563         "---"
2564         "---===\n"
2565         " */\n\n";
2566 
2567   OS << "#ifndef __ARM_FP16_H\n";
2568   OS << "#define __ARM_FP16_H\n\n";
2569 
2570   OS << "#include <stdint.h>\n\n";
2571 
2572   OS << "typedef __fp16 float16_t;\n";
2573 
2574   OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
2575         "__nodebug__))\n\n";
2576 
2577   SmallVector<Intrinsic *, 128> Defs;
2578   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2579   for (auto *R : RV)
2580     createIntrinsic(R, Defs);
2581 
2582   for (auto *I : Defs)
2583     I->indexBody();
2584 
2585   llvm::stable_sort(Defs, llvm::deref<std::less<>>());
2586 
2587   // Only emit a def when its requirements have been met.
2588   // FIXME: This loop could be made faster, but it's fast enough for now.
2589   bool MadeProgress = true;
2590   std::string InGuard;
2591   while (!Defs.empty() && MadeProgress) {
2592     MadeProgress = false;
2593 
2594     for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
2595          I != Defs.end(); /*No step*/) {
2596       bool DependenciesSatisfied = true;
2597       for (auto *II : (*I)->getDependencies()) {
2598         if (llvm::is_contained(Defs, II))
2599           DependenciesSatisfied = false;
2600       }
2601       if (!DependenciesSatisfied) {
2602         // Try the next one.
2603         ++I;
2604         continue;
2605       }
2606 
2607       // Emit #endif/#if pair if needed.
2608       if ((*I)->getGuard() != InGuard) {
2609         if (!InGuard.empty())
2610           OS << "#endif\n";
2611         InGuard = (*I)->getGuard();
2612         if (!InGuard.empty())
2613           OS << "#if " << InGuard << "\n";
2614       }
2615 
2616       // Actually generate the intrinsic code.
2617       OS << (*I)->generate();
2618 
2619       MadeProgress = true;
2620       I = Defs.erase(I);
2621     }
2622   }
2623   assert(Defs.empty() && "Some requirements were not satisfied!");
2624   if (!InGuard.empty())
2625     OS << "#endif\n";
2626 
2627   OS << "\n";
2628   OS << "#undef __ai\n\n";
2629   OS << "#endif /* __ARM_FP16_H */\n";
2630 }
2631 
2632 namespace clang {
2633 
2634 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
2635   NeonEmitter(Records).run(OS);
2636 }
2637 
2638 void EmitFP16(RecordKeeper &Records, raw_ostream &OS) {
2639   NeonEmitter(Records).runFP16(OS);
2640 }
2641 
2642 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
2643   NeonEmitter(Records).runHeader(OS);
2644 }
2645 
2646 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
2647   llvm_unreachable("Neon test generation no longer implemented!");
2648 }
2649 
2650 } // end namespace clang
2651