1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This tablegen backend is responsible for emitting arm_neon.h, which includes
11 // a declaration and definition of each function specified by the ARM NEON
12 // compiler interface.  See ARM document DUI0348B.
13 //
14 // Each NEON instruction is implemented in terms of 1 or more functions which
15 // are suffixed with the element type of the input vectors.  Functions may be
16 // implemented in terms of generic vector operations such as +, *, -, etc. or
17 // by calling a __builtin_-prefixed function which will be handled by clang's
18 // CodeGen library.
19 //
20 // Additional validation code can be generated by this file when runHeader() is
21 // called, rather than the normal run() entry point.  A complete set of tests
22 // for Neon intrinsics can be generated by calling the runTests() entry point.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/SmallString.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/ADT/StringMap.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/TableGen/Error.h"
33 #include "llvm/TableGen/Record.h"
34 #include "llvm/TableGen/TableGenBackend.h"
35 #include <string>
36 using namespace llvm;
37 
/// OpKind - Operation kinds, one per OP_* record name registered in
/// NeonEmitter's OpMap below.  An intrinsic tagged with one of these is
/// expanded by this backend rather than (or in addition to) lowering to a
/// __builtin call.  Do not reorder: entries must stay in sync with the
/// string-to-kind table in the NeonEmitter constructor.
enum OpKind {
  OpNone,
  OpUnavailable,
  OpAdd,
  OpAddl,
  OpAddlHi,
  OpAddw,
  OpAddwHi,
  OpSub,
  OpSubl,
  OpSublHi,
  OpSubw,
  OpSubwHi,
  OpMul,
  OpMla,
  OpMlal,
  OpMullHi,
  OpMlalHi,
  OpMls,
  OpMlsl,
  OpMlslHi,
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpMlalN,
  OpMlslN,
  OpMulLane,
  OpMullLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlslLane,
  OpQDMullLane,
  OpQDMlalLane,
  OpQDMlslLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpReinterpret,
  OpAddhnHi,
  OpRAddhnHi,
  OpSubhnHi,
  OpRSubhnHi,
  OpAbdl,
  OpAbdlHi,
  OpAba,
  OpAbal,
  OpAbalHi,
  OpQDMullHi,
  OpQDMlalHi,
  OpQDMlslHi,
  OpDiv,
  OpLongHi,
  OpNarrowHi,
  OpMovlHi
};
115 
/// ClassKind - Classifies how an intrinsic's name is suffixed with its
/// element type (used by MangleName/InstructionTypeCode in this file).
enum ClassKind {
  ClassNone,
  ClassI,           // generic integer instruction, e.g., "i8" suffix
  ClassS,           // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,           // width-specific instruction, e.g., "8" suffix
  ClassB,           // bitcast arguments with enum argument to specify type
  ClassL,           // Logical instructions which are op instructions
                    // but we need to not emit any suffix for in our
                    // tests.
  ClassNoTest       // Instructions which we do not test since they are
                    // not TRUE instructions.
};
128 
/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins.  These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  // Bit layout of Flags: low 4 bits encode the EltType, bit 4 marks an
  // unsigned element type, bit 5 marks a quad (128-bit) vector.
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };
  uint32_t Flags;

public:
  // Element type codes; the numeric values are part of the builtin ABI
  // shared with clang's CodeGen (see TargetBuiltins.h), so do not reorder.
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Float16,
    Float32,
    Float64
  };

  // Wrap an already-encoded flag word.
  NeonTypeFlags(unsigned F) : Flags(F) {}
  // Build a flag word from an element type plus the unsigned/quad markers.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
    if (IsUnsigned)
      Flags |= UnsignedFlag;
    if (IsQuad)
      Flags |= QuadFlag;
  }

  // Return the raw encoded flag word.
  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace
165 
namespace {
/// NeonEmitter - TableGen backend that emits arm_neon.h (run), the __builtin
/// prototype/checking tables (runHeader), and intrinsic tests (runTests).
class NeonEmitter {
  RecordKeeper &Records;
  // Maps the OP_* record names used in arm_neon.td to their OpKind.
  StringMap<OpKind> OpMap;
  // Maps each instruction record class (SInst, IInst, ...) to the ClassKind
  // that controls its name suffix.
  DenseMap<Record*, ClassKind> ClassMap;

public:
  NeonEmitter(RecordKeeper &R) : Records(R) {
    // Populate the operation-name -> OpKind table.  Each key must match an
    // OP_* definition in arm_neon.td exactly.
    OpMap["OP_NONE"]  = OpNone;
    OpMap["OP_UNAVAILABLE"] = OpUnavailable;
    OpMap["OP_ADD"]   = OpAdd;
    OpMap["OP_ADDL"]  = OpAddl;
    OpMap["OP_ADDLHi"] = OpAddlHi;
    OpMap["OP_ADDW"]  = OpAddw;
    OpMap["OP_ADDWHi"] = OpAddwHi;
    OpMap["OP_SUB"]   = OpSub;
    OpMap["OP_SUBL"]  = OpSubl;
    OpMap["OP_SUBLHi"] = OpSublHi;
    OpMap["OP_SUBW"]  = OpSubw;
    OpMap["OP_SUBWHi"] = OpSubwHi;
    OpMap["OP_MUL"]   = OpMul;
    OpMap["OP_MLA"]   = OpMla;
    OpMap["OP_MLAL"]  = OpMlal;
    OpMap["OP_MULLHi"]  = OpMullHi;
    OpMap["OP_MLALHi"]  = OpMlalHi;
    OpMap["OP_MLS"]   = OpMls;
    OpMap["OP_MLSL"]  = OpMlsl;
    OpMap["OP_MLSLHi"] = OpMlslHi;
    OpMap["OP_MUL_N"] = OpMulN;
    OpMap["OP_MLA_N"] = OpMlaN;
    OpMap["OP_MLS_N"] = OpMlsN;
    OpMap["OP_MLAL_N"] = OpMlalN;
    OpMap["OP_MLSL_N"] = OpMlslN;
    OpMap["OP_MUL_LN"]= OpMulLane;
    OpMap["OP_MULL_LN"] = OpMullLane;
    OpMap["OP_MLA_LN"]= OpMlaLane;
    OpMap["OP_MLS_LN"]= OpMlsLane;
    OpMap["OP_MLAL_LN"] = OpMlalLane;
    OpMap["OP_MLSL_LN"] = OpMlslLane;
    OpMap["OP_QDMULL_LN"] = OpQDMullLane;
    OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
    OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
    OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
    OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
    OpMap["OP_EQ"]    = OpEq;
    OpMap["OP_GE"]    = OpGe;
    OpMap["OP_LE"]    = OpLe;
    OpMap["OP_GT"]    = OpGt;
    OpMap["OP_LT"]    = OpLt;
    OpMap["OP_NEG"]   = OpNeg;
    OpMap["OP_NOT"]   = OpNot;
    OpMap["OP_AND"]   = OpAnd;
    OpMap["OP_OR"]    = OpOr;
    OpMap["OP_XOR"]   = OpXor;
    OpMap["OP_ANDN"]  = OpAndNot;
    OpMap["OP_ORN"]   = OpOrNot;
    OpMap["OP_CAST"]  = OpCast;
    OpMap["OP_CONC"]  = OpConcat;
    OpMap["OP_HI"]    = OpHi;
    OpMap["OP_LO"]    = OpLo;
    OpMap["OP_DUP"]   = OpDup;
    OpMap["OP_DUP_LN"] = OpDupLane;
    OpMap["OP_SEL"]   = OpSelect;
    OpMap["OP_REV16"] = OpRev16;
    OpMap["OP_REV32"] = OpRev32;
    OpMap["OP_REV64"] = OpRev64;
    OpMap["OP_REINT"] = OpReinterpret;
    OpMap["OP_ADDHNHi"] = OpAddhnHi;
    OpMap["OP_RADDHNHi"] = OpRAddhnHi;
    OpMap["OP_SUBHNHi"] = OpSubhnHi;
    OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
    OpMap["OP_ABDL"]  = OpAbdl;
    OpMap["OP_ABDLHi"] = OpAbdlHi;
    OpMap["OP_ABA"]   = OpAba;
    OpMap["OP_ABAL"]  = OpAbal;
    OpMap["OP_ABALHi"] = OpAbalHi;
    OpMap["OP_QDMULLHi"] = OpQDMullHi;
    OpMap["OP_QDMLALHi"] = OpQDMlalHi;
    OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
    OpMap["OP_DIV"] = OpDiv;
    OpMap["OP_LONG_HI"] = OpLongHi;
    OpMap["OP_NARROW_HI"] = OpNarrowHi;
    OpMap["OP_MOVL_HI"] = OpMovlHi;

    // Map each instruction record class to the ClassKind that selects its
    // type-suffix style (see the ClassKind enum above).
    Record *SI = R.getClass("SInst");
    Record *II = R.getClass("IInst");
    Record *WI = R.getClass("WInst");
    Record *SOpI = R.getClass("SOpInst");
    Record *IOpI = R.getClass("IOpInst");
    Record *WOpI = R.getClass("WOpInst");
    Record *LOpI = R.getClass("LOpInst");
    Record *NoTestOpI = R.getClass("NoTestOpInst");

    ClassMap[SI] = ClassS;
    ClassMap[II] = ClassI;
    ClassMap[WI] = ClassW;
    ClassMap[SOpI] = ClassS;
    ClassMap[IOpI] = ClassI;
    ClassMap[WOpI] = ClassW;
    ClassMap[LOpI] = ClassL;
    ClassMap[NoTestOpI] = ClassNoTest;
  }

  // run - Emit arm_neon.h.inc
  void run(raw_ostream &o);

  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
  void runHeader(raw_ostream &o);

  // runTests - Emit tests for all the Neon intrinsics.
  void runTests(raw_ostream &o);

private:
  // Emit a single intrinsic's declaration/definition into the header.
  void emitIntrinsic(raw_ostream &OS, Record *R,
                     StringMap<ClassKind> &EmittedMap);
  // Emit the BUILTIN(...) table entries (AArch64 or ARM, per flag).
  void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
                      bool isA64GenBuiltinDef);
  // Emit the overload type-checking switch for Sema.
  void genOverloadTypeCheckCode(raw_ostream &OS,
                                StringMap<ClassKind> &A64IntrinsicMap,
                                bool isA64TypeCheck);
  // Emit the immediate-argument range-checking switch for Sema.
  void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                  StringMap<ClassKind> &A64IntrinsicMap,
                                  bool isA64RangeCheck);
  // Emit the per-target intrinsic test file.
  void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
                     bool isA64TestGen);
};
} // end anonymous namespace
293 
294 /// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
295 /// which each StringRef representing a single type declared in the string.
296 /// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
297 /// 2xfloat and 4xfloat respectively.
298 static void ParseTypes(Record *r, std::string &s,
299                        SmallVectorImpl<StringRef> &TV) {
300   const char *data = s.data();
301   int len = 0;
302 
303   for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
304     if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
305                          || data[len] == 'H' || data[len] == 'S')
306       continue;
307 
308     switch (data[len]) {
309       case 'c':
310       case 's':
311       case 'i':
312       case 'l':
313       case 'h':
314       case 'f':
315       case 'd':
316         break;
317       default:
318         PrintFatalError(r->getLoc(),
319                       "Unexpected letter: " + std::string(data + len, 1));
320     }
321     TV.push_back(StringRef(data, len + 1));
322     data += len + 1;
323     len = -1;
324   }
325 }
326 
327 /// Widen - Convert a type code into the next wider type.  char -> short,
328 /// short -> int, etc.
329 static char Widen(const char t) {
330   switch (t) {
331     case 'c':
332       return 's';
333     case 's':
334       return 'i';
335     case 'i':
336       return 'l';
337     case 'h':
338       return 'f';
339     default:
340       PrintFatalError("unhandled type in widen!");
341   }
342 }
343 
344 /// Narrow - Convert a type code into the next smaller type.  short -> char,
345 /// float -> half float, etc.
346 static char Narrow(const char t) {
347   switch (t) {
348     case 's':
349       return 'c';
350     case 'i':
351       return 's';
352     case 'l':
353       return 'i';
354     case 'f':
355       return 'h';
356     default:
357       PrintFatalError("unhandled type in narrow!");
358   }
359 }
360 
361 static std::string GetNarrowTypestr(StringRef ty)
362 {
363   std::string s;
364   for (size_t i = 0, end = ty.size(); i < end; i++) {
365     switch (ty[i]) {
366       case 's':
367         s += 'c';
368         break;
369       case 'i':
370         s += 's';
371         break;
372       case 'l':
373         s += 'i';
374         break;
375       default:
376         s += ty[i];
377         break;
378     }
379   }
380 
381   return s;
382 }
383 
384 /// For a particular StringRef, return the base type code, and whether it has
385 /// the quad-vector, polynomial, or unsigned modifiers set.
386 static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
387   unsigned off = 0;
388   // ignore scalar.
389   if (ty[off] == 'S') {
390     ++off;
391   }
392   // remember quad.
393   if (ty[off] == 'Q' || ty[off] == 'H') {
394     quad = true;
395     ++off;
396   }
397 
398   // remember poly.
399   if (ty[off] == 'P') {
400     poly = true;
401     ++off;
402   }
403 
404   // remember unsigned.
405   if (ty[off] == 'U') {
406     usgn = true;
407     ++off;
408   }
409 
410   // base type to get the type string for.
411   return ty[off];
412 }
413 
/// ModType - Transform a type code and its modifiers based on a mod code. The
/// mod code definitions may be found at the top of arm_neon.td.
/// Returns the (possibly changed) base type code; the six flag out-params are
/// updated in place.
static char ModType(const char mod, char type, bool &quad, bool &poly,
                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
  switch (mod) {
    case 't':
      // Poly types become the unsigned integer of the same width.
      if (poly) {
        poly = false;
        usgn = true;
      }
      break;
    case 'u':
      // Force unsigned integer; floats map to same-width integers.
      usgn = true;
      poly = false;
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case 'x':
      // Force signed integer; floats map to same-width integers.
      usgn = false;
      poly = false;
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case 'f':
      // Force float; half becomes a quad float vector.
      if (type == 'h')
        quad = true;
      type = 'f';
      usgn = false;
      break;
    case 'g':
      // Force a 64-bit (non-quad) vector.
      quad = false;
      break;
    case 'w':
      // Widen element and force quad.
      type = Widen(type);
      quad = true;
      break;
    case 'n':
      // Widen element, keep vector width.
      type = Widen(type);
      break;
    case 'i':
      // Scalar int (e.g. a lane/immediate operand).
      type = 'i';
      scal = true;
      break;
    case 'l':
      // Scalar unsigned 64-bit int.
      type = 'l';
      scal = true;
      usgn = true;
      break;
    case 's':
    case 'a':
      // Scalar of the base element type.
      scal = true;
      break;
    case 'k':
      // Force quad vector.
      quad = true;
      break;
    case 'c':
      cnst = true;
      // Intentional fallthrough: 'c' is the const variant of the pointer
      // case below, so it also sets pntr and scal.
    case 'p':
      pntr = true;
      scal = true;
      break;
    case 'h':
      // Narrow element; halving to 8-bit drops back to a dreg.
      type = Narrow(type);
      if (type == 'h')
        quad = false;
      break;
    case 'q':
      // Narrow element but force quad (e.g. the "high" narrowing ops).
      type = Narrow(type);
      quad = true;
      break;
    case 'e':
      // Narrow element and force unsigned.
      type = Narrow(type);
      usgn = true;
      break;
    case 'm':
      // Narrow element and force a 64-bit (non-quad) vector.
      type = Narrow(type);
      quad = false;
      break;
    default:
      break;
  }
  return type;
}
501 
502 /// TypeString - for a modifier and type, generate the name of the typedef for
503 /// that type.  QUc -> uint8x8_t.
504 static std::string TypeString(const char mod, StringRef typestr) {
505   bool quad = false;
506   bool poly = false;
507   bool usgn = false;
508   bool scal = false;
509   bool cnst = false;
510   bool pntr = false;
511 
512   if (mod == 'v')
513     return "void";
514   if (mod == 'i')
515     return "int";
516 
517   // base type to get the type string for.
518   char type = ClassifyType(typestr, quad, poly, usgn);
519 
520   // Based on the modifying character, change the type and width if necessary.
521   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
522 
523   SmallString<128> s;
524 
525   if (usgn)
526     s.push_back('u');
527 
528   switch (type) {
529     case 'c':
530       s += poly ? "poly8" : "int8";
531       if (scal)
532         break;
533       s += quad ? "x16" : "x8";
534       break;
535     case 's':
536       s += poly ? "poly16" : "int16";
537       if (scal)
538         break;
539       s += quad ? "x8" : "x4";
540       break;
541     case 'i':
542       s += "int32";
543       if (scal)
544         break;
545       s += quad ? "x4" : "x2";
546       break;
547     case 'l':
548       s += "int64";
549       if (scal)
550         break;
551       s += quad ? "x2" : "x1";
552       break;
553     case 'h':
554       s += "float16";
555       if (scal)
556         break;
557       s += quad ? "x8" : "x4";
558       break;
559     case 'f':
560       s += "float32";
561       if (scal)
562         break;
563       s += quad ? "x4" : "x2";
564       break;
565     case 'd':
566       s += "float64";
567       if (scal)
568         break;
569       s += quad ? "x2" : "x1";
570       break;
571 
572     default:
573       PrintFatalError("unhandled type!");
574   }
575 
576   if (mod == '2')
577     s += "x2";
578   if (mod == '3')
579     s += "x3";
580   if (mod == '4')
581     s += "x4";
582 
583   // Append _t, finishing the type string typedef type.
584   s += "_t";
585 
586   if (cnst)
587     s += " const";
588 
589   if (pntr)
590     s += " *";
591 
592   return s.str();
593 }
594 
/// BuiltinTypeString - for a modifier and type, generate the clang
/// BuiltinsARM.def prototype code for the function.  See the top of clang's
/// Builtins.def for a description of the type strings.
/// \param ck   the instruction class, which affects signedness and whether
///             vectors are emitted as generic byte vectors (ClassB).
/// \param ret  true when encoding the return type rather than an argument.
static std::string BuiltinTypeString(const char mod, StringRef typestr,
                                     ClassKind ck, bool ret) {
  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  if (mod == 'v')
    return "v"; // void
  if (mod == 'i')
    return "i"; // int

  // base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  // All pointers are void* pointers.  Change type to 'v' now.
  if (pntr) {
    usgn = false;
    poly = false;
    type = 'v';
  }
  // Treat half-float ('h') types as unsigned short ('s') types.
  if (type == 'h') {
    type = 's';
    usgn = true;
  }
  // Poly types and scalar integers of I/W-class instructions are encoded
  // unsigned (floats excluded).
  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && scal && type != 'f');

  // Scalars are encoded directly (e.g. "Sc", "ULLi"), with const/pointer
  // qualifiers appended.
  if (scal) {
    SmallString<128> s;

    if (usgn)
      s.push_back('U');
    else if (type == 'c')
      s.push_back('S'); // make chars explicitly signed

    if (type == 'l') // 64-bit long
      s += "LLi";
    else
      s.push_back(type);

    if (cnst)
      s.push_back('C');
    if (pntr)
      s.push_back('*');
    return s.str();
  }

  // Since the return value must be one type, return a vector type of the
  // appropriate width which we will bitcast.  An exception is made for
  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
  // fashion, storing them to a pointer arg.
  if (ret) {
    if (mod >= '2' && mod <= '4')
      return "vv*"; // void result with void* first argument
    if (mod == 'f' || (ck != ClassB && type == 'f'))
      return quad ? "V4f" : "V2f";
    if (ck != ClassB && type == 's')
      return quad ? "V8s" : "V4s";
    if (ck != ClassB && type == 'i')
      return quad ? "V4i" : "V2i";
    if (ck != ClassB && type == 'l')
      return quad ? "V2LLi" : "V1LLi";

    // ClassB (and any remaining case) returns a generic byte vector.
    return quad ? "V16Sc" : "V8Sc";
  }

  // Non-return array types are passed as individual vectors.
  if (mod == '2')
    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
  if (mod == '3')
    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
  if (mod == '4')
    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";

  if (mod == 'f' || (ck != ClassB && type == 'f'))
    return quad ? "V4f" : "V2f";
  if (ck != ClassB && type == 's')
    return quad ? "V8s" : "V4s";
  if (ck != ClassB && type == 'i')
    return quad ? "V4i" : "V2i";
  if (ck != ClassB && type == 'l')
    return quad ? "V2LLi" : "V1LLi";

  // ClassB arguments are passed as generic byte vectors.
  return quad ? "V16Sc" : "V8Sc";
}
689 
/// InstructionTypeCode - Computes the ARM argument character code and
/// quad status for a specific type string and ClassKind.
/// \param[out] quad     set by ClassifyType when the type is a quad vector.
/// \param[out] typeCode the suffix text ("s8", "i32", "16", ...); left
///                      unchanged for class kinds with no suffix (e.g.
///                      ClassB/ClassNone fall through the inner switches).
static void InstructionTypeCode(const StringRef &typeStr,
                                const ClassKind ck,
                                bool &quad,
                                std::string &typeCode) {
  bool poly = false;
  bool usgn = false;
  char type = ClassifyType(typeStr, quad, poly, usgn);

  switch (type) {
  case 'c':
    switch (ck) {
    case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
    case ClassI: typeCode = "i8"; break;
    case ClassW: typeCode = "8"; break;
    default: break;
    }
    break;
  case 's':
    switch (ck) {
    case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
    case ClassI: typeCode = "i16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'i':
    switch (ck) {
    case ClassS: typeCode = usgn ? "u32" : "s32"; break;
    case ClassI: typeCode = "i32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'l':
    switch (ck) {
    case ClassS: typeCode = usgn ? "u64" : "s64"; break;
    case ClassI: typeCode = "i64"; break;
    case ClassW: typeCode = "64"; break;
    default: break;
    }
    break;
  case 'h':
    // Half floats have no signedness distinction.
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'f':
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'd':
    // f64 has no ClassW (width-only) form.
    switch (ck) {
    case ClassS:
    case ClassI:
      typeCode += "f64";
      break;
    case ClassW:
      PrintFatalError("unhandled type!");
    default:
      break;
    }
    break;
  default:
    PrintFatalError("unhandled type!");
  }
}
765 
766 static char Insert_BHSD_Suffix(StringRef typestr){
767   unsigned off = 0;
768   if(typestr[off++] == 'S'){
769     while(typestr[off] == 'Q' || typestr[off] == 'H'||
770           typestr[off] == 'P' || typestr[off] == 'U')
771       ++off;
772     switch (typestr[off]){
773     default  : break;
774     case 'c' : return 'b';
775     case 's' : return 'h';
776     case 'i' :
777     case 'f' : return 's';
778     case 'l' :
779     case 'd' : return 'd';
780     }
781   }
782   return 0;
783 }
784 
785 /// MangleName - Append a type or width suffix to a base neon function name,
786 /// and insert a 'q' in the appropriate location if type string starts with 'Q'.
787 /// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
788 /// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
789 static std::string MangleName(const std::string &name, StringRef typestr,
790                               ClassKind ck) {
791   if (name == "vcvt_f32_f16")
792     return name;
793 
794   bool quad = false;
795   std::string typeCode = "";
796 
797   InstructionTypeCode(typestr, ck, quad, typeCode);
798 
799   std::string s = name;
800 
801   if (typeCode.size() > 0) {
802     s += "_" + typeCode;
803   }
804 
805   if (ck == ClassB)
806     s += "_v";
807 
808   // Insert a 'q' before the first '_' character so that it ends up before
809   // _lane or _n on vector-scalar operations.
810   if (typestr.find("Q") != StringRef::npos) {
811       size_t pos = s.find('_');
812       s = s.insert(pos, "q");
813   }
814   char ins = Insert_BHSD_Suffix(typestr);
815   if(ins){
816     size_t pos = s.find('_');
817     s = s.insert(pos, &ins, 1);
818   }
819 
820   return s;
821 }
822 
823 static void PreprocessInstruction(const StringRef &Name,
824                                   const std::string &InstName,
825                                   std::string &Prefix,
826                                   bool &HasNPostfix,
827                                   bool &HasLanePostfix,
828                                   bool &HasDupPostfix,
829                                   bool &IsSpecialVCvt,
830                                   size_t &TBNumber) {
831   // All of our instruction name fields from arm_neon.td are of the form
832   //   <instructionname>_...
833   // Thus we grab our instruction name via computation of said Prefix.
834   const size_t PrefixEnd = Name.find_first_of('_');
835   // If InstName is passed in, we use that instead of our name Prefix.
836   Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
837 
838   const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
839 
840   HasNPostfix = Postfix.count("_n");
841   HasLanePostfix = Postfix.count("_lane");
842   HasDupPostfix = Postfix.count("_dup");
843   IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
844 
845   if (InstName.compare("vtbl") == 0 ||
846       InstName.compare("vtbx") == 0) {
847     // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
848     // encoding to get its true value.
849     TBNumber = Name[Name.size()-1] - 48;
850   }
851 }
852 
853 /// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
854 /// extracted, generate a FileCheck pattern for a Load Or Store
855 static void
856 GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
857                                           const std::string& OutTypeCode,
858                                           const bool &IsQuad,
859                                           const bool &HasDupPostfix,
860                                           const bool &HasLanePostfix,
861                                           const size_t Count,
862                                           std::string &RegisterSuffix) {
863   const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
864   // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
865   // will output a series of v{ld,st}1s, so we have to handle it specially.
866   if ((Count == 3 || Count == 4) && IsQuad) {
867     RegisterSuffix += "{";
868     for (size_t i = 0; i < Count; i++) {
869       RegisterSuffix += "d{{[0-9]+}}";
870       if (HasDupPostfix) {
871         RegisterSuffix += "[]";
872       }
873       if (HasLanePostfix) {
874         RegisterSuffix += "[{{[0-9]+}}]";
875       }
876       if (i < Count-1) {
877         RegisterSuffix += ", ";
878       }
879     }
880     RegisterSuffix += "}";
881   } else {
882 
883     // Handle normal loads and stores.
884     RegisterSuffix += "{";
885     for (size_t i = 0; i < Count; i++) {
886       RegisterSuffix += "d{{[0-9]+}}";
887       if (HasDupPostfix) {
888         RegisterSuffix += "[]";
889       }
890       if (HasLanePostfix) {
891         RegisterSuffix += "[{{[0-9]+}}]";
892       }
893       if (IsQuad && !HasLanePostfix) {
894         RegisterSuffix += ", d{{[0-9]+}}";
895         if (HasDupPostfix) {
896           RegisterSuffix += "[]";
897         }
898       }
899       if (i < Count-1) {
900         RegisterSuffix += ", ";
901       }
902     }
903     RegisterSuffix += "}, [r{{[0-9]+}}";
904 
905     // We only include the alignment hint if we have a vld1.*64 or
906     // a dup/lane instruction.
907     if (IsLDSTOne) {
908       if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
909         RegisterSuffix += ":" + OutTypeCode;
910       }
911     }
912 
913     RegisterSuffix += "]";
914   }
915 }
916 
917 static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
918                                      const bool &HasNPostfix) {
919   return (NameRef.count("vmla") ||
920           NameRef.count("vmlal") ||
921           NameRef.count("vmlsl") ||
922           NameRef.count("vmull") ||
923           NameRef.count("vqdmlal") ||
924           NameRef.count("vqdmlsl") ||
925           NameRef.count("vqdmulh") ||
926           NameRef.count("vqdmull") ||
927           NameRef.count("vqrdmulh")) && HasNPostfix;
928 }
929 
930 static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
931                                          const bool &HasLanePostfix) {
932   return (NameRef.count("vmla") ||
933           NameRef.count("vmls") ||
934           NameRef.count("vmlal") ||
935           NameRef.count("vmlsl") ||
936           (NameRef.count("vmul") && NameRef.size() == 3)||
937           NameRef.count("vqdmlal") ||
938           NameRef.count("vqdmlsl") ||
939           NameRef.count("vqdmulh") ||
940           NameRef.count("vqrdmulh")) && HasLanePostfix;
941 }
942 
943 static bool IsSpecialLaneMultiply(const StringRef &NameRef,
944                                   const bool &HasLanePostfix,
945                                   const bool &IsQuad) {
946   const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
947                                && IsQuad;
948   const bool IsVMull = NameRef.count("mull") && !IsQuad;
949   return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
950 }
951 
/// NormalizeProtoForRegisterPatternCreation - Translate the intrinsic's Proto
/// string into a normalized register-class string ('q' = quad reg, 'd' =
/// double reg, 'a' = lane index, 'i' = immediate) in NormedProto, then apply
/// instruction-specific fixups.  Proto characters with no case below
/// contribute nothing to NormedProto.
static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
                                                     const std::string &Proto,
                                                     const bool &HasNPostfix,
                                                     const bool &IsQuad,
                                                     const bool &HasLanePostfix,
                                                     const bool &HasDupPostfix,
                                                     std::string &NormedProto) {
  // Handle generic case.
  const StringRef NameRef(Name);
  for (size_t i = 0, end = Proto.size(); i < end; i++) {
    switch (Proto[i]) {
    case 'u':
    case 'f':
    case 'd':
    case 's':
    case 'x':
    case 't':
    case 'n':
      // Vector operands follow the instruction's quad-ness.
      NormedProto += IsQuad? 'q' : 'd';
      break;
    case 'w':
    case 'k':
      // Always a quad register.
      NormedProto += 'q';
      break;
    case 'g':
    case 'h':
    case 'e':
      // Always a double register.
      NormedProto += 'd';
      break;
    case 'i':
      // Scalar int: a lane index for _lane variants, otherwise an immediate.
      NormedProto += HasLanePostfix? 'a' : 'i';
      break;
    case 'a':
      // Scalar operand: lane index, a vector register for _n multiplies with
      // scalar args, or an immediate.
      if (HasLanePostfix) {
        NormedProto += 'a';
      } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
        NormedProto += IsQuad? 'q' : 'd';
      } else {
        NormedProto += 'i';
      }
      break;
    }
  }

  // Handle Special Cases.
  const bool IsNotVExt = !NameRef.count("vext");
  const bool IsVPADAL = NameRef.count("vpadal");
  const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
                                                           HasLanePostfix);
  const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
                                                      IsQuad);

  if (IsSpecialLaneMul) {
    // If we have a special lane multiply, drop the character at index 2 by
    // shifting character 3 into its place and truncating to three characters.
    NormedProto[2] = NormedProto[3];
    NormedProto.erase(3);
  } else if (NormedProto.size() == 4 &&
             NormedProto[0] == NormedProto[1] &&
             IsNotVExt) {
    // If NormedProto.size() == 4 and the first two proto characters are the
    // same, ignore the first.
    NormedProto = NormedProto.substr(1, 3);
  } else if (Is5OpLaneAccum) {
    // If we have a 5 op lane accumulator operation, we take characters 1,2,4
    std::string tmp = NormedProto.substr(1,2);
    tmp += NormedProto[4];
    NormedProto = tmp;
  } else if (IsVPADAL) {
    // NOTE(review): the original comment said "ignore the first character",
    // but substr(0, 2) actually keeps the first two characters -- confirm
    // which was intended.  Behavior preserved as written.
    NormedProto = NormedProto.substr(0, 2);
  } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
    // If our instruction is a dup instruction, keep only the first and
    // last characters.
    std::string tmp = "";
    tmp += NormedProto[0];
    tmp += NormedProto[NormedProto.size()-1];
    NormedProto = tmp;
  }
}
1031 
1032 /// GenerateRegisterCheckPatterns - Given a bunch of data we have
1033 /// extracted, generate a FileCheck pattern to check that an
1034 /// instruction's arguments are correct.
1035 static void GenerateRegisterCheckPattern(const std::string &Name,
1036                                          const std::string &Proto,
1037                                          const std::string &OutTypeCode,
1038                                          const bool &HasNPostfix,
1039                                          const bool &IsQuad,
1040                                          const bool &HasLanePostfix,
1041                                          const bool &HasDupPostfix,
1042                                          const size_t &TBNumber,
1043                                          std::string &RegisterSuffix) {
1044 
1045   RegisterSuffix = "";
1046 
1047   const StringRef NameRef(Name);
1048   const StringRef ProtoRef(Proto);
1049 
1050   if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
1051     return;
1052   }
1053 
1054   const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
1055   const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");
1056 
1057   if (IsLoadStore) {
1058     // Grab N value from  v{ld,st}N using its ascii representation.
1059     const size_t Count = NameRef[3] - 48;
1060 
1061     GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
1062                                               HasDupPostfix, HasLanePostfix,
1063                                               Count, RegisterSuffix);
1064   } else if (IsTBXOrTBL) {
1065     RegisterSuffix += "d{{[0-9]+}}, {";
1066     for (size_t i = 0; i < TBNumber-1; i++) {
1067       RegisterSuffix += "d{{[0-9]+}}, ";
1068     }
1069     RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
1070   } else {
1071     // Handle a normal instruction.
1072     if (NameRef.count("vget") || NameRef.count("vset"))
1073       return;
1074 
1075     // We first normalize our proto, since we only need to emit 4
1076     // different types of checks, yet have more than 4 proto types
1077     // that map onto those 4 patterns.
1078     std::string NormalizedProto("");
1079     NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
1080                                              HasLanePostfix, HasDupPostfix,
1081                                              NormalizedProto);
1082 
1083     for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
1084       const char &c = NormalizedProto[i];
1085       switch (c) {
1086       case 'q':
1087         RegisterSuffix += "q{{[0-9]+}}, ";
1088         break;
1089 
1090       case 'd':
1091         RegisterSuffix += "d{{[0-9]+}}, ";
1092         break;
1093 
1094       case 'i':
1095         RegisterSuffix += "#{{[0-9]+}}, ";
1096         break;
1097 
1098       case 'a':
1099         RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
1100         break;
1101       }
1102     }
1103 
1104     // Remove extra ", ".
1105     RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
1106   }
1107 }
1108 
/// GenerateChecksForIntrinsic - Given a specific instruction name +
/// typestr + class kind, generate the proper set of FileCheck
/// Patterns to check for. We could just return a string, but instead
/// use a vector since it provides us with the extra flexibility of
/// emitting multiple checks, which comes in handy for certain cases
/// like mla where we want to check for 2 different instructions.
///
/// NOTE(review): InTypeStr is accepted but never read in this function;
/// confirm whether it can be dropped from the signature.
static void GenerateChecksForIntrinsic(const std::string &Name,
                                       const std::string &Proto,
                                       StringRef &OutTypeStr,
                                       StringRef &InTypeStr,
                                       ClassKind Ck,
                                       const std::string &InstName,
                                       bool IsHiddenLOp,
                                       std::vector<std::string>& Result) {

  // If Ck is a ClassNoTest instruction, just return so no test is
  // emitted.
  if(Ck == ClassNoTest)
    return;

  // vcvt_f32_f16 is checked by a fixed, fully-spelled-out pattern rather
  // than by the generic machinery below.
  if (Name == "vcvt_f32_f16") {
    Result.push_back("vcvt.f32.f16");
    return;
  }


  // Now we preprocess our instruction given the data we have to get the
  // data that we need.
  // Create a StringRef for String Manipulation of our Name.
  const StringRef NameRef(Name);
  // Instruction Prefix.
  std::string Prefix;
  // The type code for our out type string.
  std::string OutTypeCode;
  // To handle our different cases, we need to check for different postfixes.
  // Is our instruction a quad instruction.
  bool IsQuad = false;
  // Our instruction is of the form <instructionname>_n.
  bool HasNPostfix = false;
  // Our instruction is of the form <instructionname>_lane.
  bool HasLanePostfix = false;
  // Our instruction is of the form <instructionname>_dup.
  bool HasDupPostfix  = false;
  // Our instruction is a vcvt instruction which requires special handling.
  // NOTE(review): IsSpecialVCvt is populated by PreprocessInstruction but
  // never read below - confirm this is intentional.
  bool IsSpecialVCvt = false;
  // If we have a vtbxN or vtblN instruction, this is set to N.
  // (size_t)-1 (i.e. SIZE_MAX) acts as the "not set" sentinel.
  size_t TBNumber = -1;
  // Register Suffix
  std::string RegisterSuffix;

  PreprocessInstruction(NameRef, InstName, Prefix,
                        HasNPostfix, HasLanePostfix, HasDupPostfix,
                        IsSpecialVCvt, TBNumber);

  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
                               HasLanePostfix, HasDupPostfix, TBNumber,
                               RegisterSuffix);

  // In the following section, we handle a bunch of special cases. You can tell
  // a special case by the fact we are returning early.

  // If our instruction is a logical instruction without postfix or a
  // hidden LOp just return the current Prefix.
  if (Ck == ClassL || IsHiddenLOp) {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // If we have a vmov, due to the many different cases, some of which
  // vary within the different intrinsics generated for a single
  // instruction type, just output a vmov. (e.g. given an instruction
  // A, A.u32 might be vmov and A.u8 might be vmov.8).
  //
  // FIXME: Maybe something can be done about this. The two cases that we care
  // about are vmov as an LType and vmov as a WType.
  if (Prefix == "vmov") {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // In the following section, we handle special cases.

  if (OutTypeCode == "64") {
    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
    // type, the intrinsic will be optimized away, so just return
    // nothing.  On the other hand if we are handling an uint64x2_t
    // (i.e. quad instruction), vdup/vmov instructions should be
    // emitted.
    if (Prefix == "vdup" || Prefix == "vext") {
      if (IsQuad) {
        Result.push_back("{{vmov|vdup}}");
      }
      return;
    }

    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
    // multiple register operands.
    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
                            || Prefix == "vld4";
    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
                            || Prefix == "vst4";
    if (MultiLoadPrefix || MultiStorePrefix) {
      // NameRef.slice(0, 3) is "vld" or "vst".
      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
      return;
    }

    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
    // emitting said instructions. So return a check for
    // vldr/vstr/vmov/str instead.
    if (HasLanePostfix || HasDupPostfix) {
      if (Prefix == "vst1") {
        Result.push_back("{{str|vstr|vmov}}");
        return;
      } else if (Prefix == "vld1") {
        Result.push_back("{{ldr|vldr|vmov}}");
        return;
      }
    }
  }

  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
  // sometimes disassembled as vtrn.32. We use a regex to handle both
  // cases.
  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
    return;
  }

  // Currently on most ARM processors, we do not use vmla/vmls for
  // quad floating point operations. Instead we output vmul + vadd. So
  // check if we have one of those instructions and just output a
  // check for vmul.
  if (OutTypeCode == "f32") {
    if (Prefix == "vmls") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vsub." + OutTypeCode);
      return;
    } else if (Prefix == "vmla") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vadd." + OutTypeCode);
      return;
    }
  }

  // If we have vcvt, get the input type from the instruction name
  // (which should be of the form instname_inputtype) and append it
  // before the output type.
  if (Prefix == "vcvt") {
    const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
    Prefix += "." + inTypeCode;
  }

  // Append output type code to get our final mangled instruction.
  Prefix += "." + OutTypeCode;

  Result.push_back(Prefix + " " + RegisterSuffix);
}
1267 
/// UseMacro - Examine the prototype string to determine if the intrinsic
/// should be defined as a preprocessor macro instead of an inline function.
static bool UseMacro(const std::string &proto) {
  // Immediate arguments ('i') must stay visible as literals so that
  // SemaChecking can range check the value the user passed, and pointer
  // ('p') / const pointer ('c') arguments must not have aligned attributes
  // hidden by an intermediate declaration - all three force a #define.
  return proto.find_first_of("ipc") != std::string::npos;
}
1285 
/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is
/// defined as a macro should be accessed directly instead of being first
/// assigned to a local temporary.
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  switch (proto[i]) {
  case 'i': // constant int
  case 'p': // pointer
  case 'c': // const pointer
    return true;
  default:
    return false;
  }
}
1293 
1294 // Generate the string "(argtype a, argtype b, ...)"
1295 static std::string GenArgs(const std::string &proto, StringRef typestr) {
1296   bool define = UseMacro(proto);
1297   char arg = 'a';
1298 
1299   std::string s;
1300   s += "(";
1301 
1302   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1303     if (define) {
1304       // Some macro arguments are used directly instead of being assigned
1305       // to local temporaries; prepend an underscore prefix to make their
1306       // names consistent with the local temporaries.
1307       if (MacroArgUsedDirectly(proto, i))
1308         s += "__";
1309     } else {
1310       s += TypeString(proto[i], typestr) + " __";
1311     }
1312     s.push_back(arg);
1313     if ((i + 1) < e)
1314       s += ", ";
1315   }
1316 
1317   s += ")";
1318   return s;
1319 }
1320 
1321 // Macro arguments are not type-checked like inline function arguments, so
1322 // assign them to local temporaries to get the right type checking.
1323 static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
1324   char arg = 'a';
1325   std::string s;
1326   bool generatedLocal = false;
1327 
1328   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1329     // Do not create a temporary for an immediate argument.
1330     // That would defeat the whole point of using a macro!
1331     if (MacroArgUsedDirectly(proto, i))
1332       continue;
1333     generatedLocal = true;
1334 
1335     s += TypeString(proto[i], typestr) + " __";
1336     s.push_back(arg);
1337     s += " = (";
1338     s.push_back(arg);
1339     s += "); ";
1340   }
1341 
1342   if (generatedLocal)
1343     s += "\\\n  ";
1344   return s;
1345 }
1346 
1347 // Use the vmovl builtin to sign-extend or zero-extend a vector.
1348 static std::string Extend(StringRef typestr, const std::string &a, bool h=0) {
1349   std::string s, high;
1350   high = h ? "_high" : "";
1351   s = MangleName("vmovl" + high, typestr, ClassS);
1352   s += "(" + a + ")";
1353   return s;
1354 }
1355 
1356 // Get the high 64-bit part of a vector
1357 static std::string GetHigh(const std::string &a, StringRef typestr) {
1358   std::string s;
1359   s = MangleName("vget_high", typestr, ClassS);
1360   s += "(" + a + ")";
1361   return s;
1362 }
1363 
1364 // Gen operation with two operands and get high 64-bit for both of two operands.
1365 static std::string Gen2OpWith2High(StringRef typestr,
1366                                    const std::string &op,
1367                                    const std::string &a,
1368                                    const std::string &b) {
1369   std::string s;
1370   std::string Op1 = GetHigh(a, typestr);
1371   std::string Op2 = GetHigh(b, typestr);
1372   s = MangleName(op, typestr, ClassS);
1373   s += "(" + Op1 + ", " + Op2 + ");";
1374   return s;
1375 }
1376 
1377 // Gen operation with three operands and get high 64-bit of the latter
1378 // two operands.
1379 static std::string Gen3OpWith2High(StringRef typestr,
1380                                    const std::string &op,
1381                                    const std::string &a,
1382                                    const std::string &b,
1383                                    const std::string &c) {
1384   std::string s;
1385   std::string Op1 = GetHigh(b, typestr);
1386   std::string Op2 = GetHigh(c, typestr);
1387   s = MangleName(op, typestr, ClassS);
1388   s += "(" + a + ", " + Op1 + ", " + Op2 + ");";
1389   return s;
1390 }
1391 
1392 // Gen combine operation by putting a on low 64-bit, and b on high 64-bit.
1393 static std::string GenCombine(std::string typestr,
1394                               const std::string &a,
1395                               const std::string &b) {
1396   std::string s;
1397   s = MangleName("vcombine", typestr, ClassS);
1398   s += "(" + a + ", " + b + ")";
1399   return s;
1400 }
1401 
1402 static std::string Duplicate(unsigned nElts, StringRef typestr,
1403                              const std::string &a) {
1404   std::string s;
1405 
1406   s = "(" + TypeString('d', typestr) + "){ ";
1407   for (unsigned i = 0; i != nElts; ++i) {
1408     s += a;
1409     if ((i + 1) < nElts)
1410       s += ", ";
1411   }
1412   s += " }";
1413 
1414   return s;
1415 }
1416 
// Emit a __builtin_shufflevector call that broadcasts one lane of `vec`
// across all nElts result elements (the same lane index repeated).
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string shuffle("__builtin_shufflevector(");
  shuffle += vec + ", " + vec;
  for (unsigned idx = 0; idx != nElts; ++idx)
    shuffle += ", " + lane;
  shuffle += ")";
  return shuffle;
}
1425 
1426 static std::string RemoveHigh(const std::string &name) {
1427   std::string s = name;
1428   std::size_t found = s.find("_high_");
1429   if (found == std::string::npos)
1430     PrintFatalError("name should contain \"_high_\" for high intrinsics");
1431   s.replace(found, 5, "");
1432   return s;
1433 }
1434 
1435 static unsigned GetNumElements(StringRef typestr, bool &quad) {
1436   quad = false;
1437   bool dummy = false;
1438   char type = ClassifyType(typestr, quad, dummy, dummy);
1439   unsigned nElts = 0;
1440   switch (type) {
1441   case 'c': nElts = 8; break;
1442   case 's': nElts = 4; break;
1443   case 'i': nElts = 2; break;
1444   case 'l': nElts = 1; break;
1445   case 'h': nElts = 4; break;
1446   case 'f': nElts = 2; break;
1447   case 'd':
1448     nElts = 1;
1449     break;
1450   default:
1451     PrintFatalError("unhandled type!");
1452   }
1453   if (quad) nElts <<= 1;
1454   return nElts;
1455 }
1456 
// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
//
// \param name    the intrinsic's name; only used by the *Hi ops, which call
//                RemoveHigh(name) to derive the non-high intrinsic to invoke.
// \param op      which generic operation to expand.
// \param proto   prototype string; proto[0] is the return-type modifier.
// \param typestr NEON type string of the intrinsic.
//
// Returns the body as a C expression/statement string; when the intrinsic is
// emitted as an inline function (i.e. !UseMacro(proto)) it is prefixed with
// "return ".
static std::string GenOpString(const std::string &name, OpKind op,
                               const std::string &proto, StringRef typestr) {
  bool quad;
  unsigned nElts = GetNumElements(typestr, quad);
  bool define = UseMacro(proto);

  // Type string for the result (proto[0] is the return modifier).
  std::string ts = TypeString(proto[0], typestr);
  std::string s;
  if (!define) {
    s = "return ";
  }

  switch(op) {
  case OpAdd:
    s += "__a + __b;";
    break;
  case OpAddl:
    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddlHi:
    s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpAddw:
    s += "__a + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddwHi:
    s += "__a + " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpSub:
    s += "__a - __b;";
    break;
  case OpSubl:
    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
    break;
  case OpSublHi:
    s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpSubw:
    s += "__a - " + Extend(typestr, "__b") + ";";
    break;
  case OpSubwHi:
    s += "__a - " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpMulN:
    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
    break;
  case OpMulLane:
    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
    break;
  case OpMul:
    s += "__a * __b;";
    break;
  case OpMullLane:
    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMlaN:
    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlaLane:
    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMla:
    s += "__a + (__b * __c);";
    break;
  case OpMlalN:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlalLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlal:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMullHi:
    s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
    break;
  case OpMlalHi:
    s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
    break;
  case OpMlsN:
    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlsLane:
    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMls:
    s += "__a - (__b * __c);";
    break;
  case OpMlslN:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlslLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlsl:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMlslHi:
    s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
    break;
  case OpQDMullLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMlalLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMulhLane:
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQRDMulhLane:
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  // Comparisons: the vector comparison result is cast to the result type.
  case OpEq:
    s += "(" + ts + ")(__a == __b);";
    break;
  case OpGe:
    s += "(" + ts + ")(__a >= __b);";
    break;
  case OpLe:
    s += "(" + ts + ")(__a <= __b);";
    break;
  case OpGt:
    s += "(" + ts + ")(__a > __b);";
    break;
  case OpLt:
    s += "(" + ts + ")(__a < __b);";
    break;
  case OpNeg:
    s += " -__a;";
    break;
  case OpNot:
    s += " ~__a;";
    break;
  case OpAnd:
    s += "__a & __b;";
    break;
  case OpOr:
    s += "__a | __b;";
    break;
  case OpXor:
    s += "__a ^ __b;";
    break;
  case OpAndNot:
    s += "__a & ~__b;";
    break;
  case OpOrNot:
    s += "__a | ~__b;";
    break;
  case OpCast:
    s += "(" + ts + ")__a;";
    break;
  case OpConcat:
    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
    s += ", (int64x1_t)__b, 0, 1);";
    break;
  case OpHi:
    // nElts is for the result vector, so the source is twice that number.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = nElts; i < nElts * 2; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpLo:
    // Select the low nElts elements of the source.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 0; i < nElts; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpDup:
    s += Duplicate(nElts, typestr, "__a") + ";";
    break;
  case OpDupLane:
    s += SplatLane(nElts, "__a", "__b") + ";";
    break;
  case OpSelect:
    // ((0 & 1) | (~0 & 2))
    // __b and __c are cast to the mask's type (proto[1]) before the
    // bitwise select.
    s += "(" + ts + ")";
    ts = TypeString(proto[1], typestr);
    s += "((__a & (" + ts + ")__b) | ";
    s += "(~__a & (" + ts + ")__c));";
    break;
  case OpRev16:
    // Swap each adjacent pair of elements: indices 1,0,3,2,...
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 2; i <= nElts; i += 2)
      for (unsigned j = 0; j != 2; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  case OpRev32: {
    // Reverse the elements within each group of WordElts.
    unsigned WordElts = nElts >> (1 + (int)quad);
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = WordElts; i <= nElts; i += WordElts)
      for (unsigned j = 0; j != WordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpRev64: {
    // Reverse the elements within each group of DblWordElts.
    unsigned DblWordElts = nElts >> (int)quad;
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
      for (unsigned j = 0; j != DblWordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpAbdl: {
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  case OpAbdlHi:
    s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
    break;
  // The *hnHi ops combine __a (low half) with the narrowing op on __b/__c.
  case OpAddhnHi: {
    std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
    s += ";";
    break;
  }
  case OpRAddhnHi: {
    std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
    s += ";";
    break;
  }
  case OpSubhnHi: {
    std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
    s += ";";
    break;
  }
  case OpRSubhnHi: {
    std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
    s += ";";
    break;
  }
  case OpAba:
    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbal:
    s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbalHi:
    s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
    break;
  case OpQDMullHi:
    s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
    break;
  case OpQDMlalHi:
    s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
    break;
  case OpQDMlslHi:
    s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
    break;
  case OpDiv:
    s += "__a / __b;";
    break;
  case OpMovlHi: {
    // Prepend the declaration of __a1 (the high half of __a) before the
    // possibly-present "return " already in s, then shift by 0 to widen.
    s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
        MangleName("vget_high", typestr, ClassS) + "(__a);\n  " + s;
    s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
    s += "(__a1, 0);";
    break;
  }
  case OpLongHi: {
    // Another local variable __a1 is needed for calling a Macro,
    // or using __a will have naming conflict when Macro expanding.
    s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
         MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
    s += "  (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
         "(__a1, __b);";
    break;
  }
  case OpNarrowHi: {
    s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
         MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
    break;
  }
  default:
    PrintFatalError("unknown OpKind!");
  }
  return s;
}
1764 
1765 static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
1766   unsigned mod = proto[0];
1767 
1768   if (mod == 'v' || mod == 'f')
1769     mod = proto[1];
1770 
1771   bool quad = false;
1772   bool poly = false;
1773   bool usgn = false;
1774   bool scal = false;
1775   bool cnst = false;
1776   bool pntr = false;
1777 
1778   // Base type to get the type string for.
1779   char type = ClassifyType(typestr, quad, poly, usgn);
1780 
1781   // Based on the modifying character, change the type and width if necessary.
1782   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
1783 
1784   NeonTypeFlags::EltType ET;
1785   switch (type) {
1786     case 'c':
1787       ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
1788       break;
1789     case 's':
1790       ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
1791       break;
1792     case 'i':
1793       ET = NeonTypeFlags::Int32;
1794       break;
1795     case 'l':
1796       ET = NeonTypeFlags::Int64;
1797       break;
1798     case 'h':
1799       ET = NeonTypeFlags::Float16;
1800       break;
1801     case 'f':
1802       ET = NeonTypeFlags::Float32;
1803       break;
1804     case 'd':
1805       ET = NeonTypeFlags::Float64;
1806       break;
1807     default:
1808       PrintFatalError("unhandled type!");
1809   }
1810   NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
1811   return Flags.getFlags();
1812 }
1813 
// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
//
// \param name    intrinsic name.
// \param proto   prototype string; proto[0] is the return-type modifier,
//                the rest describe the arguments.
// \param typestr NEON type string of the intrinsic.
// \param ck      class kind; downgraded to ClassB when no scalar operand
//                is present (see below).
//
// Returns the statement(s) forming the intrinsic body (a call to the
// corresponding __builtin_neon_* with appropriately cast arguments).
static std::string GenBuiltin(const std::string &name, const std::string &proto,
                              StringRef typestr, ClassKind ck) {
  std::string s;

  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
  // sret-like argument.
  bool sret = (proto[0] >= '2' && proto[0] <= '4');

  bool define = UseMacro(proto);

  // Check if the prototype has a scalar operand with the type of the vector
  // elements.  If not, bitcasting the args will take care of arg checking.
  // The actual signedness etc. will be taken care of with special enums.
  if (proto.find('s') == std::string::npos)
    ck = ClassB;

  // Non-void return: either declare the sret temporary `r`, or emit a cast
  // to the result type (with "return " when this is an inline function).
  if (proto[0] != 'v') {
    std::string ts = TypeString(proto[0], typestr);

    if (define) {
      if (sret)
        s += ts + " r; ";
      else
        s += "(" + ts + ")";
    } else if (sret) {
      s += ts + " r; ";
    } else {
      s += "return (" + ts + ")";
    }
  }

  // 'a' in the prototype marks a splatted scalar ("_n") argument.
  bool splat = proto.find('a') != std::string::npos;

  s += "__builtin_neon_";
  if (splat) {
    // Call the non-splat builtin: chop off the "_n" suffix from the name.
    std::string vname(name, 0, name.size()-2);
    s += MangleName(vname, typestr, ck);
  } else {
    s += MangleName(name, typestr, ck);
  }
  s += "(";

  // Pass the address of the return variable as the first argument to sret-like
  // builtins.
  if (sret)
    s += "&r, ";

  char arg = 'a';
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    std::string args = std::string(&arg, 1);

    // Use the local temporaries instead of the macro arguments.
    args = "__" + args;

    bool argQuad = false;
    bool argPoly = false;
    bool argUsgn = false;
    bool argScalar = false;
    bool dummy = false;
    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
                      dummy, dummy);

    // Handle multiple-vector values specially, emitting each subvector as an
    // argument to the __builtin.
    if (proto[i] >= '2' && proto[i] <= '4') {
      // Check if an explicit cast is needed.
      if (argType != 'c' || argPoly || argUsgn)
        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;

      // Emit one ".val[k]" access per subvector.
      for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
        s += args + ".val[" + utostr(vi) + "]";
        if ((vi + 1) < ve)
          s += ", ";
      }
      if ((i + 1) < e)
        s += ", ";

      continue;
    }

    // For "_n" intrinsics, the trailing scalar argument is broadcast into
    // a full vector before the builtin call.
    if (splat && (i + 1) == e)
      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);

    // Check if an explicit cast is needed.
    if ((splat || !argScalar) &&
        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
      std::string argTypeStr = "c";
      if (ck != ClassB)
        argTypeStr = argType;
      if (argQuad)
        argTypeStr = "Q" + argTypeStr;
      args = "(" + TypeString('d', argTypeStr) + ")" + args;
    }

    s += args;
    if ((i + 1) < e)
      s += ", ";
  }

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (ck == ClassB)
    s += ", " + utostr(GetNeonEnum(proto, typestr));

  s += ");";

  // For sret-style builtins, hand the temporary back to the caller.
  if (proto[0] != 'v' && sret) {
    if (define)
      s += " r;";
    else
      s += " return r;";
  }
  return s;
}
1930 
1931 static std::string GenBuiltinDef(const std::string &name,
1932                                  const std::string &proto,
1933                                  StringRef typestr, ClassKind ck) {
1934   std::string s("BUILTIN(__builtin_neon_");
1935 
1936   // If all types are the same size, bitcasting the args will take care
1937   // of arg checking.  The actual signedness etc. will be taken care of with
1938   // special enums.
1939   if (proto.find('s') == std::string::npos)
1940     ck = ClassB;
1941 
1942   s += MangleName(name, typestr, ck);
1943   s += ", \"";
1944 
1945   for (unsigned i = 0, e = proto.size(); i != e; ++i)
1946     s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
1947 
1948   // Extra constant integer to hold type class enum for this function, e.g. s8
1949   if (ck == ClassB)
1950     s += "i";
1951 
1952   s += "\", \"n\")";
1953   return s;
1954 }
1955 
1956 static std::string GenIntrinsic(const std::string &name,
1957                                 const std::string &proto,
1958                                 StringRef outTypeStr, StringRef inTypeStr,
1959                                 OpKind kind, ClassKind classKind) {
1960   assert(!proto.empty() && "");
1961   bool define = UseMacro(proto) && kind != OpUnavailable;
1962   std::string s;
1963 
1964   // static always inline + return type
1965   if (define)
1966     s += "#define ";
1967   else
1968     s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
1969 
1970   // Function name with type suffix
1971   std::string mangledName = MangleName(name, outTypeStr, ClassS);
1972   if (outTypeStr != inTypeStr) {
1973     // If the input type is different (e.g., for vreinterpret), append a suffix
1974     // for the input type.  String off a "Q" (quad) prefix so that MangleName
1975     // does not insert another "q" in the name.
1976     unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
1977     StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
1978     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
1979   }
1980   s += mangledName;
1981 
1982   // Function arguments
1983   s += GenArgs(proto, inTypeStr);
1984 
1985   // Definition.
1986   if (define) {
1987     s += " __extension__ ({ \\\n  ";
1988     s += GenMacroLocals(proto, inTypeStr);
1989   } else if (kind == OpUnavailable) {
1990     s += " __attribute__((unavailable));\n";
1991     return s;
1992   } else
1993     s += " {\n  ";
1994 
1995   if (kind != OpNone)
1996     s += GenOpString(name, kind, proto, outTypeStr);
1997   else
1998     s += GenBuiltin(name, proto, outTypeStr, classKind);
1999   if (define)
2000     s += " })";
2001   else
2002     s += " }";
2003   s += "\n";
2004   return s;
2005 }
2006 
/// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
/// is comprised of type definitions and function declarations.  The emission
/// order matters: scalar typedefs, then vector typedefs, then struct-of-
/// vector typedefs, then the intrinsics (ARM first, then AArch64-only ones
/// inside an #ifdef __aarch64__ block).
void NeonEmitter::run(raw_ostream &OS) {
  // Emit the license banner.
  OS <<
    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
    "---===\n"
    " *\n"
    " * Permission is hereby granted, free of charge, to any person obtaining "
    "a copy\n"
    " * of this software and associated documentation files (the \"Software\"),"
    " to deal\n"
    " * in the Software without restriction, including without limitation the "
    "rights\n"
    " * to use, copy, modify, merge, publish, distribute, sublicense, "
    "and/or sell\n"
    " * copies of the Software, and to permit persons to whom the Software is\n"
    " * furnished to do so, subject to the following conditions:\n"
    " *\n"
    " * The above copyright notice and this permission notice shall be "
    "included in\n"
    " * all copies or substantial portions of the Software.\n"
    " *\n"
    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
    "EXPRESS OR\n"
    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
    "MERCHANTABILITY,\n"
    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
    "SHALL THE\n"
    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
    "OTHER\n"
    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
    "ARISING FROM,\n"
    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
    "DEALINGS IN\n"
    " * THE SOFTWARE.\n"
    " *\n"
    " *===--------------------------------------------------------------------"
    "---===\n"
    " */\n\n";

  OS << "#ifndef __ARM_NEON_H\n";
  OS << "#define __ARM_NEON_H\n\n";

  // NOTE(review): "__AARCH_FEATURE_ADVSIMD" looks like a misspelling of an
  // ACLE feature macro (e.g. __ARM_FEATURE_ADVSIMD) -- confirm against the
  // ACLE specification before changing the emitted header text.
  OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n";
  OS << "#error \"NEON support not enabled\"\n";
  OS << "#endif\n\n";

  OS << "#include <stdint.h>\n\n";

  // Emit NEON-specific scalar typedefs.
  OS << "typedef float float32_t;\n";
  OS << "typedef __fp16 float16_t;\n";

  // float64_t only exists for AArch64.
  OS << "#ifdef __aarch64__\n";
  OS << "typedef double float64_t;\n";
  OS << "#endif\n\n";

  // For now, signedness of polynomial types depends on target
  OS << "#ifdef __aarch64__\n";
  OS << "typedef uint8_t poly8_t;\n";
  OS << "typedef uint16_t poly16_t;\n";
  OS << "#else\n";
  OS << "typedef int8_t poly8_t;\n";
  OS << "typedef int16_t poly16_t;\n";
  OS << "#endif\n";

  // Emit Neon vector typedefs.  Each letter of this string is a base element
  // type; a 'Q' prefix marks the 128-bit (quad) variant, 'U' unsigned, and
  // 'P' polynomial (see ParseTypes/ClassifyType).
  std::string TypedefTypes(
      "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfQdPcQPcPsQPs");
  SmallVector<StringRef, 24> TDTypeVec;
  ParseTypes(0, TypedefTypes, TDTypeVec);

  // Emit vector typedefs.
  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
    bool dummy, quad = false, poly = false;
    char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
    bool isA64 = false;

    // 2 x f64 vectors (float64x2_t) are only available on AArch64; guard
    // their typedefs with #ifdef __aarch64__.
    if (type == 'd' && quad)
      isA64 = true;

    if (isA64)
      OS << "#ifdef __aarch64__\n";

    if (poly)
      OS << "typedef __attribute__((neon_polyvector_type(";
    else
      OS << "typedef __attribute__((neon_vector_type(";

    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
    OS << utostr(nElts) << "))) ";
    // Pad single-digit lane counts so the typedef column lines up.
    if (nElts < 10)
      OS << " ";

    OS << TypeString('s', TDTypeVec[i]);
    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";

    if (isA64)
      OS << "#endif\n";
  }
  OS << "\n";

  // Emit struct typedefs (the NxM multi-vector types, e.g. int8x8x2_t),
  // for 2-, 3- and 4-element aggregates of each vector type.
  for (unsigned vi = 2; vi != 5; ++vi) {
    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
      bool dummy, quad = false, poly = false;
      char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
      bool isA64 = false;

      // As above, 2 x f64 aggregates are AArch64-only.
      if (type == 'd' && quad)
        isA64 = true;

      if (isA64)
        OS << "#ifdef __aarch64__\n";

      std::string ts = TypeString('d', TDTypeVec[i]);
      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
      OS << "typedef struct " << vs << " {\n";
      OS << "  " << ts << " val";
      OS << "[" << utostr(vi) << "]";
      OS << ";\n} ";
      OS << vs << ";\n";

      if (isA64)
        OS << "#endif\n";

      OS << "\n";
    }
  }

  // Shorthand used by every emitted intrinsic definition; #undef'd at the end.
  OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";

  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");

  StringMap<ClassKind> EmittedMap;

  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
  // intrinsics.  (Some of the saturating multiply instructions are also
  // used to implement the corresponding "_lane" variants, but tablegen
  // sorts the records into alphabetical order so that the "_lane" variants
  // come after the intrinsics they use.)
  emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);

  // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
  // common intrinsics appear only once in the output stream.
  // The check for uniqueness is done in emitIntrinsic.
  // Emit ARM intrinsics.
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip AArch64 intrinsics; they will be emitted at the end.
    bool isA64 = R->getValueAsBit("isA64");
    if (isA64)
      continue;

    // VABDL (emitted early above) is not filtered here; emitIntrinsic's
    // EmittedMap uniquing suppresses any duplicate output for it.
    if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
        R->getName() != "VABD")
      emitIntrinsic(OS, R, EmittedMap);
  }

  // Emit AArch64-specific intrinsics.
  OS << "#ifdef __aarch64__\n";

  // As with the ARM block, emit the "_high" helpers first so later
  // intrinsics can reference them.
  emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip ARM intrinsics already included above.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64)
      continue;

    emitIntrinsic(OS, R, EmittedMap);
  }

  OS << "#endif\n\n";

  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_NEON_H */\n";
}
2193 
/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
/// intrinsics specified by record R checking for intrinsic uniqueness.
/// EmittedMap records the full text of every definition already written so
/// an identical definition is never emitted twice.
void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
                                StringMap<ClassKind> &EmittedMap) {
  std::string name = R->getValueAsString("Name");
  std::string Proto = R->getValueAsString("Prototype");
  std::string Types = R->getValueAsString("Types");

  SmallVector<StringRef, 16> TypeVec;
  ParseTypes(R, Types, TypeVec);

  OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];

  // The class kind comes from the record's second superclass (the first is
  // the generic "Inst" class).
  ClassKind classKind = ClassNone;
  if (R->getSuperClasses().size() >= 2)
    classKind = ClassMap[R->getSuperClasses()[1]];
  if (classKind == ClassNone && kind == OpNone)
    PrintFatalError(R->getLoc(), "Builtin has no class kind");

  for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
    if (kind == OpReinterpret) {
      // vreinterpret: emit a cast from every other type of the same width
      // (same quad-ness) to this output type.
      bool outQuad = false;
      bool dummy = false;
      (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
      for (unsigned srcti = 0, srcte = TypeVec.size();
           srcti != srcte; ++srcti) {
        bool inQuad = false;
        (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
        // Skip self-casts and casts that would change the vector width.
        if (srcti == ti || inQuad != outQuad)
          continue;
        std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
                                     OpCast, ClassS);
        // Emit each unique definition only once.
        if (EmittedMap.count(s))
          continue;
        EmittedMap[s] = ClassS;
        OS << s;
      }
    } else {
      // Normal intrinsic: input and output types are the same.
      std::string s =
          GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
      if (EmittedMap.count(s))
        continue;
      EmittedMap[s] = classKind;
      OS << s;
    }
  }
  OS << "\n";
}
2242 
2243 static unsigned RangeFromType(const char mod, StringRef typestr) {
2244   // base type to get the type string for.
2245   bool quad = false, dummy = false;
2246   char type = ClassifyType(typestr, quad, dummy, dummy);
2247   type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
2248 
2249   switch (type) {
2250     case 'c':
2251       return (8 << (int)quad) - 1;
2252     case 'h':
2253     case 's':
2254       return (4 << (int)quad) - 1;
2255     case 'f':
2256     case 'i':
2257       return (2 << (int)quad) - 1;
2258     case 'l':
2259       return (1 << (int)quad) - 1;
2260     default:
2261       PrintFatalError("unhandled type!");
2262   }
2263 }
2264 
/// Generate the ARM and AArch64 intrinsic range checking code for
/// shift/lane immediates, checking for unique declarations.  The emitted
/// switch cases are consumed by SemaChecking.cpp under the
/// GET_NEON_IMMEDIATE_CHECK / GET_NEON_AARCH64_IMMEDIATE_CHECK guards;
/// each case sets the immediate argument index (i) and its legal range
/// (l, u).  A64IntrinsicMap maps "name@prototype" to the class kind of the
/// AArch64 redefinition, if any.
void
NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
                                        StringMap<ClassKind> &A64IntrinsicMap,
                                        bool isA64RangeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate the intrinsic range checking code for shift/lane immediates.
  if (isA64RangeCheck)
    OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
  else
    OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Only records implemented directly as builtins (OpNone) have a
    // __builtin_neon_ entry to range-check.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    // Key used to match this record against its AArch64 redefinition.
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which do not have an immediate do not need to have range
    // checking code emitted.
    size_t immPos = Proto.find('i');
    if (immPos == std::string::npos)
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    // Do not include AArch64 range checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64RangeCheck && isA64)
      continue;

    // Include ARM range checks in AArch64 but only if ARM intrinsics are not
    // redefined by AArch64 to handle new types.
    if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      std::string namestr, shiftstr, rangestr;

      if (R->getValueAsBit("isVCVT_N")) {
        // VCVT between floating- and fixed-point values takes an immediate
        // in the range [1, 32] for f32, or [1, 64] for f64.
        ck = ClassB;
        if (name.find("32") != std::string::npos)
          rangestr = "l = 1; u = 31"; // upper bound = l + u
        else if (name.find("64") != std::string::npos)
          rangestr = "l = 1; u = 63";
        else
          PrintFatalError(R->getLoc(),
              "Fixed point convert name should contains \"32\" or \"64\"");
      } else if (Proto.find('s') == std::string::npos) {
        // Builtins which are overloaded by type will need to have their upper
        // bound computed at Sema time based on the type constant.
        ck = ClassB;
        if (R->getValueAsBit("isShift")) {
          shiftstr = ", true";

          // Right shifts have an 'r' in the name, left shifts do not.
          if (name.find('r') != std::string::npos)
            rangestr = "l = 1; ";
        }
        rangestr += "u = RFT(TV" + shiftstr + ")";
      } else {
        // The immediate generally refers to a lane in the preceding argument.
        assert(immPos > 0 && "unexpected immediate operand");
        rangestr =
            "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
      }
      // Make sure cases appear only once by uniquing them in a string map.
      namestr = MangleName(name, TypeVec[ti], ck);
      if (EmittedMap.count(namestr))
        continue;
      EmittedMap[namestr] = OpNone;

      // Calculate the index of the immediate that should be range checked.
      unsigned immidx = 0;

      // Builtins that return a struct of multiple vectors have an extra
      // leading arg for the struct return.
      if (Proto[0] >= '2' && Proto[0] <= '4')
        ++immidx;

      // Add one to the index for each argument until we reach the immediate
      // to be checked.  Structs of vectors are passed as multiple arguments.
      for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
        switch (Proto[ii]) {
        default:
          immidx += 1;
          break;
        case '2':
          immidx += 2;
          break;
        case '3':
          immidx += 3;
          break;
        case '4':
          immidx += 4;
          break;
        case 'i':
          // Found the immediate; terminate the loop after this iteration.
          ie = ii + 1;
          break;
        }
      }
      if (isA64RangeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
         << rangestr << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}
2401 
/// Generate the ARM and AArch64 overloaded type checking code for
/// SemaChecking.cpp, checking for unique builtin declarations.  Each emitted
/// case sets a bitmask of the legal type-class enum values for an overloaded
/// (ClassB) builtin, plus optional pointer-argument information.
void
NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
                                      StringMap<ClassKind> &A64IntrinsicMap,
                                      bool isA64TypeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate the overloaded type checking code for SemaChecking.cpp
  if (isA64TypeCheck)
    OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
  else
    OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only records implemented directly as builtins (OpNone) matter here.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    std::string name = R->getValueAsString("Name");
    // Key used to match this record against its AArch64 redefinition.
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which have a scalar argument cannot be overloaded, no need to
    // check them if we are emitting the type checking code.
    if (Proto.find('s') != std::string::npos)
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    // Do not include AArch64 type checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64TypeCheck && isA64)
      continue;

    // Include ARM type check in AArch64 but only if ARM intrinsics
    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
    // redefined in AArch64 to handle an additional 2 x f64 type.
    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
    if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    // Accumulate a legal-type bitmask separately for the 64-bit (mask) and
    // 128-bit (qmask) variants; si/qi remember a representative type index
    // for mangling the case label.
    int si = -1, qi = -1;
    uint64_t mask = 0, qmask = 0;
    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the switch case(s) for this builtin for the type validation.
      bool quad = false, poly = false, usgn = false;
      (void) ClassifyType(TypeVec[ti], quad, poly, usgn);

      if (quad) {
        qi = ti;
        qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      } else {
        si = ti;
        mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      }
    }

    // Check if the builtin function has a pointer or const pointer argument.
    int PtrArgNum = -1;
    bool HasConstPtr = false;
    for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
      char ArgType = Proto[arg];
      if (ArgType == 'c') {
        HasConstPtr = true;
        PtrArgNum = arg - 1;
        break;
      }
      if (ArgType == 'p') {
        PtrArgNum = arg - 1;
        break;
      }
    }
    // For sret builtins, adjust the pointer argument index.
    if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
      PtrArgNum += 1;

    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
    // and vst1_lane intrinsics.  Using a pointer to the vector element
    // type with one of those operations causes codegen to select an aligned
    // load/store instruction.  If you want an unaligned operation,
    // the pointer argument needs to have less alignment than element type,
    // so just accept any pointer type.
    if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
      PtrArgNum = -1;
      HasConstPtr = false;
    }

    // Emit one case for the 64-bit variant (if any) and one for the 128-bit
    // variant (if any).
    if (mask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
         << "0x" << utohexstr(mask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
    if (qmask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
         << "0x" << utohexstr(qmask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}
2534 
/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def
/// declaration of builtins, checking for unique builtin declarations.
/// Output is guarded by GET_NEON_BUILTINS / GET_NEON_AARCH64_BUILTINS so the
/// same generated file can serve both targets.
void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
                                 StringMap<ClassKind> &A64IntrinsicMap,
                                 bool isA64GenBuiltinDef) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate BuiltinsARM.def and BuiltinsAArch64.def
  if (isA64GenBuiltinDef)
    OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
  else
    OS << "#ifdef GET_NEON_BUILTINS\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only records implemented directly as builtins (OpNone) need a
    // BUILTIN() entry; the rest expand to generic vector operations.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string name = R->getValueAsString("Name");
    // Key used to match this record against its AArch64 redefinition.
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    std::string Types = R->getValueAsString("Types");
    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    // Do not include AArch64 BUILTIN() macros if not generating
    // code for AArch64
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64GenBuiltinDef && isA64)
      continue;

    // Include ARM BUILTIN() macros in AArch64 but only if ARM intrinsics
    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
    // redefined in AArch64 to handle an additional 2 x f64 type.
    if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the declaration for this builtin, ensuring
      // that each unique BUILTIN() macro appears only once in the output
      // stream.
      std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
      if (EmittedMap.count(bd))
        continue;

      EmittedMap[bd] = OpNone;
      OS << bd << "\n";
    }
  }
  OS << "#endif\n\n";
}
2602 
2603 /// runHeader - Emit a file with sections defining:
2604 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
2605 /// 2. the SemaChecking code for the type overload checking.
2606 /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
2607 void NeonEmitter::runHeader(raw_ostream &OS) {
2608   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2609 
2610   // build a map of AArch64 intriniscs to be used in uniqueness checks.
2611   StringMap<ClassKind> A64IntrinsicMap;
2612   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2613     Record *R = RV[i];
2614 
2615     bool isA64 = R->getValueAsBit("isA64");
2616     if (!isA64)
2617       continue;
2618 
2619     ClassKind CK = ClassNone;
2620     if (R->getSuperClasses().size() >= 2)
2621       CK = ClassMap[R->getSuperClasses()[1]];
2622 
2623     std::string Name = R->getValueAsString("Name");
2624     std::string Proto = R->getValueAsString("Prototype");
2625     std::string Rename = Name + "@" + Proto;
2626     if (A64IntrinsicMap.count(Rename))
2627       continue;
2628     A64IntrinsicMap[Rename] = CK;
2629   }
2630 
2631   // Generate BuiltinsARM.def for ARM
2632   genBuiltinsDef(OS, A64IntrinsicMap, false);
2633 
2634   // Generate BuiltinsAArch64.def for AArch64
2635   genBuiltinsDef(OS, A64IntrinsicMap, true);
2636 
2637   // Generate ARM overloaded type checking code for SemaChecking.cpp
2638   genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
2639 
2640   // Generate AArch64 overloaded type checking code for SemaChecking.cpp
2641   genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
2642 
2643   // Generate ARM range checking code for shift/lane immediates.
2644   genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
2645 
2646   // Generate the AArch64 range checking code for shift/lane immediates.
2647   genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
2648 }
2649 
2650 /// GenTest - Write out a test for the intrinsic specified by the name and
2651 /// type strings, including the embedded patterns for FileCheck to match.
2652 static std::string GenTest(const std::string &name,
2653                            const std::string &proto,
2654                            StringRef outTypeStr, StringRef inTypeStr,
2655                            bool isShift, bool isHiddenLOp,
2656                            ClassKind ck, const std::string &InstName,
2657 						   bool isA64,
2658 						   std::string & testFuncProto) {
2659   assert(!proto.empty() && "");
2660   std::string s;
2661 
2662   // Function name with type suffix
2663   std::string mangledName = MangleName(name, outTypeStr, ClassS);
2664   if (outTypeStr != inTypeStr) {
2665     // If the input type is different (e.g., for vreinterpret), append a suffix
2666     // for the input type.  String off a "Q" (quad) prefix so that MangleName
2667     // does not insert another "q" in the name.
2668     unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2669     StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2670     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2671   }
2672 
2673   // todo: GenerateChecksForIntrinsic does not generate CHECK
2674   // for aarch64 instructions yet
2675   std::vector<std::string> FileCheckPatterns;
2676   if (!isA64) {
2677 	GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
2678 							   isHiddenLOp, FileCheckPatterns);
2679 	s+= "// CHECK_ARM: test_" + mangledName + "\n";
2680   }
2681   s += "// CHECK_AARCH64: test_" + mangledName + "\n";
2682 
2683   // Emit the FileCheck patterns.
2684   // If for any reason we do not want to emit a check, mangledInst
2685   // will be the empty string.
2686   if (FileCheckPatterns.size()) {
2687     for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
2688                                                   e = FileCheckPatterns.end();
2689          i != e;
2690          ++i) {
2691       s += "// CHECK_ARM: " + *i + "\n";
2692     }
2693   }
2694 
2695   // Emit the start of the test function.
2696 
2697   testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
2698   char arg = 'a';
2699   std::string comma;
2700   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2701     // Do not create arguments for values that must be immediate constants.
2702     if (proto[i] == 'i')
2703       continue;
2704     testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
2705     testFuncProto.push_back(arg);
2706     comma = ", ";
2707   }
2708   testFuncProto += ")";
2709 
2710   s+= testFuncProto;
2711   s+= " {\n  ";
2712 
2713   if (proto[0] != 'v')
2714     s += "return ";
2715   s += mangledName + "(";
2716   arg = 'a';
2717   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2718     if (proto[i] == 'i') {
2719       // For immediate operands, test the maximum value.
2720       if (isShift)
2721         s += "1"; // FIXME
2722       else
2723         // The immediate generally refers to a lane in the preceding argument.
2724         s += utostr(RangeFromType(proto[i-1], inTypeStr));
2725     } else {
2726       s.push_back(arg);
2727     }
2728     if ((i + 1) < e)
2729       s += ", ";
2730   }
2731   s += ");\n}\n\n";
2732   return s;
2733 }
2734 
2735 /// Write out all intrinsic tests for the specified target, checking
2736 /// for intrinsic test uniqueness.
2737 void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
2738                                 bool isA64GenTest) {
2739   if (isA64GenTest)
2740 	OS << "#ifdef __aarch64__\n";
2741 
2742   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2743   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2744     Record *R = RV[i];
2745     std::string name = R->getValueAsString("Name");
2746     std::string Proto = R->getValueAsString("Prototype");
2747     std::string Types = R->getValueAsString("Types");
2748     bool isShift = R->getValueAsBit("isShift");
2749     std::string InstName = R->getValueAsString("InstName");
2750     bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
2751     bool isA64 = R->getValueAsBit("isA64");
2752 
2753     // do not include AArch64 intrinsic test if not generating
2754     // code for AArch64
2755     if (!isA64GenTest && isA64)
2756       continue;
2757 
2758     SmallVector<StringRef, 16> TypeVec;
2759     ParseTypes(R, Types, TypeVec);
2760 
2761     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2762     OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
2763     if (kind == OpUnavailable)
2764       continue;
2765     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2766       if (kind == OpReinterpret) {
2767         bool outQuad = false;
2768         bool dummy = false;
2769         (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
2770         for (unsigned srcti = 0, srcte = TypeVec.size();
2771              srcti != srcte; ++srcti) {
2772           bool inQuad = false;
2773           (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
2774           if (srcti == ti || inQuad != outQuad)
2775             continue;
2776 		  std::string testFuncProto;
2777           std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
2778                                   isShift, isHiddenLOp, ck, InstName, isA64,
2779 								  testFuncProto);
2780           if (EmittedMap.count(testFuncProto))
2781             continue;
2782           EmittedMap[testFuncProto] = kind;
2783           OS << s << "\n";
2784         }
2785       } else {
2786 		std::string testFuncProto;
2787         std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
2788                                 isHiddenLOp, ck, InstName, isA64, testFuncProto);
2789         if (EmittedMap.count(testFuncProto))
2790           continue;
2791         EmittedMap[testFuncProto] = kind;
2792         OS << s << "\n";
2793       }
2794     }
2795   }
2796 
2797   if (isA64GenTest)
2798 	OS << "#endif\n";
2799 }
2800 /// runTests - Write out a complete set of tests for all of the Neon
2801 /// intrinsics.
2802 void NeonEmitter::runTests(raw_ostream &OS) {
2803   OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
2804         "apcs-gnu\\\n"
2805         "// RUN:  -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
2806         "// RUN:  | FileCheck %s -check-prefix=CHECK_ARM\n"
2807 		"\n"
2808 	    "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
2809 	    "// RUN -target-feature +neon  -ffreestanding -S -o - %s \\\n"
2810 	    "// RUN:  | FileCheck %s -check-prefix=CHECK_AARCH64\n"
2811         "\n"
2812         "// REQUIRES: long_tests\n"
2813         "\n"
2814         "#include <arm_neon.h>\n"
2815         "\n";
2816 
2817   // ARM tests must be emitted before AArch64 tests to ensure
2818   // tests for intrinsics that are common to ARM and AArch64
2819   // appear only once in the output stream.
2820   // The check for uniqueness is done in genTargetTest.
2821   StringMap<OpKind> EmittedMap;
2822 
2823   genTargetTest(OS, EmittedMap, false);
2824 
2825   genTargetTest(OS, EmittedMap, true);
2826 }
2827 
namespace clang {
// Emit arm_neon.h: a declaration and definition of each function specified
// by the ARM NEON compiler interface (see the file header).
void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).run(OS);
}
// Emit additional validation code via runHeader(), rather than the normal
// run() entry point (see the file header).
void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).runHeader(OS);
}
// Emit a complete set of tests for the Neon intrinsics via runTests().
void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).runTests(OS);
}
} // End namespace clang
2839