1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This tablegen backend is responsible for emitting arm_neon.h, which includes
11 // a declaration and definition of each function specified by the ARM NEON
12 // compiler interface.  See ARM document DUI0348B.
13 //
14 // Each NEON instruction is implemented in terms of 1 or more functions which
15 // are suffixed with the element type of the input vectors.  Functions may be
16 // implemented in terms of generic vector operations such as +, *, -, etc. or
17 // by calling a __builtin_-prefixed function which will be handled by clang's
18 // CodeGen library.
19 //
20 // Additional validation code can be generated by this file when runHeader() is
21 // called, rather than the normal run() entry point.  A complete set of tests
22 // for Neon intrinsics can be generated by calling the runTests() entry point.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/SmallString.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/ADT/StringMap.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/TableGen/Error.h"
33 #include "llvm/TableGen/Record.h"
34 #include "llvm/TableGen/TableGenBackend.h"
35 #include <string>
36 using namespace llvm;
37 
/// OpKind - The kinds of operations an intrinsic definition can be lowered
/// to.  Each value corresponds to an OP_* record name from arm_neon.td and is
/// registered in NeonEmitter's OpMap; the emitted header open-codes these
/// operations instead of (or in addition to) calling a __builtin_ function.
/// Do not reorder: code elsewhere may rely on the implicit enumerator values.
enum OpKind {
  OpNone,
  OpUnavailable,
  // Widening/narrowing add and subtract variants.
  OpAdd,
  OpAddl,
  OpAddlHi,
  OpAddw,
  OpAddwHi,
  OpSub,
  OpSubl,
  OpSublHi,
  OpSubw,
  OpSubwHi,
  // Multiply and multiply-accumulate/subtract families.
  OpMul,
  OpMla,
  OpMlal,
  OpMullHi,
  OpMlalHi,
  OpMls,
  OpMlsl,
  OpMlslHi,
  // "_N" forms: vector-by-scalar operations.
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpMlalN,
  OpMlslN,
  // "_LN" forms: vector-by-lane operations.
  OpMulLane,
  OpMulXLane,
  OpMullLane,
  OpMullHiLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlalHiLane,
  OpMlslLane,
  OpMlslHiLane,
  OpQDMullLane,
  OpQDMullHiLane,
  OpQDMlalLane,
  OpQDMlalHiLane,
  OpQDMlslLane,
  OpQDMlslHiLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  OpFMSLane,
  OpFMSLaneQ,
  // Comparisons, implemented with generic vector compare operators.
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  // Unary and bitwise logical operations.
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  // Vector rearrangement / conversion helpers.
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpReinterpret,
  // High-half ("2") narrowing/widening arithmetic.
  OpAddhnHi,
  OpRAddhnHi,
  OpSubhnHi,
  OpRSubhnHi,
  OpAbdl,
  OpAbdlHi,
  OpAba,
  OpAbal,
  OpAbalHi,
  OpQDMullHi,
  OpQDMlalHi,
  OpQDMlslHi,
  OpDiv,
  OpLongHi,
  OpNarrowHi,
  OpMovlHi,
  OpCopy
};
125 
/// ClassKind - Classifies how an instruction's name is suffixed with its
/// element type, and whether it is tested.  Mapped from the *Inst record
/// classes of arm_neon.td in NeonEmitter's ClassMap.
enum ClassKind {
  ClassNone,
  ClassI,           // generic integer instruction, e.g., "i8" suffix
  ClassS,           // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,           // width-specific instruction, e.g., "8" suffix
  ClassB,           // bitcast arguments with enum argument to specify type
  ClassL,           // Logical instructions which are op instructions
                    // but we need to not emit any suffix for in our
                    // tests.
  ClassNoTest       // Instructions which we do not test since they are
                    // not TRUE instructions.
};
138 
/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins.  These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  // Bit layout of Flags: the low nibble holds the element type, with the
  // unsigned and quad modifiers as individual bits above it.
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };
  uint32_t Flags;

public:
  /// EltType - element type codes; values must stay in sync with the
  /// corresponding enum in clang's TargetBuiltins.h.
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Float16,
    Float32,
    Float64
  };

  /// Wrap an already-encoded flag word.
  NeonTypeFlags(unsigned F) : Flags(F) {}

  /// Encode an element type plus its unsigned/quad modifiers.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad)
      : Flags(static_cast<uint32_t>(ET) |
              (IsUnsigned ? uint32_t(UnsignedFlag) : 0) |
              (IsQuad ? uint32_t(QuadFlag) : 0)) {}

  /// Return the raw encoded flag word.
  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace
175 
namespace {
/// NeonEmitter - TableGen backend that generates arm_neon.h (run), the
/// __builtin prototype/overload/range-check tables (runHeader), and a test
/// suite for the intrinsics (runTests) from the records in arm_neon.td.
class NeonEmitter {
  RecordKeeper &Records;                 // all records parsed from arm_neon.td
  StringMap<OpKind> OpMap;               // OP_* record name -> OpKind
  DenseMap<Record*, ClassKind> ClassMap; // instruction class record -> ClassKind

public:
  /// The constructor eagerly builds the two lookup tables used by every
  /// entry point: the OP_* name -> OpKind map, and the per-instruction-class
  /// ClassKind map.
  NeonEmitter(RecordKeeper &R) : Records(R) {
    OpMap["OP_NONE"]  = OpNone;
    OpMap["OP_UNAVAILABLE"] = OpUnavailable;
    OpMap["OP_ADD"]   = OpAdd;
    OpMap["OP_ADDL"]  = OpAddl;
    OpMap["OP_ADDLHi"] = OpAddlHi;
    OpMap["OP_ADDW"]  = OpAddw;
    OpMap["OP_ADDWHi"] = OpAddwHi;
    OpMap["OP_SUB"]   = OpSub;
    OpMap["OP_SUBL"]  = OpSubl;
    OpMap["OP_SUBLHi"] = OpSublHi;
    OpMap["OP_SUBW"]  = OpSubw;
    OpMap["OP_SUBWHi"] = OpSubwHi;
    OpMap["OP_MUL"]   = OpMul;
    OpMap["OP_MLA"]   = OpMla;
    OpMap["OP_MLAL"]  = OpMlal;
    OpMap["OP_MULLHi"]  = OpMullHi;
    OpMap["OP_MLALHi"]  = OpMlalHi;
    OpMap["OP_MLS"]   = OpMls;
    OpMap["OP_MLSL"]  = OpMlsl;
    OpMap["OP_MLSLHi"] = OpMlslHi;
    OpMap["OP_MUL_N"] = OpMulN;
    OpMap["OP_MLA_N"] = OpMlaN;
    OpMap["OP_MLS_N"] = OpMlsN;
    OpMap["OP_MLAL_N"] = OpMlalN;
    OpMap["OP_MLSL_N"] = OpMlslN;
    OpMap["OP_MUL_LN"]= OpMulLane;
    OpMap["OP_MULX_LN"]= OpMulXLane;
    OpMap["OP_MULL_LN"] = OpMullLane;
    OpMap["OP_MULLHi_LN"] = OpMullHiLane;
    OpMap["OP_MLA_LN"]= OpMlaLane;
    OpMap["OP_MLS_LN"]= OpMlsLane;
    OpMap["OP_MLAL_LN"] = OpMlalLane;
    OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
    OpMap["OP_MLSL_LN"] = OpMlslLane;
    OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
    OpMap["OP_QDMULL_LN"] = OpQDMullLane;
    OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
    OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
    OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
    OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
    OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
    OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
    OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
    OpMap["OP_FMS_LN"] = OpFMSLane;
    OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
    OpMap["OP_EQ"]    = OpEq;
    OpMap["OP_GE"]    = OpGe;
    OpMap["OP_LE"]    = OpLe;
    OpMap["OP_GT"]    = OpGt;
    OpMap["OP_LT"]    = OpLt;
    OpMap["OP_NEG"]   = OpNeg;
    OpMap["OP_NOT"]   = OpNot;
    OpMap["OP_AND"]   = OpAnd;
    OpMap["OP_OR"]    = OpOr;
    OpMap["OP_XOR"]   = OpXor;
    OpMap["OP_ANDN"]  = OpAndNot;
    OpMap["OP_ORN"]   = OpOrNot;
    OpMap["OP_CAST"]  = OpCast;
    OpMap["OP_CONC"]  = OpConcat;
    OpMap["OP_HI"]    = OpHi;
    OpMap["OP_LO"]    = OpLo;
    OpMap["OP_DUP"]   = OpDup;
    OpMap["OP_DUP_LN"] = OpDupLane;
    OpMap["OP_SEL"]   = OpSelect;
    OpMap["OP_REV16"] = OpRev16;
    OpMap["OP_REV32"] = OpRev32;
    OpMap["OP_REV64"] = OpRev64;
    OpMap["OP_REINT"] = OpReinterpret;
    OpMap["OP_ADDHNHi"] = OpAddhnHi;
    OpMap["OP_RADDHNHi"] = OpRAddhnHi;
    OpMap["OP_SUBHNHi"] = OpSubhnHi;
    OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
    OpMap["OP_ABDL"]  = OpAbdl;
    OpMap["OP_ABDLHi"] = OpAbdlHi;
    OpMap["OP_ABA"]   = OpAba;
    OpMap["OP_ABAL"]  = OpAbal;
    OpMap["OP_ABALHi"] = OpAbalHi;
    OpMap["OP_QDMULLHi"] = OpQDMullHi;
    OpMap["OP_QDMLALHi"] = OpQDMlalHi;
    OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
    OpMap["OP_DIV"] = OpDiv;
    OpMap["OP_LONG_HI"] = OpLongHi;
    OpMap["OP_NARROW_HI"] = OpNarrowHi;
    OpMap["OP_MOVL_HI"] = OpMovlHi;
    OpMap["OP_COPY"] = OpCopy;

    // Map each *Inst TableGen class to its suffix ClassKind.  The "Op"
    // variants are open-coded operations; the plain variants call builtins.
    Record *SI = R.getClass("SInst");
    Record *II = R.getClass("IInst");
    Record *WI = R.getClass("WInst");
    Record *SOpI = R.getClass("SOpInst");
    Record *IOpI = R.getClass("IOpInst");
    Record *WOpI = R.getClass("WOpInst");
    Record *LOpI = R.getClass("LOpInst");
    Record *NoTestOpI = R.getClass("NoTestOpInst");

    ClassMap[SI] = ClassS;
    ClassMap[II] = ClassI;
    ClassMap[WI] = ClassW;
    ClassMap[SOpI] = ClassS;
    ClassMap[IOpI] = ClassI;
    ClassMap[WOpI] = ClassW;
    ClassMap[LOpI] = ClassL;
    ClassMap[NoTestOpI] = ClassNoTest;
  }

  // run - Emit arm_neon.h.inc
  void run(raw_ostream &o);

  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
  void runHeader(raw_ostream &o);

  // runTests - Emit tests for all the Neon intrinsics.
  void runTests(raw_ostream &o);

private:
  // emitIntrinsic - Emit the header code for a single intrinsic record,
  // recording what was emitted in EmittedMap to avoid duplicates.
  void emitIntrinsic(raw_ostream &OS, Record *R,
                     StringMap<ClassKind> &EmittedMap);
  // genBuiltinsDef - Emit the BUILTIN(...) table; isA64GenBuiltinDef selects
  // the AArch64 table vs. the ARM one.
  void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
                      bool isA64GenBuiltinDef);
  // genOverloadTypeCheckCode - Emit the overload type-checking switch.
  void genOverloadTypeCheckCode(raw_ostream &OS,
                                StringMap<ClassKind> &A64IntrinsicMap,
                                bool isA64TypeCheck);
  // genIntrinsicRangeCheckCode - Emit immediate-range-check code.
  void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                  StringMap<ClassKind> &A64IntrinsicMap,
                                  bool isA64RangeCheck);
  // genTargetTest - Emit target-specific tests for the intrinsics.
  void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
                     bool isA64TestGen);
};
} // end anonymous namespace
313 
314 /// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
315 /// which each StringRef representing a single type declared in the string.
316 /// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
317 /// 2xfloat and 4xfloat respectively.
318 static void ParseTypes(Record *r, std::string &s,
319                        SmallVectorImpl<StringRef> &TV) {
320   const char *data = s.data();
321   int len = 0;
322 
323   for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
324     if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
325                          || data[len] == 'H' || data[len] == 'S')
326       continue;
327 
328     switch (data[len]) {
329       case 'c':
330       case 's':
331       case 'i':
332       case 'l':
333       case 'h':
334       case 'f':
335       case 'd':
336         break;
337       default:
338         PrintFatalError(r->getLoc(),
339                       "Unexpected letter: " + std::string(data + len, 1));
340     }
341     TV.push_back(StringRef(data, len + 1));
342     data += len + 1;
343     len = -1;
344   }
345 }
346 
347 /// Widen - Convert a type code into the next wider type.  char -> short,
348 /// short -> int, etc.
349 static char Widen(const char t) {
350   switch (t) {
351     case 'c':
352       return 's';
353     case 's':
354       return 'i';
355     case 'i':
356       return 'l';
357     case 'h':
358       return 'f';
359     default:
360       PrintFatalError("unhandled type in widen!");
361   }
362 }
363 
364 /// Narrow - Convert a type code into the next smaller type.  short -> char,
365 /// float -> half float, etc.
366 static char Narrow(const char t) {
367   switch (t) {
368     case 's':
369       return 'c';
370     case 'i':
371       return 's';
372     case 'l':
373       return 'i';
374     case 'f':
375       return 'h';
376     default:
377       PrintFatalError("unhandled type in narrow!");
378   }
379 }
380 
381 static std::string GetNarrowTypestr(StringRef ty)
382 {
383   std::string s;
384   for (size_t i = 0, end = ty.size(); i < end; i++) {
385     switch (ty[i]) {
386       case 's':
387         s += 'c';
388         break;
389       case 'i':
390         s += 's';
391         break;
392       case 'l':
393         s += 'i';
394         break;
395       default:
396         s += ty[i];
397         break;
398     }
399   }
400 
401   return s;
402 }
403 
404 /// For a particular StringRef, return the base type code, and whether it has
405 /// the quad-vector, polynomial, or unsigned modifiers set.
406 static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
407   unsigned off = 0;
408   // ignore scalar.
409   if (ty[off] == 'S') {
410     ++off;
411   }
412   // remember quad.
413   if (ty[off] == 'Q' || ty[off] == 'H') {
414     quad = true;
415     ++off;
416   }
417 
418   // remember poly.
419   if (ty[off] == 'P') {
420     poly = true;
421     ++off;
422   }
423 
424   // remember unsigned.
425   if (ty[off] == 'U') {
426     usgn = true;
427     ++off;
428   }
429 
430   // base type to get the type string for.
431   return ty[off];
432 }
433 
/// ModType - Transform a type code and its modifiers based on a mod code. The
/// mod code definitions may be found at the top of arm_neon.td.
/// Returns the (possibly changed) base type character and updates the
/// modifier flags in place.  Note that several cases fall through
/// intentionally ('y' into 'f', 'c' into 'p').
static char ModType(const char mod, char type, bool &quad, bool &poly,
                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
  switch (mod) {
    case 't':
      // Polynomial types are treated as their unsigned integer equivalent.
      if (poly) {
        poly = false;
        usgn = true;
      }
      break;
    case 'u':
      // Force unsigned integer; floats map to same-width integers.
      usgn = true;
      poly = false;
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case 'x':
      // Force signed integer; floats map to same-width integers.
      usgn = false;
      poly = false;
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case 'o':
      // Scalar double.
      scal = true;
      type = 'd';
      usgn = false;
      break;
    case 'y':
      // Scalar float: set scal, then FALL THROUGH into the float case.
      scal = true;
    case 'f':
      // Float result; a half input widens to the quad form.
      if (type == 'h')
        quad = true;
      type = 'f';
      usgn = false;
      break;
    case 'g':
      // Force the 64-bit (non-quad) form.
      quad = false;
      break;
    case 'j':
      // Force the 128-bit (quad) form.
      quad = true;
      break;
    case 'w':
      // Wider element in the quad form (long operations).
      type = Widen(type);
      quad = true;
      break;
    case 'n':
      // Wider element, same register width.
      type = Widen(type);
      break;
    case 'i':
      // Scalar int (e.g. a lane or immediate operand).
      type = 'i';
      scal = true;
      break;
    case 'l':
      // Scalar unsigned long.
      type = 'l';
      scal = true;
      usgn = true;
      break;
    case 'z':
      // Scalar of the narrowed element type.
      type = Narrow(type);
      scal = true;
      break;
    case 'r':
      // Scalar of the widened element type.
      type = Widen(type);
      scal = true;
      break;
    case 's':
    case 'a':
      // Scalar of the same element type.
      scal = true;
      break;
    case 'k':
      // Force quad.
      quad = true;
      break;
    case 'c':
      // Const pointer: set cnst, then FALL THROUGH into the pointer case.
      cnst = true;
    case 'p':
      // Pointer to scalar element.
      pntr = true;
      scal = true;
      break;
    case 'h':
      // Narrowed element; a resulting half drops to the non-quad form.
      type = Narrow(type);
      if (type == 'h')
        quad = false;
      break;
    case 'q':
      // Narrowed element in the quad form (narrowing "2" operations).
      type = Narrow(type);
      quad = true;
      break;
    case 'e':
      // Narrowed unsigned element.
      type = Narrow(type);
      usgn = true;
      break;
    case 'm':
      // Narrowed element, non-quad form.
      type = Narrow(type);
      quad = false;
      break;
    default:
      // Any other mod code leaves the type unchanged.
      break;
  }
  return type;
}
539 
/// TypeString - for a modifier and type, generate the name of the typedef for
/// that type.  QUc -> uint8x8_t.  Handles the 'v' (void) and 'i' (int)
/// mod codes specially, and appends the x2/x3/x4 struct suffix and any
/// const/pointer decoration.
static std::string TypeString(const char mod, StringRef typestr) {
  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  if (mod == 'v')
    return "void";
  if (mod == 'i')
    return "int";

  // base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  SmallString<128> s;

  if (usgn)
    s.push_back('u');

  // Emit the element-type name; for vectors, also the lane count (which
  // doubles for the quad form).  Scalars stop after the element name.
  switch (type) {
    case 'c':
      s += poly ? "poly8" : "int8";
      if (scal)
        break;
      s += quad ? "x16" : "x8";
      break;
    case 's':
      s += poly ? "poly16" : "int16";
      if (scal)
        break;
      s += quad ? "x8" : "x4";
      break;
    case 'i':
      s += "int32";
      if (scal)
        break;
      s += quad ? "x4" : "x2";
      break;
    case 'l':
      s += "int64";
      if (scal)
        break;
      s += quad ? "x2" : "x1";
      break;
    case 'h':
      s += "float16";
      if (scal)
        break;
      s += quad ? "x8" : "x4";
      break;
    case 'f':
      s += "float32";
      if (scal)
        break;
      s += quad ? "x4" : "x2";
      break;
    case 'd':
      s += "float64";
      if (scal)
        break;
      s += quad ? "x2" : "x1";
      break;

    default:
      PrintFatalError("unhandled type!");
  }

  // The '2'/'3'/'4' mod codes denote the multi-vector (array) struct types.
  if (mod == '2')
    s += "x2";
  if (mod == '3')
    s += "x3";
  if (mod == '4')
    s += "x4";

  // Append _t, finishing the type string typedef type.
  s += "_t";

  if (cnst)
    s += " const";

  if (pntr)
    s += " *";

  return s.str();
}
632 
/// BuiltinTypeString - for a modifier and type, generate the clang
/// BuiltinsARM.def prototype code for the function.  See the top of clang's
/// Builtins.def for a description of the type strings.
/// \param mod the modifier character for this operand.
/// \param typestr the full type string, e.g. "Qf".
/// \param ck the instruction class, which decides whether ClassB's
///        byte-vector canonicalization applies.
/// \param ret true when encoding the return type rather than an argument.
static std::string BuiltinTypeString(const char mod, StringRef typestr,
                                     ClassKind ck, bool ret) {
  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  if (mod == 'v')
    return "v"; // void
  if (mod == 'i')
    return "i"; // int

  // base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  // All pointers are void* pointers.  Change type to 'v' now.
  if (pntr) {
    usgn = false;
    poly = false;
    type = 'v';
  }
  // Treat half-float ('h') types as unsigned short ('s') types.
  if (type == 'h') {
    type = 's';
    usgn = true;
  }
  // Integer scalars of class I/W are canonicalized to unsigned.
  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
                         scal && type != 'f' && type != 'd');

  if (scal) {
    SmallString<128> s;

    if (usgn)
      s.push_back('U');
    else if (type == 'c')
      s.push_back('S'); // make chars explicitly signed

    if (type == 'l') // 64-bit long
      s += "LLi";
    else
      s.push_back(type);

    if (cnst)
      s.push_back('C');
    if (pntr)
      s.push_back('*');
    return s.str();
  }

  // Since the return value must be one type, return a vector type of the
  // appropriate width which we will bitcast.  An exception is made for
  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
  // fashion, storing them to a pointer arg.
  if (ret) {
    if (mod >= '2' && mod <= '4')
      return "vv*"; // void result with void* first argument
    if (mod == 'f' || (ck != ClassB && type == 'f'))
      return quad ? "V4f" : "V2f";
    if (ck != ClassB && type == 'd')
      return quad ? "V2d" : "V1d";
    if (ck != ClassB && type == 's')
      return quad ? "V8s" : "V4s";
    if (ck != ClassB && type == 'i')
      return quad ? "V4i" : "V2i";
    if (ck != ClassB && type == 'l')
      return quad ? "V2LLi" : "V1LLi";

    // ClassB (and any remaining case) returns a vector of signed chars.
    return quad ? "V16Sc" : "V8Sc";
  }

  // Non-return array types are passed as individual vectors.
  if (mod == '2')
    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
  if (mod == '3')
    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
  if (mod == '4')
    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";

  if (mod == 'f' || (ck != ClassB && type == 'f'))
    return quad ? "V4f" : "V2f";
  if (ck != ClassB && type == 'd')
    return quad ? "V2d" : "V1d";
  if (ck != ClassB && type == 's')
    return quad ? "V8s" : "V4s";
  if (ck != ClassB && type == 'i')
    return quad ? "V4i" : "V2i";
  if (ck != ClassB && type == 'l')
    return quad ? "V2LLi" : "V1LLi";

  // ClassB arguments are canonicalized to vectors of signed chars.
  return quad ? "V16Sc" : "V8Sc";
}
732 
/// InstructionTypeCode - Computes the ARM argument character code and
/// quad status for a specific type string and ClassKind.
/// \param typeStr the full type string, e.g. "Qf".
/// \param ck the class that selects between "s8"/"i8"/"8"-style suffixes.
/// \param quad set to true when the type string denotes a quad vector.
/// \param typeCode receives the suffix; left untouched for classes that
///        take no suffix (e.g. ClassB, ClassL).
static void InstructionTypeCode(const StringRef &typeStr,
                                const ClassKind ck,
                                bool &quad,
                                std::string &typeCode) {
  bool poly = false;
  bool usgn = false;
  char type = ClassifyType(typeStr, quad, poly, usgn);

  switch (type) {
  case 'c':
    switch (ck) {
    case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
    case ClassI: typeCode = "i8"; break;
    case ClassW: typeCode = "8"; break;
    default: break;
    }
    break;
  case 's':
    switch (ck) {
    case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
    case ClassI: typeCode = "i16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'i':
    switch (ck) {
    case ClassS: typeCode = usgn ? "u32" : "s32"; break;
    case ClassI: typeCode = "i32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'l':
    switch (ck) {
    case ClassS: typeCode = usgn ? "u64" : "s64"; break;
    case ClassI: typeCode = "i64"; break;
    case ClassW: typeCode = "64"; break;
    default: break;
    }
    break;
  case 'h':
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'f':
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'd':
    switch (ck) {
    case ClassS:
    case ClassI:
      typeCode += "f64";
      break;
    case ClassW:
      // There is no width-style suffix for 64-bit float.
      PrintFatalError("unhandled type!");
    default:
      break;
    }
    break;
  default:
    PrintFatalError("unhandled type!");
  }
}
808 
809 static char Insert_BHSD_Suffix(StringRef typestr){
810   unsigned off = 0;
811   if(typestr[off++] == 'S'){
812     while(typestr[off] == 'Q' || typestr[off] == 'H'||
813           typestr[off] == 'P' || typestr[off] == 'U')
814       ++off;
815     switch (typestr[off]){
816     default  : break;
817     case 'c' : return 'b';
818     case 's' : return 'h';
819     case 'i' :
820     case 'f' : return 's';
821     case 'l' :
822     case 'd' : return 'd';
823     }
824   }
825   return 0;
826 }
827 
828 /// MangleName - Append a type or width suffix to a base neon function name,
829 /// and insert a 'q' in the appropriate location if type string starts with 'Q'.
830 /// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
831 /// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
832 static std::string MangleName(const std::string &name, StringRef typestr,
833                               ClassKind ck) {
834   if (name == "vcvt_f32_f16")
835     return name;
836 
837   bool quad = false;
838   std::string typeCode = "";
839 
840   InstructionTypeCode(typestr, ck, quad, typeCode);
841 
842   std::string s = name;
843 
844   if (typeCode.size() > 0) {
845     s += "_" + typeCode;
846   }
847 
848   if (ck == ClassB)
849     s += "_v";
850 
851   // Insert a 'q' before the first '_' character so that it ends up before
852   // _lane or _n on vector-scalar operations.
853   if (typestr.find("Q") != StringRef::npos) {
854       size_t pos = s.find('_');
855       s = s.insert(pos, "q");
856   }
857   char ins = Insert_BHSD_Suffix(typestr);
858   if(ins){
859     size_t pos = s.find('_');
860     s = s.insert(pos, &ins, 1);
861   }
862 
863   return s;
864 }
865 
866 static void PreprocessInstruction(const StringRef &Name,
867                                   const std::string &InstName,
868                                   std::string &Prefix,
869                                   bool &HasNPostfix,
870                                   bool &HasLanePostfix,
871                                   bool &HasDupPostfix,
872                                   bool &IsSpecialVCvt,
873                                   size_t &TBNumber) {
874   // All of our instruction name fields from arm_neon.td are of the form
875   //   <instructionname>_...
876   // Thus we grab our instruction name via computation of said Prefix.
877   const size_t PrefixEnd = Name.find_first_of('_');
878   // If InstName is passed in, we use that instead of our name Prefix.
879   Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
880 
881   const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
882 
883   HasNPostfix = Postfix.count("_n");
884   HasLanePostfix = Postfix.count("_lane");
885   HasDupPostfix = Postfix.count("_dup");
886   IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
887 
888   if (InstName.compare("vtbl") == 0 ||
889       InstName.compare("vtbx") == 0) {
890     // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
891     // encoding to get its true value.
892     TBNumber = Name[Name.size()-1] - 48;
893   }
894 }
895 
896 /// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
897 /// extracted, generate a FileCheck pattern for a Load Or Store
898 static void
899 GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
900                                           const std::string& OutTypeCode,
901                                           const bool &IsQuad,
902                                           const bool &HasDupPostfix,
903                                           const bool &HasLanePostfix,
904                                           const size_t Count,
905                                           std::string &RegisterSuffix) {
906   const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
907   // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
908   // will output a series of v{ld,st}1s, so we have to handle it specially.
909   if ((Count == 3 || Count == 4) && IsQuad) {
910     RegisterSuffix += "{";
911     for (size_t i = 0; i < Count; i++) {
912       RegisterSuffix += "d{{[0-9]+}}";
913       if (HasDupPostfix) {
914         RegisterSuffix += "[]";
915       }
916       if (HasLanePostfix) {
917         RegisterSuffix += "[{{[0-9]+}}]";
918       }
919       if (i < Count-1) {
920         RegisterSuffix += ", ";
921       }
922     }
923     RegisterSuffix += "}";
924   } else {
925 
926     // Handle normal loads and stores.
927     RegisterSuffix += "{";
928     for (size_t i = 0; i < Count; i++) {
929       RegisterSuffix += "d{{[0-9]+}}";
930       if (HasDupPostfix) {
931         RegisterSuffix += "[]";
932       }
933       if (HasLanePostfix) {
934         RegisterSuffix += "[{{[0-9]+}}]";
935       }
936       if (IsQuad && !HasLanePostfix) {
937         RegisterSuffix += ", d{{[0-9]+}}";
938         if (HasDupPostfix) {
939           RegisterSuffix += "[]";
940         }
941       }
942       if (i < Count-1) {
943         RegisterSuffix += ", ";
944       }
945     }
946     RegisterSuffix += "}, [r{{[0-9]+}}";
947 
948     // We only include the alignment hint if we have a vld1.*64 or
949     // a dup/lane instruction.
950     if (IsLDSTOne) {
951       if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
952         RegisterSuffix += ":" + OutTypeCode;
953       }
954     }
955 
956     RegisterSuffix += "]";
957   }
958 }
959 
960 static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
961                                      const bool &HasNPostfix) {
962   return (NameRef.count("vmla") ||
963           NameRef.count("vmlal") ||
964           NameRef.count("vmlsl") ||
965           NameRef.count("vmull") ||
966           NameRef.count("vqdmlal") ||
967           NameRef.count("vqdmlsl") ||
968           NameRef.count("vqdmulh") ||
969           NameRef.count("vqdmull") ||
970           NameRef.count("vqrdmulh")) && HasNPostfix;
971 }
972 
973 static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
974                                          const bool &HasLanePostfix) {
975   return (NameRef.count("vmla") ||
976           NameRef.count("vmls") ||
977           NameRef.count("vmlal") ||
978           NameRef.count("vmlsl") ||
979           (NameRef.count("vmul") && NameRef.size() == 3)||
980           NameRef.count("vqdmlal") ||
981           NameRef.count("vqdmlsl") ||
982           NameRef.count("vqdmulh") ||
983           NameRef.count("vqrdmulh")) && HasLanePostfix;
984 }
985 
986 static bool IsSpecialLaneMultiply(const StringRef &NameRef,
987                                   const bool &HasLanePostfix,
988                                   const bool &IsQuad) {
989   const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
990                                && IsQuad;
991   const bool IsVMull = NameRef.count("mull") && !IsQuad;
992   return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
993 }
994 
/// NormalizeProtoForRegisterPatternCreation - Map each prototype modifier
/// character to a register-class character for FileCheck pattern creation
/// ('q' = quad register, 'd' = double register, 'a' = lane/scalar operand,
/// 'i' = immediate), then trim the result for the instruction families whose
/// printed operand lists differ from their prototypes.
static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
                                                     const std::string &Proto,
                                                     const bool &HasNPostfix,
                                                     const bool &IsQuad,
                                                     const bool &HasLanePostfix,
                                                     const bool &HasDupPostfix,
                                                     std::string &NormedProto) {
  // Handle generic case.
  const StringRef NameRef(Name);
  for (size_t i = 0, end = Proto.size(); i < end; i++) {
    switch (Proto[i]) {
    case 'u':
    case 'f':
    case 'd':
    case 's':
    case 'x':
    case 't':
    case 'n':
      // Vector operands of the instruction's own width.
      NormedProto += IsQuad? 'q' : 'd';
      break;
    case 'w':
    case 'k':
      // Always-quad operands.
      NormedProto += 'q';
      break;
    case 'g':
    case 'j':
    case 'h':
    case 'e':
      // Always-double operands.
      NormedProto += 'd';
      break;
    case 'i':
      // Immediates print as a lane index when a _lane postfix is present.
      NormedProto += HasLanePostfix? 'a' : 'i';
      break;
    case 'a':
      // Scalar operand: lane index, scalar register, or immediate depending
      // on the postfix and instruction family.
      if (HasLanePostfix) {
        NormedProto += 'a';
      } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
        NormedProto += IsQuad? 'q' : 'd';
      } else {
        NormedProto += 'i';
      }
      break;
    // Other modifier characters contribute no printed operand.
    }
  }

  // Handle Special Cases.
  const bool IsNotVExt = !NameRef.count("vext");
  const bool IsVPADAL = NameRef.count("vpadal");
  const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
                                                           HasLanePostfix);
  const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
                                                      IsQuad);

  if (IsSpecialLaneMul) {
    // For the special lane multiplies, keep characters 0, 1 and 3: move
    // character 3 into slot 2 and drop everything past it.
    NormedProto[2] = NormedProto[3];
    NormedProto.erase(3);
  } else if (NormedProto.size() == 4 &&
             NormedProto[0] == NormedProto[1] &&
             IsNotVExt) {
    // If NormedProto.size() == 4 and the first two proto characters are the
    // same, ignore the first.
    NormedProto = NormedProto.substr(1, 3);
  } else if (Is5OpLaneAccum) {
    // If we have a 5 op lane accumulator operation, we take characters 1,2,4
    std::string tmp = NormedProto.substr(1,2);
    tmp += NormedProto[4];
    NormedProto = tmp;
  } else if (IsVPADAL) {
    // Keep only the first two characters.
    // NOTE(review): the original comment said "ignore the first character",
    // but substr(0, 2) keeps characters 0-1 — confirm intended behavior.
    NormedProto = NormedProto.substr(0, 2);
  } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
    // If our instruction is a dup instruction, keep only the first and
    // last characters.
    std::string tmp = "";
    tmp += NormedProto[0];
    tmp += NormedProto[NormedProto.size()-1];
    NormedProto = tmp;
  }
}
1075 
1076 /// GenerateRegisterCheckPatterns - Given a bunch of data we have
1077 /// extracted, generate a FileCheck pattern to check that an
1078 /// instruction's arguments are correct.
1079 static void GenerateRegisterCheckPattern(const std::string &Name,
1080                                          const std::string &Proto,
1081                                          const std::string &OutTypeCode,
1082                                          const bool &HasNPostfix,
1083                                          const bool &IsQuad,
1084                                          const bool &HasLanePostfix,
1085                                          const bool &HasDupPostfix,
1086                                          const size_t &TBNumber,
1087                                          std::string &RegisterSuffix) {
1088 
1089   RegisterSuffix = "";
1090 
1091   const StringRef NameRef(Name);
1092   const StringRef ProtoRef(Proto);
1093 
1094   if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
1095     return;
1096   }
1097 
1098   const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
1099   const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");
1100 
1101   if (IsLoadStore) {
1102     // Grab N value from  v{ld,st}N using its ascii representation.
1103     const size_t Count = NameRef[3] - 48;
1104 
1105     GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
1106                                               HasDupPostfix, HasLanePostfix,
1107                                               Count, RegisterSuffix);
1108   } else if (IsTBXOrTBL) {
1109     RegisterSuffix += "d{{[0-9]+}}, {";
1110     for (size_t i = 0; i < TBNumber-1; i++) {
1111       RegisterSuffix += "d{{[0-9]+}}, ";
1112     }
1113     RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
1114   } else {
1115     // Handle a normal instruction.
1116     if (NameRef.count("vget") || NameRef.count("vset"))
1117       return;
1118 
1119     // We first normalize our proto, since we only need to emit 4
1120     // different types of checks, yet have more than 4 proto types
1121     // that map onto those 4 patterns.
1122     std::string NormalizedProto("");
1123     NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
1124                                              HasLanePostfix, HasDupPostfix,
1125                                              NormalizedProto);
1126 
1127     for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
1128       const char &c = NormalizedProto[i];
1129       switch (c) {
1130       case 'q':
1131         RegisterSuffix += "q{{[0-9]+}}, ";
1132         break;
1133 
1134       case 'd':
1135         RegisterSuffix += "d{{[0-9]+}}, ";
1136         break;
1137 
1138       case 'i':
1139         RegisterSuffix += "#{{[0-9]+}}, ";
1140         break;
1141 
1142       case 'a':
1143         RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
1144         break;
1145       }
1146     }
1147 
1148     // Remove extra ", ".
1149     RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
1150   }
1151 }
1152 
/// GenerateChecksForIntrinsic - Given a specific instruction name +
/// typestr + class kind, generate the proper set of FileCheck
/// Patterns to check for. We could just return a string, but instead
/// use a vector since it provides us with the extra flexibility of
/// emitting multiple checks, which comes in handy for certain cases
/// like mla where we want to check for 2 different instructions.
static void GenerateChecksForIntrinsic(const std::string &Name,
                                       const std::string &Proto,
                                       StringRef &OutTypeStr,
                                       StringRef &InTypeStr,
                                       ClassKind Ck,
                                       const std::string &InstName,
                                       bool IsHiddenLOp,
                                       std::vector<std::string>& Result) {

  // If Ck is a ClassNoTest instruction, just return so no test is
  // emitted.
  if(Ck == ClassNoTest)
    return;

  // vcvt_f32_f16 is checked by name alone; no register suffix needed.
  if (Name == "vcvt_f32_f16") {
    Result.push_back("vcvt.f32.f16");
    return;
  }


  // Now we preprocess our instruction given the data we have to get the
  // data that we need.
  // Create a StringRef for String Manipulation of our Name.
  const StringRef NameRef(Name);
  // Instruction Prefix.
  std::string Prefix;
  // The type code for our out type string.
  std::string OutTypeCode;
  // To handle our different cases, we need to check for different postfixes.
  // Is our instruction a quad instruction.
  bool IsQuad = false;
  // Our instruction is of the form <instructionname>_n.
  bool HasNPostfix = false;
  // Our instruction is of the form <instructionname>_lane.
  bool HasLanePostfix = false;
  // Our instruction is of the form <instructionname>_dup.
  bool HasDupPostfix  = false;
  // Our instruction is a vcvt instruction which requires special handling.
  bool IsSpecialVCvt = false;
  // If we have a vtbxN or vtblN instruction, this is set to N.
  // (Stored in a size_t; -1 wraps around to act as an "unset" sentinel.)
  size_t TBNumber = -1;
  // Register Suffix
  std::string RegisterSuffix;

  PreprocessInstruction(NameRef, InstName, Prefix,
                        HasNPostfix, HasLanePostfix, HasDupPostfix,
                        IsSpecialVCvt, TBNumber);

  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
                               HasLanePostfix, HasDupPostfix, TBNumber,
                               RegisterSuffix);

  // In the following section, we handle a bunch of special cases. You can tell
  // a special case by the fact we are returning early.

  // If our instruction is a logical instruction without postfix or a
  // hidden LOp just return the current Prefix.
  if (Ck == ClassL || IsHiddenLOp) {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // If we have a vmov, due to the many different cases, some of which
  // vary within the different intrinsics generated for a single
  // instruction type, just output a vmov. (e.g. given an instruction
  // A, A.u32 might be vmov and A.u8 might be vmov.8).
  //
  // FIXME: Maybe something can be done about this. The two cases that we care
  // about are vmov as an LType and vmov as a WType.
  if (Prefix == "vmov") {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // In the following section, we handle special cases.

  if (OutTypeCode == "64") {
    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
    // type, the intrinsic will be optimized away, so just return
    // nothing.  On the other hand if we are handling an uint64x2_t
    // (i.e. quad instruction), vdup/vmov instructions should be
    // emitted.
    if (Prefix == "vdup" || Prefix == "vext") {
      if (IsQuad) {
        Result.push_back("{{vmov|vdup}}");
      }
      return;
    }

    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
    // multiple register operands.
    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
                            || Prefix == "vld4";
    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
                            || Prefix == "vst4";
    if (MultiLoadPrefix || MultiStorePrefix) {
      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
      return;
    }

    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
    // emitting said instructions. So return a check for
    // vldr/vstr/vmov/str instead.
    if (HasLanePostfix || HasDupPostfix) {
      if (Prefix == "vst1") {
        Result.push_back("{{str|vstr|vmov}}");
        return;
      } else if (Prefix == "vld1") {
        Result.push_back("{{ldr|vldr|vmov}}");
        return;
      }
    }
  }

  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
  // sometimes disassembled as vtrn.32. We use a regex to handle both
  // cases.
  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
    return;
  }

  // Currently on most ARM processors, we do not use vmla/vmls for
  // quad floating point operations. Instead we output vmul + vadd. So
  // check if we have one of those instructions and just output a
  // check for vmul.
  if (OutTypeCode == "f32") {
    if (Prefix == "vmls") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vsub." + OutTypeCode);
      return;
    } else if (Prefix == "vmla") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vadd." + OutTypeCode);
      return;
    }
  }

  // If we have vcvt, get the input type from the instruction name
  // (which should be of the form instname_inputtype) and append it
  // before the output type.
  if (Prefix == "vcvt") {
    const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
    Prefix += "." + inTypeCode;
  }

  // Append output type code to get our final mangled instruction.
  Prefix += "." + OutTypeCode;

  Result.push_back(Prefix + " " + RegisterSuffix);
}
1311 
/// UseMacro - Examine the prototype string to determine if the intrinsic
/// should be defined as a preprocessor macro instead of an inline function.
static bool UseMacro(const std::string &proto) {
  // Immediate arguments ('i') must stay visible to SemaChecking so the
  // user-supplied constant can be range checked, and pointer arguments
  // ('p'/'c') must not have aligned attributes hidden behind a declared
  // parameter type.  Either case forces the macro form.
  for (const char mod : proto)
    if (mod == 'i' || mod == 'p' || mod == 'c')
      return true;

  return false;
}
1329 
/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is
/// defined as a macro should be accessed directly instead of being first
/// assigned to a local temporary.
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  // Constant ints (i), pointers (p) and const pointers (c) are passed
  // straight through to the builtin call.
  switch (proto[i]) {
  case 'i':
  case 'p':
  case 'c':
    return true;
  default:
    return false;
  }
}
1337 
1338 // Generate the string "(argtype a, argtype b, ...)"
1339 static std::string GenArgs(const std::string &proto, StringRef typestr,
1340                            const std::string &name) {
1341   bool define = UseMacro(proto);
1342   char arg = 'a';
1343 
1344   std::string s;
1345   s += "(";
1346 
1347   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1348     if (define) {
1349       // Some macro arguments are used directly instead of being assigned
1350       // to local temporaries; prepend an underscore prefix to make their
1351       // names consistent with the local temporaries.
1352       if (MacroArgUsedDirectly(proto, i))
1353         s += "__";
1354     } else {
1355       s += TypeString(proto[i], typestr) + " __";
1356     }
1357     s.push_back(arg);
1358     //To avoid argument being multiple defined, add extra number for renaming.
1359     if (name == "vcopy_lane")
1360       s.push_back('1');
1361     if ((i + 1) < e)
1362       s += ", ";
1363   }
1364 
1365   s += ")";
1366   return s;
1367 }
1368 
1369 // Macro arguments are not type-checked like inline function arguments, so
1370 // assign them to local temporaries to get the right type checking.
1371 static std::string GenMacroLocals(const std::string &proto, StringRef typestr,
1372                                   const std::string &name ) {
1373   char arg = 'a';
1374   std::string s;
1375   bool generatedLocal = false;
1376 
1377   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1378     // Do not create a temporary for an immediate argument.
1379     // That would defeat the whole point of using a macro!
1380     if (MacroArgUsedDirectly(proto, i))
1381       continue;
1382     generatedLocal = true;
1383     bool extranumber = false;
1384     if(name == "vcopy_lane")
1385       extranumber = true;
1386 
1387     s += TypeString(proto[i], typestr) + " __";
1388     s.push_back(arg);
1389     if(extranumber)
1390       s.push_back('1');
1391     s += " = (";
1392     s.push_back(arg);
1393     if(extranumber)
1394       s.push_back('1');
1395     s += "); ";
1396   }
1397 
1398   if (generatedLocal)
1399     s += "\\\n  ";
1400   return s;
1401 }
1402 
1403 // Use the vmovl builtin to sign-extend or zero-extend a vector.
1404 static std::string Extend(StringRef typestr, const std::string &a, bool h=0) {
1405   std::string s, high;
1406   high = h ? "_high" : "";
1407   s = MangleName("vmovl" + high, typestr, ClassS);
1408   s += "(" + a + ")";
1409   return s;
1410 }
1411 
1412 // Get the high 64-bit part of a vector
1413 static std::string GetHigh(const std::string &a, StringRef typestr) {
1414   std::string s;
1415   s = MangleName("vget_high", typestr, ClassS);
1416   s += "(" + a + ")";
1417   return s;
1418 }
1419 
1420 // Gen operation with two operands and get high 64-bit for both of two operands.
1421 static std::string Gen2OpWith2High(StringRef typestr,
1422                                    const std::string &op,
1423                                    const std::string &a,
1424                                    const std::string &b) {
1425   std::string s;
1426   std::string Op1 = GetHigh(a, typestr);
1427   std::string Op2 = GetHigh(b, typestr);
1428   s = MangleName(op, typestr, ClassS);
1429   s += "(" + Op1 + ", " + Op2 + ");";
1430   return s;
1431 }
1432 
1433 // Gen operation with three operands and get high 64-bit of the latter
1434 // two operands.
1435 static std::string Gen3OpWith2High(StringRef typestr,
1436                                    const std::string &op,
1437                                    const std::string &a,
1438                                    const std::string &b,
1439                                    const std::string &c) {
1440   std::string s;
1441   std::string Op1 = GetHigh(b, typestr);
1442   std::string Op2 = GetHigh(c, typestr);
1443   s = MangleName(op, typestr, ClassS);
1444   s += "(" + a + ", " + Op1 + ", " + Op2 + ");";
1445   return s;
1446 }
1447 
1448 // Gen combine operation by putting a on low 64-bit, and b on high 64-bit.
1449 static std::string GenCombine(std::string typestr,
1450                               const std::string &a,
1451                               const std::string &b) {
1452   std::string s;
1453   s = MangleName("vcombine", typestr, ClassS);
1454   s += "(" + a + ", " + b + ")";
1455   return s;
1456 }
1457 
1458 static std::string Duplicate(unsigned nElts, StringRef typestr,
1459                              const std::string &a) {
1460   std::string s;
1461 
1462   s = "(" + TypeString('d', typestr) + "){ ";
1463   for (unsigned i = 0; i != nElts; ++i) {
1464     s += a;
1465     if ((i + 1) < nElts)
1466       s += ", ";
1467   }
1468   s += " }";
1469 
1470   return s;
1471 }
1472 
// SplatLane - Emit a __builtin_shufflevector call that broadcasts the
// given lane of vec into all nElts elements of the result.
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string out("__builtin_shufflevector(");
  out += vec + ", " + vec;
  for (unsigned idx = 0; idx != nElts; ++idx)
    out += ", " + lane;
  out += ")";
  return out;
}
1481 
1482 static std::string RemoveHigh(const std::string &name) {
1483   std::string s = name;
1484   std::size_t found = s.find("_high_");
1485   if (found == std::string::npos)
1486     PrintFatalError("name should contain \"_high_\" for high intrinsics");
1487   s.replace(found, 5, "");
1488   return s;
1489 }
1490 
1491 static unsigned GetNumElements(StringRef typestr, bool &quad) {
1492   quad = false;
1493   bool dummy = false;
1494   char type = ClassifyType(typestr, quad, dummy, dummy);
1495   unsigned nElts = 0;
1496   switch (type) {
1497   case 'c': nElts = 8; break;
1498   case 's': nElts = 4; break;
1499   case 'i': nElts = 2; break;
1500   case 'l': nElts = 1; break;
1501   case 'h': nElts = 4; break;
1502   case 'f': nElts = 2; break;
1503   case 'd':
1504     nElts = 1;
1505     break;
1506   default:
1507     PrintFatalError("unhandled type!");
1508   }
1509   if (quad) nElts <<= 1;
1510   return nElts;
1511 }
1512 
// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
// The returned string is the body of the emitted intrinsic: a "return"
// expression for inline functions, or a bare expression for macro
// definitions (see UseMacro).  Many ops are expressed as calls to other
// mangled intrinsics (vmull, vget_high, vcombine, ...) rather than
// builtins.
static std::string GenOpString(const std::string &name, OpKind op,
                               const std::string &proto, StringRef typestr) {
  bool quad;
  unsigned nElts = GetNumElements(typestr, quad);
  bool define = UseMacro(proto);

  std::string ts = TypeString(proto[0], typestr);
  std::string s;
  if (!define) {
    s = "return ";
  }

  switch(op) {
  case OpAdd:
    s += "__a + __b;";
    break;
  // Widening adds/subs extend the narrow operand(s) with vmovl (Extend)
  // and then use the generic vector operator.
  case OpAddl:
    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddlHi:
    s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpAddw:
    s += "__a + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddwHi:
    s += "__a + " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpSub:
    s += "__a - __b;";
    break;
  case OpSubl:
    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
    break;
  case OpSublHi:
    s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpSubw:
    s += "__a - " + Extend(typestr, "__b") + ";";
    break;
  case OpSubwHi:
    s += "__a - " + Extend(typestr, "__b", 1) + ";";
    break;
  // _n forms broadcast a scalar with Duplicate; _lane forms broadcast a
  // vector lane with SplatLane.
  case OpMulN:
    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
    break;
  case OpMulLane:
    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
    break;
  case OpMulXLane:
    s += MangleName("vmulx", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMul:
    s += "__a * __b;";
    break;
  case OpMullLane:
    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMullHiLane:
    s += MangleName("vmull", typestr, ClassS) + "(" +
      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMlaN:
    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlaLane:
    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMla:
    s += "__a + (__b * __c);";
    break;
  case OpMlalN:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlalLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlalHiLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlal:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMullHi:
    s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
    break;
  case OpMlalHi:
    s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
    break;
  case OpMlsN:
    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlsLane:
    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  // vfms by lane is emitted as vfma with a negated multiplicand; local
  // __x1 temporaries keep macro expansion from capturing caller names.
  case OpFMSLane:
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
    s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
    break;
  case OpFMSLaneQ:
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
    s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
    break;
  case OpMls:
    s += "__a - (__b * __c);";
    break;
  case OpMlslN:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlslLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlslHiLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlsl:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMlslHi:
    s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
    break;
  case OpQDMullLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMullHiLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(" +
      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMlalLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlalHiLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslHiLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMulhLane:
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQRDMulhLane:
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  // Comparisons use the generic vector operators; the cast forces the
  // result into the declared (unsigned) return type.
  case OpEq:
    s += "(" + ts + ")(__a == __b);";
    break;
  case OpGe:
    s += "(" + ts + ")(__a >= __b);";
    break;
  case OpLe:
    s += "(" + ts + ")(__a <= __b);";
    break;
  case OpGt:
    s += "(" + ts + ")(__a > __b);";
    break;
  case OpLt:
    s += "(" + ts + ")(__a < __b);";
    break;
  case OpNeg:
    s += " -__a;";
    break;
  case OpNot:
    s += " ~__a;";
    break;
  case OpAnd:
    s += "__a & __b;";
    break;
  case OpOr:
    s += "__a | __b;";
    break;
  case OpXor:
    s += "__a ^ __b;";
    break;
  case OpAndNot:
    s += "__a & ~__b;";
    break;
  case OpOrNot:
    s += "__a | ~__b;";
    break;
  case OpCast:
    s += "(" + ts + ")__a;";
    break;
  case OpConcat:
    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
    s += ", (int64x1_t)__b, 0, 1);";
    break;
  case OpHi:
    // nElts is for the result vector, so the source is twice that number.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = nElts; i < nElts * 2; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpLo:
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 0; i < nElts; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpDup:
    s += Duplicate(nElts, typestr, "__a") + ";";
    break;
  case OpDupLane:
    s += SplatLane(nElts, "__a", "__b") + ";";
    break;
  case OpSelect:
    // ((0 & 1) | (~0 & 2))
    s += "(" + ts + ")";
    ts = TypeString(proto[1], typestr);
    s += "((__a & (" + ts + ")__b) | ";
    s += "(~__a & (" + ts + ")__c));";
    break;
  // The vrevN ops reverse elements within each N-bit chunk by emitting
  // an explicit shuffle mask.
  case OpRev16:
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 2; i <= nElts; i += 2)
      for (unsigned j = 0; j != 2; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  case OpRev32: {
    unsigned WordElts = nElts >> (1 + (int)quad);
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = WordElts; i <= nElts; i += WordElts)
      for (unsigned j = 0; j != WordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpRev64: {
    unsigned DblWordElts = nElts >> (int)quad;
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
      for (unsigned j = 0; j != DblWordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpAbdl: {
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  case OpAbdlHi:
    s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
    break;
  // The *hnHi ops narrow __b/__c and vcombine the result onto __a.
  case OpAddhnHi: {
    std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
    s += ";";
    break;
  }
  case OpRAddhnHi: {
    std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
    s += ";";
    break;
  }
  case OpSubhnHi: {
    std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
    s += ";";
    break;
  }
  case OpRSubhnHi: {
    std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
    s += ";";
    break;
  }
  case OpAba:
    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbal:
    s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbalHi:
    s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
    break;
  case OpQDMullHi:
    s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
    break;
  case OpQDMlalHi:
    s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
    break;
  case OpQDMlslHi:
    s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
    break;
  case OpDiv:
    s += "__a / __b;";
    break;
  case OpMovlHi: {
    // Prepend a local for the high half, then shift-left by 0 to widen.
    s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
        MangleName("vget_high", typestr, ClassS) + "(__a);\n  " + s;
    s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
    s += "(__a1, 0);";
    break;
  }
  case OpLongHi: {
    // Another local variable __a1 is needed for calling a Macro,
    // or using __a will have naming conflict when Macro expanding.
    s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
         MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
    s += "  (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
         "(__a1, __b);";
    break;
  }
  case OpNarrowHi: {
    s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
         MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
    break;
  }
  case OpCopy: {
    s += TypeString('s', typestr) + " __c2 = " +
         MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n  " +
         MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);";
    break;
  }
  default:
    PrintFatalError("unknown OpKind!");
  }
  return s;
}
1866 
1867 static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
1868   unsigned mod = proto[0];
1869 
1870   if (mod == 'v' || mod == 'f')
1871     mod = proto[1];
1872 
1873   bool quad = false;
1874   bool poly = false;
1875   bool usgn = false;
1876   bool scal = false;
1877   bool cnst = false;
1878   bool pntr = false;
1879 
1880   // Base type to get the type string for.
1881   char type = ClassifyType(typestr, quad, poly, usgn);
1882 
1883   // Based on the modifying character, change the type and width if necessary.
1884   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
1885 
1886   NeonTypeFlags::EltType ET;
1887   switch (type) {
1888     case 'c':
1889       ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
1890       break;
1891     case 's':
1892       ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
1893       break;
1894     case 'i':
1895       ET = NeonTypeFlags::Int32;
1896       break;
1897     case 'l':
1898       ET = NeonTypeFlags::Int64;
1899       break;
1900     case 'h':
1901       ET = NeonTypeFlags::Float16;
1902       break;
1903     case 'f':
1904       ET = NeonTypeFlags::Float32;
1905       break;
1906     case 'd':
1907       ET = NeonTypeFlags::Float64;
1908       break;
1909     default:
1910       PrintFatalError("unhandled type!");
1911   }
1912   NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
1913   return Flags.getFlags();
1914 }
1915 
/// ProtoHasScalar - Check whether the prototype string contains a scalar
/// operand modifier: 's' or 'r'.
///
/// \param proto the intrinsic's prototype string (one modifier char per
///        return value / argument).  Taken by const reference to avoid
///        copying the string on every call (the original signature took it
///        by value).
/// \returns true if any operand is a scalar with the element type.
static bool ProtoHasScalar(const std::string &proto) {
  // find_first_of scans once for either modifier character.
  return proto.find_first_of("sr") != std::string::npos;
}
1921 
// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
//
// Builds the body of an intrinsic as a call to the corresponding
// __builtin_neon_* function: casts and renames the arguments, expands
// multi-vector values into one argument per subvector, and (for ClassB
// builtins) appends the constant type-discriminator argument.
static std::string GenBuiltin(const std::string &name, const std::string &proto,
                              StringRef typestr, ClassKind ck) {
  std::string s;

  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
  // sret-like argument.
  bool sret = (proto[0] >= '2' && proto[0] <= '4');

  // Macro-style definitions reference local temporaries (__a, __b, ...)
  // rather than the macro parameters directly.
  bool define = UseMacro(proto);

  // Check if the prototype has a scalar operand with the type of the vector
  // elements.  If not, bitcasting the args will take care of arg checking.
  // The actual signedness etc. will be taken care of with special enums.
  if (!ProtoHasScalar(proto))
    ck = ClassB;

  // Emit the return handling: either a cast of the builtin's result to the
  // intrinsic's return type, or a local 'r' for sret-like builtins.
  if (proto[0] != 'v') {
    std::string ts = TypeString(proto[0], typestr);

    if (define) {
      if (sret)
        s += ts + " r; ";
      else
        s += "(" + ts + ")";
    } else if (sret) {
      s += ts + " r; ";
    } else {
      s += "return (" + ts + ")";
    }
  }

  // 'a' in the prototype marks a splatted scalar operand.
  bool splat = proto.find('a') != std::string::npos;

  s += "__builtin_neon_";
  if (splat) {
    // Call the non-splat builtin: chop off the "_n" suffix from the name.
    std::string vname(name, 0, name.size()-2);
    s += MangleName(vname, typestr, ck);
  } else {
    s += MangleName(name, typestr, ck);
  }
  s += "(";

  // Pass the address of the return variable as the first argument to sret-like
  // builtins.
  if (sret)
    s += "&r, ";

  // Emit each argument in order, applying any needed casts or expansions.
  char arg = 'a';
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    std::string args = std::string(&arg, 1);

    // Use the local temporaries instead of the macro arguments.
    args = "__" + args;

    // Classify this argument's type so we can decide whether a cast is
    // required below.
    bool argQuad = false;
    bool argPoly = false;
    bool argUsgn = false;
    bool argScalar = false;
    bool dummy = false;
    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
                      dummy, dummy);

    // Handle multiple-vector values specially, emitting each subvector as an
    // argument to the __builtin.
    if (proto[i] >= '2' && proto[i] <= '4') {
      // Check if an explicit cast is needed.
      if (argType != 'c' || argPoly || argUsgn)
        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;

      // One ".val[N]" argument per subvector of the struct.
      for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
        s += args + ".val[" + utostr(vi) + "]";
        if ((vi + 1) < ve)
          s += ", ";
      }
      if ((i + 1) < e)
        s += ", ";

      continue;
    }

    // The final argument of a splat intrinsic is duplicated across all lanes.
    if (splat && (i + 1) == e)
      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);

    // Check if an explicit cast is needed.
    if ((splat || !argScalar) &&
        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
      std::string argTypeStr = "c";
      if (ck != ClassB)
        argTypeStr = argType;
      if (argQuad)
        argTypeStr = "Q" + argTypeStr;
      args = "(" + TypeString('d', argTypeStr) + ")" + args;
    }

    s += args;
    if ((i + 1) < e)
      s += ", ";
  }

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (ck == ClassB)
    s += ", " + utostr(GetNeonEnum(proto, typestr));

  s += ");";

  // For sret-like builtins, return (or, in a macro, evaluate to) 'r'.
  if (proto[0] != 'v' && sret) {
    if (define)
      s += " r;";
    else
      s += " return r;";
  }
  return s;
}
2038 
2039 static std::string GenBuiltinDef(const std::string &name,
2040                                  const std::string &proto,
2041                                  StringRef typestr, ClassKind ck) {
2042   std::string s("BUILTIN(__builtin_neon_");
2043 
2044   // If all types are the same size, bitcasting the args will take care
2045   // of arg checking.  The actual signedness etc. will be taken care of with
2046   // special enums.
2047   if (!ProtoHasScalar(proto))
2048     ck = ClassB;
2049 
2050   s += MangleName(name, typestr, ck);
2051   s += ", \"";
2052 
2053   for (unsigned i = 0, e = proto.size(); i != e; ++i)
2054     s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
2055 
2056   // Extra constant integer to hold type class enum for this function, e.g. s8
2057   if (ck == ClassB)
2058     s += "i";
2059 
2060   s += "\", \"n\")";
2061   return s;
2062 }
2063 
// Generate a complete arm_neon.h entry for one intrinsic instance: either a
// "#define" macro or an "__ai" inline function, with its body produced by
// GenOpString (generic operations) or GenBuiltin (__builtin_neon_* calls).
static std::string GenIntrinsic(const std::string &name,
                                const std::string &proto,
                                StringRef outTypeStr, StringRef inTypeStr,
                                OpKind kind, ClassKind classKind) {
  assert(!proto.empty() && "");
  // Intrinsics needing a macro (per UseMacro) are emitted as #defines;
  // unavailable ones always get a plain declaration.
  bool define = UseMacro(proto) && kind != OpUnavailable;
  std::string s;

  // static always inline + return type
  if (define)
    s += "#define ";
  else
    s += "__ai " + TypeString(proto[0], outTypeStr) + " ";

  // Function name with type suffix
  std::string mangledName = MangleName(name, outTypeStr, ClassS);
  if (outTypeStr != inTypeStr) {
    // If the input type is different (e.g., for vreinterpret), append a suffix
    // for the input type.  Strip off a "Q" (quad) prefix so that MangleName
    // does not insert another "q" in the name.
    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
  }
  s += mangledName;

  // Function arguments
  s += GenArgs(proto, inTypeStr, name);

  // Definition.
  if (define) {
    // Macro body: a GNU statement expression holding local copies of the
    // macro arguments.
    s += " __extension__ ({ \\\n  ";
    s += GenMacroLocals(proto, inTypeStr, name);
  } else if (kind == OpUnavailable) {
    // No body: just mark the declaration unavailable and stop.
    s += " __attribute__((unavailable));\n";
    return s;
  } else
    s += " {\n  ";

  // Body: an expansion of a generic operation, or a __builtin_neon_* call.
  if (kind != OpNone)
    s += GenOpString(name, kind, proto, outTypeStr);
  else
    s += GenBuiltin(name, proto, outTypeStr, classKind);
  if (define)
    s += " })";
  else
    s += " }";
  s += "\n";
  return s;
}
2114 
/// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
/// is comprised of type definitions and function declarations.
void NeonEmitter::run(raw_ostream &OS) {
  // Emit the header's license banner.
  OS <<
    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
    "---===\n"
    " *\n"
    " * Permission is hereby granted, free of charge, to any person obtaining "
    "a copy\n"
    " * of this software and associated documentation files (the \"Software\"),"
    " to deal\n"
    " * in the Software without restriction, including without limitation the "
    "rights\n"
    " * to use, copy, modify, merge, publish, distribute, sublicense, "
    "and/or sell\n"
    " * copies of the Software, and to permit persons to whom the Software is\n"
    " * furnished to do so, subject to the following conditions:\n"
    " *\n"
    " * The above copyright notice and this permission notice shall be "
    "included in\n"
    " * all copies or substantial portions of the Software.\n"
    " *\n"
    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
    "EXPRESS OR\n"
    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
    "MERCHANTABILITY,\n"
    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
    "SHALL THE\n"
    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
    "OTHER\n"
    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
    "ARISING FROM,\n"
    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
    "DEALINGS IN\n"
    " * THE SOFTWARE.\n"
    " *\n"
    " *===--------------------------------------------------------------------"
    "---===\n"
    " */\n\n";

  OS << "#ifndef __ARM_NEON_H\n";
  OS << "#define __ARM_NEON_H\n\n";

  // NOTE(review): "__AARCH_FEATURE_ADVSIMD" does not match the usual ACLE
  // "__ARM_FEATURE_*" macro spelling -- confirm this is the intended macro.
  OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n";
  OS << "#error \"NEON support not enabled\"\n";
  OS << "#endif\n\n";

  OS << "#include <stdint.h>\n\n";

  // Emit NEON-specific scalar typedefs.
  OS << "typedef float float32_t;\n";
  OS << "typedef __fp16 float16_t;\n";

  // float64_t is only available on AArch64.
  OS << "#ifdef __aarch64__\n";
  OS << "typedef double float64_t;\n";
  OS << "#endif\n\n";

  // For now, signedness of polynomial types depends on target
  OS << "#ifdef __aarch64__\n";
  OS << "typedef uint8_t poly8_t;\n";
  OS << "typedef uint16_t poly16_t;\n";
  OS << "#else\n";
  OS << "typedef int8_t poly8_t;\n";
  OS << "typedef int16_t poly16_t;\n";
  OS << "#endif\n";

  // Emit Neon vector typedefs.
  std::string TypedefTypes(
      "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPs");
  SmallVector<StringRef, 24> TDTypeVec;
  ParseTypes(0, TypedefTypes, TDTypeVec);

  // Emit vector typedefs.
  // Runs of 'd' (float64) types are AArch64-only and are bracketed by
  // "#ifdef __aarch64__" / "#endif"; isA64 tracks whether a bracket is open.
  bool isA64 = false;
  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
    bool dummy, quad = false, poly = false;
    char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
    bool preinsert = false;
    bool postinsert = false;

    if (type == 'd') {
      // Entering an AArch64-only run: open the #ifdef unless already open.
      preinsert = isA64? false: true;
      isA64 = true;
    } else {
      // Leaving an AArch64-only run: close the #ifdef if one is open.
      postinsert = isA64? true: false;
      isA64 = false;
    }
    if (postinsert)
      OS << "#endif\n";
    if (preinsert)
      OS << "#ifdef __aarch64__\n";

    if (poly)
      OS << "typedef __attribute__((neon_polyvector_type(";
    else
      OS << "typedef __attribute__((neon_vector_type(";

    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
    OS << utostr(nElts) << "))) ";
    // Pad single-digit lane counts so the typedef columns line up.
    if (nElts < 10)
      OS << " ";

    OS << TypeString('s', TDTypeVec[i]);
    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";

  }
  OS << "\n";

  // Emit struct typedefs (arrays of 2, 3 and 4 vectors).
  isA64 = false;
  for (unsigned vi = 2; vi != 5; ++vi) {
    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
      bool dummy, quad = false, poly = false;
      char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
      bool preinsert = false;
      bool postinsert = false;

      // Same #ifdef bracketing of AArch64-only float64 types as above.
      if (type == 'd') {
        preinsert = isA64? false: true;
        isA64 = true;
      } else {
        postinsert = isA64? true: false;
        isA64 = false;
      }
      if (postinsert)
        OS << "#endif\n";
      if (preinsert)
        OS << "#ifdef __aarch64__\n";

      std::string ts = TypeString('d', TDTypeVec[i]);
      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
      OS << "typedef struct " << vs << " {\n";
      OS << "  " << ts << " val";
      OS << "[" << utostr(vi) << "]";
      OS << ";\n} ";
      OS << vs << ";\n";
      OS << "\n";
    }
  }

  OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";

  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");

  StringMap<ClassKind> EmittedMap;

  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
  // intrinsics.  (Some of the saturating multiply instructions are also
  // used to implement the corresponding "_lane" variants, but tablegen
  // sorts the records into alphabetical order so that the "_lane" variants
  // come after the intrinsics they use.)
  emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);

  // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
  // common intrinsics appear only once in the output stream.
  // The check for uniqueness is done in emitIntrinsic.
  // Emit ARM intrinsics.
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip AArch64 intrinsics; they will be emitted at the end.
    bool isA64 = R->getValueAsBit("isA64");
    if (isA64)
      continue;

    // NOTE(review): VABDL was pre-emitted above but is not excluded here;
    // emitIntrinsic's EmittedMap uniqueness check suppresses the duplicate.
    if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
        R->getName() != "VABD")
      emitIntrinsic(OS, R, EmittedMap);
  }

  // Emit AArch64-specific intrinsics.
  OS << "#ifdef __aarch64__\n";

  // Pre-emit the "_high" variants other AArch64 intrinsics depend on.
  emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip ARM intrinsics already included above.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64)
      continue;

    emitIntrinsic(OS, R, EmittedMap);
  }

  OS << "#endif\n\n";

  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_NEON_H */\n";
}
2311 
/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
/// intrinsics specified by record R checking for intrinsic uniqueness.
void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
                                StringMap<ClassKind> &EmittedMap) {
  std::string name = R->getValueAsString("Name");
  std::string Proto = R->getValueAsString("Prototype");
  std::string Types = R->getValueAsString("Types");

  SmallVector<StringRef, 16> TypeVec;
  ParseTypes(R, Types, TypeVec);

  OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];

  // The class kind comes from the record's second superclass; records
  // implemented as builtins (OpNone) are required to have one.
  ClassKind classKind = ClassNone;
  if (R->getSuperClasses().size() >= 2)
    classKind = ClassMap[R->getSuperClasses()[1]];
  if (classKind == ClassNone && kind == OpNone)
    PrintFatalError(R->getLoc(), "Builtin has no class kind");

  // Emit one definition for every type in the record's type list.
  for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
    if (kind == OpReinterpret) {
      // vreinterpret: emit a cast to this type from every other type of the
      // same width (matching quad-ness).
      bool outQuad = false;
      bool dummy = false;
      (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
      for (unsigned srcti = 0, srcte = TypeVec.size();
           srcti != srcte; ++srcti) {
        bool inQuad = false;
        (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
        if (srcti == ti || inQuad != outQuad)
          continue;
        std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
                                     OpCast, ClassS);
        // Skip any definition whose full text was already emitted.
        if (EmittedMap.count(s))
          continue;
        EmittedMap[s] = ClassS;
        OS << s;
      }
    } else {
      std::string s =
          GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
      // Skip any definition whose full text was already emitted.
      if (EmittedMap.count(s))
        continue;
      EmittedMap[s] = classKind;
      OS << s;
    }
  }
  OS << "\n";
}
2360 
2361 static unsigned RangeFromType(const char mod, StringRef typestr) {
2362   // base type to get the type string for.
2363   bool quad = false, dummy = false;
2364   char type = ClassifyType(typestr, quad, dummy, dummy);
2365   type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
2366 
2367   switch (type) {
2368     case 'c':
2369       return (8 << (int)quad) - 1;
2370     case 'h':
2371     case 's':
2372       return (4 << (int)quad) - 1;
2373     case 'f':
2374     case 'i':
2375       return (2 << (int)quad) - 1;
2376     case 'd':
2377     case 'l':
2378       return (1 << (int)quad) - 1;
2379     default:
2380       PrintFatalError("unhandled type!");
2381   }
2382 }
2383 
2384 /// Generate the ARM and AArch64 intrinsic range checking code for
2385 /// shift/lane immediates, checking for unique declarations.
2386 void
2387 NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
2388                                         StringMap<ClassKind> &A64IntrinsicMap,
2389                                         bool isA64RangeCheck) {
2390   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2391   StringMap<OpKind> EmittedMap;
2392 
2393   // Generate the intrinsic range checking code for shift/lane immediates.
2394   if (isA64RangeCheck)
2395     OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
2396   else
2397     OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2398 
2399   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2400     Record *R = RV[i];
2401 
2402     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2403     if (k != OpNone)
2404       continue;
2405 
2406     std::string name = R->getValueAsString("Name");
2407     std::string Proto = R->getValueAsString("Prototype");
2408     std::string Types = R->getValueAsString("Types");
2409     std::string Rename = name + "@" + Proto;
2410 
2411     // Functions with 'a' (the splat code) in the type prototype should not get
2412     // their own builtin as they use the non-splat variant.
2413     if (Proto.find('a') != std::string::npos)
2414       continue;
2415 
2416     // Functions which do not have an immediate do not need to have range
2417     // checking code emitted.
2418     size_t immPos = Proto.find('i');
2419     if (immPos == std::string::npos)
2420       continue;
2421 
2422     SmallVector<StringRef, 16> TypeVec;
2423     ParseTypes(R, Types, TypeVec);
2424 
2425     if (R->getSuperClasses().size() < 2)
2426       PrintFatalError(R->getLoc(), "Builtin has no class kind");
2427 
2428     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2429 
2430     // Do not include AArch64 range checks if not generating code for AArch64.
2431     bool isA64 = R->getValueAsBit("isA64");
2432     if (!isA64RangeCheck && isA64)
2433       continue;
2434 
2435     // Include ARM range checks in AArch64 but only if ARM intrinsics are not
2436     // redefined by AArch64 to handle new types.
2437     if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
2438       ClassKind &A64CK = A64IntrinsicMap[Rename];
2439       if (A64CK == ck && ck != ClassNone)
2440         continue;
2441     }
2442 
2443     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2444       std::string namestr, shiftstr, rangestr;
2445 
2446       if (R->getValueAsBit("isVCVT_N")) {
2447         // VCVT between floating- and fixed-point values takes an immediate
2448         // in the range [1, 32] for f32, or [1, 64] for f64.
2449         ck = ClassB;
2450         if (name.find("32") != std::string::npos)
2451           rangestr = "l = 1; u = 31"; // upper bound = l + u
2452         else if (name.find("64") != std::string::npos)
2453           rangestr = "l = 1; u = 63";
2454         else
2455           PrintFatalError(R->getLoc(),
2456               "Fixed point convert name should contains \"32\" or \"64\"");
2457       } else if (!ProtoHasScalar(Proto)) {
2458         // Builtins which are overloaded by type will need to have their upper
2459         // bound computed at Sema time based on the type constant.
2460         ck = ClassB;
2461         if (R->getValueAsBit("isShift")) {
2462           shiftstr = ", true";
2463 
2464           // Right shifts have an 'r' in the name, left shifts do not.
2465           if (name.find('r') != std::string::npos)
2466             rangestr = "l = 1; ";
2467         }
2468         rangestr += "u = RFT(TV" + shiftstr + ")";
2469       } else {
2470         // The immediate generally refers to a lane in the preceding argument.
2471         assert(immPos > 0 && "unexpected immediate operand");
2472         rangestr =
2473             "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
2474       }
2475       // Make sure cases appear only once by uniquing them in a string map.
2476       namestr = MangleName(name, TypeVec[ti], ck);
2477       if (EmittedMap.count(namestr))
2478         continue;
2479       EmittedMap[namestr] = OpNone;
2480 
2481       // Calculate the index of the immediate that should be range checked.
2482       unsigned immidx = 0;
2483 
2484       // Builtins that return a struct of multiple vectors have an extra
2485       // leading arg for the struct return.
2486       if (Proto[0] >= '2' && Proto[0] <= '4')
2487         ++immidx;
2488 
2489       // Add one to the index for each argument until we reach the immediate
2490       // to be checked.  Structs of vectors are passed as multiple arguments.
2491       for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
2492         switch (Proto[ii]) {
2493         default:
2494           immidx += 1;
2495           break;
2496         case '2':
2497           immidx += 2;
2498           break;
2499         case '3':
2500           immidx += 3;
2501           break;
2502         case '4':
2503           immidx += 4;
2504           break;
2505         case 'i':
2506           ie = ii + 1;
2507           break;
2508         }
2509       }
2510       if (isA64RangeCheck)
2511         OS << "case AArch64::BI__builtin_neon_";
2512       else
2513         OS << "case ARM::BI__builtin_neon_";
2514       OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
2515          << rangestr << "; break;\n";
2516     }
2517   }
2518   OS << "#endif\n\n";
2519 }
2520 
/// Generate the ARM and AArch64 overloaded type checking code for
/// SemaChecking.cpp, checking for unique builtin declarations.
///
/// For each overloadable builtin this emits a "case <builtin>: mask = ...;"
/// line giving the bitmask of valid type-flag values (plus optional pointer
/// argument information), bracketed by GET_NEON_OVERLOAD_CHECK or
/// GET_NEON_AARCH64_OVERLOAD_CHECK depending on isA64TypeCheck.
void
NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
                                      StringMap<ClassKind> &A64IntrinsicMap,
                                      bool isA64TypeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate the overloaded type checking code for SemaChecking.cpp
  if (isA64TypeCheck)
    OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
  else
    OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only records implemented as builtins (OpNone) are type-checked here.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    std::string name = R->getValueAsString("Name");
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which have a scalar argument cannot be overloaded, no need to
    // check them if we are emitting the type checking code.
    if (ProtoHasScalar(Proto))
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    // Do not include AArch64 type checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64TypeCheck && isA64)
      continue;

    // Include ARM type check in AArch64 but only if ARM intrinsics
    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
    // redefined in AArch64 to handle an additional 2 x f64 type.
    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
    if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    // Accumulate one bitmask for the 64-bit forms (mask) and one for the
    // 128-bit forms (qmask); si/qi remember a representative type index for
    // mangling the case label below.
    int si = -1, qi = -1;
    uint64_t mask = 0, qmask = 0;
    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the switch case(s) for this builtin for the type validation.
      bool quad = false, poly = false, usgn = false;
      (void) ClassifyType(TypeVec[ti], quad, poly, usgn);

      if (quad) {
        qi = ti;
        qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      } else {
        si = ti;
        mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      }
    }

    // Check if the builtin function has a pointer or const pointer argument.
    int PtrArgNum = -1;
    bool HasConstPtr = false;
    for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
      char ArgType = Proto[arg];
      if (ArgType == 'c') {
        HasConstPtr = true;
        PtrArgNum = arg - 1;
        break;
      }
      if (ArgType == 'p') {
        PtrArgNum = arg - 1;
        break;
      }
    }
    // For sret builtins, adjust the pointer argument index.
    if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
      PtrArgNum += 1;

    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
    // and vst1_lane intrinsics.  Using a pointer to the vector element
    // type with one of those operations causes codegen to select an aligned
    // load/store instruction.  If you want an unaligned operation,
    // the pointer argument needs to have less alignment than element type,
    // so just accept any pointer type.
    if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
      PtrArgNum = -1;
      HasConstPtr = false;
    }

    // Emit the case line(s): one for the 64-bit forms, one for the 128-bit.
    if (mask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
         << "0x" << utohexstr(mask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
    if (qmask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
         << "0x" << utohexstr(qmask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}
2653 
/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def
/// declaration of builtins, checking for unique builtin declarations.
///
/// Output is bracketed by GET_NEON_BUILTINS or GET_NEON_AARCH64_BUILTINS
/// depending on isA64GenBuiltinDef; A64IntrinsicMap is used to skip ARM
/// builtins that AArch64 redefines.
void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
                                 StringMap<ClassKind> &A64IntrinsicMap,
                                 bool isA64GenBuiltinDef) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate BuiltinsARM.def and BuiltinsAArch64.def
  if (isA64GenBuiltinDef)
    OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
  else
    OS << "#ifdef GET_NEON_BUILTINS\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only records implemented as builtins (OpNone) need BUILTIN() macros.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string name = R->getValueAsString("Name");
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    std::string Types = R->getValueAsString("Types");
    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    // Do not include AArch64 BUILTIN() macros if not generating
    // code for AArch64
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64GenBuiltinDef && isA64)
      continue;

    // Include ARM BUILTIN() macros in AArch64 but only if ARM intrinsics
    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
    // redefined in AArch64 to handle an additional 2 x f64 type.
    if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the declaration for this builtin, ensuring
      // that each unique BUILTIN() macro appears only once in the output
      // stream.
      std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
      if (EmittedMap.count(bd))
        continue;

      EmittedMap[bd] = OpNone;
      OS << bd << "\n";
    }
  }
  OS << "#endif\n\n";
}
2721 
2722 /// runHeader - Emit a file with sections defining:
2723 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
2724 /// 2. the SemaChecking code for the type overload checking.
2725 /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
2726 void NeonEmitter::runHeader(raw_ostream &OS) {
2727   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2728 
2729   // build a map of AArch64 intriniscs to be used in uniqueness checks.
2730   StringMap<ClassKind> A64IntrinsicMap;
2731   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2732     Record *R = RV[i];
2733 
2734     bool isA64 = R->getValueAsBit("isA64");
2735     if (!isA64)
2736       continue;
2737 
2738     ClassKind CK = ClassNone;
2739     if (R->getSuperClasses().size() >= 2)
2740       CK = ClassMap[R->getSuperClasses()[1]];
2741 
2742     std::string Name = R->getValueAsString("Name");
2743     std::string Proto = R->getValueAsString("Prototype");
2744     std::string Rename = Name + "@" + Proto;
2745     if (A64IntrinsicMap.count(Rename))
2746       continue;
2747     A64IntrinsicMap[Rename] = CK;
2748   }
2749 
2750   // Generate BuiltinsARM.def for ARM
2751   genBuiltinsDef(OS, A64IntrinsicMap, false);
2752 
2753   // Generate BuiltinsAArch64.def for AArch64
2754   genBuiltinsDef(OS, A64IntrinsicMap, true);
2755 
2756   // Generate ARM overloaded type checking code for SemaChecking.cpp
2757   genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
2758 
2759   // Generate AArch64 overloaded type checking code for SemaChecking.cpp
2760   genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
2761 
2762   // Generate ARM range checking code for shift/lane immediates.
2763   genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
2764 
2765   // Generate the AArch64 range checking code for shift/lane immediates.
2766   genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
2767 }
2768 
2769 /// GenTest - Write out a test for the intrinsic specified by the name and
2770 /// type strings, including the embedded patterns for FileCheck to match.
2771 static std::string GenTest(const std::string &name,
2772                            const std::string &proto,
2773                            StringRef outTypeStr, StringRef inTypeStr,
2774                            bool isShift, bool isHiddenLOp,
2775                            ClassKind ck, const std::string &InstName,
2776 						   bool isA64,
2777 						   std::string & testFuncProto) {
2778   assert(!proto.empty() && "");
2779   std::string s;
2780 
2781   // Function name with type suffix
2782   std::string mangledName = MangleName(name, outTypeStr, ClassS);
2783   if (outTypeStr != inTypeStr) {
2784     // If the input type is different (e.g., for vreinterpret), append a suffix
2785     // for the input type.  String off a "Q" (quad) prefix so that MangleName
2786     // does not insert another "q" in the name.
2787     unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2788     StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2789     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2790   }
2791 
2792   // todo: GenerateChecksForIntrinsic does not generate CHECK
2793   // for aarch64 instructions yet
2794   std::vector<std::string> FileCheckPatterns;
2795   if (!isA64) {
2796 	GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
2797 							   isHiddenLOp, FileCheckPatterns);
2798 	s+= "// CHECK_ARM: test_" + mangledName + "\n";
2799   }
2800   s += "// CHECK_AARCH64: test_" + mangledName + "\n";
2801 
2802   // Emit the FileCheck patterns.
2803   // If for any reason we do not want to emit a check, mangledInst
2804   // will be the empty string.
2805   if (FileCheckPatterns.size()) {
2806     for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
2807                                                   e = FileCheckPatterns.end();
2808          i != e;
2809          ++i) {
2810       s += "// CHECK_ARM: " + *i + "\n";
2811     }
2812   }
2813 
2814   // Emit the start of the test function.
2815 
2816   testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
2817   char arg = 'a';
2818   std::string comma;
2819   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2820     // Do not create arguments for values that must be immediate constants.
2821     if (proto[i] == 'i')
2822       continue;
2823     testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
2824     testFuncProto.push_back(arg);
2825     comma = ", ";
2826   }
2827   testFuncProto += ")";
2828 
2829   s+= testFuncProto;
2830   s+= " {\n  ";
2831 
2832   if (proto[0] != 'v')
2833     s += "return ";
2834   s += mangledName + "(";
2835   arg = 'a';
2836   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2837     if (proto[i] == 'i') {
2838       // For immediate operands, test the maximum value.
2839       if (isShift)
2840         s += "1"; // FIXME
2841       else
2842         // The immediate generally refers to a lane in the preceding argument.
2843         s += utostr(RangeFromType(proto[i-1], inTypeStr));
2844     } else {
2845       s.push_back(arg);
2846     }
2847     if ((i + 1) < e)
2848       s += ", ";
2849   }
2850   s += ");\n}\n\n";
2851   return s;
2852 }
2853 
2854 /// Write out all intrinsic tests for the specified target, checking
2855 /// for intrinsic test uniqueness.
2856 void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
2857                                 bool isA64GenTest) {
2858   if (isA64GenTest)
2859 	OS << "#ifdef __aarch64__\n";
2860 
2861   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2862   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2863     Record *R = RV[i];
2864     std::string name = R->getValueAsString("Name");
2865     std::string Proto = R->getValueAsString("Prototype");
2866     std::string Types = R->getValueAsString("Types");
2867     bool isShift = R->getValueAsBit("isShift");
2868     std::string InstName = R->getValueAsString("InstName");
2869     bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
2870     bool isA64 = R->getValueAsBit("isA64");
2871 
2872     // do not include AArch64 intrinsic test if not generating
2873     // code for AArch64
2874     if (!isA64GenTest && isA64)
2875       continue;
2876 
2877     SmallVector<StringRef, 16> TypeVec;
2878     ParseTypes(R, Types, TypeVec);
2879 
2880     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2881     OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
2882     if (kind == OpUnavailable)
2883       continue;
2884     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2885       if (kind == OpReinterpret) {
2886         bool outQuad = false;
2887         bool dummy = false;
2888         (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
2889         for (unsigned srcti = 0, srcte = TypeVec.size();
2890              srcti != srcte; ++srcti) {
2891           bool inQuad = false;
2892           (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
2893           if (srcti == ti || inQuad != outQuad)
2894             continue;
2895 		  std::string testFuncProto;
2896           std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
2897                                   isShift, isHiddenLOp, ck, InstName, isA64,
2898 								  testFuncProto);
2899           if (EmittedMap.count(testFuncProto))
2900             continue;
2901           EmittedMap[testFuncProto] = kind;
2902           OS << s << "\n";
2903         }
2904       } else {
2905 		std::string testFuncProto;
2906         std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
2907                                 isHiddenLOp, ck, InstName, isA64, testFuncProto);
2908         if (EmittedMap.count(testFuncProto))
2909           continue;
2910         EmittedMap[testFuncProto] = kind;
2911         OS << s << "\n";
2912       }
2913     }
2914   }
2915 
2916   if (isA64GenTest)
2917 	OS << "#endif\n";
2918 }
2919 /// runTests - Write out a complete set of tests for all of the Neon
2920 /// intrinsics.
2921 void NeonEmitter::runTests(raw_ostream &OS) {
2922   OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
2923         "apcs-gnu\\\n"
2924         "// RUN:  -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
2925         "// RUN:  | FileCheck %s -check-prefix=CHECK_ARM\n"
2926 		"\n"
2927 	    "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
2928 	    "// RUN -target-feature +neon  -ffreestanding -S -o - %s \\\n"
2929 	    "// RUN:  | FileCheck %s -check-prefix=CHECK_AARCH64\n"
2930         "\n"
2931         "// REQUIRES: long_tests\n"
2932         "\n"
2933         "#include <arm_neon.h>\n"
2934         "\n";
2935 
2936   // ARM tests must be emitted before AArch64 tests to ensure
2937   // tests for intrinsics that are common to ARM and AArch64
2938   // appear only once in the output stream.
2939   // The check for uniqueness is done in genTargetTest.
2940   StringMap<OpKind> EmittedMap;
2941 
2942   genTargetTest(OS, EmittedMap, false);
2943 
2944   genTargetTest(OS, EmittedMap, true);
2945 }
2946 
namespace clang {
// Tablegen entry point: emit the arm_neon.h header (NeonEmitter::run).
void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).run(OS);
}
// Tablegen entry point: emit the BUILTIN() definitions and the SemaChecking
// overload/immediate-range validation code (NeonEmitter::runHeader).
void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).runHeader(OS);
}
// Tablegen entry point: emit a complete set of tests for the Neon
// intrinsics (NeonEmitter::runTests).
void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).runTests(OS);
}
} // End namespace clang
2958