1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This tablegen backend is responsible for emitting arm_neon.h, which includes
11 // a declaration and definition of each function specified by the ARM NEON
12 // compiler interface.  See ARM document DUI0348B.
13 //
14 // Each NEON instruction is implemented in terms of 1 or more functions which
15 // are suffixed with the element type of the input vectors.  Functions may be
16 // implemented in terms of generic vector operations such as +, *, -, etc. or
17 // by calling a __builtin_-prefixed function which will be handled by clang's
18 // CodeGen library.
19 //
20 // Additional validation code can be generated by this file when runHeader() is
21 // called, rather than the normal run() entry point.  A complete set of tests
22 // for Neon intrinsics can be generated by calling the runTests() entry point.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/SmallString.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/ADT/StringMap.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/TableGen/Error.h"
33 #include "llvm/TableGen/Record.h"
34 #include "llvm/TableGen/TableGenBackend.h"
35 #include <string>
36 using namespace llvm;
37 
// OpKind - The internal operation kinds that the OP_* records in arm_neon.td
// map onto.  OpNone means the intrinsic is emitted as a direct __builtin
// call; every other kind is expanded inline in the generated header in terms
// of generic vector operations and/or other intrinsics.
enum OpKind {
  OpNone,
  OpUnavailable,    // Intrinsic is marked unavailable for this target.
  // Arithmetic, including widening (l), wide (w) and high-half (Hi) forms.
  OpAdd,
  OpAddl,
  OpAddlHi,
  OpAddw,
  OpAddwHi,
  OpSub,
  OpSubl,
  OpSublHi,
  OpSubw,
  OpSubwHi,
  OpMul,
  OpMla,
  OpMlal,
  OpMullHi,
  OpMlalHi,
  OpMls,
  OpMlsl,
  OpMlslHi,
  // Vector-by-scalar (_n) forms.
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpMlalN,
  OpMlslN,
  // Vector-by-lane (_lane) forms.
  OpMulLane,
  OpMulXLane,
  OpMullLane,
  OpMullHiLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlalHiLane,
  OpMlslLane,
  OpMlslHiLane,
  OpQDMullLane,
  OpQDMullHiLane,
  OpQDMlalLane,
  OpQDMlalHiLane,
  OpQDMlslLane,
  OpQDMlslHiLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  OpFMSLane,
  OpFMSLaneQ,
  // Comparisons and bitwise/logical operations.
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  // Vector rearrangement and conversion.
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpReinterpret,
  // Narrowing/absolute-difference/saturating high-half operations.
  OpAddhnHi,
  OpRAddhnHi,
  OpSubhnHi,
  OpRSubhnHi,
  OpAbdl,
  OpAbdlHi,
  OpAba,
  OpAbal,
  OpAbalHi,
  OpQDMullHi,
  OpQDMlalHi,
  OpQDMlslHi,
  OpDiv,
  OpLongHi,
  OpNarrowHi,
  OpMovlHi
};
124 
// ClassKind - Categorizes intrinsics by how the element-type suffix of their
// name is formed and how their builtin arguments are typed.
enum ClassKind {
  ClassNone,        // no type suffix
  ClassI,           // generic integer instruction, e.g., "i8" suffix
  ClassS,           // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,           // width-specific instruction, e.g., "8" suffix
  ClassB,           // bitcast arguments with enum argument to specify type
  ClassL,           // Logical instructions which are op instructions
                    // but we need to not emit any suffix for in our
                    // tests.
  ClassNoTest       // Instructions which we do not test since they are
                    // not TRUE instructions.
};
137 
/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins.  These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  // Bit layout of Flags: low 4 bits hold the EltType value, bit 4 marks an
  // unsigned element type, bit 5 marks a 128-bit (quad) vector.
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };
  uint32_t Flags;   // Packed value emitted into the builtin tables.

public:
  // Element type encodings; their ordinal values are what get packed into
  // the low bits of Flags, so the order must match TargetBuiltins.h.
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Float16,
    Float32,
    Float64
  };

  // Wrap an already-packed flag word.
  NeonTypeFlags(unsigned F) : Flags(F) {}
  // Build the packed word from an element type plus modifiers.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
    if (IsUnsigned)
      Flags |= UnsignedFlag;
    if (IsQuad)
      Flags |= QuadFlag;
  }

  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace
174 
namespace {
/// NeonEmitter - The tablegen backend itself.  Reads the intrinsic records
/// from arm_neon.td and emits arm_neon.h (run), the __builtin prototype and
/// range-checking tables (runHeader), or codegen tests (runTests).
class NeonEmitter {
  RecordKeeper &Records;                  // All records parsed from arm_neon.td.
  StringMap<OpKind> OpMap;                // OP_* record name -> OpKind.
  DenseMap<Record*, ClassKind> ClassMap;  // Instruction record class -> ClassKind.

public:
  NeonEmitter(RecordKeeper &R) : Records(R) {
    // Populate the OP_* name -> OpKind table.  The string keys must match
    // the operation definitions in arm_neon.td exactly.
    OpMap["OP_NONE"]  = OpNone;
    OpMap["OP_UNAVAILABLE"] = OpUnavailable;
    OpMap["OP_ADD"]   = OpAdd;
    OpMap["OP_ADDL"]  = OpAddl;
    OpMap["OP_ADDLHi"] = OpAddlHi;
    OpMap["OP_ADDW"]  = OpAddw;
    OpMap["OP_ADDWHi"] = OpAddwHi;
    OpMap["OP_SUB"]   = OpSub;
    OpMap["OP_SUBL"]  = OpSubl;
    OpMap["OP_SUBLHi"] = OpSublHi;
    OpMap["OP_SUBW"]  = OpSubw;
    OpMap["OP_SUBWHi"] = OpSubwHi;
    OpMap["OP_MUL"]   = OpMul;
    OpMap["OP_MLA"]   = OpMla;
    OpMap["OP_MLAL"]  = OpMlal;
    OpMap["OP_MULLHi"]  = OpMullHi;
    OpMap["OP_MLALHi"]  = OpMlalHi;
    OpMap["OP_MLS"]   = OpMls;
    OpMap["OP_MLSL"]  = OpMlsl;
    OpMap["OP_MLSLHi"] = OpMlslHi;
    OpMap["OP_MUL_N"] = OpMulN;
    OpMap["OP_MLA_N"] = OpMlaN;
    OpMap["OP_MLS_N"] = OpMlsN;
    OpMap["OP_MLAL_N"] = OpMlalN;
    OpMap["OP_MLSL_N"] = OpMlslN;
    OpMap["OP_MUL_LN"]= OpMulLane;
    OpMap["OP_MULX_LN"]= OpMulXLane;
    OpMap["OP_MULL_LN"] = OpMullLane;
    OpMap["OP_MULLHi_LN"] = OpMullHiLane;
    OpMap["OP_MLA_LN"]= OpMlaLane;
    OpMap["OP_MLS_LN"]= OpMlsLane;
    OpMap["OP_MLAL_LN"] = OpMlalLane;
    OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
    OpMap["OP_MLSL_LN"] = OpMlslLane;
    OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
    OpMap["OP_QDMULL_LN"] = OpQDMullLane;
    OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
    OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
    OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
    OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
    OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
    OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
    OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
    OpMap["OP_FMS_LN"] = OpFMSLane;
    OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
    OpMap["OP_EQ"]    = OpEq;
    OpMap["OP_GE"]    = OpGe;
    OpMap["OP_LE"]    = OpLe;
    OpMap["OP_GT"]    = OpGt;
    OpMap["OP_LT"]    = OpLt;
    OpMap["OP_NEG"]   = OpNeg;
    OpMap["OP_NOT"]   = OpNot;
    OpMap["OP_AND"]   = OpAnd;
    OpMap["OP_OR"]    = OpOr;
    OpMap["OP_XOR"]   = OpXor;
    OpMap["OP_ANDN"]  = OpAndNot;
    OpMap["OP_ORN"]   = OpOrNot;
    OpMap["OP_CAST"]  = OpCast;
    OpMap["OP_CONC"]  = OpConcat;
    OpMap["OP_HI"]    = OpHi;
    OpMap["OP_LO"]    = OpLo;
    OpMap["OP_DUP"]   = OpDup;
    OpMap["OP_DUP_LN"] = OpDupLane;
    OpMap["OP_SEL"]   = OpSelect;
    OpMap["OP_REV16"] = OpRev16;
    OpMap["OP_REV32"] = OpRev32;
    OpMap["OP_REV64"] = OpRev64;
    OpMap["OP_REINT"] = OpReinterpret;
    OpMap["OP_ADDHNHi"] = OpAddhnHi;
    OpMap["OP_RADDHNHi"] = OpRAddhnHi;
    OpMap["OP_SUBHNHi"] = OpSubhnHi;
    OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
    OpMap["OP_ABDL"]  = OpAbdl;
    OpMap["OP_ABDLHi"] = OpAbdlHi;
    OpMap["OP_ABA"]   = OpAba;
    OpMap["OP_ABAL"]  = OpAbal;
    OpMap["OP_ABALHi"] = OpAbalHi;
    OpMap["OP_QDMULLHi"] = OpQDMullHi;
    OpMap["OP_QDMLALHi"] = OpQDMlalHi;
    OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
    OpMap["OP_DIV"] = OpDiv;
    OpMap["OP_LONG_HI"] = OpLongHi;
    OpMap["OP_NARROW_HI"] = OpNarrowHi;
    OpMap["OP_MOVL_HI"] = OpMovlHi;

    // Map each instruction record class defined in arm_neon.td onto the
    // ClassKind controlling its name mangling and test emission.
    Record *SI = R.getClass("SInst");
    Record *II = R.getClass("IInst");
    Record *WI = R.getClass("WInst");
    Record *SOpI = R.getClass("SOpInst");
    Record *IOpI = R.getClass("IOpInst");
    Record *WOpI = R.getClass("WOpInst");
    Record *LOpI = R.getClass("LOpInst");
    Record *NoTestOpI = R.getClass("NoTestOpInst");

    ClassMap[SI] = ClassS;
    ClassMap[II] = ClassI;
    ClassMap[WI] = ClassW;
    ClassMap[SOpI] = ClassS;
    ClassMap[IOpI] = ClassI;
    ClassMap[WOpI] = ClassW;
    ClassMap[LOpI] = ClassL;
    ClassMap[NoTestOpI] = ClassNoTest;
  }

  // run - Emit arm_neon.h.inc
  void run(raw_ostream &o);

  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
  void runHeader(raw_ostream &o);

  // runTests - Emit tests for all the Neon intrinsics.
  void runTests(raw_ostream &o);

private:
  // emitIntrinsic - Emit one intrinsic record; EmittedMap tracks what has
  // already been generated so duplicates can be detected.
  void emitIntrinsic(raw_ostream &OS, Record *R,
                     StringMap<ClassKind> &EmittedMap);
  // genBuiltinsDef - Emit the BuiltinsARM.def / arm_neon.inc entries; the
  // flag selects between the AArch64 and ARM variants.
  void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
                      bool isA64GenBuiltinDef);
  // genOverloadTypeCheckCode - Emit the Sema type-overload checking table.
  void genOverloadTypeCheckCode(raw_ostream &OS,
                                StringMap<ClassKind> &A64IntrinsicMap,
                                bool isA64TypeCheck);
  // genIntrinsicRangeCheckCode - Emit the immediate-range checking table.
  void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                  StringMap<ClassKind> &A64IntrinsicMap,
                                  bool isA64RangeCheck);
  // genTargetTest - Emit the per-target intrinsic test file.
  void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
                     bool isA64TestGen);
};
} // end anonymous namespace
311 
312 /// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
313 /// which each StringRef representing a single type declared in the string.
314 /// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
315 /// 2xfloat and 4xfloat respectively.
316 static void ParseTypes(Record *r, std::string &s,
317                        SmallVectorImpl<StringRef> &TV) {
318   const char *data = s.data();
319   int len = 0;
320 
321   for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
322     if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
323                          || data[len] == 'H' || data[len] == 'S')
324       continue;
325 
326     switch (data[len]) {
327       case 'c':
328       case 's':
329       case 'i':
330       case 'l':
331       case 'h':
332       case 'f':
333       case 'd':
334         break;
335       default:
336         PrintFatalError(r->getLoc(),
337                       "Unexpected letter: " + std::string(data + len, 1));
338     }
339     TV.push_back(StringRef(data, len + 1));
340     data += len + 1;
341     len = -1;
342   }
343 }
344 
345 /// Widen - Convert a type code into the next wider type.  char -> short,
346 /// short -> int, etc.
347 static char Widen(const char t) {
348   switch (t) {
349     case 'c':
350       return 's';
351     case 's':
352       return 'i';
353     case 'i':
354       return 'l';
355     case 'h':
356       return 'f';
357     default:
358       PrintFatalError("unhandled type in widen!");
359   }
360 }
361 
362 /// Narrow - Convert a type code into the next smaller type.  short -> char,
363 /// float -> half float, etc.
364 static char Narrow(const char t) {
365   switch (t) {
366     case 's':
367       return 'c';
368     case 'i':
369       return 's';
370     case 'l':
371       return 'i';
372     case 'f':
373       return 'h';
374     default:
375       PrintFatalError("unhandled type in narrow!");
376   }
377 }
378 
379 static std::string GetNarrowTypestr(StringRef ty)
380 {
381   std::string s;
382   for (size_t i = 0, end = ty.size(); i < end; i++) {
383     switch (ty[i]) {
384       case 's':
385         s += 'c';
386         break;
387       case 'i':
388         s += 's';
389         break;
390       case 'l':
391         s += 'i';
392         break;
393       default:
394         s += ty[i];
395         break;
396     }
397   }
398 
399   return s;
400 }
401 
402 /// For a particular StringRef, return the base type code, and whether it has
403 /// the quad-vector, polynomial, or unsigned modifiers set.
404 static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
405   unsigned off = 0;
406   // ignore scalar.
407   if (ty[off] == 'S') {
408     ++off;
409   }
410   // remember quad.
411   if (ty[off] == 'Q' || ty[off] == 'H') {
412     quad = true;
413     ++off;
414   }
415 
416   // remember poly.
417   if (ty[off] == 'P') {
418     poly = true;
419     ++off;
420   }
421 
422   // remember unsigned.
423   if (ty[off] == 'U') {
424     usgn = true;
425     ++off;
426   }
427 
428   // base type to get the type string for.
429   return ty[off];
430 }
431 
/// ModType - Transform a type code and its modifiers based on a mod code. The
/// mod code definitions may be found at the top of arm_neon.td.
/// Returns the (possibly changed) base type character and updates the
/// modifier flags in place.  Note: several cases below deliberately fall
/// through to the next case.
static char ModType(const char mod, char type, bool &quad, bool &poly,
                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
  switch (mod) {
    case 't': // poly types become unsigned of the same width
      if (poly) {
        poly = false;
        usgn = true;
      }
      break;
    case 'u': // force unsigned integer of the same width
      usgn = true;
      poly = false;
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case 'x': // force signed integer of the same width
      usgn = false;
      poly = false;
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case 'o': // scalar double
      scal = true;
      type = 'd';
      usgn = false;
      break;
    case 'y': // scalar float; FALLTHROUGH into 'f'
      scal = true;
    case 'f': // float of the same width (half widens to a quad float)
      if (type == 'h')
        quad = true;
      type = 'f';
      usgn = false;
      break;
    case 'g': // force 64-bit (non-quad) vector
      quad = false;
      break;
    case 'j': // force 128-bit (quad) vector
      quad = true;
      break;
    case 'w': // widen the element and make the vector quad
      type = Widen(type);
      quad = true;
      break;
    case 'n': // widen the element, keep the lane count
      type = Widen(type);
      break;
    case 'i': // constant int argument
      type = 'i';
      scal = true;
      break;
    case 'l': // constant uint64 argument
      type = 'l';
      scal = true;
      usgn = true;
      break;
    case 'r': // widened scalar; FALLTHROUGH into 's'/'a'
      type = Widen(type);
    case 's': // scalar of the element type
    case 'a': // scalar accumulator of the element type
      scal = true;
      break;
    case 'k': // force quad
      quad = true;
      break;
    case 'c': // const pointer; FALLTHROUGH into 'p'
      cnst = true;
    case 'p': // pointer to the scalar element type
      pntr = true;
      scal = true;
      break;
    case 'h': // narrow element; a full narrow vector becomes non-quad
      type = Narrow(type);
      if (type == 'h')
        quad = false;
      break;
    case 'q': // narrow element, but keep a quad vector
      type = Narrow(type);
      quad = true;
      break;
    case 'e': // narrow element, force unsigned
      type = Narrow(type);
      usgn = true;
      break;
    case 'm': // narrow element and drop to a 64-bit vector
      type = Narrow(type);
      quad = false;
      break;
    default: // unrecognized mod codes leave the type untouched
      break;
  }
  return type;
}
531 
532 /// TypeString - for a modifier and type, generate the name of the typedef for
533 /// that type.  QUc -> uint8x8_t.
534 static std::string TypeString(const char mod, StringRef typestr) {
535   bool quad = false;
536   bool poly = false;
537   bool usgn = false;
538   bool scal = false;
539   bool cnst = false;
540   bool pntr = false;
541 
542   if (mod == 'v')
543     return "void";
544   if (mod == 'i')
545     return "int";
546 
547   // base type to get the type string for.
548   char type = ClassifyType(typestr, quad, poly, usgn);
549 
550   // Based on the modifying character, change the type and width if necessary.
551   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
552 
553   SmallString<128> s;
554 
555   if (usgn)
556     s.push_back('u');
557 
558   switch (type) {
559     case 'c':
560       s += poly ? "poly8" : "int8";
561       if (scal)
562         break;
563       s += quad ? "x16" : "x8";
564       break;
565     case 's':
566       s += poly ? "poly16" : "int16";
567       if (scal)
568         break;
569       s += quad ? "x8" : "x4";
570       break;
571     case 'i':
572       s += "int32";
573       if (scal)
574         break;
575       s += quad ? "x4" : "x2";
576       break;
577     case 'l':
578       s += "int64";
579       if (scal)
580         break;
581       s += quad ? "x2" : "x1";
582       break;
583     case 'h':
584       s += "float16";
585       if (scal)
586         break;
587       s += quad ? "x8" : "x4";
588       break;
589     case 'f':
590       s += "float32";
591       if (scal)
592         break;
593       s += quad ? "x4" : "x2";
594       break;
595     case 'd':
596       s += "float64";
597       if (scal)
598         break;
599       s += quad ? "x2" : "x1";
600       break;
601 
602     default:
603       PrintFatalError("unhandled type!");
604   }
605 
606   if (mod == '2')
607     s += "x2";
608   if (mod == '3')
609     s += "x3";
610   if (mod == '4')
611     s += "x4";
612 
613   // Append _t, finishing the type string typedef type.
614   s += "_t";
615 
616   if (cnst)
617     s += " const";
618 
619   if (pntr)
620     s += " *";
621 
622   return s.str();
623 }
624 
/// BuiltinTypeString - for a modifier and type, generate the clang
/// BuiltinsARM.def prototype code for the function.  See the top of clang's
/// Builtins.def for a description of the type strings.
/// \param mod     prototype modifier character from arm_neon.td.
/// \param typestr the type string, e.g. "Qf".
/// \param ck      class kind; ClassB builtins are polymorphic and pass/return
///                generic char vectors that CodeGen bitcasts.
/// \param ret     true when generating the return-type portion.
static std::string BuiltinTypeString(const char mod, StringRef typestr,
                                     ClassKind ck, bool ret) {
  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  if (mod == 'v')
    return "v"; // void
  if (mod == 'i')
    return "i"; // int

  // base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  // All pointers are void* pointers.  Change type to 'v' now.
  if (pntr) {
    usgn = false;
    poly = false;
    type = 'v';
  }
  // Treat half-float ('h') types as unsigned short ('s') types.
  if (type == 'h') {
    type = 's';
    usgn = true;
  }
  // Scalar operands of generic (ClassI/ClassW) builtins are typed as
  // unsigned integers regardless of the vector's element signedness.
  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
                         scal && type != 'f' && type != 'd');

  if (scal) {
    SmallString<128> s;

    if (usgn)
      s.push_back('U');
    else if (type == 'c')
      s.push_back('S'); // make chars explicitly signed

    if (type == 'l') // 64-bit long
      s += "LLi";
    else
      s.push_back(type);

    if (cnst)
      s.push_back('C');
    if (pntr)
      s.push_back('*');
    return s.str();
  }

  // Since the return value must be one type, return a vector type of the
  // appropriate width which we will bitcast.  An exception is made for
  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
  // fashion, storing them to a pointer arg.
  if (ret) {
    if (mod >= '2' && mod <= '4')
      return "vv*"; // void result with void* first argument
    if (mod == 'f' || (ck != ClassB && type == 'f'))
      return quad ? "V4f" : "V2f";
    if (ck != ClassB && type == 'd')
      return quad ? "V2d" : "V1d";
    if (ck != ClassB && type == 's')
      return quad ? "V8s" : "V4s";
    if (ck != ClassB && type == 'i')
      return quad ? "V4i" : "V2i";
    if (ck != ClassB && type == 'l')
      return quad ? "V2LLi" : "V1LLi";

    // ClassB (and 8-bit element) results are a generic char vector that the
    // caller bitcasts.
    return quad ? "V16Sc" : "V8Sc";
  }

  // Non-return array types are passed as individual vectors.
  if (mod == '2')
    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
  if (mod == '3')
    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
  if (mod == '4')
    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";

  if (mod == 'f' || (ck != ClassB && type == 'f'))
    return quad ? "V4f" : "V2f";
  if (ck != ClassB && type == 'd')
    return quad ? "V2d" : "V1d";
  if (ck != ClassB && type == 's')
    return quad ? "V8s" : "V4s";
  if (ck != ClassB && type == 'i')
    return quad ? "V4i" : "V2i";
  if (ck != ClassB && type == 'l')
    return quad ? "V2LLi" : "V1LLi";

  // ClassB (and 8-bit element) arguments are generic char vectors, bitcast
  // in CodeGen.
  return quad ? "V16Sc" : "V8Sc";
}
724 
/// InstructionTypeCode - Computes the ARM argument character code and
/// quad status for a specific type string and ClassKind.
/// \param typeStr  type string, e.g. "Qf".
/// \param ck       class kind deciding the suffix flavor (s8 vs i8 vs 8).
/// \param quad     [out] set if the type is a 128-bit (quad) vector.
/// \param typeCode [out] receives the suffix, e.g. "s8", "i8" or "8"; left
///                 unchanged for class kinds with no suffix.
static void InstructionTypeCode(const StringRef &typeStr,
                                const ClassKind ck,
                                bool &quad,
                                std::string &typeCode) {
  bool poly = false;
  bool usgn = false;
  char type = ClassifyType(typeStr, quad, poly, usgn);

  switch (type) {
  case 'c': // 8-bit integer / poly
    switch (ck) {
    case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
    case ClassI: typeCode = "i8"; break;
    case ClassW: typeCode = "8"; break;
    default: break;
    }
    break;
  case 's': // 16-bit integer / poly
    switch (ck) {
    case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
    case ClassI: typeCode = "i16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'i': // 32-bit integer (no poly form)
    switch (ck) {
    case ClassS: typeCode = usgn ? "u32" : "s32"; break;
    case ClassI: typeCode = "i32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'l': // 64-bit integer (no poly form)
    switch (ck) {
    case ClassS: typeCode = usgn ? "u64" : "s64"; break;
    case ClassI: typeCode = "i64"; break;
    case ClassW: typeCode = "64"; break;
    default: break;
    }
    break;
  case 'h': // half float
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'f': // single float
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'd': // double float: no width-only (ClassW) form exists
    switch (ck) {
    case ClassS:
    case ClassI:
      typeCode += "f64";
      break;
    case ClassW:
      PrintFatalError("unhandled type!");
    default:
      break;
    }
    break;
  default:
    PrintFatalError("unhandled type!");
  }
}
800 
801 static char Insert_BHSD_Suffix(StringRef typestr){
802   unsigned off = 0;
803   if(typestr[off++] == 'S'){
804     while(typestr[off] == 'Q' || typestr[off] == 'H'||
805           typestr[off] == 'P' || typestr[off] == 'U')
806       ++off;
807     switch (typestr[off]){
808     default  : break;
809     case 'c' : return 'b';
810     case 's' : return 'h';
811     case 'i' :
812     case 'f' : return 's';
813     case 'l' :
814     case 'd' : return 'd';
815     }
816   }
817   return 0;
818 }
819 
820 /// MangleName - Append a type or width suffix to a base neon function name,
821 /// and insert a 'q' in the appropriate location if type string starts with 'Q'.
822 /// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
823 /// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
824 static std::string MangleName(const std::string &name, StringRef typestr,
825                               ClassKind ck) {
826   if (name == "vcvt_f32_f16")
827     return name;
828 
829   bool quad = false;
830   std::string typeCode = "";
831 
832   InstructionTypeCode(typestr, ck, quad, typeCode);
833 
834   std::string s = name;
835 
836   if (typeCode.size() > 0) {
837     s += "_" + typeCode;
838   }
839 
840   if (ck == ClassB)
841     s += "_v";
842 
843   // Insert a 'q' before the first '_' character so that it ends up before
844   // _lane or _n on vector-scalar operations.
845   if (typestr.find("Q") != StringRef::npos) {
846       size_t pos = s.find('_');
847       s = s.insert(pos, "q");
848   }
849   char ins = Insert_BHSD_Suffix(typestr);
850   if(ins){
851     size_t pos = s.find('_');
852     s = s.insert(pos, &ins, 1);
853   }
854 
855   return s;
856 }
857 
858 static void PreprocessInstruction(const StringRef &Name,
859                                   const std::string &InstName,
860                                   std::string &Prefix,
861                                   bool &HasNPostfix,
862                                   bool &HasLanePostfix,
863                                   bool &HasDupPostfix,
864                                   bool &IsSpecialVCvt,
865                                   size_t &TBNumber) {
866   // All of our instruction name fields from arm_neon.td are of the form
867   //   <instructionname>_...
868   // Thus we grab our instruction name via computation of said Prefix.
869   const size_t PrefixEnd = Name.find_first_of('_');
870   // If InstName is passed in, we use that instead of our name Prefix.
871   Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
872 
873   const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
874 
875   HasNPostfix = Postfix.count("_n");
876   HasLanePostfix = Postfix.count("_lane");
877   HasDupPostfix = Postfix.count("_dup");
878   IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
879 
880   if (InstName.compare("vtbl") == 0 ||
881       InstName.compare("vtbx") == 0) {
882     // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
883     // encoding to get its true value.
884     TBNumber = Name[Name.size()-1] - 48;
885   }
886 }
887 
/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
/// extracted, generate a FileCheck pattern for a Load Or Store.
/// The pattern describes the register-list operand, e.g.
/// "{d0, d1}, [r0]" with FileCheck regexes in place of register numbers,
/// and is appended to RegisterSuffix.
static void
GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
                                          const std::string& OutTypeCode,
                                          const bool &IsQuad,
                                          const bool &HasDupPostfix,
                                          const bool &HasLanePostfix,
                                          const size_t Count,
                                          std::string &RegisterSuffix) {
  const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
  // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
  // will output a series of v{ld,st}1s, so we have to handle it specially.
  if ((Count == 3 || Count == 4) && IsQuad) {
    // Emit one d-register placeholder per struct element: "{d.., d.., ...}".
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]"; // all-lanes (dup) addressing form
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]"; // single-lane addressing form
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}";
  } else {

    // Handle normal loads and stores.
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      if (IsQuad && !HasLanePostfix) {
        // A quad register appears as its two d-register halves.
        RegisterSuffix += ", d{{[0-9]+}}";
        if (HasDupPostfix) {
          RegisterSuffix += "[]";
        }
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}, [r{{[0-9]+}}";

    // We only include the alignment hint if we have a vld1.*64 or
    // a dup/lane instruction.
    if (IsLDSTOne) {
      if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
        RegisterSuffix += ":" + OutTypeCode;
      }
    }

    RegisterSuffix += "]";
  }
}
951 
952 static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
953                                      const bool &HasNPostfix) {
954   return (NameRef.count("vmla") ||
955           NameRef.count("vmlal") ||
956           NameRef.count("vmlsl") ||
957           NameRef.count("vmull") ||
958           NameRef.count("vqdmlal") ||
959           NameRef.count("vqdmlsl") ||
960           NameRef.count("vqdmulh") ||
961           NameRef.count("vqdmull") ||
962           NameRef.count("vqrdmulh")) && HasNPostfix;
963 }
964 
965 static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
966                                          const bool &HasLanePostfix) {
967   return (NameRef.count("vmla") ||
968           NameRef.count("vmls") ||
969           NameRef.count("vmlal") ||
970           NameRef.count("vmlsl") ||
971           (NameRef.count("vmul") && NameRef.size() == 3)||
972           NameRef.count("vqdmlal") ||
973           NameRef.count("vqdmlsl") ||
974           NameRef.count("vqdmulh") ||
975           NameRef.count("vqrdmulh")) && HasLanePostfix;
976 }
977 
978 static bool IsSpecialLaneMultiply(const StringRef &NameRef,
979                                   const bool &HasLanePostfix,
980                                   const bool &IsQuad) {
981   const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
982                                && IsQuad;
983   const bool IsVMull = NameRef.count("mull") && !IsQuad;
984   return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
985 }
986 
/// NormalizeProtoForRegisterPatternCreation - Map each prototype character
/// onto the register class it occupies in the emitted assembly ('d' = 64-bit
/// register, 'q' = 128-bit register, 'a' = lane scalar, 'i' = immediate),
/// then prune the result for instruction forms whose assembly takes fewer
/// operands than the prototype suggests.
/// NOTE(review): HasDupPostfix is accepted but not used in this function.
static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
                                                     const std::string &Proto,
                                                     const bool &HasNPostfix,
                                                     const bool &IsQuad,
                                                     const bool &HasLanePostfix,
                                                     const bool &HasDupPostfix,
                                                     std::string &NormedProto) {
  // Handle generic case.
  const StringRef NameRef(Name);
  for (size_t i = 0, end = Proto.size(); i < end; i++) {
    switch (Proto[i]) {
    // Same-width vector operands: d-register, or q-register when quad.
    case 'u':
    case 'f':
    case 'd':
    case 's':
    case 'x':
    case 't':
    case 'n':
      NormedProto += IsQuad? 'q' : 'd';
      break;
    // Widened operands are always q-sized.
    case 'w':
    case 'k':
      NormedProto += 'q';
      break;
    // Narrowed / forced-64-bit operands are always d-sized.
    case 'g':
    case 'j':
    case 'h':
    case 'e':
      NormedProto += 'd';
      break;
    // Immediate: a lane index ('a') for _lane forms, otherwise 'i'.
    case 'i':
      NormedProto += HasLanePostfix? 'a' : 'i';
      break;
    // Scalar: lane index, scalar register for the _n multiply family, or
    // plain immediate.
    case 'a':
      if (HasLanePostfix) {
        NormedProto += 'a';
      } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
        NormedProto += IsQuad? 'q' : 'd';
      } else {
        NormedProto += 'i';
      }
      break;
    // All other prototype characters contribute no register operand.
    }
  }

  // Handle Special Cases.
  const bool IsNotVExt = !NameRef.count("vext");
  const bool IsVPADAL = NameRef.count("vpadal");
  const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
                                                           HasLanePostfix);
  const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
                                                      IsQuad);

  if (IsSpecialLaneMul) {
    // For the special lane multiplies, the fourth operand replaces the
    // third and everything past the first three characters is dropped.
    NormedProto[2] = NormedProto[3];
    NormedProto.erase(3);
  } else if (NormedProto.size() == 4 &&
             NormedProto[0] == NormedProto[1] &&
             IsNotVExt) {
    // If NormedProto.size() == 4 and the first two proto characters are the
    // same, ignore the first.
    NormedProto = NormedProto.substr(1, 3);
  } else if (Is5OpLaneAccum) {
    // If we have a 5 op lane accumulator operation, we take characters 1,2,4
    std::string tmp = NormedProto.substr(1,2);
    tmp += NormedProto[4];
    NormedProto = tmp;
  } else if (IsVPADAL) {
    // Keep only the first two characters for VPADAL.
    // NOTE(review): the original comment said "ignore the first character",
    // but substr(0, 2) keeps characters 0 and 1 — confirm intent.
    NormedProto = NormedProto.substr(0, 2);
  } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
    // If our instruction is a dup instruction, keep only the first and
    // last characters.
    std::string tmp = "";
    tmp += NormedProto[0];
    tmp += NormedProto[NormedProto.size()-1];
    NormedProto = tmp;
  }
}
1067 
1068 /// GenerateRegisterCheckPatterns - Given a bunch of data we have
1069 /// extracted, generate a FileCheck pattern to check that an
1070 /// instruction's arguments are correct.
1071 static void GenerateRegisterCheckPattern(const std::string &Name,
1072                                          const std::string &Proto,
1073                                          const std::string &OutTypeCode,
1074                                          const bool &HasNPostfix,
1075                                          const bool &IsQuad,
1076                                          const bool &HasLanePostfix,
1077                                          const bool &HasDupPostfix,
1078                                          const size_t &TBNumber,
1079                                          std::string &RegisterSuffix) {
1080 
1081   RegisterSuffix = "";
1082 
1083   const StringRef NameRef(Name);
1084   const StringRef ProtoRef(Proto);
1085 
1086   if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
1087     return;
1088   }
1089 
1090   const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
1091   const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");
1092 
1093   if (IsLoadStore) {
1094     // Grab N value from  v{ld,st}N using its ascii representation.
1095     const size_t Count = NameRef[3] - 48;
1096 
1097     GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
1098                                               HasDupPostfix, HasLanePostfix,
1099                                               Count, RegisterSuffix);
1100   } else if (IsTBXOrTBL) {
1101     RegisterSuffix += "d{{[0-9]+}}, {";
1102     for (size_t i = 0; i < TBNumber-1; i++) {
1103       RegisterSuffix += "d{{[0-9]+}}, ";
1104     }
1105     RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
1106   } else {
1107     // Handle a normal instruction.
1108     if (NameRef.count("vget") || NameRef.count("vset"))
1109       return;
1110 
1111     // We first normalize our proto, since we only need to emit 4
1112     // different types of checks, yet have more than 4 proto types
1113     // that map onto those 4 patterns.
1114     std::string NormalizedProto("");
1115     NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
1116                                              HasLanePostfix, HasDupPostfix,
1117                                              NormalizedProto);
1118 
1119     for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
1120       const char &c = NormalizedProto[i];
1121       switch (c) {
1122       case 'q':
1123         RegisterSuffix += "q{{[0-9]+}}, ";
1124         break;
1125 
1126       case 'd':
1127         RegisterSuffix += "d{{[0-9]+}}, ";
1128         break;
1129 
1130       case 'i':
1131         RegisterSuffix += "#{{[0-9]+}}, ";
1132         break;
1133 
1134       case 'a':
1135         RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
1136         break;
1137       }
1138     }
1139 
1140     // Remove extra ", ".
1141     RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
1142   }
1143 }
1144 
/// GenerateChecksForIntrinsic - Given a specific instruction name +
/// typestr + class kind, generate the proper set of FileCheck
/// Patterns to check for. We could just return a string, but instead
/// use a vector since it provides us with the extra flexibility of
/// emitting multiple checks, which comes in handy for certain cases
/// like mla where we want to check for 2 different instructions.
static void GenerateChecksForIntrinsic(const std::string &Name,
                                       const std::string &Proto,
                                       StringRef &OutTypeStr,
                                       StringRef &InTypeStr,
                                       ClassKind Ck,
                                       const std::string &InstName,
                                       bool IsHiddenLOp,
                                       std::vector<std::string>& Result) {

  // If Ck is a ClassNoTest instruction, just return so no test is
  // emitted.
  if(Ck == ClassNoTest)
    return;

  // vcvt_f32_f16 is checked for directly; it does not go through the
  // generic prefix/type-code machinery below.
  if (Name == "vcvt_f32_f16") {
    Result.push_back("vcvt.f32.f16");
    return;
  }


  // Now we preprocess our instruction given the data we have to get the
  // data that we need.
  // Create a StringRef for String Manipulation of our Name.
  const StringRef NameRef(Name);
  // Instruction Prefix.
  std::string Prefix;
  // The type code for our out type string.
  std::string OutTypeCode;
  // To handle our different cases, we need to check for different postfixes.
  // Is our instruction a quad instruction.
  bool IsQuad = false;
  // Our instruction is of the form <instructionname>_n.
  bool HasNPostfix = false;
  // Our instruction is of the form <instructionname>_lane.
  bool HasLanePostfix = false;
  // Our instruction is of the form <instructionname>_dup.
  bool HasDupPostfix  = false;
  // Our instruction is a vcvt instruction which requires special handling.
  bool IsSpecialVCvt = false;
  // If we have a vtbxN or vtblN instruction, this is set to N.
  // -1 wraps around to SIZE_MAX and acts as a "not set" sentinel.
  size_t TBNumber = -1;
  // Register Suffix
  std::string RegisterSuffix;

  PreprocessInstruction(NameRef, InstName, Prefix,
                        HasNPostfix, HasLanePostfix, HasDupPostfix,
                        IsSpecialVCvt, TBNumber);

  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
                               HasLanePostfix, HasDupPostfix, TBNumber,
                               RegisterSuffix);

  // In the following section, we handle a bunch of special cases. You can tell
  // a special case by the fact we are returning early.

  // If our instruction is a logical instruction without postfix or a
  // hidden LOp just return the current Prefix.
  if (Ck == ClassL || IsHiddenLOp) {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // If we have a vmov, due to the many different cases, some of which
  // vary within the different intrinsics generated for a single
  // instruction type, just output a vmov. (e.g. given an instruction
  // A, A.u32 might be vmov and A.u8 might be vmov.8).
  //
  // FIXME: Maybe something can be done about this. The two cases that we care
  // about are vmov as an LType and vmov as a WType.
  if (Prefix == "vmov") {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // In the following section, we handle special cases.

  if (OutTypeCode == "64") {
    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
    // type, the intrinsic will be optimized away, so just return
    // nothing.  On the other hand if we are handling an uint64x2_t
    // (i.e. quad instruction), vdup/vmov instructions should be
    // emitted.
    if (Prefix == "vdup" || Prefix == "vext") {
      if (IsQuad) {
        Result.push_back("{{vmov|vdup}}");
      }
      return;
    }

    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
    // multiple register operands.
    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
                            || Prefix == "vld4";
    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
                            || Prefix == "vst4";
    if (MultiLoadPrefix || MultiStorePrefix) {
      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
      return;
    }

    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
    // emitting said instructions. So return a check for
    // vldr/vstr/vmov/str instead.
    if (HasLanePostfix || HasDupPostfix) {
      if (Prefix == "vst1") {
        Result.push_back("{{str|vstr|vmov}}");
        return;
      } else if (Prefix == "vld1") {
        Result.push_back("{{ldr|vldr|vmov}}");
        return;
      }
    }
  }

  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
  // sometimes disassembled as vtrn.32. We use a regex to handle both
  // cases.
  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
    return;
  }

  // Currently on most ARM processors, we do not use vmla/vmls for
  // quad floating point operations. Instead we output vmul + vadd. So
  // check if we have one of those instructions and just output a
  // check for vmul.
  if (OutTypeCode == "f32") {
    if (Prefix == "vmls") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vsub." + OutTypeCode);
      return;
    } else if (Prefix == "vmla") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vadd." + OutTypeCode);
      return;
    }
  }

  // If we have vcvt, get the input type from the instruction name
  // (which should be of the form instname_inputtype) and append it
  // before the output type.
  if (Prefix == "vcvt") {
    const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
    Prefix += "." + inTypeCode;
  }

  // Append output type code to get our final mangled instruction.
  Prefix += "." + OutTypeCode;

  Result.push_back(Prefix + " " + RegisterSuffix);
}
1303 
/// UseMacro - Examine the prototype string to determine if the intrinsic
/// should be defined as a preprocessor macro instead of an inline function.
static bool UseMacro(const std::string &proto) {
  // A macro is required in two situations:
  //  - 'i': the builtin takes an immediate argument, and SemaChecking must
  //    be able to range-check the literal the user passed;
  //  - 'p'/'c': pointer (or const pointer) arguments, whose aligned
  //    attributes would be hidden by an ordinary declaration.
  return proto.find_first_of("ipc") != std::string::npos;
}
1321 
/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is
/// defined as a macro should be accessed directly instead of being first
/// assigned to a local temporary.
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  switch (proto[i]) {
  case 'i': // constant int
  case 'p': // pointer
  case 'c': // const pointer
    return true;
  default:
    return false;
  }
}
1329 
1330 // Generate the string "(argtype a, argtype b, ...)"
1331 static std::string GenArgs(const std::string &proto, StringRef typestr) {
1332   bool define = UseMacro(proto);
1333   char arg = 'a';
1334 
1335   std::string s;
1336   s += "(";
1337 
1338   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1339     if (define) {
1340       // Some macro arguments are used directly instead of being assigned
1341       // to local temporaries; prepend an underscore prefix to make their
1342       // names consistent with the local temporaries.
1343       if (MacroArgUsedDirectly(proto, i))
1344         s += "__";
1345     } else {
1346       s += TypeString(proto[i], typestr) + " __";
1347     }
1348     s.push_back(arg);
1349     if ((i + 1) < e)
1350       s += ", ";
1351   }
1352 
1353   s += ")";
1354   return s;
1355 }
1356 
1357 // Macro arguments are not type-checked like inline function arguments, so
1358 // assign them to local temporaries to get the right type checking.
1359 static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
1360   char arg = 'a';
1361   std::string s;
1362   bool generatedLocal = false;
1363 
1364   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1365     // Do not create a temporary for an immediate argument.
1366     // That would defeat the whole point of using a macro!
1367     if (MacroArgUsedDirectly(proto, i))
1368       continue;
1369     generatedLocal = true;
1370 
1371     s += TypeString(proto[i], typestr) + " __";
1372     s.push_back(arg);
1373     s += " = (";
1374     s.push_back(arg);
1375     s += "); ";
1376   }
1377 
1378   if (generatedLocal)
1379     s += "\\\n  ";
1380   return s;
1381 }
1382 
1383 // Use the vmovl builtin to sign-extend or zero-extend a vector.
1384 static std::string Extend(StringRef typestr, const std::string &a, bool h=0) {
1385   std::string s, high;
1386   high = h ? "_high" : "";
1387   s = MangleName("vmovl" + high, typestr, ClassS);
1388   s += "(" + a + ")";
1389   return s;
1390 }
1391 
1392 // Get the high 64-bit part of a vector
1393 static std::string GetHigh(const std::string &a, StringRef typestr) {
1394   std::string s;
1395   s = MangleName("vget_high", typestr, ClassS);
1396   s += "(" + a + ")";
1397   return s;
1398 }
1399 
1400 // Gen operation with two operands and get high 64-bit for both of two operands.
1401 static std::string Gen2OpWith2High(StringRef typestr,
1402                                    const std::string &op,
1403                                    const std::string &a,
1404                                    const std::string &b) {
1405   std::string s;
1406   std::string Op1 = GetHigh(a, typestr);
1407   std::string Op2 = GetHigh(b, typestr);
1408   s = MangleName(op, typestr, ClassS);
1409   s += "(" + Op1 + ", " + Op2 + ");";
1410   return s;
1411 }
1412 
1413 // Gen operation with three operands and get high 64-bit of the latter
1414 // two operands.
1415 static std::string Gen3OpWith2High(StringRef typestr,
1416                                    const std::string &op,
1417                                    const std::string &a,
1418                                    const std::string &b,
1419                                    const std::string &c) {
1420   std::string s;
1421   std::string Op1 = GetHigh(b, typestr);
1422   std::string Op2 = GetHigh(c, typestr);
1423   s = MangleName(op, typestr, ClassS);
1424   s += "(" + a + ", " + Op1 + ", " + Op2 + ");";
1425   return s;
1426 }
1427 
1428 // Gen combine operation by putting a on low 64-bit, and b on high 64-bit.
1429 static std::string GenCombine(std::string typestr,
1430                               const std::string &a,
1431                               const std::string &b) {
1432   std::string s;
1433   s = MangleName("vcombine", typestr, ClassS);
1434   s += "(" + a + ", " + b + ")";
1435   return s;
1436 }
1437 
1438 static std::string Duplicate(unsigned nElts, StringRef typestr,
1439                              const std::string &a) {
1440   std::string s;
1441 
1442   s = "(" + TypeString('d', typestr) + "){ ";
1443   for (unsigned i = 0; i != nElts; ++i) {
1444     s += a;
1445     if ((i + 1) < nElts)
1446       s += ", ";
1447   }
1448   s += " }";
1449 
1450   return s;
1451 }
1452 
// Emit a __builtin_shufflevector call that broadcasts lane \p lane of vector
// \p vec into all \p nElts result lanes.
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string result = "__builtin_shufflevector(" + vec + ", " + vec;
  for (unsigned lanesLeft = nElts; lanesLeft != 0; --lanesLeft) {
    result += ", ";
    result += lane;
  }
  result += ")";
  return result;
}
1461 
1462 static std::string RemoveHigh(const std::string &name) {
1463   std::string s = name;
1464   std::size_t found = s.find("_high_");
1465   if (found == std::string::npos)
1466     PrintFatalError("name should contain \"_high_\" for high intrinsics");
1467   s.replace(found, 5, "");
1468   return s;
1469 }
1470 
1471 static unsigned GetNumElements(StringRef typestr, bool &quad) {
1472   quad = false;
1473   bool dummy = false;
1474   char type = ClassifyType(typestr, quad, dummy, dummy);
1475   unsigned nElts = 0;
1476   switch (type) {
1477   case 'c': nElts = 8; break;
1478   case 's': nElts = 4; break;
1479   case 'i': nElts = 2; break;
1480   case 'l': nElts = 1; break;
1481   case 'h': nElts = 4; break;
1482   case 'f': nElts = 2; break;
1483   case 'd':
1484     nElts = 1;
1485     break;
1486   default:
1487     PrintFatalError("unhandled type!");
1488   }
1489   if (quad) nElts <<= 1;
1490   return nElts;
1491 }
1492 
// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
// proto[0] is the intrinsic's return-type character and typestr its element
// type.  The returned string is the body of the emitted function/macro; a
// "return " prefix is added unless the intrinsic is emitted as a macro
// (see UseMacro).
static std::string GenOpString(const std::string &name, OpKind op,
                               const std::string &proto, StringRef typestr) {
  bool quad;
  unsigned nElts = GetNumElements(typestr, quad);
  bool define = UseMacro(proto);

  // ts is the result type's string, used for casts below.
  std::string ts = TypeString(proto[0], typestr);
  std::string s;
  if (!define) {
    s = "return ";
  }

  switch(op) {
  case OpAdd:
    s += "__a + __b;";
    break;
  case OpAddl:
    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddlHi:
    s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpAddw:
    s += "__a + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddwHi:
    s += "__a + " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpSub:
    s += "__a - __b;";
    break;
  case OpSubl:
    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
    break;
  case OpSublHi:
    s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpSubw:
    s += "__a - " + Extend(typestr, "__b") + ";";
    break;
  case OpSubwHi:
    s += "__a - " + Extend(typestr, "__b", 1) + ";";
    break;
  case OpMulN:
    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
    break;
  case OpMulLane:
    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
    break;
  case OpMulXLane:
    s += MangleName("vmulx", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMul:
    s += "__a * __b;";
    break;
  case OpMullLane:
    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMullHiLane:
    s += MangleName("vmull", typestr, ClassS) + "(" +
      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMlaN:
    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlaLane:
    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMla:
    s += "__a + (__b * __c);";
    break;
  case OpMlalN:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlalLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlalHiLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlal:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMullHi:
    s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
    break;
  case OpMlalHi:
    s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
    break;
  case OpMlsN:
    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlsLane:
    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpFMSLane:
    // Fused multiply-subtract via fma with a negated multiplicand.  Local
    // copies (__a1 etc.) are needed because this is emitted as a macro.
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
    s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
    break;
  case OpFMSLaneQ:
    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
    s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
    break;
  case OpMls:
    s += "__a - (__b * __c);";
    break;
  case OpMlslN:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlslLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlslHiLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlsl:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMlslHi:
    s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
    break;
  case OpQDMullLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMullHiLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(" +
      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMlalLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlalHiLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslHiLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " +
      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMulhLane:
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQRDMulhLane:
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  // Comparisons: the result is cast to the (integer) result type since vector
  // comparisons yield a signed-integer vector.
  case OpEq:
    s += "(" + ts + ")(__a == __b);";
    break;
  case OpGe:
    s += "(" + ts + ")(__a >= __b);";
    break;
  case OpLe:
    s += "(" + ts + ")(__a <= __b);";
    break;
  case OpGt:
    s += "(" + ts + ")(__a > __b);";
    break;
  case OpLt:
    s += "(" + ts + ")(__a < __b);";
    break;
  case OpNeg:
    s += " -__a;";
    break;
  case OpNot:
    s += " ~__a;";
    break;
  case OpAnd:
    s += "__a & __b;";
    break;
  case OpOr:
    s += "__a | __b;";
    break;
  case OpXor:
    s += "__a ^ __b;";
    break;
  case OpAndNot:
    s += "__a & ~__b;";
    break;
  case OpOrNot:
    s += "__a | ~__b;";
    break;
  case OpCast:
    s += "(" + ts + ")__a;";
    break;
  case OpConcat:
    // Concatenate two 64-bit halves by shuffling them as int64x1_t vectors.
    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
    s += ", (int64x1_t)__b, 0, 1);";
    break;
  case OpHi:
    // nElts is for the result vector, so the source is twice that number.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = nElts; i < nElts * 2; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpLo:
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 0; i < nElts; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpDup:
    s += Duplicate(nElts, typestr, "__a") + ";";
    break;
  case OpDupLane:
    s += SplatLane(nElts, "__a", "__b") + ";";
    break;
  case OpSelect:
    // ((0 & 1) | (~0 & 2))
    s += "(" + ts + ")";
    ts = TypeString(proto[1], typestr);
    s += "((__a & (" + ts + ")__b) | ";
    s += "(~__a & (" + ts + ")__c));";
    break;
  case OpRev16:
    // Reverse the elements within each 16-bit group.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 2; i <= nElts; i += 2)
      for (unsigned j = 0; j != 2; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  case OpRev32: {
    // Reverse the elements within each 32-bit group.
    unsigned WordElts = nElts >> (1 + (int)quad);
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = WordElts; i <= nElts; i += WordElts)
      for (unsigned j = 0; j != WordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpRev64: {
    // Reverse the elements within each 64-bit group.
    unsigned DblWordElts = nElts >> (int)quad;
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
      for (unsigned j = 0; j != DblWordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpAbdl: {
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  case OpAbdlHi:
    s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
    break;
  // The *hnHi ops narrow __b/__c and combine the result with __a as the low
  // half.
  case OpAddhnHi: {
    std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
    s += ";";
    break;
  }
  case OpRAddhnHi: {
    std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
    s += ";";
    break;
  }
  case OpSubhnHi: {
    std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
    s += ";";
    break;
  }
  case OpRSubhnHi: {
    std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
    s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
    s += ";";
    break;
  }
  case OpAba:
    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbal:
    s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbalHi:
    s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
    break;
  case OpQDMullHi:
    s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
    break;
  case OpQDMlalHi:
    s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
    break;
  case OpQDMlslHi:
    s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
    break;
  case OpDiv:
    s += "__a / __b;";
    break;
  case OpMovlHi: {
    // Extend the high half of __a via vshll_n with a shift of 0.
    s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
        MangleName("vget_high", typestr, ClassS) + "(__a);\n  " + s;
    s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
    s += "(__a1, 0);";
    break;
  }
  case OpLongHi: {
    // Another local variable __a1 is needed for calling a Macro,
    // or using __a will have naming conflict when Macro expanding.
    s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
         MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
    s += "  (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
         "(__a1, __b);";
    break;
  }
  case OpNarrowHi: {
    s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
         MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
    break;
  }
  default:
    PrintFatalError("unknown OpKind!");
  }
  return s;
}
1840 
// GetNeonEnum - Compute the NeonTypeFlags bitmask (element type, signedness,
// quad-ness) for the builtin described by \p proto and \p typestr.  The
// return type character drives the flags unless it is 'v' (void) or 'f',
// in which case the first argument's character is used instead.
static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
  unsigned mod = proto[0];

  if (mod == 'v' || mod == 'f')
    mod = proto[1];

  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  // Base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  // Translate the classified element type into the NeonTypeFlags enum,
  // distinguishing polynomial from plain integer types where applicable.
  NeonTypeFlags::EltType ET;
  switch (type) {
    case 'c':
      ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
      break;
    case 's':
      ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
      break;
    case 'i':
      ET = NeonTypeFlags::Int32;
      break;
    case 'l':
      ET = NeonTypeFlags::Int64;
      break;
    case 'h':
      ET = NeonTypeFlags::Float16;
      break;
    case 'f':
      ET = NeonTypeFlags::Float32;
      break;
    case 'd':
      ET = NeonTypeFlags::Float64;
      break;
    default:
      PrintFatalError("unhandled type!");
  }
  // 'g' forces the 64-bit (non-quad) variant even when typestr is quad.
  NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
  return Flags.getFlags();
}
1889 
// ProtoHasScalar - Return true if the prototype has a scalar operand ('s')
// or a scalar result ('r') with the type of the vector elements.
// Take the string by const reference: the old by-value `const std::string`
// parameter copied the string on every call.
static bool ProtoHasScalar(const std::string &proto) {
  return (proto.find('s') != std::string::npos
          || proto.find('r') != std::string::npos);
}
1895 
// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
//
// Emits the body of an arm_neon.h intrinsic as a call to the matching
// __builtin_neon_* function: return-value handling (including sret-style
// struct-of-vectors returns), per-argument casts to the builtin's expected
// types, and, for overloaded (ClassB) builtins, the trailing type-class
// enum argument.
static std::string GenBuiltin(const std::string &name, const std::string &proto,
                              StringRef typestr, ClassKind ck) {
  std::string s;

  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
  // sret-like argument.
  bool sret = (proto[0] >= '2' && proto[0] <= '4');

  bool define = UseMacro(proto);

  // Check if the prototype has a scalar operand with the type of the vector
  // elements.  If not, bitcasting the args will take care of arg checking.
  // The actual signedness etc. will be taken care of with special enums.
  if (!ProtoHasScalar(proto))
    ck = ClassB;

  // Emit the return-value prologue unless the builtin returns void ('v').
  if (proto[0] != 'v') {
    std::string ts = TypeString(proto[0], typestr);

    if (define) {
      // Macro ({...}) bodies have no "return": either declare the sret
      // temporary or cast the builtin's result to the public type so the
      // statement expression yields it.
      if (sret)
        s += ts + " r; ";
      else
        s += "(" + ts + ")";
    } else if (sret) {
      s += ts + " r; ";
    } else {
      s += "return (" + ts + ")";
    }
  }

  // An 'a' in the prototype marks a "_n" splat operand.
  bool splat = proto.find('a') != std::string::npos;

  s += "__builtin_neon_";
  if (splat) {
    // Call the non-splat builtin: chop off the "_n" suffix from the name.
    std::string vname(name, 0, name.size()-2);
    s += MangleName(vname, typestr, ck);
  } else {
    s += MangleName(name, typestr, ck);
  }
  s += "(";

  // Pass the address of the return variable as the first argument to sret-like
  // builtins.
  if (sret)
    s += "&r, ";

  // Arguments are named __a, __b, ... in prototype order.
  char arg = 'a';
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    std::string args = std::string(&arg, 1);

    // Use the local temporaries instead of the macro arguments.
    args = "__" + args;

    bool argQuad = false;
    bool argPoly = false;
    bool argUsgn = false;
    bool argScalar = false;
    bool dummy = false;
    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
                      dummy, dummy);

    // Handle multiple-vector values specially, emitting each subvector as an
    // argument to the __builtin.
    if (proto[i] >= '2' && proto[i] <= '4') {
      // Check if an explicit cast is needed.
      if (argType != 'c' || argPoly || argUsgn)
        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;

      // Expand each of the struct's .val[] members as its own argument.
      for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
        s += args + ".val[" + utostr(vi) + "]";
        if ((vi + 1) < ve)
          s += ", ";
      }
      if ((i + 1) < e)
        s += ", ";

      continue;
    }

    // The last argument of a "_n" intrinsic is a scalar; splat it across a
    // vector before passing it to the non-splat builtin.
    if (splat && (i + 1) == e)
      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);

    // Check if an explicit cast is needed.
    if ((splat || !argScalar) &&
        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
      std::string argTypeStr = "c";
      if (ck != ClassB)
        argTypeStr = argType;
      if (argQuad)
        argTypeStr = "Q" + argTypeStr;
      args = "(" + TypeString('d', argTypeStr) + ")" + args;
    }

    s += args;
    if ((i + 1) < e)
      s += ", ";
  }

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (ck == ClassB)
    s += ", " + utostr(GetNeonEnum(proto, typestr));

  s += ");";

  // For sret builtins the result was written into "r"; yield it as the value
  // of the statement expression (macro) or return it (inline function).
  if (proto[0] != 'v' && sret) {
    if (define)
      s += " r;";
    else
      s += " return r;";
  }
  return s;
}
2012 
2013 static std::string GenBuiltinDef(const std::string &name,
2014                                  const std::string &proto,
2015                                  StringRef typestr, ClassKind ck) {
2016   std::string s("BUILTIN(__builtin_neon_");
2017 
2018   // If all types are the same size, bitcasting the args will take care
2019   // of arg checking.  The actual signedness etc. will be taken care of with
2020   // special enums.
2021   if (!ProtoHasScalar(proto))
2022     ck = ClassB;
2023 
2024   s += MangleName(name, typestr, ck);
2025   s += ", \"";
2026 
2027   for (unsigned i = 0, e = proto.size(); i != e; ++i)
2028     s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
2029 
2030   // Extra constant integer to hold type class enum for this function, e.g. s8
2031   if (ck == ClassB)
2032     s += "i";
2033 
2034   s += "\", \"n\")";
2035   return s;
2036 }
2037 
2038 static std::string GenIntrinsic(const std::string &name,
2039                                 const std::string &proto,
2040                                 StringRef outTypeStr, StringRef inTypeStr,
2041                                 OpKind kind, ClassKind classKind) {
2042   assert(!proto.empty() && "");
2043   bool define = UseMacro(proto) && kind != OpUnavailable;
2044   std::string s;
2045 
2046   // static always inline + return type
2047   if (define)
2048     s += "#define ";
2049   else
2050     s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
2051 
2052   // Function name with type suffix
2053   std::string mangledName = MangleName(name, outTypeStr, ClassS);
2054   if (outTypeStr != inTypeStr) {
2055     // If the input type is different (e.g., for vreinterpret), append a suffix
2056     // for the input type.  String off a "Q" (quad) prefix so that MangleName
2057     // does not insert another "q" in the name.
2058     unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2059     StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2060     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2061   }
2062   s += mangledName;
2063 
2064   // Function arguments
2065   s += GenArgs(proto, inTypeStr);
2066 
2067   // Definition.
2068   if (define) {
2069     s += " __extension__ ({ \\\n  ";
2070     s += GenMacroLocals(proto, inTypeStr);
2071   } else if (kind == OpUnavailable) {
2072     s += " __attribute__((unavailable));\n";
2073     return s;
2074   } else
2075     s += " {\n  ";
2076 
2077   if (kind != OpNone)
2078     s += GenOpString(name, kind, proto, outTypeStr);
2079   else
2080     s += GenBuiltin(name, proto, outTypeStr, classKind);
2081   if (define)
2082     s += " })";
2083   else
2084     s += " }";
2085   s += "\n";
2086   return s;
2087 }
2088 
/// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
/// is comprised of type definitions and function declarations.
void NeonEmitter::run(raw_ostream &OS) {
  // Banner / license block of the generated header.
  OS <<
    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
    "---===\n"
    " *\n"
    " * Permission is hereby granted, free of charge, to any person obtaining "
    "a copy\n"
    " * of this software and associated documentation files (the \"Software\"),"
    " to deal\n"
    " * in the Software without restriction, including without limitation the "
    "rights\n"
    " * to use, copy, modify, merge, publish, distribute, sublicense, "
    "and/or sell\n"
    " * copies of the Software, and to permit persons to whom the Software is\n"
    " * furnished to do so, subject to the following conditions:\n"
    " *\n"
    " * The above copyright notice and this permission notice shall be "
    "included in\n"
    " * all copies or substantial portions of the Software.\n"
    " *\n"
    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
    "EXPRESS OR\n"
    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
    "MERCHANTABILITY,\n"
    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
    "SHALL THE\n"
    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
    "OTHER\n"
    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
    "ARISING FROM,\n"
    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
    "DEALINGS IN\n"
    " * THE SOFTWARE.\n"
    " *\n"
    " *===--------------------------------------------------------------------"
    "---===\n"
    " */\n\n";

  OS << "#ifndef __ARM_NEON_H\n";
  OS << "#define __ARM_NEON_H\n\n";

  // NOTE(review): the guard macro is spelled __AARCH_FEATURE_ADVSIMD here;
  // confirm this matches the feature macro the AArch64 target actually
  // defines before "fixing" the spelling.
  OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n";
  OS << "#error \"NEON support not enabled\"\n";
  OS << "#endif\n\n";

  OS << "#include <stdint.h>\n\n";

  // Emit NEON-specific scalar typedefs.
  OS << "typedef float float32_t;\n";
  OS << "typedef __fp16 float16_t;\n";

  // float64_t exists only on AArch64.
  OS << "#ifdef __aarch64__\n";
  OS << "typedef double float64_t;\n";
  OS << "#endif\n\n";

  // For now, signedness of polynomial types depends on target
  OS << "#ifdef __aarch64__\n";
  OS << "typedef uint8_t poly8_t;\n";
  OS << "typedef uint16_t poly16_t;\n";
  OS << "#else\n";
  OS << "typedef int8_t poly8_t;\n";
  OS << "typedef int16_t poly16_t;\n";
  OS << "#endif\n";

  // Emit Neon vector typedefs.
  std::string TypedefTypes(
      "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPs");
  SmallVector<StringRef, 24> TDTypeVec;
  ParseTypes(0, TypedefTypes, TDTypeVec);

  // Emit vector typedefs.
  // isA64 tracks whether the emission point is currently inside an
  // "#ifdef __aarch64__" region: an 'd' (f64) type opens one, and the first
  // non-'d' type afterwards closes it.
  bool isA64 = false;
  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
    bool dummy, quad = false, poly = false;
    char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
    bool preinsert = false;
    bool postinsert = false;

    if (type == 'd') {
      preinsert = isA64? false: true;
      isA64 = true;
    } else {
      postinsert = isA64? true: false;
      isA64 = false;
    }
    if (postinsert)
      OS << "#endif\n";
    if (preinsert)
      OS << "#ifdef __aarch64__\n";

    if (poly)
      OS << "typedef __attribute__((neon_polyvector_type(";
    else
      OS << "typedef __attribute__((neon_vector_type(";

    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
    OS << utostr(nElts) << "))) ";
    // Pad single-digit lane counts so the typedef columns line up.
    if (nElts < 10)
      OS << " ";

    OS << TypeString('s', TDTypeVec[i]);
    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";

  }
  OS << "\n";

  // Emit struct typedefs.
  // These are the 2-, 3- and 4-vector aggregates used by the multi-register
  // vld/vst operations, with the same #ifdef weaving for f64 types as above.
  isA64 = false;
  for (unsigned vi = 2; vi != 5; ++vi) {
    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
      bool dummy, quad = false, poly = false;
      char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
      bool preinsert = false;
      bool postinsert = false;

      if (type == 'd') {
        preinsert = isA64? false: true;
        isA64 = true;
      } else {
        postinsert = isA64? true: false;
        isA64 = false;
      }
      if (postinsert)
        OS << "#endif\n";
      if (preinsert)
        OS << "#ifdef __aarch64__\n";

      std::string ts = TypeString('d', TDTypeVec[i]);
      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
      OS << "typedef struct " << vs << " {\n";
      OS << "  " << ts << " val";
      OS << "[" << utostr(vi) << "]";
      OS << ";\n} ";
      OS << vs << ";\n";
      OS << "\n";
    }
  }

  OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";

  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");

  // Keyed by the generated definition text; used by emitIntrinsic to make
  // sure each definition appears only once in the header.
  StringMap<ClassKind> EmittedMap;

  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
  // intrinsics.  (Some of the saturating multiply instructions are also
  // used to implement the corresponding "_lane" variants, but tablegen
  // sorts the records into alphabetical order so that the "_lane" variants
  // come after the intrinsics they use.)
  emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);

  // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
  // common intrinsics appear only once in the output stream.
  // The check for uniqueness is done in emitIntrinsic.
  // Emit ARM intrinsics.
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip AArch64 intrinsics; they will be emitted at the end.
    bool isA64 = R->getValueAsBit("isA64");
    if (isA64)
      continue;

    // VMOVL, VMULL and VABD were already emitted above (VABDL is skipped by
    // the uniqueness check in emitIntrinsic).
    if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
        R->getName() != "VABD")
      emitIntrinsic(OS, R, EmittedMap);
  }

  // Emit AArch64-specific intrinsics.
  OS << "#ifdef __aarch64__\n";

  // As above, emit the "_high" helpers first so later intrinsics can use them.
  emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip ARM intrinsics already included above.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64)
      continue;

    emitIntrinsic(OS, R, EmittedMap);
  }

  OS << "#endif\n\n";

  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_NEON_H */\n";
}
2285 
/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
/// intrinsics specified by record R checking for intrinsic uniqueness.
void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
                                StringMap<ClassKind> &EmittedMap) {
  std::string name = R->getValueAsString("Name");
  std::string Proto = R->getValueAsString("Prototype");
  std::string Types = R->getValueAsString("Types");

  SmallVector<StringRef, 16> TypeVec;
  ParseTypes(R, Types, TypeVec);

  OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];

  // The class kind comes from the record's second superclass; a builtin
  // (OpNone) without one cannot be emitted.
  ClassKind classKind = ClassNone;
  if (R->getSuperClasses().size() >= 2)
    classKind = ClassMap[R->getSuperClasses()[1]];
  if (classKind == ClassNone && kind == OpNone)
    PrintFatalError(R->getLoc(), "Builtin has no class kind");

  for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
    if (kind == OpReinterpret) {
      // vreinterpret: emit one cast from every other type in the list with
      // matching quad-ness (i.e. the same total vector width), skipping the
      // identity reinterpret.
      bool outQuad = false;
      bool dummy = false;
      (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
      for (unsigned srcti = 0, srcte = TypeVec.size();
           srcti != srcte; ++srcti) {
        bool inQuad = false;
        (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
        if (srcti == ti || inQuad != outQuad)
          continue;
        std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
                                     OpCast, ClassS);
        // The generated text itself is the uniqueness key; identical
        // definitions are emitted only once.
        if (EmittedMap.count(s))
          continue;
        EmittedMap[s] = ClassS;
        OS << s;
      }
    } else {
      std::string s =
          GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
      // Skip definitions already emitted (e.g. ARM/AArch64 overlap).
      if (EmittedMap.count(s))
        continue;
      EmittedMap[s] = classKind;
      OS << s;
    }
  }
  OS << "\n";
}
2334 
2335 static unsigned RangeFromType(const char mod, StringRef typestr) {
2336   // base type to get the type string for.
2337   bool quad = false, dummy = false;
2338   char type = ClassifyType(typestr, quad, dummy, dummy);
2339   type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
2340 
2341   switch (type) {
2342     case 'c':
2343       return (8 << (int)quad) - 1;
2344     case 'h':
2345     case 's':
2346       return (4 << (int)quad) - 1;
2347     case 'f':
2348     case 'i':
2349       return (2 << (int)quad) - 1;
2350     case 'd':
2351     case 'l':
2352       return (1 << (int)quad) - 1;
2353     default:
2354       PrintFatalError("unhandled type!");
2355   }
2356 }
2357 
/// Generate the ARM and AArch64 intrinsic range checking code for
/// shift/lane immediates, checking for unique declarations.
void
NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
                                        StringMap<ClassKind> &A64IntrinsicMap,
                                        bool isA64RangeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  // Keyed by mangled name; ensures each case label is emitted only once.
  StringMap<OpKind> EmittedMap;

  // Generate the intrinsic range checking code for shift/lane immediates.
  if (isA64RangeCheck)
    OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
  else
    OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Only real builtins (OpNone) need range checking; pseudo-ops are
    // expanded in the header and checked via the builtins they call.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    // "name@prototype" key used to match against the AArch64 map below.
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which do not have an immediate do not need to have range
    // checking code emitted.
    size_t immPos = Proto.find('i');
    if (immPos == std::string::npos)
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    // Do not include AArch64 range checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64RangeCheck && isA64)
      continue;

    // Include ARM range checks in AArch64 but only if ARM intrinsics are not
    // redefined by AArch64 to handle new types.
    if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      std::string namestr, shiftstr, rangestr;

      if (R->getValueAsBit("isVCVT_N")) {
        // VCVT between floating- and fixed-point values takes an immediate
        // in the range [1, 32] for f32, or [1, 64] for f64.
        ck = ClassB;
        if (name.find("32") != std::string::npos)
          rangestr = "l = 1; u = 31"; // upper bound = l + u
        else if (name.find("64") != std::string::npos)
          rangestr = "l = 1; u = 63";
        else
          PrintFatalError(R->getLoc(),
              "Fixed point convert name should contains \"32\" or \"64\"");
      } else if (!ProtoHasScalar(Proto)) {
        // Builtins which are overloaded by type will need to have their upper
        // bound computed at Sema time based on the type constant.
        ck = ClassB;
        if (R->getValueAsBit("isShift")) {
          shiftstr = ", true";

          // Right shifts have an 'r' in the name, left shifts do not.
          if (name.find('r') != std::string::npos)
            rangestr = "l = 1; ";
        }
        rangestr += "u = RFT(TV" + shiftstr + ")";
      } else {
        // The immediate generally refers to a lane in the preceding argument.
        assert(immPos > 0 && "unexpected immediate operand");
        rangestr =
            "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
      }
      // Make sure cases appear only once by uniquing them in a string map.
      namestr = MangleName(name, TypeVec[ti], ck);
      if (EmittedMap.count(namestr))
        continue;
      EmittedMap[namestr] = OpNone;

      // Calculate the index of the immediate that should be range checked.
      unsigned immidx = 0;

      // Builtins that return a struct of multiple vectors have an extra
      // leading arg for the struct return.
      if (Proto[0] >= '2' && Proto[0] <= '4')
        ++immidx;

      // Add one to the index for each argument until we reach the immediate
      // to be checked.  Structs of vectors are passed as multiple arguments.
      // Reaching the 'i' entry terminates the loop by pulling in the bound.
      for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
        switch (Proto[ii]) {
        default:
          immidx += 1;
          break;
        case '2':
          immidx += 2;
          break;
        case '3':
          immidx += 3;
          break;
        case '4':
          immidx += 4;
          break;
        case 'i':
          ie = ii + 1;
          break;
        }
      }
      if (isA64RangeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
         << rangestr << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}
2494 
/// Generate the ARM and AArch64 overloaded type checking code for
/// SemaChecking.cpp, checking for unique builtin declarations.
void
NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
                                      StringMap<ClassKind> &A64IntrinsicMap,
                                      bool isA64TypeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate the overloaded type checking code for SemaChecking.cpp
  if (isA64TypeCheck)
    OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
  else
    OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only real builtins (OpNone) have an overloaded __builtin_neon_* form.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    std::string name = R->getValueAsString("Name");
    // "name@prototype" key used to match against the AArch64 map below.
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which have a scalar argument cannot be overloaded, no need to
    // check them if we are emitting the type checking code.
    if (ProtoHasScalar(Proto))
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    // Do not include AArch64 type checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64TypeCheck && isA64)
      continue;

    // Include ARM  type check in AArch64 but only if ARM intrinsics
    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
    // redefined in AArch64 to handle an additional 2 x f64 type.
    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
    if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    // Build one bitmask of permitted element-type enums per register width:
    // 'mask' for 64-bit (non-quad) variants, 'qmask' for 128-bit (quad).
    // si/qi remember a representative type index for mangling the case label.
    int si = -1, qi = -1;
    uint64_t mask = 0, qmask = 0;
    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the switch case(s) for this builtin for the type validation.
      bool quad = false, poly = false, usgn = false;
      (void) ClassifyType(TypeVec[ti], quad, poly, usgn);

      if (quad) {
        qi = ti;
        qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      } else {
        si = ti;
        mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      }
    }

    // Check if the builtin function has a pointer or const pointer argument.
    int PtrArgNum = -1;
    bool HasConstPtr = false;
    for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
      char ArgType = Proto[arg];
      if (ArgType == 'c') {
        HasConstPtr = true;
        PtrArgNum = arg - 1;
        break;
      }
      if (ArgType == 'p') {
        PtrArgNum = arg - 1;
        break;
      }
    }
    // For sret builtins, adjust the pointer argument index.
    if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
      PtrArgNum += 1;

    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
    // and vst1_lane intrinsics.  Using a pointer to the vector element
    // type with one of those operations causes codegen to select an aligned
    // load/store instruction.  If you want an unaligned operation,
    // the pointer argument needs to have less alignment than element type,
    // so just accept any pointer type.
    if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
      PtrArgNum = -1;
      HasConstPtr = false;
    }

    // Emit one case per register width that has any valid types.
    if (mask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
         << "0x" << utohexstr(mask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
    if (qmask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
         << "0x" << utohexstr(qmask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}
2627 
2628 /// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
2629 /// declaration of builtins, checking for unique builtin declarations.
2630 void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
2631                                  StringMap<ClassKind> &A64IntrinsicMap,
2632                                  bool isA64GenBuiltinDef) {
2633   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2634   StringMap<OpKind> EmittedMap;
2635 
2636   // Generate BuiltinsARM.def and BuiltinsAArch64.def
2637   if (isA64GenBuiltinDef)
2638     OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
2639   else
2640     OS << "#ifdef GET_NEON_BUILTINS\n";
2641 
2642   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2643     Record *R = RV[i];
2644     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2645     if (k != OpNone)
2646       continue;
2647 
2648     std::string Proto = R->getValueAsString("Prototype");
2649     std::string name = R->getValueAsString("Name");
2650     std::string Rename = name + "@" + Proto;
2651 
2652     // Functions with 'a' (the splat code) in the type prototype should not get
2653     // their own builtin as they use the non-splat variant.
2654     if (Proto.find('a') != std::string::npos)
2655       continue;
2656 
2657     std::string Types = R->getValueAsString("Types");
2658     SmallVector<StringRef, 16> TypeVec;
2659     ParseTypes(R, Types, TypeVec);
2660 
2661     if (R->getSuperClasses().size() < 2)
2662       PrintFatalError(R->getLoc(), "Builtin has no class kind");
2663 
2664     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2665 
2666     // Do not include AArch64 BUILTIN() macros if not generating
2667     // code for AArch64
2668     bool isA64 = R->getValueAsBit("isA64");
2669     if (!isA64GenBuiltinDef && isA64)
2670       continue;
2671 
2672     // Include ARM  BUILTIN() macros  in AArch64 but only if ARM intrinsics
2673     // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
2674     // redefined in AArch64 to handle an additional 2 x f64 type.
2675     if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) {
2676       ClassKind &A64CK = A64IntrinsicMap[Rename];
2677       if (A64CK == ck && ck != ClassNone)
2678         continue;
2679     }
2680 
2681     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2682       // Generate the declaration for this builtin, ensuring
2683       // that each unique BUILTIN() macro appears only once in the output
2684       // stream.
2685       std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
2686       if (EmittedMap.count(bd))
2687         continue;
2688 
2689       EmittedMap[bd] = OpNone;
2690       OS << bd << "\n";
2691     }
2692   }
2693   OS << "#endif\n\n";
2694 }
2695 
2696 /// runHeader - Emit a file with sections defining:
2697 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
2698 /// 2. the SemaChecking code for the type overload checking.
2699 /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
2700 void NeonEmitter::runHeader(raw_ostream &OS) {
2701   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2702 
2703   // build a map of AArch64 intriniscs to be used in uniqueness checks.
2704   StringMap<ClassKind> A64IntrinsicMap;
2705   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2706     Record *R = RV[i];
2707 
2708     bool isA64 = R->getValueAsBit("isA64");
2709     if (!isA64)
2710       continue;
2711 
2712     ClassKind CK = ClassNone;
2713     if (R->getSuperClasses().size() >= 2)
2714       CK = ClassMap[R->getSuperClasses()[1]];
2715 
2716     std::string Name = R->getValueAsString("Name");
2717     std::string Proto = R->getValueAsString("Prototype");
2718     std::string Rename = Name + "@" + Proto;
2719     if (A64IntrinsicMap.count(Rename))
2720       continue;
2721     A64IntrinsicMap[Rename] = CK;
2722   }
2723 
2724   // Generate BuiltinsARM.def for ARM
2725   genBuiltinsDef(OS, A64IntrinsicMap, false);
2726 
2727   // Generate BuiltinsAArch64.def for AArch64
2728   genBuiltinsDef(OS, A64IntrinsicMap, true);
2729 
2730   // Generate ARM overloaded type checking code for SemaChecking.cpp
2731   genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
2732 
2733   // Generate AArch64 overloaded type checking code for SemaChecking.cpp
2734   genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
2735 
2736   // Generate ARM range checking code for shift/lane immediates.
2737   genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
2738 
2739   // Generate the AArch64 range checking code for shift/lane immediates.
2740   genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
2741 }
2742 
2743 /// GenTest - Write out a test for the intrinsic specified by the name and
2744 /// type strings, including the embedded patterns for FileCheck to match.
2745 static std::string GenTest(const std::string &name,
2746                            const std::string &proto,
2747                            StringRef outTypeStr, StringRef inTypeStr,
2748                            bool isShift, bool isHiddenLOp,
2749                            ClassKind ck, const std::string &InstName,
2750 						   bool isA64,
2751 						   std::string & testFuncProto) {
2752   assert(!proto.empty() && "");
2753   std::string s;
2754 
2755   // Function name with type suffix
2756   std::string mangledName = MangleName(name, outTypeStr, ClassS);
2757   if (outTypeStr != inTypeStr) {
2758     // If the input type is different (e.g., for vreinterpret), append a suffix
2759     // for the input type.  String off a "Q" (quad) prefix so that MangleName
2760     // does not insert another "q" in the name.
2761     unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2762     StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2763     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2764   }
2765 
2766   // todo: GenerateChecksForIntrinsic does not generate CHECK
2767   // for aarch64 instructions yet
2768   std::vector<std::string> FileCheckPatterns;
2769   if (!isA64) {
2770 	GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
2771 							   isHiddenLOp, FileCheckPatterns);
2772 	s+= "// CHECK_ARM: test_" + mangledName + "\n";
2773   }
2774   s += "// CHECK_AARCH64: test_" + mangledName + "\n";
2775 
2776   // Emit the FileCheck patterns.
2777   // If for any reason we do not want to emit a check, mangledInst
2778   // will be the empty string.
2779   if (FileCheckPatterns.size()) {
2780     for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
2781                                                   e = FileCheckPatterns.end();
2782          i != e;
2783          ++i) {
2784       s += "// CHECK_ARM: " + *i + "\n";
2785     }
2786   }
2787 
2788   // Emit the start of the test function.
2789 
2790   testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
2791   char arg = 'a';
2792   std::string comma;
2793   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2794     // Do not create arguments for values that must be immediate constants.
2795     if (proto[i] == 'i')
2796       continue;
2797     testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
2798     testFuncProto.push_back(arg);
2799     comma = ", ";
2800   }
2801   testFuncProto += ")";
2802 
2803   s+= testFuncProto;
2804   s+= " {\n  ";
2805 
2806   if (proto[0] != 'v')
2807     s += "return ";
2808   s += mangledName + "(";
2809   arg = 'a';
2810   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2811     if (proto[i] == 'i') {
2812       // For immediate operands, test the maximum value.
2813       if (isShift)
2814         s += "1"; // FIXME
2815       else
2816         // The immediate generally refers to a lane in the preceding argument.
2817         s += utostr(RangeFromType(proto[i-1], inTypeStr));
2818     } else {
2819       s.push_back(arg);
2820     }
2821     if ((i + 1) < e)
2822       s += ", ";
2823   }
2824   s += ");\n}\n\n";
2825   return s;
2826 }
2827 
2828 /// Write out all intrinsic tests for the specified target, checking
2829 /// for intrinsic test uniqueness.
2830 void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
2831                                 bool isA64GenTest) {
2832   if (isA64GenTest)
2833 	OS << "#ifdef __aarch64__\n";
2834 
2835   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2836   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2837     Record *R = RV[i];
2838     std::string name = R->getValueAsString("Name");
2839     std::string Proto = R->getValueAsString("Prototype");
2840     std::string Types = R->getValueAsString("Types");
2841     bool isShift = R->getValueAsBit("isShift");
2842     std::string InstName = R->getValueAsString("InstName");
2843     bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
2844     bool isA64 = R->getValueAsBit("isA64");
2845 
2846     // do not include AArch64 intrinsic test if not generating
2847     // code for AArch64
2848     if (!isA64GenTest && isA64)
2849       continue;
2850 
2851     SmallVector<StringRef, 16> TypeVec;
2852     ParseTypes(R, Types, TypeVec);
2853 
2854     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2855     OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
2856     if (kind == OpUnavailable)
2857       continue;
2858     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2859       if (kind == OpReinterpret) {
2860         bool outQuad = false;
2861         bool dummy = false;
2862         (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
2863         for (unsigned srcti = 0, srcte = TypeVec.size();
2864              srcti != srcte; ++srcti) {
2865           bool inQuad = false;
2866           (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
2867           if (srcti == ti || inQuad != outQuad)
2868             continue;
2869 		  std::string testFuncProto;
2870           std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
2871                                   isShift, isHiddenLOp, ck, InstName, isA64,
2872 								  testFuncProto);
2873           if (EmittedMap.count(testFuncProto))
2874             continue;
2875           EmittedMap[testFuncProto] = kind;
2876           OS << s << "\n";
2877         }
2878       } else {
2879 		std::string testFuncProto;
2880         std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
2881                                 isHiddenLOp, ck, InstName, isA64, testFuncProto);
2882         if (EmittedMap.count(testFuncProto))
2883           continue;
2884         EmittedMap[testFuncProto] = kind;
2885         OS << s << "\n";
2886       }
2887     }
2888   }
2889 
2890   if (isA64GenTest)
2891 	OS << "#endif\n";
2892 }
2893 /// runTests - Write out a complete set of tests for all of the Neon
2894 /// intrinsics.
2895 void NeonEmitter::runTests(raw_ostream &OS) {
2896   OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
2897         "apcs-gnu\\\n"
2898         "// RUN:  -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
2899         "// RUN:  | FileCheck %s -check-prefix=CHECK_ARM\n"
2900 		"\n"
2901 	    "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
2902 	    "// RUN -target-feature +neon  -ffreestanding -S -o - %s \\\n"
2903 	    "// RUN:  | FileCheck %s -check-prefix=CHECK_AARCH64\n"
2904         "\n"
2905         "// REQUIRES: long_tests\n"
2906         "\n"
2907         "#include <arm_neon.h>\n"
2908         "\n";
2909 
2910   // ARM tests must be emitted before AArch64 tests to ensure
2911   // tests for intrinsics that are common to ARM and AArch64
2912   // appear only once in the output stream.
2913   // The check for uniqueness is done in genTargetTest.
2914   StringMap<OpKind> EmittedMap;
2915 
2916   genTargetTest(OS, EmittedMap, false);
2917 
2918   genTargetTest(OS, EmittedMap, true);
2919 }
2920 
2921 namespace clang {
2922 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
2923   NeonEmitter(Records).run(OS);
2924 }
2925 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
2926   NeonEmitter(Records).runHeader(OS);
2927 }
2928 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
2929   NeonEmitter(Records).runTests(OS);
2930 }
2931 } // End namespace clang
2932