1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This tablegen backend is responsible for emitting arm_neon.h, which includes
11 // a declaration and definition of each function specified by the ARM NEON
12 // compiler interface.  See ARM document DUI0348B.
13 //
14 // Each NEON instruction is implemented in terms of 1 or more functions which
15 // are suffixed with the element type of the input vectors.  Functions may be
16 // implemented in terms of generic vector operations such as +, *, -, etc. or
17 // by calling a __builtin_-prefixed function which will be handled by clang's
18 // CodeGen library.
19 //
20 // Additional validation code can be generated by this file when runHeader() is
21 // called, rather than the normal run() entry point.  A complete set of tests
22 // for Neon intrinsics can be generated by calling the runTests() entry point.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/SmallString.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/ADT/StringMap.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/TableGen/Error.h"
33 #include "llvm/TableGen/Record.h"
34 #include "llvm/TableGen/TableGenBackend.h"
35 #include <string>
36 using namespace llvm;
37 
/// OpKind - The operation kinds this emitter knows about.  The NeonEmitter
/// constructor registers "OP_*" names for these enumerators in its OpMap so
/// that an operation can be looked up by name.
enum OpKind {
  OpNone,
  OpUnavailable,
  OpAdd,
  OpAddl,
  OpAddlHi,
  OpAddw,
  OpAddwHi,
  OpSub,
  OpSubl,
  OpSublHi,
  OpSubw,
  OpSubwHi,
  OpMul,
  OpMla,
  OpMlal,
  OpMullHi,
  OpMullHiP64,
  OpMullHiN,
  OpMlalHi,
  OpMlalHiN,
  OpMls,
  OpMlsl,
  OpMlslHi,
  OpMlslHiN,
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpFMlaN,
  OpFMlsN,
  OpMlalN,
  OpMlslN,
  OpMulLane,
  OpMulXLane,
  OpMullLane,
  OpMullHiLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlalHiLane,
  OpMlslLane,
  OpMlslHiLane,
  OpQDMullLane,
  OpQDMullHiLane,
  OpQDMlalLane,
  OpQDMlalHiLane,
  OpQDMlslLane,
  OpQDMlslHiLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  OpFMSLane,
  OpFMSLaneQ,
  OpTrn1,
  OpZip1,
  OpUzp1,
  OpTrn2,
  OpZip2,
  OpUzp2,
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpXtnHi,
  OpSqxtunHi,
  OpQxtnHi,
  OpFcvtnHi,
  OpFcvtlHi,
  OpFcvtxnHi,
  OpReinterpret,
  OpAddhnHi,
  OpRAddhnHi,
  OpSubhnHi,
  OpRSubhnHi,
  OpAbdl,
  OpAbdlHi,
  OpAba,
  OpAbal,
  OpAbalHi,
  OpQDMullHi,
  OpQDMullHiN,
  OpQDMlalHi,
  OpQDMlalHiN,
  OpQDMlslHi,
  OpQDMlslHiN,
  OpDiv,
  OpLongHi,
  OpNarrowHi,
  OpMovlHi,
  OpCopyLane,
  OpCopyQLane,
  OpCopyLaneQ,
  OpScalarMulLane,
  OpScalarMulLaneQ,
  OpScalarMulXLane,
  OpScalarMulXLaneQ,
  OpScalarVMulXLane,
  OpScalarVMulXLaneQ,
  OpScalarQDMullLane,
  OpScalarQDMullLaneQ,
  OpScalarQDMulHiLane,
  OpScalarQDMulHiLaneQ,
  OpScalarQRDMulHiLane,
  OpScalarQRDMulHiLaneQ,
  OpScalarGetLane,
  OpScalarSetLane
};
162 
/// ClassKind - Instruction classes.  The class determines which suffix, if
/// any, the name-mangling code in this file appends for a given type; the
/// NeonEmitter constructor associates tablegen class records such as SInst,
/// IInst and WInst with these values in its ClassMap.
enum ClassKind {
  ClassNone,
  ClassI,           // generic integer instruction, e.g., "i8" suffix
  ClassS,           // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,           // width-specific instruction, e.g., "8" suffix
  ClassB,           // bitcast arguments with enum argument to specify type
  ClassL,           // Logical instructions which are op instructions
                    // but we need to not emit any suffix for in our
                    // tests.
  ClassNoTest       // Instructions which we do not test since they are
                    // not TRUE instructions.
};
175 
/// NeonTypeFlags - Packs the element type, signedness and vector width used
/// to identify the types of overloaded Neon builtins into one integer.  The
/// encoding must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
public:
  // Element type; occupies the bits selected by EltTypeMask.
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Poly64,
    Poly128,
    Float16,
    Float32,
    Float64
  };

private:
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };

  // The packed flag word.
  uint32_t Flags;

public:
  // Wrap an already-encoded flag word.
  NeonTypeFlags(unsigned F) : Flags(F) {}

  // Encode an element type together with its unsigned/quad modifiers.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad)
      : Flags(static_cast<uint32_t>(ET) |
              (IsUnsigned ? static_cast<uint32_t>(UnsignedFlag) : 0u) |
              (IsQuad ? static_cast<uint32_t>(QuadFlag) : 0u)) {}

  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace
214 
215 namespace {
216 class NeonEmitter {
217   RecordKeeper &Records;
218   StringMap<OpKind> OpMap;
219   DenseMap<Record*, ClassKind> ClassMap;
220 
221 public:
222   NeonEmitter(RecordKeeper &R) : Records(R) {
223     OpMap["OP_NONE"]  = OpNone;
224     OpMap["OP_UNAVAILABLE"] = OpUnavailable;
225     OpMap["OP_ADD"]   = OpAdd;
226     OpMap["OP_ADDL"]  = OpAddl;
227     OpMap["OP_ADDLHi"] = OpAddlHi;
228     OpMap["OP_ADDW"]  = OpAddw;
229     OpMap["OP_ADDWHi"] = OpAddwHi;
230     OpMap["OP_SUB"]   = OpSub;
231     OpMap["OP_SUBL"]  = OpSubl;
232     OpMap["OP_SUBLHi"] = OpSublHi;
233     OpMap["OP_SUBW"]  = OpSubw;
234     OpMap["OP_SUBWHi"] = OpSubwHi;
235     OpMap["OP_MUL"]   = OpMul;
236     OpMap["OP_MLA"]   = OpMla;
237     OpMap["OP_MLAL"]  = OpMlal;
238     OpMap["OP_MULLHi"]  = OpMullHi;
239     OpMap["OP_MULLHi_P64"]  = OpMullHiP64;
240     OpMap["OP_MULLHi_N"]  = OpMullHiN;
241     OpMap["OP_MLALHi"]  = OpMlalHi;
242     OpMap["OP_MLALHi_N"]  = OpMlalHiN;
243     OpMap["OP_MLS"]   = OpMls;
244     OpMap["OP_MLSL"]  = OpMlsl;
245     OpMap["OP_MLSLHi"] = OpMlslHi;
246     OpMap["OP_MLSLHi_N"] = OpMlslHiN;
247     OpMap["OP_MUL_N"] = OpMulN;
248     OpMap["OP_MLA_N"] = OpMlaN;
249     OpMap["OP_MLS_N"] = OpMlsN;
250     OpMap["OP_FMLA_N"] = OpFMlaN;
251     OpMap["OP_FMLS_N"] = OpFMlsN;
252     OpMap["OP_MLAL_N"] = OpMlalN;
253     OpMap["OP_MLSL_N"] = OpMlslN;
254     OpMap["OP_MUL_LN"]= OpMulLane;
255     OpMap["OP_MULX_LN"]= OpMulXLane;
256     OpMap["OP_MULL_LN"] = OpMullLane;
257     OpMap["OP_MULLHi_LN"] = OpMullHiLane;
258     OpMap["OP_MLA_LN"]= OpMlaLane;
259     OpMap["OP_MLS_LN"]= OpMlsLane;
260     OpMap["OP_MLAL_LN"] = OpMlalLane;
261     OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
262     OpMap["OP_MLSL_LN"] = OpMlslLane;
263     OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
264     OpMap["OP_QDMULL_LN"] = OpQDMullLane;
265     OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
266     OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
267     OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
268     OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
269     OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
270     OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
271     OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
272     OpMap["OP_FMS_LN"] = OpFMSLane;
273     OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
274     OpMap["OP_TRN1"]  = OpTrn1;
275     OpMap["OP_ZIP1"]  = OpZip1;
276     OpMap["OP_UZP1"]  = OpUzp1;
277     OpMap["OP_TRN2"]  = OpTrn2;
278     OpMap["OP_ZIP2"]  = OpZip2;
279     OpMap["OP_UZP2"]  = OpUzp2;
280     OpMap["OP_EQ"]    = OpEq;
281     OpMap["OP_GE"]    = OpGe;
282     OpMap["OP_LE"]    = OpLe;
283     OpMap["OP_GT"]    = OpGt;
284     OpMap["OP_LT"]    = OpLt;
285     OpMap["OP_NEG"]   = OpNeg;
286     OpMap["OP_NOT"]   = OpNot;
287     OpMap["OP_AND"]   = OpAnd;
288     OpMap["OP_OR"]    = OpOr;
289     OpMap["OP_XOR"]   = OpXor;
290     OpMap["OP_ANDN"]  = OpAndNot;
291     OpMap["OP_ORN"]   = OpOrNot;
292     OpMap["OP_CAST"]  = OpCast;
293     OpMap["OP_CONC"]  = OpConcat;
294     OpMap["OP_HI"]    = OpHi;
295     OpMap["OP_LO"]    = OpLo;
296     OpMap["OP_DUP"]   = OpDup;
297     OpMap["OP_DUP_LN"] = OpDupLane;
298     OpMap["OP_SEL"]   = OpSelect;
299     OpMap["OP_REV16"] = OpRev16;
300     OpMap["OP_REV32"] = OpRev32;
301     OpMap["OP_REV64"] = OpRev64;
302     OpMap["OP_XTN"] = OpXtnHi;
303     OpMap["OP_SQXTUN"] = OpSqxtunHi;
304     OpMap["OP_QXTN"] = OpQxtnHi;
305     OpMap["OP_VCVT_NA_HI"] = OpFcvtnHi;
306     OpMap["OP_VCVT_EX_HI"] = OpFcvtlHi;
307     OpMap["OP_VCVTX_HI"] = OpFcvtxnHi;
308     OpMap["OP_REINT"] = OpReinterpret;
309     OpMap["OP_ADDHNHi"] = OpAddhnHi;
310     OpMap["OP_RADDHNHi"] = OpRAddhnHi;
311     OpMap["OP_SUBHNHi"] = OpSubhnHi;
312     OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
313     OpMap["OP_ABDL"]  = OpAbdl;
314     OpMap["OP_ABDLHi"] = OpAbdlHi;
315     OpMap["OP_ABA"]   = OpAba;
316     OpMap["OP_ABAL"]  = OpAbal;
317     OpMap["OP_ABALHi"] = OpAbalHi;
318     OpMap["OP_QDMULLHi"] = OpQDMullHi;
319     OpMap["OP_QDMULLHi_N"] = OpQDMullHiN;
320     OpMap["OP_QDMLALHi"] = OpQDMlalHi;
321     OpMap["OP_QDMLALHi_N"] = OpQDMlalHiN;
322     OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
323     OpMap["OP_QDMLSLHi_N"] = OpQDMlslHiN;
324     OpMap["OP_DIV"] = OpDiv;
325     OpMap["OP_LONG_HI"] = OpLongHi;
326     OpMap["OP_NARROW_HI"] = OpNarrowHi;
327     OpMap["OP_MOVL_HI"] = OpMovlHi;
328     OpMap["OP_COPY_LN"] = OpCopyLane;
329     OpMap["OP_COPYQ_LN"] = OpCopyQLane;
330     OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;
331     OpMap["OP_SCALAR_MUL_LN"]= OpScalarMulLane;
332     OpMap["OP_SCALAR_MUL_LNQ"]= OpScalarMulLaneQ;
333     OpMap["OP_SCALAR_MULX_LN"]= OpScalarMulXLane;
334     OpMap["OP_SCALAR_MULX_LNQ"]= OpScalarMulXLaneQ;
335     OpMap["OP_SCALAR_VMULX_LN"]= OpScalarVMulXLane;
336     OpMap["OP_SCALAR_VMULX_LNQ"]= OpScalarVMulXLaneQ;
337     OpMap["OP_SCALAR_QDMULL_LN"] = OpScalarQDMullLane;
338     OpMap["OP_SCALAR_QDMULL_LNQ"] = OpScalarQDMullLaneQ;
339     OpMap["OP_SCALAR_QDMULH_LN"] = OpScalarQDMulHiLane;
340     OpMap["OP_SCALAR_QDMULH_LNQ"] = OpScalarQDMulHiLaneQ;
341     OpMap["OP_SCALAR_QRDMULH_LN"] = OpScalarQRDMulHiLane;
342     OpMap["OP_SCALAR_QRDMULH_LNQ"] = OpScalarQRDMulHiLaneQ;
343     OpMap["OP_SCALAR_GET_LN"] = OpScalarGetLane;
344     OpMap["OP_SCALAR_SET_LN"] = OpScalarSetLane;
345 
346     Record *SI = R.getClass("SInst");
347     Record *II = R.getClass("IInst");
348     Record *WI = R.getClass("WInst");
349     Record *SOpI = R.getClass("SOpInst");
350     Record *IOpI = R.getClass("IOpInst");
351     Record *WOpI = R.getClass("WOpInst");
352     Record *LOpI = R.getClass("LOpInst");
353     Record *NoTestOpI = R.getClass("NoTestOpInst");
354 
355     ClassMap[SI] = ClassS;
356     ClassMap[II] = ClassI;
357     ClassMap[WI] = ClassW;
358     ClassMap[SOpI] = ClassS;
359     ClassMap[IOpI] = ClassI;
360     ClassMap[WOpI] = ClassW;
361     ClassMap[LOpI] = ClassL;
362     ClassMap[NoTestOpI] = ClassNoTest;
363   }
364 
365   // run - Emit arm_neon.h.inc
366   void run(raw_ostream &o);
367 
368   // runHeader - Emit all the __builtin prototypes used in arm_neon.h
369   void runHeader(raw_ostream &o);
370 
371   // runTests - Emit tests for all the Neon intrinsics.
372   void runTests(raw_ostream &o);
373 
374 private:
375   void emitGuardedIntrinsic(raw_ostream &OS, Record *R,
376                             std::string &CurrentGuard, bool &InGuard,
377                             StringMap<ClassKind> &EmittedMap);
378   void emitIntrinsic(raw_ostream &OS, Record *R,
379                      StringMap<ClassKind> &EmittedMap);
380   void genBuiltinsDef(raw_ostream &OS);
381   void genOverloadTypeCheckCode(raw_ostream &OS);
382   void genIntrinsicRangeCheckCode(raw_ostream &OS);
383   void genTargetTest(raw_ostream &OS);
384 };
385 } // end anonymous namespace
386 
387 /// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
388 /// which each StringRef representing a single type declared in the string.
389 /// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
390 /// 2xfloat and 4xfloat respectively.
391 static void ParseTypes(Record *r, std::string &s,
392                        SmallVectorImpl<StringRef> &TV) {
393   const char *data = s.data();
394   int len = 0;
395 
396   for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
397     if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
398                          || data[len] == 'H' || data[len] == 'S')
399       continue;
400 
401     switch (data[len]) {
402       case 'c':
403       case 's':
404       case 'i':
405       case 'l':
406       case 'k':
407       case 'h':
408       case 'f':
409       case 'd':
410         break;
411       default:
412         PrintFatalError(r->getLoc(),
413                       "Unexpected letter: " + std::string(data + len, 1));
414     }
415     TV.push_back(StringRef(data, len + 1));
416     data += len + 1;
417     len = -1;
418   }
419 }
420 
421 /// Widen - Convert a type code into the next wider type.  char -> short,
422 /// short -> int, etc.
423 static char Widen(const char t) {
424   switch (t) {
425     case 'c':
426       return 's';
427     case 's':
428       return 'i';
429     case 'i':
430       return 'l';
431     case 'l':
432       return 'k';
433     case 'h':
434       return 'f';
435     case 'f':
436       return 'd';
437     default:
438       PrintFatalError("unhandled type in widen!");
439   }
440 }
441 
442 /// Narrow - Convert a type code into the next smaller type.  short -> char,
443 /// float -> half float, etc.
444 static char Narrow(const char t) {
445   switch (t) {
446     case 's':
447       return 'c';
448     case 'i':
449       return 's';
450     case 'l':
451       return 'i';
452     case 'k':
453       return 'l';
454     case 'f':
455       return 'h';
456     case 'd':
457       return 'f';
458     default:
459       PrintFatalError("unhandled type in narrow!");
460   }
461 }
462 
463 static std::string GetNarrowTypestr(StringRef ty)
464 {
465   std::string s;
466   for (size_t i = 0, end = ty.size(); i < end; i++) {
467     switch (ty[i]) {
468       case 's':
469         s += 'c';
470         break;
471       case 'i':
472         s += 's';
473         break;
474       case 'l':
475         s += 'i';
476         break;
477       case 'k':
478         s += 'l';
479         break;
480       default:
481         s += ty[i];
482         break;
483     }
484   }
485 
486   return s;
487 }
488 
489 /// For a particular StringRef, return the base type code, and whether it has
490 /// the quad-vector, polynomial, or unsigned modifiers set.
491 static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
492   unsigned off = 0;
493   // ignore scalar.
494   if (ty[off] == 'S') {
495     ++off;
496   }
497   // remember quad.
498   if (ty[off] == 'Q' || ty[off] == 'H') {
499     quad = true;
500     ++off;
501   }
502 
503   // remember poly.
504   if (ty[off] == 'P') {
505     poly = true;
506     ++off;
507   }
508 
509   // remember unsigned.
510   if (ty[off] == 'U') {
511     usgn = true;
512     ++off;
513   }
514 
515   // base type to get the type string for.
516   return ty[off];
517 }
518 
/// ModType - Transform a type code and its modifiers based on a mod code. The
/// mod code definitions may be found at the top of arm_neon.td.
///
/// \param mod   the modifier code to apply.
/// \param type  the base type code to transform; the transformed code is
///              returned.
/// \param quad, poly, usgn, scal, cnst, pntr
///              in/out flags updated in place as dictated by \p mod.  Flags
///              not mentioned by a given case keep their incoming value.
///
/// Several cases deliberately fall through into the next one; those places
/// are marked below.  Unknown mod codes leave everything unchanged.
static char ModType(const char mod, char type, bool &quad, bool &poly,
                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
  switch (mod) {
    case 't':
      // Polynomial types become unsigned; everything else is untouched.
      if (poly) {
        poly = false;
        usgn = true;
      }
      break;
    case 'b':
      scal = true;
      // fall through: 'b' is the scalar form of 'u'.
    case 'u':
      // Unsigned integer of the same width (float -> 'i', double -> 'l').
      usgn = true;
      poly = false;
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case '$':
      scal = true;
      // fall through: '$' is the scalar form of 'x'.
    case 'x':
      // Signed integer of the same width (float -> 'i', double -> 'l').
      usgn = false;
      poly = false;
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case 'o':
      // Scalar 'd'.
      scal = true;
      type = 'd';
      usgn = false;
      break;
    case 'y':
      scal = true;
      // fall through: 'y' is the scalar form of 'f'.
    case 'f':
      // Force type 'f'; an incoming 'h' additionally sets quad.
      if (type == 'h')
        quad = true;
      type = 'f';
      usgn = false;
      break;
    case 'F':
      // Force type 'd'.
      type = 'd';
      usgn = false;
      break;
    case 'g':
      // Same type, quad cleared.
      quad = false;
      break;
    case 'B':
    case 'C':
    case 'D':
    case 'j':
      // Same type, quad set.
      quad = true;
      break;
    case 'w':
      // Widened type, quad set.
      type = Widen(type);
      quad = true;
      break;
    case 'n':
      // Widened type, quad unchanged.
      type = Widen(type);
      break;
    case 'i':
      // Scalar 'i'.
      type = 'i';
      scal = true;
      break;
    case 'l':
      // Scalar unsigned 'l'.
      type = 'l';
      scal = true;
      usgn = true;
      break;
    case 'z':
      // Scalar of the narrowed type.
      type = Narrow(type);
      scal = true;
      break;
    case 'r':
      // Scalar of the widened type.
      type = Widen(type);
      scal = true;
      break;
    case 's':
    case 'a':
      // Scalar of the same type.
      scal = true;
      break;
    case 'k':
      // Same type, quad set.
      quad = true;
      break;
    case 'c':
      cnst = true;
      // fall through: 'c' is the const form of 'p'.
    case 'p':
      // Pointer; also flagged as scalar.
      pntr = true;
      scal = true;
      break;
    case 'h':
      // Narrowed type; quad is cleared when the result is 'h'.
      type = Narrow(type);
      if (type == 'h')
        quad = false;
      break;
    case 'q':
      // Narrowed type, quad set.
      type = Narrow(type);
      quad = true;
      break;
    case 'e':
      // Narrowed type, unsigned.
      type = Narrow(type);
      usgn = true;
      break;
    case 'm':
      // Narrowed type, quad cleared.
      type = Narrow(type);
      quad = false;
      break;
    default:
      break;
  }
  return type;
}
635 
/// IsMultiVecProto - Return true if the prototype code \p p is one of the
/// multi-vector codes '2', '3', '4', 'B', 'C' or 'D'.
static bool IsMultiVecProto(const char p) {
  switch (p) {
    case '2':
    case '3':
    case '4':
    case 'B':
    case 'C':
    case 'D':
      return true;
    default:
      return false;
  }
}
639 
640 /// TypeString - for a modifier and type, generate the name of the typedef for
641 /// that type.  QUc -> uint8x8_t.
642 static std::string TypeString(const char mod, StringRef typestr) {
643   bool quad = false;
644   bool poly = false;
645   bool usgn = false;
646   bool scal = false;
647   bool cnst = false;
648   bool pntr = false;
649 
650   if (mod == 'v')
651     return "void";
652   if (mod == 'i')
653     return "int";
654 
655   // base type to get the type string for.
656   char type = ClassifyType(typestr, quad, poly, usgn);
657 
658   // Based on the modifying character, change the type and width if necessary.
659   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
660 
661   SmallString<128> s;
662 
663   if (usgn)
664     s.push_back('u');
665 
666   switch (type) {
667     case 'c':
668       s += poly ? "poly8" : "int8";
669       if (scal)
670         break;
671       s += quad ? "x16" : "x8";
672       break;
673     case 's':
674       s += poly ? "poly16" : "int16";
675       if (scal)
676         break;
677       s += quad ? "x8" : "x4";
678       break;
679     case 'i':
680       s += "int32";
681       if (scal)
682         break;
683       s += quad ? "x4" : "x2";
684       break;
685     case 'l':
686       s += (poly && !usgn)? "poly64" : "int64";
687       if (scal)
688         break;
689       s += quad ? "x2" : "x1";
690       break;
691     case 'k':
692       s += "poly128";
693       break;
694     case 'h':
695       s += "float16";
696       if (scal)
697         break;
698       s += quad ? "x8" : "x4";
699       break;
700     case 'f':
701       s += "float32";
702       if (scal)
703         break;
704       s += quad ? "x4" : "x2";
705       break;
706     case 'd':
707       s += "float64";
708       if (scal)
709         break;
710       s += quad ? "x2" : "x1";
711       break;
712 
713     default:
714       PrintFatalError("unhandled type!");
715   }
716 
717   if (mod == '2' || mod == 'B')
718     s += "x2";
719   if (mod == '3' || mod == 'C')
720     s += "x3";
721   if (mod == '4' || mod == 'D')
722     s += "x4";
723 
724   // Append _t, finishing the type string typedef type.
725   s += "_t";
726 
727   if (cnst)
728     s += " const";
729 
730   if (pntr)
731     s += " *";
732 
733   return s.str();
734 }
735 
736 /// BuiltinTypeString - for a modifier and type, generate the clang
737 /// BuiltinsARM.def prototype code for the function.  See the top of clang's
738 /// Builtins.def for a description of the type strings.
739 static std::string BuiltinTypeString(const char mod, StringRef typestr,
740                                      ClassKind ck, bool ret) {
741   bool quad = false;
742   bool poly = false;
743   bool usgn = false;
744   bool scal = false;
745   bool cnst = false;
746   bool pntr = false;
747 
748   if (mod == 'v')
749     return "v"; // void
750   if (mod == 'i')
751     return "i"; // int
752 
753   // base type to get the type string for.
754   char type = ClassifyType(typestr, quad, poly, usgn);
755 
756   // Based on the modifying character, change the type and width if necessary.
757   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
758 
759   usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
760                          scal && type != 'f' && type != 'd');
761 
762   // All pointers are void* pointers.  Change type to 'v' now.
763   if (pntr) {
764     usgn = false;
765     poly = false;
766     type = 'v';
767   }
768   // Treat half-float ('h') types as unsigned short ('s') types.
769   if (type == 'h') {
770     type = 's';
771     usgn = true;
772   }
773 
774   if (scal) {
775     SmallString<128> s;
776 
777     if (usgn)
778       s.push_back('U');
779     else if (type == 'c')
780       s.push_back('S'); // make chars explicitly signed
781 
782     if (type == 'l') // 64-bit long
783       s += "Wi";
784     else if (type == 'k') // 128-bit long
785       s = "LLLi";
786     else
787       s.push_back(type);
788 
789     if (cnst)
790       s.push_back('C');
791     if (pntr)
792       s.push_back('*');
793     return s.str();
794   }
795 
796   // Since the return value must be one type, return a vector type of the
797   // appropriate width which we will bitcast.  An exception is made for
798   // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
799   // fashion, storing them to a pointer arg.
800   if (ret) {
801     if (IsMultiVecProto(mod))
802       return "vv*"; // void result with void* first argument
803     if (mod == 'f' || (ck != ClassB && type == 'f'))
804       return quad ? "V4f" : "V2f";
805     if (mod == 'F' || (ck != ClassB && type == 'd'))
806       return quad ? "V2d" : "V1d";
807     if (ck != ClassB && type == 's')
808       return quad ? "V8s" : "V4s";
809     if (ck != ClassB && type == 'i')
810       return quad ? "V4i" : "V2i";
811     if (ck != ClassB && type == 'l')
812       return quad ? "V2Wi" : "V1Wi";
813 
814     return quad ? "V16Sc" : "V8Sc";
815   }
816 
817   // Non-return array types are passed as individual vectors.
818   if (mod == '2' || mod == 'B')
819     return quad ? "V16ScV16Sc" : "V8ScV8Sc";
820   if (mod == '3' || mod == 'C')
821     return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
822   if (mod == '4' || mod == 'D')
823     return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
824 
825   if (mod == 'f' || (ck != ClassB && type == 'f'))
826     return quad ? "V4f" : "V2f";
827   if (mod == 'F' || (ck != ClassB && type == 'd'))
828     return quad ? "V2d" : "V1d";
829   if (ck != ClassB && type == 's')
830     return quad ? "V8s" : "V4s";
831   if (ck != ClassB && type == 'i')
832     return quad ? "V4i" : "V2i";
833   if (ck != ClassB && type == 'l')
834     return quad ? "V2Wi" : "V1Wi";
835 
836   return quad ? "V16Sc" : "V8Sc";
837 }
838 
839 /// InstructionTypeCode - Computes the ARM argument character code and
840 /// quad status for a specific type string and ClassKind.
841 static void InstructionTypeCode(const StringRef &typeStr,
842                                 const ClassKind ck,
843                                 bool &quad,
844                                 std::string &typeCode) {
845   bool poly = false;
846   bool usgn = false;
847   char type = ClassifyType(typeStr, quad, poly, usgn);
848 
849   switch (type) {
850   case 'c':
851     switch (ck) {
852     case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
853     case ClassI: typeCode = "i8"; break;
854     case ClassW: typeCode = "8"; break;
855     default: break;
856     }
857     break;
858   case 's':
859     switch (ck) {
860     case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
861     case ClassI: typeCode = "i16"; break;
862     case ClassW: typeCode = "16"; break;
863     default: break;
864     }
865     break;
866   case 'i':
867     switch (ck) {
868     case ClassS: typeCode = usgn ? "u32" : "s32"; break;
869     case ClassI: typeCode = "i32"; break;
870     case ClassW: typeCode = "32"; break;
871     default: break;
872     }
873     break;
874   case 'l':
875     switch (ck) {
876     case ClassS: typeCode = poly ? "p64" : usgn ? "u64" : "s64"; break;
877     case ClassI: typeCode = "i64"; break;
878     case ClassW: typeCode = "64"; break;
879     default: break;
880     }
881     break;
882   case 'k':
883     assert(poly && "Unrecognized 128 bit integer.");
884     typeCode = "p128";
885     break;
886   case 'h':
887     switch (ck) {
888     case ClassS:
889     case ClassI: typeCode = "f16"; break;
890     case ClassW: typeCode = "16"; break;
891     default: break;
892     }
893     break;
894   case 'f':
895     switch (ck) {
896     case ClassS:
897     case ClassI: typeCode = "f32"; break;
898     case ClassW: typeCode = "32"; break;
899     default: break;
900     }
901     break;
902   case 'd':
903     switch (ck) {
904     case ClassS:
905     case ClassI:
906       typeCode += "f64";
907       break;
908     case ClassW:
909       PrintFatalError("unhandled type!");
910     default:
911       break;
912     }
913     break;
914   default:
915     PrintFatalError("unhandled type!");
916   }
917 }
918 
919 static char Insert_BHSD_Suffix(StringRef typestr){
920   unsigned off = 0;
921   if(typestr[off++] == 'S'){
922     while(typestr[off] == 'Q' || typestr[off] == 'H'||
923           typestr[off] == 'P' || typestr[off] == 'U')
924       ++off;
925     switch (typestr[off]){
926     default  : break;
927     case 'c' : return 'b';
928     case 's' : return 'h';
929     case 'i' :
930     case 'f' : return 's';
931     case 'l' :
932     case 'd' : return 'd';
933     }
934   }
935   return 0;
936 }
937 
/// endsWith_xN - Return true if \p name is longer than three characters and
/// ends in "_x2", "_x3" or "_x4".
static bool endsWith_xN(std::string const &name) {
  const size_t len = name.length();
  if (len <= 3)
    return false;

  const char count = name[len - 1];
  return name[len - 3] == '_' && name[len - 2] == 'x' &&
         (count == '2' || count == '3' || count == '4');
}
947 
948 /// MangleName - Append a type or width suffix to a base neon function name,
949 /// and insert a 'q' in the appropriate location if type string starts with 'Q'.
950 /// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
951 /// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
952 static std::string MangleName(const std::string &name, StringRef typestr,
953                               ClassKind ck) {
954   if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64" ||
955       name == "vcvt_f64_f32")
956     return name;
957 
958   bool quad = false;
959   std::string typeCode = "";
960 
961   InstructionTypeCode(typestr, ck, quad, typeCode);
962 
963   std::string s = name;
964 
965   if (typeCode.size() > 0) {
966     // If the name is end with _xN (N = 2,3,4), insert the typeCode before _xN.
967     if (endsWith_xN(s))
968       s.insert(s.length() - 3, "_" + typeCode);
969     else
970       s += "_" + typeCode;
971   }
972 
973   if (ck == ClassB)
974     s += "_v";
975 
976   // Insert a 'q' before the first '_' character so that it ends up before
977   // _lane or _n on vector-scalar operations.
978   if (typestr.find("Q") != StringRef::npos) {
979       size_t pos = s.find('_');
980       s = s.insert(pos, "q");
981   }
982   char ins = Insert_BHSD_Suffix(typestr);
983   if(ins){
984     size_t pos = s.find('_');
985     s = s.insert(pos, &ins, 1);
986   }
987 
988   return s;
989 }
990 
991 static void PreprocessInstruction(const StringRef &Name,
992                                   const std::string &InstName,
993                                   std::string &Prefix,
994                                   bool &HasNPostfix,
995                                   bool &HasLanePostfix,
996                                   bool &HasDupPostfix,
997                                   bool &IsSpecialVCvt,
998                                   size_t &TBNumber) {
999   // All of our instruction name fields from arm_neon.td are of the form
1000   //   <instructionname>_...
1001   // Thus we grab our instruction name via computation of said Prefix.
1002   const size_t PrefixEnd = Name.find_first_of('_');
1003   // If InstName is passed in, we use that instead of our name Prefix.
1004   Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
1005 
1006   const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
1007 
1008   HasNPostfix = Postfix.count("_n");
1009   HasLanePostfix = Postfix.count("_lane");
1010   HasDupPostfix = Postfix.count("_dup");
1011   IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
1012 
1013   if (InstName.compare("vtbl") == 0 ||
1014       InstName.compare("vtbx") == 0) {
1015     // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
1016     // encoding to get its true value.
1017     TBNumber = Name[Name.size()-1] - 48;
1018   }
1019 }
1020 
1021 /// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
1022 /// extracted, generate a FileCheck pattern for a Load Or Store
1023 static void
1024 GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
1025                                           const std::string& OutTypeCode,
1026                                           const bool &IsQuad,
1027                                           const bool &HasDupPostfix,
1028                                           const bool &HasLanePostfix,
1029                                           const size_t Count,
1030                                           std::string &RegisterSuffix) {
1031   const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
1032   // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
1033   // will output a series of v{ld,st}1s, so we have to handle it specially.
1034   if ((Count == 3 || Count == 4) && IsQuad) {
1035     RegisterSuffix += "{";
1036     for (size_t i = 0; i < Count; i++) {
1037       RegisterSuffix += "d{{[0-9]+}}";
1038       if (HasDupPostfix) {
1039         RegisterSuffix += "[]";
1040       }
1041       if (HasLanePostfix) {
1042         RegisterSuffix += "[{{[0-9]+}}]";
1043       }
1044       if (i < Count-1) {
1045         RegisterSuffix += ", ";
1046       }
1047     }
1048     RegisterSuffix += "}";
1049   } else {
1050 
1051     // Handle normal loads and stores.
1052     RegisterSuffix += "{";
1053     for (size_t i = 0; i < Count; i++) {
1054       RegisterSuffix += "d{{[0-9]+}}";
1055       if (HasDupPostfix) {
1056         RegisterSuffix += "[]";
1057       }
1058       if (HasLanePostfix) {
1059         RegisterSuffix += "[{{[0-9]+}}]";
1060       }
1061       if (IsQuad && !HasLanePostfix) {
1062         RegisterSuffix += ", d{{[0-9]+}}";
1063         if (HasDupPostfix) {
1064           RegisterSuffix += "[]";
1065         }
1066       }
1067       if (i < Count-1) {
1068         RegisterSuffix += ", ";
1069       }
1070     }
1071     RegisterSuffix += "}, [r{{[0-9]+}}";
1072 
1073     // We only include the alignment hint if we have a vld1.*64 or
1074     // a dup/lane instruction.
1075     if (IsLDSTOne) {
1076       if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
1077         RegisterSuffix += ":" + OutTypeCode;
1078       }
1079     }
1080 
1081     RegisterSuffix += "]";
1082   }
1083 }
1084 
1085 static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
1086                                      const bool &HasNPostfix) {
1087   return (NameRef.count("vmla") ||
1088           NameRef.count("vmlal") ||
1089           NameRef.count("vmlsl") ||
1090           NameRef.count("vmull") ||
1091           NameRef.count("vqdmlal") ||
1092           NameRef.count("vqdmlsl") ||
1093           NameRef.count("vqdmulh") ||
1094           NameRef.count("vqdmull") ||
1095           NameRef.count("vqrdmulh")) && HasNPostfix;
1096 }
1097 
1098 static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
1099                                          const bool &HasLanePostfix) {
1100   return (NameRef.count("vmla") ||
1101           NameRef.count("vmls") ||
1102           NameRef.count("vmlal") ||
1103           NameRef.count("vmlsl") ||
1104           (NameRef.count("vmul") && NameRef.size() == 3)||
1105           NameRef.count("vqdmlal") ||
1106           NameRef.count("vqdmlsl") ||
1107           NameRef.count("vqdmulh") ||
1108           NameRef.count("vqrdmulh")) && HasLanePostfix;
1109 }
1110 
1111 static bool IsSpecialLaneMultiply(const StringRef &NameRef,
1112                                   const bool &HasLanePostfix,
1113                                   const bool &IsQuad) {
1114   const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
1115                                && IsQuad;
1116   const bool IsVMull = NameRef.count("mull") && !IsQuad;
1117   return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
1118 }
1119 
1120 static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
1121                                                      const std::string &Proto,
1122                                                      const bool &HasNPostfix,
1123                                                      const bool &IsQuad,
1124                                                      const bool &HasLanePostfix,
1125                                                      const bool &HasDupPostfix,
1126                                                      std::string &NormedProto) {
1127   // Handle generic case.
1128   const StringRef NameRef(Name);
1129   for (size_t i = 0, end = Proto.size(); i < end; i++) {
1130     switch (Proto[i]) {
1131     case 'u':
1132     case 'f':
1133     case 'F':
1134     case 'd':
1135     case 's':
1136     case 'x':
1137     case 't':
1138     case 'n':
1139       NormedProto += IsQuad? 'q' : 'd';
1140       break;
1141     case 'w':
1142     case 'k':
1143       NormedProto += 'q';
1144       break;
1145     case 'g':
1146     case 'j':
1147     case 'h':
1148     case 'e':
1149       NormedProto += 'd';
1150       break;
1151     case 'i':
1152       NormedProto += HasLanePostfix? 'a' : 'i';
1153       break;
1154     case 'a':
1155       if (HasLanePostfix) {
1156         NormedProto += 'a';
1157       } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
1158         NormedProto += IsQuad? 'q' : 'd';
1159       } else {
1160         NormedProto += 'i';
1161       }
1162       break;
1163     }
1164   }
1165 
1166   // Handle Special Cases.
1167   const bool IsNotVExt = !NameRef.count("vext");
1168   const bool IsVPADAL = NameRef.count("vpadal");
1169   const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
1170                                                            HasLanePostfix);
1171   const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
1172                                                       IsQuad);
1173 
1174   if (IsSpecialLaneMul) {
1175     // If
1176     NormedProto[2] = NormedProto[3];
1177     NormedProto.erase(3);
1178   } else if (NormedProto.size() == 4 &&
1179              NormedProto[0] == NormedProto[1] &&
1180              IsNotVExt) {
1181     // If NormedProto.size() == 4 and the first two proto characters are the
1182     // same, ignore the first.
1183     NormedProto = NormedProto.substr(1, 3);
1184   } else if (Is5OpLaneAccum) {
1185     // If we have a 5 op lane accumulator operation, we take characters 1,2,4
1186     std::string tmp = NormedProto.substr(1,2);
1187     tmp += NormedProto[4];
1188     NormedProto = tmp;
1189   } else if (IsVPADAL) {
1190     // If we have VPADAL, ignore the first character.
1191     NormedProto = NormedProto.substr(0, 2);
1192   } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
1193     // If our instruction is a dup instruction, keep only the first and
1194     // last characters.
1195     std::string tmp = "";
1196     tmp += NormedProto[0];
1197     tmp += NormedProto[NormedProto.size()-1];
1198     NormedProto = tmp;
1199   }
1200 }
1201 
1202 /// GenerateRegisterCheckPatterns - Given a bunch of data we have
1203 /// extracted, generate a FileCheck pattern to check that an
1204 /// instruction's arguments are correct.
1205 static void GenerateRegisterCheckPattern(const std::string &Name,
1206                                          const std::string &Proto,
1207                                          const std::string &OutTypeCode,
1208                                          const bool &HasNPostfix,
1209                                          const bool &IsQuad,
1210                                          const bool &HasLanePostfix,
1211                                          const bool &HasDupPostfix,
1212                                          const size_t &TBNumber,
1213                                          std::string &RegisterSuffix) {
1214 
1215   RegisterSuffix = "";
1216 
1217   const StringRef NameRef(Name);
1218 
1219   if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
1220     return;
1221   }
1222 
1223   const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
1224   const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");
1225 
1226   if (IsLoadStore) {
1227     // Grab N value from  v{ld,st}N using its ascii representation.
1228     const size_t Count = NameRef[3] - 48;
1229 
1230     GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
1231                                               HasDupPostfix, HasLanePostfix,
1232                                               Count, RegisterSuffix);
1233   } else if (IsTBXOrTBL) {
1234     RegisterSuffix += "d{{[0-9]+}}, {";
1235     for (size_t i = 0; i < TBNumber-1; i++) {
1236       RegisterSuffix += "d{{[0-9]+}}, ";
1237     }
1238     RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
1239   } else {
1240     // Handle a normal instruction.
1241     if (NameRef.count("vget") || NameRef.count("vset"))
1242       return;
1243 
1244     // We first normalize our proto, since we only need to emit 4
1245     // different types of checks, yet have more than 4 proto types
1246     // that map onto those 4 patterns.
1247     std::string NormalizedProto("");
1248     NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
1249                                              HasLanePostfix, HasDupPostfix,
1250                                              NormalizedProto);
1251 
1252     for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
1253       const char &c = NormalizedProto[i];
1254       switch (c) {
1255       case 'q':
1256         RegisterSuffix += "q{{[0-9]+}}, ";
1257         break;
1258 
1259       case 'd':
1260         RegisterSuffix += "d{{[0-9]+}}, ";
1261         break;
1262 
1263       case 'i':
1264         RegisterSuffix += "#{{[0-9]+}}, ";
1265         break;
1266 
1267       case 'a':
1268         RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
1269         break;
1270       }
1271     }
1272 
1273     // Remove extra ", ".
1274     RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
1275   }
1276 }
1277 
/// GenerateChecksForIntrinsic - Given a specific instruction name +
/// typestr + class kind, generate the proper set of FileCheck
/// Patterns to check for. We could just return a string, but instead
/// use a vector since it provides us with the extra flexibility of
/// emitting multiple checks, which comes in handy for certain cases
/// like mla where we want to check for 2 different instructions.
///
/// Name is the intrinsic name; Proto, together with Name, is forwarded to
/// GenerateRegisterCheckPattern to build the operand pattern.  OutTypeStr
/// and Ck are handed to InstructionTypeCode to obtain the type code.
/// InTypeStr is not referenced by this function.  InstName is forwarded to
/// PreprocessInstruction.  When Ck is ClassL or IsHiddenLOp is set, the
/// check consists of the bare prefix plus operands, with no type code.
/// Every generated check is appended to Result; nothing is appended when
/// Ck is ClassNoTest (and in the 64-bit non-quad vdup/vext case below).
static void GenerateChecksForIntrinsic(const std::string &Name,
                                       const std::string &Proto,
                                       StringRef &OutTypeStr,
                                       StringRef &InTypeStr,
                                       ClassKind Ck,
                                       const std::string &InstName,
                                       bool IsHiddenLOp,
                                       std::vector<std::string>& Result) {

  // If Ck is a ClassNoTest instruction, just return so no test is
  // emitted.
  if(Ck == ClassNoTest)
    return;

  // vcvt_f32_f16 gets a fixed check with no operand pattern.
  if (Name == "vcvt_f32_f16") {
    Result.push_back("vcvt.f32.f16");
    return;
  }


  // Now we preprocess our instruction given the data we have to get the
  // data that we need.
  // Create a StringRef for String Manipulation of our Name.
  const StringRef NameRef(Name);
  // Instruction Prefix.
  std::string Prefix;
  // The type code for our out type string.
  std::string OutTypeCode;
  // To handle our different cases, we need to check for different postfixes.
  // Is our instruction a quad instruction.
  bool IsQuad = false;
  // Our instruction is of the form <instructionname>_n.
  bool HasNPostfix = false;
  // Our instruction is of the form <instructionname>_lane.
  bool HasLanePostfix = false;
  // Our instruction is of the form <instructionname>_dup.
  bool HasDupPostfix  = false;
  // Our instruction is a vcvt instruction which requires special handling.
  // (Only passed to PreprocessInstruction; not read again in this function.)
  bool IsSpecialVCvt = false;
  // If we have a vtbxN or vtblN instruction, this is set to N.
  size_t TBNumber = -1;
  // Register Suffix
  std::string RegisterSuffix;

  PreprocessInstruction(NameRef, InstName, Prefix,
                        HasNPostfix, HasLanePostfix, HasDupPostfix,
                        IsSpecialVCvt, TBNumber);

  // Presumably fills in IsQuad and OutTypeCode from the out type string and
  // class kind - confirm against InstructionTypeCode.
  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
                               HasLanePostfix, HasDupPostfix, TBNumber,
                               RegisterSuffix);

  // In the following section, we handle a bunch of special cases. You can tell
  // a special case by the fact we are returning early.

  // If our instruction is a logical instruction without postfix or a
  // hidden LOp just return the current Prefix.
  if (Ck == ClassL || IsHiddenLOp) {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // If we have a vmov, due to the many different cases, some of which
  // vary within the different intrinsics generated for a single
  // instruction type, just output a vmov. (e.g. given an instruction
  // A, A.u32 might be vmov and A.u8 might be vmov.8).
  //
  // FIXME: Maybe something can be done about this. The two cases that we care
  // about are vmov as an LType and vmov as a WType.
  if (Prefix == "vmov") {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // In the following section, we handle special cases.

  if (OutTypeCode == "64") {
    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
    // type, the intrinsic will be optimized away, so just return
    // nothing.  On the other hand if we are handling an uint64x2_t
    // (i.e. quad instruction), vdup/vmov instructions should be
    // emitted.
    if (Prefix == "vdup" || Prefix == "vext") {
      if (IsQuad) {
        Result.push_back("{{vmov|vdup}}");
      }
      return;
    }

    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
    // multiple register operands.
    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
                            || Prefix == "vld4";
    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
                            || Prefix == "vst4";
    if (MultiLoadPrefix || MultiStorePrefix) {
      // The first three characters of the name ("vld"/"vst") are reused.
      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
      return;
    }

    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
    // emitting said instructions. So return a check for
    // vldr/vstr/vmov/str instead.
    if (HasLanePostfix || HasDupPostfix) {
      if (Prefix == "vst1") {
        Result.push_back("{{str|vstr|vmov}}");
        return;
      } else if (Prefix == "vld1") {
        Result.push_back("{{ldr|vldr|vmov}}");
        return;
      }
    }
  }

  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
  // sometimes disassembled as vtrn.32. We use a regex to handle both
  // cases.
  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
    return;
  }

  // Currently on most ARM processors, we do not use vmla/vmls for
  // quad floating point operations. Instead we output vmul + vadd
  // (or vmul + vsub for vmls). So for f32 we emit two checks: one for
  // the vmul with its operands, and one for the following vadd/vsub
  // without operands.
  if (OutTypeCode == "f32") {
    if (Prefix == "vmls") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vsub." + OutTypeCode);
      return;
    } else if (Prefix == "vmla") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vadd." + OutTypeCode);
      return;
    }
  }

  // If we have vcvt, get the input type from the instruction name
  // (which should be of the form instname_inputtype) and append it
  // before the output type.
  if (Prefix == "vcvt") {
    // Everything after the last '_' in the name is taken as the input type.
    const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
    Prefix += "." + inTypeCode;
  }

  // Append output type code to get our final mangled instruction.
  Prefix += "." + OutTypeCode;

  Result.push_back(Prefix + " " + RegisterSuffix);
}
1436 
1437 /// UseMacro - Examine the prototype string to determine if the intrinsic
1438 /// should be defined as a preprocessor macro instead of an inline function.
1439 static bool UseMacro(const std::string &proto, StringRef typestr) {
1440   // If this builtin takes an immediate argument, we need to #define it rather
1441   // than use a standard declaration, so that SemaChecking can range check
1442   // the immediate passed by the user.
1443   if (proto.find('i') != std::string::npos)
1444     return true;
1445 
1446   // Pointer arguments need to use macros to avoid hiding aligned attributes
1447   // from the pointer type.
1448   if (proto.find('p') != std::string::npos ||
1449       proto.find('c') != std::string::npos)
1450     return true;
1451 
1452   // It is not permitted to pass or return an __fp16 by value, so intrinsics
1453   // taking a scalar float16_t must be implemented as macros.
1454   if (typestr.find('h') != std::string::npos &&
1455       proto.find('s') != std::string::npos)
1456     return true;
1457 
1458   return false;
1459 }
1460 
/// MacroArgUsedDirectly - Tell whether argument i of a macro-implemented
/// intrinsic is referenced as-is in the macro body rather than being copied
/// into a local temporary first.
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  switch (proto[i]) {
  case 'i': // constant int
  case 'p': // pointer
  case 'c': // const pointer
    return true;
  default:
    return false;
  }
}
1468 
1469 // Generate the string "(argtype a, argtype b, ...)"
1470 static std::string GenArgs(const std::string &proto, StringRef typestr,
1471                            const std::string &name) {
1472   bool define = UseMacro(proto, typestr);
1473   char arg = 'a';
1474 
1475   std::string s;
1476   s += "(";
1477 
1478   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1479     if (define) {
1480       // Some macro arguments are used directly instead of being assigned
1481       // to local temporaries; prepend an underscore prefix to make their
1482       // names consistent with the local temporaries.
1483       if (MacroArgUsedDirectly(proto, i))
1484         s += "__";
1485     } else {
1486       s += TypeString(proto[i], typestr) + " __";
1487     }
1488     s.push_back(arg);
1489     if ((i + 1) < e)
1490       s += ", ";
1491   }
1492 
1493   s += ")";
1494   return s;
1495 }
1496 
1497 // Macro arguments are not type-checked like inline function arguments, so
1498 // assign them to local temporaries to get the right type checking.
1499 static std::string GenMacroLocals(const std::string &proto, StringRef typestr,
1500                                   const std::string &name ) {
1501   char arg = 'a';
1502   std::string s;
1503   bool generatedLocal = false;
1504 
1505   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1506     // Do not create a temporary for an immediate argument.
1507     // That would defeat the whole point of using a macro!
1508     if (MacroArgUsedDirectly(proto, i))
1509       continue;
1510     generatedLocal = true;
1511     s += TypeString(proto[i], typestr) + " __";
1512     s.push_back(arg);
1513     s += " = (";
1514     s.push_back(arg);
1515     s += "); ";
1516   }
1517 
1518   if (generatedLocal)
1519     s += "\\\n  ";
1520   return s;
1521 }
1522 
1523 // Use the vmovl builtin to sign-extend or zero-extend a vector.
1524 static std::string Extend(StringRef typestr, const std::string &a, bool h=0) {
1525   std::string s, high;
1526   high = h ? "_high" : "";
1527   s = MangleName("vmovl" + high, typestr, ClassS);
1528   s += "(" + a + ")";
1529   return s;
1530 }
1531 
1532 // Get the high 64-bit part of a vector
1533 static std::string GetHigh(const std::string &a, StringRef typestr) {
1534   std::string s;
1535   s = MangleName("vget_high", typestr, ClassS);
1536   s += "(" + a + ")";
1537   return s;
1538 }
1539 
1540 // Gen operation with two operands and get high 64-bit for both of two operands.
1541 static std::string Gen2OpWith2High(StringRef typestr,
1542                                    const std::string &op,
1543                                    const std::string &a,
1544                                    const std::string &b) {
1545   std::string s;
1546   std::string Op1 = GetHigh(a, typestr);
1547   std::string Op2 = GetHigh(b, typestr);
1548   s = MangleName(op, typestr, ClassS);
1549   s += "(" + Op1 + ", " + Op2 + ");";
1550   return s;
1551 }
1552 
1553 // Gen operation with three operands and get high 64-bit of the latter
1554 // two operands.
1555 static std::string Gen3OpWith2High(StringRef typestr,
1556                                    const std::string &op,
1557                                    const std::string &a,
1558                                    const std::string &b,
1559                                    const std::string &c) {
1560   std::string s;
1561   std::string Op1 = GetHigh(b, typestr);
1562   std::string Op2 = GetHigh(c, typestr);
1563   s = MangleName(op, typestr, ClassS);
1564   s += "(" + a + ", " + Op1 + ", " + Op2 + ");";
1565   return s;
1566 }
1567 
1568 // Gen combine operation by putting a on low 64-bit, and b on high 64-bit.
1569 static std::string GenCombine(std::string typestr,
1570                               const std::string &a,
1571                               const std::string &b) {
1572   std::string s;
1573   s = MangleName("vcombine", typestr, ClassS);
1574   s += "(" + a + ", " + b + ")";
1575   return s;
1576 }
1577 
1578 static std::string Duplicate(unsigned nElts, StringRef typestr,
1579                              const std::string &a) {
1580   std::string s;
1581 
1582   s = "(" + TypeString('d', typestr) + "){ ";
1583   for (unsigned i = 0; i != nElts; ++i) {
1584     s += a;
1585     if ((i + 1) < nElts)
1586       s += ", ";
1587   }
1588   s += " }";
1589 
1590   return s;
1591 }
1592 
// Emit "__builtin_shufflevector(vec, vec, lane, lane, ...)" with the lane
// expression repeated nElts times.
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string call = "__builtin_shufflevector(";
  call += vec;
  call += ", ";
  call += vec;

  const std::string laneArg = ", " + lane;
  for (unsigned remaining = nElts; remaining != 0; --remaining)
    call += laneArg;

  call += ")";
  return call;
}
1601 
1602 static std::string RemoveHigh(const std::string &name) {
1603   std::string s = name;
1604   std::size_t found = s.find("_high_");
1605   if (found == std::string::npos)
1606     PrintFatalError("name should contain \"_high_\" for high intrinsics");
1607   s.replace(found, 5, "");
1608   return s;
1609 }
1610 
1611 static unsigned GetNumElements(StringRef typestr, bool &quad) {
1612   quad = false;
1613   bool dummy = false;
1614   char type = ClassifyType(typestr, quad, dummy, dummy);
1615   unsigned nElts = 0;
1616   switch (type) {
1617   case 'c': nElts = 8; break;
1618   case 's': nElts = 4; break;
1619   case 'i': nElts = 2; break;
1620   case 'l': nElts = 1; break;
1621   case 'k': nElts = 1; break;
1622   case 'h': nElts = 4; break;
1623   case 'f': nElts = 2; break;
1624   case 'd':
1625     nElts = 1;
1626     break;
1627   default:
1628     PrintFatalError("unhandled type!");
1629   }
1630   if (quad) nElts <<= 1;
1631   return nElts;
1632 }
1633 
1634 // Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
1635 //
1636 // Note that some intrinsic definitions around 'lane' are being implemented
1637 // with macros, because they all contain constant integer argument, and we
1638 // statically check the range of the lane index to meet the semantic
1639 // requirement of different intrinsics.
1640 //
// For the intrinsics implemented with macro, if they contain another intrinsic
// implemented with macro, we have to avoid using the same argument names for
// the nested intrinsics. For example, macro vfms_lane is being implemented
// with another macro vfma_lane, so we rename all arguments for vfms_lane by
// adding a suffix '1'.
1646 
1647 static std::string GenOpString(const std::string &name, OpKind op,
1648                                const std::string &proto, StringRef typestr) {
1649   bool quad;
1650   unsigned nElts = GetNumElements(typestr, quad);
1651   bool define = UseMacro(proto, typestr);
1652 
1653   std::string ts = TypeString(proto[0], typestr);
1654   std::string s;
1655   if (!define) {
1656     s = "return ";
1657   }
1658 
1659   switch(op) {
1660   case OpAdd:
1661     s += "__a + __b;";
1662     break;
1663   case OpAddl:
1664     s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
1665     break;
1666   case OpAddlHi:
1667     s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
1668     break;
1669   case OpAddw:
1670     s += "__a + " + Extend(typestr, "__b") + ";";
1671     break;
1672   case OpAddwHi:
1673     s += "__a + " + Extend(typestr, "__b", 1) + ";";
1674     break;
1675   case OpSub:
1676     s += "__a - __b;";
1677     break;
1678   case OpSubl:
1679     s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
1680     break;
1681   case OpSublHi:
1682     s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
1683     break;
1684   case OpSubw:
1685     s += "__a - " + Extend(typestr, "__b") + ";";
1686     break;
1687   case OpSubwHi:
1688     s += "__a - " + Extend(typestr, "__b", 1) + ";";
1689     break;
1690   case OpMulN:
1691     s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
1692     break;
1693   case OpMulLane:
1694     s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
1695     break;
1696   case OpMulXLane:
1697     s += MangleName("vmulx", typestr, ClassS) + "(__a, " +
1698       SplatLane(nElts, "__b", "__c") + ");";
1699     break;
1700   case OpMul:
1701     s += "__a * __b;";
1702     break;
1703   case OpFMlaN:
1704     s += MangleName("vfma", typestr, ClassS);
1705     s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
1706     break;
1707   case OpFMlsN:
1708     s += MangleName("vfms", typestr, ClassS);
1709     s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
1710     break;
1711   case OpMullLane:
1712     s += MangleName("vmull", typestr, ClassS) + "(__a, " +
1713       SplatLane(nElts, "__b", "__c") + ");";
1714     break;
1715   case OpMullHiLane:
1716     s += MangleName("vmull", typestr, ClassS) + "(" +
1717       GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
1718     break;
1719   case OpMlaN:
1720     s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1721     break;
1722   case OpMlaLane:
1723     s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1724     break;
1725   case OpMla:
1726     s += "__a + (__b * __c);";
1727     break;
1728   case OpMlalN:
1729     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1730       Duplicate(nElts, typestr, "__c") + ");";
1731     break;
1732   case OpMlalLane:
1733     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1734       SplatLane(nElts, "__c", "__d") + ");";
1735     break;
1736   case OpMlalHiLane:
1737     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" +
1738       GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1739     break;
1740   case OpMlal:
1741     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1742     break;
1743   case OpMullHi:
1744     s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
1745     break;
1746   case OpMullHiP64: {
1747     std::string Op1 = GetHigh("__a", typestr);
1748     std::string Op2 = GetHigh("__b", typestr);
1749     s += MangleName("vmull", typestr, ClassS);
1750     s += "((poly64_t)" + Op1 + ", (poly64_t)" + Op2 + ");";
1751     break;
1752   }
1753   case OpMullHiN:
1754     s += MangleName("vmull_n", typestr, ClassS);
1755     s += "(" + GetHigh("__a", typestr) + ", __b);";
1756     return s;
1757   case OpMlalHi:
1758     s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
1759     break;
1760   case OpMlalHiN:
1761     s += MangleName("vmlal_n", typestr, ClassS);
1762     s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
1763     return s;
1764   case OpMlsN:
1765     s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1766     break;
1767   case OpMlsLane:
1768     s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1769     break;
1770   case OpFMSLane:
1771     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
1772     s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
1773     s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
1774     s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
1775     break;
1776   case OpFMSLaneQ:
1777     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
1778     s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
1779     s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
1780     s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
1781     break;
1782   case OpMls:
1783     s += "__a - (__b * __c);";
1784     break;
1785   case OpMlslN:
1786     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1787       Duplicate(nElts, typestr, "__c") + ");";
1788     break;
1789   case OpMlslLane:
1790     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1791       SplatLane(nElts, "__c", "__d") + ");";
1792     break;
1793   case OpMlslHiLane:
1794     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" +
1795       GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1796     break;
1797   case OpMlsl:
1798     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1799     break;
1800   case OpMlslHi:
1801     s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
1802     break;
1803   case OpMlslHiN:
1804     s += MangleName("vmlsl_n", typestr, ClassS);
1805     s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
1806     break;
1807   case OpQDMullLane:
1808     s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
1809       SplatLane(nElts, "__b", "__c") + ");";
1810     break;
1811   case OpQDMullHiLane:
1812     s += MangleName("vqdmull", typestr, ClassS) + "(" +
1813       GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
1814     break;
1815   case OpQDMlalLane:
1816     s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
1817       SplatLane(nElts, "__c", "__d") + ");";
1818     break;
1819   case OpQDMlalHiLane:
1820     s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " +
1821       GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1822     break;
1823   case OpQDMlslLane:
1824     s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
1825       SplatLane(nElts, "__c", "__d") + ");";
1826     break;
1827   case OpQDMlslHiLane:
1828     s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " +
1829       GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1830     break;
1831   case OpQDMulhLane:
1832     s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
1833       SplatLane(nElts, "__b", "__c") + ");";
1834     break;
1835   case OpQRDMulhLane:
1836     s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
1837       SplatLane(nElts, "__b", "__c") + ");";
1838     break;
1839   case OpEq:
1840     s += "(" + ts + ")(__a == __b);";
1841     break;
1842   case OpGe:
1843     s += "(" + ts + ")(__a >= __b);";
1844     break;
1845   case OpLe:
1846     s += "(" + ts + ")(__a <= __b);";
1847     break;
1848   case OpGt:
1849     s += "(" + ts + ")(__a > __b);";
1850     break;
1851   case OpLt:
1852     s += "(" + ts + ")(__a < __b);";
1853     break;
1854   case OpNeg:
1855     s += " -__a;";
1856     break;
1857   case OpNot:
1858     s += " ~__a;";
1859     break;
1860   case OpAnd:
1861     s += "__a & __b;";
1862     break;
1863   case OpOr:
1864     s += "__a | __b;";
1865     break;
1866   case OpXor:
1867     s += "__a ^ __b;";
1868     break;
1869   case OpAndNot:
1870     s += "__a & ~__b;";
1871     break;
1872   case OpOrNot:
1873     s += "__a | ~__b;";
1874     break;
1875   case OpCast:
1876     s += "(" + ts + ")__a;";
1877     break;
1878   case OpConcat:
1879     s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
1880     s += ", (int64x1_t)__b, 0, 1);";
1881     break;
1882   case OpHi:
1883     // nElts is for the result vector, so the source is twice that number.
1884     s += "__builtin_shufflevector(__a, __a";
1885     for (unsigned i = nElts; i < nElts * 2; ++i)
1886       s += ", " + utostr(i);
1887     s+= ");";
1888     break;
1889   case OpLo:
1890     s += "__builtin_shufflevector(__a, __a";
1891     for (unsigned i = 0; i < nElts; ++i)
1892       s += ", " + utostr(i);
1893     s+= ");";
1894     break;
1895   case OpDup:
1896     s += Duplicate(nElts, typestr, "__a") + ";";
1897     break;
1898   case OpDupLane:
1899     s += SplatLane(nElts, "__a", "__b") + ";";
1900     break;
1901   case OpSelect:
1902     // ((0 & 1) | (~0 & 2))
1903     s += "(" + ts + ")";
1904     ts = TypeString(proto[1], typestr);
1905     s += "((__a & (" + ts + ")__b) | ";
1906     s += "(~__a & (" + ts + ")__c));";
1907     break;
1908   case OpRev16:
1909     s += "__builtin_shufflevector(__a, __a";
1910     for (unsigned i = 2; i <= nElts; i += 2)
1911       for (unsigned j = 0; j != 2; ++j)
1912         s += ", " + utostr(i - j - 1);
1913     s += ");";
1914     break;
1915   case OpRev32: {
1916     unsigned WordElts = nElts >> (1 + (int)quad);
1917     s += "__builtin_shufflevector(__a, __a";
1918     for (unsigned i = WordElts; i <= nElts; i += WordElts)
1919       for (unsigned j = 0; j != WordElts; ++j)
1920         s += ", " + utostr(i - j - 1);
1921     s += ");";
1922     break;
1923   }
1924   case OpRev64: {
1925     unsigned DblWordElts = nElts >> (int)quad;
1926     s += "__builtin_shufflevector(__a, __a";
1927     for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
1928       for (unsigned j = 0; j != DblWordElts; ++j)
1929         s += ", " + utostr(i - j - 1);
1930     s += ");";
1931     break;
1932   }
1933   case OpXtnHi: {
1934     s = TypeString(proto[1], typestr) + " __a1 = " +
1935         MangleName("vmovn", typestr, ClassS) + "(__b);\n  " +
1936         "return __builtin_shufflevector(__a, __a1";
1937     for (unsigned i = 0; i < nElts * 4; ++i)
1938       s += ", " + utostr(i);
1939     s += ");";
1940     break;
1941   }
1942   case OpSqxtunHi: {
1943     s = TypeString(proto[1], typestr) + " __a1 = " +
1944         MangleName("vqmovun", typestr, ClassS) + "(__b);\n  " +
1945         "return __builtin_shufflevector(__a, __a1";
1946     for (unsigned i = 0; i < nElts * 4; ++i)
1947       s += ", " + utostr(i);
1948     s += ");";
1949     break;
1950   }
1951   case OpQxtnHi: {
1952     s = TypeString(proto[1], typestr) + " __a1 = " +
1953         MangleName("vqmovn", typestr, ClassS) + "(__b);\n  " +
1954         "return __builtin_shufflevector(__a, __a1";
1955     for (unsigned i = 0; i < nElts * 4; ++i)
1956       s += ", " + utostr(i);
1957     s += ");";
1958     break;
1959   }
1960   case OpFcvtnHi: {
1961     std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16";
1962     s = TypeString(proto[1], typestr) + " __a1 = " +
1963         MangleName(FName, typestr, ClassS) + "(__b);\n  " +
1964         "return __builtin_shufflevector(__a, __a1";
1965     for (unsigned i = 0; i < nElts * 4; ++i)
1966       s += ", " + utostr(i);
1967     s += ");";
1968     break;
1969   }
1970   case OpFcvtlHi: {
1971     std::string FName = (nElts == 2) ? "vcvt_f64" : "vcvt_f32";
1972     s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) +
1973         ";\n  return " + MangleName(FName, typestr, ClassS) + "(__a1);";
1974     break;
1975   }
1976   case OpFcvtxnHi: {
1977     s = TypeString(proto[1], typestr) + " __a1 = " +
1978         MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n  " +
1979         "return __builtin_shufflevector(__a, __a1";
1980     for (unsigned i = 0; i < nElts * 4; ++i)
1981       s += ", " + utostr(i);
1982     s += ");";
1983     break;
1984   }
1985   case OpUzp1:
1986     s += "__builtin_shufflevector(__a, __b";
1987     for (unsigned i = 0; i < nElts; i++)
1988       s += ", " + utostr(2*i);
1989     s += ");";
1990     break;
1991   case OpUzp2:
1992     s += "__builtin_shufflevector(__a, __b";
1993     for (unsigned i = 0; i < nElts; i++)
1994       s += ", " + utostr(2*i+1);
1995     s += ");";
1996     break;
1997   case OpZip1:
1998     s += "__builtin_shufflevector(__a, __b";
1999     for (unsigned i = 0; i < (nElts/2); i++)
2000        s += ", " + utostr(i) + ", " + utostr(i+nElts);
2001     s += ");";
2002     break;
2003   case OpZip2:
2004     s += "__builtin_shufflevector(__a, __b";
2005     for (unsigned i = nElts/2; i < nElts; i++)
2006        s += ", " + utostr(i) + ", " + utostr(i+nElts);
2007     s += ");";
2008     break;
2009   case OpTrn1:
2010     s += "__builtin_shufflevector(__a, __b";
2011     for (unsigned i = 0; i < (nElts/2); i++)
2012        s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts);
2013     s += ");";
2014     break;
2015   case OpTrn2:
2016     s += "__builtin_shufflevector(__a, __b";
2017     for (unsigned i = 0; i < (nElts/2); i++)
2018        s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts);
2019     s += ");";
2020     break;
2021   case OpAbdl: {
2022     std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
2023     if (typestr[0] != 'U') {
2024       // vabd results are always unsigned and must be zero-extended.
2025       std::string utype = "U" + typestr.str();
2026       s += "(" + TypeString(proto[0], typestr) + ")";
2027       abd = "(" + TypeString('d', utype) + ")" + abd;
2028       s += Extend(utype, abd) + ";";
2029     } else {
2030       s += Extend(typestr, abd) + ";";
2031     }
2032     break;
2033   }
2034   case OpAbdlHi:
2035     s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
2036     break;
2037   case OpAddhnHi: {
2038     std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
2039     s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
2040     s += ";";
2041     break;
2042   }
2043   case OpRAddhnHi: {
2044     std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
2045     s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
2046     s += ";";
2047     break;
2048   }
2049   case OpSubhnHi: {
2050     std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
2051     s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
2052     s += ";";
2053     break;
2054   }
2055   case OpRSubhnHi: {
2056     std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
2057     s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
2058     s += ";";
2059     break;
2060   }
2061   case OpAba:
2062     s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
2063     break;
2064   case OpAbal:
2065     s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
2066     break;
2067   case OpAbalHi:
2068     s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
2069     break;
2070   case OpQDMullHi:
2071     s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
2072     break;
2073   case OpQDMullHiN:
2074     s += MangleName("vqdmull_n", typestr, ClassS);
2075     s += "(" + GetHigh("__a", typestr) + ", __b);";
2076     return s;
2077   case OpQDMlalHi:
2078     s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
2079     break;
2080   case OpQDMlalHiN:
2081     s += MangleName("vqdmlal_n", typestr, ClassS);
2082     s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
2083     return s;
2084   case OpQDMlslHi:
2085     s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
2086     break;
2087   case OpQDMlslHiN:
2088     s += MangleName("vqdmlsl_n", typestr, ClassS);
2089     s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
2090     return s;
2091   case OpDiv:
2092     s += "__a / __b;";
2093     break;
2094   case OpMovlHi: {
2095     s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
2096         MangleName("vget_high", typestr, ClassS) + "(__a);\n  " + s;
2097     s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
2098     s += "(__a1, 0);";
2099     break;
2100   }
2101   case OpLongHi: {
2102     // Another local variable __a1 is needed for calling a Macro,
2103     // or using __a will have naming conflict when Macro expanding.
2104     s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
2105          MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
2106     s += "  (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
2107          "(__a1, __b);";
2108     break;
2109   }
2110   case OpNarrowHi: {
2111     s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
2112          MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
2113     break;
2114   }
2115   case OpCopyLane: {
2116     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2117     s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
2118     s += TypeString('s', typestr) + " __c2 = " +
2119          MangleName("vget_lane", typestr, ClassS) + "(__c1, __d); \\\n  " +
2120          MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b);";
2121     break;
2122   }
2123   case OpCopyQLane: {
2124     std::string typeCode = "";
2125     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2126     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2127     s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
2128     s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode +
2129          "(__c1, __d); \\\n  vsetq_lane_" + typeCode + "(__c2, __a1, __b);";
2130     break;
2131   }
2132   case OpCopyLaneQ: {
2133     std::string typeCode = "";
2134     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2135     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2136     s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
2137     s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode +
2138          "(__c1, __d); \\\n  vset_lane_" + typeCode + "(__c2, __a1, __b);";
2139     break;
2140   }
2141   case OpScalarMulLane: {
2142     std::string typeCode = "";
2143     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2144     s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
2145       "(__b, __c);\\\n  __a * __d1;";
2146     break;
2147   }
2148   case OpScalarMulLaneQ: {
2149     std::string typeCode = "";
2150     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2151     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2152     s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2153     s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + typeCode +
2154       "(__b1, __c);\\\n  __a1 * __d1;";
2155     break;
2156   }
2157   case OpScalarMulXLane: {
2158     bool dummy = false;
2159     char type = ClassifyType(typestr, dummy, dummy, dummy);
2160     if (type == 'f') type = 's';
2161     std::string typeCode = "";
2162     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2163     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2164     s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2165     s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
2166       "(__b1, __c);\\\n  vmulx" + type + "_" +
2167       typeCode +  "(__a1, __d1);";
2168     break;
2169   }
2170   case OpScalarMulXLaneQ: {
2171     bool dummy = false;
2172     char type = ClassifyType(typestr, dummy, dummy, dummy);
2173     if (type == 'f') type = 's';
2174     std::string typeCode = "";
2175     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2176     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2177     s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2178     s += TypeString('s', typestr) + " __d1 = vgetq_lane_" +
2179       typeCode + "(__b1, __c);\\\n  vmulx" + type +
2180       "_" + typeCode +  "(__a1, __d1);";
2181     break;
2182   }
2183 
2184   case OpScalarVMulXLane: {
2185     bool dummy = false;
2186     char type = ClassifyType(typestr, dummy, dummy, dummy);
2187     if (type == 'f') type = 's';
2188     std::string typeCode = "";
2189     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2190     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2191     s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2192     s += TypeString('s', typestr) + " __d1 = vget_lane_" +
2193       typeCode + "(__a1, 0);\\\n" +
2194       "  " + TypeString('s', typestr) + " __e1 = vget_lane_" +
2195       typeCode + "(__b1, __c);\\\n" +
2196       "  " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
2197       typeCode + "(__d1, __e1);\\\n" +
2198       "  " + TypeString('d', typestr) + " __g1;\\\n" +
2199       "  vset_lane_" + typeCode + "(__f1, __g1, __c);";
2200     break;
2201   }
2202 
2203   case OpScalarVMulXLaneQ: {
2204     bool dummy = false;
2205     char type = ClassifyType(typestr, dummy, dummy, dummy);
2206     if (type == 'f') type = 's';
2207     std::string typeCode = "";
2208     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2209     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2210     s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2211     s += TypeString('s', typestr) + " __d1 = vget_lane_" +
2212       typeCode + "(__a1, 0);\\\n" +
2213       "  " + TypeString('s', typestr) + " __e1 = vgetq_lane_" +
2214       typeCode + "(__b1, __c);\\\n" +
2215       "  " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
2216       typeCode + "(__d1, __e1);\\\n" +
2217       "  " + TypeString('d', typestr) + " __g1;\\\n" +
2218       "  vset_lane_" + typeCode + "(__f1, __g1, 0);";
2219     break;
2220   }
2221   case OpScalarQDMullLane: {
2222     std::string typeCode = "";
2223     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2224     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2225     s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2226     s += MangleName("vqdmull", typestr, ClassS) + "(__a1, " +
2227     "vget_lane_" + typeCode + "(__b1, __c));";
2228     break;
2229   }
2230   case OpScalarQDMullLaneQ: {
2231     std::string typeCode = "";
2232     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2233     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2234     s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2235     s += MangleName("vqdmull", typestr, ClassS) + "(__a1, " +
2236     "vgetq_lane_" + typeCode + "(__b1, __c));";
2237     break;
2238   }
2239   case OpScalarQDMulHiLane: {
2240     std::string typeCode = "";
2241     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2242     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2243     s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2244     s += MangleName("vqdmulh", typestr, ClassS) + "(__a1, " +
2245     "vget_lane_" + typeCode + "(__b1, __c));";
2246     break;
2247   }
2248   case OpScalarQDMulHiLaneQ: {
2249     std::string typeCode = "";
2250     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2251     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2252     s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2253     s += MangleName("vqdmulh", typestr, ClassS) + "(__a1, " +
2254     "vgetq_lane_" + typeCode + "(__b1, __c));";
2255     break;
2256   }
2257   case OpScalarQRDMulHiLane: {
2258     std::string typeCode = "";
2259     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2260     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2261     s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2262     s += MangleName("vqrdmulh", typestr, ClassS) + "(__a1, " +
2263     "vget_lane_" + typeCode + "(__b1, __c));";
2264     break;
2265   }
2266   case OpScalarQRDMulHiLaneQ: {
2267     std::string typeCode = "";
2268     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2269     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2270     s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
2271     s += MangleName("vqrdmulh", typestr, ClassS) + "(__a1, " +
2272     "vgetq_lane_" + typeCode + "(__b1, __c));";
2273     break;
2274   }
2275   case OpScalarGetLane:{
2276     std::string typeCode = "";
2277     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2278     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
2279 
2280     std::string intType = quad ? "int16x8_t" : "int16x4_t";
2281     std::string intName = quad ? "vgetq" : "vget";
2282 
2283     // reinterpret float16 vector as int16 vector
2284     s += intType + " __a2 = *(" + intType + " *)(&__a1);\\\n";
2285 
2286     s += "  int16_t __a3 = " + intName + "_lane_s16(__a2, __b);\\\n";
2287 
2288     // reinterpret int16 vector as float16 vector
2289     s += "  float16_t __a4 = *(float16_t *)(&__a3);\\\n";
2290     s += "  __a4;";
2291     break;
2292   }
2293   case OpScalarSetLane:{
2294     std::string typeCode = "";
2295     InstructionTypeCode(typestr, ClassS, quad, typeCode);
2296     s += TypeString(proto[1], typestr) + " __a1 = __a;\\\n  ";
2297 
2298     std::string origType = quad ? "float16x8_t" : "float16x4_t";
2299     std::string intType = quad ? "int16x8_t" : "int16x4_t";
2300     std::string intName = quad ? "vsetq" : "vset";
2301 
2302     // reinterpret float16_t as int16_t
2303     s += "int16_t __a2 = *(int16_t *)(&__a1);\\\n";
2304     // reinterpret float16 vector as int16 vector
2305     s += "  " + intType + " __b2 = *(" + intType + " *)(&__b);\\\n";
2306 
2307     s += "  " + intType + " __b3 = " + intName + "_lane_s16(__a2, __b2, __c);\\\n";
2308 
2309     // reinterpret int16 vector as float16 vector
2310     s += "  " + origType + " __b4 = *(" + origType + " *)(&__b3);\\\n";
2311     s += "__b4;";
2312     break;
2313   }
2314 
2315   default:
2316     PrintFatalError("unknown OpKind!");
2317   }
2318   return s;
2319 }
2320 
2321 static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
2322   unsigned mod = proto[0];
2323 
2324   if (mod == 'v' || mod == 'f' || mod == 'F')
2325     mod = proto[1];
2326 
2327   bool quad = false;
2328   bool poly = false;
2329   bool usgn = false;
2330   bool scal = false;
2331   bool cnst = false;
2332   bool pntr = false;
2333 
2334   // Base type to get the type string for.
2335   char type = ClassifyType(typestr, quad, poly, usgn);
2336 
2337   // Based on the modifying character, change the type and width if necessary.
2338   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
2339 
2340   NeonTypeFlags::EltType ET;
2341   switch (type) {
2342     case 'c':
2343       ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
2344       break;
2345     case 's':
2346       ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
2347       break;
2348     case 'i':
2349       ET = NeonTypeFlags::Int32;
2350       break;
2351     case 'l':
2352       ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64;
2353       break;
2354     case 'k':
2355       ET = NeonTypeFlags::Poly128;
2356       break;
2357     case 'h':
2358       ET = NeonTypeFlags::Float16;
2359       break;
2360     case 'f':
2361       ET = NeonTypeFlags::Float32;
2362       break;
2363     case 'd':
2364       ET = NeonTypeFlags::Float64;
2365       break;
2366     default:
2367       PrintFatalError("unhandled type!");
2368   }
2369   NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
2370   return Flags.getFlags();
2371 }
2372 
// Return true if the prototype string contains any of the modifier characters
// 's', 'z', 'r', 'b', '$', 'y' or 'o'.
//
// We don't check 'a' in this function, because for builtin function the
// argument matching to 'a' uses a vector type splatted from a scalar type.
//
// The prototype is taken by const reference so that no copy of the string is
// made per call, and the set of characters is searched in a single pass.
static bool ProtoHasScalar(const std::string &proto)
{
  return proto.find_first_of("szrb$yo") != std::string::npos;
}
2385 
2386 // Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
2387 static std::string GenBuiltin(const std::string &name, const std::string &proto,
2388                               StringRef typestr, ClassKind ck) {
2389   std::string s;
2390 
2391   // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
2392   // sret-like argument.
2393   bool sret = IsMultiVecProto(proto[0]);
2394 
2395   bool define = UseMacro(proto, typestr);
2396 
2397   // Check if the prototype has a scalar operand with the type of the vector
2398   // elements.  If not, bitcasting the args will take care of arg checking.
2399   // The actual signedness etc. will be taken care of with special enums.
2400   if (!ProtoHasScalar(proto))
2401     ck = ClassB;
2402 
2403   if (proto[0] != 'v') {
2404     std::string ts = TypeString(proto[0], typestr);
2405 
2406     if (define) {
2407       if (sret)
2408         s += ts + " r; ";
2409       else
2410         s += "(" + ts + ")";
2411     } else if (sret) {
2412       s += ts + " r; ";
2413     } else {
2414       s += "return (" + ts + ")";
2415     }
2416   }
2417 
2418   bool splat = proto.find('a') != std::string::npos;
2419 
2420   s += "__builtin_neon_";
2421   if (splat) {
2422     // Call the non-splat builtin: chop off the "_n" suffix from the name.
2423     std::string vname(name, 0, name.size()-2);
2424     s += MangleName(vname, typestr, ck);
2425   } else {
2426     s += MangleName(name, typestr, ck);
2427   }
2428   s += "(";
2429 
2430   // Pass the address of the return variable as the first argument to sret-like
2431   // builtins.
2432   if (sret)
2433     s += "&r, ";
2434 
2435   char arg = 'a';
2436   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2437     std::string args = std::string(&arg, 1);
2438 
2439     // Use the local temporaries instead of the macro arguments.
2440     args = "__" + args;
2441 
2442     bool argQuad = false;
2443     bool argPoly = false;
2444     bool argUsgn = false;
2445     bool argScalar = false;
2446     bool dummy = false;
2447     char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
2448     argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
2449                       dummy, dummy);
2450 
2451     // Handle multiple-vector values specially, emitting each subvector as an
2452     // argument to the __builtin.
2453     unsigned NumOfVec = 0;
2454     if (proto[i] >= '2' && proto[i] <= '4') {
2455       NumOfVec = proto[i] - '0';
2456     } else if (proto[i] >= 'B' && proto[i] <= 'D') {
2457       NumOfVec = proto[i] - 'A' + 1;
2458     }
2459 
2460     if (NumOfVec > 0) {
2461       // Check if an explicit cast is needed.
2462       if (argType != 'c' || argPoly || argUsgn)
2463         args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
2464 
2465       for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) {
2466         s += args + ".val[" + utostr(vi) + "]";
2467         if ((vi + 1) < ve)
2468           s += ", ";
2469       }
2470       if ((i + 1) < e)
2471         s += ", ";
2472 
2473       continue;
2474     }
2475 
2476     if (splat && (i + 1) == e)
2477       args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);
2478 
2479     // Check if an explicit cast is needed.
2480     if ((splat || !argScalar) &&
2481         ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
2482       std::string argTypeStr = "c";
2483       if (ck != ClassB)
2484         argTypeStr = argType;
2485       if (argQuad)
2486         argTypeStr = "Q" + argTypeStr;
2487       args = "(" + TypeString('d', argTypeStr) + ")" + args;
2488     }
2489 
2490     s += args;
2491     if ((i + 1) < e)
2492       s += ", ";
2493   }
2494 
2495   // Extra constant integer to hold type class enum for this function, e.g. s8
2496   if (ck == ClassB)
2497     s += ", " + utostr(GetNeonEnum(proto, typestr));
2498 
2499   s += ");";
2500 
2501   if (proto[0] != 'v' && sret) {
2502     if (define)
2503       s += " r;";
2504     else
2505       s += " return r;";
2506   }
2507   return s;
2508 }
2509 
2510 static std::string GenBuiltinDef(const std::string &name,
2511                                  const std::string &proto,
2512                                  StringRef typestr, ClassKind ck) {
2513   std::string s("BUILTIN(__builtin_neon_");
2514 
2515   // If all types are the same size, bitcasting the args will take care
2516   // of arg checking.  The actual signedness etc. will be taken care of with
2517   // special enums.
2518   if (!ProtoHasScalar(proto))
2519     ck = ClassB;
2520 
2521   s += MangleName(name, typestr, ck);
2522   s += ", \"";
2523 
2524   for (unsigned i = 0, e = proto.size(); i != e; ++i)
2525     s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
2526 
2527   // Extra constant integer to hold type class enum for this function, e.g. s8
2528   if (ck == ClassB)
2529     s += "i";
2530 
2531   s += "\", \"n\")";
2532   return s;
2533 }
2534 
2535 static std::string GenIntrinsic(const std::string &name,
2536                                 const std::string &proto,
2537                                 StringRef outTypeStr, StringRef inTypeStr,
2538                                 OpKind kind, ClassKind classKind) {
2539   assert(!proto.empty() && "");
2540   bool define = UseMacro(proto, outTypeStr) && kind != OpUnavailable;
2541   std::string s;
2542 
2543   // static always inline + return type
2544   if (define)
2545     s += "#define ";
2546   else
2547     s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
2548 
2549   // Function name with type suffix
2550   std::string mangledName = MangleName(name, outTypeStr, ClassS);
2551   if (outTypeStr != inTypeStr) {
2552     // If the input type is different (e.g., for vreinterpret), append a suffix
2553     // for the input type.  String off a "Q" (quad) prefix so that MangleName
2554     // does not insert another "q" in the name.
2555     unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2556     StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2557     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2558   }
2559   s += mangledName;
2560 
2561   // Function arguments
2562   s += GenArgs(proto, inTypeStr, name);
2563 
2564   // Definition.
2565   if (define) {
2566     s += " __extension__ ({ \\\n  ";
2567     s += GenMacroLocals(proto, inTypeStr, name);
2568   } else if (kind == OpUnavailable) {
2569     s += " __attribute__((unavailable));\n";
2570     return s;
2571   } else
2572     s += " {\n  ";
2573 
2574   if (kind != OpNone)
2575     s += GenOpString(name, kind, proto, outTypeStr);
2576   else
2577     s += GenBuiltin(name, proto, outTypeStr, classKind);
2578   if (define)
2579     s += " })";
2580   else
2581     s += " }";
2582   s += "\n";
2583   return s;
2584 }
2585 
2586 /// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
2587 /// is comprised of type definitions and function declarations.
2588 void NeonEmitter::run(raw_ostream &OS) {
2589   OS <<
2590     "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
2591     "---===\n"
2592     " *\n"
2593     " * Permission is hereby granted, free of charge, to any person obtaining "
2594     "a copy\n"
2595     " * of this software and associated documentation files (the \"Software\"),"
2596     " to deal\n"
2597     " * in the Software without restriction, including without limitation the "
2598     "rights\n"
2599     " * to use, copy, modify, merge, publish, distribute, sublicense, "
2600     "and/or sell\n"
2601     " * copies of the Software, and to permit persons to whom the Software is\n"
2602     " * furnished to do so, subject to the following conditions:\n"
2603     " *\n"
2604     " * The above copyright notice and this permission notice shall be "
2605     "included in\n"
2606     " * all copies or substantial portions of the Software.\n"
2607     " *\n"
2608     " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2609     "EXPRESS OR\n"
2610     " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2611     "MERCHANTABILITY,\n"
2612     " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2613     "SHALL THE\n"
2614     " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2615     "OTHER\n"
2616     " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2617     "ARISING FROM,\n"
2618     " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2619     "DEALINGS IN\n"
2620     " * THE SOFTWARE.\n"
2621     " *\n"
2622     " *===--------------------------------------------------------------------"
2623     "---===\n"
2624     " */\n\n";
2625 
2626   OS << "#ifndef __ARM_NEON_H\n";
2627   OS << "#define __ARM_NEON_H\n\n";
2628 
2629   OS << "#if !defined(__ARM_NEON)\n";
2630   OS << "#error \"NEON support not enabled\"\n";
2631   OS << "#endif\n\n";
2632 
2633   OS << "#include <stdint.h>\n\n";
2634 
2635   // Emit NEON-specific scalar typedefs.
2636   OS << "typedef float float32_t;\n";
2637   OS << "typedef __fp16 float16_t;\n";
2638 
2639   OS << "#ifdef __aarch64__\n";
2640   OS << "typedef double float64_t;\n";
2641   OS << "#endif\n\n";
2642 
2643   // For now, signedness of polynomial types depends on target
2644   OS << "#ifdef __aarch64__\n";
2645   OS << "typedef uint8_t poly8_t;\n";
2646   OS << "typedef uint16_t poly16_t;\n";
2647   OS << "typedef uint64_t poly64_t;\n";
2648   OS << "typedef __uint128_t poly128_t;\n";
2649   OS << "#else\n";
2650   OS << "typedef int8_t poly8_t;\n";
2651   OS << "typedef int16_t poly16_t;\n";
2652   OS << "#endif\n";
2653 
2654   // Emit Neon vector typedefs.
2655   std::string TypedefTypes(
2656       "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
2657   SmallVector<StringRef, 24> TDTypeVec;
2658   ParseTypes(0, TypedefTypes, TDTypeVec);
2659 
2660   // Emit vector typedefs.
2661   bool isA64 = false;
2662   bool preinsert;
2663   bool postinsert;
2664   for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
2665     bool dummy, quad = false, poly = false;
2666     char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
2667     preinsert = false;
2668     postinsert = false;
2669 
2670     if (type == 'd' || (type == 'l' && poly)) {
2671       preinsert = isA64? false: true;
2672       isA64 = true;
2673     } else {
2674       postinsert = isA64? true: false;
2675       isA64 = false;
2676     }
2677     if (postinsert)
2678       OS << "#endif\n";
2679     if (preinsert)
2680       OS << "#ifdef __aarch64__\n";
2681 
2682     if (poly)
2683       OS << "typedef __attribute__((neon_polyvector_type(";
2684     else
2685       OS << "typedef __attribute__((neon_vector_type(";
2686 
2687     unsigned nElts = GetNumElements(TDTypeVec[i], quad);
2688     OS << utostr(nElts) << "))) ";
2689     if (nElts < 10)
2690       OS << " ";
2691 
2692     OS << TypeString('s', TDTypeVec[i]);
2693     OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
2694 
2695   }
2696   postinsert = isA64? true: false;
2697   if (postinsert)
2698     OS << "#endif\n";
2699   OS << "\n";
2700 
2701   // Emit struct typedefs.
2702   isA64 = false;
2703   for (unsigned vi = 2; vi != 5; ++vi) {
2704     for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
2705       bool dummy, quad = false, poly = false;
2706       char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
2707       preinsert = false;
2708       postinsert = false;
2709 
2710       if (type == 'd' || (type == 'l' && poly)) {
2711         preinsert = isA64? false: true;
2712         isA64 = true;
2713       } else {
2714         postinsert = isA64? true: false;
2715         isA64 = false;
2716       }
2717       if (postinsert)
2718         OS << "#endif\n";
2719       if (preinsert)
2720         OS << "#ifdef __aarch64__\n";
2721 
2722       std::string ts = TypeString('d', TDTypeVec[i]);
2723       std::string vs = TypeString('0' + vi, TDTypeVec[i]);
2724       OS << "typedef struct " << vs << " {\n";
2725       OS << "  " << ts << " val";
2726       OS << "[" << utostr(vi) << "]";
2727       OS << ";\n} ";
2728       OS << vs << ";\n";
2729       OS << "\n";
2730     }
2731   }
2732   postinsert = isA64? true: false;
2733   if (postinsert)
2734     OS << "#endif\n";
2735   OS << "\n";
2736 
2737   OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";
2738 
2739   std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
2740 
2741   StringMap<ClassKind> EmittedMap;
2742   std::string CurrentGuard = "";
2743   bool InGuard = false;
2744 
2745   // Some intrinsics are used to express others. These need to be emitted near
2746   // the beginning so that the declarations are present when needed. This is
2747   // rather an ugly, arbitrary list, but probably simpler than actually tracking
2748   // dependency info.
2749   static const char *EarlyDefsArr[] =
2750       { "VFMA",      "VQMOVN",    "VQMOVUN",  "VABD",    "VMOVL",
2751         "VABDL",     "VGET_HIGH", "VCOMBINE", "VSHLL_N", "VMOVL_HIGH",
2752         "VMULL",     "VMLAL_N",   "VMLSL_N",  "VMULL_N", "VMULL_P64",
2753         "VQDMLAL_N", "VQDMLSL_N", "VQDMULL_N" };
2754   ArrayRef<const char *> EarlyDefs(EarlyDefsArr);
2755 
2756   for (unsigned i = 0; i < EarlyDefs.size(); ++i) {
2757     Record *R = Records.getDef(EarlyDefs[i]);
2758     emitGuardedIntrinsic(OS, R, CurrentGuard, InGuard, EmittedMap);
2759   }
2760 
2761   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2762     Record *R = RV[i];
2763     if (std::find(EarlyDefs.begin(), EarlyDefs.end(), R->getName()) !=
2764         EarlyDefs.end())
2765       continue;
2766 
2767     emitGuardedIntrinsic(OS, R, CurrentGuard, InGuard, EmittedMap);
2768   }
2769 
2770   if (InGuard)
2771     OS << "#endif\n\n";
2772 
2773   OS << "#undef __ai\n\n";
2774   OS << "#endif /* __ARM_NEON_H */\n";
2775 }
2776 
2777 void NeonEmitter::emitGuardedIntrinsic(raw_ostream &OS, Record *R,
2778                                        std::string &CurrentGuard, bool &InGuard,
2779                                        StringMap<ClassKind> &EmittedMap) {
2780 
2781   std::string NewGuard = R->getValueAsString("ArchGuard");
2782   if (NewGuard != CurrentGuard) {
2783     if (InGuard)
2784       OS << "#endif\n\n";
2785     if (NewGuard.size())
2786       OS << "#if " << NewGuard << '\n';
2787 
2788     CurrentGuard = NewGuard;
2789     InGuard = NewGuard.size() != 0;
2790   }
2791 
2792   emitIntrinsic(OS, R, EmittedMap);
2793 }
2794 
2795 /// emitIntrinsic - Write out the arm_neon.h header file definitions for the
2796 /// intrinsics specified by record R checking for intrinsic uniqueness.
2797 void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
2798                                 StringMap<ClassKind> &EmittedMap) {
2799   std::string name = R->getValueAsString("Name");
2800   std::string Proto = R->getValueAsString("Prototype");
2801   std::string Types = R->getValueAsString("Types");
2802 
2803   SmallVector<StringRef, 16> TypeVec;
2804   ParseTypes(R, Types, TypeVec);
2805 
2806   OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
2807 
2808   ClassKind classKind = ClassNone;
2809   if (R->getSuperClasses().size() >= 2)
2810     classKind = ClassMap[R->getSuperClasses()[1]];
2811   if (classKind == ClassNone && kind == OpNone)
2812     PrintFatalError(R->getLoc(), "Builtin has no class kind");
2813 
2814   for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2815     if (kind == OpReinterpret) {
2816       bool outQuad = false;
2817       bool dummy = false;
2818       (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
2819       for (unsigned srcti = 0, srcte = TypeVec.size();
2820            srcti != srcte; ++srcti) {
2821         bool inQuad = false;
2822         (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
2823         if (srcti == ti || inQuad != outQuad)
2824           continue;
2825         std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
2826                                      OpCast, ClassS);
2827         if (EmittedMap.count(s))
2828           continue;
2829         EmittedMap[s] = ClassS;
2830         OS << s;
2831       }
2832     } else {
2833       std::string s =
2834           GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
2835       if (EmittedMap.count(s)) {
2836         errs() << "warning: duplicate definition: " << name
2837                << " (type: " << TypeString('d', TypeVec[ti]) << ")\n";
2838         continue;
2839       }
2840       EmittedMap[s] = classKind;
2841       OS << s;
2842     }
2843   }
2844   OS << "\n";
2845 }
2846 
2847 static unsigned RangeFromType(const char mod, StringRef typestr) {
2848   // base type to get the type string for.
2849   bool quad = false, dummy = false;
2850   char type = ClassifyType(typestr, quad, dummy, dummy);
2851   type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
2852 
2853   switch (type) {
2854     case 'c':
2855       return (8 << (int)quad) - 1;
2856     case 'h':
2857     case 's':
2858       return (4 << (int)quad) - 1;
2859     case 'f':
2860     case 'i':
2861       return (2 << (int)quad) - 1;
2862     case 'd':
2863     case 'l':
2864       return (1 << (int)quad) - 1;
2865     case 'k':
2866       return 0;
2867     default:
2868       PrintFatalError("unhandled type!");
2869   }
2870 }
2871 
2872 static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) {
2873   // base type to get the type string for.
2874   bool dummy = false;
2875   char type = ClassifyType(typestr, dummy, dummy, dummy);
2876   type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy);
2877 
2878   switch (type) {
2879     case 'c':
2880       return 7;
2881     case 'h':
2882     case 's':
2883       return 15;
2884     case 'f':
2885     case 'i':
2886       return 31;
2887     case 'd':
2888     case 'l':
2889       return 63;
2890     case 'k':
2891       return 127;
2892     default:
2893       PrintFatalError("unhandled type!");
2894   }
2895 }
2896 
2897 /// Generate the ARM and AArch64 intrinsic range checking code for
2898 /// shift/lane immediates, checking for unique declarations.
2899 void
2900 NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS) {
2901   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2902   StringMap<OpKind> EmittedMap;
2903 
2904   // Generate the intrinsic range checking code for shift/lane immediates.
2905   OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2906 
2907   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2908     Record *R = RV[i];
2909 
2910     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2911     if (k != OpNone)
2912       continue;
2913 
2914     std::string name = R->getValueAsString("Name");
2915     std::string Proto = R->getValueAsString("Prototype");
2916     std::string Types = R->getValueAsString("Types");
2917     std::string Rename = name + "@" + Proto;
2918 
2919     // Functions with 'a' (the splat code) in the type prototype should not get
2920     // their own builtin as they use the non-splat variant.
2921     if (Proto.find('a') != std::string::npos)
2922       continue;
2923 
2924     // Functions which do not have an immediate do not need to have range
2925     // checking code emitted.
2926     size_t immPos = Proto.find('i');
2927     if (immPos == std::string::npos)
2928       continue;
2929 
2930     SmallVector<StringRef, 16> TypeVec;
2931     ParseTypes(R, Types, TypeVec);
2932 
2933     if (R->getSuperClasses().size() < 2)
2934       PrintFatalError(R->getLoc(), "Builtin has no class kind");
2935 
2936     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2937     if (!ProtoHasScalar(Proto))
2938       ck = ClassB;
2939 
2940     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2941       std::string namestr, shiftstr, rangestr;
2942 
2943       if (R->getValueAsBit("isVCVT_N")) {
2944         // VCVT between floating- and fixed-point values takes an immediate
2945         // in the range [1, 32] for f32, or [1, 64] for f64.
2946         ck = ClassB;
2947         if (name.find("32") != std::string::npos)
2948           rangestr = "l = 1; u = 31"; // upper bound = l + u
2949         else if (name.find("64") != std::string::npos)
2950           rangestr = "l = 1; u = 63";
2951         else
2952           PrintFatalError(R->getLoc(),
2953               "Fixed point convert name should contains \"32\" or \"64\"");
2954 
2955       } else if (R->getValueAsBit("isScalarShift")) {
2956         // Right shifts have an 'r' in the name, left shifts do not.  Convert
2957         // instructions have the same bounds and right shifts.
2958         if (name.find('r') != std::string::npos ||
2959             name.find("cvt") != std::string::npos)
2960           rangestr = "l = 1; ";
2961 
2962         unsigned upBound = RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]);
2963         // Narrow shift has half the upper bound
2964         if (R->getValueAsBit("isScalarNarrowShift"))
2965           upBound /= 2;
2966 
2967         rangestr += "u = " + utostr(upBound);
2968       } else if (R->getValueAsBit("isShift")) {
2969         // Builtins which are overloaded by type will need to have their upper
2970         // bound computed at Sema time based on the type constant.
2971         shiftstr = ", true";
2972 
2973         // Right shifts have an 'r' in the name, left shifts do not.
2974         if (name.find('r') != std::string::npos)
2975           rangestr = "l = 1; ";
2976 
2977         rangestr += "u = RFT(TV" + shiftstr + ")";
2978       } else if (ck == ClassB) {
2979         // ClassB intrinsics have a type (and hence lane number) that is only
2980         // known at runtime.
2981         assert(immPos > 0 && "unexpected immediate operand");
2982         if (R->getValueAsBit("isLaneQ"))
2983           rangestr = "u = RFT(TV, false, true)";
2984         else
2985           rangestr = "u = RFT(TV, false, false)";
2986       } else {
2987         // The immediate generally refers to a lane in the preceding argument.
2988         assert(immPos > 0 && "unexpected immediate operand");
2989         rangestr =
2990             "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
2991       }
2992       // Make sure cases appear only once by uniquing them in a string map.
2993       namestr = MangleName(name, TypeVec[ti], ck);
2994       if (EmittedMap.count(namestr))
2995         continue;
2996       EmittedMap[namestr] = OpNone;
2997 
2998       // Calculate the index of the immediate that should be range checked.
2999       unsigned immidx = 0;
3000 
3001       // Builtins that return a struct of multiple vectors have an extra
3002       // leading arg for the struct return.
3003       if (IsMultiVecProto(Proto[0]))
3004         ++immidx;
3005 
3006       // Add one to the index for each argument until we reach the immediate
3007       // to be checked.  Structs of vectors are passed as multiple arguments.
3008       for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
3009         switch (Proto[ii]) {
3010         default:
3011           immidx += 1;
3012           break;
3013         case '2':
3014         case 'B':
3015           immidx += 2;
3016           break;
3017         case '3':
3018         case 'C':
3019           immidx += 3;
3020           break;
3021         case '4':
3022         case 'D':
3023           immidx += 4;
3024           break;
3025         case 'i':
3026           ie = ii + 1;
3027           break;
3028         }
3029       }
3030       OS << "case NEON::BI__builtin_neon_";
3031       OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
3032          << rangestr << "; break;\n";
3033     }
3034   }
3035   OS << "#endif\n\n";
3036 }
3037 
/// OverloadInfo - Data gathered for one overloaded builtin while generating
/// the overload type checking code.
struct OverloadInfo {
  /// Bitmask of permitted types; bits are set as `1ULL << <type enum value>`.
  uint64_t Mask;
  /// Index of the pointer argument; only emitted when it is >= 0.
  int PtrArgNum;
  /// True when the pointer argument is a const pointer.
  bool HasConstPtr;

  /// Start from an all-zero state so that a default-initialized object never
  /// carries indeterminate values (matches what `OverloadInfo()` produced
  /// before this constructor existed).
  OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {}
};
3043 /// Generate the ARM and AArch64 overloaded type checking code for
3044 /// SemaChecking.cpp, checking for unique builtin declarations.
3045 void
3046 NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS) {
3047   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3048 
3049   // Generate the overloaded type checking code for SemaChecking.cpp
3050   OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
3051 
3052   // We record each overload check line before emitting because subsequent Inst
3053   // definitions may extend the number of permitted types (i.e. augment the
3054   // Mask). Use std::map to avoid sorting the table by hash number.
3055   std::map<std::string, OverloadInfo> OverloadMap;
3056   typedef std::map<std::string, OverloadInfo>::iterator OverloadIterator;
3057 
3058   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3059     Record *R = RV[i];
3060     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
3061     if (k != OpNone)
3062       continue;
3063 
3064     std::string Proto = R->getValueAsString("Prototype");
3065     std::string Types = R->getValueAsString("Types");
3066     std::string name = R->getValueAsString("Name");
3067     std::string Rename = name + "@" + Proto;
3068 
3069     // Functions with 'a' (the splat code) in the type prototype should not get
3070     // their own builtin as they use the non-splat variant.
3071     if (Proto.find('a') != std::string::npos)
3072       continue;
3073 
3074     // Functions which have a scalar argument cannot be overloaded, no need to
3075     // check them if we are emitting the type checking code.
3076     if (ProtoHasScalar(Proto))
3077       continue;
3078 
3079     SmallVector<StringRef, 16> TypeVec;
3080     ParseTypes(R, Types, TypeVec);
3081 
3082     if (R->getSuperClasses().size() < 2)
3083       PrintFatalError(R->getLoc(), "Builtin has no class kind");
3084 
3085     int si = -1, qi = -1;
3086     uint64_t mask = 0, qmask = 0;
3087     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3088       // Generate the switch case(s) for this builtin for the type validation.
3089       bool quad = false, poly = false, usgn = false;
3090       (void) ClassifyType(TypeVec[ti], quad, poly, usgn);
3091 
3092       if (quad) {
3093         qi = ti;
3094         qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
3095       } else {
3096         si = ti;
3097         mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
3098       }
3099     }
3100 
3101     // Check if the builtin function has a pointer or const pointer argument.
3102     int PtrArgNum = -1;
3103     bool HasConstPtr = false;
3104     for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
3105       char ArgType = Proto[arg];
3106       if (ArgType == 'c') {
3107         HasConstPtr = true;
3108         PtrArgNum = arg - 1;
3109         break;
3110       }
3111       if (ArgType == 'p') {
3112         PtrArgNum = arg - 1;
3113         break;
3114       }
3115     }
3116     // For sret builtins, adjust the pointer argument index.
3117     if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0]))
3118       PtrArgNum += 1;
3119 
3120     // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
3121     // and vst1_lane intrinsics.  Using a pointer to the vector element
3122     // type with one of those operations causes codegen to select an aligned
3123     // load/store instruction.  If you want an unaligned operation,
3124     // the pointer argument needs to have less alignment than element type,
3125     // so just accept any pointer type.
3126     if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
3127       PtrArgNum = -1;
3128       HasConstPtr = false;
3129     }
3130 
3131     if (mask) {
3132       std::pair<OverloadIterator, bool> I = OverloadMap.insert(std::make_pair(
3133           MangleName(name, TypeVec[si], ClassB), OverloadInfo()));
3134       OverloadInfo &Record = I.first->second;
3135       if (!I.second)
3136         assert(Record.PtrArgNum == PtrArgNum &&
3137                Record.HasConstPtr == HasConstPtr);
3138       Record.Mask |= mask;
3139       Record.PtrArgNum = PtrArgNum;
3140       Record.HasConstPtr = HasConstPtr;
3141     }
3142     if (qmask) {
3143       std::pair<OverloadIterator, bool> I = OverloadMap.insert(std::make_pair(
3144           MangleName(name, TypeVec[qi], ClassB), OverloadInfo()));
3145       OverloadInfo &Record = I.first->second;
3146       if (!I.second)
3147         assert(Record.PtrArgNum == PtrArgNum &&
3148                Record.HasConstPtr == HasConstPtr);
3149       Record.Mask |= qmask;
3150       Record.PtrArgNum = PtrArgNum;
3151       Record.HasConstPtr = HasConstPtr;
3152     }
3153   }
3154 
3155   for (OverloadIterator I = OverloadMap.begin(), E = OverloadMap.end(); I != E;
3156        ++I) {
3157     OverloadInfo &BuiltinOverloads = I->second;
3158     OS << "case NEON::BI__builtin_neon_" << I->first << ": ";
3159     OS << "mask = " << "0x" << utohexstr(BuiltinOverloads.Mask) << "ULL";
3160     if (BuiltinOverloads.PtrArgNum >= 0)
3161       OS << "; PtrArgNum = " << BuiltinOverloads.PtrArgNum;
3162     if (BuiltinOverloads.HasConstPtr)
3163       OS << "; HasConstPtr = true";
3164     OS << "; break;\n";
3165   }
3166 
3167   OS << "#endif\n\n";
3168 }
3169 
3170 /// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
3171 /// declaration of builtins, checking for unique builtin declarations.
3172 void NeonEmitter::genBuiltinsDef(raw_ostream &OS) {
3173   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3174 
3175   // We want to emit the intrinsics in alphabetical order, so use the more
3176   // expensive std::map to gather them together first.
3177   std::map<std::string, OpKind> EmittedMap;
3178 
3179   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3180     Record *R = RV[i];
3181     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
3182     if (k != OpNone)
3183       continue;
3184 
3185     std::string Proto = R->getValueAsString("Prototype");
3186     std::string name = R->getValueAsString("Name");
3187     std::string Rename = name + "@" + Proto;
3188 
3189     // Functions with 'a' (the splat code) in the type prototype should not get
3190     // their own builtin as they use the non-splat variant.
3191     if (Proto.find('a') != std::string::npos)
3192       continue;
3193 
3194     std::string Types = R->getValueAsString("Types");
3195     SmallVector<StringRef, 16> TypeVec;
3196     ParseTypes(R, Types, TypeVec);
3197 
3198     if (R->getSuperClasses().size() < 2)
3199       PrintFatalError(R->getLoc(), "Builtin has no class kind");
3200 
3201     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
3202 
3203     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3204       // Generate the declaration for this builtin, ensuring
3205       // that each unique BUILTIN() macro appears only once in the output
3206       // stream.
3207       std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
3208       if (EmittedMap.count(bd))
3209         continue;
3210 
3211       EmittedMap[bd] = OpNone;
3212     }
3213   }
3214 
3215   // Generate BuiltinsNEON.
3216   OS << "#ifdef GET_NEON_BUILTINS\n";
3217 
3218   for (std::map<std::string, OpKind>::iterator I = EmittedMap.begin(),
3219                                                E = EmittedMap.end();
3220        I != E; ++I)
3221     OS << I->first << "\n";
3222 
3223   OS << "#endif\n\n";
3224 }
3225 
3226 /// runHeader - Emit a file with sections defining:
3227 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
3228 /// 2. the SemaChecking code for the type overload checking.
3229 /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
3230 void NeonEmitter::runHeader(raw_ostream &OS) {
3231   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3232 
3233   // Generate shared BuiltinsXXX.def
3234   genBuiltinsDef(OS);
3235 
3236   // Generate ARM overloaded type checking code for SemaChecking.cpp
3237   genOverloadTypeCheckCode(OS);
3238 
3239   // Generate ARM range checking code for shift/lane immediates.
3240   genIntrinsicRangeCheckCode(OS);
3241 }
3242 
3243 /// GenTest - Write out a test for the intrinsic specified by the name and
3244 /// type strings, including the embedded patterns for FileCheck to match.
3245 static std::string GenTest(const std::string &name,
3246                            const std::string &proto,
3247                            StringRef outTypeStr, StringRef inTypeStr,
3248                            bool isShift, bool isHiddenLOp,
3249                            ClassKind ck, const std::string &InstName,
3250                            bool isA64,
3251                            std::string & testFuncProto) {
3252   assert(!proto.empty() && "");
3253   std::string s;
3254 
3255   // Function name with type suffix
3256   std::string mangledName = MangleName(name, outTypeStr, ClassS);
3257   if (outTypeStr != inTypeStr) {
3258     // If the input type is different (e.g., for vreinterpret), append a suffix
3259     // for the input type.  String off a "Q" (quad) prefix so that MangleName
3260     // does not insert another "q" in the name.
3261     unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
3262     StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
3263     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
3264   }
3265 
3266   // todo: GenerateChecksForIntrinsic does not generate CHECK
3267   // for aarch64 instructions yet
3268   std::vector<std::string> FileCheckPatterns;
3269   if (!isA64) {
3270     GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
3271                                isHiddenLOp, FileCheckPatterns);
3272     s+= "// CHECK_ARM: test_" + mangledName + "\n";
3273   }
3274   s += "// CHECK_AARCH64: test_" + mangledName + "\n";
3275 
3276   // Emit the FileCheck patterns.
3277   // If for any reason we do not want to emit a check, mangledInst
3278   // will be the empty string.
3279   if (FileCheckPatterns.size()) {
3280     for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
3281                                                   e = FileCheckPatterns.end();
3282          i != e;
3283          ++i) {
3284       s += "// CHECK_ARM: " + *i + "\n";
3285     }
3286   }
3287 
3288   // Emit the start of the test function.
3289 
3290   testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
3291   char arg = 'a';
3292   std::string comma;
3293   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
3294     // Do not create arguments for values that must be immediate constants.
3295     if (proto[i] == 'i')
3296       continue;
3297     testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
3298     testFuncProto.push_back(arg);
3299     comma = ", ";
3300   }
3301   testFuncProto += ")";
3302 
3303   s+= testFuncProto;
3304   s+= " {\n  ";
3305 
3306   if (proto[0] != 'v')
3307     s += "return ";
3308   s += mangledName + "(";
3309   arg = 'a';
3310   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
3311     if (proto[i] == 'i') {
3312       // For immediate operands, test the maximum value.
3313       if (isShift)
3314         s += "1"; // FIXME
3315       else
3316         // The immediate generally refers to a lane in the preceding argument.
3317         s += utostr(RangeFromType(proto[i-1], inTypeStr));
3318     } else {
3319       s.push_back(arg);
3320     }
3321     if ((i + 1) < e)
3322       s += ", ";
3323   }
3324   s += ");\n}\n\n";
3325   return s;
3326 }
3327 
3328 /// Write out all intrinsic tests for the specified target, checking
3329 /// for intrinsic test uniqueness.
3330 void NeonEmitter::genTargetTest(raw_ostream &OS) {
3331   StringMap<OpKind> EmittedMap;
3332   std::string CurrentGuard = "";
3333   bool InGuard = false;
3334 
3335   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3336   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3337     Record *R = RV[i];
3338     std::string name = R->getValueAsString("Name");
3339     std::string Proto = R->getValueAsString("Prototype");
3340     std::string Types = R->getValueAsString("Types");
3341     bool isShift = R->getValueAsBit("isShift");
3342     std::string InstName = R->getValueAsString("InstName");
3343     bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
3344 
3345     std::string NewGuard = R->getValueAsString("ArchGuard");
3346     if (NewGuard != CurrentGuard) {
3347       if (InGuard)
3348         OS << "#endif\n\n";
3349       if (NewGuard.size())
3350         OS << "#if " << NewGuard << '\n';
3351 
3352       CurrentGuard = NewGuard;
3353       InGuard = NewGuard.size() != 0;
3354     }
3355 
3356     SmallVector<StringRef, 16> TypeVec;
3357     ParseTypes(R, Types, TypeVec);
3358 
3359     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
3360     OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
3361     if (kind == OpUnavailable)
3362       continue;
3363     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3364       if (kind == OpReinterpret) {
3365         bool outQuad = false;
3366         bool dummy = false;
3367         (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
3368         for (unsigned srcti = 0, srcte = TypeVec.size();
3369              srcti != srcte; ++srcti) {
3370           bool inQuad = false;
3371           (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
3372           if (srcti == ti || inQuad != outQuad)
3373             continue;
3374           std::string testFuncProto;
3375           std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
3376                                   isShift, isHiddenLOp, ck, InstName,
3377                                   CurrentGuard.size(), testFuncProto);
3378           if (EmittedMap.count(testFuncProto))
3379             continue;
3380           EmittedMap[testFuncProto] = kind;
3381           OS << s << "\n";
3382         }
3383       } else {
3384         std::string testFuncProto;
3385         std::string s =
3386             GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift, isHiddenLOp,
3387                     ck, InstName, CurrentGuard.size(), testFuncProto);
3388         OS << s << "\n";
3389       }
3390     }
3391   }
3392 
3393   if (InGuard)
3394     OS << "#endif\n";
3395 }
3396 /// runTests - Write out a complete set of tests for all of the Neon
3397 /// intrinsics.
3398 void NeonEmitter::runTests(raw_ostream &OS) {
3399   OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
3400         "apcs-gnu\\\n"
3401         "// RUN:  -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
3402         "// RUN:  | FileCheck %s -check-prefix=CHECK_ARM\n"
3403         "\n"
3404         "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
3405         "// RUN -target-feature +neon  -ffreestanding -S -o - %s \\\n"
3406         "// RUN:  | FileCheck %s -check-prefix=CHECK_AARCH64\n"
3407         "\n"
3408         "// REQUIRES: long_tests\n"
3409         "\n"
3410         "#include <arm_neon.h>\n"
3411         "\n";
3412 
3413   genTargetTest(OS);
3414 }
3415 
3416 namespace clang {
3417 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
3418   NeonEmitter(Records).run(OS);
3419 }
3420 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
3421   NeonEmitter(Records).runHeader(OS);
3422 }
3423 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
3424   NeonEmitter(Records).runTests(OS);
3425 }
3426 } // End namespace clang
3427