1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision integer
10 // constant values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/bit.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <climits>
28 #include <cmath>
29 #include <cstdlib>
30 #include <cstring>
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "apint"
34 
35 /// A utility function for allocating memory, checking for allocation failures,
36 /// and ensuring the contents are zeroed.
37 inline static uint64_t* getClearedMemory(unsigned numWords) {
38   uint64_t *result = new uint64_t[numWords];
39   memset(result, 0, numWords * sizeof(uint64_t));
40   return result;
41 }
42 
43 /// A utility function for allocating memory and checking for allocation
44 /// failure.  The content is not zeroed.
45 inline static uint64_t* getMemory(unsigned numWords) {
46   return new uint64_t[numWords];
47 }
48 
49 /// A utility function that converts a character to a digit.
50 inline static unsigned getDigit(char cdigit, uint8_t radix) {
51   unsigned r;
52 
53   if (radix == 16 || radix == 36) {
54     r = cdigit - '0';
55     if (r <= 9)
56       return r;
57 
58     r = cdigit - 'A';
59     if (r <= radix - 11U)
60       return r + 10;
61 
62     r = cdigit - 'a';
63     if (r <= radix - 11U)
64       return r + 10;
65 
66     radix = 10;
67   }
68 
69   r = cdigit - '0';
70   if (r < radix)
71     return r;
72 
73   return -1U;
74 }
75 
76 
77 void APInt::initSlowCase(uint64_t val, bool isSigned) {
78   U.pVal = getClearedMemory(getNumWords());
79   U.pVal[0] = val;
80   if (isSigned && int64_t(val) < 0)
81     for (unsigned i = 1; i < getNumWords(); ++i)
82       U.pVal[i] = WORDTYPE_MAX;
83   clearUnusedBits();
84 }
85 
86 void APInt::initSlowCase(const APInt& that) {
87   U.pVal = getMemory(getNumWords());
88   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
89 }
90 
91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92   assert(BitWidth && "Bitwidth too small");
93   assert(bigVal.data() && "Null pointer detected!");
94   if (isSingleWord())
95     U.VAL = bigVal[0];
96   else {
97     // Get memory, cleared to 0
98     U.pVal = getClearedMemory(getNumWords());
99     // Calculate the number of words to copy
100     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
101     // Copy the words from bigVal to pVal
102     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
103   }
104   // Make sure unused high bits are cleared
105   clearUnusedBits();
106 }
107 
108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
109   : BitWidth(numBits) {
110   initFromArray(bigVal);
111 }
112 
113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
114   : BitWidth(numBits) {
115   initFromArray(makeArrayRef(bigVal, numWords));
116 }
117 
118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
119   : BitWidth(numbits) {
120   assert(BitWidth && "Bitwidth too small");
121   fromString(numbits, Str, radix);
122 }
123 
124 void APInt::reallocate(unsigned NewBitWidth) {
125   // If the number of words is the same we can just change the width and stop.
126   if (getNumWords() == getNumWords(NewBitWidth)) {
127     BitWidth = NewBitWidth;
128     return;
129   }
130 
131   // If we have an allocation, delete it.
132   if (!isSingleWord())
133     delete [] U.pVal;
134 
135   // Update BitWidth.
136   BitWidth = NewBitWidth;
137 
138   // If we are supposed to have an allocation, create it.
139   if (!isSingleWord())
140     U.pVal = getMemory(getNumWords());
141 }
142 
143 void APInt::AssignSlowCase(const APInt& RHS) {
144   // Don't do anything for X = X
145   if (this == &RHS)
146     return;
147 
148   // Adjust the bit width and handle allocations as necessary.
149   reallocate(RHS.getBitWidth());
150 
151   // Copy the data.
152   if (isSingleWord())
153     U.VAL = RHS.U.VAL;
154   else
155     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
156 }
157 
158 /// This method 'profiles' an APInt for use with FoldingSet.
159 void APInt::Profile(FoldingSetNodeID& ID) const {
160   ID.AddInteger(BitWidth);
161 
162   if (isSingleWord()) {
163     ID.AddInteger(U.VAL);
164     return;
165   }
166 
167   unsigned NumWords = getNumWords();
168   for (unsigned i = 0; i < NumWords; ++i)
169     ID.AddInteger(U.pVal[i]);
170 }
171 
172 /// Prefix increment operator. Increments the APInt by one.
173 APInt& APInt::operator++() {
174   if (isSingleWord())
175     ++U.VAL;
176   else
177     tcIncrement(U.pVal, getNumWords());
178   return clearUnusedBits();
179 }
180 
181 /// Prefix decrement operator. Decrements the APInt by one.
182 APInt& APInt::operator--() {
183   if (isSingleWord())
184     --U.VAL;
185   else
186     tcDecrement(U.pVal, getNumWords());
187   return clearUnusedBits();
188 }
189 
190 /// Adds the RHS APint to this APInt.
191 /// @returns this, after addition of RHS.
192 /// Addition assignment operator.
193 APInt& APInt::operator+=(const APInt& RHS) {
194   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
195   if (isSingleWord())
196     U.VAL += RHS.U.VAL;
197   else
198     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
199   return clearUnusedBits();
200 }
201 
202 APInt& APInt::operator+=(uint64_t RHS) {
203   if (isSingleWord())
204     U.VAL += RHS;
205   else
206     tcAddPart(U.pVal, RHS, getNumWords());
207   return clearUnusedBits();
208 }
209 
210 /// Subtracts the RHS APInt from this APInt
211 /// @returns this, after subtraction
212 /// Subtraction assignment operator.
213 APInt& APInt::operator-=(const APInt& RHS) {
214   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215   if (isSingleWord())
216     U.VAL -= RHS.U.VAL;
217   else
218     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
219   return clearUnusedBits();
220 }
221 
222 APInt& APInt::operator-=(uint64_t RHS) {
223   if (isSingleWord())
224     U.VAL -= RHS;
225   else
226     tcSubtractPart(U.pVal, RHS, getNumWords());
227   return clearUnusedBits();
228 }
229 
230 APInt APInt::operator*(const APInt& RHS) const {
231   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
232   if (isSingleWord())
233     return APInt(BitWidth, U.VAL * RHS.U.VAL);
234 
235   APInt Result(getMemory(getNumWords()), getBitWidth());
236 
237   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
238 
239   Result.clearUnusedBits();
240   return Result;
241 }
242 
243 void APInt::AndAssignSlowCase(const APInt& RHS) {
244   tcAnd(U.pVal, RHS.U.pVal, getNumWords());
245 }
246 
247 void APInt::OrAssignSlowCase(const APInt& RHS) {
248   tcOr(U.pVal, RHS.U.pVal, getNumWords());
249 }
250 
251 void APInt::XorAssignSlowCase(const APInt& RHS) {
252   tcXor(U.pVal, RHS.U.pVal, getNumWords());
253 }
254 
255 APInt& APInt::operator*=(const APInt& RHS) {
256   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
257   *this = *this * RHS;
258   return *this;
259 }
260 
261 APInt& APInt::operator*=(uint64_t RHS) {
262   if (isSingleWord()) {
263     U.VAL *= RHS;
264   } else {
265     unsigned NumWords = getNumWords();
266     tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
267   }
268   return clearUnusedBits();
269 }
270 
271 bool APInt::EqualSlowCase(const APInt& RHS) const {
272   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
273 }
274 
275 int APInt::compare(const APInt& RHS) const {
276   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
277   if (isSingleWord())
278     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
279 
280   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
281 }
282 
283 int APInt::compareSigned(const APInt& RHS) const {
284   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
285   if (isSingleWord()) {
286     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
287     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
288     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
289   }
290 
291   bool lhsNeg = isNegative();
292   bool rhsNeg = RHS.isNegative();
293 
294   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295   if (lhsNeg != rhsNeg)
296     return lhsNeg ? -1 : 1;
297 
298   // Otherwise we can just use an unsigned comparison, because even negative
299   // numbers compare correctly this way if both have the same signed-ness.
300   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
301 }
302 
303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
304   unsigned loWord = whichWord(loBit);
305   unsigned hiWord = whichWord(hiBit);
306 
307   // Create an initial mask for the low word with zeros below loBit.
308   uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
309 
310   // If hiBit is not aligned, we need a high mask.
311   unsigned hiShiftAmt = whichBit(hiBit);
312   if (hiShiftAmt != 0) {
313     // Create a high mask with zeros above hiBit.
314     uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
315     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316     // set the bits in hiWord.
317     if (hiWord == loWord)
318       loMask &= hiMask;
319     else
320       U.pVal[hiWord] |= hiMask;
321   }
322   // Apply the mask to the low word.
323   U.pVal[loWord] |= loMask;
324 
325   // Fill any words between loWord and hiWord with all ones.
326   for (unsigned word = loWord + 1; word < hiWord; ++word)
327     U.pVal[word] = WORDTYPE_MAX;
328 }
329 
330 /// Toggle every bit to its opposite value.
331 void APInt::flipAllBitsSlowCase() {
332   tcComplement(U.pVal, getNumWords());
333   clearUnusedBits();
334 }
335 
336 /// Toggle a given bit to its opposite value whose position is given
337 /// as "bitPosition".
338 /// Toggles a given bit to its opposite value.
339 void APInt::flipBit(unsigned bitPosition) {
340   assert(bitPosition < BitWidth && "Out of the bit-width range!");
341   if ((*this)[bitPosition]) clearBit(bitPosition);
342   else setBit(bitPosition);
343 }
344 
345 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
346   unsigned subBitWidth = subBits.getBitWidth();
347   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
348          "Illegal bit insertion");
349 
350   // Insertion is a direct copy.
351   if (subBitWidth == BitWidth) {
352     *this = subBits;
353     return;
354   }
355 
356   // Single word result can be done as a direct bitmask.
357   if (isSingleWord()) {
358     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
359     U.VAL &= ~(mask << bitPosition);
360     U.VAL |= (subBits.U.VAL << bitPosition);
361     return;
362   }
363 
364   unsigned loBit = whichBit(bitPosition);
365   unsigned loWord = whichWord(bitPosition);
366   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
367 
368   // Insertion within a single word can be done as a direct bitmask.
369   if (loWord == hi1Word) {
370     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
371     U.pVal[loWord] &= ~(mask << loBit);
372     U.pVal[loWord] |= (subBits.U.VAL << loBit);
373     return;
374   }
375 
376   // Insert on word boundaries.
377   if (loBit == 0) {
378     // Direct copy whole words.
379     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
380     memcpy(U.pVal + loWord, subBits.getRawData(),
381            numWholeSubWords * APINT_WORD_SIZE);
382 
383     // Mask+insert remaining bits.
384     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
385     if (remainingBits != 0) {
386       uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
387       U.pVal[hi1Word] &= ~mask;
388       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
389     }
390     return;
391   }
392 
393   // General case - set/clear individual bits in dst based on src.
394   // TODO - there is scope for optimization here, but at the moment this code
395   // path is barely used so prefer readability over performance.
396   for (unsigned i = 0; i != subBitWidth; ++i) {
397     if (subBits[i])
398       setBit(bitPosition + i);
399     else
400       clearBit(bitPosition + i);
401   }
402 }
403 
404 void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
405   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
406   subBits &= maskBits;
407   if (isSingleWord()) {
408     U.VAL &= ~(maskBits << bitPosition);
409     U.VAL |= subBits << bitPosition;
410     return;
411   }
412 
413   unsigned loBit = whichBit(bitPosition);
414   unsigned loWord = whichWord(bitPosition);
415   unsigned hiWord = whichWord(bitPosition + numBits - 1);
416   if (loWord == hiWord) {
417     U.pVal[loWord] &= ~(maskBits << loBit);
418     U.pVal[loWord] |= subBits << loBit;
419     return;
420   }
421 
422   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
423   unsigned wordBits = 8 * sizeof(WordType);
424   U.pVal[loWord] &= ~(maskBits << loBit);
425   U.pVal[loWord] |= subBits << loBit;
426 
427   U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));
428   U.pVal[hiWord] |= subBits >> (wordBits - loBit);
429 }
430 
431 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
432   assert(numBits > 0 && "Can't extract zero bits");
433   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
434          "Illegal bit extraction");
435 
436   if (isSingleWord())
437     return APInt(numBits, U.VAL >> bitPosition);
438 
439   unsigned loBit = whichBit(bitPosition);
440   unsigned loWord = whichWord(bitPosition);
441   unsigned hiWord = whichWord(bitPosition + numBits - 1);
442 
443   // Single word result extracting bits from a single word source.
444   if (loWord == hiWord)
445     return APInt(numBits, U.pVal[loWord] >> loBit);
446 
447   // Extracting bits that start on a source word boundary can be done
448   // as a fast memory copy.
449   if (loBit == 0)
450     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
451 
452   // General case - shift + copy source words directly into place.
453   APInt Result(numBits, 0);
454   unsigned NumSrcWords = getNumWords();
455   unsigned NumDstWords = Result.getNumWords();
456 
457   uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
458   for (unsigned word = 0; word < NumDstWords; ++word) {
459     uint64_t w0 = U.pVal[loWord + word];
460     uint64_t w1 =
461         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
462     DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
463   }
464 
465   return Result.clearUnusedBits();
466 }
467 
468 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
469                                        unsigned bitPosition) const {
470   assert(numBits > 0 && "Can't extract zero bits");
471   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
472          "Illegal bit extraction");
473   assert(numBits <= 64 && "Illegal bit extraction");
474 
475   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
476   if (isSingleWord())
477     return (U.VAL >> bitPosition) & maskBits;
478 
479   unsigned loBit = whichBit(bitPosition);
480   unsigned loWord = whichWord(bitPosition);
481   unsigned hiWord = whichWord(bitPosition + numBits - 1);
482   if (loWord == hiWord)
483     return (U.pVal[loWord] >> loBit) & maskBits;
484 
485   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
486   unsigned wordBits = 8 * sizeof(WordType);
487   uint64_t retBits = U.pVal[loWord] >> loBit;
488   retBits |= U.pVal[hiWord] << (wordBits - loBit);
489   retBits &= maskBits;
490   return retBits;
491 }
492 
493 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
494   assert(!str.empty() && "Invalid string length");
495   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
496           radix == 36) &&
497          "Radix should be 2, 8, 10, 16, or 36!");
498 
499   size_t slen = str.size();
500 
501   // Each computation below needs to know if it's negative.
502   StringRef::iterator p = str.begin();
503   unsigned isNegative = *p == '-';
504   if (*p == '-' || *p == '+') {
505     p++;
506     slen--;
507     assert(slen && "String is only a sign, needs a value.");
508   }
509 
510   // For radixes of power-of-two values, the bits required is accurately and
511   // easily computed
512   if (radix == 2)
513     return slen + isNegative;
514   if (radix == 8)
515     return slen * 3 + isNegative;
516   if (radix == 16)
517     return slen * 4 + isNegative;
518 
519   // FIXME: base 36
520 
521   // This is grossly inefficient but accurate. We could probably do something
522   // with a computation of roughly slen*64/20 and then adjust by the value of
523   // the first few digits. But, I'm not sure how accurate that could be.
524 
525   // Compute a sufficient number of bits that is always large enough but might
526   // be too large. This avoids the assertion in the constructor. This
527   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
528   // bits in that case.
529   unsigned sufficient
530     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
531                  : (slen == 1 ? 7 : slen * 16/3);
532 
533   // Convert to the actual binary value.
534   APInt tmp(sufficient, StringRef(p, slen), radix);
535 
536   // Compute how many bits are required. If the log is infinite, assume we need
537   // just bit. If the log is exact and value is negative, then the value is
538   // MinSignedValue with (log + 1) bits.
539   unsigned log = tmp.logBase2();
540   if (log == (unsigned)-1) {
541     return isNegative + 1;
542   } else if (isNegative && tmp.isPowerOf2()) {
543     return isNegative + log;
544   } else {
545     return isNegative + log + 1;
546   }
547 }
548 
549 hash_code llvm::hash_value(const APInt &Arg) {
550   if (Arg.isSingleWord())
551     return hash_combine(Arg.U.VAL);
552 
553   return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
554 }
555 
556 bool APInt::isSplat(unsigned SplatSizeInBits) const {
557   assert(getBitWidth() % SplatSizeInBits == 0 &&
558          "SplatSizeInBits must divide width!");
559   // We can check that all parts of an integer are equal by making use of a
560   // little trick: rotate and check if it's still the same value.
561   return *this == rotl(SplatSizeInBits);
562 }
563 
564 /// This function returns the high "numBits" bits of this APInt.
565 APInt APInt::getHiBits(unsigned numBits) const {
566   return this->lshr(BitWidth - numBits);
567 }
568 
569 /// This function returns the low "numBits" bits of this APInt.
570 APInt APInt::getLoBits(unsigned numBits) const {
571   APInt Result(getLowBitsSet(BitWidth, numBits));
572   Result &= *this;
573   return Result;
574 }
575 
576 /// Return a value containing V broadcasted over NewLen bits.
577 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
578   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
579 
580   APInt Val = V.zextOrSelf(NewLen);
581   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
582     Val |= Val << I;
583 
584   return Val;
585 }
586 
587 unsigned APInt::countLeadingZerosSlowCase() const {
588   unsigned Count = 0;
589   for (int i = getNumWords()-1; i >= 0; --i) {
590     uint64_t V = U.pVal[i];
591     if (V == 0)
592       Count += APINT_BITS_PER_WORD;
593     else {
594       Count += llvm::countLeadingZeros(V);
595       break;
596     }
597   }
598   // Adjust for unused bits in the most significant word (they are zero).
599   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
600   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
601   return Count;
602 }
603 
604 unsigned APInt::countLeadingOnesSlowCase() const {
605   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
606   unsigned shift;
607   if (!highWordBits) {
608     highWordBits = APINT_BITS_PER_WORD;
609     shift = 0;
610   } else {
611     shift = APINT_BITS_PER_WORD - highWordBits;
612   }
613   int i = getNumWords() - 1;
614   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
615   if (Count == highWordBits) {
616     for (i--; i >= 0; --i) {
617       if (U.pVal[i] == WORDTYPE_MAX)
618         Count += APINT_BITS_PER_WORD;
619       else {
620         Count += llvm::countLeadingOnes(U.pVal[i]);
621         break;
622       }
623     }
624   }
625   return Count;
626 }
627 
628 unsigned APInt::countTrailingZerosSlowCase() const {
629   unsigned Count = 0;
630   unsigned i = 0;
631   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
632     Count += APINT_BITS_PER_WORD;
633   if (i < getNumWords())
634     Count += llvm::countTrailingZeros(U.pVal[i]);
635   return std::min(Count, BitWidth);
636 }
637 
638 unsigned APInt::countTrailingOnesSlowCase() const {
639   unsigned Count = 0;
640   unsigned i = 0;
641   for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
642     Count += APINT_BITS_PER_WORD;
643   if (i < getNumWords())
644     Count += llvm::countTrailingOnes(U.pVal[i]);
645   assert(Count <= BitWidth);
646   return Count;
647 }
648 
649 unsigned APInt::countPopulationSlowCase() const {
650   unsigned Count = 0;
651   for (unsigned i = 0; i < getNumWords(); ++i)
652     Count += llvm::countPopulation(U.pVal[i]);
653   return Count;
654 }
655 
656 bool APInt::intersectsSlowCase(const APInt &RHS) const {
657   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
658     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
659       return true;
660 
661   return false;
662 }
663 
664 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
665   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
666     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
667       return false;
668 
669   return true;
670 }
671 
672 APInt APInt::byteSwap() const {
673   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
674   if (BitWidth == 16)
675     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
676   if (BitWidth == 32)
677     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
678   if (BitWidth == 48) {
679     unsigned Tmp1 = unsigned(U.VAL >> 16);
680     Tmp1 = ByteSwap_32(Tmp1);
681     uint16_t Tmp2 = uint16_t(U.VAL);
682     Tmp2 = ByteSwap_16(Tmp2);
683     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
684   }
685   if (BitWidth == 64)
686     return APInt(BitWidth, ByteSwap_64(U.VAL));
687 
688   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
689   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
690     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
691   if (Result.BitWidth != BitWidth) {
692     Result.lshrInPlace(Result.BitWidth - BitWidth);
693     Result.BitWidth = BitWidth;
694   }
695   return Result;
696 }
697 
698 APInt APInt::reverseBits() const {
699   switch (BitWidth) {
700   case 64:
701     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
702   case 32:
703     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
704   case 16:
705     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
706   case 8:
707     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
708   default:
709     break;
710   }
711 
712   APInt Val(*this);
713   APInt Reversed(BitWidth, 0);
714   unsigned S = BitWidth;
715 
716   for (; Val != 0; Val.lshrInPlace(1)) {
717     Reversed <<= 1;
718     Reversed |= Val[0];
719     --S;
720   }
721 
722   Reversed <<= S;
723   return Reversed;
724 }
725 
726 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
727   // Fast-path a common case.
728   if (A == B) return A;
729 
730   // Corner cases: if either operand is zero, the other is the gcd.
731   if (!A) return B;
732   if (!B) return A;
733 
734   // Count common powers of 2 and remove all other powers of 2.
735   unsigned Pow2;
736   {
737     unsigned Pow2_A = A.countTrailingZeros();
738     unsigned Pow2_B = B.countTrailingZeros();
739     if (Pow2_A > Pow2_B) {
740       A.lshrInPlace(Pow2_A - Pow2_B);
741       Pow2 = Pow2_B;
742     } else if (Pow2_B > Pow2_A) {
743       B.lshrInPlace(Pow2_B - Pow2_A);
744       Pow2 = Pow2_A;
745     } else {
746       Pow2 = Pow2_A;
747     }
748   }
749 
750   // Both operands are odd multiples of 2^Pow_2:
751   //
752   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
753   //
754   // This is a modified version of Stein's algorithm, taking advantage of
755   // efficient countTrailingZeros().
756   while (A != B) {
757     if (A.ugt(B)) {
758       A -= B;
759       A.lshrInPlace(A.countTrailingZeros() - Pow2);
760     } else {
761       B -= A;
762       B.lshrInPlace(B.countTrailingZeros() - Pow2);
763     }
764   }
765 
766   return A;
767 }
768 
769 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
770   uint64_t I = bit_cast<uint64_t>(Double);
771 
772   // Get the sign bit from the highest order bit
773   bool isNeg = I >> 63;
774 
775   // Get the 11-bit exponent and adjust for the 1023 bit bias
776   int64_t exp = ((I >> 52) & 0x7ff) - 1023;
777 
778   // If the exponent is negative, the value is < 0 so just return 0.
779   if (exp < 0)
780     return APInt(width, 0u);
781 
782   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
783   uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
784 
785   // If the exponent doesn't shift all bits out of the mantissa
786   if (exp < 52)
787     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
788                     APInt(width, mantissa >> (52 - exp));
789 
790   // If the client didn't provide enough bits for us to shift the mantissa into
791   // then the result is undefined, just return 0
792   if (width <= exp - 52)
793     return APInt(width, 0);
794 
795   // Otherwise, we have to shift the mantissa bits up to the right location
796   APInt Tmp(width, mantissa);
797   Tmp <<= (unsigned)exp - 52;
798   return isNeg ? -Tmp : Tmp;
799 }
800 
801 /// This function converts this APInt to a double.
802 /// The layout for double is as following (IEEE Standard 754):
803 ///  --------------------------------------
804 /// |  Sign    Exponent    Fraction    Bias |
805 /// |-------------------------------------- |
806 /// |  1[63]   11[62-52]   52[51-00]   1023 |
807 ///  --------------------------------------
808 double APInt::roundToDouble(bool isSigned) const {
809 
810   // Handle the simple case where the value is contained in one uint64_t.
811   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
812   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
813     if (isSigned) {
814       int64_t sext = SignExtend64(getWord(0), BitWidth);
815       return double(sext);
816     } else
817       return double(getWord(0));
818   }
819 
820   // Determine if the value is negative.
821   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
822 
823   // Construct the absolute value if we're negative.
824   APInt Tmp(isNeg ? -(*this) : (*this));
825 
826   // Figure out how many bits we're using.
827   unsigned n = Tmp.getActiveBits();
828 
829   // The exponent (without bias normalization) is just the number of bits
830   // we are using. Note that the sign bit is gone since we constructed the
831   // absolute value.
832   uint64_t exp = n;
833 
834   // Return infinity for exponent overflow
835   if (exp > 1023) {
836     if (!isSigned || !isNeg)
837       return std::numeric_limits<double>::infinity();
838     else
839       return -std::numeric_limits<double>::infinity();
840   }
841   exp += 1023; // Increment for 1023 bias
842 
843   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
844   // extract the high 52 bits from the correct words in pVal.
845   uint64_t mantissa;
846   unsigned hiWord = whichWord(n-1);
847   if (hiWord == 0) {
848     mantissa = Tmp.U.pVal[0];
849     if (n > 52)
850       mantissa >>= n - 52; // shift down, we want the top 52 bits.
851   } else {
852     assert(hiWord > 0 && "huh?");
853     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
854     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
855     mantissa = hibits | lobits;
856   }
857 
858   // The leading bit of mantissa is implicit, so get rid of it.
859   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
860   uint64_t I = sign | (exp << 52) | mantissa;
861   return bit_cast<double>(I);
862 }
863 
864 // Truncate to new width.
865 APInt APInt::trunc(unsigned width) const {
866   assert(width < BitWidth && "Invalid APInt Truncate request");
867   assert(width && "Can't truncate to 0 bits");
868 
869   if (width <= APINT_BITS_PER_WORD)
870     return APInt(width, getRawData()[0]);
871 
872   APInt Result(getMemory(getNumWords(width)), width);
873 
874   // Copy full words.
875   unsigned i;
876   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
877     Result.U.pVal[i] = U.pVal[i];
878 
879   // Truncate and copy any partial word.
880   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
881   if (bits != 0)
882     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
883 
884   return Result;
885 }
886 
887 // Sign extend to a new width.
888 APInt APInt::sext(unsigned Width) const {
889   assert(Width > BitWidth && "Invalid APInt SignExtend request");
890 
891   if (Width <= APINT_BITS_PER_WORD)
892     return APInt(Width, SignExtend64(U.VAL, BitWidth));
893 
894   APInt Result(getMemory(getNumWords(Width)), Width);
895 
896   // Copy words.
897   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
898 
899   // Sign extend the last word since there may be unused bits in the input.
900   Result.U.pVal[getNumWords() - 1] =
901       SignExtend64(Result.U.pVal[getNumWords() - 1],
902                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
903 
904   // Fill with sign bits.
905   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
906               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
907   Result.clearUnusedBits();
908   return Result;
909 }
910 
911 //  Zero extend to a new width.
912 APInt APInt::zext(unsigned width) const {
913   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
914 
915   if (width <= APINT_BITS_PER_WORD)
916     return APInt(width, U.VAL);
917 
918   APInt Result(getMemory(getNumWords(width)), width);
919 
920   // Copy words.
921   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
922 
923   // Zero remaining words.
924   std::memset(Result.U.pVal + getNumWords(), 0,
925               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
926 
927   return Result;
928 }
929 
930 APInt APInt::zextOrTrunc(unsigned width) const {
931   if (BitWidth < width)
932     return zext(width);
933   if (BitWidth > width)
934     return trunc(width);
935   return *this;
936 }
937 
938 APInt APInt::sextOrTrunc(unsigned width) const {
939   if (BitWidth < width)
940     return sext(width);
941   if (BitWidth > width)
942     return trunc(width);
943   return *this;
944 }
945 
946 APInt APInt::zextOrSelf(unsigned width) const {
947   if (BitWidth < width)
948     return zext(width);
949   return *this;
950 }
951 
952 APInt APInt::sextOrSelf(unsigned width) const {
953   if (BitWidth < width)
954     return sext(width);
955   return *this;
956 }
957 
958 /// Arithmetic right-shift this APInt by shiftAmt.
959 /// Arithmetic right-shift function.
960 void APInt::ashrInPlace(const APInt &shiftAmt) {
961   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
962 }
963 
964 /// Arithmetic right-shift this APInt by shiftAmt.
965 /// Arithmetic right-shift function.
966 void APInt::ashrSlowCase(unsigned ShiftAmt) {
967   // Don't bother performing a no-op shift.
968   if (!ShiftAmt)
969     return;
970 
971   // Save the original sign bit for later.
972   bool Negative = isNegative();
973 
974   // WordShift is the inter-part shift; BitShift is intra-part shift.
975   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
976   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
977 
978   unsigned WordsToMove = getNumWords() - WordShift;
979   if (WordsToMove != 0) {
980     // Sign extend the last word to fill in the unused bits.
981     U.pVal[getNumWords() - 1] = SignExtend64(
982         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
983 
984     // Fastpath for moving by whole words.
985     if (BitShift == 0) {
986       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
987     } else {
988       // Move the words containing significant bits.
989       for (unsigned i = 0; i != WordsToMove - 1; ++i)
990         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
991                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
992 
993       // Handle the last word which has no high bits to copy.
994       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
995       // Sign extend one more time.
996       U.pVal[WordsToMove - 1] =
997           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
998     }
999   }
1000 
1001   // Fill in the remainder based on the original sign.
1002   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
1003               WordShift * APINT_WORD_SIZE);
1004   clearUnusedBits();
1005 }
1006 
1007 /// Logical right-shift this APInt by shiftAmt.
1008 /// Logical right-shift function.
1009 void APInt::lshrInPlace(const APInt &shiftAmt) {
1010   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1011 }
1012 
1013 /// Logical right-shift this APInt by shiftAmt.
1014 /// Logical right-shift function.
1015 void APInt::lshrSlowCase(unsigned ShiftAmt) {
1016   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
1017 }
1018 
1019 /// Left-shift this APInt by shiftAmt.
1020 /// Left-shift function.
1021 APInt &APInt::operator<<=(const APInt &shiftAmt) {
1022   // It's undefined behavior in C to shift by BitWidth or greater.
1023   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
1024   return *this;
1025 }
1026 
1027 void APInt::shlSlowCase(unsigned ShiftAmt) {
1028   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
1029   clearUnusedBits();
1030 }
1031 
1032 // Calculate the rotate amount modulo the bit width.
1033 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1034   unsigned rotBitWidth = rotateAmt.getBitWidth();
1035   APInt rot = rotateAmt;
1036   if (rotBitWidth < BitWidth) {
1037     // Extend the rotate APInt, so that the urem doesn't divide by 0.
1038     // e.g. APInt(1, 32) would give APInt(1, 0).
1039     rot = rotateAmt.zext(BitWidth);
1040   }
1041   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1042   return rot.getLimitedValue(BitWidth);
1043 }
1044 
1045 APInt APInt::rotl(const APInt &rotateAmt) const {
1046   return rotl(rotateModulo(BitWidth, rotateAmt));
1047 }
1048 
1049 APInt APInt::rotl(unsigned rotateAmt) const {
1050   rotateAmt %= BitWidth;
1051   if (rotateAmt == 0)
1052     return *this;
1053   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1054 }
1055 
1056 APInt APInt::rotr(const APInt &rotateAmt) const {
1057   return rotr(rotateModulo(BitWidth, rotateAmt));
1058 }
1059 
1060 APInt APInt::rotr(unsigned rotateAmt) const {
1061   rotateAmt %= BitWidth;
1062   if (rotateAmt == 0)
1063     return *this;
1064   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1065 }
1066 
1067 // Square Root - this method computes and returns the square root of "this".
1068 // Three mechanisms are used for computation. For small values (<= 5 bits),
1069 // a table lookup is done. This gets some performance for common cases. For
1070 // values using less than 52 bits, the value is converted to double and then
1071 // the libc sqrt function is called. The result is rounded and then converted
1072 // back to a uint64_t which is then used to construct the result. Finally,
1073 // the Babylonian method for computing square roots is used.
1074 APInt APInt::sqrt() const {
1075 
1076   // Determine the magnitude of the value.
1077   unsigned magnitude = getActiveBits();
1078 
1079   // Use a fast table for some small values. This also gets rid of some
1080   // rounding errors in libc sqrt for small values.
1081   if (magnitude <= 5) {
1082     static const uint8_t results[32] = {
1083       /*     0 */ 0,
1084       /*  1- 2 */ 1, 1,
1085       /*  3- 6 */ 2, 2, 2, 2,
1086       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1087       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1088       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1089       /*    31 */ 6
1090     };
1091     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1092   }
1093 
1094   // If the magnitude of the value fits in less than 52 bits (the precision of
1095   // an IEEE double precision floating point value), then we can use the
1096   // libc sqrt function which will probably use a hardware sqrt computation.
1097   // This should be faster than the algorithm below.
1098   if (magnitude < 52) {
1099     return APInt(BitWidth,
1100                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1101                                                                : U.pVal[0])))));
1102   }
1103 
1104   // Okay, all the short cuts are exhausted. We must compute it. The following
1105   // is a classical Babylonian method for computing the square root. This code
1106   // was adapted to APInt from a wikipedia article on such computations.
1107   // See http://www.wikipedia.org/ and go to the page named
1108   // Calculate_an_integer_square_root.
1109   unsigned nbits = BitWidth, i = 4;
1110   APInt testy(BitWidth, 16);
1111   APInt x_old(BitWidth, 1);
1112   APInt x_new(BitWidth, 0);
1113   APInt two(BitWidth, 2);
1114 
1115   // Select a good starting value using binary logarithms.
1116   for (;; i += 2, testy = testy.shl(2))
1117     if (i >= nbits || this->ule(testy)) {
1118       x_old = x_old.shl(i / 2);
1119       break;
1120     }
1121 
1122   // Use the Babylonian method to arrive at the integer square root:
1123   for (;;) {
1124     x_new = (this->udiv(x_old) + x_old).udiv(two);
1125     if (x_old.ule(x_new))
1126       break;
1127     x_old = x_new;
1128   }
1129 
1130   // Make sure we return the closest approximation
1131   // NOTE: The rounding calculation below is correct. It will produce an
1132   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1133   // determined to be a rounding issue with pari/gp as it begins to use a
1134   // floating point representation after 192 bits. There are no discrepancies
1135   // between this algorithm and pari/gp for bit widths < 192 bits.
1136   APInt square(x_old * x_old);
1137   APInt nextSquare((x_old + 1) * (x_old +1));
1138   if (this->ult(square))
1139     return x_old;
1140   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1141   APInt midpoint((nextSquare - square).udiv(two));
1142   APInt offset(*this - square);
1143   if (offset.ult(midpoint))
1144     return x_old;
1145   return x_old + 1;
1146 }
1147 
1148 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1149 /// iterative extended Euclidean algorithm is used to solve for this value,
1150 /// however we simplify it to speed up calculating only the inverse, and take
1151 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1152 /// (potentially large) APInts around.
1153 /// WARNING: a value of '0' may be returned,
1154 ///          signifying that no multiplicative inverse exists!
1155 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1156   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1157 
1158   // Using the properties listed at the following web page (accessed 06/21/08):
1159   //   http://www.numbertheory.org/php/euclid.html
1160   // (especially the properties numbered 3, 4 and 9) it can be proved that
1161   // BitWidth bits suffice for all the computations in the algorithm implemented
1162   // below. More precisely, this number of bits suffice if the multiplicative
1163   // inverse exists, but may not suffice for the general extended Euclidean
1164   // algorithm.
1165 
1166   APInt r[2] = { modulo, *this };
1167   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1168   APInt q(BitWidth, 0);
1169 
1170   unsigned i;
1171   for (i = 0; r[i^1] != 0; i ^= 1) {
1172     // An overview of the math without the confusing bit-flipping:
1173     // q = r[i-2] / r[i-1]
1174     // r[i] = r[i-2] % r[i-1]
1175     // t[i] = t[i-2] - t[i-1] * q
1176     udivrem(r[i], r[i^1], q, r[i]);
1177     t[i] -= t[i^1] * q;
1178   }
1179 
1180   // If this APInt and the modulo are not coprime, there is no multiplicative
1181   // inverse, so return 0. We check this by looking at the next-to-last
1182   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1183   // algorithm.
1184   if (r[i] != 1)
1185     return APInt(BitWidth, 0);
1186 
1187   // The next-to-last t is the multiplicative inverse.  However, we are
1188   // interested in a positive inverse. Calculate a positive one from a negative
1189   // one if necessary. A simple addition of the modulo suffices because
1190   // abs(t[i]) is known to be less than *this/2 (see the link above).
1191   if (t[i].isNegative())
1192     t[i] += modulo;
1193 
1194   return std::move(t[i]);
1195 }
1196 
1197 /// Calculate the magic numbers required to implement a signed integer division
1198 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1199 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1200 /// Warren, Jr., chapter 10.
1201 APInt::ms APInt::magic() const {
1202   const APInt& d = *this;
1203   unsigned p;
1204   APInt ad, anc, delta, q1, r1, q2, r2, t;
1205   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1206   struct ms mag;
1207 
1208   ad = d.abs();
1209   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1210   anc = t - 1 - t.urem(ad);   // absolute value of nc
1211   p = d.getBitWidth() - 1;    // initialize p
1212   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1213   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1214   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1215   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1216   do {
1217     p = p + 1;
1218     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1219     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1220     if (r1.uge(anc)) {  // must be unsigned comparison
1221       q1 = q1 + 1;
1222       r1 = r1 - anc;
1223     }
1224     q2 = q2<<1;          // update q2 = 2p/abs(d)
1225     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1226     if (r2.uge(ad)) {   // must be unsigned comparison
1227       q2 = q2 + 1;
1228       r2 = r2 - ad;
1229     }
1230     delta = ad - r2;
1231   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1232 
1233   mag.m = q2 + 1;
1234   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1235   mag.s = p - d.getBitWidth();          // resulting shift
1236   return mag;
1237 }
1238 
1239 /// Calculate the magic numbers required to implement an unsigned integer
1240 /// division by a constant as a sequence of multiplies, adds and shifts.
1241 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1242 /// S. Warren, Jr., chapter 10.
1243 /// LeadingZeros can be used to simplify the calculation if the upper bits
1244 /// of the divided value are known zero.
1245 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1246   const APInt& d = *this;
1247   unsigned p;
1248   APInt nc, delta, q1, r1, q2, r2;
1249   struct mu magu;
1250   magu.a = 0;               // initialize "add" indicator
1251   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1252   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1253   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1254 
1255   nc = allOnes - (allOnes - d).urem(d);
1256   p = d.getBitWidth() - 1;  // initialize p
1257   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1258   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1259   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1260   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1261   do {
1262     p = p + 1;
1263     if (r1.uge(nc - r1)) {
1264       q1 = q1 + q1 + 1;  // update q1
1265       r1 = r1 + r1 - nc; // update r1
1266     }
1267     else {
1268       q1 = q1+q1; // update q1
1269       r1 = r1+r1; // update r1
1270     }
1271     if ((r2 + 1).uge(d - r2)) {
1272       if (q2.uge(signedMax)) magu.a = 1;
1273       q2 = q2+q2 + 1;     // update q2
1274       r2 = r2+r2 + 1 - d; // update r2
1275     }
1276     else {
1277       if (q2.uge(signedMin)) magu.a = 1;
1278       q2 = q2+q2;     // update q2
1279       r2 = r2+r2 + 1; // update r2
1280     }
1281     delta = d - 1 - r2;
1282   } while (p < d.getBitWidth()*2 &&
1283            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1284   magu.m = q2 + 1; // resulting magic number
1285   magu.s = p - d.getBitWidth();  // resulting shift
1286   return magu;
1287 }
1288 
1289 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1290 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1291 /// variables here have the same names as in the algorithm. Comments explain
1292 /// the algorithm and any deviation from it.
1293 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1294                      unsigned m, unsigned n) {
1295   assert(u && "Must provide dividend");
1296   assert(v && "Must provide divisor");
1297   assert(q && "Must provide quotient");
1298   assert(u != v && u != q && v != q && "Must use different memory");
1299   assert(n>1 && "n must be > 1");
1300 
1301   // b denotes the base of the number system. In our case b is 2^32.
1302   const uint64_t b = uint64_t(1) << 32;
1303 
1304 // The DEBUG macros here tend to be spam in the debug output if you're not
1305 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1306 #ifdef KNUTH_DEBUG
1307 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1308 #else
1309 #define DEBUG_KNUTH(X) do {} while(false)
1310 #endif
1311 
1312   DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1313   DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1314   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1315   DEBUG_KNUTH(dbgs() << " by");
1316   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1317   DEBUG_KNUTH(dbgs() << '\n');
1318   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1319   // u and v by d. Note that we have taken Knuth's advice here to use a power
1320   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1321   // 2 allows us to shift instead of multiply and it is easy to determine the
1322   // shift amount from the leading zeros.  We are basically normalizing the u
1323   // and v so that its high bits are shifted to the top of v's range without
1324   // overflow. Note that this can require an extra word in u so that u must
1325   // be of length m+n+1.
1326   unsigned shift = countLeadingZeros(v[n-1]);
1327   uint32_t v_carry = 0;
1328   uint32_t u_carry = 0;
1329   if (shift) {
1330     for (unsigned i = 0; i < m+n; ++i) {
1331       uint32_t u_tmp = u[i] >> (32 - shift);
1332       u[i] = (u[i] << shift) | u_carry;
1333       u_carry = u_tmp;
1334     }
1335     for (unsigned i = 0; i < n; ++i) {
1336       uint32_t v_tmp = v[i] >> (32 - shift);
1337       v[i] = (v[i] << shift) | v_carry;
1338       v_carry = v_tmp;
1339     }
1340   }
1341   u[m+n] = u_carry;
1342 
1343   DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1344   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1345   DEBUG_KNUTH(dbgs() << " by");
1346   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1347   DEBUG_KNUTH(dbgs() << '\n');
1348 
1349   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1350   int j = m;
1351   do {
1352     DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1353     // D3. [Calculate q'.].
1354     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1355     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1356     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1357     // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1358     // on v[n-2] determines at high speed most of the cases in which the trial
1359     // value qp is one too large, and it eliminates all cases where qp is two
1360     // too large.
1361     uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1362     DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1363     uint64_t qp = dividend / v[n-1];
1364     uint64_t rp = dividend % v[n-1];
1365     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1366       qp--;
1367       rp += v[n-1];
1368       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1369         qp--;
1370     }
1371     DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1372 
1373     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1374     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1375     // consists of a simple multiplication by a one-place number, combined with
1376     // a subtraction.
1377     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1378     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1379     // true value plus b**(n+1), namely as the b's complement of
1380     // the true value, and a "borrow" to the left should be remembered.
1381     int64_t borrow = 0;
1382     for (unsigned i = 0; i < n; ++i) {
1383       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1384       int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1385       u[j+i] = Lo_32(subres);
1386       borrow = Hi_32(p) - Hi_32(subres);
1387       DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1388                         << ", borrow = " << borrow << '\n');
1389     }
1390     bool isNeg = u[j+n] < borrow;
1391     u[j+n] -= Lo_32(borrow);
1392 
1393     DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1394     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1395     DEBUG_KNUTH(dbgs() << '\n');
1396 
1397     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1398     // negative, go to step D6; otherwise go on to step D7.
1399     q[j] = Lo_32(qp);
1400     if (isNeg) {
1401       // D6. [Add back]. The probability that this step is necessary is very
1402       // small, on the order of only 2/b. Make sure that test data accounts for
1403       // this possibility. Decrease q[j] by 1
1404       q[j]--;
1405       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1406       // A carry will occur to the left of u[j+n], and it should be ignored
1407       // since it cancels with the borrow that occurred in D4.
1408       bool carry = false;
1409       for (unsigned i = 0; i < n; i++) {
1410         uint32_t limit = std::min(u[j+i],v[i]);
1411         u[j+i] += v[i] + carry;
1412         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1413       }
1414       u[j+n] += carry;
1415     }
1416     DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1417     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1418     DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1419 
1420     // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1421   } while (--j >= 0);
1422 
1423   DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1424   DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1425   DEBUG_KNUTH(dbgs() << '\n');
1426 
1427   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1428   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1429   // compute the remainder (urem uses this).
1430   if (r) {
1431     // The value d is expressed by the "shift" value above since we avoided
1432     // multiplication by d by using a shift left. So, all we have to do is
1433     // shift right here.
1434     if (shift) {
1435       uint32_t carry = 0;
1436       DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1437       for (int i = n-1; i >= 0; i--) {
1438         r[i] = (u[i] >> shift) | carry;
1439         carry = u[i] << (32 - shift);
1440         DEBUG_KNUTH(dbgs() << " " << r[i]);
1441       }
1442     } else {
1443       for (int i = n-1; i >= 0; i--) {
1444         r[i] = u[i];
1445         DEBUG_KNUTH(dbgs() << " " << r[i]);
1446       }
1447     }
1448     DEBUG_KNUTH(dbgs() << '\n');
1449   }
1450   DEBUG_KNUTH(dbgs() << '\n');
1451 }
1452 
1453 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1454                    unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1455   assert(lhsWords >= rhsWords && "Fractional result");
1456 
1457   // First, compose the values into an array of 32-bit words instead of
1458   // 64-bit words. This is a necessity of both the "short division" algorithm
1459   // and the Knuth "classical algorithm" which requires there to be native
1460   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1461   // can't use 64-bit operands here because we don't have native results of
1462   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1463   // work on large-endian machines.
1464   unsigned n = rhsWords * 2;
1465   unsigned m = (lhsWords * 2) - n;
1466 
1467   // Allocate space for the temporary values we need either on the stack, if
1468   // it will fit, or on the heap if it won't.
1469   uint32_t SPACE[128];
1470   uint32_t *U = nullptr;
1471   uint32_t *V = nullptr;
1472   uint32_t *Q = nullptr;
1473   uint32_t *R = nullptr;
1474   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1475     U = &SPACE[0];
1476     V = &SPACE[m+n+1];
1477     Q = &SPACE[(m+n+1) + n];
1478     if (Remainder)
1479       R = &SPACE[(m+n+1) + n + (m+n)];
1480   } else {
1481     U = new uint32_t[m + n + 1];
1482     V = new uint32_t[n];
1483     Q = new uint32_t[m+n];
1484     if (Remainder)
1485       R = new uint32_t[n];
1486   }
1487 
1488   // Initialize the dividend
1489   memset(U, 0, (m+n+1)*sizeof(uint32_t));
1490   for (unsigned i = 0; i < lhsWords; ++i) {
1491     uint64_t tmp = LHS[i];
1492     U[i * 2] = Lo_32(tmp);
1493     U[i * 2 + 1] = Hi_32(tmp);
1494   }
1495   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1496 
1497   // Initialize the divisor
1498   memset(V, 0, (n)*sizeof(uint32_t));
1499   for (unsigned i = 0; i < rhsWords; ++i) {
1500     uint64_t tmp = RHS[i];
1501     V[i * 2] = Lo_32(tmp);
1502     V[i * 2 + 1] = Hi_32(tmp);
1503   }
1504 
1505   // initialize the quotient and remainder
1506   memset(Q, 0, (m+n) * sizeof(uint32_t));
1507   if (Remainder)
1508     memset(R, 0, n * sizeof(uint32_t));
1509 
1510   // Now, adjust m and n for the Knuth division. n is the number of words in
1511   // the divisor. m is the number of words by which the dividend exceeds the
1512   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1513   // contain any zero words or the Knuth algorithm fails.
1514   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1515     n--;
1516     m++;
1517   }
1518   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1519     m--;
1520 
1521   // If we're left with only a single word for the divisor, Knuth doesn't work
1522   // so we implement the short division algorithm here. This is much simpler
1523   // and faster because we are certain that we can divide a 64-bit quantity
1524   // by a 32-bit quantity at hardware speed and short division is simply a
1525   // series of such operations. This is just like doing short division but we
1526   // are using base 2^32 instead of base 10.
1527   assert(n != 0 && "Divide by zero?");
1528   if (n == 1) {
1529     uint32_t divisor = V[0];
1530     uint32_t remainder = 0;
1531     for (int i = m; i >= 0; i--) {
1532       uint64_t partial_dividend = Make_64(remainder, U[i]);
1533       if (partial_dividend == 0) {
1534         Q[i] = 0;
1535         remainder = 0;
1536       } else if (partial_dividend < divisor) {
1537         Q[i] = 0;
1538         remainder = Lo_32(partial_dividend);
1539       } else if (partial_dividend == divisor) {
1540         Q[i] = 1;
1541         remainder = 0;
1542       } else {
1543         Q[i] = Lo_32(partial_dividend / divisor);
1544         remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1545       }
1546     }
1547     if (R)
1548       R[0] = remainder;
1549   } else {
1550     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1551     // case n > 1.
1552     KnuthDiv(U, V, Q, R, m, n);
1553   }
1554 
1555   // If the caller wants the quotient
1556   if (Quotient) {
1557     for (unsigned i = 0; i < lhsWords; ++i)
1558       Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1559   }
1560 
1561   // If the caller wants the remainder
1562   if (Remainder) {
1563     for (unsigned i = 0; i < rhsWords; ++i)
1564       Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1565   }
1566 
1567   // Clean up the memory we allocated.
1568   if (U != &SPACE[0]) {
1569     delete [] U;
1570     delete [] V;
1571     delete [] Q;
1572     delete [] R;
1573   }
1574 }
1575 
1576 APInt APInt::udiv(const APInt &RHS) const {
1577   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1578 
1579   // First, deal with the easy case
1580   if (isSingleWord()) {
1581     assert(RHS.U.VAL != 0 && "Divide by zero?");
1582     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1583   }
1584 
1585   // Get some facts about the LHS and RHS number of bits and words
1586   unsigned lhsWords = getNumWords(getActiveBits());
1587   unsigned rhsBits  = RHS.getActiveBits();
1588   unsigned rhsWords = getNumWords(rhsBits);
1589   assert(rhsWords && "Divided by zero???");
1590 
1591   // Deal with some degenerate cases
1592   if (!lhsWords)
1593     // 0 / X ===> 0
1594     return APInt(BitWidth, 0);
1595   if (rhsBits == 1)
1596     // X / 1 ===> X
1597     return *this;
1598   if (lhsWords < rhsWords || this->ult(RHS))
1599     // X / Y ===> 0, iff X < Y
1600     return APInt(BitWidth, 0);
1601   if (*this == RHS)
1602     // X / X ===> 1
1603     return APInt(BitWidth, 1);
1604   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1605     // All high words are zero, just use native divide
1606     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1607 
1608   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1609   APInt Quotient(BitWidth, 0); // to hold result.
1610   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1611   return Quotient;
1612 }
1613 
1614 APInt APInt::udiv(uint64_t RHS) const {
1615   assert(RHS != 0 && "Divide by zero?");
1616 
1617   // First, deal with the easy case
1618   if (isSingleWord())
1619     return APInt(BitWidth, U.VAL / RHS);
1620 
1621   // Get some facts about the LHS words.
1622   unsigned lhsWords = getNumWords(getActiveBits());
1623 
1624   // Deal with some degenerate cases
1625   if (!lhsWords)
1626     // 0 / X ===> 0
1627     return APInt(BitWidth, 0);
1628   if (RHS == 1)
1629     // X / 1 ===> X
1630     return *this;
1631   if (this->ult(RHS))
1632     // X / Y ===> 0, iff X < Y
1633     return APInt(BitWidth, 0);
1634   if (*this == RHS)
1635     // X / X ===> 1
1636     return APInt(BitWidth, 1);
1637   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1638     // All high words are zero, just use native divide
1639     return APInt(BitWidth, this->U.pVal[0] / RHS);
1640 
1641   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1642   APInt Quotient(BitWidth, 0); // to hold result.
1643   divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1644   return Quotient;
1645 }
1646 
1647 APInt APInt::sdiv(const APInt &RHS) const {
1648   if (isNegative()) {
1649     if (RHS.isNegative())
1650       return (-(*this)).udiv(-RHS);
1651     return -((-(*this)).udiv(RHS));
1652   }
1653   if (RHS.isNegative())
1654     return -(this->udiv(-RHS));
1655   return this->udiv(RHS);
1656 }
1657 
1658 APInt APInt::sdiv(int64_t RHS) const {
1659   if (isNegative()) {
1660     if (RHS < 0)
1661       return (-(*this)).udiv(-RHS);
1662     return -((-(*this)).udiv(RHS));
1663   }
1664   if (RHS < 0)
1665     return -(this->udiv(-RHS));
1666   return this->udiv(RHS);
1667 }
1668 
1669 APInt APInt::urem(const APInt &RHS) const {
1670   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1671   if (isSingleWord()) {
1672     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1673     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1674   }
1675 
1676   // Get some facts about the LHS
1677   unsigned lhsWords = getNumWords(getActiveBits());
1678 
1679   // Get some facts about the RHS
1680   unsigned rhsBits = RHS.getActiveBits();
1681   unsigned rhsWords = getNumWords(rhsBits);
1682   assert(rhsWords && "Performing remainder operation by zero ???");
1683 
1684   // Check the degenerate cases
1685   if (lhsWords == 0)
1686     // 0 % Y ===> 0
1687     return APInt(BitWidth, 0);
1688   if (rhsBits == 1)
1689     // X % 1 ===> 0
1690     return APInt(BitWidth, 0);
1691   if (lhsWords < rhsWords || this->ult(RHS))
1692     // X % Y ===> X, iff X < Y
1693     return *this;
1694   if (*this == RHS)
1695     // X % X == 0;
1696     return APInt(BitWidth, 0);
1697   if (lhsWords == 1)
1698     // All high words are zero, just use native remainder
1699     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1700 
1701   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1702   APInt Remainder(BitWidth, 0);
1703   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1704   return Remainder;
1705 }
1706 
1707 uint64_t APInt::urem(uint64_t RHS) const {
1708   assert(RHS != 0 && "Remainder by zero?");
1709 
1710   if (isSingleWord())
1711     return U.VAL % RHS;
1712 
1713   // Get some facts about the LHS
1714   unsigned lhsWords = getNumWords(getActiveBits());
1715 
1716   // Check the degenerate cases
1717   if (lhsWords == 0)
1718     // 0 % Y ===> 0
1719     return 0;
1720   if (RHS == 1)
1721     // X % 1 ===> 0
1722     return 0;
1723   if (this->ult(RHS))
1724     // X % Y ===> X, iff X < Y
1725     return getZExtValue();
1726   if (*this == RHS)
1727     // X % X == 0;
1728     return 0;
1729   if (lhsWords == 1)
1730     // All high words are zero, just use native remainder
1731     return U.pVal[0] % RHS;
1732 
1733   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1734   uint64_t Remainder;
1735   divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1736   return Remainder;
1737 }
1738 
1739 APInt APInt::srem(const APInt &RHS) const {
1740   if (isNegative()) {
1741     if (RHS.isNegative())
1742       return -((-(*this)).urem(-RHS));
1743     return -((-(*this)).urem(RHS));
1744   }
1745   if (RHS.isNegative())
1746     return this->urem(-RHS);
1747   return this->urem(RHS);
1748 }
1749 
1750 int64_t APInt::srem(int64_t RHS) const {
1751   if (isNegative()) {
1752     if (RHS < 0)
1753       return -((-(*this)).urem(-RHS));
1754     return -((-(*this)).urem(RHS));
1755   }
1756   if (RHS < 0)
1757     return this->urem(-RHS);
1758   return this->urem(RHS);
1759 }
1760 
1761 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1762                     APInt &Quotient, APInt &Remainder) {
1763   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1764   unsigned BitWidth = LHS.BitWidth;
1765 
1766   // First, deal with the easy case
1767   if (LHS.isSingleWord()) {
1768     assert(RHS.U.VAL != 0 && "Divide by zero?");
1769     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1770     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1771     Quotient = APInt(BitWidth, QuotVal);
1772     Remainder = APInt(BitWidth, RemVal);
1773     return;
1774   }
1775 
1776   // Get some size facts about the dividend and divisor
1777   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1778   unsigned rhsBits  = RHS.getActiveBits();
1779   unsigned rhsWords = getNumWords(rhsBits);
1780   assert(rhsWords && "Performing divrem operation by zero ???");
1781 
1782   // Check the degenerate cases
1783   if (lhsWords == 0) {
1784     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1785     Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1786     return;
1787   }
1788 
1789   if (rhsBits == 1) {
1790     Quotient = LHS;                   // X / 1 ===> X
1791     Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1792   }
1793 
1794   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1795     Remainder = LHS;                  // X % Y ===> X, iff X < Y
1796     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1797     return;
1798   }
1799 
1800   if (LHS == RHS) {
1801     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1802     Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1803     return;
1804   }
1805 
1806   // Make sure there is enough space to hold the results.
1807   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1808   // change the size. This is necessary if Quotient or Remainder is aliased
1809   // with LHS or RHS.
1810   Quotient.reallocate(BitWidth);
1811   Remainder.reallocate(BitWidth);
1812 
1813   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1814     // There is only one word to consider so use the native versions.
1815     uint64_t lhsValue = LHS.U.pVal[0];
1816     uint64_t rhsValue = RHS.U.pVal[0];
1817     Quotient = lhsValue / rhsValue;
1818     Remainder = lhsValue % rhsValue;
1819     return;
1820   }
1821 
1822   // Okay, lets do it the long way
1823   divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1824          Remainder.U.pVal);
1825   // Clear the rest of the Quotient and Remainder.
1826   std::memset(Quotient.U.pVal + lhsWords, 0,
1827               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1828   std::memset(Remainder.U.pVal + rhsWords, 0,
1829               (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1830 }
1831 
1832 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1833                     uint64_t &Remainder) {
1834   assert(RHS != 0 && "Divide by zero?");
1835   unsigned BitWidth = LHS.BitWidth;
1836 
1837   // First, deal with the easy case
1838   if (LHS.isSingleWord()) {
1839     uint64_t QuotVal = LHS.U.VAL / RHS;
1840     Remainder = LHS.U.VAL % RHS;
1841     Quotient = APInt(BitWidth, QuotVal);
1842     return;
1843   }
1844 
1845   // Get some size facts about the dividend and divisor
1846   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1847 
1848   // Check the degenerate cases
1849   if (lhsWords == 0) {
1850     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1851     Remainder = 0;                    // 0 % Y ===> 0
1852     return;
1853   }
1854 
1855   if (RHS == 1) {
1856     Quotient = LHS;                   // X / 1 ===> X
1857     Remainder = 0;                    // X % 1 ===> 0
1858     return;
1859   }
1860 
1861   if (LHS.ult(RHS)) {
1862     Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1863     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1864     return;
1865   }
1866 
1867   if (LHS == RHS) {
1868     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1869     Remainder = 0;                    // X % X ===> 0;
1870     return;
1871   }
1872 
1873   // Make sure there is enough space to hold the results.
1874   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1875   // change the size. This is necessary if Quotient is aliased with LHS.
1876   Quotient.reallocate(BitWidth);
1877 
1878   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1879     // There is only one word to consider so use the native versions.
1880     uint64_t lhsValue = LHS.U.pVal[0];
1881     Quotient = lhsValue / RHS;
1882     Remainder = lhsValue % RHS;
1883     return;
1884   }
1885 
1886   // Okay, lets do it the long way
1887   divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1888   // Clear the rest of the Quotient.
1889   std::memset(Quotient.U.pVal + lhsWords, 0,
1890               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1891 }
1892 
1893 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1894                     APInt &Quotient, APInt &Remainder) {
1895   if (LHS.isNegative()) {
1896     if (RHS.isNegative())
1897       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1898     else {
1899       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1900       Quotient.negate();
1901     }
1902     Remainder.negate();
1903   } else if (RHS.isNegative()) {
1904     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1905     Quotient.negate();
1906   } else {
1907     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1908   }
1909 }
1910 
1911 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1912                     APInt &Quotient, int64_t &Remainder) {
1913   uint64_t R = Remainder;
1914   if (LHS.isNegative()) {
1915     if (RHS < 0)
1916       APInt::udivrem(-LHS, -RHS, Quotient, R);
1917     else {
1918       APInt::udivrem(-LHS, RHS, Quotient, R);
1919       Quotient.negate();
1920     }
1921     R = -R;
1922   } else if (RHS < 0) {
1923     APInt::udivrem(LHS, -RHS, Quotient, R);
1924     Quotient.negate();
1925   } else {
1926     APInt::udivrem(LHS, RHS, Quotient, R);
1927   }
1928   Remainder = R;
1929 }
1930 
1931 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1932   APInt Res = *this+RHS;
1933   Overflow = isNonNegative() == RHS.isNonNegative() &&
1934              Res.isNonNegative() != isNonNegative();
1935   return Res;
1936 }
1937 
1938 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1939   APInt Res = *this+RHS;
1940   Overflow = Res.ult(RHS);
1941   return Res;
1942 }
1943 
1944 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1945   APInt Res = *this - RHS;
1946   Overflow = isNonNegative() != RHS.isNonNegative() &&
1947              Res.isNonNegative() != isNonNegative();
1948   return Res;
1949 }
1950 
1951 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1952   APInt Res = *this-RHS;
1953   Overflow = Res.ugt(*this);
1954   return Res;
1955 }
1956 
1957 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1958   // MININT/-1  -->  overflow.
1959   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1960   return sdiv(RHS);
1961 }
1962 
1963 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1964   APInt Res = *this * RHS;
1965 
1966   if (*this != 0 && RHS != 0)
1967     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1968   else
1969     Overflow = false;
1970   return Res;
1971 }
1972 
1973 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1974   if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
1975     Overflow = true;
1976     return *this * RHS;
1977   }
1978 
1979   APInt Res = lshr(1) * RHS;
1980   Overflow = Res.isNegative();
1981   Res <<= 1;
1982   if ((*this)[0]) {
1983     Res += RHS;
1984     if (Res.ult(RHS))
1985       Overflow = true;
1986   }
1987   return Res;
1988 }
1989 
1990 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1991   Overflow = ShAmt.uge(getBitWidth());
1992   if (Overflow)
1993     return APInt(BitWidth, 0);
1994 
1995   if (isNonNegative()) // Don't allow sign change.
1996     Overflow = ShAmt.uge(countLeadingZeros());
1997   else
1998     Overflow = ShAmt.uge(countLeadingOnes());
1999 
2000   return *this << ShAmt;
2001 }
2002 
2003 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
2004   Overflow = ShAmt.uge(getBitWidth());
2005   if (Overflow)
2006     return APInt(BitWidth, 0);
2007 
2008   Overflow = ShAmt.ugt(countLeadingZeros());
2009 
2010   return *this << ShAmt;
2011 }
2012 
2013 APInt APInt::sadd_sat(const APInt &RHS) const {
2014   bool Overflow;
2015   APInt Res = sadd_ov(RHS, Overflow);
2016   if (!Overflow)
2017     return Res;
2018 
2019   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2020                       : APInt::getSignedMaxValue(BitWidth);
2021 }
2022 
2023 APInt APInt::uadd_sat(const APInt &RHS) const {
2024   bool Overflow;
2025   APInt Res = uadd_ov(RHS, Overflow);
2026   if (!Overflow)
2027     return Res;
2028 
2029   return APInt::getMaxValue(BitWidth);
2030 }
2031 
2032 APInt APInt::ssub_sat(const APInt &RHS) const {
2033   bool Overflow;
2034   APInt Res = ssub_ov(RHS, Overflow);
2035   if (!Overflow)
2036     return Res;
2037 
2038   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2039                       : APInt::getSignedMaxValue(BitWidth);
2040 }
2041 
2042 APInt APInt::usub_sat(const APInt &RHS) const {
2043   bool Overflow;
2044   APInt Res = usub_ov(RHS, Overflow);
2045   if (!Overflow)
2046     return Res;
2047 
2048   return APInt(BitWidth, 0);
2049 }
2050 
2051 APInt APInt::smul_sat(const APInt &RHS) const {
2052   bool Overflow;
2053   APInt Res = smul_ov(RHS, Overflow);
2054   if (!Overflow)
2055     return Res;
2056 
2057   // The result is negative if one and only one of inputs is negative.
2058   bool ResIsNegative = isNegative() ^ RHS.isNegative();
2059 
2060   return ResIsNegative ? APInt::getSignedMinValue(BitWidth)
2061                        : APInt::getSignedMaxValue(BitWidth);
2062 }
2063 
2064 APInt APInt::umul_sat(const APInt &RHS) const {
2065   bool Overflow;
2066   APInt Res = umul_ov(RHS, Overflow);
2067   if (!Overflow)
2068     return Res;
2069 
2070   return APInt::getMaxValue(BitWidth);
2071 }
2072 
2073 APInt APInt::sshl_sat(const APInt &RHS) const {
2074   bool Overflow;
2075   APInt Res = sshl_ov(RHS, Overflow);
2076   if (!Overflow)
2077     return Res;
2078 
2079   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2080                       : APInt::getSignedMaxValue(BitWidth);
2081 }
2082 
2083 APInt APInt::ushl_sat(const APInt &RHS) const {
2084   bool Overflow;
2085   APInt Res = ushl_ov(RHS, Overflow);
2086   if (!Overflow)
2087     return Res;
2088 
2089   return APInt::getMaxValue(BitWidth);
2090 }
2091 
2092 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2093   // Check our assumptions here
2094   assert(!str.empty() && "Invalid string length");
2095   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2096           radix == 36) &&
2097          "Radix should be 2, 8, 10, 16, or 36!");
2098 
2099   StringRef::iterator p = str.begin();
2100   size_t slen = str.size();
2101   bool isNeg = *p == '-';
2102   if (*p == '-' || *p == '+') {
2103     p++;
2104     slen--;
2105     assert(slen && "String is only a sign, needs a value.");
2106   }
2107   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2108   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2109   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2110   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2111          "Insufficient bit width");
2112 
2113   // Allocate memory if needed
2114   if (isSingleWord())
2115     U.VAL = 0;
2116   else
2117     U.pVal = getClearedMemory(getNumWords());
2118 
2119   // Figure out if we can shift instead of multiply
2120   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2121 
2122   // Enter digit traversal loop
2123   for (StringRef::iterator e = str.end(); p != e; ++p) {
2124     unsigned digit = getDigit(*p, radix);
2125     assert(digit < radix && "Invalid character in digit string");
2126 
2127     // Shift or multiply the value by the radix
2128     if (slen > 1) {
2129       if (shift)
2130         *this <<= shift;
2131       else
2132         *this *= radix;
2133     }
2134 
2135     // Add in the digit we just interpreted
2136     *this += digit;
2137   }
2138   // If its negative, put it in two's complement form
2139   if (isNeg)
2140     this->negate();
2141 }
2142 
2143 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2144                      bool Signed, bool formatAsCLiteral) const {
2145   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2146           Radix == 36) &&
2147          "Radix should be 2, 8, 10, 16, or 36!");
2148 
2149   const char *Prefix = "";
2150   if (formatAsCLiteral) {
2151     switch (Radix) {
2152       case 2:
2153         // Binary literals are a non-standard extension added in gcc 4.3:
2154         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2155         Prefix = "0b";
2156         break;
2157       case 8:
2158         Prefix = "0";
2159         break;
2160       case 10:
2161         break; // No prefix
2162       case 16:
2163         Prefix = "0x";
2164         break;
2165       default:
2166         llvm_unreachable("Invalid radix!");
2167     }
2168   }
2169 
2170   // First, check for a zero value and just short circuit the logic below.
2171   if (*this == 0) {
2172     while (*Prefix) {
2173       Str.push_back(*Prefix);
2174       ++Prefix;
2175     };
2176     Str.push_back('0');
2177     return;
2178   }
2179 
2180   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2181 
2182   if (isSingleWord()) {
2183     char Buffer[65];
2184     char *BufPtr = std::end(Buffer);
2185 
2186     uint64_t N;
2187     if (!Signed) {
2188       N = getZExtValue();
2189     } else {
2190       int64_t I = getSExtValue();
2191       if (I >= 0) {
2192         N = I;
2193       } else {
2194         Str.push_back('-');
2195         N = -(uint64_t)I;
2196       }
2197     }
2198 
2199     while (*Prefix) {
2200       Str.push_back(*Prefix);
2201       ++Prefix;
2202     };
2203 
2204     while (N) {
2205       *--BufPtr = Digits[N % Radix];
2206       N /= Radix;
2207     }
2208     Str.append(BufPtr, std::end(Buffer));
2209     return;
2210   }
2211 
2212   APInt Tmp(*this);
2213 
2214   if (Signed && isNegative()) {
2215     // They want to print the signed version and it is a negative value
2216     // Flip the bits and add one to turn it into the equivalent positive
2217     // value and put a '-' in the result.
2218     Tmp.negate();
2219     Str.push_back('-');
2220   }
2221 
2222   while (*Prefix) {
2223     Str.push_back(*Prefix);
2224     ++Prefix;
2225   };
2226 
2227   // We insert the digits backward, then reverse them to get the right order.
2228   unsigned StartDig = Str.size();
2229 
2230   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2231   // because the number of bits per digit (1, 3 and 4 respectively) divides
2232   // equally.  We just shift until the value is zero.
2233   if (Radix == 2 || Radix == 8 || Radix == 16) {
2234     // Just shift tmp right for each digit width until it becomes zero
2235     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2236     unsigned MaskAmt = Radix - 1;
2237 
2238     while (Tmp.getBoolValue()) {
2239       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2240       Str.push_back(Digits[Digit]);
2241       Tmp.lshrInPlace(ShiftAmt);
2242     }
2243   } else {
2244     while (Tmp.getBoolValue()) {
2245       uint64_t Digit;
2246       udivrem(Tmp, Radix, Tmp, Digit);
2247       assert(Digit < Radix && "divide failed");
2248       Str.push_back(Digits[Digit]);
2249     }
2250   }
2251 
2252   // Reverse the digits before returning.
2253   std::reverse(Str.begin()+StartDig, Str.end());
2254 }
2255 
2256 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2257 /// It is better to pass in a SmallVector/SmallString to the methods above.
2258 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2259   SmallString<40> S;
2260   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2261   return S.str();
2262 }
2263 
2264 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2265 LLVM_DUMP_METHOD void APInt::dump() const {
2266   SmallString<40> S, U;
2267   this->toStringUnsigned(U);
2268   this->toStringSigned(S);
2269   dbgs() << "APInt(" << BitWidth << "b, "
2270          << U << "u " << S << "s)\n";
2271 }
2272 #endif
2273 
2274 void APInt::print(raw_ostream &OS, bool isSigned) const {
2275   SmallString<40> S;
2276   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2277   OS << S;
2278 }
2279 
2280 // This implements a variety of operations on a representation of
2281 // arbitrary precision, two's-complement, bignum integer values.
2282 
2283 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2284 // and unrestricting assumption.
2285 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2286               "Part width must be divisible by 2!");
2287 
2288 /* Some handy functions local to this file.  */
2289 
2290 /* Returns the integer part with the least significant BITS set.
2291    BITS cannot be zero.  */
2292 static inline APInt::WordType lowBitMask(unsigned bits) {
2293   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2294 
2295   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2296 }
2297 
2298 /* Returns the value of the lower half of PART.  */
2299 static inline APInt::WordType lowHalf(APInt::WordType part) {
2300   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2301 }
2302 
2303 /* Returns the value of the upper half of PART.  */
2304 static inline APInt::WordType highHalf(APInt::WordType part) {
2305   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2306 }
2307 
2308 /* Returns the bit number of the most significant set bit of a part.
2309    If the input number has no bits set -1U is returned.  */
2310 static unsigned partMSB(APInt::WordType value) {
2311   return findLastSet(value, ZB_Max);
2312 }
2313 
2314 /* Returns the bit number of the least significant set bit of a
2315    part.  If the input number has no bits set -1U is returned.  */
2316 static unsigned partLSB(APInt::WordType value) {
2317   return findFirstSet(value, ZB_Max);
2318 }
2319 
2320 /* Sets the least significant part of a bignum to the input value, and
2321    zeroes out higher parts.  */
2322 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2323   assert(parts > 0);
2324 
2325   dst[0] = part;
2326   for (unsigned i = 1; i < parts; i++)
2327     dst[i] = 0;
2328 }
2329 
2330 /* Assign one bignum to another.  */
2331 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2332   for (unsigned i = 0; i < parts; i++)
2333     dst[i] = src[i];
2334 }
2335 
2336 /* Returns true if a bignum is zero, false otherwise.  */
2337 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2338   for (unsigned i = 0; i < parts; i++)
2339     if (src[i])
2340       return false;
2341 
2342   return true;
2343 }
2344 
2345 /* Extract the given bit of a bignum; returns 0 or 1.  */
2346 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2347   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2348 }
2349 
2350 /* Set the given bit of a bignum. */
2351 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2352   parts[whichWord(bit)] |= maskBit(bit);
2353 }
2354 
2355 /* Clears the given bit of a bignum. */
2356 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2357   parts[whichWord(bit)] &= ~maskBit(bit);
2358 }
2359 
2360 /* Returns the bit number of the least significant set bit of a
2361    number.  If the input number has no bits set -1U is returned.  */
2362 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2363   for (unsigned i = 0; i < n; i++) {
2364     if (parts[i] != 0) {
2365       unsigned lsb = partLSB(parts[i]);
2366 
2367       return lsb + i * APINT_BITS_PER_WORD;
2368     }
2369   }
2370 
2371   return -1U;
2372 }
2373 
2374 /* Returns the bit number of the most significant set bit of a number.
2375    If the input number has no bits set -1U is returned.  */
2376 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2377   do {
2378     --n;
2379 
2380     if (parts[n] != 0) {
2381       unsigned msb = partMSB(parts[n]);
2382 
2383       return msb + n * APINT_BITS_PER_WORD;
2384     }
2385   } while (n);
2386 
2387   return -1U;
2388 }
2389 
2390 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2391    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2392    the least significant bit of DST.  All high bits above srcBITS in
2393    DST are zero-filled.  */
2394 void
2395 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2396                  unsigned srcBits, unsigned srcLSB) {
2397   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2398   assert(dstParts <= dstCount);
2399 
2400   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2401   tcAssign (dst, src + firstSrcPart, dstParts);
2402 
2403   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2404   tcShiftRight (dst, dstParts, shift);
2405 
2406   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2407      in DST.  If this is less that srcBits, append the rest, else
2408      clear the high bits.  */
2409   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2410   if (n < srcBits) {
2411     WordType mask = lowBitMask (srcBits - n);
2412     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2413                           << n % APINT_BITS_PER_WORD);
2414   } else if (n > srcBits) {
2415     if (srcBits % APINT_BITS_PER_WORD)
2416       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2417   }
2418 
2419   /* Clear high parts.  */
2420   while (dstParts < dstCount)
2421     dst[dstParts++] = 0;
2422 }
2423 
2424 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2425 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2426                              WordType c, unsigned parts) {
2427   assert(c <= 1);
2428 
2429   for (unsigned i = 0; i < parts; i++) {
2430     WordType l = dst[i];
2431     if (c) {
2432       dst[i] += rhs[i] + 1;
2433       c = (dst[i] <= l);
2434     } else {
2435       dst[i] += rhs[i];
2436       c = (dst[i] < l);
2437     }
2438   }
2439 
2440   return c;
2441 }
2442 
2443 /// This function adds a single "word" integer, src, to the multiple
2444 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2445 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2446 /// @returns the carry of the addition.
2447 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2448                                  unsigned parts) {
2449   for (unsigned i = 0; i < parts; ++i) {
2450     dst[i] += src;
2451     if (dst[i] >= src)
2452       return 0; // No need to carry so exit early.
2453     src = 1; // Carry one to next digit.
2454   }
2455 
2456   return 1;
2457 }
2458 
2459 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2460 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2461                                   WordType c, unsigned parts) {
2462   assert(c <= 1);
2463 
2464   for (unsigned i = 0; i < parts; i++) {
2465     WordType l = dst[i];
2466     if (c) {
2467       dst[i] -= rhs[i] + 1;
2468       c = (dst[i] >= l);
2469     } else {
2470       dst[i] -= rhs[i];
2471       c = (dst[i] > l);
2472     }
2473   }
2474 
2475   return c;
2476 }
2477 
2478 /// This function subtracts a single "word" (64-bit word), src, from
2479 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2480 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2481 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2482 /// exhausted. In other words, if src > dst then this function returns 1,
2483 /// otherwise 0.
2484 /// @returns the borrow out of the subtraction
2485 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2486                                       unsigned parts) {
2487   for (unsigned i = 0; i < parts; ++i) {
2488     WordType Dst = dst[i];
2489     dst[i] -= src;
2490     if (src <= Dst)
2491       return 0; // No need to borrow so exit early.
2492     src = 1; // We have to "borrow 1" from next "word"
2493   }
2494 
2495   return 1;
2496 }
2497 
2498 /* Negate a bignum in-place.  */
2499 void APInt::tcNegate(WordType *dst, unsigned parts) {
2500   tcComplement(dst, parts);
2501   tcIncrement(dst, parts);
2502 }
2503 
2504 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2505     DST  = SRC * MULTIPLIER + CARRY   if add is false
2506 
2507     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2508     they must start at the same point, i.e. DST == SRC.
2509 
2510     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2511     returned.  Otherwise DST is filled with the least significant
2512     DSTPARTS parts of the result, and if all of the omitted higher
2513     parts were zero return zero, otherwise overflow occurred and
2514     return one.  */
2515 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2516                           WordType multiplier, WordType carry,
2517                           unsigned srcParts, unsigned dstParts,
2518                           bool add) {
2519   /* Otherwise our writes of DST kill our later reads of SRC.  */
2520   assert(dst <= src || dst >= src + srcParts);
2521   assert(dstParts <= srcParts + 1);
2522 
2523   /* N loops; minimum of dstParts and srcParts.  */
2524   unsigned n = std::min(dstParts, srcParts);
2525 
2526   for (unsigned i = 0; i < n; i++) {
2527     WordType low, mid, high, srcPart;
2528 
2529       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2530 
2531          This cannot overflow, because
2532 
2533          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2534 
2535          which is less than n^2.  */
2536 
2537     srcPart = src[i];
2538 
2539     if (multiplier == 0 || srcPart == 0) {
2540       low = carry;
2541       high = 0;
2542     } else {
2543       low = lowHalf(srcPart) * lowHalf(multiplier);
2544       high = highHalf(srcPart) * highHalf(multiplier);
2545 
2546       mid = lowHalf(srcPart) * highHalf(multiplier);
2547       high += highHalf(mid);
2548       mid <<= APINT_BITS_PER_WORD / 2;
2549       if (low + mid < low)
2550         high++;
2551       low += mid;
2552 
2553       mid = highHalf(srcPart) * lowHalf(multiplier);
2554       high += highHalf(mid);
2555       mid <<= APINT_BITS_PER_WORD / 2;
2556       if (low + mid < low)
2557         high++;
2558       low += mid;
2559 
2560       /* Now add carry.  */
2561       if (low + carry < low)
2562         high++;
2563       low += carry;
2564     }
2565 
2566     if (add) {
2567       /* And now DST[i], and store the new low part there.  */
2568       if (low + dst[i] < low)
2569         high++;
2570       dst[i] += low;
2571     } else
2572       dst[i] = low;
2573 
2574     carry = high;
2575   }
2576 
2577   if (srcParts < dstParts) {
2578     /* Full multiplication, there is no overflow.  */
2579     assert(srcParts + 1 == dstParts);
2580     dst[srcParts] = carry;
2581     return 0;
2582   }
2583 
2584   /* We overflowed if there is carry.  */
2585   if (carry)
2586     return 1;
2587 
2588   /* We would overflow if any significant unwritten parts would be
2589      non-zero.  This is true if any remaining src parts are non-zero
2590      and the multiplier is non-zero.  */
2591   if (multiplier)
2592     for (unsigned i = dstParts; i < srcParts; i++)
2593       if (src[i])
2594         return 1;
2595 
2596   /* We fitted in the narrow destination.  */
2597   return 0;
2598 }
2599 
2600 /* DST = LHS * RHS, where DST has the same width as the operands and
2601    is filled with the least significant parts of the result.  Returns
2602    one if overflow occurred, otherwise zero.  DST must be disjoint
2603    from both operands.  */
2604 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2605                       const WordType *rhs, unsigned parts) {
2606   assert(dst != lhs && dst != rhs);
2607 
2608   int overflow = 0;
2609   tcSet(dst, 0, parts);
2610 
2611   for (unsigned i = 0; i < parts; i++)
2612     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2613                                parts - i, true);
2614 
2615   return overflow;
2616 }
2617 
2618 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2619 /// operands. No overflow occurs. DST must be disjoint from both operands.
2620 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2621                            const WordType *rhs, unsigned lhsParts,
2622                            unsigned rhsParts) {
2623   /* Put the narrower number on the LHS for less loops below.  */
2624   if (lhsParts > rhsParts)
2625     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2626 
2627   assert(dst != lhs && dst != rhs);
2628 
2629   tcSet(dst, 0, rhsParts);
2630 
2631   for (unsigned i = 0; i < lhsParts; i++)
2632     tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2633 }
2634 
2635 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2636    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2637    set REMAINDER to the remainder, return zero.  i.e.
2638 
2639    OLD_LHS = RHS * LHS + REMAINDER
2640 
2641    SCRATCH is a bignum of the same size as the operands and result for
2642    use by the routine; its contents need not be initialized and are
2643    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2644 */
2645 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2646                     WordType *remainder, WordType *srhs,
2647                     unsigned parts) {
2648   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2649 
2650   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2651   if (shiftCount == 0)
2652     return true;
2653 
2654   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2655   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2656   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2657 
2658   tcAssign(srhs, rhs, parts);
2659   tcShiftLeft(srhs, parts, shiftCount);
2660   tcAssign(remainder, lhs, parts);
2661   tcSet(lhs, 0, parts);
2662 
2663   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2664      the total.  */
2665   for (;;) {
2666     int compare = tcCompare(remainder, srhs, parts);
2667     if (compare >= 0) {
2668       tcSubtract(remainder, srhs, 0, parts);
2669       lhs[n] |= mask;
2670     }
2671 
2672     if (shiftCount == 0)
2673       break;
2674     shiftCount--;
2675     tcShiftRight(srhs, parts, 1);
2676     if ((mask >>= 1) == 0) {
2677       mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2678       n--;
2679     }
2680   }
2681 
2682   return false;
2683 }
2684 
2685 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2686 /// no restrictions on Count.
2687 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2688   // Don't bother performing a no-op shift.
2689   if (!Count)
2690     return;
2691 
2692   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2693   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2694   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2695 
2696   // Fastpath for moving by whole words.
2697   if (BitShift == 0) {
2698     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2699   } else {
2700     while (Words-- > WordShift) {
2701       Dst[Words] = Dst[Words - WordShift] << BitShift;
2702       if (Words > WordShift)
2703         Dst[Words] |=
2704           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2705     }
2706   }
2707 
2708   // Fill in the remainder with 0s.
2709   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2710 }
2711 
2712 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2713 /// are no restrictions on Count.
2714 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2715   // Don't bother performing a no-op shift.
2716   if (!Count)
2717     return;
2718 
2719   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2720   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2721   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2722 
2723   unsigned WordsToMove = Words - WordShift;
2724   // Fastpath for moving by whole words.
2725   if (BitShift == 0) {
2726     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2727   } else {
2728     for (unsigned i = 0; i != WordsToMove; ++i) {
2729       Dst[i] = Dst[i + WordShift] >> BitShift;
2730       if (i + 1 != WordsToMove)
2731         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2732     }
2733   }
2734 
2735   // Fill in the remainder with 0s.
2736   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2737 }
2738 
2739 /* Bitwise and of two bignums.  */
2740 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2741   for (unsigned i = 0; i < parts; i++)
2742     dst[i] &= rhs[i];
2743 }
2744 
2745 /* Bitwise inclusive or of two bignums.  */
2746 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2747   for (unsigned i = 0; i < parts; i++)
2748     dst[i] |= rhs[i];
2749 }
2750 
2751 /* Bitwise exclusive or of two bignums.  */
2752 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2753   for (unsigned i = 0; i < parts; i++)
2754     dst[i] ^= rhs[i];
2755 }
2756 
2757 /* Complement a bignum in-place.  */
2758 void APInt::tcComplement(WordType *dst, unsigned parts) {
2759   for (unsigned i = 0; i < parts; i++)
2760     dst[i] = ~dst[i];
2761 }
2762 
2763 /* Comparison (unsigned) of two bignums.  */
2764 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2765                      unsigned parts) {
2766   while (parts) {
2767     parts--;
2768     if (lhs[parts] != rhs[parts])
2769       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2770   }
2771 
2772   return 0;
2773 }
2774 
2775 /* Set the least significant BITS bits of a bignum, clear the
2776    rest.  */
2777 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2778                                       unsigned bits) {
2779   unsigned i = 0;
2780   while (bits > APINT_BITS_PER_WORD) {
2781     dst[i++] = ~(WordType) 0;
2782     bits -= APINT_BITS_PER_WORD;
2783   }
2784 
2785   if (bits)
2786     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2787 
2788   while (i < parts)
2789     dst[i++] = 0;
2790 }
2791 
2792 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2793                                    APInt::Rounding RM) {
2794   // Currently udivrem always rounds down.
2795   switch (RM) {
2796   case APInt::Rounding::DOWN:
2797   case APInt::Rounding::TOWARD_ZERO:
2798     return A.udiv(B);
2799   case APInt::Rounding::UP: {
2800     APInt Quo, Rem;
2801     APInt::udivrem(A, B, Quo, Rem);
2802     if (Rem == 0)
2803       return Quo;
2804     return Quo + 1;
2805   }
2806   }
2807   llvm_unreachable("Unknown APInt::Rounding enum");
2808 }
2809 
2810 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2811                                    APInt::Rounding RM) {
2812   switch (RM) {
2813   case APInt::Rounding::DOWN:
2814   case APInt::Rounding::UP: {
2815     APInt Quo, Rem;
2816     APInt::sdivrem(A, B, Quo, Rem);
2817     if (Rem == 0)
2818       return Quo;
2819     // This algorithm deals with arbitrary rounding mode used by sdivrem.
2820     // We want to check whether the non-integer part of the mathematical value
2821     // is negative or not. If the non-integer part is negative, we need to round
2822     // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2823     // already rounded down.
2824     if (RM == APInt::Rounding::DOWN) {
2825       if (Rem.isNegative() != B.isNegative())
2826         return Quo - 1;
2827       return Quo;
2828     }
2829     if (Rem.isNegative() != B.isNegative())
2830       return Quo;
2831     return Quo + 1;
2832   }
2833   // Currently sdiv rounds twards zero.
2834   case APInt::Rounding::TOWARD_ZERO:
2835     return A.sdiv(B);
2836   }
2837   llvm_unreachable("Unknown APInt::Rounding enum");
2838 }
2839 
2840 Optional<APInt>
2841 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2842                                            unsigned RangeWidth) {
2843   unsigned CoeffWidth = A.getBitWidth();
2844   assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2845   assert(RangeWidth <= CoeffWidth &&
2846          "Value range width should be less than coefficient width");
2847   assert(RangeWidth > 1 && "Value range bit width should be > 1");
2848 
2849   LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2850                     << "x + " << C << ", rw:" << RangeWidth << '\n');
2851 
2852   // Identify 0 as a (non)solution immediately.
2853   if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2854     LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2855     return APInt(CoeffWidth, 0);
2856   }
2857 
2858   // The result of APInt arithmetic has the same bit width as the operands,
2859   // so it can actually lose high bits. A product of two n-bit integers needs
2860   // 2n-1 bits to represent the full value.
2861   // The operation done below (on quadratic coefficients) that can produce
2862   // the largest value is the evaluation of the equation during bisection,
2863   // which needs 3 times the bitwidth of the coefficient, so the total number
2864   // of required bits is 3n.
2865   //
2866   // The purpose of this extension is to simulate the set Z of all integers,
2867   // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2868   // and negative numbers (not so much in a modulo arithmetic). The method
2869   // used to solve the equation is based on the standard formula for real
2870   // numbers, and uses the concepts of "positive" and "negative" with their
2871   // usual meanings.
2872   CoeffWidth *= 3;
2873   A = A.sext(CoeffWidth);
2874   B = B.sext(CoeffWidth);
2875   C = C.sext(CoeffWidth);
2876 
2877   // Make A > 0 for simplicity. Negate cannot overflow at this point because
2878   // the bit width has increased.
2879   if (A.isNegative()) {
2880     A.negate();
2881     B.negate();
2882     C.negate();
2883   }
2884 
2885   // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2886   // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2887   // and R = 2^BitWidth.
2888   // Since we're trying not only to find exact solutions, but also values
2889   // that "wrap around", such a set will always have a solution, i.e. an x
2890   // that satisfies at least one of the equations, or such that |q(x)|
2891   // exceeds kR, while |q(x-1)| for the same k does not.
2892   //
2893   // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2894   // positive solution n (in the above sense), and also such that the n
2895   // will be the least among all solutions corresponding to k = 0, 1, ...
2896   // (more precisely, the least element in the set
2897   //   { n(k) | k is such that a solution n(k) exists }).
2898   //
2899   // Consider the parabola (over real numbers) that corresponds to the
2900   // quadratic equation. Since A > 0, the arms of the parabola will point
2901   // up. Picking different values of k will shift it up and down by R.
2902   //
2903   // We want to shift the parabola in such a way as to reduce the problem
2904   // of solving q(x) = kR to solving shifted_q(x) = 0.
2905   // (The interesting solutions are the ceilings of the real number
2906   // solutions.)
2907   APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2908   APInt TwoA = 2 * A;
2909   APInt SqrB = B * B;
2910   bool PickLow;
2911 
2912   auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2913     assert(A.isStrictlyPositive());
2914     APInt T = V.abs().urem(A);
2915     if (T.isNullValue())
2916       return V;
2917     return V.isNegative() ? V+T : V+(A-T);
2918   };
2919 
2920   // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2921   // iff B is positive.
2922   if (B.isNonNegative()) {
2923     // If B >= 0, the vertex it at a negative location (or at 0), so in
2924     // order to have a non-negative solution we need to pick k that makes
2925     // C-kR negative. To satisfy all the requirements for the solution
2926     // that we are looking for, it needs to be closest to 0 of all k.
2927     C = C.srem(R);
2928     if (C.isStrictlyPositive())
2929       C -= R;
2930     // Pick the greater solution.
2931     PickLow = false;
2932   } else {
2933     // If B < 0, the vertex is at a positive location. For any solution
2934     // to exist, the discriminant must be non-negative. This means that
2935     // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2936     // lower bound on values of k: kR >= C - B^2/4A.
2937     APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2938     // Round LowkR up (towards +inf) to the nearest kR.
2939     LowkR = RoundUp(LowkR, R);
2940 
2941     // If there exists k meeting the condition above, and such that
2942     // C-kR > 0, there will be two positive real number solutions of
2943     // q(x) = kR. Out of all such values of k, pick the one that makes
2944     // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2945     // In other words, find maximum k such that LowkR <= kR < C.
2946     if (C.sgt(LowkR)) {
2947       // If LowkR < C, then such a k is guaranteed to exist because
2948       // LowkR itself is a multiple of R.
2949       C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2950       // Pick the smaller solution.
2951       PickLow = true;
2952     } else {
2953       // If C-kR < 0 for all potential k's, it means that one solution
2954       // will be negative, while the other will be positive. The positive
2955       // solution will shift towards 0 if the parabola is moved up.
2956       // Pick the kR closest to the lower bound (i.e. make C-kR closest
2957       // to 0, or in other words, out of all parabolas that have solutions,
2958       // pick the one that is the farthest "up").
2959       // Since LowkR is itself a multiple of R, simply take C-LowkR.
2960       C -= LowkR;
2961       // Pick the greater solution.
2962       PickLow = false;
2963     }
2964   }
2965 
2966   LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2967                     << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2968 
2969   APInt D = SqrB - 4*A*C;
2970   assert(D.isNonNegative() && "Negative discriminant");
2971   APInt SQ = D.sqrt();
2972 
2973   APInt Q = SQ * SQ;
2974   bool InexactSQ = Q != D;
2975   // The calculated SQ may actually be greater than the exact (non-integer)
2976   // value. If that's the case, decremement SQ to get a value that is lower.
2977   if (Q.sgt(D))
2978     SQ -= 1;
2979 
2980   APInt X;
2981   APInt Rem;
2982 
2983   // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2984   // When using the quadratic formula directly, the calculated low root
2985   // may be greater than the exact one, since we would be subtracting SQ.
2986   // To make sure that the calculated root is not greater than the exact
2987   // one, subtract SQ+1 when calculating the low root (for inexact value
2988   // of SQ).
2989   if (PickLow)
2990     APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
2991   else
2992     APInt::sdivrem(-B + SQ, TwoA, X, Rem);
2993 
2994   // The updated coefficients should be such that the (exact) solution is
2995   // positive. Since APInt division rounds towards 0, the calculated one
2996   // can be 0, but cannot be negative.
2997   assert(X.isNonNegative() && "Solution should be non-negative");
2998 
2999   if (!InexactSQ && Rem.isNullValue()) {
3000     LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
3001     return X;
3002   }
3003 
3004   assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
3005   // The exact value of the square root of D should be between SQ and SQ+1.
3006   // This implies that the solution should be between that corresponding to
3007   // SQ (i.e. X) and that corresponding to SQ+1.
3008   //
3009   // The calculated X cannot be greater than the exact (real) solution.
3010   // Actually it must be strictly less than the exact solution, while
3011   // X+1 will be greater than or equal to it.
3012 
3013   APInt VX = (A*X + B)*X + C;
3014   APInt VY = VX + TwoA*X + A + B;
3015   bool SignChange = VX.isNegative() != VY.isNegative() ||
3016                     VX.isNullValue() != VY.isNullValue();
3017   // If the sign did not change between X and X+1, X is not a valid solution.
3018   // This could happen when the actual (exact) roots don't have an integer
3019   // between them, so they would both be contained between X and X+1.
3020   if (!SignChange) {
3021     LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
3022     return None;
3023   }
3024 
3025   X += 1;
3026   LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
3027   return X;
3028 }
3029 
3030 Optional<unsigned>
3031 llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
3032   assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth");
3033   if (A == B)
3034     return llvm::None;
3035   return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1);
3036 }
3037 
3038 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
3039 /// with the integer held in IntVal.
3040 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
3041                             unsigned StoreBytes) {
3042   assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
3043   const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
3044 
3045   if (sys::IsLittleEndianHost) {
3046     // Little-endian host - the source is ordered from LSB to MSB.  Order the
3047     // destination from LSB to MSB: Do a straight copy.
3048     memcpy(Dst, Src, StoreBytes);
3049   } else {
3050     // Big-endian host - the source is an array of 64 bit words ordered from
3051     // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
3052     // from MSB to LSB: Reverse the word order, but not the bytes in a word.
3053     while (StoreBytes > sizeof(uint64_t)) {
3054       StoreBytes -= sizeof(uint64_t);
3055       // May not be aligned so use memcpy.
3056       memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
3057       Src += sizeof(uint64_t);
3058     }
3059 
3060     memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
3061   }
3062 }
3063 
3064 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
3065 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
3066 void llvm::LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
3067   assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
3068   uint8_t *Dst = reinterpret_cast<uint8_t *>(
3069                    const_cast<uint64_t *>(IntVal.getRawData()));
3070 
3071   if (sys::IsLittleEndianHost)
3072     // Little-endian host - the destination must be ordered from LSB to MSB.
3073     // The source is ordered from LSB to MSB: Do a straight copy.
3074     memcpy(Dst, Src, LoadBytes);
3075   else {
3076     // Big-endian - the destination is an array of 64 bit words ordered from
3077     // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
3078     // ordered from MSB to LSB: Reverse the word order, but not the bytes in
3079     // a word.
3080     while (LoadBytes > sizeof(uint64_t)) {
3081       LoadBytes -= sizeof(uint64_t);
3082       // May not be aligned so use memcpy.
3083       memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
3084       Dst += sizeof(uint64_t);
3085     }
3086 
3087     memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
3088   }
3089 }
3090