1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision integer
10 // constant values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/bit.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <climits>
28 #include <cmath>
29 #include <cstdlib>
30 #include <cstring>
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "apint"
34 
35 /// A utility function for allocating memory, checking for allocation failures,
36 /// and ensuring the contents are zeroed.
37 inline static uint64_t* getClearedMemory(unsigned numWords) {
38   uint64_t *result = new uint64_t[numWords];
39   memset(result, 0, numWords * sizeof(uint64_t));
40   return result;
41 }
42 
43 /// A utility function for allocating memory and checking for allocation
44 /// failure.  The content is not zeroed.
45 inline static uint64_t* getMemory(unsigned numWords) {
46   return new uint64_t[numWords];
47 }
48 
49 /// A utility function that converts a character to a digit.
50 inline static unsigned getDigit(char cdigit, uint8_t radix) {
51   unsigned r;
52 
53   if (radix == 16 || radix == 36) {
54     r = cdigit - '0';
55     if (r <= 9)
56       return r;
57 
58     r = cdigit - 'A';
59     if (r <= radix - 11U)
60       return r + 10;
61 
62     r = cdigit - 'a';
63     if (r <= radix - 11U)
64       return r + 10;
65 
66     radix = 10;
67   }
68 
69   r = cdigit - '0';
70   if (r < radix)
71     return r;
72 
73   return -1U;
74 }
75 
76 
77 void APInt::initSlowCase(uint64_t val, bool isSigned) {
78   U.pVal = getClearedMemory(getNumWords());
79   U.pVal[0] = val;
80   if (isSigned && int64_t(val) < 0)
81     for (unsigned i = 1; i < getNumWords(); ++i)
82       U.pVal[i] = WORDTYPE_MAX;
83   clearUnusedBits();
84 }
85 
86 void APInt::initSlowCase(const APInt& that) {
87   U.pVal = getMemory(getNumWords());
88   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
89 }
90 
91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92   assert(BitWidth && "Bitwidth too small");
93   assert(bigVal.data() && "Null pointer detected!");
94   if (isSingleWord())
95     U.VAL = bigVal[0];
96   else {
97     // Get memory, cleared to 0
98     U.pVal = getClearedMemory(getNumWords());
99     // Calculate the number of words to copy
100     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
101     // Copy the words from bigVal to pVal
102     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
103   }
104   // Make sure unused high bits are cleared
105   clearUnusedBits();
106 }
107 
108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
109   : BitWidth(numBits) {
110   initFromArray(bigVal);
111 }
112 
113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
114   : BitWidth(numBits) {
115   initFromArray(makeArrayRef(bigVal, numWords));
116 }
117 
118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
119   : BitWidth(numbits) {
120   assert(BitWidth && "Bitwidth too small");
121   fromString(numbits, Str, radix);
122 }
123 
124 void APInt::reallocate(unsigned NewBitWidth) {
125   // If the number of words is the same we can just change the width and stop.
126   if (getNumWords() == getNumWords(NewBitWidth)) {
127     BitWidth = NewBitWidth;
128     return;
129   }
130 
131   // If we have an allocation, delete it.
132   if (!isSingleWord())
133     delete [] U.pVal;
134 
135   // Update BitWidth.
136   BitWidth = NewBitWidth;
137 
138   // If we are supposed to have an allocation, create it.
139   if (!isSingleWord())
140     U.pVal = getMemory(getNumWords());
141 }
142 
143 void APInt::AssignSlowCase(const APInt& RHS) {
144   // Don't do anything for X = X
145   if (this == &RHS)
146     return;
147 
148   // Adjust the bit width and handle allocations as necessary.
149   reallocate(RHS.getBitWidth());
150 
151   // Copy the data.
152   if (isSingleWord())
153     U.VAL = RHS.U.VAL;
154   else
155     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
156 }
157 
158 /// This method 'profiles' an APInt for use with FoldingSet.
159 void APInt::Profile(FoldingSetNodeID& ID) const {
160   ID.AddInteger(BitWidth);
161 
162   if (isSingleWord()) {
163     ID.AddInteger(U.VAL);
164     return;
165   }
166 
167   unsigned NumWords = getNumWords();
168   for (unsigned i = 0; i < NumWords; ++i)
169     ID.AddInteger(U.pVal[i]);
170 }
171 
172 /// Prefix increment operator. Increments the APInt by one.
173 APInt& APInt::operator++() {
174   if (isSingleWord())
175     ++U.VAL;
176   else
177     tcIncrement(U.pVal, getNumWords());
178   return clearUnusedBits();
179 }
180 
181 /// Prefix decrement operator. Decrements the APInt by one.
182 APInt& APInt::operator--() {
183   if (isSingleWord())
184     --U.VAL;
185   else
186     tcDecrement(U.pVal, getNumWords());
187   return clearUnusedBits();
188 }
189 
190 /// Adds the RHS APInt to this APInt.
191 /// @returns this, after addition of RHS.
192 /// Addition assignment operator.
193 APInt& APInt::operator+=(const APInt& RHS) {
194   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
195   if (isSingleWord())
196     U.VAL += RHS.U.VAL;
197   else
198     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
199   return clearUnusedBits();
200 }
201 
202 APInt& APInt::operator+=(uint64_t RHS) {
203   if (isSingleWord())
204     U.VAL += RHS;
205   else
206     tcAddPart(U.pVal, RHS, getNumWords());
207   return clearUnusedBits();
208 }
209 
210 /// Subtracts the RHS APInt from this APInt
211 /// @returns this, after subtraction
212 /// Subtraction assignment operator.
213 APInt& APInt::operator-=(const APInt& RHS) {
214   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215   if (isSingleWord())
216     U.VAL -= RHS.U.VAL;
217   else
218     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
219   return clearUnusedBits();
220 }
221 
222 APInt& APInt::operator-=(uint64_t RHS) {
223   if (isSingleWord())
224     U.VAL -= RHS;
225   else
226     tcSubtractPart(U.pVal, RHS, getNumWords());
227   return clearUnusedBits();
228 }
229 
230 APInt APInt::operator*(const APInt& RHS) const {
231   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
232   if (isSingleWord())
233     return APInt(BitWidth, U.VAL * RHS.U.VAL);
234 
235   APInt Result(getMemory(getNumWords()), getBitWidth());
236 
237   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
238 
239   Result.clearUnusedBits();
240   return Result;
241 }
242 
243 void APInt::AndAssignSlowCase(const APInt& RHS) {
244   tcAnd(U.pVal, RHS.U.pVal, getNumWords());
245 }
246 
247 void APInt::OrAssignSlowCase(const APInt& RHS) {
248   tcOr(U.pVal, RHS.U.pVal, getNumWords());
249 }
250 
251 void APInt::XorAssignSlowCase(const APInt& RHS) {
252   tcXor(U.pVal, RHS.U.pVal, getNumWords());
253 }
254 
255 APInt& APInt::operator*=(const APInt& RHS) {
256   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
257   *this = *this * RHS;
258   return *this;
259 }
260 
261 APInt& APInt::operator*=(uint64_t RHS) {
262   if (isSingleWord()) {
263     U.VAL *= RHS;
264   } else {
265     unsigned NumWords = getNumWords();
266     tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
267   }
268   return clearUnusedBits();
269 }
270 
271 bool APInt::EqualSlowCase(const APInt& RHS) const {
272   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
273 }
274 
275 int APInt::compare(const APInt& RHS) const {
276   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
277   if (isSingleWord())
278     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
279 
280   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
281 }
282 
283 int APInt::compareSigned(const APInt& RHS) const {
284   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
285   if (isSingleWord()) {
286     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
287     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
288     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
289   }
290 
291   bool lhsNeg = isNegative();
292   bool rhsNeg = RHS.isNegative();
293 
294   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295   if (lhsNeg != rhsNeg)
296     return lhsNeg ? -1 : 1;
297 
298   // Otherwise we can just use an unsigned comparison, because even negative
299   // numbers compare correctly this way if both have the same signed-ness.
300   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
301 }
302 
303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
304   unsigned loWord = whichWord(loBit);
305   unsigned hiWord = whichWord(hiBit);
306 
307   // Create an initial mask for the low word with zeros below loBit.
308   uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
309 
310   // If hiBit is not aligned, we need a high mask.
311   unsigned hiShiftAmt = whichBit(hiBit);
312   if (hiShiftAmt != 0) {
313     // Create a high mask with zeros above hiBit.
314     uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
315     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316     // set the bits in hiWord.
317     if (hiWord == loWord)
318       loMask &= hiMask;
319     else
320       U.pVal[hiWord] |= hiMask;
321   }
322   // Apply the mask to the low word.
323   U.pVal[loWord] |= loMask;
324 
325   // Fill any words between loWord and hiWord with all ones.
326   for (unsigned word = loWord + 1; word < hiWord; ++word)
327     U.pVal[word] = WORDTYPE_MAX;
328 }
329 
330 /// Toggle every bit to its opposite value.
331 void APInt::flipAllBitsSlowCase() {
332   tcComplement(U.pVal, getNumWords());
333   clearUnusedBits();
334 }
335 
336 /// Toggle a given bit to its opposite value whose position is given
337 /// as "bitPosition".
338 /// Toggles a given bit to its opposite value.
339 void APInt::flipBit(unsigned bitPosition) {
340   assert(bitPosition < BitWidth && "Out of the bit-width range!");
341   if ((*this)[bitPosition]) clearBit(bitPosition);
342   else setBit(bitPosition);
343 }
344 
345 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
346   unsigned subBitWidth = subBits.getBitWidth();
347   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
348          "Illegal bit insertion");
349 
350   // Insertion is a direct copy.
351   if (subBitWidth == BitWidth) {
352     *this = subBits;
353     return;
354   }
355 
356   // Single word result can be done as a direct bitmask.
357   if (isSingleWord()) {
358     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
359     U.VAL &= ~(mask << bitPosition);
360     U.VAL |= (subBits.U.VAL << bitPosition);
361     return;
362   }
363 
364   unsigned loBit = whichBit(bitPosition);
365   unsigned loWord = whichWord(bitPosition);
366   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
367 
368   // Insertion within a single word can be done as a direct bitmask.
369   if (loWord == hi1Word) {
370     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
371     U.pVal[loWord] &= ~(mask << loBit);
372     U.pVal[loWord] |= (subBits.U.VAL << loBit);
373     return;
374   }
375 
376   // Insert on word boundaries.
377   if (loBit == 0) {
378     // Direct copy whole words.
379     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
380     memcpy(U.pVal + loWord, subBits.getRawData(),
381            numWholeSubWords * APINT_WORD_SIZE);
382 
383     // Mask+insert remaining bits.
384     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
385     if (remainingBits != 0) {
386       uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
387       U.pVal[hi1Word] &= ~mask;
388       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
389     }
390     return;
391   }
392 
393   // General case - set/clear individual bits in dst based on src.
394   // TODO - there is scope for optimization here, but at the moment this code
395   // path is barely used so prefer readability over performance.
396   for (unsigned i = 0; i != subBitWidth; ++i) {
397     if (subBits[i])
398       setBit(bitPosition + i);
399     else
400       clearBit(bitPosition + i);
401   }
402 }
403 
404 void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
405   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
406   subBits &= maskBits;
407   if (isSingleWord()) {
408     U.VAL &= ~(maskBits << bitPosition);
409     U.VAL |= subBits << bitPosition;
410     return;
411   }
412 
413   unsigned loBit = whichBit(bitPosition);
414   unsigned loWord = whichWord(bitPosition);
415   unsigned hiWord = whichWord(bitPosition + numBits - 1);
416   if (loWord == hiWord) {
417     U.pVal[loWord] &= ~(maskBits << loBit);
418     U.pVal[loWord] |= subBits << loBit;
419     return;
420   }
421 
422   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
423   unsigned wordBits = 8 * sizeof(WordType);
424   U.pVal[loWord] &= ~(maskBits << loBit);
425   U.pVal[loWord] |= subBits << loBit;
426 
427   U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));
428   U.pVal[hiWord] |= subBits >> (wordBits - loBit);
429 }
430 
431 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
432   assert(numBits > 0 && "Can't extract zero bits");
433   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
434          "Illegal bit extraction");
435 
436   if (isSingleWord())
437     return APInt(numBits, U.VAL >> bitPosition);
438 
439   unsigned loBit = whichBit(bitPosition);
440   unsigned loWord = whichWord(bitPosition);
441   unsigned hiWord = whichWord(bitPosition + numBits - 1);
442 
443   // Single word result extracting bits from a single word source.
444   if (loWord == hiWord)
445     return APInt(numBits, U.pVal[loWord] >> loBit);
446 
447   // Extracting bits that start on a source word boundary can be done
448   // as a fast memory copy.
449   if (loBit == 0)
450     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
451 
452   // General case - shift + copy source words directly into place.
453   APInt Result(numBits, 0);
454   unsigned NumSrcWords = getNumWords();
455   unsigned NumDstWords = Result.getNumWords();
456 
457   uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
458   for (unsigned word = 0; word < NumDstWords; ++word) {
459     uint64_t w0 = U.pVal[loWord + word];
460     uint64_t w1 =
461         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
462     DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
463   }
464 
465   return Result.clearUnusedBits();
466 }
467 
468 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
469                                        unsigned bitPosition) const {
470   assert(numBits > 0 && "Can't extract zero bits");
471   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
472          "Illegal bit extraction");
473   assert(numBits <= 64 && "Illegal bit extraction");
474 
475   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
476   if (isSingleWord())
477     return (U.VAL >> bitPosition) & maskBits;
478 
479   unsigned loBit = whichBit(bitPosition);
480   unsigned loWord = whichWord(bitPosition);
481   unsigned hiWord = whichWord(bitPosition + numBits - 1);
482   if (loWord == hiWord)
483     return (U.pVal[loWord] >> loBit) & maskBits;
484 
485   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
486   unsigned wordBits = 8 * sizeof(WordType);
487   uint64_t retBits = U.pVal[loWord] >> loBit;
488   retBits |= U.pVal[hiWord] << (wordBits - loBit);
489   retBits &= maskBits;
490   return retBits;
491 }
492 
493 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
494   assert(!str.empty() && "Invalid string length");
495   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
496           radix == 36) &&
497          "Radix should be 2, 8, 10, 16, or 36!");
498 
499   size_t slen = str.size();
500 
501   // Each computation below needs to know if it's negative.
502   StringRef::iterator p = str.begin();
503   unsigned isNegative = *p == '-';
504   if (*p == '-' || *p == '+') {
505     p++;
506     slen--;
507     assert(slen && "String is only a sign, needs a value.");
508   }
509 
510   // For radixes of power-of-two values, the bits required is accurately and
511   // easily computed
512   if (radix == 2)
513     return slen + isNegative;
514   if (radix == 8)
515     return slen * 3 + isNegative;
516   if (radix == 16)
517     return slen * 4 + isNegative;
518 
519   // FIXME: base 36
520 
521   // This is grossly inefficient but accurate. We could probably do something
522   // with a computation of roughly slen*64/20 and then adjust by the value of
523   // the first few digits. But, I'm not sure how accurate that could be.
524 
525   // Compute a sufficient number of bits that is always large enough but might
526   // be too large. This avoids the assertion in the constructor. This
527   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
528   // bits in that case.
529   unsigned sufficient
530     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
531                  : (slen == 1 ? 7 : slen * 16/3);
532 
533   // Convert to the actual binary value.
534   APInt tmp(sufficient, StringRef(p, slen), radix);
535 
536   // Compute how many bits are required. If the log is infinite, assume we need
537   // just bit. If the log is exact and value is negative, then the value is
538   // MinSignedValue with (log + 1) bits.
539   unsigned log = tmp.logBase2();
540   if (log == (unsigned)-1) {
541     return isNegative + 1;
542   } else if (isNegative && tmp.isPowerOf2()) {
543     return isNegative + log;
544   } else {
545     return isNegative + log + 1;
546   }
547 }
548 
549 hash_code llvm::hash_value(const APInt &Arg) {
550   if (Arg.isSingleWord())
551     return hash_combine(Arg.U.VAL);
552 
553   return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
554 }
555 
556 bool APInt::isSplat(unsigned SplatSizeInBits) const {
557   assert(getBitWidth() % SplatSizeInBits == 0 &&
558          "SplatSizeInBits must divide width!");
559   // We can check that all parts of an integer are equal by making use of a
560   // little trick: rotate and check if it's still the same value.
561   return *this == rotl(SplatSizeInBits);
562 }
563 
564 /// This function returns the high "numBits" bits of this APInt.
565 APInt APInt::getHiBits(unsigned numBits) const {
566   return this->lshr(BitWidth - numBits);
567 }
568 
569 /// This function returns the low "numBits" bits of this APInt.
570 APInt APInt::getLoBits(unsigned numBits) const {
571   APInt Result(getLowBitsSet(BitWidth, numBits));
572   Result &= *this;
573   return Result;
574 }
575 
576 /// Return a value containing V broadcasted over NewLen bits.
577 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
578   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
579 
580   APInt Val = V.zextOrSelf(NewLen);
581   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
582     Val |= Val << I;
583 
584   return Val;
585 }
586 
587 unsigned APInt::countLeadingZerosSlowCase() const {
588   unsigned Count = 0;
589   for (int i = getNumWords()-1; i >= 0; --i) {
590     uint64_t V = U.pVal[i];
591     if (V == 0)
592       Count += APINT_BITS_PER_WORD;
593     else {
594       Count += llvm::countLeadingZeros(V);
595       break;
596     }
597   }
598   // Adjust for unused bits in the most significant word (they are zero).
599   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
600   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
601   return Count;
602 }
603 
604 unsigned APInt::countLeadingOnesSlowCase() const {
605   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
606   unsigned shift;
607   if (!highWordBits) {
608     highWordBits = APINT_BITS_PER_WORD;
609     shift = 0;
610   } else {
611     shift = APINT_BITS_PER_WORD - highWordBits;
612   }
613   int i = getNumWords() - 1;
614   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
615   if (Count == highWordBits) {
616     for (i--; i >= 0; --i) {
617       if (U.pVal[i] == WORDTYPE_MAX)
618         Count += APINT_BITS_PER_WORD;
619       else {
620         Count += llvm::countLeadingOnes(U.pVal[i]);
621         break;
622       }
623     }
624   }
625   return Count;
626 }
627 
628 unsigned APInt::countTrailingZerosSlowCase() const {
629   unsigned Count = 0;
630   unsigned i = 0;
631   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
632     Count += APINT_BITS_PER_WORD;
633   if (i < getNumWords())
634     Count += llvm::countTrailingZeros(U.pVal[i]);
635   return std::min(Count, BitWidth);
636 }
637 
638 unsigned APInt::countTrailingOnesSlowCase() const {
639   unsigned Count = 0;
640   unsigned i = 0;
641   for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
642     Count += APINT_BITS_PER_WORD;
643   if (i < getNumWords())
644     Count += llvm::countTrailingOnes(U.pVal[i]);
645   assert(Count <= BitWidth);
646   return Count;
647 }
648 
649 unsigned APInt::countPopulationSlowCase() const {
650   unsigned Count = 0;
651   for (unsigned i = 0; i < getNumWords(); ++i)
652     Count += llvm::countPopulation(U.pVal[i]);
653   return Count;
654 }
655 
656 bool APInt::intersectsSlowCase(const APInt &RHS) const {
657   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
658     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
659       return true;
660 
661   return false;
662 }
663 
664 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
665   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
666     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
667       return false;
668 
669   return true;
670 }
671 
672 APInt APInt::byteSwap() const {
673   assert(BitWidth >= 16 && BitWidth % 8 == 0 && "Cannot byteswap!");
674   if (BitWidth == 16)
675     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
676   if (BitWidth == 32)
677     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
678   if (BitWidth <= 64) {
679     uint64_t Tmp1 = ByteSwap_64(U.VAL);
680     Tmp1 >>= (64 - BitWidth);
681     return APInt(BitWidth, Tmp1);
682   }
683 
684   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
685   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
686     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
687   if (Result.BitWidth != BitWidth) {
688     Result.lshrInPlace(Result.BitWidth - BitWidth);
689     Result.BitWidth = BitWidth;
690   }
691   return Result;
692 }
693 
694 APInt APInt::reverseBits() const {
695   switch (BitWidth) {
696   case 64:
697     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
698   case 32:
699     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
700   case 16:
701     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
702   case 8:
703     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
704   default:
705     break;
706   }
707 
708   APInt Val(*this);
709   APInt Reversed(BitWidth, 0);
710   unsigned S = BitWidth;
711 
712   for (; Val != 0; Val.lshrInPlace(1)) {
713     Reversed <<= 1;
714     Reversed |= Val[0];
715     --S;
716   }
717 
718   Reversed <<= S;
719   return Reversed;
720 }
721 
722 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
723   // Fast-path a common case.
724   if (A == B) return A;
725 
726   // Corner cases: if either operand is zero, the other is the gcd.
727   if (!A) return B;
728   if (!B) return A;
729 
730   // Count common powers of 2 and remove all other powers of 2.
731   unsigned Pow2;
732   {
733     unsigned Pow2_A = A.countTrailingZeros();
734     unsigned Pow2_B = B.countTrailingZeros();
735     if (Pow2_A > Pow2_B) {
736       A.lshrInPlace(Pow2_A - Pow2_B);
737       Pow2 = Pow2_B;
738     } else if (Pow2_B > Pow2_A) {
739       B.lshrInPlace(Pow2_B - Pow2_A);
740       Pow2 = Pow2_A;
741     } else {
742       Pow2 = Pow2_A;
743     }
744   }
745 
746   // Both operands are odd multiples of 2^Pow_2:
747   //
748   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
749   //
750   // This is a modified version of Stein's algorithm, taking advantage of
751   // efficient countTrailingZeros().
752   while (A != B) {
753     if (A.ugt(B)) {
754       A -= B;
755       A.lshrInPlace(A.countTrailingZeros() - Pow2);
756     } else {
757       B -= A;
758       B.lshrInPlace(B.countTrailingZeros() - Pow2);
759     }
760   }
761 
762   return A;
763 }
764 
765 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
766   uint64_t I = bit_cast<uint64_t>(Double);
767 
768   // Get the sign bit from the highest order bit
769   bool isNeg = I >> 63;
770 
771   // Get the 11-bit exponent and adjust for the 1023 bit bias
772   int64_t exp = ((I >> 52) & 0x7ff) - 1023;
773 
774   // If the exponent is negative, the value is < 0 so just return 0.
775   if (exp < 0)
776     return APInt(width, 0u);
777 
778   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
779   uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
780 
781   // If the exponent doesn't shift all bits out of the mantissa
782   if (exp < 52)
783     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
784                     APInt(width, mantissa >> (52 - exp));
785 
786   // If the client didn't provide enough bits for us to shift the mantissa into
787   // then the result is undefined, just return 0
788   if (width <= exp - 52)
789     return APInt(width, 0);
790 
791   // Otherwise, we have to shift the mantissa bits up to the right location
792   APInt Tmp(width, mantissa);
793   Tmp <<= (unsigned)exp - 52;
794   return isNeg ? -Tmp : Tmp;
795 }
796 
797 /// This function converts this APInt to a double.
798 /// The layout for double is as following (IEEE Standard 754):
799 ///  --------------------------------------
800 /// |  Sign    Exponent    Fraction    Bias |
801 /// |-------------------------------------- |
802 /// |  1[63]   11[62-52]   52[51-00]   1023 |
803 ///  --------------------------------------
804 double APInt::roundToDouble(bool isSigned) const {
805 
806   // Handle the simple case where the value is contained in one uint64_t.
807   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
808   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
809     if (isSigned) {
810       int64_t sext = SignExtend64(getWord(0), BitWidth);
811       return double(sext);
812     } else
813       return double(getWord(0));
814   }
815 
816   // Determine if the value is negative.
817   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
818 
819   // Construct the absolute value if we're negative.
820   APInt Tmp(isNeg ? -(*this) : (*this));
821 
822   // Figure out how many bits we're using.
823   unsigned n = Tmp.getActiveBits();
824 
825   // The exponent (without bias normalization) is just the number of bits
826   // we are using. Note that the sign bit is gone since we constructed the
827   // absolute value.
828   uint64_t exp = n;
829 
830   // Return infinity for exponent overflow
831   if (exp > 1023) {
832     if (!isSigned || !isNeg)
833       return std::numeric_limits<double>::infinity();
834     else
835       return -std::numeric_limits<double>::infinity();
836   }
837   exp += 1023; // Increment for 1023 bias
838 
839   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
840   // extract the high 52 bits from the correct words in pVal.
841   uint64_t mantissa;
842   unsigned hiWord = whichWord(n-1);
843   if (hiWord == 0) {
844     mantissa = Tmp.U.pVal[0];
845     if (n > 52)
846       mantissa >>= n - 52; // shift down, we want the top 52 bits.
847   } else {
848     assert(hiWord > 0 && "huh?");
849     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
850     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
851     mantissa = hibits | lobits;
852   }
853 
854   // The leading bit of mantissa is implicit, so get rid of it.
855   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
856   uint64_t I = sign | (exp << 52) | mantissa;
857   return bit_cast<double>(I);
858 }
859 
860 // Truncate to new width.
861 APInt APInt::trunc(unsigned width) const {
862   assert(width < BitWidth && "Invalid APInt Truncate request");
863   assert(width && "Can't truncate to 0 bits");
864 
865   if (width <= APINT_BITS_PER_WORD)
866     return APInt(width, getRawData()[0]);
867 
868   APInt Result(getMemory(getNumWords(width)), width);
869 
870   // Copy full words.
871   unsigned i;
872   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
873     Result.U.pVal[i] = U.pVal[i];
874 
875   // Truncate and copy any partial word.
876   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
877   if (bits != 0)
878     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
879 
880   return Result;
881 }
882 
883 // Truncate to new width with unsigned saturation.
884 APInt APInt::truncUSat(unsigned width) const {
885   assert(width < BitWidth && "Invalid APInt Truncate request");
886   assert(width && "Can't truncate to 0 bits");
887 
888   // Can we just losslessly truncate it?
889   if (isIntN(width))
890     return trunc(width);
891   // If not, then just return the new limit.
892   return APInt::getMaxValue(width);
893 }
894 
895 // Truncate to new width with signed saturation.
896 APInt APInt::truncSSat(unsigned width) const {
897   assert(width < BitWidth && "Invalid APInt Truncate request");
898   assert(width && "Can't truncate to 0 bits");
899 
900   // Can we just losslessly truncate it?
901   if (isSignedIntN(width))
902     return trunc(width);
903   // If not, then just return the new limits.
904   return isNegative() ? APInt::getSignedMinValue(width)
905                       : APInt::getSignedMaxValue(width);
906 }
907 
908 // Sign extend to a new width.
909 APInt APInt::sext(unsigned Width) const {
910   assert(Width > BitWidth && "Invalid APInt SignExtend request");
911 
912   if (Width <= APINT_BITS_PER_WORD)
913     return APInt(Width, SignExtend64(U.VAL, BitWidth));
914 
915   APInt Result(getMemory(getNumWords(Width)), Width);
916 
917   // Copy words.
918   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
919 
920   // Sign extend the last word since there may be unused bits in the input.
921   Result.U.pVal[getNumWords() - 1] =
922       SignExtend64(Result.U.pVal[getNumWords() - 1],
923                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
924 
925   // Fill with sign bits.
926   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
927               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
928   Result.clearUnusedBits();
929   return Result;
930 }
931 
932 //  Zero extend to a new width.
933 APInt APInt::zext(unsigned width) const {
934   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
935 
936   if (width <= APINT_BITS_PER_WORD)
937     return APInt(width, U.VAL);
938 
939   APInt Result(getMemory(getNumWords(width)), width);
940 
941   // Copy words.
942   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
943 
944   // Zero remaining words.
945   std::memset(Result.U.pVal + getNumWords(), 0,
946               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
947 
948   return Result;
949 }
950 
951 APInt APInt::zextOrTrunc(unsigned width) const {
952   if (BitWidth < width)
953     return zext(width);
954   if (BitWidth > width)
955     return trunc(width);
956   return *this;
957 }
958 
959 APInt APInt::sextOrTrunc(unsigned width) const {
960   if (BitWidth < width)
961     return sext(width);
962   if (BitWidth > width)
963     return trunc(width);
964   return *this;
965 }
966 
967 APInt APInt::zextOrSelf(unsigned width) const {
968   if (BitWidth < width)
969     return zext(width);
970   return *this;
971 }
972 
973 APInt APInt::sextOrSelf(unsigned width) const {
974   if (BitWidth < width)
975     return sext(width);
976   return *this;
977 }
978 
979 /// Arithmetic right-shift this APInt by shiftAmt.
980 /// Arithmetic right-shift function.
981 void APInt::ashrInPlace(const APInt &shiftAmt) {
982   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
983 }
984 
985 /// Arithmetic right-shift this APInt by shiftAmt.
986 /// Arithmetic right-shift function.
987 void APInt::ashrSlowCase(unsigned ShiftAmt) {
988   // Don't bother performing a no-op shift.
989   if (!ShiftAmt)
990     return;
991 
992   // Save the original sign bit for later.
993   bool Negative = isNegative();
994 
995   // WordShift is the inter-part shift; BitShift is intra-part shift.
996   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
997   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
998 
999   unsigned WordsToMove = getNumWords() - WordShift;
1000   if (WordsToMove != 0) {
1001     // Sign extend the last word to fill in the unused bits.
1002     U.pVal[getNumWords() - 1] = SignExtend64(
1003         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
1004 
1005     // Fastpath for moving by whole words.
1006     if (BitShift == 0) {
1007       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
1008     } else {
1009       // Move the words containing significant bits.
1010       for (unsigned i = 0; i != WordsToMove - 1; ++i)
1011         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
1012                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
1013 
1014       // Handle the last word which has no high bits to copy.
1015       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
1016       // Sign extend one more time.
1017       U.pVal[WordsToMove - 1] =
1018           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
1019     }
1020   }
1021 
1022   // Fill in the remainder based on the original sign.
1023   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
1024               WordShift * APINT_WORD_SIZE);
1025   clearUnusedBits();
1026 }
1027 
1028 /// Logical right-shift this APInt by shiftAmt.
1029 /// Logical right-shift function.
1030 void APInt::lshrInPlace(const APInt &shiftAmt) {
1031   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1032 }
1033 
1034 /// Logical right-shift this APInt by shiftAmt.
1035 /// Logical right-shift function.
1036 void APInt::lshrSlowCase(unsigned ShiftAmt) {
1037   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
1038 }
1039 
1040 /// Left-shift this APInt by shiftAmt.
1041 /// Left-shift function.
1042 APInt &APInt::operator<<=(const APInt &shiftAmt) {
1043   // It's undefined behavior in C to shift by BitWidth or greater.
1044   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
1045   return *this;
1046 }
1047 
1048 void APInt::shlSlowCase(unsigned ShiftAmt) {
1049   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
1050   clearUnusedBits();
1051 }
1052 
1053 // Calculate the rotate amount modulo the bit width.
1054 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1055   unsigned rotBitWidth = rotateAmt.getBitWidth();
1056   APInt rot = rotateAmt;
1057   if (rotBitWidth < BitWidth) {
1058     // Extend the rotate APInt, so that the urem doesn't divide by 0.
1059     // e.g. APInt(1, 32) would give APInt(1, 0).
1060     rot = rotateAmt.zext(BitWidth);
1061   }
1062   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1063   return rot.getLimitedValue(BitWidth);
1064 }
1065 
1066 APInt APInt::rotl(const APInt &rotateAmt) const {
1067   return rotl(rotateModulo(BitWidth, rotateAmt));
1068 }
1069 
1070 APInt APInt::rotl(unsigned rotateAmt) const {
1071   rotateAmt %= BitWidth;
1072   if (rotateAmt == 0)
1073     return *this;
1074   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1075 }
1076 
1077 APInt APInt::rotr(const APInt &rotateAmt) const {
1078   return rotr(rotateModulo(BitWidth, rotateAmt));
1079 }
1080 
1081 APInt APInt::rotr(unsigned rotateAmt) const {
1082   rotateAmt %= BitWidth;
1083   if (rotateAmt == 0)
1084     return *this;
1085   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1086 }
1087 
1088 // Square Root - this method computes and returns the square root of "this".
1089 // Three mechanisms are used for computation. For small values (<= 5 bits),
1090 // a table lookup is done. This gets some performance for common cases. For
1091 // values using less than 52 bits, the value is converted to double and then
1092 // the libc sqrt function is called. The result is rounded and then converted
1093 // back to a uint64_t which is then used to construct the result. Finally,
1094 // the Babylonian method for computing square roots is used.
1095 APInt APInt::sqrt() const {
1096 
1097   // Determine the magnitude of the value.
1098   unsigned magnitude = getActiveBits();
1099 
1100   // Use a fast table for some small values. This also gets rid of some
1101   // rounding errors in libc sqrt for small values.
1102   if (magnitude <= 5) {
1103     static const uint8_t results[32] = {
1104       /*     0 */ 0,
1105       /*  1- 2 */ 1, 1,
1106       /*  3- 6 */ 2, 2, 2, 2,
1107       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1108       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1109       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1110       /*    31 */ 6
1111     };
1112     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1113   }
1114 
1115   // If the magnitude of the value fits in less than 52 bits (the precision of
1116   // an IEEE double precision floating point value), then we can use the
1117   // libc sqrt function which will probably use a hardware sqrt computation.
1118   // This should be faster than the algorithm below.
1119   if (magnitude < 52) {
1120     return APInt(BitWidth,
1121                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1122                                                                : U.pVal[0])))));
1123   }
1124 
1125   // Okay, all the short cuts are exhausted. We must compute it. The following
1126   // is a classical Babylonian method for computing the square root. This code
1127   // was adapted to APInt from a wikipedia article on such computations.
1128   // See http://www.wikipedia.org/ and go to the page named
1129   // Calculate_an_integer_square_root.
1130   unsigned nbits = BitWidth, i = 4;
1131   APInt testy(BitWidth, 16);
1132   APInt x_old(BitWidth, 1);
1133   APInt x_new(BitWidth, 0);
1134   APInt two(BitWidth, 2);
1135 
1136   // Select a good starting value using binary logarithms.
1137   for (;; i += 2, testy = testy.shl(2))
1138     if (i >= nbits || this->ule(testy)) {
1139       x_old = x_old.shl(i / 2);
1140       break;
1141     }
1142 
1143   // Use the Babylonian method to arrive at the integer square root:
1144   for (;;) {
1145     x_new = (this->udiv(x_old) + x_old).udiv(two);
1146     if (x_old.ule(x_new))
1147       break;
1148     x_old = x_new;
1149   }
1150 
1151   // Make sure we return the closest approximation
1152   // NOTE: The rounding calculation below is correct. It will produce an
1153   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1154   // determined to be a rounding issue with pari/gp as it begins to use a
1155   // floating point representation after 192 bits. There are no discrepancies
1156   // between this algorithm and pari/gp for bit widths < 192 bits.
1157   APInt square(x_old * x_old);
1158   APInt nextSquare((x_old + 1) * (x_old +1));
1159   if (this->ult(square))
1160     return x_old;
1161   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1162   APInt midpoint((nextSquare - square).udiv(two));
1163   APInt offset(*this - square);
1164   if (offset.ult(midpoint))
1165     return x_old;
1166   return x_old + 1;
1167 }
1168 
1169 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1170 /// iterative extended Euclidean algorithm is used to solve for this value,
1171 /// however we simplify it to speed up calculating only the inverse, and take
1172 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1173 /// (potentially large) APInts around.
1174 /// WARNING: a value of '0' may be returned,
1175 ///          signifying that no multiplicative inverse exists!
1176 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1177   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1178 
1179   // Using the properties listed at the following web page (accessed 06/21/08):
1180   //   http://www.numbertheory.org/php/euclid.html
1181   // (especially the properties numbered 3, 4 and 9) it can be proved that
1182   // BitWidth bits suffice for all the computations in the algorithm implemented
1183   // below. More precisely, this number of bits suffice if the multiplicative
1184   // inverse exists, but may not suffice for the general extended Euclidean
1185   // algorithm.
1186 
1187   APInt r[2] = { modulo, *this };
1188   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1189   APInt q(BitWidth, 0);
1190 
1191   unsigned i;
1192   for (i = 0; r[i^1] != 0; i ^= 1) {
1193     // An overview of the math without the confusing bit-flipping:
1194     // q = r[i-2] / r[i-1]
1195     // r[i] = r[i-2] % r[i-1]
1196     // t[i] = t[i-2] - t[i-1] * q
1197     udivrem(r[i], r[i^1], q, r[i]);
1198     t[i] -= t[i^1] * q;
1199   }
1200 
1201   // If this APInt and the modulo are not coprime, there is no multiplicative
1202   // inverse, so return 0. We check this by looking at the next-to-last
1203   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1204   // algorithm.
1205   if (r[i] != 1)
1206     return APInt(BitWidth, 0);
1207 
1208   // The next-to-last t is the multiplicative inverse.  However, we are
1209   // interested in a positive inverse. Calculate a positive one from a negative
1210   // one if necessary. A simple addition of the modulo suffices because
1211   // abs(t[i]) is known to be less than *this/2 (see the link above).
1212   if (t[i].isNegative())
1213     t[i] += modulo;
1214 
1215   return std::move(t[i]);
1216 }
1217 
1218 /// Calculate the magic numbers required to implement a signed integer division
1219 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1220 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1221 /// Warren, Jr., chapter 10.
1222 APInt::ms APInt::magic() const {
1223   const APInt& d = *this;
1224   unsigned p;
1225   APInt ad, anc, delta, q1, r1, q2, r2, t;
1226   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1227   struct ms mag;
1228 
1229   ad = d.abs();
1230   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1231   anc = t - 1 - t.urem(ad);   // absolute value of nc
1232   p = d.getBitWidth() - 1;    // initialize p
1233   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1234   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1235   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1236   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1237   do {
1238     p = p + 1;
1239     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1240     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1241     if (r1.uge(anc)) {  // must be unsigned comparison
1242       q1 = q1 + 1;
1243       r1 = r1 - anc;
1244     }
1245     q2 = q2<<1;          // update q2 = 2p/abs(d)
1246     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1247     if (r2.uge(ad)) {   // must be unsigned comparison
1248       q2 = q2 + 1;
1249       r2 = r2 - ad;
1250     }
1251     delta = ad - r2;
1252   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1253 
1254   mag.m = q2 + 1;
1255   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1256   mag.s = p - d.getBitWidth();          // resulting shift
1257   return mag;
1258 }
1259 
1260 /// Calculate the magic numbers required to implement an unsigned integer
1261 /// division by a constant as a sequence of multiplies, adds and shifts.
1262 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1263 /// S. Warren, Jr., chapter 10.
1264 /// LeadingZeros can be used to simplify the calculation if the upper bits
1265 /// of the divided value are known zero.
1266 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1267   const APInt& d = *this;
1268   unsigned p;
1269   APInt nc, delta, q1, r1, q2, r2;
1270   struct mu magu;
1271   magu.a = 0;               // initialize "add" indicator
1272   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1273   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1274   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1275 
1276   nc = allOnes - (allOnes - d).urem(d);
1277   p = d.getBitWidth() - 1;  // initialize p
1278   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1279   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1280   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1281   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1282   do {
1283     p = p + 1;
1284     if (r1.uge(nc - r1)) {
1285       q1 = q1 + q1 + 1;  // update q1
1286       r1 = r1 + r1 - nc; // update r1
1287     }
1288     else {
1289       q1 = q1+q1; // update q1
1290       r1 = r1+r1; // update r1
1291     }
1292     if ((r2 + 1).uge(d - r2)) {
1293       if (q2.uge(signedMax)) magu.a = 1;
1294       q2 = q2+q2 + 1;     // update q2
1295       r2 = r2+r2 + 1 - d; // update r2
1296     }
1297     else {
1298       if (q2.uge(signedMin)) magu.a = 1;
1299       q2 = q2+q2;     // update q2
1300       r2 = r2+r2 + 1; // update r2
1301     }
1302     delta = d - 1 - r2;
1303   } while (p < d.getBitWidth()*2 &&
1304            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1305   magu.m = q2 + 1; // resulting magic number
1306   magu.s = p - d.getBitWidth();  // resulting shift
1307   return magu;
1308 }
1309 
1310 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1311 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1312 /// variables here have the same names as in the algorithm. Comments explain
1313 /// the algorithm and any deviation from it.
1314 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1315                      unsigned m, unsigned n) {
1316   assert(u && "Must provide dividend");
1317   assert(v && "Must provide divisor");
1318   assert(q && "Must provide quotient");
1319   assert(u != v && u != q && v != q && "Must use different memory");
1320   assert(n>1 && "n must be > 1");
1321 
1322   // b denotes the base of the number system. In our case b is 2^32.
1323   const uint64_t b = uint64_t(1) << 32;
1324 
1325 // The DEBUG macros here tend to be spam in the debug output if you're not
1326 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1327 #ifdef KNUTH_DEBUG
1328 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1329 #else
1330 #define DEBUG_KNUTH(X) do {} while(false)
1331 #endif
1332 
1333   DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1334   DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1335   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1336   DEBUG_KNUTH(dbgs() << " by");
1337   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1338   DEBUG_KNUTH(dbgs() << '\n');
1339   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1340   // u and v by d. Note that we have taken Knuth's advice here to use a power
1341   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1342   // 2 allows us to shift instead of multiply and it is easy to determine the
1343   // shift amount from the leading zeros.  We are basically normalizing the u
1344   // and v so that its high bits are shifted to the top of v's range without
1345   // overflow. Note that this can require an extra word in u so that u must
1346   // be of length m+n+1.
1347   unsigned shift = countLeadingZeros(v[n-1]);
1348   uint32_t v_carry = 0;
1349   uint32_t u_carry = 0;
1350   if (shift) {
1351     for (unsigned i = 0; i < m+n; ++i) {
1352       uint32_t u_tmp = u[i] >> (32 - shift);
1353       u[i] = (u[i] << shift) | u_carry;
1354       u_carry = u_tmp;
1355     }
1356     for (unsigned i = 0; i < n; ++i) {
1357       uint32_t v_tmp = v[i] >> (32 - shift);
1358       v[i] = (v[i] << shift) | v_carry;
1359       v_carry = v_tmp;
1360     }
1361   }
1362   u[m+n] = u_carry;
1363 
1364   DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1365   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1366   DEBUG_KNUTH(dbgs() << " by");
1367   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1368   DEBUG_KNUTH(dbgs() << '\n');
1369 
1370   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1371   int j = m;
1372   do {
1373     DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1374     // D3. [Calculate q'.].
1375     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1376     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1377     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1378     // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1379     // on v[n-2] determines at high speed most of the cases in which the trial
1380     // value qp is one too large, and it eliminates all cases where qp is two
1381     // too large.
1382     uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1383     DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1384     uint64_t qp = dividend / v[n-1];
1385     uint64_t rp = dividend % v[n-1];
1386     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1387       qp--;
1388       rp += v[n-1];
1389       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1390         qp--;
1391     }
1392     DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1393 
1394     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1395     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1396     // consists of a simple multiplication by a one-place number, combined with
1397     // a subtraction.
1398     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1399     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1400     // true value plus b**(n+1), namely as the b's complement of
1401     // the true value, and a "borrow" to the left should be remembered.
1402     int64_t borrow = 0;
1403     for (unsigned i = 0; i < n; ++i) {
1404       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1405       int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1406       u[j+i] = Lo_32(subres);
1407       borrow = Hi_32(p) - Hi_32(subres);
1408       DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1409                         << ", borrow = " << borrow << '\n');
1410     }
1411     bool isNeg = u[j+n] < borrow;
1412     u[j+n] -= Lo_32(borrow);
1413 
1414     DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1415     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1416     DEBUG_KNUTH(dbgs() << '\n');
1417 
1418     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1419     // negative, go to step D6; otherwise go on to step D7.
1420     q[j] = Lo_32(qp);
1421     if (isNeg) {
1422       // D6. [Add back]. The probability that this step is necessary is very
1423       // small, on the order of only 2/b. Make sure that test data accounts for
1424       // this possibility. Decrease q[j] by 1
1425       q[j]--;
1426       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1427       // A carry will occur to the left of u[j+n], and it should be ignored
1428       // since it cancels with the borrow that occurred in D4.
1429       bool carry = false;
1430       for (unsigned i = 0; i < n; i++) {
1431         uint32_t limit = std::min(u[j+i],v[i]);
1432         u[j+i] += v[i] + carry;
1433         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1434       }
1435       u[j+n] += carry;
1436     }
1437     DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1438     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1439     DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1440 
1441     // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1442   } while (--j >= 0);
1443 
1444   DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1445   DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1446   DEBUG_KNUTH(dbgs() << '\n');
1447 
1448   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1449   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1450   // compute the remainder (urem uses this).
1451   if (r) {
1452     // The value d is expressed by the "shift" value above since we avoided
1453     // multiplication by d by using a shift left. So, all we have to do is
1454     // shift right here.
1455     if (shift) {
1456       uint32_t carry = 0;
1457       DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1458       for (int i = n-1; i >= 0; i--) {
1459         r[i] = (u[i] >> shift) | carry;
1460         carry = u[i] << (32 - shift);
1461         DEBUG_KNUTH(dbgs() << " " << r[i]);
1462       }
1463     } else {
1464       for (int i = n-1; i >= 0; i--) {
1465         r[i] = u[i];
1466         DEBUG_KNUTH(dbgs() << " " << r[i]);
1467       }
1468     }
1469     DEBUG_KNUTH(dbgs() << '\n');
1470   }
1471   DEBUG_KNUTH(dbgs() << '\n');
1472 }
1473 
1474 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1475                    unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1476   assert(lhsWords >= rhsWords && "Fractional result");
1477 
1478   // First, compose the values into an array of 32-bit words instead of
1479   // 64-bit words. This is a necessity of both the "short division" algorithm
1480   // and the Knuth "classical algorithm" which requires there to be native
1481   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1482   // can't use 64-bit operands here because we don't have native results of
1483   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1484   // work on large-endian machines.
1485   unsigned n = rhsWords * 2;
1486   unsigned m = (lhsWords * 2) - n;
1487 
1488   // Allocate space for the temporary values we need either on the stack, if
1489   // it will fit, or on the heap if it won't.
1490   uint32_t SPACE[128];
1491   uint32_t *U = nullptr;
1492   uint32_t *V = nullptr;
1493   uint32_t *Q = nullptr;
1494   uint32_t *R = nullptr;
1495   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1496     U = &SPACE[0];
1497     V = &SPACE[m+n+1];
1498     Q = &SPACE[(m+n+1) + n];
1499     if (Remainder)
1500       R = &SPACE[(m+n+1) + n + (m+n)];
1501   } else {
1502     U = new uint32_t[m + n + 1];
1503     V = new uint32_t[n];
1504     Q = new uint32_t[m+n];
1505     if (Remainder)
1506       R = new uint32_t[n];
1507   }
1508 
1509   // Initialize the dividend
1510   memset(U, 0, (m+n+1)*sizeof(uint32_t));
1511   for (unsigned i = 0; i < lhsWords; ++i) {
1512     uint64_t tmp = LHS[i];
1513     U[i * 2] = Lo_32(tmp);
1514     U[i * 2 + 1] = Hi_32(tmp);
1515   }
1516   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1517 
1518   // Initialize the divisor
1519   memset(V, 0, (n)*sizeof(uint32_t));
1520   for (unsigned i = 0; i < rhsWords; ++i) {
1521     uint64_t tmp = RHS[i];
1522     V[i * 2] = Lo_32(tmp);
1523     V[i * 2 + 1] = Hi_32(tmp);
1524   }
1525 
1526   // initialize the quotient and remainder
1527   memset(Q, 0, (m+n) * sizeof(uint32_t));
1528   if (Remainder)
1529     memset(R, 0, n * sizeof(uint32_t));
1530 
1531   // Now, adjust m and n for the Knuth division. n is the number of words in
1532   // the divisor. m is the number of words by which the dividend exceeds the
1533   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1534   // contain any zero words or the Knuth algorithm fails.
1535   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1536     n--;
1537     m++;
1538   }
1539   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1540     m--;
1541 
1542   // If we're left with only a single word for the divisor, Knuth doesn't work
1543   // so we implement the short division algorithm here. This is much simpler
1544   // and faster because we are certain that we can divide a 64-bit quantity
1545   // by a 32-bit quantity at hardware speed and short division is simply a
1546   // series of such operations. This is just like doing short division but we
1547   // are using base 2^32 instead of base 10.
1548   assert(n != 0 && "Divide by zero?");
1549   if (n == 1) {
1550     uint32_t divisor = V[0];
1551     uint32_t remainder = 0;
1552     for (int i = m; i >= 0; i--) {
1553       uint64_t partial_dividend = Make_64(remainder, U[i]);
1554       if (partial_dividend == 0) {
1555         Q[i] = 0;
1556         remainder = 0;
1557       } else if (partial_dividend < divisor) {
1558         Q[i] = 0;
1559         remainder = Lo_32(partial_dividend);
1560       } else if (partial_dividend == divisor) {
1561         Q[i] = 1;
1562         remainder = 0;
1563       } else {
1564         Q[i] = Lo_32(partial_dividend / divisor);
1565         remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1566       }
1567     }
1568     if (R)
1569       R[0] = remainder;
1570   } else {
1571     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1572     // case n > 1.
1573     KnuthDiv(U, V, Q, R, m, n);
1574   }
1575 
1576   // If the caller wants the quotient
1577   if (Quotient) {
1578     for (unsigned i = 0; i < lhsWords; ++i)
1579       Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1580   }
1581 
1582   // If the caller wants the remainder
1583   if (Remainder) {
1584     for (unsigned i = 0; i < rhsWords; ++i)
1585       Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1586   }
1587 
1588   // Clean up the memory we allocated.
1589   if (U != &SPACE[0]) {
1590     delete [] U;
1591     delete [] V;
1592     delete [] Q;
1593     delete [] R;
1594   }
1595 }
1596 
1597 APInt APInt::udiv(const APInt &RHS) const {
1598   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1599 
1600   // First, deal with the easy case
1601   if (isSingleWord()) {
1602     assert(RHS.U.VAL != 0 && "Divide by zero?");
1603     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1604   }
1605 
1606   // Get some facts about the LHS and RHS number of bits and words
1607   unsigned lhsWords = getNumWords(getActiveBits());
1608   unsigned rhsBits  = RHS.getActiveBits();
1609   unsigned rhsWords = getNumWords(rhsBits);
1610   assert(rhsWords && "Divided by zero???");
1611 
1612   // Deal with some degenerate cases
1613   if (!lhsWords)
1614     // 0 / X ===> 0
1615     return APInt(BitWidth, 0);
1616   if (rhsBits == 1)
1617     // X / 1 ===> X
1618     return *this;
1619   if (lhsWords < rhsWords || this->ult(RHS))
1620     // X / Y ===> 0, iff X < Y
1621     return APInt(BitWidth, 0);
1622   if (*this == RHS)
1623     // X / X ===> 1
1624     return APInt(BitWidth, 1);
1625   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1626     // All high words are zero, just use native divide
1627     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1628 
1629   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1630   APInt Quotient(BitWidth, 0); // to hold result.
1631   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1632   return Quotient;
1633 }
1634 
1635 APInt APInt::udiv(uint64_t RHS) const {
1636   assert(RHS != 0 && "Divide by zero?");
1637 
1638   // First, deal with the easy case
1639   if (isSingleWord())
1640     return APInt(BitWidth, U.VAL / RHS);
1641 
1642   // Get some facts about the LHS words.
1643   unsigned lhsWords = getNumWords(getActiveBits());
1644 
1645   // Deal with some degenerate cases
1646   if (!lhsWords)
1647     // 0 / X ===> 0
1648     return APInt(BitWidth, 0);
1649   if (RHS == 1)
1650     // X / 1 ===> X
1651     return *this;
1652   if (this->ult(RHS))
1653     // X / Y ===> 0, iff X < Y
1654     return APInt(BitWidth, 0);
1655   if (*this == RHS)
1656     // X / X ===> 1
1657     return APInt(BitWidth, 1);
1658   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1659     // All high words are zero, just use native divide
1660     return APInt(BitWidth, this->U.pVal[0] / RHS);
1661 
1662   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1663   APInt Quotient(BitWidth, 0); // to hold result.
1664   divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1665   return Quotient;
1666 }
1667 
1668 APInt APInt::sdiv(const APInt &RHS) const {
1669   if (isNegative()) {
1670     if (RHS.isNegative())
1671       return (-(*this)).udiv(-RHS);
1672     return -((-(*this)).udiv(RHS));
1673   }
1674   if (RHS.isNegative())
1675     return -(this->udiv(-RHS));
1676   return this->udiv(RHS);
1677 }
1678 
1679 APInt APInt::sdiv(int64_t RHS) const {
1680   if (isNegative()) {
1681     if (RHS < 0)
1682       return (-(*this)).udiv(-RHS);
1683     return -((-(*this)).udiv(RHS));
1684   }
1685   if (RHS < 0)
1686     return -(this->udiv(-RHS));
1687   return this->udiv(RHS);
1688 }
1689 
1690 APInt APInt::urem(const APInt &RHS) const {
1691   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1692   if (isSingleWord()) {
1693     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1694     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1695   }
1696 
1697   // Get some facts about the LHS
1698   unsigned lhsWords = getNumWords(getActiveBits());
1699 
1700   // Get some facts about the RHS
1701   unsigned rhsBits = RHS.getActiveBits();
1702   unsigned rhsWords = getNumWords(rhsBits);
1703   assert(rhsWords && "Performing remainder operation by zero ???");
1704 
1705   // Check the degenerate cases
1706   if (lhsWords == 0)
1707     // 0 % Y ===> 0
1708     return APInt(BitWidth, 0);
1709   if (rhsBits == 1)
1710     // X % 1 ===> 0
1711     return APInt(BitWidth, 0);
1712   if (lhsWords < rhsWords || this->ult(RHS))
1713     // X % Y ===> X, iff X < Y
1714     return *this;
1715   if (*this == RHS)
1716     // X % X == 0;
1717     return APInt(BitWidth, 0);
1718   if (lhsWords == 1)
1719     // All high words are zero, just use native remainder
1720     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1721 
1722   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1723   APInt Remainder(BitWidth, 0);
1724   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1725   return Remainder;
1726 }
1727 
1728 uint64_t APInt::urem(uint64_t RHS) const {
1729   assert(RHS != 0 && "Remainder by zero?");
1730 
1731   if (isSingleWord())
1732     return U.VAL % RHS;
1733 
1734   // Get some facts about the LHS
1735   unsigned lhsWords = getNumWords(getActiveBits());
1736 
1737   // Check the degenerate cases
1738   if (lhsWords == 0)
1739     // 0 % Y ===> 0
1740     return 0;
1741   if (RHS == 1)
1742     // X % 1 ===> 0
1743     return 0;
1744   if (this->ult(RHS))
1745     // X % Y ===> X, iff X < Y
1746     return getZExtValue();
1747   if (*this == RHS)
1748     // X % X == 0;
1749     return 0;
1750   if (lhsWords == 1)
1751     // All high words are zero, just use native remainder
1752     return U.pVal[0] % RHS;
1753 
1754   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1755   uint64_t Remainder;
1756   divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1757   return Remainder;
1758 }
1759 
1760 APInt APInt::srem(const APInt &RHS) const {
1761   if (isNegative()) {
1762     if (RHS.isNegative())
1763       return -((-(*this)).urem(-RHS));
1764     return -((-(*this)).urem(RHS));
1765   }
1766   if (RHS.isNegative())
1767     return this->urem(-RHS);
1768   return this->urem(RHS);
1769 }
1770 
1771 int64_t APInt::srem(int64_t RHS) const {
1772   if (isNegative()) {
1773     if (RHS < 0)
1774       return -((-(*this)).urem(-RHS));
1775     return -((-(*this)).urem(RHS));
1776   }
1777   if (RHS < 0)
1778     return this->urem(-RHS);
1779   return this->urem(RHS);
1780 }
1781 
1782 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1783                     APInt &Quotient, APInt &Remainder) {
1784   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1785   unsigned BitWidth = LHS.BitWidth;
1786 
1787   // First, deal with the easy case
1788   if (LHS.isSingleWord()) {
1789     assert(RHS.U.VAL != 0 && "Divide by zero?");
1790     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1791     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1792     Quotient = APInt(BitWidth, QuotVal);
1793     Remainder = APInt(BitWidth, RemVal);
1794     return;
1795   }
1796 
1797   // Get some size facts about the dividend and divisor
1798   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1799   unsigned rhsBits  = RHS.getActiveBits();
1800   unsigned rhsWords = getNumWords(rhsBits);
1801   assert(rhsWords && "Performing divrem operation by zero ???");
1802 
1803   // Check the degenerate cases
1804   if (lhsWords == 0) {
1805     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1806     Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1807     return;
1808   }
1809 
1810   if (rhsBits == 1) {
1811     Quotient = LHS;                   // X / 1 ===> X
1812     Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1813   }
1814 
1815   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1816     Remainder = LHS;                  // X % Y ===> X, iff X < Y
1817     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1818     return;
1819   }
1820 
1821   if (LHS == RHS) {
1822     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1823     Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1824     return;
1825   }
1826 
1827   // Make sure there is enough space to hold the results.
1828   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1829   // change the size. This is necessary if Quotient or Remainder is aliased
1830   // with LHS or RHS.
1831   Quotient.reallocate(BitWidth);
1832   Remainder.reallocate(BitWidth);
1833 
1834   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1835     // There is only one word to consider so use the native versions.
1836     uint64_t lhsValue = LHS.U.pVal[0];
1837     uint64_t rhsValue = RHS.U.pVal[0];
1838     Quotient = lhsValue / rhsValue;
1839     Remainder = lhsValue % rhsValue;
1840     return;
1841   }
1842 
1843   // Okay, lets do it the long way
1844   divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1845          Remainder.U.pVal);
1846   // Clear the rest of the Quotient and Remainder.
1847   std::memset(Quotient.U.pVal + lhsWords, 0,
1848               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1849   std::memset(Remainder.U.pVal + rhsWords, 0,
1850               (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1851 }
1852 
1853 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1854                     uint64_t &Remainder) {
1855   assert(RHS != 0 && "Divide by zero?");
1856   unsigned BitWidth = LHS.BitWidth;
1857 
1858   // First, deal with the easy case
1859   if (LHS.isSingleWord()) {
1860     uint64_t QuotVal = LHS.U.VAL / RHS;
1861     Remainder = LHS.U.VAL % RHS;
1862     Quotient = APInt(BitWidth, QuotVal);
1863     return;
1864   }
1865 
1866   // Get some size facts about the dividend and divisor
1867   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1868 
1869   // Check the degenerate cases
1870   if (lhsWords == 0) {
1871     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1872     Remainder = 0;                    // 0 % Y ===> 0
1873     return;
1874   }
1875 
1876   if (RHS == 1) {
1877     Quotient = LHS;                   // X / 1 ===> X
1878     Remainder = 0;                    // X % 1 ===> 0
1879     return;
1880   }
1881 
1882   if (LHS.ult(RHS)) {
1883     Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1884     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1885     return;
1886   }
1887 
1888   if (LHS == RHS) {
1889     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1890     Remainder = 0;                    // X % X ===> 0;
1891     return;
1892   }
1893 
1894   // Make sure there is enough space to hold the results.
1895   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1896   // change the size. This is necessary if Quotient is aliased with LHS.
1897   Quotient.reallocate(BitWidth);
1898 
1899   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1900     // There is only one word to consider so use the native versions.
1901     uint64_t lhsValue = LHS.U.pVal[0];
1902     Quotient = lhsValue / RHS;
1903     Remainder = lhsValue % RHS;
1904     return;
1905   }
1906 
1907   // Okay, lets do it the long way
1908   divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1909   // Clear the rest of the Quotient.
1910   std::memset(Quotient.U.pVal + lhsWords, 0,
1911               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1912 }
1913 
1914 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1915                     APInt &Quotient, APInt &Remainder) {
1916   if (LHS.isNegative()) {
1917     if (RHS.isNegative())
1918       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1919     else {
1920       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1921       Quotient.negate();
1922     }
1923     Remainder.negate();
1924   } else if (RHS.isNegative()) {
1925     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1926     Quotient.negate();
1927   } else {
1928     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1929   }
1930 }
1931 
1932 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1933                     APInt &Quotient, int64_t &Remainder) {
1934   uint64_t R = Remainder;
1935   if (LHS.isNegative()) {
1936     if (RHS < 0)
1937       APInt::udivrem(-LHS, -RHS, Quotient, R);
1938     else {
1939       APInt::udivrem(-LHS, RHS, Quotient, R);
1940       Quotient.negate();
1941     }
1942     R = -R;
1943   } else if (RHS < 0) {
1944     APInt::udivrem(LHS, -RHS, Quotient, R);
1945     Quotient.negate();
1946   } else {
1947     APInt::udivrem(LHS, RHS, Quotient, R);
1948   }
1949   Remainder = R;
1950 }
1951 
1952 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1953   APInt Res = *this+RHS;
1954   Overflow = isNonNegative() == RHS.isNonNegative() &&
1955              Res.isNonNegative() != isNonNegative();
1956   return Res;
1957 }
1958 
1959 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1960   APInt Res = *this+RHS;
1961   Overflow = Res.ult(RHS);
1962   return Res;
1963 }
1964 
1965 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1966   APInt Res = *this - RHS;
1967   Overflow = isNonNegative() != RHS.isNonNegative() &&
1968              Res.isNonNegative() != isNonNegative();
1969   return Res;
1970 }
1971 
1972 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1973   APInt Res = *this-RHS;
1974   Overflow = Res.ugt(*this);
1975   return Res;
1976 }
1977 
1978 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1979   // MININT/-1  -->  overflow.
1980   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1981   return sdiv(RHS);
1982 }
1983 
1984 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1985   APInt Res = *this * RHS;
1986 
1987   if (*this != 0 && RHS != 0)
1988     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1989   else
1990     Overflow = false;
1991   return Res;
1992 }
1993 
1994 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1995   if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
1996     Overflow = true;
1997     return *this * RHS;
1998   }
1999 
2000   APInt Res = lshr(1) * RHS;
2001   Overflow = Res.isNegative();
2002   Res <<= 1;
2003   if ((*this)[0]) {
2004     Res += RHS;
2005     if (Res.ult(RHS))
2006       Overflow = true;
2007   }
2008   return Res;
2009 }
2010 
2011 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
2012   Overflow = ShAmt.uge(getBitWidth());
2013   if (Overflow)
2014     return APInt(BitWidth, 0);
2015 
2016   if (isNonNegative()) // Don't allow sign change.
2017     Overflow = ShAmt.uge(countLeadingZeros());
2018   else
2019     Overflow = ShAmt.uge(countLeadingOnes());
2020 
2021   return *this << ShAmt;
2022 }
2023 
2024 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
2025   Overflow = ShAmt.uge(getBitWidth());
2026   if (Overflow)
2027     return APInt(BitWidth, 0);
2028 
2029   Overflow = ShAmt.ugt(countLeadingZeros());
2030 
2031   return *this << ShAmt;
2032 }
2033 
2034 APInt APInt::sadd_sat(const APInt &RHS) const {
2035   bool Overflow;
2036   APInt Res = sadd_ov(RHS, Overflow);
2037   if (!Overflow)
2038     return Res;
2039 
2040   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2041                       : APInt::getSignedMaxValue(BitWidth);
2042 }
2043 
2044 APInt APInt::uadd_sat(const APInt &RHS) const {
2045   bool Overflow;
2046   APInt Res = uadd_ov(RHS, Overflow);
2047   if (!Overflow)
2048     return Res;
2049 
2050   return APInt::getMaxValue(BitWidth);
2051 }
2052 
2053 APInt APInt::ssub_sat(const APInt &RHS) const {
2054   bool Overflow;
2055   APInt Res = ssub_ov(RHS, Overflow);
2056   if (!Overflow)
2057     return Res;
2058 
2059   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2060                       : APInt::getSignedMaxValue(BitWidth);
2061 }
2062 
2063 APInt APInt::usub_sat(const APInt &RHS) const {
2064   bool Overflow;
2065   APInt Res = usub_ov(RHS, Overflow);
2066   if (!Overflow)
2067     return Res;
2068 
2069   return APInt(BitWidth, 0);
2070 }
2071 
2072 APInt APInt::smul_sat(const APInt &RHS) const {
2073   bool Overflow;
2074   APInt Res = smul_ov(RHS, Overflow);
2075   if (!Overflow)
2076     return Res;
2077 
2078   // The result is negative if one and only one of inputs is negative.
2079   bool ResIsNegative = isNegative() ^ RHS.isNegative();
2080 
2081   return ResIsNegative ? APInt::getSignedMinValue(BitWidth)
2082                        : APInt::getSignedMaxValue(BitWidth);
2083 }
2084 
2085 APInt APInt::umul_sat(const APInt &RHS) const {
2086   bool Overflow;
2087   APInt Res = umul_ov(RHS, Overflow);
2088   if (!Overflow)
2089     return Res;
2090 
2091   return APInt::getMaxValue(BitWidth);
2092 }
2093 
2094 APInt APInt::sshl_sat(const APInt &RHS) const {
2095   bool Overflow;
2096   APInt Res = sshl_ov(RHS, Overflow);
2097   if (!Overflow)
2098     return Res;
2099 
2100   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2101                       : APInt::getSignedMaxValue(BitWidth);
2102 }
2103 
2104 APInt APInt::ushl_sat(const APInt &RHS) const {
2105   bool Overflow;
2106   APInt Res = ushl_ov(RHS, Overflow);
2107   if (!Overflow)
2108     return Res;
2109 
2110   return APInt::getMaxValue(BitWidth);
2111 }
2112 
2113 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2114   // Check our assumptions here
2115   assert(!str.empty() && "Invalid string length");
2116   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2117           radix == 36) &&
2118          "Radix should be 2, 8, 10, 16, or 36!");
2119 
2120   StringRef::iterator p = str.begin();
2121   size_t slen = str.size();
2122   bool isNeg = *p == '-';
2123   if (*p == '-' || *p == '+') {
2124     p++;
2125     slen--;
2126     assert(slen && "String is only a sign, needs a value.");
2127   }
2128   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2129   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2130   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2131   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2132          "Insufficient bit width");
2133 
2134   // Allocate memory if needed
2135   if (isSingleWord())
2136     U.VAL = 0;
2137   else
2138     U.pVal = getClearedMemory(getNumWords());
2139 
2140   // Figure out if we can shift instead of multiply
2141   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2142 
2143   // Enter digit traversal loop
2144   for (StringRef::iterator e = str.end(); p != e; ++p) {
2145     unsigned digit = getDigit(*p, radix);
2146     assert(digit < radix && "Invalid character in digit string");
2147 
2148     // Shift or multiply the value by the radix
2149     if (slen > 1) {
2150       if (shift)
2151         *this <<= shift;
2152       else
2153         *this *= radix;
2154     }
2155 
2156     // Add in the digit we just interpreted
2157     *this += digit;
2158   }
2159   // If its negative, put it in two's complement form
2160   if (isNeg)
2161     this->negate();
2162 }
2163 
2164 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2165                      bool Signed, bool formatAsCLiteral) const {
2166   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2167           Radix == 36) &&
2168          "Radix should be 2, 8, 10, 16, or 36!");
2169 
2170   const char *Prefix = "";
2171   if (formatAsCLiteral) {
2172     switch (Radix) {
2173       case 2:
2174         // Binary literals are a non-standard extension added in gcc 4.3:
2175         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2176         Prefix = "0b";
2177         break;
2178       case 8:
2179         Prefix = "0";
2180         break;
2181       case 10:
2182         break; // No prefix
2183       case 16:
2184         Prefix = "0x";
2185         break;
2186       default:
2187         llvm_unreachable("Invalid radix!");
2188     }
2189   }
2190 
2191   // First, check for a zero value and just short circuit the logic below.
2192   if (*this == 0) {
2193     while (*Prefix) {
2194       Str.push_back(*Prefix);
2195       ++Prefix;
2196     };
2197     Str.push_back('0');
2198     return;
2199   }
2200 
2201   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2202 
2203   if (isSingleWord()) {
2204     char Buffer[65];
2205     char *BufPtr = std::end(Buffer);
2206 
2207     uint64_t N;
2208     if (!Signed) {
2209       N = getZExtValue();
2210     } else {
2211       int64_t I = getSExtValue();
2212       if (I >= 0) {
2213         N = I;
2214       } else {
2215         Str.push_back('-');
2216         N = -(uint64_t)I;
2217       }
2218     }
2219 
2220     while (*Prefix) {
2221       Str.push_back(*Prefix);
2222       ++Prefix;
2223     };
2224 
2225     while (N) {
2226       *--BufPtr = Digits[N % Radix];
2227       N /= Radix;
2228     }
2229     Str.append(BufPtr, std::end(Buffer));
2230     return;
2231   }
2232 
2233   APInt Tmp(*this);
2234 
2235   if (Signed && isNegative()) {
2236     // They want to print the signed version and it is a negative value
2237     // Flip the bits and add one to turn it into the equivalent positive
2238     // value and put a '-' in the result.
2239     Tmp.negate();
2240     Str.push_back('-');
2241   }
2242 
2243   while (*Prefix) {
2244     Str.push_back(*Prefix);
2245     ++Prefix;
2246   };
2247 
2248   // We insert the digits backward, then reverse them to get the right order.
2249   unsigned StartDig = Str.size();
2250 
2251   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2252   // because the number of bits per digit (1, 3 and 4 respectively) divides
2253   // equally.  We just shift until the value is zero.
2254   if (Radix == 2 || Radix == 8 || Radix == 16) {
2255     // Just shift tmp right for each digit width until it becomes zero
2256     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2257     unsigned MaskAmt = Radix - 1;
2258 
2259     while (Tmp.getBoolValue()) {
2260       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2261       Str.push_back(Digits[Digit]);
2262       Tmp.lshrInPlace(ShiftAmt);
2263     }
2264   } else {
2265     while (Tmp.getBoolValue()) {
2266       uint64_t Digit;
2267       udivrem(Tmp, Radix, Tmp, Digit);
2268       assert(Digit < Radix && "divide failed");
2269       Str.push_back(Digits[Digit]);
2270     }
2271   }
2272 
2273   // Reverse the digits before returning.
2274   std::reverse(Str.begin()+StartDig, Str.end());
2275 }
2276 
2277 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2278 /// It is better to pass in a SmallVector/SmallString to the methods above.
2279 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2280   SmallString<40> S;
2281   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2282   return std::string(S.str());
2283 }
2284 
2285 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2286 LLVM_DUMP_METHOD void APInt::dump() const {
2287   SmallString<40> S, U;
2288   this->toStringUnsigned(U);
2289   this->toStringSigned(S);
2290   dbgs() << "APInt(" << BitWidth << "b, "
2291          << U << "u " << S << "s)\n";
2292 }
2293 #endif
2294 
2295 void APInt::print(raw_ostream &OS, bool isSigned) const {
2296   SmallString<40> S;
2297   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2298   OS << S;
2299 }
2300 
2301 // This implements a variety of operations on a representation of
2302 // arbitrary precision, two's-complement, bignum integer values.
2303 
2304 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2305 // and unrestricting assumption.
2306 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2307               "Part width must be divisible by 2!");
2308 
2309 /* Some handy functions local to this file.  */
2310 
2311 /* Returns the integer part with the least significant BITS set.
2312    BITS cannot be zero.  */
2313 static inline APInt::WordType lowBitMask(unsigned bits) {
2314   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2315 
2316   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2317 }
2318 
2319 /* Returns the value of the lower half of PART.  */
2320 static inline APInt::WordType lowHalf(APInt::WordType part) {
2321   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2322 }
2323 
2324 /* Returns the value of the upper half of PART.  */
2325 static inline APInt::WordType highHalf(APInt::WordType part) {
2326   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2327 }
2328 
2329 /* Returns the bit number of the most significant set bit of a part.
2330    If the input number has no bits set -1U is returned.  */
2331 static unsigned partMSB(APInt::WordType value) {
2332   return findLastSet(value, ZB_Max);
2333 }
2334 
2335 /* Returns the bit number of the least significant set bit of a
2336    part.  If the input number has no bits set -1U is returned.  */
2337 static unsigned partLSB(APInt::WordType value) {
2338   return findFirstSet(value, ZB_Max);
2339 }
2340 
2341 /* Sets the least significant part of a bignum to the input value, and
2342    zeroes out higher parts.  */
2343 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2344   assert(parts > 0);
2345 
2346   dst[0] = part;
2347   for (unsigned i = 1; i < parts; i++)
2348     dst[i] = 0;
2349 }
2350 
2351 /* Assign one bignum to another.  */
2352 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2353   for (unsigned i = 0; i < parts; i++)
2354     dst[i] = src[i];
2355 }
2356 
2357 /* Returns true if a bignum is zero, false otherwise.  */
2358 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2359   for (unsigned i = 0; i < parts; i++)
2360     if (src[i])
2361       return false;
2362 
2363   return true;
2364 }
2365 
2366 /* Extract the given bit of a bignum; returns 0 or 1.  */
2367 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2368   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2369 }
2370 
2371 /* Set the given bit of a bignum. */
2372 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2373   parts[whichWord(bit)] |= maskBit(bit);
2374 }
2375 
2376 /* Clears the given bit of a bignum. */
2377 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2378   parts[whichWord(bit)] &= ~maskBit(bit);
2379 }
2380 
2381 /* Returns the bit number of the least significant set bit of a
2382    number.  If the input number has no bits set -1U is returned.  */
2383 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2384   for (unsigned i = 0; i < n; i++) {
2385     if (parts[i] != 0) {
2386       unsigned lsb = partLSB(parts[i]);
2387 
2388       return lsb + i * APINT_BITS_PER_WORD;
2389     }
2390   }
2391 
2392   return -1U;
2393 }
2394 
2395 /* Returns the bit number of the most significant set bit of a number.
2396    If the input number has no bits set -1U is returned.  */
2397 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2398   do {
2399     --n;
2400 
2401     if (parts[n] != 0) {
2402       unsigned msb = partMSB(parts[n]);
2403 
2404       return msb + n * APINT_BITS_PER_WORD;
2405     }
2406   } while (n);
2407 
2408   return -1U;
2409 }
2410 
2411 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2412    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2413    the least significant bit of DST.  All high bits above srcBITS in
2414    DST are zero-filled.  */
2415 void
2416 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2417                  unsigned srcBits, unsigned srcLSB) {
2418   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2419   assert(dstParts <= dstCount);
2420 
2421   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2422   tcAssign (dst, src + firstSrcPart, dstParts);
2423 
2424   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2425   tcShiftRight (dst, dstParts, shift);
2426 
2427   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2428      in DST.  If this is less that srcBits, append the rest, else
2429      clear the high bits.  */
2430   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2431   if (n < srcBits) {
2432     WordType mask = lowBitMask (srcBits - n);
2433     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2434                           << n % APINT_BITS_PER_WORD);
2435   } else if (n > srcBits) {
2436     if (srcBits % APINT_BITS_PER_WORD)
2437       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2438   }
2439 
2440   /* Clear high parts.  */
2441   while (dstParts < dstCount)
2442     dst[dstParts++] = 0;
2443 }
2444 
2445 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2446 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2447                              WordType c, unsigned parts) {
2448   assert(c <= 1);
2449 
2450   for (unsigned i = 0; i < parts; i++) {
2451     WordType l = dst[i];
2452     if (c) {
2453       dst[i] += rhs[i] + 1;
2454       c = (dst[i] <= l);
2455     } else {
2456       dst[i] += rhs[i];
2457       c = (dst[i] < l);
2458     }
2459   }
2460 
2461   return c;
2462 }
2463 
2464 /// This function adds a single "word" integer, src, to the multiple
2465 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2466 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2467 /// @returns the carry of the addition.
2468 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2469                                  unsigned parts) {
2470   for (unsigned i = 0; i < parts; ++i) {
2471     dst[i] += src;
2472     if (dst[i] >= src)
2473       return 0; // No need to carry so exit early.
2474     src = 1; // Carry one to next digit.
2475   }
2476 
2477   return 1;
2478 }
2479 
2480 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2481 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2482                                   WordType c, unsigned parts) {
2483   assert(c <= 1);
2484 
2485   for (unsigned i = 0; i < parts; i++) {
2486     WordType l = dst[i];
2487     if (c) {
2488       dst[i] -= rhs[i] + 1;
2489       c = (dst[i] >= l);
2490     } else {
2491       dst[i] -= rhs[i];
2492       c = (dst[i] > l);
2493     }
2494   }
2495 
2496   return c;
2497 }
2498 
2499 /// This function subtracts a single "word" (64-bit word), src, from
2500 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2501 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2502 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2503 /// exhausted. In other words, if src > dst then this function returns 1,
2504 /// otherwise 0.
2505 /// @returns the borrow out of the subtraction
2506 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2507                                       unsigned parts) {
2508   for (unsigned i = 0; i < parts; ++i) {
2509     WordType Dst = dst[i];
2510     dst[i] -= src;
2511     if (src <= Dst)
2512       return 0; // No need to borrow so exit early.
2513     src = 1; // We have to "borrow 1" from next "word"
2514   }
2515 
2516   return 1;
2517 }
2518 
2519 /* Negate a bignum in-place.  */
2520 void APInt::tcNegate(WordType *dst, unsigned parts) {
2521   tcComplement(dst, parts);
2522   tcIncrement(dst, parts);
2523 }
2524 
2525 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2526     DST  = SRC * MULTIPLIER + CARRY   if add is false
2527 
2528     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2529     they must start at the same point, i.e. DST == SRC.
2530 
2531     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2532     returned.  Otherwise DST is filled with the least significant
2533     DSTPARTS parts of the result, and if all of the omitted higher
2534     parts were zero return zero, otherwise overflow occurred and
2535     return one.  */
2536 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2537                           WordType multiplier, WordType carry,
2538                           unsigned srcParts, unsigned dstParts,
2539                           bool add) {
2540   /* Otherwise our writes of DST kill our later reads of SRC.  */
2541   assert(dst <= src || dst >= src + srcParts);
2542   assert(dstParts <= srcParts + 1);
2543 
2544   /* N loops; minimum of dstParts and srcParts.  */
2545   unsigned n = std::min(dstParts, srcParts);
2546 
2547   for (unsigned i = 0; i < n; i++) {
2548     WordType low, mid, high, srcPart;
2549 
2550       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2551 
2552          This cannot overflow, because
2553 
2554          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2555 
2556          which is less than n^2.  */
2557 
2558     srcPart = src[i];
2559 
2560     if (multiplier == 0 || srcPart == 0) {
2561       low = carry;
2562       high = 0;
2563     } else {
2564       low = lowHalf(srcPart) * lowHalf(multiplier);
2565       high = highHalf(srcPart) * highHalf(multiplier);
2566 
2567       mid = lowHalf(srcPart) * highHalf(multiplier);
2568       high += highHalf(mid);
2569       mid <<= APINT_BITS_PER_WORD / 2;
2570       if (low + mid < low)
2571         high++;
2572       low += mid;
2573 
2574       mid = highHalf(srcPart) * lowHalf(multiplier);
2575       high += highHalf(mid);
2576       mid <<= APINT_BITS_PER_WORD / 2;
2577       if (low + mid < low)
2578         high++;
2579       low += mid;
2580 
2581       /* Now add carry.  */
2582       if (low + carry < low)
2583         high++;
2584       low += carry;
2585     }
2586 
2587     if (add) {
2588       /* And now DST[i], and store the new low part there.  */
2589       if (low + dst[i] < low)
2590         high++;
2591       dst[i] += low;
2592     } else
2593       dst[i] = low;
2594 
2595     carry = high;
2596   }
2597 
2598   if (srcParts < dstParts) {
2599     /* Full multiplication, there is no overflow.  */
2600     assert(srcParts + 1 == dstParts);
2601     dst[srcParts] = carry;
2602     return 0;
2603   }
2604 
2605   /* We overflowed if there is carry.  */
2606   if (carry)
2607     return 1;
2608 
2609   /* We would overflow if any significant unwritten parts would be
2610      non-zero.  This is true if any remaining src parts are non-zero
2611      and the multiplier is non-zero.  */
2612   if (multiplier)
2613     for (unsigned i = dstParts; i < srcParts; i++)
2614       if (src[i])
2615         return 1;
2616 
2617   /* We fitted in the narrow destination.  */
2618   return 0;
2619 }
2620 
2621 /* DST = LHS * RHS, where DST has the same width as the operands and
2622    is filled with the least significant parts of the result.  Returns
2623    one if overflow occurred, otherwise zero.  DST must be disjoint
2624    from both operands.  */
2625 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2626                       const WordType *rhs, unsigned parts) {
2627   assert(dst != lhs && dst != rhs);
2628 
2629   int overflow = 0;
2630   tcSet(dst, 0, parts);
2631 
2632   for (unsigned i = 0; i < parts; i++)
2633     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2634                                parts - i, true);
2635 
2636   return overflow;
2637 }
2638 
2639 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2640 /// operands. No overflow occurs. DST must be disjoint from both operands.
2641 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2642                            const WordType *rhs, unsigned lhsParts,
2643                            unsigned rhsParts) {
2644   /* Put the narrower number on the LHS for less loops below.  */
2645   if (lhsParts > rhsParts)
2646     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2647 
2648   assert(dst != lhs && dst != rhs);
2649 
2650   tcSet(dst, 0, rhsParts);
2651 
2652   for (unsigned i = 0; i < lhsParts; i++)
2653     tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2654 }
2655 
2656 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2657    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2658    set REMAINDER to the remainder, return zero.  i.e.
2659 
2660    OLD_LHS = RHS * LHS + REMAINDER
2661 
2662    SCRATCH is a bignum of the same size as the operands and result for
2663    use by the routine; its contents need not be initialized and are
2664    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2665 */
2666 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2667                     WordType *remainder, WordType *srhs,
2668                     unsigned parts) {
2669   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2670 
2671   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2672   if (shiftCount == 0)
2673     return true;
2674 
2675   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2676   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2677   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2678 
2679   tcAssign(srhs, rhs, parts);
2680   tcShiftLeft(srhs, parts, shiftCount);
2681   tcAssign(remainder, lhs, parts);
2682   tcSet(lhs, 0, parts);
2683 
2684   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2685      the total.  */
2686   for (;;) {
2687     int compare = tcCompare(remainder, srhs, parts);
2688     if (compare >= 0) {
2689       tcSubtract(remainder, srhs, 0, parts);
2690       lhs[n] |= mask;
2691     }
2692 
2693     if (shiftCount == 0)
2694       break;
2695     shiftCount--;
2696     tcShiftRight(srhs, parts, 1);
2697     if ((mask >>= 1) == 0) {
2698       mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2699       n--;
2700     }
2701   }
2702 
2703   return false;
2704 }
2705 
2706 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2707 /// no restrictions on Count.
2708 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2709   // Don't bother performing a no-op shift.
2710   if (!Count)
2711     return;
2712 
2713   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2714   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2715   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2716 
2717   // Fastpath for moving by whole words.
2718   if (BitShift == 0) {
2719     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2720   } else {
2721     while (Words-- > WordShift) {
2722       Dst[Words] = Dst[Words - WordShift] << BitShift;
2723       if (Words > WordShift)
2724         Dst[Words] |=
2725           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2726     }
2727   }
2728 
2729   // Fill in the remainder with 0s.
2730   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2731 }
2732 
2733 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2734 /// are no restrictions on Count.
2735 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2736   // Don't bother performing a no-op shift.
2737   if (!Count)
2738     return;
2739 
2740   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2741   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2742   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2743 
2744   unsigned WordsToMove = Words - WordShift;
2745   // Fastpath for moving by whole words.
2746   if (BitShift == 0) {
2747     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2748   } else {
2749     for (unsigned i = 0; i != WordsToMove; ++i) {
2750       Dst[i] = Dst[i + WordShift] >> BitShift;
2751       if (i + 1 != WordsToMove)
2752         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2753     }
2754   }
2755 
2756   // Fill in the remainder with 0s.
2757   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2758 }
2759 
2760 /* Bitwise and of two bignums.  */
2761 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2762   for (unsigned i = 0; i < parts; i++)
2763     dst[i] &= rhs[i];
2764 }
2765 
2766 /* Bitwise inclusive or of two bignums.  */
2767 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2768   for (unsigned i = 0; i < parts; i++)
2769     dst[i] |= rhs[i];
2770 }
2771 
2772 /* Bitwise exclusive or of two bignums.  */
2773 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2774   for (unsigned i = 0; i < parts; i++)
2775     dst[i] ^= rhs[i];
2776 }
2777 
2778 /* Complement a bignum in-place.  */
2779 void APInt::tcComplement(WordType *dst, unsigned parts) {
2780   for (unsigned i = 0; i < parts; i++)
2781     dst[i] = ~dst[i];
2782 }
2783 
2784 /* Comparison (unsigned) of two bignums.  */
2785 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2786                      unsigned parts) {
2787   while (parts) {
2788     parts--;
2789     if (lhs[parts] != rhs[parts])
2790       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2791   }
2792 
2793   return 0;
2794 }
2795 
2796 /* Set the least significant BITS bits of a bignum, clear the
2797    rest.  */
2798 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2799                                       unsigned bits) {
2800   unsigned i = 0;
2801   while (bits > APINT_BITS_PER_WORD) {
2802     dst[i++] = ~(WordType) 0;
2803     bits -= APINT_BITS_PER_WORD;
2804   }
2805 
2806   if (bits)
2807     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2808 
2809   while (i < parts)
2810     dst[i++] = 0;
2811 }
2812 
2813 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2814                                    APInt::Rounding RM) {
2815   // Currently udivrem always rounds down.
2816   switch (RM) {
2817   case APInt::Rounding::DOWN:
2818   case APInt::Rounding::TOWARD_ZERO:
2819     return A.udiv(B);
2820   case APInt::Rounding::UP: {
2821     APInt Quo, Rem;
2822     APInt::udivrem(A, B, Quo, Rem);
2823     if (Rem == 0)
2824       return Quo;
2825     return Quo + 1;
2826   }
2827   }
2828   llvm_unreachable("Unknown APInt::Rounding enum");
2829 }
2830 
2831 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2832                                    APInt::Rounding RM) {
2833   switch (RM) {
2834   case APInt::Rounding::DOWN:
2835   case APInt::Rounding::UP: {
2836     APInt Quo, Rem;
2837     APInt::sdivrem(A, B, Quo, Rem);
2838     if (Rem == 0)
2839       return Quo;
2840     // This algorithm deals with arbitrary rounding mode used by sdivrem.
2841     // We want to check whether the non-integer part of the mathematical value
2842     // is negative or not. If the non-integer part is negative, we need to round
2843     // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2844     // already rounded down.
2845     if (RM == APInt::Rounding::DOWN) {
2846       if (Rem.isNegative() != B.isNegative())
2847         return Quo - 1;
2848       return Quo;
2849     }
2850     if (Rem.isNegative() != B.isNegative())
2851       return Quo;
2852     return Quo + 1;
2853   }
2854   // Currently sdiv rounds towards zero.
2855   case APInt::Rounding::TOWARD_ZERO:
2856     return A.sdiv(B);
2857   }
2858   llvm_unreachable("Unknown APInt::Rounding enum");
2859 }
2860 
2861 Optional<APInt>
2862 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2863                                            unsigned RangeWidth) {
2864   unsigned CoeffWidth = A.getBitWidth();
2865   assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2866   assert(RangeWidth <= CoeffWidth &&
2867          "Value range width should be less than coefficient width");
2868   assert(RangeWidth > 1 && "Value range bit width should be > 1");
2869 
2870   LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2871                     << "x + " << C << ", rw:" << RangeWidth << '\n');
2872 
2873   // Identify 0 as a (non)solution immediately.
2874   if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2875     LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2876     return APInt(CoeffWidth, 0);
2877   }
2878 
2879   // The result of APInt arithmetic has the same bit width as the operands,
2880   // so it can actually lose high bits. A product of two n-bit integers needs
2881   // 2n-1 bits to represent the full value.
2882   // The operation done below (on quadratic coefficients) that can produce
2883   // the largest value is the evaluation of the equation during bisection,
2884   // which needs 3 times the bitwidth of the coefficient, so the total number
2885   // of required bits is 3n.
2886   //
2887   // The purpose of this extension is to simulate the set Z of all integers,
2888   // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2889   // and negative numbers (not so much in a modulo arithmetic). The method
2890   // used to solve the equation is based on the standard formula for real
2891   // numbers, and uses the concepts of "positive" and "negative" with their
2892   // usual meanings.
2893   CoeffWidth *= 3;
2894   A = A.sext(CoeffWidth);
2895   B = B.sext(CoeffWidth);
2896   C = C.sext(CoeffWidth);
2897 
2898   // Make A > 0 for simplicity. Negate cannot overflow at this point because
2899   // the bit width has increased.
2900   if (A.isNegative()) {
2901     A.negate();
2902     B.negate();
2903     C.negate();
2904   }
2905 
2906   // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2907   // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2908   // and R = 2^BitWidth.
2909   // Since we're trying not only to find exact solutions, but also values
2910   // that "wrap around", such a set will always have a solution, i.e. an x
2911   // that satisfies at least one of the equations, or such that |q(x)|
2912   // exceeds kR, while |q(x-1)| for the same k does not.
2913   //
2914   // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2915   // positive solution n (in the above sense), and also such that the n
2916   // will be the least among all solutions corresponding to k = 0, 1, ...
2917   // (more precisely, the least element in the set
2918   //   { n(k) | k is such that a solution n(k) exists }).
2919   //
2920   // Consider the parabola (over real numbers) that corresponds to the
2921   // quadratic equation. Since A > 0, the arms of the parabola will point
2922   // up. Picking different values of k will shift it up and down by R.
2923   //
2924   // We want to shift the parabola in such a way as to reduce the problem
2925   // of solving q(x) = kR to solving shifted_q(x) = 0.
2926   // (The interesting solutions are the ceilings of the real number
2927   // solutions.)
2928   APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2929   APInt TwoA = 2 * A;
2930   APInt SqrB = B * B;
2931   bool PickLow;
2932 
2933   auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2934     assert(A.isStrictlyPositive());
2935     APInt T = V.abs().urem(A);
2936     if (T.isNullValue())
2937       return V;
2938     return V.isNegative() ? V+T : V+(A-T);
2939   };
2940 
2941   // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2942   // iff B is positive.
2943   if (B.isNonNegative()) {
2944     // If B >= 0, the vertex it at a negative location (or at 0), so in
2945     // order to have a non-negative solution we need to pick k that makes
2946     // C-kR negative. To satisfy all the requirements for the solution
2947     // that we are looking for, it needs to be closest to 0 of all k.
2948     C = C.srem(R);
2949     if (C.isStrictlyPositive())
2950       C -= R;
2951     // Pick the greater solution.
2952     PickLow = false;
2953   } else {
2954     // If B < 0, the vertex is at a positive location. For any solution
2955     // to exist, the discriminant must be non-negative. This means that
2956     // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2957     // lower bound on values of k: kR >= C - B^2/4A.
2958     APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2959     // Round LowkR up (towards +inf) to the nearest kR.
2960     LowkR = RoundUp(LowkR, R);
2961 
2962     // If there exists k meeting the condition above, and such that
2963     // C-kR > 0, there will be two positive real number solutions of
2964     // q(x) = kR. Out of all such values of k, pick the one that makes
2965     // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2966     // In other words, find maximum k such that LowkR <= kR < C.
2967     if (C.sgt(LowkR)) {
2968       // If LowkR < C, then such a k is guaranteed to exist because
2969       // LowkR itself is a multiple of R.
2970       C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2971       // Pick the smaller solution.
2972       PickLow = true;
2973     } else {
2974       // If C-kR < 0 for all potential k's, it means that one solution
2975       // will be negative, while the other will be positive. The positive
2976       // solution will shift towards 0 if the parabola is moved up.
2977       // Pick the kR closest to the lower bound (i.e. make C-kR closest
2978       // to 0, or in other words, out of all parabolas that have solutions,
2979       // pick the one that is the farthest "up").
2980       // Since LowkR is itself a multiple of R, simply take C-LowkR.
2981       C -= LowkR;
2982       // Pick the greater solution.
2983       PickLow = false;
2984     }
2985   }
2986 
2987   LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2988                     << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2989 
2990   APInt D = SqrB - 4*A*C;
2991   assert(D.isNonNegative() && "Negative discriminant");
2992   APInt SQ = D.sqrt();
2993 
2994   APInt Q = SQ * SQ;
2995   bool InexactSQ = Q != D;
2996   // The calculated SQ may actually be greater than the exact (non-integer)
2997   // value. If that's the case, decrement SQ to get a value that is lower.
2998   if (Q.sgt(D))
2999     SQ -= 1;
3000 
3001   APInt X;
3002   APInt Rem;
3003 
3004   // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
3005   // When using the quadratic formula directly, the calculated low root
3006   // may be greater than the exact one, since we would be subtracting SQ.
3007   // To make sure that the calculated root is not greater than the exact
3008   // one, subtract SQ+1 when calculating the low root (for inexact value
3009   // of SQ).
3010   if (PickLow)
3011     APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
3012   else
3013     APInt::sdivrem(-B + SQ, TwoA, X, Rem);
3014 
3015   // The updated coefficients should be such that the (exact) solution is
3016   // positive. Since APInt division rounds towards 0, the calculated one
3017   // can be 0, but cannot be negative.
3018   assert(X.isNonNegative() && "Solution should be non-negative");
3019 
3020   if (!InexactSQ && Rem.isNullValue()) {
3021     LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
3022     return X;
3023   }
3024 
3025   assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
3026   // The exact value of the square root of D should be between SQ and SQ+1.
3027   // This implies that the solution should be between that corresponding to
3028   // SQ (i.e. X) and that corresponding to SQ+1.
3029   //
3030   // The calculated X cannot be greater than the exact (real) solution.
3031   // Actually it must be strictly less than the exact solution, while
3032   // X+1 will be greater than or equal to it.
3033 
3034   APInt VX = (A*X + B)*X + C;
3035   APInt VY = VX + TwoA*X + A + B;
3036   bool SignChange = VX.isNegative() != VY.isNegative() ||
3037                     VX.isNullValue() != VY.isNullValue();
3038   // If the sign did not change between X and X+1, X is not a valid solution.
3039   // This could happen when the actual (exact) roots don't have an integer
3040   // between them, so they would both be contained between X and X+1.
3041   if (!SignChange) {
3042     LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
3043     return None;
3044   }
3045 
3046   X += 1;
3047   LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
3048   return X;
3049 }
3050 
3051 Optional<unsigned>
3052 llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
3053   assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth");
3054   if (A == B)
3055     return llvm::None;
3056   return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1);
3057 }
3058 
3059 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
3060 /// with the integer held in IntVal.
3061 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
3062                             unsigned StoreBytes) {
3063   assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
3064   const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
3065 
3066   if (sys::IsLittleEndianHost) {
3067     // Little-endian host - the source is ordered from LSB to MSB.  Order the
3068     // destination from LSB to MSB: Do a straight copy.
3069     memcpy(Dst, Src, StoreBytes);
3070   } else {
3071     // Big-endian host - the source is an array of 64 bit words ordered from
3072     // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
3073     // from MSB to LSB: Reverse the word order, but not the bytes in a word.
3074     while (StoreBytes > sizeof(uint64_t)) {
3075       StoreBytes -= sizeof(uint64_t);
3076       // May not be aligned so use memcpy.
3077       memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
3078       Src += sizeof(uint64_t);
3079     }
3080 
3081     memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
3082   }
3083 }
3084 
3085 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
3086 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
3087 void llvm::LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
3088   assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
3089   uint8_t *Dst = reinterpret_cast<uint8_t *>(
3090                    const_cast<uint64_t *>(IntVal.getRawData()));
3091 
3092   if (sys::IsLittleEndianHost)
3093     // Little-endian host - the destination must be ordered from LSB to MSB.
3094     // The source is ordered from LSB to MSB: Do a straight copy.
3095     memcpy(Dst, Src, LoadBytes);
3096   else {
3097     // Big-endian - the destination is an array of 64 bit words ordered from
3098     // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
3099     // ordered from MSB to LSB: Reverse the word order, but not the bytes in
3100     // a word.
3101     while (LoadBytes > sizeof(uint64_t)) {
3102       LoadBytes -= sizeof(uint64_t);
3103       // May not be aligned so use memcpy.
3104       memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
3105       Dst += sizeof(uint64_t);
3106     }
3107 
3108     memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
3109   }
3110 }
3111