1 //===-- APInt.cpp - Implement APInt class ---------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a class to represent arbitrary precision integer 11 // constant values and provide a variety of arithmetic operations on them. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/APInt.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/FoldingSet.h" 18 #include "llvm/ADT/Hashing.h" 19 #include "llvm/ADT/Optional.h" 20 #include "llvm/ADT/SmallString.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/ADT/bit.h" 23 #include "llvm/Config/llvm-config.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/ErrorHandling.h" 26 #include "llvm/Support/MathExtras.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <climits> 29 #include <cmath> 30 #include <cstdlib> 31 #include <cstring> 32 using namespace llvm; 33 34 #define DEBUG_TYPE "apint" 35 36 /// A utility function for allocating memory, checking for allocation failures, 37 /// and ensuring the contents are zeroed. 38 inline static uint64_t* getClearedMemory(unsigned numWords) { 39 uint64_t *result = new uint64_t[numWords]; 40 memset(result, 0, numWords * sizeof(uint64_t)); 41 return result; 42 } 43 44 /// A utility function for allocating memory and checking for allocation 45 /// failure. The content is not zeroed. 46 inline static uint64_t* getMemory(unsigned numWords) { 47 return new uint64_t[numWords]; 48 } 49 50 /// A utility function that converts a character to a digit. 51 inline static unsigned getDigit(char cdigit, uint8_t radix) { 52 unsigned r; 53 54 if (radix == 16 || radix == 36) { 55 r = cdigit - '0'; 56 if (r <= 9) 57 return r; 58 59 r = cdigit - 'A'; 60 if (r <= radix - 11U) 61 return r + 10; 62 63 r = cdigit - 'a'; 64 if (r <= radix - 11U) 65 return r + 10; 66 67 radix = 10; 68 } 69 70 r = cdigit - '0'; 71 if (r < radix) 72 return r; 73 74 return -1U; 75 } 76 77 78 void APInt::initSlowCase(uint64_t val, bool isSigned) { 79 U.pVal = getClearedMemory(getNumWords()); 80 U.pVal[0] = val; 81 if (isSigned && int64_t(val) < 0) 82 for (unsigned i = 1; i < getNumWords(); ++i) 83 U.pVal[i] = WORDTYPE_MAX; 84 clearUnusedBits(); 85 } 86 87 void APInt::initSlowCase(const APInt& that) { 88 U.pVal = getMemory(getNumWords()); 89 memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE); 90 } 91 92 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) { 93 assert(BitWidth && "Bitwidth too small"); 94 assert(bigVal.data() && "Null pointer detected!"); 95 if (isSingleWord()) 96 U.VAL = bigVal[0]; 97 else { 98 // Get memory, cleared to 0 99 U.pVal = getClearedMemory(getNumWords()); 100 // Calculate the number of words to copy 101 unsigned words = std::min<unsigned>(bigVal.size(), getNumWords()); 102 // Copy the words from bigVal to pVal 103 memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE); 104 } 105 // Make sure unused high bits are cleared 106 clearUnusedBits(); 107 } 108 109 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal) 110 : BitWidth(numBits) { 111 initFromArray(bigVal); 112 } 113 114 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) 115 : BitWidth(numBits) { 116 initFromArray(makeArrayRef(bigVal, numWords)); 117 } 118 119 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix) 120 : BitWidth(numbits) { 121 assert(BitWidth && "Bitwidth too small"); 122 fromString(numbits, Str, radix); 123 } 124 125 void APInt::reallocate(unsigned NewBitWidth) { 126 // If the number of words is the same we can just change the width and stop. 127 if (getNumWords() == getNumWords(NewBitWidth)) { 128 BitWidth = NewBitWidth; 129 return; 130 } 131 132 // If we have an allocation, delete it. 133 if (!isSingleWord()) 134 delete [] U.pVal; 135 136 // Update BitWidth. 137 BitWidth = NewBitWidth; 138 139 // If we are supposed to have an allocation, create it. 140 if (!isSingleWord()) 141 U.pVal = getMemory(getNumWords()); 142 } 143 144 void APInt::AssignSlowCase(const APInt& RHS) { 145 // Don't do anything for X = X 146 if (this == &RHS) 147 return; 148 149 // Adjust the bit width and handle allocations as necessary. 150 reallocate(RHS.getBitWidth()); 151 152 // Copy the data. 153 if (isSingleWord()) 154 U.VAL = RHS.U.VAL; 155 else 156 memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE); 157 } 158 159 /// This method 'profiles' an APInt for use with FoldingSet. 160 void APInt::Profile(FoldingSetNodeID& ID) const { 161 ID.AddInteger(BitWidth); 162 163 if (isSingleWord()) { 164 ID.AddInteger(U.VAL); 165 return; 166 } 167 168 unsigned NumWords = getNumWords(); 169 for (unsigned i = 0; i < NumWords; ++i) 170 ID.AddInteger(U.pVal[i]); 171 } 172 173 /// Prefix increment operator. Increments the APInt by one. 174 APInt& APInt::operator++() { 175 if (isSingleWord()) 176 ++U.VAL; 177 else 178 tcIncrement(U.pVal, getNumWords()); 179 return clearUnusedBits(); 180 } 181 182 /// Prefix decrement operator. Decrements the APInt by one. 183 APInt& APInt::operator--() { 184 if (isSingleWord()) 185 --U.VAL; 186 else 187 tcDecrement(U.pVal, getNumWords()); 188 return clearUnusedBits(); 189 } 190 191 /// Adds the RHS APint to this APInt. 192 /// @returns this, after addition of RHS. 193 /// Addition assignment operator. 194 APInt& APInt::operator+=(const APInt& RHS) { 195 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 196 if (isSingleWord()) 197 U.VAL += RHS.U.VAL; 198 else 199 tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords()); 200 return clearUnusedBits(); 201 } 202 203 APInt& APInt::operator+=(uint64_t RHS) { 204 if (isSingleWord()) 205 U.VAL += RHS; 206 else 207 tcAddPart(U.pVal, RHS, getNumWords()); 208 return clearUnusedBits(); 209 } 210 211 /// Subtracts the RHS APInt from this APInt 212 /// @returns this, after subtraction 213 /// Subtraction assignment operator. 214 APInt& APInt::operator-=(const APInt& RHS) { 215 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 216 if (isSingleWord()) 217 U.VAL -= RHS.U.VAL; 218 else 219 tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords()); 220 return clearUnusedBits(); 221 } 222 223 APInt& APInt::operator-=(uint64_t RHS) { 224 if (isSingleWord()) 225 U.VAL -= RHS; 226 else 227 tcSubtractPart(U.pVal, RHS, getNumWords()); 228 return clearUnusedBits(); 229 } 230 231 APInt APInt::operator*(const APInt& RHS) const { 232 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 233 if (isSingleWord()) 234 return APInt(BitWidth, U.VAL * RHS.U.VAL); 235 236 APInt Result(getMemory(getNumWords()), getBitWidth()); 237 238 tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords()); 239 240 Result.clearUnusedBits(); 241 return Result; 242 } 243 244 void APInt::AndAssignSlowCase(const APInt& RHS) { 245 tcAnd(U.pVal, RHS.U.pVal, getNumWords()); 246 } 247 248 void APInt::OrAssignSlowCase(const APInt& RHS) { 249 tcOr(U.pVal, RHS.U.pVal, getNumWords()); 250 } 251 252 void APInt::XorAssignSlowCase(const APInt& RHS) { 253 tcXor(U.pVal, RHS.U.pVal, getNumWords()); 254 } 255 256 APInt& APInt::operator*=(const APInt& RHS) { 257 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 258 *this = *this * RHS; 259 return *this; 260 } 261 262 APInt& APInt::operator*=(uint64_t RHS) { 263 if (isSingleWord()) { 264 U.VAL *= RHS; 265 } else { 266 unsigned NumWords = getNumWords(); 267 tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false); 268 } 269 return clearUnusedBits(); 270 } 271 272 bool APInt::EqualSlowCase(const APInt& RHS) const { 273 return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal); 274 } 275 276 int APInt::compare(const APInt& RHS) const { 277 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); 278 if (isSingleWord()) 279 return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL; 280 281 return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); 282 } 283 284 int APInt::compareSigned(const APInt& RHS) const { 285 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); 286 if (isSingleWord()) { 287 int64_t lhsSext = SignExtend64(U.VAL, BitWidth); 288 int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth); 289 return lhsSext < rhsSext ? -1 : lhsSext > rhsSext; 290 } 291 292 bool lhsNeg = isNegative(); 293 bool rhsNeg = RHS.isNegative(); 294 295 // If the sign bits don't match, then (LHS < RHS) if LHS is negative 296 if (lhsNeg != rhsNeg) 297 return lhsNeg ? -1 : 1; 298 299 // Otherwise we can just use an unsigned comparison, because even negative 300 // numbers compare correctly this way if both have the same signed-ness. 301 return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); 302 } 303 304 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { 305 unsigned loWord = whichWord(loBit); 306 unsigned hiWord = whichWord(hiBit); 307 308 // Create an initial mask for the low word with zeros below loBit. 309 uint64_t loMask = WORDTYPE_MAX << whichBit(loBit); 310 311 // If hiBit is not aligned, we need a high mask. 312 unsigned hiShiftAmt = whichBit(hiBit); 313 if (hiShiftAmt != 0) { 314 // Create a high mask with zeros above hiBit. 315 uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt); 316 // If loWord and hiWord are equal, then we combine the masks. Otherwise, 317 // set the bits in hiWord. 318 if (hiWord == loWord) 319 loMask &= hiMask; 320 else 321 U.pVal[hiWord] |= hiMask; 322 } 323 // Apply the mask to the low word. 324 U.pVal[loWord] |= loMask; 325 326 // Fill any words between loWord and hiWord with all ones. 327 for (unsigned word = loWord + 1; word < hiWord; ++word) 328 U.pVal[word] = WORDTYPE_MAX; 329 } 330 331 /// Toggle every bit to its opposite value. 332 void APInt::flipAllBitsSlowCase() { 333 tcComplement(U.pVal, getNumWords()); 334 clearUnusedBits(); 335 } 336 337 /// Toggle a given bit to its opposite value whose position is given 338 /// as "bitPosition". 339 /// Toggles a given bit to its opposite value. 340 void APInt::flipBit(unsigned bitPosition) { 341 assert(bitPosition < BitWidth && "Out of the bit-width range!"); 342 if ((*this)[bitPosition]) clearBit(bitPosition); 343 else setBit(bitPosition); 344 } 345 346 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { 347 unsigned subBitWidth = subBits.getBitWidth(); 348 assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth && 349 "Illegal bit insertion"); 350 351 // Insertion is a direct copy. 352 if (subBitWidth == BitWidth) { 353 *this = subBits; 354 return; 355 } 356 357 // Single word result can be done as a direct bitmask. 358 if (isSingleWord()) { 359 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth); 360 U.VAL &= ~(mask << bitPosition); 361 U.VAL |= (subBits.U.VAL << bitPosition); 362 return; 363 } 364 365 unsigned loBit = whichBit(bitPosition); 366 unsigned loWord = whichWord(bitPosition); 367 unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1); 368 369 // Insertion within a single word can be done as a direct bitmask. 370 if (loWord == hi1Word) { 371 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth); 372 U.pVal[loWord] &= ~(mask << loBit); 373 U.pVal[loWord] |= (subBits.U.VAL << loBit); 374 return; 375 } 376 377 // Insert on word boundaries. 378 if (loBit == 0) { 379 // Direct copy whole words. 380 unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD; 381 memcpy(U.pVal + loWord, subBits.getRawData(), 382 numWholeSubWords * APINT_WORD_SIZE); 383 384 // Mask+insert remaining bits. 385 unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD; 386 if (remainingBits != 0) { 387 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits); 388 U.pVal[hi1Word] &= ~mask; 389 U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1); 390 } 391 return; 392 } 393 394 // General case - set/clear individual bits in dst based on src. 395 // TODO - there is scope for optimization here, but at the moment this code 396 // path is barely used so prefer readability over performance. 397 for (unsigned i = 0; i != subBitWidth; ++i) { 398 if (subBits[i]) 399 setBit(bitPosition + i); 400 else 401 clearBit(bitPosition + i); 402 } 403 } 404 405 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { 406 assert(numBits > 0 && "Can't extract zero bits"); 407 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && 408 "Illegal bit extraction"); 409 410 if (isSingleWord()) 411 return APInt(numBits, U.VAL >> bitPosition); 412 413 unsigned loBit = whichBit(bitPosition); 414 unsigned loWord = whichWord(bitPosition); 415 unsigned hiWord = whichWord(bitPosition + numBits - 1); 416 417 // Single word result extracting bits from a single word source. 418 if (loWord == hiWord) 419 return APInt(numBits, U.pVal[loWord] >> loBit); 420 421 // Extracting bits that start on a source word boundary can be done 422 // as a fast memory copy. 423 if (loBit == 0) 424 return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord)); 425 426 // General case - shift + copy source words directly into place. 427 APInt Result(numBits, 0); 428 unsigned NumSrcWords = getNumWords(); 429 unsigned NumDstWords = Result.getNumWords(); 430 431 uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal; 432 for (unsigned word = 0; word < NumDstWords; ++word) { 433 uint64_t w0 = U.pVal[loWord + word]; 434 uint64_t w1 = 435 (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0; 436 DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit)); 437 } 438 439 return Result.clearUnusedBits(); 440 } 441 442 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { 443 assert(!str.empty() && "Invalid string length"); 444 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || 445 radix == 36) && 446 "Radix should be 2, 8, 10, 16, or 36!"); 447 448 size_t slen = str.size(); 449 450 // Each computation below needs to know if it's negative. 451 StringRef::iterator p = str.begin(); 452 unsigned isNegative = *p == '-'; 453 if (*p == '-' || *p == '+') { 454 p++; 455 slen--; 456 assert(slen && "String is only a sign, needs a value."); 457 } 458 459 // For radixes of power-of-two values, the bits required is accurately and 460 // easily computed 461 if (radix == 2) 462 return slen + isNegative; 463 if (radix == 8) 464 return slen * 3 + isNegative; 465 if (radix == 16) 466 return slen * 4 + isNegative; 467 468 // FIXME: base 36 469 470 // This is grossly inefficient but accurate. We could probably do something 471 // with a computation of roughly slen*64/20 and then adjust by the value of 472 // the first few digits. But, I'm not sure how accurate that could be. 473 474 // Compute a sufficient number of bits that is always large enough but might 475 // be too large. This avoids the assertion in the constructor. This 476 // calculation doesn't work appropriately for the numbers 0-9, so just use 4 477 // bits in that case. 478 unsigned sufficient 479 = radix == 10? (slen == 1 ? 4 : slen * 64/18) 480 : (slen == 1 ? 7 : slen * 16/3); 481 482 // Convert to the actual binary value. 483 APInt tmp(sufficient, StringRef(p, slen), radix); 484 485 // Compute how many bits are required. If the log is infinite, assume we need 486 // just bit. 487 unsigned log = tmp.logBase2(); 488 if (log == (unsigned)-1) { 489 return isNegative + 1; 490 } else { 491 return isNegative + log + 1; 492 } 493 } 494 495 hash_code llvm::hash_value(const APInt &Arg) { 496 if (Arg.isSingleWord()) 497 return hash_combine(Arg.U.VAL); 498 499 return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()); 500 } 501 502 bool APInt::isSplat(unsigned SplatSizeInBits) const { 503 assert(getBitWidth() % SplatSizeInBits == 0 && 504 "SplatSizeInBits must divide width!"); 505 // We can check that all parts of an integer are equal by making use of a 506 // little trick: rotate and check if it's still the same value. 507 return *this == rotl(SplatSizeInBits); 508 } 509 510 /// This function returns the high "numBits" bits of this APInt. 511 APInt APInt::getHiBits(unsigned numBits) const { 512 return this->lshr(BitWidth - numBits); 513 } 514 515 /// This function returns the low "numBits" bits of this APInt. 516 APInt APInt::getLoBits(unsigned numBits) const { 517 APInt Result(getLowBitsSet(BitWidth, numBits)); 518 Result &= *this; 519 return Result; 520 } 521 522 /// Return a value containing V broadcasted over NewLen bits. 523 APInt APInt::getSplat(unsigned NewLen, const APInt &V) { 524 assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!"); 525 526 APInt Val = V.zextOrSelf(NewLen); 527 for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1) 528 Val |= Val << I; 529 530 return Val; 531 } 532 533 unsigned APInt::countLeadingZerosSlowCase() const { 534 unsigned Count = 0; 535 for (int i = getNumWords()-1; i >= 0; --i) { 536 uint64_t V = U.pVal[i]; 537 if (V == 0) 538 Count += APINT_BITS_PER_WORD; 539 else { 540 Count += llvm::countLeadingZeros(V); 541 break; 542 } 543 } 544 // Adjust for unused bits in the most significant word (they are zero). 545 unsigned Mod = BitWidth % APINT_BITS_PER_WORD; 546 Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0; 547 return Count; 548 } 549 550 unsigned APInt::countLeadingOnesSlowCase() const { 551 unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD; 552 unsigned shift; 553 if (!highWordBits) { 554 highWordBits = APINT_BITS_PER_WORD; 555 shift = 0; 556 } else { 557 shift = APINT_BITS_PER_WORD - highWordBits; 558 } 559 int i = getNumWords() - 1; 560 unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift); 561 if (Count == highWordBits) { 562 for (i--; i >= 0; --i) { 563 if (U.pVal[i] == WORDTYPE_MAX) 564 Count += APINT_BITS_PER_WORD; 565 else { 566 Count += llvm::countLeadingOnes(U.pVal[i]); 567 break; 568 } 569 } 570 } 571 return Count; 572 } 573 574 unsigned APInt::countTrailingZerosSlowCase() const { 575 unsigned Count = 0; 576 unsigned i = 0; 577 for (; i < getNumWords() && U.pVal[i] == 0; ++i) 578 Count += APINT_BITS_PER_WORD; 579 if (i < getNumWords()) 580 Count += llvm::countTrailingZeros(U.pVal[i]); 581 return std::min(Count, BitWidth); 582 } 583 584 unsigned APInt::countTrailingOnesSlowCase() const { 585 unsigned Count = 0; 586 unsigned i = 0; 587 for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i) 588 Count += APINT_BITS_PER_WORD; 589 if (i < getNumWords()) 590 Count += llvm::countTrailingOnes(U.pVal[i]); 591 assert(Count <= BitWidth); 592 return Count; 593 } 594 595 unsigned APInt::countPopulationSlowCase() const { 596 unsigned Count = 0; 597 for (unsigned i = 0; i < getNumWords(); ++i) 598 Count += llvm::countPopulation(U.pVal[i]); 599 return Count; 600 } 601 602 bool APInt::intersectsSlowCase(const APInt &RHS) const { 603 for (unsigned i = 0, e = getNumWords(); i != e; ++i) 604 if ((U.pVal[i] & RHS.U.pVal[i]) != 0) 605 return true; 606 607 return false; 608 } 609 610 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const { 611 for (unsigned i = 0, e = getNumWords(); i != e; ++i) 612 if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0) 613 return false; 614 615 return true; 616 } 617 618 APInt APInt::byteSwap() const { 619 assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!"); 620 if (BitWidth == 16) 621 return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL))); 622 if (BitWidth == 32) 623 return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL))); 624 if (BitWidth == 48) { 625 unsigned Tmp1 = unsigned(U.VAL >> 16); 626 Tmp1 = ByteSwap_32(Tmp1); 627 uint16_t Tmp2 = uint16_t(U.VAL); 628 Tmp2 = ByteSwap_16(Tmp2); 629 return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1); 630 } 631 if (BitWidth == 64) 632 return APInt(BitWidth, ByteSwap_64(U.VAL)); 633 634 APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0); 635 for (unsigned I = 0, N = getNumWords(); I != N; ++I) 636 Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]); 637 if (Result.BitWidth != BitWidth) { 638 Result.lshrInPlace(Result.BitWidth - BitWidth); 639 Result.BitWidth = BitWidth; 640 } 641 return Result; 642 } 643 644 APInt APInt::reverseBits() const { 645 switch (BitWidth) { 646 case 64: 647 return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL)); 648 case 32: 649 return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL)); 650 case 16: 651 return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL)); 652 case 8: 653 return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL)); 654 default: 655 break; 656 } 657 658 APInt Val(*this); 659 APInt Reversed(BitWidth, 0); 660 unsigned S = BitWidth; 661 662 for (; Val != 0; Val.lshrInPlace(1)) { 663 Reversed <<= 1; 664 Reversed |= Val[0]; 665 --S; 666 } 667 668 Reversed <<= S; 669 return Reversed; 670 } 671 672 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) { 673 // Fast-path a common case. 674 if (A == B) return A; 675 676 // Corner cases: if either operand is zero, the other is the gcd. 677 if (!A) return B; 678 if (!B) return A; 679 680 // Count common powers of 2 and remove all other powers of 2. 681 unsigned Pow2; 682 { 683 unsigned Pow2_A = A.countTrailingZeros(); 684 unsigned Pow2_B = B.countTrailingZeros(); 685 if (Pow2_A > Pow2_B) { 686 A.lshrInPlace(Pow2_A - Pow2_B); 687 Pow2 = Pow2_B; 688 } else if (Pow2_B > Pow2_A) { 689 B.lshrInPlace(Pow2_B - Pow2_A); 690 Pow2 = Pow2_A; 691 } else { 692 Pow2 = Pow2_A; 693 } 694 } 695 696 // Both operands are odd multiples of 2^Pow_2: 697 // 698 // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b)) 699 // 700 // This is a modified version of Stein's algorithm, taking advantage of 701 // efficient countTrailingZeros(). 702 while (A != B) { 703 if (A.ugt(B)) { 704 A -= B; 705 A.lshrInPlace(A.countTrailingZeros() - Pow2); 706 } else { 707 B -= A; 708 B.lshrInPlace(B.countTrailingZeros() - Pow2); 709 } 710 } 711 712 return A; 713 } 714 715 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { 716 uint64_t I = bit_cast<uint64_t>(Double); 717 718 // Get the sign bit from the highest order bit 719 bool isNeg = I >> 63; 720 721 // Get the 11-bit exponent and adjust for the 1023 bit bias 722 int64_t exp = ((I >> 52) & 0x7ff) - 1023; 723 724 // If the exponent is negative, the value is < 0 so just return 0. 725 if (exp < 0) 726 return APInt(width, 0u); 727 728 // Extract the mantissa by clearing the top 12 bits (sign + exponent). 729 uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52; 730 731 // If the exponent doesn't shift all bits out of the mantissa 732 if (exp < 52) 733 return isNeg ? -APInt(width, mantissa >> (52 - exp)) : 734 APInt(width, mantissa >> (52 - exp)); 735 736 // If the client didn't provide enough bits for us to shift the mantissa into 737 // then the result is undefined, just return 0 738 if (width <= exp - 52) 739 return APInt(width, 0); 740 741 // Otherwise, we have to shift the mantissa bits up to the right location 742 APInt Tmp(width, mantissa); 743 Tmp <<= (unsigned)exp - 52; 744 return isNeg ? -Tmp : Tmp; 745 } 746 747 /// This function converts this APInt to a double. 748 /// The layout for double is as following (IEEE Standard 754): 749 /// -------------------------------------- 750 /// | Sign Exponent Fraction Bias | 751 /// |-------------------------------------- | 752 /// | 1[63] 11[62-52] 52[51-00] 1023 | 753 /// -------------------------------------- 754 double APInt::roundToDouble(bool isSigned) const { 755 756 // Handle the simple case where the value is contained in one uint64_t. 757 // It is wrong to optimize getWord(0) to VAL; there might be more than one word. 758 if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) { 759 if (isSigned) { 760 int64_t sext = SignExtend64(getWord(0), BitWidth); 761 return double(sext); 762 } else 763 return double(getWord(0)); 764 } 765 766 // Determine if the value is negative. 767 bool isNeg = isSigned ? (*this)[BitWidth-1] : false; 768 769 // Construct the absolute value if we're negative. 770 APInt Tmp(isNeg ? -(*this) : (*this)); 771 772 // Figure out how many bits we're using. 773 unsigned n = Tmp.getActiveBits(); 774 775 // The exponent (without bias normalization) is just the number of bits 776 // we are using. Note that the sign bit is gone since we constructed the 777 // absolute value. 778 uint64_t exp = n; 779 780 // Return infinity for exponent overflow 781 if (exp > 1023) { 782 if (!isSigned || !isNeg) 783 return std::numeric_limits<double>::infinity(); 784 else 785 return -std::numeric_limits<double>::infinity(); 786 } 787 exp += 1023; // Increment for 1023 bias 788 789 // Number of bits in mantissa is 52. To obtain the mantissa value, we must 790 // extract the high 52 bits from the correct words in pVal. 791 uint64_t mantissa; 792 unsigned hiWord = whichWord(n-1); 793 if (hiWord == 0) { 794 mantissa = Tmp.U.pVal[0]; 795 if (n > 52) 796 mantissa >>= n - 52; // shift down, we want the top 52 bits. 797 } else { 798 assert(hiWord > 0 && "huh?"); 799 uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD); 800 uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD); 801 mantissa = hibits | lobits; 802 } 803 804 // The leading bit of mantissa is implicit, so get rid of it. 805 uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0; 806 uint64_t I = sign | (exp << 52) | mantissa; 807 return bit_cast<double>(I); 808 } 809 810 // Truncate to new width. 811 APInt APInt::trunc(unsigned width) const { 812 assert(width < BitWidth && "Invalid APInt Truncate request"); 813 assert(width && "Can't truncate to 0 bits"); 814 815 if (width <= APINT_BITS_PER_WORD) 816 return APInt(width, getRawData()[0]); 817 818 APInt Result(getMemory(getNumWords(width)), width); 819 820 // Copy full words. 821 unsigned i; 822 for (i = 0; i != width / APINT_BITS_PER_WORD; i++) 823 Result.U.pVal[i] = U.pVal[i]; 824 825 // Truncate and copy any partial word. 826 unsigned bits = (0 - width) % APINT_BITS_PER_WORD; 827 if (bits != 0) 828 Result.U.pVal[i] = U.pVal[i] << bits >> bits; 829 830 return Result; 831 } 832 833 // Sign extend to a new width. 834 APInt APInt::sext(unsigned Width) const { 835 assert(Width > BitWidth && "Invalid APInt SignExtend request"); 836 837 if (Width <= APINT_BITS_PER_WORD) 838 return APInt(Width, SignExtend64(U.VAL, BitWidth)); 839 840 APInt Result(getMemory(getNumWords(Width)), Width); 841 842 // Copy words. 843 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); 844 845 // Sign extend the last word since there may be unused bits in the input. 846 Result.U.pVal[getNumWords() - 1] = 847 SignExtend64(Result.U.pVal[getNumWords() - 1], 848 ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); 849 850 // Fill with sign bits. 851 std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0, 852 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); 853 Result.clearUnusedBits(); 854 return Result; 855 } 856 857 // Zero extend to a new width. 858 APInt APInt::zext(unsigned width) const { 859 assert(width > BitWidth && "Invalid APInt ZeroExtend request"); 860 861 if (width <= APINT_BITS_PER_WORD) 862 return APInt(width, U.VAL); 863 864 APInt Result(getMemory(getNumWords(width)), width); 865 866 // Copy words. 867 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); 868 869 // Zero remaining words. 870 std::memset(Result.U.pVal + getNumWords(), 0, 871 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); 872 873 return Result; 874 } 875 876 APInt APInt::zextOrTrunc(unsigned width) const { 877 if (BitWidth < width) 878 return zext(width); 879 if (BitWidth > width) 880 return trunc(width); 881 return *this; 882 } 883 884 APInt APInt::sextOrTrunc(unsigned width) const { 885 if (BitWidth < width) 886 return sext(width); 887 if (BitWidth > width) 888 return trunc(width); 889 return *this; 890 } 891 892 APInt APInt::zextOrSelf(unsigned width) const { 893 if (BitWidth < width) 894 return zext(width); 895 return *this; 896 } 897 898 APInt APInt::sextOrSelf(unsigned width) const { 899 if (BitWidth < width) 900 return sext(width); 901 return *this; 902 } 903 904 /// Arithmetic right-shift this APInt by shiftAmt. 905 /// Arithmetic right-shift function. 906 void APInt::ashrInPlace(const APInt &shiftAmt) { 907 ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); 908 } 909 910 /// Arithmetic right-shift this APInt by shiftAmt. 911 /// Arithmetic right-shift function. 912 void APInt::ashrSlowCase(unsigned ShiftAmt) { 913 // Don't bother performing a no-op shift. 914 if (!ShiftAmt) 915 return; 916 917 // Save the original sign bit for later. 918 bool Negative = isNegative(); 919 920 // WordShift is the inter-part shift; BitShift is intra-part shift. 921 unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD; 922 unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD; 923 924 unsigned WordsToMove = getNumWords() - WordShift; 925 if (WordsToMove != 0) { 926 // Sign extend the last word to fill in the unused bits. 927 U.pVal[getNumWords() - 1] = SignExtend64( 928 U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); 929 930 // Fastpath for moving by whole words. 931 if (BitShift == 0) { 932 std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE); 933 } else { 934 // Move the words containing significant bits. 935 for (unsigned i = 0; i != WordsToMove - 1; ++i) 936 U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) | 937 (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift)); 938 939 // Handle the last word which has no high bits to copy. 940 U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift; 941 // Sign extend one more time. 942 U.pVal[WordsToMove - 1] = 943 SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift); 944 } 945 } 946 947 // Fill in the remainder based on the original sign. 948 std::memset(U.pVal + WordsToMove, Negative ? -1 : 0, 949 WordShift * APINT_WORD_SIZE); 950 clearUnusedBits(); 951 } 952 953 /// Logical right-shift this APInt by shiftAmt. 954 /// Logical right-shift function. 955 void APInt::lshrInPlace(const APInt &shiftAmt) { 956 lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); 957 } 958 959 /// Logical right-shift this APInt by shiftAmt. 960 /// Logical right-shift function. 961 void APInt::lshrSlowCase(unsigned ShiftAmt) { 962 tcShiftRight(U.pVal, getNumWords(), ShiftAmt); 963 } 964 965 /// Left-shift this APInt by shiftAmt. 966 /// Left-shift function. 967 APInt &APInt::operator<<=(const APInt &shiftAmt) { 968 // It's undefined behavior in C to shift by BitWidth or greater. 969 *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth); 970 return *this; 971 } 972 973 void APInt::shlSlowCase(unsigned ShiftAmt) { 974 tcShiftLeft(U.pVal, getNumWords(), ShiftAmt); 975 clearUnusedBits(); 976 } 977 978 // Calculate the rotate amount modulo the bit width. 979 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) { 980 unsigned rotBitWidth = rotateAmt.getBitWidth(); 981 APInt rot = rotateAmt; 982 if (rotBitWidth < BitWidth) { 983 // Extend the rotate APInt, so that the urem doesn't divide by 0. 984 // e.g. APInt(1, 32) would give APInt(1, 0). 985 rot = rotateAmt.zext(BitWidth); 986 } 987 rot = rot.urem(APInt(rot.getBitWidth(), BitWidth)); 988 return rot.getLimitedValue(BitWidth); 989 } 990 991 APInt APInt::rotl(const APInt &rotateAmt) const { 992 return rotl(rotateModulo(BitWidth, rotateAmt)); 993 } 994 995 APInt APInt::rotl(unsigned rotateAmt) const { 996 rotateAmt %= BitWidth; 997 if (rotateAmt == 0) 998 return *this; 999 return shl(rotateAmt) | lshr(BitWidth - rotateAmt); 1000 } 1001 1002 APInt APInt::rotr(const APInt &rotateAmt) const { 1003 return rotr(rotateModulo(BitWidth, rotateAmt)); 1004 } 1005 1006 APInt APInt::rotr(unsigned rotateAmt) const { 1007 rotateAmt %= BitWidth; 1008 if (rotateAmt == 0) 1009 return *this; 1010 return lshr(rotateAmt) | shl(BitWidth - rotateAmt); 1011 } 1012 1013 // Square Root - this method computes and returns the square root of "this". 1014 // Three mechanisms are used for computation. For small values (<= 5 bits), 1015 // a table lookup is done. This gets some performance for common cases. For 1016 // values using less than 52 bits, the value is converted to double and then 1017 // the libc sqrt function is called. The result is rounded and then converted 1018 // back to a uint64_t which is then used to construct the result. Finally, 1019 // the Babylonian method for computing square roots is used. 1020 APInt APInt::sqrt() const { 1021 1022 // Determine the magnitude of the value. 1023 unsigned magnitude = getActiveBits(); 1024 1025 // Use a fast table for some small values. This also gets rid of some 1026 // rounding errors in libc sqrt for small values. 1027 if (magnitude <= 5) { 1028 static const uint8_t results[32] = { 1029 /* 0 */ 0, 1030 /* 1- 2 */ 1, 1, 1031 /* 3- 6 */ 2, 2, 2, 2, 1032 /* 7-12 */ 3, 3, 3, 3, 3, 3, 1033 /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4, 1034 /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1035 /* 31 */ 6 1036 }; 1037 return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]); 1038 } 1039 1040 // If the magnitude of the value fits in less than 52 bits (the precision of 1041 // an IEEE double precision floating point value), then we can use the 1042 // libc sqrt function which will probably use a hardware sqrt computation. 1043 // This should be faster than the algorithm below. 1044 if (magnitude < 52) { 1045 return APInt(BitWidth, 1046 uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL 1047 : U.pVal[0]))))); 1048 } 1049 1050 // Okay, all the short cuts are exhausted. We must compute it. The following 1051 // is a classical Babylonian method for computing the square root. This code 1052 // was adapted to APInt from a wikipedia article on such computations. 1053 // See http://www.wikipedia.org/ and go to the page named 1054 // Calculate_an_integer_square_root. 1055 unsigned nbits = BitWidth, i = 4; 1056 APInt testy(BitWidth, 16); 1057 APInt x_old(BitWidth, 1); 1058 APInt x_new(BitWidth, 0); 1059 APInt two(BitWidth, 2); 1060 1061 // Select a good starting value using binary logarithms. 1062 for (;; i += 2, testy = testy.shl(2)) 1063 if (i >= nbits || this->ule(testy)) { 1064 x_old = x_old.shl(i / 2); 1065 break; 1066 } 1067 1068 // Use the Babylonian method to arrive at the integer square root: 1069 for (;;) { 1070 x_new = (this->udiv(x_old) + x_old).udiv(two); 1071 if (x_old.ule(x_new)) 1072 break; 1073 x_old = x_new; 1074 } 1075 1076 // Make sure we return the closest approximation 1077 // NOTE: The rounding calculation below is correct. It will produce an 1078 // off-by-one discrepancy with results from pari/gp. That discrepancy has been 1079 // determined to be a rounding issue with pari/gp as it begins to use a 1080 // floating point representation after 192 bits. There are no discrepancies 1081 // between this algorithm and pari/gp for bit widths < 192 bits. 1082 APInt square(x_old * x_old); 1083 APInt nextSquare((x_old + 1) * (x_old +1)); 1084 if (this->ult(square)) 1085 return x_old; 1086 assert(this->ule(nextSquare) && "Error in APInt::sqrt computation"); 1087 APInt midpoint((nextSquare - square).udiv(two)); 1088 APInt offset(*this - square); 1089 if (offset.ult(midpoint)) 1090 return x_old; 1091 return x_old + 1; 1092 } 1093 1094 /// Computes the multiplicative inverse of this APInt for a given modulo. The 1095 /// iterative extended Euclidean algorithm is used to solve for this value, 1096 /// however we simplify it to speed up calculating only the inverse, and take 1097 /// advantage of div+rem calculations. We also use some tricks to avoid copying 1098 /// (potentially large) APInts around. 1099 APInt APInt::multiplicativeInverse(const APInt& modulo) const { 1100 assert(ult(modulo) && "This APInt must be smaller than the modulo"); 1101 1102 // Using the properties listed at the following web page (accessed 06/21/08): 1103 // http://www.numbertheory.org/php/euclid.html 1104 // (especially the properties numbered 3, 4 and 9) it can be proved that 1105 // BitWidth bits suffice for all the computations in the algorithm implemented 1106 // below. More precisely, this number of bits suffice if the multiplicative 1107 // inverse exists, but may not suffice for the general extended Euclidean 1108 // algorithm. 1109 1110 APInt r[2] = { modulo, *this }; 1111 APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) }; 1112 APInt q(BitWidth, 0); 1113 1114 unsigned i; 1115 for (i = 0; r[i^1] != 0; i ^= 1) { 1116 // An overview of the math without the confusing bit-flipping: 1117 // q = r[i-2] / r[i-1] 1118 // r[i] = r[i-2] % r[i-1] 1119 // t[i] = t[i-2] - t[i-1] * q 1120 udivrem(r[i], r[i^1], q, r[i]); 1121 t[i] -= t[i^1] * q; 1122 } 1123 1124 // If this APInt and the modulo are not coprime, there is no multiplicative 1125 // inverse, so return 0. We check this by looking at the next-to-last 1126 // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean 1127 // algorithm. 1128 if (r[i] != 1) 1129 return APInt(BitWidth, 0); 1130 1131 // The next-to-last t is the multiplicative inverse. However, we are 1132 // interested in a positive inverse. Calculate a positive one from a negative 1133 // one if necessary. A simple addition of the modulo suffices because 1134 // abs(t[i]) is known to be less than *this/2 (see the link above). 1135 if (t[i].isNegative()) 1136 t[i] += modulo; 1137 1138 return std::move(t[i]); 1139 } 1140 1141 /// Calculate the magic numbers required to implement a signed integer division 1142 /// by a constant as a sequence of multiplies, adds and shifts. Requires that 1143 /// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S. 1144 /// Warren, Jr., chapter 10. 1145 APInt::ms APInt::magic() const { 1146 const APInt& d = *this; 1147 unsigned p; 1148 APInt ad, anc, delta, q1, r1, q2, r2, t; 1149 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); 1150 struct ms mag; 1151 1152 ad = d.abs(); 1153 t = signedMin + (d.lshr(d.getBitWidth() - 1)); 1154 anc = t - 1 - t.urem(ad); // absolute value of nc 1155 p = d.getBitWidth() - 1; // initialize p 1156 q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc) 1157 r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc)) 1158 q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d) 1159 r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d)) 1160 do { 1161 p = p + 1; 1162 q1 = q1<<1; // update q1 = 2p/abs(nc) 1163 r1 = r1<<1; // update r1 = rem(2p/abs(nc)) 1164 if (r1.uge(anc)) { // must be unsigned comparison 1165 q1 = q1 + 1; 1166 r1 = r1 - anc; 1167 } 1168 q2 = q2<<1; // update q2 = 2p/abs(d) 1169 r2 = r2<<1; // update r2 = rem(2p/abs(d)) 1170 if (r2.uge(ad)) { // must be unsigned comparison 1171 q2 = q2 + 1; 1172 r2 = r2 - ad; 1173 } 1174 delta = ad - r2; 1175 } while (q1.ult(delta) || (q1 == delta && r1 == 0)); 1176 1177 mag.m = q2 + 1; 1178 if (d.isNegative()) mag.m = -mag.m; // resulting magic number 1179 mag.s = p - d.getBitWidth(); // resulting shift 1180 return mag; 1181 } 1182 1183 /// Calculate the magic numbers required to implement an unsigned integer 1184 /// division by a constant as a sequence of multiplies, adds and shifts. 1185 /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry 1186 /// S. Warren, Jr., chapter 10. 1187 /// LeadingZeros can be used to simplify the calculation if the upper bits 1188 /// of the divided value are known zero. 1189 APInt::mu APInt::magicu(unsigned LeadingZeros) const { 1190 const APInt& d = *this; 1191 unsigned p; 1192 APInt nc, delta, q1, r1, q2, r2; 1193 struct mu magu; 1194 magu.a = 0; // initialize "add" indicator 1195 APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros); 1196 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); 1197 APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); 1198 1199 nc = allOnes - (allOnes - d).urem(d); 1200 p = d.getBitWidth() - 1; // initialize p 1201 q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc 1202 r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc) 1203 q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d 1204 r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d) 1205 do { 1206 p = p + 1; 1207 if (r1.uge(nc - r1)) { 1208 q1 = q1 + q1 + 1; // update q1 1209 r1 = r1 + r1 - nc; // update r1 1210 } 1211 else { 1212 q1 = q1+q1; // update q1 1213 r1 = r1+r1; // update r1 1214 } 1215 if ((r2 + 1).uge(d - r2)) { 1216 if (q2.uge(signedMax)) magu.a = 1; 1217 q2 = q2+q2 + 1; // update q2 1218 r2 = r2+r2 + 1 - d; // update r2 1219 } 1220 else { 1221 if (q2.uge(signedMin)) magu.a = 1; 1222 q2 = q2+q2; // update q2 1223 r2 = r2+r2 + 1; // update r2 1224 } 1225 delta = d - 1 - r2; 1226 } while (p < d.getBitWidth()*2 && 1227 (q1.ult(delta) || (q1 == delta && r1 == 0))); 1228 magu.m = q2 + 1; // resulting magic number 1229 magu.s = p - d.getBitWidth(); // resulting shift 1230 return magu; 1231 } 1232 1233 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers) 1234 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The 1235 /// variables here have the same names as in the algorithm. Comments explain 1236 /// the algorithm and any deviation from it. 1237 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r, 1238 unsigned m, unsigned n) { 1239 assert(u && "Must provide dividend"); 1240 assert(v && "Must provide divisor"); 1241 assert(q && "Must provide quotient"); 1242 assert(u != v && u != q && v != q && "Must use different memory"); 1243 assert(n>1 && "n must be > 1"); 1244 1245 // b denotes the base of the number system. In our case b is 2^32. 1246 const uint64_t b = uint64_t(1) << 32; 1247 1248 // The DEBUG macros here tend to be spam in the debug output if you're not 1249 // debugging this code. Disable them unless KNUTH_DEBUG is defined. 1250 #ifdef KNUTH_DEBUG 1251 #define DEBUG_KNUTH(X) LLVM_DEBUG(X) 1252 #else 1253 #define DEBUG_KNUTH(X) do {} while(false) 1254 #endif 1255 1256 DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n'); 1257 DEBUG_KNUTH(dbgs() << "KnuthDiv: original:"); 1258 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1259 DEBUG_KNUTH(dbgs() << " by"); 1260 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]); 1261 DEBUG_KNUTH(dbgs() << '\n'); 1262 // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of 1263 // u and v by d. Note that we have taken Knuth's advice here to use a power 1264 // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of 1265 // 2 allows us to shift instead of multiply and it is easy to determine the 1266 // shift amount from the leading zeros. We are basically normalizing the u 1267 // and v so that its high bits are shifted to the top of v's range without 1268 // overflow. Note that this can require an extra word in u so that u must 1269 // be of length m+n+1. 1270 unsigned shift = countLeadingZeros(v[n-1]); 1271 uint32_t v_carry = 0; 1272 uint32_t u_carry = 0; 1273 if (shift) { 1274 for (unsigned i = 0; i < m+n; ++i) { 1275 uint32_t u_tmp = u[i] >> (32 - shift); 1276 u[i] = (u[i] << shift) | u_carry; 1277 u_carry = u_tmp; 1278 } 1279 for (unsigned i = 0; i < n; ++i) { 1280 uint32_t v_tmp = v[i] >> (32 - shift); 1281 v[i] = (v[i] << shift) | v_carry; 1282 v_carry = v_tmp; 1283 } 1284 } 1285 u[m+n] = u_carry; 1286 1287 DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:"); 1288 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1289 DEBUG_KNUTH(dbgs() << " by"); 1290 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]); 1291 DEBUG_KNUTH(dbgs() << '\n'); 1292 1293 // D2. [Initialize j.] Set j to m. This is the loop counter over the places. 1294 int j = m; 1295 do { 1296 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n'); 1297 // D3. [Calculate q'.]. 1298 // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') 1299 // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r') 1300 // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease 1301 // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test 1302 // on v[n-2] determines at high speed most of the cases in which the trial 1303 // value qp is one too large, and it eliminates all cases where qp is two 1304 // too large. 1305 uint64_t dividend = Make_64(u[j+n], u[j+n-1]); 1306 DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n'); 1307 uint64_t qp = dividend / v[n-1]; 1308 uint64_t rp = dividend % v[n-1]; 1309 if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) { 1310 qp--; 1311 rp += v[n-1]; 1312 if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2])) 1313 qp--; 1314 } 1315 DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); 1316 1317 // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with 1318 // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation 1319 // consists of a simple multiplication by a one-place number, combined with 1320 // a subtraction. 1321 // The digits (u[j+n]...u[j]) should be kept positive; if the result of 1322 // this step is actually negative, (u[j+n]...u[j]) should be left as the 1323 // true value plus b**(n+1), namely as the b's complement of 1324 // the true value, and a "borrow" to the left should be remembered. 1325 int64_t borrow = 0; 1326 for (unsigned i = 0; i < n; ++i) { 1327 uint64_t p = uint64_t(qp) * uint64_t(v[i]); 1328 int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p); 1329 u[j+i] = Lo_32(subres); 1330 borrow = Hi_32(p) - Hi_32(subres); 1331 DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i] 1332 << ", borrow = " << borrow << '\n'); 1333 } 1334 bool isNeg = u[j+n] < borrow; 1335 u[j+n] -= Lo_32(borrow); 1336 1337 DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:"); 1338 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1339 DEBUG_KNUTH(dbgs() << '\n'); 1340 1341 // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was 1342 // negative, go to step D6; otherwise go on to step D7. 1343 q[j] = Lo_32(qp); 1344 if (isNeg) { 1345 // D6. [Add back]. The probability that this step is necessary is very 1346 // small, on the order of only 2/b. Make sure that test data accounts for 1347 // this possibility. Decrease q[j] by 1 1348 q[j]--; 1349 // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). 1350 // A carry will occur to the left of u[j+n], and it should be ignored 1351 // since it cancels with the borrow that occurred in D4. 1352 bool carry = false; 1353 for (unsigned i = 0; i < n; i++) { 1354 uint32_t limit = std::min(u[j+i],v[i]); 1355 u[j+i] += v[i] + carry; 1356 carry = u[j+i] < limit || (carry && u[j+i] == limit); 1357 } 1358 u[j+n] += carry; 1359 } 1360 DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:"); 1361 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1362 DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n'); 1363 1364 // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. 1365 } while (--j >= 0); 1366 1367 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:"); 1368 DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]); 1369 DEBUG_KNUTH(dbgs() << '\n'); 1370 1371 // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired 1372 // remainder may be obtained by dividing u[...] by d. If r is non-null we 1373 // compute the remainder (urem uses this). 1374 if (r) { 1375 // The value d is expressed by the "shift" value above since we avoided 1376 // multiplication by d by using a shift left. So, all we have to do is 1377 // shift right here. 1378 if (shift) { 1379 uint32_t carry = 0; 1380 DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:"); 1381 for (int i = n-1; i >= 0; i--) { 1382 r[i] = (u[i] >> shift) | carry; 1383 carry = u[i] << (32 - shift); 1384 DEBUG_KNUTH(dbgs() << " " << r[i]); 1385 } 1386 } else { 1387 for (int i = n-1; i >= 0; i--) { 1388 r[i] = u[i]; 1389 DEBUG_KNUTH(dbgs() << " " << r[i]); 1390 } 1391 } 1392 DEBUG_KNUTH(dbgs() << '\n'); 1393 } 1394 DEBUG_KNUTH(dbgs() << '\n'); 1395 } 1396 1397 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS, 1398 unsigned rhsWords, WordType *Quotient, WordType *Remainder) { 1399 assert(lhsWords >= rhsWords && "Fractional result"); 1400 1401 // First, compose the values into an array of 32-bit words instead of 1402 // 64-bit words. This is a necessity of both the "short division" algorithm 1403 // and the Knuth "classical algorithm" which requires there to be native 1404 // operations for +, -, and * on an m bit value with an m*2 bit result. We 1405 // can't use 64-bit operands here because we don't have native results of 1406 // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't 1407 // work on large-endian machines. 1408 unsigned n = rhsWords * 2; 1409 unsigned m = (lhsWords * 2) - n; 1410 1411 // Allocate space for the temporary values we need either on the stack, if 1412 // it will fit, or on the heap if it won't. 1413 uint32_t SPACE[128]; 1414 uint32_t *U = nullptr; 1415 uint32_t *V = nullptr; 1416 uint32_t *Q = nullptr; 1417 uint32_t *R = nullptr; 1418 if ((Remainder?4:3)*n+2*m+1 <= 128) { 1419 U = &SPACE[0]; 1420 V = &SPACE[m+n+1]; 1421 Q = &SPACE[(m+n+1) + n]; 1422 if (Remainder) 1423 R = &SPACE[(m+n+1) + n + (m+n)]; 1424 } else { 1425 U = new uint32_t[m + n + 1]; 1426 V = new uint32_t[n]; 1427 Q = new uint32_t[m+n]; 1428 if (Remainder) 1429 R = new uint32_t[n]; 1430 } 1431 1432 // Initialize the dividend 1433 memset(U, 0, (m+n+1)*sizeof(uint32_t)); 1434 for (unsigned i = 0; i < lhsWords; ++i) { 1435 uint64_t tmp = LHS[i]; 1436 U[i * 2] = Lo_32(tmp); 1437 U[i * 2 + 1] = Hi_32(tmp); 1438 } 1439 U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm. 1440 1441 // Initialize the divisor 1442 memset(V, 0, (n)*sizeof(uint32_t)); 1443 for (unsigned i = 0; i < rhsWords; ++i) { 1444 uint64_t tmp = RHS[i]; 1445 V[i * 2] = Lo_32(tmp); 1446 V[i * 2 + 1] = Hi_32(tmp); 1447 } 1448 1449 // initialize the quotient and remainder 1450 memset(Q, 0, (m+n) * sizeof(uint32_t)); 1451 if (Remainder) 1452 memset(R, 0, n * sizeof(uint32_t)); 1453 1454 // Now, adjust m and n for the Knuth division. n is the number of words in 1455 // the divisor. m is the number of words by which the dividend exceeds the 1456 // divisor (i.e. m+n is the length of the dividend). These sizes must not 1457 // contain any zero words or the Knuth algorithm fails. 1458 for (unsigned i = n; i > 0 && V[i-1] == 0; i--) { 1459 n--; 1460 m++; 1461 } 1462 for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--) 1463 m--; 1464 1465 // If we're left with only a single word for the divisor, Knuth doesn't work 1466 // so we implement the short division algorithm here. This is much simpler 1467 // and faster because we are certain that we can divide a 64-bit quantity 1468 // by a 32-bit quantity at hardware speed and short division is simply a 1469 // series of such operations. This is just like doing short division but we 1470 // are using base 2^32 instead of base 10. 1471 assert(n != 0 && "Divide by zero?"); 1472 if (n == 1) { 1473 uint32_t divisor = V[0]; 1474 uint32_t remainder = 0; 1475 for (int i = m; i >= 0; i--) { 1476 uint64_t partial_dividend = Make_64(remainder, U[i]); 1477 if (partial_dividend == 0) { 1478 Q[i] = 0; 1479 remainder = 0; 1480 } else if (partial_dividend < divisor) { 1481 Q[i] = 0; 1482 remainder = Lo_32(partial_dividend); 1483 } else if (partial_dividend == divisor) { 1484 Q[i] = 1; 1485 remainder = 0; 1486 } else { 1487 Q[i] = Lo_32(partial_dividend / divisor); 1488 remainder = Lo_32(partial_dividend - (Q[i] * divisor)); 1489 } 1490 } 1491 if (R) 1492 R[0] = remainder; 1493 } else { 1494 // Now we're ready to invoke the Knuth classical divide algorithm. In this 1495 // case n > 1. 1496 KnuthDiv(U, V, Q, R, m, n); 1497 } 1498 1499 // If the caller wants the quotient 1500 if (Quotient) { 1501 for (unsigned i = 0; i < lhsWords; ++i) 1502 Quotient[i] = Make_64(Q[i*2+1], Q[i*2]); 1503 } 1504 1505 // If the caller wants the remainder 1506 if (Remainder) { 1507 for (unsigned i = 0; i < rhsWords; ++i) 1508 Remainder[i] = Make_64(R[i*2+1], R[i*2]); 1509 } 1510 1511 // Clean up the memory we allocated. 1512 if (U != &SPACE[0]) { 1513 delete [] U; 1514 delete [] V; 1515 delete [] Q; 1516 delete [] R; 1517 } 1518 } 1519 1520 APInt APInt::udiv(const APInt &RHS) const { 1521 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1522 1523 // First, deal with the easy case 1524 if (isSingleWord()) { 1525 assert(RHS.U.VAL != 0 && "Divide by zero?"); 1526 return APInt(BitWidth, U.VAL / RHS.U.VAL); 1527 } 1528 1529 // Get some facts about the LHS and RHS number of bits and words 1530 unsigned lhsWords = getNumWords(getActiveBits()); 1531 unsigned rhsBits = RHS.getActiveBits(); 1532 unsigned rhsWords = getNumWords(rhsBits); 1533 assert(rhsWords && "Divided by zero???"); 1534 1535 // Deal with some degenerate cases 1536 if (!lhsWords) 1537 // 0 / X ===> 0 1538 return APInt(BitWidth, 0); 1539 if (rhsBits == 1) 1540 // X / 1 ===> X 1541 return *this; 1542 if (lhsWords < rhsWords || this->ult(RHS)) 1543 // X / Y ===> 0, iff X < Y 1544 return APInt(BitWidth, 0); 1545 if (*this == RHS) 1546 // X / X ===> 1 1547 return APInt(BitWidth, 1); 1548 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. 1549 // All high words are zero, just use native divide 1550 return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]); 1551 1552 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1553 APInt Quotient(BitWidth, 0); // to hold result. 1554 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr); 1555 return Quotient; 1556 } 1557 1558 APInt APInt::udiv(uint64_t RHS) const { 1559 assert(RHS != 0 && "Divide by zero?"); 1560 1561 // First, deal with the easy case 1562 if (isSingleWord()) 1563 return APInt(BitWidth, U.VAL / RHS); 1564 1565 // Get some facts about the LHS words. 1566 unsigned lhsWords = getNumWords(getActiveBits()); 1567 1568 // Deal with some degenerate cases 1569 if (!lhsWords) 1570 // 0 / X ===> 0 1571 return APInt(BitWidth, 0); 1572 if (RHS == 1) 1573 // X / 1 ===> X 1574 return *this; 1575 if (this->ult(RHS)) 1576 // X / Y ===> 0, iff X < Y 1577 return APInt(BitWidth, 0); 1578 if (*this == RHS) 1579 // X / X ===> 1 1580 return APInt(BitWidth, 1); 1581 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. 1582 // All high words are zero, just use native divide 1583 return APInt(BitWidth, this->U.pVal[0] / RHS); 1584 1585 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1586 APInt Quotient(BitWidth, 0); // to hold result. 1587 divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr); 1588 return Quotient; 1589 } 1590 1591 APInt APInt::sdiv(const APInt &RHS) const { 1592 if (isNegative()) { 1593 if (RHS.isNegative()) 1594 return (-(*this)).udiv(-RHS); 1595 return -((-(*this)).udiv(RHS)); 1596 } 1597 if (RHS.isNegative()) 1598 return -(this->udiv(-RHS)); 1599 return this->udiv(RHS); 1600 } 1601 1602 APInt APInt::sdiv(int64_t RHS) const { 1603 if (isNegative()) { 1604 if (RHS < 0) 1605 return (-(*this)).udiv(-RHS); 1606 return -((-(*this)).udiv(RHS)); 1607 } 1608 if (RHS < 0) 1609 return -(this->udiv(-RHS)); 1610 return this->udiv(RHS); 1611 } 1612 1613 APInt APInt::urem(const APInt &RHS) const { 1614 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1615 if (isSingleWord()) { 1616 assert(RHS.U.VAL != 0 && "Remainder by zero?"); 1617 return APInt(BitWidth, U.VAL % RHS.U.VAL); 1618 } 1619 1620 // Get some facts about the LHS 1621 unsigned lhsWords = getNumWords(getActiveBits()); 1622 1623 // Get some facts about the RHS 1624 unsigned rhsBits = RHS.getActiveBits(); 1625 unsigned rhsWords = getNumWords(rhsBits); 1626 assert(rhsWords && "Performing remainder operation by zero ???"); 1627 1628 // Check the degenerate cases 1629 if (lhsWords == 0) 1630 // 0 % Y ===> 0 1631 return APInt(BitWidth, 0); 1632 if (rhsBits == 1) 1633 // X % 1 ===> 0 1634 return APInt(BitWidth, 0); 1635 if (lhsWords < rhsWords || this->ult(RHS)) 1636 // X % Y ===> X, iff X < Y 1637 return *this; 1638 if (*this == RHS) 1639 // X % X == 0; 1640 return APInt(BitWidth, 0); 1641 if (lhsWords == 1) 1642 // All high words are zero, just use native remainder 1643 return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]); 1644 1645 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1646 APInt Remainder(BitWidth, 0); 1647 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal); 1648 return Remainder; 1649 } 1650 1651 uint64_t APInt::urem(uint64_t RHS) const { 1652 assert(RHS != 0 && "Remainder by zero?"); 1653 1654 if (isSingleWord()) 1655 return U.VAL % RHS; 1656 1657 // Get some facts about the LHS 1658 unsigned lhsWords = getNumWords(getActiveBits()); 1659 1660 // Check the degenerate cases 1661 if (lhsWords == 0) 1662 // 0 % Y ===> 0 1663 return 0; 1664 if (RHS == 1) 1665 // X % 1 ===> 0 1666 return 0; 1667 if (this->ult(RHS)) 1668 // X % Y ===> X, iff X < Y 1669 return getZExtValue(); 1670 if (*this == RHS) 1671 // X % X == 0; 1672 return 0; 1673 if (lhsWords == 1) 1674 // All high words are zero, just use native remainder 1675 return U.pVal[0] % RHS; 1676 1677 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1678 uint64_t Remainder; 1679 divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder); 1680 return Remainder; 1681 } 1682 1683 APInt APInt::srem(const APInt &RHS) const { 1684 if (isNegative()) { 1685 if (RHS.isNegative()) 1686 return -((-(*this)).urem(-RHS)); 1687 return -((-(*this)).urem(RHS)); 1688 } 1689 if (RHS.isNegative()) 1690 return this->urem(-RHS); 1691 return this->urem(RHS); 1692 } 1693 1694 int64_t APInt::srem(int64_t RHS) const { 1695 if (isNegative()) { 1696 if (RHS < 0) 1697 return -((-(*this)).urem(-RHS)); 1698 return -((-(*this)).urem(RHS)); 1699 } 1700 if (RHS < 0) 1701 return this->urem(-RHS); 1702 return this->urem(RHS); 1703 } 1704 1705 void APInt::udivrem(const APInt &LHS, const APInt &RHS, 1706 APInt &Quotient, APInt &Remainder) { 1707 assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1708 unsigned BitWidth = LHS.BitWidth; 1709 1710 // First, deal with the easy case 1711 if (LHS.isSingleWord()) { 1712 assert(RHS.U.VAL != 0 && "Divide by zero?"); 1713 uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL; 1714 uint64_t RemVal = LHS.U.VAL % RHS.U.VAL; 1715 Quotient = APInt(BitWidth, QuotVal); 1716 Remainder = APInt(BitWidth, RemVal); 1717 return; 1718 } 1719 1720 // Get some size facts about the dividend and divisor 1721 unsigned lhsWords = getNumWords(LHS.getActiveBits()); 1722 unsigned rhsBits = RHS.getActiveBits(); 1723 unsigned rhsWords = getNumWords(rhsBits); 1724 assert(rhsWords && "Performing divrem operation by zero ???"); 1725 1726 // Check the degenerate cases 1727 if (lhsWords == 0) { 1728 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0 1729 Remainder = APInt(BitWidth, 0); // 0 % Y ===> 0 1730 return; 1731 } 1732 1733 if (rhsBits == 1) { 1734 Quotient = LHS; // X / 1 ===> X 1735 Remainder = APInt(BitWidth, 0); // X % 1 ===> 0 1736 } 1737 1738 if (lhsWords < rhsWords || LHS.ult(RHS)) { 1739 Remainder = LHS; // X % Y ===> X, iff X < Y 1740 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y 1741 return; 1742 } 1743 1744 if (LHS == RHS) { 1745 Quotient = APInt(BitWidth, 1); // X / X ===> 1 1746 Remainder = APInt(BitWidth, 0); // X % X ===> 0; 1747 return; 1748 } 1749 1750 // Make sure there is enough space to hold the results. 1751 // NOTE: This assumes that reallocate won't affect any bits if it doesn't 1752 // change the size. This is necessary if Quotient or Remainder is aliased 1753 // with LHS or RHS. 1754 Quotient.reallocate(BitWidth); 1755 Remainder.reallocate(BitWidth); 1756 1757 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. 1758 // There is only one word to consider so use the native versions. 1759 uint64_t lhsValue = LHS.U.pVal[0]; 1760 uint64_t rhsValue = RHS.U.pVal[0]; 1761 Quotient = lhsValue / rhsValue; 1762 Remainder = lhsValue % rhsValue; 1763 return; 1764 } 1765 1766 // Okay, lets do it the long way 1767 divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, 1768 Remainder.U.pVal); 1769 // Clear the rest of the Quotient and Remainder. 1770 std::memset(Quotient.U.pVal + lhsWords, 0, 1771 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); 1772 std::memset(Remainder.U.pVal + rhsWords, 0, 1773 (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE); 1774 } 1775 1776 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient, 1777 uint64_t &Remainder) { 1778 assert(RHS != 0 && "Divide by zero?"); 1779 unsigned BitWidth = LHS.BitWidth; 1780 1781 // First, deal with the easy case 1782 if (LHS.isSingleWord()) { 1783 uint64_t QuotVal = LHS.U.VAL / RHS; 1784 Remainder = LHS.U.VAL % RHS; 1785 Quotient = APInt(BitWidth, QuotVal); 1786 return; 1787 } 1788 1789 // Get some size facts about the dividend and divisor 1790 unsigned lhsWords = getNumWords(LHS.getActiveBits()); 1791 1792 // Check the degenerate cases 1793 if (lhsWords == 0) { 1794 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0 1795 Remainder = 0; // 0 % Y ===> 0 1796 return; 1797 } 1798 1799 if (RHS == 1) { 1800 Quotient = LHS; // X / 1 ===> X 1801 Remainder = 0; // X % 1 ===> 0 1802 return; 1803 } 1804 1805 if (LHS.ult(RHS)) { 1806 Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y 1807 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y 1808 return; 1809 } 1810 1811 if (LHS == RHS) { 1812 Quotient = APInt(BitWidth, 1); // X / X ===> 1 1813 Remainder = 0; // X % X ===> 0; 1814 return; 1815 } 1816 1817 // Make sure there is enough space to hold the results. 1818 // NOTE: This assumes that reallocate won't affect any bits if it doesn't 1819 // change the size. This is necessary if Quotient is aliased with LHS. 1820 Quotient.reallocate(BitWidth); 1821 1822 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. 1823 // There is only one word to consider so use the native versions. 1824 uint64_t lhsValue = LHS.U.pVal[0]; 1825 Quotient = lhsValue / RHS; 1826 Remainder = lhsValue % RHS; 1827 return; 1828 } 1829 1830 // Okay, lets do it the long way 1831 divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder); 1832 // Clear the rest of the Quotient. 1833 std::memset(Quotient.U.pVal + lhsWords, 0, 1834 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); 1835 } 1836 1837 void APInt::sdivrem(const APInt &LHS, const APInt &RHS, 1838 APInt &Quotient, APInt &Remainder) { 1839 if (LHS.isNegative()) { 1840 if (RHS.isNegative()) 1841 APInt::udivrem(-LHS, -RHS, Quotient, Remainder); 1842 else { 1843 APInt::udivrem(-LHS, RHS, Quotient, Remainder); 1844 Quotient.negate(); 1845 } 1846 Remainder.negate(); 1847 } else if (RHS.isNegative()) { 1848 APInt::udivrem(LHS, -RHS, Quotient, Remainder); 1849 Quotient.negate(); 1850 } else { 1851 APInt::udivrem(LHS, RHS, Quotient, Remainder); 1852 } 1853 } 1854 1855 void APInt::sdivrem(const APInt &LHS, int64_t RHS, 1856 APInt &Quotient, int64_t &Remainder) { 1857 uint64_t R = Remainder; 1858 if (LHS.isNegative()) { 1859 if (RHS < 0) 1860 APInt::udivrem(-LHS, -RHS, Quotient, R); 1861 else { 1862 APInt::udivrem(-LHS, RHS, Quotient, R); 1863 Quotient.negate(); 1864 } 1865 R = -R; 1866 } else if (RHS < 0) { 1867 APInt::udivrem(LHS, -RHS, Quotient, R); 1868 Quotient.negate(); 1869 } else { 1870 APInt::udivrem(LHS, RHS, Quotient, R); 1871 } 1872 Remainder = R; 1873 } 1874 1875 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const { 1876 APInt Res = *this+RHS; 1877 Overflow = isNonNegative() == RHS.isNonNegative() && 1878 Res.isNonNegative() != isNonNegative(); 1879 return Res; 1880 } 1881 1882 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const { 1883 APInt Res = *this+RHS; 1884 Overflow = Res.ult(RHS); 1885 return Res; 1886 } 1887 1888 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const { 1889 APInt Res = *this - RHS; 1890 Overflow = isNonNegative() != RHS.isNonNegative() && 1891 Res.isNonNegative() != isNonNegative(); 1892 return Res; 1893 } 1894 1895 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const { 1896 APInt Res = *this-RHS; 1897 Overflow = Res.ugt(*this); 1898 return Res; 1899 } 1900 1901 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const { 1902 // MININT/-1 --> overflow. 1903 Overflow = isMinSignedValue() && RHS.isAllOnesValue(); 1904 return sdiv(RHS); 1905 } 1906 1907 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const { 1908 APInt Res = *this * RHS; 1909 1910 if (*this != 0 && RHS != 0) 1911 Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS; 1912 else 1913 Overflow = false; 1914 return Res; 1915 } 1916 1917 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const { 1918 APInt Res = *this * RHS; 1919 1920 if (*this != 0 && RHS != 0) 1921 Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS; 1922 else 1923 Overflow = false; 1924 return Res; 1925 } 1926 1927 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const { 1928 Overflow = ShAmt.uge(getBitWidth()); 1929 if (Overflow) 1930 return APInt(BitWidth, 0); 1931 1932 if (isNonNegative()) // Don't allow sign change. 1933 Overflow = ShAmt.uge(countLeadingZeros()); 1934 else 1935 Overflow = ShAmt.uge(countLeadingOnes()); 1936 1937 return *this << ShAmt; 1938 } 1939 1940 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const { 1941 Overflow = ShAmt.uge(getBitWidth()); 1942 if (Overflow) 1943 return APInt(BitWidth, 0); 1944 1945 Overflow = ShAmt.ugt(countLeadingZeros()); 1946 1947 return *this << ShAmt; 1948 } 1949 1950 1951 1952 1953 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { 1954 // Check our assumptions here 1955 assert(!str.empty() && "Invalid string length"); 1956 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || 1957 radix == 36) && 1958 "Radix should be 2, 8, 10, 16, or 36!"); 1959 1960 StringRef::iterator p = str.begin(); 1961 size_t slen = str.size(); 1962 bool isNeg = *p == '-'; 1963 if (*p == '-' || *p == '+') { 1964 p++; 1965 slen--; 1966 assert(slen && "String is only a sign, needs a value."); 1967 } 1968 assert((slen <= numbits || radix != 2) && "Insufficient bit width"); 1969 assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width"); 1970 assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width"); 1971 assert((((slen-1)*64)/22 <= numbits || radix != 10) && 1972 "Insufficient bit width"); 1973 1974 // Allocate memory if needed 1975 if (isSingleWord()) 1976 U.VAL = 0; 1977 else 1978 U.pVal = getClearedMemory(getNumWords()); 1979 1980 // Figure out if we can shift instead of multiply 1981 unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0); 1982 1983 // Enter digit traversal loop 1984 for (StringRef::iterator e = str.end(); p != e; ++p) { 1985 unsigned digit = getDigit(*p, radix); 1986 assert(digit < radix && "Invalid character in digit string"); 1987 1988 // Shift or multiply the value by the radix 1989 if (slen > 1) { 1990 if (shift) 1991 *this <<= shift; 1992 else 1993 *this *= radix; 1994 } 1995 1996 // Add in the digit we just interpreted 1997 *this += digit; 1998 } 1999 // If its negative, put it in two's complement form 2000 if (isNeg) 2001 this->negate(); 2002 } 2003 2004 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, 2005 bool Signed, bool formatAsCLiteral) const { 2006 assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 || 2007 Radix == 36) && 2008 "Radix should be 2, 8, 10, 16, or 36!"); 2009 2010 const char *Prefix = ""; 2011 if (formatAsCLiteral) { 2012 switch (Radix) { 2013 case 2: 2014 // Binary literals are a non-standard extension added in gcc 4.3: 2015 // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html 2016 Prefix = "0b"; 2017 break; 2018 case 8: 2019 Prefix = "0"; 2020 break; 2021 case 10: 2022 break; // No prefix 2023 case 16: 2024 Prefix = "0x"; 2025 break; 2026 default: 2027 llvm_unreachable("Invalid radix!"); 2028 } 2029 } 2030 2031 // First, check for a zero value and just short circuit the logic below. 2032 if (*this == 0) { 2033 while (*Prefix) { 2034 Str.push_back(*Prefix); 2035 ++Prefix; 2036 }; 2037 Str.push_back('0'); 2038 return; 2039 } 2040 2041 static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 2042 2043 if (isSingleWord()) { 2044 char Buffer[65]; 2045 char *BufPtr = std::end(Buffer); 2046 2047 uint64_t N; 2048 if (!Signed) { 2049 N = getZExtValue(); 2050 } else { 2051 int64_t I = getSExtValue(); 2052 if (I >= 0) { 2053 N = I; 2054 } else { 2055 Str.push_back('-'); 2056 N = -(uint64_t)I; 2057 } 2058 } 2059 2060 while (*Prefix) { 2061 Str.push_back(*Prefix); 2062 ++Prefix; 2063 }; 2064 2065 while (N) { 2066 *--BufPtr = Digits[N % Radix]; 2067 N /= Radix; 2068 } 2069 Str.append(BufPtr, std::end(Buffer)); 2070 return; 2071 } 2072 2073 APInt Tmp(*this); 2074 2075 if (Signed && isNegative()) { 2076 // They want to print the signed version and it is a negative value 2077 // Flip the bits and add one to turn it into the equivalent positive 2078 // value and put a '-' in the result. 2079 Tmp.negate(); 2080 Str.push_back('-'); 2081 } 2082 2083 while (*Prefix) { 2084 Str.push_back(*Prefix); 2085 ++Prefix; 2086 }; 2087 2088 // We insert the digits backward, then reverse them to get the right order. 2089 unsigned StartDig = Str.size(); 2090 2091 // For the 2, 8 and 16 bit cases, we can just shift instead of divide 2092 // because the number of bits per digit (1, 3 and 4 respectively) divides 2093 // equally. We just shift until the value is zero. 2094 if (Radix == 2 || Radix == 8 || Radix == 16) { 2095 // Just shift tmp right for each digit width until it becomes zero 2096 unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1)); 2097 unsigned MaskAmt = Radix - 1; 2098 2099 while (Tmp.getBoolValue()) { 2100 unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt; 2101 Str.push_back(Digits[Digit]); 2102 Tmp.lshrInPlace(ShiftAmt); 2103 } 2104 } else { 2105 while (Tmp.getBoolValue()) { 2106 uint64_t Digit; 2107 udivrem(Tmp, Radix, Tmp, Digit); 2108 assert(Digit < Radix && "divide failed"); 2109 Str.push_back(Digits[Digit]); 2110 } 2111 } 2112 2113 // Reverse the digits before returning. 2114 std::reverse(Str.begin()+StartDig, Str.end()); 2115 } 2116 2117 /// Returns the APInt as a std::string. Note that this is an inefficient method. 2118 /// It is better to pass in a SmallVector/SmallString to the methods above. 2119 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const { 2120 SmallString<40> S; 2121 toString(S, Radix, Signed, /* formatAsCLiteral = */false); 2122 return S.str(); 2123 } 2124 2125 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2126 LLVM_DUMP_METHOD void APInt::dump() const { 2127 SmallString<40> S, U; 2128 this->toStringUnsigned(U); 2129 this->toStringSigned(S); 2130 dbgs() << "APInt(" << BitWidth << "b, " 2131 << U << "u " << S << "s)\n"; 2132 } 2133 #endif 2134 2135 void APInt::print(raw_ostream &OS, bool isSigned) const { 2136 SmallString<40> S; 2137 this->toString(S, 10, isSigned, /* formatAsCLiteral = */false); 2138 OS << S; 2139 } 2140 2141 // This implements a variety of operations on a representation of 2142 // arbitrary precision, two's-complement, bignum integer values. 2143 2144 // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe 2145 // and unrestricting assumption. 2146 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0, 2147 "Part width must be divisible by 2!"); 2148 2149 /* Some handy functions local to this file. */ 2150 2151 /* Returns the integer part with the least significant BITS set. 2152 BITS cannot be zero. */ 2153 static inline APInt::WordType lowBitMask(unsigned bits) { 2154 assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD); 2155 2156 return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits); 2157 } 2158 2159 /* Returns the value of the lower half of PART. */ 2160 static inline APInt::WordType lowHalf(APInt::WordType part) { 2161 return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2); 2162 } 2163 2164 /* Returns the value of the upper half of PART. */ 2165 static inline APInt::WordType highHalf(APInt::WordType part) { 2166 return part >> (APInt::APINT_BITS_PER_WORD / 2); 2167 } 2168 2169 /* Returns the bit number of the most significant set bit of a part. 2170 If the input number has no bits set -1U is returned. */ 2171 static unsigned partMSB(APInt::WordType value) { 2172 return findLastSet(value, ZB_Max); 2173 } 2174 2175 /* Returns the bit number of the least significant set bit of a 2176 part. If the input number has no bits set -1U is returned. */ 2177 static unsigned partLSB(APInt::WordType value) { 2178 return findFirstSet(value, ZB_Max); 2179 } 2180 2181 /* Sets the least significant part of a bignum to the input value, and 2182 zeroes out higher parts. */ 2183 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) { 2184 assert(parts > 0); 2185 2186 dst[0] = part; 2187 for (unsigned i = 1; i < parts; i++) 2188 dst[i] = 0; 2189 } 2190 2191 /* Assign one bignum to another. */ 2192 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) { 2193 for (unsigned i = 0; i < parts; i++) 2194 dst[i] = src[i]; 2195 } 2196 2197 /* Returns true if a bignum is zero, false otherwise. */ 2198 bool APInt::tcIsZero(const WordType *src, unsigned parts) { 2199 for (unsigned i = 0; i < parts; i++) 2200 if (src[i]) 2201 return false; 2202 2203 return true; 2204 } 2205 2206 /* Extract the given bit of a bignum; returns 0 or 1. */ 2207 int APInt::tcExtractBit(const WordType *parts, unsigned bit) { 2208 return (parts[whichWord(bit)] & maskBit(bit)) != 0; 2209 } 2210 2211 /* Set the given bit of a bignum. */ 2212 void APInt::tcSetBit(WordType *parts, unsigned bit) { 2213 parts[whichWord(bit)] |= maskBit(bit); 2214 } 2215 2216 /* Clears the given bit of a bignum. */ 2217 void APInt::tcClearBit(WordType *parts, unsigned bit) { 2218 parts[whichWord(bit)] &= ~maskBit(bit); 2219 } 2220 2221 /* Returns the bit number of the least significant set bit of a 2222 number. If the input number has no bits set -1U is returned. */ 2223 unsigned APInt::tcLSB(const WordType *parts, unsigned n) { 2224 for (unsigned i = 0; i < n; i++) { 2225 if (parts[i] != 0) { 2226 unsigned lsb = partLSB(parts[i]); 2227 2228 return lsb + i * APINT_BITS_PER_WORD; 2229 } 2230 } 2231 2232 return -1U; 2233 } 2234 2235 /* Returns the bit number of the most significant set bit of a number. 2236 If the input number has no bits set -1U is returned. */ 2237 unsigned APInt::tcMSB(const WordType *parts, unsigned n) { 2238 do { 2239 --n; 2240 2241 if (parts[n] != 0) { 2242 unsigned msb = partMSB(parts[n]); 2243 2244 return msb + n * APINT_BITS_PER_WORD; 2245 } 2246 } while (n); 2247 2248 return -1U; 2249 } 2250 2251 /* Copy the bit vector of width srcBITS from SRC, starting at bit 2252 srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes 2253 the least significant bit of DST. All high bits above srcBITS in 2254 DST are zero-filled. */ 2255 void 2256 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src, 2257 unsigned srcBits, unsigned srcLSB) { 2258 unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD; 2259 assert(dstParts <= dstCount); 2260 2261 unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD; 2262 tcAssign (dst, src + firstSrcPart, dstParts); 2263 2264 unsigned shift = srcLSB % APINT_BITS_PER_WORD; 2265 tcShiftRight (dst, dstParts, shift); 2266 2267 /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC 2268 in DST. If this is less that srcBits, append the rest, else 2269 clear the high bits. */ 2270 unsigned n = dstParts * APINT_BITS_PER_WORD - shift; 2271 if (n < srcBits) { 2272 WordType mask = lowBitMask (srcBits - n); 2273 dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask) 2274 << n % APINT_BITS_PER_WORD); 2275 } else if (n > srcBits) { 2276 if (srcBits % APINT_BITS_PER_WORD) 2277 dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD); 2278 } 2279 2280 /* Clear high parts. */ 2281 while (dstParts < dstCount) 2282 dst[dstParts++] = 0; 2283 } 2284 2285 /* DST += RHS + C where C is zero or one. Returns the carry flag. */ 2286 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs, 2287 WordType c, unsigned parts) { 2288 assert(c <= 1); 2289 2290 for (unsigned i = 0; i < parts; i++) { 2291 WordType l = dst[i]; 2292 if (c) { 2293 dst[i] += rhs[i] + 1; 2294 c = (dst[i] <= l); 2295 } else { 2296 dst[i] += rhs[i]; 2297 c = (dst[i] < l); 2298 } 2299 } 2300 2301 return c; 2302 } 2303 2304 /// This function adds a single "word" integer, src, to the multiple 2305 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and 2306 /// 1 is returned if there is a carry out, otherwise 0 is returned. 2307 /// @returns the carry of the addition. 2308 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src, 2309 unsigned parts) { 2310 for (unsigned i = 0; i < parts; ++i) { 2311 dst[i] += src; 2312 if (dst[i] >= src) 2313 return 0; // No need to carry so exit early. 2314 src = 1; // Carry one to next digit. 2315 } 2316 2317 return 1; 2318 } 2319 2320 /* DST -= RHS + C where C is zero or one. Returns the carry flag. */ 2321 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs, 2322 WordType c, unsigned parts) { 2323 assert(c <= 1); 2324 2325 for (unsigned i = 0; i < parts; i++) { 2326 WordType l = dst[i]; 2327 if (c) { 2328 dst[i] -= rhs[i] + 1; 2329 c = (dst[i] >= l); 2330 } else { 2331 dst[i] -= rhs[i]; 2332 c = (dst[i] > l); 2333 } 2334 } 2335 2336 return c; 2337 } 2338 2339 /// This function subtracts a single "word" (64-bit word), src, from 2340 /// the multi-word integer array, dst[], propagating the borrowed 1 value until 2341 /// no further borrowing is needed or it runs out of "words" in dst. The result 2342 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not 2343 /// exhausted. In other words, if src > dst then this function returns 1, 2344 /// otherwise 0. 2345 /// @returns the borrow out of the subtraction 2346 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src, 2347 unsigned parts) { 2348 for (unsigned i = 0; i < parts; ++i) { 2349 WordType Dst = dst[i]; 2350 dst[i] -= src; 2351 if (src <= Dst) 2352 return 0; // No need to borrow so exit early. 2353 src = 1; // We have to "borrow 1" from next "word" 2354 } 2355 2356 return 1; 2357 } 2358 2359 /* Negate a bignum in-place. */ 2360 void APInt::tcNegate(WordType *dst, unsigned parts) { 2361 tcComplement(dst, parts); 2362 tcIncrement(dst, parts); 2363 } 2364 2365 /* DST += SRC * MULTIPLIER + CARRY if add is true 2366 DST = SRC * MULTIPLIER + CARRY if add is false 2367 2368 Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC 2369 they must start at the same point, i.e. DST == SRC. 2370 2371 If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is 2372 returned. Otherwise DST is filled with the least significant 2373 DSTPARTS parts of the result, and if all of the omitted higher 2374 parts were zero return zero, otherwise overflow occurred and 2375 return one. */ 2376 int APInt::tcMultiplyPart(WordType *dst, const WordType *src, 2377 WordType multiplier, WordType carry, 2378 unsigned srcParts, unsigned dstParts, 2379 bool add) { 2380 /* Otherwise our writes of DST kill our later reads of SRC. */ 2381 assert(dst <= src || dst >= src + srcParts); 2382 assert(dstParts <= srcParts + 1); 2383 2384 /* N loops; minimum of dstParts and srcParts. */ 2385 unsigned n = std::min(dstParts, srcParts); 2386 2387 for (unsigned i = 0; i < n; i++) { 2388 WordType low, mid, high, srcPart; 2389 2390 /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY. 2391 2392 This cannot overflow, because 2393 2394 (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1) 2395 2396 which is less than n^2. */ 2397 2398 srcPart = src[i]; 2399 2400 if (multiplier == 0 || srcPart == 0) { 2401 low = carry; 2402 high = 0; 2403 } else { 2404 low = lowHalf(srcPart) * lowHalf(multiplier); 2405 high = highHalf(srcPart) * highHalf(multiplier); 2406 2407 mid = lowHalf(srcPart) * highHalf(multiplier); 2408 high += highHalf(mid); 2409 mid <<= APINT_BITS_PER_WORD / 2; 2410 if (low + mid < low) 2411 high++; 2412 low += mid; 2413 2414 mid = highHalf(srcPart) * lowHalf(multiplier); 2415 high += highHalf(mid); 2416 mid <<= APINT_BITS_PER_WORD / 2; 2417 if (low + mid < low) 2418 high++; 2419 low += mid; 2420 2421 /* Now add carry. */ 2422 if (low + carry < low) 2423 high++; 2424 low += carry; 2425 } 2426 2427 if (add) { 2428 /* And now DST[i], and store the new low part there. */ 2429 if (low + dst[i] < low) 2430 high++; 2431 dst[i] += low; 2432 } else 2433 dst[i] = low; 2434 2435 carry = high; 2436 } 2437 2438 if (srcParts < dstParts) { 2439 /* Full multiplication, there is no overflow. */ 2440 assert(srcParts + 1 == dstParts); 2441 dst[srcParts] = carry; 2442 return 0; 2443 } 2444 2445 /* We overflowed if there is carry. */ 2446 if (carry) 2447 return 1; 2448 2449 /* We would overflow if any significant unwritten parts would be 2450 non-zero. This is true if any remaining src parts are non-zero 2451 and the multiplier is non-zero. */ 2452 if (multiplier) 2453 for (unsigned i = dstParts; i < srcParts; i++) 2454 if (src[i]) 2455 return 1; 2456 2457 /* We fitted in the narrow destination. */ 2458 return 0; 2459 } 2460 2461 /* DST = LHS * RHS, where DST has the same width as the operands and 2462 is filled with the least significant parts of the result. Returns 2463 one if overflow occurred, otherwise zero. DST must be disjoint 2464 from both operands. */ 2465 int APInt::tcMultiply(WordType *dst, const WordType *lhs, 2466 const WordType *rhs, unsigned parts) { 2467 assert(dst != lhs && dst != rhs); 2468 2469 int overflow = 0; 2470 tcSet(dst, 0, parts); 2471 2472 for (unsigned i = 0; i < parts; i++) 2473 overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts, 2474 parts - i, true); 2475 2476 return overflow; 2477 } 2478 2479 /// DST = LHS * RHS, where DST has width the sum of the widths of the 2480 /// operands. No overflow occurs. DST must be disjoint from both operands. 2481 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs, 2482 const WordType *rhs, unsigned lhsParts, 2483 unsigned rhsParts) { 2484 /* Put the narrower number on the LHS for less loops below. */ 2485 if (lhsParts > rhsParts) 2486 return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts); 2487 2488 assert(dst != lhs && dst != rhs); 2489 2490 tcSet(dst, 0, rhsParts); 2491 2492 for (unsigned i = 0; i < lhsParts; i++) 2493 tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true); 2494 } 2495 2496 /* If RHS is zero LHS and REMAINDER are left unchanged, return one. 2497 Otherwise set LHS to LHS / RHS with the fractional part discarded, 2498 set REMAINDER to the remainder, return zero. i.e. 2499 2500 OLD_LHS = RHS * LHS + REMAINDER 2501 2502 SCRATCH is a bignum of the same size as the operands and result for 2503 use by the routine; its contents need not be initialized and are 2504 destroyed. LHS, REMAINDER and SCRATCH must be distinct. 2505 */ 2506 int APInt::tcDivide(WordType *lhs, const WordType *rhs, 2507 WordType *remainder, WordType *srhs, 2508 unsigned parts) { 2509 assert(lhs != remainder && lhs != srhs && remainder != srhs); 2510 2511 unsigned shiftCount = tcMSB(rhs, parts) + 1; 2512 if (shiftCount == 0) 2513 return true; 2514 2515 shiftCount = parts * APINT_BITS_PER_WORD - shiftCount; 2516 unsigned n = shiftCount / APINT_BITS_PER_WORD; 2517 WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD); 2518 2519 tcAssign(srhs, rhs, parts); 2520 tcShiftLeft(srhs, parts, shiftCount); 2521 tcAssign(remainder, lhs, parts); 2522 tcSet(lhs, 0, parts); 2523 2524 /* Loop, subtracting SRHS if REMAINDER is greater and adding that to 2525 the total. */ 2526 for (;;) { 2527 int compare = tcCompare(remainder, srhs, parts); 2528 if (compare >= 0) { 2529 tcSubtract(remainder, srhs, 0, parts); 2530 lhs[n] |= mask; 2531 } 2532 2533 if (shiftCount == 0) 2534 break; 2535 shiftCount--; 2536 tcShiftRight(srhs, parts, 1); 2537 if ((mask >>= 1) == 0) { 2538 mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1); 2539 n--; 2540 } 2541 } 2542 2543 return false; 2544 } 2545 2546 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are 2547 /// no restrictions on Count. 2548 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) { 2549 // Don't bother performing a no-op shift. 2550 if (!Count) 2551 return; 2552 2553 // WordShift is the inter-part shift; BitShift is the intra-part shift. 2554 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); 2555 unsigned BitShift = Count % APINT_BITS_PER_WORD; 2556 2557 // Fastpath for moving by whole words. 2558 if (BitShift == 0) { 2559 std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE); 2560 } else { 2561 while (Words-- > WordShift) { 2562 Dst[Words] = Dst[Words - WordShift] << BitShift; 2563 if (Words > WordShift) 2564 Dst[Words] |= 2565 Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift); 2566 } 2567 } 2568 2569 // Fill in the remainder with 0s. 2570 std::memset(Dst, 0, WordShift * APINT_WORD_SIZE); 2571 } 2572 2573 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There 2574 /// are no restrictions on Count. 2575 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) { 2576 // Don't bother performing a no-op shift. 2577 if (!Count) 2578 return; 2579 2580 // WordShift is the inter-part shift; BitShift is the intra-part shift. 2581 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); 2582 unsigned BitShift = Count % APINT_BITS_PER_WORD; 2583 2584 unsigned WordsToMove = Words - WordShift; 2585 // Fastpath for moving by whole words. 2586 if (BitShift == 0) { 2587 std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE); 2588 } else { 2589 for (unsigned i = 0; i != WordsToMove; ++i) { 2590 Dst[i] = Dst[i + WordShift] >> BitShift; 2591 if (i + 1 != WordsToMove) 2592 Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift); 2593 } 2594 } 2595 2596 // Fill in the remainder with 0s. 2597 std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE); 2598 } 2599 2600 /* Bitwise and of two bignums. */ 2601 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) { 2602 for (unsigned i = 0; i < parts; i++) 2603 dst[i] &= rhs[i]; 2604 } 2605 2606 /* Bitwise inclusive or of two bignums. */ 2607 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) { 2608 for (unsigned i = 0; i < parts; i++) 2609 dst[i] |= rhs[i]; 2610 } 2611 2612 /* Bitwise exclusive or of two bignums. */ 2613 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) { 2614 for (unsigned i = 0; i < parts; i++) 2615 dst[i] ^= rhs[i]; 2616 } 2617 2618 /* Complement a bignum in-place. */ 2619 void APInt::tcComplement(WordType *dst, unsigned parts) { 2620 for (unsigned i = 0; i < parts; i++) 2621 dst[i] = ~dst[i]; 2622 } 2623 2624 /* Comparison (unsigned) of two bignums. */ 2625 int APInt::tcCompare(const WordType *lhs, const WordType *rhs, 2626 unsigned parts) { 2627 while (parts) { 2628 parts--; 2629 if (lhs[parts] != rhs[parts]) 2630 return (lhs[parts] > rhs[parts]) ? 1 : -1; 2631 } 2632 2633 return 0; 2634 } 2635 2636 /* Set the least significant BITS bits of a bignum, clear the 2637 rest. */ 2638 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts, 2639 unsigned bits) { 2640 unsigned i = 0; 2641 while (bits > APINT_BITS_PER_WORD) { 2642 dst[i++] = ~(WordType) 0; 2643 bits -= APINT_BITS_PER_WORD; 2644 } 2645 2646 if (bits) 2647 dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits); 2648 2649 while (i < parts) 2650 dst[i++] = 0; 2651 } 2652 2653 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B, 2654 APInt::Rounding RM) { 2655 // Currently udivrem always rounds down. 2656 switch (RM) { 2657 case APInt::Rounding::DOWN: 2658 case APInt::Rounding::TOWARD_ZERO: 2659 return A.udiv(B); 2660 case APInt::Rounding::UP: { 2661 APInt Quo, Rem; 2662 APInt::udivrem(A, B, Quo, Rem); 2663 if (Rem == 0) 2664 return Quo; 2665 return Quo + 1; 2666 } 2667 } 2668 llvm_unreachable("Unknown APInt::Rounding enum"); 2669 } 2670 2671 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B, 2672 APInt::Rounding RM) { 2673 switch (RM) { 2674 case APInt::Rounding::DOWN: 2675 case APInt::Rounding::UP: { 2676 APInt Quo, Rem; 2677 APInt::sdivrem(A, B, Quo, Rem); 2678 if (Rem == 0) 2679 return Quo; 2680 // This algorithm deals with arbitrary rounding mode used by sdivrem. 2681 // We want to check whether the non-integer part of the mathematical value 2682 // is negative or not. If the non-integer part is negative, we need to round 2683 // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's 2684 // already rounded down. 2685 if (RM == APInt::Rounding::DOWN) { 2686 if (Rem.isNegative() != B.isNegative()) 2687 return Quo - 1; 2688 return Quo; 2689 } 2690 if (Rem.isNegative() != B.isNegative()) 2691 return Quo; 2692 return Quo + 1; 2693 } 2694 // Currently sdiv rounds twards zero. 2695 case APInt::Rounding::TOWARD_ZERO: 2696 return A.sdiv(B); 2697 } 2698 llvm_unreachable("Unknown APInt::Rounding enum"); 2699 } 2700 2701 Optional<APInt> 2702 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C, 2703 unsigned RangeWidth) { 2704 unsigned CoeffWidth = A.getBitWidth(); 2705 assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth()); 2706 assert(RangeWidth <= CoeffWidth && 2707 "Value range width should be less than coefficient width"); 2708 assert(RangeWidth > 1 && "Value range bit width should be > 1"); 2709 2710 LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B 2711 << "x + " << C << ", rw:" << RangeWidth << '\n'); 2712 2713 // Identify 0 as a (non)solution immediately. 2714 if (C.sextOrTrunc(RangeWidth).isNullValue() ) { 2715 LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n"); 2716 return APInt(CoeffWidth, 0); 2717 } 2718 2719 // The result of APInt arithmetic has the same bit width as the operands, 2720 // so it can actually lose high bits. A product of two n-bit integers needs 2721 // 2n-1 bits to represent the full value. 2722 // The operation done below (on quadratic coefficients) that can produce 2723 // the largest value is the evaluation of the equation during bisection, 2724 // which needs 3 times the bitwidth of the coefficient, so the total number 2725 // of required bits is 3n. 2726 // 2727 // The purpose of this extension is to simulate the set Z of all integers, 2728 // where n+1 > n for all n in Z. In Z it makes sense to talk about positive 2729 // and negative numbers (not so much in a modulo arithmetic). The method 2730 // used to solve the equation is based on the standard formula for real 2731 // numbers, and uses the concepts of "positive" and "negative" with their 2732 // usual meanings. 2733 CoeffWidth *= 3; 2734 A = A.sext(CoeffWidth); 2735 B = B.sext(CoeffWidth); 2736 C = C.sext(CoeffWidth); 2737 2738 // Make A > 0 for simplicity. Negate cannot overflow at this point because 2739 // the bit width has increased. 2740 if (A.isNegative()) { 2741 A.negate(); 2742 B.negate(); 2743 C.negate(); 2744 } 2745 2746 // Solving an equation q(x) = 0 with coefficients in modular arithmetic 2747 // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ..., 2748 // and R = 2^BitWidth. 2749 // Since we're trying not only to find exact solutions, but also values 2750 // that "wrap around", such a set will always have a solution, i.e. an x 2751 // that satisfies at least one of the equations, or such that |q(x)| 2752 // exceeds kR, while |q(x-1)| for the same k does not. 2753 // 2754 // We need to find a value k, such that Ax^2 + Bx + C = kR will have a 2755 // positive solution n (in the above sense), and also such that the n 2756 // will be the least among all solutions corresponding to k = 0, 1, ... 2757 // (more precisely, the least element in the set 2758 // { n(k) | k is such that a solution n(k) exists }). 2759 // 2760 // Consider the parabola (over real numbers) that corresponds to the 2761 // quadratic equation. Since A > 0, the arms of the parabola will point 2762 // up. Picking different values of k will shift it up and down by R. 2763 // 2764 // We want to shift the parabola in such a way as to reduce the problem 2765 // of solving q(x) = kR to solving shifted_q(x) = 0. 2766 // (The interesting solutions are the ceilings of the real number 2767 // solutions.) 2768 APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth); 2769 APInt TwoA = 2 * A; 2770 APInt SqrB = B * B; 2771 bool PickLow; 2772 2773 auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt { 2774 assert(A.isStrictlyPositive()); 2775 APInt T = V.abs().urem(A); 2776 if (T.isNullValue()) 2777 return V; 2778 return V.isNegative() ? V+T : V+(A-T); 2779 }; 2780 2781 // The vertex of the parabola is at -B/2A, but since A > 0, it's negative 2782 // iff B is positive. 2783 if (B.isNonNegative()) { 2784 // If B >= 0, the vertex it at a negative location (or at 0), so in 2785 // order to have a non-negative solution we need to pick k that makes 2786 // C-kR negative. To satisfy all the requirements for the solution 2787 // that we are looking for, it needs to be closest to 0 of all k. 2788 C = C.srem(R); 2789 if (C.isStrictlyPositive()) 2790 C -= R; 2791 // Pick the greater solution. 2792 PickLow = false; 2793 } else { 2794 // If B < 0, the vertex is at a positive location. For any solution 2795 // to exist, the discriminant must be non-negative. This means that 2796 // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a 2797 // lower bound on values of k: kR >= C - B^2/4A. 2798 APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0. 2799 // Round LowkR up (towards +inf) to the nearest kR. 2800 LowkR = RoundUp(LowkR, R); 2801 2802 // If there exists k meeting the condition above, and such that 2803 // C-kR > 0, there will be two positive real number solutions of 2804 // q(x) = kR. Out of all such values of k, pick the one that makes 2805 // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0). 2806 // In other words, find maximum k such that LowkR <= kR < C. 2807 if (C.sgt(LowkR)) { 2808 // If LowkR < C, then such a k is guaranteed to exist because 2809 // LowkR itself is a multiple of R. 2810 C -= -RoundUp(-C, R); // C = C - RoundDown(C, R) 2811 // Pick the smaller solution. 2812 PickLow = true; 2813 } else { 2814 // If C-kR < 0 for all potential k's, it means that one solution 2815 // will be negative, while the other will be positive. The positive 2816 // solution will shift towards 0 if the parabola is moved up. 2817 // Pick the kR closest to the lower bound (i.e. make C-kR closest 2818 // to 0, or in other words, out of all parabolas that have solutions, 2819 // pick the one that is the farthest "up"). 2820 // Since LowkR is itself a multiple of R, simply take C-LowkR. 2821 C -= LowkR; 2822 // Pick the greater solution. 2823 PickLow = false; 2824 } 2825 } 2826 2827 LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + " 2828 << B << "x + " << C << ", rw:" << RangeWidth << '\n'); 2829 2830 APInt D = SqrB - 4*A*C; 2831 assert(D.isNonNegative() && "Negative discriminant"); 2832 APInt SQ = D.sqrt(); 2833 2834 APInt Q = SQ * SQ; 2835 bool InexactSQ = Q != D; 2836 // The calculated SQ may actually be greater than the exact (non-integer) 2837 // value. If that's the case, decremement SQ to get a value that is lower. 2838 if (Q.sgt(D)) 2839 SQ -= 1; 2840 2841 APInt X; 2842 APInt Rem; 2843 2844 // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact. 2845 // When using the quadratic formula directly, the calculated low root 2846 // may be greater than the exact one, since we would be subtracting SQ. 2847 // To make sure that the calculated root is not greater than the exact 2848 // one, subtract SQ+1 when calculating the low root (for inexact value 2849 // of SQ). 2850 if (PickLow) 2851 APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem); 2852 else 2853 APInt::sdivrem(-B + SQ, TwoA, X, Rem); 2854 2855 // The updated coefficients should be such that the (exact) solution is 2856 // positive. Since APInt division rounds towards 0, the calculated one 2857 // can be 0, but cannot be negative. 2858 assert(X.isNonNegative() && "Solution should be non-negative"); 2859 2860 if (!InexactSQ && Rem.isNullValue()) { 2861 LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n'); 2862 return X; 2863 } 2864 2865 assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D"); 2866 // The exact value of the square root of D should be between SQ and SQ+1. 2867 // This implies that the solution should be between that corresponding to 2868 // SQ (i.e. X) and that corresponding to SQ+1. 2869 // 2870 // The calculated X cannot be greater than the exact (real) solution. 2871 // Actually it must be strictly less than the exact solution, while 2872 // X+1 will be greater than or equal to it. 2873 2874 APInt VX = (A*X + B)*X + C; 2875 APInt VY = VX + TwoA*X + A + B; 2876 bool SignChange = VX.isNegative() != VY.isNegative() || 2877 VX.isNullValue() != VY.isNullValue(); 2878 // If the sign did not change between X and X+1, X is not a valid solution. 2879 // This could happen when the actual (exact) roots don't have an integer 2880 // between them, so they would both be contained between X and X+1. 2881 if (!SignChange) { 2882 LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n"); 2883 return None; 2884 } 2885 2886 X += 1; 2887 LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n'); 2888 return X; 2889 } 2890