1 //===-- APInt.cpp - Implement APInt class ---------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a class to represent arbitrary precision integer 11 // constant values and provide a variety of arithmetic operations on them. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/APInt.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/FoldingSet.h" 18 #include "llvm/ADT/Hashing.h" 19 #include "llvm/ADT/Optional.h" 20 #include "llvm/ADT/SmallString.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/Config/llvm-config.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/ErrorHandling.h" 25 #include "llvm/Support/MathExtras.h" 26 #include "llvm/Support/raw_ostream.h" 27 #include <climits> 28 #include <cmath> 29 #include <cstdlib> 30 #include <cstring> 31 using namespace llvm; 32 33 #define DEBUG_TYPE "apint" 34 35 /// A utility function for allocating memory, checking for allocation failures, 36 /// and ensuring the contents are zeroed. 37 inline static uint64_t* getClearedMemory(unsigned numWords) { 38 uint64_t *result = new uint64_t[numWords]; 39 memset(result, 0, numWords * sizeof(uint64_t)); 40 return result; 41 } 42 43 /// A utility function for allocating memory and checking for allocation 44 /// failure. The content is not zeroed. 45 inline static uint64_t* getMemory(unsigned numWords) { 46 return new uint64_t[numWords]; 47 } 48 49 /// A utility function that converts a character to a digit. 50 inline static unsigned getDigit(char cdigit, uint8_t radix) { 51 unsigned r; 52 53 if (radix == 16 || radix == 36) { 54 r = cdigit - '0'; 55 if (r <= 9) 56 return r; 57 58 r = cdigit - 'A'; 59 if (r <= radix - 11U) 60 return r + 10; 61 62 r = cdigit - 'a'; 63 if (r <= radix - 11U) 64 return r + 10; 65 66 radix = 10; 67 } 68 69 r = cdigit - '0'; 70 if (r < radix) 71 return r; 72 73 return -1U; 74 } 75 76 77 void APInt::initSlowCase(uint64_t val, bool isSigned) { 78 U.pVal = getClearedMemory(getNumWords()); 79 U.pVal[0] = val; 80 if (isSigned && int64_t(val) < 0) 81 for (unsigned i = 1; i < getNumWords(); ++i) 82 U.pVal[i] = WORDTYPE_MAX; 83 clearUnusedBits(); 84 } 85 86 void APInt::initSlowCase(const APInt& that) { 87 U.pVal = getMemory(getNumWords()); 88 memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE); 89 } 90 91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) { 92 assert(BitWidth && "Bitwidth too small"); 93 assert(bigVal.data() && "Null pointer detected!"); 94 if (isSingleWord()) 95 U.VAL = bigVal[0]; 96 else { 97 // Get memory, cleared to 0 98 U.pVal = getClearedMemory(getNumWords()); 99 // Calculate the number of words to copy 100 unsigned words = std::min<unsigned>(bigVal.size(), getNumWords()); 101 // Copy the words from bigVal to pVal 102 memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE); 103 } 104 // Make sure unused high bits are cleared 105 clearUnusedBits(); 106 } 107 108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal) 109 : BitWidth(numBits) { 110 initFromArray(bigVal); 111 } 112 113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) 114 : BitWidth(numBits) { 115 initFromArray(makeArrayRef(bigVal, numWords)); 116 } 117 118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix) 119 : BitWidth(numbits) { 120 assert(BitWidth && "Bitwidth too small"); 121 fromString(numbits, Str, radix); 122 } 123 124 void APInt::reallocate(unsigned NewBitWidth) { 125 // If the number of words is the same we can just change the width and stop. 126 if (getNumWords() == getNumWords(NewBitWidth)) { 127 BitWidth = NewBitWidth; 128 return; 129 } 130 131 // If we have an allocation, delete it. 132 if (!isSingleWord()) 133 delete [] U.pVal; 134 135 // Update BitWidth. 136 BitWidth = NewBitWidth; 137 138 // If we are supposed to have an allocation, create it. 139 if (!isSingleWord()) 140 U.pVal = getMemory(getNumWords()); 141 } 142 143 void APInt::AssignSlowCase(const APInt& RHS) { 144 // Don't do anything for X = X 145 if (this == &RHS) 146 return; 147 148 // Adjust the bit width and handle allocations as necessary. 149 reallocate(RHS.getBitWidth()); 150 151 // Copy the data. 152 if (isSingleWord()) 153 U.VAL = RHS.U.VAL; 154 else 155 memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE); 156 } 157 158 /// This method 'profiles' an APInt for use with FoldingSet. 159 void APInt::Profile(FoldingSetNodeID& ID) const { 160 ID.AddInteger(BitWidth); 161 162 if (isSingleWord()) { 163 ID.AddInteger(U.VAL); 164 return; 165 } 166 167 unsigned NumWords = getNumWords(); 168 for (unsigned i = 0; i < NumWords; ++i) 169 ID.AddInteger(U.pVal[i]); 170 } 171 172 /// Prefix increment operator. Increments the APInt by one. 173 APInt& APInt::operator++() { 174 if (isSingleWord()) 175 ++U.VAL; 176 else 177 tcIncrement(U.pVal, getNumWords()); 178 return clearUnusedBits(); 179 } 180 181 /// Prefix decrement operator. Decrements the APInt by one. 182 APInt& APInt::operator--() { 183 if (isSingleWord()) 184 --U.VAL; 185 else 186 tcDecrement(U.pVal, getNumWords()); 187 return clearUnusedBits(); 188 } 189 190 /// Adds the RHS APint to this APInt. 191 /// @returns this, after addition of RHS. 192 /// Addition assignment operator. 193 APInt& APInt::operator+=(const APInt& RHS) { 194 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 195 if (isSingleWord()) 196 U.VAL += RHS.U.VAL; 197 else 198 tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords()); 199 return clearUnusedBits(); 200 } 201 202 APInt& APInt::operator+=(uint64_t RHS) { 203 if (isSingleWord()) 204 U.VAL += RHS; 205 else 206 tcAddPart(U.pVal, RHS, getNumWords()); 207 return clearUnusedBits(); 208 } 209 210 /// Subtracts the RHS APInt from this APInt 211 /// @returns this, after subtraction 212 /// Subtraction assignment operator. 213 APInt& APInt::operator-=(const APInt& RHS) { 214 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 215 if (isSingleWord()) 216 U.VAL -= RHS.U.VAL; 217 else 218 tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords()); 219 return clearUnusedBits(); 220 } 221 222 APInt& APInt::operator-=(uint64_t RHS) { 223 if (isSingleWord()) 224 U.VAL -= RHS; 225 else 226 tcSubtractPart(U.pVal, RHS, getNumWords()); 227 return clearUnusedBits(); 228 } 229 230 APInt APInt::operator*(const APInt& RHS) const { 231 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 232 if (isSingleWord()) 233 return APInt(BitWidth, U.VAL * RHS.U.VAL); 234 235 APInt Result(getMemory(getNumWords()), getBitWidth()); 236 237 tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords()); 238 239 Result.clearUnusedBits(); 240 return Result; 241 } 242 243 void APInt::AndAssignSlowCase(const APInt& RHS) { 244 tcAnd(U.pVal, RHS.U.pVal, getNumWords()); 245 } 246 247 void APInt::OrAssignSlowCase(const APInt& RHS) { 248 tcOr(U.pVal, RHS.U.pVal, getNumWords()); 249 } 250 251 void APInt::XorAssignSlowCase(const APInt& RHS) { 252 tcXor(U.pVal, RHS.U.pVal, getNumWords()); 253 } 254 255 APInt& APInt::operator*=(const APInt& RHS) { 256 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 257 *this = *this * RHS; 258 return *this; 259 } 260 261 APInt& APInt::operator*=(uint64_t RHS) { 262 if (isSingleWord()) { 263 U.VAL *= RHS; 264 } else { 265 unsigned NumWords = getNumWords(); 266 tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false); 267 } 268 return clearUnusedBits(); 269 } 270 271 bool APInt::EqualSlowCase(const APInt& RHS) const { 272 return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal); 273 } 274 275 int APInt::compare(const APInt& RHS) const { 276 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); 277 if (isSingleWord()) 278 return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL; 279 280 return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); 281 } 282 283 int APInt::compareSigned(const APInt& RHS) const { 284 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); 285 if (isSingleWord()) { 286 int64_t lhsSext = SignExtend64(U.VAL, BitWidth); 287 int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth); 288 return lhsSext < rhsSext ? -1 : lhsSext > rhsSext; 289 } 290 291 bool lhsNeg = isNegative(); 292 bool rhsNeg = RHS.isNegative(); 293 294 // If the sign bits don't match, then (LHS < RHS) if LHS is negative 295 if (lhsNeg != rhsNeg) 296 return lhsNeg ? -1 : 1; 297 298 // Otherwise we can just use an unsigned comparison, because even negative 299 // numbers compare correctly this way if both have the same signed-ness. 300 return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); 301 } 302 303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { 304 unsigned loWord = whichWord(loBit); 305 unsigned hiWord = whichWord(hiBit); 306 307 // Create an initial mask for the low word with zeros below loBit. 308 uint64_t loMask = WORDTYPE_MAX << whichBit(loBit); 309 310 // If hiBit is not aligned, we need a high mask. 311 unsigned hiShiftAmt = whichBit(hiBit); 312 if (hiShiftAmt != 0) { 313 // Create a high mask with zeros above hiBit. 314 uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt); 315 // If loWord and hiWord are equal, then we combine the masks. Otherwise, 316 // set the bits in hiWord. 317 if (hiWord == loWord) 318 loMask &= hiMask; 319 else 320 U.pVal[hiWord] |= hiMask; 321 } 322 // Apply the mask to the low word. 323 U.pVal[loWord] |= loMask; 324 325 // Fill any words between loWord and hiWord with all ones. 326 for (unsigned word = loWord + 1; word < hiWord; ++word) 327 U.pVal[word] = WORDTYPE_MAX; 328 } 329 330 /// Toggle every bit to its opposite value. 331 void APInt::flipAllBitsSlowCase() { 332 tcComplement(U.pVal, getNumWords()); 333 clearUnusedBits(); 334 } 335 336 /// Toggle a given bit to its opposite value whose position is given 337 /// as "bitPosition". 338 /// Toggles a given bit to its opposite value. 339 void APInt::flipBit(unsigned bitPosition) { 340 assert(bitPosition < BitWidth && "Out of the bit-width range!"); 341 if ((*this)[bitPosition]) clearBit(bitPosition); 342 else setBit(bitPosition); 343 } 344 345 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { 346 unsigned subBitWidth = subBits.getBitWidth(); 347 assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth && 348 "Illegal bit insertion"); 349 350 // Insertion is a direct copy. 351 if (subBitWidth == BitWidth) { 352 *this = subBits; 353 return; 354 } 355 356 // Single word result can be done as a direct bitmask. 357 if (isSingleWord()) { 358 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth); 359 U.VAL &= ~(mask << bitPosition); 360 U.VAL |= (subBits.U.VAL << bitPosition); 361 return; 362 } 363 364 unsigned loBit = whichBit(bitPosition); 365 unsigned loWord = whichWord(bitPosition); 366 unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1); 367 368 // Insertion within a single word can be done as a direct bitmask. 369 if (loWord == hi1Word) { 370 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth); 371 U.pVal[loWord] &= ~(mask << loBit); 372 U.pVal[loWord] |= (subBits.U.VAL << loBit); 373 return; 374 } 375 376 // Insert on word boundaries. 377 if (loBit == 0) { 378 // Direct copy whole words. 379 unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD; 380 memcpy(U.pVal + loWord, subBits.getRawData(), 381 numWholeSubWords * APINT_WORD_SIZE); 382 383 // Mask+insert remaining bits. 384 unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD; 385 if (remainingBits != 0) { 386 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits); 387 U.pVal[hi1Word] &= ~mask; 388 U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1); 389 } 390 return; 391 } 392 393 // General case - set/clear individual bits in dst based on src. 394 // TODO - there is scope for optimization here, but at the moment this code 395 // path is barely used so prefer readability over performance. 396 for (unsigned i = 0; i != subBitWidth; ++i) { 397 if (subBits[i]) 398 setBit(bitPosition + i); 399 else 400 clearBit(bitPosition + i); 401 } 402 } 403 404 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { 405 assert(numBits > 0 && "Can't extract zero bits"); 406 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && 407 "Illegal bit extraction"); 408 409 if (isSingleWord()) 410 return APInt(numBits, U.VAL >> bitPosition); 411 412 unsigned loBit = whichBit(bitPosition); 413 unsigned loWord = whichWord(bitPosition); 414 unsigned hiWord = whichWord(bitPosition + numBits - 1); 415 416 // Single word result extracting bits from a single word source. 417 if (loWord == hiWord) 418 return APInt(numBits, U.pVal[loWord] >> loBit); 419 420 // Extracting bits that start on a source word boundary can be done 421 // as a fast memory copy. 422 if (loBit == 0) 423 return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord)); 424 425 // General case - shift + copy source words directly into place. 426 APInt Result(numBits, 0); 427 unsigned NumSrcWords = getNumWords(); 428 unsigned NumDstWords = Result.getNumWords(); 429 430 uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal; 431 for (unsigned word = 0; word < NumDstWords; ++word) { 432 uint64_t w0 = U.pVal[loWord + word]; 433 uint64_t w1 = 434 (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0; 435 DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit)); 436 } 437 438 return Result.clearUnusedBits(); 439 } 440 441 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { 442 assert(!str.empty() && "Invalid string length"); 443 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || 444 radix == 36) && 445 "Radix should be 2, 8, 10, 16, or 36!"); 446 447 size_t slen = str.size(); 448 449 // Each computation below needs to know if it's negative. 450 StringRef::iterator p = str.begin(); 451 unsigned isNegative = *p == '-'; 452 if (*p == '-' || *p == '+') { 453 p++; 454 slen--; 455 assert(slen && "String is only a sign, needs a value."); 456 } 457 458 // For radixes of power-of-two values, the bits required is accurately and 459 // easily computed 460 if (radix == 2) 461 return slen + isNegative; 462 if (radix == 8) 463 return slen * 3 + isNegative; 464 if (radix == 16) 465 return slen * 4 + isNegative; 466 467 // FIXME: base 36 468 469 // This is grossly inefficient but accurate. We could probably do something 470 // with a computation of roughly slen*64/20 and then adjust by the value of 471 // the first few digits. But, I'm not sure how accurate that could be. 472 473 // Compute a sufficient number of bits that is always large enough but might 474 // be too large. This avoids the assertion in the constructor. This 475 // calculation doesn't work appropriately for the numbers 0-9, so just use 4 476 // bits in that case. 477 unsigned sufficient 478 = radix == 10? (slen == 1 ? 4 : slen * 64/18) 479 : (slen == 1 ? 7 : slen * 16/3); 480 481 // Convert to the actual binary value. 482 APInt tmp(sufficient, StringRef(p, slen), radix); 483 484 // Compute how many bits are required. If the log is infinite, assume we need 485 // just bit. 486 unsigned log = tmp.logBase2(); 487 if (log == (unsigned)-1) { 488 return isNegative + 1; 489 } else { 490 return isNegative + log + 1; 491 } 492 } 493 494 hash_code llvm::hash_value(const APInt &Arg) { 495 if (Arg.isSingleWord()) 496 return hash_combine(Arg.U.VAL); 497 498 return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()); 499 } 500 501 bool APInt::isSplat(unsigned SplatSizeInBits) const { 502 assert(getBitWidth() % SplatSizeInBits == 0 && 503 "SplatSizeInBits must divide width!"); 504 // We can check that all parts of an integer are equal by making use of a 505 // little trick: rotate and check if it's still the same value. 506 return *this == rotl(SplatSizeInBits); 507 } 508 509 /// This function returns the high "numBits" bits of this APInt. 510 APInt APInt::getHiBits(unsigned numBits) const { 511 return this->lshr(BitWidth - numBits); 512 } 513 514 /// This function returns the low "numBits" bits of this APInt. 515 APInt APInt::getLoBits(unsigned numBits) const { 516 APInt Result(getLowBitsSet(BitWidth, numBits)); 517 Result &= *this; 518 return Result; 519 } 520 521 /// Return a value containing V broadcasted over NewLen bits. 522 APInt APInt::getSplat(unsigned NewLen, const APInt &V) { 523 assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!"); 524 525 APInt Val = V.zextOrSelf(NewLen); 526 for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1) 527 Val |= Val << I; 528 529 return Val; 530 } 531 532 unsigned APInt::countLeadingZerosSlowCase() const { 533 unsigned Count = 0; 534 for (int i = getNumWords()-1; i >= 0; --i) { 535 uint64_t V = U.pVal[i]; 536 if (V == 0) 537 Count += APINT_BITS_PER_WORD; 538 else { 539 Count += llvm::countLeadingZeros(V); 540 break; 541 } 542 } 543 // Adjust for unused bits in the most significant word (they are zero). 544 unsigned Mod = BitWidth % APINT_BITS_PER_WORD; 545 Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0; 546 return Count; 547 } 548 549 unsigned APInt::countLeadingOnesSlowCase() const { 550 unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD; 551 unsigned shift; 552 if (!highWordBits) { 553 highWordBits = APINT_BITS_PER_WORD; 554 shift = 0; 555 } else { 556 shift = APINT_BITS_PER_WORD - highWordBits; 557 } 558 int i = getNumWords() - 1; 559 unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift); 560 if (Count == highWordBits) { 561 for (i--; i >= 0; --i) { 562 if (U.pVal[i] == WORDTYPE_MAX) 563 Count += APINT_BITS_PER_WORD; 564 else { 565 Count += llvm::countLeadingOnes(U.pVal[i]); 566 break; 567 } 568 } 569 } 570 return Count; 571 } 572 573 unsigned APInt::countTrailingZerosSlowCase() const { 574 unsigned Count = 0; 575 unsigned i = 0; 576 for (; i < getNumWords() && U.pVal[i] == 0; ++i) 577 Count += APINT_BITS_PER_WORD; 578 if (i < getNumWords()) 579 Count += llvm::countTrailingZeros(U.pVal[i]); 580 return std::min(Count, BitWidth); 581 } 582 583 unsigned APInt::countTrailingOnesSlowCase() const { 584 unsigned Count = 0; 585 unsigned i = 0; 586 for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i) 587 Count += APINT_BITS_PER_WORD; 588 if (i < getNumWords()) 589 Count += llvm::countTrailingOnes(U.pVal[i]); 590 assert(Count <= BitWidth); 591 return Count; 592 } 593 594 unsigned APInt::countPopulationSlowCase() const { 595 unsigned Count = 0; 596 for (unsigned i = 0; i < getNumWords(); ++i) 597 Count += llvm::countPopulation(U.pVal[i]); 598 return Count; 599 } 600 601 bool APInt::intersectsSlowCase(const APInt &RHS) const { 602 for (unsigned i = 0, e = getNumWords(); i != e; ++i) 603 if ((U.pVal[i] & RHS.U.pVal[i]) != 0) 604 return true; 605 606 return false; 607 } 608 609 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const { 610 for (unsigned i = 0, e = getNumWords(); i != e; ++i) 611 if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0) 612 return false; 613 614 return true; 615 } 616 617 APInt APInt::byteSwap() const { 618 assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!"); 619 if (BitWidth == 16) 620 return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL))); 621 if (BitWidth == 32) 622 return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL))); 623 if (BitWidth == 48) { 624 unsigned Tmp1 = unsigned(U.VAL >> 16); 625 Tmp1 = ByteSwap_32(Tmp1); 626 uint16_t Tmp2 = uint16_t(U.VAL); 627 Tmp2 = ByteSwap_16(Tmp2); 628 return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1); 629 } 630 if (BitWidth == 64) 631 return APInt(BitWidth, ByteSwap_64(U.VAL)); 632 633 APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0); 634 for (unsigned I = 0, N = getNumWords(); I != N; ++I) 635 Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]); 636 if (Result.BitWidth != BitWidth) { 637 Result.lshrInPlace(Result.BitWidth - BitWidth); 638 Result.BitWidth = BitWidth; 639 } 640 return Result; 641 } 642 643 APInt APInt::reverseBits() const { 644 switch (BitWidth) { 645 case 64: 646 return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL)); 647 case 32: 648 return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL)); 649 case 16: 650 return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL)); 651 case 8: 652 return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL)); 653 default: 654 break; 655 } 656 657 APInt Val(*this); 658 APInt Reversed(BitWidth, 0); 659 unsigned S = BitWidth; 660 661 for (; Val != 0; Val.lshrInPlace(1)) { 662 Reversed <<= 1; 663 Reversed |= Val[0]; 664 --S; 665 } 666 667 Reversed <<= S; 668 return Reversed; 669 } 670 671 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) { 672 // Fast-path a common case. 673 if (A == B) return A; 674 675 // Corner cases: if either operand is zero, the other is the gcd. 676 if (!A) return B; 677 if (!B) return A; 678 679 // Count common powers of 2 and remove all other powers of 2. 680 unsigned Pow2; 681 { 682 unsigned Pow2_A = A.countTrailingZeros(); 683 unsigned Pow2_B = B.countTrailingZeros(); 684 if (Pow2_A > Pow2_B) { 685 A.lshrInPlace(Pow2_A - Pow2_B); 686 Pow2 = Pow2_B; 687 } else if (Pow2_B > Pow2_A) { 688 B.lshrInPlace(Pow2_B - Pow2_A); 689 Pow2 = Pow2_A; 690 } else { 691 Pow2 = Pow2_A; 692 } 693 } 694 695 // Both operands are odd multiples of 2^Pow_2: 696 // 697 // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b)) 698 // 699 // This is a modified version of Stein's algorithm, taking advantage of 700 // efficient countTrailingZeros(). 701 while (A != B) { 702 if (A.ugt(B)) { 703 A -= B; 704 A.lshrInPlace(A.countTrailingZeros() - Pow2); 705 } else { 706 B -= A; 707 B.lshrInPlace(B.countTrailingZeros() - Pow2); 708 } 709 } 710 711 return A; 712 } 713 714 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { 715 union { 716 double D; 717 uint64_t I; 718 } T; 719 T.D = Double; 720 721 // Get the sign bit from the highest order bit 722 bool isNeg = T.I >> 63; 723 724 // Get the 11-bit exponent and adjust for the 1023 bit bias 725 int64_t exp = ((T.I >> 52) & 0x7ff) - 1023; 726 727 // If the exponent is negative, the value is < 0 so just return 0. 728 if (exp < 0) 729 return APInt(width, 0u); 730 731 // Extract the mantissa by clearing the top 12 bits (sign + exponent). 732 uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52; 733 734 // If the exponent doesn't shift all bits out of the mantissa 735 if (exp < 52) 736 return isNeg ? -APInt(width, mantissa >> (52 - exp)) : 737 APInt(width, mantissa >> (52 - exp)); 738 739 // If the client didn't provide enough bits for us to shift the mantissa into 740 // then the result is undefined, just return 0 741 if (width <= exp - 52) 742 return APInt(width, 0); 743 744 // Otherwise, we have to shift the mantissa bits up to the right location 745 APInt Tmp(width, mantissa); 746 Tmp <<= (unsigned)exp - 52; 747 return isNeg ? -Tmp : Tmp; 748 } 749 750 /// This function converts this APInt to a double. 751 /// The layout for double is as following (IEEE Standard 754): 752 /// -------------------------------------- 753 /// | Sign Exponent Fraction Bias | 754 /// |-------------------------------------- | 755 /// | 1[63] 11[62-52] 52[51-00] 1023 | 756 /// -------------------------------------- 757 double APInt::roundToDouble(bool isSigned) const { 758 759 // Handle the simple case where the value is contained in one uint64_t. 760 // It is wrong to optimize getWord(0) to VAL; there might be more than one word. 761 if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) { 762 if (isSigned) { 763 int64_t sext = SignExtend64(getWord(0), BitWidth); 764 return double(sext); 765 } else 766 return double(getWord(0)); 767 } 768 769 // Determine if the value is negative. 770 bool isNeg = isSigned ? (*this)[BitWidth-1] : false; 771 772 // Construct the absolute value if we're negative. 773 APInt Tmp(isNeg ? -(*this) : (*this)); 774 775 // Figure out how many bits we're using. 776 unsigned n = Tmp.getActiveBits(); 777 778 // The exponent (without bias normalization) is just the number of bits 779 // we are using. Note that the sign bit is gone since we constructed the 780 // absolute value. 781 uint64_t exp = n; 782 783 // Return infinity for exponent overflow 784 if (exp > 1023) { 785 if (!isSigned || !isNeg) 786 return std::numeric_limits<double>::infinity(); 787 else 788 return -std::numeric_limits<double>::infinity(); 789 } 790 exp += 1023; // Increment for 1023 bias 791 792 // Number of bits in mantissa is 52. To obtain the mantissa value, we must 793 // extract the high 52 bits from the correct words in pVal. 794 uint64_t mantissa; 795 unsigned hiWord = whichWord(n-1); 796 if (hiWord == 0) { 797 mantissa = Tmp.U.pVal[0]; 798 if (n > 52) 799 mantissa >>= n - 52; // shift down, we want the top 52 bits. 800 } else { 801 assert(hiWord > 0 && "huh?"); 802 uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD); 803 uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD); 804 mantissa = hibits | lobits; 805 } 806 807 // The leading bit of mantissa is implicit, so get rid of it. 808 uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0; 809 union { 810 double D; 811 uint64_t I; 812 } T; 813 T.I = sign | (exp << 52) | mantissa; 814 return T.D; 815 } 816 817 // Truncate to new width. 818 APInt APInt::trunc(unsigned width) const { 819 assert(width < BitWidth && "Invalid APInt Truncate request"); 820 assert(width && "Can't truncate to 0 bits"); 821 822 if (width <= APINT_BITS_PER_WORD) 823 return APInt(width, getRawData()[0]); 824 825 APInt Result(getMemory(getNumWords(width)), width); 826 827 // Copy full words. 828 unsigned i; 829 for (i = 0; i != width / APINT_BITS_PER_WORD; i++) 830 Result.U.pVal[i] = U.pVal[i]; 831 832 // Truncate and copy any partial word. 833 unsigned bits = (0 - width) % APINT_BITS_PER_WORD; 834 if (bits != 0) 835 Result.U.pVal[i] = U.pVal[i] << bits >> bits; 836 837 return Result; 838 } 839 840 // Sign extend to a new width. 841 APInt APInt::sext(unsigned Width) const { 842 assert(Width > BitWidth && "Invalid APInt SignExtend request"); 843 844 if (Width <= APINT_BITS_PER_WORD) 845 return APInt(Width, SignExtend64(U.VAL, BitWidth)); 846 847 APInt Result(getMemory(getNumWords(Width)), Width); 848 849 // Copy words. 850 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); 851 852 // Sign extend the last word since there may be unused bits in the input. 853 Result.U.pVal[getNumWords() - 1] = 854 SignExtend64(Result.U.pVal[getNumWords() - 1], 855 ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); 856 857 // Fill with sign bits. 858 std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0, 859 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); 860 Result.clearUnusedBits(); 861 return Result; 862 } 863 864 // Zero extend to a new width. 865 APInt APInt::zext(unsigned width) const { 866 assert(width > BitWidth && "Invalid APInt ZeroExtend request"); 867 868 if (width <= APINT_BITS_PER_WORD) 869 return APInt(width, U.VAL); 870 871 APInt Result(getMemory(getNumWords(width)), width); 872 873 // Copy words. 874 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); 875 876 // Zero remaining words. 877 std::memset(Result.U.pVal + getNumWords(), 0, 878 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); 879 880 return Result; 881 } 882 883 APInt APInt::zextOrTrunc(unsigned width) const { 884 if (BitWidth < width) 885 return zext(width); 886 if (BitWidth > width) 887 return trunc(width); 888 return *this; 889 } 890 891 APInt APInt::sextOrTrunc(unsigned width) const { 892 if (BitWidth < width) 893 return sext(width); 894 if (BitWidth > width) 895 return trunc(width); 896 return *this; 897 } 898 899 APInt APInt::zextOrSelf(unsigned width) const { 900 if (BitWidth < width) 901 return zext(width); 902 return *this; 903 } 904 905 APInt APInt::sextOrSelf(unsigned width) const { 906 if (BitWidth < width) 907 return sext(width); 908 return *this; 909 } 910 911 /// Arithmetic right-shift this APInt by shiftAmt. 912 /// Arithmetic right-shift function. 913 void APInt::ashrInPlace(const APInt &shiftAmt) { 914 ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); 915 } 916 917 /// Arithmetic right-shift this APInt by shiftAmt. 918 /// Arithmetic right-shift function. 919 void APInt::ashrSlowCase(unsigned ShiftAmt) { 920 // Don't bother performing a no-op shift. 921 if (!ShiftAmt) 922 return; 923 924 // Save the original sign bit for later. 925 bool Negative = isNegative(); 926 927 // WordShift is the inter-part shift; BitShift is intra-part shift. 928 unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD; 929 unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD; 930 931 unsigned WordsToMove = getNumWords() - WordShift; 932 if (WordsToMove != 0) { 933 // Sign extend the last word to fill in the unused bits. 934 U.pVal[getNumWords() - 1] = SignExtend64( 935 U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); 936 937 // Fastpath for moving by whole words. 938 if (BitShift == 0) { 939 std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE); 940 } else { 941 // Move the words containing significant bits. 942 for (unsigned i = 0; i != WordsToMove - 1; ++i) 943 U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) | 944 (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift)); 945 946 // Handle the last word which has no high bits to copy. 947 U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift; 948 // Sign extend one more time. 949 U.pVal[WordsToMove - 1] = 950 SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift); 951 } 952 } 953 954 // Fill in the remainder based on the original sign. 955 std::memset(U.pVal + WordsToMove, Negative ? -1 : 0, 956 WordShift * APINT_WORD_SIZE); 957 clearUnusedBits(); 958 } 959 960 /// Logical right-shift this APInt by shiftAmt. 961 /// Logical right-shift function. 962 void APInt::lshrInPlace(const APInt &shiftAmt) { 963 lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); 964 } 965 966 /// Logical right-shift this APInt by shiftAmt. 967 /// Logical right-shift function. 968 void APInt::lshrSlowCase(unsigned ShiftAmt) { 969 tcShiftRight(U.pVal, getNumWords(), ShiftAmt); 970 } 971 972 /// Left-shift this APInt by shiftAmt. 973 /// Left-shift function. 974 APInt &APInt::operator<<=(const APInt &shiftAmt) { 975 // It's undefined behavior in C to shift by BitWidth or greater. 976 *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth); 977 return *this; 978 } 979 980 void APInt::shlSlowCase(unsigned ShiftAmt) { 981 tcShiftLeft(U.pVal, getNumWords(), ShiftAmt); 982 clearUnusedBits(); 983 } 984 985 // Calculate the rotate amount modulo the bit width. 986 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) { 987 unsigned rotBitWidth = rotateAmt.getBitWidth(); 988 APInt rot = rotateAmt; 989 if (rotBitWidth < BitWidth) { 990 // Extend the rotate APInt, so that the urem doesn't divide by 0. 991 // e.g. APInt(1, 32) would give APInt(1, 0). 992 rot = rotateAmt.zext(BitWidth); 993 } 994 rot = rot.urem(APInt(rot.getBitWidth(), BitWidth)); 995 return rot.getLimitedValue(BitWidth); 996 } 997 998 APInt APInt::rotl(const APInt &rotateAmt) const { 999 return rotl(rotateModulo(BitWidth, rotateAmt)); 1000 } 1001 1002 APInt APInt::rotl(unsigned rotateAmt) const { 1003 rotateAmt %= BitWidth; 1004 if (rotateAmt == 0) 1005 return *this; 1006 return shl(rotateAmt) | lshr(BitWidth - rotateAmt); 1007 } 1008 1009 APInt APInt::rotr(const APInt &rotateAmt) const { 1010 return rotr(rotateModulo(BitWidth, rotateAmt)); 1011 } 1012 1013 APInt APInt::rotr(unsigned rotateAmt) const { 1014 rotateAmt %= BitWidth; 1015 if (rotateAmt == 0) 1016 return *this; 1017 return lshr(rotateAmt) | shl(BitWidth - rotateAmt); 1018 } 1019 1020 // Square Root - this method computes and returns the square root of "this". 1021 // Three mechanisms are used for computation. For small values (<= 5 bits), 1022 // a table lookup is done. This gets some performance for common cases. For 1023 // values using less than 52 bits, the value is converted to double and then 1024 // the libc sqrt function is called. The result is rounded and then converted 1025 // back to a uint64_t which is then used to construct the result. Finally, 1026 // the Babylonian method for computing square roots is used. 1027 APInt APInt::sqrt() const { 1028 1029 // Determine the magnitude of the value. 1030 unsigned magnitude = getActiveBits(); 1031 1032 // Use a fast table for some small values. This also gets rid of some 1033 // rounding errors in libc sqrt for small values. 1034 if (magnitude <= 5) { 1035 static const uint8_t results[32] = { 1036 /* 0 */ 0, 1037 /* 1- 2 */ 1, 1, 1038 /* 3- 6 */ 2, 2, 2, 2, 1039 /* 7-12 */ 3, 3, 3, 3, 3, 3, 1040 /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4, 1041 /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1042 /* 31 */ 6 1043 }; 1044 return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]); 1045 } 1046 1047 // If the magnitude of the value fits in less than 52 bits (the precision of 1048 // an IEEE double precision floating point value), then we can use the 1049 // libc sqrt function which will probably use a hardware sqrt computation. 1050 // This should be faster than the algorithm below. 1051 if (magnitude < 52) { 1052 return APInt(BitWidth, 1053 uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL 1054 : U.pVal[0]))))); 1055 } 1056 1057 // Okay, all the short cuts are exhausted. We must compute it. The following 1058 // is a classical Babylonian method for computing the square root. This code 1059 // was adapted to APInt from a wikipedia article on such computations. 1060 // See http://www.wikipedia.org/ and go to the page named 1061 // Calculate_an_integer_square_root. 1062 unsigned nbits = BitWidth, i = 4; 1063 APInt testy(BitWidth, 16); 1064 APInt x_old(BitWidth, 1); 1065 APInt x_new(BitWidth, 0); 1066 APInt two(BitWidth, 2); 1067 1068 // Select a good starting value using binary logarithms. 1069 for (;; i += 2, testy = testy.shl(2)) 1070 if (i >= nbits || this->ule(testy)) { 1071 x_old = x_old.shl(i / 2); 1072 break; 1073 } 1074 1075 // Use the Babylonian method to arrive at the integer square root: 1076 for (;;) { 1077 x_new = (this->udiv(x_old) + x_old).udiv(two); 1078 if (x_old.ule(x_new)) 1079 break; 1080 x_old = x_new; 1081 } 1082 1083 // Make sure we return the closest approximation 1084 // NOTE: The rounding calculation below is correct. It will produce an 1085 // off-by-one discrepancy with results from pari/gp. That discrepancy has been 1086 // determined to be a rounding issue with pari/gp as it begins to use a 1087 // floating point representation after 192 bits. There are no discrepancies 1088 // between this algorithm and pari/gp for bit widths < 192 bits. 1089 APInt square(x_old * x_old); 1090 APInt nextSquare((x_old + 1) * (x_old +1)); 1091 if (this->ult(square)) 1092 return x_old; 1093 assert(this->ule(nextSquare) && "Error in APInt::sqrt computation"); 1094 APInt midpoint((nextSquare - square).udiv(two)); 1095 APInt offset(*this - square); 1096 if (offset.ult(midpoint)) 1097 return x_old; 1098 return x_old + 1; 1099 } 1100 1101 /// Computes the multiplicative inverse of this APInt for a given modulo. The 1102 /// iterative extended Euclidean algorithm is used to solve for this value, 1103 /// however we simplify it to speed up calculating only the inverse, and take 1104 /// advantage of div+rem calculations. We also use some tricks to avoid copying 1105 /// (potentially large) APInts around. 1106 APInt APInt::multiplicativeInverse(const APInt& modulo) const { 1107 assert(ult(modulo) && "This APInt must be smaller than the modulo"); 1108 1109 // Using the properties listed at the following web page (accessed 06/21/08): 1110 // http://www.numbertheory.org/php/euclid.html 1111 // (especially the properties numbered 3, 4 and 9) it can be proved that 1112 // BitWidth bits suffice for all the computations in the algorithm implemented 1113 // below. More precisely, this number of bits suffice if the multiplicative 1114 // inverse exists, but may not suffice for the general extended Euclidean 1115 // algorithm. 1116 1117 APInt r[2] = { modulo, *this }; 1118 APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) }; 1119 APInt q(BitWidth, 0); 1120 1121 unsigned i; 1122 for (i = 0; r[i^1] != 0; i ^= 1) { 1123 // An overview of the math without the confusing bit-flipping: 1124 // q = r[i-2] / r[i-1] 1125 // r[i] = r[i-2] % r[i-1] 1126 // t[i] = t[i-2] - t[i-1] * q 1127 udivrem(r[i], r[i^1], q, r[i]); 1128 t[i] -= t[i^1] * q; 1129 } 1130 1131 // If this APInt and the modulo are not coprime, there is no multiplicative 1132 // inverse, so return 0. We check this by looking at the next-to-last 1133 // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean 1134 // algorithm. 1135 if (r[i] != 1) 1136 return APInt(BitWidth, 0); 1137 1138 // The next-to-last t is the multiplicative inverse. However, we are 1139 // interested in a positive inverse. Calculate a positive one from a negative 1140 // one if necessary. A simple addition of the modulo suffices because 1141 // abs(t[i]) is known to be less than *this/2 (see the link above). 1142 if (t[i].isNegative()) 1143 t[i] += modulo; 1144 1145 return std::move(t[i]); 1146 } 1147 1148 /// Calculate the magic numbers required to implement a signed integer division 1149 /// by a constant as a sequence of multiplies, adds and shifts. Requires that 1150 /// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S. 1151 /// Warren, Jr., chapter 10. 1152 APInt::ms APInt::magic() const { 1153 const APInt& d = *this; 1154 unsigned p; 1155 APInt ad, anc, delta, q1, r1, q2, r2, t; 1156 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); 1157 struct ms mag; 1158 1159 ad = d.abs(); 1160 t = signedMin + (d.lshr(d.getBitWidth() - 1)); 1161 anc = t - 1 - t.urem(ad); // absolute value of nc 1162 p = d.getBitWidth() - 1; // initialize p 1163 q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc) 1164 r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc)) 1165 q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d) 1166 r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d)) 1167 do { 1168 p = p + 1; 1169 q1 = q1<<1; // update q1 = 2p/abs(nc) 1170 r1 = r1<<1; // update r1 = rem(2p/abs(nc)) 1171 if (r1.uge(anc)) { // must be unsigned comparison 1172 q1 = q1 + 1; 1173 r1 = r1 - anc; 1174 } 1175 q2 = q2<<1; // update q2 = 2p/abs(d) 1176 r2 = r2<<1; // update r2 = rem(2p/abs(d)) 1177 if (r2.uge(ad)) { // must be unsigned comparison 1178 q2 = q2 + 1; 1179 r2 = r2 - ad; 1180 } 1181 delta = ad - r2; 1182 } while (q1.ult(delta) || (q1 == delta && r1 == 0)); 1183 1184 mag.m = q2 + 1; 1185 if (d.isNegative()) mag.m = -mag.m; // resulting magic number 1186 mag.s = p - d.getBitWidth(); // resulting shift 1187 return mag; 1188 } 1189 1190 /// Calculate the magic numbers required to implement an unsigned integer 1191 /// division by a constant as a sequence of multiplies, adds and shifts. 1192 /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry 1193 /// S. Warren, Jr., chapter 10. 1194 /// LeadingZeros can be used to simplify the calculation if the upper bits 1195 /// of the divided value are known zero. 1196 APInt::mu APInt::magicu(unsigned LeadingZeros) const { 1197 const APInt& d = *this; 1198 unsigned p; 1199 APInt nc, delta, q1, r1, q2, r2; 1200 struct mu magu; 1201 magu.a = 0; // initialize "add" indicator 1202 APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros); 1203 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); 1204 APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); 1205 1206 nc = allOnes - (allOnes - d).urem(d); 1207 p = d.getBitWidth() - 1; // initialize p 1208 q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc 1209 r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc) 1210 q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d 1211 r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d) 1212 do { 1213 p = p + 1; 1214 if (r1.uge(nc - r1)) { 1215 q1 = q1 + q1 + 1; // update q1 1216 r1 = r1 + r1 - nc; // update r1 1217 } 1218 else { 1219 q1 = q1+q1; // update q1 1220 r1 = r1+r1; // update r1 1221 } 1222 if ((r2 + 1).uge(d - r2)) { 1223 if (q2.uge(signedMax)) magu.a = 1; 1224 q2 = q2+q2 + 1; // update q2 1225 r2 = r2+r2 + 1 - d; // update r2 1226 } 1227 else { 1228 if (q2.uge(signedMin)) magu.a = 1; 1229 q2 = q2+q2; // update q2 1230 r2 = r2+r2 + 1; // update r2 1231 } 1232 delta = d - 1 - r2; 1233 } while (p < d.getBitWidth()*2 && 1234 (q1.ult(delta) || (q1 == delta && r1 == 0))); 1235 magu.m = q2 + 1; // resulting magic number 1236 magu.s = p - d.getBitWidth(); // resulting shift 1237 return magu; 1238 } 1239 1240 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers) 1241 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The 1242 /// variables here have the same names as in the algorithm. Comments explain 1243 /// the algorithm and any deviation from it. 1244 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r, 1245 unsigned m, unsigned n) { 1246 assert(u && "Must provide dividend"); 1247 assert(v && "Must provide divisor"); 1248 assert(q && "Must provide quotient"); 1249 assert(u != v && u != q && v != q && "Must use different memory"); 1250 assert(n>1 && "n must be > 1"); 1251 1252 // b denotes the base of the number system. In our case b is 2^32. 1253 const uint64_t b = uint64_t(1) << 32; 1254 1255 // The DEBUG macros here tend to be spam in the debug output if you're not 1256 // debugging this code. Disable them unless KNUTH_DEBUG is defined. 1257 #ifdef KNUTH_DEBUG 1258 #define DEBUG_KNUTH(X) LLVM_DEBUG(X) 1259 #else 1260 #define DEBUG_KNUTH(X) do {} while(false) 1261 #endif 1262 1263 DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n'); 1264 DEBUG_KNUTH(dbgs() << "KnuthDiv: original:"); 1265 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1266 DEBUG_KNUTH(dbgs() << " by"); 1267 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]); 1268 DEBUG_KNUTH(dbgs() << '\n'); 1269 // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of 1270 // u and v by d. Note that we have taken Knuth's advice here to use a power 1271 // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of 1272 // 2 allows us to shift instead of multiply and it is easy to determine the 1273 // shift amount from the leading zeros. We are basically normalizing the u 1274 // and v so that its high bits are shifted to the top of v's range without 1275 // overflow. Note that this can require an extra word in u so that u must 1276 // be of length m+n+1. 1277 unsigned shift = countLeadingZeros(v[n-1]); 1278 uint32_t v_carry = 0; 1279 uint32_t u_carry = 0; 1280 if (shift) { 1281 for (unsigned i = 0; i < m+n; ++i) { 1282 uint32_t u_tmp = u[i] >> (32 - shift); 1283 u[i] = (u[i] << shift) | u_carry; 1284 u_carry = u_tmp; 1285 } 1286 for (unsigned i = 0; i < n; ++i) { 1287 uint32_t v_tmp = v[i] >> (32 - shift); 1288 v[i] = (v[i] << shift) | v_carry; 1289 v_carry = v_tmp; 1290 } 1291 } 1292 u[m+n] = u_carry; 1293 1294 DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:"); 1295 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1296 DEBUG_KNUTH(dbgs() << " by"); 1297 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]); 1298 DEBUG_KNUTH(dbgs() << '\n'); 1299 1300 // D2. [Initialize j.] Set j to m. This is the loop counter over the places. 1301 int j = m; 1302 do { 1303 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n'); 1304 // D3. [Calculate q'.]. 1305 // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') 1306 // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r') 1307 // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease 1308 // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test 1309 // on v[n-2] determines at high speed most of the cases in which the trial 1310 // value qp is one too large, and it eliminates all cases where qp is two 1311 // too large. 1312 uint64_t dividend = Make_64(u[j+n], u[j+n-1]); 1313 DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n'); 1314 uint64_t qp = dividend / v[n-1]; 1315 uint64_t rp = dividend % v[n-1]; 1316 if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) { 1317 qp--; 1318 rp += v[n-1]; 1319 if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2])) 1320 qp--; 1321 } 1322 DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); 1323 1324 // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with 1325 // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation 1326 // consists of a simple multiplication by a one-place number, combined with 1327 // a subtraction. 1328 // The digits (u[j+n]...u[j]) should be kept positive; if the result of 1329 // this step is actually negative, (u[j+n]...u[j]) should be left as the 1330 // true value plus b**(n+1), namely as the b's complement of 1331 // the true value, and a "borrow" to the left should be remembered. 1332 int64_t borrow = 0; 1333 for (unsigned i = 0; i < n; ++i) { 1334 uint64_t p = uint64_t(qp) * uint64_t(v[i]); 1335 int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p); 1336 u[j+i] = Lo_32(subres); 1337 borrow = Hi_32(p) - Hi_32(subres); 1338 DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i] 1339 << ", borrow = " << borrow << '\n'); 1340 } 1341 bool isNeg = u[j+n] < borrow; 1342 u[j+n] -= Lo_32(borrow); 1343 1344 DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:"); 1345 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1346 DEBUG_KNUTH(dbgs() << '\n'); 1347 1348 // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was 1349 // negative, go to step D6; otherwise go on to step D7. 1350 q[j] = Lo_32(qp); 1351 if (isNeg) { 1352 // D6. [Add back]. The probability that this step is necessary is very 1353 // small, on the order of only 2/b. Make sure that test data accounts for 1354 // this possibility. Decrease q[j] by 1 1355 q[j]--; 1356 // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). 1357 // A carry will occur to the left of u[j+n], and it should be ignored 1358 // since it cancels with the borrow that occurred in D4. 1359 bool carry = false; 1360 for (unsigned i = 0; i < n; i++) { 1361 uint32_t limit = std::min(u[j+i],v[i]); 1362 u[j+i] += v[i] + carry; 1363 carry = u[j+i] < limit || (carry && u[j+i] == limit); 1364 } 1365 u[j+n] += carry; 1366 } 1367 DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:"); 1368 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1369 DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n'); 1370 1371 // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. 1372 } while (--j >= 0); 1373 1374 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:"); 1375 DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]); 1376 DEBUG_KNUTH(dbgs() << '\n'); 1377 1378 // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired 1379 // remainder may be obtained by dividing u[...] by d. If r is non-null we 1380 // compute the remainder (urem uses this). 1381 if (r) { 1382 // The value d is expressed by the "shift" value above since we avoided 1383 // multiplication by d by using a shift left. So, all we have to do is 1384 // shift right here. 1385 if (shift) { 1386 uint32_t carry = 0; 1387 DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:"); 1388 for (int i = n-1; i >= 0; i--) { 1389 r[i] = (u[i] >> shift) | carry; 1390 carry = u[i] << (32 - shift); 1391 DEBUG_KNUTH(dbgs() << " " << r[i]); 1392 } 1393 } else { 1394 for (int i = n-1; i >= 0; i--) { 1395 r[i] = u[i]; 1396 DEBUG_KNUTH(dbgs() << " " << r[i]); 1397 } 1398 } 1399 DEBUG_KNUTH(dbgs() << '\n'); 1400 } 1401 DEBUG_KNUTH(dbgs() << '\n'); 1402 } 1403 1404 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS, 1405 unsigned rhsWords, WordType *Quotient, WordType *Remainder) { 1406 assert(lhsWords >= rhsWords && "Fractional result"); 1407 1408 // First, compose the values into an array of 32-bit words instead of 1409 // 64-bit words. This is a necessity of both the "short division" algorithm 1410 // and the Knuth "classical algorithm" which requires there to be native 1411 // operations for +, -, and * on an m bit value with an m*2 bit result. We 1412 // can't use 64-bit operands here because we don't have native results of 1413 // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't 1414 // work on large-endian machines. 1415 unsigned n = rhsWords * 2; 1416 unsigned m = (lhsWords * 2) - n; 1417 1418 // Allocate space for the temporary values we need either on the stack, if 1419 // it will fit, or on the heap if it won't. 1420 uint32_t SPACE[128]; 1421 uint32_t *U = nullptr; 1422 uint32_t *V = nullptr; 1423 uint32_t *Q = nullptr; 1424 uint32_t *R = nullptr; 1425 if ((Remainder?4:3)*n+2*m+1 <= 128) { 1426 U = &SPACE[0]; 1427 V = &SPACE[m+n+1]; 1428 Q = &SPACE[(m+n+1) + n]; 1429 if (Remainder) 1430 R = &SPACE[(m+n+1) + n + (m+n)]; 1431 } else { 1432 U = new uint32_t[m + n + 1]; 1433 V = new uint32_t[n]; 1434 Q = new uint32_t[m+n]; 1435 if (Remainder) 1436 R = new uint32_t[n]; 1437 } 1438 1439 // Initialize the dividend 1440 memset(U, 0, (m+n+1)*sizeof(uint32_t)); 1441 for (unsigned i = 0; i < lhsWords; ++i) { 1442 uint64_t tmp = LHS[i]; 1443 U[i * 2] = Lo_32(tmp); 1444 U[i * 2 + 1] = Hi_32(tmp); 1445 } 1446 U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm. 1447 1448 // Initialize the divisor 1449 memset(V, 0, (n)*sizeof(uint32_t)); 1450 for (unsigned i = 0; i < rhsWords; ++i) { 1451 uint64_t tmp = RHS[i]; 1452 V[i * 2] = Lo_32(tmp); 1453 V[i * 2 + 1] = Hi_32(tmp); 1454 } 1455 1456 // initialize the quotient and remainder 1457 memset(Q, 0, (m+n) * sizeof(uint32_t)); 1458 if (Remainder) 1459 memset(R, 0, n * sizeof(uint32_t)); 1460 1461 // Now, adjust m and n for the Knuth division. n is the number of words in 1462 // the divisor. m is the number of words by which the dividend exceeds the 1463 // divisor (i.e. m+n is the length of the dividend). These sizes must not 1464 // contain any zero words or the Knuth algorithm fails. 1465 for (unsigned i = n; i > 0 && V[i-1] == 0; i--) { 1466 n--; 1467 m++; 1468 } 1469 for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--) 1470 m--; 1471 1472 // If we're left with only a single word for the divisor, Knuth doesn't work 1473 // so we implement the short division algorithm here. This is much simpler 1474 // and faster because we are certain that we can divide a 64-bit quantity 1475 // by a 32-bit quantity at hardware speed and short division is simply a 1476 // series of such operations. This is just like doing short division but we 1477 // are using base 2^32 instead of base 10. 1478 assert(n != 0 && "Divide by zero?"); 1479 if (n == 1) { 1480 uint32_t divisor = V[0]; 1481 uint32_t remainder = 0; 1482 for (int i = m; i >= 0; i--) { 1483 uint64_t partial_dividend = Make_64(remainder, U[i]); 1484 if (partial_dividend == 0) { 1485 Q[i] = 0; 1486 remainder = 0; 1487 } else if (partial_dividend < divisor) { 1488 Q[i] = 0; 1489 remainder = Lo_32(partial_dividend); 1490 } else if (partial_dividend == divisor) { 1491 Q[i] = 1; 1492 remainder = 0; 1493 } else { 1494 Q[i] = Lo_32(partial_dividend / divisor); 1495 remainder = Lo_32(partial_dividend - (Q[i] * divisor)); 1496 } 1497 } 1498 if (R) 1499 R[0] = remainder; 1500 } else { 1501 // Now we're ready to invoke the Knuth classical divide algorithm. In this 1502 // case n > 1. 1503 KnuthDiv(U, V, Q, R, m, n); 1504 } 1505 1506 // If the caller wants the quotient 1507 if (Quotient) { 1508 for (unsigned i = 0; i < lhsWords; ++i) 1509 Quotient[i] = Make_64(Q[i*2+1], Q[i*2]); 1510 } 1511 1512 // If the caller wants the remainder 1513 if (Remainder) { 1514 for (unsigned i = 0; i < rhsWords; ++i) 1515 Remainder[i] = Make_64(R[i*2+1], R[i*2]); 1516 } 1517 1518 // Clean up the memory we allocated. 1519 if (U != &SPACE[0]) { 1520 delete [] U; 1521 delete [] V; 1522 delete [] Q; 1523 delete [] R; 1524 } 1525 } 1526 1527 APInt APInt::udiv(const APInt &RHS) const { 1528 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1529 1530 // First, deal with the easy case 1531 if (isSingleWord()) { 1532 assert(RHS.U.VAL != 0 && "Divide by zero?"); 1533 return APInt(BitWidth, U.VAL / RHS.U.VAL); 1534 } 1535 1536 // Get some facts about the LHS and RHS number of bits and words 1537 unsigned lhsWords = getNumWords(getActiveBits()); 1538 unsigned rhsBits = RHS.getActiveBits(); 1539 unsigned rhsWords = getNumWords(rhsBits); 1540 assert(rhsWords && "Divided by zero???"); 1541 1542 // Deal with some degenerate cases 1543 if (!lhsWords) 1544 // 0 / X ===> 0 1545 return APInt(BitWidth, 0); 1546 if (rhsBits == 1) 1547 // X / 1 ===> X 1548 return *this; 1549 if (lhsWords < rhsWords || this->ult(RHS)) 1550 // X / Y ===> 0, iff X < Y 1551 return APInt(BitWidth, 0); 1552 if (*this == RHS) 1553 // X / X ===> 1 1554 return APInt(BitWidth, 1); 1555 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. 1556 // All high words are zero, just use native divide 1557 return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]); 1558 1559 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1560 APInt Quotient(BitWidth, 0); // to hold result. 1561 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr); 1562 return Quotient; 1563 } 1564 1565 APInt APInt::udiv(uint64_t RHS) const { 1566 assert(RHS != 0 && "Divide by zero?"); 1567 1568 // First, deal with the easy case 1569 if (isSingleWord()) 1570 return APInt(BitWidth, U.VAL / RHS); 1571 1572 // Get some facts about the LHS words. 1573 unsigned lhsWords = getNumWords(getActiveBits()); 1574 1575 // Deal with some degenerate cases 1576 if (!lhsWords) 1577 // 0 / X ===> 0 1578 return APInt(BitWidth, 0); 1579 if (RHS == 1) 1580 // X / 1 ===> X 1581 return *this; 1582 if (this->ult(RHS)) 1583 // X / Y ===> 0, iff X < Y 1584 return APInt(BitWidth, 0); 1585 if (*this == RHS) 1586 // X / X ===> 1 1587 return APInt(BitWidth, 1); 1588 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. 1589 // All high words are zero, just use native divide 1590 return APInt(BitWidth, this->U.pVal[0] / RHS); 1591 1592 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1593 APInt Quotient(BitWidth, 0); // to hold result. 1594 divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr); 1595 return Quotient; 1596 } 1597 1598 APInt APInt::sdiv(const APInt &RHS) const { 1599 if (isNegative()) { 1600 if (RHS.isNegative()) 1601 return (-(*this)).udiv(-RHS); 1602 return -((-(*this)).udiv(RHS)); 1603 } 1604 if (RHS.isNegative()) 1605 return -(this->udiv(-RHS)); 1606 return this->udiv(RHS); 1607 } 1608 1609 APInt APInt::sdiv(int64_t RHS) const { 1610 if (isNegative()) { 1611 if (RHS < 0) 1612 return (-(*this)).udiv(-RHS); 1613 return -((-(*this)).udiv(RHS)); 1614 } 1615 if (RHS < 0) 1616 return -(this->udiv(-RHS)); 1617 return this->udiv(RHS); 1618 } 1619 1620 APInt APInt::urem(const APInt &RHS) const { 1621 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1622 if (isSingleWord()) { 1623 assert(RHS.U.VAL != 0 && "Remainder by zero?"); 1624 return APInt(BitWidth, U.VAL % RHS.U.VAL); 1625 } 1626 1627 // Get some facts about the LHS 1628 unsigned lhsWords = getNumWords(getActiveBits()); 1629 1630 // Get some facts about the RHS 1631 unsigned rhsBits = RHS.getActiveBits(); 1632 unsigned rhsWords = getNumWords(rhsBits); 1633 assert(rhsWords && "Performing remainder operation by zero ???"); 1634 1635 // Check the degenerate cases 1636 if (lhsWords == 0) 1637 // 0 % Y ===> 0 1638 return APInt(BitWidth, 0); 1639 if (rhsBits == 1) 1640 // X % 1 ===> 0 1641 return APInt(BitWidth, 0); 1642 if (lhsWords < rhsWords || this->ult(RHS)) 1643 // X % Y ===> X, iff X < Y 1644 return *this; 1645 if (*this == RHS) 1646 // X % X == 0; 1647 return APInt(BitWidth, 0); 1648 if (lhsWords == 1) 1649 // All high words are zero, just use native remainder 1650 return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]); 1651 1652 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1653 APInt Remainder(BitWidth, 0); 1654 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal); 1655 return Remainder; 1656 } 1657 1658 uint64_t APInt::urem(uint64_t RHS) const { 1659 assert(RHS != 0 && "Remainder by zero?"); 1660 1661 if (isSingleWord()) 1662 return U.VAL % RHS; 1663 1664 // Get some facts about the LHS 1665 unsigned lhsWords = getNumWords(getActiveBits()); 1666 1667 // Check the degenerate cases 1668 if (lhsWords == 0) 1669 // 0 % Y ===> 0 1670 return 0; 1671 if (RHS == 1) 1672 // X % 1 ===> 0 1673 return 0; 1674 if (this->ult(RHS)) 1675 // X % Y ===> X, iff X < Y 1676 return getZExtValue(); 1677 if (*this == RHS) 1678 // X % X == 0; 1679 return 0; 1680 if (lhsWords == 1) 1681 // All high words are zero, just use native remainder 1682 return U.pVal[0] % RHS; 1683 1684 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1685 uint64_t Remainder; 1686 divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder); 1687 return Remainder; 1688 } 1689 1690 APInt APInt::srem(const APInt &RHS) const { 1691 if (isNegative()) { 1692 if (RHS.isNegative()) 1693 return -((-(*this)).urem(-RHS)); 1694 return -((-(*this)).urem(RHS)); 1695 } 1696 if (RHS.isNegative()) 1697 return this->urem(-RHS); 1698 return this->urem(RHS); 1699 } 1700 1701 int64_t APInt::srem(int64_t RHS) const { 1702 if (isNegative()) { 1703 if (RHS < 0) 1704 return -((-(*this)).urem(-RHS)); 1705 return -((-(*this)).urem(RHS)); 1706 } 1707 if (RHS < 0) 1708 return this->urem(-RHS); 1709 return this->urem(RHS); 1710 } 1711 1712 void APInt::udivrem(const APInt &LHS, const APInt &RHS, 1713 APInt &Quotient, APInt &Remainder) { 1714 assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1715 unsigned BitWidth = LHS.BitWidth; 1716 1717 // First, deal with the easy case 1718 if (LHS.isSingleWord()) { 1719 assert(RHS.U.VAL != 0 && "Divide by zero?"); 1720 uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL; 1721 uint64_t RemVal = LHS.U.VAL % RHS.U.VAL; 1722 Quotient = APInt(BitWidth, QuotVal); 1723 Remainder = APInt(BitWidth, RemVal); 1724 return; 1725 } 1726 1727 // Get some size facts about the dividend and divisor 1728 unsigned lhsWords = getNumWords(LHS.getActiveBits()); 1729 unsigned rhsBits = RHS.getActiveBits(); 1730 unsigned rhsWords = getNumWords(rhsBits); 1731 assert(rhsWords && "Performing divrem operation by zero ???"); 1732 1733 // Check the degenerate cases 1734 if (lhsWords == 0) { 1735 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0 1736 Remainder = APInt(BitWidth, 0); // 0 % Y ===> 0 1737 return; 1738 } 1739 1740 if (rhsBits == 1) { 1741 Quotient = LHS; // X / 1 ===> X 1742 Remainder = APInt(BitWidth, 0); // X % 1 ===> 0 1743 } 1744 1745 if (lhsWords < rhsWords || LHS.ult(RHS)) { 1746 Remainder = LHS; // X % Y ===> X, iff X < Y 1747 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y 1748 return; 1749 } 1750 1751 if (LHS == RHS) { 1752 Quotient = APInt(BitWidth, 1); // X / X ===> 1 1753 Remainder = APInt(BitWidth, 0); // X % X ===> 0; 1754 return; 1755 } 1756 1757 // Make sure there is enough space to hold the results. 1758 // NOTE: This assumes that reallocate won't affect any bits if it doesn't 1759 // change the size. This is necessary if Quotient or Remainder is aliased 1760 // with LHS or RHS. 1761 Quotient.reallocate(BitWidth); 1762 Remainder.reallocate(BitWidth); 1763 1764 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. 1765 // There is only one word to consider so use the native versions. 1766 uint64_t lhsValue = LHS.U.pVal[0]; 1767 uint64_t rhsValue = RHS.U.pVal[0]; 1768 Quotient = lhsValue / rhsValue; 1769 Remainder = lhsValue % rhsValue; 1770 return; 1771 } 1772 1773 // Okay, lets do it the long way 1774 divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, 1775 Remainder.U.pVal); 1776 // Clear the rest of the Quotient and Remainder. 1777 std::memset(Quotient.U.pVal + lhsWords, 0, 1778 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); 1779 std::memset(Remainder.U.pVal + rhsWords, 0, 1780 (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE); 1781 } 1782 1783 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient, 1784 uint64_t &Remainder) { 1785 assert(RHS != 0 && "Divide by zero?"); 1786 unsigned BitWidth = LHS.BitWidth; 1787 1788 // First, deal with the easy case 1789 if (LHS.isSingleWord()) { 1790 uint64_t QuotVal = LHS.U.VAL / RHS; 1791 Remainder = LHS.U.VAL % RHS; 1792 Quotient = APInt(BitWidth, QuotVal); 1793 return; 1794 } 1795 1796 // Get some size facts about the dividend and divisor 1797 unsigned lhsWords = getNumWords(LHS.getActiveBits()); 1798 1799 // Check the degenerate cases 1800 if (lhsWords == 0) { 1801 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0 1802 Remainder = 0; // 0 % Y ===> 0 1803 return; 1804 } 1805 1806 if (RHS == 1) { 1807 Quotient = LHS; // X / 1 ===> X 1808 Remainder = 0; // X % 1 ===> 0 1809 return; 1810 } 1811 1812 if (LHS.ult(RHS)) { 1813 Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y 1814 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y 1815 return; 1816 } 1817 1818 if (LHS == RHS) { 1819 Quotient = APInt(BitWidth, 1); // X / X ===> 1 1820 Remainder = 0; // X % X ===> 0; 1821 return; 1822 } 1823 1824 // Make sure there is enough space to hold the results. 1825 // NOTE: This assumes that reallocate won't affect any bits if it doesn't 1826 // change the size. This is necessary if Quotient is aliased with LHS. 1827 Quotient.reallocate(BitWidth); 1828 1829 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. 1830 // There is only one word to consider so use the native versions. 1831 uint64_t lhsValue = LHS.U.pVal[0]; 1832 Quotient = lhsValue / RHS; 1833 Remainder = lhsValue % RHS; 1834 return; 1835 } 1836 1837 // Okay, lets do it the long way 1838 divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder); 1839 // Clear the rest of the Quotient. 1840 std::memset(Quotient.U.pVal + lhsWords, 0, 1841 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); 1842 } 1843 1844 void APInt::sdivrem(const APInt &LHS, const APInt &RHS, 1845 APInt &Quotient, APInt &Remainder) { 1846 if (LHS.isNegative()) { 1847 if (RHS.isNegative()) 1848 APInt::udivrem(-LHS, -RHS, Quotient, Remainder); 1849 else { 1850 APInt::udivrem(-LHS, RHS, Quotient, Remainder); 1851 Quotient.negate(); 1852 } 1853 Remainder.negate(); 1854 } else if (RHS.isNegative()) { 1855 APInt::udivrem(LHS, -RHS, Quotient, Remainder); 1856 Quotient.negate(); 1857 } else { 1858 APInt::udivrem(LHS, RHS, Quotient, Remainder); 1859 } 1860 } 1861 1862 void APInt::sdivrem(const APInt &LHS, int64_t RHS, 1863 APInt &Quotient, int64_t &Remainder) { 1864 uint64_t R = Remainder; 1865 if (LHS.isNegative()) { 1866 if (RHS < 0) 1867 APInt::udivrem(-LHS, -RHS, Quotient, R); 1868 else { 1869 APInt::udivrem(-LHS, RHS, Quotient, R); 1870 Quotient.negate(); 1871 } 1872 R = -R; 1873 } else if (RHS < 0) { 1874 APInt::udivrem(LHS, -RHS, Quotient, R); 1875 Quotient.negate(); 1876 } else { 1877 APInt::udivrem(LHS, RHS, Quotient, R); 1878 } 1879 Remainder = R; 1880 } 1881 1882 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const { 1883 APInt Res = *this+RHS; 1884 Overflow = isNonNegative() == RHS.isNonNegative() && 1885 Res.isNonNegative() != isNonNegative(); 1886 return Res; 1887 } 1888 1889 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const { 1890 APInt Res = *this+RHS; 1891 Overflow = Res.ult(RHS); 1892 return Res; 1893 } 1894 1895 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const { 1896 APInt Res = *this - RHS; 1897 Overflow = isNonNegative() != RHS.isNonNegative() && 1898 Res.isNonNegative() != isNonNegative(); 1899 return Res; 1900 } 1901 1902 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const { 1903 APInt Res = *this-RHS; 1904 Overflow = Res.ugt(*this); 1905 return Res; 1906 } 1907 1908 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const { 1909 // MININT/-1 --> overflow. 1910 Overflow = isMinSignedValue() && RHS.isAllOnesValue(); 1911 return sdiv(RHS); 1912 } 1913 1914 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const { 1915 APInt Res = *this * RHS; 1916 1917 if (*this != 0 && RHS != 0) 1918 Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS; 1919 else 1920 Overflow = false; 1921 return Res; 1922 } 1923 1924 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const { 1925 APInt Res = *this * RHS; 1926 1927 if (*this != 0 && RHS != 0) 1928 Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS; 1929 else 1930 Overflow = false; 1931 return Res; 1932 } 1933 1934 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const { 1935 Overflow = ShAmt.uge(getBitWidth()); 1936 if (Overflow) 1937 return APInt(BitWidth, 0); 1938 1939 if (isNonNegative()) // Don't allow sign change. 1940 Overflow = ShAmt.uge(countLeadingZeros()); 1941 else 1942 Overflow = ShAmt.uge(countLeadingOnes()); 1943 1944 return *this << ShAmt; 1945 } 1946 1947 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const { 1948 Overflow = ShAmt.uge(getBitWidth()); 1949 if (Overflow) 1950 return APInt(BitWidth, 0); 1951 1952 Overflow = ShAmt.ugt(countLeadingZeros()); 1953 1954 return *this << ShAmt; 1955 } 1956 1957 1958 1959 1960 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { 1961 // Check our assumptions here 1962 assert(!str.empty() && "Invalid string length"); 1963 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || 1964 radix == 36) && 1965 "Radix should be 2, 8, 10, 16, or 36!"); 1966 1967 StringRef::iterator p = str.begin(); 1968 size_t slen = str.size(); 1969 bool isNeg = *p == '-'; 1970 if (*p == '-' || *p == '+') { 1971 p++; 1972 slen--; 1973 assert(slen && "String is only a sign, needs a value."); 1974 } 1975 assert((slen <= numbits || radix != 2) && "Insufficient bit width"); 1976 assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width"); 1977 assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width"); 1978 assert((((slen-1)*64)/22 <= numbits || radix != 10) && 1979 "Insufficient bit width"); 1980 1981 // Allocate memory if needed 1982 if (isSingleWord()) 1983 U.VAL = 0; 1984 else 1985 U.pVal = getClearedMemory(getNumWords()); 1986 1987 // Figure out if we can shift instead of multiply 1988 unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0); 1989 1990 // Enter digit traversal loop 1991 for (StringRef::iterator e = str.end(); p != e; ++p) { 1992 unsigned digit = getDigit(*p, radix); 1993 assert(digit < radix && "Invalid character in digit string"); 1994 1995 // Shift or multiply the value by the radix 1996 if (slen > 1) { 1997 if (shift) 1998 *this <<= shift; 1999 else 2000 *this *= radix; 2001 } 2002 2003 // Add in the digit we just interpreted 2004 *this += digit; 2005 } 2006 // If its negative, put it in two's complement form 2007 if (isNeg) 2008 this->negate(); 2009 } 2010 2011 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, 2012 bool Signed, bool formatAsCLiteral) const { 2013 assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 || 2014 Radix == 36) && 2015 "Radix should be 2, 8, 10, 16, or 36!"); 2016 2017 const char *Prefix = ""; 2018 if (formatAsCLiteral) { 2019 switch (Radix) { 2020 case 2: 2021 // Binary literals are a non-standard extension added in gcc 4.3: 2022 // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html 2023 Prefix = "0b"; 2024 break; 2025 case 8: 2026 Prefix = "0"; 2027 break; 2028 case 10: 2029 break; // No prefix 2030 case 16: 2031 Prefix = "0x"; 2032 break; 2033 default: 2034 llvm_unreachable("Invalid radix!"); 2035 } 2036 } 2037 2038 // First, check for a zero value and just short circuit the logic below. 2039 if (*this == 0) { 2040 while (*Prefix) { 2041 Str.push_back(*Prefix); 2042 ++Prefix; 2043 }; 2044 Str.push_back('0'); 2045 return; 2046 } 2047 2048 static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 2049 2050 if (isSingleWord()) { 2051 char Buffer[65]; 2052 char *BufPtr = std::end(Buffer); 2053 2054 uint64_t N; 2055 if (!Signed) { 2056 N = getZExtValue(); 2057 } else { 2058 int64_t I = getSExtValue(); 2059 if (I >= 0) { 2060 N = I; 2061 } else { 2062 Str.push_back('-'); 2063 N = -(uint64_t)I; 2064 } 2065 } 2066 2067 while (*Prefix) { 2068 Str.push_back(*Prefix); 2069 ++Prefix; 2070 }; 2071 2072 while (N) { 2073 *--BufPtr = Digits[N % Radix]; 2074 N /= Radix; 2075 } 2076 Str.append(BufPtr, std::end(Buffer)); 2077 return; 2078 } 2079 2080 APInt Tmp(*this); 2081 2082 if (Signed && isNegative()) { 2083 // They want to print the signed version and it is a negative value 2084 // Flip the bits and add one to turn it into the equivalent positive 2085 // value and put a '-' in the result. 2086 Tmp.negate(); 2087 Str.push_back('-'); 2088 } 2089 2090 while (*Prefix) { 2091 Str.push_back(*Prefix); 2092 ++Prefix; 2093 }; 2094 2095 // We insert the digits backward, then reverse them to get the right order. 2096 unsigned StartDig = Str.size(); 2097 2098 // For the 2, 8 and 16 bit cases, we can just shift instead of divide 2099 // because the number of bits per digit (1, 3 and 4 respectively) divides 2100 // equally. We just shift until the value is zero. 2101 if (Radix == 2 || Radix == 8 || Radix == 16) { 2102 // Just shift tmp right for each digit width until it becomes zero 2103 unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1)); 2104 unsigned MaskAmt = Radix - 1; 2105 2106 while (Tmp.getBoolValue()) { 2107 unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt; 2108 Str.push_back(Digits[Digit]); 2109 Tmp.lshrInPlace(ShiftAmt); 2110 } 2111 } else { 2112 while (Tmp.getBoolValue()) { 2113 uint64_t Digit; 2114 udivrem(Tmp, Radix, Tmp, Digit); 2115 assert(Digit < Radix && "divide failed"); 2116 Str.push_back(Digits[Digit]); 2117 } 2118 } 2119 2120 // Reverse the digits before returning. 2121 std::reverse(Str.begin()+StartDig, Str.end()); 2122 } 2123 2124 /// Returns the APInt as a std::string. Note that this is an inefficient method. 2125 /// It is better to pass in a SmallVector/SmallString to the methods above. 2126 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const { 2127 SmallString<40> S; 2128 toString(S, Radix, Signed, /* formatAsCLiteral = */false); 2129 return S.str(); 2130 } 2131 2132 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2133 LLVM_DUMP_METHOD void APInt::dump() const { 2134 SmallString<40> S, U; 2135 this->toStringUnsigned(U); 2136 this->toStringSigned(S); 2137 dbgs() << "APInt(" << BitWidth << "b, " 2138 << U << "u " << S << "s)\n"; 2139 } 2140 #endif 2141 2142 void APInt::print(raw_ostream &OS, bool isSigned) const { 2143 SmallString<40> S; 2144 this->toString(S, 10, isSigned, /* formatAsCLiteral = */false); 2145 OS << S; 2146 } 2147 2148 // This implements a variety of operations on a representation of 2149 // arbitrary precision, two's-complement, bignum integer values. 2150 2151 // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe 2152 // and unrestricting assumption. 2153 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0, 2154 "Part width must be divisible by 2!"); 2155 2156 /* Some handy functions local to this file. */ 2157 2158 /* Returns the integer part with the least significant BITS set. 2159 BITS cannot be zero. */ 2160 static inline APInt::WordType lowBitMask(unsigned bits) { 2161 assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD); 2162 2163 return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits); 2164 } 2165 2166 /* Returns the value of the lower half of PART. */ 2167 static inline APInt::WordType lowHalf(APInt::WordType part) { 2168 return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2); 2169 } 2170 2171 /* Returns the value of the upper half of PART. */ 2172 static inline APInt::WordType highHalf(APInt::WordType part) { 2173 return part >> (APInt::APINT_BITS_PER_WORD / 2); 2174 } 2175 2176 /* Returns the bit number of the most significant set bit of a part. 2177 If the input number has no bits set -1U is returned. */ 2178 static unsigned partMSB(APInt::WordType value) { 2179 return findLastSet(value, ZB_Max); 2180 } 2181 2182 /* Returns the bit number of the least significant set bit of a 2183 part. If the input number has no bits set -1U is returned. */ 2184 static unsigned partLSB(APInt::WordType value) { 2185 return findFirstSet(value, ZB_Max); 2186 } 2187 2188 /* Sets the least significant part of a bignum to the input value, and 2189 zeroes out higher parts. */ 2190 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) { 2191 assert(parts > 0); 2192 2193 dst[0] = part; 2194 for (unsigned i = 1; i < parts; i++) 2195 dst[i] = 0; 2196 } 2197 2198 /* Assign one bignum to another. */ 2199 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) { 2200 for (unsigned i = 0; i < parts; i++) 2201 dst[i] = src[i]; 2202 } 2203 2204 /* Returns true if a bignum is zero, false otherwise. */ 2205 bool APInt::tcIsZero(const WordType *src, unsigned parts) { 2206 for (unsigned i = 0; i < parts; i++) 2207 if (src[i]) 2208 return false; 2209 2210 return true; 2211 } 2212 2213 /* Extract the given bit of a bignum; returns 0 or 1. */ 2214 int APInt::tcExtractBit(const WordType *parts, unsigned bit) { 2215 return (parts[whichWord(bit)] & maskBit(bit)) != 0; 2216 } 2217 2218 /* Set the given bit of a bignum. */ 2219 void APInt::tcSetBit(WordType *parts, unsigned bit) { 2220 parts[whichWord(bit)] |= maskBit(bit); 2221 } 2222 2223 /* Clears the given bit of a bignum. */ 2224 void APInt::tcClearBit(WordType *parts, unsigned bit) { 2225 parts[whichWord(bit)] &= ~maskBit(bit); 2226 } 2227 2228 /* Returns the bit number of the least significant set bit of a 2229 number. If the input number has no bits set -1U is returned. */ 2230 unsigned APInt::tcLSB(const WordType *parts, unsigned n) { 2231 for (unsigned i = 0; i < n; i++) { 2232 if (parts[i] != 0) { 2233 unsigned lsb = partLSB(parts[i]); 2234 2235 return lsb + i * APINT_BITS_PER_WORD; 2236 } 2237 } 2238 2239 return -1U; 2240 } 2241 2242 /* Returns the bit number of the most significant set bit of a number. 2243 If the input number has no bits set -1U is returned. */ 2244 unsigned APInt::tcMSB(const WordType *parts, unsigned n) { 2245 do { 2246 --n; 2247 2248 if (parts[n] != 0) { 2249 unsigned msb = partMSB(parts[n]); 2250 2251 return msb + n * APINT_BITS_PER_WORD; 2252 } 2253 } while (n); 2254 2255 return -1U; 2256 } 2257 2258 /* Copy the bit vector of width srcBITS from SRC, starting at bit 2259 srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes 2260 the least significant bit of DST. All high bits above srcBITS in 2261 DST are zero-filled. */ 2262 void 2263 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src, 2264 unsigned srcBits, unsigned srcLSB) { 2265 unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD; 2266 assert(dstParts <= dstCount); 2267 2268 unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD; 2269 tcAssign (dst, src + firstSrcPart, dstParts); 2270 2271 unsigned shift = srcLSB % APINT_BITS_PER_WORD; 2272 tcShiftRight (dst, dstParts, shift); 2273 2274 /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC 2275 in DST. If this is less that srcBits, append the rest, else 2276 clear the high bits. */ 2277 unsigned n = dstParts * APINT_BITS_PER_WORD - shift; 2278 if (n < srcBits) { 2279 WordType mask = lowBitMask (srcBits - n); 2280 dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask) 2281 << n % APINT_BITS_PER_WORD); 2282 } else if (n > srcBits) { 2283 if (srcBits % APINT_BITS_PER_WORD) 2284 dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD); 2285 } 2286 2287 /* Clear high parts. */ 2288 while (dstParts < dstCount) 2289 dst[dstParts++] = 0; 2290 } 2291 2292 /* DST += RHS + C where C is zero or one. Returns the carry flag. */ 2293 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs, 2294 WordType c, unsigned parts) { 2295 assert(c <= 1); 2296 2297 for (unsigned i = 0; i < parts; i++) { 2298 WordType l = dst[i]; 2299 if (c) { 2300 dst[i] += rhs[i] + 1; 2301 c = (dst[i] <= l); 2302 } else { 2303 dst[i] += rhs[i]; 2304 c = (dst[i] < l); 2305 } 2306 } 2307 2308 return c; 2309 } 2310 2311 /// This function adds a single "word" integer, src, to the multiple 2312 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and 2313 /// 1 is returned if there is a carry out, otherwise 0 is returned. 2314 /// @returns the carry of the addition. 2315 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src, 2316 unsigned parts) { 2317 for (unsigned i = 0; i < parts; ++i) { 2318 dst[i] += src; 2319 if (dst[i] >= src) 2320 return 0; // No need to carry so exit early. 2321 src = 1; // Carry one to next digit. 2322 } 2323 2324 return 1; 2325 } 2326 2327 /* DST -= RHS + C where C is zero or one. Returns the carry flag. */ 2328 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs, 2329 WordType c, unsigned parts) { 2330 assert(c <= 1); 2331 2332 for (unsigned i = 0; i < parts; i++) { 2333 WordType l = dst[i]; 2334 if (c) { 2335 dst[i] -= rhs[i] + 1; 2336 c = (dst[i] >= l); 2337 } else { 2338 dst[i] -= rhs[i]; 2339 c = (dst[i] > l); 2340 } 2341 } 2342 2343 return c; 2344 } 2345 2346 /// This function subtracts a single "word" (64-bit word), src, from 2347 /// the multi-word integer array, dst[], propagating the borrowed 1 value until 2348 /// no further borrowing is needed or it runs out of "words" in dst. The result 2349 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not 2350 /// exhausted. In other words, if src > dst then this function returns 1, 2351 /// otherwise 0. 2352 /// @returns the borrow out of the subtraction 2353 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src, 2354 unsigned parts) { 2355 for (unsigned i = 0; i < parts; ++i) { 2356 WordType Dst = dst[i]; 2357 dst[i] -= src; 2358 if (src <= Dst) 2359 return 0; // No need to borrow so exit early. 2360 src = 1; // We have to "borrow 1" from next "word" 2361 } 2362 2363 return 1; 2364 } 2365 2366 /* Negate a bignum in-place. */ 2367 void APInt::tcNegate(WordType *dst, unsigned parts) { 2368 tcComplement(dst, parts); 2369 tcIncrement(dst, parts); 2370 } 2371 2372 /* DST += SRC * MULTIPLIER + CARRY if add is true 2373 DST = SRC * MULTIPLIER + CARRY if add is false 2374 2375 Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC 2376 they must start at the same point, i.e. DST == SRC. 2377 2378 If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is 2379 returned. Otherwise DST is filled with the least significant 2380 DSTPARTS parts of the result, and if all of the omitted higher 2381 parts were zero return zero, otherwise overflow occurred and 2382 return one. */ 2383 int APInt::tcMultiplyPart(WordType *dst, const WordType *src, 2384 WordType multiplier, WordType carry, 2385 unsigned srcParts, unsigned dstParts, 2386 bool add) { 2387 /* Otherwise our writes of DST kill our later reads of SRC. */ 2388 assert(dst <= src || dst >= src + srcParts); 2389 assert(dstParts <= srcParts + 1); 2390 2391 /* N loops; minimum of dstParts and srcParts. */ 2392 unsigned n = std::min(dstParts, srcParts); 2393 2394 for (unsigned i = 0; i < n; i++) { 2395 WordType low, mid, high, srcPart; 2396 2397 /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY. 2398 2399 This cannot overflow, because 2400 2401 (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1) 2402 2403 which is less than n^2. */ 2404 2405 srcPart = src[i]; 2406 2407 if (multiplier == 0 || srcPart == 0) { 2408 low = carry; 2409 high = 0; 2410 } else { 2411 low = lowHalf(srcPart) * lowHalf(multiplier); 2412 high = highHalf(srcPart) * highHalf(multiplier); 2413 2414 mid = lowHalf(srcPart) * highHalf(multiplier); 2415 high += highHalf(mid); 2416 mid <<= APINT_BITS_PER_WORD / 2; 2417 if (low + mid < low) 2418 high++; 2419 low += mid; 2420 2421 mid = highHalf(srcPart) * lowHalf(multiplier); 2422 high += highHalf(mid); 2423 mid <<= APINT_BITS_PER_WORD / 2; 2424 if (low + mid < low) 2425 high++; 2426 low += mid; 2427 2428 /* Now add carry. */ 2429 if (low + carry < low) 2430 high++; 2431 low += carry; 2432 } 2433 2434 if (add) { 2435 /* And now DST[i], and store the new low part there. */ 2436 if (low + dst[i] < low) 2437 high++; 2438 dst[i] += low; 2439 } else 2440 dst[i] = low; 2441 2442 carry = high; 2443 } 2444 2445 if (srcParts < dstParts) { 2446 /* Full multiplication, there is no overflow. */ 2447 assert(srcParts + 1 == dstParts); 2448 dst[srcParts] = carry; 2449 return 0; 2450 } 2451 2452 /* We overflowed if there is carry. */ 2453 if (carry) 2454 return 1; 2455 2456 /* We would overflow if any significant unwritten parts would be 2457 non-zero. This is true if any remaining src parts are non-zero 2458 and the multiplier is non-zero. */ 2459 if (multiplier) 2460 for (unsigned i = dstParts; i < srcParts; i++) 2461 if (src[i]) 2462 return 1; 2463 2464 /* We fitted in the narrow destination. */ 2465 return 0; 2466 } 2467 2468 /* DST = LHS * RHS, where DST has the same width as the operands and 2469 is filled with the least significant parts of the result. Returns 2470 one if overflow occurred, otherwise zero. DST must be disjoint 2471 from both operands. */ 2472 int APInt::tcMultiply(WordType *dst, const WordType *lhs, 2473 const WordType *rhs, unsigned parts) { 2474 assert(dst != lhs && dst != rhs); 2475 2476 int overflow = 0; 2477 tcSet(dst, 0, parts); 2478 2479 for (unsigned i = 0; i < parts; i++) 2480 overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts, 2481 parts - i, true); 2482 2483 return overflow; 2484 } 2485 2486 /// DST = LHS * RHS, where DST has width the sum of the widths of the 2487 /// operands. No overflow occurs. DST must be disjoint from both operands. 2488 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs, 2489 const WordType *rhs, unsigned lhsParts, 2490 unsigned rhsParts) { 2491 /* Put the narrower number on the LHS for less loops below. */ 2492 if (lhsParts > rhsParts) 2493 return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts); 2494 2495 assert(dst != lhs && dst != rhs); 2496 2497 tcSet(dst, 0, rhsParts); 2498 2499 for (unsigned i = 0; i < lhsParts; i++) 2500 tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true); 2501 } 2502 2503 /* If RHS is zero LHS and REMAINDER are left unchanged, return one. 2504 Otherwise set LHS to LHS / RHS with the fractional part discarded, 2505 set REMAINDER to the remainder, return zero. i.e. 2506 2507 OLD_LHS = RHS * LHS + REMAINDER 2508 2509 SCRATCH is a bignum of the same size as the operands and result for 2510 use by the routine; its contents need not be initialized and are 2511 destroyed. LHS, REMAINDER and SCRATCH must be distinct. 2512 */ 2513 int APInt::tcDivide(WordType *lhs, const WordType *rhs, 2514 WordType *remainder, WordType *srhs, 2515 unsigned parts) { 2516 assert(lhs != remainder && lhs != srhs && remainder != srhs); 2517 2518 unsigned shiftCount = tcMSB(rhs, parts) + 1; 2519 if (shiftCount == 0) 2520 return true; 2521 2522 shiftCount = parts * APINT_BITS_PER_WORD - shiftCount; 2523 unsigned n = shiftCount / APINT_BITS_PER_WORD; 2524 WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD); 2525 2526 tcAssign(srhs, rhs, parts); 2527 tcShiftLeft(srhs, parts, shiftCount); 2528 tcAssign(remainder, lhs, parts); 2529 tcSet(lhs, 0, parts); 2530 2531 /* Loop, subtracting SRHS if REMAINDER is greater and adding that to 2532 the total. */ 2533 for (;;) { 2534 int compare = tcCompare(remainder, srhs, parts); 2535 if (compare >= 0) { 2536 tcSubtract(remainder, srhs, 0, parts); 2537 lhs[n] |= mask; 2538 } 2539 2540 if (shiftCount == 0) 2541 break; 2542 shiftCount--; 2543 tcShiftRight(srhs, parts, 1); 2544 if ((mask >>= 1) == 0) { 2545 mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1); 2546 n--; 2547 } 2548 } 2549 2550 return false; 2551 } 2552 2553 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are 2554 /// no restrictions on Count. 2555 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) { 2556 // Don't bother performing a no-op shift. 2557 if (!Count) 2558 return; 2559 2560 // WordShift is the inter-part shift; BitShift is the intra-part shift. 2561 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); 2562 unsigned BitShift = Count % APINT_BITS_PER_WORD; 2563 2564 // Fastpath for moving by whole words. 2565 if (BitShift == 0) { 2566 std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE); 2567 } else { 2568 while (Words-- > WordShift) { 2569 Dst[Words] = Dst[Words - WordShift] << BitShift; 2570 if (Words > WordShift) 2571 Dst[Words] |= 2572 Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift); 2573 } 2574 } 2575 2576 // Fill in the remainder with 0s. 2577 std::memset(Dst, 0, WordShift * APINT_WORD_SIZE); 2578 } 2579 2580 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There 2581 /// are no restrictions on Count. 2582 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) { 2583 // Don't bother performing a no-op shift. 2584 if (!Count) 2585 return; 2586 2587 // WordShift is the inter-part shift; BitShift is the intra-part shift. 2588 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); 2589 unsigned BitShift = Count % APINT_BITS_PER_WORD; 2590 2591 unsigned WordsToMove = Words - WordShift; 2592 // Fastpath for moving by whole words. 2593 if (BitShift == 0) { 2594 std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE); 2595 } else { 2596 for (unsigned i = 0; i != WordsToMove; ++i) { 2597 Dst[i] = Dst[i + WordShift] >> BitShift; 2598 if (i + 1 != WordsToMove) 2599 Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift); 2600 } 2601 } 2602 2603 // Fill in the remainder with 0s. 2604 std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE); 2605 } 2606 2607 /* Bitwise and of two bignums. */ 2608 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) { 2609 for (unsigned i = 0; i < parts; i++) 2610 dst[i] &= rhs[i]; 2611 } 2612 2613 /* Bitwise inclusive or of two bignums. */ 2614 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) { 2615 for (unsigned i = 0; i < parts; i++) 2616 dst[i] |= rhs[i]; 2617 } 2618 2619 /* Bitwise exclusive or of two bignums. */ 2620 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) { 2621 for (unsigned i = 0; i < parts; i++) 2622 dst[i] ^= rhs[i]; 2623 } 2624 2625 /* Complement a bignum in-place. */ 2626 void APInt::tcComplement(WordType *dst, unsigned parts) { 2627 for (unsigned i = 0; i < parts; i++) 2628 dst[i] = ~dst[i]; 2629 } 2630 2631 /* Comparison (unsigned) of two bignums. */ 2632 int APInt::tcCompare(const WordType *lhs, const WordType *rhs, 2633 unsigned parts) { 2634 while (parts) { 2635 parts--; 2636 if (lhs[parts] != rhs[parts]) 2637 return (lhs[parts] > rhs[parts]) ? 1 : -1; 2638 } 2639 2640 return 0; 2641 } 2642 2643 /* Set the least significant BITS bits of a bignum, clear the 2644 rest. */ 2645 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts, 2646 unsigned bits) { 2647 unsigned i = 0; 2648 while (bits > APINT_BITS_PER_WORD) { 2649 dst[i++] = ~(WordType) 0; 2650 bits -= APINT_BITS_PER_WORD; 2651 } 2652 2653 if (bits) 2654 dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits); 2655 2656 while (i < parts) 2657 dst[i++] = 0; 2658 } 2659 2660 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B, 2661 APInt::Rounding RM) { 2662 // Currently udivrem always rounds down. 2663 switch (RM) { 2664 case APInt::Rounding::DOWN: 2665 case APInt::Rounding::TOWARD_ZERO: 2666 return A.udiv(B); 2667 case APInt::Rounding::UP: { 2668 APInt Quo, Rem; 2669 APInt::udivrem(A, B, Quo, Rem); 2670 if (Rem == 0) 2671 return Quo; 2672 return Quo + 1; 2673 } 2674 } 2675 llvm_unreachable("Unknown APInt::Rounding enum"); 2676 } 2677 2678 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B, 2679 APInt::Rounding RM) { 2680 switch (RM) { 2681 case APInt::Rounding::DOWN: 2682 case APInt::Rounding::UP: { 2683 APInt Quo, Rem; 2684 APInt::sdivrem(A, B, Quo, Rem); 2685 if (Rem == 0) 2686 return Quo; 2687 // This algorithm deals with arbitrary rounding mode used by sdivrem. 2688 // We want to check whether the non-integer part of the mathematical value 2689 // is negative or not. If the non-integer part is negative, we need to round 2690 // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's 2691 // already rounded down. 2692 if (RM == APInt::Rounding::DOWN) { 2693 if (Rem.isNegative() != B.isNegative()) 2694 return Quo - 1; 2695 return Quo; 2696 } 2697 if (Rem.isNegative() != B.isNegative()) 2698 return Quo; 2699 return Quo + 1; 2700 } 2701 // Currently sdiv rounds twards zero. 2702 case APInt::Rounding::TOWARD_ZERO: 2703 return A.sdiv(B); 2704 } 2705 llvm_unreachable("Unknown APInt::Rounding enum"); 2706 } 2707 2708 Optional<APInt> 2709 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C, 2710 unsigned RangeWidth) { 2711 unsigned CoeffWidth = A.getBitWidth(); 2712 assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth()); 2713 assert(RangeWidth <= CoeffWidth && 2714 "Value range width should be less than coefficient width"); 2715 assert(RangeWidth > 1 && "Value range bit width should be > 1"); 2716 2717 LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B 2718 << "x + " << C << ", rw:" << RangeWidth << '\n'); 2719 2720 // Identify 0 as a (non)solution immediately. 2721 if (C.sextOrTrunc(RangeWidth).isNullValue() ) { 2722 LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n"); 2723 return APInt(CoeffWidth, 0); 2724 } 2725 2726 // The result of APInt arithmetic has the same bit width as the operands, 2727 // so it can actually lose high bits. A product of two n-bit integers needs 2728 // 2n-1 bits to represent the full value. 2729 // The operation done below (on quadratic coefficients) that can produce 2730 // the largest value is the evaluation of the equation during bisection, 2731 // which needs 3 times the bitwidth of the coefficient, so the total number 2732 // of required bits is 3n. 2733 // 2734 // The purpose of this extension is to simulate the set Z of all integers, 2735 // where n+1 > n for all n in Z. In Z it makes sense to talk about positive 2736 // and negative numbers (not so much in a modulo arithmetic). The method 2737 // used to solve the equation is based on the standard formula for real 2738 // numbers, and uses the concepts of "positive" and "negative" with their 2739 // usual meanings. 2740 CoeffWidth *= 3; 2741 A = A.sext(CoeffWidth); 2742 B = B.sext(CoeffWidth); 2743 C = C.sext(CoeffWidth); 2744 2745 // Make A > 0 for simplicity. Negate cannot overflow at this point because 2746 // the bit width has increased. 2747 if (A.isNegative()) { 2748 A.negate(); 2749 B.negate(); 2750 C.negate(); 2751 } 2752 2753 // Solving an equation q(x) = 0 with coefficients in modular arithmetic 2754 // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ..., 2755 // and R = 2^BitWidth. 2756 // Since we're trying not only to find exact solutions, but also values 2757 // that "wrap around", such a set will always have a solution, i.e. an x 2758 // that satisfies at least one of the equations, or such that |q(x)| 2759 // exceeds kR, while |q(x-1)| for the same k does not. 2760 // 2761 // We need to find a value k, such that Ax^2 + Bx + C = kR will have a 2762 // positive solution n (in the above sense), and also such that the n 2763 // will be the least among all solutions corresponding to k = 0, 1, ... 2764 // (more precisely, the least element in the set 2765 // { n(k) | k is such that a solution n(k) exists }). 2766 // 2767 // Consider the parabola (over real numbers) that corresponds to the 2768 // quadratic equation. Since A > 0, the arms of the parabola will point 2769 // up. Picking different values of k will shift it up and down by R. 2770 // 2771 // We want to shift the parabola in such a way as to reduce the problem 2772 // of solving q(x) = kR to solving shifted_q(x) = 0. 2773 // (The interesting solutions are the ceilings of the real number 2774 // solutions.) 2775 APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth); 2776 APInt TwoA = 2 * A; 2777 APInt SqrB = B * B; 2778 bool PickLow; 2779 2780 auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt { 2781 assert(A.isStrictlyPositive()); 2782 APInt T = V.abs().urem(A); 2783 if (T.isNullValue()) 2784 return V; 2785 return V.isNegative() ? V+T : V+(A-T); 2786 }; 2787 2788 // The vertex of the parabola is at -B/2A, but since A > 0, it's negative 2789 // iff B is positive. 2790 if (B.isNonNegative()) { 2791 // If B >= 0, the vertex it at a negative location (or at 0), so in 2792 // order to have a non-negative solution we need to pick k that makes 2793 // C-kR negative. To satisfy all the requirements for the solution 2794 // that we are looking for, it needs to be closest to 0 of all k. 2795 C = C.srem(R); 2796 if (C.isStrictlyPositive()) 2797 C -= R; 2798 // Pick the greater solution. 2799 PickLow = false; 2800 } else { 2801 // If B < 0, the vertex is at a positive location. For any solution 2802 // to exist, the discriminant must be non-negative. This means that 2803 // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a 2804 // lower bound on values of k: kR >= C - B^2/4A. 2805 APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0. 2806 // Round LowkR up (towards +inf) to the nearest kR. 2807 LowkR = RoundUp(LowkR, R); 2808 2809 // If there exists k meeting the condition above, and such that 2810 // C-kR > 0, there will be two positive real number solutions of 2811 // q(x) = kR. Out of all such values of k, pick the one that makes 2812 // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0). 2813 // In other words, find maximum k such that LowkR <= kR < C. 2814 if (C.sgt(LowkR)) { 2815 // If LowkR < C, then such a k is guaranteed to exist because 2816 // LowkR itself is a multiple of R. 2817 C -= -RoundUp(-C, R); // C = C - RoundDown(C, R) 2818 // Pick the smaller solution. 2819 PickLow = true; 2820 } else { 2821 // If C-kR < 0 for all potential k's, it means that one solution 2822 // will be negative, while the other will be positive. The positive 2823 // solution will shift towards 0 if the parabola is moved up. 2824 // Pick the kR closest to the lower bound (i.e. make C-kR closest 2825 // to 0, or in other words, out of all parabolas that have solutions, 2826 // pick the one that is the farthest "up"). 2827 // Since LowkR is itself a multiple of R, simply take C-LowkR. 2828 C -= LowkR; 2829 // Pick the greater solution. 2830 PickLow = false; 2831 } 2832 } 2833 2834 LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + " 2835 << B << "x + " << C << ", rw:" << RangeWidth << '\n'); 2836 2837 APInt D = SqrB - 4*A*C; 2838 assert(D.isNonNegative() && "Negative discriminant"); 2839 APInt SQ = D.sqrt(); 2840 2841 APInt Q = SQ * SQ; 2842 bool InexactSQ = Q != D; 2843 // The calculated SQ may actually be greater than the exact (non-integer) 2844 // value. If that's the case, decremement SQ to get a value that is lower. 2845 if (Q.sgt(D)) 2846 SQ -= 1; 2847 2848 APInt X; 2849 APInt Rem; 2850 2851 // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact. 2852 // When using the quadratic formula directly, the calculated low root 2853 // may be greater than the exact one, since we would be subtracting SQ. 2854 // To make sure that the calculated root is not greater than the exact 2855 // one, subtract SQ+1 when calculating the low root (for inexact value 2856 // of SQ). 2857 if (PickLow) 2858 APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem); 2859 else 2860 APInt::sdivrem(-B + SQ, TwoA, X, Rem); 2861 2862 // The updated coefficients should be such that the (exact) solution is 2863 // positive. Since APInt division rounds towards 0, the calculated one 2864 // can be 0, but cannot be negative. 2865 assert(X.isNonNegative() && "Solution should be non-negative"); 2866 2867 if (!InexactSQ && Rem.isNullValue()) { 2868 LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n'); 2869 return X; 2870 } 2871 2872 assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D"); 2873 // The exact value of the square root of D should be between SQ and SQ+1. 2874 // This implies that the solution should be between that corresponding to 2875 // SQ (i.e. X) and that corresponding to SQ+1. 2876 // 2877 // The calculated X cannot be greater than the exact (real) solution. 2878 // Actually it must be strictly less than the exact solution, while 2879 // X+1 will be greater than or equal to it. 2880 2881 APInt VX = (A*X + B)*X + C; 2882 APInt VY = VX + TwoA*X + A + B; 2883 bool SignChange = VX.isNegative() != VY.isNegative() || 2884 VX.isNullValue() != VY.isNullValue(); 2885 // If the sign did not change between X and X+1, X is not a valid solution. 2886 // This could happen when the actual (exact) roots don't have an integer 2887 // between them, so they would both be contained between X and X+1. 2888 if (!SignChange) { 2889 LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n"); 2890 return None; 2891 } 2892 2893 X += 1; 2894 LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n'); 2895 return X; 2896 } 2897