1 //===-- APInt.cpp - Implement APInt class ---------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a class to represent arbitrary precision integer 10 // constant values and provide a variety of arithmetic operations on them. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/APInt.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/FoldingSet.h" 17 #include "llvm/ADT/Hashing.h" 18 #include "llvm/ADT/Optional.h" 19 #include "llvm/ADT/SmallString.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/ADT/bit.h" 22 #include "llvm/Config/llvm-config.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/ErrorHandling.h" 25 #include "llvm/Support/MathExtras.h" 26 #include "llvm/Support/raw_ostream.h" 27 #include <climits> 28 #include <cmath> 29 #include <cstdlib> 30 #include <cstring> 31 using namespace llvm; 32 33 #define DEBUG_TYPE "apint" 34 35 /// A utility function for allocating memory, checking for allocation failures, 36 /// and ensuring the contents are zeroed. 37 inline static uint64_t* getClearedMemory(unsigned numWords) { 38 uint64_t *result = new uint64_t[numWords]; 39 memset(result, 0, numWords * sizeof(uint64_t)); 40 return result; 41 } 42 43 /// A utility function for allocating memory and checking for allocation 44 /// failure. The content is not zeroed. 45 inline static uint64_t* getMemory(unsigned numWords) { 46 return new uint64_t[numWords]; 47 } 48 49 /// A utility function that converts a character to a digit. 50 inline static unsigned getDigit(char cdigit, uint8_t radix) { 51 unsigned r; 52 53 if (radix == 16 || radix == 36) { 54 r = cdigit - '0'; 55 if (r <= 9) 56 return r; 57 58 r = cdigit - 'A'; 59 if (r <= radix - 11U) 60 return r + 10; 61 62 r = cdigit - 'a'; 63 if (r <= radix - 11U) 64 return r + 10; 65 66 radix = 10; 67 } 68 69 r = cdigit - '0'; 70 if (r < radix) 71 return r; 72 73 return -1U; 74 } 75 76 77 void APInt::initSlowCase(uint64_t val, bool isSigned) { 78 U.pVal = getClearedMemory(getNumWords()); 79 U.pVal[0] = val; 80 if (isSigned && int64_t(val) < 0) 81 for (unsigned i = 1; i < getNumWords(); ++i) 82 U.pVal[i] = WORDTYPE_MAX; 83 clearUnusedBits(); 84 } 85 86 void APInt::initSlowCase(const APInt& that) { 87 U.pVal = getMemory(getNumWords()); 88 memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE); 89 } 90 91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) { 92 assert(BitWidth && "Bitwidth too small"); 93 assert(bigVal.data() && "Null pointer detected!"); 94 if (isSingleWord()) 95 U.VAL = bigVal[0]; 96 else { 97 // Get memory, cleared to 0 98 U.pVal = getClearedMemory(getNumWords()); 99 // Calculate the number of words to copy 100 unsigned words = std::min<unsigned>(bigVal.size(), getNumWords()); 101 // Copy the words from bigVal to pVal 102 memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE); 103 } 104 // Make sure unused high bits are cleared 105 clearUnusedBits(); 106 } 107 108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal) 109 : BitWidth(numBits) { 110 initFromArray(bigVal); 111 } 112 113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) 114 : BitWidth(numBits) { 115 initFromArray(makeArrayRef(bigVal, numWords)); 116 } 117 118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix) 119 : BitWidth(numbits) { 120 assert(BitWidth && "Bitwidth too small"); 121 fromString(numbits, Str, radix); 122 } 123 124 void APInt::reallocate(unsigned NewBitWidth) { 125 // If the number of words is the same we can just change the width and stop. 126 if (getNumWords() == getNumWords(NewBitWidth)) { 127 BitWidth = NewBitWidth; 128 return; 129 } 130 131 // If we have an allocation, delete it. 132 if (!isSingleWord()) 133 delete [] U.pVal; 134 135 // Update BitWidth. 136 BitWidth = NewBitWidth; 137 138 // If we are supposed to have an allocation, create it. 139 if (!isSingleWord()) 140 U.pVal = getMemory(getNumWords()); 141 } 142 143 void APInt::AssignSlowCase(const APInt& RHS) { 144 // Don't do anything for X = X 145 if (this == &RHS) 146 return; 147 148 // Adjust the bit width and handle allocations as necessary. 149 reallocate(RHS.getBitWidth()); 150 151 // Copy the data. 152 if (isSingleWord()) 153 U.VAL = RHS.U.VAL; 154 else 155 memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE); 156 } 157 158 /// This method 'profiles' an APInt for use with FoldingSet. 159 void APInt::Profile(FoldingSetNodeID& ID) const { 160 ID.AddInteger(BitWidth); 161 162 if (isSingleWord()) { 163 ID.AddInteger(U.VAL); 164 return; 165 } 166 167 unsigned NumWords = getNumWords(); 168 for (unsigned i = 0; i < NumWords; ++i) 169 ID.AddInteger(U.pVal[i]); 170 } 171 172 /// Prefix increment operator. Increments the APInt by one. 173 APInt& APInt::operator++() { 174 if (isSingleWord()) 175 ++U.VAL; 176 else 177 tcIncrement(U.pVal, getNumWords()); 178 return clearUnusedBits(); 179 } 180 181 /// Prefix decrement operator. Decrements the APInt by one. 182 APInt& APInt::operator--() { 183 if (isSingleWord()) 184 --U.VAL; 185 else 186 tcDecrement(U.pVal, getNumWords()); 187 return clearUnusedBits(); 188 } 189 190 /// Adds the RHS APInt to this APInt. 191 /// @returns this, after addition of RHS. 192 /// Addition assignment operator. 193 APInt& APInt::operator+=(const APInt& RHS) { 194 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 195 if (isSingleWord()) 196 U.VAL += RHS.U.VAL; 197 else 198 tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords()); 199 return clearUnusedBits(); 200 } 201 202 APInt& APInt::operator+=(uint64_t RHS) { 203 if (isSingleWord()) 204 U.VAL += RHS; 205 else 206 tcAddPart(U.pVal, RHS, getNumWords()); 207 return clearUnusedBits(); 208 } 209 210 /// Subtracts the RHS APInt from this APInt 211 /// @returns this, after subtraction 212 /// Subtraction assignment operator. 213 APInt& APInt::operator-=(const APInt& RHS) { 214 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 215 if (isSingleWord()) 216 U.VAL -= RHS.U.VAL; 217 else 218 tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords()); 219 return clearUnusedBits(); 220 } 221 222 APInt& APInt::operator-=(uint64_t RHS) { 223 if (isSingleWord()) 224 U.VAL -= RHS; 225 else 226 tcSubtractPart(U.pVal, RHS, getNumWords()); 227 return clearUnusedBits(); 228 } 229 230 APInt APInt::operator*(const APInt& RHS) const { 231 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 232 if (isSingleWord()) 233 return APInt(BitWidth, U.VAL * RHS.U.VAL); 234 235 APInt Result(getMemory(getNumWords()), getBitWidth()); 236 237 tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords()); 238 239 Result.clearUnusedBits(); 240 return Result; 241 } 242 243 void APInt::AndAssignSlowCase(const APInt& RHS) { 244 tcAnd(U.pVal, RHS.U.pVal, getNumWords()); 245 } 246 247 void APInt::OrAssignSlowCase(const APInt& RHS) { 248 tcOr(U.pVal, RHS.U.pVal, getNumWords()); 249 } 250 251 void APInt::XorAssignSlowCase(const APInt& RHS) { 252 tcXor(U.pVal, RHS.U.pVal, getNumWords()); 253 } 254 255 APInt& APInt::operator*=(const APInt& RHS) { 256 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 257 *this = *this * RHS; 258 return *this; 259 } 260 261 APInt& APInt::operator*=(uint64_t RHS) { 262 if (isSingleWord()) { 263 U.VAL *= RHS; 264 } else { 265 unsigned NumWords = getNumWords(); 266 tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false); 267 } 268 return clearUnusedBits(); 269 } 270 271 bool APInt::EqualSlowCase(const APInt& RHS) const { 272 return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal); 273 } 274 275 int APInt::compare(const APInt& RHS) const { 276 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); 277 if (isSingleWord()) 278 return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL; 279 280 return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); 281 } 282 283 int APInt::compareSigned(const APInt& RHS) const { 284 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); 285 if (isSingleWord()) { 286 int64_t lhsSext = SignExtend64(U.VAL, BitWidth); 287 int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth); 288 return lhsSext < rhsSext ? -1 : lhsSext > rhsSext; 289 } 290 291 bool lhsNeg = isNegative(); 292 bool rhsNeg = RHS.isNegative(); 293 294 // If the sign bits don't match, then (LHS < RHS) if LHS is negative 295 if (lhsNeg != rhsNeg) 296 return lhsNeg ? -1 : 1; 297 298 // Otherwise we can just use an unsigned comparison, because even negative 299 // numbers compare correctly this way if both have the same signed-ness. 300 return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); 301 } 302 303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { 304 unsigned loWord = whichWord(loBit); 305 unsigned hiWord = whichWord(hiBit); 306 307 // Create an initial mask for the low word with zeros below loBit. 308 uint64_t loMask = WORDTYPE_MAX << whichBit(loBit); 309 310 // If hiBit is not aligned, we need a high mask. 311 unsigned hiShiftAmt = whichBit(hiBit); 312 if (hiShiftAmt != 0) { 313 // Create a high mask with zeros above hiBit. 314 uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt); 315 // If loWord and hiWord are equal, then we combine the masks. Otherwise, 316 // set the bits in hiWord. 317 if (hiWord == loWord) 318 loMask &= hiMask; 319 else 320 U.pVal[hiWord] |= hiMask; 321 } 322 // Apply the mask to the low word. 323 U.pVal[loWord] |= loMask; 324 325 // Fill any words between loWord and hiWord with all ones. 326 for (unsigned word = loWord + 1; word < hiWord; ++word) 327 U.pVal[word] = WORDTYPE_MAX; 328 } 329 330 /// Toggle every bit to its opposite value. 331 void APInt::flipAllBitsSlowCase() { 332 tcComplement(U.pVal, getNumWords()); 333 clearUnusedBits(); 334 } 335 336 /// Toggle a given bit to its opposite value whose position is given 337 /// as "bitPosition". 338 /// Toggles a given bit to its opposite value. 339 void APInt::flipBit(unsigned bitPosition) { 340 assert(bitPosition < BitWidth && "Out of the bit-width range!"); 341 setBitVal(bitPosition, !(*this)[bitPosition]); 342 } 343 344 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { 345 unsigned subBitWidth = subBits.getBitWidth(); 346 assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth && 347 "Illegal bit insertion"); 348 349 // Insertion is a direct copy. 350 if (subBitWidth == BitWidth) { 351 *this = subBits; 352 return; 353 } 354 355 // Single word result can be done as a direct bitmask. 356 if (isSingleWord()) { 357 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth); 358 U.VAL &= ~(mask << bitPosition); 359 U.VAL |= (subBits.U.VAL << bitPosition); 360 return; 361 } 362 363 unsigned loBit = whichBit(bitPosition); 364 unsigned loWord = whichWord(bitPosition); 365 unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1); 366 367 // Insertion within a single word can be done as a direct bitmask. 368 if (loWord == hi1Word) { 369 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth); 370 U.pVal[loWord] &= ~(mask << loBit); 371 U.pVal[loWord] |= (subBits.U.VAL << loBit); 372 return; 373 } 374 375 // Insert on word boundaries. 376 if (loBit == 0) { 377 // Direct copy whole words. 378 unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD; 379 memcpy(U.pVal + loWord, subBits.getRawData(), 380 numWholeSubWords * APINT_WORD_SIZE); 381 382 // Mask+insert remaining bits. 383 unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD; 384 if (remainingBits != 0) { 385 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits); 386 U.pVal[hi1Word] &= ~mask; 387 U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1); 388 } 389 return; 390 } 391 392 // General case - set/clear individual bits in dst based on src. 393 // TODO - there is scope for optimization here, but at the moment this code 394 // path is barely used so prefer readability over performance. 395 for (unsigned i = 0; i != subBitWidth; ++i) 396 setBitVal(bitPosition + i, subBits[i]); 397 } 398 399 void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) { 400 uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits); 401 subBits &= maskBits; 402 if (isSingleWord()) { 403 U.VAL &= ~(maskBits << bitPosition); 404 U.VAL |= subBits << bitPosition; 405 return; 406 } 407 408 unsigned loBit = whichBit(bitPosition); 409 unsigned loWord = whichWord(bitPosition); 410 unsigned hiWord = whichWord(bitPosition + numBits - 1); 411 if (loWord == hiWord) { 412 U.pVal[loWord] &= ~(maskBits << loBit); 413 U.pVal[loWord] |= subBits << loBit; 414 return; 415 } 416 417 static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected"); 418 unsigned wordBits = 8 * sizeof(WordType); 419 U.pVal[loWord] &= ~(maskBits << loBit); 420 U.pVal[loWord] |= subBits << loBit; 421 422 U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit)); 423 U.pVal[hiWord] |= subBits >> (wordBits - loBit); 424 } 425 426 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { 427 assert(numBits > 0 && "Can't extract zero bits"); 428 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && 429 "Illegal bit extraction"); 430 431 if (isSingleWord()) 432 return APInt(numBits, U.VAL >> bitPosition); 433 434 unsigned loBit = whichBit(bitPosition); 435 unsigned loWord = whichWord(bitPosition); 436 unsigned hiWord = whichWord(bitPosition + numBits - 1); 437 438 // Single word result extracting bits from a single word source. 439 if (loWord == hiWord) 440 return APInt(numBits, U.pVal[loWord] >> loBit); 441 442 // Extracting bits that start on a source word boundary can be done 443 // as a fast memory copy. 444 if (loBit == 0) 445 return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord)); 446 447 // General case - shift + copy source words directly into place. 448 APInt Result(numBits, 0); 449 unsigned NumSrcWords = getNumWords(); 450 unsigned NumDstWords = Result.getNumWords(); 451 452 uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal; 453 for (unsigned word = 0; word < NumDstWords; ++word) { 454 uint64_t w0 = U.pVal[loWord + word]; 455 uint64_t w1 = 456 (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0; 457 DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit)); 458 } 459 460 return Result.clearUnusedBits(); 461 } 462 463 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits, 464 unsigned bitPosition) const { 465 assert(numBits > 0 && "Can't extract zero bits"); 466 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && 467 "Illegal bit extraction"); 468 assert(numBits <= 64 && "Illegal bit extraction"); 469 470 uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits); 471 if (isSingleWord()) 472 return (U.VAL >> bitPosition) & maskBits; 473 474 unsigned loBit = whichBit(bitPosition); 475 unsigned loWord = whichWord(bitPosition); 476 unsigned hiWord = whichWord(bitPosition + numBits - 1); 477 if (loWord == hiWord) 478 return (U.pVal[loWord] >> loBit) & maskBits; 479 480 static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected"); 481 unsigned wordBits = 8 * sizeof(WordType); 482 uint64_t retBits = U.pVal[loWord] >> loBit; 483 retBits |= U.pVal[hiWord] << (wordBits - loBit); 484 retBits &= maskBits; 485 return retBits; 486 } 487 488 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { 489 assert(!str.empty() && "Invalid string length"); 490 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || 491 radix == 36) && 492 "Radix should be 2, 8, 10, 16, or 36!"); 493 494 size_t slen = str.size(); 495 496 // Each computation below needs to know if it's negative. 497 StringRef::iterator p = str.begin(); 498 unsigned isNegative = *p == '-'; 499 if (*p == '-' || *p == '+') { 500 p++; 501 slen--; 502 assert(slen && "String is only a sign, needs a value."); 503 } 504 505 // For radixes of power-of-two values, the bits required is accurately and 506 // easily computed 507 if (radix == 2) 508 return slen + isNegative; 509 if (radix == 8) 510 return slen * 3 + isNegative; 511 if (radix == 16) 512 return slen * 4 + isNegative; 513 514 // FIXME: base 36 515 516 // This is grossly inefficient but accurate. We could probably do something 517 // with a computation of roughly slen*64/20 and then adjust by the value of 518 // the first few digits. But, I'm not sure how accurate that could be. 519 520 // Compute a sufficient number of bits that is always large enough but might 521 // be too large. This avoids the assertion in the constructor. This 522 // calculation doesn't work appropriately for the numbers 0-9, so just use 4 523 // bits in that case. 524 unsigned sufficient 525 = radix == 10? (slen == 1 ? 4 : slen * 64/18) 526 : (slen == 1 ? 7 : slen * 16/3); 527 528 // Convert to the actual binary value. 529 APInt tmp(sufficient, StringRef(p, slen), radix); 530 531 // Compute how many bits are required. If the log is infinite, assume we need 532 // just bit. If the log is exact and value is negative, then the value is 533 // MinSignedValue with (log + 1) bits. 534 unsigned log = tmp.logBase2(); 535 if (log == (unsigned)-1) { 536 return isNegative + 1; 537 } else if (isNegative && tmp.isPowerOf2()) { 538 return isNegative + log; 539 } else { 540 return isNegative + log + 1; 541 } 542 } 543 544 hash_code llvm::hash_value(const APInt &Arg) { 545 if (Arg.isSingleWord()) 546 return hash_combine(Arg.BitWidth, Arg.U.VAL); 547 548 return hash_combine( 549 Arg.BitWidth, 550 hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords())); 551 } 552 553 unsigned DenseMapInfo<APInt>::getHashValue(const APInt &Key) { 554 return static_cast<unsigned>(hash_value(Key)); 555 } 556 557 bool APInt::isSplat(unsigned SplatSizeInBits) const { 558 assert(getBitWidth() % SplatSizeInBits == 0 && 559 "SplatSizeInBits must divide width!"); 560 // We can check that all parts of an integer are equal by making use of a 561 // little trick: rotate and check if it's still the same value. 562 return *this == rotl(SplatSizeInBits); 563 } 564 565 /// This function returns the high "numBits" bits of this APInt. 566 APInt APInt::getHiBits(unsigned numBits) const { 567 return this->lshr(BitWidth - numBits); 568 } 569 570 /// This function returns the low "numBits" bits of this APInt. 571 APInt APInt::getLoBits(unsigned numBits) const { 572 APInt Result(getLowBitsSet(BitWidth, numBits)); 573 Result &= *this; 574 return Result; 575 } 576 577 /// Return a value containing V broadcasted over NewLen bits. 578 APInt APInt::getSplat(unsigned NewLen, const APInt &V) { 579 assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!"); 580 581 APInt Val = V.zextOrSelf(NewLen); 582 for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1) 583 Val |= Val << I; 584 585 return Val; 586 } 587 588 unsigned APInt::countLeadingZerosSlowCase() const { 589 unsigned Count = 0; 590 for (int i = getNumWords()-1; i >= 0; --i) { 591 uint64_t V = U.pVal[i]; 592 if (V == 0) 593 Count += APINT_BITS_PER_WORD; 594 else { 595 Count += llvm::countLeadingZeros(V); 596 break; 597 } 598 } 599 // Adjust for unused bits in the most significant word (they are zero). 600 unsigned Mod = BitWidth % APINT_BITS_PER_WORD; 601 Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0; 602 return Count; 603 } 604 605 unsigned APInt::countLeadingOnesSlowCase() const { 606 unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD; 607 unsigned shift; 608 if (!highWordBits) { 609 highWordBits = APINT_BITS_PER_WORD; 610 shift = 0; 611 } else { 612 shift = APINT_BITS_PER_WORD - highWordBits; 613 } 614 int i = getNumWords() - 1; 615 unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift); 616 if (Count == highWordBits) { 617 for (i--; i >= 0; --i) { 618 if (U.pVal[i] == WORDTYPE_MAX) 619 Count += APINT_BITS_PER_WORD; 620 else { 621 Count += llvm::countLeadingOnes(U.pVal[i]); 622 break; 623 } 624 } 625 } 626 return Count; 627 } 628 629 unsigned APInt::countTrailingZerosSlowCase() const { 630 unsigned Count = 0; 631 unsigned i = 0; 632 for (; i < getNumWords() && U.pVal[i] == 0; ++i) 633 Count += APINT_BITS_PER_WORD; 634 if (i < getNumWords()) 635 Count += llvm::countTrailingZeros(U.pVal[i]); 636 return std::min(Count, BitWidth); 637 } 638 639 unsigned APInt::countTrailingOnesSlowCase() const { 640 unsigned Count = 0; 641 unsigned i = 0; 642 for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i) 643 Count += APINT_BITS_PER_WORD; 644 if (i < getNumWords()) 645 Count += llvm::countTrailingOnes(U.pVal[i]); 646 assert(Count <= BitWidth); 647 return Count; 648 } 649 650 unsigned APInt::countPopulationSlowCase() const { 651 unsigned Count = 0; 652 for (unsigned i = 0; i < getNumWords(); ++i) 653 Count += llvm::countPopulation(U.pVal[i]); 654 return Count; 655 } 656 657 bool APInt::intersectsSlowCase(const APInt &RHS) const { 658 for (unsigned i = 0, e = getNumWords(); i != e; ++i) 659 if ((U.pVal[i] & RHS.U.pVal[i]) != 0) 660 return true; 661 662 return false; 663 } 664 665 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const { 666 for (unsigned i = 0, e = getNumWords(); i != e; ++i) 667 if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0) 668 return false; 669 670 return true; 671 } 672 673 APInt APInt::byteSwap() const { 674 assert(BitWidth >= 16 && BitWidth % 8 == 0 && "Cannot byteswap!"); 675 if (BitWidth == 16) 676 return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL))); 677 if (BitWidth == 32) 678 return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL))); 679 if (BitWidth <= 64) { 680 uint64_t Tmp1 = ByteSwap_64(U.VAL); 681 Tmp1 >>= (64 - BitWidth); 682 return APInt(BitWidth, Tmp1); 683 } 684 685 APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0); 686 for (unsigned I = 0, N = getNumWords(); I != N; ++I) 687 Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]); 688 if (Result.BitWidth != BitWidth) { 689 Result.lshrInPlace(Result.BitWidth - BitWidth); 690 Result.BitWidth = BitWidth; 691 } 692 return Result; 693 } 694 695 APInt APInt::reverseBits() const { 696 switch (BitWidth) { 697 case 64: 698 return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL)); 699 case 32: 700 return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL)); 701 case 16: 702 return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL)); 703 case 8: 704 return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL)); 705 default: 706 break; 707 } 708 709 APInt Val(*this); 710 APInt Reversed(BitWidth, 0); 711 unsigned S = BitWidth; 712 713 for (; Val != 0; Val.lshrInPlace(1)) { 714 Reversed <<= 1; 715 Reversed |= Val[0]; 716 --S; 717 } 718 719 Reversed <<= S; 720 return Reversed; 721 } 722 723 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) { 724 // Fast-path a common case. 725 if (A == B) return A; 726 727 // Corner cases: if either operand is zero, the other is the gcd. 728 if (!A) return B; 729 if (!B) return A; 730 731 // Count common powers of 2 and remove all other powers of 2. 732 unsigned Pow2; 733 { 734 unsigned Pow2_A = A.countTrailingZeros(); 735 unsigned Pow2_B = B.countTrailingZeros(); 736 if (Pow2_A > Pow2_B) { 737 A.lshrInPlace(Pow2_A - Pow2_B); 738 Pow2 = Pow2_B; 739 } else if (Pow2_B > Pow2_A) { 740 B.lshrInPlace(Pow2_B - Pow2_A); 741 Pow2 = Pow2_A; 742 } else { 743 Pow2 = Pow2_A; 744 } 745 } 746 747 // Both operands are odd multiples of 2^Pow_2: 748 // 749 // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b)) 750 // 751 // This is a modified version of Stein's algorithm, taking advantage of 752 // efficient countTrailingZeros(). 753 while (A != B) { 754 if (A.ugt(B)) { 755 A -= B; 756 A.lshrInPlace(A.countTrailingZeros() - Pow2); 757 } else { 758 B -= A; 759 B.lshrInPlace(B.countTrailingZeros() - Pow2); 760 } 761 } 762 763 return A; 764 } 765 766 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { 767 uint64_t I = bit_cast<uint64_t>(Double); 768 769 // Get the sign bit from the highest order bit 770 bool isNeg = I >> 63; 771 772 // Get the 11-bit exponent and adjust for the 1023 bit bias 773 int64_t exp = ((I >> 52) & 0x7ff) - 1023; 774 775 // If the exponent is negative, the value is < 0 so just return 0. 776 if (exp < 0) 777 return APInt(width, 0u); 778 779 // Extract the mantissa by clearing the top 12 bits (sign + exponent). 780 uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52; 781 782 // If the exponent doesn't shift all bits out of the mantissa 783 if (exp < 52) 784 return isNeg ? -APInt(width, mantissa >> (52 - exp)) : 785 APInt(width, mantissa >> (52 - exp)); 786 787 // If the client didn't provide enough bits for us to shift the mantissa into 788 // then the result is undefined, just return 0 789 if (width <= exp - 52) 790 return APInt(width, 0); 791 792 // Otherwise, we have to shift the mantissa bits up to the right location 793 APInt Tmp(width, mantissa); 794 Tmp <<= (unsigned)exp - 52; 795 return isNeg ? -Tmp : Tmp; 796 } 797 798 /// This function converts this APInt to a double. 799 /// The layout for double is as following (IEEE Standard 754): 800 /// -------------------------------------- 801 /// | Sign Exponent Fraction Bias | 802 /// |-------------------------------------- | 803 /// | 1[63] 11[62-52] 52[51-00] 1023 | 804 /// -------------------------------------- 805 double APInt::roundToDouble(bool isSigned) const { 806 807 // Handle the simple case where the value is contained in one uint64_t. 808 // It is wrong to optimize getWord(0) to VAL; there might be more than one word. 809 if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) { 810 if (isSigned) { 811 int64_t sext = SignExtend64(getWord(0), BitWidth); 812 return double(sext); 813 } else 814 return double(getWord(0)); 815 } 816 817 // Determine if the value is negative. 818 bool isNeg = isSigned ? (*this)[BitWidth-1] : false; 819 820 // Construct the absolute value if we're negative. 821 APInt Tmp(isNeg ? -(*this) : (*this)); 822 823 // Figure out how many bits we're using. 824 unsigned n = Tmp.getActiveBits(); 825 826 // The exponent (without bias normalization) is just the number of bits 827 // we are using. Note that the sign bit is gone since we constructed the 828 // absolute value. 829 uint64_t exp = n; 830 831 // Return infinity for exponent overflow 832 if (exp > 1023) { 833 if (!isSigned || !isNeg) 834 return std::numeric_limits<double>::infinity(); 835 else 836 return -std::numeric_limits<double>::infinity(); 837 } 838 exp += 1023; // Increment for 1023 bias 839 840 // Number of bits in mantissa is 52. To obtain the mantissa value, we must 841 // extract the high 52 bits from the correct words in pVal. 842 uint64_t mantissa; 843 unsigned hiWord = whichWord(n-1); 844 if (hiWord == 0) { 845 mantissa = Tmp.U.pVal[0]; 846 if (n > 52) 847 mantissa >>= n - 52; // shift down, we want the top 52 bits. 848 } else { 849 assert(hiWord > 0 && "huh?"); 850 uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD); 851 uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD); 852 mantissa = hibits | lobits; 853 } 854 855 // The leading bit of mantissa is implicit, so get rid of it. 856 uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0; 857 uint64_t I = sign | (exp << 52) | mantissa; 858 return bit_cast<double>(I); 859 } 860 861 // Truncate to new width. 862 APInt APInt::trunc(unsigned width) const { 863 assert(width < BitWidth && "Invalid APInt Truncate request"); 864 assert(width && "Can't truncate to 0 bits"); 865 866 if (width <= APINT_BITS_PER_WORD) 867 return APInt(width, getRawData()[0]); 868 869 APInt Result(getMemory(getNumWords(width)), width); 870 871 // Copy full words. 872 unsigned i; 873 for (i = 0; i != width / APINT_BITS_PER_WORD; i++) 874 Result.U.pVal[i] = U.pVal[i]; 875 876 // Truncate and copy any partial word. 877 unsigned bits = (0 - width) % APINT_BITS_PER_WORD; 878 if (bits != 0) 879 Result.U.pVal[i] = U.pVal[i] << bits >> bits; 880 881 return Result; 882 } 883 884 // Truncate to new width with unsigned saturation. 885 APInt APInt::truncUSat(unsigned width) const { 886 assert(width < BitWidth && "Invalid APInt Truncate request"); 887 assert(width && "Can't truncate to 0 bits"); 888 889 // Can we just losslessly truncate it? 890 if (isIntN(width)) 891 return trunc(width); 892 // If not, then just return the new limit. 893 return APInt::getMaxValue(width); 894 } 895 896 // Truncate to new width with signed saturation. 897 APInt APInt::truncSSat(unsigned width) const { 898 assert(width < BitWidth && "Invalid APInt Truncate request"); 899 assert(width && "Can't truncate to 0 bits"); 900 901 // Can we just losslessly truncate it? 902 if (isSignedIntN(width)) 903 return trunc(width); 904 // If not, then just return the new limits. 905 return isNegative() ? APInt::getSignedMinValue(width) 906 : APInt::getSignedMaxValue(width); 907 } 908 909 // Sign extend to a new width. 910 APInt APInt::sext(unsigned Width) const { 911 assert(Width > BitWidth && "Invalid APInt SignExtend request"); 912 913 if (Width <= APINT_BITS_PER_WORD) 914 return APInt(Width, SignExtend64(U.VAL, BitWidth)); 915 916 APInt Result(getMemory(getNumWords(Width)), Width); 917 918 // Copy words. 919 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); 920 921 // Sign extend the last word since there may be unused bits in the input. 922 Result.U.pVal[getNumWords() - 1] = 923 SignExtend64(Result.U.pVal[getNumWords() - 1], 924 ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); 925 926 // Fill with sign bits. 927 std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0, 928 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); 929 Result.clearUnusedBits(); 930 return Result; 931 } 932 933 // Zero extend to a new width. 934 APInt APInt::zext(unsigned width) const { 935 assert(width > BitWidth && "Invalid APInt ZeroExtend request"); 936 937 if (width <= APINT_BITS_PER_WORD) 938 return APInt(width, U.VAL); 939 940 APInt Result(getMemory(getNumWords(width)), width); 941 942 // Copy words. 943 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); 944 945 // Zero remaining words. 946 std::memset(Result.U.pVal + getNumWords(), 0, 947 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); 948 949 return Result; 950 } 951 952 APInt APInt::zextOrTrunc(unsigned width) const { 953 if (BitWidth < width) 954 return zext(width); 955 if (BitWidth > width) 956 return trunc(width); 957 return *this; 958 } 959 960 APInt APInt::sextOrTrunc(unsigned width) const { 961 if (BitWidth < width) 962 return sext(width); 963 if (BitWidth > width) 964 return trunc(width); 965 return *this; 966 } 967 968 APInt APInt::truncOrSelf(unsigned width) const { 969 if (BitWidth > width) 970 return trunc(width); 971 return *this; 972 } 973 974 APInt APInt::zextOrSelf(unsigned width) const { 975 if (BitWidth < width) 976 return zext(width); 977 return *this; 978 } 979 980 APInt APInt::sextOrSelf(unsigned width) const { 981 if (BitWidth < width) 982 return sext(width); 983 return *this; 984 } 985 986 /// Arithmetic right-shift this APInt by shiftAmt. 987 /// Arithmetic right-shift function. 988 void APInt::ashrInPlace(const APInt &shiftAmt) { 989 ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); 990 } 991 992 /// Arithmetic right-shift this APInt by shiftAmt. 993 /// Arithmetic right-shift function. 994 void APInt::ashrSlowCase(unsigned ShiftAmt) { 995 // Don't bother performing a no-op shift. 996 if (!ShiftAmt) 997 return; 998 999 // Save the original sign bit for later. 1000 bool Negative = isNegative(); 1001 1002 // WordShift is the inter-part shift; BitShift is intra-part shift. 1003 unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD; 1004 unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD; 1005 1006 unsigned WordsToMove = getNumWords() - WordShift; 1007 if (WordsToMove != 0) { 1008 // Sign extend the last word to fill in the unused bits. 1009 U.pVal[getNumWords() - 1] = SignExtend64( 1010 U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); 1011 1012 // Fastpath for moving by whole words. 1013 if (BitShift == 0) { 1014 std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE); 1015 } else { 1016 // Move the words containing significant bits. 1017 for (unsigned i = 0; i != WordsToMove - 1; ++i) 1018 U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) | 1019 (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift)); 1020 1021 // Handle the last word which has no high bits to copy. 1022 U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift; 1023 // Sign extend one more time. 1024 U.pVal[WordsToMove - 1] = 1025 SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift); 1026 } 1027 } 1028 1029 // Fill in the remainder based on the original sign. 1030 std::memset(U.pVal + WordsToMove, Negative ? -1 : 0, 1031 WordShift * APINT_WORD_SIZE); 1032 clearUnusedBits(); 1033 } 1034 1035 /// Logical right-shift this APInt by shiftAmt. 1036 /// Logical right-shift function. 1037 void APInt::lshrInPlace(const APInt &shiftAmt) { 1038 lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); 1039 } 1040 1041 /// Logical right-shift this APInt by shiftAmt. 1042 /// Logical right-shift function. 1043 void APInt::lshrSlowCase(unsigned ShiftAmt) { 1044 tcShiftRight(U.pVal, getNumWords(), ShiftAmt); 1045 } 1046 1047 /// Left-shift this APInt by shiftAmt. 1048 /// Left-shift function. 1049 APInt &APInt::operator<<=(const APInt &shiftAmt) { 1050 // It's undefined behavior in C to shift by BitWidth or greater. 1051 *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth); 1052 return *this; 1053 } 1054 1055 void APInt::shlSlowCase(unsigned ShiftAmt) { 1056 tcShiftLeft(U.pVal, getNumWords(), ShiftAmt); 1057 clearUnusedBits(); 1058 } 1059 1060 // Calculate the rotate amount modulo the bit width. 1061 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) { 1062 unsigned rotBitWidth = rotateAmt.getBitWidth(); 1063 APInt rot = rotateAmt; 1064 if (rotBitWidth < BitWidth) { 1065 // Extend the rotate APInt, so that the urem doesn't divide by 0. 1066 // e.g. APInt(1, 32) would give APInt(1, 0). 1067 rot = rotateAmt.zext(BitWidth); 1068 } 1069 rot = rot.urem(APInt(rot.getBitWidth(), BitWidth)); 1070 return rot.getLimitedValue(BitWidth); 1071 } 1072 1073 APInt APInt::rotl(const APInt &rotateAmt) const { 1074 return rotl(rotateModulo(BitWidth, rotateAmt)); 1075 } 1076 1077 APInt APInt::rotl(unsigned rotateAmt) const { 1078 rotateAmt %= BitWidth; 1079 if (rotateAmt == 0) 1080 return *this; 1081 return shl(rotateAmt) | lshr(BitWidth - rotateAmt); 1082 } 1083 1084 APInt APInt::rotr(const APInt &rotateAmt) const { 1085 return rotr(rotateModulo(BitWidth, rotateAmt)); 1086 } 1087 1088 APInt APInt::rotr(unsigned rotateAmt) const { 1089 rotateAmt %= BitWidth; 1090 if (rotateAmt == 0) 1091 return *this; 1092 return lshr(rotateAmt) | shl(BitWidth - rotateAmt); 1093 } 1094 1095 // Square Root - this method computes and returns the square root of "this". 1096 // Three mechanisms are used for computation. For small values (<= 5 bits), 1097 // a table lookup is done. This gets some performance for common cases. For 1098 // values using less than 52 bits, the value is converted to double and then 1099 // the libc sqrt function is called. The result is rounded and then converted 1100 // back to a uint64_t which is then used to construct the result. Finally, 1101 // the Babylonian method for computing square roots is used. 1102 APInt APInt::sqrt() const { 1103 1104 // Determine the magnitude of the value. 1105 unsigned magnitude = getActiveBits(); 1106 1107 // Use a fast table for some small values. This also gets rid of some 1108 // rounding errors in libc sqrt for small values. 1109 if (magnitude <= 5) { 1110 static const uint8_t results[32] = { 1111 /* 0 */ 0, 1112 /* 1- 2 */ 1, 1, 1113 /* 3- 6 */ 2, 2, 2, 2, 1114 /* 7-12 */ 3, 3, 3, 3, 3, 3, 1115 /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4, 1116 /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1117 /* 31 */ 6 1118 }; 1119 return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]); 1120 } 1121 1122 // If the magnitude of the value fits in less than 52 bits (the precision of 1123 // an IEEE double precision floating point value), then we can use the 1124 // libc sqrt function which will probably use a hardware sqrt computation. 1125 // This should be faster than the algorithm below. 1126 if (magnitude < 52) { 1127 return APInt(BitWidth, 1128 uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL 1129 : U.pVal[0]))))); 1130 } 1131 1132 // Okay, all the short cuts are exhausted. We must compute it. The following 1133 // is a classical Babylonian method for computing the square root. This code 1134 // was adapted to APInt from a wikipedia article on such computations. 1135 // See http://www.wikipedia.org/ and go to the page named 1136 // Calculate_an_integer_square_root. 1137 unsigned nbits = BitWidth, i = 4; 1138 APInt testy(BitWidth, 16); 1139 APInt x_old(BitWidth, 1); 1140 APInt x_new(BitWidth, 0); 1141 APInt two(BitWidth, 2); 1142 1143 // Select a good starting value using binary logarithms. 1144 for (;; i += 2, testy = testy.shl(2)) 1145 if (i >= nbits || this->ule(testy)) { 1146 x_old = x_old.shl(i / 2); 1147 break; 1148 } 1149 1150 // Use the Babylonian method to arrive at the integer square root: 1151 for (;;) { 1152 x_new = (this->udiv(x_old) + x_old).udiv(two); 1153 if (x_old.ule(x_new)) 1154 break; 1155 x_old = x_new; 1156 } 1157 1158 // Make sure we return the closest approximation 1159 // NOTE: The rounding calculation below is correct. It will produce an 1160 // off-by-one discrepancy with results from pari/gp. That discrepancy has been 1161 // determined to be a rounding issue with pari/gp as it begins to use a 1162 // floating point representation after 192 bits. There are no discrepancies 1163 // between this algorithm and pari/gp for bit widths < 192 bits. 1164 APInt square(x_old * x_old); 1165 APInt nextSquare((x_old + 1) * (x_old +1)); 1166 if (this->ult(square)) 1167 return x_old; 1168 assert(this->ule(nextSquare) && "Error in APInt::sqrt computation"); 1169 APInt midpoint((nextSquare - square).udiv(two)); 1170 APInt offset(*this - square); 1171 if (offset.ult(midpoint)) 1172 return x_old; 1173 return x_old + 1; 1174 } 1175 1176 /// Computes the multiplicative inverse of this APInt for a given modulo. The 1177 /// iterative extended Euclidean algorithm is used to solve for this value, 1178 /// however we simplify it to speed up calculating only the inverse, and take 1179 /// advantage of div+rem calculations. We also use some tricks to avoid copying 1180 /// (potentially large) APInts around. 1181 /// WARNING: a value of '0' may be returned, 1182 /// signifying that no multiplicative inverse exists! 1183 APInt APInt::multiplicativeInverse(const APInt& modulo) const { 1184 assert(ult(modulo) && "This APInt must be smaller than the modulo"); 1185 1186 // Using the properties listed at the following web page (accessed 06/21/08): 1187 // http://www.numbertheory.org/php/euclid.html 1188 // (especially the properties numbered 3, 4 and 9) it can be proved that 1189 // BitWidth bits suffice for all the computations in the algorithm implemented 1190 // below. More precisely, this number of bits suffice if the multiplicative 1191 // inverse exists, but may not suffice for the general extended Euclidean 1192 // algorithm. 1193 1194 APInt r[2] = { modulo, *this }; 1195 APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) }; 1196 APInt q(BitWidth, 0); 1197 1198 unsigned i; 1199 for (i = 0; r[i^1] != 0; i ^= 1) { 1200 // An overview of the math without the confusing bit-flipping: 1201 // q = r[i-2] / r[i-1] 1202 // r[i] = r[i-2] % r[i-1] 1203 // t[i] = t[i-2] - t[i-1] * q 1204 udivrem(r[i], r[i^1], q, r[i]); 1205 t[i] -= t[i^1] * q; 1206 } 1207 1208 // If this APInt and the modulo are not coprime, there is no multiplicative 1209 // inverse, so return 0. We check this by looking at the next-to-last 1210 // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean 1211 // algorithm. 1212 if (r[i] != 1) 1213 return APInt(BitWidth, 0); 1214 1215 // The next-to-last t is the multiplicative inverse. However, we are 1216 // interested in a positive inverse. Calculate a positive one from a negative 1217 // one if necessary. A simple addition of the modulo suffices because 1218 // abs(t[i]) is known to be less than *this/2 (see the link above). 1219 if (t[i].isNegative()) 1220 t[i] += modulo; 1221 1222 return std::move(t[i]); 1223 } 1224 1225 /// Calculate the magic numbers required to implement a signed integer division 1226 /// by a constant as a sequence of multiplies, adds and shifts. Requires that 1227 /// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S. 1228 /// Warren, Jr., chapter 10. 1229 APInt::ms APInt::magic() const { 1230 const APInt& d = *this; 1231 unsigned p; 1232 APInt ad, anc, delta, q1, r1, q2, r2, t; 1233 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); 1234 struct ms mag; 1235 1236 ad = d.abs(); 1237 t = signedMin + (d.lshr(d.getBitWidth() - 1)); 1238 anc = t - 1 - t.urem(ad); // absolute value of nc 1239 p = d.getBitWidth() - 1; // initialize p 1240 q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc) 1241 r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc)) 1242 q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d) 1243 r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d)) 1244 do { 1245 p = p + 1; 1246 q1 = q1<<1; // update q1 = 2p/abs(nc) 1247 r1 = r1<<1; // update r1 = rem(2p/abs(nc)) 1248 if (r1.uge(anc)) { // must be unsigned comparison 1249 q1 = q1 + 1; 1250 r1 = r1 - anc; 1251 } 1252 q2 = q2<<1; // update q2 = 2p/abs(d) 1253 r2 = r2<<1; // update r2 = rem(2p/abs(d)) 1254 if (r2.uge(ad)) { // must be unsigned comparison 1255 q2 = q2 + 1; 1256 r2 = r2 - ad; 1257 } 1258 delta = ad - r2; 1259 } while (q1.ult(delta) || (q1 == delta && r1 == 0)); 1260 1261 mag.m = q2 + 1; 1262 if (d.isNegative()) mag.m = -mag.m; // resulting magic number 1263 mag.s = p - d.getBitWidth(); // resulting shift 1264 return mag; 1265 } 1266 1267 /// Calculate the magic numbers required to implement an unsigned integer 1268 /// division by a constant as a sequence of multiplies, adds and shifts. 1269 /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry 1270 /// S. Warren, Jr., chapter 10. 1271 /// LeadingZeros can be used to simplify the calculation if the upper bits 1272 /// of the divided value are known zero. 1273 APInt::mu APInt::magicu(unsigned LeadingZeros) const { 1274 const APInt& d = *this; 1275 unsigned p; 1276 APInt nc, delta, q1, r1, q2, r2; 1277 struct mu magu; 1278 magu.a = 0; // initialize "add" indicator 1279 APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros); 1280 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); 1281 APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); 1282 1283 nc = allOnes - (allOnes - d).urem(d); 1284 p = d.getBitWidth() - 1; // initialize p 1285 q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc 1286 r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc) 1287 q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d 1288 r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d) 1289 do { 1290 p = p + 1; 1291 if (r1.uge(nc - r1)) { 1292 q1 = q1 + q1 + 1; // update q1 1293 r1 = r1 + r1 - nc; // update r1 1294 } 1295 else { 1296 q1 = q1+q1; // update q1 1297 r1 = r1+r1; // update r1 1298 } 1299 if ((r2 + 1).uge(d - r2)) { 1300 if (q2.uge(signedMax)) magu.a = 1; 1301 q2 = q2+q2 + 1; // update q2 1302 r2 = r2+r2 + 1 - d; // update r2 1303 } 1304 else { 1305 if (q2.uge(signedMin)) magu.a = 1; 1306 q2 = q2+q2; // update q2 1307 r2 = r2+r2 + 1; // update r2 1308 } 1309 delta = d - 1 - r2; 1310 } while (p < d.getBitWidth()*2 && 1311 (q1.ult(delta) || (q1 == delta && r1 == 0))); 1312 magu.m = q2 + 1; // resulting magic number 1313 magu.s = p - d.getBitWidth(); // resulting shift 1314 return magu; 1315 } 1316 1317 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers) 1318 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The 1319 /// variables here have the same names as in the algorithm. Comments explain 1320 /// the algorithm and any deviation from it. 1321 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r, 1322 unsigned m, unsigned n) { 1323 assert(u && "Must provide dividend"); 1324 assert(v && "Must provide divisor"); 1325 assert(q && "Must provide quotient"); 1326 assert(u != v && u != q && v != q && "Must use different memory"); 1327 assert(n>1 && "n must be > 1"); 1328 1329 // b denotes the base of the number system. In our case b is 2^32. 1330 const uint64_t b = uint64_t(1) << 32; 1331 1332 // The DEBUG macros here tend to be spam in the debug output if you're not 1333 // debugging this code. Disable them unless KNUTH_DEBUG is defined. 1334 #ifdef KNUTH_DEBUG 1335 #define DEBUG_KNUTH(X) LLVM_DEBUG(X) 1336 #else 1337 #define DEBUG_KNUTH(X) do {} while(false) 1338 #endif 1339 1340 DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n'); 1341 DEBUG_KNUTH(dbgs() << "KnuthDiv: original:"); 1342 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1343 DEBUG_KNUTH(dbgs() << " by"); 1344 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]); 1345 DEBUG_KNUTH(dbgs() << '\n'); 1346 // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of 1347 // u and v by d. Note that we have taken Knuth's advice here to use a power 1348 // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of 1349 // 2 allows us to shift instead of multiply and it is easy to determine the 1350 // shift amount from the leading zeros. We are basically normalizing the u 1351 // and v so that its high bits are shifted to the top of v's range without 1352 // overflow. Note that this can require an extra word in u so that u must 1353 // be of length m+n+1. 1354 unsigned shift = countLeadingZeros(v[n-1]); 1355 uint32_t v_carry = 0; 1356 uint32_t u_carry = 0; 1357 if (shift) { 1358 for (unsigned i = 0; i < m+n; ++i) { 1359 uint32_t u_tmp = u[i] >> (32 - shift); 1360 u[i] = (u[i] << shift) | u_carry; 1361 u_carry = u_tmp; 1362 } 1363 for (unsigned i = 0; i < n; ++i) { 1364 uint32_t v_tmp = v[i] >> (32 - shift); 1365 v[i] = (v[i] << shift) | v_carry; 1366 v_carry = v_tmp; 1367 } 1368 } 1369 u[m+n] = u_carry; 1370 1371 DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:"); 1372 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1373 DEBUG_KNUTH(dbgs() << " by"); 1374 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]); 1375 DEBUG_KNUTH(dbgs() << '\n'); 1376 1377 // D2. [Initialize j.] Set j to m. This is the loop counter over the places. 1378 int j = m; 1379 do { 1380 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n'); 1381 // D3. [Calculate q'.]. 1382 // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') 1383 // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r') 1384 // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease 1385 // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test 1386 // on v[n-2] determines at high speed most of the cases in which the trial 1387 // value qp is one too large, and it eliminates all cases where qp is two 1388 // too large. 1389 uint64_t dividend = Make_64(u[j+n], u[j+n-1]); 1390 DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n'); 1391 uint64_t qp = dividend / v[n-1]; 1392 uint64_t rp = dividend % v[n-1]; 1393 if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) { 1394 qp--; 1395 rp += v[n-1]; 1396 if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2])) 1397 qp--; 1398 } 1399 DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); 1400 1401 // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with 1402 // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation 1403 // consists of a simple multiplication by a one-place number, combined with 1404 // a subtraction. 1405 // The digits (u[j+n]...u[j]) should be kept positive; if the result of 1406 // this step is actually negative, (u[j+n]...u[j]) should be left as the 1407 // true value plus b**(n+1), namely as the b's complement of 1408 // the true value, and a "borrow" to the left should be remembered. 1409 int64_t borrow = 0; 1410 for (unsigned i = 0; i < n; ++i) { 1411 uint64_t p = uint64_t(qp) * uint64_t(v[i]); 1412 int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p); 1413 u[j+i] = Lo_32(subres); 1414 borrow = Hi_32(p) - Hi_32(subres); 1415 DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i] 1416 << ", borrow = " << borrow << '\n'); 1417 } 1418 bool isNeg = u[j+n] < borrow; 1419 u[j+n] -= Lo_32(borrow); 1420 1421 DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:"); 1422 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1423 DEBUG_KNUTH(dbgs() << '\n'); 1424 1425 // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was 1426 // negative, go to step D6; otherwise go on to step D7. 1427 q[j] = Lo_32(qp); 1428 if (isNeg) { 1429 // D6. [Add back]. The probability that this step is necessary is very 1430 // small, on the order of only 2/b. Make sure that test data accounts for 1431 // this possibility. Decrease q[j] by 1 1432 q[j]--; 1433 // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). 1434 // A carry will occur to the left of u[j+n], and it should be ignored 1435 // since it cancels with the borrow that occurred in D4. 1436 bool carry = false; 1437 for (unsigned i = 0; i < n; i++) { 1438 uint32_t limit = std::min(u[j+i],v[i]); 1439 u[j+i] += v[i] + carry; 1440 carry = u[j+i] < limit || (carry && u[j+i] == limit); 1441 } 1442 u[j+n] += carry; 1443 } 1444 DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:"); 1445 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1446 DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n'); 1447 1448 // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. 1449 } while (--j >= 0); 1450 1451 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:"); 1452 DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]); 1453 DEBUG_KNUTH(dbgs() << '\n'); 1454 1455 // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired 1456 // remainder may be obtained by dividing u[...] by d. If r is non-null we 1457 // compute the remainder (urem uses this). 1458 if (r) { 1459 // The value d is expressed by the "shift" value above since we avoided 1460 // multiplication by d by using a shift left. So, all we have to do is 1461 // shift right here. 1462 if (shift) { 1463 uint32_t carry = 0; 1464 DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:"); 1465 for (int i = n-1; i >= 0; i--) { 1466 r[i] = (u[i] >> shift) | carry; 1467 carry = u[i] << (32 - shift); 1468 DEBUG_KNUTH(dbgs() << " " << r[i]); 1469 } 1470 } else { 1471 for (int i = n-1; i >= 0; i--) { 1472 r[i] = u[i]; 1473 DEBUG_KNUTH(dbgs() << " " << r[i]); 1474 } 1475 } 1476 DEBUG_KNUTH(dbgs() << '\n'); 1477 } 1478 DEBUG_KNUTH(dbgs() << '\n'); 1479 } 1480 1481 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS, 1482 unsigned rhsWords, WordType *Quotient, WordType *Remainder) { 1483 assert(lhsWords >= rhsWords && "Fractional result"); 1484 1485 // First, compose the values into an array of 32-bit words instead of 1486 // 64-bit words. This is a necessity of both the "short division" algorithm 1487 // and the Knuth "classical algorithm" which requires there to be native 1488 // operations for +, -, and * on an m bit value with an m*2 bit result. We 1489 // can't use 64-bit operands here because we don't have native results of 1490 // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't 1491 // work on large-endian machines. 1492 unsigned n = rhsWords * 2; 1493 unsigned m = (lhsWords * 2) - n; 1494 1495 // Allocate space for the temporary values we need either on the stack, if 1496 // it will fit, or on the heap if it won't. 1497 uint32_t SPACE[128]; 1498 uint32_t *U = nullptr; 1499 uint32_t *V = nullptr; 1500 uint32_t *Q = nullptr; 1501 uint32_t *R = nullptr; 1502 if ((Remainder?4:3)*n+2*m+1 <= 128) { 1503 U = &SPACE[0]; 1504 V = &SPACE[m+n+1]; 1505 Q = &SPACE[(m+n+1) + n]; 1506 if (Remainder) 1507 R = &SPACE[(m+n+1) + n + (m+n)]; 1508 } else { 1509 U = new uint32_t[m + n + 1]; 1510 V = new uint32_t[n]; 1511 Q = new uint32_t[m+n]; 1512 if (Remainder) 1513 R = new uint32_t[n]; 1514 } 1515 1516 // Initialize the dividend 1517 memset(U, 0, (m+n+1)*sizeof(uint32_t)); 1518 for (unsigned i = 0; i < lhsWords; ++i) { 1519 uint64_t tmp = LHS[i]; 1520 U[i * 2] = Lo_32(tmp); 1521 U[i * 2 + 1] = Hi_32(tmp); 1522 } 1523 U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm. 1524 1525 // Initialize the divisor 1526 memset(V, 0, (n)*sizeof(uint32_t)); 1527 for (unsigned i = 0; i < rhsWords; ++i) { 1528 uint64_t tmp = RHS[i]; 1529 V[i * 2] = Lo_32(tmp); 1530 V[i * 2 + 1] = Hi_32(tmp); 1531 } 1532 1533 // initialize the quotient and remainder 1534 memset(Q, 0, (m+n) * sizeof(uint32_t)); 1535 if (Remainder) 1536 memset(R, 0, n * sizeof(uint32_t)); 1537 1538 // Now, adjust m and n for the Knuth division. n is the number of words in 1539 // the divisor. m is the number of words by which the dividend exceeds the 1540 // divisor (i.e. m+n is the length of the dividend). These sizes must not 1541 // contain any zero words or the Knuth algorithm fails. 1542 for (unsigned i = n; i > 0 && V[i-1] == 0; i--) { 1543 n--; 1544 m++; 1545 } 1546 for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--) 1547 m--; 1548 1549 // If we're left with only a single word for the divisor, Knuth doesn't work 1550 // so we implement the short division algorithm here. This is much simpler 1551 // and faster because we are certain that we can divide a 64-bit quantity 1552 // by a 32-bit quantity at hardware speed and short division is simply a 1553 // series of such operations. This is just like doing short division but we 1554 // are using base 2^32 instead of base 10. 1555 assert(n != 0 && "Divide by zero?"); 1556 if (n == 1) { 1557 uint32_t divisor = V[0]; 1558 uint32_t remainder = 0; 1559 for (int i = m; i >= 0; i--) { 1560 uint64_t partial_dividend = Make_64(remainder, U[i]); 1561 if (partial_dividend == 0) { 1562 Q[i] = 0; 1563 remainder = 0; 1564 } else if (partial_dividend < divisor) { 1565 Q[i] = 0; 1566 remainder = Lo_32(partial_dividend); 1567 } else if (partial_dividend == divisor) { 1568 Q[i] = 1; 1569 remainder = 0; 1570 } else { 1571 Q[i] = Lo_32(partial_dividend / divisor); 1572 remainder = Lo_32(partial_dividend - (Q[i] * divisor)); 1573 } 1574 } 1575 if (R) 1576 R[0] = remainder; 1577 } else { 1578 // Now we're ready to invoke the Knuth classical divide algorithm. In this 1579 // case n > 1. 1580 KnuthDiv(U, V, Q, R, m, n); 1581 } 1582 1583 // If the caller wants the quotient 1584 if (Quotient) { 1585 for (unsigned i = 0; i < lhsWords; ++i) 1586 Quotient[i] = Make_64(Q[i*2+1], Q[i*2]); 1587 } 1588 1589 // If the caller wants the remainder 1590 if (Remainder) { 1591 for (unsigned i = 0; i < rhsWords; ++i) 1592 Remainder[i] = Make_64(R[i*2+1], R[i*2]); 1593 } 1594 1595 // Clean up the memory we allocated. 1596 if (U != &SPACE[0]) { 1597 delete [] U; 1598 delete [] V; 1599 delete [] Q; 1600 delete [] R; 1601 } 1602 } 1603 1604 APInt APInt::udiv(const APInt &RHS) const { 1605 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1606 1607 // First, deal with the easy case 1608 if (isSingleWord()) { 1609 assert(RHS.U.VAL != 0 && "Divide by zero?"); 1610 return APInt(BitWidth, U.VAL / RHS.U.VAL); 1611 } 1612 1613 // Get some facts about the LHS and RHS number of bits and words 1614 unsigned lhsWords = getNumWords(getActiveBits()); 1615 unsigned rhsBits = RHS.getActiveBits(); 1616 unsigned rhsWords = getNumWords(rhsBits); 1617 assert(rhsWords && "Divided by zero???"); 1618 1619 // Deal with some degenerate cases 1620 if (!lhsWords) 1621 // 0 / X ===> 0 1622 return APInt(BitWidth, 0); 1623 if (rhsBits == 1) 1624 // X / 1 ===> X 1625 return *this; 1626 if (lhsWords < rhsWords || this->ult(RHS)) 1627 // X / Y ===> 0, iff X < Y 1628 return APInt(BitWidth, 0); 1629 if (*this == RHS) 1630 // X / X ===> 1 1631 return APInt(BitWidth, 1); 1632 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. 1633 // All high words are zero, just use native divide 1634 return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]); 1635 1636 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1637 APInt Quotient(BitWidth, 0); // to hold result. 1638 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr); 1639 return Quotient; 1640 } 1641 1642 APInt APInt::udiv(uint64_t RHS) const { 1643 assert(RHS != 0 && "Divide by zero?"); 1644 1645 // First, deal with the easy case 1646 if (isSingleWord()) 1647 return APInt(BitWidth, U.VAL / RHS); 1648 1649 // Get some facts about the LHS words. 1650 unsigned lhsWords = getNumWords(getActiveBits()); 1651 1652 // Deal with some degenerate cases 1653 if (!lhsWords) 1654 // 0 / X ===> 0 1655 return APInt(BitWidth, 0); 1656 if (RHS == 1) 1657 // X / 1 ===> X 1658 return *this; 1659 if (this->ult(RHS)) 1660 // X / Y ===> 0, iff X < Y 1661 return APInt(BitWidth, 0); 1662 if (*this == RHS) 1663 // X / X ===> 1 1664 return APInt(BitWidth, 1); 1665 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. 1666 // All high words are zero, just use native divide 1667 return APInt(BitWidth, this->U.pVal[0] / RHS); 1668 1669 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1670 APInt Quotient(BitWidth, 0); // to hold result. 1671 divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr); 1672 return Quotient; 1673 } 1674 1675 APInt APInt::sdiv(const APInt &RHS) const { 1676 if (isNegative()) { 1677 if (RHS.isNegative()) 1678 return (-(*this)).udiv(-RHS); 1679 return -((-(*this)).udiv(RHS)); 1680 } 1681 if (RHS.isNegative()) 1682 return -(this->udiv(-RHS)); 1683 return this->udiv(RHS); 1684 } 1685 1686 APInt APInt::sdiv(int64_t RHS) const { 1687 if (isNegative()) { 1688 if (RHS < 0) 1689 return (-(*this)).udiv(-RHS); 1690 return -((-(*this)).udiv(RHS)); 1691 } 1692 if (RHS < 0) 1693 return -(this->udiv(-RHS)); 1694 return this->udiv(RHS); 1695 } 1696 1697 APInt APInt::urem(const APInt &RHS) const { 1698 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1699 if (isSingleWord()) { 1700 assert(RHS.U.VAL != 0 && "Remainder by zero?"); 1701 return APInt(BitWidth, U.VAL % RHS.U.VAL); 1702 } 1703 1704 // Get some facts about the LHS 1705 unsigned lhsWords = getNumWords(getActiveBits()); 1706 1707 // Get some facts about the RHS 1708 unsigned rhsBits = RHS.getActiveBits(); 1709 unsigned rhsWords = getNumWords(rhsBits); 1710 assert(rhsWords && "Performing remainder operation by zero ???"); 1711 1712 // Check the degenerate cases 1713 if (lhsWords == 0) 1714 // 0 % Y ===> 0 1715 return APInt(BitWidth, 0); 1716 if (rhsBits == 1) 1717 // X % 1 ===> 0 1718 return APInt(BitWidth, 0); 1719 if (lhsWords < rhsWords || this->ult(RHS)) 1720 // X % Y ===> X, iff X < Y 1721 return *this; 1722 if (*this == RHS) 1723 // X % X == 0; 1724 return APInt(BitWidth, 0); 1725 if (lhsWords == 1) 1726 // All high words are zero, just use native remainder 1727 return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]); 1728 1729 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1730 APInt Remainder(BitWidth, 0); 1731 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal); 1732 return Remainder; 1733 } 1734 1735 uint64_t APInt::urem(uint64_t RHS) const { 1736 assert(RHS != 0 && "Remainder by zero?"); 1737 1738 if (isSingleWord()) 1739 return U.VAL % RHS; 1740 1741 // Get some facts about the LHS 1742 unsigned lhsWords = getNumWords(getActiveBits()); 1743 1744 // Check the degenerate cases 1745 if (lhsWords == 0) 1746 // 0 % Y ===> 0 1747 return 0; 1748 if (RHS == 1) 1749 // X % 1 ===> 0 1750 return 0; 1751 if (this->ult(RHS)) 1752 // X % Y ===> X, iff X < Y 1753 return getZExtValue(); 1754 if (*this == RHS) 1755 // X % X == 0; 1756 return 0; 1757 if (lhsWords == 1) 1758 // All high words are zero, just use native remainder 1759 return U.pVal[0] % RHS; 1760 1761 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1762 uint64_t Remainder; 1763 divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder); 1764 return Remainder; 1765 } 1766 1767 APInt APInt::srem(const APInt &RHS) const { 1768 if (isNegative()) { 1769 if (RHS.isNegative()) 1770 return -((-(*this)).urem(-RHS)); 1771 return -((-(*this)).urem(RHS)); 1772 } 1773 if (RHS.isNegative()) 1774 return this->urem(-RHS); 1775 return this->urem(RHS); 1776 } 1777 1778 int64_t APInt::srem(int64_t RHS) const { 1779 if (isNegative()) { 1780 if (RHS < 0) 1781 return -((-(*this)).urem(-RHS)); 1782 return -((-(*this)).urem(RHS)); 1783 } 1784 if (RHS < 0) 1785 return this->urem(-RHS); 1786 return this->urem(RHS); 1787 } 1788 1789 void APInt::udivrem(const APInt &LHS, const APInt &RHS, 1790 APInt &Quotient, APInt &Remainder) { 1791 assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1792 unsigned BitWidth = LHS.BitWidth; 1793 1794 // First, deal with the easy case 1795 if (LHS.isSingleWord()) { 1796 assert(RHS.U.VAL != 0 && "Divide by zero?"); 1797 uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL; 1798 uint64_t RemVal = LHS.U.VAL % RHS.U.VAL; 1799 Quotient = APInt(BitWidth, QuotVal); 1800 Remainder = APInt(BitWidth, RemVal); 1801 return; 1802 } 1803 1804 // Get some size facts about the dividend and divisor 1805 unsigned lhsWords = getNumWords(LHS.getActiveBits()); 1806 unsigned rhsBits = RHS.getActiveBits(); 1807 unsigned rhsWords = getNumWords(rhsBits); 1808 assert(rhsWords && "Performing divrem operation by zero ???"); 1809 1810 // Check the degenerate cases 1811 if (lhsWords == 0) { 1812 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0 1813 Remainder = APInt(BitWidth, 0); // 0 % Y ===> 0 1814 return; 1815 } 1816 1817 if (rhsBits == 1) { 1818 Quotient = LHS; // X / 1 ===> X 1819 Remainder = APInt(BitWidth, 0); // X % 1 ===> 0 1820 } 1821 1822 if (lhsWords < rhsWords || LHS.ult(RHS)) { 1823 Remainder = LHS; // X % Y ===> X, iff X < Y 1824 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y 1825 return; 1826 } 1827 1828 if (LHS == RHS) { 1829 Quotient = APInt(BitWidth, 1); // X / X ===> 1 1830 Remainder = APInt(BitWidth, 0); // X % X ===> 0; 1831 return; 1832 } 1833 1834 // Make sure there is enough space to hold the results. 1835 // NOTE: This assumes that reallocate won't affect any bits if it doesn't 1836 // change the size. This is necessary if Quotient or Remainder is aliased 1837 // with LHS or RHS. 1838 Quotient.reallocate(BitWidth); 1839 Remainder.reallocate(BitWidth); 1840 1841 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. 1842 // There is only one word to consider so use the native versions. 1843 uint64_t lhsValue = LHS.U.pVal[0]; 1844 uint64_t rhsValue = RHS.U.pVal[0]; 1845 Quotient = lhsValue / rhsValue; 1846 Remainder = lhsValue % rhsValue; 1847 return; 1848 } 1849 1850 // Okay, lets do it the long way 1851 divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, 1852 Remainder.U.pVal); 1853 // Clear the rest of the Quotient and Remainder. 1854 std::memset(Quotient.U.pVal + lhsWords, 0, 1855 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); 1856 std::memset(Remainder.U.pVal + rhsWords, 0, 1857 (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE); 1858 } 1859 1860 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient, 1861 uint64_t &Remainder) { 1862 assert(RHS != 0 && "Divide by zero?"); 1863 unsigned BitWidth = LHS.BitWidth; 1864 1865 // First, deal with the easy case 1866 if (LHS.isSingleWord()) { 1867 uint64_t QuotVal = LHS.U.VAL / RHS; 1868 Remainder = LHS.U.VAL % RHS; 1869 Quotient = APInt(BitWidth, QuotVal); 1870 return; 1871 } 1872 1873 // Get some size facts about the dividend and divisor 1874 unsigned lhsWords = getNumWords(LHS.getActiveBits()); 1875 1876 // Check the degenerate cases 1877 if (lhsWords == 0) { 1878 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0 1879 Remainder = 0; // 0 % Y ===> 0 1880 return; 1881 } 1882 1883 if (RHS == 1) { 1884 Quotient = LHS; // X / 1 ===> X 1885 Remainder = 0; // X % 1 ===> 0 1886 return; 1887 } 1888 1889 if (LHS.ult(RHS)) { 1890 Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y 1891 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y 1892 return; 1893 } 1894 1895 if (LHS == RHS) { 1896 Quotient = APInt(BitWidth, 1); // X / X ===> 1 1897 Remainder = 0; // X % X ===> 0; 1898 return; 1899 } 1900 1901 // Make sure there is enough space to hold the results. 1902 // NOTE: This assumes that reallocate won't affect any bits if it doesn't 1903 // change the size. This is necessary if Quotient is aliased with LHS. 1904 Quotient.reallocate(BitWidth); 1905 1906 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. 1907 // There is only one word to consider so use the native versions. 1908 uint64_t lhsValue = LHS.U.pVal[0]; 1909 Quotient = lhsValue / RHS; 1910 Remainder = lhsValue % RHS; 1911 return; 1912 } 1913 1914 // Okay, lets do it the long way 1915 divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder); 1916 // Clear the rest of the Quotient. 1917 std::memset(Quotient.U.pVal + lhsWords, 0, 1918 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); 1919 } 1920 1921 void APInt::sdivrem(const APInt &LHS, const APInt &RHS, 1922 APInt &Quotient, APInt &Remainder) { 1923 if (LHS.isNegative()) { 1924 if (RHS.isNegative()) 1925 APInt::udivrem(-LHS, -RHS, Quotient, Remainder); 1926 else { 1927 APInt::udivrem(-LHS, RHS, Quotient, Remainder); 1928 Quotient.negate(); 1929 } 1930 Remainder.negate(); 1931 } else if (RHS.isNegative()) { 1932 APInt::udivrem(LHS, -RHS, Quotient, Remainder); 1933 Quotient.negate(); 1934 } else { 1935 APInt::udivrem(LHS, RHS, Quotient, Remainder); 1936 } 1937 } 1938 1939 void APInt::sdivrem(const APInt &LHS, int64_t RHS, 1940 APInt &Quotient, int64_t &Remainder) { 1941 uint64_t R = Remainder; 1942 if (LHS.isNegative()) { 1943 if (RHS < 0) 1944 APInt::udivrem(-LHS, -RHS, Quotient, R); 1945 else { 1946 APInt::udivrem(-LHS, RHS, Quotient, R); 1947 Quotient.negate(); 1948 } 1949 R = -R; 1950 } else if (RHS < 0) { 1951 APInt::udivrem(LHS, -RHS, Quotient, R); 1952 Quotient.negate(); 1953 } else { 1954 APInt::udivrem(LHS, RHS, Quotient, R); 1955 } 1956 Remainder = R; 1957 } 1958 1959 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const { 1960 APInt Res = *this+RHS; 1961 Overflow = isNonNegative() == RHS.isNonNegative() && 1962 Res.isNonNegative() != isNonNegative(); 1963 return Res; 1964 } 1965 1966 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const { 1967 APInt Res = *this+RHS; 1968 Overflow = Res.ult(RHS); 1969 return Res; 1970 } 1971 1972 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const { 1973 APInt Res = *this - RHS; 1974 Overflow = isNonNegative() != RHS.isNonNegative() && 1975 Res.isNonNegative() != isNonNegative(); 1976 return Res; 1977 } 1978 1979 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const { 1980 APInt Res = *this-RHS; 1981 Overflow = Res.ugt(*this); 1982 return Res; 1983 } 1984 1985 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const { 1986 // MININT/-1 --> overflow. 1987 Overflow = isMinSignedValue() && RHS.isAllOnesValue(); 1988 return sdiv(RHS); 1989 } 1990 1991 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const { 1992 APInt Res = *this * RHS; 1993 1994 if (*this != 0 && RHS != 0) 1995 Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS; 1996 else 1997 Overflow = false; 1998 return Res; 1999 } 2000 2001 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const { 2002 if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) { 2003 Overflow = true; 2004 return *this * RHS; 2005 } 2006 2007 APInt Res = lshr(1) * RHS; 2008 Overflow = Res.isNegative(); 2009 Res <<= 1; 2010 if ((*this)[0]) { 2011 Res += RHS; 2012 if (Res.ult(RHS)) 2013 Overflow = true; 2014 } 2015 return Res; 2016 } 2017 2018 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const { 2019 Overflow = ShAmt.uge(getBitWidth()); 2020 if (Overflow) 2021 return APInt(BitWidth, 0); 2022 2023 if (isNonNegative()) // Don't allow sign change. 2024 Overflow = ShAmt.uge(countLeadingZeros()); 2025 else 2026 Overflow = ShAmt.uge(countLeadingOnes()); 2027 2028 return *this << ShAmt; 2029 } 2030 2031 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const { 2032 Overflow = ShAmt.uge(getBitWidth()); 2033 if (Overflow) 2034 return APInt(BitWidth, 0); 2035 2036 Overflow = ShAmt.ugt(countLeadingZeros()); 2037 2038 return *this << ShAmt; 2039 } 2040 2041 APInt APInt::sadd_sat(const APInt &RHS) const { 2042 bool Overflow; 2043 APInt Res = sadd_ov(RHS, Overflow); 2044 if (!Overflow) 2045 return Res; 2046 2047 return isNegative() ? APInt::getSignedMinValue(BitWidth) 2048 : APInt::getSignedMaxValue(BitWidth); 2049 } 2050 2051 APInt APInt::uadd_sat(const APInt &RHS) const { 2052 bool Overflow; 2053 APInt Res = uadd_ov(RHS, Overflow); 2054 if (!Overflow) 2055 return Res; 2056 2057 return APInt::getMaxValue(BitWidth); 2058 } 2059 2060 APInt APInt::ssub_sat(const APInt &RHS) const { 2061 bool Overflow; 2062 APInt Res = ssub_ov(RHS, Overflow); 2063 if (!Overflow) 2064 return Res; 2065 2066 return isNegative() ? APInt::getSignedMinValue(BitWidth) 2067 : APInt::getSignedMaxValue(BitWidth); 2068 } 2069 2070 APInt APInt::usub_sat(const APInt &RHS) const { 2071 bool Overflow; 2072 APInt Res = usub_ov(RHS, Overflow); 2073 if (!Overflow) 2074 return Res; 2075 2076 return APInt(BitWidth, 0); 2077 } 2078 2079 APInt APInt::smul_sat(const APInt &RHS) const { 2080 bool Overflow; 2081 APInt Res = smul_ov(RHS, Overflow); 2082 if (!Overflow) 2083 return Res; 2084 2085 // The result is negative if one and only one of inputs is negative. 2086 bool ResIsNegative = isNegative() ^ RHS.isNegative(); 2087 2088 return ResIsNegative ? APInt::getSignedMinValue(BitWidth) 2089 : APInt::getSignedMaxValue(BitWidth); 2090 } 2091 2092 APInt APInt::umul_sat(const APInt &RHS) const { 2093 bool Overflow; 2094 APInt Res = umul_ov(RHS, Overflow); 2095 if (!Overflow) 2096 return Res; 2097 2098 return APInt::getMaxValue(BitWidth); 2099 } 2100 2101 APInt APInt::sshl_sat(const APInt &RHS) const { 2102 bool Overflow; 2103 APInt Res = sshl_ov(RHS, Overflow); 2104 if (!Overflow) 2105 return Res; 2106 2107 return isNegative() ? APInt::getSignedMinValue(BitWidth) 2108 : APInt::getSignedMaxValue(BitWidth); 2109 } 2110 2111 APInt APInt::ushl_sat(const APInt &RHS) const { 2112 bool Overflow; 2113 APInt Res = ushl_ov(RHS, Overflow); 2114 if (!Overflow) 2115 return Res; 2116 2117 return APInt::getMaxValue(BitWidth); 2118 } 2119 2120 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { 2121 // Check our assumptions here 2122 assert(!str.empty() && "Invalid string length"); 2123 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || 2124 radix == 36) && 2125 "Radix should be 2, 8, 10, 16, or 36!"); 2126 2127 StringRef::iterator p = str.begin(); 2128 size_t slen = str.size(); 2129 bool isNeg = *p == '-'; 2130 if (*p == '-' || *p == '+') { 2131 p++; 2132 slen--; 2133 assert(slen && "String is only a sign, needs a value."); 2134 } 2135 assert((slen <= numbits || radix != 2) && "Insufficient bit width"); 2136 assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width"); 2137 assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width"); 2138 assert((((slen-1)*64)/22 <= numbits || radix != 10) && 2139 "Insufficient bit width"); 2140 2141 // Allocate memory if needed 2142 if (isSingleWord()) 2143 U.VAL = 0; 2144 else 2145 U.pVal = getClearedMemory(getNumWords()); 2146 2147 // Figure out if we can shift instead of multiply 2148 unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0); 2149 2150 // Enter digit traversal loop 2151 for (StringRef::iterator e = str.end(); p != e; ++p) { 2152 unsigned digit = getDigit(*p, radix); 2153 assert(digit < radix && "Invalid character in digit string"); 2154 2155 // Shift or multiply the value by the radix 2156 if (slen > 1) { 2157 if (shift) 2158 *this <<= shift; 2159 else 2160 *this *= radix; 2161 } 2162 2163 // Add in the digit we just interpreted 2164 *this += digit; 2165 } 2166 // If its negative, put it in two's complement form 2167 if (isNeg) 2168 this->negate(); 2169 } 2170 2171 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, 2172 bool Signed, bool formatAsCLiteral) const { 2173 assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 || 2174 Radix == 36) && 2175 "Radix should be 2, 8, 10, 16, or 36!"); 2176 2177 const char *Prefix = ""; 2178 if (formatAsCLiteral) { 2179 switch (Radix) { 2180 case 2: 2181 // Binary literals are a non-standard extension added in gcc 4.3: 2182 // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html 2183 Prefix = "0b"; 2184 break; 2185 case 8: 2186 Prefix = "0"; 2187 break; 2188 case 10: 2189 break; // No prefix 2190 case 16: 2191 Prefix = "0x"; 2192 break; 2193 default: 2194 llvm_unreachable("Invalid radix!"); 2195 } 2196 } 2197 2198 // First, check for a zero value and just short circuit the logic below. 2199 if (*this == 0) { 2200 while (*Prefix) { 2201 Str.push_back(*Prefix); 2202 ++Prefix; 2203 }; 2204 Str.push_back('0'); 2205 return; 2206 } 2207 2208 static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 2209 2210 if (isSingleWord()) { 2211 char Buffer[65]; 2212 char *BufPtr = std::end(Buffer); 2213 2214 uint64_t N; 2215 if (!Signed) { 2216 N = getZExtValue(); 2217 } else { 2218 int64_t I = getSExtValue(); 2219 if (I >= 0) { 2220 N = I; 2221 } else { 2222 Str.push_back('-'); 2223 N = -(uint64_t)I; 2224 } 2225 } 2226 2227 while (*Prefix) { 2228 Str.push_back(*Prefix); 2229 ++Prefix; 2230 }; 2231 2232 while (N) { 2233 *--BufPtr = Digits[N % Radix]; 2234 N /= Radix; 2235 } 2236 Str.append(BufPtr, std::end(Buffer)); 2237 return; 2238 } 2239 2240 APInt Tmp(*this); 2241 2242 if (Signed && isNegative()) { 2243 // They want to print the signed version and it is a negative value 2244 // Flip the bits and add one to turn it into the equivalent positive 2245 // value and put a '-' in the result. 2246 Tmp.negate(); 2247 Str.push_back('-'); 2248 } 2249 2250 while (*Prefix) { 2251 Str.push_back(*Prefix); 2252 ++Prefix; 2253 }; 2254 2255 // We insert the digits backward, then reverse them to get the right order. 2256 unsigned StartDig = Str.size(); 2257 2258 // For the 2, 8 and 16 bit cases, we can just shift instead of divide 2259 // because the number of bits per digit (1, 3 and 4 respectively) divides 2260 // equally. We just shift until the value is zero. 2261 if (Radix == 2 || Radix == 8 || Radix == 16) { 2262 // Just shift tmp right for each digit width until it becomes zero 2263 unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1)); 2264 unsigned MaskAmt = Radix - 1; 2265 2266 while (Tmp.getBoolValue()) { 2267 unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt; 2268 Str.push_back(Digits[Digit]); 2269 Tmp.lshrInPlace(ShiftAmt); 2270 } 2271 } else { 2272 while (Tmp.getBoolValue()) { 2273 uint64_t Digit; 2274 udivrem(Tmp, Radix, Tmp, Digit); 2275 assert(Digit < Radix && "divide failed"); 2276 Str.push_back(Digits[Digit]); 2277 } 2278 } 2279 2280 // Reverse the digits before returning. 2281 std::reverse(Str.begin()+StartDig, Str.end()); 2282 } 2283 2284 /// Returns the APInt as a std::string. Note that this is an inefficient method. 2285 /// It is better to pass in a SmallVector/SmallString to the methods above. 2286 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const { 2287 SmallString<40> S; 2288 toString(S, Radix, Signed, /* formatAsCLiteral = */false); 2289 return std::string(S.str()); 2290 } 2291 2292 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2293 LLVM_DUMP_METHOD void APInt::dump() const { 2294 SmallString<40> S, U; 2295 this->toStringUnsigned(U); 2296 this->toStringSigned(S); 2297 dbgs() << "APInt(" << BitWidth << "b, " 2298 << U << "u " << S << "s)\n"; 2299 } 2300 #endif 2301 2302 void APInt::print(raw_ostream &OS, bool isSigned) const { 2303 SmallString<40> S; 2304 this->toString(S, 10, isSigned, /* formatAsCLiteral = */false); 2305 OS << S; 2306 } 2307 2308 // This implements a variety of operations on a representation of 2309 // arbitrary precision, two's-complement, bignum integer values. 2310 2311 // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe 2312 // and unrestricting assumption. 2313 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0, 2314 "Part width must be divisible by 2!"); 2315 2316 /* Some handy functions local to this file. */ 2317 2318 /* Returns the integer part with the least significant BITS set. 2319 BITS cannot be zero. */ 2320 static inline APInt::WordType lowBitMask(unsigned bits) { 2321 assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD); 2322 2323 return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits); 2324 } 2325 2326 /* Returns the value of the lower half of PART. */ 2327 static inline APInt::WordType lowHalf(APInt::WordType part) { 2328 return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2); 2329 } 2330 2331 /* Returns the value of the upper half of PART. */ 2332 static inline APInt::WordType highHalf(APInt::WordType part) { 2333 return part >> (APInt::APINT_BITS_PER_WORD / 2); 2334 } 2335 2336 /* Returns the bit number of the most significant set bit of a part. 2337 If the input number has no bits set -1U is returned. */ 2338 static unsigned partMSB(APInt::WordType value) { 2339 return findLastSet(value, ZB_Max); 2340 } 2341 2342 /* Returns the bit number of the least significant set bit of a 2343 part. If the input number has no bits set -1U is returned. */ 2344 static unsigned partLSB(APInt::WordType value) { 2345 return findFirstSet(value, ZB_Max); 2346 } 2347 2348 /* Sets the least significant part of a bignum to the input value, and 2349 zeroes out higher parts. */ 2350 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) { 2351 assert(parts > 0); 2352 2353 dst[0] = part; 2354 for (unsigned i = 1; i < parts; i++) 2355 dst[i] = 0; 2356 } 2357 2358 /* Assign one bignum to another. */ 2359 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) { 2360 for (unsigned i = 0; i < parts; i++) 2361 dst[i] = src[i]; 2362 } 2363 2364 /* Returns true if a bignum is zero, false otherwise. */ 2365 bool APInt::tcIsZero(const WordType *src, unsigned parts) { 2366 for (unsigned i = 0; i < parts; i++) 2367 if (src[i]) 2368 return false; 2369 2370 return true; 2371 } 2372 2373 /* Extract the given bit of a bignum; returns 0 or 1. */ 2374 int APInt::tcExtractBit(const WordType *parts, unsigned bit) { 2375 return (parts[whichWord(bit)] & maskBit(bit)) != 0; 2376 } 2377 2378 /* Set the given bit of a bignum. */ 2379 void APInt::tcSetBit(WordType *parts, unsigned bit) { 2380 parts[whichWord(bit)] |= maskBit(bit); 2381 } 2382 2383 /* Clears the given bit of a bignum. */ 2384 void APInt::tcClearBit(WordType *parts, unsigned bit) { 2385 parts[whichWord(bit)] &= ~maskBit(bit); 2386 } 2387 2388 /* Returns the bit number of the least significant set bit of a 2389 number. If the input number has no bits set -1U is returned. */ 2390 unsigned APInt::tcLSB(const WordType *parts, unsigned n) { 2391 for (unsigned i = 0; i < n; i++) { 2392 if (parts[i] != 0) { 2393 unsigned lsb = partLSB(parts[i]); 2394 2395 return lsb + i * APINT_BITS_PER_WORD; 2396 } 2397 } 2398 2399 return -1U; 2400 } 2401 2402 /* Returns the bit number of the most significant set bit of a number. 2403 If the input number has no bits set -1U is returned. */ 2404 unsigned APInt::tcMSB(const WordType *parts, unsigned n) { 2405 do { 2406 --n; 2407 2408 if (parts[n] != 0) { 2409 unsigned msb = partMSB(parts[n]); 2410 2411 return msb + n * APINT_BITS_PER_WORD; 2412 } 2413 } while (n); 2414 2415 return -1U; 2416 } 2417 2418 /* Copy the bit vector of width srcBITS from SRC, starting at bit 2419 srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes 2420 the least significant bit of DST. All high bits above srcBITS in 2421 DST are zero-filled. */ 2422 void 2423 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src, 2424 unsigned srcBits, unsigned srcLSB) { 2425 unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD; 2426 assert(dstParts <= dstCount); 2427 2428 unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD; 2429 tcAssign (dst, src + firstSrcPart, dstParts); 2430 2431 unsigned shift = srcLSB % APINT_BITS_PER_WORD; 2432 tcShiftRight (dst, dstParts, shift); 2433 2434 /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC 2435 in DST. If this is less that srcBits, append the rest, else 2436 clear the high bits. */ 2437 unsigned n = dstParts * APINT_BITS_PER_WORD - shift; 2438 if (n < srcBits) { 2439 WordType mask = lowBitMask (srcBits - n); 2440 dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask) 2441 << n % APINT_BITS_PER_WORD); 2442 } else if (n > srcBits) { 2443 if (srcBits % APINT_BITS_PER_WORD) 2444 dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD); 2445 } 2446 2447 /* Clear high parts. */ 2448 while (dstParts < dstCount) 2449 dst[dstParts++] = 0; 2450 } 2451 2452 /* DST += RHS + C where C is zero or one. Returns the carry flag. */ 2453 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs, 2454 WordType c, unsigned parts) { 2455 assert(c <= 1); 2456 2457 for (unsigned i = 0; i < parts; i++) { 2458 WordType l = dst[i]; 2459 if (c) { 2460 dst[i] += rhs[i] + 1; 2461 c = (dst[i] <= l); 2462 } else { 2463 dst[i] += rhs[i]; 2464 c = (dst[i] < l); 2465 } 2466 } 2467 2468 return c; 2469 } 2470 2471 /// This function adds a single "word" integer, src, to the multiple 2472 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and 2473 /// 1 is returned if there is a carry out, otherwise 0 is returned. 2474 /// @returns the carry of the addition. 2475 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src, 2476 unsigned parts) { 2477 for (unsigned i = 0; i < parts; ++i) { 2478 dst[i] += src; 2479 if (dst[i] >= src) 2480 return 0; // No need to carry so exit early. 2481 src = 1; // Carry one to next digit. 2482 } 2483 2484 return 1; 2485 } 2486 2487 /* DST -= RHS + C where C is zero or one. Returns the carry flag. */ 2488 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs, 2489 WordType c, unsigned parts) { 2490 assert(c <= 1); 2491 2492 for (unsigned i = 0; i < parts; i++) { 2493 WordType l = dst[i]; 2494 if (c) { 2495 dst[i] -= rhs[i] + 1; 2496 c = (dst[i] >= l); 2497 } else { 2498 dst[i] -= rhs[i]; 2499 c = (dst[i] > l); 2500 } 2501 } 2502 2503 return c; 2504 } 2505 2506 /// This function subtracts a single "word" (64-bit word), src, from 2507 /// the multi-word integer array, dst[], propagating the borrowed 1 value until 2508 /// no further borrowing is needed or it runs out of "words" in dst. The result 2509 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not 2510 /// exhausted. In other words, if src > dst then this function returns 1, 2511 /// otherwise 0. 2512 /// @returns the borrow out of the subtraction 2513 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src, 2514 unsigned parts) { 2515 for (unsigned i = 0; i < parts; ++i) { 2516 WordType Dst = dst[i]; 2517 dst[i] -= src; 2518 if (src <= Dst) 2519 return 0; // No need to borrow so exit early. 2520 src = 1; // We have to "borrow 1" from next "word" 2521 } 2522 2523 return 1; 2524 } 2525 2526 /* Negate a bignum in-place. */ 2527 void APInt::tcNegate(WordType *dst, unsigned parts) { 2528 tcComplement(dst, parts); 2529 tcIncrement(dst, parts); 2530 } 2531 2532 /* DST += SRC * MULTIPLIER + CARRY if add is true 2533 DST = SRC * MULTIPLIER + CARRY if add is false 2534 2535 Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC 2536 they must start at the same point, i.e. DST == SRC. 2537 2538 If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is 2539 returned. Otherwise DST is filled with the least significant 2540 DSTPARTS parts of the result, and if all of the omitted higher 2541 parts were zero return zero, otherwise overflow occurred and 2542 return one. */ 2543 int APInt::tcMultiplyPart(WordType *dst, const WordType *src, 2544 WordType multiplier, WordType carry, 2545 unsigned srcParts, unsigned dstParts, 2546 bool add) { 2547 /* Otherwise our writes of DST kill our later reads of SRC. */ 2548 assert(dst <= src || dst >= src + srcParts); 2549 assert(dstParts <= srcParts + 1); 2550 2551 /* N loops; minimum of dstParts and srcParts. */ 2552 unsigned n = std::min(dstParts, srcParts); 2553 2554 for (unsigned i = 0; i < n; i++) { 2555 WordType low, mid, high, srcPart; 2556 2557 /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY. 2558 2559 This cannot overflow, because 2560 2561 (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1) 2562 2563 which is less than n^2. */ 2564 2565 srcPart = src[i]; 2566 2567 if (multiplier == 0 || srcPart == 0) { 2568 low = carry; 2569 high = 0; 2570 } else { 2571 low = lowHalf(srcPart) * lowHalf(multiplier); 2572 high = highHalf(srcPart) * highHalf(multiplier); 2573 2574 mid = lowHalf(srcPart) * highHalf(multiplier); 2575 high += highHalf(mid); 2576 mid <<= APINT_BITS_PER_WORD / 2; 2577 if (low + mid < low) 2578 high++; 2579 low += mid; 2580 2581 mid = highHalf(srcPart) * lowHalf(multiplier); 2582 high += highHalf(mid); 2583 mid <<= APINT_BITS_PER_WORD / 2; 2584 if (low + mid < low) 2585 high++; 2586 low += mid; 2587 2588 /* Now add carry. */ 2589 if (low + carry < low) 2590 high++; 2591 low += carry; 2592 } 2593 2594 if (add) { 2595 /* And now DST[i], and store the new low part there. */ 2596 if (low + dst[i] < low) 2597 high++; 2598 dst[i] += low; 2599 } else 2600 dst[i] = low; 2601 2602 carry = high; 2603 } 2604 2605 if (srcParts < dstParts) { 2606 /* Full multiplication, there is no overflow. */ 2607 assert(srcParts + 1 == dstParts); 2608 dst[srcParts] = carry; 2609 return 0; 2610 } 2611 2612 /* We overflowed if there is carry. */ 2613 if (carry) 2614 return 1; 2615 2616 /* We would overflow if any significant unwritten parts would be 2617 non-zero. This is true if any remaining src parts are non-zero 2618 and the multiplier is non-zero. */ 2619 if (multiplier) 2620 for (unsigned i = dstParts; i < srcParts; i++) 2621 if (src[i]) 2622 return 1; 2623 2624 /* We fitted in the narrow destination. */ 2625 return 0; 2626 } 2627 2628 /* DST = LHS * RHS, where DST has the same width as the operands and 2629 is filled with the least significant parts of the result. Returns 2630 one if overflow occurred, otherwise zero. DST must be disjoint 2631 from both operands. */ 2632 int APInt::tcMultiply(WordType *dst, const WordType *lhs, 2633 const WordType *rhs, unsigned parts) { 2634 assert(dst != lhs && dst != rhs); 2635 2636 int overflow = 0; 2637 tcSet(dst, 0, parts); 2638 2639 for (unsigned i = 0; i < parts; i++) 2640 overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts, 2641 parts - i, true); 2642 2643 return overflow; 2644 } 2645 2646 /// DST = LHS * RHS, where DST has width the sum of the widths of the 2647 /// operands. No overflow occurs. DST must be disjoint from both operands. 2648 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs, 2649 const WordType *rhs, unsigned lhsParts, 2650 unsigned rhsParts) { 2651 /* Put the narrower number on the LHS for less loops below. */ 2652 if (lhsParts > rhsParts) 2653 return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts); 2654 2655 assert(dst != lhs && dst != rhs); 2656 2657 tcSet(dst, 0, rhsParts); 2658 2659 for (unsigned i = 0; i < lhsParts; i++) 2660 tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true); 2661 } 2662 2663 /* If RHS is zero LHS and REMAINDER are left unchanged, return one. 2664 Otherwise set LHS to LHS / RHS with the fractional part discarded, 2665 set REMAINDER to the remainder, return zero. i.e. 2666 2667 OLD_LHS = RHS * LHS + REMAINDER 2668 2669 SCRATCH is a bignum of the same size as the operands and result for 2670 use by the routine; its contents need not be initialized and are 2671 destroyed. LHS, REMAINDER and SCRATCH must be distinct. 2672 */ 2673 int APInt::tcDivide(WordType *lhs, const WordType *rhs, 2674 WordType *remainder, WordType *srhs, 2675 unsigned parts) { 2676 assert(lhs != remainder && lhs != srhs && remainder != srhs); 2677 2678 unsigned shiftCount = tcMSB(rhs, parts) + 1; 2679 if (shiftCount == 0) 2680 return true; 2681 2682 shiftCount = parts * APINT_BITS_PER_WORD - shiftCount; 2683 unsigned n = shiftCount / APINT_BITS_PER_WORD; 2684 WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD); 2685 2686 tcAssign(srhs, rhs, parts); 2687 tcShiftLeft(srhs, parts, shiftCount); 2688 tcAssign(remainder, lhs, parts); 2689 tcSet(lhs, 0, parts); 2690 2691 /* Loop, subtracting SRHS if REMAINDER is greater and adding that to 2692 the total. */ 2693 for (;;) { 2694 int compare = tcCompare(remainder, srhs, parts); 2695 if (compare >= 0) { 2696 tcSubtract(remainder, srhs, 0, parts); 2697 lhs[n] |= mask; 2698 } 2699 2700 if (shiftCount == 0) 2701 break; 2702 shiftCount--; 2703 tcShiftRight(srhs, parts, 1); 2704 if ((mask >>= 1) == 0) { 2705 mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1); 2706 n--; 2707 } 2708 } 2709 2710 return false; 2711 } 2712 2713 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are 2714 /// no restrictions on Count. 2715 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) { 2716 // Don't bother performing a no-op shift. 2717 if (!Count) 2718 return; 2719 2720 // WordShift is the inter-part shift; BitShift is the intra-part shift. 2721 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); 2722 unsigned BitShift = Count % APINT_BITS_PER_WORD; 2723 2724 // Fastpath for moving by whole words. 2725 if (BitShift == 0) { 2726 std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE); 2727 } else { 2728 while (Words-- > WordShift) { 2729 Dst[Words] = Dst[Words - WordShift] << BitShift; 2730 if (Words > WordShift) 2731 Dst[Words] |= 2732 Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift); 2733 } 2734 } 2735 2736 // Fill in the remainder with 0s. 2737 std::memset(Dst, 0, WordShift * APINT_WORD_SIZE); 2738 } 2739 2740 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There 2741 /// are no restrictions on Count. 2742 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) { 2743 // Don't bother performing a no-op shift. 2744 if (!Count) 2745 return; 2746 2747 // WordShift is the inter-part shift; BitShift is the intra-part shift. 2748 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); 2749 unsigned BitShift = Count % APINT_BITS_PER_WORD; 2750 2751 unsigned WordsToMove = Words - WordShift; 2752 // Fastpath for moving by whole words. 2753 if (BitShift == 0) { 2754 std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE); 2755 } else { 2756 for (unsigned i = 0; i != WordsToMove; ++i) { 2757 Dst[i] = Dst[i + WordShift] >> BitShift; 2758 if (i + 1 != WordsToMove) 2759 Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift); 2760 } 2761 } 2762 2763 // Fill in the remainder with 0s. 2764 std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE); 2765 } 2766 2767 /* Bitwise and of two bignums. */ 2768 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) { 2769 for (unsigned i = 0; i < parts; i++) 2770 dst[i] &= rhs[i]; 2771 } 2772 2773 /* Bitwise inclusive or of two bignums. */ 2774 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) { 2775 for (unsigned i = 0; i < parts; i++) 2776 dst[i] |= rhs[i]; 2777 } 2778 2779 /* Bitwise exclusive or of two bignums. */ 2780 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) { 2781 for (unsigned i = 0; i < parts; i++) 2782 dst[i] ^= rhs[i]; 2783 } 2784 2785 /* Complement a bignum in-place. */ 2786 void APInt::tcComplement(WordType *dst, unsigned parts) { 2787 for (unsigned i = 0; i < parts; i++) 2788 dst[i] = ~dst[i]; 2789 } 2790 2791 /* Comparison (unsigned) of two bignums. */ 2792 int APInt::tcCompare(const WordType *lhs, const WordType *rhs, 2793 unsigned parts) { 2794 while (parts) { 2795 parts--; 2796 if (lhs[parts] != rhs[parts]) 2797 return (lhs[parts] > rhs[parts]) ? 1 : -1; 2798 } 2799 2800 return 0; 2801 } 2802 2803 /* Set the least significant BITS bits of a bignum, clear the 2804 rest. */ 2805 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts, 2806 unsigned bits) { 2807 unsigned i = 0; 2808 while (bits > APINT_BITS_PER_WORD) { 2809 dst[i++] = ~(WordType) 0; 2810 bits -= APINT_BITS_PER_WORD; 2811 } 2812 2813 if (bits) 2814 dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits); 2815 2816 while (i < parts) 2817 dst[i++] = 0; 2818 } 2819 2820 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B, 2821 APInt::Rounding RM) { 2822 // Currently udivrem always rounds down. 2823 switch (RM) { 2824 case APInt::Rounding::DOWN: 2825 case APInt::Rounding::TOWARD_ZERO: 2826 return A.udiv(B); 2827 case APInt::Rounding::UP: { 2828 APInt Quo, Rem; 2829 APInt::udivrem(A, B, Quo, Rem); 2830 if (Rem == 0) 2831 return Quo; 2832 return Quo + 1; 2833 } 2834 } 2835 llvm_unreachable("Unknown APInt::Rounding enum"); 2836 } 2837 2838 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B, 2839 APInt::Rounding RM) { 2840 switch (RM) { 2841 case APInt::Rounding::DOWN: 2842 case APInt::Rounding::UP: { 2843 APInt Quo, Rem; 2844 APInt::sdivrem(A, B, Quo, Rem); 2845 if (Rem == 0) 2846 return Quo; 2847 // This algorithm deals with arbitrary rounding mode used by sdivrem. 2848 // We want to check whether the non-integer part of the mathematical value 2849 // is negative or not. If the non-integer part is negative, we need to round 2850 // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's 2851 // already rounded down. 2852 if (RM == APInt::Rounding::DOWN) { 2853 if (Rem.isNegative() != B.isNegative()) 2854 return Quo - 1; 2855 return Quo; 2856 } 2857 if (Rem.isNegative() != B.isNegative()) 2858 return Quo; 2859 return Quo + 1; 2860 } 2861 // Currently sdiv rounds towards zero. 2862 case APInt::Rounding::TOWARD_ZERO: 2863 return A.sdiv(B); 2864 } 2865 llvm_unreachable("Unknown APInt::Rounding enum"); 2866 } 2867 2868 Optional<APInt> 2869 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C, 2870 unsigned RangeWidth) { 2871 unsigned CoeffWidth = A.getBitWidth(); 2872 assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth()); 2873 assert(RangeWidth <= CoeffWidth && 2874 "Value range width should be less than coefficient width"); 2875 assert(RangeWidth > 1 && "Value range bit width should be > 1"); 2876 2877 LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B 2878 << "x + " << C << ", rw:" << RangeWidth << '\n'); 2879 2880 // Identify 0 as a (non)solution immediately. 2881 if (C.sextOrTrunc(RangeWidth).isNullValue() ) { 2882 LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n"); 2883 return APInt(CoeffWidth, 0); 2884 } 2885 2886 // The result of APInt arithmetic has the same bit width as the operands, 2887 // so it can actually lose high bits. A product of two n-bit integers needs 2888 // 2n-1 bits to represent the full value. 2889 // The operation done below (on quadratic coefficients) that can produce 2890 // the largest value is the evaluation of the equation during bisection, 2891 // which needs 3 times the bitwidth of the coefficient, so the total number 2892 // of required bits is 3n. 2893 // 2894 // The purpose of this extension is to simulate the set Z of all integers, 2895 // where n+1 > n for all n in Z. In Z it makes sense to talk about positive 2896 // and negative numbers (not so much in a modulo arithmetic). The method 2897 // used to solve the equation is based on the standard formula for real 2898 // numbers, and uses the concepts of "positive" and "negative" with their 2899 // usual meanings. 2900 CoeffWidth *= 3; 2901 A = A.sext(CoeffWidth); 2902 B = B.sext(CoeffWidth); 2903 C = C.sext(CoeffWidth); 2904 2905 // Make A > 0 for simplicity. Negate cannot overflow at this point because 2906 // the bit width has increased. 2907 if (A.isNegative()) { 2908 A.negate(); 2909 B.negate(); 2910 C.negate(); 2911 } 2912 2913 // Solving an equation q(x) = 0 with coefficients in modular arithmetic 2914 // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ..., 2915 // and R = 2^BitWidth. 2916 // Since we're trying not only to find exact solutions, but also values 2917 // that "wrap around", such a set will always have a solution, i.e. an x 2918 // that satisfies at least one of the equations, or such that |q(x)| 2919 // exceeds kR, while |q(x-1)| for the same k does not. 2920 // 2921 // We need to find a value k, such that Ax^2 + Bx + C = kR will have a 2922 // positive solution n (in the above sense), and also such that the n 2923 // will be the least among all solutions corresponding to k = 0, 1, ... 2924 // (more precisely, the least element in the set 2925 // { n(k) | k is such that a solution n(k) exists }). 2926 // 2927 // Consider the parabola (over real numbers) that corresponds to the 2928 // quadratic equation. Since A > 0, the arms of the parabola will point 2929 // up. Picking different values of k will shift it up and down by R. 2930 // 2931 // We want to shift the parabola in such a way as to reduce the problem 2932 // of solving q(x) = kR to solving shifted_q(x) = 0. 2933 // (The interesting solutions are the ceilings of the real number 2934 // solutions.) 2935 APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth); 2936 APInt TwoA = 2 * A; 2937 APInt SqrB = B * B; 2938 bool PickLow; 2939 2940 auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt { 2941 assert(A.isStrictlyPositive()); 2942 APInt T = V.abs().urem(A); 2943 if (T.isNullValue()) 2944 return V; 2945 return V.isNegative() ? V+T : V+(A-T); 2946 }; 2947 2948 // The vertex of the parabola is at -B/2A, but since A > 0, it's negative 2949 // iff B is positive. 2950 if (B.isNonNegative()) { 2951 // If B >= 0, the vertex it at a negative location (or at 0), so in 2952 // order to have a non-negative solution we need to pick k that makes 2953 // C-kR negative. To satisfy all the requirements for the solution 2954 // that we are looking for, it needs to be closest to 0 of all k. 2955 C = C.srem(R); 2956 if (C.isStrictlyPositive()) 2957 C -= R; 2958 // Pick the greater solution. 2959 PickLow = false; 2960 } else { 2961 // If B < 0, the vertex is at a positive location. For any solution 2962 // to exist, the discriminant must be non-negative. This means that 2963 // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a 2964 // lower bound on values of k: kR >= C - B^2/4A. 2965 APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0. 2966 // Round LowkR up (towards +inf) to the nearest kR. 2967 LowkR = RoundUp(LowkR, R); 2968 2969 // If there exists k meeting the condition above, and such that 2970 // C-kR > 0, there will be two positive real number solutions of 2971 // q(x) = kR. Out of all such values of k, pick the one that makes 2972 // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0). 2973 // In other words, find maximum k such that LowkR <= kR < C. 2974 if (C.sgt(LowkR)) { 2975 // If LowkR < C, then such a k is guaranteed to exist because 2976 // LowkR itself is a multiple of R. 2977 C -= -RoundUp(-C, R); // C = C - RoundDown(C, R) 2978 // Pick the smaller solution. 2979 PickLow = true; 2980 } else { 2981 // If C-kR < 0 for all potential k's, it means that one solution 2982 // will be negative, while the other will be positive. The positive 2983 // solution will shift towards 0 if the parabola is moved up. 2984 // Pick the kR closest to the lower bound (i.e. make C-kR closest 2985 // to 0, or in other words, out of all parabolas that have solutions, 2986 // pick the one that is the farthest "up"). 2987 // Since LowkR is itself a multiple of R, simply take C-LowkR. 2988 C -= LowkR; 2989 // Pick the greater solution. 2990 PickLow = false; 2991 } 2992 } 2993 2994 LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + " 2995 << B << "x + " << C << ", rw:" << RangeWidth << '\n'); 2996 2997 APInt D = SqrB - 4*A*C; 2998 assert(D.isNonNegative() && "Negative discriminant"); 2999 APInt SQ = D.sqrt(); 3000 3001 APInt Q = SQ * SQ; 3002 bool InexactSQ = Q != D; 3003 // The calculated SQ may actually be greater than the exact (non-integer) 3004 // value. If that's the case, decrement SQ to get a value that is lower. 3005 if (Q.sgt(D)) 3006 SQ -= 1; 3007 3008 APInt X; 3009 APInt Rem; 3010 3011 // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact. 3012 // When using the quadratic formula directly, the calculated low root 3013 // may be greater than the exact one, since we would be subtracting SQ. 3014 // To make sure that the calculated root is not greater than the exact 3015 // one, subtract SQ+1 when calculating the low root (for inexact value 3016 // of SQ). 3017 if (PickLow) 3018 APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem); 3019 else 3020 APInt::sdivrem(-B + SQ, TwoA, X, Rem); 3021 3022 // The updated coefficients should be such that the (exact) solution is 3023 // positive. Since APInt division rounds towards 0, the calculated one 3024 // can be 0, but cannot be negative. 3025 assert(X.isNonNegative() && "Solution should be non-negative"); 3026 3027 if (!InexactSQ && Rem.isNullValue()) { 3028 LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n'); 3029 return X; 3030 } 3031 3032 assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D"); 3033 // The exact value of the square root of D should be between SQ and SQ+1. 3034 // This implies that the solution should be between that corresponding to 3035 // SQ (i.e. X) and that corresponding to SQ+1. 3036 // 3037 // The calculated X cannot be greater than the exact (real) solution. 3038 // Actually it must be strictly less than the exact solution, while 3039 // X+1 will be greater than or equal to it. 3040 3041 APInt VX = (A*X + B)*X + C; 3042 APInt VY = VX + TwoA*X + A + B; 3043 bool SignChange = VX.isNegative() != VY.isNegative() || 3044 VX.isNullValue() != VY.isNullValue(); 3045 // If the sign did not change between X and X+1, X is not a valid solution. 3046 // This could happen when the actual (exact) roots don't have an integer 3047 // between them, so they would both be contained between X and X+1. 3048 if (!SignChange) { 3049 LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n"); 3050 return None; 3051 } 3052 3053 X += 1; 3054 LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n'); 3055 return X; 3056 } 3057 3058 Optional<unsigned> 3059 llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) { 3060 assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth"); 3061 if (A == B) 3062 return llvm::None; 3063 return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1); 3064 } 3065 3066 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst 3067 /// with the integer held in IntVal. 3068 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, 3069 unsigned StoreBytes) { 3070 assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!"); 3071 const uint8_t *Src = (const uint8_t *)IntVal.getRawData(); 3072 3073 if (sys::IsLittleEndianHost) { 3074 // Little-endian host - the source is ordered from LSB to MSB. Order the 3075 // destination from LSB to MSB: Do a straight copy. 3076 memcpy(Dst, Src, StoreBytes); 3077 } else { 3078 // Big-endian host - the source is an array of 64 bit words ordered from 3079 // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination 3080 // from MSB to LSB: Reverse the word order, but not the bytes in a word. 3081 while (StoreBytes > sizeof(uint64_t)) { 3082 StoreBytes -= sizeof(uint64_t); 3083 // May not be aligned so use memcpy. 3084 memcpy(Dst + StoreBytes, Src, sizeof(uint64_t)); 3085 Src += sizeof(uint64_t); 3086 } 3087 3088 memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes); 3089 } 3090 } 3091 3092 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting 3093 /// from Src into IntVal, which is assumed to be wide enough and to hold zero. 3094 void llvm::LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, 3095 unsigned LoadBytes) { 3096 assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!"); 3097 uint8_t *Dst = reinterpret_cast<uint8_t *>( 3098 const_cast<uint64_t *>(IntVal.getRawData())); 3099 3100 if (sys::IsLittleEndianHost) 3101 // Little-endian host - the destination must be ordered from LSB to MSB. 3102 // The source is ordered from LSB to MSB: Do a straight copy. 3103 memcpy(Dst, Src, LoadBytes); 3104 else { 3105 // Big-endian - the destination is an array of 64 bit words ordered from 3106 // LSW to MSW. Each word must be ordered from MSB to LSB. The source is 3107 // ordered from MSB to LSB: Reverse the word order, but not the bytes in 3108 // a word. 3109 while (LoadBytes > sizeof(uint64_t)) { 3110 LoadBytes -= sizeof(uint64_t); 3111 // May not be aligned so use memcpy. 3112 memcpy(Dst, Src + LoadBytes, sizeof(uint64_t)); 3113 Dst += sizeof(uint64_t); 3114 } 3115 3116 memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes); 3117 } 3118 } 3119