1 //===-- APInt.cpp - Implement APInt class ---------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a class to represent arbitrary precision integer 10 // constant values and provide a variety of arithmetic operations on them. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/APInt.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/FoldingSet.h" 17 #include "llvm/ADT/Hashing.h" 18 #include "llvm/ADT/Optional.h" 19 #include "llvm/ADT/SmallString.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/ADT/bit.h" 22 #include "llvm/Config/llvm-config.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/ErrorHandling.h" 25 #include "llvm/Support/MathExtras.h" 26 #include "llvm/Support/raw_ostream.h" 27 #include <climits> 28 #include <cmath> 29 #include <cstdlib> 30 #include <cstring> 31 using namespace llvm; 32 33 #define DEBUG_TYPE "apint" 34 35 /// A utility function for allocating memory, checking for allocation failures, 36 /// and ensuring the contents are zeroed. 37 inline static uint64_t* getClearedMemory(unsigned numWords) { 38 uint64_t *result = new uint64_t[numWords]; 39 memset(result, 0, numWords * sizeof(uint64_t)); 40 return result; 41 } 42 43 /// A utility function for allocating memory and checking for allocation 44 /// failure. The content is not zeroed. 45 inline static uint64_t* getMemory(unsigned numWords) { 46 return new uint64_t[numWords]; 47 } 48 49 /// A utility function that converts a character to a digit. 50 inline static unsigned getDigit(char cdigit, uint8_t radix) { 51 unsigned r; 52 53 if (radix == 16 || radix == 36) { 54 r = cdigit - '0'; 55 if (r <= 9) 56 return r; 57 58 r = cdigit - 'A'; 59 if (r <= radix - 11U) 60 return r + 10; 61 62 r = cdigit - 'a'; 63 if (r <= radix - 11U) 64 return r + 10; 65 66 radix = 10; 67 } 68 69 r = cdigit - '0'; 70 if (r < radix) 71 return r; 72 73 return -1U; 74 } 75 76 77 void APInt::initSlowCase(uint64_t val, bool isSigned) { 78 U.pVal = getClearedMemory(getNumWords()); 79 U.pVal[0] = val; 80 if (isSigned && int64_t(val) < 0) 81 for (unsigned i = 1; i < getNumWords(); ++i) 82 U.pVal[i] = WORDTYPE_MAX; 83 clearUnusedBits(); 84 } 85 86 void APInt::initSlowCase(const APInt& that) { 87 U.pVal = getMemory(getNumWords()); 88 memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE); 89 } 90 91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) { 92 assert(BitWidth && "Bitwidth too small"); 93 assert(bigVal.data() && "Null pointer detected!"); 94 if (isSingleWord()) 95 U.VAL = bigVal[0]; 96 else { 97 // Get memory, cleared to 0 98 U.pVal = getClearedMemory(getNumWords()); 99 // Calculate the number of words to copy 100 unsigned words = std::min<unsigned>(bigVal.size(), getNumWords()); 101 // Copy the words from bigVal to pVal 102 memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE); 103 } 104 // Make sure unused high bits are cleared 105 clearUnusedBits(); 106 } 107 108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal) 109 : BitWidth(numBits) { 110 initFromArray(bigVal); 111 } 112 113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) 114 : BitWidth(numBits) { 115 initFromArray(makeArrayRef(bigVal, numWords)); 116 } 117 118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix) 119 : BitWidth(numbits) { 120 assert(BitWidth && "Bitwidth too small"); 121 fromString(numbits, Str, radix); 122 } 123 124 void APInt::reallocate(unsigned NewBitWidth) { 125 // If the number of words is the same we can just change the width and stop. 126 if (getNumWords() == getNumWords(NewBitWidth)) { 127 BitWidth = NewBitWidth; 128 return; 129 } 130 131 // If we have an allocation, delete it. 132 if (!isSingleWord()) 133 delete [] U.pVal; 134 135 // Update BitWidth. 136 BitWidth = NewBitWidth; 137 138 // If we are supposed to have an allocation, create it. 139 if (!isSingleWord()) 140 U.pVal = getMemory(getNumWords()); 141 } 142 143 void APInt::AssignSlowCase(const APInt& RHS) { 144 // Don't do anything for X = X 145 if (this == &RHS) 146 return; 147 148 // Adjust the bit width and handle allocations as necessary. 149 reallocate(RHS.getBitWidth()); 150 151 // Copy the data. 152 if (isSingleWord()) 153 U.VAL = RHS.U.VAL; 154 else 155 memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE); 156 } 157 158 /// This method 'profiles' an APInt for use with FoldingSet. 159 void APInt::Profile(FoldingSetNodeID& ID) const { 160 ID.AddInteger(BitWidth); 161 162 if (isSingleWord()) { 163 ID.AddInteger(U.VAL); 164 return; 165 } 166 167 unsigned NumWords = getNumWords(); 168 for (unsigned i = 0; i < NumWords; ++i) 169 ID.AddInteger(U.pVal[i]); 170 } 171 172 /// Prefix increment operator. Increments the APInt by one. 173 APInt& APInt::operator++() { 174 if (isSingleWord()) 175 ++U.VAL; 176 else 177 tcIncrement(U.pVal, getNumWords()); 178 return clearUnusedBits(); 179 } 180 181 /// Prefix decrement operator. Decrements the APInt by one. 182 APInt& APInt::operator--() { 183 if (isSingleWord()) 184 --U.VAL; 185 else 186 tcDecrement(U.pVal, getNumWords()); 187 return clearUnusedBits(); 188 } 189 190 /// Adds the RHS APInt to this APInt. 191 /// @returns this, after addition of RHS. 192 /// Addition assignment operator. 193 APInt& APInt::operator+=(const APInt& RHS) { 194 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 195 if (isSingleWord()) 196 U.VAL += RHS.U.VAL; 197 else 198 tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords()); 199 return clearUnusedBits(); 200 } 201 202 APInt& APInt::operator+=(uint64_t RHS) { 203 if (isSingleWord()) 204 U.VAL += RHS; 205 else 206 tcAddPart(U.pVal, RHS, getNumWords()); 207 return clearUnusedBits(); 208 } 209 210 /// Subtracts the RHS APInt from this APInt 211 /// @returns this, after subtraction 212 /// Subtraction assignment operator. 213 APInt& APInt::operator-=(const APInt& RHS) { 214 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 215 if (isSingleWord()) 216 U.VAL -= RHS.U.VAL; 217 else 218 tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords()); 219 return clearUnusedBits(); 220 } 221 222 APInt& APInt::operator-=(uint64_t RHS) { 223 if (isSingleWord()) 224 U.VAL -= RHS; 225 else 226 tcSubtractPart(U.pVal, RHS, getNumWords()); 227 return clearUnusedBits(); 228 } 229 230 APInt APInt::operator*(const APInt& RHS) const { 231 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 232 if (isSingleWord()) 233 return APInt(BitWidth, U.VAL * RHS.U.VAL); 234 235 APInt Result(getMemory(getNumWords()), getBitWidth()); 236 237 tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords()); 238 239 Result.clearUnusedBits(); 240 return Result; 241 } 242 243 void APInt::AndAssignSlowCase(const APInt& RHS) { 244 tcAnd(U.pVal, RHS.U.pVal, getNumWords()); 245 } 246 247 void APInt::OrAssignSlowCase(const APInt& RHS) { 248 tcOr(U.pVal, RHS.U.pVal, getNumWords()); 249 } 250 251 void APInt::XorAssignSlowCase(const APInt& RHS) { 252 tcXor(U.pVal, RHS.U.pVal, getNumWords()); 253 } 254 255 APInt& APInt::operator*=(const APInt& RHS) { 256 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 257 *this = *this * RHS; 258 return *this; 259 } 260 261 APInt& APInt::operator*=(uint64_t RHS) { 262 if (isSingleWord()) { 263 U.VAL *= RHS; 264 } else { 265 unsigned NumWords = getNumWords(); 266 tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false); 267 } 268 return clearUnusedBits(); 269 } 270 271 bool APInt::EqualSlowCase(const APInt& RHS) const { 272 return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal); 273 } 274 275 int APInt::compare(const APInt& RHS) const { 276 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); 277 if (isSingleWord()) 278 return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL; 279 280 return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); 281 } 282 283 int APInt::compareSigned(const APInt& RHS) const { 284 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); 285 if (isSingleWord()) { 286 int64_t lhsSext = SignExtend64(U.VAL, BitWidth); 287 int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth); 288 return lhsSext < rhsSext ? -1 : lhsSext > rhsSext; 289 } 290 291 bool lhsNeg = isNegative(); 292 bool rhsNeg = RHS.isNegative(); 293 294 // If the sign bits don't match, then (LHS < RHS) if LHS is negative 295 if (lhsNeg != rhsNeg) 296 return lhsNeg ? -1 : 1; 297 298 // Otherwise we can just use an unsigned comparison, because even negative 299 // numbers compare correctly this way if both have the same signed-ness. 300 return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); 301 } 302 303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { 304 unsigned loWord = whichWord(loBit); 305 unsigned hiWord = whichWord(hiBit); 306 307 // Create an initial mask for the low word with zeros below loBit. 308 uint64_t loMask = WORDTYPE_MAX << whichBit(loBit); 309 310 // If hiBit is not aligned, we need a high mask. 311 unsigned hiShiftAmt = whichBit(hiBit); 312 if (hiShiftAmt != 0) { 313 // Create a high mask with zeros above hiBit. 314 uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt); 315 // If loWord and hiWord are equal, then we combine the masks. Otherwise, 316 // set the bits in hiWord. 317 if (hiWord == loWord) 318 loMask &= hiMask; 319 else 320 U.pVal[hiWord] |= hiMask; 321 } 322 // Apply the mask to the low word. 323 U.pVal[loWord] |= loMask; 324 325 // Fill any words between loWord and hiWord with all ones. 326 for (unsigned word = loWord + 1; word < hiWord; ++word) 327 U.pVal[word] = WORDTYPE_MAX; 328 } 329 330 /// Toggle every bit to its opposite value. 331 void APInt::flipAllBitsSlowCase() { 332 tcComplement(U.pVal, getNumWords()); 333 clearUnusedBits(); 334 } 335 336 /// Toggle a given bit to its opposite value whose position is given 337 /// as "bitPosition". 338 /// Toggles a given bit to its opposite value. 339 void APInt::flipBit(unsigned bitPosition) { 340 assert(bitPosition < BitWidth && "Out of the bit-width range!"); 341 if ((*this)[bitPosition]) clearBit(bitPosition); 342 else setBit(bitPosition); 343 } 344 345 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { 346 unsigned subBitWidth = subBits.getBitWidth(); 347 assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth && 348 "Illegal bit insertion"); 349 350 // Insertion is a direct copy. 351 if (subBitWidth == BitWidth) { 352 *this = subBits; 353 return; 354 } 355 356 // Single word result can be done as a direct bitmask. 357 if (isSingleWord()) { 358 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth); 359 U.VAL &= ~(mask << bitPosition); 360 U.VAL |= (subBits.U.VAL << bitPosition); 361 return; 362 } 363 364 unsigned loBit = whichBit(bitPosition); 365 unsigned loWord = whichWord(bitPosition); 366 unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1); 367 368 // Insertion within a single word can be done as a direct bitmask. 369 if (loWord == hi1Word) { 370 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth); 371 U.pVal[loWord] &= ~(mask << loBit); 372 U.pVal[loWord] |= (subBits.U.VAL << loBit); 373 return; 374 } 375 376 // Insert on word boundaries. 377 if (loBit == 0) { 378 // Direct copy whole words. 379 unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD; 380 memcpy(U.pVal + loWord, subBits.getRawData(), 381 numWholeSubWords * APINT_WORD_SIZE); 382 383 // Mask+insert remaining bits. 384 unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD; 385 if (remainingBits != 0) { 386 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits); 387 U.pVal[hi1Word] &= ~mask; 388 U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1); 389 } 390 return; 391 } 392 393 // General case - set/clear individual bits in dst based on src. 394 // TODO - there is scope for optimization here, but at the moment this code 395 // path is barely used so prefer readability over performance. 396 for (unsigned i = 0; i != subBitWidth; ++i) { 397 if (subBits[i]) 398 setBit(bitPosition + i); 399 else 400 clearBit(bitPosition + i); 401 } 402 } 403 404 void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) { 405 uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits); 406 subBits &= maskBits; 407 if (isSingleWord()) { 408 U.VAL &= ~(maskBits << bitPosition); 409 U.VAL |= subBits << bitPosition; 410 return; 411 } 412 413 unsigned loBit = whichBit(bitPosition); 414 unsigned loWord = whichWord(bitPosition); 415 unsigned hiWord = whichWord(bitPosition + numBits - 1); 416 if (loWord == hiWord) { 417 U.pVal[loWord] &= ~(maskBits << loBit); 418 U.pVal[loWord] |= subBits << loBit; 419 return; 420 } 421 422 static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected"); 423 unsigned wordBits = 8 * sizeof(WordType); 424 U.pVal[loWord] &= ~(maskBits << loBit); 425 U.pVal[loWord] |= subBits << loBit; 426 427 U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit)); 428 U.pVal[hiWord] |= subBits >> (wordBits - loBit); 429 } 430 431 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { 432 assert(numBits > 0 && "Can't extract zero bits"); 433 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && 434 "Illegal bit extraction"); 435 436 if (isSingleWord()) 437 return APInt(numBits, U.VAL >> bitPosition); 438 439 unsigned loBit = whichBit(bitPosition); 440 unsigned loWord = whichWord(bitPosition); 441 unsigned hiWord = whichWord(bitPosition + numBits - 1); 442 443 // Single word result extracting bits from a single word source. 444 if (loWord == hiWord) 445 return APInt(numBits, U.pVal[loWord] >> loBit); 446 447 // Extracting bits that start on a source word boundary can be done 448 // as a fast memory copy. 449 if (loBit == 0) 450 return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord)); 451 452 // General case - shift + copy source words directly into place. 453 APInt Result(numBits, 0); 454 unsigned NumSrcWords = getNumWords(); 455 unsigned NumDstWords = Result.getNumWords(); 456 457 uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal; 458 for (unsigned word = 0; word < NumDstWords; ++word) { 459 uint64_t w0 = U.pVal[loWord + word]; 460 uint64_t w1 = 461 (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0; 462 DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit)); 463 } 464 465 return Result.clearUnusedBits(); 466 } 467 468 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits, 469 unsigned bitPosition) const { 470 assert(numBits > 0 && "Can't extract zero bits"); 471 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && 472 "Illegal bit extraction"); 473 assert(numBits <= 64 && "Illegal bit extraction"); 474 475 uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits); 476 if (isSingleWord()) 477 return (U.VAL >> bitPosition) & maskBits; 478 479 unsigned loBit = whichBit(bitPosition); 480 unsigned loWord = whichWord(bitPosition); 481 unsigned hiWord = whichWord(bitPosition + numBits - 1); 482 if (loWord == hiWord) 483 return (U.pVal[loWord] >> loBit) & maskBits; 484 485 static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected"); 486 unsigned wordBits = 8 * sizeof(WordType); 487 uint64_t retBits = U.pVal[loWord] >> loBit; 488 retBits |= U.pVal[hiWord] << (wordBits - loBit); 489 retBits &= maskBits; 490 return retBits; 491 } 492 493 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { 494 assert(!str.empty() && "Invalid string length"); 495 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || 496 radix == 36) && 497 "Radix should be 2, 8, 10, 16, or 36!"); 498 499 size_t slen = str.size(); 500 501 // Each computation below needs to know if it's negative. 502 StringRef::iterator p = str.begin(); 503 unsigned isNegative = *p == '-'; 504 if (*p == '-' || *p == '+') { 505 p++; 506 slen--; 507 assert(slen && "String is only a sign, needs a value."); 508 } 509 510 // For radixes of power-of-two values, the bits required is accurately and 511 // easily computed 512 if (radix == 2) 513 return slen + isNegative; 514 if (radix == 8) 515 return slen * 3 + isNegative; 516 if (radix == 16) 517 return slen * 4 + isNegative; 518 519 // FIXME: base 36 520 521 // This is grossly inefficient but accurate. We could probably do something 522 // with a computation of roughly slen*64/20 and then adjust by the value of 523 // the first few digits. But, I'm not sure how accurate that could be. 524 525 // Compute a sufficient number of bits that is always large enough but might 526 // be too large. This avoids the assertion in the constructor. This 527 // calculation doesn't work appropriately for the numbers 0-9, so just use 4 528 // bits in that case. 529 unsigned sufficient 530 = radix == 10? (slen == 1 ? 4 : slen * 64/18) 531 : (slen == 1 ? 7 : slen * 16/3); 532 533 // Convert to the actual binary value. 534 APInt tmp(sufficient, StringRef(p, slen), radix); 535 536 // Compute how many bits are required. If the log is infinite, assume we need 537 // just bit. If the log is exact and value is negative, then the value is 538 // MinSignedValue with (log + 1) bits. 539 unsigned log = tmp.logBase2(); 540 if (log == (unsigned)-1) { 541 return isNegative + 1; 542 } else if (isNegative && tmp.isPowerOf2()) { 543 return isNegative + log; 544 } else { 545 return isNegative + log + 1; 546 } 547 } 548 549 hash_code llvm::hash_value(const APInt &Arg) { 550 if (Arg.isSingleWord()) 551 return hash_combine(Arg.U.VAL); 552 553 return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()); 554 } 555 556 bool APInt::isSplat(unsigned SplatSizeInBits) const { 557 assert(getBitWidth() % SplatSizeInBits == 0 && 558 "SplatSizeInBits must divide width!"); 559 // We can check that all parts of an integer are equal by making use of a 560 // little trick: rotate and check if it's still the same value. 561 return *this == rotl(SplatSizeInBits); 562 } 563 564 /// This function returns the high "numBits" bits of this APInt. 565 APInt APInt::getHiBits(unsigned numBits) const { 566 return this->lshr(BitWidth - numBits); 567 } 568 569 /// This function returns the low "numBits" bits of this APInt. 570 APInt APInt::getLoBits(unsigned numBits) const { 571 APInt Result(getLowBitsSet(BitWidth, numBits)); 572 Result &= *this; 573 return Result; 574 } 575 576 /// Return a value containing V broadcasted over NewLen bits. 577 APInt APInt::getSplat(unsigned NewLen, const APInt &V) { 578 assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!"); 579 580 APInt Val = V.zextOrSelf(NewLen); 581 for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1) 582 Val |= Val << I; 583 584 return Val; 585 } 586 587 unsigned APInt::countLeadingZerosSlowCase() const { 588 unsigned Count = 0; 589 for (int i = getNumWords()-1; i >= 0; --i) { 590 uint64_t V = U.pVal[i]; 591 if (V == 0) 592 Count += APINT_BITS_PER_WORD; 593 else { 594 Count += llvm::countLeadingZeros(V); 595 break; 596 } 597 } 598 // Adjust for unused bits in the most significant word (they are zero). 599 unsigned Mod = BitWidth % APINT_BITS_PER_WORD; 600 Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0; 601 return Count; 602 } 603 604 unsigned APInt::countLeadingOnesSlowCase() const { 605 unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD; 606 unsigned shift; 607 if (!highWordBits) { 608 highWordBits = APINT_BITS_PER_WORD; 609 shift = 0; 610 } else { 611 shift = APINT_BITS_PER_WORD - highWordBits; 612 } 613 int i = getNumWords() - 1; 614 unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift); 615 if (Count == highWordBits) { 616 for (i--; i >= 0; --i) { 617 if (U.pVal[i] == WORDTYPE_MAX) 618 Count += APINT_BITS_PER_WORD; 619 else { 620 Count += llvm::countLeadingOnes(U.pVal[i]); 621 break; 622 } 623 } 624 } 625 return Count; 626 } 627 628 unsigned APInt::countTrailingZerosSlowCase() const { 629 unsigned Count = 0; 630 unsigned i = 0; 631 for (; i < getNumWords() && U.pVal[i] == 0; ++i) 632 Count += APINT_BITS_PER_WORD; 633 if (i < getNumWords()) 634 Count += llvm::countTrailingZeros(U.pVal[i]); 635 return std::min(Count, BitWidth); 636 } 637 638 unsigned APInt::countTrailingOnesSlowCase() const { 639 unsigned Count = 0; 640 unsigned i = 0; 641 for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i) 642 Count += APINT_BITS_PER_WORD; 643 if (i < getNumWords()) 644 Count += llvm::countTrailingOnes(U.pVal[i]); 645 assert(Count <= BitWidth); 646 return Count; 647 } 648 649 unsigned APInt::countPopulationSlowCase() const { 650 unsigned Count = 0; 651 for (unsigned i = 0; i < getNumWords(); ++i) 652 Count += llvm::countPopulation(U.pVal[i]); 653 return Count; 654 } 655 656 bool APInt::intersectsSlowCase(const APInt &RHS) const { 657 for (unsigned i = 0, e = getNumWords(); i != e; ++i) 658 if ((U.pVal[i] & RHS.U.pVal[i]) != 0) 659 return true; 660 661 return false; 662 } 663 664 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const { 665 for (unsigned i = 0, e = getNumWords(); i != e; ++i) 666 if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0) 667 return false; 668 669 return true; 670 } 671 672 APInt APInt::byteSwap() const { 673 assert(BitWidth >= 16 && BitWidth % 8 == 0 && "Cannot byteswap!"); 674 if (BitWidth == 16) 675 return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL))); 676 if (BitWidth == 32) 677 return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL))); 678 if (BitWidth <= 64) { 679 uint64_t Tmp1 = ByteSwap_64(U.VAL); 680 Tmp1 >>= (64 - BitWidth); 681 return APInt(BitWidth, Tmp1); 682 } 683 684 APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0); 685 for (unsigned I = 0, N = getNumWords(); I != N; ++I) 686 Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]); 687 if (Result.BitWidth != BitWidth) { 688 Result.lshrInPlace(Result.BitWidth - BitWidth); 689 Result.BitWidth = BitWidth; 690 } 691 return Result; 692 } 693 694 APInt APInt::reverseBits() const { 695 switch (BitWidth) { 696 case 64: 697 return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL)); 698 case 32: 699 return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL)); 700 case 16: 701 return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL)); 702 case 8: 703 return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL)); 704 default: 705 break; 706 } 707 708 APInt Val(*this); 709 APInt Reversed(BitWidth, 0); 710 unsigned S = BitWidth; 711 712 for (; Val != 0; Val.lshrInPlace(1)) { 713 Reversed <<= 1; 714 Reversed |= Val[0]; 715 --S; 716 } 717 718 Reversed <<= S; 719 return Reversed; 720 } 721 722 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) { 723 // Fast-path a common case. 724 if (A == B) return A; 725 726 // Corner cases: if either operand is zero, the other is the gcd. 727 if (!A) return B; 728 if (!B) return A; 729 730 // Count common powers of 2 and remove all other powers of 2. 731 unsigned Pow2; 732 { 733 unsigned Pow2_A = A.countTrailingZeros(); 734 unsigned Pow2_B = B.countTrailingZeros(); 735 if (Pow2_A > Pow2_B) { 736 A.lshrInPlace(Pow2_A - Pow2_B); 737 Pow2 = Pow2_B; 738 } else if (Pow2_B > Pow2_A) { 739 B.lshrInPlace(Pow2_B - Pow2_A); 740 Pow2 = Pow2_A; 741 } else { 742 Pow2 = Pow2_A; 743 } 744 } 745 746 // Both operands are odd multiples of 2^Pow_2: 747 // 748 // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b)) 749 // 750 // This is a modified version of Stein's algorithm, taking advantage of 751 // efficient countTrailingZeros(). 752 while (A != B) { 753 if (A.ugt(B)) { 754 A -= B; 755 A.lshrInPlace(A.countTrailingZeros() - Pow2); 756 } else { 757 B -= A; 758 B.lshrInPlace(B.countTrailingZeros() - Pow2); 759 } 760 } 761 762 return A; 763 } 764 765 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { 766 uint64_t I = bit_cast<uint64_t>(Double); 767 768 // Get the sign bit from the highest order bit 769 bool isNeg = I >> 63; 770 771 // Get the 11-bit exponent and adjust for the 1023 bit bias 772 int64_t exp = ((I >> 52) & 0x7ff) - 1023; 773 774 // If the exponent is negative, the value is < 0 so just return 0. 775 if (exp < 0) 776 return APInt(width, 0u); 777 778 // Extract the mantissa by clearing the top 12 bits (sign + exponent). 779 uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52; 780 781 // If the exponent doesn't shift all bits out of the mantissa 782 if (exp < 52) 783 return isNeg ? -APInt(width, mantissa >> (52 - exp)) : 784 APInt(width, mantissa >> (52 - exp)); 785 786 // If the client didn't provide enough bits for us to shift the mantissa into 787 // then the result is undefined, just return 0 788 if (width <= exp - 52) 789 return APInt(width, 0); 790 791 // Otherwise, we have to shift the mantissa bits up to the right location 792 APInt Tmp(width, mantissa); 793 Tmp <<= (unsigned)exp - 52; 794 return isNeg ? -Tmp : Tmp; 795 } 796 797 /// This function converts this APInt to a double. 798 /// The layout for double is as following (IEEE Standard 754): 799 /// -------------------------------------- 800 /// | Sign Exponent Fraction Bias | 801 /// |-------------------------------------- | 802 /// | 1[63] 11[62-52] 52[51-00] 1023 | 803 /// -------------------------------------- 804 double APInt::roundToDouble(bool isSigned) const { 805 806 // Handle the simple case where the value is contained in one uint64_t. 807 // It is wrong to optimize getWord(0) to VAL; there might be more than one word. 808 if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) { 809 if (isSigned) { 810 int64_t sext = SignExtend64(getWord(0), BitWidth); 811 return double(sext); 812 } else 813 return double(getWord(0)); 814 } 815 816 // Determine if the value is negative. 817 bool isNeg = isSigned ? (*this)[BitWidth-1] : false; 818 819 // Construct the absolute value if we're negative. 820 APInt Tmp(isNeg ? -(*this) : (*this)); 821 822 // Figure out how many bits we're using. 823 unsigned n = Tmp.getActiveBits(); 824 825 // The exponent (without bias normalization) is just the number of bits 826 // we are using. Note that the sign bit is gone since we constructed the 827 // absolute value. 828 uint64_t exp = n; 829 830 // Return infinity for exponent overflow 831 if (exp > 1023) { 832 if (!isSigned || !isNeg) 833 return std::numeric_limits<double>::infinity(); 834 else 835 return -std::numeric_limits<double>::infinity(); 836 } 837 exp += 1023; // Increment for 1023 bias 838 839 // Number of bits in mantissa is 52. To obtain the mantissa value, we must 840 // extract the high 52 bits from the correct words in pVal. 841 uint64_t mantissa; 842 unsigned hiWord = whichWord(n-1); 843 if (hiWord == 0) { 844 mantissa = Tmp.U.pVal[0]; 845 if (n > 52) 846 mantissa >>= n - 52; // shift down, we want the top 52 bits. 847 } else { 848 assert(hiWord > 0 && "huh?"); 849 uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD); 850 uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD); 851 mantissa = hibits | lobits; 852 } 853 854 // The leading bit of mantissa is implicit, so get rid of it. 855 uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0; 856 uint64_t I = sign | (exp << 52) | mantissa; 857 return bit_cast<double>(I); 858 } 859 860 // Truncate to new width. 861 APInt APInt::trunc(unsigned width) const { 862 assert(width < BitWidth && "Invalid APInt Truncate request"); 863 assert(width && "Can't truncate to 0 bits"); 864 865 if (width <= APINT_BITS_PER_WORD) 866 return APInt(width, getRawData()[0]); 867 868 APInt Result(getMemory(getNumWords(width)), width); 869 870 // Copy full words. 871 unsigned i; 872 for (i = 0; i != width / APINT_BITS_PER_WORD; i++) 873 Result.U.pVal[i] = U.pVal[i]; 874 875 // Truncate and copy any partial word. 876 unsigned bits = (0 - width) % APINT_BITS_PER_WORD; 877 if (bits != 0) 878 Result.U.pVal[i] = U.pVal[i] << bits >> bits; 879 880 return Result; 881 } 882 883 // Truncate to new width with unsigned saturation. 884 APInt APInt::truncUSat(unsigned width) const { 885 assert(width < BitWidth && "Invalid APInt Truncate request"); 886 assert(width && "Can't truncate to 0 bits"); 887 888 // Can we just losslessly truncate it? 889 if (isIntN(width)) 890 return trunc(width); 891 // If not, then just return the new limit. 892 return APInt::getMaxValue(width); 893 } 894 895 // Truncate to new width with signed saturation. 896 APInt APInt::truncSSat(unsigned width) const { 897 assert(width < BitWidth && "Invalid APInt Truncate request"); 898 assert(width && "Can't truncate to 0 bits"); 899 900 // Can we just losslessly truncate it? 901 if (isSignedIntN(width)) 902 return trunc(width); 903 // If not, then just return the new limits. 904 return isNegative() ? APInt::getSignedMinValue(width) 905 : APInt::getSignedMaxValue(width); 906 } 907 908 // Sign extend to a new width. 909 APInt APInt::sext(unsigned Width) const { 910 assert(Width > BitWidth && "Invalid APInt SignExtend request"); 911 912 if (Width <= APINT_BITS_PER_WORD) 913 return APInt(Width, SignExtend64(U.VAL, BitWidth)); 914 915 APInt Result(getMemory(getNumWords(Width)), Width); 916 917 // Copy words. 918 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); 919 920 // Sign extend the last word since there may be unused bits in the input. 921 Result.U.pVal[getNumWords() - 1] = 922 SignExtend64(Result.U.pVal[getNumWords() - 1], 923 ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); 924 925 // Fill with sign bits. 926 std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0, 927 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); 928 Result.clearUnusedBits(); 929 return Result; 930 } 931 932 // Zero extend to a new width. 933 APInt APInt::zext(unsigned width) const { 934 assert(width > BitWidth && "Invalid APInt ZeroExtend request"); 935 936 if (width <= APINT_BITS_PER_WORD) 937 return APInt(width, U.VAL); 938 939 APInt Result(getMemory(getNumWords(width)), width); 940 941 // Copy words. 942 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); 943 944 // Zero remaining words. 945 std::memset(Result.U.pVal + getNumWords(), 0, 946 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); 947 948 return Result; 949 } 950 951 APInt APInt::zextOrTrunc(unsigned width) const { 952 if (BitWidth < width) 953 return zext(width); 954 if (BitWidth > width) 955 return trunc(width); 956 return *this; 957 } 958 959 APInt APInt::sextOrTrunc(unsigned width) const { 960 if (BitWidth < width) 961 return sext(width); 962 if (BitWidth > width) 963 return trunc(width); 964 return *this; 965 } 966 967 APInt APInt::zextOrSelf(unsigned width) const { 968 if (BitWidth < width) 969 return zext(width); 970 return *this; 971 } 972 973 APInt APInt::sextOrSelf(unsigned width) const { 974 if (BitWidth < width) 975 return sext(width); 976 return *this; 977 } 978 979 /// Arithmetic right-shift this APInt by shiftAmt. 980 /// Arithmetic right-shift function. 981 void APInt::ashrInPlace(const APInt &shiftAmt) { 982 ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); 983 } 984 985 /// Arithmetic right-shift this APInt by shiftAmt. 986 /// Arithmetic right-shift function. 987 void APInt::ashrSlowCase(unsigned ShiftAmt) { 988 // Don't bother performing a no-op shift. 989 if (!ShiftAmt) 990 return; 991 992 // Save the original sign bit for later. 993 bool Negative = isNegative(); 994 995 // WordShift is the inter-part shift; BitShift is intra-part shift. 996 unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD; 997 unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD; 998 999 unsigned WordsToMove = getNumWords() - WordShift; 1000 if (WordsToMove != 0) { 1001 // Sign extend the last word to fill in the unused bits. 1002 U.pVal[getNumWords() - 1] = SignExtend64( 1003 U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); 1004 1005 // Fastpath for moving by whole words. 1006 if (BitShift == 0) { 1007 std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE); 1008 } else { 1009 // Move the words containing significant bits. 1010 for (unsigned i = 0; i != WordsToMove - 1; ++i) 1011 U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) | 1012 (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift)); 1013 1014 // Handle the last word which has no high bits to copy. 1015 U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift; 1016 // Sign extend one more time. 1017 U.pVal[WordsToMove - 1] = 1018 SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift); 1019 } 1020 } 1021 1022 // Fill in the remainder based on the original sign. 1023 std::memset(U.pVal + WordsToMove, Negative ? -1 : 0, 1024 WordShift * APINT_WORD_SIZE); 1025 clearUnusedBits(); 1026 } 1027 1028 /// Logical right-shift this APInt by shiftAmt. 1029 /// Logical right-shift function. 1030 void APInt::lshrInPlace(const APInt &shiftAmt) { 1031 lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); 1032 } 1033 1034 /// Logical right-shift this APInt by shiftAmt. 1035 /// Logical right-shift function. 1036 void APInt::lshrSlowCase(unsigned ShiftAmt) { 1037 tcShiftRight(U.pVal, getNumWords(), ShiftAmt); 1038 } 1039 1040 /// Left-shift this APInt by shiftAmt. 1041 /// Left-shift function. 1042 APInt &APInt::operator<<=(const APInt &shiftAmt) { 1043 // It's undefined behavior in C to shift by BitWidth or greater. 1044 *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth); 1045 return *this; 1046 } 1047 1048 void APInt::shlSlowCase(unsigned ShiftAmt) { 1049 tcShiftLeft(U.pVal, getNumWords(), ShiftAmt); 1050 clearUnusedBits(); 1051 } 1052 1053 // Calculate the rotate amount modulo the bit width. 1054 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) { 1055 unsigned rotBitWidth = rotateAmt.getBitWidth(); 1056 APInt rot = rotateAmt; 1057 if (rotBitWidth < BitWidth) { 1058 // Extend the rotate APInt, so that the urem doesn't divide by 0. 1059 // e.g. APInt(1, 32) would give APInt(1, 0). 1060 rot = rotateAmt.zext(BitWidth); 1061 } 1062 rot = rot.urem(APInt(rot.getBitWidth(), BitWidth)); 1063 return rot.getLimitedValue(BitWidth); 1064 } 1065 1066 APInt APInt::rotl(const APInt &rotateAmt) const { 1067 return rotl(rotateModulo(BitWidth, rotateAmt)); 1068 } 1069 1070 APInt APInt::rotl(unsigned rotateAmt) const { 1071 rotateAmt %= BitWidth; 1072 if (rotateAmt == 0) 1073 return *this; 1074 return shl(rotateAmt) | lshr(BitWidth - rotateAmt); 1075 } 1076 1077 APInt APInt::rotr(const APInt &rotateAmt) const { 1078 return rotr(rotateModulo(BitWidth, rotateAmt)); 1079 } 1080 1081 APInt APInt::rotr(unsigned rotateAmt) const { 1082 rotateAmt %= BitWidth; 1083 if (rotateAmt == 0) 1084 return *this; 1085 return lshr(rotateAmt) | shl(BitWidth - rotateAmt); 1086 } 1087 1088 // Square Root - this method computes and returns the square root of "this". 1089 // Three mechanisms are used for computation. For small values (<= 5 bits), 1090 // a table lookup is done. This gets some performance for common cases. For 1091 // values using less than 52 bits, the value is converted to double and then 1092 // the libc sqrt function is called. The result is rounded and then converted 1093 // back to a uint64_t which is then used to construct the result. Finally, 1094 // the Babylonian method for computing square roots is used. 1095 APInt APInt::sqrt() const { 1096 1097 // Determine the magnitude of the value. 1098 unsigned magnitude = getActiveBits(); 1099 1100 // Use a fast table for some small values. This also gets rid of some 1101 // rounding errors in libc sqrt for small values. 1102 if (magnitude <= 5) { 1103 static const uint8_t results[32] = { 1104 /* 0 */ 0, 1105 /* 1- 2 */ 1, 1, 1106 /* 3- 6 */ 2, 2, 2, 2, 1107 /* 7-12 */ 3, 3, 3, 3, 3, 3, 1108 /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4, 1109 /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1110 /* 31 */ 6 1111 }; 1112 return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]); 1113 } 1114 1115 // If the magnitude of the value fits in less than 52 bits (the precision of 1116 // an IEEE double precision floating point value), then we can use the 1117 // libc sqrt function which will probably use a hardware sqrt computation. 1118 // This should be faster than the algorithm below. 1119 if (magnitude < 52) { 1120 return APInt(BitWidth, 1121 uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL 1122 : U.pVal[0]))))); 1123 } 1124 1125 // Okay, all the short cuts are exhausted. We must compute it. The following 1126 // is a classical Babylonian method for computing the square root. This code 1127 // was adapted to APInt from a wikipedia article on such computations. 1128 // See http://www.wikipedia.org/ and go to the page named 1129 // Calculate_an_integer_square_root. 1130 unsigned nbits = BitWidth, i = 4; 1131 APInt testy(BitWidth, 16); 1132 APInt x_old(BitWidth, 1); 1133 APInt x_new(BitWidth, 0); 1134 APInt two(BitWidth, 2); 1135 1136 // Select a good starting value using binary logarithms. 1137 for (;; i += 2, testy = testy.shl(2)) 1138 if (i >= nbits || this->ule(testy)) { 1139 x_old = x_old.shl(i / 2); 1140 break; 1141 } 1142 1143 // Use the Babylonian method to arrive at the integer square root: 1144 for (;;) { 1145 x_new = (this->udiv(x_old) + x_old).udiv(two); 1146 if (x_old.ule(x_new)) 1147 break; 1148 x_old = x_new; 1149 } 1150 1151 // Make sure we return the closest approximation 1152 // NOTE: The rounding calculation below is correct. It will produce an 1153 // off-by-one discrepancy with results from pari/gp. That discrepancy has been 1154 // determined to be a rounding issue with pari/gp as it begins to use a 1155 // floating point representation after 192 bits. There are no discrepancies 1156 // between this algorithm and pari/gp for bit widths < 192 bits. 1157 APInt square(x_old * x_old); 1158 APInt nextSquare((x_old + 1) * (x_old +1)); 1159 if (this->ult(square)) 1160 return x_old; 1161 assert(this->ule(nextSquare) && "Error in APInt::sqrt computation"); 1162 APInt midpoint((nextSquare - square).udiv(two)); 1163 APInt offset(*this - square); 1164 if (offset.ult(midpoint)) 1165 return x_old; 1166 return x_old + 1; 1167 } 1168 1169 /// Computes the multiplicative inverse of this APInt for a given modulo. The 1170 /// iterative extended Euclidean algorithm is used to solve for this value, 1171 /// however we simplify it to speed up calculating only the inverse, and take 1172 /// advantage of div+rem calculations. We also use some tricks to avoid copying 1173 /// (potentially large) APInts around. 1174 /// WARNING: a value of '0' may be returned, 1175 /// signifying that no multiplicative inverse exists! 1176 APInt APInt::multiplicativeInverse(const APInt& modulo) const { 1177 assert(ult(modulo) && "This APInt must be smaller than the modulo"); 1178 1179 // Using the properties listed at the following web page (accessed 06/21/08): 1180 // http://www.numbertheory.org/php/euclid.html 1181 // (especially the properties numbered 3, 4 and 9) it can be proved that 1182 // BitWidth bits suffice for all the computations in the algorithm implemented 1183 // below. More precisely, this number of bits suffice if the multiplicative 1184 // inverse exists, but may not suffice for the general extended Euclidean 1185 // algorithm. 1186 1187 APInt r[2] = { modulo, *this }; 1188 APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) }; 1189 APInt q(BitWidth, 0); 1190 1191 unsigned i; 1192 for (i = 0; r[i^1] != 0; i ^= 1) { 1193 // An overview of the math without the confusing bit-flipping: 1194 // q = r[i-2] / r[i-1] 1195 // r[i] = r[i-2] % r[i-1] 1196 // t[i] = t[i-2] - t[i-1] * q 1197 udivrem(r[i], r[i^1], q, r[i]); 1198 t[i] -= t[i^1] * q; 1199 } 1200 1201 // If this APInt and the modulo are not coprime, there is no multiplicative 1202 // inverse, so return 0. We check this by looking at the next-to-last 1203 // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean 1204 // algorithm. 1205 if (r[i] != 1) 1206 return APInt(BitWidth, 0); 1207 1208 // The next-to-last t is the multiplicative inverse. However, we are 1209 // interested in a positive inverse. Calculate a positive one from a negative 1210 // one if necessary. A simple addition of the modulo suffices because 1211 // abs(t[i]) is known to be less than *this/2 (see the link above). 1212 if (t[i].isNegative()) 1213 t[i] += modulo; 1214 1215 return std::move(t[i]); 1216 } 1217 1218 /// Calculate the magic numbers required to implement a signed integer division 1219 /// by a constant as a sequence of multiplies, adds and shifts. Requires that 1220 /// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S. 1221 /// Warren, Jr., chapter 10. 1222 APInt::ms APInt::magic() const { 1223 const APInt& d = *this; 1224 unsigned p; 1225 APInt ad, anc, delta, q1, r1, q2, r2, t; 1226 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); 1227 struct ms mag; 1228 1229 ad = d.abs(); 1230 t = signedMin + (d.lshr(d.getBitWidth() - 1)); 1231 anc = t - 1 - t.urem(ad); // absolute value of nc 1232 p = d.getBitWidth() - 1; // initialize p 1233 q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc) 1234 r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc)) 1235 q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d) 1236 r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d)) 1237 do { 1238 p = p + 1; 1239 q1 = q1<<1; // update q1 = 2p/abs(nc) 1240 r1 = r1<<1; // update r1 = rem(2p/abs(nc)) 1241 if (r1.uge(anc)) { // must be unsigned comparison 1242 q1 = q1 + 1; 1243 r1 = r1 - anc; 1244 } 1245 q2 = q2<<1; // update q2 = 2p/abs(d) 1246 r2 = r2<<1; // update r2 = rem(2p/abs(d)) 1247 if (r2.uge(ad)) { // must be unsigned comparison 1248 q2 = q2 + 1; 1249 r2 = r2 - ad; 1250 } 1251 delta = ad - r2; 1252 } while (q1.ult(delta) || (q1 == delta && r1 == 0)); 1253 1254 mag.m = q2 + 1; 1255 if (d.isNegative()) mag.m = -mag.m; // resulting magic number 1256 mag.s = p - d.getBitWidth(); // resulting shift 1257 return mag; 1258 } 1259 1260 /// Calculate the magic numbers required to implement an unsigned integer 1261 /// division by a constant as a sequence of multiplies, adds and shifts. 1262 /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry 1263 /// S. Warren, Jr., chapter 10. 1264 /// LeadingZeros can be used to simplify the calculation if the upper bits 1265 /// of the divided value are known zero. 1266 APInt::mu APInt::magicu(unsigned LeadingZeros) const { 1267 const APInt& d = *this; 1268 unsigned p; 1269 APInt nc, delta, q1, r1, q2, r2; 1270 struct mu magu; 1271 magu.a = 0; // initialize "add" indicator 1272 APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros); 1273 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); 1274 APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); 1275 1276 nc = allOnes - (allOnes - d).urem(d); 1277 p = d.getBitWidth() - 1; // initialize p 1278 q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc 1279 r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc) 1280 q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d 1281 r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d) 1282 do { 1283 p = p + 1; 1284 if (r1.uge(nc - r1)) { 1285 q1 = q1 + q1 + 1; // update q1 1286 r1 = r1 + r1 - nc; // update r1 1287 } 1288 else { 1289 q1 = q1+q1; // update q1 1290 r1 = r1+r1; // update r1 1291 } 1292 if ((r2 + 1).uge(d - r2)) { 1293 if (q2.uge(signedMax)) magu.a = 1; 1294 q2 = q2+q2 + 1; // update q2 1295 r2 = r2+r2 + 1 - d; // update r2 1296 } 1297 else { 1298 if (q2.uge(signedMin)) magu.a = 1; 1299 q2 = q2+q2; // update q2 1300 r2 = r2+r2 + 1; // update r2 1301 } 1302 delta = d - 1 - r2; 1303 } while (p < d.getBitWidth()*2 && 1304 (q1.ult(delta) || (q1 == delta && r1 == 0))); 1305 magu.m = q2 + 1; // resulting magic number 1306 magu.s = p - d.getBitWidth(); // resulting shift 1307 return magu; 1308 } 1309 1310 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers) 1311 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The 1312 /// variables here have the same names as in the algorithm. Comments explain 1313 /// the algorithm and any deviation from it. 1314 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r, 1315 unsigned m, unsigned n) { 1316 assert(u && "Must provide dividend"); 1317 assert(v && "Must provide divisor"); 1318 assert(q && "Must provide quotient"); 1319 assert(u != v && u != q && v != q && "Must use different memory"); 1320 assert(n>1 && "n must be > 1"); 1321 1322 // b denotes the base of the number system. In our case b is 2^32. 1323 const uint64_t b = uint64_t(1) << 32; 1324 1325 // The DEBUG macros here tend to be spam in the debug output if you're not 1326 // debugging this code. Disable them unless KNUTH_DEBUG is defined. 1327 #ifdef KNUTH_DEBUG 1328 #define DEBUG_KNUTH(X) LLVM_DEBUG(X) 1329 #else 1330 #define DEBUG_KNUTH(X) do {} while(false) 1331 #endif 1332 1333 DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n'); 1334 DEBUG_KNUTH(dbgs() << "KnuthDiv: original:"); 1335 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1336 DEBUG_KNUTH(dbgs() << " by"); 1337 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]); 1338 DEBUG_KNUTH(dbgs() << '\n'); 1339 // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of 1340 // u and v by d. Note that we have taken Knuth's advice here to use a power 1341 // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of 1342 // 2 allows us to shift instead of multiply and it is easy to determine the 1343 // shift amount from the leading zeros. We are basically normalizing the u 1344 // and v so that its high bits are shifted to the top of v's range without 1345 // overflow. Note that this can require an extra word in u so that u must 1346 // be of length m+n+1. 1347 unsigned shift = countLeadingZeros(v[n-1]); 1348 uint32_t v_carry = 0; 1349 uint32_t u_carry = 0; 1350 if (shift) { 1351 for (unsigned i = 0; i < m+n; ++i) { 1352 uint32_t u_tmp = u[i] >> (32 - shift); 1353 u[i] = (u[i] << shift) | u_carry; 1354 u_carry = u_tmp; 1355 } 1356 for (unsigned i = 0; i < n; ++i) { 1357 uint32_t v_tmp = v[i] >> (32 - shift); 1358 v[i] = (v[i] << shift) | v_carry; 1359 v_carry = v_tmp; 1360 } 1361 } 1362 u[m+n] = u_carry; 1363 1364 DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:"); 1365 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1366 DEBUG_KNUTH(dbgs() << " by"); 1367 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]); 1368 DEBUG_KNUTH(dbgs() << '\n'); 1369 1370 // D2. [Initialize j.] Set j to m. This is the loop counter over the places. 1371 int j = m; 1372 do { 1373 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n'); 1374 // D3. [Calculate q'.]. 1375 // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') 1376 // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r') 1377 // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease 1378 // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test 1379 // on v[n-2] determines at high speed most of the cases in which the trial 1380 // value qp is one too large, and it eliminates all cases where qp is two 1381 // too large. 1382 uint64_t dividend = Make_64(u[j+n], u[j+n-1]); 1383 DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n'); 1384 uint64_t qp = dividend / v[n-1]; 1385 uint64_t rp = dividend % v[n-1]; 1386 if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) { 1387 qp--; 1388 rp += v[n-1]; 1389 if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2])) 1390 qp--; 1391 } 1392 DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); 1393 1394 // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with 1395 // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation 1396 // consists of a simple multiplication by a one-place number, combined with 1397 // a subtraction. 1398 // The digits (u[j+n]...u[j]) should be kept positive; if the result of 1399 // this step is actually negative, (u[j+n]...u[j]) should be left as the 1400 // true value plus b**(n+1), namely as the b's complement of 1401 // the true value, and a "borrow" to the left should be remembered. 1402 int64_t borrow = 0; 1403 for (unsigned i = 0; i < n; ++i) { 1404 uint64_t p = uint64_t(qp) * uint64_t(v[i]); 1405 int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p); 1406 u[j+i] = Lo_32(subres); 1407 borrow = Hi_32(p) - Hi_32(subres); 1408 DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i] 1409 << ", borrow = " << borrow << '\n'); 1410 } 1411 bool isNeg = u[j+n] < borrow; 1412 u[j+n] -= Lo_32(borrow); 1413 1414 DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:"); 1415 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1416 DEBUG_KNUTH(dbgs() << '\n'); 1417 1418 // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was 1419 // negative, go to step D6; otherwise go on to step D7. 1420 q[j] = Lo_32(qp); 1421 if (isNeg) { 1422 // D6. [Add back]. The probability that this step is necessary is very 1423 // small, on the order of only 2/b. Make sure that test data accounts for 1424 // this possibility. Decrease q[j] by 1 1425 q[j]--; 1426 // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). 1427 // A carry will occur to the left of u[j+n], and it should be ignored 1428 // since it cancels with the borrow that occurred in D4. 1429 bool carry = false; 1430 for (unsigned i = 0; i < n; i++) { 1431 uint32_t limit = std::min(u[j+i],v[i]); 1432 u[j+i] += v[i] + carry; 1433 carry = u[j+i] < limit || (carry && u[j+i] == limit); 1434 } 1435 u[j+n] += carry; 1436 } 1437 DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:"); 1438 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1439 DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n'); 1440 1441 // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. 1442 } while (--j >= 0); 1443 1444 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:"); 1445 DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]); 1446 DEBUG_KNUTH(dbgs() << '\n'); 1447 1448 // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired 1449 // remainder may be obtained by dividing u[...] by d. If r is non-null we 1450 // compute the remainder (urem uses this). 1451 if (r) { 1452 // The value d is expressed by the "shift" value above since we avoided 1453 // multiplication by d by using a shift left. So, all we have to do is 1454 // shift right here. 1455 if (shift) { 1456 uint32_t carry = 0; 1457 DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:"); 1458 for (int i = n-1; i >= 0; i--) { 1459 r[i] = (u[i] >> shift) | carry; 1460 carry = u[i] << (32 - shift); 1461 DEBUG_KNUTH(dbgs() << " " << r[i]); 1462 } 1463 } else { 1464 for (int i = n-1; i >= 0; i--) { 1465 r[i] = u[i]; 1466 DEBUG_KNUTH(dbgs() << " " << r[i]); 1467 } 1468 } 1469 DEBUG_KNUTH(dbgs() << '\n'); 1470 } 1471 DEBUG_KNUTH(dbgs() << '\n'); 1472 } 1473 1474 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS, 1475 unsigned rhsWords, WordType *Quotient, WordType *Remainder) { 1476 assert(lhsWords >= rhsWords && "Fractional result"); 1477 1478 // First, compose the values into an array of 32-bit words instead of 1479 // 64-bit words. This is a necessity of both the "short division" algorithm 1480 // and the Knuth "classical algorithm" which requires there to be native 1481 // operations for +, -, and * on an m bit value with an m*2 bit result. We 1482 // can't use 64-bit operands here because we don't have native results of 1483 // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't 1484 // work on large-endian machines. 1485 unsigned n = rhsWords * 2; 1486 unsigned m = (lhsWords * 2) - n; 1487 1488 // Allocate space for the temporary values we need either on the stack, if 1489 // it will fit, or on the heap if it won't. 1490 uint32_t SPACE[128]; 1491 uint32_t *U = nullptr; 1492 uint32_t *V = nullptr; 1493 uint32_t *Q = nullptr; 1494 uint32_t *R = nullptr; 1495 if ((Remainder?4:3)*n+2*m+1 <= 128) { 1496 U = &SPACE[0]; 1497 V = &SPACE[m+n+1]; 1498 Q = &SPACE[(m+n+1) + n]; 1499 if (Remainder) 1500 R = &SPACE[(m+n+1) + n + (m+n)]; 1501 } else { 1502 U = new uint32_t[m + n + 1]; 1503 V = new uint32_t[n]; 1504 Q = new uint32_t[m+n]; 1505 if (Remainder) 1506 R = new uint32_t[n]; 1507 } 1508 1509 // Initialize the dividend 1510 memset(U, 0, (m+n+1)*sizeof(uint32_t)); 1511 for (unsigned i = 0; i < lhsWords; ++i) { 1512 uint64_t tmp = LHS[i]; 1513 U[i * 2] = Lo_32(tmp); 1514 U[i * 2 + 1] = Hi_32(tmp); 1515 } 1516 U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm. 1517 1518 // Initialize the divisor 1519 memset(V, 0, (n)*sizeof(uint32_t)); 1520 for (unsigned i = 0; i < rhsWords; ++i) { 1521 uint64_t tmp = RHS[i]; 1522 V[i * 2] = Lo_32(tmp); 1523 V[i * 2 + 1] = Hi_32(tmp); 1524 } 1525 1526 // initialize the quotient and remainder 1527 memset(Q, 0, (m+n) * sizeof(uint32_t)); 1528 if (Remainder) 1529 memset(R, 0, n * sizeof(uint32_t)); 1530 1531 // Now, adjust m and n for the Knuth division. n is the number of words in 1532 // the divisor. m is the number of words by which the dividend exceeds the 1533 // divisor (i.e. m+n is the length of the dividend). These sizes must not 1534 // contain any zero words or the Knuth algorithm fails. 1535 for (unsigned i = n; i > 0 && V[i-1] == 0; i--) { 1536 n--; 1537 m++; 1538 } 1539 for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--) 1540 m--; 1541 1542 // If we're left with only a single word for the divisor, Knuth doesn't work 1543 // so we implement the short division algorithm here. This is much simpler 1544 // and faster because we are certain that we can divide a 64-bit quantity 1545 // by a 32-bit quantity at hardware speed and short division is simply a 1546 // series of such operations. This is just like doing short division but we 1547 // are using base 2^32 instead of base 10. 1548 assert(n != 0 && "Divide by zero?"); 1549 if (n == 1) { 1550 uint32_t divisor = V[0]; 1551 uint32_t remainder = 0; 1552 for (int i = m; i >= 0; i--) { 1553 uint64_t partial_dividend = Make_64(remainder, U[i]); 1554 if (partial_dividend == 0) { 1555 Q[i] = 0; 1556 remainder = 0; 1557 } else if (partial_dividend < divisor) { 1558 Q[i] = 0; 1559 remainder = Lo_32(partial_dividend); 1560 } else if (partial_dividend == divisor) { 1561 Q[i] = 1; 1562 remainder = 0; 1563 } else { 1564 Q[i] = Lo_32(partial_dividend / divisor); 1565 remainder = Lo_32(partial_dividend - (Q[i] * divisor)); 1566 } 1567 } 1568 if (R) 1569 R[0] = remainder; 1570 } else { 1571 // Now we're ready to invoke the Knuth classical divide algorithm. In this 1572 // case n > 1. 1573 KnuthDiv(U, V, Q, R, m, n); 1574 } 1575 1576 // If the caller wants the quotient 1577 if (Quotient) { 1578 for (unsigned i = 0; i < lhsWords; ++i) 1579 Quotient[i] = Make_64(Q[i*2+1], Q[i*2]); 1580 } 1581 1582 // If the caller wants the remainder 1583 if (Remainder) { 1584 for (unsigned i = 0; i < rhsWords; ++i) 1585 Remainder[i] = Make_64(R[i*2+1], R[i*2]); 1586 } 1587 1588 // Clean up the memory we allocated. 1589 if (U != &SPACE[0]) { 1590 delete [] U; 1591 delete [] V; 1592 delete [] Q; 1593 delete [] R; 1594 } 1595 } 1596 1597 APInt APInt::udiv(const APInt &RHS) const { 1598 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1599 1600 // First, deal with the easy case 1601 if (isSingleWord()) { 1602 assert(RHS.U.VAL != 0 && "Divide by zero?"); 1603 return APInt(BitWidth, U.VAL / RHS.U.VAL); 1604 } 1605 1606 // Get some facts about the LHS and RHS number of bits and words 1607 unsigned lhsWords = getNumWords(getActiveBits()); 1608 unsigned rhsBits = RHS.getActiveBits(); 1609 unsigned rhsWords = getNumWords(rhsBits); 1610 assert(rhsWords && "Divided by zero???"); 1611 1612 // Deal with some degenerate cases 1613 if (!lhsWords) 1614 // 0 / X ===> 0 1615 return APInt(BitWidth, 0); 1616 if (rhsBits == 1) 1617 // X / 1 ===> X 1618 return *this; 1619 if (lhsWords < rhsWords || this->ult(RHS)) 1620 // X / Y ===> 0, iff X < Y 1621 return APInt(BitWidth, 0); 1622 if (*this == RHS) 1623 // X / X ===> 1 1624 return APInt(BitWidth, 1); 1625 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. 1626 // All high words are zero, just use native divide 1627 return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]); 1628 1629 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1630 APInt Quotient(BitWidth, 0); // to hold result. 1631 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr); 1632 return Quotient; 1633 } 1634 1635 APInt APInt::udiv(uint64_t RHS) const { 1636 assert(RHS != 0 && "Divide by zero?"); 1637 1638 // First, deal with the easy case 1639 if (isSingleWord()) 1640 return APInt(BitWidth, U.VAL / RHS); 1641 1642 // Get some facts about the LHS words. 1643 unsigned lhsWords = getNumWords(getActiveBits()); 1644 1645 // Deal with some degenerate cases 1646 if (!lhsWords) 1647 // 0 / X ===> 0 1648 return APInt(BitWidth, 0); 1649 if (RHS == 1) 1650 // X / 1 ===> X 1651 return *this; 1652 if (this->ult(RHS)) 1653 // X / Y ===> 0, iff X < Y 1654 return APInt(BitWidth, 0); 1655 if (*this == RHS) 1656 // X / X ===> 1 1657 return APInt(BitWidth, 1); 1658 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. 1659 // All high words are zero, just use native divide 1660 return APInt(BitWidth, this->U.pVal[0] / RHS); 1661 1662 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1663 APInt Quotient(BitWidth, 0); // to hold result. 1664 divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr); 1665 return Quotient; 1666 } 1667 1668 APInt APInt::sdiv(const APInt &RHS) const { 1669 if (isNegative()) { 1670 if (RHS.isNegative()) 1671 return (-(*this)).udiv(-RHS); 1672 return -((-(*this)).udiv(RHS)); 1673 } 1674 if (RHS.isNegative()) 1675 return -(this->udiv(-RHS)); 1676 return this->udiv(RHS); 1677 } 1678 1679 APInt APInt::sdiv(int64_t RHS) const { 1680 if (isNegative()) { 1681 if (RHS < 0) 1682 return (-(*this)).udiv(-RHS); 1683 return -((-(*this)).udiv(RHS)); 1684 } 1685 if (RHS < 0) 1686 return -(this->udiv(-RHS)); 1687 return this->udiv(RHS); 1688 } 1689 1690 APInt APInt::urem(const APInt &RHS) const { 1691 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1692 if (isSingleWord()) { 1693 assert(RHS.U.VAL != 0 && "Remainder by zero?"); 1694 return APInt(BitWidth, U.VAL % RHS.U.VAL); 1695 } 1696 1697 // Get some facts about the LHS 1698 unsigned lhsWords = getNumWords(getActiveBits()); 1699 1700 // Get some facts about the RHS 1701 unsigned rhsBits = RHS.getActiveBits(); 1702 unsigned rhsWords = getNumWords(rhsBits); 1703 assert(rhsWords && "Performing remainder operation by zero ???"); 1704 1705 // Check the degenerate cases 1706 if (lhsWords == 0) 1707 // 0 % Y ===> 0 1708 return APInt(BitWidth, 0); 1709 if (rhsBits == 1) 1710 // X % 1 ===> 0 1711 return APInt(BitWidth, 0); 1712 if (lhsWords < rhsWords || this->ult(RHS)) 1713 // X % Y ===> X, iff X < Y 1714 return *this; 1715 if (*this == RHS) 1716 // X % X == 0; 1717 return APInt(BitWidth, 0); 1718 if (lhsWords == 1) 1719 // All high words are zero, just use native remainder 1720 return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]); 1721 1722 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1723 APInt Remainder(BitWidth, 0); 1724 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal); 1725 return Remainder; 1726 } 1727 1728 uint64_t APInt::urem(uint64_t RHS) const { 1729 assert(RHS != 0 && "Remainder by zero?"); 1730 1731 if (isSingleWord()) 1732 return U.VAL % RHS; 1733 1734 // Get some facts about the LHS 1735 unsigned lhsWords = getNumWords(getActiveBits()); 1736 1737 // Check the degenerate cases 1738 if (lhsWords == 0) 1739 // 0 % Y ===> 0 1740 return 0; 1741 if (RHS == 1) 1742 // X % 1 ===> 0 1743 return 0; 1744 if (this->ult(RHS)) 1745 // X % Y ===> X, iff X < Y 1746 return getZExtValue(); 1747 if (*this == RHS) 1748 // X % X == 0; 1749 return 0; 1750 if (lhsWords == 1) 1751 // All high words are zero, just use native remainder 1752 return U.pVal[0] % RHS; 1753 1754 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1755 uint64_t Remainder; 1756 divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder); 1757 return Remainder; 1758 } 1759 1760 APInt APInt::srem(const APInt &RHS) const { 1761 if (isNegative()) { 1762 if (RHS.isNegative()) 1763 return -((-(*this)).urem(-RHS)); 1764 return -((-(*this)).urem(RHS)); 1765 } 1766 if (RHS.isNegative()) 1767 return this->urem(-RHS); 1768 return this->urem(RHS); 1769 } 1770 1771 int64_t APInt::srem(int64_t RHS) const { 1772 if (isNegative()) { 1773 if (RHS < 0) 1774 return -((-(*this)).urem(-RHS)); 1775 return -((-(*this)).urem(RHS)); 1776 } 1777 if (RHS < 0) 1778 return this->urem(-RHS); 1779 return this->urem(RHS); 1780 } 1781 1782 void APInt::udivrem(const APInt &LHS, const APInt &RHS, 1783 APInt &Quotient, APInt &Remainder) { 1784 assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1785 unsigned BitWidth = LHS.BitWidth; 1786 1787 // First, deal with the easy case 1788 if (LHS.isSingleWord()) { 1789 assert(RHS.U.VAL != 0 && "Divide by zero?"); 1790 uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL; 1791 uint64_t RemVal = LHS.U.VAL % RHS.U.VAL; 1792 Quotient = APInt(BitWidth, QuotVal); 1793 Remainder = APInt(BitWidth, RemVal); 1794 return; 1795 } 1796 1797 // Get some size facts about the dividend and divisor 1798 unsigned lhsWords = getNumWords(LHS.getActiveBits()); 1799 unsigned rhsBits = RHS.getActiveBits(); 1800 unsigned rhsWords = getNumWords(rhsBits); 1801 assert(rhsWords && "Performing divrem operation by zero ???"); 1802 1803 // Check the degenerate cases 1804 if (lhsWords == 0) { 1805 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0 1806 Remainder = APInt(BitWidth, 0); // 0 % Y ===> 0 1807 return; 1808 } 1809 1810 if (rhsBits == 1) { 1811 Quotient = LHS; // X / 1 ===> X 1812 Remainder = APInt(BitWidth, 0); // X % 1 ===> 0 1813 } 1814 1815 if (lhsWords < rhsWords || LHS.ult(RHS)) { 1816 Remainder = LHS; // X % Y ===> X, iff X < Y 1817 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y 1818 return; 1819 } 1820 1821 if (LHS == RHS) { 1822 Quotient = APInt(BitWidth, 1); // X / X ===> 1 1823 Remainder = APInt(BitWidth, 0); // X % X ===> 0; 1824 return; 1825 } 1826 1827 // Make sure there is enough space to hold the results. 1828 // NOTE: This assumes that reallocate won't affect any bits if it doesn't 1829 // change the size. This is necessary if Quotient or Remainder is aliased 1830 // with LHS or RHS. 1831 Quotient.reallocate(BitWidth); 1832 Remainder.reallocate(BitWidth); 1833 1834 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. 1835 // There is only one word to consider so use the native versions. 1836 uint64_t lhsValue = LHS.U.pVal[0]; 1837 uint64_t rhsValue = RHS.U.pVal[0]; 1838 Quotient = lhsValue / rhsValue; 1839 Remainder = lhsValue % rhsValue; 1840 return; 1841 } 1842 1843 // Okay, lets do it the long way 1844 divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, 1845 Remainder.U.pVal); 1846 // Clear the rest of the Quotient and Remainder. 1847 std::memset(Quotient.U.pVal + lhsWords, 0, 1848 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); 1849 std::memset(Remainder.U.pVal + rhsWords, 0, 1850 (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE); 1851 } 1852 1853 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient, 1854 uint64_t &Remainder) { 1855 assert(RHS != 0 && "Divide by zero?"); 1856 unsigned BitWidth = LHS.BitWidth; 1857 1858 // First, deal with the easy case 1859 if (LHS.isSingleWord()) { 1860 uint64_t QuotVal = LHS.U.VAL / RHS; 1861 Remainder = LHS.U.VAL % RHS; 1862 Quotient = APInt(BitWidth, QuotVal); 1863 return; 1864 } 1865 1866 // Get some size facts about the dividend and divisor 1867 unsigned lhsWords = getNumWords(LHS.getActiveBits()); 1868 1869 // Check the degenerate cases 1870 if (lhsWords == 0) { 1871 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0 1872 Remainder = 0; // 0 % Y ===> 0 1873 return; 1874 } 1875 1876 if (RHS == 1) { 1877 Quotient = LHS; // X / 1 ===> X 1878 Remainder = 0; // X % 1 ===> 0 1879 return; 1880 } 1881 1882 if (LHS.ult(RHS)) { 1883 Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y 1884 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y 1885 return; 1886 } 1887 1888 if (LHS == RHS) { 1889 Quotient = APInt(BitWidth, 1); // X / X ===> 1 1890 Remainder = 0; // X % X ===> 0; 1891 return; 1892 } 1893 1894 // Make sure there is enough space to hold the results. 1895 // NOTE: This assumes that reallocate won't affect any bits if it doesn't 1896 // change the size. This is necessary if Quotient is aliased with LHS. 1897 Quotient.reallocate(BitWidth); 1898 1899 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. 1900 // There is only one word to consider so use the native versions. 1901 uint64_t lhsValue = LHS.U.pVal[0]; 1902 Quotient = lhsValue / RHS; 1903 Remainder = lhsValue % RHS; 1904 return; 1905 } 1906 1907 // Okay, lets do it the long way 1908 divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder); 1909 // Clear the rest of the Quotient. 1910 std::memset(Quotient.U.pVal + lhsWords, 0, 1911 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); 1912 } 1913 1914 void APInt::sdivrem(const APInt &LHS, const APInt &RHS, 1915 APInt &Quotient, APInt &Remainder) { 1916 if (LHS.isNegative()) { 1917 if (RHS.isNegative()) 1918 APInt::udivrem(-LHS, -RHS, Quotient, Remainder); 1919 else { 1920 APInt::udivrem(-LHS, RHS, Quotient, Remainder); 1921 Quotient.negate(); 1922 } 1923 Remainder.negate(); 1924 } else if (RHS.isNegative()) { 1925 APInt::udivrem(LHS, -RHS, Quotient, Remainder); 1926 Quotient.negate(); 1927 } else { 1928 APInt::udivrem(LHS, RHS, Quotient, Remainder); 1929 } 1930 } 1931 1932 void APInt::sdivrem(const APInt &LHS, int64_t RHS, 1933 APInt &Quotient, int64_t &Remainder) { 1934 uint64_t R = Remainder; 1935 if (LHS.isNegative()) { 1936 if (RHS < 0) 1937 APInt::udivrem(-LHS, -RHS, Quotient, R); 1938 else { 1939 APInt::udivrem(-LHS, RHS, Quotient, R); 1940 Quotient.negate(); 1941 } 1942 R = -R; 1943 } else if (RHS < 0) { 1944 APInt::udivrem(LHS, -RHS, Quotient, R); 1945 Quotient.negate(); 1946 } else { 1947 APInt::udivrem(LHS, RHS, Quotient, R); 1948 } 1949 Remainder = R; 1950 } 1951 1952 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const { 1953 APInt Res = *this+RHS; 1954 Overflow = isNonNegative() == RHS.isNonNegative() && 1955 Res.isNonNegative() != isNonNegative(); 1956 return Res; 1957 } 1958 1959 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const { 1960 APInt Res = *this+RHS; 1961 Overflow = Res.ult(RHS); 1962 return Res; 1963 } 1964 1965 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const { 1966 APInt Res = *this - RHS; 1967 Overflow = isNonNegative() != RHS.isNonNegative() && 1968 Res.isNonNegative() != isNonNegative(); 1969 return Res; 1970 } 1971 1972 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const { 1973 APInt Res = *this-RHS; 1974 Overflow = Res.ugt(*this); 1975 return Res; 1976 } 1977 1978 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const { 1979 // MININT/-1 --> overflow. 1980 Overflow = isMinSignedValue() && RHS.isAllOnesValue(); 1981 return sdiv(RHS); 1982 } 1983 1984 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const { 1985 APInt Res = *this * RHS; 1986 1987 if (*this != 0 && RHS != 0) 1988 Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS; 1989 else 1990 Overflow = false; 1991 return Res; 1992 } 1993 1994 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const { 1995 if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) { 1996 Overflow = true; 1997 return *this * RHS; 1998 } 1999 2000 APInt Res = lshr(1) * RHS; 2001 Overflow = Res.isNegative(); 2002 Res <<= 1; 2003 if ((*this)[0]) { 2004 Res += RHS; 2005 if (Res.ult(RHS)) 2006 Overflow = true; 2007 } 2008 return Res; 2009 } 2010 2011 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const { 2012 Overflow = ShAmt.uge(getBitWidth()); 2013 if (Overflow) 2014 return APInt(BitWidth, 0); 2015 2016 if (isNonNegative()) // Don't allow sign change. 2017 Overflow = ShAmt.uge(countLeadingZeros()); 2018 else 2019 Overflow = ShAmt.uge(countLeadingOnes()); 2020 2021 return *this << ShAmt; 2022 } 2023 2024 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const { 2025 Overflow = ShAmt.uge(getBitWidth()); 2026 if (Overflow) 2027 return APInt(BitWidth, 0); 2028 2029 Overflow = ShAmt.ugt(countLeadingZeros()); 2030 2031 return *this << ShAmt; 2032 } 2033 2034 APInt APInt::sadd_sat(const APInt &RHS) const { 2035 bool Overflow; 2036 APInt Res = sadd_ov(RHS, Overflow); 2037 if (!Overflow) 2038 return Res; 2039 2040 return isNegative() ? APInt::getSignedMinValue(BitWidth) 2041 : APInt::getSignedMaxValue(BitWidth); 2042 } 2043 2044 APInt APInt::uadd_sat(const APInt &RHS) const { 2045 bool Overflow; 2046 APInt Res = uadd_ov(RHS, Overflow); 2047 if (!Overflow) 2048 return Res; 2049 2050 return APInt::getMaxValue(BitWidth); 2051 } 2052 2053 APInt APInt::ssub_sat(const APInt &RHS) const { 2054 bool Overflow; 2055 APInt Res = ssub_ov(RHS, Overflow); 2056 if (!Overflow) 2057 return Res; 2058 2059 return isNegative() ? APInt::getSignedMinValue(BitWidth) 2060 : APInt::getSignedMaxValue(BitWidth); 2061 } 2062 2063 APInt APInt::usub_sat(const APInt &RHS) const { 2064 bool Overflow; 2065 APInt Res = usub_ov(RHS, Overflow); 2066 if (!Overflow) 2067 return Res; 2068 2069 return APInt(BitWidth, 0); 2070 } 2071 2072 APInt APInt::smul_sat(const APInt &RHS) const { 2073 bool Overflow; 2074 APInt Res = smul_ov(RHS, Overflow); 2075 if (!Overflow) 2076 return Res; 2077 2078 // The result is negative if one and only one of inputs is negative. 2079 bool ResIsNegative = isNegative() ^ RHS.isNegative(); 2080 2081 return ResIsNegative ? APInt::getSignedMinValue(BitWidth) 2082 : APInt::getSignedMaxValue(BitWidth); 2083 } 2084 2085 APInt APInt::umul_sat(const APInt &RHS) const { 2086 bool Overflow; 2087 APInt Res = umul_ov(RHS, Overflow); 2088 if (!Overflow) 2089 return Res; 2090 2091 return APInt::getMaxValue(BitWidth); 2092 } 2093 2094 APInt APInt::sshl_sat(const APInt &RHS) const { 2095 bool Overflow; 2096 APInt Res = sshl_ov(RHS, Overflow); 2097 if (!Overflow) 2098 return Res; 2099 2100 return isNegative() ? APInt::getSignedMinValue(BitWidth) 2101 : APInt::getSignedMaxValue(BitWidth); 2102 } 2103 2104 APInt APInt::ushl_sat(const APInt &RHS) const { 2105 bool Overflow; 2106 APInt Res = ushl_ov(RHS, Overflow); 2107 if (!Overflow) 2108 return Res; 2109 2110 return APInt::getMaxValue(BitWidth); 2111 } 2112 2113 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { 2114 // Check our assumptions here 2115 assert(!str.empty() && "Invalid string length"); 2116 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || 2117 radix == 36) && 2118 "Radix should be 2, 8, 10, 16, or 36!"); 2119 2120 StringRef::iterator p = str.begin(); 2121 size_t slen = str.size(); 2122 bool isNeg = *p == '-'; 2123 if (*p == '-' || *p == '+') { 2124 p++; 2125 slen--; 2126 assert(slen && "String is only a sign, needs a value."); 2127 } 2128 assert((slen <= numbits || radix != 2) && "Insufficient bit width"); 2129 assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width"); 2130 assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width"); 2131 assert((((slen-1)*64)/22 <= numbits || radix != 10) && 2132 "Insufficient bit width"); 2133 2134 // Allocate memory if needed 2135 if (isSingleWord()) 2136 U.VAL = 0; 2137 else 2138 U.pVal = getClearedMemory(getNumWords()); 2139 2140 // Figure out if we can shift instead of multiply 2141 unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0); 2142 2143 // Enter digit traversal loop 2144 for (StringRef::iterator e = str.end(); p != e; ++p) { 2145 unsigned digit = getDigit(*p, radix); 2146 assert(digit < radix && "Invalid character in digit string"); 2147 2148 // Shift or multiply the value by the radix 2149 if (slen > 1) { 2150 if (shift) 2151 *this <<= shift; 2152 else 2153 *this *= radix; 2154 } 2155 2156 // Add in the digit we just interpreted 2157 *this += digit; 2158 } 2159 // If its negative, put it in two's complement form 2160 if (isNeg) 2161 this->negate(); 2162 } 2163 2164 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, 2165 bool Signed, bool formatAsCLiteral) const { 2166 assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 || 2167 Radix == 36) && 2168 "Radix should be 2, 8, 10, 16, or 36!"); 2169 2170 const char *Prefix = ""; 2171 if (formatAsCLiteral) { 2172 switch (Radix) { 2173 case 2: 2174 // Binary literals are a non-standard extension added in gcc 4.3: 2175 // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html 2176 Prefix = "0b"; 2177 break; 2178 case 8: 2179 Prefix = "0"; 2180 break; 2181 case 10: 2182 break; // No prefix 2183 case 16: 2184 Prefix = "0x"; 2185 break; 2186 default: 2187 llvm_unreachable("Invalid radix!"); 2188 } 2189 } 2190 2191 // First, check for a zero value and just short circuit the logic below. 2192 if (*this == 0) { 2193 while (*Prefix) { 2194 Str.push_back(*Prefix); 2195 ++Prefix; 2196 }; 2197 Str.push_back('0'); 2198 return; 2199 } 2200 2201 static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 2202 2203 if (isSingleWord()) { 2204 char Buffer[65]; 2205 char *BufPtr = std::end(Buffer); 2206 2207 uint64_t N; 2208 if (!Signed) { 2209 N = getZExtValue(); 2210 } else { 2211 int64_t I = getSExtValue(); 2212 if (I >= 0) { 2213 N = I; 2214 } else { 2215 Str.push_back('-'); 2216 N = -(uint64_t)I; 2217 } 2218 } 2219 2220 while (*Prefix) { 2221 Str.push_back(*Prefix); 2222 ++Prefix; 2223 }; 2224 2225 while (N) { 2226 *--BufPtr = Digits[N % Radix]; 2227 N /= Radix; 2228 } 2229 Str.append(BufPtr, std::end(Buffer)); 2230 return; 2231 } 2232 2233 APInt Tmp(*this); 2234 2235 if (Signed && isNegative()) { 2236 // They want to print the signed version and it is a negative value 2237 // Flip the bits and add one to turn it into the equivalent positive 2238 // value and put a '-' in the result. 2239 Tmp.negate(); 2240 Str.push_back('-'); 2241 } 2242 2243 while (*Prefix) { 2244 Str.push_back(*Prefix); 2245 ++Prefix; 2246 }; 2247 2248 // We insert the digits backward, then reverse them to get the right order. 2249 unsigned StartDig = Str.size(); 2250 2251 // For the 2, 8 and 16 bit cases, we can just shift instead of divide 2252 // because the number of bits per digit (1, 3 and 4 respectively) divides 2253 // equally. We just shift until the value is zero. 2254 if (Radix == 2 || Radix == 8 || Radix == 16) { 2255 // Just shift tmp right for each digit width until it becomes zero 2256 unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1)); 2257 unsigned MaskAmt = Radix - 1; 2258 2259 while (Tmp.getBoolValue()) { 2260 unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt; 2261 Str.push_back(Digits[Digit]); 2262 Tmp.lshrInPlace(ShiftAmt); 2263 } 2264 } else { 2265 while (Tmp.getBoolValue()) { 2266 uint64_t Digit; 2267 udivrem(Tmp, Radix, Tmp, Digit); 2268 assert(Digit < Radix && "divide failed"); 2269 Str.push_back(Digits[Digit]); 2270 } 2271 } 2272 2273 // Reverse the digits before returning. 2274 std::reverse(Str.begin()+StartDig, Str.end()); 2275 } 2276 2277 /// Returns the APInt as a std::string. Note that this is an inefficient method. 2278 /// It is better to pass in a SmallVector/SmallString to the methods above. 2279 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const { 2280 SmallString<40> S; 2281 toString(S, Radix, Signed, /* formatAsCLiteral = */false); 2282 return std::string(S.str()); 2283 } 2284 2285 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2286 LLVM_DUMP_METHOD void APInt::dump() const { 2287 SmallString<40> S, U; 2288 this->toStringUnsigned(U); 2289 this->toStringSigned(S); 2290 dbgs() << "APInt(" << BitWidth << "b, " 2291 << U << "u " << S << "s)\n"; 2292 } 2293 #endif 2294 2295 void APInt::print(raw_ostream &OS, bool isSigned) const { 2296 SmallString<40> S; 2297 this->toString(S, 10, isSigned, /* formatAsCLiteral = */false); 2298 OS << S; 2299 } 2300 2301 // This implements a variety of operations on a representation of 2302 // arbitrary precision, two's-complement, bignum integer values. 2303 2304 // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe 2305 // and unrestricting assumption. 2306 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0, 2307 "Part width must be divisible by 2!"); 2308 2309 /* Some handy functions local to this file. */ 2310 2311 /* Returns the integer part with the least significant BITS set. 2312 BITS cannot be zero. */ 2313 static inline APInt::WordType lowBitMask(unsigned bits) { 2314 assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD); 2315 2316 return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits); 2317 } 2318 2319 /* Returns the value of the lower half of PART. */ 2320 static inline APInt::WordType lowHalf(APInt::WordType part) { 2321 return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2); 2322 } 2323 2324 /* Returns the value of the upper half of PART. */ 2325 static inline APInt::WordType highHalf(APInt::WordType part) { 2326 return part >> (APInt::APINT_BITS_PER_WORD / 2); 2327 } 2328 2329 /* Returns the bit number of the most significant set bit of a part. 2330 If the input number has no bits set -1U is returned. */ 2331 static unsigned partMSB(APInt::WordType value) { 2332 return findLastSet(value, ZB_Max); 2333 } 2334 2335 /* Returns the bit number of the least significant set bit of a 2336 part. If the input number has no bits set -1U is returned. */ 2337 static unsigned partLSB(APInt::WordType value) { 2338 return findFirstSet(value, ZB_Max); 2339 } 2340 2341 /* Sets the least significant part of a bignum to the input value, and 2342 zeroes out higher parts. */ 2343 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) { 2344 assert(parts > 0); 2345 2346 dst[0] = part; 2347 for (unsigned i = 1; i < parts; i++) 2348 dst[i] = 0; 2349 } 2350 2351 /* Assign one bignum to another. */ 2352 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) { 2353 for (unsigned i = 0; i < parts; i++) 2354 dst[i] = src[i]; 2355 } 2356 2357 /* Returns true if a bignum is zero, false otherwise. */ 2358 bool APInt::tcIsZero(const WordType *src, unsigned parts) { 2359 for (unsigned i = 0; i < parts; i++) 2360 if (src[i]) 2361 return false; 2362 2363 return true; 2364 } 2365 2366 /* Extract the given bit of a bignum; returns 0 or 1. */ 2367 int APInt::tcExtractBit(const WordType *parts, unsigned bit) { 2368 return (parts[whichWord(bit)] & maskBit(bit)) != 0; 2369 } 2370 2371 /* Set the given bit of a bignum. */ 2372 void APInt::tcSetBit(WordType *parts, unsigned bit) { 2373 parts[whichWord(bit)] |= maskBit(bit); 2374 } 2375 2376 /* Clears the given bit of a bignum. */ 2377 void APInt::tcClearBit(WordType *parts, unsigned bit) { 2378 parts[whichWord(bit)] &= ~maskBit(bit); 2379 } 2380 2381 /* Returns the bit number of the least significant set bit of a 2382 number. If the input number has no bits set -1U is returned. */ 2383 unsigned APInt::tcLSB(const WordType *parts, unsigned n) { 2384 for (unsigned i = 0; i < n; i++) { 2385 if (parts[i] != 0) { 2386 unsigned lsb = partLSB(parts[i]); 2387 2388 return lsb + i * APINT_BITS_PER_WORD; 2389 } 2390 } 2391 2392 return -1U; 2393 } 2394 2395 /* Returns the bit number of the most significant set bit of a number. 2396 If the input number has no bits set -1U is returned. */ 2397 unsigned APInt::tcMSB(const WordType *parts, unsigned n) { 2398 do { 2399 --n; 2400 2401 if (parts[n] != 0) { 2402 unsigned msb = partMSB(parts[n]); 2403 2404 return msb + n * APINT_BITS_PER_WORD; 2405 } 2406 } while (n); 2407 2408 return -1U; 2409 } 2410 2411 /* Copy the bit vector of width srcBITS from SRC, starting at bit 2412 srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes 2413 the least significant bit of DST. All high bits above srcBITS in 2414 DST are zero-filled. */ 2415 void 2416 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src, 2417 unsigned srcBits, unsigned srcLSB) { 2418 unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD; 2419 assert(dstParts <= dstCount); 2420 2421 unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD; 2422 tcAssign (dst, src + firstSrcPart, dstParts); 2423 2424 unsigned shift = srcLSB % APINT_BITS_PER_WORD; 2425 tcShiftRight (dst, dstParts, shift); 2426 2427 /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC 2428 in DST. If this is less that srcBits, append the rest, else 2429 clear the high bits. */ 2430 unsigned n = dstParts * APINT_BITS_PER_WORD - shift; 2431 if (n < srcBits) { 2432 WordType mask = lowBitMask (srcBits - n); 2433 dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask) 2434 << n % APINT_BITS_PER_WORD); 2435 } else if (n > srcBits) { 2436 if (srcBits % APINT_BITS_PER_WORD) 2437 dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD); 2438 } 2439 2440 /* Clear high parts. */ 2441 while (dstParts < dstCount) 2442 dst[dstParts++] = 0; 2443 } 2444 2445 /* DST += RHS + C where C is zero or one. Returns the carry flag. */ 2446 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs, 2447 WordType c, unsigned parts) { 2448 assert(c <= 1); 2449 2450 for (unsigned i = 0; i < parts; i++) { 2451 WordType l = dst[i]; 2452 if (c) { 2453 dst[i] += rhs[i] + 1; 2454 c = (dst[i] <= l); 2455 } else { 2456 dst[i] += rhs[i]; 2457 c = (dst[i] < l); 2458 } 2459 } 2460 2461 return c; 2462 } 2463 2464 /// This function adds a single "word" integer, src, to the multiple 2465 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and 2466 /// 1 is returned if there is a carry out, otherwise 0 is returned. 2467 /// @returns the carry of the addition. 2468 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src, 2469 unsigned parts) { 2470 for (unsigned i = 0; i < parts; ++i) { 2471 dst[i] += src; 2472 if (dst[i] >= src) 2473 return 0; // No need to carry so exit early. 2474 src = 1; // Carry one to next digit. 2475 } 2476 2477 return 1; 2478 } 2479 2480 /* DST -= RHS + C where C is zero or one. Returns the carry flag. */ 2481 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs, 2482 WordType c, unsigned parts) { 2483 assert(c <= 1); 2484 2485 for (unsigned i = 0; i < parts; i++) { 2486 WordType l = dst[i]; 2487 if (c) { 2488 dst[i] -= rhs[i] + 1; 2489 c = (dst[i] >= l); 2490 } else { 2491 dst[i] -= rhs[i]; 2492 c = (dst[i] > l); 2493 } 2494 } 2495 2496 return c; 2497 } 2498 2499 /// This function subtracts a single "word" (64-bit word), src, from 2500 /// the multi-word integer array, dst[], propagating the borrowed 1 value until 2501 /// no further borrowing is needed or it runs out of "words" in dst. The result 2502 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not 2503 /// exhausted. In other words, if src > dst then this function returns 1, 2504 /// otherwise 0. 2505 /// @returns the borrow out of the subtraction 2506 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src, 2507 unsigned parts) { 2508 for (unsigned i = 0; i < parts; ++i) { 2509 WordType Dst = dst[i]; 2510 dst[i] -= src; 2511 if (src <= Dst) 2512 return 0; // No need to borrow so exit early. 2513 src = 1; // We have to "borrow 1" from next "word" 2514 } 2515 2516 return 1; 2517 } 2518 2519 /* Negate a bignum in-place. */ 2520 void APInt::tcNegate(WordType *dst, unsigned parts) { 2521 tcComplement(dst, parts); 2522 tcIncrement(dst, parts); 2523 } 2524 2525 /* DST += SRC * MULTIPLIER + CARRY if add is true 2526 DST = SRC * MULTIPLIER + CARRY if add is false 2527 2528 Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC 2529 they must start at the same point, i.e. DST == SRC. 2530 2531 If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is 2532 returned. Otherwise DST is filled with the least significant 2533 DSTPARTS parts of the result, and if all of the omitted higher 2534 parts were zero return zero, otherwise overflow occurred and 2535 return one. */ 2536 int APInt::tcMultiplyPart(WordType *dst, const WordType *src, 2537 WordType multiplier, WordType carry, 2538 unsigned srcParts, unsigned dstParts, 2539 bool add) { 2540 /* Otherwise our writes of DST kill our later reads of SRC. */ 2541 assert(dst <= src || dst >= src + srcParts); 2542 assert(dstParts <= srcParts + 1); 2543 2544 /* N loops; minimum of dstParts and srcParts. */ 2545 unsigned n = std::min(dstParts, srcParts); 2546 2547 for (unsigned i = 0; i < n; i++) { 2548 WordType low, mid, high, srcPart; 2549 2550 /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY. 2551 2552 This cannot overflow, because 2553 2554 (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1) 2555 2556 which is less than n^2. */ 2557 2558 srcPart = src[i]; 2559 2560 if (multiplier == 0 || srcPart == 0) { 2561 low = carry; 2562 high = 0; 2563 } else { 2564 low = lowHalf(srcPart) * lowHalf(multiplier); 2565 high = highHalf(srcPart) * highHalf(multiplier); 2566 2567 mid = lowHalf(srcPart) * highHalf(multiplier); 2568 high += highHalf(mid); 2569 mid <<= APINT_BITS_PER_WORD / 2; 2570 if (low + mid < low) 2571 high++; 2572 low += mid; 2573 2574 mid = highHalf(srcPart) * lowHalf(multiplier); 2575 high += highHalf(mid); 2576 mid <<= APINT_BITS_PER_WORD / 2; 2577 if (low + mid < low) 2578 high++; 2579 low += mid; 2580 2581 /* Now add carry. */ 2582 if (low + carry < low) 2583 high++; 2584 low += carry; 2585 } 2586 2587 if (add) { 2588 /* And now DST[i], and store the new low part there. */ 2589 if (low + dst[i] < low) 2590 high++; 2591 dst[i] += low; 2592 } else 2593 dst[i] = low; 2594 2595 carry = high; 2596 } 2597 2598 if (srcParts < dstParts) { 2599 /* Full multiplication, there is no overflow. */ 2600 assert(srcParts + 1 == dstParts); 2601 dst[srcParts] = carry; 2602 return 0; 2603 } 2604 2605 /* We overflowed if there is carry. */ 2606 if (carry) 2607 return 1; 2608 2609 /* We would overflow if any significant unwritten parts would be 2610 non-zero. This is true if any remaining src parts are non-zero 2611 and the multiplier is non-zero. */ 2612 if (multiplier) 2613 for (unsigned i = dstParts; i < srcParts; i++) 2614 if (src[i]) 2615 return 1; 2616 2617 /* We fitted in the narrow destination. */ 2618 return 0; 2619 } 2620 2621 /* DST = LHS * RHS, where DST has the same width as the operands and 2622 is filled with the least significant parts of the result. Returns 2623 one if overflow occurred, otherwise zero. DST must be disjoint 2624 from both operands. */ 2625 int APInt::tcMultiply(WordType *dst, const WordType *lhs, 2626 const WordType *rhs, unsigned parts) { 2627 assert(dst != lhs && dst != rhs); 2628 2629 int overflow = 0; 2630 tcSet(dst, 0, parts); 2631 2632 for (unsigned i = 0; i < parts; i++) 2633 overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts, 2634 parts - i, true); 2635 2636 return overflow; 2637 } 2638 2639 /// DST = LHS * RHS, where DST has width the sum of the widths of the 2640 /// operands. No overflow occurs. DST must be disjoint from both operands. 2641 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs, 2642 const WordType *rhs, unsigned lhsParts, 2643 unsigned rhsParts) { 2644 /* Put the narrower number on the LHS for less loops below. */ 2645 if (lhsParts > rhsParts) 2646 return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts); 2647 2648 assert(dst != lhs && dst != rhs); 2649 2650 tcSet(dst, 0, rhsParts); 2651 2652 for (unsigned i = 0; i < lhsParts; i++) 2653 tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true); 2654 } 2655 2656 /* If RHS is zero LHS and REMAINDER are left unchanged, return one. 2657 Otherwise set LHS to LHS / RHS with the fractional part discarded, 2658 set REMAINDER to the remainder, return zero. i.e. 2659 2660 OLD_LHS = RHS * LHS + REMAINDER 2661 2662 SCRATCH is a bignum of the same size as the operands and result for 2663 use by the routine; its contents need not be initialized and are 2664 destroyed. LHS, REMAINDER and SCRATCH must be distinct. 2665 */ 2666 int APInt::tcDivide(WordType *lhs, const WordType *rhs, 2667 WordType *remainder, WordType *srhs, 2668 unsigned parts) { 2669 assert(lhs != remainder && lhs != srhs && remainder != srhs); 2670 2671 unsigned shiftCount = tcMSB(rhs, parts) + 1; 2672 if (shiftCount == 0) 2673 return true; 2674 2675 shiftCount = parts * APINT_BITS_PER_WORD - shiftCount; 2676 unsigned n = shiftCount / APINT_BITS_PER_WORD; 2677 WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD); 2678 2679 tcAssign(srhs, rhs, parts); 2680 tcShiftLeft(srhs, parts, shiftCount); 2681 tcAssign(remainder, lhs, parts); 2682 tcSet(lhs, 0, parts); 2683 2684 /* Loop, subtracting SRHS if REMAINDER is greater and adding that to 2685 the total. */ 2686 for (;;) { 2687 int compare = tcCompare(remainder, srhs, parts); 2688 if (compare >= 0) { 2689 tcSubtract(remainder, srhs, 0, parts); 2690 lhs[n] |= mask; 2691 } 2692 2693 if (shiftCount == 0) 2694 break; 2695 shiftCount--; 2696 tcShiftRight(srhs, parts, 1); 2697 if ((mask >>= 1) == 0) { 2698 mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1); 2699 n--; 2700 } 2701 } 2702 2703 return false; 2704 } 2705 2706 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are 2707 /// no restrictions on Count. 2708 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) { 2709 // Don't bother performing a no-op shift. 2710 if (!Count) 2711 return; 2712 2713 // WordShift is the inter-part shift; BitShift is the intra-part shift. 2714 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); 2715 unsigned BitShift = Count % APINT_BITS_PER_WORD; 2716 2717 // Fastpath for moving by whole words. 2718 if (BitShift == 0) { 2719 std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE); 2720 } else { 2721 while (Words-- > WordShift) { 2722 Dst[Words] = Dst[Words - WordShift] << BitShift; 2723 if (Words > WordShift) 2724 Dst[Words] |= 2725 Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift); 2726 } 2727 } 2728 2729 // Fill in the remainder with 0s. 2730 std::memset(Dst, 0, WordShift * APINT_WORD_SIZE); 2731 } 2732 2733 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There 2734 /// are no restrictions on Count. 2735 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) { 2736 // Don't bother performing a no-op shift. 2737 if (!Count) 2738 return; 2739 2740 // WordShift is the inter-part shift; BitShift is the intra-part shift. 2741 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); 2742 unsigned BitShift = Count % APINT_BITS_PER_WORD; 2743 2744 unsigned WordsToMove = Words - WordShift; 2745 // Fastpath for moving by whole words. 2746 if (BitShift == 0) { 2747 std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE); 2748 } else { 2749 for (unsigned i = 0; i != WordsToMove; ++i) { 2750 Dst[i] = Dst[i + WordShift] >> BitShift; 2751 if (i + 1 != WordsToMove) 2752 Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift); 2753 } 2754 } 2755 2756 // Fill in the remainder with 0s. 2757 std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE); 2758 } 2759 2760 /* Bitwise and of two bignums. */ 2761 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) { 2762 for (unsigned i = 0; i < parts; i++) 2763 dst[i] &= rhs[i]; 2764 } 2765 2766 /* Bitwise inclusive or of two bignums. */ 2767 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) { 2768 for (unsigned i = 0; i < parts; i++) 2769 dst[i] |= rhs[i]; 2770 } 2771 2772 /* Bitwise exclusive or of two bignums. */ 2773 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) { 2774 for (unsigned i = 0; i < parts; i++) 2775 dst[i] ^= rhs[i]; 2776 } 2777 2778 /* Complement a bignum in-place. */ 2779 void APInt::tcComplement(WordType *dst, unsigned parts) { 2780 for (unsigned i = 0; i < parts; i++) 2781 dst[i] = ~dst[i]; 2782 } 2783 2784 /* Comparison (unsigned) of two bignums. */ 2785 int APInt::tcCompare(const WordType *lhs, const WordType *rhs, 2786 unsigned parts) { 2787 while (parts) { 2788 parts--; 2789 if (lhs[parts] != rhs[parts]) 2790 return (lhs[parts] > rhs[parts]) ? 1 : -1; 2791 } 2792 2793 return 0; 2794 } 2795 2796 /* Set the least significant BITS bits of a bignum, clear the 2797 rest. */ 2798 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts, 2799 unsigned bits) { 2800 unsigned i = 0; 2801 while (bits > APINT_BITS_PER_WORD) { 2802 dst[i++] = ~(WordType) 0; 2803 bits -= APINT_BITS_PER_WORD; 2804 } 2805 2806 if (bits) 2807 dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits); 2808 2809 while (i < parts) 2810 dst[i++] = 0; 2811 } 2812 2813 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B, 2814 APInt::Rounding RM) { 2815 // Currently udivrem always rounds down. 2816 switch (RM) { 2817 case APInt::Rounding::DOWN: 2818 case APInt::Rounding::TOWARD_ZERO: 2819 return A.udiv(B); 2820 case APInt::Rounding::UP: { 2821 APInt Quo, Rem; 2822 APInt::udivrem(A, B, Quo, Rem); 2823 if (Rem == 0) 2824 return Quo; 2825 return Quo + 1; 2826 } 2827 } 2828 llvm_unreachable("Unknown APInt::Rounding enum"); 2829 } 2830 2831 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B, 2832 APInt::Rounding RM) { 2833 switch (RM) { 2834 case APInt::Rounding::DOWN: 2835 case APInt::Rounding::UP: { 2836 APInt Quo, Rem; 2837 APInt::sdivrem(A, B, Quo, Rem); 2838 if (Rem == 0) 2839 return Quo; 2840 // This algorithm deals with arbitrary rounding mode used by sdivrem. 2841 // We want to check whether the non-integer part of the mathematical value 2842 // is negative or not. If the non-integer part is negative, we need to round 2843 // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's 2844 // already rounded down. 2845 if (RM == APInt::Rounding::DOWN) { 2846 if (Rem.isNegative() != B.isNegative()) 2847 return Quo - 1; 2848 return Quo; 2849 } 2850 if (Rem.isNegative() != B.isNegative()) 2851 return Quo; 2852 return Quo + 1; 2853 } 2854 // Currently sdiv rounds towards zero. 2855 case APInt::Rounding::TOWARD_ZERO: 2856 return A.sdiv(B); 2857 } 2858 llvm_unreachable("Unknown APInt::Rounding enum"); 2859 } 2860 2861 Optional<APInt> 2862 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C, 2863 unsigned RangeWidth) { 2864 unsigned CoeffWidth = A.getBitWidth(); 2865 assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth()); 2866 assert(RangeWidth <= CoeffWidth && 2867 "Value range width should be less than coefficient width"); 2868 assert(RangeWidth > 1 && "Value range bit width should be > 1"); 2869 2870 LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B 2871 << "x + " << C << ", rw:" << RangeWidth << '\n'); 2872 2873 // Identify 0 as a (non)solution immediately. 2874 if (C.sextOrTrunc(RangeWidth).isNullValue() ) { 2875 LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n"); 2876 return APInt(CoeffWidth, 0); 2877 } 2878 2879 // The result of APInt arithmetic has the same bit width as the operands, 2880 // so it can actually lose high bits. A product of two n-bit integers needs 2881 // 2n-1 bits to represent the full value. 2882 // The operation done below (on quadratic coefficients) that can produce 2883 // the largest value is the evaluation of the equation during bisection, 2884 // which needs 3 times the bitwidth of the coefficient, so the total number 2885 // of required bits is 3n. 2886 // 2887 // The purpose of this extension is to simulate the set Z of all integers, 2888 // where n+1 > n for all n in Z. In Z it makes sense to talk about positive 2889 // and negative numbers (not so much in a modulo arithmetic). The method 2890 // used to solve the equation is based on the standard formula for real 2891 // numbers, and uses the concepts of "positive" and "negative" with their 2892 // usual meanings. 2893 CoeffWidth *= 3; 2894 A = A.sext(CoeffWidth); 2895 B = B.sext(CoeffWidth); 2896 C = C.sext(CoeffWidth); 2897 2898 // Make A > 0 for simplicity. Negate cannot overflow at this point because 2899 // the bit width has increased. 2900 if (A.isNegative()) { 2901 A.negate(); 2902 B.negate(); 2903 C.negate(); 2904 } 2905 2906 // Solving an equation q(x) = 0 with coefficients in modular arithmetic 2907 // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ..., 2908 // and R = 2^BitWidth. 2909 // Since we're trying not only to find exact solutions, but also values 2910 // that "wrap around", such a set will always have a solution, i.e. an x 2911 // that satisfies at least one of the equations, or such that |q(x)| 2912 // exceeds kR, while |q(x-1)| for the same k does not. 2913 // 2914 // We need to find a value k, such that Ax^2 + Bx + C = kR will have a 2915 // positive solution n (in the above sense), and also such that the n 2916 // will be the least among all solutions corresponding to k = 0, 1, ... 2917 // (more precisely, the least element in the set 2918 // { n(k) | k is such that a solution n(k) exists }). 2919 // 2920 // Consider the parabola (over real numbers) that corresponds to the 2921 // quadratic equation. Since A > 0, the arms of the parabola will point 2922 // up. Picking different values of k will shift it up and down by R. 2923 // 2924 // We want to shift the parabola in such a way as to reduce the problem 2925 // of solving q(x) = kR to solving shifted_q(x) = 0. 2926 // (The interesting solutions are the ceilings of the real number 2927 // solutions.) 2928 APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth); 2929 APInt TwoA = 2 * A; 2930 APInt SqrB = B * B; 2931 bool PickLow; 2932 2933 auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt { 2934 assert(A.isStrictlyPositive()); 2935 APInt T = V.abs().urem(A); 2936 if (T.isNullValue()) 2937 return V; 2938 return V.isNegative() ? V+T : V+(A-T); 2939 }; 2940 2941 // The vertex of the parabola is at -B/2A, but since A > 0, it's negative 2942 // iff B is positive. 2943 if (B.isNonNegative()) { 2944 // If B >= 0, the vertex it at a negative location (or at 0), so in 2945 // order to have a non-negative solution we need to pick k that makes 2946 // C-kR negative. To satisfy all the requirements for the solution 2947 // that we are looking for, it needs to be closest to 0 of all k. 2948 C = C.srem(R); 2949 if (C.isStrictlyPositive()) 2950 C -= R; 2951 // Pick the greater solution. 2952 PickLow = false; 2953 } else { 2954 // If B < 0, the vertex is at a positive location. For any solution 2955 // to exist, the discriminant must be non-negative. This means that 2956 // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a 2957 // lower bound on values of k: kR >= C - B^2/4A. 2958 APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0. 2959 // Round LowkR up (towards +inf) to the nearest kR. 2960 LowkR = RoundUp(LowkR, R); 2961 2962 // If there exists k meeting the condition above, and such that 2963 // C-kR > 0, there will be two positive real number solutions of 2964 // q(x) = kR. Out of all such values of k, pick the one that makes 2965 // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0). 2966 // In other words, find maximum k such that LowkR <= kR < C. 2967 if (C.sgt(LowkR)) { 2968 // If LowkR < C, then such a k is guaranteed to exist because 2969 // LowkR itself is a multiple of R. 2970 C -= -RoundUp(-C, R); // C = C - RoundDown(C, R) 2971 // Pick the smaller solution. 2972 PickLow = true; 2973 } else { 2974 // If C-kR < 0 for all potential k's, it means that one solution 2975 // will be negative, while the other will be positive. The positive 2976 // solution will shift towards 0 if the parabola is moved up. 2977 // Pick the kR closest to the lower bound (i.e. make C-kR closest 2978 // to 0, or in other words, out of all parabolas that have solutions, 2979 // pick the one that is the farthest "up"). 2980 // Since LowkR is itself a multiple of R, simply take C-LowkR. 2981 C -= LowkR; 2982 // Pick the greater solution. 2983 PickLow = false; 2984 } 2985 } 2986 2987 LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + " 2988 << B << "x + " << C << ", rw:" << RangeWidth << '\n'); 2989 2990 APInt D = SqrB - 4*A*C; 2991 assert(D.isNonNegative() && "Negative discriminant"); 2992 APInt SQ = D.sqrt(); 2993 2994 APInt Q = SQ * SQ; 2995 bool InexactSQ = Q != D; 2996 // The calculated SQ may actually be greater than the exact (non-integer) 2997 // value. If that's the case, decrement SQ to get a value that is lower. 2998 if (Q.sgt(D)) 2999 SQ -= 1; 3000 3001 APInt X; 3002 APInt Rem; 3003 3004 // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact. 3005 // When using the quadratic formula directly, the calculated low root 3006 // may be greater than the exact one, since we would be subtracting SQ. 3007 // To make sure that the calculated root is not greater than the exact 3008 // one, subtract SQ+1 when calculating the low root (for inexact value 3009 // of SQ). 3010 if (PickLow) 3011 APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem); 3012 else 3013 APInt::sdivrem(-B + SQ, TwoA, X, Rem); 3014 3015 // The updated coefficients should be such that the (exact) solution is 3016 // positive. Since APInt division rounds towards 0, the calculated one 3017 // can be 0, but cannot be negative. 3018 assert(X.isNonNegative() && "Solution should be non-negative"); 3019 3020 if (!InexactSQ && Rem.isNullValue()) { 3021 LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n'); 3022 return X; 3023 } 3024 3025 assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D"); 3026 // The exact value of the square root of D should be between SQ and SQ+1. 3027 // This implies that the solution should be between that corresponding to 3028 // SQ (i.e. X) and that corresponding to SQ+1. 3029 // 3030 // The calculated X cannot be greater than the exact (real) solution. 3031 // Actually it must be strictly less than the exact solution, while 3032 // X+1 will be greater than or equal to it. 3033 3034 APInt VX = (A*X + B)*X + C; 3035 APInt VY = VX + TwoA*X + A + B; 3036 bool SignChange = VX.isNegative() != VY.isNegative() || 3037 VX.isNullValue() != VY.isNullValue(); 3038 // If the sign did not change between X and X+1, X is not a valid solution. 3039 // This could happen when the actual (exact) roots don't have an integer 3040 // between them, so they would both be contained between X and X+1. 3041 if (!SignChange) { 3042 LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n"); 3043 return None; 3044 } 3045 3046 X += 1; 3047 LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n'); 3048 return X; 3049 } 3050 3051 Optional<unsigned> 3052 llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) { 3053 assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth"); 3054 if (A == B) 3055 return llvm::None; 3056 return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1); 3057 } 3058 3059 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst 3060 /// with the integer held in IntVal. 3061 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, 3062 unsigned StoreBytes) { 3063 assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!"); 3064 const uint8_t *Src = (const uint8_t *)IntVal.getRawData(); 3065 3066 if (sys::IsLittleEndianHost) { 3067 // Little-endian host - the source is ordered from LSB to MSB. Order the 3068 // destination from LSB to MSB: Do a straight copy. 3069 memcpy(Dst, Src, StoreBytes); 3070 } else { 3071 // Big-endian host - the source is an array of 64 bit words ordered from 3072 // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination 3073 // from MSB to LSB: Reverse the word order, but not the bytes in a word. 3074 while (StoreBytes > sizeof(uint64_t)) { 3075 StoreBytes -= sizeof(uint64_t); 3076 // May not be aligned so use memcpy. 3077 memcpy(Dst + StoreBytes, Src, sizeof(uint64_t)); 3078 Src += sizeof(uint64_t); 3079 } 3080 3081 memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes); 3082 } 3083 } 3084 3085 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting 3086 /// from Src into IntVal, which is assumed to be wide enough and to hold zero. 3087 void llvm::LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) { 3088 assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!"); 3089 uint8_t *Dst = reinterpret_cast<uint8_t *>( 3090 const_cast<uint64_t *>(IntVal.getRawData())); 3091 3092 if (sys::IsLittleEndianHost) 3093 // Little-endian host - the destination must be ordered from LSB to MSB. 3094 // The source is ordered from LSB to MSB: Do a straight copy. 3095 memcpy(Dst, Src, LoadBytes); 3096 else { 3097 // Big-endian - the destination is an array of 64 bit words ordered from 3098 // LSW to MSW. Each word must be ordered from MSB to LSB. The source is 3099 // ordered from MSB to LSB: Reverse the word order, but not the bytes in 3100 // a word. 3101 while (LoadBytes > sizeof(uint64_t)) { 3102 LoadBytes -= sizeof(uint64_t); 3103 // May not be aligned so use memcpy. 3104 memcpy(Dst, Src + LoadBytes, sizeof(uint64_t)); 3105 Dst += sizeof(uint64_t); 3106 } 3107 3108 memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes); 3109 } 3110 } 3111