1 //===-- APInt.cpp - Implement APInt class ---------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a class to represent arbitrary precision integer 10 // constant values and provide a variety of arithmetic operations on them. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/APInt.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/FoldingSet.h" 17 #include "llvm/ADT/Hashing.h" 18 #include "llvm/ADT/Optional.h" 19 #include "llvm/ADT/SmallString.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/ADT/bit.h" 22 #include "llvm/Config/llvm-config.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/ErrorHandling.h" 25 #include "llvm/Support/MathExtras.h" 26 #include "llvm/Support/raw_ostream.h" 27 #include <climits> 28 #include <cmath> 29 #include <cstdlib> 30 #include <cstring> 31 using namespace llvm; 32 33 #define DEBUG_TYPE "apint" 34 35 /// A utility function for allocating memory, checking for allocation failures, 36 /// and ensuring the contents are zeroed. 37 inline static uint64_t* getClearedMemory(unsigned numWords) { 38 uint64_t *result = new uint64_t[numWords]; 39 memset(result, 0, numWords * sizeof(uint64_t)); 40 return result; 41 } 42 43 /// A utility function for allocating memory and checking for allocation 44 /// failure. The content is not zeroed. 45 inline static uint64_t* getMemory(unsigned numWords) { 46 return new uint64_t[numWords]; 47 } 48 49 /// A utility function that converts a character to a digit. 50 inline static unsigned getDigit(char cdigit, uint8_t radix) { 51 unsigned r; 52 53 if (radix == 16 || radix == 36) { 54 r = cdigit - '0'; 55 if (r <= 9) 56 return r; 57 58 r = cdigit - 'A'; 59 if (r <= radix - 11U) 60 return r + 10; 61 62 r = cdigit - 'a'; 63 if (r <= radix - 11U) 64 return r + 10; 65 66 radix = 10; 67 } 68 69 r = cdigit - '0'; 70 if (r < radix) 71 return r; 72 73 return -1U; 74 } 75 76 77 void APInt::initSlowCase(uint64_t val, bool isSigned) { 78 U.pVal = getClearedMemory(getNumWords()); 79 U.pVal[0] = val; 80 if (isSigned && int64_t(val) < 0) 81 for (unsigned i = 1; i < getNumWords(); ++i) 82 U.pVal[i] = WORDTYPE_MAX; 83 clearUnusedBits(); 84 } 85 86 void APInt::initSlowCase(const APInt& that) { 87 U.pVal = getMemory(getNumWords()); 88 memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE); 89 } 90 91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) { 92 assert(BitWidth && "Bitwidth too small"); 93 assert(bigVal.data() && "Null pointer detected!"); 94 if (isSingleWord()) 95 U.VAL = bigVal[0]; 96 else { 97 // Get memory, cleared to 0 98 U.pVal = getClearedMemory(getNumWords()); 99 // Calculate the number of words to copy 100 unsigned words = std::min<unsigned>(bigVal.size(), getNumWords()); 101 // Copy the words from bigVal to pVal 102 memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE); 103 } 104 // Make sure unused high bits are cleared 105 clearUnusedBits(); 106 } 107 108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal) 109 : BitWidth(numBits) { 110 initFromArray(bigVal); 111 } 112 113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) 114 : BitWidth(numBits) { 115 initFromArray(makeArrayRef(bigVal, numWords)); 116 } 117 118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix) 119 : BitWidth(numbits) { 120 assert(BitWidth && "Bitwidth too small"); 121 fromString(numbits, Str, radix); 122 } 123 124 void APInt::reallocate(unsigned NewBitWidth) { 125 // If the number of words is the same we can just change the width and stop. 126 if (getNumWords() == getNumWords(NewBitWidth)) { 127 BitWidth = NewBitWidth; 128 return; 129 } 130 131 // If we have an allocation, delete it. 132 if (!isSingleWord()) 133 delete [] U.pVal; 134 135 // Update BitWidth. 136 BitWidth = NewBitWidth; 137 138 // If we are supposed to have an allocation, create it. 139 if (!isSingleWord()) 140 U.pVal = getMemory(getNumWords()); 141 } 142 143 void APInt::AssignSlowCase(const APInt& RHS) { 144 // Don't do anything for X = X 145 if (this == &RHS) 146 return; 147 148 // Adjust the bit width and handle allocations as necessary. 149 reallocate(RHS.getBitWidth()); 150 151 // Copy the data. 152 if (isSingleWord()) 153 U.VAL = RHS.U.VAL; 154 else 155 memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE); 156 } 157 158 /// This method 'profiles' an APInt for use with FoldingSet. 159 void APInt::Profile(FoldingSetNodeID& ID) const { 160 ID.AddInteger(BitWidth); 161 162 if (isSingleWord()) { 163 ID.AddInteger(U.VAL); 164 return; 165 } 166 167 unsigned NumWords = getNumWords(); 168 for (unsigned i = 0; i < NumWords; ++i) 169 ID.AddInteger(U.pVal[i]); 170 } 171 172 /// Prefix increment operator. Increments the APInt by one. 173 APInt& APInt::operator++() { 174 if (isSingleWord()) 175 ++U.VAL; 176 else 177 tcIncrement(U.pVal, getNumWords()); 178 return clearUnusedBits(); 179 } 180 181 /// Prefix decrement operator. Decrements the APInt by one. 182 APInt& APInt::operator--() { 183 if (isSingleWord()) 184 --U.VAL; 185 else 186 tcDecrement(U.pVal, getNumWords()); 187 return clearUnusedBits(); 188 } 189 190 /// Adds the RHS APInt to this APInt. 191 /// @returns this, after addition of RHS. 192 /// Addition assignment operator. 193 APInt& APInt::operator+=(const APInt& RHS) { 194 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 195 if (isSingleWord()) 196 U.VAL += RHS.U.VAL; 197 else 198 tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords()); 199 return clearUnusedBits(); 200 } 201 202 APInt& APInt::operator+=(uint64_t RHS) { 203 if (isSingleWord()) 204 U.VAL += RHS; 205 else 206 tcAddPart(U.pVal, RHS, getNumWords()); 207 return clearUnusedBits(); 208 } 209 210 /// Subtracts the RHS APInt from this APInt 211 /// @returns this, after subtraction 212 /// Subtraction assignment operator. 213 APInt& APInt::operator-=(const APInt& RHS) { 214 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 215 if (isSingleWord()) 216 U.VAL -= RHS.U.VAL; 217 else 218 tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords()); 219 return clearUnusedBits(); 220 } 221 222 APInt& APInt::operator-=(uint64_t RHS) { 223 if (isSingleWord()) 224 U.VAL -= RHS; 225 else 226 tcSubtractPart(U.pVal, RHS, getNumWords()); 227 return clearUnusedBits(); 228 } 229 230 APInt APInt::operator*(const APInt& RHS) const { 231 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 232 if (isSingleWord()) 233 return APInt(BitWidth, U.VAL * RHS.U.VAL); 234 235 APInt Result(getMemory(getNumWords()), getBitWidth()); 236 237 tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords()); 238 239 Result.clearUnusedBits(); 240 return Result; 241 } 242 243 void APInt::AndAssignSlowCase(const APInt& RHS) { 244 tcAnd(U.pVal, RHS.U.pVal, getNumWords()); 245 } 246 247 void APInt::OrAssignSlowCase(const APInt& RHS) { 248 tcOr(U.pVal, RHS.U.pVal, getNumWords()); 249 } 250 251 void APInt::XorAssignSlowCase(const APInt& RHS) { 252 tcXor(U.pVal, RHS.U.pVal, getNumWords()); 253 } 254 255 APInt& APInt::operator*=(const APInt& RHS) { 256 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 257 *this = *this * RHS; 258 return *this; 259 } 260 261 APInt& APInt::operator*=(uint64_t RHS) { 262 if (isSingleWord()) { 263 U.VAL *= RHS; 264 } else { 265 unsigned NumWords = getNumWords(); 266 tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false); 267 } 268 return clearUnusedBits(); 269 } 270 271 bool APInt::EqualSlowCase(const APInt& RHS) const { 272 return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal); 273 } 274 275 int APInt::compare(const APInt& RHS) const { 276 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); 277 if (isSingleWord()) 278 return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL; 279 280 return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); 281 } 282 283 int APInt::compareSigned(const APInt& RHS) const { 284 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); 285 if (isSingleWord()) { 286 int64_t lhsSext = SignExtend64(U.VAL, BitWidth); 287 int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth); 288 return lhsSext < rhsSext ? -1 : lhsSext > rhsSext; 289 } 290 291 bool lhsNeg = isNegative(); 292 bool rhsNeg = RHS.isNegative(); 293 294 // If the sign bits don't match, then (LHS < RHS) if LHS is negative 295 if (lhsNeg != rhsNeg) 296 return lhsNeg ? -1 : 1; 297 298 // Otherwise we can just use an unsigned comparison, because even negative 299 // numbers compare correctly this way if both have the same signed-ness. 300 return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); 301 } 302 303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { 304 unsigned loWord = whichWord(loBit); 305 unsigned hiWord = whichWord(hiBit); 306 307 // Create an initial mask for the low word with zeros below loBit. 308 uint64_t loMask = WORDTYPE_MAX << whichBit(loBit); 309 310 // If hiBit is not aligned, we need a high mask. 311 unsigned hiShiftAmt = whichBit(hiBit); 312 if (hiShiftAmt != 0) { 313 // Create a high mask with zeros above hiBit. 314 uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt); 315 // If loWord and hiWord are equal, then we combine the masks. Otherwise, 316 // set the bits in hiWord. 317 if (hiWord == loWord) 318 loMask &= hiMask; 319 else 320 U.pVal[hiWord] |= hiMask; 321 } 322 // Apply the mask to the low word. 323 U.pVal[loWord] |= loMask; 324 325 // Fill any words between loWord and hiWord with all ones. 326 for (unsigned word = loWord + 1; word < hiWord; ++word) 327 U.pVal[word] = WORDTYPE_MAX; 328 } 329 330 /// Toggle every bit to its opposite value. 331 void APInt::flipAllBitsSlowCase() { 332 tcComplement(U.pVal, getNumWords()); 333 clearUnusedBits(); 334 } 335 336 /// Toggle a given bit to its opposite value whose position is given 337 /// as "bitPosition". 338 /// Toggles a given bit to its opposite value. 339 void APInt::flipBit(unsigned bitPosition) { 340 assert(bitPosition < BitWidth && "Out of the bit-width range!"); 341 setBitVal(bitPosition, !(*this)[bitPosition]); 342 } 343 344 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { 345 unsigned subBitWidth = subBits.getBitWidth(); 346 assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth && 347 "Illegal bit insertion"); 348 349 // Insertion is a direct copy. 350 if (subBitWidth == BitWidth) { 351 *this = subBits; 352 return; 353 } 354 355 // Single word result can be done as a direct bitmask. 356 if (isSingleWord()) { 357 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth); 358 U.VAL &= ~(mask << bitPosition); 359 U.VAL |= (subBits.U.VAL << bitPosition); 360 return; 361 } 362 363 unsigned loBit = whichBit(bitPosition); 364 unsigned loWord = whichWord(bitPosition); 365 unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1); 366 367 // Insertion within a single word can be done as a direct bitmask. 368 if (loWord == hi1Word) { 369 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth); 370 U.pVal[loWord] &= ~(mask << loBit); 371 U.pVal[loWord] |= (subBits.U.VAL << loBit); 372 return; 373 } 374 375 // Insert on word boundaries. 376 if (loBit == 0) { 377 // Direct copy whole words. 378 unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD; 379 memcpy(U.pVal + loWord, subBits.getRawData(), 380 numWholeSubWords * APINT_WORD_SIZE); 381 382 // Mask+insert remaining bits. 383 unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD; 384 if (remainingBits != 0) { 385 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits); 386 U.pVal[hi1Word] &= ~mask; 387 U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1); 388 } 389 return; 390 } 391 392 // General case - set/clear individual bits in dst based on src. 393 // TODO - there is scope for optimization here, but at the moment this code 394 // path is barely used so prefer readability over performance. 395 for (unsigned i = 0; i != subBitWidth; ++i) 396 setBitVal(bitPosition + i, subBits[i]); 397 } 398 399 void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) { 400 uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits); 401 subBits &= maskBits; 402 if (isSingleWord()) { 403 U.VAL &= ~(maskBits << bitPosition); 404 U.VAL |= subBits << bitPosition; 405 return; 406 } 407 408 unsigned loBit = whichBit(bitPosition); 409 unsigned loWord = whichWord(bitPosition); 410 unsigned hiWord = whichWord(bitPosition + numBits - 1); 411 if (loWord == hiWord) { 412 U.pVal[loWord] &= ~(maskBits << loBit); 413 U.pVal[loWord] |= subBits << loBit; 414 return; 415 } 416 417 static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected"); 418 unsigned wordBits = 8 * sizeof(WordType); 419 U.pVal[loWord] &= ~(maskBits << loBit); 420 U.pVal[loWord] |= subBits << loBit; 421 422 U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit)); 423 U.pVal[hiWord] |= subBits >> (wordBits - loBit); 424 } 425 426 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { 427 assert(numBits > 0 && "Can't extract zero bits"); 428 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && 429 "Illegal bit extraction"); 430 431 if (isSingleWord()) 432 return APInt(numBits, U.VAL >> bitPosition); 433 434 unsigned loBit = whichBit(bitPosition); 435 unsigned loWord = whichWord(bitPosition); 436 unsigned hiWord = whichWord(bitPosition + numBits - 1); 437 438 // Single word result extracting bits from a single word source. 439 if (loWord == hiWord) 440 return APInt(numBits, U.pVal[loWord] >> loBit); 441 442 // Extracting bits that start on a source word boundary can be done 443 // as a fast memory copy. 444 if (loBit == 0) 445 return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord)); 446 447 // General case - shift + copy source words directly into place. 448 APInt Result(numBits, 0); 449 unsigned NumSrcWords = getNumWords(); 450 unsigned NumDstWords = Result.getNumWords(); 451 452 uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal; 453 for (unsigned word = 0; word < NumDstWords; ++word) { 454 uint64_t w0 = U.pVal[loWord + word]; 455 uint64_t w1 = 456 (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0; 457 DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit)); 458 } 459 460 return Result.clearUnusedBits(); 461 } 462 463 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits, 464 unsigned bitPosition) const { 465 assert(numBits > 0 && "Can't extract zero bits"); 466 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && 467 "Illegal bit extraction"); 468 assert(numBits <= 64 && "Illegal bit extraction"); 469 470 uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits); 471 if (isSingleWord()) 472 return (U.VAL >> bitPosition) & maskBits; 473 474 unsigned loBit = whichBit(bitPosition); 475 unsigned loWord = whichWord(bitPosition); 476 unsigned hiWord = whichWord(bitPosition + numBits - 1); 477 if (loWord == hiWord) 478 return (U.pVal[loWord] >> loBit) & maskBits; 479 480 static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected"); 481 unsigned wordBits = 8 * sizeof(WordType); 482 uint64_t retBits = U.pVal[loWord] >> loBit; 483 retBits |= U.pVal[hiWord] << (wordBits - loBit); 484 retBits &= maskBits; 485 return retBits; 486 } 487 488 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { 489 assert(!str.empty() && "Invalid string length"); 490 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || 491 radix == 36) && 492 "Radix should be 2, 8, 10, 16, or 36!"); 493 494 size_t slen = str.size(); 495 496 // Each computation below needs to know if it's negative. 497 StringRef::iterator p = str.begin(); 498 unsigned isNegative = *p == '-'; 499 if (*p == '-' || *p == '+') { 500 p++; 501 slen--; 502 assert(slen && "String is only a sign, needs a value."); 503 } 504 505 // For radixes of power-of-two values, the bits required is accurately and 506 // easily computed 507 if (radix == 2) 508 return slen + isNegative; 509 if (radix == 8) 510 return slen * 3 + isNegative; 511 if (radix == 16) 512 return slen * 4 + isNegative; 513 514 // FIXME: base 36 515 516 // This is grossly inefficient but accurate. We could probably do something 517 // with a computation of roughly slen*64/20 and then adjust by the value of 518 // the first few digits. But, I'm not sure how accurate that could be. 519 520 // Compute a sufficient number of bits that is always large enough but might 521 // be too large. This avoids the assertion in the constructor. This 522 // calculation doesn't work appropriately for the numbers 0-9, so just use 4 523 // bits in that case. 524 unsigned sufficient 525 = radix == 10? (slen == 1 ? 4 : slen * 64/18) 526 : (slen == 1 ? 7 : slen * 16/3); 527 528 // Convert to the actual binary value. 529 APInt tmp(sufficient, StringRef(p, slen), radix); 530 531 // Compute how many bits are required. If the log is infinite, assume we need 532 // just bit. If the log is exact and value is negative, then the value is 533 // MinSignedValue with (log + 1) bits. 534 unsigned log = tmp.logBase2(); 535 if (log == (unsigned)-1) { 536 return isNegative + 1; 537 } else if (isNegative && tmp.isPowerOf2()) { 538 return isNegative + log; 539 } else { 540 return isNegative + log + 1; 541 } 542 } 543 544 hash_code llvm::hash_value(const APInt &Arg) { 545 if (Arg.isSingleWord()) 546 return hash_combine(Arg.BitWidth, Arg.U.VAL); 547 548 return hash_combine( 549 Arg.BitWidth, 550 hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords())); 551 } 552 553 bool APInt::isSplat(unsigned SplatSizeInBits) const { 554 assert(getBitWidth() % SplatSizeInBits == 0 && 555 "SplatSizeInBits must divide width!"); 556 // We can check that all parts of an integer are equal by making use of a 557 // little trick: rotate and check if it's still the same value. 558 return *this == rotl(SplatSizeInBits); 559 } 560 561 /// This function returns the high "numBits" bits of this APInt. 562 APInt APInt::getHiBits(unsigned numBits) const { 563 return this->lshr(BitWidth - numBits); 564 } 565 566 /// This function returns the low "numBits" bits of this APInt. 567 APInt APInt::getLoBits(unsigned numBits) const { 568 APInt Result(getLowBitsSet(BitWidth, numBits)); 569 Result &= *this; 570 return Result; 571 } 572 573 /// Return a value containing V broadcasted over NewLen bits. 574 APInt APInt::getSplat(unsigned NewLen, const APInt &V) { 575 assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!"); 576 577 APInt Val = V.zextOrSelf(NewLen); 578 for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1) 579 Val |= Val << I; 580 581 return Val; 582 } 583 584 unsigned APInt::countLeadingZerosSlowCase() const { 585 unsigned Count = 0; 586 for (int i = getNumWords()-1; i >= 0; --i) { 587 uint64_t V = U.pVal[i]; 588 if (V == 0) 589 Count += APINT_BITS_PER_WORD; 590 else { 591 Count += llvm::countLeadingZeros(V); 592 break; 593 } 594 } 595 // Adjust for unused bits in the most significant word (they are zero). 596 unsigned Mod = BitWidth % APINT_BITS_PER_WORD; 597 Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0; 598 return Count; 599 } 600 601 unsigned APInt::countLeadingOnesSlowCase() const { 602 unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD; 603 unsigned shift; 604 if (!highWordBits) { 605 highWordBits = APINT_BITS_PER_WORD; 606 shift = 0; 607 } else { 608 shift = APINT_BITS_PER_WORD - highWordBits; 609 } 610 int i = getNumWords() - 1; 611 unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift); 612 if (Count == highWordBits) { 613 for (i--; i >= 0; --i) { 614 if (U.pVal[i] == WORDTYPE_MAX) 615 Count += APINT_BITS_PER_WORD; 616 else { 617 Count += llvm::countLeadingOnes(U.pVal[i]); 618 break; 619 } 620 } 621 } 622 return Count; 623 } 624 625 unsigned APInt::countTrailingZerosSlowCase() const { 626 unsigned Count = 0; 627 unsigned i = 0; 628 for (; i < getNumWords() && U.pVal[i] == 0; ++i) 629 Count += APINT_BITS_PER_WORD; 630 if (i < getNumWords()) 631 Count += llvm::countTrailingZeros(U.pVal[i]); 632 return std::min(Count, BitWidth); 633 } 634 635 unsigned APInt::countTrailingOnesSlowCase() const { 636 unsigned Count = 0; 637 unsigned i = 0; 638 for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i) 639 Count += APINT_BITS_PER_WORD; 640 if (i < getNumWords()) 641 Count += llvm::countTrailingOnes(U.pVal[i]); 642 assert(Count <= BitWidth); 643 return Count; 644 } 645 646 unsigned APInt::countPopulationSlowCase() const { 647 unsigned Count = 0; 648 for (unsigned i = 0; i < getNumWords(); ++i) 649 Count += llvm::countPopulation(U.pVal[i]); 650 return Count; 651 } 652 653 bool APInt::intersectsSlowCase(const APInt &RHS) const { 654 for (unsigned i = 0, e = getNumWords(); i != e; ++i) 655 if ((U.pVal[i] & RHS.U.pVal[i]) != 0) 656 return true; 657 658 return false; 659 } 660 661 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const { 662 for (unsigned i = 0, e = getNumWords(); i != e; ++i) 663 if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0) 664 return false; 665 666 return true; 667 } 668 669 APInt APInt::byteSwap() const { 670 assert(BitWidth >= 16 && BitWidth % 8 == 0 && "Cannot byteswap!"); 671 if (BitWidth == 16) 672 return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL))); 673 if (BitWidth == 32) 674 return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL))); 675 if (BitWidth <= 64) { 676 uint64_t Tmp1 = ByteSwap_64(U.VAL); 677 Tmp1 >>= (64 - BitWidth); 678 return APInt(BitWidth, Tmp1); 679 } 680 681 APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0); 682 for (unsigned I = 0, N = getNumWords(); I != N; ++I) 683 Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]); 684 if (Result.BitWidth != BitWidth) { 685 Result.lshrInPlace(Result.BitWidth - BitWidth); 686 Result.BitWidth = BitWidth; 687 } 688 return Result; 689 } 690 691 APInt APInt::reverseBits() const { 692 switch (BitWidth) { 693 case 64: 694 return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL)); 695 case 32: 696 return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL)); 697 case 16: 698 return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL)); 699 case 8: 700 return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL)); 701 default: 702 break; 703 } 704 705 APInt Val(*this); 706 APInt Reversed(BitWidth, 0); 707 unsigned S = BitWidth; 708 709 for (; Val != 0; Val.lshrInPlace(1)) { 710 Reversed <<= 1; 711 Reversed |= Val[0]; 712 --S; 713 } 714 715 Reversed <<= S; 716 return Reversed; 717 } 718 719 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) { 720 // Fast-path a common case. 721 if (A == B) return A; 722 723 // Corner cases: if either operand is zero, the other is the gcd. 724 if (!A) return B; 725 if (!B) return A; 726 727 // Count common powers of 2 and remove all other powers of 2. 728 unsigned Pow2; 729 { 730 unsigned Pow2_A = A.countTrailingZeros(); 731 unsigned Pow2_B = B.countTrailingZeros(); 732 if (Pow2_A > Pow2_B) { 733 A.lshrInPlace(Pow2_A - Pow2_B); 734 Pow2 = Pow2_B; 735 } else if (Pow2_B > Pow2_A) { 736 B.lshrInPlace(Pow2_B - Pow2_A); 737 Pow2 = Pow2_A; 738 } else { 739 Pow2 = Pow2_A; 740 } 741 } 742 743 // Both operands are odd multiples of 2^Pow_2: 744 // 745 // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b)) 746 // 747 // This is a modified version of Stein's algorithm, taking advantage of 748 // efficient countTrailingZeros(). 749 while (A != B) { 750 if (A.ugt(B)) { 751 A -= B; 752 A.lshrInPlace(A.countTrailingZeros() - Pow2); 753 } else { 754 B -= A; 755 B.lshrInPlace(B.countTrailingZeros() - Pow2); 756 } 757 } 758 759 return A; 760 } 761 762 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { 763 uint64_t I = bit_cast<uint64_t>(Double); 764 765 // Get the sign bit from the highest order bit 766 bool isNeg = I >> 63; 767 768 // Get the 11-bit exponent and adjust for the 1023 bit bias 769 int64_t exp = ((I >> 52) & 0x7ff) - 1023; 770 771 // If the exponent is negative, the value is < 0 so just return 0. 772 if (exp < 0) 773 return APInt(width, 0u); 774 775 // Extract the mantissa by clearing the top 12 bits (sign + exponent). 776 uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52; 777 778 // If the exponent doesn't shift all bits out of the mantissa 779 if (exp < 52) 780 return isNeg ? -APInt(width, mantissa >> (52 - exp)) : 781 APInt(width, mantissa >> (52 - exp)); 782 783 // If the client didn't provide enough bits for us to shift the mantissa into 784 // then the result is undefined, just return 0 785 if (width <= exp - 52) 786 return APInt(width, 0); 787 788 // Otherwise, we have to shift the mantissa bits up to the right location 789 APInt Tmp(width, mantissa); 790 Tmp <<= (unsigned)exp - 52; 791 return isNeg ? -Tmp : Tmp; 792 } 793 794 /// This function converts this APInt to a double. 795 /// The layout for double is as following (IEEE Standard 754): 796 /// -------------------------------------- 797 /// | Sign Exponent Fraction Bias | 798 /// |-------------------------------------- | 799 /// | 1[63] 11[62-52] 52[51-00] 1023 | 800 /// -------------------------------------- 801 double APInt::roundToDouble(bool isSigned) const { 802 803 // Handle the simple case where the value is contained in one uint64_t. 804 // It is wrong to optimize getWord(0) to VAL; there might be more than one word. 805 if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) { 806 if (isSigned) { 807 int64_t sext = SignExtend64(getWord(0), BitWidth); 808 return double(sext); 809 } else 810 return double(getWord(0)); 811 } 812 813 // Determine if the value is negative. 814 bool isNeg = isSigned ? (*this)[BitWidth-1] : false; 815 816 // Construct the absolute value if we're negative. 817 APInt Tmp(isNeg ? -(*this) : (*this)); 818 819 // Figure out how many bits we're using. 820 unsigned n = Tmp.getActiveBits(); 821 822 // The exponent (without bias normalization) is just the number of bits 823 // we are using. Note that the sign bit is gone since we constructed the 824 // absolute value. 825 uint64_t exp = n; 826 827 // Return infinity for exponent overflow 828 if (exp > 1023) { 829 if (!isSigned || !isNeg) 830 return std::numeric_limits<double>::infinity(); 831 else 832 return -std::numeric_limits<double>::infinity(); 833 } 834 exp += 1023; // Increment for 1023 bias 835 836 // Number of bits in mantissa is 52. To obtain the mantissa value, we must 837 // extract the high 52 bits from the correct words in pVal. 838 uint64_t mantissa; 839 unsigned hiWord = whichWord(n-1); 840 if (hiWord == 0) { 841 mantissa = Tmp.U.pVal[0]; 842 if (n > 52) 843 mantissa >>= n - 52; // shift down, we want the top 52 bits. 844 } else { 845 assert(hiWord > 0 && "huh?"); 846 uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD); 847 uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD); 848 mantissa = hibits | lobits; 849 } 850 851 // The leading bit of mantissa is implicit, so get rid of it. 852 uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0; 853 uint64_t I = sign | (exp << 52) | mantissa; 854 return bit_cast<double>(I); 855 } 856 857 // Truncate to new width. 858 APInt APInt::trunc(unsigned width) const { 859 assert(width < BitWidth && "Invalid APInt Truncate request"); 860 assert(width && "Can't truncate to 0 bits"); 861 862 if (width <= APINT_BITS_PER_WORD) 863 return APInt(width, getRawData()[0]); 864 865 APInt Result(getMemory(getNumWords(width)), width); 866 867 // Copy full words. 868 unsigned i; 869 for (i = 0; i != width / APINT_BITS_PER_WORD; i++) 870 Result.U.pVal[i] = U.pVal[i]; 871 872 // Truncate and copy any partial word. 873 unsigned bits = (0 - width) % APINT_BITS_PER_WORD; 874 if (bits != 0) 875 Result.U.pVal[i] = U.pVal[i] << bits >> bits; 876 877 return Result; 878 } 879 880 // Truncate to new width with unsigned saturation. 881 APInt APInt::truncUSat(unsigned width) const { 882 assert(width < BitWidth && "Invalid APInt Truncate request"); 883 assert(width && "Can't truncate to 0 bits"); 884 885 // Can we just losslessly truncate it? 886 if (isIntN(width)) 887 return trunc(width); 888 // If not, then just return the new limit. 889 return APInt::getMaxValue(width); 890 } 891 892 // Truncate to new width with signed saturation. 893 APInt APInt::truncSSat(unsigned width) const { 894 assert(width < BitWidth && "Invalid APInt Truncate request"); 895 assert(width && "Can't truncate to 0 bits"); 896 897 // Can we just losslessly truncate it? 898 if (isSignedIntN(width)) 899 return trunc(width); 900 // If not, then just return the new limits. 901 return isNegative() ? APInt::getSignedMinValue(width) 902 : APInt::getSignedMaxValue(width); 903 } 904 905 // Sign extend to a new width. 906 APInt APInt::sext(unsigned Width) const { 907 assert(Width > BitWidth && "Invalid APInt SignExtend request"); 908 909 if (Width <= APINT_BITS_PER_WORD) 910 return APInt(Width, SignExtend64(U.VAL, BitWidth)); 911 912 APInt Result(getMemory(getNumWords(Width)), Width); 913 914 // Copy words. 915 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); 916 917 // Sign extend the last word since there may be unused bits in the input. 918 Result.U.pVal[getNumWords() - 1] = 919 SignExtend64(Result.U.pVal[getNumWords() - 1], 920 ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); 921 922 // Fill with sign bits. 923 std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0, 924 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); 925 Result.clearUnusedBits(); 926 return Result; 927 } 928 929 // Zero extend to a new width. 930 APInt APInt::zext(unsigned width) const { 931 assert(width > BitWidth && "Invalid APInt ZeroExtend request"); 932 933 if (width <= APINT_BITS_PER_WORD) 934 return APInt(width, U.VAL); 935 936 APInt Result(getMemory(getNumWords(width)), width); 937 938 // Copy words. 939 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); 940 941 // Zero remaining words. 942 std::memset(Result.U.pVal + getNumWords(), 0, 943 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); 944 945 return Result; 946 } 947 948 APInt APInt::zextOrTrunc(unsigned width) const { 949 if (BitWidth < width) 950 return zext(width); 951 if (BitWidth > width) 952 return trunc(width); 953 return *this; 954 } 955 956 APInt APInt::sextOrTrunc(unsigned width) const { 957 if (BitWidth < width) 958 return sext(width); 959 if (BitWidth > width) 960 return trunc(width); 961 return *this; 962 } 963 964 APInt APInt::zextOrSelf(unsigned width) const { 965 if (BitWidth < width) 966 return zext(width); 967 return *this; 968 } 969 970 APInt APInt::sextOrSelf(unsigned width) const { 971 if (BitWidth < width) 972 return sext(width); 973 return *this; 974 } 975 976 /// Arithmetic right-shift this APInt by shiftAmt. 977 /// Arithmetic right-shift function. 978 void APInt::ashrInPlace(const APInt &shiftAmt) { 979 ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); 980 } 981 982 /// Arithmetic right-shift this APInt by shiftAmt. 983 /// Arithmetic right-shift function. 984 void APInt::ashrSlowCase(unsigned ShiftAmt) { 985 // Don't bother performing a no-op shift. 986 if (!ShiftAmt) 987 return; 988 989 // Save the original sign bit for later. 990 bool Negative = isNegative(); 991 992 // WordShift is the inter-part shift; BitShift is intra-part shift. 993 unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD; 994 unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD; 995 996 unsigned WordsToMove = getNumWords() - WordShift; 997 if (WordsToMove != 0) { 998 // Sign extend the last word to fill in the unused bits. 999 U.pVal[getNumWords() - 1] = SignExtend64( 1000 U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); 1001 1002 // Fastpath for moving by whole words. 1003 if (BitShift == 0) { 1004 std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE); 1005 } else { 1006 // Move the words containing significant bits. 1007 for (unsigned i = 0; i != WordsToMove - 1; ++i) 1008 U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) | 1009 (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift)); 1010 1011 // Handle the last word which has no high bits to copy. 1012 U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift; 1013 // Sign extend one more time. 1014 U.pVal[WordsToMove - 1] = 1015 SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift); 1016 } 1017 } 1018 1019 // Fill in the remainder based on the original sign. 1020 std::memset(U.pVal + WordsToMove, Negative ? -1 : 0, 1021 WordShift * APINT_WORD_SIZE); 1022 clearUnusedBits(); 1023 } 1024 1025 /// Logical right-shift this APInt by shiftAmt. 1026 /// Logical right-shift function. 1027 void APInt::lshrInPlace(const APInt &shiftAmt) { 1028 lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); 1029 } 1030 1031 /// Logical right-shift this APInt by shiftAmt. 1032 /// Logical right-shift function. 1033 void APInt::lshrSlowCase(unsigned ShiftAmt) { 1034 tcShiftRight(U.pVal, getNumWords(), ShiftAmt); 1035 } 1036 1037 /// Left-shift this APInt by shiftAmt. 1038 /// Left-shift function. 1039 APInt &APInt::operator<<=(const APInt &shiftAmt) { 1040 // It's undefined behavior in C to shift by BitWidth or greater. 1041 *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth); 1042 return *this; 1043 } 1044 1045 void APInt::shlSlowCase(unsigned ShiftAmt) { 1046 tcShiftLeft(U.pVal, getNumWords(), ShiftAmt); 1047 clearUnusedBits(); 1048 } 1049 1050 // Calculate the rotate amount modulo the bit width. 1051 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) { 1052 unsigned rotBitWidth = rotateAmt.getBitWidth(); 1053 APInt rot = rotateAmt; 1054 if (rotBitWidth < BitWidth) { 1055 // Extend the rotate APInt, so that the urem doesn't divide by 0. 1056 // e.g. APInt(1, 32) would give APInt(1, 0). 1057 rot = rotateAmt.zext(BitWidth); 1058 } 1059 rot = rot.urem(APInt(rot.getBitWidth(), BitWidth)); 1060 return rot.getLimitedValue(BitWidth); 1061 } 1062 1063 APInt APInt::rotl(const APInt &rotateAmt) const { 1064 return rotl(rotateModulo(BitWidth, rotateAmt)); 1065 } 1066 1067 APInt APInt::rotl(unsigned rotateAmt) const { 1068 rotateAmt %= BitWidth; 1069 if (rotateAmt == 0) 1070 return *this; 1071 return shl(rotateAmt) | lshr(BitWidth - rotateAmt); 1072 } 1073 1074 APInt APInt::rotr(const APInt &rotateAmt) const { 1075 return rotr(rotateModulo(BitWidth, rotateAmt)); 1076 } 1077 1078 APInt APInt::rotr(unsigned rotateAmt) const { 1079 rotateAmt %= BitWidth; 1080 if (rotateAmt == 0) 1081 return *this; 1082 return lshr(rotateAmt) | shl(BitWidth - rotateAmt); 1083 } 1084 1085 // Square Root - this method computes and returns the square root of "this". 1086 // Three mechanisms are used for computation. For small values (<= 5 bits), 1087 // a table lookup is done. This gets some performance for common cases. For 1088 // values using less than 52 bits, the value is converted to double and then 1089 // the libc sqrt function is called. The result is rounded and then converted 1090 // back to a uint64_t which is then used to construct the result. Finally, 1091 // the Babylonian method for computing square roots is used. 1092 APInt APInt::sqrt() const { 1093 1094 // Determine the magnitude of the value. 1095 unsigned magnitude = getActiveBits(); 1096 1097 // Use a fast table for some small values. This also gets rid of some 1098 // rounding errors in libc sqrt for small values. 1099 if (magnitude <= 5) { 1100 static const uint8_t results[32] = { 1101 /* 0 */ 0, 1102 /* 1- 2 */ 1, 1, 1103 /* 3- 6 */ 2, 2, 2, 2, 1104 /* 7-12 */ 3, 3, 3, 3, 3, 3, 1105 /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4, 1106 /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1107 /* 31 */ 6 1108 }; 1109 return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]); 1110 } 1111 1112 // If the magnitude of the value fits in less than 52 bits (the precision of 1113 // an IEEE double precision floating point value), then we can use the 1114 // libc sqrt function which will probably use a hardware sqrt computation. 1115 // This should be faster than the algorithm below. 1116 if (magnitude < 52) { 1117 return APInt(BitWidth, 1118 uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL 1119 : U.pVal[0]))))); 1120 } 1121 1122 // Okay, all the short cuts are exhausted. We must compute it. The following 1123 // is a classical Babylonian method for computing the square root. This code 1124 // was adapted to APInt from a wikipedia article on such computations. 1125 // See http://www.wikipedia.org/ and go to the page named 1126 // Calculate_an_integer_square_root. 1127 unsigned nbits = BitWidth, i = 4; 1128 APInt testy(BitWidth, 16); 1129 APInt x_old(BitWidth, 1); 1130 APInt x_new(BitWidth, 0); 1131 APInt two(BitWidth, 2); 1132 1133 // Select a good starting value using binary logarithms. 1134 for (;; i += 2, testy = testy.shl(2)) 1135 if (i >= nbits || this->ule(testy)) { 1136 x_old = x_old.shl(i / 2); 1137 break; 1138 } 1139 1140 // Use the Babylonian method to arrive at the integer square root: 1141 for (;;) { 1142 x_new = (this->udiv(x_old) + x_old).udiv(two); 1143 if (x_old.ule(x_new)) 1144 break; 1145 x_old = x_new; 1146 } 1147 1148 // Make sure we return the closest approximation 1149 // NOTE: The rounding calculation below is correct. It will produce an 1150 // off-by-one discrepancy with results from pari/gp. That discrepancy has been 1151 // determined to be a rounding issue with pari/gp as it begins to use a 1152 // floating point representation after 192 bits. There are no discrepancies 1153 // between this algorithm and pari/gp for bit widths < 192 bits. 1154 APInt square(x_old * x_old); 1155 APInt nextSquare((x_old + 1) * (x_old +1)); 1156 if (this->ult(square)) 1157 return x_old; 1158 assert(this->ule(nextSquare) && "Error in APInt::sqrt computation"); 1159 APInt midpoint((nextSquare - square).udiv(two)); 1160 APInt offset(*this - square); 1161 if (offset.ult(midpoint)) 1162 return x_old; 1163 return x_old + 1; 1164 } 1165 1166 /// Computes the multiplicative inverse of this APInt for a given modulo. The 1167 /// iterative extended Euclidean algorithm is used to solve for this value, 1168 /// however we simplify it to speed up calculating only the inverse, and take 1169 /// advantage of div+rem calculations. We also use some tricks to avoid copying 1170 /// (potentially large) APInts around. 1171 /// WARNING: a value of '0' may be returned, 1172 /// signifying that no multiplicative inverse exists! 1173 APInt APInt::multiplicativeInverse(const APInt& modulo) const { 1174 assert(ult(modulo) && "This APInt must be smaller than the modulo"); 1175 1176 // Using the properties listed at the following web page (accessed 06/21/08): 1177 // http://www.numbertheory.org/php/euclid.html 1178 // (especially the properties numbered 3, 4 and 9) it can be proved that 1179 // BitWidth bits suffice for all the computations in the algorithm implemented 1180 // below. More precisely, this number of bits suffice if the multiplicative 1181 // inverse exists, but may not suffice for the general extended Euclidean 1182 // algorithm. 1183 1184 APInt r[2] = { modulo, *this }; 1185 APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) }; 1186 APInt q(BitWidth, 0); 1187 1188 unsigned i; 1189 for (i = 0; r[i^1] != 0; i ^= 1) { 1190 // An overview of the math without the confusing bit-flipping: 1191 // q = r[i-2] / r[i-1] 1192 // r[i] = r[i-2] % r[i-1] 1193 // t[i] = t[i-2] - t[i-1] * q 1194 udivrem(r[i], r[i^1], q, r[i]); 1195 t[i] -= t[i^1] * q; 1196 } 1197 1198 // If this APInt and the modulo are not coprime, there is no multiplicative 1199 // inverse, so return 0. We check this by looking at the next-to-last 1200 // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean 1201 // algorithm. 1202 if (r[i] != 1) 1203 return APInt(BitWidth, 0); 1204 1205 // The next-to-last t is the multiplicative inverse. However, we are 1206 // interested in a positive inverse. Calculate a positive one from a negative 1207 // one if necessary. A simple addition of the modulo suffices because 1208 // abs(t[i]) is known to be less than *this/2 (see the link above). 1209 if (t[i].isNegative()) 1210 t[i] += modulo; 1211 1212 return std::move(t[i]); 1213 } 1214 1215 /// Calculate the magic numbers required to implement a signed integer division 1216 /// by a constant as a sequence of multiplies, adds and shifts. Requires that 1217 /// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S. 1218 /// Warren, Jr., chapter 10. 1219 APInt::ms APInt::magic() const { 1220 const APInt& d = *this; 1221 unsigned p; 1222 APInt ad, anc, delta, q1, r1, q2, r2, t; 1223 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); 1224 struct ms mag; 1225 1226 ad = d.abs(); 1227 t = signedMin + (d.lshr(d.getBitWidth() - 1)); 1228 anc = t - 1 - t.urem(ad); // absolute value of nc 1229 p = d.getBitWidth() - 1; // initialize p 1230 q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc) 1231 r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc)) 1232 q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d) 1233 r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d)) 1234 do { 1235 p = p + 1; 1236 q1 = q1<<1; // update q1 = 2p/abs(nc) 1237 r1 = r1<<1; // update r1 = rem(2p/abs(nc)) 1238 if (r1.uge(anc)) { // must be unsigned comparison 1239 q1 = q1 + 1; 1240 r1 = r1 - anc; 1241 } 1242 q2 = q2<<1; // update q2 = 2p/abs(d) 1243 r2 = r2<<1; // update r2 = rem(2p/abs(d)) 1244 if (r2.uge(ad)) { // must be unsigned comparison 1245 q2 = q2 + 1; 1246 r2 = r2 - ad; 1247 } 1248 delta = ad - r2; 1249 } while (q1.ult(delta) || (q1 == delta && r1 == 0)); 1250 1251 mag.m = q2 + 1; 1252 if (d.isNegative()) mag.m = -mag.m; // resulting magic number 1253 mag.s = p - d.getBitWidth(); // resulting shift 1254 return mag; 1255 } 1256 1257 /// Calculate the magic numbers required to implement an unsigned integer 1258 /// division by a constant as a sequence of multiplies, adds and shifts. 1259 /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry 1260 /// S. Warren, Jr., chapter 10. 1261 /// LeadingZeros can be used to simplify the calculation if the upper bits 1262 /// of the divided value are known zero. 1263 APInt::mu APInt::magicu(unsigned LeadingZeros) const { 1264 const APInt& d = *this; 1265 unsigned p; 1266 APInt nc, delta, q1, r1, q2, r2; 1267 struct mu magu; 1268 magu.a = 0; // initialize "add" indicator 1269 APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros); 1270 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); 1271 APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); 1272 1273 nc = allOnes - (allOnes - d).urem(d); 1274 p = d.getBitWidth() - 1; // initialize p 1275 q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc 1276 r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc) 1277 q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d 1278 r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d) 1279 do { 1280 p = p + 1; 1281 if (r1.uge(nc - r1)) { 1282 q1 = q1 + q1 + 1; // update q1 1283 r1 = r1 + r1 - nc; // update r1 1284 } 1285 else { 1286 q1 = q1+q1; // update q1 1287 r1 = r1+r1; // update r1 1288 } 1289 if ((r2 + 1).uge(d - r2)) { 1290 if (q2.uge(signedMax)) magu.a = 1; 1291 q2 = q2+q2 + 1; // update q2 1292 r2 = r2+r2 + 1 - d; // update r2 1293 } 1294 else { 1295 if (q2.uge(signedMin)) magu.a = 1; 1296 q2 = q2+q2; // update q2 1297 r2 = r2+r2 + 1; // update r2 1298 } 1299 delta = d - 1 - r2; 1300 } while (p < d.getBitWidth()*2 && 1301 (q1.ult(delta) || (q1 == delta && r1 == 0))); 1302 magu.m = q2 + 1; // resulting magic number 1303 magu.s = p - d.getBitWidth(); // resulting shift 1304 return magu; 1305 } 1306 1307 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers) 1308 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The 1309 /// variables here have the same names as in the algorithm. Comments explain 1310 /// the algorithm and any deviation from it. 1311 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r, 1312 unsigned m, unsigned n) { 1313 assert(u && "Must provide dividend"); 1314 assert(v && "Must provide divisor"); 1315 assert(q && "Must provide quotient"); 1316 assert(u != v && u != q && v != q && "Must use different memory"); 1317 assert(n>1 && "n must be > 1"); 1318 1319 // b denotes the base of the number system. In our case b is 2^32. 1320 const uint64_t b = uint64_t(1) << 32; 1321 1322 // The DEBUG macros here tend to be spam in the debug output if you're not 1323 // debugging this code. Disable them unless KNUTH_DEBUG is defined. 1324 #ifdef KNUTH_DEBUG 1325 #define DEBUG_KNUTH(X) LLVM_DEBUG(X) 1326 #else 1327 #define DEBUG_KNUTH(X) do {} while(false) 1328 #endif 1329 1330 DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n'); 1331 DEBUG_KNUTH(dbgs() << "KnuthDiv: original:"); 1332 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1333 DEBUG_KNUTH(dbgs() << " by"); 1334 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]); 1335 DEBUG_KNUTH(dbgs() << '\n'); 1336 // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of 1337 // u and v by d. Note that we have taken Knuth's advice here to use a power 1338 // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of 1339 // 2 allows us to shift instead of multiply and it is easy to determine the 1340 // shift amount from the leading zeros. We are basically normalizing the u 1341 // and v so that its high bits are shifted to the top of v's range without 1342 // overflow. Note that this can require an extra word in u so that u must 1343 // be of length m+n+1. 1344 unsigned shift = countLeadingZeros(v[n-1]); 1345 uint32_t v_carry = 0; 1346 uint32_t u_carry = 0; 1347 if (shift) { 1348 for (unsigned i = 0; i < m+n; ++i) { 1349 uint32_t u_tmp = u[i] >> (32 - shift); 1350 u[i] = (u[i] << shift) | u_carry; 1351 u_carry = u_tmp; 1352 } 1353 for (unsigned i = 0; i < n; ++i) { 1354 uint32_t v_tmp = v[i] >> (32 - shift); 1355 v[i] = (v[i] << shift) | v_carry; 1356 v_carry = v_tmp; 1357 } 1358 } 1359 u[m+n] = u_carry; 1360 1361 DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:"); 1362 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1363 DEBUG_KNUTH(dbgs() << " by"); 1364 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]); 1365 DEBUG_KNUTH(dbgs() << '\n'); 1366 1367 // D2. [Initialize j.] Set j to m. This is the loop counter over the places. 1368 int j = m; 1369 do { 1370 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n'); 1371 // D3. [Calculate q'.]. 1372 // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') 1373 // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r') 1374 // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease 1375 // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test 1376 // on v[n-2] determines at high speed most of the cases in which the trial 1377 // value qp is one too large, and it eliminates all cases where qp is two 1378 // too large. 1379 uint64_t dividend = Make_64(u[j+n], u[j+n-1]); 1380 DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n'); 1381 uint64_t qp = dividend / v[n-1]; 1382 uint64_t rp = dividend % v[n-1]; 1383 if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) { 1384 qp--; 1385 rp += v[n-1]; 1386 if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2])) 1387 qp--; 1388 } 1389 DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); 1390 1391 // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with 1392 // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation 1393 // consists of a simple multiplication by a one-place number, combined with 1394 // a subtraction. 1395 // The digits (u[j+n]...u[j]) should be kept positive; if the result of 1396 // this step is actually negative, (u[j+n]...u[j]) should be left as the 1397 // true value plus b**(n+1), namely as the b's complement of 1398 // the true value, and a "borrow" to the left should be remembered. 1399 int64_t borrow = 0; 1400 for (unsigned i = 0; i < n; ++i) { 1401 uint64_t p = uint64_t(qp) * uint64_t(v[i]); 1402 int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p); 1403 u[j+i] = Lo_32(subres); 1404 borrow = Hi_32(p) - Hi_32(subres); 1405 DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i] 1406 << ", borrow = " << borrow << '\n'); 1407 } 1408 bool isNeg = u[j+n] < borrow; 1409 u[j+n] -= Lo_32(borrow); 1410 1411 DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:"); 1412 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1413 DEBUG_KNUTH(dbgs() << '\n'); 1414 1415 // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was 1416 // negative, go to step D6; otherwise go on to step D7. 1417 q[j] = Lo_32(qp); 1418 if (isNeg) { 1419 // D6. [Add back]. The probability that this step is necessary is very 1420 // small, on the order of only 2/b. Make sure that test data accounts for 1421 // this possibility. Decrease q[j] by 1 1422 q[j]--; 1423 // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). 1424 // A carry will occur to the left of u[j+n], and it should be ignored 1425 // since it cancels with the borrow that occurred in D4. 1426 bool carry = false; 1427 for (unsigned i = 0; i < n; i++) { 1428 uint32_t limit = std::min(u[j+i],v[i]); 1429 u[j+i] += v[i] + carry; 1430 carry = u[j+i] < limit || (carry && u[j+i] == limit); 1431 } 1432 u[j+n] += carry; 1433 } 1434 DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:"); 1435 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1436 DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n'); 1437 1438 // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. 1439 } while (--j >= 0); 1440 1441 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:"); 1442 DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]); 1443 DEBUG_KNUTH(dbgs() << '\n'); 1444 1445 // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired 1446 // remainder may be obtained by dividing u[...] by d. If r is non-null we 1447 // compute the remainder (urem uses this). 1448 if (r) { 1449 // The value d is expressed by the "shift" value above since we avoided 1450 // multiplication by d by using a shift left. So, all we have to do is 1451 // shift right here. 1452 if (shift) { 1453 uint32_t carry = 0; 1454 DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:"); 1455 for (int i = n-1; i >= 0; i--) { 1456 r[i] = (u[i] >> shift) | carry; 1457 carry = u[i] << (32 - shift); 1458 DEBUG_KNUTH(dbgs() << " " << r[i]); 1459 } 1460 } else { 1461 for (int i = n-1; i >= 0; i--) { 1462 r[i] = u[i]; 1463 DEBUG_KNUTH(dbgs() << " " << r[i]); 1464 } 1465 } 1466 DEBUG_KNUTH(dbgs() << '\n'); 1467 } 1468 DEBUG_KNUTH(dbgs() << '\n'); 1469 } 1470 1471 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS, 1472 unsigned rhsWords, WordType *Quotient, WordType *Remainder) { 1473 assert(lhsWords >= rhsWords && "Fractional result"); 1474 1475 // First, compose the values into an array of 32-bit words instead of 1476 // 64-bit words. This is a necessity of both the "short division" algorithm 1477 // and the Knuth "classical algorithm" which requires there to be native 1478 // operations for +, -, and * on an m bit value with an m*2 bit result. We 1479 // can't use 64-bit operands here because we don't have native results of 1480 // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't 1481 // work on large-endian machines. 1482 unsigned n = rhsWords * 2; 1483 unsigned m = (lhsWords * 2) - n; 1484 1485 // Allocate space for the temporary values we need either on the stack, if 1486 // it will fit, or on the heap if it won't. 1487 uint32_t SPACE[128]; 1488 uint32_t *U = nullptr; 1489 uint32_t *V = nullptr; 1490 uint32_t *Q = nullptr; 1491 uint32_t *R = nullptr; 1492 if ((Remainder?4:3)*n+2*m+1 <= 128) { 1493 U = &SPACE[0]; 1494 V = &SPACE[m+n+1]; 1495 Q = &SPACE[(m+n+1) + n]; 1496 if (Remainder) 1497 R = &SPACE[(m+n+1) + n + (m+n)]; 1498 } else { 1499 U = new uint32_t[m + n + 1]; 1500 V = new uint32_t[n]; 1501 Q = new uint32_t[m+n]; 1502 if (Remainder) 1503 R = new uint32_t[n]; 1504 } 1505 1506 // Initialize the dividend 1507 memset(U, 0, (m+n+1)*sizeof(uint32_t)); 1508 for (unsigned i = 0; i < lhsWords; ++i) { 1509 uint64_t tmp = LHS[i]; 1510 U[i * 2] = Lo_32(tmp); 1511 U[i * 2 + 1] = Hi_32(tmp); 1512 } 1513 U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm. 1514 1515 // Initialize the divisor 1516 memset(V, 0, (n)*sizeof(uint32_t)); 1517 for (unsigned i = 0; i < rhsWords; ++i) { 1518 uint64_t tmp = RHS[i]; 1519 V[i * 2] = Lo_32(tmp); 1520 V[i * 2 + 1] = Hi_32(tmp); 1521 } 1522 1523 // initialize the quotient and remainder 1524 memset(Q, 0, (m+n) * sizeof(uint32_t)); 1525 if (Remainder) 1526 memset(R, 0, n * sizeof(uint32_t)); 1527 1528 // Now, adjust m and n for the Knuth division. n is the number of words in 1529 // the divisor. m is the number of words by which the dividend exceeds the 1530 // divisor (i.e. m+n is the length of the dividend). These sizes must not 1531 // contain any zero words or the Knuth algorithm fails. 1532 for (unsigned i = n; i > 0 && V[i-1] == 0; i--) { 1533 n--; 1534 m++; 1535 } 1536 for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--) 1537 m--; 1538 1539 // If we're left with only a single word for the divisor, Knuth doesn't work 1540 // so we implement the short division algorithm here. This is much simpler 1541 // and faster because we are certain that we can divide a 64-bit quantity 1542 // by a 32-bit quantity at hardware speed and short division is simply a 1543 // series of such operations. This is just like doing short division but we 1544 // are using base 2^32 instead of base 10. 1545 assert(n != 0 && "Divide by zero?"); 1546 if (n == 1) { 1547 uint32_t divisor = V[0]; 1548 uint32_t remainder = 0; 1549 for (int i = m; i >= 0; i--) { 1550 uint64_t partial_dividend = Make_64(remainder, U[i]); 1551 if (partial_dividend == 0) { 1552 Q[i] = 0; 1553 remainder = 0; 1554 } else if (partial_dividend < divisor) { 1555 Q[i] = 0; 1556 remainder = Lo_32(partial_dividend); 1557 } else if (partial_dividend == divisor) { 1558 Q[i] = 1; 1559 remainder = 0; 1560 } else { 1561 Q[i] = Lo_32(partial_dividend / divisor); 1562 remainder = Lo_32(partial_dividend - (Q[i] * divisor)); 1563 } 1564 } 1565 if (R) 1566 R[0] = remainder; 1567 } else { 1568 // Now we're ready to invoke the Knuth classical divide algorithm. In this 1569 // case n > 1. 1570 KnuthDiv(U, V, Q, R, m, n); 1571 } 1572 1573 // If the caller wants the quotient 1574 if (Quotient) { 1575 for (unsigned i = 0; i < lhsWords; ++i) 1576 Quotient[i] = Make_64(Q[i*2+1], Q[i*2]); 1577 } 1578 1579 // If the caller wants the remainder 1580 if (Remainder) { 1581 for (unsigned i = 0; i < rhsWords; ++i) 1582 Remainder[i] = Make_64(R[i*2+1], R[i*2]); 1583 } 1584 1585 // Clean up the memory we allocated. 1586 if (U != &SPACE[0]) { 1587 delete [] U; 1588 delete [] V; 1589 delete [] Q; 1590 delete [] R; 1591 } 1592 } 1593 1594 APInt APInt::udiv(const APInt &RHS) const { 1595 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1596 1597 // First, deal with the easy case 1598 if (isSingleWord()) { 1599 assert(RHS.U.VAL != 0 && "Divide by zero?"); 1600 return APInt(BitWidth, U.VAL / RHS.U.VAL); 1601 } 1602 1603 // Get some facts about the LHS and RHS number of bits and words 1604 unsigned lhsWords = getNumWords(getActiveBits()); 1605 unsigned rhsBits = RHS.getActiveBits(); 1606 unsigned rhsWords = getNumWords(rhsBits); 1607 assert(rhsWords && "Divided by zero???"); 1608 1609 // Deal with some degenerate cases 1610 if (!lhsWords) 1611 // 0 / X ===> 0 1612 return APInt(BitWidth, 0); 1613 if (rhsBits == 1) 1614 // X / 1 ===> X 1615 return *this; 1616 if (lhsWords < rhsWords || this->ult(RHS)) 1617 // X / Y ===> 0, iff X < Y 1618 return APInt(BitWidth, 0); 1619 if (*this == RHS) 1620 // X / X ===> 1 1621 return APInt(BitWidth, 1); 1622 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. 1623 // All high words are zero, just use native divide 1624 return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]); 1625 1626 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1627 APInt Quotient(BitWidth, 0); // to hold result. 1628 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr); 1629 return Quotient; 1630 } 1631 1632 APInt APInt::udiv(uint64_t RHS) const { 1633 assert(RHS != 0 && "Divide by zero?"); 1634 1635 // First, deal with the easy case 1636 if (isSingleWord()) 1637 return APInt(BitWidth, U.VAL / RHS); 1638 1639 // Get some facts about the LHS words. 1640 unsigned lhsWords = getNumWords(getActiveBits()); 1641 1642 // Deal with some degenerate cases 1643 if (!lhsWords) 1644 // 0 / X ===> 0 1645 return APInt(BitWidth, 0); 1646 if (RHS == 1) 1647 // X / 1 ===> X 1648 return *this; 1649 if (this->ult(RHS)) 1650 // X / Y ===> 0, iff X < Y 1651 return APInt(BitWidth, 0); 1652 if (*this == RHS) 1653 // X / X ===> 1 1654 return APInt(BitWidth, 1); 1655 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. 1656 // All high words are zero, just use native divide 1657 return APInt(BitWidth, this->U.pVal[0] / RHS); 1658 1659 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1660 APInt Quotient(BitWidth, 0); // to hold result. 1661 divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr); 1662 return Quotient; 1663 } 1664 1665 APInt APInt::sdiv(const APInt &RHS) const { 1666 if (isNegative()) { 1667 if (RHS.isNegative()) 1668 return (-(*this)).udiv(-RHS); 1669 return -((-(*this)).udiv(RHS)); 1670 } 1671 if (RHS.isNegative()) 1672 return -(this->udiv(-RHS)); 1673 return this->udiv(RHS); 1674 } 1675 1676 APInt APInt::sdiv(int64_t RHS) const { 1677 if (isNegative()) { 1678 if (RHS < 0) 1679 return (-(*this)).udiv(-RHS); 1680 return -((-(*this)).udiv(RHS)); 1681 } 1682 if (RHS < 0) 1683 return -(this->udiv(-RHS)); 1684 return this->udiv(RHS); 1685 } 1686 1687 APInt APInt::urem(const APInt &RHS) const { 1688 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1689 if (isSingleWord()) { 1690 assert(RHS.U.VAL != 0 && "Remainder by zero?"); 1691 return APInt(BitWidth, U.VAL % RHS.U.VAL); 1692 } 1693 1694 // Get some facts about the LHS 1695 unsigned lhsWords = getNumWords(getActiveBits()); 1696 1697 // Get some facts about the RHS 1698 unsigned rhsBits = RHS.getActiveBits(); 1699 unsigned rhsWords = getNumWords(rhsBits); 1700 assert(rhsWords && "Performing remainder operation by zero ???"); 1701 1702 // Check the degenerate cases 1703 if (lhsWords == 0) 1704 // 0 % Y ===> 0 1705 return APInt(BitWidth, 0); 1706 if (rhsBits == 1) 1707 // X % 1 ===> 0 1708 return APInt(BitWidth, 0); 1709 if (lhsWords < rhsWords || this->ult(RHS)) 1710 // X % Y ===> X, iff X < Y 1711 return *this; 1712 if (*this == RHS) 1713 // X % X == 0; 1714 return APInt(BitWidth, 0); 1715 if (lhsWords == 1) 1716 // All high words are zero, just use native remainder 1717 return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]); 1718 1719 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1720 APInt Remainder(BitWidth, 0); 1721 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal); 1722 return Remainder; 1723 } 1724 1725 uint64_t APInt::urem(uint64_t RHS) const { 1726 assert(RHS != 0 && "Remainder by zero?"); 1727 1728 if (isSingleWord()) 1729 return U.VAL % RHS; 1730 1731 // Get some facts about the LHS 1732 unsigned lhsWords = getNumWords(getActiveBits()); 1733 1734 // Check the degenerate cases 1735 if (lhsWords == 0) 1736 // 0 % Y ===> 0 1737 return 0; 1738 if (RHS == 1) 1739 // X % 1 ===> 0 1740 return 0; 1741 if (this->ult(RHS)) 1742 // X % Y ===> X, iff X < Y 1743 return getZExtValue(); 1744 if (*this == RHS) 1745 // X % X == 0; 1746 return 0; 1747 if (lhsWords == 1) 1748 // All high words are zero, just use native remainder 1749 return U.pVal[0] % RHS; 1750 1751 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1752 uint64_t Remainder; 1753 divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder); 1754 return Remainder; 1755 } 1756 1757 APInt APInt::srem(const APInt &RHS) const { 1758 if (isNegative()) { 1759 if (RHS.isNegative()) 1760 return -((-(*this)).urem(-RHS)); 1761 return -((-(*this)).urem(RHS)); 1762 } 1763 if (RHS.isNegative()) 1764 return this->urem(-RHS); 1765 return this->urem(RHS); 1766 } 1767 1768 int64_t APInt::srem(int64_t RHS) const { 1769 if (isNegative()) { 1770 if (RHS < 0) 1771 return -((-(*this)).urem(-RHS)); 1772 return -((-(*this)).urem(RHS)); 1773 } 1774 if (RHS < 0) 1775 return this->urem(-RHS); 1776 return this->urem(RHS); 1777 } 1778 1779 void APInt::udivrem(const APInt &LHS, const APInt &RHS, 1780 APInt &Quotient, APInt &Remainder) { 1781 assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1782 unsigned BitWidth = LHS.BitWidth; 1783 1784 // First, deal with the easy case 1785 if (LHS.isSingleWord()) { 1786 assert(RHS.U.VAL != 0 && "Divide by zero?"); 1787 uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL; 1788 uint64_t RemVal = LHS.U.VAL % RHS.U.VAL; 1789 Quotient = APInt(BitWidth, QuotVal); 1790 Remainder = APInt(BitWidth, RemVal); 1791 return; 1792 } 1793 1794 // Get some size facts about the dividend and divisor 1795 unsigned lhsWords = getNumWords(LHS.getActiveBits()); 1796 unsigned rhsBits = RHS.getActiveBits(); 1797 unsigned rhsWords = getNumWords(rhsBits); 1798 assert(rhsWords && "Performing divrem operation by zero ???"); 1799 1800 // Check the degenerate cases 1801 if (lhsWords == 0) { 1802 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0 1803 Remainder = APInt(BitWidth, 0); // 0 % Y ===> 0 1804 return; 1805 } 1806 1807 if (rhsBits == 1) { 1808 Quotient = LHS; // X / 1 ===> X 1809 Remainder = APInt(BitWidth, 0); // X % 1 ===> 0 1810 } 1811 1812 if (lhsWords < rhsWords || LHS.ult(RHS)) { 1813 Remainder = LHS; // X % Y ===> X, iff X < Y 1814 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y 1815 return; 1816 } 1817 1818 if (LHS == RHS) { 1819 Quotient = APInt(BitWidth, 1); // X / X ===> 1 1820 Remainder = APInt(BitWidth, 0); // X % X ===> 0; 1821 return; 1822 } 1823 1824 // Make sure there is enough space to hold the results. 1825 // NOTE: This assumes that reallocate won't affect any bits if it doesn't 1826 // change the size. This is necessary if Quotient or Remainder is aliased 1827 // with LHS or RHS. 1828 Quotient.reallocate(BitWidth); 1829 Remainder.reallocate(BitWidth); 1830 1831 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. 1832 // There is only one word to consider so use the native versions. 1833 uint64_t lhsValue = LHS.U.pVal[0]; 1834 uint64_t rhsValue = RHS.U.pVal[0]; 1835 Quotient = lhsValue / rhsValue; 1836 Remainder = lhsValue % rhsValue; 1837 return; 1838 } 1839 1840 // Okay, lets do it the long way 1841 divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, 1842 Remainder.U.pVal); 1843 // Clear the rest of the Quotient and Remainder. 1844 std::memset(Quotient.U.pVal + lhsWords, 0, 1845 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); 1846 std::memset(Remainder.U.pVal + rhsWords, 0, 1847 (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE); 1848 } 1849 1850 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient, 1851 uint64_t &Remainder) { 1852 assert(RHS != 0 && "Divide by zero?"); 1853 unsigned BitWidth = LHS.BitWidth; 1854 1855 // First, deal with the easy case 1856 if (LHS.isSingleWord()) { 1857 uint64_t QuotVal = LHS.U.VAL / RHS; 1858 Remainder = LHS.U.VAL % RHS; 1859 Quotient = APInt(BitWidth, QuotVal); 1860 return; 1861 } 1862 1863 // Get some size facts about the dividend and divisor 1864 unsigned lhsWords = getNumWords(LHS.getActiveBits()); 1865 1866 // Check the degenerate cases 1867 if (lhsWords == 0) { 1868 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0 1869 Remainder = 0; // 0 % Y ===> 0 1870 return; 1871 } 1872 1873 if (RHS == 1) { 1874 Quotient = LHS; // X / 1 ===> X 1875 Remainder = 0; // X % 1 ===> 0 1876 return; 1877 } 1878 1879 if (LHS.ult(RHS)) { 1880 Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y 1881 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y 1882 return; 1883 } 1884 1885 if (LHS == RHS) { 1886 Quotient = APInt(BitWidth, 1); // X / X ===> 1 1887 Remainder = 0; // X % X ===> 0; 1888 return; 1889 } 1890 1891 // Make sure there is enough space to hold the results. 1892 // NOTE: This assumes that reallocate won't affect any bits if it doesn't 1893 // change the size. This is necessary if Quotient is aliased with LHS. 1894 Quotient.reallocate(BitWidth); 1895 1896 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. 1897 // There is only one word to consider so use the native versions. 1898 uint64_t lhsValue = LHS.U.pVal[0]; 1899 Quotient = lhsValue / RHS; 1900 Remainder = lhsValue % RHS; 1901 return; 1902 } 1903 1904 // Okay, lets do it the long way 1905 divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder); 1906 // Clear the rest of the Quotient. 1907 std::memset(Quotient.U.pVal + lhsWords, 0, 1908 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); 1909 } 1910 1911 void APInt::sdivrem(const APInt &LHS, const APInt &RHS, 1912 APInt &Quotient, APInt &Remainder) { 1913 if (LHS.isNegative()) { 1914 if (RHS.isNegative()) 1915 APInt::udivrem(-LHS, -RHS, Quotient, Remainder); 1916 else { 1917 APInt::udivrem(-LHS, RHS, Quotient, Remainder); 1918 Quotient.negate(); 1919 } 1920 Remainder.negate(); 1921 } else if (RHS.isNegative()) { 1922 APInt::udivrem(LHS, -RHS, Quotient, Remainder); 1923 Quotient.negate(); 1924 } else { 1925 APInt::udivrem(LHS, RHS, Quotient, Remainder); 1926 } 1927 } 1928 1929 void APInt::sdivrem(const APInt &LHS, int64_t RHS, 1930 APInt &Quotient, int64_t &Remainder) { 1931 uint64_t R = Remainder; 1932 if (LHS.isNegative()) { 1933 if (RHS < 0) 1934 APInt::udivrem(-LHS, -RHS, Quotient, R); 1935 else { 1936 APInt::udivrem(-LHS, RHS, Quotient, R); 1937 Quotient.negate(); 1938 } 1939 R = -R; 1940 } else if (RHS < 0) { 1941 APInt::udivrem(LHS, -RHS, Quotient, R); 1942 Quotient.negate(); 1943 } else { 1944 APInt::udivrem(LHS, RHS, Quotient, R); 1945 } 1946 Remainder = R; 1947 } 1948 1949 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const { 1950 APInt Res = *this+RHS; 1951 Overflow = isNonNegative() == RHS.isNonNegative() && 1952 Res.isNonNegative() != isNonNegative(); 1953 return Res; 1954 } 1955 1956 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const { 1957 APInt Res = *this+RHS; 1958 Overflow = Res.ult(RHS); 1959 return Res; 1960 } 1961 1962 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const { 1963 APInt Res = *this - RHS; 1964 Overflow = isNonNegative() != RHS.isNonNegative() && 1965 Res.isNonNegative() != isNonNegative(); 1966 return Res; 1967 } 1968 1969 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const { 1970 APInt Res = *this-RHS; 1971 Overflow = Res.ugt(*this); 1972 return Res; 1973 } 1974 1975 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const { 1976 // MININT/-1 --> overflow. 1977 Overflow = isMinSignedValue() && RHS.isAllOnesValue(); 1978 return sdiv(RHS); 1979 } 1980 1981 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const { 1982 APInt Res = *this * RHS; 1983 1984 if (*this != 0 && RHS != 0) 1985 Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS; 1986 else 1987 Overflow = false; 1988 return Res; 1989 } 1990 1991 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const { 1992 if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) { 1993 Overflow = true; 1994 return *this * RHS; 1995 } 1996 1997 APInt Res = lshr(1) * RHS; 1998 Overflow = Res.isNegative(); 1999 Res <<= 1; 2000 if ((*this)[0]) { 2001 Res += RHS; 2002 if (Res.ult(RHS)) 2003 Overflow = true; 2004 } 2005 return Res; 2006 } 2007 2008 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const { 2009 Overflow = ShAmt.uge(getBitWidth()); 2010 if (Overflow) 2011 return APInt(BitWidth, 0); 2012 2013 if (isNonNegative()) // Don't allow sign change. 2014 Overflow = ShAmt.uge(countLeadingZeros()); 2015 else 2016 Overflow = ShAmt.uge(countLeadingOnes()); 2017 2018 return *this << ShAmt; 2019 } 2020 2021 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const { 2022 Overflow = ShAmt.uge(getBitWidth()); 2023 if (Overflow) 2024 return APInt(BitWidth, 0); 2025 2026 Overflow = ShAmt.ugt(countLeadingZeros()); 2027 2028 return *this << ShAmt; 2029 } 2030 2031 APInt APInt::sadd_sat(const APInt &RHS) const { 2032 bool Overflow; 2033 APInt Res = sadd_ov(RHS, Overflow); 2034 if (!Overflow) 2035 return Res; 2036 2037 return isNegative() ? APInt::getSignedMinValue(BitWidth) 2038 : APInt::getSignedMaxValue(BitWidth); 2039 } 2040 2041 APInt APInt::uadd_sat(const APInt &RHS) const { 2042 bool Overflow; 2043 APInt Res = uadd_ov(RHS, Overflow); 2044 if (!Overflow) 2045 return Res; 2046 2047 return APInt::getMaxValue(BitWidth); 2048 } 2049 2050 APInt APInt::ssub_sat(const APInt &RHS) const { 2051 bool Overflow; 2052 APInt Res = ssub_ov(RHS, Overflow); 2053 if (!Overflow) 2054 return Res; 2055 2056 return isNegative() ? APInt::getSignedMinValue(BitWidth) 2057 : APInt::getSignedMaxValue(BitWidth); 2058 } 2059 2060 APInt APInt::usub_sat(const APInt &RHS) const { 2061 bool Overflow; 2062 APInt Res = usub_ov(RHS, Overflow); 2063 if (!Overflow) 2064 return Res; 2065 2066 return APInt(BitWidth, 0); 2067 } 2068 2069 APInt APInt::smul_sat(const APInt &RHS) const { 2070 bool Overflow; 2071 APInt Res = smul_ov(RHS, Overflow); 2072 if (!Overflow) 2073 return Res; 2074 2075 // The result is negative if one and only one of inputs is negative. 2076 bool ResIsNegative = isNegative() ^ RHS.isNegative(); 2077 2078 return ResIsNegative ? APInt::getSignedMinValue(BitWidth) 2079 : APInt::getSignedMaxValue(BitWidth); 2080 } 2081 2082 APInt APInt::umul_sat(const APInt &RHS) const { 2083 bool Overflow; 2084 APInt Res = umul_ov(RHS, Overflow); 2085 if (!Overflow) 2086 return Res; 2087 2088 return APInt::getMaxValue(BitWidth); 2089 } 2090 2091 APInt APInt::sshl_sat(const APInt &RHS) const { 2092 bool Overflow; 2093 APInt Res = sshl_ov(RHS, Overflow); 2094 if (!Overflow) 2095 return Res; 2096 2097 return isNegative() ? APInt::getSignedMinValue(BitWidth) 2098 : APInt::getSignedMaxValue(BitWidth); 2099 } 2100 2101 APInt APInt::ushl_sat(const APInt &RHS) const { 2102 bool Overflow; 2103 APInt Res = ushl_ov(RHS, Overflow); 2104 if (!Overflow) 2105 return Res; 2106 2107 return APInt::getMaxValue(BitWidth); 2108 } 2109 2110 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { 2111 // Check our assumptions here 2112 assert(!str.empty() && "Invalid string length"); 2113 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || 2114 radix == 36) && 2115 "Radix should be 2, 8, 10, 16, or 36!"); 2116 2117 StringRef::iterator p = str.begin(); 2118 size_t slen = str.size(); 2119 bool isNeg = *p == '-'; 2120 if (*p == '-' || *p == '+') { 2121 p++; 2122 slen--; 2123 assert(slen && "String is only a sign, needs a value."); 2124 } 2125 assert((slen <= numbits || radix != 2) && "Insufficient bit width"); 2126 assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width"); 2127 assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width"); 2128 assert((((slen-1)*64)/22 <= numbits || radix != 10) && 2129 "Insufficient bit width"); 2130 2131 // Allocate memory if needed 2132 if (isSingleWord()) 2133 U.VAL = 0; 2134 else 2135 U.pVal = getClearedMemory(getNumWords()); 2136 2137 // Figure out if we can shift instead of multiply 2138 unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0); 2139 2140 // Enter digit traversal loop 2141 for (StringRef::iterator e = str.end(); p != e; ++p) { 2142 unsigned digit = getDigit(*p, radix); 2143 assert(digit < radix && "Invalid character in digit string"); 2144 2145 // Shift or multiply the value by the radix 2146 if (slen > 1) { 2147 if (shift) 2148 *this <<= shift; 2149 else 2150 *this *= radix; 2151 } 2152 2153 // Add in the digit we just interpreted 2154 *this += digit; 2155 } 2156 // If its negative, put it in two's complement form 2157 if (isNeg) 2158 this->negate(); 2159 } 2160 2161 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, 2162 bool Signed, bool formatAsCLiteral) const { 2163 assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 || 2164 Radix == 36) && 2165 "Radix should be 2, 8, 10, 16, or 36!"); 2166 2167 const char *Prefix = ""; 2168 if (formatAsCLiteral) { 2169 switch (Radix) { 2170 case 2: 2171 // Binary literals are a non-standard extension added in gcc 4.3: 2172 // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html 2173 Prefix = "0b"; 2174 break; 2175 case 8: 2176 Prefix = "0"; 2177 break; 2178 case 10: 2179 break; // No prefix 2180 case 16: 2181 Prefix = "0x"; 2182 break; 2183 default: 2184 llvm_unreachable("Invalid radix!"); 2185 } 2186 } 2187 2188 // First, check for a zero value and just short circuit the logic below. 2189 if (*this == 0) { 2190 while (*Prefix) { 2191 Str.push_back(*Prefix); 2192 ++Prefix; 2193 }; 2194 Str.push_back('0'); 2195 return; 2196 } 2197 2198 static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 2199 2200 if (isSingleWord()) { 2201 char Buffer[65]; 2202 char *BufPtr = std::end(Buffer); 2203 2204 uint64_t N; 2205 if (!Signed) { 2206 N = getZExtValue(); 2207 } else { 2208 int64_t I = getSExtValue(); 2209 if (I >= 0) { 2210 N = I; 2211 } else { 2212 Str.push_back('-'); 2213 N = -(uint64_t)I; 2214 } 2215 } 2216 2217 while (*Prefix) { 2218 Str.push_back(*Prefix); 2219 ++Prefix; 2220 }; 2221 2222 while (N) { 2223 *--BufPtr = Digits[N % Radix]; 2224 N /= Radix; 2225 } 2226 Str.append(BufPtr, std::end(Buffer)); 2227 return; 2228 } 2229 2230 APInt Tmp(*this); 2231 2232 if (Signed && isNegative()) { 2233 // They want to print the signed version and it is a negative value 2234 // Flip the bits and add one to turn it into the equivalent positive 2235 // value and put a '-' in the result. 2236 Tmp.negate(); 2237 Str.push_back('-'); 2238 } 2239 2240 while (*Prefix) { 2241 Str.push_back(*Prefix); 2242 ++Prefix; 2243 }; 2244 2245 // We insert the digits backward, then reverse them to get the right order. 2246 unsigned StartDig = Str.size(); 2247 2248 // For the 2, 8 and 16 bit cases, we can just shift instead of divide 2249 // because the number of bits per digit (1, 3 and 4 respectively) divides 2250 // equally. We just shift until the value is zero. 2251 if (Radix == 2 || Radix == 8 || Radix == 16) { 2252 // Just shift tmp right for each digit width until it becomes zero 2253 unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1)); 2254 unsigned MaskAmt = Radix - 1; 2255 2256 while (Tmp.getBoolValue()) { 2257 unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt; 2258 Str.push_back(Digits[Digit]); 2259 Tmp.lshrInPlace(ShiftAmt); 2260 } 2261 } else { 2262 while (Tmp.getBoolValue()) { 2263 uint64_t Digit; 2264 udivrem(Tmp, Radix, Tmp, Digit); 2265 assert(Digit < Radix && "divide failed"); 2266 Str.push_back(Digits[Digit]); 2267 } 2268 } 2269 2270 // Reverse the digits before returning. 2271 std::reverse(Str.begin()+StartDig, Str.end()); 2272 } 2273 2274 /// Returns the APInt as a std::string. Note that this is an inefficient method. 2275 /// It is better to pass in a SmallVector/SmallString to the methods above. 2276 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const { 2277 SmallString<40> S; 2278 toString(S, Radix, Signed, /* formatAsCLiteral = */false); 2279 return std::string(S.str()); 2280 } 2281 2282 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2283 LLVM_DUMP_METHOD void APInt::dump() const { 2284 SmallString<40> S, U; 2285 this->toStringUnsigned(U); 2286 this->toStringSigned(S); 2287 dbgs() << "APInt(" << BitWidth << "b, " 2288 << U << "u " << S << "s)\n"; 2289 } 2290 #endif 2291 2292 void APInt::print(raw_ostream &OS, bool isSigned) const { 2293 SmallString<40> S; 2294 this->toString(S, 10, isSigned, /* formatAsCLiteral = */false); 2295 OS << S; 2296 } 2297 2298 // This implements a variety of operations on a representation of 2299 // arbitrary precision, two's-complement, bignum integer values. 2300 2301 // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe 2302 // and unrestricting assumption. 2303 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0, 2304 "Part width must be divisible by 2!"); 2305 2306 /* Some handy functions local to this file. */ 2307 2308 /* Returns the integer part with the least significant BITS set. 2309 BITS cannot be zero. */ 2310 static inline APInt::WordType lowBitMask(unsigned bits) { 2311 assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD); 2312 2313 return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits); 2314 } 2315 2316 /* Returns the value of the lower half of PART. */ 2317 static inline APInt::WordType lowHalf(APInt::WordType part) { 2318 return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2); 2319 } 2320 2321 /* Returns the value of the upper half of PART. */ 2322 static inline APInt::WordType highHalf(APInt::WordType part) { 2323 return part >> (APInt::APINT_BITS_PER_WORD / 2); 2324 } 2325 2326 /* Returns the bit number of the most significant set bit of a part. 2327 If the input number has no bits set -1U is returned. */ 2328 static unsigned partMSB(APInt::WordType value) { 2329 return findLastSet(value, ZB_Max); 2330 } 2331 2332 /* Returns the bit number of the least significant set bit of a 2333 part. If the input number has no bits set -1U is returned. */ 2334 static unsigned partLSB(APInt::WordType value) { 2335 return findFirstSet(value, ZB_Max); 2336 } 2337 2338 /* Sets the least significant part of a bignum to the input value, and 2339 zeroes out higher parts. */ 2340 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) { 2341 assert(parts > 0); 2342 2343 dst[0] = part; 2344 for (unsigned i = 1; i < parts; i++) 2345 dst[i] = 0; 2346 } 2347 2348 /* Assign one bignum to another. */ 2349 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) { 2350 for (unsigned i = 0; i < parts; i++) 2351 dst[i] = src[i]; 2352 } 2353 2354 /* Returns true if a bignum is zero, false otherwise. */ 2355 bool APInt::tcIsZero(const WordType *src, unsigned parts) { 2356 for (unsigned i = 0; i < parts; i++) 2357 if (src[i]) 2358 return false; 2359 2360 return true; 2361 } 2362 2363 /* Extract the given bit of a bignum; returns 0 or 1. */ 2364 int APInt::tcExtractBit(const WordType *parts, unsigned bit) { 2365 return (parts[whichWord(bit)] & maskBit(bit)) != 0; 2366 } 2367 2368 /* Set the given bit of a bignum. */ 2369 void APInt::tcSetBit(WordType *parts, unsigned bit) { 2370 parts[whichWord(bit)] |= maskBit(bit); 2371 } 2372 2373 /* Clears the given bit of a bignum. */ 2374 void APInt::tcClearBit(WordType *parts, unsigned bit) { 2375 parts[whichWord(bit)] &= ~maskBit(bit); 2376 } 2377 2378 /* Returns the bit number of the least significant set bit of a 2379 number. If the input number has no bits set -1U is returned. */ 2380 unsigned APInt::tcLSB(const WordType *parts, unsigned n) { 2381 for (unsigned i = 0; i < n; i++) { 2382 if (parts[i] != 0) { 2383 unsigned lsb = partLSB(parts[i]); 2384 2385 return lsb + i * APINT_BITS_PER_WORD; 2386 } 2387 } 2388 2389 return -1U; 2390 } 2391 2392 /* Returns the bit number of the most significant set bit of a number. 2393 If the input number has no bits set -1U is returned. */ 2394 unsigned APInt::tcMSB(const WordType *parts, unsigned n) { 2395 do { 2396 --n; 2397 2398 if (parts[n] != 0) { 2399 unsigned msb = partMSB(parts[n]); 2400 2401 return msb + n * APINT_BITS_PER_WORD; 2402 } 2403 } while (n); 2404 2405 return -1U; 2406 } 2407 2408 /* Copy the bit vector of width srcBITS from SRC, starting at bit 2409 srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes 2410 the least significant bit of DST. All high bits above srcBITS in 2411 DST are zero-filled. */ 2412 void 2413 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src, 2414 unsigned srcBits, unsigned srcLSB) { 2415 unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD; 2416 assert(dstParts <= dstCount); 2417 2418 unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD; 2419 tcAssign (dst, src + firstSrcPart, dstParts); 2420 2421 unsigned shift = srcLSB % APINT_BITS_PER_WORD; 2422 tcShiftRight (dst, dstParts, shift); 2423 2424 /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC 2425 in DST. If this is less that srcBits, append the rest, else 2426 clear the high bits. */ 2427 unsigned n = dstParts * APINT_BITS_PER_WORD - shift; 2428 if (n < srcBits) { 2429 WordType mask = lowBitMask (srcBits - n); 2430 dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask) 2431 << n % APINT_BITS_PER_WORD); 2432 } else if (n > srcBits) { 2433 if (srcBits % APINT_BITS_PER_WORD) 2434 dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD); 2435 } 2436 2437 /* Clear high parts. */ 2438 while (dstParts < dstCount) 2439 dst[dstParts++] = 0; 2440 } 2441 2442 /* DST += RHS + C where C is zero or one. Returns the carry flag. */ 2443 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs, 2444 WordType c, unsigned parts) { 2445 assert(c <= 1); 2446 2447 for (unsigned i = 0; i < parts; i++) { 2448 WordType l = dst[i]; 2449 if (c) { 2450 dst[i] += rhs[i] + 1; 2451 c = (dst[i] <= l); 2452 } else { 2453 dst[i] += rhs[i]; 2454 c = (dst[i] < l); 2455 } 2456 } 2457 2458 return c; 2459 } 2460 2461 /// This function adds a single "word" integer, src, to the multiple 2462 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and 2463 /// 1 is returned if there is a carry out, otherwise 0 is returned. 2464 /// @returns the carry of the addition. 2465 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src, 2466 unsigned parts) { 2467 for (unsigned i = 0; i < parts; ++i) { 2468 dst[i] += src; 2469 if (dst[i] >= src) 2470 return 0; // No need to carry so exit early. 2471 src = 1; // Carry one to next digit. 2472 } 2473 2474 return 1; 2475 } 2476 2477 /* DST -= RHS + C where C is zero or one. Returns the carry flag. */ 2478 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs, 2479 WordType c, unsigned parts) { 2480 assert(c <= 1); 2481 2482 for (unsigned i = 0; i < parts; i++) { 2483 WordType l = dst[i]; 2484 if (c) { 2485 dst[i] -= rhs[i] + 1; 2486 c = (dst[i] >= l); 2487 } else { 2488 dst[i] -= rhs[i]; 2489 c = (dst[i] > l); 2490 } 2491 } 2492 2493 return c; 2494 } 2495 2496 /// This function subtracts a single "word" (64-bit word), src, from 2497 /// the multi-word integer array, dst[], propagating the borrowed 1 value until 2498 /// no further borrowing is needed or it runs out of "words" in dst. The result 2499 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not 2500 /// exhausted. In other words, if src > dst then this function returns 1, 2501 /// otherwise 0. 2502 /// @returns the borrow out of the subtraction 2503 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src, 2504 unsigned parts) { 2505 for (unsigned i = 0; i < parts; ++i) { 2506 WordType Dst = dst[i]; 2507 dst[i] -= src; 2508 if (src <= Dst) 2509 return 0; // No need to borrow so exit early. 2510 src = 1; // We have to "borrow 1" from next "word" 2511 } 2512 2513 return 1; 2514 } 2515 2516 /* Negate a bignum in-place. */ 2517 void APInt::tcNegate(WordType *dst, unsigned parts) { 2518 tcComplement(dst, parts); 2519 tcIncrement(dst, parts); 2520 } 2521 2522 /* DST += SRC * MULTIPLIER + CARRY if add is true 2523 DST = SRC * MULTIPLIER + CARRY if add is false 2524 2525 Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC 2526 they must start at the same point, i.e. DST == SRC. 2527 2528 If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is 2529 returned. Otherwise DST is filled with the least significant 2530 DSTPARTS parts of the result, and if all of the omitted higher 2531 parts were zero return zero, otherwise overflow occurred and 2532 return one. */ 2533 int APInt::tcMultiplyPart(WordType *dst, const WordType *src, 2534 WordType multiplier, WordType carry, 2535 unsigned srcParts, unsigned dstParts, 2536 bool add) { 2537 /* Otherwise our writes of DST kill our later reads of SRC. */ 2538 assert(dst <= src || dst >= src + srcParts); 2539 assert(dstParts <= srcParts + 1); 2540 2541 /* N loops; minimum of dstParts and srcParts. */ 2542 unsigned n = std::min(dstParts, srcParts); 2543 2544 for (unsigned i = 0; i < n; i++) { 2545 WordType low, mid, high, srcPart; 2546 2547 /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY. 2548 2549 This cannot overflow, because 2550 2551 (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1) 2552 2553 which is less than n^2. */ 2554 2555 srcPart = src[i]; 2556 2557 if (multiplier == 0 || srcPart == 0) { 2558 low = carry; 2559 high = 0; 2560 } else { 2561 low = lowHalf(srcPart) * lowHalf(multiplier); 2562 high = highHalf(srcPart) * highHalf(multiplier); 2563 2564 mid = lowHalf(srcPart) * highHalf(multiplier); 2565 high += highHalf(mid); 2566 mid <<= APINT_BITS_PER_WORD / 2; 2567 if (low + mid < low) 2568 high++; 2569 low += mid; 2570 2571 mid = highHalf(srcPart) * lowHalf(multiplier); 2572 high += highHalf(mid); 2573 mid <<= APINT_BITS_PER_WORD / 2; 2574 if (low + mid < low) 2575 high++; 2576 low += mid; 2577 2578 /* Now add carry. */ 2579 if (low + carry < low) 2580 high++; 2581 low += carry; 2582 } 2583 2584 if (add) { 2585 /* And now DST[i], and store the new low part there. */ 2586 if (low + dst[i] < low) 2587 high++; 2588 dst[i] += low; 2589 } else 2590 dst[i] = low; 2591 2592 carry = high; 2593 } 2594 2595 if (srcParts < dstParts) { 2596 /* Full multiplication, there is no overflow. */ 2597 assert(srcParts + 1 == dstParts); 2598 dst[srcParts] = carry; 2599 return 0; 2600 } 2601 2602 /* We overflowed if there is carry. */ 2603 if (carry) 2604 return 1; 2605 2606 /* We would overflow if any significant unwritten parts would be 2607 non-zero. This is true if any remaining src parts are non-zero 2608 and the multiplier is non-zero. */ 2609 if (multiplier) 2610 for (unsigned i = dstParts; i < srcParts; i++) 2611 if (src[i]) 2612 return 1; 2613 2614 /* We fitted in the narrow destination. */ 2615 return 0; 2616 } 2617 2618 /* DST = LHS * RHS, where DST has the same width as the operands and 2619 is filled with the least significant parts of the result. Returns 2620 one if overflow occurred, otherwise zero. DST must be disjoint 2621 from both operands. */ 2622 int APInt::tcMultiply(WordType *dst, const WordType *lhs, 2623 const WordType *rhs, unsigned parts) { 2624 assert(dst != lhs && dst != rhs); 2625 2626 int overflow = 0; 2627 tcSet(dst, 0, parts); 2628 2629 for (unsigned i = 0; i < parts; i++) 2630 overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts, 2631 parts - i, true); 2632 2633 return overflow; 2634 } 2635 2636 /// DST = LHS * RHS, where DST has width the sum of the widths of the 2637 /// operands. No overflow occurs. DST must be disjoint from both operands. 2638 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs, 2639 const WordType *rhs, unsigned lhsParts, 2640 unsigned rhsParts) { 2641 /* Put the narrower number on the LHS for less loops below. */ 2642 if (lhsParts > rhsParts) 2643 return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts); 2644 2645 assert(dst != lhs && dst != rhs); 2646 2647 tcSet(dst, 0, rhsParts); 2648 2649 for (unsigned i = 0; i < lhsParts; i++) 2650 tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true); 2651 } 2652 2653 /* If RHS is zero LHS and REMAINDER are left unchanged, return one. 2654 Otherwise set LHS to LHS / RHS with the fractional part discarded, 2655 set REMAINDER to the remainder, return zero. i.e. 2656 2657 OLD_LHS = RHS * LHS + REMAINDER 2658 2659 SCRATCH is a bignum of the same size as the operands and result for 2660 use by the routine; its contents need not be initialized and are 2661 destroyed. LHS, REMAINDER and SCRATCH must be distinct. 2662 */ 2663 int APInt::tcDivide(WordType *lhs, const WordType *rhs, 2664 WordType *remainder, WordType *srhs, 2665 unsigned parts) { 2666 assert(lhs != remainder && lhs != srhs && remainder != srhs); 2667 2668 unsigned shiftCount = tcMSB(rhs, parts) + 1; 2669 if (shiftCount == 0) 2670 return true; 2671 2672 shiftCount = parts * APINT_BITS_PER_WORD - shiftCount; 2673 unsigned n = shiftCount / APINT_BITS_PER_WORD; 2674 WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD); 2675 2676 tcAssign(srhs, rhs, parts); 2677 tcShiftLeft(srhs, parts, shiftCount); 2678 tcAssign(remainder, lhs, parts); 2679 tcSet(lhs, 0, parts); 2680 2681 /* Loop, subtracting SRHS if REMAINDER is greater and adding that to 2682 the total. */ 2683 for (;;) { 2684 int compare = tcCompare(remainder, srhs, parts); 2685 if (compare >= 0) { 2686 tcSubtract(remainder, srhs, 0, parts); 2687 lhs[n] |= mask; 2688 } 2689 2690 if (shiftCount == 0) 2691 break; 2692 shiftCount--; 2693 tcShiftRight(srhs, parts, 1); 2694 if ((mask >>= 1) == 0) { 2695 mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1); 2696 n--; 2697 } 2698 } 2699 2700 return false; 2701 } 2702 2703 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are 2704 /// no restrictions on Count. 2705 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) { 2706 // Don't bother performing a no-op shift. 2707 if (!Count) 2708 return; 2709 2710 // WordShift is the inter-part shift; BitShift is the intra-part shift. 2711 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); 2712 unsigned BitShift = Count % APINT_BITS_PER_WORD; 2713 2714 // Fastpath for moving by whole words. 2715 if (BitShift == 0) { 2716 std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE); 2717 } else { 2718 while (Words-- > WordShift) { 2719 Dst[Words] = Dst[Words - WordShift] << BitShift; 2720 if (Words > WordShift) 2721 Dst[Words] |= 2722 Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift); 2723 } 2724 } 2725 2726 // Fill in the remainder with 0s. 2727 std::memset(Dst, 0, WordShift * APINT_WORD_SIZE); 2728 } 2729 2730 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There 2731 /// are no restrictions on Count. 2732 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) { 2733 // Don't bother performing a no-op shift. 2734 if (!Count) 2735 return; 2736 2737 // WordShift is the inter-part shift; BitShift is the intra-part shift. 2738 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); 2739 unsigned BitShift = Count % APINT_BITS_PER_WORD; 2740 2741 unsigned WordsToMove = Words - WordShift; 2742 // Fastpath for moving by whole words. 2743 if (BitShift == 0) { 2744 std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE); 2745 } else { 2746 for (unsigned i = 0; i != WordsToMove; ++i) { 2747 Dst[i] = Dst[i + WordShift] >> BitShift; 2748 if (i + 1 != WordsToMove) 2749 Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift); 2750 } 2751 } 2752 2753 // Fill in the remainder with 0s. 2754 std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE); 2755 } 2756 2757 /* Bitwise and of two bignums. */ 2758 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) { 2759 for (unsigned i = 0; i < parts; i++) 2760 dst[i] &= rhs[i]; 2761 } 2762 2763 /* Bitwise inclusive or of two bignums. */ 2764 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) { 2765 for (unsigned i = 0; i < parts; i++) 2766 dst[i] |= rhs[i]; 2767 } 2768 2769 /* Bitwise exclusive or of two bignums. */ 2770 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) { 2771 for (unsigned i = 0; i < parts; i++) 2772 dst[i] ^= rhs[i]; 2773 } 2774 2775 /* Complement a bignum in-place. */ 2776 void APInt::tcComplement(WordType *dst, unsigned parts) { 2777 for (unsigned i = 0; i < parts; i++) 2778 dst[i] = ~dst[i]; 2779 } 2780 2781 /* Comparison (unsigned) of two bignums. */ 2782 int APInt::tcCompare(const WordType *lhs, const WordType *rhs, 2783 unsigned parts) { 2784 while (parts) { 2785 parts--; 2786 if (lhs[parts] != rhs[parts]) 2787 return (lhs[parts] > rhs[parts]) ? 1 : -1; 2788 } 2789 2790 return 0; 2791 } 2792 2793 /* Set the least significant BITS bits of a bignum, clear the 2794 rest. */ 2795 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts, 2796 unsigned bits) { 2797 unsigned i = 0; 2798 while (bits > APINT_BITS_PER_WORD) { 2799 dst[i++] = ~(WordType) 0; 2800 bits -= APINT_BITS_PER_WORD; 2801 } 2802 2803 if (bits) 2804 dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits); 2805 2806 while (i < parts) 2807 dst[i++] = 0; 2808 } 2809 2810 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B, 2811 APInt::Rounding RM) { 2812 // Currently udivrem always rounds down. 2813 switch (RM) { 2814 case APInt::Rounding::DOWN: 2815 case APInt::Rounding::TOWARD_ZERO: 2816 return A.udiv(B); 2817 case APInt::Rounding::UP: { 2818 APInt Quo, Rem; 2819 APInt::udivrem(A, B, Quo, Rem); 2820 if (Rem == 0) 2821 return Quo; 2822 return Quo + 1; 2823 } 2824 } 2825 llvm_unreachable("Unknown APInt::Rounding enum"); 2826 } 2827 2828 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B, 2829 APInt::Rounding RM) { 2830 switch (RM) { 2831 case APInt::Rounding::DOWN: 2832 case APInt::Rounding::UP: { 2833 APInt Quo, Rem; 2834 APInt::sdivrem(A, B, Quo, Rem); 2835 if (Rem == 0) 2836 return Quo; 2837 // This algorithm deals with arbitrary rounding mode used by sdivrem. 2838 // We want to check whether the non-integer part of the mathematical value 2839 // is negative or not. If the non-integer part is negative, we need to round 2840 // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's 2841 // already rounded down. 2842 if (RM == APInt::Rounding::DOWN) { 2843 if (Rem.isNegative() != B.isNegative()) 2844 return Quo - 1; 2845 return Quo; 2846 } 2847 if (Rem.isNegative() != B.isNegative()) 2848 return Quo; 2849 return Quo + 1; 2850 } 2851 // Currently sdiv rounds towards zero. 2852 case APInt::Rounding::TOWARD_ZERO: 2853 return A.sdiv(B); 2854 } 2855 llvm_unreachable("Unknown APInt::Rounding enum"); 2856 } 2857 2858 Optional<APInt> 2859 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C, 2860 unsigned RangeWidth) { 2861 unsigned CoeffWidth = A.getBitWidth(); 2862 assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth()); 2863 assert(RangeWidth <= CoeffWidth && 2864 "Value range width should be less than coefficient width"); 2865 assert(RangeWidth > 1 && "Value range bit width should be > 1"); 2866 2867 LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B 2868 << "x + " << C << ", rw:" << RangeWidth << '\n'); 2869 2870 // Identify 0 as a (non)solution immediately. 2871 if (C.sextOrTrunc(RangeWidth).isNullValue() ) { 2872 LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n"); 2873 return APInt(CoeffWidth, 0); 2874 } 2875 2876 // The result of APInt arithmetic has the same bit width as the operands, 2877 // so it can actually lose high bits. A product of two n-bit integers needs 2878 // 2n-1 bits to represent the full value. 2879 // The operation done below (on quadratic coefficients) that can produce 2880 // the largest value is the evaluation of the equation during bisection, 2881 // which needs 3 times the bitwidth of the coefficient, so the total number 2882 // of required bits is 3n. 2883 // 2884 // The purpose of this extension is to simulate the set Z of all integers, 2885 // where n+1 > n for all n in Z. In Z it makes sense to talk about positive 2886 // and negative numbers (not so much in a modulo arithmetic). The method 2887 // used to solve the equation is based on the standard formula for real 2888 // numbers, and uses the concepts of "positive" and "negative" with their 2889 // usual meanings. 2890 CoeffWidth *= 3; 2891 A = A.sext(CoeffWidth); 2892 B = B.sext(CoeffWidth); 2893 C = C.sext(CoeffWidth); 2894 2895 // Make A > 0 for simplicity. Negate cannot overflow at this point because 2896 // the bit width has increased. 2897 if (A.isNegative()) { 2898 A.negate(); 2899 B.negate(); 2900 C.negate(); 2901 } 2902 2903 // Solving an equation q(x) = 0 with coefficients in modular arithmetic 2904 // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ..., 2905 // and R = 2^BitWidth. 2906 // Since we're trying not only to find exact solutions, but also values 2907 // that "wrap around", such a set will always have a solution, i.e. an x 2908 // that satisfies at least one of the equations, or such that |q(x)| 2909 // exceeds kR, while |q(x-1)| for the same k does not. 2910 // 2911 // We need to find a value k, such that Ax^2 + Bx + C = kR will have a 2912 // positive solution n (in the above sense), and also such that the n 2913 // will be the least among all solutions corresponding to k = 0, 1, ... 2914 // (more precisely, the least element in the set 2915 // { n(k) | k is such that a solution n(k) exists }). 2916 // 2917 // Consider the parabola (over real numbers) that corresponds to the 2918 // quadratic equation. Since A > 0, the arms of the parabola will point 2919 // up. Picking different values of k will shift it up and down by R. 2920 // 2921 // We want to shift the parabola in such a way as to reduce the problem 2922 // of solving q(x) = kR to solving shifted_q(x) = 0. 2923 // (The interesting solutions are the ceilings of the real number 2924 // solutions.) 2925 APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth); 2926 APInt TwoA = 2 * A; 2927 APInt SqrB = B * B; 2928 bool PickLow; 2929 2930 auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt { 2931 assert(A.isStrictlyPositive()); 2932 APInt T = V.abs().urem(A); 2933 if (T.isNullValue()) 2934 return V; 2935 return V.isNegative() ? V+T : V+(A-T); 2936 }; 2937 2938 // The vertex of the parabola is at -B/2A, but since A > 0, it's negative 2939 // iff B is positive. 2940 if (B.isNonNegative()) { 2941 // If B >= 0, the vertex it at a negative location (or at 0), so in 2942 // order to have a non-negative solution we need to pick k that makes 2943 // C-kR negative. To satisfy all the requirements for the solution 2944 // that we are looking for, it needs to be closest to 0 of all k. 2945 C = C.srem(R); 2946 if (C.isStrictlyPositive()) 2947 C -= R; 2948 // Pick the greater solution. 2949 PickLow = false; 2950 } else { 2951 // If B < 0, the vertex is at a positive location. For any solution 2952 // to exist, the discriminant must be non-negative. This means that 2953 // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a 2954 // lower bound on values of k: kR >= C - B^2/4A. 2955 APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0. 2956 // Round LowkR up (towards +inf) to the nearest kR. 2957 LowkR = RoundUp(LowkR, R); 2958 2959 // If there exists k meeting the condition above, and such that 2960 // C-kR > 0, there will be two positive real number solutions of 2961 // q(x) = kR. Out of all such values of k, pick the one that makes 2962 // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0). 2963 // In other words, find maximum k such that LowkR <= kR < C. 2964 if (C.sgt(LowkR)) { 2965 // If LowkR < C, then such a k is guaranteed to exist because 2966 // LowkR itself is a multiple of R. 2967 C -= -RoundUp(-C, R); // C = C - RoundDown(C, R) 2968 // Pick the smaller solution. 2969 PickLow = true; 2970 } else { 2971 // If C-kR < 0 for all potential k's, it means that one solution 2972 // will be negative, while the other will be positive. The positive 2973 // solution will shift towards 0 if the parabola is moved up. 2974 // Pick the kR closest to the lower bound (i.e. make C-kR closest 2975 // to 0, or in other words, out of all parabolas that have solutions, 2976 // pick the one that is the farthest "up"). 2977 // Since LowkR is itself a multiple of R, simply take C-LowkR. 2978 C -= LowkR; 2979 // Pick the greater solution. 2980 PickLow = false; 2981 } 2982 } 2983 2984 LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + " 2985 << B << "x + " << C << ", rw:" << RangeWidth << '\n'); 2986 2987 APInt D = SqrB - 4*A*C; 2988 assert(D.isNonNegative() && "Negative discriminant"); 2989 APInt SQ = D.sqrt(); 2990 2991 APInt Q = SQ * SQ; 2992 bool InexactSQ = Q != D; 2993 // The calculated SQ may actually be greater than the exact (non-integer) 2994 // value. If that's the case, decrement SQ to get a value that is lower. 2995 if (Q.sgt(D)) 2996 SQ -= 1; 2997 2998 APInt X; 2999 APInt Rem; 3000 3001 // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact. 3002 // When using the quadratic formula directly, the calculated low root 3003 // may be greater than the exact one, since we would be subtracting SQ. 3004 // To make sure that the calculated root is not greater than the exact 3005 // one, subtract SQ+1 when calculating the low root (for inexact value 3006 // of SQ). 3007 if (PickLow) 3008 APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem); 3009 else 3010 APInt::sdivrem(-B + SQ, TwoA, X, Rem); 3011 3012 // The updated coefficients should be such that the (exact) solution is 3013 // positive. Since APInt division rounds towards 0, the calculated one 3014 // can be 0, but cannot be negative. 3015 assert(X.isNonNegative() && "Solution should be non-negative"); 3016 3017 if (!InexactSQ && Rem.isNullValue()) { 3018 LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n'); 3019 return X; 3020 } 3021 3022 assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D"); 3023 // The exact value of the square root of D should be between SQ and SQ+1. 3024 // This implies that the solution should be between that corresponding to 3025 // SQ (i.e. X) and that corresponding to SQ+1. 3026 // 3027 // The calculated X cannot be greater than the exact (real) solution. 3028 // Actually it must be strictly less than the exact solution, while 3029 // X+1 will be greater than or equal to it. 3030 3031 APInt VX = (A*X + B)*X + C; 3032 APInt VY = VX + TwoA*X + A + B; 3033 bool SignChange = VX.isNegative() != VY.isNegative() || 3034 VX.isNullValue() != VY.isNullValue(); 3035 // If the sign did not change between X and X+1, X is not a valid solution. 3036 // This could happen when the actual (exact) roots don't have an integer 3037 // between them, so they would both be contained between X and X+1. 3038 if (!SignChange) { 3039 LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n"); 3040 return None; 3041 } 3042 3043 X += 1; 3044 LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n'); 3045 return X; 3046 } 3047 3048 Optional<unsigned> 3049 llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) { 3050 assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth"); 3051 if (A == B) 3052 return llvm::None; 3053 return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1); 3054 } 3055 3056 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst 3057 /// with the integer held in IntVal. 3058 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, 3059 unsigned StoreBytes) { 3060 assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!"); 3061 const uint8_t *Src = (const uint8_t *)IntVal.getRawData(); 3062 3063 if (sys::IsLittleEndianHost) { 3064 // Little-endian host - the source is ordered from LSB to MSB. Order the 3065 // destination from LSB to MSB: Do a straight copy. 3066 memcpy(Dst, Src, StoreBytes); 3067 } else { 3068 // Big-endian host - the source is an array of 64 bit words ordered from 3069 // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination 3070 // from MSB to LSB: Reverse the word order, but not the bytes in a word. 3071 while (StoreBytes > sizeof(uint64_t)) { 3072 StoreBytes -= sizeof(uint64_t); 3073 // May not be aligned so use memcpy. 3074 memcpy(Dst + StoreBytes, Src, sizeof(uint64_t)); 3075 Src += sizeof(uint64_t); 3076 } 3077 3078 memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes); 3079 } 3080 } 3081 3082 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting 3083 /// from Src into IntVal, which is assumed to be wide enough and to hold zero. 3084 void llvm::LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, 3085 unsigned LoadBytes) { 3086 assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!"); 3087 uint8_t *Dst = reinterpret_cast<uint8_t *>( 3088 const_cast<uint64_t *>(IntVal.getRawData())); 3089 3090 if (sys::IsLittleEndianHost) 3091 // Little-endian host - the destination must be ordered from LSB to MSB. 3092 // The source is ordered from LSB to MSB: Do a straight copy. 3093 memcpy(Dst, Src, LoadBytes); 3094 else { 3095 // Big-endian - the destination is an array of 64 bit words ordered from 3096 // LSW to MSW. Each word must be ordered from MSB to LSB. The source is 3097 // ordered from MSB to LSB: Reverse the word order, but not the bytes in 3098 // a word. 3099 while (LoadBytes > sizeof(uint64_t)) { 3100 LoadBytes -= sizeof(uint64_t); 3101 // May not be aligned so use memcpy. 3102 memcpy(Dst, Src + LoadBytes, sizeof(uint64_t)); 3103 Dst += sizeof(uint64_t); 3104 } 3105 3106 memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes); 3107 } 3108 } 3109