1 //===-- APInt.cpp - Implement APInt class ---------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a class to represent arbitrary precision integer 10 // constant values and provide a variety of arithmetic operations on them. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/APInt.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/FoldingSet.h" 17 #include "llvm/ADT/Hashing.h" 18 #include "llvm/ADT/Optional.h" 19 #include "llvm/ADT/SmallString.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/ADT/bit.h" 22 #include "llvm/Config/llvm-config.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/ErrorHandling.h" 25 #include "llvm/Support/MathExtras.h" 26 #include "llvm/Support/raw_ostream.h" 27 #include <climits> 28 #include <cmath> 29 #include <cstdlib> 30 #include <cstring> 31 using namespace llvm; 32 33 #define DEBUG_TYPE "apint" 34 35 /// A utility function for allocating memory, checking for allocation failures, 36 /// and ensuring the contents are zeroed. 37 inline static uint64_t* getClearedMemory(unsigned numWords) { 38 uint64_t *result = new uint64_t[numWords]; 39 memset(result, 0, numWords * sizeof(uint64_t)); 40 return result; 41 } 42 43 /// A utility function for allocating memory and checking for allocation 44 /// failure. The content is not zeroed. 45 inline static uint64_t* getMemory(unsigned numWords) { 46 return new uint64_t[numWords]; 47 } 48 49 /// A utility function that converts a character to a digit. 50 inline static unsigned getDigit(char cdigit, uint8_t radix) { 51 unsigned r; 52 53 if (radix == 16 || radix == 36) { 54 r = cdigit - '0'; 55 if (r <= 9) 56 return r; 57 58 r = cdigit - 'A'; 59 if (r <= radix - 11U) 60 return r + 10; 61 62 r = cdigit - 'a'; 63 if (r <= radix - 11U) 64 return r + 10; 65 66 radix = 10; 67 } 68 69 r = cdigit - '0'; 70 if (r < radix) 71 return r; 72 73 return -1U; 74 } 75 76 77 void APInt::initSlowCase(uint64_t val, bool isSigned) { 78 U.pVal = getClearedMemory(getNumWords()); 79 U.pVal[0] = val; 80 if (isSigned && int64_t(val) < 0) 81 for (unsigned i = 1; i < getNumWords(); ++i) 82 U.pVal[i] = WORDTYPE_MAX; 83 clearUnusedBits(); 84 } 85 86 void APInt::initSlowCase(const APInt& that) { 87 U.pVal = getMemory(getNumWords()); 88 memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE); 89 } 90 91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) { 92 assert(BitWidth && "Bitwidth too small"); 93 assert(bigVal.data() && "Null pointer detected!"); 94 if (isSingleWord()) 95 U.VAL = bigVal[0]; 96 else { 97 // Get memory, cleared to 0 98 U.pVal = getClearedMemory(getNumWords()); 99 // Calculate the number of words to copy 100 unsigned words = std::min<unsigned>(bigVal.size(), getNumWords()); 101 // Copy the words from bigVal to pVal 102 memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE); 103 } 104 // Make sure unused high bits are cleared 105 clearUnusedBits(); 106 } 107 108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal) 109 : BitWidth(numBits) { 110 initFromArray(bigVal); 111 } 112 113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) 114 : BitWidth(numBits) { 115 initFromArray(makeArrayRef(bigVal, numWords)); 116 } 117 118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix) 119 : BitWidth(numbits) { 120 assert(BitWidth && "Bitwidth too small"); 121 fromString(numbits, Str, radix); 122 } 123 124 void APInt::reallocate(unsigned NewBitWidth) { 125 // If the number of words is the same we can just change the width and stop. 126 if (getNumWords() == getNumWords(NewBitWidth)) { 127 BitWidth = NewBitWidth; 128 return; 129 } 130 131 // If we have an allocation, delete it. 132 if (!isSingleWord()) 133 delete [] U.pVal; 134 135 // Update BitWidth. 136 BitWidth = NewBitWidth; 137 138 // If we are supposed to have an allocation, create it. 139 if (!isSingleWord()) 140 U.pVal = getMemory(getNumWords()); 141 } 142 143 void APInt::AssignSlowCase(const APInt& RHS) { 144 // Don't do anything for X = X 145 if (this == &RHS) 146 return; 147 148 // Adjust the bit width and handle allocations as necessary. 149 reallocate(RHS.getBitWidth()); 150 151 // Copy the data. 152 if (isSingleWord()) 153 U.VAL = RHS.U.VAL; 154 else 155 memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE); 156 } 157 158 /// This method 'profiles' an APInt for use with FoldingSet. 159 void APInt::Profile(FoldingSetNodeID& ID) const { 160 ID.AddInteger(BitWidth); 161 162 if (isSingleWord()) { 163 ID.AddInteger(U.VAL); 164 return; 165 } 166 167 unsigned NumWords = getNumWords(); 168 for (unsigned i = 0; i < NumWords; ++i) 169 ID.AddInteger(U.pVal[i]); 170 } 171 172 /// Prefix increment operator. Increments the APInt by one. 173 APInt& APInt::operator++() { 174 if (isSingleWord()) 175 ++U.VAL; 176 else 177 tcIncrement(U.pVal, getNumWords()); 178 return clearUnusedBits(); 179 } 180 181 /// Prefix decrement operator. Decrements the APInt by one. 182 APInt& APInt::operator--() { 183 if (isSingleWord()) 184 --U.VAL; 185 else 186 tcDecrement(U.pVal, getNumWords()); 187 return clearUnusedBits(); 188 } 189 190 /// Adds the RHS APint to this APInt. 191 /// @returns this, after addition of RHS. 192 /// Addition assignment operator. 193 APInt& APInt::operator+=(const APInt& RHS) { 194 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 195 if (isSingleWord()) 196 U.VAL += RHS.U.VAL; 197 else 198 tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords()); 199 return clearUnusedBits(); 200 } 201 202 APInt& APInt::operator+=(uint64_t RHS) { 203 if (isSingleWord()) 204 U.VAL += RHS; 205 else 206 tcAddPart(U.pVal, RHS, getNumWords()); 207 return clearUnusedBits(); 208 } 209 210 /// Subtracts the RHS APInt from this APInt 211 /// @returns this, after subtraction 212 /// Subtraction assignment operator. 213 APInt& APInt::operator-=(const APInt& RHS) { 214 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 215 if (isSingleWord()) 216 U.VAL -= RHS.U.VAL; 217 else 218 tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords()); 219 return clearUnusedBits(); 220 } 221 222 APInt& APInt::operator-=(uint64_t RHS) { 223 if (isSingleWord()) 224 U.VAL -= RHS; 225 else 226 tcSubtractPart(U.pVal, RHS, getNumWords()); 227 return clearUnusedBits(); 228 } 229 230 APInt APInt::operator*(const APInt& RHS) const { 231 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 232 if (isSingleWord()) 233 return APInt(BitWidth, U.VAL * RHS.U.VAL); 234 235 APInt Result(getMemory(getNumWords()), getBitWidth()); 236 237 tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords()); 238 239 Result.clearUnusedBits(); 240 return Result; 241 } 242 243 void APInt::AndAssignSlowCase(const APInt& RHS) { 244 tcAnd(U.pVal, RHS.U.pVal, getNumWords()); 245 } 246 247 void APInt::OrAssignSlowCase(const APInt& RHS) { 248 tcOr(U.pVal, RHS.U.pVal, getNumWords()); 249 } 250 251 void APInt::XorAssignSlowCase(const APInt& RHS) { 252 tcXor(U.pVal, RHS.U.pVal, getNumWords()); 253 } 254 255 APInt& APInt::operator*=(const APInt& RHS) { 256 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 257 *this = *this * RHS; 258 return *this; 259 } 260 261 APInt& APInt::operator*=(uint64_t RHS) { 262 if (isSingleWord()) { 263 U.VAL *= RHS; 264 } else { 265 unsigned NumWords = getNumWords(); 266 tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false); 267 } 268 return clearUnusedBits(); 269 } 270 271 bool APInt::EqualSlowCase(const APInt& RHS) const { 272 return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal); 273 } 274 275 int APInt::compare(const APInt& RHS) const { 276 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); 277 if (isSingleWord()) 278 return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL; 279 280 return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); 281 } 282 283 int APInt::compareSigned(const APInt& RHS) const { 284 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); 285 if (isSingleWord()) { 286 int64_t lhsSext = SignExtend64(U.VAL, BitWidth); 287 int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth); 288 return lhsSext < rhsSext ? -1 : lhsSext > rhsSext; 289 } 290 291 bool lhsNeg = isNegative(); 292 bool rhsNeg = RHS.isNegative(); 293 294 // If the sign bits don't match, then (LHS < RHS) if LHS is negative 295 if (lhsNeg != rhsNeg) 296 return lhsNeg ? -1 : 1; 297 298 // Otherwise we can just use an unsigned comparison, because even negative 299 // numbers compare correctly this way if both have the same signed-ness. 300 return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); 301 } 302 303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { 304 unsigned loWord = whichWord(loBit); 305 unsigned hiWord = whichWord(hiBit); 306 307 // Create an initial mask for the low word with zeros below loBit. 308 uint64_t loMask = WORDTYPE_MAX << whichBit(loBit); 309 310 // If hiBit is not aligned, we need a high mask. 311 unsigned hiShiftAmt = whichBit(hiBit); 312 if (hiShiftAmt != 0) { 313 // Create a high mask with zeros above hiBit. 314 uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt); 315 // If loWord and hiWord are equal, then we combine the masks. Otherwise, 316 // set the bits in hiWord. 317 if (hiWord == loWord) 318 loMask &= hiMask; 319 else 320 U.pVal[hiWord] |= hiMask; 321 } 322 // Apply the mask to the low word. 323 U.pVal[loWord] |= loMask; 324 325 // Fill any words between loWord and hiWord with all ones. 326 for (unsigned word = loWord + 1; word < hiWord; ++word) 327 U.pVal[word] = WORDTYPE_MAX; 328 } 329 330 /// Toggle every bit to its opposite value. 331 void APInt::flipAllBitsSlowCase() { 332 tcComplement(U.pVal, getNumWords()); 333 clearUnusedBits(); 334 } 335 336 /// Toggle a given bit to its opposite value whose position is given 337 /// as "bitPosition". 338 /// Toggles a given bit to its opposite value. 339 void APInt::flipBit(unsigned bitPosition) { 340 assert(bitPosition < BitWidth && "Out of the bit-width range!"); 341 if ((*this)[bitPosition]) clearBit(bitPosition); 342 else setBit(bitPosition); 343 } 344 345 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { 346 unsigned subBitWidth = subBits.getBitWidth(); 347 assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth && 348 "Illegal bit insertion"); 349 350 // Insertion is a direct copy. 351 if (subBitWidth == BitWidth) { 352 *this = subBits; 353 return; 354 } 355 356 // Single word result can be done as a direct bitmask. 357 if (isSingleWord()) { 358 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth); 359 U.VAL &= ~(mask << bitPosition); 360 U.VAL |= (subBits.U.VAL << bitPosition); 361 return; 362 } 363 364 unsigned loBit = whichBit(bitPosition); 365 unsigned loWord = whichWord(bitPosition); 366 unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1); 367 368 // Insertion within a single word can be done as a direct bitmask. 369 if (loWord == hi1Word) { 370 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth); 371 U.pVal[loWord] &= ~(mask << loBit); 372 U.pVal[loWord] |= (subBits.U.VAL << loBit); 373 return; 374 } 375 376 // Insert on word boundaries. 377 if (loBit == 0) { 378 // Direct copy whole words. 379 unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD; 380 memcpy(U.pVal + loWord, subBits.getRawData(), 381 numWholeSubWords * APINT_WORD_SIZE); 382 383 // Mask+insert remaining bits. 384 unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD; 385 if (remainingBits != 0) { 386 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits); 387 U.pVal[hi1Word] &= ~mask; 388 U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1); 389 } 390 return; 391 } 392 393 // General case - set/clear individual bits in dst based on src. 394 // TODO - there is scope for optimization here, but at the moment this code 395 // path is barely used so prefer readability over performance. 396 for (unsigned i = 0; i != subBitWidth; ++i) { 397 if (subBits[i]) 398 setBit(bitPosition + i); 399 else 400 clearBit(bitPosition + i); 401 } 402 } 403 404 void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) { 405 uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits); 406 subBits &= maskBits; 407 if (isSingleWord()) { 408 U.VAL &= ~(maskBits << bitPosition); 409 U.VAL |= subBits << bitPosition; 410 return; 411 } 412 413 unsigned loBit = whichBit(bitPosition); 414 unsigned loWord = whichWord(bitPosition); 415 unsigned hiWord = whichWord(bitPosition + numBits - 1); 416 if (loWord == hiWord) { 417 U.pVal[loWord] &= ~(maskBits << loBit); 418 U.pVal[loWord] |= subBits << loBit; 419 return; 420 } 421 422 static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected"); 423 unsigned wordBits = 8 * sizeof(WordType); 424 U.pVal[loWord] &= ~(maskBits << loBit); 425 U.pVal[loWord] |= subBits << loBit; 426 427 U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit)); 428 U.pVal[hiWord] |= subBits >> (wordBits - loBit); 429 } 430 431 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { 432 assert(numBits > 0 && "Can't extract zero bits"); 433 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && 434 "Illegal bit extraction"); 435 436 if (isSingleWord()) 437 return APInt(numBits, U.VAL >> bitPosition); 438 439 unsigned loBit = whichBit(bitPosition); 440 unsigned loWord = whichWord(bitPosition); 441 unsigned hiWord = whichWord(bitPosition + numBits - 1); 442 443 // Single word result extracting bits from a single word source. 444 if (loWord == hiWord) 445 return APInt(numBits, U.pVal[loWord] >> loBit); 446 447 // Extracting bits that start on a source word boundary can be done 448 // as a fast memory copy. 449 if (loBit == 0) 450 return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord)); 451 452 // General case - shift + copy source words directly into place. 453 APInt Result(numBits, 0); 454 unsigned NumSrcWords = getNumWords(); 455 unsigned NumDstWords = Result.getNumWords(); 456 457 uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal; 458 for (unsigned word = 0; word < NumDstWords; ++word) { 459 uint64_t w0 = U.pVal[loWord + word]; 460 uint64_t w1 = 461 (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0; 462 DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit)); 463 } 464 465 return Result.clearUnusedBits(); 466 } 467 468 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits, 469 unsigned bitPosition) const { 470 assert(numBits > 0 && "Can't extract zero bits"); 471 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && 472 "Illegal bit extraction"); 473 assert(numBits <= 64 && "Illegal bit extraction"); 474 475 uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits); 476 if (isSingleWord()) 477 return (U.VAL >> bitPosition) & maskBits; 478 479 unsigned loBit = whichBit(bitPosition); 480 unsigned loWord = whichWord(bitPosition); 481 unsigned hiWord = whichWord(bitPosition + numBits - 1); 482 if (loWord == hiWord) 483 return (U.pVal[loWord] >> loBit) & maskBits; 484 485 static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected"); 486 unsigned wordBits = 8 * sizeof(WordType); 487 uint64_t retBits = U.pVal[loWord] >> loBit; 488 retBits |= U.pVal[hiWord] << (wordBits - loBit); 489 retBits &= maskBits; 490 return retBits; 491 } 492 493 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { 494 assert(!str.empty() && "Invalid string length"); 495 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || 496 radix == 36) && 497 "Radix should be 2, 8, 10, 16, or 36!"); 498 499 size_t slen = str.size(); 500 501 // Each computation below needs to know if it's negative. 502 StringRef::iterator p = str.begin(); 503 unsigned isNegative = *p == '-'; 504 if (*p == '-' || *p == '+') { 505 p++; 506 slen--; 507 assert(slen && "String is only a sign, needs a value."); 508 } 509 510 // For radixes of power-of-two values, the bits required is accurately and 511 // easily computed 512 if (radix == 2) 513 return slen + isNegative; 514 if (radix == 8) 515 return slen * 3 + isNegative; 516 if (radix == 16) 517 return slen * 4 + isNegative; 518 519 // FIXME: base 36 520 521 // This is grossly inefficient but accurate. We could probably do something 522 // with a computation of roughly slen*64/20 and then adjust by the value of 523 // the first few digits. But, I'm not sure how accurate that could be. 524 525 // Compute a sufficient number of bits that is always large enough but might 526 // be too large. This avoids the assertion in the constructor. This 527 // calculation doesn't work appropriately for the numbers 0-9, so just use 4 528 // bits in that case. 529 unsigned sufficient 530 = radix == 10? (slen == 1 ? 4 : slen * 64/18) 531 : (slen == 1 ? 7 : slen * 16/3); 532 533 // Convert to the actual binary value. 534 APInt tmp(sufficient, StringRef(p, slen), radix); 535 536 // Compute how many bits are required. If the log is infinite, assume we need 537 // just bit. If the log is exact and value is negative, then the value is 538 // MinSignedValue with (log + 1) bits. 539 unsigned log = tmp.logBase2(); 540 if (log == (unsigned)-1) { 541 return isNegative + 1; 542 } else if (isNegative && tmp.isPowerOf2()) { 543 return isNegative + log; 544 } else { 545 return isNegative + log + 1; 546 } 547 } 548 549 hash_code llvm::hash_value(const APInt &Arg) { 550 if (Arg.isSingleWord()) 551 return hash_combine(Arg.U.VAL); 552 553 return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()); 554 } 555 556 bool APInt::isSplat(unsigned SplatSizeInBits) const { 557 assert(getBitWidth() % SplatSizeInBits == 0 && 558 "SplatSizeInBits must divide width!"); 559 // We can check that all parts of an integer are equal by making use of a 560 // little trick: rotate and check if it's still the same value. 561 return *this == rotl(SplatSizeInBits); 562 } 563 564 /// This function returns the high "numBits" bits of this APInt. 565 APInt APInt::getHiBits(unsigned numBits) const { 566 return this->lshr(BitWidth - numBits); 567 } 568 569 /// This function returns the low "numBits" bits of this APInt. 570 APInt APInt::getLoBits(unsigned numBits) const { 571 APInt Result(getLowBitsSet(BitWidth, numBits)); 572 Result &= *this; 573 return Result; 574 } 575 576 /// Return a value containing V broadcasted over NewLen bits. 577 APInt APInt::getSplat(unsigned NewLen, const APInt &V) { 578 assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!"); 579 580 APInt Val = V.zextOrSelf(NewLen); 581 for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1) 582 Val |= Val << I; 583 584 return Val; 585 } 586 587 unsigned APInt::countLeadingZerosSlowCase() const { 588 unsigned Count = 0; 589 for (int i = getNumWords()-1; i >= 0; --i) { 590 uint64_t V = U.pVal[i]; 591 if (V == 0) 592 Count += APINT_BITS_PER_WORD; 593 else { 594 Count += llvm::countLeadingZeros(V); 595 break; 596 } 597 } 598 // Adjust for unused bits in the most significant word (they are zero). 599 unsigned Mod = BitWidth % APINT_BITS_PER_WORD; 600 Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0; 601 return Count; 602 } 603 604 unsigned APInt::countLeadingOnesSlowCase() const { 605 unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD; 606 unsigned shift; 607 if (!highWordBits) { 608 highWordBits = APINT_BITS_PER_WORD; 609 shift = 0; 610 } else { 611 shift = APINT_BITS_PER_WORD - highWordBits; 612 } 613 int i = getNumWords() - 1; 614 unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift); 615 if (Count == highWordBits) { 616 for (i--; i >= 0; --i) { 617 if (U.pVal[i] == WORDTYPE_MAX) 618 Count += APINT_BITS_PER_WORD; 619 else { 620 Count += llvm::countLeadingOnes(U.pVal[i]); 621 break; 622 } 623 } 624 } 625 return Count; 626 } 627 628 unsigned APInt::countTrailingZerosSlowCase() const { 629 unsigned Count = 0; 630 unsigned i = 0; 631 for (; i < getNumWords() && U.pVal[i] == 0; ++i) 632 Count += APINT_BITS_PER_WORD; 633 if (i < getNumWords()) 634 Count += llvm::countTrailingZeros(U.pVal[i]); 635 return std::min(Count, BitWidth); 636 } 637 638 unsigned APInt::countTrailingOnesSlowCase() const { 639 unsigned Count = 0; 640 unsigned i = 0; 641 for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i) 642 Count += APINT_BITS_PER_WORD; 643 if (i < getNumWords()) 644 Count += llvm::countTrailingOnes(U.pVal[i]); 645 assert(Count <= BitWidth); 646 return Count; 647 } 648 649 unsigned APInt::countPopulationSlowCase() const { 650 unsigned Count = 0; 651 for (unsigned i = 0; i < getNumWords(); ++i) 652 Count += llvm::countPopulation(U.pVal[i]); 653 return Count; 654 } 655 656 bool APInt::intersectsSlowCase(const APInt &RHS) const { 657 for (unsigned i = 0, e = getNumWords(); i != e; ++i) 658 if ((U.pVal[i] & RHS.U.pVal[i]) != 0) 659 return true; 660 661 return false; 662 } 663 664 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const { 665 for (unsigned i = 0, e = getNumWords(); i != e; ++i) 666 if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0) 667 return false; 668 669 return true; 670 } 671 672 APInt APInt::byteSwap() const { 673 assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!"); 674 if (BitWidth == 16) 675 return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL))); 676 if (BitWidth == 32) 677 return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL))); 678 if (BitWidth == 48) { 679 unsigned Tmp1 = unsigned(U.VAL >> 16); 680 Tmp1 = ByteSwap_32(Tmp1); 681 uint16_t Tmp2 = uint16_t(U.VAL); 682 Tmp2 = ByteSwap_16(Tmp2); 683 return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1); 684 } 685 if (BitWidth == 64) 686 return APInt(BitWidth, ByteSwap_64(U.VAL)); 687 688 APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0); 689 for (unsigned I = 0, N = getNumWords(); I != N; ++I) 690 Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]); 691 if (Result.BitWidth != BitWidth) { 692 Result.lshrInPlace(Result.BitWidth - BitWidth); 693 Result.BitWidth = BitWidth; 694 } 695 return Result; 696 } 697 698 APInt APInt::reverseBits() const { 699 switch (BitWidth) { 700 case 64: 701 return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL)); 702 case 32: 703 return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL)); 704 case 16: 705 return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL)); 706 case 8: 707 return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL)); 708 default: 709 break; 710 } 711 712 APInt Val(*this); 713 APInt Reversed(BitWidth, 0); 714 unsigned S = BitWidth; 715 716 for (; Val != 0; Val.lshrInPlace(1)) { 717 Reversed <<= 1; 718 Reversed |= Val[0]; 719 --S; 720 } 721 722 Reversed <<= S; 723 return Reversed; 724 } 725 726 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) { 727 // Fast-path a common case. 728 if (A == B) return A; 729 730 // Corner cases: if either operand is zero, the other is the gcd. 731 if (!A) return B; 732 if (!B) return A; 733 734 // Count common powers of 2 and remove all other powers of 2. 735 unsigned Pow2; 736 { 737 unsigned Pow2_A = A.countTrailingZeros(); 738 unsigned Pow2_B = B.countTrailingZeros(); 739 if (Pow2_A > Pow2_B) { 740 A.lshrInPlace(Pow2_A - Pow2_B); 741 Pow2 = Pow2_B; 742 } else if (Pow2_B > Pow2_A) { 743 B.lshrInPlace(Pow2_B - Pow2_A); 744 Pow2 = Pow2_A; 745 } else { 746 Pow2 = Pow2_A; 747 } 748 } 749 750 // Both operands are odd multiples of 2^Pow_2: 751 // 752 // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b)) 753 // 754 // This is a modified version of Stein's algorithm, taking advantage of 755 // efficient countTrailingZeros(). 756 while (A != B) { 757 if (A.ugt(B)) { 758 A -= B; 759 A.lshrInPlace(A.countTrailingZeros() - Pow2); 760 } else { 761 B -= A; 762 B.lshrInPlace(B.countTrailingZeros() - Pow2); 763 } 764 } 765 766 return A; 767 } 768 769 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { 770 uint64_t I = bit_cast<uint64_t>(Double); 771 772 // Get the sign bit from the highest order bit 773 bool isNeg = I >> 63; 774 775 // Get the 11-bit exponent and adjust for the 1023 bit bias 776 int64_t exp = ((I >> 52) & 0x7ff) - 1023; 777 778 // If the exponent is negative, the value is < 0 so just return 0. 779 if (exp < 0) 780 return APInt(width, 0u); 781 782 // Extract the mantissa by clearing the top 12 bits (sign + exponent). 783 uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52; 784 785 // If the exponent doesn't shift all bits out of the mantissa 786 if (exp < 52) 787 return isNeg ? -APInt(width, mantissa >> (52 - exp)) : 788 APInt(width, mantissa >> (52 - exp)); 789 790 // If the client didn't provide enough bits for us to shift the mantissa into 791 // then the result is undefined, just return 0 792 if (width <= exp - 52) 793 return APInt(width, 0); 794 795 // Otherwise, we have to shift the mantissa bits up to the right location 796 APInt Tmp(width, mantissa); 797 Tmp <<= (unsigned)exp - 52; 798 return isNeg ? -Tmp : Tmp; 799 } 800 801 /// This function converts this APInt to a double. 802 /// The layout for double is as following (IEEE Standard 754): 803 /// -------------------------------------- 804 /// | Sign Exponent Fraction Bias | 805 /// |-------------------------------------- | 806 /// | 1[63] 11[62-52] 52[51-00] 1023 | 807 /// -------------------------------------- 808 double APInt::roundToDouble(bool isSigned) const { 809 810 // Handle the simple case where the value is contained in one uint64_t. 811 // It is wrong to optimize getWord(0) to VAL; there might be more than one word. 812 if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) { 813 if (isSigned) { 814 int64_t sext = SignExtend64(getWord(0), BitWidth); 815 return double(sext); 816 } else 817 return double(getWord(0)); 818 } 819 820 // Determine if the value is negative. 821 bool isNeg = isSigned ? (*this)[BitWidth-1] : false; 822 823 // Construct the absolute value if we're negative. 824 APInt Tmp(isNeg ? -(*this) : (*this)); 825 826 // Figure out how many bits we're using. 827 unsigned n = Tmp.getActiveBits(); 828 829 // The exponent (without bias normalization) is just the number of bits 830 // we are using. Note that the sign bit is gone since we constructed the 831 // absolute value. 832 uint64_t exp = n; 833 834 // Return infinity for exponent overflow 835 if (exp > 1023) { 836 if (!isSigned || !isNeg) 837 return std::numeric_limits<double>::infinity(); 838 else 839 return -std::numeric_limits<double>::infinity(); 840 } 841 exp += 1023; // Increment for 1023 bias 842 843 // Number of bits in mantissa is 52. To obtain the mantissa value, we must 844 // extract the high 52 bits from the correct words in pVal. 845 uint64_t mantissa; 846 unsigned hiWord = whichWord(n-1); 847 if (hiWord == 0) { 848 mantissa = Tmp.U.pVal[0]; 849 if (n > 52) 850 mantissa >>= n - 52; // shift down, we want the top 52 bits. 851 } else { 852 assert(hiWord > 0 && "huh?"); 853 uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD); 854 uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD); 855 mantissa = hibits | lobits; 856 } 857 858 // The leading bit of mantissa is implicit, so get rid of it. 859 uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0; 860 uint64_t I = sign | (exp << 52) | mantissa; 861 return bit_cast<double>(I); 862 } 863 864 // Truncate to new width. 865 APInt APInt::trunc(unsigned width) const { 866 assert(width < BitWidth && "Invalid APInt Truncate request"); 867 assert(width && "Can't truncate to 0 bits"); 868 869 if (width <= APINT_BITS_PER_WORD) 870 return APInt(width, getRawData()[0]); 871 872 APInt Result(getMemory(getNumWords(width)), width); 873 874 // Copy full words. 875 unsigned i; 876 for (i = 0; i != width / APINT_BITS_PER_WORD; i++) 877 Result.U.pVal[i] = U.pVal[i]; 878 879 // Truncate and copy any partial word. 880 unsigned bits = (0 - width) % APINT_BITS_PER_WORD; 881 if (bits != 0) 882 Result.U.pVal[i] = U.pVal[i] << bits >> bits; 883 884 return Result; 885 } 886 887 // Sign extend to a new width. 888 APInt APInt::sext(unsigned Width) const { 889 assert(Width > BitWidth && "Invalid APInt SignExtend request"); 890 891 if (Width <= APINT_BITS_PER_WORD) 892 return APInt(Width, SignExtend64(U.VAL, BitWidth)); 893 894 APInt Result(getMemory(getNumWords(Width)), Width); 895 896 // Copy words. 897 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); 898 899 // Sign extend the last word since there may be unused bits in the input. 900 Result.U.pVal[getNumWords() - 1] = 901 SignExtend64(Result.U.pVal[getNumWords() - 1], 902 ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); 903 904 // Fill with sign bits. 905 std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0, 906 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); 907 Result.clearUnusedBits(); 908 return Result; 909 } 910 911 // Zero extend to a new width. 912 APInt APInt::zext(unsigned width) const { 913 assert(width > BitWidth && "Invalid APInt ZeroExtend request"); 914 915 if (width <= APINT_BITS_PER_WORD) 916 return APInt(width, U.VAL); 917 918 APInt Result(getMemory(getNumWords(width)), width); 919 920 // Copy words. 921 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); 922 923 // Zero remaining words. 924 std::memset(Result.U.pVal + getNumWords(), 0, 925 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); 926 927 return Result; 928 } 929 930 APInt APInt::zextOrTrunc(unsigned width) const { 931 if (BitWidth < width) 932 return zext(width); 933 if (BitWidth > width) 934 return trunc(width); 935 return *this; 936 } 937 938 APInt APInt::sextOrTrunc(unsigned width) const { 939 if (BitWidth < width) 940 return sext(width); 941 if (BitWidth > width) 942 return trunc(width); 943 return *this; 944 } 945 946 APInt APInt::zextOrSelf(unsigned width) const { 947 if (BitWidth < width) 948 return zext(width); 949 return *this; 950 } 951 952 APInt APInt::sextOrSelf(unsigned width) const { 953 if (BitWidth < width) 954 return sext(width); 955 return *this; 956 } 957 958 /// Arithmetic right-shift this APInt by shiftAmt. 959 /// Arithmetic right-shift function. 960 void APInt::ashrInPlace(const APInt &shiftAmt) { 961 ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); 962 } 963 964 /// Arithmetic right-shift this APInt by shiftAmt. 965 /// Arithmetic right-shift function. 966 void APInt::ashrSlowCase(unsigned ShiftAmt) { 967 // Don't bother performing a no-op shift. 968 if (!ShiftAmt) 969 return; 970 971 // Save the original sign bit for later. 972 bool Negative = isNegative(); 973 974 // WordShift is the inter-part shift; BitShift is intra-part shift. 975 unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD; 976 unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD; 977 978 unsigned WordsToMove = getNumWords() - WordShift; 979 if (WordsToMove != 0) { 980 // Sign extend the last word to fill in the unused bits. 981 U.pVal[getNumWords() - 1] = SignExtend64( 982 U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); 983 984 // Fastpath for moving by whole words. 985 if (BitShift == 0) { 986 std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE); 987 } else { 988 // Move the words containing significant bits. 989 for (unsigned i = 0; i != WordsToMove - 1; ++i) 990 U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) | 991 (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift)); 992 993 // Handle the last word which has no high bits to copy. 994 U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift; 995 // Sign extend one more time. 996 U.pVal[WordsToMove - 1] = 997 SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift); 998 } 999 } 1000 1001 // Fill in the remainder based on the original sign. 1002 std::memset(U.pVal + WordsToMove, Negative ? -1 : 0, 1003 WordShift * APINT_WORD_SIZE); 1004 clearUnusedBits(); 1005 } 1006 1007 /// Logical right-shift this APInt by shiftAmt. 1008 /// Logical right-shift function. 1009 void APInt::lshrInPlace(const APInt &shiftAmt) { 1010 lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); 1011 } 1012 1013 /// Logical right-shift this APInt by shiftAmt. 1014 /// Logical right-shift function. 1015 void APInt::lshrSlowCase(unsigned ShiftAmt) { 1016 tcShiftRight(U.pVal, getNumWords(), ShiftAmt); 1017 } 1018 1019 /// Left-shift this APInt by shiftAmt. 1020 /// Left-shift function. 1021 APInt &APInt::operator<<=(const APInt &shiftAmt) { 1022 // It's undefined behavior in C to shift by BitWidth or greater. 1023 *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth); 1024 return *this; 1025 } 1026 1027 void APInt::shlSlowCase(unsigned ShiftAmt) { 1028 tcShiftLeft(U.pVal, getNumWords(), ShiftAmt); 1029 clearUnusedBits(); 1030 } 1031 1032 // Calculate the rotate amount modulo the bit width. 1033 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) { 1034 unsigned rotBitWidth = rotateAmt.getBitWidth(); 1035 APInt rot = rotateAmt; 1036 if (rotBitWidth < BitWidth) { 1037 // Extend the rotate APInt, so that the urem doesn't divide by 0. 1038 // e.g. APInt(1, 32) would give APInt(1, 0). 1039 rot = rotateAmt.zext(BitWidth); 1040 } 1041 rot = rot.urem(APInt(rot.getBitWidth(), BitWidth)); 1042 return rot.getLimitedValue(BitWidth); 1043 } 1044 1045 APInt APInt::rotl(const APInt &rotateAmt) const { 1046 return rotl(rotateModulo(BitWidth, rotateAmt)); 1047 } 1048 1049 APInt APInt::rotl(unsigned rotateAmt) const { 1050 rotateAmt %= BitWidth; 1051 if (rotateAmt == 0) 1052 return *this; 1053 return shl(rotateAmt) | lshr(BitWidth - rotateAmt); 1054 } 1055 1056 APInt APInt::rotr(const APInt &rotateAmt) const { 1057 return rotr(rotateModulo(BitWidth, rotateAmt)); 1058 } 1059 1060 APInt APInt::rotr(unsigned rotateAmt) const { 1061 rotateAmt %= BitWidth; 1062 if (rotateAmt == 0) 1063 return *this; 1064 return lshr(rotateAmt) | shl(BitWidth - rotateAmt); 1065 } 1066 1067 // Square Root - this method computes and returns the square root of "this". 1068 // Three mechanisms are used for computation. For small values (<= 5 bits), 1069 // a table lookup is done. This gets some performance for common cases. For 1070 // values using less than 52 bits, the value is converted to double and then 1071 // the libc sqrt function is called. The result is rounded and then converted 1072 // back to a uint64_t which is then used to construct the result. Finally, 1073 // the Babylonian method for computing square roots is used. 1074 APInt APInt::sqrt() const { 1075 1076 // Determine the magnitude of the value. 1077 unsigned magnitude = getActiveBits(); 1078 1079 // Use a fast table for some small values. This also gets rid of some 1080 // rounding errors in libc sqrt for small values. 1081 if (magnitude <= 5) { 1082 static const uint8_t results[32] = { 1083 /* 0 */ 0, 1084 /* 1- 2 */ 1, 1, 1085 /* 3- 6 */ 2, 2, 2, 2, 1086 /* 7-12 */ 3, 3, 3, 3, 3, 3, 1087 /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4, 1088 /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1089 /* 31 */ 6 1090 }; 1091 return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]); 1092 } 1093 1094 // If the magnitude of the value fits in less than 52 bits (the precision of 1095 // an IEEE double precision floating point value), then we can use the 1096 // libc sqrt function which will probably use a hardware sqrt computation. 1097 // This should be faster than the algorithm below. 1098 if (magnitude < 52) { 1099 return APInt(BitWidth, 1100 uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL 1101 : U.pVal[0]))))); 1102 } 1103 1104 // Okay, all the short cuts are exhausted. We must compute it. The following 1105 // is a classical Babylonian method for computing the square root. This code 1106 // was adapted to APInt from a wikipedia article on such computations. 1107 // See http://www.wikipedia.org/ and go to the page named 1108 // Calculate_an_integer_square_root. 1109 unsigned nbits = BitWidth, i = 4; 1110 APInt testy(BitWidth, 16); 1111 APInt x_old(BitWidth, 1); 1112 APInt x_new(BitWidth, 0); 1113 APInt two(BitWidth, 2); 1114 1115 // Select a good starting value using binary logarithms. 1116 for (;; i += 2, testy = testy.shl(2)) 1117 if (i >= nbits || this->ule(testy)) { 1118 x_old = x_old.shl(i / 2); 1119 break; 1120 } 1121 1122 // Use the Babylonian method to arrive at the integer square root: 1123 for (;;) { 1124 x_new = (this->udiv(x_old) + x_old).udiv(two); 1125 if (x_old.ule(x_new)) 1126 break; 1127 x_old = x_new; 1128 } 1129 1130 // Make sure we return the closest approximation 1131 // NOTE: The rounding calculation below is correct. It will produce an 1132 // off-by-one discrepancy with results from pari/gp. That discrepancy has been 1133 // determined to be a rounding issue with pari/gp as it begins to use a 1134 // floating point representation after 192 bits. There are no discrepancies 1135 // between this algorithm and pari/gp for bit widths < 192 bits. 1136 APInt square(x_old * x_old); 1137 APInt nextSquare((x_old + 1) * (x_old +1)); 1138 if (this->ult(square)) 1139 return x_old; 1140 assert(this->ule(nextSquare) && "Error in APInt::sqrt computation"); 1141 APInt midpoint((nextSquare - square).udiv(two)); 1142 APInt offset(*this - square); 1143 if (offset.ult(midpoint)) 1144 return x_old; 1145 return x_old + 1; 1146 } 1147 1148 /// Computes the multiplicative inverse of this APInt for a given modulo. The 1149 /// iterative extended Euclidean algorithm is used to solve for this value, 1150 /// however we simplify it to speed up calculating only the inverse, and take 1151 /// advantage of div+rem calculations. We also use some tricks to avoid copying 1152 /// (potentially large) APInts around. 1153 /// WARNING: a value of '0' may be returned, 1154 /// signifying that no multiplicative inverse exists! 1155 APInt APInt::multiplicativeInverse(const APInt& modulo) const { 1156 assert(ult(modulo) && "This APInt must be smaller than the modulo"); 1157 1158 // Using the properties listed at the following web page (accessed 06/21/08): 1159 // http://www.numbertheory.org/php/euclid.html 1160 // (especially the properties numbered 3, 4 and 9) it can be proved that 1161 // BitWidth bits suffice for all the computations in the algorithm implemented 1162 // below. More precisely, this number of bits suffice if the multiplicative 1163 // inverse exists, but may not suffice for the general extended Euclidean 1164 // algorithm. 1165 1166 APInt r[2] = { modulo, *this }; 1167 APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) }; 1168 APInt q(BitWidth, 0); 1169 1170 unsigned i; 1171 for (i = 0; r[i^1] != 0; i ^= 1) { 1172 // An overview of the math without the confusing bit-flipping: 1173 // q = r[i-2] / r[i-1] 1174 // r[i] = r[i-2] % r[i-1] 1175 // t[i] = t[i-2] - t[i-1] * q 1176 udivrem(r[i], r[i^1], q, r[i]); 1177 t[i] -= t[i^1] * q; 1178 } 1179 1180 // If this APInt and the modulo are not coprime, there is no multiplicative 1181 // inverse, so return 0. We check this by looking at the next-to-last 1182 // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean 1183 // algorithm. 1184 if (r[i] != 1) 1185 return APInt(BitWidth, 0); 1186 1187 // The next-to-last t is the multiplicative inverse. However, we are 1188 // interested in a positive inverse. Calculate a positive one from a negative 1189 // one if necessary. A simple addition of the modulo suffices because 1190 // abs(t[i]) is known to be less than *this/2 (see the link above). 1191 if (t[i].isNegative()) 1192 t[i] += modulo; 1193 1194 return std::move(t[i]); 1195 } 1196 1197 /// Calculate the magic numbers required to implement a signed integer division 1198 /// by a constant as a sequence of multiplies, adds and shifts. Requires that 1199 /// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S. 1200 /// Warren, Jr., chapter 10. 1201 APInt::ms APInt::magic() const { 1202 const APInt& d = *this; 1203 unsigned p; 1204 APInt ad, anc, delta, q1, r1, q2, r2, t; 1205 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); 1206 struct ms mag; 1207 1208 ad = d.abs(); 1209 t = signedMin + (d.lshr(d.getBitWidth() - 1)); 1210 anc = t - 1 - t.urem(ad); // absolute value of nc 1211 p = d.getBitWidth() - 1; // initialize p 1212 q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc) 1213 r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc)) 1214 q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d) 1215 r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d)) 1216 do { 1217 p = p + 1; 1218 q1 = q1<<1; // update q1 = 2p/abs(nc) 1219 r1 = r1<<1; // update r1 = rem(2p/abs(nc)) 1220 if (r1.uge(anc)) { // must be unsigned comparison 1221 q1 = q1 + 1; 1222 r1 = r1 - anc; 1223 } 1224 q2 = q2<<1; // update q2 = 2p/abs(d) 1225 r2 = r2<<1; // update r2 = rem(2p/abs(d)) 1226 if (r2.uge(ad)) { // must be unsigned comparison 1227 q2 = q2 + 1; 1228 r2 = r2 - ad; 1229 } 1230 delta = ad - r2; 1231 } while (q1.ult(delta) || (q1 == delta && r1 == 0)); 1232 1233 mag.m = q2 + 1; 1234 if (d.isNegative()) mag.m = -mag.m; // resulting magic number 1235 mag.s = p - d.getBitWidth(); // resulting shift 1236 return mag; 1237 } 1238 1239 /// Calculate the magic numbers required to implement an unsigned integer 1240 /// division by a constant as a sequence of multiplies, adds and shifts. 1241 /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry 1242 /// S. Warren, Jr., chapter 10. 1243 /// LeadingZeros can be used to simplify the calculation if the upper bits 1244 /// of the divided value are known zero. 1245 APInt::mu APInt::magicu(unsigned LeadingZeros) const { 1246 const APInt& d = *this; 1247 unsigned p; 1248 APInt nc, delta, q1, r1, q2, r2; 1249 struct mu magu; 1250 magu.a = 0; // initialize "add" indicator 1251 APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros); 1252 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); 1253 APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); 1254 1255 nc = allOnes - (allOnes - d).urem(d); 1256 p = d.getBitWidth() - 1; // initialize p 1257 q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc 1258 r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc) 1259 q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d 1260 r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d) 1261 do { 1262 p = p + 1; 1263 if (r1.uge(nc - r1)) { 1264 q1 = q1 + q1 + 1; // update q1 1265 r1 = r1 + r1 - nc; // update r1 1266 } 1267 else { 1268 q1 = q1+q1; // update q1 1269 r1 = r1+r1; // update r1 1270 } 1271 if ((r2 + 1).uge(d - r2)) { 1272 if (q2.uge(signedMax)) magu.a = 1; 1273 q2 = q2+q2 + 1; // update q2 1274 r2 = r2+r2 + 1 - d; // update r2 1275 } 1276 else { 1277 if (q2.uge(signedMin)) magu.a = 1; 1278 q2 = q2+q2; // update q2 1279 r2 = r2+r2 + 1; // update r2 1280 } 1281 delta = d - 1 - r2; 1282 } while (p < d.getBitWidth()*2 && 1283 (q1.ult(delta) || (q1 == delta && r1 == 0))); 1284 magu.m = q2 + 1; // resulting magic number 1285 magu.s = p - d.getBitWidth(); // resulting shift 1286 return magu; 1287 } 1288 1289 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers) 1290 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The 1291 /// variables here have the same names as in the algorithm. Comments explain 1292 /// the algorithm and any deviation from it. 1293 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r, 1294 unsigned m, unsigned n) { 1295 assert(u && "Must provide dividend"); 1296 assert(v && "Must provide divisor"); 1297 assert(q && "Must provide quotient"); 1298 assert(u != v && u != q && v != q && "Must use different memory"); 1299 assert(n>1 && "n must be > 1"); 1300 1301 // b denotes the base of the number system. In our case b is 2^32. 1302 const uint64_t b = uint64_t(1) << 32; 1303 1304 // The DEBUG macros here tend to be spam in the debug output if you're not 1305 // debugging this code. Disable them unless KNUTH_DEBUG is defined. 1306 #ifdef KNUTH_DEBUG 1307 #define DEBUG_KNUTH(X) LLVM_DEBUG(X) 1308 #else 1309 #define DEBUG_KNUTH(X) do {} while(false) 1310 #endif 1311 1312 DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n'); 1313 DEBUG_KNUTH(dbgs() << "KnuthDiv: original:"); 1314 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1315 DEBUG_KNUTH(dbgs() << " by"); 1316 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]); 1317 DEBUG_KNUTH(dbgs() << '\n'); 1318 // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of 1319 // u and v by d. Note that we have taken Knuth's advice here to use a power 1320 // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of 1321 // 2 allows us to shift instead of multiply and it is easy to determine the 1322 // shift amount from the leading zeros. We are basically normalizing the u 1323 // and v so that its high bits are shifted to the top of v's range without 1324 // overflow. Note that this can require an extra word in u so that u must 1325 // be of length m+n+1. 1326 unsigned shift = countLeadingZeros(v[n-1]); 1327 uint32_t v_carry = 0; 1328 uint32_t u_carry = 0; 1329 if (shift) { 1330 for (unsigned i = 0; i < m+n; ++i) { 1331 uint32_t u_tmp = u[i] >> (32 - shift); 1332 u[i] = (u[i] << shift) | u_carry; 1333 u_carry = u_tmp; 1334 } 1335 for (unsigned i = 0; i < n; ++i) { 1336 uint32_t v_tmp = v[i] >> (32 - shift); 1337 v[i] = (v[i] << shift) | v_carry; 1338 v_carry = v_tmp; 1339 } 1340 } 1341 u[m+n] = u_carry; 1342 1343 DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:"); 1344 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1345 DEBUG_KNUTH(dbgs() << " by"); 1346 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]); 1347 DEBUG_KNUTH(dbgs() << '\n'); 1348 1349 // D2. [Initialize j.] Set j to m. This is the loop counter over the places. 1350 int j = m; 1351 do { 1352 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n'); 1353 // D3. [Calculate q'.]. 1354 // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') 1355 // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r') 1356 // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease 1357 // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test 1358 // on v[n-2] determines at high speed most of the cases in which the trial 1359 // value qp is one too large, and it eliminates all cases where qp is two 1360 // too large. 1361 uint64_t dividend = Make_64(u[j+n], u[j+n-1]); 1362 DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n'); 1363 uint64_t qp = dividend / v[n-1]; 1364 uint64_t rp = dividend % v[n-1]; 1365 if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) { 1366 qp--; 1367 rp += v[n-1]; 1368 if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2])) 1369 qp--; 1370 } 1371 DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); 1372 1373 // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with 1374 // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation 1375 // consists of a simple multiplication by a one-place number, combined with 1376 // a subtraction. 1377 // The digits (u[j+n]...u[j]) should be kept positive; if the result of 1378 // this step is actually negative, (u[j+n]...u[j]) should be left as the 1379 // true value plus b**(n+1), namely as the b's complement of 1380 // the true value, and a "borrow" to the left should be remembered. 1381 int64_t borrow = 0; 1382 for (unsigned i = 0; i < n; ++i) { 1383 uint64_t p = uint64_t(qp) * uint64_t(v[i]); 1384 int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p); 1385 u[j+i] = Lo_32(subres); 1386 borrow = Hi_32(p) - Hi_32(subres); 1387 DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i] 1388 << ", borrow = " << borrow << '\n'); 1389 } 1390 bool isNeg = u[j+n] < borrow; 1391 u[j+n] -= Lo_32(borrow); 1392 1393 DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:"); 1394 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1395 DEBUG_KNUTH(dbgs() << '\n'); 1396 1397 // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was 1398 // negative, go to step D6; otherwise go on to step D7. 1399 q[j] = Lo_32(qp); 1400 if (isNeg) { 1401 // D6. [Add back]. The probability that this step is necessary is very 1402 // small, on the order of only 2/b. Make sure that test data accounts for 1403 // this possibility. Decrease q[j] by 1 1404 q[j]--; 1405 // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). 1406 // A carry will occur to the left of u[j+n], and it should be ignored 1407 // since it cancels with the borrow that occurred in D4. 1408 bool carry = false; 1409 for (unsigned i = 0; i < n; i++) { 1410 uint32_t limit = std::min(u[j+i],v[i]); 1411 u[j+i] += v[i] + carry; 1412 carry = u[j+i] < limit || (carry && u[j+i] == limit); 1413 } 1414 u[j+n] += carry; 1415 } 1416 DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:"); 1417 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1418 DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n'); 1419 1420 // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. 1421 } while (--j >= 0); 1422 1423 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:"); 1424 DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]); 1425 DEBUG_KNUTH(dbgs() << '\n'); 1426 1427 // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired 1428 // remainder may be obtained by dividing u[...] by d. If r is non-null we 1429 // compute the remainder (urem uses this). 1430 if (r) { 1431 // The value d is expressed by the "shift" value above since we avoided 1432 // multiplication by d by using a shift left. So, all we have to do is 1433 // shift right here. 1434 if (shift) { 1435 uint32_t carry = 0; 1436 DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:"); 1437 for (int i = n-1; i >= 0; i--) { 1438 r[i] = (u[i] >> shift) | carry; 1439 carry = u[i] << (32 - shift); 1440 DEBUG_KNUTH(dbgs() << " " << r[i]); 1441 } 1442 } else { 1443 for (int i = n-1; i >= 0; i--) { 1444 r[i] = u[i]; 1445 DEBUG_KNUTH(dbgs() << " " << r[i]); 1446 } 1447 } 1448 DEBUG_KNUTH(dbgs() << '\n'); 1449 } 1450 DEBUG_KNUTH(dbgs() << '\n'); 1451 } 1452 1453 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS, 1454 unsigned rhsWords, WordType *Quotient, WordType *Remainder) { 1455 assert(lhsWords >= rhsWords && "Fractional result"); 1456 1457 // First, compose the values into an array of 32-bit words instead of 1458 // 64-bit words. This is a necessity of both the "short division" algorithm 1459 // and the Knuth "classical algorithm" which requires there to be native 1460 // operations for +, -, and * on an m bit value with an m*2 bit result. We 1461 // can't use 64-bit operands here because we don't have native results of 1462 // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't 1463 // work on large-endian machines. 1464 unsigned n = rhsWords * 2; 1465 unsigned m = (lhsWords * 2) - n; 1466 1467 // Allocate space for the temporary values we need either on the stack, if 1468 // it will fit, or on the heap if it won't. 1469 uint32_t SPACE[128]; 1470 uint32_t *U = nullptr; 1471 uint32_t *V = nullptr; 1472 uint32_t *Q = nullptr; 1473 uint32_t *R = nullptr; 1474 if ((Remainder?4:3)*n+2*m+1 <= 128) { 1475 U = &SPACE[0]; 1476 V = &SPACE[m+n+1]; 1477 Q = &SPACE[(m+n+1) + n]; 1478 if (Remainder) 1479 R = &SPACE[(m+n+1) + n + (m+n)]; 1480 } else { 1481 U = new uint32_t[m + n + 1]; 1482 V = new uint32_t[n]; 1483 Q = new uint32_t[m+n]; 1484 if (Remainder) 1485 R = new uint32_t[n]; 1486 } 1487 1488 // Initialize the dividend 1489 memset(U, 0, (m+n+1)*sizeof(uint32_t)); 1490 for (unsigned i = 0; i < lhsWords; ++i) { 1491 uint64_t tmp = LHS[i]; 1492 U[i * 2] = Lo_32(tmp); 1493 U[i * 2 + 1] = Hi_32(tmp); 1494 } 1495 U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm. 1496 1497 // Initialize the divisor 1498 memset(V, 0, (n)*sizeof(uint32_t)); 1499 for (unsigned i = 0; i < rhsWords; ++i) { 1500 uint64_t tmp = RHS[i]; 1501 V[i * 2] = Lo_32(tmp); 1502 V[i * 2 + 1] = Hi_32(tmp); 1503 } 1504 1505 // initialize the quotient and remainder 1506 memset(Q, 0, (m+n) * sizeof(uint32_t)); 1507 if (Remainder) 1508 memset(R, 0, n * sizeof(uint32_t)); 1509 1510 // Now, adjust m and n for the Knuth division. n is the number of words in 1511 // the divisor. m is the number of words by which the dividend exceeds the 1512 // divisor (i.e. m+n is the length of the dividend). These sizes must not 1513 // contain any zero words or the Knuth algorithm fails. 1514 for (unsigned i = n; i > 0 && V[i-1] == 0; i--) { 1515 n--; 1516 m++; 1517 } 1518 for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--) 1519 m--; 1520 1521 // If we're left with only a single word for the divisor, Knuth doesn't work 1522 // so we implement the short division algorithm here. This is much simpler 1523 // and faster because we are certain that we can divide a 64-bit quantity 1524 // by a 32-bit quantity at hardware speed and short division is simply a 1525 // series of such operations. This is just like doing short division but we 1526 // are using base 2^32 instead of base 10. 1527 assert(n != 0 && "Divide by zero?"); 1528 if (n == 1) { 1529 uint32_t divisor = V[0]; 1530 uint32_t remainder = 0; 1531 for (int i = m; i >= 0; i--) { 1532 uint64_t partial_dividend = Make_64(remainder, U[i]); 1533 if (partial_dividend == 0) { 1534 Q[i] = 0; 1535 remainder = 0; 1536 } else if (partial_dividend < divisor) { 1537 Q[i] = 0; 1538 remainder = Lo_32(partial_dividend); 1539 } else if (partial_dividend == divisor) { 1540 Q[i] = 1; 1541 remainder = 0; 1542 } else { 1543 Q[i] = Lo_32(partial_dividend / divisor); 1544 remainder = Lo_32(partial_dividend - (Q[i] * divisor)); 1545 } 1546 } 1547 if (R) 1548 R[0] = remainder; 1549 } else { 1550 // Now we're ready to invoke the Knuth classical divide algorithm. In this 1551 // case n > 1. 1552 KnuthDiv(U, V, Q, R, m, n); 1553 } 1554 1555 // If the caller wants the quotient 1556 if (Quotient) { 1557 for (unsigned i = 0; i < lhsWords; ++i) 1558 Quotient[i] = Make_64(Q[i*2+1], Q[i*2]); 1559 } 1560 1561 // If the caller wants the remainder 1562 if (Remainder) { 1563 for (unsigned i = 0; i < rhsWords; ++i) 1564 Remainder[i] = Make_64(R[i*2+1], R[i*2]); 1565 } 1566 1567 // Clean up the memory we allocated. 1568 if (U != &SPACE[0]) { 1569 delete [] U; 1570 delete [] V; 1571 delete [] Q; 1572 delete [] R; 1573 } 1574 } 1575 1576 APInt APInt::udiv(const APInt &RHS) const { 1577 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1578 1579 // First, deal with the easy case 1580 if (isSingleWord()) { 1581 assert(RHS.U.VAL != 0 && "Divide by zero?"); 1582 return APInt(BitWidth, U.VAL / RHS.U.VAL); 1583 } 1584 1585 // Get some facts about the LHS and RHS number of bits and words 1586 unsigned lhsWords = getNumWords(getActiveBits()); 1587 unsigned rhsBits = RHS.getActiveBits(); 1588 unsigned rhsWords = getNumWords(rhsBits); 1589 assert(rhsWords && "Divided by zero???"); 1590 1591 // Deal with some degenerate cases 1592 if (!lhsWords) 1593 // 0 / X ===> 0 1594 return APInt(BitWidth, 0); 1595 if (rhsBits == 1) 1596 // X / 1 ===> X 1597 return *this; 1598 if (lhsWords < rhsWords || this->ult(RHS)) 1599 // X / Y ===> 0, iff X < Y 1600 return APInt(BitWidth, 0); 1601 if (*this == RHS) 1602 // X / X ===> 1 1603 return APInt(BitWidth, 1); 1604 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. 1605 // All high words are zero, just use native divide 1606 return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]); 1607 1608 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1609 APInt Quotient(BitWidth, 0); // to hold result. 1610 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr); 1611 return Quotient; 1612 } 1613 1614 APInt APInt::udiv(uint64_t RHS) const { 1615 assert(RHS != 0 && "Divide by zero?"); 1616 1617 // First, deal with the easy case 1618 if (isSingleWord()) 1619 return APInt(BitWidth, U.VAL / RHS); 1620 1621 // Get some facts about the LHS words. 1622 unsigned lhsWords = getNumWords(getActiveBits()); 1623 1624 // Deal with some degenerate cases 1625 if (!lhsWords) 1626 // 0 / X ===> 0 1627 return APInt(BitWidth, 0); 1628 if (RHS == 1) 1629 // X / 1 ===> X 1630 return *this; 1631 if (this->ult(RHS)) 1632 // X / Y ===> 0, iff X < Y 1633 return APInt(BitWidth, 0); 1634 if (*this == RHS) 1635 // X / X ===> 1 1636 return APInt(BitWidth, 1); 1637 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. 1638 // All high words are zero, just use native divide 1639 return APInt(BitWidth, this->U.pVal[0] / RHS); 1640 1641 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1642 APInt Quotient(BitWidth, 0); // to hold result. 1643 divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr); 1644 return Quotient; 1645 } 1646 1647 APInt APInt::sdiv(const APInt &RHS) const { 1648 if (isNegative()) { 1649 if (RHS.isNegative()) 1650 return (-(*this)).udiv(-RHS); 1651 return -((-(*this)).udiv(RHS)); 1652 } 1653 if (RHS.isNegative()) 1654 return -(this->udiv(-RHS)); 1655 return this->udiv(RHS); 1656 } 1657 1658 APInt APInt::sdiv(int64_t RHS) const { 1659 if (isNegative()) { 1660 if (RHS < 0) 1661 return (-(*this)).udiv(-RHS); 1662 return -((-(*this)).udiv(RHS)); 1663 } 1664 if (RHS < 0) 1665 return -(this->udiv(-RHS)); 1666 return this->udiv(RHS); 1667 } 1668 1669 APInt APInt::urem(const APInt &RHS) const { 1670 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1671 if (isSingleWord()) { 1672 assert(RHS.U.VAL != 0 && "Remainder by zero?"); 1673 return APInt(BitWidth, U.VAL % RHS.U.VAL); 1674 } 1675 1676 // Get some facts about the LHS 1677 unsigned lhsWords = getNumWords(getActiveBits()); 1678 1679 // Get some facts about the RHS 1680 unsigned rhsBits = RHS.getActiveBits(); 1681 unsigned rhsWords = getNumWords(rhsBits); 1682 assert(rhsWords && "Performing remainder operation by zero ???"); 1683 1684 // Check the degenerate cases 1685 if (lhsWords == 0) 1686 // 0 % Y ===> 0 1687 return APInt(BitWidth, 0); 1688 if (rhsBits == 1) 1689 // X % 1 ===> 0 1690 return APInt(BitWidth, 0); 1691 if (lhsWords < rhsWords || this->ult(RHS)) 1692 // X % Y ===> X, iff X < Y 1693 return *this; 1694 if (*this == RHS) 1695 // X % X == 0; 1696 return APInt(BitWidth, 0); 1697 if (lhsWords == 1) 1698 // All high words are zero, just use native remainder 1699 return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]); 1700 1701 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1702 APInt Remainder(BitWidth, 0); 1703 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal); 1704 return Remainder; 1705 } 1706 1707 uint64_t APInt::urem(uint64_t RHS) const { 1708 assert(RHS != 0 && "Remainder by zero?"); 1709 1710 if (isSingleWord()) 1711 return U.VAL % RHS; 1712 1713 // Get some facts about the LHS 1714 unsigned lhsWords = getNumWords(getActiveBits()); 1715 1716 // Check the degenerate cases 1717 if (lhsWords == 0) 1718 // 0 % Y ===> 0 1719 return 0; 1720 if (RHS == 1) 1721 // X % 1 ===> 0 1722 return 0; 1723 if (this->ult(RHS)) 1724 // X % Y ===> X, iff X < Y 1725 return getZExtValue(); 1726 if (*this == RHS) 1727 // X % X == 0; 1728 return 0; 1729 if (lhsWords == 1) 1730 // All high words are zero, just use native remainder 1731 return U.pVal[0] % RHS; 1732 1733 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1734 uint64_t Remainder; 1735 divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder); 1736 return Remainder; 1737 } 1738 1739 APInt APInt::srem(const APInt &RHS) const { 1740 if (isNegative()) { 1741 if (RHS.isNegative()) 1742 return -((-(*this)).urem(-RHS)); 1743 return -((-(*this)).urem(RHS)); 1744 } 1745 if (RHS.isNegative()) 1746 return this->urem(-RHS); 1747 return this->urem(RHS); 1748 } 1749 1750 int64_t APInt::srem(int64_t RHS) const { 1751 if (isNegative()) { 1752 if (RHS < 0) 1753 return -((-(*this)).urem(-RHS)); 1754 return -((-(*this)).urem(RHS)); 1755 } 1756 if (RHS < 0) 1757 return this->urem(-RHS); 1758 return this->urem(RHS); 1759 } 1760 1761 void APInt::udivrem(const APInt &LHS, const APInt &RHS, 1762 APInt &Quotient, APInt &Remainder) { 1763 assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1764 unsigned BitWidth = LHS.BitWidth; 1765 1766 // First, deal with the easy case 1767 if (LHS.isSingleWord()) { 1768 assert(RHS.U.VAL != 0 && "Divide by zero?"); 1769 uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL; 1770 uint64_t RemVal = LHS.U.VAL % RHS.U.VAL; 1771 Quotient = APInt(BitWidth, QuotVal); 1772 Remainder = APInt(BitWidth, RemVal); 1773 return; 1774 } 1775 1776 // Get some size facts about the dividend and divisor 1777 unsigned lhsWords = getNumWords(LHS.getActiveBits()); 1778 unsigned rhsBits = RHS.getActiveBits(); 1779 unsigned rhsWords = getNumWords(rhsBits); 1780 assert(rhsWords && "Performing divrem operation by zero ???"); 1781 1782 // Check the degenerate cases 1783 if (lhsWords == 0) { 1784 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0 1785 Remainder = APInt(BitWidth, 0); // 0 % Y ===> 0 1786 return; 1787 } 1788 1789 if (rhsBits == 1) { 1790 Quotient = LHS; // X / 1 ===> X 1791 Remainder = APInt(BitWidth, 0); // X % 1 ===> 0 1792 } 1793 1794 if (lhsWords < rhsWords || LHS.ult(RHS)) { 1795 Remainder = LHS; // X % Y ===> X, iff X < Y 1796 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y 1797 return; 1798 } 1799 1800 if (LHS == RHS) { 1801 Quotient = APInt(BitWidth, 1); // X / X ===> 1 1802 Remainder = APInt(BitWidth, 0); // X % X ===> 0; 1803 return; 1804 } 1805 1806 // Make sure there is enough space to hold the results. 1807 // NOTE: This assumes that reallocate won't affect any bits if it doesn't 1808 // change the size. This is necessary if Quotient or Remainder is aliased 1809 // with LHS or RHS. 1810 Quotient.reallocate(BitWidth); 1811 Remainder.reallocate(BitWidth); 1812 1813 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. 1814 // There is only one word to consider so use the native versions. 1815 uint64_t lhsValue = LHS.U.pVal[0]; 1816 uint64_t rhsValue = RHS.U.pVal[0]; 1817 Quotient = lhsValue / rhsValue; 1818 Remainder = lhsValue % rhsValue; 1819 return; 1820 } 1821 1822 // Okay, lets do it the long way 1823 divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, 1824 Remainder.U.pVal); 1825 // Clear the rest of the Quotient and Remainder. 1826 std::memset(Quotient.U.pVal + lhsWords, 0, 1827 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); 1828 std::memset(Remainder.U.pVal + rhsWords, 0, 1829 (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE); 1830 } 1831 1832 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient, 1833 uint64_t &Remainder) { 1834 assert(RHS != 0 && "Divide by zero?"); 1835 unsigned BitWidth = LHS.BitWidth; 1836 1837 // First, deal with the easy case 1838 if (LHS.isSingleWord()) { 1839 uint64_t QuotVal = LHS.U.VAL / RHS; 1840 Remainder = LHS.U.VAL % RHS; 1841 Quotient = APInt(BitWidth, QuotVal); 1842 return; 1843 } 1844 1845 // Get some size facts about the dividend and divisor 1846 unsigned lhsWords = getNumWords(LHS.getActiveBits()); 1847 1848 // Check the degenerate cases 1849 if (lhsWords == 0) { 1850 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0 1851 Remainder = 0; // 0 % Y ===> 0 1852 return; 1853 } 1854 1855 if (RHS == 1) { 1856 Quotient = LHS; // X / 1 ===> X 1857 Remainder = 0; // X % 1 ===> 0 1858 return; 1859 } 1860 1861 if (LHS.ult(RHS)) { 1862 Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y 1863 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y 1864 return; 1865 } 1866 1867 if (LHS == RHS) { 1868 Quotient = APInt(BitWidth, 1); // X / X ===> 1 1869 Remainder = 0; // X % X ===> 0; 1870 return; 1871 } 1872 1873 // Make sure there is enough space to hold the results. 1874 // NOTE: This assumes that reallocate won't affect any bits if it doesn't 1875 // change the size. This is necessary if Quotient is aliased with LHS. 1876 Quotient.reallocate(BitWidth); 1877 1878 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. 1879 // There is only one word to consider so use the native versions. 1880 uint64_t lhsValue = LHS.U.pVal[0]; 1881 Quotient = lhsValue / RHS; 1882 Remainder = lhsValue % RHS; 1883 return; 1884 } 1885 1886 // Okay, lets do it the long way 1887 divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder); 1888 // Clear the rest of the Quotient. 1889 std::memset(Quotient.U.pVal + lhsWords, 0, 1890 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); 1891 } 1892 1893 void APInt::sdivrem(const APInt &LHS, const APInt &RHS, 1894 APInt &Quotient, APInt &Remainder) { 1895 if (LHS.isNegative()) { 1896 if (RHS.isNegative()) 1897 APInt::udivrem(-LHS, -RHS, Quotient, Remainder); 1898 else { 1899 APInt::udivrem(-LHS, RHS, Quotient, Remainder); 1900 Quotient.negate(); 1901 } 1902 Remainder.negate(); 1903 } else if (RHS.isNegative()) { 1904 APInt::udivrem(LHS, -RHS, Quotient, Remainder); 1905 Quotient.negate(); 1906 } else { 1907 APInt::udivrem(LHS, RHS, Quotient, Remainder); 1908 } 1909 } 1910 1911 void APInt::sdivrem(const APInt &LHS, int64_t RHS, 1912 APInt &Quotient, int64_t &Remainder) { 1913 uint64_t R = Remainder; 1914 if (LHS.isNegative()) { 1915 if (RHS < 0) 1916 APInt::udivrem(-LHS, -RHS, Quotient, R); 1917 else { 1918 APInt::udivrem(-LHS, RHS, Quotient, R); 1919 Quotient.negate(); 1920 } 1921 R = -R; 1922 } else if (RHS < 0) { 1923 APInt::udivrem(LHS, -RHS, Quotient, R); 1924 Quotient.negate(); 1925 } else { 1926 APInt::udivrem(LHS, RHS, Quotient, R); 1927 } 1928 Remainder = R; 1929 } 1930 1931 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const { 1932 APInt Res = *this+RHS; 1933 Overflow = isNonNegative() == RHS.isNonNegative() && 1934 Res.isNonNegative() != isNonNegative(); 1935 return Res; 1936 } 1937 1938 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const { 1939 APInt Res = *this+RHS; 1940 Overflow = Res.ult(RHS); 1941 return Res; 1942 } 1943 1944 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const { 1945 APInt Res = *this - RHS; 1946 Overflow = isNonNegative() != RHS.isNonNegative() && 1947 Res.isNonNegative() != isNonNegative(); 1948 return Res; 1949 } 1950 1951 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const { 1952 APInt Res = *this-RHS; 1953 Overflow = Res.ugt(*this); 1954 return Res; 1955 } 1956 1957 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const { 1958 // MININT/-1 --> overflow. 1959 Overflow = isMinSignedValue() && RHS.isAllOnesValue(); 1960 return sdiv(RHS); 1961 } 1962 1963 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const { 1964 APInt Res = *this * RHS; 1965 1966 if (*this != 0 && RHS != 0) 1967 Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS; 1968 else 1969 Overflow = false; 1970 return Res; 1971 } 1972 1973 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const { 1974 if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) { 1975 Overflow = true; 1976 return *this * RHS; 1977 } 1978 1979 APInt Res = lshr(1) * RHS; 1980 Overflow = Res.isNegative(); 1981 Res <<= 1; 1982 if ((*this)[0]) { 1983 Res += RHS; 1984 if (Res.ult(RHS)) 1985 Overflow = true; 1986 } 1987 return Res; 1988 } 1989 1990 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const { 1991 Overflow = ShAmt.uge(getBitWidth()); 1992 if (Overflow) 1993 return APInt(BitWidth, 0); 1994 1995 if (isNonNegative()) // Don't allow sign change. 1996 Overflow = ShAmt.uge(countLeadingZeros()); 1997 else 1998 Overflow = ShAmt.uge(countLeadingOnes()); 1999 2000 return *this << ShAmt; 2001 } 2002 2003 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const { 2004 Overflow = ShAmt.uge(getBitWidth()); 2005 if (Overflow) 2006 return APInt(BitWidth, 0); 2007 2008 Overflow = ShAmt.ugt(countLeadingZeros()); 2009 2010 return *this << ShAmt; 2011 } 2012 2013 APInt APInt::sadd_sat(const APInt &RHS) const { 2014 bool Overflow; 2015 APInt Res = sadd_ov(RHS, Overflow); 2016 if (!Overflow) 2017 return Res; 2018 2019 return isNegative() ? APInt::getSignedMinValue(BitWidth) 2020 : APInt::getSignedMaxValue(BitWidth); 2021 } 2022 2023 APInt APInt::uadd_sat(const APInt &RHS) const { 2024 bool Overflow; 2025 APInt Res = uadd_ov(RHS, Overflow); 2026 if (!Overflow) 2027 return Res; 2028 2029 return APInt::getMaxValue(BitWidth); 2030 } 2031 2032 APInt APInt::ssub_sat(const APInt &RHS) const { 2033 bool Overflow; 2034 APInt Res = ssub_ov(RHS, Overflow); 2035 if (!Overflow) 2036 return Res; 2037 2038 return isNegative() ? APInt::getSignedMinValue(BitWidth) 2039 : APInt::getSignedMaxValue(BitWidth); 2040 } 2041 2042 APInt APInt::usub_sat(const APInt &RHS) const { 2043 bool Overflow; 2044 APInt Res = usub_ov(RHS, Overflow); 2045 if (!Overflow) 2046 return Res; 2047 2048 return APInt(BitWidth, 0); 2049 } 2050 2051 APInt APInt::smul_sat(const APInt &RHS) const { 2052 bool Overflow; 2053 APInt Res = smul_ov(RHS, Overflow); 2054 if (!Overflow) 2055 return Res; 2056 2057 // The result is negative if one and only one of inputs is negative. 2058 bool ResIsNegative = isNegative() ^ RHS.isNegative(); 2059 2060 return ResIsNegative ? APInt::getSignedMinValue(BitWidth) 2061 : APInt::getSignedMaxValue(BitWidth); 2062 } 2063 2064 APInt APInt::umul_sat(const APInt &RHS) const { 2065 bool Overflow; 2066 APInt Res = umul_ov(RHS, Overflow); 2067 if (!Overflow) 2068 return Res; 2069 2070 return APInt::getMaxValue(BitWidth); 2071 } 2072 2073 APInt APInt::sshl_sat(const APInt &RHS) const { 2074 bool Overflow; 2075 APInt Res = sshl_ov(RHS, Overflow); 2076 if (!Overflow) 2077 return Res; 2078 2079 return isNegative() ? APInt::getSignedMinValue(BitWidth) 2080 : APInt::getSignedMaxValue(BitWidth); 2081 } 2082 2083 APInt APInt::ushl_sat(const APInt &RHS) const { 2084 bool Overflow; 2085 APInt Res = ushl_ov(RHS, Overflow); 2086 if (!Overflow) 2087 return Res; 2088 2089 return APInt::getMaxValue(BitWidth); 2090 } 2091 2092 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { 2093 // Check our assumptions here 2094 assert(!str.empty() && "Invalid string length"); 2095 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || 2096 radix == 36) && 2097 "Radix should be 2, 8, 10, 16, or 36!"); 2098 2099 StringRef::iterator p = str.begin(); 2100 size_t slen = str.size(); 2101 bool isNeg = *p == '-'; 2102 if (*p == '-' || *p == '+') { 2103 p++; 2104 slen--; 2105 assert(slen && "String is only a sign, needs a value."); 2106 } 2107 assert((slen <= numbits || radix != 2) && "Insufficient bit width"); 2108 assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width"); 2109 assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width"); 2110 assert((((slen-1)*64)/22 <= numbits || radix != 10) && 2111 "Insufficient bit width"); 2112 2113 // Allocate memory if needed 2114 if (isSingleWord()) 2115 U.VAL = 0; 2116 else 2117 U.pVal = getClearedMemory(getNumWords()); 2118 2119 // Figure out if we can shift instead of multiply 2120 unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0); 2121 2122 // Enter digit traversal loop 2123 for (StringRef::iterator e = str.end(); p != e; ++p) { 2124 unsigned digit = getDigit(*p, radix); 2125 assert(digit < radix && "Invalid character in digit string"); 2126 2127 // Shift or multiply the value by the radix 2128 if (slen > 1) { 2129 if (shift) 2130 *this <<= shift; 2131 else 2132 *this *= radix; 2133 } 2134 2135 // Add in the digit we just interpreted 2136 *this += digit; 2137 } 2138 // If its negative, put it in two's complement form 2139 if (isNeg) 2140 this->negate(); 2141 } 2142 2143 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, 2144 bool Signed, bool formatAsCLiteral) const { 2145 assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 || 2146 Radix == 36) && 2147 "Radix should be 2, 8, 10, 16, or 36!"); 2148 2149 const char *Prefix = ""; 2150 if (formatAsCLiteral) { 2151 switch (Radix) { 2152 case 2: 2153 // Binary literals are a non-standard extension added in gcc 4.3: 2154 // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html 2155 Prefix = "0b"; 2156 break; 2157 case 8: 2158 Prefix = "0"; 2159 break; 2160 case 10: 2161 break; // No prefix 2162 case 16: 2163 Prefix = "0x"; 2164 break; 2165 default: 2166 llvm_unreachable("Invalid radix!"); 2167 } 2168 } 2169 2170 // First, check for a zero value and just short circuit the logic below. 2171 if (*this == 0) { 2172 while (*Prefix) { 2173 Str.push_back(*Prefix); 2174 ++Prefix; 2175 }; 2176 Str.push_back('0'); 2177 return; 2178 } 2179 2180 static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 2181 2182 if (isSingleWord()) { 2183 char Buffer[65]; 2184 char *BufPtr = std::end(Buffer); 2185 2186 uint64_t N; 2187 if (!Signed) { 2188 N = getZExtValue(); 2189 } else { 2190 int64_t I = getSExtValue(); 2191 if (I >= 0) { 2192 N = I; 2193 } else { 2194 Str.push_back('-'); 2195 N = -(uint64_t)I; 2196 } 2197 } 2198 2199 while (*Prefix) { 2200 Str.push_back(*Prefix); 2201 ++Prefix; 2202 }; 2203 2204 while (N) { 2205 *--BufPtr = Digits[N % Radix]; 2206 N /= Radix; 2207 } 2208 Str.append(BufPtr, std::end(Buffer)); 2209 return; 2210 } 2211 2212 APInt Tmp(*this); 2213 2214 if (Signed && isNegative()) { 2215 // They want to print the signed version and it is a negative value 2216 // Flip the bits and add one to turn it into the equivalent positive 2217 // value and put a '-' in the result. 2218 Tmp.negate(); 2219 Str.push_back('-'); 2220 } 2221 2222 while (*Prefix) { 2223 Str.push_back(*Prefix); 2224 ++Prefix; 2225 }; 2226 2227 // We insert the digits backward, then reverse them to get the right order. 2228 unsigned StartDig = Str.size(); 2229 2230 // For the 2, 8 and 16 bit cases, we can just shift instead of divide 2231 // because the number of bits per digit (1, 3 and 4 respectively) divides 2232 // equally. We just shift until the value is zero. 2233 if (Radix == 2 || Radix == 8 || Radix == 16) { 2234 // Just shift tmp right for each digit width until it becomes zero 2235 unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1)); 2236 unsigned MaskAmt = Radix - 1; 2237 2238 while (Tmp.getBoolValue()) { 2239 unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt; 2240 Str.push_back(Digits[Digit]); 2241 Tmp.lshrInPlace(ShiftAmt); 2242 } 2243 } else { 2244 while (Tmp.getBoolValue()) { 2245 uint64_t Digit; 2246 udivrem(Tmp, Radix, Tmp, Digit); 2247 assert(Digit < Radix && "divide failed"); 2248 Str.push_back(Digits[Digit]); 2249 } 2250 } 2251 2252 // Reverse the digits before returning. 2253 std::reverse(Str.begin()+StartDig, Str.end()); 2254 } 2255 2256 /// Returns the APInt as a std::string. Note that this is an inefficient method. 2257 /// It is better to pass in a SmallVector/SmallString to the methods above. 2258 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const { 2259 SmallString<40> S; 2260 toString(S, Radix, Signed, /* formatAsCLiteral = */false); 2261 return S.str(); 2262 } 2263 2264 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2265 LLVM_DUMP_METHOD void APInt::dump() const { 2266 SmallString<40> S, U; 2267 this->toStringUnsigned(U); 2268 this->toStringSigned(S); 2269 dbgs() << "APInt(" << BitWidth << "b, " 2270 << U << "u " << S << "s)\n"; 2271 } 2272 #endif 2273 2274 void APInt::print(raw_ostream &OS, bool isSigned) const { 2275 SmallString<40> S; 2276 this->toString(S, 10, isSigned, /* formatAsCLiteral = */false); 2277 OS << S; 2278 } 2279 2280 // This implements a variety of operations on a representation of 2281 // arbitrary precision, two's-complement, bignum integer values. 2282 2283 // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe 2284 // and unrestricting assumption. 2285 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0, 2286 "Part width must be divisible by 2!"); 2287 2288 /* Some handy functions local to this file. */ 2289 2290 /* Returns the integer part with the least significant BITS set. 2291 BITS cannot be zero. */ 2292 static inline APInt::WordType lowBitMask(unsigned bits) { 2293 assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD); 2294 2295 return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits); 2296 } 2297 2298 /* Returns the value of the lower half of PART. */ 2299 static inline APInt::WordType lowHalf(APInt::WordType part) { 2300 return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2); 2301 } 2302 2303 /* Returns the value of the upper half of PART. */ 2304 static inline APInt::WordType highHalf(APInt::WordType part) { 2305 return part >> (APInt::APINT_BITS_PER_WORD / 2); 2306 } 2307 2308 /* Returns the bit number of the most significant set bit of a part. 2309 If the input number has no bits set -1U is returned. */ 2310 static unsigned partMSB(APInt::WordType value) { 2311 return findLastSet(value, ZB_Max); 2312 } 2313 2314 /* Returns the bit number of the least significant set bit of a 2315 part. If the input number has no bits set -1U is returned. */ 2316 static unsigned partLSB(APInt::WordType value) { 2317 return findFirstSet(value, ZB_Max); 2318 } 2319 2320 /* Sets the least significant part of a bignum to the input value, and 2321 zeroes out higher parts. */ 2322 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) { 2323 assert(parts > 0); 2324 2325 dst[0] = part; 2326 for (unsigned i = 1; i < parts; i++) 2327 dst[i] = 0; 2328 } 2329 2330 /* Assign one bignum to another. */ 2331 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) { 2332 for (unsigned i = 0; i < parts; i++) 2333 dst[i] = src[i]; 2334 } 2335 2336 /* Returns true if a bignum is zero, false otherwise. */ 2337 bool APInt::tcIsZero(const WordType *src, unsigned parts) { 2338 for (unsigned i = 0; i < parts; i++) 2339 if (src[i]) 2340 return false; 2341 2342 return true; 2343 } 2344 2345 /* Extract the given bit of a bignum; returns 0 or 1. */ 2346 int APInt::tcExtractBit(const WordType *parts, unsigned bit) { 2347 return (parts[whichWord(bit)] & maskBit(bit)) != 0; 2348 } 2349 2350 /* Set the given bit of a bignum. */ 2351 void APInt::tcSetBit(WordType *parts, unsigned bit) { 2352 parts[whichWord(bit)] |= maskBit(bit); 2353 } 2354 2355 /* Clears the given bit of a bignum. */ 2356 void APInt::tcClearBit(WordType *parts, unsigned bit) { 2357 parts[whichWord(bit)] &= ~maskBit(bit); 2358 } 2359 2360 /* Returns the bit number of the least significant set bit of a 2361 number. If the input number has no bits set -1U is returned. */ 2362 unsigned APInt::tcLSB(const WordType *parts, unsigned n) { 2363 for (unsigned i = 0; i < n; i++) { 2364 if (parts[i] != 0) { 2365 unsigned lsb = partLSB(parts[i]); 2366 2367 return lsb + i * APINT_BITS_PER_WORD; 2368 } 2369 } 2370 2371 return -1U; 2372 } 2373 2374 /* Returns the bit number of the most significant set bit of a number. 2375 If the input number has no bits set -1U is returned. */ 2376 unsigned APInt::tcMSB(const WordType *parts, unsigned n) { 2377 do { 2378 --n; 2379 2380 if (parts[n] != 0) { 2381 unsigned msb = partMSB(parts[n]); 2382 2383 return msb + n * APINT_BITS_PER_WORD; 2384 } 2385 } while (n); 2386 2387 return -1U; 2388 } 2389 2390 /* Copy the bit vector of width srcBITS from SRC, starting at bit 2391 srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes 2392 the least significant bit of DST. All high bits above srcBITS in 2393 DST are zero-filled. */ 2394 void 2395 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src, 2396 unsigned srcBits, unsigned srcLSB) { 2397 unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD; 2398 assert(dstParts <= dstCount); 2399 2400 unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD; 2401 tcAssign (dst, src + firstSrcPart, dstParts); 2402 2403 unsigned shift = srcLSB % APINT_BITS_PER_WORD; 2404 tcShiftRight (dst, dstParts, shift); 2405 2406 /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC 2407 in DST. If this is less that srcBits, append the rest, else 2408 clear the high bits. */ 2409 unsigned n = dstParts * APINT_BITS_PER_WORD - shift; 2410 if (n < srcBits) { 2411 WordType mask = lowBitMask (srcBits - n); 2412 dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask) 2413 << n % APINT_BITS_PER_WORD); 2414 } else if (n > srcBits) { 2415 if (srcBits % APINT_BITS_PER_WORD) 2416 dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD); 2417 } 2418 2419 /* Clear high parts. */ 2420 while (dstParts < dstCount) 2421 dst[dstParts++] = 0; 2422 } 2423 2424 /* DST += RHS + C where C is zero or one. Returns the carry flag. */ 2425 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs, 2426 WordType c, unsigned parts) { 2427 assert(c <= 1); 2428 2429 for (unsigned i = 0; i < parts; i++) { 2430 WordType l = dst[i]; 2431 if (c) { 2432 dst[i] += rhs[i] + 1; 2433 c = (dst[i] <= l); 2434 } else { 2435 dst[i] += rhs[i]; 2436 c = (dst[i] < l); 2437 } 2438 } 2439 2440 return c; 2441 } 2442 2443 /// This function adds a single "word" integer, src, to the multiple 2444 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and 2445 /// 1 is returned if there is a carry out, otherwise 0 is returned. 2446 /// @returns the carry of the addition. 2447 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src, 2448 unsigned parts) { 2449 for (unsigned i = 0; i < parts; ++i) { 2450 dst[i] += src; 2451 if (dst[i] >= src) 2452 return 0; // No need to carry so exit early. 2453 src = 1; // Carry one to next digit. 2454 } 2455 2456 return 1; 2457 } 2458 2459 /* DST -= RHS + C where C is zero or one. Returns the carry flag. */ 2460 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs, 2461 WordType c, unsigned parts) { 2462 assert(c <= 1); 2463 2464 for (unsigned i = 0; i < parts; i++) { 2465 WordType l = dst[i]; 2466 if (c) { 2467 dst[i] -= rhs[i] + 1; 2468 c = (dst[i] >= l); 2469 } else { 2470 dst[i] -= rhs[i]; 2471 c = (dst[i] > l); 2472 } 2473 } 2474 2475 return c; 2476 } 2477 2478 /// This function subtracts a single "word" (64-bit word), src, from 2479 /// the multi-word integer array, dst[], propagating the borrowed 1 value until 2480 /// no further borrowing is needed or it runs out of "words" in dst. The result 2481 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not 2482 /// exhausted. In other words, if src > dst then this function returns 1, 2483 /// otherwise 0. 2484 /// @returns the borrow out of the subtraction 2485 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src, 2486 unsigned parts) { 2487 for (unsigned i = 0; i < parts; ++i) { 2488 WordType Dst = dst[i]; 2489 dst[i] -= src; 2490 if (src <= Dst) 2491 return 0; // No need to borrow so exit early. 2492 src = 1; // We have to "borrow 1" from next "word" 2493 } 2494 2495 return 1; 2496 } 2497 2498 /* Negate a bignum in-place. */ 2499 void APInt::tcNegate(WordType *dst, unsigned parts) { 2500 tcComplement(dst, parts); 2501 tcIncrement(dst, parts); 2502 } 2503 2504 /* DST += SRC * MULTIPLIER + CARRY if add is true 2505 DST = SRC * MULTIPLIER + CARRY if add is false 2506 2507 Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC 2508 they must start at the same point, i.e. DST == SRC. 2509 2510 If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is 2511 returned. Otherwise DST is filled with the least significant 2512 DSTPARTS parts of the result, and if all of the omitted higher 2513 parts were zero return zero, otherwise overflow occurred and 2514 return one. */ 2515 int APInt::tcMultiplyPart(WordType *dst, const WordType *src, 2516 WordType multiplier, WordType carry, 2517 unsigned srcParts, unsigned dstParts, 2518 bool add) { 2519 /* Otherwise our writes of DST kill our later reads of SRC. */ 2520 assert(dst <= src || dst >= src + srcParts); 2521 assert(dstParts <= srcParts + 1); 2522 2523 /* N loops; minimum of dstParts and srcParts. */ 2524 unsigned n = std::min(dstParts, srcParts); 2525 2526 for (unsigned i = 0; i < n; i++) { 2527 WordType low, mid, high, srcPart; 2528 2529 /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY. 2530 2531 This cannot overflow, because 2532 2533 (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1) 2534 2535 which is less than n^2. */ 2536 2537 srcPart = src[i]; 2538 2539 if (multiplier == 0 || srcPart == 0) { 2540 low = carry; 2541 high = 0; 2542 } else { 2543 low = lowHalf(srcPart) * lowHalf(multiplier); 2544 high = highHalf(srcPart) * highHalf(multiplier); 2545 2546 mid = lowHalf(srcPart) * highHalf(multiplier); 2547 high += highHalf(mid); 2548 mid <<= APINT_BITS_PER_WORD / 2; 2549 if (low + mid < low) 2550 high++; 2551 low += mid; 2552 2553 mid = highHalf(srcPart) * lowHalf(multiplier); 2554 high += highHalf(mid); 2555 mid <<= APINT_BITS_PER_WORD / 2; 2556 if (low + mid < low) 2557 high++; 2558 low += mid; 2559 2560 /* Now add carry. */ 2561 if (low + carry < low) 2562 high++; 2563 low += carry; 2564 } 2565 2566 if (add) { 2567 /* And now DST[i], and store the new low part there. */ 2568 if (low + dst[i] < low) 2569 high++; 2570 dst[i] += low; 2571 } else 2572 dst[i] = low; 2573 2574 carry = high; 2575 } 2576 2577 if (srcParts < dstParts) { 2578 /* Full multiplication, there is no overflow. */ 2579 assert(srcParts + 1 == dstParts); 2580 dst[srcParts] = carry; 2581 return 0; 2582 } 2583 2584 /* We overflowed if there is carry. */ 2585 if (carry) 2586 return 1; 2587 2588 /* We would overflow if any significant unwritten parts would be 2589 non-zero. This is true if any remaining src parts are non-zero 2590 and the multiplier is non-zero. */ 2591 if (multiplier) 2592 for (unsigned i = dstParts; i < srcParts; i++) 2593 if (src[i]) 2594 return 1; 2595 2596 /* We fitted in the narrow destination. */ 2597 return 0; 2598 } 2599 2600 /* DST = LHS * RHS, where DST has the same width as the operands and 2601 is filled with the least significant parts of the result. Returns 2602 one if overflow occurred, otherwise zero. DST must be disjoint 2603 from both operands. */ 2604 int APInt::tcMultiply(WordType *dst, const WordType *lhs, 2605 const WordType *rhs, unsigned parts) { 2606 assert(dst != lhs && dst != rhs); 2607 2608 int overflow = 0; 2609 tcSet(dst, 0, parts); 2610 2611 for (unsigned i = 0; i < parts; i++) 2612 overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts, 2613 parts - i, true); 2614 2615 return overflow; 2616 } 2617 2618 /// DST = LHS * RHS, where DST has width the sum of the widths of the 2619 /// operands. No overflow occurs. DST must be disjoint from both operands. 2620 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs, 2621 const WordType *rhs, unsigned lhsParts, 2622 unsigned rhsParts) { 2623 /* Put the narrower number on the LHS for less loops below. */ 2624 if (lhsParts > rhsParts) 2625 return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts); 2626 2627 assert(dst != lhs && dst != rhs); 2628 2629 tcSet(dst, 0, rhsParts); 2630 2631 for (unsigned i = 0; i < lhsParts; i++) 2632 tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true); 2633 } 2634 2635 /* If RHS is zero LHS and REMAINDER are left unchanged, return one. 2636 Otherwise set LHS to LHS / RHS with the fractional part discarded, 2637 set REMAINDER to the remainder, return zero. i.e. 2638 2639 OLD_LHS = RHS * LHS + REMAINDER 2640 2641 SCRATCH is a bignum of the same size as the operands and result for 2642 use by the routine; its contents need not be initialized and are 2643 destroyed. LHS, REMAINDER and SCRATCH must be distinct. 2644 */ 2645 int APInt::tcDivide(WordType *lhs, const WordType *rhs, 2646 WordType *remainder, WordType *srhs, 2647 unsigned parts) { 2648 assert(lhs != remainder && lhs != srhs && remainder != srhs); 2649 2650 unsigned shiftCount = tcMSB(rhs, parts) + 1; 2651 if (shiftCount == 0) 2652 return true; 2653 2654 shiftCount = parts * APINT_BITS_PER_WORD - shiftCount; 2655 unsigned n = shiftCount / APINT_BITS_PER_WORD; 2656 WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD); 2657 2658 tcAssign(srhs, rhs, parts); 2659 tcShiftLeft(srhs, parts, shiftCount); 2660 tcAssign(remainder, lhs, parts); 2661 tcSet(lhs, 0, parts); 2662 2663 /* Loop, subtracting SRHS if REMAINDER is greater and adding that to 2664 the total. */ 2665 for (;;) { 2666 int compare = tcCompare(remainder, srhs, parts); 2667 if (compare >= 0) { 2668 tcSubtract(remainder, srhs, 0, parts); 2669 lhs[n] |= mask; 2670 } 2671 2672 if (shiftCount == 0) 2673 break; 2674 shiftCount--; 2675 tcShiftRight(srhs, parts, 1); 2676 if ((mask >>= 1) == 0) { 2677 mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1); 2678 n--; 2679 } 2680 } 2681 2682 return false; 2683 } 2684 2685 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are 2686 /// no restrictions on Count. 2687 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) { 2688 // Don't bother performing a no-op shift. 2689 if (!Count) 2690 return; 2691 2692 // WordShift is the inter-part shift; BitShift is the intra-part shift. 2693 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); 2694 unsigned BitShift = Count % APINT_BITS_PER_WORD; 2695 2696 // Fastpath for moving by whole words. 2697 if (BitShift == 0) { 2698 std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE); 2699 } else { 2700 while (Words-- > WordShift) { 2701 Dst[Words] = Dst[Words - WordShift] << BitShift; 2702 if (Words > WordShift) 2703 Dst[Words] |= 2704 Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift); 2705 } 2706 } 2707 2708 // Fill in the remainder with 0s. 2709 std::memset(Dst, 0, WordShift * APINT_WORD_SIZE); 2710 } 2711 2712 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There 2713 /// are no restrictions on Count. 2714 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) { 2715 // Don't bother performing a no-op shift. 2716 if (!Count) 2717 return; 2718 2719 // WordShift is the inter-part shift; BitShift is the intra-part shift. 2720 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); 2721 unsigned BitShift = Count % APINT_BITS_PER_WORD; 2722 2723 unsigned WordsToMove = Words - WordShift; 2724 // Fastpath for moving by whole words. 2725 if (BitShift == 0) { 2726 std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE); 2727 } else { 2728 for (unsigned i = 0; i != WordsToMove; ++i) { 2729 Dst[i] = Dst[i + WordShift] >> BitShift; 2730 if (i + 1 != WordsToMove) 2731 Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift); 2732 } 2733 } 2734 2735 // Fill in the remainder with 0s. 2736 std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE); 2737 } 2738 2739 /* Bitwise and of two bignums. */ 2740 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) { 2741 for (unsigned i = 0; i < parts; i++) 2742 dst[i] &= rhs[i]; 2743 } 2744 2745 /* Bitwise inclusive or of two bignums. */ 2746 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) { 2747 for (unsigned i = 0; i < parts; i++) 2748 dst[i] |= rhs[i]; 2749 } 2750 2751 /* Bitwise exclusive or of two bignums. */ 2752 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) { 2753 for (unsigned i = 0; i < parts; i++) 2754 dst[i] ^= rhs[i]; 2755 } 2756 2757 /* Complement a bignum in-place. */ 2758 void APInt::tcComplement(WordType *dst, unsigned parts) { 2759 for (unsigned i = 0; i < parts; i++) 2760 dst[i] = ~dst[i]; 2761 } 2762 2763 /* Comparison (unsigned) of two bignums. */ 2764 int APInt::tcCompare(const WordType *lhs, const WordType *rhs, 2765 unsigned parts) { 2766 while (parts) { 2767 parts--; 2768 if (lhs[parts] != rhs[parts]) 2769 return (lhs[parts] > rhs[parts]) ? 1 : -1; 2770 } 2771 2772 return 0; 2773 } 2774 2775 /* Set the least significant BITS bits of a bignum, clear the 2776 rest. */ 2777 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts, 2778 unsigned bits) { 2779 unsigned i = 0; 2780 while (bits > APINT_BITS_PER_WORD) { 2781 dst[i++] = ~(WordType) 0; 2782 bits -= APINT_BITS_PER_WORD; 2783 } 2784 2785 if (bits) 2786 dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits); 2787 2788 while (i < parts) 2789 dst[i++] = 0; 2790 } 2791 2792 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B, 2793 APInt::Rounding RM) { 2794 // Currently udivrem always rounds down. 2795 switch (RM) { 2796 case APInt::Rounding::DOWN: 2797 case APInt::Rounding::TOWARD_ZERO: 2798 return A.udiv(B); 2799 case APInt::Rounding::UP: { 2800 APInt Quo, Rem; 2801 APInt::udivrem(A, B, Quo, Rem); 2802 if (Rem == 0) 2803 return Quo; 2804 return Quo + 1; 2805 } 2806 } 2807 llvm_unreachable("Unknown APInt::Rounding enum"); 2808 } 2809 2810 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B, 2811 APInt::Rounding RM) { 2812 switch (RM) { 2813 case APInt::Rounding::DOWN: 2814 case APInt::Rounding::UP: { 2815 APInt Quo, Rem; 2816 APInt::sdivrem(A, B, Quo, Rem); 2817 if (Rem == 0) 2818 return Quo; 2819 // This algorithm deals with arbitrary rounding mode used by sdivrem. 2820 // We want to check whether the non-integer part of the mathematical value 2821 // is negative or not. If the non-integer part is negative, we need to round 2822 // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's 2823 // already rounded down. 2824 if (RM == APInt::Rounding::DOWN) { 2825 if (Rem.isNegative() != B.isNegative()) 2826 return Quo - 1; 2827 return Quo; 2828 } 2829 if (Rem.isNegative() != B.isNegative()) 2830 return Quo; 2831 return Quo + 1; 2832 } 2833 // Currently sdiv rounds twards zero. 2834 case APInt::Rounding::TOWARD_ZERO: 2835 return A.sdiv(B); 2836 } 2837 llvm_unreachable("Unknown APInt::Rounding enum"); 2838 } 2839 2840 Optional<APInt> 2841 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C, 2842 unsigned RangeWidth) { 2843 unsigned CoeffWidth = A.getBitWidth(); 2844 assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth()); 2845 assert(RangeWidth <= CoeffWidth && 2846 "Value range width should be less than coefficient width"); 2847 assert(RangeWidth > 1 && "Value range bit width should be > 1"); 2848 2849 LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B 2850 << "x + " << C << ", rw:" << RangeWidth << '\n'); 2851 2852 // Identify 0 as a (non)solution immediately. 2853 if (C.sextOrTrunc(RangeWidth).isNullValue() ) { 2854 LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n"); 2855 return APInt(CoeffWidth, 0); 2856 } 2857 2858 // The result of APInt arithmetic has the same bit width as the operands, 2859 // so it can actually lose high bits. A product of two n-bit integers needs 2860 // 2n-1 bits to represent the full value. 2861 // The operation done below (on quadratic coefficients) that can produce 2862 // the largest value is the evaluation of the equation during bisection, 2863 // which needs 3 times the bitwidth of the coefficient, so the total number 2864 // of required bits is 3n. 2865 // 2866 // The purpose of this extension is to simulate the set Z of all integers, 2867 // where n+1 > n for all n in Z. In Z it makes sense to talk about positive 2868 // and negative numbers (not so much in a modulo arithmetic). The method 2869 // used to solve the equation is based on the standard formula for real 2870 // numbers, and uses the concepts of "positive" and "negative" with their 2871 // usual meanings. 2872 CoeffWidth *= 3; 2873 A = A.sext(CoeffWidth); 2874 B = B.sext(CoeffWidth); 2875 C = C.sext(CoeffWidth); 2876 2877 // Make A > 0 for simplicity. Negate cannot overflow at this point because 2878 // the bit width has increased. 2879 if (A.isNegative()) { 2880 A.negate(); 2881 B.negate(); 2882 C.negate(); 2883 } 2884 2885 // Solving an equation q(x) = 0 with coefficients in modular arithmetic 2886 // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ..., 2887 // and R = 2^BitWidth. 2888 // Since we're trying not only to find exact solutions, but also values 2889 // that "wrap around", such a set will always have a solution, i.e. an x 2890 // that satisfies at least one of the equations, or such that |q(x)| 2891 // exceeds kR, while |q(x-1)| for the same k does not. 2892 // 2893 // We need to find a value k, such that Ax^2 + Bx + C = kR will have a 2894 // positive solution n (in the above sense), and also such that the n 2895 // will be the least among all solutions corresponding to k = 0, 1, ... 2896 // (more precisely, the least element in the set 2897 // { n(k) | k is such that a solution n(k) exists }). 2898 // 2899 // Consider the parabola (over real numbers) that corresponds to the 2900 // quadratic equation. Since A > 0, the arms of the parabola will point 2901 // up. Picking different values of k will shift it up and down by R. 2902 // 2903 // We want to shift the parabola in such a way as to reduce the problem 2904 // of solving q(x) = kR to solving shifted_q(x) = 0. 2905 // (The interesting solutions are the ceilings of the real number 2906 // solutions.) 2907 APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth); 2908 APInt TwoA = 2 * A; 2909 APInt SqrB = B * B; 2910 bool PickLow; 2911 2912 auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt { 2913 assert(A.isStrictlyPositive()); 2914 APInt T = V.abs().urem(A); 2915 if (T.isNullValue()) 2916 return V; 2917 return V.isNegative() ? V+T : V+(A-T); 2918 }; 2919 2920 // The vertex of the parabola is at -B/2A, but since A > 0, it's negative 2921 // iff B is positive. 2922 if (B.isNonNegative()) { 2923 // If B >= 0, the vertex it at a negative location (or at 0), so in 2924 // order to have a non-negative solution we need to pick k that makes 2925 // C-kR negative. To satisfy all the requirements for the solution 2926 // that we are looking for, it needs to be closest to 0 of all k. 2927 C = C.srem(R); 2928 if (C.isStrictlyPositive()) 2929 C -= R; 2930 // Pick the greater solution. 2931 PickLow = false; 2932 } else { 2933 // If B < 0, the vertex is at a positive location. For any solution 2934 // to exist, the discriminant must be non-negative. This means that 2935 // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a 2936 // lower bound on values of k: kR >= C - B^2/4A. 2937 APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0. 2938 // Round LowkR up (towards +inf) to the nearest kR. 2939 LowkR = RoundUp(LowkR, R); 2940 2941 // If there exists k meeting the condition above, and such that 2942 // C-kR > 0, there will be two positive real number solutions of 2943 // q(x) = kR. Out of all such values of k, pick the one that makes 2944 // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0). 2945 // In other words, find maximum k such that LowkR <= kR < C. 2946 if (C.sgt(LowkR)) { 2947 // If LowkR < C, then such a k is guaranteed to exist because 2948 // LowkR itself is a multiple of R. 2949 C -= -RoundUp(-C, R); // C = C - RoundDown(C, R) 2950 // Pick the smaller solution. 2951 PickLow = true; 2952 } else { 2953 // If C-kR < 0 for all potential k's, it means that one solution 2954 // will be negative, while the other will be positive. The positive 2955 // solution will shift towards 0 if the parabola is moved up. 2956 // Pick the kR closest to the lower bound (i.e. make C-kR closest 2957 // to 0, or in other words, out of all parabolas that have solutions, 2958 // pick the one that is the farthest "up"). 2959 // Since LowkR is itself a multiple of R, simply take C-LowkR. 2960 C -= LowkR; 2961 // Pick the greater solution. 2962 PickLow = false; 2963 } 2964 } 2965 2966 LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + " 2967 << B << "x + " << C << ", rw:" << RangeWidth << '\n'); 2968 2969 APInt D = SqrB - 4*A*C; 2970 assert(D.isNonNegative() && "Negative discriminant"); 2971 APInt SQ = D.sqrt(); 2972 2973 APInt Q = SQ * SQ; 2974 bool InexactSQ = Q != D; 2975 // The calculated SQ may actually be greater than the exact (non-integer) 2976 // value. If that's the case, decremement SQ to get a value that is lower. 2977 if (Q.sgt(D)) 2978 SQ -= 1; 2979 2980 APInt X; 2981 APInt Rem; 2982 2983 // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact. 2984 // When using the quadratic formula directly, the calculated low root 2985 // may be greater than the exact one, since we would be subtracting SQ. 2986 // To make sure that the calculated root is not greater than the exact 2987 // one, subtract SQ+1 when calculating the low root (for inexact value 2988 // of SQ). 2989 if (PickLow) 2990 APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem); 2991 else 2992 APInt::sdivrem(-B + SQ, TwoA, X, Rem); 2993 2994 // The updated coefficients should be such that the (exact) solution is 2995 // positive. Since APInt division rounds towards 0, the calculated one 2996 // can be 0, but cannot be negative. 2997 assert(X.isNonNegative() && "Solution should be non-negative"); 2998 2999 if (!InexactSQ && Rem.isNullValue()) { 3000 LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n'); 3001 return X; 3002 } 3003 3004 assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D"); 3005 // The exact value of the square root of D should be between SQ and SQ+1. 3006 // This implies that the solution should be between that corresponding to 3007 // SQ (i.e. X) and that corresponding to SQ+1. 3008 // 3009 // The calculated X cannot be greater than the exact (real) solution. 3010 // Actually it must be strictly less than the exact solution, while 3011 // X+1 will be greater than or equal to it. 3012 3013 APInt VX = (A*X + B)*X + C; 3014 APInt VY = VX + TwoA*X + A + B; 3015 bool SignChange = VX.isNegative() != VY.isNegative() || 3016 VX.isNullValue() != VY.isNullValue(); 3017 // If the sign did not change between X and X+1, X is not a valid solution. 3018 // This could happen when the actual (exact) roots don't have an integer 3019 // between them, so they would both be contained between X and X+1. 3020 if (!SignChange) { 3021 LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n"); 3022 return None; 3023 } 3024 3025 X += 1; 3026 LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n'); 3027 return X; 3028 } 3029 3030 Optional<unsigned> 3031 llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) { 3032 assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth"); 3033 if (A == B) 3034 return llvm::None; 3035 return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1); 3036 } 3037 3038 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst 3039 /// with the integer held in IntVal. 3040 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, 3041 unsigned StoreBytes) { 3042 assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!"); 3043 const uint8_t *Src = (const uint8_t *)IntVal.getRawData(); 3044 3045 if (sys::IsLittleEndianHost) { 3046 // Little-endian host - the source is ordered from LSB to MSB. Order the 3047 // destination from LSB to MSB: Do a straight copy. 3048 memcpy(Dst, Src, StoreBytes); 3049 } else { 3050 // Big-endian host - the source is an array of 64 bit words ordered from 3051 // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination 3052 // from MSB to LSB: Reverse the word order, but not the bytes in a word. 3053 while (StoreBytes > sizeof(uint64_t)) { 3054 StoreBytes -= sizeof(uint64_t); 3055 // May not be aligned so use memcpy. 3056 memcpy(Dst + StoreBytes, Src, sizeof(uint64_t)); 3057 Src += sizeof(uint64_t); 3058 } 3059 3060 memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes); 3061 } 3062 } 3063 3064 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting 3065 /// from Src into IntVal, which is assumed to be wide enough and to hold zero. 3066 void llvm::LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) { 3067 assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!"); 3068 uint8_t *Dst = reinterpret_cast<uint8_t *>( 3069 const_cast<uint64_t *>(IntVal.getRawData())); 3070 3071 if (sys::IsLittleEndianHost) 3072 // Little-endian host - the destination must be ordered from LSB to MSB. 3073 // The source is ordered from LSB to MSB: Do a straight copy. 3074 memcpy(Dst, Src, LoadBytes); 3075 else { 3076 // Big-endian - the destination is an array of 64 bit words ordered from 3077 // LSW to MSW. Each word must be ordered from MSB to LSB. The source is 3078 // ordered from MSB to LSB: Reverse the word order, but not the bytes in 3079 // a word. 3080 while (LoadBytes > sizeof(uint64_t)) { 3081 LoadBytes -= sizeof(uint64_t); 3082 // May not be aligned so use memcpy. 3083 memcpy(Dst, Src + LoadBytes, sizeof(uint64_t)); 3084 Dst += sizeof(uint64_t); 3085 } 3086 3087 memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes); 3088 } 3089 } 3090