1 //===-- APInt.cpp - Implement APInt class ---------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a class to represent arbitrary precision integer 11 // constant values and provide a variety of arithmetic operations on them. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/APInt.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/FoldingSet.h" 18 #include "llvm/ADT/Hashing.h" 19 #include "llvm/ADT/SmallString.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/ErrorHandling.h" 23 #include "llvm/Support/MathExtras.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include <climits> 26 #include <cmath> 27 #include <cstdlib> 28 #include <cstring> 29 using namespace llvm; 30 31 #define DEBUG_TYPE "apint" 32 33 /// A utility function for allocating memory, checking for allocation failures, 34 /// and ensuring the contents are zeroed. 35 inline static uint64_t* getClearedMemory(unsigned numWords) { 36 uint64_t * result = new uint64_t[numWords]; 37 assert(result && "APInt memory allocation fails!"); 38 memset(result, 0, numWords * sizeof(uint64_t)); 39 return result; 40 } 41 42 /// A utility function for allocating memory and checking for allocation 43 /// failure. The content is not zeroed. 44 inline static uint64_t* getMemory(unsigned numWords) { 45 uint64_t * result = new uint64_t[numWords]; 46 assert(result && "APInt memory allocation fails!"); 47 return result; 48 } 49 50 /// A utility function that converts a character to a digit. 51 inline static unsigned getDigit(char cdigit, uint8_t radix) { 52 unsigned r; 53 54 if (radix == 16 || radix == 36) { 55 r = cdigit - '0'; 56 if (r <= 9) 57 return r; 58 59 r = cdigit - 'A'; 60 if (r <= radix - 11U) 61 return r + 10; 62 63 r = cdigit - 'a'; 64 if (r <= radix - 11U) 65 return r + 10; 66 67 radix = 10; 68 } 69 70 r = cdigit - '0'; 71 if (r < radix) 72 return r; 73 74 return -1U; 75 } 76 77 78 void APInt::initSlowCase(uint64_t val, bool isSigned) { 79 VAL = 0; 80 pVal = getClearedMemory(getNumWords()); 81 pVal[0] = val; 82 if (isSigned && int64_t(val) < 0) 83 for (unsigned i = 1; i < getNumWords(); ++i) 84 pVal[i] = -1ULL; 85 clearUnusedBits(); 86 } 87 88 void APInt::initSlowCase(const APInt& that) { 89 VAL = 0; 90 pVal = getMemory(getNumWords()); 91 memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE); 92 } 93 94 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) { 95 assert(BitWidth && "Bitwidth too small"); 96 assert(bigVal.data() && "Null pointer detected!"); 97 if (isSingleWord()) 98 VAL = bigVal[0]; 99 else { 100 // Get memory, cleared to 0 101 VAL = 0; 102 pVal = getClearedMemory(getNumWords()); 103 // Calculate the number of words to copy 104 unsigned words = std::min<unsigned>(bigVal.size(), getNumWords()); 105 // Copy the words from bigVal to pVal 106 memcpy(pVal, bigVal.data(), words * APINT_WORD_SIZE); 107 } 108 // Make sure unused high bits are cleared 109 clearUnusedBits(); 110 } 111 112 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal) 113 : BitWidth(numBits) { 114 initFromArray(bigVal); 115 } 116 117 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) 118 : BitWidth(numBits) { 119 initFromArray(makeArrayRef(bigVal, numWords)); 120 } 121 122 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix) 123 : VAL(0), BitWidth(numbits) { 124 assert(BitWidth && "Bitwidth too small"); 125 fromString(numbits, Str, radix); 126 } 127 128 void APInt::AssignSlowCase(const APInt& RHS) { 129 // Don't do anything for X = X 130 if (this == &RHS) 131 return; 132 133 if (BitWidth == RHS.getBitWidth()) { 134 // assume same bit-width single-word case is already handled 135 assert(!isSingleWord()); 136 memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE); 137 return; 138 } 139 140 if (isSingleWord()) { 141 // assume case where both are single words is already handled 142 assert(!RHS.isSingleWord()); 143 VAL = 0; 144 pVal = getMemory(RHS.getNumWords()); 145 memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); 146 } else if (getNumWords() == RHS.getNumWords()) 147 memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); 148 else if (RHS.isSingleWord()) { 149 delete [] pVal; 150 VAL = RHS.VAL; 151 } else { 152 delete [] pVal; 153 pVal = getMemory(RHS.getNumWords()); 154 memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); 155 } 156 BitWidth = RHS.BitWidth; 157 clearUnusedBits(); 158 } 159 160 /// This method 'profiles' an APInt for use with FoldingSet. 161 void APInt::Profile(FoldingSetNodeID& ID) const { 162 ID.AddInteger(BitWidth); 163 164 if (isSingleWord()) { 165 ID.AddInteger(VAL); 166 return; 167 } 168 169 unsigned NumWords = getNumWords(); 170 for (unsigned i = 0; i < NumWords; ++i) 171 ID.AddInteger(pVal[i]); 172 } 173 174 /// @brief Prefix increment operator. Increments the APInt by one. 175 APInt& APInt::operator++() { 176 if (isSingleWord()) 177 ++VAL; 178 else 179 tcIncrement(pVal, getNumWords()); 180 return clearUnusedBits(); 181 } 182 183 /// @brief Prefix decrement operator. Decrements the APInt by one. 184 APInt& APInt::operator--() { 185 if (isSingleWord()) 186 --VAL; 187 else 188 tcDecrement(pVal, getNumWords()); 189 return clearUnusedBits(); 190 } 191 192 /// Adds the RHS APint to this APInt. 193 /// @returns this, after addition of RHS. 194 /// @brief Addition assignment operator. 195 APInt& APInt::operator+=(const APInt& RHS) { 196 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 197 if (isSingleWord()) 198 VAL += RHS.VAL; 199 else 200 tcAdd(pVal, RHS.pVal, 0, getNumWords()); 201 return clearUnusedBits(); 202 } 203 204 APInt& APInt::operator+=(uint64_t RHS) { 205 if (isSingleWord()) 206 VAL += RHS; 207 else 208 tcAddPart(pVal, RHS, getNumWords()); 209 return clearUnusedBits(); 210 } 211 212 /// Subtracts the RHS APInt from this APInt 213 /// @returns this, after subtraction 214 /// @brief Subtraction assignment operator. 215 APInt& APInt::operator-=(const APInt& RHS) { 216 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 217 if (isSingleWord()) 218 VAL -= RHS.VAL; 219 else 220 tcSubtract(pVal, RHS.pVal, 0, getNumWords()); 221 return clearUnusedBits(); 222 } 223 224 APInt& APInt::operator-=(uint64_t RHS) { 225 if (isSingleWord()) 226 VAL -= RHS; 227 else 228 tcSubtractPart(pVal, RHS, getNumWords()); 229 return clearUnusedBits(); 230 } 231 232 /// Multiplies an integer array, x, by a uint64_t integer and places the result 233 /// into dest. 234 /// @returns the carry out of the multiplication. 235 /// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer. 236 static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) { 237 // Split y into high 32-bit part (hy) and low 32-bit part (ly) 238 uint64_t ly = y & 0xffffffffULL, hy = y >> 32; 239 uint64_t carry = 0; 240 241 // For each digit of x. 242 for (unsigned i = 0; i < len; ++i) { 243 // Split x into high and low words 244 uint64_t lx = x[i] & 0xffffffffULL; 245 uint64_t hx = x[i] >> 32; 246 // hasCarry - A flag to indicate if there is a carry to the next digit. 247 // hasCarry == 0, no carry 248 // hasCarry == 1, has carry 249 // hasCarry == 2, no carry and the calculation result == 0. 250 uint8_t hasCarry = 0; 251 dest[i] = carry + lx * ly; 252 // Determine if the add above introduces carry. 253 hasCarry = (dest[i] < carry) ? 1 : 0; 254 carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0); 255 // The upper limit of carry can be (2^32 - 1)(2^32 - 1) + 256 // (2^32 - 1) + 2^32 = 2^64. 257 hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0); 258 259 carry += (lx * hy) & 0xffffffffULL; 260 dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL); 261 carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) + 262 (carry >> 32) + ((lx * hy) >> 32) + hx * hy; 263 } 264 return carry; 265 } 266 267 /// Multiplies integer array x by integer array y and stores the result into 268 /// the integer array dest. Note that dest's size must be >= xlen + ylen. 269 /// @brief Generalized multiplication of integer arrays. 270 static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[], 271 unsigned ylen) { 272 dest[xlen] = mul_1(dest, x, xlen, y[0]); 273 for (unsigned i = 1; i < ylen; ++i) { 274 uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32; 275 uint64_t carry = 0, lx = 0, hx = 0; 276 for (unsigned j = 0; j < xlen; ++j) { 277 lx = x[j] & 0xffffffffULL; 278 hx = x[j] >> 32; 279 // hasCarry - A flag to indicate if has carry. 280 // hasCarry == 0, no carry 281 // hasCarry == 1, has carry 282 // hasCarry == 2, no carry and the calculation result == 0. 283 uint8_t hasCarry = 0; 284 uint64_t resul = carry + lx * ly; 285 hasCarry = (resul < carry) ? 1 : 0; 286 carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32); 287 hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0); 288 289 carry += (lx * hy) & 0xffffffffULL; 290 resul = (carry << 32) | (resul & 0xffffffffULL); 291 dest[i+j] += resul; 292 carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+ 293 (carry >> 32) + (dest[i+j] < resul ? 1 : 0) + 294 ((lx * hy) >> 32) + hx * hy; 295 } 296 dest[i+xlen] = carry; 297 } 298 } 299 300 APInt& APInt::operator*=(const APInt& RHS) { 301 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 302 if (isSingleWord()) { 303 VAL *= RHS.VAL; 304 clearUnusedBits(); 305 return *this; 306 } 307 308 // Get some bit facts about LHS and check for zero 309 unsigned lhsBits = getActiveBits(); 310 unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1; 311 if (!lhsWords) 312 // 0 * X ===> 0 313 return *this; 314 315 // Get some bit facts about RHS and check for zero 316 unsigned rhsBits = RHS.getActiveBits(); 317 unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1; 318 if (!rhsWords) { 319 // X * 0 ===> 0 320 clearAllBits(); 321 return *this; 322 } 323 324 // Allocate space for the result 325 unsigned destWords = rhsWords + lhsWords; 326 uint64_t *dest = getMemory(destWords); 327 328 // Perform the long multiply 329 mul(dest, pVal, lhsWords, RHS.pVal, rhsWords); 330 331 // Copy result back into *this 332 clearAllBits(); 333 unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords; 334 memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE); 335 clearUnusedBits(); 336 337 // delete dest array and return 338 delete[] dest; 339 return *this; 340 } 341 342 void APInt::AndAssignSlowCase(const APInt& RHS) { 343 tcAnd(pVal, RHS.pVal, getNumWords()); 344 } 345 346 void APInt::OrAssignSlowCase(const APInt& RHS) { 347 tcOr(pVal, RHS.pVal, getNumWords()); 348 } 349 350 void APInt::XorAssignSlowCase(const APInt& RHS) { 351 tcXor(pVal, RHS.pVal, getNumWords()); 352 } 353 354 APInt APInt::operator*(const APInt& RHS) const { 355 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 356 if (isSingleWord()) 357 return APInt(BitWidth, VAL * RHS.VAL); 358 APInt Result(*this); 359 Result *= RHS; 360 return Result; 361 } 362 363 bool APInt::EqualSlowCase(const APInt& RHS) const { 364 return std::equal(pVal, pVal + getNumWords(), RHS.pVal); 365 } 366 367 bool APInt::ult(const APInt& RHS) const { 368 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); 369 if (isSingleWord()) 370 return VAL < RHS.VAL; 371 372 // Get active bit length of both operands 373 unsigned n1 = getActiveBits(); 374 unsigned n2 = RHS.getActiveBits(); 375 376 // If magnitude of LHS is less than RHS, return true. 377 if (n1 < n2) 378 return true; 379 380 // If magnitude of RHS is greater than LHS, return false. 381 if (n2 < n1) 382 return false; 383 384 // If they both fit in a word, just compare the low order word 385 if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD) 386 return pVal[0] < RHS.pVal[0]; 387 388 // Otherwise, compare all words 389 unsigned topWord = whichWord(std::max(n1,n2)-1); 390 for (int i = topWord; i >= 0; --i) { 391 if (pVal[i] > RHS.pVal[i]) 392 return false; 393 if (pVal[i] < RHS.pVal[i]) 394 return true; 395 } 396 return false; 397 } 398 399 bool APInt::slt(const APInt& RHS) const { 400 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); 401 if (isSingleWord()) { 402 int64_t lhsSext = SignExtend64(VAL, BitWidth); 403 int64_t rhsSext = SignExtend64(RHS.VAL, BitWidth); 404 return lhsSext < rhsSext; 405 } 406 407 bool lhsNeg = isNegative(); 408 bool rhsNeg = RHS.isNegative(); 409 410 // If the sign bits don't match, then (LHS < RHS) if LHS is negative 411 if (lhsNeg != rhsNeg) 412 return lhsNeg; 413 414 // Otherwise we can just use an unsigned comparison, because even negative 415 // numbers compare correctly this way if both have the same signed-ness. 416 return ult(RHS); 417 } 418 419 void APInt::setBit(unsigned bitPosition) { 420 if (isSingleWord()) 421 VAL |= maskBit(bitPosition); 422 else 423 pVal[whichWord(bitPosition)] |= maskBit(bitPosition); 424 } 425 426 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { 427 unsigned loWord = whichWord(loBit); 428 unsigned hiWord = whichWord(hiBit); 429 430 // Create an initial mask for the low word with zeros below loBit. 431 uint64_t loMask = UINT64_MAX << whichBit(loBit); 432 433 // If hiBit is not aligned, we need a high mask. 434 unsigned hiShiftAmt = whichBit(hiBit); 435 if (hiShiftAmt != 0) { 436 // Create a high mask with zeros above hiBit. 437 uint64_t hiMask = UINT64_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt); 438 // If loWord and hiWord are equal, then we combine the masks. Otherwise, 439 // set the bits in hiWord. 440 if (hiWord == loWord) 441 loMask &= hiMask; 442 else 443 pVal[hiWord] |= hiMask; 444 } 445 // Apply the mask to the low word. 446 pVal[loWord] |= loMask; 447 448 // Fill any words between loWord and hiWord with all ones. 449 for (unsigned word = loWord + 1; word < hiWord; ++word) 450 pVal[word] = UINT64_MAX; 451 } 452 453 /// Set the given bit to 0 whose position is given as "bitPosition". 454 /// @brief Set a given bit to 0. 455 void APInt::clearBit(unsigned bitPosition) { 456 if (isSingleWord()) 457 VAL &= ~maskBit(bitPosition); 458 else 459 pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition); 460 } 461 462 /// @brief Toggle every bit to its opposite value. 463 void APInt::flipAllBitsSlowCase() { 464 tcComplement(pVal, getNumWords()); 465 clearUnusedBits(); 466 } 467 468 /// Toggle a given bit to its opposite value whose position is given 469 /// as "bitPosition". 470 /// @brief Toggles a given bit to its opposite value. 471 void APInt::flipBit(unsigned bitPosition) { 472 assert(bitPosition < BitWidth && "Out of the bit-width range!"); 473 if ((*this)[bitPosition]) clearBit(bitPosition); 474 else setBit(bitPosition); 475 } 476 477 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { 478 unsigned subBitWidth = subBits.getBitWidth(); 479 assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth && 480 "Illegal bit insertion"); 481 482 // Insertion is a direct copy. 483 if (subBitWidth == BitWidth) { 484 *this = subBits; 485 return; 486 } 487 488 // Single word result can be done as a direct bitmask. 489 if (isSingleWord()) { 490 uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth); 491 VAL &= ~(mask << bitPosition); 492 VAL |= (subBits.VAL << bitPosition); 493 return; 494 } 495 496 unsigned loBit = whichBit(bitPosition); 497 unsigned loWord = whichWord(bitPosition); 498 unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1); 499 500 // Insertion within a single word can be done as a direct bitmask. 501 if (loWord == hi1Word) { 502 uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth); 503 pVal[loWord] &= ~(mask << loBit); 504 pVal[loWord] |= (subBits.VAL << loBit); 505 return; 506 } 507 508 // Insert on word boundaries. 509 if (loBit == 0) { 510 // Direct copy whole words. 511 unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD; 512 memcpy(pVal + loWord, subBits.getRawData(), 513 numWholeSubWords * APINT_WORD_SIZE); 514 515 // Mask+insert remaining bits. 516 unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD; 517 if (remainingBits != 0) { 518 uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - remainingBits); 519 pVal[hi1Word] &= ~mask; 520 pVal[hi1Word] |= subBits.getWord(subBitWidth - 1); 521 } 522 return; 523 } 524 525 // General case - set/clear individual bits in dst based on src. 526 // TODO - there is scope for optimization here, but at the moment this code 527 // path is barely used so prefer readability over performance. 528 for (unsigned i = 0; i != subBitWidth; ++i) { 529 if (subBits[i]) 530 setBit(bitPosition + i); 531 else 532 clearBit(bitPosition + i); 533 } 534 } 535 536 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { 537 assert(numBits > 0 && "Can't extract zero bits"); 538 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && 539 "Illegal bit extraction"); 540 541 if (isSingleWord()) 542 return APInt(numBits, VAL >> bitPosition); 543 544 unsigned loBit = whichBit(bitPosition); 545 unsigned loWord = whichWord(bitPosition); 546 unsigned hiWord = whichWord(bitPosition + numBits - 1); 547 548 // Single word result extracting bits from a single word source. 549 if (loWord == hiWord) 550 return APInt(numBits, pVal[loWord] >> loBit); 551 552 // Extracting bits that start on a source word boundary can be done 553 // as a fast memory copy. 554 if (loBit == 0) 555 return APInt(numBits, makeArrayRef(pVal + loWord, 1 + hiWord - loWord)); 556 557 // General case - shift + copy source words directly into place. 558 APInt Result(numBits, 0); 559 unsigned NumSrcWords = getNumWords(); 560 unsigned NumDstWords = Result.getNumWords(); 561 562 for (unsigned word = 0; word < NumDstWords; ++word) { 563 uint64_t w0 = pVal[loWord + word]; 564 uint64_t w1 = 565 (loWord + word + 1) < NumSrcWords ? pVal[loWord + word + 1] : 0; 566 Result.pVal[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit)); 567 } 568 569 return Result.clearUnusedBits(); 570 } 571 572 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { 573 assert(!str.empty() && "Invalid string length"); 574 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || 575 radix == 36) && 576 "Radix should be 2, 8, 10, 16, or 36!"); 577 578 size_t slen = str.size(); 579 580 // Each computation below needs to know if it's negative. 581 StringRef::iterator p = str.begin(); 582 unsigned isNegative = *p == '-'; 583 if (*p == '-' || *p == '+') { 584 p++; 585 slen--; 586 assert(slen && "String is only a sign, needs a value."); 587 } 588 589 // For radixes of power-of-two values, the bits required is accurately and 590 // easily computed 591 if (radix == 2) 592 return slen + isNegative; 593 if (radix == 8) 594 return slen * 3 + isNegative; 595 if (radix == 16) 596 return slen * 4 + isNegative; 597 598 // FIXME: base 36 599 600 // This is grossly inefficient but accurate. We could probably do something 601 // with a computation of roughly slen*64/20 and then adjust by the value of 602 // the first few digits. But, I'm not sure how accurate that could be. 603 604 // Compute a sufficient number of bits that is always large enough but might 605 // be too large. This avoids the assertion in the constructor. This 606 // calculation doesn't work appropriately for the numbers 0-9, so just use 4 607 // bits in that case. 608 unsigned sufficient 609 = radix == 10? (slen == 1 ? 4 : slen * 64/18) 610 : (slen == 1 ? 7 : slen * 16/3); 611 612 // Convert to the actual binary value. 613 APInt tmp(sufficient, StringRef(p, slen), radix); 614 615 // Compute how many bits are required. If the log is infinite, assume we need 616 // just bit. 617 unsigned log = tmp.logBase2(); 618 if (log == (unsigned)-1) { 619 return isNegative + 1; 620 } else { 621 return isNegative + log + 1; 622 } 623 } 624 625 hash_code llvm::hash_value(const APInt &Arg) { 626 if (Arg.isSingleWord()) 627 return hash_combine(Arg.VAL); 628 629 return hash_combine_range(Arg.pVal, Arg.pVal + Arg.getNumWords()); 630 } 631 632 bool APInt::isSplat(unsigned SplatSizeInBits) const { 633 assert(getBitWidth() % SplatSizeInBits == 0 && 634 "SplatSizeInBits must divide width!"); 635 // We can check that all parts of an integer are equal by making use of a 636 // little trick: rotate and check if it's still the same value. 637 return *this == rotl(SplatSizeInBits); 638 } 639 640 /// This function returns the high "numBits" bits of this APInt. 641 APInt APInt::getHiBits(unsigned numBits) const { 642 return this->lshr(BitWidth - numBits); 643 } 644 645 /// This function returns the low "numBits" bits of this APInt. 646 APInt APInt::getLoBits(unsigned numBits) const { 647 APInt Result(getLowBitsSet(BitWidth, numBits)); 648 Result &= *this; 649 return Result; 650 } 651 652 unsigned APInt::countLeadingZerosSlowCase() const { 653 unsigned Count = 0; 654 for (int i = getNumWords()-1; i >= 0; --i) { 655 uint64_t V = pVal[i]; 656 if (V == 0) 657 Count += APINT_BITS_PER_WORD; 658 else { 659 Count += llvm::countLeadingZeros(V); 660 break; 661 } 662 } 663 // Adjust for unused bits in the most significant word (they are zero). 664 unsigned Mod = BitWidth % APINT_BITS_PER_WORD; 665 Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0; 666 return Count; 667 } 668 669 unsigned APInt::countLeadingOnes() const { 670 if (isSingleWord()) 671 return llvm::countLeadingOnes(VAL << (APINT_BITS_PER_WORD - BitWidth)); 672 673 unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD; 674 unsigned shift; 675 if (!highWordBits) { 676 highWordBits = APINT_BITS_PER_WORD; 677 shift = 0; 678 } else { 679 shift = APINT_BITS_PER_WORD - highWordBits; 680 } 681 int i = getNumWords() - 1; 682 unsigned Count = llvm::countLeadingOnes(pVal[i] << shift); 683 if (Count == highWordBits) { 684 for (i--; i >= 0; --i) { 685 if (pVal[i] == -1ULL) 686 Count += APINT_BITS_PER_WORD; 687 else { 688 Count += llvm::countLeadingOnes(pVal[i]); 689 break; 690 } 691 } 692 } 693 return Count; 694 } 695 696 unsigned APInt::countTrailingZeros() const { 697 if (isSingleWord()) 698 return std::min(unsigned(llvm::countTrailingZeros(VAL)), BitWidth); 699 unsigned Count = 0; 700 unsigned i = 0; 701 for (; i < getNumWords() && pVal[i] == 0; ++i) 702 Count += APINT_BITS_PER_WORD; 703 if (i < getNumWords()) 704 Count += llvm::countTrailingZeros(pVal[i]); 705 return std::min(Count, BitWidth); 706 } 707 708 unsigned APInt::countTrailingOnesSlowCase() const { 709 unsigned Count = 0; 710 unsigned i = 0; 711 for (; i < getNumWords() && pVal[i] == -1ULL; ++i) 712 Count += APINT_BITS_PER_WORD; 713 if (i < getNumWords()) 714 Count += llvm::countTrailingOnes(pVal[i]); 715 return std::min(Count, BitWidth); 716 } 717 718 unsigned APInt::countPopulationSlowCase() const { 719 unsigned Count = 0; 720 for (unsigned i = 0; i < getNumWords(); ++i) 721 Count += llvm::countPopulation(pVal[i]); 722 return Count; 723 } 724 725 bool APInt::intersectsSlowCase(const APInt &RHS) const { 726 for (unsigned i = 0, e = getNumWords(); i != e; ++i) 727 if ((pVal[i] & RHS.pVal[i]) != 0) 728 return true; 729 730 return false; 731 } 732 733 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const { 734 for (unsigned i = 0, e = getNumWords(); i != e; ++i) 735 if ((pVal[i] & ~RHS.pVal[i]) != 0) 736 return false; 737 738 return true; 739 } 740 741 APInt APInt::byteSwap() const { 742 assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!"); 743 if (BitWidth == 16) 744 return APInt(BitWidth, ByteSwap_16(uint16_t(VAL))); 745 if (BitWidth == 32) 746 return APInt(BitWidth, ByteSwap_32(unsigned(VAL))); 747 if (BitWidth == 48) { 748 unsigned Tmp1 = unsigned(VAL >> 16); 749 Tmp1 = ByteSwap_32(Tmp1); 750 uint16_t Tmp2 = uint16_t(VAL); 751 Tmp2 = ByteSwap_16(Tmp2); 752 return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1); 753 } 754 if (BitWidth == 64) 755 return APInt(BitWidth, ByteSwap_64(VAL)); 756 757 APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0); 758 for (unsigned I = 0, N = getNumWords(); I != N; ++I) 759 Result.pVal[I] = ByteSwap_64(pVal[N - I - 1]); 760 if (Result.BitWidth != BitWidth) { 761 Result.lshrInPlace(Result.BitWidth - BitWidth); 762 Result.BitWidth = BitWidth; 763 } 764 return Result; 765 } 766 767 APInt APInt::reverseBits() const { 768 switch (BitWidth) { 769 case 64: 770 return APInt(BitWidth, llvm::reverseBits<uint64_t>(VAL)); 771 case 32: 772 return APInt(BitWidth, llvm::reverseBits<uint32_t>(VAL)); 773 case 16: 774 return APInt(BitWidth, llvm::reverseBits<uint16_t>(VAL)); 775 case 8: 776 return APInt(BitWidth, llvm::reverseBits<uint8_t>(VAL)); 777 default: 778 break; 779 } 780 781 APInt Val(*this); 782 APInt Reversed(BitWidth, 0); 783 unsigned S = BitWidth; 784 785 for (; Val != 0; Val.lshrInPlace(1)) { 786 Reversed <<= 1; 787 Reversed |= Val[0]; 788 --S; 789 } 790 791 Reversed <<= S; 792 return Reversed; 793 } 794 795 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) { 796 // Fast-path a common case. 797 if (A == B) return A; 798 799 // Corner cases: if either operand is zero, the other is the gcd. 800 if (!A) return B; 801 if (!B) return A; 802 803 // Count common powers of 2 and remove all other powers of 2. 804 unsigned Pow2; 805 { 806 unsigned Pow2_A = A.countTrailingZeros(); 807 unsigned Pow2_B = B.countTrailingZeros(); 808 if (Pow2_A > Pow2_B) { 809 A.lshrInPlace(Pow2_A - Pow2_B); 810 Pow2 = Pow2_B; 811 } else if (Pow2_B > Pow2_A) { 812 B.lshrInPlace(Pow2_B - Pow2_A); 813 Pow2 = Pow2_A; 814 } else { 815 Pow2 = Pow2_A; 816 } 817 } 818 819 // Both operands are odd multiples of 2^Pow_2: 820 // 821 // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b)) 822 // 823 // This is a modified version of Stein's algorithm, taking advantage of 824 // efficient countTrailingZeros(). 825 while (A != B) { 826 if (A.ugt(B)) { 827 A -= B; 828 A.lshrInPlace(A.countTrailingZeros() - Pow2); 829 } else { 830 B -= A; 831 B.lshrInPlace(B.countTrailingZeros() - Pow2); 832 } 833 } 834 835 return A; 836 } 837 838 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { 839 union { 840 double D; 841 uint64_t I; 842 } T; 843 T.D = Double; 844 845 // Get the sign bit from the highest order bit 846 bool isNeg = T.I >> 63; 847 848 // Get the 11-bit exponent and adjust for the 1023 bit bias 849 int64_t exp = ((T.I >> 52) & 0x7ff) - 1023; 850 851 // If the exponent is negative, the value is < 0 so just return 0. 852 if (exp < 0) 853 return APInt(width, 0u); 854 855 // Extract the mantissa by clearing the top 12 bits (sign + exponent). 856 uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52; 857 858 // If the exponent doesn't shift all bits out of the mantissa 859 if (exp < 52) 860 return isNeg ? -APInt(width, mantissa >> (52 - exp)) : 861 APInt(width, mantissa >> (52 - exp)); 862 863 // If the client didn't provide enough bits for us to shift the mantissa into 864 // then the result is undefined, just return 0 865 if (width <= exp - 52) 866 return APInt(width, 0); 867 868 // Otherwise, we have to shift the mantissa bits up to the right location 869 APInt Tmp(width, mantissa); 870 Tmp = Tmp.shl((unsigned)exp - 52); 871 return isNeg ? -Tmp : Tmp; 872 } 873 874 /// This function converts this APInt to a double. 875 /// The layout for double is as following (IEEE Standard 754): 876 /// -------------------------------------- 877 /// | Sign Exponent Fraction Bias | 878 /// |-------------------------------------- | 879 /// | 1[63] 11[62-52] 52[51-00] 1023 | 880 /// -------------------------------------- 881 double APInt::roundToDouble(bool isSigned) const { 882 883 // Handle the simple case where the value is contained in one uint64_t. 884 // It is wrong to optimize getWord(0) to VAL; there might be more than one word. 885 if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) { 886 if (isSigned) { 887 int64_t sext = SignExtend64(getWord(0), BitWidth); 888 return double(sext); 889 } else 890 return double(getWord(0)); 891 } 892 893 // Determine if the value is negative. 894 bool isNeg = isSigned ? (*this)[BitWidth-1] : false; 895 896 // Construct the absolute value if we're negative. 897 APInt Tmp(isNeg ? -(*this) : (*this)); 898 899 // Figure out how many bits we're using. 900 unsigned n = Tmp.getActiveBits(); 901 902 // The exponent (without bias normalization) is just the number of bits 903 // we are using. Note that the sign bit is gone since we constructed the 904 // absolute value. 905 uint64_t exp = n; 906 907 // Return infinity for exponent overflow 908 if (exp > 1023) { 909 if (!isSigned || !isNeg) 910 return std::numeric_limits<double>::infinity(); 911 else 912 return -std::numeric_limits<double>::infinity(); 913 } 914 exp += 1023; // Increment for 1023 bias 915 916 // Number of bits in mantissa is 52. To obtain the mantissa value, we must 917 // extract the high 52 bits from the correct words in pVal. 918 uint64_t mantissa; 919 unsigned hiWord = whichWord(n-1); 920 if (hiWord == 0) { 921 mantissa = Tmp.pVal[0]; 922 if (n > 52) 923 mantissa >>= n - 52; // shift down, we want the top 52 bits. 924 } else { 925 assert(hiWord > 0 && "huh?"); 926 uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD); 927 uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD); 928 mantissa = hibits | lobits; 929 } 930 931 // The leading bit of mantissa is implicit, so get rid of it. 932 uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0; 933 union { 934 double D; 935 uint64_t I; 936 } T; 937 T.I = sign | (exp << 52) | mantissa; 938 return T.D; 939 } 940 941 // Truncate to new width. 942 APInt APInt::trunc(unsigned width) const { 943 assert(width < BitWidth && "Invalid APInt Truncate request"); 944 assert(width && "Can't truncate to 0 bits"); 945 946 if (width <= APINT_BITS_PER_WORD) 947 return APInt(width, getRawData()[0]); 948 949 APInt Result(getMemory(getNumWords(width)), width); 950 951 // Copy full words. 952 unsigned i; 953 for (i = 0; i != width / APINT_BITS_PER_WORD; i++) 954 Result.pVal[i] = pVal[i]; 955 956 // Truncate and copy any partial word. 957 unsigned bits = (0 - width) % APINT_BITS_PER_WORD; 958 if (bits != 0) 959 Result.pVal[i] = pVal[i] << bits >> bits; 960 961 return Result; 962 } 963 964 // Sign extend to a new width. 965 APInt APInt::sext(unsigned width) const { 966 assert(width > BitWidth && "Invalid APInt SignExtend request"); 967 968 if (width <= APINT_BITS_PER_WORD) { 969 uint64_t val = VAL << (APINT_BITS_PER_WORD - BitWidth); 970 val = (int64_t)val >> (width - BitWidth); 971 return APInt(width, val >> (APINT_BITS_PER_WORD - width)); 972 } 973 974 APInt Result(getMemory(getNumWords(width)), width); 975 976 // Copy full words. 977 unsigned i; 978 uint64_t word = 0; 979 for (i = 0; i != BitWidth / APINT_BITS_PER_WORD; i++) { 980 word = getRawData()[i]; 981 Result.pVal[i] = word; 982 } 983 984 // Read and sign-extend any partial word. 985 unsigned bits = (0 - BitWidth) % APINT_BITS_PER_WORD; 986 if (bits != 0) 987 word = (int64_t)getRawData()[i] << bits >> bits; 988 else 989 word = (int64_t)word >> (APINT_BITS_PER_WORD - 1); 990 991 // Write remaining full words. 992 for (; i != width / APINT_BITS_PER_WORD; i++) { 993 Result.pVal[i] = word; 994 word = (int64_t)word >> (APINT_BITS_PER_WORD - 1); 995 } 996 997 // Write any partial word. 998 bits = (0 - width) % APINT_BITS_PER_WORD; 999 if (bits != 0) 1000 Result.pVal[i] = word << bits >> bits; 1001 1002 return Result; 1003 } 1004 1005 // Zero extend to a new width. 1006 APInt APInt::zext(unsigned width) const { 1007 assert(width > BitWidth && "Invalid APInt ZeroExtend request"); 1008 1009 if (width <= APINT_BITS_PER_WORD) 1010 return APInt(width, VAL); 1011 1012 APInt Result(getMemory(getNumWords(width)), width); 1013 1014 // Copy words. 1015 unsigned i; 1016 for (i = 0; i != getNumWords(); i++) 1017 Result.pVal[i] = getRawData()[i]; 1018 1019 // Zero remaining words. 1020 memset(&Result.pVal[i], 0, (Result.getNumWords() - i) * APINT_WORD_SIZE); 1021 1022 return Result; 1023 } 1024 1025 APInt APInt::zextOrTrunc(unsigned width) const { 1026 if (BitWidth < width) 1027 return zext(width); 1028 if (BitWidth > width) 1029 return trunc(width); 1030 return *this; 1031 } 1032 1033 APInt APInt::sextOrTrunc(unsigned width) const { 1034 if (BitWidth < width) 1035 return sext(width); 1036 if (BitWidth > width) 1037 return trunc(width); 1038 return *this; 1039 } 1040 1041 APInt APInt::zextOrSelf(unsigned width) const { 1042 if (BitWidth < width) 1043 return zext(width); 1044 return *this; 1045 } 1046 1047 APInt APInt::sextOrSelf(unsigned width) const { 1048 if (BitWidth < width) 1049 return sext(width); 1050 return *this; 1051 } 1052 1053 /// Arithmetic right-shift this APInt by shiftAmt. 1054 /// @brief Arithmetic right-shift function. 1055 APInt APInt::ashr(const APInt &shiftAmt) const { 1056 return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth)); 1057 } 1058 1059 /// Arithmetic right-shift this APInt by shiftAmt. 1060 /// @brief Arithmetic right-shift function. 1061 APInt APInt::ashr(unsigned shiftAmt) const { 1062 assert(shiftAmt <= BitWidth && "Invalid shift amount"); 1063 // Handle a degenerate case 1064 if (shiftAmt == 0) 1065 return *this; 1066 1067 // Handle single word shifts with built-in ashr 1068 if (isSingleWord()) { 1069 if (shiftAmt == BitWidth) 1070 return APInt(BitWidth, 0); // undefined 1071 return APInt(BitWidth, SignExtend64(VAL, BitWidth) >> shiftAmt); 1072 } 1073 1074 // If all the bits were shifted out, the result is, technically, undefined. 1075 // We return -1 if it was negative, 0 otherwise. We check this early to avoid 1076 // issues in the algorithm below. 1077 if (shiftAmt == BitWidth) { 1078 if (isNegative()) 1079 return APInt(BitWidth, -1ULL, true); 1080 else 1081 return APInt(BitWidth, 0); 1082 } 1083 1084 // Create some space for the result. 1085 uint64_t * val = new uint64_t[getNumWords()]; 1086 1087 // Compute some values needed by the following shift algorithms 1088 unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word 1089 unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift 1090 unsigned breakWord = getNumWords() - 1 - offset; // last word affected 1091 unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word? 1092 if (bitsInWord == 0) 1093 bitsInWord = APINT_BITS_PER_WORD; 1094 1095 // If we are shifting whole words, just move whole words 1096 if (wordShift == 0) { 1097 // Move the words containing significant bits 1098 for (unsigned i = 0; i <= breakWord; ++i) 1099 val[i] = pVal[i+offset]; // move whole word 1100 1101 // Adjust the top significant word for sign bit fill, if negative 1102 if (isNegative()) 1103 if (bitsInWord < APINT_BITS_PER_WORD) 1104 val[breakWord] |= ~0ULL << bitsInWord; // set high bits 1105 } else { 1106 // Shift the low order words 1107 for (unsigned i = 0; i < breakWord; ++i) { 1108 // This combines the shifted corresponding word with the low bits from 1109 // the next word (shifted into this word's high bits). 1110 val[i] = (pVal[i+offset] >> wordShift) | 1111 (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift)); 1112 } 1113 1114 // Shift the break word. In this case there are no bits from the next word 1115 // to include in this word. 1116 val[breakWord] = pVal[breakWord+offset] >> wordShift; 1117 1118 // Deal with sign extension in the break word, and possibly the word before 1119 // it. 1120 if (isNegative()) { 1121 if (wordShift > bitsInWord) { 1122 if (breakWord > 0) 1123 val[breakWord-1] |= 1124 ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord)); 1125 val[breakWord] |= ~0ULL; 1126 } else 1127 val[breakWord] |= (~0ULL << (bitsInWord - wordShift)); 1128 } 1129 } 1130 1131 // Remaining words are 0 or -1, just assign them. 1132 uint64_t fillValue = (isNegative() ? -1ULL : 0); 1133 for (unsigned i = breakWord+1; i < getNumWords(); ++i) 1134 val[i] = fillValue; 1135 APInt Result(val, BitWidth); 1136 Result.clearUnusedBits(); 1137 return Result; 1138 } 1139 1140 /// Logical right-shift this APInt by shiftAmt. 1141 /// @brief Logical right-shift function. 1142 void APInt::lshrInPlace(const APInt &shiftAmt) { 1143 lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); 1144 } 1145 1146 /// Logical right-shift this APInt by shiftAmt. 1147 /// @brief Logical right-shift function. 1148 void APInt::lshrSlowCase(unsigned ShiftAmt) { 1149 tcShiftRight(pVal, getNumWords(), ShiftAmt); 1150 } 1151 1152 /// Left-shift this APInt by shiftAmt. 1153 /// @brief Left-shift function. 1154 APInt APInt::shl(const APInt &shiftAmt) const { 1155 // It's undefined behavior in C to shift by BitWidth or greater. 1156 return shl((unsigned)shiftAmt.getLimitedValue(BitWidth)); 1157 } 1158 1159 void APInt::shlSlowCase(unsigned ShiftAmt) { 1160 tcShiftLeft(pVal, getNumWords(), ShiftAmt); 1161 clearUnusedBits(); 1162 } 1163 1164 // Calculate the rotate amount modulo the bit width. 1165 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) { 1166 unsigned rotBitWidth = rotateAmt.getBitWidth(); 1167 APInt rot = rotateAmt; 1168 if (rotBitWidth < BitWidth) { 1169 // Extend the rotate APInt, so that the urem doesn't divide by 0. 1170 // e.g. APInt(1, 32) would give APInt(1, 0). 1171 rot = rotateAmt.zext(BitWidth); 1172 } 1173 rot = rot.urem(APInt(rot.getBitWidth(), BitWidth)); 1174 return rot.getLimitedValue(BitWidth); 1175 } 1176 1177 APInt APInt::rotl(const APInt &rotateAmt) const { 1178 return rotl(rotateModulo(BitWidth, rotateAmt)); 1179 } 1180 1181 APInt APInt::rotl(unsigned rotateAmt) const { 1182 rotateAmt %= BitWidth; 1183 if (rotateAmt == 0) 1184 return *this; 1185 return shl(rotateAmt) | lshr(BitWidth - rotateAmt); 1186 } 1187 1188 APInt APInt::rotr(const APInt &rotateAmt) const { 1189 return rotr(rotateModulo(BitWidth, rotateAmt)); 1190 } 1191 1192 APInt APInt::rotr(unsigned rotateAmt) const { 1193 rotateAmt %= BitWidth; 1194 if (rotateAmt == 0) 1195 return *this; 1196 return lshr(rotateAmt) | shl(BitWidth - rotateAmt); 1197 } 1198 1199 // Square Root - this method computes and returns the square root of "this". 1200 // Three mechanisms are used for computation. For small values (<= 5 bits), 1201 // a table lookup is done. This gets some performance for common cases. For 1202 // values using less than 52 bits, the value is converted to double and then 1203 // the libc sqrt function is called. The result is rounded and then converted 1204 // back to a uint64_t which is then used to construct the result. Finally, 1205 // the Babylonian method for computing square roots is used. 1206 APInt APInt::sqrt() const { 1207 1208 // Determine the magnitude of the value. 1209 unsigned magnitude = getActiveBits(); 1210 1211 // Use a fast table for some small values. This also gets rid of some 1212 // rounding errors in libc sqrt for small values. 1213 if (magnitude <= 5) { 1214 static const uint8_t results[32] = { 1215 /* 0 */ 0, 1216 /* 1- 2 */ 1, 1, 1217 /* 3- 6 */ 2, 2, 2, 2, 1218 /* 7-12 */ 3, 3, 3, 3, 3, 3, 1219 /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4, 1220 /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1221 /* 31 */ 6 1222 }; 1223 return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]); 1224 } 1225 1226 // If the magnitude of the value fits in less than 52 bits (the precision of 1227 // an IEEE double precision floating point value), then we can use the 1228 // libc sqrt function which will probably use a hardware sqrt computation. 1229 // This should be faster than the algorithm below. 1230 if (magnitude < 52) { 1231 return APInt(BitWidth, 1232 uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0]))))); 1233 } 1234 1235 // Okay, all the short cuts are exhausted. We must compute it. The following 1236 // is a classical Babylonian method for computing the square root. This code 1237 // was adapted to APInt from a wikipedia article on such computations. 1238 // See http://www.wikipedia.org/ and go to the page named 1239 // Calculate_an_integer_square_root. 1240 unsigned nbits = BitWidth, i = 4; 1241 APInt testy(BitWidth, 16); 1242 APInt x_old(BitWidth, 1); 1243 APInt x_new(BitWidth, 0); 1244 APInt two(BitWidth, 2); 1245 1246 // Select a good starting value using binary logarithms. 1247 for (;; i += 2, testy = testy.shl(2)) 1248 if (i >= nbits || this->ule(testy)) { 1249 x_old = x_old.shl(i / 2); 1250 break; 1251 } 1252 1253 // Use the Babylonian method to arrive at the integer square root: 1254 for (;;) { 1255 x_new = (this->udiv(x_old) + x_old).udiv(two); 1256 if (x_old.ule(x_new)) 1257 break; 1258 x_old = x_new; 1259 } 1260 1261 // Make sure we return the closest approximation 1262 // NOTE: The rounding calculation below is correct. It will produce an 1263 // off-by-one discrepancy with results from pari/gp. That discrepancy has been 1264 // determined to be a rounding issue with pari/gp as it begins to use a 1265 // floating point representation after 192 bits. There are no discrepancies 1266 // between this algorithm and pari/gp for bit widths < 192 bits. 1267 APInt square(x_old * x_old); 1268 APInt nextSquare((x_old + 1) * (x_old +1)); 1269 if (this->ult(square)) 1270 return x_old; 1271 assert(this->ule(nextSquare) && "Error in APInt::sqrt computation"); 1272 APInt midpoint((nextSquare - square).udiv(two)); 1273 APInt offset(*this - square); 1274 if (offset.ult(midpoint)) 1275 return x_old; 1276 return x_old + 1; 1277 } 1278 1279 /// Computes the multiplicative inverse of this APInt for a given modulo. The 1280 /// iterative extended Euclidean algorithm is used to solve for this value, 1281 /// however we simplify it to speed up calculating only the inverse, and take 1282 /// advantage of div+rem calculations. We also use some tricks to avoid copying 1283 /// (potentially large) APInts around. 1284 APInt APInt::multiplicativeInverse(const APInt& modulo) const { 1285 assert(ult(modulo) && "This APInt must be smaller than the modulo"); 1286 1287 // Using the properties listed at the following web page (accessed 06/21/08): 1288 // http://www.numbertheory.org/php/euclid.html 1289 // (especially the properties numbered 3, 4 and 9) it can be proved that 1290 // BitWidth bits suffice for all the computations in the algorithm implemented 1291 // below. More precisely, this number of bits suffice if the multiplicative 1292 // inverse exists, but may not suffice for the general extended Euclidean 1293 // algorithm. 1294 1295 APInt r[2] = { modulo, *this }; 1296 APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) }; 1297 APInt q(BitWidth, 0); 1298 1299 unsigned i; 1300 for (i = 0; r[i^1] != 0; i ^= 1) { 1301 // An overview of the math without the confusing bit-flipping: 1302 // q = r[i-2] / r[i-1] 1303 // r[i] = r[i-2] % r[i-1] 1304 // t[i] = t[i-2] - t[i-1] * q 1305 udivrem(r[i], r[i^1], q, r[i]); 1306 t[i] -= t[i^1] * q; 1307 } 1308 1309 // If this APInt and the modulo are not coprime, there is no multiplicative 1310 // inverse, so return 0. We check this by looking at the next-to-last 1311 // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean 1312 // algorithm. 1313 if (r[i] != 1) 1314 return APInt(BitWidth, 0); 1315 1316 // The next-to-last t is the multiplicative inverse. However, we are 1317 // interested in a positive inverse. Calcuate a positive one from a negative 1318 // one if necessary. A simple addition of the modulo suffices because 1319 // abs(t[i]) is known to be less than *this/2 (see the link above). 1320 return t[i].isNegative() ? t[i] + modulo : t[i]; 1321 } 1322 1323 /// Calculate the magic numbers required to implement a signed integer division 1324 /// by a constant as a sequence of multiplies, adds and shifts. Requires that 1325 /// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S. 1326 /// Warren, Jr., chapter 10. 1327 APInt::ms APInt::magic() const { 1328 const APInt& d = *this; 1329 unsigned p; 1330 APInt ad, anc, delta, q1, r1, q2, r2, t; 1331 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); 1332 struct ms mag; 1333 1334 ad = d.abs(); 1335 t = signedMin + (d.lshr(d.getBitWidth() - 1)); 1336 anc = t - 1 - t.urem(ad); // absolute value of nc 1337 p = d.getBitWidth() - 1; // initialize p 1338 q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc) 1339 r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc)) 1340 q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d) 1341 r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d)) 1342 do { 1343 p = p + 1; 1344 q1 = q1<<1; // update q1 = 2p/abs(nc) 1345 r1 = r1<<1; // update r1 = rem(2p/abs(nc)) 1346 if (r1.uge(anc)) { // must be unsigned comparison 1347 q1 = q1 + 1; 1348 r1 = r1 - anc; 1349 } 1350 q2 = q2<<1; // update q2 = 2p/abs(d) 1351 r2 = r2<<1; // update r2 = rem(2p/abs(d)) 1352 if (r2.uge(ad)) { // must be unsigned comparison 1353 q2 = q2 + 1; 1354 r2 = r2 - ad; 1355 } 1356 delta = ad - r2; 1357 } while (q1.ult(delta) || (q1 == delta && r1 == 0)); 1358 1359 mag.m = q2 + 1; 1360 if (d.isNegative()) mag.m = -mag.m; // resulting magic number 1361 mag.s = p - d.getBitWidth(); // resulting shift 1362 return mag; 1363 } 1364 1365 /// Calculate the magic numbers required to implement an unsigned integer 1366 /// division by a constant as a sequence of multiplies, adds and shifts. 1367 /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry 1368 /// S. Warren, Jr., chapter 10. 1369 /// LeadingZeros can be used to simplify the calculation if the upper bits 1370 /// of the divided value are known zero. 1371 APInt::mu APInt::magicu(unsigned LeadingZeros) const { 1372 const APInt& d = *this; 1373 unsigned p; 1374 APInt nc, delta, q1, r1, q2, r2; 1375 struct mu magu; 1376 magu.a = 0; // initialize "add" indicator 1377 APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros); 1378 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); 1379 APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); 1380 1381 nc = allOnes - (allOnes - d).urem(d); 1382 p = d.getBitWidth() - 1; // initialize p 1383 q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc 1384 r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc) 1385 q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d 1386 r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d) 1387 do { 1388 p = p + 1; 1389 if (r1.uge(nc - r1)) { 1390 q1 = q1 + q1 + 1; // update q1 1391 r1 = r1 + r1 - nc; // update r1 1392 } 1393 else { 1394 q1 = q1+q1; // update q1 1395 r1 = r1+r1; // update r1 1396 } 1397 if ((r2 + 1).uge(d - r2)) { 1398 if (q2.uge(signedMax)) magu.a = 1; 1399 q2 = q2+q2 + 1; // update q2 1400 r2 = r2+r2 + 1 - d; // update r2 1401 } 1402 else { 1403 if (q2.uge(signedMin)) magu.a = 1; 1404 q2 = q2+q2; // update q2 1405 r2 = r2+r2 + 1; // update r2 1406 } 1407 delta = d - 1 - r2; 1408 } while (p < d.getBitWidth()*2 && 1409 (q1.ult(delta) || (q1 == delta && r1 == 0))); 1410 magu.m = q2 + 1; // resulting magic number 1411 magu.s = p - d.getBitWidth(); // resulting shift 1412 return magu; 1413 } 1414 1415 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers) 1416 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The 1417 /// variables here have the same names as in the algorithm. Comments explain 1418 /// the algorithm and any deviation from it. 1419 static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, 1420 unsigned m, unsigned n) { 1421 assert(u && "Must provide dividend"); 1422 assert(v && "Must provide divisor"); 1423 assert(q && "Must provide quotient"); 1424 assert(u != v && u != q && v != q && "Must use different memory"); 1425 assert(n>1 && "n must be > 1"); 1426 1427 // b denotes the base of the number system. In our case b is 2^32. 1428 const uint64_t b = uint64_t(1) << 32; 1429 1430 DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n'); 1431 DEBUG(dbgs() << "KnuthDiv: original:"); 1432 DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); 1433 DEBUG(dbgs() << " by"); 1434 DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]); 1435 DEBUG(dbgs() << '\n'); 1436 // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of 1437 // u and v by d. Note that we have taken Knuth's advice here to use a power 1438 // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of 1439 // 2 allows us to shift instead of multiply and it is easy to determine the 1440 // shift amount from the leading zeros. We are basically normalizing the u 1441 // and v so that its high bits are shifted to the top of v's range without 1442 // overflow. Note that this can require an extra word in u so that u must 1443 // be of length m+n+1. 1444 unsigned shift = countLeadingZeros(v[n-1]); 1445 unsigned v_carry = 0; 1446 unsigned u_carry = 0; 1447 if (shift) { 1448 for (unsigned i = 0; i < m+n; ++i) { 1449 unsigned u_tmp = u[i] >> (32 - shift); 1450 u[i] = (u[i] << shift) | u_carry; 1451 u_carry = u_tmp; 1452 } 1453 for (unsigned i = 0; i < n; ++i) { 1454 unsigned v_tmp = v[i] >> (32 - shift); 1455 v[i] = (v[i] << shift) | v_carry; 1456 v_carry = v_tmp; 1457 } 1458 } 1459 u[m+n] = u_carry; 1460 1461 DEBUG(dbgs() << "KnuthDiv: normal:"); 1462 DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); 1463 DEBUG(dbgs() << " by"); 1464 DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]); 1465 DEBUG(dbgs() << '\n'); 1466 1467 // D2. [Initialize j.] Set j to m. This is the loop counter over the places. 1468 int j = m; 1469 do { 1470 DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n'); 1471 // D3. [Calculate q'.]. 1472 // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') 1473 // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r') 1474 // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease 1475 // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test 1476 // on v[n-2] determines at high speed most of the cases in which the trial 1477 // value qp is one too large, and it eliminates all cases where qp is two 1478 // too large. 1479 uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]); 1480 DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n'); 1481 uint64_t qp = dividend / v[n-1]; 1482 uint64_t rp = dividend % v[n-1]; 1483 if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) { 1484 qp--; 1485 rp += v[n-1]; 1486 if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2])) 1487 qp--; 1488 } 1489 DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); 1490 1491 // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with 1492 // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation 1493 // consists of a simple multiplication by a one-place number, combined with 1494 // a subtraction. 1495 // The digits (u[j+n]...u[j]) should be kept positive; if the result of 1496 // this step is actually negative, (u[j+n]...u[j]) should be left as the 1497 // true value plus b**(n+1), namely as the b's complement of 1498 // the true value, and a "borrow" to the left should be remembered. 1499 int64_t borrow = 0; 1500 for (unsigned i = 0; i < n; ++i) { 1501 uint64_t p = uint64_t(qp) * uint64_t(v[i]); 1502 int64_t subres = int64_t(u[j+i]) - borrow - (unsigned)p; 1503 u[j+i] = (unsigned)subres; 1504 borrow = (p >> 32) - (subres >> 32); 1505 DEBUG(dbgs() << "KnuthDiv: u[j+i] = " << u[j+i] 1506 << ", borrow = " << borrow << '\n'); 1507 } 1508 bool isNeg = u[j+n] < borrow; 1509 u[j+n] -= (unsigned)borrow; 1510 1511 DEBUG(dbgs() << "KnuthDiv: after subtraction:"); 1512 DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); 1513 DEBUG(dbgs() << '\n'); 1514 1515 // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was 1516 // negative, go to step D6; otherwise go on to step D7. 1517 q[j] = (unsigned)qp; 1518 if (isNeg) { 1519 // D6. [Add back]. The probability that this step is necessary is very 1520 // small, on the order of only 2/b. Make sure that test data accounts for 1521 // this possibility. Decrease q[j] by 1 1522 q[j]--; 1523 // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). 1524 // A carry will occur to the left of u[j+n], and it should be ignored 1525 // since it cancels with the borrow that occurred in D4. 1526 bool carry = false; 1527 for (unsigned i = 0; i < n; i++) { 1528 unsigned limit = std::min(u[j+i],v[i]); 1529 u[j+i] += v[i] + carry; 1530 carry = u[j+i] < limit || (carry && u[j+i] == limit); 1531 } 1532 u[j+n] += carry; 1533 } 1534 DEBUG(dbgs() << "KnuthDiv: after correction:"); 1535 DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); 1536 DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n'); 1537 1538 // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. 1539 } while (--j >= 0); 1540 1541 DEBUG(dbgs() << "KnuthDiv: quotient:"); 1542 DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]); 1543 DEBUG(dbgs() << '\n'); 1544 1545 // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired 1546 // remainder may be obtained by dividing u[...] by d. If r is non-null we 1547 // compute the remainder (urem uses this). 1548 if (r) { 1549 // The value d is expressed by the "shift" value above since we avoided 1550 // multiplication by d by using a shift left. So, all we have to do is 1551 // shift right here. 1552 if (shift) { 1553 unsigned carry = 0; 1554 DEBUG(dbgs() << "KnuthDiv: remainder:"); 1555 for (int i = n-1; i >= 0; i--) { 1556 r[i] = (u[i] >> shift) | carry; 1557 carry = u[i] << (32 - shift); 1558 DEBUG(dbgs() << " " << r[i]); 1559 } 1560 } else { 1561 for (int i = n-1; i >= 0; i--) { 1562 r[i] = u[i]; 1563 DEBUG(dbgs() << " " << r[i]); 1564 } 1565 } 1566 DEBUG(dbgs() << '\n'); 1567 } 1568 DEBUG(dbgs() << '\n'); 1569 } 1570 1571 void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, 1572 unsigned rhsWords, APInt *Quotient, APInt *Remainder) { 1573 assert(lhsWords >= rhsWords && "Fractional result"); 1574 1575 // First, compose the values into an array of 32-bit words instead of 1576 // 64-bit words. This is a necessity of both the "short division" algorithm 1577 // and the Knuth "classical algorithm" which requires there to be native 1578 // operations for +, -, and * on an m bit value with an m*2 bit result. We 1579 // can't use 64-bit operands here because we don't have native results of 1580 // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't 1581 // work on large-endian machines. 1582 uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT); 1583 unsigned n = rhsWords * 2; 1584 unsigned m = (lhsWords * 2) - n; 1585 1586 // Allocate space for the temporary values we need either on the stack, if 1587 // it will fit, or on the heap if it won't. 1588 unsigned SPACE[128]; 1589 unsigned *U = nullptr; 1590 unsigned *V = nullptr; 1591 unsigned *Q = nullptr; 1592 unsigned *R = nullptr; 1593 if ((Remainder?4:3)*n+2*m+1 <= 128) { 1594 U = &SPACE[0]; 1595 V = &SPACE[m+n+1]; 1596 Q = &SPACE[(m+n+1) + n]; 1597 if (Remainder) 1598 R = &SPACE[(m+n+1) + n + (m+n)]; 1599 } else { 1600 U = new unsigned[m + n + 1]; 1601 V = new unsigned[n]; 1602 Q = new unsigned[m+n]; 1603 if (Remainder) 1604 R = new unsigned[n]; 1605 } 1606 1607 // Initialize the dividend 1608 memset(U, 0, (m+n+1)*sizeof(unsigned)); 1609 for (unsigned i = 0; i < lhsWords; ++i) { 1610 uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]); 1611 U[i * 2] = (unsigned)(tmp & mask); 1612 U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT)); 1613 } 1614 U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm. 1615 1616 // Initialize the divisor 1617 memset(V, 0, (n)*sizeof(unsigned)); 1618 for (unsigned i = 0; i < rhsWords; ++i) { 1619 uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]); 1620 V[i * 2] = (unsigned)(tmp & mask); 1621 V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT)); 1622 } 1623 1624 // initialize the quotient and remainder 1625 memset(Q, 0, (m+n) * sizeof(unsigned)); 1626 if (Remainder) 1627 memset(R, 0, n * sizeof(unsigned)); 1628 1629 // Now, adjust m and n for the Knuth division. n is the number of words in 1630 // the divisor. m is the number of words by which the dividend exceeds the 1631 // divisor (i.e. m+n is the length of the dividend). These sizes must not 1632 // contain any zero words or the Knuth algorithm fails. 1633 for (unsigned i = n; i > 0 && V[i-1] == 0; i--) { 1634 n--; 1635 m++; 1636 } 1637 for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--) 1638 m--; 1639 1640 // If we're left with only a single word for the divisor, Knuth doesn't work 1641 // so we implement the short division algorithm here. This is much simpler 1642 // and faster because we are certain that we can divide a 64-bit quantity 1643 // by a 32-bit quantity at hardware speed and short division is simply a 1644 // series of such operations. This is just like doing short division but we 1645 // are using base 2^32 instead of base 10. 1646 assert(n != 0 && "Divide by zero?"); 1647 if (n == 1) { 1648 unsigned divisor = V[0]; 1649 unsigned remainder = 0; 1650 for (int i = m+n-1; i >= 0; i--) { 1651 uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i]; 1652 if (partial_dividend == 0) { 1653 Q[i] = 0; 1654 remainder = 0; 1655 } else if (partial_dividend < divisor) { 1656 Q[i] = 0; 1657 remainder = (unsigned)partial_dividend; 1658 } else if (partial_dividend == divisor) { 1659 Q[i] = 1; 1660 remainder = 0; 1661 } else { 1662 Q[i] = (unsigned)(partial_dividend / divisor); 1663 remainder = (unsigned)(partial_dividend - (Q[i] * divisor)); 1664 } 1665 } 1666 if (R) 1667 R[0] = remainder; 1668 } else { 1669 // Now we're ready to invoke the Knuth classical divide algorithm. In this 1670 // case n > 1. 1671 KnuthDiv(U, V, Q, R, m, n); 1672 } 1673 1674 // If the caller wants the quotient 1675 if (Quotient) { 1676 // Set up the Quotient value's memory. 1677 if (Quotient->BitWidth != LHS.BitWidth) { 1678 if (Quotient->isSingleWord()) 1679 Quotient->VAL = 0; 1680 else 1681 delete [] Quotient->pVal; 1682 Quotient->BitWidth = LHS.BitWidth; 1683 if (!Quotient->isSingleWord()) 1684 Quotient->pVal = getClearedMemory(Quotient->getNumWords()); 1685 } else 1686 Quotient->clearAllBits(); 1687 1688 // The quotient is in Q. Reconstitute the quotient into Quotient's low 1689 // order words. 1690 // This case is currently dead as all users of divide() handle trivial cases 1691 // earlier. 1692 if (lhsWords == 1) { 1693 uint64_t tmp = 1694 uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2)); 1695 if (Quotient->isSingleWord()) 1696 Quotient->VAL = tmp; 1697 else 1698 Quotient->pVal[0] = tmp; 1699 } else { 1700 assert(!Quotient->isSingleWord() && "Quotient APInt not large enough"); 1701 for (unsigned i = 0; i < lhsWords; ++i) 1702 Quotient->pVal[i] = 1703 uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2)); 1704 } 1705 } 1706 1707 // If the caller wants the remainder 1708 if (Remainder) { 1709 // Set up the Remainder value's memory. 1710 if (Remainder->BitWidth != RHS.BitWidth) { 1711 if (Remainder->isSingleWord()) 1712 Remainder->VAL = 0; 1713 else 1714 delete [] Remainder->pVal; 1715 Remainder->BitWidth = RHS.BitWidth; 1716 if (!Remainder->isSingleWord()) 1717 Remainder->pVal = getClearedMemory(Remainder->getNumWords()); 1718 } else 1719 Remainder->clearAllBits(); 1720 1721 // The remainder is in R. Reconstitute the remainder into Remainder's low 1722 // order words. 1723 if (rhsWords == 1) { 1724 uint64_t tmp = 1725 uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2)); 1726 if (Remainder->isSingleWord()) 1727 Remainder->VAL = tmp; 1728 else 1729 Remainder->pVal[0] = tmp; 1730 } else { 1731 assert(!Remainder->isSingleWord() && "Remainder APInt not large enough"); 1732 for (unsigned i = 0; i < rhsWords; ++i) 1733 Remainder->pVal[i] = 1734 uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2)); 1735 } 1736 } 1737 1738 // Clean up the memory we allocated. 1739 if (U != &SPACE[0]) { 1740 delete [] U; 1741 delete [] V; 1742 delete [] Q; 1743 delete [] R; 1744 } 1745 } 1746 1747 APInt APInt::udiv(const APInt& RHS) const { 1748 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1749 1750 // First, deal with the easy case 1751 if (isSingleWord()) { 1752 assert(RHS.VAL != 0 && "Divide by zero?"); 1753 return APInt(BitWidth, VAL / RHS.VAL); 1754 } 1755 1756 // Get some facts about the LHS and RHS number of bits and words 1757 unsigned rhsBits = RHS.getActiveBits(); 1758 unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1); 1759 assert(rhsWords && "Divided by zero???"); 1760 unsigned lhsBits = this->getActiveBits(); 1761 unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1); 1762 1763 // Deal with some degenerate cases 1764 if (!lhsWords) 1765 // 0 / X ===> 0 1766 return APInt(BitWidth, 0); 1767 else if (lhsWords < rhsWords || this->ult(RHS)) { 1768 // X / Y ===> 0, iff X < Y 1769 return APInt(BitWidth, 0); 1770 } else if (*this == RHS) { 1771 // X / X ===> 1 1772 return APInt(BitWidth, 1); 1773 } else if (lhsWords == 1 && rhsWords == 1) { 1774 // All high words are zero, just use native divide 1775 return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]); 1776 } 1777 1778 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1779 APInt Quotient(1,0); // to hold result. 1780 divide(*this, lhsWords, RHS, rhsWords, &Quotient, nullptr); 1781 return Quotient; 1782 } 1783 1784 APInt APInt::sdiv(const APInt &RHS) const { 1785 if (isNegative()) { 1786 if (RHS.isNegative()) 1787 return (-(*this)).udiv(-RHS); 1788 return -((-(*this)).udiv(RHS)); 1789 } 1790 if (RHS.isNegative()) 1791 return -(this->udiv(-RHS)); 1792 return this->udiv(RHS); 1793 } 1794 1795 APInt APInt::urem(const APInt& RHS) const { 1796 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1797 if (isSingleWord()) { 1798 assert(RHS.VAL != 0 && "Remainder by zero?"); 1799 return APInt(BitWidth, VAL % RHS.VAL); 1800 } 1801 1802 // Get some facts about the LHS 1803 unsigned lhsBits = getActiveBits(); 1804 unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1); 1805 1806 // Get some facts about the RHS 1807 unsigned rhsBits = RHS.getActiveBits(); 1808 unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1); 1809 assert(rhsWords && "Performing remainder operation by zero ???"); 1810 1811 // Check the degenerate cases 1812 if (lhsWords == 0) { 1813 // 0 % Y ===> 0 1814 return APInt(BitWidth, 0); 1815 } else if (lhsWords < rhsWords || this->ult(RHS)) { 1816 // X % Y ===> X, iff X < Y 1817 return *this; 1818 } else if (*this == RHS) { 1819 // X % X == 0; 1820 return APInt(BitWidth, 0); 1821 } else if (lhsWords == 1) { 1822 // All high words are zero, just use native remainder 1823 return APInt(BitWidth, pVal[0] % RHS.pVal[0]); 1824 } 1825 1826 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1827 APInt Remainder(1,0); 1828 divide(*this, lhsWords, RHS, rhsWords, nullptr, &Remainder); 1829 return Remainder; 1830 } 1831 1832 APInt APInt::srem(const APInt &RHS) const { 1833 if (isNegative()) { 1834 if (RHS.isNegative()) 1835 return -((-(*this)).urem(-RHS)); 1836 return -((-(*this)).urem(RHS)); 1837 } 1838 if (RHS.isNegative()) 1839 return this->urem(-RHS); 1840 return this->urem(RHS); 1841 } 1842 1843 void APInt::udivrem(const APInt &LHS, const APInt &RHS, 1844 APInt &Quotient, APInt &Remainder) { 1845 assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1846 1847 // First, deal with the easy case 1848 if (LHS.isSingleWord()) { 1849 assert(RHS.VAL != 0 && "Divide by zero?"); 1850 uint64_t QuotVal = LHS.VAL / RHS.VAL; 1851 uint64_t RemVal = LHS.VAL % RHS.VAL; 1852 Quotient = APInt(LHS.BitWidth, QuotVal); 1853 Remainder = APInt(LHS.BitWidth, RemVal); 1854 return; 1855 } 1856 1857 // Get some size facts about the dividend and divisor 1858 unsigned lhsBits = LHS.getActiveBits(); 1859 unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1); 1860 unsigned rhsBits = RHS.getActiveBits(); 1861 unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1); 1862 1863 // Check the degenerate cases 1864 if (lhsWords == 0) { 1865 Quotient = 0; // 0 / Y ===> 0 1866 Remainder = 0; // 0 % Y ===> 0 1867 return; 1868 } 1869 1870 if (lhsWords < rhsWords || LHS.ult(RHS)) { 1871 Remainder = LHS; // X % Y ===> X, iff X < Y 1872 Quotient = 0; // X / Y ===> 0, iff X < Y 1873 return; 1874 } 1875 1876 if (LHS == RHS) { 1877 Quotient = 1; // X / X ===> 1 1878 Remainder = 0; // X % X ===> 0; 1879 return; 1880 } 1881 1882 if (lhsWords == 1 && rhsWords == 1) { 1883 // There is only one word to consider so use the native versions. 1884 uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0]; 1885 uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0]; 1886 Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue); 1887 Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue); 1888 return; 1889 } 1890 1891 // Okay, lets do it the long way 1892 divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder); 1893 } 1894 1895 void APInt::sdivrem(const APInt &LHS, const APInt &RHS, 1896 APInt &Quotient, APInt &Remainder) { 1897 if (LHS.isNegative()) { 1898 if (RHS.isNegative()) 1899 APInt::udivrem(-LHS, -RHS, Quotient, Remainder); 1900 else { 1901 APInt::udivrem(-LHS, RHS, Quotient, Remainder); 1902 Quotient = -Quotient; 1903 } 1904 Remainder = -Remainder; 1905 } else if (RHS.isNegative()) { 1906 APInt::udivrem(LHS, -RHS, Quotient, Remainder); 1907 Quotient = -Quotient; 1908 } else { 1909 APInt::udivrem(LHS, RHS, Quotient, Remainder); 1910 } 1911 } 1912 1913 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const { 1914 APInt Res = *this+RHS; 1915 Overflow = isNonNegative() == RHS.isNonNegative() && 1916 Res.isNonNegative() != isNonNegative(); 1917 return Res; 1918 } 1919 1920 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const { 1921 APInt Res = *this+RHS; 1922 Overflow = Res.ult(RHS); 1923 return Res; 1924 } 1925 1926 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const { 1927 APInt Res = *this - RHS; 1928 Overflow = isNonNegative() != RHS.isNonNegative() && 1929 Res.isNonNegative() != isNonNegative(); 1930 return Res; 1931 } 1932 1933 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const { 1934 APInt Res = *this-RHS; 1935 Overflow = Res.ugt(*this); 1936 return Res; 1937 } 1938 1939 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const { 1940 // MININT/-1 --> overflow. 1941 Overflow = isMinSignedValue() && RHS.isAllOnesValue(); 1942 return sdiv(RHS); 1943 } 1944 1945 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const { 1946 APInt Res = *this * RHS; 1947 1948 if (*this != 0 && RHS != 0) 1949 Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS; 1950 else 1951 Overflow = false; 1952 return Res; 1953 } 1954 1955 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const { 1956 APInt Res = *this * RHS; 1957 1958 if (*this != 0 && RHS != 0) 1959 Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS; 1960 else 1961 Overflow = false; 1962 return Res; 1963 } 1964 1965 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const { 1966 Overflow = ShAmt.uge(getBitWidth()); 1967 if (Overflow) 1968 return APInt(BitWidth, 0); 1969 1970 if (isNonNegative()) // Don't allow sign change. 1971 Overflow = ShAmt.uge(countLeadingZeros()); 1972 else 1973 Overflow = ShAmt.uge(countLeadingOnes()); 1974 1975 return *this << ShAmt; 1976 } 1977 1978 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const { 1979 Overflow = ShAmt.uge(getBitWidth()); 1980 if (Overflow) 1981 return APInt(BitWidth, 0); 1982 1983 Overflow = ShAmt.ugt(countLeadingZeros()); 1984 1985 return *this << ShAmt; 1986 } 1987 1988 1989 1990 1991 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { 1992 // Check our assumptions here 1993 assert(!str.empty() && "Invalid string length"); 1994 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || 1995 radix == 36) && 1996 "Radix should be 2, 8, 10, 16, or 36!"); 1997 1998 StringRef::iterator p = str.begin(); 1999 size_t slen = str.size(); 2000 bool isNeg = *p == '-'; 2001 if (*p == '-' || *p == '+') { 2002 p++; 2003 slen--; 2004 assert(slen && "String is only a sign, needs a value."); 2005 } 2006 assert((slen <= numbits || radix != 2) && "Insufficient bit width"); 2007 assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width"); 2008 assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width"); 2009 assert((((slen-1)*64)/22 <= numbits || radix != 10) && 2010 "Insufficient bit width"); 2011 2012 // Allocate memory 2013 if (!isSingleWord()) 2014 pVal = getClearedMemory(getNumWords()); 2015 2016 // Figure out if we can shift instead of multiply 2017 unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0); 2018 2019 // Set up an APInt for the radix multiplier outside the loop so we don't 2020 // constantly construct/destruct it. 2021 APInt apradix(getBitWidth(), radix); 2022 2023 // Enter digit traversal loop 2024 for (StringRef::iterator e = str.end(); p != e; ++p) { 2025 unsigned digit = getDigit(*p, radix); 2026 assert(digit < radix && "Invalid character in digit string"); 2027 2028 // Shift or multiply the value by the radix 2029 if (slen > 1) { 2030 if (shift) 2031 *this <<= shift; 2032 else 2033 *this *= apradix; 2034 } 2035 2036 // Add in the digit we just interpreted 2037 *this += digit; 2038 } 2039 // If its negative, put it in two's complement form 2040 if (isNeg) { 2041 --(*this); 2042 this->flipAllBits(); 2043 } 2044 } 2045 2046 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, 2047 bool Signed, bool formatAsCLiteral) const { 2048 assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 || 2049 Radix == 36) && 2050 "Radix should be 2, 8, 10, 16, or 36!"); 2051 2052 const char *Prefix = ""; 2053 if (formatAsCLiteral) { 2054 switch (Radix) { 2055 case 2: 2056 // Binary literals are a non-standard extension added in gcc 4.3: 2057 // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html 2058 Prefix = "0b"; 2059 break; 2060 case 8: 2061 Prefix = "0"; 2062 break; 2063 case 10: 2064 break; // No prefix 2065 case 16: 2066 Prefix = "0x"; 2067 break; 2068 default: 2069 llvm_unreachable("Invalid radix!"); 2070 } 2071 } 2072 2073 // First, check for a zero value and just short circuit the logic below. 2074 if (*this == 0) { 2075 while (*Prefix) { 2076 Str.push_back(*Prefix); 2077 ++Prefix; 2078 }; 2079 Str.push_back('0'); 2080 return; 2081 } 2082 2083 static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 2084 2085 if (isSingleWord()) { 2086 char Buffer[65]; 2087 char *BufPtr = Buffer+65; 2088 2089 uint64_t N; 2090 if (!Signed) { 2091 N = getZExtValue(); 2092 } else { 2093 int64_t I = getSExtValue(); 2094 if (I >= 0) { 2095 N = I; 2096 } else { 2097 Str.push_back('-'); 2098 N = -(uint64_t)I; 2099 } 2100 } 2101 2102 while (*Prefix) { 2103 Str.push_back(*Prefix); 2104 ++Prefix; 2105 }; 2106 2107 while (N) { 2108 *--BufPtr = Digits[N % Radix]; 2109 N /= Radix; 2110 } 2111 Str.append(BufPtr, Buffer+65); 2112 return; 2113 } 2114 2115 APInt Tmp(*this); 2116 2117 if (Signed && isNegative()) { 2118 // They want to print the signed version and it is a negative value 2119 // Flip the bits and add one to turn it into the equivalent positive 2120 // value and put a '-' in the result. 2121 Tmp.flipAllBits(); 2122 ++Tmp; 2123 Str.push_back('-'); 2124 } 2125 2126 while (*Prefix) { 2127 Str.push_back(*Prefix); 2128 ++Prefix; 2129 }; 2130 2131 // We insert the digits backward, then reverse them to get the right order. 2132 unsigned StartDig = Str.size(); 2133 2134 // For the 2, 8 and 16 bit cases, we can just shift instead of divide 2135 // because the number of bits per digit (1, 3 and 4 respectively) divides 2136 // equally. We just shift until the value is zero. 2137 if (Radix == 2 || Radix == 8 || Radix == 16) { 2138 // Just shift tmp right for each digit width until it becomes zero 2139 unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1)); 2140 unsigned MaskAmt = Radix - 1; 2141 2142 while (Tmp != 0) { 2143 unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt; 2144 Str.push_back(Digits[Digit]); 2145 Tmp.lshrInPlace(ShiftAmt); 2146 } 2147 } else { 2148 APInt divisor(Radix == 10? 4 : 8, Radix); 2149 while (Tmp != 0) { 2150 APInt APdigit(1, 0); 2151 APInt tmp2(Tmp.getBitWidth(), 0); 2152 divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2, 2153 &APdigit); 2154 unsigned Digit = (unsigned)APdigit.getZExtValue(); 2155 assert(Digit < Radix && "divide failed"); 2156 Str.push_back(Digits[Digit]); 2157 Tmp = tmp2; 2158 } 2159 } 2160 2161 // Reverse the digits before returning. 2162 std::reverse(Str.begin()+StartDig, Str.end()); 2163 } 2164 2165 /// Returns the APInt as a std::string. Note that this is an inefficient method. 2166 /// It is better to pass in a SmallVector/SmallString to the methods above. 2167 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const { 2168 SmallString<40> S; 2169 toString(S, Radix, Signed, /* formatAsCLiteral = */false); 2170 return S.str(); 2171 } 2172 2173 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2174 LLVM_DUMP_METHOD void APInt::dump() const { 2175 SmallString<40> S, U; 2176 this->toStringUnsigned(U); 2177 this->toStringSigned(S); 2178 dbgs() << "APInt(" << BitWidth << "b, " 2179 << U << "u " << S << "s)\n"; 2180 } 2181 #endif 2182 2183 void APInt::print(raw_ostream &OS, bool isSigned) const { 2184 SmallString<40> S; 2185 this->toString(S, 10, isSigned, /* formatAsCLiteral = */false); 2186 OS << S; 2187 } 2188 2189 // This implements a variety of operations on a representation of 2190 // arbitrary precision, two's-complement, bignum integer values. 2191 2192 // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe 2193 // and unrestricting assumption. 2194 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0, 2195 "Part width must be divisible by 2!"); 2196 2197 /* Some handy functions local to this file. */ 2198 2199 /* Returns the integer part with the least significant BITS set. 2200 BITS cannot be zero. */ 2201 static inline APInt::WordType lowBitMask(unsigned bits) { 2202 assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD); 2203 2204 return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits); 2205 } 2206 2207 /* Returns the value of the lower half of PART. */ 2208 static inline APInt::WordType lowHalf(APInt::WordType part) { 2209 return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2); 2210 } 2211 2212 /* Returns the value of the upper half of PART. */ 2213 static inline APInt::WordType highHalf(APInt::WordType part) { 2214 return part >> (APInt::APINT_BITS_PER_WORD / 2); 2215 } 2216 2217 /* Returns the bit number of the most significant set bit of a part. 2218 If the input number has no bits set -1U is returned. */ 2219 static unsigned partMSB(APInt::WordType value) { 2220 return findLastSet(value, ZB_Max); 2221 } 2222 2223 /* Returns the bit number of the least significant set bit of a 2224 part. If the input number has no bits set -1U is returned. */ 2225 static unsigned partLSB(APInt::WordType value) { 2226 return findFirstSet(value, ZB_Max); 2227 } 2228 2229 /* Sets the least significant part of a bignum to the input value, and 2230 zeroes out higher parts. */ 2231 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) { 2232 assert(parts > 0); 2233 2234 dst[0] = part; 2235 for (unsigned i = 1; i < parts; i++) 2236 dst[i] = 0; 2237 } 2238 2239 /* Assign one bignum to another. */ 2240 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) { 2241 for (unsigned i = 0; i < parts; i++) 2242 dst[i] = src[i]; 2243 } 2244 2245 /* Returns true if a bignum is zero, false otherwise. */ 2246 bool APInt::tcIsZero(const WordType *src, unsigned parts) { 2247 for (unsigned i = 0; i < parts; i++) 2248 if (src[i]) 2249 return false; 2250 2251 return true; 2252 } 2253 2254 /* Extract the given bit of a bignum; returns 0 or 1. */ 2255 int APInt::tcExtractBit(const WordType *parts, unsigned bit) { 2256 return (parts[whichWord(bit)] & maskBit(bit)) != 0; 2257 } 2258 2259 /* Set the given bit of a bignum. */ 2260 void APInt::tcSetBit(WordType *parts, unsigned bit) { 2261 parts[whichWord(bit)] |= maskBit(bit); 2262 } 2263 2264 /* Clears the given bit of a bignum. */ 2265 void APInt::tcClearBit(WordType *parts, unsigned bit) { 2266 parts[whichWord(bit)] &= ~maskBit(bit); 2267 } 2268 2269 /* Returns the bit number of the least significant set bit of a 2270 number. If the input number has no bits set -1U is returned. */ 2271 unsigned APInt::tcLSB(const WordType *parts, unsigned n) { 2272 for (unsigned i = 0; i < n; i++) { 2273 if (parts[i] != 0) { 2274 unsigned lsb = partLSB(parts[i]); 2275 2276 return lsb + i * APINT_BITS_PER_WORD; 2277 } 2278 } 2279 2280 return -1U; 2281 } 2282 2283 /* Returns the bit number of the most significant set bit of a number. 2284 If the input number has no bits set -1U is returned. */ 2285 unsigned APInt::tcMSB(const WordType *parts, unsigned n) { 2286 do { 2287 --n; 2288 2289 if (parts[n] != 0) { 2290 unsigned msb = partMSB(parts[n]); 2291 2292 return msb + n * APINT_BITS_PER_WORD; 2293 } 2294 } while (n); 2295 2296 return -1U; 2297 } 2298 2299 /* Copy the bit vector of width srcBITS from SRC, starting at bit 2300 srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes 2301 the least significant bit of DST. All high bits above srcBITS in 2302 DST are zero-filled. */ 2303 void 2304 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src, 2305 unsigned srcBits, unsigned srcLSB) { 2306 unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD; 2307 assert(dstParts <= dstCount); 2308 2309 unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD; 2310 tcAssign (dst, src + firstSrcPart, dstParts); 2311 2312 unsigned shift = srcLSB % APINT_BITS_PER_WORD; 2313 tcShiftRight (dst, dstParts, shift); 2314 2315 /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC 2316 in DST. If this is less that srcBits, append the rest, else 2317 clear the high bits. */ 2318 unsigned n = dstParts * APINT_BITS_PER_WORD - shift; 2319 if (n < srcBits) { 2320 WordType mask = lowBitMask (srcBits - n); 2321 dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask) 2322 << n % APINT_BITS_PER_WORD); 2323 } else if (n > srcBits) { 2324 if (srcBits % APINT_BITS_PER_WORD) 2325 dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD); 2326 } 2327 2328 /* Clear high parts. */ 2329 while (dstParts < dstCount) 2330 dst[dstParts++] = 0; 2331 } 2332 2333 /* DST += RHS + C where C is zero or one. Returns the carry flag. */ 2334 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs, 2335 WordType c, unsigned parts) { 2336 assert(c <= 1); 2337 2338 for (unsigned i = 0; i < parts; i++) { 2339 WordType l = dst[i]; 2340 if (c) { 2341 dst[i] += rhs[i] + 1; 2342 c = (dst[i] <= l); 2343 } else { 2344 dst[i] += rhs[i]; 2345 c = (dst[i] < l); 2346 } 2347 } 2348 2349 return c; 2350 } 2351 2352 /// This function adds a single "word" integer, src, to the multiple 2353 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and 2354 /// 1 is returned if there is a carry out, otherwise 0 is returned. 2355 /// @returns the carry of the addition. 2356 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src, 2357 unsigned parts) { 2358 for (unsigned i = 0; i < parts; ++i) { 2359 dst[i] += src; 2360 if (dst[i] >= src) 2361 return 0; // No need to carry so exit early. 2362 src = 1; // Carry one to next digit. 2363 } 2364 2365 return 1; 2366 } 2367 2368 /* DST -= RHS + C where C is zero or one. Returns the carry flag. */ 2369 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs, 2370 WordType c, unsigned parts) { 2371 assert(c <= 1); 2372 2373 for (unsigned i = 0; i < parts; i++) { 2374 WordType l = dst[i]; 2375 if (c) { 2376 dst[i] -= rhs[i] + 1; 2377 c = (dst[i] >= l); 2378 } else { 2379 dst[i] -= rhs[i]; 2380 c = (dst[i] > l); 2381 } 2382 } 2383 2384 return c; 2385 } 2386 2387 /// This function subtracts a single "word" (64-bit word), src, from 2388 /// the multi-word integer array, dst[], propagating the borrowed 1 value until 2389 /// no further borrowing is needed or it runs out of "words" in dst. The result 2390 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not 2391 /// exhausted. In other words, if src > dst then this function returns 1, 2392 /// otherwise 0. 2393 /// @returns the borrow out of the subtraction 2394 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src, 2395 unsigned parts) { 2396 for (unsigned i = 0; i < parts; ++i) { 2397 WordType Dst = dst[i]; 2398 dst[i] -= src; 2399 if (src <= Dst) 2400 return 0; // No need to borrow so exit early. 2401 src = 1; // We have to "borrow 1" from next "word" 2402 } 2403 2404 return 1; 2405 } 2406 2407 /* Negate a bignum in-place. */ 2408 void APInt::tcNegate(WordType *dst, unsigned parts) { 2409 tcComplement(dst, parts); 2410 tcIncrement(dst, parts); 2411 } 2412 2413 /* DST += SRC * MULTIPLIER + CARRY if add is true 2414 DST = SRC * MULTIPLIER + CARRY if add is false 2415 2416 Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC 2417 they must start at the same point, i.e. DST == SRC. 2418 2419 If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is 2420 returned. Otherwise DST is filled with the least significant 2421 DSTPARTS parts of the result, and if all of the omitted higher 2422 parts were zero return zero, otherwise overflow occurred and 2423 return one. */ 2424 int APInt::tcMultiplyPart(WordType *dst, const WordType *src, 2425 WordType multiplier, WordType carry, 2426 unsigned srcParts, unsigned dstParts, 2427 bool add) { 2428 /* Otherwise our writes of DST kill our later reads of SRC. */ 2429 assert(dst <= src || dst >= src + srcParts); 2430 assert(dstParts <= srcParts + 1); 2431 2432 /* N loops; minimum of dstParts and srcParts. */ 2433 unsigned n = dstParts < srcParts ? dstParts: srcParts; 2434 2435 unsigned i; 2436 for (i = 0; i < n; i++) { 2437 WordType low, mid, high, srcPart; 2438 2439 /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY. 2440 2441 This cannot overflow, because 2442 2443 (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1) 2444 2445 which is less than n^2. */ 2446 2447 srcPart = src[i]; 2448 2449 if (multiplier == 0 || srcPart == 0) { 2450 low = carry; 2451 high = 0; 2452 } else { 2453 low = lowHalf(srcPart) * lowHalf(multiplier); 2454 high = highHalf(srcPart) * highHalf(multiplier); 2455 2456 mid = lowHalf(srcPart) * highHalf(multiplier); 2457 high += highHalf(mid); 2458 mid <<= APINT_BITS_PER_WORD / 2; 2459 if (low + mid < low) 2460 high++; 2461 low += mid; 2462 2463 mid = highHalf(srcPart) * lowHalf(multiplier); 2464 high += highHalf(mid); 2465 mid <<= APINT_BITS_PER_WORD / 2; 2466 if (low + mid < low) 2467 high++; 2468 low += mid; 2469 2470 /* Now add carry. */ 2471 if (low + carry < low) 2472 high++; 2473 low += carry; 2474 } 2475 2476 if (add) { 2477 /* And now DST[i], and store the new low part there. */ 2478 if (low + dst[i] < low) 2479 high++; 2480 dst[i] += low; 2481 } else 2482 dst[i] = low; 2483 2484 carry = high; 2485 } 2486 2487 if (i < dstParts) { 2488 /* Full multiplication, there is no overflow. */ 2489 assert(i + 1 == dstParts); 2490 dst[i] = carry; 2491 return 0; 2492 } else { 2493 /* We overflowed if there is carry. */ 2494 if (carry) 2495 return 1; 2496 2497 /* We would overflow if any significant unwritten parts would be 2498 non-zero. This is true if any remaining src parts are non-zero 2499 and the multiplier is non-zero. */ 2500 if (multiplier) 2501 for (; i < srcParts; i++) 2502 if (src[i]) 2503 return 1; 2504 2505 /* We fitted in the narrow destination. */ 2506 return 0; 2507 } 2508 } 2509 2510 /* DST = LHS * RHS, where DST has the same width as the operands and 2511 is filled with the least significant parts of the result. Returns 2512 one if overflow occurred, otherwise zero. DST must be disjoint 2513 from both operands. */ 2514 int APInt::tcMultiply(WordType *dst, const WordType *lhs, 2515 const WordType *rhs, unsigned parts) { 2516 assert(dst != lhs && dst != rhs); 2517 2518 int overflow = 0; 2519 tcSet(dst, 0, parts); 2520 2521 for (unsigned i = 0; i < parts; i++) 2522 overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts, 2523 parts - i, true); 2524 2525 return overflow; 2526 } 2527 2528 /* DST = LHS * RHS, where DST has width the sum of the widths of the 2529 operands. No overflow occurs. DST must be disjoint from both 2530 operands. Returns the number of parts required to hold the 2531 result. */ 2532 unsigned APInt::tcFullMultiply(WordType *dst, const WordType *lhs, 2533 const WordType *rhs, unsigned lhsParts, 2534 unsigned rhsParts) { 2535 /* Put the narrower number on the LHS for less loops below. */ 2536 if (lhsParts > rhsParts) { 2537 return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts); 2538 } else { 2539 assert(dst != lhs && dst != rhs); 2540 2541 tcSet(dst, 0, rhsParts); 2542 2543 for (unsigned i = 0; i < lhsParts; i++) 2544 tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true); 2545 2546 unsigned n = lhsParts + rhsParts; 2547 2548 return n - (dst[n - 1] == 0); 2549 } 2550 } 2551 2552 /* If RHS is zero LHS and REMAINDER are left unchanged, return one. 2553 Otherwise set LHS to LHS / RHS with the fractional part discarded, 2554 set REMAINDER to the remainder, return zero. i.e. 2555 2556 OLD_LHS = RHS * LHS + REMAINDER 2557 2558 SCRATCH is a bignum of the same size as the operands and result for 2559 use by the routine; its contents need not be initialized and are 2560 destroyed. LHS, REMAINDER and SCRATCH must be distinct. 2561 */ 2562 int APInt::tcDivide(WordType *lhs, const WordType *rhs, 2563 WordType *remainder, WordType *srhs, 2564 unsigned parts) { 2565 assert(lhs != remainder && lhs != srhs && remainder != srhs); 2566 2567 unsigned shiftCount = tcMSB(rhs, parts) + 1; 2568 if (shiftCount == 0) 2569 return true; 2570 2571 shiftCount = parts * APINT_BITS_PER_WORD - shiftCount; 2572 unsigned n = shiftCount / APINT_BITS_PER_WORD; 2573 WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD); 2574 2575 tcAssign(srhs, rhs, parts); 2576 tcShiftLeft(srhs, parts, shiftCount); 2577 tcAssign(remainder, lhs, parts); 2578 tcSet(lhs, 0, parts); 2579 2580 /* Loop, subtracting SRHS if REMAINDER is greater and adding that to 2581 the total. */ 2582 for (;;) { 2583 int compare; 2584 2585 compare = tcCompare(remainder, srhs, parts); 2586 if (compare >= 0) { 2587 tcSubtract(remainder, srhs, 0, parts); 2588 lhs[n] |= mask; 2589 } 2590 2591 if (shiftCount == 0) 2592 break; 2593 shiftCount--; 2594 tcShiftRight(srhs, parts, 1); 2595 if ((mask >>= 1) == 0) { 2596 mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1); 2597 n--; 2598 } 2599 } 2600 2601 return false; 2602 } 2603 2604 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are 2605 /// no restrictions on Count. 2606 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) { 2607 // Don't bother performing a no-op shift. 2608 if (!Count) 2609 return; 2610 2611 /* WordShift is the inter-part shift; BitShift is is intra-part shift. */ 2612 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); 2613 unsigned BitShift = Count % APINT_BITS_PER_WORD; 2614 2615 // Fastpath for moving by whole words. 2616 if (BitShift == 0) { 2617 std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE); 2618 } else { 2619 while (Words-- > WordShift) { 2620 Dst[Words] = Dst[Words - WordShift] << BitShift; 2621 if (Words > WordShift) 2622 Dst[Words] |= 2623 Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift); 2624 } 2625 } 2626 2627 // Fill in the remainder with 0s. 2628 std::memset(Dst, 0, WordShift * APINT_WORD_SIZE); 2629 } 2630 2631 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There 2632 /// are no restrictions on Count. 2633 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) { 2634 // Don't bother performing a no-op shift. 2635 if (!Count) 2636 return; 2637 2638 // WordShift is the inter-part shift; BitShift is is intra-part shift. 2639 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); 2640 unsigned BitShift = Count % APINT_BITS_PER_WORD; 2641 2642 unsigned WordsToMove = Words - WordShift; 2643 // Fastpath for moving by whole words. 2644 if (BitShift == 0) { 2645 std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE); 2646 } else { 2647 for (unsigned i = 0; i != WordsToMove; ++i) { 2648 Dst[i] = Dst[i + WordShift] >> BitShift; 2649 if (i + 1 != WordsToMove) 2650 Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift); 2651 } 2652 } 2653 2654 // Fill in the remainder with 0s. 2655 std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE); 2656 } 2657 2658 /* Bitwise and of two bignums. */ 2659 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) { 2660 for (unsigned i = 0; i < parts; i++) 2661 dst[i] &= rhs[i]; 2662 } 2663 2664 /* Bitwise inclusive or of two bignums. */ 2665 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) { 2666 for (unsigned i = 0; i < parts; i++) 2667 dst[i] |= rhs[i]; 2668 } 2669 2670 /* Bitwise exclusive or of two bignums. */ 2671 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) { 2672 for (unsigned i = 0; i < parts; i++) 2673 dst[i] ^= rhs[i]; 2674 } 2675 2676 /* Complement a bignum in-place. */ 2677 void APInt::tcComplement(WordType *dst, unsigned parts) { 2678 for (unsigned i = 0; i < parts; i++) 2679 dst[i] = ~dst[i]; 2680 } 2681 2682 /* Comparison (unsigned) of two bignums. */ 2683 int APInt::tcCompare(const WordType *lhs, const WordType *rhs, 2684 unsigned parts) { 2685 while (parts) { 2686 parts--; 2687 if (lhs[parts] == rhs[parts]) 2688 continue; 2689 2690 return (lhs[parts] > rhs[parts]) ? 1 : -1; 2691 } 2692 2693 return 0; 2694 } 2695 2696 /* Set the least significant BITS bits of a bignum, clear the 2697 rest. */ 2698 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts, 2699 unsigned bits) { 2700 unsigned i = 0; 2701 while (bits > APINT_BITS_PER_WORD) { 2702 dst[i++] = ~(WordType) 0; 2703 bits -= APINT_BITS_PER_WORD; 2704 } 2705 2706 if (bits) 2707 dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits); 2708 2709 while (i < parts) 2710 dst[i++] = 0; 2711 } 2712