1 //===--- DeltaTree.cpp - B-Tree for Rewrite Delta tracking ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the DeltaTree and related classes. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/Rewrite/DeltaTree.h" 15 #include "llvm/Support/Casting.h" 16 #include <cstring> 17 #include <cstdio> 18 using namespace clang; 19 using llvm::cast; 20 using llvm::dyn_cast; 21 22 /// The DeltaTree class is a multiway search tree (BTree) structure with some 23 /// fancy features. B-Trees are generally more memory and cache efficient 24 /// than binary trees, because they store multiple keys/values in each node. 25 /// 26 /// DeltaTree implements a key/value mapping from FileIndex to Delta, allowing 27 /// fast lookup by FileIndex. However, an added (important) bonus is that it 28 /// can also efficiently tell us the full accumulated delta for a specific 29 /// file offset as well, without traversing the whole tree. 30 /// 31 /// The nodes of the tree are made up of instances of two classes: 32 /// DeltaTreeNode and DeltaTreeInteriorNode. The later subclasses the 33 /// former and adds children pointers. Each node knows the full delta of all 34 /// entries (recursively) contained inside of it, which allows us to get the 35 /// full delta implied by a whole subtree in constant time. 36 37 namespace { 38 /// SourceDelta - As code in the original input buffer is added and deleted, 39 /// SourceDelta records are used to keep track of how the input SourceLocation 40 /// object is mapped into the output buffer. 41 struct SourceDelta { 42 unsigned FileLoc; 43 int Delta; 44 45 static SourceDelta get(unsigned Loc, int D) { 46 SourceDelta Delta; 47 Delta.FileLoc = Loc; 48 Delta.Delta = D; 49 return Delta; 50 } 51 }; 52 53 /// DeltaTreeNode - The common part of all nodes. 54 /// 55 class DeltaTreeNode { 56 public: 57 struct InsertResult { 58 DeltaTreeNode *LHS, *RHS; 59 SourceDelta Split; 60 }; 61 62 private: 63 friend class DeltaTreeInteriorNode; 64 65 /// WidthFactor - This controls the number of K/V slots held in the BTree: 66 /// how wide it is. Each level of the BTree is guaranteed to have at least 67 /// WidthFactor-1 K/V pairs (except the root) and may have at most 68 /// 2*WidthFactor-1 K/V pairs. 69 enum { WidthFactor = 8 }; 70 71 /// Values - This tracks the SourceDelta's currently in this node. 72 /// 73 SourceDelta Values[2*WidthFactor-1]; 74 75 /// NumValuesUsed - This tracks the number of values this node currently 76 /// holds. 77 unsigned char NumValuesUsed; 78 79 /// IsLeaf - This is true if this is a leaf of the btree. If false, this is 80 /// an interior node, and is actually an instance of DeltaTreeInteriorNode. 81 bool IsLeaf; 82 83 /// FullDelta - This is the full delta of all the values in this node and 84 /// all children nodes. 85 int FullDelta; 86 public: 87 DeltaTreeNode(bool isLeaf = true) 88 : NumValuesUsed(0), IsLeaf(isLeaf), FullDelta(0) {} 89 90 bool isLeaf() const { return IsLeaf; } 91 int getFullDelta() const { return FullDelta; } 92 bool isFull() const { return NumValuesUsed == 2*WidthFactor-1; } 93 94 unsigned getNumValuesUsed() const { return NumValuesUsed; } 95 const SourceDelta &getValue(unsigned i) const { 96 assert(i < NumValuesUsed && "Invalid value #"); 97 return Values[i]; 98 } 99 SourceDelta &getValue(unsigned i) { 100 assert(i < NumValuesUsed && "Invalid value #"); 101 return Values[i]; 102 } 103 104 /// DoInsertion - Do an insertion of the specified FileIndex/Delta pair into 105 /// this node. If insertion is easy, do it and return false. Otherwise, 106 /// split the node, populate InsertRes with info about the split, and return 107 /// true. 108 bool DoInsertion(unsigned FileIndex, int Delta, InsertResult *InsertRes); 109 110 void DoSplit(InsertResult &InsertRes); 111 112 113 /// RecomputeFullDeltaLocally - Recompute the FullDelta field by doing a 114 /// local walk over our contained deltas. 115 void RecomputeFullDeltaLocally(); 116 117 void Destroy(); 118 119 static inline bool classof(const DeltaTreeNode *) { return true; } 120 }; 121 } // end anonymous namespace 122 123 namespace { 124 /// DeltaTreeInteriorNode - When isLeaf = false, a node has child pointers. 125 /// This class tracks them. 126 class DeltaTreeInteriorNode : public DeltaTreeNode { 127 DeltaTreeNode *Children[2*WidthFactor]; 128 ~DeltaTreeInteriorNode() { 129 for (unsigned i = 0, e = NumValuesUsed+1; i != e; ++i) 130 Children[i]->Destroy(); 131 } 132 friend class DeltaTreeNode; 133 public: 134 DeltaTreeInteriorNode() : DeltaTreeNode(false /*nonleaf*/) {} 135 136 DeltaTreeInteriorNode(DeltaTreeNode *FirstChild) 137 : DeltaTreeNode(false /*nonleaf*/) { 138 FullDelta = FirstChild->FullDelta; 139 Children[0] = FirstChild; 140 } 141 142 DeltaTreeInteriorNode(const InsertResult &IR) 143 : DeltaTreeNode(false /*nonleaf*/) { 144 Children[0] = IR.LHS; 145 Children[1] = IR.RHS; 146 Values[0] = IR.Split; 147 FullDelta = IR.LHS->getFullDelta()+IR.RHS->getFullDelta()+IR.Split.Delta; 148 NumValuesUsed = 1; 149 } 150 151 const DeltaTreeNode *getChild(unsigned i) const { 152 assert(i < getNumValuesUsed()+1 && "Invalid child"); 153 return Children[i]; 154 } 155 DeltaTreeNode *getChild(unsigned i) { 156 assert(i < getNumValuesUsed()+1 && "Invalid child"); 157 return Children[i]; 158 } 159 160 static inline bool classof(const DeltaTreeInteriorNode *) { return true; } 161 static inline bool classof(const DeltaTreeNode *N) { return !N->isLeaf(); } 162 }; 163 } 164 165 166 /// Destroy - A 'virtual' destructor. 167 void DeltaTreeNode::Destroy() { 168 if (isLeaf()) 169 delete this; 170 else 171 delete cast<DeltaTreeInteriorNode>(this); 172 } 173 174 /// RecomputeFullDeltaLocally - Recompute the FullDelta field by doing a 175 /// local walk over our contained deltas. 176 void DeltaTreeNode::RecomputeFullDeltaLocally() { 177 int NewFullDelta = 0; 178 for (unsigned i = 0, e = getNumValuesUsed(); i != e; ++i) 179 NewFullDelta += Values[i].Delta; 180 if (DeltaTreeInteriorNode *IN = dyn_cast<DeltaTreeInteriorNode>(this)) 181 for (unsigned i = 0, e = getNumValuesUsed()+1; i != e; ++i) 182 NewFullDelta += IN->getChild(i)->getFullDelta(); 183 FullDelta = NewFullDelta; 184 } 185 186 /// DoInsertion - Do an insertion of the specified FileIndex/Delta pair into 187 /// this node. If insertion is easy, do it and return false. Otherwise, 188 /// split the node, populate InsertRes with info about the split, and return 189 /// true. 190 bool DeltaTreeNode::DoInsertion(unsigned FileIndex, int Delta, 191 InsertResult *InsertRes) { 192 // Maintain full delta for this node. 193 FullDelta += Delta; 194 195 // Find the insertion point, the first delta whose index is >= FileIndex. 196 unsigned i = 0, e = getNumValuesUsed(); 197 while (i != e && FileIndex > getValue(i).FileLoc) 198 ++i; 199 200 // If we found an a record for exactly this file index, just merge this 201 // value into the pre-existing record and finish early. 202 if (i != e && getValue(i).FileLoc == FileIndex) { 203 // NOTE: Delta could drop to zero here. This means that the delta entry is 204 // useless and could be removed. Supporting erases is more complex than 205 // leaving an entry with Delta=0, so we just leave an entry with Delta=0 in 206 // the tree. 207 Values[i].Delta += Delta; 208 return false; 209 } 210 211 // Otherwise, we found an insertion point, and we know that the value at the 212 // specified index is > FileIndex. Handle the leaf case first. 213 if (isLeaf()) { 214 if (!isFull()) { 215 // For an insertion into a non-full leaf node, just insert the value in 216 // its sorted position. This requires moving later values over. 217 if (i != e) 218 memmove(&Values[i+1], &Values[i], sizeof(Values[0])*(e-i)); 219 Values[i] = SourceDelta::get(FileIndex, Delta); 220 ++NumValuesUsed; 221 return false; 222 } 223 224 // Otherwise, if this is leaf is full, split the node at its median, insert 225 // the value into one of the children, and return the result. 226 assert(InsertRes && "No result location specified"); 227 DoSplit(*InsertRes); 228 229 if (InsertRes->Split.FileLoc > FileIndex) 230 InsertRes->LHS->DoInsertion(FileIndex, Delta, 0 /*can't fail*/); 231 else 232 InsertRes->RHS->DoInsertion(FileIndex, Delta, 0 /*can't fail*/); 233 return true; 234 } 235 236 // Otherwise, this is an interior node. Send the request down the tree. 237 DeltaTreeInteriorNode *IN = cast<DeltaTreeInteriorNode>(this); 238 if (!IN->Children[i]->DoInsertion(FileIndex, Delta, InsertRes)) 239 return false; // If there was space in the child, just return. 240 241 // Okay, this split the subtree, producing a new value and two children to 242 // insert here. If this node is non-full, we can just insert it directly. 243 if (!isFull()) { 244 // Now that we have two nodes and a new element, insert the perclated value 245 // into ourself by moving all the later values/children down, then inserting 246 // the new one. 247 if (i != e) 248 memmove(&IN->Children[i+2], &IN->Children[i+1], 249 (e-i)*sizeof(IN->Children[0])); 250 IN->Children[i] = InsertRes->LHS; 251 IN->Children[i+1] = InsertRes->RHS; 252 253 if (e != i) 254 memmove(&Values[i+1], &Values[i], (e-i)*sizeof(Values[0])); 255 Values[i] = InsertRes->Split; 256 ++NumValuesUsed; 257 return false; 258 } 259 260 // Finally, if this interior node was full and a node is percolated up, split 261 // ourself and return that up the chain. Start by saving all our info to 262 // avoid having the split clobber it. 263 IN->Children[i] = InsertRes->LHS; 264 DeltaTreeNode *SubRHS = InsertRes->RHS; 265 SourceDelta SubSplit = InsertRes->Split; 266 267 // Do the split. 268 DoSplit(*InsertRes); 269 270 // Figure out where to insert SubRHS/NewSplit. 271 DeltaTreeInteriorNode *InsertSide; 272 if (SubSplit.FileLoc < InsertRes->Split.FileLoc) 273 InsertSide = cast<DeltaTreeInteriorNode>(InsertRes->LHS); 274 else 275 InsertSide = cast<DeltaTreeInteriorNode>(InsertRes->RHS); 276 277 // We now have a non-empty interior node 'InsertSide' to insert 278 // SubRHS/SubSplit into. Find out where to insert SubSplit. 279 280 // Find the insertion point, the first delta whose index is >SubSplit.FileLoc. 281 i = 0; e = InsertSide->getNumValuesUsed(); 282 while (i != e && SubSplit.FileLoc > InsertSide->getValue(i).FileLoc) 283 ++i; 284 285 // Now we know that i is the place to insert the split value into. Insert it 286 // and the child right after it. 287 if (i != e) 288 memmove(&InsertSide->Children[i+2], &InsertSide->Children[i+1], 289 (e-i)*sizeof(IN->Children[0])); 290 InsertSide->Children[i+1] = SubRHS; 291 292 if (e != i) 293 memmove(&InsertSide->Values[i+1], &InsertSide->Values[i], 294 (e-i)*sizeof(Values[0])); 295 InsertSide->Values[i] = SubSplit; 296 ++InsertSide->NumValuesUsed; 297 InsertSide->FullDelta += SubSplit.Delta + SubRHS->getFullDelta(); 298 return true; 299 } 300 301 /// DoSplit - Split the currently full node (which has 2*WidthFactor-1 values) 302 /// into two subtrees each with "WidthFactor-1" values and a pivot value. 303 /// Return the pieces in InsertRes. 304 void DeltaTreeNode::DoSplit(InsertResult &InsertRes) { 305 assert(isFull() && "Why split a non-full node?"); 306 307 // Since this node is full, it contains 2*WidthFactor-1 values. We move 308 // the first 'WidthFactor-1' values to the LHS child (which we leave in this 309 // node), propagate one value up, and move the last 'WidthFactor-1' values 310 // into the RHS child. 311 312 // Create the new child node. 313 DeltaTreeNode *NewNode; 314 if (DeltaTreeInteriorNode *IN = dyn_cast<DeltaTreeInteriorNode>(this)) { 315 // If this is an interior node, also move over 'WidthFactor' children 316 // into the new node. 317 DeltaTreeInteriorNode *New = new DeltaTreeInteriorNode(); 318 memcpy(&New->Children[0], &IN->Children[WidthFactor], 319 WidthFactor*sizeof(IN->Children[0])); 320 NewNode = New; 321 } else { 322 // Just create the new leaf node. 323 NewNode = new DeltaTreeNode(); 324 } 325 326 // Move over the last 'WidthFactor-1' values from here to NewNode. 327 memcpy(&NewNode->Values[0], &Values[WidthFactor], 328 (WidthFactor-1)*sizeof(Values[0])); 329 330 // Decrease the number of values in the two nodes. 331 NewNode->NumValuesUsed = NumValuesUsed = WidthFactor-1; 332 333 // Recompute the two nodes' full delta. 334 NewNode->RecomputeFullDeltaLocally(); 335 RecomputeFullDeltaLocally(); 336 337 InsertRes.LHS = this; 338 InsertRes.RHS = NewNode; 339 InsertRes.Split = Values[WidthFactor-1]; 340 } 341 342 343 344 //===----------------------------------------------------------------------===// 345 // DeltaTree Implementation 346 //===----------------------------------------------------------------------===// 347 348 //#define VERIFY_TREE 349 350 #ifdef VERIFY_TREE 351 /// VerifyTree - Walk the btree performing assertions on various properties to 352 /// verify consistency. This is useful for debugging new changes to the tree. 353 static void VerifyTree(const DeltaTreeNode *N) { 354 const DeltaTreeInteriorNode *IN = dyn_cast<DeltaTreeInteriorNode>(N); 355 if (IN == 0) { 356 // Verify leaves, just ensure that FullDelta matches up and the elements 357 // are in proper order. 358 int FullDelta = 0; 359 for (unsigned i = 0, e = N->getNumValuesUsed(); i != e; ++i) { 360 if (i) 361 assert(N->getValue(i-1).FileLoc < N->getValue(i).FileLoc); 362 FullDelta += N->getValue(i).Delta; 363 } 364 assert(FullDelta == N->getFullDelta()); 365 return; 366 } 367 368 // Verify interior nodes: Ensure that FullDelta matches up and the 369 // elements are in proper order and the children are in proper order. 370 int FullDelta = 0; 371 for (unsigned i = 0, e = IN->getNumValuesUsed(); i != e; ++i) { 372 const SourceDelta &IVal = N->getValue(i); 373 const DeltaTreeNode *IChild = IN->getChild(i); 374 if (i) 375 assert(IN->getValue(i-1).FileLoc < IVal.FileLoc); 376 FullDelta += IVal.Delta; 377 FullDelta += IChild->getFullDelta(); 378 379 // The largest value in child #i should be smaller than FileLoc. 380 assert(IChild->getValue(IChild->getNumValuesUsed()-1).FileLoc < 381 IVal.FileLoc); 382 383 // The smallest value in child #i+1 should be larger than FileLoc. 384 assert(IN->getChild(i+1)->getValue(0).FileLoc > IVal.FileLoc); 385 VerifyTree(IChild); 386 } 387 388 FullDelta += IN->getChild(IN->getNumValuesUsed())->getFullDelta(); 389 390 assert(FullDelta == N->getFullDelta()); 391 } 392 #endif // VERIFY_TREE 393 394 static DeltaTreeNode *getRoot(void *Root) { 395 return (DeltaTreeNode*)Root; 396 } 397 398 DeltaTree::DeltaTree() { 399 Root = new DeltaTreeNode(); 400 } 401 DeltaTree::DeltaTree(const DeltaTree &RHS) { 402 // Currently we only support copying when the RHS is empty. 403 assert(getRoot(RHS.Root)->getNumValuesUsed() == 0 && 404 "Can only copy empty tree"); 405 Root = new DeltaTreeNode(); 406 } 407 408 DeltaTree::~DeltaTree() { 409 getRoot(Root)->Destroy(); 410 } 411 412 /// getDeltaAt - Return the accumulated delta at the specified file offset. 413 /// This includes all insertions or delections that occurred *before* the 414 /// specified file index. 415 int DeltaTree::getDeltaAt(unsigned FileIndex) const { 416 const DeltaTreeNode *Node = getRoot(Root); 417 418 int Result = 0; 419 420 // Walk down the tree. 421 while (1) { 422 // For all nodes, include any local deltas before the specified file 423 // index by summing them up directly. Keep track of how many were 424 // included. 425 unsigned NumValsGreater = 0; 426 for (unsigned e = Node->getNumValuesUsed(); NumValsGreater != e; 427 ++NumValsGreater) { 428 const SourceDelta &Val = Node->getValue(NumValsGreater); 429 430 if (Val.FileLoc >= FileIndex) 431 break; 432 Result += Val.Delta; 433 } 434 435 // If we have an interior node, include information about children and 436 // recurse. Otherwise, if we have a leaf, we're done. 437 const DeltaTreeInteriorNode *IN = dyn_cast<DeltaTreeInteriorNode>(Node); 438 if (!IN) return Result; 439 440 // Include any children to the left of the values we skipped, all of 441 // their deltas should be included as well. 442 for (unsigned i = 0; i != NumValsGreater; ++i) 443 Result += IN->getChild(i)->getFullDelta(); 444 445 // If we found exactly the value we were looking for, break off the 446 // search early. There is no need to search the RHS of the value for 447 // partial results. 448 if (NumValsGreater != Node->getNumValuesUsed() && 449 Node->getValue(NumValsGreater).FileLoc == FileIndex) 450 return Result+IN->getChild(NumValsGreater)->getFullDelta(); 451 452 // Otherwise, traverse down the tree. The selected subtree may be 453 // partially included in the range. 454 Node = IN->getChild(NumValsGreater); 455 } 456 // NOT REACHED. 457 } 458 459 /// AddDelta - When a change is made that shifts around the text buffer, 460 /// this method is used to record that info. It inserts a delta of 'Delta' 461 /// into the current DeltaTree at offset FileIndex. 462 void DeltaTree::AddDelta(unsigned FileIndex, int Delta) { 463 assert(Delta && "Adding a noop?"); 464 DeltaTreeNode *MyRoot = getRoot(Root); 465 466 DeltaTreeNode::InsertResult InsertRes; 467 if (MyRoot->DoInsertion(FileIndex, Delta, &InsertRes)) { 468 Root = MyRoot = new DeltaTreeInteriorNode(InsertRes); 469 } 470 471 #ifdef VERIFY_TREE 472 VerifyTree(MyRoot); 473 #endif 474 } 475 476