1 //===- Delta.cpp - Delta Debugging Algorithm Implementation ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the implementation for the Delta Debugging Algorithm: 10 // it splits a given set of Targets (i.e. Functions, Instructions, BBs, etc.) 11 // into chunks and tries to reduce the number chunks that are interesting. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "Delta.h" 16 #include "ReducerWorkItem.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/Bitcode/BitcodeReader.h" 19 #include "llvm/Bitcode/BitcodeWriter.h" 20 #include "llvm/IR/Verifier.h" 21 #include "llvm/Support/CommandLine.h" 22 #include "llvm/Support/ThreadPool.h" 23 #include "llvm/Support/ToolOutputFile.h" 24 #include <fstream> 25 #include <set> 26 27 using namespace llvm; 28 29 extern cl::OptionCategory LLVMReduceOptions; 30 31 static cl::opt<bool> AbortOnInvalidReduction( 32 "abort-on-invalid-reduction", 33 cl::desc("Abort if any reduction results in invalid IR"), 34 cl::cat(LLVMReduceOptions)); 35 36 static cl::opt<unsigned int> StartingGranularityLevel( 37 "starting-granularity-level", 38 cl::desc("Number of times to divide chunks prior to first test"), 39 cl::cat(LLVMReduceOptions)); 40 41 static cl::opt<bool> TmpFilesAsBitcode( 42 "write-tmp-files-as-bitcode", 43 cl::desc("Write temporary files as bitcode, instead of textual IR"), 44 cl::init(false), cl::cat(LLVMReduceOptions)); 45 46 #ifdef LLVM_ENABLE_THREADS 47 static cl::opt<unsigned> NumJobs( 48 "j", 49 cl::desc("Maximum number of threads to use to process chunks. Set to 1 to " 50 "disables parallelism."), 51 cl::init(1), cl::cat(LLVMReduceOptions)); 52 #else 53 unsigned NumJobs = 1; 54 #endif 55 56 void writeOutput(ReducerWorkItem &M, llvm::StringRef Message); 57 58 bool isReduced(ReducerWorkItem &M, TestRunner &Test, 59 SmallString<128> &CurrentFilepath) { 60 // Write ReducerWorkItem to tmp file 61 int FD; 62 std::error_code EC = sys::fs::createTemporaryFile( 63 "llvm-reduce", M.isMIR() ? "mir" : (TmpFilesAsBitcode ? "bc" : "ll"), FD, 64 CurrentFilepath); 65 if (EC) { 66 errs() << "Error making unique filename: " << EC.message() << "!\n"; 67 exit(1); 68 } 69 70 if (TmpFilesAsBitcode) { 71 llvm::raw_fd_ostream OutStream(FD, true); 72 WriteBitcodeToFile(M, OutStream); 73 OutStream.close(); 74 if (OutStream.has_error()) { 75 errs() << "Error emitting bitcode to file '" << CurrentFilepath << "'!\n"; 76 sys::fs::remove(CurrentFilepath); 77 exit(1); 78 } 79 bool Res = Test.run(CurrentFilepath); 80 sys::fs::remove(CurrentFilepath); 81 return Res; 82 } 83 ToolOutputFile Out(CurrentFilepath, FD); 84 M.print(Out.os(), /*AnnotationWriter=*/nullptr); 85 Out.os().close(); 86 if (Out.os().has_error()) { 87 errs() << "Error emitting bitcode to file '" << CurrentFilepath << "'!\n"; 88 exit(1); 89 } 90 91 // Current Chunks aren't interesting 92 return Test.run(CurrentFilepath); 93 } 94 95 /// Counts the amount of lines for a given file 96 static int getLines(StringRef Filepath) { 97 int Lines = 0; 98 std::string CurrLine; 99 std::ifstream FileStream{std::string(Filepath)}; 100 101 while (std::getline(FileStream, CurrLine)) 102 ++Lines; 103 104 return Lines; 105 } 106 107 /// Splits Chunks in half and prints them. 108 /// If unable to split (when chunk size is 1) returns false. 109 static bool increaseGranularity(std::vector<Chunk> &Chunks) { 110 errs() << "Increasing granularity..."; 111 std::vector<Chunk> NewChunks; 112 bool SplitOne = false; 113 114 for (auto &C : Chunks) { 115 if (C.End - C.Begin == 0) 116 NewChunks.push_back(C); 117 else { 118 int Half = (C.Begin + C.End) / 2; 119 NewChunks.push_back({C.Begin, Half}); 120 NewChunks.push_back({Half + 1, C.End}); 121 SplitOne = true; 122 } 123 } 124 if (SplitOne) { 125 Chunks = NewChunks; 126 errs() << "Success! New Chunks:\n"; 127 for (auto C : Chunks) { 128 errs() << '\t'; 129 C.print(); 130 errs() << '\n'; 131 } 132 } 133 return SplitOne; 134 } 135 136 // Check if \p ChunkToCheckForUninterestingness is interesting. Returns the 137 // modified module if the chunk resulted in a reduction. 138 template <typename FuncType> 139 static std::unique_ptr<ReducerWorkItem> 140 CheckChunk(Chunk &ChunkToCheckForUninterestingness, 141 std::unique_ptr<ReducerWorkItem> Clone, TestRunner &Test, 142 FuncType ExtractChunksFromModule, 143 std::set<Chunk> &UninterestingChunks, 144 std::vector<Chunk> &ChunksStillConsideredInteresting) { 145 // Take all of ChunksStillConsideredInteresting chunks, except those we've 146 // already deemed uninteresting (UninterestingChunks) but didn't remove 147 // from ChunksStillConsideredInteresting yet, and additionally ignore 148 // ChunkToCheckForUninterestingness chunk. 149 std::vector<Chunk> CurrentChunks; 150 CurrentChunks.reserve(ChunksStillConsideredInteresting.size() - 151 UninterestingChunks.size() - 1); 152 copy_if(ChunksStillConsideredInteresting, std::back_inserter(CurrentChunks), 153 [&](const Chunk &C) { 154 return !UninterestingChunks.count(C) && 155 C != ChunkToCheckForUninterestingness; 156 }); 157 158 // Generate Module with only Targets inside Current Chunks 159 Oracle O(CurrentChunks); 160 ExtractChunksFromModule(O, *Clone); 161 162 // Some reductions may result in invalid IR. Skip such reductions. 163 if (verifyReducerWorkItem(*Clone, &errs())) { 164 if (AbortOnInvalidReduction) { 165 errs() << "Invalid reduction\n"; 166 exit(1); 167 } 168 errs() << " **** WARNING | reduction resulted in invalid module, " 169 "skipping\n"; 170 return nullptr; 171 } 172 173 errs() << "Ignoring: "; 174 ChunkToCheckForUninterestingness.print(); 175 for (const Chunk &C : UninterestingChunks) 176 C.print(); 177 178 SmallString<128> CurrentFilepath; 179 if (!isReduced(*Clone, Test, CurrentFilepath)) { 180 // Program became non-reduced, so this chunk appears to be interesting. 181 errs() << "\n"; 182 return nullptr; 183 } 184 return Clone; 185 } 186 187 template <typename FuncType> 188 SmallString<0> ProcessChunkFromSerializedBitcode( 189 Chunk &ChunkToCheckForUninterestingness, TestRunner &Test, 190 FuncType ExtractChunksFromModule, std::set<Chunk> &UninterestingChunks, 191 std::vector<Chunk> &ChunksStillConsideredInteresting, 192 SmallString<0> &OriginalBC, std::atomic<bool> &AnyReduced) { 193 LLVMContext Ctx; 194 Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile( 195 MemoryBufferRef(StringRef(OriginalBC.data(), OriginalBC.size()), 196 "<llvm-reduce tmp module>"), 197 Ctx); 198 if (!MOrErr) 199 report_fatal_error("Failed to read bitcode"); 200 auto CloneMMM = std::make_unique<ReducerWorkItem>(); 201 CloneMMM->M = std::move(MOrErr.get()); 202 203 SmallString<0> Result; 204 if (std::unique_ptr<ReducerWorkItem> ChunkResult = 205 CheckChunk(ChunkToCheckForUninterestingness, std::move(CloneMMM), 206 Test, ExtractChunksFromModule, UninterestingChunks, 207 ChunksStillConsideredInteresting)) { 208 raw_svector_ostream BCOS(Result); 209 WriteBitcodeToFile(*ChunkResult->M, BCOS); 210 // Communicate that the task reduced a chunk. 211 AnyReduced = true; 212 } 213 return Result; 214 } 215 216 /// Runs the Delta Debugging algorithm, splits the code into chunks and 217 /// reduces the amount of chunks that are considered interesting by the 218 /// given test. The number of chunks is determined by a preliminary run of the 219 /// reduction pass where no change must be made to the module. 220 void llvm::runDeltaPass(TestRunner &Test, 221 ReductionFunc ExtractChunksFromModule) { 222 assert(!verifyReducerWorkItem(Test.getProgram(), &errs()) && 223 "input module is broken before making changes"); 224 225 SmallString<128> CurrentFilepath; 226 if (!isReduced(Test.getProgram(), Test, CurrentFilepath)) { 227 errs() << "\nInput isn't interesting! Verify interesting-ness test\n"; 228 exit(1); 229 } 230 231 int Targets; 232 { 233 // Count the number of chunks by counting the number of calls to 234 // Oracle::shouldKeep() but always returning true so no changes are 235 // made. 236 std::vector<Chunk> AllChunks = {{0, INT_MAX}}; 237 Oracle Counter(AllChunks); 238 ExtractChunksFromModule(Counter, Test.getProgram()); 239 Targets = Counter.count(); 240 241 assert(!verifyReducerWorkItem(Test.getProgram(), &errs()) && 242 "input module is broken after counting chunks"); 243 assert(isReduced(Test.getProgram(), Test, CurrentFilepath) && 244 "input module no longer interesting after counting chunks"); 245 246 #ifndef NDEBUG 247 // Make sure that the number of chunks does not change as we reduce. 248 std::vector<Chunk> NoChunks; 249 Oracle NoChunksCounter(NoChunks); 250 std::unique_ptr<ReducerWorkItem> Clone = 251 cloneReducerWorkItem(Test.getProgram(), Test.getTargetMachine()); 252 ExtractChunksFromModule(NoChunksCounter, *Clone); 253 assert(Targets == NoChunksCounter.count() && 254 "number of chunks changes when reducing"); 255 #endif 256 } 257 if (!Targets) { 258 errs() << "\nNothing to reduce\n"; 259 return; 260 } 261 262 std::vector<Chunk> ChunksStillConsideredInteresting = {{0, Targets - 1}}; 263 std::unique_ptr<ReducerWorkItem> ReducedProgram; 264 265 for (unsigned int Level = 0; Level < StartingGranularityLevel; Level++) { 266 increaseGranularity(ChunksStillConsideredInteresting); 267 } 268 269 std::atomic<bool> AnyReduced; 270 std::unique_ptr<ThreadPool> ChunkThreadPoolPtr; 271 if (NumJobs > 1) 272 ChunkThreadPoolPtr = 273 std::make_unique<ThreadPool>(hardware_concurrency(NumJobs)); 274 275 bool FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity; 276 do { 277 FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity = false; 278 279 std::set<Chunk> UninterestingChunks; 280 281 // When running with more than one thread, serialize the original bitcode 282 // to OriginalBC. 283 SmallString<0> OriginalBC; 284 if (NumJobs > 1) { 285 raw_svector_ostream BCOS(OriginalBC); 286 WriteBitcodeToFile(*Test.getProgram().M, BCOS); 287 } 288 289 std::deque<std::shared_future<SmallString<0>>> TaskQueue; 290 for (auto I = ChunksStillConsideredInteresting.rbegin(), 291 E = ChunksStillConsideredInteresting.rend(); 292 I != E; ++I) { 293 std::unique_ptr<ReducerWorkItem> Result = nullptr; 294 unsigned WorkLeft = std::distance(I, E); 295 296 // Run in parallel mode, if the user requested more than one thread and 297 // there are at least a few chunks to process. 298 if (NumJobs > 1 && WorkLeft > 1) { 299 unsigned NumInitialTasks = std::min(WorkLeft, unsigned(NumJobs)); 300 unsigned NumChunksProcessed = 0; 301 302 ThreadPool &ChunkThreadPool = *ChunkThreadPoolPtr; 303 TaskQueue.clear(); 304 305 AnyReduced = false; 306 // Queue jobs to process NumInitialTasks chunks in parallel using 307 // ChunkThreadPool. When the tasks are added to the pool, parse the 308 // original module from OriginalBC with a fresh LLVMContext object. This 309 // ensures that the cloned module of each task uses an independent 310 // LLVMContext object. If a task reduces the input, serialize the result 311 // back in the corresponding Result element. 312 for (unsigned J = 0; J < NumInitialTasks; ++J) { 313 TaskQueue.emplace_back(ChunkThreadPool.async( 314 [J, I, &Test, &ExtractChunksFromModule, &UninterestingChunks, 315 &ChunksStillConsideredInteresting, &OriginalBC, &AnyReduced]() { 316 return ProcessChunkFromSerializedBitcode( 317 *(I + J), Test, ExtractChunksFromModule, 318 UninterestingChunks, ChunksStillConsideredInteresting, 319 OriginalBC, AnyReduced); 320 })); 321 } 322 323 // Start processing results of the queued tasks. We wait for the first 324 // task in the queue to finish. If it reduced a chunk, we parse the 325 // result and exit the loop. 326 // Otherwise we will try to schedule a new task, if 327 // * no other pending job reduced a chunk and 328 // * we have not reached the end of the chunk. 329 while (!TaskQueue.empty()) { 330 auto &Future = TaskQueue.front(); 331 Future.wait(); 332 333 NumChunksProcessed++; 334 SmallString<0> Res = Future.get(); 335 TaskQueue.pop_front(); 336 if (Res.empty()) { 337 unsigned NumScheduledTasks = NumChunksProcessed + TaskQueue.size(); 338 if (!AnyReduced && I + NumScheduledTasks != E) { 339 Chunk &ChunkToCheck = *(I + NumScheduledTasks); 340 TaskQueue.emplace_back(ChunkThreadPool.async( 341 [&Test, &ExtractChunksFromModule, &UninterestingChunks, 342 &ChunksStillConsideredInteresting, &OriginalBC, 343 &ChunkToCheck, &AnyReduced]() { 344 return ProcessChunkFromSerializedBitcode( 345 ChunkToCheck, Test, ExtractChunksFromModule, 346 UninterestingChunks, ChunksStillConsideredInteresting, 347 OriginalBC, AnyReduced); 348 })); 349 } 350 continue; 351 } 352 353 Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile( 354 MemoryBufferRef(StringRef(Res.data(), Res.size()), 355 "<llvm-reduce tmp module>"), 356 Test.getProgram().M->getContext()); 357 if (!MOrErr) 358 report_fatal_error("Failed to read bitcode"); 359 Result = std::make_unique<ReducerWorkItem>(); 360 Result->M = std::move(MOrErr.get()); 361 break; 362 } 363 // Forward I to the last chunk processed in parallel. 364 I += NumChunksProcessed - 1; 365 } else { 366 Result = CheckChunk( 367 *I, 368 cloneReducerWorkItem(Test.getProgram(), Test.getTargetMachine()), 369 Test, ExtractChunksFromModule, UninterestingChunks, 370 ChunksStillConsideredInteresting); 371 } 372 373 if (!Result) 374 continue; 375 376 Chunk &ChunkToCheckForUninterestingness = *I; 377 FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity = true; 378 UninterestingChunks.insert(ChunkToCheckForUninterestingness); 379 ReducedProgram = std::move(Result); 380 errs() << " **** SUCCESS | lines: " << getLines(CurrentFilepath) << "\n"; 381 writeOutput(*ReducedProgram, "Saved new best reduction to "); 382 } 383 // Delete uninteresting chunks 384 erase_if(ChunksStillConsideredInteresting, 385 [&UninterestingChunks](const Chunk &C) { 386 return UninterestingChunks.count(C); 387 }); 388 } while (!ChunksStillConsideredInteresting.empty() && 389 (FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity || 390 increaseGranularity(ChunksStillConsideredInteresting))); 391 392 // If we reduced the testcase replace it 393 if (ReducedProgram) 394 Test.setProgram(std::move(ReducedProgram)); 395 errs() << "Couldn't increase anymore.\n"; 396 } 397