1 //===- Delta.cpp - Delta Debugging Algorithm Implementation ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the implementation for the Delta Debugging Algorithm:
10 // it splits a given set of Targets (i.e. Functions, Instructions, BBs, etc.)
11 // into chunks and tries to reduce the number chunks that are interesting.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "Delta.h"
16 #include "ReducerWorkItem.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Analysis/ProfileSummaryInfo.h"
19 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
20 #include "llvm/Bitcode/BitcodeReader.h"
21 #include "llvm/Bitcode/BitcodeWriter.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/IR/Module.h"
24 #include "llvm/IR/Verifier.h"
25 #include "llvm/MC/TargetRegistry.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/MemoryBufferRef.h"
28 #include "llvm/Support/ThreadPool.h"
29 #include "llvm/Support/ToolOutputFile.h"
30 #include <fstream>
31 #include <set>
32 
33 using namespace llvm;
34 
35 extern cl::OptionCategory LLVMReduceOptions;
36 
37 static cl::opt<bool> AbortOnInvalidReduction(
38     "abort-on-invalid-reduction",
39     cl::desc("Abort if any reduction results in invalid IR"),
40     cl::cat(LLVMReduceOptions));
41 
42 static cl::opt<unsigned int> StartingGranularityLevel(
43     "starting-granularity-level",
44     cl::desc("Number of times to divide chunks prior to first test"),
45     cl::cat(LLVMReduceOptions));
46 
47 static cl::opt<bool> TmpFilesAsBitcode(
48     "write-tmp-files-as-bitcode",
49     cl::desc("Write temporary files as bitcode, instead of textual IR"),
50     cl::init(false), cl::cat(LLVMReduceOptions));
51 
52 #ifdef LLVM_ENABLE_THREADS
53 static cl::opt<unsigned> NumJobs(
54     "j",
55     cl::desc("Maximum number of threads to use to process chunks. Set to 1 to "
56              "disables parallelism."),
57     cl::init(1), cl::cat(LLVMReduceOptions));
58 #else
59 unsigned NumJobs = 1;
60 #endif
61 
62 void writeOutput(ReducerWorkItem &M, llvm::StringRef Message);
63 
64 void writeBitcode(ReducerWorkItem &M, raw_ostream &OutStream);
65 
66 void readBitcode(ReducerWorkItem &M, MemoryBufferRef Data, LLVMContext &Ctx,
67                  const char *ToolName);
68 
isReduced(ReducerWorkItem & M,TestRunner & Test,SmallString<128> & CurrentFilepath)69 bool isReduced(ReducerWorkItem &M, TestRunner &Test,
70                SmallString<128> &CurrentFilepath) {
71   // Write ReducerWorkItem to tmp file
72   int FD;
73   std::error_code EC = sys::fs::createTemporaryFile(
74       "llvm-reduce", M.isMIR() ? "mir" : (TmpFilesAsBitcode ? "bc" : "ll"), FD,
75       CurrentFilepath);
76   if (EC) {
77     errs() << "Error making unique filename: " << EC.message() << "!\n";
78     exit(1);
79   }
80 
81   if (TmpFilesAsBitcode) {
82     llvm::raw_fd_ostream OutStream(FD, true);
83     writeBitcode(M, OutStream);
84     OutStream.close();
85     if (OutStream.has_error()) {
86       errs() << "Error emitting bitcode to file '" << CurrentFilepath << "'!\n";
87       sys::fs::remove(CurrentFilepath);
88       exit(1);
89     }
90     bool Res = Test.run(CurrentFilepath);
91     sys::fs::remove(CurrentFilepath);
92     return Res;
93   }
94   ToolOutputFile Out(CurrentFilepath, FD);
95   M.print(Out.os(), /*AnnotationWriter=*/nullptr);
96   Out.os().close();
97   if (Out.os().has_error()) {
98     errs() << "Error emitting bitcode to file '" << CurrentFilepath << "'!\n";
99     exit(1);
100   }
101 
102   // Current Chunks aren't interesting
103   return Test.run(CurrentFilepath);
104 }
105 
106 /// Counts the amount of lines for a given file
getLines(StringRef Filepath)107 static int getLines(StringRef Filepath) {
108   int Lines = 0;
109   std::string CurrLine;
110   std::ifstream FileStream{std::string(Filepath)};
111 
112   while (std::getline(FileStream, CurrLine))
113     ++Lines;
114 
115   return Lines;
116 }
117 
118 /// Splits Chunks in half and prints them.
119 /// If unable to split (when chunk size is 1) returns false.
increaseGranularity(std::vector<Chunk> & Chunks)120 static bool increaseGranularity(std::vector<Chunk> &Chunks) {
121   errs() << "Increasing granularity...";
122   std::vector<Chunk> NewChunks;
123   bool SplitOne = false;
124 
125   for (auto &C : Chunks) {
126     if (C.End - C.Begin == 0)
127       NewChunks.push_back(C);
128     else {
129       int Half = (C.Begin + C.End) / 2;
130       NewChunks.push_back({C.Begin, Half});
131       NewChunks.push_back({Half + 1, C.End});
132       SplitOne = true;
133     }
134   }
135   if (SplitOne) {
136     Chunks = NewChunks;
137     errs() << "Success! New Chunks:\n";
138     for (auto C : Chunks) {
139       errs() << '\t';
140       C.print();
141       errs() << '\n';
142     }
143   }
144   return SplitOne;
145 }
146 
147 // Check if \p ChunkToCheckForUninterestingness is interesting. Returns the
148 // modified module if the chunk resulted in a reduction.
149 template <typename FuncType>
150 static std::unique_ptr<ReducerWorkItem>
CheckChunk(Chunk & ChunkToCheckForUninterestingness,std::unique_ptr<ReducerWorkItem> Clone,TestRunner & Test,FuncType ExtractChunksFromModule,std::set<Chunk> & UninterestingChunks,std::vector<Chunk> & ChunksStillConsideredInteresting)151 CheckChunk(Chunk &ChunkToCheckForUninterestingness,
152            std::unique_ptr<ReducerWorkItem> Clone, TestRunner &Test,
153            FuncType ExtractChunksFromModule,
154            std::set<Chunk> &UninterestingChunks,
155            std::vector<Chunk> &ChunksStillConsideredInteresting) {
156   // Take all of ChunksStillConsideredInteresting chunks, except those we've
157   // already deemed uninteresting (UninterestingChunks) but didn't remove
158   // from ChunksStillConsideredInteresting yet, and additionally ignore
159   // ChunkToCheckForUninterestingness chunk.
160   std::vector<Chunk> CurrentChunks;
161   CurrentChunks.reserve(ChunksStillConsideredInteresting.size() -
162                         UninterestingChunks.size() - 1);
163   copy_if(ChunksStillConsideredInteresting, std::back_inserter(CurrentChunks),
164           [&](const Chunk &C) {
165             return !UninterestingChunks.count(C) &&
166                    C != ChunkToCheckForUninterestingness;
167           });
168 
169   // Generate Module with only Targets inside Current Chunks
170   Oracle O(CurrentChunks);
171   ExtractChunksFromModule(O, *Clone);
172 
173   // Some reductions may result in invalid IR. Skip such reductions.
174   if (verifyReducerWorkItem(*Clone, &errs())) {
175     if (AbortOnInvalidReduction) {
176       errs() << "Invalid reduction\n";
177       exit(1);
178     }
179     errs() << " **** WARNING | reduction resulted in invalid module, "
180               "skipping\n";
181     return nullptr;
182   }
183 
184   errs() << "Ignoring: ";
185   ChunkToCheckForUninterestingness.print();
186   for (const Chunk &C : UninterestingChunks)
187     C.print();
188 
189   SmallString<128> CurrentFilepath;
190   if (!isReduced(*Clone, Test, CurrentFilepath)) {
191     // Program became non-reduced, so this chunk appears to be interesting.
192     errs() << "\n";
193     return nullptr;
194   }
195   return Clone;
196 }
197 
198 template <typename FuncType>
ProcessChunkFromSerializedBitcode(Chunk & ChunkToCheckForUninterestingness,TestRunner & Test,FuncType ExtractChunksFromModule,std::set<Chunk> & UninterestingChunks,std::vector<Chunk> & ChunksStillConsideredInteresting,SmallString<0> & OriginalBC,std::atomic<bool> & AnyReduced)199 SmallString<0> ProcessChunkFromSerializedBitcode(
200     Chunk &ChunkToCheckForUninterestingness, TestRunner &Test,
201     FuncType ExtractChunksFromModule, std::set<Chunk> &UninterestingChunks,
202     std::vector<Chunk> &ChunksStillConsideredInteresting,
203     SmallString<0> &OriginalBC, std::atomic<bool> &AnyReduced) {
204   LLVMContext Ctx;
205   auto CloneMMM = std::make_unique<ReducerWorkItem>();
206   auto Data = MemoryBufferRef(StringRef(OriginalBC.data(), OriginalBC.size()),
207                               "<bc file>");
208   readBitcode(*CloneMMM, Data, Ctx, Test.getToolName());
209 
210   SmallString<0> Result;
211   if (std::unique_ptr<ReducerWorkItem> ChunkResult =
212           CheckChunk(ChunkToCheckForUninterestingness, std::move(CloneMMM),
213                      Test, ExtractChunksFromModule, UninterestingChunks,
214                      ChunksStillConsideredInteresting)) {
215     raw_svector_ostream BCOS(Result);
216     writeBitcode(*ChunkResult, BCOS);
217     // Communicate that the task reduced a chunk.
218     AnyReduced = true;
219   }
220   return Result;
221 }
222 
223 /// Runs the Delta Debugging algorithm, splits the code into chunks and
224 /// reduces the amount of chunks that are considered interesting by the
225 /// given test. The number of chunks is determined by a preliminary run of the
226 /// reduction pass where no change must be made to the module.
runDeltaPass(TestRunner & Test,ReductionFunc ExtractChunksFromModule)227 void llvm::runDeltaPass(TestRunner &Test,
228                         ReductionFunc ExtractChunksFromModule) {
229   assert(!verifyReducerWorkItem(Test.getProgram(), &errs()) &&
230          "input module is broken before making changes");
231 
232   SmallString<128> CurrentFilepath;
233   if (!isReduced(Test.getProgram(), Test, CurrentFilepath)) {
234     errs() << "\nInput isn't interesting! Verify interesting-ness test\n";
235     exit(1);
236   }
237 
238   int Targets;
239   {
240     // Count the number of chunks by counting the number of calls to
241     // Oracle::shouldKeep() but always returning true so no changes are
242     // made.
243     std::vector<Chunk> AllChunks = {{0, INT_MAX}};
244     Oracle Counter(AllChunks);
245     ExtractChunksFromModule(Counter, Test.getProgram());
246     Targets = Counter.count();
247 
248     assert(!verifyReducerWorkItem(Test.getProgram(), &errs()) &&
249            "input module is broken after counting chunks");
250     assert(isReduced(Test.getProgram(), Test, CurrentFilepath) &&
251            "input module no longer interesting after counting chunks");
252 
253 #ifndef NDEBUG
254     // Make sure that the number of chunks does not change as we reduce.
255     std::vector<Chunk> NoChunks;
256     Oracle NoChunksCounter(NoChunks);
257     std::unique_ptr<ReducerWorkItem> Clone =
258         cloneReducerWorkItem(Test.getProgram(), Test.getTargetMachine());
259     ExtractChunksFromModule(NoChunksCounter, *Clone);
260     assert(Targets == NoChunksCounter.count() &&
261            "number of chunks changes when reducing");
262 #endif
263   }
264   if (!Targets) {
265     errs() << "\nNothing to reduce\n";
266     return;
267   }
268 
269   std::vector<Chunk> ChunksStillConsideredInteresting = {{0, Targets - 1}};
270   std::unique_ptr<ReducerWorkItem> ReducedProgram;
271 
272   for (unsigned int Level = 0; Level < StartingGranularityLevel; Level++) {
273     increaseGranularity(ChunksStillConsideredInteresting);
274   }
275 
276   std::atomic<bool> AnyReduced;
277   std::unique_ptr<ThreadPool> ChunkThreadPoolPtr;
278   if (NumJobs > 1)
279     ChunkThreadPoolPtr =
280         std::make_unique<ThreadPool>(hardware_concurrency(NumJobs));
281 
282   bool FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity;
283   do {
284     FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity = false;
285 
286     std::set<Chunk> UninterestingChunks;
287 
288     // When running with more than one thread, serialize the original bitcode
289     // to OriginalBC.
290     SmallString<0> OriginalBC;
291     if (NumJobs > 1) {
292       raw_svector_ostream BCOS(OriginalBC);
293       writeBitcode(Test.getProgram(), BCOS);
294     }
295 
296     std::deque<std::shared_future<SmallString<0>>> TaskQueue;
297     for (auto I = ChunksStillConsideredInteresting.rbegin(),
298               E = ChunksStillConsideredInteresting.rend();
299          I != E; ++I) {
300       std::unique_ptr<ReducerWorkItem> Result = nullptr;
301       unsigned WorkLeft = std::distance(I, E);
302 
303       // Run in parallel mode, if the user requested more than one thread and
304       // there are at least a few chunks to process.
305       if (NumJobs > 1 && WorkLeft > 1) {
306         unsigned NumInitialTasks = std::min(WorkLeft, unsigned(NumJobs));
307         unsigned NumChunksProcessed = 0;
308 
309         ThreadPool &ChunkThreadPool = *ChunkThreadPoolPtr;
310         TaskQueue.clear();
311 
312         AnyReduced = false;
313         // Queue jobs to process NumInitialTasks chunks in parallel using
314         // ChunkThreadPool. When the tasks are added to the pool, parse the
315         // original module from OriginalBC with a fresh LLVMContext object. This
316         // ensures that the cloned module of each task uses an independent
317         // LLVMContext object. If a task reduces the input, serialize the result
318         // back in the corresponding Result element.
319         for (unsigned J = 0; J < NumInitialTasks; ++J) {
320           TaskQueue.emplace_back(ChunkThreadPool.async(
321               [J, I, &Test, &ExtractChunksFromModule, &UninterestingChunks,
322                &ChunksStillConsideredInteresting, &OriginalBC, &AnyReduced]() {
323                 return ProcessChunkFromSerializedBitcode(
324                     *(I + J), Test, ExtractChunksFromModule,
325                     UninterestingChunks, ChunksStillConsideredInteresting,
326                     OriginalBC, AnyReduced);
327               }));
328         }
329 
330         // Start processing results of the queued tasks. We wait for the first
331         // task in the queue to finish. If it reduced a chunk, we parse the
332         // result and exit the loop.
333         //  Otherwise we will try to schedule a new task, if
334         //  * no other pending job reduced a chunk and
335         //  * we have not reached the end of the chunk.
336         while (!TaskQueue.empty()) {
337           auto &Future = TaskQueue.front();
338           Future.wait();
339 
340           NumChunksProcessed++;
341           SmallString<0> Res = Future.get();
342           TaskQueue.pop_front();
343           if (Res.empty()) {
344             unsigned NumScheduledTasks = NumChunksProcessed + TaskQueue.size();
345             if (!AnyReduced && I + NumScheduledTasks != E) {
346               Chunk &ChunkToCheck = *(I + NumScheduledTasks);
347               TaskQueue.emplace_back(ChunkThreadPool.async(
348                   [&Test, &ExtractChunksFromModule, &UninterestingChunks,
349                    &ChunksStillConsideredInteresting, &OriginalBC,
350                    &ChunkToCheck, &AnyReduced]() {
351                     return ProcessChunkFromSerializedBitcode(
352                         ChunkToCheck, Test, ExtractChunksFromModule,
353                         UninterestingChunks, ChunksStillConsideredInteresting,
354                         OriginalBC, AnyReduced);
355                   }));
356             }
357             continue;
358           }
359 
360           Result = std::make_unique<ReducerWorkItem>();
361           auto Data = MemoryBufferRef(StringRef(Res.data(), Res.size()),
362                                       "<bc file>");
363           readBitcode(*Result, Data, Test.getProgram().M->getContext(),
364                       Test.getToolName());
365           break;
366         }
367         // Forward I to the last chunk processed in parallel.
368         I += NumChunksProcessed - 1;
369       } else {
370         Result = CheckChunk(
371             *I,
372             cloneReducerWorkItem(Test.getProgram(), Test.getTargetMachine()),
373             Test, ExtractChunksFromModule, UninterestingChunks,
374             ChunksStillConsideredInteresting);
375       }
376 
377       if (!Result)
378         continue;
379 
380       Chunk &ChunkToCheckForUninterestingness = *I;
381       FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity = true;
382       UninterestingChunks.insert(ChunkToCheckForUninterestingness);
383       ReducedProgram = std::move(Result);
384       errs() << " **** SUCCESS | lines: " << getLines(CurrentFilepath) << "\n";
385       writeOutput(*ReducedProgram, "Saved new best reduction to ");
386     }
387     // Delete uninteresting chunks
388     erase_if(ChunksStillConsideredInteresting,
389              [&UninterestingChunks](const Chunk &C) {
390                return UninterestingChunks.count(C);
391              });
392   } while (!ChunksStillConsideredInteresting.empty() &&
393            (FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity ||
394             increaseGranularity(ChunksStillConsideredInteresting)));
395 
396   // If we reduced the testcase replace it
397   if (ReducedProgram)
398     Test.setProgram(std::move(ReducedProgram));
399   errs() << "Couldn't increase anymore.\n";
400 }
401