1 //===- Delta.cpp - Delta Debugging Algorithm Implementation ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the implementation for the Delta Debugging Algorithm:
10 // it splits a given set of Targets (i.e. Functions, Instructions, BBs, etc.)
11 // into chunks and tries to reduce the number chunks that are interesting.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "Delta.h"
16 #include "ReducerWorkItem.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Bitcode/BitcodeReader.h"
19 #include "llvm/Bitcode/BitcodeWriter.h"
20 #include "llvm/IR/Verifier.h"
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/Support/ThreadPool.h"
23 #include "llvm/Support/ToolOutputFile.h"
24 #include <fstream>
25 #include <set>
26 
27 using namespace llvm;
28 
29 extern cl::OptionCategory LLVMReduceOptions;
30 
31 static cl::opt<bool> AbortOnInvalidReduction(
32     "abort-on-invalid-reduction",
33     cl::desc("Abort if any reduction results in invalid IR"),
34     cl::cat(LLVMReduceOptions));
35 
36 static cl::opt<unsigned int> StartingGranularityLevel(
37     "starting-granularity-level",
38     cl::desc("Number of times to divide chunks prior to first test"),
39     cl::cat(LLVMReduceOptions));
40 
41 static cl::opt<bool> TmpFilesAsBitcode(
42     "write-tmp-files-as-bitcode",
43     cl::desc("Write temporary files as bitcode, instead of textual IR"),
44     cl::init(false), cl::cat(LLVMReduceOptions));
45 
46 #ifdef LLVM_ENABLE_THREADS
47 static cl::opt<unsigned> NumJobs(
48     "j",
49     cl::desc("Maximum number of threads to use to process chunks. Set to 1 to "
50              "disables parallelism."),
51     cl::init(1), cl::cat(LLVMReduceOptions));
52 #else
53 unsigned NumJobs = 1;
54 #endif
55 
56 void writeOutput(ReducerWorkItem &M, llvm::StringRef Message);
57 
58 bool isReduced(ReducerWorkItem &M, TestRunner &Test,
59                SmallString<128> &CurrentFilepath) {
60   // Write ReducerWorkItem to tmp file
61   int FD;
62   std::error_code EC = sys::fs::createTemporaryFile(
63       "llvm-reduce", M.isMIR() ? "mir" : (TmpFilesAsBitcode ? "bc" : "ll"), FD,
64       CurrentFilepath);
65   if (EC) {
66     errs() << "Error making unique filename: " << EC.message() << "!\n";
67     exit(1);
68   }
69 
70   if (TmpFilesAsBitcode) {
71     llvm::raw_fd_ostream OutStream(FD, true);
72     WriteBitcodeToFile(M, OutStream);
73     OutStream.close();
74     if (OutStream.has_error()) {
75       errs() << "Error emitting bitcode to file '" << CurrentFilepath << "'!\n";
76       sys::fs::remove(CurrentFilepath);
77       exit(1);
78     }
79     bool Res = Test.run(CurrentFilepath);
80     sys::fs::remove(CurrentFilepath);
81     return Res;
82   }
83   ToolOutputFile Out(CurrentFilepath, FD);
84   M.print(Out.os(), /*AnnotationWriter=*/nullptr);
85   Out.os().close();
86   if (Out.os().has_error()) {
87     errs() << "Error emitting bitcode to file '" << CurrentFilepath << "'!\n";
88     exit(1);
89   }
90 
91   // Current Chunks aren't interesting
92   return Test.run(CurrentFilepath);
93 }
94 
95 /// Counts the amount of lines for a given file
96 static int getLines(StringRef Filepath) {
97   int Lines = 0;
98   std::string CurrLine;
99   std::ifstream FileStream{std::string(Filepath)};
100 
101   while (std::getline(FileStream, CurrLine))
102     ++Lines;
103 
104   return Lines;
105 }
106 
107 /// Splits Chunks in half and prints them.
108 /// If unable to split (when chunk size is 1) returns false.
109 static bool increaseGranularity(std::vector<Chunk> &Chunks) {
110   errs() << "Increasing granularity...";
111   std::vector<Chunk> NewChunks;
112   bool SplitOne = false;
113 
114   for (auto &C : Chunks) {
115     if (C.End - C.Begin == 0)
116       NewChunks.push_back(C);
117     else {
118       int Half = (C.Begin + C.End) / 2;
119       NewChunks.push_back({C.Begin, Half});
120       NewChunks.push_back({Half + 1, C.End});
121       SplitOne = true;
122     }
123   }
124   if (SplitOne) {
125     Chunks = NewChunks;
126     errs() << "Success! New Chunks:\n";
127     for (auto C : Chunks) {
128       errs() << '\t';
129       C.print();
130       errs() << '\n';
131     }
132   }
133   return SplitOne;
134 }
135 
136 // Check if \p ChunkToCheckForUninterestingness is interesting. Returns the
137 // modified module if the chunk resulted in a reduction.
138 template <typename FuncType>
139 static std::unique_ptr<ReducerWorkItem>
140 CheckChunk(Chunk &ChunkToCheckForUninterestingness,
141            std::unique_ptr<ReducerWorkItem> Clone, TestRunner &Test,
142            FuncType ExtractChunksFromModule,
143            std::set<Chunk> &UninterestingChunks,
144            std::vector<Chunk> &ChunksStillConsideredInteresting) {
145   // Take all of ChunksStillConsideredInteresting chunks, except those we've
146   // already deemed uninteresting (UninterestingChunks) but didn't remove
147   // from ChunksStillConsideredInteresting yet, and additionally ignore
148   // ChunkToCheckForUninterestingness chunk.
149   std::vector<Chunk> CurrentChunks;
150   CurrentChunks.reserve(ChunksStillConsideredInteresting.size() -
151                         UninterestingChunks.size() - 1);
152   copy_if(ChunksStillConsideredInteresting, std::back_inserter(CurrentChunks),
153           [&](const Chunk &C) {
154             return !UninterestingChunks.count(C) &&
155                    C != ChunkToCheckForUninterestingness;
156           });
157 
158   // Generate Module with only Targets inside Current Chunks
159   Oracle O(CurrentChunks);
160   ExtractChunksFromModule(O, *Clone);
161 
162   // Some reductions may result in invalid IR. Skip such reductions.
163   if (verifyReducerWorkItem(*Clone, &errs())) {
164     if (AbortOnInvalidReduction) {
165       errs() << "Invalid reduction\n";
166       exit(1);
167     }
168     errs() << " **** WARNING | reduction resulted in invalid module, "
169               "skipping\n";
170     return nullptr;
171   }
172 
173   errs() << "Ignoring: ";
174   ChunkToCheckForUninterestingness.print();
175   for (const Chunk &C : UninterestingChunks)
176     C.print();
177 
178   SmallString<128> CurrentFilepath;
179   if (!isReduced(*Clone, Test, CurrentFilepath)) {
180     // Program became non-reduced, so this chunk appears to be interesting.
181     errs() << "\n";
182     return nullptr;
183   }
184   return Clone;
185 }
186 
187 template <typename FuncType>
188 SmallString<0> ProcessChunkFromSerializedBitcode(
189     Chunk &ChunkToCheckForUninterestingness, TestRunner &Test,
190     FuncType ExtractChunksFromModule, std::set<Chunk> &UninterestingChunks,
191     std::vector<Chunk> &ChunksStillConsideredInteresting,
192     SmallString<0> &OriginalBC, std::atomic<bool> &AnyReduced) {
193   LLVMContext Ctx;
194   Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
195       MemoryBufferRef(StringRef(OriginalBC.data(), OriginalBC.size()),
196                       "<llvm-reduce tmp module>"),
197       Ctx);
198   if (!MOrErr)
199     report_fatal_error("Failed to read bitcode");
200   auto CloneMMM = std::make_unique<ReducerWorkItem>();
201   CloneMMM->M = std::move(MOrErr.get());
202 
203   SmallString<0> Result;
204   if (std::unique_ptr<ReducerWorkItem> ChunkResult =
205           CheckChunk(ChunkToCheckForUninterestingness, std::move(CloneMMM),
206                      Test, ExtractChunksFromModule, UninterestingChunks,
207                      ChunksStillConsideredInteresting)) {
208     raw_svector_ostream BCOS(Result);
209     WriteBitcodeToFile(*ChunkResult->M, BCOS);
210     // Communicate that the task reduced a chunk.
211     AnyReduced = true;
212   }
213   return Result;
214 }
215 
216 /// Runs the Delta Debugging algorithm, splits the code into chunks and
217 /// reduces the amount of chunks that are considered interesting by the
218 /// given test. The number of chunks is determined by a preliminary run of the
219 /// reduction pass where no change must be made to the module.
220 void llvm::runDeltaPass(TestRunner &Test,
221                         ReductionFunc ExtractChunksFromModule) {
222   assert(!verifyReducerWorkItem(Test.getProgram(), &errs()) &&
223          "input module is broken before making changes");
224 
225   SmallString<128> CurrentFilepath;
226   if (!isReduced(Test.getProgram(), Test, CurrentFilepath)) {
227     errs() << "\nInput isn't interesting! Verify interesting-ness test\n";
228     exit(1);
229   }
230 
231   int Targets;
232   {
233     // Count the number of chunks by counting the number of calls to
234     // Oracle::shouldKeep() but always returning true so no changes are
235     // made.
236     std::vector<Chunk> AllChunks = {{0, INT_MAX}};
237     Oracle Counter(AllChunks);
238     ExtractChunksFromModule(Counter, Test.getProgram());
239     Targets = Counter.count();
240 
241     assert(!verifyReducerWorkItem(Test.getProgram(), &errs()) &&
242            "input module is broken after counting chunks");
243     assert(isReduced(Test.getProgram(), Test, CurrentFilepath) &&
244            "input module no longer interesting after counting chunks");
245 
246 #ifndef NDEBUG
247     // Make sure that the number of chunks does not change as we reduce.
248     std::vector<Chunk> NoChunks;
249     Oracle NoChunksCounter(NoChunks);
250     std::unique_ptr<ReducerWorkItem> Clone =
251         cloneReducerWorkItem(Test.getProgram(), Test.getTargetMachine());
252     ExtractChunksFromModule(NoChunksCounter, *Clone);
253     assert(Targets == NoChunksCounter.count() &&
254            "number of chunks changes when reducing");
255 #endif
256   }
257   if (!Targets) {
258     errs() << "\nNothing to reduce\n";
259     return;
260   }
261 
262   std::vector<Chunk> ChunksStillConsideredInteresting = {{0, Targets - 1}};
263   std::unique_ptr<ReducerWorkItem> ReducedProgram;
264 
265   for (unsigned int Level = 0; Level < StartingGranularityLevel; Level++) {
266     increaseGranularity(ChunksStillConsideredInteresting);
267   }
268 
269   std::atomic<bool> AnyReduced;
270   std::unique_ptr<ThreadPool> ChunkThreadPoolPtr;
271   if (NumJobs > 1)
272     ChunkThreadPoolPtr =
273         std::make_unique<ThreadPool>(hardware_concurrency(NumJobs));
274 
275   bool FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity;
276   do {
277     FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity = false;
278 
279     std::set<Chunk> UninterestingChunks;
280 
281     // When running with more than one thread, serialize the original bitcode
282     // to OriginalBC.
283     SmallString<0> OriginalBC;
284     if (NumJobs > 1) {
285       raw_svector_ostream BCOS(OriginalBC);
286       WriteBitcodeToFile(*Test.getProgram().M, BCOS);
287     }
288 
289     std::deque<std::shared_future<SmallString<0>>> TaskQueue;
290     for (auto I = ChunksStillConsideredInteresting.rbegin(),
291               E = ChunksStillConsideredInteresting.rend();
292          I != E; ++I) {
293       std::unique_ptr<ReducerWorkItem> Result = nullptr;
294       unsigned WorkLeft = std::distance(I, E);
295 
296       // Run in parallel mode, if the user requested more than one thread and
297       // there are at least a few chunks to process.
298       if (NumJobs > 1 && WorkLeft > 1) {
299         unsigned NumInitialTasks = std::min(WorkLeft, unsigned(NumJobs));
300         unsigned NumChunksProcessed = 0;
301 
302         ThreadPool &ChunkThreadPool = *ChunkThreadPoolPtr;
303         TaskQueue.clear();
304 
305         AnyReduced = false;
306         // Queue jobs to process NumInitialTasks chunks in parallel using
307         // ChunkThreadPool. When the tasks are added to the pool, parse the
308         // original module from OriginalBC with a fresh LLVMContext object. This
309         // ensures that the cloned module of each task uses an independent
310         // LLVMContext object. If a task reduces the input, serialize the result
311         // back in the corresponding Result element.
312         for (unsigned J = 0; J < NumInitialTasks; ++J) {
313           TaskQueue.emplace_back(ChunkThreadPool.async(
314               [J, I, &Test, &ExtractChunksFromModule, &UninterestingChunks,
315                &ChunksStillConsideredInteresting, &OriginalBC, &AnyReduced]() {
316                 return ProcessChunkFromSerializedBitcode(
317                     *(I + J), Test, ExtractChunksFromModule,
318                     UninterestingChunks, ChunksStillConsideredInteresting,
319                     OriginalBC, AnyReduced);
320               }));
321         }
322 
323         // Start processing results of the queued tasks. We wait for the first
324         // task in the queue to finish. If it reduced a chunk, we parse the
325         // result and exit the loop.
326         //  Otherwise we will try to schedule a new task, if
327         //  * no other pending job reduced a chunk and
328         //  * we have not reached the end of the chunk.
329         while (!TaskQueue.empty()) {
330           auto &Future = TaskQueue.front();
331           Future.wait();
332 
333           NumChunksProcessed++;
334           SmallString<0> Res = Future.get();
335           TaskQueue.pop_front();
336           if (Res.empty()) {
337             unsigned NumScheduledTasks = NumChunksProcessed + TaskQueue.size();
338             if (!AnyReduced && I + NumScheduledTasks != E) {
339               Chunk &ChunkToCheck = *(I + NumScheduledTasks);
340               TaskQueue.emplace_back(ChunkThreadPool.async(
341                   [&Test, &ExtractChunksFromModule, &UninterestingChunks,
342                    &ChunksStillConsideredInteresting, &OriginalBC,
343                    &ChunkToCheck, &AnyReduced]() {
344                     return ProcessChunkFromSerializedBitcode(
345                         ChunkToCheck, Test, ExtractChunksFromModule,
346                         UninterestingChunks, ChunksStillConsideredInteresting,
347                         OriginalBC, AnyReduced);
348                   }));
349             }
350             continue;
351           }
352 
353           Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
354               MemoryBufferRef(StringRef(Res.data(), Res.size()),
355                               "<llvm-reduce tmp module>"),
356               Test.getProgram().M->getContext());
357           if (!MOrErr)
358             report_fatal_error("Failed to read bitcode");
359           Result = std::make_unique<ReducerWorkItem>();
360           Result->M = std::move(MOrErr.get());
361           break;
362         }
363         // Forward I to the last chunk processed in parallel.
364         I += NumChunksProcessed - 1;
365       } else {
366         Result = CheckChunk(
367             *I,
368             cloneReducerWorkItem(Test.getProgram(), Test.getTargetMachine()),
369             Test, ExtractChunksFromModule, UninterestingChunks,
370             ChunksStillConsideredInteresting);
371       }
372 
373       if (!Result)
374         continue;
375 
376       Chunk &ChunkToCheckForUninterestingness = *I;
377       FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity = true;
378       UninterestingChunks.insert(ChunkToCheckForUninterestingness);
379       ReducedProgram = std::move(Result);
380       errs() << " **** SUCCESS | lines: " << getLines(CurrentFilepath) << "\n";
381       writeOutput(*ReducedProgram, "Saved new best reduction to ");
382     }
383     // Delete uninteresting chunks
384     erase_if(ChunksStillConsideredInteresting,
385              [&UninterestingChunks](const Chunk &C) {
386                return UninterestingChunks.count(C);
387              });
388   } while (!ChunksStillConsideredInteresting.empty() &&
389            (FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity ||
390             increaseGranularity(ChunksStillConsideredInteresting)));
391 
392   // If we reduced the testcase replace it
393   if (ReducedProgram)
394     Test.setProgram(std::move(ReducedProgram));
395   errs() << "Couldn't increase anymore.\n";
396 }
397