//===- Delta.cpp - Delta Debugging Algorithm Implementation ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation for the Delta Debugging Algorithm:
// it splits a given set of Targets (i.e. Functions, Instructions, BBs, etc.)
// into chunks and tries to reduce the number of chunks that are interesting.
//
//===----------------------------------------------------------------------===//

#include "Delta.h"
#include "ReducerWorkItem.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/ToolOutputFile.h"
#include <deque>
#include <fstream>
#include <future>
#include <set>

using namespace llvm;

extern cl::OptionCategory LLVMReduceOptions;

static cl::opt<bool> AbortOnInvalidReduction(
    "abort-on-invalid-reduction",
    cl::desc("Abort if any reduction results in invalid IR"),
    cl::cat(LLVMReduceOptions));

static cl::opt<unsigned int> StartingGranularityLevel(
    "starting-granularity-level",
    cl::desc("Number of times to divide chunks prior to first test"),
    cl::cat(LLVMReduceOptions));

static cl::opt<bool> TmpFilesAsBitcode(
    "write-tmp-files-as-bitcode",
    cl::desc("Write temporary files as bitcode, instead of textual IR"),
    cl::init(false), cl::cat(LLVMReduceOptions));

#ifdef LLVM_ENABLE_THREADS
static cl::opt<unsigned> NumJobs(
    "j",
    cl::desc("Maximum number of threads to use to process chunks. Set to 1 to "
             "disable parallelism."),
    cl::init(1), cl::cat(LLVMReduceOptions));
#else
unsigned NumJobs = 1;
#endif

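// Helper routines that are declared here and defined in other llvm-reduce
// source files.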
void writeOutput(ReducerWorkItem &M, llvm::StringRef Message);

void writeBitcode(ReducerWorkItem &M, raw_ostream &OutStream);

void readBitcode(ReducerWorkItem &M, MemoryBufferRef Data, LLVMContext &Ctx,
                 const char *ToolName);

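/// Writes \p M to a temporary file and runs the interestingness test on it,
/// returning the test's verdict. The path of the temporary file is returned
/// to the caller in \p CurrentFilepath.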
bool isReduced(ReducerWorkItem &M, TestRunner &Test,
               SmallString<128> &CurrentFilepath) {
  // Write ReducerWorkItem to tmp file
  int FD;
  std::error_code EC = sys::fs::createTemporaryFile(
      "llvm-reduce", M.isMIR() ? "mir" : (TmpFilesAsBitcode ? "bc" : "ll"), FD,
      CurrentFilepath);
  if (EC) {
    errs() << "Error making unique filename: " << EC.message() << "!\n";
    exit(1);
  }

  if (TmpFilesAsBitcode) {
    llvm::raw_fd_ostream OutStream(FD, true);
    writeBitcode(M, OutStream);
    OutStream.close();
    if (OutStream.has_error()) {
      errs() << "Error emitting bitcode to file '" << CurrentFilepath << "'!\n";
      sys::fs::remove(CurrentFilepath);
      exit(1);
    }
    bool Res = Test.run(CurrentFilepath);
    sys::fs::remove(CurrentFilepath);
    return Res;
  }
  ToolOutputFile Out(CurrentFilepath, FD);
  M.print(Out.os(), /*AnnotationWriter=*/nullptr);
  Out.os().close();
  if (Out.os().has_error()) {
    errs() << "Error emitting IR to file '" << CurrentFilepath << "'!\n";
    exit(1);
  }

  // Run the interestingness test on the freshly written file.
  return Test.run(CurrentFilepath);
}

/// Counts the number of lines in a given file.
static int getLines(StringRef Filepath) {
  int Lines = 0;
  std::string CurrLine;
  std::ifstream FileStream{std::string(Filepath)};

  while (std::getline(FileStream, CurrLine))
    ++Lines;

  return Lines;
}

/// Splits Chunks in half and prints them.
/// Returns false if no chunk could be split further (i.e. every chunk already
/// has size 1).
static bool increaseGranularity(std::vector<Chunk> &Chunks) {
  errs() << "Increasing granularity...";
  std::vector<Chunk> NewChunks;
  bool SplitOne = false;

  for (auto &C : Chunks) {
    if (C.End - C.Begin == 0)
      NewChunks.push_back(C);
    else {
      int Half = (C.Begin + C.End) / 2;
      NewChunks.push_back({C.Begin, Half});
      NewChunks.push_back({Half + 1, C.End});
      SplitOne = true;
    }
  }
  if (SplitOne) {
    Chunks = NewChunks;
    errs() << "Success! New Chunks:\n";
    for (auto C : Chunks) {
      errs() << '\t';
      C.print();
      errs() << '\n';
    }
  }
  return SplitOne;
}

// Check whether the test case remains interesting with
// \p ChunkToCheckForUninterestingness removed. Returns the modified module if
// removing the chunk produced a valid, still-interesting reduction; returns
// nullptr otherwise.
template <typename FuncType>
static std::unique_ptr<ReducerWorkItem>
CheckChunk(Chunk &ChunkToCheckForUninterestingness,
           std::unique_ptr<ReducerWorkItem> Clone, TestRunner &Test,
           FuncType ExtractChunksFromModule,
           std::set<Chunk> &UninterestingChunks,
           std::vector<Chunk> &ChunksStillConsideredInteresting) {
  // Take all of ChunksStillConsideredInteresting chunks, except those we've
  // already deemed uninteresting (UninterestingChunks) but didn't remove
  // from ChunksStillConsideredInteresting yet, and additionally ignore
  // ChunkToCheckForUninterestingness chunk.
  std::vector<Chunk> CurrentChunks;
  CurrentChunks.reserve(ChunksStillConsideredInteresting.size() -
                        UninterestingChunks.size() - 1);
  copy_if(ChunksStillConsideredInteresting, std::back_inserter(CurrentChunks),
          [&](const Chunk &C) {
            return !UninterestingChunks.count(C) &&
                   C != ChunkToCheckForUninterestingness;
          });

  // Generate Module with only Targets inside Current Chunks
  Oracle O(CurrentChunks);
  ExtractChunksFromModule(O, *Clone);

  // Some reductions may result in invalid IR. Skip such reductions.
  if (verifyReducerWorkItem(*Clone, &errs())) {
    if (AbortOnInvalidReduction) {
      errs() << "Invalid reduction\n";
      exit(1);
    }
    errs() << " **** WARNING | reduction resulted in invalid module, "
              "skipping\n";
    return nullptr;
  }

  errs() << "Ignoring: ";
  ChunkToCheckForUninterestingness.print();
  for (const Chunk &C : UninterestingChunks)
    C.print();

  SmallString<128> CurrentFilepath;
  if (!isReduced(*Clone, Test, CurrentFilepath)) {
    // The test is no longer interesting without this chunk, so the chunk is
    // still needed and remains interesting.
    errs() << "\n";
    return nullptr;
  }
  return Clone;
}

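/// Parallel worker: deserializes \p OriginalBC into a fresh ReducerWorkItem
/// backed by its own LLVMContext, runs CheckChunk on it, and, if the chunk
/// was successfully removed, returns the reduced module serialized back to
/// bitcode. Returns an empty string if the chunk could not be removed.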
template <typename FuncType>
SmallString<0> ProcessChunkFromSerializedBitcode(
    Chunk &ChunkToCheckForUninterestingness, TestRunner &Test,
    FuncType ExtractChunksFromModule, std::set<Chunk> &UninterestingChunks,
    std::vector<Chunk> &ChunksStillConsideredInteresting,
    SmallString<0> &OriginalBC, std::atomic<bool> &AnyReduced) {
  LLVMContext Ctx;
  auto CloneMMM = std::make_unique<ReducerWorkItem>();
  auto Data = MemoryBufferRef(StringRef(OriginalBC.data(), OriginalBC.size()),
                              "<bc file>");
  readBitcode(*CloneMMM, Data, Ctx, Test.getToolName());

  SmallString<0> Result;
  if (std::unique_ptr<ReducerWorkItem> ChunkResult =
          CheckChunk(ChunkToCheckForUninterestingness, std::move(CloneMMM),
                     Test, ExtractChunksFromModule, UninterestingChunks,
                     ChunksStillConsideredInteresting)) {
    raw_svector_ostream BCOS(Result);
    writeBitcode(*ChunkResult, BCOS);
    // Communicate that the task reduced a chunk.
    AnyReduced = true;
  }
  return Result;
}

/// Runs the Delta Debugging algorithm: it splits the code into chunks and
/// reduces the number of chunks that are considered interesting by the
/// given test. The number of chunks is determined by a preliminary run of the
/// reduction pass where no change must be made to the module.
void llvm::runDeltaPass(TestRunner &Test,
                        ReductionFunc ExtractChunksFromModule) {
  assert(!verifyReducerWorkItem(Test.getProgram(), &errs()) &&
         "input module is broken before making changes");

  SmallString<128> CurrentFilepath;
  if (!isReduced(Test.getProgram(), Test, CurrentFilepath)) {
    errs() << "\nInput isn't interesting! Verify interesting-ness test\n";
    exit(1);
  }

  int Targets;
  {
    // Count the number of chunks by counting the number of calls to
    // Oracle::shouldKeep() but always returning true so no changes are
    // made.
    std::vector<Chunk> AllChunks = {{0, INT_MAX}};
    Oracle Counter(AllChunks);
    ExtractChunksFromModule(Counter, Test.getProgram());
    Targets = Counter.count();

    assert(!verifyReducerWorkItem(Test.getProgram(), &errs()) &&
           "input module is broken after counting chunks");
    assert(isReduced(Test.getProgram(), Test, CurrentFilepath) &&
           "input module no longer interesting after counting chunks");

#ifndef NDEBUG
    // Make sure that the number of chunks does not change as we reduce.
    std::vector<Chunk> NoChunks;
    Oracle NoChunksCounter(NoChunks);
    std::unique_ptr<ReducerWorkItem> Clone =
        cloneReducerWorkItem(Test.getProgram(), Test.getTargetMachine());
    ExtractChunksFromModule(NoChunksCounter, *Clone);
    assert(Targets == NoChunksCounter.count() &&
           "number of chunks changes when reducing");
#endif
  }
  if (!Targets) {
    errs() << "\nNothing to reduce\n";
    return;
  }

  std::vector<Chunk> ChunksStillConsideredInteresting = {{0, Targets - 1}};
  std::unique_ptr<ReducerWorkItem> ReducedProgram;

  for (unsigned int Level = 0; Level < StartingGranularityLevel; Level++) {
    increaseGranularity(ChunksStillConsideredInteresting);
  }

  std::atomic<bool> AnyReduced;
  std::unique_ptr<ThreadPool> ChunkThreadPoolPtr;
  if (NumJobs > 1)
    ChunkThreadPoolPtr =
        std::make_unique<ThreadPool>(hardware_concurrency(NumJobs));

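  // Repeatedly sweep over the remaining chunks, trying to remove each one in
  // turn. Once a full sweep at the current granularity removes nothing more,
  // split the surviving chunks in half and sweep again, until no chunk can be
  // split any further.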
  bool FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity;
  do {
    FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity = false;

    std::set<Chunk> UninterestingChunks;

    // When running with more than one thread, serialize the original bitcode
    // to OriginalBC.
    SmallString<0> OriginalBC;
    if (NumJobs > 1) {
      raw_svector_ostream BCOS(OriginalBC);
      writeBitcode(Test.getProgram(), BCOS);
    }

    std::deque<std::shared_future<SmallString<0>>> TaskQueue;
    for (auto I = ChunksStillConsideredInteresting.rbegin(),
              E = ChunksStillConsideredInteresting.rend();
         I != E; ++I) {
      std::unique_ptr<ReducerWorkItem> Result = nullptr;
      unsigned WorkLeft = std::distance(I, E);

      // Run in parallel mode, if the user requested more than one thread and
      // there are at least a few chunks to process.
      if (NumJobs > 1 && WorkLeft > 1) {
        unsigned NumInitialTasks = std::min(WorkLeft, unsigned(NumJobs));
        unsigned NumChunksProcessed = 0;

        ThreadPool &ChunkThreadPool = *ChunkThreadPoolPtr;
        TaskQueue.clear();

        AnyReduced = false;
        // Queue jobs to process NumInitialTasks chunks in parallel using
        // ChunkThreadPool. When the tasks are added to the pool, parse the
        // original module from OriginalBC with a fresh LLVMContext object. This
        // ensures that the cloned module of each task uses an independent
        // LLVMContext object. If a task reduces the input, serialize the result
        // back in the corresponding Result element.
        for (unsigned J = 0; J < NumInitialTasks; ++J) {
          TaskQueue.emplace_back(ChunkThreadPool.async(
              [J, I, &Test, &ExtractChunksFromModule, &UninterestingChunks,
               &ChunksStillConsideredInteresting, &OriginalBC, &AnyReduced]() {
                return ProcessChunkFromSerializedBitcode(
                    *(I + J), Test, ExtractChunksFromModule,
                    UninterestingChunks, ChunksStillConsideredInteresting,
                    OriginalBC, AnyReduced);
              }));
        }

        // Start processing results of the queued tasks. We wait for the first
        // task in the queue to finish. If it reduced a chunk, we parse the
        // result and exit the loop.
        // Otherwise we will try to schedule a new task, if
        //  * no other pending job reduced a chunk and
        //  * we have not reached the end of the chunk list.
        while (!TaskQueue.empty()) {
          auto &Future = TaskQueue.front();
          Future.wait();

          NumChunksProcessed++;
          SmallString<0> Res = Future.get();
          TaskQueue.pop_front();
          if (Res.empty()) {
            unsigned NumScheduledTasks = NumChunksProcessed + TaskQueue.size();
            if (!AnyReduced && I + NumScheduledTasks != E) {
              Chunk &ChunkToCheck = *(I + NumScheduledTasks);
              TaskQueue.emplace_back(ChunkThreadPool.async(
                  [&Test, &ExtractChunksFromModule, &UninterestingChunks,
                   &ChunksStillConsideredInteresting, &OriginalBC,
                   &ChunkToCheck, &AnyReduced]() {
                    return ProcessChunkFromSerializedBitcode(
                        ChunkToCheck, Test, ExtractChunksFromModule,
                        UninterestingChunks, ChunksStillConsideredInteresting,
                        OriginalBC, AnyReduced);
                  }));
            }
            continue;
          }

          Result = std::make_unique<ReducerWorkItem>();
          auto Data = MemoryBufferRef(StringRef(Res.data(), Res.size()),
                                      "<bc file>");
          readBitcode(*Result, Data, Test.getProgram().M->getContext(),
                      Test.getToolName());
          break;
        }
        // Forward I to the last chunk processed in parallel.
        I += NumChunksProcessed - 1;
      } else {
        Result = CheckChunk(
            *I,
            cloneReducerWorkItem(Test.getProgram(), Test.getTargetMachine()),
            Test, ExtractChunksFromModule, UninterestingChunks,
            ChunksStillConsideredInteresting);
      }

      if (!Result)
        continue;

      Chunk &ChunkToCheckForUninterestingness = *I;
      FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity = true;
      UninterestingChunks.insert(ChunkToCheckForUninterestingness);
      ReducedProgram = std::move(Result);
      errs() << " **** SUCCESS | lines: " << getLines(CurrentFilepath) << "\n";
      writeOutput(*ReducedProgram, "Saved new best reduction to ");
    }
    // Delete uninteresting chunks
    erase_if(ChunksStillConsideredInteresting,
             [&UninterestingChunks](const Chunk &C) {
               return UninterestingChunks.count(C);
             });
  } while (!ChunksStillConsideredInteresting.empty() &&
           (FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity ||
            increaseGranularity(ChunksStillConsideredInteresting)));

  // If we reduced the testcase, replace it.
  if (ReducedProgram)
    Test.setProgram(std::move(ReducedProgram));
  errs() << "Couldn't increase anymore.\n";
}
