1 //===- FuzzerFork.cpp - run fuzzing in separate subprocesses --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Spawn and orchestrate separate fuzzing processes.
9 //===----------------------------------------------------------------------===//
10 
11 #include "FuzzerCommand.h"
12 #include "FuzzerFork.h"
13 #include "FuzzerIO.h"
14 #include "FuzzerInternal.h"
15 #include "FuzzerMerge.h"
16 #include "FuzzerSHA1.h"
17 #include "FuzzerTracePC.h"
18 #include "FuzzerUtil.h"
19 
20 #include <atomic>
21 #include <chrono>
22 #include <fstream>
23 #include <memory>
24 #include <mutex>
25 #include <queue>
26 #include <sstream>
27 #include <thread>
28 
29 namespace fuzzer {
30 
// Counters read back from the "stat::<name>: <value>" lines of a log file.
// Every counter defaults to 0 when the log does not mention it.
struct Stats {
  size_t number_of_executed_units = 0;
  size_t peak_rss_mb = 0;
  size_t average_exec_per_sec = 0;
};

// Reads the file at LogPath line by line and collects the values of the
// recognized "stat::" counters.
//
// Only lines that begin with "stat::" are considered. Each such line is
// expected to look like "<name> <unsigned value>"; lines whose value is
// missing or is not a number are skipped, so they never overwrite a counter.
// If a counter appears several times, the last well-formed occurrence wins.
// A missing or unreadable file yields a Stats with all counters equal to 0.
static Stats ParseFinalStatsFromLog(const std::string &LogPath) {
  std::ifstream In(LogPath);
  std::string Line;
  Stats Res;
  const struct {
    const char *Name;
    size_t *Var;
  } NameVarPairs[] = {
      {"stat::number_of_executed_units:", &Res.number_of_executed_units},
      {"stat::peak_rss_mb:", &Res.peak_rss_mb},
      {"stat::average_exec_per_sec:", &Res.average_exec_per_sec},
  };
  while (std::getline(In, Line, '\n')) {
    // rfind(..., 0) only tests position 0, i.e. "does Line start with".
    if (Line.rfind("stat::", 0) != 0) continue;
    std::istringstream ISS(Line);
    std::string Name;
    size_t Val = 0;
    // When the value is absent the extraction fails without touching Val,
    // so the result must be checked before Val is used.
    if (!(ISS >> Name >> Val)) continue;
    for (const auto &Pair : NameVarPairs)
      if (Name == Pair.Name)
        *Pair.Var = Val;
  }
  return Res;
}
62 
63 struct FuzzJob {
64   // Inputs.
65   Command Cmd;
66   std::string CorpusDir;
67   std::string FeaturesDir;
68   std::string LogPath;
69   std::string SeedListPath;
70   std::string CFPath;
71 
72   // Fuzzing Outputs.
73   int ExitCode;
74 
75   ~FuzzJob() {
76     RemoveFile(CFPath);
77     RemoveFile(LogPath);
78     RemoveFile(SeedListPath);
79     RmDirRecursive(CorpusDir);
80     RmDirRecursive(FeaturesDir);
81   }
82 };
83 
84 struct GlobalEnv {
85   Vector<std::string> Args;
86   Vector<std::string> CorpusDirs;
87   std::string MainCorpusDir;
88   std::string TempDir;
89   std::string DFTDir;
90   std::string DataFlowBinary;
91   Set<uint32_t> Features, Cov;
92   Vector<std::string> Files;
93   Random *Rand;
94   std::chrono::system_clock::time_point ProcessStartTime;
95   int Verbosity = 0;
96 
97   size_t NumTimeouts = 0;
98   size_t NumOOMs = 0;
99   size_t NumCrashes = 0;
100 
101 
102   size_t NumRuns = 0;
103 
104   size_t secondsSinceProcessStartUp() const {
105     return std::chrono::duration_cast<std::chrono::seconds>(
106                std::chrono::system_clock::now() - ProcessStartTime)
107         .count();
108   }
109 
110   FuzzJob *CreateNewJob(size_t JobId) {
111     Command Cmd(Args);
112     Cmd.removeFlag("fork");
113     Cmd.removeFlag("runs");
114     Cmd.removeFlag("collect_data_flow");
115     for (auto &C : CorpusDirs) // Remove all corpora from the args.
116       Cmd.removeArgument(C);
117     Cmd.addFlag("reload", "0");  // working in an isolated dir, no reload.
118     Cmd.addFlag("print_final_stats", "1");
119     Cmd.addFlag("print_funcs", "0");  // no need to spend time symbolizing.
120     Cmd.addFlag("max_total_time", std::to_string(std::min((size_t)300, JobId)));
121     if (!DataFlowBinary.empty()) {
122       Cmd.addFlag("data_flow_trace", DFTDir);
123       if (!Cmd.hasFlag("focus_function"))
124         Cmd.addFlag("focus_function", "auto");
125     }
126     auto Job = new FuzzJob;
127     std::string Seeds;
128     if (size_t CorpusSubsetSize =
129             std::min(Files.size(), (size_t)sqrt(Files.size() + 2)))
130       for (size_t i = 0; i < CorpusSubsetSize; i++)
131         Seeds += (Seeds.empty() ? "" : ",") +
132                  Files[Rand->SkewTowardsLast(Files.size())];
133     if (!Seeds.empty()) {
134       Job->SeedListPath =
135           DirPlusFile(TempDir, std::to_string(JobId) + ".seeds");
136       WriteToFile(Seeds, Job->SeedListPath);
137       Cmd.addFlag("seed_inputs", "@" + Job->SeedListPath);
138     }
139     Job->LogPath = DirPlusFile(TempDir, std::to_string(JobId) + ".log");
140     Job->CorpusDir = DirPlusFile(TempDir, "C" + std::to_string(JobId));
141     Job->FeaturesDir = DirPlusFile(TempDir, "F" + std::to_string(JobId));
142     Job->CFPath = DirPlusFile(TempDir, std::to_string(JobId) + ".merge");
143 
144 
145     Cmd.addArgument(Job->CorpusDir);
146     Cmd.addFlag("features_dir", Job->FeaturesDir);
147 
148     for (auto &D : {Job->CorpusDir, Job->FeaturesDir}) {
149       RmDirRecursive(D);
150       MkDir(D);
151     }
152 
153     Cmd.setOutputFile(Job->LogPath);
154     Cmd.combineOutAndErr();
155 
156     Job->Cmd = Cmd;
157 
158     if (Verbosity >= 2)
159       Printf("Job %zd/%p Created: %s\n", JobId, Job,
160              Job->Cmd.toString().c_str());
161     // Start from very short runs and gradually increase them.
162     return Job;
163   }
164 
165   void RunOneMergeJob(FuzzJob *Job) {
166     auto Stats = ParseFinalStatsFromLog(Job->LogPath);
167     NumRuns += Stats.number_of_executed_units;
168 
169     Vector<SizedFile> TempFiles, MergeCandidates;
170     // Read all newly created inputs and their feature sets.
171     // Choose only those inputs that have new features.
172     GetSizedFilesFromDir(Job->CorpusDir, &TempFiles);
173     std::sort(TempFiles.begin(), TempFiles.end());
174     for (auto &F : TempFiles) {
175       auto FeatureFile = F.File;
176       FeatureFile.replace(0, Job->CorpusDir.size(), Job->FeaturesDir);
177       auto FeatureBytes = FileToVector(FeatureFile, 0, false);
178       assert((FeatureBytes.size() % sizeof(uint32_t)) == 0);
179       Vector<uint32_t> NewFeatures(FeatureBytes.size() / sizeof(uint32_t));
180       memcpy(NewFeatures.data(), FeatureBytes.data(), FeatureBytes.size());
181       for (auto Ft : NewFeatures) {
182         if (!Features.count(Ft)) {
183           MergeCandidates.push_back(F);
184           break;
185         }
186       }
187     }
188     if (MergeCandidates.empty()) return;
189 
190     Vector<std::string> FilesToAdd;
191     Set<uint32_t> NewFeatures, NewCov;
192     CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features,
193                         &NewFeatures, Cov, &NewCov, Job->CFPath, false);
194     for (auto &Path : FilesToAdd) {
195       auto U = FileToVector(Path);
196       auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));
197       WriteToFile(U, NewPath);
198       Files.push_back(NewPath);
199       CollectDFT(NewPath);
200     }
201     Features.insert(NewFeatures.begin(), NewFeatures.end());
202     Cov.insert(NewCov.begin(), NewCov.end());
203     for (auto Idx : NewCov)
204       if (auto *TE = TPC.PCTableEntryByIdx(Idx))
205         if (TPC.PcIsFuncEntry(TE))
206           PrintPC("  NEW_FUNC: %p %F %L\n", "",
207                   TPC.GetNextInstructionPc(TE->PC));
208 
209     if (!FilesToAdd.empty() || Job->ExitCode != 0)
210       Printf("#%zd: cov: %zd ft: %zd corp: %zd exec/s %zd "
211              "oom/timeout/crash: %zd/%zd/%zd time: %zds\n", NumRuns,
212              Cov.size(), Features.size(), Files.size(),
213              Stats.average_exec_per_sec,
214              NumOOMs, NumTimeouts, NumCrashes, secondsSinceProcessStartUp());
215   }
216 
217 
218   void CollectDFT(const std::string &InputPath) {
219     if (DataFlowBinary.empty()) return;
220     Command Cmd(Args);
221     Cmd.removeFlag("fork");
222     Cmd.removeFlag("runs");
223     Cmd.addFlag("data_flow_trace", DFTDir);
224     Cmd.addArgument(InputPath);
225     for (auto &C : CorpusDirs) // Remove all corpora from the args.
226       Cmd.removeArgument(C);
227     Cmd.setOutputFile(DirPlusFile(TempDir, "dft.log"));
228     Cmd.combineOutAndErr();
229     // Printf("CollectDFT: %s %s\n", InputPath.c_str(), Cmd.toString().c_str());
230     ExecuteCommand(Cmd);
231   }
232 
233 };
234 
235 struct JobQueue {
236   std::queue<FuzzJob *> Qu;
237   std::mutex Mu;
238 
239   void Push(FuzzJob *Job) {
240     std::lock_guard<std::mutex> Lock(Mu);
241     Qu.push(Job);
242   }
243   FuzzJob *Pop() {
244     std::lock_guard<std::mutex> Lock(Mu);
245     if (Qu.empty()) return nullptr;
246     auto Job = Qu.front();
247     Qu.pop();
248     return Job;
249   }
250 };
251 
252 void WorkerThread(std::atomic<bool> *Stop, JobQueue *FuzzQ, JobQueue *MergeQ) {
253   while (!Stop->load()) {
254     auto Job = FuzzQ->Pop();
255     // Printf("WorkerThread: job %p\n", Job);
256     if (!Job) {
257       SleepSeconds(1);
258       continue;
259     }
260     Job->ExitCode = ExecuteCommand(Job->Cmd);
261     MergeQ->Push(Job);
262   }
263 }
264 
265 // This is just a skeleton of an experimental -fork=1 feature.
//
// Fuzzes by running child commands derived from Args. NumJobs worker threads
// execute the jobs, while the calling thread merges the result of every
// finished job into a GlobalEnv and decides whether to schedule another one.
//
// Rand is used to pick seeds for new jobs. From Options this function reads
// the verbosity, the data flow setting, the special exit codes, the Ignore*
// switches and the time/run budgets. CorpusDirs supplies the seed inputs;
// its first entry, if any, becomes the main corpus directory.
//
// The function finishes by calling exit() with the exit code of the last
// merged job, so it does not return to the caller.
void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
                  const Vector<std::string> &Args,
                  const Vector<std::string> &CorpusDirs, int NumJobs) {
  Printf("INFO: -fork=%d: fuzzing in separate process(s)\n", NumJobs);

  GlobalEnv Env;
  Env.Args = Args;
  Env.CorpusDirs = CorpusDirs;
  Env.Rand = &Rand;
  Env.Verbosity = Options.Verbosity;
  Env.ProcessStartTime = std::chrono::system_clock::now();
  Env.DataFlowBinary = Options.CollectDataFlow;

  // Gather the seed inputs from all corpus directories.
  Vector<SizedFile> SeedFiles;
  for (auto &Dir : CorpusDirs)
    GetSizedFilesFromDir(Dir, &SeedFiles);
  std::sort(SeedFiles.begin(), SeedFiles.end());
  Env.TempDir = TempPath(".dir");
  Env.DFTDir = DirPlusFile(Env.TempDir, "DFT");
  RmDirRecursive(Env.TempDir);  // in case there is a leftover from old runs.
  MkDir(Env.TempDir);
  MkDir(Env.DFTDir);


  // Without any corpus directory, keep the main corpus inside the temporary
  // directory; otherwise the first corpus directory is the main one.
  if (CorpusDirs.empty())
    MkDir(Env.MainCorpusDir = DirPlusFile(Env.TempDir, "C"));
  else
    Env.MainCorpusDir = CorpusDirs[0];

  // Initial merge of the seed inputs; Env.Files, Env.Features and Env.Cov are
  // passed as the output arguments of the call.
  auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
  CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features,
                      {}, &Env.Cov,
                      CFPath, false);
  for (auto &F : Env.Files)
    Env.CollectDFT(F);

  RemoveFile(CFPath);
  Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
         Env.Files.size(), Env.TempDir.c_str());

  int ExitCode = 0;

  // Workers take jobs from FuzzQ and return them through MergeQ.
  JobQueue FuzzQ, MergeQ;
  std::atomic<bool> Stop(false);

  // Start NumJobs workers and queue one initial job per worker.
  size_t JobId = 1;
  Vector<std::thread> Threads;
  for (int t = 0; t < NumJobs; t++) {
    Threads.push_back(std::thread(WorkerThread, &Stop, &FuzzQ, &MergeQ));
    FuzzQ.Push(Env.CreateNewJob(JobId++));
  }

  while (true) {
    // Take ownership of the next finished job; it is deleted (together with
    // its temporary files) at the end of this iteration.
    std::unique_ptr<FuzzJob> Job(MergeQ.Pop());
    if (!Job) {
      // Nothing to merge: leave once a stop was requested, otherwise poll
      // again in a second.
      if (Stop)
        break;
      SleepSeconds(1);
      continue;
    }
    ExitCode = Job->ExitCode;
    if (ExitCode == Options.InterruptExitCode) {
      Printf("==%lu== libFuzzer: a child was interrupted; exiting\n", GetPid());
      Stop = true;
      break;
    }
    Fuzzer::MaybeExitGracefully();

    // The merge (and its status line) runs before the counters below are
    // updated for this job.
    Env.RunOneMergeJob(Job.get());

    // Continue if our crash is one of the ignored ones.
    if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode)
      Env.NumTimeouts++;
    else if (Options.IgnoreOOMs && ExitCode == Options.OOMExitCode)
      Env.NumOOMs++;
    else if (ExitCode != 0) {
      Env.NumCrashes++;
      if (Options.IgnoreCrashes) {
        // Echo only the error lines of the child's log.
        std::ifstream In(Job->LogPath);
        std::string Line;
        while (std::getline(In, Line, '\n'))
          if (Line.find("ERROR:") != Line.npos ||
              Line.find("runtime error:") != Line.npos)
            Printf("%s\n", Line.c_str());
      } else {
        // And exit if we don't ignore this crash.
        // Setting Stop does not leave the loop right away: jobs already in
        // MergeQ are still merged until the queue is found empty.
        Printf("INFO: log from the inner process:\n%s",
               FileToString(Job->LogPath).c_str());
        Stop = true;
      }
    }

    // Stop if we are over the time budget.
    // This is not precise, since other threads are still running
    // and we will wait while joining them.
    // We also don't stop instantly: other jobs need to finish.
    if (Options.MaxTotalTimeSec > 0 && !Stop &&
        Env.secondsSinceProcessStartUp() >= (size_t)Options.MaxTotalTimeSec) {
      Printf("INFO: fuzzed for %zd seconds, wrapping up soon\n",
             Env.secondsSinceProcessStartUp());
      Stop = true;
    }
    if (!Stop && Env.NumRuns >= Options.MaxNumberOfRuns) {
      Printf("INFO: fuzzed for %zd iterations, wrapping up soon\n",
             Env.NumRuns);
      Stop = true;
    }

    // Replace the merged job with a fresh one unless we are wrapping up.
    if (!Stop)
      FuzzQ.Push(Env.CreateNewJob(JobId++));
  }
  // Make sure the workers see the stop request on every path out of the loop.
  Stop = true;

  for (auto &T : Threads)
    T.join();

  // The workers have terminated. Don't try to remove the directory before they
  // terminate to avoid a race condition preventing cleanup on Windows.
  RmDirRecursive(Env.TempDir);

  // Use the exit code from the last child process.
  Printf("INFO: exiting: %d time: %zds\n", ExitCode,
         Env.secondsSinceProcessStartUp());
  exit(ExitCode);
}
391 
392 } // namespace fuzzer
393