1 //===- FuzzerFork.cpp - run fuzzing in separate subprocesses --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // Spawn and orchestrate separate fuzzing processes. 9 //===----------------------------------------------------------------------===// 10 11 #include "FuzzerCommand.h" 12 #include "FuzzerFork.h" 13 #include "FuzzerIO.h" 14 #include "FuzzerInternal.h" 15 #include "FuzzerMerge.h" 16 #include "FuzzerSHA1.h" 17 #include "FuzzerTracePC.h" 18 #include "FuzzerUtil.h" 19 20 #include <atomic> 21 #include <chrono> 22 #include <fstream> 23 #include <memory> 24 #include <mutex> 25 #include <queue> 26 #include <sstream> 27 #include <thread> 28 29 namespace fuzzer { 30 31 struct Stats { 32 size_t number_of_executed_units = 0; 33 size_t peak_rss_mb = 0; 34 size_t average_exec_per_sec = 0; 35 }; 36 37 static Stats ParseFinalStatsFromLog(const std::string &LogPath) { 38 std::ifstream In(LogPath); 39 std::string Line; 40 Stats Res; 41 struct { 42 const char *Name; 43 size_t *Var; 44 } NameVarPairs[] = { 45 {"stat::number_of_executed_units:", &Res.number_of_executed_units}, 46 {"stat::peak_rss_mb:", &Res.peak_rss_mb}, 47 {"stat::average_exec_per_sec:", &Res.average_exec_per_sec}, 48 {nullptr, nullptr}, 49 }; 50 while (std::getline(In, Line, '\n')) { 51 if (Line.find("stat::") != 0) continue; 52 std::istringstream ISS(Line); 53 std::string Name; 54 size_t Val; 55 ISS >> Name >> Val; 56 for (size_t i = 0; NameVarPairs[i].Name; i++) 57 if (Name == NameVarPairs[i].Name) 58 *NameVarPairs[i].Var = Val; 59 } 60 return Res; 61 } 62 63 struct FuzzJob { 64 // Inputs. 65 Command Cmd; 66 std::string CorpusDir; 67 std::string FeaturesDir; 68 std::string LogPath; 69 std::string SeedListPath; 70 std::string CFPath; 71 72 // Fuzzing Outputs. 73 int ExitCode; 74 75 ~FuzzJob() { 76 RemoveFile(CFPath); 77 RemoveFile(LogPath); 78 RemoveFile(SeedListPath); 79 RmDirRecursive(CorpusDir); 80 RmDirRecursive(FeaturesDir); 81 } 82 }; 83 84 struct GlobalEnv { 85 Vector<std::string> Args; 86 Vector<std::string> CorpusDirs; 87 std::string MainCorpusDir; 88 std::string TempDir; 89 std::string DFTDir; 90 std::string DataFlowBinary; 91 Set<uint32_t> Features, Cov; 92 Vector<std::string> Files; 93 Random *Rand; 94 std::chrono::system_clock::time_point ProcessStartTime; 95 int Verbosity = 0; 96 97 size_t NumTimeouts = 0; 98 size_t NumOOMs = 0; 99 size_t NumCrashes = 0; 100 101 102 size_t NumRuns = 0; 103 104 size_t secondsSinceProcessStartUp() const { 105 return std::chrono::duration_cast<std::chrono::seconds>( 106 std::chrono::system_clock::now() - ProcessStartTime) 107 .count(); 108 } 109 110 FuzzJob *CreateNewJob(size_t JobId) { 111 Command Cmd(Args); 112 Cmd.removeFlag("fork"); 113 Cmd.removeFlag("runs"); 114 Cmd.removeFlag("collect_data_flow"); 115 for (auto &C : CorpusDirs) // Remove all corpora from the args. 116 Cmd.removeArgument(C); 117 Cmd.addFlag("reload", "0"); // working in an isolated dir, no reload. 118 Cmd.addFlag("print_final_stats", "1"); 119 Cmd.addFlag("print_funcs", "0"); // no need to spend time symbolizing. 120 Cmd.addFlag("max_total_time", std::to_string(std::min((size_t)300, JobId))); 121 if (!DataFlowBinary.empty()) { 122 Cmd.addFlag("data_flow_trace", DFTDir); 123 if (!Cmd.hasFlag("focus_function")) 124 Cmd.addFlag("focus_function", "auto"); 125 } 126 auto Job = new FuzzJob; 127 std::string Seeds; 128 if (size_t CorpusSubsetSize = 129 std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) 130 for (size_t i = 0; i < CorpusSubsetSize; i++) 131 Seeds += (Seeds.empty() ? "" : ",") + 132 Files[Rand->SkewTowardsLast(Files.size())]; 133 if (!Seeds.empty()) { 134 Job->SeedListPath = 135 DirPlusFile(TempDir, std::to_string(JobId) + ".seeds"); 136 WriteToFile(Seeds, Job->SeedListPath); 137 Cmd.addFlag("seed_inputs", "@" + Job->SeedListPath); 138 } 139 Job->LogPath = DirPlusFile(TempDir, std::to_string(JobId) + ".log"); 140 Job->CorpusDir = DirPlusFile(TempDir, "C" + std::to_string(JobId)); 141 Job->FeaturesDir = DirPlusFile(TempDir, "F" + std::to_string(JobId)); 142 Job->CFPath = DirPlusFile(TempDir, std::to_string(JobId) + ".merge"); 143 144 145 Cmd.addArgument(Job->CorpusDir); 146 Cmd.addFlag("features_dir", Job->FeaturesDir); 147 148 for (auto &D : {Job->CorpusDir, Job->FeaturesDir}) { 149 RmDirRecursive(D); 150 MkDir(D); 151 } 152 153 Cmd.setOutputFile(Job->LogPath); 154 Cmd.combineOutAndErr(); 155 156 Job->Cmd = Cmd; 157 158 if (Verbosity >= 2) 159 Printf("Job %zd/%p Created: %s\n", JobId, Job, 160 Job->Cmd.toString().c_str()); 161 // Start from very short runs and gradually increase them. 162 return Job; 163 } 164 165 void RunOneMergeJob(FuzzJob *Job) { 166 auto Stats = ParseFinalStatsFromLog(Job->LogPath); 167 NumRuns += Stats.number_of_executed_units; 168 169 Vector<SizedFile> TempFiles, MergeCandidates; 170 // Read all newly created inputs and their feature sets. 171 // Choose only those inputs that have new features. 172 GetSizedFilesFromDir(Job->CorpusDir, &TempFiles); 173 std::sort(TempFiles.begin(), TempFiles.end()); 174 for (auto &F : TempFiles) { 175 auto FeatureFile = F.File; 176 FeatureFile.replace(0, Job->CorpusDir.size(), Job->FeaturesDir); 177 auto FeatureBytes = FileToVector(FeatureFile, 0, false); 178 assert((FeatureBytes.size() % sizeof(uint32_t)) == 0); 179 Vector<uint32_t> NewFeatures(FeatureBytes.size() / sizeof(uint32_t)); 180 memcpy(NewFeatures.data(), FeatureBytes.data(), FeatureBytes.size()); 181 for (auto Ft : NewFeatures) { 182 if (!Features.count(Ft)) { 183 MergeCandidates.push_back(F); 184 break; 185 } 186 } 187 } 188 if (MergeCandidates.empty()) return; 189 190 Vector<std::string> FilesToAdd; 191 Set<uint32_t> NewFeatures, NewCov; 192 CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features, 193 &NewFeatures, Cov, &NewCov, Job->CFPath, false); 194 for (auto &Path : FilesToAdd) { 195 auto U = FileToVector(Path); 196 auto NewPath = DirPlusFile(MainCorpusDir, Hash(U)); 197 WriteToFile(U, NewPath); 198 Files.push_back(NewPath); 199 CollectDFT(NewPath); 200 } 201 Features.insert(NewFeatures.begin(), NewFeatures.end()); 202 Cov.insert(NewCov.begin(), NewCov.end()); 203 for (auto Idx : NewCov) 204 if (auto *TE = TPC.PCTableEntryByIdx(Idx)) 205 if (TPC.PcIsFuncEntry(TE)) 206 PrintPC(" NEW_FUNC: %p %F %L\n", "", 207 TPC.GetNextInstructionPc(TE->PC)); 208 209 if (!FilesToAdd.empty() || Job->ExitCode != 0) 210 Printf("#%zd: cov: %zd ft: %zd corp: %zd exec/s %zd " 211 "oom/timeout/crash: %zd/%zd/%zd time: %zds\n", NumRuns, 212 Cov.size(), Features.size(), Files.size(), 213 Stats.average_exec_per_sec, 214 NumOOMs, NumTimeouts, NumCrashes, secondsSinceProcessStartUp()); 215 } 216 217 218 void CollectDFT(const std::string &InputPath) { 219 if (DataFlowBinary.empty()) return; 220 Command Cmd(Args); 221 Cmd.removeFlag("fork"); 222 Cmd.removeFlag("runs"); 223 Cmd.addFlag("data_flow_trace", DFTDir); 224 Cmd.addArgument(InputPath); 225 for (auto &C : CorpusDirs) // Remove all corpora from the args. 226 Cmd.removeArgument(C); 227 Cmd.setOutputFile(DirPlusFile(TempDir, "dft.log")); 228 Cmd.combineOutAndErr(); 229 // Printf("CollectDFT: %s %s\n", InputPath.c_str(), Cmd.toString().c_str()); 230 ExecuteCommand(Cmd); 231 } 232 233 }; 234 235 struct JobQueue { 236 std::queue<FuzzJob *> Qu; 237 std::mutex Mu; 238 239 void Push(FuzzJob *Job) { 240 std::lock_guard<std::mutex> Lock(Mu); 241 Qu.push(Job); 242 } 243 FuzzJob *Pop() { 244 std::lock_guard<std::mutex> Lock(Mu); 245 if (Qu.empty()) return nullptr; 246 auto Job = Qu.front(); 247 Qu.pop(); 248 return Job; 249 } 250 }; 251 252 void WorkerThread(std::atomic<bool> *Stop, JobQueue *FuzzQ, JobQueue *MergeQ) { 253 while (!Stop->load()) { 254 auto Job = FuzzQ->Pop(); 255 // Printf("WorkerThread: job %p\n", Job); 256 if (!Job) { 257 SleepSeconds(1); 258 continue; 259 } 260 Job->ExitCode = ExecuteCommand(Job->Cmd); 261 MergeQ->Push(Job); 262 } 263 } 264 265 // This is just a skeleton of an experimental -fork=1 feature. 266 void FuzzWithFork(Random &Rand, const FuzzingOptions &Options, 267 const Vector<std::string> &Args, 268 const Vector<std::string> &CorpusDirs, int NumJobs) { 269 Printf("INFO: -fork=%d: fuzzing in separate process(s)\n", NumJobs); 270 271 GlobalEnv Env; 272 Env.Args = Args; 273 Env.CorpusDirs = CorpusDirs; 274 Env.Rand = &Rand; 275 Env.Verbosity = Options.Verbosity; 276 Env.ProcessStartTime = std::chrono::system_clock::now(); 277 Env.DataFlowBinary = Options.CollectDataFlow; 278 279 Vector<SizedFile> SeedFiles; 280 for (auto &Dir : CorpusDirs) 281 GetSizedFilesFromDir(Dir, &SeedFiles); 282 std::sort(SeedFiles.begin(), SeedFiles.end()); 283 Env.TempDir = TempPath(".dir"); 284 Env.DFTDir = DirPlusFile(Env.TempDir, "DFT"); 285 RmDirRecursive(Env.TempDir); // in case there is a leftover from old runs. 286 MkDir(Env.TempDir); 287 MkDir(Env.DFTDir); 288 289 290 if (CorpusDirs.empty()) 291 MkDir(Env.MainCorpusDir = DirPlusFile(Env.TempDir, "C")); 292 else 293 Env.MainCorpusDir = CorpusDirs[0]; 294 295 auto CFPath = DirPlusFile(Env.TempDir, "merge.txt"); 296 CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features, 297 {}, &Env.Cov, 298 CFPath, false); 299 for (auto &F : Env.Files) 300 Env.CollectDFT(F); 301 302 RemoveFile(CFPath); 303 Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs, 304 Env.Files.size(), Env.TempDir.c_str()); 305 306 int ExitCode = 0; 307 308 JobQueue FuzzQ, MergeQ; 309 std::atomic<bool> Stop(false); 310 311 size_t JobId = 1; 312 Vector<std::thread> Threads; 313 for (int t = 0; t < NumJobs; t++) { 314 Threads.push_back(std::thread(WorkerThread, &Stop, &FuzzQ, &MergeQ)); 315 FuzzQ.Push(Env.CreateNewJob(JobId++)); 316 } 317 318 while (true) { 319 std::unique_ptr<FuzzJob> Job(MergeQ.Pop()); 320 if (!Job) { 321 if (Stop) 322 break; 323 SleepSeconds(1); 324 continue; 325 } 326 ExitCode = Job->ExitCode; 327 if (ExitCode == Options.InterruptExitCode) { 328 Printf("==%lu== libFuzzer: a child was interrupted; exiting\n", GetPid()); 329 Stop = true; 330 break; 331 } 332 Fuzzer::MaybeExitGracefully(); 333 334 Env.RunOneMergeJob(Job.get()); 335 336 // Continue if our crash is one of the ignorred ones. 337 if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode) 338 Env.NumTimeouts++; 339 else if (Options.IgnoreOOMs && ExitCode == Options.OOMExitCode) 340 Env.NumOOMs++; 341 else if (ExitCode != 0) { 342 Env.NumCrashes++; 343 if (Options.IgnoreCrashes) { 344 std::ifstream In(Job->LogPath); 345 std::string Line; 346 while (std::getline(In, Line, '\n')) 347 if (Line.find("ERROR:") != Line.npos || 348 Line.find("runtime error:") != Line.npos) 349 Printf("%s\n", Line.c_str()); 350 } else { 351 // And exit if we don't ignore this crash. 352 Printf("INFO: log from the inner process:\n%s", 353 FileToString(Job->LogPath).c_str()); 354 Stop = true; 355 } 356 } 357 358 // Stop if we are over the time budget. 359 // This is not precise, since other threads are still running 360 // and we will wait while joining them. 361 // We also don't stop instantly: other jobs need to finish. 362 if (Options.MaxTotalTimeSec > 0 && !Stop && 363 Env.secondsSinceProcessStartUp() >= (size_t)Options.MaxTotalTimeSec) { 364 Printf("INFO: fuzzed for %zd seconds, wrapping up soon\n", 365 Env.secondsSinceProcessStartUp()); 366 Stop = true; 367 } 368 if (!Stop && Env.NumRuns >= Options.MaxNumberOfRuns) { 369 Printf("INFO: fuzzed for %zd iterations, wrapping up soon\n", 370 Env.NumRuns); 371 Stop = true; 372 } 373 374 if (!Stop) 375 FuzzQ.Push(Env.CreateNewJob(JobId++)); 376 } 377 Stop = true; 378 379 for (auto &T : Threads) 380 T.join(); 381 382 // The workers have terminated. Don't try to remove the directory before they 383 // terminate to avoid a race condition preventing cleanup on Windows. 384 RmDirRecursive(Env.TempDir); 385 386 // Use the exit code from the last child process. 387 Printf("INFO: exiting: %d time: %zds\n", ExitCode, 388 Env.secondsSinceProcessStartUp()); 389 exit(ExitCode); 390 } 391 392 } // namespace fuzzer 393