1 /*===- DataFlow.cpp - a standalone DataFlow tracer -------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // An experimental data-flow tracer for fuzz targets. 9 // It is based on DFSan and SanitizerCoverage. 10 // https://clang.llvm.org/docs/DataFlowSanitizer.html 11 // https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow 12 // 13 // It executes the fuzz target on the given input while monitoring the 14 // data flow for every instrumented comparison instruction. 15 // 16 // The output shows which functions depend on which bytes of the input, 17 // and also provides basic-block coverage for every input. 18 // 19 // Build: 20 // 1. Compile this file with -fsanitize=dataflow 21 // 2. Build the fuzz target with -g -fsanitize=dataflow 22 // -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp 23 // 3. Link those together with -fsanitize=dataflow 24 // 25 // -fsanitize-coverage=trace-cmp inserts callbacks around every comparison 26 // instruction, DFSan modifies the calls to pass the data flow labels. 27 // The callbacks update the data flow label for the current function. 28 // See e.g. __dfsw___sanitizer_cov_trace_cmp1 below. 29 // 30 // -fsanitize-coverage=trace-pc-guard,pc-table,bb instruments function 31 // entries so that the comparison callback knows that current function. 32 // -fsanitize-coverage=...,bb also allows to collect basic block coverage. 33 // 34 // 35 // Run: 36 // # Collect data flow and coverage for INPUT_FILE 37 // # write to OUTPUT_FILE (default: stdout) 38 // ./a.out FIRST_LABEL LAST_LABEL INPUT_FILE [OUTPUT_FILE] 39 // 40 // # Print all instrumented functions. llvm-symbolizer must be present in PATH 41 // ./a.out 42 // 43 // Example output: 44 // =============== 45 // F0 11111111111111 46 // F1 10000000000000 47 // C0 1 2 3 4 5 48 // C1 8 49 // =============== 50 // "FN xxxxxxxxxx": tells what bytes of the input does the function N depend on. 51 // The byte string is LEN+1 bytes. The last byte is set if the function 52 // depends on the input length. 53 // "CN X Y Z T": tells that a function N has basic blocks X, Y, and Z covered 54 // in addition to the function's entry block, out of T total instrumented 55 // blocks. 56 // 57 //===----------------------------------------------------------------------===*/ 58 59 #include <assert.h> 60 #include <stdio.h> 61 #include <stdlib.h> 62 #include <stdint.h> 63 #include <string.h> 64 65 #include <execinfo.h> // backtrace_symbols_fd 66 67 #include <sanitizer/dfsan_interface.h> 68 69 extern "C" { 70 extern int LLVMFuzzerTestOneInput(const unsigned char *Data, size_t Size); 71 __attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv); 72 } // extern "C" 73 74 static size_t InputLen; 75 static size_t InputLabelBeg; 76 static size_t InputLabelEnd; 77 static size_t InputSizeLabel; 78 static size_t NumFuncs, NumGuards; 79 static uint32_t *GuardsBeg, *GuardsEnd; 80 static const uintptr_t *PCsBeg, *PCsEnd; 81 static __thread size_t CurrentFunc; 82 static dfsan_label *FuncLabels; // Array of NumFuncs elements. 83 static bool *BBExecuted; // Array of NumGuards elements. 84 static char *PrintableStringForLabel; // InputLen + 2 bytes. 85 static bool LabelSeen[1 << 8 * sizeof(dfsan_label)]; 86 87 enum { 88 PCFLAG_FUNC_ENTRY = 1, 89 }; 90 91 static inline bool BlockIsEntry(size_t BlockIdx) { 92 return PCsBeg[BlockIdx * 2 + 1] & PCFLAG_FUNC_ENTRY; 93 } 94 95 // Prints all instrumented functions. 96 static int PrintFunctions() { 97 // We don't have the symbolizer integrated with dfsan yet. 98 // So use backtrace_symbols_fd and pipe it through llvm-symbolizer. 99 // TODO(kcc): this is pretty ugly and may break in lots of ways. 100 // We'll need to make a proper in-process symbolizer work with DFSan. 101 FILE *Pipe = popen("sed 's/(+/ /g; s/).*//g' " 102 "| llvm-symbolizer " 103 "| grep 'dfs\\$' " 104 "| sed 's/dfs\\$//g'", "w"); 105 for (size_t I = 0; I < NumGuards; I++) { 106 uintptr_t PC = PCsBeg[I * 2]; 107 if (!BlockIsEntry(I)) continue; 108 void *const Buf[1] = {(void*)PC}; 109 backtrace_symbols_fd(Buf, 1, fileno(Pipe)); 110 } 111 pclose(Pipe); 112 return 0; 113 } 114 115 extern "C" 116 void SetBytesForLabel(dfsan_label L, char *Bytes) { 117 if (LabelSeen[L]) 118 return; 119 LabelSeen[L] = true; 120 assert(L); 121 if (L < InputSizeLabel) { 122 Bytes[L + InputLabelBeg - 1] = '1'; 123 } else if (L == InputSizeLabel) { 124 Bytes[InputLen] = '1'; 125 } else { 126 auto *DLI = dfsan_get_label_info(L); 127 SetBytesForLabel(DLI->l1, Bytes); 128 SetBytesForLabel(DLI->l2, Bytes); 129 } 130 } 131 132 static char *GetPrintableStringForLabel(dfsan_label L) { 133 memset(PrintableStringForLabel, '0', InputLen + 1); 134 PrintableStringForLabel[InputLen + 1] = 0; 135 memset(LabelSeen, 0, sizeof(LabelSeen)); 136 SetBytesForLabel(L, PrintableStringForLabel); 137 return PrintableStringForLabel; 138 } 139 140 static void PrintDataFlow(FILE *Out) { 141 for (size_t I = 0; I < NumFuncs; I++) 142 if (FuncLabels[I]) 143 fprintf(Out, "F%zd %s\n", I, GetPrintableStringForLabel(FuncLabels[I])); 144 } 145 146 static void PrintCoverage(FILE *Out) { 147 ssize_t CurrentFuncGuard = -1; 148 ssize_t CurrentFuncNum = -1; 149 ssize_t NumBlocksInCurrentFunc = -1; 150 for (size_t FuncBeg = 0; FuncBeg < NumGuards;) { 151 CurrentFuncNum++; 152 assert(BlockIsEntry(FuncBeg)); 153 size_t FuncEnd = FuncBeg + 1; 154 for (; FuncEnd < NumGuards && !BlockIsEntry(FuncEnd); FuncEnd++) 155 ; 156 if (BBExecuted[FuncBeg]) { 157 fprintf(Out, "C%zd", CurrentFuncNum); 158 for (size_t I = FuncBeg + 1; I < FuncEnd; I++) 159 if (BBExecuted[I]) 160 fprintf(Out, " %zd", I - FuncBeg); 161 fprintf(Out, " %zd\n", FuncEnd - FuncBeg); 162 } 163 FuncBeg = FuncEnd; 164 } 165 } 166 167 int main(int argc, char **argv) { 168 if (LLVMFuzzerInitialize) 169 LLVMFuzzerInitialize(&argc, &argv); 170 if (argc == 1) 171 return PrintFunctions(); 172 assert(argc == 4 || argc == 5); 173 InputLabelBeg = atoi(argv[1]); 174 InputLabelEnd = atoi(argv[2]); 175 assert(InputLabelBeg < InputLabelEnd); 176 177 const char *Input = argv[3]; 178 fprintf(stderr, "INFO: reading '%s'\n", Input); 179 FILE *In = fopen(Input, "r"); 180 assert(In); 181 fseek(In, 0, SEEK_END); 182 InputLen = ftell(In); 183 fseek(In, 0, SEEK_SET); 184 unsigned char *Buf = (unsigned char*)malloc(InputLen); 185 size_t NumBytesRead = fread(Buf, 1, InputLen, In); 186 assert(NumBytesRead == InputLen); 187 PrintableStringForLabel = (char*)malloc(InputLen + 2); 188 fclose(In); 189 190 fprintf(stderr, "INFO: running '%s'\n", Input); 191 for (size_t I = 1; I <= InputLen; I++) { 192 size_t Idx = I - 1; 193 if (Idx >= InputLabelBeg && Idx < InputLabelEnd) { 194 dfsan_label L = dfsan_create_label("", nullptr); 195 assert(L == I - InputLabelBeg); 196 dfsan_set_label(L, Buf + Idx, 1); 197 } 198 } 199 dfsan_label SizeL = dfsan_create_label("", nullptr); 200 InputSizeLabel = SizeL; 201 assert(InputSizeLabel == InputLabelEnd - InputLabelBeg + 1); 202 dfsan_set_label(SizeL, &InputLen, sizeof(InputLen)); 203 204 LLVMFuzzerTestOneInput(Buf, InputLen); 205 free(Buf); 206 207 bool OutIsStdout = argc == 4; 208 fprintf(stderr, "INFO: writing dataflow to %s\n", 209 OutIsStdout ? "<stdout>" : argv[4]); 210 FILE *Out = OutIsStdout ? stdout : fopen(argv[4], "w"); 211 PrintDataFlow(Out); 212 PrintCoverage(Out); 213 if (!OutIsStdout) fclose(Out); 214 } 215 216 extern "C" { 217 218 void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, 219 uint32_t *stop) { 220 assert(NumFuncs == 0 && "This tool does not support DSOs"); 221 assert(start < stop && "The code is not instrumented for coverage"); 222 if (start == stop || *start) return; // Initialize only once. 223 GuardsBeg = start; 224 GuardsEnd = stop; 225 } 226 227 void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg, 228 const uintptr_t *pcs_end) { 229 if (NumGuards) return; // Initialize only once. 230 NumGuards = GuardsEnd - GuardsBeg; 231 PCsBeg = pcs_beg; 232 PCsEnd = pcs_end; 233 assert(NumGuards == (PCsEnd - PCsBeg) / 2); 234 for (size_t i = 0; i < NumGuards; i++) { 235 if (BlockIsEntry(i)) { 236 NumFuncs++; 237 GuardsBeg[i] = NumFuncs; 238 } 239 } 240 FuncLabels = (dfsan_label*)calloc(NumFuncs, sizeof(dfsan_label)); 241 BBExecuted = (bool*)calloc(NumGuards, sizeof(bool)); 242 fprintf(stderr, "INFO: %zd instrumented function(s) observed " 243 "and %zd basic blocks\n", NumFuncs, NumGuards); 244 } 245 246 void __sanitizer_cov_trace_pc_indir(uint64_t x){} // unused. 247 248 void __sanitizer_cov_trace_pc_guard(uint32_t *guard) { 249 size_t GuardIdx = guard - GuardsBeg; 250 assert(GuardIdx < NumGuards); 251 BBExecuted[GuardIdx] = true; 252 if (!*guard) return; // not a function entry. 253 uint32_t FuncNum = *guard - 1; // Guards start from 1. 254 assert(FuncNum < NumFuncs); 255 CurrentFunc = FuncNum; 256 } 257 258 void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases, 259 dfsan_label L1, dfsan_label UnusedL) { 260 assert(CurrentFunc < NumFuncs); 261 FuncLabels[CurrentFunc] = dfsan_union(FuncLabels[CurrentFunc], L1); 262 } 263 264 #define HOOK(Name, Type) \ 265 void Name(Type Arg1, Type Arg2, dfsan_label L1, dfsan_label L2) { \ 266 assert(CurrentFunc < NumFuncs); \ 267 FuncLabels[CurrentFunc] = \ 268 dfsan_union(FuncLabels[CurrentFunc], dfsan_union(L1, L2)); \ 269 } 270 271 HOOK(__dfsw___sanitizer_cov_trace_const_cmp1, uint8_t) 272 HOOK(__dfsw___sanitizer_cov_trace_const_cmp2, uint16_t) 273 HOOK(__dfsw___sanitizer_cov_trace_const_cmp4, uint32_t) 274 HOOK(__dfsw___sanitizer_cov_trace_const_cmp8, uint64_t) 275 HOOK(__dfsw___sanitizer_cov_trace_cmp1, uint8_t) 276 HOOK(__dfsw___sanitizer_cov_trace_cmp2, uint16_t) 277 HOOK(__dfsw___sanitizer_cov_trace_cmp4, uint32_t) 278 HOOK(__dfsw___sanitizer_cov_trace_cmp8, uint64_t) 279 280 } // extern "C" 281