1 /*===- DataFlow.cpp - a standalone DataFlow tracer -------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // An experimental data-flow tracer for fuzz targets. 9 // It is based on DFSan and SanitizerCoverage. 10 // https://clang.llvm.org/docs/DataFlowSanitizer.html 11 // https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow 12 // 13 // It executes the fuzz target on the given input while monitoring the 14 // data flow for every instrumented comparison instruction. 15 // 16 // The output shows which functions depend on which bytes of the input, 17 // and also provides basic-block coverage for every input. 18 // 19 // Build: 20 // 1. Compile this file with -fsanitize=dataflow 21 // 2. Build the fuzz target with -g -fsanitize=dataflow 22 // -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp 23 // 3. Link those together with -fsanitize=dataflow 24 // 25 // -fsanitize-coverage=trace-cmp inserts callbacks around every comparison 26 // instruction, DFSan modifies the calls to pass the data flow labels. 27 // The callbacks update the data flow label for the current function. 28 // See e.g. __dfsw___sanitizer_cov_trace_cmp1 below. 29 // 30 // -fsanitize-coverage=trace-pc-guard,pc-table,bb instruments function 31 // entries so that the comparison callback knows that current function. 32 // -fsanitize-coverage=...,bb also allows to collect basic block coverage. 33 // 34 // 35 // Run: 36 // # Collect data flow and coverage for INPUT_FILE 37 // # write to OUTPUT_FILE (default: stdout) 38 // ./a.out FIRST_LABEL LAST_LABEL INPUT_FILE [OUTPUT_FILE] 39 // 40 // # Print all instrumented functions. llvm-symbolizer must be present in PATH 41 // ./a.out 42 // 43 // Example output: 44 // =============== 45 // F0 11111111111111 46 // F1 10000000000000 47 // C0 1 2 3 4 48 // C1 49 // =============== 50 // "FN xxxxxxxxxx": tells what bytes of the input does the function N depend on. 51 // The byte string is LEN+1 bytes. The last byte is set if the function 52 // depends on the input length. 53 // "CN X Y Z": tells that a function N has basic blocks X, Y, and Z covered 54 // in addition to the function's entry block. 55 // 56 //===----------------------------------------------------------------------===*/ 57 58 #include <assert.h> 59 #include <stdio.h> 60 #include <stdlib.h> 61 #include <stdint.h> 62 #include <string.h> 63 64 #include <execinfo.h> // backtrace_symbols_fd 65 66 #include <sanitizer/dfsan_interface.h> 67 68 extern "C" { 69 extern int LLVMFuzzerTestOneInput(const unsigned char *Data, size_t Size); 70 __attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv); 71 } // extern "C" 72 73 static size_t InputLen; 74 static size_t InputLabelBeg; 75 static size_t InputLabelEnd; 76 static size_t InputSizeLabel; 77 static size_t NumFuncs, NumGuards; 78 static uint32_t *GuardsBeg, *GuardsEnd; 79 static const uintptr_t *PCsBeg, *PCsEnd; 80 static __thread size_t CurrentFunc; 81 static dfsan_label *FuncLabels; // Array of NumFuncs elements. 82 static bool *BBExecuted; // Array of NumGuards elements. 83 static char *PrintableStringForLabel; // InputLen + 2 bytes. 84 static bool LabelSeen[1 << 8 * sizeof(dfsan_label)]; 85 86 enum { 87 PCFLAG_FUNC_ENTRY = 1, 88 }; 89 90 // Prints all instrumented functions. 91 static int PrintFunctions() { 92 // We don't have the symbolizer integrated with dfsan yet. 93 // So use backtrace_symbols_fd and pipe it through llvm-symbolizer. 94 // TODO(kcc): this is pretty ugly and may break in lots of ways. 95 // We'll need to make a proper in-process symbolizer work with DFSan. 96 FILE *Pipe = popen("sed 's/(+/ /g; s/).*//g' " 97 "| llvm-symbolizer " 98 "| grep 'dfs\\$' " 99 "| sed 's/dfs\\$//g'", "w"); 100 for (size_t I = 0; I < NumGuards; I++) { 101 uintptr_t PC = PCsBeg[I * 2]; 102 uintptr_t PCFlags = PCsBeg[I * 2 + 1]; 103 if (!(PCFlags & PCFLAG_FUNC_ENTRY)) continue; 104 void *const Buf[1] = {(void*)PC}; 105 backtrace_symbols_fd(Buf, 1, fileno(Pipe)); 106 } 107 pclose(Pipe); 108 return 0; 109 } 110 111 extern "C" 112 void SetBytesForLabel(dfsan_label L, char *Bytes) { 113 if (LabelSeen[L]) 114 return; 115 LabelSeen[L] = true; 116 assert(L); 117 if (L < InputSizeLabel) { 118 Bytes[L + InputLabelBeg - 1] = '1'; 119 } else if (L == InputSizeLabel) { 120 Bytes[InputLen] = '1'; 121 } else { 122 auto *DLI = dfsan_get_label_info(L); 123 SetBytesForLabel(DLI->l1, Bytes); 124 SetBytesForLabel(DLI->l2, Bytes); 125 } 126 } 127 128 static char *GetPrintableStringForLabel(dfsan_label L) { 129 memset(PrintableStringForLabel, '0', InputLen + 1); 130 PrintableStringForLabel[InputLen + 1] = 0; 131 memset(LabelSeen, 0, sizeof(LabelSeen)); 132 SetBytesForLabel(L, PrintableStringForLabel); 133 return PrintableStringForLabel; 134 } 135 136 static void PrintDataFlow(FILE *Out) { 137 for (size_t I = 0; I < NumFuncs; I++) 138 if (FuncLabels[I]) 139 fprintf(Out, "F%zd %s\n", I, GetPrintableStringForLabel(FuncLabels[I])); 140 } 141 142 static void PrintCoverage(FILE *Out) { 143 ssize_t CurrentFuncGuard = -1; 144 ssize_t CurrentFuncNum = -1; 145 int NumFuncsCovered = 0; 146 for (size_t I = 0; I < NumGuards; I++) { 147 bool IsEntry = PCsBeg[I * 2 + 1] & PCFLAG_FUNC_ENTRY; 148 if (IsEntry) { 149 CurrentFuncNum++; 150 CurrentFuncGuard = I; 151 } 152 if (!BBExecuted[I]) continue; 153 if (IsEntry) { 154 if (NumFuncsCovered) fprintf(Out, "\n"); 155 fprintf(Out, "C%zd ", CurrentFuncNum); 156 NumFuncsCovered++; 157 } else { 158 fprintf(Out, "%zd ", I - CurrentFuncGuard); 159 } 160 } 161 fprintf(Out, "\n"); 162 } 163 164 int main(int argc, char **argv) { 165 if (LLVMFuzzerInitialize) 166 LLVMFuzzerInitialize(&argc, &argv); 167 if (argc == 1) 168 return PrintFunctions(); 169 assert(argc == 4 || argc == 5); 170 InputLabelBeg = atoi(argv[1]); 171 InputLabelEnd = atoi(argv[2]); 172 assert(InputLabelBeg < InputLabelEnd); 173 174 const char *Input = argv[3]; 175 fprintf(stderr, "INFO: reading '%s'\n", Input); 176 FILE *In = fopen(Input, "r"); 177 assert(In); 178 fseek(In, 0, SEEK_END); 179 InputLen = ftell(In); 180 fseek(In, 0, SEEK_SET); 181 unsigned char *Buf = (unsigned char*)malloc(InputLen); 182 size_t NumBytesRead = fread(Buf, 1, InputLen, In); 183 assert(NumBytesRead == InputLen); 184 PrintableStringForLabel = (char*)malloc(InputLen + 2); 185 fclose(In); 186 187 fprintf(stderr, "INFO: running '%s'\n", Input); 188 for (size_t I = 1; I <= InputLen; I++) { 189 size_t Idx = I - 1; 190 if (Idx >= InputLabelBeg && Idx < InputLabelEnd) { 191 dfsan_label L = dfsan_create_label("", nullptr); 192 assert(L == I - InputLabelBeg); 193 dfsan_set_label(L, Buf + Idx, 1); 194 } 195 } 196 dfsan_label SizeL = dfsan_create_label("", nullptr); 197 InputSizeLabel = SizeL; 198 assert(InputSizeLabel == InputLabelEnd - InputLabelBeg + 1); 199 dfsan_set_label(SizeL, &InputLen, sizeof(InputLen)); 200 201 LLVMFuzzerTestOneInput(Buf, InputLen); 202 free(Buf); 203 204 bool OutIsStdout = argc == 4; 205 fprintf(stderr, "INFO: writing dataflow to %s\n", 206 OutIsStdout ? "<stdout>" : argv[4]); 207 FILE *Out = OutIsStdout ? stdout : fopen(argv[4], "w"); 208 PrintDataFlow(Out); 209 PrintCoverage(Out); 210 if (!OutIsStdout) fclose(Out); 211 } 212 213 extern "C" { 214 215 void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, 216 uint32_t *stop) { 217 assert(NumFuncs == 0 && "This tool does not support DSOs"); 218 assert(start < stop && "The code is not instrumented for coverage"); 219 if (start == stop || *start) return; // Initialize only once. 220 GuardsBeg = start; 221 GuardsEnd = stop; 222 } 223 224 void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg, 225 const uintptr_t *pcs_end) { 226 if (NumGuards) return; // Initialize only once. 227 NumGuards = GuardsEnd - GuardsBeg; 228 PCsBeg = pcs_beg; 229 PCsEnd = pcs_end; 230 assert(NumGuards == (PCsEnd - PCsBeg) / 2); 231 for (size_t i = 0; i < NumGuards; i++) { 232 if (PCsBeg[i * 2 + 1] & PCFLAG_FUNC_ENTRY) { 233 NumFuncs++; 234 GuardsBeg[i] = NumFuncs; 235 } 236 } 237 FuncLabels = (dfsan_label*)calloc(NumFuncs, sizeof(dfsan_label)); 238 BBExecuted = (bool*)calloc(NumGuards, sizeof(bool)); 239 fprintf(stderr, "INFO: %zd instrumented function(s) observed " 240 "and %zd basic blocks\n", NumFuncs, NumGuards); 241 } 242 243 void __sanitizer_cov_trace_pc_indir(uint64_t x){} // unused. 244 245 void __sanitizer_cov_trace_pc_guard(uint32_t *guard) { 246 size_t GuardIdx = guard - GuardsBeg; 247 assert(GuardIdx < NumGuards); 248 BBExecuted[GuardIdx] = true; 249 if (!*guard) return; // not a function entry. 250 uint32_t FuncNum = *guard - 1; // Guards start from 1. 251 assert(FuncNum < NumFuncs); 252 CurrentFunc = FuncNum; 253 } 254 255 void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases, 256 dfsan_label L1, dfsan_label UnusedL) { 257 assert(CurrentFunc < NumFuncs); 258 FuncLabels[CurrentFunc] = dfsan_union(FuncLabels[CurrentFunc], L1); 259 } 260 261 #define HOOK(Name, Type) \ 262 void Name(Type Arg1, Type Arg2, dfsan_label L1, dfsan_label L2) { \ 263 assert(CurrentFunc < NumFuncs); \ 264 FuncLabels[CurrentFunc] = \ 265 dfsan_union(FuncLabels[CurrentFunc], dfsan_union(L1, L2)); \ 266 } 267 268 HOOK(__dfsw___sanitizer_cov_trace_const_cmp1, uint8_t) 269 HOOK(__dfsw___sanitizer_cov_trace_const_cmp2, uint16_t) 270 HOOK(__dfsw___sanitizer_cov_trace_const_cmp4, uint32_t) 271 HOOK(__dfsw___sanitizer_cov_trace_const_cmp8, uint64_t) 272 HOOK(__dfsw___sanitizer_cov_trace_cmp1, uint8_t) 273 HOOK(__dfsw___sanitizer_cov_trace_cmp2, uint16_t) 274 HOOK(__dfsw___sanitizer_cov_trace_cmp4, uint32_t) 275 HOOK(__dfsw___sanitizer_cov_trace_cmp8, uint64_t) 276 277 } // extern "C" 278