/*===- DataFlow.cpp - a standalone DataFlow tracer -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// An experimental data-flow tracer for fuzz targets.
// It is based on DFSan and SanitizerCoverage.
// https://clang.llvm.org/docs/DataFlowSanitizer.html
// https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow
//
// It executes the fuzz target on the given input while monitoring the
// data flow for every instrumented comparison instruction.
//
// The output shows which functions depend on which bytes of the input.
//
// Build:
//   1. Compile this file with -fsanitize=dataflow
//   2. Build the fuzz target with -g -fsanitize=dataflow
//       -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp
//   3. Link those together with -fsanitize=dataflow
//
//  -fsanitize-coverage=trace-cmp inserts callbacks around every comparison
//  instruction, DFSan modifies the calls to pass the data flow labels.
//  The callbacks update the data flow label for the current function.
//  See e.g. __dfsw___sanitizer_cov_trace_cmp1 below.
//
//  -fsanitize-coverage=trace-pc-guard,pc-table,func instruments function
//  entries so that the comparison callback knows the current function.
//
//
// Run:
//   # Collect data flow for the input bytes [BEG, END) of INPUT_FILE,
//   # write to OUTPUT_FILE (default: stdout)
//   ./a.out BEG END INPUT_FILE [OUTPUT_FILE]
//
//   # Print all instrumented functions. llvm-symbolizer must be present in PATH
//   ./a.out
//
// Example output:
// ===============
//  F0 11111111111111
//  F1 10000000000000
// ===============
// "FN xxxxxxxxxx": tells which bytes of the input the function N depends on.
//    The byte string is LEN+1 bytes.
The last byte is set if the function 48 // depends on the input length. 49 //===----------------------------------------------------------------------===*/ 50 51 #include <assert.h> 52 #include <stdio.h> 53 #include <stdlib.h> 54 #include <stdint.h> 55 #include <string.h> 56 57 #include <execinfo.h> // backtrace_symbols_fd 58 59 #include <sanitizer/dfsan_interface.h> 60 61 extern "C" { 62 extern int LLVMFuzzerTestOneInput(const unsigned char *Data, size_t Size); 63 __attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv); 64 } // extern "C" 65 66 static size_t InputLen; 67 static size_t NumFuncs; 68 static const uintptr_t *FuncsBeg; 69 static __thread size_t CurrentFunc; 70 static dfsan_label *FuncLabels; // Array of NumFuncs elements. 71 static char *PrintableStringForLabel; // InputLen + 2 bytes. 72 73 // Prints all instrumented functions. 74 static int PrintFunctions() { 75 // We don't have the symbolizer integrated with dfsan yet. 76 // So use backtrace_symbols_fd and pipe it through llvm-symbolizer. 77 // TODO(kcc): this is pretty ugly and may break in lots of ways. 78 // We'll need to make a proper in-process symbolizer work with DFSan. 
79 FILE *Pipe = popen("sed 's/(+/ /g; s/).*//g' " 80 "| llvm-symbolizer " 81 "| grep 'dfs\\$' " 82 "| sed 's/dfs\\$//g'", "w"); 83 for (size_t I = 0; I < NumFuncs; I++) { 84 uintptr_t PC = FuncsBeg[I * 2]; 85 void *const Buf[1] = {(void*)PC}; 86 backtrace_symbols_fd(Buf, 1, fileno(Pipe)); 87 } 88 pclose(Pipe); 89 return 0; 90 } 91 92 static void SetBytesForLabel(dfsan_label L, char *Bytes) { 93 assert(L); 94 if (L <= InputLen + 1) { 95 Bytes[L - 1] = '1'; 96 } else { 97 auto *DLI = dfsan_get_label_info(L); 98 SetBytesForLabel(DLI->l1, Bytes); 99 SetBytesForLabel(DLI->l2, Bytes); 100 } 101 } 102 103 static char *GetPrintableStringForLabel(dfsan_label L) { 104 memset(PrintableStringForLabel, '0', InputLen + 1); 105 PrintableStringForLabel[InputLen + 1] = 0; 106 SetBytesForLabel(L, PrintableStringForLabel); 107 return PrintableStringForLabel; 108 } 109 110 static void PrintDataFlow(FILE *Out) { 111 for (size_t I = 0; I < NumFuncs; I++) 112 if (FuncLabels[I]) 113 fprintf(Out, "F%zd %s\n", I, GetPrintableStringForLabel(FuncLabels[I])); 114 } 115 116 int main(int argc, char **argv) { 117 if (LLVMFuzzerInitialize) 118 LLVMFuzzerInitialize(&argc, &argv); 119 if (argc == 1) 120 return PrintFunctions(); 121 assert(argc == 4 || argc == 5); 122 size_t Beg = atoi(argv[1]); 123 size_t End = atoi(argv[2]); 124 assert(Beg < End); 125 126 const char *Input = argv[3]; 127 fprintf(stderr, "INFO: reading '%s'\n", Input); 128 FILE *In = fopen(Input, "r"); 129 assert(In); 130 fseek(In, 0, SEEK_END); 131 InputLen = ftell(In); 132 fseek(In, 0, SEEK_SET); 133 unsigned char *Buf = (unsigned char*)malloc(InputLen); 134 size_t NumBytesRead = fread(Buf, 1, InputLen, In); 135 assert(NumBytesRead == InputLen); 136 PrintableStringForLabel = (char*)malloc(InputLen + 2); 137 fclose(In); 138 139 fprintf(stderr, "INFO: running '%s'\n", Input); 140 for (size_t I = 1; I <= InputLen; I++) { 141 dfsan_label L = dfsan_create_label("", nullptr); 142 assert(L == I); 143 size_t Idx = I - 1; 144 if (Idx >= 
Beg && Idx < End) 145 dfsan_set_label(L, Buf + Idx, 1); 146 } 147 dfsan_label SizeL = dfsan_create_label("", nullptr); 148 assert(SizeL == InputLen + 1); 149 dfsan_set_label(SizeL, &InputLen, sizeof(InputLen)); 150 151 LLVMFuzzerTestOneInput(Buf, InputLen); 152 free(Buf); 153 154 bool OutIsStdout = argc == 4; 155 fprintf(stderr, "INFO: writing dataflow to %s\n", 156 OutIsStdout ? "<stdout>" : argv[4]); 157 FILE *Out = OutIsStdout ? stdout : fopen(argv[4], "w"); 158 PrintDataFlow(Out); 159 if (!OutIsStdout) fclose(Out); 160 } 161 162 extern "C" { 163 164 void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, 165 uint32_t *stop) { 166 assert(NumFuncs == 0 && "This tool does not support DSOs"); 167 assert(start < stop && "The code is not instrumented for coverage"); 168 if (start == stop || *start) return; // Initialize only once. 169 for (uint32_t *x = start; x < stop; x++) 170 *x = ++NumFuncs; // The first index is 1. 171 FuncLabels = (dfsan_label*)calloc(NumFuncs, sizeof(dfsan_label)); 172 fprintf(stderr, "INFO: %zd instrumented function(s) observed\n", NumFuncs); 173 } 174 175 void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg, 176 const uintptr_t *pcs_end) { 177 assert(NumFuncs == (pcs_end - pcs_beg) / 2); 178 FuncsBeg = pcs_beg; 179 } 180 181 void __sanitizer_cov_trace_pc_indir(uint64_t x){} // unused. 182 183 void __sanitizer_cov_trace_pc_guard(uint32_t *guard){ 184 uint32_t FuncNum = *guard - 1; // Guards start from 1. 
185 assert(FuncNum < NumFuncs); 186 CurrentFunc = FuncNum; 187 } 188 189 void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases, 190 dfsan_label L1, dfsan_label UnusedL) { 191 assert(CurrentFunc < NumFuncs); 192 FuncLabels[CurrentFunc] = dfsan_union(FuncLabels[CurrentFunc], L1); 193 } 194 195 #define HOOK(Name, Type) \ 196 void Name(Type Arg1, Type Arg2, dfsan_label L1, dfsan_label L2) { \ 197 assert(CurrentFunc < NumFuncs); \ 198 FuncLabels[CurrentFunc] = \ 199 dfsan_union(FuncLabels[CurrentFunc], dfsan_union(L1, L2)); \ 200 } 201 202 HOOK(__dfsw___sanitizer_cov_trace_const_cmp1, uint8_t) 203 HOOK(__dfsw___sanitizer_cov_trace_const_cmp2, uint16_t) 204 HOOK(__dfsw___sanitizer_cov_trace_const_cmp4, uint32_t) 205 HOOK(__dfsw___sanitizer_cov_trace_const_cmp8, uint64_t) 206 HOOK(__dfsw___sanitizer_cov_trace_cmp1, uint8_t) 207 HOOK(__dfsw___sanitizer_cov_trace_cmp2, uint16_t) 208 HOOK(__dfsw___sanitizer_cov_trace_cmp4, uint32_t) 209 HOOK(__dfsw___sanitizer_cov_trace_cmp8, uint64_t) 210 211 } // extern "C" 212