/*===- DataFlow.cpp - a standalone DataFlow tracer -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// An experimental data-flow tracer for fuzz targets.
// It is based on DFSan and SanitizerCoverage.
// https://clang.llvm.org/docs/DataFlowSanitizer.html
// https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow
//
// It executes the fuzz target on the given input while monitoring the
// data flow for every instrumented comparison instruction.
//
// The output shows which functions depend on which bytes of the input.
//
// Build:
//   1. Compile this file with -fsanitize=dataflow
//   2. Build the fuzz target with -g -fsanitize=dataflow
//       -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp
//   3. Link those together with -fsanitize=dataflow
//
// -fsanitize-coverage=trace-cmp inserts callbacks around every comparison
// instruction; DFSan modifies the calls to pass the data flow labels.
// The callbacks update the data flow label for the current function.
// See e.g. __dfsw___sanitizer_cov_trace_cmp1 below.
//
// -fsanitize-coverage=trace-pc-guard,pc-table,func instruments function
// entries so that the comparison callback knows the current function.
//
//
// Run:
//   # Collect data flow for INPUT_FILE, write to OUTPUT_FILE (default: stdout).
//   # Only the input bytes at offsets [BEG, END) are labeled.
//   ./a.out BEG END INPUT_FILE [OUTPUT_FILE]
//
//   # Print all instrumented functions. llvm-symbolizer must be present in PATH
//   ./a.out
//
// Example output:
// ===============
//  F0 11111111111111
//  F1 10000000000000
// ===============
// "FN xxxxxxxxxx": tells which bytes of the input function N depends on.
46 // The byte string is LEN+1 bytes. The last byte is set if the function 47 // depends on the input length. 48 //===----------------------------------------------------------------------===*/ 49 50 #include <assert.h> 51 #include <stdio.h> 52 #include <stdlib.h> 53 #include <stdint.h> 54 #include <string.h> 55 56 #include <execinfo.h> // backtrace_symbols_fd 57 58 #include <sanitizer/dfsan_interface.h> 59 60 extern "C" { 61 extern int LLVMFuzzerTestOneInput(const unsigned char *Data, size_t Size); 62 __attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv); 63 } // extern "C" 64 65 static size_t InputLen; 66 static size_t InputLabelBeg; 67 static size_t InputLabelEnd; 68 static size_t InputSizeLabel; 69 static size_t NumFuncs; 70 static const uintptr_t *FuncsBeg; 71 static __thread size_t CurrentFunc; 72 static dfsan_label *FuncLabels; // Array of NumFuncs elements. 73 static char *PrintableStringForLabel; // InputLen + 2 bytes. 74 static bool LabelSeen[1 << 8 * sizeof(dfsan_label)]; 75 76 // Prints all instrumented functions. 77 static int PrintFunctions() { 78 // We don't have the symbolizer integrated with dfsan yet. 79 // So use backtrace_symbols_fd and pipe it through llvm-symbolizer. 80 // TODO(kcc): this is pretty ugly and may break in lots of ways. 81 // We'll need to make a proper in-process symbolizer work with DFSan. 
82 FILE *Pipe = popen("sed 's/(+/ /g; s/).*//g' " 83 "| llvm-symbolizer " 84 "| grep 'dfs\\$' " 85 "| sed 's/dfs\\$//g'", "w"); 86 for (size_t I = 0; I < NumFuncs; I++) { 87 uintptr_t PC = FuncsBeg[I * 2]; 88 void *const Buf[1] = {(void*)PC}; 89 backtrace_symbols_fd(Buf, 1, fileno(Pipe)); 90 } 91 pclose(Pipe); 92 return 0; 93 } 94 95 extern "C" 96 void SetBytesForLabel(dfsan_label L, char *Bytes) { 97 if (LabelSeen[L]) 98 return; 99 LabelSeen[L] = true; 100 assert(L); 101 if (L < InputSizeLabel) { 102 Bytes[L + InputLabelBeg - 1] = '1'; 103 } else if (L == InputSizeLabel) { 104 Bytes[InputLen] = '1'; 105 } else { 106 auto *DLI = dfsan_get_label_info(L); 107 SetBytesForLabel(DLI->l1, Bytes); 108 SetBytesForLabel(DLI->l2, Bytes); 109 } 110 } 111 112 static char *GetPrintableStringForLabel(dfsan_label L) { 113 memset(PrintableStringForLabel, '0', InputLen + 1); 114 PrintableStringForLabel[InputLen + 1] = 0; 115 memset(LabelSeen, 0, sizeof(LabelSeen)); 116 SetBytesForLabel(L, PrintableStringForLabel); 117 return PrintableStringForLabel; 118 } 119 120 static void PrintDataFlow(FILE *Out) { 121 for (size_t I = 0; I < NumFuncs; I++) 122 if (FuncLabels[I]) 123 fprintf(Out, "F%zd %s\n", I, GetPrintableStringForLabel(FuncLabels[I])); 124 } 125 126 int main(int argc, char **argv) { 127 if (LLVMFuzzerInitialize) 128 LLVMFuzzerInitialize(&argc, &argv); 129 if (argc == 1) 130 return PrintFunctions(); 131 assert(argc == 4 || argc == 5); 132 InputLabelBeg = atoi(argv[1]); 133 InputLabelEnd = atoi(argv[2]); 134 assert(InputLabelBeg < InputLabelEnd); 135 136 const char *Input = argv[3]; 137 fprintf(stderr, "INFO: reading '%s'\n", Input); 138 FILE *In = fopen(Input, "r"); 139 assert(In); 140 fseek(In, 0, SEEK_END); 141 InputLen = ftell(In); 142 fseek(In, 0, SEEK_SET); 143 unsigned char *Buf = (unsigned char*)malloc(InputLen); 144 size_t NumBytesRead = fread(Buf, 1, InputLen, In); 145 assert(NumBytesRead == InputLen); 146 PrintableStringForLabel = (char*)malloc(InputLen + 2); 147 
fclose(In); 148 149 fprintf(stderr, "INFO: running '%s'\n", Input); 150 for (size_t I = 1; I <= InputLen; I++) { 151 size_t Idx = I - 1; 152 if (Idx >= InputLabelBeg && Idx < InputLabelEnd) { 153 dfsan_label L = dfsan_create_label("", nullptr); 154 assert(L == I - InputLabelBeg); 155 dfsan_set_label(L, Buf + Idx, 1); 156 } 157 } 158 dfsan_label SizeL = dfsan_create_label("", nullptr); 159 InputSizeLabel = SizeL; 160 assert(InputSizeLabel == InputLabelEnd - InputLabelBeg + 1); 161 dfsan_set_label(SizeL, &InputLen, sizeof(InputLen)); 162 163 LLVMFuzzerTestOneInput(Buf, InputLen); 164 free(Buf); 165 166 bool OutIsStdout = argc == 4; 167 fprintf(stderr, "INFO: writing dataflow to %s\n", 168 OutIsStdout ? "<stdout>" : argv[4]); 169 FILE *Out = OutIsStdout ? stdout : fopen(argv[4], "w"); 170 PrintDataFlow(Out); 171 if (!OutIsStdout) fclose(Out); 172 } 173 174 extern "C" { 175 176 void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, 177 uint32_t *stop) { 178 assert(NumFuncs == 0 && "This tool does not support DSOs"); 179 assert(start < stop && "The code is not instrumented for coverage"); 180 if (start == stop || *start) return; // Initialize only once. 181 for (uint32_t *x = start; x < stop; x++) 182 *x = ++NumFuncs; // The first index is 1. 183 FuncLabels = (dfsan_label*)calloc(NumFuncs, sizeof(dfsan_label)); 184 fprintf(stderr, "INFO: %zd instrumented function(s) observed\n", NumFuncs); 185 } 186 187 void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg, 188 const uintptr_t *pcs_end) { 189 assert(NumFuncs == (pcs_end - pcs_beg) / 2); 190 FuncsBeg = pcs_beg; 191 } 192 193 void __sanitizer_cov_trace_pc_indir(uint64_t x){} // unused. 194 195 void __sanitizer_cov_trace_pc_guard(uint32_t *guard){ 196 uint32_t FuncNum = *guard - 1; // Guards start from 1. 
197 assert(FuncNum < NumFuncs); 198 CurrentFunc = FuncNum; 199 } 200 201 void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases, 202 dfsan_label L1, dfsan_label UnusedL) { 203 assert(CurrentFunc < NumFuncs); 204 FuncLabels[CurrentFunc] = dfsan_union(FuncLabels[CurrentFunc], L1); 205 } 206 207 #define HOOK(Name, Type) \ 208 void Name(Type Arg1, Type Arg2, dfsan_label L1, dfsan_label L2) { \ 209 assert(CurrentFunc < NumFuncs); \ 210 FuncLabels[CurrentFunc] = \ 211 dfsan_union(FuncLabels[CurrentFunc], dfsan_union(L1, L2)); \ 212 } 213 214 HOOK(__dfsw___sanitizer_cov_trace_const_cmp1, uint8_t) 215 HOOK(__dfsw___sanitizer_cov_trace_const_cmp2, uint16_t) 216 HOOK(__dfsw___sanitizer_cov_trace_const_cmp4, uint32_t) 217 HOOK(__dfsw___sanitizer_cov_trace_const_cmp8, uint64_t) 218 HOOK(__dfsw___sanitizer_cov_trace_cmp1, uint8_t) 219 HOOK(__dfsw___sanitizer_cov_trace_cmp2, uint16_t) 220 HOOK(__dfsw___sanitizer_cov_trace_cmp4, uint32_t) 221 HOOK(__dfsw___sanitizer_cov_trace_cmp8, uint64_t) 222 223 } // extern "C" 224