1 //===-- dfsan.cpp ---------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is a part of DataFlowSanitizer.
10 //
11 // DataFlowSanitizer runtime.  This file defines the public interface to
12 // DataFlowSanitizer as well as the definition of certain runtime functions
13 // called automatically by the compiler (specifically the instrumentation pass
14 // in llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp).
15 //
16 // The public interface is defined in include/sanitizer/dfsan_interface.h whose
17 // functions are prefixed dfsan_ while the compiler interface functions are
18 // prefixed __dfsan_.
19 //===----------------------------------------------------------------------===//
20 
21 #include "dfsan/dfsan.h"
22 
23 #include "sanitizer_common/sanitizer_atomic.h"
24 #include "sanitizer_common/sanitizer_common.h"
25 #include "sanitizer_common/sanitizer_file.h"
26 #include "sanitizer_common/sanitizer_flag_parser.h"
27 #include "sanitizer_common/sanitizer_flags.h"
28 #include "sanitizer_common/sanitizer_internal_defs.h"
29 #include "sanitizer_common/sanitizer_libc.h"
30 #include "sanitizer_common/sanitizer_stacktrace.h"
31 
32 using namespace __dfsan;
33 
// Atomic counterpart of dfsan_label; used for the label counter and for the
// entries of the union table.
typedef atomic_uint16_t atomic_dfsan_label;
// Sentinel stored in a union table entry while a thread is still computing
// that entry (see __dfsan_union).  dfsan_check_label also treats it as the
// "out of labels" marker.
static const dfsan_label kInitializingLabel = -1;

// Number of distinct values a dfsan_label can take.
static const uptr kNumLabels = 1 << (sizeof(dfsan_label) * 8);

// Most recently allocated label.  New labels are obtained by atomically
// incrementing this counter; it stays 0 until the first label is created.
static atomic_dfsan_label __dfsan_last_label;
// Per-label metadata, indexed by label value.
static dfsan_label_info __dfsan_label_info[kNumLabels];

// Storage for the DataFlowSanitizer runtime flags.
Flags __dfsan::flags_data;

// Thread-local label slots exported to instrumented code.
// NOTE(review): presumably these carry the labels of a call's return value
// and arguments -- confirm against the instrumentation pass.
SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL dfsan_label __dfsan_retval_tls;
SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL dfsan_label __dfsan_arg_tls[64];

// Exported shadow pointer mask.  When DFSAN_RUNTIME_VMA is defined it is
// assigned from ShadowMask() in InitializePlatformEarly().
SANITIZER_INTERFACE_ATTRIBUTE uptr __dfsan_shadow_ptr_mask;
48 
49 // On Linux/x86_64, memory is laid out as follows:
50 //
51 // +--------------------+ 0x800000000000 (top of memory)
52 // | application memory |
53 // +--------------------+ 0x700000008000 (kAppAddr)
54 // |                    |
55 // |       unused       |
56 // |                    |
57 // +--------------------+ 0x200200000000 (kUnusedAddr)
58 // |    union table     |
59 // +--------------------+ 0x200000000000 (kUnionTableAddr)
60 // |   shadow memory    |
61 // +--------------------+ 0x000000010000 (kShadowAddr)
62 // | reserved by kernel |
63 // +--------------------+ 0x000000000000
64 //
65 // To derive a shadow memory address from an application memory address,
66 // bits 44-46 are cleared to bring the address into the range
67 // [0x000000008000,0x100000000000).  Then the address is shifted left by 1 to
68 // account for the double byte representation of shadow labels and move the
69 // address into the shadow memory range.  See the function shadow_for below.
70 
71 // On Linux/MIPS64, memory is laid out as follows:
72 //
73 // +--------------------+ 0x10000000000 (top of memory)
74 // | application memory |
75 // +--------------------+ 0xF000008000 (kAppAddr)
76 // |                    |
77 // |       unused       |
78 // |                    |
79 // +--------------------+ 0x2200000000 (kUnusedAddr)
80 // |    union table     |
81 // +--------------------+ 0x2000000000 (kUnionTableAddr)
82 // |   shadow memory    |
83 // +--------------------+ 0x0000010000 (kShadowAddr)
84 // | reserved by kernel |
85 // +--------------------+ 0x0000000000
86 
// On Linux/AArch64 (39-bit VMA), memory is laid out as follows:
88 //
89 // +--------------------+ 0x8000000000 (top of memory)
90 // | application memory |
91 // +--------------------+ 0x7000008000 (kAppAddr)
92 // |                    |
93 // |       unused       |
94 // |                    |
95 // +--------------------+ 0x1200000000 (kUnusedAddr)
96 // |    union table     |
97 // +--------------------+ 0x1000000000 (kUnionTableAddr)
98 // |   shadow memory    |
99 // +--------------------+ 0x0000010000 (kShadowAddr)
100 // | reserved by kernel |
101 // +--------------------+ 0x0000000000
102 
// On Linux/AArch64 (42-bit VMA), memory is laid out as follows:
104 //
105 // +--------------------+ 0x40000000000 (top of memory)
106 // | application memory |
107 // +--------------------+ 0x3ff00008000 (kAppAddr)
108 // |                    |
109 // |       unused       |
110 // |                    |
// +--------------------+ 0x8200000000 (kUnusedAddr)
112 // |    union table     |
113 // +--------------------+ 0x8000000000 (kUnionTableAddr)
114 // |   shadow memory    |
115 // +--------------------+ 0x0000010000 (kShadowAddr)
116 // | reserved by kernel |
117 // +--------------------+ 0x0000000000
118 
// On Linux/AArch64 (48-bit VMA), memory is laid out as follows:
120 //
121 // +--------------------+ 0x1000000000000 (top of memory)
122 // | application memory |
123 // +--------------------+ 0xffff00008000 (kAppAddr)
124 // |       unused       |
125 // +--------------------+ 0xaaaab0000000 (top of PIE address)
126 // | application PIE    |
// +--------------------+ 0xaaaaa0000000 (bottom of PIE address)
128 // |                    |
129 // |       unused       |
130 // |                    |
// +--------------------+ 0x8200000000 (kUnusedAddr)
132 // |    union table     |
133 // +--------------------+ 0x8000000000 (kUnionTableAddr)
134 // |   shadow memory    |
135 // +--------------------+ 0x0000010000 (kShadowAddr)
136 // | reserved by kernel |
137 // +--------------------+ 0x0000000000
138 
// Two-dimensional table indexed by a pair of labels.  Each entry records the
// label for the union of that pair; __dfsan_union treats an entry of 0 as
// "not yet computed".
typedef atomic_dfsan_label dfsan_union_table_t[kNumLabels][kNumLabels];

#ifdef DFSAN_RUNTIME_VMA
// Runtime detected VMA size.  Set in InitializePlatformEarly().
int __dfsan::vmaSize;
#endif
145 
146 static uptr UnusedAddr() {
147   return UnionTableAddr() + sizeof(dfsan_union_table_t);
148 }
149 
150 static atomic_dfsan_label *union_table(dfsan_label l1, dfsan_label l2) {
151   return &(*(dfsan_union_table_t *) UnionTableAddr())[l1][l2];
152 }
153 
154 // Checks we do not run out of labels.
155 static void dfsan_check_label(dfsan_label label) {
156   if (label == kInitializingLabel) {
157     Report("FATAL: DataFlowSanitizer: out of labels\n");
158     Die();
159   }
160 }
161 
// Resolves the union of two unequal labels.  Nonequality is a precondition for
// this function (the instrumentation pass inlines the equality test).
//
// Returns the other label if either input is 0, the bitwise OR of the inputs
// if no label has ever been allocated, and otherwise the label recorded in the
// union table for the pair, computing and publishing it on first use.
extern "C" SANITIZER_INTERFACE_ATTRIBUTE
dfsan_label __dfsan_union(dfsan_label l1, dfsan_label l2) {
  DCHECK_NE(l1, l2);

  // Label 0 is the identity element of the union.
  if (l1 == 0)
    return l2;
  if (l2 == 0)
    return l1;

  // If no labels have been created, yet l1 and l2 are non-zero, we are using
  // fast16labels mode.
  if (atomic_load(&__dfsan_last_label, memory_order_relaxed) == 0)
    return l1 | l2;

  // Normalize the pair so that l1 < l2; only that half of the table is used.
  if (l1 > l2)
    Swap(l1, l2);

  atomic_dfsan_label *table_ent = union_table(l1, l2);
  // We need to deal with the case where two threads concurrently request
  // a union of the same pair of labels.  If the table entry is uninitialized,
  // (i.e. 0) use a compare-exchange to set the entry to kInitializingLabel
  // (i.e. -1) to mark that we are initializing it.
  dfsan_label label = 0;
  if (atomic_compare_exchange_strong(table_ent, &label, kInitializingLabel,
                                     memory_order_acquire)) {
    // This thread won the race and is responsible for filling in the entry.
    //
    // Check whether l2 subsumes l1.  We don't need to check whether l1
    // subsumes l2 because we are guaranteed here that l1 < l2, and (at least
    // in the cases we are interested in) a label may only subsume labels
    // created earlier (i.e. with a lower numerical value).
    if (__dfsan_label_info[l2].l1 == l1 ||
        __dfsan_label_info[l2].l2 == l1) {
      label = l2;
    } else {
      // Allocate a fresh label and record the two labels it was built from.
      label =
        atomic_fetch_add(&__dfsan_last_label, 1, memory_order_relaxed) + 1;
      dfsan_check_label(label);
      __dfsan_label_info[label].l1 = l1;
      __dfsan_label_info[label].l2 = l2;
    }
    // Publish the result; this replaces the kInitializingLabel marker that
    // waiting threads are polling for below.
    atomic_store(table_ent, label, memory_order_release);
  } else if (label == kInitializingLabel) {
    // Another thread is initializing the entry.  Wait until it is finished.
    do {
      internal_sched_yield();
      label = atomic_load(table_ent, memory_order_acquire);
    } while (label == kInitializingLabel);
  }
  // If the compare-exchange failed with any other value, |label| already holds
  // the previously computed union.
  return label;
}
213 
214 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
215 dfsan_label __dfsan_union_load(const dfsan_label *ls, uptr n) {
216   dfsan_label label = ls[0];
217   for (uptr i = 1; i != n; ++i) {
218     dfsan_label next_label = ls[i];
219     if (label != next_label)
220       label = __dfsan_union(label, next_label);
221   }
222   return label;
223 }
224 
225 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
226 dfsan_label __dfsan_union_load_fast16labels(const dfsan_label *ls, uptr n) {
227   dfsan_label label = ls[0];
228   for (uptr i = 1; i != n; ++i)
229     label |= ls[i];
230   return label;
231 }
232 
233 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
234 void __dfsan_unimplemented(char *fname) {
235   if (flags().warn_unimplemented)
236     Report("WARNING: DataFlowSanitizer: call to uninstrumented function %s\n",
237            fname);
238 }
239 
240 // Use '-mllvm -dfsan-debug-nonzero-labels' and break on this function
241 // to try to figure out where labels are being introduced in a nominally
242 // label-free program.
243 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_nonzero_label() {
244   if (flags().warn_nonzero_labels)
245     Report("WARNING: DataFlowSanitizer: saw nonzero label\n");
246 }
247 
248 // Indirect call to an uninstrumented vararg function. We don't have a way of
249 // handling these at the moment.
250 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
251 __dfsan_vararg_wrapper(const char *fname) {
252   Report("FATAL: DataFlowSanitizer: unsupported indirect call to vararg "
253          "function %s\n", fname);
254   Die();
255 }
256 
257 // Like __dfsan_union, but for use from the client or custom functions.  Hence
258 // the equality comparison is done here before calling __dfsan_union.
259 SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
260 dfsan_union(dfsan_label l1, dfsan_label l2) {
261   if (l1 == l2)
262     return l1;
263   return __dfsan_union(l1, l2);
264 }
265 
266 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
267 dfsan_label dfsan_create_label(const char *desc, void *userdata) {
268   dfsan_label label =
269       atomic_fetch_add(&__dfsan_last_label, 1, memory_order_relaxed) + 1;
270   dfsan_check_label(label);
271   __dfsan_label_info[label].l1 = __dfsan_label_info[label].l2 = 0;
272   __dfsan_label_info[label].desc = desc;
273   __dfsan_label_info[label].userdata = userdata;
274   return label;
275 }
276 
277 static void WriteShadowIfDifferent(dfsan_label label, uptr shadow_addr,
278                                    uptr size) {
279   dfsan_label *labelp = (dfsan_label *)shadow_addr;
280   for (; size != 0; --size, ++labelp) {
281     // Don't write the label if it is already the value we need it to be.
282     // In a program where most addresses are not labeled, it is common that
283     // a page of shadow memory is entirely zeroed.  The Linux copy-on-write
284     // implementation will share all of the zeroed pages, making a copy of a
285     // page when any value is written.  The un-sharing will happen even if
286     // the value written does not change the value in memory.  Avoiding the
287     // write when both |label| and |*labelp| are zero dramatically reduces
288     // the amount of real memory used by large programs.
289     if (label == *labelp)
290       continue;
291 
292     *labelp = label;
293   }
294 }
295 
296 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_set_label(
297     dfsan_label label, void *addr, uptr size) {
298   const uptr beg_shadow_addr = (uptr)__dfsan::shadow_for(addr);
299 
300   if (0 != label) {
301     WriteShadowIfDifferent(label, beg_shadow_addr, size);
302     return;
303   }
304 
305   // If label is 0, releases the pages within the shadow address range, and sets
306   // the shadow addresses not on the pages to be 0.
307   const void *end_addr = (void *)((uptr)addr + size);
308   const uptr end_shadow_addr = (uptr)__dfsan::shadow_for(end_addr);
309   const uptr page_size = GetPageSizeCached();
310   const uptr beg_aligned = RoundUpTo(beg_shadow_addr, page_size);
311   const uptr end_aligned = RoundDownTo(end_shadow_addr, page_size);
312 
313   // dfsan_set_label can be called from the following cases
314   // 1) mapped ranges by new/delete and malloc/free. This case has shadow memory
315   // size > 100k, and happens less frequently.
316   // 2) zero-filling internal data structures by utility libraries. This case
317   // has shadow memory size < 32k, and happens more often.
318   // Set kNumPagesThreshold to be 8 to avoid releasing small pages.
319   const int kNumPagesThreshold = 8;
320   if (beg_aligned + kNumPagesThreshold * page_size >= end_aligned)
321     return WriteShadowIfDifferent(label, beg_shadow_addr, size);
322 
323   WriteShadowIfDifferent(label, beg_shadow_addr, beg_aligned - beg_shadow_addr);
324   ReleaseMemoryPagesToOS(beg_aligned, end_aligned);
325   WriteShadowIfDifferent(label, end_aligned, end_shadow_addr - end_aligned);
326 }
327 
// Public entry point for setting the label of |size| bytes starting at
// |addr|; forwards unchanged to __dfsan_set_label.
SANITIZER_INTERFACE_ATTRIBUTE
void dfsan_set_label(dfsan_label label, void *addr, uptr size) {
  __dfsan_set_label(label, addr, size);
}
332 
333 SANITIZER_INTERFACE_ATTRIBUTE
334 void dfsan_add_label(dfsan_label label, void *addr, uptr size) {
335   for (dfsan_label *labelp = shadow_for(addr); size != 0; --size, ++labelp)
336     if (*labelp != label)
337       *labelp = __dfsan_union(*labelp, label);
338 }
339 
// Unlike the other dfsan interface functions the behavior of this function
// depends on the label of one of its arguments.  Hence it is implemented as a
// custom function.
//
// Returns |data_label|, the label passed alongside |data|; |data| itself is
// not inspected.  The label of the return value, written through |ret_label|,
// is set to 0.
extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
__dfsw_dfsan_get_label(long data, dfsan_label data_label,
                       dfsan_label *ret_label) {
  *ret_label = 0;
  return data_label;
}
349 
350 SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
351 dfsan_read_label(const void *addr, uptr size) {
352   if (size == 0)
353     return 0;
354   return __dfsan_union_load(shadow_for(addr), size);
355 }
356 
// Returns a pointer to the metadata entry for |label| in the global label
// info table.  The pointer refers to the table itself, not to a copy.
extern "C" SANITIZER_INTERFACE_ATTRIBUTE
const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label) {
  return &__dfsan_label_info[label];
}
361 
362 extern "C" SANITIZER_INTERFACE_ATTRIBUTE int
363 dfsan_has_label(dfsan_label label, dfsan_label elem) {
364   if (label == elem)
365     return true;
366   const dfsan_label_info *info = dfsan_get_label_info(label);
367   if (info->l1 != 0) {
368     return dfsan_has_label(info->l1, elem) || dfsan_has_label(info->l2, elem);
369   } else {
370     return false;
371   }
372 }
373 
374 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
375 dfsan_has_label_with_desc(dfsan_label label, const char *desc) {
376   const dfsan_label_info *info = dfsan_get_label_info(label);
377   if (info->l1 != 0) {
378     return dfsan_has_label_with_desc(info->l1, desc) ||
379            dfsan_has_label_with_desc(info->l2, desc);
380   } else {
381     return internal_strcmp(desc, info->desc) == 0;
382   }
383 }
384 
385 extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr
386 dfsan_get_label_count(void) {
387   dfsan_label max_label_allocated =
388       atomic_load(&__dfsan_last_label, memory_order_relaxed);
389 
390   return static_cast<uptr>(max_label_allocated);
391 }
392 
393 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
394 dfsan_dump_labels(int fd) {
395   dfsan_label last_label =
396       atomic_load(&__dfsan_last_label, memory_order_relaxed);
397   for (uptr l = 1; l <= last_label; ++l) {
398     char buf[64];
399     internal_snprintf(buf, sizeof(buf), "%u %u %u ", l,
400                       __dfsan_label_info[l].l1, __dfsan_label_info[l].l2);
401     WriteToFile(fd, buf, internal_strlen(buf));
402     if (__dfsan_label_info[l].l1 == 0 && __dfsan_label_info[l].desc) {
403       WriteToFile(fd, __dfsan_label_info[l].desc,
404                   internal_strlen(__dfsan_label_info[l].desc));
405     }
406     WriteToFile(fd, "\n", 1);
407   }
408 }
409 
// Declares a local BufferedStackTrace named `stack` and unwinds it starting
// from |pc| and |bp|, with a null context, passing the fast_unwind_on_fatal
// common flag.  The expansion is a declaration followed by a statement, so the
// macro can only be used where a declaration is allowed and where no other
// `stack` is already in scope.
#define GET_FATAL_STACK_TRACE_PC_BP(pc, bp) \
  BufferedStackTrace stack;                 \
  stack.Unwind(pc, bp, nullptr, common_flags()->fast_unwind_on_fatal);
413 
// DataFlowSanitizer's definition of BufferedStackTrace::UnwindImpl.  Forwards
// |max_depth|, |pc|, |bp| and |context| to Unwind.  |request_fast| is ignored:
// the final argument passed to Unwind is always false.
// NOTE(review): the two 0 arguments are presumably the stack bounds -- confirm
// against the Unwind declaration.
void __sanitizer::BufferedStackTrace::UnwindImpl(uptr pc, uptr bp,
                                                 void *context,
                                                 bool request_fast,
                                                 u32 max_depth) {
  Unwind(max_depth, pc, bp, context, 0, 0, false);
}
420 
// Captures a stack trace starting at the current pc and frame, then prints
// it.
extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_print_stack_trace() {
  // The macro declares the local variable `stack` that is printed below.
  GET_FATAL_STACK_TRACE_PC_BP(StackTrace::GetCurrentPc(), GET_CURRENT_FRAME());
  stack.Print();
}
425 
// Assigns every DataFlowSanitizer flag its default value.  DFSAN_FLAG is
// defined so that each entry of dfsan_flags.inc expands to
// "Name = DefaultValue;", and is undefined again afterwards.
void Flags::SetDefaults() {
#define DFSAN_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
#include "dfsan_flags.inc"
#undef DFSAN_FLAG
}
431 
// Registers every DataFlowSanitizer flag with |parser|, binding each flag's
// name and description to the corresponding member of |f|.  DFSAN_FLAG is
// defined so that each entry of dfsan_flags.inc expands to one RegisterFlag
// call, and is undefined again afterwards.
static void RegisterDfsanFlags(FlagParser *parser, Flags *f) {
#define DFSAN_FLAG(Type, Name, DefaultValue, Description) \
  RegisterFlag(parser, #Name, Description, &f->Name);
#include "dfsan_flags.inc"
#undef DFSAN_FLAG
}
438 
439 static void InitializeFlags() {
440   SetCommonFlagsDefaults();
441   flags().SetDefaults();
442 
443   FlagParser parser;
444   RegisterCommonFlags(&parser);
445   RegisterDfsanFlags(&parser, &flags());
446   parser.ParseStringFromEnv("DFSAN_OPTIONS");
447   InitializeCommonFlags();
448   if (Verbosity()) ReportUnrecognizedFlags();
449   if (common_flags()->help) parser.PrintFlagDescriptions();
450 }
451 
452 static void InitializePlatformEarly() {
453   AvoidCVE_2016_2143();
454 #ifdef DFSAN_RUNTIME_VMA
455   __dfsan::vmaSize =
456     (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1);
457   if (__dfsan::vmaSize == 39 || __dfsan::vmaSize == 42 ||
458       __dfsan::vmaSize == 48) {
459     __dfsan_shadow_ptr_mask = ShadowMask();
460   } else {
461     Printf("FATAL: DataFlowSanitizer: unsupported VMA range\n");
462     Printf("FATAL: Found %d - Supported 39, 42, and 48\n", __dfsan::vmaSize);
463     Die();
464   }
465 #endif
466 }
467 
468 static void dfsan_fini() {
469   if (internal_strcmp(flags().dump_labels_at_exit, "") != 0) {
470     fd_t fd = OpenFile(flags().dump_labels_at_exit, WrOnly);
471     if (fd == kInvalidFd) {
472       Report("WARNING: DataFlowSanitizer: unable to open output file %s\n",
473              flags().dump_labels_at_exit);
474       return;
475     }
476 
477     Report("INFO: DataFlowSanitizer: dumping labels to %s\n",
478            flags().dump_labels_at_exit);
479     dfsan_dump_labels(fd);
480     CloseFile(fd);
481   }
482 }
483 
484 extern "C" void dfsan_flush() {
485   if (!MmapFixedNoReserve(ShadowAddr(), UnusedAddr() - ShadowAddr()))
486     Die();
487 }
488 
// Runtime initialization: parses flags, performs early platform setup, maps
// the shadow memory and union table, protects the unused region, installs the
// interceptors and registers the termination callbacks.  The parameters are
// not used by the body.
static void dfsan_init(int argc, char **argv, char **envp) {
  InitializeFlags();

  ::InitializePlatformEarly();

  // Map shadow memory and the union table as one contiguous region.
  if (!MmapFixedSuperNoReserve(ShadowAddr(), UnusedAddr() - ShadowAddr()))
    Die();
  if (common_flags()->use_madv_dontdump)
    DontDumpShadowMemory(ShadowAddr(), UnusedAddr() - ShadowAddr());

  // Protect the region of memory we don't use, to preserve the one-to-one
  // mapping from application to shadow memory. But if ASLR is disabled, Linux
  // will load our executable in the middle of our unused region. This mostly
  // works so long as the program doesn't use too much memory. We support this
  // case by disabling memory protection when ASLR is disabled.
  // The address of this function is used to tell whether the executable has
  // been loaded inside the unused region.
  uptr init_addr = (uptr)&dfsan_init;
  if (!(init_addr >= UnusedAddr() && init_addr < AppAddr()))
    MmapFixedNoAccess(UnusedAddr(), AppAddr() - UnusedAddr());

  InitializeInterceptors();

  // Register the fini callback to run when the program terminates successfully
  // or it is killed by the runtime.
  Atexit(dfsan_fini);
  AddDieCallback(dfsan_fini);

  // Give the reserved sentinel label a description so that its info entry
  // does not have a null desc.
  __dfsan_label_info[kInitializingLabel].desc = "<init label>";
}
517 
#if SANITIZER_CAN_USE_PREINIT_ARRAY
// Places a pointer to dfsan_init in the .preinit_array section; `used` keeps
// the otherwise unreferenced variable from being discarded.
// NOTE(review): presumably this makes the loader run dfsan_init before other
// initializers -- confirm against the platform's startup sequence.
__attribute__((section(".preinit_array"), used))
static void (*dfsan_init_ptr)(int, char **, char **) = dfsan_init;
#endif
522