1 //===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file is a part of DataFlowSanitizer, a generalised dynamic data flow
11 /// analysis.
12 ///
13 /// Unlike other Sanitizer tools, this tool is not designed to detect a specific
14 /// class of bugs on its own.  Instead, it provides a generic dynamic data flow
15 /// analysis framework to be used by clients to help detect application-specific
16 /// issues within their own code.
17 ///
18 /// The analysis is based on automatic propagation of data flow labels (also
19 /// known as taint labels) through a program as it performs computation.
20 ///
21 /// Argument and return value labels are passed through TLS variables
22 /// __dfsan_arg_tls and __dfsan_retval_tls.
23 ///
24 /// Each byte of application memory is backed by a shadow memory byte. The
25 /// shadow byte can represent up to 8 labels. On Linux/x86_64, memory is then
26 /// laid out as follows:
27 ///
28 /// +--------------------+ 0x800000000000 (top of memory)
29 /// |    application 3   |
30 /// +--------------------+ 0x700000000000
31 /// |      invalid       |
32 /// +--------------------+ 0x610000000000
33 /// |      origin 1      |
34 /// +--------------------+ 0x600000000000
35 /// |    application 2   |
36 /// +--------------------+ 0x510000000000
37 /// |      shadow 1      |
38 /// +--------------------+ 0x500000000000
39 /// |      invalid       |
40 /// +--------------------+ 0x400000000000
41 /// |      origin 3      |
42 /// +--------------------+ 0x300000000000
43 /// |      shadow 3      |
44 /// +--------------------+ 0x200000000000
45 /// |      origin 2      |
46 /// +--------------------+ 0x110000000000
47 /// |      invalid       |
48 /// +--------------------+ 0x100000000000
49 /// |      shadow 2      |
50 /// +--------------------+ 0x010000000000
51 /// |    application 1   |
52 /// +--------------------+ 0x000000000000
53 ///
54 /// MEM_TO_SHADOW(mem) = mem ^ 0x500000000000
55 /// SHADOW_TO_ORIGIN(shadow) = shadow + 0x100000000000
56 ///
57 /// For more information, please refer to the design document:
58 /// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html
59 //
60 //===----------------------------------------------------------------------===//
61 
62 #include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"
63 #include "llvm/ADT/DenseMap.h"
64 #include "llvm/ADT/DenseSet.h"
65 #include "llvm/ADT/DepthFirstIterator.h"
66 #include "llvm/ADT/None.h"
67 #include "llvm/ADT/SmallPtrSet.h"
68 #include "llvm/ADT/SmallVector.h"
69 #include "llvm/ADT/StringExtras.h"
70 #include "llvm/ADT/StringRef.h"
71 #include "llvm/ADT/Triple.h"
72 #include "llvm/ADT/iterator.h"
73 #include "llvm/Analysis/ValueTracking.h"
74 #include "llvm/IR/Argument.h"
75 #include "llvm/IR/Attributes.h"
76 #include "llvm/IR/BasicBlock.h"
77 #include "llvm/IR/Constant.h"
78 #include "llvm/IR/Constants.h"
79 #include "llvm/IR/DataLayout.h"
80 #include "llvm/IR/DerivedTypes.h"
81 #include "llvm/IR/Dominators.h"
82 #include "llvm/IR/Function.h"
83 #include "llvm/IR/GlobalAlias.h"
84 #include "llvm/IR/GlobalValue.h"
85 #include "llvm/IR/GlobalVariable.h"
86 #include "llvm/IR/IRBuilder.h"
87 #include "llvm/IR/InstVisitor.h"
88 #include "llvm/IR/InstrTypes.h"
89 #include "llvm/IR/Instruction.h"
90 #include "llvm/IR/Instructions.h"
91 #include "llvm/IR/IntrinsicInst.h"
92 #include "llvm/IR/MDBuilder.h"
93 #include "llvm/IR/Module.h"
94 #include "llvm/IR/PassManager.h"
95 #include "llvm/IR/Type.h"
96 #include "llvm/IR/User.h"
97 #include "llvm/IR/Value.h"
98 #include "llvm/InitializePasses.h"
99 #include "llvm/Pass.h"
100 #include "llvm/Support/Alignment.h"
101 #include "llvm/Support/Casting.h"
102 #include "llvm/Support/CommandLine.h"
103 #include "llvm/Support/ErrorHandling.h"
104 #include "llvm/Support/SpecialCaseList.h"
105 #include "llvm/Support/VirtualFileSystem.h"
106 #include "llvm/Transforms/Instrumentation.h"
107 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
108 #include "llvm/Transforms/Utils/Local.h"
109 #include <algorithm>
110 #include <cassert>
111 #include <cstddef>
112 #include <cstdint>
113 #include <iterator>
114 #include <memory>
115 #include <set>
116 #include <string>
117 #include <utility>
118 #include <vector>
119 
120 using namespace llvm;
121 
// This must be consistent with ShadowWidthBits.
static const Align ShadowTLSAlignment = Align(2);

// Origins are 32-bit values (see OriginWidthBits below), so origin accesses
// are at least 4-byte aligned.
static const Align MinOriginAlignment = Align(4);

// The size of TLS variables. These constants must be kept in sync with the ones
// in dfsan.cpp.
static const unsigned ArgTLSSize = 800;
static const unsigned RetvalTLSSize = 800;
131 
// The -dfsan-preserve-alignment flag controls whether this pass assumes that
// alignment requirements provided by the input IR are correct.  For example,
// if the input IR contains a load with alignment 8, this flag will cause
// the shadow load to have alignment 16.  This flag is disabled by default as
// we have unfortunately encountered too much code (including Clang itself;
// see PR14291) which performs misaligned access.
static cl::opt<bool> ClPreserveAlignment(
    "dfsan-preserve-alignment",
    cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
    cl::init(false));

// The ABI list files control how shadow parameters are passed. The pass treats
// every function labelled "uninstrumented" in the ABI list file as conforming
// to the "native" (i.e. unsanitized) ABI.  Unless the ABI list contains
// additional annotations for those functions, a call to one of those functions
// will produce a warning message, as the labelling behaviour of the function is
// unknown. The other supported annotations for uninstrumented functions are
// "functional" and "discard", which are described below under
// DataFlowSanitizer::WrapperKind.
// Functions will often be labelled with both "uninstrumented" and one of
// "functional" or "discard". This will leave the function unchanged by this
// pass, and create a wrapper function that will call the original.
//
// Instrumented functions can also be annotated as "force_zero_labels", which
// will make all shadow parameters and return values have zero labels.
// Functions should never be labelled with both "force_zero_labels" and
// "uninstrumented" or any of the uninstrumented wrapper kinds.
static cl::list<std::string> ClABIListFiles(
    "dfsan-abilist",
    cl::desc("File listing native ABI functions and how the pass treats them"),
    cl::Hidden);
163 
// Controls whether the pass includes or ignores the labels of pointers in load
// instructions.  Enabled by default.
static cl::opt<bool> ClCombinePointerLabelsOnLoad(
    "dfsan-combine-pointer-labels-on-load",
    cl::desc("Combine the label of the pointer with the label of the data when "
             "loading from memory."),
    cl::Hidden, cl::init(true));

// Controls whether the pass includes or ignores the labels of pointers in
// store instructions.  Disabled by default.
static cl::opt<bool> ClCombinePointerLabelsOnStore(
    "dfsan-combine-pointer-labels-on-store",
    cl::desc("Combine the label of the pointer with the label of the data when "
             "storing in memory."),
    cl::Hidden, cl::init(false));

// Controls whether the pass propagates labels of offsets in GEP instructions.
// Enabled by default.
static cl::opt<bool> ClCombineOffsetLabelsOnGEP(
    "dfsan-combine-offset-labels-on-gep",
    cl::desc(
        "Combine the label of the offset with the label of the pointer when "
        "doing pointer arithmetic."),
    cl::Hidden, cl::init(true));
187 
// Debugging aid: report (via the __dfsan_nonzero_label runtime hook) whenever
// a parameter, load or return is observed to carry a nonzero label.
static cl::opt<bool> ClDebugNonzeroLabels(
    "dfsan-debug-nonzero-labels",
    cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
             "load or return with a nonzero label"),
    cl::Hidden);

// Experimental feature that inserts callbacks for certain data events.
// Currently callbacks are only inserted for loads, stores, memory transfers
// (i.e. memcpy and memmove), and comparisons.
//
// If this flag is set to true, the user must provide definitions for the
// following callback functions:
//   void __dfsan_load_callback(dfsan_label Label, void* addr);
//   void __dfsan_store_callback(dfsan_label Label, void* addr);
//   void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len);
//   void __dfsan_cmp_callback(dfsan_label CombinedLabel);
static cl::opt<bool> ClEventCallbacks(
    "dfsan-event-callbacks",
    cl::desc("Insert calls to __dfsan_*_callback functions on data events."),
    cl::Hidden, cl::init(false));
208 
// Experimental feature that inserts callbacks for conditionals, including:
// conditional branch, switch, select.
// This must be true for dfsan_set_conditional_callback() to have effect.
static cl::opt<bool> ClConditionalCallbacks(
    "dfsan-conditional-callbacks",
    cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden,
    cl::init(false));

// Controls whether the pass tracks the control flow of select instructions.
// Enabled by default.
static cl::opt<bool> ClTrackSelectControlFlow(
    "dfsan-track-select-control-flow",
    cl::desc("Propagate labels from condition values of select instructions "
             "to results."),
    cl::Hidden, cl::init(true));

// Above this many origin stores in one function, origin stores are emitted as
// runtime calls rather than inline checks, to limit code-size growth.
// TODO: This default value follows MSan. DFSan may use a different value.
static cl::opt<int> ClInstrumentWithCallThreshold(
    "dfsan-instrument-with-call-threshold",
    cl::desc("If the function being instrumented requires more than "
             "this number of origin stores, use callbacks instead of "
             "inline checks (-1 means never use callbacks)."),
    cl::Hidden, cl::init(3500));

// Controls how to track origins.
// * 0: do not track origins.
// * 1: track origins at memory store operations.
// * 2: track origins at memory load and store operations.
//      TODO: track callsites.
static cl::opt<int> ClTrackOrigins("dfsan-track-origins",
                                   cl::desc("Track origins of labels"),
                                   cl::Hidden, cl::init(0));

// When a personality routine is "uninstrumented" in the ABI list, optionally
// skip creating the usual wrapper for it.
static cl::opt<bool> ClIgnorePersonalityRoutine(
    "dfsan-ignore-personality-routine",
    cl::desc("If a personality routine is marked uninstrumented from the ABI "
             "list, do not create a wrapper for it."),
    cl::Hidden, cl::init(false));
246 
247 static StringRef getGlobalTypeString(const GlobalValue &G) {
248   // Types of GlobalVariables are always pointer types.
249   Type *GType = G.getValueType();
250   // For now we support excluding struct types only.
251   if (StructType *SGType = dyn_cast<StructType>(GType)) {
252     if (!SGType->isLiteral())
253       return SGType->getName();
254   }
255   return "<unknown type>";
256 }
257 
258 namespace {
259 
// Memory map parameters used in application-to-shadow address calculation.
// Offset = (Addr & ~AndMask) ^ XorMask
// Shadow = ShadowBase + Offset
// Origin = (OriginBase + Offset) & ~3ULL
struct MemoryMapParams {
  uint64_t AndMask;    // Bits cleared from the application address (0 = unused).
  uint64_t XorMask;    // XOR'd into the masked address.
  uint64_t ShadowBase; // Added to form the shadow address (0 = unused).
  uint64_t OriginBase; // Added to form the origin address (then 4-byte aligned).
};
270 
271 } // end anonymous namespace
272 
// x86_64 Linux
// These constants realize the address-space layout in the file header:
// MEM_TO_SHADOW(mem) = mem ^ 0x500000000000 and
// SHADOW_TO_ORIGIN(shadow) = shadow + 0x100000000000.
// NOLINTNEXTLINE(readability-identifier-naming)
static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
    0,              // AndMask (not used)
    0x500000000000, // XorMask
    0,              // ShadowBase (not used)
    0x100000000000, // OriginBase
};
281 
282 namespace {
283 
284 class DFSanABIList {
285   std::unique_ptr<SpecialCaseList> SCL;
286 
287 public:
288   DFSanABIList() = default;
289 
290   void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); }
291 
292   /// Returns whether either this function or its source file are listed in the
293   /// given category.
294   bool isIn(const Function &F, StringRef Category) const {
295     return isIn(*F.getParent(), Category) ||
296            SCL->inSection("dataflow", "fun", F.getName(), Category);
297   }
298 
299   /// Returns whether this global alias is listed in the given category.
300   ///
301   /// If GA aliases a function, the alias's name is matched as a function name
302   /// would be.  Similarly, aliases of globals are matched like globals.
303   bool isIn(const GlobalAlias &GA, StringRef Category) const {
304     if (isIn(*GA.getParent(), Category))
305       return true;
306 
307     if (isa<FunctionType>(GA.getValueType()))
308       return SCL->inSection("dataflow", "fun", GA.getName(), Category);
309 
310     return SCL->inSection("dataflow", "global", GA.getName(), Category) ||
311            SCL->inSection("dataflow", "type", getGlobalTypeString(GA),
312                           Category);
313   }
314 
315   /// Returns whether this module is listed in the given category.
316   bool isIn(const Module &M, StringRef Category) const {
317     return SCL->inSection("dataflow", "src", M.getModuleIdentifier(), Category);
318   }
319 };
320 
321 /// TransformedFunction is used to express the result of transforming one
322 /// function type into another.  This struct is immutable.  It holds metadata
323 /// useful for updating calls of the old function to the new type.
324 struct TransformedFunction {
325   TransformedFunction(FunctionType *OriginalType, FunctionType *TransformedType,
326                       std::vector<unsigned> ArgumentIndexMapping)
327       : OriginalType(OriginalType), TransformedType(TransformedType),
328         ArgumentIndexMapping(ArgumentIndexMapping) {}
329 
330   // Disallow copies.
331   TransformedFunction(const TransformedFunction &) = delete;
332   TransformedFunction &operator=(const TransformedFunction &) = delete;
333 
334   // Allow moves.
335   TransformedFunction(TransformedFunction &&) = default;
336   TransformedFunction &operator=(TransformedFunction &&) = default;
337 
338   /// Type of the function before the transformation.
339   FunctionType *OriginalType;
340 
341   /// Type of the function after the transformation.
342   FunctionType *TransformedType;
343 
344   /// Transforming a function may change the position of arguments.  This
345   /// member records the mapping from each argument's old position to its new
346   /// position.  Argument positions are zero-indexed.  If the transformation
347   /// from F to F' made the first argument of F into the third argument of F',
348   /// then ArgumentIndexMapping[0] will equal 2.
349   std::vector<unsigned> ArgumentIndexMapping;
350 };
351 
/// Given function attributes from a call site for the original function,
/// return function attributes appropriate for a call to the transformed
/// function.
AttributeList
transformFunctionAttributes(const TransformedFunction &TransformedFunction,
                            LLVMContext &Ctx, AttributeList CallSiteAttrs) {

  // Construct a vector of AttributeSet for each function argument.
  std::vector<llvm::AttributeSet> ArgumentAttributes(
      TransformedFunction.TransformedType->getNumParams());

  // Copy attributes from the parameter of the original function to the
  // transformed version.  'ArgumentIndexMapping' holds the mapping from
  // old argument position to new.
  for (unsigned I = 0, IE = TransformedFunction.ArgumentIndexMapping.size();
       I < IE; ++I) {
    unsigned TransformedIndex = TransformedFunction.ArgumentIndexMapping[I];
    ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttrs(I);
  }

  // Copy annotations on varargs arguments.  These occupy the parameter
  // attribute slots past the original fixed parameter list and are appended
  // after the (remapped) fixed-argument attribute sets.
  for (unsigned I = TransformedFunction.OriginalType->getNumParams(),
                IE = CallSiteAttrs.getNumAttrSets();
       I < IE; ++I) {
    ArgumentAttributes.push_back(CallSiteAttrs.getParamAttrs(I));
  }

  // Function-level and return-value attributes carry over unchanged.
  return AttributeList::get(Ctx, CallSiteAttrs.getFnAttrs(),
                            CallSiteAttrs.getRetAttrs(),
                            llvm::makeArrayRef(ArgumentAttributes));
}
383 
class DataFlowSanitizer {
  friend struct DFSanFunction;
  friend class DFSanVisitor;

  // Each application byte is backed by one shadow byte, which can represent
  // up to 8 labels (see the file header).
  enum { ShadowWidthBits = 8, ShadowWidthBytes = ShadowWidthBits / 8 };

  // Origins are stored as 32-bit values.
  enum { OriginWidthBits = 32, OriginWidthBytes = OriginWidthBits / 8 };

  /// How should calls to uninstrumented functions be handled?
  enum WrapperKind {
    /// This function is present in an uninstrumented form but we don't know
    /// how it should be handled.  Print a warning and call the function anyway.
    /// Don't label the return value.
    WK_Warning,

    /// This function does not write to (user-accessible) memory, and its return
    /// value is unlabelled.
    WK_Discard,

    /// This function does not write to (user-accessible) memory, and the label
    /// of its return value is the union of the label of its arguments.
    WK_Functional,

    /// Instead of calling the function, a custom wrapper __dfsw_F is called,
    /// where F is the name of the function.  This function may wrap the
    /// original function or provide its own implementation. WK_Custom uses an
    /// extra pointer argument to return the shadow.  This allows the wrapped
    /// form of the function type to be expressed in C.
    WK_Custom
  };

  Module *Mod;
  LLVMContext *Ctx;
  Type *Int8Ptr;
  // Origin type and constants (32-bit; see OriginWidthBits).
  IntegerType *OriginTy;
  PointerType *OriginPtrTy;
  ConstantInt *ZeroOrigin;
  /// The shadow type for all primitive types and vector types.
  IntegerType *PrimitiveShadowTy;
  PointerType *PrimitiveShadowPtrTy;
  IntegerType *IntptrTy;
  ConstantInt *ZeroPrimitiveShadow;
  // TLS globals used to pass argument and return-value shadows (and origins)
  // between instrumented callers and callees; see __dfsan_arg_tls /
  // __dfsan_retval_tls in the file header.
  Constant *ArgTLS;
  ArrayType *ArgOriginTLSTy;
  Constant *ArgOriginTLS;
  Constant *RetvalTLS;
  Constant *RetvalOriginTLS;
  // Signatures of the runtime support functions referenced below.
  FunctionType *DFSanUnionLoadFnTy;
  FunctionType *DFSanLoadLabelAndOriginFnTy;
  FunctionType *DFSanUnimplementedFnTy;
  FunctionType *DFSanSetLabelFnTy;
  FunctionType *DFSanNonzeroLabelFnTy;
  FunctionType *DFSanVarargWrapperFnTy;
  FunctionType *DFSanConditionalCallbackFnTy;
  FunctionType *DFSanConditionalCallbackOriginFnTy;
  FunctionType *DFSanCmpCallbackFnTy;
  FunctionType *DFSanLoadStoreCallbackFnTy;
  FunctionType *DFSanMemTransferCallbackFnTy;
  FunctionType *DFSanChainOriginFnTy;
  FunctionType *DFSanChainOriginIfTaintedFnTy;
  FunctionType *DFSanMemOriginTransferFnTy;
  FunctionType *DFSanMaybeStoreOriginFnTy;
  // Runtime support and callback functions, resolved by
  // initializeRuntimeFunctions / initializeCallbackFunctions.
  FunctionCallee DFSanUnionLoadFn;
  FunctionCallee DFSanLoadLabelAndOriginFn;
  FunctionCallee DFSanUnimplementedFn;
  FunctionCallee DFSanSetLabelFn;
  FunctionCallee DFSanNonzeroLabelFn;
  FunctionCallee DFSanVarargWrapperFn;
  FunctionCallee DFSanLoadCallbackFn;
  FunctionCallee DFSanStoreCallbackFn;
  FunctionCallee DFSanMemTransferCallbackFn;
  FunctionCallee DFSanConditionalCallbackFn;
  FunctionCallee DFSanConditionalCallbackOriginFn;
  FunctionCallee DFSanCmpCallbackFn;
  FunctionCallee DFSanChainOriginFn;
  FunctionCallee DFSanChainOriginIfTaintedFn;
  FunctionCallee DFSanMemOriginTransferFn;
  FunctionCallee DFSanMaybeStoreOriginFn;
  // The set of the runtime functions declared above.
  SmallPtrSet<Value *, 16> DFSanRuntimeFunctions;
  // Branch-weight metadata nodes.
  MDNode *ColdCallWeights;
  MDNode *OriginStoreWeights;
  // Parsed -dfsan-abilist special-case list.
  DFSanABIList ABIList;
  // NOTE(review): presumably maps wrapper functions back to the functions they
  // wrap (see buildWrapperFunction) -- confirm against the implementation.
  DenseMap<Value *, Function *> UnwrappedFnMap;
  AttributeMask ReadOnlyNoneAttrs;

  /// Memory map parameters used in calculation mapping application addresses
  /// to shadow addresses and origin addresses.
  const MemoryMapParams *MapParams;

  // Address computation helpers: application address -> shadow/origin address.
  Value *getShadowOffset(Value *Addr, IRBuilder<> &IRB);
  Value *getShadowAddress(Value *Addr, Instruction *Pos);
  Value *getShadowAddress(Value *Addr, Instruction *Pos, Value *ShadowOffset);
  std::pair<Value *, Value *>
  getShadowOriginAddress(Value *Addr, Align InstAlignment, Instruction *Pos);
  // ABI-list queries and wrapper construction.
  bool isInstrumented(const Function *F);
  bool isInstrumented(const GlobalAlias *GA);
  bool isForceZeroLabels(const Function *F);
  TransformedFunction getCustomFunctionType(FunctionType *T);
  WrapperKind getWrapperKind(Function *F);
  void addGlobalNameSuffix(GlobalValue *GV);
  Function *buildWrapperFunction(Function *F, StringRef NewFName,
                                 GlobalValue::LinkageTypes NewFLink,
                                 FunctionType *NewFT);
  // Module-level setup performed once per run.
  void initializeCallbackFunctions(Module &M);
  void initializeRuntimeFunctions(Module &M);
  void injectMetadataGlobals(Module &M);
  bool initializeModule(Module &M);

  /// Advances \p OriginAddr to point to the next 32-bit origin and then loads
  /// from it. Returns the origin's loaded value.
  Value *loadNextOrigin(Instruction *Pos, Align OriginAlign,
                        Value **OriginAddr);

  /// Returns whether the given load byte size is amenable to inlined
  /// optimization patterns.
  bool hasLoadSizeForFastPath(uint64_t Size);

  /// Returns whether the pass tracks origins. Supports only TLS ABI mode.
  bool shouldTrackOrigins();

  /// Returns a zero constant with the shadow type of OrigTy.
  ///
  /// getZeroShadow({T1,T2,...}) = {getZeroShadow(T1),getZeroShadow(T2),...}
  /// getZeroShadow([n x T]) = [n x getZeroShadow(T)]
  /// getZeroShadow(other type) = i16(0)
  /// NOTE(review): ShadowWidthBits is 8, so "i16" above looks stale -- confirm
  /// against PrimitiveShadowTy's construction.
  Constant *getZeroShadow(Type *OrigTy);
  /// Returns a zero constant with the shadow type of V's type.
  Constant *getZeroShadow(Value *V);

  /// Checks if V is a zero shadow.
  bool isZeroShadow(Value *V);

  /// Returns the shadow type of OrigTy.
  ///
  /// getShadowTy({T1,T2,...}) = {getShadowTy(T1),getShadowTy(T2),...}
  /// getShadowTy([n x T]) = [n x getShadowTy(T)]
  /// getShadowTy(other type) = i16
  /// NOTE(review): see the i16-vs-ShadowWidthBits note on getZeroShadow.
  Type *getShadowTy(Type *OrigTy);
  /// Returns the shadow type of V's type.
  Type *getShadowTy(Value *V);

  // Number of 32-bit origin slots in the argument-origin TLS array.
  const uint64_t NumOfElementsInArgOrgTLS = ArgTLSSize / OriginWidthBytes;

public:
  DataFlowSanitizer(const std::vector<std::string> &ABIListFiles);

  bool runImpl(Module &M);
};
532 
/// Per-function instrumentation state.  One DFSanFunction is created for each
/// function being instrumented; it tracks the shadow/origin values computed so
/// far and the bookkeeping needed to finish instrumentation of the function.
struct DFSanFunction {
  DataFlowSanitizer &DFS; // Pass-wide state shared across functions.
  Function *F;            // The function being instrumented.
  DominatorTree DT;       // Dominator tree of F; recalculated in the ctor.
  bool IsNativeABI;
  // Set for functions annotated "force_zero_labels" in the ABI list.
  bool IsForceZeroLabels;
  AllocaInst *LabelReturnAlloca = nullptr;
  AllocaInst *OriginReturnAlloca = nullptr;
  // Maps application values to their shadow / origin values.
  DenseMap<Value *, Value *> ValShadowMap;
  DenseMap<Value *, Value *> ValOriginMap;
  // Maps application allocas to the allocas holding their shadows / origins.
  DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap;
  DenseMap<AllocaInst *, AllocaInst *> AllocaOriginMap;

  // A PHI whose shadow/origin PHIs still need their incoming values filled in.
  // NOTE(review): presumably resolved after all blocks are visited, once every
  // operand shadow exists -- confirm against visitPHINode and runImpl.
  struct PHIFixupElement {
    PHINode *Phi;       // The original application PHI.
    PHINode *ShadowPhi; // PHI carrying the shadows of Phi's incoming values.
    PHINode *OriginPhi; // PHI carrying the origins (when tracking origins).
  };
  std::vector<PHIFixupElement> PHIFixups;

  // Instructions the visitor should not instrument.
  DenseSet<Instruction *> SkipInsts;
  // Values whose shadows are checked when ClDebugNonzeroLabels is enabled.
  std::vector<Value *> NonZeroChecks;

  struct CachedShadow {
    BasicBlock *Block; // The block where Shadow is defined.
    Value *Shadow;
  };
  /// Maps a value to its latest shadow value in terms of domination tree.
  DenseMap<std::pair<Value *, Value *>, CachedShadow> CachedShadows;
  /// Maps a value to its latest collapsed shadow value it was converted to in
  /// terms of domination tree. When ClDebugNonzeroLabels is on, this cache is
  /// used at a post process where CFG blocks are split. So it does not cache
  /// BasicBlock like CachedShadows, but uses domination between values.
  DenseMap<Value *, Value *> CachedCollapsedShadows;
  DenseMap<Value *, std::set<Value *>> ShadowElements;

  DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI,
                bool IsForceZeroLabels)
      : DFS(DFS), F(F), IsNativeABI(IsNativeABI),
        IsForceZeroLabels(IsForceZeroLabels) {
    DT.recalculate(*F);
  }

  /// Computes the shadow address for a given function argument.
  ///
  /// Shadow = ArgTLS+ArgOffset.
  Value *getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB);

  /// Computes the shadow address for a return value.
  Value *getRetvalTLS(Type *T, IRBuilder<> &IRB);

  /// Computes the origin address for a given function argument.
  ///
  /// Origin = ArgOriginTLS[ArgNo].
  Value *getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB);

  /// Computes the origin address for a return value.
  Value *getRetvalOriginTLS();

  /// Returns the origin value recorded for V.
  Value *getOrigin(Value *V);
  /// Records Origin as the origin value of instruction I.
  void setOrigin(Instruction *I, Value *Origin);
  /// Generates IR to compute the origin of the last operand with a taint label.
  Value *combineOperandOrigins(Instruction *Inst);
  /// Before the instruction Pos, generates IR to compute the last origin with a
  /// taint label. Labels and origins are from vectors Shadows and Origins
  /// correspondingly. The generated IR is like
  ///   Sn-1 != Zero ? On-1: ... S2 != Zero ? O2: S1 != Zero ? O1: O0
  /// When Zero is nullptr, it uses ZeroPrimitiveShadow. Otherwise it can be
  /// zeros with other bitwidths.
  Value *combineOrigins(const std::vector<Value *> &Shadows,
                        const std::vector<Value *> &Origins, Instruction *Pos,
                        ConstantInt *Zero = nullptr);

  /// Returns the shadow value recorded (or synthesized) for V.
  Value *getShadow(Value *V);
  /// Records Shadow as the shadow value of instruction I.
  void setShadow(Instruction *I, Value *Shadow);
  /// Generates IR to compute the union of the two given shadows, inserting it
  /// before Pos. The combined value is with primitive type.
  Value *combineShadows(Value *V1, Value *V2, Instruction *Pos);
  /// Combines the shadow values of V1 and V2, then converts the combined value
  /// with primitive type into a shadow value with the original type T.
  Value *combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
                                   Instruction *Pos);
  /// Combines the shadows of all of Inst's operands.
  Value *combineOperandShadows(Instruction *Inst);

  /// Generates IR to load shadow and origin corresponding to bytes [\p
  /// Addr, \p Addr + \p Size), where addr has alignment \p
  /// InstAlignment, and take the union of each of those shadows. The returned
  /// shadow always has primitive type.
  ///
  /// When tracking loads is enabled, the returned origin is a chain at the
  /// current stack if the returned shadow is tainted.
  std::pair<Value *, Value *> loadShadowOrigin(Value *Addr, uint64_t Size,
                                               Align InstAlignment,
                                               Instruction *Pos);

  /// Stores PrimitiveShadow (and Origin, when tracking origins) for the bytes
  /// [Addr, Addr + Size).
  void storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
                                  Align InstAlignment, Value *PrimitiveShadow,
                                  Value *Origin, Instruction *Pos);
  /// Applies PrimitiveShadow to all primitive subtypes of T, returning
  /// the expanded shadow value.
  ///
  /// EFP({T1,T2, ...}, PS) = {EFP(T1,PS),EFP(T2,PS),...}
  /// EFP([n x T], PS) = [n x EFP(T,PS)]
  /// EFP(other types, PS) = PS
  Value *expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
                                   Instruction *Pos);
  /// Collapses Shadow into a single primitive shadow value, unioning all
  /// primitive shadow values in the process. Returns the final primitive
  /// shadow value.
  ///
  /// CTP({V1,V2, ...}) = UNION(CFP(V1,PS),CFP(V2,PS),...)
  /// CTP([V1,V2,...]) = UNION(CFP(V1,PS),CFP(V2,PS),...)
  /// CTP(other types, PS) = PS
  Value *collapseToPrimitiveShadow(Value *Shadow, Instruction *Pos);

  /// Stores a zero primitive shadow for the bytes [Addr, Addr + Size).
  void storeZeroPrimitiveShadow(Value *Addr, uint64_t Size, Align ShadowAlign,
                                Instruction *Pos);

  /// Returns the alignment to use for shadow accesses corresponding to an
  /// application access with alignment InstAlignment.
  Align getShadowAlign(Align InstAlignment);

  // If ClConditionalCallbacks is enabled, insert a callback after a given
  // branch instruction using the given conditional expression.
  void addConditionalCallbacksIfEnabled(Instruction &I, Value *Condition);

private:
  /// Collapses the shadow with aggregate type into a single primitive shadow
  /// value.
  template <class AggregateType>
  Value *collapseAggregateShadow(AggregateType *AT, Value *Shadow,
                                 IRBuilder<> &IRB);

  Value *collapseToPrimitiveShadow(Value *Shadow, IRBuilder<> &IRB);

  /// Returns the shadow value of an argument A.
  Value *getShadowForTLSArgument(Argument *A);

  /// The fast path of loading shadows.
  std::pair<Value *, Value *>
  loadShadowFast(Value *ShadowAddr, Value *OriginAddr, uint64_t Size,
                 Align ShadowAlign, Align OriginAlign, Value *FirstOrigin,
                 Instruction *Pos);

  /// Returns the alignment to use for origin accesses corresponding to an
  /// application access with alignment InstAlignment.
  Align getOriginAlign(Align InstAlignment);

  /// Because 4 contiguous bytes share one 4-byte origin, the most accurate load
  /// is __dfsan_load_label_and_origin. This function returns the union of all
  /// labels and the origin of the first taint label. However this is an
  /// additional call with many instructions. To ensure common cases are fast,
  /// checks if it is possible to load labels and origins without using the
  /// callback function.
  ///
  /// When enabling tracking load instructions, we always use
  /// __dfsan_load_label_and_origin to reduce code size.
  bool useCallbackLoadLabelAndOrigin(uint64_t Size, Align InstAlignment);

  /// Returns a chain at the current stack with previous origin V.
  Value *updateOrigin(Value *V, IRBuilder<> &IRB);

  /// Returns a chain at the current stack with previous origin V if Shadow is
  /// tainted.
  Value *updateOriginIfTainted(Value *Shadow, Value *Origin, IRBuilder<> &IRB);

  /// Creates an Intptr = Origin | Origin << 32 if Intptr's size is 64. Returns
  /// Origin otherwise.
  Value *originToIntptr(IRBuilder<> &IRB, Value *Origin);

  /// Stores Origin into the address range [StoreOriginAddr, StoreOriginAddr +
  /// Size).
  void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *StoreOriginAddr,
                   uint64_t StoreOriginSize, Align Alignment);

  /// Stores Origin in terms of its Shadow value.
  /// * Do not write origins for zero shadows because we do not trace origins
  ///   for untainted sinks.
  /// * Use __dfsan_maybe_store_origin if there are too many origin store
  ///   instrumentations.
  void storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size, Value *Shadow,
                   Value *Origin, Value *StoreOriginAddr, Align InstAlignment);

  /// Convert a scalar value to an i1 by comparing with 0.
  Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &Name = "");

  /// Returns whether origin stores should be emitted as runtime calls rather
  /// than inline checks (see -dfsan-instrument-with-call-threshold).
  bool shouldInstrumentWithCall();

  /// Generates IR to load shadow and origin corresponding to bytes [\p
  /// Addr, \p Addr + \p Size), where addr has alignment \p
  /// InstAlignment, and take the union of each of those shadows. The returned
  /// shadow always has primitive type.
  std::pair<Value *, Value *>
  loadShadowOriginSansLoadTracking(Value *Addr, uint64_t Size,
                                   Align InstAlignment, Instruction *Pos);
  // Number of origin stores instrumented in this function so far; compared
  // against -dfsan-instrument-with-call-threshold.
  int NumOriginStores = 0;
};
726 
/// Instruction visitor that performs the actual DFSan instrumentation.
///
/// Each visit* method instruments one instruction kind: propagating shadow
/// (and, when enabled, origin) values through computation, loads and stores,
/// calls, and control flow. State shared across instructions of one function
/// lives in the referenced DFSanFunction.
class DFSanVisitor : public InstVisitor<DFSanVisitor> {
public:
  /// Per-function instrumentation state (shadow/origin maps, caches, etc.).
  DFSanFunction &DFSF;

  DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {}

  const DataLayout &getDataLayout() const {
    return DFSF.F->getParent()->getDataLayout();
  }

  // Combines shadow values and origins for all of I's operands.
  void visitInstOperands(Instruction &I);

  // Generic value-propagating instructions: the result's shadow is the
  // combination of the operands' shadows.
  void visitUnaryOperator(UnaryOperator &UO);
  void visitBinaryOperator(BinaryOperator &BO);
  void visitBitCastInst(BitCastInst &BCI);
  void visitCastInst(CastInst &CI);
  void visitCmpInst(CmpInst &CI);
  void visitLandingPadInst(LandingPadInst &LPI);
  void visitGetElementPtrInst(GetElementPtrInst &GEPI);
  // Memory accesses: these must read/write shadow (and origin) memory.
  void visitLoadInst(LoadInst &LI);
  void visitStoreInst(StoreInst &SI);
  void visitAtomicRMWInst(AtomicRMWInst &I);
  void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I);
  // ABI boundaries: shadow/origin values flow through the TLS slots.
  void visitReturnInst(ReturnInst &RI);
  void visitCallBase(CallBase &CB);
  void visitPHINode(PHINode &PN);
  void visitExtractElementInst(ExtractElementInst &I);
  void visitInsertElementInst(InsertElementInst &I);
  void visitShuffleVectorInst(ShuffleVectorInst &I);
  void visitExtractValueInst(ExtractValueInst &I);
  void visitInsertValueInst(InsertValueInst &I);
  void visitAllocaInst(AllocaInst &I);
  void visitSelectInst(SelectInst &I);
  // Memory intrinsics need their shadow/origin regions copied or cleared.
  void visitMemSetInst(MemSetInst &I);
  void visitMemTransferInst(MemTransferInst &I);
  // Control flow: used for the conditional-callback feature.
  void visitBranchInst(BranchInst &BR);
  void visitSwitchInst(SwitchInst &SW);

private:
  // Shared instrumentation for cmpxchg and atomicrmw.
  void visitCASOrRMW(Align InstAlignment, Instruction &I);

  // Returns false when this is an invoke of a custom function.
  bool visitWrappedCallBase(Function &F, CallBase &CB);

  // Combines origins for all of I's operands.
  void visitInstOperandOrigins(Instruction &I);

  // Append the shadow arguments for a call to a custom function.
  void addShadowArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
                          IRBuilder<> &IRB);

  // Append the origin arguments for a call to a custom function.
  void addOriginArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
                          IRBuilder<> &IRB);
};
781 
782 } // end anonymous namespace
783 
784 DataFlowSanitizer::DataFlowSanitizer(
785     const std::vector<std::string> &ABIListFiles) {
786   std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));
787   llvm::append_range(AllABIListFiles, ClABIListFiles);
788   // FIXME: should we propagate vfs::FileSystem to this constructor?
789   ABIList.set(
790       SpecialCaseList::createOrDie(AllABIListFiles, *vfs::getRealFileSystem()));
791 }
792 
793 TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
794   SmallVector<Type *, 4> ArgTypes;
795 
796   // Some parameters of the custom function being constructed are
797   // parameters of T.  Record the mapping from parameters of T to
798   // parameters of the custom function, so that parameter attributes
799   // at call sites can be updated.
800   std::vector<unsigned> ArgumentIndexMapping;
801   for (unsigned I = 0, E = T->getNumParams(); I != E; ++I) {
802     Type *ParamType = T->getParamType(I);
803     ArgumentIndexMapping.push_back(ArgTypes.size());
804     ArgTypes.push_back(ParamType);
805   }
806   for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
807     ArgTypes.push_back(PrimitiveShadowTy);
808   if (T->isVarArg())
809     ArgTypes.push_back(PrimitiveShadowPtrTy);
810   Type *RetType = T->getReturnType();
811   if (!RetType->isVoidTy())
812     ArgTypes.push_back(PrimitiveShadowPtrTy);
813 
814   if (shouldTrackOrigins()) {
815     for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
816       ArgTypes.push_back(OriginTy);
817     if (T->isVarArg())
818       ArgTypes.push_back(OriginPtrTy);
819     if (!RetType->isVoidTy())
820       ArgTypes.push_back(OriginPtrTy);
821   }
822 
823   return TransformedFunction(
824       T, FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()),
825       ArgumentIndexMapping);
826 }
827 
828 bool DataFlowSanitizer::isZeroShadow(Value *V) {
829   Type *T = V->getType();
830   if (!isa<ArrayType>(T) && !isa<StructType>(T)) {
831     if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
832       return CI->isZero();
833     return false;
834   }
835 
836   return isa<ConstantAggregateZero>(V);
837 }
838 
839 bool DataFlowSanitizer::hasLoadSizeForFastPath(uint64_t Size) {
840   uint64_t ShadowSize = Size * ShadowWidthBytes;
841   return ShadowSize % 8 == 0 || ShadowSize == 4;
842 }
843 
844 bool DataFlowSanitizer::shouldTrackOrigins() {
845   static const bool ShouldTrackOrigins = ClTrackOrigins;
846   return ShouldTrackOrigins;
847 }
848 
849 Constant *DataFlowSanitizer::getZeroShadow(Type *OrigTy) {
850   if (!isa<ArrayType>(OrigTy) && !isa<StructType>(OrigTy))
851     return ZeroPrimitiveShadow;
852   Type *ShadowTy = getShadowTy(OrigTy);
853   return ConstantAggregateZero::get(ShadowTy);
854 }
855 
856 Constant *DataFlowSanitizer::getZeroShadow(Value *V) {
857   return getZeroShadow(V->getType());
858 }
859 
860 static Value *expandFromPrimitiveShadowRecursive(
861     Value *Shadow, SmallVector<unsigned, 4> &Indices, Type *SubShadowTy,
862     Value *PrimitiveShadow, IRBuilder<> &IRB) {
863   if (!isa<ArrayType>(SubShadowTy) && !isa<StructType>(SubShadowTy))
864     return IRB.CreateInsertValue(Shadow, PrimitiveShadow, Indices);
865 
866   if (ArrayType *AT = dyn_cast<ArrayType>(SubShadowTy)) {
867     for (unsigned Idx = 0; Idx < AT->getNumElements(); Idx++) {
868       Indices.push_back(Idx);
869       Shadow = expandFromPrimitiveShadowRecursive(
870           Shadow, Indices, AT->getElementType(), PrimitiveShadow, IRB);
871       Indices.pop_back();
872     }
873     return Shadow;
874   }
875 
876   if (StructType *ST = dyn_cast<StructType>(SubShadowTy)) {
877     for (unsigned Idx = 0; Idx < ST->getNumElements(); Idx++) {
878       Indices.push_back(Idx);
879       Shadow = expandFromPrimitiveShadowRecursive(
880           Shadow, Indices, ST->getElementType(Idx), PrimitiveShadow, IRB);
881       Indices.pop_back();
882     }
883     return Shadow;
884   }
885   llvm_unreachable("Unexpected shadow type");
886 }
887 
888 bool DFSanFunction::shouldInstrumentWithCall() {
889   return ClInstrumentWithCallThreshold >= 0 &&
890          NumOriginStores >= ClInstrumentWithCallThreshold;
891 }
892 
893 Value *DFSanFunction::expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
894                                                 Instruction *Pos) {
895   Type *ShadowTy = DFS.getShadowTy(T);
896 
897   if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
898     return PrimitiveShadow;
899 
900   if (DFS.isZeroShadow(PrimitiveShadow))
901     return DFS.getZeroShadow(ShadowTy);
902 
903   IRBuilder<> IRB(Pos);
904   SmallVector<unsigned, 4> Indices;
905   Value *Shadow = UndefValue::get(ShadowTy);
906   Shadow = expandFromPrimitiveShadowRecursive(Shadow, Indices, ShadowTy,
907                                               PrimitiveShadow, IRB);
908 
909   // Caches the primitive shadow value that built the shadow value.
910   CachedCollapsedShadows[Shadow] = PrimitiveShadow;
911   return Shadow;
912 }
913 
914 template <class AggregateType>
915 Value *DFSanFunction::collapseAggregateShadow(AggregateType *AT, Value *Shadow,
916                                               IRBuilder<> &IRB) {
917   if (!AT->getNumElements())
918     return DFS.ZeroPrimitiveShadow;
919 
920   Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);
921   Value *Aggregator = collapseToPrimitiveShadow(FirstItem, IRB);
922 
923   for (unsigned Idx = 1; Idx < AT->getNumElements(); Idx++) {
924     Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
925     Value *ShadowInner = collapseToPrimitiveShadow(ShadowItem, IRB);
926     Aggregator = IRB.CreateOr(Aggregator, ShadowInner);
927   }
928   return Aggregator;
929 }
930 
931 Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
932                                                 IRBuilder<> &IRB) {
933   Type *ShadowTy = Shadow->getType();
934   if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
935     return Shadow;
936   if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy))
937     return collapseAggregateShadow<>(AT, Shadow, IRB);
938   if (StructType *ST = dyn_cast<StructType>(ShadowTy))
939     return collapseAggregateShadow<>(ST, Shadow, IRB);
940   llvm_unreachable("Unexpected shadow type");
941 }
942 
943 Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
944                                                 Instruction *Pos) {
945   Type *ShadowTy = Shadow->getType();
946   if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
947     return Shadow;
948 
949   // Checks if the cached collapsed shadow value dominates Pos.
950   Value *&CS = CachedCollapsedShadows[Shadow];
951   if (CS && DT.dominates(CS, Pos))
952     return CS;
953 
954   IRBuilder<> IRB(Pos);
955   Value *PrimitiveShadow = collapseToPrimitiveShadow(Shadow, IRB);
956   // Caches the converted primitive shadow value.
957   CS = PrimitiveShadow;
958   return PrimitiveShadow;
959 }
960 
961 void DFSanFunction::addConditionalCallbacksIfEnabled(Instruction &I,
962                                                      Value *Condition) {
963   if (!ClConditionalCallbacks) {
964     return;
965   }
966   IRBuilder<> IRB(&I);
967   Value *CondShadow = getShadow(Condition);
968   if (DFS.shouldTrackOrigins()) {
969     Value *CondOrigin = getOrigin(Condition);
970     IRB.CreateCall(DFS.DFSanConditionalCallbackOriginFn,
971                    {CondShadow, CondOrigin});
972   } else {
973     IRB.CreateCall(DFS.DFSanConditionalCallbackFn, {CondShadow});
974   }
975 }
976 
977 Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) {
978   if (!OrigTy->isSized())
979     return PrimitiveShadowTy;
980   if (isa<IntegerType>(OrigTy))
981     return PrimitiveShadowTy;
982   if (isa<VectorType>(OrigTy))
983     return PrimitiveShadowTy;
984   if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy))
985     return ArrayType::get(getShadowTy(AT->getElementType()),
986                           AT->getNumElements());
987   if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
988     SmallVector<Type *, 4> Elements;
989     for (unsigned I = 0, N = ST->getNumElements(); I < N; ++I)
990       Elements.push_back(getShadowTy(ST->getElementType(I)));
991     return StructType::get(*Ctx, Elements);
992   }
993   return PrimitiveShadowTy;
994 }
995 
996 Type *DataFlowSanitizer::getShadowTy(Value *V) {
997   return getShadowTy(V->getType());
998 }
999 
/// One-time per-module setup: validates the target, caches frequently used
/// types and constants, and builds the signatures of every runtime support
/// function (declared later by initializeRuntimeFunctions /
/// initializeCallbackFunctions).
bool DataFlowSanitizer::initializeModule(Module &M) {
  Triple TargetTriple(M.getTargetTriple());
  const DataLayout &DL = M.getDataLayout();

  // Only Linux/x86_64 is supported; the shadow/origin memory layout
  // described at the top of this file depends on it.
  if (TargetTriple.getOS() != Triple::Linux)
    report_fatal_error("unsupported operating system");
  if (TargetTriple.getArch() != Triple::x86_64)
    report_fatal_error("unsupported architecture");
  MapParams = &Linux_X86_64_MemoryMapParams;

  // Cache commonly used types and constants.
  Mod = &M;
  Ctx = &M.getContext();
  Int8Ptr = Type::getInt8PtrTy(*Ctx);
  OriginTy = IntegerType::get(*Ctx, OriginWidthBits);
  OriginPtrTy = PointerType::getUnqual(OriginTy);
  PrimitiveShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
  PrimitiveShadowPtrTy = PointerType::getUnqual(PrimitiveShadowTy);
  IntptrTy = DL.getIntPtrType(*Ctx);
  ZeroPrimitiveShadow = ConstantInt::getSigned(PrimitiveShadowTy, 0);
  ZeroOrigin = ConstantInt::getSigned(OriginTy, 0);

  // Signatures of the runtime support functions.  These must stay in sync
  // with the definitions in the DFSan runtime library.
  Type *DFSanUnionLoadArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
  DFSanUnionLoadFnTy = FunctionType::get(PrimitiveShadowTy, DFSanUnionLoadArgs,
                                         /*isVarArg=*/false);
  // Returns label and origin packed into a single 64-bit value.
  Type *DFSanLoadLabelAndOriginArgs[2] = {Int8Ptr, IntptrTy};
  DFSanLoadLabelAndOriginFnTy =
      FunctionType::get(IntegerType::get(*Ctx, 64), DFSanLoadLabelAndOriginArgs,
                        /*isVarArg=*/false);
  DFSanUnimplementedFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
  // (label, origin, addr, size)
  Type *DFSanSetLabelArgs[4] = {PrimitiveShadowTy, OriginTy,
                                Type::getInt8PtrTy(*Ctx), IntptrTy};
  DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
                                        DFSanSetLabelArgs, /*isVarArg=*/false);
  DFSanNonzeroLabelFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), None, /*isVarArg=*/false);
  DFSanVarargWrapperFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
  DFSanConditionalCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
                        /*isVarArg=*/false);
  Type *DFSanConditionalCallbackOriginArgs[2] = {PrimitiveShadowTy, OriginTy};
  DFSanConditionalCallbackOriginFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanConditionalCallbackOriginArgs,
      /*isVarArg=*/false);
  DFSanCmpCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
                        /*isVarArg=*/false);
  DFSanChainOriginFnTy =
      FunctionType::get(OriginTy, OriginTy, /*isVarArg=*/false);
  Type *DFSanChainOriginIfTaintedArgs[2] = {PrimitiveShadowTy, OriginTy};
  DFSanChainOriginIfTaintedFnTy = FunctionType::get(
      OriginTy, DFSanChainOriginIfTaintedArgs, /*isVarArg=*/false);
  // (shadow, addr, size, origin)
  Type *DFSanMaybeStoreOriginArgs[4] = {IntegerType::get(*Ctx, ShadowWidthBits),
                                        Int8Ptr, IntptrTy, OriginTy};
  DFSanMaybeStoreOriginFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanMaybeStoreOriginArgs, /*isVarArg=*/false);
  // (dst, src, size)
  Type *DFSanMemOriginTransferArgs[3] = {Int8Ptr, Int8Ptr, IntptrTy};
  DFSanMemOriginTransferFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanMemOriginTransferArgs, /*isVarArg=*/false);
  // (shadow, addr) — shared by the load and store event callbacks.
  Type *DFSanLoadStoreCallbackArgs[2] = {PrimitiveShadowTy, Int8Ptr};
  DFSanLoadStoreCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), DFSanLoadStoreCallbackArgs,
                        /*isVarArg=*/false);
  Type *DFSanMemTransferCallbackArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
  DFSanMemTransferCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemTransferCallbackArgs,
                        /*isVarArg=*/false);

  // Branch weights biasing toward the fast, no-callback paths.
  ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
  OriginStoreWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
  return true;
}
1073 
1074 bool DataFlowSanitizer::isInstrumented(const Function *F) {
1075   return !ABIList.isIn(*F, "uninstrumented");
1076 }
1077 
1078 bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
1079   return !ABIList.isIn(*GA, "uninstrumented");
1080 }
1081 
1082 bool DataFlowSanitizer::isForceZeroLabels(const Function *F) {
1083   return ABIList.isIn(*F, "force_zero_labels");
1084 }
1085 
1086 DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
1087   if (ABIList.isIn(*F, "functional"))
1088     return WK_Functional;
1089   if (ABIList.isIn(*F, "discard"))
1090     return WK_Discard;
1091   if (ABIList.isIn(*F, "custom"))
1092     return WK_Custom;
1093 
1094   return WK_Warning;
1095 }
1096 
/// Appends the ".dfsan" suffix to GV's name, and rewrites any ".symver"
/// module inline asm directive that references the old name so it keeps
/// pointing at the renamed symbol.
void DataFlowSanitizer::addGlobalNameSuffix(GlobalValue *GV) {
  std::string GVName = std::string(GV->getName()), Suffix = ".dfsan";
  GV->setName(GVName + Suffix);

  // Try to change the name of the function in module inline asm.  We only do
  // this for specific asm directives, currently only ".symver", to try to avoid
  // corrupting asm which happens to contain the symbol name as a substring.
  // Note that the substitution for .symver assumes that the versioned symbol
  // also has an instrumented name.
  std::string Asm = GV->getParent()->getModuleInlineAsm();
  std::string SearchStr = ".symver " + GVName + ",";
  size_t Pos = Asm.find(SearchStr);
  if (Pos != std::string::npos) {
    // Rename the directive's target to the suffixed symbol name.
    Asm.replace(Pos, SearchStr.size(), ".symver " + GVName + Suffix + ",");
    // NOTE(review): this searches from the beginning of the module asm, not
    // from Pos — it assumes the first '@' belongs to this .symver directive.
    // Confirm for modules containing multiple inline-asm directives.
    Pos = Asm.find("@");

    if (Pos == std::string::npos)
      report_fatal_error(Twine("unsupported .symver: ", Asm));

    // Insert the suffix before the '@' so "name@VERSION" becomes
    // "name.dfsan@VERSION".
    Asm.replace(Pos, 1, Suffix + "@");
    GV->getParent()->setModuleInlineAsm(Asm);
  }
}
1120 
1121 Function *
1122 DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
1123                                         GlobalValue::LinkageTypes NewFLink,
1124                                         FunctionType *NewFT) {
1125   FunctionType *FT = F->getFunctionType();
1126   Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(),
1127                                     NewFName, F->getParent());
1128   NewF->copyAttributesFrom(F);
1129   NewF->removeRetAttrs(
1130       AttributeFuncs::typeIncompatible(NewFT->getReturnType()));
1131 
1132   BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
1133   if (F->isVarArg()) {
1134     NewF->removeFnAttr("split-stack");
1135     CallInst::Create(DFSanVarargWrapperFn,
1136                      IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "",
1137                      BB);
1138     new UnreachableInst(*Ctx, BB);
1139   } else {
1140     auto ArgIt = pointer_iterator<Argument *>(NewF->arg_begin());
1141     std::vector<Value *> Args(ArgIt, ArgIt + FT->getNumParams());
1142 
1143     CallInst *CI = CallInst::Create(F, Args, "", BB);
1144     if (FT->getReturnType()->isVoidTy())
1145       ReturnInst::Create(*Ctx, BB);
1146     else
1147       ReturnInst::Create(*Ctx, CI, BB);
1148   }
1149 
1150   return NewF;
1151 }
1152 
1153 // Initialize DataFlowSanitizer runtime functions and declare them in the module
1154 void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
1155   {
1156     AttributeList AL;
1157     AL = AL.addFnAttribute(M.getContext(), Attribute::NoUnwind);
1158     AL = AL.addFnAttribute(M.getContext(), Attribute::ReadOnly);
1159     AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
1160     DFSanUnionLoadFn =
1161         Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);
1162   }
1163   {
1164     AttributeList AL;
1165     AL = AL.addFnAttribute(M.getContext(), Attribute::NoUnwind);
1166     AL = AL.addFnAttribute(M.getContext(), Attribute::ReadOnly);
1167     AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
1168     DFSanLoadLabelAndOriginFn = Mod->getOrInsertFunction(
1169         "__dfsan_load_label_and_origin", DFSanLoadLabelAndOriginFnTy, AL);
1170   }
1171   DFSanUnimplementedFn =
1172       Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
1173   {
1174     AttributeList AL;
1175     AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1176     AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
1177     DFSanSetLabelFn =
1178         Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL);
1179   }
1180   DFSanNonzeroLabelFn =
1181       Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
1182   DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper",
1183                                                   DFSanVarargWrapperFnTy);
1184   {
1185     AttributeList AL;
1186     AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1187     AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
1188     DFSanChainOriginFn = Mod->getOrInsertFunction("__dfsan_chain_origin",
1189                                                   DFSanChainOriginFnTy, AL);
1190   }
1191   {
1192     AttributeList AL;
1193     AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1194     AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
1195     AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
1196     DFSanChainOriginIfTaintedFn = Mod->getOrInsertFunction(
1197         "__dfsan_chain_origin_if_tainted", DFSanChainOriginIfTaintedFnTy, AL);
1198   }
1199   DFSanMemOriginTransferFn = Mod->getOrInsertFunction(
1200       "__dfsan_mem_origin_transfer", DFSanMemOriginTransferFnTy);
1201 
1202   {
1203     AttributeList AL;
1204     AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1205     AL = AL.addParamAttribute(M.getContext(), 3, Attribute::ZExt);
1206     DFSanMaybeStoreOriginFn = Mod->getOrInsertFunction(
1207         "__dfsan_maybe_store_origin", DFSanMaybeStoreOriginFnTy, AL);
1208   }
1209 
1210   DFSanRuntimeFunctions.insert(
1211       DFSanUnionLoadFn.getCallee()->stripPointerCasts());
1212   DFSanRuntimeFunctions.insert(
1213       DFSanLoadLabelAndOriginFn.getCallee()->stripPointerCasts());
1214   DFSanRuntimeFunctions.insert(
1215       DFSanUnimplementedFn.getCallee()->stripPointerCasts());
1216   DFSanRuntimeFunctions.insert(
1217       DFSanSetLabelFn.getCallee()->stripPointerCasts());
1218   DFSanRuntimeFunctions.insert(
1219       DFSanNonzeroLabelFn.getCallee()->stripPointerCasts());
1220   DFSanRuntimeFunctions.insert(
1221       DFSanVarargWrapperFn.getCallee()->stripPointerCasts());
1222   DFSanRuntimeFunctions.insert(
1223       DFSanLoadCallbackFn.getCallee()->stripPointerCasts());
1224   DFSanRuntimeFunctions.insert(
1225       DFSanStoreCallbackFn.getCallee()->stripPointerCasts());
1226   DFSanRuntimeFunctions.insert(
1227       DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts());
1228   DFSanRuntimeFunctions.insert(
1229       DFSanConditionalCallbackFn.getCallee()->stripPointerCasts());
1230   DFSanRuntimeFunctions.insert(
1231       DFSanConditionalCallbackOriginFn.getCallee()->stripPointerCasts());
1232   DFSanRuntimeFunctions.insert(
1233       DFSanCmpCallbackFn.getCallee()->stripPointerCasts());
1234   DFSanRuntimeFunctions.insert(
1235       DFSanChainOriginFn.getCallee()->stripPointerCasts());
1236   DFSanRuntimeFunctions.insert(
1237       DFSanChainOriginIfTaintedFn.getCallee()->stripPointerCasts());
1238   DFSanRuntimeFunctions.insert(
1239       DFSanMemOriginTransferFn.getCallee()->stripPointerCasts());
1240   DFSanRuntimeFunctions.insert(
1241       DFSanMaybeStoreOriginFn.getCallee()->stripPointerCasts());
1242 }
1243 
// Initializes event callback functions and declares them in the module.
// These are the user-facing hooks (-dfsan-event-callbacks /
// -dfsan-conditional-callbacks) as opposed to the core runtime helpers
// declared in initializeRuntimeFunctions.
void DataFlowSanitizer::initializeCallbackFunctions(Module &M) {
  // Load and store callbacks share one signature: (shadow, address).
  DFSanLoadCallbackFn = Mod->getOrInsertFunction("__dfsan_load_callback",
                                                 DFSanLoadStoreCallbackFnTy);
  DFSanStoreCallbackFn = Mod->getOrInsertFunction("__dfsan_store_callback",
                                                  DFSanLoadStoreCallbackFnTy);
  DFSanMemTransferCallbackFn = Mod->getOrInsertFunction(
      "__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy);
  DFSanCmpCallbackFn =
      Mod->getOrInsertFunction("__dfsan_cmp_callback", DFSanCmpCallbackFnTy);

  // Conditional callbacks report the shadow (and optionally origin) of
  // branch/switch/select conditions.
  DFSanConditionalCallbackFn = Mod->getOrInsertFunction(
      "__dfsan_conditional_callback", DFSanConditionalCallbackFnTy);
  DFSanConditionalCallbackOriginFn =
      Mod->getOrInsertFunction("__dfsan_conditional_callback_origin",
                               DFSanConditionalCallbackOriginFnTy);
}
1261 
1262 void DataFlowSanitizer::injectMetadataGlobals(Module &M) {
1263   // These variables can be used:
1264   // - by the runtime (to discover what the shadow width was, during
1265   //   compilation)
1266   // - in testing (to avoid hardcoding the shadow width and type but instead
1267   //   extract them by pattern matching)
1268   Type *IntTy = Type::getInt32Ty(*Ctx);
1269   (void)Mod->getOrInsertGlobal("__dfsan_shadow_width_bits", IntTy, [&] {
1270     return new GlobalVariable(
1271         M, IntTy, /*isConstant=*/true, GlobalValue::WeakODRLinkage,
1272         ConstantInt::get(IntTy, ShadowWidthBits), "__dfsan_shadow_width_bits");
1273   });
1274   (void)Mod->getOrInsertGlobal("__dfsan_shadow_width_bytes", IntTy, [&] {
1275     return new GlobalVariable(M, IntTy, /*isConstant=*/true,
1276                               GlobalValue::WeakODRLinkage,
1277                               ConstantInt::get(IntTy, ShadowWidthBytes),
1278                               "__dfsan_shadow_width_bytes");
1279   });
1280 }
1281 
1282 bool DataFlowSanitizer::runImpl(Module &M) {
1283   initializeModule(M);
1284 
1285   if (ABIList.isIn(M, "skip"))
1286     return false;
1287 
1288   const unsigned InitialGlobalSize = M.global_size();
1289   const unsigned InitialModuleSize = M.size();
1290 
1291   bool Changed = false;
1292 
1293   auto GetOrInsertGlobal = [this, &Changed](StringRef Name,
1294                                             Type *Ty) -> Constant * {
1295     Constant *C = Mod->getOrInsertGlobal(Name, Ty);
1296     if (GlobalVariable *G = dyn_cast<GlobalVariable>(C)) {
1297       Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel;
1298       G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
1299     }
1300     return C;
1301   };
1302 
1303   // These globals must be kept in sync with the ones in dfsan.cpp.
1304   ArgTLS =
1305       GetOrInsertGlobal("__dfsan_arg_tls",
1306                         ArrayType::get(Type::getInt64Ty(*Ctx), ArgTLSSize / 8));
1307   RetvalTLS = GetOrInsertGlobal(
1308       "__dfsan_retval_tls",
1309       ArrayType::get(Type::getInt64Ty(*Ctx), RetvalTLSSize / 8));
1310   ArgOriginTLSTy = ArrayType::get(OriginTy, NumOfElementsInArgOrgTLS);
1311   ArgOriginTLS = GetOrInsertGlobal("__dfsan_arg_origin_tls", ArgOriginTLSTy);
1312   RetvalOriginTLS = GetOrInsertGlobal("__dfsan_retval_origin_tls", OriginTy);
1313 
1314   (void)Mod->getOrInsertGlobal("__dfsan_track_origins", OriginTy, [&] {
1315     Changed = true;
1316     return new GlobalVariable(
1317         M, OriginTy, true, GlobalValue::WeakODRLinkage,
1318         ConstantInt::getSigned(OriginTy,
1319                                shouldTrackOrigins() ? ClTrackOrigins : 0),
1320         "__dfsan_track_origins");
1321   });
1322 
1323   injectMetadataGlobals(M);
1324 
1325   initializeCallbackFunctions(M);
1326   initializeRuntimeFunctions(M);
1327 
1328   std::vector<Function *> FnsToInstrument;
1329   SmallPtrSet<Function *, 2> FnsWithNativeABI;
1330   SmallPtrSet<Function *, 2> FnsWithForceZeroLabel;
1331   SmallPtrSet<Constant *, 1> PersonalityFns;
1332   for (Function &F : M)
1333     if (!F.isIntrinsic() && !DFSanRuntimeFunctions.contains(&F)) {
1334       FnsToInstrument.push_back(&F);
1335       if (F.hasPersonalityFn())
1336         PersonalityFns.insert(F.getPersonalityFn()->stripPointerCasts());
1337     }
1338 
1339   if (ClIgnorePersonalityRoutine) {
1340     for (auto *C : PersonalityFns) {
1341       assert(isa<Function>(C) && "Personality routine is not a function!");
1342       Function *F = cast<Function>(C);
1343       if (!isInstrumented(F))
1344         FnsToInstrument.erase(
1345             std::remove(FnsToInstrument.begin(), FnsToInstrument.end(), F),
1346             FnsToInstrument.end());
1347     }
1348   }
1349 
1350   // Give function aliases prefixes when necessary, and build wrappers where the
1351   // instrumentedness is inconsistent.
1352   for (GlobalAlias &GA : llvm::make_early_inc_range(M.aliases())) {
1353     // Don't stop on weak.  We assume people aren't playing games with the
1354     // instrumentedness of overridden weak aliases.
1355     auto *F = dyn_cast<Function>(GA.getAliaseeObject());
1356     if (!F)
1357       continue;
1358 
1359     bool GAInst = isInstrumented(&GA), FInst = isInstrumented(F);
1360     if (GAInst && FInst) {
1361       addGlobalNameSuffix(&GA);
1362     } else if (GAInst != FInst) {
1363       // Non-instrumented alias of an instrumented function, or vice versa.
1364       // Replace the alias with a native-ABI wrapper of the aliasee.  The pass
1365       // below will take care of instrumenting it.
1366       Function *NewF =
1367           buildWrapperFunction(F, "", GA.getLinkage(), F->getFunctionType());
1368       GA.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA.getType()));
1369       NewF->takeName(&GA);
1370       GA.eraseFromParent();
1371       FnsToInstrument.push_back(NewF);
1372     }
1373   }
1374 
1375   ReadOnlyNoneAttrs.addAttribute(Attribute::ReadOnly)
1376       .addAttribute(Attribute::ReadNone);
1377 
1378   // First, change the ABI of every function in the module.  ABI-listed
1379   // functions keep their original ABI and get a wrapper function.
1380   for (std::vector<Function *>::iterator FI = FnsToInstrument.begin(),
1381                                          FE = FnsToInstrument.end();
1382        FI != FE; ++FI) {
1383     Function &F = **FI;
1384     FunctionType *FT = F.getFunctionType();
1385 
1386     bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() &&
1387                               FT->getReturnType()->isVoidTy());
1388 
1389     if (isInstrumented(&F)) {
1390       if (isForceZeroLabels(&F))
1391         FnsWithForceZeroLabel.insert(&F);
1392 
1393       // Instrumented functions get a '.dfsan' suffix.  This allows us to more
1394       // easily identify cases of mismatching ABIs. This naming scheme is
1395       // mangling-compatible (see Itanium ABI), using a vendor-specific suffix.
1396       addGlobalNameSuffix(&F);
1397     } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
1398       // Build a wrapper function for F.  The wrapper simply calls F, and is
1399       // added to FnsToInstrument so that any instrumentation according to its
1400       // WrapperKind is done in the second pass below.
1401 
1402       // If the function being wrapped has local linkage, then preserve the
1403       // function's linkage in the wrapper function.
1404       GlobalValue::LinkageTypes WrapperLinkage =
1405           F.hasLocalLinkage() ? F.getLinkage()
1406                               : GlobalValue::LinkOnceODRLinkage;
1407 
1408       Function *NewF = buildWrapperFunction(
1409           &F,
1410           (shouldTrackOrigins() ? std::string("dfso$") : std::string("dfsw$")) +
1411               std::string(F.getName()),
1412           WrapperLinkage, FT);
1413       NewF->removeFnAttrs(ReadOnlyNoneAttrs);
1414 
1415       Value *WrappedFnCst =
1416           ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
1417       F.replaceAllUsesWith(WrappedFnCst);
1418 
1419       UnwrappedFnMap[WrappedFnCst] = &F;
1420       *FI = NewF;
1421 
1422       if (!F.isDeclaration()) {
1423         // This function is probably defining an interposition of an
1424         // uninstrumented function and hence needs to keep the original ABI.
1425         // But any functions it may call need to use the instrumented ABI, so
1426         // we instrument it in a mode which preserves the original ABI.
1427         FnsWithNativeABI.insert(&F);
1428 
1429         // This code needs to rebuild the iterators, as they may be invalidated
1430         // by the push_back, taking care that the new range does not include
1431         // any functions added by this code.
1432         size_t N = FI - FnsToInstrument.begin(),
1433                Count = FE - FnsToInstrument.begin();
1434         FnsToInstrument.push_back(&F);
1435         FI = FnsToInstrument.begin() + N;
1436         FE = FnsToInstrument.begin() + Count;
1437       }
1438       // Hopefully, nobody will try to indirectly call a vararg
1439       // function... yet.
1440     } else if (FT->isVarArg()) {
1441       UnwrappedFnMap[&F] = &F;
1442       *FI = nullptr;
1443     }
1444   }
1445 
1446   for (Function *F : FnsToInstrument) {
1447     if (!F || F->isDeclaration())
1448       continue;
1449 
1450     removeUnreachableBlocks(*F);
1451 
1452     DFSanFunction DFSF(*this, F, FnsWithNativeABI.count(F),
1453                        FnsWithForceZeroLabel.count(F));
1454 
1455     // DFSanVisitor may create new basic blocks, which confuses df_iterator.
1456     // Build a copy of the list before iterating over it.
1457     SmallVector<BasicBlock *, 4> BBList(depth_first(&F->getEntryBlock()));
1458 
1459     for (BasicBlock *BB : BBList) {
1460       Instruction *Inst = &BB->front();
1461       while (true) {
1462         // DFSanVisitor may split the current basic block, changing the current
1463         // instruction's next pointer and moving the next instruction to the
1464         // tail block from which we should continue.
1465         Instruction *Next = Inst->getNextNode();
1466         // DFSanVisitor may delete Inst, so keep track of whether it was a
1467         // terminator.
1468         bool IsTerminator = Inst->isTerminator();
1469         if (!DFSF.SkipInsts.count(Inst))
1470           DFSanVisitor(DFSF).visit(Inst);
1471         if (IsTerminator)
1472           break;
1473         Inst = Next;
1474       }
1475     }
1476 
1477     // We will not necessarily be able to compute the shadow for every phi node
1478     // until we have visited every block.  Therefore, the code that handles phi
1479     // nodes adds them to the PHIFixups list so that they can be properly
1480     // handled here.
1481     for (DFSanFunction::PHIFixupElement &P : DFSF.PHIFixups) {
1482       for (unsigned Val = 0, N = P.Phi->getNumIncomingValues(); Val != N;
1483            ++Val) {
1484         P.ShadowPhi->setIncomingValue(
1485             Val, DFSF.getShadow(P.Phi->getIncomingValue(Val)));
1486         if (P.OriginPhi)
1487           P.OriginPhi->setIncomingValue(
1488               Val, DFSF.getOrigin(P.Phi->getIncomingValue(Val)));
1489       }
1490     }
1491 
1492     // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy
1493     // places (i.e. instructions in basic blocks we haven't even begun visiting
1494     // yet).  To make our life easier, do this work in a pass after the main
1495     // instrumentation.
1496     if (ClDebugNonzeroLabels) {
1497       for (Value *V : DFSF.NonZeroChecks) {
1498         Instruction *Pos;
1499         if (Instruction *I = dyn_cast<Instruction>(V))
1500           Pos = I->getNextNode();
1501         else
1502           Pos = &DFSF.F->getEntryBlock().front();
1503         while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
1504           Pos = Pos->getNextNode();
1505         IRBuilder<> IRB(Pos);
1506         Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(V, Pos);
1507         Value *Ne =
1508             IRB.CreateICmpNE(PrimitiveShadow, DFSF.DFS.ZeroPrimitiveShadow);
1509         BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
1510             Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
1511         IRBuilder<> ThenIRB(BI);
1512         ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn, {});
1513       }
1514     }
1515   }
1516 
1517   return Changed || !FnsToInstrument.empty() ||
1518          M.global_size() != InitialGlobalSize || M.size() != InitialModuleSize;
1519 }
1520 
1521 Value *DFSanFunction::getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB) {
1522   Value *Base = IRB.CreatePointerCast(DFS.ArgTLS, DFS.IntptrTy);
1523   if (ArgOffset)
1524     Base = IRB.CreateAdd(Base, ConstantInt::get(DFS.IntptrTy, ArgOffset));
1525   return IRB.CreateIntToPtr(Base, PointerType::get(DFS.getShadowTy(T), 0),
1526                             "_dfsarg");
1527 }
1528 
1529 Value *DFSanFunction::getRetvalTLS(Type *T, IRBuilder<> &IRB) {
1530   return IRB.CreatePointerCast(
1531       DFS.RetvalTLS, PointerType::get(DFS.getShadowTy(T), 0), "_dfsret");
1532 }
1533 
1534 Value *DFSanFunction::getRetvalOriginTLS() { return DFS.RetvalOriginTLS; }
1535 
// Returns a pointer to the ArgNo-th element of the argument-origin TLS array.
Value *DFSanFunction::getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB) {
  return IRB.CreateConstGEP2_64(DFS.ArgOriginTLSTy, DFS.ArgOriginTLS, 0, ArgNo,
                                "_dfsarg_o");
}
1540 
// Returns the origin id associated with V, lazily loading argument origins
// from their TLS slots on first use and caching the result in ValOriginMap.
// Values that are neither arguments nor instructions (constants, globals)
// carry the zero origin.
Value *DFSanFunction::getOrigin(Value *V) {
  assert(DFS.shouldTrackOrigins());
  if (!isa<Argument>(V) && !isa<Instruction>(V))
    return DFS.ZeroOrigin;
  Value *&Origin = ValOriginMap[V];
  if (!Origin) {
    if (Argument *A = dyn_cast<Argument>(V)) {
      // Functions instrumented with the native ABI do not receive argument
      // origins through TLS.
      if (IsNativeABI)
        return DFS.ZeroOrigin;
      if (A->getArgNo() < DFS.NumOfElementsInArgOrgTLS) {
        // Load the argument's origin from its TLS slot at function entry.
        Instruction *ArgOriginTLSPos = &*F->getEntryBlock().begin();
        IRBuilder<> IRB(ArgOriginTLSPos);
        Value *ArgOriginPtr = getArgOriginTLS(A->getArgNo(), IRB);
        Origin = IRB.CreateLoad(DFS.OriginTy, ArgOriginPtr);
      } else {
        // Overflow: arguments beyond the TLS array capacity get zero origin.
        Origin = DFS.ZeroOrigin;
      }
    } else {
      // Instructions with no recorded origin default to the zero origin.
      Origin = DFS.ZeroOrigin;
    }
  }
  return Origin;
}
1565 
1566 void DFSanFunction::setOrigin(Instruction *I, Value *Origin) {
1567   if (!DFS.shouldTrackOrigins())
1568     return;
1569   assert(!ValOriginMap.count(I));
1570   assert(Origin->getType() == DFS.OriginTy);
1571   ValOriginMap[I] = Origin;
1572 }
1573 
// Loads the shadow of argument A from the ArgTLS buffer. Walks the formal
// arguments in order, accumulating the TLS byte offset of each sized
// argument's shadow, and emits an aligned load at function entry once A is
// reached. Falls back to the zero shadow when A is unsized or its shadow
// would not fit within ArgTLSSize.
Value *DFSanFunction::getShadowForTLSArgument(Argument *A) {
  unsigned ArgOffset = 0;
  const DataLayout &DL = F->getParent()->getDataLayout();
  for (auto &FArg : F->args()) {
    // Unsized arguments occupy no TLS space; if A itself is unsized it gets
    // the zero shadow.
    if (!FArg.getType()->isSized()) {
      if (A == &FArg)
        break;
      continue;
    }

    unsigned Size = DL.getTypeAllocSize(DFS.getShadowTy(&FArg));
    if (A != &FArg) {
      // Advance past this argument's (aligned) shadow slot.
      ArgOffset += alignTo(Size, ShadowTLSAlignment);
      if (ArgOffset > ArgTLSSize)
        break; // ArgTLS overflows, uses a zero shadow.
      continue;
    }

    if (ArgOffset + Size > ArgTLSSize)
      break; // ArgTLS overflows, uses a zero shadow.

    // Load A's shadow from TLS at the start of the entry block.
    Instruction *ArgTLSPos = &*F->getEntryBlock().begin();
    IRBuilder<> IRB(ArgTLSPos);
    Value *ArgShadowPtr = getArgTLS(FArg.getType(), ArgOffset, IRB);
    return IRB.CreateAlignedLoad(DFS.getShadowTy(&FArg), ArgShadowPtr,
                                 ShadowTLSAlignment);
  }

  return DFS.getZeroShadow(A);
}
1604 
// Returns the shadow value for V, computing argument shadows from TLS on
// first use and caching them in ValShadowMap. Values that are neither
// arguments nor instructions, and all values in functions forced to zero
// labels, get the zero shadow.
Value *DFSanFunction::getShadow(Value *V) {
  if (!isa<Argument>(V) && !isa<Instruction>(V))
    return DFS.getZeroShadow(V);
  if (IsForceZeroLabels)
    return DFS.getZeroShadow(V);
  Value *&Shadow = ValShadowMap[V];
  if (!Shadow) {
    if (Argument *A = dyn_cast<Argument>(V)) {
      // Native-ABI functions do not receive argument shadows through TLS.
      if (IsNativeABI)
        return DFS.getZeroShadow(V);
      Shadow = getShadowForTLSArgument(A);
      // Argument shadows participate in -dfsan-debug-nonzero-labels checks.
      NonZeroChecks.push_back(Shadow);
    } else {
      Shadow = DFS.getZeroShadow(V);
    }
  }
  return Shadow;
}
1623 
// Records Shadow as the shadow value for instruction I. Each instruction's
// shadow may be recorded only once.
void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
  assert(!ValShadowMap.count(I));
  ValShadowMap[I] = Shadow;
}
1628 
1629 /// Compute the integer shadow offset that corresponds to a given
1630 /// application address.
1631 ///
1632 /// Offset = (Addr & ~AndMask) ^ XorMask
1633 Value *DataFlowSanitizer::getShadowOffset(Value *Addr, IRBuilder<> &IRB) {
1634   assert(Addr != RetvalTLS && "Reinstrumenting?");
1635   Value *OffsetLong = IRB.CreatePointerCast(Addr, IntptrTy);
1636 
1637   uint64_t AndMask = MapParams->AndMask;
1638   if (AndMask)
1639     OffsetLong =
1640         IRB.CreateAnd(OffsetLong, ConstantInt::get(IntptrTy, ~AndMask));
1641 
1642   uint64_t XorMask = MapParams->XorMask;
1643   if (XorMask)
1644     OffsetLong = IRB.CreateXor(OffsetLong, ConstantInt::get(IntptrTy, XorMask));
1645   return OffsetLong;
1646 }
1647 
// Computes the shadow address and, when origin tracking is enabled, the
// origin address for the given application address. The returned origin
// pointer is null when origins are not tracked.
std::pair<Value *, Value *>
DataFlowSanitizer::getShadowOriginAddress(Value *Addr, Align InstAlignment,
                                          Instruction *Pos) {
  // Returns ((Addr & shadow_mask) + origin_base - shadow_base) & ~4UL
  IRBuilder<> IRB(Pos);
  Value *ShadowOffset = getShadowOffset(Addr, IRB);
  Value *ShadowLong = ShadowOffset;
  uint64_t ShadowBase = MapParams->ShadowBase;
  if (ShadowBase != 0) {
    ShadowLong =
        IRB.CreateAdd(ShadowLong, ConstantInt::get(IntptrTy, ShadowBase));
  }
  IntegerType *ShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
  Value *ShadowPtr =
      IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
  Value *OriginPtr = nullptr;
  if (shouldTrackOrigins()) {
    // The origin address is derived from the same shadow offset, rebased
    // into the origin region.
    Value *OriginLong = ShadowOffset;
    uint64_t OriginBase = MapParams->OriginBase;
    if (OriginBase != 0)
      OriginLong =
          IRB.CreateAdd(OriginLong, ConstantInt::get(IntptrTy, OriginBase));
    const Align Alignment = llvm::assumeAligned(InstAlignment.value());
    // When alignment is >= 4, Addr must be aligned to 4, otherwise it is UB.
    // So Mask is unnecessary.
    if (Alignment < MinOriginAlignment) {
      // Round down to the 4-byte origin slot boundary.
      uint64_t Mask = MinOriginAlignment.value() - 1;
      OriginLong = IRB.CreateAnd(OriginLong, ConstantInt::get(IntptrTy, ~Mask));
    }
    OriginPtr = IRB.CreateIntToPtr(OriginLong, OriginPtrTy);
  }
  return std::make_pair(ShadowPtr, OriginPtr);
}
1681 
// Converts an already-computed shadow offset (see getShadowOffset) into a
// typed pointer to primitive shadow. Addr is unused in this overload; it is
// kept for interface symmetry with the overload that computes the offset.
Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos,
                                           Value *ShadowOffset) {
  IRBuilder<> IRB(Pos);
  return IRB.CreateIntToPtr(ShadowOffset, PrimitiveShadowPtrTy);
}
1687 
1688 Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
1689   IRBuilder<> IRB(Pos);
1690   Value *ShadowOffset = getShadowOffset(Addr, IRB);
1691   return getShadowAddress(Addr, Pos, ShadowOffset);
1692 }
1693 
1694 Value *DFSanFunction::combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
1695                                                 Instruction *Pos) {
1696   Value *PrimitiveValue = combineShadows(V1, V2, Pos);
1697   return expandFromPrimitiveShadow(T, PrimitiveValue, Pos);
1698 }
1699 
// Generates IR to compute the union of the two given shadows, inserting it
// before Pos. The combined value is with primitive type.
//
// Several short-circuits avoid redundant OR instructions: zero or equal
// inputs collapse directly, ShadowElements tracks which original shadows a
// combined shadow already covers (so supersets are reused), and
// CachedShadows reuses a previously emitted union when its block dominates
// Pos.
Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
  if (DFS.isZeroShadow(V1))
    return collapseToPrimitiveShadow(V2, Pos);
  if (DFS.isZeroShadow(V2))
    return collapseToPrimitiveShadow(V1, Pos);
  if (V1 == V2)
    return collapseToPrimitiveShadow(V1, Pos);

  auto V1Elems = ShadowElements.find(V1);
  auto V2Elems = ShadowElements.find(V2);
  if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) {
    // If one union already contains every element of the other, reuse it.
    if (std::includes(V1Elems->second.begin(), V1Elems->second.end(),
                      V2Elems->second.begin(), V2Elems->second.end())) {
      return collapseToPrimitiveShadow(V1, Pos);
    }
    if (std::includes(V2Elems->second.begin(), V2Elems->second.end(),
                      V1Elems->second.begin(), V1Elems->second.end())) {
      return collapseToPrimitiveShadow(V2, Pos);
    }
  } else if (V1Elems != ShadowElements.end()) {
    // V1 is a union that may already include the plain shadow V2.
    if (V1Elems->second.count(V2))
      return collapseToPrimitiveShadow(V1, Pos);
  } else if (V2Elems != ShadowElements.end()) {
    if (V2Elems->second.count(V1))
      return collapseToPrimitiveShadow(V2, Pos);
  }

  // Canonicalize the pair so (V1, V2) and (V2, V1) hit the same cache entry.
  auto Key = std::make_pair(V1, V2);
  if (V1 > V2)
    std::swap(Key.first, Key.second);
  CachedShadow &CCS = CachedShadows[Key];
  // A cached union is only valid where its defining block dominates Pos.
  if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent()))
    return CCS.Shadow;

  // Converts inputs shadows to shadows with primitive types.
  Value *PV1 = collapseToPrimitiveShadow(V1, Pos);
  Value *PV2 = collapseToPrimitiveShadow(V2, Pos);

  IRBuilder<> IRB(Pos);
  CCS.Block = Pos->getParent();
  CCS.Shadow = IRB.CreateOr(PV1, PV2);

  // Record the set of original shadows the new union covers, so later
  // combines over subsets can reuse it.
  std::set<Value *> UnionElems;
  if (V1Elems != ShadowElements.end()) {
    UnionElems = V1Elems->second;
  } else {
    UnionElems.insert(V1);
  }
  if (V2Elems != ShadowElements.end()) {
    UnionElems.insert(V2Elems->second.begin(), V2Elems->second.end());
  } else {
    UnionElems.insert(V2);
  }
  ShadowElements[CCS.Shadow] = std::move(UnionElems);

  return CCS.Shadow;
}
1759 
1760 // A convenience function which folds the shadows of each of the operands
1761 // of the provided instruction Inst, inserting the IR before Inst.  Returns
1762 // the computed union Value.
1763 Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {
1764   if (Inst->getNumOperands() == 0)
1765     return DFS.getZeroShadow(Inst);
1766 
1767   Value *Shadow = getShadow(Inst->getOperand(0));
1768   for (unsigned I = 1, N = Inst->getNumOperands(); I < N; ++I)
1769     Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(I)), Inst);
1770 
1771   return expandFromPrimitiveShadow(Inst->getType(), Shadow, Inst);
1772 }
1773 
1774 void DFSanVisitor::visitInstOperands(Instruction &I) {
1775   Value *CombinedShadow = DFSF.combineOperandShadows(&I);
1776   DFSF.setShadow(&I, CombinedShadow);
1777   visitInstOperandOrigins(I);
1778 }
1779 
// Combines a list of per-operand origins into one, inserting IR before Pos.
// Origins whose value is a statically-zero constant are skipped. Among the
// rest, a chain of selects is built so that the origin of the last operand
// whose shadow compares unequal to Zero wins at run time; the first
// non-constant-zero origin serves as the fallback (it needs no shadow check
// because it is only used when every later shadow is zero). Zero defaults to
// the zero primitive shadow when not provided.
Value *DFSanFunction::combineOrigins(const std::vector<Value *> &Shadows,
                                     const std::vector<Value *> &Origins,
                                     Instruction *Pos, ConstantInt *Zero) {
  assert(Shadows.size() == Origins.size());
  size_t Size = Origins.size();
  if (Size == 0)
    return DFS.ZeroOrigin;
  Value *Origin = nullptr;
  if (!Zero)
    Zero = DFS.ZeroPrimitiveShadow;
  for (size_t I = 0; I != Size; ++I) {
    Value *OpOrigin = Origins[I];
    Constant *ConstOpOrigin = dyn_cast<Constant>(OpOrigin);
    if (ConstOpOrigin && ConstOpOrigin->isNullValue())
      continue;
    if (!Origin) {
      Origin = OpOrigin;
      continue;
    }
    Value *OpShadow = Shadows[I];
    Value *PrimitiveShadow = collapseToPrimitiveShadow(OpShadow, Pos);
    IRBuilder<> IRB(Pos);
    Value *Cond = IRB.CreateICmpNE(PrimitiveShadow, Zero);
    Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
  }
  return Origin ? Origin : DFS.ZeroOrigin;
}
1807 
1808 Value *DFSanFunction::combineOperandOrigins(Instruction *Inst) {
1809   size_t Size = Inst->getNumOperands();
1810   std::vector<Value *> Shadows(Size);
1811   std::vector<Value *> Origins(Size);
1812   for (unsigned I = 0; I != Size; ++I) {
1813     Shadows[I] = getShadow(Inst->getOperand(I));
1814     Origins[I] = getOrigin(Inst->getOperand(I));
1815   }
1816   return combineOrigins(Shadows, Origins, Inst);
1817 }
1818 
1819 void DFSanVisitor::visitInstOperandOrigins(Instruction &I) {
1820   if (!DFSF.DFS.shouldTrackOrigins())
1821     return;
1822   Value *CombinedOrigin = DFSF.combineOperandOrigins(&I);
1823   DFSF.setOrigin(&I, CombinedOrigin);
1824 }
1825 
1826 Align DFSanFunction::getShadowAlign(Align InstAlignment) {
1827   const Align Alignment = ClPreserveAlignment ? InstAlignment : Align(1);
1828   return Align(Alignment.value() * DFS.ShadowWidthBytes);
1829 }
1830 
1831 Align DFSanFunction::getOriginAlign(Align InstAlignment) {
1832   const Align Alignment = llvm::assumeAligned(InstAlignment.value());
1833   return Align(std::max(MinOriginAlignment, Alignment));
1834 }
1835 
// Decides whether a load of Size bytes should read its shadow and origin via
// the __dfsan_load_label_and_origin runtime callback instead of inline IR.
bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size,
                                                  Align InstAlignment) {
  // When enabling tracking load instructions, we always use
  // __dfsan_load_label_and_origin to reduce code size.
  if (ClTrackOrigins == 2)
    return true;

  assert(Size != 0);
  // * if Size == 1, it is sufficient to load its origin aligned at 4.
  // * if Size == 2, we assume most cases Addr % 2 == 0, so it is sufficient to
  //   load its origin aligned at 4. If not, although origins may be lost, it
  //   should not happen very often.
  // * if align >= 4, Addr must be aligned to 4, otherwise it is UB. When
  //   Size % 4 == 0, it is more efficient to load origins without callbacks.
  // * Otherwise we use __dfsan_load_label_and_origin.
  // This should ensure that common cases run efficiently.
  if (Size <= 2)
    return false;

  const Align Alignment = llvm::assumeAligned(InstAlignment.value());
  return Alignment < MinOriginAlignment || !DFS.hasLoadSizeForFastPath(Size);
}
1858 
1859 Value *DataFlowSanitizer::loadNextOrigin(Instruction *Pos, Align OriginAlign,
1860                                          Value **OriginAddr) {
1861   IRBuilder<> IRB(Pos);
1862   *OriginAddr =
1863       IRB.CreateGEP(OriginTy, *OriginAddr, ConstantInt::get(IntptrTy, 1));
1864   return IRB.CreateAlignedLoad(OriginTy, *OriginAddr, OriginAlign);
1865 }
1866 
// Fast-path shadow (and origin) load for sizes accepted by
// hasLoadSizeForFastPath: the per-byte shadows are read in 32- or 64-bit
// chunks, OR-ed together linearly, then folded down to a single primitive
// shadow with a logarithmic shift/OR reduction. Returns the primitive shadow
// and the combined origin (ZeroOrigin when origins are not tracked).
std::pair<Value *, Value *> DFSanFunction::loadShadowFast(
    Value *ShadowAddr, Value *OriginAddr, uint64_t Size, Align ShadowAlign,
    Align OriginAlign, Value *FirstOrigin, Instruction *Pos) {
  const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();
  const uint64_t ShadowSize = Size * DFS.ShadowWidthBytes;

  assert(Size >= 4 && "Not large enough load size for fast path!");

  // Used for origin tracking.
  std::vector<Value *> Shadows;
  std::vector<Value *> Origins;

  // Load instructions in LLVM can have arbitrary byte sizes (e.g., 3, 12, 20)
  // but this function is only used in a subset of cases that make it possible
  // to optimize the instrumentation.
  //
  // Specifically, when the shadow size in bytes (i.e., loaded bytes x shadow
  // per byte) is either:
  // - a multiple of 8  (common)
  // - equal to 4       (only for load32)
  //
  // For the second case, we can fit the wide shadow in a 32-bit integer. In all
  // other cases, we use a 64-bit integer to hold the wide shadow.
  Type *WideShadowTy =
      ShadowSize == 4 ? Type::getInt32Ty(*DFS.Ctx) : Type::getInt64Ty(*DFS.Ctx);

  IRBuilder<> IRB(Pos);
  Value *WideAddr = IRB.CreateBitCast(ShadowAddr, WideShadowTy->getPointerTo());
  Value *CombinedWideShadow =
      IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign);

  unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth();
  const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits;

  // Records the (shadow, origin) pairs that combineOrigins() will select
  // among at the end.
  auto AppendWideShadowAndOrigin = [&](Value *WideShadow, Value *Origin) {
    if (BytesPerWideShadow > 4) {
      assert(BytesPerWideShadow == 8);
      // The wide shadow relates to two origin pointers: one for the first four
      // application bytes, and one for the latest four. We use a left shift to
      // get just the shadow bytes that correspond to the first origin pointer,
      // and then the entire shadow for the second origin pointer (which will be
      // chosen by combineOrigins() iff the least-significant half of the wide
      // shadow was empty but the other half was not).
      Value *WideShadowLo = IRB.CreateShl(
          WideShadow, ConstantInt::get(WideShadowTy, WideShadowBitWidth / 2));
      Shadows.push_back(WideShadow);
      Origins.push_back(DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr));

      Shadows.push_back(WideShadowLo);
      Origins.push_back(Origin);
    } else {
      Shadows.push_back(WideShadow);
      Origins.push_back(Origin);
    }
  };

  if (ShouldTrackOrigins)
    AppendWideShadowAndOrigin(CombinedWideShadow, FirstOrigin);

  // First OR all the WideShadows (i.e., 64bit or 32bit shadow chunks) linearly;
  // then OR individual shadows within the combined WideShadow by binary ORing.
  // This is fewer instructions than ORing shadows individually, since it
  // needs logN shift/or instructions (N being the bytes of the combined wide
  // shadow).
  for (uint64_t ByteOfs = BytesPerWideShadow; ByteOfs < Size;
       ByteOfs += BytesPerWideShadow) {
    WideAddr = IRB.CreateGEP(WideShadowTy, WideAddr,
                             ConstantInt::get(DFS.IntptrTy, 1));
    Value *NextWideShadow =
        IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign);
    CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, NextWideShadow);
    if (ShouldTrackOrigins) {
      Value *NextOrigin = DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr);
      AppendWideShadowAndOrigin(NextWideShadow, NextOrigin);
    }
  }
  // Fold the wide shadow down: after this loop the low ShadowWidthBits bits
  // hold the OR of every per-byte shadow in the chunk.
  for (unsigned Width = WideShadowBitWidth / 2; Width >= DFS.ShadowWidthBits;
       Width >>= 1) {
    Value *ShrShadow = IRB.CreateLShr(CombinedWideShadow, Width);
    CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, ShrShadow);
  }
  return {IRB.CreateTrunc(CombinedWideShadow, DFS.PrimitiveShadowTy),
          ShouldTrackOrigins
              ? combineOrigins(Shadows, Origins, Pos,
                               ConstantInt::getSigned(IRB.getInt64Ty(), 0))
              : DFS.ZeroOrigin};
}
1954 
// Emits IR before Pos that computes the primitive shadow (and origin, when
// tracked) for a load of Size bytes from Addr. Tries progressively cheaper
// strategies: a direct load for non-escaped allocas, a constant zero shadow
// for provably-constant memory or zero-sized loads, the
// __dfsan_load_label_and_origin callback when inline origin loading is not
// worthwhile, inline loads for sizes 1 and 2, the wide fast path, and
// finally the __dfsan_union_load runtime fallback. The returned origin is
// null when origin tracking is disabled (except on the callback path, which
// is only taken with tracking enabled).
std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking(
    Value *Addr, uint64_t Size, Align InstAlignment, Instruction *Pos) {
  const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();

  // Non-escaped loads.
  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
    const auto SI = AllocaShadowMap.find(AI);
    if (SI != AllocaShadowMap.end()) {
      // The alloca's shadow lives in a dedicated alloca rather than in the
      // shadow memory region; load it directly.
      IRBuilder<> IRB(Pos);
      Value *ShadowLI = IRB.CreateLoad(DFS.PrimitiveShadowTy, SI->second);
      const auto OI = AllocaOriginMap.find(AI);
      assert(!ShouldTrackOrigins || OI != AllocaOriginMap.end());
      return {ShadowLI, ShouldTrackOrigins
                            ? IRB.CreateLoad(DFS.OriginTy, OI->second)
                            : nullptr};
    }
  }

  // Load from constant addresses.
  SmallVector<const Value *, 2> Objs;
  getUnderlyingObjects(Addr, Objs);
  bool AllConstants = true;
  for (const Value *Obj : Objs) {
    if (isa<Function>(Obj) || isa<BlockAddress>(Obj))
      continue;
    if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant())
      continue;

    AllConstants = false;
    break;
  }
  // Constant memory cannot carry taint; its shadow is statically zero.
  if (AllConstants)
    return {DFS.ZeroPrimitiveShadow,
            ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};

  if (Size == 0)
    return {DFS.ZeroPrimitiveShadow,
            ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};

  // Use callback to load if this is not an optimizable case for origin
  // tracking.
  if (ShouldTrackOrigins &&
      useCallbackLoadLabelAndOrigin(Size, InstAlignment)) {
    IRBuilder<> IRB(Pos);
    CallInst *Call =
        IRB.CreateCall(DFS.DFSanLoadLabelAndOriginFn,
                       {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
                        ConstantInt::get(DFS.IntptrTy, Size)});
    Call->addRetAttr(Attribute::ZExt);
    // The callback packs shadow and origin into one integer: the shadow is
    // in the bits above OriginWidthBits, the origin in the low bits.
    return {IRB.CreateTrunc(IRB.CreateLShr(Call, DFS.OriginWidthBits),
                            DFS.PrimitiveShadowTy),
            IRB.CreateTrunc(Call, DFS.OriginTy)};
  }

  // Other cases that support loading shadows or origins in a fast way.
  Value *ShadowAddr, *OriginAddr;
  std::tie(ShadowAddr, OriginAddr) =
      DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);

  const Align ShadowAlign = getShadowAlign(InstAlignment);
  const Align OriginAlign = getOriginAlign(InstAlignment);
  Value *Origin = nullptr;
  if (ShouldTrackOrigins) {
    IRBuilder<> IRB(Pos);
    Origin = IRB.CreateAlignedLoad(DFS.OriginTy, OriginAddr, OriginAlign);
  }

  // When the byte size is small enough, we can load the shadow directly with
  // just a few instructions.
  switch (Size) {
  case 1: {
    LoadInst *LI = new LoadInst(DFS.PrimitiveShadowTy, ShadowAddr, "", Pos);
    LI->setAlignment(ShadowAlign);
    return {LI, Origin};
  }
  case 2: {
    // Load both byte shadows and union them.
    IRBuilder<> IRB(Pos);
    Value *ShadowAddr1 = IRB.CreateGEP(DFS.PrimitiveShadowTy, ShadowAddr,
                                       ConstantInt::get(DFS.IntptrTy, 1));
    Value *Load =
        IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr, ShadowAlign);
    Value *Load1 =
        IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr1, ShadowAlign);
    return {combineShadows(Load, Load1, Pos), Origin};
  }
  }
  bool HasSizeForFastPath = DFS.hasLoadSizeForFastPath(Size);

  if (HasSizeForFastPath)
    return loadShadowFast(ShadowAddr, OriginAddr, Size, ShadowAlign,
                          OriginAlign, Origin, Pos);

  // Last resort: let the runtime union the shadow range.
  IRBuilder<> IRB(Pos);
  CallInst *FallbackCall = IRB.CreateCall(
      DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
  FallbackCall->addRetAttr(Attribute::ZExt);
  return {FallbackCall, Origin};
}
2053 
2054 std::pair<Value *, Value *> DFSanFunction::loadShadowOrigin(Value *Addr,
2055                                                             uint64_t Size,
2056                                                             Align InstAlignment,
2057                                                             Instruction *Pos) {
2058   Value *PrimitiveShadow, *Origin;
2059   std::tie(PrimitiveShadow, Origin) =
2060       loadShadowOriginSansLoadTracking(Addr, Size, InstAlignment, Pos);
2061   if (DFS.shouldTrackOrigins()) {
2062     if (ClTrackOrigins == 2) {
2063       IRBuilder<> IRB(Pos);
2064       auto *ConstantShadow = dyn_cast<Constant>(PrimitiveShadow);
2065       if (!ConstantShadow || !ConstantShadow->isZeroValue())
2066         Origin = updateOriginIfTainted(PrimitiveShadow, Origin, IRB);
2067     }
2068   }
2069   return {PrimitiveShadow, Origin};
2070 }
2071 
// Strengthens the given atomic ordering with acquire semantics (leaving
// release components intact), so a shadow load emitted after an atomic
// application load is properly ordered. See visitLoadInst.
static AtomicOrdering addAcquireOrdering(AtomicOrdering AO) {
  switch (AO) {
  case AtomicOrdering::NotAtomic:
    return AtomicOrdering::NotAtomic;
  case AtomicOrdering::Unordered:
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Acquire:
    return AtomicOrdering::Acquire;
  case AtomicOrdering::Release:
  case AtomicOrdering::AcquireRelease:
    return AtomicOrdering::AcquireRelease;
  case AtomicOrdering::SequentiallyConsistent:
    return AtomicOrdering::SequentiallyConsistent;
  }
  llvm_unreachable("Unknown ordering");
}
2088 
// Instruments a load: computes the shadow/origin of the loaded memory,
// optionally unions in the pointer operand's shadow
// (-dfsan-combine-pointer-labels-on-load), and attaches the results to LI.
void DFSanVisitor::visitLoadInst(LoadInst &LI) {
  auto &DL = LI.getModule()->getDataLayout();
  uint64_t Size = DL.getTypeStoreSize(LI.getType());
  // Zero-sized loads carry no data and therefore no taint.
  if (Size == 0) {
    DFSF.setShadow(&LI, DFSF.DFS.getZeroShadow(&LI));
    DFSF.setOrigin(&LI, DFSF.DFS.ZeroOrigin);
    return;
  }

  // When an application load is atomic, increase atomic ordering between
  // atomic application loads and stores to ensure happen-before order; load
  // shadow data after application data; store zero shadow data before
  // application data. This ensure shadow loads return either labels of the
  // initial application data or zeros.
  if (LI.isAtomic())
    LI.setOrdering(addAcquireOrdering(LI.getOrdering()));

  // For atomic loads the shadow must be read after the application data, so
  // insert the instrumentation after LI instead of before it.
  Instruction *Pos = LI.isAtomic() ? LI.getNextNode() : &LI;
  std::vector<Value *> Shadows;
  std::vector<Value *> Origins;
  Value *PrimitiveShadow, *Origin;
  std::tie(PrimitiveShadow, Origin) =
      DFSF.loadShadowOrigin(LI.getPointerOperand(), Size, LI.getAlign(), Pos);
  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
  if (ShouldTrackOrigins) {
    Shadows.push_back(PrimitiveShadow);
    Origins.push_back(Origin);
  }
  if (ClCombinePointerLabelsOnLoad) {
    // Fold the pointer's own label (and origin) into the result.
    Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
    PrimitiveShadow = DFSF.combineShadows(PrimitiveShadow, PtrShadow, Pos);
    if (ShouldTrackOrigins) {
      Shadows.push_back(PtrShadow);
      Origins.push_back(DFSF.getOrigin(LI.getPointerOperand()));
    }
  }
  if (!DFSF.DFS.isZeroShadow(PrimitiveShadow))
    DFSF.NonZeroChecks.push_back(PrimitiveShadow);

  Value *Shadow =
      DFSF.expandFromPrimitiveShadow(LI.getType(), PrimitiveShadow, Pos);
  DFSF.setShadow(&LI, Shadow);

  if (ShouldTrackOrigins) {
    DFSF.setOrigin(&LI, DFSF.combineOrigins(Shadows, Origins, Pos));
  }

  // With -dfsan-event-callbacks, notify the runtime of every load.
  if (ClEventCallbacks) {
    IRBuilder<> IRB(Pos);
    Value *Addr8 = IRB.CreateBitCast(LI.getPointerOperand(), DFSF.DFS.Int8Ptr);
    IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, {PrimitiveShadow, Addr8});
  }
}
2142 
// Emits a call to the __dfsan_chain_origin_if_tainted runtime function,
// passing the shadow and current origin; presumably the runtime returns a
// chained origin when the shadow is nonzero — semantics live in the runtime.
Value *DFSanFunction::updateOriginIfTainted(Value *Shadow, Value *Origin,
                                            IRBuilder<> &IRB) {
  assert(DFS.shouldTrackOrigins());
  return IRB.CreateCall(DFS.DFSanChainOriginIfTaintedFn, {Shadow, Origin});
}
2148 
2149 Value *DFSanFunction::updateOrigin(Value *V, IRBuilder<> &IRB) {
2150   if (!DFS.shouldTrackOrigins())
2151     return V;
2152   return IRB.CreateCall(DFS.DFSanChainOriginFn, V);
2153 }
2154 
// Widens a 4-byte origin id to an intptr-sized value by replicating it into
// both halves, allowing paintOrigin to fill two origin slots with a single
// store. Requires IntptrSize == OriginSize or IntptrSize == 2 * OriginSize.
Value *DFSanFunction::originToIntptr(IRBuilder<> &IRB, Value *Origin) {
  const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
  const DataLayout &DL = F->getParent()->getDataLayout();
  unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
  if (IntptrSize == OriginSize)
    return Origin;
  assert(IntptrSize == OriginSize * 2);
  Origin = IRB.CreateIntCast(Origin, DFS.IntptrTy, /* isSigned */ false);
  return IRB.CreateOr(Origin, IRB.CreateShl(Origin, OriginSize * 8));
}
2165 
// Writes the origin id Origin across StoreOriginSize bytes of origin shadow
// starting at StoreOriginAddr. When the address is sufficiently aligned, the
// origin is replicated into an intptr-sized pattern (see originToIntptr) so
// two 4-byte slots are filled per store; any remaining slots are written one
// origin at a time.
void DFSanFunction::paintOrigin(IRBuilder<> &IRB, Value *Origin,
                                Value *StoreOriginAddr,
                                uint64_t StoreOriginSize, Align Alignment) {
  const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
  const DataLayout &DL = F->getParent()->getDataLayout();
  const Align IntptrAlignment = DL.getABITypeAlign(DFS.IntptrTy);
  unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
  assert(IntptrAlignment >= MinOriginAlignment);
  assert(IntptrSize >= OriginSize);

  // Ofs counts origin slots (not bytes) already written.
  unsigned Ofs = 0;
  Align CurrentAlignment = Alignment;
  if (Alignment >= IntptrAlignment && IntptrSize > OriginSize) {
    // Wide phase: store the replicated origin in intptr-sized chunks.
    Value *IntptrOrigin = originToIntptr(IRB, Origin);
    Value *IntptrStoreOriginPtr = IRB.CreatePointerCast(
        StoreOriginAddr, PointerType::get(DFS.IntptrTy, 0));
    for (unsigned I = 0; I < StoreOriginSize / IntptrSize; ++I) {
      Value *Ptr =
          I ? IRB.CreateConstGEP1_32(DFS.IntptrTy, IntptrStoreOriginPtr, I)
            : IntptrStoreOriginPtr;
      IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
      Ofs += IntptrSize / OriginSize;
      // Only the first store uses the caller's alignment; subsequent stores
      // are intptr-aligned by construction.
      CurrentAlignment = IntptrAlignment;
    }
  }

  // Tail phase: fill the remaining slots (rounding the total up to whole
  // origin slots) one 4-byte origin at a time.
  for (unsigned I = Ofs; I < (StoreOriginSize + OriginSize - 1) / OriginSize;
       ++I) {
    Value *GEP = I ? IRB.CreateConstGEP1_32(DFS.OriginTy, StoreOriginAddr, I)
                   : StoreOriginAddr;
    IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
    CurrentAlignment = MinOriginAlignment;
  }
}
2200 
2201 Value *DFSanFunction::convertToBool(Value *V, IRBuilder<> &IRB,
2202                                     const Twine &Name) {
2203   Type *VTy = V->getType();
2204   assert(VTy->isIntegerTy());
2205   if (VTy->getIntegerBitWidth() == 1)
2206     // Just converting a bool to a bool, so do nothing.
2207     return V;
2208   return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), Name);
2209 }
2210 
// Stores `Origin` for `Size` bytes at `StoreOriginAddr`, but only when the
// stored shadow is (or may be at runtime) nonzero.
void DFSanFunction::storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size,
                                Value *Shadow, Value *Origin,
                                Value *StoreOriginAddr, Align InstAlignment) {
  // Do not write origins for zero shadows because we do not trace origins for
  // untainted sinks.
  const Align OriginAlignment = getOriginAlign(InstAlignment);
  Value *CollapsedShadow = collapseToPrimitiveShadow(Shadow, Pos);
  IRBuilder<> IRB(Pos);
  // Constant shadow: decide statically whether the origin store is needed.
  if (auto *ConstantShadow = dyn_cast<Constant>(CollapsedShadow)) {
    if (!ConstantShadow->isZeroValue())
      paintOrigin(IRB, updateOrigin(Origin, IRB), StoreOriginAddr, Size,
                  OriginAlignment);
    return;
  }

  if (shouldInstrumentWithCall()) {
    // Delegate the tainted-check and the origin store to the runtime.
    IRB.CreateCall(DFS.DFSanMaybeStoreOriginFn,
                   {CollapsedShadow,
                    IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
                    ConstantInt::get(DFS.IntptrTy, Size), Origin});
  } else {
    // Inline version: branch on (shadow != 0) and paint origins only in the
    // taken block.
    Value *Cmp = convertToBool(CollapsedShadow, IRB, "_dfscmp");
    Instruction *CheckTerm = SplitBlockAndInsertIfThen(
        Cmp, &*IRB.GetInsertPoint(), false, DFS.OriginStoreWeights, &DT);
    IRBuilder<> IRBNew(CheckTerm);
    paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), StoreOriginAddr, Size,
                OriginAlignment);
    ++NumOriginStores;
  }
}
2241 
2242 void DFSanFunction::storeZeroPrimitiveShadow(Value *Addr, uint64_t Size,
2243                                              Align ShadowAlign,
2244                                              Instruction *Pos) {
2245   IRBuilder<> IRB(Pos);
2246   IntegerType *ShadowTy =
2247       IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidthBits);
2248   Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);
2249   Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
2250   Value *ExtShadowAddr =
2251       IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy));
2252   IRB.CreateAlignedStore(ExtZeroShadow, ExtShadowAddr, ShadowAlign);
2253   // Do not write origins for 0 shadows because we do not trace origins for
2254   // untainted sinks.
2255 }
2256 
// Stores PrimitiveShadow (and, when tracked, Origin) for the Size bytes
// written to application address Addr. Fast paths: allocas promoted to a
// dedicated shadow slot, and all-zero shadows; otherwise the shadow region
// is filled with splatted vector stores where possible, scalars for the
// remainder.
void DFSanFunction::storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
                                               Align InstAlignment,
                                               Value *PrimitiveShadow,
                                               Value *Origin,
                                               Instruction *Pos) {
  const bool ShouldTrackOrigins = DFS.shouldTrackOrigins() && Origin;

  // Alloca with a parallel shadow slot: store the shadow there directly.
  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
    const auto SI = AllocaShadowMap.find(AI);
    if (SI != AllocaShadowMap.end()) {
      IRBuilder<> IRB(Pos);
      IRB.CreateStore(PrimitiveShadow, SI->second);

      // Do not write origins for 0 shadows because we do not trace origins for
      // untainted sinks.
      if (ShouldTrackOrigins && !DFS.isZeroShadow(PrimitiveShadow)) {
        const auto OI = AllocaOriginMap.find(AI);
        assert(OI != AllocaOriginMap.end() && Origin);
        IRB.CreateStore(Origin, OI->second);
      }
      return;
    }
  }

  const Align ShadowAlign = getShadowAlign(InstAlignment);
  // Zero shadow: one wide zero store, and no origin update.
  if (DFS.isZeroShadow(PrimitiveShadow)) {
    storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, Pos);
    return;
  }

  IRBuilder<> IRB(Pos);
  Value *ShadowAddr, *OriginAddr;
  std::tie(ShadowAddr, OriginAddr) =
      DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);

  // Fill the shadow eight slots at a time with a splatted vector store.
  const unsigned ShadowVecSize = 8;
  assert(ShadowVecSize * DFS.ShadowWidthBits <= 128 &&
         "Shadow vector is too large!");

  uint64_t Offset = 0;
  uint64_t LeftSize = Size;
  if (LeftSize >= ShadowVecSize) {
    auto *ShadowVecTy =
        FixedVectorType::get(DFS.PrimitiveShadowTy, ShadowVecSize);
    // Build a vector with PrimitiveShadow splatted into every lane.
    Value *ShadowVec = UndefValue::get(ShadowVecTy);
    for (unsigned I = 0; I != ShadowVecSize; ++I) {
      ShadowVec = IRB.CreateInsertElement(
          ShadowVec, PrimitiveShadow,
          ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), I));
    }
    Value *ShadowVecAddr =
        IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy));
    do {
      Value *CurShadowVecAddr =
          IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset);
      IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
      LeftSize -= ShadowVecSize;
      ++Offset;
    } while (LeftSize >= ShadowVecSize);
    // Offset counted whole vectors above; convert it to scalar slot units
    // for the tail loop below.
    Offset *= ShadowVecSize;
  }
  // Store any remaining (< ShadowVecSize) slots one at a time.
  while (LeftSize > 0) {
    Value *CurShadowAddr =
        IRB.CreateConstGEP1_32(DFS.PrimitiveShadowTy, ShadowAddr, Offset);
    IRB.CreateAlignedStore(PrimitiveShadow, CurShadowAddr, ShadowAlign);
    --LeftSize;
    ++Offset;
  }

  if (ShouldTrackOrigins) {
    storeOrigin(Pos, Addr, Size, PrimitiveShadow, Origin, OriginAddr,
                InstAlignment);
  }
}
2331 
// Returns an ordering at least as strong as AO that also has release
// semantics, so that shadow stores emitted before the application store are
// published together with it (see the comment in visitStoreInst).
static AtomicOrdering addReleaseOrdering(AtomicOrdering AO) {
  switch (AO) {
  case AtomicOrdering::NotAtomic:
    return AtomicOrdering::NotAtomic;
  case AtomicOrdering::Unordered:
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Release:
    return AtomicOrdering::Release;
  case AtomicOrdering::Acquire:
  case AtomicOrdering::AcquireRelease:
    // Keep the acquire component and add release.
    return AtomicOrdering::AcquireRelease;
  case AtomicOrdering::SequentiallyConsistent:
    return AtomicOrdering::SequentiallyConsistent;
  }
  llvm_unreachable("Unknown ordering");
}
2348 
// Instruments a store: combines the value (and optionally pointer) shadows,
// writes them to shadow memory along with origins, and strengthens atomic
// ordering so racy shadow reads see either the real labels or zero.
void DFSanVisitor::visitStoreInst(StoreInst &SI) {
  auto &DL = SI.getModule()->getDataLayout();
  Value *Val = SI.getValueOperand();
  uint64_t Size = DL.getTypeStoreSize(Val->getType());
  if (Size == 0)
    return;

  // When an application store is atomic, increase atomic ordering between
  // atomic application loads and stores to ensure happen-before order; load
  // shadow data after application data; store zero shadow data before
  // application data. This ensures shadow loads return either labels of the
  // initial application data or zeros.
  if (SI.isAtomic())
    SI.setOrdering(addReleaseOrdering(SI.getOrdering()));

  // Origins are not tracked across atomic stores (their shadow is zero).
  const bool ShouldTrackOrigins =
      DFSF.DFS.shouldTrackOrigins() && !SI.isAtomic();
  std::vector<Value *> Shadows;
  std::vector<Value *> Origins;

  // Atomic stores write zero shadow, per the comment above.
  Value *Shadow =
      SI.isAtomic() ? DFSF.DFS.getZeroShadow(Val) : DFSF.getShadow(Val);

  if (ShouldTrackOrigins) {
    Shadows.push_back(Shadow);
    Origins.push_back(DFSF.getOrigin(Val));
  }

  Value *PrimitiveShadow;
  if (ClCombinePointerLabelsOnStore) {
    // Optionally fold the destination pointer's label into the stored label.
    Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
    if (ShouldTrackOrigins) {
      Shadows.push_back(PtrShadow);
      Origins.push_back(DFSF.getOrigin(SI.getPointerOperand()));
    }
    PrimitiveShadow = DFSF.combineShadows(Shadow, PtrShadow, &SI);
  } else {
    PrimitiveShadow = DFSF.collapseToPrimitiveShadow(Shadow, &SI);
  }
  Value *Origin = nullptr;
  if (ShouldTrackOrigins)
    Origin = DFSF.combineOrigins(Shadows, Origins, &SI);
  DFSF.storePrimitiveShadowOrigin(SI.getPointerOperand(), Size, SI.getAlign(),
                                  PrimitiveShadow, Origin, &SI);
  if (ClEventCallbacks) {
    // Notify the store event hook with the combined label and address.
    IRBuilder<> IRB(&SI);
    Value *Addr8 = IRB.CreateBitCast(SI.getPointerOperand(), DFSF.DFS.Int8Ptr);
    IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, {PrimitiveShadow, Addr8});
  }
}
2399 
2400 void DFSanVisitor::visitCASOrRMW(Align InstAlignment, Instruction &I) {
2401   assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
2402 
2403   Value *Val = I.getOperand(1);
2404   const auto &DL = I.getModule()->getDataLayout();
2405   uint64_t Size = DL.getTypeStoreSize(Val->getType());
2406   if (Size == 0)
2407     return;
2408 
2409   // Conservatively set data at stored addresses and return with zero shadow to
2410   // prevent shadow data races.
2411   IRBuilder<> IRB(&I);
2412   Value *Addr = I.getOperand(0);
2413   const Align ShadowAlign = DFSF.getShadowAlign(InstAlignment);
2414   DFSF.storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, &I);
2415   DFSF.setShadow(&I, DFSF.DFS.getZeroShadow(&I));
2416   DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
2417 }
2418 
// Atomic RMW: zero the target's shadow (via visitCASOrRMW) and strengthen
// the instruction's ordering with release semantics.
void DFSanVisitor::visitAtomicRMWInst(AtomicRMWInst &I) {
  visitCASOrRMW(I.getAlign(), I);
  // TODO: The ordering change follows MSan. It is possible not to change
  // ordering because we always set and use 0 shadows.
  I.setOrdering(addReleaseOrdering(I.getOrdering()));
}
2425 
// Atomic cmpxchg: zero the target's shadow (via visitCASOrRMW) and
// strengthen the success ordering with release semantics.
void DFSanVisitor::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
  visitCASOrRMW(I.getAlign(), I);
  // TODO: The ordering change follows MSan. It is possible not to change
  // ordering because we always set and use 0 shadows.
  I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
}
2432 
// Unary operators: delegate to the generic operand-based propagation.
void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) {
  visitInstOperands(UO);
}
2436 
// Binary operators: delegate to the generic operand-based propagation.
void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
  visitInstOperands(BO);
}
2440 
2441 void DFSanVisitor::visitBitCastInst(BitCastInst &BCI) {
2442   // Special case: if this is the bitcast (there is exactly 1 allowed) between
2443   // a musttail call and a ret, don't instrument. New instructions are not
2444   // allowed after a musttail call.
2445   if (auto *CI = dyn_cast<CallInst>(BCI.getOperand(0)))
2446     if (CI->isMustTailCall())
2447       return;
2448   visitInstOperands(BCI);
2449 }
2450 
// All other casts: delegate to the generic operand-based propagation.
void DFSanVisitor::visitCastInst(CastInst &CI) { visitInstOperands(CI); }
2452 
2453 void DFSanVisitor::visitCmpInst(CmpInst &CI) {
2454   visitInstOperands(CI);
2455   if (ClEventCallbacks) {
2456     IRBuilder<> IRB(&CI);
2457     Value *CombinedShadow = DFSF.getShadow(&CI);
2458     IRB.CreateCall(DFSF.DFS.DFSanCmpCallbackFn, CombinedShadow);
2459   }
2460 }
2461 
// Landing pads produce untainted results; taint on thrown values flows
// through the exception object's memory instead (see below).
void DFSanVisitor::visitLandingPadInst(LandingPadInst &LPI) {
  // We do not need to track data through LandingPadInst.
  //
  // For the C++ exceptions, if a value is thrown, this value will be stored
  // in a memory location provided by __cxa_allocate_exception(...) (on the
  // throw side) or  __cxa_begin_catch(...) (on the catch side).
  // This memory will have a shadow, so with the loads and stores we will be
  // able to propagate labels on data thrown through exceptions, without any
  // special handling of the LandingPadInst.
  //
  // The second element in the pair result of the LandingPadInst is a
  // register value, but it is for a type ID and should never be tainted.
  DFSF.setShadow(&LPI, DFSF.DFS.getZeroShadow(&LPI));
  DFSF.setOrigin(&LPI, DFSF.DFS.ZeroOrigin);
}
2477 
2478 void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
2479   if (ClCombineOffsetLabelsOnGEP) {
2480     visitInstOperands(GEPI);
2481     return;
2482   }
2483 
2484   // Only propagate shadow/origin of base pointer value but ignore those of
2485   // offset operands.
2486   Value *BasePointer = GEPI.getPointerOperand();
2487   DFSF.setShadow(&GEPI, DFSF.getShadow(BasePointer));
2488   if (DFSF.DFS.shouldTrackOrigins())
2489     DFSF.setOrigin(&GEPI, DFSF.getOrigin(BasePointer));
2490 }
2491 
// extractelement: delegate to the generic operand-based propagation.
void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {
  visitInstOperands(I);
}
2495 
// insertelement: delegate to the generic operand-based propagation.
void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) {
  visitInstOperands(I);
}
2499 
// shufflevector: delegate to the generic operand-based propagation.
void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
  visitInstOperands(I);
}
2503 
2504 void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
2505   IRBuilder<> IRB(&I);
2506   Value *Agg = I.getAggregateOperand();
2507   Value *AggShadow = DFSF.getShadow(Agg);
2508   Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
2509   DFSF.setShadow(&I, ResShadow);
2510   visitInstOperandOrigins(I);
2511 }
2512 
2513 void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
2514   IRBuilder<> IRB(&I);
2515   Value *AggShadow = DFSF.getShadow(I.getAggregateOperand());
2516   Value *InsShadow = DFSF.getShadow(I.getInsertedValueOperand());
2517   Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
2518   DFSF.setShadow(&I, Res);
2519   visitInstOperandOrigins(I);
2520 }
2521 
2522 void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
2523   bool AllLoadsStores = true;
2524   for (User *U : I.users()) {
2525     if (isa<LoadInst>(U))
2526       continue;
2527 
2528     if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
2529       if (SI->getPointerOperand() == &I)
2530         continue;
2531     }
2532 
2533     AllLoadsStores = false;
2534     break;
2535   }
2536   if (AllLoadsStores) {
2537     IRBuilder<> IRB(&I);
2538     DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.PrimitiveShadowTy);
2539     if (DFSF.DFS.shouldTrackOrigins()) {
2540       DFSF.AllocaOriginMap[&I] =
2541           IRB.CreateAlloca(DFSF.DFS.OriginTy, nullptr, "_dfsa");
2542     }
2543   }
2544   DFSF.setShadow(&I, DFSF.DFS.ZeroPrimitiveShadow);
2545   DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
2546 }
2547 
// Computes the select result's shadow/origin. A scalar condition yields a
// select over the operand shadows; a vector condition forces a conservative
// combine, since each lane may choose either side. The condition's own label
// is folded in when ClTrackSelectControlFlow is set.
void DFSanVisitor::visitSelectInst(SelectInst &I) {
  Value *CondShadow = DFSF.getShadow(I.getCondition());
  Value *TrueShadow = DFSF.getShadow(I.getTrueValue());
  Value *FalseShadow = DFSF.getShadow(I.getFalseValue());
  Value *ShadowSel = nullptr;
  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
  std::vector<Value *> Shadows;
  std::vector<Value *> Origins;
  Value *TrueOrigin =
      ShouldTrackOrigins ? DFSF.getOrigin(I.getTrueValue()) : nullptr;
  Value *FalseOrigin =
      ShouldTrackOrigins ? DFSF.getOrigin(I.getFalseValue()) : nullptr;

  DFSF.addConditionalCallbacksIfEnabled(I, I.getCondition());

  if (isa<VectorType>(I.getCondition()->getType())) {
    // Per-lane selection: conservatively combine both arms' shadows.
    ShadowSel = DFSF.combineShadowsThenConvert(I.getType(), TrueShadow,
                                               FalseShadow, &I);
    if (ShouldTrackOrigins) {
      Shadows.push_back(TrueShadow);
      Shadows.push_back(FalseShadow);
      Origins.push_back(TrueOrigin);
      Origins.push_back(FalseOrigin);
    }
  } else {
    if (TrueShadow == FalseShadow) {
      // Both arms carry the same shadow; no select needed.
      ShadowSel = TrueShadow;
      if (ShouldTrackOrigins) {
        Shadows.push_back(TrueShadow);
        Origins.push_back(TrueOrigin);
      }
    } else {
      // Mirror the application select on the shadows (and origins).
      ShadowSel =
          SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I);
      if (ShouldTrackOrigins) {
        Shadows.push_back(ShadowSel);
        Origins.push_back(SelectInst::Create(I.getCondition(), TrueOrigin,
                                             FalseOrigin, "", &I));
      }
    }
  }
  DFSF.setShadow(&I, ClTrackSelectControlFlow
                         ? DFSF.combineShadowsThenConvert(
                               I.getType(), CondShadow, ShadowSel, &I)
                         : ShadowSel);
  if (ShouldTrackOrigins) {
    if (ClTrackSelectControlFlow) {
      Shadows.push_back(CondShadow);
      Origins.push_back(DFSF.getOrigin(I.getCondition()));
    }
    DFSF.setOrigin(&I, DFSF.combineOrigins(Shadows, Origins, &I));
  }
}
2601 
2602 void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
2603   IRBuilder<> IRB(&I);
2604   Value *ValShadow = DFSF.getShadow(I.getValue());
2605   Value *ValOrigin = DFSF.DFS.shouldTrackOrigins()
2606                          ? DFSF.getOrigin(I.getValue())
2607                          : DFSF.DFS.ZeroOrigin;
2608   IRB.CreateCall(
2609       DFSF.DFS.DFSanSetLabelFn,
2610       {ValShadow, ValOrigin,
2611        IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(*DFSF.DFS.Ctx)),
2612        IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
2613 }
2614 
// Instruments memcpy/memmove: transfers origins first (the runtime reads
// shadows to decide which origins to move), then copies the shadow region
// with a cloned transfer whose length is scaled by the shadow width.
void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
  IRBuilder<> IRB(&I);

  // CopyOrMoveOrigin transfers origins by referring to their shadows. So we
  // need to move origins before moving shadows.
  if (DFSF.DFS.shouldTrackOrigins()) {
    IRB.CreateCall(
        DFSF.DFS.DFSanMemOriginTransferFn,
        {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
         IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
         IRB.CreateIntCast(I.getArgOperand(2), DFSF.DFS.IntptrTy, false)});
  }

  Value *RawDestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I);
  Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I);
  // Shadow length = application length * shadow bytes per application byte.
  Value *LenShadow =
      IRB.CreateMul(I.getLength(), ConstantInt::get(I.getLength()->getType(),
                                                    DFSF.DFS.ShadowWidthBytes));
  Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx);
  Value *DestShadow = IRB.CreateBitCast(RawDestShadow, Int8Ptr);
  SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
  // Re-issue the same intrinsic (memcpy stays memcpy, memmove stays memmove)
  // over the shadow region.
  auto *MTI = cast<MemTransferInst>(
      IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
                     {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
  if (ClPreserveAlignment) {
    // Shadow addresses scale with the data, so alignment scales too.
    MTI->setDestAlignment(I.getDestAlign() * DFSF.DFS.ShadowWidthBytes);
    MTI->setSourceAlignment(I.getSourceAlign() * DFSF.DFS.ShadowWidthBytes);
  } else {
    MTI->setDestAlignment(Align(DFSF.DFS.ShadowWidthBytes));
    MTI->setSourceAlignment(Align(DFSF.DFS.ShadowWidthBytes));
  }
  if (ClEventCallbacks) {
    IRB.CreateCall(DFSF.DFS.DFSanMemTransferCallbackFn,
                   {RawDestShadow,
                    IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
  }
}
2652 
2653 void DFSanVisitor::visitBranchInst(BranchInst &BR) {
2654   if (!BR.isConditional())
2655     return;
2656 
2657   DFSF.addConditionalCallbacksIfEnabled(BR, BR.getCondition());
2658 }
2659 
// Reports the switch condition to the conditional callback, if enabled.
void DFSanVisitor::visitSwitchInst(SwitchInst &SW) {
  DFSF.addConditionalCallbacksIfEnabled(SW, SW.getCondition());
}
2663 
2664 static bool isAMustTailRetVal(Value *RetVal) {
2665   // Tail call may have a bitcast between return.
2666   if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
2667     RetVal = I->getOperand(0);
2668   }
2669   if (auto *I = dyn_cast<CallInst>(RetVal)) {
2670     return I->isMustTailCall();
2671   }
2672   return false;
2673 }
2674 
// Publishes the return value's shadow (and origin) through the retval TLS
// slots for the caller to read, except for native-ABI functions and musttail
// returns.
void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
  if (!DFSF.IsNativeABI && RI.getReturnValue()) {
    // Don't emit the instrumentation for musttail call returns.
    if (isAMustTailRetVal(RI.getReturnValue()))
      return;

    Value *S = DFSF.getShadow(RI.getReturnValue());
    IRBuilder<> IRB(&RI);
    Type *RT = DFSF.F->getFunctionType()->getReturnType();
    unsigned Size = getDataLayout().getTypeAllocSize(DFSF.DFS.getShadowTy(RT));
    if (Size <= RetvalTLSSize) {
      // If the size overflows, stores nothing. At callsite, oversized return
      // shadows are set to zero.
      IRB.CreateAlignedStore(S, DFSF.getRetvalTLS(RT, IRB), ShadowTLSAlignment);
    }
    if (DFSF.DFS.shouldTrackOrigins()) {
      Value *O = DFSF.getOrigin(RI.getReturnValue());
      IRB.CreateStore(O, DFSF.getRetvalOriginTLS());
    }
  }
}
2696 
// Appends the extra shadow arguments for a call to a custom-wrapped function:
// one primitive shadow per fixed parameter, a pointer to an on-stack array of
// shadows for the varargs, and a pointer the wrapper writes the return shadow
// into. Note: the iterator I advances across both loops.
void DFSanVisitor::addShadowArguments(Function &F, CallBase &CB,
                                      std::vector<Value *> &Args,
                                      IRBuilder<> &IRB) {
  FunctionType *FT = F.getFunctionType();

  auto *I = CB.arg_begin();

  // Adds non-variable argument shadows.
  for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
    Args.push_back(DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), &CB));

  // Adds variable argument shadows.
  if (FT->isVarArg()) {
    auto *LabelVATy = ArrayType::get(DFSF.DFS.PrimitiveShadowTy,
                                     CB.arg_size() - FT->getNumParams());
    // Placed in the entry block so the alloca dominates every call site.
    auto *LabelVAAlloca =
        new AllocaInst(LabelVATy, getDataLayout().getAllocaAddrSpace(),
                       "labelva", &DFSF.F->getEntryBlock().front());

    for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
      auto *LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, N);
      IRB.CreateStore(DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), &CB),
                      LabelVAPtr);
    }

    Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));
  }

  // Adds the return value shadow.
  if (!FT->getReturnType()->isVoidTy()) {
    // One return-shadow slot per function, reused across wrapped call sites.
    if (!DFSF.LabelReturnAlloca) {
      DFSF.LabelReturnAlloca = new AllocaInst(
          DFSF.DFS.PrimitiveShadowTy, getDataLayout().getAllocaAddrSpace(),
          "labelreturn", &DFSF.F->getEntryBlock().front());
    }
    Args.push_back(DFSF.LabelReturnAlloca);
  }
}
2735 
// Appends the extra origin arguments for a call to a custom-wrapped function,
// mirroring addShadowArguments: one origin per fixed parameter, an on-stack
// array for varargs, and a pointer the wrapper writes the return origin into.
void DFSanVisitor::addOriginArguments(Function &F, CallBase &CB,
                                      std::vector<Value *> &Args,
                                      IRBuilder<> &IRB) {
  FunctionType *FT = F.getFunctionType();

  auto *I = CB.arg_begin();

  // Add non-variable argument origins.
  for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
    Args.push_back(DFSF.getOrigin(*I));

  // Add variable argument origins.
  if (FT->isVarArg()) {
    auto *OriginVATy =
        ArrayType::get(DFSF.DFS.OriginTy, CB.arg_size() - FT->getNumParams());
    // Placed in the entry block so the alloca dominates every call site.
    auto *OriginVAAlloca =
        new AllocaInst(OriginVATy, getDataLayout().getAllocaAddrSpace(),
                       "originva", &DFSF.F->getEntryBlock().front());

    for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
      auto *OriginVAPtr = IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, N);
      IRB.CreateStore(DFSF.getOrigin(*I), OriginVAPtr);
    }

    Args.push_back(IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, 0));
  }

  // Add the return value origin.
  if (!FT->getReturnType()->isVoidTy()) {
    // One return-origin slot per function, reused across wrapped call sites.
    if (!DFSF.OriginReturnAlloca) {
      DFSF.OriginReturnAlloca = new AllocaInst(
          DFSF.DFS.OriginTy, getDataLayout().getAllocaAddrSpace(),
          "originreturn", &DFSF.F->getEntryBlock().front());
    }
    Args.push_back(DFSF.OriginReturnAlloca);
  }
}
2773 
// Rewrites a call to a function with an ABI-list wrapper kind.
// Warning/Discard/Functional keep the original call and only adjust label
// propagation; Custom replaces the call with a __dfsw_/__dfso_ variant that
// takes explicit shadow (and origin) arguments. Returns true if the call was
// fully handled here.
bool DFSanVisitor::visitWrappedCallBase(Function &F, CallBase &CB) {
  IRBuilder<> IRB(&CB);
  switch (DFSF.DFS.getWrapperKind(&F)) {
  case DataFlowSanitizer::WK_Warning:
    // Call the original, but emit a runtime "unimplemented" warning; the
    // result carries zero shadow.
    CB.setCalledFunction(&F);
    IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,
                   IRB.CreateGlobalStringPtr(F.getName()));
    DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
    DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
    return true;
  case DataFlowSanitizer::WK_Discard:
    // Call the original and drop all label propagation for the result.
    CB.setCalledFunction(&F);
    DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
    DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
    return true;
  case DataFlowSanitizer::WK_Functional:
    // Treat the call like a pure operation over its operands.
    CB.setCalledFunction(&F);
    visitInstOperands(CB);
    return true;
  case DataFlowSanitizer::WK_Custom:
    // Don't try to handle invokes of custom functions, it's too complicated.
    // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_
    // wrapper.
    CallInst *CI = dyn_cast<CallInst>(&CB);
    if (!CI)
      return false;

    const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
    FunctionType *FT = F.getFunctionType();
    TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT);
    // __dfso_ when origins are tracked, __dfsw_ otherwise.
    std::string CustomFName = ShouldTrackOrigins ? "__dfso_" : "__dfsw_";
    CustomFName += F.getName();
    FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction(
        CustomFName, CustomFn.TransformedType);
    if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) {
      CustomFn->copyAttributesFrom(&F);

      // Custom functions returning non-void will write to the return label.
      if (!FT->getReturnType()->isVoidTy()) {
        CustomFn->removeFnAttrs(DFSF.DFS.ReadOnlyNoneAttrs);
      }
    }

    std::vector<Value *> Args;

    // Adds non-variable arguments.
    auto *I = CB.arg_begin();
    for (unsigned N = FT->getNumParams(); N != 0; ++I, --N) {
      Args.push_back(*I);
    }

    // Adds shadow arguments.
    const unsigned ShadowArgStart = Args.size();
    addShadowArguments(F, CB, Args, IRB);

    // Adds origin arguments.
    const unsigned OriginArgStart = Args.size();
    if (ShouldTrackOrigins)
      addOriginArguments(F, CB, Args, IRB);

    // Adds variable arguments.
    append_range(Args, drop_begin(CB.args(), FT->getNumParams()));

    CallInst *CustomCI = IRB.CreateCall(CustomF, Args);
    CustomCI->setCallingConv(CI->getCallingConv());
    CustomCI->setAttributes(transformFunctionAttributes(
        CustomFn, CI->getContext(), CI->getAttributes()));

    // Update the parameter attributes of the custom call instruction to
    // zero extend the shadow parameters. This is required for targets
    // which consider PrimitiveShadowTy an illegal type.
    for (unsigned N = 0; N < FT->getNumParams(); N++) {
      const unsigned ArgNo = ShadowArgStart + N;
      if (CustomCI->getArgOperand(ArgNo)->getType() ==
          DFSF.DFS.PrimitiveShadowTy)
        CustomCI->addParamAttr(ArgNo, Attribute::ZExt);
      if (ShouldTrackOrigins) {
        const unsigned OriginArgNo = OriginArgStart + N;
        if (CustomCI->getArgOperand(OriginArgNo)->getType() ==
            DFSF.DFS.OriginTy)
          CustomCI->addParamAttr(OriginArgNo, Attribute::ZExt);
      }
    }

    // Loads the return value shadow and origin.
    if (!FT->getReturnType()->isVoidTy()) {
      LoadInst *LabelLoad =
          IRB.CreateLoad(DFSF.DFS.PrimitiveShadowTy, DFSF.LabelReturnAlloca);
      DFSF.setShadow(CustomCI, DFSF.expandFromPrimitiveShadow(
                                   FT->getReturnType(), LabelLoad, &CB));
      if (ShouldTrackOrigins) {
        LoadInst *OriginLoad =
            IRB.CreateLoad(DFSF.DFS.OriginTy, DFSF.OriginReturnAlloca);
        DFSF.setOrigin(CustomCI, OriginLoad);
      }
    }

    // Replace the original call with the custom one.
    CI->replaceAllUsesWith(CustomCI);
    CI->eraseFromParent();
    return true;
  }
  return false;
}
2877 
// Generic call handling: passes argument shadows/origins through TLS,
// dispatches to wrapped-call handling when the callee is on the ABI list,
// and reads back the return shadow/origin after the call.
void DFSanVisitor::visitCallBase(CallBase &CB) {
  Function *F = CB.getCalledFunction();
  // Intrinsics and inline asm are treated as plain operations over operands.
  if ((F && F->isIntrinsic()) || CB.isInlineAsm()) {
    visitInstOperands(CB);
    return;
  }

  // Calls to this function are synthesized in wrappers, and we shouldn't
  // instrument them.
  if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
    return;

  DenseMap<Value *, Function *>::iterator UnwrappedFnIt =
      DFSF.DFS.UnwrappedFnMap.find(CB.getCalledOperand());
  if (UnwrappedFnIt != DFSF.DFS.UnwrappedFnMap.end())
    if (visitWrappedCallBase(*UnwrappedFnIt->second, CB))
      return;

  IRBuilder<> IRB(&CB);

  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
  FunctionType *FT = CB.getFunctionType();
  const DataLayout &DL = getDataLayout();

  // Stores argument shadows.
  unsigned ArgOffset = 0;
  for (unsigned I = 0, N = FT->getNumParams(); I != N; ++I) {
    if (ShouldTrackOrigins) {
      // Ignore overflowed origins
      Value *ArgShadow = DFSF.getShadow(CB.getArgOperand(I));
      if (I < DFSF.DFS.NumOfElementsInArgOrgTLS &&
          !DFSF.DFS.isZeroShadow(ArgShadow))
        IRB.CreateStore(DFSF.getOrigin(CB.getArgOperand(I)),
                        DFSF.getArgOriginTLS(I, IRB));
    }

    unsigned Size =
        DL.getTypeAllocSize(DFSF.DFS.getShadowTy(FT->getParamType(I)));
    // Stop storing if arguments' size overflows. Inside a function, arguments
    // after overflow have zero shadow values.
    if (ArgOffset + Size > ArgTLSSize)
      break;
    IRB.CreateAlignedStore(DFSF.getShadow(CB.getArgOperand(I)),
                           DFSF.getArgTLS(FT->getParamType(I), ArgOffset, IRB),
                           ShadowTLSAlignment);
    ArgOffset += alignTo(Size, ShadowTLSAlignment);
  }

  // Determine where the return shadow must be loaded: right after the call,
  // or at the top of an invoke's normal destination.
  Instruction *Next = nullptr;
  if (!CB.getType()->isVoidTy()) {
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      if (II->getNormalDest()->getSinglePredecessor()) {
        Next = &II->getNormalDest()->front();
      } else {
        // Split the edge so the load does not execute on other paths into
        // the destination block.
        BasicBlock *NewBB =
            SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT);
        Next = &NewBB->front();
      }
    } else {
      assert(CB.getIterator() != CB.getParent()->end());
      Next = CB.getNextNode();
    }

    // Don't emit the epilogue for musttail call returns.
    if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
      return;

    // Loads the return value shadow.
    IRBuilder<> NextIRB(Next);
    unsigned Size = DL.getTypeAllocSize(DFSF.DFS.getShadowTy(&CB));
    if (Size > RetvalTLSSize) {
      // Set overflowed return shadow to be zero.
      DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
    } else {
      LoadInst *LI = NextIRB.CreateAlignedLoad(
          DFSF.DFS.getShadowTy(&CB), DFSF.getRetvalTLS(CB.getType(), NextIRB),
          ShadowTLSAlignment, "_dfsret");
      DFSF.SkipInsts.insert(LI);
      DFSF.setShadow(&CB, LI);
      DFSF.NonZeroChecks.push_back(LI);
    }

    if (ShouldTrackOrigins) {
      LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.OriginTy,
                                        DFSF.getRetvalOriginTLS(), "_dfsret_o");
      DFSF.SkipInsts.insert(LI);
      DFSF.setOrigin(&CB, LI);
    }
  }
}
2968 
2969 void DFSanVisitor::visitPHINode(PHINode &PN) {
2970   Type *ShadowTy = DFSF.DFS.getShadowTy(&PN);
2971   PHINode *ShadowPN =
2972       PHINode::Create(ShadowTy, PN.getNumIncomingValues(), "", &PN);
2973 
2974   // Give the shadow phi node valid predecessors to fool SplitEdge into working.
2975   Value *UndefShadow = UndefValue::get(ShadowTy);
2976   for (BasicBlock *BB : PN.blocks())
2977     ShadowPN->addIncoming(UndefShadow, BB);
2978 
2979   DFSF.setShadow(&PN, ShadowPN);
2980 
2981   PHINode *OriginPN = nullptr;
2982   if (DFSF.DFS.shouldTrackOrigins()) {
2983     OriginPN =
2984         PHINode::Create(DFSF.DFS.OriginTy, PN.getNumIncomingValues(), "", &PN);
2985     Value *UndefOrigin = UndefValue::get(DFSF.DFS.OriginTy);
2986     for (BasicBlock *BB : PN.blocks())
2987       OriginPN->addIncoming(UndefOrigin, BB);
2988     DFSF.setOrigin(&PN, OriginPN);
2989   }
2990 
2991   DFSF.PHIFixups.push_back({&PN, ShadowPN, OriginPN});
2992 }
2993 
2994 namespace {
2995 class DataFlowSanitizerLegacyPass : public ModulePass {
2996 private:
2997   std::vector<std::string> ABIListFiles;
2998 
2999 public:
3000   static char ID;
3001 
3002   DataFlowSanitizerLegacyPass(
3003       const std::vector<std::string> &ABIListFiles = std::vector<std::string>())
3004       : ModulePass(ID), ABIListFiles(ABIListFiles) {}
3005 
3006   bool runOnModule(Module &M) override {
3007     return DataFlowSanitizer(ABIListFiles).runImpl(M);
3008   }
3009 };
3010 } // namespace
3011 
// Out-of-line definition of the legacy pass's unique identifier; the legacy
// pass manager uses this object's address as the pass ID.
char DataFlowSanitizerLegacyPass::ID;

// Register the legacy pass under the "dfsan" command-line name.
INITIALIZE_PASS(DataFlowSanitizerLegacyPass, "dfsan",
                "DataFlowSanitizer: dynamic data flow analysis.", false, false)
3016 
3017 ModulePass *llvm::createDataFlowSanitizerLegacyPassPass(
3018     const std::vector<std::string> &ABIListFiles) {
3019   return new DataFlowSanitizerLegacyPass(ABIListFiles);
3020 }
3021 
3022 PreservedAnalyses DataFlowSanitizerPass::run(Module &M,
3023                                              ModuleAnalysisManager &AM) {
3024   if (DataFlowSanitizer(ABIListFiles).runImpl(M)) {
3025     return PreservedAnalyses::none();
3026   }
3027   return PreservedAnalyses::all();
3028 }
3029