//===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file is a part of DataFlowSanitizer, a generalised dynamic data flow
/// analysis.
///
/// Unlike other Sanitizer tools, this tool is not designed to detect a specific
/// class of bugs on its own.  Instead, it provides a generic dynamic data flow
/// analysis framework to be used by clients to help detect application-specific
/// issues within their own code.
///
/// The analysis is based on automatic propagation of data flow labels (also
/// known as taint labels) through a program as it performs computation.
///
/// Each byte of application memory is backed by a shadow memory byte. The
/// shadow byte can represent up to 8 labels. On Linux/x86_64, memory is then
/// laid out as follows:
///
/// +--------------------+ 0x800000000000 (top of memory)
/// | application memory |
/// +--------------------+ 0x700000008000 (kAppAddr)
/// |                    |
/// |       unused       |
/// |                    |
/// +--------------------+ 0x300000000000 (kUnusedAddr)
/// |       origin       |
/// +--------------------+ 0x200000008000 (kOriginAddr)
/// |       unused       |
/// +--------------------+ 0x200000000000
/// |   shadow memory    |
/// +--------------------+ 0x100000008000 (kShadowAddr)
/// |       unused       |
/// +--------------------+ 0x000000010000
/// | reserved by kernel |
/// +--------------------+ 0x000000000000
///
///
/// To derive a shadow memory address from an application memory address, bits
/// 45-46 are cleared to bring the address into the range
/// [0x100000008000,0x200000000000). See the function
/// DataFlowSanitizer::getShadowAddress below.
///
/// For more information, please refer to the design document:
/// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SpecialCaseList.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;

// This must be consistent with ShadowWidthBits.
static const Align ShadowTLSAlignment = Align(2);

// Minimum alignment used for origin accesses.
// NOTE(review): presumably chosen to match the 4-byte origin width
// (DataFlowSanitizer::OriginWidthBytes) -- confirm against the users of this
// constant.
static const Align MinOriginAlignment = Align(4);

// The size of TLS variables. These constants must be kept in sync with the ones
// in dfsan.cpp.
static const unsigned ArgTLSSize = 800;
static const unsigned RetvalTLSSize = 800;

// External symbol to be used when generating the shadow address for
// architectures with multiple VMAs. Instead of using a constant integer
// the runtime will set the external mask based on the VMA range.
const char DFSanExternShadowPtrMask[] = "__dfsan_shadow_ptr_mask";

// The -dfsan-preserve-alignment flag controls whether this pass assumes that
// alignment requirements provided by the input IR are correct.  For example,
// if the input IR contains a load with alignment 8, this flag will cause
// the shadow load to have alignment 16.  This flag is disabled by default as
// we have unfortunately encountered too much code (including Clang itself;
// see PR14291) which performs misaligned access.
// NOTE(review): the "8 -> 16" example looks like it predates byte-sized
// shadows (ShadowWidthBits is 8 in this file) -- confirm against
// DFSanFunction::getShadowAlign before relying on the numbers.
static cl::opt<bool> ClPreserveAlignment(
    "dfsan-preserve-alignment",
    cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
    cl::init(false));

// The ABI list files control how shadow parameters are passed. The pass treats
// every function labelled "uninstrumented" in the ABI list file as conforming
// to the "native" (i.e. unsanitized) ABI.  Unless the ABI list contains
// additional annotations for those functions, a call to one of those functions
// will produce a warning message, as the labelling behaviour of the function is
// unknown.  The other supported annotations are "functional" and "discard",
// which are described below under DataFlowSanitizer::WrapperKind.
static cl::list<std::string> ClABIListFiles(
    "dfsan-abilist",
    cl::desc("File listing native ABI functions and how the pass treats them"),
    cl::Hidden);

// Controls whether the pass uses IA_Args or IA_TLS as the ABI for instrumented
// functions (see DataFlowSanitizer::InstrumentedABI below).
static cl::opt<bool>
    ClArgsABI("dfsan-args-abi",
              cl::desc("Use the argument ABI rather than the TLS ABI"),
              cl::Hidden);

// Controls whether the pass includes or ignores the labels of pointers in load
// instructions.
static cl::opt<bool> ClCombinePointerLabelsOnLoad(
    "dfsan-combine-pointer-labels-on-load",
    cl::desc("Combine the label of the pointer with the label of the data when "
             "loading from memory."),
    cl::Hidden, cl::init(true));

// Controls whether the pass includes or ignores the labels of pointers in
// store instructions.
static cl::opt<bool> ClCombinePointerLabelsOnStore(
    "dfsan-combine-pointer-labels-on-store",
    cl::desc("Combine the label of the pointer with the label of the data when "
             "storing in memory."),
    cl::Hidden, cl::init(false));

// Controls whether the pass propagates labels of offsets in GEP instructions.
static cl::opt<bool> ClCombineOffsetLabelsOnGEP(
    "dfsan-combine-offset-labels-on-gep",
    cl::desc(
        "Combine the label of the offset with the label of the pointer when "
        "doing pointer arithmetic."),
    cl::Hidden, cl::init(true));

// Debugging aid: controls whether the pass inserts calls to
// __dfsan_nonzero_label when it observes a parameter, load or return with a
// nonzero label. No explicit default is given, so the option is off unless
// passed on the command line.
static cl::opt<bool> ClDebugNonzeroLabels(
    "dfsan-debug-nonzero-labels",
    cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
             "load or return with a nonzero label"),
    cl::Hidden);

// Experimental feature that inserts callbacks for certain data events.
// Currently callbacks are only inserted for loads, stores, memory transfers
// (i.e. memcpy and memmove), and comparisons.
//
// If this flag is set to true, the user must provide definitions for the
// following callback functions:
//   void __dfsan_load_callback(dfsan_label Label, void* addr);
//   void __dfsan_store_callback(dfsan_label Label, void* addr);
//   void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len);
//   void __dfsan_cmp_callback(dfsan_label CombinedLabel);
static cl::opt<bool> ClEventCallbacks(
    "dfsan-event-callbacks",
    cl::desc("Insert calls to __dfsan_*_callback functions on data events."),
    cl::Hidden, cl::init(false));

// Controls whether the pass tracks the control flow of select instructions.
static cl::opt<bool> ClTrackSelectControlFlow(
    "dfsan-track-select-control-flow",
    cl::desc("Propagate labels from condition values of select instructions "
             "to results."),
    cl::Hidden, cl::init(true));

// TODO: This default value follows MSan. DFSan may use a different value.
214 static cl::opt<int> ClInstrumentWithCallThreshold( 215 "dfsan-instrument-with-call-threshold", 216 cl::desc("If the function being instrumented requires more than " 217 "this number of origin stores, use callbacks instead of " 218 "inline checks (-1 means never use callbacks)."), 219 cl::Hidden, cl::init(3500)); 220 221 // Controls how to track origins. 222 // * 0: do not track origins. 223 // * 1: track origins at memory store operations. 224 // * 2: track origins at memory load and store operations. 225 // TODO: track callsites. 226 static cl::opt<int> ClTrackOrigins("dfsan-track-origins", 227 cl::desc("Track origins of labels"), 228 cl::Hidden, cl::init(0)); 229 230 static StringRef getGlobalTypeString(const GlobalValue &G) { 231 // Types of GlobalVariables are always pointer types. 232 Type *GType = G.getValueType(); 233 // For now we support excluding struct types only. 234 if (StructType *SGType = dyn_cast<StructType>(GType)) { 235 if (!SGType->isLiteral()) 236 return SGType->getName(); 237 } 238 return "<unknown type>"; 239 } 240 241 namespace { 242 243 class DFSanABIList { 244 std::unique_ptr<SpecialCaseList> SCL; 245 246 public: 247 DFSanABIList() = default; 248 249 void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); } 250 251 /// Returns whether either this function or its source file are listed in the 252 /// given category. 253 bool isIn(const Function &F, StringRef Category) const { 254 return isIn(*F.getParent(), Category) || 255 SCL->inSection("dataflow", "fun", F.getName(), Category); 256 } 257 258 /// Returns whether this global alias is listed in the given category. 259 /// 260 /// If GA aliases a function, the alias's name is matched as a function name 261 /// would be. Similarly, aliases of globals are matched like globals. 
262 bool isIn(const GlobalAlias &GA, StringRef Category) const { 263 if (isIn(*GA.getParent(), Category)) 264 return true; 265 266 if (isa<FunctionType>(GA.getValueType())) 267 return SCL->inSection("dataflow", "fun", GA.getName(), Category); 268 269 return SCL->inSection("dataflow", "global", GA.getName(), Category) || 270 SCL->inSection("dataflow", "type", getGlobalTypeString(GA), 271 Category); 272 } 273 274 /// Returns whether this module is listed in the given category. 275 bool isIn(const Module &M, StringRef Category) const { 276 return SCL->inSection("dataflow", "src", M.getModuleIdentifier(), Category); 277 } 278 }; 279 280 /// TransformedFunction is used to express the result of transforming one 281 /// function type into another. This struct is immutable. It holds metadata 282 /// useful for updating calls of the old function to the new type. 283 struct TransformedFunction { 284 TransformedFunction(FunctionType *OriginalType, FunctionType *TransformedType, 285 std::vector<unsigned> ArgumentIndexMapping) 286 : OriginalType(OriginalType), TransformedType(TransformedType), 287 ArgumentIndexMapping(ArgumentIndexMapping) {} 288 289 // Disallow copies. 290 TransformedFunction(const TransformedFunction &) = delete; 291 TransformedFunction &operator=(const TransformedFunction &) = delete; 292 293 // Allow moves. 294 TransformedFunction(TransformedFunction &&) = default; 295 TransformedFunction &operator=(TransformedFunction &&) = default; 296 297 /// Type of the function before the transformation. 298 FunctionType *OriginalType; 299 300 /// Type of the function after the transformation. 301 FunctionType *TransformedType; 302 303 /// Transforming a function may change the position of arguments. This 304 /// member records the mapping from each argument's old position to its new 305 /// position. Argument positions are zero-indexed. 
If the transformation 306 /// from F to F' made the first argument of F into the third argument of F', 307 /// then ArgumentIndexMapping[0] will equal 2. 308 std::vector<unsigned> ArgumentIndexMapping; 309 }; 310 311 /// Given function attributes from a call site for the original function, 312 /// return function attributes appropriate for a call to the transformed 313 /// function. 314 AttributeList 315 transformFunctionAttributes(const TransformedFunction &TransformedFunction, 316 LLVMContext &Ctx, AttributeList CallSiteAttrs) { 317 318 // Construct a vector of AttributeSet for each function argument. 319 std::vector<llvm::AttributeSet> ArgumentAttributes( 320 TransformedFunction.TransformedType->getNumParams()); 321 322 // Copy attributes from the parameter of the original function to the 323 // transformed version. 'ArgumentIndexMapping' holds the mapping from 324 // old argument position to new. 325 for (unsigned I = 0, IE = TransformedFunction.ArgumentIndexMapping.size(); 326 I < IE; ++I) { 327 unsigned TransformedIndex = TransformedFunction.ArgumentIndexMapping[I]; 328 ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttributes(I); 329 } 330 331 // Copy annotations on varargs arguments. 332 for (unsigned I = TransformedFunction.OriginalType->getNumParams(), 333 IE = CallSiteAttrs.getNumAttrSets(); 334 I < IE; ++I) { 335 ArgumentAttributes.push_back(CallSiteAttrs.getParamAttributes(I)); 336 } 337 338 return AttributeList::get(Ctx, CallSiteAttrs.getFnAttributes(), 339 CallSiteAttrs.getRetAttributes(), 340 llvm::makeArrayRef(ArgumentAttributes)); 341 } 342 343 class DataFlowSanitizer { 344 friend struct DFSanFunction; 345 friend class DFSanVisitor; 346 347 enum { ShadowWidthBits = 8, ShadowWidthBytes = ShadowWidthBits / 8 }; 348 349 enum { OriginWidthBits = 32, OriginWidthBytes = OriginWidthBits / 8 }; 350 351 /// Which ABI should be used for instrumented functions? 
352 enum InstrumentedABI { 353 /// Argument and return value labels are passed through additional 354 /// arguments and by modifying the return type. 355 IA_Args, 356 357 /// Argument and return value labels are passed through TLS variables 358 /// __dfsan_arg_tls and __dfsan_retval_tls. 359 IA_TLS 360 }; 361 362 /// How should calls to uninstrumented functions be handled? 363 enum WrapperKind { 364 /// This function is present in an uninstrumented form but we don't know 365 /// how it should be handled. Print a warning and call the function anyway. 366 /// Don't label the return value. 367 WK_Warning, 368 369 /// This function does not write to (user-accessible) memory, and its return 370 /// value is unlabelled. 371 WK_Discard, 372 373 /// This function does not write to (user-accessible) memory, and the label 374 /// of its return value is the union of the label of its arguments. 375 WK_Functional, 376 377 /// Instead of calling the function, a custom wrapper __dfsw_F is called, 378 /// where F is the name of the function. This function may wrap the 379 /// original function or provide its own implementation. This is similar to 380 /// the IA_Args ABI, except that IA_Args uses a struct return type to 381 /// pass the return value shadow in a register, while WK_Custom uses an 382 /// extra pointer argument to return the shadow. This allows the wrapped 383 /// form of the function type to be expressed in C. 384 WK_Custom 385 }; 386 387 Module *Mod; 388 LLVMContext *Ctx; 389 Type *Int8Ptr; 390 IntegerType *OriginTy; 391 PointerType *OriginPtrTy; 392 ConstantInt *ZeroOrigin; 393 /// The shadow type for all primitive types and vector types. 
394 IntegerType *PrimitiveShadowTy; 395 PointerType *PrimitiveShadowPtrTy; 396 IntegerType *IntptrTy; 397 ConstantInt *ZeroPrimitiveShadow; 398 ConstantInt *ShadowPtrMask; 399 ConstantInt *ShadowBase; 400 ConstantInt *OriginBase; 401 Constant *ArgTLS; 402 ArrayType *ArgOriginTLSTy; 403 Constant *ArgOriginTLS; 404 Constant *RetvalTLS; 405 Constant *RetvalOriginTLS; 406 Constant *ExternalShadowMask; 407 FunctionType *DFSanUnionLoadFnTy; 408 FunctionType *DFSanLoadLabelAndOriginFnTy; 409 FunctionType *DFSanUnimplementedFnTy; 410 FunctionType *DFSanSetLabelFnTy; 411 FunctionType *DFSanNonzeroLabelFnTy; 412 FunctionType *DFSanVarargWrapperFnTy; 413 FunctionType *DFSanCmpCallbackFnTy; 414 FunctionType *DFSanLoadStoreCallbackFnTy; 415 FunctionType *DFSanMemTransferCallbackFnTy; 416 FunctionType *DFSanChainOriginFnTy; 417 FunctionType *DFSanChainOriginIfTaintedFnTy; 418 FunctionType *DFSanMemOriginTransferFnTy; 419 FunctionType *DFSanMaybeStoreOriginFnTy; 420 FunctionCallee DFSanUnionLoadFn; 421 FunctionCallee DFSanLoadLabelAndOriginFn; 422 FunctionCallee DFSanUnimplementedFn; 423 FunctionCallee DFSanSetLabelFn; 424 FunctionCallee DFSanNonzeroLabelFn; 425 FunctionCallee DFSanVarargWrapperFn; 426 FunctionCallee DFSanLoadCallbackFn; 427 FunctionCallee DFSanStoreCallbackFn; 428 FunctionCallee DFSanMemTransferCallbackFn; 429 FunctionCallee DFSanCmpCallbackFn; 430 FunctionCallee DFSanChainOriginFn; 431 FunctionCallee DFSanChainOriginIfTaintedFn; 432 FunctionCallee DFSanMemOriginTransferFn; 433 FunctionCallee DFSanMaybeStoreOriginFn; 434 SmallPtrSet<Value *, 16> DFSanRuntimeFunctions; 435 MDNode *ColdCallWeights; 436 MDNode *OriginStoreWeights; 437 DFSanABIList ABIList; 438 DenseMap<Value *, Function *> UnwrappedFnMap; 439 AttrBuilder ReadOnlyNoneAttrs; 440 bool DFSanRuntimeShadowMask = false; 441 442 Value *getShadowOffset(Value *Addr, IRBuilder<> &IRB); 443 Value *getShadowAddress(Value *Addr, Instruction *Pos); 444 Value *getShadowAddress(Value *Addr, Instruction *Pos, Value 
*ShadowOffset); 445 std::pair<Value *, Value *> 446 getShadowOriginAddress(Value *Addr, Align InstAlignment, Instruction *Pos); 447 bool isInstrumented(const Function *F); 448 bool isInstrumented(const GlobalAlias *GA); 449 FunctionType *getArgsFunctionType(FunctionType *T); 450 FunctionType *getTrampolineFunctionType(FunctionType *T); 451 TransformedFunction getCustomFunctionType(FunctionType *T); 452 InstrumentedABI getInstrumentedABI(); 453 WrapperKind getWrapperKind(Function *F); 454 void addGlobalNamePrefix(GlobalValue *GV); 455 Function *buildWrapperFunction(Function *F, StringRef NewFName, 456 GlobalValue::LinkageTypes NewFLink, 457 FunctionType *NewFT); 458 Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName); 459 void initializeCallbackFunctions(Module &M); 460 void initializeRuntimeFunctions(Module &M); 461 void injectMetadataGlobals(Module &M); 462 bool init(Module &M); 463 464 /// Advances \p OriginAddr to point to the next 32-bit origin and then loads 465 /// from it. Returns the origin's loaded value. 466 Value *loadNextOrigin(Instruction *Pos, Align OriginAlign, 467 Value **OriginAddr); 468 469 /// Returns whether the given load byte size is amenable to inlined 470 /// optimization patterns. 471 bool hasLoadSizeForFastPath(uint64_t Size); 472 473 /// Returns whether the pass tracks origins. Supports only TLS ABI mode. 474 bool shouldTrackOrigins(); 475 476 /// Returns whether the pass tracks labels for struct fields and array 477 /// indices. Supports only TLS ABI mode. 478 bool shouldTrackFieldsAndIndices(); 479 480 /// Returns a zero constant with the shadow type of OrigTy. 481 /// 482 /// getZeroShadow({T1,T2,...}) = {getZeroShadow(T1),getZeroShadow(T2,...} 483 /// getZeroShadow([n x T]) = [n x getZeroShadow(T)] 484 /// getZeroShadow(other type) = i16(0) 485 /// 486 /// Note that a zero shadow is always i16(0) when shouldTrackFieldsAndIndices 487 /// returns false. 
488 Constant *getZeroShadow(Type *OrigTy); 489 /// Returns a zero constant with the shadow type of V's type. 490 Constant *getZeroShadow(Value *V); 491 492 /// Checks if V is a zero shadow. 493 bool isZeroShadow(Value *V); 494 495 /// Returns the shadow type of OrigTy. 496 /// 497 /// getShadowTy({T1,T2,...}) = {getShadowTy(T1),getShadowTy(T2),...} 498 /// getShadowTy([n x T]) = [n x getShadowTy(T)] 499 /// getShadowTy(other type) = i16 500 /// 501 /// Note that a shadow type is always i16 when shouldTrackFieldsAndIndices 502 /// returns false. 503 Type *getShadowTy(Type *OrigTy); 504 /// Returns the shadow type of of V's type. 505 Type *getShadowTy(Value *V); 506 507 const uint64_t NumOfElementsInArgOrgTLS = ArgTLSSize / OriginWidthBytes; 508 509 public: 510 DataFlowSanitizer(const std::vector<std::string> &ABIListFiles); 511 512 bool runImpl(Module &M); 513 }; 514 515 struct DFSanFunction { 516 DataFlowSanitizer &DFS; 517 Function *F; 518 DominatorTree DT; 519 DataFlowSanitizer::InstrumentedABI IA; 520 bool IsNativeABI; 521 AllocaInst *LabelReturnAlloca = nullptr; 522 AllocaInst *OriginReturnAlloca = nullptr; 523 DenseMap<Value *, Value *> ValShadowMap; 524 DenseMap<Value *, Value *> ValOriginMap; 525 DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap; 526 DenseMap<AllocaInst *, AllocaInst *> AllocaOriginMap; 527 528 struct PHIFixupElement { 529 PHINode *Phi; 530 PHINode *ShadowPhi; 531 PHINode *OriginPhi; 532 }; 533 std::vector<PHIFixupElement> PHIFixups; 534 535 DenseSet<Instruction *> SkipInsts; 536 std::vector<Value *> NonZeroChecks; 537 538 struct CachedShadow { 539 BasicBlock *Block; // The block where Shadow is defined. 540 Value *Shadow; 541 }; 542 /// Maps a value to its latest shadow value in terms of domination tree. 543 DenseMap<std::pair<Value *, Value *>, CachedShadow> CachedShadows; 544 /// Maps a value to its latest collapsed shadow value it was converted to in 545 /// terms of domination tree. 
When ClDebugNonzeroLabels is on, this cache is 546 /// used at a post process where CFG blocks are split. So it does not cache 547 /// BasicBlock like CachedShadows, but uses domination between values. 548 DenseMap<Value *, Value *> CachedCollapsedShadows; 549 DenseMap<Value *, std::set<Value *>> ShadowElements; 550 551 DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI) 552 : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), IsNativeABI(IsNativeABI) { 553 DT.recalculate(*F); 554 } 555 556 /// Computes the shadow address for a given function argument. 557 /// 558 /// Shadow = ArgTLS+ArgOffset. 559 Value *getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB); 560 561 /// Computes the shadow address for a return value. 562 Value *getRetvalTLS(Type *T, IRBuilder<> &IRB); 563 564 /// Computes the origin address for a given function argument. 565 /// 566 /// Origin = ArgOriginTLS[ArgNo]. 567 Value *getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB); 568 569 /// Computes the origin address for a return value. 570 Value *getRetvalOriginTLS(); 571 572 Value *getOrigin(Value *V); 573 void setOrigin(Instruction *I, Value *Origin); 574 /// Generates IR to compute the origin of the last operand with a taint label. 575 Value *combineOperandOrigins(Instruction *Inst); 576 /// Before the instruction Pos, generates IR to compute the last origin with a 577 /// taint label. Labels and origins are from vectors Shadows and Origins 578 /// correspondingly. The generated IR is like 579 /// Sn-1 != Zero ? On-1: ... S2 != Zero ? O2: S1 != Zero ? O1: O0 580 /// When Zero is nullptr, it uses ZeroPrimitiveShadow. Otherwise it can be 581 /// zeros with other bitwidths. 
582 Value *combineOrigins(const std::vector<Value *> &Shadows, 583 const std::vector<Value *> &Origins, Instruction *Pos, 584 ConstantInt *Zero = nullptr); 585 586 Value *getShadow(Value *V); 587 void setShadow(Instruction *I, Value *Shadow); 588 /// Generates IR to compute the union of the two given shadows, inserting it 589 /// before Pos. The combined value is with primitive type. 590 Value *combineShadows(Value *V1, Value *V2, Instruction *Pos); 591 /// Combines the shadow values of V1 and V2, then converts the combined value 592 /// with primitive type into a shadow value with the original type T. 593 Value *combineShadowsThenConvert(Type *T, Value *V1, Value *V2, 594 Instruction *Pos); 595 Value *combineOperandShadows(Instruction *Inst); 596 597 /// Generates IR to load shadow and origin corresponding to bytes [\p 598 /// Addr, \p Addr + \p Size), where addr has alignment \p 599 /// InstAlignment, and take the union of each of those shadows. The returned 600 /// shadow always has primitive type. 601 /// 602 /// When tracking loads is enabled, the returned origin is a chain at the 603 /// current stack if the returned shadow is tainted. 604 std::pair<Value *, Value *> loadShadowOrigin(Value *Addr, uint64_t Size, 605 Align InstAlignment, 606 Instruction *Pos); 607 608 void storePrimitiveShadowOrigin(Value *Addr, uint64_t Size, 609 Align InstAlignment, Value *PrimitiveShadow, 610 Value *Origin, Instruction *Pos); 611 /// Applies PrimitiveShadow to all primitive subtypes of T, returning 612 /// the expanded shadow value. 613 /// 614 /// EFP({T1,T2, ...}, PS) = {EFP(T1,PS),EFP(T2,PS),...} 615 /// EFP([n x T], PS) = [n x EFP(T,PS)] 616 /// EFP(other types, PS) = PS 617 Value *expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow, 618 Instruction *Pos); 619 /// Collapses Shadow into a single primitive shadow value, unioning all 620 /// primitive shadow values in the process. Returns the final primitive 621 /// shadow value. 
622 /// 623 /// CTP({V1,V2, ...}) = UNION(CFP(V1,PS),CFP(V2,PS),...) 624 /// CTP([V1,V2,...]) = UNION(CFP(V1,PS),CFP(V2,PS),...) 625 /// CTP(other types, PS) = PS 626 Value *collapseToPrimitiveShadow(Value *Shadow, Instruction *Pos); 627 628 void storeZeroPrimitiveShadow(Value *Addr, uint64_t Size, Align ShadowAlign, 629 Instruction *Pos); 630 631 Align getShadowAlign(Align InstAlignment); 632 633 private: 634 /// Collapses the shadow with aggregate type into a single primitive shadow 635 /// value. 636 template <class AggregateType> 637 Value *collapseAggregateShadow(AggregateType *AT, Value *Shadow, 638 IRBuilder<> &IRB); 639 640 Value *collapseToPrimitiveShadow(Value *Shadow, IRBuilder<> &IRB); 641 642 /// Returns the shadow value of an argument A. 643 Value *getShadowForTLSArgument(Argument *A); 644 645 /// The fast path of loading shadows. 646 std::pair<Value *, Value *> 647 loadShadowFast(Value *ShadowAddr, Value *OriginAddr, uint64_t Size, 648 Align ShadowAlign, Align OriginAlign, Value *FirstOrigin, 649 Instruction *Pos); 650 651 Align getOriginAlign(Align InstAlignment); 652 653 /// Because 4 contiguous bytes share one 4-byte origin, the most accurate load 654 /// is __dfsan_load_label_and_origin. This function returns the union of all 655 /// labels and the origin of the first taint label. However this is an 656 /// additional call with many instructions. To ensure common cases are fast, 657 /// checks if it is possible to load labels and origins without using the 658 /// callback function. 659 /// 660 /// When enabling tracking load instructions, we always use 661 /// __dfsan_load_label_and_origin to reduce code size. 662 bool useCallbackLoadLabelAndOrigin(uint64_t Size, Align InstAlignment); 663 664 /// Returns a chain at the current stack with previous origin V. 665 Value *updateOrigin(Value *V, IRBuilder<> &IRB); 666 667 /// Returns a chain at the current stack with previous origin V if Shadow is 668 /// tainted. 
669 Value *updateOriginIfTainted(Value *Shadow, Value *Origin, IRBuilder<> &IRB); 670 671 /// Creates an Intptr = Origin | Origin << 32 if Intptr's size is 64. Returns 672 /// Origin otherwise. 673 Value *originToIntptr(IRBuilder<> &IRB, Value *Origin); 674 675 /// Stores Origin into the address range [StoreOriginAddr, StoreOriginAddr + 676 /// Size). 677 void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *StoreOriginAddr, 678 uint64_t StoreOriginSize, Align Alignment); 679 680 /// Stores Origin in terms of its Shadow value. 681 /// * Do not write origins for zero shadows because we do not trace origins 682 /// for untainted sinks. 683 /// * Use __dfsan_maybe_store_origin if there are too many origin store 684 /// instrumentations. 685 void storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size, Value *Shadow, 686 Value *Origin, Value *StoreOriginAddr, Align InstAlignment); 687 688 /// Convert a scalar value to an i1 by comparing with 0. 689 Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &Name = ""); 690 691 bool shouldInstrumentWithCall(); 692 693 /// Generates IR to load shadow and origin corresponding to bytes [\p 694 /// Addr, \p Addr + \p Size), where addr has alignment \p 695 /// InstAlignment, and take the union of each of those shadows. The returned 696 /// shadow always has primitive type. 697 std::pair<Value *, Value *> 698 loadShadowOriginSansLoadTracking(Value *Addr, uint64_t Size, 699 Align InstAlignment, Instruction *Pos); 700 int NumOriginStores = 0; 701 }; 702 703 class DFSanVisitor : public InstVisitor<DFSanVisitor> { 704 public: 705 DFSanFunction &DFSF; 706 707 DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {} 708 709 const DataLayout &getDataLayout() const { 710 return DFSF.F->getParent()->getDataLayout(); 711 } 712 713 // Combines shadow values and origins for all of I's operands. 
714 void visitInstOperands(Instruction &I); 715 716 void visitUnaryOperator(UnaryOperator &UO); 717 void visitBinaryOperator(BinaryOperator &BO); 718 void visitBitCastInst(BitCastInst &BCI); 719 void visitCastInst(CastInst &CI); 720 void visitCmpInst(CmpInst &CI); 721 void visitLandingPadInst(LandingPadInst &LPI); 722 void visitGetElementPtrInst(GetElementPtrInst &GEPI); 723 void visitLoadInst(LoadInst &LI); 724 void visitStoreInst(StoreInst &SI); 725 void visitAtomicRMWInst(AtomicRMWInst &I); 726 void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I); 727 void visitReturnInst(ReturnInst &RI); 728 void visitCallBase(CallBase &CB); 729 void visitPHINode(PHINode &PN); 730 void visitExtractElementInst(ExtractElementInst &I); 731 void visitInsertElementInst(InsertElementInst &I); 732 void visitShuffleVectorInst(ShuffleVectorInst &I); 733 void visitExtractValueInst(ExtractValueInst &I); 734 void visitInsertValueInst(InsertValueInst &I); 735 void visitAllocaInst(AllocaInst &I); 736 void visitSelectInst(SelectInst &I); 737 void visitMemSetInst(MemSetInst &I); 738 void visitMemTransferInst(MemTransferInst &I); 739 740 private: 741 void visitCASOrRMW(Align InstAlignment, Instruction &I); 742 743 // Returns false when this is an invoke of a custom function. 744 bool visitWrappedCallBase(Function &F, CallBase &CB); 745 746 // Combines origins for all of I's operands. 747 void visitInstOperandOrigins(Instruction &I); 748 749 void addShadowArguments(Function &F, CallBase &CB, std::vector<Value *> &Args, 750 IRBuilder<> &IRB); 751 752 void addOriginArguments(Function &F, CallBase &CB, std::vector<Value *> &Args, 753 IRBuilder<> &IRB); 754 }; 755 756 } // end anonymous namespace 757 758 DataFlowSanitizer::DataFlowSanitizer( 759 const std::vector<std::string> &ABIListFiles) { 760 std::vector<std::string> AllABIListFiles(std::move(ABIListFiles)); 761 llvm::append_range(AllABIListFiles, ClABIListFiles); 762 // FIXME: should we propagate vfs::FileSystem to this constructor? 
763 ABIList.set( 764 SpecialCaseList::createOrDie(AllABIListFiles, *vfs::getRealFileSystem())); 765 } 766 767 FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) { 768 SmallVector<Type *, 4> ArgTypes(T->param_begin(), T->param_end()); 769 ArgTypes.append(T->getNumParams(), PrimitiveShadowTy); 770 if (T->isVarArg()) 771 ArgTypes.push_back(PrimitiveShadowPtrTy); 772 Type *RetType = T->getReturnType(); 773 if (!RetType->isVoidTy()) 774 RetType = StructType::get(RetType, PrimitiveShadowTy); 775 return FunctionType::get(RetType, ArgTypes, T->isVarArg()); 776 } 777 778 FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) { 779 assert(!T->isVarArg()); 780 SmallVector<Type *, 4> ArgTypes; 781 ArgTypes.push_back(T->getPointerTo()); 782 ArgTypes.append(T->param_begin(), T->param_end()); 783 ArgTypes.append(T->getNumParams(), PrimitiveShadowTy); 784 Type *RetType = T->getReturnType(); 785 if (!RetType->isVoidTy()) 786 ArgTypes.push_back(PrimitiveShadowPtrTy); 787 788 if (shouldTrackOrigins()) { 789 ArgTypes.append(T->getNumParams(), OriginTy); 790 if (!RetType->isVoidTy()) 791 ArgTypes.push_back(OriginPtrTy); 792 } 793 794 return FunctionType::get(T->getReturnType(), ArgTypes, false); 795 } 796 797 TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) { 798 SmallVector<Type *, 4> ArgTypes; 799 800 // Some parameters of the custom function being constructed are 801 // parameters of T. Record the mapping from parameters of T to 802 // parameters of the custom function, so that parameter attributes 803 // at call sites can be updated. 
804 std::vector<unsigned> ArgumentIndexMapping; 805 for (unsigned I = 0, E = T->getNumParams(); I != E; ++I) { 806 Type *ParamType = T->getParamType(I); 807 FunctionType *FT; 808 if (isa<PointerType>(ParamType) && 809 (FT = dyn_cast<FunctionType>(ParamType->getPointerElementType()))) { 810 ArgumentIndexMapping.push_back(ArgTypes.size()); 811 ArgTypes.push_back(getTrampolineFunctionType(FT)->getPointerTo()); 812 ArgTypes.push_back(Type::getInt8PtrTy(*Ctx)); 813 } else { 814 ArgumentIndexMapping.push_back(ArgTypes.size()); 815 ArgTypes.push_back(ParamType); 816 } 817 } 818 for (unsigned I = 0, E = T->getNumParams(); I != E; ++I) 819 ArgTypes.push_back(PrimitiveShadowTy); 820 if (T->isVarArg()) 821 ArgTypes.push_back(PrimitiveShadowPtrTy); 822 Type *RetType = T->getReturnType(); 823 if (!RetType->isVoidTy()) 824 ArgTypes.push_back(PrimitiveShadowPtrTy); 825 826 if (shouldTrackOrigins()) { 827 for (unsigned I = 0, E = T->getNumParams(); I != E; ++I) 828 ArgTypes.push_back(OriginTy); 829 if (T->isVarArg()) 830 ArgTypes.push_back(OriginPtrTy); 831 if (!RetType->isVoidTy()) 832 ArgTypes.push_back(OriginPtrTy); 833 } 834 835 return TransformedFunction( 836 T, FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()), 837 ArgumentIndexMapping); 838 } 839 840 bool DataFlowSanitizer::isZeroShadow(Value *V) { 841 if (!shouldTrackFieldsAndIndices()) 842 return ZeroPrimitiveShadow == V; 843 844 Type *T = V->getType(); 845 if (!isa<ArrayType>(T) && !isa<StructType>(T)) { 846 if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) 847 return CI->isZero(); 848 return false; 849 } 850 851 return isa<ConstantAggregateZero>(V); 852 } 853 854 bool DataFlowSanitizer::hasLoadSizeForFastPath(uint64_t Size) { 855 uint64_t ShadowSize = Size * ShadowWidthBytes; 856 return ShadowSize % 8 == 0 || ShadowSize == 4; 857 } 858 859 bool DataFlowSanitizer::shouldTrackOrigins() { 860 static const bool ShouldTrackOrigins = 861 ClTrackOrigins && getInstrumentedABI() == DataFlowSanitizer::IA_TLS; 862 
return ShouldTrackOrigins; 863 } 864 865 bool DataFlowSanitizer::shouldTrackFieldsAndIndices() { 866 return getInstrumentedABI() == DataFlowSanitizer::IA_TLS; 867 } 868 869 Constant *DataFlowSanitizer::getZeroShadow(Type *OrigTy) { 870 if (!shouldTrackFieldsAndIndices()) 871 return ZeroPrimitiveShadow; 872 873 if (!isa<ArrayType>(OrigTy) && !isa<StructType>(OrigTy)) 874 return ZeroPrimitiveShadow; 875 Type *ShadowTy = getShadowTy(OrigTy); 876 return ConstantAggregateZero::get(ShadowTy); 877 } 878 879 Constant *DataFlowSanitizer::getZeroShadow(Value *V) { 880 return getZeroShadow(V->getType()); 881 } 882 883 static Value *expandFromPrimitiveShadowRecursive( 884 Value *Shadow, SmallVector<unsigned, 4> &Indices, Type *SubShadowTy, 885 Value *PrimitiveShadow, IRBuilder<> &IRB) { 886 if (!isa<ArrayType>(SubShadowTy) && !isa<StructType>(SubShadowTy)) 887 return IRB.CreateInsertValue(Shadow, PrimitiveShadow, Indices); 888 889 if (ArrayType *AT = dyn_cast<ArrayType>(SubShadowTy)) { 890 for (unsigned Idx = 0; Idx < AT->getNumElements(); Idx++) { 891 Indices.push_back(Idx); 892 Shadow = expandFromPrimitiveShadowRecursive( 893 Shadow, Indices, AT->getElementType(), PrimitiveShadow, IRB); 894 Indices.pop_back(); 895 } 896 return Shadow; 897 } 898 899 if (StructType *ST = dyn_cast<StructType>(SubShadowTy)) { 900 for (unsigned Idx = 0; Idx < ST->getNumElements(); Idx++) { 901 Indices.push_back(Idx); 902 Shadow = expandFromPrimitiveShadowRecursive( 903 Shadow, Indices, ST->getElementType(Idx), PrimitiveShadow, IRB); 904 Indices.pop_back(); 905 } 906 return Shadow; 907 } 908 llvm_unreachable("Unexpected shadow type"); 909 } 910 911 bool DFSanFunction::shouldInstrumentWithCall() { 912 return ClInstrumentWithCallThreshold >= 0 && 913 NumOriginStores >= ClInstrumentWithCallThreshold; 914 } 915 916 Value *DFSanFunction::expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow, 917 Instruction *Pos) { 918 Type *ShadowTy = DFS.getShadowTy(T); 919 920 if (!isa<ArrayType>(ShadowTy) && 
!isa<StructType>(ShadowTy)) 921 return PrimitiveShadow; 922 923 if (DFS.isZeroShadow(PrimitiveShadow)) 924 return DFS.getZeroShadow(ShadowTy); 925 926 IRBuilder<> IRB(Pos); 927 SmallVector<unsigned, 4> Indices; 928 Value *Shadow = UndefValue::get(ShadowTy); 929 Shadow = expandFromPrimitiveShadowRecursive(Shadow, Indices, ShadowTy, 930 PrimitiveShadow, IRB); 931 932 // Caches the primitive shadow value that built the shadow value. 933 CachedCollapsedShadows[Shadow] = PrimitiveShadow; 934 return Shadow; 935 } 936 937 template <class AggregateType> 938 Value *DFSanFunction::collapseAggregateShadow(AggregateType *AT, Value *Shadow, 939 IRBuilder<> &IRB) { 940 if (!AT->getNumElements()) 941 return DFS.ZeroPrimitiveShadow; 942 943 Value *FirstItem = IRB.CreateExtractValue(Shadow, 0); 944 Value *Aggregator = collapseToPrimitiveShadow(FirstItem, IRB); 945 946 for (unsigned Idx = 1; Idx < AT->getNumElements(); Idx++) { 947 Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx); 948 Value *ShadowInner = collapseToPrimitiveShadow(ShadowItem, IRB); 949 Aggregator = IRB.CreateOr(Aggregator, ShadowInner); 950 } 951 return Aggregator; 952 } 953 954 Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow, 955 IRBuilder<> &IRB) { 956 Type *ShadowTy = Shadow->getType(); 957 if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy)) 958 return Shadow; 959 if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy)) 960 return collapseAggregateShadow<>(AT, Shadow, IRB); 961 if (StructType *ST = dyn_cast<StructType>(ShadowTy)) 962 return collapseAggregateShadow<>(ST, Shadow, IRB); 963 llvm_unreachable("Unexpected shadow type"); 964 } 965 966 Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow, 967 Instruction *Pos) { 968 Type *ShadowTy = Shadow->getType(); 969 if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy)) 970 return Shadow; 971 972 assert(DFS.shouldTrackFieldsAndIndices()); 973 974 // Checks if the cached collapsed shadow value dominates Pos. 
975 Value *&CS = CachedCollapsedShadows[Shadow]; 976 if (CS && DT.dominates(CS, Pos)) 977 return CS; 978 979 IRBuilder<> IRB(Pos); 980 Value *PrimitiveShadow = collapseToPrimitiveShadow(Shadow, IRB); 981 // Caches the converted primitive shadow value. 982 CS = PrimitiveShadow; 983 return PrimitiveShadow; 984 } 985 986 Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) { 987 if (!shouldTrackFieldsAndIndices()) 988 return PrimitiveShadowTy; 989 990 if (!OrigTy->isSized()) 991 return PrimitiveShadowTy; 992 if (isa<IntegerType>(OrigTy)) 993 return PrimitiveShadowTy; 994 if (isa<VectorType>(OrigTy)) 995 return PrimitiveShadowTy; 996 if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy)) 997 return ArrayType::get(getShadowTy(AT->getElementType()), 998 AT->getNumElements()); 999 if (StructType *ST = dyn_cast<StructType>(OrigTy)) { 1000 SmallVector<Type *, 4> Elements; 1001 for (unsigned I = 0, N = ST->getNumElements(); I < N; ++I) 1002 Elements.push_back(getShadowTy(ST->getElementType(I))); 1003 return StructType::get(*Ctx, Elements); 1004 } 1005 return PrimitiveShadowTy; 1006 } 1007 1008 Type *DataFlowSanitizer::getShadowTy(Value *V) { 1009 return getShadowTy(V->getType()); 1010 } 1011 1012 bool DataFlowSanitizer::init(Module &M) { 1013 Triple TargetTriple(M.getTargetTriple()); 1014 const DataLayout &DL = M.getDataLayout(); 1015 1016 Mod = &M; 1017 Ctx = &M.getContext(); 1018 Int8Ptr = Type::getInt8PtrTy(*Ctx); 1019 OriginTy = IntegerType::get(*Ctx, OriginWidthBits); 1020 OriginPtrTy = PointerType::getUnqual(OriginTy); 1021 PrimitiveShadowTy = IntegerType::get(*Ctx, ShadowWidthBits); 1022 PrimitiveShadowPtrTy = PointerType::getUnqual(PrimitiveShadowTy); 1023 IntptrTy = DL.getIntPtrType(*Ctx); 1024 ZeroPrimitiveShadow = ConstantInt::getSigned(PrimitiveShadowTy, 0); 1025 ZeroOrigin = ConstantInt::getSigned(OriginTy, 0); 1026 1027 // TODO: these should be platform-specific and set in the switch-stmt below. 
1028 ShadowBase = ConstantInt::get(IntptrTy, 0x100000008000LL); 1029 OriginBase = ConstantInt::get(IntptrTy, 0x200000008000LL); 1030 1031 switch (TargetTriple.getArch()) { 1032 case Triple::x86_64: 1033 ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x600000000000LL); 1034 break; 1035 case Triple::mips64: 1036 case Triple::mips64el: 1037 ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0xE000000000LL); 1038 break; 1039 case Triple::aarch64: 1040 case Triple::aarch64_be: 1041 // AArch64 supports multiple VMAs and the shadow mask is set at runtime. 1042 DFSanRuntimeShadowMask = true; 1043 break; 1044 default: 1045 report_fatal_error("unsupported triple"); 1046 } 1047 1048 Type *DFSanUnionLoadArgs[2] = {PrimitiveShadowPtrTy, IntptrTy}; 1049 DFSanUnionLoadFnTy = FunctionType::get(PrimitiveShadowTy, DFSanUnionLoadArgs, 1050 /*isVarArg=*/false); 1051 Type *DFSanLoadLabelAndOriginArgs[2] = {Int8Ptr, IntptrTy}; 1052 DFSanLoadLabelAndOriginFnTy = 1053 FunctionType::get(IntegerType::get(*Ctx, 64), DFSanLoadLabelAndOriginArgs, 1054 /*isVarArg=*/false); 1055 DFSanUnimplementedFnTy = FunctionType::get( 1056 Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false); 1057 Type *DFSanSetLabelArgs[4] = {PrimitiveShadowTy, OriginTy, 1058 Type::getInt8PtrTy(*Ctx), IntptrTy}; 1059 DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx), 1060 DFSanSetLabelArgs, /*isVarArg=*/false); 1061 DFSanNonzeroLabelFnTy = 1062 FunctionType::get(Type::getVoidTy(*Ctx), None, /*isVarArg=*/false); 1063 DFSanVarargWrapperFnTy = FunctionType::get( 1064 Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false); 1065 DFSanCmpCallbackFnTy = 1066 FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy, 1067 /*isVarArg=*/false); 1068 DFSanChainOriginFnTy = 1069 FunctionType::get(OriginTy, OriginTy, /*isVarArg=*/false); 1070 Type *DFSanChainOriginIfTaintedArgs[2] = {PrimitiveShadowTy, OriginTy}; 1071 DFSanChainOriginIfTaintedFnTy = FunctionType::get( 1072 OriginTy, 
DFSanChainOriginIfTaintedArgs, /*isVarArg=*/false); 1073 Type *DFSanMaybeStoreOriginArgs[4] = {IntegerType::get(*Ctx, ShadowWidthBits), 1074 Int8Ptr, IntptrTy, OriginTy}; 1075 DFSanMaybeStoreOriginFnTy = FunctionType::get( 1076 Type::getVoidTy(*Ctx), DFSanMaybeStoreOriginArgs, /*isVarArg=*/false); 1077 Type *DFSanMemOriginTransferArgs[3] = {Int8Ptr, Int8Ptr, IntptrTy}; 1078 DFSanMemOriginTransferFnTy = FunctionType::get( 1079 Type::getVoidTy(*Ctx), DFSanMemOriginTransferArgs, /*isVarArg=*/false); 1080 Type *DFSanLoadStoreCallbackArgs[2] = {PrimitiveShadowTy, Int8Ptr}; 1081 DFSanLoadStoreCallbackFnTy = 1082 FunctionType::get(Type::getVoidTy(*Ctx), DFSanLoadStoreCallbackArgs, 1083 /*isVarArg=*/false); 1084 Type *DFSanMemTransferCallbackArgs[2] = {PrimitiveShadowPtrTy, IntptrTy}; 1085 DFSanMemTransferCallbackFnTy = 1086 FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemTransferCallbackArgs, 1087 /*isVarArg=*/false); 1088 1089 ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000); 1090 OriginStoreWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000); 1091 return true; 1092 } 1093 1094 bool DataFlowSanitizer::isInstrumented(const Function *F) { 1095 return !ABIList.isIn(*F, "uninstrumented"); 1096 } 1097 1098 bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) { 1099 return !ABIList.isIn(*GA, "uninstrumented"); 1100 } 1101 1102 DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() { 1103 return ClArgsABI ? 
IA_Args : IA_TLS; 1104 } 1105 1106 DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) { 1107 if (ABIList.isIn(*F, "functional")) 1108 return WK_Functional; 1109 if (ABIList.isIn(*F, "discard")) 1110 return WK_Discard; 1111 if (ABIList.isIn(*F, "custom")) 1112 return WK_Custom; 1113 1114 return WK_Warning; 1115 } 1116 1117 void DataFlowSanitizer::addGlobalNamePrefix(GlobalValue *GV) { 1118 std::string GVName = std::string(GV->getName()), Prefix = "dfs$"; 1119 GV->setName(Prefix + GVName); 1120 1121 // Try to change the name of the function in module inline asm. We only do 1122 // this for specific asm directives, currently only ".symver", to try to avoid 1123 // corrupting asm which happens to contain the symbol name as a substring. 1124 // Note that the substitution for .symver assumes that the versioned symbol 1125 // also has an instrumented name. 1126 std::string Asm = GV->getParent()->getModuleInlineAsm(); 1127 std::string SearchStr = ".symver " + GVName + ","; 1128 size_t Pos = Asm.find(SearchStr); 1129 if (Pos != std::string::npos) { 1130 Asm.replace(Pos, SearchStr.size(), 1131 ".symver " + Prefix + GVName + "," + Prefix); 1132 GV->getParent()->setModuleInlineAsm(Asm); 1133 } 1134 } 1135 1136 Function * 1137 DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName, 1138 GlobalValue::LinkageTypes NewFLink, 1139 FunctionType *NewFT) { 1140 FunctionType *FT = F->getFunctionType(); 1141 Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(), 1142 NewFName, F->getParent()); 1143 NewF->copyAttributesFrom(F); 1144 NewF->removeAttributes( 1145 AttributeList::ReturnIndex, 1146 AttributeFuncs::typeIncompatible(NewFT->getReturnType())); 1147 1148 BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF); 1149 if (F->isVarArg()) { 1150 NewF->removeAttributes(AttributeList::FunctionIndex, 1151 AttrBuilder().addAttribute("split-stack")); 1152 CallInst::Create(DFSanVarargWrapperFn, 1153 
IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "", 1154 BB); 1155 new UnreachableInst(*Ctx, BB); 1156 } else { 1157 auto ArgIt = pointer_iterator<Argument *>(NewF->arg_begin()); 1158 std::vector<Value *> Args(ArgIt, ArgIt + FT->getNumParams()); 1159 1160 CallInst *CI = CallInst::Create(F, Args, "", BB); 1161 if (FT->getReturnType()->isVoidTy()) 1162 ReturnInst::Create(*Ctx, BB); 1163 else 1164 ReturnInst::Create(*Ctx, CI, BB); 1165 } 1166 1167 return NewF; 1168 } 1169 1170 Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT, 1171 StringRef FName) { 1172 FunctionType *FTT = getTrampolineFunctionType(FT); 1173 FunctionCallee C = Mod->getOrInsertFunction(FName, FTT); 1174 Function *F = dyn_cast<Function>(C.getCallee()); 1175 if (F && F->isDeclaration()) { 1176 F->setLinkage(GlobalValue::LinkOnceODRLinkage); 1177 BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F); 1178 std::vector<Value *> Args; 1179 Function::arg_iterator AI = F->arg_begin() + 1; 1180 for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N) 1181 Args.push_back(&*AI); 1182 CallInst *CI = CallInst::Create(FT, &*F->arg_begin(), Args, "", BB); 1183 Type *RetType = FT->getReturnType(); 1184 ReturnInst *RI = RetType->isVoidTy() ? ReturnInst::Create(*Ctx, BB) 1185 : ReturnInst::Create(*Ctx, CI, BB); 1186 1187 // F is called by a wrapped custom function with primitive shadows. So 1188 // its arguments and return value need conversion. 
1189 DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true); 1190 Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; 1191 ++ValAI; 1192 for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N) { 1193 Value *Shadow = 1194 DFSF.expandFromPrimitiveShadow(ValAI->getType(), &*ShadowAI, CI); 1195 DFSF.ValShadowMap[&*ValAI] = Shadow; 1196 } 1197 Function::arg_iterator RetShadowAI = ShadowAI; 1198 const bool ShouldTrackOrigins = shouldTrackOrigins(); 1199 if (ShouldTrackOrigins) { 1200 ValAI = F->arg_begin(); 1201 ++ValAI; 1202 Function::arg_iterator OriginAI = ShadowAI; 1203 if (!RetType->isVoidTy()) 1204 ++OriginAI; 1205 for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++OriginAI, --N) { 1206 DFSF.ValOriginMap[&*ValAI] = &*OriginAI; 1207 } 1208 } 1209 DFSanVisitor(DFSF).visitCallInst(*CI); 1210 if (!RetType->isVoidTy()) { 1211 Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow( 1212 DFSF.getShadow(RI->getReturnValue()), RI); 1213 new StoreInst(PrimitiveShadow, &*RetShadowAI, RI); 1214 if (ShouldTrackOrigins) { 1215 Value *Origin = DFSF.getOrigin(RI->getReturnValue()); 1216 new StoreInst(Origin, &*std::prev(F->arg_end()), RI); 1217 } 1218 } 1219 } 1220 1221 return cast<Constant>(C.getCallee()); 1222 } 1223 1224 // Initialize DataFlowSanitizer runtime functions and declare them in the module 1225 void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) { 1226 { 1227 AttributeList AL; 1228 AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, 1229 Attribute::NoUnwind); 1230 AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, 1231 Attribute::ReadOnly); 1232 AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex, 1233 Attribute::ZExt); 1234 DFSanUnionLoadFn = 1235 Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL); 1236 } 1237 { 1238 AttributeList AL; 1239 AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, 1240 Attribute::NoUnwind); 1241 AL = 
AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, 1242 Attribute::ReadOnly); 1243 AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex, 1244 Attribute::ZExt); 1245 DFSanLoadLabelAndOriginFn = Mod->getOrInsertFunction( 1246 "__dfsan_load_label_and_origin", DFSanLoadLabelAndOriginFnTy, AL); 1247 } 1248 DFSanUnimplementedFn = 1249 Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy); 1250 { 1251 AttributeList AL; 1252 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt); 1253 AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt); 1254 DFSanSetLabelFn = 1255 Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL); 1256 } 1257 DFSanNonzeroLabelFn = 1258 Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy); 1259 DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper", 1260 DFSanVarargWrapperFnTy); 1261 { 1262 AttributeList AL; 1263 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt); 1264 AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex, 1265 Attribute::ZExt); 1266 DFSanChainOriginFn = Mod->getOrInsertFunction("__dfsan_chain_origin", 1267 DFSanChainOriginFnTy, AL); 1268 } 1269 { 1270 AttributeList AL; 1271 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt); 1272 AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt); 1273 AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex, 1274 Attribute::ZExt); 1275 DFSanChainOriginIfTaintedFn = Mod->getOrInsertFunction( 1276 "__dfsan_chain_origin_if_tainted", DFSanChainOriginIfTaintedFnTy, AL); 1277 } 1278 DFSanMemOriginTransferFn = Mod->getOrInsertFunction( 1279 "__dfsan_mem_origin_transfer", DFSanMemOriginTransferFnTy); 1280 1281 { 1282 AttributeList AL; 1283 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt); 1284 AL = AL.addParamAttribute(M.getContext(), 3, Attribute::ZExt); 1285 DFSanMaybeStoreOriginFn = Mod->getOrInsertFunction( 1286 
"__dfsan_maybe_store_origin", DFSanMaybeStoreOriginFnTy, AL); 1287 } 1288 1289 DFSanRuntimeFunctions.insert( 1290 DFSanUnionLoadFn.getCallee()->stripPointerCasts()); 1291 DFSanRuntimeFunctions.insert( 1292 DFSanLoadLabelAndOriginFn.getCallee()->stripPointerCasts()); 1293 DFSanRuntimeFunctions.insert( 1294 DFSanUnimplementedFn.getCallee()->stripPointerCasts()); 1295 DFSanRuntimeFunctions.insert( 1296 DFSanSetLabelFn.getCallee()->stripPointerCasts()); 1297 DFSanRuntimeFunctions.insert( 1298 DFSanNonzeroLabelFn.getCallee()->stripPointerCasts()); 1299 DFSanRuntimeFunctions.insert( 1300 DFSanVarargWrapperFn.getCallee()->stripPointerCasts()); 1301 DFSanRuntimeFunctions.insert( 1302 DFSanLoadCallbackFn.getCallee()->stripPointerCasts()); 1303 DFSanRuntimeFunctions.insert( 1304 DFSanStoreCallbackFn.getCallee()->stripPointerCasts()); 1305 DFSanRuntimeFunctions.insert( 1306 DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts()); 1307 DFSanRuntimeFunctions.insert( 1308 DFSanCmpCallbackFn.getCallee()->stripPointerCasts()); 1309 DFSanRuntimeFunctions.insert( 1310 DFSanChainOriginFn.getCallee()->stripPointerCasts()); 1311 DFSanRuntimeFunctions.insert( 1312 DFSanChainOriginIfTaintedFn.getCallee()->stripPointerCasts()); 1313 DFSanRuntimeFunctions.insert( 1314 DFSanMemOriginTransferFn.getCallee()->stripPointerCasts()); 1315 DFSanRuntimeFunctions.insert( 1316 DFSanMaybeStoreOriginFn.getCallee()->stripPointerCasts()); 1317 } 1318 1319 // Initializes event callback functions and declare them in the module 1320 void DataFlowSanitizer::initializeCallbackFunctions(Module &M) { 1321 DFSanLoadCallbackFn = Mod->getOrInsertFunction("__dfsan_load_callback", 1322 DFSanLoadStoreCallbackFnTy); 1323 DFSanStoreCallbackFn = Mod->getOrInsertFunction("__dfsan_store_callback", 1324 DFSanLoadStoreCallbackFnTy); 1325 DFSanMemTransferCallbackFn = Mod->getOrInsertFunction( 1326 "__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy); 1327 DFSanCmpCallbackFn = 1328 
Mod->getOrInsertFunction("__dfsan_cmp_callback", DFSanCmpCallbackFnTy); 1329 } 1330 1331 void DataFlowSanitizer::injectMetadataGlobals(Module &M) { 1332 // These variables can be used: 1333 // - by the runtime (to discover what the shadow width was, during 1334 // compilation) 1335 // - in testing (to avoid hardcoding the shadow width and type but instead 1336 // extract them by pattern matching) 1337 Type *IntTy = Type::getInt32Ty(*Ctx); 1338 (void)Mod->getOrInsertGlobal("__dfsan_shadow_width_bits", IntTy, [&] { 1339 return new GlobalVariable( 1340 M, IntTy, /*isConstant=*/true, GlobalValue::WeakODRLinkage, 1341 ConstantInt::get(IntTy, ShadowWidthBits), "__dfsan_shadow_width_bits"); 1342 }); 1343 (void)Mod->getOrInsertGlobal("__dfsan_shadow_width_bytes", IntTy, [&] { 1344 return new GlobalVariable(M, IntTy, /*isConstant=*/true, 1345 GlobalValue::WeakODRLinkage, 1346 ConstantInt::get(IntTy, ShadowWidthBytes), 1347 "__dfsan_shadow_width_bytes"); 1348 }); 1349 } 1350 1351 bool DataFlowSanitizer::runImpl(Module &M) { 1352 init(M); 1353 1354 if (ABIList.isIn(M, "skip")) 1355 return false; 1356 1357 const unsigned InitialGlobalSize = M.global_size(); 1358 const unsigned InitialModuleSize = M.size(); 1359 1360 bool Changed = false; 1361 1362 auto GetOrInsertGlobal = [this, &Changed](StringRef Name, 1363 Type *Ty) -> Constant * { 1364 Constant *C = Mod->getOrInsertGlobal(Name, Ty); 1365 if (GlobalVariable *G = dyn_cast<GlobalVariable>(C)) { 1366 Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel; 1367 G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); 1368 } 1369 return C; 1370 }; 1371 1372 // These globals must be kept in sync with the ones in dfsan.cpp. 
1373 ArgTLS = 1374 GetOrInsertGlobal("__dfsan_arg_tls", 1375 ArrayType::get(Type::getInt64Ty(*Ctx), ArgTLSSize / 8)); 1376 RetvalTLS = GetOrInsertGlobal( 1377 "__dfsan_retval_tls", 1378 ArrayType::get(Type::getInt64Ty(*Ctx), RetvalTLSSize / 8)); 1379 ArgOriginTLSTy = ArrayType::get(OriginTy, NumOfElementsInArgOrgTLS); 1380 ArgOriginTLS = GetOrInsertGlobal("__dfsan_arg_origin_tls", ArgOriginTLSTy); 1381 RetvalOriginTLS = GetOrInsertGlobal("__dfsan_retval_origin_tls", OriginTy); 1382 1383 (void)Mod->getOrInsertGlobal("__dfsan_track_origins", OriginTy, [&] { 1384 Changed = true; 1385 return new GlobalVariable( 1386 M, OriginTy, true, GlobalValue::WeakODRLinkage, 1387 ConstantInt::getSigned(OriginTy, shouldTrackOrigins()), 1388 "__dfsan_track_origins"); 1389 }); 1390 1391 injectMetadataGlobals(M); 1392 1393 ExternalShadowMask = 1394 Mod->getOrInsertGlobal(DFSanExternShadowPtrMask, IntptrTy); 1395 1396 initializeCallbackFunctions(M); 1397 initializeRuntimeFunctions(M); 1398 1399 std::vector<Function *> FnsToInstrument; 1400 SmallPtrSet<Function *, 2> FnsWithNativeABI; 1401 for (Function &F : M) 1402 if (!F.isIntrinsic() && !DFSanRuntimeFunctions.contains(&F)) 1403 FnsToInstrument.push_back(&F); 1404 1405 // Give function aliases prefixes when necessary, and build wrappers where the 1406 // instrumentedness is inconsistent. 1407 for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end(); 1408 AI != AE;) { 1409 GlobalAlias *GA = &*AI; 1410 ++AI; 1411 // Don't stop on weak. We assume people aren't playing games with the 1412 // instrumentedness of overridden weak aliases. 1413 auto *F = dyn_cast<Function>(GA->getBaseObject()); 1414 if (!F) 1415 continue; 1416 1417 bool GAInst = isInstrumented(GA), FInst = isInstrumented(F); 1418 if (GAInst && FInst) { 1419 addGlobalNamePrefix(GA); 1420 } else if (GAInst != FInst) { 1421 // Non-instrumented alias of an instrumented function, or vice versa. 1422 // Replace the alias with a native-ABI wrapper of the aliasee. 
The pass 1423 // below will take care of instrumenting it. 1424 Function *NewF = 1425 buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType()); 1426 GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA->getType())); 1427 NewF->takeName(GA); 1428 GA->eraseFromParent(); 1429 FnsToInstrument.push_back(NewF); 1430 } 1431 } 1432 1433 ReadOnlyNoneAttrs.addAttribute(Attribute::ReadOnly) 1434 .addAttribute(Attribute::ReadNone); 1435 1436 // First, change the ABI of every function in the module. ABI-listed 1437 // functions keep their original ABI and get a wrapper function. 1438 for (std::vector<Function *>::iterator FI = FnsToInstrument.begin(), 1439 FE = FnsToInstrument.end(); 1440 FI != FE; ++FI) { 1441 Function &F = **FI; 1442 FunctionType *FT = F.getFunctionType(); 1443 1444 bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() && 1445 FT->getReturnType()->isVoidTy()); 1446 1447 if (isInstrumented(&F)) { 1448 // Instrumented functions get a 'dfs$' prefix. This allows us to more 1449 // easily identify cases of mismatching ABIs. 
1450 if (getInstrumentedABI() == IA_Args && !IsZeroArgsVoidRet) { 1451 FunctionType *NewFT = getArgsFunctionType(FT); 1452 Function *NewF = Function::Create(NewFT, F.getLinkage(), 1453 F.getAddressSpace(), "", &M); 1454 NewF->copyAttributesFrom(&F); 1455 NewF->removeAttributes( 1456 AttributeList::ReturnIndex, 1457 AttributeFuncs::typeIncompatible(NewFT->getReturnType())); 1458 for (Function::arg_iterator FArg = F.arg_begin(), 1459 NewFArg = NewF->arg_begin(), 1460 FArgEnd = F.arg_end(); 1461 FArg != FArgEnd; ++FArg, ++NewFArg) { 1462 FArg->replaceAllUsesWith(&*NewFArg); 1463 } 1464 NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList()); 1465 1466 for (Function::user_iterator UI = F.user_begin(), UE = F.user_end(); 1467 UI != UE;) { 1468 BlockAddress *BA = dyn_cast<BlockAddress>(*UI); 1469 ++UI; 1470 if (BA) { 1471 BA->replaceAllUsesWith( 1472 BlockAddress::get(NewF, BA->getBasicBlock())); 1473 delete BA; 1474 } 1475 } 1476 F.replaceAllUsesWith( 1477 ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT))); 1478 NewF->takeName(&F); 1479 F.eraseFromParent(); 1480 *FI = NewF; 1481 addGlobalNamePrefix(NewF); 1482 } else { 1483 addGlobalNamePrefix(&F); 1484 } 1485 } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) { 1486 // Build a wrapper function for F. The wrapper simply calls F, and is 1487 // added to FnsToInstrument so that any instrumentation according to its 1488 // WrapperKind is done in the second pass below. 1489 FunctionType *NewFT = 1490 getInstrumentedABI() == IA_Args ? getArgsFunctionType(FT) : FT; 1491 1492 // If the function being wrapped has local linkage, then preserve the 1493 // function's linkage in the wrapper function. 1494 GlobalValue::LinkageTypes WrapperLinkage = 1495 F.hasLocalLinkage() ? F.getLinkage() 1496 : GlobalValue::LinkOnceODRLinkage; 1497 1498 Function *NewF = buildWrapperFunction( 1499 &F, 1500 (shouldTrackOrigins() ? 
std::string("dfso$") : std::string("dfsw$")) + 1501 std::string(F.getName()), 1502 WrapperLinkage, NewFT); 1503 if (getInstrumentedABI() == IA_TLS) 1504 NewF->removeAttributes(AttributeList::FunctionIndex, ReadOnlyNoneAttrs); 1505 1506 Value *WrappedFnCst = 1507 ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)); 1508 F.replaceAllUsesWith(WrappedFnCst); 1509 1510 UnwrappedFnMap[WrappedFnCst] = &F; 1511 *FI = NewF; 1512 1513 if (!F.isDeclaration()) { 1514 // This function is probably defining an interposition of an 1515 // uninstrumented function and hence needs to keep the original ABI. 1516 // But any functions it may call need to use the instrumented ABI, so 1517 // we instrument it in a mode which preserves the original ABI. 1518 FnsWithNativeABI.insert(&F); 1519 1520 // This code needs to rebuild the iterators, as they may be invalidated 1521 // by the push_back, taking care that the new range does not include 1522 // any functions added by this code. 1523 size_t N = FI - FnsToInstrument.begin(), 1524 Count = FE - FnsToInstrument.begin(); 1525 FnsToInstrument.push_back(&F); 1526 FI = FnsToInstrument.begin() + N; 1527 FE = FnsToInstrument.begin() + Count; 1528 } 1529 // Hopefully, nobody will try to indirectly call a vararg 1530 // function... yet. 1531 } else if (FT->isVarArg()) { 1532 UnwrappedFnMap[&F] = &F; 1533 *FI = nullptr; 1534 } 1535 } 1536 1537 for (Function *F : FnsToInstrument) { 1538 if (!F || F->isDeclaration()) 1539 continue; 1540 1541 removeUnreachableBlocks(*F); 1542 1543 DFSanFunction DFSF(*this, F, FnsWithNativeABI.count(F)); 1544 1545 // DFSanVisitor may create new basic blocks, which confuses df_iterator. 1546 // Build a copy of the list before iterating over it. 
1547 SmallVector<BasicBlock *, 4> BBList(depth_first(&F->getEntryBlock())); 1548 1549 for (BasicBlock *BB : BBList) { 1550 Instruction *Inst = &BB->front(); 1551 while (true) { 1552 // DFSanVisitor may split the current basic block, changing the current 1553 // instruction's next pointer and moving the next instruction to the 1554 // tail block from which we should continue. 1555 Instruction *Next = Inst->getNextNode(); 1556 // DFSanVisitor may delete Inst, so keep track of whether it was a 1557 // terminator. 1558 bool IsTerminator = Inst->isTerminator(); 1559 if (!DFSF.SkipInsts.count(Inst)) 1560 DFSanVisitor(DFSF).visit(Inst); 1561 if (IsTerminator) 1562 break; 1563 Inst = Next; 1564 } 1565 } 1566 1567 // We will not necessarily be able to compute the shadow for every phi node 1568 // until we have visited every block. Therefore, the code that handles phi 1569 // nodes adds them to the PHIFixups list so that they can be properly 1570 // handled here. 1571 for (DFSanFunction::PHIFixupElement &P : DFSF.PHIFixups) { 1572 for (unsigned Val = 0, N = P.Phi->getNumIncomingValues(); Val != N; 1573 ++Val) { 1574 P.ShadowPhi->setIncomingValue( 1575 Val, DFSF.getShadow(P.Phi->getIncomingValue(Val))); 1576 if (P.OriginPhi) 1577 P.OriginPhi->setIncomingValue( 1578 Val, DFSF.getOrigin(P.Phi->getIncomingValue(Val))); 1579 } 1580 } 1581 1582 // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy 1583 // places (i.e. instructions in basic blocks we haven't even begun visiting 1584 // yet). To make our life easier, do this work in a pass after the main 1585 // instrumentation. 
1586 if (ClDebugNonzeroLabels) { 1587 for (Value *V : DFSF.NonZeroChecks) { 1588 Instruction *Pos; 1589 if (Instruction *I = dyn_cast<Instruction>(V)) 1590 Pos = I->getNextNode(); 1591 else 1592 Pos = &DFSF.F->getEntryBlock().front(); 1593 while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos)) 1594 Pos = Pos->getNextNode(); 1595 IRBuilder<> IRB(Pos); 1596 Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(V, Pos); 1597 Value *Ne = 1598 IRB.CreateICmpNE(PrimitiveShadow, DFSF.DFS.ZeroPrimitiveShadow); 1599 BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen( 1600 Ne, Pos, /*Unreachable=*/false, ColdCallWeights)); 1601 IRBuilder<> ThenIRB(BI); 1602 ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn, {}); 1603 } 1604 } 1605 } 1606 1607 return Changed || !FnsToInstrument.empty() || 1608 M.global_size() != InitialGlobalSize || M.size() != InitialModuleSize; 1609 } 1610 1611 Value *DFSanFunction::getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB) { 1612 Value *Base = IRB.CreatePointerCast(DFS.ArgTLS, DFS.IntptrTy); 1613 if (ArgOffset) 1614 Base = IRB.CreateAdd(Base, ConstantInt::get(DFS.IntptrTy, ArgOffset)); 1615 return IRB.CreateIntToPtr(Base, PointerType::get(DFS.getShadowTy(T), 0), 1616 "_dfsarg"); 1617 } 1618 1619 Value *DFSanFunction::getRetvalTLS(Type *T, IRBuilder<> &IRB) { 1620 return IRB.CreatePointerCast( 1621 DFS.RetvalTLS, PointerType::get(DFS.getShadowTy(T), 0), "_dfsret"); 1622 } 1623 1624 Value *DFSanFunction::getRetvalOriginTLS() { return DFS.RetvalOriginTLS; } 1625 1626 Value *DFSanFunction::getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB) { 1627 return IRB.CreateConstGEP2_64(DFS.ArgOriginTLSTy, DFS.ArgOriginTLS, 0, ArgNo, 1628 "_dfsarg_o"); 1629 } 1630 1631 Value *DFSanFunction::getOrigin(Value *V) { 1632 assert(DFS.shouldTrackOrigins()); 1633 if (!isa<Argument>(V) && !isa<Instruction>(V)) 1634 return DFS.ZeroOrigin; 1635 Value *&Origin = ValOriginMap[V]; 1636 if (!Origin) { 1637 if (Argument *A = dyn_cast<Argument>(V)) { 1638 if 
(IsNativeABI) 1639 return DFS.ZeroOrigin; 1640 switch (IA) { 1641 case DataFlowSanitizer::IA_TLS: { 1642 if (A->getArgNo() < DFS.NumOfElementsInArgOrgTLS) { 1643 Instruction *ArgOriginTLSPos = &*F->getEntryBlock().begin(); 1644 IRBuilder<> IRB(ArgOriginTLSPos); 1645 Value *ArgOriginPtr = getArgOriginTLS(A->getArgNo(), IRB); 1646 Origin = IRB.CreateLoad(DFS.OriginTy, ArgOriginPtr); 1647 } else { 1648 // Overflow 1649 Origin = DFS.ZeroOrigin; 1650 } 1651 break; 1652 } 1653 case DataFlowSanitizer::IA_Args: { 1654 Origin = DFS.ZeroOrigin; 1655 break; 1656 } 1657 } 1658 } else { 1659 Origin = DFS.ZeroOrigin; 1660 } 1661 } 1662 return Origin; 1663 } 1664 1665 void DFSanFunction::setOrigin(Instruction *I, Value *Origin) { 1666 if (!DFS.shouldTrackOrigins()) 1667 return; 1668 assert(!ValOriginMap.count(I)); 1669 assert(Origin->getType() == DFS.OriginTy); 1670 ValOriginMap[I] = Origin; 1671 } 1672 1673 Value *DFSanFunction::getShadowForTLSArgument(Argument *A) { 1674 unsigned ArgOffset = 0; 1675 const DataLayout &DL = F->getParent()->getDataLayout(); 1676 for (auto &FArg : F->args()) { 1677 if (!FArg.getType()->isSized()) { 1678 if (A == &FArg) 1679 break; 1680 continue; 1681 } 1682 1683 unsigned Size = DL.getTypeAllocSize(DFS.getShadowTy(&FArg)); 1684 if (A != &FArg) { 1685 ArgOffset += alignTo(Size, ShadowTLSAlignment); 1686 if (ArgOffset > ArgTLSSize) 1687 break; // ArgTLS overflows, uses a zero shadow. 1688 continue; 1689 } 1690 1691 if (ArgOffset + Size > ArgTLSSize) 1692 break; // ArgTLS overflows, uses a zero shadow. 
1693 1694 Instruction *ArgTLSPos = &*F->getEntryBlock().begin(); 1695 IRBuilder<> IRB(ArgTLSPos); 1696 Value *ArgShadowPtr = getArgTLS(FArg.getType(), ArgOffset, IRB); 1697 return IRB.CreateAlignedLoad(DFS.getShadowTy(&FArg), ArgShadowPtr, 1698 ShadowTLSAlignment); 1699 } 1700 1701 return DFS.getZeroShadow(A); 1702 } 1703 1704 Value *DFSanFunction::getShadow(Value *V) { 1705 if (!isa<Argument>(V) && !isa<Instruction>(V)) 1706 return DFS.getZeroShadow(V); 1707 Value *&Shadow = ValShadowMap[V]; 1708 if (!Shadow) { 1709 if (Argument *A = dyn_cast<Argument>(V)) { 1710 if (IsNativeABI) 1711 return DFS.getZeroShadow(V); 1712 switch (IA) { 1713 case DataFlowSanitizer::IA_TLS: { 1714 Shadow = getShadowForTLSArgument(A); 1715 break; 1716 } 1717 case DataFlowSanitizer::IA_Args: { 1718 unsigned ArgIdx = A->getArgNo() + F->arg_size() / 2; 1719 Function::arg_iterator Arg = F->arg_begin(); 1720 std::advance(Arg, ArgIdx); 1721 Shadow = &*Arg; 1722 assert(Shadow->getType() == DFS.PrimitiveShadowTy); 1723 break; 1724 } 1725 } 1726 NonZeroChecks.push_back(Shadow); 1727 } else { 1728 Shadow = DFS.getZeroShadow(V); 1729 } 1730 } 1731 return Shadow; 1732 } 1733 1734 void DFSanFunction::setShadow(Instruction *I, Value *Shadow) { 1735 assert(!ValShadowMap.count(I)); 1736 assert(DFS.shouldTrackFieldsAndIndices() || 1737 Shadow->getType() == DFS.PrimitiveShadowTy); 1738 ValShadowMap[I] = Shadow; 1739 } 1740 1741 Value *DataFlowSanitizer::getShadowOffset(Value *Addr, IRBuilder<> &IRB) { 1742 // Returns Addr & shadow_mask 1743 assert(Addr != RetvalTLS && "Reinstrumenting?"); 1744 Value *ShadowPtrMaskValue; 1745 if (DFSanRuntimeShadowMask) 1746 ShadowPtrMaskValue = IRB.CreateLoad(IntptrTy, ExternalShadowMask); 1747 else 1748 ShadowPtrMaskValue = ShadowPtrMask; 1749 return IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy), 1750 IRB.CreatePtrToInt(ShadowPtrMaskValue, IntptrTy)); 1751 } 1752 1753 std::pair<Value *, Value *> 1754 DataFlowSanitizer::getShadowOriginAddress(Value *Addr, Align 
InstAlignment, 1755 Instruction *Pos) { 1756 // Returns ((Addr & shadow_mask) + origin_base - shadow_base) & ~4UL 1757 IRBuilder<> IRB(Pos); 1758 Value *ShadowOffset = getShadowOffset(Addr, IRB); 1759 Value *ShadowPtr = getShadowAddress(Addr, Pos, ShadowOffset); 1760 Value *OriginPtr = nullptr; 1761 if (shouldTrackOrigins()) { 1762 static Value *OriginByShadowOffset = ConstantInt::get( 1763 IntptrTy, OriginBase->getZExtValue() - ShadowBase->getZExtValue()); 1764 1765 Value *OriginLong = IRB.CreateAdd(ShadowOffset, OriginByShadowOffset); 1766 const Align Alignment = llvm::assumeAligned(InstAlignment.value()); 1767 // When alignment is >= 4, Addr must be aligned to 4, otherwise it is UB. 1768 // So Mask is unnecessary. 1769 if (Alignment < MinOriginAlignment) { 1770 uint64_t Mask = MinOriginAlignment.value() - 1; 1771 OriginLong = IRB.CreateAnd(OriginLong, ConstantInt::get(IntptrTy, ~Mask)); 1772 } 1773 OriginPtr = IRB.CreateIntToPtr(OriginLong, OriginPtrTy); 1774 } 1775 return {ShadowPtr, OriginPtr}; 1776 } 1777 1778 Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos, 1779 Value *ShadowOffset) { 1780 IRBuilder<> IRB(Pos); 1781 return IRB.CreateIntToPtr(ShadowOffset, PrimitiveShadowPtrTy); 1782 } 1783 1784 Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) { 1785 // Returns (Addr & shadow_mask) 1786 IRBuilder<> IRB(Pos); 1787 Value *ShadowOffset = getShadowOffset(Addr, IRB); 1788 return getShadowAddress(Addr, Pos, ShadowOffset); 1789 } 1790 1791 Value *DFSanFunction::combineShadowsThenConvert(Type *T, Value *V1, Value *V2, 1792 Instruction *Pos) { 1793 Value *PrimitiveValue = combineShadows(V1, V2, Pos); 1794 return expandFromPrimitiveShadow(T, PrimitiveValue, Pos); 1795 } 1796 1797 // Generates IR to compute the union of the two given shadows, inserting it 1798 // before Pos. The combined value is with primitive type. 
1799 Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) { 1800 if (DFS.isZeroShadow(V1)) 1801 return collapseToPrimitiveShadow(V2, Pos); 1802 if (DFS.isZeroShadow(V2)) 1803 return collapseToPrimitiveShadow(V1, Pos); 1804 if (V1 == V2) 1805 return collapseToPrimitiveShadow(V1, Pos); 1806 1807 auto V1Elems = ShadowElements.find(V1); 1808 auto V2Elems = ShadowElements.find(V2); 1809 if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) { 1810 if (std::includes(V1Elems->second.begin(), V1Elems->second.end(), 1811 V2Elems->second.begin(), V2Elems->second.end())) { 1812 return collapseToPrimitiveShadow(V1, Pos); 1813 } 1814 if (std::includes(V2Elems->second.begin(), V2Elems->second.end(), 1815 V1Elems->second.begin(), V1Elems->second.end())) { 1816 return collapseToPrimitiveShadow(V2, Pos); 1817 } 1818 } else if (V1Elems != ShadowElements.end()) { 1819 if (V1Elems->second.count(V2)) 1820 return collapseToPrimitiveShadow(V1, Pos); 1821 } else if (V2Elems != ShadowElements.end()) { 1822 if (V2Elems->second.count(V1)) 1823 return collapseToPrimitiveShadow(V2, Pos); 1824 } 1825 1826 auto Key = std::make_pair(V1, V2); 1827 if (V1 > V2) 1828 std::swap(Key.first, Key.second); 1829 CachedShadow &CCS = CachedShadows[Key]; 1830 if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent())) 1831 return CCS.Shadow; 1832 1833 // Converts inputs shadows to shadows with primitive types. 
1834 Value *PV1 = collapseToPrimitiveShadow(V1, Pos); 1835 Value *PV2 = collapseToPrimitiveShadow(V2, Pos); 1836 1837 IRBuilder<> IRB(Pos); 1838 CCS.Block = Pos->getParent(); 1839 CCS.Shadow = IRB.CreateOr(PV1, PV2); 1840 1841 std::set<Value *> UnionElems; 1842 if (V1Elems != ShadowElements.end()) { 1843 UnionElems = V1Elems->second; 1844 } else { 1845 UnionElems.insert(V1); 1846 } 1847 if (V2Elems != ShadowElements.end()) { 1848 UnionElems.insert(V2Elems->second.begin(), V2Elems->second.end()); 1849 } else { 1850 UnionElems.insert(V2); 1851 } 1852 ShadowElements[CCS.Shadow] = std::move(UnionElems); 1853 1854 return CCS.Shadow; 1855 } 1856 1857 // A convenience function which folds the shadows of each of the operands 1858 // of the provided instruction Inst, inserting the IR before Inst. Returns 1859 // the computed union Value. 1860 Value *DFSanFunction::combineOperandShadows(Instruction *Inst) { 1861 if (Inst->getNumOperands() == 0) 1862 return DFS.getZeroShadow(Inst); 1863 1864 Value *Shadow = getShadow(Inst->getOperand(0)); 1865 for (unsigned I = 1, N = Inst->getNumOperands(); I < N; ++I) 1866 Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(I)), Inst); 1867 1868 return expandFromPrimitiveShadow(Inst->getType(), Shadow, Inst); 1869 } 1870 1871 void DFSanVisitor::visitInstOperands(Instruction &I) { 1872 Value *CombinedShadow = DFSF.combineOperandShadows(&I); 1873 DFSF.setShadow(&I, CombinedShadow); 1874 visitInstOperandOrigins(I); 1875 } 1876 1877 Value *DFSanFunction::combineOrigins(const std::vector<Value *> &Shadows, 1878 const std::vector<Value *> &Origins, 1879 Instruction *Pos, ConstantInt *Zero) { 1880 assert(Shadows.size() == Origins.size()); 1881 size_t Size = Origins.size(); 1882 if (Size == 0) 1883 return DFS.ZeroOrigin; 1884 Value *Origin = nullptr; 1885 if (!Zero) 1886 Zero = DFS.ZeroPrimitiveShadow; 1887 for (size_t I = 0; I != Size; ++I) { 1888 Value *OpOrigin = Origins[I]; 1889 Constant *ConstOpOrigin = dyn_cast<Constant>(OpOrigin); 
1890 if (ConstOpOrigin && ConstOpOrigin->isNullValue()) 1891 continue; 1892 if (!Origin) { 1893 Origin = OpOrigin; 1894 continue; 1895 } 1896 Value *OpShadow = Shadows[I]; 1897 Value *PrimitiveShadow = collapseToPrimitiveShadow(OpShadow, Pos); 1898 IRBuilder<> IRB(Pos); 1899 Value *Cond = IRB.CreateICmpNE(PrimitiveShadow, Zero); 1900 Origin = IRB.CreateSelect(Cond, OpOrigin, Origin); 1901 } 1902 return Origin ? Origin : DFS.ZeroOrigin; 1903 } 1904 1905 Value *DFSanFunction::combineOperandOrigins(Instruction *Inst) { 1906 size_t Size = Inst->getNumOperands(); 1907 std::vector<Value *> Shadows(Size); 1908 std::vector<Value *> Origins(Size); 1909 for (unsigned I = 0; I != Size; ++I) { 1910 Shadows[I] = getShadow(Inst->getOperand(I)); 1911 Origins[I] = getOrigin(Inst->getOperand(I)); 1912 } 1913 return combineOrigins(Shadows, Origins, Inst); 1914 } 1915 1916 void DFSanVisitor::visitInstOperandOrigins(Instruction &I) { 1917 if (!DFSF.DFS.shouldTrackOrigins()) 1918 return; 1919 Value *CombinedOrigin = DFSF.combineOperandOrigins(&I); 1920 DFSF.setOrigin(&I, CombinedOrigin); 1921 } 1922 1923 Align DFSanFunction::getShadowAlign(Align InstAlignment) { 1924 const Align Alignment = ClPreserveAlignment ? InstAlignment : Align(1); 1925 return Align(Alignment.value() * DFS.ShadowWidthBytes); 1926 } 1927 1928 Align DFSanFunction::getOriginAlign(Align InstAlignment) { 1929 const Align Alignment = llvm::assumeAligned(InstAlignment.value()); 1930 return Align(std::max(MinOriginAlignment, Alignment)); 1931 } 1932 1933 bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size, 1934 Align InstAlignment) { 1935 // When enabling tracking load instructions, we always use 1936 // __dfsan_load_label_and_origin to reduce code size. 1937 if (ClTrackOrigins == 2) 1938 return true; 1939 1940 assert(Size != 0); 1941 // * if Size == 1, it is sufficient to load its origin aligned at 4. 
1942 // * if Size == 2, we assume most cases Addr % 2 == 0, so it is sufficient to 1943 // load its origin aligned at 4. If not, although origins may be lost, it 1944 // should not happen very often. 1945 // * if align >= 4, Addr must be aligned to 4, otherwise it is UB. When 1946 // Size % 4 == 0, it is more efficient to load origins without callbacks. 1947 // * Otherwise we use __dfsan_load_label_and_origin. 1948 // This should ensure that common cases run efficiently. 1949 if (Size <= 2) 1950 return false; 1951 1952 const Align Alignment = llvm::assumeAligned(InstAlignment.value()); 1953 return Alignment < MinOriginAlignment || !DFS.hasLoadSizeForFastPath(Size); 1954 } 1955 1956 Value *DataFlowSanitizer::loadNextOrigin(Instruction *Pos, Align OriginAlign, 1957 Value **OriginAddr) { 1958 IRBuilder<> IRB(Pos); 1959 *OriginAddr = 1960 IRB.CreateGEP(OriginTy, *OriginAddr, ConstantInt::get(IntptrTy, 1)); 1961 return IRB.CreateAlignedLoad(OriginTy, *OriginAddr, OriginAlign); 1962 } 1963 1964 std::pair<Value *, Value *> DFSanFunction::loadShadowFast( 1965 Value *ShadowAddr, Value *OriginAddr, uint64_t Size, Align ShadowAlign, 1966 Align OriginAlign, Value *FirstOrigin, Instruction *Pos) { 1967 const bool ShouldTrackOrigins = DFS.shouldTrackOrigins(); 1968 const uint64_t ShadowSize = Size * DFS.ShadowWidthBytes; 1969 1970 assert(Size >= 4 && "Not large enough load size for fast path!"); 1971 1972 // Used for origin tracking. 1973 std::vector<Value *> Shadows; 1974 std::vector<Value *> Origins; 1975 1976 // Load instructions in LLVM can have arbitrary byte sizes (e.g., 3, 12, 20) 1977 // but this function is only used in a subset of cases that make it possible 1978 // to optimize the instrumentation. 
1979 // 1980 // Specifically, when the shadow size in bytes (i.e., loaded bytes x shadow 1981 // per byte) is either: 1982 // - a multiple of 8 (common) 1983 // - equal to 4 (only for load32) 1984 // 1985 // For the second case, we can fit the wide shadow in a 32-bit integer. In all 1986 // other cases, we use a 64-bit integer to hold the wide shadow. 1987 Type *WideShadowTy = 1988 ShadowSize == 4 ? Type::getInt32Ty(*DFS.Ctx) : Type::getInt64Ty(*DFS.Ctx); 1989 1990 IRBuilder<> IRB(Pos); 1991 Value *WideAddr = IRB.CreateBitCast(ShadowAddr, WideShadowTy->getPointerTo()); 1992 Value *CombinedWideShadow = 1993 IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign); 1994 1995 unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth(); 1996 const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits; 1997 1998 auto AppendWideShadowAndOrigin = [&](Value *WideShadow, Value *Origin) { 1999 if (BytesPerWideShadow > 4) { 2000 assert(BytesPerWideShadow == 8); 2001 // The wide shadow relates to two origin pointers: one for the first four 2002 // application bytes, and one for the latest four. We use a left shift to 2003 // get just the shadow bytes that correspond to the first origin pointer, 2004 // and then the entire shadow for the second origin pointer (which will be 2005 // chosen by combineOrigins() iff the least-significant half of the wide 2006 // shadow was empty but the other half was not). 
2007 Value *WideShadowLo = IRB.CreateShl( 2008 WideShadow, ConstantInt::get(WideShadowTy, WideShadowBitWidth / 2)); 2009 Shadows.push_back(WideShadow); 2010 Origins.push_back(DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr)); 2011 2012 Shadows.push_back(WideShadowLo); 2013 Origins.push_back(Origin); 2014 } else { 2015 Shadows.push_back(WideShadow); 2016 Origins.push_back(Origin); 2017 } 2018 }; 2019 2020 if (ShouldTrackOrigins) 2021 AppendWideShadowAndOrigin(CombinedWideShadow, FirstOrigin); 2022 2023 // First OR all the WideShadows (i.e., 64bit or 32bit shadow chunks) linearly; 2024 // then OR individual shadows within the combined WideShadow by binary ORing. 2025 // This is fewer instructions than ORing shadows individually, since it 2026 // needs logN shift/or instructions (N being the bytes of the combined wide 2027 // shadow). 2028 for (uint64_t ByteOfs = BytesPerWideShadow; ByteOfs < Size; 2029 ByteOfs += BytesPerWideShadow) { 2030 WideAddr = IRB.CreateGEP(WideShadowTy, WideAddr, 2031 ConstantInt::get(DFS.IntptrTy, 1)); 2032 Value *NextWideShadow = 2033 IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign); 2034 CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, NextWideShadow); 2035 if (ShouldTrackOrigins) { 2036 Value *NextOrigin = DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr); 2037 AppendWideShadowAndOrigin(NextWideShadow, NextOrigin); 2038 } 2039 } 2040 for (unsigned Width = WideShadowBitWidth / 2; Width >= DFS.ShadowWidthBits; 2041 Width >>= 1) { 2042 Value *ShrShadow = IRB.CreateLShr(CombinedWideShadow, Width); 2043 CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, ShrShadow); 2044 } 2045 return {IRB.CreateTrunc(CombinedWideShadow, DFS.PrimitiveShadowTy), 2046 ShouldTrackOrigins 2047 ? 
combineOrigins(Shadows, Origins, Pos, 2048 ConstantInt::getSigned(IRB.getInt64Ty(), 0)) 2049 : DFS.ZeroOrigin}; 2050 } 2051 2052 std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking( 2053 Value *Addr, uint64_t Size, Align InstAlignment, Instruction *Pos) { 2054 const bool ShouldTrackOrigins = DFS.shouldTrackOrigins(); 2055 2056 // Non-escaped loads. 2057 if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) { 2058 const auto SI = AllocaShadowMap.find(AI); 2059 if (SI != AllocaShadowMap.end()) { 2060 IRBuilder<> IRB(Pos); 2061 Value *ShadowLI = IRB.CreateLoad(DFS.PrimitiveShadowTy, SI->second); 2062 const auto OI = AllocaOriginMap.find(AI); 2063 assert(!ShouldTrackOrigins || OI != AllocaOriginMap.end()); 2064 return {ShadowLI, ShouldTrackOrigins 2065 ? IRB.CreateLoad(DFS.OriginTy, OI->second) 2066 : nullptr}; 2067 } 2068 } 2069 2070 // Load from constant addresses. 2071 SmallVector<const Value *, 2> Objs; 2072 getUnderlyingObjects(Addr, Objs); 2073 bool AllConstants = true; 2074 for (const Value *Obj : Objs) { 2075 if (isa<Function>(Obj) || isa<BlockAddress>(Obj)) 2076 continue; 2077 if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant()) 2078 continue; 2079 2080 AllConstants = false; 2081 break; 2082 } 2083 if (AllConstants) 2084 return {DFS.ZeroPrimitiveShadow, 2085 ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr}; 2086 2087 if (Size == 0) 2088 return {DFS.ZeroPrimitiveShadow, 2089 ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr}; 2090 2091 // Use callback to load if this is not an optimizable case for origin 2092 // tracking. 
2093 if (ShouldTrackOrigins && 2094 useCallbackLoadLabelAndOrigin(Size, InstAlignment)) { 2095 IRBuilder<> IRB(Pos); 2096 CallInst *Call = 2097 IRB.CreateCall(DFS.DFSanLoadLabelAndOriginFn, 2098 {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), 2099 ConstantInt::get(DFS.IntptrTy, Size)}); 2100 Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); 2101 return {IRB.CreateTrunc(IRB.CreateLShr(Call, DFS.OriginWidthBits), 2102 DFS.PrimitiveShadowTy), 2103 IRB.CreateTrunc(Call, DFS.OriginTy)}; 2104 } 2105 2106 // Other cases that support loading shadows or origins in a fast way. 2107 Value *ShadowAddr, *OriginAddr; 2108 std::tie(ShadowAddr, OriginAddr) = 2109 DFS.getShadowOriginAddress(Addr, InstAlignment, Pos); 2110 2111 const Align ShadowAlign = getShadowAlign(InstAlignment); 2112 const Align OriginAlign = getOriginAlign(InstAlignment); 2113 Value *Origin = nullptr; 2114 if (ShouldTrackOrigins) { 2115 IRBuilder<> IRB(Pos); 2116 Origin = IRB.CreateAlignedLoad(DFS.OriginTy, OriginAddr, OriginAlign); 2117 } 2118 2119 // When the byte size is small enough, we can load the shadow directly with 2120 // just a few instructions. 
2121 switch (Size) { 2122 case 1: { 2123 LoadInst *LI = new LoadInst(DFS.PrimitiveShadowTy, ShadowAddr, "", Pos); 2124 LI->setAlignment(ShadowAlign); 2125 return {LI, Origin}; 2126 } 2127 case 2: { 2128 IRBuilder<> IRB(Pos); 2129 Value *ShadowAddr1 = IRB.CreateGEP(DFS.PrimitiveShadowTy, ShadowAddr, 2130 ConstantInt::get(DFS.IntptrTy, 1)); 2131 Value *Load = 2132 IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr, ShadowAlign); 2133 Value *Load1 = 2134 IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr1, ShadowAlign); 2135 return {combineShadows(Load, Load1, Pos), Origin}; 2136 } 2137 } 2138 bool HasSizeForFastPath = DFS.hasLoadSizeForFastPath(Size); 2139 2140 if (HasSizeForFastPath) 2141 return loadShadowFast(ShadowAddr, OriginAddr, Size, ShadowAlign, 2142 OriginAlign, Origin, Pos); 2143 2144 IRBuilder<> IRB(Pos); 2145 CallInst *FallbackCall = IRB.CreateCall( 2146 DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)}); 2147 FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); 2148 return {FallbackCall, Origin}; 2149 } 2150 2151 std::pair<Value *, Value *> DFSanFunction::loadShadowOrigin(Value *Addr, 2152 uint64_t Size, 2153 Align InstAlignment, 2154 Instruction *Pos) { 2155 Value *PrimitiveShadow, *Origin; 2156 std::tie(PrimitiveShadow, Origin) = 2157 loadShadowOriginSansLoadTracking(Addr, Size, InstAlignment, Pos); 2158 if (DFS.shouldTrackOrigins()) { 2159 if (ClTrackOrigins == 2) { 2160 IRBuilder<> IRB(Pos); 2161 auto *ConstantShadow = dyn_cast<Constant>(PrimitiveShadow); 2162 if (!ConstantShadow || !ConstantShadow->isZeroValue()) 2163 Origin = updateOriginIfTainted(PrimitiveShadow, Origin, IRB); 2164 } 2165 } 2166 return {PrimitiveShadow, Origin}; 2167 } 2168 2169 static AtomicOrdering addAcquireOrdering(AtomicOrdering AO) { 2170 switch (AO) { 2171 case AtomicOrdering::NotAtomic: 2172 return AtomicOrdering::NotAtomic; 2173 case AtomicOrdering::Unordered: 2174 case AtomicOrdering::Monotonic: 2175 case 
AtomicOrdering::Acquire: 2176 return AtomicOrdering::Acquire; 2177 case AtomicOrdering::Release: 2178 case AtomicOrdering::AcquireRelease: 2179 return AtomicOrdering::AcquireRelease; 2180 case AtomicOrdering::SequentiallyConsistent: 2181 return AtomicOrdering::SequentiallyConsistent; 2182 } 2183 llvm_unreachable("Unknown ordering"); 2184 } 2185 2186 void DFSanVisitor::visitLoadInst(LoadInst &LI) { 2187 auto &DL = LI.getModule()->getDataLayout(); 2188 uint64_t Size = DL.getTypeStoreSize(LI.getType()); 2189 if (Size == 0) { 2190 DFSF.setShadow(&LI, DFSF.DFS.getZeroShadow(&LI)); 2191 DFSF.setOrigin(&LI, DFSF.DFS.ZeroOrigin); 2192 return; 2193 } 2194 2195 // When an application load is atomic, increase atomic ordering between 2196 // atomic application loads and stores to ensure happen-before order; load 2197 // shadow data after application data; store zero shadow data before 2198 // application data. This ensure shadow loads return either labels of the 2199 // initial application data or zeros. 2200 if (LI.isAtomic()) 2201 LI.setOrdering(addAcquireOrdering(LI.getOrdering())); 2202 2203 Instruction *Pos = LI.isAtomic() ? 
LI.getNextNode() : &LI; 2204 std::vector<Value *> Shadows; 2205 std::vector<Value *> Origins; 2206 Value *PrimitiveShadow, *Origin; 2207 std::tie(PrimitiveShadow, Origin) = 2208 DFSF.loadShadowOrigin(LI.getPointerOperand(), Size, LI.getAlign(), Pos); 2209 const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins(); 2210 if (ShouldTrackOrigins) { 2211 Shadows.push_back(PrimitiveShadow); 2212 Origins.push_back(Origin); 2213 } 2214 if (ClCombinePointerLabelsOnLoad) { 2215 Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand()); 2216 PrimitiveShadow = DFSF.combineShadows(PrimitiveShadow, PtrShadow, Pos); 2217 if (ShouldTrackOrigins) { 2218 Shadows.push_back(PtrShadow); 2219 Origins.push_back(DFSF.getOrigin(LI.getPointerOperand())); 2220 } 2221 } 2222 if (!DFSF.DFS.isZeroShadow(PrimitiveShadow)) 2223 DFSF.NonZeroChecks.push_back(PrimitiveShadow); 2224 2225 Value *Shadow = 2226 DFSF.expandFromPrimitiveShadow(LI.getType(), PrimitiveShadow, Pos); 2227 DFSF.setShadow(&LI, Shadow); 2228 2229 if (ShouldTrackOrigins) { 2230 DFSF.setOrigin(&LI, DFSF.combineOrigins(Shadows, Origins, Pos)); 2231 } 2232 2233 if (ClEventCallbacks) { 2234 IRBuilder<> IRB(Pos); 2235 Value *Addr8 = IRB.CreateBitCast(LI.getPointerOperand(), DFSF.DFS.Int8Ptr); 2236 IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, {PrimitiveShadow, Addr8}); 2237 } 2238 } 2239 2240 Value *DFSanFunction::updateOriginIfTainted(Value *Shadow, Value *Origin, 2241 IRBuilder<> &IRB) { 2242 assert(DFS.shouldTrackOrigins()); 2243 return IRB.CreateCall(DFS.DFSanChainOriginIfTaintedFn, {Shadow, Origin}); 2244 } 2245 2246 Value *DFSanFunction::updateOrigin(Value *V, IRBuilder<> &IRB) { 2247 if (!DFS.shouldTrackOrigins()) 2248 return V; 2249 return IRB.CreateCall(DFS.DFSanChainOriginFn, V); 2250 } 2251 2252 Value *DFSanFunction::originToIntptr(IRBuilder<> &IRB, Value *Origin) { 2253 const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes; 2254 const DataLayout &DL = F->getParent()->getDataLayout(); 2255 unsigned IntptrSize = 
DL.getTypeStoreSize(DFS.IntptrTy); 2256 if (IntptrSize == OriginSize) 2257 return Origin; 2258 assert(IntptrSize == OriginSize * 2); 2259 Origin = IRB.CreateIntCast(Origin, DFS.IntptrTy, /* isSigned */ false); 2260 return IRB.CreateOr(Origin, IRB.CreateShl(Origin, OriginSize * 8)); 2261 } 2262 2263 void DFSanFunction::paintOrigin(IRBuilder<> &IRB, Value *Origin, 2264 Value *StoreOriginAddr, 2265 uint64_t StoreOriginSize, Align Alignment) { 2266 const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes; 2267 const DataLayout &DL = F->getParent()->getDataLayout(); 2268 const Align IntptrAlignment = DL.getABITypeAlign(DFS.IntptrTy); 2269 unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy); 2270 assert(IntptrAlignment >= MinOriginAlignment); 2271 assert(IntptrSize >= OriginSize); 2272 2273 unsigned Ofs = 0; 2274 Align CurrentAlignment = Alignment; 2275 if (Alignment >= IntptrAlignment && IntptrSize > OriginSize) { 2276 Value *IntptrOrigin = originToIntptr(IRB, Origin); 2277 Value *IntptrStoreOriginPtr = IRB.CreatePointerCast( 2278 StoreOriginAddr, PointerType::get(DFS.IntptrTy, 0)); 2279 for (unsigned I = 0; I < StoreOriginSize / IntptrSize; ++I) { 2280 Value *Ptr = 2281 I ? IRB.CreateConstGEP1_32(DFS.IntptrTy, IntptrStoreOriginPtr, I) 2282 : IntptrStoreOriginPtr; 2283 IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment); 2284 Ofs += IntptrSize / OriginSize; 2285 CurrentAlignment = IntptrAlignment; 2286 } 2287 } 2288 2289 for (unsigned I = Ofs; I < (StoreOriginSize + OriginSize - 1) / OriginSize; 2290 ++I) { 2291 Value *GEP = I ? 
IRB.CreateConstGEP1_32(DFS.OriginTy, StoreOriginAddr, I) 2292 : StoreOriginAddr; 2293 IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment); 2294 CurrentAlignment = MinOriginAlignment; 2295 } 2296 } 2297 2298 Value *DFSanFunction::convertToBool(Value *V, IRBuilder<> &IRB, 2299 const Twine &Name) { 2300 Type *VTy = V->getType(); 2301 assert(VTy->isIntegerTy()); 2302 if (VTy->getIntegerBitWidth() == 1) 2303 // Just converting a bool to a bool, so do nothing. 2304 return V; 2305 return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), Name); 2306 } 2307 2308 void DFSanFunction::storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size, 2309 Value *Shadow, Value *Origin, 2310 Value *StoreOriginAddr, Align InstAlignment) { 2311 // Do not write origins for zero shadows because we do not trace origins for 2312 // untainted sinks. 2313 const Align OriginAlignment = getOriginAlign(InstAlignment); 2314 Value *CollapsedShadow = collapseToPrimitiveShadow(Shadow, Pos); 2315 IRBuilder<> IRB(Pos); 2316 if (auto *ConstantShadow = dyn_cast<Constant>(CollapsedShadow)) { 2317 if (!ConstantShadow->isZeroValue()) 2318 paintOrigin(IRB, updateOrigin(Origin, IRB), StoreOriginAddr, Size, 2319 OriginAlignment); 2320 return; 2321 } 2322 2323 if (shouldInstrumentWithCall()) { 2324 IRB.CreateCall(DFS.DFSanMaybeStoreOriginFn, 2325 {CollapsedShadow, 2326 IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), 2327 ConstantInt::get(DFS.IntptrTy, Size), Origin}); 2328 } else { 2329 Value *Cmp = convertToBool(CollapsedShadow, IRB, "_dfscmp"); 2330 Instruction *CheckTerm = SplitBlockAndInsertIfThen( 2331 Cmp, &*IRB.GetInsertPoint(), false, DFS.OriginStoreWeights, &DT); 2332 IRBuilder<> IRBNew(CheckTerm); 2333 paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), StoreOriginAddr, Size, 2334 OriginAlignment); 2335 ++NumOriginStores; 2336 } 2337 } 2338 2339 void DFSanFunction::storeZeroPrimitiveShadow(Value *Addr, uint64_t Size, 2340 Align ShadowAlign, 2341 Instruction *Pos) { 2342 IRBuilder<> IRB(Pos); 2343 
IntegerType *ShadowTy = 2344 IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidthBits); 2345 Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0); 2346 Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos); 2347 Value *ExtShadowAddr = 2348 IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy)); 2349 IRB.CreateAlignedStore(ExtZeroShadow, ExtShadowAddr, ShadowAlign); 2350 // Do not write origins for 0 shadows because we do not trace origins for 2351 // untainted sinks. 2352 } 2353 2354 void DFSanFunction::storePrimitiveShadowOrigin(Value *Addr, uint64_t Size, 2355 Align InstAlignment, 2356 Value *PrimitiveShadow, 2357 Value *Origin, 2358 Instruction *Pos) { 2359 const bool ShouldTrackOrigins = DFS.shouldTrackOrigins() && Origin; 2360 2361 if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) { 2362 const auto SI = AllocaShadowMap.find(AI); 2363 if (SI != AllocaShadowMap.end()) { 2364 IRBuilder<> IRB(Pos); 2365 IRB.CreateStore(PrimitiveShadow, SI->second); 2366 2367 // Do not write origins for 0 shadows because we do not trace origins for 2368 // untainted sinks. 
2369 if (ShouldTrackOrigins && !DFS.isZeroShadow(PrimitiveShadow)) { 2370 const auto OI = AllocaOriginMap.find(AI); 2371 assert(OI != AllocaOriginMap.end() && Origin); 2372 IRB.CreateStore(Origin, OI->second); 2373 } 2374 return; 2375 } 2376 } 2377 2378 const Align ShadowAlign = getShadowAlign(InstAlignment); 2379 if (DFS.isZeroShadow(PrimitiveShadow)) { 2380 storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, Pos); 2381 return; 2382 } 2383 2384 IRBuilder<> IRB(Pos); 2385 Value *ShadowAddr, *OriginAddr; 2386 std::tie(ShadowAddr, OriginAddr) = 2387 DFS.getShadowOriginAddress(Addr, InstAlignment, Pos); 2388 2389 const unsigned ShadowVecSize = 8; 2390 assert(ShadowVecSize * DFS.ShadowWidthBits <= 128 && 2391 "Shadow vector is too large!"); 2392 2393 uint64_t Offset = 0; 2394 uint64_t LeftSize = Size; 2395 if (LeftSize >= ShadowVecSize) { 2396 auto *ShadowVecTy = 2397 FixedVectorType::get(DFS.PrimitiveShadowTy, ShadowVecSize); 2398 Value *ShadowVec = UndefValue::get(ShadowVecTy); 2399 for (unsigned I = 0; I != ShadowVecSize; ++I) { 2400 ShadowVec = IRB.CreateInsertElement( 2401 ShadowVec, PrimitiveShadow, 2402 ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), I)); 2403 } 2404 Value *ShadowVecAddr = 2405 IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy)); 2406 do { 2407 Value *CurShadowVecAddr = 2408 IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset); 2409 IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign); 2410 LeftSize -= ShadowVecSize; 2411 ++Offset; 2412 } while (LeftSize >= ShadowVecSize); 2413 Offset *= ShadowVecSize; 2414 } 2415 while (LeftSize > 0) { 2416 Value *CurShadowAddr = 2417 IRB.CreateConstGEP1_32(DFS.PrimitiveShadowTy, ShadowAddr, Offset); 2418 IRB.CreateAlignedStore(PrimitiveShadow, CurShadowAddr, ShadowAlign); 2419 --LeftSize; 2420 ++Offset; 2421 } 2422 2423 if (ShouldTrackOrigins) { 2424 storeOrigin(Pos, Addr, Size, PrimitiveShadow, Origin, OriginAddr, 2425 InstAlignment); 2426 } 2427 } 2428 2429 static AtomicOrdering 
addReleaseOrdering(AtomicOrdering AO) { 2430 switch (AO) { 2431 case AtomicOrdering::NotAtomic: 2432 return AtomicOrdering::NotAtomic; 2433 case AtomicOrdering::Unordered: 2434 case AtomicOrdering::Monotonic: 2435 case AtomicOrdering::Release: 2436 return AtomicOrdering::Release; 2437 case AtomicOrdering::Acquire: 2438 case AtomicOrdering::AcquireRelease: 2439 return AtomicOrdering::AcquireRelease; 2440 case AtomicOrdering::SequentiallyConsistent: 2441 return AtomicOrdering::SequentiallyConsistent; 2442 } 2443 llvm_unreachable("Unknown ordering"); 2444 } 2445 2446 void DFSanVisitor::visitStoreInst(StoreInst &SI) { 2447 auto &DL = SI.getModule()->getDataLayout(); 2448 Value *Val = SI.getValueOperand(); 2449 uint64_t Size = DL.getTypeStoreSize(Val->getType()); 2450 if (Size == 0) 2451 return; 2452 2453 // When an application store is atomic, increase atomic ordering between 2454 // atomic application loads and stores to ensure happen-before order; load 2455 // shadow data after application data; store zero shadow data before 2456 // application data. This ensure shadow loads return either labels of the 2457 // initial application data or zeros. 2458 if (SI.isAtomic()) 2459 SI.setOrdering(addReleaseOrdering(SI.getOrdering())); 2460 2461 const bool ShouldTrackOrigins = 2462 DFSF.DFS.shouldTrackOrigins() && !SI.isAtomic(); 2463 std::vector<Value *> Shadows; 2464 std::vector<Value *> Origins; 2465 2466 Value *Shadow = 2467 SI.isAtomic() ? 
DFSF.DFS.getZeroShadow(Val) : DFSF.getShadow(Val); 2468 2469 if (ShouldTrackOrigins) { 2470 Shadows.push_back(Shadow); 2471 Origins.push_back(DFSF.getOrigin(Val)); 2472 } 2473 2474 Value *PrimitiveShadow; 2475 if (ClCombinePointerLabelsOnStore) { 2476 Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand()); 2477 if (ShouldTrackOrigins) { 2478 Shadows.push_back(PtrShadow); 2479 Origins.push_back(DFSF.getOrigin(SI.getPointerOperand())); 2480 } 2481 PrimitiveShadow = DFSF.combineShadows(Shadow, PtrShadow, &SI); 2482 } else { 2483 PrimitiveShadow = DFSF.collapseToPrimitiveShadow(Shadow, &SI); 2484 } 2485 Value *Origin = nullptr; 2486 if (ShouldTrackOrigins) 2487 Origin = DFSF.combineOrigins(Shadows, Origins, &SI); 2488 DFSF.storePrimitiveShadowOrigin(SI.getPointerOperand(), Size, SI.getAlign(), 2489 PrimitiveShadow, Origin, &SI); 2490 if (ClEventCallbacks) { 2491 IRBuilder<> IRB(&SI); 2492 Value *Addr8 = IRB.CreateBitCast(SI.getPointerOperand(), DFSF.DFS.Int8Ptr); 2493 IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, {PrimitiveShadow, Addr8}); 2494 } 2495 } 2496 2497 void DFSanVisitor::visitCASOrRMW(Align InstAlignment, Instruction &I) { 2498 assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I)); 2499 2500 Value *Val = I.getOperand(1); 2501 const auto &DL = I.getModule()->getDataLayout(); 2502 uint64_t Size = DL.getTypeStoreSize(Val->getType()); 2503 if (Size == 0) 2504 return; 2505 2506 // Conservatively set data at stored addresses and return with zero shadow to 2507 // prevent shadow data races. 2508 IRBuilder<> IRB(&I); 2509 Value *Addr = I.getOperand(0); 2510 const Align ShadowAlign = DFSF.getShadowAlign(InstAlignment); 2511 DFSF.storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, &I); 2512 DFSF.setShadow(&I, DFSF.DFS.getZeroShadow(&I)); 2513 DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin); 2514 } 2515 2516 void DFSanVisitor::visitAtomicRMWInst(AtomicRMWInst &I) { 2517 visitCASOrRMW(I.getAlign(), I); 2518 // TODO: The ordering change follows MSan. 
It is possible not to change 2519 // ordering because we always set and use 0 shadows. 2520 I.setOrdering(addReleaseOrdering(I.getOrdering())); 2521 } 2522 2523 void DFSanVisitor::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { 2524 visitCASOrRMW(I.getAlign(), I); 2525 // TODO: The ordering change follows MSan. It is possible not to change 2526 // ordering because we always set and use 0 shadows. 2527 I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering())); 2528 } 2529 2530 void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) { 2531 visitInstOperands(UO); 2532 } 2533 2534 void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) { 2535 visitInstOperands(BO); 2536 } 2537 2538 void DFSanVisitor::visitBitCastInst(BitCastInst &BCI) { 2539 if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) { 2540 // Special case: if this is the bitcast (there is exactly 1 allowed) between 2541 // a musttail call and a ret, don't instrument. New instructions are not 2542 // allowed after a musttail call. 2543 if (auto *CI = dyn_cast<CallInst>(BCI.getOperand(0))) 2544 if (CI->isMustTailCall()) 2545 return; 2546 } 2547 // TODO: handle musttail call returns for IA_Args. 2548 visitInstOperands(BCI); 2549 } 2550 2551 void DFSanVisitor::visitCastInst(CastInst &CI) { visitInstOperands(CI); } 2552 2553 void DFSanVisitor::visitCmpInst(CmpInst &CI) { 2554 visitInstOperands(CI); 2555 if (ClEventCallbacks) { 2556 IRBuilder<> IRB(&CI); 2557 Value *CombinedShadow = DFSF.getShadow(&CI); 2558 IRB.CreateCall(DFSF.DFS.DFSanCmpCallbackFn, CombinedShadow); 2559 } 2560 } 2561 2562 void DFSanVisitor::visitLandingPadInst(LandingPadInst &LPI) { 2563 // We do not need to track data through LandingPadInst. 2564 // 2565 // For the C++ exceptions, if a value is thrown, this value will be stored 2566 // in a memory location provided by __cxa_allocate_exception(...) (on the 2567 // throw side) or __cxa_begin_catch(...) (on the catch side). 
2568 // This memory will have a shadow, so with the loads and stores we will be 2569 // able to propagate labels on data thrown through exceptions, without any 2570 // special handling of the LandingPadInst. 2571 // 2572 // The second element in the pair result of the LandingPadInst is a 2573 // register value, but it is for a type ID and should never be tainted. 2574 DFSF.setShadow(&LPI, DFSF.DFS.getZeroShadow(&LPI)); 2575 DFSF.setOrigin(&LPI, DFSF.DFS.ZeroOrigin); 2576 } 2577 2578 void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { 2579 if (ClCombineOffsetLabelsOnGEP) { 2580 visitInstOperands(GEPI); 2581 return; 2582 } 2583 2584 // Only propagate shadow/origin of base pointer value but ignore those of 2585 // offset operands. 2586 Value *BasePointer = GEPI.getPointerOperand(); 2587 DFSF.setShadow(&GEPI, DFSF.getShadow(BasePointer)); 2588 if (DFSF.DFS.shouldTrackOrigins()) 2589 DFSF.setOrigin(&GEPI, DFSF.getOrigin(BasePointer)); 2590 } 2591 2592 void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) { 2593 visitInstOperands(I); 2594 } 2595 2596 void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) { 2597 visitInstOperands(I); 2598 } 2599 2600 void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) { 2601 visitInstOperands(I); 2602 } 2603 2604 void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) { 2605 if (!DFSF.DFS.shouldTrackFieldsAndIndices()) { 2606 visitInstOperands(I); 2607 return; 2608 } 2609 2610 IRBuilder<> IRB(&I); 2611 Value *Agg = I.getAggregateOperand(); 2612 Value *AggShadow = DFSF.getShadow(Agg); 2613 Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices()); 2614 DFSF.setShadow(&I, ResShadow); 2615 visitInstOperandOrigins(I); 2616 } 2617 2618 void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) { 2619 if (!DFSF.DFS.shouldTrackFieldsAndIndices()) { 2620 visitInstOperands(I); 2621 return; 2622 } 2623 2624 IRBuilder<> IRB(&I); 2625 Value *AggShadow = 
DFSF.getShadow(I.getAggregateOperand()); 2626 Value *InsShadow = DFSF.getShadow(I.getInsertedValueOperand()); 2627 Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices()); 2628 DFSF.setShadow(&I, Res); 2629 visitInstOperandOrigins(I); 2630 } 2631 2632 void DFSanVisitor::visitAllocaInst(AllocaInst &I) { 2633 bool AllLoadsStores = true; 2634 for (User *U : I.users()) { 2635 if (isa<LoadInst>(U)) 2636 continue; 2637 2638 if (StoreInst *SI = dyn_cast<StoreInst>(U)) { 2639 if (SI->getPointerOperand() == &I) 2640 continue; 2641 } 2642 2643 AllLoadsStores = false; 2644 break; 2645 } 2646 if (AllLoadsStores) { 2647 IRBuilder<> IRB(&I); 2648 DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.PrimitiveShadowTy); 2649 if (DFSF.DFS.shouldTrackOrigins()) { 2650 DFSF.AllocaOriginMap[&I] = 2651 IRB.CreateAlloca(DFSF.DFS.OriginTy, nullptr, "_dfsa"); 2652 } 2653 } 2654 DFSF.setShadow(&I, DFSF.DFS.ZeroPrimitiveShadow); 2655 DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin); 2656 } 2657 2658 void DFSanVisitor::visitSelectInst(SelectInst &I) { 2659 Value *CondShadow = DFSF.getShadow(I.getCondition()); 2660 Value *TrueShadow = DFSF.getShadow(I.getTrueValue()); 2661 Value *FalseShadow = DFSF.getShadow(I.getFalseValue()); 2662 Value *ShadowSel = nullptr; 2663 const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins(); 2664 std::vector<Value *> Shadows; 2665 std::vector<Value *> Origins; 2666 Value *TrueOrigin = 2667 ShouldTrackOrigins ? DFSF.getOrigin(I.getTrueValue()) : nullptr; 2668 Value *FalseOrigin = 2669 ShouldTrackOrigins ? 
DFSF.getOrigin(I.getFalseValue()) : nullptr; 2670 2671 if (isa<VectorType>(I.getCondition()->getType())) { 2672 ShadowSel = DFSF.combineShadowsThenConvert(I.getType(), TrueShadow, 2673 FalseShadow, &I); 2674 if (ShouldTrackOrigins) { 2675 Shadows.push_back(TrueShadow); 2676 Shadows.push_back(FalseShadow); 2677 Origins.push_back(TrueOrigin); 2678 Origins.push_back(FalseOrigin); 2679 } 2680 } else { 2681 if (TrueShadow == FalseShadow) { 2682 ShadowSel = TrueShadow; 2683 if (ShouldTrackOrigins) { 2684 Shadows.push_back(TrueShadow); 2685 Origins.push_back(TrueOrigin); 2686 } 2687 } else { 2688 ShadowSel = 2689 SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I); 2690 if (ShouldTrackOrigins) { 2691 Shadows.push_back(ShadowSel); 2692 Origins.push_back(SelectInst::Create(I.getCondition(), TrueOrigin, 2693 FalseOrigin, "", &I)); 2694 } 2695 } 2696 } 2697 DFSF.setShadow(&I, ClTrackSelectControlFlow 2698 ? DFSF.combineShadowsThenConvert( 2699 I.getType(), CondShadow, ShadowSel, &I) 2700 : ShadowSel); 2701 if (ShouldTrackOrigins) { 2702 if (ClTrackSelectControlFlow) { 2703 Shadows.push_back(CondShadow); 2704 Origins.push_back(DFSF.getOrigin(I.getCondition())); 2705 } 2706 DFSF.setOrigin(&I, DFSF.combineOrigins(Shadows, Origins, &I)); 2707 } 2708 } 2709 2710 void DFSanVisitor::visitMemSetInst(MemSetInst &I) { 2711 IRBuilder<> IRB(&I); 2712 Value *ValShadow = DFSF.getShadow(I.getValue()); 2713 Value *ValOrigin = DFSF.DFS.shouldTrackOrigins() 2714 ? DFSF.getOrigin(I.getValue()) 2715 : DFSF.DFS.ZeroOrigin; 2716 IRB.CreateCall( 2717 DFSF.DFS.DFSanSetLabelFn, 2718 {ValShadow, ValOrigin, 2719 IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(*DFSF.DFS.Ctx)), 2720 IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)}); 2721 } 2722 2723 void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) { 2724 IRBuilder<> IRB(&I); 2725 2726 // CopyOrMoveOrigin transfers origins by refering to their shadows. So we 2727 // need to move origins before moving shadows. 
2728 if (DFSF.DFS.shouldTrackOrigins()) { 2729 IRB.CreateCall( 2730 DFSF.DFS.DFSanMemOriginTransferFn, 2731 {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()), 2732 IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()), 2733 IRB.CreateIntCast(I.getArgOperand(2), DFSF.DFS.IntptrTy, false)}); 2734 } 2735 2736 Value *RawDestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I); 2737 Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I); 2738 Value *LenShadow = 2739 IRB.CreateMul(I.getLength(), ConstantInt::get(I.getLength()->getType(), 2740 DFSF.DFS.ShadowWidthBytes)); 2741 Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx); 2742 Value *DestShadow = IRB.CreateBitCast(RawDestShadow, Int8Ptr); 2743 SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr); 2744 auto *MTI = cast<MemTransferInst>( 2745 IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(), 2746 {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()})); 2747 if (ClPreserveAlignment) { 2748 MTI->setDestAlignment(I.getDestAlign() * DFSF.DFS.ShadowWidthBytes); 2749 MTI->setSourceAlignment(I.getSourceAlign() * DFSF.DFS.ShadowWidthBytes); 2750 } else { 2751 MTI->setDestAlignment(Align(DFSF.DFS.ShadowWidthBytes)); 2752 MTI->setSourceAlignment(Align(DFSF.DFS.ShadowWidthBytes)); 2753 } 2754 if (ClEventCallbacks) { 2755 IRB.CreateCall(DFSF.DFS.DFSanMemTransferCallbackFn, 2756 {RawDestShadow, 2757 IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)}); 2758 } 2759 } 2760 2761 static bool isAMustTailRetVal(Value *RetVal) { 2762 // Tail call may have a bitcast between return. 
2763 if (auto *I = dyn_cast<BitCastInst>(RetVal)) { 2764 RetVal = I->getOperand(0); 2765 } 2766 if (auto *I = dyn_cast<CallInst>(RetVal)) { 2767 return I->isMustTailCall(); 2768 } 2769 return false; 2770 } 2771 2772 void DFSanVisitor::visitReturnInst(ReturnInst &RI) { 2773 if (!DFSF.IsNativeABI && RI.getReturnValue()) { 2774 switch (DFSF.IA) { 2775 case DataFlowSanitizer::IA_TLS: { 2776 // Don't emit the instrumentation for musttail call returns. 2777 if (isAMustTailRetVal(RI.getReturnValue())) 2778 return; 2779 2780 Value *S = DFSF.getShadow(RI.getReturnValue()); 2781 IRBuilder<> IRB(&RI); 2782 Type *RT = DFSF.F->getFunctionType()->getReturnType(); 2783 unsigned Size = 2784 getDataLayout().getTypeAllocSize(DFSF.DFS.getShadowTy(RT)); 2785 if (Size <= RetvalTLSSize) { 2786 // If the size overflows, stores nothing. At callsite, oversized return 2787 // shadows are set to zero. 2788 IRB.CreateAlignedStore(S, DFSF.getRetvalTLS(RT, IRB), 2789 ShadowTLSAlignment); 2790 } 2791 if (DFSF.DFS.shouldTrackOrigins()) { 2792 Value *O = DFSF.getOrigin(RI.getReturnValue()); 2793 IRB.CreateStore(O, DFSF.getRetvalOriginTLS()); 2794 } 2795 break; 2796 } 2797 case DataFlowSanitizer::IA_Args: { 2798 // TODO: handle musttail call returns for IA_Args. 2799 2800 IRBuilder<> IRB(&RI); 2801 Type *RT = DFSF.F->getFunctionType()->getReturnType(); 2802 Value *InsVal = 2803 IRB.CreateInsertValue(UndefValue::get(RT), RI.getReturnValue(), 0); 2804 Value *InsShadow = 2805 IRB.CreateInsertValue(InsVal, DFSF.getShadow(RI.getReturnValue()), 1); 2806 RI.setOperand(0, InsShadow); 2807 break; 2808 } 2809 } 2810 } 2811 } 2812 2813 void DFSanVisitor::addShadowArguments(Function &F, CallBase &CB, 2814 std::vector<Value *> &Args, 2815 IRBuilder<> &IRB) { 2816 FunctionType *FT = F.getFunctionType(); 2817 2818 auto *I = CB.arg_begin(); 2819 2820 // Adds non-variable argument shadows. 
2821 for (unsigned N = FT->getNumParams(); N != 0; ++I, --N) 2822 Args.push_back(DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), &CB)); 2823 2824 // Adds variable argument shadows. 2825 if (FT->isVarArg()) { 2826 auto *LabelVATy = ArrayType::get(DFSF.DFS.PrimitiveShadowTy, 2827 CB.arg_size() - FT->getNumParams()); 2828 auto *LabelVAAlloca = 2829 new AllocaInst(LabelVATy, getDataLayout().getAllocaAddrSpace(), 2830 "labelva", &DFSF.F->getEntryBlock().front()); 2831 2832 for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) { 2833 auto *LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, N); 2834 IRB.CreateStore(DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), &CB), 2835 LabelVAPtr); 2836 } 2837 2838 Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0)); 2839 } 2840 2841 // Adds the return value shadow. 2842 if (!FT->getReturnType()->isVoidTy()) { 2843 if (!DFSF.LabelReturnAlloca) { 2844 DFSF.LabelReturnAlloca = new AllocaInst( 2845 DFSF.DFS.PrimitiveShadowTy, getDataLayout().getAllocaAddrSpace(), 2846 "labelreturn", &DFSF.F->getEntryBlock().front()); 2847 } 2848 Args.push_back(DFSF.LabelReturnAlloca); 2849 } 2850 } 2851 2852 void DFSanVisitor::addOriginArguments(Function &F, CallBase &CB, 2853 std::vector<Value *> &Args, 2854 IRBuilder<> &IRB) { 2855 FunctionType *FT = F.getFunctionType(); 2856 2857 auto *I = CB.arg_begin(); 2858 2859 // Add non-variable argument origins. 2860 for (unsigned N = FT->getNumParams(); N != 0; ++I, --N) 2861 Args.push_back(DFSF.getOrigin(*I)); 2862 2863 // Add variable argument origins. 
2864 if (FT->isVarArg()) { 2865 auto *OriginVATy = 2866 ArrayType::get(DFSF.DFS.OriginTy, CB.arg_size() - FT->getNumParams()); 2867 auto *OriginVAAlloca = 2868 new AllocaInst(OriginVATy, getDataLayout().getAllocaAddrSpace(), 2869 "originva", &DFSF.F->getEntryBlock().front()); 2870 2871 for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) { 2872 auto *OriginVAPtr = IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, N); 2873 IRB.CreateStore(DFSF.getOrigin(*I), OriginVAPtr); 2874 } 2875 2876 Args.push_back(IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, 0)); 2877 } 2878 2879 // Add the return value origin. 2880 if (!FT->getReturnType()->isVoidTy()) { 2881 if (!DFSF.OriginReturnAlloca) { 2882 DFSF.OriginReturnAlloca = new AllocaInst( 2883 DFSF.DFS.OriginTy, getDataLayout().getAllocaAddrSpace(), 2884 "originreturn", &DFSF.F->getEntryBlock().front()); 2885 } 2886 Args.push_back(DFSF.OriginReturnAlloca); 2887 } 2888 } 2889 2890 bool DFSanVisitor::visitWrappedCallBase(Function &F, CallBase &CB) { 2891 IRBuilder<> IRB(&CB); 2892 switch (DFSF.DFS.getWrapperKind(&F)) { 2893 case DataFlowSanitizer::WK_Warning: 2894 CB.setCalledFunction(&F); 2895 IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn, 2896 IRB.CreateGlobalStringPtr(F.getName())); 2897 DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB)); 2898 DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin); 2899 return true; 2900 case DataFlowSanitizer::WK_Discard: 2901 CB.setCalledFunction(&F); 2902 DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB)); 2903 DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin); 2904 return true; 2905 case DataFlowSanitizer::WK_Functional: 2906 CB.setCalledFunction(&F); 2907 visitInstOperands(CB); 2908 return true; 2909 case DataFlowSanitizer::WK_Custom: 2910 // Don't try to handle invokes of custom functions, it's too complicated. 2911 // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_ 2912 // wrapper. 
2913 CallInst *CI = dyn_cast<CallInst>(&CB); 2914 if (!CI) 2915 return false; 2916 2917 const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins(); 2918 FunctionType *FT = F.getFunctionType(); 2919 TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT); 2920 std::string CustomFName = ShouldTrackOrigins ? "__dfso_" : "__dfsw_"; 2921 CustomFName += F.getName(); 2922 FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction( 2923 CustomFName, CustomFn.TransformedType); 2924 if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) { 2925 CustomFn->copyAttributesFrom(&F); 2926 2927 // Custom functions returning non-void will write to the return label. 2928 if (!FT->getReturnType()->isVoidTy()) { 2929 CustomFn->removeAttributes(AttributeList::FunctionIndex, 2930 DFSF.DFS.ReadOnlyNoneAttrs); 2931 } 2932 } 2933 2934 std::vector<Value *> Args; 2935 2936 // Adds non-variable arguments. 2937 auto *I = CB.arg_begin(); 2938 for (unsigned N = FT->getNumParams(); N != 0; ++I, --N) { 2939 Type *T = (*I)->getType(); 2940 FunctionType *ParamFT; 2941 if (isa<PointerType>(T) && 2942 (ParamFT = dyn_cast<FunctionType>(T->getPointerElementType()))) { 2943 std::string TName = "dfst"; 2944 TName += utostr(FT->getNumParams() - N); 2945 TName += "$"; 2946 TName += F.getName(); 2947 Constant *Trampoline = 2948 DFSF.DFS.getOrBuildTrampolineFunction(ParamFT, TName); 2949 Args.push_back(Trampoline); 2950 Args.push_back( 2951 IRB.CreateBitCast(*I, Type::getInt8PtrTy(*DFSF.DFS.Ctx))); 2952 } else { 2953 Args.push_back(*I); 2954 } 2955 } 2956 2957 // Adds shadow arguments. 2958 const unsigned ShadowArgStart = Args.size(); 2959 addShadowArguments(F, CB, Args, IRB); 2960 2961 // Adds origin arguments. 2962 const unsigned OriginArgStart = Args.size(); 2963 if (ShouldTrackOrigins) 2964 addOriginArguments(F, CB, Args, IRB); 2965 2966 // Adds variable arguments. 
2967 append_range(Args, drop_begin(CB.args(), FT->getNumParams())); 2968 2969 CallInst *CustomCI = IRB.CreateCall(CustomF, Args); 2970 CustomCI->setCallingConv(CI->getCallingConv()); 2971 CustomCI->setAttributes(transformFunctionAttributes( 2972 CustomFn, CI->getContext(), CI->getAttributes())); 2973 2974 // Update the parameter attributes of the custom call instruction to 2975 // zero extend the shadow parameters. This is required for targets 2976 // which consider PrimitiveShadowTy an illegal type. 2977 for (unsigned N = 0; N < FT->getNumParams(); N++) { 2978 const unsigned ArgNo = ShadowArgStart + N; 2979 if (CustomCI->getArgOperand(ArgNo)->getType() == 2980 DFSF.DFS.PrimitiveShadowTy) 2981 CustomCI->addParamAttr(ArgNo, Attribute::ZExt); 2982 if (ShouldTrackOrigins) { 2983 const unsigned OriginArgNo = OriginArgStart + N; 2984 if (CustomCI->getArgOperand(OriginArgNo)->getType() == 2985 DFSF.DFS.OriginTy) 2986 CustomCI->addParamAttr(OriginArgNo, Attribute::ZExt); 2987 } 2988 } 2989 2990 // Loads the return value shadow and origin. 2991 if (!FT->getReturnType()->isVoidTy()) { 2992 LoadInst *LabelLoad = 2993 IRB.CreateLoad(DFSF.DFS.PrimitiveShadowTy, DFSF.LabelReturnAlloca); 2994 DFSF.setShadow(CustomCI, DFSF.expandFromPrimitiveShadow( 2995 FT->getReturnType(), LabelLoad, &CB)); 2996 if (ShouldTrackOrigins) { 2997 LoadInst *OriginLoad = 2998 IRB.CreateLoad(DFSF.DFS.OriginTy, DFSF.OriginReturnAlloca); 2999 DFSF.setOrigin(CustomCI, OriginLoad); 3000 } 3001 } 3002 3003 CI->replaceAllUsesWith(CustomCI); 3004 CI->eraseFromParent(); 3005 return true; 3006 } 3007 return false; 3008 } 3009 3010 void DFSanVisitor::visitCallBase(CallBase &CB) { 3011 Function *F = CB.getCalledFunction(); 3012 if ((F && F->isIntrinsic()) || CB.isInlineAsm()) { 3013 visitInstOperands(CB); 3014 return; 3015 } 3016 3017 // Calls to this function are synthesized in wrappers, and we shouldn't 3018 // instrument them. 
3019 if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts()) 3020 return; 3021 3022 DenseMap<Value *, Function *>::iterator UnwrappedFnIt = 3023 DFSF.DFS.UnwrappedFnMap.find(CB.getCalledOperand()); 3024 if (UnwrappedFnIt != DFSF.DFS.UnwrappedFnMap.end()) 3025 if (visitWrappedCallBase(*UnwrappedFnIt->second, CB)) 3026 return; 3027 3028 IRBuilder<> IRB(&CB); 3029 3030 const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins(); 3031 FunctionType *FT = CB.getFunctionType(); 3032 if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) { 3033 // Stores argument shadows. 3034 unsigned ArgOffset = 0; 3035 const DataLayout &DL = getDataLayout(); 3036 for (unsigned I = 0, N = FT->getNumParams(); I != N; ++I) { 3037 if (ShouldTrackOrigins) { 3038 // Ignore overflowed origins 3039 Value *ArgShadow = DFSF.getShadow(CB.getArgOperand(I)); 3040 if (I < DFSF.DFS.NumOfElementsInArgOrgTLS && 3041 !DFSF.DFS.isZeroShadow(ArgShadow)) 3042 IRB.CreateStore(DFSF.getOrigin(CB.getArgOperand(I)), 3043 DFSF.getArgOriginTLS(I, IRB)); 3044 } 3045 3046 unsigned Size = 3047 DL.getTypeAllocSize(DFSF.DFS.getShadowTy(FT->getParamType(I))); 3048 // Stop storing if arguments' size overflows. Inside a function, arguments 3049 // after overflow have zero shadow values. 
3050 if (ArgOffset + Size > ArgTLSSize) 3051 break; 3052 IRB.CreateAlignedStore( 3053 DFSF.getShadow(CB.getArgOperand(I)), 3054 DFSF.getArgTLS(FT->getParamType(I), ArgOffset, IRB), 3055 ShadowTLSAlignment); 3056 ArgOffset += alignTo(Size, ShadowTLSAlignment); 3057 } 3058 } 3059 3060 Instruction *Next = nullptr; 3061 if (!CB.getType()->isVoidTy()) { 3062 if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) { 3063 if (II->getNormalDest()->getSinglePredecessor()) { 3064 Next = &II->getNormalDest()->front(); 3065 } else { 3066 BasicBlock *NewBB = 3067 SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT); 3068 Next = &NewBB->front(); 3069 } 3070 } else { 3071 assert(CB.getIterator() != CB.getParent()->end()); 3072 Next = CB.getNextNode(); 3073 } 3074 3075 if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) { 3076 // Don't emit the epilogue for musttail call returns. 3077 if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall()) 3078 return; 3079 3080 // Loads the return value shadow. 3081 IRBuilder<> NextIRB(Next); 3082 const DataLayout &DL = getDataLayout(); 3083 unsigned Size = DL.getTypeAllocSize(DFSF.DFS.getShadowTy(&CB)); 3084 if (Size > RetvalTLSSize) { 3085 // Set overflowed return shadow to be zero. 3086 DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB)); 3087 } else { 3088 LoadInst *LI = NextIRB.CreateAlignedLoad( 3089 DFSF.DFS.getShadowTy(&CB), DFSF.getRetvalTLS(CB.getType(), NextIRB), 3090 ShadowTLSAlignment, "_dfsret"); 3091 DFSF.SkipInsts.insert(LI); 3092 DFSF.setShadow(&CB, LI); 3093 DFSF.NonZeroChecks.push_back(LI); 3094 } 3095 3096 if (ShouldTrackOrigins) { 3097 LoadInst *LI = NextIRB.CreateLoad( 3098 DFSF.DFS.OriginTy, DFSF.getRetvalOriginTLS(), "_dfsret_o"); 3099 DFSF.SkipInsts.insert(LI); 3100 DFSF.setOrigin(&CB, LI); 3101 } 3102 } 3103 } 3104 3105 // Do all instrumentation for IA_Args down here to defer tampering with the 3106 // CFG in a way that SplitEdge may be able to detect. 
3107 if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_Args) { 3108 // TODO: handle musttail call returns for IA_Args. 3109 3110 FunctionType *NewFT = DFSF.DFS.getArgsFunctionType(FT); 3111 Value *Func = 3112 IRB.CreateBitCast(CB.getCalledOperand(), PointerType::getUnqual(NewFT)); 3113 3114 const unsigned NumParams = FT->getNumParams(); 3115 3116 // Copy original arguments. 3117 auto *ArgIt = CB.arg_begin(), *ArgEnd = CB.arg_end(); 3118 std::vector<Value *> Args(NumParams); 3119 std::copy_n(ArgIt, NumParams, Args.begin()); 3120 3121 // Add shadow arguments by transforming original arguments. 3122 std::generate_n(std::back_inserter(Args), NumParams, 3123 [&]() { return DFSF.getShadow(*ArgIt++); }); 3124 3125 if (FT->isVarArg()) { 3126 unsigned VarArgSize = CB.arg_size() - NumParams; 3127 ArrayType *VarArgArrayTy = 3128 ArrayType::get(DFSF.DFS.PrimitiveShadowTy, VarArgSize); 3129 AllocaInst *VarArgShadow = 3130 new AllocaInst(VarArgArrayTy, getDataLayout().getAllocaAddrSpace(), 3131 "", &DFSF.F->getEntryBlock().front()); 3132 Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0)); 3133 3134 // Copy remaining var args. 
3135 unsigned GepIndex = 0; 3136 std::for_each(ArgIt, ArgEnd, [&](Value *Arg) { 3137 IRB.CreateStore( 3138 DFSF.getShadow(Arg), 3139 IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, GepIndex++)); 3140 Args.push_back(Arg); 3141 }); 3142 } 3143 3144 CallBase *NewCB; 3145 if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) { 3146 NewCB = IRB.CreateInvoke(NewFT, Func, II->getNormalDest(), 3147 II->getUnwindDest(), Args); 3148 } else { 3149 NewCB = IRB.CreateCall(NewFT, Func, Args); 3150 } 3151 NewCB->setCallingConv(CB.getCallingConv()); 3152 NewCB->setAttributes(CB.getAttributes().removeAttributes( 3153 *DFSF.DFS.Ctx, AttributeList::ReturnIndex, 3154 AttributeFuncs::typeIncompatible(NewCB->getType()))); 3155 3156 if (Next) { 3157 ExtractValueInst *ExVal = ExtractValueInst::Create(NewCB, 0, "", Next); 3158 DFSF.SkipInsts.insert(ExVal); 3159 ExtractValueInst *ExShadow = ExtractValueInst::Create(NewCB, 1, "", Next); 3160 DFSF.SkipInsts.insert(ExShadow); 3161 DFSF.setShadow(ExVal, ExShadow); 3162 DFSF.NonZeroChecks.push_back(ExShadow); 3163 3164 CB.replaceAllUsesWith(ExVal); 3165 } 3166 3167 CB.eraseFromParent(); 3168 } 3169 } 3170 3171 void DFSanVisitor::visitPHINode(PHINode &PN) { 3172 Type *ShadowTy = DFSF.DFS.getShadowTy(&PN); 3173 PHINode *ShadowPN = 3174 PHINode::Create(ShadowTy, PN.getNumIncomingValues(), "", &PN); 3175 3176 // Give the shadow phi node valid predecessors to fool SplitEdge into working. 
3177 Value *UndefShadow = UndefValue::get(ShadowTy); 3178 for (BasicBlock *BB : PN.blocks()) 3179 ShadowPN->addIncoming(UndefShadow, BB); 3180 3181 DFSF.setShadow(&PN, ShadowPN); 3182 3183 PHINode *OriginPN = nullptr; 3184 if (DFSF.DFS.shouldTrackOrigins()) { 3185 OriginPN = 3186 PHINode::Create(DFSF.DFS.OriginTy, PN.getNumIncomingValues(), "", &PN); 3187 Value *UndefOrigin = UndefValue::get(DFSF.DFS.OriginTy); 3188 for (BasicBlock *BB : PN.blocks()) 3189 OriginPN->addIncoming(UndefOrigin, BB); 3190 DFSF.setOrigin(&PN, OriginPN); 3191 } 3192 3193 DFSF.PHIFixups.push_back({&PN, ShadowPN, OriginPN}); 3194 } 3195 3196 namespace { 3197 class DataFlowSanitizerLegacyPass : public ModulePass { 3198 private: 3199 std::vector<std::string> ABIListFiles; 3200 3201 public: 3202 static char ID; 3203 3204 DataFlowSanitizerLegacyPass( 3205 const std::vector<std::string> &ABIListFiles = std::vector<std::string>()) 3206 : ModulePass(ID), ABIListFiles(ABIListFiles) {} 3207 3208 bool runOnModule(Module &M) override { 3209 return DataFlowSanitizer(ABIListFiles).runImpl(M); 3210 } 3211 }; 3212 } // namespace 3213 3214 char DataFlowSanitizerLegacyPass::ID; 3215 3216 INITIALIZE_PASS(DataFlowSanitizerLegacyPass, "dfsan", 3217 "DataFlowSanitizer: dynamic data flow analysis.", false, false) 3218 3219 ModulePass *llvm::createDataFlowSanitizerLegacyPassPass( 3220 const std::vector<std::string> &ABIListFiles) { 3221 return new DataFlowSanitizerLegacyPass(ABIListFiles); 3222 } 3223 3224 PreservedAnalyses DataFlowSanitizerPass::run(Module &M, 3225 ModuleAnalysisManager &AM) { 3226 if (DataFlowSanitizer(ABIListFiles).runImpl(M)) { 3227 return PreservedAnalyses::none(); 3228 } 3229 return PreservedAnalyses::all(); 3230 } 3231