1 //===- MemorySanitizer.cpp - detector of uninitialized reads --------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// This file is a part of MemorySanitizer, a detector of uninitialized
12 /// reads.
13 ///
14 /// The algorithm of the tool is similar to Memcheck
15 /// (http://goo.gl/QKbem). We associate a few shadow bits with every
16 /// byte of the application memory, poison the shadow of the malloc-ed
17 /// or alloca-ed memory, load the shadow bits on every memory read,
18 /// propagate the shadow bits through some of the arithmetic
19 /// instruction (including MOV), store the shadow bits on every memory
20 /// write, report a bug on some other instructions (e.g. JMP) if the
21 /// associated shadow is poisoned.
22 ///
23 /// But there are differences too. The first and the major one:
24 /// compiler instrumentation instead of binary instrumentation. This
25 /// gives us much better register allocation, possible compiler
26 /// optimizations and a fast start-up. But this brings the major issue
27 /// as well: msan needs to see all program events, including system
28 /// calls and reads/writes in system libraries, so we either need to
29 /// compile *everything* with msan or use a binary translation
30 /// component (e.g. DynamoRIO) to instrument pre-built libraries.
31 /// Another difference from Memcheck is that we use 8 shadow bits per
32 /// byte of application memory and use a direct shadow mapping. This
33 /// greatly simplifies the instrumentation code and avoids races on
34 /// shadow updates (Memcheck is single-threaded so races are not a
35 /// concern there. Memcheck uses 2 shadow bits per byte with a slow
36 /// path storage that uses 8 bits per byte).
37 ///
38 /// The default value of shadow is 0, which means "clean" (not poisoned).
39 ///
40 /// Every module initializer should call __msan_init to ensure that the
41 /// shadow memory is ready. On error, __msan_warning is called. Since
42 /// parameters and return values may be passed via registers, we have a
43 /// specialized thread-local shadow for return values
44 /// (__msan_retval_tls) and parameters (__msan_param_tls).
45 ///
46 ///                           Origin tracking.
47 ///
48 /// MemorySanitizer can track origins (allocation points) of all uninitialized
49 /// values. This behavior is controlled with a flag (msan-track-origins) and is
50 /// disabled by default.
51 ///
52 /// Origins are 4-byte values created and interpreted by the runtime library.
53 /// They are stored in a second shadow mapping, one 4-byte value for 4 bytes
54 /// of application memory. Propagation of origins is basically a bunch of
55 /// "select" instructions that pick the origin of a dirty argument, if an
56 /// instruction has one.
57 ///
58 /// Every 4 aligned, consecutive bytes of application memory have one origin
59 /// value associated with them. If these bytes contain uninitialized data
60 /// coming from 2 different allocations, the last store wins. Because of this,
61 /// MemorySanitizer reports can show unrelated origins, but this is unlikely in
62 /// practice.
63 ///
64 /// Origins are meaningless for fully initialized values, so MemorySanitizer
65 /// avoids storing origin to memory when a fully initialized value is stored.
66 /// This way it avoids needless overwritting origin of the 4-byte region on
67 /// a short (i.e. 1 byte) clean store, and it is also good for performance.
68 ///
69 ///                            Atomic handling.
70 ///
71 /// Ideally, every atomic store of application value should update the
72 /// corresponding shadow location in an atomic way. Unfortunately, atomic store
73 /// of two disjoint locations can not be done without severe slowdown.
74 ///
75 /// Therefore, we implement an approximation that may err on the safe side.
76 /// In this implementation, every atomically accessed location in the program
77 /// may only change from (partially) uninitialized to fully initialized, but
78 /// not the other way around. We load the shadow _after_ the application load,
79 /// and we store the shadow _before_ the app store. Also, we always store clean
80 /// shadow (if the application store is atomic). This way, if the store-load
81 /// pair constitutes a happens-before arc, shadow store and load are correctly
82 /// ordered such that the load will get either the value that was stored, or
83 /// some later value (which is always clean).
84 ///
85 /// This does not work very well with Compare-And-Swap (CAS) and
86 /// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW
87 /// must store the new shadow before the app operation, and load the shadow
88 /// after the app operation. Computers don't work this way. Current
89 /// implementation ignores the load aspect of CAS/RMW, always returning a clean
90 /// value. It implements the store part as a simple atomic store by storing a
91 /// clean shadow.
92 //
93 //===----------------------------------------------------------------------===//
94 
95 #include "llvm/ADT/APInt.h"
96 #include "llvm/ADT/ArrayRef.h"
97 #include "llvm/ADT/DepthFirstIterator.h"
98 #include "llvm/ADT/SmallString.h"
99 #include "llvm/ADT/SmallVector.h"
100 #include "llvm/ADT/StringExtras.h"
101 #include "llvm/ADT/StringRef.h"
102 #include "llvm/ADT/Triple.h"
103 #include "llvm/Analysis/TargetLibraryInfo.h"
104 #include "llvm/IR/Argument.h"
105 #include "llvm/IR/Attributes.h"
106 #include "llvm/IR/BasicBlock.h"
107 #include "llvm/IR/CallSite.h"
108 #include "llvm/IR/CallingConv.h"
109 #include "llvm/IR/Constant.h"
110 #include "llvm/IR/Constants.h"
111 #include "llvm/IR/DataLayout.h"
112 #include "llvm/IR/DerivedTypes.h"
113 #include "llvm/IR/Function.h"
114 #include "llvm/IR/GlobalValue.h"
115 #include "llvm/IR/GlobalVariable.h"
116 #include "llvm/IR/IRBuilder.h"
117 #include "llvm/IR/InlineAsm.h"
118 #include "llvm/IR/InstVisitor.h"
119 #include "llvm/IR/InstrTypes.h"
120 #include "llvm/IR/Instruction.h"
121 #include "llvm/IR/Instructions.h"
122 #include "llvm/IR/IntrinsicInst.h"
123 #include "llvm/IR/Intrinsics.h"
124 #include "llvm/IR/LLVMContext.h"
125 #include "llvm/IR/MDBuilder.h"
126 #include "llvm/IR/Module.h"
127 #include "llvm/IR/Type.h"
128 #include "llvm/IR/Value.h"
129 #include "llvm/IR/ValueMap.h"
130 #include "llvm/Pass.h"
131 #include "llvm/Support/AtomicOrdering.h"
132 #include "llvm/Support/Casting.h"
133 #include "llvm/Support/CommandLine.h"
134 #include "llvm/Support/Compiler.h"
135 #include "llvm/Support/Debug.h"
136 #include "llvm/Support/ErrorHandling.h"
137 #include "llvm/Support/MathExtras.h"
138 #include "llvm/Support/raw_ostream.h"
139 #include "llvm/Transforms/Instrumentation.h"
140 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
141 #include "llvm/Transforms/Utils/Local.h"
142 #include "llvm/Transforms/Utils/ModuleUtils.h"
143 #include <algorithm>
144 #include <cassert>
145 #include <cstddef>
146 #include <cstdint>
147 #include <memory>
148 #include <string>
149 #include <tuple>
150 
151 using namespace llvm;
152 
153 #define DEBUG_TYPE "msan"
154 
155 static const unsigned kOriginSize = 4;
156 static const unsigned kMinOriginAlignment = 4;
157 static const unsigned kShadowTLSAlignment = 8;
158 
159 // These constants must be kept in sync with the ones in msan.h.
160 static const unsigned kParamTLSSize = 800;
161 static const unsigned kRetvalTLSSize = 800;
162 
163 // Accesses sizes are powers of two: 1, 2, 4, 8.
164 static const size_t kNumberOfAccessSizes = 4;
165 
166 /// \brief Track origins of uninitialized values.
167 ///
168 /// Adds a section to MemorySanitizer report that points to the allocation
169 /// (stack or heap) the uninitialized bits came from originally.
170 static cl::opt<int> ClTrackOrigins("msan-track-origins",
171        cl::desc("Track origins (allocation sites) of poisoned memory"),
172        cl::Hidden, cl::init(0));
173 
174 static cl::opt<bool> ClKeepGoing("msan-keep-going",
175        cl::desc("keep going after reporting a UMR"),
176        cl::Hidden, cl::init(false));
177 
178 static cl::opt<bool> ClPoisonStack("msan-poison-stack",
179        cl::desc("poison uninitialized stack variables"),
180        cl::Hidden, cl::init(true));
181 
182 static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
183        cl::desc("poison uninitialized stack variables with a call"),
184        cl::Hidden, cl::init(false));
185 
186 static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
187        cl::desc("poison uninitialized stack variables with the given pattern"),
188        cl::Hidden, cl::init(0xff));
189 
190 static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
191        cl::desc("poison undef temps"),
192        cl::Hidden, cl::init(true));
193 
194 static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
195        cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
196        cl::Hidden, cl::init(true));
197 
198 static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact",
199        cl::desc("exact handling of relational integer ICmp"),
200        cl::Hidden, cl::init(false));
201 
202 // This flag controls whether we check the shadow of the address
203 // operand of load or store. Such bugs are very rare, since load from
204 // a garbage address typically results in SEGV, but still happen
205 // (e.g. only lower bits of address are garbage, or the access happens
206 // early at program startup where malloc-ed memory is more likely to
207 // be zeroed. As of 2012-08-28 this flag adds 20% slowdown.
208 static cl::opt<bool> ClCheckAccessAddress("msan-check-access-address",
209        cl::desc("report accesses through a pointer which has poisoned shadow"),
210        cl::Hidden, cl::init(true));
211 
212 static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions",
213        cl::desc("print out instructions with default strict semantics"),
214        cl::Hidden, cl::init(false));
215 
216 static cl::opt<int> ClInstrumentationWithCallThreshold(
217     "msan-instrumentation-with-call-threshold",
218     cl::desc(
219         "If the function being instrumented requires more than "
220         "this number of checks and origin stores, use callbacks instead of "
221         "inline checks (-1 means never use callbacks)."),
222     cl::Hidden, cl::init(3500));
223 
224 // This is an experiment to enable handling of cases where shadow is a non-zero
225 // compile-time constant. For some unexplainable reason they were silently
226 // ignored in the instrumentation.
227 static cl::opt<bool> ClCheckConstantShadow("msan-check-constant-shadow",
228        cl::desc("Insert checks for constant shadow values"),
229        cl::Hidden, cl::init(false));
230 
231 // This is off by default because of a bug in gold:
232 // https://sourceware.org/bugzilla/show_bug.cgi?id=19002
233 static cl::opt<bool> ClWithComdat("msan-with-comdat",
234        cl::desc("Place MSan constructors in comdat sections"),
235        cl::Hidden, cl::init(false));
236 
237 static const char *const kMsanModuleCtorName = "msan.module_ctor";
238 static const char *const kMsanInitName = "__msan_init";
239 
240 namespace {
241 
242 // Memory map parameters used in application-to-shadow address calculation.
243 // Offset = (Addr & ~AndMask) ^ XorMask
244 // Shadow = ShadowBase + Offset
245 // Origin = OriginBase + Offset
246 struct MemoryMapParams {
247   uint64_t AndMask;
248   uint64_t XorMask;
249   uint64_t ShadowBase;
250   uint64_t OriginBase;
251 };
252 
253 struct PlatformMemoryMapParams {
254   const MemoryMapParams *bits32;
255   const MemoryMapParams *bits64;
256 };
257 
258 } // end anonymous namespace
259 
260 // i386 Linux
261 static const MemoryMapParams Linux_I386_MemoryMapParams = {
262   0x000080000000,  // AndMask
263   0,               // XorMask (not used)
264   0,               // ShadowBase (not used)
265   0x000040000000,  // OriginBase
266 };
267 
268 // x86_64 Linux
269 static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
270 #ifdef MSAN_LINUX_X86_64_OLD_MAPPING
271   0x400000000000,  // AndMask
272   0,               // XorMask (not used)
273   0,               // ShadowBase (not used)
274   0x200000000000,  // OriginBase
275 #else
276   0,               // AndMask (not used)
277   0x500000000000,  // XorMask
278   0,               // ShadowBase (not used)
279   0x100000000000,  // OriginBase
280 #endif
281 };
282 
283 // mips64 Linux
284 static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
285   0,               // AndMask (not used)
286   0x008000000000,  // XorMask
287   0,               // ShadowBase (not used)
288   0x002000000000,  // OriginBase
289 };
290 
291 // ppc64 Linux
292 static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = {
293   0xE00000000000,  // AndMask
294   0x100000000000,  // XorMask
295   0x080000000000,  // ShadowBase
296   0x1C0000000000,  // OriginBase
297 };
298 
299 // aarch64 Linux
300 static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
301   0,               // AndMask (not used)
302   0x06000000000,   // XorMask
303   0,               // ShadowBase (not used)
304   0x01000000000,   // OriginBase
305 };
306 
307 // i386 FreeBSD
308 static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
309   0x000180000000,  // AndMask
310   0x000040000000,  // XorMask
311   0x000020000000,  // ShadowBase
312   0x000700000000,  // OriginBase
313 };
314 
315 // x86_64 FreeBSD
316 static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = {
317   0xc00000000000,  // AndMask
318   0x200000000000,  // XorMask
319   0x100000000000,  // ShadowBase
320   0x380000000000,  // OriginBase
321 };
322 
323 // x86_64 NetBSD
324 static const MemoryMapParams NetBSD_X86_64_MemoryMapParams = {
325   0,               // AndMask
326   0x500000000000,  // XorMask
327   0,               // ShadowBase
328   0x100000000000,  // OriginBase
329 };
330 
331 static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
332   &Linux_I386_MemoryMapParams,
333   &Linux_X86_64_MemoryMapParams,
334 };
335 
336 static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
337   nullptr,
338   &Linux_MIPS64_MemoryMapParams,
339 };
340 
341 static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = {
342   nullptr,
343   &Linux_PowerPC64_MemoryMapParams,
344 };
345 
346 static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
347   nullptr,
348   &Linux_AArch64_MemoryMapParams,
349 };
350 
351 static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
352   &FreeBSD_I386_MemoryMapParams,
353   &FreeBSD_X86_64_MemoryMapParams,
354 };
355 
356 static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = {
357   nullptr,
358   &NetBSD_X86_64_MemoryMapParams,
359 };
360 
361 namespace {
362 
363 /// \brief An instrumentation pass implementing detection of uninitialized
364 /// reads.
365 ///
366 /// MemorySanitizer: instrument the code in module to find
367 /// uninitialized reads.
368 class MemorySanitizer : public FunctionPass {
369 public:
370   // Pass identification, replacement for typeid.
371   static char ID;
372 
373   MemorySanitizer(int TrackOrigins = 0, bool Recover = false)
374       : FunctionPass(ID),
375         TrackOrigins(std::max(TrackOrigins, (int)ClTrackOrigins)),
376         Recover(Recover || ClKeepGoing) {}
377 
378   StringRef getPassName() const override { return "MemorySanitizer"; }
379 
380   void getAnalysisUsage(AnalysisUsage &AU) const override {
381     AU.addRequired<TargetLibraryInfoWrapperPass>();
382   }
383 
384   bool runOnFunction(Function &F) override;
385   bool doInitialization(Module &M) override;
386 
387 private:
388   friend struct MemorySanitizerVisitor;
389   friend struct VarArgAMD64Helper;
390   friend struct VarArgMIPS64Helper;
391   friend struct VarArgAArch64Helper;
392   friend struct VarArgPowerPC64Helper;
393 
394   void initializeCallbacks(Module &M);
395 
396   /// \brief Track origins (allocation points) of uninitialized values.
397   int TrackOrigins;
398   bool Recover;
399 
400   LLVMContext *C;
401   Type *IntptrTy;
402   Type *OriginTy;
403 
404   /// \brief Thread-local shadow storage for function parameters.
405   GlobalVariable *ParamTLS;
406 
407   /// \brief Thread-local origin storage for function parameters.
408   GlobalVariable *ParamOriginTLS;
409 
410   /// \brief Thread-local shadow storage for function return value.
411   GlobalVariable *RetvalTLS;
412 
413   /// \brief Thread-local origin storage for function return value.
414   GlobalVariable *RetvalOriginTLS;
415 
416   /// \brief Thread-local shadow storage for in-register va_arg function
417   /// parameters (x86_64-specific).
418   GlobalVariable *VAArgTLS;
419 
420   /// \brief Thread-local shadow storage for va_arg overflow area
421   /// (x86_64-specific).
422   GlobalVariable *VAArgOverflowSizeTLS;
423 
424   /// \brief Thread-local space used to pass origin value to the UMR reporting
425   /// function.
426   GlobalVariable *OriginTLS;
427 
428   /// \brief The run-time callback to print a warning.
429   Value *WarningFn = nullptr;
430 
431   // These arrays are indexed by log2(AccessSize).
432   Value *MaybeWarningFn[kNumberOfAccessSizes];
433   Value *MaybeStoreOriginFn[kNumberOfAccessSizes];
434 
435   /// \brief Run-time helper that generates a new origin value for a stack
436   /// allocation.
437   Value *MsanSetAllocaOrigin4Fn;
438 
439   /// \brief Run-time helper that poisons stack on function entry.
440   Value *MsanPoisonStackFn;
441 
442   /// \brief Run-time helper that records a store (or any event) of an
443   /// uninitialized value and returns an updated origin id encoding this info.
444   Value *MsanChainOriginFn;
445 
446   /// \brief MSan runtime replacements for memmove, memcpy and memset.
447   Value *MemmoveFn, *MemcpyFn, *MemsetFn;
448 
449   /// \brief Memory map parameters used in application-to-shadow calculation.
450   const MemoryMapParams *MapParams;
451 
452   MDNode *ColdCallWeights;
453 
454   /// \brief Branch weights for origin store.
455   MDNode *OriginStoreWeights;
456 
457   /// \brief An empty volatile inline asm that prevents callback merge.
458   InlineAsm *EmptyAsm;
459 
460   Function *MsanCtorFunction;
461 };
462 
463 } // end anonymous namespace
464 
465 char MemorySanitizer::ID = 0;
466 
467 INITIALIZE_PASS_BEGIN(
468     MemorySanitizer, "msan",
469     "MemorySanitizer: detects uninitialized reads.", false, false)
470 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
471 INITIALIZE_PASS_END(
472     MemorySanitizer, "msan",
473     "MemorySanitizer: detects uninitialized reads.", false, false)
474 
475 FunctionPass *llvm::createMemorySanitizerPass(int TrackOrigins, bool Recover) {
476   return new MemorySanitizer(TrackOrigins, Recover);
477 }
478 
479 /// \brief Create a non-const global initialized with the given string.
480 ///
481 /// Creates a writable global for Str so that we can pass it to the
482 /// run-time lib. Runtime uses first 4 bytes of the string to store the
483 /// frame ID, so the string needs to be mutable.
484 static GlobalVariable *createPrivateNonConstGlobalForString(Module &M,
485                                                             StringRef Str) {
486   Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
487   return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/false,
488                             GlobalValue::PrivateLinkage, StrConst, "");
489 }
490 
491 /// \brief Insert extern declaration of runtime-provided functions and globals.
492 void MemorySanitizer::initializeCallbacks(Module &M) {
493   // Only do this once.
494   if (WarningFn)
495     return;
496 
497   IRBuilder<> IRB(*C);
498   // Create the callback.
499   // FIXME: this function should have "Cold" calling conv,
500   // which is not yet implemented.
501   StringRef WarningFnName = Recover ? "__msan_warning"
502                                     : "__msan_warning_noreturn";
503   WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy());
504 
505   for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
506        AccessSizeIndex++) {
507     unsigned AccessSize = 1 << AccessSizeIndex;
508     std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize);
509     MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
510         FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8),
511         IRB.getInt32Ty());
512 
513     FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
514     MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
515         FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8),
516         IRB.getInt8PtrTy(), IRB.getInt32Ty());
517   }
518 
519   MsanSetAllocaOrigin4Fn = M.getOrInsertFunction(
520     "__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
521     IRB.getInt8PtrTy(), IntptrTy);
522   MsanPoisonStackFn =
523       M.getOrInsertFunction("__msan_poison_stack", IRB.getVoidTy(),
524                             IRB.getInt8PtrTy(), IntptrTy);
525   MsanChainOriginFn = M.getOrInsertFunction(
526     "__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty());
527   MemmoveFn = M.getOrInsertFunction(
528     "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
529     IRB.getInt8PtrTy(), IntptrTy);
530   MemcpyFn = M.getOrInsertFunction(
531     "__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
532     IntptrTy);
533   MemsetFn = M.getOrInsertFunction(
534     "__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
535     IntptrTy);
536 
537   // Create globals.
538   RetvalTLS = new GlobalVariable(
539     M, ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8), false,
540     GlobalVariable::ExternalLinkage, nullptr, "__msan_retval_tls", nullptr,
541     GlobalVariable::InitialExecTLSModel);
542   RetvalOriginTLS = new GlobalVariable(
543     M, OriginTy, false, GlobalVariable::ExternalLinkage, nullptr,
544     "__msan_retval_origin_tls", nullptr, GlobalVariable::InitialExecTLSModel);
545 
546   ParamTLS = new GlobalVariable(
547     M, ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), false,
548     GlobalVariable::ExternalLinkage, nullptr, "__msan_param_tls", nullptr,
549     GlobalVariable::InitialExecTLSModel);
550   ParamOriginTLS = new GlobalVariable(
551     M, ArrayType::get(OriginTy, kParamTLSSize / 4), false,
552     GlobalVariable::ExternalLinkage, nullptr, "__msan_param_origin_tls",
553     nullptr, GlobalVariable::InitialExecTLSModel);
554 
555   VAArgTLS = new GlobalVariable(
556     M, ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), false,
557     GlobalVariable::ExternalLinkage, nullptr, "__msan_va_arg_tls", nullptr,
558     GlobalVariable::InitialExecTLSModel);
559   VAArgOverflowSizeTLS = new GlobalVariable(
560     M, IRB.getInt64Ty(), false, GlobalVariable::ExternalLinkage, nullptr,
561     "__msan_va_arg_overflow_size_tls", nullptr,
562     GlobalVariable::InitialExecTLSModel);
563   OriginTLS = new GlobalVariable(
564     M, IRB.getInt32Ty(), false, GlobalVariable::ExternalLinkage, nullptr,
565     "__msan_origin_tls", nullptr, GlobalVariable::InitialExecTLSModel);
566 
567   // We insert an empty inline asm after __msan_report* to avoid callback merge.
568   EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
569                             StringRef(""), StringRef(""),
570                             /*hasSideEffects=*/true);
571 }
572 
573 /// \brief Module-level initialization.
574 ///
575 /// inserts a call to __msan_init to the module's constructor list.
576 bool MemorySanitizer::doInitialization(Module &M) {
577   auto &DL = M.getDataLayout();
578 
579   Triple TargetTriple(M.getTargetTriple());
580   switch (TargetTriple.getOS()) {
581     case Triple::FreeBSD:
582       switch (TargetTriple.getArch()) {
583         case Triple::x86_64:
584           MapParams = FreeBSD_X86_MemoryMapParams.bits64;
585           break;
586         case Triple::x86:
587           MapParams = FreeBSD_X86_MemoryMapParams.bits32;
588           break;
589         default:
590           report_fatal_error("unsupported architecture");
591       }
592       break;
593     case Triple::NetBSD:
594       switch (TargetTriple.getArch()) {
595         case Triple::x86_64:
596           MapParams = NetBSD_X86_MemoryMapParams.bits64;
597           break;
598         default:
599           report_fatal_error("unsupported architecture");
600       }
601       break;
602     case Triple::Linux:
603       switch (TargetTriple.getArch()) {
604         case Triple::x86_64:
605           MapParams = Linux_X86_MemoryMapParams.bits64;
606           break;
607         case Triple::x86:
608           MapParams = Linux_X86_MemoryMapParams.bits32;
609           break;
610         case Triple::mips64:
611         case Triple::mips64el:
612           MapParams = Linux_MIPS_MemoryMapParams.bits64;
613           break;
614         case Triple::ppc64:
615         case Triple::ppc64le:
616           MapParams = Linux_PowerPC_MemoryMapParams.bits64;
617           break;
618         case Triple::aarch64:
619         case Triple::aarch64_be:
620           MapParams = Linux_ARM_MemoryMapParams.bits64;
621           break;
622         default:
623           report_fatal_error("unsupported architecture");
624       }
625       break;
626     default:
627       report_fatal_error("unsupported operating system");
628   }
629 
630   C = &(M.getContext());
631   IRBuilder<> IRB(*C);
632   IntptrTy = IRB.getIntPtrTy(DL);
633   OriginTy = IRB.getInt32Ty();
634 
635   ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
636   OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
637 
638   std::tie(MsanCtorFunction, std::ignore) =
639       createSanitizerCtorAndInitFunctions(M, kMsanModuleCtorName, kMsanInitName,
640                                           /*InitArgTypes=*/{},
641                                           /*InitArgs=*/{});
642   if (ClWithComdat) {
643     Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
644     MsanCtorFunction->setComdat(MsanCtorComdat);
645     appendToGlobalCtors(M, MsanCtorFunction, 0, MsanCtorFunction);
646   } else {
647     appendToGlobalCtors(M, MsanCtorFunction, 0);
648   }
649 
650 
651   if (TrackOrigins)
652     new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
653                        IRB.getInt32(TrackOrigins), "__msan_track_origins");
654 
655   if (Recover)
656     new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
657                        IRB.getInt32(Recover), "__msan_keep_going");
658 
659   return true;
660 }
661 
662 namespace {
663 
664 /// \brief A helper class that handles instrumentation of VarArg
665 /// functions on a particular platform.
666 ///
667 /// Implementations are expected to insert the instrumentation
668 /// necessary to propagate argument shadow through VarArg function
669 /// calls. Visit* methods are called during an InstVisitor pass over
670 /// the function, and should avoid creating new basic blocks. A new
671 /// instance of this class is created for each instrumented function.
672 struct VarArgHelper {
673   virtual ~VarArgHelper() = default;
674 
675   /// \brief Visit a CallSite.
676   virtual void visitCallSite(CallSite &CS, IRBuilder<> &IRB) = 0;
677 
678   /// \brief Visit a va_start call.
679   virtual void visitVAStartInst(VAStartInst &I) = 0;
680 
681   /// \brief Visit a va_copy call.
682   virtual void visitVACopyInst(VACopyInst &I) = 0;
683 
684   /// \brief Finalize function instrumentation.
685   ///
686   /// This method is called after visiting all interesting (see above)
687   /// instructions in a function.
688   virtual void finalizeInstrumentation() = 0;
689 };
690 
691 struct MemorySanitizerVisitor;
692 
693 } // end anonymous namespace
694 
695 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
696                                         MemorySanitizerVisitor &Visitor);
697 
698 static unsigned TypeSizeToSizeIndex(unsigned TypeSize) {
699   if (TypeSize <= 8) return 0;
700   return Log2_32_Ceil((TypeSize + 7) / 8);
701 }
702 
703 namespace {
704 
705 /// This class does all the work for a given function. Store and Load
706 /// instructions store and load corresponding shadow and origin
707 /// values. Most instructions propagate shadow from arguments to their
708 /// return values. Certain instructions (most importantly, BranchInst)
709 /// test their argument shadow and print reports (with a runtime call) if it's
710 /// non-zero.
711 struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
712   Function &F;
713   MemorySanitizer &MS;
714   SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
715   ValueMap<Value*, Value*> ShadowMap, OriginMap;
716   std::unique_ptr<VarArgHelper> VAHelper;
717   const TargetLibraryInfo *TLI;
718 
719   // The following flags disable parts of MSan instrumentation based on
720   // blacklist contents and command-line options.
721   bool InsertChecks;
722   bool PropagateShadow;
723   bool PoisonStack;
724   bool PoisonUndef;
725   bool CheckReturnValue;
726 
727   struct ShadowOriginAndInsertPoint {
728     Value *Shadow;
729     Value *Origin;
730     Instruction *OrigIns;
731 
732     ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I)
733       : Shadow(S), Origin(O), OrigIns(I) {}
734   };
735   SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
736   SmallVector<StoreInst *, 16> StoreList;
737 
738   MemorySanitizerVisitor(Function &F, MemorySanitizer &MS)
739       : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) {
740     bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeMemory);
741     InsertChecks = SanitizeFunction;
742     PropagateShadow = SanitizeFunction;
743     PoisonStack = SanitizeFunction && ClPoisonStack;
744     PoisonUndef = SanitizeFunction && ClPoisonUndef;
745     // FIXME: Consider using SpecialCaseList to specify a list of functions that
746     // must always return fully initialized values. For now, we hardcode "main".
747     CheckReturnValue = SanitizeFunction && (F.getName() == "main");
748     TLI = &MS.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
749 
750     DEBUG(if (!InsertChecks)
751           dbgs() << "MemorySanitizer is not inserting checks into '"
752                  << F.getName() << "'\n");
753   }
754 
755   Value *updateOrigin(Value *V, IRBuilder<> &IRB) {
756     if (MS.TrackOrigins <= 1) return V;
757     return IRB.CreateCall(MS.MsanChainOriginFn, V);
758   }
759 
760   Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) {
761     const DataLayout &DL = F.getParent()->getDataLayout();
762     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
763     if (IntptrSize == kOriginSize) return Origin;
764     assert(IntptrSize == kOriginSize * 2);
765     Origin = IRB.CreateIntCast(Origin, MS.IntptrTy, /* isSigned */ false);
766     return IRB.CreateOr(Origin, IRB.CreateShl(Origin, kOriginSize * 8));
767   }
768 
769   /// \brief Fill memory range with the given origin value.
770   void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
771                    unsigned Size, unsigned Alignment) {
772     const DataLayout &DL = F.getParent()->getDataLayout();
773     unsigned IntptrAlignment = DL.getABITypeAlignment(MS.IntptrTy);
774     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
775     assert(IntptrAlignment >= kMinOriginAlignment);
776     assert(IntptrSize >= kOriginSize);
777 
778     unsigned Ofs = 0;
779     unsigned CurrentAlignment = Alignment;
780     if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
781       Value *IntptrOrigin = originToIntptr(IRB, Origin);
782       Value *IntptrOriginPtr =
783           IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0));
784       for (unsigned i = 0; i < Size / IntptrSize; ++i) {
785         Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i)
786                        : IntptrOriginPtr;
787         IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
788         Ofs += IntptrSize / kOriginSize;
789         CurrentAlignment = IntptrAlignment;
790       }
791     }
792 
793     for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
794       Value *GEP =
795           i ? IRB.CreateConstGEP1_32(nullptr, OriginPtr, i) : OriginPtr;
796       IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
797       CurrentAlignment = kMinOriginAlignment;
798     }
799   }
800 
801   void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
802                    Value *OriginPtr, unsigned Alignment, bool AsCall) {
803     const DataLayout &DL = F.getParent()->getDataLayout();
804     unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
805     unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
806     if (Shadow->getType()->isAggregateType()) {
807       paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
808                   OriginAlignment);
809     } else {
810       Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
811       Constant *ConstantShadow = dyn_cast_or_null<Constant>(ConvertedShadow);
812       if (ConstantShadow) {
813         if (ClCheckConstantShadow && !ConstantShadow->isZeroValue())
814           paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
815                       OriginAlignment);
816         return;
817       }
818 
819       unsigned TypeSizeInBits =
820           DL.getTypeSizeInBits(ConvertedShadow->getType());
821       unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
822       if (AsCall && SizeIndex < kNumberOfAccessSizes) {
823         Value *Fn = MS.MaybeStoreOriginFn[SizeIndex];
824         Value *ConvertedShadow2 = IRB.CreateZExt(
825             ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
826         IRB.CreateCall(Fn, {ConvertedShadow2,
827                             IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
828                             Origin});
829       } else {
830         Value *Cmp = IRB.CreateICmpNE(
831             ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp");
832         Instruction *CheckTerm = SplitBlockAndInsertIfThen(
833             Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
834         IRBuilder<> IRBNew(CheckTerm);
835         paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), OriginPtr, StoreSize,
836                     OriginAlignment);
837       }
838     }
839   }
840 
841   void materializeStores(bool InstrumentWithCalls) {
842     for (StoreInst *SI : StoreList) {
843       IRBuilder<> IRB(SI);
844       Value *Val = SI->getValueOperand();
845       Value *Addr = SI->getPointerOperand();
846       Value *Shadow = SI->isAtomic() ? getCleanShadow(Val) : getShadow(Val);
847       Value *ShadowPtr, *OriginPtr;
848       Type *ShadowTy = Shadow->getType();
849       unsigned Alignment = SI->getAlignment();
850       unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
851       std::tie(ShadowPtr, OriginPtr) =
852           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment);
853 
854       StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment);
855       DEBUG(dbgs() << "  STORE: " << *NewSI << "\n");
856 
857       if (ClCheckAccessAddress)
858         insertShadowCheck(Addr, NewSI);
859 
860       if (SI->isAtomic())
861         SI->setOrdering(addReleaseOrdering(SI->getOrdering()));
862 
863       if (MS.TrackOrigins && !SI->isAtomic())
864         storeOrigin(IRB, Addr, Shadow, getOrigin(Val), OriginPtr,
865                     OriginAlignment, InstrumentWithCalls);
866     }
867   }
868 
869   void materializeOneCheck(Instruction *OrigIns, Value *Shadow, Value *Origin,
870                            bool AsCall) {
871     IRBuilder<> IRB(OrigIns);
872     DEBUG(dbgs() << "  SHAD0 : " << *Shadow << "\n");
873     Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
874     DEBUG(dbgs() << "  SHAD1 : " << *ConvertedShadow << "\n");
875 
876     Constant *ConstantShadow = dyn_cast_or_null<Constant>(ConvertedShadow);
877     if (ConstantShadow) {
878       if (ClCheckConstantShadow && !ConstantShadow->isZeroValue()) {
879         if (MS.TrackOrigins) {
880           IRB.CreateStore(Origin ? (Value *)Origin : (Value *)IRB.getInt32(0),
881                           MS.OriginTLS);
882         }
883         IRB.CreateCall(MS.WarningFn, {});
884         IRB.CreateCall(MS.EmptyAsm, {});
885         // FIXME: Insert UnreachableInst if !MS.Recover?
886         // This may invalidate some of the following checks and needs to be done
887         // at the very end.
888       }
889       return;
890     }
891 
892     const DataLayout &DL = OrigIns->getModule()->getDataLayout();
893 
894     unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
895     unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
896     if (AsCall && SizeIndex < kNumberOfAccessSizes) {
897       Value *Fn = MS.MaybeWarningFn[SizeIndex];
898       Value *ConvertedShadow2 =
899           IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
900       IRB.CreateCall(Fn, {ConvertedShadow2, MS.TrackOrigins && Origin
901                                                 ? Origin
902                                                 : (Value *)IRB.getInt32(0)});
903     } else {
904       Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
905                                     getCleanShadow(ConvertedShadow), "_mscmp");
906       Instruction *CheckTerm = SplitBlockAndInsertIfThen(
907           Cmp, OrigIns,
908           /* Unreachable */ !MS.Recover, MS.ColdCallWeights);
909 
910       IRB.SetInsertPoint(CheckTerm);
911       if (MS.TrackOrigins) {
912         IRB.CreateStore(Origin ? (Value *)Origin : (Value *)IRB.getInt32(0),
913                         MS.OriginTLS);
914       }
915       IRB.CreateCall(MS.WarningFn, {});
916       IRB.CreateCall(MS.EmptyAsm, {});
917       DEBUG(dbgs() << "  CHECK: " << *Cmp << "\n");
918     }
919   }
920 
921   void materializeChecks(bool InstrumentWithCalls) {
922     for (const auto &ShadowData : InstrumentationList) {
923       Instruction *OrigIns = ShadowData.OrigIns;
924       Value *Shadow = ShadowData.Shadow;
925       Value *Origin = ShadowData.Origin;
926       materializeOneCheck(OrigIns, Shadow, Origin, InstrumentWithCalls);
927     }
928     DEBUG(dbgs() << "DONE:\n" << F);
929   }
930 
931   /// \brief Add MemorySanitizer instrumentation to a function.
932   bool runOnFunction() {
933     MS.initializeCallbacks(*F.getParent());
934 
935     // In the presence of unreachable blocks, we may see Phi nodes with
936     // incoming nodes from such blocks. Since InstVisitor skips unreachable
937     // blocks, such nodes will not have any shadow value associated with them.
938     // It's easier to remove unreachable blocks than deal with missing shadow.
939     removeUnreachableBlocks(F);
940 
941     // Iterate all BBs in depth-first order and create shadow instructions
942     // for all instructions (where applicable).
943     // For PHI nodes we create dummy shadow PHIs which will be finalized later.
944     for (BasicBlock *BB : depth_first(&F.getEntryBlock()))
945       visit(*BB);
946 
947     // Finalize PHI nodes.
948     for (PHINode *PN : ShadowPHINodes) {
949       PHINode *PNS = cast<PHINode>(getShadow(PN));
950       PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : nullptr;
951       size_t NumValues = PN->getNumIncomingValues();
952       for (size_t v = 0; v < NumValues; v++) {
953         PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
954         if (PNO) PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
955       }
956     }
957 
958     VAHelper->finalizeInstrumentation();
959 
960     bool InstrumentWithCalls = ClInstrumentationWithCallThreshold >= 0 &&
961                                InstrumentationList.size() + StoreList.size() >
962                                    (unsigned)ClInstrumentationWithCallThreshold;
963 
964     // Delayed instrumentation of StoreInst.
965     // This may add new checks to be inserted later.
966     materializeStores(InstrumentWithCalls);
967 
968     // Insert shadow value checks.
969     materializeChecks(InstrumentWithCalls);
970 
971     return true;
972   }
973 
974   /// \brief Compute the shadow type that corresponds to a given Value.
975   Type *getShadowTy(Value *V) {
976     return getShadowTy(V->getType());
977   }
978 
979   /// \brief Compute the shadow type that corresponds to a given Type.
980   Type *getShadowTy(Type *OrigTy) {
981     if (!OrigTy->isSized()) {
982       return nullptr;
983     }
984     // For integer type, shadow is the same as the original type.
985     // This may return weird-sized types like i1.
986     if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
987       return IT;
988     const DataLayout &DL = F.getParent()->getDataLayout();
989     if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
990       uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType());
991       return VectorType::get(IntegerType::get(*MS.C, EltSize),
992                              VT->getNumElements());
993     }
994     if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy)) {
995       return ArrayType::get(getShadowTy(AT->getElementType()),
996                             AT->getNumElements());
997     }
998     if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
999       SmallVector<Type*, 4> Elements;
1000       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1001         Elements.push_back(getShadowTy(ST->getElementType(i)));
1002       StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked());
1003       DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
1004       return Res;
1005     }
1006     uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
1007     return IntegerType::get(*MS.C, TypeSize);
1008   }
1009 
1010   /// \brief Flatten a vector type.
1011   Type *getShadowTyNoVec(Type *ty) {
1012     if (VectorType *vt = dyn_cast<VectorType>(ty))
1013       return IntegerType::get(*MS.C, vt->getBitWidth());
1014     return ty;
1015   }
1016 
1017   /// \brief Convert a shadow value to it's flattened variant.
1018   Value *convertToShadowTyNoVec(Value *V, IRBuilder<> &IRB) {
1019     Type *Ty = V->getType();
1020     Type *NoVecTy = getShadowTyNoVec(Ty);
1021     if (Ty == NoVecTy) return V;
1022     return IRB.CreateBitCast(V, NoVecTy);
1023   }
1024 
1025   /// \brief Compute the integer shadow offset that corresponds to a given
1026   /// application address.
1027   ///
1028   /// Offset = (Addr & ~AndMask) ^ XorMask
1029   Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) {
1030     Value *OffsetLong = IRB.CreatePointerCast(Addr, MS.IntptrTy);
1031 
1032     uint64_t AndMask = MS.MapParams->AndMask;
1033     if (AndMask)
1034       OffsetLong =
1035           IRB.CreateAnd(OffsetLong, ConstantInt::get(MS.IntptrTy, ~AndMask));
1036 
1037     uint64_t XorMask = MS.MapParams->XorMask;
1038     if (XorMask)
1039       OffsetLong =
1040           IRB.CreateXor(OffsetLong, ConstantInt::get(MS.IntptrTy, XorMask));
1041     return OffsetLong;
1042   }
1043 
1044   /// \brief Compute the shadow and origin addresses corresponding to a given
1045   /// application address.
1046   ///
1047   /// Shadow = ShadowBase + Offset
1048   /// Origin = (OriginBase + Offset) & ~3ULL
1049   std::pair<Value *, Value *> getShadowOriginPtrUserspace(
1050       Value *Addr, IRBuilder<> &IRB, Type *ShadowTy, unsigned Alignment,
1051       Instruction **FirstInsn) {
1052     Value *ShadowOffset = getShadowPtrOffset(Addr, IRB);
1053     Value *ShadowLong = ShadowOffset;
1054     uint64_t ShadowBase = MS.MapParams->ShadowBase;
1055     *FirstInsn = dyn_cast<Instruction>(ShadowLong);
1056     if (ShadowBase != 0) {
1057       ShadowLong =
1058         IRB.CreateAdd(ShadowLong,
1059                       ConstantInt::get(MS.IntptrTy, ShadowBase));
1060     }
1061     Value *ShadowPtr =
1062         IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
1063     Value *OriginPtr = nullptr;
1064     if (MS.TrackOrigins) {
1065       Value *OriginLong = ShadowOffset;
1066       uint64_t OriginBase = MS.MapParams->OriginBase;
1067       if (OriginBase != 0)
1068         OriginLong = IRB.CreateAdd(OriginLong,
1069                                    ConstantInt::get(MS.IntptrTy, OriginBase));
1070       if (Alignment < kMinOriginAlignment) {
1071         uint64_t Mask = kMinOriginAlignment - 1;
1072         OriginLong =
1073             IRB.CreateAnd(OriginLong, ConstantInt::get(MS.IntptrTy, ~Mask));
1074       }
1075       OriginPtr =
1076           IRB.CreateIntToPtr(OriginLong, PointerType::get(IRB.getInt32Ty(), 0));
1077     }
1078     return std::make_pair(ShadowPtr, OriginPtr);
1079   }
1080 
1081   std::pair<Value *, Value *> getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB,
1082                                                  Type *ShadowTy,
1083                                                  unsigned Alignment) {
1084     Instruction *FirstInsn = nullptr;
1085     std::pair<Value *, Value *> ret =
1086         getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment, &FirstInsn);
1087     return ret;
1088   }
1089 
1090   /// \brief Compute the shadow address for a given function argument.
1091   ///
1092   /// Shadow = ParamTLS+ArgOffset.
1093   Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB,
1094                                  int ArgOffset) {
1095     Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
1096     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1097     return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
1098                               "_msarg");
1099   }
1100 
1101   /// \brief Compute the origin address for a given function argument.
1102   Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB,
1103                                  int ArgOffset) {
1104     if (!MS.TrackOrigins) return nullptr;
1105     Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
1106     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1107     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
1108                               "_msarg_o");
1109   }
1110 
1111   /// \brief Compute the shadow address for a retval.
1112   Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) {
1113     return IRB.CreatePointerCast(MS.RetvalTLS,
1114                                  PointerType::get(getShadowTy(A), 0),
1115                                  "_msret");
1116   }
1117 
1118   /// \brief Compute the origin address for a retval.
1119   Value *getOriginPtrForRetval(IRBuilder<> &IRB) {
1120     // We keep a single origin for the entire retval. Might be too optimistic.
1121     return MS.RetvalOriginTLS;
1122   }
1123 
1124   /// \brief Set SV to be the shadow value for V.
1125   void setShadow(Value *V, Value *SV) {
1126     assert(!ShadowMap.count(V) && "Values may only have one shadow");
1127     ShadowMap[V] = PropagateShadow ? SV : getCleanShadow(V);
1128   }
1129 
1130   /// \brief Set Origin to be the origin value for V.
1131   void setOrigin(Value *V, Value *Origin) {
1132     if (!MS.TrackOrigins) return;
1133     assert(!OriginMap.count(V) && "Values may only have one origin");
1134     DEBUG(dbgs() << "ORIGIN: " << *V << "  ==> " << *Origin << "\n");
1135     OriginMap[V] = Origin;
1136   }
1137 
1138   Constant *getCleanShadow(Type *OrigTy) {
1139     Type *ShadowTy = getShadowTy(OrigTy);
1140     if (!ShadowTy)
1141       return nullptr;
1142     return Constant::getNullValue(ShadowTy);
1143   }
1144 
1145   /// \brief Create a clean shadow value for a given value.
1146   ///
1147   /// Clean shadow (all zeroes) means all bits of the value are defined
1148   /// (initialized).
1149   Constant *getCleanShadow(Value *V) {
1150     return getCleanShadow(V->getType());
1151   }
1152 
1153   /// \brief Create a dirty shadow of a given shadow type.
1154   Constant *getPoisonedShadow(Type *ShadowTy) {
1155     assert(ShadowTy);
1156     if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
1157       return Constant::getAllOnesValue(ShadowTy);
1158     if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy)) {
1159       SmallVector<Constant *, 4> Vals(AT->getNumElements(),
1160                                       getPoisonedShadow(AT->getElementType()));
1161       return ConstantArray::get(AT, Vals);
1162     }
1163     if (StructType *ST = dyn_cast<StructType>(ShadowTy)) {
1164       SmallVector<Constant *, 4> Vals;
1165       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1166         Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
1167       return ConstantStruct::get(ST, Vals);
1168     }
1169     llvm_unreachable("Unexpected shadow type");
1170   }
1171 
1172   /// \brief Create a dirty shadow for a given value.
1173   Constant *getPoisonedShadow(Value *V) {
1174     Type *ShadowTy = getShadowTy(V);
1175     if (!ShadowTy)
1176       return nullptr;
1177     return getPoisonedShadow(ShadowTy);
1178   }
1179 
1180   /// \brief Create a clean (zero) origin.
1181   Value *getCleanOrigin() {
1182     return Constant::getNullValue(MS.OriginTy);
1183   }
1184 
1185   /// \brief Get the shadow value for a given Value.
1186   ///
1187   /// This function either returns the value set earlier with setShadow,
1188   /// or extracts if from ParamTLS (for function arguments).
1189   Value *getShadow(Value *V) {
1190     if (!PropagateShadow) return getCleanShadow(V);
1191     if (Instruction *I = dyn_cast<Instruction>(V)) {
1192       if (I->getMetadata("nosanitize"))
1193         return getCleanShadow(V);
1194       // For instructions the shadow is already stored in the map.
1195       Value *Shadow = ShadowMap[V];
1196       if (!Shadow) {
1197         DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
1198         (void)I;
1199         assert(Shadow && "No shadow for a value");
1200       }
1201       return Shadow;
1202     }
1203     if (UndefValue *U = dyn_cast<UndefValue>(V)) {
1204       Value *AllOnes = PoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V);
1205       DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
1206       (void)U;
1207       return AllOnes;
1208     }
1209     if (Argument *A = dyn_cast<Argument>(V)) {
1210       // For arguments we compute the shadow on demand and store it in the map.
1211       Value **ShadowPtr = &ShadowMap[V];
1212       if (*ShadowPtr)
1213         return *ShadowPtr;
1214       Function *F = A->getParent();
1215       IRBuilder<> EntryIRB(F->getEntryBlock().getFirstNonPHI());
1216       unsigned ArgOffset = 0;
1217       const DataLayout &DL = F->getParent()->getDataLayout();
1218       for (auto &FArg : F->args()) {
1219         if (!FArg.getType()->isSized()) {
1220           DEBUG(dbgs() << "Arg is not sized\n");
1221           continue;
1222         }
1223         unsigned Size =
1224             FArg.hasByValAttr()
1225                 ? DL.getTypeAllocSize(FArg.getType()->getPointerElementType())
1226                 : DL.getTypeAllocSize(FArg.getType());
1227         if (A == &FArg) {
1228           bool Overflow = ArgOffset + Size > kParamTLSSize;
1229           Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
1230           if (FArg.hasByValAttr()) {
1231             // ByVal pointer itself has clean shadow. We copy the actual
1232             // argument shadow to the underlying memory.
1233             // Figure out maximal valid memcpy alignment.
1234             unsigned ArgAlign = FArg.getParamAlignment();
1235             if (ArgAlign == 0) {
1236               Type *EltType = A->getType()->getPointerElementType();
1237               ArgAlign = DL.getABITypeAlignment(EltType);
1238             }
1239             Value *CpShadowPtr =
1240                 getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign)
1241                     .first;
1242             if (Overflow) {
1243               // ParamTLS overflow.
1244               EntryIRB.CreateMemSet(
1245                   CpShadowPtr, Constant::getNullValue(EntryIRB.getInt8Ty()),
1246                   Size, ArgAlign);
1247             } else {
1248               unsigned CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
1249               Value *Cpy = EntryIRB.CreateMemCpy(CpShadowPtr, CopyAlign, Base,
1250                                                  CopyAlign, Size);
1251               DEBUG(dbgs() << "  ByValCpy: " << *Cpy << "\n");
1252               (void)Cpy;
1253             }
1254             *ShadowPtr = getCleanShadow(V);
1255           } else {
1256             if (Overflow) {
1257               // ParamTLS overflow.
1258               *ShadowPtr = getCleanShadow(V);
1259             } else {
1260               *ShadowPtr =
1261                   EntryIRB.CreateAlignedLoad(Base, kShadowTLSAlignment);
1262             }
1263           }
1264           DEBUG(dbgs() << "  ARG:    "  << FArg << " ==> " <<
1265                 **ShadowPtr << "\n");
1266           if (MS.TrackOrigins && !Overflow) {
1267             Value *OriginPtr =
1268                 getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
1269             setOrigin(A, EntryIRB.CreateLoad(OriginPtr));
1270           } else {
1271             setOrigin(A, getCleanOrigin());
1272           }
1273         }
1274         ArgOffset += alignTo(Size, kShadowTLSAlignment);
1275       }
1276       assert(*ShadowPtr && "Could not find shadow for an argument");
1277       return *ShadowPtr;
1278     }
1279     // For everything else the shadow is zero.
1280     return getCleanShadow(V);
1281   }
1282 
1283   /// \brief Get the shadow for i-th argument of the instruction I.
1284   Value *getShadow(Instruction *I, int i) {
1285     return getShadow(I->getOperand(i));
1286   }
1287 
1288   /// \brief Get the origin for a value.
1289   Value *getOrigin(Value *V) {
1290     if (!MS.TrackOrigins) return nullptr;
1291     if (!PropagateShadow) return getCleanOrigin();
1292     if (isa<Constant>(V)) return getCleanOrigin();
1293     assert((isa<Instruction>(V) || isa<Argument>(V)) &&
1294            "Unexpected value type in getOrigin()");
1295     if (Instruction *I = dyn_cast<Instruction>(V)) {
1296       if (I->getMetadata("nosanitize"))
1297         return getCleanOrigin();
1298     }
1299     Value *Origin = OriginMap[V];
1300     assert(Origin && "Missing origin");
1301     return Origin;
1302   }
1303 
1304   /// \brief Get the origin for i-th argument of the instruction I.
1305   Value *getOrigin(Instruction *I, int i) {
1306     return getOrigin(I->getOperand(i));
1307   }
1308 
1309   /// \brief Remember the place where a shadow check should be inserted.
1310   ///
1311   /// This location will be later instrumented with a check that will print a
1312   /// UMR warning in runtime if the shadow value is not 0.
1313   void insertShadowCheck(Value *Shadow, Value *Origin, Instruction *OrigIns) {
1314     assert(Shadow);
1315     if (!InsertChecks) return;
1316 #ifndef NDEBUG
1317     Type *ShadowTy = Shadow->getType();
1318     assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy)) &&
1319            "Can only insert checks for integer and vector shadow types");
1320 #endif
1321     InstrumentationList.push_back(
1322         ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
1323   }
1324 
1325   /// \brief Remember the place where a shadow check should be inserted.
1326   ///
1327   /// This location will be later instrumented with a check that will print a
1328   /// UMR warning in runtime if the value is not fully defined.
1329   void insertShadowCheck(Value *Val, Instruction *OrigIns) {
1330     assert(Val);
1331     Value *Shadow, *Origin;
1332     if (ClCheckConstantShadow) {
1333       Shadow = getShadow(Val);
1334       if (!Shadow) return;
1335       Origin = getOrigin(Val);
1336     } else {
1337       Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
1338       if (!Shadow) return;
1339       Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
1340     }
1341     insertShadowCheck(Shadow, Origin, OrigIns);
1342   }
1343 
1344   AtomicOrdering addReleaseOrdering(AtomicOrdering a) {
1345     switch (a) {
1346       case AtomicOrdering::NotAtomic:
1347         return AtomicOrdering::NotAtomic;
1348       case AtomicOrdering::Unordered:
1349       case AtomicOrdering::Monotonic:
1350       case AtomicOrdering::Release:
1351         return AtomicOrdering::Release;
1352       case AtomicOrdering::Acquire:
1353       case AtomicOrdering::AcquireRelease:
1354         return AtomicOrdering::AcquireRelease;
1355       case AtomicOrdering::SequentiallyConsistent:
1356         return AtomicOrdering::SequentiallyConsistent;
1357     }
1358     llvm_unreachable("Unknown ordering");
1359   }
1360 
1361   AtomicOrdering addAcquireOrdering(AtomicOrdering a) {
1362     switch (a) {
1363       case AtomicOrdering::NotAtomic:
1364         return AtomicOrdering::NotAtomic;
1365       case AtomicOrdering::Unordered:
1366       case AtomicOrdering::Monotonic:
1367       case AtomicOrdering::Acquire:
1368         return AtomicOrdering::Acquire;
1369       case AtomicOrdering::Release:
1370       case AtomicOrdering::AcquireRelease:
1371         return AtomicOrdering::AcquireRelease;
1372       case AtomicOrdering::SequentiallyConsistent:
1373         return AtomicOrdering::SequentiallyConsistent;
1374     }
1375     llvm_unreachable("Unknown ordering");
1376   }
1377 
1378   // ------------------- Visitors.
1379   using InstVisitor<MemorySanitizerVisitor>::visit;
1380   void visit(Instruction &I) {
1381     if (!I.getMetadata("nosanitize"))
1382       InstVisitor<MemorySanitizerVisitor>::visit(I);
1383   }
1384 
1385   /// \brief Instrument LoadInst
1386   ///
1387   /// Loads the corresponding shadow and (optionally) origin.
1388   /// Optionally, checks that the load address is fully defined.
1389   void visitLoadInst(LoadInst &I) {
1390     assert(I.getType()->isSized() && "Load type must have size");
1391     assert(!I.getMetadata("nosanitize"));
1392     IRBuilder<> IRB(I.getNextNode());
1393     Type *ShadowTy = getShadowTy(&I);
1394     Value *Addr = I.getPointerOperand();
1395     Value *ShadowPtr, *OriginPtr;
1396     unsigned Alignment = I.getAlignment();
1397     if (PropagateShadow) {
1398       std::tie(ShadowPtr, OriginPtr) =
1399           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment);
1400       setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, Alignment, "_msld"));
1401     } else {
1402       setShadow(&I, getCleanShadow(&I));
1403     }
1404 
1405     if (ClCheckAccessAddress)
1406       insertShadowCheck(I.getPointerOperand(), &I);
1407 
1408     if (I.isAtomic())
1409       I.setOrdering(addAcquireOrdering(I.getOrdering()));
1410 
1411     if (MS.TrackOrigins) {
1412       if (PropagateShadow) {
1413         unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1414         setOrigin(&I, IRB.CreateAlignedLoad(OriginPtr, OriginAlignment));
1415       } else {
1416         setOrigin(&I, getCleanOrigin());
1417       }
1418     }
1419   }
1420 
1421   /// \brief Instrument StoreInst
1422   ///
1423   /// Stores the corresponding shadow and (optionally) origin.
1424   /// Optionally, checks that the store address is fully defined.
1425   void visitStoreInst(StoreInst &I) {
1426     StoreList.push_back(&I);
1427   }
1428 
1429   void handleCASOrRMW(Instruction &I) {
1430     assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
1431 
1432     IRBuilder<> IRB(&I);
1433     Value *Addr = I.getOperand(0);
1434     Value *ShadowPtr =
1435         getShadowOriginPtr(Addr, IRB, I.getType(), /*Alignment*/ 1).first;
1436 
1437     if (ClCheckAccessAddress)
1438       insertShadowCheck(Addr, &I);
1439 
1440     // Only test the conditional argument of cmpxchg instruction.
1441     // The other argument can potentially be uninitialized, but we can not
1442     // detect this situation reliably without possible false positives.
1443     if (isa<AtomicCmpXchgInst>(I))
1444       insertShadowCheck(I.getOperand(1), &I);
1445 
1446     IRB.CreateStore(getCleanShadow(&I), ShadowPtr);
1447 
1448     setShadow(&I, getCleanShadow(&I));
1449     setOrigin(&I, getCleanOrigin());
1450   }
1451 
1452   void visitAtomicRMWInst(AtomicRMWInst &I) {
1453     handleCASOrRMW(I);
1454     I.setOrdering(addReleaseOrdering(I.getOrdering()));
1455   }
1456 
1457   void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
1458     handleCASOrRMW(I);
1459     I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
1460   }
1461 
1462   // Vector manipulation.
1463   void visitExtractElementInst(ExtractElementInst &I) {
1464     insertShadowCheck(I.getOperand(1), &I);
1465     IRBuilder<> IRB(&I);
1466     setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
1467               "_msprop"));
1468     setOrigin(&I, getOrigin(&I, 0));
1469   }
1470 
1471   void visitInsertElementInst(InsertElementInst &I) {
1472     insertShadowCheck(I.getOperand(2), &I);
1473     IRBuilder<> IRB(&I);
1474     setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1),
1475               I.getOperand(2), "_msprop"));
1476     setOriginForNaryOp(I);
1477   }
1478 
1479   void visitShuffleVectorInst(ShuffleVectorInst &I) {
1480     insertShadowCheck(I.getOperand(2), &I);
1481     IRBuilder<> IRB(&I);
1482     setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1),
1483               I.getOperand(2), "_msprop"));
1484     setOriginForNaryOp(I);
1485   }
1486 
1487   // Casts.
1488   void visitSExtInst(SExtInst &I) {
1489     IRBuilder<> IRB(&I);
1490     setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop"));
1491     setOrigin(&I, getOrigin(&I, 0));
1492   }
1493 
1494   void visitZExtInst(ZExtInst &I) {
1495     IRBuilder<> IRB(&I);
1496     setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop"));
1497     setOrigin(&I, getOrigin(&I, 0));
1498   }
1499 
1500   void visitTruncInst(TruncInst &I) {
1501     IRBuilder<> IRB(&I);
1502     setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop"));
1503     setOrigin(&I, getOrigin(&I, 0));
1504   }
1505 
1506   void visitBitCastInst(BitCastInst &I) {
1507     // Special case: if this is the bitcast (there is exactly 1 allowed) between
1508     // a musttail call and a ret, don't instrument. New instructions are not
1509     // allowed after a musttail call.
1510     if (auto *CI = dyn_cast<CallInst>(I.getOperand(0)))
1511       if (CI->isMustTailCall())
1512         return;
1513     IRBuilder<> IRB(&I);
1514     setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
1515     setOrigin(&I, getOrigin(&I, 0));
1516   }
1517 
1518   void visitPtrToIntInst(PtrToIntInst &I) {
1519     IRBuilder<> IRB(&I);
1520     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
1521              "_msprop_ptrtoint"));
1522     setOrigin(&I, getOrigin(&I, 0));
1523   }
1524 
1525   void visitIntToPtrInst(IntToPtrInst &I) {
1526     IRBuilder<> IRB(&I);
1527     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
1528              "_msprop_inttoptr"));
1529     setOrigin(&I, getOrigin(&I, 0));
1530   }
1531 
1532   void visitFPToSIInst(CastInst& I) { handleShadowOr(I); }
1533   void visitFPToUIInst(CastInst& I) { handleShadowOr(I); }
1534   void visitSIToFPInst(CastInst& I) { handleShadowOr(I); }
1535   void visitUIToFPInst(CastInst& I) { handleShadowOr(I); }
1536   void visitFPExtInst(CastInst& I) { handleShadowOr(I); }
1537   void visitFPTruncInst(CastInst& I) { handleShadowOr(I); }
1538 
1539   /// \brief Propagate shadow for bitwise AND.
1540   ///
1541   /// This code is exact, i.e. if, for example, a bit in the left argument
1542   /// is defined and 0, then neither the value not definedness of the
1543   /// corresponding bit in B don't affect the resulting shadow.
1544   void visitAnd(BinaryOperator &I) {
1545     IRBuilder<> IRB(&I);
1546     //  "And" of 0 and a poisoned value results in unpoisoned value.
1547     //  1&1 => 1;     0&1 => 0;     p&1 => p;
1548     //  1&0 => 0;     0&0 => 0;     p&0 => 0;
1549     //  1&p => p;     0&p => 0;     p&p => p;
1550     //  S = (S1 & S2) | (V1 & S2) | (S1 & V2)
1551     Value *S1 = getShadow(&I, 0);
1552     Value *S2 = getShadow(&I, 1);
1553     Value *V1 = I.getOperand(0);
1554     Value *V2 = I.getOperand(1);
1555     if (V1->getType() != S1->getType()) {
1556       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
1557       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
1558     }
1559     Value *S1S2 = IRB.CreateAnd(S1, S2);
1560     Value *V1S2 = IRB.CreateAnd(V1, S2);
1561     Value *S1V2 = IRB.CreateAnd(S1, V2);
1562     setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
1563     setOriginForNaryOp(I);
1564   }
1565 
1566   void visitOr(BinaryOperator &I) {
1567     IRBuilder<> IRB(&I);
1568     //  "Or" of 1 and a poisoned value results in unpoisoned value.
1569     //  1|1 => 1;     0|1 => 1;     p|1 => 1;
1570     //  1|0 => 1;     0|0 => 0;     p|0 => p;
1571     //  1|p => 1;     0|p => p;     p|p => p;
1572     //  S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
1573     Value *S1 = getShadow(&I, 0);
1574     Value *S2 = getShadow(&I, 1);
1575     Value *V1 = IRB.CreateNot(I.getOperand(0));
1576     Value *V2 = IRB.CreateNot(I.getOperand(1));
1577     if (V1->getType() != S1->getType()) {
1578       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
1579       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
1580     }
1581     Value *S1S2 = IRB.CreateAnd(S1, S2);
1582     Value *V1S2 = IRB.CreateAnd(V1, S2);
1583     Value *S1V2 = IRB.CreateAnd(S1, V2);
1584     setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
1585     setOriginForNaryOp(I);
1586   }
1587 
1588   /// \brief Default propagation of shadow and/or origin.
1589   ///
1590   /// This class implements the general case of shadow propagation, used in all
1591   /// cases where we don't know and/or don't care about what the operation
1592   /// actually does. It converts all input shadow values to a common type
1593   /// (extending or truncating as necessary), and bitwise OR's them.
1594   ///
1595   /// This is much cheaper than inserting checks (i.e. requiring inputs to be
1596   /// fully initialized), and less prone to false positives.
1597   ///
1598   /// This class also implements the general case of origin propagation. For a
1599   /// Nary operation, result origin is set to the origin of an argument that is
1600   /// not entirely initialized. If there is more than one such arguments, the
1601   /// rightmost of them is picked. It does not matter which one is picked if all
1602   /// arguments are initialized.
1603   template <bool CombineShadow>
1604   class Combiner {
1605     Value *Shadow = nullptr;
1606     Value *Origin = nullptr;
1607     IRBuilder<> &IRB;
1608     MemorySanitizerVisitor *MSV;
1609 
1610   public:
1611     Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB)
1612         : IRB(IRB), MSV(MSV) {}
1613 
1614     /// \brief Add a pair of shadow and origin values to the mix.
1615     Combiner &Add(Value *OpShadow, Value *OpOrigin) {
1616       if (CombineShadow) {
1617         assert(OpShadow);
1618         if (!Shadow)
1619           Shadow = OpShadow;
1620         else {
1621           OpShadow = MSV->CreateShadowCast(IRB, OpShadow, Shadow->getType());
1622           Shadow = IRB.CreateOr(Shadow, OpShadow, "_msprop");
1623         }
1624       }
1625 
1626       if (MSV->MS.TrackOrigins) {
1627         assert(OpOrigin);
1628         if (!Origin) {
1629           Origin = OpOrigin;
1630         } else {
1631           Constant *ConstOrigin = dyn_cast<Constant>(OpOrigin);
1632           // No point in adding something that might result in 0 origin value.
1633           if (!ConstOrigin || !ConstOrigin->isNullValue()) {
1634             Value *FlatShadow = MSV->convertToShadowTyNoVec(OpShadow, IRB);
1635             Value *Cond =
1636                 IRB.CreateICmpNE(FlatShadow, MSV->getCleanShadow(FlatShadow));
1637             Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
1638           }
1639         }
1640       }
1641       return *this;
1642     }
1643 
1644     /// \brief Add an application value to the mix.
1645     Combiner &Add(Value *V) {
1646       Value *OpShadow = MSV->getShadow(V);
1647       Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : nullptr;
1648       return Add(OpShadow, OpOrigin);
1649     }
1650 
1651     /// \brief Set the current combined values as the given instruction's shadow
1652     /// and origin.
1653     void Done(Instruction *I) {
1654       if (CombineShadow) {
1655         assert(Shadow);
1656         Shadow = MSV->CreateShadowCast(IRB, Shadow, MSV->getShadowTy(I));
1657         MSV->setShadow(I, Shadow);
1658       }
1659       if (MSV->MS.TrackOrigins) {
1660         assert(Origin);
1661         MSV->setOrigin(I, Origin);
1662       }
1663     }
1664   };
1665 
1666   using ShadowAndOriginCombiner = Combiner<true>;
1667   using OriginCombiner = Combiner<false>;
1668 
1669   /// \brief Propagate origin for arbitrary operation.
1670   void setOriginForNaryOp(Instruction &I) {
1671     if (!MS.TrackOrigins) return;
1672     IRBuilder<> IRB(&I);
1673     OriginCombiner OC(this, IRB);
1674     for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
1675       OC.Add(OI->get());
1676     OC.Done(&I);
1677   }
1678 
1679   size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) {
1680     assert(!(Ty->isVectorTy() && Ty->getScalarType()->isPointerTy()) &&
1681            "Vector of pointers is not a valid shadow type");
1682     return Ty->isVectorTy() ?
1683       Ty->getVectorNumElements() * Ty->getScalarSizeInBits() :
1684       Ty->getPrimitiveSizeInBits();
1685   }
1686 
1687   /// \brief Cast between two shadow types, extending or truncating as
1688   /// necessary.
1689   Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy,
1690                           bool Signed = false) {
1691     Type *srcTy = V->getType();
1692     size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
1693     size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
1694     if (srcSizeInBits > 1 && dstSizeInBits == 1)
1695       return IRB.CreateICmpNE(V, getCleanShadow(V));
1696 
1697     if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
1698       return IRB.CreateIntCast(V, dstTy, Signed);
1699     if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
1700         dstTy->getVectorNumElements() == srcTy->getVectorNumElements())
1701       return IRB.CreateIntCast(V, dstTy, Signed);
1702     Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
1703     Value *V2 =
1704       IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed);
1705     return IRB.CreateBitCast(V2, dstTy);
1706     // TODO: handle struct types.
1707   }
1708 
1709   /// \brief Cast an application value to the type of its own shadow.
1710   Value *CreateAppToShadowCast(IRBuilder<> &IRB, Value *V) {
1711     Type *ShadowTy = getShadowTy(V);
1712     if (V->getType() == ShadowTy)
1713       return V;
1714     if (V->getType()->isPtrOrPtrVectorTy())
1715       return IRB.CreatePtrToInt(V, ShadowTy);
1716     else
1717       return IRB.CreateBitCast(V, ShadowTy);
1718   }
1719 
1720   /// \brief Propagate shadow for arbitrary operation.
1721   void handleShadowOr(Instruction &I) {
1722     IRBuilder<> IRB(&I);
1723     ShadowAndOriginCombiner SC(this, IRB);
1724     for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
1725       SC.Add(OI->get());
1726     SC.Done(&I);
1727   }
1728 
1729   // \brief Handle multiplication by constant.
1730   //
1731   // Handle a special case of multiplication by constant that may have one or
1732   // more zeros in the lower bits. This makes corresponding number of lower bits
1733   // of the result zero as well. We model it by shifting the other operand
1734   // shadow left by the required number of bits. Effectively, we transform
1735   // (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as (Sx << B).
1736   // We use multiplication by 2**N instead of shift to cover the case of
1737   // multiplication by 0, which may occur in some elements of a vector operand.
1738   void handleMulByConstant(BinaryOperator &I, Constant *ConstArg,
1739                            Value *OtherArg) {
1740     Constant *ShadowMul;
1741     Type *Ty = ConstArg->getType();
1742     if (Ty->isVectorTy()) {
1743       unsigned NumElements = Ty->getVectorNumElements();
1744       Type *EltTy = Ty->getSequentialElementType();
1745       SmallVector<Constant *, 16> Elements;
1746       for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
1747         if (ConstantInt *Elt =
1748                 dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx))) {
1749           const APInt &V = Elt->getValue();
1750           APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
1751           Elements.push_back(ConstantInt::get(EltTy, V2));
1752         } else {
1753           Elements.push_back(ConstantInt::get(EltTy, 1));
1754         }
1755       }
1756       ShadowMul = ConstantVector::get(Elements);
1757     } else {
1758       if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg)) {
1759         const APInt &V = Elt->getValue();
1760         APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
1761         ShadowMul = ConstantInt::get(Ty, V2);
1762       } else {
1763         ShadowMul = ConstantInt::get(Ty, 1);
1764       }
1765     }
1766 
1767     IRBuilder<> IRB(&I);
1768     setShadow(&I,
1769               IRB.CreateMul(getShadow(OtherArg), ShadowMul, "msprop_mul_cst"));
1770     setOrigin(&I, getOrigin(OtherArg));
1771   }
1772 
1773   void visitMul(BinaryOperator &I) {
1774     Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
1775     Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
1776     if (constOp0 && !constOp1)
1777       handleMulByConstant(I, constOp0, I.getOperand(1));
1778     else if (constOp1 && !constOp0)
1779       handleMulByConstant(I, constOp1, I.getOperand(0));
1780     else
1781       handleShadowOr(I);
1782   }
1783 
1784   void visitFAdd(BinaryOperator &I) { handleShadowOr(I); }
1785   void visitFSub(BinaryOperator &I) { handleShadowOr(I); }
1786   void visitFMul(BinaryOperator &I) { handleShadowOr(I); }
1787   void visitAdd(BinaryOperator &I) { handleShadowOr(I); }
1788   void visitSub(BinaryOperator &I) { handleShadowOr(I); }
1789   void visitXor(BinaryOperator &I) { handleShadowOr(I); }
1790 
1791   void handleDiv(Instruction &I) {
1792     IRBuilder<> IRB(&I);
1793     // Strict on the second argument.
1794     insertShadowCheck(I.getOperand(1), &I);
1795     setShadow(&I, getShadow(&I, 0));
1796     setOrigin(&I, getOrigin(&I, 0));
1797   }
1798 
1799   void visitUDiv(BinaryOperator &I) { handleDiv(I); }
1800   void visitSDiv(BinaryOperator &I) { handleDiv(I); }
1801   void visitFDiv(BinaryOperator &I) { handleDiv(I); }
1802   void visitURem(BinaryOperator &I) { handleDiv(I); }
1803   void visitSRem(BinaryOperator &I) { handleDiv(I); }
1804   void visitFRem(BinaryOperator &I) { handleDiv(I); }
1805 
1806   /// \brief Instrument == and != comparisons.
1807   ///
1808   /// Sometimes the comparison result is known even if some of the bits of the
1809   /// arguments are not.
1810   void handleEqualityComparison(ICmpInst &I) {
1811     IRBuilder<> IRB(&I);
1812     Value *A = I.getOperand(0);
1813     Value *B = I.getOperand(1);
1814     Value *Sa = getShadow(A);
1815     Value *Sb = getShadow(B);
1816 
1817     // Get rid of pointers and vectors of pointers.
1818     // For ints (and vectors of ints), types of A and Sa match,
1819     // and this is a no-op.
1820     A = IRB.CreatePointerCast(A, Sa->getType());
1821     B = IRB.CreatePointerCast(B, Sb->getType());
1822 
1823     // A == B  <==>  (C = A^B) == 0
1824     // A != B  <==>  (C = A^B) != 0
1825     // Sc = Sa | Sb
1826     Value *C = IRB.CreateXor(A, B);
1827     Value *Sc = IRB.CreateOr(Sa, Sb);
1828     // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now)
1829     // Result is defined if one of the following is true
1830     // * there is a defined 1 bit in C
1831     // * C is fully defined
1832     // Si = !(C & ~Sc) && Sc
1833     Value *Zero = Constant::getNullValue(Sc->getType());
1834     Value *MinusOne = Constant::getAllOnesValue(Sc->getType());
1835     Value *Si =
1836       IRB.CreateAnd(IRB.CreateICmpNE(Sc, Zero),
1837                     IRB.CreateICmpEQ(
1838                       IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero));
1839     Si->setName("_msprop_icmp");
1840     setShadow(&I, Si);
1841     setOriginForNaryOp(I);
1842   }
1843 
1844   /// \brief Build the lowest possible value of V, taking into account V's
1845   ///        uninitialized bits.
1846   Value *getLowestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
1847                                 bool isSigned) {
1848     if (isSigned) {
1849       // Split shadow into sign bit and other bits.
1850       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
1851       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
1852       // Maximise the undefined shadow bit, minimize other undefined bits.
1853       return
1854         IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaOtherBits)), SaSignBit);
1855     } else {
1856       // Minimize undefined bits.
1857       return IRB.CreateAnd(A, IRB.CreateNot(Sa));
1858     }
1859   }
1860 
1861   /// \brief Build the highest possible value of V, taking into account V's
1862   ///        uninitialized bits.
1863   Value *getHighestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
1864                                 bool isSigned) {
1865     if (isSigned) {
1866       // Split shadow into sign bit and other bits.
1867       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
1868       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
1869       // Minimise the undefined shadow bit, maximise other undefined bits.
1870       return
1871         IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaSignBit)), SaOtherBits);
1872     } else {
1873       // Maximize undefined bits.
1874       return IRB.CreateOr(A, Sa);
1875     }
1876   }
1877 
1878   /// \brief Instrument relational comparisons.
1879   ///
1880   /// This function does exact shadow propagation for all relational
1881   /// comparisons of integers, pointers and vectors of those.
1882   /// FIXME: output seems suboptimal when one of the operands is a constant
1883   void handleRelationalComparisonExact(ICmpInst &I) {
1884     IRBuilder<> IRB(&I);
1885     Value *A = I.getOperand(0);
1886     Value *B = I.getOperand(1);
1887     Value *Sa = getShadow(A);
1888     Value *Sb = getShadow(B);
1889 
1890     // Get rid of pointers and vectors of pointers.
1891     // For ints (and vectors of ints), types of A and Sa match,
1892     // and this is a no-op.
1893     A = IRB.CreatePointerCast(A, Sa->getType());
1894     B = IRB.CreatePointerCast(B, Sb->getType());
1895 
1896     // Let [a0, a1] be the interval of possible values of A, taking into account
1897     // its undefined bits. Let [b0, b1] be the interval of possible values of B.
1898     // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0).
1899     bool IsSigned = I.isSigned();
1900     Value *S1 = IRB.CreateICmp(I.getPredicate(),
1901                                getLowestPossibleValue(IRB, A, Sa, IsSigned),
1902                                getHighestPossibleValue(IRB, B, Sb, IsSigned));
1903     Value *S2 = IRB.CreateICmp(I.getPredicate(),
1904                                getHighestPossibleValue(IRB, A, Sa, IsSigned),
1905                                getLowestPossibleValue(IRB, B, Sb, IsSigned));
1906     Value *Si = IRB.CreateXor(S1, S2);
1907     setShadow(&I, Si);
1908     setOriginForNaryOp(I);
1909   }
1910 
1911   /// \brief Instrument signed relational comparisons.
1912   ///
1913   /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest
1914   /// bit of the shadow. Everything else is delegated to handleShadowOr().
1915   void handleSignedRelationalComparison(ICmpInst &I) {
1916     Constant *constOp;
1917     Value *op = nullptr;
1918     CmpInst::Predicate pre;
1919     if ((constOp = dyn_cast<Constant>(I.getOperand(1)))) {
1920       op = I.getOperand(0);
1921       pre = I.getPredicate();
1922     } else if ((constOp = dyn_cast<Constant>(I.getOperand(0)))) {
1923       op = I.getOperand(1);
1924       pre = I.getSwappedPredicate();
1925     } else {
1926       handleShadowOr(I);
1927       return;
1928     }
1929 
1930     if ((constOp->isNullValue() &&
1931          (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) ||
1932         (constOp->isAllOnesValue() &&
1933          (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) {
1934       IRBuilder<> IRB(&I);
1935       Value *Shadow = IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op),
1936                                         "_msprop_icmp_s");
1937       setShadow(&I, Shadow);
1938       setOrigin(&I, getOrigin(op));
1939     } else {
1940       handleShadowOr(I);
1941     }
1942   }
1943 
1944   void visitICmpInst(ICmpInst &I) {
1945     if (!ClHandleICmp) {
1946       handleShadowOr(I);
1947       return;
1948     }
1949     if (I.isEquality()) {
1950       handleEqualityComparison(I);
1951       return;
1952     }
1953 
1954     assert(I.isRelational());
1955     if (ClHandleICmpExact) {
1956       handleRelationalComparisonExact(I);
1957       return;
1958     }
1959     if (I.isSigned()) {
1960       handleSignedRelationalComparison(I);
1961       return;
1962     }
1963 
1964     assert(I.isUnsigned());
1965     if ((isa<Constant>(I.getOperand(0)) || isa<Constant>(I.getOperand(1)))) {
1966       handleRelationalComparisonExact(I);
1967       return;
1968     }
1969 
1970     handleShadowOr(I);
1971   }
1972 
1973   void visitFCmpInst(FCmpInst &I) {
1974     handleShadowOr(I);
1975   }
1976 
1977   void handleShift(BinaryOperator &I) {
1978     IRBuilder<> IRB(&I);
1979     // If any of the S2 bits are poisoned, the whole thing is poisoned.
1980     // Otherwise perform the same shift on S1.
1981     Value *S1 = getShadow(&I, 0);
1982     Value *S2 = getShadow(&I, 1);
1983     Value *S2Conv = IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)),
1984                                    S2->getType());
1985     Value *V2 = I.getOperand(1);
1986     Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2);
1987     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
1988     setOriginForNaryOp(I);
1989   }
1990 
1991   void visitShl(BinaryOperator &I) { handleShift(I); }
1992   void visitAShr(BinaryOperator &I) { handleShift(I); }
1993   void visitLShr(BinaryOperator &I) { handleShift(I); }
1994 
1995   /// \brief Instrument llvm.memmove
1996   ///
1997   /// At this point we don't know if llvm.memmove will be inlined or not.
1998   /// If we don't instrument it and it gets inlined,
1999   /// our interceptor will not kick in and we will lose the memmove.
2000   /// If we instrument the call here, but it does not get inlined,
2001   /// we will memove the shadow twice: which is bad in case
2002   /// of overlapping regions. So, we simply lower the intrinsic to a call.
2003   ///
2004   /// Similar situation exists for memcpy and memset.
2005   void visitMemMoveInst(MemMoveInst &I) {
2006     IRBuilder<> IRB(&I);
2007     IRB.CreateCall(
2008         MS.MemmoveFn,
2009         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2010          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2011          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2012     I.eraseFromParent();
2013   }
2014 
2015   // Similar to memmove: avoid copying shadow twice.
2016   // This is somewhat unfortunate as it may slowdown small constant memcpys.
2017   // FIXME: consider doing manual inline for small constant sizes and proper
2018   // alignment.
2019   void visitMemCpyInst(MemCpyInst &I) {
2020     IRBuilder<> IRB(&I);
2021     IRB.CreateCall(
2022         MS.MemcpyFn,
2023         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2024          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2025          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2026     I.eraseFromParent();
2027   }
2028 
2029   // Same as memcpy.
2030   void visitMemSetInst(MemSetInst &I) {
2031     IRBuilder<> IRB(&I);
2032     IRB.CreateCall(
2033         MS.MemsetFn,
2034         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2035          IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
2036          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2037     I.eraseFromParent();
2038   }
2039 
2040   void visitVAStartInst(VAStartInst &I) {
2041     VAHelper->visitVAStartInst(I);
2042   }
2043 
2044   void visitVACopyInst(VACopyInst &I) {
2045     VAHelper->visitVACopyInst(I);
2046   }
2047 
2048   /// \brief Handle vector store-like intrinsics.
2049   ///
2050   /// Instrument intrinsics that look like a simple SIMD store: writes memory,
2051   /// has 1 pointer argument and 1 vector argument, returns void.
2052   bool handleVectorStoreIntrinsic(IntrinsicInst &I) {
2053     IRBuilder<> IRB(&I);
2054     Value* Addr = I.getArgOperand(0);
2055     Value *Shadow = getShadow(&I, 1);
2056     Value *ShadowPtr, *OriginPtr;
2057 
2058     // We don't know the pointer alignment (could be unaligned SSE store!).
2059     // Have to assume to worst case.
2060     std::tie(ShadowPtr, OriginPtr) =
2061         getShadowOriginPtr(Addr, IRB, Shadow->getType(), /*Alignment*/ 1);
2062     IRB.CreateAlignedStore(Shadow, ShadowPtr, 1);
2063 
2064     if (ClCheckAccessAddress)
2065       insertShadowCheck(Addr, &I);
2066 
2067     // FIXME: factor out common code from materializeStores
2068     if (MS.TrackOrigins) IRB.CreateStore(getOrigin(&I, 1), OriginPtr);
2069     return true;
2070   }
2071 
2072   /// \brief Handle vector load-like intrinsics.
2073   ///
2074   /// Instrument intrinsics that look like a simple SIMD load: reads memory,
2075   /// has 1 pointer argument, returns a vector.
2076   bool handleVectorLoadIntrinsic(IntrinsicInst &I) {
2077     IRBuilder<> IRB(&I);
2078     Value *Addr = I.getArgOperand(0);
2079 
2080     Type *ShadowTy = getShadowTy(&I);
2081     Value *ShadowPtr, *OriginPtr;
2082     if (PropagateShadow) {
2083       // We don't know the pointer alignment (could be unaligned SSE load!).
2084       // Have to assume to worst case.
2085       unsigned Alignment = 1;
2086       std::tie(ShadowPtr, OriginPtr) =
2087           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment);
2088       setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, Alignment, "_msld"));
2089     } else {
2090       setShadow(&I, getCleanShadow(&I));
2091     }
2092 
2093     if (ClCheckAccessAddress)
2094       insertShadowCheck(Addr, &I);
2095 
2096     if (MS.TrackOrigins) {
2097       if (PropagateShadow)
2098         setOrigin(&I, IRB.CreateLoad(OriginPtr));
2099       else
2100         setOrigin(&I, getCleanOrigin());
2101     }
2102     return true;
2103   }
2104 
2105   /// \brief Handle (SIMD arithmetic)-like intrinsics.
2106   ///
2107   /// Instrument intrinsics with any number of arguments of the same type,
2108   /// equal to the return type. The type should be simple (no aggregates or
2109   /// pointers; vectors are fine).
2110   /// Caller guarantees that this intrinsic does not access memory.
2111   bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I) {
2112     Type *RetTy = I.getType();
2113     if (!(RetTy->isIntOrIntVectorTy() ||
2114           RetTy->isFPOrFPVectorTy() ||
2115           RetTy->isX86_MMXTy()))
2116       return false;
2117 
2118     unsigned NumArgOperands = I.getNumArgOperands();
2119 
2120     for (unsigned i = 0; i < NumArgOperands; ++i) {
2121       Type *Ty = I.getArgOperand(i)->getType();
2122       if (Ty != RetTy)
2123         return false;
2124     }
2125 
2126     IRBuilder<> IRB(&I);
2127     ShadowAndOriginCombiner SC(this, IRB);
2128     for (unsigned i = 0; i < NumArgOperands; ++i)
2129       SC.Add(I.getArgOperand(i));
2130     SC.Done(&I);
2131 
2132     return true;
2133   }
2134 
2135   /// \brief Heuristically instrument unknown intrinsics.
2136   ///
2137   /// The main purpose of this code is to do something reasonable with all
2138   /// random intrinsics we might encounter, most importantly - SIMD intrinsics.
2139   /// We recognize several classes of intrinsics by their argument types and
2140   /// ModRefBehaviour and apply special intrumentation when we are reasonably
2141   /// sure that we know what the intrinsic does.
2142   ///
2143   /// We special-case intrinsics where this approach fails. See llvm.bswap
2144   /// handling as an example of that.
2145   bool handleUnknownIntrinsic(IntrinsicInst &I) {
2146     unsigned NumArgOperands = I.getNumArgOperands();
2147     if (NumArgOperands == 0)
2148       return false;
2149 
2150     if (NumArgOperands == 2 &&
2151         I.getArgOperand(0)->getType()->isPointerTy() &&
2152         I.getArgOperand(1)->getType()->isVectorTy() &&
2153         I.getType()->isVoidTy() &&
2154         !I.onlyReadsMemory()) {
2155       // This looks like a vector store.
2156       return handleVectorStoreIntrinsic(I);
2157     }
2158 
2159     if (NumArgOperands == 1 &&
2160         I.getArgOperand(0)->getType()->isPointerTy() &&
2161         I.getType()->isVectorTy() &&
2162         I.onlyReadsMemory()) {
2163       // This looks like a vector load.
2164       return handleVectorLoadIntrinsic(I);
2165     }
2166 
2167     if (I.doesNotAccessMemory())
2168       if (maybeHandleSimpleNomemIntrinsic(I))
2169         return true;
2170 
2171     // FIXME: detect and handle SSE maskstore/maskload
2172     return false;
2173   }
2174 
2175   void handleBswap(IntrinsicInst &I) {
2176     IRBuilder<> IRB(&I);
2177     Value *Op = I.getArgOperand(0);
2178     Type *OpType = Op->getType();
2179     Function *BswapFunc = Intrinsic::getDeclaration(
2180       F.getParent(), Intrinsic::bswap, makeArrayRef(&OpType, 1));
2181     setShadow(&I, IRB.CreateCall(BswapFunc, getShadow(Op)));
2182     setOrigin(&I, getOrigin(Op));
2183   }
2184 
2185   // \brief Instrument vector convert instrinsic.
2186   //
2187   // This function instruments intrinsics like cvtsi2ss:
2188   // %Out = int_xxx_cvtyyy(%ConvertOp)
2189   // or
2190   // %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp)
2191   // Intrinsic converts \p NumUsedElements elements of \p ConvertOp to the same
2192   // number \p Out elements, and (if has 2 arguments) copies the rest of the
2193   // elements from \p CopyOp.
2194   // In most cases conversion involves floating-point value which may trigger a
2195   // hardware exception when not fully initialized. For this reason we require
2196   // \p ConvertOp[0:NumUsedElements] to be fully initialized and trap otherwise.
2197   // We copy the shadow of \p CopyOp[NumUsedElements:] to \p
2198   // Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
2199   // return a fully initialized value.
2200   void handleVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements) {
2201     IRBuilder<> IRB(&I);
2202     Value *CopyOp, *ConvertOp;
2203 
2204     switch (I.getNumArgOperands()) {
2205     case 3:
2206       assert(isa<ConstantInt>(I.getArgOperand(2)) && "Invalid rounding mode");
2207       LLVM_FALLTHROUGH;
2208     case 2:
2209       CopyOp = I.getArgOperand(0);
2210       ConvertOp = I.getArgOperand(1);
2211       break;
2212     case 1:
2213       ConvertOp = I.getArgOperand(0);
2214       CopyOp = nullptr;
2215       break;
2216     default:
2217       llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
2218     }
2219 
2220     // The first *NumUsedElements* elements of ConvertOp are converted to the
2221     // same number of output elements. The rest of the output is copied from
2222     // CopyOp, or (if not available) filled with zeroes.
2223     // Combine shadow for elements of ConvertOp that are used in this operation,
2224     // and insert a check.
2225     // FIXME: consider propagating shadow of ConvertOp, at least in the case of
2226     // int->any conversion.
2227     Value *ConvertShadow = getShadow(ConvertOp);
2228     Value *AggShadow = nullptr;
2229     if (ConvertOp->getType()->isVectorTy()) {
2230       AggShadow = IRB.CreateExtractElement(
2231           ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
2232       for (int i = 1; i < NumUsedElements; ++i) {
2233         Value *MoreShadow = IRB.CreateExtractElement(
2234             ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), i));
2235         AggShadow = IRB.CreateOr(AggShadow, MoreShadow);
2236       }
2237     } else {
2238       AggShadow = ConvertShadow;
2239     }
2240     assert(AggShadow->getType()->isIntegerTy());
2241     insertShadowCheck(AggShadow, getOrigin(ConvertOp), &I);
2242 
2243     // Build result shadow by zero-filling parts of CopyOp shadow that come from
2244     // ConvertOp.
2245     if (CopyOp) {
2246       assert(CopyOp->getType() == I.getType());
2247       assert(CopyOp->getType()->isVectorTy());
2248       Value *ResultShadow = getShadow(CopyOp);
2249       Type *EltTy = ResultShadow->getType()->getVectorElementType();
2250       for (int i = 0; i < NumUsedElements; ++i) {
2251         ResultShadow = IRB.CreateInsertElement(
2252             ResultShadow, ConstantInt::getNullValue(EltTy),
2253             ConstantInt::get(IRB.getInt32Ty(), i));
2254       }
2255       setShadow(&I, ResultShadow);
2256       setOrigin(&I, getOrigin(CopyOp));
2257     } else {
2258       setShadow(&I, getCleanShadow(&I));
2259       setOrigin(&I, getCleanOrigin());
2260     }
2261   }
2262 
2263   // Given a scalar or vector, extract lower 64 bits (or less), and return all
2264   // zeroes if it is zero, and all ones otherwise.
2265   Value *Lower64ShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
2266     if (S->getType()->isVectorTy())
2267       S = CreateShadowCast(IRB, S, IRB.getInt64Ty(), /* Signed */ true);
2268     assert(S->getType()->getPrimitiveSizeInBits() <= 64);
2269     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
2270     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
2271   }
2272 
2273   // Given a vector, extract its first element, and return all
2274   // zeroes if it is zero, and all ones otherwise.
2275   Value *LowerElementShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
2276     Value *S1 = IRB.CreateExtractElement(S, (uint64_t)0);
2277     Value *S2 = IRB.CreateICmpNE(S1, getCleanShadow(S1));
2278     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
2279   }
2280 
2281   Value *VariableShadowExtend(IRBuilder<> &IRB, Value *S) {
2282     Type *T = S->getType();
2283     assert(T->isVectorTy());
2284     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
2285     return IRB.CreateSExt(S2, T);
2286   }
2287 
2288   // \brief Instrument vector shift instrinsic.
2289   //
2290   // This function instruments intrinsics like int_x86_avx2_psll_w.
2291   // Intrinsic shifts %In by %ShiftSize bits.
2292   // %ShiftSize may be a vector. In that case the lower 64 bits determine shift
2293   // size, and the rest is ignored. Behavior is defined even if shift size is
2294   // greater than register (or field) width.
2295   void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) {
2296     assert(I.getNumArgOperands() == 2);
2297     IRBuilder<> IRB(&I);
2298     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2299     // Otherwise perform the same shift on S1.
2300     Value *S1 = getShadow(&I, 0);
2301     Value *S2 = getShadow(&I, 1);
2302     Value *S2Conv = Variable ? VariableShadowExtend(IRB, S2)
2303                              : Lower64ShadowExtend(IRB, S2, getShadowTy(&I));
2304     Value *V1 = I.getOperand(0);
2305     Value *V2 = I.getOperand(1);
2306     Value *Shift = IRB.CreateCall(I.getCalledValue(),
2307                                   {IRB.CreateBitCast(S1, V1->getType()), V2});
2308     Shift = IRB.CreateBitCast(Shift, getShadowTy(&I));
2309     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2310     setOriginForNaryOp(I);
2311   }
2312 
2313   // \brief Get an X86_MMX-sized vector type.
2314   Type *getMMXVectorTy(unsigned EltSizeInBits) {
2315     const unsigned X86_MMXSizeInBits = 64;
2316     return VectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
2317                            X86_MMXSizeInBits / EltSizeInBits);
2318   }
2319 
2320   // \brief Returns a signed counterpart for an (un)signed-saturate-and-pack
2321   // intrinsic.
2322   Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) {
2323     switch (id) {
2324       case Intrinsic::x86_sse2_packsswb_128:
2325       case Intrinsic::x86_sse2_packuswb_128:
2326         return Intrinsic::x86_sse2_packsswb_128;
2327 
2328       case Intrinsic::x86_sse2_packssdw_128:
2329       case Intrinsic::x86_sse41_packusdw:
2330         return Intrinsic::x86_sse2_packssdw_128;
2331 
2332       case Intrinsic::x86_avx2_packsswb:
2333       case Intrinsic::x86_avx2_packuswb:
2334         return Intrinsic::x86_avx2_packsswb;
2335 
2336       case Intrinsic::x86_avx2_packssdw:
2337       case Intrinsic::x86_avx2_packusdw:
2338         return Intrinsic::x86_avx2_packssdw;
2339 
2340       case Intrinsic::x86_mmx_packsswb:
2341       case Intrinsic::x86_mmx_packuswb:
2342         return Intrinsic::x86_mmx_packsswb;
2343 
2344       case Intrinsic::x86_mmx_packssdw:
2345         return Intrinsic::x86_mmx_packssdw;
2346       default:
2347         llvm_unreachable("unexpected intrinsic id");
2348     }
2349   }
2350 
2351   // \brief Instrument vector pack instrinsic.
2352   //
2353   // This function instruments intrinsics like x86_mmx_packsswb, that
2354   // packs elements of 2 input vectors into half as many bits with saturation.
2355   // Shadow is propagated with the signed variant of the same intrinsic applied
2356   // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer).
2357   // EltSizeInBits is used only for x86mmx arguments.
2358   void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned EltSizeInBits = 0) {
2359     assert(I.getNumArgOperands() == 2);
2360     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
2361     IRBuilder<> IRB(&I);
2362     Value *S1 = getShadow(&I, 0);
2363     Value *S2 = getShadow(&I, 1);
2364     assert(isX86_MMX || S1->getType()->isVectorTy());
2365 
2366     // SExt and ICmpNE below must apply to individual elements of input vectors.
2367     // In case of x86mmx arguments, cast them to appropriate vector types and
2368     // back.
2369     Type *T = isX86_MMX ? getMMXVectorTy(EltSizeInBits) : S1->getType();
2370     if (isX86_MMX) {
2371       S1 = IRB.CreateBitCast(S1, T);
2372       S2 = IRB.CreateBitCast(S2, T);
2373     }
2374     Value *S1_ext = IRB.CreateSExt(
2375         IRB.CreateICmpNE(S1, Constant::getNullValue(T)), T);
2376     Value *S2_ext = IRB.CreateSExt(
2377         IRB.CreateICmpNE(S2, Constant::getNullValue(T)), T);
2378     if (isX86_MMX) {
2379       Type *X86_MMXTy = Type::getX86_MMXTy(*MS.C);
2380       S1_ext = IRB.CreateBitCast(S1_ext, X86_MMXTy);
2381       S2_ext = IRB.CreateBitCast(S2_ext, X86_MMXTy);
2382     }
2383 
2384     Function *ShadowFn = Intrinsic::getDeclaration(
2385         F.getParent(), getSignedPackIntrinsic(I.getIntrinsicID()));
2386 
2387     Value *S =
2388         IRB.CreateCall(ShadowFn, {S1_ext, S2_ext}, "_msprop_vector_pack");
2389     if (isX86_MMX) S = IRB.CreateBitCast(S, getShadowTy(&I));
2390     setShadow(&I, S);
2391     setOriginForNaryOp(I);
2392   }
2393 
2394   // \brief Instrument sum-of-absolute-differencies intrinsic.
2395   void handleVectorSadIntrinsic(IntrinsicInst &I) {
2396     const unsigned SignificantBitsPerResultElement = 16;
2397     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
2398     Type *ResTy = isX86_MMX ? IntegerType::get(*MS.C, 64) : I.getType();
2399     unsigned ZeroBitsPerResultElement =
2400         ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement;
2401 
2402     IRBuilder<> IRB(&I);
2403     Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2404     S = IRB.CreateBitCast(S, ResTy);
2405     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
2406                        ResTy);
2407     S = IRB.CreateLShr(S, ZeroBitsPerResultElement);
2408     S = IRB.CreateBitCast(S, getShadowTy(&I));
2409     setShadow(&I, S);
2410     setOriginForNaryOp(I);
2411   }
2412 
2413   // \brief Instrument multiply-add intrinsic.
2414   void handleVectorPmaddIntrinsic(IntrinsicInst &I,
2415                                   unsigned EltSizeInBits = 0) {
2416     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
2417     Type *ResTy = isX86_MMX ? getMMXVectorTy(EltSizeInBits * 2) : I.getType();
2418     IRBuilder<> IRB(&I);
2419     Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2420     S = IRB.CreateBitCast(S, ResTy);
2421     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
2422                        ResTy);
2423     S = IRB.CreateBitCast(S, getShadowTy(&I));
2424     setShadow(&I, S);
2425     setOriginForNaryOp(I);
2426   }
2427 
2428   // \brief Instrument compare-packed intrinsic.
2429   // Basically, an or followed by sext(icmp ne 0) to end up with all-zeros or
2430   // all-ones shadow.
2431   void handleVectorComparePackedIntrinsic(IntrinsicInst &I) {
2432     IRBuilder<> IRB(&I);
2433     Type *ResTy = getShadowTy(&I);
2434     Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2435     Value *S = IRB.CreateSExt(
2436         IRB.CreateICmpNE(S0, Constant::getNullValue(ResTy)), ResTy);
2437     setShadow(&I, S);
2438     setOriginForNaryOp(I);
2439   }
2440 
2441   // \brief Instrument compare-scalar intrinsic.
2442   // This handles both cmp* intrinsics which return the result in the first
2443   // element of a vector, and comi* which return the result as i32.
2444   void handleVectorCompareScalarIntrinsic(IntrinsicInst &I) {
2445     IRBuilder<> IRB(&I);
2446     Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2447     Value *S = LowerElementShadowExtend(IRB, S0, getShadowTy(&I));
2448     setShadow(&I, S);
2449     setOriginForNaryOp(I);
2450   }
2451 
2452   void handleStmxcsr(IntrinsicInst &I) {
2453     IRBuilder<> IRB(&I);
2454     Value* Addr = I.getArgOperand(0);
2455     Type *Ty = IRB.getInt32Ty();
2456     Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, Ty, /*Alignment*/ 1).first;
2457 
2458     IRB.CreateStore(getCleanShadow(Ty),
2459                     IRB.CreatePointerCast(ShadowPtr, Ty->getPointerTo()));
2460 
2461     if (ClCheckAccessAddress)
2462       insertShadowCheck(Addr, &I);
2463   }
2464 
2465   void handleLdmxcsr(IntrinsicInst &I) {
2466     if (!InsertChecks) return;
2467 
2468     IRBuilder<> IRB(&I);
2469     Value *Addr = I.getArgOperand(0);
2470     Type *Ty = IRB.getInt32Ty();
2471     unsigned Alignment = 1;
2472     Value *ShadowPtr, *OriginPtr;
2473     std::tie(ShadowPtr, OriginPtr) =
2474         getShadowOriginPtr(Addr, IRB, Ty, Alignment);
2475 
2476     if (ClCheckAccessAddress)
2477       insertShadowCheck(Addr, &I);
2478 
2479     Value *Shadow = IRB.CreateAlignedLoad(ShadowPtr, Alignment, "_ldmxcsr");
2480     Value *Origin =
2481         MS.TrackOrigins ? IRB.CreateLoad(OriginPtr) : getCleanOrigin();
2482     insertShadowCheck(Shadow, Origin, &I);
2483   }
2484 
2485   void visitIntrinsicInst(IntrinsicInst &I) {
2486     switch (I.getIntrinsicID()) {
2487     case Intrinsic::bswap:
2488       handleBswap(I);
2489       break;
2490     case Intrinsic::x86_sse_stmxcsr:
2491       handleStmxcsr(I);
2492       break;
2493     case Intrinsic::x86_sse_ldmxcsr:
2494       handleLdmxcsr(I);
2495       break;
2496     case Intrinsic::x86_avx512_vcvtsd2usi64:
2497     case Intrinsic::x86_avx512_vcvtsd2usi32:
2498     case Intrinsic::x86_avx512_vcvtss2usi64:
2499     case Intrinsic::x86_avx512_vcvtss2usi32:
2500     case Intrinsic::x86_avx512_cvttss2usi64:
2501     case Intrinsic::x86_avx512_cvttss2usi:
2502     case Intrinsic::x86_avx512_cvttsd2usi64:
2503     case Intrinsic::x86_avx512_cvttsd2usi:
2504     case Intrinsic::x86_avx512_cvtusi2sd:
2505     case Intrinsic::x86_avx512_cvtusi2ss:
2506     case Intrinsic::x86_avx512_cvtusi642sd:
2507     case Intrinsic::x86_avx512_cvtusi642ss:
2508     case Intrinsic::x86_sse2_cvtsd2si64:
2509     case Intrinsic::x86_sse2_cvtsd2si:
2510     case Intrinsic::x86_sse2_cvtsd2ss:
2511     case Intrinsic::x86_sse2_cvtsi2sd:
2512     case Intrinsic::x86_sse2_cvtsi642sd:
2513     case Intrinsic::x86_sse2_cvtss2sd:
2514     case Intrinsic::x86_sse2_cvttsd2si64:
2515     case Intrinsic::x86_sse2_cvttsd2si:
2516     case Intrinsic::x86_sse_cvtsi2ss:
2517     case Intrinsic::x86_sse_cvtsi642ss:
2518     case Intrinsic::x86_sse_cvtss2si64:
2519     case Intrinsic::x86_sse_cvtss2si:
2520     case Intrinsic::x86_sse_cvttss2si64:
2521     case Intrinsic::x86_sse_cvttss2si:
2522       handleVectorConvertIntrinsic(I, 1);
2523       break;
2524     case Intrinsic::x86_sse_cvtps2pi:
2525     case Intrinsic::x86_sse_cvttps2pi:
2526       handleVectorConvertIntrinsic(I, 2);
2527       break;
2528 
2529     case Intrinsic::x86_avx512_psll_w_512:
2530     case Intrinsic::x86_avx512_psll_d_512:
2531     case Intrinsic::x86_avx512_psll_q_512:
2532     case Intrinsic::x86_avx512_pslli_w_512:
2533     case Intrinsic::x86_avx512_pslli_d_512:
2534     case Intrinsic::x86_avx512_pslli_q_512:
2535     case Intrinsic::x86_avx512_psrl_w_512:
2536     case Intrinsic::x86_avx512_psrl_d_512:
2537     case Intrinsic::x86_avx512_psrl_q_512:
2538     case Intrinsic::x86_avx512_psra_w_512:
2539     case Intrinsic::x86_avx512_psra_d_512:
2540     case Intrinsic::x86_avx512_psra_q_512:
2541     case Intrinsic::x86_avx512_psrli_w_512:
2542     case Intrinsic::x86_avx512_psrli_d_512:
2543     case Intrinsic::x86_avx512_psrli_q_512:
2544     case Intrinsic::x86_avx512_psrai_w_512:
2545     case Intrinsic::x86_avx512_psrai_d_512:
2546     case Intrinsic::x86_avx512_psrai_q_512:
2547     case Intrinsic::x86_avx512_psra_q_256:
2548     case Intrinsic::x86_avx512_psra_q_128:
2549     case Intrinsic::x86_avx512_psrai_q_256:
2550     case Intrinsic::x86_avx512_psrai_q_128:
2551     case Intrinsic::x86_avx2_psll_w:
2552     case Intrinsic::x86_avx2_psll_d:
2553     case Intrinsic::x86_avx2_psll_q:
2554     case Intrinsic::x86_avx2_pslli_w:
2555     case Intrinsic::x86_avx2_pslli_d:
2556     case Intrinsic::x86_avx2_pslli_q:
2557     case Intrinsic::x86_avx2_psrl_w:
2558     case Intrinsic::x86_avx2_psrl_d:
2559     case Intrinsic::x86_avx2_psrl_q:
2560     case Intrinsic::x86_avx2_psra_w:
2561     case Intrinsic::x86_avx2_psra_d:
2562     case Intrinsic::x86_avx2_psrli_w:
2563     case Intrinsic::x86_avx2_psrli_d:
2564     case Intrinsic::x86_avx2_psrli_q:
2565     case Intrinsic::x86_avx2_psrai_w:
2566     case Intrinsic::x86_avx2_psrai_d:
2567     case Intrinsic::x86_sse2_psll_w:
2568     case Intrinsic::x86_sse2_psll_d:
2569     case Intrinsic::x86_sse2_psll_q:
2570     case Intrinsic::x86_sse2_pslli_w:
2571     case Intrinsic::x86_sse2_pslli_d:
2572     case Intrinsic::x86_sse2_pslli_q:
2573     case Intrinsic::x86_sse2_psrl_w:
2574     case Intrinsic::x86_sse2_psrl_d:
2575     case Intrinsic::x86_sse2_psrl_q:
2576     case Intrinsic::x86_sse2_psra_w:
2577     case Intrinsic::x86_sse2_psra_d:
2578     case Intrinsic::x86_sse2_psrli_w:
2579     case Intrinsic::x86_sse2_psrli_d:
2580     case Intrinsic::x86_sse2_psrli_q:
2581     case Intrinsic::x86_sse2_psrai_w:
2582     case Intrinsic::x86_sse2_psrai_d:
2583     case Intrinsic::x86_mmx_psll_w:
2584     case Intrinsic::x86_mmx_psll_d:
2585     case Intrinsic::x86_mmx_psll_q:
2586     case Intrinsic::x86_mmx_pslli_w:
2587     case Intrinsic::x86_mmx_pslli_d:
2588     case Intrinsic::x86_mmx_pslli_q:
2589     case Intrinsic::x86_mmx_psrl_w:
2590     case Intrinsic::x86_mmx_psrl_d:
2591     case Intrinsic::x86_mmx_psrl_q:
2592     case Intrinsic::x86_mmx_psra_w:
2593     case Intrinsic::x86_mmx_psra_d:
2594     case Intrinsic::x86_mmx_psrli_w:
2595     case Intrinsic::x86_mmx_psrli_d:
2596     case Intrinsic::x86_mmx_psrli_q:
2597     case Intrinsic::x86_mmx_psrai_w:
2598     case Intrinsic::x86_mmx_psrai_d:
2599       handleVectorShiftIntrinsic(I, /* Variable */ false);
2600       break;
2601     case Intrinsic::x86_avx2_psllv_d:
2602     case Intrinsic::x86_avx2_psllv_d_256:
2603     case Intrinsic::x86_avx512_psllv_d_512:
2604     case Intrinsic::x86_avx2_psllv_q:
2605     case Intrinsic::x86_avx2_psllv_q_256:
2606     case Intrinsic::x86_avx512_psllv_q_512:
2607     case Intrinsic::x86_avx2_psrlv_d:
2608     case Intrinsic::x86_avx2_psrlv_d_256:
2609     case Intrinsic::x86_avx512_psrlv_d_512:
2610     case Intrinsic::x86_avx2_psrlv_q:
2611     case Intrinsic::x86_avx2_psrlv_q_256:
2612     case Intrinsic::x86_avx512_psrlv_q_512:
2613     case Intrinsic::x86_avx2_psrav_d:
2614     case Intrinsic::x86_avx2_psrav_d_256:
2615     case Intrinsic::x86_avx512_psrav_d_512:
2616     case Intrinsic::x86_avx512_psrav_q_128:
2617     case Intrinsic::x86_avx512_psrav_q_256:
2618     case Intrinsic::x86_avx512_psrav_q_512:
2619       handleVectorShiftIntrinsic(I, /* Variable */ true);
2620       break;
2621 
2622     case Intrinsic::x86_sse2_packsswb_128:
2623     case Intrinsic::x86_sse2_packssdw_128:
2624     case Intrinsic::x86_sse2_packuswb_128:
2625     case Intrinsic::x86_sse41_packusdw:
2626     case Intrinsic::x86_avx2_packsswb:
2627     case Intrinsic::x86_avx2_packssdw:
2628     case Intrinsic::x86_avx2_packuswb:
2629     case Intrinsic::x86_avx2_packusdw:
2630       handleVectorPackIntrinsic(I);
2631       break;
2632 
2633     case Intrinsic::x86_mmx_packsswb:
2634     case Intrinsic::x86_mmx_packuswb:
2635       handleVectorPackIntrinsic(I, 16);
2636       break;
2637 
2638     case Intrinsic::x86_mmx_packssdw:
2639       handleVectorPackIntrinsic(I, 32);
2640       break;
2641 
2642     case Intrinsic::x86_mmx_psad_bw:
2643     case Intrinsic::x86_sse2_psad_bw:
2644     case Intrinsic::x86_avx2_psad_bw:
2645       handleVectorSadIntrinsic(I);
2646       break;
2647 
2648     case Intrinsic::x86_sse2_pmadd_wd:
2649     case Intrinsic::x86_avx2_pmadd_wd:
2650     case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
2651     case Intrinsic::x86_avx2_pmadd_ub_sw:
2652       handleVectorPmaddIntrinsic(I);
2653       break;
2654 
2655     case Intrinsic::x86_ssse3_pmadd_ub_sw:
2656       handleVectorPmaddIntrinsic(I, 8);
2657       break;
2658 
2659     case Intrinsic::x86_mmx_pmadd_wd:
2660       handleVectorPmaddIntrinsic(I, 16);
2661       break;
2662 
2663     case Intrinsic::x86_sse_cmp_ss:
2664     case Intrinsic::x86_sse2_cmp_sd:
2665     case Intrinsic::x86_sse_comieq_ss:
2666     case Intrinsic::x86_sse_comilt_ss:
2667     case Intrinsic::x86_sse_comile_ss:
2668     case Intrinsic::x86_sse_comigt_ss:
2669     case Intrinsic::x86_sse_comige_ss:
2670     case Intrinsic::x86_sse_comineq_ss:
2671     case Intrinsic::x86_sse_ucomieq_ss:
2672     case Intrinsic::x86_sse_ucomilt_ss:
2673     case Intrinsic::x86_sse_ucomile_ss:
2674     case Intrinsic::x86_sse_ucomigt_ss:
2675     case Intrinsic::x86_sse_ucomige_ss:
2676     case Intrinsic::x86_sse_ucomineq_ss:
2677     case Intrinsic::x86_sse2_comieq_sd:
2678     case Intrinsic::x86_sse2_comilt_sd:
2679     case Intrinsic::x86_sse2_comile_sd:
2680     case Intrinsic::x86_sse2_comigt_sd:
2681     case Intrinsic::x86_sse2_comige_sd:
2682     case Intrinsic::x86_sse2_comineq_sd:
2683     case Intrinsic::x86_sse2_ucomieq_sd:
2684     case Intrinsic::x86_sse2_ucomilt_sd:
2685     case Intrinsic::x86_sse2_ucomile_sd:
2686     case Intrinsic::x86_sse2_ucomigt_sd:
2687     case Intrinsic::x86_sse2_ucomige_sd:
2688     case Intrinsic::x86_sse2_ucomineq_sd:
2689       handleVectorCompareScalarIntrinsic(I);
2690       break;
2691 
2692     case Intrinsic::x86_sse_cmp_ps:
2693     case Intrinsic::x86_sse2_cmp_pd:
2694       // FIXME: For x86_avx_cmp_pd_256 and x86_avx_cmp_ps_256 this function
2695       // generates reasonably looking IR that fails in the backend with "Do not
2696       // know how to split the result of this operator!".
2697       handleVectorComparePackedIntrinsic(I);
2698       break;
2699 
2700     default:
2701       if (!handleUnknownIntrinsic(I))
2702         visitInstruction(I);
2703       break;
2704     }
2705   }
2706 
2707   void visitCallSite(CallSite CS) {
2708     Instruction &I = *CS.getInstruction();
2709     assert(!I.getMetadata("nosanitize"));
2710     assert((CS.isCall() || CS.isInvoke()) && "Unknown type of CallSite");
2711     if (CS.isCall()) {
2712       CallInst *Call = cast<CallInst>(&I);
2713 
2714       // For inline asm, do the usual thing: check argument shadow and mark all
2715       // outputs as clean. Note that any side effects of the inline asm that are
2716       // not immediately visible in its constraints are not handled.
2717       if (Call->isInlineAsm()) {
2718         visitInstruction(I);
2719         return;
2720       }
2721 
2722       assert(!isa<IntrinsicInst>(&I) && "intrinsics are handled elsewhere");
2723 
2724       // We are going to insert code that relies on the fact that the callee
2725       // will become a non-readonly function after it is instrumented by us. To
2726       // prevent this code from being optimized out, mark that function
2727       // non-readonly in advance.
2728       if (Function *Func = Call->getCalledFunction()) {
2729         // Clear out readonly/readnone attributes.
2730         AttrBuilder B;
2731         B.addAttribute(Attribute::ReadOnly)
2732           .addAttribute(Attribute::ReadNone);
2733         Func->removeAttributes(AttributeList::FunctionIndex, B);
2734       }
2735 
2736       maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI);
2737     }
2738     IRBuilder<> IRB(&I);
2739 
2740     unsigned ArgOffset = 0;
2741     DEBUG(dbgs() << "  CallSite: " << I << "\n");
2742     for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
2743          ArgIt != End; ++ArgIt) {
2744       Value *A = *ArgIt;
2745       unsigned i = ArgIt - CS.arg_begin();
2746       if (!A->getType()->isSized()) {
2747         DEBUG(dbgs() << "Arg " << i << " is not sized: " << I << "\n");
2748         continue;
2749       }
2750       unsigned Size = 0;
2751       Value *Store = nullptr;
2752       // Compute the Shadow for arg even if it is ByVal, because
2753       // in that case getShadow() will copy the actual arg shadow to
2754       // __msan_param_tls.
2755       Value *ArgShadow = getShadow(A);
2756       Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset);
2757       DEBUG(dbgs() << "  Arg#" << i << ": " << *A <<
2758             " Shadow: " << *ArgShadow << "\n");
2759       bool ArgIsInitialized = false;
2760       const DataLayout &DL = F.getParent()->getDataLayout();
2761       if (CS.paramHasAttr(i, Attribute::ByVal)) {
2762         assert(A->getType()->isPointerTy() &&
2763                "ByVal argument is not a pointer!");
2764         Size = DL.getTypeAllocSize(A->getType()->getPointerElementType());
2765         if (ArgOffset + Size > kParamTLSSize) break;
2766         unsigned ParamAlignment = CS.getParamAlignment(i);
2767         unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment);
2768         Value *AShadowPtr =
2769             getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment).first;
2770 
2771         Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
2772                                  Alignment, Size);
2773       } else {
2774         Size = DL.getTypeAllocSize(A->getType());
2775         if (ArgOffset + Size > kParamTLSSize) break;
2776         Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
2777                                        kShadowTLSAlignment);
2778         Constant *Cst = dyn_cast<Constant>(ArgShadow);
2779         if (Cst && Cst->isNullValue()) ArgIsInitialized = true;
2780       }
2781       if (MS.TrackOrigins && !ArgIsInitialized)
2782         IRB.CreateStore(getOrigin(A),
2783                         getOriginPtrForArgument(A, IRB, ArgOffset));
2784       (void)Store;
2785       assert(Size != 0 && Store != nullptr);
2786       DEBUG(dbgs() << "  Param:" << *Store << "\n");
2787       ArgOffset += alignTo(Size, 8);
2788     }
2789     DEBUG(dbgs() << "  done with call args\n");
2790 
2791     FunctionType *FT =
2792       cast<FunctionType>(CS.getCalledValue()->getType()->getContainedType(0));
2793     if (FT->isVarArg()) {
2794       VAHelper->visitCallSite(CS, IRB);
2795     }
2796 
2797     // Now, get the shadow for the RetVal.
2798     if (!I.getType()->isSized()) return;
2799     // Don't emit the epilogue for musttail call returns.
2800     if (CS.isCall() && cast<CallInst>(&I)->isMustTailCall()) return;
2801     IRBuilder<> IRBBefore(&I);
2802     // Until we have full dynamic coverage, make sure the retval shadow is 0.
2803     Value *Base = getShadowPtrForRetval(&I, IRBBefore);
2804     IRBBefore.CreateAlignedStore(getCleanShadow(&I), Base, kShadowTLSAlignment);
2805     BasicBlock::iterator NextInsn;
2806     if (CS.isCall()) {
2807       NextInsn = ++I.getIterator();
2808       assert(NextInsn != I.getParent()->end());
2809     } else {
2810       BasicBlock *NormalDest = cast<InvokeInst>(&I)->getNormalDest();
2811       if (!NormalDest->getSinglePredecessor()) {
2812         // FIXME: this case is tricky, so we are just conservative here.
2813         // Perhaps we need to split the edge between this BB and NormalDest,
2814         // but a naive attempt to use SplitEdge leads to a crash.
2815         setShadow(&I, getCleanShadow(&I));
2816         setOrigin(&I, getCleanOrigin());
2817         return;
2818       }
2819       // FIXME: NextInsn is likely in a basic block that has not been visited yet.
2820       // Anything inserted there will be instrumented by MSan later!
2821       NextInsn = NormalDest->getFirstInsertionPt();
2822       assert(NextInsn != NormalDest->end() &&
2823              "Could not find insertion point for retval shadow load");
2824     }
2825     IRBuilder<> IRBAfter(&*NextInsn);
2826     Value *RetvalShadow =
2827       IRBAfter.CreateAlignedLoad(getShadowPtrForRetval(&I, IRBAfter),
2828                                  kShadowTLSAlignment, "_msret");
2829     setShadow(&I, RetvalShadow);
2830     if (MS.TrackOrigins)
2831       setOrigin(&I, IRBAfter.CreateLoad(getOriginPtrForRetval(IRBAfter)));
2832   }
2833 
2834   bool isAMustTailRetVal(Value *RetVal) {
2835     if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
2836       RetVal = I->getOperand(0);
2837     }
2838     if (auto *I = dyn_cast<CallInst>(RetVal)) {
2839       return I->isMustTailCall();
2840     }
2841     return false;
2842   }
2843 
2844   void visitReturnInst(ReturnInst &I) {
2845     IRBuilder<> IRB(&I);
2846     Value *RetVal = I.getReturnValue();
2847     if (!RetVal) return;
2848     // Don't emit the epilogue for musttail call returns.
2849     if (isAMustTailRetVal(RetVal)) return;
2850     Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
2851     if (CheckReturnValue) {
2852       insertShadowCheck(RetVal, &I);
2853       Value *Shadow = getCleanShadow(RetVal);
2854       IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
2855     } else {
2856       Value *Shadow = getShadow(RetVal);
2857       IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
2858       if (MS.TrackOrigins)
2859         IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
2860     }
2861   }
2862 
2863   void visitPHINode(PHINode &I) {
2864     IRBuilder<> IRB(&I);
2865     if (!PropagateShadow) {
2866       setShadow(&I, getCleanShadow(&I));
2867       setOrigin(&I, getCleanOrigin());
2868       return;
2869     }
2870 
2871     ShadowPHINodes.push_back(&I);
2872     setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(),
2873                                 "_msphi_s"));
2874     if (MS.TrackOrigins)
2875       setOrigin(&I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(),
2876                                   "_msphi_o"));
2877   }
2878 
2879   void visitAllocaInst(AllocaInst &I) {
2880     setShadow(&I, getCleanShadow(&I));
2881     setOrigin(&I, getCleanOrigin());
2882     IRBuilder<> IRB(I.getNextNode());
2883     const DataLayout &DL = F.getParent()->getDataLayout();
2884     uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType());
2885     Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize);
2886     if (I.isArrayAllocation())
2887       Len = IRB.CreateMul(Len, I.getArraySize());
2888     if (PoisonStack && ClPoisonStackWithCall) {
2889       IRB.CreateCall(MS.MsanPoisonStackFn,
2890                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
2891     } else {
2892       Value *ShadowBase =
2893           getShadowOriginPtr(&I, IRB, IRB.getInt8Ty(), I.getAlignment()).first;
2894 
2895       Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
2896       IRB.CreateMemSet(ShadowBase, PoisonValue, Len, I.getAlignment());
2897     }
2898 
2899     if (PoisonStack && MS.TrackOrigins) {
2900       SmallString<2048> StackDescriptionStorage;
2901       raw_svector_ostream StackDescription(StackDescriptionStorage);
2902       // We create a string with a description of the stack allocation and
2903       // pass it into __msan_set_alloca_origin.
2904       // It will be printed by the run-time if stack-originated UMR is found.
2905       // The first 4 bytes of the string are set to '----' and will be replaced
2906       // by __msan_va_arg_overflow_size_tls at the first call.
2907       StackDescription << "----" << I.getName() << "@" << F.getName();
2908       Value *Descr =
2909           createPrivateNonConstGlobalForString(*F.getParent(),
2910                                                StackDescription.str());
2911 
2912       IRB.CreateCall(MS.MsanSetAllocaOrigin4Fn,
2913                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
2914                       IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()),
2915                       IRB.CreatePointerCast(&F, MS.IntptrTy)});
2916     }
2917   }
2918 
2919   void visitSelectInst(SelectInst& I) {
2920     IRBuilder<> IRB(&I);
2921     // a = select b, c, d
2922     Value *B = I.getCondition();
2923     Value *C = I.getTrueValue();
2924     Value *D = I.getFalseValue();
2925     Value *Sb = getShadow(B);
2926     Value *Sc = getShadow(C);
2927     Value *Sd = getShadow(D);
2928 
2929     // Result shadow if condition shadow is 0.
2930     Value *Sa0 = IRB.CreateSelect(B, Sc, Sd);
2931     Value *Sa1;
2932     if (I.getType()->isAggregateType()) {
2933       // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do
2934       // an extra "select". This results in much more compact IR.
2935       // Sa = select Sb, poisoned, (select b, Sc, Sd)
2936       Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
2937     } else {
2938       // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
2939       // If Sb (condition is poisoned), look for bits in c and d that are equal
2940       // and both unpoisoned.
2941       // If !Sb (condition is unpoisoned), simply pick one of Sc and Sd.
2942 
2943       // Cast arguments to shadow-compatible type.
2944       C = CreateAppToShadowCast(IRB, C);
2945       D = CreateAppToShadowCast(IRB, D);
2946 
2947       // Result shadow if condition shadow is 1.
2948       Sa1 = IRB.CreateOr(IRB.CreateXor(C, D), IRB.CreateOr(Sc, Sd));
2949     }
2950     Value *Sa = IRB.CreateSelect(Sb, Sa1, Sa0, "_msprop_select");
2951     setShadow(&I, Sa);
2952     if (MS.TrackOrigins) {
2953       // Origins are always i32, so any vector conditions must be flattened.
2954       // FIXME: consider tracking vector origins for app vectors?
2955       if (B->getType()->isVectorTy()) {
2956         Type *FlatTy = getShadowTyNoVec(B->getType());
2957         B = IRB.CreateICmpNE(IRB.CreateBitCast(B, FlatTy),
2958                                 ConstantInt::getNullValue(FlatTy));
2959         Sb = IRB.CreateICmpNE(IRB.CreateBitCast(Sb, FlatTy),
2960                                       ConstantInt::getNullValue(FlatTy));
2961       }
2962       // a = select b, c, d
2963       // Oa = Sb ? Ob : (b ? Oc : Od)
2964       setOrigin(
2965           &I, IRB.CreateSelect(Sb, getOrigin(I.getCondition()),
2966                                IRB.CreateSelect(B, getOrigin(I.getTrueValue()),
2967                                                 getOrigin(I.getFalseValue()))));
2968     }
2969   }
2970 
2971   void visitLandingPadInst(LandingPadInst &I) {
2972     // Do nothing.
2973     // See https://github.com/google/sanitizers/issues/504
2974     setShadow(&I, getCleanShadow(&I));
2975     setOrigin(&I, getCleanOrigin());
2976   }
2977 
2978   void visitCatchSwitchInst(CatchSwitchInst &I) {
2979     setShadow(&I, getCleanShadow(&I));
2980     setOrigin(&I, getCleanOrigin());
2981   }
2982 
2983   void visitFuncletPadInst(FuncletPadInst &I) {
2984     setShadow(&I, getCleanShadow(&I));
2985     setOrigin(&I, getCleanOrigin());
2986   }
2987 
2988   void visitGetElementPtrInst(GetElementPtrInst &I) {
2989     handleShadowOr(I);
2990   }
2991 
2992   void visitExtractValueInst(ExtractValueInst &I) {
2993     IRBuilder<> IRB(&I);
2994     Value *Agg = I.getAggregateOperand();
2995     DEBUG(dbgs() << "ExtractValue:  " << I << "\n");
2996     Value *AggShadow = getShadow(Agg);
2997     DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
2998     Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
2999     DEBUG(dbgs() << "   ResShadow:  " << *ResShadow << "\n");
3000     setShadow(&I, ResShadow);
3001     setOriginForNaryOp(I);
3002   }
3003 
3004   void visitInsertValueInst(InsertValueInst &I) {
3005     IRBuilder<> IRB(&I);
3006     DEBUG(dbgs() << "InsertValue:  " << I << "\n");
3007     Value *AggShadow = getShadow(I.getAggregateOperand());
3008     Value *InsShadow = getShadow(I.getInsertedValueOperand());
3009     DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
3010     DEBUG(dbgs() << "   InsShadow:  " << *InsShadow << "\n");
3011     Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
3012     DEBUG(dbgs() << "   Res:        " << *Res << "\n");
3013     setShadow(&I, Res);
3014     setOriginForNaryOp(I);
3015   }
3016 
3017   void dumpInst(Instruction &I) {
3018     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
3019       errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n";
3020     } else {
3021       errs() << "ZZZ " << I.getOpcodeName() << "\n";
3022     }
3023     errs() << "QQQ " << I << "\n";
3024   }
3025 
3026   void visitResumeInst(ResumeInst &I) {
3027     DEBUG(dbgs() << "Resume: " << I << "\n");
3028     // Nothing to do here.
3029   }
3030 
3031   void visitCleanupReturnInst(CleanupReturnInst &CRI) {
3032     DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n");
3033     // Nothing to do here.
3034   }
3035 
3036   void visitCatchReturnInst(CatchReturnInst &CRI) {
3037     DEBUG(dbgs() << "CatchReturn: " << CRI << "\n");
3038     // Nothing to do here.
3039   }
3040 
3041   void visitInstruction(Instruction &I) {
3042     // Everything else: stop propagating and check for poisoned shadow.
3043     if (ClDumpStrictInstructions)
3044       dumpInst(I);
3045     DEBUG(dbgs() << "DEFAULT: " << I << "\n");
3046     for (size_t i = 0, n = I.getNumOperands(); i < n; i++) {
3047       Value *Operand = I.getOperand(i);
3048       if (Operand->getType()->isSized())
3049         insertShadowCheck(Operand, &I);
3050     }
3051     setShadow(&I, getCleanShadow(&I));
3052     setOrigin(&I, getCleanOrigin());
3053   }
3054 };
3055 
3056 /// \brief AMD64-specific implementation of VarArgHelper.
3057 struct VarArgAMD64Helper : public VarArgHelper {
3058   // An unfortunate workaround for asymmetric lowering of va_arg stuff.
3059   // See a comment in visitCallSite for more details.
3060   static const unsigned AMD64GpEndOffset = 48;  // AMD64 ABI Draft 0.99.6 p3.5.7
3061   static const unsigned AMD64FpEndOffset = 176;
3062 
3063   Function &F;
3064   MemorySanitizer &MS;
3065   MemorySanitizerVisitor &MSV;
3066   Value *VAArgTLSCopy = nullptr;
3067   Value *VAArgOverflowSize = nullptr;
3068 
3069   SmallVector<CallInst*, 16> VAStartInstrumentationList;
3070 
3071   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
3072 
3073   VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
3074                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
3075 
3076   ArgKind classifyArgument(Value* arg) {
3077     // A very rough approximation of X86_64 argument classification rules.
3078     Type *T = arg->getType();
3079     if (T->isFPOrFPVectorTy() || T->isX86_MMXTy())
3080       return AK_FloatingPoint;
3081     if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
3082       return AK_GeneralPurpose;
3083     if (T->isPointerTy())
3084       return AK_GeneralPurpose;
3085     return AK_Memory;
3086   }
3087 
3088   // For VarArg functions, store the argument shadow in an ABI-specific format
3089   // that corresponds to va_list layout.
3090   // We do this because Clang lowers va_arg in the frontend, and this pass
3091   // only sees the low level code that deals with va_list internals.
3092   // A much easier alternative (provided that Clang emits va_arg instructions)
3093   // would have been to associate each live instance of va_list with a copy of
3094   // MSanParamTLS, and extract shadow on va_arg() call in the argument list
3095   // order.
3096   void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
3097     unsigned GpOffset = 0;
3098     unsigned FpOffset = AMD64GpEndOffset;
3099     unsigned OverflowOffset = AMD64FpEndOffset;
3100     const DataLayout &DL = F.getParent()->getDataLayout();
3101     for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
3102          ArgIt != End; ++ArgIt) {
3103       Value *A = *ArgIt;
3104       unsigned ArgNo = CS.getArgumentNo(ArgIt);
3105       bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams();
3106       bool IsByVal = CS.paramHasAttr(ArgNo, Attribute::ByVal);
3107       if (IsByVal) {
3108         // ByVal arguments always go to the overflow area.
3109         // Fixed arguments passed through the overflow area will be stepped
3110         // over by va_start, so don't count them towards the offset.
3111         if (IsFixed)
3112           continue;
3113         assert(A->getType()->isPointerTy());
3114         Type *RealTy = A->getType()->getPointerElementType();
3115         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
3116         Value *ShadowBase =
3117             getShadowPtrForVAArgument(RealTy, IRB, OverflowOffset);
3118         OverflowOffset += alignTo(ArgSize, 8);
3119         Value *ShadowPtr, *OriginPtr;
3120         std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
3121             A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment);
3122 
3123         IRB.CreateMemCpy(ShadowBase, kShadowTLSAlignment, ShadowPtr,
3124                          kShadowTLSAlignment, ArgSize);
3125       } else {
3126         ArgKind AK = classifyArgument(A);
3127         if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
3128           AK = AK_Memory;
3129         if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
3130           AK = AK_Memory;
3131         Value *ShadowBase;
3132         switch (AK) {
3133           case AK_GeneralPurpose:
3134             ShadowBase = getShadowPtrForVAArgument(A->getType(), IRB, GpOffset);
3135             GpOffset += 8;
3136             break;
3137           case AK_FloatingPoint:
3138             ShadowBase = getShadowPtrForVAArgument(A->getType(), IRB, FpOffset);
3139             FpOffset += 16;
3140             break;
3141           case AK_Memory:
3142             if (IsFixed)
3143               continue;
3144             uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
3145             ShadowBase =
3146                 getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset);
3147             OverflowOffset += alignTo(ArgSize, 8);
3148         }
3149         // Take fixed arguments into account for GpOffset and FpOffset,
3150         // but don't actually store shadows for them.
3151         if (IsFixed)
3152           continue;
3153         IRB.CreateAlignedStore(MSV.getShadow(A), ShadowBase,
3154                                kShadowTLSAlignment);
3155       }
3156     }
3157     Constant *OverflowSize =
3158       ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
3159     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
3160   }
3161 
3162   /// \brief Compute the shadow address for a given va_arg.
3163   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
3164                                    int ArgOffset) {
3165     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
3166     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
3167     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
3168                               "_msarg");
3169   }
3170 
3171   void unpoisonVAListTagForInst(IntrinsicInst &I) {
3172     IRBuilder<> IRB(&I);
3173     Value *VAListTag = I.getArgOperand(0);
3174     Value *ShadowPtr, *OriginPtr;
3175     unsigned Alignment = 8;
3176     std::tie(ShadowPtr, OriginPtr) =
3177         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment);
3178 
3179     // Unpoison the whole __va_list_tag.
3180     // FIXME: magic ABI constants.
3181     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
3182                      /* size */ 24, Alignment, false);
3183     // We shouldn't need to zero out the origins, as they're only checked for
3184     // nonzero shadow.
3185   }
3186 
3187   void visitVAStartInst(VAStartInst &I) override {
3188     if (F.getCallingConv() == CallingConv::Win64)
3189       return;
3190     VAStartInstrumentationList.push_back(&I);
3191     unpoisonVAListTagForInst(I);
3192   }
3193 
3194   void visitVACopyInst(VACopyInst &I) override {
3195     if (F.getCallingConv() == CallingConv::Win64) return;
3196     unpoisonVAListTagForInst(I);
3197   }
3198 
3199   void finalizeInstrumentation() override {
3200     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
3201            "finalizeInstrumentation called twice");
3202     if (!VAStartInstrumentationList.empty()) {
3203       // If there is a va_start in this function, make a backup copy of
3204       // va_arg_tls somewhere in the function entry block.
3205       IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
3206       VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
3207       Value *CopySize =
3208         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset),
3209                       VAArgOverflowSize);
3210       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
3211       IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
3212     }
3213 
3214     // Instrument va_start.
3215     // Copy va_list shadow from the backup copy of the TLS contents.
3216     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
3217       CallInst *OrigInst = VAStartInstrumentationList[i];
3218       IRBuilder<> IRB(OrigInst->getNextNode());
3219       Value *VAListTag = OrigInst->getArgOperand(0);
3220 
3221       Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
3222           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
3223                         ConstantInt::get(MS.IntptrTy, 16)),
3224           Type::getInt64PtrTy(*MS.C));
3225       Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
3226       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
3227       unsigned Alignment = 16;
3228       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
3229           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
3230                                  Alignment);
3231       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
3232                        AMD64FpEndOffset);
3233       Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
3234           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
3235                         ConstantInt::get(MS.IntptrTy, 8)),
3236           Type::getInt64PtrTy(*MS.C));
3237       Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr);
3238       Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
3239       std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
3240           MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
3241                                  Alignment);
3242       Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
3243                                              AMD64FpEndOffset);
3244       IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
3245                        VAArgOverflowSize);
3246     }
3247   }
3248 };
3249 
3250 /// \brief MIPS64-specific implementation of VarArgHelper.
3251 struct VarArgMIPS64Helper : public VarArgHelper {
3252   Function &F;
3253   MemorySanitizer &MS;
3254   MemorySanitizerVisitor &MSV;
3255   Value *VAArgTLSCopy = nullptr;
3256   Value *VAArgSize = nullptr;
3257 
3258   SmallVector<CallInst*, 16> VAStartInstrumentationList;
3259 
3260   VarArgMIPS64Helper(Function &F, MemorySanitizer &MS,
3261                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
3262 
3263   void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
3264     unsigned VAArgOffset = 0;
3265     const DataLayout &DL = F.getParent()->getDataLayout();
3266     for (CallSite::arg_iterator ArgIt = CS.arg_begin() +
3267          CS.getFunctionType()->getNumParams(), End = CS.arg_end();
3268          ArgIt != End; ++ArgIt) {
3269       Triple TargetTriple(F.getParent()->getTargetTriple());
3270       Value *A = *ArgIt;
3271       Value *Base;
3272       uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
3273       if (TargetTriple.getArch() == Triple::mips64) {
3274         // Adjusting the shadow for argument with size < 8 to match the placement
3275         // of bits in big endian system
3276         if (ArgSize < 8)
3277           VAArgOffset += (8 - ArgSize);
3278       }
3279       Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset);
3280       VAArgOffset += ArgSize;
3281       VAArgOffset = alignTo(VAArgOffset, 8);
3282       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
3283     }
3284 
3285     Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset);
3286     // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
3287     // a new class member i.e. it is the total size of all VarArgs.
3288     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
3289   }
3290 
3291   /// \brief Compute the shadow address for a given va_arg.
3292   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
3293                                    int ArgOffset) {
3294     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
3295     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
3296     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
3297                               "_msarg");
3298   }
3299 
3300   void visitVAStartInst(VAStartInst &I) override {
3301     IRBuilder<> IRB(&I);
3302     VAStartInstrumentationList.push_back(&I);
3303     Value *VAListTag = I.getArgOperand(0);
3304     Value *ShadowPtr, *OriginPtr;
3305     unsigned Alignment = 8;
3306     std::tie(ShadowPtr, OriginPtr) =
3307         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment);
3308     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
3309                      /* size */ 8, Alignment, false);
3310   }
3311 
3312   void visitVACopyInst(VACopyInst &I) override {
3313     IRBuilder<> IRB(&I);
3314     VAStartInstrumentationList.push_back(&I);
3315     Value *VAListTag = I.getArgOperand(0);
3316     Value *ShadowPtr, *OriginPtr;
3317     unsigned Alignment = 8;
3318     std::tie(ShadowPtr, OriginPtr) =
3319         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment);
3320     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
3321                      /* size */ 8, Alignment, false);
3322   }
3323 
3324   void finalizeInstrumentation() override {
3325     assert(!VAArgSize && !VAArgTLSCopy &&
3326            "finalizeInstrumentation called twice");
3327     IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
3328     VAArgSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
3329     Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
3330                                     VAArgSize);
3331 
3332     if (!VAStartInstrumentationList.empty()) {
3333       // If there is a va_start in this function, make a backup copy of
3334       // va_arg_tls somewhere in the function entry block.
3335       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
3336       IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
3337     }
3338 
3339     // Instrument va_start.
3340     // Copy va_list shadow from the backup copy of the TLS contents.
3341     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
3342       CallInst *OrigInst = VAStartInstrumentationList[i];
3343       IRBuilder<> IRB(OrigInst->getNextNode());
3344       Value *VAListTag = OrigInst->getArgOperand(0);
3345       Value *RegSaveAreaPtrPtr =
3346         IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
3347                         Type::getInt64PtrTy(*MS.C));
3348       Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
3349       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
3350       unsigned Alignment = 8;
3351       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
3352           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
3353                                  Alignment);
3354       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
3355                        CopySize);
3356     }
3357   }
3358 };
3359 
3360 /// \brief AArch64-specific implementation of VarArgHelper.
3361 struct VarArgAArch64Helper : public VarArgHelper {
3362   static const unsigned kAArch64GrArgSize = 64;
3363   static const unsigned kAArch64VrArgSize = 128;
3364 
3365   static const unsigned AArch64GrBegOffset = 0;
3366   static const unsigned AArch64GrEndOffset = kAArch64GrArgSize;
3367   // Make VR space aligned to 16 bytes.
3368   static const unsigned AArch64VrBegOffset = AArch64GrEndOffset;
3369   static const unsigned AArch64VrEndOffset = AArch64VrBegOffset
3370                                              + kAArch64VrArgSize;
3371   static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;
3372 
3373   Function &F;
3374   MemorySanitizer &MS;
3375   MemorySanitizerVisitor &MSV;
3376   Value *VAArgTLSCopy = nullptr;
3377   Value *VAArgOverflowSize = nullptr;
3378 
3379   SmallVector<CallInst*, 16> VAStartInstrumentationList;
3380 
3381   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
3382 
3383   VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
3384                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
3385 
3386   ArgKind classifyArgument(Value* arg) {
3387     Type *T = arg->getType();
3388     if (T->isFPOrFPVectorTy())
3389       return AK_FloatingPoint;
3390     if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
3391         || (T->isPointerTy()))
3392       return AK_GeneralPurpose;
3393     return AK_Memory;
3394   }
3395 
3396   // The instrumentation stores the argument shadow in a non ABI-specific
3397   // format because it does not know which argument is named (since Clang,
3398   // like x86_64 case, lowers the va_args in the frontend and this pass only
3399   // sees the low level code that deals with va_list internals).
3400   // The first seven GR registers are saved in the first 56 bytes of the
3401   // va_arg tls arra, followers by the first 8 FP/SIMD registers, and then
3402   // the remaining arguments.
3403   // Using constant offset within the va_arg TLS array allows fast copy
3404   // in the finalize instrumentation.
3405   void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
3406     unsigned GrOffset = AArch64GrBegOffset;
3407     unsigned VrOffset = AArch64VrBegOffset;
3408     unsigned OverflowOffset = AArch64VAEndOffset;
3409 
3410     const DataLayout &DL = F.getParent()->getDataLayout();
3411     for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
3412          ArgIt != End; ++ArgIt) {
3413       Value *A = *ArgIt;
3414       unsigned ArgNo = CS.getArgumentNo(ArgIt);
3415       bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams();
3416       ArgKind AK = classifyArgument(A);
3417       if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset)
3418         AK = AK_Memory;
3419       if (AK == AK_FloatingPoint && VrOffset >= AArch64VrEndOffset)
3420         AK = AK_Memory;
3421       Value *Base;
3422       switch (AK) {
3423         case AK_GeneralPurpose:
3424           Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset);
3425           GrOffset += 8;
3426           break;
3427         case AK_FloatingPoint:
3428           Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset);
3429           VrOffset += 16;
3430           break;
3431         case AK_Memory:
3432           // Don't count fixed arguments in the overflow area - va_start will
3433           // skip right over them.
3434           if (IsFixed)
3435             continue;
3436           uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
3437           Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset);
3438           OverflowOffset += alignTo(ArgSize, 8);
3439           break;
3440       }
3441       // Count Gp/Vr fixed arguments to their respective offsets, but don't
3442       // bother to actually store a shadow.
3443       if (IsFixed)
3444         continue;
3445       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
3446     }
3447     Constant *OverflowSize =
3448       ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset);
3449     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
3450   }
3451 
3452   /// Compute the shadow address for a given va_arg.
3453   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
3454                                    int ArgOffset) {
3455     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
3456     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
3457     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
3458                               "_msarg");
3459   }
3460 
3461   void visitVAStartInst(VAStartInst &I) override {
3462     IRBuilder<> IRB(&I);
3463     VAStartInstrumentationList.push_back(&I);
3464     Value *VAListTag = I.getArgOperand(0);
3465     Value *ShadowPtr, *OriginPtr;
3466     unsigned Alignment = 8;
3467     std::tie(ShadowPtr, OriginPtr) =
3468         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment);
3469     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
3470                      /* size */ 32, Alignment, false);
3471   }
3472 
3473   void visitVACopyInst(VACopyInst &I) override {
3474     IRBuilder<> IRB(&I);
3475     VAStartInstrumentationList.push_back(&I);
3476     Value *VAListTag = I.getArgOperand(0);
3477     Value *ShadowPtr, *OriginPtr;
3478     unsigned Alignment = 8;
3479     std::tie(ShadowPtr, OriginPtr) =
3480         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment);
3481     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
3482                      /* size */ 32, Alignment, false);
3483   }
3484 
3485   // Retrieve a va_list field of 'void*' size.
3486   Value* getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
3487     Value *SaveAreaPtrPtr =
3488       IRB.CreateIntToPtr(
3489         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
3490                       ConstantInt::get(MS.IntptrTy, offset)),
3491         Type::getInt64PtrTy(*MS.C));
3492     return IRB.CreateLoad(SaveAreaPtrPtr);
3493   }
3494 
3495   // Retrieve a va_list field of 'int' size.
3496   Value* getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
3497     Value *SaveAreaPtr =
3498       IRB.CreateIntToPtr(
3499         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
3500                       ConstantInt::get(MS.IntptrTy, offset)),
3501         Type::getInt32PtrTy(*MS.C));
3502     Value *SaveArea32 = IRB.CreateLoad(SaveAreaPtr);
3503     return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
3504   }
3505 
3506   void finalizeInstrumentation() override {
3507     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
3508            "finalizeInstrumentation called twice");
3509     if (!VAStartInstrumentationList.empty()) {
3510       // If there is a va_start in this function, make a backup copy of
3511       // va_arg_tls somewhere in the function entry block.
3512       IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
3513       VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
3514       Value *CopySize =
3515         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset),
3516                       VAArgOverflowSize);
3517       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
3518       IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
3519     }
3520 
3521     Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
3522     Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize);
3523 
3524     // Instrument va_start, copy va_list shadow from the backup copy of
3525     // the TLS contents.
3526     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
3527       CallInst *OrigInst = VAStartInstrumentationList[i];
3528       IRBuilder<> IRB(OrigInst->getNextNode());
3529 
3530       Value *VAListTag = OrigInst->getArgOperand(0);
3531 
3532       // The variadic ABI for AArch64 creates two areas to save the incoming
3533       // argument registers (one for 64-bit general register xn-x7 and another
3534       // for 128-bit FP/SIMD vn-v7).
3535       // We need then to propagate the shadow arguments on both regions
3536       // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'.
3537       // The remaning arguments are saved on shadow for 'va::stack'.
3538       // One caveat is it requires only to propagate the non-named arguments,
3539       // however on the call site instrumentation 'all' the arguments are
3540       // saved. So to copy the shadow values from the va_arg TLS array
3541       // we need to adjust the offset for both GR and VR fields based on
3542       // the __{gr,vr}_offs value (since they are stores based on incoming
3543       // named arguments).
3544 
3545       // Read the stack pointer from the va_list.
3546       Value *StackSaveAreaPtr = getVAField64(IRB, VAListTag, 0);
3547 
3548       // Read both the __gr_top and __gr_off and add them up.
3549       Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8);
3550       Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24);
3551 
3552       Value *GrRegSaveAreaPtr = IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea);
3553 
3554       // Read both the __vr_top and __vr_off and add them up.
3555       Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16);
3556       Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28);
3557 
3558       Value *VrRegSaveAreaPtr = IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea);
3559 
3560       // It does not know how many named arguments is being used and, on the
3561       // callsite all the arguments were saved.  Since __gr_off is defined as
3562       // '0 - ((8 - named_gr) * 8)', the idea is to just propagate the variadic
3563       // argument by ignoring the bytes of shadow from named arguments.
3564       Value *GrRegSaveAreaShadowPtrOff =
3565         IRB.CreateAdd(GrArgSize, GrOffSaveArea);
3566 
3567       Value *GrRegSaveAreaShadowPtr =
3568           MSV.getShadowOriginPtr(GrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
3569                                  /*Alignment*/ 8)
3570               .first;
3571 
3572       Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
3573                                               GrRegSaveAreaShadowPtrOff);
3574       Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);
3575 
3576       IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, 8, GrSrcPtr, 8, GrCopySize);
3577 
3578       // Again, but for FP/SIMD values.
3579       Value *VrRegSaveAreaShadowPtrOff =
3580           IRB.CreateAdd(VrArgSize, VrOffSaveArea);
3581 
3582       Value *VrRegSaveAreaShadowPtr =
3583           MSV.getShadowOriginPtr(VrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
3584                                  /*Alignment*/ 8)
3585               .first;
3586 
3587       Value *VrSrcPtr = IRB.CreateInBoundsGEP(
3588         IRB.getInt8Ty(),
3589         IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
3590                               IRB.getInt32(AArch64VrBegOffset)),
3591         VrRegSaveAreaShadowPtrOff);
3592       Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);
3593 
3594       IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, 8, VrSrcPtr, 8, VrCopySize);
3595 
3596       // And finally for remaining arguments.
3597       Value *StackSaveAreaShadowPtr =
3598           MSV.getShadowOriginPtr(StackSaveAreaPtr, IRB, IRB.getInt8Ty(),
3599                                  /*Alignment*/ 16)
3600               .first;
3601 
3602       Value *StackSrcPtr =
3603         IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
3604                               IRB.getInt32(AArch64VAEndOffset));
3605 
3606       IRB.CreateMemCpy(StackSaveAreaShadowPtr, 16, StackSrcPtr, 16,
3607                        VAArgOverflowSize);
3608     }
3609   }
3610 };
3611 
3612 /// \brief PowerPC64-specific implementation of VarArgHelper.
3613 struct VarArgPowerPC64Helper : public VarArgHelper {
3614   Function &F;
3615   MemorySanitizer &MS;
3616   MemorySanitizerVisitor &MSV;
3617   Value *VAArgTLSCopy = nullptr;
3618   Value *VAArgSize = nullptr;
3619 
3620   SmallVector<CallInst*, 16> VAStartInstrumentationList;
3621 
3622   VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS,
3623                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
3624 
3625   void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
3626     // For PowerPC, we need to deal with alignment of stack arguments -
3627     // they are mostly aligned to 8 bytes, but vectors and i128 arrays
3628     // are aligned to 16 bytes, byvals can be aligned to 8 or 16 bytes,
3629     // and QPX vectors are aligned to 32 bytes.  For that reason, we
3630     // compute current offset from stack pointer (which is always properly
3631     // aligned), and offset for the first vararg, then subtract them.
3632     unsigned VAArgBase;
3633     Triple TargetTriple(F.getParent()->getTargetTriple());
3634     // Parameter save area starts at 48 bytes from frame pointer for ABIv1,
3635     // and 32 bytes for ABIv2.  This is usually determined by target
3636     // endianness, but in theory could be overriden by function attribute.
3637     // For simplicity, we ignore it here (it'd only matter for QPX vectors).
3638     if (TargetTriple.getArch() == Triple::ppc64)
3639       VAArgBase = 48;
3640     else
3641       VAArgBase = 32;
3642     unsigned VAArgOffset = VAArgBase;
3643     const DataLayout &DL = F.getParent()->getDataLayout();
3644     for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
3645          ArgIt != End; ++ArgIt) {
3646       Value *A = *ArgIt;
3647       unsigned ArgNo = CS.getArgumentNo(ArgIt);
3648       bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams();
3649       bool IsByVal = CS.paramHasAttr(ArgNo, Attribute::ByVal);
3650       if (IsByVal) {
3651         assert(A->getType()->isPointerTy());
3652         Type *RealTy = A->getType()->getPointerElementType();
3653         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
3654         uint64_t ArgAlign = CS.getParamAlignment(ArgNo);
3655         if (ArgAlign < 8)
3656           ArgAlign = 8;
3657         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
3658         if (!IsFixed) {
3659           Value *Base = getShadowPtrForVAArgument(RealTy, IRB,
3660                                                   VAArgOffset - VAArgBase);
3661           Value *AShadowPtr, *AOriginPtr;
3662           std::tie(AShadowPtr, AOriginPtr) = MSV.getShadowOriginPtr(
3663               A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment);
3664 
3665           IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr,
3666                            kShadowTLSAlignment, ArgSize);
3667         }
3668         VAArgOffset += alignTo(ArgSize, 8);
3669       } else {
3670         Value *Base;
3671         uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
3672         uint64_t ArgAlign = 8;
3673         if (A->getType()->isArrayTy()) {
3674           // Arrays are aligned to element size, except for long double
3675           // arrays, which are aligned to 8 bytes.
3676           Type *ElementTy = A->getType()->getArrayElementType();
3677           if (!ElementTy->isPPC_FP128Ty())
3678             ArgAlign = DL.getTypeAllocSize(ElementTy);
3679         } else if (A->getType()->isVectorTy()) {
3680           // Vectors are naturally aligned.
3681           ArgAlign = DL.getTypeAllocSize(A->getType());
3682         }
3683         if (ArgAlign < 8)
3684           ArgAlign = 8;
3685         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
3686         if (DL.isBigEndian()) {
3687           // Adjusting the shadow for argument with size < 8 to match the placement
3688           // of bits in big endian system
3689           if (ArgSize < 8)
3690             VAArgOffset += (8 - ArgSize);
3691         }
3692         if (!IsFixed) {
3693           Base = getShadowPtrForVAArgument(A->getType(), IRB,
3694                                            VAArgOffset - VAArgBase);
3695           IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
3696         }
3697         VAArgOffset += ArgSize;
3698         VAArgOffset = alignTo(VAArgOffset, 8);
3699       }
3700       if (IsFixed)
3701         VAArgBase = VAArgOffset;
3702     }
3703 
3704     Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(),
3705                                                 VAArgOffset - VAArgBase);
3706     // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
3707     // a new class member i.e. it is the total size of all VarArgs.
3708     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
3709   }
3710 
3711   /// \brief Compute the shadow address for a given va_arg.
3712   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
3713                                    int ArgOffset) {
3714     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
3715     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
3716     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
3717                               "_msarg");
3718   }
3719 
3720   void visitVAStartInst(VAStartInst &I) override {
3721     IRBuilder<> IRB(&I);
3722     VAStartInstrumentationList.push_back(&I);
3723     Value *VAListTag = I.getArgOperand(0);
3724     Value *ShadowPtr, *OriginPtr;
3725     unsigned Alignment = 8;
3726     std::tie(ShadowPtr, OriginPtr) =
3727         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment);
3728     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
3729                      /* size */ 8, Alignment, false);
3730   }
3731 
3732   void visitVACopyInst(VACopyInst &I) override {
3733     IRBuilder<> IRB(&I);
3734     Value *VAListTag = I.getArgOperand(0);
3735     Value *ShadowPtr, *OriginPtr;
3736     unsigned Alignment = 8;
3737     std::tie(ShadowPtr, OriginPtr) =
3738         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment);
3739     // Unpoison the whole __va_list_tag.
3740     // FIXME: magic ABI constants.
3741     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
3742                      /* size */ 8, Alignment, false);
3743   }
3744 
3745   void finalizeInstrumentation() override {
3746     assert(!VAArgSize && !VAArgTLSCopy &&
3747            "finalizeInstrumentation called twice");
3748     IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
3749     VAArgSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
3750     Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
3751                                     VAArgSize);
3752 
3753     if (!VAStartInstrumentationList.empty()) {
3754       // If there is a va_start in this function, make a backup copy of
3755       // va_arg_tls somewhere in the function entry block.
3756       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
3757       IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
3758     }
3759 
3760     // Instrument va_start.
3761     // Copy va_list shadow from the backup copy of the TLS contents.
3762     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
3763       CallInst *OrigInst = VAStartInstrumentationList[i];
3764       IRBuilder<> IRB(OrigInst->getNextNode());
3765       Value *VAListTag = OrigInst->getArgOperand(0);
3766       Value *RegSaveAreaPtrPtr =
3767         IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
3768                         Type::getInt64PtrTy(*MS.C));
3769       Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
3770       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
3771       unsigned Alignment = 8;
3772       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
3773           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
3774                                  Alignment);
3775       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
3776                        CopySize);
3777     }
3778   }
3779 };
3780 
3781 /// \brief A no-op implementation of VarArgHelper.
3782 struct VarArgNoOpHelper : public VarArgHelper {
3783   VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
3784                    MemorySanitizerVisitor &MSV) {}
3785 
3786   void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {}
3787 
3788   void visitVAStartInst(VAStartInst &I) override {}
3789 
3790   void visitVACopyInst(VACopyInst &I) override {}
3791 
3792   void finalizeInstrumentation() override {}
3793 };
3794 
3795 } // end anonymous namespace
3796 
3797 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
3798                                         MemorySanitizerVisitor &Visitor) {
3799   // VarArg handling is only implemented on AMD64. False positives are possible
3800   // on other platforms.
3801   Triple TargetTriple(Func.getParent()->getTargetTriple());
3802   if (TargetTriple.getArch() == Triple::x86_64)
3803     return new VarArgAMD64Helper(Func, Msan, Visitor);
3804   else if (TargetTriple.getArch() == Triple::mips64 ||
3805            TargetTriple.getArch() == Triple::mips64el)
3806     return new VarArgMIPS64Helper(Func, Msan, Visitor);
3807   else if (TargetTriple.getArch() == Triple::aarch64)
3808     return new VarArgAArch64Helper(Func, Msan, Visitor);
3809   else if (TargetTriple.getArch() == Triple::ppc64 ||
3810            TargetTriple.getArch() == Triple::ppc64le)
3811     return new VarArgPowerPC64Helper(Func, Msan, Visitor);
3812   else
3813     return new VarArgNoOpHelper(Func, Msan, Visitor);
3814 }
3815 
3816 bool MemorySanitizer::runOnFunction(Function &F) {
3817   if (&F == MsanCtorFunction)
3818     return false;
3819   MemorySanitizerVisitor Visitor(F, *this);
3820 
3821   // Clear out readonly/readnone attributes.
3822   AttrBuilder B;
3823   B.addAttribute(Attribute::ReadOnly)
3824     .addAttribute(Attribute::ReadNone);
3825   F.removeAttributes(AttributeList::FunctionIndex, B);
3826 
3827   return Visitor.runOnFunction();
3828 }
3829