1 //===- MemorySanitizer.cpp - detector of uninitialized reads --------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// This file is a part of MemorySanitizer, a detector of uninitialized
12 /// reads.
13 ///
14 /// The algorithm of the tool is similar to Memcheck
15 /// (http://goo.gl/QKbem). We associate a few shadow bits with every
16 /// byte of the application memory, poison the shadow of the malloc-ed
17 /// or alloca-ed memory, load the shadow bits on every memory read,
/// propagate the shadow bits through some of the arithmetic
/// instructions (including MOV), store the shadow bits on every memory
20 /// write, report a bug on some other instructions (e.g. JMP) if the
21 /// associated shadow is poisoned.
22 ///
23 /// But there are differences too. The first and the major one:
24 /// compiler instrumentation instead of binary instrumentation. This
25 /// gives us much better register allocation, possible compiler
26 /// optimizations and a fast start-up. But this brings the major issue
27 /// as well: msan needs to see all program events, including system
28 /// calls and reads/writes in system libraries, so we either need to
29 /// compile *everything* with msan or use a binary translation
30 /// component (e.g. DynamoRIO) to instrument pre-built libraries.
31 /// Another difference from Memcheck is that we use 8 shadow bits per
32 /// byte of application memory and use a direct shadow mapping. This
33 /// greatly simplifies the instrumentation code and avoids races on
34 /// shadow updates (Memcheck is single-threaded so races are not a
35 /// concern there. Memcheck uses 2 shadow bits per byte with a slow
36 /// path storage that uses 8 bits per byte).
37 ///
38 /// The default value of shadow is 0, which means "clean" (not poisoned).
39 ///
40 /// Every module initializer should call __msan_init to ensure that the
41 /// shadow memory is ready. On error, __msan_warning is called. Since
42 /// parameters and return values may be passed via registers, we have a
43 /// specialized thread-local shadow for return values
44 /// (__msan_retval_tls) and parameters (__msan_param_tls).
45 ///
46 ///                           Origin tracking.
47 ///
48 /// MemorySanitizer can track origins (allocation points) of all uninitialized
49 /// values. This behavior is controlled with a flag (msan-track-origins) and is
50 /// disabled by default.
51 ///
52 /// Origins are 4-byte values created and interpreted by the runtime library.
53 /// They are stored in a second shadow mapping, one 4-byte value for 4 bytes
54 /// of application memory. Propagation of origins is basically a bunch of
55 /// "select" instructions that pick the origin of a dirty argument, if an
56 /// instruction has one.
57 ///
58 /// Every 4 aligned, consecutive bytes of application memory have one origin
59 /// value associated with them. If these bytes contain uninitialized data
60 /// coming from 2 different allocations, the last store wins. Because of this,
61 /// MemorySanitizer reports can show unrelated origins, but this is unlikely in
62 /// practice.
63 ///
64 /// Origins are meaningless for fully initialized values, so MemorySanitizer
65 /// avoids storing origin to memory when a fully initialized value is stored.
/// This way it avoids needlessly overwriting the origin of the 4-byte region on
67 /// a short (i.e. 1 byte) clean store, and it is also good for performance.
68 ///
69 ///                            Atomic handling.
70 ///
71 /// Ideally, every atomic store of application value should update the
72 /// corresponding shadow location in an atomic way. Unfortunately, atomic store
73 /// of two disjoint locations can not be done without severe slowdown.
74 ///
75 /// Therefore, we implement an approximation that may err on the safe side.
76 /// In this implementation, every atomically accessed location in the program
77 /// may only change from (partially) uninitialized to fully initialized, but
78 /// not the other way around. We load the shadow _after_ the application load,
79 /// and we store the shadow _before_ the app store. Also, we always store clean
80 /// shadow (if the application store is atomic). This way, if the store-load
81 /// pair constitutes a happens-before arc, shadow store and load are correctly
82 /// ordered such that the load will get either the value that was stored, or
83 /// some later value (which is always clean).
84 ///
85 /// This does not work very well with Compare-And-Swap (CAS) and
86 /// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW
87 /// must store the new shadow before the app operation, and load the shadow
88 /// after the app operation. Computers don't work this way. Current
89 /// implementation ignores the load aspect of CAS/RMW, always returning a clean
90 /// value. It implements the store part as a simple atomic store by storing a
91 /// clean shadow.
92 //
93 //===----------------------------------------------------------------------===//
94 
95 #include "llvm/ADT/APInt.h"
96 #include "llvm/ADT/ArrayRef.h"
97 #include "llvm/ADT/DepthFirstIterator.h"
98 #include "llvm/ADT/SmallString.h"
99 #include "llvm/ADT/SmallVector.h"
100 #include "llvm/ADT/StringExtras.h"
101 #include "llvm/ADT/StringRef.h"
102 #include "llvm/ADT/Triple.h"
103 #include "llvm/Analysis/TargetLibraryInfo.h"
104 #include "llvm/Analysis/Utils/Local.h"
105 #include "llvm/IR/Argument.h"
106 #include "llvm/IR/Attributes.h"
107 #include "llvm/IR/BasicBlock.h"
108 #include "llvm/IR/CallSite.h"
109 #include "llvm/IR/CallingConv.h"
110 #include "llvm/IR/Constant.h"
111 #include "llvm/IR/Constants.h"
112 #include "llvm/IR/DataLayout.h"
113 #include "llvm/IR/DerivedTypes.h"
114 #include "llvm/IR/Function.h"
115 #include "llvm/IR/GlobalValue.h"
116 #include "llvm/IR/GlobalVariable.h"
117 #include "llvm/IR/IRBuilder.h"
118 #include "llvm/IR/InlineAsm.h"
119 #include "llvm/IR/InstVisitor.h"
120 #include "llvm/IR/InstrTypes.h"
121 #include "llvm/IR/Instruction.h"
122 #include "llvm/IR/Instructions.h"
123 #include "llvm/IR/IntrinsicInst.h"
124 #include "llvm/IR/Intrinsics.h"
125 #include "llvm/IR/LLVMContext.h"
126 #include "llvm/IR/MDBuilder.h"
127 #include "llvm/IR/Module.h"
128 #include "llvm/IR/Type.h"
129 #include "llvm/IR/Value.h"
130 #include "llvm/IR/ValueMap.h"
131 #include "llvm/Pass.h"
132 #include "llvm/Support/AtomicOrdering.h"
133 #include "llvm/Support/Casting.h"
134 #include "llvm/Support/CommandLine.h"
135 #include "llvm/Support/Compiler.h"
136 #include "llvm/Support/Debug.h"
137 #include "llvm/Support/ErrorHandling.h"
138 #include "llvm/Support/MathExtras.h"
139 #include "llvm/Support/raw_ostream.h"
140 #include "llvm/Transforms/Instrumentation.h"
141 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
142 #include "llvm/Transforms/Utils/ModuleUtils.h"
143 #include <algorithm>
144 #include <cassert>
145 #include <cstddef>
146 #include <cstdint>
147 #include <memory>
148 #include <string>
149 #include <tuple>
150 
151 using namespace llvm;
152 
153 #define DEBUG_TYPE "msan"
154 
/// Size in bytes of one origin value (origins are 4-byte values).
static constexpr unsigned kOriginSize = 4;
/// Lower bound, in bytes, applied to the alignment of origin stores.
static constexpr unsigned kMinOriginAlignment = 4;
/// Alignment constant for shadow TLS slots.
/// NOTE(review): meaning inferred from the name; uses are outside this excerpt.
static constexpr unsigned kShadowTLSAlignment = 8;

// Sizes in bytes of the parameter and return-value TLS shadow areas.
// These constants must be kept in sync with the ones in msan.h.
static constexpr unsigned kParamTLSSize = 800;
static constexpr unsigned kRetvalTLSSize = 800;

// Access sizes are powers of two: 1, 2, 4, 8.
static constexpr size_t kNumberOfAccessSizes = 4;
165 
/// \brief Track origins of uninitialized values.
///
/// Adds a section to MemorySanitizer report that points to the allocation
/// (stack or heap) the uninitialized bits came from originally.
///
/// The pass uses the larger of this flag and the level passed to its
/// constructor; levels above 1 additionally chain origins through
/// __msan_chain_origin.
static cl::opt<int> ClTrackOrigins("msan-track-origins",
       cl::desc("Track origins (allocation sites) of poisoned memory"),
       cl::Hidden, cl::init(0));

// OR-ed with the constructor's Recover argument. When recovery is enabled the
// pass reports through __msan_warning instead of __msan_warning_noreturn.
static cl::opt<bool> ClKeepGoing("msan-keep-going",
       cl::desc("keep going after reporting a UMR"),
       cl::Hidden, cl::init(false));

// Only takes effect in functions carrying the SanitizeMemory attribute.
static cl::opt<bool> ClPoisonStack("msan-poison-stack",
       cl::desc("poison uninitialized stack variables"),
       cl::Hidden, cl::init(true));

// Poison stack variables through a runtime call rather than inline code.
static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
       cl::desc("poison uninitialized stack variables with a call"),
       cl::Hidden, cl::init(false));

// Pattern used when poisoning stack variables; defaults to 0xff.
static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
       cl::desc("poison uninitialized stack variables with the given pattern"),
       cl::Hidden, cl::init(0xff));

// Only takes effect in functions carrying the SanitizeMemory attribute.
static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
       cl::desc("poison undef temps"),
       cl::Hidden, cl::init(true));

// Enables shadow propagation through integer equality comparisons.
static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
       cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
       cl::Hidden, cl::init(true));

// Enables exact handling of relational integer comparisons; off by default.
static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact",
       cl::desc("exact handling of relational integer ICmp"),
       cl::Hidden, cl::init(false));

// This flag controls whether we check the shadow of the address
// operand of load or store. Such bugs are very rare, since load from
// a garbage address typically results in SEGV, but still happen
// (e.g. only lower bits of address are garbage, or the access happens
// early at program startup where malloc-ed memory is more likely to
// be zeroed). As of 2012-08-28 this flag adds 20% slowdown.
static cl::opt<bool> ClCheckAccessAddress("msan-check-access-address",
       cl::desc("report accesses through a pointer which has poisoned shadow"),
       cl::Hidden, cl::init(true));

// Debugging aid: print instructions that get the default strict handling.
static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions",
       cl::desc("print out instructions with default strict semantics"),
       cl::Hidden, cl::init(false));

// Per-function limit on inline checks and origin stores; above it the pass
// emits runtime callbacks instead. -1 disables callbacks entirely.
static cl::opt<int> ClInstrumentationWithCallThreshold(
    "msan-instrumentation-with-call-threshold",
    cl::desc(
        "If the function being instrumented requires more than "
        "this number of checks and origin stores, use callbacks instead of "
        "inline checks (-1 means never use callbacks)."),
    cl::Hidden, cl::init(3500));

// This is an experiment to enable handling of cases where shadow is a non-zero
// compile-time constant. For some unexplainable reason they were silently
// ignored in the instrumentation.
static cl::opt<bool> ClCheckConstantShadow("msan-check-constant-shadow",
       cl::desc("Insert checks for constant shadow values"),
       cl::Hidden, cl::init(false));

// This is off by default because of a bug in gold:
// https://sourceware.org/bugzilla/show_bug.cgi?id=19002
static cl::opt<bool> ClWithComdat("msan-with-comdat",
       cl::desc("Place MSan constructors in comdat sections"),
       cl::Hidden, cl::init(false));
236 
/// Name given to the per-module constructor (and to its comdat, when the
/// msan-with-comdat flag is set).
static constexpr const char *kMsanModuleCtorName = "msan.module_ctor";
/// Name of the runtime initialization routine the module constructor calls.
static constexpr const char *kMsanInitName = "__msan_init";
239 
240 namespace {
241 
// Memory map parameters used in application-to-shadow address calculation.
// Offset = (Addr & ~AndMask) ^ XorMask
// Shadow = ShadowBase + Offset
// Origin = OriginBase + Offset
//
// The per-target tables in this file use positional aggregate initialization,
// so the field order below must not change.
struct MemoryMapParams {
  // Bits cleared from the application address before the XOR step.
  uint64_t AndMask;
  // Value XOR-ed into the masked address to form Offset.
  uint64_t XorMask;
  // Added to Offset to obtain the shadow address.
  uint64_t ShadowBase;
  // Added to Offset to obtain the origin address.
  uint64_t OriginBase;
};
252 
// Pairs the 32-bit and 64-bit memory layouts of one platform family.
// Tables in this file set an entry to nullptr when no layout is provided
// for that pointer width.
struct PlatformMemoryMapParams {
  // Layout for the 32-bit variant of the platform, or nullptr.
  const MemoryMapParams *bits32;
  // Layout for the 64-bit variant of the platform, or nullptr.
  const MemoryMapParams *bits64;
};
257 
258 } // end anonymous namespace
259 
// Per-target memory layouts. Every initializer lists its values in the field
// order of MemoryMapParams: AndMask, XorMask, ShadowBase, OriginBase.

// i386 Linux
static const MemoryMapParams Linux_I386_MemoryMapParams = {
  0x000080000000,  // AndMask
  0,               // XorMask (not used)
  0,               // ShadowBase (not used)
  0x000040000000,  // OriginBase
};

// x86_64 Linux
// Defining MSAN_LINUX_X86_64_OLD_MAPPING at build time selects the AndMask-based
// layout; otherwise the XorMask-based layout is used.
static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
#ifdef MSAN_LINUX_X86_64_OLD_MAPPING
  0x400000000000,  // AndMask
  0,               // XorMask (not used)
  0,               // ShadowBase (not used)
  0x200000000000,  // OriginBase
#else
  0,               // AndMask (not used)
  0x500000000000,  // XorMask
  0,               // ShadowBase (not used)
  0x100000000000,  // OriginBase
#endif
};

// mips64 Linux
static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
  0,               // AndMask (not used)
  0x008000000000,  // XorMask
  0,               // ShadowBase (not used)
  0x002000000000,  // OriginBase
};

// ppc64 Linux
static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = {
  0xE00000000000,  // AndMask
  0x100000000000,  // XorMask
  0x080000000000,  // ShadowBase
  0x1C0000000000,  // OriginBase
};

// aarch64 Linux
static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
  0,               // AndMask (not used)
  0x06000000000,   // XorMask
  0,               // ShadowBase (not used)
  0x01000000000,   // OriginBase
};

// i386 FreeBSD
static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
  0x000180000000,  // AndMask
  0x000040000000,  // XorMask
  0x000020000000,  // ShadowBase
  0x000700000000,  // OriginBase
};

// x86_64 FreeBSD
static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = {
  0xc00000000000,  // AndMask
  0x200000000000,  // XorMask
  0x100000000000,  // ShadowBase
  0x380000000000,  // OriginBase
};

// x86_64 NetBSD
// The values are identical to the default (non-OLD_MAPPING) x86_64 Linux layout.
static const MemoryMapParams NetBSD_X86_64_MemoryMapParams = {
  0,               // AndMask
  0x500000000000,  // XorMask
  0,               // ShadowBase
  0x100000000000,  // OriginBase
};
330 
// Per-platform tables: each lists the 32-bit layout first and the 64-bit
// layout second (the field order of PlatformMemoryMapParams). A nullptr entry
// means this file provides no layout for that pointer width.

// Linux on x86: both i386 and x86_64 layouts are available.
static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
  &Linux_I386_MemoryMapParams,
  &Linux_X86_64_MemoryMapParams,
};

// Linux on MIPS: 64-bit only.
static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
  nullptr,
  &Linux_MIPS64_MemoryMapParams,
};

// Linux on PowerPC: 64-bit only.
static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = {
  nullptr,
  &Linux_PowerPC64_MemoryMapParams,
};

// Linux on ARM: only the AArch64 (64-bit) layout is provided.
static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
  nullptr,
  &Linux_AArch64_MemoryMapParams,
};

// FreeBSD on x86: both i386 and x86_64 layouts are available.
static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
  &FreeBSD_I386_MemoryMapParams,
  &FreeBSD_X86_64_MemoryMapParams,
};

// NetBSD on x86: 64-bit only.
static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = {
  nullptr,
  &NetBSD_X86_64_MemoryMapParams,
};
360 
361 namespace {
362 
363 /// \brief An instrumentation pass implementing detection of uninitialized
364 /// reads.
365 ///
366 /// MemorySanitizer: instrument the code in module to find
367 /// uninitialized reads.
368 class MemorySanitizer : public FunctionPass {
369 public:
370   // Pass identification, replacement for typeid.
371   static char ID;
372 
373   MemorySanitizer(int TrackOrigins = 0, bool Recover = false)
374       : FunctionPass(ID),
375         TrackOrigins(std::max(TrackOrigins, (int)ClTrackOrigins)),
376         Recover(Recover || ClKeepGoing) {}
377 
378   StringRef getPassName() const override { return "MemorySanitizer"; }
379 
380   void getAnalysisUsage(AnalysisUsage &AU) const override {
381     AU.addRequired<TargetLibraryInfoWrapperPass>();
382   }
383 
384   bool runOnFunction(Function &F) override;
385   bool doInitialization(Module &M) override;
386 
387 private:
388   friend struct MemorySanitizerVisitor;
389   friend struct VarArgAMD64Helper;
390   friend struct VarArgMIPS64Helper;
391   friend struct VarArgAArch64Helper;
392   friend struct VarArgPowerPC64Helper;
393 
394   void initializeCallbacks(Module &M);
395 
396   /// \brief Track origins (allocation points) of uninitialized values.
397   int TrackOrigins;
398   bool Recover;
399 
400   LLVMContext *C;
401   Type *IntptrTy;
402   Type *OriginTy;
403 
404   /// \brief Thread-local shadow storage for function parameters.
405   GlobalVariable *ParamTLS;
406 
407   /// \brief Thread-local origin storage for function parameters.
408   GlobalVariable *ParamOriginTLS;
409 
410   /// \brief Thread-local shadow storage for function return value.
411   GlobalVariable *RetvalTLS;
412 
413   /// \brief Thread-local origin storage for function return value.
414   GlobalVariable *RetvalOriginTLS;
415 
416   /// \brief Thread-local shadow storage for in-register va_arg function
417   /// parameters (x86_64-specific).
418   GlobalVariable *VAArgTLS;
419 
420   /// \brief Thread-local shadow storage for va_arg overflow area
421   /// (x86_64-specific).
422   GlobalVariable *VAArgOverflowSizeTLS;
423 
424   /// \brief Thread-local space used to pass origin value to the UMR reporting
425   /// function.
426   GlobalVariable *OriginTLS;
427 
428   /// \brief The run-time callback to print a warning.
429   Value *WarningFn = nullptr;
430 
431   // These arrays are indexed by log2(AccessSize).
432   Value *MaybeWarningFn[kNumberOfAccessSizes];
433   Value *MaybeStoreOriginFn[kNumberOfAccessSizes];
434 
435   /// \brief Run-time helper that generates a new origin value for a stack
436   /// allocation.
437   Value *MsanSetAllocaOrigin4Fn;
438 
439   /// \brief Run-time helper that poisons stack on function entry.
440   Value *MsanPoisonStackFn;
441 
442   /// \brief Run-time helper that records a store (or any event) of an
443   /// uninitialized value and returns an updated origin id encoding this info.
444   Value *MsanChainOriginFn;
445 
446   /// \brief MSan runtime replacements for memmove, memcpy and memset.
447   Value *MemmoveFn, *MemcpyFn, *MemsetFn;
448 
449   /// \brief Memory map parameters used in application-to-shadow calculation.
450   const MemoryMapParams *MapParams;
451 
452   MDNode *ColdCallWeights;
453 
454   /// \brief Branch weights for origin store.
455   MDNode *OriginStoreWeights;
456 
457   /// \brief An empty volatile inline asm that prevents callback merge.
458   InlineAsm *EmptyAsm;
459 
460   Function *MsanCtorFunction;
461 };
462 
463 } // end anonymous namespace
464 
// Definition of the pass identification member declared in MemorySanitizer.
char MemorySanitizer::ID = 0;

// Register the pass under the name "msan" and record its dependency on
// TargetLibraryInfoWrapperPass, matching what getAnalysisUsage() requires.
INITIALIZE_PASS_BEGIN(
    MemorySanitizer, "msan",
    "MemorySanitizer: detects uninitialized reads.", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
    MemorySanitizer, "msan",
    "MemorySanitizer: detects uninitialized reads.", false, false)
474 
/// \brief Create a new MemorySanitizer pass instance.
///
/// \param TrackOrigins Requested origin-tracking level; the pass raises it to
///        the msan-track-origins flag value if that is larger.
/// \param Recover Request recovery after reports; the pass also enables it
///        when the msan-keep-going flag is set.
/// \returns A heap-allocated pass; the caller takes the raw pointer.
FunctionPass *llvm::createMemorySanitizerPass(int TrackOrigins, bool Recover) {
  return new MemorySanitizer(TrackOrigins, Recover);
}
478 
479 /// \brief Create a non-const global initialized with the given string.
480 ///
481 /// Creates a writable global for Str so that we can pass it to the
482 /// run-time lib. Runtime uses first 4 bytes of the string to store the
483 /// frame ID, so the string needs to be mutable.
484 static GlobalVariable *createPrivateNonConstGlobalForString(Module &M,
485                                                             StringRef Str) {
486   Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
487   return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/false,
488                             GlobalValue::PrivateLinkage, StrConst, "");
489 }
490 
491 /// \brief Insert extern declaration of runtime-provided functions and globals.
492 void MemorySanitizer::initializeCallbacks(Module &M) {
493   // Only do this once.
494   if (WarningFn)
495     return;
496 
497   IRBuilder<> IRB(*C);
498   // Create the callback.
499   // FIXME: this function should have "Cold" calling conv,
500   // which is not yet implemented.
501   StringRef WarningFnName = Recover ? "__msan_warning"
502                                     : "__msan_warning_noreturn";
503   WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy());
504 
505   for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
506        AccessSizeIndex++) {
507     unsigned AccessSize = 1 << AccessSizeIndex;
508     std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize);
509     MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
510         FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8),
511         IRB.getInt32Ty());
512 
513     FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
514     MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
515         FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8),
516         IRB.getInt8PtrTy(), IRB.getInt32Ty());
517   }
518 
519   MsanSetAllocaOrigin4Fn = M.getOrInsertFunction(
520     "__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
521     IRB.getInt8PtrTy(), IntptrTy);
522   MsanPoisonStackFn =
523       M.getOrInsertFunction("__msan_poison_stack", IRB.getVoidTy(),
524                             IRB.getInt8PtrTy(), IntptrTy);
525   MsanChainOriginFn = M.getOrInsertFunction(
526     "__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty());
527   MemmoveFn = M.getOrInsertFunction(
528     "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
529     IRB.getInt8PtrTy(), IntptrTy);
530   MemcpyFn = M.getOrInsertFunction(
531     "__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
532     IntptrTy);
533   MemsetFn = M.getOrInsertFunction(
534     "__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
535     IntptrTy);
536 
537   // Create globals.
538   RetvalTLS = new GlobalVariable(
539     M, ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8), false,
540     GlobalVariable::ExternalLinkage, nullptr, "__msan_retval_tls", nullptr,
541     GlobalVariable::InitialExecTLSModel);
542   RetvalOriginTLS = new GlobalVariable(
543     M, OriginTy, false, GlobalVariable::ExternalLinkage, nullptr,
544     "__msan_retval_origin_tls", nullptr, GlobalVariable::InitialExecTLSModel);
545 
546   ParamTLS = new GlobalVariable(
547     M, ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), false,
548     GlobalVariable::ExternalLinkage, nullptr, "__msan_param_tls", nullptr,
549     GlobalVariable::InitialExecTLSModel);
550   ParamOriginTLS = new GlobalVariable(
551     M, ArrayType::get(OriginTy, kParamTLSSize / 4), false,
552     GlobalVariable::ExternalLinkage, nullptr, "__msan_param_origin_tls",
553     nullptr, GlobalVariable::InitialExecTLSModel);
554 
555   VAArgTLS = new GlobalVariable(
556     M, ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), false,
557     GlobalVariable::ExternalLinkage, nullptr, "__msan_va_arg_tls", nullptr,
558     GlobalVariable::InitialExecTLSModel);
559   VAArgOverflowSizeTLS = new GlobalVariable(
560     M, IRB.getInt64Ty(), false, GlobalVariable::ExternalLinkage, nullptr,
561     "__msan_va_arg_overflow_size_tls", nullptr,
562     GlobalVariable::InitialExecTLSModel);
563   OriginTLS = new GlobalVariable(
564     M, IRB.getInt32Ty(), false, GlobalVariable::ExternalLinkage, nullptr,
565     "__msan_origin_tls", nullptr, GlobalVariable::InitialExecTLSModel);
566 
567   // We insert an empty inline asm after __msan_report* to avoid callback merge.
568   EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
569                             StringRef(""), StringRef(""),
570                             /*hasSideEffects=*/true);
571 }
572 
573 /// \brief Module-level initialization.
574 ///
575 /// inserts a call to __msan_init to the module's constructor list.
576 bool MemorySanitizer::doInitialization(Module &M) {
577   auto &DL = M.getDataLayout();
578 
579   Triple TargetTriple(M.getTargetTriple());
580   switch (TargetTriple.getOS()) {
581     case Triple::FreeBSD:
582       switch (TargetTriple.getArch()) {
583         case Triple::x86_64:
584           MapParams = FreeBSD_X86_MemoryMapParams.bits64;
585           break;
586         case Triple::x86:
587           MapParams = FreeBSD_X86_MemoryMapParams.bits32;
588           break;
589         default:
590           report_fatal_error("unsupported architecture");
591       }
592       break;
593     case Triple::NetBSD:
594       switch (TargetTriple.getArch()) {
595         case Triple::x86_64:
596           MapParams = NetBSD_X86_MemoryMapParams.bits64;
597           break;
598         default:
599           report_fatal_error("unsupported architecture");
600       }
601       break;
602     case Triple::Linux:
603       switch (TargetTriple.getArch()) {
604         case Triple::x86_64:
605           MapParams = Linux_X86_MemoryMapParams.bits64;
606           break;
607         case Triple::x86:
608           MapParams = Linux_X86_MemoryMapParams.bits32;
609           break;
610         case Triple::mips64:
611         case Triple::mips64el:
612           MapParams = Linux_MIPS_MemoryMapParams.bits64;
613           break;
614         case Triple::ppc64:
615         case Triple::ppc64le:
616           MapParams = Linux_PowerPC_MemoryMapParams.bits64;
617           break;
618         case Triple::aarch64:
619         case Triple::aarch64_be:
620           MapParams = Linux_ARM_MemoryMapParams.bits64;
621           break;
622         default:
623           report_fatal_error("unsupported architecture");
624       }
625       break;
626     default:
627       report_fatal_error("unsupported operating system");
628   }
629 
630   C = &(M.getContext());
631   IRBuilder<> IRB(*C);
632   IntptrTy = IRB.getIntPtrTy(DL);
633   OriginTy = IRB.getInt32Ty();
634 
635   ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
636   OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
637 
638   std::tie(MsanCtorFunction, std::ignore) =
639       createSanitizerCtorAndInitFunctions(M, kMsanModuleCtorName, kMsanInitName,
640                                           /*InitArgTypes=*/{},
641                                           /*InitArgs=*/{});
642   if (ClWithComdat) {
643     Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
644     MsanCtorFunction->setComdat(MsanCtorComdat);
645     appendToGlobalCtors(M, MsanCtorFunction, 0, MsanCtorFunction);
646   } else {
647     appendToGlobalCtors(M, MsanCtorFunction, 0);
648   }
649 
650 
651   if (TrackOrigins)
652     new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
653                        IRB.getInt32(TrackOrigins), "__msan_track_origins");
654 
655   if (Recover)
656     new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
657                        IRB.getInt32(Recover), "__msan_keep_going");
658 
659   return true;
660 }
661 
662 namespace {
663 
/// \brief A helper class that handles instrumentation of VarArg
/// functions on a particular platform.
///
/// Implementations are expected to insert the instrumentation
/// necessary to propagate argument shadow through VarArg function
/// calls. Visit* methods are called during an InstVisitor pass over
/// the function, and should avoid creating new basic blocks. A new
/// instance of this class is created for each instrumented function.
///
/// This is a pure interface: every operation is pure virtual and the type
/// holds no data members.
struct VarArgHelper {
  /// Virtual so that implementations can be destroyed through a pointer to
  /// this base (the visitor holds its helper in a std::unique_ptr<VarArgHelper>).
  virtual ~VarArgHelper() = default;

  /// \brief Visit a CallSite.
  ///
  /// \param CS  The call site being visited.
  /// \param IRB Builder supplied by the caller for emitting instrumentation.
  virtual void visitCallSite(CallSite &CS, IRBuilder<> &IRB) = 0;

  /// \brief Visit a va_start call.
  virtual void visitVAStartInst(VAStartInst &I) = 0;

  /// \brief Visit a va_copy call.
  virtual void visitVACopyInst(VACopyInst &I) = 0;

  /// \brief Finalize function instrumentation.
  ///
  /// This method is called after visiting all interesting (see above)
  /// instructions in a function.
  virtual void finalizeInstrumentation() = 0;
};
690 
691 struct MemorySanitizerVisitor;
692 
693 } // end anonymous namespace
694 
/// \brief Create the VarArgHelper for \p Func (declaration only here).
///
/// The MemorySanitizerVisitor constructor calls this and stores the result in
/// its std::unique_ptr<VarArgHelper>, so the returned pointer is owned by the
/// visitor.
static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
                                        MemorySanitizerVisitor &Visitor);
697 
698 static unsigned TypeSizeToSizeIndex(unsigned TypeSize) {
699   if (TypeSize <= 8) return 0;
700   return Log2_32_Ceil((TypeSize + 7) / 8);
701 }
702 
703 namespace {
704 
705 /// This class does all the work for a given function. Store and Load
706 /// instructions store and load corresponding shadow and origin
707 /// values. Most instructions propagate shadow from arguments to their
708 /// return values. Certain instructions (most importantly, BranchInst)
709 /// test their argument shadow and print reports (with a runtime call) if it's
710 /// non-zero.
711 struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
712   Function &F;
713   MemorySanitizer &MS;
714   SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
715   ValueMap<Value*, Value*> ShadowMap, OriginMap;
716   std::unique_ptr<VarArgHelper> VAHelper;
717   const TargetLibraryInfo *TLI;
718 
719   // The following flags disable parts of MSan instrumentation based on
720   // blacklist contents and command-line options.
721   bool InsertChecks;
722   bool PropagateShadow;
723   bool PoisonStack;
724   bool PoisonUndef;
725   bool CheckReturnValue;
726 
  /// One entry of InstrumentationList: a shadow value, an origin value and an
  /// instruction, stored together.
  /// NOTE(review): the code that consumes these entries is outside this
  /// excerpt; the field descriptions below are inferred from the names.
  struct ShadowOriginAndInsertPoint {
    Value *Shadow;        // shadow value recorded for the entry
    Value *Origin;        // origin value paired with Shadow
    Instruction *OrigIns; // presumably the original instruction to instrument

    /// Store the three values as given; no validation is performed.
    ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I)
      : Shadow(S), Origin(O), OrigIns(I) {}
  };
735   SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
736   SmallVector<StoreInst *, 16> StoreList;
737 
738   MemorySanitizerVisitor(Function &F, MemorySanitizer &MS)
739       : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) {
740     bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeMemory);
741     InsertChecks = SanitizeFunction;
742     PropagateShadow = SanitizeFunction;
743     PoisonStack = SanitizeFunction && ClPoisonStack;
744     PoisonUndef = SanitizeFunction && ClPoisonUndef;
745     // FIXME: Consider using SpecialCaseList to specify a list of functions that
746     // must always return fully initialized values. For now, we hardcode "main".
747     CheckReturnValue = SanitizeFunction && (F.getName() == "main");
748     TLI = &MS.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
749 
750     DEBUG(if (!InsertChecks)
751           dbgs() << "MemorySanitizer is not inserting checks into '"
752                  << F.getName() << "'\n");
753   }
754 
755   Value *updateOrigin(Value *V, IRBuilder<> &IRB) {
756     if (MS.TrackOrigins <= 1) return V;
757     return IRB.CreateCall(MS.MsanChainOriginFn, V);
758   }
759 
760   Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) {
761     const DataLayout &DL = F.getParent()->getDataLayout();
762     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
763     if (IntptrSize == kOriginSize) return Origin;
764     assert(IntptrSize == kOriginSize * 2);
765     Origin = IRB.CreateIntCast(Origin, MS.IntptrTy, /* isSigned */ false);
766     return IRB.CreateOr(Origin, IRB.CreateShl(Origin, kOriginSize * 8));
767   }
768 
769   /// \brief Fill memory range with the given origin value.
770   void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
771                    unsigned Size, unsigned Alignment) {
772     const DataLayout &DL = F.getParent()->getDataLayout();
773     unsigned IntptrAlignment = DL.getABITypeAlignment(MS.IntptrTy);
774     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
775     assert(IntptrAlignment >= kMinOriginAlignment);
776     assert(IntptrSize >= kOriginSize);
777 
778     unsigned Ofs = 0;
779     unsigned CurrentAlignment = Alignment;
780     if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
781       Value *IntptrOrigin = originToIntptr(IRB, Origin);
782       Value *IntptrOriginPtr =
783           IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0));
784       for (unsigned i = 0; i < Size / IntptrSize; ++i) {
785         Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i)
786                        : IntptrOriginPtr;
787         IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
788         Ofs += IntptrSize / kOriginSize;
789         CurrentAlignment = IntptrAlignment;
790       }
791     }
792 
793     for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
794       Value *GEP =
795           i ? IRB.CreateConstGEP1_32(nullptr, OriginPtr, i) : OriginPtr;
796       IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
797       CurrentAlignment = kMinOriginAlignment;
798     }
799   }
800 
801   void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
802                    Value *OriginPtr, unsigned Alignment, bool AsCall) {
803     const DataLayout &DL = F.getParent()->getDataLayout();
804     unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
805     unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
806     if (Shadow->getType()->isAggregateType()) {
807       paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
808                   OriginAlignment);
809     } else {
810       Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
811       Constant *ConstantShadow = dyn_cast_or_null<Constant>(ConvertedShadow);
812       if (ConstantShadow) {
813         if (ClCheckConstantShadow && !ConstantShadow->isZeroValue())
814           paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
815                       OriginAlignment);
816         return;
817       }
818 
819       unsigned TypeSizeInBits =
820           DL.getTypeSizeInBits(ConvertedShadow->getType());
821       unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
822       if (AsCall && SizeIndex < kNumberOfAccessSizes) {
823         Value *Fn = MS.MaybeStoreOriginFn[SizeIndex];
824         Value *ConvertedShadow2 = IRB.CreateZExt(
825             ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
826         IRB.CreateCall(Fn, {ConvertedShadow2,
827                             IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
828                             Origin});
829       } else {
830         Value *Cmp = IRB.CreateICmpNE(
831             ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp");
832         Instruction *CheckTerm = SplitBlockAndInsertIfThen(
833             Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
834         IRBuilder<> IRBNew(CheckTerm);
835         paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), OriginPtr, StoreSize,
836                     OriginAlignment);
837       }
838     }
839   }
840 
841   void materializeStores(bool InstrumentWithCalls) {
842     for (StoreInst *SI : StoreList) {
843       IRBuilder<> IRB(SI);
844       Value *Val = SI->getValueOperand();
845       Value *Addr = SI->getPointerOperand();
846       Value *Shadow = SI->isAtomic() ? getCleanShadow(Val) : getShadow(Val);
847       Value *ShadowPtr, *OriginPtr;
848       Type *ShadowTy = Shadow->getType();
849       unsigned Alignment = SI->getAlignment();
850       unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
851       std::tie(ShadowPtr, OriginPtr) =
852           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment);
853 
854       StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment);
855       DEBUG(dbgs() << "  STORE: " << *NewSI << "\n");
856 
857       if (ClCheckAccessAddress)
858         insertShadowCheck(Addr, NewSI);
859 
860       if (SI->isAtomic())
861         SI->setOrdering(addReleaseOrdering(SI->getOrdering()));
862 
863       if (MS.TrackOrigins && !SI->isAtomic())
864         storeOrigin(IRB, Addr, Shadow, getOrigin(Val), OriginPtr,
865                     OriginAlignment, InstrumentWithCalls);
866     }
867   }
868 
869   /// \brief Helper function to insert a warning at IRB's current insert point.
870   void insertWarningFn(IRBuilder<> &IRB, Value *Origin) {
871     if (!Origin)
872       Origin = (Value *)IRB.getInt32(0);
873     if (MS.TrackOrigins) {
874       IRB.CreateStore(Origin, MS.OriginTLS);
875     }
876     IRB.CreateCall(MS.WarningFn, {});
877     IRB.CreateCall(MS.EmptyAsm, {});
878     // FIXME: Insert UnreachableInst if !MS.Recover?
879     // This may invalidate some of the following checks and needs to be done
880     // at the very end.
881   }
882 
883   void materializeOneCheck(Instruction *OrigIns, Value *Shadow, Value *Origin,
884                            bool AsCall) {
885     IRBuilder<> IRB(OrigIns);
886     DEBUG(dbgs() << "  SHAD0 : " << *Shadow << "\n");
887     Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
888     DEBUG(dbgs() << "  SHAD1 : " << *ConvertedShadow << "\n");
889 
890     Constant *ConstantShadow = dyn_cast_or_null<Constant>(ConvertedShadow);
891     if (ConstantShadow) {
892       if (ClCheckConstantShadow && !ConstantShadow->isZeroValue()) {
893         insertWarningFn(IRB, Origin);
894       }
895       return;
896     }
897 
898     const DataLayout &DL = OrigIns->getModule()->getDataLayout();
899 
900     unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
901     unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
902     if (AsCall && SizeIndex < kNumberOfAccessSizes) {
903       Value *Fn = MS.MaybeWarningFn[SizeIndex];
904       Value *ConvertedShadow2 =
905           IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
906       IRB.CreateCall(Fn, {ConvertedShadow2, MS.TrackOrigins && Origin
907                                                 ? Origin
908                                                 : (Value *)IRB.getInt32(0)});
909     } else {
910       Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
911                                     getCleanShadow(ConvertedShadow), "_mscmp");
912       Instruction *CheckTerm = SplitBlockAndInsertIfThen(
913           Cmp, OrigIns,
914           /* Unreachable */ !MS.Recover, MS.ColdCallWeights);
915 
916       IRB.SetInsertPoint(CheckTerm);
917       insertWarningFn(IRB, Origin);
918       DEBUG(dbgs() << "  CHECK: " << *Cmp << "\n");
919     }
920   }
921 
922   void materializeChecks(bool InstrumentWithCalls) {
923     for (const auto &ShadowData : InstrumentationList) {
924       Instruction *OrigIns = ShadowData.OrigIns;
925       Value *Shadow = ShadowData.Shadow;
926       Value *Origin = ShadowData.Origin;
927       materializeOneCheck(OrigIns, Shadow, Origin, InstrumentWithCalls);
928     }
929     DEBUG(dbgs() << "DONE:\n" << F);
930   }
931 
932   /// \brief Add MemorySanitizer instrumentation to a function.
933   bool runOnFunction() {
934     MS.initializeCallbacks(*F.getParent());
935 
936     // In the presence of unreachable blocks, we may see Phi nodes with
937     // incoming nodes from such blocks. Since InstVisitor skips unreachable
938     // blocks, such nodes will not have any shadow value associated with them.
939     // It's easier to remove unreachable blocks than deal with missing shadow.
940     removeUnreachableBlocks(F);
941 
942     // Iterate all BBs in depth-first order and create shadow instructions
943     // for all instructions (where applicable).
944     // For PHI nodes we create dummy shadow PHIs which will be finalized later.
945     for (BasicBlock *BB : depth_first(&F.getEntryBlock()))
946       visit(*BB);
947 
948     // Finalize PHI nodes.
949     for (PHINode *PN : ShadowPHINodes) {
950       PHINode *PNS = cast<PHINode>(getShadow(PN));
951       PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : nullptr;
952       size_t NumValues = PN->getNumIncomingValues();
953       for (size_t v = 0; v < NumValues; v++) {
954         PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
955         if (PNO) PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
956       }
957     }
958 
959     VAHelper->finalizeInstrumentation();
960 
961     bool InstrumentWithCalls = ClInstrumentationWithCallThreshold >= 0 &&
962                                InstrumentationList.size() + StoreList.size() >
963                                    (unsigned)ClInstrumentationWithCallThreshold;
964 
965     // Delayed instrumentation of StoreInst.
966     // This may add new checks to be inserted later.
967     materializeStores(InstrumentWithCalls);
968 
969     // Insert shadow value checks.
970     materializeChecks(InstrumentWithCalls);
971 
972     return true;
973   }
974 
975   /// \brief Compute the shadow type that corresponds to a given Value.
976   Type *getShadowTy(Value *V) {
977     return getShadowTy(V->getType());
978   }
979 
980   /// \brief Compute the shadow type that corresponds to a given Type.
981   Type *getShadowTy(Type *OrigTy) {
982     if (!OrigTy->isSized()) {
983       return nullptr;
984     }
985     // For integer type, shadow is the same as the original type.
986     // This may return weird-sized types like i1.
987     if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
988       return IT;
989     const DataLayout &DL = F.getParent()->getDataLayout();
990     if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
991       uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType());
992       return VectorType::get(IntegerType::get(*MS.C, EltSize),
993                              VT->getNumElements());
994     }
995     if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy)) {
996       return ArrayType::get(getShadowTy(AT->getElementType()),
997                             AT->getNumElements());
998     }
999     if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
1000       SmallVector<Type*, 4> Elements;
1001       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1002         Elements.push_back(getShadowTy(ST->getElementType(i)));
1003       StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked());
1004       DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
1005       return Res;
1006     }
1007     uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
1008     return IntegerType::get(*MS.C, TypeSize);
1009   }
1010 
1011   /// \brief Flatten a vector type.
1012   Type *getShadowTyNoVec(Type *ty) {
1013     if (VectorType *vt = dyn_cast<VectorType>(ty))
1014       return IntegerType::get(*MS.C, vt->getBitWidth());
1015     return ty;
1016   }
1017 
1018   /// \brief Convert a shadow value to it's flattened variant.
1019   Value *convertToShadowTyNoVec(Value *V, IRBuilder<> &IRB) {
1020     Type *Ty = V->getType();
1021     Type *NoVecTy = getShadowTyNoVec(Ty);
1022     if (Ty == NoVecTy) return V;
1023     return IRB.CreateBitCast(V, NoVecTy);
1024   }
1025 
1026   /// \brief Compute the integer shadow offset that corresponds to a given
1027   /// application address.
1028   ///
1029   /// Offset = (Addr & ~AndMask) ^ XorMask
1030   Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) {
1031     Value *OffsetLong = IRB.CreatePointerCast(Addr, MS.IntptrTy);
1032 
1033     uint64_t AndMask = MS.MapParams->AndMask;
1034     if (AndMask)
1035       OffsetLong =
1036           IRB.CreateAnd(OffsetLong, ConstantInt::get(MS.IntptrTy, ~AndMask));
1037 
1038     uint64_t XorMask = MS.MapParams->XorMask;
1039     if (XorMask)
1040       OffsetLong =
1041           IRB.CreateXor(OffsetLong, ConstantInt::get(MS.IntptrTy, XorMask));
1042     return OffsetLong;
1043   }
1044 
1045   /// \brief Compute the shadow and origin addresses corresponding to a given
1046   /// application address.
1047   ///
1048   /// Shadow = ShadowBase + Offset
1049   /// Origin = (OriginBase + Offset) & ~3ULL
1050   std::pair<Value *, Value *> getShadowOriginPtrUserspace(
1051       Value *Addr, IRBuilder<> &IRB, Type *ShadowTy, unsigned Alignment,
1052       Instruction **FirstInsn) {
1053     Value *ShadowOffset = getShadowPtrOffset(Addr, IRB);
1054     Value *ShadowLong = ShadowOffset;
1055     uint64_t ShadowBase = MS.MapParams->ShadowBase;
1056     *FirstInsn = dyn_cast<Instruction>(ShadowLong);
1057     if (ShadowBase != 0) {
1058       ShadowLong =
1059         IRB.CreateAdd(ShadowLong,
1060                       ConstantInt::get(MS.IntptrTy, ShadowBase));
1061     }
1062     Value *ShadowPtr =
1063         IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
1064     Value *OriginPtr = nullptr;
1065     if (MS.TrackOrigins) {
1066       Value *OriginLong = ShadowOffset;
1067       uint64_t OriginBase = MS.MapParams->OriginBase;
1068       if (OriginBase != 0)
1069         OriginLong = IRB.CreateAdd(OriginLong,
1070                                    ConstantInt::get(MS.IntptrTy, OriginBase));
1071       if (Alignment < kMinOriginAlignment) {
1072         uint64_t Mask = kMinOriginAlignment - 1;
1073         OriginLong =
1074             IRB.CreateAnd(OriginLong, ConstantInt::get(MS.IntptrTy, ~Mask));
1075       }
1076       OriginPtr =
1077           IRB.CreateIntToPtr(OriginLong, PointerType::get(IRB.getInt32Ty(), 0));
1078     }
1079     return std::make_pair(ShadowPtr, OriginPtr);
1080   }
1081 
1082   std::pair<Value *, Value *> getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB,
1083                                                  Type *ShadowTy,
1084                                                  unsigned Alignment) {
1085     Instruction *FirstInsn = nullptr;
1086     std::pair<Value *, Value *> ret =
1087         getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment, &FirstInsn);
1088     return ret;
1089   }
1090 
1091   /// \brief Compute the shadow address for a given function argument.
1092   ///
1093   /// Shadow = ParamTLS+ArgOffset.
1094   Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB,
1095                                  int ArgOffset) {
1096     Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
1097     if (ArgOffset)
1098       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1099     return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
1100                               "_msarg");
1101   }
1102 
1103   /// \brief Compute the origin address for a given function argument.
1104   Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB,
1105                                  int ArgOffset) {
1106     if (!MS.TrackOrigins) return nullptr;
1107     Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
1108     if (ArgOffset)
1109       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1110     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
1111                               "_msarg_o");
1112   }
1113 
1114   /// \brief Compute the shadow address for a retval.
1115   Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) {
1116     return IRB.CreatePointerCast(MS.RetvalTLS,
1117                                  PointerType::get(getShadowTy(A), 0),
1118                                  "_msret");
1119   }
1120 
1121   /// \brief Compute the origin address for a retval.
1122   Value *getOriginPtrForRetval(IRBuilder<> &IRB) {
1123     // We keep a single origin for the entire retval. Might be too optimistic.
1124     return MS.RetvalOriginTLS;
1125   }
1126 
1127   /// \brief Set SV to be the shadow value for V.
1128   void setShadow(Value *V, Value *SV) {
1129     assert(!ShadowMap.count(V) && "Values may only have one shadow");
1130     ShadowMap[V] = PropagateShadow ? SV : getCleanShadow(V);
1131   }
1132 
1133   /// \brief Set Origin to be the origin value for V.
1134   void setOrigin(Value *V, Value *Origin) {
1135     if (!MS.TrackOrigins) return;
1136     assert(!OriginMap.count(V) && "Values may only have one origin");
1137     DEBUG(dbgs() << "ORIGIN: " << *V << "  ==> " << *Origin << "\n");
1138     OriginMap[V] = Origin;
1139   }
1140 
1141   Constant *getCleanShadow(Type *OrigTy) {
1142     Type *ShadowTy = getShadowTy(OrigTy);
1143     if (!ShadowTy)
1144       return nullptr;
1145     return Constant::getNullValue(ShadowTy);
1146   }
1147 
1148   /// \brief Create a clean shadow value for a given value.
1149   ///
1150   /// Clean shadow (all zeroes) means all bits of the value are defined
1151   /// (initialized).
1152   Constant *getCleanShadow(Value *V) {
1153     return getCleanShadow(V->getType());
1154   }
1155 
1156   /// \brief Create a dirty shadow of a given shadow type.
1157   Constant *getPoisonedShadow(Type *ShadowTy) {
1158     assert(ShadowTy);
1159     if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
1160       return Constant::getAllOnesValue(ShadowTy);
1161     if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy)) {
1162       SmallVector<Constant *, 4> Vals(AT->getNumElements(),
1163                                       getPoisonedShadow(AT->getElementType()));
1164       return ConstantArray::get(AT, Vals);
1165     }
1166     if (StructType *ST = dyn_cast<StructType>(ShadowTy)) {
1167       SmallVector<Constant *, 4> Vals;
1168       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1169         Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
1170       return ConstantStruct::get(ST, Vals);
1171     }
1172     llvm_unreachable("Unexpected shadow type");
1173   }
1174 
1175   /// \brief Create a dirty shadow for a given value.
1176   Constant *getPoisonedShadow(Value *V) {
1177     Type *ShadowTy = getShadowTy(V);
1178     if (!ShadowTy)
1179       return nullptr;
1180     return getPoisonedShadow(ShadowTy);
1181   }
1182 
1183   /// \brief Create a clean (zero) origin.
1184   Value *getCleanOrigin() {
1185     return Constant::getNullValue(MS.OriginTy);
1186   }
1187 
1188   /// \brief Get the shadow value for a given Value.
1189   ///
1190   /// This function either returns the value set earlier with setShadow,
1191   /// or extracts if from ParamTLS (for function arguments).
1192   Value *getShadow(Value *V) {
1193     if (!PropagateShadow) return getCleanShadow(V);
1194     if (Instruction *I = dyn_cast<Instruction>(V)) {
1195       if (I->getMetadata("nosanitize"))
1196         return getCleanShadow(V);
1197       // For instructions the shadow is already stored in the map.
1198       Value *Shadow = ShadowMap[V];
1199       if (!Shadow) {
1200         DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
1201         (void)I;
1202         assert(Shadow && "No shadow for a value");
1203       }
1204       return Shadow;
1205     }
1206     if (UndefValue *U = dyn_cast<UndefValue>(V)) {
1207       Value *AllOnes = PoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V);
1208       DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
1209       (void)U;
1210       return AllOnes;
1211     }
1212     if (Argument *A = dyn_cast<Argument>(V)) {
1213       // For arguments we compute the shadow on demand and store it in the map.
1214       Value **ShadowPtr = &ShadowMap[V];
1215       if (*ShadowPtr)
1216         return *ShadowPtr;
1217       Function *F = A->getParent();
1218       IRBuilder<> EntryIRB(F->getEntryBlock().getFirstNonPHI());
1219       unsigned ArgOffset = 0;
1220       const DataLayout &DL = F->getParent()->getDataLayout();
1221       for (auto &FArg : F->args()) {
1222         if (!FArg.getType()->isSized()) {
1223           DEBUG(dbgs() << "Arg is not sized\n");
1224           continue;
1225         }
1226         unsigned Size =
1227             FArg.hasByValAttr()
1228                 ? DL.getTypeAllocSize(FArg.getType()->getPointerElementType())
1229                 : DL.getTypeAllocSize(FArg.getType());
1230         if (A == &FArg) {
1231           bool Overflow = ArgOffset + Size > kParamTLSSize;
1232           Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
1233           if (FArg.hasByValAttr()) {
1234             // ByVal pointer itself has clean shadow. We copy the actual
1235             // argument shadow to the underlying memory.
1236             // Figure out maximal valid memcpy alignment.
1237             unsigned ArgAlign = FArg.getParamAlignment();
1238             if (ArgAlign == 0) {
1239               Type *EltType = A->getType()->getPointerElementType();
1240               ArgAlign = DL.getABITypeAlignment(EltType);
1241             }
1242             Value *CpShadowPtr =
1243                 getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign)
1244                     .first;
1245             if (Overflow) {
1246               // ParamTLS overflow.
1247               EntryIRB.CreateMemSet(
1248                   CpShadowPtr, Constant::getNullValue(EntryIRB.getInt8Ty()),
1249                   Size, ArgAlign);
1250             } else {
1251               unsigned CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
1252               Value *Cpy = EntryIRB.CreateMemCpy(CpShadowPtr, CopyAlign, Base,
1253                                                  CopyAlign, Size);
1254               DEBUG(dbgs() << "  ByValCpy: " << *Cpy << "\n");
1255               (void)Cpy;
1256             }
1257             *ShadowPtr = getCleanShadow(V);
1258           } else {
1259             if (Overflow) {
1260               // ParamTLS overflow.
1261               *ShadowPtr = getCleanShadow(V);
1262             } else {
1263               *ShadowPtr =
1264                   EntryIRB.CreateAlignedLoad(Base, kShadowTLSAlignment);
1265             }
1266           }
1267           DEBUG(dbgs() << "  ARG:    "  << FArg << " ==> " <<
1268                 **ShadowPtr << "\n");
1269           if (MS.TrackOrigins && !Overflow) {
1270             Value *OriginPtr =
1271                 getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
1272             setOrigin(A, EntryIRB.CreateLoad(OriginPtr));
1273           } else {
1274             setOrigin(A, getCleanOrigin());
1275           }
1276         }
1277         ArgOffset += alignTo(Size, kShadowTLSAlignment);
1278       }
1279       assert(*ShadowPtr && "Could not find shadow for an argument");
1280       return *ShadowPtr;
1281     }
1282     // For everything else the shadow is zero.
1283     return getCleanShadow(V);
1284   }
1285 
1286   /// \brief Get the shadow for i-th argument of the instruction I.
1287   Value *getShadow(Instruction *I, int i) {
1288     return getShadow(I->getOperand(i));
1289   }
1290 
1291   /// \brief Get the origin for a value.
1292   Value *getOrigin(Value *V) {
1293     if (!MS.TrackOrigins) return nullptr;
1294     if (!PropagateShadow) return getCleanOrigin();
1295     if (isa<Constant>(V)) return getCleanOrigin();
1296     assert((isa<Instruction>(V) || isa<Argument>(V)) &&
1297            "Unexpected value type in getOrigin()");
1298     if (Instruction *I = dyn_cast<Instruction>(V)) {
1299       if (I->getMetadata("nosanitize"))
1300         return getCleanOrigin();
1301     }
1302     Value *Origin = OriginMap[V];
1303     assert(Origin && "Missing origin");
1304     return Origin;
1305   }
1306 
1307   /// \brief Get the origin for i-th argument of the instruction I.
1308   Value *getOrigin(Instruction *I, int i) {
1309     return getOrigin(I->getOperand(i));
1310   }
1311 
1312   /// \brief Remember the place where a shadow check should be inserted.
1313   ///
1314   /// This location will be later instrumented with a check that will print a
1315   /// UMR warning in runtime if the shadow value is not 0.
1316   void insertShadowCheck(Value *Shadow, Value *Origin, Instruction *OrigIns) {
1317     assert(Shadow);
1318     if (!InsertChecks) return;
1319 #ifndef NDEBUG
1320     Type *ShadowTy = Shadow->getType();
1321     assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy)) &&
1322            "Can only insert checks for integer and vector shadow types");
1323 #endif
1324     InstrumentationList.push_back(
1325         ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
1326   }
1327 
1328   /// \brief Remember the place where a shadow check should be inserted.
1329   ///
1330   /// This location will be later instrumented with a check that will print a
1331   /// UMR warning in runtime if the value is not fully defined.
1332   void insertShadowCheck(Value *Val, Instruction *OrigIns) {
1333     assert(Val);
1334     Value *Shadow, *Origin;
1335     if (ClCheckConstantShadow) {
1336       Shadow = getShadow(Val);
1337       if (!Shadow) return;
1338       Origin = getOrigin(Val);
1339     } else {
1340       Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
1341       if (!Shadow) return;
1342       Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
1343     }
1344     insertShadowCheck(Shadow, Origin, OrigIns);
1345   }
1346 
1347   AtomicOrdering addReleaseOrdering(AtomicOrdering a) {
1348     switch (a) {
1349       case AtomicOrdering::NotAtomic:
1350         return AtomicOrdering::NotAtomic;
1351       case AtomicOrdering::Unordered:
1352       case AtomicOrdering::Monotonic:
1353       case AtomicOrdering::Release:
1354         return AtomicOrdering::Release;
1355       case AtomicOrdering::Acquire:
1356       case AtomicOrdering::AcquireRelease:
1357         return AtomicOrdering::AcquireRelease;
1358       case AtomicOrdering::SequentiallyConsistent:
1359         return AtomicOrdering::SequentiallyConsistent;
1360     }
1361     llvm_unreachable("Unknown ordering");
1362   }
1363 
1364   AtomicOrdering addAcquireOrdering(AtomicOrdering a) {
1365     switch (a) {
1366       case AtomicOrdering::NotAtomic:
1367         return AtomicOrdering::NotAtomic;
1368       case AtomicOrdering::Unordered:
1369       case AtomicOrdering::Monotonic:
1370       case AtomicOrdering::Acquire:
1371         return AtomicOrdering::Acquire;
1372       case AtomicOrdering::Release:
1373       case AtomicOrdering::AcquireRelease:
1374         return AtomicOrdering::AcquireRelease;
1375       case AtomicOrdering::SequentiallyConsistent:
1376         return AtomicOrdering::SequentiallyConsistent;
1377     }
1378     llvm_unreachable("Unknown ordering");
1379   }
1380 
1381   // ------------------- Visitors.
1382   using InstVisitor<MemorySanitizerVisitor>::visit;
1383   void visit(Instruction &I) {
1384     if (!I.getMetadata("nosanitize"))
1385       InstVisitor<MemorySanitizerVisitor>::visit(I);
1386   }
1387 
1388   /// \brief Instrument LoadInst
1389   ///
1390   /// Loads the corresponding shadow and (optionally) origin.
1391   /// Optionally, checks that the load address is fully defined.
1392   void visitLoadInst(LoadInst &I) {
1393     assert(I.getType()->isSized() && "Load type must have size");
1394     assert(!I.getMetadata("nosanitize"));
1395     IRBuilder<> IRB(I.getNextNode());
1396     Type *ShadowTy = getShadowTy(&I);
1397     Value *Addr = I.getPointerOperand();
1398     Value *ShadowPtr, *OriginPtr;
1399     unsigned Alignment = I.getAlignment();
1400     if (PropagateShadow) {
1401       std::tie(ShadowPtr, OriginPtr) =
1402           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment);
1403       setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, Alignment, "_msld"));
1404     } else {
1405       setShadow(&I, getCleanShadow(&I));
1406     }
1407 
1408     if (ClCheckAccessAddress)
1409       insertShadowCheck(I.getPointerOperand(), &I);
1410 
1411     if (I.isAtomic())
1412       I.setOrdering(addAcquireOrdering(I.getOrdering()));
1413 
1414     if (MS.TrackOrigins) {
1415       if (PropagateShadow) {
1416         unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1417         setOrigin(&I, IRB.CreateAlignedLoad(OriginPtr, OriginAlignment));
1418       } else {
1419         setOrigin(&I, getCleanOrigin());
1420       }
1421     }
1422   }
1423 
1424   /// \brief Instrument StoreInst
1425   ///
1426   /// Stores the corresponding shadow and (optionally) origin.
1427   /// Optionally, checks that the store address is fully defined.
1428   void visitStoreInst(StoreInst &I) {
1429     StoreList.push_back(&I);
1430   }
1431 
1432   void handleCASOrRMW(Instruction &I) {
1433     assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
1434 
1435     IRBuilder<> IRB(&I);
1436     Value *Addr = I.getOperand(0);
1437     Value *ShadowPtr =
1438         getShadowOriginPtr(Addr, IRB, I.getType(), /*Alignment*/ 1).first;
1439 
1440     if (ClCheckAccessAddress)
1441       insertShadowCheck(Addr, &I);
1442 
1443     // Only test the conditional argument of cmpxchg instruction.
1444     // The other argument can potentially be uninitialized, but we can not
1445     // detect this situation reliably without possible false positives.
1446     if (isa<AtomicCmpXchgInst>(I))
1447       insertShadowCheck(I.getOperand(1), &I);
1448 
1449     IRB.CreateStore(getCleanShadow(&I), ShadowPtr);
1450 
1451     setShadow(&I, getCleanShadow(&I));
1452     setOrigin(&I, getCleanOrigin());
1453   }
1454 
1455   void visitAtomicRMWInst(AtomicRMWInst &I) {
1456     handleCASOrRMW(I);
1457     I.setOrdering(addReleaseOrdering(I.getOrdering()));
1458   }
1459 
1460   void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
1461     handleCASOrRMW(I);
1462     I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
1463   }
1464 
1465   // Vector manipulation.
1466   void visitExtractElementInst(ExtractElementInst &I) {
1467     insertShadowCheck(I.getOperand(1), &I);
1468     IRBuilder<> IRB(&I);
1469     setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
1470               "_msprop"));
1471     setOrigin(&I, getOrigin(&I, 0));
1472   }
1473 
1474   void visitInsertElementInst(InsertElementInst &I) {
1475     insertShadowCheck(I.getOperand(2), &I);
1476     IRBuilder<> IRB(&I);
1477     setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1),
1478               I.getOperand(2), "_msprop"));
1479     setOriginForNaryOp(I);
1480   }
1481 
1482   void visitShuffleVectorInst(ShuffleVectorInst &I) {
1483     insertShadowCheck(I.getOperand(2), &I);
1484     IRBuilder<> IRB(&I);
1485     setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1),
1486               I.getOperand(2), "_msprop"));
1487     setOriginForNaryOp(I);
1488   }
1489 
1490   // Casts.
1491   void visitSExtInst(SExtInst &I) {
1492     IRBuilder<> IRB(&I);
1493     setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop"));
1494     setOrigin(&I, getOrigin(&I, 0));
1495   }
1496 
1497   void visitZExtInst(ZExtInst &I) {
1498     IRBuilder<> IRB(&I);
1499     setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop"));
1500     setOrigin(&I, getOrigin(&I, 0));
1501   }
1502 
1503   void visitTruncInst(TruncInst &I) {
1504     IRBuilder<> IRB(&I);
1505     setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop"));
1506     setOrigin(&I, getOrigin(&I, 0));
1507   }
1508 
1509   void visitBitCastInst(BitCastInst &I) {
1510     // Special case: if this is the bitcast (there is exactly 1 allowed) between
1511     // a musttail call and a ret, don't instrument. New instructions are not
1512     // allowed after a musttail call.
1513     if (auto *CI = dyn_cast<CallInst>(I.getOperand(0)))
1514       if (CI->isMustTailCall())
1515         return;
1516     IRBuilder<> IRB(&I);
1517     setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
1518     setOrigin(&I, getOrigin(&I, 0));
1519   }
1520 
1521   void visitPtrToIntInst(PtrToIntInst &I) {
1522     IRBuilder<> IRB(&I);
1523     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
1524              "_msprop_ptrtoint"));
1525     setOrigin(&I, getOrigin(&I, 0));
1526   }
1527 
1528   void visitIntToPtrInst(IntToPtrInst &I) {
1529     IRBuilder<> IRB(&I);
1530     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
1531              "_msprop_inttoptr"));
1532     setOrigin(&I, getOrigin(&I, 0));
1533   }
1534 
1535   void visitFPToSIInst(CastInst& I) { handleShadowOr(I); }
1536   void visitFPToUIInst(CastInst& I) { handleShadowOr(I); }
1537   void visitSIToFPInst(CastInst& I) { handleShadowOr(I); }
1538   void visitUIToFPInst(CastInst& I) { handleShadowOr(I); }
1539   void visitFPExtInst(CastInst& I) { handleShadowOr(I); }
1540   void visitFPTruncInst(CastInst& I) { handleShadowOr(I); }
1541 
1542   /// \brief Propagate shadow for bitwise AND.
1543   ///
1544   /// This code is exact, i.e. if, for example, a bit in the left argument
1545   /// is defined and 0, then neither the value not definedness of the
1546   /// corresponding bit in B don't affect the resulting shadow.
1547   void visitAnd(BinaryOperator &I) {
1548     IRBuilder<> IRB(&I);
1549     //  "And" of 0 and a poisoned value results in unpoisoned value.
1550     //  1&1 => 1;     0&1 => 0;     p&1 => p;
1551     //  1&0 => 0;     0&0 => 0;     p&0 => 0;
1552     //  1&p => p;     0&p => 0;     p&p => p;
1553     //  S = (S1 & S2) | (V1 & S2) | (S1 & V2)
1554     Value *S1 = getShadow(&I, 0);
1555     Value *S2 = getShadow(&I, 1);
1556     Value *V1 = I.getOperand(0);
1557     Value *V2 = I.getOperand(1);
1558     if (V1->getType() != S1->getType()) {
1559       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
1560       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
1561     }
1562     Value *S1S2 = IRB.CreateAnd(S1, S2);
1563     Value *V1S2 = IRB.CreateAnd(V1, S2);
1564     Value *S1V2 = IRB.CreateAnd(S1, V2);
1565     setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
1566     setOriginForNaryOp(I);
1567   }
1568 
1569   void visitOr(BinaryOperator &I) {
1570     IRBuilder<> IRB(&I);
1571     //  "Or" of 1 and a poisoned value results in unpoisoned value.
1572     //  1|1 => 1;     0|1 => 1;     p|1 => 1;
1573     //  1|0 => 1;     0|0 => 0;     p|0 => p;
1574     //  1|p => 1;     0|p => p;     p|p => p;
1575     //  S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
1576     Value *S1 = getShadow(&I, 0);
1577     Value *S2 = getShadow(&I, 1);
1578     Value *V1 = IRB.CreateNot(I.getOperand(0));
1579     Value *V2 = IRB.CreateNot(I.getOperand(1));
1580     if (V1->getType() != S1->getType()) {
1581       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
1582       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
1583     }
1584     Value *S1S2 = IRB.CreateAnd(S1, S2);
1585     Value *V1S2 = IRB.CreateAnd(V1, S2);
1586     Value *S1V2 = IRB.CreateAnd(S1, V2);
1587     setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
1588     setOriginForNaryOp(I);
1589   }
1590 
1591   /// \brief Default propagation of shadow and/or origin.
1592   ///
1593   /// This class implements the general case of shadow propagation, used in all
1594   /// cases where we don't know and/or don't care about what the operation
1595   /// actually does. It converts all input shadow values to a common type
1596   /// (extending or truncating as necessary), and bitwise OR's them.
1597   ///
1598   /// This is much cheaper than inserting checks (i.e. requiring inputs to be
1599   /// fully initialized), and less prone to false positives.
1600   ///
1601   /// This class also implements the general case of origin propagation. For a
1602   /// Nary operation, result origin is set to the origin of an argument that is
1603   /// not entirely initialized. If there is more than one such arguments, the
1604   /// rightmost of them is picked. It does not matter which one is picked if all
1605   /// arguments are initialized.
1606   template <bool CombineShadow>
1607   class Combiner {
1608     Value *Shadow = nullptr;
1609     Value *Origin = nullptr;
1610     IRBuilder<> &IRB;
1611     MemorySanitizerVisitor *MSV;
1612 
1613   public:
1614     Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB)
1615         : IRB(IRB), MSV(MSV) {}
1616 
1617     /// \brief Add a pair of shadow and origin values to the mix.
1618     Combiner &Add(Value *OpShadow, Value *OpOrigin) {
1619       if (CombineShadow) {
1620         assert(OpShadow);
1621         if (!Shadow)
1622           Shadow = OpShadow;
1623         else {
1624           OpShadow = MSV->CreateShadowCast(IRB, OpShadow, Shadow->getType());
1625           Shadow = IRB.CreateOr(Shadow, OpShadow, "_msprop");
1626         }
1627       }
1628 
1629       if (MSV->MS.TrackOrigins) {
1630         assert(OpOrigin);
1631         if (!Origin) {
1632           Origin = OpOrigin;
1633         } else {
1634           Constant *ConstOrigin = dyn_cast<Constant>(OpOrigin);
1635           // No point in adding something that might result in 0 origin value.
1636           if (!ConstOrigin || !ConstOrigin->isNullValue()) {
1637             Value *FlatShadow = MSV->convertToShadowTyNoVec(OpShadow, IRB);
1638             Value *Cond =
1639                 IRB.CreateICmpNE(FlatShadow, MSV->getCleanShadow(FlatShadow));
1640             Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
1641           }
1642         }
1643       }
1644       return *this;
1645     }
1646 
1647     /// \brief Add an application value to the mix.
1648     Combiner &Add(Value *V) {
1649       Value *OpShadow = MSV->getShadow(V);
1650       Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : nullptr;
1651       return Add(OpShadow, OpOrigin);
1652     }
1653 
1654     /// \brief Set the current combined values as the given instruction's shadow
1655     /// and origin.
1656     void Done(Instruction *I) {
1657       if (CombineShadow) {
1658         assert(Shadow);
1659         Shadow = MSV->CreateShadowCast(IRB, Shadow, MSV->getShadowTy(I));
1660         MSV->setShadow(I, Shadow);
1661       }
1662       if (MSV->MS.TrackOrigins) {
1663         assert(Origin);
1664         MSV->setOrigin(I, Origin);
1665       }
1666     }
1667   };
1668 
1669   using ShadowAndOriginCombiner = Combiner<true>;
1670   using OriginCombiner = Combiner<false>;
1671 
1672   /// \brief Propagate origin for arbitrary operation.
1673   void setOriginForNaryOp(Instruction &I) {
1674     if (!MS.TrackOrigins) return;
1675     IRBuilder<> IRB(&I);
1676     OriginCombiner OC(this, IRB);
1677     for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
1678       OC.Add(OI->get());
1679     OC.Done(&I);
1680   }
1681 
1682   size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) {
1683     assert(!(Ty->isVectorTy() && Ty->getScalarType()->isPointerTy()) &&
1684            "Vector of pointers is not a valid shadow type");
1685     return Ty->isVectorTy() ?
1686       Ty->getVectorNumElements() * Ty->getScalarSizeInBits() :
1687       Ty->getPrimitiveSizeInBits();
1688   }
1689 
1690   /// \brief Cast between two shadow types, extending or truncating as
1691   /// necessary.
1692   Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy,
1693                           bool Signed = false) {
1694     Type *srcTy = V->getType();
1695     size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
1696     size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
1697     if (srcSizeInBits > 1 && dstSizeInBits == 1)
1698       return IRB.CreateICmpNE(V, getCleanShadow(V));
1699 
1700     if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
1701       return IRB.CreateIntCast(V, dstTy, Signed);
1702     if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
1703         dstTy->getVectorNumElements() == srcTy->getVectorNumElements())
1704       return IRB.CreateIntCast(V, dstTy, Signed);
1705     Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
1706     Value *V2 =
1707       IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed);
1708     return IRB.CreateBitCast(V2, dstTy);
1709     // TODO: handle struct types.
1710   }
1711 
1712   /// \brief Cast an application value to the type of its own shadow.
1713   Value *CreateAppToShadowCast(IRBuilder<> &IRB, Value *V) {
1714     Type *ShadowTy = getShadowTy(V);
1715     if (V->getType() == ShadowTy)
1716       return V;
1717     if (V->getType()->isPtrOrPtrVectorTy())
1718       return IRB.CreatePtrToInt(V, ShadowTy);
1719     else
1720       return IRB.CreateBitCast(V, ShadowTy);
1721   }
1722 
1723   /// \brief Propagate shadow for arbitrary operation.
1724   void handleShadowOr(Instruction &I) {
1725     IRBuilder<> IRB(&I);
1726     ShadowAndOriginCombiner SC(this, IRB);
1727     for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
1728       SC.Add(OI->get());
1729     SC.Done(&I);
1730   }
1731 
1732   // \brief Handle multiplication by constant.
1733   //
1734   // Handle a special case of multiplication by constant that may have one or
1735   // more zeros in the lower bits. This makes corresponding number of lower bits
1736   // of the result zero as well. We model it by shifting the other operand
1737   // shadow left by the required number of bits. Effectively, we transform
1738   // (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as (Sx << B).
1739   // We use multiplication by 2**N instead of shift to cover the case of
1740   // multiplication by 0, which may occur in some elements of a vector operand.
1741   void handleMulByConstant(BinaryOperator &I, Constant *ConstArg,
1742                            Value *OtherArg) {
1743     Constant *ShadowMul;
1744     Type *Ty = ConstArg->getType();
1745     if (Ty->isVectorTy()) {
1746       unsigned NumElements = Ty->getVectorNumElements();
1747       Type *EltTy = Ty->getSequentialElementType();
1748       SmallVector<Constant *, 16> Elements;
1749       for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
1750         if (ConstantInt *Elt =
1751                 dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx))) {
1752           const APInt &V = Elt->getValue();
1753           APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
1754           Elements.push_back(ConstantInt::get(EltTy, V2));
1755         } else {
1756           Elements.push_back(ConstantInt::get(EltTy, 1));
1757         }
1758       }
1759       ShadowMul = ConstantVector::get(Elements);
1760     } else {
1761       if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg)) {
1762         const APInt &V = Elt->getValue();
1763         APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
1764         ShadowMul = ConstantInt::get(Ty, V2);
1765       } else {
1766         ShadowMul = ConstantInt::get(Ty, 1);
1767       }
1768     }
1769 
1770     IRBuilder<> IRB(&I);
1771     setShadow(&I,
1772               IRB.CreateMul(getShadow(OtherArg), ShadowMul, "msprop_mul_cst"));
1773     setOrigin(&I, getOrigin(OtherArg));
1774   }
1775 
1776   void visitMul(BinaryOperator &I) {
1777     Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
1778     Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
1779     if (constOp0 && !constOp1)
1780       handleMulByConstant(I, constOp0, I.getOperand(1));
1781     else if (constOp1 && !constOp0)
1782       handleMulByConstant(I, constOp1, I.getOperand(0));
1783     else
1784       handleShadowOr(I);
1785   }
1786 
1787   void visitFAdd(BinaryOperator &I) { handleShadowOr(I); }
1788   void visitFSub(BinaryOperator &I) { handleShadowOr(I); }
1789   void visitFMul(BinaryOperator &I) { handleShadowOr(I); }
1790   void visitAdd(BinaryOperator &I) { handleShadowOr(I); }
1791   void visitSub(BinaryOperator &I) { handleShadowOr(I); }
1792   void visitXor(BinaryOperator &I) { handleShadowOr(I); }
1793 
1794   void handleDiv(Instruction &I) {
1795     IRBuilder<> IRB(&I);
1796     // Strict on the second argument.
1797     insertShadowCheck(I.getOperand(1), &I);
1798     setShadow(&I, getShadow(&I, 0));
1799     setOrigin(&I, getOrigin(&I, 0));
1800   }
1801 
1802   void visitUDiv(BinaryOperator &I) { handleDiv(I); }
1803   void visitSDiv(BinaryOperator &I) { handleDiv(I); }
1804   void visitFDiv(BinaryOperator &I) { handleDiv(I); }
1805   void visitURem(BinaryOperator &I) { handleDiv(I); }
1806   void visitSRem(BinaryOperator &I) { handleDiv(I); }
1807   void visitFRem(BinaryOperator &I) { handleDiv(I); }
1808 
1809   /// \brief Instrument == and != comparisons.
1810   ///
1811   /// Sometimes the comparison result is known even if some of the bits of the
1812   /// arguments are not.
1813   void handleEqualityComparison(ICmpInst &I) {
1814     IRBuilder<> IRB(&I);
1815     Value *A = I.getOperand(0);
1816     Value *B = I.getOperand(1);
1817     Value *Sa = getShadow(A);
1818     Value *Sb = getShadow(B);
1819 
1820     // Get rid of pointers and vectors of pointers.
1821     // For ints (and vectors of ints), types of A and Sa match,
1822     // and this is a no-op.
1823     A = IRB.CreatePointerCast(A, Sa->getType());
1824     B = IRB.CreatePointerCast(B, Sb->getType());
1825 
1826     // A == B  <==>  (C = A^B) == 0
1827     // A != B  <==>  (C = A^B) != 0
1828     // Sc = Sa | Sb
1829     Value *C = IRB.CreateXor(A, B);
1830     Value *Sc = IRB.CreateOr(Sa, Sb);
1831     // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now)
1832     // Result is defined if one of the following is true
1833     // * there is a defined 1 bit in C
1834     // * C is fully defined
1835     // Si = !(C & ~Sc) && Sc
1836     Value *Zero = Constant::getNullValue(Sc->getType());
1837     Value *MinusOne = Constant::getAllOnesValue(Sc->getType());
1838     Value *Si =
1839       IRB.CreateAnd(IRB.CreateICmpNE(Sc, Zero),
1840                     IRB.CreateICmpEQ(
1841                       IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero));
1842     Si->setName("_msprop_icmp");
1843     setShadow(&I, Si);
1844     setOriginForNaryOp(I);
1845   }
1846 
1847   /// \brief Build the lowest possible value of V, taking into account V's
1848   ///        uninitialized bits.
1849   Value *getLowestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
1850                                 bool isSigned) {
1851     if (isSigned) {
1852       // Split shadow into sign bit and other bits.
1853       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
1854       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
1855       // Maximise the undefined shadow bit, minimize other undefined bits.
1856       return
1857         IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaOtherBits)), SaSignBit);
1858     } else {
1859       // Minimize undefined bits.
1860       return IRB.CreateAnd(A, IRB.CreateNot(Sa));
1861     }
1862   }
1863 
1864   /// \brief Build the highest possible value of V, taking into account V's
1865   ///        uninitialized bits.
1866   Value *getHighestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
1867                                 bool isSigned) {
1868     if (isSigned) {
1869       // Split shadow into sign bit and other bits.
1870       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
1871       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
1872       // Minimise the undefined shadow bit, maximise other undefined bits.
1873       return
1874         IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaSignBit)), SaOtherBits);
1875     } else {
1876       // Maximize undefined bits.
1877       return IRB.CreateOr(A, Sa);
1878     }
1879   }
1880 
1881   /// \brief Instrument relational comparisons.
1882   ///
1883   /// This function does exact shadow propagation for all relational
1884   /// comparisons of integers, pointers and vectors of those.
1885   /// FIXME: output seems suboptimal when one of the operands is a constant
1886   void handleRelationalComparisonExact(ICmpInst &I) {
1887     IRBuilder<> IRB(&I);
1888     Value *A = I.getOperand(0);
1889     Value *B = I.getOperand(1);
1890     Value *Sa = getShadow(A);
1891     Value *Sb = getShadow(B);
1892 
1893     // Get rid of pointers and vectors of pointers.
1894     // For ints (and vectors of ints), types of A and Sa match,
1895     // and this is a no-op.
1896     A = IRB.CreatePointerCast(A, Sa->getType());
1897     B = IRB.CreatePointerCast(B, Sb->getType());
1898 
1899     // Let [a0, a1] be the interval of possible values of A, taking into account
1900     // its undefined bits. Let [b0, b1] be the interval of possible values of B.
1901     // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0).
1902     bool IsSigned = I.isSigned();
1903     Value *S1 = IRB.CreateICmp(I.getPredicate(),
1904                                getLowestPossibleValue(IRB, A, Sa, IsSigned),
1905                                getHighestPossibleValue(IRB, B, Sb, IsSigned));
1906     Value *S2 = IRB.CreateICmp(I.getPredicate(),
1907                                getHighestPossibleValue(IRB, A, Sa, IsSigned),
1908                                getLowestPossibleValue(IRB, B, Sb, IsSigned));
1909     Value *Si = IRB.CreateXor(S1, S2);
1910     setShadow(&I, Si);
1911     setOriginForNaryOp(I);
1912   }
1913 
1914   /// \brief Instrument signed relational comparisons.
1915   ///
1916   /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest
1917   /// bit of the shadow. Everything else is delegated to handleShadowOr().
1918   void handleSignedRelationalComparison(ICmpInst &I) {
1919     Constant *constOp;
1920     Value *op = nullptr;
1921     CmpInst::Predicate pre;
1922     if ((constOp = dyn_cast<Constant>(I.getOperand(1)))) {
1923       op = I.getOperand(0);
1924       pre = I.getPredicate();
1925     } else if ((constOp = dyn_cast<Constant>(I.getOperand(0)))) {
1926       op = I.getOperand(1);
1927       pre = I.getSwappedPredicate();
1928     } else {
1929       handleShadowOr(I);
1930       return;
1931     }
1932 
1933     if ((constOp->isNullValue() &&
1934          (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) ||
1935         (constOp->isAllOnesValue() &&
1936          (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) {
1937       IRBuilder<> IRB(&I);
1938       Value *Shadow = IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op),
1939                                         "_msprop_icmp_s");
1940       setShadow(&I, Shadow);
1941       setOrigin(&I, getOrigin(op));
1942     } else {
1943       handleShadowOr(I);
1944     }
1945   }
1946 
1947   void visitICmpInst(ICmpInst &I) {
1948     if (!ClHandleICmp) {
1949       handleShadowOr(I);
1950       return;
1951     }
1952     if (I.isEquality()) {
1953       handleEqualityComparison(I);
1954       return;
1955     }
1956 
1957     assert(I.isRelational());
1958     if (ClHandleICmpExact) {
1959       handleRelationalComparisonExact(I);
1960       return;
1961     }
1962     if (I.isSigned()) {
1963       handleSignedRelationalComparison(I);
1964       return;
1965     }
1966 
1967     assert(I.isUnsigned());
1968     if ((isa<Constant>(I.getOperand(0)) || isa<Constant>(I.getOperand(1)))) {
1969       handleRelationalComparisonExact(I);
1970       return;
1971     }
1972 
1973     handleShadowOr(I);
1974   }
1975 
1976   void visitFCmpInst(FCmpInst &I) {
1977     handleShadowOr(I);
1978   }
1979 
1980   void handleShift(BinaryOperator &I) {
1981     IRBuilder<> IRB(&I);
1982     // If any of the S2 bits are poisoned, the whole thing is poisoned.
1983     // Otherwise perform the same shift on S1.
1984     Value *S1 = getShadow(&I, 0);
1985     Value *S2 = getShadow(&I, 1);
1986     Value *S2Conv = IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)),
1987                                    S2->getType());
1988     Value *V2 = I.getOperand(1);
1989     Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2);
1990     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
1991     setOriginForNaryOp(I);
1992   }
1993 
1994   void visitShl(BinaryOperator &I) { handleShift(I); }
1995   void visitAShr(BinaryOperator &I) { handleShift(I); }
1996   void visitLShr(BinaryOperator &I) { handleShift(I); }
1997 
1998   /// \brief Instrument llvm.memmove
1999   ///
2000   /// At this point we don't know if llvm.memmove will be inlined or not.
2001   /// If we don't instrument it and it gets inlined,
2002   /// our interceptor will not kick in and we will lose the memmove.
2003   /// If we instrument the call here, but it does not get inlined,
2004   /// we will memove the shadow twice: which is bad in case
2005   /// of overlapping regions. So, we simply lower the intrinsic to a call.
2006   ///
2007   /// Similar situation exists for memcpy and memset.
2008   void visitMemMoveInst(MemMoveInst &I) {
2009     IRBuilder<> IRB(&I);
2010     IRB.CreateCall(
2011         MS.MemmoveFn,
2012         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2013          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2014          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2015     I.eraseFromParent();
2016   }
2017 
2018   // Similar to memmove: avoid copying shadow twice.
2019   // This is somewhat unfortunate as it may slowdown small constant memcpys.
2020   // FIXME: consider doing manual inline for small constant sizes and proper
2021   // alignment.
2022   void visitMemCpyInst(MemCpyInst &I) {
2023     IRBuilder<> IRB(&I);
2024     IRB.CreateCall(
2025         MS.MemcpyFn,
2026         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2027          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2028          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2029     I.eraseFromParent();
2030   }
2031 
2032   // Same as memcpy.
2033   void visitMemSetInst(MemSetInst &I) {
2034     IRBuilder<> IRB(&I);
2035     IRB.CreateCall(
2036         MS.MemsetFn,
2037         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2038          IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
2039          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2040     I.eraseFromParent();
2041   }
2042 
2043   void visitVAStartInst(VAStartInst &I) {
2044     VAHelper->visitVAStartInst(I);
2045   }
2046 
2047   void visitVACopyInst(VACopyInst &I) {
2048     VAHelper->visitVACopyInst(I);
2049   }
2050 
2051   /// \brief Handle vector store-like intrinsics.
2052   ///
2053   /// Instrument intrinsics that look like a simple SIMD store: writes memory,
2054   /// has 1 pointer argument and 1 vector argument, returns void.
2055   bool handleVectorStoreIntrinsic(IntrinsicInst &I) {
2056     IRBuilder<> IRB(&I);
2057     Value* Addr = I.getArgOperand(0);
2058     Value *Shadow = getShadow(&I, 1);
2059     Value *ShadowPtr, *OriginPtr;
2060 
2061     // We don't know the pointer alignment (could be unaligned SSE store!).
2062     // Have to assume to worst case.
2063     std::tie(ShadowPtr, OriginPtr) =
2064         getShadowOriginPtr(Addr, IRB, Shadow->getType(), /*Alignment*/ 1);
2065     IRB.CreateAlignedStore(Shadow, ShadowPtr, 1);
2066 
2067     if (ClCheckAccessAddress)
2068       insertShadowCheck(Addr, &I);
2069 
2070     // FIXME: factor out common code from materializeStores
2071     if (MS.TrackOrigins) IRB.CreateStore(getOrigin(&I, 1), OriginPtr);
2072     return true;
2073   }
2074 
2075   /// \brief Handle vector load-like intrinsics.
2076   ///
2077   /// Instrument intrinsics that look like a simple SIMD load: reads memory,
2078   /// has 1 pointer argument, returns a vector.
2079   bool handleVectorLoadIntrinsic(IntrinsicInst &I) {
2080     IRBuilder<> IRB(&I);
2081     Value *Addr = I.getArgOperand(0);
2082 
2083     Type *ShadowTy = getShadowTy(&I);
2084     Value *ShadowPtr, *OriginPtr;
2085     if (PropagateShadow) {
2086       // We don't know the pointer alignment (could be unaligned SSE load!).
2087       // Have to assume to worst case.
2088       unsigned Alignment = 1;
2089       std::tie(ShadowPtr, OriginPtr) =
2090           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment);
2091       setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, Alignment, "_msld"));
2092     } else {
2093       setShadow(&I, getCleanShadow(&I));
2094     }
2095 
2096     if (ClCheckAccessAddress)
2097       insertShadowCheck(Addr, &I);
2098 
2099     if (MS.TrackOrigins) {
2100       if (PropagateShadow)
2101         setOrigin(&I, IRB.CreateLoad(OriginPtr));
2102       else
2103         setOrigin(&I, getCleanOrigin());
2104     }
2105     return true;
2106   }
2107 
2108   /// \brief Handle (SIMD arithmetic)-like intrinsics.
2109   ///
2110   /// Instrument intrinsics with any number of arguments of the same type,
2111   /// equal to the return type. The type should be simple (no aggregates or
2112   /// pointers; vectors are fine).
2113   /// Caller guarantees that this intrinsic does not access memory.
2114   bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I) {
2115     Type *RetTy = I.getType();
2116     if (!(RetTy->isIntOrIntVectorTy() ||
2117           RetTy->isFPOrFPVectorTy() ||
2118           RetTy->isX86_MMXTy()))
2119       return false;
2120 
2121     unsigned NumArgOperands = I.getNumArgOperands();
2122 
2123     for (unsigned i = 0; i < NumArgOperands; ++i) {
2124       Type *Ty = I.getArgOperand(i)->getType();
2125       if (Ty != RetTy)
2126         return false;
2127     }
2128 
2129     IRBuilder<> IRB(&I);
2130     ShadowAndOriginCombiner SC(this, IRB);
2131     for (unsigned i = 0; i < NumArgOperands; ++i)
2132       SC.Add(I.getArgOperand(i));
2133     SC.Done(&I);
2134 
2135     return true;
2136   }
2137 
2138   /// \brief Heuristically instrument unknown intrinsics.
2139   ///
2140   /// The main purpose of this code is to do something reasonable with all
2141   /// random intrinsics we might encounter, most importantly - SIMD intrinsics.
2142   /// We recognize several classes of intrinsics by their argument types and
2143   /// ModRefBehaviour and apply special intrumentation when we are reasonably
2144   /// sure that we know what the intrinsic does.
2145   ///
2146   /// We special-case intrinsics where this approach fails. See llvm.bswap
2147   /// handling as an example of that.
2148   bool handleUnknownIntrinsic(IntrinsicInst &I) {
2149     unsigned NumArgOperands = I.getNumArgOperands();
2150     if (NumArgOperands == 0)
2151       return false;
2152 
2153     if (NumArgOperands == 2 &&
2154         I.getArgOperand(0)->getType()->isPointerTy() &&
2155         I.getArgOperand(1)->getType()->isVectorTy() &&
2156         I.getType()->isVoidTy() &&
2157         !I.onlyReadsMemory()) {
2158       // This looks like a vector store.
2159       return handleVectorStoreIntrinsic(I);
2160     }
2161 
2162     if (NumArgOperands == 1 &&
2163         I.getArgOperand(0)->getType()->isPointerTy() &&
2164         I.getType()->isVectorTy() &&
2165         I.onlyReadsMemory()) {
2166       // This looks like a vector load.
2167       return handleVectorLoadIntrinsic(I);
2168     }
2169 
2170     if (I.doesNotAccessMemory())
2171       if (maybeHandleSimpleNomemIntrinsic(I))
2172         return true;
2173 
2174     // FIXME: detect and handle SSE maskstore/maskload
2175     return false;
2176   }
2177 
2178   void handleBswap(IntrinsicInst &I) {
2179     IRBuilder<> IRB(&I);
2180     Value *Op = I.getArgOperand(0);
2181     Type *OpType = Op->getType();
2182     Function *BswapFunc = Intrinsic::getDeclaration(
2183       F.getParent(), Intrinsic::bswap, makeArrayRef(&OpType, 1));
2184     setShadow(&I, IRB.CreateCall(BswapFunc, getShadow(Op)));
2185     setOrigin(&I, getOrigin(Op));
2186   }
2187 
2188   // \brief Instrument vector convert instrinsic.
2189   //
2190   // This function instruments intrinsics like cvtsi2ss:
2191   // %Out = int_xxx_cvtyyy(%ConvertOp)
2192   // or
2193   // %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp)
2194   // Intrinsic converts \p NumUsedElements elements of \p ConvertOp to the same
2195   // number \p Out elements, and (if has 2 arguments) copies the rest of the
2196   // elements from \p CopyOp.
2197   // In most cases conversion involves floating-point value which may trigger a
2198   // hardware exception when not fully initialized. For this reason we require
2199   // \p ConvertOp[0:NumUsedElements] to be fully initialized and trap otherwise.
2200   // We copy the shadow of \p CopyOp[NumUsedElements:] to \p
2201   // Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
2202   // return a fully initialized value.
2203   void handleVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements) {
2204     IRBuilder<> IRB(&I);
2205     Value *CopyOp, *ConvertOp;
2206 
2207     switch (I.getNumArgOperands()) {
2208     case 3:
2209       assert(isa<ConstantInt>(I.getArgOperand(2)) && "Invalid rounding mode");
2210       LLVM_FALLTHROUGH;
2211     case 2:
2212       CopyOp = I.getArgOperand(0);
2213       ConvertOp = I.getArgOperand(1);
2214       break;
2215     case 1:
2216       ConvertOp = I.getArgOperand(0);
2217       CopyOp = nullptr;
2218       break;
2219     default:
2220       llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
2221     }
2222 
2223     // The first *NumUsedElements* elements of ConvertOp are converted to the
2224     // same number of output elements. The rest of the output is copied from
2225     // CopyOp, or (if not available) filled with zeroes.
2226     // Combine shadow for elements of ConvertOp that are used in this operation,
2227     // and insert a check.
2228     // FIXME: consider propagating shadow of ConvertOp, at least in the case of
2229     // int->any conversion.
2230     Value *ConvertShadow = getShadow(ConvertOp);
2231     Value *AggShadow = nullptr;
2232     if (ConvertOp->getType()->isVectorTy()) {
2233       AggShadow = IRB.CreateExtractElement(
2234           ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
2235       for (int i = 1; i < NumUsedElements; ++i) {
2236         Value *MoreShadow = IRB.CreateExtractElement(
2237             ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), i));
2238         AggShadow = IRB.CreateOr(AggShadow, MoreShadow);
2239       }
2240     } else {
2241       AggShadow = ConvertShadow;
2242     }
2243     assert(AggShadow->getType()->isIntegerTy());
2244     insertShadowCheck(AggShadow, getOrigin(ConvertOp), &I);
2245 
2246     // Build result shadow by zero-filling parts of CopyOp shadow that come from
2247     // ConvertOp.
2248     if (CopyOp) {
2249       assert(CopyOp->getType() == I.getType());
2250       assert(CopyOp->getType()->isVectorTy());
2251       Value *ResultShadow = getShadow(CopyOp);
2252       Type *EltTy = ResultShadow->getType()->getVectorElementType();
2253       for (int i = 0; i < NumUsedElements; ++i) {
2254         ResultShadow = IRB.CreateInsertElement(
2255             ResultShadow, ConstantInt::getNullValue(EltTy),
2256             ConstantInt::get(IRB.getInt32Ty(), i));
2257       }
2258       setShadow(&I, ResultShadow);
2259       setOrigin(&I, getOrigin(CopyOp));
2260     } else {
2261       setShadow(&I, getCleanShadow(&I));
2262       setOrigin(&I, getCleanOrigin());
2263     }
2264   }
2265 
2266   // Given a scalar or vector, extract lower 64 bits (or less), and return all
2267   // zeroes if it is zero, and all ones otherwise.
2268   Value *Lower64ShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
2269     if (S->getType()->isVectorTy())
2270       S = CreateShadowCast(IRB, S, IRB.getInt64Ty(), /* Signed */ true);
2271     assert(S->getType()->getPrimitiveSizeInBits() <= 64);
2272     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
2273     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
2274   }
2275 
2276   // Given a vector, extract its first element, and return all
2277   // zeroes if it is zero, and all ones otherwise.
2278   Value *LowerElementShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
2279     Value *S1 = IRB.CreateExtractElement(S, (uint64_t)0);
2280     Value *S2 = IRB.CreateICmpNE(S1, getCleanShadow(S1));
2281     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
2282   }
2283 
2284   Value *VariableShadowExtend(IRBuilder<> &IRB, Value *S) {
2285     Type *T = S->getType();
2286     assert(T->isVectorTy());
2287     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
2288     return IRB.CreateSExt(S2, T);
2289   }
2290 
2291   // \brief Instrument vector shift instrinsic.
2292   //
2293   // This function instruments intrinsics like int_x86_avx2_psll_w.
2294   // Intrinsic shifts %In by %ShiftSize bits.
2295   // %ShiftSize may be a vector. In that case the lower 64 bits determine shift
2296   // size, and the rest is ignored. Behavior is defined even if shift size is
2297   // greater than register (or field) width.
2298   void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) {
2299     assert(I.getNumArgOperands() == 2);
2300     IRBuilder<> IRB(&I);
2301     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2302     // Otherwise perform the same shift on S1.
2303     Value *S1 = getShadow(&I, 0);
2304     Value *S2 = getShadow(&I, 1);
2305     Value *S2Conv = Variable ? VariableShadowExtend(IRB, S2)
2306                              : Lower64ShadowExtend(IRB, S2, getShadowTy(&I));
2307     Value *V1 = I.getOperand(0);
2308     Value *V2 = I.getOperand(1);
2309     Value *Shift = IRB.CreateCall(I.getCalledValue(),
2310                                   {IRB.CreateBitCast(S1, V1->getType()), V2});
2311     Shift = IRB.CreateBitCast(Shift, getShadowTy(&I));
2312     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2313     setOriginForNaryOp(I);
2314   }
2315 
2316   // \brief Get an X86_MMX-sized vector type.
2317   Type *getMMXVectorTy(unsigned EltSizeInBits) {
2318     const unsigned X86_MMXSizeInBits = 64;
2319     return VectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
2320                            X86_MMXSizeInBits / EltSizeInBits);
2321   }
2322 
2323   // \brief Returns a signed counterpart for an (un)signed-saturate-and-pack
2324   // intrinsic.
2325   Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) {
2326     switch (id) {
2327       case Intrinsic::x86_sse2_packsswb_128:
2328       case Intrinsic::x86_sse2_packuswb_128:
2329         return Intrinsic::x86_sse2_packsswb_128;
2330 
2331       case Intrinsic::x86_sse2_packssdw_128:
2332       case Intrinsic::x86_sse41_packusdw:
2333         return Intrinsic::x86_sse2_packssdw_128;
2334 
2335       case Intrinsic::x86_avx2_packsswb:
2336       case Intrinsic::x86_avx2_packuswb:
2337         return Intrinsic::x86_avx2_packsswb;
2338 
2339       case Intrinsic::x86_avx2_packssdw:
2340       case Intrinsic::x86_avx2_packusdw:
2341         return Intrinsic::x86_avx2_packssdw;
2342 
2343       case Intrinsic::x86_mmx_packsswb:
2344       case Intrinsic::x86_mmx_packuswb:
2345         return Intrinsic::x86_mmx_packsswb;
2346 
2347       case Intrinsic::x86_mmx_packssdw:
2348         return Intrinsic::x86_mmx_packssdw;
2349       default:
2350         llvm_unreachable("unexpected intrinsic id");
2351     }
2352   }
2353 
2354   // \brief Instrument vector pack instrinsic.
2355   //
2356   // This function instruments intrinsics like x86_mmx_packsswb, that
2357   // packs elements of 2 input vectors into half as many bits with saturation.
2358   // Shadow is propagated with the signed variant of the same intrinsic applied
2359   // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer).
2360   // EltSizeInBits is used only for x86mmx arguments.
2361   void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned EltSizeInBits = 0) {
2362     assert(I.getNumArgOperands() == 2);
2363     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
2364     IRBuilder<> IRB(&I);
2365     Value *S1 = getShadow(&I, 0);
2366     Value *S2 = getShadow(&I, 1);
2367     assert(isX86_MMX || S1->getType()->isVectorTy());
2368 
2369     // SExt and ICmpNE below must apply to individual elements of input vectors.
2370     // In case of x86mmx arguments, cast them to appropriate vector types and
2371     // back.
2372     Type *T = isX86_MMX ? getMMXVectorTy(EltSizeInBits) : S1->getType();
2373     if (isX86_MMX) {
2374       S1 = IRB.CreateBitCast(S1, T);
2375       S2 = IRB.CreateBitCast(S2, T);
2376     }
2377     Value *S1_ext = IRB.CreateSExt(
2378         IRB.CreateICmpNE(S1, Constant::getNullValue(T)), T);
2379     Value *S2_ext = IRB.CreateSExt(
2380         IRB.CreateICmpNE(S2, Constant::getNullValue(T)), T);
2381     if (isX86_MMX) {
2382       Type *X86_MMXTy = Type::getX86_MMXTy(*MS.C);
2383       S1_ext = IRB.CreateBitCast(S1_ext, X86_MMXTy);
2384       S2_ext = IRB.CreateBitCast(S2_ext, X86_MMXTy);
2385     }
2386 
2387     Function *ShadowFn = Intrinsic::getDeclaration(
2388         F.getParent(), getSignedPackIntrinsic(I.getIntrinsicID()));
2389 
2390     Value *S =
2391         IRB.CreateCall(ShadowFn, {S1_ext, S2_ext}, "_msprop_vector_pack");
2392     if (isX86_MMX) S = IRB.CreateBitCast(S, getShadowTy(&I));
2393     setShadow(&I, S);
2394     setOriginForNaryOp(I);
2395   }
2396 
2397   // \brief Instrument sum-of-absolute-differencies intrinsic.
2398   void handleVectorSadIntrinsic(IntrinsicInst &I) {
2399     const unsigned SignificantBitsPerResultElement = 16;
2400     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
2401     Type *ResTy = isX86_MMX ? IntegerType::get(*MS.C, 64) : I.getType();
2402     unsigned ZeroBitsPerResultElement =
2403         ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement;
2404 
2405     IRBuilder<> IRB(&I);
2406     Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2407     S = IRB.CreateBitCast(S, ResTy);
2408     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
2409                        ResTy);
2410     S = IRB.CreateLShr(S, ZeroBitsPerResultElement);
2411     S = IRB.CreateBitCast(S, getShadowTy(&I));
2412     setShadow(&I, S);
2413     setOriginForNaryOp(I);
2414   }
2415 
2416   // \brief Instrument multiply-add intrinsic.
2417   void handleVectorPmaddIntrinsic(IntrinsicInst &I,
2418                                   unsigned EltSizeInBits = 0) {
2419     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
2420     Type *ResTy = isX86_MMX ? getMMXVectorTy(EltSizeInBits * 2) : I.getType();
2421     IRBuilder<> IRB(&I);
2422     Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2423     S = IRB.CreateBitCast(S, ResTy);
2424     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
2425                        ResTy);
2426     S = IRB.CreateBitCast(S, getShadowTy(&I));
2427     setShadow(&I, S);
2428     setOriginForNaryOp(I);
2429   }
2430 
2431   // \brief Instrument compare-packed intrinsic.
2432   // Basically, an or followed by sext(icmp ne 0) to end up with all-zeros or
2433   // all-ones shadow.
2434   void handleVectorComparePackedIntrinsic(IntrinsicInst &I) {
2435     IRBuilder<> IRB(&I);
2436     Type *ResTy = getShadowTy(&I);
2437     Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2438     Value *S = IRB.CreateSExt(
2439         IRB.CreateICmpNE(S0, Constant::getNullValue(ResTy)), ResTy);
2440     setShadow(&I, S);
2441     setOriginForNaryOp(I);
2442   }
2443 
2444   // \brief Instrument compare-scalar intrinsic.
2445   // This handles both cmp* intrinsics which return the result in the first
2446   // element of a vector, and comi* which return the result as i32.
2447   void handleVectorCompareScalarIntrinsic(IntrinsicInst &I) {
2448     IRBuilder<> IRB(&I);
2449     Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2450     Value *S = LowerElementShadowExtend(IRB, S0, getShadowTy(&I));
2451     setShadow(&I, S);
2452     setOriginForNaryOp(I);
2453   }
2454 
2455   void handleStmxcsr(IntrinsicInst &I) {
2456     IRBuilder<> IRB(&I);
2457     Value* Addr = I.getArgOperand(0);
2458     Type *Ty = IRB.getInt32Ty();
2459     Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, Ty, /*Alignment*/ 1).first;
2460 
2461     IRB.CreateStore(getCleanShadow(Ty),
2462                     IRB.CreatePointerCast(ShadowPtr, Ty->getPointerTo()));
2463 
2464     if (ClCheckAccessAddress)
2465       insertShadowCheck(Addr, &I);
2466   }
2467 
2468   void handleLdmxcsr(IntrinsicInst &I) {
2469     if (!InsertChecks) return;
2470 
2471     IRBuilder<> IRB(&I);
2472     Value *Addr = I.getArgOperand(0);
2473     Type *Ty = IRB.getInt32Ty();
2474     unsigned Alignment = 1;
2475     Value *ShadowPtr, *OriginPtr;
2476     std::tie(ShadowPtr, OriginPtr) =
2477         getShadowOriginPtr(Addr, IRB, Ty, Alignment);
2478 
2479     if (ClCheckAccessAddress)
2480       insertShadowCheck(Addr, &I);
2481 
2482     Value *Shadow = IRB.CreateAlignedLoad(ShadowPtr, Alignment, "_ldmxcsr");
2483     Value *Origin =
2484         MS.TrackOrigins ? IRB.CreateLoad(OriginPtr) : getCleanOrigin();
2485     insertShadowCheck(Shadow, Origin, &I);
2486   }
2487 
  /// Dispatch an intrinsic call to the handler that models its shadow
  /// propagation.
  ///
  /// Intrinsics without an explicit case are offered to
  /// handleUnknownIntrinsic; if that returns false they fall back to
  /// visitInstruction.
  void visitIntrinsicInst(IntrinsicInst &I) {
    switch (I.getIntrinsicID()) {
    case Intrinsic::bswap:
      handleBswap(I);
      break;
    // MXCSR store / load.
    case Intrinsic::x86_sse_stmxcsr:
      handleStmxcsr(I);
      break;
    case Intrinsic::x86_sse_ldmxcsr:
      handleLdmxcsr(I);
      break;
    // Conversion intrinsics handled with a second argument of 1.
    case Intrinsic::x86_avx512_vcvtsd2usi64:
    case Intrinsic::x86_avx512_vcvtsd2usi32:
    case Intrinsic::x86_avx512_vcvtss2usi64:
    case Intrinsic::x86_avx512_vcvtss2usi32:
    case Intrinsic::x86_avx512_cvttss2usi64:
    case Intrinsic::x86_avx512_cvttss2usi:
    case Intrinsic::x86_avx512_cvttsd2usi64:
    case Intrinsic::x86_avx512_cvttsd2usi:
    case Intrinsic::x86_avx512_cvtusi2sd:
    case Intrinsic::x86_avx512_cvtusi2ss:
    case Intrinsic::x86_avx512_cvtusi642sd:
    case Intrinsic::x86_avx512_cvtusi642ss:
    case Intrinsic::x86_sse2_cvtsd2si64:
    case Intrinsic::x86_sse2_cvtsd2si:
    case Intrinsic::x86_sse2_cvtsd2ss:
    case Intrinsic::x86_sse2_cvtsi2sd:
    case Intrinsic::x86_sse2_cvtsi642sd:
    case Intrinsic::x86_sse2_cvtss2sd:
    case Intrinsic::x86_sse2_cvttsd2si64:
    case Intrinsic::x86_sse2_cvttsd2si:
    case Intrinsic::x86_sse_cvtsi2ss:
    case Intrinsic::x86_sse_cvtsi642ss:
    case Intrinsic::x86_sse_cvtss2si64:
    case Intrinsic::x86_sse_cvtss2si:
    case Intrinsic::x86_sse_cvttss2si64:
    case Intrinsic::x86_sse_cvttss2si:
      handleVectorConvertIntrinsic(I, 1);
      break;
    // Conversion intrinsics handled with a second argument of 2.
    case Intrinsic::x86_sse_cvtps2pi:
    case Intrinsic::x86_sse_cvttps2pi:
      handleVectorConvertIntrinsic(I, 2);
      break;

    // Shifts handled with Variable == false.
    case Intrinsic::x86_avx512_psll_w_512:
    case Intrinsic::x86_avx512_psll_d_512:
    case Intrinsic::x86_avx512_psll_q_512:
    case Intrinsic::x86_avx512_pslli_w_512:
    case Intrinsic::x86_avx512_pslli_d_512:
    case Intrinsic::x86_avx512_pslli_q_512:
    case Intrinsic::x86_avx512_psrl_w_512:
    case Intrinsic::x86_avx512_psrl_d_512:
    case Intrinsic::x86_avx512_psrl_q_512:
    case Intrinsic::x86_avx512_psra_w_512:
    case Intrinsic::x86_avx512_psra_d_512:
    case Intrinsic::x86_avx512_psra_q_512:
    case Intrinsic::x86_avx512_psrli_w_512:
    case Intrinsic::x86_avx512_psrli_d_512:
    case Intrinsic::x86_avx512_psrli_q_512:
    case Intrinsic::x86_avx512_psrai_w_512:
    case Intrinsic::x86_avx512_psrai_d_512:
    case Intrinsic::x86_avx512_psrai_q_512:
    case Intrinsic::x86_avx512_psra_q_256:
    case Intrinsic::x86_avx512_psra_q_128:
    case Intrinsic::x86_avx512_psrai_q_256:
    case Intrinsic::x86_avx512_psrai_q_128:
    case Intrinsic::x86_avx2_psll_w:
    case Intrinsic::x86_avx2_psll_d:
    case Intrinsic::x86_avx2_psll_q:
    case Intrinsic::x86_avx2_pslli_w:
    case Intrinsic::x86_avx2_pslli_d:
    case Intrinsic::x86_avx2_pslli_q:
    case Intrinsic::x86_avx2_psrl_w:
    case Intrinsic::x86_avx2_psrl_d:
    case Intrinsic::x86_avx2_psrl_q:
    case Intrinsic::x86_avx2_psra_w:
    case Intrinsic::x86_avx2_psra_d:
    case Intrinsic::x86_avx2_psrli_w:
    case Intrinsic::x86_avx2_psrli_d:
    case Intrinsic::x86_avx2_psrli_q:
    case Intrinsic::x86_avx2_psrai_w:
    case Intrinsic::x86_avx2_psrai_d:
    case Intrinsic::x86_sse2_psll_w:
    case Intrinsic::x86_sse2_psll_d:
    case Intrinsic::x86_sse2_psll_q:
    case Intrinsic::x86_sse2_pslli_w:
    case Intrinsic::x86_sse2_pslli_d:
    case Intrinsic::x86_sse2_pslli_q:
    case Intrinsic::x86_sse2_psrl_w:
    case Intrinsic::x86_sse2_psrl_d:
    case Intrinsic::x86_sse2_psrl_q:
    case Intrinsic::x86_sse2_psra_w:
    case Intrinsic::x86_sse2_psra_d:
    case Intrinsic::x86_sse2_psrli_w:
    case Intrinsic::x86_sse2_psrli_d:
    case Intrinsic::x86_sse2_psrli_q:
    case Intrinsic::x86_sse2_psrai_w:
    case Intrinsic::x86_sse2_psrai_d:
    case Intrinsic::x86_mmx_psll_w:
    case Intrinsic::x86_mmx_psll_d:
    case Intrinsic::x86_mmx_psll_q:
    case Intrinsic::x86_mmx_pslli_w:
    case Intrinsic::x86_mmx_pslli_d:
    case Intrinsic::x86_mmx_pslli_q:
    case Intrinsic::x86_mmx_psrl_w:
    case Intrinsic::x86_mmx_psrl_d:
    case Intrinsic::x86_mmx_psrl_q:
    case Intrinsic::x86_mmx_psra_w:
    case Intrinsic::x86_mmx_psra_d:
    case Intrinsic::x86_mmx_psrli_w:
    case Intrinsic::x86_mmx_psrli_d:
    case Intrinsic::x86_mmx_psrli_q:
    case Intrinsic::x86_mmx_psrai_w:
    case Intrinsic::x86_mmx_psrai_d:
      handleVectorShiftIntrinsic(I, /* Variable */ false);
      break;
    // Shifts handled with Variable == true (the p*v_* family).
    case Intrinsic::x86_avx2_psllv_d:
    case Intrinsic::x86_avx2_psllv_d_256:
    case Intrinsic::x86_avx512_psllv_d_512:
    case Intrinsic::x86_avx2_psllv_q:
    case Intrinsic::x86_avx2_psllv_q_256:
    case Intrinsic::x86_avx512_psllv_q_512:
    case Intrinsic::x86_avx2_psrlv_d:
    case Intrinsic::x86_avx2_psrlv_d_256:
    case Intrinsic::x86_avx512_psrlv_d_512:
    case Intrinsic::x86_avx2_psrlv_q:
    case Intrinsic::x86_avx2_psrlv_q_256:
    case Intrinsic::x86_avx512_psrlv_q_512:
    case Intrinsic::x86_avx2_psrav_d:
    case Intrinsic::x86_avx2_psrav_d_256:
    case Intrinsic::x86_avx512_psrav_d_512:
    case Intrinsic::x86_avx512_psrav_q_128:
    case Intrinsic::x86_avx512_psrav_q_256:
    case Intrinsic::x86_avx512_psrav_q_512:
      handleVectorShiftIntrinsic(I, /* Variable */ true);
      break;

    // Saturating packs on real vector types; no element size is needed.
    case Intrinsic::x86_sse2_packsswb_128:
    case Intrinsic::x86_sse2_packssdw_128:
    case Intrinsic::x86_sse2_packuswb_128:
    case Intrinsic::x86_sse41_packusdw:
    case Intrinsic::x86_avx2_packsswb:
    case Intrinsic::x86_avx2_packssdw:
    case Intrinsic::x86_avx2_packuswb:
    case Intrinsic::x86_avx2_packusdw:
      handleVectorPackIntrinsic(I);
      break;

    // MMX packs: the input element size in bits is passed explicitly.
    case Intrinsic::x86_mmx_packsswb:
    case Intrinsic::x86_mmx_packuswb:
      handleVectorPackIntrinsic(I, 16);
      break;

    case Intrinsic::x86_mmx_packssdw:
      handleVectorPackIntrinsic(I, 32);
      break;

    // Sum of absolute differences.
    case Intrinsic::x86_mmx_psad_bw:
    case Intrinsic::x86_sse2_psad_bw:
    case Intrinsic::x86_avx2_psad_bw:
      handleVectorSadIntrinsic(I);
      break;

    // Multiply-add on real vector types; no element size is needed.
    case Intrinsic::x86_sse2_pmadd_wd:
    case Intrinsic::x86_avx2_pmadd_wd:
    case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
    case Intrinsic::x86_avx2_pmadd_ub_sw:
      handleVectorPmaddIntrinsic(I);
      break;

    // Multiply-add with an explicit input element size in bits.
    case Intrinsic::x86_ssse3_pmadd_ub_sw:
      handleVectorPmaddIntrinsic(I, 8);
      break;

    case Intrinsic::x86_mmx_pmadd_wd:
      handleVectorPmaddIntrinsic(I, 16);
      break;

    // Scalar compares (cmp_ss/cmp_sd and the comi*/ucomi* family).
    case Intrinsic::x86_sse_cmp_ss:
    case Intrinsic::x86_sse2_cmp_sd:
    case Intrinsic::x86_sse_comieq_ss:
    case Intrinsic::x86_sse_comilt_ss:
    case Intrinsic::x86_sse_comile_ss:
    case Intrinsic::x86_sse_comigt_ss:
    case Intrinsic::x86_sse_comige_ss:
    case Intrinsic::x86_sse_comineq_ss:
    case Intrinsic::x86_sse_ucomieq_ss:
    case Intrinsic::x86_sse_ucomilt_ss:
    case Intrinsic::x86_sse_ucomile_ss:
    case Intrinsic::x86_sse_ucomigt_ss:
    case Intrinsic::x86_sse_ucomige_ss:
    case Intrinsic::x86_sse_ucomineq_ss:
    case Intrinsic::x86_sse2_comieq_sd:
    case Intrinsic::x86_sse2_comilt_sd:
    case Intrinsic::x86_sse2_comile_sd:
    case Intrinsic::x86_sse2_comigt_sd:
    case Intrinsic::x86_sse2_comige_sd:
    case Intrinsic::x86_sse2_comineq_sd:
    case Intrinsic::x86_sse2_ucomieq_sd:
    case Intrinsic::x86_sse2_ucomilt_sd:
    case Intrinsic::x86_sse2_ucomile_sd:
    case Intrinsic::x86_sse2_ucomigt_sd:
    case Intrinsic::x86_sse2_ucomige_sd:
    case Intrinsic::x86_sse2_ucomineq_sd:
      handleVectorCompareScalarIntrinsic(I);
      break;

    // Packed compares.
    case Intrinsic::x86_sse_cmp_ps:
    case Intrinsic::x86_sse2_cmp_pd:
      // FIXME: For x86_avx_cmp_pd_256 and x86_avx_cmp_ps_256 this function
      // generates reasonably looking IR that fails in the backend with "Do not
      // know how to split the result of this operator!".
      handleVectorComparePackedIntrinsic(I);
      break;

    // Anything else: try the generic heuristics, and if they decline, use the
    // default instruction handling.
    default:
      if (!handleUnknownIntrinsic(I))
        visitInstruction(I);
      break;
    }
  }
2709 
  /// Instrument a call or invoke.
  ///
  /// Before the call, the shadow (and, when origins are tracked, the origin)
  /// of each sized argument is stored at consecutive offsets computed by
  /// getShadowPtrForArgument / getOriginPtrForArgument. After the call, the
  /// shadow and origin of the result are loaded back from the retval slots.
  /// Inline asm is delegated to visitInstruction, and variadic callees are
  /// additionally handed to VAHelper.
  void visitCallSite(CallSite CS) {
    Instruction &I = *CS.getInstruction();
    assert(!I.getMetadata("nosanitize"));
    assert((CS.isCall() || CS.isInvoke()) && "Unknown type of CallSite");
    if (CS.isCall()) {
      CallInst *Call = cast<CallInst>(&I);

      // For inline asm, do the usual thing: check argument shadow and mark all
      // outputs as clean. Note that any side effects of the inline asm that are
      // not immediately visible in its constraints are not handled.
      if (Call->isInlineAsm()) {
        visitInstruction(I);
        return;
      }

      assert(!isa<IntrinsicInst>(&I) && "intrinsics are handled elsewhere");

      // We are going to insert code that relies on the fact that the callee
      // will become a non-readonly function after it is instrumented by us. To
      // prevent this code from being optimized out, mark that function
      // non-readonly in advance.
      // Only possible when the callee is known (direct call).
      if (Function *Func = Call->getCalledFunction()) {
        // Clear out readonly/readnone attributes.
        AttrBuilder B;
        B.addAttribute(Attribute::ReadOnly)
          .addAttribute(Attribute::ReadNone);
        Func->removeAttributes(AttributeList::FunctionIndex, B);
      }

      maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI);
    }
    IRBuilder<> IRB(&I);

    // Running byte offset of the current argument within the parameter
    // shadow area; advanced by each argument's size rounded up to 8.
    unsigned ArgOffset = 0;
    DEBUG(dbgs() << "  CallSite: " << I << "\n");
    for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
         ArgIt != End; ++ArgIt) {
      Value *A = *ArgIt;
      unsigned i = ArgIt - CS.arg_begin();
      // Unsized arguments get no shadow slot and do not advance ArgOffset.
      if (!A->getType()->isSized()) {
        DEBUG(dbgs() << "Arg " << i << " is not sized: " << I << "\n");
        continue;
      }
      unsigned Size = 0;
      Value *Store = nullptr;
      // Compute the Shadow for arg even if it is ByVal, because
      // in that case getShadow() will copy the actual arg shadow to
      // __msan_param_tls.
      Value *ArgShadow = getShadow(A);
      Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset);
      DEBUG(dbgs() << "  Arg#" << i << ": " << *A <<
            " Shadow: " << *ArgShadow << "\n");
      bool ArgIsInitialized = false;
      const DataLayout &DL = F.getParent()->getDataLayout();
      if (CS.paramHasAttr(i, Attribute::ByVal)) {
        // ByVal: what is passed is the pointee, so copy the shadow of the
        // pointed-to memory (pointee alloc size bytes) into the slot.
        assert(A->getType()->isPointerTy() &&
               "ByVal argument is not a pointer!");
        Size = DL.getTypeAllocSize(A->getType()->getPointerElementType());
        // Out of room: stop storing shadow for this and all later arguments.
        if (ArgOffset + Size > kParamTLSSize) break;
        unsigned ParamAlignment = CS.getParamAlignment(i);
        unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment);
        Value *AShadowPtr =
            getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment).first;

        Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
                                 Alignment, Size);
      } else {
        // Regular argument: store its shadow value directly into the slot.
        Size = DL.getTypeAllocSize(A->getType());
        // Out of room: stop storing shadow for this and all later arguments.
        if (ArgOffset + Size > kParamTLSSize) break;
        Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
                                       kShadowTLSAlignment);
        // A constant all-zero shadow means the argument is known to be
        // initialized, so its origin does not need to be stored below.
        Constant *Cst = dyn_cast<Constant>(ArgShadow);
        if (Cst && Cst->isNullValue()) ArgIsInitialized = true;
      }
      if (MS.TrackOrigins && !ArgIsInitialized)
        IRB.CreateStore(getOrigin(A),
                        getOriginPtrForArgument(A, IRB, ArgOffset));
      // Store is only read by the assert and the debug output below.
      (void)Store;
      assert(Size != 0 && Store != nullptr);
      DEBUG(dbgs() << "  Param:" << *Store << "\n");
      ArgOffset += alignTo(Size, 8);
    }
    DEBUG(dbgs() << "  done with call args\n");

    // Variadic callees get additional, target-specific handling.
    FunctionType *FT =
      cast<FunctionType>(CS.getCalledValue()->getType()->getContainedType(0));
    if (FT->isVarArg()) {
      VAHelper->visitCallSite(CS, IRB);
    }

    // Now, get the shadow for the RetVal.
    if (!I.getType()->isSized()) return;
    // Don't emit the epilogue for musttail call returns.
    if (CS.isCall() && cast<CallInst>(&I)->isMustTailCall()) return;
    IRBuilder<> IRBBefore(&I);
    // Until we have full dynamic coverage, make sure the retval shadow is 0.
    // (The clean shadow is stored to the retval slot before the call.)
    Value *Base = getShadowPtrForRetval(&I, IRBBefore);
    IRBBefore.CreateAlignedStore(getCleanShadow(&I), Base, kShadowTLSAlignment);
    // Find where the retval shadow load goes: right after a call, or at the
    // start of the normal destination of an invoke.
    BasicBlock::iterator NextInsn;
    if (CS.isCall()) {
      NextInsn = ++I.getIterator();
      assert(NextInsn != I.getParent()->end());
    } else {
      BasicBlock *NormalDest = cast<InvokeInst>(&I)->getNormalDest();
      if (!NormalDest->getSinglePredecessor()) {
        // FIXME: this case is tricky, so we are just conservative here.
        // Perhaps we need to split the edge between this BB and NormalDest,
        // but a naive attempt to use SplitEdge leads to a crash.
        setShadow(&I, getCleanShadow(&I));
        setOrigin(&I, getCleanOrigin());
        return;
      }
      // FIXME: NextInsn is likely in a basic block that has not been visited yet.
      // Anything inserted there will be instrumented by MSan later!
      NextInsn = NormalDest->getFirstInsertionPt();
      assert(NextInsn != NormalDest->end() &&
             "Could not find insertion point for retval shadow load");
    }
    // Load the result's shadow (and origin) back from the retval slots.
    IRBuilder<> IRBAfter(&*NextInsn);
    Value *RetvalShadow =
      IRBAfter.CreateAlignedLoad(getShadowPtrForRetval(&I, IRBAfter),
                                 kShadowTLSAlignment, "_msret");
    setShadow(&I, RetvalShadow);
    if (MS.TrackOrigins)
      setOrigin(&I, IRBAfter.CreateLoad(getOriginPtrForRetval(IRBAfter)));
  }
2836 
2837   bool isAMustTailRetVal(Value *RetVal) {
2838     if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
2839       RetVal = I->getOperand(0);
2840     }
2841     if (auto *I = dyn_cast<CallInst>(RetVal)) {
2842       return I->isMustTailCall();
2843     }
2844     return false;
2845   }
2846 
2847   void visitReturnInst(ReturnInst &I) {
2848     IRBuilder<> IRB(&I);
2849     Value *RetVal = I.getReturnValue();
2850     if (!RetVal) return;
2851     // Don't emit the epilogue for musttail call returns.
2852     if (isAMustTailRetVal(RetVal)) return;
2853     Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
2854     if (CheckReturnValue) {
2855       insertShadowCheck(RetVal, &I);
2856       Value *Shadow = getCleanShadow(RetVal);
2857       IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
2858     } else {
2859       Value *Shadow = getShadow(RetVal);
2860       IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
2861       if (MS.TrackOrigins)
2862         IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
2863     }
2864   }
2865 
2866   void visitPHINode(PHINode &I) {
2867     IRBuilder<> IRB(&I);
2868     if (!PropagateShadow) {
2869       setShadow(&I, getCleanShadow(&I));
2870       setOrigin(&I, getCleanOrigin());
2871       return;
2872     }
2873 
2874     ShadowPHINodes.push_back(&I);
2875     setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(),
2876                                 "_msphi_s"));
2877     if (MS.TrackOrigins)
2878       setOrigin(&I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(),
2879                                   "_msphi_o"));
2880   }
2881 
2882   void visitAllocaInst(AllocaInst &I) {
2883     setShadow(&I, getCleanShadow(&I));
2884     setOrigin(&I, getCleanOrigin());
2885     IRBuilder<> IRB(I.getNextNode());
2886     const DataLayout &DL = F.getParent()->getDataLayout();
2887     uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType());
2888     Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize);
2889     if (I.isArrayAllocation())
2890       Len = IRB.CreateMul(Len, I.getArraySize());
2891     if (PoisonStack && ClPoisonStackWithCall) {
2892       IRB.CreateCall(MS.MsanPoisonStackFn,
2893                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
2894     } else {
2895       Value *ShadowBase =
2896           getShadowOriginPtr(&I, IRB, IRB.getInt8Ty(), I.getAlignment()).first;
2897 
2898       Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
2899       IRB.CreateMemSet(ShadowBase, PoisonValue, Len, I.getAlignment());
2900     }
2901 
2902     if (PoisonStack && MS.TrackOrigins) {
2903       SmallString<2048> StackDescriptionStorage;
2904       raw_svector_ostream StackDescription(StackDescriptionStorage);
2905       // We create a string with a description of the stack allocation and
2906       // pass it into __msan_set_alloca_origin.
2907       // It will be printed by the run-time if stack-originated UMR is found.
2908       // The first 4 bytes of the string are set to '----' and will be replaced
2909       // by __msan_va_arg_overflow_size_tls at the first call.
2910       StackDescription << "----" << I.getName() << "@" << F.getName();
2911       Value *Descr =
2912           createPrivateNonConstGlobalForString(*F.getParent(),
2913                                                StackDescription.str());
2914 
2915       IRB.CreateCall(MS.MsanSetAllocaOrigin4Fn,
2916                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
2917                       IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()),
2918                       IRB.CreatePointerCast(&F, MS.IntptrTy)});
2919     }
2920   }
2921 
2922   void visitSelectInst(SelectInst& I) {
2923     IRBuilder<> IRB(&I);
2924     // a = select b, c, d
2925     Value *B = I.getCondition();
2926     Value *C = I.getTrueValue();
2927     Value *D = I.getFalseValue();
2928     Value *Sb = getShadow(B);
2929     Value *Sc = getShadow(C);
2930     Value *Sd = getShadow(D);
2931 
2932     // Result shadow if condition shadow is 0.
2933     Value *Sa0 = IRB.CreateSelect(B, Sc, Sd);
2934     Value *Sa1;
2935     if (I.getType()->isAggregateType()) {
2936       // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do
2937       // an extra "select". This results in much more compact IR.
2938       // Sa = select Sb, poisoned, (select b, Sc, Sd)
2939       Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
2940     } else {
2941       // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
2942       // If Sb (condition is poisoned), look for bits in c and d that are equal
2943       // and both unpoisoned.
2944       // If !Sb (condition is unpoisoned), simply pick one of Sc and Sd.
2945 
2946       // Cast arguments to shadow-compatible type.
2947       C = CreateAppToShadowCast(IRB, C);
2948       D = CreateAppToShadowCast(IRB, D);
2949 
2950       // Result shadow if condition shadow is 1.
2951       Sa1 = IRB.CreateOr(IRB.CreateXor(C, D), IRB.CreateOr(Sc, Sd));
2952     }
2953     Value *Sa = IRB.CreateSelect(Sb, Sa1, Sa0, "_msprop_select");
2954     setShadow(&I, Sa);
2955     if (MS.TrackOrigins) {
2956       // Origins are always i32, so any vector conditions must be flattened.
2957       // FIXME: consider tracking vector origins for app vectors?
2958       if (B->getType()->isVectorTy()) {
2959         Type *FlatTy = getShadowTyNoVec(B->getType());
2960         B = IRB.CreateICmpNE(IRB.CreateBitCast(B, FlatTy),
2961                                 ConstantInt::getNullValue(FlatTy));
2962         Sb = IRB.CreateICmpNE(IRB.CreateBitCast(Sb, FlatTy),
2963                                       ConstantInt::getNullValue(FlatTy));
2964       }
2965       // a = select b, c, d
2966       // Oa = Sb ? Ob : (b ? Oc : Od)
2967       setOrigin(
2968           &I, IRB.CreateSelect(Sb, getOrigin(I.getCondition()),
2969                                IRB.CreateSelect(B, getOrigin(I.getTrueValue()),
2970                                                 getOrigin(I.getFalseValue()))));
2971     }
2972   }
2973 
2974   void visitLandingPadInst(LandingPadInst &I) {
2975     // Do nothing.
2976     // See https://github.com/google/sanitizers/issues/504
2977     setShadow(&I, getCleanShadow(&I));
2978     setOrigin(&I, getCleanOrigin());
2979   }
2980 
2981   void visitCatchSwitchInst(CatchSwitchInst &I) {
2982     setShadow(&I, getCleanShadow(&I));
2983     setOrigin(&I, getCleanOrigin());
2984   }
2985 
2986   void visitFuncletPadInst(FuncletPadInst &I) {
2987     setShadow(&I, getCleanShadow(&I));
2988     setOrigin(&I, getCleanOrigin());
2989   }
2990 
  /// Instrument getelementptr by delegating to handleShadowOr.
  void visitGetElementPtrInst(GetElementPtrInst &I) {
    handleShadowOr(I);
  }
2994 
2995   void visitExtractValueInst(ExtractValueInst &I) {
2996     IRBuilder<> IRB(&I);
2997     Value *Agg = I.getAggregateOperand();
2998     DEBUG(dbgs() << "ExtractValue:  " << I << "\n");
2999     Value *AggShadow = getShadow(Agg);
3000     DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
3001     Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
3002     DEBUG(dbgs() << "   ResShadow:  " << *ResShadow << "\n");
3003     setShadow(&I, ResShadow);
3004     setOriginForNaryOp(I);
3005   }
3006 
3007   void visitInsertValueInst(InsertValueInst &I) {
3008     IRBuilder<> IRB(&I);
3009     DEBUG(dbgs() << "InsertValue:  " << I << "\n");
3010     Value *AggShadow = getShadow(I.getAggregateOperand());
3011     Value *InsShadow = getShadow(I.getInsertedValueOperand());
3012     DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
3013     DEBUG(dbgs() << "   InsShadow:  " << *InsShadow << "\n");
3014     Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
3015     DEBUG(dbgs() << "   Res:        " << *Res << "\n");
3016     setShadow(&I, Res);
3017     setOriginForNaryOp(I);
3018   }
3019 
3020   void dumpInst(Instruction &I) {
3021     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
3022       errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n";
3023     } else {
3024       errs() << "ZZZ " << I.getOpcodeName() << "\n";
3025     }
3026     errs() << "QQQ " << I << "\n";
3027   }
3028 
3029   void visitResumeInst(ResumeInst &I) {
3030     DEBUG(dbgs() << "Resume: " << I << "\n");
3031     // Nothing to do here.
3032   }
3033 
3034   void visitCleanupReturnInst(CleanupReturnInst &CRI) {
3035     DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n");
3036     // Nothing to do here.
3037   }
3038 
3039   void visitCatchReturnInst(CatchReturnInst &CRI) {
3040     DEBUG(dbgs() << "CatchReturn: " << CRI << "\n");
3041     // Nothing to do here.
3042   }
3043 
3044   void visitInstruction(Instruction &I) {
3045     // Everything else: stop propagating and check for poisoned shadow.
3046     if (ClDumpStrictInstructions)
3047       dumpInst(I);
3048     DEBUG(dbgs() << "DEFAULT: " << I << "\n");
3049     for (size_t i = 0, n = I.getNumOperands(); i < n; i++) {
3050       Value *Operand = I.getOperand(i);
3051       if (Operand->getType()->isSized())
3052         insertShadowCheck(Operand, &I);
3053     }
3054     setShadow(&I, getCleanShadow(&I));
3055     setOrigin(&I, getCleanOrigin());
3056   }
3057 };
3058 
3059 /// \brief AMD64-specific implementation of VarArgHelper.
3060 struct VarArgAMD64Helper : public VarArgHelper {
3061   // An unfortunate workaround for asymmetric lowering of va_arg stuff.
3062   // See a comment in visitCallSite for more details.
3063   static const unsigned AMD64GpEndOffset = 48;  // AMD64 ABI Draft 0.99.6 p3.5.7
3064   static const unsigned AMD64FpEndOffset = 176;
3065 
3066   Function &F;
3067   MemorySanitizer &MS;
3068   MemorySanitizerVisitor &MSV;
3069   Value *VAArgTLSCopy = nullptr;
3070   Value *VAArgOverflowSize = nullptr;
3071 
3072   SmallVector<CallInst*, 16> VAStartInstrumentationList;
3073 
3074   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
3075 
3076   VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
3077                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
3078 
3079   ArgKind classifyArgument(Value* arg) {
3080     // A very rough approximation of X86_64 argument classification rules.
3081     Type *T = arg->getType();
3082     if (T->isFPOrFPVectorTy() || T->isX86_MMXTy())
3083       return AK_FloatingPoint;
3084     if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
3085       return AK_GeneralPurpose;
3086     if (T->isPointerTy())
3087       return AK_GeneralPurpose;
3088     return AK_Memory;
3089   }
3090 
3091   // For VarArg functions, store the argument shadow in an ABI-specific format
3092   // that corresponds to va_list layout.
3093   // We do this because Clang lowers va_arg in the frontend, and this pass
3094   // only sees the low level code that deals with va_list internals.
3095   // A much easier alternative (provided that Clang emits va_arg instructions)
3096   // would have been to associate each live instance of va_list with a copy of
3097   // MSanParamTLS, and extract shadow on va_arg() call in the argument list
3098   // order.
3099   void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
3100     unsigned GpOffset = 0;
3101     unsigned FpOffset = AMD64GpEndOffset;
3102     unsigned OverflowOffset = AMD64FpEndOffset;
3103     const DataLayout &DL = F.getParent()->getDataLayout();
3104     for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
3105          ArgIt != End; ++ArgIt) {
3106       Value *A = *ArgIt;
3107       unsigned ArgNo = CS.getArgumentNo(ArgIt);
3108       bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams();
3109       bool IsByVal = CS.paramHasAttr(ArgNo, Attribute::ByVal);
3110       if (IsByVal) {
3111         // ByVal arguments always go to the overflow area.
3112         // Fixed arguments passed through the overflow area will be stepped
3113         // over by va_start, so don't count them towards the offset.
3114         if (IsFixed)
3115           continue;
3116         assert(A->getType()->isPointerTy());
3117         Type *RealTy = A->getType()->getPointerElementType();
3118         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
3119         Value *ShadowBase =
3120             getShadowPtrForVAArgument(RealTy, IRB, OverflowOffset);
3121         OverflowOffset += alignTo(ArgSize, 8);
3122         Value *ShadowPtr, *OriginPtr;
3123         std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
3124             A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment);
3125 
3126         IRB.CreateMemCpy(ShadowBase, kShadowTLSAlignment, ShadowPtr,
3127                          kShadowTLSAlignment, ArgSize);
3128       } else {
3129         ArgKind AK = classifyArgument(A);
3130         if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
3131           AK = AK_Memory;
3132         if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
3133           AK = AK_Memory;
3134         Value *ShadowBase;
3135         switch (AK) {
3136           case AK_GeneralPurpose:
3137             ShadowBase = getShadowPtrForVAArgument(A->getType(), IRB, GpOffset);
3138             GpOffset += 8;
3139             break;
3140           case AK_FloatingPoint:
3141             ShadowBase = getShadowPtrForVAArgument(A->getType(), IRB, FpOffset);
3142             FpOffset += 16;
3143             break;
3144           case AK_Memory:
3145             if (IsFixed)
3146               continue;
3147             uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
3148             ShadowBase =
3149                 getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset);
3150             OverflowOffset += alignTo(ArgSize, 8);
3151         }
3152         // Take fixed arguments into account for GpOffset and FpOffset,
3153         // but don't actually store shadows for them.
3154         if (IsFixed)
3155           continue;
3156         IRB.CreateAlignedStore(MSV.getShadow(A), ShadowBase,
3157                                kShadowTLSAlignment);
3158       }
3159     }
3160     Constant *OverflowSize =
3161       ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
3162     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
3163   }
3164 
3165   /// \brief Compute the shadow address for a given va_arg.
3166   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
3167                                    int ArgOffset) {
3168     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
3169     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
3170     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
3171                               "_msarg");
3172   }
3173 
3174   void unpoisonVAListTagForInst(IntrinsicInst &I) {
3175     IRBuilder<> IRB(&I);
3176     Value *VAListTag = I.getArgOperand(0);
3177     Value *ShadowPtr, *OriginPtr;
3178     unsigned Alignment = 8;
3179     std::tie(ShadowPtr, OriginPtr) =
3180         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment);
3181 
3182     // Unpoison the whole __va_list_tag.
3183     // FIXME: magic ABI constants.
3184     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
3185                      /* size */ 24, Alignment, false);
3186     // We shouldn't need to zero out the origins, as they're only checked for
3187     // nonzero shadow.
3188   }
3189 
3190   void visitVAStartInst(VAStartInst &I) override {
3191     if (F.getCallingConv() == CallingConv::Win64)
3192       return;
3193     VAStartInstrumentationList.push_back(&I);
3194     unpoisonVAListTagForInst(I);
3195   }
3196 
3197   void visitVACopyInst(VACopyInst &I) override {
3198     if (F.getCallingConv() == CallingConv::Win64) return;
3199     unpoisonVAListTagForInst(I);
3200   }
3201 
3202   void finalizeInstrumentation() override {
3203     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
3204            "finalizeInstrumentation called twice");
3205     if (!VAStartInstrumentationList.empty()) {
3206       // If there is a va_start in this function, make a backup copy of
3207       // va_arg_tls somewhere in the function entry block.
3208       IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
3209       VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
3210       Value *CopySize =
3211         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset),
3212                       VAArgOverflowSize);
3213       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
3214       IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
3215     }
3216 
3217     // Instrument va_start.
3218     // Copy va_list shadow from the backup copy of the TLS contents.
3219     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
3220       CallInst *OrigInst = VAStartInstrumentationList[i];
3221       IRBuilder<> IRB(OrigInst->getNextNode());
3222       Value *VAListTag = OrigInst->getArgOperand(0);
3223 
3224       Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
3225           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
3226                         ConstantInt::get(MS.IntptrTy, 16)),
3227           PointerType::get(Type::getInt64PtrTy(*MS.C), 0));
3228       Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
3229       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
3230       unsigned Alignment = 16;
3231       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
3232           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
3233                                  Alignment);
3234       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
3235                        AMD64FpEndOffset);
3236       Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
3237           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
3238                         ConstantInt::get(MS.IntptrTy, 8)),
3239           PointerType::get(Type::getInt64PtrTy(*MS.C), 0));
3240       Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr);
3241       Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
3242       std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
3243           MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
3244                                  Alignment);
3245       Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
3246                                              AMD64FpEndOffset);
3247       IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
3248                        VAArgOverflowSize);
3249     }
3250   }
3251 };
3252 
3253 /// \brief MIPS64-specific implementation of VarArgHelper.
3254 struct VarArgMIPS64Helper : public VarArgHelper {
3255   Function &F;
3256   MemorySanitizer &MS;
3257   MemorySanitizerVisitor &MSV;
3258   Value *VAArgTLSCopy = nullptr;
3259   Value *VAArgSize = nullptr;
3260 
3261   SmallVector<CallInst*, 16> VAStartInstrumentationList;
3262 
3263   VarArgMIPS64Helper(Function &F, MemorySanitizer &MS,
3264                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
3265 
3266   void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
3267     unsigned VAArgOffset = 0;
3268     const DataLayout &DL = F.getParent()->getDataLayout();
3269     for (CallSite::arg_iterator ArgIt = CS.arg_begin() +
3270          CS.getFunctionType()->getNumParams(), End = CS.arg_end();
3271          ArgIt != End; ++ArgIt) {
3272       Triple TargetTriple(F.getParent()->getTargetTriple());
3273       Value *A = *ArgIt;
3274       Value *Base;
3275       uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
3276       if (TargetTriple.getArch() == Triple::mips64) {
3277         // Adjusting the shadow for argument with size < 8 to match the placement
3278         // of bits in big endian system
3279         if (ArgSize < 8)
3280           VAArgOffset += (8 - ArgSize);
3281       }
3282       Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset);
3283       VAArgOffset += ArgSize;
3284       VAArgOffset = alignTo(VAArgOffset, 8);
3285       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
3286     }
3287 
3288     Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset);
3289     // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
3290     // a new class member i.e. it is the total size of all VarArgs.
3291     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
3292   }
3293 
3294   /// \brief Compute the shadow address for a given va_arg.
3295   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
3296                                    int ArgOffset) {
3297     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
3298     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
3299     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
3300                               "_msarg");
3301   }
3302 
3303   void visitVAStartInst(VAStartInst &I) override {
3304     IRBuilder<> IRB(&I);
3305     VAStartInstrumentationList.push_back(&I);
3306     Value *VAListTag = I.getArgOperand(0);
3307     Value *ShadowPtr, *OriginPtr;
3308     unsigned Alignment = 8;
3309     std::tie(ShadowPtr, OriginPtr) =
3310         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment);
3311     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
3312                      /* size */ 8, Alignment, false);
3313   }
3314 
3315   void visitVACopyInst(VACopyInst &I) override {
3316     IRBuilder<> IRB(&I);
3317     VAStartInstrumentationList.push_back(&I);
3318     Value *VAListTag = I.getArgOperand(0);
3319     Value *ShadowPtr, *OriginPtr;
3320     unsigned Alignment = 8;
3321     std::tie(ShadowPtr, OriginPtr) =
3322         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment);
3323     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
3324                      /* size */ 8, Alignment, false);
3325   }
3326 
3327   void finalizeInstrumentation() override {
3328     assert(!VAArgSize && !VAArgTLSCopy &&
3329            "finalizeInstrumentation called twice");
3330     IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
3331     VAArgSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
3332     Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
3333                                     VAArgSize);
3334 
3335     if (!VAStartInstrumentationList.empty()) {
3336       // If there is a va_start in this function, make a backup copy of
3337       // va_arg_tls somewhere in the function entry block.
3338       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
3339       IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
3340     }
3341 
3342     // Instrument va_start.
3343     // Copy va_list shadow from the backup copy of the TLS contents.
3344     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
3345       CallInst *OrigInst = VAStartInstrumentationList[i];
3346       IRBuilder<> IRB(OrigInst->getNextNode());
3347       Value *VAListTag = OrigInst->getArgOperand(0);
3348       Value *RegSaveAreaPtrPtr =
3349           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
3350                              PointerType::get(Type::getInt64PtrTy(*MS.C), 0));
3351       Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
3352       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
3353       unsigned Alignment = 8;
3354       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
3355           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
3356                                  Alignment);
3357       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
3358                        CopySize);
3359     }
3360   }
3361 };
3362 
3363 /// \brief AArch64-specific implementation of VarArgHelper.
3364 struct VarArgAArch64Helper : public VarArgHelper {
3365   static const unsigned kAArch64GrArgSize = 64;
3366   static const unsigned kAArch64VrArgSize = 128;
3367 
3368   static const unsigned AArch64GrBegOffset = 0;
3369   static const unsigned AArch64GrEndOffset = kAArch64GrArgSize;
3370   // Make VR space aligned to 16 bytes.
3371   static const unsigned AArch64VrBegOffset = AArch64GrEndOffset;
3372   static const unsigned AArch64VrEndOffset = AArch64VrBegOffset
3373                                              + kAArch64VrArgSize;
3374   static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;
3375 
3376   Function &F;
3377   MemorySanitizer &MS;
3378   MemorySanitizerVisitor &MSV;
3379   Value *VAArgTLSCopy = nullptr;
3380   Value *VAArgOverflowSize = nullptr;
3381 
3382   SmallVector<CallInst*, 16> VAStartInstrumentationList;
3383 
3384   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
3385 
3386   VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
3387                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
3388 
3389   ArgKind classifyArgument(Value* arg) {
3390     Type *T = arg->getType();
3391     if (T->isFPOrFPVectorTy())
3392       return AK_FloatingPoint;
3393     if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
3394         || (T->isPointerTy()))
3395       return AK_GeneralPurpose;
3396     return AK_Memory;
3397   }
3398 
3399   // The instrumentation stores the argument shadow in a non ABI-specific
3400   // format because it does not know which argument is named (since Clang,
3401   // like x86_64 case, lowers the va_args in the frontend and this pass only
3402   // sees the low level code that deals with va_list internals).
3403   // The first seven GR registers are saved in the first 56 bytes of the
3404   // va_arg tls arra, followers by the first 8 FP/SIMD registers, and then
3405   // the remaining arguments.
3406   // Using constant offset within the va_arg TLS array allows fast copy
3407   // in the finalize instrumentation.
3408   void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
3409     unsigned GrOffset = AArch64GrBegOffset;
3410     unsigned VrOffset = AArch64VrBegOffset;
3411     unsigned OverflowOffset = AArch64VAEndOffset;
3412 
3413     const DataLayout &DL = F.getParent()->getDataLayout();
3414     for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
3415          ArgIt != End; ++ArgIt) {
3416       Value *A = *ArgIt;
3417       unsigned ArgNo = CS.getArgumentNo(ArgIt);
3418       bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams();
3419       ArgKind AK = classifyArgument(A);
3420       if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset)
3421         AK = AK_Memory;
3422       if (AK == AK_FloatingPoint && VrOffset >= AArch64VrEndOffset)
3423         AK = AK_Memory;
3424       Value *Base;
3425       switch (AK) {
3426         case AK_GeneralPurpose:
3427           Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset);
3428           GrOffset += 8;
3429           break;
3430         case AK_FloatingPoint:
3431           Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset);
3432           VrOffset += 16;
3433           break;
3434         case AK_Memory:
3435           // Don't count fixed arguments in the overflow area - va_start will
3436           // skip right over them.
3437           if (IsFixed)
3438             continue;
3439           uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
3440           Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset);
3441           OverflowOffset += alignTo(ArgSize, 8);
3442           break;
3443       }
3444       // Count Gp/Vr fixed arguments to their respective offsets, but don't
3445       // bother to actually store a shadow.
3446       if (IsFixed)
3447         continue;
3448       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
3449     }
3450     Constant *OverflowSize =
3451       ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset);
3452     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
3453   }
3454 
3455   /// Compute the shadow address for a given va_arg.
3456   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
3457                                    int ArgOffset) {
3458     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
3459     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
3460     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
3461                               "_msarg");
3462   }
3463 
3464   void visitVAStartInst(VAStartInst &I) override {
3465     IRBuilder<> IRB(&I);
3466     VAStartInstrumentationList.push_back(&I);
3467     Value *VAListTag = I.getArgOperand(0);
3468     Value *ShadowPtr, *OriginPtr;
3469     unsigned Alignment = 8;
3470     std::tie(ShadowPtr, OriginPtr) =
3471         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment);
3472     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
3473                      /* size */ 32, Alignment, false);
3474   }
3475 
3476   void visitVACopyInst(VACopyInst &I) override {
3477     IRBuilder<> IRB(&I);
3478     VAStartInstrumentationList.push_back(&I);
3479     Value *VAListTag = I.getArgOperand(0);
3480     Value *ShadowPtr, *OriginPtr;
3481     unsigned Alignment = 8;
3482     std::tie(ShadowPtr, OriginPtr) =
3483         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment);
3484     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
3485                      /* size */ 32, Alignment, false);
3486   }
3487 
3488   // Retrieve a va_list field of 'void*' size.
3489   Value* getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
3490     Value *SaveAreaPtrPtr =
3491       IRB.CreateIntToPtr(
3492         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
3493                       ConstantInt::get(MS.IntptrTy, offset)),
3494         Type::getInt64PtrTy(*MS.C));
3495     return IRB.CreateLoad(SaveAreaPtrPtr);
3496   }
3497 
3498   // Retrieve a va_list field of 'int' size.
3499   Value* getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
3500     Value *SaveAreaPtr =
3501       IRB.CreateIntToPtr(
3502         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
3503                       ConstantInt::get(MS.IntptrTy, offset)),
3504         Type::getInt32PtrTy(*MS.C));
3505     Value *SaveArea32 = IRB.CreateLoad(SaveAreaPtr);
3506     return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
3507   }
3508 
3509   void finalizeInstrumentation() override {
3510     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
3511            "finalizeInstrumentation called twice");
3512     if (!VAStartInstrumentationList.empty()) {
3513       // If there is a va_start in this function, make a backup copy of
3514       // va_arg_tls somewhere in the function entry block.
3515       IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
3516       VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
3517       Value *CopySize =
3518         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset),
3519                       VAArgOverflowSize);
3520       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
3521       IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
3522     }
3523 
3524     Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
3525     Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize);
3526 
3527     // Instrument va_start, copy va_list shadow from the backup copy of
3528     // the TLS contents.
3529     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
3530       CallInst *OrigInst = VAStartInstrumentationList[i];
3531       IRBuilder<> IRB(OrigInst->getNextNode());
3532 
3533       Value *VAListTag = OrigInst->getArgOperand(0);
3534 
3535       // The variadic ABI for AArch64 creates two areas to save the incoming
3536       // argument registers (one for 64-bit general register xn-x7 and another
3537       // for 128-bit FP/SIMD vn-v7).
3538       // We need then to propagate the shadow arguments on both regions
3539       // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'.
3540       // The remaning arguments are saved on shadow for 'va::stack'.
3541       // One caveat is it requires only to propagate the non-named arguments,
3542       // however on the call site instrumentation 'all' the arguments are
3543       // saved. So to copy the shadow values from the va_arg TLS array
3544       // we need to adjust the offset for both GR and VR fields based on
3545       // the __{gr,vr}_offs value (since they are stores based on incoming
3546       // named arguments).
3547 
3548       // Read the stack pointer from the va_list.
3549       Value *StackSaveAreaPtr = getVAField64(IRB, VAListTag, 0);
3550 
3551       // Read both the __gr_top and __gr_off and add them up.
3552       Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8);
3553       Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24);
3554 
3555       Value *GrRegSaveAreaPtr = IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea);
3556 
3557       // Read both the __vr_top and __vr_off and add them up.
3558       Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16);
3559       Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28);
3560 
3561       Value *VrRegSaveAreaPtr = IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea);
3562 
3563       // It does not know how many named arguments is being used and, on the
3564       // callsite all the arguments were saved.  Since __gr_off is defined as
3565       // '0 - ((8 - named_gr) * 8)', the idea is to just propagate the variadic
3566       // argument by ignoring the bytes of shadow from named arguments.
3567       Value *GrRegSaveAreaShadowPtrOff =
3568         IRB.CreateAdd(GrArgSize, GrOffSaveArea);
3569 
3570       Value *GrRegSaveAreaShadowPtr =
3571           MSV.getShadowOriginPtr(GrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
3572                                  /*Alignment*/ 8)
3573               .first;
3574 
3575       Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
3576                                               GrRegSaveAreaShadowPtrOff);
3577       Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);
3578 
3579       IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, 8, GrSrcPtr, 8, GrCopySize);
3580 
3581       // Again, but for FP/SIMD values.
3582       Value *VrRegSaveAreaShadowPtrOff =
3583           IRB.CreateAdd(VrArgSize, VrOffSaveArea);
3584 
3585       Value *VrRegSaveAreaShadowPtr =
3586           MSV.getShadowOriginPtr(VrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
3587                                  /*Alignment*/ 8)
3588               .first;
3589 
3590       Value *VrSrcPtr = IRB.CreateInBoundsGEP(
3591         IRB.getInt8Ty(),
3592         IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
3593                               IRB.getInt32(AArch64VrBegOffset)),
3594         VrRegSaveAreaShadowPtrOff);
3595       Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);
3596 
3597       IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, 8, VrSrcPtr, 8, VrCopySize);
3598 
3599       // And finally for remaining arguments.
3600       Value *StackSaveAreaShadowPtr =
3601           MSV.getShadowOriginPtr(StackSaveAreaPtr, IRB, IRB.getInt8Ty(),
3602                                  /*Alignment*/ 16)
3603               .first;
3604 
3605       Value *StackSrcPtr =
3606         IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
3607                               IRB.getInt32(AArch64VAEndOffset));
3608 
3609       IRB.CreateMemCpy(StackSaveAreaShadowPtr, 16, StackSrcPtr, 16,
3610                        VAArgOverflowSize);
3611     }
3612   }
3613 };
3614 
3615 /// \brief PowerPC64-specific implementation of VarArgHelper.
3616 struct VarArgPowerPC64Helper : public VarArgHelper {
3617   Function &F;
3618   MemorySanitizer &MS;
3619   MemorySanitizerVisitor &MSV;
3620   Value *VAArgTLSCopy = nullptr;
3621   Value *VAArgSize = nullptr;
3622 
3623   SmallVector<CallInst*, 16> VAStartInstrumentationList;
3624 
3625   VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS,
3626                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
3627 
3628   void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
3629     // For PowerPC, we need to deal with alignment of stack arguments -
3630     // they are mostly aligned to 8 bytes, but vectors and i128 arrays
3631     // are aligned to 16 bytes, byvals can be aligned to 8 or 16 bytes,
3632     // and QPX vectors are aligned to 32 bytes.  For that reason, we
3633     // compute current offset from stack pointer (which is always properly
3634     // aligned), and offset for the first vararg, then subtract them.
3635     unsigned VAArgBase;
3636     Triple TargetTriple(F.getParent()->getTargetTriple());
3637     // Parameter save area starts at 48 bytes from frame pointer for ABIv1,
3638     // and 32 bytes for ABIv2.  This is usually determined by target
    // endianness, but in theory could be overridden by function attribute.
3640     // For simplicity, we ignore it here (it'd only matter for QPX vectors).
3641     if (TargetTriple.getArch() == Triple::ppc64)
3642       VAArgBase = 48;
3643     else
3644       VAArgBase = 32;
3645     unsigned VAArgOffset = VAArgBase;
3646     const DataLayout &DL = F.getParent()->getDataLayout();
3647     for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
3648          ArgIt != End; ++ArgIt) {
3649       Value *A = *ArgIt;
3650       unsigned ArgNo = CS.getArgumentNo(ArgIt);
3651       bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams();
3652       bool IsByVal = CS.paramHasAttr(ArgNo, Attribute::ByVal);
3653       if (IsByVal) {
3654         assert(A->getType()->isPointerTy());
3655         Type *RealTy = A->getType()->getPointerElementType();
3656         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
3657         uint64_t ArgAlign = CS.getParamAlignment(ArgNo);
3658         if (ArgAlign < 8)
3659           ArgAlign = 8;
3660         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
3661         if (!IsFixed) {
3662           Value *Base = getShadowPtrForVAArgument(RealTy, IRB,
3663                                                   VAArgOffset - VAArgBase);
3664           Value *AShadowPtr, *AOriginPtr;
3665           std::tie(AShadowPtr, AOriginPtr) = MSV.getShadowOriginPtr(
3666               A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment);
3667 
3668           IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr,
3669                            kShadowTLSAlignment, ArgSize);
3670         }
3671         VAArgOffset += alignTo(ArgSize, 8);
3672       } else {
3673         Value *Base;
3674         uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
3675         uint64_t ArgAlign = 8;
3676         if (A->getType()->isArrayTy()) {
3677           // Arrays are aligned to element size, except for long double
3678           // arrays, which are aligned to 8 bytes.
3679           Type *ElementTy = A->getType()->getArrayElementType();
3680           if (!ElementTy->isPPC_FP128Ty())
3681             ArgAlign = DL.getTypeAllocSize(ElementTy);
3682         } else if (A->getType()->isVectorTy()) {
3683           // Vectors are naturally aligned.
3684           ArgAlign = DL.getTypeAllocSize(A->getType());
3685         }
3686         if (ArgAlign < 8)
3687           ArgAlign = 8;
3688         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
3689         if (DL.isBigEndian()) {
3690           // Adjusting the shadow for argument with size < 8 to match the placement
3691           // of bits in big endian system
3692           if (ArgSize < 8)
3693             VAArgOffset += (8 - ArgSize);
3694         }
3695         if (!IsFixed) {
3696           Base = getShadowPtrForVAArgument(A->getType(), IRB,
3697                                            VAArgOffset - VAArgBase);
3698           IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
3699         }
3700         VAArgOffset += ArgSize;
3701         VAArgOffset = alignTo(VAArgOffset, 8);
3702       }
3703       if (IsFixed)
3704         VAArgBase = VAArgOffset;
3705     }
3706 
3707     Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(),
3708                                                 VAArgOffset - VAArgBase);
3709     // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
3710     // a new class member i.e. it is the total size of all VarArgs.
3711     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
3712   }
3713 
3714   /// \brief Compute the shadow address for a given va_arg.
3715   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
3716                                    int ArgOffset) {
3717     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
3718     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
3719     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
3720                               "_msarg");
3721   }
3722 
3723   void visitVAStartInst(VAStartInst &I) override {
3724     IRBuilder<> IRB(&I);
3725     VAStartInstrumentationList.push_back(&I);
3726     Value *VAListTag = I.getArgOperand(0);
3727     Value *ShadowPtr, *OriginPtr;
3728     unsigned Alignment = 8;
3729     std::tie(ShadowPtr, OriginPtr) =
3730         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment);
3731     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
3732                      /* size */ 8, Alignment, false);
3733   }
3734 
3735   void visitVACopyInst(VACopyInst &I) override {
3736     IRBuilder<> IRB(&I);
3737     Value *VAListTag = I.getArgOperand(0);
3738     Value *ShadowPtr, *OriginPtr;
3739     unsigned Alignment = 8;
3740     std::tie(ShadowPtr, OriginPtr) =
3741         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment);
3742     // Unpoison the whole __va_list_tag.
3743     // FIXME: magic ABI constants.
3744     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
3745                      /* size */ 8, Alignment, false);
3746   }
3747 
3748   void finalizeInstrumentation() override {
3749     assert(!VAArgSize && !VAArgTLSCopy &&
3750            "finalizeInstrumentation called twice");
3751     IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
3752     VAArgSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
3753     Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
3754                                     VAArgSize);
3755 
3756     if (!VAStartInstrumentationList.empty()) {
3757       // If there is a va_start in this function, make a backup copy of
3758       // va_arg_tls somewhere in the function entry block.
3759       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
3760       IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
3761     }
3762 
3763     // Instrument va_start.
3764     // Copy va_list shadow from the backup copy of the TLS contents.
3765     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
3766       CallInst *OrigInst = VAStartInstrumentationList[i];
3767       IRBuilder<> IRB(OrigInst->getNextNode());
3768       Value *VAListTag = OrigInst->getArgOperand(0);
3769       Value *RegSaveAreaPtrPtr =
3770           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
3771                              PointerType::get(Type::getInt64PtrTy(*MS.C), 0));
3772       Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
3773       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
3774       unsigned Alignment = 8;
3775       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
3776           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
3777                                  Alignment);
3778       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
3779                        CopySize);
3780     }
3781   }
3782 };
3783 
3784 /// \brief A no-op implementation of VarArgHelper.
3785 struct VarArgNoOpHelper : public VarArgHelper {
3786   VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
3787                    MemorySanitizerVisitor &MSV) {}
3788 
3789   void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {}
3790 
3791   void visitVAStartInst(VAStartInst &I) override {}
3792 
3793   void visitVACopyInst(VACopyInst &I) override {}
3794 
3795   void finalizeInstrumentation() override {}
3796 };
3797 
3798 } // end anonymous namespace
3799 
3800 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
3801                                         MemorySanitizerVisitor &Visitor) {
3802   // VarArg handling is only implemented on AMD64. False positives are possible
3803   // on other platforms.
3804   Triple TargetTriple(Func.getParent()->getTargetTriple());
3805   if (TargetTriple.getArch() == Triple::x86_64)
3806     return new VarArgAMD64Helper(Func, Msan, Visitor);
3807   else if (TargetTriple.getArch() == Triple::mips64 ||
3808            TargetTriple.getArch() == Triple::mips64el)
3809     return new VarArgMIPS64Helper(Func, Msan, Visitor);
3810   else if (TargetTriple.getArch() == Triple::aarch64)
3811     return new VarArgAArch64Helper(Func, Msan, Visitor);
3812   else if (TargetTriple.getArch() == Triple::ppc64 ||
3813            TargetTriple.getArch() == Triple::ppc64le)
3814     return new VarArgPowerPC64Helper(Func, Msan, Visitor);
3815   else
3816     return new VarArgNoOpHelper(Func, Msan, Visitor);
3817 }
3818 
3819 bool MemorySanitizer::runOnFunction(Function &F) {
3820   if (&F == MsanCtorFunction)
3821     return false;
3822   MemorySanitizerVisitor Visitor(F, *this);
3823 
3824   // Clear out readonly/readnone attributes.
3825   AttrBuilder B;
3826   B.addAttribute(Attribute::ReadOnly)
3827     .addAttribute(Attribute::ReadNone);
3828   F.removeAttributes(AttributeList::FunctionIndex, B);
3829 
3830   return Visitor.runOnFunction();
3831 }
3832