1 //===- MemorySanitizer.cpp - detector of uninitialized reads --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file is a part of MemorySanitizer, a detector of uninitialized
11 /// reads.
12 ///
13 /// The algorithm of the tool is similar to Memcheck
14 /// (http://goo.gl/QKbem). We associate a few shadow bits with every
15 /// byte of the application memory, poison the shadow of the malloc-ed
16 /// or alloca-ed memory, load the shadow bits on every memory read,
17 /// propagate the shadow bits through some of the arithmetic
18 /// instruction (including MOV), store the shadow bits on every memory
19 /// write, report a bug on some other instructions (e.g. JMP) if the
20 /// associated shadow is poisoned.
21 ///
22 /// But there are differences too. The first and the major one:
23 /// compiler instrumentation instead of binary instrumentation. This
24 /// gives us much better register allocation, possible compiler
25 /// optimizations and a fast start-up. But this brings the major issue
26 /// as well: msan needs to see all program events, including system
27 /// calls and reads/writes in system libraries, so we either need to
28 /// compile *everything* with msan or use a binary translation
29 /// component (e.g. DynamoRIO) to instrument pre-built libraries.
30 /// Another difference from Memcheck is that we use 8 shadow bits per
31 /// byte of application memory and use a direct shadow mapping. This
32 /// greatly simplifies the instrumentation code and avoids races on
33 /// shadow updates (Memcheck is single-threaded so races are not a
34 /// concern there. Memcheck uses 2 shadow bits per byte with a slow
35 /// path storage that uses 8 bits per byte).
36 ///
37 /// The default value of shadow is 0, which means "clean" (not poisoned).
38 ///
39 /// Every module initializer should call __msan_init to ensure that the
40 /// shadow memory is ready. On error, __msan_warning is called. Since
41 /// parameters and return values may be passed via registers, we have a
42 /// specialized thread-local shadow for return values
43 /// (__msan_retval_tls) and parameters (__msan_param_tls).
44 ///
45 ///                           Origin tracking.
46 ///
47 /// MemorySanitizer can track origins (allocation points) of all uninitialized
48 /// values. This behavior is controlled with a flag (msan-track-origins) and is
49 /// disabled by default.
50 ///
51 /// Origins are 4-byte values created and interpreted by the runtime library.
52 /// They are stored in a second shadow mapping, one 4-byte value for 4 bytes
53 /// of application memory. Propagation of origins is basically a bunch of
54 /// "select" instructions that pick the origin of a dirty argument, if an
55 /// instruction has one.
56 ///
57 /// Every 4 aligned, consecutive bytes of application memory have one origin
58 /// value associated with them. If these bytes contain uninitialized data
59 /// coming from 2 different allocations, the last store wins. Because of this,
60 /// MemorySanitizer reports can show unrelated origins, but this is unlikely in
61 /// practice.
62 ///
63 /// Origins are meaningless for fully initialized values, so MemorySanitizer
64 /// avoids storing origin to memory when a fully initialized value is stored.
/// This way it avoids needlessly overwriting the origin of the 4-byte region on
66 /// a short (i.e. 1 byte) clean store, and it is also good for performance.
67 ///
68 ///                            Atomic handling.
69 ///
70 /// Ideally, every atomic store of application value should update the
71 /// corresponding shadow location in an atomic way. Unfortunately, atomic store
72 /// of two disjoint locations can not be done without severe slowdown.
73 ///
74 /// Therefore, we implement an approximation that may err on the safe side.
75 /// In this implementation, every atomically accessed location in the program
76 /// may only change from (partially) uninitialized to fully initialized, but
77 /// not the other way around. We load the shadow _after_ the application load,
78 /// and we store the shadow _before_ the app store. Also, we always store clean
79 /// shadow (if the application store is atomic). This way, if the store-load
80 /// pair constitutes a happens-before arc, shadow store and load are correctly
81 /// ordered such that the load will get either the value that was stored, or
82 /// some later value (which is always clean).
83 ///
84 /// This does not work very well with Compare-And-Swap (CAS) and
85 /// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW
86 /// must store the new shadow before the app operation, and load the shadow
87 /// after the app operation. Computers don't work this way. Current
88 /// implementation ignores the load aspect of CAS/RMW, always returning a clean
89 /// value. It implements the store part as a simple atomic store by storing a
90 /// clean shadow.
91 ///
92 ///                      Instrumenting inline assembly.
93 ///
94 /// For inline assembly code LLVM has little idea about which memory locations
95 /// become initialized depending on the arguments. It can be possible to figure
96 /// out which arguments are meant to point to inputs and outputs, but the
97 /// actual semantics can be only visible at runtime. In the Linux kernel it's
98 /// also possible that the arguments only indicate the offset for a base taken
99 /// from a segment register, so it's dangerous to treat any asm() arguments as
100 /// pointers. We take a conservative approach generating calls to
101 ///   __msan_instrument_asm_store(ptr, size)
102 /// , which defer the memory unpoisoning to the runtime library.
103 /// The latter can perform more complex address checks to figure out whether
104 /// it's safe to touch the shadow memory.
105 /// Like with atomic operations, we call __msan_instrument_asm_store() before
106 /// the assembly call, so that changes to the shadow memory will be seen by
107 /// other threads together with main memory initialization.
108 ///
109 ///                  KernelMemorySanitizer (KMSAN) implementation.
110 ///
111 /// The major differences between KMSAN and MSan instrumentation are:
112 ///  - KMSAN always tracks the origins and implies msan-keep-going=true;
113 ///  - KMSAN allocates shadow and origin memory for each page separately, so
114 ///    there are no explicit accesses to shadow and origin in the
115 ///    instrumentation.
116 ///    Shadow and origin values for a particular X-byte memory location
117 ///    (X=1,2,4,8) are accessed through pointers obtained via the
118 ///      __msan_metadata_ptr_for_load_X(ptr)
119 ///      __msan_metadata_ptr_for_store_X(ptr)
120 ///    functions. The corresponding functions check that the X-byte accesses
///    are possible and return the pointers to shadow and origin memory.
122 ///    Arbitrary sized accesses are handled with:
123 ///      __msan_metadata_ptr_for_load_n(ptr, size)
124 ///      __msan_metadata_ptr_for_store_n(ptr, size);
125 ///  - TLS variables are stored in a single per-task struct. A call to a
126 ///    function __msan_get_context_state() returning a pointer to that struct
127 ///    is inserted into every instrumented function before the entry block;
128 ///  - __msan_warning() takes a 32-bit origin parameter;
129 ///  - local variables are poisoned with __msan_poison_alloca() upon function
130 ///    entry and unpoisoned with __msan_unpoison_alloca() before leaving the
131 ///    function;
132 ///  - the pass doesn't declare any global variables or add global constructors
133 ///    to the translation unit.
134 ///
135 /// Also, KMSAN currently ignores uninitialized memory passed into inline asm
136 /// calls, making sure we're on the safe side wrt. possible false positives.
137 ///
138 ///  KernelMemorySanitizer only supports X86_64 at the moment.
139 ///
140 //===----------------------------------------------------------------------===//
141 
142 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
143 #include "llvm/ADT/APInt.h"
144 #include "llvm/ADT/ArrayRef.h"
145 #include "llvm/ADT/DepthFirstIterator.h"
146 #include "llvm/ADT/SmallSet.h"
147 #include "llvm/ADT/SmallString.h"
148 #include "llvm/ADT/SmallVector.h"
149 #include "llvm/ADT/StringExtras.h"
150 #include "llvm/ADT/StringRef.h"
151 #include "llvm/ADT/Triple.h"
152 #include "llvm/Analysis/TargetLibraryInfo.h"
153 #include "llvm/IR/Argument.h"
154 #include "llvm/IR/Attributes.h"
155 #include "llvm/IR/BasicBlock.h"
156 #include "llvm/IR/CallSite.h"
157 #include "llvm/IR/CallingConv.h"
158 #include "llvm/IR/Constant.h"
159 #include "llvm/IR/Constants.h"
160 #include "llvm/IR/DataLayout.h"
161 #include "llvm/IR/DerivedTypes.h"
162 #include "llvm/IR/Function.h"
163 #include "llvm/IR/GlobalValue.h"
164 #include "llvm/IR/GlobalVariable.h"
165 #include "llvm/IR/IRBuilder.h"
166 #include "llvm/IR/InlineAsm.h"
167 #include "llvm/IR/InstVisitor.h"
168 #include "llvm/IR/InstrTypes.h"
169 #include "llvm/IR/Instruction.h"
170 #include "llvm/IR/Instructions.h"
171 #include "llvm/IR/IntrinsicInst.h"
172 #include "llvm/IR/Intrinsics.h"
173 #include "llvm/IR/LLVMContext.h"
174 #include "llvm/IR/MDBuilder.h"
175 #include "llvm/IR/Module.h"
176 #include "llvm/IR/Type.h"
177 #include "llvm/IR/Value.h"
178 #include "llvm/IR/ValueMap.h"
179 #include "llvm/InitializePasses.h"
180 #include "llvm/Pass.h"
181 #include "llvm/Support/AtomicOrdering.h"
182 #include "llvm/Support/Casting.h"
183 #include "llvm/Support/CommandLine.h"
184 #include "llvm/Support/Compiler.h"
185 #include "llvm/Support/Debug.h"
186 #include "llvm/Support/ErrorHandling.h"
187 #include "llvm/Support/MathExtras.h"
188 #include "llvm/Support/raw_ostream.h"
189 #include "llvm/Transforms/Instrumentation.h"
190 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
191 #include "llvm/Transforms/Utils/Local.h"
192 #include "llvm/Transforms/Utils/ModuleUtils.h"
193 #include <algorithm>
194 #include <cassert>
195 #include <cstddef>
196 #include <cstdint>
197 #include <memory>
198 #include <string>
199 #include <tuple>
200 
using namespace llvm;

#define DEBUG_TYPE "msan"

// Origins are 4-byte values created and interpreted by the runtime library;
// one origin id covers 4 aligned bytes of application memory (see the
// "Origin tracking" section in the file header).
static const unsigned kOriginSize = 4;
static const unsigned kMinOriginAlignment = 4;
// Alignment of the per-thread shadow slots (__msan_*_tls arrays).
static const unsigned kShadowTLSAlignment = 8;

// These constants must be kept in sync with the ones in msan.h.
static const unsigned kParamTLSSize = 800;
static const unsigned kRetvalTLSSize = 800;

// Access sizes are powers of two: 1, 2, 4, 8.
static const size_t kNumberOfAccessSizes = 4;
215 
/// Track origins of uninitialized values.
///
/// Adds a section to MemorySanitizer report that points to the allocation
/// (stack or heap) the uninitialized bits came from originally.
static cl::opt<int> ClTrackOrigins("msan-track-origins",
       cl::desc("Track origins (allocation sites) of poisoned memory"),
       cl::Hidden, cl::init(0));

// Use the recoverable (non-aborting) warning callback, so execution continues
// after the first UMR report. KMSAN implies this (see MemorySanitizerOptions).
static cl::opt<bool> ClKeepGoing("msan-keep-going",
       cl::desc("keep going after reporting a UMR"),
       cl::Hidden, cl::init(false));

static cl::opt<bool> ClPoisonStack("msan-poison-stack",
       cl::desc("poison uninitialized stack variables"),
       cl::Hidden, cl::init(true));

static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
       cl::desc("poison uninitialized stack variables with a call"),
       cl::Hidden, cl::init(false));

static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
       cl::desc("poison uninitialized stack variables with the given pattern"),
       cl::Hidden, cl::init(0xff));

static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
       cl::desc("poison undef temps"),
       cl::Hidden, cl::init(true));

static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
       cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
       cl::Hidden, cl::init(true));

static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact",
       cl::desc("exact handling of relational integer ICmp"),
       cl::Hidden, cl::init(false));

static cl::opt<bool> ClHandleLifetimeIntrinsics(
    "msan-handle-lifetime-intrinsics",
    cl::desc(
        "when possible, poison scoped variables at the beginning of the scope "
        "(slower, but more precise)"),
    cl::Hidden, cl::init(true));

// When compiling the Linux kernel, we sometimes see false positives related to
// MSan being unable to understand that inline assembly calls may initialize
// local variables.
// This flag makes the compiler conservatively unpoison every memory location
// passed into an assembly call. Note that this may cause false positives.
// Because it's impossible to figure out the array sizes, we can only unpoison
// the first sizeof(type) bytes for each type* pointer.
// The instrumentation is only enabled in KMSAN builds, and only if
// -msan-handle-asm-conservative is on. This is done because we may want to
// quickly disable assembly instrumentation when it breaks.
static cl::opt<bool> ClHandleAsmConservative(
    "msan-handle-asm-conservative",
    cl::desc("conservative handling of inline assembly"), cl::Hidden,
    cl::init(true));

// This flag controls whether we check the shadow of the address
// operand of load or store. Such bugs are very rare, since load from
// a garbage address typically results in SEGV, but still happen
// (e.g. only lower bits of address are garbage, or the access happens
// early at program startup where malloc-ed memory is more likely to
// be zeroed). As of 2012-08-28 this flag adds 20% slowdown.
static cl::opt<bool> ClCheckAccessAddress("msan-check-access-address",
       cl::desc("report accesses through a pointer which has poisoned shadow"),
       cl::Hidden, cl::init(true));

static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions",
       cl::desc("print out instructions with default strict semantics"),
       cl::Hidden, cl::init(false));

static cl::opt<int> ClInstrumentationWithCallThreshold(
    "msan-instrumentation-with-call-threshold",
    cl::desc(
        "If the function being instrumented requires more than "
        "this number of checks and origin stores, use callbacks instead of "
        "inline checks (-1 means never use callbacks)."),
    cl::Hidden, cl::init(3500));

static cl::opt<bool>
    ClEnableKmsan("msan-kernel",
                  cl::desc("Enable KernelMemorySanitizer instrumentation"),
                  cl::Hidden, cl::init(false));

// This is an experiment to enable handling of cases where shadow is a non-zero
// compile-time constant. For some unexplainable reason they were silently
// ignored in the instrumentation.
static cl::opt<bool> ClCheckConstantShadow("msan-check-constant-shadow",
       cl::desc("Insert checks for constant shadow values"),
       cl::Hidden, cl::init(false));

// This is off by default because of a bug in gold:
// https://sourceware.org/bugzilla/show_bug.cgi?id=19002
static cl::opt<bool> ClWithComdat("msan-with-comdat",
       cl::desc("Place MSan constructors in comdat sections"),
       cl::Hidden, cl::init(false));

// These options allow specifying custom memory map parameters.
// See MemoryMapParams for details.
static cl::opt<uint64_t> ClAndMask("msan-and-mask",
                                   cl::desc("Define custom MSan AndMask"),
                                   cl::Hidden, cl::init(0));

static cl::opt<uint64_t> ClXorMask("msan-xor-mask",
                                   cl::desc("Define custom MSan XorMask"),
                                   cl::Hidden, cl::init(0));

static cl::opt<uint64_t> ClShadowBase("msan-shadow-base",
                                      cl::desc("Define custom MSan ShadowBase"),
                                      cl::Hidden, cl::init(0));

static cl::opt<uint64_t> ClOriginBase("msan-origin-base",
                                      cl::desc("Define custom MSan OriginBase"),
                                      cl::Hidden, cl::init(0));

// Names of the runtime entry points the pass creates/uses: the module
// constructor emitted by insertModuleCtor() and the runtime init it calls.
static const char *const kMsanModuleCtorName = "msan.module_ctor";
static const char *const kMsanInitName = "__msan_init";
334 
namespace {

// Memory map parameters used in application-to-shadow address calculation.
// Offset = (Addr & ~AndMask) ^ XorMask
// Shadow = ShadowBase + Offset
// Origin = OriginBase + Offset
// NOTE: the member order is significant -- the per-platform tables below rely
// on aggregate initialization in exactly this order.
struct MemoryMapParams {
  uint64_t AndMask;
  uint64_t XorMask;
  uint64_t ShadowBase;
  uint64_t OriginBase;
};

// Pair of memory map parameter sets for one OS, selected by pointer width.
// A nullptr entry means the pointer width is unsupported on that platform.
struct PlatformMemoryMapParams {
  const MemoryMapParams *bits32;
  const MemoryMapParams *bits64;
};

} // end anonymous namespace
354 
// Per-platform shadow/origin mapping constants.
// NOTE(review): these presumably mirror the memory layout hard-coded in the
// MSan runtime for each platform -- verify against the runtime before
// changing any value.

// i386 Linux
static const MemoryMapParams Linux_I386_MemoryMapParams = {
  0x000080000000,  // AndMask
  0,               // XorMask (not used)
  0,               // ShadowBase (not used)
  0x000040000000,  // OriginBase
};

// x86_64 Linux
static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
#ifdef MSAN_LINUX_X86_64_OLD_MAPPING
  0x400000000000,  // AndMask
  0,               // XorMask (not used)
  0,               // ShadowBase (not used)
  0x200000000000,  // OriginBase
#else
  0,               // AndMask (not used)
  0x500000000000,  // XorMask
  0,               // ShadowBase (not used)
  0x100000000000,  // OriginBase
#endif
};

// mips64 Linux
static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
  0,               // AndMask (not used)
  0x008000000000,  // XorMask
  0,               // ShadowBase (not used)
  0x002000000000,  // OriginBase
};

// ppc64 Linux
static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = {
  0xE00000000000,  // AndMask
  0x100000000000,  // XorMask
  0x080000000000,  // ShadowBase
  0x1C0000000000,  // OriginBase
};

// aarch64 Linux
static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
  0,               // AndMask (not used)
  0x06000000000,   // XorMask
  0,               // ShadowBase (not used)
  0x01000000000,   // OriginBase
};

// i386 FreeBSD
static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
  0x000180000000,  // AndMask
  0x000040000000,  // XorMask
  0x000020000000,  // ShadowBase
  0x000700000000,  // OriginBase
};

// x86_64 FreeBSD
static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = {
  0xc00000000000,  // AndMask
  0x200000000000,  // XorMask
  0x100000000000,  // ShadowBase
  0x380000000000,  // OriginBase
};

// x86_64 NetBSD
static const MemoryMapParams NetBSD_X86_64_MemoryMapParams = {
  0,               // AndMask
  0x500000000000,  // XorMask
  0,               // ShadowBase
  0x100000000000,  // OriginBase
};

static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
  &Linux_I386_MemoryMapParams,
  &Linux_X86_64_MemoryMapParams,
};

static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
  nullptr,
  &Linux_MIPS64_MemoryMapParams,
};

static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = {
  nullptr,
  &Linux_PowerPC64_MemoryMapParams,
};

static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
  nullptr,
  &Linux_AArch64_MemoryMapParams,
};

static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
  &FreeBSD_I386_MemoryMapParams,
  &FreeBSD_X86_64_MemoryMapParams,
};

static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = {
  nullptr,
  &NetBSD_X86_64_MemoryMapParams,
};
455 
456 namespace {
457 
/// Instrument functions of a module to detect uninitialized reads.
///
/// Instantiating MemorySanitizer inserts the msan runtime library API function
/// declarations into the module if they don't exist already. Instantiating
/// ensures the __msan_init function is in the list of global constructors for
/// the module.
class MemorySanitizer {
public:
  MemorySanitizer(Module &M, MemorySanitizerOptions Options)
      : CompileKernel(Options.Kernel), TrackOrigins(Options.TrackOrigins),
        Recover(Options.Recover) {
    initializeModule(M);
  }

  // MSan cannot be moved or copied because of MapParams.
  MemorySanitizer(MemorySanitizer &&) = delete;
  MemorySanitizer &operator=(MemorySanitizer &&) = delete;
  MemorySanitizer(const MemorySanitizer &) = delete;
  MemorySanitizer &operator=(const MemorySanitizer &) = delete;

  /// Instrument function \p F; returns true if the IR was modified.
  bool sanitizeFunction(Function &F, TargetLibraryInfo &TLI);

private:
  friend struct MemorySanitizerVisitor;
  friend struct VarArgAMD64Helper;
  friend struct VarArgMIPS64Helper;
  friend struct VarArgAArch64Helper;
  friend struct VarArgPowerPC64Helper;

  void initializeModule(Module &M);
  void initializeCallbacks(Module &M);
  void createKernelApi(Module &M);
  void createUserspaceApi(Module &M);

  /// True if we're compiling the Linux kernel.
  bool CompileKernel;
  /// Track origins (allocation points) of uninitialized values.
  int TrackOrigins;
  /// Keep going after reporting a UMR (-msan-keep-going).
  bool Recover;

  // The members below are presumably populated by initializeModule() /
  // initializeCallbacks() -- createKernelApi() already reads C and IntptrTy.
  LLVMContext *C;
  /// Pointer-sized integer type for the target.
  Type *IntptrTy;
  /// Type of a 4-byte origin id (see "Origin tracking" in the file header).
  Type *OriginTy;

  // XxxTLS variables represent the per-thread state in MSan and per-task state
  // in KMSAN.
  // For the userspace these point to thread-local globals. In the kernel land
  // they point to the members of a per-task struct obtained via a call to
  // __msan_get_context_state().

  /// Thread-local shadow storage for function parameters.
  Value *ParamTLS;

  /// Thread-local origin storage for function parameters.
  Value *ParamOriginTLS;

  /// Thread-local shadow storage for function return value.
  Value *RetvalTLS;

  /// Thread-local origin storage for function return value.
  Value *RetvalOriginTLS;

  /// Thread-local shadow storage for in-register va_arg function
  /// parameters (x86_64-specific).
  Value *VAArgTLS;

  /// Thread-local shadow storage for in-register va_arg function
  /// parameters (x86_64-specific).
  Value *VAArgOriginTLS;

  /// Thread-local shadow storage for va_arg overflow area
  /// (x86_64-specific).
  Value *VAArgOverflowSizeTLS;

  /// Thread-local space used to pass origin value to the UMR reporting
  /// function.
  Value *OriginTLS;

  /// Are the instrumentation callbacks set up?
  bool CallbacksInitialized = false;

  /// The run-time callback to print a warning.
  FunctionCallee WarningFn;

  // These arrays are indexed by log2(AccessSize).
  FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
  FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];

  /// Run-time helper that generates a new origin value for a stack
  /// allocation.
  FunctionCallee MsanSetAllocaOrigin4Fn;

  /// Run-time helper that poisons stack on function entry.
  FunctionCallee MsanPoisonStackFn;

  /// Run-time helper that records a store (or any event) of an
  /// uninitialized value and returns an updated origin id encoding this info.
  FunctionCallee MsanChainOriginFn;

  /// MSan runtime replacements for memmove, memcpy and memset.
  FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;

  /// KMSAN callback for task-local function argument shadow.
  StructType *MsanContextStateTy;
  FunctionCallee MsanGetContextStateFn;

  /// Functions for poisoning/unpoisoning local variables
  FunctionCallee MsanPoisonAllocaFn, MsanUnpoisonAllocaFn;

  /// Each of the MsanMetadataPtrXxx functions returns a pair of shadow/origin
  /// pointers.
  FunctionCallee MsanMetadataPtrForLoadN, MsanMetadataPtrForStoreN;
  FunctionCallee MsanMetadataPtrForLoad_1_8[4];
  FunctionCallee MsanMetadataPtrForStore_1_8[4];
  FunctionCallee MsanInstrumentAsmStoreFn;

  /// Helper to choose between different MsanMetadataPtrXxx().
  FunctionCallee getKmsanShadowOriginAccessFn(bool isStore, int size);

  /// Memory map parameters used in application-to-shadow calculation.
  const MemoryMapParams *MapParams;

  /// Custom memory map parameters used when -msan-shadow-base or
  /// -msan-origin-base is provided.
  MemoryMapParams CustomMapParams;

  /// Branch weights for cold (rarely taken) calls such as the warning
  /// callback.
  MDNode *ColdCallWeights;

  /// Branch weights for origin store.
  MDNode *OriginStoreWeights;

  /// An empty volatile inline asm that prevents callback merge.
  InlineAsm *EmptyAsm;
};
592 
593 void insertModuleCtor(Module &M) {
594   getOrCreateSanitizerCtorAndInitFunctions(
595       M, kMsanModuleCtorName, kMsanInitName,
596       /*InitArgTypes=*/{},
597       /*InitArgs=*/{},
598       // This callback is invoked when the functions are created the first
599       // time. Hook them into the global ctors list in that case:
600       [&](Function *Ctor, FunctionCallee) {
601         if (!ClWithComdat) {
602           appendToGlobalCtors(M, Ctor, 0);
603           return;
604         }
605         Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
606         Ctor->setComdat(MsanCtorComdat);
607         appendToGlobalCtors(M, Ctor, 0, Ctor);
608       });
609 }
610 
611 /// A legacy function pass for msan instrumentation.
612 ///
613 /// Instruments functions to detect unitialized reads.
614 struct MemorySanitizerLegacyPass : public FunctionPass {
615   // Pass identification, replacement for typeid.
616   static char ID;
617 
618   MemorySanitizerLegacyPass(MemorySanitizerOptions Options = {})
619       : FunctionPass(ID), Options(Options) {}
620   StringRef getPassName() const override { return "MemorySanitizerLegacyPass"; }
621 
622   void getAnalysisUsage(AnalysisUsage &AU) const override {
623     AU.addRequired<TargetLibraryInfoWrapperPass>();
624   }
625 
626   bool runOnFunction(Function &F) override {
627     return MSan->sanitizeFunction(
628         F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F));
629   }
630   bool doInitialization(Module &M) override;
631 
632   Optional<MemorySanitizer> MSan;
633   MemorySanitizerOptions Options;
634 };
635 
636 template <class T> T getOptOrDefault(const cl::opt<T> &Opt, T Default) {
637   return (Opt.getNumOccurrences() > 0) ? Opt : Default;
638 }
639 
640 } // end anonymous namespace
641 
// Resolve the effective MSan options: an explicit command-line flag always
// wins over the value supplied by the pass builder. Kernel mode changes the
// defaults: origin tracking level 2 and keep-going (recover) on.
// NOTE(review): TrackOrigins' initializer reads Kernel, which relies on
// Kernel being declared before TrackOrigins in MemorySanitizerOptions --
// confirm against the header.
MemorySanitizerOptions::MemorySanitizerOptions(int TO, bool R, bool K)
    : Kernel(getOptOrDefault(ClEnableKmsan, K)),
      TrackOrigins(getOptOrDefault(ClTrackOrigins, Kernel ? 2 : TO)),
      Recover(getOptOrDefault(ClKeepGoing, Kernel || R)) {}
646 
647 PreservedAnalyses MemorySanitizerPass::run(Function &F,
648                                            FunctionAnalysisManager &FAM) {
649   MemorySanitizer Msan(*F.getParent(), Options);
650   if (Msan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F)))
651     return PreservedAnalyses::none();
652   return PreservedAnalyses::all();
653 }
654 
655 PreservedAnalyses MemorySanitizerPass::run(Module &M,
656                                            ModuleAnalysisManager &AM) {
657   if (Options.Kernel)
658     return PreservedAnalyses::all();
659   insertModuleCtor(M);
660   return PreservedAnalyses::none();
661 }
662 
char MemorySanitizerLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(MemorySanitizerLegacyPass, "msan",
                      "MemorySanitizer: detects uninitialized reads.", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(MemorySanitizerLegacyPass, "msan",
                    "MemorySanitizer: detects uninitialized reads.", false,
                    false)

/// Factory for the legacy pass manager.
FunctionPass *
llvm::createMemorySanitizerLegacyPassPass(MemorySanitizerOptions Options) {
  return new MemorySanitizerLegacyPass(Options);
}
677 
678 /// Create a non-const global initialized with the given string.
679 ///
680 /// Creates a writable global for Str so that we can pass it to the
681 /// run-time lib. Runtime uses first 4 bytes of the string to store the
682 /// frame ID, so the string needs to be mutable.
683 static GlobalVariable *createPrivateNonConstGlobalForString(Module &M,
684                                                             StringRef Str) {
685   Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
686   return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/false,
687                             GlobalValue::PrivateLinkage, StrConst, "");
688 }
689 
690 /// Create KMSAN API callbacks.
691 void MemorySanitizer::createKernelApi(Module &M) {
692   IRBuilder<> IRB(*C);
693 
694   // These will be initialized in insertKmsanPrologue().
695   RetvalTLS = nullptr;
696   RetvalOriginTLS = nullptr;
697   ParamTLS = nullptr;
698   ParamOriginTLS = nullptr;
699   VAArgTLS = nullptr;
700   VAArgOriginTLS = nullptr;
701   VAArgOverflowSizeTLS = nullptr;
702   // OriginTLS is unused in the kernel.
703   OriginTLS = nullptr;
704 
705   // __msan_warning() in the kernel takes an origin.
706   WarningFn = M.getOrInsertFunction("__msan_warning", IRB.getVoidTy(),
707                                     IRB.getInt32Ty());
708   // Requests the per-task context state (kmsan_context_state*) from the
709   // runtime library.
710   MsanContextStateTy = StructType::get(
711       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
712       ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8),
713       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
714       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), /* va_arg_origin */
715       IRB.getInt64Ty(), ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy,
716       OriginTy);
717   MsanGetContextStateFn = M.getOrInsertFunction(
718       "__msan_get_context_state", PointerType::get(MsanContextStateTy, 0));
719 
720   Type *RetTy = StructType::get(PointerType::get(IRB.getInt8Ty(), 0),
721                                 PointerType::get(IRB.getInt32Ty(), 0));
722 
723   for (int ind = 0, size = 1; ind < 4; ind++, size <<= 1) {
724     std::string name_load =
725         "__msan_metadata_ptr_for_load_" + std::to_string(size);
726     std::string name_store =
727         "__msan_metadata_ptr_for_store_" + std::to_string(size);
728     MsanMetadataPtrForLoad_1_8[ind] = M.getOrInsertFunction(
729         name_load, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
730     MsanMetadataPtrForStore_1_8[ind] = M.getOrInsertFunction(
731         name_store, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
732   }
733 
734   MsanMetadataPtrForLoadN = M.getOrInsertFunction(
735       "__msan_metadata_ptr_for_load_n", RetTy,
736       PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
737   MsanMetadataPtrForStoreN = M.getOrInsertFunction(
738       "__msan_metadata_ptr_for_store_n", RetTy,
739       PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
740 
741   // Functions for poisoning and unpoisoning memory.
742   MsanPoisonAllocaFn =
743       M.getOrInsertFunction("__msan_poison_alloca", IRB.getVoidTy(),
744                             IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy());
745   MsanUnpoisonAllocaFn = M.getOrInsertFunction(
746       "__msan_unpoison_alloca", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy);
747 }
748 
749 static Constant *getOrInsertGlobal(Module &M, StringRef Name, Type *Ty) {
750   return M.getOrInsertGlobal(Name, Ty, [&] {
751     return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
752                               nullptr, Name, nullptr,
753                               GlobalVariable::InitialExecTLSModel);
754   });
755 }
756 
/// Insert declarations for userspace-specific functions and globals.
void MemorySanitizer::createUserspaceApi(Module &M) {
  IRBuilder<> IRB(*C);
  // Create the callback.
  // FIXME: this function should have "Cold" calling conv,
  // which is not yet implemented.
  // Under -msan-recover the warning callback returns so execution can
  // continue; otherwise the noreturn variant is declared.
  StringRef WarningFnName = Recover ? "__msan_warning"
                                    : "__msan_warning_noreturn";
  WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy());

  // Create the global TLS variables.
  // Shadow of the return value, as an array of 64-bit words.
  RetvalTLS =
      getOrInsertGlobal(M, "__msan_retval_tls",
                        ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8));

  // A single origin id covering the entire return value.
  RetvalOriginTLS = getOrInsertGlobal(M, "__msan_retval_origin_tls", OriginTy);

  // Shadow for call arguments, as an array of 64-bit words.
  ParamTLS =
      getOrInsertGlobal(M, "__msan_param_tls",
                        ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));

  // Origin ids for call arguments (one 32-bit id per 4 shadow bytes).
  ParamOriginTLS =
      getOrInsertGlobal(M, "__msan_param_origin_tls",
                        ArrayType::get(OriginTy, kParamTLSSize / 4));

  // Shadow for variadic call arguments.
  VAArgTLS =
      getOrInsertGlobal(M, "__msan_va_arg_tls",
                        ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));

  // Origin ids for variadic call arguments.
  VAArgOriginTLS =
      getOrInsertGlobal(M, "__msan_va_arg_origin_tls",
                        ArrayType::get(OriginTy, kParamTLSSize / 4));

  // Size of the va_arg area (presumably the part of va_arg shadow that did
  // not fit into VAArgTLS — see the VarArg helpers).
  VAArgOverflowSizeTLS =
      getOrInsertGlobal(M, "__msan_va_arg_overflow_size_tls", IRB.getInt64Ty());
  // Origin id handed to __msan_warning; stored right before the call
  // (see insertWarningFn).
  OriginTLS = getOrInsertGlobal(M, "__msan_origin_tls", IRB.getInt32Ty());

  // Declare __msan_maybe_warning_N and __msan_maybe_store_origin_N for each
  // power-of-two access size. These are used when checks are outlined into
  // calls instead of emitted inline.
  for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
       AccessSizeIndex++) {
    unsigned AccessSize = 1 << AccessSizeIndex;
    std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize);
    MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
        FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8),
        IRB.getInt32Ty());

    FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
    MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
        FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8),
        IRB.getInt8PtrTy(), IRB.getInt32Ty());
  }

  // Runtime helpers for stack poisoning and alloca origin bookkeeping.
  MsanSetAllocaOrigin4Fn = M.getOrInsertFunction(
    "__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
    IRB.getInt8PtrTy(), IntptrTy);
  MsanPoisonStackFn =
      M.getOrInsertFunction("__msan_poison_stack", IRB.getVoidTy(),
                            IRB.getInt8PtrTy(), IntptrTy);
}
815 
/// Insert extern declaration of runtime-provided functions and globals.
void MemorySanitizer::initializeCallbacks(Module &M) {
  // Only do this once.
  if (CallbacksInitialized)
    return;

  IRBuilder<> IRB(*C);
  // Initialize callbacks that are common for kernel and userspace
  // instrumentation.
  // __msan_chain_origin: i32 -> i32; produces a new origin id linked to the
  // given one (used when track-origins >= 2, see updateOrigin).
  MsanChainOriginFn = M.getOrInsertFunction(
    "__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty());
  // Shadow-aware replacements for the memory intrinsics; all return i8*.
  MemmoveFn = M.getOrInsertFunction(
    "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
    IRB.getInt8PtrTy(), IntptrTy);
  MemcpyFn = M.getOrInsertFunction(
    "__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
    IntptrTy);
  MemsetFn = M.getOrInsertFunction(
    "__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
    IntptrTy);
  // We insert an empty inline asm after __msan_report* to avoid callback merge.
  // (hasSideEffects keeps the asm from being deleted.)
  EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
                            StringRef(""), StringRef(""),
                            /*hasSideEffects=*/true);

  // Callback for stores performed by inline assembly: (i8* addr, size).
  MsanInstrumentAsmStoreFn =
      M.getOrInsertFunction("__msan_instrument_asm_store", IRB.getVoidTy(),
                            PointerType::get(IRB.getInt8Ty(), 0), IntptrTy);

  // The remaining callbacks differ between KMSAN (kernel) and userspace MSan.
  if (CompileKernel) {
    createKernelApi(M);
  } else {
    createUserspaceApi(M);
  }
  CallbacksInitialized = true;
}
852 
853 FunctionCallee MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore,
854                                                              int size) {
855   FunctionCallee *Fns =
856       isStore ? MsanMetadataPtrForStore_1_8 : MsanMetadataPtrForLoad_1_8;
857   switch (size) {
858   case 1:
859     return Fns[0];
860   case 2:
861     return Fns[1];
862   case 4:
863     return Fns[2];
864   case 8:
865     return Fns[3];
866   default:
867     return nullptr;
868   }
869 }
870 
/// Module-level initialization.
///
/// Selects the shadow/origin memory mapping parameters for the target,
/// caches commonly used types and branch weights, and declares the
/// __msan_track_origins / __msan_keep_going globals. (The __msan_init
/// constructor call itself is inserted by the pass wrappers via
/// insertModuleCtor — see MemorySanitizerLegacyPass::doInitialization.)
void MemorySanitizer::initializeModule(Module &M) {
  auto &DL = M.getDataLayout();

  bool ShadowPassed = ClShadowBase.getNumOccurrences() > 0;
  bool OriginPassed = ClOriginBase.getNumOccurrences() > 0;
  // Check the overrides first
  // Command-line mapping flags take precedence over per-platform defaults.
  if (ShadowPassed || OriginPassed) {
    CustomMapParams.AndMask = ClAndMask;
    CustomMapParams.XorMask = ClXorMask;
    CustomMapParams.ShadowBase = ClShadowBase;
    CustomMapParams.OriginBase = ClOriginBase;
    MapParams = &CustomMapParams;
  } else {
    // Otherwise dispatch on OS, then architecture, to one of the
    // precomputed MemoryMapParams tables.
    Triple TargetTriple(M.getTargetTriple());
    switch (TargetTriple.getOS()) {
      case Triple::FreeBSD:
        switch (TargetTriple.getArch()) {
          case Triple::x86_64:
            MapParams = FreeBSD_X86_MemoryMapParams.bits64;
            break;
          case Triple::x86:
            MapParams = FreeBSD_X86_MemoryMapParams.bits32;
            break;
          default:
            report_fatal_error("unsupported architecture");
        }
        break;
      case Triple::NetBSD:
        switch (TargetTriple.getArch()) {
          case Triple::x86_64:
            MapParams = NetBSD_X86_MemoryMapParams.bits64;
            break;
          default:
            report_fatal_error("unsupported architecture");
        }
        break;
      case Triple::Linux:
        switch (TargetTriple.getArch()) {
          case Triple::x86_64:
            MapParams = Linux_X86_MemoryMapParams.bits64;
            break;
          case Triple::x86:
            MapParams = Linux_X86_MemoryMapParams.bits32;
            break;
          case Triple::mips64:
          case Triple::mips64el:
            MapParams = Linux_MIPS_MemoryMapParams.bits64;
            break;
          case Triple::ppc64:
          case Triple::ppc64le:
            MapParams = Linux_PowerPC_MemoryMapParams.bits64;
            break;
          case Triple::aarch64:
          case Triple::aarch64_be:
            MapParams = Linux_ARM_MemoryMapParams.bits64;
            break;
          default:
            report_fatal_error("unsupported architecture");
        }
        break;
      default:
        report_fatal_error("unsupported operating system");
    }
  }

  C = &(M.getContext());
  IRBuilder<> IRB(*C);
  IntptrTy = IRB.getIntPtrTy(DL);
  OriginTy = IRB.getInt32Ty(); // origin ids are 32-bit

  // Branch weights that mark the warning / origin-store paths as unlikely.
  ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
  OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);

  if (!CompileKernel) {
    // Weak-ODR constants let the userspace runtime observe the
    // instrumentation settings this module was built with.
    if (TrackOrigins)
      M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] {
        return new GlobalVariable(
            M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
            IRB.getInt32(TrackOrigins), "__msan_track_origins");
      });

    if (Recover)
      M.getOrInsertGlobal("__msan_keep_going", IRB.getInt32Ty(), [&] {
        return new GlobalVariable(M, IRB.getInt32Ty(), true,
                                  GlobalValue::WeakODRLinkage,
                                  IRB.getInt32(Recover), "__msan_keep_going");
      });
}
}
963 
// Legacy pass manager entry point: one-time per-module setup.
bool MemorySanitizerLegacyPass::doInitialization(Module &M) {
  // Only userspace MSan registers a module constructor; kernel (KMSAN)
  // builds skip it.
  if (!Options.Kernel)
    insertModuleCtor(M);
  // Construct the MemorySanitizer instance shared by all functions.
  MSan.emplace(M, Options);
  return true;
}
970 
971 namespace {
972 
/// A helper class that handles instrumentation of VarArg
/// functions on a particular platform.
///
/// Implementations are expected to insert the instrumentation
/// necessary to propagate argument shadow through VarArg function
/// calls. Visit* methods are called during an InstVisitor pass over
/// the function, and should avoid creating new basic blocks. A new
/// instance of this class is created for each instrumented function.
struct VarArgHelper {
  virtual ~VarArgHelper() = default;

  /// Visit a CallSite (so implementations can record/propagate shadow for
  /// the variadic portion of the argument list).
  virtual void visitCallSite(CallSite &CS, IRBuilder<> &IRB) = 0;

  /// Visit a va_start call.
  virtual void visitVAStartInst(VAStartInst &I) = 0;

  /// Visit a va_copy call.
  virtual void visitVACopyInst(VACopyInst &I) = 0;

  /// Finalize function instrumentation.
  ///
  /// This method is called after visiting all interesting (see above)
  /// instructions in a function.
  virtual void finalizeInstrumentation() = 0;
};
999 
1000 struct MemorySanitizerVisitor;
1001 
1002 } // end anonymous namespace
1003 
1004 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
1005                                         MemorySanitizerVisitor &Visitor);
1006 
/// Map a type size in bits to an access-size index: 0 for anything that
/// fits in one byte, otherwise ceil(log2(size in bytes)).
static unsigned TypeSizeToSizeIndex(unsigned TypeSize) {
  if (TypeSize <= 8)
    return 0;
  // Round up to whole bytes, then take the ceiling log2.
  unsigned Bytes = (TypeSize + 7) / 8;
  unsigned Index = 0;
  while ((1u << Index) < Bytes)
    ++Index;
  return Index;
}
1011 
1012 namespace {
1013 
1014 /// This class does all the work for a given function. Store and Load
1015 /// instructions store and load corresponding shadow and origin
1016 /// values. Most instructions propagate shadow from arguments to their
1017 /// return values. Certain instructions (most importantly, BranchInst)
1018 /// test their argument shadow and print reports (with a runtime call) if it's
1019 /// non-zero.
1020 struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
1021   Function &F;
1022   MemorySanitizer &MS;
1023   SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
1024   ValueMap<Value*, Value*> ShadowMap, OriginMap;
1025   std::unique_ptr<VarArgHelper> VAHelper;
1026   const TargetLibraryInfo *TLI;
1027   BasicBlock *ActualFnStart;
1028 
1029   // The following flags disable parts of MSan instrumentation based on
1030   // blacklist contents and command-line options.
1031   bool InsertChecks;
1032   bool PropagateShadow;
1033   bool PoisonStack;
1034   bool PoisonUndef;
1035   bool CheckReturnValue;
1036 
1037   struct ShadowOriginAndInsertPoint {
1038     Value *Shadow;
1039     Value *Origin;
1040     Instruction *OrigIns;
1041 
1042     ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I)
1043       : Shadow(S), Origin(O), OrigIns(I) {}
1044   };
1045   SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
1046   bool InstrumentLifetimeStart = ClHandleLifetimeIntrinsics;
1047   SmallSet<AllocaInst *, 16> AllocaSet;
1048   SmallVector<std::pair<IntrinsicInst *, AllocaInst *>, 16> LifetimeStartList;
1049   SmallVector<StoreInst *, 16> StoreList;
1050 
  /// Set up per-function state: decide which instrumentation features are
  /// enabled for F and ensure the module-level callbacks are declared.
  MemorySanitizerVisitor(Function &F, MemorySanitizer &MS,
                         const TargetLibraryInfo &TLI)
      : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)), TLI(&TLI) {
    // All feature flags are gated on the function carrying the
    // sanitize_memory attribute.
    bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeMemory);
    InsertChecks = SanitizeFunction;
    PropagateShadow = SanitizeFunction;
    PoisonStack = SanitizeFunction && ClPoisonStack;
    PoisonUndef = SanitizeFunction && ClPoisonUndef;
    // FIXME: Consider using SpecialCaseList to specify a list of functions that
    // must always return fully initialized values. For now, we hardcode "main".
    CheckReturnValue = SanitizeFunction && (F.getName() == "main");

    MS.initializeCallbacks(*F.getParent());
    // KMSAN fetches its per-task state from the runtime in a prologue block;
    // instrumentation then starts in the block after it.
    if (MS.CompileKernel)
      ActualFnStart = insertKmsanPrologue(F);
    else
      ActualFnStart = &F.getEntryBlock();

    LLVM_DEBUG(if (!InsertChecks) dbgs()
               << "MemorySanitizer is not inserting checks into '"
               << F.getName() << "'\n");
  }
1073 
1074   Value *updateOrigin(Value *V, IRBuilder<> &IRB) {
1075     if (MS.TrackOrigins <= 1) return V;
1076     return IRB.CreateCall(MS.MsanChainOriginFn, V);
1077   }
1078 
1079   Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) {
1080     const DataLayout &DL = F.getParent()->getDataLayout();
1081     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
1082     if (IntptrSize == kOriginSize) return Origin;
1083     assert(IntptrSize == kOriginSize * 2);
1084     Origin = IRB.CreateIntCast(Origin, MS.IntptrTy, /* isSigned */ false);
1085     return IRB.CreateOr(Origin, IRB.CreateShl(Origin, kOriginSize * 8));
1086   }
1087 
  /// Fill memory range with the given origin value.
  ///
  /// Writes enough origin ids to cover \p Size bytes starting at
  /// \p OriginPtr. When the pointer is sufficiently aligned, ids are stored
  /// in intptr-sized chunks (the id replicated via originToIntptr) first.
  void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
                   unsigned Size, unsigned Alignment) {
    const DataLayout &DL = F.getParent()->getDataLayout();
    unsigned IntptrAlignment = DL.getABITypeAlignment(MS.IntptrTy);
    unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
    assert(IntptrAlignment >= kMinOriginAlignment);
    assert(IntptrSize >= kOriginSize);

    // Ofs counts origin-sized slots already covered by the wide stores.
    unsigned Ofs = 0;
    unsigned CurrentAlignment = Alignment;
    if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
      // Fast path: store several origin ids per instruction.
      Value *IntptrOrigin = originToIntptr(IRB, Origin);
      Value *IntptrOriginPtr =
          IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0));
      for (unsigned i = 0; i < Size / IntptrSize; ++i) {
        Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i)
                       : IntptrOriginPtr;
        IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
        Ofs += IntptrSize / kOriginSize;
        // After the first store, subsequent addresses are intptr-aligned by
        // construction.
        CurrentAlignment = IntptrAlignment;
      }
    }

    // Cover the remainder (Size rounded up to whole origin slots) one id at
    // a time.
    for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
      Value *GEP =
          i ? IRB.CreateConstGEP1_32(MS.OriginTy, OriginPtr, i) : OriginPtr;
      IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
      CurrentAlignment = kMinOriginAlignment;
    }
  }
1119 
  /// Store the origin for a value being written to memory, guarded (where
  /// possible) so the origin is only written when the shadow is non-zero.
  void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
                   Value *OriginPtr, unsigned Alignment, bool AsCall) {
    const DataLayout &DL = F.getParent()->getDataLayout();
    unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
    unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
    if (Shadow->getType()->isAggregateType()) {
      // Aggregate shadow cannot be compared against zero in one instruction;
      // store the origin unconditionally.
      paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
                  OriginAlignment);
    } else {
      Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
      Constant *ConstantShadow = dyn_cast_or_null<Constant>(ConvertedShadow);
      if (ConstantShadow) {
        // Constant shadow: skip the store entirely when it is clean;
        // otherwise (under -msan-check-constant-shadow) store directly
        // without a runtime comparison.
        if (ClCheckConstantShadow && !ConstantShadow->isZeroValue())
          paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
                      OriginAlignment);
        return;
      }

      unsigned TypeSizeInBits =
          DL.getTypeSizeInBits(ConvertedShadow->getType());
      unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
      if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
        // Outline the guarded store into __msan_maybe_store_origin_N.
        FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex];
        Value *ConvertedShadow2 = IRB.CreateZExt(
            ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
        IRB.CreateCall(Fn, {ConvertedShadow2,
                            IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
                            Origin});
      } else {
        // Inline guard: branch over the origin store when the shadow is
        // all zeroes (the store path is marked cold).
        Value *Cmp = IRB.CreateICmpNE(
            ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp");
        Instruction *CheckTerm = SplitBlockAndInsertIfThen(
            Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
        IRBuilder<> IRBNew(CheckTerm);
        paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), OriginPtr, StoreSize,
                    OriginAlignment);
      }
    }
  }
1159 
  /// Lower every recorded StoreInst: store the value's shadow next to the
  /// application store, plus its origin when origin tracking is on.
  void materializeStores(bool InstrumentWithCalls) {
    for (StoreInst *SI : StoreList) {
      IRBuilder<> IRB(SI);
      Value *Val = SI->getValueOperand();
      Value *Addr = SI->getPointerOperand();
      // Atomic stores are treated as always storing initialized data.
      Value *Shadow = SI->isAtomic() ? getCleanShadow(Val) : getShadow(Val);
      Value *ShadowPtr, *OriginPtr;
      Type *ShadowTy = Shadow->getType();
      unsigned Alignment = SI->getAlignment();
      unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
      std::tie(ShadowPtr, OriginPtr) =
          getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ true);

      StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment);
      LLVM_DEBUG(dbgs() << "  STORE: " << *NewSI << "\n");
      (void)NewSI;

      // Strengthen the application store's ordering (addReleaseOrdering) so
      // the preceding shadow store is published with it.
      if (SI->isAtomic())
        SI->setOrdering(addReleaseOrdering(SI->getOrdering()));

      // No origin for atomic stores — their shadow is clean by construction.
      if (MS.TrackOrigins && !SI->isAtomic())
        storeOrigin(IRB, Addr, Shadow, getOrigin(Val), OriginPtr,
                    OriginAlignment, InstrumentWithCalls);
    }
  }
1185 
  /// Helper function to insert a warning at IRB's current insert point.
  void insertWarningFn(IRBuilder<> &IRB, Value *Origin) {
    // A null origin degrades to the "no origin" id 0.
    if (!Origin)
      Origin = (Value *)IRB.getInt32(0);
    if (MS.CompileKernel) {
      // The kernel runtime's __msan_warning takes the origin as an argument.
      IRB.CreateCall(MS.WarningFn, Origin);
    } else {
      // Userspace passes the origin out-of-band via __msan_origin_tls.
      if (MS.TrackOrigins) {
        IRB.CreateStore(Origin, MS.OriginTLS);
      }
      IRB.CreateCall(MS.WarningFn, {});
    }
    // Empty side-effecting inline asm keeps identical warning calls from
    // being merged by later passes.
    IRB.CreateCall(MS.EmptyAsm, {});
    // FIXME: Insert UnreachableInst if !MS.Recover?
    // This may invalidate some of the following checks and needs to be done
    // at the very end.
  }
1203 
  /// Emit a check of Shadow before OrigIns, invoking the warning function
  /// when the shadow is non-zero.
  void materializeOneCheck(Instruction *OrigIns, Value *Shadow, Value *Origin,
                           bool AsCall) {
    IRBuilder<> IRB(OrigIns);
    LLVM_DEBUG(dbgs() << "  SHAD0 : " << *Shadow << "\n");
    // Flatten vector shadow into one integer so a single compare suffices.
    Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
    LLVM_DEBUG(dbgs() << "  SHAD1 : " << *ConvertedShadow << "\n");

    Constant *ConstantShadow = dyn_cast_or_null<Constant>(ConvertedShadow);
    if (ConstantShadow) {
      // Constant shadow: no runtime comparison is needed — either warn
      // unconditionally (if enabled and poisoned) or emit nothing.
      if (ClCheckConstantShadow && !ConstantShadow->isZeroValue()) {
        insertWarningFn(IRB, Origin);
      }
      return;
    }

    const DataLayout &DL = OrigIns->getModule()->getDataLayout();

    unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
    unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
    if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
      // Outline the check into __msan_maybe_warning_N; origin id 0 when
      // origins are unavailable.
      FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
      Value *ConvertedShadow2 =
          IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
      IRB.CreateCall(Fn, {ConvertedShadow2, MS.TrackOrigins && Origin
                                                ? Origin
                                                : (Value *)IRB.getInt32(0)});
    } else {
      // Inline check: compare against the all-zero (clean) shadow and branch
      // to a cold warning block when it differs.
      Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
                                    getCleanShadow(ConvertedShadow), "_mscmp");
      Instruction *CheckTerm = SplitBlockAndInsertIfThen(
          Cmp, OrigIns,
          /* Unreachable */ !MS.Recover, MS.ColdCallWeights);

      IRB.SetInsertPoint(CheckTerm);
      insertWarningFn(IRB, Origin);
      LLVM_DEBUG(dbgs() << "  CHECK: " << *Cmp << "\n");
    }
  }
1242 
1243   void materializeChecks(bool InstrumentWithCalls) {
1244     for (const auto &ShadowData : InstrumentationList) {
1245       Instruction *OrigIns = ShadowData.OrigIns;
1246       Value *Shadow = ShadowData.Shadow;
1247       Value *Origin = ShadowData.Origin;
1248       materializeOneCheck(OrigIns, Shadow, Origin, InstrumentWithCalls);
1249     }
1250     LLVM_DEBUG(dbgs() << "DONE:\n" << F);
1251   }
1252 
  /// For KMSAN: fetch the per-task context state from the runtime at
  /// function entry and point MS's TLS-equivalent members at its fields.
  /// Returns the block where instrumentation proper should begin.
  BasicBlock *insertKmsanPrologue(Function &F) {
    BasicBlock *ret =
        SplitBlock(&F.getEntryBlock(), F.getEntryBlock().getFirstNonPHI());
    IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
    Value *ContextState = IRB.CreateCall(MS.MsanGetContextStateFn, {});
    Constant *Zero = IRB.getInt32(0);
    // Field indices must match the layout of MsanContextStateTy declared in
    // createKernelApi.
    MS.ParamTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
                                {Zero, IRB.getInt32(0)}, "param_shadow");
    MS.RetvalTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
                                 {Zero, IRB.getInt32(1)}, "retval_shadow");
    MS.VAArgTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
                                {Zero, IRB.getInt32(2)}, "va_arg_shadow");
    MS.VAArgOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
                                      {Zero, IRB.getInt32(3)}, "va_arg_origin");
    MS.VAArgOverflowSizeTLS =
        IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
                      {Zero, IRB.getInt32(4)}, "va_arg_overflow_size");
    MS.ParamOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
                                      {Zero, IRB.getInt32(5)}, "param_origin");
    MS.RetvalOriginTLS =
        IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
                      {Zero, IRB.getInt32(6)}, "retval_origin");
    return ret;
  }
1277 
  /// Add MemorySanitizer instrumentation to a function.
  /// Always returns true (the function is modified).
  bool runOnFunction() {
    // In the presence of unreachable blocks, we may see Phi nodes with
    // incoming nodes from such blocks. Since InstVisitor skips unreachable
    // blocks, such nodes will not have any shadow value associated with them.
    // It's easier to remove unreachable blocks than deal with missing shadow.
    removeUnreachableBlocks(F);

    // Iterate all BBs in depth-first order and create shadow instructions
    // for all instructions (where applicable).
    // For PHI nodes we create dummy shadow PHIs which will be finalized later.
    for (BasicBlock *BB : depth_first(ActualFnStart))
      visit(*BB);

    // Finalize PHI nodes.
    // All predecessors have been visited now, so the incoming shadow/origin
    // values exist and can be wired in.
    for (PHINode *PN : ShadowPHINodes) {
      PHINode *PNS = cast<PHINode>(getShadow(PN));
      PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : nullptr;
      size_t NumValues = PN->getNumIncomingValues();
      for (size_t v = 0; v < NumValues; v++) {
        PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
        if (PNO) PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
      }
    }

    VAHelper->finalizeInstrumentation();

    // Poison llvm.lifetime.start intrinsics, if we haven't fallen back to
    // instrumenting only allocas.
    if (InstrumentLifetimeStart) {
      for (auto Item : LifetimeStartList) {
        instrumentAlloca(*Item.second, Item.first);
        AllocaSet.erase(Item.second);
      }
    }
    // Poison the allocas for which we didn't instrument the corresponding
    // lifetime intrinsics.
    for (AllocaInst *AI : AllocaSet)
      instrumentAlloca(*AI);

    // Past the configured threshold, checks and origin stores are outlined
    // into runtime callbacks rather than emitted inline.
    bool InstrumentWithCalls = ClInstrumentationWithCallThreshold >= 0 &&
                               InstrumentationList.size() + StoreList.size() >
                                   (unsigned)ClInstrumentationWithCallThreshold;

    // Insert shadow value checks.
    materializeChecks(InstrumentWithCalls);

    // Delayed instrumentation of StoreInst.
    // This may not add new address checks.
    materializeStores(InstrumentWithCalls);

    return true;
  }
1331 
  /// Compute the shadow type that corresponds to a given Value.
  Type *getShadowTy(Value *V) {
    // The shadow type depends only on the value's type.
    return getShadowTy(V->getType());
  }
1336 
  /// Compute the shadow type that corresponds to a given Type.
  ///
  /// Returns nullptr for unsized types. Otherwise the shadow type mirrors
  /// the original's structure, with every scalar leaf replaced by an integer
  /// of equal bit width.
  Type *getShadowTy(Type *OrigTy) {
    if (!OrigTy->isSized()) {
      return nullptr;
    }
    // For integer type, shadow is the same as the original type.
    // This may return weird-sized types like i1.
    if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
      return IT;
    const DataLayout &DL = F.getParent()->getDataLayout();
    if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
      // Vectors map to vectors of same-width integer elements.
      uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType());
      return VectorType::get(IntegerType::get(*MS.C, EltSize),
                             VT->getNumElements());
    }
    if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy)) {
      // Arrays map element-wise.
      return ArrayType::get(getShadowTy(AT->getElementType()),
                            AT->getNumElements());
    }
    if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
      // Structs map field-wise, preserving packing.
      SmallVector<Type*, 4> Elements;
      for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
        Elements.push_back(getShadowTy(ST->getElementType(i)));
      StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked());
      LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
      return Res;
    }
    // Any other sized type (e.g. floating point) becomes an integer of the
    // same bit width.
    uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
    return IntegerType::get(*MS.C, TypeSize);
  }
1367 
1368   /// Flatten a vector type.
1369   Type *getShadowTyNoVec(Type *ty) {
1370     if (VectorType *vt = dyn_cast<VectorType>(ty))
1371       return IntegerType::get(*MS.C, vt->getBitWidth());
1372     return ty;
1373   }
1374 
1375   /// Convert a shadow value to it's flattened variant.
1376   Value *convertToShadowTyNoVec(Value *V, IRBuilder<> &IRB) {
1377     Type *Ty = V->getType();
1378     Type *NoVecTy = getShadowTyNoVec(Ty);
1379     if (Ty == NoVecTy) return V;
1380     return IRB.CreateBitCast(V, NoVecTy);
1381   }
1382 
  /// Compute the integer shadow offset that corresponds to a given
  /// application address.
  ///
  /// Offset = (Addr & ~AndMask) ^ XorMask
  /// Either mask may be zero for a given platform, in which case the
  /// corresponding operation is omitted entirely.
  Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) {
    Value *OffsetLong = IRB.CreatePointerCast(Addr, MS.IntptrTy);

    uint64_t AndMask = MS.MapParams->AndMask;
    if (AndMask)
      OffsetLong =
          IRB.CreateAnd(OffsetLong, ConstantInt::get(MS.IntptrTy, ~AndMask));

    uint64_t XorMask = MS.MapParams->XorMask;
    if (XorMask)
      OffsetLong =
          IRB.CreateXor(OffsetLong, ConstantInt::get(MS.IntptrTy, XorMask));
    return OffsetLong;
  }
1401 
  /// Compute the shadow and origin addresses corresponding to a given
  /// application address.
  ///
  /// Shadow = ShadowBase + Offset
  /// Origin = (OriginBase + Offset) & ~3ULL
  /// The returned origin pointer is null when origin tracking is off.
  std::pair<Value *, Value *> getShadowOriginPtrUserspace(Value *Addr,
                                                          IRBuilder<> &IRB,
                                                          Type *ShadowTy,
                                                          unsigned Alignment) {
    Value *ShadowOffset = getShadowPtrOffset(Addr, IRB);
    Value *ShadowLong = ShadowOffset;
    uint64_t ShadowBase = MS.MapParams->ShadowBase;
    if (ShadowBase != 0) {
      ShadowLong =
        IRB.CreateAdd(ShadowLong,
                      ConstantInt::get(MS.IntptrTy, ShadowBase));
    }
    Value *ShadowPtr =
        IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
    Value *OriginPtr = nullptr;
    if (MS.TrackOrigins) {
      Value *OriginLong = ShadowOffset;
      uint64_t OriginBase = MS.MapParams->OriginBase;
      if (OriginBase != 0)
        OriginLong = IRB.CreateAdd(OriginLong,
                                   ConstantInt::get(MS.IntptrTy, OriginBase));
      if (Alignment < kMinOriginAlignment) {
        // Round down to kMinOriginAlignment so the origin store is aligned.
        uint64_t Mask = kMinOriginAlignment - 1;
        OriginLong =
            IRB.CreateAnd(OriginLong, ConstantInt::get(MS.IntptrTy, ~Mask));
      }
      OriginPtr =
          IRB.CreateIntToPtr(OriginLong, PointerType::get(MS.OriginTy, 0));
    }
    return std::make_pair(ShadowPtr, OriginPtr);
  }
1438 
  /// Compute shadow and origin pointers by calling into the KMSAN runtime.
  ///
  /// Uses the size-specialized __msan_metadata_ptr_for_{load,store}_N
  /// callback when one exists for this access size, and the generic *_n
  /// variant (which takes the size as an argument) otherwise.
  std::pair<Value *, Value *>
  getShadowOriginPtrKernel(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy,
                           unsigned Alignment, bool isStore) {
    Value *ShadowOriginPtrs;
    const DataLayout &DL = F.getParent()->getDataLayout();
    int Size = DL.getTypeStoreSize(ShadowTy);

    FunctionCallee Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
    Value *AddrCast =
        IRB.CreatePointerCast(Addr, PointerType::get(IRB.getInt8Ty(), 0));
    if (Getter) {
      ShadowOriginPtrs = IRB.CreateCall(Getter, AddrCast);
    } else {
      Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
      ShadowOriginPtrs = IRB.CreateCall(isStore ? MS.MsanMetadataPtrForStoreN
                                                : MS.MsanMetadataPtrForLoadN,
                                        {AddrCast, SizeVal});
    }
    // The callbacks return a { i8*, i32* } pair: shadow pointer first,
    // origin pointer second.
    Value *ShadowPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 0);
    ShadowPtr = IRB.CreatePointerCast(ShadowPtr, PointerType::get(ShadowTy, 0));
    Value *OriginPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 1);

    return std::make_pair(ShadowPtr, OriginPtr);
  }
1463 
1464   std::pair<Value *, Value *> getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB,
1465                                                  Type *ShadowTy,
1466                                                  unsigned Alignment,
1467                                                  bool isStore) {
1468     std::pair<Value *, Value *> ret;
1469     if (MS.CompileKernel)
1470       ret = getShadowOriginPtrKernel(Addr, IRB, ShadowTy, Alignment, isStore);
1471     else
1472       ret = getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment);
1473     return ret;
1474   }
1475 
1476   /// Compute the shadow address for a given function argument.
1477   ///
1478   /// Shadow = ParamTLS+ArgOffset.
1479   Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB,
1480                                  int ArgOffset) {
1481     Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
1482     if (ArgOffset)
1483       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1484     return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
1485                               "_msarg");
1486   }
1487 
1488   /// Compute the origin address for a given function argument.
1489   Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB,
1490                                  int ArgOffset) {
1491     if (!MS.TrackOrigins)
1492       return nullptr;
1493     Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
1494     if (ArgOffset)
1495       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1496     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
1497                               "_msarg_o");
1498   }
1499 
1500   /// Compute the shadow address for a retval.
1501   Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) {
1502     return IRB.CreatePointerCast(MS.RetvalTLS,
1503                                  PointerType::get(getShadowTy(A), 0),
1504                                  "_msret");
1505   }
1506 
  /// Compute the origin address for a retval.
  ///
  /// Returns RetvalOriginTLS directly; the IRBuilder parameter is unused
  /// here but kept for interface symmetry with getShadowPtrForRetval.
  Value *getOriginPtrForRetval(IRBuilder<> &IRB) {
    // We keep a single origin for the entire retval. Might be too optimistic.
    return MS.RetvalOriginTLS;
  }
1512 
1513   /// Set SV to be the shadow value for V.
1514   void setShadow(Value *V, Value *SV) {
1515     assert(!ShadowMap.count(V) && "Values may only have one shadow");
1516     ShadowMap[V] = PropagateShadow ? SV : getCleanShadow(V);
1517   }
1518 
  /// Set Origin to be the origin value for V.
  ///
  /// No-op when origin tracking is disabled. Each value may be assigned an
  /// origin at most once.
  void setOrigin(Value *V, Value *Origin) {
    if (!MS.TrackOrigins) return;
    assert(!OriginMap.count(V) && "Values may only have one origin");
    LLVM_DEBUG(dbgs() << "ORIGIN: " << *V << "  ==> " << *Origin << "\n");
    OriginMap[V] = Origin;
  }
1526 
1527   Constant *getCleanShadow(Type *OrigTy) {
1528     Type *ShadowTy = getShadowTy(OrigTy);
1529     if (!ShadowTy)
1530       return nullptr;
1531     return Constant::getNullValue(ShadowTy);
1532   }
1533 
  /// Create a clean shadow value for a given value.
  ///
  /// Clean shadow (all zeroes) means all bits of the value are defined
  /// (initialized). Delegates to the Type* overload.
  Constant *getCleanShadow(Value *V) {
    return getCleanShadow(V->getType());
  }
1541 
1542   /// Create a dirty shadow of a given shadow type.
1543   Constant *getPoisonedShadow(Type *ShadowTy) {
1544     assert(ShadowTy);
1545     if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
1546       return Constant::getAllOnesValue(ShadowTy);
1547     if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy)) {
1548       SmallVector<Constant *, 4> Vals(AT->getNumElements(),
1549                                       getPoisonedShadow(AT->getElementType()));
1550       return ConstantArray::get(AT, Vals);
1551     }
1552     if (StructType *ST = dyn_cast<StructType>(ShadowTy)) {
1553       SmallVector<Constant *, 4> Vals;
1554       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1555         Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
1556       return ConstantStruct::get(ST, Vals);
1557     }
1558     llvm_unreachable("Unexpected shadow type");
1559   }
1560 
1561   /// Create a dirty shadow for a given value.
1562   Constant *getPoisonedShadow(Value *V) {
1563     Type *ShadowTy = getShadowTy(V);
1564     if (!ShadowTy)
1565       return nullptr;
1566     return getPoisonedShadow(ShadowTy);
1567   }
1568 
  /// Create a clean (zero) origin.
  ///
  /// Origin 0 means "no origin recorded" / fully initialized data.
  Value *getCleanOrigin() {
    return Constant::getNullValue(MS.OriginTy);
  }
1573 
  /// Get the shadow value for a given Value.
  ///
  /// This function either returns the value set earlier with setShadow,
  /// or extracts it from ParamTLS (for function arguments).
  ///
  /// Argument shadows are materialized lazily: the first query for any
  /// argument of a function emits the TLS loads/copies for that argument in
  /// the function entry block and caches the result in ShadowMap.
  Value *getShadow(Value *V) {
    // With propagation disabled, everything is treated as initialized.
    if (!PropagateShadow) return getCleanShadow(V);
    if (Instruction *I = dyn_cast<Instruction>(V)) {
      // Instructions marked "nosanitize" never produce poisoned results.
      if (I->getMetadata("nosanitize"))
        return getCleanShadow(V);
      // For instructions the shadow is already stored in the map.
      Value *Shadow = ShadowMap[V];
      if (!Shadow) {
        LLVM_DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
        (void)I;
        assert(Shadow && "No shadow for a value");
      }
      return Shadow;
    }
    if (UndefValue *U = dyn_cast<UndefValue>(V)) {
      // Undef is poisoned unless -msan-poison-undef is disabled.
      Value *AllOnes = PoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V);
      LLVM_DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
      (void)U;
      return AllOnes;
    }
    if (Argument *A = dyn_cast<Argument>(V)) {
      // For arguments we compute the shadow on demand and store it in the map.
      Value **ShadowPtr = &ShadowMap[V];
      if (*ShadowPtr)
        return *ShadowPtr;
      Function *F = A->getParent();
      // All argument shadow loads go at the very start of the function.
      IRBuilder<> EntryIRB(ActualFnStart->getFirstNonPHI());
      unsigned ArgOffset = 0;
      const DataLayout &DL = F->getParent()->getDataLayout();
      // Walk every argument to compute A's offset within ParamTLS; offsets
      // accumulate across all preceding sized arguments.
      for (auto &FArg : F->args()) {
        if (!FArg.getType()->isSized()) {
          LLVM_DEBUG(dbgs() << "Arg is not sized\n");
          continue;
        }
        // For byval arguments the TLS slot holds the shadow of the pointee,
        // so size by the pointee type, not the pointer.
        unsigned Size =
            FArg.hasByValAttr()
                ? DL.getTypeAllocSize(FArg.getType()->getPointerElementType())
                : DL.getTypeAllocSize(FArg.getType());
        if (A == &FArg) {
          // Arguments past the end of ParamTLS get a clean shadow instead.
          bool Overflow = ArgOffset + Size > kParamTLSSize;
          Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
          if (FArg.hasByValAttr()) {
            // ByVal pointer itself has clean shadow. We copy the actual
            // argument shadow to the underlying memory.
            // Figure out maximal valid memcpy alignment.
            unsigned ArgAlign = FArg.getParamAlignment();
            if (ArgAlign == 0) {
              Type *EltType = A->getType()->getPointerElementType();
              ArgAlign = DL.getABITypeAlignment(EltType);
            }
            Value *CpShadowPtr =
                getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign,
                                   /*isStore*/ true)
                    .first;
            // TODO(glider): need to copy origins.
            if (Overflow) {
              // ParamTLS overflow.
              EntryIRB.CreateMemSet(
                  CpShadowPtr, Constant::getNullValue(EntryIRB.getInt8Ty()),
                  Size, ArgAlign);
            } else {
              unsigned CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
              Value *Cpy = EntryIRB.CreateMemCpy(CpShadowPtr, CopyAlign, Base,
                                                 CopyAlign, Size);
              LLVM_DEBUG(dbgs() << "  ByValCpy: " << *Cpy << "\n");
              (void)Cpy;
            }
            *ShadowPtr = getCleanShadow(V);
          } else {
            if (Overflow) {
              // ParamTLS overflow.
              *ShadowPtr = getCleanShadow(V);
            } else {
              *ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base,
                                                      kShadowTLSAlignment);
            }
          }
          LLVM_DEBUG(dbgs()
                     << "  ARG:    " << FArg << " ==> " << **ShadowPtr << "\n");
          // Origins are only valid for arguments within the TLS buffer.
          if (MS.TrackOrigins && !Overflow) {
            Value *OriginPtr =
                getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
            setOrigin(A, EntryIRB.CreateLoad(MS.OriginTy, OriginPtr));
          } else {
            setOrigin(A, getCleanOrigin());
          }
        }
        ArgOffset += alignTo(Size, kShadowTLSAlignment);
      }
      assert(*ShadowPtr && "Could not find shadow for an argument");
      return *ShadowPtr;
    }
    // For everything else the shadow is zero.
    return getCleanShadow(V);
  }
1673 
  /// Get the shadow for i-th operand of the instruction I.
  Value *getShadow(Instruction *I, int i) {
    return getShadow(I->getOperand(i));
  }
1678 
  /// Get the origin for a value.
  ///
  /// Returns null when origin tracking is off, and a clean origin for
  /// constants, disabled propagation, and "nosanitize" instructions.
  /// For everything else the origin must already be in OriginMap.
  Value *getOrigin(Value *V) {
    if (!MS.TrackOrigins) return nullptr;
    if (!PropagateShadow) return getCleanOrigin();
    if (isa<Constant>(V)) return getCleanOrigin();
    assert((isa<Instruction>(V) || isa<Argument>(V)) &&
           "Unexpected value type in getOrigin()");
    if (Instruction *I = dyn_cast<Instruction>(V)) {
      if (I->getMetadata("nosanitize"))
        return getCleanOrigin();
    }
    // Note: operator[] default-inserts null for a missing key; the assert
    // below catches that case in debug builds.
    Value *Origin = OriginMap[V];
    assert(Origin && "Missing origin");
    return Origin;
  }
1694 
  /// Get the origin for i-th operand of the instruction I.
  Value *getOrigin(Instruction *I, int i) {
    return getOrigin(I->getOperand(i));
  }
1699 
  /// Remember the place where a shadow check should be inserted.
  ///
  /// This location will be later instrumented with a check that will print a
  /// UMR warning in runtime if the shadow value is not 0. The check itself is
  /// not emitted here; it is only queued on InstrumentationList.
  void insertShadowCheck(Value *Shadow, Value *Origin, Instruction *OrigIns) {
    assert(Shadow);
    if (!InsertChecks) return;
#ifndef NDEBUG
    Type *ShadowTy = Shadow->getType();
    assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy)) &&
           "Can only insert checks for integer and vector shadow types");
#endif
    InstrumentationList.push_back(
        ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
  }
1715 
  /// Remember the place where a shadow check should be inserted.
  ///
  /// This location will be later instrumented with a check that will print a
  /// UMR warning in runtime if the value is not fully defined.
  void insertShadowCheck(Value *Val, Instruction *OrigIns) {
    assert(Val);
    Value *Shadow, *Origin;
    if (ClCheckConstantShadow) {
      Shadow = getShadow(Val);
      if (!Shadow) return;
      Origin = getOrigin(Val);
    } else {
      // Unless checking of constant shadows is requested, only check values
      // whose shadow is an Instruction: a constant shadow (e.g. a clean one)
      // fails the dyn_cast and the check is skipped entirely.
      Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
      if (!Shadow) return;
      Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
    }
    insertShadowCheck(Shadow, Origin, OrigIns);
  }
1734 
1735   AtomicOrdering addReleaseOrdering(AtomicOrdering a) {
1736     switch (a) {
1737       case AtomicOrdering::NotAtomic:
1738         return AtomicOrdering::NotAtomic;
1739       case AtomicOrdering::Unordered:
1740       case AtomicOrdering::Monotonic:
1741       case AtomicOrdering::Release:
1742         return AtomicOrdering::Release;
1743       case AtomicOrdering::Acquire:
1744       case AtomicOrdering::AcquireRelease:
1745         return AtomicOrdering::AcquireRelease;
1746       case AtomicOrdering::SequentiallyConsistent:
1747         return AtomicOrdering::SequentiallyConsistent;
1748     }
1749     llvm_unreachable("Unknown ordering");
1750   }
1751 
1752   AtomicOrdering addAcquireOrdering(AtomicOrdering a) {
1753     switch (a) {
1754       case AtomicOrdering::NotAtomic:
1755         return AtomicOrdering::NotAtomic;
1756       case AtomicOrdering::Unordered:
1757       case AtomicOrdering::Monotonic:
1758       case AtomicOrdering::Acquire:
1759         return AtomicOrdering::Acquire;
1760       case AtomicOrdering::Release:
1761       case AtomicOrdering::AcquireRelease:
1762         return AtomicOrdering::AcquireRelease;
1763       case AtomicOrdering::SequentiallyConsistent:
1764         return AtomicOrdering::SequentiallyConsistent;
1765     }
1766     llvm_unreachable("Unknown ordering");
1767   }
1768 
1769   // ------------------- Visitors.
1770   using InstVisitor<MemorySanitizerVisitor>::visit;
1771   void visit(Instruction &I) {
1772     if (!I.getMetadata("nosanitize"))
1773       InstVisitor<MemorySanitizerVisitor>::visit(I);
1774   }
1775 
  /// Instrument LoadInst
  ///
  /// Loads the corresponding shadow and (optionally) origin.
  /// Optionally, checks that the load address is fully defined.
  /// The shadow/origin loads are inserted *after* the application load
  /// (builder positioned at I's successor), since they describe its result.
  void visitLoadInst(LoadInst &I) {
    assert(I.getType()->isSized() && "Load type must have size");
    assert(!I.getMetadata("nosanitize"));
    IRBuilder<> IRB(I.getNextNode());
    Type *ShadowTy = getShadowTy(&I);
    Value *Addr = I.getPointerOperand();
    Value *ShadowPtr, *OriginPtr;
    unsigned Alignment = I.getAlignment();
    if (PropagateShadow) {
      std::tie(ShadowPtr, OriginPtr) =
          getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
      setShadow(&I,
                IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
    } else {
      setShadow(&I, getCleanShadow(&I));
    }

    if (ClCheckAccessAddress)
      insertShadowCheck(I.getPointerOperand(), &I);

    // Atomic loads are strengthened to at-least-acquire ordering.
    // NOTE(review): rationale inferred — presumably so the shadow load that
    // follows cannot observe stale shadow; confirm against MSan design docs.
    if (I.isAtomic())
      I.setOrdering(addAcquireOrdering(I.getOrdering()));

    if (MS.TrackOrigins) {
      if (PropagateShadow) {
        // Origins are stored at at-least-kMinOriginAlignment granularity.
        unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
        setOrigin(
            &I, IRB.CreateAlignedLoad(MS.OriginTy, OriginPtr, OriginAlignment));
      } else {
        setOrigin(&I, getCleanOrigin());
      }
    }
  }
1813 
  /// Instrument StoreInst
  ///
  /// Stores the corresponding shadow and (optionally) origin.
  /// Optionally, checks that the store address is fully defined.
  /// Only the address check is emitted here; the store itself is queued on
  /// StoreList — presumably materialized in bulk later (TODO confirm with
  /// the StoreList consumer, not visible in this chunk).
  void visitStoreInst(StoreInst &I) {
    StoreList.push_back(&I);
    if (ClCheckAccessAddress)
      insertShadowCheck(I.getPointerOperand(), &I);
  }
1823 
  /// Common instrumentation for atomic cmpxchg and read-modify-write.
  ///
  /// The memory location's shadow is overwritten with a clean shadow, and
  /// the instruction's own result is treated as fully initialized.
  void handleCASOrRMW(Instruction &I) {
    assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));

    IRBuilder<> IRB(&I);
    Value *Addr = I.getOperand(0);
    Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, I.getType(),
                                          /*Alignment*/ 1, /*isStore*/ true)
                           .first;

    if (ClCheckAccessAddress)
      insertShadowCheck(Addr, &I);

    // Only test the conditional argument of cmpxchg instruction.
    // The other argument can potentially be uninitialized, but we can not
    // detect this situation reliably without possible false positives.
    if (isa<AtomicCmpXchgInst>(I))
      insertShadowCheck(I.getOperand(1), &I);

    // Unpoison the target memory: whatever the atomic op writes is
    // considered initialized.
    IRB.CreateStore(getCleanShadow(&I), ShadowPtr);

    setShadow(&I, getCleanShadow(&I));
    setOrigin(&I, getCleanOrigin());
  }
1847 
  /// Instrument atomic read-modify-write; its ordering is strengthened to
  /// at least Release.
  void visitAtomicRMWInst(AtomicRMWInst &I) {
    handleCASOrRMW(I);
    I.setOrdering(addReleaseOrdering(I.getOrdering()));
  }
1852 
  /// Instrument cmpxchg; its success ordering is strengthened to at least
  /// Release (the failure ordering is left untouched).
  void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
    handleCASOrRMW(I);
    I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
  }
1857 
1858   // Vector manipulation.
1859   void visitExtractElementInst(ExtractElementInst &I) {
1860     insertShadowCheck(I.getOperand(1), &I);
1861     IRBuilder<> IRB(&I);
1862     setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
1863               "_msprop"));
1864     setOrigin(&I, getOrigin(&I, 0));
1865   }
1866 
1867   void visitInsertElementInst(InsertElementInst &I) {
1868     insertShadowCheck(I.getOperand(2), &I);
1869     IRBuilder<> IRB(&I);
1870     setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1),
1871               I.getOperand(2), "_msprop"));
1872     setOriginForNaryOp(I);
1873   }
1874 
1875   void visitShuffleVectorInst(ShuffleVectorInst &I) {
1876     insertShadowCheck(I.getOperand(2), &I);
1877     IRBuilder<> IRB(&I);
1878     setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1),
1879               I.getOperand(2), "_msprop"));
1880     setOriginForNaryOp(I);
1881   }
1882 
1883   // Casts.
1884   void visitSExtInst(SExtInst &I) {
1885     IRBuilder<> IRB(&I);
1886     setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop"));
1887     setOrigin(&I, getOrigin(&I, 0));
1888   }
1889 
1890   void visitZExtInst(ZExtInst &I) {
1891     IRBuilder<> IRB(&I);
1892     setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop"));
1893     setOrigin(&I, getOrigin(&I, 0));
1894   }
1895 
1896   void visitTruncInst(TruncInst &I) {
1897     IRBuilder<> IRB(&I);
1898     setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop"));
1899     setOrigin(&I, getOrigin(&I, 0));
1900   }
1901 
1902   void visitBitCastInst(BitCastInst &I) {
1903     // Special case: if this is the bitcast (there is exactly 1 allowed) between
1904     // a musttail call and a ret, don't instrument. New instructions are not
1905     // allowed after a musttail call.
1906     if (auto *CI = dyn_cast<CallInst>(I.getOperand(0)))
1907       if (CI->isMustTailCall())
1908         return;
1909     IRBuilder<> IRB(&I);
1910     setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
1911     setOrigin(&I, getOrigin(&I, 0));
1912   }
1913 
1914   void visitPtrToIntInst(PtrToIntInst &I) {
1915     IRBuilder<> IRB(&I);
1916     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
1917              "_msprop_ptrtoint"));
1918     setOrigin(&I, getOrigin(&I, 0));
1919   }
1920 
1921   void visitIntToPtrInst(IntToPtrInst &I) {
1922     IRBuilder<> IRB(&I);
1923     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
1924              "_msprop_inttoptr"));
1925     setOrigin(&I, getOrigin(&I, 0));
1926   }
1927 
  // Int<->FP and FP-precision conversions: a bitwise cast of the shadow is
  // not meaningful across these, so fall back to the conservative
  // shadow-OR propagation (result poisoned iff any input bit is poisoned).
  void visitFPToSIInst(CastInst& I) { handleShadowOr(I); }
  void visitFPToUIInst(CastInst& I) { handleShadowOr(I); }
  void visitSIToFPInst(CastInst& I) { handleShadowOr(I); }
  void visitUIToFPInst(CastInst& I) { handleShadowOr(I); }
  void visitFPExtInst(CastInst& I) { handleShadowOr(I); }
  void visitFPTruncInst(CastInst& I) { handleShadowOr(I); }
1934 
  /// Propagate shadow for bitwise AND.
  ///
  /// This code is exact, i.e. if, for example, a bit in the left argument
  /// is defined and 0, then neither the value nor the definedness of the
  /// corresponding bit in B affects the resulting shadow.
  void visitAnd(BinaryOperator &I) {
    IRBuilder<> IRB(&I);
    //  "And" of 0 and a poisoned value results in unpoisoned value.
    //  1&1 => 1;     0&1 => 0;     p&1 => p;
    //  1&0 => 0;     0&0 => 0;     p&0 => 0;
    //  1&p => p;     0&p => 0;     p&p => p;
    //  S = (S1 & S2) | (V1 & S2) | (S1 & V2)
    Value *S1 = getShadow(&I, 0);
    Value *S2 = getShadow(&I, 1);
    Value *V1 = I.getOperand(0);
    Value *V2 = I.getOperand(1);
    // Pointer operands: reinterpret the values in the shadow's integer type
    // so the bitwise formula below type-checks.
    if (V1->getType() != S1->getType()) {
      V1 = IRB.CreateIntCast(V1, S1->getType(), false);
      V2 = IRB.CreateIntCast(V2, S2->getType(), false);
    }
    Value *S1S2 = IRB.CreateAnd(S1, S2);
    Value *V1S2 = IRB.CreateAnd(V1, S2);
    Value *S1V2 = IRB.CreateAnd(S1, V2);
    setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
    setOriginForNaryOp(I);
  }
1961 
  /// Propagate shadow for bitwise OR.
  ///
  /// Exact, by the dual of the visitAnd argument: a defined 1 bit in either
  /// operand fully determines the result bit, regardless of the other side.
  void visitOr(BinaryOperator &I) {
    IRBuilder<> IRB(&I);
    //  "Or" of 1 and a poisoned value results in unpoisoned value.
    //  1|1 => 1;     0|1 => 1;     p|1 => 1;
    //  1|0 => 1;     0|0 => 0;     p|0 => p;
    //  1|p => 1;     0|p => p;     p|p => p;
    //  S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
    Value *S1 = getShadow(&I, 0);
    Value *S2 = getShadow(&I, 1);
    // Operands are negated so the formula matches the AND case structurally.
    Value *V1 = IRB.CreateNot(I.getOperand(0));
    Value *V2 = IRB.CreateNot(I.getOperand(1));
    if (V1->getType() != S1->getType()) {
      V1 = IRB.CreateIntCast(V1, S1->getType(), false);
      V2 = IRB.CreateIntCast(V2, S2->getType(), false);
    }
    Value *S1S2 = IRB.CreateAnd(S1, S2);
    Value *V1S2 = IRB.CreateAnd(V1, S2);
    Value *S1V2 = IRB.CreateAnd(S1, V2);
    setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
    setOriginForNaryOp(I);
  }
1983 
  /// Default propagation of shadow and/or origin.
  ///
  /// This class implements the general case of shadow propagation, used in all
  /// cases where we don't know and/or don't care about what the operation
  /// actually does. It converts all input shadow values to a common type
  /// (extending or truncating as necessary), and bitwise OR's them.
  ///
  /// This is much cheaper than inserting checks (i.e. requiring inputs to be
  /// fully initialized), and less prone to false positives.
  ///
  /// This class also implements the general case of origin propagation. For a
  /// Nary operation, result origin is set to the origin of an argument that is
  /// not entirely initialized. If there is more than one such argument, the
  /// rightmost of them is picked. It does not matter which one is picked if all
  /// arguments are initialized.
  template <bool CombineShadow>
  class Combiner {
    Value *Shadow = nullptr;  // Running OR of all shadows added so far.
    Value *Origin = nullptr;  // Origin of the last poisoned operand seen.
    IRBuilder<> &IRB;
    MemorySanitizerVisitor *MSV;

  public:
    Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB)
        : IRB(IRB), MSV(MSV) {}

    /// Add a pair of shadow and origin values to the mix.
    Combiner &Add(Value *OpShadow, Value *OpOrigin) {
      if (CombineShadow) {
        assert(OpShadow);
        if (!Shadow)
          Shadow = OpShadow;
        else {
          // Cast the new shadow to the accumulated shadow's type before
          // OR-ing them together.
          OpShadow = MSV->CreateShadowCast(IRB, OpShadow, Shadow->getType());
          Shadow = IRB.CreateOr(Shadow, OpShadow, "_msprop");
        }
      }

      if (MSV->MS.TrackOrigins) {
        assert(OpOrigin);
        if (!Origin) {
          Origin = OpOrigin;
        } else {
          Constant *ConstOrigin = dyn_cast<Constant>(OpOrigin);
          // No point in adding something that might result in 0 origin value.
          if (!ConstOrigin || !ConstOrigin->isNullValue()) {
            // Select this operand's origin only if its shadow is actually
            // poisoned (non-zero), implementing the "rightmost poisoned
            // argument wins" rule.
            Value *FlatShadow = MSV->convertToShadowTyNoVec(OpShadow, IRB);
            Value *Cond =
                IRB.CreateICmpNE(FlatShadow, MSV->getCleanShadow(FlatShadow));
            Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
          }
        }
      }
      return *this;
    }

    /// Add an application value to the mix.
    Combiner &Add(Value *V) {
      Value *OpShadow = MSV->getShadow(V);
      Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : nullptr;
      return Add(OpShadow, OpOrigin);
    }

    /// Set the current combined values as the given instruction's shadow
    /// and origin.
    void Done(Instruction *I) {
      if (CombineShadow) {
        assert(Shadow);
        // Adapt the accumulated shadow to I's own shadow type.
        Shadow = MSV->CreateShadowCast(IRB, Shadow, MSV->getShadowTy(I));
        MSV->setShadow(I, Shadow);
      }
      if (MSV->MS.TrackOrigins) {
        assert(Origin);
        MSV->setOrigin(I, Origin);
      }
    }
  };
2061 
2062   using ShadowAndOriginCombiner = Combiner<true>;
2063   using OriginCombiner = Combiner<false>;
2064 
2065   /// Propagate origin for arbitrary operation.
2066   void setOriginForNaryOp(Instruction &I) {
2067     if (!MS.TrackOrigins) return;
2068     IRBuilder<> IRB(&I);
2069     OriginCombiner OC(this, IRB);
2070     for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
2071       OC.Add(OI->get());
2072     OC.Done(&I);
2073   }
2074 
2075   size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) {
2076     assert(!(Ty->isVectorTy() && Ty->getScalarType()->isPointerTy()) &&
2077            "Vector of pointers is not a valid shadow type");
2078     return Ty->isVectorTy() ?
2079       Ty->getVectorNumElements() * Ty->getScalarSizeInBits() :
2080       Ty->getPrimitiveSizeInBits();
2081   }
2082 
  /// Cast between two shadow types, extending or truncating as
  /// necessary.
  ///
  /// Special cases: casting a multi-bit shadow to a 1-bit one collapses it
  /// to a single "any bit poisoned" flag; mismatched-width types go through
  /// an intermediate flat integer.
  Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy,
                          bool Signed = false) {
    Type *srcTy = V->getType();
    size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
    size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
    // Collapse to a single "is any bit poisoned" flag.
    if (srcSizeInBits > 1 && dstSizeInBits == 1)
      return IRB.CreateICmpNE(V, getCleanShadow(V));

    if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
      return IRB.CreateIntCast(V, dstTy, Signed);
    if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
        dstTy->getVectorNumElements() == srcTy->getVectorNumElements())
      return IRB.CreateIntCast(V, dstTy, Signed);
    // General case: flatten to iN, resize, then reinterpret as dstTy.
    Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
    Value *V2 =
      IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed);
    return IRB.CreateBitCast(V2, dstTy);
    // TODO: handle struct types.
  }
2104 
2105   /// Cast an application value to the type of its own shadow.
2106   Value *CreateAppToShadowCast(IRBuilder<> &IRB, Value *V) {
2107     Type *ShadowTy = getShadowTy(V);
2108     if (V->getType() == ShadowTy)
2109       return V;
2110     if (V->getType()->isPtrOrPtrVectorTy())
2111       return IRB.CreatePtrToInt(V, ShadowTy);
2112     else
2113       return IRB.CreateBitCast(V, ShadowTy);
2114   }
2115 
2116   /// Propagate shadow for arbitrary operation.
2117   void handleShadowOr(Instruction &I) {
2118     IRBuilder<> IRB(&I);
2119     ShadowAndOriginCombiner SC(this, IRB);
2120     for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
2121       SC.Add(OI->get());
2122     SC.Done(&I);
2123   }
2124 
  // FP negation: propagate the operand's shadow via the generic combiner.
  void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
2126 
  // Handle multiplication by constant.
  //
  // Handle a special case of multiplication by constant that may have one or
  // more zeros in the lower bits. This makes corresponding number of lower bits
  // of the result zero as well. We model it by shifting the other operand
  // shadow left by the required number of bits. Effectively, we transform
  // (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as (Sx << B).
  // We use multiplication by 2**N instead of shift to cover the case of
  // multiplication by 0, which may occur in some elements of a vector operand.
  void handleMulByConstant(BinaryOperator &I, Constant *ConstArg,
                           Value *OtherArg) {
    Constant *ShadowMul;
    Type *Ty = ConstArg->getType();
    if (Ty->isVectorTy()) {
      // Per-element multiplier: 2**countTrailingZeros(element), or the
      // neutral 1 for non-ConstantInt elements (e.g. undef).
      unsigned NumElements = Ty->getVectorNumElements();
      Type *EltTy = Ty->getSequentialElementType();
      SmallVector<Constant *, 16> Elements;
      for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
        if (ConstantInt *Elt =
                dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx))) {
          const APInt &V = Elt->getValue();
          APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
          Elements.push_back(ConstantInt::get(EltTy, V2));
        } else {
          Elements.push_back(ConstantInt::get(EltTy, 1));
        }
      }
      ShadowMul = ConstantVector::get(Elements);
    } else {
      // Scalar case: same 2**countTrailingZeros trick.
      if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg)) {
        const APInt &V = Elt->getValue();
        APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
        ShadowMul = ConstantInt::get(Ty, V2);
      } else {
        ShadowMul = ConstantInt::get(Ty, 1);
      }
    }

    IRBuilder<> IRB(&I);
    setShadow(&I,
              IRB.CreateMul(getShadow(OtherArg), ShadowMul, "msprop_mul_cst"));
    setOrigin(&I, getOrigin(OtherArg));
  }
2170 
2171   void visitMul(BinaryOperator &I) {
2172     Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
2173     Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
2174     if (constOp0 && !constOp1)
2175       handleMulByConstant(I, constOp0, I.getOperand(1));
2176     else if (constOp1 && !constOp0)
2177       handleMulByConstant(I, constOp1, I.getOperand(0));
2178     else
2179       handleShadowOr(I);
2180   }
2181 
  // Arithmetic with no exploitable bit structure: approximate with the
  // conservative shadow OR of the operands.
  void visitFAdd(BinaryOperator &I) { handleShadowOr(I); }
  void visitFSub(BinaryOperator &I) { handleShadowOr(I); }
  void visitFMul(BinaryOperator &I) { handleShadowOr(I); }
  void visitAdd(BinaryOperator &I) { handleShadowOr(I); }
  void visitSub(BinaryOperator &I) { handleShadowOr(I); }
  void visitXor(BinaryOperator &I) { handleShadowOr(I); }
2188 
2189   void handleIntegerDiv(Instruction &I) {
2190     IRBuilder<> IRB(&I);
2191     // Strict on the second argument.
2192     insertShadowCheck(I.getOperand(1), &I);
2193     setShadow(&I, getShadow(&I, 0));
2194     setOrigin(&I, getOrigin(&I, 0));
2195   }
2196 
  // All integer division/remainder flavors share handleIntegerDiv.
  void visitUDiv(BinaryOperator &I) { handleIntegerDiv(I); }
  void visitSDiv(BinaryOperator &I) { handleIntegerDiv(I); }
  void visitURem(BinaryOperator &I) { handleIntegerDiv(I); }
  void visitSRem(BinaryOperator &I) { handleIntegerDiv(I); }
2201 
  // Floating point division is side-effect free. We can not require that the
  // divisor is fully initialized and must propagate shadow. See PR37523.
  // (Unlike integer division, no check is inserted on the divisor.)
  void visitFDiv(BinaryOperator &I) { handleShadowOr(I); }
  void visitFRem(BinaryOperator &I) { handleShadowOr(I); }
2206 
  /// Instrument == and != comparisons.
  ///
  /// Sometimes the comparison result is known even if some of the bits of the
  /// arguments are not: a single defined bit that provably differs between A
  /// and B already decides A != B.
  void handleEqualityComparison(ICmpInst &I) {
    IRBuilder<> IRB(&I);
    Value *A = I.getOperand(0);
    Value *B = I.getOperand(1);
    Value *Sa = getShadow(A);
    Value *Sb = getShadow(B);

    // Get rid of pointers and vectors of pointers.
    // For ints (and vectors of ints), types of A and Sa match,
    // and this is a no-op.
    A = IRB.CreatePointerCast(A, Sa->getType());
    B = IRB.CreatePointerCast(B, Sb->getType());

    // A == B  <==>  (C = A^B) == 0
    // A != B  <==>  (C = A^B) != 0
    // Sc = Sa | Sb
    Value *C = IRB.CreateXor(A, B);
    Value *Sc = IRB.CreateOr(Sa, Sb);
    // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now)
    // Result is defined if one of the following is true
    // * there is a defined 1 bit in C
    // * C is fully defined
    // Si = !(C & ~Sc) && Sc
    // (i.e. the result shadow is set only when C is not fully defined AND no
    // defined bit of C is already 1.)
    Value *Zero = Constant::getNullValue(Sc->getType());
    Value *MinusOne = Constant::getAllOnesValue(Sc->getType());
    Value *Si =
      IRB.CreateAnd(IRB.CreateICmpNE(Sc, Zero),
                    IRB.CreateICmpEQ(
                      IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero));
    Si->setName("_msprop_icmp");
    setShadow(&I, Si);
    setOriginForNaryOp(I);
  }
2244 
2245   /// Build the lowest possible value of V, taking into account V's
2246   ///        uninitialized bits.
2247   Value *getLowestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
2248                                 bool isSigned) {
2249     if (isSigned) {
2250       // Split shadow into sign bit and other bits.
2251       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
2252       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
2253       // Maximise the undefined shadow bit, minimize other undefined bits.
2254       return
2255         IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaOtherBits)), SaSignBit);
2256     } else {
2257       // Minimize undefined bits.
2258       return IRB.CreateAnd(A, IRB.CreateNot(Sa));
2259     }
2260   }
2261 
2262   /// Build the highest possible value of V, taking into account V's
2263   ///        uninitialized bits.
2264   Value *getHighestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
2265                                 bool isSigned) {
2266     if (isSigned) {
2267       // Split shadow into sign bit and other bits.
2268       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
2269       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
2270       // Minimise the undefined shadow bit, maximise other undefined bits.
2271       return
2272         IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaSignBit)), SaOtherBits);
2273     } else {
2274       // Maximize undefined bits.
2275       return IRB.CreateOr(A, Sa);
2276     }
2277   }
2278 
2279   /// Instrument relational comparisons.
2280   ///
2281   /// This function does exact shadow propagation for all relational
2282   /// comparisons of integers, pointers and vectors of those.
2283   /// FIXME: output seems suboptimal when one of the operands is a constant
2284   void handleRelationalComparisonExact(ICmpInst &I) {
2285     IRBuilder<> IRB(&I);
2286     Value *A = I.getOperand(0);
2287     Value *B = I.getOperand(1);
2288     Value *Sa = getShadow(A);
2289     Value *Sb = getShadow(B);
2290 
2291     // Get rid of pointers and vectors of pointers.
2292     // For ints (and vectors of ints), types of A and Sa match,
2293     // and this is a no-op.
2294     A = IRB.CreatePointerCast(A, Sa->getType());
2295     B = IRB.CreatePointerCast(B, Sb->getType());
2296 
2297     // Let [a0, a1] be the interval of possible values of A, taking into account
2298     // its undefined bits. Let [b0, b1] be the interval of possible values of B.
2299     // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0).
2300     bool IsSigned = I.isSigned();
2301     Value *S1 = IRB.CreateICmp(I.getPredicate(),
2302                                getLowestPossibleValue(IRB, A, Sa, IsSigned),
2303                                getHighestPossibleValue(IRB, B, Sb, IsSigned));
2304     Value *S2 = IRB.CreateICmp(I.getPredicate(),
2305                                getHighestPossibleValue(IRB, A, Sa, IsSigned),
2306                                getLowestPossibleValue(IRB, B, Sb, IsSigned));
2307     Value *Si = IRB.CreateXor(S1, S2);
2308     setShadow(&I, Si);
2309     setOriginForNaryOp(I);
2310   }
2311 
2312   /// Instrument signed relational comparisons.
2313   ///
2314   /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest
2315   /// bit of the shadow. Everything else is delegated to handleShadowOr().
2316   void handleSignedRelationalComparison(ICmpInst &I) {
2317     Constant *constOp;
2318     Value *op = nullptr;
2319     CmpInst::Predicate pre;
2320     if ((constOp = dyn_cast<Constant>(I.getOperand(1)))) {
2321       op = I.getOperand(0);
2322       pre = I.getPredicate();
2323     } else if ((constOp = dyn_cast<Constant>(I.getOperand(0)))) {
2324       op = I.getOperand(1);
2325       pre = I.getSwappedPredicate();
2326     } else {
2327       handleShadowOr(I);
2328       return;
2329     }
2330 
2331     if ((constOp->isNullValue() &&
2332          (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) ||
2333         (constOp->isAllOnesValue() &&
2334          (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) {
2335       IRBuilder<> IRB(&I);
2336       Value *Shadow = IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op),
2337                                         "_msprop_icmp_s");
2338       setShadow(&I, Shadow);
2339       setOrigin(&I, getOrigin(op));
2340     } else {
2341       handleShadowOr(I);
2342     }
2343   }
2344 
2345   void visitICmpInst(ICmpInst &I) {
2346     if (!ClHandleICmp) {
2347       handleShadowOr(I);
2348       return;
2349     }
2350     if (I.isEquality()) {
2351       handleEqualityComparison(I);
2352       return;
2353     }
2354 
2355     assert(I.isRelational());
2356     if (ClHandleICmpExact) {
2357       handleRelationalComparisonExact(I);
2358       return;
2359     }
2360     if (I.isSigned()) {
2361       handleSignedRelationalComparison(I);
2362       return;
2363     }
2364 
2365     assert(I.isUnsigned());
2366     if ((isa<Constant>(I.getOperand(0)) || isa<Constant>(I.getOperand(1)))) {
2367       handleRelationalComparisonExact(I);
2368       return;
2369     }
2370 
2371     handleShadowOr(I);
2372   }
2373 
2374   void visitFCmpInst(FCmpInst &I) {
2375     handleShadowOr(I);
2376   }
2377 
2378   void handleShift(BinaryOperator &I) {
2379     IRBuilder<> IRB(&I);
2380     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2381     // Otherwise perform the same shift on S1.
2382     Value *S1 = getShadow(&I, 0);
2383     Value *S2 = getShadow(&I, 1);
2384     Value *S2Conv = IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)),
2385                                    S2->getType());
2386     Value *V2 = I.getOperand(1);
2387     Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2);
2388     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2389     setOriginForNaryOp(I);
2390   }
2391 
  // Shifts: a poisoned shift amount poisons the whole result; see
  // handleShift.
  void visitShl(BinaryOperator &I) { handleShift(I); }
  void visitAShr(BinaryOperator &I) { handleShift(I); }
  void visitLShr(BinaryOperator &I) { handleShift(I); }
2395 
2396   /// Instrument llvm.memmove
2397   ///
2398   /// At this point we don't know if llvm.memmove will be inlined or not.
2399   /// If we don't instrument it and it gets inlined,
2400   /// our interceptor will not kick in and we will lose the memmove.
2401   /// If we instrument the call here, but it does not get inlined,
2402   /// we will memove the shadow twice: which is bad in case
2403   /// of overlapping regions. So, we simply lower the intrinsic to a call.
2404   ///
2405   /// Similar situation exists for memcpy and memset.
2406   void visitMemMoveInst(MemMoveInst &I) {
2407     IRBuilder<> IRB(&I);
2408     IRB.CreateCall(
2409         MS.MemmoveFn,
2410         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2411          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2412          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2413     I.eraseFromParent();
2414   }
2415 
2416   // Similar to memmove: avoid copying shadow twice.
2417   // This is somewhat unfortunate as it may slowdown small constant memcpys.
2418   // FIXME: consider doing manual inline for small constant sizes and proper
2419   // alignment.
2420   void visitMemCpyInst(MemCpyInst &I) {
2421     IRBuilder<> IRB(&I);
2422     IRB.CreateCall(
2423         MS.MemcpyFn,
2424         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2425          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2426          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2427     I.eraseFromParent();
2428   }
2429 
2430   // Same as memcpy.
2431   void visitMemSetInst(MemSetInst &I) {
2432     IRBuilder<> IRB(&I);
2433     IRB.CreateCall(
2434         MS.MemsetFn,
2435         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2436          IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
2437          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2438     I.eraseFromParent();
2439   }
2440 
  // va_start / va_copy handling is target-specific; delegate to the
  // platform's VarArg helper.
  void visitVAStartInst(VAStartInst &I) {
    VAHelper->visitVAStartInst(I);
  }

  void visitVACopyInst(VACopyInst &I) {
    VAHelper->visitVACopyInst(I);
  }
2448 
  /// Handle vector store-like intrinsics.
  ///
  /// Instrument intrinsics that look like a simple SIMD store: writes memory,
  /// has 1 pointer argument and 1 vector argument, returns void.
  /// Stores the shadow of the vector argument to the corresponding shadow
  /// memory, mirroring the application store.
  bool handleVectorStoreIntrinsic(IntrinsicInst &I) {
    IRBuilder<> IRB(&I);
    Value* Addr = I.getArgOperand(0);
    Value *Shadow = getShadow(&I, 1);
    Value *ShadowPtr, *OriginPtr;

    // We don't know the pointer alignment (could be unaligned SSE store!).
    // Have to assume the worst case.
    std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
        Addr, IRB, Shadow->getType(), /*Alignment*/ 1, /*isStore*/ true);
    IRB.CreateAlignedStore(Shadow, ShadowPtr, 1);

    if (ClCheckAccessAddress)
      insertShadowCheck(Addr, &I);

    // FIXME: factor out common code from materializeStores
    if (MS.TrackOrigins) IRB.CreateStore(getOrigin(&I, 1), OriginPtr);
    return true;
  }
2472 
  /// Handle vector load-like intrinsics.
  ///
  /// Instrument intrinsics that look like a simple SIMD load: reads memory,
  /// has 1 pointer argument, returns a vector.
  /// Loads the shadow of the result from the corresponding shadow memory.
  bool handleVectorLoadIntrinsic(IntrinsicInst &I) {
    IRBuilder<> IRB(&I);
    Value *Addr = I.getArgOperand(0);

    Type *ShadowTy = getShadowTy(&I);
    Value *ShadowPtr, *OriginPtr;
    if (PropagateShadow) {
      // We don't know the pointer alignment (could be unaligned SSE load!).
      // Have to assume the worst case.
      unsigned Alignment = 1;
      std::tie(ShadowPtr, OriginPtr) =
          getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
      setShadow(&I,
                IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
    } else {
      // Shadow propagation is disabled: treat the loaded value as fully
      // initialized.
      setShadow(&I, getCleanShadow(&I));
    }

    if (ClCheckAccessAddress)
      insertShadowCheck(Addr, &I);

    if (MS.TrackOrigins) {
      if (PropagateShadow)
        setOrigin(&I, IRB.CreateLoad(MS.OriginTy, OriginPtr));
      else
        setOrigin(&I, getCleanOrigin());
    }
    return true;
  }
2506 
2507   /// Handle (SIMD arithmetic)-like intrinsics.
2508   ///
2509   /// Instrument intrinsics with any number of arguments of the same type,
2510   /// equal to the return type. The type should be simple (no aggregates or
2511   /// pointers; vectors are fine).
2512   /// Caller guarantees that this intrinsic does not access memory.
2513   bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I) {
2514     Type *RetTy = I.getType();
2515     if (!(RetTy->isIntOrIntVectorTy() ||
2516           RetTy->isFPOrFPVectorTy() ||
2517           RetTy->isX86_MMXTy()))
2518       return false;
2519 
2520     unsigned NumArgOperands = I.getNumArgOperands();
2521 
2522     for (unsigned i = 0; i < NumArgOperands; ++i) {
2523       Type *Ty = I.getArgOperand(i)->getType();
2524       if (Ty != RetTy)
2525         return false;
2526     }
2527 
2528     IRBuilder<> IRB(&I);
2529     ShadowAndOriginCombiner SC(this, IRB);
2530     for (unsigned i = 0; i < NumArgOperands; ++i)
2531       SC.Add(I.getArgOperand(i));
2532     SC.Done(&I);
2533 
2534     return true;
2535   }
2536 
2537   /// Heuristically instrument unknown intrinsics.
2538   ///
2539   /// The main purpose of this code is to do something reasonable with all
2540   /// random intrinsics we might encounter, most importantly - SIMD intrinsics.
2541   /// We recognize several classes of intrinsics by their argument types and
2542   /// ModRefBehaviour and apply special intrumentation when we are reasonably
2543   /// sure that we know what the intrinsic does.
2544   ///
2545   /// We special-case intrinsics where this approach fails. See llvm.bswap
2546   /// handling as an example of that.
2547   bool handleUnknownIntrinsic(IntrinsicInst &I) {
2548     unsigned NumArgOperands = I.getNumArgOperands();
2549     if (NumArgOperands == 0)
2550       return false;
2551 
2552     if (NumArgOperands == 2 &&
2553         I.getArgOperand(0)->getType()->isPointerTy() &&
2554         I.getArgOperand(1)->getType()->isVectorTy() &&
2555         I.getType()->isVoidTy() &&
2556         !I.onlyReadsMemory()) {
2557       // This looks like a vector store.
2558       return handleVectorStoreIntrinsic(I);
2559     }
2560 
2561     if (NumArgOperands == 1 &&
2562         I.getArgOperand(0)->getType()->isPointerTy() &&
2563         I.getType()->isVectorTy() &&
2564         I.onlyReadsMemory()) {
2565       // This looks like a vector load.
2566       return handleVectorLoadIntrinsic(I);
2567     }
2568 
2569     if (I.doesNotAccessMemory())
2570       if (maybeHandleSimpleNomemIntrinsic(I))
2571         return true;
2572 
2573     // FIXME: detect and handle SSE maskstore/maskload
2574     return false;
2575   }
2576 
  // launder/strip.invariant_group pass their pointer argument through, so
  // forward the argument's shadow and origin unchanged.
  void handleInvariantGroup(IntrinsicInst &I) {
    setShadow(&I, getShadow(&I, 0));
    setOrigin(&I, getOrigin(&I, 0));
  }
2581 
  // Record an llvm.lifetime.start marker together with the alloca it refers
  // to, for later stack-poisoning.
  void handleLifetimeStart(IntrinsicInst &I) {
    if (!PoisonStack)
      return;
    DenseMap<Value *, AllocaInst *> AllocaForValue;
    // Operand 1 is the pointer; trace it back to its alloca.
    AllocaInst *AI =
        llvm::findAllocaForValue(I.getArgOperand(1), AllocaForValue);
    // If any marker cannot be matched to an alloca, disable lifetime-based
    // instrumentation for the whole function; the marker is still recorded
    // (with a null alloca).
    if (!AI)
      InstrumentLifetimeStart = false;
    LifetimeStartList.push_back(std::make_pair(&I, AI));
  }
2592 
2593   void handleBswap(IntrinsicInst &I) {
2594     IRBuilder<> IRB(&I);
2595     Value *Op = I.getArgOperand(0);
2596     Type *OpType = Op->getType();
2597     Function *BswapFunc = Intrinsic::getDeclaration(
2598       F.getParent(), Intrinsic::bswap, makeArrayRef(&OpType, 1));
2599     setShadow(&I, IRB.CreateCall(BswapFunc, getShadow(Op)));
2600     setOrigin(&I, getOrigin(Op));
2601   }
2602 
  // Instrument vector convert instrinsic.
  //
  // This function instruments intrinsics like cvtsi2ss:
  // %Out = int_xxx_cvtyyy(%ConvertOp)
  // or
  // %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp)
  // Intrinsic converts \p NumUsedElements elements of \p ConvertOp to the same
  // number \p Out elements, and (if has 2 arguments) copies the rest of the
  // elements from \p CopyOp.
  // In most cases conversion involves floating-point value which may trigger a
  // hardware exception when not fully initialized. For this reason we require
  // \p ConvertOp[0:NumUsedElements] to be fully initialized and trap otherwise.
  // We copy the shadow of \p CopyOp[NumUsedElements:] to \p
  // Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
  // return a fully initialized value.
  void handleVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements) {
    IRBuilder<> IRB(&I);
    Value *CopyOp, *ConvertOp;

    // Decode the operand layout: 1 arg = convert only; 2 args = copy +
    // convert; 3 args additionally carry a constant rounding mode.
    switch (I.getNumArgOperands()) {
    case 3:
      assert(isa<ConstantInt>(I.getArgOperand(2)) && "Invalid rounding mode");
      LLVM_FALLTHROUGH;
    case 2:
      CopyOp = I.getArgOperand(0);
      ConvertOp = I.getArgOperand(1);
      break;
    case 1:
      ConvertOp = I.getArgOperand(0);
      CopyOp = nullptr;
      break;
    default:
      llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
    }

    // The first *NumUsedElements* elements of ConvertOp are converted to the
    // same number of output elements. The rest of the output is copied from
    // CopyOp, or (if not available) filled with zeroes.
    // Combine shadow for elements of ConvertOp that are used in this operation,
    // and insert a check.
    // FIXME: consider propagating shadow of ConvertOp, at least in the case of
    // int->any conversion.
    Value *ConvertShadow = getShadow(ConvertOp);
    Value *AggShadow = nullptr;
    if (ConvertOp->getType()->isVectorTy()) {
      // OR together the shadows of the used elements.
      AggShadow = IRB.CreateExtractElement(
          ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
      for (int i = 1; i < NumUsedElements; ++i) {
        Value *MoreShadow = IRB.CreateExtractElement(
            ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), i));
        AggShadow = IRB.CreateOr(AggShadow, MoreShadow);
      }
    } else {
      AggShadow = ConvertShadow;
    }
    assert(AggShadow->getType()->isIntegerTy());
    insertShadowCheck(AggShadow, getOrigin(ConvertOp), &I);

    // Build result shadow by zero-filling parts of CopyOp shadow that come from
    // ConvertOp.
    if (CopyOp) {
      assert(CopyOp->getType() == I.getType());
      assert(CopyOp->getType()->isVectorTy());
      Value *ResultShadow = getShadow(CopyOp);
      Type *EltTy = ResultShadow->getType()->getVectorElementType();
      for (int i = 0; i < NumUsedElements; ++i) {
        ResultShadow = IRB.CreateInsertElement(
            ResultShadow, ConstantInt::getNullValue(EltTy),
            ConstantInt::get(IRB.getInt32Ty(), i));
      }
      setShadow(&I, ResultShadow);
      setOrigin(&I, getOrigin(CopyOp));
    } else {
      setShadow(&I, getCleanShadow(&I));
      setOrigin(&I, getCleanOrigin());
    }
  }
2680 
  // Given a scalar or vector, extract lower 64 bits (or less), and return all
  // zeroes if it is zero, and all ones otherwise.
  Value *Lower64ShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
    // Collapse a vector shadow to a single value of at most 64 bits first.
    if (S->getType()->isVectorTy())
      S = CreateShadowCast(IRB, S, IRB.getInt64Ty(), /* Signed */ true);
    assert(S->getType()->getPrimitiveSizeInBits() <= 64);
    Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
    return CreateShadowCast(IRB, S2, T, /* Signed */ true);
  }
2690 
  // Given a vector, extract its first element, and return all
  // zeroes if it is zero, and all ones otherwise.
  Value *LowerElementShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
    // Only element 0 of the shadow is relevant here.
    Value *S1 = IRB.CreateExtractElement(S, (uint64_t)0);
    Value *S2 = IRB.CreateICmpNE(S1, getCleanShadow(S1));
    return CreateShadowCast(IRB, S2, T, /* Signed */ true);
  }
2698 
  // Element-wise: return all-ones for every vector element whose shadow is
  // non-zero, and all-zeros for the others.
  Value *VariableShadowExtend(IRBuilder<> &IRB, Value *S) {
    Type *T = S->getType();
    assert(T->isVectorTy());
    Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
    return IRB.CreateSExt(S2, T);
  }
2705 
  // Instrument vector shift instrinsic.
  //
  // This function instruments intrinsics like int_x86_avx2_psll_w.
  // Intrinsic shifts %In by %ShiftSize bits.
  // %ShiftSize may be a vector. In that case the lower 64 bits determine shift
  // size, and the rest is ignored. Behavior is defined even if shift size is
  // greater than register (or field) width.
  // \p Variable selects per-element shift-amount handling (AVX2 psllv-style)
  // instead of the shared lower-64-bit shift size.
  void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) {
    assert(I.getNumArgOperands() == 2);
    IRBuilder<> IRB(&I);
    // If any of the S2 bits are poisoned, the whole thing is poisoned.
    // Otherwise perform the same shift on S1.
    Value *S1 = getShadow(&I, 0);
    Value *S2 = getShadow(&I, 1);
    Value *S2Conv = Variable ? VariableShadowExtend(IRB, S2)
                             : Lower64ShadowExtend(IRB, S2, getShadowTy(&I));
    Value *V1 = I.getOperand(0);
    Value *V2 = I.getOperand(1);
    // Apply the original shift intrinsic to the shadow of operand 0.
    Value *Shift = IRB.CreateCall(I.getFunctionType(), I.getCalledValue(),
                                  {IRB.CreateBitCast(S1, V1->getType()), V2});
    Shift = IRB.CreateBitCast(Shift, getShadowTy(&I));
    setShadow(&I, IRB.CreateOr(Shift, S2Conv));
    setOriginForNaryOp(I);
  }
2730 
2731   // Get an X86_MMX-sized vector type.
2732   Type *getMMXVectorTy(unsigned EltSizeInBits) {
2733     const unsigned X86_MMXSizeInBits = 64;
2734     assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
2735            "Illegal MMX vector element size");
2736     return VectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
2737                            X86_MMXSizeInBits / EltSizeInBits);
2738   }
2739 
  // Returns a signed counterpart for an (un)signed-saturate-and-pack
  // intrinsic. Signed variants map to themselves; note there is no unsigned
  // dword-pack for MMX, so x86_mmx_packssdw only maps to itself.
  Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) {
    switch (id) {
      case Intrinsic::x86_sse2_packsswb_128:
      case Intrinsic::x86_sse2_packuswb_128:
        return Intrinsic::x86_sse2_packsswb_128;

      case Intrinsic::x86_sse2_packssdw_128:
      case Intrinsic::x86_sse41_packusdw:
        return Intrinsic::x86_sse2_packssdw_128;

      case Intrinsic::x86_avx2_packsswb:
      case Intrinsic::x86_avx2_packuswb:
        return Intrinsic::x86_avx2_packsswb;

      case Intrinsic::x86_avx2_packssdw:
      case Intrinsic::x86_avx2_packusdw:
        return Intrinsic::x86_avx2_packssdw;

      case Intrinsic::x86_mmx_packsswb:
      case Intrinsic::x86_mmx_packuswb:
        return Intrinsic::x86_mmx_packsswb;

      case Intrinsic::x86_mmx_packssdw:
        return Intrinsic::x86_mmx_packssdw;
      default:
        llvm_unreachable("unexpected intrinsic id");
    }
  }
2770 
  // Instrument vector pack instrinsic.
  //
  // This function instruments intrinsics like x86_mmx_packsswb, that
  // packs elements of 2 input vectors into half as many bits with saturation.
  // Shadow is propagated with the signed variant of the same intrinsic applied
  // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer).
  // EltSizeInBits is used only for x86mmx arguments.
  void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned EltSizeInBits = 0) {
    assert(I.getNumArgOperands() == 2);
    bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
    IRBuilder<> IRB(&I);
    Value *S1 = getShadow(&I, 0);
    Value *S2 = getShadow(&I, 1);
    assert(isX86_MMX || S1->getType()->isVectorTy());

    // SExt and ICmpNE below must apply to individual elements of input vectors.
    // In case of x86mmx arguments, cast them to appropriate vector types and
    // back.
    Type *T = isX86_MMX ? getMMXVectorTy(EltSizeInBits) : S1->getType();
    if (isX86_MMX) {
      S1 = IRB.CreateBitCast(S1, T);
      S2 = IRB.CreateBitCast(S2, T);
    }
    // All-ones per element with any poisoned bit; saturating pack preserves
    // the all-ones pattern across the width change.
    Value *S1_ext = IRB.CreateSExt(
        IRB.CreateICmpNE(S1, Constant::getNullValue(T)), T);
    Value *S2_ext = IRB.CreateSExt(
        IRB.CreateICmpNE(S2, Constant::getNullValue(T)), T);
    if (isX86_MMX) {
      Type *X86_MMXTy = Type::getX86_MMXTy(*MS.C);
      S1_ext = IRB.CreateBitCast(S1_ext, X86_MMXTy);
      S2_ext = IRB.CreateBitCast(S2_ext, X86_MMXTy);
    }

    Function *ShadowFn = Intrinsic::getDeclaration(
        F.getParent(), getSignedPackIntrinsic(I.getIntrinsicID()));

    Value *S =
        IRB.CreateCall(ShadowFn, {S1_ext, S2_ext}, "_msprop_vector_pack");
    if (isX86_MMX) S = IRB.CreateBitCast(S, getShadowTy(&I));
    setShadow(&I, S);
    setOriginForNaryOp(I);
  }
2813 
  // Instrument sum-of-absolute-differencies intrinsic.
  // If any input bit of a result element is poisoned, the whole element is
  // poisoned in its significant (low 16) bits.
  void handleVectorSadIntrinsic(IntrinsicInst &I) {
    const unsigned SignificantBitsPerResultElement = 16;
    bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
    Type *ResTy = isX86_MMX ? IntegerType::get(*MS.C, 64) : I.getType();
    unsigned ZeroBitsPerResultElement =
        ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement;

    IRBuilder<> IRB(&I);
    Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
    S = IRB.CreateBitCast(S, ResTy);
    S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
                       ResTy);
    // Only the low SignificantBitsPerResultElement bits of each element can
    // hold data; shift the all-ones pattern down to clear shadow for the
    // always-zero high bits.
    S = IRB.CreateLShr(S, ZeroBitsPerResultElement);
    S = IRB.CreateBitCast(S, getShadowTy(&I));
    setShadow(&I, S);
    setOriginForNaryOp(I);
  }
2832 
  // Instrument multiply-add intrinsic.
  // The result elements are twice as wide as the inputs; a result element is
  // fully poisoned if any bit of its contributing input pair is poisoned.
  void handleVectorPmaddIntrinsic(IntrinsicInst &I,
                                  unsigned EltSizeInBits = 0) {
    bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
    Type *ResTy = isX86_MMX ? getMMXVectorTy(EltSizeInBits * 2) : I.getType();
    IRBuilder<> IRB(&I);
    Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
    // Bitcast to the (twice-as-wide element) result type, then expand any
    // non-zero element shadow to all-ones.
    S = IRB.CreateBitCast(S, ResTy);
    S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
                       ResTy);
    S = IRB.CreateBitCast(S, getShadowTy(&I));
    setShadow(&I, S);
    setOriginForNaryOp(I);
  }
2847 
2848   // Instrument compare-packed intrinsic.
2849   // Basically, an or followed by sext(icmp ne 0) to end up with all-zeros or
2850   // all-ones shadow.
2851   void handleVectorComparePackedIntrinsic(IntrinsicInst &I) {
2852     IRBuilder<> IRB(&I);
2853     Type *ResTy = getShadowTy(&I);
2854     Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2855     Value *S = IRB.CreateSExt(
2856         IRB.CreateICmpNE(S0, Constant::getNullValue(ResTy)), ResTy);
2857     setShadow(&I, S);
2858     setOriginForNaryOp(I);
2859   }
2860 
2861   // Instrument compare-scalar intrinsic.
2862   // This handles both cmp* intrinsics which return the result in the first
2863   // element of a vector, and comi* which return the result as i32.
2864   void handleVectorCompareScalarIntrinsic(IntrinsicInst &I) {
2865     IRBuilder<> IRB(&I);
2866     Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2867     Value *S = LowerElementShadowExtend(IRB, S0, getShadowTy(&I));
2868     setShadow(&I, S);
2869     setOriginForNaryOp(I);
2870   }
2871 
  // Instrument x86 stmxcsr: the intrinsic writes 4 bytes to memory. The
  // stored value is treated as always fully initialized, so write a clean
  // shadow; optionally check the destination address itself.
  void handleStmxcsr(IntrinsicInst &I) {
    IRBuilder<> IRB(&I);
    Value* Addr = I.getArgOperand(0);
    Type *Ty = IRB.getInt32Ty();
    Value *ShadowPtr =
        getShadowOriginPtr(Addr, IRB, Ty, /*Alignment*/ 1, /*isStore*/ true)
            .first;

    IRB.CreateStore(getCleanShadow(Ty),
                    IRB.CreatePointerCast(ShadowPtr, Ty->getPointerTo()));

    if (ClCheckAccessAddress)
      insertShadowCheck(Addr, &I);
  }
2886 
  // Instrument x86 ldmxcsr: the intrinsic reads 4 bytes from memory. Require
  // those bytes to be fully initialized (reported with their origin), and
  // optionally check the source address itself.
  void handleLdmxcsr(IntrinsicInst &I) {
    if (!InsertChecks) return;

    IRBuilder<> IRB(&I);
    Value *Addr = I.getArgOperand(0);
    Type *Ty = IRB.getInt32Ty();
    unsigned Alignment = 1;
    Value *ShadowPtr, *OriginPtr;
    std::tie(ShadowPtr, OriginPtr) =
        getShadowOriginPtr(Addr, IRB, Ty, Alignment, /*isStore*/ false);

    if (ClCheckAccessAddress)
      insertShadowCheck(Addr, &I);

    // Load the shadow of the 4 source bytes and check it for poison.
    Value *Shadow = IRB.CreateAlignedLoad(Ty, ShadowPtr, Alignment, "_ldmxcsr");
    Value *Origin = MS.TrackOrigins ? IRB.CreateLoad(MS.OriginTy, OriginPtr)
                                    : getCleanOrigin();
    insertShadowCheck(Shadow, Origin, &I);
  }
2906 
  // Instrument llvm.masked.store: store the value's shadow through the same
  // mask to shadow memory, and optionally check the address and mask shadows.
  void handleMaskedStore(IntrinsicInst &I) {
    IRBuilder<> IRB(&I);
    Value *V = I.getArgOperand(0);
    Value *Addr = I.getArgOperand(1);
    // Operand 2 is the (constant) alignment, operand 3 the lane mask.
    unsigned Align = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
    Value *Mask = I.getArgOperand(3);
    Value *Shadow = getShadow(V);

    Value *ShadowPtr;
    Value *OriginPtr;
    std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
        Addr, IRB, Shadow->getType(), Align, /*isStore*/ true);

    if (ClCheckAccessAddress) {
      insertShadowCheck(Addr, &I);
      // Uninitialized mask is kind of like uninitialized address, but not as
      // scary.
      insertShadowCheck(Mask, &I);
    }

    // Mirror the application store on the shadow, with the same mask.
    IRB.CreateMaskedStore(Shadow, ShadowPtr, Align, Mask);

    if (MS.TrackOrigins) {
      auto &DL = F.getParent()->getDataLayout();
      // Origins are painted for the full vector width, at least
      // kMinOriginAlignment-aligned.
      paintOrigin(IRB, getOrigin(V), OriginPtr,
                  DL.getTypeStoreSize(Shadow->getType()),
                  std::max(Align, kMinOriginAlignment));
    }
  }
2936 
  // Instrument llvm.masked.load: load the shadow through the same mask, with
  // PassThru's shadow supplying the lanes the masked load does not touch.
  bool handleMaskedLoad(IntrinsicInst &I) {
    IRBuilder<> IRB(&I);
    Value *Addr = I.getArgOperand(0);
    // Operand 1 is the (constant) alignment, 2 the mask, 3 the pass-through.
    unsigned Align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
    Value *Mask = I.getArgOperand(2);
    Value *PassThru = I.getArgOperand(3);

    Type *ShadowTy = getShadowTy(&I);
    Value *ShadowPtr, *OriginPtr;
    if (PropagateShadow) {
      std::tie(ShadowPtr, OriginPtr) =
          getShadowOriginPtr(Addr, IRB, ShadowTy, Align, /*isStore*/ false);
      setShadow(&I, IRB.CreateMaskedLoad(ShadowPtr, Align, Mask,
                                         getShadow(PassThru), "_msmaskedld"));
    } else {
      setShadow(&I, getCleanShadow(&I));
    }

    if (ClCheckAccessAddress) {
      insertShadowCheck(Addr, &I);
      insertShadowCheck(Mask, &I);
    }

    if (MS.TrackOrigins) {
      if (PropagateShadow) {
        // Choose between PassThru's and the loaded value's origins.
        // NOTE(review): sext(neg(Mask)) over an i1 vector equals sext(Mask),
        // which keeps PassThru's shadow for the *enabled* lanes; confirm the
        // intended lane polarity here.
        Value *MaskedPassThruShadow = IRB.CreateAnd(
            getShadow(PassThru), IRB.CreateSExt(IRB.CreateNeg(Mask), ShadowTy));

        // OR the selected per-lane shadows into a single "any poison" value.
        Value *Acc = IRB.CreateExtractElement(
            MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
        for (int i = 1, N = PassThru->getType()->getVectorNumElements(); i < N;
             ++i) {
          Value *More = IRB.CreateExtractElement(
              MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), i));
          Acc = IRB.CreateOr(Acc, More);
        }

        // If PassThru contributes poison, report its origin; otherwise use
        // the origin loaded from origin memory.
        Value *Origin = IRB.CreateSelect(
            IRB.CreateICmpNE(Acc, Constant::getNullValue(Acc->getType())),
            getOrigin(PassThru), IRB.CreateLoad(MS.OriginTy, OriginPtr));

        setOrigin(&I, Origin);
      } else {
        setOrigin(&I, getCleanOrigin());
      }
    }
    return true;
  }
2986 
2987   // Instrument BMI / BMI2 intrinsics.
2988   // All of these intrinsics are Z = I(X, Y)
2989   // where the types of all operands and the result match, and are either i32 or i64.
2990   // The following instrumentation happens to work for all of them:
2991   //   Sz = I(Sx, Y) | (sext (Sy != 0))
2992   void handleBmiIntrinsic(IntrinsicInst &I) {
2993     IRBuilder<> IRB(&I);
2994     Type *ShadowTy = getShadowTy(&I);
2995 
2996     // If any bit of the mask operand is poisoned, then the whole thing is.
2997     Value *SMask = getShadow(&I, 1);
2998     SMask = IRB.CreateSExt(IRB.CreateICmpNE(SMask, getCleanShadow(ShadowTy)),
2999                            ShadowTy);
3000     // Apply the same intrinsic to the shadow of the first operand.
3001     Value *S = IRB.CreateCall(I.getCalledFunction(),
3002                               {getShadow(&I, 0), I.getOperand(1)});
3003     S = IRB.CreateOr(SMask, S);
3004     setShadow(&I, S);
3005     setOriginForNaryOp(I);
3006   }
3007 
  // Dispatch intrinsics that need special shadow propagation to their
  // dedicated handlers. Anything not listed falls through to
  // handleUnknownIntrinsic() and ultimately to the conservative
  // visitInstruction() default (check all operands, clean result).
  void visitIntrinsicInst(IntrinsicInst &I) {
    switch (I.getIntrinsicID()) {
    case Intrinsic::lifetime_start:
      handleLifetimeStart(I);
      break;
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
      handleInvariantGroup(I);
      break;
    case Intrinsic::bswap:
      handleBswap(I);
      break;
    case Intrinsic::masked_store:
      handleMaskedStore(I);
      break;
    case Intrinsic::masked_load:
      handleMaskedLoad(I);
      break;
    case Intrinsic::x86_sse_stmxcsr:
      handleStmxcsr(I);
      break;
    case Intrinsic::x86_sse_ldmxcsr:
      handleLdmxcsr(I);
      break;
    // x86 scalar conversions: the second argument to
    // handleVectorConvertIntrinsic is the number of vector elements the
    // conversion actually consumes (1 here).
    case Intrinsic::x86_avx512_vcvtsd2usi64:
    case Intrinsic::x86_avx512_vcvtsd2usi32:
    case Intrinsic::x86_avx512_vcvtss2usi64:
    case Intrinsic::x86_avx512_vcvtss2usi32:
    case Intrinsic::x86_avx512_cvttss2usi64:
    case Intrinsic::x86_avx512_cvttss2usi:
    case Intrinsic::x86_avx512_cvttsd2usi64:
    case Intrinsic::x86_avx512_cvttsd2usi:
    case Intrinsic::x86_avx512_cvtusi2ss:
    case Intrinsic::x86_avx512_cvtusi642sd:
    case Intrinsic::x86_avx512_cvtusi642ss:
    case Intrinsic::x86_sse2_cvtsd2si64:
    case Intrinsic::x86_sse2_cvtsd2si:
    case Intrinsic::x86_sse2_cvtsd2ss:
    case Intrinsic::x86_sse2_cvttsd2si64:
    case Intrinsic::x86_sse2_cvttsd2si:
    case Intrinsic::x86_sse_cvtss2si64:
    case Intrinsic::x86_sse_cvtss2si:
    case Intrinsic::x86_sse_cvttss2si64:
    case Intrinsic::x86_sse_cvttss2si:
      handleVectorConvertIntrinsic(I, 1);
      break;
    // These conversions consume two elements of the vector operand.
    case Intrinsic::x86_sse_cvtps2pi:
    case Intrinsic::x86_sse_cvttps2pi:
      handleVectorConvertIntrinsic(I, 2);
      break;

    // Vector shifts where the shift amount is a scalar/immediate (the same
    // amount is applied to every element).
    case Intrinsic::x86_avx512_psll_w_512:
    case Intrinsic::x86_avx512_psll_d_512:
    case Intrinsic::x86_avx512_psll_q_512:
    case Intrinsic::x86_avx512_pslli_w_512:
    case Intrinsic::x86_avx512_pslli_d_512:
    case Intrinsic::x86_avx512_pslli_q_512:
    case Intrinsic::x86_avx512_psrl_w_512:
    case Intrinsic::x86_avx512_psrl_d_512:
    case Intrinsic::x86_avx512_psrl_q_512:
    case Intrinsic::x86_avx512_psra_w_512:
    case Intrinsic::x86_avx512_psra_d_512:
    case Intrinsic::x86_avx512_psra_q_512:
    case Intrinsic::x86_avx512_psrli_w_512:
    case Intrinsic::x86_avx512_psrli_d_512:
    case Intrinsic::x86_avx512_psrli_q_512:
    case Intrinsic::x86_avx512_psrai_w_512:
    case Intrinsic::x86_avx512_psrai_d_512:
    case Intrinsic::x86_avx512_psrai_q_512:
    case Intrinsic::x86_avx512_psra_q_256:
    case Intrinsic::x86_avx512_psra_q_128:
    case Intrinsic::x86_avx512_psrai_q_256:
    case Intrinsic::x86_avx512_psrai_q_128:
    case Intrinsic::x86_avx2_psll_w:
    case Intrinsic::x86_avx2_psll_d:
    case Intrinsic::x86_avx2_psll_q:
    case Intrinsic::x86_avx2_pslli_w:
    case Intrinsic::x86_avx2_pslli_d:
    case Intrinsic::x86_avx2_pslli_q:
    case Intrinsic::x86_avx2_psrl_w:
    case Intrinsic::x86_avx2_psrl_d:
    case Intrinsic::x86_avx2_psrl_q:
    case Intrinsic::x86_avx2_psra_w:
    case Intrinsic::x86_avx2_psra_d:
    case Intrinsic::x86_avx2_psrli_w:
    case Intrinsic::x86_avx2_psrli_d:
    case Intrinsic::x86_avx2_psrli_q:
    case Intrinsic::x86_avx2_psrai_w:
    case Intrinsic::x86_avx2_psrai_d:
    case Intrinsic::x86_sse2_psll_w:
    case Intrinsic::x86_sse2_psll_d:
    case Intrinsic::x86_sse2_psll_q:
    case Intrinsic::x86_sse2_pslli_w:
    case Intrinsic::x86_sse2_pslli_d:
    case Intrinsic::x86_sse2_pslli_q:
    case Intrinsic::x86_sse2_psrl_w:
    case Intrinsic::x86_sse2_psrl_d:
    case Intrinsic::x86_sse2_psrl_q:
    case Intrinsic::x86_sse2_psra_w:
    case Intrinsic::x86_sse2_psra_d:
    case Intrinsic::x86_sse2_psrli_w:
    case Intrinsic::x86_sse2_psrli_d:
    case Intrinsic::x86_sse2_psrli_q:
    case Intrinsic::x86_sse2_psrai_w:
    case Intrinsic::x86_sse2_psrai_d:
    case Intrinsic::x86_mmx_psll_w:
    case Intrinsic::x86_mmx_psll_d:
    case Intrinsic::x86_mmx_psll_q:
    case Intrinsic::x86_mmx_pslli_w:
    case Intrinsic::x86_mmx_pslli_d:
    case Intrinsic::x86_mmx_pslli_q:
    case Intrinsic::x86_mmx_psrl_w:
    case Intrinsic::x86_mmx_psrl_d:
    case Intrinsic::x86_mmx_psrl_q:
    case Intrinsic::x86_mmx_psra_w:
    case Intrinsic::x86_mmx_psra_d:
    case Intrinsic::x86_mmx_psrli_w:
    case Intrinsic::x86_mmx_psrli_d:
    case Intrinsic::x86_mmx_psrli_q:
    case Intrinsic::x86_mmx_psrai_w:
    case Intrinsic::x86_mmx_psrai_d:
      handleVectorShiftIntrinsic(I, /* Variable */ false);
      break;
    // Vector shifts with a per-element (variable) shift amount.
    case Intrinsic::x86_avx2_psllv_d:
    case Intrinsic::x86_avx2_psllv_d_256:
    case Intrinsic::x86_avx512_psllv_d_512:
    case Intrinsic::x86_avx2_psllv_q:
    case Intrinsic::x86_avx2_psllv_q_256:
    case Intrinsic::x86_avx512_psllv_q_512:
    case Intrinsic::x86_avx2_psrlv_d:
    case Intrinsic::x86_avx2_psrlv_d_256:
    case Intrinsic::x86_avx512_psrlv_d_512:
    case Intrinsic::x86_avx2_psrlv_q:
    case Intrinsic::x86_avx2_psrlv_q_256:
    case Intrinsic::x86_avx512_psrlv_q_512:
    case Intrinsic::x86_avx2_psrav_d:
    case Intrinsic::x86_avx2_psrav_d_256:
    case Intrinsic::x86_avx512_psrav_d_512:
    case Intrinsic::x86_avx512_psrav_q_128:
    case Intrinsic::x86_avx512_psrav_q_256:
    case Intrinsic::x86_avx512_psrav_q_512:
      handleVectorShiftIntrinsic(I, /* Variable */ true);
      break;

    // Saturating pack (narrowing) instructions.
    case Intrinsic::x86_sse2_packsswb_128:
    case Intrinsic::x86_sse2_packssdw_128:
    case Intrinsic::x86_sse2_packuswb_128:
    case Intrinsic::x86_sse41_packusdw:
    case Intrinsic::x86_avx2_packsswb:
    case Intrinsic::x86_avx2_packssdw:
    case Intrinsic::x86_avx2_packuswb:
    case Intrinsic::x86_avx2_packusdw:
      handleVectorPackIntrinsic(I);
      break;

    // MMX variants pass the source element width explicitly.
    case Intrinsic::x86_mmx_packsswb:
    case Intrinsic::x86_mmx_packuswb:
      handleVectorPackIntrinsic(I, 16);
      break;

    case Intrinsic::x86_mmx_packssdw:
      handleVectorPackIntrinsic(I, 32);
      break;

    // Sum of absolute differences.
    case Intrinsic::x86_mmx_psad_bw:
    case Intrinsic::x86_sse2_psad_bw:
    case Intrinsic::x86_avx2_psad_bw:
      handleVectorSadIntrinsic(I);
      break;

    // Multiply-and-horizontal-add.
    case Intrinsic::x86_sse2_pmadd_wd:
    case Intrinsic::x86_avx2_pmadd_wd:
    case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
    case Intrinsic::x86_avx2_pmadd_ub_sw:
      handleVectorPmaddIntrinsic(I);
      break;

    case Intrinsic::x86_ssse3_pmadd_ub_sw:
      handleVectorPmaddIntrinsic(I, 8);
      break;

    case Intrinsic::x86_mmx_pmadd_wd:
      handleVectorPmaddIntrinsic(I, 16);
      break;

    // Scalar FP compares: only the lowest element participates.
    case Intrinsic::x86_sse_cmp_ss:
    case Intrinsic::x86_sse2_cmp_sd:
    case Intrinsic::x86_sse_comieq_ss:
    case Intrinsic::x86_sse_comilt_ss:
    case Intrinsic::x86_sse_comile_ss:
    case Intrinsic::x86_sse_comigt_ss:
    case Intrinsic::x86_sse_comige_ss:
    case Intrinsic::x86_sse_comineq_ss:
    case Intrinsic::x86_sse_ucomieq_ss:
    case Intrinsic::x86_sse_ucomilt_ss:
    case Intrinsic::x86_sse_ucomile_ss:
    case Intrinsic::x86_sse_ucomigt_ss:
    case Intrinsic::x86_sse_ucomige_ss:
    case Intrinsic::x86_sse_ucomineq_ss:
    case Intrinsic::x86_sse2_comieq_sd:
    case Intrinsic::x86_sse2_comilt_sd:
    case Intrinsic::x86_sse2_comile_sd:
    case Intrinsic::x86_sse2_comigt_sd:
    case Intrinsic::x86_sse2_comige_sd:
    case Intrinsic::x86_sse2_comineq_sd:
    case Intrinsic::x86_sse2_ucomieq_sd:
    case Intrinsic::x86_sse2_ucomilt_sd:
    case Intrinsic::x86_sse2_ucomile_sd:
    case Intrinsic::x86_sse2_ucomigt_sd:
    case Intrinsic::x86_sse2_ucomige_sd:
    case Intrinsic::x86_sse2_ucomineq_sd:
      handleVectorCompareScalarIntrinsic(I);
      break;

    case Intrinsic::x86_sse_cmp_ps:
    case Intrinsic::x86_sse2_cmp_pd:
      // FIXME: For x86_avx_cmp_pd_256 and x86_avx_cmp_ps_256 this function
      // generates reasonably looking IR that fails in the backend with "Do not
      // know how to split the result of this operator!".
      handleVectorComparePackedIntrinsic(I);
      break;

    // BMI/BMI2 bit-manipulation intrinsics.
    case Intrinsic::x86_bmi_bextr_32:
    case Intrinsic::x86_bmi_bextr_64:
    case Intrinsic::x86_bmi_bzhi_32:
    case Intrinsic::x86_bmi_bzhi_64:
    case Intrinsic::x86_bmi_pdep_32:
    case Intrinsic::x86_bmi_pdep_64:
    case Intrinsic::x86_bmi_pext_32:
    case Intrinsic::x86_bmi_pext_64:
      handleBmiIntrinsic(I);
      break;

    case Intrinsic::is_constant:
      // The result of llvm.is.constant() is always defined.
      setShadow(&I, getCleanShadow(&I));
      setOrigin(&I, getCleanOrigin());
      break;

    default:
      if (!handleUnknownIntrinsic(I))
        visitInstruction(I);
      break;
    }
  }
3253 
  // Instrument a call/invoke/callbr: store the shadow (and origin) of each
  // argument into the __msan_param_tls array before the call, and load the
  // return value shadow from the retval TLS slot right after it.
  void visitCallSite(CallSite CS) {
    Instruction &I = *CS.getInstruction();
    assert(!I.getMetadata("nosanitize"));
    assert((CS.isCall() || CS.isInvoke() || CS.isCallBr()) &&
           "Unknown type of CallSite");
    if (CS.isCallBr() || (CS.isCall() && cast<CallInst>(&I)->isInlineAsm())) {
      // For inline asm (either a call to asm function, or callbr instruction),
      // do the usual thing: check argument shadow and mark all outputs as
      // clean. Note that any side effects of the inline asm that are not
      // immediately visible in its constraints are not handled.
      if (ClHandleAsmConservative && MS.CompileKernel)
        visitAsmInstruction(I);
      else
        visitInstruction(I);
      return;
    }
    if (CS.isCall()) {
      CallInst *Call = cast<CallInst>(&I);
      assert(!isa<IntrinsicInst>(&I) && "intrinsics are handled elsewhere");

      // We are going to insert code that relies on the fact that the callee
      // will become a non-readonly function after it is instrumented by us. To
      // prevent this code from being optimized out, mark that function
      // non-readonly in advance.
      if (Function *Func = Call->getCalledFunction()) {
        // Clear out readonly/readnone attributes.
        AttrBuilder B;
        B.addAttribute(Attribute::ReadOnly)
            .addAttribute(Attribute::ReadNone)
            .addAttribute(Attribute::WriteOnly)
            .addAttribute(Attribute::ArgMemOnly)
            .addAttribute(Attribute::Speculatable);
        Func->removeAttributes(AttributeList::FunctionIndex, B);
      }

      maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI);
    }
    IRBuilder<> IRB(&I);

    // Byte offset of the current argument's slot within __msan_param_tls.
    unsigned ArgOffset = 0;
    LLVM_DEBUG(dbgs() << "  CallSite: " << I << "\n");
    for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
         ArgIt != End; ++ArgIt) {
      Value *A = *ArgIt;
      unsigned i = ArgIt - CS.arg_begin();
      if (!A->getType()->isSized()) {
        LLVM_DEBUG(dbgs() << "Arg " << i << " is not sized: " << I << "\n");
        continue;
      }
      unsigned Size = 0;
      Value *Store = nullptr;
      // Compute the Shadow for arg even if it is ByVal, because
      // in that case getShadow() will copy the actual arg shadow to
      // __msan_param_tls.
      Value *ArgShadow = getShadow(A);
      Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset);
      LLVM_DEBUG(dbgs() << "  Arg#" << i << ": " << *A
                        << " Shadow: " << *ArgShadow << "\n");
      bool ArgIsInitialized = false;
      const DataLayout &DL = F.getParent()->getDataLayout();
      if (CS.paramHasAttr(i, Attribute::ByVal)) {
        assert(A->getType()->isPointerTy() &&
               "ByVal argument is not a pointer!");
        Size = DL.getTypeAllocSize(A->getType()->getPointerElementType());
        // Once an argument's shadow no longer fits into the param TLS array,
        // stop storing shadow for it and all subsequent arguments.
        if (ArgOffset + Size > kParamTLSSize) break;
        unsigned ParamAlignment = CS.getParamAlignment(i);
        unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment);
        Value *AShadowPtr =
            getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
                               /*isStore*/ false)
                .first;

        // ByVal: copy the pointee's shadow bytes into the param TLS slot.
        Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
                                 Alignment, Size);
        // TODO(glider): need to copy origins.
      } else {
        Size = DL.getTypeAllocSize(A->getType());
        if (ArgOffset + Size > kParamTLSSize) break;
        Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
                                       kShadowTLSAlignment);
        // A constant-zero shadow means the argument is statically known to be
        // fully initialized, so storing its origin would be wasted work.
        Constant *Cst = dyn_cast<Constant>(ArgShadow);
        if (Cst && Cst->isNullValue()) ArgIsInitialized = true;
      }
      if (MS.TrackOrigins && !ArgIsInitialized)
        IRB.CreateStore(getOrigin(A),
                        getOriginPtrForArgument(A, IRB, ArgOffset));
      (void)Store;
      assert(Size != 0 && Store != nullptr);
      LLVM_DEBUG(dbgs() << "  Param:" << *Store << "\n");
      // Param TLS slots are 8-byte granular.
      ArgOffset += alignTo(Size, 8);
    }
    LLVM_DEBUG(dbgs() << "  done with call args\n");

    FunctionType *FT = CS.getFunctionType();
    if (FT->isVarArg()) {
      VAHelper->visitCallSite(CS, IRB);
    }

    // Now, get the shadow for the RetVal.
    if (!I.getType()->isSized()) return;
    // Don't emit the epilogue for musttail call returns.
    if (CS.isCall() && cast<CallInst>(&I)->isMustTailCall()) return;
    IRBuilder<> IRBBefore(&I);
    // Until we have full dynamic coverage, make sure the retval shadow is 0.
    Value *Base = getShadowPtrForRetval(&I, IRBBefore);
    IRBBefore.CreateAlignedStore(getCleanShadow(&I), Base, kShadowTLSAlignment);
    // Find where to load the retval shadow back: right after a call, or at
    // the start of an invoke's normal destination.
    BasicBlock::iterator NextInsn;
    if (CS.isCall()) {
      NextInsn = ++I.getIterator();
      assert(NextInsn != I.getParent()->end());
    } else {
      BasicBlock *NormalDest = cast<InvokeInst>(&I)->getNormalDest();
      if (!NormalDest->getSinglePredecessor()) {
        // FIXME: this case is tricky, so we are just conservative here.
        // Perhaps we need to split the edge between this BB and NormalDest,
        // but a naive attempt to use SplitEdge leads to a crash.
        setShadow(&I, getCleanShadow(&I));
        setOrigin(&I, getCleanOrigin());
        return;
      }
      // FIXME: NextInsn is likely in a basic block that has not been visited yet.
      // Anything inserted there will be instrumented by MSan later!
      NextInsn = NormalDest->getFirstInsertionPt();
      assert(NextInsn != NormalDest->end() &&
             "Could not find insertion point for retval shadow load");
    }
    IRBuilder<> IRBAfter(&*NextInsn);
    Value *RetvalShadow = IRBAfter.CreateAlignedLoad(
        getShadowTy(&I), getShadowPtrForRetval(&I, IRBAfter),
        kShadowTLSAlignment, "_msret");
    setShadow(&I, RetvalShadow);
    if (MS.TrackOrigins)
      setOrigin(&I, IRBAfter.CreateLoad(MS.OriginTy,
                                        getOriginPtrForRetval(IRBAfter)));
  }
3389 
3390   bool isAMustTailRetVal(Value *RetVal) {
3391     if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
3392       RetVal = I->getOperand(0);
3393     }
3394     if (auto *I = dyn_cast<CallInst>(RetVal)) {
3395       return I->isMustTailCall();
3396     }
3397     return false;
3398   }
3399 
3400   void visitReturnInst(ReturnInst &I) {
3401     IRBuilder<> IRB(&I);
3402     Value *RetVal = I.getReturnValue();
3403     if (!RetVal) return;
3404     // Don't emit the epilogue for musttail call returns.
3405     if (isAMustTailRetVal(RetVal)) return;
3406     Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
3407     if (CheckReturnValue) {
3408       insertShadowCheck(RetVal, &I);
3409       Value *Shadow = getCleanShadow(RetVal);
3410       IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
3411     } else {
3412       Value *Shadow = getShadow(RetVal);
3413       IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
3414       if (MS.TrackOrigins)
3415         IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
3416     }
3417   }
3418 
3419   void visitPHINode(PHINode &I) {
3420     IRBuilder<> IRB(&I);
3421     if (!PropagateShadow) {
3422       setShadow(&I, getCleanShadow(&I));
3423       setOrigin(&I, getCleanOrigin());
3424       return;
3425     }
3426 
3427     ShadowPHINodes.push_back(&I);
3428     setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(),
3429                                 "_msphi_s"));
3430     if (MS.TrackOrigins)
3431       setOrigin(&I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(),
3432                                   "_msphi_o"));
3433   }
3434 
3435   Value *getLocalVarDescription(AllocaInst &I) {
3436     SmallString<2048> StackDescriptionStorage;
3437     raw_svector_ostream StackDescription(StackDescriptionStorage);
3438     // We create a string with a description of the stack allocation and
3439     // pass it into __msan_set_alloca_origin.
3440     // It will be printed by the run-time if stack-originated UMR is found.
3441     // The first 4 bytes of the string are set to '----' and will be replaced
3442     // by __msan_va_arg_overflow_size_tls at the first call.
3443     StackDescription << "----" << I.getName() << "@" << F.getName();
3444     return createPrivateNonConstGlobalForString(*F.getParent(),
3445                                                 StackDescription.str());
3446   }
3447 
3448   void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
3449     if (PoisonStack && ClPoisonStackWithCall) {
3450       IRB.CreateCall(MS.MsanPoisonStackFn,
3451                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
3452     } else {
3453       Value *ShadowBase, *OriginBase;
3454       std::tie(ShadowBase, OriginBase) =
3455           getShadowOriginPtr(&I, IRB, IRB.getInt8Ty(), 1, /*isStore*/ true);
3456 
3457       Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
3458       IRB.CreateMemSet(ShadowBase, PoisonValue, Len, I.getAlignment());
3459     }
3460 
3461     if (PoisonStack && MS.TrackOrigins) {
3462       Value *Descr = getLocalVarDescription(I);
3463       IRB.CreateCall(MS.MsanSetAllocaOrigin4Fn,
3464                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
3465                       IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()),
3466                       IRB.CreatePointerCast(&F, MS.IntptrTy)});
3467     }
3468   }
3469 
3470   void poisonAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
3471     Value *Descr = getLocalVarDescription(I);
3472     if (PoisonStack) {
3473       IRB.CreateCall(MS.MsanPoisonAllocaFn,
3474                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
3475                       IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())});
3476     } else {
3477       IRB.CreateCall(MS.MsanUnpoisonAllocaFn,
3478                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
3479     }
3480   }
3481 
3482   void instrumentAlloca(AllocaInst &I, Instruction *InsPoint = nullptr) {
3483     if (!InsPoint)
3484       InsPoint = &I;
3485     IRBuilder<> IRB(InsPoint->getNextNode());
3486     const DataLayout &DL = F.getParent()->getDataLayout();
3487     uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType());
3488     Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize);
3489     if (I.isArrayAllocation())
3490       Len = IRB.CreateMul(Len, I.getArraySize());
3491 
3492     if (MS.CompileKernel)
3493       poisonAllocaKmsan(I, IRB, Len);
3494     else
3495       poisonAllocaUserspace(I, IRB, Len);
3496   }
3497 
3498   void visitAllocaInst(AllocaInst &I) {
3499     setShadow(&I, getCleanShadow(&I));
3500     setOrigin(&I, getCleanOrigin());
3501     // We'll get to this alloca later unless it's poisoned at the corresponding
3502     // llvm.lifetime.start.
3503     AllocaSet.insert(&I);
3504   }
3505 
  // Shadow/origin propagation for select instructions.
  void visitSelectInst(SelectInst& I) {
    IRBuilder<> IRB(&I);
    // a = select b, c, d
    Value *B = I.getCondition();
    Value *C = I.getTrueValue();
    Value *D = I.getFalseValue();
    Value *Sb = getShadow(B);
    Value *Sc = getShadow(C);
    Value *Sd = getShadow(D);

    // Result shadow if condition shadow is 0.
    Value *Sa0 = IRB.CreateSelect(B, Sc, Sd);
    Value *Sa1;
    if (I.getType()->isAggregateType()) {
      // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do
      // an extra "select". This results in much more compact IR.
      // Sa = select Sb, poisoned, (select b, Sc, Sd)
      Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
    } else {
      // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
      // If Sb (condition is poisoned), look for bits in c and d that are equal
      // and both unpoisoned.
      // If !Sb (condition is unpoisoned), simply pick one of Sc and Sd.

      // Cast arguments to shadow-compatible type.
      C = CreateAppToShadowCast(IRB, C);
      D = CreateAppToShadowCast(IRB, D);

      // Result shadow if condition shadow is 1.
      Sa1 = IRB.CreateOr({IRB.CreateXor(C, D), Sc, Sd});
    }
    Value *Sa = IRB.CreateSelect(Sb, Sa1, Sa0, "_msprop_select");
    setShadow(&I, Sa);
    if (MS.TrackOrigins) {
      // Origins are always i32, so any vector conditions must be flattened.
      // FIXME: consider tracking vector origins for app vectors?
      if (B->getType()->isVectorTy()) {
        // Reduce the vector condition (and its shadow) to a single i1 by
        // comparing the flattened bits against zero.
        Type *FlatTy = getShadowTyNoVec(B->getType());
        B = IRB.CreateICmpNE(IRB.CreateBitCast(B, FlatTy),
                                ConstantInt::getNullValue(FlatTy));
        Sb = IRB.CreateICmpNE(IRB.CreateBitCast(Sb, FlatTy),
                                      ConstantInt::getNullValue(FlatTy));
      }
      // a = select b, c, d
      // Oa = Sb ? Ob : (b ? Oc : Od)
      setOrigin(
          &I, IRB.CreateSelect(Sb, getOrigin(I.getCondition()),
                               IRB.CreateSelect(B, getOrigin(I.getTrueValue()),
                                                getOrigin(I.getFalseValue()))));
    }
  }
3557 
3558   void visitLandingPadInst(LandingPadInst &I) {
3559     // Do nothing.
3560     // See https://github.com/google/sanitizers/issues/504
3561     setShadow(&I, getCleanShadow(&I));
3562     setOrigin(&I, getCleanOrigin());
3563   }
3564 
3565   void visitCatchSwitchInst(CatchSwitchInst &I) {
3566     setShadow(&I, getCleanShadow(&I));
3567     setOrigin(&I, getCleanOrigin());
3568   }
3569 
3570   void visitFuncletPadInst(FuncletPadInst &I) {
3571     setShadow(&I, getCleanShadow(&I));
3572     setOrigin(&I, getCleanOrigin());
3573   }
3574 
  void visitGetElementPtrInst(GetElementPtrInst &I) {
    // The resulting pointer is poisoned if the base pointer or any index is.
    handleShadowOr(I);
  }
3578 
3579   void visitExtractValueInst(ExtractValueInst &I) {
3580     IRBuilder<> IRB(&I);
3581     Value *Agg = I.getAggregateOperand();
3582     LLVM_DEBUG(dbgs() << "ExtractValue:  " << I << "\n");
3583     Value *AggShadow = getShadow(Agg);
3584     LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
3585     Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
3586     LLVM_DEBUG(dbgs() << "   ResShadow:  " << *ResShadow << "\n");
3587     setShadow(&I, ResShadow);
3588     setOriginForNaryOp(I);
3589   }
3590 
  // The shadow of an insertvalue is a mirrored insertvalue on the shadows of
  // the aggregate and the inserted element.
  void visitInsertValueInst(InsertValueInst &I) {
    IRBuilder<> IRB(&I);
    LLVM_DEBUG(dbgs() << "InsertValue:  " << I << "\n");
    Value *AggShadow = getShadow(I.getAggregateOperand());
    Value *InsShadow = getShadow(I.getInsertedValueOperand());
    LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
    LLVM_DEBUG(dbgs() << "   InsShadow:  " << *InsShadow << "\n");
    Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
    LLVM_DEBUG(dbgs() << "   Res:        " << *Res << "\n");
    setShadow(&I, Res);
    setOriginForNaryOp(I);
  }
3603 
3604   void dumpInst(Instruction &I) {
3605     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
3606       errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n";
3607     } else {
3608       errs() << "ZZZ " << I.getOpcodeName() << "\n";
3609     }
3610     errs() << "QQQ " << I << "\n";
3611   }
3612 
  void visitResumeInst(ResumeInst &I) {
    LLVM_DEBUG(dbgs() << "Resume: " << I << "\n");
    // Nothing to do here: resume produces no value to shadow.
  }
3617 
  void visitCleanupReturnInst(CleanupReturnInst &CRI) {
    LLVM_DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n");
    // Nothing to do here: cleanupret produces no value to shadow.
  }
3622 
  void visitCatchReturnInst(CatchReturnInst &CRI) {
    LLVM_DEBUG(dbgs() << "CatchReturn: " << CRI << "\n");
    // Nothing to do here: catchret produces no value to shadow.
  }
3627 
3628   void instrumentAsmArgument(Value *Operand, Instruction &I, IRBuilder<> &IRB,
3629                              const DataLayout &DL, bool isOutput) {
3630     // For each assembly argument, we check its value for being initialized.
3631     // If the argument is a pointer, we assume it points to a single element
3632     // of the corresponding type (or to a 8-byte word, if the type is unsized).
3633     // Each such pointer is instrumented with a call to the runtime library.
3634     Type *OpType = Operand->getType();
3635     // Check the operand value itself.
3636     insertShadowCheck(Operand, &I);
3637     if (!OpType->isPointerTy() || !isOutput) {
3638       assert(!isOutput);
3639       return;
3640     }
3641     Type *ElType = OpType->getPointerElementType();
3642     if (!ElType->isSized())
3643       return;
3644     int Size = DL.getTypeStoreSize(ElType);
3645     Value *Ptr = IRB.CreatePointerCast(Operand, IRB.getInt8PtrTy());
3646     Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
3647     IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Ptr, SizeVal});
3648   }
3649 
3650   /// Get the number of output arguments returned by pointers.
3651   int getNumOutputArgs(InlineAsm *IA, CallBase *CB) {
3652     int NumRetOutputs = 0;
3653     int NumOutputs = 0;
3654     Type *RetTy = cast<Value>(CB)->getType();
3655     if (!RetTy->isVoidTy()) {
3656       // Register outputs are returned via the CallInst return value.
3657       auto *ST = dyn_cast<StructType>(RetTy);
3658       if (ST)
3659         NumRetOutputs = ST->getNumElements();
3660       else
3661         NumRetOutputs = 1;
3662     }
3663     InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
3664     for (size_t i = 0, n = Constraints.size(); i < n; i++) {
3665       InlineAsm::ConstraintInfo Info = Constraints[i];
3666       switch (Info.Type) {
3667       case InlineAsm::isOutput:
3668         NumOutputs++;
3669         break;
3670       default:
3671         break;
3672       }
3673     }
3674     return NumOutputs - NumRetOutputs;
3675   }
3676 
3677   void visitAsmInstruction(Instruction &I) {
3678     // Conservative inline assembly handling: check for poisoned shadow of
3679     // asm() arguments, then unpoison the result and all the memory locations
3680     // pointed to by those arguments.
3681     // An inline asm() statement in C++ contains lists of input and output
3682     // arguments used by the assembly code. These are mapped to operands of the
3683     // CallInst as follows:
3684     //  - nR register outputs ("=r) are returned by value in a single structure
3685     //  (SSA value of the CallInst);
3686     //  - nO other outputs ("=m" and others) are returned by pointer as first
3687     // nO operands of the CallInst;
3688     //  - nI inputs ("r", "m" and others) are passed to CallInst as the
3689     // remaining nI operands.
3690     // The total number of asm() arguments in the source is nR+nO+nI, and the
3691     // corresponding CallInst has nO+nI+1 operands (the last operand is the
3692     // function to be called).
3693     const DataLayout &DL = F.getParent()->getDataLayout();
3694     CallBase *CB = cast<CallBase>(&I);
3695     IRBuilder<> IRB(&I);
3696     InlineAsm *IA = cast<InlineAsm>(CB->getCalledValue());
3697     int OutputArgs = getNumOutputArgs(IA, CB);
3698     // The last operand of a CallInst is the function itself.
3699     int NumOperands = CB->getNumOperands() - 1;
3700 
3701     // Check input arguments. Doing so before unpoisoning output arguments, so
3702     // that we won't overwrite uninit values before checking them.
3703     for (int i = OutputArgs; i < NumOperands; i++) {
3704       Value *Operand = CB->getOperand(i);
3705       instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ false);
3706     }
3707     // Unpoison output arguments. This must happen before the actual InlineAsm
3708     // call, so that the shadow for memory published in the asm() statement
3709     // remains valid.
3710     for (int i = 0; i < OutputArgs; i++) {
3711       Value *Operand = CB->getOperand(i);
3712       instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ true);
3713     }
3714 
3715     setShadow(&I, getCleanShadow(&I));
3716     setOrigin(&I, getCleanOrigin());
3717   }
3718 
3719   void visitInstruction(Instruction &I) {
3720     // Everything else: stop propagating and check for poisoned shadow.
3721     if (ClDumpStrictInstructions)
3722       dumpInst(I);
3723     LLVM_DEBUG(dbgs() << "DEFAULT: " << I << "\n");
3724     for (size_t i = 0, n = I.getNumOperands(); i < n; i++) {
3725       Value *Operand = I.getOperand(i);
3726       if (Operand->getType()->isSized())
3727         insertShadowCheck(Operand, &I);
3728     }
3729     setShadow(&I, getCleanShadow(&I));
3730     setOrigin(&I, getCleanOrigin());
3731   }
3732 };
3733 
/// AMD64-specific implementation of VarArgHelper.
///
/// At every variadic call site, stores the shadow of each variadic argument
/// into the va_arg TLS array (__msan_va_arg_tls) laid out like the AMD64
/// register save area: 48 bytes of GP registers, then FP/SIMD registers,
/// then the overflow (stack) arguments. finalizeInstrumentation() later
/// copies that backup over the shadow of the areas va_start publishes.
struct VarArgAMD64Helper : public VarArgHelper {
  // An unfortunate workaround for asymmetric lowering of va_arg stuff.
  // See a comment in visitCallSite for more details.
  static const unsigned AMD64GpEndOffset = 48;  // AMD64 ABI Draft 0.99.6 p3.5.7
  static const unsigned AMD64FpEndOffsetSSE = 176;
  // If SSE is disabled, fp_offset in va_list is zero.
  static const unsigned AMD64FpEndOffsetNoSSE = AMD64GpEndOffset;

  // End offset of the FP register region within the va_arg TLS array;
  // chosen in the constructor depending on whether SSE is enabled.
  unsigned AMD64FpEndOffset;
  Function &F;
  MemorySanitizer &MS;
  MemorySanitizerVisitor &MSV;
  // Entry-block backup of __msan_va_arg_tls; created in
  // finalizeInstrumentation() only if the function contains a va_start.
  Value *VAArgTLSCopy = nullptr;
  // Entry-block backup of __msan_va_arg_origin_tls (origin tracking only).
  Value *VAArgTLSOriginCopy = nullptr;
  // Size of the overflow argument area, loaded from VAArgOverflowSizeTLS.
  Value *VAArgOverflowSize = nullptr;

  SmallVector<CallInst*, 16> VAStartInstrumentationList;

  enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };

  VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
                    MemorySanitizerVisitor &MSV)
      : F(F), MS(MS), MSV(MSV) {
    AMD64FpEndOffset = AMD64FpEndOffsetSSE;
    // With SSE disabled (a "-sse" entry in the "target-features" attribute),
    // va_start saves no FP registers, so the FP region collapses to empty.
    for (const auto &Attr : F.getAttributes().getFnAttributes()) {
      if (Attr.isStringAttribute() &&
          (Attr.getKindAsString() == "target-features")) {
        if (Attr.getValueAsString().contains("-sse"))
          AMD64FpEndOffset = AMD64FpEndOffsetNoSSE;
        break;
      }
    }
  }

  // Classify an argument roughly the way the AMD64 calling convention would:
  // FP/vector -> SSE register, integer (<= 64 bit) or pointer -> GP register,
  // everything else -> memory.
  ArgKind classifyArgument(Value* arg) {
    // A very rough approximation of X86_64 argument classification rules.
    Type *T = arg->getType();
    if (T->isFPOrFPVectorTy() || T->isX86_MMXTy())
      return AK_FloatingPoint;
    if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
      return AK_GeneralPurpose;
    if (T->isPointerTy())
      return AK_GeneralPurpose;
    return AK_Memory;
  }

  // For VarArg functions, store the argument shadow in an ABI-specific format
  // that corresponds to va_list layout.
  // We do this because Clang lowers va_arg in the frontend, and this pass
  // only sees the low level code that deals with va_list internals.
  // A much easier alternative (provided that Clang emits va_arg instructions)
  // would have been to associate each live instance of va_list with a copy of
  // MSanParamTLS, and extract shadow on va_arg() call in the argument list
  // order.
  void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
    unsigned GpOffset = 0;
    unsigned FpOffset = AMD64GpEndOffset;
    unsigned OverflowOffset = AMD64FpEndOffset;
    const DataLayout &DL = F.getParent()->getDataLayout();
    for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
         ArgIt != End; ++ArgIt) {
      Value *A = *ArgIt;
      unsigned ArgNo = CS.getArgumentNo(ArgIt);
      bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams();
      bool IsByVal = CS.paramHasAttr(ArgNo, Attribute::ByVal);
      if (IsByVal) {
        // ByVal arguments always go to the overflow area.
        // Fixed arguments passed through the overflow area will be stepped
        // over by va_start, so don't count them towards the offset.
        if (IsFixed)
          continue;
        assert(A->getType()->isPointerTy());
        Type *RealTy = A->getType()->getPointerElementType();
        uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
        // getShadowPtrForVAArgument() returns null when the argument would
        // not fit into __msan_va_arg_tls; the offset is still advanced so
        // later arguments keep their ABI positions.
        Value *ShadowBase = getShadowPtrForVAArgument(
            RealTy, IRB, OverflowOffset, alignTo(ArgSize, 8));
        Value *OriginBase = nullptr;
        if (MS.TrackOrigins)
          OriginBase = getOriginPtrForVAArgument(RealTy, IRB, OverflowOffset);
        OverflowOffset += alignTo(ArgSize, 8);
        if (!ShadowBase)
          continue;
        // ByVal shadow lives in application shadow memory, not in ParamTLS;
        // copy it from there into the va_arg TLS backup area.
        Value *ShadowPtr, *OriginPtr;
        std::tie(ShadowPtr, OriginPtr) =
            MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment,
                                   /*isStore*/ false);

        IRB.CreateMemCpy(ShadowBase, kShadowTLSAlignment, ShadowPtr,
                         kShadowTLSAlignment, ArgSize);
        if (MS.TrackOrigins)
          IRB.CreateMemCpy(OriginBase, kShadowTLSAlignment, OriginPtr,
                           kShadowTLSAlignment, ArgSize);
      } else {
        ArgKind AK = classifyArgument(A);
        // Once the corresponding register class is exhausted, the argument
        // is passed in memory instead.
        if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
          AK = AK_Memory;
        if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
          AK = AK_Memory;
        Value *ShadowBase, *OriginBase = nullptr;
        switch (AK) {
          case AK_GeneralPurpose:
            ShadowBase =
                getShadowPtrForVAArgument(A->getType(), IRB, GpOffset, 8);
            if (MS.TrackOrigins)
              OriginBase =
                  getOriginPtrForVAArgument(A->getType(), IRB, GpOffset);
            GpOffset += 8;
            break;
          case AK_FloatingPoint:
            ShadowBase =
                getShadowPtrForVAArgument(A->getType(), IRB, FpOffset, 16);
            if (MS.TrackOrigins)
              OriginBase =
                  getOriginPtrForVAArgument(A->getType(), IRB, FpOffset);
            FpOffset += 16;
            break;
          case AK_Memory:
            if (IsFixed)
              continue;
            uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
            ShadowBase =
                getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset, 8);
            if (MS.TrackOrigins)
              OriginBase =
                  getOriginPtrForVAArgument(A->getType(), IRB, OverflowOffset);
            OverflowOffset += alignTo(ArgSize, 8);
        }
        // Take fixed arguments into account for GpOffset and FpOffset,
        // but don't actually store shadows for them.
        // TODO(glider): don't call get*PtrForVAArgument() for them.
        if (IsFixed)
          continue;
        if (!ShadowBase)
          continue;
        Value *Shadow = MSV.getShadow(A);
        IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment);
        if (MS.TrackOrigins) {
          Value *Origin = MSV.getOrigin(A);
          unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
          MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
                          std::max(kShadowTLSAlignment, kMinOriginAlignment));
        }
      }
    }
    // Publish the overflow-area size so finalizeInstrumentation() (which may
    // run for a different call site ordering) knows how much TLS to back up.
    Constant *OverflowSize =
      ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
    IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
  }

  /// Compute the shadow address for a given va_arg.
  /// Returns null if [ArgOffset, ArgOffset + ArgSize) would not fit into
  /// __msan_va_arg_tls; callers must check for that and skip the store.
  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
                                   unsigned ArgOffset, unsigned ArgSize) {
    // Make sure we don't overflow __msan_va_arg_tls.
    if (ArgOffset + ArgSize > kParamTLSSize)
      return nullptr;
    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
                              "_msarg_va_s");
  }

  /// Compute the origin address for a given va_arg.
  Value *getOriginPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, int ArgOffset) {
    Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
    // getOriginPtrForVAArgument() is always called after
    // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never
    // overflow.
    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
    return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
                              "_msarg_va_o");
  }

  // Mark the whole va_list structure as initialized: it is written by the
  // va_start/va_copy libcall machinery, which MSan does not see.
  void unpoisonVAListTagForInst(IntrinsicInst &I) {
    IRBuilder<> IRB(&I);
    Value *VAListTag = I.getArgOperand(0);
    Value *ShadowPtr, *OriginPtr;
    unsigned Alignment = 8;
    std::tie(ShadowPtr, OriginPtr) =
        MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
                               /*isStore*/ true);

    // Unpoison the whole __va_list_tag.
    // FIXME: magic ABI constants.
    // 24 = sizeof(__va_list_tag) on AMD64: two 32-bit offsets plus two
    // pointers.
    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
                     /* size */ 24, Alignment, false);
    // We shouldn't need to zero out the origins, as they're only checked for
    // nonzero shadow.
  }

  void visitVAStartInst(VAStartInst &I) override {
    // Win64 uses a different (single pointer) va_list; not handled here.
    if (F.getCallingConv() == CallingConv::Win64)
      return;
    VAStartInstrumentationList.push_back(&I);
    unpoisonVAListTagForInst(I);
  }

  void visitVACopyInst(VACopyInst &I) override {
    if (F.getCallingConv() == CallingConv::Win64) return;
    unpoisonVAListTagForInst(I);
  }

  void finalizeInstrumentation() override {
    assert(!VAArgOverflowSize && !VAArgTLSCopy &&
           "finalizeInstrumentation called twice");
    if (!VAStartInstrumentationList.empty()) {
      // If there is a va_start in this function, make a backup copy of
      // va_arg_tls somewhere in the function entry block.
      IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
      VAArgOverflowSize =
          IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
      // Total size = register save area + overflow area.
      Value *CopySize =
        IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset),
                      VAArgOverflowSize);
      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
      IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
      if (MS.TrackOrigins) {
        VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
        IRB.CreateMemCpy(VAArgTLSOriginCopy, 8, MS.VAArgOriginTLS, 8, CopySize);
      }
    }

    // Instrument va_start.
    // Copy va_list shadow from the backup copy of the TLS contents.
    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
      CallInst *OrigInst = VAStartInstrumentationList[i];
      IRBuilder<> IRB(OrigInst->getNextNode());
      Value *VAListTag = OrigInst->getArgOperand(0);

      // __va_list_tag layout on AMD64: gp_offset at 0, fp_offset at 4,
      // overflow_arg_area pointer at offset 8, reg_save_area pointer at 16.
      Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
      Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
          IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
                        ConstantInt::get(MS.IntptrTy, 16)),
          PointerType::get(RegSaveAreaPtrTy, 0));
      Value *RegSaveAreaPtr =
          IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
      Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
      unsigned Alignment = 16;
      std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
          MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
                                 Alignment, /*isStore*/ true);
      IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
                       AMD64FpEndOffset);
      if (MS.TrackOrigins)
        IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
                         Alignment, AMD64FpEndOffset);
      Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
      Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
          IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
                        ConstantInt::get(MS.IntptrTy, 8)),
          PointerType::get(OverflowArgAreaPtrTy, 0));
      Value *OverflowArgAreaPtr =
          IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
      Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
      std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
          MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
                                 Alignment, /*isStore*/ true);
      // The overflow-area shadow starts right after the register save area
      // inside the TLS backup.
      Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
                                             AMD64FpEndOffset);
      IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
                       VAArgOverflowSize);
      if (MS.TrackOrigins) {
        SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
                                        AMD64FpEndOffset);
        IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
                         VAArgOverflowSize);
      }
    }
  }
};
4004 
/// MIPS64-specific implementation of VarArgHelper.
///
/// Unlike AMD64, MIPS N64 passes all variadic arguments in one contiguous
/// sequence (register args spill into a register save area contiguous with
/// the stack args), so the shadows are simply stored back-to-back at 8-byte
/// granularity in the va_arg TLS array.
struct VarArgMIPS64Helper : public VarArgHelper {
  Function &F;
  MemorySanitizer &MS;
  MemorySanitizerVisitor &MSV;
  // Entry-block backup of __msan_va_arg_tls; created in
  // finalizeInstrumentation() only if the function contains a va_start.
  Value *VAArgTLSCopy = nullptr;
  // Total byte size of all variadic-argument shadow, loaded from
  // VAArgOverflowSizeTLS (reused here as a plain size slot).
  Value *VAArgSize = nullptr;

  SmallVector<CallInst*, 16> VAStartInstrumentationList;

  VarArgMIPS64Helper(Function &F, MemorySanitizer &MS,
                    MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}

  void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
    unsigned VAArgOffset = 0;
    const DataLayout &DL = F.getParent()->getDataLayout();
    // Iteration starts past the fixed parameters: only the variadic tail
    // needs its shadow saved.
    for (CallSite::arg_iterator ArgIt = CS.arg_begin() +
         CS.getFunctionType()->getNumParams(), End = CS.arg_end();
         ArgIt != End; ++ArgIt) {
      Triple TargetTriple(F.getParent()->getTargetTriple());
      Value *A = *ArgIt;
      Value *Base;
      uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
      // Triple::mips64 is the big-endian variant; mips64el needs no
      // adjustment.
      if (TargetTriple.getArch() == Triple::mips64) {
        // Adjusting the shadow for argument with size < 8 to match the placement
        // of bits in big endian system
        if (ArgSize < 8)
          VAArgOffset += (8 - ArgSize);
      }
      // Base is null when the argument would overflow __msan_va_arg_tls;
      // the offset is still advanced to keep later slots in place.
      Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize);
      VAArgOffset += ArgSize;
      VAArgOffset = alignTo(VAArgOffset, 8);
      if (!Base)
        continue;
      IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
    }

    Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset);
    // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
    // a new class member i.e. it is the total size of all VarArgs.
    IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
  }

  /// Compute the shadow address for a given va_arg.
  /// Returns null if the slot would not fit into __msan_va_arg_tls.
  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
                                   unsigned ArgOffset, unsigned ArgSize) {
    // Make sure we don't overflow __msan_va_arg_tls.
    if (ArgOffset + ArgSize > kParamTLSSize)
      return nullptr;
    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
                              "_msarg");
  }

  void visitVAStartInst(VAStartInst &I) override {
    IRBuilder<> IRB(&I);
    VAStartInstrumentationList.push_back(&I);
    Value *VAListTag = I.getArgOperand(0);
    Value *ShadowPtr, *OriginPtr;
    unsigned Alignment = 8;
    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
    // Unpoison the va_list itself: 8 bytes — presumably a single pointer on
    // this ABI (the finalize code below also reads just one pointer from it).
    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
                     /* size */ 8, Alignment, false);
  }

  void visitVACopyInst(VACopyInst &I) override {
    IRBuilder<> IRB(&I);
    // va_copy is handled exactly like va_start: the destination va_list
    // (operand 0) is unpoisoned here and gets the TLS backup copied over its
    // register save area in finalizeInstrumentation().
    VAStartInstrumentationList.push_back(&I);
    Value *VAListTag = I.getArgOperand(0);
    Value *ShadowPtr, *OriginPtr;
    unsigned Alignment = 8;
    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
                     /* size */ 8, Alignment, false);
  }

  void finalizeInstrumentation() override {
    assert(!VAArgSize && !VAArgTLSCopy &&
           "finalizeInstrumentation called twice");
    IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
    VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
    // No register-save-area prefix on this target, hence the add with 0
    // (kept for symmetry with the other VarArg helpers).
    Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
                                    VAArgSize);

    if (!VAStartInstrumentationList.empty()) {
      // If there is a va_start in this function, make a backup copy of
      // va_arg_tls somewhere in the function entry block.
      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
      IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
    }

    // Instrument va_start.
    // Copy va_list shadow from the backup copy of the TLS contents.
    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
      CallInst *OrigInst = VAStartInstrumentationList[i];
      IRBuilder<> IRB(OrigInst->getNextNode());
      Value *VAListTag = OrigInst->getArgOperand(0);
      Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
      // The va_list holds a single pointer (at offset 0) to the argument
      // save area.
      Value *RegSaveAreaPtrPtr =
          IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
                             PointerType::get(RegSaveAreaPtrTy, 0));
      Value *RegSaveAreaPtr =
          IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
      Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
      unsigned Alignment = 8;
      std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
          MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
                                 Alignment, /*isStore*/ true);
      IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
                       CopySize);
    }
  }
};
4121 
/// AArch64-specific implementation of VarArgHelper.
///
/// Mirrors the AAPCS64 variadic layout inside the va_arg TLS array:
/// 64 bytes of general registers (x0-x7), then 128 bytes of FP/SIMD
/// registers (q0-q7), then the stack-passed arguments.
struct VarArgAArch64Helper : public VarArgHelper {
  static const unsigned kAArch64GrArgSize = 64;
  static const unsigned kAArch64VrArgSize = 128;

  static const unsigned AArch64GrBegOffset = 0;
  static const unsigned AArch64GrEndOffset = kAArch64GrArgSize;
  // Make VR space aligned to 16 bytes.
  static const unsigned AArch64VrBegOffset = AArch64GrEndOffset;
  static const unsigned AArch64VrEndOffset = AArch64VrBegOffset
                                             + kAArch64VrArgSize;
  static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;

  Function &F;
  MemorySanitizer &MS;
  MemorySanitizerVisitor &MSV;
  // Entry-block backup of __msan_va_arg_tls; created in
  // finalizeInstrumentation() only if the function contains a va_start.
  Value *VAArgTLSCopy = nullptr;
  // Size of the stack-passed argument area, loaded from VAArgOverflowSizeTLS.
  Value *VAArgOverflowSize = nullptr;

  SmallVector<CallInst*, 16> VAStartInstrumentationList;

  enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };

  VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
                    MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}

  // Rough AAPCS64 classification: FP/vector -> VR register,
  // integer (<= 64 bit) or pointer -> GR register, otherwise memory.
  ArgKind classifyArgument(Value* arg) {
    Type *T = arg->getType();
    if (T->isFPOrFPVectorTy())
      return AK_FloatingPoint;
    if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
        || (T->isPointerTy()))
      return AK_GeneralPurpose;
    return AK_Memory;
  }

  // The instrumentation stores the argument shadow in a non ABI-specific
  // format because it does not know which argument is named (since Clang,
  // like x86_64 case, lowers the va_args in the frontend and this pass only
  // sees the low level code that deals with va_list internals).
  // The first eight GR registers are saved in the first 64 bytes of the
  // va_arg TLS array, followed by the first 8 FP/SIMD registers, and then
  // the remaining arguments.
  // Using constant offset within the va_arg TLS array allows fast copy
  // in the finalize instrumentation.
  void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
    unsigned GrOffset = AArch64GrBegOffset;
    unsigned VrOffset = AArch64VrBegOffset;
    unsigned OverflowOffset = AArch64VAEndOffset;

    const DataLayout &DL = F.getParent()->getDataLayout();
    for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
         ArgIt != End; ++ArgIt) {
      Value *A = *ArgIt;
      unsigned ArgNo = CS.getArgumentNo(ArgIt);
      bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams();
      ArgKind AK = classifyArgument(A);
      // Once a register class is exhausted, the argument goes to memory.
      if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset)
        AK = AK_Memory;
      if (AK == AK_FloatingPoint && VrOffset >= AArch64VrEndOffset)
        AK = AK_Memory;
      Value *Base;
      switch (AK) {
        case AK_GeneralPurpose:
          Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset, 8);
          GrOffset += 8;
          break;
        case AK_FloatingPoint:
          // NOTE(review): the overflow check is given ArgSize 8 although a
          // 16-byte VR slot is reserved below — verify that an FP vector
          // shadow store near the end of kParamTLSSize cannot overrun.
          Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset, 8);
          VrOffset += 16;
          break;
        case AK_Memory:
          // Don't count fixed arguments in the overflow area - va_start will
          // skip right over them.
          if (IsFixed)
            continue;
          uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
          Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset,
                                           alignTo(ArgSize, 8));
          OverflowOffset += alignTo(ArgSize, 8);
          break;
      }
      // Count Gp/Vr fixed arguments to their respective offsets, but don't
      // bother to actually store a shadow.
      if (IsFixed)
        continue;
      if (!Base)
        continue;
      IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
    }
    Constant *OverflowSize =
      ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset);
    IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
  }

  /// Compute the shadow address for a given va_arg.
  /// Returns null if the slot would not fit into __msan_va_arg_tls.
  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
                                   unsigned ArgOffset, unsigned ArgSize) {
    // Make sure we don't overflow __msan_va_arg_tls.
    if (ArgOffset + ArgSize > kParamTLSSize)
      return nullptr;
    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
                              "_msarg");
  }

  void visitVAStartInst(VAStartInst &I) override {
    IRBuilder<> IRB(&I);
    VAStartInstrumentationList.push_back(&I);
    Value *VAListTag = I.getArgOperand(0);
    Value *ShadowPtr, *OriginPtr;
    unsigned Alignment = 8;
    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
    // Unpoison the 32-byte AAPCS64 va_list: stack, gr_top and vr_top
    // pointers plus the two 32-bit offsets (matches the field offsets read
    // by getVAField64/getVAField32 in finalizeInstrumentation).
    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
                     /* size */ 32, Alignment, false);
  }

  void visitVACopyInst(VACopyInst &I) override {
    IRBuilder<> IRB(&I);
    // va_copy is treated like va_start: the destination va_list (operand 0)
    // is unpoisoned and later receives the TLS shadow backup.
    VAStartInstrumentationList.push_back(&I);
    Value *VAListTag = I.getArgOperand(0);
    Value *ShadowPtr, *OriginPtr;
    unsigned Alignment = 8;
    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
                     /* size */ 32, Alignment, false);
  }

  // Retrieve a va_list field of 'void*' size.
  Value* getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
    Value *SaveAreaPtrPtr =
      IRB.CreateIntToPtr(
        IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
                      ConstantInt::get(MS.IntptrTy, offset)),
        Type::getInt64PtrTy(*MS.C));
    return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr);
  }

  // Retrieve a va_list field of 'int' size.
  Value* getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
    Value *SaveAreaPtr =
      IRB.CreateIntToPtr(
        IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
                      ConstantInt::get(MS.IntptrTy, offset)),
        Type::getInt32PtrTy(*MS.C));
    Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr);
    // __gr_offs/__vr_offs are negative, so sign-extend to pointer width.
    return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
  }

  void finalizeInstrumentation() override {
    assert(!VAArgOverflowSize && !VAArgTLSCopy &&
           "finalizeInstrumentation called twice");
    if (!VAStartInstrumentationList.empty()) {
      // If there is a va_start in this function, make a backup copy of
      // va_arg_tls somewhere in the function entry block.
      IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
      VAArgOverflowSize =
          IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
      // Total size = GR region + VR region + stack-argument area.
      Value *CopySize =
        IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset),
                      VAArgOverflowSize);
      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
      IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
    }

    Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
    Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize);

    // Instrument va_start, copy va_list shadow from the backup copy of
    // the TLS contents.
    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
      CallInst *OrigInst = VAStartInstrumentationList[i];
      IRBuilder<> IRB(OrigInst->getNextNode());

      Value *VAListTag = OrigInst->getArgOperand(0);

      // The variadic ABI for AArch64 creates two areas to save the incoming
      // argument registers (one for 64-bit general register xn-x7 and another
      // for 128-bit FP/SIMD vn-v7).
      // We need then to propagate the shadow arguments on both regions
      // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'.
      // The remaining arguments are saved on shadow for 'va::stack'.
      // One caveat is it requires only to propagate the non-named arguments,
      // however on the call site instrumentation 'all' the arguments are
      // saved. So to copy the shadow values from the va_arg TLS array
      // we need to adjust the offset for both GR and VR fields based on
      // the __{gr,vr}_offs value (since they are stores based on incoming
      // named arguments).

      // Read the stack pointer from the va_list.
      Value *StackSaveAreaPtr = getVAField64(IRB, VAListTag, 0);

      // Read both the __gr_top and __gr_off and add them up.
      Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8);
      Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24);

      Value *GrRegSaveAreaPtr = IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea);

      // Read both the __vr_top and __vr_off and add them up.
      Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16);
      Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28);

      Value *VrRegSaveAreaPtr = IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea);

      // It does not know how many named arguments is being used and, on the
      // callsite all the arguments were saved.  Since __gr_off is defined as
      // '0 - ((8 - named_gr) * 8)', the idea is to just propagate the variadic
      // argument by ignoring the bytes of shadow from named arguments.
      // GrArgSize + __gr_offs is thus the TLS offset of the first unnamed
      // GR argument's shadow.
      Value *GrRegSaveAreaShadowPtrOff =
        IRB.CreateAdd(GrArgSize, GrOffSaveArea);

      Value *GrRegSaveAreaShadowPtr =
          MSV.getShadowOriginPtr(GrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
                                 /*Alignment*/ 8, /*isStore*/ true)
              .first;

      // Copy size = GrArgSize - (GrArgSize + __gr_offs) = -__gr_offs, i.e.
      // exactly the bytes belonging to unnamed GR arguments.
      Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
                                              GrRegSaveAreaShadowPtrOff);
      Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);

      IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, 8, GrSrcPtr, 8, GrCopySize);

      // Again, but for FP/SIMD values.
      Value *VrRegSaveAreaShadowPtrOff =
          IRB.CreateAdd(VrArgSize, VrOffSaveArea);

      Value *VrRegSaveAreaShadowPtr =
          MSV.getShadowOriginPtr(VrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
                                 /*Alignment*/ 8, /*isStore*/ true)
              .first;

      Value *VrSrcPtr = IRB.CreateInBoundsGEP(
        IRB.getInt8Ty(),
        IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
                              IRB.getInt32(AArch64VrBegOffset)),
        VrRegSaveAreaShadowPtrOff);
      Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);

      IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, 8, VrSrcPtr, 8, VrCopySize);

      // And finally for remaining arguments.
      Value *StackSaveAreaShadowPtr =
          MSV.getShadowOriginPtr(StackSaveAreaPtr, IRB, IRB.getInt8Ty(),
                                 /*Alignment*/ 16, /*isStore*/ true)
              .first;

      Value *StackSrcPtr =
        IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
                              IRB.getInt32(AArch64VAEndOffset));

      IRB.CreateMemCpy(StackSaveAreaShadowPtr, 16, StackSrcPtr, 16,
                       VAArgOverflowSize);
    }
  }
};
4380 
/// PowerPC64-specific implementation of VarArgHelper.
///
/// Mirrors the layout of the PPC64 parameter save area in the
/// __msan_va_arg_tls shadow region: visitCallSite() writes argument shadow
/// at the same offsets the arguments occupy on the stack, and
/// finalizeInstrumentation() copies that shadow over the register save area
/// pointed to by the va_list after each va_start.
struct VarArgPowerPC64Helper : public VarArgHelper {
  Function &F;
  MemorySanitizer &MS;
  MemorySanitizerVisitor &MSV;
  // Function-entry backup of the va_arg shadow TLS area; created in
  // finalizeInstrumentation() only if the function contains a va_start.
  Value *VAArgTLSCopy = nullptr;
  // Runtime total byte size of all variadic arguments, loaded back from
  // VAArgOverflowSizeTLS (see note in visitCallSite()).
  Value *VAArgSize = nullptr;

  // va_start calls to patch up once the TLS backup copy exists.
  SmallVector<CallInst*, 16> VAStartInstrumentationList;

  VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS,
                    MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}

  // Record the shadow of every variadic argument of this call into
  // __msan_va_arg_tls, at the offset the argument will occupy in the
  // callee's parameter save area, and store the total vararg size into
  // VAArgOverflowSizeTLS.
  void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
    // For PowerPC, we need to deal with alignment of stack arguments -
    // they are mostly aligned to 8 bytes, but vectors and i128 arrays
    // are aligned to 16 bytes, byvals can be aligned to 8 or 16 bytes,
    // and QPX vectors are aligned to 32 bytes.  For that reason, we
    // compute current offset from stack pointer (which is always properly
    // aligned), and offset for the first vararg, then subtract them.
    unsigned VAArgBase;
    Triple TargetTriple(F.getParent()->getTargetTriple());
    // Parameter save area starts at 48 bytes from frame pointer for ABIv1,
    // and 32 bytes for ABIv2.  This is usually determined by target
    // endianness, but in theory could be overridden by function attribute.
    // For simplicity, we ignore it here (it'd only matter for QPX vectors).
    if (TargetTriple.getArch() == Triple::ppc64)
      VAArgBase = 48;
    else
      VAArgBase = 32;
    unsigned VAArgOffset = VAArgBase;
    const DataLayout &DL = F.getParent()->getDataLayout();
    for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
         ArgIt != End; ++ArgIt) {
      Value *A = *ArgIt;
      unsigned ArgNo = CS.getArgumentNo(ArgIt);
      // Arguments past the prototype's parameter count are the varargs.
      bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams();
      bool IsByVal = CS.paramHasAttr(ArgNo, Attribute::ByVal);
      if (IsByVal) {
        // A byval argument is passed as a pointer, but occupies the size of
        // the pointee in the parameter save area; copy its shadow bytewise.
        assert(A->getType()->isPointerTy());
        Type *RealTy = A->getType()->getPointerElementType();
        uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
        uint64_t ArgAlign = CS.getParamAlignment(ArgNo);
        if (ArgAlign < 8)
          ArgAlign = 8;
        VAArgOffset = alignTo(VAArgOffset, ArgAlign);
        if (!IsFixed) {
          Value *Base = getShadowPtrForVAArgument(
              RealTy, IRB, VAArgOffset - VAArgBase, ArgSize);
          if (Base) {
            Value *AShadowPtr, *AOriginPtr;
            std::tie(AShadowPtr, AOriginPtr) =
                MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(),
                                       kShadowTLSAlignment, /*isStore*/ false);

            IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr,
                             kShadowTLSAlignment, ArgSize);
          }
        }
        VAArgOffset += alignTo(ArgSize, 8);
      } else {
        Value *Base;
        uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
        uint64_t ArgAlign = 8;
        if (A->getType()->isArrayTy()) {
          // Arrays are aligned to element size, except for long double
          // arrays, which are aligned to 8 bytes.
          Type *ElementTy = A->getType()->getArrayElementType();
          if (!ElementTy->isPPC_FP128Ty())
            ArgAlign = DL.getTypeAllocSize(ElementTy);
        } else if (A->getType()->isVectorTy()) {
          // Vectors are naturally aligned.
          ArgAlign = DL.getTypeAllocSize(A->getType());
        }
        if (ArgAlign < 8)
          ArgAlign = 8;
        VAArgOffset = alignTo(VAArgOffset, ArgAlign);
        if (DL.isBigEndian()) {
          // On big-endian targets a value narrower than a doubleword sits in
          // the high-order bytes of its 8-byte slot; shift the shadow offset
          // so it lines up with the actual bytes of the argument.
          if (ArgSize < 8)
            VAArgOffset += (8 - ArgSize);
        }
        if (!IsFixed) {
          Base = getShadowPtrForVAArgument(A->getType(), IRB,
                                           VAArgOffset - VAArgBase, ArgSize);
          if (Base)
            IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
        }
        VAArgOffset += ArgSize;
        VAArgOffset = alignTo(VAArgOffset, 8);
      }
      // While still inside the fixed arguments, keep moving the base forward
      // so vararg offsets are measured from the first variadic slot.
      if (IsFixed)
        VAArgBase = VAArgOffset;
    }

    Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(),
                                                VAArgOffset - VAArgBase);
    // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
    // a new class member i.e. it is the total size of all VarArgs.
    IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
  }

  /// Compute the shadow address for a given va_arg.
  ///
  /// \p ArgOffset is relative to the start of the vararg area; returns
  /// nullptr if the argument would not fit in __msan_va_arg_tls, in which
  /// case its shadow is simply not recorded.
  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
                                   unsigned ArgOffset, unsigned ArgSize) {
    // Make sure we don't overflow __msan_va_arg_tls.
    if (ArgOffset + ArgSize > kParamTLSSize)
      return nullptr;
    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
                              "_msarg");
  }

  // Unpoison the va_list object itself right at the va_start; the shadow of
  // the referenced save area is filled in later by finalizeInstrumentation().
  void visitVAStartInst(VAStartInst &I) override {
    IRBuilder<> IRB(&I);
    VAStartInstrumentationList.push_back(&I);
    Value *VAListTag = I.getArgOperand(0);
    Value *ShadowPtr, *OriginPtr;
    unsigned Alignment = 8;
    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
    // NOTE(review): only 8 bytes are unpoisoned — presumably the PPC64
    // va_list is a single pointer; same magic constant as in
    // visitVACopyInst below.
    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
                     /* size */ 8, Alignment, false);
  }

  // Unpoison the destination va_list of a va_copy.
  void visitVACopyInst(VACopyInst &I) override {
    IRBuilder<> IRB(&I);
    Value *VAListTag = I.getArgOperand(0);
    Value *ShadowPtr, *OriginPtr;
    unsigned Alignment = 8;
    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
    // Unpoison the whole __va_list_tag.
    // FIXME: magic ABI constants.
    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
                     /* size */ 8, Alignment, false);
  }

  // Runs after the whole function has been visited: snapshot the va_arg TLS
  // shadow at function entry, then after every va_start copy it over the
  // shadow of the register save area the va_list points to.
  void finalizeInstrumentation() override {
    assert(!VAArgSize && !VAArgTLSCopy &&
           "finalizeInstrumentation called twice");
    IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
    VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
    // Adding 0 is a no-op; presumably kept for symmetry with helpers that
    // add a fixed register-save-area size here.
    Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
                                    VAArgSize);

    if (!VAStartInstrumentationList.empty()) {
      // If there is a va_start in this function, make a backup copy of
      // va_arg_tls somewhere in the function entry block.
      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
      IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
    }

    // Instrument va_start.
    // Copy va_list shadow from the backup copy of the TLS contents.
    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
      CallInst *OrigInst = VAStartInstrumentationList[i];
      // Insert right after the va_start so the va_list is initialized.
      IRBuilder<> IRB(OrigInst->getNextNode());
      Value *VAListTag = OrigInst->getArgOperand(0);
      Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
      // The va_list tag itself holds the register save area pointer
      // (reinterpret the tag address as an i64**, then load).
      Value *RegSaveAreaPtrPtr =
          IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
                             PointerType::get(RegSaveAreaPtrTy, 0));
      Value *RegSaveAreaPtr =
          IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
      Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
      unsigned Alignment = 8;
      std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
          MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
                                 Alignment, /*isStore*/ true);
      IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
                       CopySize);
    }
  }
};
4558 
4559 /// A no-op implementation of VarArgHelper.
4560 struct VarArgNoOpHelper : public VarArgHelper {
4561   VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
4562                    MemorySanitizerVisitor &MSV) {}
4563 
4564   void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {}
4565 
4566   void visitVAStartInst(VAStartInst &I) override {}
4567 
4568   void visitVACopyInst(VACopyInst &I) override {}
4569 
4570   void finalizeInstrumentation() override {}
4571 };
4572 
4573 } // end anonymous namespace
4574 
4575 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
4576                                         MemorySanitizerVisitor &Visitor) {
4577   // VarArg handling is only implemented on AMD64. False positives are possible
4578   // on other platforms.
4579   Triple TargetTriple(Func.getParent()->getTargetTriple());
4580   if (TargetTriple.getArch() == Triple::x86_64)
4581     return new VarArgAMD64Helper(Func, Msan, Visitor);
4582   else if (TargetTriple.isMIPS64())
4583     return new VarArgMIPS64Helper(Func, Msan, Visitor);
4584   else if (TargetTriple.getArch() == Triple::aarch64)
4585     return new VarArgAArch64Helper(Func, Msan, Visitor);
4586   else if (TargetTriple.getArch() == Triple::ppc64 ||
4587            TargetTriple.getArch() == Triple::ppc64le)
4588     return new VarArgPowerPC64Helper(Func, Msan, Visitor);
4589   else
4590     return new VarArgNoOpHelper(Func, Msan, Visitor);
4591 }
4592 
4593 bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
4594   if (!CompileKernel && F.getName() == kMsanModuleCtorName)
4595     return false;
4596 
4597   MemorySanitizerVisitor Visitor(F, *this, TLI);
4598 
4599   // Clear out readonly/readnone attributes.
4600   AttrBuilder B;
4601   B.addAttribute(Attribute::ReadOnly)
4602       .addAttribute(Attribute::ReadNone)
4603       .addAttribute(Attribute::WriteOnly)
4604       .addAttribute(Attribute::ArgMemOnly)
4605       .addAttribute(Attribute::Speculatable);
4606   F.removeAttributes(AttributeList::FunctionIndex, B);
4607 
4608   return Visitor.runOnFunction();
4609 }
4610