1 //===- MemorySanitizer.cpp - detector of uninitialized reads --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file is a part of MemorySanitizer, a detector of uninitialized
11 /// reads.
12 ///
13 /// The algorithm of the tool is similar to Memcheck
14 /// (http://goo.gl/QKbem). We associate a few shadow bits with every
15 /// byte of the application memory, poison the shadow of the malloc-ed
16 /// or alloca-ed memory, load the shadow bits on every memory read,
17 /// propagate the shadow bits through some of the arithmetic
/// instructions (including MOV), store the shadow bits on every memory
19 /// write, report a bug on some other instructions (e.g. JMP) if the
20 /// associated shadow is poisoned.
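///
/// For example (illustrative only, not actual instrumentation output):
///   int x;          // the alloca handling poisons shadow(x)
///   int y = x + 1;  // shadow(y) is computed from shadow(x)
///   if (y) ...      // the branch checks shadow(y) and reports a use of an
///                   // uninitialized value if it is non-zero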
21 ///
/// But there are differences too. The first and major one is that we use
/// compiler instrumentation instead of binary instrumentation. This
/// gives us much better register allocation, possible compiler
/// optimizations and a fast start-up. But it also brings a major issue:
/// msan needs to see all program events, including system
27 /// calls and reads/writes in system libraries, so we either need to
28 /// compile *everything* with msan or use a binary translation
29 /// component (e.g. DynamoRIO) to instrument pre-built libraries.
30 /// Another difference from Memcheck is that we use 8 shadow bits per
31 /// byte of application memory and use a direct shadow mapping. This
32 /// greatly simplifies the instrumentation code and avoids races on
33 /// shadow updates (Memcheck is single-threaded so races are not a
/// concern there. Memcheck uses 2 shadow bits per byte with slow-path
/// storage that uses 8 bits per byte).
36 ///
37 /// The default value of shadow is 0, which means "clean" (not poisoned).
38 ///
39 /// Every module initializer should call __msan_init to ensure that the
40 /// shadow memory is ready. On error, __msan_warning is called. Since
41 /// parameters and return values may be passed via registers, we have a
42 /// specialized thread-local shadow for return values
43 /// (__msan_retval_tls) and parameters (__msan_param_tls).
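///
/// For example (simplified; the IR actually emitted by the pass differs), for
/// a call like
///   %r = call i32 @f(i32 %a)
/// the caller stores shadow(%a) into __msan_param_tls before the call and
/// reads the shadow of %r from __msan_retval_tls after it, while the callee
/// reads its parameter shadow from __msan_param_tls and writes its
/// return-value shadow to __msan_retval_tls.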
44 ///
45 ///                           Origin tracking.
46 ///
47 /// MemorySanitizer can track origins (allocation points) of all uninitialized
48 /// values. This behavior is controlled with a flag (msan-track-origins) and is
49 /// disabled by default.
50 ///
51 /// Origins are 4-byte values created and interpreted by the runtime library.
52 /// They are stored in a second shadow mapping, one 4-byte value for 4 bytes
53 /// of application memory. Propagation of origins is basically a bunch of
54 /// "select" instructions that pick the origin of a dirty argument, if an
55 /// instruction has one.
56 ///
57 /// Every 4 aligned, consecutive bytes of application memory have one origin
58 /// value associated with them. If these bytes contain uninitialized data
59 /// coming from 2 different allocations, the last store wins. Because of this,
60 /// MemorySanitizer reports can show unrelated origins, but this is unlikely in
61 /// practice.
62 ///
63 /// Origins are meaningless for fully initialized values, so MemorySanitizer
64 /// avoids storing origin to memory when a fully initialized value is stored.
/// This way it avoids needlessly overwriting the origin of the 4-byte region
/// on a short (e.g. 1-byte) clean store, and it is also good for performance.
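///
/// For example, a fully initialized 1-byte store into a 4-byte region leaves
/// the region's origin untouched, so an uninitialized neighboring byte keeps
/// the origin of its own allocation.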
67 ///
68 ///                            Atomic handling.
69 ///
/// Ideally, every atomic store of an application value should update the
/// corresponding shadow location in an atomic way. Unfortunately, an atomic
/// store to two disjoint locations cannot be done without severe slowdown.
73 ///
74 /// Therefore, we implement an approximation that may err on the safe side.
75 /// In this implementation, every atomically accessed location in the program
76 /// may only change from (partially) uninitialized to fully initialized, but
77 /// not the other way around. We load the shadow _after_ the application load,
78 /// and we store the shadow _before_ the app store. Also, we always store clean
79 /// shadow (if the application store is atomic). This way, if the store-load
80 /// pair constitutes a happens-before arc, shadow store and load are correctly
81 /// ordered such that the load will get either the value that was stored, or
82 /// some later value (which is always clean).
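///
/// For example (schematically), for
///   store atomic i32 %v, i32* %p release, align 4
/// the instrumentation first stores a clean shadow for *%p and then performs
/// the application store; for an atomic load it performs the application load
/// first and then loads the shadow.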
83 ///
84 /// This does not work very well with Compare-And-Swap (CAS) and
85 /// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW
86 /// must store the new shadow before the app operation, and load the shadow
/// after the app operation. Computers don't work this way. The current
88 /// implementation ignores the load aspect of CAS/RMW, always returning a clean
89 /// value. It implements the store part as a simple atomic store by storing a
90 /// clean shadow.
91 ///
92 ///                      Instrumenting inline assembly.
93 ///
94 /// For inline assembly code LLVM has little idea about which memory locations
/// become initialized depending on the arguments. It may be possible to figure
/// out which arguments are meant to point to inputs and outputs, but the
/// actual semantics may only be visible at runtime. In the Linux kernel it's
98 /// also possible that the arguments only indicate the offset for a base taken
99 /// from a segment register, so it's dangerous to treat any asm() arguments as
/// pointers. We take a conservative approach, generating calls to
///   __msan_instrument_asm_store(ptr, size)
/// which defers the memory unpoisoning to the runtime library.
103 /// The latter can perform more complex address checks to figure out whether
104 /// it's safe to touch the shadow memory.
/// As with atomic operations, we call __msan_instrument_asm_store() before
106 /// the assembly call, so that changes to the shadow memory will be seen by
107 /// other threads together with main memory initialization.
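///
/// For example (illustrative only), for an asm statement with a memory
/// output such as
///   asm volatile ("..." : "=m"(v));
/// the pass emits a call to __msan_instrument_asm_store(&v, sizeof(v))
/// before the asm statement.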
108 ///
109 ///                  KernelMemorySanitizer (KMSAN) implementation.
110 ///
111 /// The major differences between KMSAN and MSan instrumentation are:
112 ///  - KMSAN always tracks the origins and implies msan-keep-going=true;
113 ///  - KMSAN allocates shadow and origin memory for each page separately, so
114 ///    there are no explicit accesses to shadow and origin in the
115 ///    instrumentation.
116 ///    Shadow and origin values for a particular X-byte memory location
117 ///    (X=1,2,4,8) are accessed through pointers obtained via the
118 ///      __msan_metadata_ptr_for_load_X(ptr)
119 ///      __msan_metadata_ptr_for_store_X(ptr)
120 ///    functions. The corresponding functions check that the X-byte accesses
///    are possible and return the pointers to shadow and origin memory.
122 ///    Arbitrary sized accesses are handled with:
123 ///      __msan_metadata_ptr_for_load_n(ptr, size)
124 ///      __msan_metadata_ptr_for_store_n(ptr, size);
125 ///  - TLS variables are stored in a single per-task struct. A call to a
///    function __msan_get_context_state() returning a pointer to that struct
///    is inserted at the beginning of every instrumented function;
128 ///  - __msan_warning() takes a 32-bit origin parameter;
129 ///  - local variables are poisoned with __msan_poison_alloca() upon function
130 ///    entry and unpoisoned with __msan_unpoison_alloca() before leaving the
131 ///    function;
132 ///  - the pass doesn't declare any global variables or add global constructors
133 ///    to the translation unit.
134 ///
135 /// Also, KMSAN currently ignores uninitialized memory passed into inline asm
/// calls, making sure we're on the safe side with respect to possible false
/// positives.
137 ///
138 ///  KernelMemorySanitizer only supports X86_64 at the moment.
139 ///
140 //
141 // FIXME: This sanitizer does not yet handle scalable vectors
142 //
143 //===----------------------------------------------------------------------===//
144 
145 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
146 #include "llvm/ADT/APInt.h"
147 #include "llvm/ADT/ArrayRef.h"
148 #include "llvm/ADT/DepthFirstIterator.h"
149 #include "llvm/ADT/SmallSet.h"
150 #include "llvm/ADT/SmallString.h"
151 #include "llvm/ADT/SmallVector.h"
152 #include "llvm/ADT/StringExtras.h"
153 #include "llvm/ADT/StringRef.h"
154 #include "llvm/ADT/Triple.h"
155 #include "llvm/Analysis/TargetLibraryInfo.h"
156 #include "llvm/Analysis/ValueTracking.h"
157 #include "llvm/IR/Argument.h"
158 #include "llvm/IR/Attributes.h"
159 #include "llvm/IR/BasicBlock.h"
160 #include "llvm/IR/CallingConv.h"
161 #include "llvm/IR/Constant.h"
162 #include "llvm/IR/Constants.h"
163 #include "llvm/IR/DataLayout.h"
164 #include "llvm/IR/DerivedTypes.h"
165 #include "llvm/IR/Function.h"
166 #include "llvm/IR/GlobalValue.h"
167 #include "llvm/IR/GlobalVariable.h"
168 #include "llvm/IR/IRBuilder.h"
169 #include "llvm/IR/InlineAsm.h"
170 #include "llvm/IR/InstVisitor.h"
171 #include "llvm/IR/InstrTypes.h"
172 #include "llvm/IR/Instruction.h"
173 #include "llvm/IR/Instructions.h"
174 #include "llvm/IR/IntrinsicInst.h"
175 #include "llvm/IR/Intrinsics.h"
176 #include "llvm/IR/IntrinsicsX86.h"
177 #include "llvm/IR/LLVMContext.h"
178 #include "llvm/IR/MDBuilder.h"
179 #include "llvm/IR/Module.h"
180 #include "llvm/IR/Type.h"
181 #include "llvm/IR/Value.h"
182 #include "llvm/IR/ValueMap.h"
183 #include "llvm/InitializePasses.h"
184 #include "llvm/Pass.h"
185 #include "llvm/Support/AtomicOrdering.h"
186 #include "llvm/Support/Casting.h"
187 #include "llvm/Support/CommandLine.h"
188 #include "llvm/Support/Compiler.h"
189 #include "llvm/Support/Debug.h"
190 #include "llvm/Support/ErrorHandling.h"
191 #include "llvm/Support/MathExtras.h"
192 #include "llvm/Support/raw_ostream.h"
193 #include "llvm/Transforms/Instrumentation.h"
194 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
195 #include "llvm/Transforms/Utils/Local.h"
196 #include "llvm/Transforms/Utils/ModuleUtils.h"
197 #include <algorithm>
198 #include <cassert>
199 #include <cstddef>
200 #include <cstdint>
201 #include <memory>
202 #include <string>
203 #include <tuple>
204 
205 using namespace llvm;
206 
207 #define DEBUG_TYPE "msan"
208 
209 static const unsigned kOriginSize = 4;
210 static const Align kMinOriginAlignment = Align(4);
211 static const Align kShadowTLSAlignment = Align(8);
212 
213 // These constants must be kept in sync with the ones in msan.h.
214 static const unsigned kParamTLSSize = 800;
215 static const unsigned kRetvalTLSSize = 800;
216 
// Access sizes are powers of two: 1, 2, 4, 8.
218 static const size_t kNumberOfAccessSizes = 4;
219 
220 /// Track origins of uninitialized values.
221 ///
/// Adds a section to the MemorySanitizer report that points to the allocation
223 /// (stack or heap) the uninitialized bits came from originally.
224 static cl::opt<int> ClTrackOrigins("msan-track-origins",
225        cl::desc("Track origins (allocation sites) of poisoned memory"),
226        cl::Hidden, cl::init(0));
227 
228 static cl::opt<bool> ClKeepGoing("msan-keep-going",
229        cl::desc("keep going after reporting a UMR"),
230        cl::Hidden, cl::init(false));
231 
232 static cl::opt<bool> ClPoisonStack("msan-poison-stack",
233        cl::desc("poison uninitialized stack variables"),
234        cl::Hidden, cl::init(true));
235 
236 static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
237        cl::desc("poison uninitialized stack variables with a call"),
238        cl::Hidden, cl::init(false));
239 
240 static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
241        cl::desc("poison uninitialized stack variables with the given pattern"),
242        cl::Hidden, cl::init(0xff));
243 
244 static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
245        cl::desc("poison undef temps"),
246        cl::Hidden, cl::init(true));
247 
248 static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
249        cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
250        cl::Hidden, cl::init(true));
251 
252 static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact",
253        cl::desc("exact handling of relational integer ICmp"),
254        cl::Hidden, cl::init(false));
255 
256 static cl::opt<bool> ClHandleLifetimeIntrinsics(
257     "msan-handle-lifetime-intrinsics",
258     cl::desc(
259         "when possible, poison scoped variables at the beginning of the scope "
260         "(slower, but more precise)"),
261     cl::Hidden, cl::init(true));
262 
263 // When compiling the Linux kernel, we sometimes see false positives related to
264 // MSan being unable to understand that inline assembly calls may initialize
265 // local variables.
266 // This flag makes the compiler conservatively unpoison every memory location
// passed into an assembly call. Note that this may cause false negatives.
268 // Because it's impossible to figure out the array sizes, we can only unpoison
269 // the first sizeof(type) bytes for each type* pointer.
270 // The instrumentation is only enabled in KMSAN builds, and only if
271 // -msan-handle-asm-conservative is on. This is done because we may want to
272 // quickly disable assembly instrumentation when it breaks.
273 static cl::opt<bool> ClHandleAsmConservative(
274     "msan-handle-asm-conservative",
275     cl::desc("conservative handling of inline assembly"), cl::Hidden,
276     cl::init(true));
277 
// This flag controls whether we check the shadow of the address operand of a
// load or store. Such bugs are very rare, since a load from a garbage address
// typically results in SEGV, but they still happen (e.g. when only the lower
// bits of the address are garbage, or when the access happens early at
// program startup, where malloc-ed memory is more likely to be zeroed). As of
// 2012-08-28 this flag adds a 20% slowdown.
284 static cl::opt<bool> ClCheckAccessAddress("msan-check-access-address",
285        cl::desc("report accesses through a pointer which has poisoned shadow"),
286        cl::Hidden, cl::init(true));
287 
288 static cl::opt<bool> ClEagerChecks(
289     "msan-eager-checks",
290     cl::desc("check arguments and return values at function call boundaries"),
291     cl::Hidden, cl::init(false));
292 
293 static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions",
294        cl::desc("print out instructions with default strict semantics"),
295        cl::Hidden, cl::init(false));
296 
297 static cl::opt<int> ClInstrumentationWithCallThreshold(
298     "msan-instrumentation-with-call-threshold",
299     cl::desc(
300         "If the function being instrumented requires more than "
301         "this number of checks and origin stores, use callbacks instead of "
302         "inline checks (-1 means never use callbacks)."),
303     cl::Hidden, cl::init(3500));
304 
305 static cl::opt<bool>
306     ClEnableKmsan("msan-kernel",
307                   cl::desc("Enable KernelMemorySanitizer instrumentation"),
308                   cl::Hidden, cl::init(false));
309 
310 // This is an experiment to enable handling of cases where shadow is a non-zero
// compile-time constant. For some inexplicable reason such cases were silently
// ignored in the instrumentation.
313 static cl::opt<bool> ClCheckConstantShadow("msan-check-constant-shadow",
314        cl::desc("Insert checks for constant shadow values"),
315        cl::Hidden, cl::init(false));
316 
317 // This is off by default because of a bug in gold:
318 // https://sourceware.org/bugzilla/show_bug.cgi?id=19002
319 static cl::opt<bool> ClWithComdat("msan-with-comdat",
320        cl::desc("Place MSan constructors in comdat sections"),
321        cl::Hidden, cl::init(false));
322 
// These options allow specifying custom memory map parameters.
324 // See MemoryMapParams for details.
325 static cl::opt<uint64_t> ClAndMask("msan-and-mask",
326                                    cl::desc("Define custom MSan AndMask"),
327                                    cl::Hidden, cl::init(0));
328 
329 static cl::opt<uint64_t> ClXorMask("msan-xor-mask",
330                                    cl::desc("Define custom MSan XorMask"),
331                                    cl::Hidden, cl::init(0));
332 
333 static cl::opt<uint64_t> ClShadowBase("msan-shadow-base",
334                                       cl::desc("Define custom MSan ShadowBase"),
335                                       cl::Hidden, cl::init(0));
336 
337 static cl::opt<uint64_t> ClOriginBase("msan-origin-base",
338                                       cl::desc("Define custom MSan OriginBase"),
339                                       cl::Hidden, cl::init(0));
340 
341 static const char *const kMsanModuleCtorName = "msan.module_ctor";
342 static const char *const kMsanInitName = "__msan_init";
343 
344 namespace {
345 
346 // Memory map parameters used in application-to-shadow address calculation.
347 // Offset = (Addr & ~AndMask) ^ XorMask
348 // Shadow = ShadowBase + Offset
349 // Origin = OriginBase + Offset
350 struct MemoryMapParams {
351   uint64_t AndMask;
352   uint64_t XorMask;
353   uint64_t ShadowBase;
354   uint64_t OriginBase;
355 };
356 
357 struct PlatformMemoryMapParams {
358   const MemoryMapParams *bits32;
359   const MemoryMapParams *bits64;
360 };
361 
362 } // end anonymous namespace
363 
364 // i386 Linux
365 static const MemoryMapParams Linux_I386_MemoryMapParams = {
366   0x000080000000,  // AndMask
367   0,               // XorMask (not used)
368   0,               // ShadowBase (not used)
369   0x000040000000,  // OriginBase
370 };
371 
372 // x86_64 Linux
373 static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
374 #ifdef MSAN_LINUX_X86_64_OLD_MAPPING
375   0x400000000000,  // AndMask
376   0,               // XorMask (not used)
377   0,               // ShadowBase (not used)
378   0x200000000000,  // OriginBase
379 #else
380   0,               // AndMask (not used)
381   0x500000000000,  // XorMask
382   0,               // ShadowBase (not used)
383   0x100000000000,  // OriginBase
384 #endif
385 };
386 
387 // mips64 Linux
388 static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
389   0,               // AndMask (not used)
390   0x008000000000,  // XorMask
391   0,               // ShadowBase (not used)
392   0x002000000000,  // OriginBase
393 };
394 
395 // ppc64 Linux
396 static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = {
397   0xE00000000000,  // AndMask
398   0x100000000000,  // XorMask
399   0x080000000000,  // ShadowBase
400   0x1C0000000000,  // OriginBase
401 };
402 
403 // s390x Linux
404 static const MemoryMapParams Linux_S390X_MemoryMapParams = {
405     0xC00000000000, // AndMask
406     0,              // XorMask (not used)
407     0x080000000000, // ShadowBase
408     0x1C0000000000, // OriginBase
409 };
410 
411 // aarch64 Linux
412 static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
413   0,               // AndMask (not used)
414   0x06000000000,   // XorMask
415   0,               // ShadowBase (not used)
416   0x01000000000,   // OriginBase
417 };
418 
419 // i386 FreeBSD
420 static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
421   0x000180000000,  // AndMask
422   0x000040000000,  // XorMask
423   0x000020000000,  // ShadowBase
424   0x000700000000,  // OriginBase
425 };
426 
427 // x86_64 FreeBSD
428 static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = {
429   0xc00000000000,  // AndMask
430   0x200000000000,  // XorMask
431   0x100000000000,  // ShadowBase
432   0x380000000000,  // OriginBase
433 };
434 
435 // x86_64 NetBSD
436 static const MemoryMapParams NetBSD_X86_64_MemoryMapParams = {
437   0,               // AndMask
438   0x500000000000,  // XorMask
439   0,               // ShadowBase
440   0x100000000000,  // OriginBase
441 };
442 
443 static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
444   &Linux_I386_MemoryMapParams,
445   &Linux_X86_64_MemoryMapParams,
446 };
447 
448 static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
449   nullptr,
450   &Linux_MIPS64_MemoryMapParams,
451 };
452 
453 static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = {
454   nullptr,
455   &Linux_PowerPC64_MemoryMapParams,
456 };
457 
458 static const PlatformMemoryMapParams Linux_S390_MemoryMapParams = {
459     nullptr,
460     &Linux_S390X_MemoryMapParams,
461 };
462 
463 static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
464   nullptr,
465   &Linux_AArch64_MemoryMapParams,
466 };
467 
468 static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
469   &FreeBSD_I386_MemoryMapParams,
470   &FreeBSD_X86_64_MemoryMapParams,
471 };
472 
473 static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = {
474   nullptr,
475   &NetBSD_X86_64_MemoryMapParams,
476 };
477 
478 namespace {
479 
480 /// Instrument functions of a module to detect uninitialized reads.
481 ///
482 /// Instantiating MemorySanitizer inserts the msan runtime library API function
483 /// declarations into the module if they don't exist already. Instantiating
484 /// ensures the __msan_init function is in the list of global constructors for
485 /// the module.
486 class MemorySanitizer {
487 public:
488   MemorySanitizer(Module &M, MemorySanitizerOptions Options)
489       : CompileKernel(Options.Kernel), TrackOrigins(Options.TrackOrigins),
490         Recover(Options.Recover) {
491     initializeModule(M);
492   }
493 
494   // MSan cannot be moved or copied because of MapParams.
495   MemorySanitizer(MemorySanitizer &&) = delete;
496   MemorySanitizer &operator=(MemorySanitizer &&) = delete;
497   MemorySanitizer(const MemorySanitizer &) = delete;
498   MemorySanitizer &operator=(const MemorySanitizer &) = delete;
499 
500   bool sanitizeFunction(Function &F, TargetLibraryInfo &TLI);
501 
502 private:
503   friend struct MemorySanitizerVisitor;
504   friend struct VarArgAMD64Helper;
505   friend struct VarArgMIPS64Helper;
506   friend struct VarArgAArch64Helper;
507   friend struct VarArgPowerPC64Helper;
508   friend struct VarArgSystemZHelper;
509 
510   void initializeModule(Module &M);
511   void initializeCallbacks(Module &M);
512   void createKernelApi(Module &M);
513   void createUserspaceApi(Module &M);
514 
515   /// True if we're compiling the Linux kernel.
516   bool CompileKernel;
517   /// Track origins (allocation points) of uninitialized values.
518   int TrackOrigins;
519   bool Recover;
520 
521   LLVMContext *C;
522   Type *IntptrTy;
523   Type *OriginTy;
524 
525   // XxxTLS variables represent the per-thread state in MSan and per-task state
526   // in KMSAN.
527   // For the userspace these point to thread-local globals. In the kernel land
528   // they point to the members of a per-task struct obtained via a call to
529   // __msan_get_context_state().
530 
531   /// Thread-local shadow storage for function parameters.
532   Value *ParamTLS;
533 
534   /// Thread-local origin storage for function parameters.
535   Value *ParamOriginTLS;
536 
537   /// Thread-local shadow storage for function return value.
538   Value *RetvalTLS;
539 
540   /// Thread-local origin storage for function return value.
541   Value *RetvalOriginTLS;
542 
543   /// Thread-local shadow storage for in-register va_arg function
544   /// parameters (x86_64-specific).
545   Value *VAArgTLS;
546 
547   /// Thread-local shadow storage for in-register va_arg function
548   /// parameters (x86_64-specific).
549   Value *VAArgOriginTLS;
550 
551   /// Thread-local shadow storage for va_arg overflow area
552   /// (x86_64-specific).
553   Value *VAArgOverflowSizeTLS;
554 
555   /// Are the instrumentation callbacks set up?
556   bool CallbacksInitialized = false;
557 
558   /// The run-time callback to print a warning.
559   FunctionCallee WarningFn;
560 
561   // These arrays are indexed by log2(AccessSize).
562   FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
563   FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];
564 
565   /// Run-time helper that generates a new origin value for a stack
566   /// allocation.
567   FunctionCallee MsanSetAllocaOrigin4Fn;
568 
569   /// Run-time helper that poisons stack on function entry.
570   FunctionCallee MsanPoisonStackFn;
571 
572   /// Run-time helper that records a store (or any event) of an
573   /// uninitialized value and returns an updated origin id encoding this info.
574   FunctionCallee MsanChainOriginFn;
575 
576   /// MSan runtime replacements for memmove, memcpy and memset.
577   FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
578 
579   /// KMSAN callback for task-local function argument shadow.
580   StructType *MsanContextStateTy;
581   FunctionCallee MsanGetContextStateFn;
582 
583   /// Functions for poisoning/unpoisoning local variables
584   FunctionCallee MsanPoisonAllocaFn, MsanUnpoisonAllocaFn;
585 
586   /// Each of the MsanMetadataPtrXxx functions returns a pair of shadow/origin
587   /// pointers.
588   FunctionCallee MsanMetadataPtrForLoadN, MsanMetadataPtrForStoreN;
589   FunctionCallee MsanMetadataPtrForLoad_1_8[4];
590   FunctionCallee MsanMetadataPtrForStore_1_8[4];
591   FunctionCallee MsanInstrumentAsmStoreFn;
592 
593   /// Helper to choose between different MsanMetadataPtrXxx().
594   FunctionCallee getKmsanShadowOriginAccessFn(bool isStore, int size);
595 
596   /// Memory map parameters used in application-to-shadow calculation.
597   const MemoryMapParams *MapParams;
598 
599   /// Custom memory map parameters used when -msan-shadow-base or
  /// -msan-origin-base is provided.
601   MemoryMapParams CustomMapParams;
602 
603   MDNode *ColdCallWeights;
604 
605   /// Branch weights for origin store.
606   MDNode *OriginStoreWeights;
607 };
608 
609 void insertModuleCtor(Module &M) {
610   getOrCreateSanitizerCtorAndInitFunctions(
611       M, kMsanModuleCtorName, kMsanInitName,
612       /*InitArgTypes=*/{},
613       /*InitArgs=*/{},
614       // This callback is invoked when the functions are created the first
615       // time. Hook them into the global ctors list in that case:
616       [&](Function *Ctor, FunctionCallee) {
617         if (!ClWithComdat) {
618           appendToGlobalCtors(M, Ctor, 0);
619           return;
620         }
621         Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
622         Ctor->setComdat(MsanCtorComdat);
623         appendToGlobalCtors(M, Ctor, 0, Ctor);
624       });
625 }
626 
627 /// A legacy function pass for msan instrumentation.
628 ///
629 /// Instruments functions to detect uninitialized reads.
630 struct MemorySanitizerLegacyPass : public FunctionPass {
631   // Pass identification, replacement for typeid.
632   static char ID;
633 
634   MemorySanitizerLegacyPass(MemorySanitizerOptions Options = {})
635       : FunctionPass(ID), Options(Options) {
636     initializeMemorySanitizerLegacyPassPass(*PassRegistry::getPassRegistry());
637   }
638   StringRef getPassName() const override { return "MemorySanitizerLegacyPass"; }
639 
640   void getAnalysisUsage(AnalysisUsage &AU) const override {
641     AU.addRequired<TargetLibraryInfoWrapperPass>();
642   }
643 
644   bool runOnFunction(Function &F) override {
645     return MSan->sanitizeFunction(
646         F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F));
647   }
648   bool doInitialization(Module &M) override;
649 
650   Optional<MemorySanitizer> MSan;
651   MemorySanitizerOptions Options;
652 };
653 
654 template <class T> T getOptOrDefault(const cl::opt<T> &Opt, T Default) {
655   return (Opt.getNumOccurrences() > 0) ? Opt : Default;
656 }
657 
658 } // end anonymous namespace
659 
660 MemorySanitizerOptions::MemorySanitizerOptions(int TO, bool R, bool K)
661     : Kernel(getOptOrDefault(ClEnableKmsan, K)),
662       TrackOrigins(getOptOrDefault(ClTrackOrigins, Kernel ? 2 : TO)),
663       Recover(getOptOrDefault(ClKeepGoing, Kernel || R)) {}
664 
665 PreservedAnalyses MemorySanitizerPass::run(Function &F,
666                                            FunctionAnalysisManager &FAM) {
667   MemorySanitizer Msan(*F.getParent(), Options);
668   if (Msan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F)))
669     return PreservedAnalyses::none();
670   return PreservedAnalyses::all();
671 }
672 
673 PreservedAnalyses MemorySanitizerPass::run(Module &M,
674                                            ModuleAnalysisManager &AM) {
675   if (Options.Kernel)
676     return PreservedAnalyses::all();
677   insertModuleCtor(M);
678   return PreservedAnalyses::none();
679 }
680 
681 char MemorySanitizerLegacyPass::ID = 0;
682 
683 INITIALIZE_PASS_BEGIN(MemorySanitizerLegacyPass, "msan",
684                       "MemorySanitizer: detects uninitialized reads.", false,
685                       false)
686 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
687 INITIALIZE_PASS_END(MemorySanitizerLegacyPass, "msan",
688                     "MemorySanitizer: detects uninitialized reads.", false,
689                     false)
690 
691 FunctionPass *
692 llvm::createMemorySanitizerLegacyPassPass(MemorySanitizerOptions Options) {
693   return new MemorySanitizerLegacyPass(Options);
694 }
695 
696 /// Create a non-const global initialized with the given string.
697 ///
698 /// Creates a writable global for Str so that we can pass it to the
/// run-time lib. The runtime uses the first 4 bytes of the string to store
/// the frame ID, so the string needs to be mutable.
701 static GlobalVariable *createPrivateNonConstGlobalForString(Module &M,
702                                                             StringRef Str) {
703   Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
704   return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/false,
705                             GlobalValue::PrivateLinkage, StrConst, "");
706 }
707 
708 /// Create KMSAN API callbacks.
709 void MemorySanitizer::createKernelApi(Module &M) {
710   IRBuilder<> IRB(*C);
711 
712   // These will be initialized in insertKmsanPrologue().
713   RetvalTLS = nullptr;
714   RetvalOriginTLS = nullptr;
715   ParamTLS = nullptr;
716   ParamOriginTLS = nullptr;
717   VAArgTLS = nullptr;
718   VAArgOriginTLS = nullptr;
719   VAArgOverflowSizeTLS = nullptr;
720 
721   WarningFn = M.getOrInsertFunction("__msan_warning", IRB.getVoidTy(),
722                                     IRB.getInt32Ty());
723   // Requests the per-task context state (kmsan_context_state*) from the
724   // runtime library.
725   MsanContextStateTy = StructType::get(
726       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
727       ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8),
728       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
729       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), /* va_arg_origin */
730       IRB.getInt64Ty(), ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy,
731       OriginTy);
732   MsanGetContextStateFn = M.getOrInsertFunction(
733       "__msan_get_context_state", PointerType::get(MsanContextStateTy, 0));
734 
735   Type *RetTy = StructType::get(PointerType::get(IRB.getInt8Ty(), 0),
736                                 PointerType::get(IRB.getInt32Ty(), 0));
737 
738   for (int ind = 0, size = 1; ind < 4; ind++, size <<= 1) {
739     std::string name_load =
740         "__msan_metadata_ptr_for_load_" + std::to_string(size);
741     std::string name_store =
742         "__msan_metadata_ptr_for_store_" + std::to_string(size);
743     MsanMetadataPtrForLoad_1_8[ind] = M.getOrInsertFunction(
744         name_load, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
745     MsanMetadataPtrForStore_1_8[ind] = M.getOrInsertFunction(
746         name_store, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
747   }
748 
749   MsanMetadataPtrForLoadN = M.getOrInsertFunction(
750       "__msan_metadata_ptr_for_load_n", RetTy,
751       PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
752   MsanMetadataPtrForStoreN = M.getOrInsertFunction(
753       "__msan_metadata_ptr_for_store_n", RetTy,
754       PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
755 
756   // Functions for poisoning and unpoisoning memory.
757   MsanPoisonAllocaFn =
758       M.getOrInsertFunction("__msan_poison_alloca", IRB.getVoidTy(),
759                             IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy());
760   MsanUnpoisonAllocaFn = M.getOrInsertFunction(
761       "__msan_unpoison_alloca", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy);
762 }
763 
764 static Constant *getOrInsertGlobal(Module &M, StringRef Name, Type *Ty) {
765   return M.getOrInsertGlobal(Name, Ty, [&] {
766     return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
767                               nullptr, Name, nullptr,
768                               GlobalVariable::InitialExecTLSModel);
769   });
770 }
771 
772 /// Insert declarations for userspace-specific functions and globals.
773 void MemorySanitizer::createUserspaceApi(Module &M) {
774   IRBuilder<> IRB(*C);
775 
776   // Create the callback.
777   // FIXME: this function should have "Cold" calling conv,
778   // which is not yet implemented.
779   StringRef WarningFnName = Recover ? "__msan_warning_with_origin"
780                                     : "__msan_warning_with_origin_noreturn";
781   WarningFn =
782       M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), IRB.getInt32Ty());
783 
784   // Create the global TLS variables.
785   RetvalTLS =
786       getOrInsertGlobal(M, "__msan_retval_tls",
787                         ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8));
788 
789   RetvalOriginTLS = getOrInsertGlobal(M, "__msan_retval_origin_tls", OriginTy);
790 
791   ParamTLS =
792       getOrInsertGlobal(M, "__msan_param_tls",
793                         ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
794 
795   ParamOriginTLS =
796       getOrInsertGlobal(M, "__msan_param_origin_tls",
797                         ArrayType::get(OriginTy, kParamTLSSize / 4));
798 
799   VAArgTLS =
800       getOrInsertGlobal(M, "__msan_va_arg_tls",
801                         ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
802 
803   VAArgOriginTLS =
804       getOrInsertGlobal(M, "__msan_va_arg_origin_tls",
805                         ArrayType::get(OriginTy, kParamTLSSize / 4));
806 
807   VAArgOverflowSizeTLS =
808       getOrInsertGlobal(M, "__msan_va_arg_overflow_size_tls", IRB.getInt64Ty());
809 
810   for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
811        AccessSizeIndex++) {
812     unsigned AccessSize = 1 << AccessSizeIndex;
813     std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize);
814     SmallVector<std::pair<unsigned, Attribute>, 2> MaybeWarningFnAttrs;
815     MaybeWarningFnAttrs.push_back(std::make_pair(
816         AttributeList::FirstArgIndex, Attribute::get(*C, Attribute::ZExt)));
817     MaybeWarningFnAttrs.push_back(std::make_pair(
818         AttributeList::FirstArgIndex + 1, Attribute::get(*C, Attribute::ZExt)));
819     MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
820         FunctionName, AttributeList::get(*C, MaybeWarningFnAttrs),
821         IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty());
822 
823     FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
824     SmallVector<std::pair<unsigned, Attribute>, 2> MaybeStoreOriginFnAttrs;
825     MaybeStoreOriginFnAttrs.push_back(std::make_pair(
826         AttributeList::FirstArgIndex, Attribute::get(*C, Attribute::ZExt)));
827     MaybeStoreOriginFnAttrs.push_back(std::make_pair(
828         AttributeList::FirstArgIndex + 2, Attribute::get(*C, Attribute::ZExt)));
829     MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
830         FunctionName, AttributeList::get(*C, MaybeStoreOriginFnAttrs),
831         IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt8PtrTy(),
832         IRB.getInt32Ty());
833   }
834 
835   MsanSetAllocaOrigin4Fn = M.getOrInsertFunction(
836     "__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
837     IRB.getInt8PtrTy(), IntptrTy);
838   MsanPoisonStackFn =
839       M.getOrInsertFunction("__msan_poison_stack", IRB.getVoidTy(),
840                             IRB.getInt8PtrTy(), IntptrTy);
841 }
842 
/// Insert extern declarations of runtime-provided functions and globals.
844 void MemorySanitizer::initializeCallbacks(Module &M) {
845   // Only do this once.
846   if (CallbacksInitialized)
847     return;
848 
849   IRBuilder<> IRB(*C);
850   // Initialize callbacks that are common for kernel and userspace
851   // instrumentation.
852   MsanChainOriginFn = M.getOrInsertFunction(
853     "__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty());
854   MemmoveFn = M.getOrInsertFunction(
855     "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
856     IRB.getInt8PtrTy(), IntptrTy);
857   MemcpyFn = M.getOrInsertFunction(
858     "__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
859     IntptrTy);
860   MemsetFn = M.getOrInsertFunction(
861     "__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
862     IntptrTy);
863 
864   MsanInstrumentAsmStoreFn =
865       M.getOrInsertFunction("__msan_instrument_asm_store", IRB.getVoidTy(),
866                             PointerType::get(IRB.getInt8Ty(), 0), IntptrTy);
867 
868   if (CompileKernel) {
869     createKernelApi(M);
870   } else {
871     createUserspaceApi(M);
872   }
873   CallbacksInitialized = true;
874 }
875 
876 FunctionCallee MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore,
877                                                              int size) {
878   FunctionCallee *Fns =
879       isStore ? MsanMetadataPtrForStore_1_8 : MsanMetadataPtrForLoad_1_8;
880   switch (size) {
881   case 1:
882     return Fns[0];
883   case 2:
884     return Fns[1];
885   case 4:
886     return Fns[2];
887   case 8:
888     return Fns[3];
889   default:
890     return nullptr;
891   }
892 }
893 
894 /// Module-level initialization.
895 ///
/// Determines the memory mapping parameters for the target and creates the
/// types and module-level globals used by the instrumentation.
897 void MemorySanitizer::initializeModule(Module &M) {
898   auto &DL = M.getDataLayout();
899 
900   bool ShadowPassed = ClShadowBase.getNumOccurrences() > 0;
901   bool OriginPassed = ClOriginBase.getNumOccurrences() > 0;
902   // Check the overrides first
903   if (ShadowPassed || OriginPassed) {
904     CustomMapParams.AndMask = ClAndMask;
905     CustomMapParams.XorMask = ClXorMask;
906     CustomMapParams.ShadowBase = ClShadowBase;
907     CustomMapParams.OriginBase = ClOriginBase;
908     MapParams = &CustomMapParams;
909   } else {
910     Triple TargetTriple(M.getTargetTriple());
911     switch (TargetTriple.getOS()) {
912       case Triple::FreeBSD:
913         switch (TargetTriple.getArch()) {
914           case Triple::x86_64:
915             MapParams = FreeBSD_X86_MemoryMapParams.bits64;
916             break;
917           case Triple::x86:
918             MapParams = FreeBSD_X86_MemoryMapParams.bits32;
919             break;
920           default:
921             report_fatal_error("unsupported architecture");
922         }
923         break;
924       case Triple::NetBSD:
925         switch (TargetTriple.getArch()) {
926           case Triple::x86_64:
927             MapParams = NetBSD_X86_MemoryMapParams.bits64;
928             break;
929           default:
930             report_fatal_error("unsupported architecture");
931         }
932         break;
933       case Triple::Linux:
934         switch (TargetTriple.getArch()) {
935           case Triple::x86_64:
936             MapParams = Linux_X86_MemoryMapParams.bits64;
937             break;
938           case Triple::x86:
939             MapParams = Linux_X86_MemoryMapParams.bits32;
940             break;
941           case Triple::mips64:
942           case Triple::mips64el:
943             MapParams = Linux_MIPS_MemoryMapParams.bits64;
944             break;
945           case Triple::ppc64:
946           case Triple::ppc64le:
947             MapParams = Linux_PowerPC_MemoryMapParams.bits64;
948             break;
949           case Triple::systemz:
950             MapParams = Linux_S390_MemoryMapParams.bits64;
951             break;
952           case Triple::aarch64:
953           case Triple::aarch64_be:
954             MapParams = Linux_ARM_MemoryMapParams.bits64;
955             break;
956           default:
957             report_fatal_error("unsupported architecture");
958         }
959         break;
960       default:
961         report_fatal_error("unsupported operating system");
962     }
963   }
964 
965   C = &(M.getContext());
966   IRBuilder<> IRB(*C);
967   IntptrTy = IRB.getIntPtrTy(DL);
968   OriginTy = IRB.getInt32Ty();
969 
970   ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
971   OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
972 
973   if (!CompileKernel) {
974     if (TrackOrigins)
975       M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] {
976         return new GlobalVariable(
977             M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
978             IRB.getInt32(TrackOrigins), "__msan_track_origins");
979       });
980 
981     if (Recover)
982       M.getOrInsertGlobal("__msan_keep_going", IRB.getInt32Ty(), [&] {
983         return new GlobalVariable(M, IRB.getInt32Ty(), true,
984                                   GlobalValue::WeakODRLinkage,
985                                   IRB.getInt32(Recover), "__msan_keep_going");
986       });
987 }
988 }
989 
990 bool MemorySanitizerLegacyPass::doInitialization(Module &M) {
991   if (!Options.Kernel)
992     insertModuleCtor(M);
993   MSan.emplace(M, Options);
994   return true;
995 }
996 
997 namespace {
998 
999 /// A helper class that handles instrumentation of VarArg
1000 /// functions on a particular platform.
1001 ///
1002 /// Implementations are expected to insert the instrumentation
1003 /// necessary to propagate argument shadow through VarArg function
1004 /// calls. Visit* methods are called during an InstVisitor pass over
1005 /// the function, and should avoid creating new basic blocks. A new
1006 /// instance of this class is created for each instrumented function.
1007 struct VarArgHelper {
1008   virtual ~VarArgHelper() = default;
1009 
1010   /// Visit a CallBase.
1011   virtual void visitCallBase(CallBase &CB, IRBuilder<> &IRB) = 0;
1012 
1013   /// Visit a va_start call.
1014   virtual void visitVAStartInst(VAStartInst &I) = 0;
1015 
1016   /// Visit a va_copy call.
1017   virtual void visitVACopyInst(VACopyInst &I) = 0;
1018 
1019   /// Finalize function instrumentation.
1020   ///
1021   /// This method is called after visiting all interesting (see above)
1022   /// instructions in a function.
1023   virtual void finalizeInstrumentation() = 0;
1024 };
1025 
1026 struct MemorySanitizerVisitor;
1027 
1028 } // end anonymous namespace
1029 
1030 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
1031                                         MemorySanitizerVisitor &Visitor);
1032 
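// Map an access size in bits to an index into the MaybeWarningFn /
// MaybeStoreOriginFn tables: 1..8 bits map to index 0, 16 bits to 1,
// 32 bits to 2 and 64 bits to 3.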
1033 static unsigned TypeSizeToSizeIndex(unsigned TypeSize) {
1034   if (TypeSize <= 8) return 0;
1035   return Log2_32_Ceil((TypeSize + 7) / 8);
1036 }
1037 
1038 namespace {
1039 
1040 /// This class does all the work for a given function. Store and Load
1041 /// instructions store and load corresponding shadow and origin
1042 /// values. Most instructions propagate shadow from arguments to their
1043 /// return values. Certain instructions (most importantly, BranchInst)
1044 /// test their argument shadow and print reports (with a runtime call) if it's
1045 /// non-zero.
1046 struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
1047   Function &F;
1048   MemorySanitizer &MS;
1049   SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
1050   ValueMap<Value*, Value*> ShadowMap, OriginMap;
1051   std::unique_ptr<VarArgHelper> VAHelper;
1052   const TargetLibraryInfo *TLI;
1053   BasicBlock *ActualFnStart;
1054 
1055   // The following flags disable parts of MSan instrumentation based on
1056   // exclusion list contents and command-line options.
1057   bool InsertChecks;
1058   bool PropagateShadow;
1059   bool PoisonStack;
1060   bool PoisonUndef;
1061 
1062   struct ShadowOriginAndInsertPoint {
1063     Value *Shadow;
1064     Value *Origin;
1065     Instruction *OrigIns;
1066 
1067     ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I)
1068       : Shadow(S), Origin(O), OrigIns(I) {}
1069   };
1070   SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
1071   bool InstrumentLifetimeStart = ClHandleLifetimeIntrinsics;
1072   SmallSet<AllocaInst *, 16> AllocaSet;
1073   SmallVector<std::pair<IntrinsicInst *, AllocaInst *>, 16> LifetimeStartList;
1074   SmallVector<StoreInst *, 16> StoreList;
1075 
1076   MemorySanitizerVisitor(Function &F, MemorySanitizer &MS,
1077                          const TargetLibraryInfo &TLI)
1078       : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)), TLI(&TLI) {
1079     bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeMemory);
1080     InsertChecks = SanitizeFunction;
1081     PropagateShadow = SanitizeFunction;
1082     PoisonStack = SanitizeFunction && ClPoisonStack;
1083     PoisonUndef = SanitizeFunction && ClPoisonUndef;
1084 
1085     MS.initializeCallbacks(*F.getParent());
1086     if (MS.CompileKernel)
1087       ActualFnStart = insertKmsanPrologue(F);
1088     else
1089       ActualFnStart = &F.getEntryBlock();
1090 
1091     LLVM_DEBUG(if (!InsertChecks) dbgs()
1092                << "MemorySanitizer is not inserting checks into '"
1093                << F.getName() << "'\n");
1094   }
1095 
1096   Value *updateOrigin(Value *V, IRBuilder<> &IRB) {
1097     if (MS.TrackOrigins <= 1) return V;
1098     return IRB.CreateCall(MS.MsanChainOriginFn, V);
1099   }
1100 
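  /// Widen a 4-byte origin value to pointer width by replicating it into both
  /// halves, so that paintOrigin() can write origins with pointer-sized
  /// stores.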
1101   Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) {
1102     const DataLayout &DL = F.getParent()->getDataLayout();
1103     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
1104     if (IntptrSize == kOriginSize) return Origin;
1105     assert(IntptrSize == kOriginSize * 2);
1106     Origin = IRB.CreateIntCast(Origin, MS.IntptrTy, /* isSigned */ false);
1107     return IRB.CreateOr(Origin, IRB.CreateShl(Origin, kOriginSize * 8));
1108   }
1109 
1110   /// Fill memory range with the given origin value.
1111   void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
1112                    unsigned Size, Align Alignment) {
1113     const DataLayout &DL = F.getParent()->getDataLayout();
1114     const Align IntptrAlignment = DL.getABITypeAlign(MS.IntptrTy);
1115     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
1116     assert(IntptrAlignment >= kMinOriginAlignment);
1117     assert(IntptrSize >= kOriginSize);
1118 
1119     unsigned Ofs = 0;
1120     Align CurrentAlignment = Alignment;
1121     if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
1122       Value *IntptrOrigin = originToIntptr(IRB, Origin);
1123       Value *IntptrOriginPtr =
1124           IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0));
1125       for (unsigned i = 0; i < Size / IntptrSize; ++i) {
1126         Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i)
1127                        : IntptrOriginPtr;
1128         IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
1129         Ofs += IntptrSize / kOriginSize;
1130         CurrentAlignment = IntptrAlignment;
1131       }
1132     }
1133 
1134     for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
1135       Value *GEP =
1136           i ? IRB.CreateConstGEP1_32(MS.OriginTy, OriginPtr, i) : OriginPtr;
1137       IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
1138       CurrentAlignment = kMinOriginAlignment;
1139     }
1140   }
1141 
1142   void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
1143                    Value *OriginPtr, Align Alignment, bool AsCall) {
1144     const DataLayout &DL = F.getParent()->getDataLayout();
1145     const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1146     unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
1147     Value *ConvertedShadow = convertShadowToScalar(Shadow, IRB);
1148     if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
1149       if (ClCheckConstantShadow && !ConstantShadow->isZeroValue())
1150         paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
1151                     OriginAlignment);
1152       return;
1153     }
1154 
1155     unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
1156     unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
1157     if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
1158       FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex];
1159       Value *ConvertedShadow2 =
1160           IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
1161       IRB.CreateCall(Fn,
1162                      {ConvertedShadow2,
1163                       IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), Origin});
1164     } else {
1165       Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
1166       Instruction *CheckTerm = SplitBlockAndInsertIfThen(
1167           Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
1168       IRBuilder<> IRBNew(CheckTerm);
1169       paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), OriginPtr, StoreSize,
1170                   OriginAlignment);
1171     }
1172   }
1173 
1174   void materializeStores(bool InstrumentWithCalls) {
1175     for (StoreInst *SI : StoreList) {
1176       IRBuilder<> IRB(SI);
1177       Value *Val = SI->getValueOperand();
1178       Value *Addr = SI->getPointerOperand();
1179       Value *Shadow = SI->isAtomic() ? getCleanShadow(Val) : getShadow(Val);
1180       Value *ShadowPtr, *OriginPtr;
1181       Type *ShadowTy = Shadow->getType();
1182       const Align Alignment = assumeAligned(SI->getAlignment());
1183       const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1184       std::tie(ShadowPtr, OriginPtr) =
1185           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ true);
1186 
1187       StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment);
1188       LLVM_DEBUG(dbgs() << "  STORE: " << *NewSI << "\n");
1189       (void)NewSI;
1190 
1191       if (SI->isAtomic())
1192         SI->setOrdering(addReleaseOrdering(SI->getOrdering()));
1193 
1194       if (MS.TrackOrigins && !SI->isAtomic())
1195         storeOrigin(IRB, Addr, Shadow, getOrigin(Val), OriginPtr,
1196                     OriginAlignment, InstrumentWithCalls);
1197     }
1198   }
1199 
1200   /// Helper function to insert a warning at IRB's current insert point.
1201   void insertWarningFn(IRBuilder<> &IRB, Value *Origin) {
1202     if (!Origin)
1203       Origin = (Value *)IRB.getInt32(0);
1204     assert(Origin->getType()->isIntegerTy());
1205     IRB.CreateCall(MS.WarningFn, Origin)->setCannotMerge();
1206     // FIXME: Insert UnreachableInst if !MS.Recover?
1207     // This may invalidate some of the following checks and needs to be done
1208     // at the very end.
1209   }
1210 
1211   void materializeOneCheck(Instruction *OrigIns, Value *Shadow, Value *Origin,
1212                            bool AsCall) {
1213     IRBuilder<> IRB(OrigIns);
1214     LLVM_DEBUG(dbgs() << "  SHAD0 : " << *Shadow << "\n");
1215     Value *ConvertedShadow = convertShadowToScalar(Shadow, IRB);
1216     LLVM_DEBUG(dbgs() << "  SHAD1 : " << *ConvertedShadow << "\n");
1217 
1218     if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
1219       if (ClCheckConstantShadow && !ConstantShadow->isZeroValue()) {
1220         insertWarningFn(IRB, Origin);
1221       }
1222       return;
1223     }
1224 
1225     const DataLayout &DL = OrigIns->getModule()->getDataLayout();
1226 
1227     unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
1228     unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
1229     if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
1230       FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
1231       Value *ConvertedShadow2 =
1232           IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
1233       IRB.CreateCall(Fn, {ConvertedShadow2, MS.TrackOrigins && Origin
1234                                                 ? Origin
1235                                                 : (Value *)IRB.getInt32(0)});
1236     } else {
1237       Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
1238       Instruction *CheckTerm = SplitBlockAndInsertIfThen(
1239           Cmp, OrigIns,
1240           /* Unreachable */ !MS.Recover, MS.ColdCallWeights);
1241 
1242       IRB.SetInsertPoint(CheckTerm);
1243       insertWarningFn(IRB, Origin);
1244       LLVM_DEBUG(dbgs() << "  CHECK: " << *Cmp << "\n");
1245     }
1246   }
1247 
1248   void materializeChecks(bool InstrumentWithCalls) {
1249     for (const auto &ShadowData : InstrumentationList) {
1250       Instruction *OrigIns = ShadowData.OrigIns;
1251       Value *Shadow = ShadowData.Shadow;
1252       Value *Origin = ShadowData.Origin;
1253       materializeOneCheck(OrigIns, Shadow, Origin, InstrumentWithCalls);
1254     }
1255     LLVM_DEBUG(dbgs() << "DONE:\n" << F);
1256   }
1257 
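  /// Split the entry block and insert a call to __msan_get_context_state();
  /// the TLS-like members of MS are then pointed at fields of the returned
  /// per-task struct (see the KMSAN notes in the file header).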
1258   BasicBlock *insertKmsanPrologue(Function &F) {
1259     BasicBlock *ret =
1260         SplitBlock(&F.getEntryBlock(), F.getEntryBlock().getFirstNonPHI());
1261     IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
1262     Value *ContextState = IRB.CreateCall(MS.MsanGetContextStateFn, {});
1263     Constant *Zero = IRB.getInt32(0);
1264     MS.ParamTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1265                                 {Zero, IRB.getInt32(0)}, "param_shadow");
1266     MS.RetvalTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1267                                  {Zero, IRB.getInt32(1)}, "retval_shadow");
1268     MS.VAArgTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1269                                 {Zero, IRB.getInt32(2)}, "va_arg_shadow");
1270     MS.VAArgOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1271                                       {Zero, IRB.getInt32(3)}, "va_arg_origin");
1272     MS.VAArgOverflowSizeTLS =
1273         IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1274                       {Zero, IRB.getInt32(4)}, "va_arg_overflow_size");
1275     MS.ParamOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1276                                       {Zero, IRB.getInt32(5)}, "param_origin");
1277     MS.RetvalOriginTLS =
1278         IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1279                       {Zero, IRB.getInt32(6)}, "retval_origin");
1280     return ret;
1281   }
1282 
1283   /// Add MemorySanitizer instrumentation to a function.
1284   bool runOnFunction() {
1285     // In the presence of unreachable blocks, we may see Phi nodes with
    // incoming values from such blocks. Since InstVisitor skips unreachable
1287     // blocks, such nodes will not have any shadow value associated with them.
1288     // It's easier to remove unreachable blocks than deal with missing shadow.
1289     removeUnreachableBlocks(F);
1290 
1291     // Iterate all BBs in depth-first order and create shadow instructions
1292     // for all instructions (where applicable).
1293     // For PHI nodes we create dummy shadow PHIs which will be finalized later.
1294     for (BasicBlock *BB : depth_first(ActualFnStart))
1295       visit(*BB);
1296 
1297     // Finalize PHI nodes.
1298     for (PHINode *PN : ShadowPHINodes) {
1299       PHINode *PNS = cast<PHINode>(getShadow(PN));
1300       PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : nullptr;
1301       size_t NumValues = PN->getNumIncomingValues();
1302       for (size_t v = 0; v < NumValues; v++) {
1303         PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
1304         if (PNO) PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
1305       }
1306     }
1307 
1308     VAHelper->finalizeInstrumentation();
1309 
    // Poison allocas at their llvm.lifetime.start sites, if we haven't fallen
    // back to instrumenting only the allocas themselves.
1312     if (InstrumentLifetimeStart) {
1313       for (auto Item : LifetimeStartList) {
1314         instrumentAlloca(*Item.second, Item.first);
1315         AllocaSet.erase(Item.second);
1316       }
1317     }
1318     // Poison the allocas for which we didn't instrument the corresponding
1319     // lifetime intrinsics.
1320     for (AllocaInst *AI : AllocaSet)
1321       instrumentAlloca(*AI);
1322 
1323     bool InstrumentWithCalls = ClInstrumentationWithCallThreshold >= 0 &&
1324                                InstrumentationList.size() + StoreList.size() >
1325                                    (unsigned)ClInstrumentationWithCallThreshold;
1326 
1327     // Insert shadow value checks.
1328     materializeChecks(InstrumentWithCalls);
1329 
1330     // Delayed instrumentation of StoreInst.
1331     // This may not add new address checks.
1332     materializeStores(InstrumentWithCalls);
1333 
1334     return true;
1335   }
1336 
1337   /// Compute the shadow type that corresponds to a given Value.
1338   Type *getShadowTy(Value *V) {
1339     return getShadowTy(V->getType());
1340   }
1341 
1342   /// Compute the shadow type that corresponds to a given Type.
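  ///
  /// For example (following the rules implemented below): i32 -> i32,
  /// float -> i32, <4 x float> -> <4 x i32>, and aggregates are mapped
  /// element-wise.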
1343   Type *getShadowTy(Type *OrigTy) {
1344     if (!OrigTy->isSized()) {
1345       return nullptr;
1346     }
1347     // For integer type, shadow is the same as the original type.
1348     // This may return weird-sized types like i1.
1349     if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
1350       return IT;
1351     const DataLayout &DL = F.getParent()->getDataLayout();
1352     if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
1353       uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType());
1354       return FixedVectorType::get(IntegerType::get(*MS.C, EltSize),
1355                                   cast<FixedVectorType>(VT)->getNumElements());
1356     }
1357     if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy)) {
1358       return ArrayType::get(getShadowTy(AT->getElementType()),
1359                             AT->getNumElements());
1360     }
1361     if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
1362       SmallVector<Type*, 4> Elements;
1363       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1364         Elements.push_back(getShadowTy(ST->getElementType(i)));
1365       StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked());
1366       LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
1367       return Res;
1368     }
1369     uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
1370     return IntegerType::get(*MS.C, TypeSize);
1371   }
1372 
1373   /// Flatten a vector type.
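  /// For example, <4 x i32> is flattened to i128 (same total bit width).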
1374   Type *getShadowTyNoVec(Type *ty) {
1375     if (VectorType *vt = dyn_cast<VectorType>(ty))
1376       return IntegerType::get(*MS.C,
1377                               vt->getPrimitiveSizeInBits().getFixedSize());
1378     return ty;
1379   }
1380 
1381   /// Extract combined shadow of struct elements as a bool
1382   Value *collapseStructShadow(StructType *Struct, Value *Shadow,
1383                               IRBuilder<> &IRB) {
1384     Value *FalseVal = IRB.getIntN(/* width */ 1, /* value */ 0);
1385     Value *Aggregator = FalseVal;
1386 
1387     for (unsigned Idx = 0; Idx < Struct->getNumElements(); Idx++) {
1388       // Combine by ORing together each element's bool shadow
1389       Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
1390       Value *ShadowInner = convertShadowToScalar(ShadowItem, IRB);
1391       Value *ShadowBool = convertToBool(ShadowInner, IRB);
1392 
1393       if (Aggregator != FalseVal)
1394         Aggregator = IRB.CreateOr(Aggregator, ShadowBool);
1395       else
1396         Aggregator = ShadowBool;
1397     }
1398 
1399     return Aggregator;
1400   }
1401 
1402   // Extract combined shadow of array elements
1403   Value *collapseArrayShadow(ArrayType *Array, Value *Shadow,
1404                              IRBuilder<> &IRB) {
1405     if (!Array->getNumElements())
1406       return IRB.getIntN(/* width */ 1, /* value */ 0);
1407 
1408     Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);
1409     Value *Aggregator = convertShadowToScalar(FirstItem, IRB);
1410 
1411     for (unsigned Idx = 1; Idx < Array->getNumElements(); Idx++) {
1412       Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
1413       Value *ShadowInner = convertShadowToScalar(ShadowItem, IRB);
1414       Aggregator = IRB.CreateOr(Aggregator, ShadowInner);
1415     }
1416     return Aggregator;
1417   }
1418 
  /// Convert a shadow value to its flattened variant. The resulting
1420   /// shadow may not necessarily have the same bit width as the input
1421   /// value, but it will always be comparable to zero.
1422   Value *convertShadowToScalar(Value *V, IRBuilder<> &IRB) {
1423     if (StructType *Struct = dyn_cast<StructType>(V->getType()))
1424       return collapseStructShadow(Struct, V, IRB);
1425     if (ArrayType *Array = dyn_cast<ArrayType>(V->getType()))
1426       return collapseArrayShadow(Array, V, IRB);
1427     Type *Ty = V->getType();
1428     Type *NoVecTy = getShadowTyNoVec(Ty);
1429     if (Ty == NoVecTy) return V;
1430     return IRB.CreateBitCast(V, NoVecTy);
1431   }
1432 
1433   // Convert a scalar value to an i1 by comparing with 0
1434   Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &name = "") {
1435     Type *VTy = V->getType();
1436     assert(VTy->isIntegerTy());
1437     if (VTy->getIntegerBitWidth() == 1)
1438       // Just converting a bool to a bool, so do nothing.
1439       return V;
1440     return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), name);
1441   }
1442 
1443   /// Compute the integer shadow offset that corresponds to a given
1444   /// application address.
1445   ///
1446   /// Offset = (Addr & ~AndMask) ^ XorMask
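  ///
  /// For example, with the default Linux/x86_64 mapping (AndMask == 0,
  /// XorMask == 0x500000000000 at the time of writing), this reduces to
  /// Offset = Addr ^ 0x500000000000.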
1447   Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) {
1448     Value *OffsetLong = IRB.CreatePointerCast(Addr, MS.IntptrTy);
1449 
1450     uint64_t AndMask = MS.MapParams->AndMask;
1451     if (AndMask)
1452       OffsetLong =
1453           IRB.CreateAnd(OffsetLong, ConstantInt::get(MS.IntptrTy, ~AndMask));
1454 
1455     uint64_t XorMask = MS.MapParams->XorMask;
1456     if (XorMask)
1457       OffsetLong =
1458           IRB.CreateXor(OffsetLong, ConstantInt::get(MS.IntptrTy, XorMask));
1459     return OffsetLong;
1460   }
1461 
1462   /// Compute the shadow and origin addresses corresponding to a given
1463   /// application address.
1464   ///
1465   /// Shadow = ShadowBase + Offset
1466   /// Origin = (OriginBase + Offset) & ~3ULL
1467   std::pair<Value *, Value *>
1468   getShadowOriginPtrUserspace(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy,
1469                               MaybeAlign Alignment) {
1470     Value *ShadowOffset = getShadowPtrOffset(Addr, IRB);
1471     Value *ShadowLong = ShadowOffset;
1472     uint64_t ShadowBase = MS.MapParams->ShadowBase;
1473     if (ShadowBase != 0) {
1474       ShadowLong =
1475         IRB.CreateAdd(ShadowLong,
1476                       ConstantInt::get(MS.IntptrTy, ShadowBase));
1477     }
1478     Value *ShadowPtr =
1479         IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
1480     Value *OriginPtr = nullptr;
1481     if (MS.TrackOrigins) {
1482       Value *OriginLong = ShadowOffset;
1483       uint64_t OriginBase = MS.MapParams->OriginBase;
1484       if (OriginBase != 0)
1485         OriginLong = IRB.CreateAdd(OriginLong,
1486                                    ConstantInt::get(MS.IntptrTy, OriginBase));
1487       if (!Alignment || *Alignment < kMinOriginAlignment) {
1488         uint64_t Mask = kMinOriginAlignment.value() - 1;
1489         OriginLong =
1490             IRB.CreateAnd(OriginLong, ConstantInt::get(MS.IntptrTy, ~Mask));
1491       }
1492       OriginPtr =
1493           IRB.CreateIntToPtr(OriginLong, PointerType::get(MS.OriginTy, 0));
1494     }
1495     return std::make_pair(ShadowPtr, OriginPtr);
1496   }
1497 
1498   std::pair<Value *, Value *> getShadowOriginPtrKernel(Value *Addr,
1499                                                        IRBuilder<> &IRB,
1500                                                        Type *ShadowTy,
1501                                                        bool isStore) {
1502     Value *ShadowOriginPtrs;
1503     const DataLayout &DL = F.getParent()->getDataLayout();
1504     int Size = DL.getTypeStoreSize(ShadowTy);
1505 
1506     FunctionCallee Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
1507     Value *AddrCast =
1508         IRB.CreatePointerCast(Addr, PointerType::get(IRB.getInt8Ty(), 0));
1509     if (Getter) {
1510       ShadowOriginPtrs = IRB.CreateCall(Getter, AddrCast);
1511     } else {
1512       Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
1513       ShadowOriginPtrs = IRB.CreateCall(isStore ? MS.MsanMetadataPtrForStoreN
1514                                                 : MS.MsanMetadataPtrForLoadN,
1515                                         {AddrCast, SizeVal});
1516     }
1517     Value *ShadowPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 0);
1518     ShadowPtr = IRB.CreatePointerCast(ShadowPtr, PointerType::get(ShadowTy, 0));
1519     Value *OriginPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 1);
1520 
1521     return std::make_pair(ShadowPtr, OriginPtr);
1522   }
1523 
1524   std::pair<Value *, Value *> getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB,
1525                                                  Type *ShadowTy,
1526                                                  MaybeAlign Alignment,
1527                                                  bool isStore) {
1528     if (MS.CompileKernel)
1529       return getShadowOriginPtrKernel(Addr, IRB, ShadowTy, isStore);
1530     return getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment);
1531   }
1532 
1533   /// Compute the shadow address for a given function argument.
1534   ///
1535   /// Shadow = ParamTLS+ArgOffset.
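  ///
  /// ArgOffset is the byte offset of the argument's shadow slot within
  /// ParamTLS; slots are laid out at kShadowTLSAlignment granularity (see the
  /// ArgOffset computation in getShadow()).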
1536   Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB,
1537                                  int ArgOffset) {
1538     Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
1539     if (ArgOffset)
1540       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1541     return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
1542                               "_msarg");
1543   }
1544 
1545   /// Compute the origin address for a given function argument.
1546   Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB,
1547                                  int ArgOffset) {
1548     if (!MS.TrackOrigins)
1549       return nullptr;
1550     Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
1551     if (ArgOffset)
1552       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1553     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
1554                               "_msarg_o");
1555   }
1556 
1557   /// Compute the shadow address for a retval.
1558   Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) {
1559     return IRB.CreatePointerCast(MS.RetvalTLS,
1560                                  PointerType::get(getShadowTy(A), 0),
1561                                  "_msret");
1562   }
1563 
1564   /// Compute the origin address for a retval.
1565   Value *getOriginPtrForRetval(IRBuilder<> &IRB) {
1566     // We keep a single origin for the entire retval. Might be too optimistic.
1567     return MS.RetvalOriginTLS;
1568   }
1569 
1570   /// Set SV to be the shadow value for V.
1571   void setShadow(Value *V, Value *SV) {
1572     assert(!ShadowMap.count(V) && "Values may only have one shadow");
1573     ShadowMap[V] = PropagateShadow ? SV : getCleanShadow(V);
1574   }
1575 
1576   /// Set Origin to be the origin value for V.
1577   void setOrigin(Value *V, Value *Origin) {
1578     if (!MS.TrackOrigins) return;
1579     assert(!OriginMap.count(V) && "Values may only have one origin");
1580     LLVM_DEBUG(dbgs() << "ORIGIN: " << *V << "  ==> " << *Origin << "\n");
1581     OriginMap[V] = Origin;
1582   }
1583 
1584   Constant *getCleanShadow(Type *OrigTy) {
1585     Type *ShadowTy = getShadowTy(OrigTy);
1586     if (!ShadowTy)
1587       return nullptr;
1588     return Constant::getNullValue(ShadowTy);
1589   }
1590 
1591   /// Create a clean shadow value for a given value.
1592   ///
1593   /// Clean shadow (all zeroes) means all bits of the value are defined
1594   /// (initialized).
1595   Constant *getCleanShadow(Value *V) {
1596     return getCleanShadow(V->getType());
1597   }
1598 
1599   /// Create a dirty shadow of a given shadow type.
1600   Constant *getPoisonedShadow(Type *ShadowTy) {
1601     assert(ShadowTy);
1602     if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
1603       return Constant::getAllOnesValue(ShadowTy);
1604     if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy)) {
1605       SmallVector<Constant *, 4> Vals(AT->getNumElements(),
1606                                       getPoisonedShadow(AT->getElementType()));
1607       return ConstantArray::get(AT, Vals);
1608     }
1609     if (StructType *ST = dyn_cast<StructType>(ShadowTy)) {
1610       SmallVector<Constant *, 4> Vals;
1611       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1612         Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
1613       return ConstantStruct::get(ST, Vals);
1614     }
1615     llvm_unreachable("Unexpected shadow type");
1616   }
1617 
1618   /// Create a dirty shadow for a given value.
1619   Constant *getPoisonedShadow(Value *V) {
1620     Type *ShadowTy = getShadowTy(V);
1621     if (!ShadowTy)
1622       return nullptr;
1623     return getPoisonedShadow(ShadowTy);
1624   }
1625 
1626   /// Create a clean (zero) origin.
1627   Value *getCleanOrigin() {
1628     return Constant::getNullValue(MS.OriginTy);
1629   }
1630 
1631   /// Get the shadow value for a given Value.
1632   ///
1633   /// This function either returns the value set earlier with setShadow,
  /// or extracts it from ParamTLS (for function arguments).
1635   Value *getShadow(Value *V) {
1636     if (!PropagateShadow) return getCleanShadow(V);
1637     if (Instruction *I = dyn_cast<Instruction>(V)) {
1638       if (I->getMetadata("nosanitize"))
1639         return getCleanShadow(V);
1640       // For instructions the shadow is already stored in the map.
1641       Value *Shadow = ShadowMap[V];
1642       if (!Shadow) {
1643         LLVM_DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
1644         (void)I;
1645         assert(Shadow && "No shadow for a value");
1646       }
1647       return Shadow;
1648     }
1649     if (UndefValue *U = dyn_cast<UndefValue>(V)) {
1650       Value *AllOnes = PoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V);
1651       LLVM_DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
1652       (void)U;
1653       return AllOnes;
1654     }
1655     if (Argument *A = dyn_cast<Argument>(V)) {
1656       // For arguments we compute the shadow on demand and store it in the map.
1657       Value **ShadowPtr = &ShadowMap[V];
1658       if (*ShadowPtr)
1659         return *ShadowPtr;
1660       Function *F = A->getParent();
1661       IRBuilder<> EntryIRB(ActualFnStart->getFirstNonPHI());
1662       unsigned ArgOffset = 0;
1663       const DataLayout &DL = F->getParent()->getDataLayout();
1664       for (auto &FArg : F->args()) {
1665         if (!FArg.getType()->isSized()) {
1666           LLVM_DEBUG(dbgs() << "Arg is not sized\n");
1667           continue;
1668         }
1669 
1670         bool FArgByVal = FArg.hasByValAttr();
1671         bool FArgNoUndef = FArg.hasAttribute(Attribute::NoUndef);
1672         bool FArgEagerCheck = ClEagerChecks && !FArgByVal && FArgNoUndef;
1673         unsigned Size =
1674             FArg.hasByValAttr()
1675                 ? DL.getTypeAllocSize(FArg.getParamByValType())
1676                 : DL.getTypeAllocSize(FArg.getType());
1677 
1678         if (A == &FArg) {
1679           bool Overflow = ArgOffset + Size > kParamTLSSize;
1680           if (FArgEagerCheck) {
1681             *ShadowPtr = getCleanShadow(V);
1682             setOrigin(A, getCleanOrigin());
1683             continue;
1684           } else if (FArgByVal) {
1685             Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
1686             // ByVal pointer itself has clean shadow. We copy the actual
1687             // argument shadow to the underlying memory.
1688             // Figure out maximal valid memcpy alignment.
1689             const Align ArgAlign = DL.getValueOrABITypeAlignment(
1690                 MaybeAlign(FArg.getParamAlignment()), FArg.getParamByValType());
1691             Value *CpShadowPtr =
1692                 getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign,
1693                                    /*isStore*/ true)
1694                     .first;
1695             // TODO(glider): need to copy origins.
1696             if (Overflow) {
1697               // ParamTLS overflow.
1698               EntryIRB.CreateMemSet(
1699                   CpShadowPtr, Constant::getNullValue(EntryIRB.getInt8Ty()),
1700                   Size, ArgAlign);
1701             } else {
1702               const Align CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
1703               Value *Cpy = EntryIRB.CreateMemCpy(CpShadowPtr, CopyAlign, Base,
1704                                                  CopyAlign, Size);
1705               LLVM_DEBUG(dbgs() << "  ByValCpy: " << *Cpy << "\n");
1706               (void)Cpy;
1707             }
1708             *ShadowPtr = getCleanShadow(V);
1709           } else {
1710             // Shadow over TLS
1711             Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
1712             if (Overflow) {
1713               // ParamTLS overflow.
1714               *ShadowPtr = getCleanShadow(V);
1715             } else {
1716               *ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base,
1717                                                       kShadowTLSAlignment);
1718             }
1719           }
1720           LLVM_DEBUG(dbgs()
1721                      << "  ARG:    " << FArg << " ==> " << **ShadowPtr << "\n");
1722           if (MS.TrackOrigins && !Overflow) {
1723             Value *OriginPtr =
1724                 getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
1725             setOrigin(A, EntryIRB.CreateLoad(MS.OriginTy, OriginPtr));
1726           } else {
1727             setOrigin(A, getCleanOrigin());
1728           }
1729         }
1730 
1731         if (!FArgEagerCheck)
1732           ArgOffset += alignTo(Size, kShadowTLSAlignment);
1733       }
1734       assert(*ShadowPtr && "Could not find shadow for an argument");
1735       return *ShadowPtr;
1736     }
1737     // For everything else the shadow is zero.
1738     return getCleanShadow(V);
1739   }
1740 
1741   /// Get the shadow for i-th argument of the instruction I.
1742   Value *getShadow(Instruction *I, int i) {
1743     return getShadow(I->getOperand(i));
1744   }
1745 
1746   /// Get the origin for a value.
1747   Value *getOrigin(Value *V) {
1748     if (!MS.TrackOrigins) return nullptr;
1749     if (!PropagateShadow) return getCleanOrigin();
1750     if (isa<Constant>(V)) return getCleanOrigin();
1751     assert((isa<Instruction>(V) || isa<Argument>(V)) &&
1752            "Unexpected value type in getOrigin()");
1753     if (Instruction *I = dyn_cast<Instruction>(V)) {
1754       if (I->getMetadata("nosanitize"))
1755         return getCleanOrigin();
1756     }
1757     Value *Origin = OriginMap[V];
1758     assert(Origin && "Missing origin");
1759     return Origin;
1760   }
1761 
1762   /// Get the origin for i-th argument of the instruction I.
1763   Value *getOrigin(Instruction *I, int i) {
1764     return getOrigin(I->getOperand(i));
1765   }
1766 
1767   /// Remember the place where a shadow check should be inserted.
1768   ///
1769   /// This location will be later instrumented with a check that will print a
1770   /// UMR warning in runtime if the shadow value is not 0.
1771   void insertShadowCheck(Value *Shadow, Value *Origin, Instruction *OrigIns) {
1772     assert(Shadow);
1773     if (!InsertChecks) return;
1774 #ifndef NDEBUG
1775     Type *ShadowTy = Shadow->getType();
1776     assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy) ||
1777             isa<StructType>(ShadowTy) || isa<ArrayType>(ShadowTy)) &&
1778            "Can only insert checks for integer, vector, and aggregate shadow "
1779            "types");
1780 #endif
1781     InstrumentationList.push_back(
1782         ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
1783   }
1784 
1785   /// Remember the place where a shadow check should be inserted.
1786   ///
1787   /// This location will be later instrumented with a check that will print a
1788   /// UMR warning in runtime if the value is not fully defined.
1789   void insertShadowCheck(Value *Val, Instruction *OrigIns) {
1790     assert(Val);
1791     Value *Shadow, *Origin;
1792     if (ClCheckConstantShadow) {
1793       Shadow = getShadow(Val);
1794       if (!Shadow) return;
1795       Origin = getOrigin(Val);
1796     } else {
1797       Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
1798       if (!Shadow) return;
1799       Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
1800     }
1801     insertShadowCheck(Shadow, Origin, OrigIns);
1802   }
1803 
1804   AtomicOrdering addReleaseOrdering(AtomicOrdering a) {
1805     switch (a) {
1806       case AtomicOrdering::NotAtomic:
1807         return AtomicOrdering::NotAtomic;
1808       case AtomicOrdering::Unordered:
1809       case AtomicOrdering::Monotonic:
1810       case AtomicOrdering::Release:
1811         return AtomicOrdering::Release;
1812       case AtomicOrdering::Acquire:
1813       case AtomicOrdering::AcquireRelease:
1814         return AtomicOrdering::AcquireRelease;
1815       case AtomicOrdering::SequentiallyConsistent:
1816         return AtomicOrdering::SequentiallyConsistent;
1817     }
1818     llvm_unreachable("Unknown ordering");
1819   }
1820 
1821   AtomicOrdering addAcquireOrdering(AtomicOrdering a) {
1822     switch (a) {
1823       case AtomicOrdering::NotAtomic:
1824         return AtomicOrdering::NotAtomic;
1825       case AtomicOrdering::Unordered:
1826       case AtomicOrdering::Monotonic:
1827       case AtomicOrdering::Acquire:
1828         return AtomicOrdering::Acquire;
1829       case AtomicOrdering::Release:
1830       case AtomicOrdering::AcquireRelease:
1831         return AtomicOrdering::AcquireRelease;
1832       case AtomicOrdering::SequentiallyConsistent:
1833         return AtomicOrdering::SequentiallyConsistent;
1834     }
1835     llvm_unreachable("Unknown ordering");
1836   }
1837 
1838   // ------------------- Visitors.
1839   using InstVisitor<MemorySanitizerVisitor>::visit;
1840   void visit(Instruction &I) {
1841     if (!I.getMetadata("nosanitize"))
1842       InstVisitor<MemorySanitizerVisitor>::visit(I);
1843   }
1844 
1845   /// Instrument LoadInst
1846   ///
1847   /// Loads the corresponding shadow and (optionally) origin.
1848   /// Optionally, checks that the load address is fully defined.
1849   void visitLoadInst(LoadInst &I) {
1850     assert(I.getType()->isSized() && "Load type must have size");
1851     assert(!I.getMetadata("nosanitize"));
1852     IRBuilder<> IRB(I.getNextNode());
1853     Type *ShadowTy = getShadowTy(&I);
1854     Value *Addr = I.getPointerOperand();
1855     Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
1856     const Align Alignment = assumeAligned(I.getAlignment());
1857     if (PropagateShadow) {
1858       std::tie(ShadowPtr, OriginPtr) =
1859           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
1860       setShadow(&I,
1861                 IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
1862     } else {
1863       setShadow(&I, getCleanShadow(&I));
1864     }
1865 
1866     if (ClCheckAccessAddress)
1867       insertShadowCheck(I.getPointerOperand(), &I);
1868 
1869     if (I.isAtomic())
1870       I.setOrdering(addAcquireOrdering(I.getOrdering()));
1871 
1872     if (MS.TrackOrigins) {
1873       if (PropagateShadow) {
1874         const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1875         setOrigin(
1876             &I, IRB.CreateAlignedLoad(MS.OriginTy, OriginPtr, OriginAlignment));
1877       } else {
1878         setOrigin(&I, getCleanOrigin());
1879       }
1880     }
1881   }
1882 
1883   /// Instrument StoreInst
1884   ///
1885   /// Stores the corresponding shadow and (optionally) origin.
1886   /// Optionally, checks that the store address is fully defined.
1887   void visitStoreInst(StoreInst &I) {
1888     StoreList.push_back(&I);
1889     if (ClCheckAccessAddress)
1890       insertShadowCheck(I.getPointerOperand(), &I);
1891   }
1892 
1893   void handleCASOrRMW(Instruction &I) {
1894     assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
1895 
1896     IRBuilder<> IRB(&I);
1897     Value *Addr = I.getOperand(0);
1898     Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, I.getType(), Align(1),
1899                                           /*isStore*/ true)
1900                            .first;
1901 
1902     if (ClCheckAccessAddress)
1903       insertShadowCheck(Addr, &I);
1904 
    // Only test the conditional argument of the cmpxchg instruction.
    // The other argument can potentially be uninitialized, but we cannot
    // detect this situation reliably without possible false positives.
1908     if (isa<AtomicCmpXchgInst>(I))
1909       insertShadowCheck(I.getOperand(1), &I);
1910 
1911     IRB.CreateStore(getCleanShadow(&I), ShadowPtr);
1912 
1913     setShadow(&I, getCleanShadow(&I));
1914     setOrigin(&I, getCleanOrigin());
1915   }
1916 
1917   void visitAtomicRMWInst(AtomicRMWInst &I) {
1918     handleCASOrRMW(I);
1919     I.setOrdering(addReleaseOrdering(I.getOrdering()));
1920   }
1921 
1922   void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
1923     handleCASOrRMW(I);
1924     I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
1925   }
1926 
1927   // Vector manipulation.
1928   void visitExtractElementInst(ExtractElementInst &I) {
1929     insertShadowCheck(I.getOperand(1), &I);
1930     IRBuilder<> IRB(&I);
1931     setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
1932               "_msprop"));
1933     setOrigin(&I, getOrigin(&I, 0));
1934   }
1935 
1936   void visitInsertElementInst(InsertElementInst &I) {
1937     insertShadowCheck(I.getOperand(2), &I);
1938     IRBuilder<> IRB(&I);
1939     setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1),
1940               I.getOperand(2), "_msprop"));
1941     setOriginForNaryOp(I);
1942   }
1943 
1944   void visitShuffleVectorInst(ShuffleVectorInst &I) {
1945     IRBuilder<> IRB(&I);
1946     setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1),
1947                                           I.getShuffleMask(), "_msprop"));
1948     setOriginForNaryOp(I);
1949   }
1950 
1951   // Casts.
1952   void visitSExtInst(SExtInst &I) {
1953     IRBuilder<> IRB(&I);
1954     setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop"));
1955     setOrigin(&I, getOrigin(&I, 0));
1956   }
1957 
1958   void visitZExtInst(ZExtInst &I) {
1959     IRBuilder<> IRB(&I);
1960     setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop"));
1961     setOrigin(&I, getOrigin(&I, 0));
1962   }
1963 
1964   void visitTruncInst(TruncInst &I) {
1965     IRBuilder<> IRB(&I);
1966     setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop"));
1967     setOrigin(&I, getOrigin(&I, 0));
1968   }
1969 
1970   void visitBitCastInst(BitCastInst &I) {
1971     // Special case: if this is the bitcast (there is exactly 1 allowed) between
1972     // a musttail call and a ret, don't instrument. New instructions are not
1973     // allowed after a musttail call.
1974     if (auto *CI = dyn_cast<CallInst>(I.getOperand(0)))
1975       if (CI->isMustTailCall())
1976         return;
1977     IRBuilder<> IRB(&I);
1978     setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
1979     setOrigin(&I, getOrigin(&I, 0));
1980   }
1981 
1982   void visitPtrToIntInst(PtrToIntInst &I) {
1983     IRBuilder<> IRB(&I);
1984     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
1985              "_msprop_ptrtoint"));
1986     setOrigin(&I, getOrigin(&I, 0));
1987   }
1988 
1989   void visitIntToPtrInst(IntToPtrInst &I) {
1990     IRBuilder<> IRB(&I);
1991     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
1992              "_msprop_inttoptr"));
1993     setOrigin(&I, getOrigin(&I, 0));
1994   }
1995 
1996   void visitFPToSIInst(CastInst& I) { handleShadowOr(I); }
1997   void visitFPToUIInst(CastInst& I) { handleShadowOr(I); }
1998   void visitSIToFPInst(CastInst& I) { handleShadowOr(I); }
1999   void visitUIToFPInst(CastInst& I) { handleShadowOr(I); }
2000   void visitFPExtInst(CastInst& I) { handleShadowOr(I); }
2001   void visitFPTruncInst(CastInst& I) { handleShadowOr(I); }
2002 
2003   /// Propagate shadow for bitwise AND.
2004   ///
2005   /// This code is exact, i.e. if, for example, a bit in the left argument
  /// is defined and 0, then neither the value nor the definedness of the
  /// corresponding bit in the right argument affects the resulting shadow.
2008   void visitAnd(BinaryOperator &I) {
2009     IRBuilder<> IRB(&I);
2010     //  "And" of 0 and a poisoned value results in unpoisoned value.
2011     //  1&1 => 1;     0&1 => 0;     p&1 => p;
2012     //  1&0 => 0;     0&0 => 0;     p&0 => 0;
2013     //  1&p => p;     0&p => 0;     p&p => p;
2014     //  S = (S1 & S2) | (V1 & S2) | (S1 & V2)
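    //  For example, if V1 is a fully defined 0, then S1 = 0 and all three
    //  terms above are 0, so the result is defined (0) regardless of S2.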
2015     Value *S1 = getShadow(&I, 0);
2016     Value *S2 = getShadow(&I, 1);
2017     Value *V1 = I.getOperand(0);
2018     Value *V2 = I.getOperand(1);
2019     if (V1->getType() != S1->getType()) {
2020       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
2021       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
2022     }
2023     Value *S1S2 = IRB.CreateAnd(S1, S2);
2024     Value *V1S2 = IRB.CreateAnd(V1, S2);
2025     Value *S1V2 = IRB.CreateAnd(S1, V2);
2026     setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
2027     setOriginForNaryOp(I);
2028   }
2029 
2030   void visitOr(BinaryOperator &I) {
2031     IRBuilder<> IRB(&I);
2032     //  "Or" of 1 and a poisoned value results in unpoisoned value.
2033     //  1|1 => 1;     0|1 => 1;     p|1 => 1;
2034     //  1|0 => 1;     0|0 => 0;     p|0 => p;
2035     //  1|p => 1;     0|p => p;     p|p => p;
2036     //  S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
2037     Value *S1 = getShadow(&I, 0);
2038     Value *S2 = getShadow(&I, 1);
2039     Value *V1 = IRB.CreateNot(I.getOperand(0));
2040     Value *V2 = IRB.CreateNot(I.getOperand(1));
2041     if (V1->getType() != S1->getType()) {
2042       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
2043       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
2044     }
2045     Value *S1S2 = IRB.CreateAnd(S1, S2);
2046     Value *V1S2 = IRB.CreateAnd(V1, S2);
2047     Value *S1V2 = IRB.CreateAnd(S1, V2);
2048     setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
2049     setOriginForNaryOp(I);
2050   }
2051 
2052   /// Default propagation of shadow and/or origin.
2053   ///
2054   /// This class implements the general case of shadow propagation, used in all
2055   /// cases where we don't know and/or don't care about what the operation
2056   /// actually does. It converts all input shadow values to a common type
2057   /// (extending or truncating as necessary), and bitwise OR's them.
2058   ///
2059   /// This is much cheaper than inserting checks (i.e. requiring inputs to be
2060   /// fully initialized), and less prone to false positives.
2061   ///
2062   /// This class also implements the general case of origin propagation. For a
2063   /// Nary operation, result origin is set to the origin of an argument that is
  /// not entirely initialized. If there is more than one such argument, the
2065   /// rightmost of them is picked. It does not matter which one is picked if all
2066   /// arguments are initialized.
2067   template <bool CombineShadow>
2068   class Combiner {
2069     Value *Shadow = nullptr;
2070     Value *Origin = nullptr;
2071     IRBuilder<> &IRB;
2072     MemorySanitizerVisitor *MSV;
2073 
2074   public:
2075     Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB)
2076         : IRB(IRB), MSV(MSV) {}
2077 
2078     /// Add a pair of shadow and origin values to the mix.
2079     Combiner &Add(Value *OpShadow, Value *OpOrigin) {
2080       if (CombineShadow) {
2081         assert(OpShadow);
2082         if (!Shadow)
2083           Shadow = OpShadow;
2084         else {
2085           OpShadow = MSV->CreateShadowCast(IRB, OpShadow, Shadow->getType());
2086           Shadow = IRB.CreateOr(Shadow, OpShadow, "_msprop");
2087         }
2088       }
2089 
2090       if (MSV->MS.TrackOrigins) {
2091         assert(OpOrigin);
2092         if (!Origin) {
2093           Origin = OpOrigin;
2094         } else {
2095           Constant *ConstOrigin = dyn_cast<Constant>(OpOrigin);
2096           // No point in adding something that might result in 0 origin value.
2097           if (!ConstOrigin || !ConstOrigin->isNullValue()) {
2098             Value *FlatShadow = MSV->convertShadowToScalar(OpShadow, IRB);
2099             Value *Cond =
2100                 IRB.CreateICmpNE(FlatShadow, MSV->getCleanShadow(FlatShadow));
2101             Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
2102           }
2103         }
2104       }
2105       return *this;
2106     }
2107 
2108     /// Add an application value to the mix.
2109     Combiner &Add(Value *V) {
2110       Value *OpShadow = MSV->getShadow(V);
2111       Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : nullptr;
2112       return Add(OpShadow, OpOrigin);
2113     }
2114 
2115     /// Set the current combined values as the given instruction's shadow
2116     /// and origin.
2117     void Done(Instruction *I) {
2118       if (CombineShadow) {
2119         assert(Shadow);
2120         Shadow = MSV->CreateShadowCast(IRB, Shadow, MSV->getShadowTy(I));
2121         MSV->setShadow(I, Shadow);
2122       }
2123       if (MSV->MS.TrackOrigins) {
2124         assert(Origin);
2125         MSV->setOrigin(I, Origin);
2126       }
2127     }
2128   };
2129 
2130   using ShadowAndOriginCombiner = Combiner<true>;
2131   using OriginCombiner = Combiner<false>;
2132 
2133   /// Propagate origin for arbitrary operation.
2134   void setOriginForNaryOp(Instruction &I) {
2135     if (!MS.TrackOrigins) return;
2136     IRBuilder<> IRB(&I);
2137     OriginCombiner OC(this, IRB);
2138     for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
2139       OC.Add(OI->get());
2140     OC.Done(&I);
2141   }
2142 
2143   size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) {
2144     assert(!(Ty->isVectorTy() && Ty->getScalarType()->isPointerTy()) &&
2145            "Vector of pointers is not a valid shadow type");
2146     return Ty->isVectorTy() ? cast<FixedVectorType>(Ty)->getNumElements() *
2147                                   Ty->getScalarSizeInBits()
2148                             : Ty->getPrimitiveSizeInBits();
2149   }
2150 
2151   /// Cast between two shadow types, extending or truncating as
2152   /// necessary.
2153   Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy,
2154                           bool Signed = false) {
2155     Type *srcTy = V->getType();
2156     size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
2157     size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
2158     if (srcSizeInBits > 1 && dstSizeInBits == 1)
2159       return IRB.CreateICmpNE(V, getCleanShadow(V));
2160 
2161     if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
2162       return IRB.CreateIntCast(V, dstTy, Signed);
2163     if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
2164         cast<FixedVectorType>(dstTy)->getNumElements() ==
2165             cast<FixedVectorType>(srcTy)->getNumElements())
2166       return IRB.CreateIntCast(V, dstTy, Signed);
2167     Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
2168     Value *V2 =
2169       IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed);
2170     return IRB.CreateBitCast(V2, dstTy);
2171     // TODO: handle struct types.
2172   }
2173 
2174   /// Cast an application value to the type of its own shadow.
2175   Value *CreateAppToShadowCast(IRBuilder<> &IRB, Value *V) {
2176     Type *ShadowTy = getShadowTy(V);
2177     if (V->getType() == ShadowTy)
2178       return V;
2179     if (V->getType()->isPtrOrPtrVectorTy())
2180       return IRB.CreatePtrToInt(V, ShadowTy);
2181     else
2182       return IRB.CreateBitCast(V, ShadowTy);
2183   }
2184 
2185   /// Propagate shadow for arbitrary operation.
2186   void handleShadowOr(Instruction &I) {
2187     IRBuilder<> IRB(&I);
2188     ShadowAndOriginCombiner SC(this, IRB);
2189     for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
2190       SC.Add(OI->get());
2191     SC.Done(&I);
2192   }
2193 
2194   void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
2195 
2196   // Handle multiplication by constant.
2197   //
2198   // Handle a special case of multiplication by constant that may have one or
  // more zeros in the lower bits. This makes the corresponding number of
  // lower bits of the result zero as well. We model it by shifting the other
  // operand's shadow left by the required number of bits. Effectively, we
  // transform (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as
  // (Sx << B). We use multiplication by 2**N instead of a shift to cover the
  // case of multiplication by 0, which may occur in some elements of a vector
  // operand.
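  // For example, for X * 24 (24 == 3 * 2**3) the three low bits of the result
  // are always zero, so the result shadow is Sx * 8, i.e. Sx << 3.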
2205   void handleMulByConstant(BinaryOperator &I, Constant *ConstArg,
2206                            Value *OtherArg) {
2207     Constant *ShadowMul;
2208     Type *Ty = ConstArg->getType();
2209     if (auto *VTy = dyn_cast<VectorType>(Ty)) {
2210       unsigned NumElements = cast<FixedVectorType>(VTy)->getNumElements();
2211       Type *EltTy = VTy->getElementType();
2212       SmallVector<Constant *, 16> Elements;
2213       for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
2214         if (ConstantInt *Elt =
2215                 dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx))) {
2216           const APInt &V = Elt->getValue();
2217           APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
2218           Elements.push_back(ConstantInt::get(EltTy, V2));
2219         } else {
2220           Elements.push_back(ConstantInt::get(EltTy, 1));
2221         }
2222       }
2223       ShadowMul = ConstantVector::get(Elements);
2224     } else {
2225       if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg)) {
2226         const APInt &V = Elt->getValue();
2227         APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
2228         ShadowMul = ConstantInt::get(Ty, V2);
2229       } else {
2230         ShadowMul = ConstantInt::get(Ty, 1);
2231       }
2232     }
2233 
2234     IRBuilder<> IRB(&I);
2235     setShadow(&I,
2236               IRB.CreateMul(getShadow(OtherArg), ShadowMul, "msprop_mul_cst"));
2237     setOrigin(&I, getOrigin(OtherArg));
2238   }
2239 
2240   void visitMul(BinaryOperator &I) {
2241     Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
2242     Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
2243     if (constOp0 && !constOp1)
2244       handleMulByConstant(I, constOp0, I.getOperand(1));
2245     else if (constOp1 && !constOp0)
2246       handleMulByConstant(I, constOp1, I.getOperand(0));
2247     else
2248       handleShadowOr(I);
2249   }
2250 
2251   void visitFAdd(BinaryOperator &I) { handleShadowOr(I); }
2252   void visitFSub(BinaryOperator &I) { handleShadowOr(I); }
2253   void visitFMul(BinaryOperator &I) { handleShadowOr(I); }
2254   void visitAdd(BinaryOperator &I) { handleShadowOr(I); }
2255   void visitSub(BinaryOperator &I) { handleShadowOr(I); }
2256   void visitXor(BinaryOperator &I) { handleShadowOr(I); }
2257 
2258   void handleIntegerDiv(Instruction &I) {
2259     IRBuilder<> IRB(&I);
2260     // Strict on the second argument.
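    // (A poisoned divisor could cause a division trap, so it must be fully
    // initialized; the dividend's shadow simply propagates to the result.)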
2261     insertShadowCheck(I.getOperand(1), &I);
2262     setShadow(&I, getShadow(&I, 0));
2263     setOrigin(&I, getOrigin(&I, 0));
2264   }
2265 
2266   void visitUDiv(BinaryOperator &I) { handleIntegerDiv(I); }
2267   void visitSDiv(BinaryOperator &I) { handleIntegerDiv(I); }
2268   void visitURem(BinaryOperator &I) { handleIntegerDiv(I); }
2269   void visitSRem(BinaryOperator &I) { handleIntegerDiv(I); }
2270 
  // Floating point division is side-effect free. We cannot require that the
  // divisor is fully initialized, so we propagate shadow instead. See PR37523.
2273   void visitFDiv(BinaryOperator &I) { handleShadowOr(I); }
2274   void visitFRem(BinaryOperator &I) { handleShadowOr(I); }
2275 
2276   /// Instrument == and != comparisons.
2277   ///
2278   /// Sometimes the comparison result is known even if some of the bits of the
2279   /// arguments are not.
2280   void handleEqualityComparison(ICmpInst &I) {
2281     IRBuilder<> IRB(&I);
2282     Value *A = I.getOperand(0);
2283     Value *B = I.getOperand(1);
2284     Value *Sa = getShadow(A);
2285     Value *Sb = getShadow(B);
2286 
2287     // Get rid of pointers and vectors of pointers.
2288     // For ints (and vectors of ints), types of A and Sa match,
2289     // and this is a no-op.
2290     A = IRB.CreatePointerCast(A, Sa->getType());
2291     B = IRB.CreatePointerCast(B, Sb->getType());
2292 
2293     // A == B  <==>  (C = A^B) == 0
2294     // A != B  <==>  (C = A^B) != 0
2295     // Sc = Sa | Sb
2296     Value *C = IRB.CreateXor(A, B);
2297     Value *Sc = IRB.CreateOr(Sa, Sb);
2298     // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now)
2299     // Result is defined if one of the following is true
2300     // * there is a defined 1 bit in C
2301     // * C is fully defined
2302     // Si = !(C & ~Sc) && Sc
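    // For example, if A = 0b10?? (Sa = 0b0011) and B = 0b0100 is fully
    // defined, then C = A ^ B has a defined 1 in its top bit (C & ~Sc != 0),
    // so the comparison result is known to be "not equal" and Si is 0.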
2303     Value *Zero = Constant::getNullValue(Sc->getType());
2304     Value *MinusOne = Constant::getAllOnesValue(Sc->getType());
2305     Value *Si =
2306       IRB.CreateAnd(IRB.CreateICmpNE(Sc, Zero),
2307                     IRB.CreateICmpEQ(
2308                       IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero));
2309     Si->setName("_msprop_icmp");
2310     setShadow(&I, Si);
2311     setOriginForNaryOp(I);
2312   }
2313 
2314   /// Build the lowest possible value of V, taking into account V's
2315   ///        uninitialized bits.
2316   Value *getLowestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
2317                                 bool isSigned) {
2318     if (isSigned) {
2319       // Split shadow into sign bit and other bits.
2320       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
2321       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
      // Maximize the undefined shadow bit, minimize other undefined bits.
2323       return
2324         IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaOtherBits)), SaSignBit);
2325     } else {
2326       // Minimize undefined bits.
2327       return IRB.CreateAnd(A, IRB.CreateNot(Sa));
2328     }
2329   }
2330 
2331   /// Build the highest possible value of V, taking into account V's
2332   ///        uninitialized bits.
2333   Value *getHighestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
2334                                 bool isSigned) {
2335     if (isSigned) {
2336       // Split shadow into sign bit and other bits.
2337       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
2338       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
      // Minimize the undefined shadow bit, maximize other undefined bits.
2340       return
2341         IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaSignBit)), SaOtherBits);
2342     } else {
2343       // Maximize undefined bits.
2344       return IRB.CreateOr(A, Sa);
2345     }
2346   }
2347 
2348   /// Instrument relational comparisons.
2349   ///
2350   /// This function does exact shadow propagation for all relational
2351   /// comparisons of integers, pointers and vectors of those.
2352   /// FIXME: output seems suboptimal when one of the operands is a constant
2353   void handleRelationalComparisonExact(ICmpInst &I) {
2354     IRBuilder<> IRB(&I);
2355     Value *A = I.getOperand(0);
2356     Value *B = I.getOperand(1);
2357     Value *Sa = getShadow(A);
2358     Value *Sb = getShadow(B);
2359 
2360     // Get rid of pointers and vectors of pointers.
2361     // For ints (and vectors of ints), types of A and Sa match,
2362     // and this is a no-op.
2363     A = IRB.CreatePointerCast(A, Sa->getType());
2364     B = IRB.CreatePointerCast(B, Sb->getType());
2365 
2366     // Let [a0, a1] be the interval of possible values of A, taking into account
2367     // its undefined bits. Let [b0, b1] be the interval of possible values of B.
2368     // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0).
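    // For example, for an unsigned A = 0b10?1 (Sa = 0b0010) the interval is
    // [0b1001, 0b1011]: the lowest value clears the poisoned bit, the highest
    // sets it.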
2369     bool IsSigned = I.isSigned();
2370     Value *S1 = IRB.CreateICmp(I.getPredicate(),
2371                                getLowestPossibleValue(IRB, A, Sa, IsSigned),
2372                                getHighestPossibleValue(IRB, B, Sb, IsSigned));
2373     Value *S2 = IRB.CreateICmp(I.getPredicate(),
2374                                getHighestPossibleValue(IRB, A, Sa, IsSigned),
2375                                getLowestPossibleValue(IRB, B, Sb, IsSigned));
2376     Value *Si = IRB.CreateXor(S1, S2);
2377     setShadow(&I, Si);
2378     setOriginForNaryOp(I);
2379   }
2380 
2381   /// Instrument signed relational comparisons.
2382   ///
2383   /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest
2384   /// bit of the shadow. Everything else is delegated to handleShadowOr().
2385   void handleSignedRelationalComparison(ICmpInst &I) {
2386     Constant *constOp;
2387     Value *op = nullptr;
2388     CmpInst::Predicate pre;
2389     if ((constOp = dyn_cast<Constant>(I.getOperand(1)))) {
2390       op = I.getOperand(0);
2391       pre = I.getPredicate();
2392     } else if ((constOp = dyn_cast<Constant>(I.getOperand(0)))) {
2393       op = I.getOperand(1);
2394       pre = I.getSwappedPredicate();
2395     } else {
2396       handleShadowOr(I);
2397       return;
2398     }
2399 
2400     if ((constOp->isNullValue() &&
2401          (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) ||
2402         (constOp->isAllOnesValue() &&
2403          (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) {
2404       IRBuilder<> IRB(&I);
2405       Value *Shadow = IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op),
2406                                         "_msprop_icmp_s");
2407       setShadow(&I, Shadow);
2408       setOrigin(&I, getOrigin(op));
2409     } else {
2410       handleShadowOr(I);
2411     }
2412   }
2413 
2414   void visitICmpInst(ICmpInst &I) {
2415     if (!ClHandleICmp) {
2416       handleShadowOr(I);
2417       return;
2418     }
2419     if (I.isEquality()) {
2420       handleEqualityComparison(I);
2421       return;
2422     }
2423 
2424     assert(I.isRelational());
2425     if (ClHandleICmpExact) {
2426       handleRelationalComparisonExact(I);
2427       return;
2428     }
2429     if (I.isSigned()) {
2430       handleSignedRelationalComparison(I);
2431       return;
2432     }
2433 
2434     assert(I.isUnsigned());
2435     if ((isa<Constant>(I.getOperand(0)) || isa<Constant>(I.getOperand(1)))) {
2436       handleRelationalComparisonExact(I);
2437       return;
2438     }
2439 
2440     handleShadowOr(I);
2441   }
2442 
2443   void visitFCmpInst(FCmpInst &I) {
2444     handleShadowOr(I);
2445   }
2446 
2447   void handleShift(BinaryOperator &I) {
2448     IRBuilder<> IRB(&I);
2449     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2450     // Otherwise perform the same shift on S1.
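    // For example, for (X << 3) with a fully defined shift amount the result
    // shadow is just (Sx << 3); a poisoned shift amount poisons all bits.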
2451     Value *S1 = getShadow(&I, 0);
2452     Value *S2 = getShadow(&I, 1);
2453     Value *S2Conv = IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)),
2454                                    S2->getType());
2455     Value *V2 = I.getOperand(1);
2456     Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2);
2457     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2458     setOriginForNaryOp(I);
2459   }
2460 
2461   void visitShl(BinaryOperator &I) { handleShift(I); }
2462   void visitAShr(BinaryOperator &I) { handleShift(I); }
2463   void visitLShr(BinaryOperator &I) { handleShift(I); }
2464 
2465   /// Instrument llvm.memmove
2466   ///
2467   /// At this point we don't know if llvm.memmove will be inlined or not.
2468   /// If we don't instrument it and it gets inlined,
2469   /// our interceptor will not kick in and we will lose the memmove.
2470   /// If we instrument the call here, but it does not get inlined,
  /// we will memmove the shadow twice, which is bad in case
  /// of overlapping regions. So, we simply lower the intrinsic to a call.
  ///
  /// A similar situation exists for memcpy and memset.
2475   void visitMemMoveInst(MemMoveInst &I) {
2476     IRBuilder<> IRB(&I);
2477     IRB.CreateCall(
2478         MS.MemmoveFn,
2479         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2480          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2481          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2482     I.eraseFromParent();
2483   }
2484 
2485   // Similar to memmove: avoid copying shadow twice.
  // This is somewhat unfortunate as it may slow down small constant memcpys.
2487   // FIXME: consider doing manual inline for small constant sizes and proper
2488   // alignment.
2489   void visitMemCpyInst(MemCpyInst &I) {
2490     IRBuilder<> IRB(&I);
2491     IRB.CreateCall(
2492         MS.MemcpyFn,
2493         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2494          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2495          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2496     I.eraseFromParent();
2497   }
2498 
2499   // Same as memcpy.
2500   void visitMemSetInst(MemSetInst &I) {
2501     IRBuilder<> IRB(&I);
2502     IRB.CreateCall(
2503         MS.MemsetFn,
2504         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2505          IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
2506          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2507     I.eraseFromParent();
2508   }
2509 
2510   void visitVAStartInst(VAStartInst &I) {
2511     VAHelper->visitVAStartInst(I);
2512   }
2513 
2514   void visitVACopyInst(VACopyInst &I) {
2515     VAHelper->visitVACopyInst(I);
2516   }
2517 
2518   /// Handle vector store-like intrinsics.
2519   ///
2520   /// Instrument intrinsics that look like a simple SIMD store: writes memory,
2521   /// has 1 pointer argument and 1 vector argument, returns void.
2522   bool handleVectorStoreIntrinsic(IntrinsicInst &I) {
2523     IRBuilder<> IRB(&I);
2524     Value* Addr = I.getArgOperand(0);
2525     Value *Shadow = getShadow(&I, 1);
2526     Value *ShadowPtr, *OriginPtr;
2527 
2528     // We don't know the pointer alignment (could be unaligned SSE store!).
    // Have to assume the worst case.
2530     std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
2531         Addr, IRB, Shadow->getType(), Align(1), /*isStore*/ true);
2532     IRB.CreateAlignedStore(Shadow, ShadowPtr, Align(1));
2533 
2534     if (ClCheckAccessAddress)
2535       insertShadowCheck(Addr, &I);
2536 
2537     // FIXME: factor out common code from materializeStores
2538     if (MS.TrackOrigins) IRB.CreateStore(getOrigin(&I, 1), OriginPtr);
2539     return true;
2540   }
2541 
2542   /// Handle vector load-like intrinsics.
2543   ///
2544   /// Instrument intrinsics that look like a simple SIMD load: reads memory,
2545   /// has 1 pointer argument, returns a vector.
2546   bool handleVectorLoadIntrinsic(IntrinsicInst &I) {
2547     IRBuilder<> IRB(&I);
2548     Value *Addr = I.getArgOperand(0);
2549 
2550     Type *ShadowTy = getShadowTy(&I);
2551     Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
2552     if (PropagateShadow) {
2553       // We don't know the pointer alignment (could be unaligned SSE load!).
      // Have to assume the worst case.
2555       const Align Alignment = Align(1);
2556       std::tie(ShadowPtr, OriginPtr) =
2557           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
2558       setShadow(&I,
2559                 IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
2560     } else {
2561       setShadow(&I, getCleanShadow(&I));
2562     }
2563 
2564     if (ClCheckAccessAddress)
2565       insertShadowCheck(Addr, &I);
2566 
2567     if (MS.TrackOrigins) {
2568       if (PropagateShadow)
2569         setOrigin(&I, IRB.CreateLoad(MS.OriginTy, OriginPtr));
2570       else
2571         setOrigin(&I, getCleanOrigin());
2572     }
2573     return true;
2574   }
2575 
2576   /// Handle (SIMD arithmetic)-like intrinsics.
2577   ///
2578   /// Instrument intrinsics with any number of arguments of the same type,
2579   /// equal to the return type. The type should be simple (no aggregates or
2580   /// pointers; vectors are fine).
2581   /// Caller guarantees that this intrinsic does not access memory.
2582   bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I) {
2583     Type *RetTy = I.getType();
2584     if (!(RetTy->isIntOrIntVectorTy() ||
2585           RetTy->isFPOrFPVectorTy() ||
2586           RetTy->isX86_MMXTy()))
2587       return false;
2588 
2589     unsigned NumArgOperands = I.getNumArgOperands();
2590 
2591     for (unsigned i = 0; i < NumArgOperands; ++i) {
2592       Type *Ty = I.getArgOperand(i)->getType();
2593       if (Ty != RetTy)
2594         return false;
2595     }
2596 
2597     IRBuilder<> IRB(&I);
2598     ShadowAndOriginCombiner SC(this, IRB);
2599     for (unsigned i = 0; i < NumArgOperands; ++i)
2600       SC.Add(I.getArgOperand(i));
2601     SC.Done(&I);
2602 
2603     return true;
2604   }
2605 
2606   /// Heuristically instrument unknown intrinsics.
2607   ///
2608   /// The main purpose of this code is to do something reasonable with all
2609   /// random intrinsics we might encounter, most importantly - SIMD intrinsics.
2610   /// We recognize several classes of intrinsics by their argument types and
2611   /// ModRefBehaviour and apply special instrumentation when we are reasonably
2612   /// sure that we know what the intrinsic does.
2613   ///
2614   /// We special-case intrinsics where this approach fails. See llvm.bswap
2615   /// handling as an example of that.
2616   bool handleUnknownIntrinsic(IntrinsicInst &I) {
2617     unsigned NumArgOperands = I.getNumArgOperands();
2618     if (NumArgOperands == 0)
2619       return false;
2620 
2621     if (NumArgOperands == 2 &&
2622         I.getArgOperand(0)->getType()->isPointerTy() &&
2623         I.getArgOperand(1)->getType()->isVectorTy() &&
2624         I.getType()->isVoidTy() &&
2625         !I.onlyReadsMemory()) {
2626       // This looks like a vector store.
2627       return handleVectorStoreIntrinsic(I);
2628     }
2629 
2630     if (NumArgOperands == 1 &&
2631         I.getArgOperand(0)->getType()->isPointerTy() &&
2632         I.getType()->isVectorTy() &&
2633         I.onlyReadsMemory()) {
2634       // This looks like a vector load.
2635       return handleVectorLoadIntrinsic(I);
2636     }
2637 
2638     if (I.doesNotAccessMemory())
2639       if (maybeHandleSimpleNomemIntrinsic(I))
2640         return true;
2641 
2642     // FIXME: detect and handle SSE maskstore/maskload
2643     return false;
2644   }
2645 
2646   void handleInvariantGroup(IntrinsicInst &I) {
2647     setShadow(&I, getShadow(&I, 0));
2648     setOrigin(&I, getOrigin(&I, 0));
2649   }
2650 
2651   void handleLifetimeStart(IntrinsicInst &I) {
2652     if (!PoisonStack)
2653       return;
2654     AllocaInst *AI = llvm::findAllocaForValue(I.getArgOperand(1));
2655     if (!AI)
2656       InstrumentLifetimeStart = false;
2657     LifetimeStartList.push_back(std::make_pair(&I, AI));
2658   }
2659 
2660   void handleBswap(IntrinsicInst &I) {
2661     IRBuilder<> IRB(&I);
2662     Value *Op = I.getArgOperand(0);
2663     Type *OpType = Op->getType();
2664     Function *BswapFunc = Intrinsic::getDeclaration(
2665       F.getParent(), Intrinsic::bswap, makeArrayRef(&OpType, 1));
2666     setShadow(&I, IRB.CreateCall(BswapFunc, getShadow(Op)));
2667     setOrigin(&I, getOrigin(Op));
2668   }
2669 
2670   // Instrument vector convert intrinsic.
2671   //
2672   // This function instruments intrinsics like cvtsi2ss:
2673   // %Out = int_xxx_cvtyyy(%ConvertOp)
2674   // or
2675   // %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp)
  // The intrinsic converts \p NumUsedElements elements of \p ConvertOp to the
  // same number of \p Out elements, and (if it has 2 arguments) copies the
  // rest of the elements from \p CopyOp.
  // In most cases the conversion involves a floating-point value, which may
  // trigger a hardware exception when not fully initialized. For this reason
  // we require \p ConvertOp[0:NumUsedElements] to be fully initialized and
  // trap otherwise.
2682   // We copy the shadow of \p CopyOp[NumUsedElements:] to \p
2683   // Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
2684   // return a fully initialized value.
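  //
  // For example (illustrative), for
  //   %Out = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a,
  //                                                   <2 x double> %b)
  // with NumUsedElements == 1 we check that %b[0] is fully initialized, and
  // the result shadow is the shadow of %a with element 0 set to 0.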
2685   void handleVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements) {
2686     IRBuilder<> IRB(&I);
2687     Value *CopyOp, *ConvertOp;
2688 
2689     switch (I.getNumArgOperands()) {
2690     case 3:
2691       assert(isa<ConstantInt>(I.getArgOperand(2)) && "Invalid rounding mode");
2692       LLVM_FALLTHROUGH;
2693     case 2:
2694       CopyOp = I.getArgOperand(0);
2695       ConvertOp = I.getArgOperand(1);
2696       break;
2697     case 1:
2698       ConvertOp = I.getArgOperand(0);
2699       CopyOp = nullptr;
2700       break;
2701     default:
2702       llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
2703     }
2704 
2705     // The first *NumUsedElements* elements of ConvertOp are converted to the
2706     // same number of output elements. The rest of the output is copied from
2707     // CopyOp, or (if not available) filled with zeroes.
2708     // Combine shadow for elements of ConvertOp that are used in this operation,
2709     // and insert a check.
2710     // FIXME: consider propagating shadow of ConvertOp, at least in the case of
2711     // int->any conversion.
2712     Value *ConvertShadow = getShadow(ConvertOp);
2713     Value *AggShadow = nullptr;
2714     if (ConvertOp->getType()->isVectorTy()) {
2715       AggShadow = IRB.CreateExtractElement(
2716           ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
2717       for (int i = 1; i < NumUsedElements; ++i) {
2718         Value *MoreShadow = IRB.CreateExtractElement(
2719             ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), i));
2720         AggShadow = IRB.CreateOr(AggShadow, MoreShadow);
2721       }
2722     } else {
2723       AggShadow = ConvertShadow;
2724     }
2725     assert(AggShadow->getType()->isIntegerTy());
2726     insertShadowCheck(AggShadow, getOrigin(ConvertOp), &I);
2727 
2728     // Build result shadow by zero-filling parts of CopyOp shadow that come from
2729     // ConvertOp.
2730     if (CopyOp) {
2731       assert(CopyOp->getType() == I.getType());
2732       assert(CopyOp->getType()->isVectorTy());
2733       Value *ResultShadow = getShadow(CopyOp);
2734       Type *EltTy = cast<VectorType>(ResultShadow->getType())->getElementType();
2735       for (int i = 0; i < NumUsedElements; ++i) {
2736         ResultShadow = IRB.CreateInsertElement(
2737             ResultShadow, ConstantInt::getNullValue(EltTy),
2738             ConstantInt::get(IRB.getInt32Ty(), i));
2739       }
2740       setShadow(&I, ResultShadow);
2741       setOrigin(&I, getOrigin(CopyOp));
2742     } else {
2743       setShadow(&I, getCleanShadow(&I));
2744       setOrigin(&I, getCleanOrigin());
2745     }
2746   }
2747 
  // Given a scalar or vector, extract the lower 64 bits (or fewer), and return
  // all zeroes if it is zero, and all ones otherwise.
2750   Value *Lower64ShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
2751     if (S->getType()->isVectorTy())
2752       S = CreateShadowCast(IRB, S, IRB.getInt64Ty(), /* Signed */ true);
2753     assert(S->getType()->getPrimitiveSizeInBits() <= 64);
2754     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
2755     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
2756   }
2757 
2758   // Given a vector, extract its first element, and return all
2759   // zeroes if it is zero, and all ones otherwise.
2760   Value *LowerElementShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
2761     Value *S1 = IRB.CreateExtractElement(S, (uint64_t)0);
2762     Value *S2 = IRB.CreateICmpNE(S1, getCleanShadow(S1));
2763     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
2764   }
2765 
2766   Value *VariableShadowExtend(IRBuilder<> &IRB, Value *S) {
2767     Type *T = S->getType();
2768     assert(T->isVectorTy());
2769     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
2770     return IRB.CreateSExt(S2, T);
2771   }
2772 
2773   // Instrument vector shift intrinsic.
2774   //
2775   // This function instruments intrinsics like int_x86_avx2_psll_w.
2776   // Intrinsic shifts %In by %ShiftSize bits.
2777   // %ShiftSize may be a vector. In that case the lower 64 bits determine shift
2778   // size, and the rest is ignored. Behavior is defined even if shift size is
2779   // greater than register (or field) width.
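  //
  // Illustrative shadow propagation for a non-variable shift (a sketch, not
  // the exact IR):
  //   %S2Conv = sext (icmp ne (lower 64 bits of S2), 0)
  //   %Shift  = call @the.same.shift.intrinsic(S1, V2)
  //   Shadow(%Out) = or %Shift, %S2Conv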
2780   void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) {
2781     assert(I.getNumArgOperands() == 2);
2782     IRBuilder<> IRB(&I);
2783     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2784     // Otherwise perform the same shift on S1.
2785     Value *S1 = getShadow(&I, 0);
2786     Value *S2 = getShadow(&I, 1);
2787     Value *S2Conv = Variable ? VariableShadowExtend(IRB, S2)
2788                              : Lower64ShadowExtend(IRB, S2, getShadowTy(&I));
2789     Value *V1 = I.getOperand(0);
2790     Value *V2 = I.getOperand(1);
2791     Value *Shift = IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
2792                                   {IRB.CreateBitCast(S1, V1->getType()), V2});
2793     Shift = IRB.CreateBitCast(Shift, getShadowTy(&I));
2794     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2795     setOriginForNaryOp(I);
2796   }
2797 
2798   // Get an X86_MMX-sized vector type.
2799   Type *getMMXVectorTy(unsigned EltSizeInBits) {
2800     const unsigned X86_MMXSizeInBits = 64;
2801     assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
2802            "Illegal MMX vector element size");
2803     return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
2804                                 X86_MMXSizeInBits / EltSizeInBits);
2805   }
2806 
2807   // Returns a signed counterpart for an (un)signed-saturate-and-pack
2808   // intrinsic.
2809   Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) {
2810     switch (id) {
2811       case Intrinsic::x86_sse2_packsswb_128:
2812       case Intrinsic::x86_sse2_packuswb_128:
2813         return Intrinsic::x86_sse2_packsswb_128;
2814 
2815       case Intrinsic::x86_sse2_packssdw_128:
2816       case Intrinsic::x86_sse41_packusdw:
2817         return Intrinsic::x86_sse2_packssdw_128;
2818 
2819       case Intrinsic::x86_avx2_packsswb:
2820       case Intrinsic::x86_avx2_packuswb:
2821         return Intrinsic::x86_avx2_packsswb;
2822 
2823       case Intrinsic::x86_avx2_packssdw:
2824       case Intrinsic::x86_avx2_packusdw:
2825         return Intrinsic::x86_avx2_packssdw;
2826 
2827       case Intrinsic::x86_mmx_packsswb:
2828       case Intrinsic::x86_mmx_packuswb:
2829         return Intrinsic::x86_mmx_packsswb;
2830 
2831       case Intrinsic::x86_mmx_packssdw:
2832         return Intrinsic::x86_mmx_packssdw;
2833       default:
2834         llvm_unreachable("unexpected intrinsic id");
2835     }
2836   }
2837 
2838   // Instrument vector pack intrinsic.
2839   //
  // This function instruments intrinsics like x86_mmx_packsswb, which pack
  // elements of 2 input vectors into half as many bits with saturation.
2842   // Shadow is propagated with the signed variant of the same intrinsic applied
2843   // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer).
2844   // EltSizeInBits is used only for x86mmx arguments.
2845   void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned EltSizeInBits = 0) {
2846     assert(I.getNumArgOperands() == 2);
2847     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
2848     IRBuilder<> IRB(&I);
2849     Value *S1 = getShadow(&I, 0);
2850     Value *S2 = getShadow(&I, 1);
2851     assert(isX86_MMX || S1->getType()->isVectorTy());
2852 
2853     // SExt and ICmpNE below must apply to individual elements of input vectors.
2854     // In case of x86mmx arguments, cast them to appropriate vector types and
2855     // back.
2856     Type *T = isX86_MMX ? getMMXVectorTy(EltSizeInBits) : S1->getType();
2857     if (isX86_MMX) {
2858       S1 = IRB.CreateBitCast(S1, T);
2859       S2 = IRB.CreateBitCast(S2, T);
2860     }
2861     Value *S1_ext = IRB.CreateSExt(
2862         IRB.CreateICmpNE(S1, Constant::getNullValue(T)), T);
2863     Value *S2_ext = IRB.CreateSExt(
2864         IRB.CreateICmpNE(S2, Constant::getNullValue(T)), T);
2865     if (isX86_MMX) {
2866       Type *X86_MMXTy = Type::getX86_MMXTy(*MS.C);
2867       S1_ext = IRB.CreateBitCast(S1_ext, X86_MMXTy);
2868       S2_ext = IRB.CreateBitCast(S2_ext, X86_MMXTy);
2869     }
2870 
2871     Function *ShadowFn = Intrinsic::getDeclaration(
2872         F.getParent(), getSignedPackIntrinsic(I.getIntrinsicID()));
2873 
2874     Value *S =
2875         IRB.CreateCall(ShadowFn, {S1_ext, S2_ext}, "_msprop_vector_pack");
2876     if (isX86_MMX) S = IRB.CreateBitCast(S, getShadowTy(&I));
2877     setShadow(&I, S);
2878     setOriginForNaryOp(I);
2879   }
2880 
2881   // Instrument sum-of-absolute-differences intrinsic.
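  // psadbw sums absolute differences of byte pairs into the low 16 bits of
  // each 64-bit result element, leaving the upper 48 bits zero. So if any
  // input byte of a 64-bit group is poisoned, we poison only the low 16 bits
  // of the corresponding result element.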
2882   void handleVectorSadIntrinsic(IntrinsicInst &I) {
2883     const unsigned SignificantBitsPerResultElement = 16;
2884     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
2885     Type *ResTy = isX86_MMX ? IntegerType::get(*MS.C, 64) : I.getType();
2886     unsigned ZeroBitsPerResultElement =
2887         ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement;
2888 
2889     IRBuilder<> IRB(&I);
2890     Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2891     S = IRB.CreateBitCast(S, ResTy);
2892     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
2893                        ResTy);
2894     S = IRB.CreateLShr(S, ZeroBitsPerResultElement);
2895     S = IRB.CreateBitCast(S, getShadowTy(&I));
2896     setShadow(&I, S);
2897     setOriginForNaryOp(I);
2898   }
2899 
2900   // Instrument multiply-add intrinsic.
2901   void handleVectorPmaddIntrinsic(IntrinsicInst &I,
2902                                   unsigned EltSizeInBits = 0) {
2903     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
2904     Type *ResTy = isX86_MMX ? getMMXVectorTy(EltSizeInBits * 2) : I.getType();
2905     IRBuilder<> IRB(&I);
2906     Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2907     S = IRB.CreateBitCast(S, ResTy);
2908     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
2909                        ResTy);
2910     S = IRB.CreateBitCast(S, getShadowTy(&I));
2911     setShadow(&I, S);
2912     setOriginForNaryOp(I);
2913   }
2914 
2915   // Instrument compare-packed intrinsic.
2916   // Basically, an or followed by sext(icmp ne 0) to end up with all-zeros or
2917   // all-ones shadow.
2918   void handleVectorComparePackedIntrinsic(IntrinsicInst &I) {
2919     IRBuilder<> IRB(&I);
2920     Type *ResTy = getShadowTy(&I);
2921     Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2922     Value *S = IRB.CreateSExt(
2923         IRB.CreateICmpNE(S0, Constant::getNullValue(ResTy)), ResTy);
2924     setShadow(&I, S);
2925     setOriginForNaryOp(I);
2926   }
2927 
2928   // Instrument compare-scalar intrinsic.
2929   // This handles both cmp* intrinsics which return the result in the first
2930   // element of a vector, and comi* which return the result as i32.
2931   void handleVectorCompareScalarIntrinsic(IntrinsicInst &I) {
2932     IRBuilder<> IRB(&I);
2933     Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2934     Value *S = LowerElementShadowExtend(IRB, S0, getShadowTy(&I));
2935     setShadow(&I, S);
2936     setOriginForNaryOp(I);
2937   }
2938 
2939   // Instrument generic vector reduction intrinsics
2940   // by ORing together all their fields.
2941   void handleVectorReduceIntrinsic(IntrinsicInst &I) {
2942     IRBuilder<> IRB(&I);
2943     Value *S = IRB.CreateOrReduce(getShadow(&I, 0));
2944     setShadow(&I, S);
2945     setOrigin(&I, getOrigin(&I, 0));
2946   }
2947 
2948   // Instrument experimental.vector.reduce.or intrinsic.
2949   // Valid (non-poisoned) set bits in the operand pull low the
2950   // corresponding shadow bits.
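  //
  // For example (illustrative): reducing <2 x i8> {0xFF, %x}, where only %x
  // is poisoned, yields a fully initialized 0xFF, because every result bit
  // is already known to be 1 from the first element.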
2951   void handleVectorReduceOrIntrinsic(IntrinsicInst &I) {
2952     IRBuilder<> IRB(&I);
2953     Value *OperandShadow = getShadow(&I, 0);
2954     Value *OperandUnsetBits = IRB.CreateNot(I.getOperand(0));
2955     Value *OperandUnsetOrPoison = IRB.CreateOr(OperandUnsetBits, OperandShadow);
    // Bit N is clean if any field's bit N is 1 and unpoisoned.
    Value *OutShadowMask = IRB.CreateAndReduce(OperandUnsetOrPoison);
    // Otherwise, it is clean if every field's bit N is unpoisoned.
2959     Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
2960     Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);
2961 
2962     setShadow(&I, S);
2963     setOrigin(&I, getOrigin(&I, 0));
2964   }
2965 
  // Instrument experimental.vector.reduce.and intrinsic.
2967   // Valid (non-poisoned) unset bits in the operand pull down the
2968   // corresponding shadow bits.
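  //
  // For example (illustrative): reducing <2 x i8> {0x00, %x}, where only %x
  // is poisoned, yields a fully initialized 0x00, because every result bit
  // is already known to be 0 from the first element.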
2969   void handleVectorReduceAndIntrinsic(IntrinsicInst &I) {
2970     IRBuilder<> IRB(&I);
2971     Value *OperandShadow = getShadow(&I, 0);
2972     Value *OperandSetOrPoison = IRB.CreateOr(I.getOperand(0), OperandShadow);
    // Bit N is clean if any field's bit N is 0 and unpoisoned.
    Value *OutShadowMask = IRB.CreateAndReduce(OperandSetOrPoison);
    // Otherwise, it is clean if every field's bit N is unpoisoned.
2976     Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
2977     Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);
2978 
2979     setShadow(&I, S);
2980     setOrigin(&I, getOrigin(&I, 0));
2981   }
2982 
2983   void handleStmxcsr(IntrinsicInst &I) {
2984     IRBuilder<> IRB(&I);
2985     Value* Addr = I.getArgOperand(0);
2986     Type *Ty = IRB.getInt32Ty();
2987     Value *ShadowPtr =
2988         getShadowOriginPtr(Addr, IRB, Ty, Align(1), /*isStore*/ true).first;
2989 
2990     IRB.CreateStore(getCleanShadow(Ty),
2991                     IRB.CreatePointerCast(ShadowPtr, Ty->getPointerTo()));
2992 
2993     if (ClCheckAccessAddress)
2994       insertShadowCheck(Addr, &I);
2995   }
2996 
2997   void handleLdmxcsr(IntrinsicInst &I) {
2998     if (!InsertChecks) return;
2999 
3000     IRBuilder<> IRB(&I);
3001     Value *Addr = I.getArgOperand(0);
3002     Type *Ty = IRB.getInt32Ty();
3003     const Align Alignment = Align(1);
3004     Value *ShadowPtr, *OriginPtr;
3005     std::tie(ShadowPtr, OriginPtr) =
3006         getShadowOriginPtr(Addr, IRB, Ty, Alignment, /*isStore*/ false);
3007 
3008     if (ClCheckAccessAddress)
3009       insertShadowCheck(Addr, &I);
3010 
3011     Value *Shadow = IRB.CreateAlignedLoad(Ty, ShadowPtr, Alignment, "_ldmxcsr");
3012     Value *Origin = MS.TrackOrigins ? IRB.CreateLoad(MS.OriginTy, OriginPtr)
3013                                     : getCleanOrigin();
3014     insertShadowCheck(Shadow, Origin, &I);
3015   }
3016 
3017   void handleMaskedStore(IntrinsicInst &I) {
3018     IRBuilder<> IRB(&I);
3019     Value *V = I.getArgOperand(0);
3020     Value *Addr = I.getArgOperand(1);
3021     const Align Alignment(
3022         cast<ConstantInt>(I.getArgOperand(2))->getZExtValue());
3023     Value *Mask = I.getArgOperand(3);
3024     Value *Shadow = getShadow(V);
3025 
3026     Value *ShadowPtr;
3027     Value *OriginPtr;
3028     std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
3029         Addr, IRB, Shadow->getType(), Alignment, /*isStore*/ true);
3030 
3031     if (ClCheckAccessAddress) {
3032       insertShadowCheck(Addr, &I);
3033       // Uninitialized mask is kind of like uninitialized address, but not as
3034       // scary.
3035       insertShadowCheck(Mask, &I);
3036     }
3037 
3038     IRB.CreateMaskedStore(Shadow, ShadowPtr, Alignment, Mask);
3039 
3040     if (MS.TrackOrigins) {
3041       auto &DL = F.getParent()->getDataLayout();
3042       paintOrigin(IRB, getOrigin(V), OriginPtr,
3043                   DL.getTypeStoreSize(Shadow->getType()),
3044                   std::max(Alignment, kMinOriginAlignment));
3045     }
3046   }
3047 
3048   bool handleMaskedLoad(IntrinsicInst &I) {
3049     IRBuilder<> IRB(&I);
3050     Value *Addr = I.getArgOperand(0);
3051     const Align Alignment(
3052         cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
3053     Value *Mask = I.getArgOperand(2);
3054     Value *PassThru = I.getArgOperand(3);
3055 
3056     Type *ShadowTy = getShadowTy(&I);
3057     Value *ShadowPtr, *OriginPtr;
3058     if (PropagateShadow) {
3059       std::tie(ShadowPtr, OriginPtr) =
3060           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
3061       setShadow(&I, IRB.CreateMaskedLoad(ShadowPtr, Alignment, Mask,
3062                                          getShadow(PassThru), "_msmaskedld"));
3063     } else {
3064       setShadow(&I, getCleanShadow(&I));
3065     }
3066 
3067     if (ClCheckAccessAddress) {
3068       insertShadowCheck(Addr, &I);
3069       insertShadowCheck(Mask, &I);
3070     }
3071 
3072     if (MS.TrackOrigins) {
3073       if (PropagateShadow) {
3074         // Choose between PassThru's and the loaded value's origins.
3075         Value *MaskedPassThruShadow = IRB.CreateAnd(
3076             getShadow(PassThru), IRB.CreateSExt(IRB.CreateNeg(Mask), ShadowTy));
3077 
3078         Value *Acc = IRB.CreateExtractElement(
3079             MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
3080         for (int i = 1, N = cast<FixedVectorType>(PassThru->getType())
3081                                 ->getNumElements();
3082              i < N; ++i) {
3083           Value *More = IRB.CreateExtractElement(
3084               MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), i));
3085           Acc = IRB.CreateOr(Acc, More);
3086         }
3087 
3088         Value *Origin = IRB.CreateSelect(
3089             IRB.CreateICmpNE(Acc, Constant::getNullValue(Acc->getType())),
3090             getOrigin(PassThru), IRB.CreateLoad(MS.OriginTy, OriginPtr));
3091 
3092         setOrigin(&I, Origin);
3093       } else {
3094         setOrigin(&I, getCleanOrigin());
3095       }
3096     }
3097     return true;
3098   }
3099 
3100   // Instrument BMI / BMI2 intrinsics.
  // Instrument BMI / BMI2 intrinsics.
  // All of these intrinsics are Z = I(X, Y), where the types of all operands
  // and the result match, and are either i32 or i64. The following
  // instrumentation happens to work for all of them:
  //   Sz = I(Sx, Y) | (sext (Sy != 0))
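  //
  // For example (illustrative), for %Z = @llvm.x86.bmi.bzhi.32(%X, %Y):
  //   %SMask = sext (icmp ne %Sy, 0)
  //   %Sz    = or (call @llvm.x86.bmi.bzhi.32(%Sx, %Y)), %SMask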
3105   void handleBmiIntrinsic(IntrinsicInst &I) {
3106     IRBuilder<> IRB(&I);
3107     Type *ShadowTy = getShadowTy(&I);
3108 
3109     // If any bit of the mask operand is poisoned, then the whole thing is.
3110     Value *SMask = getShadow(&I, 1);
3111     SMask = IRB.CreateSExt(IRB.CreateICmpNE(SMask, getCleanShadow(ShadowTy)),
3112                            ShadowTy);
3113     // Apply the same intrinsic to the shadow of the first operand.
3114     Value *S = IRB.CreateCall(I.getCalledFunction(),
3115                               {getShadow(&I, 0), I.getOperand(1)});
3116     S = IRB.CreateOr(SMask, S);
3117     setShadow(&I, S);
3118     setOriginForNaryOp(I);
3119   }
3120 
3121   SmallVector<int, 8> getPclmulMask(unsigned Width, bool OddElements) {
3122     SmallVector<int, 8> Mask;
3123     for (unsigned X = OddElements ? 1 : 0; X < Width; X += 2) {
3124       Mask.append(2, X);
3125     }
3126     return Mask;
3127   }
3128 
3129   // Instrument pclmul intrinsics.
3130   // These intrinsics operate either on odd or on even elements of the input
3131   // vectors, depending on the constant in the 3rd argument, ignoring the rest.
  // Replace the unused elements with copies of the used ones, e.g.:
3133   //   (0, 1, 2, 3) -> (0, 0, 2, 2) (even case)
3134   // or
3135   //   (0, 1, 2, 3) -> (1, 1, 3, 3) (odd case)
3136   // and then apply the usual shadow combining logic.
3137   void handlePclmulIntrinsic(IntrinsicInst &I) {
3138     IRBuilder<> IRB(&I);
3139     Type *ShadowTy = getShadowTy(&I);
3140     unsigned Width =
3141         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
3142     assert(isa<ConstantInt>(I.getArgOperand(2)) &&
3143            "pclmul 3rd operand must be a constant");
3144     unsigned Imm = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
3145     Value *Shuf0 =
3146         IRB.CreateShuffleVector(getShadow(&I, 0), UndefValue::get(ShadowTy),
3147                                 getPclmulMask(Width, Imm & 0x01));
3148     Value *Shuf1 =
3149         IRB.CreateShuffleVector(getShadow(&I, 1), UndefValue::get(ShadowTy),
3150                                 getPclmulMask(Width, Imm & 0x10));
3151     ShadowAndOriginCombiner SOC(this, IRB);
3152     SOC.Add(Shuf0, getOrigin(&I, 0));
3153     SOC.Add(Shuf1, getOrigin(&I, 1));
3154     SOC.Done(&I);
3155   }
3156 
3157   // Instrument _mm_*_sd intrinsics
3158   void handleUnarySdIntrinsic(IntrinsicInst &I) {
3159     IRBuilder<> IRB(&I);
3160     Value *First = getShadow(&I, 0);
3161     Value *Second = getShadow(&I, 1);
3162     // High word of first operand, low word of second
3163     Value *Shadow =
3164         IRB.CreateShuffleVector(First, Second, llvm::makeArrayRef<int>({2, 1}));
3165 
3166     setShadow(&I, Shadow);
3167     setOriginForNaryOp(I);
3168   }
3169 
3170   void handleBinarySdIntrinsic(IntrinsicInst &I) {
3171     IRBuilder<> IRB(&I);
3172     Value *First = getShadow(&I, 0);
3173     Value *Second = getShadow(&I, 1);
3174     Value *OrShadow = IRB.CreateOr(First, Second);
3175     // High word of first operand, low word of both OR'd together
3176     Value *Shadow = IRB.CreateShuffleVector(First, OrShadow,
3177                                             llvm::makeArrayRef<int>({2, 1}));
3178 
3179     setShadow(&I, Shadow);
3180     setOriginForNaryOp(I);
3181   }
3182 
3183   void visitIntrinsicInst(IntrinsicInst &I) {
3184     switch (I.getIntrinsicID()) {
3185     case Intrinsic::lifetime_start:
3186       handleLifetimeStart(I);
3187       break;
3188     case Intrinsic::launder_invariant_group:
3189     case Intrinsic::strip_invariant_group:
3190       handleInvariantGroup(I);
3191       break;
3192     case Intrinsic::bswap:
3193       handleBswap(I);
3194       break;
3195     case Intrinsic::masked_store:
3196       handleMaskedStore(I);
3197       break;
3198     case Intrinsic::masked_load:
3199       handleMaskedLoad(I);
3200       break;
3201     case Intrinsic::experimental_vector_reduce_and:
3202       handleVectorReduceAndIntrinsic(I);
3203       break;
3204     case Intrinsic::experimental_vector_reduce_or:
3205       handleVectorReduceOrIntrinsic(I);
3206       break;
3207     case Intrinsic::experimental_vector_reduce_add:
3208     case Intrinsic::experimental_vector_reduce_xor:
3209     case Intrinsic::experimental_vector_reduce_mul:
3210       handleVectorReduceIntrinsic(I);
3211       break;
3212     case Intrinsic::x86_sse_stmxcsr:
3213       handleStmxcsr(I);
3214       break;
3215     case Intrinsic::x86_sse_ldmxcsr:
3216       handleLdmxcsr(I);
3217       break;
3218     case Intrinsic::x86_avx512_vcvtsd2usi64:
3219     case Intrinsic::x86_avx512_vcvtsd2usi32:
3220     case Intrinsic::x86_avx512_vcvtss2usi64:
3221     case Intrinsic::x86_avx512_vcvtss2usi32:
3222     case Intrinsic::x86_avx512_cvttss2usi64:
3223     case Intrinsic::x86_avx512_cvttss2usi:
3224     case Intrinsic::x86_avx512_cvttsd2usi64:
3225     case Intrinsic::x86_avx512_cvttsd2usi:
3226     case Intrinsic::x86_avx512_cvtusi2ss:
3227     case Intrinsic::x86_avx512_cvtusi642sd:
3228     case Intrinsic::x86_avx512_cvtusi642ss:
3229     case Intrinsic::x86_sse2_cvtsd2si64:
3230     case Intrinsic::x86_sse2_cvtsd2si:
3231     case Intrinsic::x86_sse2_cvtsd2ss:
3232     case Intrinsic::x86_sse2_cvttsd2si64:
3233     case Intrinsic::x86_sse2_cvttsd2si:
3234     case Intrinsic::x86_sse_cvtss2si64:
3235     case Intrinsic::x86_sse_cvtss2si:
3236     case Intrinsic::x86_sse_cvttss2si64:
3237     case Intrinsic::x86_sse_cvttss2si:
3238       handleVectorConvertIntrinsic(I, 1);
3239       break;
3240     case Intrinsic::x86_sse_cvtps2pi:
3241     case Intrinsic::x86_sse_cvttps2pi:
3242       handleVectorConvertIntrinsic(I, 2);
3243       break;
3244 
3245     case Intrinsic::x86_avx512_psll_w_512:
3246     case Intrinsic::x86_avx512_psll_d_512:
3247     case Intrinsic::x86_avx512_psll_q_512:
3248     case Intrinsic::x86_avx512_pslli_w_512:
3249     case Intrinsic::x86_avx512_pslli_d_512:
3250     case Intrinsic::x86_avx512_pslli_q_512:
3251     case Intrinsic::x86_avx512_psrl_w_512:
3252     case Intrinsic::x86_avx512_psrl_d_512:
3253     case Intrinsic::x86_avx512_psrl_q_512:
3254     case Intrinsic::x86_avx512_psra_w_512:
3255     case Intrinsic::x86_avx512_psra_d_512:
3256     case Intrinsic::x86_avx512_psra_q_512:
3257     case Intrinsic::x86_avx512_psrli_w_512:
3258     case Intrinsic::x86_avx512_psrli_d_512:
3259     case Intrinsic::x86_avx512_psrli_q_512:
3260     case Intrinsic::x86_avx512_psrai_w_512:
3261     case Intrinsic::x86_avx512_psrai_d_512:
3262     case Intrinsic::x86_avx512_psrai_q_512:
3263     case Intrinsic::x86_avx512_psra_q_256:
3264     case Intrinsic::x86_avx512_psra_q_128:
3265     case Intrinsic::x86_avx512_psrai_q_256:
3266     case Intrinsic::x86_avx512_psrai_q_128:
3267     case Intrinsic::x86_avx2_psll_w:
3268     case Intrinsic::x86_avx2_psll_d:
3269     case Intrinsic::x86_avx2_psll_q:
3270     case Intrinsic::x86_avx2_pslli_w:
3271     case Intrinsic::x86_avx2_pslli_d:
3272     case Intrinsic::x86_avx2_pslli_q:
3273     case Intrinsic::x86_avx2_psrl_w:
3274     case Intrinsic::x86_avx2_psrl_d:
3275     case Intrinsic::x86_avx2_psrl_q:
3276     case Intrinsic::x86_avx2_psra_w:
3277     case Intrinsic::x86_avx2_psra_d:
3278     case Intrinsic::x86_avx2_psrli_w:
3279     case Intrinsic::x86_avx2_psrli_d:
3280     case Intrinsic::x86_avx2_psrli_q:
3281     case Intrinsic::x86_avx2_psrai_w:
3282     case Intrinsic::x86_avx2_psrai_d:
3283     case Intrinsic::x86_sse2_psll_w:
3284     case Intrinsic::x86_sse2_psll_d:
3285     case Intrinsic::x86_sse2_psll_q:
3286     case Intrinsic::x86_sse2_pslli_w:
3287     case Intrinsic::x86_sse2_pslli_d:
3288     case Intrinsic::x86_sse2_pslli_q:
3289     case Intrinsic::x86_sse2_psrl_w:
3290     case Intrinsic::x86_sse2_psrl_d:
3291     case Intrinsic::x86_sse2_psrl_q:
3292     case Intrinsic::x86_sse2_psra_w:
3293     case Intrinsic::x86_sse2_psra_d:
3294     case Intrinsic::x86_sse2_psrli_w:
3295     case Intrinsic::x86_sse2_psrli_d:
3296     case Intrinsic::x86_sse2_psrli_q:
3297     case Intrinsic::x86_sse2_psrai_w:
3298     case Intrinsic::x86_sse2_psrai_d:
3299     case Intrinsic::x86_mmx_psll_w:
3300     case Intrinsic::x86_mmx_psll_d:
3301     case Intrinsic::x86_mmx_psll_q:
3302     case Intrinsic::x86_mmx_pslli_w:
3303     case Intrinsic::x86_mmx_pslli_d:
3304     case Intrinsic::x86_mmx_pslli_q:
3305     case Intrinsic::x86_mmx_psrl_w:
3306     case Intrinsic::x86_mmx_psrl_d:
3307     case Intrinsic::x86_mmx_psrl_q:
3308     case Intrinsic::x86_mmx_psra_w:
3309     case Intrinsic::x86_mmx_psra_d:
3310     case Intrinsic::x86_mmx_psrli_w:
3311     case Intrinsic::x86_mmx_psrli_d:
3312     case Intrinsic::x86_mmx_psrli_q:
3313     case Intrinsic::x86_mmx_psrai_w:
3314     case Intrinsic::x86_mmx_psrai_d:
3315       handleVectorShiftIntrinsic(I, /* Variable */ false);
3316       break;
3317     case Intrinsic::x86_avx2_psllv_d:
3318     case Intrinsic::x86_avx2_psllv_d_256:
3319     case Intrinsic::x86_avx512_psllv_d_512:
3320     case Intrinsic::x86_avx2_psllv_q:
3321     case Intrinsic::x86_avx2_psllv_q_256:
3322     case Intrinsic::x86_avx512_psllv_q_512:
3323     case Intrinsic::x86_avx2_psrlv_d:
3324     case Intrinsic::x86_avx2_psrlv_d_256:
3325     case Intrinsic::x86_avx512_psrlv_d_512:
3326     case Intrinsic::x86_avx2_psrlv_q:
3327     case Intrinsic::x86_avx2_psrlv_q_256:
3328     case Intrinsic::x86_avx512_psrlv_q_512:
3329     case Intrinsic::x86_avx2_psrav_d:
3330     case Intrinsic::x86_avx2_psrav_d_256:
3331     case Intrinsic::x86_avx512_psrav_d_512:
3332     case Intrinsic::x86_avx512_psrav_q_128:
3333     case Intrinsic::x86_avx512_psrav_q_256:
3334     case Intrinsic::x86_avx512_psrav_q_512:
3335       handleVectorShiftIntrinsic(I, /* Variable */ true);
3336       break;
3337 
3338     case Intrinsic::x86_sse2_packsswb_128:
3339     case Intrinsic::x86_sse2_packssdw_128:
3340     case Intrinsic::x86_sse2_packuswb_128:
3341     case Intrinsic::x86_sse41_packusdw:
3342     case Intrinsic::x86_avx2_packsswb:
3343     case Intrinsic::x86_avx2_packssdw:
3344     case Intrinsic::x86_avx2_packuswb:
3345     case Intrinsic::x86_avx2_packusdw:
3346       handleVectorPackIntrinsic(I);
3347       break;
3348 
3349     case Intrinsic::x86_mmx_packsswb:
3350     case Intrinsic::x86_mmx_packuswb:
3351       handleVectorPackIntrinsic(I, 16);
3352       break;
3353 
3354     case Intrinsic::x86_mmx_packssdw:
3355       handleVectorPackIntrinsic(I, 32);
3356       break;
3357 
3358     case Intrinsic::x86_mmx_psad_bw:
3359     case Intrinsic::x86_sse2_psad_bw:
3360     case Intrinsic::x86_avx2_psad_bw:
3361       handleVectorSadIntrinsic(I);
3362       break;
3363 
3364     case Intrinsic::x86_sse2_pmadd_wd:
3365     case Intrinsic::x86_avx2_pmadd_wd:
3366     case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
3367     case Intrinsic::x86_avx2_pmadd_ub_sw:
3368       handleVectorPmaddIntrinsic(I);
3369       break;
3370 
3371     case Intrinsic::x86_ssse3_pmadd_ub_sw:
3372       handleVectorPmaddIntrinsic(I, 8);
3373       break;
3374 
3375     case Intrinsic::x86_mmx_pmadd_wd:
3376       handleVectorPmaddIntrinsic(I, 16);
3377       break;
3378 
3379     case Intrinsic::x86_sse_cmp_ss:
3380     case Intrinsic::x86_sse2_cmp_sd:
3381     case Intrinsic::x86_sse_comieq_ss:
3382     case Intrinsic::x86_sse_comilt_ss:
3383     case Intrinsic::x86_sse_comile_ss:
3384     case Intrinsic::x86_sse_comigt_ss:
3385     case Intrinsic::x86_sse_comige_ss:
3386     case Intrinsic::x86_sse_comineq_ss:
3387     case Intrinsic::x86_sse_ucomieq_ss:
3388     case Intrinsic::x86_sse_ucomilt_ss:
3389     case Intrinsic::x86_sse_ucomile_ss:
3390     case Intrinsic::x86_sse_ucomigt_ss:
3391     case Intrinsic::x86_sse_ucomige_ss:
3392     case Intrinsic::x86_sse_ucomineq_ss:
3393     case Intrinsic::x86_sse2_comieq_sd:
3394     case Intrinsic::x86_sse2_comilt_sd:
3395     case Intrinsic::x86_sse2_comile_sd:
3396     case Intrinsic::x86_sse2_comigt_sd:
3397     case Intrinsic::x86_sse2_comige_sd:
3398     case Intrinsic::x86_sse2_comineq_sd:
3399     case Intrinsic::x86_sse2_ucomieq_sd:
3400     case Intrinsic::x86_sse2_ucomilt_sd:
3401     case Intrinsic::x86_sse2_ucomile_sd:
3402     case Intrinsic::x86_sse2_ucomigt_sd:
3403     case Intrinsic::x86_sse2_ucomige_sd:
3404     case Intrinsic::x86_sse2_ucomineq_sd:
3405       handleVectorCompareScalarIntrinsic(I);
3406       break;
3407 
3408     case Intrinsic::x86_sse_cmp_ps:
3409     case Intrinsic::x86_sse2_cmp_pd:
      // FIXME: For x86_avx_cmp_pd_256 and x86_avx_cmp_ps_256 this function
      // generates reasonable-looking IR that fails in the backend with "Do not
      // know how to split the result of this operator!".
3413       handleVectorComparePackedIntrinsic(I);
3414       break;
3415 
3416     case Intrinsic::x86_bmi_bextr_32:
3417     case Intrinsic::x86_bmi_bextr_64:
3418     case Intrinsic::x86_bmi_bzhi_32:
3419     case Intrinsic::x86_bmi_bzhi_64:
3420     case Intrinsic::x86_bmi_pdep_32:
3421     case Intrinsic::x86_bmi_pdep_64:
3422     case Intrinsic::x86_bmi_pext_32:
3423     case Intrinsic::x86_bmi_pext_64:
3424       handleBmiIntrinsic(I);
3425       break;
3426 
3427     case Intrinsic::x86_pclmulqdq:
3428     case Intrinsic::x86_pclmulqdq_256:
3429     case Intrinsic::x86_pclmulqdq_512:
3430       handlePclmulIntrinsic(I);
3431       break;
3432 
3433     case Intrinsic::x86_sse41_round_sd:
3434       handleUnarySdIntrinsic(I);
3435       break;
3436     case Intrinsic::x86_sse2_max_sd:
3437     case Intrinsic::x86_sse2_min_sd:
3438       handleBinarySdIntrinsic(I);
3439       break;
3440 
3441     case Intrinsic::is_constant:
3442       // The result of llvm.is.constant() is always defined.
3443       setShadow(&I, getCleanShadow(&I));
3444       setOrigin(&I, getCleanOrigin());
3445       break;
3446 
3447     default:
3448       if (!handleUnknownIntrinsic(I))
3449         visitInstruction(I);
3450       break;
3451     }
3452   }
3453 
3454   void visitCallBase(CallBase &CB) {
3455     assert(!CB.getMetadata("nosanitize"));
3456     if (CB.isInlineAsm()) {
      // For inline asm (either a call to an asm function or a callbr
      // instruction), do the usual thing: check argument shadow and mark all
      // outputs as clean. Note that any side effects of the inline asm that
      // are not immediately visible in its constraints are not handled.
3461       if (ClHandleAsmConservative && MS.CompileKernel)
3462         visitAsmInstruction(CB);
3463       else
3464         visitInstruction(CB);
3465       return;
3466     }
3467     if (auto *Call = dyn_cast<CallInst>(&CB)) {
3468       assert(!isa<IntrinsicInst>(Call) && "intrinsics are handled elsewhere");
3469 
3470       // We are going to insert code that relies on the fact that the callee
3471       // will become a non-readonly function after it is instrumented by us. To
3472       // prevent this code from being optimized out, mark that function
3473       // non-readonly in advance.
3474       AttrBuilder B;
3475       B.addAttribute(Attribute::ReadOnly)
3476           .addAttribute(Attribute::ReadNone)
3477           .addAttribute(Attribute::WriteOnly)
3478           .addAttribute(Attribute::ArgMemOnly)
3479           .addAttribute(Attribute::Speculatable);
3480 
3481       Call->removeAttributes(AttributeList::FunctionIndex, B);
3482       if (Function *Func = Call->getCalledFunction()) {
3483         Func->removeAttributes(AttributeList::FunctionIndex, B);
3484       }
3485 
3486       maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI);
3487     }
3488     IRBuilder<> IRB(&CB);
3489     bool MayCheckCall = ClEagerChecks;
3490     if (Function *Func = CB.getCalledFunction()) {
3491       // __sanitizer_unaligned_{load,store} functions may be called by users
      // and always expect shadows in the TLS. So don't check them.
3493       MayCheckCall &= !Func->getName().startswith("__sanitizer_unaligned_");
3494     }
3495 
3496     unsigned ArgOffset = 0;
3497     LLVM_DEBUG(dbgs() << "  CallSite: " << CB << "\n");
3498     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
3499          ++ArgIt) {
3500       Value *A = *ArgIt;
3501       unsigned i = ArgIt - CB.arg_begin();
3502       if (!A->getType()->isSized()) {
3503         LLVM_DEBUG(dbgs() << "Arg " << i << " is not sized: " << CB << "\n");
3504         continue;
3505       }
3506       unsigned Size = 0;
3507       Value *Store = nullptr;
3508       // Compute the Shadow for arg even if it is ByVal, because
3509       // in that case getShadow() will copy the actual arg shadow to
3510       // __msan_param_tls.
3511       Value *ArgShadow = getShadow(A);
3512       Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset);
3513       LLVM_DEBUG(dbgs() << "  Arg#" << i << ": " << *A
3514                         << " Shadow: " << *ArgShadow << "\n");
3515       bool ArgIsInitialized = false;
3516       const DataLayout &DL = F.getParent()->getDataLayout();
3517 
3518       bool ByVal = CB.paramHasAttr(i, Attribute::ByVal);
3519       bool NoUndef = CB.paramHasAttr(i, Attribute::NoUndef);
3520       bool EagerCheck = MayCheckCall && !ByVal && NoUndef;
3521 
3522       if (EagerCheck) {
3523         insertShadowCheck(A, &CB);
3524         continue;
3525       }
3526       if (ByVal) {
3527         // ByVal requires some special handling as it's too big for a single
        // load.
3529         assert(A->getType()->isPointerTy() &&
3530                "ByVal argument is not a pointer!");
3531         Size = DL.getTypeAllocSize(CB.getParamByValType(i));
3532         if (ArgOffset + Size > kParamTLSSize) break;
3533         const MaybeAlign ParamAlignment(CB.getParamAlign(i));
3534         MaybeAlign Alignment = llvm::None;
3535         if (ParamAlignment)
3536           Alignment = std::min(*ParamAlignment, kShadowTLSAlignment);
3537         Value *AShadowPtr =
3538             getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
3539                                /*isStore*/ false)
3540                 .first;
3541 
3542         Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
3543                                  Alignment, Size);
3544         // TODO(glider): need to copy origins.
3545       } else {
        // Any other parameters mean we need bit-grained tracking of
        // uninitialized data.
3547         Size = DL.getTypeAllocSize(A->getType());
3548         if (ArgOffset + Size > kParamTLSSize) break;
3549         Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
3550                                        kShadowTLSAlignment);
3551         Constant *Cst = dyn_cast<Constant>(ArgShadow);
3552         if (Cst && Cst->isNullValue()) ArgIsInitialized = true;
3553       }
3554       if (MS.TrackOrigins && !ArgIsInitialized)
3555         IRB.CreateStore(getOrigin(A),
3556                         getOriginPtrForArgument(A, IRB, ArgOffset));
3557       (void)Store;
3558       assert(Size != 0 && Store != nullptr);
3559       LLVM_DEBUG(dbgs() << "  Param:" << *Store << "\n");
3560       ArgOffset += alignTo(Size, 8);
3561     }
3562     LLVM_DEBUG(dbgs() << "  done with call args\n");
3563 
3564     FunctionType *FT = CB.getFunctionType();
3565     if (FT->isVarArg()) {
3566       VAHelper->visitCallBase(CB, IRB);
3567     }
3568 
3569     // Now, get the shadow for the RetVal.
3570     if (!CB.getType()->isSized())
3571       return;
3572     // Don't emit the epilogue for musttail call returns.
3573     if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
3574       return;
3575 
3576     if (MayCheckCall && CB.hasRetAttr(Attribute::NoUndef)) {
3577       setShadow(&CB, getCleanShadow(&CB));
3578       setOrigin(&CB, getCleanOrigin());
3579       return;
3580     }
3581 
3582     IRBuilder<> IRBBefore(&CB);
3583     // Until we have full dynamic coverage, make sure the retval shadow is 0.
3584     Value *Base = getShadowPtrForRetval(&CB, IRBBefore);
3585     IRBBefore.CreateAlignedStore(getCleanShadow(&CB), Base,
3586                                  kShadowTLSAlignment);
3587     BasicBlock::iterator NextInsn;
3588     if (isa<CallInst>(CB)) {
3589       NextInsn = ++CB.getIterator();
3590       assert(NextInsn != CB.getParent()->end());
3591     } else {
3592       BasicBlock *NormalDest = cast<InvokeInst>(CB).getNormalDest();
3593       if (!NormalDest->getSinglePredecessor()) {
3594         // FIXME: this case is tricky, so we are just conservative here.
3595         // Perhaps we need to split the edge between this BB and NormalDest,
3596         // but a naive attempt to use SplitEdge leads to a crash.
3597         setShadow(&CB, getCleanShadow(&CB));
3598         setOrigin(&CB, getCleanOrigin());
3599         return;
3600       }
      // FIXME: NextInsn is likely in a basic block that has not been visited
      // yet. Anything inserted there will be instrumented by MSan later!
3603       NextInsn = NormalDest->getFirstInsertionPt();
3604       assert(NextInsn != NormalDest->end() &&
3605              "Could not find insertion point for retval shadow load");
3606     }
3607     IRBuilder<> IRBAfter(&*NextInsn);
3608     Value *RetvalShadow = IRBAfter.CreateAlignedLoad(
3609         getShadowTy(&CB), getShadowPtrForRetval(&CB, IRBAfter),
3610         kShadowTLSAlignment, "_msret");
3611     setShadow(&CB, RetvalShadow);
3612     if (MS.TrackOrigins)
3613       setOrigin(&CB, IRBAfter.CreateLoad(MS.OriginTy,
3614                                          getOriginPtrForRetval(IRBAfter)));
3615   }
3616 
3617   bool isAMustTailRetVal(Value *RetVal) {
3618     if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
3619       RetVal = I->getOperand(0);
3620     }
3621     if (auto *I = dyn_cast<CallInst>(RetVal)) {
3622       return I->isMustTailCall();
3623     }
3624     return false;
3625   }
3626 
3627   void visitReturnInst(ReturnInst &I) {
3628     IRBuilder<> IRB(&I);
3629     Value *RetVal = I.getReturnValue();
3630     if (!RetVal) return;
3631     // Don't emit the epilogue for musttail call returns.
3632     if (isAMustTailRetVal(RetVal)) return;
3633     Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
3634     bool HasNoUndef =
3635         F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoUndef);
3636     bool StoreShadow = !(ClEagerChecks && HasNoUndef);
3637     // FIXME: Consider using SpecialCaseList to specify a list of functions that
3638     // must always return fully initialized values. For now, we hardcode "main".
3639     bool EagerCheck = (ClEagerChecks && HasNoUndef) || (F.getName() == "main");
3640 
3641     Value *Shadow = getShadow(RetVal);
3642     bool StoreOrigin = true;
3643     if (EagerCheck) {
3644       insertShadowCheck(RetVal, &I);
3645       Shadow = getCleanShadow(RetVal);
3646       StoreOrigin = false;
3647     }
3648 
3649     // The caller may still expect information passed over TLS if we pass our
    // check.
3651     if (StoreShadow) {
3652       IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
3653       if (MS.TrackOrigins && StoreOrigin)
3654         IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
3655     }
3656   }
3657 
3658   void visitPHINode(PHINode &I) {
3659     IRBuilder<> IRB(&I);
3660     if (!PropagateShadow) {
3661       setShadow(&I, getCleanShadow(&I));
3662       setOrigin(&I, getCleanOrigin());
3663       return;
3664     }
3665 
3666     ShadowPHINodes.push_back(&I);
3667     setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(),
3668                                 "_msphi_s"));
3669     if (MS.TrackOrigins)
3670       setOrigin(&I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(),
3671                                   "_msphi_o"));
3672   }
3673 
3674   Value *getLocalVarDescription(AllocaInst &I) {
3675     SmallString<2048> StackDescriptionStorage;
3676     raw_svector_ostream StackDescription(StackDescriptionStorage);
3677     // We create a string with a description of the stack allocation and
3678     // pass it into __msan_set_alloca_origin.
3679     // It will be printed by the run-time if stack-originated UMR is found.
3680     // The first 4 bytes of the string are set to '----' and will be replaced
3681     // by __msan_va_arg_overflow_size_tls at the first call.
3682     StackDescription << "----" << I.getName() << "@" << F.getName();
3683     return createPrivateNonConstGlobalForString(*F.getParent(),
3684                                                 StackDescription.str());
3685   }
3686 
3687   void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
3688     if (PoisonStack && ClPoisonStackWithCall) {
3689       IRB.CreateCall(MS.MsanPoisonStackFn,
3690                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
3691     } else {
3692       Value *ShadowBase, *OriginBase;
3693       std::tie(ShadowBase, OriginBase) = getShadowOriginPtr(
3694           &I, IRB, IRB.getInt8Ty(), Align(1), /*isStore*/ true);
3695 
3696       Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
3697       IRB.CreateMemSet(ShadowBase, PoisonValue, Len,
3698                        MaybeAlign(I.getAlignment()));
3699     }
3700 
3701     if (PoisonStack && MS.TrackOrigins) {
3702       Value *Descr = getLocalVarDescription(I);
3703       IRB.CreateCall(MS.MsanSetAllocaOrigin4Fn,
3704                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
3705                       IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()),
3706                       IRB.CreatePointerCast(&F, MS.IntptrTy)});
3707     }
3708   }
3709 
3710   void poisonAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
3711     Value *Descr = getLocalVarDescription(I);
3712     if (PoisonStack) {
3713       IRB.CreateCall(MS.MsanPoisonAllocaFn,
3714                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
3715                       IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())});
3716     } else {
3717       IRB.CreateCall(MS.MsanUnpoisonAllocaFn,
3718                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
3719     }
3720   }
3721 
3722   void instrumentAlloca(AllocaInst &I, Instruction *InsPoint = nullptr) {
3723     if (!InsPoint)
3724       InsPoint = &I;
3725     IRBuilder<> IRB(InsPoint->getNextNode());
3726     const DataLayout &DL = F.getParent()->getDataLayout();
3727     uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType());
3728     Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize);
3729     if (I.isArrayAllocation())
3730       Len = IRB.CreateMul(Len, I.getArraySize());
3731 
3732     if (MS.CompileKernel)
3733       poisonAllocaKmsan(I, IRB, Len);
3734     else
3735       poisonAllocaUserspace(I, IRB, Len);
3736   }
3737 
3738   void visitAllocaInst(AllocaInst &I) {
3739     setShadow(&I, getCleanShadow(&I));
3740     setOrigin(&I, getCleanOrigin());
3741     // We'll get to this alloca later unless it's poisoned at the corresponding
3742     // llvm.lifetime.start.
3743     AllocaSet.insert(&I);
3744   }
3745 
3746   void visitSelectInst(SelectInst& I) {
3747     IRBuilder<> IRB(&I);
3748     // a = select b, c, d
3749     Value *B = I.getCondition();
3750     Value *C = I.getTrueValue();
3751     Value *D = I.getFalseValue();
3752     Value *Sb = getShadow(B);
3753     Value *Sc = getShadow(C);
3754     Value *Sd = getShadow(D);
3755 
3756     // Result shadow if condition shadow is 0.
3757     Value *Sa0 = IRB.CreateSelect(B, Sc, Sd);
3758     Value *Sa1;
3759     if (I.getType()->isAggregateType()) {
3760       // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do
3761       // an extra "select". This results in much more compact IR.
3762       // Sa = select Sb, poisoned, (select b, Sc, Sd)
3763       Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
3764     } else {
3765       // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
3766       // If Sb (condition is poisoned), look for bits in c and d that are equal
3767       // and both unpoisoned.
3768       // If !Sb (condition is unpoisoned), simply pick one of Sc and Sd.
3769 
3770       // Cast arguments to shadow-compatible type.
3771       C = CreateAppToShadowCast(IRB, C);
3772       D = CreateAppToShadowCast(IRB, D);
3773 
3774       // Result shadow if condition shadow is 1.
3775       Sa1 = IRB.CreateOr({IRB.CreateXor(C, D), Sc, Sd});
3776     }
3777     Value *Sa = IRB.CreateSelect(Sb, Sa1, Sa0, "_msprop_select");
3778     setShadow(&I, Sa);
3779     if (MS.TrackOrigins) {
3780       // Origins are always i32, so any vector conditions must be flattened.
3781       // FIXME: consider tracking vector origins for app vectors?
3782       if (B->getType()->isVectorTy()) {
3783         Type *FlatTy = getShadowTyNoVec(B->getType());
3784         B = IRB.CreateICmpNE(IRB.CreateBitCast(B, FlatTy),
3785                                 ConstantInt::getNullValue(FlatTy));
3786         Sb = IRB.CreateICmpNE(IRB.CreateBitCast(Sb, FlatTy),
3787                                       ConstantInt::getNullValue(FlatTy));
3788       }
3789       // a = select b, c, d
3790       // Oa = Sb ? Ob : (b ? Oc : Od)
3791       setOrigin(
3792           &I, IRB.CreateSelect(Sb, getOrigin(I.getCondition()),
3793                                IRB.CreateSelect(B, getOrigin(I.getTrueValue()),
3794                                                 getOrigin(I.getFalseValue()))));
3795     }
3796   }
3797 
3798   void visitLandingPadInst(LandingPadInst &I) {
3799     // Do nothing.
3800     // See https://github.com/google/sanitizers/issues/504
3801     setShadow(&I, getCleanShadow(&I));
3802     setOrigin(&I, getCleanOrigin());
3803   }
3804 
3805   void visitCatchSwitchInst(CatchSwitchInst &I) {
3806     setShadow(&I, getCleanShadow(&I));
3807     setOrigin(&I, getCleanOrigin());
3808   }
3809 
3810   void visitFuncletPadInst(FuncletPadInst &I) {
3811     setShadow(&I, getCleanShadow(&I));
3812     setOrigin(&I, getCleanOrigin());
3813   }
3814 
3815   void visitGetElementPtrInst(GetElementPtrInst &I) {
3816     handleShadowOr(I);
3817   }
3818 
3819   void visitExtractValueInst(ExtractValueInst &I) {
3820     IRBuilder<> IRB(&I);
3821     Value *Agg = I.getAggregateOperand();
3822     LLVM_DEBUG(dbgs() << "ExtractValue:  " << I << "\n");
3823     Value *AggShadow = getShadow(Agg);
3824     LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
3825     Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
3826     LLVM_DEBUG(dbgs() << "   ResShadow:  " << *ResShadow << "\n");
3827     setShadow(&I, ResShadow);
3828     setOriginForNaryOp(I);
3829   }
3830 
3831   void visitInsertValueInst(InsertValueInst &I) {
3832     IRBuilder<> IRB(&I);
3833     LLVM_DEBUG(dbgs() << "InsertValue:  " << I << "\n");
3834     Value *AggShadow = getShadow(I.getAggregateOperand());
3835     Value *InsShadow = getShadow(I.getInsertedValueOperand());
3836     LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
3837     LLVM_DEBUG(dbgs() << "   InsShadow:  " << *InsShadow << "\n");
3838     Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
3839     LLVM_DEBUG(dbgs() << "   Res:        " << *Res << "\n");
3840     setShadow(&I, Res);
3841     setOriginForNaryOp(I);
3842   }
3843 
3844   void dumpInst(Instruction &I) {
3845     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
3846       errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n";
3847     } else {
3848       errs() << "ZZZ " << I.getOpcodeName() << "\n";
3849     }
3850     errs() << "QQQ " << I << "\n";
3851   }
3852 
3853   void visitResumeInst(ResumeInst &I) {
3854     LLVM_DEBUG(dbgs() << "Resume: " << I << "\n");
3855     // Nothing to do here.
3856   }
3857 
3858   void visitCleanupReturnInst(CleanupReturnInst &CRI) {
3859     LLVM_DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n");
3860     // Nothing to do here.
3861   }
3862 
3863   void visitCatchReturnInst(CatchReturnInst &CRI) {
3864     LLVM_DEBUG(dbgs() << "CatchReturn: " << CRI << "\n");
3865     // Nothing to do here.
3866   }
3867 
3868   void instrumentAsmArgument(Value *Operand, Instruction &I, IRBuilder<> &IRB,
3869                              const DataLayout &DL, bool isOutput) {
3870     // For each assembly argument, we check its value for being initialized.
3871     // If the argument is a pointer, we assume it points to a single element
    // of the corresponding type (or to an 8-byte word, if the type is
    // unsized).
3873     // Each such pointer is instrumented with a call to the runtime library.
3874     Type *OpType = Operand->getType();
3875     // Check the operand value itself.
3876     insertShadowCheck(Operand, &I);
3877     if (!OpType->isPointerTy() || !isOutput) {
3878       assert(!isOutput);
3879       return;
3880     }
3881     Type *ElType = OpType->getPointerElementType();
3882     if (!ElType->isSized())
3883       return;
3884     int Size = DL.getTypeStoreSize(ElType);
3885     Value *Ptr = IRB.CreatePointerCast(Operand, IRB.getInt8PtrTy());
3886     Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
3887     IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Ptr, SizeVal});
3888   }
3889 
3890   /// Get the number of output arguments returned by pointers.
3891   int getNumOutputArgs(InlineAsm *IA, CallBase *CB) {
3892     int NumRetOutputs = 0;
3893     int NumOutputs = 0;
3894     Type *RetTy = cast<Value>(CB)->getType();
3895     if (!RetTy->isVoidTy()) {
3896       // Register outputs are returned via the CallInst return value.
3897       auto *ST = dyn_cast<StructType>(RetTy);
3898       if (ST)
3899         NumRetOutputs = ST->getNumElements();
3900       else
3901         NumRetOutputs = 1;
3902     }
3903     InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
3904     for (size_t i = 0, n = Constraints.size(); i < n; i++) {
3905       InlineAsm::ConstraintInfo Info = Constraints[i];
3906       switch (Info.Type) {
3907       case InlineAsm::isOutput:
3908         NumOutputs++;
3909         break;
3910       default:
3911         break;
3912       }
3913     }
3914     return NumOutputs - NumRetOutputs;
3915   }
3916 
3917   void visitAsmInstruction(Instruction &I) {
3918     // Conservative inline assembly handling: check for poisoned shadow of
3919     // asm() arguments, then unpoison the result and all the memory locations
3920     // pointed to by those arguments.
3921     // An inline asm() statement in C++ contains lists of input and output
3922     // arguments used by the assembly code. These are mapped to operands of the
3923     // CallInst as follows:
    //  - nR register outputs ("=r") are returned by value in a single structure
3925     //  (SSA value of the CallInst);
3926     //  - nO other outputs ("=m" and others) are returned by pointer as first
3927     // nO operands of the CallInst;
3928     //  - nI inputs ("r", "m" and others) are passed to CallInst as the
3929     // remaining nI operands.
3930     // The total number of asm() arguments in the source is nR+nO+nI, and the
3931     // corresponding CallInst has nO+nI+1 operands (the last operand is the
3932     // function to be called).
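    //
    // For example (illustrative):
    //   asm("..." : "=r"(a), "=m"(b) : "r"(c));
    // becomes a CallInst that returns the new value of 'a' and takes &b as
    // its first operand and c as its second, so nR = 1, nO = 1, nI = 1.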
3933     const DataLayout &DL = F.getParent()->getDataLayout();
3934     CallBase *CB = cast<CallBase>(&I);
3935     IRBuilder<> IRB(&I);
3936     InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
3937     int OutputArgs = getNumOutputArgs(IA, CB);
3938     // The last operand of a CallInst is the function itself.
3939     int NumOperands = CB->getNumOperands() - 1;
3940 
3941     // Check input arguments. Doing so before unpoisoning output arguments, so
3942     // that we won't overwrite uninit values before checking them.
3943     for (int i = OutputArgs; i < NumOperands; i++) {
3944       Value *Operand = CB->getOperand(i);
3945       instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ false);
3946     }
3947     // Unpoison output arguments. This must happen before the actual InlineAsm
3948     // call, so that the shadow for memory published in the asm() statement
3949     // remains valid.
3950     for (int i = 0; i < OutputArgs; i++) {
3951       Value *Operand = CB->getOperand(i);
3952       instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ true);
3953     }
3954 
3955     setShadow(&I, getCleanShadow(&I));
3956     setOrigin(&I, getCleanOrigin());
3957   }
3958 
3959   void visitFreezeInst(FreezeInst &I) {
3960     // Freeze always returns a fully defined value.
3961     setShadow(&I, getCleanShadow(&I));
3962     setOrigin(&I, getCleanOrigin());
3963   }
3964 
3965   void visitInstruction(Instruction &I) {
3966     // Everything else: stop propagating and check for poisoned shadow.
3967     if (ClDumpStrictInstructions)
3968       dumpInst(I);
3969     LLVM_DEBUG(dbgs() << "DEFAULT: " << I << "\n");
3970     for (size_t i = 0, n = I.getNumOperands(); i < n; i++) {
3971       Value *Operand = I.getOperand(i);
3972       if (Operand->getType()->isSized())
3973         insertShadowCheck(Operand, &I);
3974     }
3975     setShadow(&I, getCleanShadow(&I));
3976     setOrigin(&I, getCleanOrigin());
3977   }
3978 };
3979 
3980 /// AMD64-specific implementation of VarArgHelper.
3981 struct VarArgAMD64Helper : public VarArgHelper {
3982   // An unfortunate workaround for asymmetric lowering of va_arg stuff.
3983   // See a comment in visitCallBase for more details.
3984   static const unsigned AMD64GpEndOffset = 48;  // AMD64 ABI Draft 0.99.6 p3.5.7
3985   static const unsigned AMD64FpEndOffsetSSE = 176;
3986   // If SSE is disabled, fp_offset in va_list is zero.
3987   static const unsigned AMD64FpEndOffsetNoSSE = AMD64GpEndOffset;
3988 
3989   unsigned AMD64FpEndOffset;
3990   Function &F;
3991   MemorySanitizer &MS;
3992   MemorySanitizerVisitor &MSV;
3993   Value *VAArgTLSCopy = nullptr;
3994   Value *VAArgTLSOriginCopy = nullptr;
3995   Value *VAArgOverflowSize = nullptr;
3996 
3997   SmallVector<CallInst*, 16> VAStartInstrumentationList;
3998 
3999   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
4000 
4001   VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
4002                     MemorySanitizerVisitor &MSV)
4003       : F(F), MS(MS), MSV(MSV) {
4004     AMD64FpEndOffset = AMD64FpEndOffsetSSE;
4005     for (const auto &Attr : F.getAttributes().getFnAttributes()) {
4006       if (Attr.isStringAttribute() &&
4007           (Attr.getKindAsString() == "target-features")) {
4008         if (Attr.getValueAsString().contains("-sse"))
4009           AMD64FpEndOffset = AMD64FpEndOffsetNoSSE;
4010         break;
4011       }
4012     }
4013   }
4014 
4015   ArgKind classifyArgument(Value* arg) {
4016     // A very rough approximation of X86_64 argument classification rules.
4017     Type *T = arg->getType();
4018     if (T->isFPOrFPVectorTy() || T->isX86_MMXTy())
4019       return AK_FloatingPoint;
4020     if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
4021       return AK_GeneralPurpose;
4022     if (T->isPointerTy())
4023       return AK_GeneralPurpose;
4024     return AK_Memory;
4025   }
4026 
4027   // For VarArg functions, store the argument shadow in an ABI-specific format
4028   // that corresponds to va_list layout.
4029   // We do this because Clang lowers va_arg in the frontend, and this pass
4030   // only sees the low level code that deals with va_list internals.
4031   // A much easier alternative (provided that Clang emits va_arg instructions)
4032   // would have been to associate each live instance of va_list with a copy of
4033   // MSanParamTLS, and extract shadow on va_arg() call in the argument list
4034   // order.
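  // A sketch of the resulting __msan_va_arg_tls layout, derived from the
  // constants above (it mirrors the AMD64 register save area):
  //   [0, 48)    shadow for general-purpose register arguments (6 x 8 bytes)
  //   [48, 176)  shadow for floating-point register arguments (8 x 16 bytes)
  //   [176, ...) shadow for arguments passed in memory (overflow area)
  // When SSE is disabled, the FP area is empty and the overflow area starts
  // at offset 48.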
4035   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4036     unsigned GpOffset = 0;
4037     unsigned FpOffset = AMD64GpEndOffset;
4038     unsigned OverflowOffset = AMD64FpEndOffset;
4039     const DataLayout &DL = F.getParent()->getDataLayout();
4040     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
4041          ++ArgIt) {
4042       Value *A = *ArgIt;
4043       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
4044       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
4045       bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
4046       if (IsByVal) {
4047         // ByVal arguments always go to the overflow area.
4048         // Fixed arguments passed through the overflow area will be stepped
4049         // over by va_start, so don't count them towards the offset.
4050         if (IsFixed)
4051           continue;
4052         assert(A->getType()->isPointerTy());
4053         Type *RealTy = CB.getParamByValType(ArgNo);
4054         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
4055         Value *ShadowBase = getShadowPtrForVAArgument(
4056             RealTy, IRB, OverflowOffset, alignTo(ArgSize, 8));
4057         Value *OriginBase = nullptr;
4058         if (MS.TrackOrigins)
4059           OriginBase = getOriginPtrForVAArgument(RealTy, IRB, OverflowOffset);
4060         OverflowOffset += alignTo(ArgSize, 8);
4061         if (!ShadowBase)
4062           continue;
4063         Value *ShadowPtr, *OriginPtr;
4064         std::tie(ShadowPtr, OriginPtr) =
4065             MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment,
4066                                    /*isStore*/ false);
4067 
4068         IRB.CreateMemCpy(ShadowBase, kShadowTLSAlignment, ShadowPtr,
4069                          kShadowTLSAlignment, ArgSize);
4070         if (MS.TrackOrigins)
4071           IRB.CreateMemCpy(OriginBase, kShadowTLSAlignment, OriginPtr,
4072                            kShadowTLSAlignment, ArgSize);
4073       } else {
4074         ArgKind AK = classifyArgument(A);
4075         if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
4076           AK = AK_Memory;
4077         if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
4078           AK = AK_Memory;
4079         Value *ShadowBase, *OriginBase = nullptr;
4080         switch (AK) {
4081           case AK_GeneralPurpose:
4082             ShadowBase =
4083                 getShadowPtrForVAArgument(A->getType(), IRB, GpOffset, 8);
4084             if (MS.TrackOrigins)
4085               OriginBase =
4086                   getOriginPtrForVAArgument(A->getType(), IRB, GpOffset);
4087             GpOffset += 8;
4088             break;
4089           case AK_FloatingPoint:
4090             ShadowBase =
4091                 getShadowPtrForVAArgument(A->getType(), IRB, FpOffset, 16);
4092             if (MS.TrackOrigins)
4093               OriginBase =
4094                   getOriginPtrForVAArgument(A->getType(), IRB, FpOffset);
4095             FpOffset += 16;
4096             break;
4097           case AK_Memory:
4098             if (IsFixed)
4099               continue;
4100             uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4101             ShadowBase =
4102                 getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset, 8);
4103             if (MS.TrackOrigins)
4104               OriginBase =
4105                   getOriginPtrForVAArgument(A->getType(), IRB, OverflowOffset);
4106             OverflowOffset += alignTo(ArgSize, 8);
4107         }
4108         // Take fixed arguments into account for GpOffset and FpOffset,
4109         // but don't actually store shadows for them.
4110         // TODO(glider): don't call get*PtrForVAArgument() for them.
4111         if (IsFixed)
4112           continue;
4113         if (!ShadowBase)
4114           continue;
4115         Value *Shadow = MSV.getShadow(A);
4116         IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment);
4117         if (MS.TrackOrigins) {
4118           Value *Origin = MSV.getOrigin(A);
4119           unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
4120           MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
4121                           std::max(kShadowTLSAlignment, kMinOriginAlignment));
4122         }
4123       }
4124     }
4125     Constant *OverflowSize =
4126       ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
4127     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
4128   }
4129 
4130   /// Compute the shadow address for a given va_arg.
4131   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4132                                    unsigned ArgOffset, unsigned ArgSize) {
4133     // Make sure we don't overflow __msan_va_arg_tls.
4134     if (ArgOffset + ArgSize > kParamTLSSize)
4135       return nullptr;
4136     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4137     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4138     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4139                               "_msarg_va_s");
4140   }
4141 
4142   /// Compute the origin address for a given va_arg.
4143   Value *getOriginPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, int ArgOffset) {
4144     Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
4145     // getOriginPtrForVAArgument() is always called after
4146     // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never
4147     // overflow.
4148     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4149     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
4150                               "_msarg_va_o");
4151   }
4152 
4153   void unpoisonVAListTagForInst(IntrinsicInst &I) {
4154     IRBuilder<> IRB(&I);
4155     Value *VAListTag = I.getArgOperand(0);
4156     Value *ShadowPtr, *OriginPtr;
4157     const Align Alignment = Align(8);
4158     std::tie(ShadowPtr, OriginPtr) =
4159         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
4160                                /*isStore*/ true);
4161 
4162     // Unpoison the whole __va_list_tag.
4163     // FIXME: magic ABI constants.
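    // The 24 bytes are assumed to cover the SysV AMD64 __va_list_tag layout:
    // gp_offset (4) + fp_offset (4) + overflow_arg_area pointer (8) +
    // reg_save_area pointer (8).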
4164     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4165                      /* size */ 24, Alignment, false);
    // We shouldn't need to zero out the origins, as they are only looked at
    // when the corresponding shadow is nonzero.
4168   }
4169 
4170   void visitVAStartInst(VAStartInst &I) override {
4171     if (F.getCallingConv() == CallingConv::Win64)
4172       return;
4173     VAStartInstrumentationList.push_back(&I);
4174     unpoisonVAListTagForInst(I);
4175   }
4176 
4177   void visitVACopyInst(VACopyInst &I) override {
4178     if (F.getCallingConv() == CallingConv::Win64) return;
4179     unpoisonVAListTagForInst(I);
4180   }
4181 
4182   void finalizeInstrumentation() override {
4183     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
4184            "finalizeInstrumentation called twice");
4185     if (!VAStartInstrumentationList.empty()) {
4186       // If there is a va_start in this function, make a backup copy of
4187       // va_arg_tls somewhere in the function entry block.
4188       IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
4189       VAArgOverflowSize =
4190           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4191       Value *CopySize =
4192         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset),
4193                       VAArgOverflowSize);
4194       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4195       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
4196       if (MS.TrackOrigins) {
4197         VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4198         IRB.CreateMemCpy(VAArgTLSOriginCopy, Align(8), MS.VAArgOriginTLS,
4199                          Align(8), CopySize);
4200       }
4201     }
4202 
4203     // Instrument va_start.
4204     // Copy va_list shadow from the backup copy of the TLS contents.
4205     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4206       CallInst *OrigInst = VAStartInstrumentationList[i];
4207       IRBuilder<> IRB(OrigInst->getNextNode());
4208       Value *VAListTag = OrigInst->getArgOperand(0);
4209 
4210       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4211       Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
4212           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4213                         ConstantInt::get(MS.IntptrTy, 16)),
4214           PointerType::get(RegSaveAreaPtrTy, 0));
4215       Value *RegSaveAreaPtr =
4216           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
4217       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
4218       const Align Alignment = Align(16);
4219       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
4220           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4221                                  Alignment, /*isStore*/ true);
4222       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
4223                        AMD64FpEndOffset);
4224       if (MS.TrackOrigins)
4225         IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
4226                          Alignment, AMD64FpEndOffset);
4227       Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4228       Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
4229           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4230                         ConstantInt::get(MS.IntptrTy, 8)),
4231           PointerType::get(OverflowArgAreaPtrTy, 0));
4232       Value *OverflowArgAreaPtr =
4233           IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
4234       Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
4235       std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
4236           MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
4237                                  Alignment, /*isStore*/ true);
4238       Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
4239                                              AMD64FpEndOffset);
4240       IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
4241                        VAArgOverflowSize);
4242       if (MS.TrackOrigins) {
4243         SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
4244                                         AMD64FpEndOffset);
4245         IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
4246                          VAArgOverflowSize);
4247       }
4248     }
4249   }
4250 };
4251 
4252 /// MIPS64-specific implementation of VarArgHelper.
4253 struct VarArgMIPS64Helper : public VarArgHelper {
4254   Function &F;
4255   MemorySanitizer &MS;
4256   MemorySanitizerVisitor &MSV;
4257   Value *VAArgTLSCopy = nullptr;
4258   Value *VAArgSize = nullptr;
4259 
4260   SmallVector<CallInst*, 16> VAStartInstrumentationList;
4261 
4262   VarArgMIPS64Helper(Function &F, MemorySanitizer &MS,
4263                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
4264 
4265   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4266     unsigned VAArgOffset = 0;
4267     const DataLayout &DL = F.getParent()->getDataLayout();
4268     for (auto ArgIt = CB.arg_begin() + CB.getFunctionType()->getNumParams(),
4269               End = CB.arg_end();
4270          ArgIt != End; ++ArgIt) {
4271       Triple TargetTriple(F.getParent()->getTargetTriple());
4272       Value *A = *ArgIt;
4273       Value *Base;
4274       uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4275       if (TargetTriple.getArch() == Triple::mips64) {
        // Adjust the shadow for arguments with size < 8 to match the
        // placement of bits on a big-endian system.
4278         if (ArgSize < 8)
4279           VAArgOffset += (8 - ArgSize);
4280       }
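      // For example (illustrative): an i32 vararg gets VAArgOffset bumped by
      // 4, so its 4-byte shadow lines up with the value's bytes, which occupy
      // the higher-addressed half of the 8-byte slot on big-endian mips64.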
4281       Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize);
4282       VAArgOffset += ArgSize;
4283       VAArgOffset = alignTo(VAArgOffset, 8);
4284       if (!Base)
4285         continue;
4286       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
4287     }
4288 
4289     Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset);
    // Here we use VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creating a
    // new class member, i.e. it holds the total size of all varargs.
4292     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
4293   }
4294 
4295   /// Compute the shadow address for a given va_arg.
4296   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4297                                    unsigned ArgOffset, unsigned ArgSize) {
4298     // Make sure we don't overflow __msan_va_arg_tls.
4299     if (ArgOffset + ArgSize > kParamTLSSize)
4300       return nullptr;
4301     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4302     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4303     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4304                               "_msarg");
4305   }
4306 
4307   void visitVAStartInst(VAStartInst &I) override {
4308     IRBuilder<> IRB(&I);
4309     VAStartInstrumentationList.push_back(&I);
4310     Value *VAListTag = I.getArgOperand(0);
4311     Value *ShadowPtr, *OriginPtr;
4312     const Align Alignment = Align(8);
4313     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4314         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4315     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4316                      /* size */ 8, Alignment, false);
4317   }
4318 
4319   void visitVACopyInst(VACopyInst &I) override {
4320     IRBuilder<> IRB(&I);
4321     VAStartInstrumentationList.push_back(&I);
4322     Value *VAListTag = I.getArgOperand(0);
4323     Value *ShadowPtr, *OriginPtr;
4324     const Align Alignment = Align(8);
4325     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4326         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4327     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4328                      /* size */ 8, Alignment, false);
4329   }
4330 
4331   void finalizeInstrumentation() override {
4332     assert(!VAArgSize && !VAArgTLSCopy &&
4333            "finalizeInstrumentation called twice");
4334     IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
4335     VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4336     Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
4337                                     VAArgSize);
4338 
4339     if (!VAStartInstrumentationList.empty()) {
4340       // If there is a va_start in this function, make a backup copy of
4341       // va_arg_tls somewhere in the function entry block.
4342       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4343       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
4344     }
4345 
4346     // Instrument va_start.
4347     // Copy va_list shadow from the backup copy of the TLS contents.
4348     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4349       CallInst *OrigInst = VAStartInstrumentationList[i];
4350       IRBuilder<> IRB(OrigInst->getNextNode());
4351       Value *VAListTag = OrigInst->getArgOperand(0);
4352       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4353       Value *RegSaveAreaPtrPtr =
4354           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4355                              PointerType::get(RegSaveAreaPtrTy, 0));
4356       Value *RegSaveAreaPtr =
4357           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
4358       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
4359       const Align Alignment = Align(8);
4360       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
4361           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4362                                  Alignment, /*isStore*/ true);
4363       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
4364                        CopySize);
4365     }
4366   }
4367 };
4368 
4369 /// AArch64-specific implementation of VarArgHelper.
4370 struct VarArgAArch64Helper : public VarArgHelper {
4371   static const unsigned kAArch64GrArgSize = 64;
4372   static const unsigned kAArch64VrArgSize = 128;
4373 
4374   static const unsigned AArch64GrBegOffset = 0;
4375   static const unsigned AArch64GrEndOffset = kAArch64GrArgSize;
4376   // Make VR space aligned to 16 bytes.
4377   static const unsigned AArch64VrBegOffset = AArch64GrEndOffset;
4378   static const unsigned AArch64VrEndOffset = AArch64VrBegOffset
4379                                              + kAArch64VrArgSize;
4380   static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;
4381 
4382   Function &F;
4383   MemorySanitizer &MS;
4384   MemorySanitizerVisitor &MSV;
4385   Value *VAArgTLSCopy = nullptr;
4386   Value *VAArgOverflowSize = nullptr;
4387 
4388   SmallVector<CallInst*, 16> VAStartInstrumentationList;
4389 
4390   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
4391 
4392   VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
4393                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
4394 
4395   ArgKind classifyArgument(Value* arg) {
4396     Type *T = arg->getType();
4397     if (T->isFPOrFPVectorTy())
4398       return AK_FloatingPoint;
4399     if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
4400         || (T->isPointerTy()))
4401       return AK_GeneralPurpose;
4402     return AK_Memory;
4403   }
4404 
  // The instrumentation stores the argument shadow in a non-ABI-specific
  // format because it does not know which arguments are named (since Clang,
  // as in the x86_64 case, lowers va_arg in the frontend and this pass only
  // sees the low-level code that deals with va_list internals).
  // The first eight GR registers are saved in the first 64 bytes of the
  // va_arg TLS array, followed by the first eight FP/SIMD registers, and
  // then the remaining arguments.
  // Using constant offsets within the va_arg TLS array allows fast copying
  // in finalizeInstrumentation().
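  // A sketch of the resulting __msan_va_arg_tls layout, derived from the
  // constants above:
  //   [0, 64)    shadow for GR arguments (x0-x7, 8 x 8 bytes)
  //   [64, 192)  shadow for VR arguments (v0-v7, 8 x 16 bytes)
  //   [192, ...) shadow for arguments passed on the stack (overflow area)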
4414   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4415     unsigned GrOffset = AArch64GrBegOffset;
4416     unsigned VrOffset = AArch64VrBegOffset;
4417     unsigned OverflowOffset = AArch64VAEndOffset;
4418 
4419     const DataLayout &DL = F.getParent()->getDataLayout();
4420     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
4421          ++ArgIt) {
4422       Value *A = *ArgIt;
4423       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
4424       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
4425       ArgKind AK = classifyArgument(A);
4426       if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset)
4427         AK = AK_Memory;
4428       if (AK == AK_FloatingPoint && VrOffset >= AArch64VrEndOffset)
4429         AK = AK_Memory;
4430       Value *Base;
4431       switch (AK) {
4432         case AK_GeneralPurpose:
4433           Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset, 8);
4434           GrOffset += 8;
4435           break;
4436         case AK_FloatingPoint:
4437           Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset, 8);
4438           VrOffset += 16;
4439           break;
4440         case AK_Memory:
4441           // Don't count fixed arguments in the overflow area - va_start will
4442           // skip right over them.
4443           if (IsFixed)
4444             continue;
4445           uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4446           Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset,
4447                                            alignTo(ArgSize, 8));
4448           OverflowOffset += alignTo(ArgSize, 8);
4449           break;
4450       }
      // Count fixed arguments toward their respective Gr/Vr offsets, but
      // don't bother to actually store a shadow for them.
4453       if (IsFixed)
4454         continue;
4455       if (!Base)
4456         continue;
4457       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
4458     }
4459     Constant *OverflowSize =
4460       ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset);
4461     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
4462   }
4463 
4464   /// Compute the shadow address for a given va_arg.
4465   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4466                                    unsigned ArgOffset, unsigned ArgSize) {
4467     // Make sure we don't overflow __msan_va_arg_tls.
4468     if (ArgOffset + ArgSize > kParamTLSSize)
4469       return nullptr;
4470     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4471     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4472     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4473                               "_msarg");
4474   }
4475 
4476   void visitVAStartInst(VAStartInst &I) override {
4477     IRBuilder<> IRB(&I);
4478     VAStartInstrumentationList.push_back(&I);
4479     Value *VAListTag = I.getArgOperand(0);
4480     Value *ShadowPtr, *OriginPtr;
4481     const Align Alignment = Align(8);
4482     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4483         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4484     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4485                      /* size */ 32, Alignment, false);
4486   }
4487 
4488   void visitVACopyInst(VACopyInst &I) override {
4489     IRBuilder<> IRB(&I);
4490     VAStartInstrumentationList.push_back(&I);
4491     Value *VAListTag = I.getArgOperand(0);
4492     Value *ShadowPtr, *OriginPtr;
4493     const Align Alignment = Align(8);
4494     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4495         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4496     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4497                      /* size */ 32, Alignment, false);
4498   }
4499 
4500   // Retrieve a va_list field of 'void*' size.
4501   Value* getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
4502     Value *SaveAreaPtrPtr =
4503       IRB.CreateIntToPtr(
4504         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4505                       ConstantInt::get(MS.IntptrTy, offset)),
4506         Type::getInt64PtrTy(*MS.C));
4507     return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr);
4508   }
4509 
4510   // Retrieve a va_list field of 'int' size.
4511   Value* getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
4512     Value *SaveAreaPtr =
4513       IRB.CreateIntToPtr(
4514         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4515                       ConstantInt::get(MS.IntptrTy, offset)),
4516         Type::getInt32PtrTy(*MS.C));
4517     Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr);
4518     return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
4519   }
4520 
4521   void finalizeInstrumentation() override {
4522     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
4523            "finalizeInstrumentation called twice");
4524     if (!VAStartInstrumentationList.empty()) {
4525       // If there is a va_start in this function, make a backup copy of
4526       // va_arg_tls somewhere in the function entry block.
4527       IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
4528       VAArgOverflowSize =
4529           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4530       Value *CopySize =
4531         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset),
4532                       VAArgOverflowSize);
4533       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4534       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
4535     }
4536 
4537     Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
4538     Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize);
4539 
4540     // Instrument va_start, copy va_list shadow from the backup copy of
4541     // the TLS contents.
4542     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4543       CallInst *OrigInst = VAStartInstrumentationList[i];
4544       IRBuilder<> IRB(OrigInst->getNextNode());
4545 
4546       Value *VAListTag = OrigInst->getArgOperand(0);
4547 
      // The variadic ABI for AArch64 creates two areas to save the incoming
      // argument registers (one for the 64-bit general registers x0-x7 and
      // another for the 128-bit FP/SIMD registers v0-v7).
      // We then need to propagate the shadow arguments over both regions,
      // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'.
      // The shadow for the remaining arguments is saved in the 'va::stack'
      // area. One caveat is that only the non-named (variadic) arguments need
      // to be propagated, but the call site instrumentation saves shadow for
      // *all* arguments. So to copy the shadow values from the va_arg TLS
      // array we need to adjust the offsets for both the GR and VR regions
      // based on the __{gr,vr}_offs values (whose values are based on the
      // incoming named arguments).
4560 
4561       // Read the stack pointer from the va_list.
4562       Value *StackSaveAreaPtr = getVAField64(IRB, VAListTag, 0);
4563 
4564       // Read both the __gr_top and __gr_off and add them up.
4565       Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8);
4566       Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24);
4567 
4568       Value *GrRegSaveAreaPtr = IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea);
4569 
4570       // Read both the __vr_top and __vr_off and add them up.
4571       Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16);
4572       Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28);
4573 
4574       Value *VrRegSaveAreaPtr = IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea);
4575 
      // We do not know how many named arguments were used and, at the call
      // site, all the arguments were saved. Since __gr_off is defined as
      // '0 - ((8 - named_gr) * 8)', the idea is to propagate only the
      // variadic arguments by skipping the bytes of shadow that belong to
      // named arguments.
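      // A worked example (illustrative): with two named GR arguments,
      // __gr_offs is 0 - ((8 - 2) * 8) = -48, so GrRegSaveAreaShadowPtrOff
      // below is 64 + (-48) = 16 (skipping the two named slots in the TLS
      // copy) and GrCopySize is 64 - 16 = 48 bytes of vararg shadow.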
4580       Value *GrRegSaveAreaShadowPtrOff =
4581         IRB.CreateAdd(GrArgSize, GrOffSaveArea);
4582 
4583       Value *GrRegSaveAreaShadowPtr =
4584           MSV.getShadowOriginPtr(GrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4585                                  Align(8), /*isStore*/ true)
4586               .first;
4587 
4588       Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
4589                                               GrRegSaveAreaShadowPtrOff);
4590       Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);
4591 
4592       IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, Align(8), GrSrcPtr, Align(8),
4593                        GrCopySize);
4594 
4595       // Again, but for FP/SIMD values.
4596       Value *VrRegSaveAreaShadowPtrOff =
4597           IRB.CreateAdd(VrArgSize, VrOffSaveArea);
4598 
4599       Value *VrRegSaveAreaShadowPtr =
4600           MSV.getShadowOriginPtr(VrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4601                                  Align(8), /*isStore*/ true)
4602               .first;
4603 
4604       Value *VrSrcPtr = IRB.CreateInBoundsGEP(
4605         IRB.getInt8Ty(),
4606         IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
4607                               IRB.getInt32(AArch64VrBegOffset)),
4608         VrRegSaveAreaShadowPtrOff);
4609       Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);
4610 
4611       IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, Align(8), VrSrcPtr, Align(8),
4612                        VrCopySize);
4613 
4614       // And finally for remaining arguments.
4615       Value *StackSaveAreaShadowPtr =
4616           MSV.getShadowOriginPtr(StackSaveAreaPtr, IRB, IRB.getInt8Ty(),
4617                                  Align(16), /*isStore*/ true)
4618               .first;
4619 
4620       Value *StackSrcPtr =
4621         IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
4622                               IRB.getInt32(AArch64VAEndOffset));
4623 
4624       IRB.CreateMemCpy(StackSaveAreaShadowPtr, Align(16), StackSrcPtr,
4625                        Align(16), VAArgOverflowSize);
4626     }
4627   }
4628 };
4629 
4630 /// PowerPC64-specific implementation of VarArgHelper.
4631 struct VarArgPowerPC64Helper : public VarArgHelper {
4632   Function &F;
4633   MemorySanitizer &MS;
4634   MemorySanitizerVisitor &MSV;
4635   Value *VAArgTLSCopy = nullptr;
4636   Value *VAArgSize = nullptr;
4637 
4638   SmallVector<CallInst*, 16> VAStartInstrumentationList;
4639 
4640   VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS,
4641                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
4642 
4643   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
    // For PowerPC, we need to deal with alignment of stack arguments -
    // they are mostly aligned to 8 bytes, but vectors and i128 arrays
    // are aligned to 16 bytes, and byvals can be aligned to 8 or 16 bytes.
    // For that reason, we compute the current offset from the stack pointer
    // (which is always properly aligned) and the offset of the first vararg,
    // then subtract them.
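    // For example (illustrative, assuming ABIv2 where the parameter save
    // area starts at offset 32): after an 8-byte vararg at offset 32, a
    // 16-byte vector vararg is first aligned up to offset 48 before its
    // shadow slot is computed.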
4650     unsigned VAArgBase;
4651     Triple TargetTriple(F.getParent()->getTargetTriple());
4652     // Parameter save area starts at 48 bytes from frame pointer for ABIv1,
4653     // and 32 bytes for ABIv2.  This is usually determined by target
4654     // endianness, but in theory could be overridden by function attribute.
4655     if (TargetTriple.getArch() == Triple::ppc64)
4656       VAArgBase = 48;
4657     else
4658       VAArgBase = 32;
4659     unsigned VAArgOffset = VAArgBase;
4660     const DataLayout &DL = F.getParent()->getDataLayout();
4661     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
4662          ++ArgIt) {
4663       Value *A = *ArgIt;
4664       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
4665       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
4666       bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
4667       if (IsByVal) {
4668         assert(A->getType()->isPointerTy());
4669         Type *RealTy = CB.getParamByValType(ArgNo);
4670         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
4671         MaybeAlign ArgAlign = CB.getParamAlign(ArgNo);
4672         if (!ArgAlign || *ArgAlign < Align(8))
4673           ArgAlign = Align(8);
4674         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
4675         if (!IsFixed) {
4676           Value *Base = getShadowPtrForVAArgument(
4677               RealTy, IRB, VAArgOffset - VAArgBase, ArgSize);
4678           if (Base) {
4679             Value *AShadowPtr, *AOriginPtr;
4680             std::tie(AShadowPtr, AOriginPtr) =
4681                 MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(),
4682                                        kShadowTLSAlignment, /*isStore*/ false);
4683 
4684             IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr,
4685                              kShadowTLSAlignment, ArgSize);
4686           }
4687         }
4688         VAArgOffset += alignTo(ArgSize, 8);
4689       } else {
4690         Value *Base;
4691         uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4692         uint64_t ArgAlign = 8;
4693         if (A->getType()->isArrayTy()) {
4694           // Arrays are aligned to element size, except for long double
4695           // arrays, which are aligned to 8 bytes.
4696           Type *ElementTy = A->getType()->getArrayElementType();
4697           if (!ElementTy->isPPC_FP128Ty())
4698             ArgAlign = DL.getTypeAllocSize(ElementTy);
4699         } else if (A->getType()->isVectorTy()) {
4700           // Vectors are naturally aligned.
4701           ArgAlign = DL.getTypeAllocSize(A->getType());
4702         }
4703         if (ArgAlign < 8)
4704           ArgAlign = 8;
4705         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
4706         if (DL.isBigEndian()) {
          // Adjust the shadow for arguments with size < 8 to match the
          // placement of bits on a big-endian system.
4709           if (ArgSize < 8)
4710             VAArgOffset += (8 - ArgSize);
4711         }
4712         if (!IsFixed) {
4713           Base = getShadowPtrForVAArgument(A->getType(), IRB,
4714                                            VAArgOffset - VAArgBase, ArgSize);
4715           if (Base)
4716             IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
4717         }
4718         VAArgOffset += ArgSize;
4719         VAArgOffset = alignTo(VAArgOffset, 8);
4720       }
4721       if (IsFixed)
4722         VAArgBase = VAArgOffset;
4723     }
4724 
4725     Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(),
4726                                                 VAArgOffset - VAArgBase);
    // Here we use VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creating a
    // new class member, i.e. it holds the total size of all varargs.
4729     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
4730   }
4731 
4732   /// Compute the shadow address for a given va_arg.
4733   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4734                                    unsigned ArgOffset, unsigned ArgSize) {
4735     // Make sure we don't overflow __msan_va_arg_tls.
4736     if (ArgOffset + ArgSize > kParamTLSSize)
4737       return nullptr;
4738     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4739     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4740     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4741                               "_msarg");
4742   }
4743 
4744   void visitVAStartInst(VAStartInst &I) override {
4745     IRBuilder<> IRB(&I);
4746     VAStartInstrumentationList.push_back(&I);
4747     Value *VAListTag = I.getArgOperand(0);
4748     Value *ShadowPtr, *OriginPtr;
4749     const Align Alignment = Align(8);
4750     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4751         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4752     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4753                      /* size */ 8, Alignment, false);
4754   }
4755 
4756   void visitVACopyInst(VACopyInst &I) override {
4757     IRBuilder<> IRB(&I);
4758     Value *VAListTag = I.getArgOperand(0);
4759     Value *ShadowPtr, *OriginPtr;
4760     const Align Alignment = Align(8);
4761     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4762         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4763     // Unpoison the whole __va_list_tag.
4764     // FIXME: magic ABI constants.
4765     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4766                      /* size */ 8, Alignment, false);
4767   }
4768 
4769   void finalizeInstrumentation() override {
4770     assert(!VAArgSize && !VAArgTLSCopy &&
4771            "finalizeInstrumentation called twice");
4772     IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
4773     VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4774     Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
4775                                     VAArgSize);
4776 
4777     if (!VAStartInstrumentationList.empty()) {
4778       // If there is a va_start in this function, make a backup copy of
4779       // va_arg_tls somewhere in the function entry block.
4780       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4781       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
4782     }
4783 
4784     // Instrument va_start.
4785     // Copy va_list shadow from the backup copy of the TLS contents.
4786     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4787       CallInst *OrigInst = VAStartInstrumentationList[i];
4788       IRBuilder<> IRB(OrigInst->getNextNode());
4789       Value *VAListTag = OrigInst->getArgOperand(0);
4790       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4791       Value *RegSaveAreaPtrPtr =
4792           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4793                              PointerType::get(RegSaveAreaPtrTy, 0));
4794       Value *RegSaveAreaPtr =
4795           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
4796       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
4797       const Align Alignment = Align(8);
4798       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
4799           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4800                                  Alignment, /*isStore*/ true);
4801       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
4802                        CopySize);
4803     }
4804   }
4805 };
4806 
4807 /// SystemZ-specific implementation of VarArgHelper.
4808 struct VarArgSystemZHelper : public VarArgHelper {
4809   static const unsigned SystemZGpOffset = 16;
4810   static const unsigned SystemZGpEndOffset = 56;
4811   static const unsigned SystemZFpOffset = 128;
4812   static const unsigned SystemZFpEndOffset = 160;
4813   static const unsigned SystemZMaxVrArgs = 8;
4814   static const unsigned SystemZRegSaveAreaSize = 160;
4815   static const unsigned SystemZOverflowOffset = 160;
4816   static const unsigned SystemZVAListTagSize = 32;
4817   static const unsigned SystemZOverflowArgAreaPtrOffset = 16;
4818   static const unsigned SystemZRegSaveAreaPtrOffset = 24;
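  // These offsets are assumed to follow the SystemZ va_list layout:
  //   { long __gpr; long __fpr; void *__overflow_arg_area;
  //     void *__reg_save_area; }  // 32 bytes in total.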
4819 
4820   Function &F;
4821   MemorySanitizer &MS;
4822   MemorySanitizerVisitor &MSV;
4823   Value *VAArgTLSCopy = nullptr;
4824   Value *VAArgTLSOriginCopy = nullptr;
4825   Value *VAArgOverflowSize = nullptr;
4826 
4827   SmallVector<CallInst *, 16> VAStartInstrumentationList;
4828 
4829   enum class ArgKind {
4830     GeneralPurpose,
4831     FloatingPoint,
4832     Vector,
4833     Memory,
4834     Indirect,
4835   };
4836 
4837   enum class ShadowExtension { None, Zero, Sign };
4838 
4839   VarArgSystemZHelper(Function &F, MemorySanitizer &MS,
4840                       MemorySanitizerVisitor &MSV)
4841       : F(F), MS(MS), MSV(MSV) {}
4842 
4843   ArgKind classifyArgument(Type *T, bool IsSoftFloatABI) {
4844     // T is a SystemZABIInfo::classifyArgumentType() output, and there are
4845     // only a few possibilities of what it can be. In particular, enums, single
4846     // element structs and large types have already been taken care of.
4847 
4848     // Some i128 and fp128 arguments are converted to pointers only in the
4849     // back end.
4850     if (T->isIntegerTy(128) || T->isFP128Ty())
4851       return ArgKind::Indirect;
4852     if (T->isFloatingPointTy())
4853       return IsSoftFloatABI ? ArgKind::GeneralPurpose : ArgKind::FloatingPoint;
4854     if (T->isIntegerTy() || T->isPointerTy())
4855       return ArgKind::GeneralPurpose;
4856     if (T->isVectorTy())
4857       return ArgKind::Vector;
4858     return ArgKind::Memory;
4859   }
4860 
4861   ShadowExtension getShadowExtension(const CallBase &CB, unsigned ArgNo) {
4862     // ABI says: "One of the simple integer types no more than 64 bits wide.
4863     // ... If such an argument is shorter than 64 bits, replace it by a full
4864     // 64-bit integer representing the same number, using sign or zero
4865     // extension". Shadow for an integer argument has the same type as the
4866     // argument itself, so it can be sign or zero extended as well.
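    // For example (illustrative): an i32 vararg marked 'signext' has its
    // shadow sign-extended to i64 in visitCallBase(), filling the whole
    // 8-byte slot; with neither zeroext nor signext, only 4 bytes of shadow
    // are stored, after a 4-byte gap.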
4867     bool ZExt = CB.paramHasAttr(ArgNo, Attribute::ZExt);
4868     bool SExt = CB.paramHasAttr(ArgNo, Attribute::SExt);
4869     if (ZExt) {
4870       assert(!SExt);
4871       return ShadowExtension::Zero;
4872     }
4873     if (SExt) {
4874       assert(!ZExt);
4875       return ShadowExtension::Sign;
4876     }
4877     return ShadowExtension::None;
4878   }
4879 
4880   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4881     bool IsSoftFloatABI = CB.getCalledFunction()
4882                               ->getFnAttribute("use-soft-float")
4883                               .getValueAsString() == "true";
4884     unsigned GpOffset = SystemZGpOffset;
4885     unsigned FpOffset = SystemZFpOffset;
4886     unsigned VrIndex = 0;
4887     unsigned OverflowOffset = SystemZOverflowOffset;
4888     const DataLayout &DL = F.getParent()->getDataLayout();
4889     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
4890          ++ArgIt) {
4891       Value *A = *ArgIt;
4892       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
4893       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
4894       // SystemZABIInfo does not produce ByVal parameters.
4895       assert(!CB.paramHasAttr(ArgNo, Attribute::ByVal));
4896       Type *T = A->getType();
4897       ArgKind AK = classifyArgument(T, IsSoftFloatABI);
4898       if (AK == ArgKind::Indirect) {
4899         T = PointerType::get(T, 0);
4900         AK = ArgKind::GeneralPurpose;
4901       }
4902       if (AK == ArgKind::GeneralPurpose && GpOffset >= SystemZGpEndOffset)
4903         AK = ArgKind::Memory;
4904       if (AK == ArgKind::FloatingPoint && FpOffset >= SystemZFpEndOffset)
4905         AK = ArgKind::Memory;
4906       if (AK == ArgKind::Vector && (VrIndex >= SystemZMaxVrArgs || !IsFixed))
4907         AK = ArgKind::Memory;
4908       Value *ShadowBase = nullptr;
4909       Value *OriginBase = nullptr;
4910       ShadowExtension SE = ShadowExtension::None;
4911       switch (AK) {
4912       case ArgKind::GeneralPurpose: {
4913         // Always keep track of GpOffset, but store shadow only for varargs.
4914         uint64_t ArgSize = 8;
4915         if (GpOffset + ArgSize <= kParamTLSSize) {
4916           if (!IsFixed) {
4917             SE = getShadowExtension(CB, ArgNo);
4918             uint64_t GapSize = 0;
4919             if (SE == ShadowExtension::None) {
4920               uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
4921               assert(ArgAllocSize <= ArgSize);
4922               GapSize = ArgSize - ArgAllocSize;
4923             }
4924             ShadowBase = getShadowAddrForVAArgument(IRB, GpOffset + GapSize);
4925             if (MS.TrackOrigins)
4926               OriginBase = getOriginPtrForVAArgument(IRB, GpOffset + GapSize);
4927           }
4928           GpOffset += ArgSize;
4929         } else {
4930           GpOffset = kParamTLSSize;
4931         }
4932         break;
4933       }
4934       case ArgKind::FloatingPoint: {
4935         // Always keep track of FpOffset, but store shadow only for varargs.
4936         uint64_t ArgSize = 8;
4937         if (FpOffset + ArgSize <= kParamTLSSize) {
4938           if (!IsFixed) {
4939             // PoP says: "A short floating-point datum requires only the
4940             // left-most 32 bit positions of a floating-point register".
4941             // Therefore, in contrast to AK_GeneralPurpose and AK_Memory,
4942             // don't extend shadow and don't mind the gap.
4943             ShadowBase = getShadowAddrForVAArgument(IRB, FpOffset);
4944             if (MS.TrackOrigins)
4945               OriginBase = getOriginPtrForVAArgument(IRB, FpOffset);
4946           }
4947           FpOffset += ArgSize;
4948         } else {
4949           FpOffset = kParamTLSSize;
4950         }
4951         break;
4952       }
4953       case ArgKind::Vector: {
4954         // Keep track of VrIndex. No need to store shadow, since vector varargs
4955         // go through AK_Memory.
4956         assert(IsFixed);
4957         VrIndex++;
4958         break;
4959       }
4960       case ArgKind::Memory: {
4961         // Keep track of OverflowOffset and store shadow only for varargs.
4962         // Ignore fixed args, since we need to copy only the vararg portion of
4963         // the overflow area shadow.
4964         if (!IsFixed) {
4965           uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
4966           uint64_t ArgSize = alignTo(ArgAllocSize, 8);
4967           if (OverflowOffset + ArgSize <= kParamTLSSize) {
4968             SE = getShadowExtension(CB, ArgNo);
4969             uint64_t GapSize =
4970                 SE == ShadowExtension::None ? ArgSize - ArgAllocSize : 0;
4971             ShadowBase =
4972                 getShadowAddrForVAArgument(IRB, OverflowOffset + GapSize);
4973             if (MS.TrackOrigins)
4974               OriginBase =
4975                   getOriginPtrForVAArgument(IRB, OverflowOffset + GapSize);
4976             OverflowOffset += ArgSize;
4977           } else {
4978             OverflowOffset = kParamTLSSize;
4979           }
4980         }
4981         break;
4982       }
4983       case ArgKind::Indirect:
4984         llvm_unreachable("Indirect must be converted to GeneralPurpose");
4985       }
4986       if (ShadowBase == nullptr)
4987         continue;
4988       Value *Shadow = MSV.getShadow(A);
4989       if (SE != ShadowExtension::None)
4990         Shadow = MSV.CreateShadowCast(IRB, Shadow, IRB.getInt64Ty(),
4991                                       /*Signed*/ SE == ShadowExtension::Sign);
4992       ShadowBase = IRB.CreateIntToPtr(
4993           ShadowBase, PointerType::get(Shadow->getType(), 0), "_msarg_va_s");
4994       IRB.CreateStore(Shadow, ShadowBase);
4995       if (MS.TrackOrigins) {
4996         Value *Origin = MSV.getOrigin(A);
4997         unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
4998         MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
4999                         kMinOriginAlignment);
5000       }
5001     }
5002     Constant *OverflowSize = ConstantInt::get(
5003         IRB.getInt64Ty(), OverflowOffset - SystemZOverflowOffset);
5004     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
5005   }
5006 
5007   Value *getShadowAddrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
5008     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
5009     return IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5010   }
5011 
5012   Value *getOriginPtrForVAArgument(IRBuilder<> &IRB, int ArgOffset) {
5013     Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
5014     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5015     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
5016                               "_msarg_va_o");
5017   }
5018 
5019   void unpoisonVAListTagForInst(IntrinsicInst &I) {
5020     IRBuilder<> IRB(&I);
5021     Value *VAListTag = I.getArgOperand(0);
5022     Value *ShadowPtr, *OriginPtr;
5023     const Align Alignment = Align(8);
5024     std::tie(ShadowPtr, OriginPtr) =
5025         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
5026                                /*isStore*/ true);
5027     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
5028                      SystemZVAListTagSize, Alignment, false);
5029   }
5030 
5031   void visitVAStartInst(VAStartInst &I) override {
5032     VAStartInstrumentationList.push_back(&I);
5033     unpoisonVAListTagForInst(I);
5034   }
5035 
5036   void visitVACopyInst(VACopyInst &I) override { unpoisonVAListTagForInst(I); }
5037 
5038   void copyRegSaveArea(IRBuilder<> &IRB, Value *VAListTag) {
5039     Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
5040     Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
5041         IRB.CreateAdd(
5042             IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5043             ConstantInt::get(MS.IntptrTy, SystemZRegSaveAreaPtrOffset)),
5044         PointerType::get(RegSaveAreaPtrTy, 0));
5045     Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
5046     Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
5047     const Align Alignment = Align(8);
5048     std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
5049         MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), Alignment,
5050                                /*isStore*/ true);
5051     // TODO(iii): copy only fragments filled by visitCallBase()
5052     IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
5053                      SystemZRegSaveAreaSize);
5054     if (MS.TrackOrigins)
5055       IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
5056                        Alignment, SystemZRegSaveAreaSize);
5057   }
5058 
5059   void copyOverflowArea(IRBuilder<> &IRB, Value *VAListTag) {
5060     Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
5061     Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
5062         IRB.CreateAdd(
5063             IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5064             ConstantInt::get(MS.IntptrTy, SystemZOverflowArgAreaPtrOffset)),
5065         PointerType::get(OverflowArgAreaPtrTy, 0));
5066     Value *OverflowArgAreaPtr =
5067         IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
5068     Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
5069     const Align Alignment = Align(8);
5070     std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
5071         MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
5072                                Alignment, /*isStore*/ true);
5073     Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
5074                                            SystemZOverflowOffset);
5075     IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
5076                      VAArgOverflowSize);
5077     if (MS.TrackOrigins) {
5078       SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
5079                                       SystemZOverflowOffset);
5080       IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
5081                        VAArgOverflowSize);
5082     }
5083   }
5084 
5085   void finalizeInstrumentation() override {
5086     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
5087            "finalizeInstrumentation called twice");
5088     if (!VAStartInstrumentationList.empty()) {
5089       // If there is a va_start in this function, make a backup copy of
5090       // va_arg_tls somewhere in the function entry block.
5091       IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
5092       VAArgOverflowSize =
5093           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
5094       Value *CopySize =
5095           IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, SystemZOverflowOffset),
5096                         VAArgOverflowSize);
5097       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5098       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
5099       if (MS.TrackOrigins) {
5100         VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5101         IRB.CreateMemCpy(VAArgTLSOriginCopy, Align(8), MS.VAArgOriginTLS,
5102                          Align(8), CopySize);
5103       }
5104     }
5105 
5106     // Instrument va_start.
5107     // Copy va_list shadow from the backup copy of the TLS contents.
5108     for (size_t VaStartNo = 0, VaStartNum = VAStartInstrumentationList.size();
5109          VaStartNo < VaStartNum; VaStartNo++) {
5110       CallInst *OrigInst = VAStartInstrumentationList[VaStartNo];
5111       IRBuilder<> IRB(OrigInst->getNextNode());
5112       Value *VAListTag = OrigInst->getArgOperand(0);
5113       copyRegSaveArea(IRB, VAListTag);
5114       copyOverflowArea(IRB, VAListTag);
5115     }
5116   }
5117 };
5118 
5119 /// A no-op implementation of VarArgHelper.
5120 struct VarArgNoOpHelper : public VarArgHelper {
5121   VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
5122                    MemorySanitizerVisitor &MSV) {}
5123 
5124   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {}
5125 
5126   void visitVAStartInst(VAStartInst &I) override {}
5127 
5128   void visitVACopyInst(VACopyInst &I) override {}
5129 
5130   void finalizeInstrumentation() override {}
5131 };
5132 
5133 } // end anonymous namespace
5134 
5135 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
5136                                         MemorySanitizerVisitor &Visitor) {
  // VarArg handling is only implemented on AMD64, MIPS64, AArch64, PowerPC64
  // and SystemZ. False positives are possible on other platforms.
5139   Triple TargetTriple(Func.getParent()->getTargetTriple());
5140   if (TargetTriple.getArch() == Triple::x86_64)
5141     return new VarArgAMD64Helper(Func, Msan, Visitor);
5142   else if (TargetTriple.isMIPS64())
5143     return new VarArgMIPS64Helper(Func, Msan, Visitor);
5144   else if (TargetTriple.getArch() == Triple::aarch64)
5145     return new VarArgAArch64Helper(Func, Msan, Visitor);
5146   else if (TargetTriple.getArch() == Triple::ppc64 ||
5147            TargetTriple.getArch() == Triple::ppc64le)
5148     return new VarArgPowerPC64Helper(Func, Msan, Visitor);
5149   else if (TargetTriple.getArch() == Triple::systemz)
5150     return new VarArgSystemZHelper(Func, Msan, Visitor);
5151   else
5152     return new VarArgNoOpHelper(Func, Msan, Visitor);
5153 }
5154 
5155 bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
5156   if (!CompileKernel && F.getName() == kMsanModuleCtorName)
5157     return false;
5158 
5159   MemorySanitizerVisitor Visitor(F, *this, TLI);
5160 
  // Clear out memory attributes (readonly, readnone, writeonly, argmemonly)
  // and speculatable, since the instrumentation adds loads and stores of
  // shadow memory.
5162   AttrBuilder B;
5163   B.addAttribute(Attribute::ReadOnly)
5164       .addAttribute(Attribute::ReadNone)
5165       .addAttribute(Attribute::WriteOnly)
5166       .addAttribute(Attribute::ArgMemOnly)
5167       .addAttribute(Attribute::Speculatable);
5168   F.removeAttributes(AttributeList::FunctionIndex, B);
5169 
5170   return Visitor.runOnFunction();
5171 }
5172