1 //===- MemorySanitizer.cpp - detector of uninitialized reads --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file is a part of MemorySanitizer, a detector of uninitialized
11 /// reads.
12 ///
/// The algorithm of the tool is similar to Memcheck
/// (http://goo.gl/QKbem). We associate a few shadow bits with every
/// byte of the application memory, poison the shadow of the malloc-ed
/// or alloca-ed memory, load the shadow bits on every memory read,
/// propagate the shadow bits through some of the arithmetic
/// instructions (including MOV), store the shadow bits on every memory
/// write, and report a bug on certain other instructions (e.g. JMP) if the
/// associated shadow is poisoned.
21 ///
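/// As a rough illustration only (a conceptual sketch, not the exact code the
/// pass emits; shadow() below is a hypothetical helper denoting the shadow of
/// a value or memory location), a store and a conditional branch are
/// instrumented along these lines:
///
///   *shadow(p) = shadow(v);       // propagate shadow on the write
///   *p = v;
///   ...
///   if (shadow(cond) != 0)        // shadow of a branch condition must be
///     __msan_warning(...);        // clean, otherwise report a UMR
///   if (cond) { ... }
///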
/// But there are differences too. The first and major one is that we use
/// compiler instrumentation instead of binary instrumentation. This
/// gives us much better register allocation, possible compiler
/// optimizations, and a fast start-up. But it also brings the major
/// issue: msan needs to see all program events, including system
27 /// calls and reads/writes in system libraries, so we either need to
28 /// compile *everything* with msan or use a binary translation
29 /// component (e.g. DynamoRIO) to instrument pre-built libraries.
30 /// Another difference from Memcheck is that we use 8 shadow bits per
31 /// byte of application memory and use a direct shadow mapping. This
32 /// greatly simplifies the instrumentation code and avoids races on
33 /// shadow updates (Memcheck is single-threaded so races are not a
34 /// concern there. Memcheck uses 2 shadow bits per byte with a slow
35 /// path storage that uses 8 bits per byte).
36 ///
37 /// The default value of shadow is 0, which means "clean" (not poisoned).
38 ///
39 /// Every module initializer should call __msan_init to ensure that the
40 /// shadow memory is ready. On error, __msan_warning is called. Since
41 /// parameters and return values may be passed via registers, we have a
42 /// specialized thread-local shadow for return values
43 /// (__msan_retval_tls) and parameters (__msan_param_tls).
44 ///
45 ///                           Origin tracking.
46 ///
47 /// MemorySanitizer can track origins (allocation points) of all uninitialized
48 /// values. This behavior is controlled with a flag (msan-track-origins) and is
49 /// disabled by default.
50 ///
51 /// Origins are 4-byte values created and interpreted by the runtime library.
52 /// They are stored in a second shadow mapping, one 4-byte value for 4 bytes
53 /// of application memory. Propagation of origins is basically a bunch of
54 /// "select" instructions that pick the origin of a dirty argument, if an
55 /// instruction has one.
56 ///
/// Every aligned, consecutive 4 bytes of application memory have one origin
58 /// value associated with them. If these bytes contain uninitialized data
59 /// coming from 2 different allocations, the last store wins. Because of this,
60 /// MemorySanitizer reports can show unrelated origins, but this is unlikely in
61 /// practice.
62 ///
/// Origins are meaningless for fully initialized values, so MemorySanitizer
/// avoids storing an origin to memory when a fully initialized value is
/// stored. This way it avoids needlessly overwriting the origin of the 4-byte
/// region on a short (e.g. 1-byte) clean store, and it is also good for
/// performance.
67 ///
68 ///                            Atomic handling.
69 ///
/// Ideally, every atomic store of an application value would update the
/// corresponding shadow location in an atomic way. Unfortunately, an atomic
/// store to two disjoint locations cannot be done without severe slowdown.
73 ///
74 /// Therefore, we implement an approximation that may err on the safe side.
75 /// In this implementation, every atomically accessed location in the program
76 /// may only change from (partially) uninitialized to fully initialized, but
77 /// not the other way around. We load the shadow _after_ the application load,
78 /// and we store the shadow _before_ the app store. Also, we always store clean
79 /// shadow (if the application store is atomic). This way, if the store-load
80 /// pair constitutes a happens-before arc, shadow store and load are correctly
81 /// ordered such that the load will get either the value that was stored, or
82 /// some later value (which is always clean).
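///
/// For example, a release-store / acquire-load pair is conceptually
/// instrumented as follows (sketch only; shadow() is a hypothetical helper
/// denoting the shadow location):
///
///   *shadow(p) = 0;                          // clean shadow, stored first
///   atomic_store(p, v, memory_order_release);
///   ...
///   v = atomic_load(p, memory_order_acquire);
///   s = *shadow(p);                          // shadow loaded after app load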
83 ///
84 /// This does not work very well with Compare-And-Swap (CAS) and
85 /// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW
86 /// must store the new shadow before the app operation, and load the shadow
87 /// after the app operation. Computers don't work this way. Current
88 /// implementation ignores the load aspect of CAS/RMW, always returning a clean
89 /// value. It implements the store part as a simple atomic store by storing a
90 /// clean shadow.
91 ///
92 ///                      Instrumenting inline assembly.
93 ///
/// For inline assembly code, LLVM has little idea about which memory
/// locations become initialized depending on the arguments. It may be
/// possible to figure out which arguments are meant to point to inputs and
/// outputs, but the actual semantics may only become visible at runtime. In
/// the Linux kernel it's also possible that the arguments only indicate the
/// offset for a base taken from a segment register, so it's dangerous to
/// treat any asm() arguments as pointers. We take a conservative approach,
/// generating calls to
///   __msan_instrument_asm_store(ptr, size)
/// which defer the memory unpoisoning to the runtime library.
/// The latter can perform more complex address checks to figure out whether
/// it's safe to touch the shadow memory.
/// Like with atomic operations, we call __msan_instrument_asm_store() before
/// the assembly call, so that changes to the shadow memory will be seen by
/// other threads together with main memory initialization.
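///
/// For example, for an asm statement with a memory output,
///   asm("..." : "=m"(x));
/// the conservative handling (-msan-handle-asm-conservative) emits, roughly,
///   __msan_instrument_asm_store(&x, sizeof(x));
/// right before the asm statement. This is an illustration of the intent
/// rather than the literal IR produced by the pass.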
108 ///
109 ///                  KernelMemorySanitizer (KMSAN) implementation.
110 ///
111 /// The major differences between KMSAN and MSan instrumentation are:
112 ///  - KMSAN always tracks the origins and implies msan-keep-going=true;
113 ///  - KMSAN allocates shadow and origin memory for each page separately, so
114 ///    there are no explicit accesses to shadow and origin in the
115 ///    instrumentation.
116 ///    Shadow and origin values for a particular X-byte memory location
117 ///    (X=1,2,4,8) are accessed through pointers obtained via the
118 ///      __msan_metadata_ptr_for_load_X(ptr)
119 ///      __msan_metadata_ptr_for_store_X(ptr)
///    functions. The corresponding functions check that the X-byte accesses
///    are possible and return the pointers to shadow and origin memory.
122 ///    Arbitrary sized accesses are handled with:
123 ///      __msan_metadata_ptr_for_load_n(ptr, size)
124 ///      __msan_metadata_ptr_for_store_n(ptr, size);
125 ///  - TLS variables are stored in a single per-task struct. A call to a
126 ///    function __msan_get_context_state() returning a pointer to that struct
127 ///    is inserted into every instrumented function before the entry block;
128 ///  - __msan_warning() takes a 32-bit origin parameter;
129 ///  - local variables are poisoned with __msan_poison_alloca() upon function
130 ///    entry and unpoisoned with __msan_unpoison_alloca() before leaving the
131 ///    function;
132 ///  - the pass doesn't declare any global variables or add global constructors
133 ///    to the translation unit.
134 ///
/// Also, KMSAN currently ignores uninitialized memory passed into inline asm
/// calls, making sure we stay on the safe side with respect to possible false
/// positives.
137 ///
138 ///  KernelMemorySanitizer only supports X86_64 at the moment.
139 ///
140 //
141 // FIXME: This sanitizer does not yet handle scalable vectors
142 //
143 //===----------------------------------------------------------------------===//
144 
145 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
146 #include "llvm/ADT/APInt.h"
147 #include "llvm/ADT/ArrayRef.h"
148 #include "llvm/ADT/DepthFirstIterator.h"
149 #include "llvm/ADT/SmallSet.h"
150 #include "llvm/ADT/SmallString.h"
151 #include "llvm/ADT/SmallVector.h"
152 #include "llvm/ADT/StringExtras.h"
153 #include "llvm/ADT/StringRef.h"
154 #include "llvm/ADT/Triple.h"
155 #include "llvm/Analysis/TargetLibraryInfo.h"
156 #include "llvm/Analysis/ValueTracking.h"
157 #include "llvm/IR/Argument.h"
158 #include "llvm/IR/Attributes.h"
159 #include "llvm/IR/BasicBlock.h"
160 #include "llvm/IR/CallingConv.h"
161 #include "llvm/IR/Constant.h"
162 #include "llvm/IR/Constants.h"
163 #include "llvm/IR/DataLayout.h"
164 #include "llvm/IR/DerivedTypes.h"
165 #include "llvm/IR/Function.h"
166 #include "llvm/IR/GlobalValue.h"
167 #include "llvm/IR/GlobalVariable.h"
168 #include "llvm/IR/IRBuilder.h"
169 #include "llvm/IR/InlineAsm.h"
170 #include "llvm/IR/InstVisitor.h"
171 #include "llvm/IR/InstrTypes.h"
172 #include "llvm/IR/Instruction.h"
173 #include "llvm/IR/Instructions.h"
174 #include "llvm/IR/IntrinsicInst.h"
175 #include "llvm/IR/Intrinsics.h"
176 #include "llvm/IR/IntrinsicsX86.h"
177 #include "llvm/IR/LLVMContext.h"
178 #include "llvm/IR/MDBuilder.h"
179 #include "llvm/IR/Module.h"
180 #include "llvm/IR/Type.h"
181 #include "llvm/IR/Value.h"
182 #include "llvm/IR/ValueMap.h"
183 #include "llvm/InitializePasses.h"
184 #include "llvm/Pass.h"
185 #include "llvm/Support/AtomicOrdering.h"
186 #include "llvm/Support/Casting.h"
187 #include "llvm/Support/CommandLine.h"
188 #include "llvm/Support/Compiler.h"
189 #include "llvm/Support/Debug.h"
190 #include "llvm/Support/ErrorHandling.h"
191 #include "llvm/Support/MathExtras.h"
192 #include "llvm/Support/raw_ostream.h"
193 #include "llvm/Transforms/Instrumentation.h"
194 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
195 #include "llvm/Transforms/Utils/Local.h"
196 #include "llvm/Transforms/Utils/ModuleUtils.h"
197 #include <algorithm>
198 #include <cassert>
199 #include <cstddef>
200 #include <cstdint>
201 #include <memory>
202 #include <string>
203 #include <tuple>
204 
205 using namespace llvm;
206 
207 #define DEBUG_TYPE "msan"
208 
209 static const unsigned kOriginSize = 4;
210 static const Align kMinOriginAlignment = Align(4);
211 static const Align kShadowTLSAlignment = Align(8);
212 
213 // These constants must be kept in sync with the ones in msan.h.
214 static const unsigned kParamTLSSize = 800;
215 static const unsigned kRetvalTLSSize = 800;
216 
// Access sizes are powers of two: 1, 2, 4, 8.
218 static const size_t kNumberOfAccessSizes = 4;
219 
220 /// Track origins of uninitialized values.
221 ///
/// Adds a section to the MemorySanitizer report that points to the allocation
223 /// (stack or heap) the uninitialized bits came from originally.
224 static cl::opt<int> ClTrackOrigins("msan-track-origins",
225        cl::desc("Track origins (allocation sites) of poisoned memory"),
226        cl::Hidden, cl::init(0));
227 
228 static cl::opt<bool> ClKeepGoing("msan-keep-going",
229        cl::desc("keep going after reporting a UMR"),
230        cl::Hidden, cl::init(false));
231 
232 static cl::opt<bool> ClPoisonStack("msan-poison-stack",
233        cl::desc("poison uninitialized stack variables"),
234        cl::Hidden, cl::init(true));
235 
236 static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
237        cl::desc("poison uninitialized stack variables with a call"),
238        cl::Hidden, cl::init(false));
239 
240 static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
241        cl::desc("poison uninitialized stack variables with the given pattern"),
242        cl::Hidden, cl::init(0xff));
243 
244 static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
245        cl::desc("poison undef temps"),
246        cl::Hidden, cl::init(true));
247 
248 static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
249        cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
250        cl::Hidden, cl::init(true));
251 
252 static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact",
253        cl::desc("exact handling of relational integer ICmp"),
254        cl::Hidden, cl::init(false));
255 
256 static cl::opt<bool> ClHandleLifetimeIntrinsics(
257     "msan-handle-lifetime-intrinsics",
258     cl::desc(
259         "when possible, poison scoped variables at the beginning of the scope "
260         "(slower, but more precise)"),
261     cl::Hidden, cl::init(true));
262 
263 // When compiling the Linux kernel, we sometimes see false positives related to
264 // MSan being unable to understand that inline assembly calls may initialize
265 // local variables.
266 // This flag makes the compiler conservatively unpoison every memory location
267 // passed into an assembly call. Note that this may cause false positives.
268 // Because it's impossible to figure out the array sizes, we can only unpoison
269 // the first sizeof(type) bytes for each type* pointer.
270 // The instrumentation is only enabled in KMSAN builds, and only if
271 // -msan-handle-asm-conservative is on. This is done because we may want to
272 // quickly disable assembly instrumentation when it breaks.
273 static cl::opt<bool> ClHandleAsmConservative(
274     "msan-handle-asm-conservative",
275     cl::desc("conservative handling of inline assembly"), cl::Hidden,
276     cl::init(true));
277 
// This flag controls whether we check the shadow of the address
// operand of a load or store. Such bugs are very rare, since a load from
// a garbage address typically results in SEGV, but they still happen
// (e.g. when only the lower bits of the address are garbage, or when the
// access happens early at program startup, where malloc-ed memory is more
// likely to be zeroed). As of 2012-08-28 this flag adds 20% slowdown.
284 static cl::opt<bool> ClCheckAccessAddress("msan-check-access-address",
285        cl::desc("report accesses through a pointer which has poisoned shadow"),
286        cl::Hidden, cl::init(true));
287 
288 static cl::opt<bool> ClEagerChecks(
289     "msan-eager-checks",
290     cl::desc("check arguments and return values at function call boundaries"),
291     cl::Hidden, cl::init(false));
292 
293 static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions",
294        cl::desc("print out instructions with default strict semantics"),
295        cl::Hidden, cl::init(false));
296 
297 static cl::opt<int> ClInstrumentationWithCallThreshold(
298     "msan-instrumentation-with-call-threshold",
299     cl::desc(
300         "If the function being instrumented requires more than "
301         "this number of checks and origin stores, use callbacks instead of "
302         "inline checks (-1 means never use callbacks)."),
303     cl::Hidden, cl::init(3500));
304 
305 static cl::opt<bool>
306     ClEnableKmsan("msan-kernel",
307                   cl::desc("Enable KernelMemorySanitizer instrumentation"),
308                   cl::Hidden, cl::init(false));
309 
310 static cl::opt<bool>
311     ClDisableChecks("msan-disable-checks",
312                     cl::desc("Apply no_sanitize to the whole file"), cl::Hidden,
313                     cl::init(false));
314 
315 // This is an experiment to enable handling of cases where shadow is a non-zero
316 // compile-time constant. For some unexplainable reason they were silently
317 // ignored in the instrumentation.
318 static cl::opt<bool> ClCheckConstantShadow("msan-check-constant-shadow",
319        cl::desc("Insert checks for constant shadow values"),
320        cl::Hidden, cl::init(false));
321 
322 // This is off by default because of a bug in gold:
323 // https://sourceware.org/bugzilla/show_bug.cgi?id=19002
324 static cl::opt<bool> ClWithComdat("msan-with-comdat",
325        cl::desc("Place MSan constructors in comdat sections"),
326        cl::Hidden, cl::init(false));
327 
// These options allow specifying custom memory map parameters.
// See MemoryMapParams for details.
330 static cl::opt<uint64_t> ClAndMask("msan-and-mask",
331                                    cl::desc("Define custom MSan AndMask"),
332                                    cl::Hidden, cl::init(0));
333 
334 static cl::opt<uint64_t> ClXorMask("msan-xor-mask",
335                                    cl::desc("Define custom MSan XorMask"),
336                                    cl::Hidden, cl::init(0));
337 
338 static cl::opt<uint64_t> ClShadowBase("msan-shadow-base",
339                                       cl::desc("Define custom MSan ShadowBase"),
340                                       cl::Hidden, cl::init(0));
341 
342 static cl::opt<uint64_t> ClOriginBase("msan-origin-base",
343                                       cl::desc("Define custom MSan OriginBase"),
344                                       cl::Hidden, cl::init(0));
345 
346 const char kMsanModuleCtorName[] = "msan.module_ctor";
347 const char kMsanInitName[] = "__msan_init";
348 
349 namespace {
350 
351 // Memory map parameters used in application-to-shadow address calculation.
352 // Offset = (Addr & ~AndMask) ^ XorMask
353 // Shadow = ShadowBase + Offset
354 // Origin = OriginBase + Offset
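//
// For example, with the default x86_64 Linux parameters below (AndMask and
// ShadowBase unused, XorMask = 0x500000000000, OriginBase = 0x100000000000),
// an application address of 0x700000001234 gives
//   Offset = 0x700000001234 ^ 0x500000000000 = 0x200000001234
//   Shadow = 0x200000001234
//   Origin = 0x100000000000 + 0x200000001234 = 0x300000001234
// (shown for illustration; the actual values depend on the target mapping).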
355 struct MemoryMapParams {
356   uint64_t AndMask;
357   uint64_t XorMask;
358   uint64_t ShadowBase;
359   uint64_t OriginBase;
360 };
361 
362 struct PlatformMemoryMapParams {
363   const MemoryMapParams *bits32;
364   const MemoryMapParams *bits64;
365 };
366 
367 } // end anonymous namespace
368 
369 // i386 Linux
370 static const MemoryMapParams Linux_I386_MemoryMapParams = {
371   0x000080000000,  // AndMask
372   0,               // XorMask (not used)
373   0,               // ShadowBase (not used)
374   0x000040000000,  // OriginBase
375 };
376 
377 // x86_64 Linux
378 static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
379 #ifdef MSAN_LINUX_X86_64_OLD_MAPPING
380   0x400000000000,  // AndMask
381   0,               // XorMask (not used)
382   0,               // ShadowBase (not used)
383   0x200000000000,  // OriginBase
384 #else
385   0,               // AndMask (not used)
386   0x500000000000,  // XorMask
387   0,               // ShadowBase (not used)
388   0x100000000000,  // OriginBase
389 #endif
390 };
391 
392 // mips64 Linux
393 static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
394   0,               // AndMask (not used)
395   0x008000000000,  // XorMask
396   0,               // ShadowBase (not used)
397   0x002000000000,  // OriginBase
398 };
399 
400 // ppc64 Linux
401 static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = {
402   0xE00000000000,  // AndMask
403   0x100000000000,  // XorMask
404   0x080000000000,  // ShadowBase
405   0x1C0000000000,  // OriginBase
406 };
407 
408 // s390x Linux
409 static const MemoryMapParams Linux_S390X_MemoryMapParams = {
410     0xC00000000000, // AndMask
411     0,              // XorMask (not used)
412     0x080000000000, // ShadowBase
413     0x1C0000000000, // OriginBase
414 };
415 
416 // aarch64 Linux
417 static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
418   0,               // AndMask (not used)
419   0x06000000000,   // XorMask
420   0,               // ShadowBase (not used)
421   0x01000000000,   // OriginBase
422 };
423 
424 // i386 FreeBSD
425 static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
426   0x000180000000,  // AndMask
427   0x000040000000,  // XorMask
428   0x000020000000,  // ShadowBase
429   0x000700000000,  // OriginBase
430 };
431 
432 // x86_64 FreeBSD
433 static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = {
434   0xc00000000000,  // AndMask
435   0x200000000000,  // XorMask
436   0x100000000000,  // ShadowBase
437   0x380000000000,  // OriginBase
438 };
439 
440 // x86_64 NetBSD
441 static const MemoryMapParams NetBSD_X86_64_MemoryMapParams = {
442   0,               // AndMask
443   0x500000000000,  // XorMask
444   0,               // ShadowBase
445   0x100000000000,  // OriginBase
446 };
447 
448 static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
449   &Linux_I386_MemoryMapParams,
450   &Linux_X86_64_MemoryMapParams,
451 };
452 
453 static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
454   nullptr,
455   &Linux_MIPS64_MemoryMapParams,
456 };
457 
458 static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = {
459   nullptr,
460   &Linux_PowerPC64_MemoryMapParams,
461 };
462 
463 static const PlatformMemoryMapParams Linux_S390_MemoryMapParams = {
464     nullptr,
465     &Linux_S390X_MemoryMapParams,
466 };
467 
468 static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
469   nullptr,
470   &Linux_AArch64_MemoryMapParams,
471 };
472 
473 static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
474   &FreeBSD_I386_MemoryMapParams,
475   &FreeBSD_X86_64_MemoryMapParams,
476 };
477 
478 static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = {
479   nullptr,
480   &NetBSD_X86_64_MemoryMapParams,
481 };
482 
483 namespace {
484 
485 /// Instrument functions of a module to detect uninitialized reads.
486 ///
487 /// Instantiating MemorySanitizer inserts the msan runtime library API function
488 /// declarations into the module if they don't exist already. Instantiating
489 /// ensures the __msan_init function is in the list of global constructors for
490 /// the module.
491 class MemorySanitizer {
492 public:
493   MemorySanitizer(Module &M, MemorySanitizerOptions Options)
494       : CompileKernel(Options.Kernel), TrackOrigins(Options.TrackOrigins),
495         Recover(Options.Recover), EagerChecks(Options.EagerChecks) {
496     initializeModule(M);
497   }
498 
499   // MSan cannot be moved or copied because of MapParams.
500   MemorySanitizer(MemorySanitizer &&) = delete;
501   MemorySanitizer &operator=(MemorySanitizer &&) = delete;
502   MemorySanitizer(const MemorySanitizer &) = delete;
503   MemorySanitizer &operator=(const MemorySanitizer &) = delete;
504 
505   bool sanitizeFunction(Function &F, TargetLibraryInfo &TLI);
506 
507 private:
508   friend struct MemorySanitizerVisitor;
509   friend struct VarArgAMD64Helper;
510   friend struct VarArgMIPS64Helper;
511   friend struct VarArgAArch64Helper;
512   friend struct VarArgPowerPC64Helper;
513   friend struct VarArgSystemZHelper;
514 
515   void initializeModule(Module &M);
516   void initializeCallbacks(Module &M);
517   void createKernelApi(Module &M);
518   void createUserspaceApi(Module &M);
519 
520   /// True if we're compiling the Linux kernel.
521   bool CompileKernel;
522   /// Track origins (allocation points) of uninitialized values.
523   int TrackOrigins;
524   bool Recover;
525   bool EagerChecks;
526 
527   LLVMContext *C;
528   Type *IntptrTy;
529   Type *OriginTy;
530 
531   // XxxTLS variables represent the per-thread state in MSan and per-task state
532   // in KMSAN.
533   // For the userspace these point to thread-local globals. In the kernel land
534   // they point to the members of a per-task struct obtained via a call to
535   // __msan_get_context_state().
536 
537   /// Thread-local shadow storage for function parameters.
538   Value *ParamTLS;
539 
540   /// Thread-local origin storage for function parameters.
541   Value *ParamOriginTLS;
542 
543   /// Thread-local shadow storage for function return value.
544   Value *RetvalTLS;
545 
546   /// Thread-local origin storage for function return value.
547   Value *RetvalOriginTLS;
548 
549   /// Thread-local shadow storage for in-register va_arg function
550   /// parameters (x86_64-specific).
551   Value *VAArgTLS;
552 
  /// Thread-local origin storage for in-register va_arg function
  /// parameters (x86_64-specific).
555   Value *VAArgOriginTLS;
556 
  /// Thread-local storage for the size of the va_arg overflow area
  /// (x86_64-specific).
559   Value *VAArgOverflowSizeTLS;
560 
561   /// Are the instrumentation callbacks set up?
562   bool CallbacksInitialized = false;
563 
564   /// The run-time callback to print a warning.
565   FunctionCallee WarningFn;
566 
567   // These arrays are indexed by log2(AccessSize).
568   FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
569   FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];
570 
571   /// Run-time helper that generates a new origin value for a stack
572   /// allocation.
573   FunctionCallee MsanSetAllocaOrigin4Fn;
574 
575   /// Run-time helper that poisons stack on function entry.
576   FunctionCallee MsanPoisonStackFn;
577 
578   /// Run-time helper that records a store (or any event) of an
579   /// uninitialized value and returns an updated origin id encoding this info.
580   FunctionCallee MsanChainOriginFn;
581 
582   /// Run-time helper that paints an origin over a region.
583   FunctionCallee MsanSetOriginFn;
584 
585   /// MSan runtime replacements for memmove, memcpy and memset.
586   FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
587 
588   /// KMSAN callback for task-local function argument shadow.
589   StructType *MsanContextStateTy;
590   FunctionCallee MsanGetContextStateFn;
591 
592   /// Functions for poisoning/unpoisoning local variables
593   FunctionCallee MsanPoisonAllocaFn, MsanUnpoisonAllocaFn;
594 
595   /// Each of the MsanMetadataPtrXxx functions returns a pair of shadow/origin
596   /// pointers.
597   FunctionCallee MsanMetadataPtrForLoadN, MsanMetadataPtrForStoreN;
598   FunctionCallee MsanMetadataPtrForLoad_1_8[4];
599   FunctionCallee MsanMetadataPtrForStore_1_8[4];
600   FunctionCallee MsanInstrumentAsmStoreFn;
601 
602   /// Helper to choose between different MsanMetadataPtrXxx().
603   FunctionCallee getKmsanShadowOriginAccessFn(bool isStore, int size);
604 
605   /// Memory map parameters used in application-to-shadow calculation.
606   const MemoryMapParams *MapParams;
607 
  /// Custom memory map parameters used when -msan-shadow-base or
  /// -msan-origin-base is provided.
610   MemoryMapParams CustomMapParams;
611 
612   MDNode *ColdCallWeights;
613 
614   /// Branch weights for origin store.
615   MDNode *OriginStoreWeights;
616 };
617 
618 void insertModuleCtor(Module &M) {
619   getOrCreateSanitizerCtorAndInitFunctions(
620       M, kMsanModuleCtorName, kMsanInitName,
621       /*InitArgTypes=*/{},
622       /*InitArgs=*/{},
623       // This callback is invoked when the functions are created the first
624       // time. Hook them into the global ctors list in that case:
625       [&](Function *Ctor, FunctionCallee) {
626         if (!ClWithComdat) {
627           appendToGlobalCtors(M, Ctor, 0);
628           return;
629         }
630         Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
631         Ctor->setComdat(MsanCtorComdat);
632         appendToGlobalCtors(M, Ctor, 0, Ctor);
633       });
634 }
635 
636 /// A legacy function pass for msan instrumentation.
637 ///
638 /// Instruments functions to detect uninitialized reads.
639 struct MemorySanitizerLegacyPass : public FunctionPass {
640   // Pass identification, replacement for typeid.
641   static char ID;
642 
643   MemorySanitizerLegacyPass(MemorySanitizerOptions Options = {})
644       : FunctionPass(ID), Options(Options) {
645     initializeMemorySanitizerLegacyPassPass(*PassRegistry::getPassRegistry());
646   }
647   StringRef getPassName() const override { return "MemorySanitizerLegacyPass"; }
648 
649   void getAnalysisUsage(AnalysisUsage &AU) const override {
650     AU.addRequired<TargetLibraryInfoWrapperPass>();
651   }
652 
653   bool runOnFunction(Function &F) override {
654     return MSan->sanitizeFunction(
655         F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F));
656   }
657   bool doInitialization(Module &M) override;
658 
659   Optional<MemorySanitizer> MSan;
660   MemorySanitizerOptions Options;
661 };
662 
663 template <class T> T getOptOrDefault(const cl::opt<T> &Opt, T Default) {
664   return (Opt.getNumOccurrences() > 0) ? Opt : Default;
665 }
666 
667 } // end anonymous namespace
668 
669 MemorySanitizerOptions::MemorySanitizerOptions(int TO, bool R, bool K,
670                                                bool EagerChecks)
671     : Kernel(getOptOrDefault(ClEnableKmsan, K)),
672       TrackOrigins(getOptOrDefault(ClTrackOrigins, Kernel ? 2 : TO)),
673       Recover(getOptOrDefault(ClKeepGoing, Kernel || R)),
674       EagerChecks(getOptOrDefault(ClEagerChecks, EagerChecks)) {}
675 
676 PreservedAnalyses MemorySanitizerPass::run(Function &F,
677                                            FunctionAnalysisManager &FAM) {
678   MemorySanitizer Msan(*F.getParent(), Options);
679   if (Msan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F)))
680     return PreservedAnalyses::none();
681   return PreservedAnalyses::all();
682 }
683 
684 PreservedAnalyses
685 ModuleMemorySanitizerPass::run(Module &M, ModuleAnalysisManager &AM) {
686   if (Options.Kernel)
687     return PreservedAnalyses::all();
688   insertModuleCtor(M);
689   return PreservedAnalyses::none();
690 }
691 
692 void MemorySanitizerPass::printPipeline(
693     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
694   static_cast<PassInfoMixin<MemorySanitizerPass> *>(this)->printPipeline(
695       OS, MapClassName2PassName);
696   OS << "<";
697   if (Options.Recover)
698     OS << "recover;";
699   if (Options.Kernel)
700     OS << "kernel;";
701   if (Options.EagerChecks)
702     OS << "eager-checks;";
703   OS << "track-origins=" << Options.TrackOrigins;
704   OS << ">";
705 }
706 
707 char MemorySanitizerLegacyPass::ID = 0;
708 
709 INITIALIZE_PASS_BEGIN(MemorySanitizerLegacyPass, "msan",
710                       "MemorySanitizer: detects uninitialized reads.", false,
711                       false)
712 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
713 INITIALIZE_PASS_END(MemorySanitizerLegacyPass, "msan",
714                     "MemorySanitizer: detects uninitialized reads.", false,
715                     false)
716 
717 FunctionPass *
718 llvm::createMemorySanitizerLegacyPassPass(MemorySanitizerOptions Options) {
719   return new MemorySanitizerLegacyPass(Options);
720 }
721 
722 /// Create a non-const global initialized with the given string.
723 ///
724 /// Creates a writable global for Str so that we can pass it to the
/// run-time lib. The runtime uses the first 4 bytes of the string to store the
726 /// frame ID, so the string needs to be mutable.
727 static GlobalVariable *createPrivateNonConstGlobalForString(Module &M,
728                                                             StringRef Str) {
729   Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
730   return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/false,
731                             GlobalValue::PrivateLinkage, StrConst, "");
732 }
733 
734 /// Create KMSAN API callbacks.
735 void MemorySanitizer::createKernelApi(Module &M) {
736   IRBuilder<> IRB(*C);
737 
738   // These will be initialized in insertKmsanPrologue().
739   RetvalTLS = nullptr;
740   RetvalOriginTLS = nullptr;
741   ParamTLS = nullptr;
742   ParamOriginTLS = nullptr;
743   VAArgTLS = nullptr;
744   VAArgOriginTLS = nullptr;
745   VAArgOverflowSizeTLS = nullptr;
746 
747   WarningFn = M.getOrInsertFunction("__msan_warning", IRB.getVoidTy(),
748                                     IRB.getInt32Ty());
749   // Requests the per-task context state (kmsan_context_state*) from the
750   // runtime library.
751   MsanContextStateTy = StructType::get(
752       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
753       ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8),
754       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
755       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), /* va_arg_origin */
756       IRB.getInt64Ty(), ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy,
757       OriginTy);
758   MsanGetContextStateFn = M.getOrInsertFunction(
759       "__msan_get_context_state", PointerType::get(MsanContextStateTy, 0));
760 
761   Type *RetTy = StructType::get(PointerType::get(IRB.getInt8Ty(), 0),
762                                 PointerType::get(IRB.getInt32Ty(), 0));
763 
764   for (int ind = 0, size = 1; ind < 4; ind++, size <<= 1) {
765     std::string name_load =
766         "__msan_metadata_ptr_for_load_" + std::to_string(size);
767     std::string name_store =
768         "__msan_metadata_ptr_for_store_" + std::to_string(size);
769     MsanMetadataPtrForLoad_1_8[ind] = M.getOrInsertFunction(
770         name_load, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
771     MsanMetadataPtrForStore_1_8[ind] = M.getOrInsertFunction(
772         name_store, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
773   }
774 
775   MsanMetadataPtrForLoadN = M.getOrInsertFunction(
776       "__msan_metadata_ptr_for_load_n", RetTy,
777       PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
778   MsanMetadataPtrForStoreN = M.getOrInsertFunction(
779       "__msan_metadata_ptr_for_store_n", RetTy,
780       PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
781 
782   // Functions for poisoning and unpoisoning memory.
783   MsanPoisonAllocaFn =
784       M.getOrInsertFunction("__msan_poison_alloca", IRB.getVoidTy(),
785                             IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy());
786   MsanUnpoisonAllocaFn = M.getOrInsertFunction(
787       "__msan_unpoison_alloca", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy);
788 }
789 
790 static Constant *getOrInsertGlobal(Module &M, StringRef Name, Type *Ty) {
791   return M.getOrInsertGlobal(Name, Ty, [&] {
792     return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
793                               nullptr, Name, nullptr,
794                               GlobalVariable::InitialExecTLSModel);
795   });
796 }
797 
798 /// Insert declarations for userspace-specific functions and globals.
799 void MemorySanitizer::createUserspaceApi(Module &M) {
800   IRBuilder<> IRB(*C);
801 
802   // Create the callback.
803   // FIXME: this function should have "Cold" calling conv,
804   // which is not yet implemented.
805   StringRef WarningFnName = Recover ? "__msan_warning_with_origin"
806                                     : "__msan_warning_with_origin_noreturn";
807   WarningFn =
808       M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), IRB.getInt32Ty());
809 
810   // Create the global TLS variables.
811   RetvalTLS =
812       getOrInsertGlobal(M, "__msan_retval_tls",
813                         ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8));
814 
815   RetvalOriginTLS = getOrInsertGlobal(M, "__msan_retval_origin_tls", OriginTy);
816 
817   ParamTLS =
818       getOrInsertGlobal(M, "__msan_param_tls",
819                         ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
820 
821   ParamOriginTLS =
822       getOrInsertGlobal(M, "__msan_param_origin_tls",
823                         ArrayType::get(OriginTy, kParamTLSSize / 4));
824 
825   VAArgTLS =
826       getOrInsertGlobal(M, "__msan_va_arg_tls",
827                         ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
828 
829   VAArgOriginTLS =
830       getOrInsertGlobal(M, "__msan_va_arg_origin_tls",
831                         ArrayType::get(OriginTy, kParamTLSSize / 4));
832 
833   VAArgOverflowSizeTLS =
834       getOrInsertGlobal(M, "__msan_va_arg_overflow_size_tls", IRB.getInt64Ty());
835 
836   for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
837        AccessSizeIndex++) {
838     unsigned AccessSize = 1 << AccessSizeIndex;
839     std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize);
840     SmallVector<std::pair<unsigned, Attribute>, 2> MaybeWarningFnAttrs;
841     MaybeWarningFnAttrs.push_back(std::make_pair(
842         AttributeList::FirstArgIndex, Attribute::get(*C, Attribute::ZExt)));
843     MaybeWarningFnAttrs.push_back(std::make_pair(
844         AttributeList::FirstArgIndex + 1, Attribute::get(*C, Attribute::ZExt)));
845     MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
846         FunctionName, AttributeList::get(*C, MaybeWarningFnAttrs),
847         IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty());
848 
849     FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
850     SmallVector<std::pair<unsigned, Attribute>, 2> MaybeStoreOriginFnAttrs;
851     MaybeStoreOriginFnAttrs.push_back(std::make_pair(
852         AttributeList::FirstArgIndex, Attribute::get(*C, Attribute::ZExt)));
853     MaybeStoreOriginFnAttrs.push_back(std::make_pair(
854         AttributeList::FirstArgIndex + 2, Attribute::get(*C, Attribute::ZExt)));
855     MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
856         FunctionName, AttributeList::get(*C, MaybeStoreOriginFnAttrs),
857         IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt8PtrTy(),
858         IRB.getInt32Ty());
859   }
860 
861   MsanSetAllocaOrigin4Fn = M.getOrInsertFunction(
862     "__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
863     IRB.getInt8PtrTy(), IntptrTy);
864   MsanPoisonStackFn =
865       M.getOrInsertFunction("__msan_poison_stack", IRB.getVoidTy(),
866                             IRB.getInt8PtrTy(), IntptrTy);
867 }
868 
/// Insert extern declarations of runtime-provided functions and globals.
870 void MemorySanitizer::initializeCallbacks(Module &M) {
871   // Only do this once.
872   if (CallbacksInitialized)
873     return;
874 
875   IRBuilder<> IRB(*C);
876   // Initialize callbacks that are common for kernel and userspace
877   // instrumentation.
878   MsanChainOriginFn = M.getOrInsertFunction(
879     "__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty());
880   MsanSetOriginFn =
881       M.getOrInsertFunction("__msan_set_origin", IRB.getVoidTy(),
882                             IRB.getInt8PtrTy(), IntptrTy, IRB.getInt32Ty());
883   MemmoveFn = M.getOrInsertFunction(
884     "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
885     IRB.getInt8PtrTy(), IntptrTy);
886   MemcpyFn = M.getOrInsertFunction(
887     "__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
888     IntptrTy);
889   MemsetFn = M.getOrInsertFunction(
890     "__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
891     IntptrTy);
892 
893   MsanInstrumentAsmStoreFn =
894       M.getOrInsertFunction("__msan_instrument_asm_store", IRB.getVoidTy(),
895                             PointerType::get(IRB.getInt8Ty(), 0), IntptrTy);
896 
897   if (CompileKernel) {
898     createKernelApi(M);
899   } else {
900     createUserspaceApi(M);
901   }
902   CallbacksInitialized = true;
903 }
904 
905 FunctionCallee MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore,
906                                                              int size) {
907   FunctionCallee *Fns =
908       isStore ? MsanMetadataPtrForStore_1_8 : MsanMetadataPtrForLoad_1_8;
909   switch (size) {
910   case 1:
911     return Fns[0];
912   case 2:
913     return Fns[1];
914   case 4:
915     return Fns[2];
916   case 8:
917     return Fns[3];
918   default:
919     return nullptr;
920   }
921 }
922 
/// Module-level initialization.
///
/// Determines the memory mapping parameters (MapParams) and creates the
/// module-level types and globals used by the instrumentation.
926 void MemorySanitizer::initializeModule(Module &M) {
927   auto &DL = M.getDataLayout();
928 
929   bool ShadowPassed = ClShadowBase.getNumOccurrences() > 0;
930   bool OriginPassed = ClOriginBase.getNumOccurrences() > 0;
931   // Check the overrides first
932   if (ShadowPassed || OriginPassed) {
933     CustomMapParams.AndMask = ClAndMask;
934     CustomMapParams.XorMask = ClXorMask;
935     CustomMapParams.ShadowBase = ClShadowBase;
936     CustomMapParams.OriginBase = ClOriginBase;
937     MapParams = &CustomMapParams;
938   } else {
939     Triple TargetTriple(M.getTargetTriple());
940     switch (TargetTriple.getOS()) {
941       case Triple::FreeBSD:
942         switch (TargetTriple.getArch()) {
943           case Triple::x86_64:
944             MapParams = FreeBSD_X86_MemoryMapParams.bits64;
945             break;
946           case Triple::x86:
947             MapParams = FreeBSD_X86_MemoryMapParams.bits32;
948             break;
949           default:
950             report_fatal_error("unsupported architecture");
951         }
952         break;
953       case Triple::NetBSD:
954         switch (TargetTriple.getArch()) {
955           case Triple::x86_64:
956             MapParams = NetBSD_X86_MemoryMapParams.bits64;
957             break;
958           default:
959             report_fatal_error("unsupported architecture");
960         }
961         break;
962       case Triple::Linux:
963         switch (TargetTriple.getArch()) {
964           case Triple::x86_64:
965             MapParams = Linux_X86_MemoryMapParams.bits64;
966             break;
967           case Triple::x86:
968             MapParams = Linux_X86_MemoryMapParams.bits32;
969             break;
970           case Triple::mips64:
971           case Triple::mips64el:
972             MapParams = Linux_MIPS_MemoryMapParams.bits64;
973             break;
974           case Triple::ppc64:
975           case Triple::ppc64le:
976             MapParams = Linux_PowerPC_MemoryMapParams.bits64;
977             break;
978           case Triple::systemz:
979             MapParams = Linux_S390_MemoryMapParams.bits64;
980             break;
981           case Triple::aarch64:
982           case Triple::aarch64_be:
983             MapParams = Linux_ARM_MemoryMapParams.bits64;
984             break;
985           default:
986             report_fatal_error("unsupported architecture");
987         }
988         break;
989       default:
990         report_fatal_error("unsupported operating system");
991     }
992   }
993 
994   C = &(M.getContext());
995   IRBuilder<> IRB(*C);
996   IntptrTy = IRB.getIntPtrTy(DL);
997   OriginTy = IRB.getInt32Ty();
998 
999   ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
1000   OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
1001 
1002   if (!CompileKernel) {
1003     if (TrackOrigins)
1004       M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] {
1005         return new GlobalVariable(
1006             M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
1007             IRB.getInt32(TrackOrigins), "__msan_track_origins");
1008       });
1009 
1010     if (Recover)
1011       M.getOrInsertGlobal("__msan_keep_going", IRB.getInt32Ty(), [&] {
1012         return new GlobalVariable(M, IRB.getInt32Ty(), true,
1013                                   GlobalValue::WeakODRLinkage,
1014                                   IRB.getInt32(Recover), "__msan_keep_going");
1015       });
1016 }
1017 }
1018 
1019 bool MemorySanitizerLegacyPass::doInitialization(Module &M) {
1020   if (!Options.Kernel)
1021     insertModuleCtor(M);
1022   MSan.emplace(M, Options);
1023   return true;
1024 }
1025 
1026 namespace {
1027 
1028 /// A helper class that handles instrumentation of VarArg
1029 /// functions on a particular platform.
1030 ///
1031 /// Implementations are expected to insert the instrumentation
1032 /// necessary to propagate argument shadow through VarArg function
1033 /// calls. Visit* methods are called during an InstVisitor pass over
1034 /// the function, and should avoid creating new basic blocks. A new
1035 /// instance of this class is created for each instrumented function.
1036 struct VarArgHelper {
1037   virtual ~VarArgHelper() = default;
1038 
1039   /// Visit a CallBase.
1040   virtual void visitCallBase(CallBase &CB, IRBuilder<> &IRB) = 0;
1041 
1042   /// Visit a va_start call.
1043   virtual void visitVAStartInst(VAStartInst &I) = 0;
1044 
1045   /// Visit a va_copy call.
1046   virtual void visitVACopyInst(VACopyInst &I) = 0;
1047 
1048   /// Finalize function instrumentation.
1049   ///
1050   /// This method is called after visiting all interesting (see above)
1051   /// instructions in a function.
1052   virtual void finalizeInstrumentation() = 0;
1053 };
1054 
1055 struct MemorySanitizerVisitor;
1056 
1057 } // end anonymous namespace
1058 
1059 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
1060                                         MemorySanitizerVisitor &Visitor);
1061 
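// Maps an access size in bits to an index into the MaybeWarningFn /
// MaybeStoreOriginFn tables: 1..8 bits -> 0, 16 -> 1, 32 -> 2, 64 -> 3.
// Larger types produce indices >= kNumberOfAccessSizes, and the callers fall
// back to inline instrumentation for them.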
1062 static unsigned TypeSizeToSizeIndex(unsigned TypeSize) {
1063   if (TypeSize <= 8) return 0;
1064   return Log2_32_Ceil((TypeSize + 7) / 8);
1065 }
1066 
1067 namespace {
1068 
1069 /// This class does all the work for a given function. Store and Load
1070 /// instructions store and load corresponding shadow and origin
1071 /// values. Most instructions propagate shadow from arguments to their
1072 /// return values. Certain instructions (most importantly, BranchInst)
1073 /// test their argument shadow and print reports (with a runtime call) if it's
1074 /// non-zero.
1075 struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
1076   Function &F;
1077   MemorySanitizer &MS;
1078   SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
1079   ValueMap<Value*, Value*> ShadowMap, OriginMap;
1080   std::unique_ptr<VarArgHelper> VAHelper;
1081   const TargetLibraryInfo *TLI;
1082   Instruction *FnPrologueEnd;
1083 
1084   // The following flags disable parts of MSan instrumentation based on
1085   // exclusion list contents and command-line options.
1086   bool InsertChecks;
1087   bool PropagateShadow;
1088   bool PoisonStack;
1089   bool PoisonUndef;
1090 
1091   struct ShadowOriginAndInsertPoint {
1092     Value *Shadow;
1093     Value *Origin;
1094     Instruction *OrigIns;
1095 
1096     ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I)
1097       : Shadow(S), Origin(O), OrigIns(I) {}
1098   };
1099   SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
1100   bool InstrumentLifetimeStart = ClHandleLifetimeIntrinsics;
1101   SmallSet<AllocaInst *, 16> AllocaSet;
1102   SmallVector<std::pair<IntrinsicInst *, AllocaInst *>, 16> LifetimeStartList;
1103   SmallVector<StoreInst *, 16> StoreList;
1104 
1105   MemorySanitizerVisitor(Function &F, MemorySanitizer &MS,
1106                          const TargetLibraryInfo &TLI)
1107       : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)), TLI(&TLI) {
1108     bool SanitizeFunction =
1109         F.hasFnAttribute(Attribute::SanitizeMemory) && !ClDisableChecks;
1110     InsertChecks = SanitizeFunction;
1111     PropagateShadow = SanitizeFunction;
1112     PoisonStack = SanitizeFunction && ClPoisonStack;
1113     PoisonUndef = SanitizeFunction && ClPoisonUndef;
1114 
1115     // In the presence of unreachable blocks, we may see Phi nodes with
1116     // incoming nodes from such blocks. Since InstVisitor skips unreachable
1117     // blocks, such nodes will not have any shadow value associated with them.
1118     // It's easier to remove unreachable blocks than deal with missing shadow.
1119     removeUnreachableBlocks(F);
1120 
1121     MS.initializeCallbacks(*F.getParent());
1122     FnPrologueEnd = IRBuilder<>(F.getEntryBlock().getFirstNonPHI())
1123                         .CreateIntrinsic(Intrinsic::donothing, {}, {});
1124 
1125     if (MS.CompileKernel) {
1126       IRBuilder<> IRB(FnPrologueEnd);
1127       insertKmsanPrologue(IRB);
1128     }
1129 
1130     LLVM_DEBUG(if (!InsertChecks) dbgs()
1131                << "MemorySanitizer is not inserting checks into '"
1132                << F.getName() << "'\n");
1133   }
1134 
1135   bool isInPrologue(Instruction &I) {
1136     return I.getParent() == FnPrologueEnd->getParent() &&
1137            (&I == FnPrologueEnd || I.comesBefore(FnPrologueEnd));
1138   }
1139 
1140   Value *updateOrigin(Value *V, IRBuilder<> &IRB) {
1141     if (MS.TrackOrigins <= 1) return V;
1142     return IRB.CreateCall(MS.MsanChainOriginFn, V);
1143   }
1144 
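  /// Widen a 4-byte origin value to pointer width by replicating it into both
  /// halves, so that paintOrigin() can fill two origin slots with one
  /// pointer-sized store on 64-bit targets.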
1145   Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) {
1146     const DataLayout &DL = F.getParent()->getDataLayout();
1147     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
1148     if (IntptrSize == kOriginSize) return Origin;
1149     assert(IntptrSize == kOriginSize * 2);
1150     Origin = IRB.CreateIntCast(Origin, MS.IntptrTy, /* isSigned */ false);
1151     return IRB.CreateOr(Origin, IRB.CreateShl(Origin, kOriginSize * 8));
1152   }
1153 
1154   /// Fill memory range with the given origin value.
1155   void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
1156                    unsigned Size, Align Alignment) {
1157     const DataLayout &DL = F.getParent()->getDataLayout();
1158     const Align IntptrAlignment = DL.getABITypeAlign(MS.IntptrTy);
1159     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
1160     assert(IntptrAlignment >= kMinOriginAlignment);
1161     assert(IntptrSize >= kOriginSize);
1162 
1163     unsigned Ofs = 0;
1164     Align CurrentAlignment = Alignment;
1165     if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
1166       Value *IntptrOrigin = originToIntptr(IRB, Origin);
1167       Value *IntptrOriginPtr =
1168           IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0));
1169       for (unsigned i = 0; i < Size / IntptrSize; ++i) {
1170         Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i)
1171                        : IntptrOriginPtr;
1172         IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
1173         Ofs += IntptrSize / kOriginSize;
1174         CurrentAlignment = IntptrAlignment;
1175       }
1176     }
1177 
1178     for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
1179       Value *GEP =
1180           i ? IRB.CreateConstGEP1_32(MS.OriginTy, OriginPtr, i) : OriginPtr;
1181       IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
1182       CurrentAlignment = kMinOriginAlignment;
1183     }
1184   }
1185 
1186   void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
1187                    Value *OriginPtr, Align Alignment, bool AsCall) {
1188     const DataLayout &DL = F.getParent()->getDataLayout();
1189     const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1190     unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
1191     Value *ConvertedShadow = convertShadowToScalar(Shadow, IRB);
1192     if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
1193       if (ClCheckConstantShadow && !ConstantShadow->isZeroValue())
1194         paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
1195                     OriginAlignment);
1196       return;
1197     }
1198 
1199     unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
1200     unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
1201     if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
1202       FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex];
1203       Value *ConvertedShadow2 =
1204           IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
1205       CallBase *CB = IRB.CreateCall(
1206           Fn, {ConvertedShadow2,
1207                IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), Origin});
1208       CB->addParamAttr(0, Attribute::ZExt);
1209       CB->addParamAttr(2, Attribute::ZExt);
1210     } else {
1211       Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
1212       Instruction *CheckTerm = SplitBlockAndInsertIfThen(
1213           Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
1214       IRBuilder<> IRBNew(CheckTerm);
1215       paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), OriginPtr, StoreSize,
1216                   OriginAlignment);
1217     }
1218   }
1219 
1220   void materializeStores(bool InstrumentWithCalls) {
1221     for (StoreInst *SI : StoreList) {
1222       IRBuilder<> IRB(SI);
1223       Value *Val = SI->getValueOperand();
1224       Value *Addr = SI->getPointerOperand();
1225       Value *Shadow = SI->isAtomic() ? getCleanShadow(Val) : getShadow(Val);
1226       Value *ShadowPtr, *OriginPtr;
1227       Type *ShadowTy = Shadow->getType();
1228       const Align Alignment = SI->getAlign();
1229       const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1230       std::tie(ShadowPtr, OriginPtr) =
1231           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ true);
1232 
1233       StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment);
1234       LLVM_DEBUG(dbgs() << "  STORE: " << *NewSI << "\n");
1235       (void)NewSI;
1236 
1237       if (SI->isAtomic())
1238         SI->setOrdering(addReleaseOrdering(SI->getOrdering()));
1239 
1240       if (MS.TrackOrigins && !SI->isAtomic())
1241         storeOrigin(IRB, Addr, Shadow, getOrigin(Val), OriginPtr,
1242                     OriginAlignment, InstrumentWithCalls);
1243     }
1244   }
1245 
1246   /// Helper function to insert a warning at IRB's current insert point.
1247   void insertWarningFn(IRBuilder<> &IRB, Value *Origin) {
1248     if (!Origin)
1249       Origin = (Value *)IRB.getInt32(0);
1250     assert(Origin->getType()->isIntegerTy());
1251     IRB.CreateCall(MS.WarningFn, Origin)->setCannotMerge();
1252     // FIXME: Insert UnreachableInst if !MS.Recover?
1253     // This may invalidate some of the following checks and needs to be done
1254     // at the very end.
1255   }
1256 
1257   void materializeOneCheck(Instruction *OrigIns, Value *Shadow, Value *Origin,
1258                            bool AsCall) {
1259     IRBuilder<> IRB(OrigIns);
1260     LLVM_DEBUG(dbgs() << "  SHAD0 : " << *Shadow << "\n");
1261     Value *ConvertedShadow = convertShadowToScalar(Shadow, IRB);
1262     LLVM_DEBUG(dbgs() << "  SHAD1 : " << *ConvertedShadow << "\n");
1263 
1264     if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
1265       if (ClCheckConstantShadow && !ConstantShadow->isZeroValue()) {
1266         insertWarningFn(IRB, Origin);
1267       }
1268       return;
1269     }
1270 
1271     const DataLayout &DL = OrigIns->getModule()->getDataLayout();
1272 
1273     unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
1274     unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
1275     if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
1276       FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
1277       Value *ConvertedShadow2 =
1278           IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
1279       CallBase *CB = IRB.CreateCall(
1280           Fn, {ConvertedShadow2,
1281                MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)});
1282       CB->addParamAttr(0, Attribute::ZExt);
1283       CB->addParamAttr(1, Attribute::ZExt);
1284     } else {
1285       Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
1286       Instruction *CheckTerm = SplitBlockAndInsertIfThen(
1287           Cmp, OrigIns,
1288           /* Unreachable */ !MS.Recover, MS.ColdCallWeights);
1289 
1290       IRB.SetInsertPoint(CheckTerm);
1291       insertWarningFn(IRB, Origin);
1292       LLVM_DEBUG(dbgs() << "  CHECK: " << *Cmp << "\n");
1293     }
1294   }
1295 
1296   void materializeChecks(bool InstrumentWithCalls) {
1297     for (const auto &ShadowData : InstrumentationList) {
1298       Instruction *OrigIns = ShadowData.OrigIns;
1299       Value *Shadow = ShadowData.Shadow;
1300       Value *Origin = ShadowData.Origin;
1301       materializeOneCheck(OrigIns, Shadow, Origin, InstrumentWithCalls);
1302     }
1303     LLVM_DEBUG(dbgs() << "DONE:\n" << F);
1304   }
1305 
  // Obtains the KMSAN per-task context state and sets up the TLS-like
  // pointers (parameter/retval/va_arg shadow and origin) from it.
1307   void insertKmsanPrologue(IRBuilder<> &IRB) {
1308     Value *ContextState = IRB.CreateCall(MS.MsanGetContextStateFn, {});
1309     Constant *Zero = IRB.getInt32(0);
1310     MS.ParamTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1311                                 {Zero, IRB.getInt32(0)}, "param_shadow");
1312     MS.RetvalTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1313                                  {Zero, IRB.getInt32(1)}, "retval_shadow");
1314     MS.VAArgTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1315                                 {Zero, IRB.getInt32(2)}, "va_arg_shadow");
1316     MS.VAArgOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1317                                       {Zero, IRB.getInt32(3)}, "va_arg_origin");
1318     MS.VAArgOverflowSizeTLS =
1319         IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1320                       {Zero, IRB.getInt32(4)}, "va_arg_overflow_size");
1321     MS.ParamOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1322                                       {Zero, IRB.getInt32(5)}, "param_origin");
1323     MS.RetvalOriginTLS =
1324         IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1325                       {Zero, IRB.getInt32(6)}, "retval_origin");
1326   }
1327 
1328   /// Add MemorySanitizer instrumentation to a function.
1329   bool runOnFunction() {
1330     // Iterate all BBs in depth-first order and create shadow instructions
1331     // for all instructions (where applicable).
1332     // For PHI nodes we create dummy shadow PHIs which will be finalized later.
1333     for (BasicBlock *BB : depth_first(FnPrologueEnd->getParent()))
1334       visit(*BB);
1335 
1336     // Finalize PHI nodes.
1337     for (PHINode *PN : ShadowPHINodes) {
1338       PHINode *PNS = cast<PHINode>(getShadow(PN));
1339       PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : nullptr;
1340       size_t NumValues = PN->getNumIncomingValues();
1341       for (size_t v = 0; v < NumValues; v++) {
1342         PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
1343         if (PNO) PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
1344       }
1345     }
1346 
1347     VAHelper->finalizeInstrumentation();
1348 
1349     // Poison allocas at their llvm.lifetime.start sites, unless we have fallen
1350     // back to instrumenting only the allocas themselves.
1351     if (InstrumentLifetimeStart) {
1352       for (auto Item : LifetimeStartList) {
1353         instrumentAlloca(*Item.second, Item.first);
1354         AllocaSet.erase(Item.second);
1355       }
1356     }
1357     // Poison the allocas for which we didn't instrument the corresponding
1358     // lifetime intrinsics.
1359     for (AllocaInst *AI : AllocaSet)
1360       instrumentAlloca(*AI);
1361 
1362     bool InstrumentWithCalls = ClInstrumentationWithCallThreshold >= 0 &&
1363                                InstrumentationList.size() + StoreList.size() >
1364                                    (unsigned)ClInstrumentationWithCallThreshold;
1365 
1366     // Insert shadow value checks.
1367     materializeChecks(InstrumentWithCalls);
1368 
1369     // Delayed instrumentation of StoreInst.
1370     // This must not add any new address checks.
1371     materializeStores(InstrumentWithCalls);
1372 
1373     return true;
1374   }
1375 
1376   /// Compute the shadow type that corresponds to a given Value.
1377   Type *getShadowTy(Value *V) {
1378     return getShadowTy(V->getType());
1379   }
1380 
1381   /// Compute the shadow type that corresponds to a given Type.
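  ///
  /// For example, a float is shadowed by an i32, <2 x float> by <2 x i32>, and
  /// a pointer by an integer of its bit width (i64 on typical 64-bit targets).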
1382   Type *getShadowTy(Type *OrigTy) {
1383     if (!OrigTy->isSized()) {
1384       return nullptr;
1385     }
1386     // For integer type, shadow is the same as the original type.
1387     // This may return weird-sized types like i1.
1388     if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
1389       return IT;
1390     const DataLayout &DL = F.getParent()->getDataLayout();
1391     if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
1392       uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType());
1393       return FixedVectorType::get(IntegerType::get(*MS.C, EltSize),
1394                                   cast<FixedVectorType>(VT)->getNumElements());
1395     }
1396     if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy)) {
1397       return ArrayType::get(getShadowTy(AT->getElementType()),
1398                             AT->getNumElements());
1399     }
1400     if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
1401       SmallVector<Type*, 4> Elements;
1402       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1403         Elements.push_back(getShadowTy(ST->getElementType(i)));
1404       StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked());
1405       LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
1406       return Res;
1407     }
1408     uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
1409     return IntegerType::get(*MS.C, TypeSize);
1410   }
1411 
1412   /// Flatten a vector type to an integer type of the same total bit width.
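  /// E.g. a <4 x i32> shadow is flattened to i128; non-vector types are
  /// returned unchanged.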
1413   Type *getShadowTyNoVec(Type *ty) {
1414     if (VectorType *vt = dyn_cast<VectorType>(ty))
1415       return IntegerType::get(*MS.C,
1416                               vt->getPrimitiveSizeInBits().getFixedSize());
1417     return ty;
1418   }
1419 
1420   /// Extract combined shadow of struct elements as a bool
1421   Value *collapseStructShadow(StructType *Struct, Value *Shadow,
1422                               IRBuilder<> &IRB) {
1423     Value *FalseVal = IRB.getIntN(/* width */ 1, /* value */ 0);
1424     Value *Aggregator = FalseVal;
1425 
1426     for (unsigned Idx = 0; Idx < Struct->getNumElements(); Idx++) {
1427       // Combine by ORing together each element's bool shadow
1428       Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
1429       Value *ShadowInner = convertShadowToScalar(ShadowItem, IRB);
1430       Value *ShadowBool = convertToBool(ShadowInner, IRB);
1431 
1432       if (Aggregator != FalseVal)
1433         Aggregator = IRB.CreateOr(Aggregator, ShadowBool);
1434       else
1435         Aggregator = ShadowBool;
1436     }
1437 
1438     return Aggregator;
1439   }
1440 
1441   // Extract combined shadow of array elements
1442   Value *collapseArrayShadow(ArrayType *Array, Value *Shadow,
1443                              IRBuilder<> &IRB) {
1444     if (!Array->getNumElements())
1445       return IRB.getIntN(/* width */ 1, /* value */ 0);
1446 
1447     Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);
1448     Value *Aggregator = convertShadowToScalar(FirstItem, IRB);
1449 
1450     for (unsigned Idx = 1; Idx < Array->getNumElements(); Idx++) {
1451       Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
1452       Value *ShadowInner = convertShadowToScalar(ShadowItem, IRB);
1453       Aggregator = IRB.CreateOr(Aggregator, ShadowInner);
1454     }
1455     return Aggregator;
1456   }
1457 
1458   /// Convert a shadow value to its flattened variant. The resulting
1459   /// shadow may not necessarily have the same bit width as the input
1460   /// value, but it will always be comparable to zero.
1461   Value *convertShadowToScalar(Value *V, IRBuilder<> &IRB) {
1462     if (StructType *Struct = dyn_cast<StructType>(V->getType()))
1463       return collapseStructShadow(Struct, V, IRB);
1464     if (ArrayType *Array = dyn_cast<ArrayType>(V->getType()))
1465       return collapseArrayShadow(Array, V, IRB);
1466     Type *Ty = V->getType();
1467     Type *NoVecTy = getShadowTyNoVec(Ty);
1468     if (Ty == NoVecTy) return V;
1469     return IRB.CreateBitCast(V, NoVecTy);
1470   }
1471 
1472   // Convert a scalar value to an i1 by comparing with 0
1473   Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &name = "") {
1474     Type *VTy = V->getType();
1475     assert(VTy->isIntegerTy());
1476     if (VTy->getIntegerBitWidth() == 1)
1477       // Just converting a bool to a bool, so do nothing.
1478       return V;
1479     return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), name);
1480   }
1481 
1482   /// Compute the integer shadow offset that corresponds to a given
1483   /// application address.
1484   ///
1485   /// Offset = (Addr & ~AndMask) ^ XorMask
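  ///
  /// For instance, with the usual Linux x86_64 parameters (AndMask == 0,
  /// XorMask == 0x500000000000) this reduces to Addr ^ 0x500000000000; the
  /// exact constants are platform-specific and come from MS.MapParams.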
1486   Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) {
1487     Value *OffsetLong = IRB.CreatePointerCast(Addr, MS.IntptrTy);
1488 
1489     uint64_t AndMask = MS.MapParams->AndMask;
1490     if (AndMask)
1491       OffsetLong =
1492           IRB.CreateAnd(OffsetLong, ConstantInt::get(MS.IntptrTy, ~AndMask));
1493 
1494     uint64_t XorMask = MS.MapParams->XorMask;
1495     if (XorMask)
1496       OffsetLong =
1497           IRB.CreateXor(OffsetLong, ConstantInt::get(MS.IntptrTy, XorMask));
1498     return OffsetLong;
1499   }
1500 
1501   /// Compute the shadow and origin addresses corresponding to a given
1502   /// application address.
1503   ///
1504   /// Shadow = ShadowBase + Offset
1505   /// Origin = (OriginBase + Offset) & ~3ULL
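  ///
  /// (Masking with ~3ULL keeps the origin address 4-byte aligned, matching the
  /// one-origin-per-4-bytes granularity of the origin mapping.)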
1506   std::pair<Value *, Value *>
1507   getShadowOriginPtrUserspace(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy,
1508                               MaybeAlign Alignment) {
1509     Value *ShadowOffset = getShadowPtrOffset(Addr, IRB);
1510     Value *ShadowLong = ShadowOffset;
1511     uint64_t ShadowBase = MS.MapParams->ShadowBase;
1512     if (ShadowBase != 0) {
1513       ShadowLong =
1514         IRB.CreateAdd(ShadowLong,
1515                       ConstantInt::get(MS.IntptrTy, ShadowBase));
1516     }
1517     Value *ShadowPtr =
1518         IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
1519     Value *OriginPtr = nullptr;
1520     if (MS.TrackOrigins) {
1521       Value *OriginLong = ShadowOffset;
1522       uint64_t OriginBase = MS.MapParams->OriginBase;
1523       if (OriginBase != 0)
1524         OriginLong = IRB.CreateAdd(OriginLong,
1525                                    ConstantInt::get(MS.IntptrTy, OriginBase));
1526       if (!Alignment || *Alignment < kMinOriginAlignment) {
1527         uint64_t Mask = kMinOriginAlignment.value() - 1;
1528         OriginLong =
1529             IRB.CreateAnd(OriginLong, ConstantInt::get(MS.IntptrTy, ~Mask));
1530       }
1531       OriginPtr =
1532           IRB.CreateIntToPtr(OriginLong, PointerType::get(MS.OriginTy, 0));
1533     }
1534     return std::make_pair(ShadowPtr, OriginPtr);
1535   }
1536 
1537   std::pair<Value *, Value *> getShadowOriginPtrKernel(Value *Addr,
1538                                                        IRBuilder<> &IRB,
1539                                                        Type *ShadowTy,
1540                                                        bool isStore) {
1541     Value *ShadowOriginPtrs;
1542     const DataLayout &DL = F.getParent()->getDataLayout();
1543     int Size = DL.getTypeStoreSize(ShadowTy);
1544 
1545     FunctionCallee Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
1546     Value *AddrCast =
1547         IRB.CreatePointerCast(Addr, PointerType::get(IRB.getInt8Ty(), 0));
1548     if (Getter) {
1549       ShadowOriginPtrs = IRB.CreateCall(Getter, AddrCast);
1550     } else {
1551       Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
1552       ShadowOriginPtrs = IRB.CreateCall(isStore ? MS.MsanMetadataPtrForStoreN
1553                                                 : MS.MsanMetadataPtrForLoadN,
1554                                         {AddrCast, SizeVal});
1555     }
1556     Value *ShadowPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 0);
1557     ShadowPtr = IRB.CreatePointerCast(ShadowPtr, PointerType::get(ShadowTy, 0));
1558     Value *OriginPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 1);
1559 
1560     return std::make_pair(ShadowPtr, OriginPtr);
1561   }
1562 
1563   std::pair<Value *, Value *> getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB,
1564                                                  Type *ShadowTy,
1565                                                  MaybeAlign Alignment,
1566                                                  bool isStore) {
1567     if (MS.CompileKernel)
1568       return getShadowOriginPtrKernel(Addr, IRB, ShadowTy, isStore);
1569     return getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment);
1570   }
1571 
1572   /// Compute the shadow address for a given function argument.
1573   ///
1574   /// Shadow = ParamTLS+ArgOffset.
1575   Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB,
1576                                  int ArgOffset) {
1577     Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
1578     if (ArgOffset)
1579       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1580     return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
1581                               "_msarg");
1582   }
1583 
1584   /// Compute the origin address for a given function argument.
1585   Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB,
1586                                  int ArgOffset) {
1587     if (!MS.TrackOrigins)
1588       return nullptr;
1589     Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
1590     if (ArgOffset)
1591       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1592     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
1593                               "_msarg_o");
1594   }
1595 
1596   /// Compute the shadow address for a retval.
1597   Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) {
1598     return IRB.CreatePointerCast(MS.RetvalTLS,
1599                                  PointerType::get(getShadowTy(A), 0),
1600                                  "_msret");
1601   }
1602 
1603   /// Compute the origin address for a retval.
1604   Value *getOriginPtrForRetval(IRBuilder<> &IRB) {
1605     // We keep a single origin for the entire retval. Might be too optimistic.
1606     return MS.RetvalOriginTLS;
1607   }
1608 
1609   /// Set SV to be the shadow value for V.
1610   void setShadow(Value *V, Value *SV) {
1611     assert(!ShadowMap.count(V) && "Values may only have one shadow");
1612     ShadowMap[V] = PropagateShadow ? SV : getCleanShadow(V);
1613   }
1614 
1615   /// Set Origin to be the origin value for V.
1616   void setOrigin(Value *V, Value *Origin) {
1617     if (!MS.TrackOrigins) return;
1618     assert(!OriginMap.count(V) && "Values may only have one origin");
1619     LLVM_DEBUG(dbgs() << "ORIGIN: " << *V << "  ==> " << *Origin << "\n");
1620     OriginMap[V] = Origin;
1621   }
1622 
1623   Constant *getCleanShadow(Type *OrigTy) {
1624     Type *ShadowTy = getShadowTy(OrigTy);
1625     if (!ShadowTy)
1626       return nullptr;
1627     return Constant::getNullValue(ShadowTy);
1628   }
1629 
1630   /// Create a clean shadow value for a given value.
1631   ///
1632   /// Clean shadow (all zeroes) means all bits of the value are defined
1633   /// (initialized).
1634   Constant *getCleanShadow(Value *V) {
1635     return getCleanShadow(V->getType());
1636   }
1637 
1638   /// Create a dirty shadow of a given shadow type.
1639   Constant *getPoisonedShadow(Type *ShadowTy) {
1640     assert(ShadowTy);
1641     if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
1642       return Constant::getAllOnesValue(ShadowTy);
1643     if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy)) {
1644       SmallVector<Constant *, 4> Vals(AT->getNumElements(),
1645                                       getPoisonedShadow(AT->getElementType()));
1646       return ConstantArray::get(AT, Vals);
1647     }
1648     if (StructType *ST = dyn_cast<StructType>(ShadowTy)) {
1649       SmallVector<Constant *, 4> Vals;
1650       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1651         Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
1652       return ConstantStruct::get(ST, Vals);
1653     }
1654     llvm_unreachable("Unexpected shadow type");
1655   }
1656 
1657   /// Create a dirty shadow for a given value.
1658   Constant *getPoisonedShadow(Value *V) {
1659     Type *ShadowTy = getShadowTy(V);
1660     if (!ShadowTy)
1661       return nullptr;
1662     return getPoisonedShadow(ShadowTy);
1663   }
1664 
1665   /// Create a clean (zero) origin.
1666   Value *getCleanOrigin() {
1667     return Constant::getNullValue(MS.OriginTy);
1668   }
1669 
1670   /// Get the shadow value for a given Value.
1671   ///
1672   /// This function either returns the value set earlier with setShadow,
1673   /// or extracts it from ParamTLS (for function arguments).
1674   Value *getShadow(Value *V) {
1675     if (!PropagateShadow) return getCleanShadow(V);
1676     if (Instruction *I = dyn_cast<Instruction>(V)) {
1677       if (I->getMetadata("nosanitize"))
1678         return getCleanShadow(V);
1679       // For instructions the shadow is already stored in the map.
1680       Value *Shadow = ShadowMap[V];
1681       if (!Shadow) {
1682         LLVM_DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
1683         (void)I;
1684         assert(Shadow && "No shadow for a value");
1685       }
1686       return Shadow;
1687     }
1688     if (UndefValue *U = dyn_cast<UndefValue>(V)) {
1689       Value *AllOnes = PoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V);
1690       LLVM_DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
1691       (void)U;
1692       return AllOnes;
1693     }
1694     if (Argument *A = dyn_cast<Argument>(V)) {
1695       // For arguments we compute the shadow on demand and store it in the map.
1696       Value **ShadowPtr = &ShadowMap[V];
1697       if (*ShadowPtr)
1698         return *ShadowPtr;
1699       Function *F = A->getParent();
1700       IRBuilder<> EntryIRB(FnPrologueEnd);
1701       unsigned ArgOffset = 0;
1702       const DataLayout &DL = F->getParent()->getDataLayout();
1703       for (auto &FArg : F->args()) {
1704         if (!FArg.getType()->isSized()) {
1705           LLVM_DEBUG(dbgs() << "Arg is not sized\n");
1706           continue;
1707         }
1708 
1709         bool FArgByVal = FArg.hasByValAttr();
1710         bool FArgNoUndef = FArg.hasAttribute(Attribute::NoUndef);
1711         bool FArgEagerCheck = MS.EagerChecks && !FArgByVal && FArgNoUndef;
1712         unsigned Size =
1713             FArg.hasByValAttr()
1714                 ? DL.getTypeAllocSize(FArg.getParamByValType())
1715                 : DL.getTypeAllocSize(FArg.getType());
1716 
1717         if (A == &FArg) {
1718           bool Overflow = ArgOffset + Size > kParamTLSSize;
1719           if (FArgEagerCheck) {
1720             *ShadowPtr = getCleanShadow(V);
1721             setOrigin(A, getCleanOrigin());
1722             break;
1723           } else if (FArgByVal) {
1724             Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
1725             // ByVal pointer itself has clean shadow. We copy the actual
1726             // argument shadow to the underlying memory.
1727             // Figure out maximal valid memcpy alignment.
1728             const Align ArgAlign = DL.getValueOrABITypeAlignment(
1729                 MaybeAlign(FArg.getParamAlignment()), FArg.getParamByValType());
1730             Value *CpShadowPtr =
1731                 getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign,
1732                                    /*isStore*/ true)
1733                     .first;
1734             // TODO(glider): need to copy origins.
1735             if (Overflow) {
1736               // ParamTLS overflow.
1737               EntryIRB.CreateMemSet(
1738                   CpShadowPtr, Constant::getNullValue(EntryIRB.getInt8Ty()),
1739                   Size, ArgAlign);
1740             } else {
1741               const Align CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
1742               Value *Cpy = EntryIRB.CreateMemCpy(CpShadowPtr, CopyAlign, Base,
1743                                                  CopyAlign, Size);
1744               LLVM_DEBUG(dbgs() << "  ByValCpy: " << *Cpy << "\n");
1745               (void)Cpy;
1746             }
1747             *ShadowPtr = getCleanShadow(V);
1748           } else {
1749             // Shadow over TLS
1750             Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
1751             if (Overflow) {
1752               // ParamTLS overflow.
1753               *ShadowPtr = getCleanShadow(V);
1754             } else {
1755               *ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base,
1756                                                       kShadowTLSAlignment);
1757             }
1758           }
1759           LLVM_DEBUG(dbgs()
1760                      << "  ARG:    " << FArg << " ==> " << **ShadowPtr << "\n");
1761           if (MS.TrackOrigins && !Overflow) {
1762             Value *OriginPtr =
1763                 getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
1764             setOrigin(A, EntryIRB.CreateLoad(MS.OriginTy, OriginPtr));
1765           } else {
1766             setOrigin(A, getCleanOrigin());
1767           }
1768 
1769           break;
1770         }
1771 
1772         ArgOffset += alignTo(Size, kShadowTLSAlignment);
1773       }
1774       assert(*ShadowPtr && "Could not find shadow for an argument");
1775       return *ShadowPtr;
1776     }
1777     // For everything else the shadow is zero.
1778     return getCleanShadow(V);
1779   }
1780 
1781   /// Get the shadow for i-th argument of the instruction I.
1782   Value *getShadow(Instruction *I, int i) {
1783     return getShadow(I->getOperand(i));
1784   }
1785 
1786   /// Get the origin for a value.
1787   Value *getOrigin(Value *V) {
1788     if (!MS.TrackOrigins) return nullptr;
1789     if (!PropagateShadow) return getCleanOrigin();
1790     if (isa<Constant>(V)) return getCleanOrigin();
1791     assert((isa<Instruction>(V) || isa<Argument>(V)) &&
1792            "Unexpected value type in getOrigin()");
1793     if (Instruction *I = dyn_cast<Instruction>(V)) {
1794       if (I->getMetadata("nosanitize"))
1795         return getCleanOrigin();
1796     }
1797     Value *Origin = OriginMap[V];
1798     assert(Origin && "Missing origin");
1799     return Origin;
1800   }
1801 
1802   /// Get the origin for i-th argument of the instruction I.
1803   Value *getOrigin(Instruction *I, int i) {
1804     return getOrigin(I->getOperand(i));
1805   }
1806 
1807   /// Remember the place where a shadow check should be inserted.
1808   ///
1809   /// This location will be later instrumented with a check that will print a
1810   /// UMR warning in runtime if the shadow value is not 0.
1811   void insertShadowCheck(Value *Shadow, Value *Origin, Instruction *OrigIns) {
1812     assert(Shadow);
1813     if (!InsertChecks) return;
1814 #ifndef NDEBUG
1815     Type *ShadowTy = Shadow->getType();
1816     assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy) ||
1817             isa<StructType>(ShadowTy) || isa<ArrayType>(ShadowTy)) &&
1818            "Can only insert checks for integer, vector, and aggregate shadow "
1819            "types");
1820 #endif
1821     InstrumentationList.push_back(
1822         ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
1823   }
1824 
1825   /// Remember the place where a shadow check should be inserted.
1826   ///
1827   /// This location will be later instrumented with a check that will print a
1828   /// UMR warning in runtime if the value is not fully defined.
1829   void insertShadowCheck(Value *Val, Instruction *OrigIns) {
1830     assert(Val);
1831     Value *Shadow, *Origin;
1832     if (ClCheckConstantShadow) {
1833       Shadow = getShadow(Val);
1834       if (!Shadow) return;
1835       Origin = getOrigin(Val);
1836     } else {
1837       Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
1838       if (!Shadow) return;
1839       Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
1840     }
1841     insertShadowCheck(Shadow, Origin, OrigIns);
1842   }
1843 
1844   AtomicOrdering addReleaseOrdering(AtomicOrdering a) {
1845     switch (a) {
1846       case AtomicOrdering::NotAtomic:
1847         return AtomicOrdering::NotAtomic;
1848       case AtomicOrdering::Unordered:
1849       case AtomicOrdering::Monotonic:
1850       case AtomicOrdering::Release:
1851         return AtomicOrdering::Release;
1852       case AtomicOrdering::Acquire:
1853       case AtomicOrdering::AcquireRelease:
1854         return AtomicOrdering::AcquireRelease;
1855       case AtomicOrdering::SequentiallyConsistent:
1856         return AtomicOrdering::SequentiallyConsistent;
1857     }
1858     llvm_unreachable("Unknown ordering");
1859   }
1860 
1861   Value *makeAddReleaseOrderingTable(IRBuilder<> &IRB) {
1862     constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
1863     uint32_t OrderingTable[NumOrderings] = {};
1864 
1865     OrderingTable[(int)AtomicOrderingCABI::relaxed] =
1866         OrderingTable[(int)AtomicOrderingCABI::release] =
1867             (int)AtomicOrderingCABI::release;
1868     OrderingTable[(int)AtomicOrderingCABI::consume] =
1869         OrderingTable[(int)AtomicOrderingCABI::acquire] =
1870             OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
1871                 (int)AtomicOrderingCABI::acq_rel;
1872     OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
1873         (int)AtomicOrderingCABI::seq_cst;
1874 
1875     return ConstantDataVector::get(IRB.getContext(),
1876                                    makeArrayRef(OrderingTable, NumOrderings));
1877   }
1878 
1879   AtomicOrdering addAcquireOrdering(AtomicOrdering a) {
1880     switch (a) {
1881       case AtomicOrdering::NotAtomic:
1882         return AtomicOrdering::NotAtomic;
1883       case AtomicOrdering::Unordered:
1884       case AtomicOrdering::Monotonic:
1885       case AtomicOrdering::Acquire:
1886         return AtomicOrdering::Acquire;
1887       case AtomicOrdering::Release:
1888       case AtomicOrdering::AcquireRelease:
1889         return AtomicOrdering::AcquireRelease;
1890       case AtomicOrdering::SequentiallyConsistent:
1891         return AtomicOrdering::SequentiallyConsistent;
1892     }
1893     llvm_unreachable("Unknown ordering");
1894   }
1895 
1896   Value *makeAddAcquireOrderingTable(IRBuilder<> &IRB) {
1897     constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
1898     uint32_t OrderingTable[NumOrderings] = {};
1899 
1900     OrderingTable[(int)AtomicOrderingCABI::relaxed] =
1901         OrderingTable[(int)AtomicOrderingCABI::acquire] =
1902             OrderingTable[(int)AtomicOrderingCABI::consume] =
1903                 (int)AtomicOrderingCABI::acquire;
1904     OrderingTable[(int)AtomicOrderingCABI::release] =
1905         OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
1906             (int)AtomicOrderingCABI::acq_rel;
1907     OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
1908         (int)AtomicOrderingCABI::seq_cst;
1909 
1910     return ConstantDataVector::get(IRB.getContext(),
1911                                    makeArrayRef(OrderingTable, NumOrderings));
1912   }
1913 
1914   // ------------------- Visitors.
1915   using InstVisitor<MemorySanitizerVisitor>::visit;
1916   void visit(Instruction &I) {
1917     if (I.getMetadata("nosanitize"))
1918       return;
1919     // Don't want to visit if we're in the prologue
1920     if (isInPrologue(I))
1921       return;
1922     InstVisitor<MemorySanitizerVisitor>::visit(I);
1923   }
1924 
1925   /// Instrument LoadInst
1926   ///
1927   /// Loads the corresponding shadow and (optionally) origin.
1928   /// Optionally, checks that the load address is fully defined.
1929   void visitLoadInst(LoadInst &I) {
1930     assert(I.getType()->isSized() && "Load type must have size");
1931     assert(!I.getMetadata("nosanitize"));
1932     IRBuilder<> IRB(I.getNextNode());
1933     Type *ShadowTy = getShadowTy(&I);
1934     Value *Addr = I.getPointerOperand();
1935     Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
1936     const Align Alignment = assumeAligned(I.getAlignment());
1937     if (PropagateShadow) {
1938       std::tie(ShadowPtr, OriginPtr) =
1939           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
1940       setShadow(&I,
1941                 IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
1942     } else {
1943       setShadow(&I, getCleanShadow(&I));
1944     }
1945 
1946     if (ClCheckAccessAddress)
1947       insertShadowCheck(I.getPointerOperand(), &I);
1948 
1949     if (I.isAtomic())
1950       I.setOrdering(addAcquireOrdering(I.getOrdering()));
1951 
1952     if (MS.TrackOrigins) {
1953       if (PropagateShadow) {
1954         const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1955         setOrigin(
1956             &I, IRB.CreateAlignedLoad(MS.OriginTy, OriginPtr, OriginAlignment));
1957       } else {
1958         setOrigin(&I, getCleanOrigin());
1959       }
1960     }
1961   }
1962 
1963   /// Instrument StoreInst
1964   ///
1965   /// Stores the corresponding shadow and (optionally) origin.
1966   /// Optionally, checks that the store address is fully defined.
1967   void visitStoreInst(StoreInst &I) {
1968     StoreList.push_back(&I);
1969     if (ClCheckAccessAddress)
1970       insertShadowCheck(I.getPointerOperand(), &I);
1971   }
1972 
1973   void handleCASOrRMW(Instruction &I) {
1974     assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
1975 
1976     IRBuilder<> IRB(&I);
1977     Value *Addr = I.getOperand(0);
1978     Value *Val = I.getOperand(1);
1979     Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, Val->getType(), Align(1),
1980                                           /*isStore*/ true)
1981                            .first;
1982 
1983     if (ClCheckAccessAddress)
1984       insertShadowCheck(Addr, &I);
1985 
1986     // Only test the conditional argument of cmpxchg instruction.
1987     // The other argument can potentially be uninitialized, but we can not
1988     // detect this situation reliably without possible false positives.
1989     if (isa<AtomicCmpXchgInst>(I))
1990       insertShadowCheck(Val, &I);
1991 
1992     IRB.CreateStore(getCleanShadow(Val), ShadowPtr);
1993 
1994     setShadow(&I, getCleanShadow(&I));
1995     setOrigin(&I, getCleanOrigin());
1996   }
1997 
1998   void visitAtomicRMWInst(AtomicRMWInst &I) {
1999     handleCASOrRMW(I);
2000     I.setOrdering(addReleaseOrdering(I.getOrdering()));
2001   }
2002 
2003   void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
2004     handleCASOrRMW(I);
2005     I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
2006   }
2007 
2008   // Vector manipulation.
2009   void visitExtractElementInst(ExtractElementInst &I) {
2010     insertShadowCheck(I.getOperand(1), &I);
2011     IRBuilder<> IRB(&I);
2012     setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
2013               "_msprop"));
2014     setOrigin(&I, getOrigin(&I, 0));
2015   }
2016 
2017   void visitInsertElementInst(InsertElementInst &I) {
2018     insertShadowCheck(I.getOperand(2), &I);
2019     IRBuilder<> IRB(&I);
2020     setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1),
2021               I.getOperand(2), "_msprop"));
2022     setOriginForNaryOp(I);
2023   }
2024 
2025   void visitShuffleVectorInst(ShuffleVectorInst &I) {
2026     IRBuilder<> IRB(&I);
2027     setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1),
2028                                           I.getShuffleMask(), "_msprop"));
2029     setOriginForNaryOp(I);
2030   }
2031 
2032   // Casts.
2033   void visitSExtInst(SExtInst &I) {
2034     IRBuilder<> IRB(&I);
2035     setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop"));
2036     setOrigin(&I, getOrigin(&I, 0));
2037   }
2038 
2039   void visitZExtInst(ZExtInst &I) {
2040     IRBuilder<> IRB(&I);
2041     setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop"));
2042     setOrigin(&I, getOrigin(&I, 0));
2043   }
2044 
2045   void visitTruncInst(TruncInst &I) {
2046     IRBuilder<> IRB(&I);
2047     setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop"));
2048     setOrigin(&I, getOrigin(&I, 0));
2049   }
2050 
2051   void visitBitCastInst(BitCastInst &I) {
2052     // Special case: if this is the bitcast (there is exactly 1 allowed) between
2053     // a musttail call and a ret, don't instrument. New instructions are not
2054     // allowed after a musttail call.
2055     if (auto *CI = dyn_cast<CallInst>(I.getOperand(0)))
2056       if (CI->isMustTailCall())
2057         return;
2058     IRBuilder<> IRB(&I);
2059     setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
2060     setOrigin(&I, getOrigin(&I, 0));
2061   }
2062 
2063   void visitPtrToIntInst(PtrToIntInst &I) {
2064     IRBuilder<> IRB(&I);
2065     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
2066              "_msprop_ptrtoint"));
2067     setOrigin(&I, getOrigin(&I, 0));
2068   }
2069 
2070   void visitIntToPtrInst(IntToPtrInst &I) {
2071     IRBuilder<> IRB(&I);
2072     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
2073              "_msprop_inttoptr"));
2074     setOrigin(&I, getOrigin(&I, 0));
2075   }
2076 
2077   void visitFPToSIInst(CastInst& I) { handleShadowOr(I); }
2078   void visitFPToUIInst(CastInst& I) { handleShadowOr(I); }
2079   void visitSIToFPInst(CastInst& I) { handleShadowOr(I); }
2080   void visitUIToFPInst(CastInst& I) { handleShadowOr(I); }
2081   void visitFPExtInst(CastInst& I) { handleShadowOr(I); }
2082   void visitFPTruncInst(CastInst& I) { handleShadowOr(I); }
2083 
2084   /// Propagate shadow for bitwise AND.
2085   ///
2086   /// This code is exact, i.e. if, for example, a bit in the left argument
2087   /// is defined and 0, then neither the value nor the definedness of the
2088   /// corresponding bit in B affects the resulting shadow.
2089   void visitAnd(BinaryOperator &I) {
2090     IRBuilder<> IRB(&I);
2091     //  "And" of 0 and a poisoned value results in unpoisoned value.
2092     //  1&1 => 1;     0&1 => 0;     p&1 => p;
2093     //  1&0 => 0;     0&0 => 0;     p&0 => 0;
2094     //  1&p => p;     0&p => 0;     p&p => p;
2095     //  S = (S1 & S2) | (V1 & S2) | (S1 & V2)
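    //  For example, if some bit of V1 is a defined 0 (S1 = 0 and V1 = 0 for
    //  that bit), all three terms are 0 for it and the resulting shadow bit is
    //  a defined 0, regardless of V2 and S2.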
2096     Value *S1 = getShadow(&I, 0);
2097     Value *S2 = getShadow(&I, 1);
2098     Value *V1 = I.getOperand(0);
2099     Value *V2 = I.getOperand(1);
2100     if (V1->getType() != S1->getType()) {
2101       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
2102       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
2103     }
2104     Value *S1S2 = IRB.CreateAnd(S1, S2);
2105     Value *V1S2 = IRB.CreateAnd(V1, S2);
2106     Value *S1V2 = IRB.CreateAnd(S1, V2);
2107     setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
2108     setOriginForNaryOp(I);
2109   }
2110 
2111   void visitOr(BinaryOperator &I) {
2112     IRBuilder<> IRB(&I);
2113     //  "Or" of 1 and a poisoned value results in unpoisoned value.
2114     //  1|1 => 1;     0|1 => 1;     p|1 => 1;
2115     //  1|0 => 1;     0|0 => 0;     p|0 => p;
2116     //  1|p => 1;     0|p => p;     p|p => p;
2117     //  S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
2118     Value *S1 = getShadow(&I, 0);
2119     Value *S2 = getShadow(&I, 1);
2120     Value *V1 = IRB.CreateNot(I.getOperand(0));
2121     Value *V2 = IRB.CreateNot(I.getOperand(1));
2122     if (V1->getType() != S1->getType()) {
2123       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
2124       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
2125     }
2126     Value *S1S2 = IRB.CreateAnd(S1, S2);
2127     Value *V1S2 = IRB.CreateAnd(V1, S2);
2128     Value *S1V2 = IRB.CreateAnd(S1, V2);
2129     setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
2130     setOriginForNaryOp(I);
2131   }
2132 
2133   /// Default propagation of shadow and/or origin.
2134   ///
2135   /// This class implements the general case of shadow propagation, used in all
2136   /// cases where we don't know and/or don't care about what the operation
2137   /// actually does. It converts all input shadow values to a common type
2138   /// (extending or truncating as necessary), and bitwise OR's them.
2139   ///
2140   /// This is much cheaper than inserting checks (i.e. requiring inputs to be
2141   /// fully initialized), and less prone to false positives.
2142   ///
2143   /// This class also implements the general case of origin propagation. For an
2144   /// N-ary operation, the result origin is set to the origin of an argument that
2145   /// is not entirely initialized. If there is more than one such argument, the
2146   /// rightmost of them is picked. It does not matter which one is picked if all
2147   /// arguments are initialized.
2148   template <bool CombineShadow>
2149   class Combiner {
2150     Value *Shadow = nullptr;
2151     Value *Origin = nullptr;
2152     IRBuilder<> &IRB;
2153     MemorySanitizerVisitor *MSV;
2154 
2155   public:
2156     Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB)
2157         : IRB(IRB), MSV(MSV) {}
2158 
2159     /// Add a pair of shadow and origin values to the mix.
2160     Combiner &Add(Value *OpShadow, Value *OpOrigin) {
2161       if (CombineShadow) {
2162         assert(OpShadow);
2163         if (!Shadow)
2164           Shadow = OpShadow;
2165         else {
2166           OpShadow = MSV->CreateShadowCast(IRB, OpShadow, Shadow->getType());
2167           Shadow = IRB.CreateOr(Shadow, OpShadow, "_msprop");
2168         }
2169       }
2170 
2171       if (MSV->MS.TrackOrigins) {
2172         assert(OpOrigin);
2173         if (!Origin) {
2174           Origin = OpOrigin;
2175         } else {
2176           Constant *ConstOrigin = dyn_cast<Constant>(OpOrigin);
2177           // No point in adding something that might result in 0 origin value.
2178           if (!ConstOrigin || !ConstOrigin->isNullValue()) {
2179             Value *FlatShadow = MSV->convertShadowToScalar(OpShadow, IRB);
2180             Value *Cond =
2181                 IRB.CreateICmpNE(FlatShadow, MSV->getCleanShadow(FlatShadow));
2182             Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
2183           }
2184         }
2185       }
2186       return *this;
2187     }
2188 
2189     /// Add an application value to the mix.
2190     Combiner &Add(Value *V) {
2191       Value *OpShadow = MSV->getShadow(V);
2192       Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : nullptr;
2193       return Add(OpShadow, OpOrigin);
2194     }
2195 
2196     /// Set the current combined values as the given instruction's shadow
2197     /// and origin.
2198     void Done(Instruction *I) {
2199       if (CombineShadow) {
2200         assert(Shadow);
2201         Shadow = MSV->CreateShadowCast(IRB, Shadow, MSV->getShadowTy(I));
2202         MSV->setShadow(I, Shadow);
2203       }
2204       if (MSV->MS.TrackOrigins) {
2205         assert(Origin);
2206         MSV->setOrigin(I, Origin);
2207       }
2208     }
2209   };
2210 
2211   using ShadowAndOriginCombiner = Combiner<true>;
2212   using OriginCombiner = Combiner<false>;
2213 
2214   /// Propagate origin for arbitrary operation.
2215   void setOriginForNaryOp(Instruction &I) {
2216     if (!MS.TrackOrigins) return;
2217     IRBuilder<> IRB(&I);
2218     OriginCombiner OC(this, IRB);
2219     for (Use &Op : I.operands())
2220       OC.Add(Op.get());
2221     OC.Done(&I);
2222   }
2223 
2224   size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) {
2225     assert(!(Ty->isVectorTy() && Ty->getScalarType()->isPointerTy()) &&
2226            "Vector of pointers is not a valid shadow type");
2227     return Ty->isVectorTy() ? cast<FixedVectorType>(Ty)->getNumElements() *
2228                                   Ty->getScalarSizeInBits()
2229                             : Ty->getPrimitiveSizeInBits();
2230   }
2231 
2232   /// Cast between two shadow types, extending or truncating as
2233   /// necessary.
2234   Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy,
2235                           bool Signed = false) {
2236     Type *srcTy = V->getType();
2237     size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
2238     size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
2239     if (srcSizeInBits > 1 && dstSizeInBits == 1)
2240       return IRB.CreateICmpNE(V, getCleanShadow(V));
2241 
2242     if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
2243       return IRB.CreateIntCast(V, dstTy, Signed);
2244     if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
2245         cast<FixedVectorType>(dstTy)->getNumElements() ==
2246             cast<FixedVectorType>(srcTy)->getNumElements())
2247       return IRB.CreateIntCast(V, dstTy, Signed);
2248     Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
2249     Value *V2 =
2250       IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed);
2251     return IRB.CreateBitCast(V2, dstTy);
2252     // TODO: handle struct types.
2253   }
2254 
2255   /// Cast an application value to the type of its own shadow.
2256   Value *CreateAppToShadowCast(IRBuilder<> &IRB, Value *V) {
2257     Type *ShadowTy = getShadowTy(V);
2258     if (V->getType() == ShadowTy)
2259       return V;
2260     if (V->getType()->isPtrOrPtrVectorTy())
2261       return IRB.CreatePtrToInt(V, ShadowTy);
2262     else
2263       return IRB.CreateBitCast(V, ShadowTy);
2264   }
2265 
2266   /// Propagate shadow for arbitrary operation.
2267   void handleShadowOr(Instruction &I) {
2268     IRBuilder<> IRB(&I);
2269     ShadowAndOriginCombiner SC(this, IRB);
2270     for (Use &Op : I.operands())
2271       SC.Add(Op.get());
2272     SC.Done(&I);
2273   }
2274 
2275   void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
2276 
2277   // Handle multiplication by constant.
2278   //
2279   // Handle a special case of multiplication by constant that may have one or
2280   // more zeros in the lower bits. This makes the corresponding number of lower bits
2281   // of the result zero as well. We model it by shifting the other operand
2282   // shadow left by the required number of bits. Effectively, we transform
2283   // (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as (Sx << B).
2284   // We use multiplication by 2**N instead of shift to cover the case of
2285   // multiplication by 0, which may occur in some elements of a vector operand.
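  //
  // For example, multiplying by 24 (= 3 * 2**3) multiplies the other operand's
  // shadow by 8, so the three low bits of the result shadow become zero
  // (defined), matching the three always-zero low bits of the product.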
2286   void handleMulByConstant(BinaryOperator &I, Constant *ConstArg,
2287                            Value *OtherArg) {
2288     Constant *ShadowMul;
2289     Type *Ty = ConstArg->getType();
2290     if (auto *VTy = dyn_cast<VectorType>(Ty)) {
2291       unsigned NumElements = cast<FixedVectorType>(VTy)->getNumElements();
2292       Type *EltTy = VTy->getElementType();
2293       SmallVector<Constant *, 16> Elements;
2294       for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
2295         if (ConstantInt *Elt =
2296                 dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx))) {
2297           const APInt &V = Elt->getValue();
2298           APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
2299           Elements.push_back(ConstantInt::get(EltTy, V2));
2300         } else {
2301           Elements.push_back(ConstantInt::get(EltTy, 1));
2302         }
2303       }
2304       ShadowMul = ConstantVector::get(Elements);
2305     } else {
2306       if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg)) {
2307         const APInt &V = Elt->getValue();
2308         APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
2309         ShadowMul = ConstantInt::get(Ty, V2);
2310       } else {
2311         ShadowMul = ConstantInt::get(Ty, 1);
2312       }
2313     }
2314 
2315     IRBuilder<> IRB(&I);
2316     setShadow(&I,
2317               IRB.CreateMul(getShadow(OtherArg), ShadowMul, "msprop_mul_cst"));
2318     setOrigin(&I, getOrigin(OtherArg));
2319   }
2320 
2321   void visitMul(BinaryOperator &I) {
2322     Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
2323     Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
2324     if (constOp0 && !constOp1)
2325       handleMulByConstant(I, constOp0, I.getOperand(1));
2326     else if (constOp1 && !constOp0)
2327       handleMulByConstant(I, constOp1, I.getOperand(0));
2328     else
2329       handleShadowOr(I);
2330   }
2331 
2332   void visitFAdd(BinaryOperator &I) { handleShadowOr(I); }
2333   void visitFSub(BinaryOperator &I) { handleShadowOr(I); }
2334   void visitFMul(BinaryOperator &I) { handleShadowOr(I); }
2335   void visitAdd(BinaryOperator &I) { handleShadowOr(I); }
2336   void visitSub(BinaryOperator &I) { handleShadowOr(I); }
2337   void visitXor(BinaryOperator &I) { handleShadowOr(I); }
2338 
2339   void handleIntegerDiv(Instruction &I) {
2340     IRBuilder<> IRB(&I);
2341     // Strict on the second argument.
2342     insertShadowCheck(I.getOperand(1), &I);
2343     setShadow(&I, getShadow(&I, 0));
2344     setOrigin(&I, getOrigin(&I, 0));
2345   }
2346 
2347   void visitUDiv(BinaryOperator &I) { handleIntegerDiv(I); }
2348   void visitSDiv(BinaryOperator &I) { handleIntegerDiv(I); }
2349   void visitURem(BinaryOperator &I) { handleIntegerDiv(I); }
2350   void visitSRem(BinaryOperator &I) { handleIntegerDiv(I); }
2351 
2352   // Floating point division is side-effect free, so we cannot require that the
2353   // divisor be fully initialized; we just propagate shadow instead. See PR37523.
2354   void visitFDiv(BinaryOperator &I) { handleShadowOr(I); }
2355   void visitFRem(BinaryOperator &I) { handleShadowOr(I); }
2356 
2357   /// Instrument == and != comparisons.
2358   ///
2359   /// Sometimes the comparison result is known even if some of the bits of the
2360   /// arguments are not.
2361   void handleEqualityComparison(ICmpInst &I) {
2362     IRBuilder<> IRB(&I);
2363     Value *A = I.getOperand(0);
2364     Value *B = I.getOperand(1);
2365     Value *Sa = getShadow(A);
2366     Value *Sb = getShadow(B);
2367 
2368     // Get rid of pointers and vectors of pointers.
2369     // For ints (and vectors of ints), types of A and Sa match,
2370     // and this is a no-op.
2371     A = IRB.CreatePointerCast(A, Sa->getType());
2372     B = IRB.CreatePointerCast(B, Sb->getType());
2373 
2374     // A == B  <==>  (C = A^B) == 0
2375     // A != B  <==>  (C = A^B) != 0
2376     // Sc = Sa | Sb
2377     Value *C = IRB.CreateXor(A, B);
2378     Value *Sc = IRB.CreateOr(Sa, Sb);
2379     // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now)
2380     // Result is defined if one of the following is true
2381     // * there is a defined 1 bit in C
2382     // * C is fully defined
2383     // Si = !(C & ~Sc) && Sc
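    // E.g. if A = 0b01?? (two low bits undefined) and B = 0b1000, then C has a
    // defined 1 in bit 3, so A != B regardless of the undefined bits and Si is
    // 0 (the comparison result is fully defined).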
2384     Value *Zero = Constant::getNullValue(Sc->getType());
2385     Value *MinusOne = Constant::getAllOnesValue(Sc->getType());
2386     Value *Si =
2387       IRB.CreateAnd(IRB.CreateICmpNE(Sc, Zero),
2388                     IRB.CreateICmpEQ(
2389                       IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero));
2390     Si->setName("_msprop_icmp");
2391     setShadow(&I, Si);
2392     setOriginForNaryOp(I);
2393   }
2394 
2395   /// Build the lowest possible value of V, taking into account V's
2396   ///        uninitialized bits.
2397   Value *getLowestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
2398                                 bool isSigned) {
2399     if (isSigned) {
2400       // Split shadow into sign bit and other bits.
2401       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
2402       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
2403       // Maximize the undefined shadow bit, minimize other undefined bits.
2404       return
2405         IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaOtherBits)), SaSignBit);
2406     } else {
2407       // Minimize undefined bits.
2408       return IRB.CreateAnd(A, IRB.CreateNot(Sa));
2409     }
2410   }
2411 
2412   /// Build the highest possible value of V, taking into account V's
2413   ///        uninitialized bits.
2414   Value *getHighestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
2415                                 bool isSigned) {
2416     if (isSigned) {
2417       // Split shadow into sign bit and other bits.
2418       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
2419       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
2420       // Minimize the undefined shadow bit, maximize other undefined bits.
2421       return
2422         IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaSignBit)), SaOtherBits);
2423     } else {
2424       // Maximize undefined bits.
2425       return IRB.CreateOr(A, Sa);
2426     }
2427   }
2428 
2429   /// Instrument relational comparisons.
2430   ///
2431   /// This function does exact shadow propagation for all relational
2432   /// comparisons of integers, pointers and vectors of those.
2433   /// FIXME: output seems suboptimal when one of the operands is a constant
2434   void handleRelationalComparisonExact(ICmpInst &I) {
2435     IRBuilder<> IRB(&I);
2436     Value *A = I.getOperand(0);
2437     Value *B = I.getOperand(1);
2438     Value *Sa = getShadow(A);
2439     Value *Sb = getShadow(B);
2440 
2441     // Get rid of pointers and vectors of pointers.
2442     // For ints (and vectors of ints), types of A and Sa match,
2443     // and this is a no-op.
2444     A = IRB.CreatePointerCast(A, Sa->getType());
2445     B = IRB.CreatePointerCast(B, Sb->getType());
2446 
2447     // Let [a0, a1] be the interval of possible values of A, taking into account
2448     // its undefined bits. Let [b0, b1] be the interval of possible values of B.
2449     // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0).
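    // E.g. for an unsigned A = 0b10?? (so A is in [8, 11]) and a fully defined
    // B = 12, both endpoint comparisons of A < B agree (true), so the result is
    // defined; for B = 10 they disagree and the result is poisoned.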
2450     bool IsSigned = I.isSigned();
2451     Value *S1 = IRB.CreateICmp(I.getPredicate(),
2452                                getLowestPossibleValue(IRB, A, Sa, IsSigned),
2453                                getHighestPossibleValue(IRB, B, Sb, IsSigned));
2454     Value *S2 = IRB.CreateICmp(I.getPredicate(),
2455                                getHighestPossibleValue(IRB, A, Sa, IsSigned),
2456                                getLowestPossibleValue(IRB, B, Sb, IsSigned));
2457     Value *Si = IRB.CreateXor(S1, S2);
2458     setShadow(&I, Si);
2459     setOriginForNaryOp(I);
2460   }
2461 
2462   /// Instrument signed relational comparisons.
2463   ///
2464   /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest
2465   /// bit of the shadow. Everything else is delegated to handleShadowOr().
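  ///
  /// These comparisons depend only on the sign bit of x, so their result is
  /// defined exactly when that bit is defined; the ICmpSLT of the shadow
  /// against zero below extracts precisely the shadow's sign bit.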
2466   void handleSignedRelationalComparison(ICmpInst &I) {
2467     Constant *constOp;
2468     Value *op = nullptr;
2469     CmpInst::Predicate pre;
2470     if ((constOp = dyn_cast<Constant>(I.getOperand(1)))) {
2471       op = I.getOperand(0);
2472       pre = I.getPredicate();
2473     } else if ((constOp = dyn_cast<Constant>(I.getOperand(0)))) {
2474       op = I.getOperand(1);
2475       pre = I.getSwappedPredicate();
2476     } else {
2477       handleShadowOr(I);
2478       return;
2479     }
2480 
2481     if ((constOp->isNullValue() &&
2482          (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) ||
2483         (constOp->isAllOnesValue() &&
2484          (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) {
2485       IRBuilder<> IRB(&I);
2486       Value *Shadow = IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op),
2487                                         "_msprop_icmp_s");
2488       setShadow(&I, Shadow);
2489       setOrigin(&I, getOrigin(op));
2490     } else {
2491       handleShadowOr(I);
2492     }
2493   }
2494 
2495   void visitICmpInst(ICmpInst &I) {
2496     if (!ClHandleICmp) {
2497       handleShadowOr(I);
2498       return;
2499     }
2500     if (I.isEquality()) {
2501       handleEqualityComparison(I);
2502       return;
2503     }
2504 
2505     assert(I.isRelational());
2506     if (ClHandleICmpExact) {
2507       handleRelationalComparisonExact(I);
2508       return;
2509     }
2510     if (I.isSigned()) {
2511       handleSignedRelationalComparison(I);
2512       return;
2513     }
2514 
2515     assert(I.isUnsigned());
2516     if ((isa<Constant>(I.getOperand(0)) || isa<Constant>(I.getOperand(1)))) {
2517       handleRelationalComparisonExact(I);
2518       return;
2519     }
2520 
2521     handleShadowOr(I);
2522   }
2523 
2524   void visitFCmpInst(FCmpInst &I) {
2525     handleShadowOr(I);
2526   }
2527 
2528   void handleShift(BinaryOperator &I) {
2529     IRBuilder<> IRB(&I);
2530     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2531     // Otherwise perform the same shift on S1.
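    // S2Conv below is all-ones whenever the shift amount's shadow is non-zero
    // (per element for vectors) and zero otherwise, so OR-ing it in poisons the
    // whole result exactly when the shift amount is not fully defined.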
2532     Value *S1 = getShadow(&I, 0);
2533     Value *S2 = getShadow(&I, 1);
2534     Value *S2Conv = IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)),
2535                                    S2->getType());
2536     Value *V2 = I.getOperand(1);
2537     Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2);
2538     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2539     setOriginForNaryOp(I);
2540   }
2541 
2542   void visitShl(BinaryOperator &I) { handleShift(I); }
2543   void visitAShr(BinaryOperator &I) { handleShift(I); }
2544   void visitLShr(BinaryOperator &I) { handleShift(I); }
2545 
2546   void handleFunnelShift(IntrinsicInst &I) {
2547     IRBuilder<> IRB(&I);
2548     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2549     // Otherwise perform the same shift on S0 and S1.
2550     Value *S0 = getShadow(&I, 0);
2551     Value *S1 = getShadow(&I, 1);
2552     Value *S2 = getShadow(&I, 2);
2553     Value *S2Conv =
2554         IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)), S2->getType());
2555     Value *V2 = I.getOperand(2);
2556     Function *Intrin = Intrinsic::getDeclaration(
2557         I.getModule(), I.getIntrinsicID(), S2Conv->getType());
2558     Value *Shift = IRB.CreateCall(Intrin, {S0, S1, V2});
2559     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2560     setOriginForNaryOp(I);
2561   }
2562 
2563   /// Instrument llvm.memmove
2564   ///
2565   /// At this point we don't know if llvm.memmove will be inlined or not.
2566   /// If we don't instrument it and it gets inlined,
2567   /// our interceptor will not kick in and we will lose the memmove.
2568   /// If we instrument the call here, but it does not get inlined,
2569   /// we will memmove the shadow twice, which is bad in case
2570   /// of overlapping regions. So, we simply lower the intrinsic to a call.
2571   ///
2572   /// Similar situation exists for memcpy and memset.
2573   void visitMemMoveInst(MemMoveInst &I) {
2574     IRBuilder<> IRB(&I);
2575     IRB.CreateCall(
2576         MS.MemmoveFn,
2577         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2578          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2579          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2580     I.eraseFromParent();
2581   }
2582 
2583   // Similar to memmove: avoid copying shadow twice.
2584   // This is somewhat unfortunate as it may slow down small constant memcpys.
2585   // FIXME: consider doing manual inline for small constant sizes and proper
2586   // alignment.
2587   void visitMemCpyInst(MemCpyInst &I) {
2588     IRBuilder<> IRB(&I);
2589     IRB.CreateCall(
2590         MS.MemcpyFn,
2591         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2592          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2593          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2594     I.eraseFromParent();
2595   }
2596 
2597   // Same as memcpy.
2598   void visitMemSetInst(MemSetInst &I) {
2599     IRBuilder<> IRB(&I);
2600     IRB.CreateCall(
2601         MS.MemsetFn,
2602         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2603          IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
2604          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2605     I.eraseFromParent();
2606   }
2607 
2608   void visitVAStartInst(VAStartInst &I) {
2609     VAHelper->visitVAStartInst(I);
2610   }
2611 
2612   void visitVACopyInst(VACopyInst &I) {
2613     VAHelper->visitVACopyInst(I);
2614   }
2615 
2616   /// Handle vector store-like intrinsics.
2617   ///
2618   /// Instrument intrinsics that look like a simple SIMD store: writes memory,
2619   /// has 1 pointer argument and 1 vector argument, returns void.
2620   bool handleVectorStoreIntrinsic(IntrinsicInst &I) {
2621     IRBuilder<> IRB(&I);
2622     Value* Addr = I.getArgOperand(0);
2623     Value *Shadow = getShadow(&I, 1);
2624     Value *ShadowPtr, *OriginPtr;
2625 
2626     // We don't know the pointer alignment (could be unaligned SSE store!).
2627     // Have to assume the worst case.
2628     std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
2629         Addr, IRB, Shadow->getType(), Align(1), /*isStore*/ true);
2630     IRB.CreateAlignedStore(Shadow, ShadowPtr, Align(1));
2631 
2632     if (ClCheckAccessAddress)
2633       insertShadowCheck(Addr, &I);
2634 
2635     // FIXME: factor out common code from materializeStores
2636     if (MS.TrackOrigins) IRB.CreateStore(getOrigin(&I, 1), OriginPtr);
2637     return true;
2638   }
2639 
2640   /// Handle vector load-like intrinsics.
2641   ///
2642   /// Instrument intrinsics that look like a simple SIMD load: reads memory,
2643   /// has 1 pointer argument, returns a vector.
2644   bool handleVectorLoadIntrinsic(IntrinsicInst &I) {
2645     IRBuilder<> IRB(&I);
2646     Value *Addr = I.getArgOperand(0);
2647 
2648     Type *ShadowTy = getShadowTy(&I);
2649     Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
2650     if (PropagateShadow) {
2651       // We don't know the pointer alignment (could be unaligned SSE load!).
      // Have to assume the worst case.
2653       const Align Alignment = Align(1);
2654       std::tie(ShadowPtr, OriginPtr) =
2655           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
2656       setShadow(&I,
2657                 IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
2658     } else {
2659       setShadow(&I, getCleanShadow(&I));
2660     }
2661 
2662     if (ClCheckAccessAddress)
2663       insertShadowCheck(Addr, &I);
2664 
2665     if (MS.TrackOrigins) {
2666       if (PropagateShadow)
2667         setOrigin(&I, IRB.CreateLoad(MS.OriginTy, OriginPtr));
2668       else
2669         setOrigin(&I, getCleanOrigin());
2670     }
2671     return true;
2672   }
2673 
2674   /// Handle (SIMD arithmetic)-like intrinsics.
2675   ///
2676   /// Instrument intrinsics with any number of arguments of the same type,
2677   /// equal to the return type. The type should be simple (no aggregates or
2678   /// pointers; vectors are fine).
2679   /// Caller guarantees that this intrinsic does not access memory.
2680   bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I) {
2681     Type *RetTy = I.getType();
2682     if (!(RetTy->isIntOrIntVectorTy() ||
2683           RetTy->isFPOrFPVectorTy() ||
2684           RetTy->isX86_MMXTy()))
2685       return false;
2686 
2687     unsigned NumArgOperands = I.arg_size();
2688     for (unsigned i = 0; i < NumArgOperands; ++i) {
2689       Type *Ty = I.getArgOperand(i)->getType();
2690       if (Ty != RetTy)
2691         return false;
2692     }
2693 
2694     IRBuilder<> IRB(&I);
2695     ShadowAndOriginCombiner SC(this, IRB);
2696     for (unsigned i = 0; i < NumArgOperands; ++i)
2697       SC.Add(I.getArgOperand(i));
2698     SC.Done(&I);
2699 
2700     return true;
2701   }
2702 
2703   /// Heuristically instrument unknown intrinsics.
2704   ///
2705   /// The main purpose of this code is to do something reasonable with all
2706   /// random intrinsics we might encounter, most importantly - SIMD intrinsics.
2707   /// We recognize several classes of intrinsics by their argument types and
  /// ModRefBehavior and apply special instrumentation when we are reasonably
2709   /// sure that we know what the intrinsic does.
2710   ///
2711   /// We special-case intrinsics where this approach fails. See llvm.bswap
2712   /// handling as an example of that.
2713   bool handleUnknownIntrinsic(IntrinsicInst &I) {
2714     unsigned NumArgOperands = I.arg_size();
2715     if (NumArgOperands == 0)
2716       return false;
2717 
2718     if (NumArgOperands == 2 &&
2719         I.getArgOperand(0)->getType()->isPointerTy() &&
2720         I.getArgOperand(1)->getType()->isVectorTy() &&
2721         I.getType()->isVoidTy() &&
2722         !I.onlyReadsMemory()) {
2723       // This looks like a vector store.
2724       return handleVectorStoreIntrinsic(I);
2725     }
2726 
2727     if (NumArgOperands == 1 &&
2728         I.getArgOperand(0)->getType()->isPointerTy() &&
2729         I.getType()->isVectorTy() &&
2730         I.onlyReadsMemory()) {
2731       // This looks like a vector load.
2732       return handleVectorLoadIntrinsic(I);
2733     }
2734 
2735     if (I.doesNotAccessMemory())
2736       if (maybeHandleSimpleNomemIntrinsic(I))
2737         return true;
2738 
2739     // FIXME: detect and handle SSE maskstore/maskload
2740     return false;
2741   }
2742 
2743   void handleInvariantGroup(IntrinsicInst &I) {
2744     setShadow(&I, getShadow(&I, 0));
2745     setOrigin(&I, getOrigin(&I, 0));
2746   }
2747 
2748   void handleLifetimeStart(IntrinsicInst &I) {
2749     if (!PoisonStack)
2750       return;
2751     AllocaInst *AI = llvm::findAllocaForValue(I.getArgOperand(1));
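    // If the marker cannot be matched to a single alloca, disable
    // lifetime-based stack poisoning for this function; allocas are then
    // poisoned at their definition point instead.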
2752     if (!AI)
2753       InstrumentLifetimeStart = false;
2754     LifetimeStartList.push_back(std::make_pair(&I, AI));
2755   }
2756 
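  // Instrument bswap by applying bswap to the shadow as well, so every shadow
  // byte stays paired with the application byte it describes.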
2757   void handleBswap(IntrinsicInst &I) {
2758     IRBuilder<> IRB(&I);
2759     Value *Op = I.getArgOperand(0);
2760     Type *OpType = Op->getType();
2761     Function *BswapFunc = Intrinsic::getDeclaration(
2762       F.getParent(), Intrinsic::bswap, makeArrayRef(&OpType, 1));
2763     setShadow(&I, IRB.CreateCall(BswapFunc, getShadow(Op)));
2764     setOrigin(&I, getOrigin(Op));
2765   }
2766 
2767   // Instrument vector convert intrinsic.
2768   //
2769   // This function instruments intrinsics like cvtsi2ss:
2770   // %Out = int_xxx_cvtyyy(%ConvertOp)
2771   // or
2772   // %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp)
  // The intrinsic converts \p NumUsedElements elements of \p ConvertOp to the
  // same number of \p Out elements, and (if it has 2 arguments) copies the
  // rest of the elements from \p CopyOp.
  // In most cases the conversion involves a floating-point value, which may
  // trigger a hardware exception when not fully initialized. For this reason
  // we require \p ConvertOp[0:NumUsedElements] to be fully initialized and
  // trap otherwise.
2779   // We copy the shadow of \p CopyOp[NumUsedElements:] to \p
2780   // Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
2781   // return a fully initialized value.
2782   void handleVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements,
2783                                     bool HasRoundingMode = false) {
2784     IRBuilder<> IRB(&I);
2785     Value *CopyOp, *ConvertOp;
2786 
2787     assert((!HasRoundingMode ||
2788             isa<ConstantInt>(I.getArgOperand(I.arg_size() - 1))) &&
2789            "Invalid rounding mode");
2790 
2791     switch (I.arg_size() - HasRoundingMode) {
2792     case 2:
2793       CopyOp = I.getArgOperand(0);
2794       ConvertOp = I.getArgOperand(1);
2795       break;
2796     case 1:
2797       ConvertOp = I.getArgOperand(0);
2798       CopyOp = nullptr;
2799       break;
2800     default:
2801       llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
2802     }
2803 
2804     // The first *NumUsedElements* elements of ConvertOp are converted to the
2805     // same number of output elements. The rest of the output is copied from
2806     // CopyOp, or (if not available) filled with zeroes.
2807     // Combine shadow for elements of ConvertOp that are used in this operation,
2808     // and insert a check.
2809     // FIXME: consider propagating shadow of ConvertOp, at least in the case of
2810     // int->any conversion.
2811     Value *ConvertShadow = getShadow(ConvertOp);
2812     Value *AggShadow = nullptr;
2813     if (ConvertOp->getType()->isVectorTy()) {
2814       AggShadow = IRB.CreateExtractElement(
2815           ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
2816       for (int i = 1; i < NumUsedElements; ++i) {
2817         Value *MoreShadow = IRB.CreateExtractElement(
2818             ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), i));
2819         AggShadow = IRB.CreateOr(AggShadow, MoreShadow);
2820       }
2821     } else {
2822       AggShadow = ConvertShadow;
2823     }
2824     assert(AggShadow->getType()->isIntegerTy());
2825     insertShadowCheck(AggShadow, getOrigin(ConvertOp), &I);
2826 
2827     // Build result shadow by zero-filling parts of CopyOp shadow that come from
2828     // ConvertOp.
2829     if (CopyOp) {
2830       assert(CopyOp->getType() == I.getType());
2831       assert(CopyOp->getType()->isVectorTy());
2832       Value *ResultShadow = getShadow(CopyOp);
2833       Type *EltTy = cast<VectorType>(ResultShadow->getType())->getElementType();
2834       for (int i = 0; i < NumUsedElements; ++i) {
2835         ResultShadow = IRB.CreateInsertElement(
2836             ResultShadow, ConstantInt::getNullValue(EltTy),
2837             ConstantInt::get(IRB.getInt32Ty(), i));
2838       }
2839       setShadow(&I, ResultShadow);
2840       setOrigin(&I, getOrigin(CopyOp));
2841     } else {
2842       setShadow(&I, getCleanShadow(&I));
2843       setOrigin(&I, getCleanOrigin());
2844     }
2845   }
2846 
  // Given a scalar or vector, extract the lower 64 bits (or fewer), and return
  // all zeroes if it is zero, and all ones otherwise.
2849   Value *Lower64ShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
2850     if (S->getType()->isVectorTy())
2851       S = CreateShadowCast(IRB, S, IRB.getInt64Ty(), /* Signed */ true);
2852     assert(S->getType()->getPrimitiveSizeInBits() <= 64);
2853     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
2854     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
2855   }
2856 
2857   // Given a vector, extract its first element, and return all
2858   // zeroes if it is zero, and all ones otherwise.
2859   Value *LowerElementShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
2860     Value *S1 = IRB.CreateExtractElement(S, (uint64_t)0);
2861     Value *S2 = IRB.CreateICmpNE(S1, getCleanShadow(S1));
2862     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
2863   }
2864 
2865   Value *VariableShadowExtend(IRBuilder<> &IRB, Value *S) {
2866     Type *T = S->getType();
2867     assert(T->isVectorTy());
2868     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
2869     return IRB.CreateSExt(S2, T);
2870   }
2871 
2872   // Instrument vector shift intrinsic.
2873   //
2874   // This function instruments intrinsics like int_x86_avx2_psll_w.
2875   // Intrinsic shifts %In by %ShiftSize bits.
2876   // %ShiftSize may be a vector. In that case the lower 64 bits determine shift
2877   // size, and the rest is ignored. Behavior is defined even if shift size is
2878   // greater than register (or field) width.
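  //
  // Shadow propagation: apply the original shift to the first operand's shadow
  // and OR in sext(S2 != 0), so the result becomes fully poisoned whenever the
  // (relevant part of the) shift amount is poisoned.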
2879   void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) {
2880     assert(I.arg_size() == 2);
2881     IRBuilder<> IRB(&I);
2882     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2883     // Otherwise perform the same shift on S1.
2884     Value *S1 = getShadow(&I, 0);
2885     Value *S2 = getShadow(&I, 1);
2886     Value *S2Conv = Variable ? VariableShadowExtend(IRB, S2)
2887                              : Lower64ShadowExtend(IRB, S2, getShadowTy(&I));
2888     Value *V1 = I.getOperand(0);
2889     Value *V2 = I.getOperand(1);
2890     Value *Shift = IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
2891                                   {IRB.CreateBitCast(S1, V1->getType()), V2});
2892     Shift = IRB.CreateBitCast(Shift, getShadowTy(&I));
2893     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2894     setOriginForNaryOp(I);
2895   }
2896 
2897   // Get an X86_MMX-sized vector type.
2898   Type *getMMXVectorTy(unsigned EltSizeInBits) {
2899     const unsigned X86_MMXSizeInBits = 64;
2900     assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
2901            "Illegal MMX vector element size");
2902     return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
2903                                 X86_MMXSizeInBits / EltSizeInBits);
2904   }
2905 
2906   // Returns a signed counterpart for an (un)signed-saturate-and-pack
2907   // intrinsic.
2908   Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) {
2909     switch (id) {
2910       case Intrinsic::x86_sse2_packsswb_128:
2911       case Intrinsic::x86_sse2_packuswb_128:
2912         return Intrinsic::x86_sse2_packsswb_128;
2913 
2914       case Intrinsic::x86_sse2_packssdw_128:
2915       case Intrinsic::x86_sse41_packusdw:
2916         return Intrinsic::x86_sse2_packssdw_128;
2917 
2918       case Intrinsic::x86_avx2_packsswb:
2919       case Intrinsic::x86_avx2_packuswb:
2920         return Intrinsic::x86_avx2_packsswb;
2921 
2922       case Intrinsic::x86_avx2_packssdw:
2923       case Intrinsic::x86_avx2_packusdw:
2924         return Intrinsic::x86_avx2_packssdw;
2925 
2926       case Intrinsic::x86_mmx_packsswb:
2927       case Intrinsic::x86_mmx_packuswb:
2928         return Intrinsic::x86_mmx_packsswb;
2929 
2930       case Intrinsic::x86_mmx_packssdw:
2931         return Intrinsic::x86_mmx_packssdw;
2932       default:
2933         llvm_unreachable("unexpected intrinsic id");
2934     }
2935   }
2936 
2937   // Instrument vector pack intrinsic.
2938   //
  // This function instruments intrinsics like x86_mmx_packsswb, which pack
  // elements of 2 input vectors into half as many bits with saturation.
2941   // Shadow is propagated with the signed variant of the same intrinsic applied
2942   // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer).
2943   // EltSizeInBits is used only for x86mmx arguments.
2944   void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned EltSizeInBits = 0) {
2945     assert(I.arg_size() == 2);
2946     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
2947     IRBuilder<> IRB(&I);
2948     Value *S1 = getShadow(&I, 0);
2949     Value *S2 = getShadow(&I, 1);
2950     assert(isX86_MMX || S1->getType()->isVectorTy());
2951 
2952     // SExt and ICmpNE below must apply to individual elements of input vectors.
2953     // In case of x86mmx arguments, cast them to appropriate vector types and
2954     // back.
2955     Type *T = isX86_MMX ? getMMXVectorTy(EltSizeInBits) : S1->getType();
2956     if (isX86_MMX) {
2957       S1 = IRB.CreateBitCast(S1, T);
2958       S2 = IRB.CreateBitCast(S2, T);
2959     }
2960     Value *S1_ext = IRB.CreateSExt(
2961         IRB.CreateICmpNE(S1, Constant::getNullValue(T)), T);
2962     Value *S2_ext = IRB.CreateSExt(
2963         IRB.CreateICmpNE(S2, Constant::getNullValue(T)), T);
2964     if (isX86_MMX) {
2965       Type *X86_MMXTy = Type::getX86_MMXTy(*MS.C);
2966       S1_ext = IRB.CreateBitCast(S1_ext, X86_MMXTy);
2967       S2_ext = IRB.CreateBitCast(S2_ext, X86_MMXTy);
2968     }
2969 
2970     Function *ShadowFn = Intrinsic::getDeclaration(
2971         F.getParent(), getSignedPackIntrinsic(I.getIntrinsicID()));
2972 
2973     Value *S =
2974         IRB.CreateCall(ShadowFn, {S1_ext, S2_ext}, "_msprop_vector_pack");
2975     if (isX86_MMX) S = IRB.CreateBitCast(S, getShadowTy(&I));
2976     setShadow(&I, S);
2977     setOriginForNaryOp(I);
2978   }
2979 
2980   // Instrument sum-of-absolute-differences intrinsic.
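  //
  // Each 64-bit lane of the result holds a 16-bit sum; its shadow is 0xFFFF
  // (and zero in the upper bits) if any input bit feeding that lane is
  // poisoned, and zero otherwise.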
2981   void handleVectorSadIntrinsic(IntrinsicInst &I) {
2982     const unsigned SignificantBitsPerResultElement = 16;
2983     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
2984     Type *ResTy = isX86_MMX ? IntegerType::get(*MS.C, 64) : I.getType();
2985     unsigned ZeroBitsPerResultElement =
2986         ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement;
2987 
2988     IRBuilder<> IRB(&I);
2989     Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2990     S = IRB.CreateBitCast(S, ResTy);
2991     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
2992                        ResTy);
2993     S = IRB.CreateLShr(S, ZeroBitsPerResultElement);
2994     S = IRB.CreateBitCast(S, getShadowTy(&I));
2995     setShadow(&I, S);
2996     setOriginForNaryOp(I);
2997   }
2998 
2999   // Instrument multiply-add intrinsic.
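  //
  // Each result element (twice as wide as the input elements) gets a fully
  // poisoned shadow if any bit of the input elements it is computed from is
  // poisoned.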
3000   void handleVectorPmaddIntrinsic(IntrinsicInst &I,
3001                                   unsigned EltSizeInBits = 0) {
3002     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
3003     Type *ResTy = isX86_MMX ? getMMXVectorTy(EltSizeInBits * 2) : I.getType();
3004     IRBuilder<> IRB(&I);
3005     Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
3006     S = IRB.CreateBitCast(S, ResTy);
3007     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
3008                        ResTy);
3009     S = IRB.CreateBitCast(S, getShadowTy(&I));
3010     setShadow(&I, S);
3011     setOriginForNaryOp(I);
3012   }
3013 
3014   // Instrument compare-packed intrinsic.
3015   // Basically, an or followed by sext(icmp ne 0) to end up with all-zeros or
3016   // all-ones shadow.
3017   void handleVectorComparePackedIntrinsic(IntrinsicInst &I) {
3018     IRBuilder<> IRB(&I);
3019     Type *ResTy = getShadowTy(&I);
3020     Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
3021     Value *S = IRB.CreateSExt(
3022         IRB.CreateICmpNE(S0, Constant::getNullValue(ResTy)), ResTy);
3023     setShadow(&I, S);
3024     setOriginForNaryOp(I);
3025   }
3026 
3027   // Instrument compare-scalar intrinsic.
3028   // This handles both cmp* intrinsics which return the result in the first
3029   // element of a vector, and comi* which return the result as i32.
3030   void handleVectorCompareScalarIntrinsic(IntrinsicInst &I) {
3031     IRBuilder<> IRB(&I);
3032     Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
3033     Value *S = LowerElementShadowExtend(IRB, S0, getShadowTy(&I));
3034     setShadow(&I, S);
3035     setOriginForNaryOp(I);
3036   }
3037 
  // Instrument generic vector reduction intrinsics
  // by ORing together all their elements.
3040   void handleVectorReduceIntrinsic(IntrinsicInst &I) {
3041     IRBuilder<> IRB(&I);
3042     Value *S = IRB.CreateOrReduce(getShadow(&I, 0));
3043     setShadow(&I, S);
3044     setOrigin(&I, getOrigin(&I, 0));
3045   }
3046 
3047   // Instrument vector.reduce.or intrinsic.
3048   // Valid (non-poisoned) set bits in the operand pull low the
3049   // corresponding shadow bits.
3050   void handleVectorReduceOrIntrinsic(IntrinsicInst &I) {
3051     IRBuilder<> IRB(&I);
3052     Value *OperandShadow = getShadow(&I, 0);
3053     Value *OperandUnsetBits = IRB.CreateNot(I.getOperand(0));
3054     Value *OperandUnsetOrPoison = IRB.CreateOr(OperandUnsetBits, OperandShadow);
    // Bit N is clean if any element's bit N is 1 and unpoisoned.
    Value *OutShadowMask = IRB.CreateAndReduce(OperandUnsetOrPoison);
    // Otherwise, it is clean if every element's bit N is unpoisoned.
3058     Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
3059     Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);
3060 
3061     setShadow(&I, S);
3062     setOrigin(&I, getOrigin(&I, 0));
3063   }
3064 
3065   // Instrument vector.reduce.and intrinsic.
3066   // Valid (non-poisoned) unset bits in the operand pull down the
3067   // corresponding shadow bits.
3068   void handleVectorReduceAndIntrinsic(IntrinsicInst &I) {
3069     IRBuilder<> IRB(&I);
3070     Value *OperandShadow = getShadow(&I, 0);
3071     Value *OperandSetOrPoison = IRB.CreateOr(I.getOperand(0), OperandShadow);
    // Bit N is clean if any element's bit N is 0 and unpoisoned.
    Value *OutShadowMask = IRB.CreateAndReduce(OperandSetOrPoison);
    // Otherwise, it is clean if every element's bit N is unpoisoned.
3075     Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
3076     Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);
3077 
3078     setShadow(&I, S);
3079     setOrigin(&I, getOrigin(&I, 0));
3080   }
3081 
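  // stmxcsr stores the (always initialized) MXCSR register to memory, so the
  // 4-byte destination shadow is simply cleared.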
3082   void handleStmxcsr(IntrinsicInst &I) {
3083     IRBuilder<> IRB(&I);
3084     Value* Addr = I.getArgOperand(0);
3085     Type *Ty = IRB.getInt32Ty();
3086     Value *ShadowPtr =
3087         getShadowOriginPtr(Addr, IRB, Ty, Align(1), /*isStore*/ true).first;
3088 
3089     IRB.CreateStore(getCleanShadow(Ty),
3090                     IRB.CreatePointerCast(ShadowPtr, Ty->getPointerTo()));
3091 
3092     if (ClCheckAccessAddress)
3093       insertShadowCheck(Addr, &I);
3094   }
3095 
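  // ldmxcsr loads the MXCSR register from memory; loading an uninitialized
  // control word is an error, so the source's shadow is checked.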
3096   void handleLdmxcsr(IntrinsicInst &I) {
3097     if (!InsertChecks) return;
3098 
3099     IRBuilder<> IRB(&I);
3100     Value *Addr = I.getArgOperand(0);
3101     Type *Ty = IRB.getInt32Ty();
3102     const Align Alignment = Align(1);
3103     Value *ShadowPtr, *OriginPtr;
3104     std::tie(ShadowPtr, OriginPtr) =
3105         getShadowOriginPtr(Addr, IRB, Ty, Alignment, /*isStore*/ false);
3106 
3107     if (ClCheckAccessAddress)
3108       insertShadowCheck(Addr, &I);
3109 
3110     Value *Shadow = IRB.CreateAlignedLoad(Ty, ShadowPtr, Alignment, "_ldmxcsr");
3111     Value *Origin = MS.TrackOrigins ? IRB.CreateLoad(MS.OriginTy, OriginPtr)
3112                                     : getCleanOrigin();
3113     insertShadowCheck(Shadow, Origin, &I);
3114   }
3115 
3116   void handleMaskedStore(IntrinsicInst &I) {
3117     IRBuilder<> IRB(&I);
3118     Value *V = I.getArgOperand(0);
3119     Value *Addr = I.getArgOperand(1);
3120     const Align Alignment(
3121         cast<ConstantInt>(I.getArgOperand(2))->getZExtValue());
3122     Value *Mask = I.getArgOperand(3);
3123     Value *Shadow = getShadow(V);
3124 
3125     Value *ShadowPtr;
3126     Value *OriginPtr;
3127     std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
3128         Addr, IRB, Shadow->getType(), Alignment, /*isStore*/ true);
3129 
3130     if (ClCheckAccessAddress) {
3131       insertShadowCheck(Addr, &I);
3132       // Uninitialized mask is kind of like uninitialized address, but not as
3133       // scary.
3134       insertShadowCheck(Mask, &I);
3135     }
3136 
3137     IRB.CreateMaskedStore(Shadow, ShadowPtr, Alignment, Mask);
3138 
3139     if (MS.TrackOrigins) {
3140       auto &DL = F.getParent()->getDataLayout();
3141       paintOrigin(IRB, getOrigin(V), OriginPtr,
3142                   DL.getTypeStoreSize(Shadow->getType()),
3143                   std::max(Alignment, kMinOriginAlignment));
3144     }
3145   }
3146 
3147   bool handleMaskedLoad(IntrinsicInst &I) {
3148     IRBuilder<> IRB(&I);
3149     Value *Addr = I.getArgOperand(0);
3150     const Align Alignment(
3151         cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
3152     Value *Mask = I.getArgOperand(2);
3153     Value *PassThru = I.getArgOperand(3);
3154 
3155     Type *ShadowTy = getShadowTy(&I);
3156     Value *ShadowPtr, *OriginPtr;
3157     if (PropagateShadow) {
3158       std::tie(ShadowPtr, OriginPtr) =
3159           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
3160       setShadow(&I, IRB.CreateMaskedLoad(ShadowTy, ShadowPtr, Alignment, Mask,
3161                                          getShadow(PassThru), "_msmaskedld"));
3162     } else {
3163       setShadow(&I, getCleanShadow(&I));
3164     }
3165 
3166     if (ClCheckAccessAddress) {
3167       insertShadowCheck(Addr, &I);
3168       insertShadowCheck(Mask, &I);
3169     }
3170 
3171     if (MS.TrackOrigins) {
3172       if (PropagateShadow) {
3173         // Choose between PassThru's and the loaded value's origins.
3174         Value *MaskedPassThruShadow = IRB.CreateAnd(
3175             getShadow(PassThru), IRB.CreateSExt(IRB.CreateNeg(Mask), ShadowTy));
3176 
3177         Value *Acc = IRB.CreateExtractElement(
3178             MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
3179         for (int i = 1, N = cast<FixedVectorType>(PassThru->getType())
3180                                 ->getNumElements();
3181              i < N; ++i) {
3182           Value *More = IRB.CreateExtractElement(
3183               MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), i));
3184           Acc = IRB.CreateOr(Acc, More);
3185         }
3186 
3187         Value *Origin = IRB.CreateSelect(
3188             IRB.CreateICmpNE(Acc, Constant::getNullValue(Acc->getType())),
3189             getOrigin(PassThru), IRB.CreateLoad(MS.OriginTy, OriginPtr));
3190 
3191         setOrigin(&I, Origin);
3192       } else {
3193         setOrigin(&I, getCleanOrigin());
3194       }
3195     }
3196     return true;
3197   }
3198 
3199   // Instrument BMI / BMI2 intrinsics.
  // All of these intrinsics are Z = I(X, Y), where the types of all operands
  // and the result match, and are either i32 or i64.
3202   // The following instrumentation happens to work for all of them:
3203   //   Sz = I(Sx, Y) | (sext (Sy != 0))
3204   void handleBmiIntrinsic(IntrinsicInst &I) {
3205     IRBuilder<> IRB(&I);
3206     Type *ShadowTy = getShadowTy(&I);
3207 
3208     // If any bit of the mask operand is poisoned, then the whole thing is.
3209     Value *SMask = getShadow(&I, 1);
3210     SMask = IRB.CreateSExt(IRB.CreateICmpNE(SMask, getCleanShadow(ShadowTy)),
3211                            ShadowTy);
3212     // Apply the same intrinsic to the shadow of the first operand.
3213     Value *S = IRB.CreateCall(I.getCalledFunction(),
3214                               {getShadow(&I, 0), I.getOperand(1)});
3215     S = IRB.CreateOr(SMask, S);
3216     setShadow(&I, S);
3217     setOriginForNaryOp(I);
3218   }
3219 
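  // Build a shuffle mask that duplicates the even (or odd) elements of a
  // pclmul operand, e.g. <0, 0, 2, 2> (even) or <1, 1, 3, 3> (odd) for
  // Width == 4.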
3220   SmallVector<int, 8> getPclmulMask(unsigned Width, bool OddElements) {
3221     SmallVector<int, 8> Mask;
3222     for (unsigned X = OddElements ? 1 : 0; X < Width; X += 2) {
3223       Mask.append(2, X);
3224     }
3225     return Mask;
3226   }
3227 
3228   // Instrument pclmul intrinsics.
3229   // These intrinsics operate either on odd or on even elements of the input
3230   // vectors, depending on the constant in the 3rd argument, ignoring the rest.
  // Replace the unused elements with copies of the used ones, e.g.:
3232   //   (0, 1, 2, 3) -> (0, 0, 2, 2) (even case)
3233   // or
3234   //   (0, 1, 2, 3) -> (1, 1, 3, 3) (odd case)
3235   // and then apply the usual shadow combining logic.
3236   void handlePclmulIntrinsic(IntrinsicInst &I) {
3237     IRBuilder<> IRB(&I);
3238     unsigned Width =
3239         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
3240     assert(isa<ConstantInt>(I.getArgOperand(2)) &&
3241            "pclmul 3rd operand must be a constant");
3242     unsigned Imm = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
3243     Value *Shuf0 = IRB.CreateShuffleVector(getShadow(&I, 0),
3244                                            getPclmulMask(Width, Imm & 0x01));
3245     Value *Shuf1 = IRB.CreateShuffleVector(getShadow(&I, 1),
3246                                            getPclmulMask(Width, Imm & 0x10));
3247     ShadowAndOriginCombiner SOC(this, IRB);
3248     SOC.Add(Shuf0, getOrigin(&I, 0));
3249     SOC.Add(Shuf1, getOrigin(&I, 1));
3250     SOC.Done(&I);
3251   }
3252 
3253   // Instrument _mm_*_sd intrinsics
3254   void handleUnarySdIntrinsic(IntrinsicInst &I) {
3255     IRBuilder<> IRB(&I);
3256     Value *First = getShadow(&I, 0);
3257     Value *Second = getShadow(&I, 1);
3258     // High word of first operand, low word of second
3259     Value *Shadow =
3260         IRB.CreateShuffleVector(First, Second, llvm::makeArrayRef<int>({2, 1}));
3261 
3262     setShadow(&I, Shadow);
3263     setOriginForNaryOp(I);
3264   }
3265 
3266   void handleBinarySdIntrinsic(IntrinsicInst &I) {
3267     IRBuilder<> IRB(&I);
3268     Value *First = getShadow(&I, 0);
3269     Value *Second = getShadow(&I, 1);
3270     Value *OrShadow = IRB.CreateOr(First, Second);
3271     // High word of first operand, low word of both OR'd together
3272     Value *Shadow = IRB.CreateShuffleVector(First, OrShadow,
3273                                             llvm::makeArrayRef<int>({2, 1}));
3274 
3275     setShadow(&I, Shadow);
3276     setOriginForNaryOp(I);
3277   }
3278 
3279   // Instrument abs intrinsic.
3280   // handleUnknownIntrinsic can't handle it because of the last
  // is_int_min_poison argument, which does not match the result type.
3282   void handleAbsIntrinsic(IntrinsicInst &I) {
3283     assert(I.getType()->isIntOrIntVectorTy());
3284     assert(I.getArgOperand(0)->getType() == I.getType());
3285 
3286     // FIXME: Handle is_int_min_poison.
3287     IRBuilder<> IRB(&I);
3288     setShadow(&I, getShadow(&I, 0));
3289     setOrigin(&I, getOrigin(&I, 0));
3290   }
3291 
3292   void visitIntrinsicInst(IntrinsicInst &I) {
3293     switch (I.getIntrinsicID()) {
3294     case Intrinsic::abs:
3295       handleAbsIntrinsic(I);
3296       break;
3297     case Intrinsic::lifetime_start:
3298       handleLifetimeStart(I);
3299       break;
3300     case Intrinsic::launder_invariant_group:
3301     case Intrinsic::strip_invariant_group:
3302       handleInvariantGroup(I);
3303       break;
3304     case Intrinsic::bswap:
3305       handleBswap(I);
3306       break;
3307     case Intrinsic::masked_store:
3308       handleMaskedStore(I);
3309       break;
3310     case Intrinsic::masked_load:
3311       handleMaskedLoad(I);
3312       break;
3313     case Intrinsic::vector_reduce_and:
3314       handleVectorReduceAndIntrinsic(I);
3315       break;
3316     case Intrinsic::vector_reduce_or:
3317       handleVectorReduceOrIntrinsic(I);
3318       break;
3319     case Intrinsic::vector_reduce_add:
3320     case Intrinsic::vector_reduce_xor:
3321     case Intrinsic::vector_reduce_mul:
3322       handleVectorReduceIntrinsic(I);
3323       break;
3324     case Intrinsic::x86_sse_stmxcsr:
3325       handleStmxcsr(I);
3326       break;
3327     case Intrinsic::x86_sse_ldmxcsr:
3328       handleLdmxcsr(I);
3329       break;
3330     case Intrinsic::x86_avx512_vcvtsd2usi64:
3331     case Intrinsic::x86_avx512_vcvtsd2usi32:
3332     case Intrinsic::x86_avx512_vcvtss2usi64:
3333     case Intrinsic::x86_avx512_vcvtss2usi32:
3334     case Intrinsic::x86_avx512_cvttss2usi64:
3335     case Intrinsic::x86_avx512_cvttss2usi:
3336     case Intrinsic::x86_avx512_cvttsd2usi64:
3337     case Intrinsic::x86_avx512_cvttsd2usi:
3338     case Intrinsic::x86_avx512_cvtusi2ss:
3339     case Intrinsic::x86_avx512_cvtusi642sd:
3340     case Intrinsic::x86_avx512_cvtusi642ss:
3341       handleVectorConvertIntrinsic(I, 1, true);
3342       break;
3343     case Intrinsic::x86_sse2_cvtsd2si64:
3344     case Intrinsic::x86_sse2_cvtsd2si:
3345     case Intrinsic::x86_sse2_cvtsd2ss:
3346     case Intrinsic::x86_sse2_cvttsd2si64:
3347     case Intrinsic::x86_sse2_cvttsd2si:
3348     case Intrinsic::x86_sse_cvtss2si64:
3349     case Intrinsic::x86_sse_cvtss2si:
3350     case Intrinsic::x86_sse_cvttss2si64:
3351     case Intrinsic::x86_sse_cvttss2si:
3352       handleVectorConvertIntrinsic(I, 1);
3353       break;
3354     case Intrinsic::x86_sse_cvtps2pi:
3355     case Intrinsic::x86_sse_cvttps2pi:
3356       handleVectorConvertIntrinsic(I, 2);
3357       break;
3358 
3359     case Intrinsic::x86_avx512_psll_w_512:
3360     case Intrinsic::x86_avx512_psll_d_512:
3361     case Intrinsic::x86_avx512_psll_q_512:
3362     case Intrinsic::x86_avx512_pslli_w_512:
3363     case Intrinsic::x86_avx512_pslli_d_512:
3364     case Intrinsic::x86_avx512_pslli_q_512:
3365     case Intrinsic::x86_avx512_psrl_w_512:
3366     case Intrinsic::x86_avx512_psrl_d_512:
3367     case Intrinsic::x86_avx512_psrl_q_512:
3368     case Intrinsic::x86_avx512_psra_w_512:
3369     case Intrinsic::x86_avx512_psra_d_512:
3370     case Intrinsic::x86_avx512_psra_q_512:
3371     case Intrinsic::x86_avx512_psrli_w_512:
3372     case Intrinsic::x86_avx512_psrli_d_512:
3373     case Intrinsic::x86_avx512_psrli_q_512:
3374     case Intrinsic::x86_avx512_psrai_w_512:
3375     case Intrinsic::x86_avx512_psrai_d_512:
3376     case Intrinsic::x86_avx512_psrai_q_512:
3377     case Intrinsic::x86_avx512_psra_q_256:
3378     case Intrinsic::x86_avx512_psra_q_128:
3379     case Intrinsic::x86_avx512_psrai_q_256:
3380     case Intrinsic::x86_avx512_psrai_q_128:
3381     case Intrinsic::x86_avx2_psll_w:
3382     case Intrinsic::x86_avx2_psll_d:
3383     case Intrinsic::x86_avx2_psll_q:
3384     case Intrinsic::x86_avx2_pslli_w:
3385     case Intrinsic::x86_avx2_pslli_d:
3386     case Intrinsic::x86_avx2_pslli_q:
3387     case Intrinsic::x86_avx2_psrl_w:
3388     case Intrinsic::x86_avx2_psrl_d:
3389     case Intrinsic::x86_avx2_psrl_q:
3390     case Intrinsic::x86_avx2_psra_w:
3391     case Intrinsic::x86_avx2_psra_d:
3392     case Intrinsic::x86_avx2_psrli_w:
3393     case Intrinsic::x86_avx2_psrli_d:
3394     case Intrinsic::x86_avx2_psrli_q:
3395     case Intrinsic::x86_avx2_psrai_w:
3396     case Intrinsic::x86_avx2_psrai_d:
3397     case Intrinsic::x86_sse2_psll_w:
3398     case Intrinsic::x86_sse2_psll_d:
3399     case Intrinsic::x86_sse2_psll_q:
3400     case Intrinsic::x86_sse2_pslli_w:
3401     case Intrinsic::x86_sse2_pslli_d:
3402     case Intrinsic::x86_sse2_pslli_q:
3403     case Intrinsic::x86_sse2_psrl_w:
3404     case Intrinsic::x86_sse2_psrl_d:
3405     case Intrinsic::x86_sse2_psrl_q:
3406     case Intrinsic::x86_sse2_psra_w:
3407     case Intrinsic::x86_sse2_psra_d:
3408     case Intrinsic::x86_sse2_psrli_w:
3409     case Intrinsic::x86_sse2_psrli_d:
3410     case Intrinsic::x86_sse2_psrli_q:
3411     case Intrinsic::x86_sse2_psrai_w:
3412     case Intrinsic::x86_sse2_psrai_d:
3413     case Intrinsic::x86_mmx_psll_w:
3414     case Intrinsic::x86_mmx_psll_d:
3415     case Intrinsic::x86_mmx_psll_q:
3416     case Intrinsic::x86_mmx_pslli_w:
3417     case Intrinsic::x86_mmx_pslli_d:
3418     case Intrinsic::x86_mmx_pslli_q:
3419     case Intrinsic::x86_mmx_psrl_w:
3420     case Intrinsic::x86_mmx_psrl_d:
3421     case Intrinsic::x86_mmx_psrl_q:
3422     case Intrinsic::x86_mmx_psra_w:
3423     case Intrinsic::x86_mmx_psra_d:
3424     case Intrinsic::x86_mmx_psrli_w:
3425     case Intrinsic::x86_mmx_psrli_d:
3426     case Intrinsic::x86_mmx_psrli_q:
3427     case Intrinsic::x86_mmx_psrai_w:
3428     case Intrinsic::x86_mmx_psrai_d:
3429       handleVectorShiftIntrinsic(I, /* Variable */ false);
3430       break;
3431     case Intrinsic::x86_avx2_psllv_d:
3432     case Intrinsic::x86_avx2_psllv_d_256:
3433     case Intrinsic::x86_avx512_psllv_d_512:
3434     case Intrinsic::x86_avx2_psllv_q:
3435     case Intrinsic::x86_avx2_psllv_q_256:
3436     case Intrinsic::x86_avx512_psllv_q_512:
3437     case Intrinsic::x86_avx2_psrlv_d:
3438     case Intrinsic::x86_avx2_psrlv_d_256:
3439     case Intrinsic::x86_avx512_psrlv_d_512:
3440     case Intrinsic::x86_avx2_psrlv_q:
3441     case Intrinsic::x86_avx2_psrlv_q_256:
3442     case Intrinsic::x86_avx512_psrlv_q_512:
3443     case Intrinsic::x86_avx2_psrav_d:
3444     case Intrinsic::x86_avx2_psrav_d_256:
3445     case Intrinsic::x86_avx512_psrav_d_512:
3446     case Intrinsic::x86_avx512_psrav_q_128:
3447     case Intrinsic::x86_avx512_psrav_q_256:
3448     case Intrinsic::x86_avx512_psrav_q_512:
3449       handleVectorShiftIntrinsic(I, /* Variable */ true);
3450       break;
3451 
3452     case Intrinsic::x86_sse2_packsswb_128:
3453     case Intrinsic::x86_sse2_packssdw_128:
3454     case Intrinsic::x86_sse2_packuswb_128:
3455     case Intrinsic::x86_sse41_packusdw:
3456     case Intrinsic::x86_avx2_packsswb:
3457     case Intrinsic::x86_avx2_packssdw:
3458     case Intrinsic::x86_avx2_packuswb:
3459     case Intrinsic::x86_avx2_packusdw:
3460       handleVectorPackIntrinsic(I);
3461       break;
3462 
3463     case Intrinsic::x86_mmx_packsswb:
3464     case Intrinsic::x86_mmx_packuswb:
3465       handleVectorPackIntrinsic(I, 16);
3466       break;
3467 
3468     case Intrinsic::x86_mmx_packssdw:
3469       handleVectorPackIntrinsic(I, 32);
3470       break;
3471 
3472     case Intrinsic::x86_mmx_psad_bw:
3473     case Intrinsic::x86_sse2_psad_bw:
3474     case Intrinsic::x86_avx2_psad_bw:
3475       handleVectorSadIntrinsic(I);
3476       break;
3477 
3478     case Intrinsic::x86_sse2_pmadd_wd:
3479     case Intrinsic::x86_avx2_pmadd_wd:
3480     case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
3481     case Intrinsic::x86_avx2_pmadd_ub_sw:
3482       handleVectorPmaddIntrinsic(I);
3483       break;
3484 
3485     case Intrinsic::x86_ssse3_pmadd_ub_sw:
3486       handleVectorPmaddIntrinsic(I, 8);
3487       break;
3488 
3489     case Intrinsic::x86_mmx_pmadd_wd:
3490       handleVectorPmaddIntrinsic(I, 16);
3491       break;
3492 
3493     case Intrinsic::x86_sse_cmp_ss:
3494     case Intrinsic::x86_sse2_cmp_sd:
3495     case Intrinsic::x86_sse_comieq_ss:
3496     case Intrinsic::x86_sse_comilt_ss:
3497     case Intrinsic::x86_sse_comile_ss:
3498     case Intrinsic::x86_sse_comigt_ss:
3499     case Intrinsic::x86_sse_comige_ss:
3500     case Intrinsic::x86_sse_comineq_ss:
3501     case Intrinsic::x86_sse_ucomieq_ss:
3502     case Intrinsic::x86_sse_ucomilt_ss:
3503     case Intrinsic::x86_sse_ucomile_ss:
3504     case Intrinsic::x86_sse_ucomigt_ss:
3505     case Intrinsic::x86_sse_ucomige_ss:
3506     case Intrinsic::x86_sse_ucomineq_ss:
3507     case Intrinsic::x86_sse2_comieq_sd:
3508     case Intrinsic::x86_sse2_comilt_sd:
3509     case Intrinsic::x86_sse2_comile_sd:
3510     case Intrinsic::x86_sse2_comigt_sd:
3511     case Intrinsic::x86_sse2_comige_sd:
3512     case Intrinsic::x86_sse2_comineq_sd:
3513     case Intrinsic::x86_sse2_ucomieq_sd:
3514     case Intrinsic::x86_sse2_ucomilt_sd:
3515     case Intrinsic::x86_sse2_ucomile_sd:
3516     case Intrinsic::x86_sse2_ucomigt_sd:
3517     case Intrinsic::x86_sse2_ucomige_sd:
3518     case Intrinsic::x86_sse2_ucomineq_sd:
3519       handleVectorCompareScalarIntrinsic(I);
3520       break;
3521 
3522     case Intrinsic::x86_sse_cmp_ps:
3523     case Intrinsic::x86_sse2_cmp_pd:
3524       // FIXME: For x86_avx_cmp_pd_256 and x86_avx_cmp_ps_256 this function
3525       // generates reasonably looking IR that fails in the backend with "Do not
3526       // know how to split the result of this operator!".
3527       handleVectorComparePackedIntrinsic(I);
3528       break;
3529 
3530     case Intrinsic::x86_bmi_bextr_32:
3531     case Intrinsic::x86_bmi_bextr_64:
3532     case Intrinsic::x86_bmi_bzhi_32:
3533     case Intrinsic::x86_bmi_bzhi_64:
3534     case Intrinsic::x86_bmi_pdep_32:
3535     case Intrinsic::x86_bmi_pdep_64:
3536     case Intrinsic::x86_bmi_pext_32:
3537     case Intrinsic::x86_bmi_pext_64:
3538       handleBmiIntrinsic(I);
3539       break;
3540 
3541     case Intrinsic::x86_pclmulqdq:
3542     case Intrinsic::x86_pclmulqdq_256:
3543     case Intrinsic::x86_pclmulqdq_512:
3544       handlePclmulIntrinsic(I);
3545       break;
3546 
3547     case Intrinsic::x86_sse41_round_sd:
3548       handleUnarySdIntrinsic(I);
3549       break;
3550     case Intrinsic::x86_sse2_max_sd:
3551     case Intrinsic::x86_sse2_min_sd:
3552       handleBinarySdIntrinsic(I);
3553       break;
3554 
3555     case Intrinsic::fshl:
3556     case Intrinsic::fshr:
3557       handleFunnelShift(I);
3558       break;
3559 
3560     case Intrinsic::is_constant:
3561       // The result of llvm.is.constant() is always defined.
3562       setShadow(&I, getCleanShadow(&I));
3563       setOrigin(&I, getCleanOrigin());
3564       break;
3565 
3566     default:
3567       if (!handleUnknownIntrinsic(I))
3568         visitInstruction(I);
3569       break;
3570     }
3571   }
3572 
3573   void visitLibAtomicLoad(CallBase &CB) {
3574     // Since we use getNextNode here, we can't have CB terminate the BB.
3575     assert(isa<CallInst>(CB));
3576 
3577     IRBuilder<> IRB(&CB);
3578     Value *Size = CB.getArgOperand(0);
3579     Value *SrcPtr = CB.getArgOperand(1);
3580     Value *DstPtr = CB.getArgOperand(2);
3581     Value *Ordering = CB.getArgOperand(3);
3582     // Convert the call to have at least Acquire ordering to make sure
3583     // the shadow operations aren't reordered before it.
3584     Value *NewOrdering =
3585         IRB.CreateExtractElement(makeAddAcquireOrderingTable(IRB), Ordering);
3586     CB.setArgOperand(3, NewOrdering);
3587 
3588     IRBuilder<> NextIRB(CB.getNextNode());
3589     NextIRB.SetCurrentDebugLocation(CB.getDebugLoc());
3590 
3591     Value *SrcShadowPtr, *SrcOriginPtr;
3592     std::tie(SrcShadowPtr, SrcOriginPtr) =
3593         getShadowOriginPtr(SrcPtr, NextIRB, NextIRB.getInt8Ty(), Align(1),
3594                            /*isStore*/ false);
3595     Value *DstShadowPtr =
3596         getShadowOriginPtr(DstPtr, NextIRB, NextIRB.getInt8Ty(), Align(1),
3597                            /*isStore*/ true)
3598             .first;
3599 
3600     NextIRB.CreateMemCpy(DstShadowPtr, Align(1), SrcShadowPtr, Align(1), Size);
3601     if (MS.TrackOrigins) {
3602       Value *SrcOrigin = NextIRB.CreateAlignedLoad(MS.OriginTy, SrcOriginPtr,
3603                                                    kMinOriginAlignment);
3604       Value *NewOrigin = updateOrigin(SrcOrigin, NextIRB);
3605       NextIRB.CreateCall(MS.MsanSetOriginFn, {DstPtr, Size, NewOrigin});
3606     }
3607   }
3608 
3609   void visitLibAtomicStore(CallBase &CB) {
3610     IRBuilder<> IRB(&CB);
3611     Value *Size = CB.getArgOperand(0);
3612     Value *DstPtr = CB.getArgOperand(2);
3613     Value *Ordering = CB.getArgOperand(3);
3614     // Convert the call to have at least Release ordering to make sure
3615     // the shadow operations aren't reordered after it.
3616     Value *NewOrdering =
3617         IRB.CreateExtractElement(makeAddReleaseOrderingTable(IRB), Ordering);
3618     CB.setArgOperand(3, NewOrdering);
3619 
3620     Value *DstShadowPtr =
3621         getShadowOriginPtr(DstPtr, IRB, IRB.getInt8Ty(), Align(1),
3622                            /*isStore*/ true)
3623             .first;
3624 
3625     // Atomic store always paints clean shadow/origin. See file header.
3626     IRB.CreateMemSet(DstShadowPtr, getCleanShadow(IRB.getInt8Ty()), Size,
3627                      Align(1));
3628   }
3629 
3630   void visitCallBase(CallBase &CB) {
3631     assert(!CB.getMetadata("nosanitize"));
3632     if (CB.isInlineAsm()) {
      // For inline asm (a call to an asm function or a callbr instruction),
3634       // do the usual thing: check argument shadow and mark all outputs as
3635       // clean. Note that any side effects of the inline asm that are not
3636       // immediately visible in its constraints are not handled.
3637       if (ClHandleAsmConservative && MS.CompileKernel)
3638         visitAsmInstruction(CB);
3639       else
3640         visitInstruction(CB);
3641       return;
3642     }
3643     LibFunc LF;
3644     if (TLI->getLibFunc(CB, LF)) {
3645       // libatomic.a functions need to have special handling because there isn't
3646       // a good way to intercept them or compile the library with
3647       // instrumentation.
3648       switch (LF) {
3649       case LibFunc_atomic_load:
3650         if (!isa<CallInst>(CB)) {
3651           llvm::errs() << "MSAN -- cannot instrument invoke of libatomic load."
                          " Ignoring!\n";
3653           break;
3654         }
3655         visitLibAtomicLoad(CB);
3656         return;
3657       case LibFunc_atomic_store:
3658         visitLibAtomicStore(CB);
3659         return;
3660       default:
3661         break;
3662       }
3663     }
3664 
3665     if (auto *Call = dyn_cast<CallInst>(&CB)) {
3666       assert(!isa<IntrinsicInst>(Call) && "intrinsics are handled elsewhere");
3667 
3668       // We are going to insert code that relies on the fact that the callee
3669       // will become a non-readonly function after it is instrumented by us. To
3670       // prevent this code from being optimized out, mark that function
3671       // non-readonly in advance.
3672       AttributeMask B;
3673       B.addAttribute(Attribute::ReadOnly)
3674           .addAttribute(Attribute::ReadNone)
3675           .addAttribute(Attribute::WriteOnly)
3676           .addAttribute(Attribute::ArgMemOnly)
3677           .addAttribute(Attribute::Speculatable);
3678 
3679       Call->removeFnAttrs(B);
3680       if (Function *Func = Call->getCalledFunction()) {
3681         Func->removeFnAttrs(B);
3682       }
3683 
3684       maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI);
3685     }
3686     IRBuilder<> IRB(&CB);
3687     bool MayCheckCall = MS.EagerChecks;
3688     if (Function *Func = CB.getCalledFunction()) {
3689       // __sanitizer_unaligned_{load,store} functions may be called by users
      // and always expect shadows in the TLS. So don't check them.
3691       MayCheckCall &= !Func->getName().startswith("__sanitizer_unaligned_");
3692     }
3693 
3694     unsigned ArgOffset = 0;
3695     LLVM_DEBUG(dbgs() << "  CallSite: " << CB << "\n");
3696     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
3697          ++ArgIt) {
3698       Value *A = *ArgIt;
3699       unsigned i = ArgIt - CB.arg_begin();
3700       if (!A->getType()->isSized()) {
3701         LLVM_DEBUG(dbgs() << "Arg " << i << " is not sized: " << CB << "\n");
3702         continue;
3703       }
3704       unsigned Size = 0;
3705       Value *Store = nullptr;
3706       // Compute the Shadow for arg even if it is ByVal, because
3707       // in that case getShadow() will copy the actual arg shadow to
3708       // __msan_param_tls.
3709       Value *ArgShadow = getShadow(A);
3710       Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset);
3711       LLVM_DEBUG(dbgs() << "  Arg#" << i << ": " << *A
3712                         << " Shadow: " << *ArgShadow << "\n");
3713       bool ArgIsInitialized = false;
3714       const DataLayout &DL = F.getParent()->getDataLayout();
3715 
3716       bool ByVal = CB.paramHasAttr(i, Attribute::ByVal);
3717       bool NoUndef = CB.paramHasAttr(i, Attribute::NoUndef);
3718       bool EagerCheck = MayCheckCall && !ByVal && NoUndef;
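      // With eager checks, noundef non-byval arguments are checked right at
      // the call site instead of having their shadow stored to
      // __msan_param_tls.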
3719 
3720       if (EagerCheck) {
3721         insertShadowCheck(A, &CB);
3722         Size = DL.getTypeAllocSize(A->getType());
3723       } else {
3724         if (ByVal) {
3725           // ByVal requires some special handling as it's too big for a single
          // load.
3727           assert(A->getType()->isPointerTy() &&
3728                  "ByVal argument is not a pointer!");
3729           Size = DL.getTypeAllocSize(CB.getParamByValType(i));
3730           if (ArgOffset + Size > kParamTLSSize)
3731             break;
3732           const MaybeAlign ParamAlignment(CB.getParamAlign(i));
3733           MaybeAlign Alignment = llvm::None;
3734           if (ParamAlignment)
3735             Alignment = std::min(*ParamAlignment, kShadowTLSAlignment);
3736           Value *AShadowPtr =
3737               getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
3738                                  /*isStore*/ false)
3739                   .first;
3740 
3741           Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
3742                                    Alignment, Size);
3743           // TODO(glider): need to copy origins.
3744         } else {
3745           // Any other parameters mean we need bit-grained tracking of uninit
          // data.
3747           Size = DL.getTypeAllocSize(A->getType());
3748           if (ArgOffset + Size > kParamTLSSize)
3749             break;
3750           Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
3751                                          kShadowTLSAlignment);
3752           Constant *Cst = dyn_cast<Constant>(ArgShadow);
3753           if (Cst && Cst->isNullValue())
3754             ArgIsInitialized = true;
3755         }
3756         if (MS.TrackOrigins && !ArgIsInitialized)
3757           IRB.CreateStore(getOrigin(A),
3758                           getOriginPtrForArgument(A, IRB, ArgOffset));
3759         (void)Store;
3760         assert(Store != nullptr);
3761         LLVM_DEBUG(dbgs() << "  Param:" << *Store << "\n");
3762       }
3763       assert(Size != 0);
3764       ArgOffset += alignTo(Size, kShadowTLSAlignment);
3765     }
3766     LLVM_DEBUG(dbgs() << "  done with call args\n");
3767 
3768     FunctionType *FT = CB.getFunctionType();
3769     if (FT->isVarArg()) {
3770       VAHelper->visitCallBase(CB, IRB);
3771     }
3772 
3773     // Now, get the shadow for the RetVal.
3774     if (!CB.getType()->isSized())
3775       return;
3776     // Don't emit the epilogue for musttail call returns.
3777     if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
3778       return;
3779 
3780     if (MayCheckCall && CB.hasRetAttr(Attribute::NoUndef)) {
3781       setShadow(&CB, getCleanShadow(&CB));
3782       setOrigin(&CB, getCleanOrigin());
3783       return;
3784     }
3785 
3786     IRBuilder<> IRBBefore(&CB);
3787     // Until we have full dynamic coverage, make sure the retval shadow is 0.
3788     Value *Base = getShadowPtrForRetval(&CB, IRBBefore);
3789     IRBBefore.CreateAlignedStore(getCleanShadow(&CB), Base,
3790                                  kShadowTLSAlignment);
3791     BasicBlock::iterator NextInsn;
3792     if (isa<CallInst>(CB)) {
3793       NextInsn = ++CB.getIterator();
3794       assert(NextInsn != CB.getParent()->end());
3795     } else {
3796       BasicBlock *NormalDest = cast<InvokeInst>(CB).getNormalDest();
3797       if (!NormalDest->getSinglePredecessor()) {
3798         // FIXME: this case is tricky, so we are just conservative here.
3799         // Perhaps we need to split the edge between this BB and NormalDest,
3800         // but a naive attempt to use SplitEdge leads to a crash.
3801         setShadow(&CB, getCleanShadow(&CB));
3802         setOrigin(&CB, getCleanOrigin());
3803         return;
3804       }
      // FIXME: NextInsn is likely in a basic block that has not been visited
      // yet. Anything inserted there will be instrumented by MSan later!
3807       NextInsn = NormalDest->getFirstInsertionPt();
3808       assert(NextInsn != NormalDest->end() &&
3809              "Could not find insertion point for retval shadow load");
3810     }
3811     IRBuilder<> IRBAfter(&*NextInsn);
3812     Value *RetvalShadow = IRBAfter.CreateAlignedLoad(
3813         getShadowTy(&CB), getShadowPtrForRetval(&CB, IRBAfter),
3814         kShadowTLSAlignment, "_msret");
3815     setShadow(&CB, RetvalShadow);
3816     if (MS.TrackOrigins)
3817       setOrigin(&CB, IRBAfter.CreateLoad(MS.OriginTy,
3818                                          getOriginPtrForRetval(IRBAfter)));
3819   }
3820 
3821   bool isAMustTailRetVal(Value *RetVal) {
3822     if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
3823       RetVal = I->getOperand(0);
3824     }
3825     if (auto *I = dyn_cast<CallInst>(RetVal)) {
3826       return I->isMustTailCall();
3827     }
3828     return false;
3829   }
3830 
3831   void visitReturnInst(ReturnInst &I) {
3832     IRBuilder<> IRB(&I);
3833     Value *RetVal = I.getReturnValue();
3834     if (!RetVal) return;
3835     // Don't emit the epilogue for musttail call returns.
3836     if (isAMustTailRetVal(RetVal)) return;
3837     Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
3838     bool HasNoUndef =
3839         F.hasRetAttribute(Attribute::NoUndef);
3840     bool StoreShadow = !(MS.EagerChecks && HasNoUndef);
3841     // FIXME: Consider using SpecialCaseList to specify a list of functions that
3842     // must always return fully initialized values. For now, we hardcode "main".
3843     bool EagerCheck = (MS.EagerChecks && HasNoUndef) || (F.getName() == "main");
3844 
3845     Value *Shadow = getShadow(RetVal);
3846     bool StoreOrigin = true;
3847     if (EagerCheck) {
3848       insertShadowCheck(RetVal, &I);
3849       Shadow = getCleanShadow(RetVal);
3850       StoreOrigin = false;
3851     }
3852 
3853     // The caller may still expect information passed over TLS if we pass our
    // check.
3855     if (StoreShadow) {
3856       IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
3857       if (MS.TrackOrigins && StoreOrigin)
3858         IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
3859     }
3860   }
3861 
3862   void visitPHINode(PHINode &I) {
3863     IRBuilder<> IRB(&I);
3864     if (!PropagateShadow) {
3865       setShadow(&I, getCleanShadow(&I));
3866       setOrigin(&I, getCleanOrigin());
3867       return;
3868     }
3869 
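    // Only placeholder PHIs are created here; their incoming shadow/origin
    // values are filled in after all blocks have been visited.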
3870     ShadowPHINodes.push_back(&I);
3871     setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(),
3872                                 "_msphi_s"));
3873     if (MS.TrackOrigins)
3874       setOrigin(&I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(),
3875                                   "_msphi_o"));
3876   }
3877 
3878   Value *getLocalVarDescription(AllocaInst &I) {
3879     SmallString<2048> StackDescriptionStorage;
3880     raw_svector_ostream StackDescription(StackDescriptionStorage);
3881     // We create a string with a description of the stack allocation and
3882     // pass it into __msan_set_alloca_origin.
3883     // It will be printed by the run-time if stack-originated UMR is found.
3884     // The first 4 bytes of the string are set to '----' and will be replaced
3885     // by __msan_va_arg_overflow_size_tls at the first call.
3886     StackDescription << "----" << I.getName() << "@" << F.getName();
3887     return createPrivateNonConstGlobalForString(*F.getParent(),
3888                                                 StackDescription.str());
3889   }
3890 
3891   void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
3892     if (PoisonStack && ClPoisonStackWithCall) {
3893       IRB.CreateCall(MS.MsanPoisonStackFn,
3894                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
3895     } else {
3896       Value *ShadowBase, *OriginBase;
3897       std::tie(ShadowBase, OriginBase) = getShadowOriginPtr(
3898           &I, IRB, IRB.getInt8Ty(), Align(1), /*isStore*/ true);
3899 
3900       Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
3901       IRB.CreateMemSet(ShadowBase, PoisonValue, Len, I.getAlign());
3902     }
3903 
3904     if (PoisonStack && MS.TrackOrigins) {
3905       Value *Descr = getLocalVarDescription(I);
3906       IRB.CreateCall(MS.MsanSetAllocaOrigin4Fn,
3907                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
3908                       IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()),
3909                       IRB.CreatePointerCast(&F, MS.IntptrTy)});
3910     }
3911   }
3912 
3913   void poisonAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
3914     Value *Descr = getLocalVarDescription(I);
3915     if (PoisonStack) {
3916       IRB.CreateCall(MS.MsanPoisonAllocaFn,
3917                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
3918                       IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())});
3919     } else {
3920       IRB.CreateCall(MS.MsanUnpoisonAllocaFn,
3921                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
3922     }
3923   }
3924 
3925   void instrumentAlloca(AllocaInst &I, Instruction *InsPoint = nullptr) {
3926     if (!InsPoint)
3927       InsPoint = &I;
3928     IRBuilder<> IRB(InsPoint->getNextNode());
3929     const DataLayout &DL = F.getParent()->getDataLayout();
3930     uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType());
3931     Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize);
3932     if (I.isArrayAllocation())
3933       Len = IRB.CreateMul(Len, I.getArraySize());
3934 
3935     if (MS.CompileKernel)
3936       poisonAllocaKmsan(I, IRB, Len);
3937     else
3938       poisonAllocaUserspace(I, IRB, Len);
3939   }
3940 
3941   void visitAllocaInst(AllocaInst &I) {
3942     setShadow(&I, getCleanShadow(&I));
3943     setOrigin(&I, getCleanOrigin());
3944     // We'll get to this alloca later unless it's poisoned at the corresponding
3945     // llvm.lifetime.start.
3946     AllocaSet.insert(&I);
3947   }
3948 
3949   void visitSelectInst(SelectInst& I) {
3950     IRBuilder<> IRB(&I);
3951     // a = select b, c, d
3952     Value *B = I.getCondition();
3953     Value *C = I.getTrueValue();
3954     Value *D = I.getFalseValue();
3955     Value *Sb = getShadow(B);
3956     Value *Sc = getShadow(C);
3957     Value *Sd = getShadow(D);
3958 
3959     // Result shadow if condition shadow is 0.
3960     Value *Sa0 = IRB.CreateSelect(B, Sc, Sd);
3961     Value *Sa1;
3962     if (I.getType()->isAggregateType()) {
3963       // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do
3964       // an extra "select". This results in much more compact IR.
3965       // Sa = select Sb, poisoned, (select b, Sc, Sd)
3966       Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
3967     } else {
3968       // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
3969       // If Sb (condition is poisoned), look for bits in c and d that are equal
3970       // and both unpoisoned.
3971       // If !Sb (condition is unpoisoned), simply pick one of Sc and Sd.
3972 
3973       // Cast arguments to shadow-compatible type.
3974       C = CreateAppToShadowCast(IRB, C);
3975       D = CreateAppToShadowCast(IRB, D);
3976 
3977       // Result shadow if condition shadow is 1.
3978       Sa1 = IRB.CreateOr({IRB.CreateXor(C, D), Sc, Sd});
3979     }
3980     Value *Sa = IRB.CreateSelect(Sb, Sa1, Sa0, "_msprop_select");
3981     setShadow(&I, Sa);
3982     if (MS.TrackOrigins) {
3983       // Origins are always i32, so any vector conditions must be flattened.
3984       // FIXME: consider tracking vector origins for app vectors?
3985       if (B->getType()->isVectorTy()) {
3986         Type *FlatTy = getShadowTyNoVec(B->getType());
3987         B = IRB.CreateICmpNE(IRB.CreateBitCast(B, FlatTy),
3988                                 ConstantInt::getNullValue(FlatTy));
3989         Sb = IRB.CreateICmpNE(IRB.CreateBitCast(Sb, FlatTy),
3990                                       ConstantInt::getNullValue(FlatTy));
3991       }
3992       // a = select b, c, d
3993       // Oa = Sb ? Ob : (b ? Oc : Od)
3994       setOrigin(
3995           &I, IRB.CreateSelect(Sb, getOrigin(I.getCondition()),
3996                                IRB.CreateSelect(B, getOrigin(I.getTrueValue()),
3997                                                 getOrigin(I.getFalseValue()))));
3998     }
3999   }
4000 
4001   void visitLandingPadInst(LandingPadInst &I) {
4002     // Do nothing.
4003     // See https://github.com/google/sanitizers/issues/504
4004     setShadow(&I, getCleanShadow(&I));
4005     setOrigin(&I, getCleanOrigin());
4006   }
4007 
4008   void visitCatchSwitchInst(CatchSwitchInst &I) {
4009     setShadow(&I, getCleanShadow(&I));
4010     setOrigin(&I, getCleanOrigin());
4011   }
4012 
4013   void visitFuncletPadInst(FuncletPadInst &I) {
4014     setShadow(&I, getCleanShadow(&I));
4015     setOrigin(&I, getCleanOrigin());
4016   }
4017 
4018   void visitGetElementPtrInst(GetElementPtrInst &I) {
4019     handleShadowOr(I);
4020   }
4021 
4022   void visitExtractValueInst(ExtractValueInst &I) {
4023     IRBuilder<> IRB(&I);
4024     Value *Agg = I.getAggregateOperand();
4025     LLVM_DEBUG(dbgs() << "ExtractValue:  " << I << "\n");
4026     Value *AggShadow = getShadow(Agg);
4027     LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
4028     Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
4029     LLVM_DEBUG(dbgs() << "   ResShadow:  " << *ResShadow << "\n");
4030     setShadow(&I, ResShadow);
4031     setOriginForNaryOp(I);
4032   }
4033 
4034   void visitInsertValueInst(InsertValueInst &I) {
4035     IRBuilder<> IRB(&I);
4036     LLVM_DEBUG(dbgs() << "InsertValue:  " << I << "\n");
4037     Value *AggShadow = getShadow(I.getAggregateOperand());
4038     Value *InsShadow = getShadow(I.getInsertedValueOperand());
4039     LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
4040     LLVM_DEBUG(dbgs() << "   InsShadow:  " << *InsShadow << "\n");
4041     Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
4042     LLVM_DEBUG(dbgs() << "   Res:        " << *Res << "\n");
4043     setShadow(&I, Res);
4044     setOriginForNaryOp(I);
4045   }
4046 
4047   void dumpInst(Instruction &I) {
4048     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
4049       errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n";
4050     } else {
4051       errs() << "ZZZ " << I.getOpcodeName() << "\n";
4052     }
4053     errs() << "QQQ " << I << "\n";
4054   }
4055 
4056   void visitResumeInst(ResumeInst &I) {
4057     LLVM_DEBUG(dbgs() << "Resume: " << I << "\n");
4058     // Nothing to do here.
4059   }
4060 
4061   void visitCleanupReturnInst(CleanupReturnInst &CRI) {
4062     LLVM_DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n");
4063     // Nothing to do here.
4064   }
4065 
4066   void visitCatchReturnInst(CatchReturnInst &CRI) {
4067     LLVM_DEBUG(dbgs() << "CatchReturn: " << CRI << "\n");
4068     // Nothing to do here.
4069   }
4070 
4071   void instrumentAsmArgument(Value *Operand, Instruction &I, IRBuilder<> &IRB,
4072                              const DataLayout &DL, bool isOutput) {
    // For each assembly argument, we check its value for being initialized.
    // If the argument is a pointer, we assume it points to a single element
    // of the corresponding type (or to an 8-byte word, if the type is unsized).
    // Each such pointer is instrumented with a call to the runtime library.
4077     Type *OpType = Operand->getType();
4078     // Check the operand value itself.
4079     insertShadowCheck(Operand, &I);
4080     if (!OpType->isPointerTy() || !isOutput) {
4081       assert(!isOutput);
4082       return;
4083     }
4084     Type *ElType = OpType->getPointerElementType();
4085     if (!ElType->isSized())
4086       return;
4087     int Size = DL.getTypeStoreSize(ElType);
4088     Value *Ptr = IRB.CreatePointerCast(Operand, IRB.getInt8PtrTy());
4089     Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
4090     IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Ptr, SizeVal});
4091   }
4092 
4093   /// Get the number of output arguments returned by pointers.
4094   int getNumOutputArgs(InlineAsm *IA, CallBase *CB) {
4095     int NumRetOutputs = 0;
4096     int NumOutputs = 0;
4097     Type *RetTy = cast<Value>(CB)->getType();
4098     if (!RetTy->isVoidTy()) {
4099       // Register outputs are returned via the CallInst return value.
4100       auto *ST = dyn_cast<StructType>(RetTy);
4101       if (ST)
4102         NumRetOutputs = ST->getNumElements();
4103       else
4104         NumRetOutputs = 1;
4105     }
4106     InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
4107     for (const InlineAsm::ConstraintInfo &Info : Constraints) {
4108       switch (Info.Type) {
4109       case InlineAsm::isOutput:
4110         NumOutputs++;
4111         break;
4112       default:
4113         break;
4114       }
4115     }
4116     return NumOutputs - NumRetOutputs;
4117   }
4118 
4119   void visitAsmInstruction(Instruction &I) {
4120     // Conservative inline assembly handling: check for poisoned shadow of
4121     // asm() arguments, then unpoison the result and all the memory locations
4122     // pointed to by those arguments.
4123     // An inline asm() statement in C++ contains lists of input and output
4124     // arguments used by the assembly code. These are mapped to operands of the
4125     // CallInst as follows:
    //  - nR register outputs ("=r") are returned by value in a single structure
4127     //  (SSA value of the CallInst);
4128     //  - nO other outputs ("=m" and others) are returned by pointer as first
4129     // nO operands of the CallInst;
4130     //  - nI inputs ("r", "m" and others) are passed to CallInst as the
4131     // remaining nI operands.
4132     // The total number of asm() arguments in the source is nR+nO+nI, and the
4133     // corresponding CallInst has nO+nI+1 operands (the last operand is the
4134     // function to be called).
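    // For example (illustrative only), for
    //   asm("..." : "=r"(a), "=m"(b) : "r"(c));
    // nR = 1 (the value of 'a' is the CallInst result), nO = 1 (a pointer to
    // 'b' is the first CallInst operand), nI = 1 ('c' is the next operand),
    // and the final operand is the asm callee itself.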
4135     const DataLayout &DL = F.getParent()->getDataLayout();
4136     CallBase *CB = cast<CallBase>(&I);
4137     IRBuilder<> IRB(&I);
4138     InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
4139     int OutputArgs = getNumOutputArgs(IA, CB);
4140     // The last operand of a CallInst is the function itself.
4141     int NumOperands = CB->getNumOperands() - 1;
4142 
    // Check input arguments. Do this before unpoisoning the output arguments,
    // so that we don't overwrite uninitialized values before checking them.
4145     for (int i = OutputArgs; i < NumOperands; i++) {
4146       Value *Operand = CB->getOperand(i);
4147       instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ false);
4148     }
4149     // Unpoison output arguments. This must happen before the actual InlineAsm
4150     // call, so that the shadow for memory published in the asm() statement
4151     // remains valid.
4152     for (int i = 0; i < OutputArgs; i++) {
4153       Value *Operand = CB->getOperand(i);
4154       instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ true);
4155     }
4156 
4157     setShadow(&I, getCleanShadow(&I));
4158     setOrigin(&I, getCleanOrigin());
4159   }
4160 
4161   void visitFreezeInst(FreezeInst &I) {
4162     // Freeze always returns a fully defined value.
4163     setShadow(&I, getCleanShadow(&I));
4164     setOrigin(&I, getCleanOrigin());
4165   }
4166 
4167   void visitInstruction(Instruction &I) {
4168     // Everything else: stop propagating and check for poisoned shadow.
4169     if (ClDumpStrictInstructions)
4170       dumpInst(I);
4171     LLVM_DEBUG(dbgs() << "DEFAULT: " << I << "\n");
4172     for (size_t i = 0, n = I.getNumOperands(); i < n; i++) {
4173       Value *Operand = I.getOperand(i);
4174       if (Operand->getType()->isSized())
4175         insertShadowCheck(Operand, &I);
4176     }
4177     setShadow(&I, getCleanShadow(&I));
4178     setOrigin(&I, getCleanOrigin());
4179   }
4180 };
4181 
4182 /// AMD64-specific implementation of VarArgHelper.
4183 struct VarArgAMD64Helper : public VarArgHelper {
4184   // An unfortunate workaround for asymmetric lowering of va_arg stuff.
4185   // See a comment in visitCallBase for more details.
4186   static const unsigned AMD64GpEndOffset = 48;  // AMD64 ABI Draft 0.99.6 p3.5.7
4187   static const unsigned AMD64FpEndOffsetSSE = 176;
4188   // If SSE is disabled, fp_offset in va_list is zero.
4189   static const unsigned AMD64FpEndOffsetNoSSE = AMD64GpEndOffset;
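  // 48 = 6 general-purpose argument registers (rdi, rsi, rdx, rcx, r8, r9)
  //      * 8 bytes each; 176 = 48 + 8 SSE argument registers (xmm0-xmm7)
  //      * 16 bytes each.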
4190 
4191   unsigned AMD64FpEndOffset;
4192   Function &F;
4193   MemorySanitizer &MS;
4194   MemorySanitizerVisitor &MSV;
4195   Value *VAArgTLSCopy = nullptr;
4196   Value *VAArgTLSOriginCopy = nullptr;
4197   Value *VAArgOverflowSize = nullptr;
4198 
4199   SmallVector<CallInst*, 16> VAStartInstrumentationList;
4200 
4201   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
4202 
4203   VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
4204                     MemorySanitizerVisitor &MSV)
4205       : F(F), MS(MS), MSV(MSV) {
4206     AMD64FpEndOffset = AMD64FpEndOffsetSSE;
4207     for (const auto &Attr : F.getAttributes().getFnAttrs()) {
4208       if (Attr.isStringAttribute() &&
4209           (Attr.getKindAsString() == "target-features")) {
4210         if (Attr.getValueAsString().contains("-sse"))
4211           AMD64FpEndOffset = AMD64FpEndOffsetNoSSE;
4212         break;
4213       }
4214     }
4215   }
4216 
4217   ArgKind classifyArgument(Value* arg) {
4218     // A very rough approximation of X86_64 argument classification rules.
4219     Type *T = arg->getType();
4220     if (T->isFPOrFPVectorTy() || T->isX86_MMXTy())
4221       return AK_FloatingPoint;
4222     if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
4223       return AK_GeneralPurpose;
4224     if (T->isPointerTy())
4225       return AK_GeneralPurpose;
4226     return AK_Memory;
4227   }
4228 
  // For VarArg functions, store the argument shadow in an ABI-specific format
  // that corresponds to the va_list layout.
  // We do this because Clang lowers va_arg in the frontend, and this pass
  // only sees the low-level code that deals with va_list internals.
  // A much easier alternative (provided that Clang emits va_arg instructions)
  // would have been to associate each live instance of va_list with a copy of
  // MSanParamTLS, and extract the shadow on each va_arg() call in argument
  // list order.
4237   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4238     unsigned GpOffset = 0;
4239     unsigned FpOffset = AMD64GpEndOffset;
4240     unsigned OverflowOffset = AMD64FpEndOffset;
4241     const DataLayout &DL = F.getParent()->getDataLayout();
4242     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
4243          ++ArgIt) {
4244       Value *A = *ArgIt;
4245       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
4246       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
4247       bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
4248       if (IsByVal) {
4249         // ByVal arguments always go to the overflow area.
4250         // Fixed arguments passed through the overflow area will be stepped
4251         // over by va_start, so don't count them towards the offset.
4252         if (IsFixed)
4253           continue;
4254         assert(A->getType()->isPointerTy());
4255         Type *RealTy = CB.getParamByValType(ArgNo);
4256         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
4257         Value *ShadowBase = getShadowPtrForVAArgument(
4258             RealTy, IRB, OverflowOffset, alignTo(ArgSize, 8));
4259         Value *OriginBase = nullptr;
4260         if (MS.TrackOrigins)
4261           OriginBase = getOriginPtrForVAArgument(RealTy, IRB, OverflowOffset);
4262         OverflowOffset += alignTo(ArgSize, 8);
4263         if (!ShadowBase)
4264           continue;
4265         Value *ShadowPtr, *OriginPtr;
4266         std::tie(ShadowPtr, OriginPtr) =
4267             MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment,
4268                                    /*isStore*/ false);
4269 
4270         IRB.CreateMemCpy(ShadowBase, kShadowTLSAlignment, ShadowPtr,
4271                          kShadowTLSAlignment, ArgSize);
4272         if (MS.TrackOrigins)
4273           IRB.CreateMemCpy(OriginBase, kShadowTLSAlignment, OriginPtr,
4274                            kShadowTLSAlignment, ArgSize);
4275       } else {
4276         ArgKind AK = classifyArgument(A);
4277         if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
4278           AK = AK_Memory;
4279         if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
4280           AK = AK_Memory;
4281         Value *ShadowBase, *OriginBase = nullptr;
4282         switch (AK) {
4283           case AK_GeneralPurpose:
4284             ShadowBase =
4285                 getShadowPtrForVAArgument(A->getType(), IRB, GpOffset, 8);
4286             if (MS.TrackOrigins)
4287               OriginBase =
4288                   getOriginPtrForVAArgument(A->getType(), IRB, GpOffset);
4289             GpOffset += 8;
4290             break;
4291           case AK_FloatingPoint:
4292             ShadowBase =
4293                 getShadowPtrForVAArgument(A->getType(), IRB, FpOffset, 16);
4294             if (MS.TrackOrigins)
4295               OriginBase =
4296                   getOriginPtrForVAArgument(A->getType(), IRB, FpOffset);
4297             FpOffset += 16;
4298             break;
4299           case AK_Memory:
4300             if (IsFixed)
4301               continue;
4302             uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4303             ShadowBase =
4304                 getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset, 8);
4305             if (MS.TrackOrigins)
4306               OriginBase =
4307                   getOriginPtrForVAArgument(A->getType(), IRB, OverflowOffset);
4308             OverflowOffset += alignTo(ArgSize, 8);
4309         }
4310         // Take fixed arguments into account for GpOffset and FpOffset,
4311         // but don't actually store shadows for them.
4312         // TODO(glider): don't call get*PtrForVAArgument() for them.
4313         if (IsFixed)
4314           continue;
4315         if (!ShadowBase)
4316           continue;
4317         Value *Shadow = MSV.getShadow(A);
4318         IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment);
4319         if (MS.TrackOrigins) {
4320           Value *Origin = MSV.getOrigin(A);
4321           unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
4322           MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
4323                           std::max(kShadowTLSAlignment, kMinOriginAlignment));
4324         }
4325       }
4326     }
4327     Constant *OverflowSize =
4328       ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
4329     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
4330   }
4331 
4332   /// Compute the shadow address for a given va_arg.
4333   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4334                                    unsigned ArgOffset, unsigned ArgSize) {
4335     // Make sure we don't overflow __msan_va_arg_tls.
4336     if (ArgOffset + ArgSize > kParamTLSSize)
4337       return nullptr;
4338     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4339     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4340     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4341                               "_msarg_va_s");
4342   }
4343 
4344   /// Compute the origin address for a given va_arg.
4345   Value *getOriginPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, int ArgOffset) {
4346     Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
4347     // getOriginPtrForVAArgument() is always called after
4348     // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never
4349     // overflow.
4350     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4351     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
4352                               "_msarg_va_o");
4353   }
4354 
4355   void unpoisonVAListTagForInst(IntrinsicInst &I) {
4356     IRBuilder<> IRB(&I);
4357     Value *VAListTag = I.getArgOperand(0);
4358     Value *ShadowPtr, *OriginPtr;
4359     const Align Alignment = Align(8);
4360     std::tie(ShadowPtr, OriginPtr) =
4361         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
4362                                /*isStore*/ true);
4363 
4364     // Unpoison the whole __va_list_tag.
4365     // FIXME: magic ABI constants.
4366     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4367                      /* size */ 24, Alignment, false);
4368     // We shouldn't need to zero out the origins, as they're only checked for
4369     // nonzero shadow.
4370   }
4371 
4372   void visitVAStartInst(VAStartInst &I) override {
4373     if (F.getCallingConv() == CallingConv::Win64)
4374       return;
4375     VAStartInstrumentationList.push_back(&I);
4376     unpoisonVAListTagForInst(I);
4377   }
4378 
4379   void visitVACopyInst(VACopyInst &I) override {
4380     if (F.getCallingConv() == CallingConv::Win64) return;
4381     unpoisonVAListTagForInst(I);
4382   }
4383 
4384   void finalizeInstrumentation() override {
4385     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
4386            "finalizeInstrumentation called twice");
4387     if (!VAStartInstrumentationList.empty()) {
4388       // If there is a va_start in this function, make a backup copy of
4389       // va_arg_tls somewhere in the function entry block.
4390       IRBuilder<> IRB(MSV.FnPrologueEnd);
4391       VAArgOverflowSize =
4392           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4393       Value *CopySize =
4394         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset),
4395                       VAArgOverflowSize);
4396       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4397       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
4398       if (MS.TrackOrigins) {
4399         VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4400         IRB.CreateMemCpy(VAArgTLSOriginCopy, Align(8), MS.VAArgOriginTLS,
4401                          Align(8), CopySize);
4402       }
4403     }
4404 
4405     // Instrument va_start.
4406     // Copy va_list shadow from the backup copy of the TLS contents.
4407     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4408       CallInst *OrigInst = VAStartInstrumentationList[i];
4409       IRBuilder<> IRB(OrigInst->getNextNode());
4410       Value *VAListTag = OrigInst->getArgOperand(0);
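      // For reference, the System V AMD64 __va_list_tag layout is roughly:
      //   struct __va_list_tag {
      //     unsigned int gp_offset;   // offset 0
      //     unsigned int fp_offset;   // offset 4
      //     void *overflow_arg_area;  // offset 8
      //     void *reg_save_area;      // offset 16
      //   };
      // The loads below at offsets 16 and 8 fetch the register save area and
      // the overflow (stack) argument area pointers, respectively.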
4411 
4412       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4413       Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
4414           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4415                         ConstantInt::get(MS.IntptrTy, 16)),
4416           PointerType::get(RegSaveAreaPtrTy, 0));
4417       Value *RegSaveAreaPtr =
4418           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
4419       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
4420       const Align Alignment = Align(16);
4421       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
4422           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4423                                  Alignment, /*isStore*/ true);
4424       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
4425                        AMD64FpEndOffset);
4426       if (MS.TrackOrigins)
4427         IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
4428                          Alignment, AMD64FpEndOffset);
4429       Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4430       Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
4431           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4432                         ConstantInt::get(MS.IntptrTy, 8)),
4433           PointerType::get(OverflowArgAreaPtrTy, 0));
4434       Value *OverflowArgAreaPtr =
4435           IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
4436       Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
4437       std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
4438           MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
4439                                  Alignment, /*isStore*/ true);
4440       Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
4441                                              AMD64FpEndOffset);
4442       IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
4443                        VAArgOverflowSize);
4444       if (MS.TrackOrigins) {
4445         SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
4446                                         AMD64FpEndOffset);
4447         IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
4448                          VAArgOverflowSize);
4449       }
4450     }
4451   }
4452 };
4453 
4454 /// MIPS64-specific implementation of VarArgHelper.
4455 struct VarArgMIPS64Helper : public VarArgHelper {
4456   Function &F;
4457   MemorySanitizer &MS;
4458   MemorySanitizerVisitor &MSV;
4459   Value *VAArgTLSCopy = nullptr;
4460   Value *VAArgSize = nullptr;
4461 
4462   SmallVector<CallInst*, 16> VAStartInstrumentationList;
4463 
4464   VarArgMIPS64Helper(Function &F, MemorySanitizer &MS,
4465                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
4466 
4467   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4468     unsigned VAArgOffset = 0;
4469     const DataLayout &DL = F.getParent()->getDataLayout();
4470     for (auto ArgIt = CB.arg_begin() + CB.getFunctionType()->getNumParams(),
4471               End = CB.arg_end();
4472          ArgIt != End; ++ArgIt) {
4473       Triple TargetTriple(F.getParent()->getTargetTriple());
4474       Value *A = *ArgIt;
4475       Value *Base;
4476       uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4477       if (TargetTriple.getArch() == Triple::mips64) {
        // Adjust the shadow offset for arguments of size < 8 to match the
        // placement of the value bits within the 8-byte slot on a big-endian
        // system (e.g. an i32 argument lives in the last 4 bytes of its slot).
4480         if (ArgSize < 8)
4481           VAArgOffset += (8 - ArgSize);
4482       }
4483       Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize);
4484       VAArgOffset += ArgSize;
4485       VAArgOffset = alignTo(VAArgOffset, 8);
4486       if (!Base)
4487         continue;
4488       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
4489     }
4490 
4491     Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset);
    // We reuse VAArgOverflowSizeTLS as VAArgSizeTLS here to avoid creating a
    // new class member; in this case it holds the total size of all varargs.
4494     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
4495   }
4496 
4497   /// Compute the shadow address for a given va_arg.
4498   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4499                                    unsigned ArgOffset, unsigned ArgSize) {
4500     // Make sure we don't overflow __msan_va_arg_tls.
4501     if (ArgOffset + ArgSize > kParamTLSSize)
4502       return nullptr;
4503     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4504     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4505     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4506                               "_msarg");
4507   }
4508 
4509   void visitVAStartInst(VAStartInst &I) override {
4510     IRBuilder<> IRB(&I);
4511     VAStartInstrumentationList.push_back(&I);
4512     Value *VAListTag = I.getArgOperand(0);
4513     Value *ShadowPtr, *OriginPtr;
4514     const Align Alignment = Align(8);
4515     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4516         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
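    // On MIPS N64, va_list is a single pointer into the argument save area,
    // so unpoisoning 8 bytes covers the whole tag.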
4517     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4518                      /* size */ 8, Alignment, false);
4519   }
4520 
4521   void visitVACopyInst(VACopyInst &I) override {
4522     IRBuilder<> IRB(&I);
4523     VAStartInstrumentationList.push_back(&I);
4524     Value *VAListTag = I.getArgOperand(0);
4525     Value *ShadowPtr, *OriginPtr;
4526     const Align Alignment = Align(8);
4527     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4528         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4529     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4530                      /* size */ 8, Alignment, false);
4531   }
4532 
4533   void finalizeInstrumentation() override {
4534     assert(!VAArgSize && !VAArgTLSCopy &&
4535            "finalizeInstrumentation called twice");
4536     IRBuilder<> IRB(MSV.FnPrologueEnd);
4537     VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4538     Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
4539                                     VAArgSize);
4540 
4541     if (!VAStartInstrumentationList.empty()) {
4542       // If there is a va_start in this function, make a backup copy of
4543       // va_arg_tls somewhere in the function entry block.
4544       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4545       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
4546     }
4547 
4548     // Instrument va_start.
4549     // Copy va_list shadow from the backup copy of the TLS contents.
4550     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4551       CallInst *OrigInst = VAStartInstrumentationList[i];
4552       IRBuilder<> IRB(OrigInst->getNextNode());
4553       Value *VAListTag = OrigInst->getArgOperand(0);
4554       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4555       Value *RegSaveAreaPtrPtr =
4556           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4557                              PointerType::get(RegSaveAreaPtrTy, 0));
4558       Value *RegSaveAreaPtr =
4559           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
4560       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
4561       const Align Alignment = Align(8);
4562       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
4563           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4564                                  Alignment, /*isStore*/ true);
4565       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
4566                        CopySize);
4567     }
4568   }
4569 };
4570 
4571 /// AArch64-specific implementation of VarArgHelper.
4572 struct VarArgAArch64Helper : public VarArgHelper {
4573   static const unsigned kAArch64GrArgSize = 64;
4574   static const unsigned kAArch64VrArgSize = 128;
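  // 64 = 8 general-purpose argument registers (x0-x7) * 8 bytes each;
  // 128 = 8 FP/SIMD argument registers (v0-v7) * 16 bytes each.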
4575 
4576   static const unsigned AArch64GrBegOffset = 0;
4577   static const unsigned AArch64GrEndOffset = kAArch64GrArgSize;
4578   // Make VR space aligned to 16 bytes.
4579   static const unsigned AArch64VrBegOffset = AArch64GrEndOffset;
4580   static const unsigned AArch64VrEndOffset = AArch64VrBegOffset
4581                                              + kAArch64VrArgSize;
4582   static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;
4583 
4584   Function &F;
4585   MemorySanitizer &MS;
4586   MemorySanitizerVisitor &MSV;
4587   Value *VAArgTLSCopy = nullptr;
4588   Value *VAArgOverflowSize = nullptr;
4589 
4590   SmallVector<CallInst*, 16> VAStartInstrumentationList;
4591 
4592   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
4593 
4594   VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
4595                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
4596 
4597   ArgKind classifyArgument(Value* arg) {
4598     Type *T = arg->getType();
4599     if (T->isFPOrFPVectorTy())
4600       return AK_FloatingPoint;
4601     if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
4602         || (T->isPointerTy()))
4603       return AK_GeneralPurpose;
4604     return AK_Memory;
4605   }
4606 
  // The instrumentation stores the argument shadow in a non-ABI-specific
  // format because it does not know which arguments are named (since Clang,
  // as in the x86_64 case, lowers va_arg in the frontend and this pass only
  // sees the low-level code that deals with va_list internals).
  // The first eight GR registers (x0-x7) are saved in the first 64 bytes of
  // the va_arg TLS array, followed by the first eight FP/SIMD registers
  // (v0-v7), and then the remaining arguments.
  // Using constant offsets within the va_arg TLS array allows a fast copy
  // in the finalize instrumentation.
4616   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4617     unsigned GrOffset = AArch64GrBegOffset;
4618     unsigned VrOffset = AArch64VrBegOffset;
4619     unsigned OverflowOffset = AArch64VAEndOffset;
4620 
4621     const DataLayout &DL = F.getParent()->getDataLayout();
4622     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
4623          ++ArgIt) {
4624       Value *A = *ArgIt;
4625       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
4626       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
4627       ArgKind AK = classifyArgument(A);
4628       if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset)
4629         AK = AK_Memory;
4630       if (AK == AK_FloatingPoint && VrOffset >= AArch64VrEndOffset)
4631         AK = AK_Memory;
4632       Value *Base;
4633       switch (AK) {
4634         case AK_GeneralPurpose:
4635           Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset, 8);
4636           GrOffset += 8;
4637           break;
4638         case AK_FloatingPoint:
4639           Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset, 8);
4640           VrOffset += 16;
4641           break;
4642         case AK_Memory:
4643           // Don't count fixed arguments in the overflow area - va_start will
4644           // skip right over them.
4645           if (IsFixed)
4646             continue;
4647           uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4648           Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset,
4649                                            alignTo(ArgSize, 8));
4650           OverflowOffset += alignTo(ArgSize, 8);
4651           break;
4652       }
      // Fixed Gp/Vr arguments are counted towards their respective offsets,
      // but we don't actually store a shadow for them.
4655       if (IsFixed)
4656         continue;
4657       if (!Base)
4658         continue;
4659       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
4660     }
4661     Constant *OverflowSize =
4662       ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset);
4663     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
4664   }
4665 
4666   /// Compute the shadow address for a given va_arg.
4667   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4668                                    unsigned ArgOffset, unsigned ArgSize) {
4669     // Make sure we don't overflow __msan_va_arg_tls.
4670     if (ArgOffset + ArgSize > kParamTLSSize)
4671       return nullptr;
4672     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4673     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4674     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4675                               "_msarg");
4676   }
4677 
4678   void visitVAStartInst(VAStartInst &I) override {
4679     IRBuilder<> IRB(&I);
4680     VAStartInstrumentationList.push_back(&I);
4681     Value *VAListTag = I.getArgOperand(0);
4682     Value *ShadowPtr, *OriginPtr;
4683     const Align Alignment = Align(8);
4684     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4685         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4686     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4687                      /* size */ 32, Alignment, false);
4688   }
4689 
4690   void visitVACopyInst(VACopyInst &I) override {
4691     IRBuilder<> IRB(&I);
4692     VAStartInstrumentationList.push_back(&I);
4693     Value *VAListTag = I.getArgOperand(0);
4694     Value *ShadowPtr, *OriginPtr;
4695     const Align Alignment = Align(8);
4696     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4697         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4698     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4699                      /* size */ 32, Alignment, false);
4700   }
4701 
4702   // Retrieve a va_list field of 'void*' size.
4703   Value* getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
4704     Value *SaveAreaPtrPtr =
4705       IRB.CreateIntToPtr(
4706         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4707                       ConstantInt::get(MS.IntptrTy, offset)),
4708         Type::getInt64PtrTy(*MS.C));
4709     return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr);
4710   }
4711 
4712   // Retrieve a va_list field of 'int' size.
4713   Value* getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
4714     Value *SaveAreaPtr =
4715       IRB.CreateIntToPtr(
4716         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4717                       ConstantInt::get(MS.IntptrTy, offset)),
4718         Type::getInt32PtrTy(*MS.C));
4719     Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr);
4720     return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
4721   }
4722 
4723   void finalizeInstrumentation() override {
4724     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
4725            "finalizeInstrumentation called twice");
4726     if (!VAStartInstrumentationList.empty()) {
4727       // If there is a va_start in this function, make a backup copy of
4728       // va_arg_tls somewhere in the function entry block.
4729       IRBuilder<> IRB(MSV.FnPrologueEnd);
4730       VAArgOverflowSize =
4731           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4732       Value *CopySize =
4733         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset),
4734                       VAArgOverflowSize);
4735       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4736       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
4737     }
4738 
4739     Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
4740     Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize);
4741 
4742     // Instrument va_start, copy va_list shadow from the backup copy of
4743     // the TLS contents.
4744     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4745       CallInst *OrigInst = VAStartInstrumentationList[i];
4746       IRBuilder<> IRB(OrigInst->getNextNode());
4747 
4748       Value *VAListTag = OrigInst->getArgOperand(0);
4749 
      // The variadic ABI for AArch64 creates two areas to save the incoming
      // argument registers (one for the 64-bit general registers x0-x7 and
      // another for the 128-bit FP/SIMD registers v0-v7).
      // We then need to propagate the shadow arguments to both regions
      // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'.
      // The remaining arguments are saved in the shadow for 'va::__stack'.
      // One caveat is that only the non-named arguments need to be
      // propagated, but at the call site instrumentation 'all' the arguments
      // are saved. So to copy the shadow values from the va_arg TLS array we
      // need to adjust the offsets for both the GR and VR fields based on
      // the __{gr,vr}_offs values (since those are computed from the number
      // of incoming named arguments).
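      // For reference, the AAPCS64 va_list layout is roughly:
      //   struct va_list {
      //     void *__stack;   // offset 0
      //     void *__gr_top;  // offset 8
      //     void *__vr_top;  // offset 16
      //     int __gr_offs;   // offset 24
      //     int __vr_offs;   // offset 28
      //   };
      // The getVAField64/getVAField32 calls below read these fields.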
4762 
4763       // Read the stack pointer from the va_list.
4764       Value *StackSaveAreaPtr = getVAField64(IRB, VAListTag, 0);
4765 
4766       // Read both the __gr_top and __gr_off and add them up.
4767       Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8);
4768       Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24);
4769 
4770       Value *GrRegSaveAreaPtr = IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea);
4771 
4772       // Read both the __vr_top and __vr_off and add them up.
4773       Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16);
4774       Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28);
4775 
4776       Value *VrRegSaveAreaPtr = IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea);
4777 
      // The instrumentation does not know how many named arguments are
      // actually used, and at the call site all the arguments were saved.
      // Since __gr_offs is defined as '0 - ((8 - named_gr) * 8)', the idea is
      // to propagate only the variadic arguments by skipping the bytes of
      // shadow that correspond to named arguments.
4782       Value *GrRegSaveAreaShadowPtrOff =
4783         IRB.CreateAdd(GrArgSize, GrOffSaveArea);
4784 
4785       Value *GrRegSaveAreaShadowPtr =
4786           MSV.getShadowOriginPtr(GrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4787                                  Align(8), /*isStore*/ true)
4788               .first;
4789 
4790       Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
4791                                               GrRegSaveAreaShadowPtrOff);
4792       Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);
4793 
4794       IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, Align(8), GrSrcPtr, Align(8),
4795                        GrCopySize);
4796 
4797       // Again, but for FP/SIMD values.
4798       Value *VrRegSaveAreaShadowPtrOff =
4799           IRB.CreateAdd(VrArgSize, VrOffSaveArea);
4800 
4801       Value *VrRegSaveAreaShadowPtr =
4802           MSV.getShadowOriginPtr(VrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4803                                  Align(8), /*isStore*/ true)
4804               .first;
4805 
4806       Value *VrSrcPtr = IRB.CreateInBoundsGEP(
4807         IRB.getInt8Ty(),
4808         IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
4809                               IRB.getInt32(AArch64VrBegOffset)),
4810         VrRegSaveAreaShadowPtrOff);
4811       Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);
4812 
4813       IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, Align(8), VrSrcPtr, Align(8),
4814                        VrCopySize);
4815 
4816       // And finally for remaining arguments.
4817       Value *StackSaveAreaShadowPtr =
4818           MSV.getShadowOriginPtr(StackSaveAreaPtr, IRB, IRB.getInt8Ty(),
4819                                  Align(16), /*isStore*/ true)
4820               .first;
4821 
4822       Value *StackSrcPtr =
4823         IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
4824                               IRB.getInt32(AArch64VAEndOffset));
4825 
4826       IRB.CreateMemCpy(StackSaveAreaShadowPtr, Align(16), StackSrcPtr,
4827                        Align(16), VAArgOverflowSize);
4828     }
4829   }
4830 };
4831 
4832 /// PowerPC64-specific implementation of VarArgHelper.
4833 struct VarArgPowerPC64Helper : public VarArgHelper {
4834   Function &F;
4835   MemorySanitizer &MS;
4836   MemorySanitizerVisitor &MSV;
4837   Value *VAArgTLSCopy = nullptr;
4838   Value *VAArgSize = nullptr;
4839 
4840   SmallVector<CallInst*, 16> VAStartInstrumentationList;
4841 
4842   VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS,
4843                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
4844 
4845   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
    // For PowerPC, we need to deal with the alignment of stack arguments -
    // they are mostly aligned to 8 bytes, but vectors and i128 arrays are
    // aligned to 16 bytes, and byvals can be aligned to 8 or 16 bytes.
    // For that reason, we compute the current offset from the stack pointer
    // (which is always properly aligned) and the offset of the first vararg,
    // then subtract them.
4852     unsigned VAArgBase;
4853     Triple TargetTriple(F.getParent()->getTargetTriple());
    // The parameter save area starts 48 bytes from the frame pointer for
    // ABIv1 and 32 bytes for ABIv2. This is usually determined by the target
    // endianness, but in theory it could be overridden by a function
    // attribute.
4857     if (TargetTriple.getArch() == Triple::ppc64)
4858       VAArgBase = 48;
4859     else
4860       VAArgBase = 32;
4861     unsigned VAArgOffset = VAArgBase;
4862     const DataLayout &DL = F.getParent()->getDataLayout();
4863     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
4864          ++ArgIt) {
4865       Value *A = *ArgIt;
4866       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
4867       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
4868       bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
4869       if (IsByVal) {
4870         assert(A->getType()->isPointerTy());
4871         Type *RealTy = CB.getParamByValType(ArgNo);
4872         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
4873         MaybeAlign ArgAlign = CB.getParamAlign(ArgNo);
4874         if (!ArgAlign || *ArgAlign < Align(8))
4875           ArgAlign = Align(8);
4876         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
4877         if (!IsFixed) {
4878           Value *Base = getShadowPtrForVAArgument(
4879               RealTy, IRB, VAArgOffset - VAArgBase, ArgSize);
4880           if (Base) {
4881             Value *AShadowPtr, *AOriginPtr;
4882             std::tie(AShadowPtr, AOriginPtr) =
4883                 MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(),
4884                                        kShadowTLSAlignment, /*isStore*/ false);
4885 
4886             IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr,
4887                              kShadowTLSAlignment, ArgSize);
4888           }
4889         }
4890         VAArgOffset += alignTo(ArgSize, 8);
4891       } else {
4892         Value *Base;
4893         uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4894         uint64_t ArgAlign = 8;
4895         if (A->getType()->isArrayTy()) {
4896           // Arrays are aligned to element size, except for long double
4897           // arrays, which are aligned to 8 bytes.
4898           Type *ElementTy = A->getType()->getArrayElementType();
4899           if (!ElementTy->isPPC_FP128Ty())
4900             ArgAlign = DL.getTypeAllocSize(ElementTy);
4901         } else if (A->getType()->isVectorTy()) {
4902           // Vectors are naturally aligned.
4903           ArgAlign = DL.getTypeAllocSize(A->getType());
4904         }
4905         if (ArgAlign < 8)
4906           ArgAlign = 8;
4907         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
4908         if (DL.isBigEndian()) {
          // Adjust the shadow offset for arguments of size < 8 to match the
          // placement of the value bits within the 8-byte slot on a
          // big-endian system.
4911           if (ArgSize < 8)
4912             VAArgOffset += (8 - ArgSize);
4913         }
4914         if (!IsFixed) {
4915           Base = getShadowPtrForVAArgument(A->getType(), IRB,
4916                                            VAArgOffset - VAArgBase, ArgSize);
4917           if (Base)
4918             IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
4919         }
4920         VAArgOffset += ArgSize;
4921         VAArgOffset = alignTo(VAArgOffset, 8);
4922       }
4923       if (IsFixed)
4924         VAArgBase = VAArgOffset;
4925     }
4926 
4927     Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(),
4928                                                 VAArgOffset - VAArgBase);
    // We reuse VAArgOverflowSizeTLS as VAArgSizeTLS here to avoid creating a
    // new class member; in this case it holds the total size of all varargs.
4931     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
4932   }
4933 
4934   /// Compute the shadow address for a given va_arg.
4935   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4936                                    unsigned ArgOffset, unsigned ArgSize) {
4937     // Make sure we don't overflow __msan_va_arg_tls.
4938     if (ArgOffset + ArgSize > kParamTLSSize)
4939       return nullptr;
4940     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4941     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4942     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4943                               "_msarg");
4944   }
4945 
4946   void visitVAStartInst(VAStartInst &I) override {
4947     IRBuilder<> IRB(&I);
4948     VAStartInstrumentationList.push_back(&I);
4949     Value *VAListTag = I.getArgOperand(0);
4950     Value *ShadowPtr, *OriginPtr;
4951     const Align Alignment = Align(8);
4952     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4953         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
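    // On PPC64 ELF, va_list is a single pointer into the parameter save
    // area, so unpoisoning 8 bytes covers the whole tag.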
4954     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4955                      /* size */ 8, Alignment, false);
4956   }
4957 
4958   void visitVACopyInst(VACopyInst &I) override {
4959     IRBuilder<> IRB(&I);
4960     Value *VAListTag = I.getArgOperand(0);
4961     Value *ShadowPtr, *OriginPtr;
4962     const Align Alignment = Align(8);
4963     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4964         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4965     // Unpoison the whole __va_list_tag.
4966     // FIXME: magic ABI constants.
4967     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4968                      /* size */ 8, Alignment, false);
4969   }
4970 
4971   void finalizeInstrumentation() override {
4972     assert(!VAArgSize && !VAArgTLSCopy &&
4973            "finalizeInstrumentation called twice");
4974     IRBuilder<> IRB(MSV.FnPrologueEnd);
4975     VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4976     Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
4977                                     VAArgSize);
4978 
4979     if (!VAStartInstrumentationList.empty()) {
4980       // If there is a va_start in this function, make a backup copy of
4981       // va_arg_tls somewhere in the function entry block.
4982       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4983       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
4984     }
4985 
4986     // Instrument va_start.
4987     // Copy va_list shadow from the backup copy of the TLS contents.
4988     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4989       CallInst *OrigInst = VAStartInstrumentationList[i];
4990       IRBuilder<> IRB(OrigInst->getNextNode());
4991       Value *VAListTag = OrigInst->getArgOperand(0);
4992       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4993       Value *RegSaveAreaPtrPtr =
4994           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4995                              PointerType::get(RegSaveAreaPtrTy, 0));
4996       Value *RegSaveAreaPtr =
4997           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
4998       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
4999       const Align Alignment = Align(8);
5000       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
5001           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
5002                                  Alignment, /*isStore*/ true);
5003       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
5004                        CopySize);
5005     }
5006   }
5007 };
5008 
5009 /// SystemZ-specific implementation of VarArgHelper.
5010 struct VarArgSystemZHelper : public VarArgHelper {
5011   static const unsigned SystemZGpOffset = 16;
5012   static const unsigned SystemZGpEndOffset = 56;
5013   static const unsigned SystemZFpOffset = 128;
5014   static const unsigned SystemZFpEndOffset = 160;
5015   static const unsigned SystemZMaxVrArgs = 8;
5016   static const unsigned SystemZRegSaveAreaSize = 160;
5017   static const unsigned SystemZOverflowOffset = 160;
5018   static const unsigned SystemZVAListTagSize = 32;
5019   static const unsigned SystemZOverflowArgAreaPtrOffset = 16;
5020   static const unsigned SystemZRegSaveAreaPtrOffset = 24;
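  // For reference, the s390x va_list layout is roughly:
  //   struct __va_list_tag {
  //     long __gpr;                 // offset 0
  //     long __fpr;                 // offset 8
  //     void *__overflow_arg_area;  // offset 16
  //     void *__reg_save_area;      // offset 24
  //   };                            // 32 bytes total
  // which is what SystemZVAListTagSize and the two *PtrOffset constants
  // above correspond to.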
5021 
5022   Function &F;
5023   MemorySanitizer &MS;
5024   MemorySanitizerVisitor &MSV;
5025   Value *VAArgTLSCopy = nullptr;
5026   Value *VAArgTLSOriginCopy = nullptr;
5027   Value *VAArgOverflowSize = nullptr;
5028 
5029   SmallVector<CallInst *, 16> VAStartInstrumentationList;
5030 
5031   enum class ArgKind {
5032     GeneralPurpose,
5033     FloatingPoint,
5034     Vector,
5035     Memory,
5036     Indirect,
5037   };
5038 
5039   enum class ShadowExtension { None, Zero, Sign };
5040 
5041   VarArgSystemZHelper(Function &F, MemorySanitizer &MS,
5042                       MemorySanitizerVisitor &MSV)
5043       : F(F), MS(MS), MSV(MSV) {}
5044 
5045   ArgKind classifyArgument(Type *T, bool IsSoftFloatABI) {
5046     // T is a SystemZABIInfo::classifyArgumentType() output, and there are
    // only a few possibilities of what it can be. In particular, enums,
    // single-element structs, and large types have already been taken care of.
5049 
5050     // Some i128 and fp128 arguments are converted to pointers only in the
5051     // back end.
5052     if (T->isIntegerTy(128) || T->isFP128Ty())
5053       return ArgKind::Indirect;
5054     if (T->isFloatingPointTy())
5055       return IsSoftFloatABI ? ArgKind::GeneralPurpose : ArgKind::FloatingPoint;
5056     if (T->isIntegerTy() || T->isPointerTy())
5057       return ArgKind::GeneralPurpose;
5058     if (T->isVectorTy())
5059       return ArgKind::Vector;
5060     return ArgKind::Memory;
5061   }
5062 
5063   ShadowExtension getShadowExtension(const CallBase &CB, unsigned ArgNo) {
5064     // ABI says: "One of the simple integer types no more than 64 bits wide.
5065     // ... If such an argument is shorter than 64 bits, replace it by a full
5066     // 64-bit integer representing the same number, using sign or zero
5067     // extension". Shadow for an integer argument has the same type as the
5068     // argument itself, so it can be sign or zero extended as well.
5069     bool ZExt = CB.paramHasAttr(ArgNo, Attribute::ZExt);
5070     bool SExt = CB.paramHasAttr(ArgNo, Attribute::SExt);
5071     if (ZExt) {
5072       assert(!SExt);
5073       return ShadowExtension::Zero;
5074     }
5075     if (SExt) {
5076       assert(!ZExt);
5077       return ShadowExtension::Sign;
5078     }
5079     return ShadowExtension::None;
5080   }
5081 
5082   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
5083     bool IsSoftFloatABI = CB.getCalledFunction()
5084                               ->getFnAttribute("use-soft-float")
5085                               .getValueAsBool();
5086     unsigned GpOffset = SystemZGpOffset;
5087     unsigned FpOffset = SystemZFpOffset;
5088     unsigned VrIndex = 0;
5089     unsigned OverflowOffset = SystemZOverflowOffset;
5090     const DataLayout &DL = F.getParent()->getDataLayout();
5091     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
5092          ++ArgIt) {
5093       Value *A = *ArgIt;
5094       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
5095       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
5096       // SystemZABIInfo does not produce ByVal parameters.
5097       assert(!CB.paramHasAttr(ArgNo, Attribute::ByVal));
5098       Type *T = A->getType();
5099       ArgKind AK = classifyArgument(T, IsSoftFloatABI);
5100       if (AK == ArgKind::Indirect) {
5101         T = PointerType::get(T, 0);
5102         AK = ArgKind::GeneralPurpose;
5103       }
5104       if (AK == ArgKind::GeneralPurpose && GpOffset >= SystemZGpEndOffset)
5105         AK = ArgKind::Memory;
5106       if (AK == ArgKind::FloatingPoint && FpOffset >= SystemZFpEndOffset)
5107         AK = ArgKind::Memory;
5108       if (AK == ArgKind::Vector && (VrIndex >= SystemZMaxVrArgs || !IsFixed))
5109         AK = ArgKind::Memory;
5110       Value *ShadowBase = nullptr;
5111       Value *OriginBase = nullptr;
5112       ShadowExtension SE = ShadowExtension::None;
5113       switch (AK) {
5114       case ArgKind::GeneralPurpose: {
5115         // Always keep track of GpOffset, but store shadow only for varargs.
5116         uint64_t ArgSize = 8;
5117         if (GpOffset + ArgSize <= kParamTLSSize) {
5118           if (!IsFixed) {
5119             SE = getShadowExtension(CB, ArgNo);
5120             uint64_t GapSize = 0;
5121             if (SE == ShadowExtension::None) {
5122               uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
5123               assert(ArgAllocSize <= ArgSize);
5124               GapSize = ArgSize - ArgAllocSize;
5125             }
5126             ShadowBase = getShadowAddrForVAArgument(IRB, GpOffset + GapSize);
5127             if (MS.TrackOrigins)
5128               OriginBase = getOriginPtrForVAArgument(IRB, GpOffset + GapSize);
5129           }
5130           GpOffset += ArgSize;
5131         } else {
5132           GpOffset = kParamTLSSize;
5133         }
5134         break;
5135       }
5136       case ArgKind::FloatingPoint: {
5137         // Always keep track of FpOffset, but store shadow only for varargs.
5138         uint64_t ArgSize = 8;
5139         if (FpOffset + ArgSize <= kParamTLSSize) {
5140           if (!IsFixed) {
5141             // PoP says: "A short floating-point datum requires only the
5142             // left-most 32 bit positions of a floating-point register".
5143             // Therefore, in contrast to AK_GeneralPurpose and AK_Memory,
5144             // don't extend shadow and don't mind the gap.
5145             ShadowBase = getShadowAddrForVAArgument(IRB, FpOffset);
5146             if (MS.TrackOrigins)
5147               OriginBase = getOriginPtrForVAArgument(IRB, FpOffset);
5148           }
5149           FpOffset += ArgSize;
5150         } else {
5151           FpOffset = kParamTLSSize;
5152         }
5153         break;
5154       }
5155       case ArgKind::Vector: {
5156         // Keep track of VrIndex. No need to store shadow, since vector varargs
5157         // go through AK_Memory.
5158         assert(IsFixed);
5159         VrIndex++;
5160         break;
5161       }
5162       case ArgKind::Memory: {
5163         // Keep track of OverflowOffset and store shadow only for varargs.
5164         // Ignore fixed args, since we need to copy only the vararg portion of
5165         // the overflow area shadow.
5166         if (!IsFixed) {
5167           uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
5168           uint64_t ArgSize = alignTo(ArgAllocSize, 8);
5169           if (OverflowOffset + ArgSize <= kParamTLSSize) {
5170             SE = getShadowExtension(CB, ArgNo);
5171             uint64_t GapSize =
5172                 SE == ShadowExtension::None ? ArgSize - ArgAllocSize : 0;
5173             ShadowBase =
5174                 getShadowAddrForVAArgument(IRB, OverflowOffset + GapSize);
5175             if (MS.TrackOrigins)
5176               OriginBase =
5177                   getOriginPtrForVAArgument(IRB, OverflowOffset + GapSize);
5178             OverflowOffset += ArgSize;
5179           } else {
5180             OverflowOffset = kParamTLSSize;
5181           }
5182         }
5183         break;
5184       }
5185       case ArgKind::Indirect:
5186         llvm_unreachable("Indirect must be converted to GeneralPurpose");
5187       }
5188       if (ShadowBase == nullptr)
5189         continue;
5190       Value *Shadow = MSV.getShadow(A);
5191       if (SE != ShadowExtension::None)
5192         Shadow = MSV.CreateShadowCast(IRB, Shadow, IRB.getInt64Ty(),
5193                                       /*Signed*/ SE == ShadowExtension::Sign);
5194       ShadowBase = IRB.CreateIntToPtr(
5195           ShadowBase, PointerType::get(Shadow->getType(), 0), "_msarg_va_s");
5196       IRB.CreateStore(Shadow, ShadowBase);
5197       if (MS.TrackOrigins) {
5198         Value *Origin = MSV.getOrigin(A);
5199         unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
5200         MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
5201                         kMinOriginAlignment);
5202       }
5203     }
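    // Record how many bytes of overflow-area shadow were written so that
    // finalizeInstrumentation() can size its TLS backup and the overflow-area
    // copies at va_start sites accordingly.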
5204     Constant *OverflowSize = ConstantInt::get(
5205         IRB.getInt64Ty(), OverflowOffset - SystemZOverflowOffset);
5206     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
5207   }
5208 
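  /// Return the address (as an integer) of the shadow slot located ArgOffset
  /// bytes into the va_arg shadow TLS area (MS.VAArgTLS).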
5209   Value *getShadowAddrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
5210     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
5211     return IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5212   }
5213 
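  /// Return a pointer to the origin slot located ArgOffset bytes into the
  /// va_arg origin TLS area (MS.VAArgOriginTLS).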
  Value *getOriginPtrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
5215     Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
5216     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5217     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
5218                               "_msarg_va_o");
5219   }
5220 
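  /// Zero out the shadow of the whole va_list tag passed to a va_start or
  /// va_copy intrinsic, marking the tag itself as fully initialized.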
5221   void unpoisonVAListTagForInst(IntrinsicInst &I) {
5222     IRBuilder<> IRB(&I);
5223     Value *VAListTag = I.getArgOperand(0);
5224     Value *ShadowPtr, *OriginPtr;
5225     const Align Alignment = Align(8);
5226     std::tie(ShadowPtr, OriginPtr) =
5227         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
5228                                /*isStore*/ true);
5229     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
5230                      SystemZVAListTagSize, Alignment, false);
5231   }
5232 
5233   void visitVAStartInst(VAStartInst &I) override {
5234     VAStartInstrumentationList.push_back(&I);
5235     unpoisonVAListTagForInst(I);
5236   }
5237 
5238   void visitVACopyInst(VACopyInst &I) override { unpoisonVAListTagForInst(I); }
5239 
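  /// Copy the register save area part of the backed-up va_arg shadow
  /// (VAArgTLSCopy) into the shadow of the register save area referenced by
  /// the va_list tag. Origins are copied likewise when origin tracking is on.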
5240   void copyRegSaveArea(IRBuilder<> &IRB, Value *VAListTag) {
5241     Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
5242     Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
5243         IRB.CreateAdd(
5244             IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5245             ConstantInt::get(MS.IntptrTy, SystemZRegSaveAreaPtrOffset)),
5246         PointerType::get(RegSaveAreaPtrTy, 0));
5247     Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
5248     Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
5249     const Align Alignment = Align(8);
5250     std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
5251         MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), Alignment,
5252                                /*isStore*/ true);
5253     // TODO(iii): copy only fragments filled by visitCallBase()
5254     IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
5255                      SystemZRegSaveAreaSize);
5256     if (MS.TrackOrigins)
5257       IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
5258                        Alignment, SystemZRegSaveAreaSize);
5259   }
5260 
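  /// Copy the overflow (stack) argument area part of the backed-up va_arg
  /// shadow into the shadow of the overflow area referenced by the va_list
  /// tag. Only VAArgOverflowSize bytes, the amount recorded at the call site,
  /// are copied.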
5261   void copyOverflowArea(IRBuilder<> &IRB, Value *VAListTag) {
5262     Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
5263     Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
5264         IRB.CreateAdd(
5265             IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5266             ConstantInt::get(MS.IntptrTy, SystemZOverflowArgAreaPtrOffset)),
5267         PointerType::get(OverflowArgAreaPtrTy, 0));
5268     Value *OverflowArgAreaPtr =
5269         IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
5270     Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
5271     const Align Alignment = Align(8);
5272     std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
5273         MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
5274                                Alignment, /*isStore*/ true);
5275     Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
5276                                            SystemZOverflowOffset);
5277     IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
5278                      VAArgOverflowSize);
5279     if (MS.TrackOrigins) {
5280       SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
5281                                       SystemZOverflowOffset);
5282       IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
5283                        VAArgOverflowSize);
5284     }
5285   }
5286 
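  /// Runs after all instructions have been visited. If the function contains
  /// va_start, back up the va_arg TLS (and origin TLS) contents in the
  /// prologue and, after each va_start, copy the saved shadow into the
  /// register save area and the overflow argument area of the va_list.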
5287   void finalizeInstrumentation() override {
5288     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
5289            "finalizeInstrumentation called twice");
5290     if (!VAStartInstrumentationList.empty()) {
5291       // If there is a va_start in this function, make a backup copy of
5292       // va_arg_tls somewhere in the function entry block.
5293       IRBuilder<> IRB(MSV.FnPrologueEnd);
5294       VAArgOverflowSize =
5295           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
5296       Value *CopySize =
5297           IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, SystemZOverflowOffset),
5298                         VAArgOverflowSize);
5299       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5300       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
5301       if (MS.TrackOrigins) {
5302         VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5303         IRB.CreateMemCpy(VAArgTLSOriginCopy, Align(8), MS.VAArgOriginTLS,
5304                          Align(8), CopySize);
5305       }
5306     }
5307 
5308     // Instrument va_start.
5309     // Copy va_list shadow from the backup copy of the TLS contents.
    for (CallInst *OrigInst : VAStartInstrumentationList) {
5313       IRBuilder<> IRB(OrigInst->getNextNode());
5314       Value *VAListTag = OrigInst->getArgOperand(0);
5315       copyRegSaveArea(IRB, VAListTag);
5316       copyOverflowArea(IRB, VAListTag);
5317     }
5318   }
5319 };
5320 
5321 /// A no-op implementation of VarArgHelper.
5322 struct VarArgNoOpHelper : public VarArgHelper {
5323   VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
5324                    MemorySanitizerVisitor &MSV) {}
5325 
5326   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {}
5327 
5328   void visitVAStartInst(VAStartInst &I) override {}
5329 
5330   void visitVACopyInst(VACopyInst &I) override {}
5331 
5332   void finalizeInstrumentation() override {}
5333 };
5334 
5335 } // end anonymous namespace
5336 
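/// Pick the VarArgHelper implementation that matches \p Func's target triple.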
5337 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
5338                                         MemorySanitizerVisitor &Visitor) {
  // VarArg handling is only implemented for the targets handled below; on
  // other platforms the no-op helper is used, va_arg shadow is not propagated,
  // and false positives are possible.
5341   Triple TargetTriple(Func.getParent()->getTargetTriple());
5342   if (TargetTriple.getArch() == Triple::x86_64)
5343     return new VarArgAMD64Helper(Func, Msan, Visitor);
5344   else if (TargetTriple.isMIPS64())
5345     return new VarArgMIPS64Helper(Func, Msan, Visitor);
5346   else if (TargetTriple.getArch() == Triple::aarch64)
5347     return new VarArgAArch64Helper(Func, Msan, Visitor);
5348   else if (TargetTriple.getArch() == Triple::ppc64 ||
5349            TargetTriple.getArch() == Triple::ppc64le)
5350     return new VarArgPowerPC64Helper(Func, Msan, Visitor);
5351   else if (TargetTriple.getArch() == Triple::systemz)
5352     return new VarArgSystemZHelper(Func, Msan, Visitor);
5353   else
5354     return new VarArgNoOpHelper(Func, Msan, Visitor);
5355 }
5356 
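/// Instrument function \p F unless it is the MSan module constructor or is
/// marked with the DisableSanitizerInstrumentation attribute.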
5357 bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
5358   if (!CompileKernel && F.getName() == kMsanModuleCtorName)
5359     return false;
5360 
5361   if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
5362     return false;
5363 
5364   MemorySanitizerVisitor Visitor(F, *this, TLI);
5365 
  // Clear out memory attributes (readonly/readnone/writeonly/argmemonly) and
  // speculatable: the inserted instrumentation reads and writes shadow memory,
  // which would contradict these attributes.
5367   AttributeMask B;
5368   B.addAttribute(Attribute::ReadOnly)
5369       .addAttribute(Attribute::ReadNone)
5370       .addAttribute(Attribute::WriteOnly)
5371       .addAttribute(Attribute::ArgMemOnly)
5372       .addAttribute(Attribute::Speculatable);
5373   F.removeFnAttrs(B);
5374 
5375   return Visitor.runOnFunction();
5376 }
5377