1 //===- MemorySanitizer.cpp - detector of uninitialized reads --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file is a part of MemorySanitizer, a detector of uninitialized
11 /// reads.
12 ///
13 /// The algorithm of the tool is similar to Memcheck
14 /// (http://goo.gl/QKbem). We associate a few shadow bits with every
15 /// byte of the application memory, poison the shadow of the malloc-ed
16 /// or alloca-ed memory, load the shadow bits on every memory read,
/// propagate the shadow bits through some of the arithmetic
/// instructions (including MOV), store the shadow bits on every memory
19 /// write, report a bug on some other instructions (e.g. JMP) if the
20 /// associated shadow is poisoned.
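///
/// As a rough sketch of the propagation (for illustration only): for
/// c = a + b the instrumentation computes Sc = Sa | Sb, where Sx denotes the
/// shadow of x; branching on a value whose shadow is non-zero triggers a
/// report.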
21 ///
/// But there are differences too. The first and major one is that we use
/// compiler instrumentation instead of binary instrumentation. This gives us
/// much better register allocation, possible compiler optimizations and a
/// fast start-up. But it also brings a major issue: msan needs to see all
/// program events, including system
27 /// calls and reads/writes in system libraries, so we either need to
28 /// compile *everything* with msan or use a binary translation
29 /// component (e.g. DynamoRIO) to instrument pre-built libraries.
30 /// Another difference from Memcheck is that we use 8 shadow bits per
31 /// byte of application memory and use a direct shadow mapping. This
32 /// greatly simplifies the instrumentation code and avoids races on
33 /// shadow updates (Memcheck is single-threaded so races are not a
34 /// concern there. Memcheck uses 2 shadow bits per byte with a slow
35 /// path storage that uses 8 bits per byte).
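///
/// For example, with the default (non-legacy) x86_64 Linux mapping defined
/// later in this file, the shadow of application address A simply lives at
/// A ^ 0x500000000000, so no translation tables are needed.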
36 ///
37 /// The default value of shadow is 0, which means "clean" (not poisoned).
38 ///
39 /// Every module initializer should call __msan_init to ensure that the
40 /// shadow memory is ready. On error, __msan_warning is called. Since
41 /// parameters and return values may be passed via registers, we have a
42 /// specialized thread-local shadow for return values
43 /// (__msan_retval_tls) and parameters (__msan_param_tls).
44 ///
45 ///                           Origin tracking.
46 ///
47 /// MemorySanitizer can track origins (allocation points) of all uninitialized
48 /// values. This behavior is controlled with a flag (msan-track-origins) and is
49 /// disabled by default.
50 ///
51 /// Origins are 4-byte values created and interpreted by the runtime library.
52 /// They are stored in a second shadow mapping, one 4-byte value for 4 bytes
53 /// of application memory. Propagation of origins is basically a bunch of
54 /// "select" instructions that pick the origin of a dirty argument, if an
55 /// instruction has one.
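///
/// For example, for c = a + b the origin is propagated roughly as
/// Oc = select(Sb != 0, Ob, Oa), where Sb is the shadow of b and Oa, Ob are
/// the origins of a and b.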
56 ///
57 /// Every 4 aligned, consecutive bytes of application memory have one origin
58 /// value associated with them. If these bytes contain uninitialized data
59 /// coming from 2 different allocations, the last store wins. Because of this,
60 /// MemorySanitizer reports can show unrelated origins, but this is unlikely in
61 /// practice.
62 ///
63 /// Origins are meaningless for fully initialized values, so MemorySanitizer
64 /// avoids storing origin to memory when a fully initialized value is stored.
/// This way it avoids needlessly overwriting the origin of the 4-byte region
/// on a short (i.e. 1 byte) clean store, and it is also good for performance.
67 ///
68 ///                            Atomic handling.
69 ///
/// Ideally, every atomic store of an application value should update the
/// corresponding shadow location in an atomic way. Unfortunately, an atomic
/// store to two disjoint locations cannot be done without severe slowdown.
73 ///
74 /// Therefore, we implement an approximation that may err on the safe side.
75 /// In this implementation, every atomically accessed location in the program
76 /// may only change from (partially) uninitialized to fully initialized, but
77 /// not the other way around. We load the shadow _after_ the application load,
78 /// and we store the shadow _before_ the app store. Also, we always store clean
79 /// shadow (if the application store is atomic). This way, if the store-load
80 /// pair constitutes a happens-before arc, shadow store and load are correctly
81 /// ordered such that the load will get either the value that was stored, or
82 /// some later value (which is always clean).
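///
/// For example (roughly): an atomic store of %v to %p is preceded by a store
/// of clean (all-zero) shadow to the shadow location of %p, and an atomic
/// load of %v from %p is followed by a load from the shadow location of %p.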
83 ///
84 /// This does not work very well with Compare-And-Swap (CAS) and
85 /// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW
86 /// must store the new shadow before the app operation, and load the shadow
/// after the app operation. Computers don't work this way. The current
/// implementation ignores the load aspect of CAS/RMW, always returning a clean
89 /// value. It implements the store part as a simple atomic store by storing a
90 /// clean shadow.
91 ///
92 ///                      Instrumenting inline assembly.
93 ///
94 /// For inline assembly code LLVM has little idea about which memory locations
/// become initialized depending on the arguments. It may be possible to figure
/// out which arguments are meant to point to inputs and outputs, but the
/// actual semantics may only be visible at runtime. In the Linux kernel it's
98 /// also possible that the arguments only indicate the offset for a base taken
99 /// from a segment register, so it's dangerous to treat any asm() arguments as
/// pointers. We take a conservative approach and generate calls to
///   __msan_instrument_asm_store(ptr, size)
/// which defer the memory unpoisoning to the runtime library.
103 /// The latter can perform more complex address checks to figure out whether
104 /// it's safe to touch the shadow memory.
105 /// Like with atomic operations, we call __msan_instrument_asm_store() before
106 /// the assembly call, so that changes to the shadow memory will be seen by
107 /// other threads together with main memory initialization.
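///
/// For example, when conservative asm handling is enabled, an asm() statement
/// with an operand pointing to a 4-byte int results (roughly) in a call
///   __msan_instrument_asm_store(ptr, 4)
/// being emitted immediately before the assembly statement.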
108 ///
109 ///                  KernelMemorySanitizer (KMSAN) implementation.
110 ///
111 /// The major differences between KMSAN and MSan instrumentation are:
112 ///  - KMSAN always tracks the origins and implies msan-keep-going=true;
113 ///  - KMSAN allocates shadow and origin memory for each page separately, so
114 ///    there are no explicit accesses to shadow and origin in the
115 ///    instrumentation.
116 ///    Shadow and origin values for a particular X-byte memory location
117 ///    (X=1,2,4,8) are accessed through pointers obtained via the
118 ///      __msan_metadata_ptr_for_load_X(ptr)
119 ///      __msan_metadata_ptr_for_store_X(ptr)
///    functions. The corresponding functions check that the X-byte accesses
///    are possible and return the pointers to shadow and origin memory (see
///    the example after this list).
122 ///    Arbitrary sized accesses are handled with:
123 ///      __msan_metadata_ptr_for_load_n(ptr, size)
124 ///      __msan_metadata_ptr_for_store_n(ptr, size);
125 ///  - TLS variables are stored in a single per-task struct. A call to a
126 ///    function __msan_get_context_state() returning a pointer to that struct
127 ///    is inserted into every instrumented function before the entry block;
128 ///  - __msan_warning() takes a 32-bit origin parameter;
129 ///  - local variables are poisoned with __msan_poison_alloca() upon function
130 ///    entry and unpoisoned with __msan_unpoison_alloca() before leaving the
131 ///    function;
132 ///  - the pass doesn't declare any global variables or add global constructors
133 ///    to the translation unit.
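///
/// For example, under KMSAN a 4-byte load is instrumented roughly as
///   %pair = call { i8*, i32* } @__msan_metadata_ptr_for_load_4(i8* %addr)
/// followed by loads of the shadow and origin values through the two returned
/// pointers.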
134 ///
135 /// Also, KMSAN currently ignores uninitialized memory passed into inline asm
136 /// calls, making sure we're on the safe side wrt. possible false positives.
137 ///
138 ///  KernelMemorySanitizer only supports X86_64 at the moment.
139 ///
140 //
141 // FIXME: This sanitizer does not yet handle scalable vectors
142 //
143 //===----------------------------------------------------------------------===//
144 
145 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
146 #include "llvm/ADT/APInt.h"
147 #include "llvm/ADT/ArrayRef.h"
148 #include "llvm/ADT/DepthFirstIterator.h"
149 #include "llvm/ADT/SmallSet.h"
150 #include "llvm/ADT/SmallString.h"
151 #include "llvm/ADT/SmallVector.h"
152 #include "llvm/ADT/StringExtras.h"
153 #include "llvm/ADT/StringRef.h"
154 #include "llvm/ADT/Triple.h"
155 #include "llvm/Analysis/TargetLibraryInfo.h"
156 #include "llvm/IR/Argument.h"
157 #include "llvm/IR/Attributes.h"
158 #include "llvm/IR/BasicBlock.h"
159 #include "llvm/IR/CallingConv.h"
160 #include "llvm/IR/Constant.h"
161 #include "llvm/IR/Constants.h"
162 #include "llvm/IR/DataLayout.h"
163 #include "llvm/IR/DerivedTypes.h"
164 #include "llvm/IR/Function.h"
165 #include "llvm/IR/GlobalValue.h"
166 #include "llvm/IR/GlobalVariable.h"
167 #include "llvm/IR/IRBuilder.h"
168 #include "llvm/IR/InlineAsm.h"
169 #include "llvm/IR/InstVisitor.h"
170 #include "llvm/IR/InstrTypes.h"
171 #include "llvm/IR/Instruction.h"
172 #include "llvm/IR/Instructions.h"
173 #include "llvm/IR/IntrinsicInst.h"
174 #include "llvm/IR/Intrinsics.h"
175 #include "llvm/IR/IntrinsicsX86.h"
176 #include "llvm/IR/LLVMContext.h"
177 #include "llvm/IR/MDBuilder.h"
178 #include "llvm/IR/Module.h"
179 #include "llvm/IR/Type.h"
180 #include "llvm/IR/Value.h"
181 #include "llvm/IR/ValueMap.h"
182 #include "llvm/InitializePasses.h"
183 #include "llvm/Pass.h"
184 #include "llvm/Support/AtomicOrdering.h"
185 #include "llvm/Support/Casting.h"
186 #include "llvm/Support/CommandLine.h"
187 #include "llvm/Support/Compiler.h"
188 #include "llvm/Support/Debug.h"
189 #include "llvm/Support/ErrorHandling.h"
190 #include "llvm/Support/MathExtras.h"
191 #include "llvm/Support/raw_ostream.h"
192 #include "llvm/Transforms/Instrumentation.h"
193 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
194 #include "llvm/Transforms/Utils/Local.h"
195 #include "llvm/Transforms/Utils/ModuleUtils.h"
196 #include <algorithm>
197 #include <cassert>
198 #include <cstddef>
199 #include <cstdint>
200 #include <memory>
201 #include <string>
202 #include <tuple>
203 
204 using namespace llvm;
205 
206 #define DEBUG_TYPE "msan"
207 
208 static const unsigned kOriginSize = 4;
209 static const Align kMinOriginAlignment = Align(4);
210 static const Align kShadowTLSAlignment = Align(8);
211 
212 // These constants must be kept in sync with the ones in msan.h.
213 static const unsigned kParamTLSSize = 800;
214 static const unsigned kRetvalTLSSize = 800;
215 
// Access sizes are powers of two: 1, 2, 4, 8.
217 static const size_t kNumberOfAccessSizes = 4;
218 
219 /// Track origins of uninitialized values.
220 ///
221 /// Adds a section to MemorySanitizer report that points to the allocation
222 /// (stack or heap) the uninitialized bits came from originally.
223 static cl::opt<int> ClTrackOrigins("msan-track-origins",
224        cl::desc("Track origins (allocation sites) of poisoned memory"),
225        cl::Hidden, cl::init(0));
226 
227 static cl::opt<bool> ClKeepGoing("msan-keep-going",
228        cl::desc("keep going after reporting a UMR"),
229        cl::Hidden, cl::init(false));
230 
231 static cl::opt<bool> ClPoisonStack("msan-poison-stack",
232        cl::desc("poison uninitialized stack variables"),
233        cl::Hidden, cl::init(true));
234 
235 static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
236        cl::desc("poison uninitialized stack variables with a call"),
237        cl::Hidden, cl::init(false));
238 
239 static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
240        cl::desc("poison uninitialized stack variables with the given pattern"),
241        cl::Hidden, cl::init(0xff));
242 
243 static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
244        cl::desc("poison undef temps"),
245        cl::Hidden, cl::init(true));
246 
247 static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
248        cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
249        cl::Hidden, cl::init(true));
250 
251 static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact",
252        cl::desc("exact handling of relational integer ICmp"),
253        cl::Hidden, cl::init(false));
254 
255 static cl::opt<bool> ClHandleLifetimeIntrinsics(
256     "msan-handle-lifetime-intrinsics",
257     cl::desc(
258         "when possible, poison scoped variables at the beginning of the scope "
259         "(slower, but more precise)"),
260     cl::Hidden, cl::init(true));
261 
262 // When compiling the Linux kernel, we sometimes see false positives related to
263 // MSan being unable to understand that inline assembly calls may initialize
264 // local variables.
265 // This flag makes the compiler conservatively unpoison every memory location
266 // passed into an assembly call. Note that this may cause false positives.
267 // Because it's impossible to figure out the array sizes, we can only unpoison
268 // the first sizeof(type) bytes for each type* pointer.
269 // The instrumentation is only enabled in KMSAN builds, and only if
270 // -msan-handle-asm-conservative is on. This is done because we may want to
271 // quickly disable assembly instrumentation when it breaks.
272 static cl::opt<bool> ClHandleAsmConservative(
273     "msan-handle-asm-conservative",
274     cl::desc("conservative handling of inline assembly"), cl::Hidden,
275     cl::init(true));
276 
// This flag controls whether we check the shadow of the address operand of a
// load or store. Such bugs are very rare, since a load from a garbage address
// typically results in SEGV, but they still happen (e.g. only the lower bits
// of the address are garbage, or the access happens early at program startup
// where malloc-ed memory is more likely to be zeroed). As of 2012-08-28 this
// flag adds a 20% slowdown.
283 static cl::opt<bool> ClCheckAccessAddress("msan-check-access-address",
284        cl::desc("report accesses through a pointer which has poisoned shadow"),
285        cl::Hidden, cl::init(true));
286 
287 static cl::opt<bool> ClEagerChecks(
288     "msan-eager-checks",
289     cl::desc("check arguments and return values at function call boundaries"),
290     cl::Hidden, cl::init(false));
291 
292 static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions",
293        cl::desc("print out instructions with default strict semantics"),
294        cl::Hidden, cl::init(false));
295 
296 static cl::opt<int> ClInstrumentationWithCallThreshold(
297     "msan-instrumentation-with-call-threshold",
298     cl::desc(
299         "If the function being instrumented requires more than "
300         "this number of checks and origin stores, use callbacks instead of "
301         "inline checks (-1 means never use callbacks)."),
302     cl::Hidden, cl::init(3500));
303 
304 static cl::opt<bool>
305     ClEnableKmsan("msan-kernel",
306                   cl::desc("Enable KernelMemorySanitizer instrumentation"),
307                   cl::Hidden, cl::init(false));
308 
// This is an experiment to enable handling of cases where the shadow is a
// non-zero compile-time constant. For some inexplicable reason such cases were
// silently ignored by the instrumentation.
312 static cl::opt<bool> ClCheckConstantShadow("msan-check-constant-shadow",
313        cl::desc("Insert checks for constant shadow values"),
314        cl::Hidden, cl::init(false));
315 
316 // This is off by default because of a bug in gold:
317 // https://sourceware.org/bugzilla/show_bug.cgi?id=19002
318 static cl::opt<bool> ClWithComdat("msan-with-comdat",
319        cl::desc("Place MSan constructors in comdat sections"),
320        cl::Hidden, cl::init(false));
321 
// These options allow specifying custom memory map parameters.
// See MemoryMapParams for details.
324 static cl::opt<uint64_t> ClAndMask("msan-and-mask",
325                                    cl::desc("Define custom MSan AndMask"),
326                                    cl::Hidden, cl::init(0));
327 
328 static cl::opt<uint64_t> ClXorMask("msan-xor-mask",
329                                    cl::desc("Define custom MSan XorMask"),
330                                    cl::Hidden, cl::init(0));
331 
332 static cl::opt<uint64_t> ClShadowBase("msan-shadow-base",
333                                       cl::desc("Define custom MSan ShadowBase"),
334                                       cl::Hidden, cl::init(0));
335 
336 static cl::opt<uint64_t> ClOriginBase("msan-origin-base",
337                                       cl::desc("Define custom MSan OriginBase"),
338                                       cl::Hidden, cl::init(0));
339 
340 static const char *const kMsanModuleCtorName = "msan.module_ctor";
341 static const char *const kMsanInitName = "__msan_init";
342 
343 namespace {
344 
345 // Memory map parameters used in application-to-shadow address calculation.
346 // Offset = (Addr & ~AndMask) ^ XorMask
347 // Shadow = ShadowBase + Offset
348 // Origin = OriginBase + Offset
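//
// For example, with the non-legacy x86_64 Linux parameters below (AndMask = 0,
// XorMask = 0x500000000000, ShadowBase = 0, OriginBase = 0x100000000000),
// application address A maps to shadow address A ^ 0x500000000000, and its
// origin lives at 0x100000000000 + (A ^ 0x500000000000).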
349 struct MemoryMapParams {
350   uint64_t AndMask;
351   uint64_t XorMask;
352   uint64_t ShadowBase;
353   uint64_t OriginBase;
354 };
355 
356 struct PlatformMemoryMapParams {
357   const MemoryMapParams *bits32;
358   const MemoryMapParams *bits64;
359 };
360 
361 } // end anonymous namespace
362 
363 // i386 Linux
364 static const MemoryMapParams Linux_I386_MemoryMapParams = {
365   0x000080000000,  // AndMask
366   0,               // XorMask (not used)
367   0,               // ShadowBase (not used)
368   0x000040000000,  // OriginBase
369 };
370 
371 // x86_64 Linux
372 static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
373 #ifdef MSAN_LINUX_X86_64_OLD_MAPPING
374   0x400000000000,  // AndMask
375   0,               // XorMask (not used)
376   0,               // ShadowBase (not used)
377   0x200000000000,  // OriginBase
378 #else
379   0,               // AndMask (not used)
380   0x500000000000,  // XorMask
381   0,               // ShadowBase (not used)
382   0x100000000000,  // OriginBase
383 #endif
384 };
385 
386 // mips64 Linux
387 static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
388   0,               // AndMask (not used)
389   0x008000000000,  // XorMask
390   0,               // ShadowBase (not used)
391   0x002000000000,  // OriginBase
392 };
393 
394 // ppc64 Linux
395 static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = {
396   0xE00000000000,  // AndMask
397   0x100000000000,  // XorMask
398   0x080000000000,  // ShadowBase
399   0x1C0000000000,  // OriginBase
400 };
401 
402 // s390x Linux
403 static const MemoryMapParams Linux_S390X_MemoryMapParams = {
404     0xC00000000000, // AndMask
405     0,              // XorMask (not used)
406     0x080000000000, // ShadowBase
407     0x1C0000000000, // OriginBase
408 };
409 
410 // aarch64 Linux
411 static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
412   0,               // AndMask (not used)
413   0x06000000000,   // XorMask
414   0,               // ShadowBase (not used)
415   0x01000000000,   // OriginBase
416 };
417 
418 // i386 FreeBSD
419 static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
420   0x000180000000,  // AndMask
421   0x000040000000,  // XorMask
422   0x000020000000,  // ShadowBase
423   0x000700000000,  // OriginBase
424 };
425 
426 // x86_64 FreeBSD
427 static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = {
428   0xc00000000000,  // AndMask
429   0x200000000000,  // XorMask
430   0x100000000000,  // ShadowBase
431   0x380000000000,  // OriginBase
432 };
433 
434 // x86_64 NetBSD
435 static const MemoryMapParams NetBSD_X86_64_MemoryMapParams = {
436   0,               // AndMask
437   0x500000000000,  // XorMask
438   0,               // ShadowBase
439   0x100000000000,  // OriginBase
440 };
441 
442 static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
443   &Linux_I386_MemoryMapParams,
444   &Linux_X86_64_MemoryMapParams,
445 };
446 
447 static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
448   nullptr,
449   &Linux_MIPS64_MemoryMapParams,
450 };
451 
452 static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = {
453   nullptr,
454   &Linux_PowerPC64_MemoryMapParams,
455 };
456 
457 static const PlatformMemoryMapParams Linux_S390_MemoryMapParams = {
458     nullptr,
459     &Linux_S390X_MemoryMapParams,
460 };
461 
462 static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
463   nullptr,
464   &Linux_AArch64_MemoryMapParams,
465 };
466 
467 static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
468   &FreeBSD_I386_MemoryMapParams,
469   &FreeBSD_X86_64_MemoryMapParams,
470 };
471 
472 static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = {
473   nullptr,
474   &NetBSD_X86_64_MemoryMapParams,
475 };
476 
477 namespace {
478 
479 /// Instrument functions of a module to detect uninitialized reads.
480 ///
481 /// Instantiating MemorySanitizer inserts the msan runtime library API function
482 /// declarations into the module if they don't exist already. Instantiating
483 /// ensures the __msan_init function is in the list of global constructors for
484 /// the module.
485 class MemorySanitizer {
486 public:
487   MemorySanitizer(Module &M, MemorySanitizerOptions Options)
488       : CompileKernel(Options.Kernel), TrackOrigins(Options.TrackOrigins),
489         Recover(Options.Recover) {
490     initializeModule(M);
491   }
492 
493   // MSan cannot be moved or copied because of MapParams.
494   MemorySanitizer(MemorySanitizer &&) = delete;
495   MemorySanitizer &operator=(MemorySanitizer &&) = delete;
496   MemorySanitizer(const MemorySanitizer &) = delete;
497   MemorySanitizer &operator=(const MemorySanitizer &) = delete;
498 
499   bool sanitizeFunction(Function &F, TargetLibraryInfo &TLI);
500 
501 private:
502   friend struct MemorySanitizerVisitor;
503   friend struct VarArgAMD64Helper;
504   friend struct VarArgMIPS64Helper;
505   friend struct VarArgAArch64Helper;
506   friend struct VarArgPowerPC64Helper;
507   friend struct VarArgSystemZHelper;
508 
509   void initializeModule(Module &M);
510   void initializeCallbacks(Module &M);
511   void createKernelApi(Module &M);
512   void createUserspaceApi(Module &M);
513 
514   /// True if we're compiling the Linux kernel.
515   bool CompileKernel;
516   /// Track origins (allocation points) of uninitialized values.
517   int TrackOrigins;
518   bool Recover;
519 
520   LLVMContext *C;
521   Type *IntptrTy;
522   Type *OriginTy;
523 
524   // XxxTLS variables represent the per-thread state in MSan and per-task state
525   // in KMSAN.
  // For userspace these point to thread-local globals. In the kernel they
  // point to the members of a per-task struct obtained via a call to
  // __msan_get_context_state().
529 
530   /// Thread-local shadow storage for function parameters.
531   Value *ParamTLS;
532 
533   /// Thread-local origin storage for function parameters.
534   Value *ParamOriginTLS;
535 
536   /// Thread-local shadow storage for function return value.
537   Value *RetvalTLS;
538 
539   /// Thread-local origin storage for function return value.
540   Value *RetvalOriginTLS;
541 
542   /// Thread-local shadow storage for in-register va_arg function
543   /// parameters (x86_64-specific).
544   Value *VAArgTLS;
545 
  /// Thread-local origin storage for in-register va_arg function
  /// parameters (x86_64-specific).
548   Value *VAArgOriginTLS;
549 
  /// Thread-local storage for the size of the va_arg overflow area
  /// (x86_64-specific).
552   Value *VAArgOverflowSizeTLS;
553 
554   /// Are the instrumentation callbacks set up?
555   bool CallbacksInitialized = false;
556 
557   /// The run-time callback to print a warning.
558   FunctionCallee WarningFn;
559 
560   // These arrays are indexed by log2(AccessSize).
561   FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
562   FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];
563 
564   /// Run-time helper that generates a new origin value for a stack
565   /// allocation.
566   FunctionCallee MsanSetAllocaOrigin4Fn;
567 
568   /// Run-time helper that poisons stack on function entry.
569   FunctionCallee MsanPoisonStackFn;
570 
571   /// Run-time helper that records a store (or any event) of an
572   /// uninitialized value and returns an updated origin id encoding this info.
573   FunctionCallee MsanChainOriginFn;
574 
575   /// Run-time helper that paints an origin over a region.
576   FunctionCallee MsanSetOriginFn;
577 
578   /// MSan runtime replacements for memmove, memcpy and memset.
579   FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
580 
581   /// KMSAN callback for task-local function argument shadow.
582   StructType *MsanContextStateTy;
583   FunctionCallee MsanGetContextStateFn;
584 
585   /// Functions for poisoning/unpoisoning local variables
586   FunctionCallee MsanPoisonAllocaFn, MsanUnpoisonAllocaFn;
587 
588   /// Each of the MsanMetadataPtrXxx functions returns a pair of shadow/origin
589   /// pointers.
590   FunctionCallee MsanMetadataPtrForLoadN, MsanMetadataPtrForStoreN;
591   FunctionCallee MsanMetadataPtrForLoad_1_8[4];
592   FunctionCallee MsanMetadataPtrForStore_1_8[4];
593   FunctionCallee MsanInstrumentAsmStoreFn;
594 
595   /// Helper to choose between different MsanMetadataPtrXxx().
596   FunctionCallee getKmsanShadowOriginAccessFn(bool isStore, int size);
597 
598   /// Memory map parameters used in application-to-shadow calculation.
599   const MemoryMapParams *MapParams;
600 
  /// Custom memory map parameters used when -msan-shadow-base or
  /// -msan-origin-base is provided.
603   MemoryMapParams CustomMapParams;
604 
605   MDNode *ColdCallWeights;
606 
607   /// Branch weights for origin store.
608   MDNode *OriginStoreWeights;
609 };
610 
611 void insertModuleCtor(Module &M) {
612   getOrCreateSanitizerCtorAndInitFunctions(
613       M, kMsanModuleCtorName, kMsanInitName,
614       /*InitArgTypes=*/{},
615       /*InitArgs=*/{},
616       // This callback is invoked when the functions are created the first
617       // time. Hook them into the global ctors list in that case:
618       [&](Function *Ctor, FunctionCallee) {
619         if (!ClWithComdat) {
620           appendToGlobalCtors(M, Ctor, 0);
621           return;
622         }
623         Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
624         Ctor->setComdat(MsanCtorComdat);
625         appendToGlobalCtors(M, Ctor, 0, Ctor);
626       });
627 }
628 
629 /// A legacy function pass for msan instrumentation.
630 ///
631 /// Instruments functions to detect uninitialized reads.
632 struct MemorySanitizerLegacyPass : public FunctionPass {
633   // Pass identification, replacement for typeid.
634   static char ID;
635 
636   MemorySanitizerLegacyPass(MemorySanitizerOptions Options = {})
637       : FunctionPass(ID), Options(Options) {
638     initializeMemorySanitizerLegacyPassPass(*PassRegistry::getPassRegistry());
639   }
640   StringRef getPassName() const override { return "MemorySanitizerLegacyPass"; }
641 
642   void getAnalysisUsage(AnalysisUsage &AU) const override {
643     AU.addRequired<TargetLibraryInfoWrapperPass>();
644   }
645 
646   bool runOnFunction(Function &F) override {
647     return MSan->sanitizeFunction(
648         F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F));
649   }
650   bool doInitialization(Module &M) override;
651 
652   Optional<MemorySanitizer> MSan;
653   MemorySanitizerOptions Options;
654 };
655 
656 template <class T> T getOptOrDefault(const cl::opt<T> &Opt, T Default) {
657   return (Opt.getNumOccurrences() > 0) ? Opt : Default;
658 }
659 
660 } // end anonymous namespace
661 
662 MemorySanitizerOptions::MemorySanitizerOptions(int TO, bool R, bool K)
663     : Kernel(getOptOrDefault(ClEnableKmsan, K)),
664       TrackOrigins(getOptOrDefault(ClTrackOrigins, Kernel ? 2 : TO)),
665       Recover(getOptOrDefault(ClKeepGoing, Kernel || R)) {}
666 
667 PreservedAnalyses MemorySanitizerPass::run(Function &F,
668                                            FunctionAnalysisManager &FAM) {
669   MemorySanitizer Msan(*F.getParent(), Options);
670   if (Msan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F)))
671     return PreservedAnalyses::none();
672   return PreservedAnalyses::all();
673 }
674 
675 PreservedAnalyses MemorySanitizerPass::run(Module &M,
676                                            ModuleAnalysisManager &AM) {
677   if (Options.Kernel)
678     return PreservedAnalyses::all();
679   insertModuleCtor(M);
680   return PreservedAnalyses::none();
681 }
682 
683 char MemorySanitizerLegacyPass::ID = 0;
684 
685 INITIALIZE_PASS_BEGIN(MemorySanitizerLegacyPass, "msan",
686                       "MemorySanitizer: detects uninitialized reads.", false,
687                       false)
688 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
689 INITIALIZE_PASS_END(MemorySanitizerLegacyPass, "msan",
690                     "MemorySanitizer: detects uninitialized reads.", false,
691                     false)
692 
693 FunctionPass *
694 llvm::createMemorySanitizerLegacyPassPass(MemorySanitizerOptions Options) {
695   return new MemorySanitizerLegacyPass(Options);
696 }
697 
698 /// Create a non-const global initialized with the given string.
699 ///
700 /// Creates a writable global for Str so that we can pass it to the
/// run-time lib. The runtime uses the first 4 bytes of the string to store
/// the frame ID, so the string needs to be mutable.
703 static GlobalVariable *createPrivateNonConstGlobalForString(Module &M,
704                                                             StringRef Str) {
705   Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
706   return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/false,
707                             GlobalValue::PrivateLinkage, StrConst, "");
708 }
709 
710 /// Create KMSAN API callbacks.
711 void MemorySanitizer::createKernelApi(Module &M) {
712   IRBuilder<> IRB(*C);
713 
714   // These will be initialized in insertKmsanPrologue().
715   RetvalTLS = nullptr;
716   RetvalOriginTLS = nullptr;
717   ParamTLS = nullptr;
718   ParamOriginTLS = nullptr;
719   VAArgTLS = nullptr;
720   VAArgOriginTLS = nullptr;
721   VAArgOverflowSizeTLS = nullptr;
722 
723   WarningFn = M.getOrInsertFunction("__msan_warning", IRB.getVoidTy(),
724                                     IRB.getInt32Ty());
725   // Requests the per-task context state (kmsan_context_state*) from the
726   // runtime library.
727   MsanContextStateTy = StructType::get(
728       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
729       ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8),
730       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
731       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), /* va_arg_origin */
732       IRB.getInt64Ty(), ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy,
733       OriginTy);
734   MsanGetContextStateFn = M.getOrInsertFunction(
735       "__msan_get_context_state", PointerType::get(MsanContextStateTy, 0));
736 
737   Type *RetTy = StructType::get(PointerType::get(IRB.getInt8Ty(), 0),
738                                 PointerType::get(IRB.getInt32Ty(), 0));
739 
740   for (int ind = 0, size = 1; ind < 4; ind++, size <<= 1) {
741     std::string name_load =
742         "__msan_metadata_ptr_for_load_" + std::to_string(size);
743     std::string name_store =
744         "__msan_metadata_ptr_for_store_" + std::to_string(size);
745     MsanMetadataPtrForLoad_1_8[ind] = M.getOrInsertFunction(
746         name_load, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
747     MsanMetadataPtrForStore_1_8[ind] = M.getOrInsertFunction(
748         name_store, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
749   }
750 
751   MsanMetadataPtrForLoadN = M.getOrInsertFunction(
752       "__msan_metadata_ptr_for_load_n", RetTy,
753       PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
754   MsanMetadataPtrForStoreN = M.getOrInsertFunction(
755       "__msan_metadata_ptr_for_store_n", RetTy,
756       PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
757 
758   // Functions for poisoning and unpoisoning memory.
759   MsanPoisonAllocaFn =
760       M.getOrInsertFunction("__msan_poison_alloca", IRB.getVoidTy(),
761                             IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy());
762   MsanUnpoisonAllocaFn = M.getOrInsertFunction(
763       "__msan_unpoison_alloca", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy);
764 }
765 
766 static Constant *getOrInsertGlobal(Module &M, StringRef Name, Type *Ty) {
767   return M.getOrInsertGlobal(Name, Ty, [&] {
768     return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
769                               nullptr, Name, nullptr,
770                               GlobalVariable::InitialExecTLSModel);
771   });
772 }
773 
774 /// Insert declarations for userspace-specific functions and globals.
775 void MemorySanitizer::createUserspaceApi(Module &M) {
776   IRBuilder<> IRB(*C);
777 
778   // Create the callback.
779   // FIXME: this function should have "Cold" calling conv,
780   // which is not yet implemented.
781   StringRef WarningFnName = Recover ? "__msan_warning_with_origin"
782                                     : "__msan_warning_with_origin_noreturn";
783   WarningFn =
784       M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), IRB.getInt32Ty());
785 
786   // Create the global TLS variables.
787   RetvalTLS =
788       getOrInsertGlobal(M, "__msan_retval_tls",
789                         ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8));
790 
791   RetvalOriginTLS = getOrInsertGlobal(M, "__msan_retval_origin_tls", OriginTy);
792 
793   ParamTLS =
794       getOrInsertGlobal(M, "__msan_param_tls",
795                         ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
796 
797   ParamOriginTLS =
798       getOrInsertGlobal(M, "__msan_param_origin_tls",
799                         ArrayType::get(OriginTy, kParamTLSSize / 4));
800 
801   VAArgTLS =
802       getOrInsertGlobal(M, "__msan_va_arg_tls",
803                         ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
804 
805   VAArgOriginTLS =
806       getOrInsertGlobal(M, "__msan_va_arg_origin_tls",
807                         ArrayType::get(OriginTy, kParamTLSSize / 4));
808 
809   VAArgOverflowSizeTLS =
810       getOrInsertGlobal(M, "__msan_va_arg_overflow_size_tls", IRB.getInt64Ty());
811 
812   for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
813        AccessSizeIndex++) {
814     unsigned AccessSize = 1 << AccessSizeIndex;
815     std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize);
816     SmallVector<std::pair<unsigned, Attribute>, 2> MaybeWarningFnAttrs;
817     MaybeWarningFnAttrs.push_back(std::make_pair(
818         AttributeList::FirstArgIndex, Attribute::get(*C, Attribute::ZExt)));
819     MaybeWarningFnAttrs.push_back(std::make_pair(
820         AttributeList::FirstArgIndex + 1, Attribute::get(*C, Attribute::ZExt)));
821     MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
822         FunctionName, AttributeList::get(*C, MaybeWarningFnAttrs),
823         IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty());
824 
825     FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
826     SmallVector<std::pair<unsigned, Attribute>, 2> MaybeStoreOriginFnAttrs;
827     MaybeStoreOriginFnAttrs.push_back(std::make_pair(
828         AttributeList::FirstArgIndex, Attribute::get(*C, Attribute::ZExt)));
829     MaybeStoreOriginFnAttrs.push_back(std::make_pair(
830         AttributeList::FirstArgIndex + 2, Attribute::get(*C, Attribute::ZExt)));
831     MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
832         FunctionName, AttributeList::get(*C, MaybeStoreOriginFnAttrs),
833         IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt8PtrTy(),
834         IRB.getInt32Ty());
835   }
836 
837   MsanSetAllocaOrigin4Fn = M.getOrInsertFunction(
838     "__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
839     IRB.getInt8PtrTy(), IntptrTy);
840   MsanPoisonStackFn =
841       M.getOrInsertFunction("__msan_poison_stack", IRB.getVoidTy(),
842                             IRB.getInt8PtrTy(), IntptrTy);
843 }
844 
/// Insert extern declarations of runtime-provided functions and globals.
846 void MemorySanitizer::initializeCallbacks(Module &M) {
847   // Only do this once.
848   if (CallbacksInitialized)
849     return;
850 
851   IRBuilder<> IRB(*C);
852   // Initialize callbacks that are common for kernel and userspace
853   // instrumentation.
854   MsanChainOriginFn = M.getOrInsertFunction(
855     "__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty());
856   MsanSetOriginFn =
857       M.getOrInsertFunction("__msan_set_origin", IRB.getVoidTy(),
858                             IRB.getInt8PtrTy(), IntptrTy, IRB.getInt32Ty());
859   MemmoveFn = M.getOrInsertFunction(
860     "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
861     IRB.getInt8PtrTy(), IntptrTy);
862   MemcpyFn = M.getOrInsertFunction(
863     "__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
864     IntptrTy);
865   MemsetFn = M.getOrInsertFunction(
866     "__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
867     IntptrTy);
868 
869   MsanInstrumentAsmStoreFn =
870       M.getOrInsertFunction("__msan_instrument_asm_store", IRB.getVoidTy(),
871                             PointerType::get(IRB.getInt8Ty(), 0), IntptrTy);
872 
873   if (CompileKernel) {
874     createKernelApi(M);
875   } else {
876     createUserspaceApi(M);
877   }
878   CallbacksInitialized = true;
879 }
880 
881 FunctionCallee MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore,
882                                                              int size) {
883   FunctionCallee *Fns =
884       isStore ? MsanMetadataPtrForStore_1_8 : MsanMetadataPtrForLoad_1_8;
885   switch (size) {
886   case 1:
887     return Fns[0];
888   case 2:
889     return Fns[1];
890   case 4:
891     return Fns[2];
892   case 8:
893     return Fns[3];
894   default:
895     return nullptr;
896   }
897 }
898 
899 /// Module-level initialization.
900 ///
/// Inserts a call to __msan_init into the module's constructor list.
902 void MemorySanitizer::initializeModule(Module &M) {
903   auto &DL = M.getDataLayout();
904 
905   bool ShadowPassed = ClShadowBase.getNumOccurrences() > 0;
906   bool OriginPassed = ClOriginBase.getNumOccurrences() > 0;
907   // Check the overrides first
908   if (ShadowPassed || OriginPassed) {
909     CustomMapParams.AndMask = ClAndMask;
910     CustomMapParams.XorMask = ClXorMask;
911     CustomMapParams.ShadowBase = ClShadowBase;
912     CustomMapParams.OriginBase = ClOriginBase;
913     MapParams = &CustomMapParams;
914   } else {
915     Triple TargetTriple(M.getTargetTriple());
916     switch (TargetTriple.getOS()) {
917       case Triple::FreeBSD:
918         switch (TargetTriple.getArch()) {
919           case Triple::x86_64:
920             MapParams = FreeBSD_X86_MemoryMapParams.bits64;
921             break;
922           case Triple::x86:
923             MapParams = FreeBSD_X86_MemoryMapParams.bits32;
924             break;
925           default:
926             report_fatal_error("unsupported architecture");
927         }
928         break;
929       case Triple::NetBSD:
930         switch (TargetTriple.getArch()) {
931           case Triple::x86_64:
932             MapParams = NetBSD_X86_MemoryMapParams.bits64;
933             break;
934           default:
935             report_fatal_error("unsupported architecture");
936         }
937         break;
938       case Triple::Linux:
939         switch (TargetTriple.getArch()) {
940           case Triple::x86_64:
941             MapParams = Linux_X86_MemoryMapParams.bits64;
942             break;
943           case Triple::x86:
944             MapParams = Linux_X86_MemoryMapParams.bits32;
945             break;
946           case Triple::mips64:
947           case Triple::mips64el:
948             MapParams = Linux_MIPS_MemoryMapParams.bits64;
949             break;
950           case Triple::ppc64:
951           case Triple::ppc64le:
952             MapParams = Linux_PowerPC_MemoryMapParams.bits64;
953             break;
954           case Triple::systemz:
955             MapParams = Linux_S390_MemoryMapParams.bits64;
956             break;
957           case Triple::aarch64:
958           case Triple::aarch64_be:
959             MapParams = Linux_ARM_MemoryMapParams.bits64;
960             break;
961           default:
962             report_fatal_error("unsupported architecture");
963         }
964         break;
965       default:
966         report_fatal_error("unsupported operating system");
967     }
968   }
969 
970   C = &(M.getContext());
971   IRBuilder<> IRB(*C);
972   IntptrTy = IRB.getIntPtrTy(DL);
973   OriginTy = IRB.getInt32Ty();
974 
975   ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
976   OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
977 
978   if (!CompileKernel) {
979     if (TrackOrigins)
980       M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] {
981         return new GlobalVariable(
982             M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
983             IRB.getInt32(TrackOrigins), "__msan_track_origins");
984       });
985 
986     if (Recover)
987       M.getOrInsertGlobal("__msan_keep_going", IRB.getInt32Ty(), [&] {
988         return new GlobalVariable(M, IRB.getInt32Ty(), true,
989                                   GlobalValue::WeakODRLinkage,
990                                   IRB.getInt32(Recover), "__msan_keep_going");
991       });
992 }
993 }
994 
995 bool MemorySanitizerLegacyPass::doInitialization(Module &M) {
996   if (!Options.Kernel)
997     insertModuleCtor(M);
998   MSan.emplace(M, Options);
999   return true;
1000 }
1001 
1002 namespace {
1003 
1004 /// A helper class that handles instrumentation of VarArg
1005 /// functions on a particular platform.
1006 ///
1007 /// Implementations are expected to insert the instrumentation
1008 /// necessary to propagate argument shadow through VarArg function
1009 /// calls. Visit* methods are called during an InstVisitor pass over
1010 /// the function, and should avoid creating new basic blocks. A new
1011 /// instance of this class is created for each instrumented function.
1012 struct VarArgHelper {
1013   virtual ~VarArgHelper() = default;
1014 
1015   /// Visit a CallBase.
1016   virtual void visitCallBase(CallBase &CB, IRBuilder<> &IRB) = 0;
1017 
1018   /// Visit a va_start call.
1019   virtual void visitVAStartInst(VAStartInst &I) = 0;
1020 
1021   /// Visit a va_copy call.
1022   virtual void visitVACopyInst(VACopyInst &I) = 0;
1023 
1024   /// Finalize function instrumentation.
1025   ///
1026   /// This method is called after visiting all interesting (see above)
1027   /// instructions in a function.
1028   virtual void finalizeInstrumentation() = 0;
1029 };
1030 
1031 struct MemorySanitizerVisitor;
1032 
1033 } // end anonymous namespace
1034 
1035 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
1036                                         MemorySanitizerVisitor &Visitor);
1037 
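// Maps a type size in bits to an index into the size-indexed callback arrays
// (MaybeWarningFn, MaybeStoreOriginFn): 8 bits -> 0, 16 -> 1, 32 -> 2,
// 64 -> 3.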
1038 static unsigned TypeSizeToSizeIndex(unsigned TypeSize) {
1039   if (TypeSize <= 8) return 0;
1040   return Log2_32_Ceil((TypeSize + 7) / 8);
1041 }
1042 
1043 namespace {
1044 
1045 /// This class does all the work for a given function. Store and Load
1046 /// instructions store and load corresponding shadow and origin
1047 /// values. Most instructions propagate shadow from arguments to their
1048 /// return values. Certain instructions (most importantly, BranchInst)
1049 /// test their argument shadow and print reports (with a runtime call) if it's
1050 /// non-zero.
1051 struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
1052   Function &F;
1053   MemorySanitizer &MS;
1054   SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
1055   ValueMap<Value*, Value*> ShadowMap, OriginMap;
1056   std::unique_ptr<VarArgHelper> VAHelper;
1057   const TargetLibraryInfo *TLI;
1058   BasicBlock *ActualFnStart;
1059 
1060   // The following flags disable parts of MSan instrumentation based on
1061   // exclusion list contents and command-line options.
1062   bool InsertChecks;
1063   bool PropagateShadow;
1064   bool PoisonStack;
1065   bool PoisonUndef;
1066 
1067   struct ShadowOriginAndInsertPoint {
1068     Value *Shadow;
1069     Value *Origin;
1070     Instruction *OrigIns;
1071 
1072     ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I)
1073       : Shadow(S), Origin(O), OrigIns(I) {}
1074   };
1075   SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
1076   bool InstrumentLifetimeStart = ClHandleLifetimeIntrinsics;
1077   SmallSet<AllocaInst *, 16> AllocaSet;
1078   SmallVector<std::pair<IntrinsicInst *, AllocaInst *>, 16> LifetimeStartList;
1079   SmallVector<StoreInst *, 16> StoreList;
1080 
1081   MemorySanitizerVisitor(Function &F, MemorySanitizer &MS,
1082                          const TargetLibraryInfo &TLI)
1083       : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)), TLI(&TLI) {
1084     bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeMemory);
1085     InsertChecks = SanitizeFunction;
1086     PropagateShadow = SanitizeFunction;
1087     PoisonStack = SanitizeFunction && ClPoisonStack;
1088     PoisonUndef = SanitizeFunction && ClPoisonUndef;
1089 
1090     MS.initializeCallbacks(*F.getParent());
1091     if (MS.CompileKernel)
1092       ActualFnStart = insertKmsanPrologue(F);
1093     else
1094       ActualFnStart = &F.getEntryBlock();
1095 
1096     LLVM_DEBUG(if (!InsertChecks) dbgs()
1097                << "MemorySanitizer is not inserting checks into '"
1098                << F.getName() << "'\n");
1099   }
1100 
1101   Value *updateOrigin(Value *V, IRBuilder<> &IRB) {
1102     if (MS.TrackOrigins <= 1) return V;
1103     return IRB.CreateCall(MS.MsanChainOriginFn, V);
1104   }
1105 
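  // Widens a 4-byte origin to pointer width by replicating it (on 64-bit
  // targets, origin O becomes (O << 32) | O), so that a single intptr-sized
  // store in paintOrigin() fills two adjacent origin slots at once.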
1106   Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) {
1107     const DataLayout &DL = F.getParent()->getDataLayout();
1108     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
1109     if (IntptrSize == kOriginSize) return Origin;
1110     assert(IntptrSize == kOriginSize * 2);
1111     Origin = IRB.CreateIntCast(Origin, MS.IntptrTy, /* isSigned */ false);
1112     return IRB.CreateOr(Origin, IRB.CreateShl(Origin, kOriginSize * 8));
1113   }
1114 
1115   /// Fill memory range with the given origin value.
1116   void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
1117                    unsigned Size, Align Alignment) {
1118     const DataLayout &DL = F.getParent()->getDataLayout();
1119     const Align IntptrAlignment = DL.getABITypeAlign(MS.IntptrTy);
1120     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
1121     assert(IntptrAlignment >= kMinOriginAlignment);
1122     assert(IntptrSize >= kOriginSize);
1123 
1124     unsigned Ofs = 0;
1125     Align CurrentAlignment = Alignment;
1126     if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
1127       Value *IntptrOrigin = originToIntptr(IRB, Origin);
1128       Value *IntptrOriginPtr =
1129           IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0));
1130       for (unsigned i = 0; i < Size / IntptrSize; ++i) {
1131         Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i)
1132                        : IntptrOriginPtr;
1133         IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
1134         Ofs += IntptrSize / kOriginSize;
1135         CurrentAlignment = IntptrAlignment;
1136       }
1137     }
1138 
1139     for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
1140       Value *GEP =
1141           i ? IRB.CreateConstGEP1_32(MS.OriginTy, OriginPtr, i) : OriginPtr;
1142       IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
1143       CurrentAlignment = kMinOriginAlignment;
1144     }
1145   }
1146 
1147   void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
1148                    Value *OriginPtr, Align Alignment, bool AsCall) {
1149     const DataLayout &DL = F.getParent()->getDataLayout();
1150     const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1151     unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
1152     Value *ConvertedShadow = convertShadowToScalar(Shadow, IRB);
1153     if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
1154       if (ClCheckConstantShadow && !ConstantShadow->isZeroValue())
1155         paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
1156                     OriginAlignment);
1157       return;
1158     }
1159 
1160     unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
1161     unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
1162     if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
1163       FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex];
1164       Value *ConvertedShadow2 =
1165           IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
1166       IRB.CreateCall(Fn,
1167                      {ConvertedShadow2,
1168                       IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), Origin});
1169     } else {
1170       Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
1171       Instruction *CheckTerm = SplitBlockAndInsertIfThen(
1172           Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
1173       IRBuilder<> IRBNew(CheckTerm);
1174       paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), OriginPtr, StoreSize,
1175                   OriginAlignment);
1176     }
1177   }
1178 
1179   void materializeStores(bool InstrumentWithCalls) {
1180     for (StoreInst *SI : StoreList) {
1181       IRBuilder<> IRB(SI);
1182       Value *Val = SI->getValueOperand();
1183       Value *Addr = SI->getPointerOperand();
1184       Value *Shadow = SI->isAtomic() ? getCleanShadow(Val) : getShadow(Val);
1185       Value *ShadowPtr, *OriginPtr;
1186       Type *ShadowTy = Shadow->getType();
1187       const Align Alignment = assumeAligned(SI->getAlignment());
1188       const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1189       std::tie(ShadowPtr, OriginPtr) =
1190           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ true);
1191 
1192       StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment);
1193       LLVM_DEBUG(dbgs() << "  STORE: " << *NewSI << "\n");
1194       (void)NewSI;
1195 
1196       if (SI->isAtomic())
1197         SI->setOrdering(addReleaseOrdering(SI->getOrdering()));
1198 
1199       if (MS.TrackOrigins && !SI->isAtomic())
1200         storeOrigin(IRB, Addr, Shadow, getOrigin(Val), OriginPtr,
1201                     OriginAlignment, InstrumentWithCalls);
1202     }
1203   }
1204 
1205   /// Helper function to insert a warning at IRB's current insert point.
1206   void insertWarningFn(IRBuilder<> &IRB, Value *Origin) {
1207     if (!Origin)
1208       Origin = (Value *)IRB.getInt32(0);
1209     assert(Origin->getType()->isIntegerTy());
1210     IRB.CreateCall(MS.WarningFn, Origin)->setCannotMerge();
1211     // FIXME: Insert UnreachableInst if !MS.Recover?
1212     // This may invalidate some of the following checks and needs to be done
1213     // at the very end.
1214   }
1215 
1216   void materializeOneCheck(Instruction *OrigIns, Value *Shadow, Value *Origin,
1217                            bool AsCall) {
1218     IRBuilder<> IRB(OrigIns);
1219     LLVM_DEBUG(dbgs() << "  SHAD0 : " << *Shadow << "\n");
1220     Value *ConvertedShadow = convertShadowToScalar(Shadow, IRB);
1221     LLVM_DEBUG(dbgs() << "  SHAD1 : " << *ConvertedShadow << "\n");
1222 
1223     if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
1224       if (ClCheckConstantShadow && !ConstantShadow->isZeroValue()) {
1225         insertWarningFn(IRB, Origin);
1226       }
1227       return;
1228     }
1229 
1230     const DataLayout &DL = OrigIns->getModule()->getDataLayout();
1231 
1232     unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
1233     unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
1234     if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
1235       FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
1236       Value *ConvertedShadow2 =
1237           IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
1238       IRB.CreateCall(Fn, {ConvertedShadow2, MS.TrackOrigins && Origin
1239                                                 ? Origin
1240                                                 : (Value *)IRB.getInt32(0)});
1241     } else {
1242       Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
1243       Instruction *CheckTerm = SplitBlockAndInsertIfThen(
1244           Cmp, OrigIns,
1245           /* Unreachable */ !MS.Recover, MS.ColdCallWeights);
1246 
1247       IRB.SetInsertPoint(CheckTerm);
1248       insertWarningFn(IRB, Origin);
1249       LLVM_DEBUG(dbgs() << "  CHECK: " << *Cmp << "\n");
1250     }
1251   }
1252 
1253   void materializeChecks(bool InstrumentWithCalls) {
1254     for (const auto &ShadowData : InstrumentationList) {
1255       Instruction *OrigIns = ShadowData.OrigIns;
1256       Value *Shadow = ShadowData.Shadow;
1257       Value *Origin = ShadowData.Origin;
1258       materializeOneCheck(OrigIns, Shadow, Origin, InstrumentWithCalls);
1259     }
1260     LLVM_DEBUG(dbgs() << "DONE:\n" << F);
1261   }
1262 
1263   BasicBlock *insertKmsanPrologue(Function &F) {
1264     BasicBlock *ret =
1265         SplitBlock(&F.getEntryBlock(), F.getEntryBlock().getFirstNonPHI());
1266     IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
1267     Value *ContextState = IRB.CreateCall(MS.MsanGetContextStateFn, {});
1268     Constant *Zero = IRB.getInt32(0);
1269     MS.ParamTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1270                                 {Zero, IRB.getInt32(0)}, "param_shadow");
1271     MS.RetvalTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1272                                  {Zero, IRB.getInt32(1)}, "retval_shadow");
1273     MS.VAArgTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1274                                 {Zero, IRB.getInt32(2)}, "va_arg_shadow");
1275     MS.VAArgOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1276                                       {Zero, IRB.getInt32(3)}, "va_arg_origin");
1277     MS.VAArgOverflowSizeTLS =
1278         IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1279                       {Zero, IRB.getInt32(4)}, "va_arg_overflow_size");
1280     MS.ParamOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1281                                       {Zero, IRB.getInt32(5)}, "param_origin");
1282     MS.RetvalOriginTLS =
1283         IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1284                       {Zero, IRB.getInt32(6)}, "retval_origin");
1285     return ret;
1286   }
1287 
1288   /// Add MemorySanitizer instrumentation to a function.
1289   bool runOnFunction() {
1290     // In the presence of unreachable blocks, we may see Phi nodes with
1291     // incoming nodes from such blocks. Since InstVisitor skips unreachable
1292     // blocks, such nodes will not have any shadow value associated with them.
1293     // It's easier to remove unreachable blocks than deal with missing shadow.
1294     removeUnreachableBlocks(F);
1295 
1296     // Iterate all BBs in depth-first order and create shadow instructions
1297     // for all instructions (where applicable).
1298     // For PHI nodes we create dummy shadow PHIs which will be finalized later.
1299     for (BasicBlock *BB : depth_first(ActualFnStart))
1300       visit(*BB);
1301 
1302     // Finalize PHI nodes.
1303     for (PHINode *PN : ShadowPHINodes) {
1304       PHINode *PNS = cast<PHINode>(getShadow(PN));
1305       PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : nullptr;
1306       size_t NumValues = PN->getNumIncomingValues();
1307       for (size_t v = 0; v < NumValues; v++) {
1308         PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
1309         if (PNO) PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
1310       }
1311     }
1312 
1313     VAHelper->finalizeInstrumentation();
1314 
1315     // Poison llvm.lifetime.start intrinsics, if we haven't fallen back to
1316     // instrumenting only allocas.
1317     if (InstrumentLifetimeStart) {
1318       for (auto Item : LifetimeStartList) {
1319         instrumentAlloca(*Item.second, Item.first);
1320         AllocaSet.erase(Item.second);
1321       }
1322     }
1323     // Poison the allocas for which we didn't instrument the corresponding
1324     // lifetime intrinsics.
1325     for (AllocaInst *AI : AllocaSet)
1326       instrumentAlloca(*AI);
1327 
1328     bool InstrumentWithCalls = ClInstrumentationWithCallThreshold >= 0 &&
1329                                InstrumentationList.size() + StoreList.size() >
1330                                    (unsigned)ClInstrumentationWithCallThreshold;
1331 
1332     // Insert shadow value checks.
1333     materializeChecks(InstrumentWithCalls);
1334 
1335     // Delayed instrumentation of StoreInst.
1336     // This may not add new address checks.
1337     materializeStores(InstrumentWithCalls);
1338 
1339     return true;
1340   }
1341 
1342   /// Compute the shadow type that corresponds to a given Value.
1343   Type *getShadowTy(Value *V) {
1344     return getShadowTy(V->getType());
1345   }
1346 
1347   /// Compute the shadow type that corresponds to a given Type.
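  ///
  /// For example (following the rules below): i32 -> i32, float -> i32,
  /// <4 x float> -> <4 x i32>, [8 x i8] -> [8 x i8], and
  /// { i64, float } -> { i64, i32 }.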
1348   Type *getShadowTy(Type *OrigTy) {
1349     if (!OrigTy->isSized()) {
1350       return nullptr;
1351     }
1352     // For integer type, shadow is the same as the original type.
1353     // This may return weird-sized types like i1.
1354     if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
1355       return IT;
1356     const DataLayout &DL = F.getParent()->getDataLayout();
1357     if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
1358       uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType());
1359       return FixedVectorType::get(IntegerType::get(*MS.C, EltSize),
1360                                   cast<FixedVectorType>(VT)->getNumElements());
1361     }
1362     if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy)) {
1363       return ArrayType::get(getShadowTy(AT->getElementType()),
1364                             AT->getNumElements());
1365     }
1366     if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
1367       SmallVector<Type*, 4> Elements;
1368       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1369         Elements.push_back(getShadowTy(ST->getElementType(i)));
1370       StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked());
1371       LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
1372       return Res;
1373     }
1374     uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
1375     return IntegerType::get(*MS.C, TypeSize);
1376   }
1377 
1378   /// Flatten a vector type.
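  ///
  /// For example, <4 x i32> flattens to i128; non-vector types are returned
  /// unchanged.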
1379   Type *getShadowTyNoVec(Type *ty) {
1380     if (VectorType *vt = dyn_cast<VectorType>(ty))
1381       return IntegerType::get(*MS.C,
1382                               vt->getPrimitiveSizeInBits().getFixedSize());
1383     return ty;
1384   }
1385 
1386   /// Extract combined shadow of struct elements as a bool
1387   Value *collapseStructShadow(StructType *Struct, Value *Shadow,
1388                               IRBuilder<> &IRB) {
1389     Value *FalseVal = IRB.getIntN(/* width */ 1, /* value */ 0);
1390     Value *Aggregator = FalseVal;
1391 
1392     for (unsigned Idx = 0; Idx < Struct->getNumElements(); Idx++) {
1393       // Combine by ORing together each element's bool shadow
1394       Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
1395       Value *ShadowInner = convertShadowToScalar(ShadowItem, IRB);
1396       Value *ShadowBool = convertToBool(ShadowInner, IRB);
1397 
1398       if (Aggregator != FalseVal)
1399         Aggregator = IRB.CreateOr(Aggregator, ShadowBool);
1400       else
1401         Aggregator = ShadowBool;
1402     }
1403 
1404     return Aggregator;
1405   }
1406 
1407   // Extract combined shadow of array elements
1408   Value *collapseArrayShadow(ArrayType *Array, Value *Shadow,
1409                              IRBuilder<> &IRB) {
1410     if (!Array->getNumElements())
1411       return IRB.getIntN(/* width */ 1, /* value */ 0);
1412 
1413     Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);
1414     Value *Aggregator = convertShadowToScalar(FirstItem, IRB);
1415 
1416     for (unsigned Idx = 1; Idx < Array->getNumElements(); Idx++) {
1417       Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
1418       Value *ShadowInner = convertShadowToScalar(ShadowItem, IRB);
1419       Aggregator = IRB.CreateOr(Aggregator, ShadowInner);
1420     }
1421     return Aggregator;
1422   }
1423 
  /// Convert a shadow value to its flattened variant. The resulting
  /// shadow does not necessarily have the same bit width as the input
  /// value, but it is always comparable to zero.
1427   Value *convertShadowToScalar(Value *V, IRBuilder<> &IRB) {
1428     if (StructType *Struct = dyn_cast<StructType>(V->getType()))
1429       return collapseStructShadow(Struct, V, IRB);
1430     if (ArrayType *Array = dyn_cast<ArrayType>(V->getType()))
1431       return collapseArrayShadow(Array, V, IRB);
1432     Type *Ty = V->getType();
1433     Type *NoVecTy = getShadowTyNoVec(Ty);
1434     if (Ty == NoVecTy) return V;
1435     return IRB.CreateBitCast(V, NoVecTy);
1436   }
1437 
1438   // Convert a scalar value to an i1 by comparing with 0
1439   Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &name = "") {
1440     Type *VTy = V->getType();
1441     assert(VTy->isIntegerTy());
1442     if (VTy->getIntegerBitWidth() == 1)
1443       // Just converting a bool to a bool, so do nothing.
1444       return V;
1445     return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), name);
1446   }
1447 
1448   /// Compute the integer shadow offset that corresponds to a given
1449   /// application address.
1450   ///
1451   /// Offset = (Addr & ~AndMask) ^ XorMask
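  ///
  /// As an illustration, with the Linux/x86_64 mapping parameters
  /// (AndMask == 0, XorMask == 0x500000000000) this reduces to
  /// Offset = Addr ^ 0x500000000000; the actual masks come from
  /// MS.MapParams and differ per platform.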
1452   Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) {
1453     Value *OffsetLong = IRB.CreatePointerCast(Addr, MS.IntptrTy);
1454 
1455     uint64_t AndMask = MS.MapParams->AndMask;
1456     if (AndMask)
1457       OffsetLong =
1458           IRB.CreateAnd(OffsetLong, ConstantInt::get(MS.IntptrTy, ~AndMask));
1459 
1460     uint64_t XorMask = MS.MapParams->XorMask;
1461     if (XorMask)
1462       OffsetLong =
1463           IRB.CreateXor(OffsetLong, ConstantInt::get(MS.IntptrTy, XorMask));
1464     return OffsetLong;
1465   }
1466 
1467   /// Compute the shadow and origin addresses corresponding to a given
1468   /// application address.
1469   ///
1470   /// Shadow = ShadowBase + Offset
1471   /// Origin = (OriginBase + Offset) & ~3ULL
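  ///
  /// For instance, with ShadowBase == 0 the shadow address is just the
  /// offset computed above, and the origin address is OriginBase + Offset
  /// rounded down to a 4-byte boundary (one origin slot per 4 bytes of
  /// application memory).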
1472   std::pair<Value *, Value *>
1473   getShadowOriginPtrUserspace(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy,
1474                               MaybeAlign Alignment) {
1475     Value *ShadowOffset = getShadowPtrOffset(Addr, IRB);
1476     Value *ShadowLong = ShadowOffset;
1477     uint64_t ShadowBase = MS.MapParams->ShadowBase;
1478     if (ShadowBase != 0) {
1479       ShadowLong =
1480         IRB.CreateAdd(ShadowLong,
1481                       ConstantInt::get(MS.IntptrTy, ShadowBase));
1482     }
1483     Value *ShadowPtr =
1484         IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
1485     Value *OriginPtr = nullptr;
1486     if (MS.TrackOrigins) {
1487       Value *OriginLong = ShadowOffset;
1488       uint64_t OriginBase = MS.MapParams->OriginBase;
1489       if (OriginBase != 0)
1490         OriginLong = IRB.CreateAdd(OriginLong,
1491                                    ConstantInt::get(MS.IntptrTy, OriginBase));
1492       if (!Alignment || *Alignment < kMinOriginAlignment) {
1493         uint64_t Mask = kMinOriginAlignment.value() - 1;
1494         OriginLong =
1495             IRB.CreateAnd(OriginLong, ConstantInt::get(MS.IntptrTy, ~Mask));
1496       }
1497       OriginPtr =
1498           IRB.CreateIntToPtr(OriginLong, PointerType::get(MS.OriginTy, 0));
1499     }
1500     return std::make_pair(ShadowPtr, OriginPtr);
1501   }
1502 
1503   std::pair<Value *, Value *> getShadowOriginPtrKernel(Value *Addr,
1504                                                        IRBuilder<> &IRB,
1505                                                        Type *ShadowTy,
1506                                                        bool isStore) {
1507     Value *ShadowOriginPtrs;
1508     const DataLayout &DL = F.getParent()->getDataLayout();
1509     int Size = DL.getTypeStoreSize(ShadowTy);
1510 
1511     FunctionCallee Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
1512     Value *AddrCast =
1513         IRB.CreatePointerCast(Addr, PointerType::get(IRB.getInt8Ty(), 0));
1514     if (Getter) {
1515       ShadowOriginPtrs = IRB.CreateCall(Getter, AddrCast);
1516     } else {
1517       Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
1518       ShadowOriginPtrs = IRB.CreateCall(isStore ? MS.MsanMetadataPtrForStoreN
1519                                                 : MS.MsanMetadataPtrForLoadN,
1520                                         {AddrCast, SizeVal});
1521     }
1522     Value *ShadowPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 0);
1523     ShadowPtr = IRB.CreatePointerCast(ShadowPtr, PointerType::get(ShadowTy, 0));
1524     Value *OriginPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 1);
1525 
1526     return std::make_pair(ShadowPtr, OriginPtr);
1527   }
1528 
1529   std::pair<Value *, Value *> getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB,
1530                                                  Type *ShadowTy,
1531                                                  MaybeAlign Alignment,
1532                                                  bool isStore) {
1533     if (MS.CompileKernel)
1534       return getShadowOriginPtrKernel(Addr, IRB, ShadowTy, isStore);
1535     return getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment);
1536   }
1537 
1538   /// Compute the shadow address for a given function argument.
1539   ///
1540   /// Shadow = ParamTLS+ArgOffset.
1541   Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB,
1542                                  int ArgOffset) {
1543     Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
1544     if (ArgOffset)
1545       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1546     return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
1547                               "_msarg");
1548   }
1549 
1550   /// Compute the origin address for a given function argument.
1551   Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB,
1552                                  int ArgOffset) {
1553     if (!MS.TrackOrigins)
1554       return nullptr;
1555     Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
1556     if (ArgOffset)
1557       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1558     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
1559                               "_msarg_o");
1560   }
1561 
1562   /// Compute the shadow address for a retval.
1563   Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) {
1564     return IRB.CreatePointerCast(MS.RetvalTLS,
1565                                  PointerType::get(getShadowTy(A), 0),
1566                                  "_msret");
1567   }
1568 
1569   /// Compute the origin address for a retval.
1570   Value *getOriginPtrForRetval(IRBuilder<> &IRB) {
1571     // We keep a single origin for the entire retval. Might be too optimistic.
1572     return MS.RetvalOriginTLS;
1573   }
1574 
1575   /// Set SV to be the shadow value for V.
1576   void setShadow(Value *V, Value *SV) {
1577     assert(!ShadowMap.count(V) && "Values may only have one shadow");
1578     ShadowMap[V] = PropagateShadow ? SV : getCleanShadow(V);
1579   }
1580 
1581   /// Set Origin to be the origin value for V.
1582   void setOrigin(Value *V, Value *Origin) {
1583     if (!MS.TrackOrigins) return;
1584     assert(!OriginMap.count(V) && "Values may only have one origin");
1585     LLVM_DEBUG(dbgs() << "ORIGIN: " << *V << "  ==> " << *Origin << "\n");
1586     OriginMap[V] = Origin;
1587   }
1588 
1589   Constant *getCleanShadow(Type *OrigTy) {
1590     Type *ShadowTy = getShadowTy(OrigTy);
1591     if (!ShadowTy)
1592       return nullptr;
1593     return Constant::getNullValue(ShadowTy);
1594   }
1595 
1596   /// Create a clean shadow value for a given value.
1597   ///
1598   /// Clean shadow (all zeroes) means all bits of the value are defined
1599   /// (initialized).
1600   Constant *getCleanShadow(Value *V) {
1601     return getCleanShadow(V->getType());
1602   }
1603 
1604   /// Create a dirty shadow of a given shadow type.
1605   Constant *getPoisonedShadow(Type *ShadowTy) {
1606     assert(ShadowTy);
1607     if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
1608       return Constant::getAllOnesValue(ShadowTy);
1609     if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy)) {
1610       SmallVector<Constant *, 4> Vals(AT->getNumElements(),
1611                                       getPoisonedShadow(AT->getElementType()));
1612       return ConstantArray::get(AT, Vals);
1613     }
1614     if (StructType *ST = dyn_cast<StructType>(ShadowTy)) {
1615       SmallVector<Constant *, 4> Vals;
1616       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1617         Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
1618       return ConstantStruct::get(ST, Vals);
1619     }
1620     llvm_unreachable("Unexpected shadow type");
1621   }
1622 
1623   /// Create a dirty shadow for a given value.
1624   Constant *getPoisonedShadow(Value *V) {
1625     Type *ShadowTy = getShadowTy(V);
1626     if (!ShadowTy)
1627       return nullptr;
1628     return getPoisonedShadow(ShadowTy);
1629   }
1630 
1631   /// Create a clean (zero) origin.
1632   Value *getCleanOrigin() {
1633     return Constant::getNullValue(MS.OriginTy);
1634   }
1635 
1636   /// Get the shadow value for a given Value.
1637   ///
1638   /// This function either returns the value set earlier with setShadow,
  /// or extracts it from ParamTLS (for function arguments).
1640   Value *getShadow(Value *V) {
1641     if (!PropagateShadow) return getCleanShadow(V);
1642     if (Instruction *I = dyn_cast<Instruction>(V)) {
1643       if (I->getMetadata("nosanitize"))
1644         return getCleanShadow(V);
1645       // For instructions the shadow is already stored in the map.
1646       Value *Shadow = ShadowMap[V];
1647       if (!Shadow) {
1648         LLVM_DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
1649         (void)I;
1650         assert(Shadow && "No shadow for a value");
1651       }
1652       return Shadow;
1653     }
1654     if (UndefValue *U = dyn_cast<UndefValue>(V)) {
1655       Value *AllOnes = PoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V);
1656       LLVM_DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
1657       (void)U;
1658       return AllOnes;
1659     }
1660     if (Argument *A = dyn_cast<Argument>(V)) {
1661       // For arguments we compute the shadow on demand and store it in the map.
1662       Value **ShadowPtr = &ShadowMap[V];
1663       if (*ShadowPtr)
1664         return *ShadowPtr;
1665       Function *F = A->getParent();
1666       IRBuilder<> EntryIRB(ActualFnStart->getFirstNonPHI());
1667       unsigned ArgOffset = 0;
1668       const DataLayout &DL = F->getParent()->getDataLayout();
1669       for (auto &FArg : F->args()) {
1670         if (!FArg.getType()->isSized()) {
1671           LLVM_DEBUG(dbgs() << "Arg is not sized\n");
1672           continue;
1673         }
1674 
1675         bool FArgByVal = FArg.hasByValAttr();
1676         bool FArgNoUndef = FArg.hasAttribute(Attribute::NoUndef);
1677         bool FArgEagerCheck = ClEagerChecks && !FArgByVal && FArgNoUndef;
1678         unsigned Size =
1679             FArg.hasByValAttr()
1680                 ? DL.getTypeAllocSize(FArg.getParamByValType())
1681                 : DL.getTypeAllocSize(FArg.getType());
1682 
1683         if (A == &FArg) {
1684           bool Overflow = ArgOffset + Size > kParamTLSSize;
1685           if (FArgEagerCheck) {
1686             *ShadowPtr = getCleanShadow(V);
1687             setOrigin(A, getCleanOrigin());
1688             continue;
1689           } else if (FArgByVal) {
1690             Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
1691             // ByVal pointer itself has clean shadow. We copy the actual
1692             // argument shadow to the underlying memory.
1693             // Figure out maximal valid memcpy alignment.
1694             const Align ArgAlign = DL.getValueOrABITypeAlignment(
1695                 MaybeAlign(FArg.getParamAlignment()), FArg.getParamByValType());
1696             Value *CpShadowPtr =
1697                 getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign,
1698                                    /*isStore*/ true)
1699                     .first;
1700             // TODO(glider): need to copy origins.
1701             if (Overflow) {
1702               // ParamTLS overflow.
1703               EntryIRB.CreateMemSet(
1704                   CpShadowPtr, Constant::getNullValue(EntryIRB.getInt8Ty()),
1705                   Size, ArgAlign);
1706             } else {
1707               const Align CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
1708               Value *Cpy = EntryIRB.CreateMemCpy(CpShadowPtr, CopyAlign, Base,
1709                                                  CopyAlign, Size);
1710               LLVM_DEBUG(dbgs() << "  ByValCpy: " << *Cpy << "\n");
1711               (void)Cpy;
1712             }
1713             *ShadowPtr = getCleanShadow(V);
1714           } else {
1715             // Shadow over TLS
1716             Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
1717             if (Overflow) {
1718               // ParamTLS overflow.
1719               *ShadowPtr = getCleanShadow(V);
1720             } else {
1721               *ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base,
1722                                                       kShadowTLSAlignment);
1723             }
1724           }
1725           LLVM_DEBUG(dbgs()
1726                      << "  ARG:    " << FArg << " ==> " << **ShadowPtr << "\n");
1727           if (MS.TrackOrigins && !Overflow) {
1728             Value *OriginPtr =
1729                 getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
1730             setOrigin(A, EntryIRB.CreateLoad(MS.OriginTy, OriginPtr));
1731           } else {
1732             setOrigin(A, getCleanOrigin());
1733           }
1734         }
1735 
1736         if (!FArgEagerCheck)
1737           ArgOffset += alignTo(Size, kShadowTLSAlignment);
1738       }
1739       assert(*ShadowPtr && "Could not find shadow for an argument");
1740       return *ShadowPtr;
1741     }
1742     // For everything else the shadow is zero.
1743     return getCleanShadow(V);
1744   }
1745 
1746   /// Get the shadow for i-th argument of the instruction I.
1747   Value *getShadow(Instruction *I, int i) {
1748     return getShadow(I->getOperand(i));
1749   }
1750 
1751   /// Get the origin for a value.
1752   Value *getOrigin(Value *V) {
1753     if (!MS.TrackOrigins) return nullptr;
1754     if (!PropagateShadow) return getCleanOrigin();
1755     if (isa<Constant>(V)) return getCleanOrigin();
1756     assert((isa<Instruction>(V) || isa<Argument>(V)) &&
1757            "Unexpected value type in getOrigin()");
1758     if (Instruction *I = dyn_cast<Instruction>(V)) {
1759       if (I->getMetadata("nosanitize"))
1760         return getCleanOrigin();
1761     }
1762     Value *Origin = OriginMap[V];
1763     assert(Origin && "Missing origin");
1764     return Origin;
1765   }
1766 
1767   /// Get the origin for i-th argument of the instruction I.
1768   Value *getOrigin(Instruction *I, int i) {
1769     return getOrigin(I->getOperand(i));
1770   }
1771 
1772   /// Remember the place where a shadow check should be inserted.
1773   ///
  /// This location will later be instrumented with a check that prints a
  /// UMR warning at runtime if the shadow value is not 0.
1776   void insertShadowCheck(Value *Shadow, Value *Origin, Instruction *OrigIns) {
1777     assert(Shadow);
1778     if (!InsertChecks) return;
1779 #ifndef NDEBUG
1780     Type *ShadowTy = Shadow->getType();
1781     assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy) ||
1782             isa<StructType>(ShadowTy) || isa<ArrayType>(ShadowTy)) &&
1783            "Can only insert checks for integer, vector, and aggregate shadow "
1784            "types");
1785 #endif
1786     InstrumentationList.push_back(
1787         ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
1788   }
1789 
1790   /// Remember the place where a shadow check should be inserted.
1791   ///
  /// This location will later be instrumented with a check that prints a
  /// UMR warning at runtime if the value is not fully defined.
1794   void insertShadowCheck(Value *Val, Instruction *OrigIns) {
1795     assert(Val);
1796     Value *Shadow, *Origin;
1797     if (ClCheckConstantShadow) {
1798       Shadow = getShadow(Val);
1799       if (!Shadow) return;
1800       Origin = getOrigin(Val);
1801     } else {
1802       Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
1803       if (!Shadow) return;
1804       Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
1805     }
1806     insertShadowCheck(Shadow, Origin, OrigIns);
1807   }
1808 
1809   AtomicOrdering addReleaseOrdering(AtomicOrdering a) {
1810     switch (a) {
1811       case AtomicOrdering::NotAtomic:
1812         return AtomicOrdering::NotAtomic;
1813       case AtomicOrdering::Unordered:
1814       case AtomicOrdering::Monotonic:
1815       case AtomicOrdering::Release:
1816         return AtomicOrdering::Release;
1817       case AtomicOrdering::Acquire:
1818       case AtomicOrdering::AcquireRelease:
1819         return AtomicOrdering::AcquireRelease;
1820       case AtomicOrdering::SequentiallyConsistent:
1821         return AtomicOrdering::SequentiallyConsistent;
1822     }
1823     llvm_unreachable("Unknown ordering");
1824   }
1825 
1826   Value *makeAddReleaseOrderingTable(IRBuilder<> &IRB) {
1827     constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
1828     uint32_t OrderingTable[NumOrderings] = {};
1829 
1830     OrderingTable[(int)AtomicOrderingCABI::relaxed] =
1831         OrderingTable[(int)AtomicOrderingCABI::release] =
1832             (int)AtomicOrderingCABI::release;
1833     OrderingTable[(int)AtomicOrderingCABI::consume] =
1834         OrderingTable[(int)AtomicOrderingCABI::acquire] =
1835             OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
1836                 (int)AtomicOrderingCABI::acq_rel;
1837     OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
1838         (int)AtomicOrderingCABI::seq_cst;
1839 
1840     return ConstantDataVector::get(IRB.getContext(),
1841                                    makeArrayRef(OrderingTable, NumOrderings));
1842   }
1843 
1844   AtomicOrdering addAcquireOrdering(AtomicOrdering a) {
1845     switch (a) {
1846       case AtomicOrdering::NotAtomic:
1847         return AtomicOrdering::NotAtomic;
1848       case AtomicOrdering::Unordered:
1849       case AtomicOrdering::Monotonic:
1850       case AtomicOrdering::Acquire:
1851         return AtomicOrdering::Acquire;
1852       case AtomicOrdering::Release:
1853       case AtomicOrdering::AcquireRelease:
1854         return AtomicOrdering::AcquireRelease;
1855       case AtomicOrdering::SequentiallyConsistent:
1856         return AtomicOrdering::SequentiallyConsistent;
1857     }
1858     llvm_unreachable("Unknown ordering");
1859   }
1860 
1861   Value *makeAddAcquireOrderingTable(IRBuilder<> &IRB) {
1862     constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
1863     uint32_t OrderingTable[NumOrderings] = {};
1864 
1865     OrderingTable[(int)AtomicOrderingCABI::relaxed] =
1866         OrderingTable[(int)AtomicOrderingCABI::acquire] =
1867             OrderingTable[(int)AtomicOrderingCABI::consume] =
1868                 (int)AtomicOrderingCABI::acquire;
1869     OrderingTable[(int)AtomicOrderingCABI::release] =
1870         OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
1871             (int)AtomicOrderingCABI::acq_rel;
1872     OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
1873         (int)AtomicOrderingCABI::seq_cst;
1874 
1875     return ConstantDataVector::get(IRB.getContext(),
1876                                    makeArrayRef(OrderingTable, NumOrderings));
1877   }
1878 
1879   // ------------------- Visitors.
1880   using InstVisitor<MemorySanitizerVisitor>::visit;
1881   void visit(Instruction &I) {
1882     if (!I.getMetadata("nosanitize"))
1883       InstVisitor<MemorySanitizerVisitor>::visit(I);
1884   }
1885 
1886   /// Instrument LoadInst
1887   ///
1888   /// Loads the corresponding shadow and (optionally) origin.
1889   /// Optionally, checks that the load address is fully defined.
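  ///
  /// Roughly, a load such as
  ///   %v = load i32, i32* %p, align 4
  /// is instrumented with something like (names illustrative):
  ///   %sp    = <shadow pointer computed from %p>   ; getShadowOriginPtr()
  ///   %_msld = load i32, i32* %sp, align 4         ; shadow of %v
  /// plus, when origin tracking is enabled, a load of the corresponding
  /// 4-byte origin slot.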
1890   void visitLoadInst(LoadInst &I) {
1891     assert(I.getType()->isSized() && "Load type must have size");
1892     assert(!I.getMetadata("nosanitize"));
1893     IRBuilder<> IRB(I.getNextNode());
1894     Type *ShadowTy = getShadowTy(&I);
1895     Value *Addr = I.getPointerOperand();
1896     Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
1897     const Align Alignment = assumeAligned(I.getAlignment());
1898     if (PropagateShadow) {
1899       std::tie(ShadowPtr, OriginPtr) =
1900           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
1901       setShadow(&I,
1902                 IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
1903     } else {
1904       setShadow(&I, getCleanShadow(&I));
1905     }
1906 
1907     if (ClCheckAccessAddress)
1908       insertShadowCheck(I.getPointerOperand(), &I);
1909 
1910     if (I.isAtomic())
1911       I.setOrdering(addAcquireOrdering(I.getOrdering()));
1912 
1913     if (MS.TrackOrigins) {
1914       if (PropagateShadow) {
1915         const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1916         setOrigin(
1917             &I, IRB.CreateAlignedLoad(MS.OriginTy, OriginPtr, OriginAlignment));
1918       } else {
1919         setOrigin(&I, getCleanOrigin());
1920       }
1921     }
1922   }
1923 
1924   /// Instrument StoreInst
1925   ///
1926   /// Stores the corresponding shadow and (optionally) origin.
1927   /// Optionally, checks that the store address is fully defined.
1928   void visitStoreInst(StoreInst &I) {
1929     StoreList.push_back(&I);
1930     if (ClCheckAccessAddress)
1931       insertShadowCheck(I.getPointerOperand(), &I);
1932   }
1933 
1934   void handleCASOrRMW(Instruction &I) {
1935     assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
1936 
1937     IRBuilder<> IRB(&I);
1938     Value *Addr = I.getOperand(0);
1939     Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, I.getType(), Align(1),
1940                                           /*isStore*/ true)
1941                            .first;
1942 
1943     if (ClCheckAccessAddress)
1944       insertShadowCheck(Addr, &I);
1945 
1946     // Only test the conditional argument of cmpxchg instruction.
    // The other argument can potentially be uninitialized, but we cannot
    // detect this situation reliably without possible false positives.
1949     if (isa<AtomicCmpXchgInst>(I))
1950       insertShadowCheck(I.getOperand(1), &I);
1951 
1952     IRB.CreateStore(getCleanShadow(&I), ShadowPtr);
1953 
1954     setShadow(&I, getCleanShadow(&I));
1955     setOrigin(&I, getCleanOrigin());
1956   }
1957 
1958   void visitAtomicRMWInst(AtomicRMWInst &I) {
1959     handleCASOrRMW(I);
1960     I.setOrdering(addReleaseOrdering(I.getOrdering()));
1961   }
1962 
1963   void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
1964     handleCASOrRMW(I);
1965     I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
1966   }
1967 
1968   // Vector manipulation.
1969   void visitExtractElementInst(ExtractElementInst &I) {
1970     insertShadowCheck(I.getOperand(1), &I);
1971     IRBuilder<> IRB(&I);
1972     setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
1973               "_msprop"));
1974     setOrigin(&I, getOrigin(&I, 0));
1975   }
1976 
1977   void visitInsertElementInst(InsertElementInst &I) {
1978     insertShadowCheck(I.getOperand(2), &I);
1979     IRBuilder<> IRB(&I);
1980     setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1),
1981               I.getOperand(2), "_msprop"));
1982     setOriginForNaryOp(I);
1983   }
1984 
1985   void visitShuffleVectorInst(ShuffleVectorInst &I) {
1986     IRBuilder<> IRB(&I);
1987     setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1),
1988                                           I.getShuffleMask(), "_msprop"));
1989     setOriginForNaryOp(I);
1990   }
1991 
1992   // Casts.
1993   void visitSExtInst(SExtInst &I) {
1994     IRBuilder<> IRB(&I);
1995     setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop"));
1996     setOrigin(&I, getOrigin(&I, 0));
1997   }
1998 
1999   void visitZExtInst(ZExtInst &I) {
2000     IRBuilder<> IRB(&I);
2001     setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop"));
2002     setOrigin(&I, getOrigin(&I, 0));
2003   }
2004 
2005   void visitTruncInst(TruncInst &I) {
2006     IRBuilder<> IRB(&I);
2007     setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop"));
2008     setOrigin(&I, getOrigin(&I, 0));
2009   }
2010 
2011   void visitBitCastInst(BitCastInst &I) {
2012     // Special case: if this is the bitcast (there is exactly 1 allowed) between
2013     // a musttail call and a ret, don't instrument. New instructions are not
2014     // allowed after a musttail call.
2015     if (auto *CI = dyn_cast<CallInst>(I.getOperand(0)))
2016       if (CI->isMustTailCall())
2017         return;
2018     IRBuilder<> IRB(&I);
2019     setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
2020     setOrigin(&I, getOrigin(&I, 0));
2021   }
2022 
2023   void visitPtrToIntInst(PtrToIntInst &I) {
2024     IRBuilder<> IRB(&I);
2025     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
2026              "_msprop_ptrtoint"));
2027     setOrigin(&I, getOrigin(&I, 0));
2028   }
2029 
2030   void visitIntToPtrInst(IntToPtrInst &I) {
2031     IRBuilder<> IRB(&I);
2032     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
2033              "_msprop_inttoptr"));
2034     setOrigin(&I, getOrigin(&I, 0));
2035   }
2036 
2037   void visitFPToSIInst(CastInst& I) { handleShadowOr(I); }
2038   void visitFPToUIInst(CastInst& I) { handleShadowOr(I); }
2039   void visitSIToFPInst(CastInst& I) { handleShadowOr(I); }
2040   void visitUIToFPInst(CastInst& I) { handleShadowOr(I); }
2041   void visitFPExtInst(CastInst& I) { handleShadowOr(I); }
2042   void visitFPTruncInst(CastInst& I) { handleShadowOr(I); }
2043 
2044   /// Propagate shadow for bitwise AND.
2045   ///
2046   /// This code is exact, i.e. if, for example, a bit in the left argument
  /// is defined and 0, then neither the value nor the definedness of the
  /// corresponding bit in the right argument affects the resulting shadow.
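  ///
  /// For example, if V1 = 0b1100 is fully defined (S1 = 0) and V2 is fully
  /// undefined (S2 = all-ones), then S = (S1&S2)|(V1&S2)|(S1&V2) = V1, i.e.
  /// only the bits where V1 is 1 can be undefined in the result.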
2049   void visitAnd(BinaryOperator &I) {
2050     IRBuilder<> IRB(&I);
    //  "And" of 0 and a poisoned value results in an unpoisoned value.
2052     //  1&1 => 1;     0&1 => 0;     p&1 => p;
2053     //  1&0 => 0;     0&0 => 0;     p&0 => 0;
2054     //  1&p => p;     0&p => 0;     p&p => p;
2055     //  S = (S1 & S2) | (V1 & S2) | (S1 & V2)
2056     Value *S1 = getShadow(&I, 0);
2057     Value *S2 = getShadow(&I, 1);
2058     Value *V1 = I.getOperand(0);
2059     Value *V2 = I.getOperand(1);
2060     if (V1->getType() != S1->getType()) {
2061       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
2062       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
2063     }
2064     Value *S1S2 = IRB.CreateAnd(S1, S2);
2065     Value *V1S2 = IRB.CreateAnd(V1, S2);
2066     Value *S1V2 = IRB.CreateAnd(S1, V2);
2067     setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
2068     setOriginForNaryOp(I);
2069   }
2070 
2071   void visitOr(BinaryOperator &I) {
2072     IRBuilder<> IRB(&I);
    //  "Or" of 1 and a poisoned value results in an unpoisoned value.
2074     //  1|1 => 1;     0|1 => 1;     p|1 => 1;
2075     //  1|0 => 1;     0|0 => 0;     p|0 => p;
2076     //  1|p => 1;     0|p => p;     p|p => p;
2077     //  S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
2078     Value *S1 = getShadow(&I, 0);
2079     Value *S2 = getShadow(&I, 1);
2080     Value *V1 = IRB.CreateNot(I.getOperand(0));
2081     Value *V2 = IRB.CreateNot(I.getOperand(1));
2082     if (V1->getType() != S1->getType()) {
2083       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
2084       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
2085     }
2086     Value *S1S2 = IRB.CreateAnd(S1, S2);
2087     Value *V1S2 = IRB.CreateAnd(V1, S2);
2088     Value *S1V2 = IRB.CreateAnd(S1, V2);
2089     setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
2090     setOriginForNaryOp(I);
2091   }
2092 
2093   /// Default propagation of shadow and/or origin.
2094   ///
2095   /// This class implements the general case of shadow propagation, used in all
2096   /// cases where we don't know and/or don't care about what the operation
2097   /// actually does. It converts all input shadow values to a common type
2098   /// (extending or truncating as necessary), and bitwise OR's them.
2099   ///
2100   /// This is much cheaper than inserting checks (i.e. requiring inputs to be
2101   /// fully initialized), and less prone to false positives.
2102   ///
2103   /// This class also implements the general case of origin propagation. For a
2104   /// Nary operation, result origin is set to the origin of an argument that is
  /// not entirely initialized. If there is more than one such argument, the
2106   /// rightmost of them is picked. It does not matter which one is picked if all
2107   /// arguments are initialized.
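  ///
  /// For example, for %c = fadd float %a, %b this combiner produces,
  /// roughly, Sc = Sa | Sb (with both operand shadows cast to the shadow
  /// type of %c first), and Oc = Ob if %b's shadow is non-zero, else Oa.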
2108   template <bool CombineShadow>
2109   class Combiner {
2110     Value *Shadow = nullptr;
2111     Value *Origin = nullptr;
2112     IRBuilder<> &IRB;
2113     MemorySanitizerVisitor *MSV;
2114 
2115   public:
2116     Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB)
2117         : IRB(IRB), MSV(MSV) {}
2118 
2119     /// Add a pair of shadow and origin values to the mix.
2120     Combiner &Add(Value *OpShadow, Value *OpOrigin) {
2121       if (CombineShadow) {
2122         assert(OpShadow);
2123         if (!Shadow)
2124           Shadow = OpShadow;
2125         else {
2126           OpShadow = MSV->CreateShadowCast(IRB, OpShadow, Shadow->getType());
2127           Shadow = IRB.CreateOr(Shadow, OpShadow, "_msprop");
2128         }
2129       }
2130 
2131       if (MSV->MS.TrackOrigins) {
2132         assert(OpOrigin);
2133         if (!Origin) {
2134           Origin = OpOrigin;
2135         } else {
2136           Constant *ConstOrigin = dyn_cast<Constant>(OpOrigin);
2137           // No point in adding something that might result in 0 origin value.
2138           if (!ConstOrigin || !ConstOrigin->isNullValue()) {
2139             Value *FlatShadow = MSV->convertShadowToScalar(OpShadow, IRB);
2140             Value *Cond =
2141                 IRB.CreateICmpNE(FlatShadow, MSV->getCleanShadow(FlatShadow));
2142             Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
2143           }
2144         }
2145       }
2146       return *this;
2147     }
2148 
2149     /// Add an application value to the mix.
2150     Combiner &Add(Value *V) {
2151       Value *OpShadow = MSV->getShadow(V);
2152       Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : nullptr;
2153       return Add(OpShadow, OpOrigin);
2154     }
2155 
2156     /// Set the current combined values as the given instruction's shadow
2157     /// and origin.
2158     void Done(Instruction *I) {
2159       if (CombineShadow) {
2160         assert(Shadow);
2161         Shadow = MSV->CreateShadowCast(IRB, Shadow, MSV->getShadowTy(I));
2162         MSV->setShadow(I, Shadow);
2163       }
2164       if (MSV->MS.TrackOrigins) {
2165         assert(Origin);
2166         MSV->setOrigin(I, Origin);
2167       }
2168     }
2169   };
2170 
2171   using ShadowAndOriginCombiner = Combiner<true>;
2172   using OriginCombiner = Combiner<false>;
2173 
2174   /// Propagate origin for arbitrary operation.
2175   void setOriginForNaryOp(Instruction &I) {
2176     if (!MS.TrackOrigins) return;
2177     IRBuilder<> IRB(&I);
2178     OriginCombiner OC(this, IRB);
2179     for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
2180       OC.Add(OI->get());
2181     OC.Done(&I);
2182   }
2183 
2184   size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) {
2185     assert(!(Ty->isVectorTy() && Ty->getScalarType()->isPointerTy()) &&
2186            "Vector of pointers is not a valid shadow type");
2187     return Ty->isVectorTy() ? cast<FixedVectorType>(Ty)->getNumElements() *
2188                                   Ty->getScalarSizeInBits()
2189                             : Ty->getPrimitiveSizeInBits();
2190   }
2191 
2192   /// Cast between two shadow types, extending or truncating as
2193   /// necessary.
2194   Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy,
2195                           bool Signed = false) {
2196     Type *srcTy = V->getType();
2197     size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
2198     size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
2199     if (srcSizeInBits > 1 && dstSizeInBits == 1)
2200       return IRB.CreateICmpNE(V, getCleanShadow(V));
2201 
2202     if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
2203       return IRB.CreateIntCast(V, dstTy, Signed);
2204     if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
2205         cast<FixedVectorType>(dstTy)->getNumElements() ==
2206             cast<FixedVectorType>(srcTy)->getNumElements())
2207       return IRB.CreateIntCast(V, dstTy, Signed);
2208     Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
2209     Value *V2 =
2210       IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed);
2211     return IRB.CreateBitCast(V2, dstTy);
2212     // TODO: handle struct types.
2213   }
2214 
2215   /// Cast an application value to the type of its own shadow.
2216   Value *CreateAppToShadowCast(IRBuilder<> &IRB, Value *V) {
2217     Type *ShadowTy = getShadowTy(V);
2218     if (V->getType() == ShadowTy)
2219       return V;
2220     if (V->getType()->isPtrOrPtrVectorTy())
2221       return IRB.CreatePtrToInt(V, ShadowTy);
2222     else
2223       return IRB.CreateBitCast(V, ShadowTy);
2224   }
2225 
2226   /// Propagate shadow for arbitrary operation.
2227   void handleShadowOr(Instruction &I) {
2228     IRBuilder<> IRB(&I);
2229     ShadowAndOriginCombiner SC(this, IRB);
2230     for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
2231       SC.Add(OI->get());
2232     SC.Done(&I);
2233   }
2234 
2235   void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
2236 
2237   // Handle multiplication by constant.
2238   //
2239   // Handle a special case of multiplication by constant that may have one or
  // more zeros in the lower bits. This makes the corresponding number of lower bits
2241   // of the result zero as well. We model it by shifting the other operand
2242   // shadow left by the required number of bits. Effectively, we transform
2243   // (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as (Sx << B).
2244   // We use multiplication by 2**N instead of shift to cover the case of
2245   // multiplication by 0, which may occur in some elements of a vector operand.
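  //
  // For example, for X * 24 (24 == 3 * 2**3) the constant contributes three
  // trailing zero bits, so the shadow of the product is computed as
  // Sx * 8 (== Sx << 3): the three low bits of the result are always 0 and
  // therefore always defined, whatever X's low bits are.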
2246   void handleMulByConstant(BinaryOperator &I, Constant *ConstArg,
2247                            Value *OtherArg) {
2248     Constant *ShadowMul;
2249     Type *Ty = ConstArg->getType();
2250     if (auto *VTy = dyn_cast<VectorType>(Ty)) {
2251       unsigned NumElements = cast<FixedVectorType>(VTy)->getNumElements();
2252       Type *EltTy = VTy->getElementType();
2253       SmallVector<Constant *, 16> Elements;
2254       for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
2255         if (ConstantInt *Elt =
2256                 dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx))) {
2257           const APInt &V = Elt->getValue();
2258           APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
2259           Elements.push_back(ConstantInt::get(EltTy, V2));
2260         } else {
2261           Elements.push_back(ConstantInt::get(EltTy, 1));
2262         }
2263       }
2264       ShadowMul = ConstantVector::get(Elements);
2265     } else {
2266       if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg)) {
2267         const APInt &V = Elt->getValue();
2268         APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
2269         ShadowMul = ConstantInt::get(Ty, V2);
2270       } else {
2271         ShadowMul = ConstantInt::get(Ty, 1);
2272       }
2273     }
2274 
2275     IRBuilder<> IRB(&I);
2276     setShadow(&I,
2277               IRB.CreateMul(getShadow(OtherArg), ShadowMul, "msprop_mul_cst"));
2278     setOrigin(&I, getOrigin(OtherArg));
2279   }
2280 
2281   void visitMul(BinaryOperator &I) {
2282     Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
2283     Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
2284     if (constOp0 && !constOp1)
2285       handleMulByConstant(I, constOp0, I.getOperand(1));
2286     else if (constOp1 && !constOp0)
2287       handleMulByConstant(I, constOp1, I.getOperand(0));
2288     else
2289       handleShadowOr(I);
2290   }
2291 
2292   void visitFAdd(BinaryOperator &I) { handleShadowOr(I); }
2293   void visitFSub(BinaryOperator &I) { handleShadowOr(I); }
2294   void visitFMul(BinaryOperator &I) { handleShadowOr(I); }
2295   void visitAdd(BinaryOperator &I) { handleShadowOr(I); }
2296   void visitSub(BinaryOperator &I) { handleShadowOr(I); }
2297   void visitXor(BinaryOperator &I) { handleShadowOr(I); }
2298 
2299   void handleIntegerDiv(Instruction &I) {
2300     IRBuilder<> IRB(&I);
2301     // Strict on the second argument.
2302     insertShadowCheck(I.getOperand(1), &I);
2303     setShadow(&I, getShadow(&I, 0));
2304     setOrigin(&I, getOrigin(&I, 0));
2305   }
2306 
2307   void visitUDiv(BinaryOperator &I) { handleIntegerDiv(I); }
2308   void visitSDiv(BinaryOperator &I) { handleIntegerDiv(I); }
2309   void visitURem(BinaryOperator &I) { handleIntegerDiv(I); }
2310   void visitSRem(BinaryOperator &I) { handleIntegerDiv(I); }
2311 
  // Floating point division is side-effect free. We cannot require that the
  // divisor is fully initialized, so we must propagate shadow. See PR37523.
2314   void visitFDiv(BinaryOperator &I) { handleShadowOr(I); }
2315   void visitFRem(BinaryOperator &I) { handleShadowOr(I); }
2316 
2317   /// Instrument == and != comparisons.
2318   ///
2319   /// Sometimes the comparison result is known even if some of the bits of the
2320   /// arguments are not.
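  ///
  /// For example, when comparing A == 0 where A = 0x80 with its low 7 bits
  /// poisoned (Sa = 0x7f): bit 7 of A ^ 0 is 1 and defined, so the result is
  /// known to be "not equal" and gets a clean shadow despite A being mostly
  /// uninitialized.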
2321   void handleEqualityComparison(ICmpInst &I) {
2322     IRBuilder<> IRB(&I);
2323     Value *A = I.getOperand(0);
2324     Value *B = I.getOperand(1);
2325     Value *Sa = getShadow(A);
2326     Value *Sb = getShadow(B);
2327 
2328     // Get rid of pointers and vectors of pointers.
2329     // For ints (and vectors of ints), types of A and Sa match,
2330     // and this is a no-op.
2331     A = IRB.CreatePointerCast(A, Sa->getType());
2332     B = IRB.CreatePointerCast(B, Sb->getType());
2333 
2334     // A == B  <==>  (C = A^B) == 0
2335     // A != B  <==>  (C = A^B) != 0
2336     // Sc = Sa | Sb
2337     Value *C = IRB.CreateXor(A, B);
2338     Value *Sc = IRB.CreateOr(Sa, Sb);
2339     // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now)
2340     // Result is defined if one of the following is true
2341     // * there is a defined 1 bit in C
2342     // * C is fully defined
2343     // Si = !(C & ~Sc) && Sc
2344     Value *Zero = Constant::getNullValue(Sc->getType());
2345     Value *MinusOne = Constant::getAllOnesValue(Sc->getType());
2346     Value *Si =
2347       IRB.CreateAnd(IRB.CreateICmpNE(Sc, Zero),
2348                     IRB.CreateICmpEQ(
2349                       IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero));
2350     Si->setName("_msprop_icmp");
2351     setShadow(&I, Si);
2352     setOriginForNaryOp(I);
2353   }
2354 
2355   /// Build the lowest possible value of V, taking into account V's
2356   ///        uninitialized bits.
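  ///
  /// For the unsigned case, e.g. A = 0b1010 with Sa = 0b0011 (two low bits
  /// undefined), the lowest possible value is A & ~Sa = 0b1000 and the
  /// highest (see below) is A | Sa = 0b1011.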
2357   Value *getLowestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
2358                                 bool isSigned) {
2359     if (isSigned) {
2360       // Split shadow into sign bit and other bits.
2361       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
2362       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
2363       // Maximise the undefined shadow bit, minimize other undefined bits.
2364       return
2365         IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaOtherBits)), SaSignBit);
2366     } else {
2367       // Minimize undefined bits.
2368       return IRB.CreateAnd(A, IRB.CreateNot(Sa));
2369     }
2370   }
2371 
2372   /// Build the highest possible value of V, taking into account V's
2373   ///        uninitialized bits.
2374   Value *getHighestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
2375                                 bool isSigned) {
2376     if (isSigned) {
2377       // Split shadow into sign bit and other bits.
2378       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
2379       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
2380       // Minimise the undefined shadow bit, maximise other undefined bits.
2381       return
2382         IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaSignBit)), SaOtherBits);
2383     } else {
2384       // Maximize undefined bits.
2385       return IRB.CreateOr(A, Sa);
2386     }
2387   }
2388 
2389   /// Instrument relational comparisons.
2390   ///
2391   /// This function does exact shadow propagation for all relational
2392   /// comparisons of integers, pointers and vectors of those.
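  ///
  /// For example, continuing the example above: if A's possible values are
  /// the interval [0b1000, 0b1011] and B is a fully defined 0b0100, then
  /// "A u> B" holds for both interval endpoints, so the result is defined
  /// (and true) despite A's poisoned bits.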
2393   /// FIXME: output seems suboptimal when one of the operands is a constant
2394   void handleRelationalComparisonExact(ICmpInst &I) {
2395     IRBuilder<> IRB(&I);
2396     Value *A = I.getOperand(0);
2397     Value *B = I.getOperand(1);
2398     Value *Sa = getShadow(A);
2399     Value *Sb = getShadow(B);
2400 
2401     // Get rid of pointers and vectors of pointers.
2402     // For ints (and vectors of ints), types of A and Sa match,
2403     // and this is a no-op.
2404     A = IRB.CreatePointerCast(A, Sa->getType());
2405     B = IRB.CreatePointerCast(B, Sb->getType());
2406 
2407     // Let [a0, a1] be the interval of possible values of A, taking into account
2408     // its undefined bits. Let [b0, b1] be the interval of possible values of B.
2409     // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0).
2410     bool IsSigned = I.isSigned();
2411     Value *S1 = IRB.CreateICmp(I.getPredicate(),
2412                                getLowestPossibleValue(IRB, A, Sa, IsSigned),
2413                                getHighestPossibleValue(IRB, B, Sb, IsSigned));
2414     Value *S2 = IRB.CreateICmp(I.getPredicate(),
2415                                getHighestPossibleValue(IRB, A, Sa, IsSigned),
2416                                getLowestPossibleValue(IRB, B, Sb, IsSigned));
2417     Value *Si = IRB.CreateXor(S1, S2);
2418     setShadow(&I, Si);
2419     setOriginForNaryOp(I);
2420   }
2421 
2422   /// Instrument signed relational comparisons.
2423   ///
2424   /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest
2425   /// bit of the shadow. Everything else is delegated to handleShadowOr().
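  ///
  /// For example, for "icmp slt i32 %x, 0" the result depends only on the
  /// sign bit of %x, so the shadow of the result is simply "sign bit of Sx",
  /// computed below as (Sx s< 0).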
2426   void handleSignedRelationalComparison(ICmpInst &I) {
2427     Constant *constOp;
2428     Value *op = nullptr;
2429     CmpInst::Predicate pre;
2430     if ((constOp = dyn_cast<Constant>(I.getOperand(1)))) {
2431       op = I.getOperand(0);
2432       pre = I.getPredicate();
2433     } else if ((constOp = dyn_cast<Constant>(I.getOperand(0)))) {
2434       op = I.getOperand(1);
2435       pre = I.getSwappedPredicate();
2436     } else {
2437       handleShadowOr(I);
2438       return;
2439     }
2440 
2441     if ((constOp->isNullValue() &&
2442          (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) ||
2443         (constOp->isAllOnesValue() &&
2444          (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) {
2445       IRBuilder<> IRB(&I);
2446       Value *Shadow = IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op),
2447                                         "_msprop_icmp_s");
2448       setShadow(&I, Shadow);
2449       setOrigin(&I, getOrigin(op));
2450     } else {
2451       handleShadowOr(I);
2452     }
2453   }
2454 
2455   void visitICmpInst(ICmpInst &I) {
2456     if (!ClHandleICmp) {
2457       handleShadowOr(I);
2458       return;
2459     }
2460     if (I.isEquality()) {
2461       handleEqualityComparison(I);
2462       return;
2463     }
2464 
2465     assert(I.isRelational());
2466     if (ClHandleICmpExact) {
2467       handleRelationalComparisonExact(I);
2468       return;
2469     }
2470     if (I.isSigned()) {
2471       handleSignedRelationalComparison(I);
2472       return;
2473     }
2474 
2475     assert(I.isUnsigned());
2476     if ((isa<Constant>(I.getOperand(0)) || isa<Constant>(I.getOperand(1)))) {
2477       handleRelationalComparisonExact(I);
2478       return;
2479     }
2480 
2481     handleShadowOr(I);
2482   }
2483 
2484   void visitFCmpInst(FCmpInst &I) {
2485     handleShadowOr(I);
2486   }
2487 
2488   void handleShift(BinaryOperator &I) {
2489     IRBuilder<> IRB(&I);
2490     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2491     // Otherwise perform the same shift on S1.
2492     Value *S1 = getShadow(&I, 0);
2493     Value *S2 = getShadow(&I, 1);
2494     Value *S2Conv = IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)),
2495                                    S2->getType());
2496     Value *V2 = I.getOperand(1);
2497     Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2);
2498     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2499     setOriginForNaryOp(I);
2500   }
2501 
2502   void visitShl(BinaryOperator &I) { handleShift(I); }
2503   void visitAShr(BinaryOperator &I) { handleShift(I); }
2504   void visitLShr(BinaryOperator &I) { handleShift(I); }
2505 
2506   /// Instrument llvm.memmove
2507   ///
2508   /// At this point we don't know if llvm.memmove will be inlined or not.
2509   /// If we don't instrument it and it gets inlined,
2510   /// our interceptor will not kick in and we will lose the memmove.
2511   /// If we instrument the call here, but it does not get inlined,
  /// we will memmove the shadow twice, which is bad in the case
2513   /// of overlapping regions. So, we simply lower the intrinsic to a call.
2514   ///
2515   /// Similar situation exists for memcpy and memset.
2516   void visitMemMoveInst(MemMoveInst &I) {
2517     IRBuilder<> IRB(&I);
2518     IRB.CreateCall(
2519         MS.MemmoveFn,
2520         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2521          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2522          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2523     I.eraseFromParent();
2524   }
2525 
2526   // Similar to memmove: avoid copying shadow twice.
  // This is somewhat unfortunate as it may slow down small constant memcpys.
2528   // FIXME: consider doing manual inline for small constant sizes and proper
2529   // alignment.
2530   void visitMemCpyInst(MemCpyInst &I) {
2531     IRBuilder<> IRB(&I);
2532     IRB.CreateCall(
2533         MS.MemcpyFn,
2534         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2535          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2536          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2537     I.eraseFromParent();
2538   }
2539 
2540   // Same as memcpy.
2541   void visitMemSetInst(MemSetInst &I) {
2542     IRBuilder<> IRB(&I);
2543     IRB.CreateCall(
2544         MS.MemsetFn,
2545         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2546          IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
2547          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2548     I.eraseFromParent();
2549   }
2550 
2551   void visitVAStartInst(VAStartInst &I) {
2552     VAHelper->visitVAStartInst(I);
2553   }
2554 
2555   void visitVACopyInst(VACopyInst &I) {
2556     VAHelper->visitVACopyInst(I);
2557   }
2558 
2559   /// Handle vector store-like intrinsics.
2560   ///
2561   /// Instrument intrinsics that look like a simple SIMD store: writes memory,
2562   /// has 1 pointer argument and 1 vector argument, returns void.
2563   bool handleVectorStoreIntrinsic(IntrinsicInst &I) {
2564     IRBuilder<> IRB(&I);
2565     Value* Addr = I.getArgOperand(0);
2566     Value *Shadow = getShadow(&I, 1);
2567     Value *ShadowPtr, *OriginPtr;
2568 
2569     // We don't know the pointer alignment (could be unaligned SSE store!).
    // Have to assume the worst case.
2571     std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
2572         Addr, IRB, Shadow->getType(), Align(1), /*isStore*/ true);
2573     IRB.CreateAlignedStore(Shadow, ShadowPtr, Align(1));
2574 
2575     if (ClCheckAccessAddress)
2576       insertShadowCheck(Addr, &I);
2577 
2578     // FIXME: factor out common code from materializeStores
2579     if (MS.TrackOrigins) IRB.CreateStore(getOrigin(&I, 1), OriginPtr);
2580     return true;
2581   }
2582 
2583   /// Handle vector load-like intrinsics.
2584   ///
2585   /// Instrument intrinsics that look like a simple SIMD load: reads memory,
2586   /// has 1 pointer argument, returns a vector.
2587   bool handleVectorLoadIntrinsic(IntrinsicInst &I) {
2588     IRBuilder<> IRB(&I);
2589     Value *Addr = I.getArgOperand(0);
2590 
2591     Type *ShadowTy = getShadowTy(&I);
2592     Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
2593     if (PropagateShadow) {
2594       // We don't know the pointer alignment (could be unaligned SSE load!).
      // Have to assume the worst case.
2596       const Align Alignment = Align(1);
2597       std::tie(ShadowPtr, OriginPtr) =
2598           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
2599       setShadow(&I,
2600                 IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
2601     } else {
2602       setShadow(&I, getCleanShadow(&I));
2603     }
2604 
2605     if (ClCheckAccessAddress)
2606       insertShadowCheck(Addr, &I);
2607 
2608     if (MS.TrackOrigins) {
2609       if (PropagateShadow)
2610         setOrigin(&I, IRB.CreateLoad(MS.OriginTy, OriginPtr));
2611       else
2612         setOrigin(&I, getCleanOrigin());
2613     }
2614     return true;
2615   }
2616 
2617   /// Handle (SIMD arithmetic)-like intrinsics.
2618   ///
2619   /// Instrument intrinsics with any number of arguments of the same type,
2620   /// equal to the return type. The type should be simple (no aggregates or
2621   /// pointers; vectors are fine).
2622   /// Caller guarantees that this intrinsic does not access memory.
2623   bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I) {
2624     Type *RetTy = I.getType();
2625     if (!(RetTy->isIntOrIntVectorTy() ||
2626           RetTy->isFPOrFPVectorTy() ||
2627           RetTy->isX86_MMXTy()))
2628       return false;
2629 
2630     unsigned NumArgOperands = I.getNumArgOperands();
2631 
2632     for (unsigned i = 0; i < NumArgOperands; ++i) {
2633       Type *Ty = I.getArgOperand(i)->getType();
2634       if (Ty != RetTy)
2635         return false;
2636     }
2637 
2638     IRBuilder<> IRB(&I);
2639     ShadowAndOriginCombiner SC(this, IRB);
2640     for (unsigned i = 0; i < NumArgOperands; ++i)
2641       SC.Add(I.getArgOperand(i));
2642     SC.Done(&I);
2643 
2644     return true;
2645   }
2646 
2647   /// Heuristically instrument unknown intrinsics.
2648   ///
2649   /// The main purpose of this code is to do something reasonable with all
2650   /// random intrinsics we might encounter, most importantly - SIMD intrinsics.
2651   /// We recognize several classes of intrinsics by their argument types and
2652   /// ModRefBehavior and apply special instrumentation when we are reasonably
2653   /// sure that we know what the intrinsic does.
2654   ///
2655   /// We special-case intrinsics where this approach fails. See llvm.bswap
2656   /// handling as an example of that.
2657   bool handleUnknownIntrinsic(IntrinsicInst &I) {
2658     unsigned NumArgOperands = I.getNumArgOperands();
2659     if (NumArgOperands == 0)
2660       return false;
2661 
2662     if (NumArgOperands == 2 &&
2663         I.getArgOperand(0)->getType()->isPointerTy() &&
2664         I.getArgOperand(1)->getType()->isVectorTy() &&
2665         I.getType()->isVoidTy() &&
2666         !I.onlyReadsMemory()) {
2667       // This looks like a vector store.
2668       return handleVectorStoreIntrinsic(I);
2669     }
2670 
2671     if (NumArgOperands == 1 &&
2672         I.getArgOperand(0)->getType()->isPointerTy() &&
2673         I.getType()->isVectorTy() &&
2674         I.onlyReadsMemory()) {
2675       // This looks like a vector load.
2676       return handleVectorLoadIntrinsic(I);
2677     }
2678 
2679     if (I.doesNotAccessMemory())
2680       if (maybeHandleSimpleNomemIntrinsic(I))
2681         return true;
2682 
2683     // FIXME: detect and handle SSE maskstore/maskload
2684     return false;
2685   }
2686 
2687   void handleInvariantGroup(IntrinsicInst &I) {
2688     setShadow(&I, getShadow(&I, 0));
2689     setOrigin(&I, getOrigin(&I, 0));
2690   }
2691 
2692   void handleLifetimeStart(IntrinsicInst &I) {
2693     if (!PoisonStack)
2694       return;
2695     DenseMap<Value *, AllocaInst *> AllocaForValue;
2696     AllocaInst *AI =
2697         llvm::findAllocaForValue(I.getArgOperand(1), AllocaForValue);
2698     if (!AI)
2699       InstrumentLifetimeStart = false;
2700     LifetimeStartList.push_back(std::make_pair(&I, AI));
2701   }
2702 
2703   void handleBswap(IntrinsicInst &I) {
2704     IRBuilder<> IRB(&I);
2705     Value *Op = I.getArgOperand(0);
2706     Type *OpType = Op->getType();
2707     Function *BswapFunc = Intrinsic::getDeclaration(
2708       F.getParent(), Intrinsic::bswap, makeArrayRef(&OpType, 1));
2709     setShadow(&I, IRB.CreateCall(BswapFunc, getShadow(Op)));
2710     setOrigin(&I, getOrigin(Op));
2711   }
2712 
2713   // Instrument vector convert intrinsic.
2714   //
2715   // This function instruments intrinsics like cvtsi2ss:
2716   // %Out = int_xxx_cvtyyy(%ConvertOp)
2717   // or
2718   // %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp)
2719   // The intrinsic converts \p NumUsedElements elements of \p ConvertOp to the
2720   // same number of output elements, and (if it has 2 arguments) copies the rest
2721   // of the elements from \p CopyOp.
2722   // In most cases the conversion involves floating-point values, which may
2723   // trigger a hardware exception when not fully initialized; we thus require
2724   // \p ConvertOp[0:NumUsedElements] to be fully initialized and trap otherwise.
2725   // We copy the shadow of \p CopyOp[NumUsedElements:] to \p
2726   // Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
2727   // return a fully initialized value.
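  //
  // For example (a rough sketch, not exact IR), for a cvtsi2ss-style call
  //   %Out = int_xxx_cvtsi2ss(<4 x float> %CopyOp, i32 %x)
  // with NumUsedElements == 1 we OR together the shadow of the converted
  // elements (here just the shadow of %x) and emit a check on it, then take
  // the shadow of %CopyOp, zero out its element 0, and use that as the shadow
  // of %Out.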
2728   void handleVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements) {
2729     IRBuilder<> IRB(&I);
2730     Value *CopyOp, *ConvertOp;
2731 
2732     switch (I.getNumArgOperands()) {
2733     case 3:
2734       assert(isa<ConstantInt>(I.getArgOperand(2)) && "Invalid rounding mode");
2735       LLVM_FALLTHROUGH;
2736     case 2:
2737       CopyOp = I.getArgOperand(0);
2738       ConvertOp = I.getArgOperand(1);
2739       break;
2740     case 1:
2741       ConvertOp = I.getArgOperand(0);
2742       CopyOp = nullptr;
2743       break;
2744     default:
2745       llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
2746     }
2747 
2748     // The first *NumUsedElements* elements of ConvertOp are converted to the
2749     // same number of output elements. The rest of the output is copied from
2750     // CopyOp, or (if not available) filled with zeroes.
2751     // Combine shadow for elements of ConvertOp that are used in this operation,
2752     // and insert a check.
2753     // FIXME: consider propagating shadow of ConvertOp, at least in the case of
2754     // int->any conversion.
2755     Value *ConvertShadow = getShadow(ConvertOp);
2756     Value *AggShadow = nullptr;
2757     if (ConvertOp->getType()->isVectorTy()) {
2758       AggShadow = IRB.CreateExtractElement(
2759           ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
2760       for (int i = 1; i < NumUsedElements; ++i) {
2761         Value *MoreShadow = IRB.CreateExtractElement(
2762             ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), i));
2763         AggShadow = IRB.CreateOr(AggShadow, MoreShadow);
2764       }
2765     } else {
2766       AggShadow = ConvertShadow;
2767     }
2768     assert(AggShadow->getType()->isIntegerTy());
2769     insertShadowCheck(AggShadow, getOrigin(ConvertOp), &I);
2770 
2771     // Build result shadow by zero-filling parts of CopyOp shadow that come from
2772     // ConvertOp.
2773     if (CopyOp) {
2774       assert(CopyOp->getType() == I.getType());
2775       assert(CopyOp->getType()->isVectorTy());
2776       Value *ResultShadow = getShadow(CopyOp);
2777       Type *EltTy = cast<VectorType>(ResultShadow->getType())->getElementType();
2778       for (int i = 0; i < NumUsedElements; ++i) {
2779         ResultShadow = IRB.CreateInsertElement(
2780             ResultShadow, ConstantInt::getNullValue(EltTy),
2781             ConstantInt::get(IRB.getInt32Ty(), i));
2782       }
2783       setShadow(&I, ResultShadow);
2784       setOrigin(&I, getOrigin(CopyOp));
2785     } else {
2786       setShadow(&I, getCleanShadow(&I));
2787       setOrigin(&I, getCleanOrigin());
2788     }
2789   }
2790 
2791   // Given a scalar or a vector, extract the lower 64 bits (or fewer), and
2792   // return all zeroes if they are zero, and all ones otherwise.
2793   Value *Lower64ShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
2794     if (S->getType()->isVectorTy())
2795       S = CreateShadowCast(IRB, S, IRB.getInt64Ty(), /* Signed */ true);
2796     assert(S->getType()->getPrimitiveSizeInBits() <= 64);
2797     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
2798     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
2799   }
2800 
2801   // Given a vector, extract its first element, and return all
2802   // zeroes if it is zero, and all ones otherwise.
2803   Value *LowerElementShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
2804     Value *S1 = IRB.CreateExtractElement(S, (uint64_t)0);
2805     Value *S2 = IRB.CreateICmpNE(S1, getCleanShadow(S1));
2806     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
2807   }
2808 
2809   Value *VariableShadowExtend(IRBuilder<> &IRB, Value *S) {
2810     Type *T = S->getType();
2811     assert(T->isVectorTy());
2812     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
2813     return IRB.CreateSExt(S2, T);
2814   }
2815 
2816   // Instrument vector shift intrinsic.
2817   //
2818   // This function instruments intrinsics like int_x86_avx2_psll_w.
2819   // Intrinsic shifts %In by %ShiftSize bits.
2820   // %ShiftSize may be a vector. In that case the lower 64 bits determine the
2821   // shift size, and the rest is ignored. Behavior is defined even if the shift
2822   // size is greater than the register (or field) width.
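  //
  // Roughly (a sketch, not exact IR), for %Out = psll_w(%In, %ShiftSize):
  //   %SS = the same shift intrinsic applied to the shadow of %In and the
  //         original %ShiftSize
  //   %SE = all-ones if the relevant part of the shadow of %ShiftSize is
  //         non-zero, all-zeros otherwise
  //   shadow(%Out) = %SS | %SE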
2823   void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) {
2824     assert(I.getNumArgOperands() == 2);
2825     IRBuilder<> IRB(&I);
2826     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2827     // Otherwise perform the same shift on S1.
2828     Value *S1 = getShadow(&I, 0);
2829     Value *S2 = getShadow(&I, 1);
2830     Value *S2Conv = Variable ? VariableShadowExtend(IRB, S2)
2831                              : Lower64ShadowExtend(IRB, S2, getShadowTy(&I));
2832     Value *V1 = I.getOperand(0);
2833     Value *V2 = I.getOperand(1);
2834     Value *Shift = IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
2835                                   {IRB.CreateBitCast(S1, V1->getType()), V2});
2836     Shift = IRB.CreateBitCast(Shift, getShadowTy(&I));
2837     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2838     setOriginForNaryOp(I);
2839   }
2840 
2841   // Get an X86_MMX-sized vector type.
2842   Type *getMMXVectorTy(unsigned EltSizeInBits) {
2843     const unsigned X86_MMXSizeInBits = 64;
2844     assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
2845            "Illegal MMX vector element size");
2846     return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
2847                                 X86_MMXSizeInBits / EltSizeInBits);
2848   }
2849 
2850   // Returns a signed counterpart for an (un)signed-saturate-and-pack
2851   // intrinsic.
2852   Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) {
2853     switch (id) {
2854       case Intrinsic::x86_sse2_packsswb_128:
2855       case Intrinsic::x86_sse2_packuswb_128:
2856         return Intrinsic::x86_sse2_packsswb_128;
2857 
2858       case Intrinsic::x86_sse2_packssdw_128:
2859       case Intrinsic::x86_sse41_packusdw:
2860         return Intrinsic::x86_sse2_packssdw_128;
2861 
2862       case Intrinsic::x86_avx2_packsswb:
2863       case Intrinsic::x86_avx2_packuswb:
2864         return Intrinsic::x86_avx2_packsswb;
2865 
2866       case Intrinsic::x86_avx2_packssdw:
2867       case Intrinsic::x86_avx2_packusdw:
2868         return Intrinsic::x86_avx2_packssdw;
2869 
2870       case Intrinsic::x86_mmx_packsswb:
2871       case Intrinsic::x86_mmx_packuswb:
2872         return Intrinsic::x86_mmx_packsswb;
2873 
2874       case Intrinsic::x86_mmx_packssdw:
2875         return Intrinsic::x86_mmx_packssdw;
2876       default:
2877         llvm_unreachable("unexpected intrinsic id");
2878     }
2879   }
2880 
2881   // Instrument vector pack intrinsic.
2882   //
2883   // This function instruments intrinsics like x86_mmx_packsswb, which pack
2884   // elements of 2 input vectors into half as many bits with saturation.
2885   // Shadow is propagated with the signed variant of the same intrinsic applied
2886   // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer).
2887   // EltSizeInBits is used only for x86mmx arguments.
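  //
  // The signed variant matters here: sext(Sa != 0) is either 0 or -1 per
  // element, and signed saturation keeps -1 as an all-ones narrow element, so
  // a poisoned wide element stays fully poisoned after packing. The unsigned
  // variant would clamp -1 to 0 and lose the poison.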
2888   void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned EltSizeInBits = 0) {
2889     assert(I.getNumArgOperands() == 2);
2890     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
2891     IRBuilder<> IRB(&I);
2892     Value *S1 = getShadow(&I, 0);
2893     Value *S2 = getShadow(&I, 1);
2894     assert(isX86_MMX || S1->getType()->isVectorTy());
2895 
2896     // SExt and ICmpNE below must apply to individual elements of input vectors.
2897     // In case of x86mmx arguments, cast them to appropriate vector types and
2898     // back.
2899     Type *T = isX86_MMX ? getMMXVectorTy(EltSizeInBits) : S1->getType();
2900     if (isX86_MMX) {
2901       S1 = IRB.CreateBitCast(S1, T);
2902       S2 = IRB.CreateBitCast(S2, T);
2903     }
2904     Value *S1_ext = IRB.CreateSExt(
2905         IRB.CreateICmpNE(S1, Constant::getNullValue(T)), T);
2906     Value *S2_ext = IRB.CreateSExt(
2907         IRB.CreateICmpNE(S2, Constant::getNullValue(T)), T);
2908     if (isX86_MMX) {
2909       Type *X86_MMXTy = Type::getX86_MMXTy(*MS.C);
2910       S1_ext = IRB.CreateBitCast(S1_ext, X86_MMXTy);
2911       S2_ext = IRB.CreateBitCast(S2_ext, X86_MMXTy);
2912     }
2913 
2914     Function *ShadowFn = Intrinsic::getDeclaration(
2915         F.getParent(), getSignedPackIntrinsic(I.getIntrinsicID()));
2916 
2917     Value *S =
2918         IRB.CreateCall(ShadowFn, {S1_ext, S2_ext}, "_msprop_vector_pack");
2919     if (isX86_MMX) S = IRB.CreateBitCast(S, getShadowTy(&I));
2920     setShadow(&I, S);
2921     setOriginForNaryOp(I);
2922   }
2923 
2924   // Instrument sum-of-absolute-differences intrinsic.
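  //
  // Each result element of psadbw is a sum of at most eight absolute byte
  // differences, i.e. at most 8 * 255 = 2040 < 2^16, so only the low 16 bits
  // of a result element can ever be non-zero. The shadow below poisons those
  // low 16 bits if any bit of the corresponding input bytes is poisoned, and
  // clears the upper bits with a logical shift right.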
2925   void handleVectorSadIntrinsic(IntrinsicInst &I) {
2926     const unsigned SignificantBitsPerResultElement = 16;
2927     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
2928     Type *ResTy = isX86_MMX ? IntegerType::get(*MS.C, 64) : I.getType();
2929     unsigned ZeroBitsPerResultElement =
2930         ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement;
2931 
2932     IRBuilder<> IRB(&I);
2933     Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2934     S = IRB.CreateBitCast(S, ResTy);
2935     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
2936                        ResTy);
2937     S = IRB.CreateLShr(S, ZeroBitsPerResultElement);
2938     S = IRB.CreateBitCast(S, getShadowTy(&I));
2939     setShadow(&I, S);
2940     setOriginForNaryOp(I);
2941   }
2942 
2943   // Instrument multiply-add intrinsic.
2944   void handleVectorPmaddIntrinsic(IntrinsicInst &I,
2945                                   unsigned EltSizeInBits = 0) {
2946     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
2947     Type *ResTy = isX86_MMX ? getMMXVectorTy(EltSizeInBits * 2) : I.getType();
2948     IRBuilder<> IRB(&I);
2949     Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2950     S = IRB.CreateBitCast(S, ResTy);
2951     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
2952                        ResTy);
2953     S = IRB.CreateBitCast(S, getShadowTy(&I));
2954     setShadow(&I, S);
2955     setOriginForNaryOp(I);
2956   }
2957 
2958   // Instrument compare-packed intrinsic.
2959   // Basically, an or followed by sext(icmp ne 0) to end up with all-zeros or
2960   // all-ones shadow.
2961   void handleVectorComparePackedIntrinsic(IntrinsicInst &I) {
2962     IRBuilder<> IRB(&I);
2963     Type *ResTy = getShadowTy(&I);
2964     Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2965     Value *S = IRB.CreateSExt(
2966         IRB.CreateICmpNE(S0, Constant::getNullValue(ResTy)), ResTy);
2967     setShadow(&I, S);
2968     setOriginForNaryOp(I);
2969   }
2970 
2971   // Instrument compare-scalar intrinsic.
2972   // This handles both cmp* intrinsics which return the result in the first
2973   // element of a vector, and comi* which return the result as i32.
2974   void handleVectorCompareScalarIntrinsic(IntrinsicInst &I) {
2975     IRBuilder<> IRB(&I);
2976     Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2977     Value *S = LowerElementShadowExtend(IRB, S0, getShadowTy(&I));
2978     setShadow(&I, S);
2979     setOriginForNaryOp(I);
2980   }
2981 
2982   // Instrument generic vector reduction intrinsics
2983   // by ORing together all their fields.
2984   void handleVectorReduceIntrinsic(IntrinsicInst &I) {
2985     IRBuilder<> IRB(&I);
2986     Value *S = IRB.CreateOrReduce(getShadow(&I, 0));
2987     setShadow(&I, S);
2988     setOrigin(&I, getOrigin(&I, 0));
2989   }
2990 
2991   // Instrument experimental.vector.reduce.or intrinsic.
2992   // Valid (non-poisoned) set bits in the operand pull low the
2993   // corresponding shadow bits.
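  //
  // A small worked example (sketch): when reducing %x with shadow %sx, if
  // some lane has bit N set and that bit is unpoisoned, bit N of the result
  // is 1 regardless of the other lanes, so bit N of the result shadow must be
  // clean; AND-reducing (~%x | %sx) captures exactly that. Bits not pinned
  // this way are clean only if they are unpoisoned in every lane, which is
  // the OR-reduction of %sx.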
2994   void handleVectorReduceOrIntrinsic(IntrinsicInst &I) {
2995     IRBuilder<> IRB(&I);
2996     Value *OperandShadow = getShadow(&I, 0);
2997     Value *OperandUnsetBits = IRB.CreateNot(I.getOperand(0));
2998     Value *OperandUnsetOrPoison = IRB.CreateOr(OperandUnsetBits, OperandShadow);
2999     // Bit N is clean if any field's bit N is 1 and unpoisoned.
3000     Value *OutShadowMask = IRB.CreateAndReduce(OperandUnsetOrPoison);
3001     // Otherwise, it is clean if every field's bit N is unpoisoned.
3002     Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
3003     Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);
3004 
3005     setShadow(&I, S);
3006     setOrigin(&I, getOrigin(&I, 0));
3007   }
3008 
3009   // Instrument experimental.vector.reduce.and intrinsic.
3010   // Valid (non-poisoned) unset bits in the operand pull down the
3011   // corresponding shadow bits.
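  //
  // Dual of the OR case (sketch): if some lane has bit N equal to 0 and that
  // bit is unpoisoned, bit N of the AND-reduction is 0 regardless of the
  // other lanes, so it is clean; otherwise it is clean only if bit N is
  // unpoisoned in every lane.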
3012   void handleVectorReduceAndIntrinsic(IntrinsicInst &I) {
3013     IRBuilder<> IRB(&I);
3014     Value *OperandShadow = getShadow(&I, 0);
3015     Value *OperandSetOrPoison = IRB.CreateOr(I.getOperand(0), OperandShadow);
3016     // Bit N is clean if any field's bit N is 0 and unpoisoned.
3017     Value *OutShadowMask = IRB.CreateAndReduce(OperandSetOrPoison);
3018     // Otherwise, it is clean if every field's bit N is unpoisoned.
3019     Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
3020     Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);
3021 
3022     setShadow(&I, S);
3023     setOrigin(&I, getOrigin(&I, 0));
3024   }
3025 
3026   void handleStmxcsr(IntrinsicInst &I) {
3027     IRBuilder<> IRB(&I);
3028     Value* Addr = I.getArgOperand(0);
3029     Type *Ty = IRB.getInt32Ty();
3030     Value *ShadowPtr =
3031         getShadowOriginPtr(Addr, IRB, Ty, Align(1), /*isStore*/ true).first;
3032 
3033     IRB.CreateStore(getCleanShadow(Ty),
3034                     IRB.CreatePointerCast(ShadowPtr, Ty->getPointerTo()));
3035 
3036     if (ClCheckAccessAddress)
3037       insertShadowCheck(Addr, &I);
3038   }
3039 
3040   void handleLdmxcsr(IntrinsicInst &I) {
3041     if (!InsertChecks) return;
3042 
3043     IRBuilder<> IRB(&I);
3044     Value *Addr = I.getArgOperand(0);
3045     Type *Ty = IRB.getInt32Ty();
3046     const Align Alignment = Align(1);
3047     Value *ShadowPtr, *OriginPtr;
3048     std::tie(ShadowPtr, OriginPtr) =
3049         getShadowOriginPtr(Addr, IRB, Ty, Alignment, /*isStore*/ false);
3050 
3051     if (ClCheckAccessAddress)
3052       insertShadowCheck(Addr, &I);
3053 
3054     Value *Shadow = IRB.CreateAlignedLoad(Ty, ShadowPtr, Alignment, "_ldmxcsr");
3055     Value *Origin = MS.TrackOrigins ? IRB.CreateLoad(MS.OriginTy, OriginPtr)
3056                                     : getCleanOrigin();
3057     insertShadowCheck(Shadow, Origin, &I);
3058   }
3059 
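  // Instrument llvm.masked.store.
  //
  // Roughly (a sketch): the shadow of the stored value is written through the
  // shadow address with the same mask and alignment as the original store, so
  // shadow is only updated for the lanes that are actually written. The mask
  // and the address are checked only when ClCheckAccessAddress is set.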
3060   void handleMaskedStore(IntrinsicInst &I) {
3061     IRBuilder<> IRB(&I);
3062     Value *V = I.getArgOperand(0);
3063     Value *Addr = I.getArgOperand(1);
3064     const Align Alignment(
3065         cast<ConstantInt>(I.getArgOperand(2))->getZExtValue());
3066     Value *Mask = I.getArgOperand(3);
3067     Value *Shadow = getShadow(V);
3068 
3069     Value *ShadowPtr;
3070     Value *OriginPtr;
3071     std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
3072         Addr, IRB, Shadow->getType(), Alignment, /*isStore*/ true);
3073 
3074     if (ClCheckAccessAddress) {
3075       insertShadowCheck(Addr, &I);
3076       // Uninitialized mask is kind of like uninitialized address, but not as
3077       // scary.
3078       insertShadowCheck(Mask, &I);
3079     }
3080 
3081     IRB.CreateMaskedStore(Shadow, ShadowPtr, Alignment, Mask);
3082 
3083     if (MS.TrackOrigins) {
3084       auto &DL = F.getParent()->getDataLayout();
3085       paintOrigin(IRB, getOrigin(V), OriginPtr,
3086                   DL.getTypeStoreSize(Shadow->getType()),
3087                   std::max(Alignment, kMinOriginAlignment));
3088     }
3089   }
3090 
3091   bool handleMaskedLoad(IntrinsicInst &I) {
3092     IRBuilder<> IRB(&I);
3093     Value *Addr = I.getArgOperand(0);
3094     const Align Alignment(
3095         cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
3096     Value *Mask = I.getArgOperand(2);
3097     Value *PassThru = I.getArgOperand(3);
3098 
3099     Type *ShadowTy = getShadowTy(&I);
3100     Value *ShadowPtr, *OriginPtr;
3101     if (PropagateShadow) {
3102       std::tie(ShadowPtr, OriginPtr) =
3103           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
3104       setShadow(&I, IRB.CreateMaskedLoad(ShadowPtr, Alignment, Mask,
3105                                          getShadow(PassThru), "_msmaskedld"));
3106     } else {
3107       setShadow(&I, getCleanShadow(&I));
3108     }
3109 
3110     if (ClCheckAccessAddress) {
3111       insertShadowCheck(Addr, &I);
3112       insertShadowCheck(Mask, &I);
3113     }
3114 
3115     if (MS.TrackOrigins) {
3116       if (PropagateShadow) {
3117         // Choose between PassThru's and the loaded value's origins.
3118         Value *MaskedPassThruShadow = IRB.CreateAnd(
3119             getShadow(PassThru), IRB.CreateSExt(IRB.CreateNeg(Mask), ShadowTy));
3120 
3121         Value *Acc = IRB.CreateExtractElement(
3122             MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
3123         for (int i = 1, N = cast<FixedVectorType>(PassThru->getType())
3124                                 ->getNumElements();
3125              i < N; ++i) {
3126           Value *More = IRB.CreateExtractElement(
3127               MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), i));
3128           Acc = IRB.CreateOr(Acc, More);
3129         }
3130 
3131         Value *Origin = IRB.CreateSelect(
3132             IRB.CreateICmpNE(Acc, Constant::getNullValue(Acc->getType())),
3133             getOrigin(PassThru), IRB.CreateLoad(MS.OriginTy, OriginPtr));
3134 
3135         setOrigin(&I, Origin);
3136       } else {
3137         setOrigin(&I, getCleanOrigin());
3138       }
3139     }
3140     return true;
3141   }
3142 
3143   // Instrument BMI / BMI2 intrinsics.
3144   // All of these intrinsics are Z = I(X, Y), where the types of all operands
3145   // and the result match, and are either i32 or i64.
3146   // The following instrumentation happens to work for all of them:
3147   //   Sz = I(Sx, Y) | (sext (Sy != 0))
3148   void handleBmiIntrinsic(IntrinsicInst &I) {
3149     IRBuilder<> IRB(&I);
3150     Type *ShadowTy = getShadowTy(&I);
3151 
3152     // If any bit of the mask operand is poisoned, then the whole thing is.
3153     Value *SMask = getShadow(&I, 1);
3154     SMask = IRB.CreateSExt(IRB.CreateICmpNE(SMask, getCleanShadow(ShadowTy)),
3155                            ShadowTy);
3156     // Apply the same intrinsic to the shadow of the first operand.
3157     Value *S = IRB.CreateCall(I.getCalledFunction(),
3158                               {getShadow(&I, 0), I.getOperand(1)});
3159     S = IRB.CreateOr(SMask, S);
3160     setShadow(&I, S);
3161     setOriginForNaryOp(I);
3162   }
3163 
3164   SmallVector<int, 8> getPclmulMask(unsigned Width, bool OddElements) {
3165     SmallVector<int, 8> Mask;
3166     for (unsigned X = OddElements ? 1 : 0; X < Width; X += 2) {
3167       Mask.append(2, X);
3168     }
3169     return Mask;
3170   }
3171 
3172   // Instrument pclmul intrinsics.
3173   // These intrinsics operate either on odd or on even elements of the input
3174   // vectors, depending on the constant in the 3rd argument, ignoring the rest.
3175   // Replace the unused elements with copies of the used ones, ex:
3176   //   (0, 1, 2, 3) -> (0, 0, 2, 2) (even case)
3177   // or
3178   //   (0, 1, 2, 3) -> (1, 1, 3, 3) (odd case)
3179   // and then apply the usual shadow combining logic.
3180   void handlePclmulIntrinsic(IntrinsicInst &I) {
3181     IRBuilder<> IRB(&I);
3182     Type *ShadowTy = getShadowTy(&I);
3183     unsigned Width =
3184         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
3185     assert(isa<ConstantInt>(I.getArgOperand(2)) &&
3186            "pclmul 3rd operand must be a constant");
3187     unsigned Imm = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
3188     Value *Shuf0 =
3189         IRB.CreateShuffleVector(getShadow(&I, 0), UndefValue::get(ShadowTy),
3190                                 getPclmulMask(Width, Imm & 0x01));
3191     Value *Shuf1 =
3192         IRB.CreateShuffleVector(getShadow(&I, 1), UndefValue::get(ShadowTy),
3193                                 getPclmulMask(Width, Imm & 0x10));
3194     ShadowAndOriginCombiner SOC(this, IRB);
3195     SOC.Add(Shuf0, getOrigin(&I, 0));
3196     SOC.Add(Shuf1, getOrigin(&I, 1));
3197     SOC.Done(&I);
3198   }
3199 
3200   // Instrument _mm_*_sd intrinsics
3201   void handleUnarySdIntrinsic(IntrinsicInst &I) {
3202     IRBuilder<> IRB(&I);
3203     Value *First = getShadow(&I, 0);
3204     Value *Second = getShadow(&I, 1);
3205     // High word of first operand, low word of second
3206     Value *Shadow =
3207         IRB.CreateShuffleVector(First, Second, llvm::makeArrayRef<int>({2, 1}));
3208 
3209     setShadow(&I, Shadow);
3210     setOriginForNaryOp(I);
3211   }
3212 
3213   void handleBinarySdIntrinsic(IntrinsicInst &I) {
3214     IRBuilder<> IRB(&I);
3215     Value *First = getShadow(&I, 0);
3216     Value *Second = getShadow(&I, 1);
3217     Value *OrShadow = IRB.CreateOr(First, Second);
3218     // High word of first operand, low word of both OR'd together
3219     Value *Shadow = IRB.CreateShuffleVector(First, OrShadow,
3220                                             llvm::makeArrayRef<int>({2, 1}));
3221 
3222     setShadow(&I, Shadow);
3223     setOriginForNaryOp(I);
3224   }
3225 
3226   void visitIntrinsicInst(IntrinsicInst &I) {
3227     switch (I.getIntrinsicID()) {
3228     case Intrinsic::lifetime_start:
3229       handleLifetimeStart(I);
3230       break;
3231     case Intrinsic::launder_invariant_group:
3232     case Intrinsic::strip_invariant_group:
3233       handleInvariantGroup(I);
3234       break;
3235     case Intrinsic::bswap:
3236       handleBswap(I);
3237       break;
3238     case Intrinsic::masked_store:
3239       handleMaskedStore(I);
3240       break;
3241     case Intrinsic::masked_load:
3242       handleMaskedLoad(I);
3243       break;
3244     case Intrinsic::experimental_vector_reduce_and:
3245       handleVectorReduceAndIntrinsic(I);
3246       break;
3247     case Intrinsic::experimental_vector_reduce_or:
3248       handleVectorReduceOrIntrinsic(I);
3249       break;
3250     case Intrinsic::experimental_vector_reduce_add:
3251     case Intrinsic::experimental_vector_reduce_xor:
3252     case Intrinsic::experimental_vector_reduce_mul:
3253       handleVectorReduceIntrinsic(I);
3254       break;
3255     case Intrinsic::x86_sse_stmxcsr:
3256       handleStmxcsr(I);
3257       break;
3258     case Intrinsic::x86_sse_ldmxcsr:
3259       handleLdmxcsr(I);
3260       break;
3261     case Intrinsic::x86_avx512_vcvtsd2usi64:
3262     case Intrinsic::x86_avx512_vcvtsd2usi32:
3263     case Intrinsic::x86_avx512_vcvtss2usi64:
3264     case Intrinsic::x86_avx512_vcvtss2usi32:
3265     case Intrinsic::x86_avx512_cvttss2usi64:
3266     case Intrinsic::x86_avx512_cvttss2usi:
3267     case Intrinsic::x86_avx512_cvttsd2usi64:
3268     case Intrinsic::x86_avx512_cvttsd2usi:
3269     case Intrinsic::x86_avx512_cvtusi2ss:
3270     case Intrinsic::x86_avx512_cvtusi642sd:
3271     case Intrinsic::x86_avx512_cvtusi642ss:
3272     case Intrinsic::x86_sse2_cvtsd2si64:
3273     case Intrinsic::x86_sse2_cvtsd2si:
3274     case Intrinsic::x86_sse2_cvtsd2ss:
3275     case Intrinsic::x86_sse2_cvttsd2si64:
3276     case Intrinsic::x86_sse2_cvttsd2si:
3277     case Intrinsic::x86_sse_cvtss2si64:
3278     case Intrinsic::x86_sse_cvtss2si:
3279     case Intrinsic::x86_sse_cvttss2si64:
3280     case Intrinsic::x86_sse_cvttss2si:
3281       handleVectorConvertIntrinsic(I, 1);
3282       break;
3283     case Intrinsic::x86_sse_cvtps2pi:
3284     case Intrinsic::x86_sse_cvttps2pi:
3285       handleVectorConvertIntrinsic(I, 2);
3286       break;
3287 
3288     case Intrinsic::x86_avx512_psll_w_512:
3289     case Intrinsic::x86_avx512_psll_d_512:
3290     case Intrinsic::x86_avx512_psll_q_512:
3291     case Intrinsic::x86_avx512_pslli_w_512:
3292     case Intrinsic::x86_avx512_pslli_d_512:
3293     case Intrinsic::x86_avx512_pslli_q_512:
3294     case Intrinsic::x86_avx512_psrl_w_512:
3295     case Intrinsic::x86_avx512_psrl_d_512:
3296     case Intrinsic::x86_avx512_psrl_q_512:
3297     case Intrinsic::x86_avx512_psra_w_512:
3298     case Intrinsic::x86_avx512_psra_d_512:
3299     case Intrinsic::x86_avx512_psra_q_512:
3300     case Intrinsic::x86_avx512_psrli_w_512:
3301     case Intrinsic::x86_avx512_psrli_d_512:
3302     case Intrinsic::x86_avx512_psrli_q_512:
3303     case Intrinsic::x86_avx512_psrai_w_512:
3304     case Intrinsic::x86_avx512_psrai_d_512:
3305     case Intrinsic::x86_avx512_psrai_q_512:
3306     case Intrinsic::x86_avx512_psra_q_256:
3307     case Intrinsic::x86_avx512_psra_q_128:
3308     case Intrinsic::x86_avx512_psrai_q_256:
3309     case Intrinsic::x86_avx512_psrai_q_128:
3310     case Intrinsic::x86_avx2_psll_w:
3311     case Intrinsic::x86_avx2_psll_d:
3312     case Intrinsic::x86_avx2_psll_q:
3313     case Intrinsic::x86_avx2_pslli_w:
3314     case Intrinsic::x86_avx2_pslli_d:
3315     case Intrinsic::x86_avx2_pslli_q:
3316     case Intrinsic::x86_avx2_psrl_w:
3317     case Intrinsic::x86_avx2_psrl_d:
3318     case Intrinsic::x86_avx2_psrl_q:
3319     case Intrinsic::x86_avx2_psra_w:
3320     case Intrinsic::x86_avx2_psra_d:
3321     case Intrinsic::x86_avx2_psrli_w:
3322     case Intrinsic::x86_avx2_psrli_d:
3323     case Intrinsic::x86_avx2_psrli_q:
3324     case Intrinsic::x86_avx2_psrai_w:
3325     case Intrinsic::x86_avx2_psrai_d:
3326     case Intrinsic::x86_sse2_psll_w:
3327     case Intrinsic::x86_sse2_psll_d:
3328     case Intrinsic::x86_sse2_psll_q:
3329     case Intrinsic::x86_sse2_pslli_w:
3330     case Intrinsic::x86_sse2_pslli_d:
3331     case Intrinsic::x86_sse2_pslli_q:
3332     case Intrinsic::x86_sse2_psrl_w:
3333     case Intrinsic::x86_sse2_psrl_d:
3334     case Intrinsic::x86_sse2_psrl_q:
3335     case Intrinsic::x86_sse2_psra_w:
3336     case Intrinsic::x86_sse2_psra_d:
3337     case Intrinsic::x86_sse2_psrli_w:
3338     case Intrinsic::x86_sse2_psrli_d:
3339     case Intrinsic::x86_sse2_psrli_q:
3340     case Intrinsic::x86_sse2_psrai_w:
3341     case Intrinsic::x86_sse2_psrai_d:
3342     case Intrinsic::x86_mmx_psll_w:
3343     case Intrinsic::x86_mmx_psll_d:
3344     case Intrinsic::x86_mmx_psll_q:
3345     case Intrinsic::x86_mmx_pslli_w:
3346     case Intrinsic::x86_mmx_pslli_d:
3347     case Intrinsic::x86_mmx_pslli_q:
3348     case Intrinsic::x86_mmx_psrl_w:
3349     case Intrinsic::x86_mmx_psrl_d:
3350     case Intrinsic::x86_mmx_psrl_q:
3351     case Intrinsic::x86_mmx_psra_w:
3352     case Intrinsic::x86_mmx_psra_d:
3353     case Intrinsic::x86_mmx_psrli_w:
3354     case Intrinsic::x86_mmx_psrli_d:
3355     case Intrinsic::x86_mmx_psrli_q:
3356     case Intrinsic::x86_mmx_psrai_w:
3357     case Intrinsic::x86_mmx_psrai_d:
3358       handleVectorShiftIntrinsic(I, /* Variable */ false);
3359       break;
3360     case Intrinsic::x86_avx2_psllv_d:
3361     case Intrinsic::x86_avx2_psllv_d_256:
3362     case Intrinsic::x86_avx512_psllv_d_512:
3363     case Intrinsic::x86_avx2_psllv_q:
3364     case Intrinsic::x86_avx2_psllv_q_256:
3365     case Intrinsic::x86_avx512_psllv_q_512:
3366     case Intrinsic::x86_avx2_psrlv_d:
3367     case Intrinsic::x86_avx2_psrlv_d_256:
3368     case Intrinsic::x86_avx512_psrlv_d_512:
3369     case Intrinsic::x86_avx2_psrlv_q:
3370     case Intrinsic::x86_avx2_psrlv_q_256:
3371     case Intrinsic::x86_avx512_psrlv_q_512:
3372     case Intrinsic::x86_avx2_psrav_d:
3373     case Intrinsic::x86_avx2_psrav_d_256:
3374     case Intrinsic::x86_avx512_psrav_d_512:
3375     case Intrinsic::x86_avx512_psrav_q_128:
3376     case Intrinsic::x86_avx512_psrav_q_256:
3377     case Intrinsic::x86_avx512_psrav_q_512:
3378       handleVectorShiftIntrinsic(I, /* Variable */ true);
3379       break;
3380 
3381     case Intrinsic::x86_sse2_packsswb_128:
3382     case Intrinsic::x86_sse2_packssdw_128:
3383     case Intrinsic::x86_sse2_packuswb_128:
3384     case Intrinsic::x86_sse41_packusdw:
3385     case Intrinsic::x86_avx2_packsswb:
3386     case Intrinsic::x86_avx2_packssdw:
3387     case Intrinsic::x86_avx2_packuswb:
3388     case Intrinsic::x86_avx2_packusdw:
3389       handleVectorPackIntrinsic(I);
3390       break;
3391 
3392     case Intrinsic::x86_mmx_packsswb:
3393     case Intrinsic::x86_mmx_packuswb:
3394       handleVectorPackIntrinsic(I, 16);
3395       break;
3396 
3397     case Intrinsic::x86_mmx_packssdw:
3398       handleVectorPackIntrinsic(I, 32);
3399       break;
3400 
3401     case Intrinsic::x86_mmx_psad_bw:
3402     case Intrinsic::x86_sse2_psad_bw:
3403     case Intrinsic::x86_avx2_psad_bw:
3404       handleVectorSadIntrinsic(I);
3405       break;
3406 
3407     case Intrinsic::x86_sse2_pmadd_wd:
3408     case Intrinsic::x86_avx2_pmadd_wd:
3409     case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
3410     case Intrinsic::x86_avx2_pmadd_ub_sw:
3411       handleVectorPmaddIntrinsic(I);
3412       break;
3413 
3414     case Intrinsic::x86_ssse3_pmadd_ub_sw:
3415       handleVectorPmaddIntrinsic(I, 8);
3416       break;
3417 
3418     case Intrinsic::x86_mmx_pmadd_wd:
3419       handleVectorPmaddIntrinsic(I, 16);
3420       break;
3421 
3422     case Intrinsic::x86_sse_cmp_ss:
3423     case Intrinsic::x86_sse2_cmp_sd:
3424     case Intrinsic::x86_sse_comieq_ss:
3425     case Intrinsic::x86_sse_comilt_ss:
3426     case Intrinsic::x86_sse_comile_ss:
3427     case Intrinsic::x86_sse_comigt_ss:
3428     case Intrinsic::x86_sse_comige_ss:
3429     case Intrinsic::x86_sse_comineq_ss:
3430     case Intrinsic::x86_sse_ucomieq_ss:
3431     case Intrinsic::x86_sse_ucomilt_ss:
3432     case Intrinsic::x86_sse_ucomile_ss:
3433     case Intrinsic::x86_sse_ucomigt_ss:
3434     case Intrinsic::x86_sse_ucomige_ss:
3435     case Intrinsic::x86_sse_ucomineq_ss:
3436     case Intrinsic::x86_sse2_comieq_sd:
3437     case Intrinsic::x86_sse2_comilt_sd:
3438     case Intrinsic::x86_sse2_comile_sd:
3439     case Intrinsic::x86_sse2_comigt_sd:
3440     case Intrinsic::x86_sse2_comige_sd:
3441     case Intrinsic::x86_sse2_comineq_sd:
3442     case Intrinsic::x86_sse2_ucomieq_sd:
3443     case Intrinsic::x86_sse2_ucomilt_sd:
3444     case Intrinsic::x86_sse2_ucomile_sd:
3445     case Intrinsic::x86_sse2_ucomigt_sd:
3446     case Intrinsic::x86_sse2_ucomige_sd:
3447     case Intrinsic::x86_sse2_ucomineq_sd:
3448       handleVectorCompareScalarIntrinsic(I);
3449       break;
3450 
3451     case Intrinsic::x86_sse_cmp_ps:
3452     case Intrinsic::x86_sse2_cmp_pd:
3453       // FIXME: For x86_avx_cmp_pd_256 and x86_avx_cmp_ps_256 this function
3454       // generates reasonably looking IR that fails in the backend with "Do not
3455       // know how to split the result of this operator!".
3456       handleVectorComparePackedIntrinsic(I);
3457       break;
3458 
3459     case Intrinsic::x86_bmi_bextr_32:
3460     case Intrinsic::x86_bmi_bextr_64:
3461     case Intrinsic::x86_bmi_bzhi_32:
3462     case Intrinsic::x86_bmi_bzhi_64:
3463     case Intrinsic::x86_bmi_pdep_32:
3464     case Intrinsic::x86_bmi_pdep_64:
3465     case Intrinsic::x86_bmi_pext_32:
3466     case Intrinsic::x86_bmi_pext_64:
3467       handleBmiIntrinsic(I);
3468       break;
3469 
3470     case Intrinsic::x86_pclmulqdq:
3471     case Intrinsic::x86_pclmulqdq_256:
3472     case Intrinsic::x86_pclmulqdq_512:
3473       handlePclmulIntrinsic(I);
3474       break;
3475 
3476     case Intrinsic::x86_sse41_round_sd:
3477       handleUnarySdIntrinsic(I);
3478       break;
3479     case Intrinsic::x86_sse2_max_sd:
3480     case Intrinsic::x86_sse2_min_sd:
3481       handleBinarySdIntrinsic(I);
3482       break;
3483 
3484     case Intrinsic::is_constant:
3485       // The result of llvm.is.constant() is always defined.
3486       setShadow(&I, getCleanShadow(&I));
3487       setOrigin(&I, getCleanOrigin());
3488       break;
3489 
3490     default:
3491       if (!handleUnknownIntrinsic(I))
3492         visitInstruction(I);
3493       break;
3494     }
3495   }
3496 
3497   void visitLibAtomicLoad(CallBase &CB) {
3498     IRBuilder<> IRB(&CB);
3499     Value *Size = CB.getArgOperand(0);
3500     Value *SrcPtr = CB.getArgOperand(1);
3501     Value *DstPtr = CB.getArgOperand(2);
3502     Value *Ordering = CB.getArgOperand(3);
3503     // Convert the call to have at least Acquire ordering to make sure
3504     // the shadow operations aren't reordered before it.
3505     Value *NewOrdering =
3506         IRB.CreateExtractElement(makeAddAcquireOrderingTable(IRB), Ordering);
3507     CB.setArgOperand(3, NewOrdering);
3508 
3509     IRBuilder<> NextIRB(CB.getNextNode());
3510     NextIRB.SetCurrentDebugLocation(CB.getDebugLoc());
3511 
3512     Value *SrcShadowPtr, *SrcOriginPtr;
3513     std::tie(SrcShadowPtr, SrcOriginPtr) =
3514         getShadowOriginPtr(SrcPtr, NextIRB, NextIRB.getInt8Ty(), Align(1),
3515                            /*isStore*/ false);
3516     Value *DstShadowPtr =
3517         getShadowOriginPtr(DstPtr, NextIRB, NextIRB.getInt8Ty(), Align(1),
3518                            /*isStore*/ true)
3519             .first;
3520 
3521     NextIRB.CreateMemCpy(DstShadowPtr, Align(1), SrcShadowPtr, Align(1), Size);
3522     if (MS.TrackOrigins) {
3523       Value *SrcOrigin = NextIRB.CreateAlignedLoad(MS.OriginTy, SrcOriginPtr,
3524                                                    kMinOriginAlignment);
3525       Value *NewOrigin = updateOrigin(SrcOrigin, NextIRB);
3526       NextIRB.CreateCall(MS.MsanSetOriginFn, {DstPtr, Size, NewOrigin});
3527     }
3528   }
3529 
3530   void visitLibAtomicStore(CallBase &CB) {
3531     IRBuilder<> IRB(&CB);
3532     Value *Size = CB.getArgOperand(0);
3533     Value *DstPtr = CB.getArgOperand(2);
3534     Value *Ordering = CB.getArgOperand(3);
3535     // Convert the call to have at least Release ordering to make sure
3536     // the shadow operations aren't reordered after it.
3537     Value *NewOrdering =
3538         IRB.CreateExtractElement(makeAddReleaseOrderingTable(IRB), Ordering);
3539     CB.setArgOperand(3, NewOrdering);
3540 
3541     Value *DstShadowPtr =
3542         getShadowOriginPtr(DstPtr, IRB, IRB.getInt8Ty(), Align(1),
3543                            /*isStore*/ true)
3544             .first;
3545 
3546     // Atomic store always paints clean shadow/origin. See file header.
3547     IRB.CreateMemSet(DstShadowPtr, getCleanShadow(IRB.getInt8Ty()), Size,
3548                      Align(1));
3549   }
3550 
3551   void visitCallBase(CallBase &CB) {
3552     assert(!CB.getMetadata("nosanitize"));
3553     if (CB.isInlineAsm()) {
3554       // For inline asm (either a call to an asm function, or a callbr
3555       // instruction), do the usual thing: check argument shadow and mark all
3556       // outputs as clean. Note that any side effects of the inline asm that
3557       // are not immediately visible in its constraints are not handled.
3558       if (ClHandleAsmConservative && MS.CompileKernel)
3559         visitAsmInstruction(CB);
3560       else
3561         visitInstruction(CB);
3562       return;
3563     }
3564     LibFunc LF;
3565     if (TLI->getLibFunc(CB, LF)) {
3566       // libatomic.a functions need to have special handling because there
3567       // isn't a good way to intercept them or compile the library with
3568       // instrumentation.
3569       switch (LF) {
3570       case LibFunc_atomic_load:
3571         visitLibAtomicLoad(CB);
3572         return;
3573       case LibFunc_atomic_store:
3574         visitLibAtomicStore(CB);
3575         return;
3576       default:
3577         break;
3578       }
3579     }
3580 
3581     if (auto *Call = dyn_cast<CallInst>(&CB)) {
3582       assert(!isa<IntrinsicInst>(Call) && "intrinsics are handled elsewhere");
3583 
3584       // We are going to insert code that relies on the fact that the callee
3585       // will become a non-readonly function after it is instrumented by us. To
3586       // prevent this code from being optimized out, mark that function
3587       // non-readonly in advance.
3588       if (Function *Func = Call->getCalledFunction()) {
3589         // Clear out readonly/readnone attributes.
3590         AttrBuilder B;
3591         B.addAttribute(Attribute::ReadOnly)
3592             .addAttribute(Attribute::ReadNone)
3593             .addAttribute(Attribute::WriteOnly)
3594             .addAttribute(Attribute::ArgMemOnly)
3595             .addAttribute(Attribute::Speculatable);
3596         Func->removeAttributes(AttributeList::FunctionIndex, B);
3597       }
3598 
3599       maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI);
3600     }
3601     IRBuilder<> IRB(&CB);
3602     bool MayCheckCall = ClEagerChecks;
3603     if (Function *Func = CB.getCalledFunction()) {
3604       // __sanitizer_unaligned_{load,store} functions may be called by users
3605       // and always expect shadow in the TLS. So don't check them.
3606       MayCheckCall &= !Func->getName().startswith("__sanitizer_unaligned_");
3607     }
3608 
3609     unsigned ArgOffset = 0;
3610     LLVM_DEBUG(dbgs() << "  CallSite: " << CB << "\n");
3611     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
3612          ++ArgIt) {
3613       Value *A = *ArgIt;
3614       unsigned i = ArgIt - CB.arg_begin();
3615       if (!A->getType()->isSized()) {
3616         LLVM_DEBUG(dbgs() << "Arg " << i << " is not sized: " << CB << "\n");
3617         continue;
3618       }
3619       unsigned Size = 0;
3620       Value *Store = nullptr;
3621       // Compute the Shadow for arg even if it is ByVal, because
3622       // in that case getShadow() will copy the actual arg shadow to
3623       // __msan_param_tls.
3624       Value *ArgShadow = getShadow(A);
3625       Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset);
3626       LLVM_DEBUG(dbgs() << "  Arg#" << i << ": " << *A
3627                         << " Shadow: " << *ArgShadow << "\n");
3628       bool ArgIsInitialized = false;
3629       const DataLayout &DL = F.getParent()->getDataLayout();
3630 
3631       bool ByVal = CB.paramHasAttr(i, Attribute::ByVal);
3632       bool NoUndef = CB.paramHasAttr(i, Attribute::NoUndef);
3633       bool EagerCheck = MayCheckCall && !ByVal && NoUndef;
3634 
3635       if (EagerCheck) {
3636         insertShadowCheck(A, &CB);
3637         continue;
3638       }
3639       if (ByVal) {
3640         // ByVal requires some special handling as it's too big for a single
3641         // load.
3642         assert(A->getType()->isPointerTy() &&
3643                "ByVal argument is not a pointer!");
3644         Size = DL.getTypeAllocSize(CB.getParamByValType(i));
3645         if (ArgOffset + Size > kParamTLSSize) break;
3646         const MaybeAlign ParamAlignment(CB.getParamAlign(i));
3647         MaybeAlign Alignment = llvm::None;
3648         if (ParamAlignment)
3649           Alignment = std::min(*ParamAlignment, kShadowTLSAlignment);
3650         Value *AShadowPtr =
3651             getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
3652                                /*isStore*/ false)
3653                 .first;
3654 
3655         Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
3656                                  Alignment, Size);
3657         // TODO(glider): need to copy origins.
3658       } else {
3659         // Any other parameters mean we need bit-grained tracking of uninit data
3660         Size = DL.getTypeAllocSize(A->getType());
3661         if (ArgOffset + Size > kParamTLSSize) break;
3662         Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
3663                                        kShadowTLSAlignment);
3664         Constant *Cst = dyn_cast<Constant>(ArgShadow);
3665         if (Cst && Cst->isNullValue()) ArgIsInitialized = true;
3666       }
3667       if (MS.TrackOrigins && !ArgIsInitialized)
3668         IRB.CreateStore(getOrigin(A),
3669                         getOriginPtrForArgument(A, IRB, ArgOffset));
3670       (void)Store;
3671       assert(Size != 0 && Store != nullptr);
3672       LLVM_DEBUG(dbgs() << "  Param:" << *Store << "\n");
3673       ArgOffset += alignTo(Size, 8);
3674     }
3675     LLVM_DEBUG(dbgs() << "  done with call args\n");
3676 
3677     FunctionType *FT = CB.getFunctionType();
3678     if (FT->isVarArg()) {
3679       VAHelper->visitCallBase(CB, IRB);
3680     }
3681 
3682     // Now, get the shadow for the RetVal.
3683     if (!CB.getType()->isSized())
3684       return;
3685     // Don't emit the epilogue for musttail call returns.
3686     if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
3687       return;
3688 
3689     if (MayCheckCall && CB.hasRetAttr(Attribute::NoUndef)) {
3690       setShadow(&CB, getCleanShadow(&CB));
3691       setOrigin(&CB, getCleanOrigin());
3692       return;
3693     }
3694 
3695     IRBuilder<> IRBBefore(&CB);
3696     // Until we have full dynamic coverage, make sure the retval shadow is 0.
3697     Value *Base = getShadowPtrForRetval(&CB, IRBBefore);
3698     IRBBefore.CreateAlignedStore(getCleanShadow(&CB), Base,
3699                                  kShadowTLSAlignment);
3700     BasicBlock::iterator NextInsn;
3701     if (isa<CallInst>(CB)) {
3702       NextInsn = ++CB.getIterator();
3703       assert(NextInsn != CB.getParent()->end());
3704     } else {
3705       BasicBlock *NormalDest = cast<InvokeInst>(CB).getNormalDest();
3706       if (!NormalDest->getSinglePredecessor()) {
3707         // FIXME: this case is tricky, so we are just conservative here.
3708         // Perhaps we need to split the edge between this BB and NormalDest,
3709         // but a naive attempt to use SplitEdge leads to a crash.
3710         setShadow(&CB, getCleanShadow(&CB));
3711         setOrigin(&CB, getCleanOrigin());
3712         return;
3713       }
3714       // FIXME: NextInsn is likely in a basic block that has not been visited
3715       // yet. Anything inserted there will be instrumented by MSan later!
3716       NextInsn = NormalDest->getFirstInsertionPt();
3717       assert(NextInsn != NormalDest->end() &&
3718              "Could not find insertion point for retval shadow load");
3719     }
3720     IRBuilder<> IRBAfter(&*NextInsn);
3721     Value *RetvalShadow = IRBAfter.CreateAlignedLoad(
3722         getShadowTy(&CB), getShadowPtrForRetval(&CB, IRBAfter),
3723         kShadowTLSAlignment, "_msret");
3724     setShadow(&CB, RetvalShadow);
3725     if (MS.TrackOrigins)
3726       setOrigin(&CB, IRBAfter.CreateLoad(MS.OriginTy,
3727                                          getOriginPtrForRetval(IRBAfter)));
3728   }
3729 
3730   bool isAMustTailRetVal(Value *RetVal) {
3731     if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
3732       RetVal = I->getOperand(0);
3733     }
3734     if (auto *I = dyn_cast<CallInst>(RetVal)) {
3735       return I->isMustTailCall();
3736     }
3737     return false;
3738   }
3739 
3740   void visitReturnInst(ReturnInst &I) {
3741     IRBuilder<> IRB(&I);
3742     Value *RetVal = I.getReturnValue();
3743     if (!RetVal) return;
3744     // Don't emit the epilogue for musttail call returns.
3745     if (isAMustTailRetVal(RetVal)) return;
3746     Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
3747     bool HasNoUndef =
3748         F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoUndef);
3749     bool StoreShadow = !(ClEagerChecks && HasNoUndef);
3750     // FIXME: Consider using SpecialCaseList to specify a list of functions that
3751     // must always return fully initialized values. For now, we hardcode "main".
3752     bool EagerCheck = (ClEagerChecks && HasNoUndef) || (F.getName() == "main");
3753 
3754     Value *Shadow = getShadow(RetVal);
3755     bool StoreOrigin = true;
3756     if (EagerCheck) {
3757       insertShadowCheck(RetVal, &I);
3758       Shadow = getCleanShadow(RetVal);
3759       StoreOrigin = false;
3760     }
3761 
3762     // The caller may still expect information passed over TLS if we pass our
3763     // check.
3764     if (StoreShadow) {
3765       IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
3766       if (MS.TrackOrigins && StoreOrigin)
3767         IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
3768     }
3769   }
3770 
3771   void visitPHINode(PHINode &I) {
3772     IRBuilder<> IRB(&I);
3773     if (!PropagateShadow) {
3774       setShadow(&I, getCleanShadow(&I));
3775       setOrigin(&I, getCleanOrigin());
3776       return;
3777     }
3778 
3779     ShadowPHINodes.push_back(&I);
3780     setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(),
3781                                 "_msphi_s"));
3782     if (MS.TrackOrigins)
3783       setOrigin(&I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(),
3784                                   "_msphi_o"));
3785   }
3786 
3787   Value *getLocalVarDescription(AllocaInst &I) {
3788     SmallString<2048> StackDescriptionStorage;
3789     raw_svector_ostream StackDescription(StackDescriptionStorage);
3790     // We create a string with a description of the stack allocation and
3791     // pass it into __msan_set_alloca_origin.
3792     // It will be printed by the run-time if stack-originated UMR is found.
3793     // The first 4 bytes of the string are set to '----' and will be replaced
3794     // by __msan_va_arg_overflow_size_tls at the first call.
3795     StackDescription << "----" << I.getName() << "@" << F.getName();
3796     return createPrivateNonConstGlobalForString(*F.getParent(),
3797                                                 StackDescription.str());
3798   }
3799 
3800   void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
3801     if (PoisonStack && ClPoisonStackWithCall) {
3802       IRB.CreateCall(MS.MsanPoisonStackFn,
3803                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
3804     } else {
3805       Value *ShadowBase, *OriginBase;
3806       std::tie(ShadowBase, OriginBase) = getShadowOriginPtr(
3807           &I, IRB, IRB.getInt8Ty(), Align(1), /*isStore*/ true);
3808 
3809       Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
3810       IRB.CreateMemSet(ShadowBase, PoisonValue, Len,
3811                        MaybeAlign(I.getAlignment()));
3812     }
3813 
3814     if (PoisonStack && MS.TrackOrigins) {
3815       Value *Descr = getLocalVarDescription(I);
3816       IRB.CreateCall(MS.MsanSetAllocaOrigin4Fn,
3817                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
3818                       IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()),
3819                       IRB.CreatePointerCast(&F, MS.IntptrTy)});
3820     }
3821   }
3822 
3823   void poisonAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
3824     Value *Descr = getLocalVarDescription(I);
3825     if (PoisonStack) {
3826       IRB.CreateCall(MS.MsanPoisonAllocaFn,
3827                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
3828                       IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())});
3829     } else {
3830       IRB.CreateCall(MS.MsanUnpoisonAllocaFn,
3831                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
3832     }
3833   }
3834 
3835   void instrumentAlloca(AllocaInst &I, Instruction *InsPoint = nullptr) {
3836     if (!InsPoint)
3837       InsPoint = &I;
3838     IRBuilder<> IRB(InsPoint->getNextNode());
3839     const DataLayout &DL = F.getParent()->getDataLayout();
3840     uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType());
3841     Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize);
3842     if (I.isArrayAllocation())
3843       Len = IRB.CreateMul(Len, I.getArraySize());
3844 
3845     if (MS.CompileKernel)
3846       poisonAllocaKmsan(I, IRB, Len);
3847     else
3848       poisonAllocaUserspace(I, IRB, Len);
3849   }
3850 
3851   void visitAllocaInst(AllocaInst &I) {
3852     setShadow(&I, getCleanShadow(&I));
3853     setOrigin(&I, getCleanOrigin());
3854     // We'll get to this alloca later unless it's poisoned at the corresponding
3855     // llvm.lifetime.start.
3856     AllocaSet.insert(&I);
3857   }
3858 
3859   void visitSelectInst(SelectInst& I) {
3860     IRBuilder<> IRB(&I);
3861     // a = select b, c, d
3862     Value *B = I.getCondition();
3863     Value *C = I.getTrueValue();
3864     Value *D = I.getFalseValue();
3865     Value *Sb = getShadow(B);
3866     Value *Sc = getShadow(C);
3867     Value *Sd = getShadow(D);
3868 
3869     // Result shadow if condition shadow is 0.
3870     Value *Sa0 = IRB.CreateSelect(B, Sc, Sd);
3871     Value *Sa1;
3872     if (I.getType()->isAggregateType()) {
3873       // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do
3874       // an extra "select". This results in much more compact IR.
3875       // Sa = select Sb, poisoned, (select b, Sc, Sd)
3876       Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
3877     } else {
3878       // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
3879       // If Sb (condition is poisoned), look for bits in c and d that are equal
3880       // and both unpoisoned.
3881       // If !Sb (condition is unpoisoned), simply pick one of Sc and Sd.
3882 
3883       // Cast arguments to shadow-compatible type.
3884       C = CreateAppToShadowCast(IRB, C);
3885       D = CreateAppToShadowCast(IRB, D);
3886 
3887       // Result shadow if condition shadow is 1.
3888       Sa1 = IRB.CreateOr({IRB.CreateXor(C, D), Sc, Sd});
3889     }
3890     Value *Sa = IRB.CreateSelect(Sb, Sa1, Sa0, "_msprop_select");
3891     setShadow(&I, Sa);
3892     if (MS.TrackOrigins) {
3893       // Origins are always i32, so any vector conditions must be flattened.
3894       // FIXME: consider tracking vector origins for app vectors?
3895       if (B->getType()->isVectorTy()) {
3896         Type *FlatTy = getShadowTyNoVec(B->getType());
3897         B = IRB.CreateICmpNE(IRB.CreateBitCast(B, FlatTy),
3898                                 ConstantInt::getNullValue(FlatTy));
3899         Sb = IRB.CreateICmpNE(IRB.CreateBitCast(Sb, FlatTy),
3900                                       ConstantInt::getNullValue(FlatTy));
3901       }
3902       // a = select b, c, d
3903       // Oa = Sb ? Ob : (b ? Oc : Od)
3904       setOrigin(
3905           &I, IRB.CreateSelect(Sb, getOrigin(I.getCondition()),
3906                                IRB.CreateSelect(B, getOrigin(I.getTrueValue()),
3907                                                 getOrigin(I.getFalseValue()))));
3908     }
3909   }
3910 
3911   void visitLandingPadInst(LandingPadInst &I) {
3912     // Do nothing.
3913     // See https://github.com/google/sanitizers/issues/504
3914     setShadow(&I, getCleanShadow(&I));
3915     setOrigin(&I, getCleanOrigin());
3916   }
3917 
3918   void visitCatchSwitchInst(CatchSwitchInst &I) {
3919     setShadow(&I, getCleanShadow(&I));
3920     setOrigin(&I, getCleanOrigin());
3921   }
3922 
3923   void visitFuncletPadInst(FuncletPadInst &I) {
3924     setShadow(&I, getCleanShadow(&I));
3925     setOrigin(&I, getCleanOrigin());
3926   }
3927 
3928   void visitGetElementPtrInst(GetElementPtrInst &I) {
3929     handleShadowOr(I);
3930   }
3931 
3932   void visitExtractValueInst(ExtractValueInst &I) {
3933     IRBuilder<> IRB(&I);
3934     Value *Agg = I.getAggregateOperand();
3935     LLVM_DEBUG(dbgs() << "ExtractValue:  " << I << "\n");
3936     Value *AggShadow = getShadow(Agg);
3937     LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
3938     Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
3939     LLVM_DEBUG(dbgs() << "   ResShadow:  " << *ResShadow << "\n");
3940     setShadow(&I, ResShadow);
3941     setOriginForNaryOp(I);
3942   }
3943 
3944   void visitInsertValueInst(InsertValueInst &I) {
3945     IRBuilder<> IRB(&I);
3946     LLVM_DEBUG(dbgs() << "InsertValue:  " << I << "\n");
3947     Value *AggShadow = getShadow(I.getAggregateOperand());
3948     Value *InsShadow = getShadow(I.getInsertedValueOperand());
3949     LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
3950     LLVM_DEBUG(dbgs() << "   InsShadow:  " << *InsShadow << "\n");
3951     Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
3952     LLVM_DEBUG(dbgs() << "   Res:        " << *Res << "\n");
3953     setShadow(&I, Res);
3954     setOriginForNaryOp(I);
3955   }
3956 
3957   void dumpInst(Instruction &I) {
3958     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
3959       errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n";
3960     } else {
3961       errs() << "ZZZ " << I.getOpcodeName() << "\n";
3962     }
3963     errs() << "QQQ " << I << "\n";
3964   }
3965 
3966   void visitResumeInst(ResumeInst &I) {
3967     LLVM_DEBUG(dbgs() << "Resume: " << I << "\n");
3968     // Nothing to do here.
3969   }
3970 
3971   void visitCleanupReturnInst(CleanupReturnInst &CRI) {
3972     LLVM_DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n");
3973     // Nothing to do here.
3974   }
3975 
3976   void visitCatchReturnInst(CatchReturnInst &CRI) {
3977     LLVM_DEBUG(dbgs() << "CatchReturn: " << CRI << "\n");
3978     // Nothing to do here.
3979   }
3980 
3981   void instrumentAsmArgument(Value *Operand, Instruction &I, IRBuilder<> &IRB,
3982                              const DataLayout &DL, bool isOutput) {
    // For each assembly argument, we check that its value is initialized.
    // If the argument is a pointer, we assume it points to a single element
    // of the corresponding type (or to an 8-byte word, if the type is unsized).
    // Each such pointer is instrumented with a call to the runtime library.
3987     Type *OpType = Operand->getType();
3988     // Check the operand value itself.
3989     insertShadowCheck(Operand, &I);
3990     if (!OpType->isPointerTy() || !isOutput) {
3991       assert(!isOutput);
3992       return;
3993     }
3994     Type *ElType = OpType->getPointerElementType();
3995     if (!ElType->isSized())
3996       return;
3997     int Size = DL.getTypeStoreSize(ElType);
3998     Value *Ptr = IRB.CreatePointerCast(Operand, IRB.getInt8PtrTy());
3999     Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
4000     IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Ptr, SizeVal});
4001   }
4002 
  /// Get the number of output arguments returned via pointers.
4004   int getNumOutputArgs(InlineAsm *IA, CallBase *CB) {
4005     int NumRetOutputs = 0;
4006     int NumOutputs = 0;
4007     Type *RetTy = cast<Value>(CB)->getType();
4008     if (!RetTy->isVoidTy()) {
4009       // Register outputs are returned via the CallInst return value.
4010       auto *ST = dyn_cast<StructType>(RetTy);
4011       if (ST)
4012         NumRetOutputs = ST->getNumElements();
4013       else
4014         NumRetOutputs = 1;
4015     }
4016     InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
4017     for (size_t i = 0, n = Constraints.size(); i < n; i++) {
4018       InlineAsm::ConstraintInfo Info = Constraints[i];
4019       switch (Info.Type) {
4020       case InlineAsm::isOutput:
4021         NumOutputs++;
4022         break;
4023       default:
4024         break;
4025       }
4026     }
4027     return NumOutputs - NumRetOutputs;
4028   }
4029 
4030   void visitAsmInstruction(Instruction &I) {
    // Conservative inline assembly handling: check for poisoned shadow of
    // asm() arguments, then unpoison the result and all the memory locations
    // pointed to by those arguments.
    // An inline asm() statement in C++ contains lists of input and output
    // arguments used by the assembly code. These are mapped to operands of the
    // CallInst as follows:
    //  - nR register outputs ("=r") are returned by value in a single
    //    structure (SSA value of the CallInst);
    //  - nO other outputs ("=m" and others) are returned via pointers as the
    //    first nO operands of the CallInst;
    //  - nI inputs ("r", "m" and others) are passed to the CallInst as the
    //    remaining nI operands.
    // The total number of asm() arguments in the source is nR+nO+nI, and the
    // corresponding CallInst has nO+nI+1 operands (the last operand is the
    // function to be called).
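    //
    // For example (an illustrative sketch only, with hypothetical names):
    //   asm("..." : "=r"(status), "=m"(buf) : "r"(len), "m"(src));
    // has nR = 1 register output returned as the SSA value of the CallInst,
    // and the CallInst operands are roughly (&buf, len, &src, <asm callee>),
    // i.e. nO = 1 pointer output first, then nI = 2 inputs, then the callee.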
4046     const DataLayout &DL = F.getParent()->getDataLayout();
4047     CallBase *CB = cast<CallBase>(&I);
4048     IRBuilder<> IRB(&I);
4049     InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
4050     int OutputArgs = getNumOutputArgs(IA, CB);
4051     // The last operand of a CallInst is the function itself.
4052     int NumOperands = CB->getNumOperands() - 1;
4053 
4054     // Check input arguments. Doing so before unpoisoning output arguments, so
4055     // that we won't overwrite uninit values before checking them.
4056     for (int i = OutputArgs; i < NumOperands; i++) {
4057       Value *Operand = CB->getOperand(i);
4058       instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ false);
4059     }
4060     // Unpoison output arguments. This must happen before the actual InlineAsm
4061     // call, so that the shadow for memory published in the asm() statement
4062     // remains valid.
4063     for (int i = 0; i < OutputArgs; i++) {
4064       Value *Operand = CB->getOperand(i);
4065       instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ true);
4066     }
4067 
4068     setShadow(&I, getCleanShadow(&I));
4069     setOrigin(&I, getCleanOrigin());
4070   }
4071 
4072   void visitInstruction(Instruction &I) {
4073     // Everything else: stop propagating and check for poisoned shadow.
4074     if (ClDumpStrictInstructions)
4075       dumpInst(I);
4076     LLVM_DEBUG(dbgs() << "DEFAULT: " << I << "\n");
4077     for (size_t i = 0, n = I.getNumOperands(); i < n; i++) {
4078       Value *Operand = I.getOperand(i);
4079       if (Operand->getType()->isSized())
4080         insertShadowCheck(Operand, &I);
4081     }
4082     setShadow(&I, getCleanShadow(&I));
4083     setOrigin(&I, getCleanOrigin());
4084   }
4085 };
4086 
4087 /// AMD64-specific implementation of VarArgHelper.
4088 struct VarArgAMD64Helper : public VarArgHelper {
4089   // An unfortunate workaround for asymmetric lowering of va_arg stuff.
4090   // See a comment in visitCallBase for more details.
4091   static const unsigned AMD64GpEndOffset = 48;  // AMD64 ABI Draft 0.99.6 p3.5.7
4092   static const unsigned AMD64FpEndOffsetSSE = 176;
4093   // If SSE is disabled, fp_offset in va_list is zero.
4094   static const unsigned AMD64FpEndOffsetNoSSE = AMD64GpEndOffset;
4095 
4096   unsigned AMD64FpEndOffset;
4097   Function &F;
4098   MemorySanitizer &MS;
4099   MemorySanitizerVisitor &MSV;
4100   Value *VAArgTLSCopy = nullptr;
4101   Value *VAArgTLSOriginCopy = nullptr;
4102   Value *VAArgOverflowSize = nullptr;
4103 
4104   SmallVector<CallInst*, 16> VAStartInstrumentationList;
4105 
4106   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
4107 
4108   VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
4109                     MemorySanitizerVisitor &MSV)
4110       : F(F), MS(MS), MSV(MSV) {
4111     AMD64FpEndOffset = AMD64FpEndOffsetSSE;
4112     for (const auto &Attr : F.getAttributes().getFnAttributes()) {
4113       if (Attr.isStringAttribute() &&
4114           (Attr.getKindAsString() == "target-features")) {
4115         if (Attr.getValueAsString().contains("-sse"))
4116           AMD64FpEndOffset = AMD64FpEndOffsetNoSSE;
4117         break;
4118       }
4119     }
4120   }
4121 
4122   ArgKind classifyArgument(Value* arg) {
4123     // A very rough approximation of X86_64 argument classification rules.
4124     Type *T = arg->getType();
4125     if (T->isFPOrFPVectorTy() || T->isX86_MMXTy())
4126       return AK_FloatingPoint;
4127     if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
4128       return AK_GeneralPurpose;
4129     if (T->isPointerTy())
4130       return AK_GeneralPurpose;
4131     return AK_Memory;
4132   }
4133 
  // For VarArg functions, store the argument shadow in an ABI-specific format
  // that corresponds to the va_list layout.
  // We do this because Clang lowers va_arg in the frontend, and this pass
  // only sees the low-level code that deals with va_list internals.
  // A much easier alternative (provided that Clang emits va_arg instructions)
  // would have been to associate each live instance of va_list with a copy of
  // MSanParamTLS, and extract shadow on each va_arg() call in argument list
  // order.
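  //
  // A rough sketch of the __msan_va_arg_tls layout emulated here, mirroring
  // the register save area of the SysV AMD64 ABI:
  //   [0, 48)    shadow for GP register args (rdi, rsi, rdx, rcx, r8, r9)
  //   [48, 176)  shadow for FP/SSE register args (xmm0-xmm7, 16 bytes each)
  //   [176, ...) shadow for args passed via the overflow (stack) area
  // finalizeInstrumentation() later copies these regions over the shadow of
  // the actual reg_save_area and overflow_arg_area found through the va_list.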
4142   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4143     unsigned GpOffset = 0;
4144     unsigned FpOffset = AMD64GpEndOffset;
4145     unsigned OverflowOffset = AMD64FpEndOffset;
4146     const DataLayout &DL = F.getParent()->getDataLayout();
4147     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
4148          ++ArgIt) {
4149       Value *A = *ArgIt;
4150       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
4151       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
4152       bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
4153       if (IsByVal) {
4154         // ByVal arguments always go to the overflow area.
4155         // Fixed arguments passed through the overflow area will be stepped
4156         // over by va_start, so don't count them towards the offset.
4157         if (IsFixed)
4158           continue;
4159         assert(A->getType()->isPointerTy());
4160         Type *RealTy = CB.getParamByValType(ArgNo);
4161         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
4162         Value *ShadowBase = getShadowPtrForVAArgument(
4163             RealTy, IRB, OverflowOffset, alignTo(ArgSize, 8));
4164         Value *OriginBase = nullptr;
4165         if (MS.TrackOrigins)
4166           OriginBase = getOriginPtrForVAArgument(RealTy, IRB, OverflowOffset);
4167         OverflowOffset += alignTo(ArgSize, 8);
4168         if (!ShadowBase)
4169           continue;
4170         Value *ShadowPtr, *OriginPtr;
4171         std::tie(ShadowPtr, OriginPtr) =
4172             MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment,
4173                                    /*isStore*/ false);
4174 
4175         IRB.CreateMemCpy(ShadowBase, kShadowTLSAlignment, ShadowPtr,
4176                          kShadowTLSAlignment, ArgSize);
4177         if (MS.TrackOrigins)
4178           IRB.CreateMemCpy(OriginBase, kShadowTLSAlignment, OriginPtr,
4179                            kShadowTLSAlignment, ArgSize);
4180       } else {
4181         ArgKind AK = classifyArgument(A);
4182         if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
4183           AK = AK_Memory;
4184         if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
4185           AK = AK_Memory;
4186         Value *ShadowBase, *OriginBase = nullptr;
4187         switch (AK) {
4188           case AK_GeneralPurpose:
4189             ShadowBase =
4190                 getShadowPtrForVAArgument(A->getType(), IRB, GpOffset, 8);
4191             if (MS.TrackOrigins)
4192               OriginBase =
4193                   getOriginPtrForVAArgument(A->getType(), IRB, GpOffset);
4194             GpOffset += 8;
4195             break;
4196           case AK_FloatingPoint:
4197             ShadowBase =
4198                 getShadowPtrForVAArgument(A->getType(), IRB, FpOffset, 16);
4199             if (MS.TrackOrigins)
4200               OriginBase =
4201                   getOriginPtrForVAArgument(A->getType(), IRB, FpOffset);
4202             FpOffset += 16;
4203             break;
4204           case AK_Memory:
4205             if (IsFixed)
4206               continue;
4207             uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4208             ShadowBase =
4209                 getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset, 8);
4210             if (MS.TrackOrigins)
4211               OriginBase =
4212                   getOriginPtrForVAArgument(A->getType(), IRB, OverflowOffset);
4213             OverflowOffset += alignTo(ArgSize, 8);
4214         }
4215         // Take fixed arguments into account for GpOffset and FpOffset,
4216         // but don't actually store shadows for them.
4217         // TODO(glider): don't call get*PtrForVAArgument() for them.
4218         if (IsFixed)
4219           continue;
4220         if (!ShadowBase)
4221           continue;
4222         Value *Shadow = MSV.getShadow(A);
4223         IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment);
4224         if (MS.TrackOrigins) {
4225           Value *Origin = MSV.getOrigin(A);
4226           unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
4227           MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
4228                           std::max(kShadowTLSAlignment, kMinOriginAlignment));
4229         }
4230       }
4231     }
4232     Constant *OverflowSize =
4233       ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
4234     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
4235   }
4236 
4237   /// Compute the shadow address for a given va_arg.
4238   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4239                                    unsigned ArgOffset, unsigned ArgSize) {
4240     // Make sure we don't overflow __msan_va_arg_tls.
4241     if (ArgOffset + ArgSize > kParamTLSSize)
4242       return nullptr;
4243     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4244     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4245     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4246                               "_msarg_va_s");
4247   }
4248 
4249   /// Compute the origin address for a given va_arg.
4250   Value *getOriginPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, int ArgOffset) {
4251     Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
4252     // getOriginPtrForVAArgument() is always called after
4253     // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never
4254     // overflow.
4255     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4256     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
4257                               "_msarg_va_o");
4258   }
4259 
4260   void unpoisonVAListTagForInst(IntrinsicInst &I) {
4261     IRBuilder<> IRB(&I);
4262     Value *VAListTag = I.getArgOperand(0);
4263     Value *ShadowPtr, *OriginPtr;
4264     const Align Alignment = Align(8);
4265     std::tie(ShadowPtr, OriginPtr) =
4266         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
4267                                /*isStore*/ true);
4268 
4269     // Unpoison the whole __va_list_tag.
4270     // FIXME: magic ABI constants.
4271     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4272                      /* size */ 24, Alignment, false);
4273     // We shouldn't need to zero out the origins, as they're only checked for
4274     // nonzero shadow.
4275   }
4276 
4277   void visitVAStartInst(VAStartInst &I) override {
4278     if (F.getCallingConv() == CallingConv::Win64)
4279       return;
4280     VAStartInstrumentationList.push_back(&I);
4281     unpoisonVAListTagForInst(I);
4282   }
4283 
4284   void visitVACopyInst(VACopyInst &I) override {
4285     if (F.getCallingConv() == CallingConv::Win64) return;
4286     unpoisonVAListTagForInst(I);
4287   }
4288 
4289   void finalizeInstrumentation() override {
4290     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
4291            "finalizeInstrumentation called twice");
4292     if (!VAStartInstrumentationList.empty()) {
4293       // If there is a va_start in this function, make a backup copy of
4294       // va_arg_tls somewhere in the function entry block.
4295       IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
4296       VAArgOverflowSize =
4297           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4298       Value *CopySize =
4299         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset),
4300                       VAArgOverflowSize);
4301       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4302       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
4303       if (MS.TrackOrigins) {
4304         VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4305         IRB.CreateMemCpy(VAArgTLSOriginCopy, Align(8), MS.VAArgOriginTLS,
4306                          Align(8), CopySize);
4307       }
4308     }
4309 
4310     // Instrument va_start.
4311     // Copy va_list shadow from the backup copy of the TLS contents.
4312     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4313       CallInst *OrigInst = VAStartInstrumentationList[i];
4314       IRBuilder<> IRB(OrigInst->getNextNode());
4315       Value *VAListTag = OrigInst->getArgOperand(0);
4316 
4317       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4318       Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
4319           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4320                         ConstantInt::get(MS.IntptrTy, 16)),
4321           PointerType::get(RegSaveAreaPtrTy, 0));
4322       Value *RegSaveAreaPtr =
4323           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
4324       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
4325       const Align Alignment = Align(16);
4326       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
4327           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4328                                  Alignment, /*isStore*/ true);
4329       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
4330                        AMD64FpEndOffset);
4331       if (MS.TrackOrigins)
4332         IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
4333                          Alignment, AMD64FpEndOffset);
4334       Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4335       Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
4336           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4337                         ConstantInt::get(MS.IntptrTy, 8)),
4338           PointerType::get(OverflowArgAreaPtrTy, 0));
4339       Value *OverflowArgAreaPtr =
4340           IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
4341       Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
4342       std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
4343           MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
4344                                  Alignment, /*isStore*/ true);
4345       Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
4346                                              AMD64FpEndOffset);
4347       IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
4348                        VAArgOverflowSize);
4349       if (MS.TrackOrigins) {
4350         SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
4351                                         AMD64FpEndOffset);
4352         IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
4353                          VAArgOverflowSize);
4354       }
4355     }
4356   }
4357 };
4358 
4359 /// MIPS64-specific implementation of VarArgHelper.
4360 struct VarArgMIPS64Helper : public VarArgHelper {
4361   Function &F;
4362   MemorySanitizer &MS;
4363   MemorySanitizerVisitor &MSV;
4364   Value *VAArgTLSCopy = nullptr;
4365   Value *VAArgSize = nullptr;
4366 
4367   SmallVector<CallInst*, 16> VAStartInstrumentationList;
4368 
4369   VarArgMIPS64Helper(Function &F, MemorySanitizer &MS,
4370                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
4371 
4372   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4373     unsigned VAArgOffset = 0;
4374     const DataLayout &DL = F.getParent()->getDataLayout();
4375     for (auto ArgIt = CB.arg_begin() + CB.getFunctionType()->getNumParams(),
4376               End = CB.arg_end();
4377          ArgIt != End; ++ArgIt) {
4378       Triple TargetTriple(F.getParent()->getTargetTriple());
4379       Value *A = *ArgIt;
4380       Value *Base;
4381       uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4382       if (TargetTriple.getArch() == Triple::mips64) {
        // Adjust the shadow for arguments with size < 8 to match the
        // placement of bits in a big-endian system.
4385         if (ArgSize < 8)
4386           VAArgOffset += (8 - ArgSize);
4387       }
4388       Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize);
4389       VAArgOffset += ArgSize;
4390       VAArgOffset = alignTo(VAArgOffset, 8);
4391       if (!Base)
4392         continue;
4393       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
4394     }
4395 
4396     Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset);
    // Here we reuse VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creating a
    // new class member; it holds the total size of all varargs.
4399     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
4400   }
4401 
4402   /// Compute the shadow address for a given va_arg.
4403   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4404                                    unsigned ArgOffset, unsigned ArgSize) {
4405     // Make sure we don't overflow __msan_va_arg_tls.
4406     if (ArgOffset + ArgSize > kParamTLSSize)
4407       return nullptr;
4408     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4409     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4410     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4411                               "_msarg");
4412   }
4413 
4414   void visitVAStartInst(VAStartInst &I) override {
4415     IRBuilder<> IRB(&I);
4416     VAStartInstrumentationList.push_back(&I);
4417     Value *VAListTag = I.getArgOperand(0);
4418     Value *ShadowPtr, *OriginPtr;
4419     const Align Alignment = Align(8);
4420     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4421         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4422     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4423                      /* size */ 8, Alignment, false);
4424   }
4425 
4426   void visitVACopyInst(VACopyInst &I) override {
4427     IRBuilder<> IRB(&I);
4428     VAStartInstrumentationList.push_back(&I);
4429     Value *VAListTag = I.getArgOperand(0);
4430     Value *ShadowPtr, *OriginPtr;
4431     const Align Alignment = Align(8);
4432     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4433         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4434     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4435                      /* size */ 8, Alignment, false);
4436   }
4437 
4438   void finalizeInstrumentation() override {
4439     assert(!VAArgSize && !VAArgTLSCopy &&
4440            "finalizeInstrumentation called twice");
4441     IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
4442     VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4443     Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
4444                                     VAArgSize);
4445 
4446     if (!VAStartInstrumentationList.empty()) {
4447       // If there is a va_start in this function, make a backup copy of
4448       // va_arg_tls somewhere in the function entry block.
4449       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4450       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
4451     }
4452 
4453     // Instrument va_start.
4454     // Copy va_list shadow from the backup copy of the TLS contents.
4455     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4456       CallInst *OrigInst = VAStartInstrumentationList[i];
4457       IRBuilder<> IRB(OrigInst->getNextNode());
4458       Value *VAListTag = OrigInst->getArgOperand(0);
4459       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4460       Value *RegSaveAreaPtrPtr =
4461           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4462                              PointerType::get(RegSaveAreaPtrTy, 0));
4463       Value *RegSaveAreaPtr =
4464           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
4465       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
4466       const Align Alignment = Align(8);
4467       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
4468           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4469                                  Alignment, /*isStore*/ true);
4470       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
4471                        CopySize);
4472     }
4473   }
4474 };
4475 
4476 /// AArch64-specific implementation of VarArgHelper.
4477 struct VarArgAArch64Helper : public VarArgHelper {
4478   static const unsigned kAArch64GrArgSize = 64;
4479   static const unsigned kAArch64VrArgSize = 128;
4480 
4481   static const unsigned AArch64GrBegOffset = 0;
4482   static const unsigned AArch64GrEndOffset = kAArch64GrArgSize;
4483   // Make VR space aligned to 16 bytes.
4484   static const unsigned AArch64VrBegOffset = AArch64GrEndOffset;
4485   static const unsigned AArch64VrEndOffset = AArch64VrBegOffset
4486                                              + kAArch64VrArgSize;
4487   static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;
4488 
4489   Function &F;
4490   MemorySanitizer &MS;
4491   MemorySanitizerVisitor &MSV;
4492   Value *VAArgTLSCopy = nullptr;
4493   Value *VAArgOverflowSize = nullptr;
4494 
4495   SmallVector<CallInst*, 16> VAStartInstrumentationList;
4496 
4497   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
4498 
4499   VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
4500                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
4501 
4502   ArgKind classifyArgument(Value* arg) {
4503     Type *T = arg->getType();
4504     if (T->isFPOrFPVectorTy())
4505       return AK_FloatingPoint;
4506     if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
4507         || (T->isPointerTy()))
4508       return AK_GeneralPurpose;
4509     return AK_Memory;
4510   }
4511 
  // The instrumentation stores the argument shadow in a non ABI-specific
  // format because it does not know which arguments are named (Clang, as in
  // the x86_64 case, lowers va_arg in the frontend, and this pass only sees
  // the low-level code that deals with va_list internals).
  // Shadow for the first eight GR registers is saved in the first 64 bytes of
  // the va_arg TLS array, followed by shadow for the first eight FP/SIMD
  // registers, and then by the remaining arguments.
  // Using constant offsets within the va_arg TLS array allows a fast copy
  // when finalizing the instrumentation.
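  //
  // A rough sketch of the resulting va_arg TLS layout (derived from the
  // constants above):
  //   [0, 64)    shadow for GR register args (x0-x7, 8 bytes each)
  //   [64, 192)  shadow for VR register args (v0-v7, 16 bytes each)
  //   [192, ...) shadow for arguments passed on the stack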
4521   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4522     unsigned GrOffset = AArch64GrBegOffset;
4523     unsigned VrOffset = AArch64VrBegOffset;
4524     unsigned OverflowOffset = AArch64VAEndOffset;
4525 
4526     const DataLayout &DL = F.getParent()->getDataLayout();
4527     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
4528          ++ArgIt) {
4529       Value *A = *ArgIt;
4530       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
4531       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
4532       ArgKind AK = classifyArgument(A);
4533       if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset)
4534         AK = AK_Memory;
4535       if (AK == AK_FloatingPoint && VrOffset >= AArch64VrEndOffset)
4536         AK = AK_Memory;
4537       Value *Base;
4538       switch (AK) {
4539         case AK_GeneralPurpose:
4540           Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset, 8);
4541           GrOffset += 8;
4542           break;
4543         case AK_FloatingPoint:
4544           Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset, 8);
4545           VrOffset += 16;
4546           break;
4547         case AK_Memory:
4548           // Don't count fixed arguments in the overflow area - va_start will
4549           // skip right over them.
4550           if (IsFixed)
4551             continue;
4552           uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4553           Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset,
4554                                            alignTo(ArgSize, 8));
4555           OverflowOffset += alignTo(ArgSize, 8);
4556           break;
4557       }
      // Count fixed arguments towards their respective Gr/Vr offsets, but
      // don't bother to actually store a shadow for them.
4560       if (IsFixed)
4561         continue;
4562       if (!Base)
4563         continue;
4564       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
4565     }
4566     Constant *OverflowSize =
4567       ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset);
4568     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
4569   }
4570 
4571   /// Compute the shadow address for a given va_arg.
4572   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4573                                    unsigned ArgOffset, unsigned ArgSize) {
4574     // Make sure we don't overflow __msan_va_arg_tls.
4575     if (ArgOffset + ArgSize > kParamTLSSize)
4576       return nullptr;
4577     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4578     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4579     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4580                               "_msarg");
4581   }
4582 
4583   void visitVAStartInst(VAStartInst &I) override {
4584     IRBuilder<> IRB(&I);
4585     VAStartInstrumentationList.push_back(&I);
4586     Value *VAListTag = I.getArgOperand(0);
4587     Value *ShadowPtr, *OriginPtr;
4588     const Align Alignment = Align(8);
4589     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4590         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4591     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4592                      /* size */ 32, Alignment, false);
4593   }
4594 
4595   void visitVACopyInst(VACopyInst &I) override {
4596     IRBuilder<> IRB(&I);
4597     VAStartInstrumentationList.push_back(&I);
4598     Value *VAListTag = I.getArgOperand(0);
4599     Value *ShadowPtr, *OriginPtr;
4600     const Align Alignment = Align(8);
4601     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4602         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4603     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4604                      /* size */ 32, Alignment, false);
4605   }
4606 
4607   // Retrieve a va_list field of 'void*' size.
4608   Value* getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
4609     Value *SaveAreaPtrPtr =
4610       IRB.CreateIntToPtr(
4611         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4612                       ConstantInt::get(MS.IntptrTy, offset)),
4613         Type::getInt64PtrTy(*MS.C));
4614     return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr);
4615   }
4616 
4617   // Retrieve a va_list field of 'int' size.
4618   Value* getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
4619     Value *SaveAreaPtr =
4620       IRB.CreateIntToPtr(
4621         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4622                       ConstantInt::get(MS.IntptrTy, offset)),
4623         Type::getInt32PtrTy(*MS.C));
4624     Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr);
4625     return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
4626   }
4627 
4628   void finalizeInstrumentation() override {
4629     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
4630            "finalizeInstrumentation called twice");
4631     if (!VAStartInstrumentationList.empty()) {
4632       // If there is a va_start in this function, make a backup copy of
4633       // va_arg_tls somewhere in the function entry block.
4634       IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
4635       VAArgOverflowSize =
4636           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4637       Value *CopySize =
4638         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset),
4639                       VAArgOverflowSize);
4640       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4641       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
4642     }
4643 
4644     Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
4645     Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize);
4646 
4647     // Instrument va_start, copy va_list shadow from the backup copy of
4648     // the TLS contents.
4649     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4650       CallInst *OrigInst = VAStartInstrumentationList[i];
4651       IRBuilder<> IRB(OrigInst->getNextNode());
4652 
4653       Value *VAListTag = OrigInst->getArgOperand(0);
4654 
      // The variadic ABI for AArch64 creates two areas to save the incoming
      // argument registers (one for the 64-bit general-purpose registers
      // x0-x7 and another for the 128-bit FP/SIMD registers v0-v7).
      // We therefore need to propagate argument shadow to both regions,
      // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'.
      // Shadow for the remaining arguments is stored for 'va::stack'.
      // One caveat is that only the unnamed (variadic) arguments need to be
      // propagated, while the call site instrumentation saves shadow for all
      // the arguments. So, when copying shadow values from the va_arg TLS
      // array, we need to adjust the offsets of both the GR and VR regions by
      // the __{gr,vr}_offs values (which account for the incoming named
      // arguments).
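      //
      // A sketch of the AAPCS64 va_list layout assumed by the fixed offsets
      // used below:
      //   struct va_list {
      //     void *__stack;   // offset 0:  next stacked argument
      //     void *__gr_top;  // offset 8:  end of the GR register save area
      //     void *__vr_top;  // offset 16: end of the VR register save area
      //     int __gr_offs;   // offset 24: negative offset from __gr_top
      //     int __vr_offs;   // offset 28: negative offset from __vr_top
      //   };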
4667 
4668       // Read the stack pointer from the va_list.
4669       Value *StackSaveAreaPtr = getVAField64(IRB, VAListTag, 0);
4670 
4671       // Read both the __gr_top and __gr_off and add them up.
4672       Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8);
4673       Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24);
4674 
4675       Value *GrRegSaveAreaPtr = IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea);
4676 
4677       // Read both the __vr_top and __vr_off and add them up.
4678       Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16);
4679       Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28);
4680 
4681       Value *VrRegSaveAreaPtr = IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea);
4682 
      // We do not know how many named arguments were used, and at the call
      // site shadow was saved for all the arguments. Since __gr_offs is
      // defined as '0 - ((8 - named_gr) * 8)', the idea is to propagate only
      // the variadic arguments by skipping the bytes of shadow that belong to
      // the named arguments.
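      // For instance (illustrative numbers only): with two named GP
      // arguments, __gr_offs is -48 at va_start, so GrRegSaveAreaShadowPtrOff
      // below is 64 + (-48) = 16 and GrCopySize is 64 - 16 = 48, i.e. we skip
      // the 16 bytes of shadow that belong to the named arguments and copy
      // shadow only for the six remaining potential GP varargs.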
4687       Value *GrRegSaveAreaShadowPtrOff =
4688         IRB.CreateAdd(GrArgSize, GrOffSaveArea);
4689 
4690       Value *GrRegSaveAreaShadowPtr =
4691           MSV.getShadowOriginPtr(GrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4692                                  Align(8), /*isStore*/ true)
4693               .first;
4694 
4695       Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
4696                                               GrRegSaveAreaShadowPtrOff);
4697       Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);
4698 
4699       IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, Align(8), GrSrcPtr, Align(8),
4700                        GrCopySize);
4701 
4702       // Again, but for FP/SIMD values.
4703       Value *VrRegSaveAreaShadowPtrOff =
4704           IRB.CreateAdd(VrArgSize, VrOffSaveArea);
4705 
4706       Value *VrRegSaveAreaShadowPtr =
4707           MSV.getShadowOriginPtr(VrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4708                                  Align(8), /*isStore*/ true)
4709               .first;
4710 
4711       Value *VrSrcPtr = IRB.CreateInBoundsGEP(
4712         IRB.getInt8Ty(),
4713         IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
4714                               IRB.getInt32(AArch64VrBegOffset)),
4715         VrRegSaveAreaShadowPtrOff);
4716       Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);
4717 
4718       IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, Align(8), VrSrcPtr, Align(8),
4719                        VrCopySize);
4720 
4721       // And finally for remaining arguments.
4722       Value *StackSaveAreaShadowPtr =
4723           MSV.getShadowOriginPtr(StackSaveAreaPtr, IRB, IRB.getInt8Ty(),
4724                                  Align(16), /*isStore*/ true)
4725               .first;
4726 
4727       Value *StackSrcPtr =
4728         IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
4729                               IRB.getInt32(AArch64VAEndOffset));
4730 
4731       IRB.CreateMemCpy(StackSaveAreaShadowPtr, Align(16), StackSrcPtr,
4732                        Align(16), VAArgOverflowSize);
4733     }
4734   }
4735 };
4736 
4737 /// PowerPC64-specific implementation of VarArgHelper.
4738 struct VarArgPowerPC64Helper : public VarArgHelper {
4739   Function &F;
4740   MemorySanitizer &MS;
4741   MemorySanitizerVisitor &MSV;
4742   Value *VAArgTLSCopy = nullptr;
4743   Value *VAArgSize = nullptr;
4744 
4745   SmallVector<CallInst*, 16> VAStartInstrumentationList;
4746 
4747   VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS,
4748                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
4749 
4750   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
    // For PowerPC, we need to deal with the alignment of stack arguments -
    // they are mostly aligned to 8 bytes, but vectors and i128 arrays
    // are aligned to 16 bytes, and byvals can be aligned to 8 or 16 bytes.
    // For that reason, we compute the current offset from the stack pointer
    // (which is always properly aligned) and the offset of the first vararg,
    // then subtract them.
4757     unsigned VAArgBase;
4758     Triple TargetTriple(F.getParent()->getTargetTriple());
    // The parameter save area starts 48 bytes from the frame pointer for
    // ABIv1 and 32 bytes from it for ABIv2. This is usually determined by the
    // target endianness, but in theory it could be overridden by a function
    // attribute.
4762     if (TargetTriple.getArch() == Triple::ppc64)
4763       VAArgBase = 48;
4764     else
4765       VAArgBase = 32;
4766     unsigned VAArgOffset = VAArgBase;
4767     const DataLayout &DL = F.getParent()->getDataLayout();
4768     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
4769          ++ArgIt) {
4770       Value *A = *ArgIt;
4771       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
4772       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
4773       bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
4774       if (IsByVal) {
4775         assert(A->getType()->isPointerTy());
4776         Type *RealTy = CB.getParamByValType(ArgNo);
4777         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
4778         MaybeAlign ArgAlign = CB.getParamAlign(ArgNo);
4779         if (!ArgAlign || *ArgAlign < Align(8))
4780           ArgAlign = Align(8);
4781         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
4782         if (!IsFixed) {
4783           Value *Base = getShadowPtrForVAArgument(
4784               RealTy, IRB, VAArgOffset - VAArgBase, ArgSize);
4785           if (Base) {
4786             Value *AShadowPtr, *AOriginPtr;
4787             std::tie(AShadowPtr, AOriginPtr) =
4788                 MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(),
4789                                        kShadowTLSAlignment, /*isStore*/ false);
4790 
4791             IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr,
4792                              kShadowTLSAlignment, ArgSize);
4793           }
4794         }
4795         VAArgOffset += alignTo(ArgSize, 8);
4796       } else {
4797         Value *Base;
4798         uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4799         uint64_t ArgAlign = 8;
4800         if (A->getType()->isArrayTy()) {
4801           // Arrays are aligned to element size, except for long double
4802           // arrays, which are aligned to 8 bytes.
4803           Type *ElementTy = A->getType()->getArrayElementType();
4804           if (!ElementTy->isPPC_FP128Ty())
4805             ArgAlign = DL.getTypeAllocSize(ElementTy);
4806         } else if (A->getType()->isVectorTy()) {
4807           // Vectors are naturally aligned.
4808           ArgAlign = DL.getTypeAllocSize(A->getType());
4809         }
4810         if (ArgAlign < 8)
4811           ArgAlign = 8;
4812         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
4813         if (DL.isBigEndian()) {
          // Adjust the shadow for arguments with size < 8 to match the
          // placement of bits in a big-endian system.
4816           if (ArgSize < 8)
4817             VAArgOffset += (8 - ArgSize);
4818         }
4819         if (!IsFixed) {
4820           Base = getShadowPtrForVAArgument(A->getType(), IRB,
4821                                            VAArgOffset - VAArgBase, ArgSize);
4822           if (Base)
4823             IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
4824         }
4825         VAArgOffset += ArgSize;
4826         VAArgOffset = alignTo(VAArgOffset, 8);
4827       }
4828       if (IsFixed)
4829         VAArgBase = VAArgOffset;
4830     }
4831 
4832     Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(),
4833                                                 VAArgOffset - VAArgBase);
    // Here we reuse VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creating a
    // new class member; it holds the total size of all varargs.
4836     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
4837   }
4838 
4839   /// Compute the shadow address for a given va_arg.
4840   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4841                                    unsigned ArgOffset, unsigned ArgSize) {
4842     // Make sure we don't overflow __msan_va_arg_tls.
4843     if (ArgOffset + ArgSize > kParamTLSSize)
4844       return nullptr;
4845     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4846     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4847     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4848                               "_msarg");
4849   }
4850 
4851   void visitVAStartInst(VAStartInst &I) override {
4852     IRBuilder<> IRB(&I);
4853     VAStartInstrumentationList.push_back(&I);
4854     Value *VAListTag = I.getArgOperand(0);
4855     Value *ShadowPtr, *OriginPtr;
4856     const Align Alignment = Align(8);
4857     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4858         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4859     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4860                      /* size */ 8, Alignment, false);
4861   }
4862 
4863   void visitVACopyInst(VACopyInst &I) override {
4864     IRBuilder<> IRB(&I);
4865     Value *VAListTag = I.getArgOperand(0);
4866     Value *ShadowPtr, *OriginPtr;
4867     const Align Alignment = Align(8);
4868     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4869         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4870     // Unpoison the whole __va_list_tag.
4871     // FIXME: magic ABI constants.
4872     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4873                      /* size */ 8, Alignment, false);
4874   }
4875 
4876   void finalizeInstrumentation() override {
4877     assert(!VAArgSize && !VAArgTLSCopy &&
4878            "finalizeInstrumentation called twice");
4879     IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
4880     VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4881     Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
4882                                     VAArgSize);
4883 
4884     if (!VAStartInstrumentationList.empty()) {
4885       // If there is a va_start in this function, make a backup copy of
4886       // va_arg_tls somewhere in the function entry block.
4887       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4888       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
4889     }
4890 
4891     // Instrument va_start.
4892     // Copy va_list shadow from the backup copy of the TLS contents.
4893     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4894       CallInst *OrigInst = VAStartInstrumentationList[i];
4895       IRBuilder<> IRB(OrigInst->getNextNode());
4896       Value *VAListTag = OrigInst->getArgOperand(0);
4897       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4898       Value *RegSaveAreaPtrPtr =
4899           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4900                              PointerType::get(RegSaveAreaPtrTy, 0));
4901       Value *RegSaveAreaPtr =
4902           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
4903       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
4904       const Align Alignment = Align(8);
4905       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
4906           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4907                                  Alignment, /*isStore*/ true);
4908       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
4909                        CopySize);
4910     }
4911   }
4912 };
4913 
4914 /// SystemZ-specific implementation of VarArgHelper.
4915 struct VarArgSystemZHelper : public VarArgHelper {
4916   static const unsigned SystemZGpOffset = 16;
4917   static const unsigned SystemZGpEndOffset = 56;
4918   static const unsigned SystemZFpOffset = 128;
4919   static const unsigned SystemZFpEndOffset = 160;
4920   static const unsigned SystemZMaxVrArgs = 8;
4921   static const unsigned SystemZRegSaveAreaSize = 160;
4922   static const unsigned SystemZOverflowOffset = 160;
4923   static const unsigned SystemZVAListTagSize = 32;
4924   static const unsigned SystemZOverflowArgAreaPtrOffset = 16;
4925   static const unsigned SystemZRegSaveAreaPtrOffset = 24;
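
  // A sketch of the s390x ELF ABI va_list layout that these constants encode
  // (the offsets are what SystemZOverflowArgAreaPtrOffset etc. refer to):
  //   struct __va_list_tag {
  //     long __gpr;                 // offset 0:  number of GPR args used
  //     long __fpr;                 // offset 8:  number of FPR args used
  //     void *__overflow_arg_area;  // offset 16
  //     void *__reg_save_area;      // offset 24
  //   };                            // 32 bytes total
  // Within the 160-byte register save area, GPR argument slots occupy
  // offsets [16, 56) and FPR argument slots occupy offsets [128, 160).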
4926 
4927   Function &F;
4928   MemorySanitizer &MS;
4929   MemorySanitizerVisitor &MSV;
4930   Value *VAArgTLSCopy = nullptr;
4931   Value *VAArgTLSOriginCopy = nullptr;
4932   Value *VAArgOverflowSize = nullptr;
4933 
4934   SmallVector<CallInst *, 16> VAStartInstrumentationList;
4935 
4936   enum class ArgKind {
4937     GeneralPurpose,
4938     FloatingPoint,
4939     Vector,
4940     Memory,
4941     Indirect,
4942   };
4943 
4944   enum class ShadowExtension { None, Zero, Sign };
4945 
4946   VarArgSystemZHelper(Function &F, MemorySanitizer &MS,
4947                       MemorySanitizerVisitor &MSV)
4948       : F(F), MS(MS), MSV(MSV) {}
4949 
4950   ArgKind classifyArgument(Type *T, bool IsSoftFloatABI) {
4951     // T is a SystemZABIInfo::classifyArgumentType() output, and there are
4952     // only a few possibilities of what it can be. In particular, enums, single
4953     // element structs and large types have already been taken care of.
4954 
4955     // Some i128 and fp128 arguments are converted to pointers only in the
4956     // back end.
4957     if (T->isIntegerTy(128) || T->isFP128Ty())
4958       return ArgKind::Indirect;
4959     if (T->isFloatingPointTy())
4960       return IsSoftFloatABI ? ArgKind::GeneralPurpose : ArgKind::FloatingPoint;
4961     if (T->isIntegerTy() || T->isPointerTy())
4962       return ArgKind::GeneralPurpose;
4963     if (T->isVectorTy())
4964       return ArgKind::Vector;
4965     return ArgKind::Memory;
4966   }
4967 
4968   ShadowExtension getShadowExtension(const CallBase &CB, unsigned ArgNo) {
4969     // ABI says: "One of the simple integer types no more than 64 bits wide.
4970     // ... If such an argument is shorter than 64 bits, replace it by a full
4971     // 64-bit integer representing the same number, using sign or zero
4972     // extension". Shadow for an integer argument has the same type as the
4973     // argument itself, so it can be sign or zero extended as well.
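    //
    // For example, a 'signext i8' vararg is classified as
    // ShadowExtension::Sign here, and visitCallBase() later sign-extends its
    // i8 shadow to i64 before storing it into the va_arg TLS slot.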
4974     bool ZExt = CB.paramHasAttr(ArgNo, Attribute::ZExt);
4975     bool SExt = CB.paramHasAttr(ArgNo, Attribute::SExt);
4976     if (ZExt) {
4977       assert(!SExt);
4978       return ShadowExtension::Zero;
4979     }
4980     if (SExt) {
4981       assert(!ZExt);
4982       return ShadowExtension::Sign;
4983     }
4984     return ShadowExtension::None;
4985   }
4986 
4987   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4988     bool IsSoftFloatABI = CB.getCalledFunction()
4989                               ->getFnAttribute("use-soft-float")
4990                               .getValueAsString() == "true";
4991     unsigned GpOffset = SystemZGpOffset;
4992     unsigned FpOffset = SystemZFpOffset;
4993     unsigned VrIndex = 0;
4994     unsigned OverflowOffset = SystemZOverflowOffset;
4995     const DataLayout &DL = F.getParent()->getDataLayout();
4996     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
4997          ++ArgIt) {
4998       Value *A = *ArgIt;
4999       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
5000       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
5001       // SystemZABIInfo does not produce ByVal parameters.
5002       assert(!CB.paramHasAttr(ArgNo, Attribute::ByVal));
5003       Type *T = A->getType();
5004       ArgKind AK = classifyArgument(T, IsSoftFloatABI);
5005       if (AK == ArgKind::Indirect) {
5006         T = PointerType::get(T, 0);
5007         AK = ArgKind::GeneralPurpose;
5008       }
5009       if (AK == ArgKind::GeneralPurpose && GpOffset >= SystemZGpEndOffset)
5010         AK = ArgKind::Memory;
5011       if (AK == ArgKind::FloatingPoint && FpOffset >= SystemZFpEndOffset)
5012         AK = ArgKind::Memory;
5013       if (AK == ArgKind::Vector && (VrIndex >= SystemZMaxVrArgs || !IsFixed))
5014         AK = ArgKind::Memory;
5015       Value *ShadowBase = nullptr;
5016       Value *OriginBase = nullptr;
5017       ShadowExtension SE = ShadowExtension::None;
5018       switch (AK) {
5019       case ArgKind::GeneralPurpose: {
5020         // Always keep track of GpOffset, but store shadow only for varargs.
5021         uint64_t ArgSize = 8;
5022         if (GpOffset + ArgSize <= kParamTLSSize) {
5023           if (!IsFixed) {
5024             SE = getShadowExtension(CB, ArgNo);
5025             uint64_t GapSize = 0;
5026             if (SE == ShadowExtension::None) {
5027               uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
5028               assert(ArgAllocSize <= ArgSize);
5029               GapSize = ArgSize - ArgAllocSize;
5030             }
5031             ShadowBase = getShadowAddrForVAArgument(IRB, GpOffset + GapSize);
5032             if (MS.TrackOrigins)
5033               OriginBase = getOriginPtrForVAArgument(IRB, GpOffset + GapSize);
5034           }
5035           GpOffset += ArgSize;
5036         } else {
5037           GpOffset = kParamTLSSize;
5038         }
5039         break;
5040       }
5041       case ArgKind::FloatingPoint: {
5042         // Always keep track of FpOffset, but store shadow only for varargs.
5043         uint64_t ArgSize = 8;
5044         if (FpOffset + ArgSize <= kParamTLSSize) {
5045           if (!IsFixed) {
            // The PoP (z/Architecture Principles of Operation) says: "A short
            // floating-point datum requires only the left-most 32 bit
            // positions of a floating-point register". Therefore, in contrast
            // to ArgKind::GeneralPurpose and ArgKind::Memory, don't extend
            // the shadow and don't mind the gap.
5050             ShadowBase = getShadowAddrForVAArgument(IRB, FpOffset);
5051             if (MS.TrackOrigins)
5052               OriginBase = getOriginPtrForVAArgument(IRB, FpOffset);
5053           }
5054           FpOffset += ArgSize;
5055         } else {
5056           FpOffset = kParamTLSSize;
5057         }
5058         break;
5059       }
5060       case ArgKind::Vector: {
5061         // Keep track of VrIndex. No need to store shadow, since vector varargs
5062         // go through AK_Memory.
5063         assert(IsFixed);
5064         VrIndex++;
5065         break;
5066       }
5067       case ArgKind::Memory: {
5068         // Keep track of OverflowOffset and store shadow only for varargs.
5069         // Ignore fixed args, since we need to copy only the vararg portion of
5070         // the overflow area shadow.
5071         if (!IsFixed) {
5072           uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
5073           uint64_t ArgSize = alignTo(ArgAllocSize, 8);
5074           if (OverflowOffset + ArgSize <= kParamTLSSize) {
5075             SE = getShadowExtension(CB, ArgNo);
5076             uint64_t GapSize =
5077                 SE == ShadowExtension::None ? ArgSize - ArgAllocSize : 0;
5078             ShadowBase =
5079                 getShadowAddrForVAArgument(IRB, OverflowOffset + GapSize);
5080             if (MS.TrackOrigins)
5081               OriginBase =
5082                   getOriginPtrForVAArgument(IRB, OverflowOffset + GapSize);
5083             OverflowOffset += ArgSize;
5084           } else {
5085             OverflowOffset = kParamTLSSize;
5086           }
5087         }
5088         break;
5089       }
5090       case ArgKind::Indirect:
5091         llvm_unreachable("Indirect must be converted to GeneralPurpose");
5092       }
5093       if (ShadowBase == nullptr)
5094         continue;
5095       Value *Shadow = MSV.getShadow(A);
5096       if (SE != ShadowExtension::None)
5097         Shadow = MSV.CreateShadowCast(IRB, Shadow, IRB.getInt64Ty(),
5098                                       /*Signed*/ SE == ShadowExtension::Sign);
5099       ShadowBase = IRB.CreateIntToPtr(
5100           ShadowBase, PointerType::get(Shadow->getType(), 0), "_msarg_va_s");
5101       IRB.CreateStore(Shadow, ShadowBase);
5102       if (MS.TrackOrigins) {
5103         Value *Origin = MSV.getOrigin(A);
5104         unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
5105         MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
5106                         kMinOriginAlignment);
5107       }
5108     }
5109     Constant *OverflowSize = ConstantInt::get(
5110         IRB.getInt64Ty(), OverflowOffset - SystemZOverflowOffset);
5111     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
5112   }
5113 
5114   Value *getShadowAddrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
5115     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
5116     return IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5117   }
5118 
5119   Value *getOriginPtrForVAArgument(IRBuilder<> &IRB, int ArgOffset) {
5120     Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
5121     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5122     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
5123                               "_msarg_va_o");
5124   }
5125 
5126   void unpoisonVAListTagForInst(IntrinsicInst &I) {
5127     IRBuilder<> IRB(&I);
5128     Value *VAListTag = I.getArgOperand(0);
5129     Value *ShadowPtr, *OriginPtr;
5130     const Align Alignment = Align(8);
5131     std::tie(ShadowPtr, OriginPtr) =
5132         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
5133                                /*isStore*/ true);
5134     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
5135                      SystemZVAListTagSize, Alignment, false);
5136   }
5137 
5138   void visitVAStartInst(VAStartInst &I) override {
5139     VAStartInstrumentationList.push_back(&I);
5140     unpoisonVAListTagForInst(I);
5141   }
5142 
5143   void visitVACopyInst(VACopyInst &I) override { unpoisonVAListTagForInst(I); }
5144 
5145   void copyRegSaveArea(IRBuilder<> &IRB, Value *VAListTag) {
5146     Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
5147     Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
5148         IRB.CreateAdd(
5149             IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5150             ConstantInt::get(MS.IntptrTy, SystemZRegSaveAreaPtrOffset)),
5151         PointerType::get(RegSaveAreaPtrTy, 0));
5152     Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
5153     Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
5154     const Align Alignment = Align(8);
5155     std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
5156         MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), Alignment,
5157                                /*isStore*/ true);
5158     // TODO(iii): copy only fragments filled by visitCallBase()
5159     IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
5160                      SystemZRegSaveAreaSize);
5161     if (MS.TrackOrigins)
5162       IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
5163                        Alignment, SystemZRegSaveAreaSize);
5164   }
5165 
  void copyOverflowArea(IRBuilder<> &IRB, Value *VAListTag) {
    Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
    Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
        IRB.CreateAdd(
            IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
            ConstantInt::get(MS.IntptrTy, SystemZOverflowArgAreaPtrOffset)),
        PointerType::get(OverflowArgAreaPtrTy, 0));
    Value *OverflowArgAreaPtr =
        IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
    Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
    const Align Alignment = Align(8);
    std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
        MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
                               Alignment, /*isStore*/ true);
    Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
                                           SystemZOverflowOffset);
    IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
                     VAArgOverflowSize);
    if (MS.TrackOrigins) {
      SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
                                      SystemZOverflowOffset);
      IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
                       VAArgOverflowSize);
    }
  }

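  /// After the function body has been instrumented, materialize the va_arg
  /// shadow: back up the va_arg TLS block in the entry block, then, after
  /// every va_start, propagate it to the register save area and the overflow
  /// argument area.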
  void finalizeInstrumentation() override {
    assert(!VAArgOverflowSize && !VAArgTLSCopy &&
           "finalizeInstrumentation called twice");
    if (!VAStartInstrumentationList.empty()) {
      // If there is a va_start in this function, make a backup copy of
      // va_arg_tls somewhere in the function entry block.
      IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
      VAArgOverflowSize =
          IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
      Value *CopySize =
          IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, SystemZOverflowOffset),
                        VAArgOverflowSize);
      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
      IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
      if (MS.TrackOrigins) {
        VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
        IRB.CreateMemCpy(VAArgTLSOriginCopy, Align(8), MS.VAArgOriginTLS,
                         Align(8), CopySize);
      }
    }

    // Instrument va_start.
    // Copy va_list shadow from the backup copy of the TLS contents.
    for (CallInst *OrigInst : VAStartInstrumentationList) {
      IRBuilder<> IRB(OrigInst->getNextNode());
      Value *VAListTag = OrigInst->getArgOperand(0);
      copyRegSaveArea(IRB, VAListTag);
      copyOverflowArea(IRB, VAListTag);
    }
  }
};

/// A no-op implementation of VarArgHelper.
struct VarArgNoOpHelper : public VarArgHelper {
  VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
                   MemorySanitizerVisitor &MSV) {}

  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {}

  void visitVAStartInst(VAStartInst &I) override {}

  void visitVACopyInst(VACopyInst &I) override {}

  void finalizeInstrumentation() override {}
};

} // end anonymous namespace

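/// Create the target-specific VarArgHelper that propagates shadow for
/// variadic arguments, falling back to the no-op helper on unsupported
/// targets.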
static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
                                        MemorySanitizerVisitor &Visitor) {
  // VarArg handling is implemented only on AMD64, MIPS64, AArch64, PowerPC64
  // and SystemZ; on the remaining platforms the no-op helper is used and
  // false positives are possible.
  Triple TargetTriple(Func.getParent()->getTargetTriple());
  if (TargetTriple.getArch() == Triple::x86_64)
    return new VarArgAMD64Helper(Func, Msan, Visitor);
  else if (TargetTriple.isMIPS64())
    return new VarArgMIPS64Helper(Func, Msan, Visitor);
  else if (TargetTriple.getArch() == Triple::aarch64)
    return new VarArgAArch64Helper(Func, Msan, Visitor);
  else if (TargetTriple.getArch() == Triple::ppc64 ||
           TargetTriple.getArch() == Triple::ppc64le)
    return new VarArgPowerPC64Helper(Func, Msan, Visitor);
  else if (TargetTriple.getArch() == Triple::systemz)
    return new VarArgSystemZHelper(Func, Msan, Visitor);
  else
    return new VarArgNoOpHelper(Func, Msan, Visitor);
}

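// Instrument a single function: skip the MSan module constructor, drop the
// attributes that the inserted instrumentation would violate, and run the
// instrumentation visitor.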
bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
  if (!CompileKernel && F.getName() == kMsanModuleCtorName)
    return false;

  MemorySanitizerVisitor Visitor(F, *this, TLI);

  // Clear out memory attributes (readonly/readnone/writeonly/argmemonly) and
  // speculatable: the shadow loads, stores and checks inserted below would
  // violate them.
  AttrBuilder B;
  B.addAttribute(Attribute::ReadOnly)
      .addAttribute(Attribute::ReadNone)
      .addAttribute(Attribute::WriteOnly)
      .addAttribute(Attribute::ArgMemOnly)
      .addAttribute(Attribute::Speculatable);
  F.removeAttributes(AttributeList::FunctionIndex, B);

  return Visitor.runOnFunction();
}