1 //===- MemorySanitizer.cpp - detector of uninitialized reads --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file is a part of MemorySanitizer, a detector of uninitialized
11 /// reads.
12 ///
13 /// The algorithm of the tool is similar to Memcheck
14 /// (http://goo.gl/QKbem). We associate a few shadow bits with every
15 /// byte of the application memory, poison the shadow of the malloc-ed
16 /// or alloca-ed memory, load the shadow bits on every memory read,
17 /// propagate the shadow bits through some of the arithmetic
18 /// instruction (including MOV), store the shadow bits on every memory
19 /// write, report a bug on some other instructions (e.g. JMP) if the
20 /// associated shadow is poisoned.
21 ///
22 /// But there are differences too. The first and the major one:
23 /// compiler instrumentation instead of binary instrumentation. This
24 /// gives us much better register allocation, possible compiler
25 /// optimizations and a fast start-up. But this brings the major issue
26 /// as well: msan needs to see all program events, including system
27 /// calls and reads/writes in system libraries, so we either need to
28 /// compile *everything* with msan or use a binary translation
29 /// component (e.g. DynamoRIO) to instrument pre-built libraries.
30 /// Another difference from Memcheck is that we use 8 shadow bits per
31 /// byte of application memory and use a direct shadow mapping. This
32 /// greatly simplifies the instrumentation code and avoids races on
33 /// shadow updates (Memcheck is single-threaded so races are not a
34 /// concern there. Memcheck uses 2 shadow bits per byte with a slow
35 /// path storage that uses 8 bits per byte).
36 ///
37 /// The default value of shadow is 0, which means "clean" (not poisoned).
38 ///
39 /// Every module initializer should call __msan_init to ensure that the
40 /// shadow memory is ready. On error, __msan_warning is called. Since
41 /// parameters and return values may be passed via registers, we have a
42 /// specialized thread-local shadow for return values
43 /// (__msan_retval_tls) and parameters (__msan_param_tls).
44 ///
45 ///                           Origin tracking.
46 ///
47 /// MemorySanitizer can track origins (allocation points) of all uninitialized
48 /// values. This behavior is controlled with a flag (msan-track-origins) and is
49 /// disabled by default.
50 ///
51 /// Origins are 4-byte values created and interpreted by the runtime library.
52 /// They are stored in a second shadow mapping, one 4-byte value for 4 bytes
53 /// of application memory. Propagation of origins is basically a bunch of
54 /// "select" instructions that pick the origin of a dirty argument, if an
55 /// instruction has one.
56 ///
57 /// Every 4 aligned, consecutive bytes of application memory have one origin
58 /// value associated with them. If these bytes contain uninitialized data
59 /// coming from 2 different allocations, the last store wins. Because of this,
60 /// MemorySanitizer reports can show unrelated origins, but this is unlikely in
61 /// practice.
62 ///
63 /// Origins are meaningless for fully initialized values, so MemorySanitizer
64 /// avoids storing origin to memory when a fully initialized value is stored.
65 /// This way it avoids needless overwriting origin of the 4-byte region on
66 /// a short (i.e. 1 byte) clean store, and it is also good for performance.
67 ///
68 ///                            Atomic handling.
69 ///
70 /// Ideally, every atomic store of application value should update the
71 /// corresponding shadow location in an atomic way. Unfortunately, atomic store
72 /// of two disjoint locations can not be done without severe slowdown.
73 ///
74 /// Therefore, we implement an approximation that may err on the safe side.
75 /// In this implementation, every atomically accessed location in the program
76 /// may only change from (partially) uninitialized to fully initialized, but
77 /// not the other way around. We load the shadow _after_ the application load,
78 /// and we store the shadow _before_ the app store. Also, we always store clean
79 /// shadow (if the application store is atomic). This way, if the store-load
80 /// pair constitutes a happens-before arc, shadow store and load are correctly
81 /// ordered such that the load will get either the value that was stored, or
82 /// some later value (which is always clean).
83 ///
84 /// This does not work very well with Compare-And-Swap (CAS) and
85 /// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW
86 /// must store the new shadow before the app operation, and load the shadow
87 /// after the app operation. Computers don't work this way. Current
88 /// implementation ignores the load aspect of CAS/RMW, always returning a clean
89 /// value. It implements the store part as a simple atomic store by storing a
90 /// clean shadow.
91 ///
92 ///                      Instrumenting inline assembly.
93 ///
94 /// For inline assembly code LLVM has little idea about which memory locations
95 /// become initialized depending on the arguments. It can be possible to figure
96 /// out which arguments are meant to point to inputs and outputs, but the
97 /// actual semantics can be only visible at runtime. In the Linux kernel it's
98 /// also possible that the arguments only indicate the offset for a base taken
99 /// from a segment register, so it's dangerous to treat any asm() arguments as
100 /// pointers. We take a conservative approach generating calls to
101 ///   __msan_instrument_asm_store(ptr, size)
102 /// , which defer the memory unpoisoning to the runtime library.
103 /// The latter can perform more complex address checks to figure out whether
104 /// it's safe to touch the shadow memory.
105 /// Like with atomic operations, we call __msan_instrument_asm_store() before
106 /// the assembly call, so that changes to the shadow memory will be seen by
107 /// other threads together with main memory initialization.
108 ///
109 ///                  KernelMemorySanitizer (KMSAN) implementation.
110 ///
111 /// The major differences between KMSAN and MSan instrumentation are:
112 ///  - KMSAN always tracks the origins and implies msan-keep-going=true;
113 ///  - KMSAN allocates shadow and origin memory for each page separately, so
114 ///    there are no explicit accesses to shadow and origin in the
115 ///    instrumentation.
116 ///    Shadow and origin values for a particular X-byte memory location
117 ///    (X=1,2,4,8) are accessed through pointers obtained via the
118 ///      __msan_metadata_ptr_for_load_X(ptr)
119 ///      __msan_metadata_ptr_for_store_X(ptr)
///    functions. The corresponding functions check that the X-byte accesses
///    are possible and return the pointers to shadow and origin memory.
122 ///    Arbitrary sized accesses are handled with:
123 ///      __msan_metadata_ptr_for_load_n(ptr, size)
124 ///      __msan_metadata_ptr_for_store_n(ptr, size);
125 ///  - TLS variables are stored in a single per-task struct. A call to a
126 ///    function __msan_get_context_state() returning a pointer to that struct
127 ///    is inserted into every instrumented function before the entry block;
128 ///  - __msan_warning() takes a 32-bit origin parameter;
129 ///  - local variables are poisoned with __msan_poison_alloca() upon function
130 ///    entry and unpoisoned with __msan_unpoison_alloca() before leaving the
131 ///    function;
132 ///  - the pass doesn't declare any global variables or add global constructors
133 ///    to the translation unit.
134 ///
135 /// Also, KMSAN currently ignores uninitialized memory passed into inline asm
136 /// calls, making sure we're on the safe side wrt. possible false positives.
137 ///
138 ///  KernelMemorySanitizer only supports X86_64 at the moment.
139 ///
140 //
141 // FIXME: This sanitizer does not yet handle scalable vectors
142 //
143 //===----------------------------------------------------------------------===//
144 
145 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
146 #include "llvm/ADT/APInt.h"
147 #include "llvm/ADT/ArrayRef.h"
148 #include "llvm/ADT/DepthFirstIterator.h"
149 #include "llvm/ADT/SmallSet.h"
150 #include "llvm/ADT/SmallString.h"
151 #include "llvm/ADT/SmallVector.h"
152 #include "llvm/ADT/StringExtras.h"
153 #include "llvm/ADT/StringRef.h"
154 #include "llvm/ADT/Triple.h"
155 #include "llvm/Analysis/TargetLibraryInfo.h"
156 #include "llvm/IR/Argument.h"
157 #include "llvm/IR/Attributes.h"
158 #include "llvm/IR/BasicBlock.h"
159 #include "llvm/IR/CallingConv.h"
160 #include "llvm/IR/Constant.h"
161 #include "llvm/IR/Constants.h"
162 #include "llvm/IR/DataLayout.h"
163 #include "llvm/IR/DerivedTypes.h"
164 #include "llvm/IR/Function.h"
165 #include "llvm/IR/GlobalValue.h"
166 #include "llvm/IR/GlobalVariable.h"
167 #include "llvm/IR/IRBuilder.h"
168 #include "llvm/IR/InlineAsm.h"
169 #include "llvm/IR/InstVisitor.h"
170 #include "llvm/IR/InstrTypes.h"
171 #include "llvm/IR/Instruction.h"
172 #include "llvm/IR/Instructions.h"
173 #include "llvm/IR/IntrinsicInst.h"
174 #include "llvm/IR/Intrinsics.h"
175 #include "llvm/IR/IntrinsicsX86.h"
176 #include "llvm/IR/LLVMContext.h"
177 #include "llvm/IR/MDBuilder.h"
178 #include "llvm/IR/Module.h"
179 #include "llvm/IR/Type.h"
180 #include "llvm/IR/Value.h"
181 #include "llvm/IR/ValueMap.h"
182 #include "llvm/InitializePasses.h"
183 #include "llvm/Pass.h"
184 #include "llvm/Support/AtomicOrdering.h"
185 #include "llvm/Support/Casting.h"
186 #include "llvm/Support/CommandLine.h"
187 #include "llvm/Support/Compiler.h"
188 #include "llvm/Support/Debug.h"
189 #include "llvm/Support/ErrorHandling.h"
190 #include "llvm/Support/MathExtras.h"
191 #include "llvm/Support/raw_ostream.h"
192 #include "llvm/Transforms/Instrumentation.h"
193 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
194 #include "llvm/Transforms/Utils/Local.h"
195 #include "llvm/Transforms/Utils/ModuleUtils.h"
196 #include <algorithm>
197 #include <cassert>
198 #include <cstddef>
199 #include <cstdint>
200 #include <memory>
201 #include <string>
202 #include <tuple>
203 
using namespace llvm;

#define DEBUG_TYPE "msan"

// Size in bytes of one origin id (origins are 4-byte values; see the
// "Origin tracking" section of the file comment).
static const unsigned kOriginSize = 4;
// One origin value covers 4 aligned bytes of application memory.
static const Align kMinOriginAlignment = Align(4);
// Alignment of the thread-local slots used to pass parameter/retval shadow.
static const Align kShadowTLSAlignment = Align(8);

// These constants must be kept in sync with the ones in msan.h.
static const unsigned kParamTLSSize = 800;
static const unsigned kRetvalTLSSize = 800;

// Access sizes are powers of two: 1, 2, 4, 8.
static const size_t kNumberOfAccessSizes = 4;
218 
/// Track origins of uninitialized values.
///
/// Adds a section to MemorySanitizer report that points to the allocation
/// (stack or heap) the uninitialized bits came from originally.
static cl::opt<int> ClTrackOrigins("msan-track-origins",
       cl::desc("Track origins (allocation sites) of poisoned memory"),
       cl::Hidden, cl::init(0));

/// Continue executing after reporting a use of uninitialized memory (UMR).
static cl::opt<bool> ClKeepGoing("msan-keep-going",
       cl::desc("keep going after reporting a UMR"),
       cl::Hidden, cl::init(false));

static cl::opt<bool> ClPoisonStack("msan-poison-stack",
       cl::desc("poison uninitialized stack variables"),
       cl::Hidden, cl::init(true));

/// Poison stack variables via a runtime call instead of inline stores.
static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
       cl::desc("poison uninitialized stack variables with a call"),
       cl::Hidden, cl::init(false));

static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
       cl::desc("poison uninitialized stack variables with the given pattern"),
       cl::Hidden, cl::init(0xff));

static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
       cl::desc("poison undef temps"),
       cl::Hidden, cl::init(true));

static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
       cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
       cl::Hidden, cl::init(true));

static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact",
       cl::desc("exact handling of relational integer ICmp"),
       cl::Hidden, cl::init(false));

static cl::opt<bool> ClHandleLifetimeIntrinsics(
    "msan-handle-lifetime-intrinsics",
    cl::desc(
        "when possible, poison scoped variables at the beginning of the scope "
        "(slower, but more precise)"),
    cl::Hidden, cl::init(true));

// When compiling the Linux kernel, we sometimes see false positives related to
// MSan being unable to understand that inline assembly calls may initialize
// local variables.
// This flag makes the compiler conservatively unpoison every memory location
// passed into an assembly call. Note that this may cause false positives.
// Because it's impossible to figure out the array sizes, we can only unpoison
// the first sizeof(type) bytes for each type* pointer.
// The instrumentation is only enabled in KMSAN builds, and only if
// -msan-handle-asm-conservative is on. This is done because we may want to
// quickly disable assembly instrumentation when it breaks.
static cl::opt<bool> ClHandleAsmConservative(
    "msan-handle-asm-conservative",
    cl::desc("conservative handling of inline assembly"), cl::Hidden,
    cl::init(true));

// This flag controls whether we check the shadow of the address
// operand of load or store. Such bugs are very rare, since load from
// a garbage address typically results in SEGV, but still happen
// (e.g. only lower bits of address are garbage, or the access happens
// early at program startup where malloc-ed memory is more likely to
// be zeroed). As of 2012-08-28 this flag adds 20% slowdown.
static cl::opt<bool> ClCheckAccessAddress("msan-check-access-address",
       cl::desc("report accesses through a pointer which has poisoned shadow"),
       cl::Hidden, cl::init(true));

static cl::opt<bool> ClEagerChecks(
    "msan-eager-checks",
    cl::desc("check arguments and return values at function call boundaries"),
    cl::Hidden, cl::init(false));

static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions",
       cl::desc("print out instructions with default strict semantics"),
       cl::Hidden, cl::init(false));

static cl::opt<int> ClInstrumentationWithCallThreshold(
    "msan-instrumentation-with-call-threshold",
    cl::desc(
        "If the function being instrumented requires more than "
        "this number of checks and origin stores, use callbacks instead of "
        "inline checks (-1 means never use callbacks)."),
    cl::Hidden, cl::init(3500));

static cl::opt<bool>
    ClEnableKmsan("msan-kernel",
                  cl::desc("Enable KernelMemorySanitizer instrumentation"),
                  cl::Hidden, cl::init(false));

// This is an experiment to enable handling of cases where shadow is a non-zero
// compile-time constant. For some unexplainable reason they were silently
// ignored in the instrumentation.
static cl::opt<bool> ClCheckConstantShadow("msan-check-constant-shadow",
       cl::desc("Insert checks for constant shadow values"),
       cl::Hidden, cl::init(false));

// This is off by default because of a bug in gold:
// https://sourceware.org/bugzilla/show_bug.cgi?id=19002
static cl::opt<bool> ClWithComdat("msan-with-comdat",
       cl::desc("Place MSan constructors in comdat sections"),
       cl::Hidden, cl::init(false));

// These options allow to specify custom memory map parameters
// See MemoryMapParams for details.
static cl::opt<uint64_t> ClAndMask("msan-and-mask",
                                   cl::desc("Define custom MSan AndMask"),
                                   cl::Hidden, cl::init(0));

static cl::opt<uint64_t> ClXorMask("msan-xor-mask",
                                   cl::desc("Define custom MSan XorMask"),
                                   cl::Hidden, cl::init(0));

static cl::opt<uint64_t> ClShadowBase("msan-shadow-base",
                                      cl::desc("Define custom MSan ShadowBase"),
                                      cl::Hidden, cl::init(0));

static cl::opt<uint64_t> ClOriginBase("msan-origin-base",
                                      cl::desc("Define custom MSan OriginBase"),
                                      cl::Hidden, cl::init(0));

// Names of the module constructor and the runtime initialization hook that
// userspace builds register (KMSAN adds no global constructors).
static const char *const kMsanModuleCtorName = "msan.module_ctor";
static const char *const kMsanInitName = "__msan_init";
342 
namespace {

// Memory map parameters used in application-to-shadow address calculation.
// Offset = (Addr & ~AndMask) ^ XorMask
// Shadow = ShadowBase + Offset
// Origin = OriginBase + Offset
struct MemoryMapParams {
  uint64_t AndMask;    // Bits cleared from the application address.
  uint64_t XorMask;    // Bits flipped in the application address.
  uint64_t ShadowBase; // Added to the offset to reach the shadow region.
  uint64_t OriginBase; // Added to the offset to reach the origin region.
};

// Pair of mappings for one OS, selected by pointer width.
struct PlatformMemoryMapParams {
  const MemoryMapParams *bits32; // 32-bit variant; null when unsupported.
  const MemoryMapParams *bits64; // 64-bit variant.
};

} // end anonymous namespace
362 
// Per-platform shadow/origin mapping constants (see MemoryMapParams above).
// NOTE(review): these values presumably must match the mapping assumed by the
// MSan runtime for each platform — verify against compiler-rt before changing.

// i386 Linux
static const MemoryMapParams Linux_I386_MemoryMapParams = {
  0x000080000000,  // AndMask
  0,               // XorMask (not used)
  0,               // ShadowBase (not used)
  0x000040000000,  // OriginBase
};

// x86_64 Linux
static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
#ifdef MSAN_LINUX_X86_64_OLD_MAPPING
  0x400000000000,  // AndMask
  0,               // XorMask (not used)
  0,               // ShadowBase (not used)
  0x200000000000,  // OriginBase
#else
  0,               // AndMask (not used)
  0x500000000000,  // XorMask
  0,               // ShadowBase (not used)
  0x100000000000,  // OriginBase
#endif
};

// mips64 Linux
static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
  0,               // AndMask (not used)
  0x008000000000,  // XorMask
  0,               // ShadowBase (not used)
  0x002000000000,  // OriginBase
};

// ppc64 Linux
static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = {
  0xE00000000000,  // AndMask
  0x100000000000,  // XorMask
  0x080000000000,  // ShadowBase
  0x1C0000000000,  // OriginBase
};

// s390x Linux
static const MemoryMapParams Linux_S390X_MemoryMapParams = {
    0xC00000000000, // AndMask
    0,              // XorMask (not used)
    0x080000000000, // ShadowBase
    0x1C0000000000, // OriginBase
};

// aarch64 Linux
static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
  0,               // AndMask (not used)
  0x06000000000,   // XorMask
  0,               // ShadowBase (not used)
  0x01000000000,   // OriginBase
};

// i386 FreeBSD
static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
  0x000180000000,  // AndMask
  0x000040000000,  // XorMask
  0x000020000000,  // ShadowBase
  0x000700000000,  // OriginBase
};

// x86_64 FreeBSD
static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = {
  0xc00000000000,  // AndMask
  0x200000000000,  // XorMask
  0x100000000000,  // ShadowBase
  0x380000000000,  // OriginBase
};

// x86_64 NetBSD
static const MemoryMapParams NetBSD_X86_64_MemoryMapParams = {
  0,               // AndMask
  0x500000000000,  // XorMask
  0,               // ShadowBase
  0x100000000000,  // OriginBase
};

static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
  &Linux_I386_MemoryMapParams,
  &Linux_X86_64_MemoryMapParams,
};

static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
  nullptr,
  &Linux_MIPS64_MemoryMapParams,
};

static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = {
  nullptr,
  &Linux_PowerPC64_MemoryMapParams,
};

static const PlatformMemoryMapParams Linux_S390_MemoryMapParams = {
    nullptr,
    &Linux_S390X_MemoryMapParams,
};

static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
  nullptr,
  &Linux_AArch64_MemoryMapParams,
};

static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
  &FreeBSD_I386_MemoryMapParams,
  &FreeBSD_X86_64_MemoryMapParams,
};

static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = {
  nullptr,
  &NetBSD_X86_64_MemoryMapParams,
};
476 
477 namespace {
478 
479 /// Instrument functions of a module to detect uninitialized reads.
480 ///
481 /// Instantiating MemorySanitizer inserts the msan runtime library API function
482 /// declarations into the module if they don't exist already. Instantiating
483 /// ensures the __msan_init function is in the list of global constructors for
484 /// the module.
485 class MemorySanitizer {
486 public:
487   MemorySanitizer(Module &M, MemorySanitizerOptions Options)
488       : CompileKernel(Options.Kernel), TrackOrigins(Options.TrackOrigins),
489         Recover(Options.Recover) {
490     initializeModule(M);
491   }
492 
493   // MSan cannot be moved or copied because of MapParams.
494   MemorySanitizer(MemorySanitizer &&) = delete;
495   MemorySanitizer &operator=(MemorySanitizer &&) = delete;
496   MemorySanitizer(const MemorySanitizer &) = delete;
497   MemorySanitizer &operator=(const MemorySanitizer &) = delete;
498 
499   bool sanitizeFunction(Function &F, TargetLibraryInfo &TLI);
500 
501 private:
502   friend struct MemorySanitizerVisitor;
503   friend struct VarArgAMD64Helper;
504   friend struct VarArgMIPS64Helper;
505   friend struct VarArgAArch64Helper;
506   friend struct VarArgPowerPC64Helper;
507   friend struct VarArgSystemZHelper;
508 
509   void initializeModule(Module &M);
510   void initializeCallbacks(Module &M);
511   void createKernelApi(Module &M);
512   void createUserspaceApi(Module &M);
513 
514   /// True if we're compiling the Linux kernel.
515   bool CompileKernel;
516   /// Track origins (allocation points) of uninitialized values.
517   int TrackOrigins;
518   bool Recover;
519 
520   LLVMContext *C;
521   Type *IntptrTy;
522   Type *OriginTy;
523 
524   // XxxTLS variables represent the per-thread state in MSan and per-task state
525   // in KMSAN.
526   // For the userspace these point to thread-local globals. In the kernel land
527   // they point to the members of a per-task struct obtained via a call to
528   // __msan_get_context_state().
529 
530   /// Thread-local shadow storage for function parameters.
531   Value *ParamTLS;
532 
533   /// Thread-local origin storage for function parameters.
534   Value *ParamOriginTLS;
535 
536   /// Thread-local shadow storage for function return value.
537   Value *RetvalTLS;
538 
539   /// Thread-local origin storage for function return value.
540   Value *RetvalOriginTLS;
541 
542   /// Thread-local shadow storage for in-register va_arg function
543   /// parameters (x86_64-specific).
544   Value *VAArgTLS;
545 
546   /// Thread-local shadow storage for in-register va_arg function
547   /// parameters (x86_64-specific).
548   Value *VAArgOriginTLS;
549 
550   /// Thread-local shadow storage for va_arg overflow area
551   /// (x86_64-specific).
552   Value *VAArgOverflowSizeTLS;
553 
554   /// Are the instrumentation callbacks set up?
555   bool CallbacksInitialized = false;
556 
557   /// The run-time callback to print a warning.
558   FunctionCallee WarningFn;
559 
560   // These arrays are indexed by log2(AccessSize).
561   FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
562   FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];
563 
564   /// Run-time helper that generates a new origin value for a stack
565   /// allocation.
566   FunctionCallee MsanSetAllocaOrigin4Fn;
567 
568   /// Run-time helper that poisons stack on function entry.
569   FunctionCallee MsanPoisonStackFn;
570 
571   /// Run-time helper that records a store (or any event) of an
572   /// uninitialized value and returns an updated origin id encoding this info.
573   FunctionCallee MsanChainOriginFn;
574 
575   /// Run-time helper that paints an origin over a region.
576   FunctionCallee MsanSetOriginFn;
577 
578   /// MSan runtime replacements for memmove, memcpy and memset.
579   FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
580 
581   /// KMSAN callback for task-local function argument shadow.
582   StructType *MsanContextStateTy;
583   FunctionCallee MsanGetContextStateFn;
584 
585   /// Functions for poisoning/unpoisoning local variables
586   FunctionCallee MsanPoisonAllocaFn, MsanUnpoisonAllocaFn;
587 
588   /// Each of the MsanMetadataPtrXxx functions returns a pair of shadow/origin
589   /// pointers.
590   FunctionCallee MsanMetadataPtrForLoadN, MsanMetadataPtrForStoreN;
591   FunctionCallee MsanMetadataPtrForLoad_1_8[4];
592   FunctionCallee MsanMetadataPtrForStore_1_8[4];
593   FunctionCallee MsanInstrumentAsmStoreFn;
594 
595   /// Helper to choose between different MsanMetadataPtrXxx().
596   FunctionCallee getKmsanShadowOriginAccessFn(bool isStore, int size);
597 
598   /// Memory map parameters used in application-to-shadow calculation.
599   const MemoryMapParams *MapParams;
600 
601   /// Custom memory map parameters used when -msan-shadow-base or
602   // -msan-origin-base is provided.
603   MemoryMapParams CustomMapParams;
604 
605   MDNode *ColdCallWeights;
606 
607   /// Branch weights for origin store.
608   MDNode *OriginStoreWeights;
609 };
610 
611 void insertModuleCtor(Module &M) {
612   getOrCreateSanitizerCtorAndInitFunctions(
613       M, kMsanModuleCtorName, kMsanInitName,
614       /*InitArgTypes=*/{},
615       /*InitArgs=*/{},
616       // This callback is invoked when the functions are created the first
617       // time. Hook them into the global ctors list in that case:
618       [&](Function *Ctor, FunctionCallee) {
619         if (!ClWithComdat) {
620           appendToGlobalCtors(M, Ctor, 0);
621           return;
622         }
623         Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
624         Ctor->setComdat(MsanCtorComdat);
625         appendToGlobalCtors(M, Ctor, 0, Ctor);
626       });
627 }
628 
629 /// A legacy function pass for msan instrumentation.
630 ///
631 /// Instruments functions to detect uninitialized reads.
struct MemorySanitizerLegacyPass : public FunctionPass {
  // Pass identification, replacement for typeid.
  static char ID;

  MemorySanitizerLegacyPass(MemorySanitizerOptions Options = {})
      : FunctionPass(ID), Options(Options) {
    initializeMemorySanitizerLegacyPassPass(*PassRegistry::getPassRegistry());
  }
  StringRef getPassName() const override { return "MemorySanitizerLegacyPass"; }

  // Only TargetLibraryInfo is required; it is fetched per-function below.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }

  // Delegates the actual instrumentation to the shared MemorySanitizer
  // instance; returns true if F was modified.
  bool runOnFunction(Function &F) override {
    return MSan->sanitizeFunction(
        F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F));
  }
  // Defined later in the file; presumably constructs MSan for the module
  // before any runOnFunction call — verify against the out-of-view body.
  bool doInitialization(Module &M) override;

  // Module-wide sanitizer state shared by all runOnFunction invocations.
  Optional<MemorySanitizer> MSan;
  MemorySanitizerOptions Options;
};
655 
656 template <class T> T getOptOrDefault(const cl::opt<T> &Opt, T Default) {
657   return (Opt.getNumOccurrences() > 0) ? Opt : Default;
658 }
659 
660 } // end anonymous namespace
661 
// Merge programmatic defaults (TO, R, K) with command-line overrides, which
// win when present. Kernel mode forces origin tracking at level 2 and
// recovery, matching the KMSAN notes in the file comment (KMSAN always tracks
// origins and implies msan-keep-going).
MemorySanitizerOptions::MemorySanitizerOptions(int TO, bool R, bool K)
    : Kernel(getOptOrDefault(ClEnableKmsan, K)),
      TrackOrigins(getOptOrDefault(ClTrackOrigins, Kernel ? 2 : TO)),
      Recover(getOptOrDefault(ClKeepGoing, Kernel || R)) {}
666 
667 PreservedAnalyses MemorySanitizerPass::run(Function &F,
668                                            FunctionAnalysisManager &FAM) {
669   MemorySanitizer Msan(*F.getParent(), Options);
670   if (Msan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F)))
671     return PreservedAnalyses::none();
672   return PreservedAnalyses::all();
673 }
674 
675 PreservedAnalyses MemorySanitizerPass::run(Module &M,
676                                            ModuleAnalysisManager &AM) {
677   if (Options.Kernel)
678     return PreservedAnalyses::all();
679   insertModuleCtor(M);
680   return PreservedAnalyses::none();
681 }
682 
char MemorySanitizerLegacyPass::ID = 0;

// Register the legacy pass under the "msan" name, declaring its dependency on
// TargetLibraryInfo.
INITIALIZE_PASS_BEGIN(MemorySanitizerLegacyPass, "msan",
                      "MemorySanitizer: detects uninitialized reads.", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(MemorySanitizerLegacyPass, "msan",
                    "MemorySanitizer: detects uninitialized reads.", false,
                    false)
692 
693 FunctionPass *
694 llvm::createMemorySanitizerLegacyPassPass(MemorySanitizerOptions Options) {
695   return new MemorySanitizerLegacyPass(Options);
696 }
697 
698 /// Create a non-const global initialized with the given string.
699 ///
700 /// Creates a writable global for Str so that we can pass it to the
701 /// run-time lib. Runtime uses first 4 bytes of the string to store the
702 /// frame ID, so the string needs to be mutable.
703 static GlobalVariable *createPrivateNonConstGlobalForString(Module &M,
704                                                             StringRef Str) {
705   Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
706   return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/false,
707                             GlobalValue::PrivateLinkage, StrConst, "");
708 }
709 
/// Create KMSAN API callbacks.
///
/// Declares the runtime entry points used when instrumenting kernel code.
/// Unlike the userspace flavor, the TLS slots are not globals: they are
/// looked up per-function from the kmsan context state (see
/// insertKmsanPrologue()), so they are reset to null here.
void MemorySanitizer::createKernelApi(Module &M) {
  IRBuilder<> IRB(*C);

  // These will be initialized in insertKmsanPrologue().
  RetvalTLS = nullptr;
  RetvalOriginTLS = nullptr;
  ParamTLS = nullptr;
  ParamOriginTLS = nullptr;
  VAArgTLS = nullptr;
  VAArgOriginTLS = nullptr;
  VAArgOverflowSizeTLS = nullptr;

  // KMSAN warning takes the origin as a 32-bit argument.
  WarningFn = M.getOrInsertFunction("__msan_warning", IRB.getVoidTy(),
                                    IRB.getInt32Ty());
  // Requests the per-task context state (kmsan_context_state*) from the
  // runtime library.
  // NOTE: the field order of this struct must stay in sync with the GEP
  // indices used in insertKmsanPrologue().
  MsanContextStateTy = StructType::get(
      ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
      ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8),
      ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
      ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), /* va_arg_origin */
      IRB.getInt64Ty(), ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy,
      OriginTy);
  MsanGetContextStateFn = M.getOrInsertFunction(
      "__msan_get_context_state", PointerType::get(MsanContextStateTy, 0));

  // Each metadata accessor returns a {shadow_ptr, origin_ptr} pair.
  Type *RetTy = StructType::get(PointerType::get(IRB.getInt8Ty(), 0),
                                PointerType::get(IRB.getInt32Ty(), 0));

  // Declare the fixed-size accessors: __msan_metadata_ptr_for_{load,store}_N
  // for N in {1, 2, 4, 8}.
  for (int ind = 0, size = 1; ind < 4; ind++, size <<= 1) {
    std::string name_load =
        "__msan_metadata_ptr_for_load_" + std::to_string(size);
    std::string name_store =
        "__msan_metadata_ptr_for_store_" + std::to_string(size);
    MsanMetadataPtrForLoad_1_8[ind] = M.getOrInsertFunction(
        name_load, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
    MsanMetadataPtrForStore_1_8[ind] = M.getOrInsertFunction(
        name_store, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
  }

  // Variable-size fallbacks, taking the access size as a second argument.
  MsanMetadataPtrForLoadN = M.getOrInsertFunction(
      "__msan_metadata_ptr_for_load_n", RetTy,
      PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
  MsanMetadataPtrForStoreN = M.getOrInsertFunction(
      "__msan_metadata_ptr_for_store_n", RetTy,
      PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());

  // Functions for poisoning and unpoisoning memory.
  MsanPoisonAllocaFn =
      M.getOrInsertFunction("__msan_poison_alloca", IRB.getVoidTy(),
                            IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy());
  MsanUnpoisonAllocaFn = M.getOrInsertFunction(
      "__msan_unpoison_alloca", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy);
}
765 
766 static Constant *getOrInsertGlobal(Module &M, StringRef Name, Type *Ty) {
767   return M.getOrInsertGlobal(Name, Ty, [&] {
768     return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
769                               nullptr, Name, nullptr,
770                               GlobalVariable::InitialExecTLSModel);
771   });
772 }
773 
/// Insert declarations for userspace-specific functions and globals.
///
/// Declares the warning callback, the per-thread TLS shadow/origin buffers,
/// and the optional "maybe" callbacks used when instrumenting with calls
/// instead of inline checks.
void MemorySanitizer::createUserspaceApi(Module &M) {
  IRBuilder<> IRB(*C);

  // Create the callback.
  // FIXME: this function should have "Cold" calling conv,
  // which is not yet implemented.
  StringRef WarningFnName = Recover ? "__msan_warning_with_origin"
                                    : "__msan_warning_with_origin_noreturn";
  WarningFn =
      M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), IRB.getInt32Ty());

  // Create the global TLS variables.
  RetvalTLS =
      getOrInsertGlobal(M, "__msan_retval_tls",
                        ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8));

  RetvalOriginTLS = getOrInsertGlobal(M, "__msan_retval_origin_tls", OriginTy);

  ParamTLS =
      getOrInsertGlobal(M, "__msan_param_tls",
                        ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));

  ParamOriginTLS =
      getOrInsertGlobal(M, "__msan_param_origin_tls",
                        ArrayType::get(OriginTy, kParamTLSSize / 4));

  VAArgTLS =
      getOrInsertGlobal(M, "__msan_va_arg_tls",
                        ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));

  VAArgOriginTLS =
      getOrInsertGlobal(M, "__msan_va_arg_origin_tls",
                        ArrayType::get(OriginTy, kParamTLSSize / 4));

  VAArgOverflowSizeTLS =
      getOrInsertGlobal(M, "__msan_va_arg_overflow_size_tls", IRB.getInt64Ty());

  // Declare __msan_maybe_warning_N and __msan_maybe_store_origin_N for every
  // supported access size (1 << AccessSizeIndex bytes). Both integer
  // arguments are marked ZExt since the runtime expects zero-extended values.
  for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
       AccessSizeIndex++) {
    unsigned AccessSize = 1 << AccessSizeIndex;
    std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize);
    SmallVector<std::pair<unsigned, Attribute>, 2> MaybeWarningFnAttrs;
    MaybeWarningFnAttrs.push_back(std::make_pair(
        AttributeList::FirstArgIndex, Attribute::get(*C, Attribute::ZExt)));
    MaybeWarningFnAttrs.push_back(std::make_pair(
        AttributeList::FirstArgIndex + 1, Attribute::get(*C, Attribute::ZExt)));
    MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
        FunctionName, AttributeList::get(*C, MaybeWarningFnAttrs),
        IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty());

    FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
    SmallVector<std::pair<unsigned, Attribute>, 2> MaybeStoreOriginFnAttrs;
    MaybeStoreOriginFnAttrs.push_back(std::make_pair(
        AttributeList::FirstArgIndex, Attribute::get(*C, Attribute::ZExt)));
    MaybeStoreOriginFnAttrs.push_back(std::make_pair(
        AttributeList::FirstArgIndex + 2, Attribute::get(*C, Attribute::ZExt)));
    MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
        FunctionName, AttributeList::get(*C, MaybeStoreOriginFnAttrs),
        IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt8PtrTy(),
        IRB.getInt32Ty());
  }

  // Callbacks for poisoning allocas and recording their origins.
  MsanSetAllocaOrigin4Fn = M.getOrInsertFunction(
    "__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
    IRB.getInt8PtrTy(), IntptrTy);
  MsanPoisonStackFn =
      M.getOrInsertFunction("__msan_poison_stack", IRB.getVoidTy(),
                            IRB.getInt8PtrTy(), IntptrTy);
}
844 
/// Insert extern declaration of runtime-provided functions and globals.
///
/// Declares callbacks shared by the kernel and userspace flavors, then
/// defers to createKernelApi() or createUserspaceApi() for the rest.
/// Idempotent: guarded by CallbacksInitialized.
void MemorySanitizer::initializeCallbacks(Module &M) {
  // Only do this once.
  if (CallbacksInitialized)
    return;

  IRBuilder<> IRB(*C);
  // Initialize callbacks that are common for kernel and userspace
  // instrumentation.
  MsanChainOriginFn = M.getOrInsertFunction(
    "__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty());
  MsanSetOriginFn =
      M.getOrInsertFunction("__msan_set_origin", IRB.getVoidTy(),
                            IRB.getInt8PtrTy(), IntptrTy, IRB.getInt32Ty());
  // Interceptor-style replacements for the libc memory functions; these
  // also update shadow/origin for the affected range.
  MemmoveFn = M.getOrInsertFunction(
    "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
    IRB.getInt8PtrTy(), IntptrTy);
  MemcpyFn = M.getOrInsertFunction(
    "__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
    IntptrTy);
  MemsetFn = M.getOrInsertFunction(
    "__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
    IntptrTy);

  MsanInstrumentAsmStoreFn =
      M.getOrInsertFunction("__msan_instrument_asm_store", IRB.getVoidTy(),
                            PointerType::get(IRB.getInt8Ty(), 0), IntptrTy);

  if (CompileKernel) {
    createKernelApi(M);
  } else {
    createUserspaceApi(M);
  }
  CallbacksInitialized = true;
}
880 
881 FunctionCallee MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore,
882                                                              int size) {
883   FunctionCallee *Fns =
884       isStore ? MsanMetadataPtrForStore_1_8 : MsanMetadataPtrForLoad_1_8;
885   switch (size) {
886   case 1:
887     return Fns[0];
888   case 2:
889     return Fns[1];
890   case 4:
891     return Fns[2];
892   case 8:
893     return Fns[3];
894   default:
895     return nullptr;
896   }
897 }
898 
/// Module-level initialization.
///
/// inserts a call to __msan_init to the module's constructor list.
///
/// Also selects the shadow memory mapping parameters: command-line
/// overrides win, otherwise the mapping is chosen by target OS/arch.
/// Finally caches common types and declares the __msan_track_origins /
/// __msan_keep_going globals that the userspace runtime reads.
void MemorySanitizer::initializeModule(Module &M) {
  auto &DL = M.getDataLayout();

  bool ShadowPassed = ClShadowBase.getNumOccurrences() > 0;
  bool OriginPassed = ClOriginBase.getNumOccurrences() > 0;
  // Check the overrides first
  if (ShadowPassed || OriginPassed) {
    CustomMapParams.AndMask = ClAndMask;
    CustomMapParams.XorMask = ClXorMask;
    CustomMapParams.ShadowBase = ClShadowBase;
    CustomMapParams.OriginBase = ClOriginBase;
    MapParams = &CustomMapParams;
  } else {
    // No overrides: pick the predefined mapping for the target triple.
    Triple TargetTriple(M.getTargetTriple());
    switch (TargetTriple.getOS()) {
      case Triple::FreeBSD:
        switch (TargetTriple.getArch()) {
          case Triple::x86_64:
            MapParams = FreeBSD_X86_MemoryMapParams.bits64;
            break;
          case Triple::x86:
            MapParams = FreeBSD_X86_MemoryMapParams.bits32;
            break;
          default:
            report_fatal_error("unsupported architecture");
        }
        break;
      case Triple::NetBSD:
        switch (TargetTriple.getArch()) {
          case Triple::x86_64:
            MapParams = NetBSD_X86_MemoryMapParams.bits64;
            break;
          default:
            report_fatal_error("unsupported architecture");
        }
        break;
      case Triple::Linux:
        switch (TargetTriple.getArch()) {
          case Triple::x86_64:
            MapParams = Linux_X86_MemoryMapParams.bits64;
            break;
          case Triple::x86:
            MapParams = Linux_X86_MemoryMapParams.bits32;
            break;
          case Triple::mips64:
          case Triple::mips64el:
            MapParams = Linux_MIPS_MemoryMapParams.bits64;
            break;
          case Triple::ppc64:
          case Triple::ppc64le:
            MapParams = Linux_PowerPC_MemoryMapParams.bits64;
            break;
          case Triple::systemz:
            MapParams = Linux_S390_MemoryMapParams.bits64;
            break;
          case Triple::aarch64:
          case Triple::aarch64_be:
            MapParams = Linux_ARM_MemoryMapParams.bits64;
            break;
          default:
            report_fatal_error("unsupported architecture");
        }
        break;
      default:
        report_fatal_error("unsupported operating system");
    }
  }

  // Cache the context and the types used throughout the instrumentation.
  C = &(M.getContext());
  IRBuilder<> IRB(*C);
  IntptrTy = IRB.getIntPtrTy(DL);
  OriginTy = IRB.getInt32Ty();

  ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
  OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);

  // Userspace only: publish the origin-tracking level and recover flag as
  // weak ODR globals so the runtime can read the instrumentation options.
  if (!CompileKernel) {
    if (TrackOrigins)
      M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] {
        return new GlobalVariable(
            M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
            IRB.getInt32(TrackOrigins), "__msan_track_origins");
      });

    if (Recover)
      M.getOrInsertGlobal("__msan_keep_going", IRB.getInt32Ty(), [&] {
        return new GlobalVariable(M, IRB.getInt32Ty(), true,
                                  GlobalValue::WeakODRLinkage,
                                  IRB.getInt32(Recover), "__msan_keep_going");
      });
}
}
994 
bool MemorySanitizerLegacyPass::doInitialization(Module &M) {
  // The module constructor is only inserted for userspace instrumentation.
  if (!Options.Kernel)
    insertModuleCtor(M);
  // Construct the MemorySanitizer instance used by this pass.
  MSan.emplace(M, Options);
  return true;
}
1001 
1002 namespace {
1003 
1004 /// A helper class that handles instrumentation of VarArg
1005 /// functions on a particular platform.
1006 ///
1007 /// Implementations are expected to insert the instrumentation
1008 /// necessary to propagate argument shadow through VarArg function
1009 /// calls. Visit* methods are called during an InstVisitor pass over
1010 /// the function, and should avoid creating new basic blocks. A new
1011 /// instance of this class is created for each instrumented function.
struct VarArgHelper {
  virtual ~VarArgHelper() = default;

  /// Visit a CallBase.
  virtual void visitCallBase(CallBase &CB, IRBuilder<> &IRB) = 0;

  /// Visit a va_start call.
  virtual void visitVAStartInst(VAStartInst &I) = 0;

  /// Visit a va_copy call.
  virtual void visitVACopyInst(VACopyInst &I) = 0;

  /// Finalize function instrumentation.
  ///
  /// This method is called after visiting all interesting (see above)
  /// instructions in a function.
  virtual void finalizeInstrumentation() = 0;
};
1030 
1031 struct MemorySanitizerVisitor;
1032 
1033 } // end anonymous namespace
1034 
1035 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
1036                                         MemorySanitizerVisitor &Visitor);
1037 
1038 static unsigned TypeSizeToSizeIndex(unsigned TypeSize) {
1039   if (TypeSize <= 8) return 0;
1040   return Log2_32_Ceil((TypeSize + 7) / 8);
1041 }
1042 
1043 namespace {
1044 
1045 /// This class does all the work for a given function. Store and Load
1046 /// instructions store and load corresponding shadow and origin
1047 /// values. Most instructions propagate shadow from arguments to their
1048 /// return values. Certain instructions (most importantly, BranchInst)
1049 /// test their argument shadow and print reports (with a runtime call) if it's
1050 /// non-zero.
struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
  Function &F;
  MemorySanitizer &MS;
  // Shadow/origin PHI nodes created during the visit; their incoming values
  // are filled in afterwards (see runOnFunction()).
  SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
  // Map from an application value to its shadow / origin value.
  ValueMap<Value*, Value*> ShadowMap, OriginMap;
  std::unique_ptr<VarArgHelper> VAHelper;
  const TargetLibraryInfo *TLI;
  // First block to instrument: the block after the KMSAN prologue when
  // compiling for the kernel, otherwise the function entry block.
  BasicBlock *ActualFnStart;

  // The following flags disable parts of MSan instrumentation based on
  // exclusion list contents and command-line options.
  bool InsertChecks;
  bool PropagateShadow;
  bool PoisonStack;
  bool PoisonUndef;

  // A shadow/origin pair together with the instruction before which the
  // corresponding check must be materialized.
  struct ShadowOriginAndInsertPoint {
    Value *Shadow;
    Value *Origin;
    Instruction *OrigIns;

    ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I)
      : Shadow(S), Origin(O), OrigIns(I) {}
  };
  // Checks collected during the visit and emitted by materializeChecks().
  SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
  bool InstrumentLifetimeStart = ClHandleLifetimeIntrinsics;
  // Allocas still awaiting instrumentation; entries are erased once their
  // llvm.lifetime.start has been instrumented instead.
  SmallSet<AllocaInst *, 16> AllocaSet;
  SmallVector<std::pair<IntrinsicInst *, AllocaInst *>, 16> LifetimeStartList;
  // Stores collected during the visit and emitted by materializeStores().
  SmallVector<StoreInst *, 16> StoreList;
1080 
  // Set up per-function state: derive the instrumentation flags from the
  // sanitize_memory attribute and the command-line options, make sure the
  // runtime callbacks are declared, and (for KMSAN) insert the prologue
  // that fetches the per-task context state.
  MemorySanitizerVisitor(Function &F, MemorySanitizer &MS,
                         const TargetLibraryInfo &TLI)
      : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)), TLI(&TLI) {
    bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeMemory);
    InsertChecks = SanitizeFunction;
    PropagateShadow = SanitizeFunction;
    PoisonStack = SanitizeFunction && ClPoisonStack;
    PoisonUndef = SanitizeFunction && ClPoisonUndef;

    MS.initializeCallbacks(*F.getParent());
    if (MS.CompileKernel)
      ActualFnStart = insertKmsanPrologue(F);
    else
      ActualFnStart = &F.getEntryBlock();

    LLVM_DEBUG(if (!InsertChecks) dbgs()
               << "MemorySanitizer is not inserting checks into '"
               << F.getName() << "'\n");
  }
1100 
1101   Value *updateOrigin(Value *V, IRBuilder<> &IRB) {
1102     if (MS.TrackOrigins <= 1) return V;
1103     return IRB.CreateCall(MS.MsanChainOriginFn, V);
1104   }
1105 
1106   Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) {
1107     const DataLayout &DL = F.getParent()->getDataLayout();
1108     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
1109     if (IntptrSize == kOriginSize) return Origin;
1110     assert(IntptrSize == kOriginSize * 2);
1111     Origin = IRB.CreateIntCast(Origin, MS.IntptrTy, /* isSigned */ false);
1112     return IRB.CreateOr(Origin, IRB.CreateShl(Origin, kOriginSize * 8));
1113   }
1114 
  /// Fill memory range with the given origin value.
  ///
  /// When the destination is sufficiently aligned, the bulk of the range is
  /// painted with intptr-wide stores of the replicated origin (see
  /// originToIntptr()); the remainder is painted with kOriginSize-wide
  /// stores.
  void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
                   unsigned Size, Align Alignment) {
    const DataLayout &DL = F.getParent()->getDataLayout();
    const Align IntptrAlignment = DL.getABITypeAlign(MS.IntptrTy);
    unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
    assert(IntptrAlignment >= kMinOriginAlignment);
    assert(IntptrSize >= kOriginSize);

    // Ofs counts origin-sized slots already painted by the wide loop.
    unsigned Ofs = 0;
    Align CurrentAlignment = Alignment;
    if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
      Value *IntptrOrigin = originToIntptr(IRB, Origin);
      Value *IntptrOriginPtr =
          IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0));
      for (unsigned i = 0; i < Size / IntptrSize; ++i) {
        Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i)
                       : IntptrOriginPtr;
        IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
        Ofs += IntptrSize / kOriginSize;
        // After the first (aligned) store, subsequent stores are known to be
        // intptr-aligned.
        CurrentAlignment = IntptrAlignment;
      }
    }

    // Paint the remaining (or all, if the wide loop did not run) slots with
    // origin-sized stores; the slot count rounds Size up to whole origins.
    for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
      Value *GEP =
          i ? IRB.CreateConstGEP1_32(MS.OriginTy, OriginPtr, i) : OriginPtr;
      IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
      CurrentAlignment = kMinOriginAlignment;
    }
  }
1146 
  /// Store the origin for a (possibly aggregate) shadow value, but only
  /// when the shadow may be non-zero: either unconditionally for
  /// aggregates, via a __msan_maybe_store_origin_N call (when AsCall), or
  /// guarded by an inline shadow != 0 check.
  void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
                   Value *OriginPtr, Align Alignment, bool AsCall) {
    const DataLayout &DL = F.getParent()->getDataLayout();
    const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
    unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
    if (Shadow->getType()->isAggregateType()) {
      // Aggregate shadow cannot be compared against zero as one value;
      // paint the origin unconditionally.
      paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
                  OriginAlignment);
    } else {
      Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
      if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
        // Constant shadow: the outcome is known at compile time. Paint the
        // origin only when checking constant shadow is enabled and the
        // shadow is non-zero.
        if (ClCheckConstantShadow && !ConstantShadow->isZeroValue())
          paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
                      OriginAlignment);
        return;
      }

      unsigned TypeSizeInBits =
          DL.getTypeSizeInBits(ConvertedShadow->getType());
      unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
      if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
        // Call the runtime helper, which performs the shadow test itself.
        FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex];
        Value *ConvertedShadow2 = IRB.CreateZExt(
            ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
        IRB.CreateCall(Fn, {ConvertedShadow2,
                            IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
                            Origin});
      } else {
        // Inline check: branch on shadow != 0 and paint the origin in the
        // (unlikely, per OriginStoreWeights) taken block.
        Value *Cmp = IRB.CreateICmpNE(
            ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp");
        Instruction *CheckTerm = SplitBlockAndInsertIfThen(
            Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
        IRBuilder<> IRBNew(CheckTerm);
        paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), OriginPtr, StoreSize,
                    OriginAlignment);
      }
    }
  }
1185 
  /// Emit shadow (and, when tracking, origin) stores for every application
  /// store collected in StoreList during the visit.
  void materializeStores(bool InstrumentWithCalls) {
    for (StoreInst *SI : StoreList) {
      IRBuilder<> IRB(SI);
      Value *Val = SI->getValueOperand();
      Value *Addr = SI->getPointerOperand();
      // Atomic stores get clean shadow: their shadow is not propagated.
      Value *Shadow = SI->isAtomic() ? getCleanShadow(Val) : getShadow(Val);
      Value *ShadowPtr, *OriginPtr;
      Type *ShadowTy = Shadow->getType();
      const Align Alignment = assumeAligned(SI->getAlignment());
      const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
      std::tie(ShadowPtr, OriginPtr) =
          getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ true);

      StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment);
      LLVM_DEBUG(dbgs() << "  STORE: " << *NewSI << "\n");
      (void)NewSI;

      // Strengthen the ordering so the shadow store is visible before the
      // application store.
      if (SI->isAtomic())
        SI->setOrdering(addReleaseOrdering(SI->getOrdering()));

      if (MS.TrackOrigins && !SI->isAtomic())
        storeOrigin(IRB, Addr, Shadow, getOrigin(Val), OriginPtr,
                    OriginAlignment, InstrumentWithCalls);
    }
  }
1211 
1212   /// Helper function to insert a warning at IRB's current insert point.
1213   void insertWarningFn(IRBuilder<> &IRB, Value *Origin) {
1214     if (!Origin)
1215       Origin = (Value *)IRB.getInt32(0);
1216     assert(Origin->getType()->isIntegerTy());
1217     IRB.CreateCall(MS.WarningFn, Origin)->setCannotMerge();
1218     // FIXME: Insert UnreachableInst if !MS.Recover?
1219     // This may invalidate some of the following checks and needs to be done
1220     // at the very end.
1221   }
1222 
  /// Emit one delayed shadow check before OrigIns: either a call to
  /// __msan_maybe_warning_N (when AsCall) or an inline shadow != 0 branch
  /// to the warning function.
  void materializeOneCheck(Instruction *OrigIns, Value *Shadow, Value *Origin,
                           bool AsCall) {
    IRBuilder<> IRB(OrigIns);
    LLVM_DEBUG(dbgs() << "  SHAD0 : " << *Shadow << "\n");
    Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
    LLVM_DEBUG(dbgs() << "  SHAD1 : " << *ConvertedShadow << "\n");

    if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
      // Constant shadow: the check's outcome is known at compile time;
      // warn unconditionally (if enabled) or emit nothing.
      if (ClCheckConstantShadow && !ConstantShadow->isZeroValue()) {
        insertWarningFn(IRB, Origin);
      }
      return;
    }

    const DataLayout &DL = OrigIns->getModule()->getDataLayout();

    unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
    unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
    if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
      // Call the runtime helper, which performs the shadow test itself.
      FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
      Value *ConvertedShadow2 =
          IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
      IRB.CreateCall(Fn, {ConvertedShadow2, MS.TrackOrigins && Origin
                                                ? Origin
                                                : (Value *)IRB.getInt32(0)});
    } else {
      // Inline check: branch on shadow != 0 (cold, per ColdCallWeights) and
      // call the warning function there; the branch is terminated with
      // unreachable unless recovery is enabled.
      Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
                                    getCleanShadow(ConvertedShadow), "_mscmp");
      Instruction *CheckTerm = SplitBlockAndInsertIfThen(
          Cmp, OrigIns,
          /* Unreachable */ !MS.Recover, MS.ColdCallWeights);

      IRB.SetInsertPoint(CheckTerm);
      insertWarningFn(IRB, Origin);
      LLVM_DEBUG(dbgs() << "  CHECK: " << *Cmp << "\n");
    }
  }
1260 
1261   void materializeChecks(bool InstrumentWithCalls) {
1262     for (const auto &ShadowData : InstrumentationList) {
1263       Instruction *OrigIns = ShadowData.OrigIns;
1264       Value *Shadow = ShadowData.Shadow;
1265       Value *Origin = ShadowData.Origin;
1266       materializeOneCheck(OrigIns, Shadow, Origin, InstrumentWithCalls);
1267     }
1268     LLVM_DEBUG(dbgs() << "DONE:\n" << F);
1269   }
1270 
  /// Insert the KMSAN prologue: split off a new entry block that fetches
  /// the per-task kmsan context state and points the TLS members at its
  /// fields. Returns the block containing the original entry code.
  ///
  /// NOTE: the GEP field indices below must stay in sync with the field
  /// order of MsanContextStateTy (see createKernelApi()).
  BasicBlock *insertKmsanPrologue(Function &F) {
    BasicBlock *ret =
        SplitBlock(&F.getEntryBlock(), F.getEntryBlock().getFirstNonPHI());
    IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
    Value *ContextState = IRB.CreateCall(MS.MsanGetContextStateFn, {});
    Constant *Zero = IRB.getInt32(0);
    MS.ParamTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
                                {Zero, IRB.getInt32(0)}, "param_shadow");
    MS.RetvalTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
                                 {Zero, IRB.getInt32(1)}, "retval_shadow");
    MS.VAArgTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
                                {Zero, IRB.getInt32(2)}, "va_arg_shadow");
    MS.VAArgOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
                                      {Zero, IRB.getInt32(3)}, "va_arg_origin");
    MS.VAArgOverflowSizeTLS =
        IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
                      {Zero, IRB.getInt32(4)}, "va_arg_overflow_size");
    MS.ParamOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
                                      {Zero, IRB.getInt32(5)}, "param_origin");
    MS.RetvalOriginTLS =
        IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
                      {Zero, IRB.getInt32(6)}, "retval_origin");
    return ret;
  }
1295 
  /// Add MemorySanitizer instrumentation to a function.
  ///
  /// Drives the whole per-function pipeline: visit all reachable blocks,
  /// finalize PHI shadows, handle va_args and allocas, then materialize the
  /// delayed checks and stores. Always returns true (the function is
  /// modified).
  bool runOnFunction() {
    // In the presence of unreachable blocks, we may see Phi nodes with
    // incoming nodes from such blocks. Since InstVisitor skips unreachable
    // blocks, such nodes will not have any shadow value associated with them.
    // It's easier to remove unreachable blocks than deal with missing shadow.
    removeUnreachableBlocks(F);

    // Iterate all BBs in depth-first order and create shadow instructions
    // for all instructions (where applicable).
    // For PHI nodes we create dummy shadow PHIs which will be finalized later.
    for (BasicBlock *BB : depth_first(ActualFnStart))
      visit(*BB);

    // Finalize PHI nodes: now that every value has a shadow, add the
    // incoming shadow (and origin) values to each dummy shadow PHI.
    for (PHINode *PN : ShadowPHINodes) {
      PHINode *PNS = cast<PHINode>(getShadow(PN));
      PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : nullptr;
      size_t NumValues = PN->getNumIncomingValues();
      for (size_t v = 0; v < NumValues; v++) {
        PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
        if (PNO) PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
      }
    }

    VAHelper->finalizeInstrumentation();

    // Poison llvm.lifetime.start intrinsics, if we haven't fallen back to
    // instrumenting only allocas.
    if (InstrumentLifetimeStart) {
      for (auto Item : LifetimeStartList) {
        instrumentAlloca(*Item.second, Item.first);
        AllocaSet.erase(Item.second);
      }
    }
    // Poison the allocas for which we didn't instrument the corresponding
    // lifetime intrinsics.
    for (AllocaInst *AI : AllocaSet)
      instrumentAlloca(*AI);

    // Above the threshold, emit checks/origin-stores as runtime calls
    // instead of inline code to limit code-size growth.
    bool InstrumentWithCalls = ClInstrumentationWithCallThreshold >= 0 &&
                               InstrumentationList.size() + StoreList.size() >
                                   (unsigned)ClInstrumentationWithCallThreshold;

    // Insert shadow value checks.
    materializeChecks(InstrumentWithCalls);

    // Delayed instrumentation of StoreInst.
    // This may not add new address checks.
    materializeStores(InstrumentWithCalls);

    return true;
  }
1349 
1350   /// Compute the shadow type that corresponds to a given Value.
1351   Type *getShadowTy(Value *V) {
1352     return getShadowTy(V->getType());
1353   }
1354 
  /// Compute the shadow type that corresponds to a given Type.
  ///
  /// Integers map to themselves; vectors to integer vectors of the same
  /// element width and count; arrays and structs are mapped element-wise;
  /// anything else sized becomes a plain integer of the same bit width.
  /// Unsized types have no shadow (nullptr).
  Type *getShadowTy(Type *OrigTy) {
    if (!OrigTy->isSized()) {
      return nullptr;
    }
    // For integer type, shadow is the same as the original type.
    // This may return weird-sized types like i1.
    if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
      return IT;
    const DataLayout &DL = F.getParent()->getDataLayout();
    if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
      uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType());
      return FixedVectorType::get(IntegerType::get(*MS.C, EltSize),
                                  cast<FixedVectorType>(VT)->getNumElements());
    }
    if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy)) {
      return ArrayType::get(getShadowTy(AT->getElementType()),
                            AT->getNumElements());
    }
    if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
      SmallVector<Type*, 4> Elements;
      for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
        Elements.push_back(getShadowTy(ST->getElementType(i)));
      StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked());
      LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
      return Res;
    }
    // Fallback for any other sized type (e.g. floating point or pointer):
    // an integer of the same bit width.
    uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
    return IntegerType::get(*MS.C, TypeSize);
  }
1385 
1386   /// Flatten a vector type.
1387   Type *getShadowTyNoVec(Type *ty) {
1388     if (VectorType *vt = dyn_cast<VectorType>(ty))
1389       return IntegerType::get(*MS.C,
1390                               vt->getPrimitiveSizeInBits().getFixedSize());
1391     return ty;
1392   }
1393 
1394   /// Convert a shadow value to it's flattened variant.
1395   Value *convertToShadowTyNoVec(Value *V, IRBuilder<> &IRB) {
1396     Type *Ty = V->getType();
1397     Type *NoVecTy = getShadowTyNoVec(Ty);
1398     if (Ty == NoVecTy) return V;
1399     return IRB.CreateBitCast(V, NoVecTy);
1400   }
1401 
1402   /// Compute the integer shadow offset that corresponds to a given
1403   /// application address.
1404   ///
1405   /// Offset = (Addr & ~AndMask) ^ XorMask
1406   Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) {
1407     Value *OffsetLong = IRB.CreatePointerCast(Addr, MS.IntptrTy);
1408 
1409     uint64_t AndMask = MS.MapParams->AndMask;
1410     if (AndMask)
1411       OffsetLong =
1412           IRB.CreateAnd(OffsetLong, ConstantInt::get(MS.IntptrTy, ~AndMask));
1413 
1414     uint64_t XorMask = MS.MapParams->XorMask;
1415     if (XorMask)
1416       OffsetLong =
1417           IRB.CreateXor(OffsetLong, ConstantInt::get(MS.IntptrTy, XorMask));
1418     return OffsetLong;
1419   }
1420 
  /// Compute the shadow and origin addresses corresponding to a given
  /// application address.
  ///
  /// Shadow = ShadowBase + Offset
  /// Origin = (OriginBase + Offset) & ~3ULL
  ///
  /// The origin pointer is only computed when origin tracking is on;
  /// otherwise the second pair element is null.
  std::pair<Value *, Value *>
  getShadowOriginPtrUserspace(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy,
                              MaybeAlign Alignment) {
    Value *ShadowOffset = getShadowPtrOffset(Addr, IRB);
    Value *ShadowLong = ShadowOffset;
    uint64_t ShadowBase = MS.MapParams->ShadowBase;
    if (ShadowBase != 0) {
      ShadowLong =
        IRB.CreateAdd(ShadowLong,
                      ConstantInt::get(MS.IntptrTy, ShadowBase));
    }
    Value *ShadowPtr =
        IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
    Value *OriginPtr = nullptr;
    if (MS.TrackOrigins) {
      Value *OriginLong = ShadowOffset;
      uint64_t OriginBase = MS.MapParams->OriginBase;
      if (OriginBase != 0)
        OriginLong = IRB.CreateAdd(OriginLong,
                                   ConstantInt::get(MS.IntptrTy, OriginBase));
      // Round down to kMinOriginAlignment when the access alignment does
      // not already guarantee it.
      if (!Alignment || *Alignment < kMinOriginAlignment) {
        uint64_t Mask = kMinOriginAlignment.value() - 1;
        OriginLong =
            IRB.CreateAnd(OriginLong, ConstantInt::get(MS.IntptrTy, ~Mask));
      }
      OriginPtr =
          IRB.CreateIntToPtr(OriginLong, PointerType::get(MS.OriginTy, 0));
    }
    return std::make_pair(ShadowPtr, OriginPtr);
  }
1456 
  /// Compute shadow and origin pointers for KMSAN by calling into the
  /// runtime: a fixed-size accessor when the store size is 1/2/4/8 bytes,
  /// otherwise the variable-size __msan_metadata_ptr_for_{load,store}_n.
  /// The runtime returns a {shadow_ptr, origin_ptr} struct.
  std::pair<Value *, Value *> getShadowOriginPtrKernel(Value *Addr,
                                                       IRBuilder<> &IRB,
                                                       Type *ShadowTy,
                                                       bool isStore) {
    Value *ShadowOriginPtrs;
    const DataLayout &DL = F.getParent()->getDataLayout();
    int Size = DL.getTypeStoreSize(ShadowTy);

    FunctionCallee Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
    Value *AddrCast =
        IRB.CreatePointerCast(Addr, PointerType::get(IRB.getInt8Ty(), 0));
    if (Getter) {
      ShadowOriginPtrs = IRB.CreateCall(Getter, AddrCast);
    } else {
      Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
      ShadowOriginPtrs = IRB.CreateCall(isStore ? MS.MsanMetadataPtrForStoreN
                                                : MS.MsanMetadataPtrForLoadN,
                                        {AddrCast, SizeVal});
    }
    // The runtime returns an i8* shadow pointer; cast it to the shadow type.
    Value *ShadowPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 0);
    ShadowPtr = IRB.CreatePointerCast(ShadowPtr, PointerType::get(ShadowTy, 0));
    Value *OriginPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 1);

    return std::make_pair(ShadowPtr, OriginPtr);
  }
1482 
1483   std::pair<Value *, Value *> getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB,
1484                                                  Type *ShadowTy,
1485                                                  MaybeAlign Alignment,
1486                                                  bool isStore) {
1487     if (MS.CompileKernel)
1488       return getShadowOriginPtrKernel(Addr, IRB, ShadowTy, isStore);
1489     return getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment);
1490   }
1491 
  /// Compute the shadow address for a given function argument.
  ///
  /// Shadow = ParamTLS+ArgOffset.
  Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB,
                                 int ArgOffset) {
    Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
    if (ArgOffset)
      Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
    // Reinterpret the TLS slot as a pointer to the argument's shadow type.
    return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
                              "_msarg");
  }
1503 
  /// Compute the origin address for a given function argument.
  ///
  /// Origin = ParamOriginTLS+ArgOffset. Returns null when origin tracking
  /// is disabled.
  Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB,
                                 int ArgOffset) {
    if (!MS.TrackOrigins)
      return nullptr;
    Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
    if (ArgOffset)
      Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
    return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
                              "_msarg_o");
  }
1515 
  /// Compute the shadow address for a retval.
  ///
  /// The return value shadow always lives at offset 0 of RetvalTLS, so no
  /// offset arithmetic is needed — only a pointer cast to the shadow type.
  Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) {
    return IRB.CreatePointerCast(MS.RetvalTLS,
                                 PointerType::get(getShadowTy(A), 0),
                                 "_msret");
  }
1522 
  /// Compute the origin address for a retval.
  ///
  /// The IRBuilder parameter is accepted for interface symmetry with
  /// getShadowPtrForRetval; no IR is emitted here.
  Value *getOriginPtrForRetval(IRBuilder<> &IRB) {
    // We keep a single origin for the entire retval. Might be too optimistic.
    return MS.RetvalOriginTLS;
  }
1528 
  /// Set SV to be the shadow value for V.
  ///
  /// When shadow propagation is disabled, a clean shadow is recorded
  /// instead so that later lookups still succeed.
  void setShadow(Value *V, Value *SV) {
    assert(!ShadowMap.count(V) && "Values may only have one shadow");
    ShadowMap[V] = PropagateShadow ? SV : getCleanShadow(V);
  }
1534 
  /// Set Origin to be the origin value for V.
  ///
  /// No-op unless origin tracking is enabled.
  void setOrigin(Value *V, Value *Origin) {
    if (!MS.TrackOrigins) return;
    assert(!OriginMap.count(V) && "Values may only have one origin");
    LLVM_DEBUG(dbgs() << "ORIGIN: " << *V << "  ==> " << *Origin << "\n");
    OriginMap[V] = Origin;
  }
1542 
1543   Constant *getCleanShadow(Type *OrigTy) {
1544     Type *ShadowTy = getShadowTy(OrigTy);
1545     if (!ShadowTy)
1546       return nullptr;
1547     return Constant::getNullValue(ShadowTy);
1548   }
1549 
  /// Create a clean shadow value for a given value.
  ///
  /// Clean shadow (all zeroes) means all bits of the value are defined
  /// (initialized). Delegates to the Type-based overload.
  Constant *getCleanShadow(Value *V) {
    return getCleanShadow(V->getType());
  }
1557 
1558   /// Create a dirty shadow of a given shadow type.
1559   Constant *getPoisonedShadow(Type *ShadowTy) {
1560     assert(ShadowTy);
1561     if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
1562       return Constant::getAllOnesValue(ShadowTy);
1563     if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy)) {
1564       SmallVector<Constant *, 4> Vals(AT->getNumElements(),
1565                                       getPoisonedShadow(AT->getElementType()));
1566       return ConstantArray::get(AT, Vals);
1567     }
1568     if (StructType *ST = dyn_cast<StructType>(ShadowTy)) {
1569       SmallVector<Constant *, 4> Vals;
1570       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1571         Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
1572       return ConstantStruct::get(ST, Vals);
1573     }
1574     llvm_unreachable("Unexpected shadow type");
1575   }
1576 
1577   /// Create a dirty shadow for a given value.
1578   Constant *getPoisonedShadow(Value *V) {
1579     Type *ShadowTy = getShadowTy(V);
1580     if (!ShadowTy)
1581       return nullptr;
1582     return getPoisonedShadow(ShadowTy);
1583   }
1584 
  /// Create a clean (zero) origin.
  ///
  /// The null value of the origin type, used wherever no origin
  /// information needs to be recorded.
  Value *getCleanOrigin() {
    return Constant::getNullValue(MS.OriginTy);
  }
1589 
1590   /// Get the shadow value for a given Value.
1591   ///
1592   /// This function either returns the value set earlier with setShadow,
1593   /// or extracts if from ParamTLS (for function arguments).
1594   Value *getShadow(Value *V) {
1595     if (!PropagateShadow) return getCleanShadow(V);
1596     if (Instruction *I = dyn_cast<Instruction>(V)) {
1597       if (I->getMetadata("nosanitize"))
1598         return getCleanShadow(V);
1599       // For instructions the shadow is already stored in the map.
1600       Value *Shadow = ShadowMap[V];
1601       if (!Shadow) {
1602         LLVM_DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
1603         (void)I;
1604         assert(Shadow && "No shadow for a value");
1605       }
1606       return Shadow;
1607     }
1608     if (UndefValue *U = dyn_cast<UndefValue>(V)) {
1609       Value *AllOnes = PoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V);
1610       LLVM_DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
1611       (void)U;
1612       return AllOnes;
1613     }
1614     if (Argument *A = dyn_cast<Argument>(V)) {
1615       // For arguments we compute the shadow on demand and store it in the map.
1616       Value **ShadowPtr = &ShadowMap[V];
1617       if (*ShadowPtr)
1618         return *ShadowPtr;
1619       Function *F = A->getParent();
1620       IRBuilder<> EntryIRB(ActualFnStart->getFirstNonPHI());
1621       unsigned ArgOffset = 0;
1622       const DataLayout &DL = F->getParent()->getDataLayout();
1623       for (auto &FArg : F->args()) {
1624         if (!FArg.getType()->isSized()) {
1625           LLVM_DEBUG(dbgs() << "Arg is not sized\n");
1626           continue;
1627         }
1628 
1629         bool FArgByVal = FArg.hasByValAttr();
1630         bool FArgNoUndef = FArg.hasAttribute(Attribute::NoUndef);
1631         bool FArgEagerCheck = ClEagerChecks && !FArgByVal && FArgNoUndef;
1632         unsigned Size =
1633             FArg.hasByValAttr()
1634                 ? DL.getTypeAllocSize(FArg.getParamByValType())
1635                 : DL.getTypeAllocSize(FArg.getType());
1636 
1637         if (A == &FArg) {
1638           bool Overflow = ArgOffset + Size > kParamTLSSize;
1639           if (FArgEagerCheck) {
1640             *ShadowPtr = getCleanShadow(V);
1641             setOrigin(A, getCleanOrigin());
1642             continue;
1643           } else if (FArgByVal) {
1644             Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
1645             // ByVal pointer itself has clean shadow. We copy the actual
1646             // argument shadow to the underlying memory.
1647             // Figure out maximal valid memcpy alignment.
1648             const Align ArgAlign = DL.getValueOrABITypeAlignment(
1649                 MaybeAlign(FArg.getParamAlignment()), FArg.getParamByValType());
1650             Value *CpShadowPtr =
1651                 getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign,
1652                                    /*isStore*/ true)
1653                     .first;
1654             // TODO(glider): need to copy origins.
1655             if (Overflow) {
1656               // ParamTLS overflow.
1657               EntryIRB.CreateMemSet(
1658                   CpShadowPtr, Constant::getNullValue(EntryIRB.getInt8Ty()),
1659                   Size, ArgAlign);
1660             } else {
1661               const Align CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
1662               Value *Cpy = EntryIRB.CreateMemCpy(CpShadowPtr, CopyAlign, Base,
1663                                                  CopyAlign, Size);
1664               LLVM_DEBUG(dbgs() << "  ByValCpy: " << *Cpy << "\n");
1665               (void)Cpy;
1666             }
1667             *ShadowPtr = getCleanShadow(V);
1668           } else {
1669             // Shadow over TLS
1670             Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
1671             if (Overflow) {
1672               // ParamTLS overflow.
1673               *ShadowPtr = getCleanShadow(V);
1674             } else {
1675               *ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base,
1676                                                       kShadowTLSAlignment);
1677             }
1678           }
1679           LLVM_DEBUG(dbgs()
1680                      << "  ARG:    " << FArg << " ==> " << **ShadowPtr << "\n");
1681           if (MS.TrackOrigins && !Overflow) {
1682             Value *OriginPtr =
1683                 getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
1684             setOrigin(A, EntryIRB.CreateLoad(MS.OriginTy, OriginPtr));
1685           } else {
1686             setOrigin(A, getCleanOrigin());
1687           }
1688         }
1689 
1690         if (!FArgEagerCheck)
1691           ArgOffset += alignTo(Size, kShadowTLSAlignment);
1692       }
1693       assert(*ShadowPtr && "Could not find shadow for an argument");
1694       return *ShadowPtr;
1695     }
1696     // For everything else the shadow is zero.
1697     return getCleanShadow(V);
1698   }
1699 
  /// Get the shadow for i-th argument of the instruction I.
  ///
  /// Convenience overload: the shadow of I's i-th operand.
  Value *getShadow(Instruction *I, int i) {
    return getShadow(I->getOperand(i));
  }
1704 
  /// Get the origin for a value.
  ///
  /// Returns null when origin tracking is off, and a clean (zero) origin
  /// for constants or when shadow propagation is disabled. Otherwise the
  /// origin must have been recorded earlier via setOrigin.
  Value *getOrigin(Value *V) {
    if (!MS.TrackOrigins) return nullptr;
    if (!PropagateShadow) return getCleanOrigin();
    if (isa<Constant>(V)) return getCleanOrigin();
    assert((isa<Instruction>(V) || isa<Argument>(V)) &&
           "Unexpected value type in getOrigin()");
    if (Instruction *I = dyn_cast<Instruction>(V)) {
      if (I->getMetadata("nosanitize"))
        return getCleanOrigin();
    }
    Value *Origin = OriginMap[V];
    assert(Origin && "Missing origin");
    return Origin;
  }
1720 
  /// Get the origin for i-th argument of the instruction I.
  ///
  /// Convenience overload: the origin of I's i-th operand.
  Value *getOrigin(Instruction *I, int i) {
    return getOrigin(I->getOperand(i));
  }
1725 
  /// Remember the place where a shadow check should be inserted.
  ///
  /// This location will be later instrumented with a check that will print a
  /// UMR warning in runtime if the shadow value is not 0.
  void insertShadowCheck(Value *Shadow, Value *Origin, Instruction *OrigIns) {
    assert(Shadow);
    if (!InsertChecks) return;
#ifndef NDEBUG
    Type *ShadowTy = Shadow->getType();
    assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy)) &&
           "Can only insert checks for integer and vector shadow types");
#endif
    // The check is only recorded here; the IR for it is emitted later.
    InstrumentationList.push_back(
        ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
  }
1741 
1742   /// Remember the place where a shadow check should be inserted.
1743   ///
1744   /// This location will be later instrumented with a check that will print a
1745   /// UMR warning in runtime if the value is not fully defined.
1746   void insertShadowCheck(Value *Val, Instruction *OrigIns) {
1747     assert(Val);
1748     Value *Shadow, *Origin;
1749     if (ClCheckConstantShadow) {
1750       Shadow = getShadow(Val);
1751       if (!Shadow) return;
1752       Origin = getOrigin(Val);
1753     } else {
1754       Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
1755       if (!Shadow) return;
1756       Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
1757     }
1758     insertShadowCheck(Shadow, Origin, OrigIns);
1759   }
1760 
1761   AtomicOrdering addReleaseOrdering(AtomicOrdering a) {
1762     switch (a) {
1763       case AtomicOrdering::NotAtomic:
1764         return AtomicOrdering::NotAtomic;
1765       case AtomicOrdering::Unordered:
1766       case AtomicOrdering::Monotonic:
1767       case AtomicOrdering::Release:
1768         return AtomicOrdering::Release;
1769       case AtomicOrdering::Acquire:
1770       case AtomicOrdering::AcquireRelease:
1771         return AtomicOrdering::AcquireRelease;
1772       case AtomicOrdering::SequentiallyConsistent:
1773         return AtomicOrdering::SequentiallyConsistent;
1774     }
1775     llvm_unreachable("Unknown ordering");
1776   }
1777 
  /// Build a constant vector mapping each C ABI atomic ordering (used as
  /// the index) to its release-strengthened counterpart, mirroring
  /// addReleaseOrdering.
  Value *makeAddReleaseOrderingTable(IRBuilder<> &IRB) {
    constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
    uint32_t OrderingTable[NumOrderings] = {};

    OrderingTable[(int)AtomicOrderingCABI::relaxed] =
        OrderingTable[(int)AtomicOrderingCABI::release] =
            (int)AtomicOrderingCABI::release;
    OrderingTable[(int)AtomicOrderingCABI::consume] =
        OrderingTable[(int)AtomicOrderingCABI::acquire] =
            OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
                (int)AtomicOrderingCABI::acq_rel;
    OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
        (int)AtomicOrderingCABI::seq_cst;

    return ConstantDataVector::get(IRB.getContext(),
                                   makeArrayRef(OrderingTable, NumOrderings));
  }
1795 
1796   AtomicOrdering addAcquireOrdering(AtomicOrdering a) {
1797     switch (a) {
1798       case AtomicOrdering::NotAtomic:
1799         return AtomicOrdering::NotAtomic;
1800       case AtomicOrdering::Unordered:
1801       case AtomicOrdering::Monotonic:
1802       case AtomicOrdering::Acquire:
1803         return AtomicOrdering::Acquire;
1804       case AtomicOrdering::Release:
1805       case AtomicOrdering::AcquireRelease:
1806         return AtomicOrdering::AcquireRelease;
1807       case AtomicOrdering::SequentiallyConsistent:
1808         return AtomicOrdering::SequentiallyConsistent;
1809     }
1810     llvm_unreachable("Unknown ordering");
1811   }
1812 
  /// Build a constant vector mapping each C ABI atomic ordering (used as
  /// the index) to its acquire-strengthened counterpart, mirroring
  /// addAcquireOrdering.
  Value *makeAddAcquireOrderingTable(IRBuilder<> &IRB) {
    constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
    uint32_t OrderingTable[NumOrderings] = {};

    OrderingTable[(int)AtomicOrderingCABI::relaxed] =
        OrderingTable[(int)AtomicOrderingCABI::acquire] =
            OrderingTable[(int)AtomicOrderingCABI::consume] =
                (int)AtomicOrderingCABI::acquire;
    OrderingTable[(int)AtomicOrderingCABI::release] =
        OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
            (int)AtomicOrderingCABI::acq_rel;
    OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
        (int)AtomicOrderingCABI::seq_cst;

    return ConstantDataVector::get(IRB.getContext(),
                                   makeArrayRef(OrderingTable, NumOrderings));
  }
1830 
1831   // ------------------- Visitors.
1832   using InstVisitor<MemorySanitizerVisitor>::visit;
  /// Dispatch to the instruction-specific visitor, unless the instruction
  /// carries "nosanitize" metadata, which exempts it from instrumentation.
  void visit(Instruction &I) {
    if (!I.getMetadata("nosanitize"))
      InstVisitor<MemorySanitizerVisitor>::visit(I);
  }
1837 
  /// Instrument LoadInst
  ///
  /// Loads the corresponding shadow and (optionally) origin.
  /// Optionally, checks that the load address is fully defined.
  void visitLoadInst(LoadInst &I) {
    assert(I.getType()->isSized() && "Load type must have size");
    assert(!I.getMetadata("nosanitize"));
    // Instrumentation is inserted after the load itself.
    IRBuilder<> IRB(I.getNextNode());
    Type *ShadowTy = getShadowTy(&I);
    Value *Addr = I.getPointerOperand();
    Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
    const Align Alignment = assumeAligned(I.getAlignment());
    if (PropagateShadow) {
      std::tie(ShadowPtr, OriginPtr) =
          getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
      setShadow(&I,
                IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
    } else {
      setShadow(&I, getCleanShadow(&I));
    }

    if (ClCheckAccessAddress)
      insertShadowCheck(I.getPointerOperand(), &I);

    // Upgrade atomic loads to at least acquire ordering
    // (see addAcquireOrdering).
    if (I.isAtomic())
      I.setOrdering(addAcquireOrdering(I.getOrdering()));

    if (MS.TrackOrigins) {
      if (PropagateShadow) {
        const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
        setOrigin(
            &I, IRB.CreateAlignedLoad(MS.OriginTy, OriginPtr, OriginAlignment));
      } else {
        setOrigin(&I, getCleanOrigin());
      }
    }
  }
1875 
  /// Instrument StoreInst
  ///
  /// Stores the corresponding shadow and (optionally) origin.
  /// Optionally, checks that the store address is fully defined.
  void visitStoreInst(StoreInst &I) {
    // Only collect the store here; the shadow/origin stores are emitted in
    // a later batch over StoreList (handled elsewhere in this pass).
    StoreList.push_back(&I);
    if (ClCheckAccessAddress)
      insertShadowCheck(I.getPointerOperand(), &I);
  }
1885 
  /// Common instrumentation for atomic RMW and cmpxchg instructions.
  ///
  /// Atomics are modeled conservatively: the shadow of the target memory
  /// and of the produced value are both set to clean.
  void handleCASOrRMW(Instruction &I) {
    assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));

    IRBuilder<> IRB(&I);
    Value *Addr = I.getOperand(0);
    Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, I.getType(), Align(1),
                                          /*isStore*/ true)
                           .first;

    if (ClCheckAccessAddress)
      insertShadowCheck(Addr, &I);

    // Only test the conditional argument of cmpxchg instruction.
    // The other argument can potentially be uninitialized, but we can not
    // detect this situation reliably without possible false positives.
    if (isa<AtomicCmpXchgInst>(I))
      insertShadowCheck(I.getOperand(1), &I);

    // Unpoison the memory the atomic writes to...
    IRB.CreateStore(getCleanShadow(&I), ShadowPtr);

    // ...and the value it produces.
    setShadow(&I, getCleanShadow(&I));
    setOrigin(&I, getCleanOrigin());
  }
1909 
  // Atomic RMW / cmpxchg are store-like accesses: strengthen their
  // (success) ordering with release semantics (see addReleaseOrdering).
  void visitAtomicRMWInst(AtomicRMWInst &I) {
    handleCASOrRMW(I);
    I.setOrdering(addReleaseOrdering(I.getOrdering()));
  }

  void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
    handleCASOrRMW(I);
    I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
  }
1919 
  // Vector manipulation.

  /// The index must be fully defined; the result's shadow is the
  /// corresponding element of the vector's shadow.
  void visitExtractElementInst(ExtractElementInst &I) {
    insertShadowCheck(I.getOperand(1), &I);
    IRBuilder<> IRB(&I);
    setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
              "_msprop"));
    setOrigin(&I, getOrigin(&I, 0));
  }

  /// The index must be fully defined; the result's shadow is the vector
  /// shadow with the inserted value's shadow spliced in.
  void visitInsertElementInst(InsertElementInst &I) {
    insertShadowCheck(I.getOperand(2), &I);
    IRBuilder<> IRB(&I);
    setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1),
              I.getOperand(2), "_msprop"));
    setOriginForNaryOp(I);
  }

  /// Shuffle the shadows with the same mask as the values.
  void visitShuffleVectorInst(ShuffleVectorInst &I) {
    IRBuilder<> IRB(&I);
    setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1),
                                          I.getShuffleMask(), "_msprop"));
    setOriginForNaryOp(I);
  }
1943 
  // Casts.

  /// Sign extension propagates the top shadow bit into the new high bits:
  /// an uninitialized sign bit poisons the whole extension.
  void visitSExtInst(SExtInst &I) {
    IRBuilder<> IRB(&I);
    setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop"));
    setOrigin(&I, getOrigin(&I, 0));
  }

  /// Zero extension introduces defined (zero) high bits, so the shadow is
  /// zero-extended as well.
  void visitZExtInst(ZExtInst &I) {
    IRBuilder<> IRB(&I);
    setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop"));
    setOrigin(&I, getOrigin(&I, 0));
  }

  /// Truncation simply drops the shadow of the discarded high bits.
  void visitTruncInst(TruncInst &I) {
    IRBuilder<> IRB(&I);
    setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop"));
    setOrigin(&I, getOrigin(&I, 0));
  }
1962 
  /// Bitcast preserves the bit pattern, so the operand's shadow is simply
  /// reinterpreted in the result's shadow type.
  void visitBitCastInst(BitCastInst &I) {
    // Special case: if this is the bitcast (there is exactly 1 allowed) between
    // a musttail call and a ret, don't instrument. New instructions are not
    // allowed after a musttail call.
    if (auto *CI = dyn_cast<CallInst>(I.getOperand(0)))
      if (CI->isMustTailCall())
        return;
    IRBuilder<> IRB(&I);
    setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
    setOrigin(&I, getOrigin(&I, 0));
  }
1974 
  /// Pointer/integer conversions carry the shadow over with an unsigned
  /// integer cast to the new shadow width.
  void visitPtrToIntInst(PtrToIntInst &I) {
    IRBuilder<> IRB(&I);
    setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
             "_msprop_ptrtoint"));
    setOrigin(&I, getOrigin(&I, 0));
  }

  void visitIntToPtrInst(IntToPtrInst &I) {
    IRBuilder<> IRB(&I);
    setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
             "_msprop_inttoptr"));
    setOrigin(&I, getOrigin(&I, 0));
  }
1988 
  // Float/int conversions are not modeled bit-for-bit; fall back to the
  // default OR-based shadow propagation.
  void visitFPToSIInst(CastInst& I) { handleShadowOr(I); }
  void visitFPToUIInst(CastInst& I) { handleShadowOr(I); }
  void visitSIToFPInst(CastInst& I) { handleShadowOr(I); }
  void visitUIToFPInst(CastInst& I) { handleShadowOr(I); }
  void visitFPExtInst(CastInst& I) { handleShadowOr(I); }
  void visitFPTruncInst(CastInst& I) { handleShadowOr(I); }
1995 
  /// Propagate shadow for bitwise AND.
  ///
  /// This code is exact, i.e. if, for example, a bit in the left argument
  /// is defined and 0, then neither the value nor the definedness of the
  /// corresponding bit in B affects the resulting shadow.
  void visitAnd(BinaryOperator &I) {
    IRBuilder<> IRB(&I);
    //  "And" of 0 and a poisoned value results in unpoisoned value.
    //  1&1 => 1;     0&1 => 0;     p&1 => p;
    //  1&0 => 0;     0&0 => 0;     p&0 => 0;
    //  1&p => p;     0&p => 0;     p&p => p;
    //  S = (S1 & S2) | (V1 & S2) | (S1 & V2)
    Value *S1 = getShadow(&I, 0);
    Value *S2 = getShadow(&I, 1);
    Value *V1 = I.getOperand(0);
    Value *V2 = I.getOperand(1);
    if (V1->getType() != S1->getType()) {
      V1 = IRB.CreateIntCast(V1, S1->getType(), false);
      V2 = IRB.CreateIntCast(V2, S2->getType(), false);
    }
    Value *S1S2 = IRB.CreateAnd(S1, S2);
    Value *V1S2 = IRB.CreateAnd(V1, S2);
    Value *S1V2 = IRB.CreateAnd(S1, V2);
    setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
    setOriginForNaryOp(I);
  }
2022 
  /// Propagate shadow for bitwise OR (exact, dual to visitAnd: a defined 1
  /// masks the other operand's poison the way a defined 0 does for AND).
  void visitOr(BinaryOperator &I) {
    IRBuilder<> IRB(&I);
    //  "Or" of 1 and a poisoned value results in unpoisoned value.
    //  1|1 => 1;     0|1 => 1;     p|1 => 1;
    //  1|0 => 1;     0|0 => 0;     p|0 => p;
    //  1|p => 1;     0|p => p;     p|p => p;
    //  S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
    Value *S1 = getShadow(&I, 0);
    Value *S2 = getShadow(&I, 1);
    Value *V1 = IRB.CreateNot(I.getOperand(0));
    Value *V2 = IRB.CreateNot(I.getOperand(1));
    if (V1->getType() != S1->getType()) {
      V1 = IRB.CreateIntCast(V1, S1->getType(), false);
      V2 = IRB.CreateIntCast(V2, S2->getType(), false);
    }
    Value *S1S2 = IRB.CreateAnd(S1, S2);
    Value *V1S2 = IRB.CreateAnd(V1, S2);
    Value *S1V2 = IRB.CreateAnd(S1, V2);
    setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
    setOriginForNaryOp(I);
  }
2044 
  /// Default propagation of shadow and/or origin.
  ///
  /// This class implements the general case of shadow propagation, used in all
  /// cases where we don't know and/or don't care about what the operation
  /// actually does. It converts all input shadow values to a common type
  /// (extending or truncating as necessary), and bitwise OR's them.
  ///
  /// This is much cheaper than inserting checks (i.e. requiring inputs to be
  /// fully initialized), and less prone to false positives.
  ///
  /// This class also implements the general case of origin propagation. For a
  /// Nary operation, result origin is set to the origin of an argument that is
  /// not entirely initialized. If there is more than one such arguments, the
  /// rightmost of them is picked. It does not matter which one is picked if all
  /// arguments are initialized.
  ///
  /// \tparam CombineShadow When false, only origins are combined; the
  ///         shadow inputs are ignored (see the OriginCombiner alias).
  template <bool CombineShadow>
  class Combiner {
    Value *Shadow = nullptr;   // Running OR of shadows seen so far.
    Value *Origin = nullptr;   // Origin of the rightmost poisoned operand.
    IRBuilder<> &IRB;
    MemorySanitizerVisitor *MSV;

  public:
    Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB)
        : IRB(IRB), MSV(MSV) {}

    /// Add a pair of shadow and origin values to the mix.
    Combiner &Add(Value *OpShadow, Value *OpOrigin) {
      if (CombineShadow) {
        assert(OpShadow);
        if (!Shadow)
          Shadow = OpShadow;
        else {
          OpShadow = MSV->CreateShadowCast(IRB, OpShadow, Shadow->getType());
          Shadow = IRB.CreateOr(Shadow, OpShadow, "_msprop");
        }
      }

      if (MSV->MS.TrackOrigins) {
        assert(OpOrigin);
        if (!Origin) {
          Origin = OpOrigin;
        } else {
          Constant *ConstOrigin = dyn_cast<Constant>(OpOrigin);
          // No point in adding something that might result in 0 origin value.
          if (!ConstOrigin || !ConstOrigin->isNullValue()) {
            // Select this operand's origin only if its shadow is non-zero.
            Value *FlatShadow = MSV->convertToShadowTyNoVec(OpShadow, IRB);
            Value *Cond =
                IRB.CreateICmpNE(FlatShadow, MSV->getCleanShadow(FlatShadow));
            Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
          }
        }
      }
      return *this;
    }

    /// Add an application value to the mix.
    Combiner &Add(Value *V) {
      Value *OpShadow = MSV->getShadow(V);
      Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : nullptr;
      return Add(OpShadow, OpOrigin);
    }

    /// Set the current combined values as the given instruction's shadow
    /// and origin.
    void Done(Instruction *I) {
      if (CombineShadow) {
        assert(Shadow);
        Shadow = MSV->CreateShadowCast(IRB, Shadow, MSV->getShadowTy(I));
        MSV->setShadow(I, Shadow);
      }
      if (MSV->MS.TrackOrigins) {
        assert(Origin);
        MSV->setOrigin(I, Origin);
      }
    }
  };
2122 
2123   using ShadowAndOriginCombiner = Combiner<true>;
2124   using OriginCombiner = Combiner<false>;
2125 
2126   /// Propagate origin for arbitrary operation.
2127   void setOriginForNaryOp(Instruction &I) {
2128     if (!MS.TrackOrigins) return;
2129     IRBuilder<> IRB(&I);
2130     OriginCombiner OC(this, IRB);
2131     for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
2132       OC.Add(OI->get());
2133     OC.Done(&I);
2134   }
2135 
2136   size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) {
2137     assert(!(Ty->isVectorTy() && Ty->getScalarType()->isPointerTy()) &&
2138            "Vector of pointers is not a valid shadow type");
2139     return Ty->isVectorTy() ? cast<FixedVectorType>(Ty)->getNumElements() *
2140                                   Ty->getScalarSizeInBits()
2141                             : Ty->getPrimitiveSizeInBits();
2142   }
2143 
  /// Cast between two shadow types, extending or truncating as
  /// necessary.
  Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy,
                          bool Signed = false) {
    Type *srcTy = V->getType();
    size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
    size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
    // Collapsing to a single bit means "is any bit poisoned": compare
    // against the all-zero clean shadow.
    if (srcSizeInBits > 1 && dstSizeInBits == 1)
      return IRB.CreateICmpNE(V, getCleanShadow(V));

    if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
      return IRB.CreateIntCast(V, dstTy, Signed);
    if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
        cast<FixedVectorType>(dstTy)->getNumElements() ==
            cast<FixedVectorType>(srcTy)->getNumElements())
      return IRB.CreateIntCast(V, dstTy, Signed);
    // Mismatched shapes: go through plain integers of the full bit widths.
    Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
    Value *V2 =
      IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed);
    return IRB.CreateBitCast(V2, dstTy);
    // TODO: handle struct types.
  }
2166 
2167   /// Cast an application value to the type of its own shadow.
2168   Value *CreateAppToShadowCast(IRBuilder<> &IRB, Value *V) {
2169     Type *ShadowTy = getShadowTy(V);
2170     if (V->getType() == ShadowTy)
2171       return V;
2172     if (V->getType()->isPtrOrPtrVectorTy())
2173       return IRB.CreatePtrToInt(V, ShadowTy);
2174     else
2175       return IRB.CreateBitCast(V, ShadowTy);
2176   }
2177 
2178   /// Propagate shadow for arbitrary operation.
2179   void handleShadowOr(Instruction &I) {
2180     IRBuilder<> IRB(&I);
2181     ShadowAndOriginCombiner SC(this, IRB);
2182     for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
2183       SC.Add(OI->get());
2184     SC.Done(&I);
2185   }
2186 
2187   void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
2188 
  // Handle multiplication by constant.
  //
  // Handle a special case of multiplication by constant that may have one or
  // more zeros in the lower bits. This makes corresponding number of lower bits
  // of the result zero as well. We model it by shifting the other operand
  // shadow left by the required number of bits. Effectively, we transform
  // (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as (Sx << B).
  // We use multiplication by 2**N instead of shift to cover the case of
  // multiplication by 0, which may occur in some elements of a vector operand.
  void handleMulByConstant(BinaryOperator &I, Constant *ConstArg,
                           Value *OtherArg) {
    Constant *ShadowMul;
    Type *Ty = ConstArg->getType();
    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
      // Build a per-element multiplier: 2**(trailing zeros) for constant
      // integer elements, 1 (identity) for anything else.
      unsigned NumElements = cast<FixedVectorType>(VTy)->getNumElements();
      Type *EltTy = VTy->getElementType();
      SmallVector<Constant *, 16> Elements;
      for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
        if (ConstantInt *Elt =
                dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx))) {
          const APInt &V = Elt->getValue();
          APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
          Elements.push_back(ConstantInt::get(EltTy, V2));
        } else {
          Elements.push_back(ConstantInt::get(EltTy, 1));
        }
      }
      ShadowMul = ConstantVector::get(Elements);
    } else {
      // Scalar case: same 2**(trailing zeros) multiplier.
      if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg)) {
        const APInt &V = Elt->getValue();
        APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
        ShadowMul = ConstantInt::get(Ty, V2);
      } else {
        ShadowMul = ConstantInt::get(Ty, 1);
      }
    }

    IRBuilder<> IRB(&I);
    setShadow(&I,
              IRB.CreateMul(getShadow(OtherArg), ShadowMul, "msprop_mul_cst"));
    setOrigin(&I, getOrigin(OtherArg));
  }
2232 
2233   void visitMul(BinaryOperator &I) {
2234     Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
2235     Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
2236     if (constOp0 && !constOp1)
2237       handleMulByConstant(I, constOp0, I.getOperand(1));
2238     else if (constOp1 && !constOp0)
2239       handleMulByConstant(I, constOp1, I.getOperand(0));
2240     else
2241       handleShadowOr(I);
2242   }
2243 
  // Arithmetic and bitwise ops with no more precise rule: approximate by
  // OR-combining the operand shadows.
  void visitFAdd(BinaryOperator &I) { handleShadowOr(I); }
  void visitFSub(BinaryOperator &I) { handleShadowOr(I); }
  void visitFMul(BinaryOperator &I) { handleShadowOr(I); }
  void visitAdd(BinaryOperator &I) { handleShadowOr(I); }
  void visitSub(BinaryOperator &I) { handleShadowOr(I); }
  void visitXor(BinaryOperator &I) { handleShadowOr(I); }
2250 
2251   void handleIntegerDiv(Instruction &I) {
2252     IRBuilder<> IRB(&I);
2253     // Strict on the second argument.
2254     insertShadowCheck(I.getOperand(1), &I);
2255     setShadow(&I, getShadow(&I, 0));
2256     setOrigin(&I, getOrigin(&I, 0));
2257   }
2258 
  // Integer division/remainder: strict check on the divisor (see
  // handleIntegerDiv).
  void visitUDiv(BinaryOperator &I) { handleIntegerDiv(I); }
  void visitSDiv(BinaryOperator &I) { handleIntegerDiv(I); }
  void visitURem(BinaryOperator &I) { handleIntegerDiv(I); }
  void visitSRem(BinaryOperator &I) { handleIntegerDiv(I); }

  // Floating point division is side-effect free. We can not require that the
  // divisor is fully initialized and must propagate shadow. See PR37523.
  void visitFDiv(BinaryOperator &I) { handleShadowOr(I); }
  void visitFRem(BinaryOperator &I) { handleShadowOr(I); }
2268 
2269   /// Instrument == and != comparisons.
2270   ///
2271   /// Sometimes the comparison result is known even if some of the bits of the
2272   /// arguments are not.
2273   void handleEqualityComparison(ICmpInst &I) {
2274     IRBuilder<> IRB(&I);
2275     Value *A = I.getOperand(0);
2276     Value *B = I.getOperand(1);
2277     Value *Sa = getShadow(A);
2278     Value *Sb = getShadow(B);
2279 
2280     // Get rid of pointers and vectors of pointers.
2281     // For ints (and vectors of ints), types of A and Sa match,
2282     // and this is a no-op.
2283     A = IRB.CreatePointerCast(A, Sa->getType());
2284     B = IRB.CreatePointerCast(B, Sb->getType());
2285 
2286     // A == B  <==>  (C = A^B) == 0
2287     // A != B  <==>  (C = A^B) != 0
2288     // Sc = Sa | Sb
2289     Value *C = IRB.CreateXor(A, B);
2290     Value *Sc = IRB.CreateOr(Sa, Sb);
2291     // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now)
2292     // Result is defined if one of the following is true
2293     // * there is a defined 1 bit in C
2294     // * C is fully defined
2295     // Si = !(C & ~Sc) && Sc
2296     Value *Zero = Constant::getNullValue(Sc->getType());
2297     Value *MinusOne = Constant::getAllOnesValue(Sc->getType());
2298     Value *Si =
2299       IRB.CreateAnd(IRB.CreateICmpNE(Sc, Zero),
2300                     IRB.CreateICmpEQ(
2301                       IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero));
2302     Si->setName("_msprop_icmp");
2303     setShadow(&I, Si);
2304     setOriginForNaryOp(I);
2305   }
2306 
2307   /// Build the lowest possible value of V, taking into account V's
2308   ///        uninitialized bits.
2309   Value *getLowestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
2310                                 bool isSigned) {
2311     if (isSigned) {
2312       // Split shadow into sign bit and other bits.
2313       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
2314       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
2315       // Maximise the undefined shadow bit, minimize other undefined bits.
2316       return
2317         IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaOtherBits)), SaSignBit);
2318     } else {
2319       // Minimize undefined bits.
2320       return IRB.CreateAnd(A, IRB.CreateNot(Sa));
2321     }
2322   }
2323 
2324   /// Build the highest possible value of V, taking into account V's
2325   ///        uninitialized bits.
2326   Value *getHighestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
2327                                 bool isSigned) {
2328     if (isSigned) {
2329       // Split shadow into sign bit and other bits.
2330       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
2331       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
2332       // Minimise the undefined shadow bit, maximise other undefined bits.
2333       return
2334         IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaSignBit)), SaOtherBits);
2335     } else {
2336       // Maximize undefined bits.
2337       return IRB.CreateOr(A, Sa);
2338     }
2339   }
2340 
2341   /// Instrument relational comparisons.
2342   ///
2343   /// This function does exact shadow propagation for all relational
2344   /// comparisons of integers, pointers and vectors of those.
2345   /// FIXME: output seems suboptimal when one of the operands is a constant
2346   void handleRelationalComparisonExact(ICmpInst &I) {
2347     IRBuilder<> IRB(&I);
2348     Value *A = I.getOperand(0);
2349     Value *B = I.getOperand(1);
2350     Value *Sa = getShadow(A);
2351     Value *Sb = getShadow(B);
2352 
2353     // Get rid of pointers and vectors of pointers.
2354     // For ints (and vectors of ints), types of A and Sa match,
2355     // and this is a no-op.
2356     A = IRB.CreatePointerCast(A, Sa->getType());
2357     B = IRB.CreatePointerCast(B, Sb->getType());
2358 
2359     // Let [a0, a1] be the interval of possible values of A, taking into account
2360     // its undefined bits. Let [b0, b1] be the interval of possible values of B.
2361     // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0).
2362     bool IsSigned = I.isSigned();
2363     Value *S1 = IRB.CreateICmp(I.getPredicate(),
2364                                getLowestPossibleValue(IRB, A, Sa, IsSigned),
2365                                getHighestPossibleValue(IRB, B, Sb, IsSigned));
2366     Value *S2 = IRB.CreateICmp(I.getPredicate(),
2367                                getHighestPossibleValue(IRB, A, Sa, IsSigned),
2368                                getLowestPossibleValue(IRB, B, Sb, IsSigned));
2369     Value *Si = IRB.CreateXor(S1, S2);
2370     setShadow(&I, Si);
2371     setOriginForNaryOp(I);
2372   }
2373 
2374   /// Instrument signed relational comparisons.
2375   ///
2376   /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest
2377   /// bit of the shadow. Everything else is delegated to handleShadowOr().
2378   void handleSignedRelationalComparison(ICmpInst &I) {
2379     Constant *constOp;
2380     Value *op = nullptr;
2381     CmpInst::Predicate pre;
2382     if ((constOp = dyn_cast<Constant>(I.getOperand(1)))) {
2383       op = I.getOperand(0);
2384       pre = I.getPredicate();
2385     } else if ((constOp = dyn_cast<Constant>(I.getOperand(0)))) {
2386       op = I.getOperand(1);
2387       pre = I.getSwappedPredicate();
2388     } else {
2389       handleShadowOr(I);
2390       return;
2391     }
2392 
2393     if ((constOp->isNullValue() &&
2394          (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) ||
2395         (constOp->isAllOnesValue() &&
2396          (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) {
2397       IRBuilder<> IRB(&I);
2398       Value *Shadow = IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op),
2399                                         "_msprop_icmp_s");
2400       setShadow(&I, Shadow);
2401       setOrigin(&I, getOrigin(op));
2402     } else {
2403       handleShadowOr(I);
2404     }
2405   }
2406 
2407   void visitICmpInst(ICmpInst &I) {
2408     if (!ClHandleICmp) {
2409       handleShadowOr(I);
2410       return;
2411     }
2412     if (I.isEquality()) {
2413       handleEqualityComparison(I);
2414       return;
2415     }
2416 
2417     assert(I.isRelational());
2418     if (ClHandleICmpExact) {
2419       handleRelationalComparisonExact(I);
2420       return;
2421     }
2422     if (I.isSigned()) {
2423       handleSignedRelationalComparison(I);
2424       return;
2425     }
2426 
2427     assert(I.isUnsigned());
2428     if ((isa<Constant>(I.getOperand(0)) || isa<Constant>(I.getOperand(1)))) {
2429       handleRelationalComparisonExact(I);
2430       return;
2431     }
2432 
2433     handleShadowOr(I);
2434   }
2435 
2436   void visitFCmpInst(FCmpInst &I) {
2437     handleShadowOr(I);
2438   }
2439 
2440   void handleShift(BinaryOperator &I) {
2441     IRBuilder<> IRB(&I);
2442     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2443     // Otherwise perform the same shift on S1.
2444     Value *S1 = getShadow(&I, 0);
2445     Value *S2 = getShadow(&I, 1);
2446     Value *S2Conv = IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)),
2447                                    S2->getType());
2448     Value *V2 = I.getOperand(1);
2449     Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2);
2450     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2451     setOriginForNaryOp(I);
2452   }
2453 
  // All three shift flavors share the same shadow rule (see handleShift).
  void visitShl(BinaryOperator &I) { handleShift(I); }
  void visitAShr(BinaryOperator &I) { handleShift(I); }
  void visitLShr(BinaryOperator &I) { handleShift(I); }
2457 
2458   /// Instrument llvm.memmove
2459   ///
2460   /// At this point we don't know if llvm.memmove will be inlined or not.
2461   /// If we don't instrument it and it gets inlined,
2462   /// our interceptor will not kick in and we will lose the memmove.
2463   /// If we instrument the call here, but it does not get inlined,
2464   /// we will memove the shadow twice: which is bad in case
2465   /// of overlapping regions. So, we simply lower the intrinsic to a call.
2466   ///
2467   /// Similar situation exists for memcpy and memset.
2468   void visitMemMoveInst(MemMoveInst &I) {
2469     IRBuilder<> IRB(&I);
2470     IRB.CreateCall(
2471         MS.MemmoveFn,
2472         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2473          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2474          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2475     I.eraseFromParent();
2476   }
2477 
2478   // Similar to memmove: avoid copying shadow twice.
2479   // This is somewhat unfortunate as it may slowdown small constant memcpys.
2480   // FIXME: consider doing manual inline for small constant sizes and proper
2481   // alignment.
2482   void visitMemCpyInst(MemCpyInst &I) {
2483     IRBuilder<> IRB(&I);
2484     IRB.CreateCall(
2485         MS.MemcpyFn,
2486         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2487          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2488          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2489     I.eraseFromParent();
2490   }
2491 
2492   // Same as memcpy.
2493   void visitMemSetInst(MemSetInst &I) {
2494     IRBuilder<> IRB(&I);
2495     IRB.CreateCall(
2496         MS.MemsetFn,
2497         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2498          IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
2499          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2500     I.eraseFromParent();
2501   }
2502 
  // Variadic-argument intrinsics are delegated to the VAHelper member.
  void visitVAStartInst(VAStartInst &I) {
    VAHelper->visitVAStartInst(I);
  }

  void visitVACopyInst(VACopyInst &I) {
    VAHelper->visitVACopyInst(I);
  }
2510 
2511   /// Handle vector store-like intrinsics.
2512   ///
2513   /// Instrument intrinsics that look like a simple SIMD store: writes memory,
2514   /// has 1 pointer argument and 1 vector argument, returns void.
2515   bool handleVectorStoreIntrinsic(IntrinsicInst &I) {
2516     IRBuilder<> IRB(&I);
2517     Value* Addr = I.getArgOperand(0);
2518     Value *Shadow = getShadow(&I, 1);
2519     Value *ShadowPtr, *OriginPtr;
2520 
2521     // We don't know the pointer alignment (could be unaligned SSE store!).
2522     // Have to assume to worst case.
2523     std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
2524         Addr, IRB, Shadow->getType(), Align(1), /*isStore*/ true);
2525     IRB.CreateAlignedStore(Shadow, ShadowPtr, Align(1));
2526 
2527     if (ClCheckAccessAddress)
2528       insertShadowCheck(Addr, &I);
2529 
2530     // FIXME: factor out common code from materializeStores
2531     if (MS.TrackOrigins) IRB.CreateStore(getOrigin(&I, 1), OriginPtr);
2532     return true;
2533   }
2534 
2535   /// Handle vector load-like intrinsics.
2536   ///
2537   /// Instrument intrinsics that look like a simple SIMD load: reads memory,
2538   /// has 1 pointer argument, returns a vector.
2539   bool handleVectorLoadIntrinsic(IntrinsicInst &I) {
2540     IRBuilder<> IRB(&I);
2541     Value *Addr = I.getArgOperand(0);
2542 
2543     Type *ShadowTy = getShadowTy(&I);
2544     Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
2545     if (PropagateShadow) {
2546       // We don't know the pointer alignment (could be unaligned SSE load!).
2547       // Have to assume to worst case.
2548       const Align Alignment = Align(1);
2549       std::tie(ShadowPtr, OriginPtr) =
2550           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
2551       setShadow(&I,
2552                 IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
2553     } else {
2554       setShadow(&I, getCleanShadow(&I));
2555     }
2556 
2557     if (ClCheckAccessAddress)
2558       insertShadowCheck(Addr, &I);
2559 
2560     if (MS.TrackOrigins) {
2561       if (PropagateShadow)
2562         setOrigin(&I, IRB.CreateLoad(MS.OriginTy, OriginPtr));
2563       else
2564         setOrigin(&I, getCleanOrigin());
2565     }
2566     return true;
2567   }
2568 
2569   /// Handle (SIMD arithmetic)-like intrinsics.
2570   ///
2571   /// Instrument intrinsics with any number of arguments of the same type,
2572   /// equal to the return type. The type should be simple (no aggregates or
2573   /// pointers; vectors are fine).
2574   /// Caller guarantees that this intrinsic does not access memory.
2575   bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I) {
2576     Type *RetTy = I.getType();
2577     if (!(RetTy->isIntOrIntVectorTy() ||
2578           RetTy->isFPOrFPVectorTy() ||
2579           RetTy->isX86_MMXTy()))
2580       return false;
2581 
2582     unsigned NumArgOperands = I.getNumArgOperands();
2583 
2584     for (unsigned i = 0; i < NumArgOperands; ++i) {
2585       Type *Ty = I.getArgOperand(i)->getType();
2586       if (Ty != RetTy)
2587         return false;
2588     }
2589 
2590     IRBuilder<> IRB(&I);
2591     ShadowAndOriginCombiner SC(this, IRB);
2592     for (unsigned i = 0; i < NumArgOperands; ++i)
2593       SC.Add(I.getArgOperand(i));
2594     SC.Done(&I);
2595 
2596     return true;
2597   }
2598 
2599   /// Heuristically instrument unknown intrinsics.
2600   ///
2601   /// The main purpose of this code is to do something reasonable with all
2602   /// random intrinsics we might encounter, most importantly - SIMD intrinsics.
2603   /// We recognize several classes of intrinsics by their argument types and
2604   /// ModRefBehaviour and apply special instrumentation when we are reasonably
2605   /// sure that we know what the intrinsic does.
2606   ///
2607   /// We special-case intrinsics where this approach fails. See llvm.bswap
2608   /// handling as an example of that.
2609   bool handleUnknownIntrinsic(IntrinsicInst &I) {
2610     unsigned NumArgOperands = I.getNumArgOperands();
2611     if (NumArgOperands == 0)
2612       return false;
2613 
2614     if (NumArgOperands == 2 &&
2615         I.getArgOperand(0)->getType()->isPointerTy() &&
2616         I.getArgOperand(1)->getType()->isVectorTy() &&
2617         I.getType()->isVoidTy() &&
2618         !I.onlyReadsMemory()) {
2619       // This looks like a vector store.
2620       return handleVectorStoreIntrinsic(I);
2621     }
2622 
2623     if (NumArgOperands == 1 &&
2624         I.getArgOperand(0)->getType()->isPointerTy() &&
2625         I.getType()->isVectorTy() &&
2626         I.onlyReadsMemory()) {
2627       // This looks like a vector load.
2628       return handleVectorLoadIntrinsic(I);
2629     }
2630 
2631     if (I.doesNotAccessMemory())
2632       if (maybeHandleSimpleNomemIntrinsic(I))
2633         return true;
2634 
2635     // FIXME: detect and handle SSE maskstore/maskload
2636     return false;
2637   }
2638 
  // Invariant-group intrinsics forward their operand; pass shadow and origin
  // of operand 0 straight through to the result.
  void handleInvariantGroup(IntrinsicInst &I) {
    setShadow(&I, getShadow(&I, 0));
    setOrigin(&I, getOrigin(&I, 0));
  }
2643 
  // Record a lifetime.start so the alloca it covers can be (re)poisoned at
  // that point rather than only at function entry.
  void handleLifetimeStart(IntrinsicInst &I) {
    if (!PoisonStack)
      return;
    DenseMap<Value *, AllocaInst *> AllocaForValue;
    // Argument 1 of llvm.lifetime.start is the pointer into the alloca.
    AllocaInst *AI =
        llvm::findAllocaForValue(I.getArgOperand(1), AllocaForValue);
    // If the underlying alloca cannot be identified, disable lifetime-based
    // instrumentation; note the pair is still recorded with a null alloca.
    if (!AI)
      InstrumentLifetimeStart = false;
    LifetimeStartList.push_back(std::make_pair(&I, AI));
  }
2654 
2655   void handleBswap(IntrinsicInst &I) {
2656     IRBuilder<> IRB(&I);
2657     Value *Op = I.getArgOperand(0);
2658     Type *OpType = Op->getType();
2659     Function *BswapFunc = Intrinsic::getDeclaration(
2660       F.getParent(), Intrinsic::bswap, makeArrayRef(&OpType, 1));
2661     setShadow(&I, IRB.CreateCall(BswapFunc, getShadow(Op)));
2662     setOrigin(&I, getOrigin(Op));
2663   }
2664 
  // Instrument vector convert intrinsic.
  //
  // This function instruments intrinsics like cvtsi2ss:
  // %Out = int_xxx_cvtyyy(%ConvertOp)
  // or
  // %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp)
  // Intrinsic converts \p NumUsedElements elements of \p ConvertOp to the same
  // number \p Out elements, and (if has 2 arguments) copies the rest of the
  // elements from \p CopyOp.
  // In most cases conversion involves floating-point value which may trigger a
  // hardware exception when not fully initialized. For this reason we require
  // \p ConvertOp[0:NumUsedElements] to be fully initialized and trap otherwise.
  // We copy the shadow of \p CopyOp[NumUsedElements:] to \p
  // Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
  // return a fully initialized value.
  void handleVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements) {
    IRBuilder<> IRB(&I);
    Value *CopyOp, *ConvertOp;

    // Decode the operand layout: 1 arg = convert only; 2 = copy + convert;
    // 3 = copy + convert + constant rounding-mode immediate.
    switch (I.getNumArgOperands()) {
    case 3:
      assert(isa<ConstantInt>(I.getArgOperand(2)) && "Invalid rounding mode");
      LLVM_FALLTHROUGH;
    case 2:
      CopyOp = I.getArgOperand(0);
      ConvertOp = I.getArgOperand(1);
      break;
    case 1:
      ConvertOp = I.getArgOperand(0);
      CopyOp = nullptr;
      break;
    default:
      llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
    }

    // The first *NumUsedElements* elements of ConvertOp are converted to the
    // same number of output elements. The rest of the output is copied from
    // CopyOp, or (if not available) filled with zeroes.
    // Combine shadow for elements of ConvertOp that are used in this operation,
    // and insert a check.
    // FIXME: consider propagating shadow of ConvertOp, at least in the case of
    // int->any conversion.
    Value *ConvertShadow = getShadow(ConvertOp);
    Value *AggShadow = nullptr;
    if (ConvertOp->getType()->isVectorTy()) {
      // OR together the shadows of the first NumUsedElements lanes.
      AggShadow = IRB.CreateExtractElement(
          ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
      for (int i = 1; i < NumUsedElements; ++i) {
        Value *MoreShadow = IRB.CreateExtractElement(
            ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), i));
        AggShadow = IRB.CreateOr(AggShadow, MoreShadow);
      }
    } else {
      AggShadow = ConvertShadow;
    }
    assert(AggShadow->getType()->isIntegerTy());
    insertShadowCheck(AggShadow, getOrigin(ConvertOp), &I);

    // Build result shadow by zero-filling parts of CopyOp shadow that come from
    // ConvertOp.
    if (CopyOp) {
      assert(CopyOp->getType() == I.getType());
      assert(CopyOp->getType()->isVectorTy());
      Value *ResultShadow = getShadow(CopyOp);
      Type *EltTy = cast<VectorType>(ResultShadow->getType())->getElementType();
      // Converted lanes are checked above, so their result shadow is clean.
      for (int i = 0; i < NumUsedElements; ++i) {
        ResultShadow = IRB.CreateInsertElement(
            ResultShadow, ConstantInt::getNullValue(EltTy),
            ConstantInt::get(IRB.getInt32Ty(), i));
      }
      setShadow(&I, ResultShadow);
      setOrigin(&I, getOrigin(CopyOp));
    } else {
      setShadow(&I, getCleanShadow(&I));
      setOrigin(&I, getCleanOrigin());
    }
  }
2742 
2743   // Given a scalar or vector, extract lower 64 bits (or less), and return all
2744   // zeroes if it is zero, and all ones otherwise.
2745   Value *Lower64ShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
2746     if (S->getType()->isVectorTy())
2747       S = CreateShadowCast(IRB, S, IRB.getInt64Ty(), /* Signed */ true);
2748     assert(S->getType()->getPrimitiveSizeInBits() <= 64);
2749     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
2750     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
2751   }
2752 
2753   // Given a vector, extract its first element, and return all
2754   // zeroes if it is zero, and all ones otherwise.
2755   Value *LowerElementShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
2756     Value *S1 = IRB.CreateExtractElement(S, (uint64_t)0);
2757     Value *S2 = IRB.CreateICmpNE(S1, getCleanShadow(S1));
2758     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
2759   }
2760 
2761   Value *VariableShadowExtend(IRBuilder<> &IRB, Value *S) {
2762     Type *T = S->getType();
2763     assert(T->isVectorTy());
2764     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
2765     return IRB.CreateSExt(S2, T);
2766   }
2767 
  // Instrument vector shift intrinsic.
  //
  // This function instruments intrinsics like int_x86_avx2_psll_w.
  // Intrinsic shifts %In by %ShiftSize bits.
  // %ShiftSize may be a vector. In that case the lower 64 bits determine shift
  // size, and the rest is ignored. Behavior is defined even if shift size is
  // greater than register (or field) width.
  void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) {
    assert(I.getNumArgOperands() == 2);
    IRBuilder<> IRB(&I);
    // If any of the S2 bits are poisoned, the whole thing is poisoned.
    // Otherwise perform the same shift on S1.
    Value *S1 = getShadow(&I, 0);
    Value *S2 = getShadow(&I, 1);
    // Variable: per-lane shift amounts; otherwise only the low 64 bits of
    // the second operand matter.
    Value *S2Conv = Variable ? VariableShadowExtend(IRB, S2)
                             : Lower64ShadowExtend(IRB, S2, getShadowTy(&I));
    Value *V1 = I.getOperand(0);
    Value *V2 = I.getOperand(1);
    // Apply the original shift intrinsic itself to the shadow of the first
    // operand (bitcast to the value type and back).
    Value *Shift = IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
                                  {IRB.CreateBitCast(S1, V1->getType()), V2});
    Shift = IRB.CreateBitCast(Shift, getShadowTy(&I));
    setShadow(&I, IRB.CreateOr(Shift, S2Conv));
    setOriginForNaryOp(I);
  }
2792 
2793   // Get an X86_MMX-sized vector type.
2794   Type *getMMXVectorTy(unsigned EltSizeInBits) {
2795     const unsigned X86_MMXSizeInBits = 64;
2796     assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
2797            "Illegal MMX vector element size");
2798     return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
2799                                 X86_MMXSizeInBits / EltSizeInBits);
2800   }
2801 
2802   // Returns a signed counterpart for an (un)signed-saturate-and-pack
2803   // intrinsic.
2804   Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) {
2805     switch (id) {
2806       case Intrinsic::x86_sse2_packsswb_128:
2807       case Intrinsic::x86_sse2_packuswb_128:
2808         return Intrinsic::x86_sse2_packsswb_128;
2809 
2810       case Intrinsic::x86_sse2_packssdw_128:
2811       case Intrinsic::x86_sse41_packusdw:
2812         return Intrinsic::x86_sse2_packssdw_128;
2813 
2814       case Intrinsic::x86_avx2_packsswb:
2815       case Intrinsic::x86_avx2_packuswb:
2816         return Intrinsic::x86_avx2_packsswb;
2817 
2818       case Intrinsic::x86_avx2_packssdw:
2819       case Intrinsic::x86_avx2_packusdw:
2820         return Intrinsic::x86_avx2_packssdw;
2821 
2822       case Intrinsic::x86_mmx_packsswb:
2823       case Intrinsic::x86_mmx_packuswb:
2824         return Intrinsic::x86_mmx_packsswb;
2825 
2826       case Intrinsic::x86_mmx_packssdw:
2827         return Intrinsic::x86_mmx_packssdw;
2828       default:
2829         llvm_unreachable("unexpected intrinsic id");
2830     }
2831   }
2832 
  // Instrument vector pack intrinsic.
  //
  // This function instruments intrinsics like x86_mmx_packsswb, that
  // packs elements of 2 input vectors into half as many bits with saturation.
  // Shadow is propagated with the signed variant of the same intrinsic applied
  // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer).
  // EltSizeInBits is used only for x86mmx arguments.
  void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned EltSizeInBits = 0) {
    assert(I.getNumArgOperands() == 2);
    bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
    IRBuilder<> IRB(&I);
    Value *S1 = getShadow(&I, 0);
    Value *S2 = getShadow(&I, 1);
    assert(isX86_MMX || S1->getType()->isVectorTy());

    // SExt and ICmpNE below must apply to individual elements of input vectors.
    // In case of x86mmx arguments, cast them to appropriate vector types and
    // back.
    Type *T = isX86_MMX ? getMMXVectorTy(EltSizeInBits) : S1->getType();
    if (isX86_MMX) {
      S1 = IRB.CreateBitCast(S1, T);
      S2 = IRB.CreateBitCast(S2, T);
    }
    // Widen each nonzero shadow element to all-ones before packing, so that
    // saturation in the pack cannot hide poisoned bits.
    Value *S1_ext = IRB.CreateSExt(
        IRB.CreateICmpNE(S1, Constant::getNullValue(T)), T);
    Value *S2_ext = IRB.CreateSExt(
        IRB.CreateICmpNE(S2, Constant::getNullValue(T)), T);
    if (isX86_MMX) {
      Type *X86_MMXTy = Type::getX86_MMXTy(*MS.C);
      S1_ext = IRB.CreateBitCast(S1_ext, X86_MMXTy);
      S2_ext = IRB.CreateBitCast(S2_ext, X86_MMXTy);
    }

    Function *ShadowFn = Intrinsic::getDeclaration(
        F.getParent(), getSignedPackIntrinsic(I.getIntrinsicID()));

    Value *S =
        IRB.CreateCall(ShadowFn, {S1_ext, S2_ext}, "_msprop_vector_pack");
    if (isX86_MMX) S = IRB.CreateBitCast(S, getShadowTy(&I));
    setShadow(&I, S);
    setOriginForNaryOp(I);
  }
2875 
  // Instrument sum-of-absolute-differences intrinsic.
  void handleVectorSadIntrinsic(IntrinsicInst &I) {
    // psadbw produces 16 significant bits per result element; the upper bits
    // are architecturally zero.
    const unsigned SignificantBitsPerResultElement = 16;
    bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
    Type *ResTy = isX86_MMX ? IntegerType::get(*MS.C, 64) : I.getType();
    unsigned ZeroBitsPerResultElement =
        ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement;

    IRBuilder<> IRB(&I);
    // Any poisoned input bit poisons the whole result element...
    Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
    S = IRB.CreateBitCast(S, ResTy);
    S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
                       ResTy);
    // ...except its always-zero upper bits, whose shadow is cleared here.
    S = IRB.CreateLShr(S, ZeroBitsPerResultElement);
    S = IRB.CreateBitCast(S, getShadowTy(&I));
    setShadow(&I, S);
    setOriginForNaryOp(I);
  }
2894 
2895   // Instrument multiply-add intrinsic.
2896   void handleVectorPmaddIntrinsic(IntrinsicInst &I,
2897                                   unsigned EltSizeInBits = 0) {
2898     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
2899     Type *ResTy = isX86_MMX ? getMMXVectorTy(EltSizeInBits * 2) : I.getType();
2900     IRBuilder<> IRB(&I);
2901     Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2902     S = IRB.CreateBitCast(S, ResTy);
2903     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
2904                        ResTy);
2905     S = IRB.CreateBitCast(S, getShadowTy(&I));
2906     setShadow(&I, S);
2907     setOriginForNaryOp(I);
2908   }
2909 
2910   // Instrument compare-packed intrinsic.
2911   // Basically, an or followed by sext(icmp ne 0) to end up with all-zeros or
2912   // all-ones shadow.
2913   void handleVectorComparePackedIntrinsic(IntrinsicInst &I) {
2914     IRBuilder<> IRB(&I);
2915     Type *ResTy = getShadowTy(&I);
2916     Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2917     Value *S = IRB.CreateSExt(
2918         IRB.CreateICmpNE(S0, Constant::getNullValue(ResTy)), ResTy);
2919     setShadow(&I, S);
2920     setOriginForNaryOp(I);
2921   }
2922 
2923   // Instrument compare-scalar intrinsic.
2924   // This handles both cmp* intrinsics which return the result in the first
2925   // element of a vector, and comi* which return the result as i32.
2926   void handleVectorCompareScalarIntrinsic(IntrinsicInst &I) {
2927     IRBuilder<> IRB(&I);
2928     Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2929     Value *S = LowerElementShadowExtend(IRB, S0, getShadowTy(&I));
2930     setShadow(&I, S);
2931     setOriginForNaryOp(I);
2932   }
2933 
  // Instrument generic vector reduction intrinsics
  // by ORing together all their fields.
  void handleVectorReduceIntrinsic(IntrinsicInst &I) {
    IRBuilder<> IRB(&I);
    // Result is poisoned if any lane of the operand is poisoned.
    Value *S = IRB.CreateOrReduce(getShadow(&I, 0));
    setShadow(&I, S);
    setOrigin(&I, getOrigin(&I, 0));
  }
2942 
  // Instrument experimental.vector.reduce.or intrinsic.
  // Valid (non-poisoned) set bits in the operand pull low the
  // corresponding shadow bits.
  void handleVectorReduceOrIntrinsic(IntrinsicInst &I) {
    IRBuilder<> IRB(&I);
    Value *OperandShadow = getShadow(&I, 0);
    Value *OperandUnsetBits = IRB.CreateNot(I.getOperand(0));
    // A lane's bit is "unset or poisoned" if the value bit is 0 or its
    // shadow bit is 1.
    Value *OperandUnsetOrPoison = IRB.CreateOr(OperandUnsetBits, OperandShadow);
    // Bit N is clean if any field's bit N is 1 and unpoison
    Value *OutShadowMask = IRB.CreateAndReduce(OperandUnsetOrPoison);
    // Otherwise, it is clean if every field's bit N is unpoison
    Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
    Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);

    setShadow(&I, S);
    setOrigin(&I, getOrigin(&I, 0));
  }
2960 
  // Instrument experimental.vector.reduce.and intrinsic.
  // (Comment previously said "reduce.or" — copy-paste error.)
  // Valid (non-poisoned) unset bits in the operand pull down the
  // corresponding shadow bits.
  void handleVectorReduceAndIntrinsic(IntrinsicInst &I) {
    IRBuilder<> IRB(&I);
    Value *OperandShadow = getShadow(&I, 0);
    // A lane's bit is "set or poisoned" if the value bit is 1 or its shadow
    // bit is 1.
    Value *OperandSetOrPoison = IRB.CreateOr(I.getOperand(0), OperandShadow);
    // Bit N is clean if any field's bit N is 0 and unpoison
    Value *OutShadowMask = IRB.CreateAndReduce(OperandSetOrPoison);
    // Otherwise, it is clean if every field's bit N is unpoison
    Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
    Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);

    setShadow(&I, S);
    setOrigin(&I, getOrigin(&I, 0));
  }
2977 
2978   void handleStmxcsr(IntrinsicInst &I) {
2979     IRBuilder<> IRB(&I);
2980     Value* Addr = I.getArgOperand(0);
2981     Type *Ty = IRB.getInt32Ty();
2982     Value *ShadowPtr =
2983         getShadowOriginPtr(Addr, IRB, Ty, Align(1), /*isStore*/ true).first;
2984 
2985     IRB.CreateStore(getCleanShadow(Ty),
2986                     IRB.CreatePointerCast(ShadowPtr, Ty->getPointerTo()));
2987 
2988     if (ClCheckAccessAddress)
2989       insertShadowCheck(Addr, &I);
2990   }
2991 
2992   void handleLdmxcsr(IntrinsicInst &I) {
2993     if (!InsertChecks) return;
2994 
2995     IRBuilder<> IRB(&I);
2996     Value *Addr = I.getArgOperand(0);
2997     Type *Ty = IRB.getInt32Ty();
2998     const Align Alignment = Align(1);
2999     Value *ShadowPtr, *OriginPtr;
3000     std::tie(ShadowPtr, OriginPtr) =
3001         getShadowOriginPtr(Addr, IRB, Ty, Alignment, /*isStore*/ false);
3002 
3003     if (ClCheckAccessAddress)
3004       insertShadowCheck(Addr, &I);
3005 
3006     Value *Shadow = IRB.CreateAlignedLoad(Ty, ShadowPtr, Alignment, "_ldmxcsr");
3007     Value *Origin = MS.TrackOrigins ? IRB.CreateLoad(MS.OriginTy, OriginPtr)
3008                                     : getCleanOrigin();
3009     insertShadowCheck(Shadow, Origin, &I);
3010   }
3011 
  // Instrument llvm.masked.store: store the value's shadow through the same
  // mask, so lanes that are not written keep their previous shadow.
  // Args: (value, pointer, alignment-imm, mask).
  void handleMaskedStore(IntrinsicInst &I) {
    IRBuilder<> IRB(&I);
    Value *V = I.getArgOperand(0);
    Value *Addr = I.getArgOperand(1);
    const Align Alignment(
        cast<ConstantInt>(I.getArgOperand(2))->getZExtValue());
    Value *Mask = I.getArgOperand(3);
    Value *Shadow = getShadow(V);

    Value *ShadowPtr;
    Value *OriginPtr;
    std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
        Addr, IRB, Shadow->getType(), Alignment, /*isStore*/ true);

    if (ClCheckAccessAddress) {
      insertShadowCheck(Addr, &I);
      // Uninitialized mask is kind of like uninitialized address, but not as
      // scary.
      insertShadowCheck(Mask, &I);
    }

    IRB.CreateMaskedStore(Shadow, ShadowPtr, Alignment, Mask);

    if (MS.TrackOrigins) {
      auto &DL = F.getParent()->getDataLayout();
      // Origins are not masked: conservatively repaint the origin for the
      // whole store width.
      paintOrigin(IRB, getOrigin(V), OriginPtr,
                  DL.getTypeStoreSize(Shadow->getType()),
                  std::max(Alignment, kMinOriginAlignment));
    }
  }
3042 
  // Instrument llvm.masked.load: load the shadow with the same mask, using
  // the pass-through value's shadow for the disabled lanes.
  // Args: (pointer, alignment-imm, mask, passthru).  Always returns true.
  bool handleMaskedLoad(IntrinsicInst &I) {
    IRBuilder<> IRB(&I);
    Value *Addr = I.getArgOperand(0);
    const Align Alignment(
        cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
    Value *Mask = I.getArgOperand(2);
    Value *PassThru = I.getArgOperand(3);

    Type *ShadowTy = getShadowTy(&I);
    Value *ShadowPtr, *OriginPtr;
    if (PropagateShadow) {
      std::tie(ShadowPtr, OriginPtr) =
          getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
      setShadow(&I, IRB.CreateMaskedLoad(ShadowPtr, Alignment, Mask,
                                         getShadow(PassThru), "_msmaskedld"));
    } else {
      setShadow(&I, getCleanShadow(&I));
    }

    if (ClCheckAccessAddress) {
      insertShadowCheck(Addr, &I);
      // Uninitialized mask lanes are reported too, like an address check.
      insertShadowCheck(Mask, &I);
    }

    if (MS.TrackOrigins) {
      if (PropagateShadow) {
        // Choose between PassThru's and the loaded value's origins.
        // Mask the pass-through shadow down to the lanes it actually
        // contributes (i1 true becomes all-ones via neg+sext).
        Value *MaskedPassThruShadow = IRB.CreateAnd(
            getShadow(PassThru), IRB.CreateSExt(IRB.CreateNeg(Mask), ShadowTy));

        // OR all lanes together: nonzero iff PassThru contributes any poison.
        Value *Acc = IRB.CreateExtractElement(
            MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
        for (int i = 1, N = cast<FixedVectorType>(PassThru->getType())
                                ->getNumElements();
             i < N; ++i) {
          Value *More = IRB.CreateExtractElement(
              MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), i));
          Acc = IRB.CreateOr(Acc, More);
        }

        // If PassThru carries poison, report its origin; otherwise report
        // the origin recorded for the loaded memory.
        Value *Origin = IRB.CreateSelect(
            IRB.CreateICmpNE(Acc, Constant::getNullValue(Acc->getType())),
            getOrigin(PassThru), IRB.CreateLoad(MS.OriginTy, OriginPtr));

        setOrigin(&I, Origin);
      } else {
        setOrigin(&I, getCleanOrigin());
      }
    }
    return true;
  }
3094 
3095   // Instrument BMI / BMI2 intrinsics.
3096   // All of these intrinsics are Z = I(X, Y)
3097   // where the types of all operands and the result match, and are either i32 or i64.
3098   // The following instrumentation happens to work for all of them:
3099   //   Sz = I(Sx, Y) | (sext (Sy != 0))
3100   void handleBmiIntrinsic(IntrinsicInst &I) {
3101     IRBuilder<> IRB(&I);
3102     Type *ShadowTy = getShadowTy(&I);
3103 
3104     // If any bit of the mask operand is poisoned, then the whole thing is.
3105     Value *SMask = getShadow(&I, 1);
3106     SMask = IRB.CreateSExt(IRB.CreateICmpNE(SMask, getCleanShadow(ShadowTy)),
3107                            ShadowTy);
3108     // Apply the same intrinsic to the shadow of the first operand.
3109     Value *S = IRB.CreateCall(I.getCalledFunction(),
3110                               {getShadow(&I, 0), I.getOperand(1)});
3111     S = IRB.CreateOr(SMask, S);
3112     setShadow(&I, S);
3113     setOriginForNaryOp(I);
3114   }
3115 
3116   SmallVector<int, 8> getPclmulMask(unsigned Width, bool OddElements) {
3117     SmallVector<int, 8> Mask;
3118     for (unsigned X = OddElements ? 1 : 0; X < Width; X += 2) {
3119       Mask.append(2, X);
3120     }
3121     return Mask;
3122   }
3123 
  // Instrument pclmul intrinsics.
  // These intrinsics operate either on odd or on even elements of the input
  // vectors, depending on the constant in the 3rd argument, ignoring the rest.
  // Replace the unused elements with copies of the used ones, ex:
  //   (0, 1, 2, 3) -> (0, 0, 2, 2) (even case)
  // or
  //   (0, 1, 2, 3) -> (1, 1, 3, 3) (odd case)
  // and then apply the usual shadow combining logic.
  void handlePclmulIntrinsic(IntrinsicInst &I) {
    IRBuilder<> IRB(&I);
    Type *ShadowTy = getShadowTy(&I);
    unsigned Width =
        cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
    assert(isa<ConstantInt>(I.getArgOperand(2)) &&
           "pclmul 3rd operand must be a constant");
    unsigned Imm = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
    // Bit 0 of the immediate selects the half of operand 0, bit 4 selects
    // the half of operand 1 (per the pclmulqdq encoding).
    Value *Shuf0 =
        IRB.CreateShuffleVector(getShadow(&I, 0), UndefValue::get(ShadowTy),
                                getPclmulMask(Width, Imm & 0x01));
    Value *Shuf1 =
        IRB.CreateShuffleVector(getShadow(&I, 1), UndefValue::get(ShadowTy),
                                getPclmulMask(Width, Imm & 0x10));
    ShadowAndOriginCombiner SOC(this, IRB);
    SOC.Add(Shuf0, getOrigin(&I, 0));
    SOC.Add(Shuf1, getOrigin(&I, 1));
    SOC.Done(&I);
  }
3151 
3152   // Instrument _mm_*_sd intrinsics
3153   void handleUnarySdIntrinsic(IntrinsicInst &I) {
3154     IRBuilder<> IRB(&I);
3155     Value *First = getShadow(&I, 0);
3156     Value *Second = getShadow(&I, 1);
3157     // High word of first operand, low word of second
3158     Value *Shadow =
3159         IRB.CreateShuffleVector(First, Second, llvm::makeArrayRef<int>({2, 1}));
3160 
3161     setShadow(&I, Shadow);
3162     setOriginForNaryOp(I);
3163   }
3164 
3165   void handleBinarySdIntrinsic(IntrinsicInst &I) {
3166     IRBuilder<> IRB(&I);
3167     Value *First = getShadow(&I, 0);
3168     Value *Second = getShadow(&I, 1);
3169     Value *OrShadow = IRB.CreateOr(First, Second);
3170     // High word of first operand, low word of both OR'd together
3171     Value *Shadow = IRB.CreateShuffleVector(First, OrShadow,
3172                                             llvm::makeArrayRef<int>({2, 1}));
3173 
3174     setShadow(&I, Shadow);
3175     setOriginForNaryOp(I);
3176   }
3177 
  // Dispatch intrinsics with dedicated instrumentation to their handlers.
  // Anything not listed falls back to handleUnknownIntrinsic(), and
  // ultimately to the generic visitInstruction().
  void visitIntrinsicInst(IntrinsicInst &I) {
    switch (I.getIntrinsicID()) {
    case Intrinsic::lifetime_start:
      handleLifetimeStart(I);
      break;
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
      handleInvariantGroup(I);
      break;
    case Intrinsic::bswap:
      handleBswap(I);
      break;
    case Intrinsic::masked_store:
      handleMaskedStore(I);
      break;
    case Intrinsic::masked_load:
      handleMaskedLoad(I);
      break;
    case Intrinsic::experimental_vector_reduce_and:
      handleVectorReduceAndIntrinsic(I);
      break;
    case Intrinsic::experimental_vector_reduce_or:
      handleVectorReduceOrIntrinsic(I);
      break;
    case Intrinsic::experimental_vector_reduce_add:
    case Intrinsic::experimental_vector_reduce_xor:
    case Intrinsic::experimental_vector_reduce_mul:
      handleVectorReduceIntrinsic(I);
      break;
    case Intrinsic::x86_sse_stmxcsr:
      handleStmxcsr(I);
      break;
    case Intrinsic::x86_sse_ldmxcsr:
      handleLdmxcsr(I);
      break;
    // Scalar <-> FP conversions: one element of the operand is converted.
    case Intrinsic::x86_avx512_vcvtsd2usi64:
    case Intrinsic::x86_avx512_vcvtsd2usi32:
    case Intrinsic::x86_avx512_vcvtss2usi64:
    case Intrinsic::x86_avx512_vcvtss2usi32:
    case Intrinsic::x86_avx512_cvttss2usi64:
    case Intrinsic::x86_avx512_cvttss2usi:
    case Intrinsic::x86_avx512_cvttsd2usi64:
    case Intrinsic::x86_avx512_cvttsd2usi:
    case Intrinsic::x86_avx512_cvtusi2ss:
    case Intrinsic::x86_avx512_cvtusi642sd:
    case Intrinsic::x86_avx512_cvtusi642ss:
    case Intrinsic::x86_sse2_cvtsd2si64:
    case Intrinsic::x86_sse2_cvtsd2si:
    case Intrinsic::x86_sse2_cvtsd2ss:
    case Intrinsic::x86_sse2_cvttsd2si64:
    case Intrinsic::x86_sse2_cvttsd2si:
    case Intrinsic::x86_sse_cvtss2si64:
    case Intrinsic::x86_sse_cvtss2si:
    case Intrinsic::x86_sse_cvttss2si64:
    case Intrinsic::x86_sse_cvttss2si:
      handleVectorConvertIntrinsic(I, 1);
      break;
    case Intrinsic::x86_sse_cvtps2pi:
    case Intrinsic::x86_sse_cvttps2pi:
      handleVectorConvertIntrinsic(I, 2);
      break;

    // Vector shifts with a scalar or immediate shift amount.
    case Intrinsic::x86_avx512_psll_w_512:
    case Intrinsic::x86_avx512_psll_d_512:
    case Intrinsic::x86_avx512_psll_q_512:
    case Intrinsic::x86_avx512_pslli_w_512:
    case Intrinsic::x86_avx512_pslli_d_512:
    case Intrinsic::x86_avx512_pslli_q_512:
    case Intrinsic::x86_avx512_psrl_w_512:
    case Intrinsic::x86_avx512_psrl_d_512:
    case Intrinsic::x86_avx512_psrl_q_512:
    case Intrinsic::x86_avx512_psra_w_512:
    case Intrinsic::x86_avx512_psra_d_512:
    case Intrinsic::x86_avx512_psra_q_512:
    case Intrinsic::x86_avx512_psrli_w_512:
    case Intrinsic::x86_avx512_psrli_d_512:
    case Intrinsic::x86_avx512_psrli_q_512:
    case Intrinsic::x86_avx512_psrai_w_512:
    case Intrinsic::x86_avx512_psrai_d_512:
    case Intrinsic::x86_avx512_psrai_q_512:
    case Intrinsic::x86_avx512_psra_q_256:
    case Intrinsic::x86_avx512_psra_q_128:
    case Intrinsic::x86_avx512_psrai_q_256:
    case Intrinsic::x86_avx512_psrai_q_128:
    case Intrinsic::x86_avx2_psll_w:
    case Intrinsic::x86_avx2_psll_d:
    case Intrinsic::x86_avx2_psll_q:
    case Intrinsic::x86_avx2_pslli_w:
    case Intrinsic::x86_avx2_pslli_d:
    case Intrinsic::x86_avx2_pslli_q:
    case Intrinsic::x86_avx2_psrl_w:
    case Intrinsic::x86_avx2_psrl_d:
    case Intrinsic::x86_avx2_psrl_q:
    case Intrinsic::x86_avx2_psra_w:
    case Intrinsic::x86_avx2_psra_d:
    case Intrinsic::x86_avx2_psrli_w:
    case Intrinsic::x86_avx2_psrli_d:
    case Intrinsic::x86_avx2_psrli_q:
    case Intrinsic::x86_avx2_psrai_w:
    case Intrinsic::x86_avx2_psrai_d:
    case Intrinsic::x86_sse2_psll_w:
    case Intrinsic::x86_sse2_psll_d:
    case Intrinsic::x86_sse2_psll_q:
    case Intrinsic::x86_sse2_pslli_w:
    case Intrinsic::x86_sse2_pslli_d:
    case Intrinsic::x86_sse2_pslli_q:
    case Intrinsic::x86_sse2_psrl_w:
    case Intrinsic::x86_sse2_psrl_d:
    case Intrinsic::x86_sse2_psrl_q:
    case Intrinsic::x86_sse2_psra_w:
    case Intrinsic::x86_sse2_psra_d:
    case Intrinsic::x86_sse2_psrli_w:
    case Intrinsic::x86_sse2_psrli_d:
    case Intrinsic::x86_sse2_psrli_q:
    case Intrinsic::x86_sse2_psrai_w:
    case Intrinsic::x86_sse2_psrai_d:
    case Intrinsic::x86_mmx_psll_w:
    case Intrinsic::x86_mmx_psll_d:
    case Intrinsic::x86_mmx_psll_q:
    case Intrinsic::x86_mmx_pslli_w:
    case Intrinsic::x86_mmx_pslli_d:
    case Intrinsic::x86_mmx_pslli_q:
    case Intrinsic::x86_mmx_psrl_w:
    case Intrinsic::x86_mmx_psrl_d:
    case Intrinsic::x86_mmx_psrl_q:
    case Intrinsic::x86_mmx_psra_w:
    case Intrinsic::x86_mmx_psra_d:
    case Intrinsic::x86_mmx_psrli_w:
    case Intrinsic::x86_mmx_psrli_d:
    case Intrinsic::x86_mmx_psrli_q:
    case Intrinsic::x86_mmx_psrai_w:
    case Intrinsic::x86_mmx_psrai_d:
      handleVectorShiftIntrinsic(I, /* Variable */ false);
      break;
    // Vector shifts with a per-element (variable) shift amount.
    case Intrinsic::x86_avx2_psllv_d:
    case Intrinsic::x86_avx2_psllv_d_256:
    case Intrinsic::x86_avx512_psllv_d_512:
    case Intrinsic::x86_avx2_psllv_q:
    case Intrinsic::x86_avx2_psllv_q_256:
    case Intrinsic::x86_avx512_psllv_q_512:
    case Intrinsic::x86_avx2_psrlv_d:
    case Intrinsic::x86_avx2_psrlv_d_256:
    case Intrinsic::x86_avx512_psrlv_d_512:
    case Intrinsic::x86_avx2_psrlv_q:
    case Intrinsic::x86_avx2_psrlv_q_256:
    case Intrinsic::x86_avx512_psrlv_q_512:
    case Intrinsic::x86_avx2_psrav_d:
    case Intrinsic::x86_avx2_psrav_d_256:
    case Intrinsic::x86_avx512_psrav_d_512:
    case Intrinsic::x86_avx512_psrav_q_128:
    case Intrinsic::x86_avx512_psrav_q_256:
    case Intrinsic::x86_avx512_psrav_q_512:
      handleVectorShiftIntrinsic(I, /* Variable */ true);
      break;

    case Intrinsic::x86_sse2_packsswb_128:
    case Intrinsic::x86_sse2_packssdw_128:
    case Intrinsic::x86_sse2_packuswb_128:
    case Intrinsic::x86_sse41_packusdw:
    case Intrinsic::x86_avx2_packsswb:
    case Intrinsic::x86_avx2_packssdw:
    case Intrinsic::x86_avx2_packuswb:
    case Intrinsic::x86_avx2_packusdw:
      handleVectorPackIntrinsic(I);
      break;

    // MMX variants operate on x86_mmx, so the element width is passed
    // explicitly.
    case Intrinsic::x86_mmx_packsswb:
    case Intrinsic::x86_mmx_packuswb:
      handleVectorPackIntrinsic(I, 16);
      break;

    case Intrinsic::x86_mmx_packssdw:
      handleVectorPackIntrinsic(I, 32);
      break;

    case Intrinsic::x86_mmx_psad_bw:
    case Intrinsic::x86_sse2_psad_bw:
    case Intrinsic::x86_avx2_psad_bw:
      handleVectorSadIntrinsic(I);
      break;

    case Intrinsic::x86_sse2_pmadd_wd:
    case Intrinsic::x86_avx2_pmadd_wd:
    case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
    case Intrinsic::x86_avx2_pmadd_ub_sw:
      handleVectorPmaddIntrinsic(I);
      break;

    case Intrinsic::x86_ssse3_pmadd_ub_sw:
      handleVectorPmaddIntrinsic(I, 8);
      break;

    case Intrinsic::x86_mmx_pmadd_wd:
      handleVectorPmaddIntrinsic(I, 16);
      break;

    // Scalar FP comparisons: only the low element matters.
    case Intrinsic::x86_sse_cmp_ss:
    case Intrinsic::x86_sse2_cmp_sd:
    case Intrinsic::x86_sse_comieq_ss:
    case Intrinsic::x86_sse_comilt_ss:
    case Intrinsic::x86_sse_comile_ss:
    case Intrinsic::x86_sse_comigt_ss:
    case Intrinsic::x86_sse_comige_ss:
    case Intrinsic::x86_sse_comineq_ss:
    case Intrinsic::x86_sse_ucomieq_ss:
    case Intrinsic::x86_sse_ucomilt_ss:
    case Intrinsic::x86_sse_ucomile_ss:
    case Intrinsic::x86_sse_ucomigt_ss:
    case Intrinsic::x86_sse_ucomige_ss:
    case Intrinsic::x86_sse_ucomineq_ss:
    case Intrinsic::x86_sse2_comieq_sd:
    case Intrinsic::x86_sse2_comilt_sd:
    case Intrinsic::x86_sse2_comile_sd:
    case Intrinsic::x86_sse2_comigt_sd:
    case Intrinsic::x86_sse2_comige_sd:
    case Intrinsic::x86_sse2_comineq_sd:
    case Intrinsic::x86_sse2_ucomieq_sd:
    case Intrinsic::x86_sse2_ucomilt_sd:
    case Intrinsic::x86_sse2_ucomile_sd:
    case Intrinsic::x86_sse2_ucomigt_sd:
    case Intrinsic::x86_sse2_ucomige_sd:
    case Intrinsic::x86_sse2_ucomineq_sd:
      handleVectorCompareScalarIntrinsic(I);
      break;

    case Intrinsic::x86_sse_cmp_ps:
    case Intrinsic::x86_sse2_cmp_pd:
      // FIXME: For x86_avx_cmp_pd_256 and x86_avx_cmp_ps_256 this function
      // generates reasonably looking IR that fails in the backend with "Do not
      // know how to split the result of this operator!".
      handleVectorComparePackedIntrinsic(I);
      break;

    case Intrinsic::x86_bmi_bextr_32:
    case Intrinsic::x86_bmi_bextr_64:
    case Intrinsic::x86_bmi_bzhi_32:
    case Intrinsic::x86_bmi_bzhi_64:
    case Intrinsic::x86_bmi_pdep_32:
    case Intrinsic::x86_bmi_pdep_64:
    case Intrinsic::x86_bmi_pext_32:
    case Intrinsic::x86_bmi_pext_64:
      handleBmiIntrinsic(I);
      break;

    case Intrinsic::x86_pclmulqdq:
    case Intrinsic::x86_pclmulqdq_256:
    case Intrinsic::x86_pclmulqdq_512:
      handlePclmulIntrinsic(I);
      break;

    case Intrinsic::x86_sse41_round_sd:
      handleUnarySdIntrinsic(I);
      break;
    case Intrinsic::x86_sse2_max_sd:
    case Intrinsic::x86_sse2_min_sd:
      handleBinarySdIntrinsic(I);
      break;

    case Intrinsic::is_constant:
      // The result of llvm.is.constant() is always defined.
      setShadow(&I, getCleanShadow(&I));
      setOrigin(&I, getCleanOrigin());
      break;

    default:
      if (!handleUnknownIntrinsic(I))
        visitInstruction(I);
      break;
    }
  }
3448 
  // Handle a call to the libatomic __atomic_load function:
  // copy the shadow of the loaded bytes after the call completes.
  // Args: (size, src, dst, ordering).
  void visitLibAtomicLoad(CallBase &CB) {
    IRBuilder<> IRB(&CB);
    Value *Size = CB.getArgOperand(0);
    Value *SrcPtr = CB.getArgOperand(1);
    Value *DstPtr = CB.getArgOperand(2);
    Value *Ordering = CB.getArgOperand(3);
    // Convert the call to have at least Acquire ordering to make sure
    // the shadow operations aren't reordered before it.
    Value *NewOrdering =
        IRB.CreateExtractElement(makeAddAcquireOrderingTable(IRB), Ordering);
    CB.setArgOperand(3, NewOrdering);

    // The shadow copy must run after the atomic load itself, hence a second
    // builder positioned past the call.
    IRBuilder<> NextIRB(CB.getNextNode());
    NextIRB.SetCurrentDebugLocation(CB.getDebugLoc());

    Value *SrcShadowPtr, *SrcOriginPtr;
    std::tie(SrcShadowPtr, SrcOriginPtr) =
        getShadowOriginPtr(SrcPtr, NextIRB, NextIRB.getInt8Ty(), Align(1),
                           /*isStore*/ false);
    Value *DstShadowPtr =
        getShadowOriginPtr(DstPtr, NextIRB, NextIRB.getInt8Ty(), Align(1),
                           /*isStore*/ true)
            .first;

    NextIRB.CreateMemCpy(DstShadowPtr, Align(1), SrcShadowPtr, Align(1), Size);
    if (MS.TrackOrigins) {
      Value *SrcOrigin = NextIRB.CreateAlignedLoad(MS.OriginTy, SrcOriginPtr,
                                                   kMinOriginAlignment);
      Value *NewOrigin = updateOrigin(SrcOrigin, NextIRB);
      NextIRB.CreateCall(MS.MsanSetOriginFn, {DstPtr, Size, NewOrigin});
    }
  }
3481 
3482   void visitLibAtomicStore(CallBase &CB) {
3483     IRBuilder<> IRB(&CB);
3484     Value *Size = CB.getArgOperand(0);
3485     Value *DstPtr = CB.getArgOperand(2);
3486     Value *Ordering = CB.getArgOperand(3);
3487     // Convert the call to have at least Release ordering to make sure
3488     // the shadow operations aren't reordered after it.
3489     Value *NewOrdering =
3490         IRB.CreateExtractElement(makeAddReleaseOrderingTable(IRB), Ordering);
3491     CB.setArgOperand(3, NewOrdering);
3492 
3493     Value *DstShadowPtr =
3494         getShadowOriginPtr(DstPtr, IRB, IRB.getInt8Ty(), Align(1),
3495                            /*isStore*/ true)
3496             .first;
3497 
3498     // Atomic store always paints clean shadow/origin. See file header.
3499     IRB.CreateMemSet(DstShadowPtr, getCleanShadow(IRB.getInt8Ty()), Size,
3500                      Align(1));
3501   }
3502 
  // Instrument a call or invoke: pass argument shadows via __msan_param_tls,
  // and read the return value's shadow back from the retval TLS slot after
  // the call.
  void visitCallBase(CallBase &CB) {
    assert(!CB.getMetadata("nosanitize"));
    if (CB.isInlineAsm()) {
      // For inline asm (either a call to asm function, or callbr instruction),
      // do the usual thing: check argument shadow and mark all outputs as
      // clean. Note that any side effects of the inline asm that are not
      // immediately visible in its constraints are not handled.
      if (ClHandleAsmConservative && MS.CompileKernel)
        visitAsmInstruction(CB);
      else
        visitInstruction(CB);
      return;
    }
    LibFunc LF;
    if (TLI->getLibFunc(CB, LF)) {
      // libatomic.a functions need to have special handling because there isn't
      // a good way to intercept them or compile the library with
      // instrumentation.
      switch (LF) {
      case LibFunc_atomic_load:
        visitLibAtomicLoad(CB);
        return;
      case LibFunc_atomic_store:
        visitLibAtomicStore(CB);
        return;
      default:
        break;
      }
    }

    if (auto *Call = dyn_cast<CallInst>(&CB)) {
      assert(!isa<IntrinsicInst>(Call) && "intrinsics are handled elsewhere");

      // We are going to insert code that relies on the fact that the callee
      // will become a non-readonly function after it is instrumented by us. To
      // prevent this code from being optimized out, mark that function
      // non-readonly in advance.
      if (Function *Func = Call->getCalledFunction()) {
        // Clear out readonly/readnone attributes.
        AttrBuilder B;
        B.addAttribute(Attribute::ReadOnly)
            .addAttribute(Attribute::ReadNone)
            .addAttribute(Attribute::WriteOnly)
            .addAttribute(Attribute::ArgMemOnly)
            .addAttribute(Attribute::Speculatable);
        Func->removeAttributes(AttributeList::FunctionIndex, B);
      }

      maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI);
    }
    IRBuilder<> IRB(&CB);

    // Lay the argument shadows out in __msan_param_tls, 8-byte aligned, in
    // argument order; ArgOffset tracks the running byte offset.
    unsigned ArgOffset = 0;
    LLVM_DEBUG(dbgs() << "  CallSite: " << CB << "\n");
    for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
         ++ArgIt) {
      Value *A = *ArgIt;
      unsigned i = ArgIt - CB.arg_begin();
      if (!A->getType()->isSized()) {
        LLVM_DEBUG(dbgs() << "Arg " << i << " is not sized: " << CB << "\n");
        continue;
      }
      unsigned Size = 0;
      Value *Store = nullptr;
      // Compute the Shadow for arg even if it is ByVal, because
      // in that case getShadow() will copy the actual arg shadow to
      // __msan_param_tls.
      Value *ArgShadow = getShadow(A);
      Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset);
      LLVM_DEBUG(dbgs() << "  Arg#" << i << ": " << *A
                        << " Shadow: " << *ArgShadow << "\n");
      bool ArgIsInitialized = false;
      const DataLayout &DL = F.getParent()->getDataLayout();

      bool ByVal = CB.paramHasAttr(i, Attribute::ByVal);
      bool NoUndef = CB.paramHasAttr(i, Attribute::NoUndef);
      bool EagerCheck = ClEagerChecks && !ByVal && NoUndef;

      // Eager mode: check the argument at the call site instead of passing
      // its shadow to the callee.
      if (EagerCheck) {
        insertShadowCheck(A, &CB);
        continue;
      }
      if (ByVal) {
        // ByVal requires some special handling as it's too big for a single
        // load
        assert(A->getType()->isPointerTy() &&
               "ByVal argument is not a pointer!");
        Size = DL.getTypeAllocSize(CB.getParamByValType(i));
        if (ArgOffset + Size > kParamTLSSize) break;
        const MaybeAlign ParamAlignment(CB.getParamAlign(i));
        MaybeAlign Alignment = llvm::None;
        if (ParamAlignment)
          Alignment = std::min(*ParamAlignment, kShadowTLSAlignment);
        Value *AShadowPtr =
            getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
                               /*isStore*/ false)
                .first;

        Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
                                 Alignment, Size);
        // TODO(glider): need to copy origins.
      } else {
        // Any other parameters mean we need bit-grained tracking of uninit data
        Size = DL.getTypeAllocSize(A->getType());
        if (ArgOffset + Size > kParamTLSSize) break;
        Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
                                       kShadowTLSAlignment);
        Constant *Cst = dyn_cast<Constant>(ArgShadow);
        if (Cst && Cst->isNullValue()) ArgIsInitialized = true;
      }
      if (MS.TrackOrigins && !ArgIsInitialized)
        IRB.CreateStore(getOrigin(A),
                        getOriginPtrForArgument(A, IRB, ArgOffset));
      (void)Store;
      assert(Size != 0 && Store != nullptr);
      LLVM_DEBUG(dbgs() << "  Param:" << *Store << "\n");
      ArgOffset += alignTo(Size, 8);
    }
    LLVM_DEBUG(dbgs() << "  done with call args\n");

    FunctionType *FT = CB.getFunctionType();
    if (FT->isVarArg()) {
      VAHelper->visitCallBase(CB, IRB);
    }

    // Now, get the shadow for the RetVal.
    if (!CB.getType()->isSized())
      return;
    // Don't emit the epilogue for musttail call returns.
    if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
      return;

    // Eager mode: a noundef return is checked in the callee, so it is clean
    // here by construction.
    if (ClEagerChecks && CB.hasRetAttr(Attribute::NoUndef)) {
      setShadow(&CB, getCleanShadow(&CB));
      setOrigin(&CB, getCleanOrigin());
      return;
    }

    IRBuilder<> IRBBefore(&CB);
    // Until we have full dynamic coverage, make sure the retval shadow is 0.
    Value *Base = getShadowPtrForRetval(&CB, IRBBefore);
    IRBBefore.CreateAlignedStore(getCleanShadow(&CB), Base,
                                 kShadowTLSAlignment);
    // Find the point right after the call/invoke to load the retval shadow.
    BasicBlock::iterator NextInsn;
    if (isa<CallInst>(CB)) {
      NextInsn = ++CB.getIterator();
      assert(NextInsn != CB.getParent()->end());
    } else {
      BasicBlock *NormalDest = cast<InvokeInst>(CB).getNormalDest();
      if (!NormalDest->getSinglePredecessor()) {
        // FIXME: this case is tricky, so we are just conservative here.
        // Perhaps we need to split the edge between this BB and NormalDest,
        // but a naive attempt to use SplitEdge leads to a crash.
        setShadow(&CB, getCleanShadow(&CB));
        setOrigin(&CB, getCleanOrigin());
        return;
      }
      // FIXME: NextInsn is likely in a basic block that has not been visited yet.
      // Anything inserted there will be instrumented by MSan later!
      NextInsn = NormalDest->getFirstInsertionPt();
      assert(NextInsn != NormalDest->end() &&
             "Could not find insertion point for retval shadow load");
    }
    IRBuilder<> IRBAfter(&*NextInsn);
    Value *RetvalShadow = IRBAfter.CreateAlignedLoad(
        getShadowTy(&CB), getShadowPtrForRetval(&CB, IRBAfter),
        kShadowTLSAlignment, "_msret");
    setShadow(&CB, RetvalShadow);
    if (MS.TrackOrigins)
      setOrigin(&CB, IRBAfter.CreateLoad(MS.OriginTy,
                                         getOriginPtrForRetval(IRBAfter)));
  }
3675 
3676   bool isAMustTailRetVal(Value *RetVal) {
3677     if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
3678       RetVal = I->getOperand(0);
3679     }
3680     if (auto *I = dyn_cast<CallInst>(RetVal)) {
3681       return I->isMustTailCall();
3682     }
3683     return false;
3684   }
3685 
  // Instrument a return: store the return value's shadow (and origin) into
  // the retval TLS slot so the caller can pick it up, or check it eagerly.
  void visitReturnInst(ReturnInst &I) {
    IRBuilder<> IRB(&I);
    Value *RetVal = I.getReturnValue();
    if (!RetVal) return;
    // Don't emit the epilogue for musttail call returns.
    if (isAMustTailRetVal(RetVal)) return;
    Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
    bool HasNoUndef =
        F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoUndef);
    bool StoreShadow = !(ClEagerChecks && HasNoUndef);
    // FIXME: Consider using SpecialCaseList to specify a list of functions that
    // must always return fully initialized values. For now, we hardcode "main".
    bool EagerCheck = (ClEagerChecks && HasNoUndef) || (F.getName() == "main");

    Value *Shadow = getShadow(RetVal);
    bool StoreOrigin = true;
    if (EagerCheck) {
      // Report here, and publish a clean shadow to the caller.
      insertShadowCheck(RetVal, &I);
      Shadow = getCleanShadow(RetVal);
      StoreOrigin = false;
    }

    // The caller may still expect information passed over TLS if we pass our
    // check
    if (StoreShadow) {
      IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
      if (MS.TrackOrigins && StoreOrigin)
        IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
    }
  }
3716 
3717   void visitPHINode(PHINode &I) {
3718     IRBuilder<> IRB(&I);
3719     if (!PropagateShadow) {
3720       setShadow(&I, getCleanShadow(&I));
3721       setOrigin(&I, getCleanOrigin());
3722       return;
3723     }
3724 
3725     ShadowPHINodes.push_back(&I);
3726     setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(),
3727                                 "_msphi_s"));
3728     if (MS.TrackOrigins)
3729       setOrigin(&I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(),
3730                                   "_msphi_o"));
3731   }
3732 
  // Build a private global string "----<var>@<function>" describing a stack
  // allocation, used by the runtime when reporting stack-originated UMRs.
  Value *getLocalVarDescription(AllocaInst &I) {
    SmallString<2048> StackDescriptionStorage;
    raw_svector_ostream StackDescription(StackDescriptionStorage);
    // We create a string with a description of the stack allocation and
    // pass it into __msan_set_alloca_origin.
    // It will be printed by the run-time if stack-originated UMR is found.
    // The first 4 bytes of the string are set to '----' and will be replaced
    // by __msan_va_arg_overflow_size_tls at the first call.
    // NOTE(review): the claim about __msan_va_arg_overflow_size_tls is not
    // verifiable from this file — confirm against the MSan runtime.
    StackDescription << "----" << I.getName() << "@" << F.getName();
    return createPrivateNonConstGlobalForString(*F.getParent(),
                                                StackDescription.str());
  }
3745 
3746   void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
3747     if (PoisonStack && ClPoisonStackWithCall) {
3748       IRB.CreateCall(MS.MsanPoisonStackFn,
3749                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
3750     } else {
3751       Value *ShadowBase, *OriginBase;
3752       std::tie(ShadowBase, OriginBase) = getShadowOriginPtr(
3753           &I, IRB, IRB.getInt8Ty(), Align(1), /*isStore*/ true);
3754 
3755       Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
3756       IRB.CreateMemSet(ShadowBase, PoisonValue, Len,
3757                        MaybeAlign(I.getAlignment()));
3758     }
3759 
3760     if (PoisonStack && MS.TrackOrigins) {
3761       Value *Descr = getLocalVarDescription(I);
3762       IRB.CreateCall(MS.MsanSetAllocaOrigin4Fn,
3763                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
3764                       IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()),
3765                       IRB.CreatePointerCast(&F, MS.IntptrTy)});
3766     }
3767   }
3768 
3769   void poisonAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
3770     Value *Descr = getLocalVarDescription(I);
3771     if (PoisonStack) {
3772       IRB.CreateCall(MS.MsanPoisonAllocaFn,
3773                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
3774                       IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())});
3775     } else {
3776       IRB.CreateCall(MS.MsanUnpoisonAllocaFn,
3777                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
3778     }
3779   }
3780 
3781   void instrumentAlloca(AllocaInst &I, Instruction *InsPoint = nullptr) {
3782     if (!InsPoint)
3783       InsPoint = &I;
3784     IRBuilder<> IRB(InsPoint->getNextNode());
3785     const DataLayout &DL = F.getParent()->getDataLayout();
3786     uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType());
3787     Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize);
3788     if (I.isArrayAllocation())
3789       Len = IRB.CreateMul(Len, I.getArraySize());
3790 
3791     if (MS.CompileKernel)
3792       poisonAllocaKmsan(I, IRB, Len);
3793     else
3794       poisonAllocaUserspace(I, IRB, Len);
3795   }
3796 
  void visitAllocaInst(AllocaInst &I) {
    // The alloca'ed pointer value itself is always fully defined.
    setShadow(&I, getCleanShadow(&I));
    setOrigin(&I, getCleanOrigin());
    // We'll get to this alloca later unless it's poisoned at the corresponding
    // llvm.lifetime.start.
    AllocaSet.insert(&I);
  }
3804 
  void visitSelectInst(SelectInst& I) {
    // Shadow propagation for select: the result is poisoned if the chosen
    // operand is poisoned, or if the condition itself is poisoned and the
    // two arms could differ in the affected bits.
    IRBuilder<> IRB(&I);
    // a = select b, c, d
    Value *B = I.getCondition();
    Value *C = I.getTrueValue();
    Value *D = I.getFalseValue();
    Value *Sb = getShadow(B);
    Value *Sc = getShadow(C);
    Value *Sd = getShadow(D);

    // Result shadow if condition shadow is 0.
    Value *Sa0 = IRB.CreateSelect(B, Sc, Sd);
    Value *Sa1;
    if (I.getType()->isAggregateType()) {
      // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do
      // an extra "select". This results in much more compact IR.
      // Sa = select Sb, poisoned, (select b, Sc, Sd)
      Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
    } else {
      // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
      // If Sb (condition is poisoned), look for bits in c and d that are equal
      // and both unpoisoned.
      // If !Sb (condition is unpoisoned), simply pick one of Sc and Sd.

      // Cast arguments to shadow-compatible type.
      C = CreateAppToShadowCast(IRB, C);
      D = CreateAppToShadowCast(IRB, D);

      // Result shadow if condition shadow is 1.
      Sa1 = IRB.CreateOr({IRB.CreateXor(C, D), Sc, Sd});
    }
    Value *Sa = IRB.CreateSelect(Sb, Sa1, Sa0, "_msprop_select");
    setShadow(&I, Sa);
    if (MS.TrackOrigins) {
      // Origins are always i32, so any vector conditions must be flattened.
      // FIXME: consider tracking vector origins for app vectors?
      if (B->getType()->isVectorTy()) {
        Type *FlatTy = getShadowTyNoVec(B->getType());
        // Reduce each vector to a single i1: "any lane set".
        B = IRB.CreateICmpNE(IRB.CreateBitCast(B, FlatTy),
                                ConstantInt::getNullValue(FlatTy));
        Sb = IRB.CreateICmpNE(IRB.CreateBitCast(Sb, FlatTy),
                                      ConstantInt::getNullValue(FlatTy));
      }
      // a = select b, c, d
      // Oa = Sb ? Ob : (b ? Oc : Od)
      setOrigin(
          &I, IRB.CreateSelect(Sb, getOrigin(I.getCondition()),
                               IRB.CreateSelect(B, getOrigin(I.getTrueValue()),
                                                getOrigin(I.getFalseValue()))));
    }
  }
3856 
  void visitLandingPadInst(LandingPadInst &I) {
    // Do nothing.
    // See https://github.com/google/sanitizers/issues/504
    // The landingpad result is conservatively treated as fully initialized.
    setShadow(&I, getCleanShadow(&I));
    setOrigin(&I, getCleanOrigin());
  }
3863 
  void visitCatchSwitchInst(CatchSwitchInst &I) {
    // EH pad tokens carry no application data; treat as clean.
    setShadow(&I, getCleanShadow(&I));
    setOrigin(&I, getCleanOrigin());
  }
3868 
  void visitFuncletPadInst(FuncletPadInst &I) {
    // Funclet pad tokens carry no application data; treat as clean.
    setShadow(&I, getCleanShadow(&I));
    setOrigin(&I, getCleanOrigin());
  }
3873 
  void visitGetElementPtrInst(GetElementPtrInst &I) {
    // The resulting pointer is poisoned if the base pointer or any index
    // is poisoned; propagate with the generic OR rule.
    handleShadowOr(I);
  }
3877 
3878   void visitExtractValueInst(ExtractValueInst &I) {
3879     IRBuilder<> IRB(&I);
3880     Value *Agg = I.getAggregateOperand();
3881     LLVM_DEBUG(dbgs() << "ExtractValue:  " << I << "\n");
3882     Value *AggShadow = getShadow(Agg);
3883     LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
3884     Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
3885     LLVM_DEBUG(dbgs() << "   ResShadow:  " << *ResShadow << "\n");
3886     setShadow(&I, ResShadow);
3887     setOriginForNaryOp(I);
3888   }
3889 
3890   void visitInsertValueInst(InsertValueInst &I) {
3891     IRBuilder<> IRB(&I);
3892     LLVM_DEBUG(dbgs() << "InsertValue:  " << I << "\n");
3893     Value *AggShadow = getShadow(I.getAggregateOperand());
3894     Value *InsShadow = getShadow(I.getInsertedValueOperand());
3895     LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
3896     LLVM_DEBUG(dbgs() << "   InsShadow:  " << *InsShadow << "\n");
3897     Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
3898     LLVM_DEBUG(dbgs() << "   Res:        " << *Res << "\n");
3899     setShadow(&I, Res);
3900     setOriginForNaryOp(I);
3901   }
3902 
3903   void dumpInst(Instruction &I) {
3904     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
3905       errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n";
3906     } else {
3907       errs() << "ZZZ " << I.getOpcodeName() << "\n";
3908     }
3909     errs() << "QQQ " << I << "\n";
3910   }
3911 
  void visitResumeInst(ResumeInst &I) {
    LLVM_DEBUG(dbgs() << "Resume: " << I << "\n");
    // Nothing to do here.
  }
3916 
  void visitCleanupReturnInst(CleanupReturnInst &CRI) {
    LLVM_DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n");
    // Nothing to do here.
  }
3921 
  void visitCatchReturnInst(CatchReturnInst &CRI) {
    LLVM_DEBUG(dbgs() << "CatchReturn: " << CRI << "\n");
    // Nothing to do here.
  }
3926 
  void instrumentAsmArgument(Value *Operand, Instruction &I, IRBuilder<> &IRB,
                             const DataLayout &DL, bool isOutput) {
    // For each assembly argument, we check its value for being initialized.
    // If the argument is a pointer, we assume it points to a single element
    // of the corresponding type (or to a 8-byte word, if the type is unsized).
    // Each such pointer is instrumented with a call to the runtime library.
    Type *OpType = Operand->getType();
    // Check the operand value itself.
    insertShadowCheck(Operand, &I);
    if (!OpType->isPointerTy() || !isOutput) {
      // Only pointer outputs get the store instrumentation below; output
      // operands are expected to always be pointers.
      assert(!isOutput);
      return;
    }
    Type *ElType = OpType->getPointerElementType();
    if (!ElType->isSized())
      return;
    // Tell the runtime that `Size` bytes at `Ptr` are written by the asm
    // statement (__msan_instrument_asm_store unpoisons them).
    int Size = DL.getTypeStoreSize(ElType);
    Value *Ptr = IRB.CreatePointerCast(Operand, IRB.getInt8PtrTy());
    Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
    IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Ptr, SizeVal});
  }
3948 
3949   /// Get the number of output arguments returned by pointers.
3950   int getNumOutputArgs(InlineAsm *IA, CallBase *CB) {
3951     int NumRetOutputs = 0;
3952     int NumOutputs = 0;
3953     Type *RetTy = cast<Value>(CB)->getType();
3954     if (!RetTy->isVoidTy()) {
3955       // Register outputs are returned via the CallInst return value.
3956       auto *ST = dyn_cast<StructType>(RetTy);
3957       if (ST)
3958         NumRetOutputs = ST->getNumElements();
3959       else
3960         NumRetOutputs = 1;
3961     }
3962     InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
3963     for (size_t i = 0, n = Constraints.size(); i < n; i++) {
3964       InlineAsm::ConstraintInfo Info = Constraints[i];
3965       switch (Info.Type) {
3966       case InlineAsm::isOutput:
3967         NumOutputs++;
3968         break;
3969       default:
3970         break;
3971       }
3972     }
3973     return NumOutputs - NumRetOutputs;
3974   }
3975 
3976   void visitAsmInstruction(Instruction &I) {
3977     // Conservative inline assembly handling: check for poisoned shadow of
3978     // asm() arguments, then unpoison the result and all the memory locations
3979     // pointed to by those arguments.
3980     // An inline asm() statement in C++ contains lists of input and output
3981     // arguments used by the assembly code. These are mapped to operands of the
3982     // CallInst as follows:
3983     //  - nR register outputs ("=r) are returned by value in a single structure
3984     //  (SSA value of the CallInst);
3985     //  - nO other outputs ("=m" and others) are returned by pointer as first
3986     // nO operands of the CallInst;
3987     //  - nI inputs ("r", "m" and others) are passed to CallInst as the
3988     // remaining nI operands.
3989     // The total number of asm() arguments in the source is nR+nO+nI, and the
3990     // corresponding CallInst has nO+nI+1 operands (the last operand is the
3991     // function to be called).
3992     const DataLayout &DL = F.getParent()->getDataLayout();
3993     CallBase *CB = cast<CallBase>(&I);
3994     IRBuilder<> IRB(&I);
3995     InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
3996     int OutputArgs = getNumOutputArgs(IA, CB);
3997     // The last operand of a CallInst is the function itself.
3998     int NumOperands = CB->getNumOperands() - 1;
3999 
4000     // Check input arguments. Doing so before unpoisoning output arguments, so
4001     // that we won't overwrite uninit values before checking them.
4002     for (int i = OutputArgs; i < NumOperands; i++) {
4003       Value *Operand = CB->getOperand(i);
4004       instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ false);
4005     }
4006     // Unpoison output arguments. This must happen before the actual InlineAsm
4007     // call, so that the shadow for memory published in the asm() statement
4008     // remains valid.
4009     for (int i = 0; i < OutputArgs; i++) {
4010       Value *Operand = CB->getOperand(i);
4011       instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ true);
4012     }
4013 
4014     setShadow(&I, getCleanShadow(&I));
4015     setOrigin(&I, getCleanOrigin());
4016   }
4017 
4018   void visitInstruction(Instruction &I) {
4019     // Everything else: stop propagating and check for poisoned shadow.
4020     if (ClDumpStrictInstructions)
4021       dumpInst(I);
4022     LLVM_DEBUG(dbgs() << "DEFAULT: " << I << "\n");
4023     for (size_t i = 0, n = I.getNumOperands(); i < n; i++) {
4024       Value *Operand = I.getOperand(i);
4025       if (Operand->getType()->isSized())
4026         insertShadowCheck(Operand, &I);
4027     }
4028     setShadow(&I, getCleanShadow(&I));
4029     setOrigin(&I, getCleanOrigin());
4030   }
4031 };
4032 
4033 /// AMD64-specific implementation of VarArgHelper.
struct VarArgAMD64Helper : public VarArgHelper {
  // An unfortunate workaround for asymmetric lowering of va_arg stuff.
  // See a comment in visitCallBase for more details.
  static const unsigned AMD64GpEndOffset = 48;  // AMD64 ABI Draft 0.99.6 p3.5.7
  static const unsigned AMD64FpEndOffsetSSE = 176;
  // If SSE is disabled, fp_offset in va_list is zero.
  static const unsigned AMD64FpEndOffsetNoSSE = AMD64GpEndOffset;

  unsigned AMD64FpEndOffset;
  Function &F;
  MemorySanitizer &MS;
  MemorySanitizerVisitor &MSV;
  // Entry-block backup of __msan_va_arg_tls (and its origins), created in
  // finalizeInstrumentation when the function contains a va_start.
  Value *VAArgTLSCopy = nullptr;
  Value *VAArgTLSOriginCopy = nullptr;
  Value *VAArgOverflowSize = nullptr;

  // va_start/va_copy calls whose va_list shadow must be filled in
  // finalizeInstrumentation.
  SmallVector<CallInst*, 16> VAStartInstrumentationList;

  enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };

  VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
                    MemorySanitizerVisitor &MSV)
      : F(F), MS(MS), MSV(MSV) {
    AMD64FpEndOffset = AMD64FpEndOffsetSSE;
    // With "-sse" in target-features no FP registers are saved, so the FP
    // register-save area collapses to nothing.
    for (const auto &Attr : F.getAttributes().getFnAttributes()) {
      if (Attr.isStringAttribute() &&
          (Attr.getKindAsString() == "target-features")) {
        if (Attr.getValueAsString().contains("-sse"))
          AMD64FpEndOffset = AMD64FpEndOffsetNoSSE;
        break;
      }
    }
  }

  ArgKind classifyArgument(Value* arg) {
    // A very rough approximation of X86_64 argument classification rules.
    Type *T = arg->getType();
    if (T->isFPOrFPVectorTy() || T->isX86_MMXTy())
      return AK_FloatingPoint;
    if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
      return AK_GeneralPurpose;
    if (T->isPointerTy())
      return AK_GeneralPurpose;
    return AK_Memory;
  }

  // For VarArg functions, store the argument shadow in an ABI-specific format
  // that corresponds to va_list layout.
  // We do this because Clang lowers va_arg in the frontend, and this pass
  // only sees the low level code that deals with va_list internals.
  // A much easier alternative (provided that Clang emits va_arg instructions)
  // would have been to associate each live instance of va_list with a copy of
  // MSanParamTLS, and extract shadow on va_arg() call in the argument list
  // order.
  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
    // Offsets into __msan_va_arg_tls mirroring the va_list register-save
    // area: GP registers first, then FP registers, then overflow (memory).
    unsigned GpOffset = 0;
    unsigned FpOffset = AMD64GpEndOffset;
    unsigned OverflowOffset = AMD64FpEndOffset;
    const DataLayout &DL = F.getParent()->getDataLayout();
    for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
         ++ArgIt) {
      Value *A = *ArgIt;
      unsigned ArgNo = CB.getArgOperandNo(ArgIt);
      bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
      bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
      if (IsByVal) {
        // ByVal arguments always go to the overflow area.
        // Fixed arguments passed through the overflow area will be stepped
        // over by va_start, so don't count them towards the offset.
        if (IsFixed)
          continue;
        assert(A->getType()->isPointerTy());
        Type *RealTy = CB.getParamByValType(ArgNo);
        uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
        Value *ShadowBase = getShadowPtrForVAArgument(
            RealTy, IRB, OverflowOffset, alignTo(ArgSize, 8));
        Value *OriginBase = nullptr;
        if (MS.TrackOrigins)
          OriginBase = getOriginPtrForVAArgument(RealTy, IRB, OverflowOffset);
        OverflowOffset += alignTo(ArgSize, 8);
        // ShadowBase is null when the argument would overflow the TLS area;
        // the shadow copy is skipped but the offset above is still advanced.
        if (!ShadowBase)
          continue;
        Value *ShadowPtr, *OriginPtr;
        std::tie(ShadowPtr, OriginPtr) =
            MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment,
                                   /*isStore*/ false);

        IRB.CreateMemCpy(ShadowBase, kShadowTLSAlignment, ShadowPtr,
                         kShadowTLSAlignment, ArgSize);
        if (MS.TrackOrigins)
          IRB.CreateMemCpy(OriginBase, kShadowTLSAlignment, OriginPtr,
                           kShadowTLSAlignment, ArgSize);
      } else {
        ArgKind AK = classifyArgument(A);
        // Once the register-save area is exhausted, the argument is passed
        // in memory (the overflow area).
        if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
          AK = AK_Memory;
        if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
          AK = AK_Memory;
        Value *ShadowBase, *OriginBase = nullptr;
        switch (AK) {
          case AK_GeneralPurpose:
            ShadowBase =
                getShadowPtrForVAArgument(A->getType(), IRB, GpOffset, 8);
            if (MS.TrackOrigins)
              OriginBase =
                  getOriginPtrForVAArgument(A->getType(), IRB, GpOffset);
            GpOffset += 8;
            break;
          case AK_FloatingPoint:
            ShadowBase =
                getShadowPtrForVAArgument(A->getType(), IRB, FpOffset, 16);
            if (MS.TrackOrigins)
              OriginBase =
                  getOriginPtrForVAArgument(A->getType(), IRB, FpOffset);
            FpOffset += 16;
            break;
          case AK_Memory:
            if (IsFixed)
              continue;
            uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
            ShadowBase =
                getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset, 8);
            if (MS.TrackOrigins)
              OriginBase =
                  getOriginPtrForVAArgument(A->getType(), IRB, OverflowOffset);
            OverflowOffset += alignTo(ArgSize, 8);
        }
        // Take fixed arguments into account for GpOffset and FpOffset,
        // but don't actually store shadows for them.
        // TODO(glider): don't call get*PtrForVAArgument() for them.
        if (IsFixed)
          continue;
        if (!ShadowBase)
          continue;
        Value *Shadow = MSV.getShadow(A);
        IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment);
        if (MS.TrackOrigins) {
          Value *Origin = MSV.getOrigin(A);
          unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
          MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
                          std::max(kShadowTLSAlignment, kMinOriginAlignment));
        }
      }
    }
    // Report how far the overflow area extends, so finalizeInstrumentation
    // knows how much of __msan_va_arg_tls to copy.
    Constant *OverflowSize =
      ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
    IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
  }

  /// Compute the shadow address for a given va_arg.
  /// Returns null if [ArgOffset, ArgOffset+ArgSize) does not fit in the TLS
  /// area.
  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
                                   unsigned ArgOffset, unsigned ArgSize) {
    // Make sure we don't overflow __msan_va_arg_tls.
    if (ArgOffset + ArgSize > kParamTLSSize)
      return nullptr;
    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
                              "_msarg_va_s");
  }

  /// Compute the origin address for a given va_arg.
  Value *getOriginPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, int ArgOffset) {
    Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
    // getOriginPtrForVAArgument() is always called after
    // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never
    // overflow.
    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
    return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
                              "_msarg_va_o");
  }

  // Mark the whole va_list object written by va_start/va_copy as
  // initialized.
  void unpoisonVAListTagForInst(IntrinsicInst &I) {
    IRBuilder<> IRB(&I);
    Value *VAListTag = I.getArgOperand(0);
    Value *ShadowPtr, *OriginPtr;
    const Align Alignment = Align(8);
    std::tie(ShadowPtr, OriginPtr) =
        MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
                               /*isStore*/ true);

    // Unpoison the whole __va_list_tag.
    // FIXME: magic ABI constants.
    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
                     /* size */ 24, Alignment, false);
    // We shouldn't need to zero out the origins, as they're only checked for
    // nonzero shadow.
  }

  void visitVAStartInst(VAStartInst &I) override {
    if (F.getCallingConv() == CallingConv::Win64)
      return;
    VAStartInstrumentationList.push_back(&I);
    unpoisonVAListTagForInst(I);
  }

  void visitVACopyInst(VACopyInst &I) override {
    if (F.getCallingConv() == CallingConv::Win64) return;
    unpoisonVAListTagForInst(I);
  }

  void finalizeInstrumentation() override {
    assert(!VAArgOverflowSize && !VAArgTLSCopy &&
           "finalizeInstrumentation called twice");
    if (!VAStartInstrumentationList.empty()) {
      // If there is a va_start in this function, make a backup copy of
      // va_arg_tls somewhere in the function entry block.
      IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
      VAArgOverflowSize =
          IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
      Value *CopySize =
        IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset),
                      VAArgOverflowSize);
      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
      IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
      if (MS.TrackOrigins) {
        VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
        IRB.CreateMemCpy(VAArgTLSOriginCopy, Align(8), MS.VAArgOriginTLS,
                         Align(8), CopySize);
      }
    }

    // Instrument va_start.
    // Copy va_list shadow from the backup copy of the TLS contents.
    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
      CallInst *OrigInst = VAStartInstrumentationList[i];
      IRBuilder<> IRB(OrigInst->getNextNode());
      Value *VAListTag = OrigInst->getArgOperand(0);

      // reg_save_area is at offset 16 in __va_list_tag (magic ABI constant,
      // see the FIXME in unpoisonVAListTagForInst).
      Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
      Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
          IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
                        ConstantInt::get(MS.IntptrTy, 16)),
          PointerType::get(RegSaveAreaPtrTy, 0));
      Value *RegSaveAreaPtr =
          IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
      Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
      const Align Alignment = Align(16);
      std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
          MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
                                 Alignment, /*isStore*/ true);
      IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
                       AMD64FpEndOffset);
      if (MS.TrackOrigins)
        IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
                         Alignment, AMD64FpEndOffset);
      // overflow_arg_area is at offset 8 in __va_list_tag.
      Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
      Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
          IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
                        ConstantInt::get(MS.IntptrTy, 8)),
          PointerType::get(OverflowArgAreaPtrTy, 0));
      Value *OverflowArgAreaPtr =
          IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
      Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
      std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
          MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
                                 Alignment, /*isStore*/ true);
      // The overflow-area shadow lives right after the register-save-area
      // shadow in the TLS copy.
      Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
                                             AMD64FpEndOffset);
      IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
                       VAArgOverflowSize);
      if (MS.TrackOrigins) {
        SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
                                        AMD64FpEndOffset);
        IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
                         VAArgOverflowSize);
      }
    }
  }
};
4304 
4305 /// MIPS64-specific implementation of VarArgHelper.
struct VarArgMIPS64Helper : public VarArgHelper {
  Function &F;
  MemorySanitizer &MS;
  MemorySanitizerVisitor &MSV;
  // Entry-block backup of __msan_va_arg_tls, created in
  // finalizeInstrumentation when the function contains a va_start/va_copy.
  Value *VAArgTLSCopy = nullptr;
  Value *VAArgSize = nullptr;

  // va_start/va_copy calls whose va_list shadow must be filled in
  // finalizeInstrumentation.
  SmallVector<CallInst*, 16> VAStartInstrumentationList;

  VarArgMIPS64Helper(Function &F, MemorySanitizer &MS,
                    MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}

  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
    unsigned VAArgOffset = 0;
    const DataLayout &DL = F.getParent()->getDataLayout();
    // Only the variadic arguments are processed: iteration starts past the
    // fixed parameters of the callee's function type.
    for (auto ArgIt = CB.arg_begin() + CB.getFunctionType()->getNumParams(),
              End = CB.arg_end();
         ArgIt != End; ++ArgIt) {
      Triple TargetTriple(F.getParent()->getTargetTriple());
      Value *A = *ArgIt;
      Value *Base;
      uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
      if (TargetTriple.getArch() == Triple::mips64) {
        // Adjusting the shadow for argument with size < 8 to match the placement
        // of bits in big endian system
        if (ArgSize < 8)
          VAArgOffset += (8 - ArgSize);
      }
      Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize);
      VAArgOffset += ArgSize;
      VAArgOffset = alignTo(VAArgOffset, 8);
      // Base is null when the argument would overflow __msan_va_arg_tls;
      // skip the store but keep advancing the offset.
      if (!Base)
        continue;
      IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
    }

    Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset);
    // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
    // a new class member i.e. it is the total size of all VarArgs.
    IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
  }

  /// Compute the shadow address for a given va_arg.
  /// Returns null if [ArgOffset, ArgOffset+ArgSize) does not fit in the TLS
  /// area.
  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
                                   unsigned ArgOffset, unsigned ArgSize) {
    // Make sure we don't overflow __msan_va_arg_tls.
    if (ArgOffset + ArgSize > kParamTLSSize)
      return nullptr;
    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
                              "_msarg");
  }

  void visitVAStartInst(VAStartInst &I) override {
    IRBuilder<> IRB(&I);
    VAStartInstrumentationList.push_back(&I);
    Value *VAListTag = I.getArgOperand(0);
    Value *ShadowPtr, *OriginPtr;
    const Align Alignment = Align(8);
    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
    // Mark the 8-byte va_list object written by va_start as initialized.
    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
                     /* size */ 8, Alignment, false);
  }

  void visitVACopyInst(VACopyInst &I) override {
    IRBuilder<> IRB(&I);
    VAStartInstrumentationList.push_back(&I);
    Value *VAListTag = I.getArgOperand(0);
    Value *ShadowPtr, *OriginPtr;
    const Align Alignment = Align(8);
    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
    // Mark the 8-byte va_list object written by va_copy as initialized.
    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
                     /* size */ 8, Alignment, false);
  }

  void finalizeInstrumentation() override {
    assert(!VAArgSize && !VAArgTLSCopy &&
           "finalizeInstrumentation called twice");
    IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
    VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
    Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
                                    VAArgSize);

    if (!VAStartInstrumentationList.empty()) {
      // If there is a va_start in this function, make a backup copy of
      // va_arg_tls somewhere in the function entry block.
      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
      IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
    }

    // Instrument va_start.
    // Copy va_list shadow from the backup copy of the TLS contents.
    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
      CallInst *OrigInst = VAStartInstrumentationList[i];
      IRBuilder<> IRB(OrigInst->getNextNode());
      Value *VAListTag = OrigInst->getArgOperand(0);
      // The va_list itself holds a pointer to the register save area at
      // offset 0; load it and copy the saved shadow there.
      Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
      Value *RegSaveAreaPtrPtr =
          IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
                             PointerType::get(RegSaveAreaPtrTy, 0));
      Value *RegSaveAreaPtr =
          IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
      Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
      const Align Alignment = Align(8);
      std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
          MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
                                 Alignment, /*isStore*/ true);
      IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
                       CopySize);
    }
  }
};
4421 
/// AArch64-specific implementation of VarArgHelper.
struct VarArgAArch64Helper : public VarArgHelper {
  // Sizes (in bytes) of the two register staging regions inside the va_arg
  // TLS array: 8 general-purpose slots of 8 bytes each, and 8 FP/SIMD
  // slots of 16 bytes each (GrOffset advances by 8, VrOffset by 16 below).
  static const unsigned kAArch64GrArgSize = 64;
  static const unsigned kAArch64VrArgSize = 128;

  // Layout of the staging area: [GR region][VR region][overflow args].
  static const unsigned AArch64GrBegOffset = 0;
  static const unsigned AArch64GrEndOffset = kAArch64GrArgSize;
  // Make VR space aligned to 16 bytes.
  static const unsigned AArch64VrBegOffset = AArch64GrEndOffset;
  static const unsigned AArch64VrEndOffset = AArch64VrBegOffset
                                             + kAArch64VrArgSize;
  static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;

  Function &F;
  MemorySanitizer &MS;
  MemorySanitizerVisitor &MSV;
  // Entry-block backup of the va_arg TLS shadow; created in
  // finalizeInstrumentation() when the function contains a va_start.
  Value *VAArgTLSCopy = nullptr;
  // Dynamic size of the overflow (stack-passed) vararg area, loaded from
  // VAArgOverflowSizeTLS in finalizeInstrumentation().
  Value *VAArgOverflowSize = nullptr;

  // va_start (and va_copy) calls whose va_list shadow must be populated in
  // finalizeInstrumentation() after all call sites have been visited.
  SmallVector<CallInst*, 16> VAStartInstrumentationList;

  // Register class an argument would be passed in.
  enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };

  VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
                    MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}

  // Classify an argument purely by its IR type; register exhaustion is
  // handled by the caller, which demotes the result to AK_Memory.
  ArgKind classifyArgument(Value* arg) {
    Type *T = arg->getType();
    if (T->isFPOrFPVectorTy())
      return AK_FloatingPoint;
    if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
        || (T->isPointerTy()))
      return AK_GeneralPurpose;
    return AK_Memory;
  }

  // The instrumentation stores the argument shadow in a non ABI-specific
  // format because it does not know which argument is named (since Clang,
  // like x86_64 case, lowers the va_args in the frontend and this pass only
  // sees the low level code that deals with va_list internals).
  // The first kAArch64GrArgSize bytes of the va_arg TLS array hold the
  // shadow of GR-register arguments, followed by kAArch64VrArgSize bytes
  // for the FP/SIMD-register arguments, and then the shadow of the
  // remaining (stack-passed) arguments.
  // Using constant offset within the va_arg TLS array allows fast copy
  // in the finalize instrumentation.
  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
    unsigned GrOffset = AArch64GrBegOffset;
    unsigned VrOffset = AArch64VrBegOffset;
    unsigned OverflowOffset = AArch64VAEndOffset;

    const DataLayout &DL = F.getParent()->getDataLayout();
    for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
         ++ArgIt) {
      Value *A = *ArgIt;
      unsigned ArgNo = CB.getArgOperandNo(ArgIt);
      bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
      ArgKind AK = classifyArgument(A);
      // Once a register file is exhausted, later arguments of that class
      // are passed in memory.
      if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset)
        AK = AK_Memory;
      if (AK == AK_FloatingPoint && VrOffset >= AArch64VrEndOffset)
        AK = AK_Memory;
      Value *Base;
      switch (AK) {
        case AK_GeneralPurpose:
          Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset, 8);
          GrOffset += 8;
          break;
        case AK_FloatingPoint:
          // FP/SIMD registers occupy 16-byte slots in the staging area.
          Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset, 8);
          VrOffset += 16;
          break;
        case AK_Memory:
          // Don't count fixed arguments in the overflow area - va_start will
          // skip right over them.
          if (IsFixed)
            continue;
          uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
          Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset,
                                           alignTo(ArgSize, 8));
          OverflowOffset += alignTo(ArgSize, 8);
          break;
      }
      // Count Gp/Vr fixed arguments to their respective offsets, but don't
      // bother to actually store a shadow.
      if (IsFixed)
        continue;
      // getShadowPtrForVAArgument() returns null when the slot would not
      // fit into __msan_va_arg_tls.
      if (!Base)
        continue;
      IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
    }
    // Publish the overflow-area size so finalizeInstrumentation() knows
    // how many bytes of stack-argument shadow to copy.
    Constant *OverflowSize =
      ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset);
    IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
  }

  /// Compute the shadow address for a given va_arg slot, or return null if
  /// [ArgOffset, ArgOffset + ArgSize) would overflow __msan_va_arg_tls.
  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
                                   unsigned ArgOffset, unsigned ArgSize) {
    // Make sure we don't overflow __msan_va_arg_tls.
    if (ArgOffset + ArgSize > kParamTLSSize)
      return nullptr;
    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
                              "_msarg");
  }

  void visitVAStartInst(VAStartInst &I) override {
    IRBuilder<> IRB(&I);
    VAStartInstrumentationList.push_back(&I);
    Value *VAListTag = I.getArgOperand(0);
    Value *ShadowPtr, *OriginPtr;
    const Align Alignment = Align(8);
    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
    // Unpoison the 32-byte va_list struct written by va_start (its fields
    // are read at offsets 0..28 in finalizeInstrumentation()).
    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
                     /* size */ 32, Alignment, false);
  }

  // NOTE: va_copy is treated like va_start here - its va_list (operand 0)
  // is also queued so finalizeInstrumentation() propagates shadow to it.
  void visitVACopyInst(VACopyInst &I) override {
    IRBuilder<> IRB(&I);
    VAStartInstrumentationList.push_back(&I);
    Value *VAListTag = I.getArgOperand(0);
    Value *ShadowPtr, *OriginPtr;
    const Align Alignment = Align(8);
    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
    // Unpoison the 32-byte va_list struct, as for va_start above.
    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
                     /* size */ 32, Alignment, false);
  }

  // Retrieve a va_list field of 'void*' size.
  // Loads the i64 stored at VAListTag + offset.
  Value* getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
    Value *SaveAreaPtrPtr =
      IRB.CreateIntToPtr(
        IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
                      ConstantInt::get(MS.IntptrTy, offset)),
        Type::getInt64PtrTy(*MS.C));
    return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr);
  }

  // Retrieve a va_list field of 'int' size.
  // Loads the i32 at VAListTag + offset and sign-extends it to pointer
  // width (the __gr_offs/__vr_offs fields can be negative, see below).
  Value* getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
    Value *SaveAreaPtr =
      IRB.CreateIntToPtr(
        IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
                      ConstantInt::get(MS.IntptrTy, offset)),
        Type::getInt32PtrTy(*MS.C));
    Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr);
    return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
  }

  void finalizeInstrumentation() override {
    assert(!VAArgOverflowSize && !VAArgTLSCopy &&
           "finalizeInstrumentation called twice");
    if (!VAStartInstrumentationList.empty()) {
      // If there is a va_start in this function, make a backup copy of
      // va_arg_tls somewhere in the function entry block.
      IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
      VAArgOverflowSize =
          IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
      // Total staged shadow = fixed GR+VR regions + dynamic overflow size.
      Value *CopySize =
        IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset),
                      VAArgOverflowSize);
      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
      IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
    }

    // Full sizes of the GR and VR staging regions, as IntptrTy constants.
    Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
    Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize);

    // Instrument va_start, copy va_list shadow from the backup copy of
    // the TLS contents.
    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
      CallInst *OrigInst = VAStartInstrumentationList[i];
      IRBuilder<> IRB(OrigInst->getNextNode());

      Value *VAListTag = OrigInst->getArgOperand(0);

      // The variadic ABI for AArch64 creates two areas to save the incoming
      // argument registers (one for 64-bit general register xn-x7 and another
      // for 128-bit FP/SIMD vn-v7).
      // We need then to propagate the shadow arguments on both regions
      // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'.
      // The remaining arguments are saved on shadow for 'va::stack'.
      // One caveat is it requires only to propagate the non-named arguments,
      // however on the call site instrumentation 'all' the arguments are
      // saved. So to copy the shadow values from the va_arg TLS array
      // we need to adjust the offset for both GR and VR fields based on
      // the __{gr,vr}_offs value (since they are stores based on incoming
      // named arguments).

      // Read the stack pointer from the va_list.
      Value *StackSaveAreaPtr = getVAField64(IRB, VAListTag, 0);

      // Read both the __gr_top and __gr_off and add them up.
      Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8);
      Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24);

      Value *GrRegSaveAreaPtr = IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea);

      // Read both the __vr_top and __vr_off and add them up.
      Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16);
      Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28);

      Value *VrRegSaveAreaPtr = IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea);

      // It does not know how many named arguments is being used and, on the
      // callsite all the arguments were saved.  Since __gr_off is defined as
      // '0 - ((8 - named_gr) * 8)', the idea is to just propagate the variadic
      // argument by ignoring the bytes of shadow from named arguments.
      Value *GrRegSaveAreaShadowPtrOff =
        IRB.CreateAdd(GrArgSize, GrOffSaveArea);

      Value *GrRegSaveAreaShadowPtr =
          MSV.getShadowOriginPtr(GrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
                                 Align(8), /*isStore*/ true)
              .first;

      // Copy only the variadic tail of the GR staging region.
      Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
                                              GrRegSaveAreaShadowPtrOff);
      Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);

      IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, Align(8), GrSrcPtr, Align(8),
                       GrCopySize);

      // Again, but for FP/SIMD values.
      Value *VrRegSaveAreaShadowPtrOff =
          IRB.CreateAdd(VrArgSize, VrOffSaveArea);

      Value *VrRegSaveAreaShadowPtr =
          MSV.getShadowOriginPtr(VrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
                                 Align(8), /*isStore*/ true)
              .first;

      // The VR region starts at AArch64VrBegOffset within the TLS copy.
      Value *VrSrcPtr = IRB.CreateInBoundsGEP(
        IRB.getInt8Ty(),
        IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
                              IRB.getInt32(AArch64VrBegOffset)),
        VrRegSaveAreaShadowPtrOff);
      Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);

      IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, Align(8), VrSrcPtr, Align(8),
                       VrCopySize);

      // And finally for remaining arguments.
      Value *StackSaveAreaShadowPtr =
          MSV.getShadowOriginPtr(StackSaveAreaPtr, IRB, IRB.getInt8Ty(),
                                 Align(16), /*isStore*/ true)
              .first;

      // Stack-argument shadow lives past the GR and VR regions in the copy.
      Value *StackSrcPtr =
        IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
                              IRB.getInt32(AArch64VAEndOffset));

      IRB.CreateMemCpy(StackSaveAreaShadowPtr, Align(16), StackSrcPtr,
                       Align(16), VAArgOverflowSize);
    }
  }
};
4682 
4683 /// PowerPC64-specific implementation of VarArgHelper.
4684 struct VarArgPowerPC64Helper : public VarArgHelper {
4685   Function &F;
4686   MemorySanitizer &MS;
4687   MemorySanitizerVisitor &MSV;
4688   Value *VAArgTLSCopy = nullptr;
4689   Value *VAArgSize = nullptr;
4690 
4691   SmallVector<CallInst*, 16> VAStartInstrumentationList;
4692 
4693   VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS,
4694                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
4695 
4696   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4697     // For PowerPC, we need to deal with alignment of stack arguments -
4698     // they are mostly aligned to 8 bytes, but vectors and i128 arrays
4699     // are aligned to 16 bytes, byvals can be aligned to 8 or 16 bytes,
4700     // and QPX vectors are aligned to 32 bytes.  For that reason, we
4701     // compute current offset from stack pointer (which is always properly
4702     // aligned), and offset for the first vararg, then subtract them.
4703     unsigned VAArgBase;
4704     Triple TargetTriple(F.getParent()->getTargetTriple());
4705     // Parameter save area starts at 48 bytes from frame pointer for ABIv1,
4706     // and 32 bytes for ABIv2.  This is usually determined by target
4707     // endianness, but in theory could be overridden by function attribute.
4708     // For simplicity, we ignore it here (it'd only matter for QPX vectors).
4709     if (TargetTriple.getArch() == Triple::ppc64)
4710       VAArgBase = 48;
4711     else
4712       VAArgBase = 32;
4713     unsigned VAArgOffset = VAArgBase;
4714     const DataLayout &DL = F.getParent()->getDataLayout();
4715     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
4716          ++ArgIt) {
4717       Value *A = *ArgIt;
4718       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
4719       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
4720       bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
4721       if (IsByVal) {
4722         assert(A->getType()->isPointerTy());
4723         Type *RealTy = CB.getParamByValType(ArgNo);
4724         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
4725         MaybeAlign ArgAlign = CB.getParamAlign(ArgNo);
4726         if (!ArgAlign || *ArgAlign < Align(8))
4727           ArgAlign = Align(8);
4728         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
4729         if (!IsFixed) {
4730           Value *Base = getShadowPtrForVAArgument(
4731               RealTy, IRB, VAArgOffset - VAArgBase, ArgSize);
4732           if (Base) {
4733             Value *AShadowPtr, *AOriginPtr;
4734             std::tie(AShadowPtr, AOriginPtr) =
4735                 MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(),
4736                                        kShadowTLSAlignment, /*isStore*/ false);
4737 
4738             IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr,
4739                              kShadowTLSAlignment, ArgSize);
4740           }
4741         }
4742         VAArgOffset += alignTo(ArgSize, 8);
4743       } else {
4744         Value *Base;
4745         uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4746         uint64_t ArgAlign = 8;
4747         if (A->getType()->isArrayTy()) {
4748           // Arrays are aligned to element size, except for long double
4749           // arrays, which are aligned to 8 bytes.
4750           Type *ElementTy = A->getType()->getArrayElementType();
4751           if (!ElementTy->isPPC_FP128Ty())
4752             ArgAlign = DL.getTypeAllocSize(ElementTy);
4753         } else if (A->getType()->isVectorTy()) {
4754           // Vectors are naturally aligned.
4755           ArgAlign = DL.getTypeAllocSize(A->getType());
4756         }
4757         if (ArgAlign < 8)
4758           ArgAlign = 8;
4759         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
4760         if (DL.isBigEndian()) {
4761           // Adjusting the shadow for argument with size < 8 to match the placement
4762           // of bits in big endian system
4763           if (ArgSize < 8)
4764             VAArgOffset += (8 - ArgSize);
4765         }
4766         if (!IsFixed) {
4767           Base = getShadowPtrForVAArgument(A->getType(), IRB,
4768                                            VAArgOffset - VAArgBase, ArgSize);
4769           if (Base)
4770             IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
4771         }
4772         VAArgOffset += ArgSize;
4773         VAArgOffset = alignTo(VAArgOffset, 8);
4774       }
4775       if (IsFixed)
4776         VAArgBase = VAArgOffset;
4777     }
4778 
4779     Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(),
4780                                                 VAArgOffset - VAArgBase);
4781     // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
4782     // a new class member i.e. it is the total size of all VarArgs.
4783     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
4784   }
4785 
4786   /// Compute the shadow address for a given va_arg.
4787   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4788                                    unsigned ArgOffset, unsigned ArgSize) {
4789     // Make sure we don't overflow __msan_va_arg_tls.
4790     if (ArgOffset + ArgSize > kParamTLSSize)
4791       return nullptr;
4792     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4793     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4794     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4795                               "_msarg");
4796   }
4797 
4798   void visitVAStartInst(VAStartInst &I) override {
4799     IRBuilder<> IRB(&I);
4800     VAStartInstrumentationList.push_back(&I);
4801     Value *VAListTag = I.getArgOperand(0);
4802     Value *ShadowPtr, *OriginPtr;
4803     const Align Alignment = Align(8);
4804     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4805         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4806     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4807                      /* size */ 8, Alignment, false);
4808   }
4809 
4810   void visitVACopyInst(VACopyInst &I) override {
4811     IRBuilder<> IRB(&I);
4812     Value *VAListTag = I.getArgOperand(0);
4813     Value *ShadowPtr, *OriginPtr;
4814     const Align Alignment = Align(8);
4815     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4816         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4817     // Unpoison the whole __va_list_tag.
4818     // FIXME: magic ABI constants.
4819     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4820                      /* size */ 8, Alignment, false);
4821   }
4822 
4823   void finalizeInstrumentation() override {
4824     assert(!VAArgSize && !VAArgTLSCopy &&
4825            "finalizeInstrumentation called twice");
4826     IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
4827     VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4828     Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
4829                                     VAArgSize);
4830 
4831     if (!VAStartInstrumentationList.empty()) {
4832       // If there is a va_start in this function, make a backup copy of
4833       // va_arg_tls somewhere in the function entry block.
4834       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4835       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
4836     }
4837 
4838     // Instrument va_start.
4839     // Copy va_list shadow from the backup copy of the TLS contents.
4840     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4841       CallInst *OrigInst = VAStartInstrumentationList[i];
4842       IRBuilder<> IRB(OrigInst->getNextNode());
4843       Value *VAListTag = OrigInst->getArgOperand(0);
4844       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4845       Value *RegSaveAreaPtrPtr =
4846           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4847                              PointerType::get(RegSaveAreaPtrTy, 0));
4848       Value *RegSaveAreaPtr =
4849           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
4850       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
4851       const Align Alignment = Align(8);
4852       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
4853           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4854                                  Alignment, /*isStore*/ true);
4855       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
4856                        CopySize);
4857     }
4858   }
4859 };
4860 
4861 /// SystemZ-specific implementation of VarArgHelper.
4862 struct VarArgSystemZHelper : public VarArgHelper {
4863   static const unsigned SystemZGpOffset = 16;
4864   static const unsigned SystemZGpEndOffset = 56;
4865   static const unsigned SystemZFpOffset = 128;
4866   static const unsigned SystemZFpEndOffset = 160;
4867   static const unsigned SystemZMaxVrArgs = 8;
4868   static const unsigned SystemZRegSaveAreaSize = 160;
4869   static const unsigned SystemZOverflowOffset = 160;
4870   static const unsigned SystemZVAListTagSize = 32;
4871   static const unsigned SystemZOverflowArgAreaPtrOffset = 16;
4872   static const unsigned SystemZRegSaveAreaPtrOffset = 24;
4873 
4874   Function &F;
4875   MemorySanitizer &MS;
4876   MemorySanitizerVisitor &MSV;
4877   Value *VAArgTLSCopy = nullptr;
4878   Value *VAArgTLSOriginCopy = nullptr;
4879   Value *VAArgOverflowSize = nullptr;
4880 
4881   SmallVector<CallInst *, 16> VAStartInstrumentationList;
4882 
4883   enum class ArgKind {
4884     GeneralPurpose,
4885     FloatingPoint,
4886     Vector,
4887     Memory,
4888     Indirect,
4889   };
4890 
4891   enum class ShadowExtension { None, Zero, Sign };
4892 
4893   VarArgSystemZHelper(Function &F, MemorySanitizer &MS,
4894                       MemorySanitizerVisitor &MSV)
4895       : F(F), MS(MS), MSV(MSV) {}
4896 
4897   ArgKind classifyArgument(Type *T, bool IsSoftFloatABI) {
4898     // T is a SystemZABIInfo::classifyArgumentType() output, and there are
4899     // only a few possibilities of what it can be. In particular, enums, single
4900     // element structs and large types have already been taken care of.
4901 
4902     // Some i128 and fp128 arguments are converted to pointers only in the
4903     // back end.
4904     if (T->isIntegerTy(128) || T->isFP128Ty())
4905       return ArgKind::Indirect;
4906     if (T->isFloatingPointTy())
4907       return IsSoftFloatABI ? ArgKind::GeneralPurpose : ArgKind::FloatingPoint;
4908     if (T->isIntegerTy() || T->isPointerTy())
4909       return ArgKind::GeneralPurpose;
4910     if (T->isVectorTy())
4911       return ArgKind::Vector;
4912     return ArgKind::Memory;
4913   }
4914 
4915   ShadowExtension getShadowExtension(const CallBase &CB, unsigned ArgNo) {
4916     // ABI says: "One of the simple integer types no more than 64 bits wide.
4917     // ... If such an argument is shorter than 64 bits, replace it by a full
4918     // 64-bit integer representing the same number, using sign or zero
4919     // extension". Shadow for an integer argument has the same type as the
4920     // argument itself, so it can be sign or zero extended as well.
4921     bool ZExt = CB.paramHasAttr(ArgNo, Attribute::ZExt);
4922     bool SExt = CB.paramHasAttr(ArgNo, Attribute::SExt);
4923     if (ZExt) {
4924       assert(!SExt);
4925       return ShadowExtension::Zero;
4926     }
4927     if (SExt) {
4928       assert(!ZExt);
4929       return ShadowExtension::Sign;
4930     }
4931     return ShadowExtension::None;
4932   }
4933 
4934   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4935     bool IsSoftFloatABI = CB.getCalledFunction()
4936                               ->getFnAttribute("use-soft-float")
4937                               .getValueAsString() == "true";
4938     unsigned GpOffset = SystemZGpOffset;
4939     unsigned FpOffset = SystemZFpOffset;
4940     unsigned VrIndex = 0;
4941     unsigned OverflowOffset = SystemZOverflowOffset;
4942     const DataLayout &DL = F.getParent()->getDataLayout();
4943     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
4944          ++ArgIt) {
4945       Value *A = *ArgIt;
4946       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
4947       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
4948       // SystemZABIInfo does not produce ByVal parameters.
4949       assert(!CB.paramHasAttr(ArgNo, Attribute::ByVal));
4950       Type *T = A->getType();
4951       ArgKind AK = classifyArgument(T, IsSoftFloatABI);
4952       if (AK == ArgKind::Indirect) {
4953         T = PointerType::get(T, 0);
4954         AK = ArgKind::GeneralPurpose;
4955       }
4956       if (AK == ArgKind::GeneralPurpose && GpOffset >= SystemZGpEndOffset)
4957         AK = ArgKind::Memory;
4958       if (AK == ArgKind::FloatingPoint && FpOffset >= SystemZFpEndOffset)
4959         AK = ArgKind::Memory;
4960       if (AK == ArgKind::Vector && (VrIndex >= SystemZMaxVrArgs || !IsFixed))
4961         AK = ArgKind::Memory;
4962       Value *ShadowBase = nullptr;
4963       Value *OriginBase = nullptr;
4964       ShadowExtension SE = ShadowExtension::None;
4965       switch (AK) {
4966       case ArgKind::GeneralPurpose: {
4967         // Always keep track of GpOffset, but store shadow only for varargs.
4968         uint64_t ArgSize = 8;
4969         if (GpOffset + ArgSize <= kParamTLSSize) {
4970           if (!IsFixed) {
4971             SE = getShadowExtension(CB, ArgNo);
4972             uint64_t GapSize = 0;
4973             if (SE == ShadowExtension::None) {
4974               uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
4975               assert(ArgAllocSize <= ArgSize);
4976               GapSize = ArgSize - ArgAllocSize;
4977             }
4978             ShadowBase = getShadowAddrForVAArgument(IRB, GpOffset + GapSize);
4979             if (MS.TrackOrigins)
4980               OriginBase = getOriginPtrForVAArgument(IRB, GpOffset + GapSize);
4981           }
4982           GpOffset += ArgSize;
4983         } else {
4984           GpOffset = kParamTLSSize;
4985         }
4986         break;
4987       }
4988       case ArgKind::FloatingPoint: {
4989         // Always keep track of FpOffset, but store shadow only for varargs.
4990         uint64_t ArgSize = 8;
4991         if (FpOffset + ArgSize <= kParamTLSSize) {
4992           if (!IsFixed) {
4993             // PoP says: "A short floating-point datum requires only the
4994             // left-most 32 bit positions of a floating-point register".
4995             // Therefore, in contrast to AK_GeneralPurpose and AK_Memory,
4996             // don't extend shadow and don't mind the gap.
4997             ShadowBase = getShadowAddrForVAArgument(IRB, FpOffset);
4998             if (MS.TrackOrigins)
4999               OriginBase = getOriginPtrForVAArgument(IRB, FpOffset);
5000           }
5001           FpOffset += ArgSize;
5002         } else {
5003           FpOffset = kParamTLSSize;
5004         }
5005         break;
5006       }
5007       case ArgKind::Vector: {
5008         // Keep track of VrIndex. No need to store shadow, since vector varargs
5009         // go through AK_Memory.
5010         assert(IsFixed);
5011         VrIndex++;
5012         break;
5013       }
5014       case ArgKind::Memory: {
5015         // Keep track of OverflowOffset and store shadow only for varargs.
5016         // Ignore fixed args, since we need to copy only the vararg portion of
5017         // the overflow area shadow.
5018         if (!IsFixed) {
5019           uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
5020           uint64_t ArgSize = alignTo(ArgAllocSize, 8);
5021           if (OverflowOffset + ArgSize <= kParamTLSSize) {
5022             SE = getShadowExtension(CB, ArgNo);
5023             uint64_t GapSize =
5024                 SE == ShadowExtension::None ? ArgSize - ArgAllocSize : 0;
5025             ShadowBase =
5026                 getShadowAddrForVAArgument(IRB, OverflowOffset + GapSize);
5027             if (MS.TrackOrigins)
5028               OriginBase =
5029                   getOriginPtrForVAArgument(IRB, OverflowOffset + GapSize);
5030             OverflowOffset += ArgSize;
5031           } else {
5032             OverflowOffset = kParamTLSSize;
5033           }
5034         }
5035         break;
5036       }
5037       case ArgKind::Indirect:
5038         llvm_unreachable("Indirect must be converted to GeneralPurpose");
5039       }
5040       if (ShadowBase == nullptr)
5041         continue;
5042       Value *Shadow = MSV.getShadow(A);
5043       if (SE != ShadowExtension::None)
5044         Shadow = MSV.CreateShadowCast(IRB, Shadow, IRB.getInt64Ty(),
5045                                       /*Signed*/ SE == ShadowExtension::Sign);
5046       ShadowBase = IRB.CreateIntToPtr(
5047           ShadowBase, PointerType::get(Shadow->getType(), 0), "_msarg_va_s");
5048       IRB.CreateStore(Shadow, ShadowBase);
5049       if (MS.TrackOrigins) {
5050         Value *Origin = MSV.getOrigin(A);
5051         unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
5052         MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
5053                         kMinOriginAlignment);
5054       }
5055     }
5056     Constant *OverflowSize = ConstantInt::get(
5057         IRB.getInt64Ty(), OverflowOffset - SystemZOverflowOffset);
5058     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
5059   }
5060 
5061   Value *getShadowAddrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
5062     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
5063     return IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5064   }
5065 
5066   Value *getOriginPtrForVAArgument(IRBuilder<> &IRB, int ArgOffset) {
5067     Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
5068     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5069     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
5070                               "_msarg_va_o");
5071   }
5072 
5073   void unpoisonVAListTagForInst(IntrinsicInst &I) {
5074     IRBuilder<> IRB(&I);
5075     Value *VAListTag = I.getArgOperand(0);
5076     Value *ShadowPtr, *OriginPtr;
5077     const Align Alignment = Align(8);
5078     std::tie(ShadowPtr, OriginPtr) =
5079         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
5080                                /*isStore*/ true);
5081     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
5082                      SystemZVAListTagSize, Alignment, false);
5083   }
5084 
5085   void visitVAStartInst(VAStartInst &I) override {
5086     VAStartInstrumentationList.push_back(&I);
5087     unpoisonVAListTagForInst(I);
5088   }
5089 
5090   void visitVACopyInst(VACopyInst &I) override { unpoisonVAListTagForInst(I); }
5091 
5092   void copyRegSaveArea(IRBuilder<> &IRB, Value *VAListTag) {
5093     Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
5094     Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
5095         IRB.CreateAdd(
5096             IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5097             ConstantInt::get(MS.IntptrTy, SystemZRegSaveAreaPtrOffset)),
5098         PointerType::get(RegSaveAreaPtrTy, 0));
5099     Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
5100     Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
5101     const Align Alignment = Align(8);
5102     std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
5103         MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), Alignment,
5104                                /*isStore*/ true);
5105     // TODO(iii): copy only fragments filled by visitCallBase()
5106     IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
5107                      SystemZRegSaveAreaSize);
5108     if (MS.TrackOrigins)
5109       IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
5110                        Alignment, SystemZRegSaveAreaSize);
5111   }
5112 
5113   void copyOverflowArea(IRBuilder<> &IRB, Value *VAListTag) {
5114     Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
5115     Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
5116         IRB.CreateAdd(
5117             IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5118             ConstantInt::get(MS.IntptrTy, SystemZOverflowArgAreaPtrOffset)),
5119         PointerType::get(OverflowArgAreaPtrTy, 0));
5120     Value *OverflowArgAreaPtr =
5121         IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
5122     Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
5123     const Align Alignment = Align(8);
5124     std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
5125         MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
5126                                Alignment, /*isStore*/ true);
5127     Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
5128                                            SystemZOverflowOffset);
5129     IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
5130                      VAArgOverflowSize);
5131     if (MS.TrackOrigins) {
5132       SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
5133                                       SystemZOverflowOffset);
5134       IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
5135                        VAArgOverflowSize);
5136     }
5137   }
5138 
5139   void finalizeInstrumentation() override {
5140     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
5141            "finalizeInstrumentation called twice");
5142     if (!VAStartInstrumentationList.empty()) {
5143       // If there is a va_start in this function, make a backup copy of
5144       // va_arg_tls somewhere in the function entry block.
5145       IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
5146       VAArgOverflowSize =
5147           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
5148       Value *CopySize =
5149           IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, SystemZOverflowOffset),
5150                         VAArgOverflowSize);
5151       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5152       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
5153       if (MS.TrackOrigins) {
5154         VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5155         IRB.CreateMemCpy(VAArgTLSOriginCopy, Align(8), MS.VAArgOriginTLS,
5156                          Align(8), CopySize);
5157       }
5158     }
5159 
5160     // Instrument va_start.
5161     // Copy va_list shadow from the backup copy of the TLS contents.
5162     for (size_t VaStartNo = 0, VaStartNum = VAStartInstrumentationList.size();
5163          VaStartNo < VaStartNum; VaStartNo++) {
5164       CallInst *OrigInst = VAStartInstrumentationList[VaStartNo];
5165       IRBuilder<> IRB(OrigInst->getNextNode());
5166       Value *VAListTag = OrigInst->getArgOperand(0);
5167       copyRegSaveArea(IRB, VAListTag);
5168       copyOverflowArea(IRB, VAListTag);
5169     }
5170   }
5171 };
5172 
5173 /// A no-op implementation of VarArgHelper.
5174 struct VarArgNoOpHelper : public VarArgHelper {
5175   VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
5176                    MemorySanitizerVisitor &MSV) {}
5177 
5178   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {}
5179 
5180   void visitVAStartInst(VAStartInst &I) override {}
5181 
5182   void visitVACopyInst(VACopyInst &I) override {}
5183 
5184   void finalizeInstrumentation() override {}
5185 };
5186 
5187 } // end anonymous namespace
5188 
5189 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
5190                                         MemorySanitizerVisitor &Visitor) {
5191   // VarArg handling is only implemented on AMD64. False positives are possible
5192   // on other platforms.
5193   Triple TargetTriple(Func.getParent()->getTargetTriple());
5194   if (TargetTriple.getArch() == Triple::x86_64)
5195     return new VarArgAMD64Helper(Func, Msan, Visitor);
5196   else if (TargetTriple.isMIPS64())
5197     return new VarArgMIPS64Helper(Func, Msan, Visitor);
5198   else if (TargetTriple.getArch() == Triple::aarch64)
5199     return new VarArgAArch64Helper(Func, Msan, Visitor);
5200   else if (TargetTriple.getArch() == Triple::ppc64 ||
5201            TargetTriple.getArch() == Triple::ppc64le)
5202     return new VarArgPowerPC64Helper(Func, Msan, Visitor);
5203   else if (TargetTriple.getArch() == Triple::systemz)
5204     return new VarArgSystemZHelper(Func, Msan, Visitor);
5205   else
5206     return new VarArgNoOpHelper(Func, Msan, Visitor);
5207 }
5208 
5209 bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
5210   if (!CompileKernel && F.getName() == kMsanModuleCtorName)
5211     return false;
5212 
5213   MemorySanitizerVisitor Visitor(F, *this, TLI);
5214 
5215   // Clear out readonly/readnone attributes.
5216   AttrBuilder B;
5217   B.addAttribute(Attribute::ReadOnly)
5218       .addAttribute(Attribute::ReadNone)
5219       .addAttribute(Attribute::WriteOnly)
5220       .addAttribute(Attribute::ArgMemOnly)
5221       .addAttribute(Attribute::Speculatable);
5222   F.removeAttributes(AttributeList::FunctionIndex, B);
5223 
5224   return Visitor.runOnFunction();
5225 }
5226