//===- HWAddressSanitizer.cpp - detector of memory bugs ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// This file is a part of HWAddressSanitizer, a basic address correctness
/// checker based on tagged addressing.
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/Triple.h"
20 #include "llvm/Analysis/PostDominators.h"
21 #include "llvm/Analysis/StackSafetyAnalysis.h"
22 #include "llvm/Analysis/ValueTracking.h"
23 #include "llvm/BinaryFormat/Dwarf.h"
24 #include "llvm/BinaryFormat/ELF.h"
25 #include "llvm/IR/Attributes.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/Constant.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/DebugInfoMetadata.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/Dominators.h"
33 #include "llvm/IR/Function.h"
34 #include "llvm/IR/IRBuilder.h"
35 #include "llvm/IR/InlineAsm.h"
36 #include "llvm/IR/InstIterator.h"
37 #include "llvm/IR/Instruction.h"
38 #include "llvm/IR/Instructions.h"
39 #include "llvm/IR/IntrinsicInst.h"
40 #include "llvm/IR/Intrinsics.h"
41 #include "llvm/IR/LLVMContext.h"
42 #include "llvm/IR/MDBuilder.h"
43 #include "llvm/IR/Module.h"
44 #include "llvm/IR/Type.h"
45 #include "llvm/IR/Value.h"
46 #include "llvm/InitializePasses.h"
47 #include "llvm/Pass.h"
48 #include "llvm/PassRegistry.h"
49 #include "llvm/Support/Casting.h"
50 #include "llvm/Support/CommandLine.h"
51 #include "llvm/Support/Debug.h"
52 #include "llvm/Support/raw_ostream.h"
53 #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
54 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
55 #include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
56 #include "llvm/Transforms/Utils/ModuleUtils.h"
57 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
58 
59 using namespace llvm;
60 
61 #define DEBUG_TYPE "hwasan"
62 
// Name of the module constructor function; the same string also names the
// comdat that holds the constructor and the globals note.
constexpr char kHwasanModuleCtorName[] = "hwasan.module_ctor";
// Name of the private global that holds the ELF note describing the globals.
constexpr char kHwasanNoteName[] = "hwasan.note";
// Runtime entry point invoked from the module constructor.
constexpr char kHwasanInitName[] = "__hwasan_init";
constexpr char kHwasanPersonalityThunkName[] = "__hwasan_personality_thunk";

// Global that is loaded to obtain the shadow base when the mapping offset is
// dynamic and the shadow is not reached through the __hwasan_shadow global.
constexpr char kHwasanShadowMemoryDynamicAddress[] =
    "__hwasan_shadow_memory_dynamic_address";

// Access sizes are powers of two: 1, 2, 4, 8, 16.
static constexpr size_t kNumberOfAccessSizes = 5;

static constexpr size_t kDefaultShadowScale = 4;
// Value of ShadowMapping::Offset that marks the offset as not being a fixed
// constant.
static constexpr uint64_t kDynamicShadowSentinel =
    std::numeric_limits<uint64_t>::max();

static constexpr unsigned kShadowBaseAlignment = 32;
79 
// Prefix prepended to the names of the runtime callbacks declared in
// initializeCallbacks() (load/store checks and, outside kernel mode, the
// memmove/memcpy/memset replacements).
static cl::opt<std::string>
    ClMemoryAccessCallbackPrefix("hwasan-memory-access-callback-prefix",
                                 cl::desc("Prefix for memory access callbacks"),
                                 cl::Hidden, cl::init("__hwasan_"));

// Read by shouldInstrumentWithCalls(); x86_64 targets instrument with calls
// regardless of this flag.
static cl::opt<bool> ClInstrumentWithCalls(
    "hwasan-instrument-with-calls",
    cl::desc("instrument reads and writes with callbacks"), cl::Hidden,
    cl::init(false));

// The following four flags select which kinds of memory operations are
// instrumented; all are enabled by default.
static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads",
                                       cl::desc("instrument read instructions"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentWrites("hwasan-instrument-writes",
                       cl::desc("instrument write instructions"), cl::Hidden,
                       cl::init(true));

static cl::opt<bool> ClInstrumentAtomics(
    "hwasan-instrument-atomics",
    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
    cl::init(true));

static cl::opt<bool> ClInstrumentByval("hwasan-instrument-byval",
                                       cl::desc("instrument byval arguments"),
                                       cl::Hidden, cl::init(true));

// When given on the command line, this overrides the Recover argument passed
// to the HWAddressSanitizer constructor.
static cl::opt<bool>
    ClRecover("hwasan-recover",
              cl::desc("Enable recovery mode (continue-after-error)."),
              cl::Hidden, cl::init(false));

// Read by shouldInstrumentStack(); has no effect when page aliasing is in use.
static cl::opt<bool> ClInstrumentStack("hwasan-instrument-stack",
                                       cl::desc("instrument stack (allocas)"),
                                       cl::Hidden, cl::init(true));
116 
117 static cl::opt<bool>
118     ClUseStackSafety("hwasan-use-stack-safety", cl::Hidden, cl::init(true),
119                      cl::Hidden, cl::desc("Use Stack Safety analysis results"),
120                      cl::Optional);
121 
122 static cl::opt<size_t> ClMaxLifetimes(
123     "hwasan-max-lifetimes-for-alloca", cl::Hidden, cl::init(3),
124     cl::ReallyHidden,
125     cl::desc("How many lifetime ends to handle for a single alloca."),
126     cl::Optional);
127 
// Read by shouldDetectUseAfterScope(); only effective when the stack is
// instrumented.
static cl::opt<bool>
    ClUseAfterScope("hwasan-use-after-scope",
                    cl::desc("detect use after scope within function"),
                    cl::Hidden, cl::init(false));

static cl::opt<bool> ClUARRetagToZero(
    "hwasan-uar-retag-to-zero",
    cl::desc("Clear alloca tags before returning from the function to allow "
             "non-instrumented and instrumented function calls mix. When set "
             "to false, allocas are retagged before returning from the "
             "function to detect use after return."),
    cl::Hidden, cl::init(true));

static cl::opt<bool> ClGenerateTagsWithCalls(
    "hwasan-generate-tags-with-calls",
    cl::desc("generate new tags with runtime library calls"), cl::Hidden,
    cl::init(false));

// When not given on the command line, initializeModule() instruments globals
// only if the target is assumed to have a new enough runtime.
static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"),
                               cl::Hidden, cl::init(false), cl::ZeroOrMore);

// -1 means "no match-all tag"; otherwise only the low 8 bits are used. When
// the flag is not given, kernel mode uses 0xFF as the match-all tag (see
// initializeModule()).
static cl::opt<int> ClMatchAllTag(
    "hwasan-match-all-tag",
    cl::desc("don't report bad accesses via pointers with this tag"),
    cl::Hidden, cl::init(-1));

// When given on the command line, this overrides the CompileKernel argument
// passed to the HWAddressSanitizer constructor.
static cl::opt<bool>
    ClEnableKhwasan("hwasan-kernel",
                    cl::desc("Enable KernelHWAddressSanitizer instrumentation"),
                    cl::Hidden, cl::init(false));
158 
159 // These flags allow to change the shadow mapping and control how shadow memory
160 // is accessed. The shadow mapping looks like:
161 //    Shadow = (Mem >> scale) + offset
162 
163 static cl::opt<uint64_t>
164     ClMappingOffset("hwasan-mapping-offset",
165                     cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"),
166                     cl::Hidden, cl::init(0));
167 
168 static cl::opt<bool>
169     ClWithIfunc("hwasan-with-ifunc",
170                 cl::desc("Access dynamic shadow through an ifunc global on "
171                          "platforms that support this"),
172                 cl::Hidden, cl::init(false));
173 
174 static cl::opt<bool> ClWithTls(
175     "hwasan-with-tls",
176     cl::desc("Access dynamic shadow through an thread-local pointer on "
177              "platforms that support this"),
178     cl::Hidden, cl::init(true));
179 
180 static cl::opt<bool>
181     ClRecordStackHistory("hwasan-record-stack-history",
182                          cl::desc("Record stack frames with tagged allocations "
183                                   "in a thread-local ring buffer"),
184                          cl::Hidden, cl::init(true));
185 static cl::opt<bool>
186     ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
187                               cl::desc("instrument memory intrinsics"),
188                               cl::Hidden, cl::init(true));
189 
// When not given on the command line, initializeModule() enables landing pad
// instrumentation only if the target is NOT assumed to have a new runtime.
static cl::opt<bool>
    ClInstrumentLandingPads("hwasan-instrument-landing-pads",
                            cl::desc("instrument landing pads"), cl::Hidden,
                            cl::init(false), cl::ZeroOrMore);

// When not given on the command line, initializeModule() enables short
// granules if the target is assumed to have a new runtime.
static cl::opt<bool> ClUseShortGranules(
    "hwasan-use-short-granules",
    cl::desc("use short granules in allocas and outlined checks"), cl::Hidden,
    cl::init(false), cl::ZeroOrMore);

// When not given on the command line, initializeModule() instruments
// personality functions if the target is assumed to have a new runtime.
static cl::opt<bool> ClInstrumentPersonalityFunctions(
    "hwasan-instrument-personality-functions",
    cl::desc("instrument personality functions"), cl::Hidden, cl::init(false),
    cl::ZeroOrMore);

// When given, outlined checks are used only if this is false; when not given,
// initializeModule() decides based on the recover setting instead.
static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
                                       cl::desc("inline all checks"),
                                       cl::Hidden, cl::init(false));

// Enabled from clang by "-fsanitize-hwaddress-experimental-aliasing".
// Only honoured on x86_64 (see shouldUsePageAliases()).
static cl::opt<bool> ClUsePageAliases("hwasan-experimental-use-page-aliases",
                                      cl::desc("Use page aliasing in HWASan"),
                                      cl::Hidden, cl::init(false));
213 
214 namespace {
215 
216 bool shouldUsePageAliases(const Triple &TargetTriple) {
217   return ClUsePageAliases && TargetTriple.getArch() == Triple::x86_64;
218 }
219 
220 bool shouldInstrumentStack(const Triple &TargetTriple) {
221   return !shouldUsePageAliases(TargetTriple) && ClInstrumentStack;
222 }
223 
224 bool shouldInstrumentWithCalls(const Triple &TargetTriple) {
225   return ClInstrumentWithCalls || TargetTriple.getArch() == Triple::x86_64;
226 }
227 
228 bool mightUseStackSafetyAnalysis(bool DisableOptimization) {
229   return ClUseStackSafety.getNumOccurrences() ? ClUseStackSafety
230                                               : !DisableOptimization;
231 }
232 
233 bool shouldUseStackSafetyAnalysis(const Triple &TargetTriple,
234                                   bool DisableOptimization) {
235   return shouldInstrumentStack(TargetTriple) &&
236          mightUseStackSafetyAnalysis(DisableOptimization);
237 }
238 
239 bool shouldDetectUseAfterScope(const Triple &TargetTriple) {
240   return ClUseAfterScope && shouldInstrumentStack(TargetTriple);
241 }
242 
243 /// An instrumentation pass implementing detection of addressability bugs
244 /// using tagged pointers.
245 class HWAddressSanitizer {
246 public:
247   HWAddressSanitizer(Module &M, bool CompileKernel, bool Recover,
248                      const StackSafetyGlobalInfo *SSI)
249       : M(M), SSI(SSI) {
250     this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
251     this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0
252                               ? ClEnableKhwasan
253                               : CompileKernel;
254 
255     initializeModule();
256   }
257 
258   void setSSI(const StackSafetyGlobalInfo *S) { SSI = S; }
259 
260   bool sanitizeFunction(Function &F,
261                         llvm::function_ref<const DominatorTree &()> GetDT,
262                         llvm::function_ref<const PostDominatorTree &()> GetPDT);
263   void initializeModule();
264   void createHwasanCtorComdat();
265 
266   void initializeCallbacks(Module &M);
267 
268   Value *getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val);
269 
270   Value *getDynamicShadowIfunc(IRBuilder<> &IRB);
271   Value *getShadowNonTls(IRBuilder<> &IRB);
272 
273   void untagPointerOperand(Instruction *I, Value *Addr);
274   Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
275 
276   int64_t getAccessInfo(bool IsWrite, unsigned AccessSizeIndex);
277   void instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
278                                   unsigned AccessSizeIndex,
279                                   Instruction *InsertBefore);
280   void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
281                                  unsigned AccessSizeIndex,
282                                  Instruction *InsertBefore);
283   bool ignoreMemIntrinsic(MemIntrinsic *MI);
284   void instrumentMemIntrinsic(MemIntrinsic *MI);
285   bool instrumentMemAccess(InterestingMemoryOperand &O);
286   bool ignoreAccess(Instruction *Inst, Value *Ptr);
287   void getInterestingMemoryOperands(
288       Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting);
289 
290   bool isInterestingAlloca(const AllocaInst &AI);
291   void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
292   Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
293   Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
294   bool instrumentStack(memtag::StackInfo &Info, Value *StackTag,
295                        llvm::function_ref<const DominatorTree &()> GetDT,
296                        llvm::function_ref<const PostDominatorTree &()> GetPDT);
297   Value *readRegister(IRBuilder<> &IRB, StringRef Name);
298   bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
299   Value *getNextTagWithCall(IRBuilder<> &IRB);
300   Value *getStackBaseTag(IRBuilder<> &IRB);
301   Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, AllocaInst *AI,
302                       unsigned AllocaNo);
303   Value *getUARTag(IRBuilder<> &IRB, Value *StackTag);
304 
305   Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty);
306   Value *applyTagMask(IRBuilder<> &IRB, Value *OldTag);
307   unsigned retagMask(unsigned AllocaNo);
308 
309   void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);
310 
311   void instrumentGlobal(GlobalVariable *GV, uint8_t Tag);
312   void instrumentGlobals();
313 
314   void instrumentPersonalityFunctions();
315 
316 private:
317   LLVMContext *C;
318   Module &M;
319   const StackSafetyGlobalInfo *SSI;
320   Triple TargetTriple;
321   FunctionCallee HWAsanMemmove, HWAsanMemcpy, HWAsanMemset;
322   FunctionCallee HWAsanHandleVfork;
323 
324   /// This struct defines the shadow mapping using the rule:
325   ///   shadow = (mem >> Scale) + Offset.
326   /// If InGlobal is true, then
327   ///   extern char __hwasan_shadow[];
328   ///   shadow = (mem >> Scale) + &__hwasan_shadow
329   /// If InTls is true, then
330   ///   extern char *__hwasan_tls;
331   ///   shadow = (mem>>Scale) + align_up(__hwasan_shadow, kShadowBaseAlignment)
332   ///
333   /// If WithFrameRecord is true, then __hwasan_tls will be used to access the
334   /// ring buffer for storing stack allocations on targets that support it.
335   struct ShadowMapping {
336     int Scale;
337     uint64_t Offset;
338     bool InGlobal;
339     bool InTls;
340     bool WithFrameRecord;
341 
342     void init(Triple &TargetTriple, bool InstrumentWithCalls);
343     uint64_t getObjectAlignment() const { return 1ULL << Scale; }
344   };
345 
346   ShadowMapping Mapping;
347 
348   Type *VoidTy = Type::getVoidTy(M.getContext());
349   Type *IntptrTy;
350   Type *Int8PtrTy;
351   Type *Int8Ty;
352   Type *Int32Ty;
353   Type *Int64Ty = Type::getInt64Ty(M.getContext());
354 
355   bool CompileKernel;
356   bool Recover;
357   bool OutlinedChecks;
358   bool UseShortGranules;
359   bool InstrumentLandingPads;
360   bool InstrumentWithCalls;
361   bool InstrumentStack;
362   bool DetectUseAfterScope;
363   bool UsePageAliases;
364 
365   bool HasMatchAllTag = false;
366   uint8_t MatchAllTag = 0;
367 
368   unsigned PointerTagShift;
369   uint64_t TagMaskByte;
370 
371   Function *HwasanCtorFunction;
372 
373   FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
374   FunctionCallee HwasanMemoryAccessCallbackSized[2];
375 
376   FunctionCallee HwasanTagMemoryFunc;
377   FunctionCallee HwasanGenerateTagFunc;
378 
379   Constant *ShadowGlobal;
380 
381   Value *ShadowBase = nullptr;
382   Value *StackBaseTag = nullptr;
383   GlobalValue *ThreadPtrGlobal = nullptr;
384 };
385 
386 class HWAddressSanitizerLegacyPass : public FunctionPass {
387 public:
388   // Pass identification, replacement for typeid.
389   static char ID;
390 
391   explicit HWAddressSanitizerLegacyPass(bool CompileKernel = false,
392                                         bool Recover = false,
393                                         bool DisableOptimization = false)
394       : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover),
395         DisableOptimization(DisableOptimization) {
396     initializeHWAddressSanitizerLegacyPassPass(
397         *PassRegistry::getPassRegistry());
398   }
399 
400   StringRef getPassName() const override { return "HWAddressSanitizer"; }
401 
402   bool doInitialization(Module &M) override {
403     HWASan = std::make_unique<HWAddressSanitizer>(M, CompileKernel, Recover,
404                                                   /*SSI=*/nullptr);
405     return true;
406   }
407 
408   bool runOnFunction(Function &F) override {
409     auto TargetTriple = Triple(F.getParent()->getTargetTriple());
410     if (shouldUseStackSafetyAnalysis(TargetTriple, DisableOptimization)) {
411       // We cannot call getAnalysis in doInitialization, that would cause a
412       // crash as the required analyses are not initialized yet.
413       HWASan->setSSI(
414           &getAnalysis<StackSafetyGlobalInfoWrapperPass>().getResult());
415     }
416     return HWASan->sanitizeFunction(
417         F,
418         [&]() -> const DominatorTree & {
419           return getAnalysis<DominatorTreeWrapperPass>().getDomTree();
420         },
421         [&]() -> const PostDominatorTree & {
422           return getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
423         });
424   }
425 
426   bool doFinalization(Module &M) override {
427     HWASan.reset();
428     return false;
429   }
430 
431   void getAnalysisUsage(AnalysisUsage &AU) const override {
432     // This is an over-estimation of, in case we are building for an
433     // architecture that doesn't allow stack tagging we will still load the
434     // analysis.
435     // This is so we don't need to plumb TargetTriple all the way to here.
436     if (mightUseStackSafetyAnalysis(DisableOptimization))
437       AU.addRequired<StackSafetyGlobalInfoWrapperPass>();
438     AU.addRequired<DominatorTreeWrapperPass>();
439     AU.addRequired<PostDominatorTreeWrapperPass>();
440   }
441 
442 private:
443   std::unique_ptr<HWAddressSanitizer> HWASan;
444   bool CompileKernel;
445   bool Recover;
446   bool DisableOptimization;
447 };
448 
449 } // end anonymous namespace
450 
// Definition of the static pass identifier declared in
// HWAddressSanitizerLegacyPass.
char HWAddressSanitizerLegacyPass::ID = 0;

// Registers the legacy pass under the name "hwasan" and lists the analysis
// passes it depends on; these are the same three analyses that
// getAnalysisUsage() may request.
INITIALIZE_PASS_BEGIN(
    HWAddressSanitizerLegacyPass, "hwasan",
    "HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
    false)
INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(
    HWAddressSanitizerLegacyPass, "hwasan",
    "HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
    false)
464 
465 FunctionPass *
466 llvm::createHWAddressSanitizerLegacyPassPass(bool CompileKernel, bool Recover,
467                                              bool DisableOptimization) {
468   assert(!CompileKernel || Recover);
469   return new HWAddressSanitizerLegacyPass(CompileKernel, Recover,
470                                           DisableOptimization);
471 }
472 
473 PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
474                                               ModuleAnalysisManager &MAM) {
475   const StackSafetyGlobalInfo *SSI = nullptr;
476   auto TargetTriple = llvm::Triple(M.getTargetTriple());
477   if (shouldUseStackSafetyAnalysis(TargetTriple, Options.DisableOptimization))
478     SSI = &MAM.getResult<StackSafetyGlobalAnalysis>(M);
479 
480   HWAddressSanitizer HWASan(M, Options.CompileKernel, Options.Recover, SSI);
481   bool Modified = false;
482   auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
483   for (Function &F : M) {
484     Modified |= HWASan.sanitizeFunction(
485         F,
486         [&]() -> const DominatorTree & {
487           return FAM.getResult<DominatorTreeAnalysis>(F);
488         },
489         [&]() -> const PostDominatorTree & {
490           return FAM.getResult<PostDominatorTreeAnalysis>(F);
491         });
492   }
493   if (Modified)
494     return PreservedAnalyses::none();
495   return PreservedAnalyses::all();
496 }
497 void HWAddressSanitizerPass::printPipeline(
498     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
499   static_cast<PassInfoMixin<HWAddressSanitizerPass> *>(this)->printPipeline(
500       OS, MapClassName2PassName);
501   OS << "<";
502   if (Options.CompileKernel)
503     OS << "kernel;";
504   if (Options.Recover)
505     OS << "recover";
506   OS << ">";
507 }
508 
509 void HWAddressSanitizer::createHwasanCtorComdat() {
510   std::tie(HwasanCtorFunction, std::ignore) =
511       getOrCreateSanitizerCtorAndInitFunctions(
512           M, kHwasanModuleCtorName, kHwasanInitName,
513           /*InitArgTypes=*/{},
514           /*InitArgs=*/{},
515           // This callback is invoked when the functions are created the first
516           // time. Hook them into the global ctors list in that case:
517           [&](Function *Ctor, FunctionCallee) {
518             Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
519             Ctor->setComdat(CtorComdat);
520             appendToGlobalCtors(M, Ctor, 0, Ctor);
521           });
522 
523   // Create a note that contains pointers to the list of global
524   // descriptors. Adding a note to the output file will cause the linker to
525   // create a PT_NOTE program header pointing to the note that we can use to
526   // find the descriptor list starting from the program headers. A function
527   // provided by the runtime initializes the shadow memory for the globals by
528   // accessing the descriptor list via the note. The dynamic loader needs to
529   // call this function whenever a library is loaded.
530   //
531   // The reason why we use a note for this instead of a more conventional
532   // approach of having a global constructor pass a descriptor list pointer to
533   // the runtime is because of an order of initialization problem. With
534   // constructors we can encounter the following problematic scenario:
535   //
536   // 1) library A depends on library B and also interposes one of B's symbols
537   // 2) B's constructors are called before A's (as required for correctness)
538   // 3) during construction, B accesses one of its "own" globals (actually
539   //    interposed by A) and triggers a HWASAN failure due to the initialization
540   //    for A not having happened yet
541   //
542   // Even without interposition it is possible to run into similar situations in
543   // cases where two libraries mutually depend on each other.
544   //
545   // We only need one note per binary, so put everything for the note in a
546   // comdat. This needs to be a comdat with an .init_array section to prevent
547   // newer versions of lld from discarding the note.
548   //
549   // Create the note even if we aren't instrumenting globals. This ensures that
550   // binaries linked from object files with both instrumented and
551   // non-instrumented globals will end up with a note, even if a comdat from an
552   // object file with non-instrumented globals is selected. The note is harmless
553   // if the runtime doesn't support it, since it will just be ignored.
554   Comdat *NoteComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
555 
556   Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
557   auto Start =
558       new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
559                          nullptr, "__start_hwasan_globals");
560   Start->setVisibility(GlobalValue::HiddenVisibility);
561   Start->setDSOLocal(true);
562   auto Stop =
563       new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
564                          nullptr, "__stop_hwasan_globals");
565   Stop->setVisibility(GlobalValue::HiddenVisibility);
566   Stop->setDSOLocal(true);
567 
568   // Null-terminated so actually 8 bytes, which are required in order to align
569   // the note properly.
570   auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0");
571 
572   auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(),
573                                  Int32Ty, Int32Ty);
574   auto *Note =
575       new GlobalVariable(M, NoteTy, /*isConstant=*/true,
576                          GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName);
577   Note->setSection(".note.hwasan.globals");
578   Note->setComdat(NoteComdat);
579   Note->setAlignment(Align(4));
580   Note->setDSOLocal(true);
581 
582   // The pointers in the note need to be relative so that the note ends up being
583   // placed in rodata, which is the standard location for notes.
584   auto CreateRelPtr = [&](Constant *Ptr) {
585     return ConstantExpr::getTrunc(
586         ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty),
587                              ConstantExpr::getPtrToInt(Note, Int64Ty)),
588         Int32Ty);
589   };
590   Note->setInitializer(ConstantStruct::getAnon(
591       {ConstantInt::get(Int32Ty, 8),                           // n_namesz
592        ConstantInt::get(Int32Ty, 8),                           // n_descsz
593        ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type
594        Name, CreateRelPtr(Start), CreateRelPtr(Stop)}));
595   appendToCompilerUsed(M, Note);
596 
597   // Create a zero-length global in hwasan_globals so that the linker will
598   // always create start and stop symbols.
599   auto Dummy = new GlobalVariable(
600       M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
601       Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global");
602   Dummy->setSection("hwasan_globals");
603   Dummy->setComdat(NoteComdat);
604   Dummy->setMetadata(LLVMContext::MD_associated,
605                      MDNode::get(*C, ValueAsMetadata::get(Note)));
606   appendToCompilerUsed(M, Dummy);
607 }
608 
609 /// Module-level initialization.
610 ///
611 /// inserts a call to __hwasan_init to the module's constructor list.
612 void HWAddressSanitizer::initializeModule() {
613   LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
614   auto &DL = M.getDataLayout();
615 
616   TargetTriple = Triple(M.getTargetTriple());
617 
618   // x86_64 currently has two modes:
619   // - Intel LAM (default)
620   // - pointer aliasing (heap only)
621   bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
622   UsePageAliases = shouldUsePageAliases(TargetTriple);
623   InstrumentWithCalls = shouldInstrumentWithCalls(TargetTriple);
624   InstrumentStack = shouldInstrumentStack(TargetTriple);
625   DetectUseAfterScope = shouldDetectUseAfterScope(TargetTriple);
626   PointerTagShift = IsX86_64 ? 57 : 56;
627   TagMaskByte = IsX86_64 ? 0x3F : 0xFF;
628 
629   Mapping.init(TargetTriple, InstrumentWithCalls);
630 
631   C = &(M.getContext());
632   IRBuilder<> IRB(*C);
633   IntptrTy = IRB.getIntPtrTy(DL);
634   Int8PtrTy = IRB.getInt8PtrTy();
635   Int8Ty = IRB.getInt8Ty();
636   Int32Ty = IRB.getInt32Ty();
637 
638   HwasanCtorFunction = nullptr;
639 
640   // Older versions of Android do not have the required runtime support for
641   // short granules, global or personality function instrumentation. On other
642   // platforms we currently require using the latest version of the runtime.
643   bool NewRuntime =
644       !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30);
645 
646   UseShortGranules =
647       ClUseShortGranules.getNumOccurrences() ? ClUseShortGranules : NewRuntime;
648   OutlinedChecks =
649       TargetTriple.isAArch64() && TargetTriple.isOSBinFormatELF() &&
650       (ClInlineAllChecks.getNumOccurrences() ? !ClInlineAllChecks : !Recover);
651 
652   if (ClMatchAllTag.getNumOccurrences()) {
653     if (ClMatchAllTag != -1) {
654       HasMatchAllTag = true;
655       MatchAllTag = ClMatchAllTag & 0xFF;
656     }
657   } else if (CompileKernel) {
658     HasMatchAllTag = true;
659     MatchAllTag = 0xFF;
660   }
661 
662   // If we don't have personality function support, fall back to landing pads.
663   InstrumentLandingPads = ClInstrumentLandingPads.getNumOccurrences()
664                               ? ClInstrumentLandingPads
665                               : !NewRuntime;
666 
667   if (!CompileKernel) {
668     createHwasanCtorComdat();
669     bool InstrumentGlobals =
670         ClGlobals.getNumOccurrences() ? ClGlobals : NewRuntime;
671 
672     if (InstrumentGlobals && !UsePageAliases)
673       instrumentGlobals();
674 
675     bool InstrumentPersonalityFunctions =
676         ClInstrumentPersonalityFunctions.getNumOccurrences()
677             ? ClInstrumentPersonalityFunctions
678             : NewRuntime;
679     if (InstrumentPersonalityFunctions)
680       instrumentPersonalityFunctions();
681   }
682 
683   if (!TargetTriple.isAndroid()) {
684     Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] {
685       auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false,
686                                     GlobalValue::ExternalLinkage, nullptr,
687                                     "__hwasan_tls", nullptr,
688                                     GlobalVariable::InitialExecTLSModel);
689       appendToCompilerUsed(M, GV);
690       return GV;
691     });
692     ThreadPtrGlobal = cast<GlobalVariable>(C);
693   }
694 }
695 
696 void HWAddressSanitizer::initializeCallbacks(Module &M) {
697   IRBuilder<> IRB(*C);
698   for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
699     const std::string TypeStr = AccessIsWrite ? "store" : "load";
700     const std::string EndingStr = Recover ? "_noabort" : "";
701 
702     HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(
703         ClMemoryAccessCallbackPrefix + TypeStr + "N" + EndingStr,
704         FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false));
705 
706     for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
707          AccessSizeIndex++) {
708       HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
709           M.getOrInsertFunction(
710               ClMemoryAccessCallbackPrefix + TypeStr +
711                   itostr(1ULL << AccessSizeIndex) + EndingStr,
712               FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false));
713     }
714   }
715 
716   HwasanTagMemoryFunc = M.getOrInsertFunction(
717       "__hwasan_tag_memory", IRB.getVoidTy(), Int8PtrTy, Int8Ty, IntptrTy);
718   HwasanGenerateTagFunc =
719       M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);
720 
721   ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
722                                      ArrayType::get(IRB.getInt8Ty(), 0));
723 
724   const std::string MemIntrinCallbackPrefix =
725       CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix;
726   HWAsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove",
727                                         IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
728                                         IRB.getInt8PtrTy(), IntptrTy);
729   HWAsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy",
730                                        IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
731                                        IRB.getInt8PtrTy(), IntptrTy);
732   HWAsanMemset = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memset",
733                                        IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
734                                        IRB.getInt32Ty(), IntptrTy);
735 
736   HWAsanHandleVfork =
737       M.getOrInsertFunction("__hwasan_handle_vfork", IRB.getVoidTy(), IntptrTy);
738 }
739 
740 Value *HWAddressSanitizer::getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val) {
741   // An empty inline asm with input reg == output reg.
742   // An opaque no-op cast, basically.
743   // This prevents code bloat as a result of rematerializing trivial definitions
744   // such as constants or global addresses at every load and store.
745   InlineAsm *Asm =
746       InlineAsm::get(FunctionType::get(Int8PtrTy, {Val->getType()}, false),
747                      StringRef(""), StringRef("=r,0"),
748                      /*hasSideEffects=*/false);
749   return IRB.CreateCall(Asm, {Val}, ".hwasan.shadow");
750 }
751 
752 Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
753   return getOpaqueNoopCast(IRB, ShadowGlobal);
754 }
755 
756 Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) {
757   if (Mapping.Offset != kDynamicShadowSentinel)
758     return getOpaqueNoopCast(
759         IRB, ConstantExpr::getIntToPtr(
760                  ConstantInt::get(IntptrTy, Mapping.Offset), Int8PtrTy));
761 
762   if (Mapping.InGlobal) {
763     return getDynamicShadowIfunc(IRB);
764   } else {
765     Value *GlobalDynamicAddress =
766         IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
767             kHwasanShadowMemoryDynamicAddress, Int8PtrTy);
768     return IRB.CreateLoad(Int8PtrTy, GlobalDynamicAddress);
769   }
770 }
771 
772 bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
773   // Do not instrument acesses from different address spaces; we cannot deal
774   // with them.
775   Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
776   if (PtrTy->getPointerAddressSpace() != 0)
777     return true;
778 
779   // Ignore swifterror addresses.
780   // swifterror memory addresses are mem2reg promoted by instruction
781   // selection. As such they cannot have regular uses like an instrumentation
782   // function and it makes no sense to track them as memory.
783   if (Ptr->isSwiftError())
784     return true;
785 
786   if (findAllocaForValue(Ptr)) {
787     if (!InstrumentStack)
788       return true;
789     if (SSI && SSI->stackAccessIsSafe(*Inst))
790       return true;
791   }
792   return false;
793 }
794 
795 void HWAddressSanitizer::getInterestingMemoryOperands(
796     Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
797   // Skip memory accesses inserted by another instrumentation.
798   if (I->hasMetadata("nosanitize"))
799     return;
800 
801   // Do not instrument the load fetching the dynamic shadow address.
802   if (ShadowBase == I)
803     return;
804 
805   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
806     if (!ClInstrumentReads || ignoreAccess(I, LI->getPointerOperand()))
807       return;
808     Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
809                              LI->getType(), LI->getAlign());
810   } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
811     if (!ClInstrumentWrites || ignoreAccess(I, SI->getPointerOperand()))
812       return;
813     Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
814                              SI->getValueOperand()->getType(), SI->getAlign());
815   } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
816     if (!ClInstrumentAtomics || ignoreAccess(I, RMW->getPointerOperand()))
817       return;
818     Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
819                              RMW->getValOperand()->getType(), None);
820   } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
821     if (!ClInstrumentAtomics || ignoreAccess(I, XCHG->getPointerOperand()))
822       return;
823     Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
824                              XCHG->getCompareOperand()->getType(), None);
825   } else if (auto CI = dyn_cast<CallInst>(I)) {
826     for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
827       if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
828           ignoreAccess(I, CI->getArgOperand(ArgNo)))
829         continue;
830       Type *Ty = CI->getParamByValType(ArgNo);
831       Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
832     }
833   }
834 }
835 
836 static unsigned getPointerOperandIndex(Instruction *I) {
837   if (LoadInst *LI = dyn_cast<LoadInst>(I))
838     return LI->getPointerOperandIndex();
839   if (StoreInst *SI = dyn_cast<StoreInst>(I))
840     return SI->getPointerOperandIndex();
841   if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I))
842     return RMW->getPointerOperandIndex();
843   if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I))
844     return XCHG->getPointerOperandIndex();
845   report_fatal_error("Unexpected instruction");
846   return -1;
847 }
848 
849 static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
850   size_t Res = countTrailingZeros(TypeSize / 8);
851   assert(Res < kNumberOfAccessSizes);
852   return Res;
853 }
854 
855 void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
856   if (TargetTriple.isAArch64() || TargetTriple.getArch() == Triple::x86_64)
857     return;
858 
859   IRBuilder<> IRB(I);
860   Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
861   Value *UntaggedPtr =
862       IRB.CreateIntToPtr(untagPointer(IRB, AddrLong), Addr->getType());
863   I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
864 }
865 
866 Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
867   // Mem >> Scale
868   Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale);
869   if (Mapping.Offset == 0)
870     return IRB.CreateIntToPtr(Shadow, Int8PtrTy);
871   // (Mem >> Scale) + Offset
872   return IRB.CreateGEP(Int8Ty, ShadowBase, Shadow);
873 }
874 
875 int64_t HWAddressSanitizer::getAccessInfo(bool IsWrite,
876                                           unsigned AccessSizeIndex) {
877   return (CompileKernel << HWASanAccessInfo::CompileKernelShift) +
878          (HasMatchAllTag << HWASanAccessInfo::HasMatchAllShift) +
879          (MatchAllTag << HWASanAccessInfo::MatchAllShift) +
880          (Recover << HWASanAccessInfo::RecoverShift) +
881          (IsWrite << HWASanAccessInfo::IsWriteShift) +
882          (AccessSizeIndex << HWASanAccessInfo::AccessSizeShift);
883 }
884 
885 void HWAddressSanitizer::instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
886                                                     unsigned AccessSizeIndex,
887                                                     Instruction *InsertBefore) {
888   assert(!UsePageAliases);
889   const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
890   IRBuilder<> IRB(InsertBefore);
891   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
892   Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy);
893   IRB.CreateCall(Intrinsic::getDeclaration(
894                      M, UseShortGranules
895                             ? Intrinsic::hwasan_check_memaccess_shortgranules
896                             : Intrinsic::hwasan_check_memaccess),
897                  {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
898 }
899 
/// Emit an inline tag check for the access through \p Ptr, placed in front of
/// \p InsertBefore.
///
/// The fast path compares the tag extracted from the pointer with the tag
/// byte loaded from shadow memory. On a mismatch a rarely-taken slow path
/// runs, which accepts the access only if
///   1. the shadow byte is at most 15 (treated as a short-granule size),
///   2. the last byte touched by the access lies below that size, and
///   3. the byte stored at the end of the granule equals the pointer tag.
/// If any of these fails, a target-specific trapping instruction is executed
/// whose immediate encodes the access info.
///
/// \param Ptr             The (tagged) address being accessed.
/// \param IsWrite         Forwarded to getAccessInfo() for the trap encoding.
/// \param AccessSizeIndex Index of the access size; (1 << AccessSizeIndex) - 1
///                        is used as the offset of the last accessed byte.
/// \param InsertBefore    Instruction the check is inserted in front of.
void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
                                                   unsigned AccessSizeIndex,
                                                   Instruction *InsertBefore) {
  assert(!UsePageAliases);
  const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
  IRBuilder<> IRB(InsertBefore);

  // Fast path: pointer tag (bits at PointerTagShift, truncated to i8) versus
  // the tag byte found in shadow memory for the untagged address.
  Value *PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
  Value *PtrTag = IRB.CreateTrunc(IRB.CreateLShr(PtrLong, PointerTagShift),
                                  IRB.getInt8Ty());
  Value *AddrLong = untagPointer(IRB, PtrLong);
  Value *Shadow = memToShadow(AddrLong, IRB);
  Value *MemTag = IRB.CreateLoad(Int8Ty, Shadow);
  Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag);

  // Pointers carrying the match-all tag never count as a mismatch.
  if (HasMatchAllTag) {
    Value *TagNotIgnored = IRB.CreateICmpNE(
        PtrTag, ConstantInt::get(PtrTag->getType(), MatchAllTag));
    TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
  }

  // Split off the slow path; CheckTerm is the terminator of the new block
  // that runs only on a mismatch. The branch weights mark it as unlikely.
  Instruction *CheckTerm =
      SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, false,
                                MDBuilder(*C).createBranchWeights(1, 100000));

  // Slow path, step 1: a shadow value above 15 cannot be a short-granule
  // size, so go straight to the failure block. That block ends in
  // `unreachable` unless Recover is set.
  IRB.SetInsertPoint(CheckTerm);
  Value *OutOfShortGranuleTagRange =
      IRB.CreateICmpUGT(MemTag, ConstantInt::get(Int8Ty, 15));
  Instruction *CheckFailTerm =
      SplitBlockAndInsertIfThen(OutOfShortGranuleTagRange, CheckTerm, !Recover,
                                MDBuilder(*C).createBranchWeights(1, 100000));

  // Step 2: offset of the last accessed byte within its 16-byte granule; if
  // it is not below the shadow value, branch to the existing failure block.
  IRB.SetInsertPoint(CheckTerm);
  Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(PtrLong, 15), Int8Ty);
  PtrLowBits = IRB.CreateAdd(
      PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1));
  Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, MemTag);
  SplitBlockAndInsertIfThen(PtrLowBitsOOB, CheckTerm, false,
                            MDBuilder(*C).createBranchWeights(1, 100000),
                            (DomTreeUpdater *)nullptr, nullptr,
                            CheckFailTerm->getParent());

  // Step 3: load the byte at (untagged address | 15), i.e. the last byte of
  // the granule, and require it to equal the pointer tag.
  IRB.SetInsertPoint(CheckTerm);
  Value *InlineTagAddr = IRB.CreateOr(AddrLong, 15);
  InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, Int8PtrTy);
  Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr);
  Value *InlineTagMismatch = IRB.CreateICmpNE(PtrTag, InlineTag);
  SplitBlockAndInsertIfThen(InlineTagMismatch, CheckTerm, false,
                            MDBuilder(*C).createBranchWeights(1, 100000),
                            (DomTreeUpdater *)nullptr, nullptr,
                            CheckFailTerm->getParent());

  // Failure block: emit the trapping instruction. The access info (masked
  // with RuntimeMask) is folded into the instruction's immediate, and the
  // faulting pointer is passed in a fixed register.
  IRB.SetInsertPoint(CheckFailTerm);
  InlineAsm *Asm;
  switch (TargetTriple.getArch()) {
  case Triple::x86_64:
    // The signal handler will find the data address in rdi.
    Asm = InlineAsm::get(
        FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
        "int3\nnopl " +
            itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)) +
            "(%rax)",
        "{rdi}",
        /*hasSideEffects=*/true);
    break;
  case Triple::aarch64:
  case Triple::aarch64_be:
    // The signal handler will find the data address in x0.
    Asm = InlineAsm::get(
        FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
        "brk #" + itostr(0x900 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
        "{x0}",
        /*hasSideEffects=*/true);
    break;
  default:
    report_fatal_error("unsupported architecture");
  }
  IRB.CreateCall(Asm, PtrLong);
  // In recover mode the failure block ends in a branch; retarget it to the
  // block that currently holds CheckTerm so execution continues after the
  // report.
  if (Recover)
    cast<BranchInst>(CheckFailTerm)->setSuccessor(0, CheckTerm->getParent());
}
981 
982 bool HWAddressSanitizer::ignoreMemIntrinsic(MemIntrinsic *MI) {
983   if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
984     return (!ClInstrumentWrites || ignoreAccess(MTI, MTI->getDest())) &&
985            (!ClInstrumentReads || ignoreAccess(MTI, MTI->getSource()));
986   }
987   if (isa<MemSetInst>(MI))
988     return !ClInstrumentWrites || ignoreAccess(MI, MI->getDest());
989   return false;
990 }
991 
992 void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
993   IRBuilder<> IRB(MI);
994   if (isa<MemTransferInst>(MI)) {
995     IRB.CreateCall(
996         isa<MemMoveInst>(MI) ? HWAsanMemmove : HWAsanMemcpy,
997         {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
998          IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
999          IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
1000   } else if (isa<MemSetInst>(MI)) {
1001     IRB.CreateCall(
1002         HWAsanMemset,
1003         {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
1004          IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
1005          IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
1006   }
1007   MI->eraseFromParent();
1008 }
1009 
1010 bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) {
1011   Value *Addr = O.getPtr();
1012 
1013   LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n");
1014 
1015   if (O.MaybeMask)
1016     return false; // FIXME
1017 
1018   IRBuilder<> IRB(O.getInsn());
1019   if (isPowerOf2_64(O.TypeSize) &&
1020       (O.TypeSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) &&
1021       (!O.Alignment || *O.Alignment >= (1ULL << Mapping.Scale) ||
1022        *O.Alignment >= O.TypeSize / 8)) {
1023     size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeSize);
1024     if (InstrumentWithCalls) {
1025       IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
1026                      IRB.CreatePointerCast(Addr, IntptrTy));
1027     } else if (OutlinedChecks) {
1028       instrumentMemAccessOutline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
1029     } else {
1030       instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
1031     }
1032   } else {
1033     IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite],
1034                    {IRB.CreatePointerCast(Addr, IntptrTy),
1035                     ConstantInt::get(IntptrTy, O.TypeSize / 8)});
1036   }
1037   untagPointerOperand(O.getInsn(), Addr);
1038 
1039   return true;
1040 }
1041 
1042 void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
1043                                    size_t Size) {
1044   size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
1045   if (!UseShortGranules)
1046     Size = AlignedSize;
1047 
1048   Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty());
1049   if (InstrumentWithCalls) {
1050     IRB.CreateCall(HwasanTagMemoryFunc,
1051                    {IRB.CreatePointerCast(AI, Int8PtrTy), JustTag,
1052                     ConstantInt::get(IntptrTy, AlignedSize)});
1053   } else {
1054     size_t ShadowSize = Size >> Mapping.Scale;
1055     Value *ShadowPtr = memToShadow(IRB.CreatePointerCast(AI, IntptrTy), IRB);
1056     // If this memset is not inlined, it will be intercepted in the hwasan
1057     // runtime library. That's OK, because the interceptor skips the checks if
1058     // the address is in the shadow region.
1059     // FIXME: the interceptor is not as fast as real memset. Consider lowering
1060     // llvm.memset right here into either a sequence of stores, or a call to
1061     // hwasan_tag_memory.
1062     if (ShadowSize)
1063       IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, Align(1));
1064     if (Size != AlignedSize) {
1065       IRB.CreateStore(
1066           ConstantInt::get(Int8Ty, Size % Mapping.getObjectAlignment()),
1067           IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
1068       IRB.CreateStore(JustTag, IRB.CreateConstGEP1_32(
1069                                    Int8Ty, IRB.CreateBitCast(AI, Int8PtrTy),
1070                                    AlignedSize - 1));
1071     }
1072   }
1073 }
1074 
1075 unsigned HWAddressSanitizer::retagMask(unsigned AllocaNo) {
1076   if (TargetTriple.getArch() == Triple::x86_64)
1077     return AllocaNo & TagMaskByte;
1078 
1079   // A list of 8-bit numbers that have at most one run of non-zero bits.
1080   // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these
1081   // masks.
1082   // The list does not include the value 255, which is used for UAR.
1083   //
1084   // Because we are more likely to use earlier elements of this list than later
1085   // ones, it is sorted in increasing order of probability of collision with a
1086   // mask allocated (temporally) nearby. The program that generated this list
1087   // can be found at:
1088   // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py
1089   static unsigned FastMasks[] = {0,  128, 64,  192, 32,  96,  224, 112, 240,
1090                                  48, 16,  120, 248, 56,  24,  8,   124, 252,
1091                                  60, 28,  12,  4,   126, 254, 62,  30,  14,
1092                                  6,  2,   127, 63,  31,  15,  7,   3,   1};
1093   return FastMasks[AllocaNo % (sizeof(FastMasks) / sizeof(FastMasks[0]))];
1094 }
1095 
1096 Value *HWAddressSanitizer::applyTagMask(IRBuilder<> &IRB, Value *OldTag) {
1097   if (TargetTriple.getArch() == Triple::x86_64) {
1098     Constant *TagMask = ConstantInt::get(IntptrTy, TagMaskByte);
1099     Value *NewTag = IRB.CreateAnd(OldTag, TagMask);
1100     return NewTag;
1101   }
1102   // aarch64 uses 8-bit tags, so no mask is needed.
1103   return OldTag;
1104 }
1105 
1106 Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
1107   return IRB.CreateZExt(IRB.CreateCall(HwasanGenerateTagFunc), IntptrTy);
1108 }
1109 
1110 Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
1111   if (ClGenerateTagsWithCalls)
1112     return getNextTagWithCall(IRB);
1113   if (StackBaseTag)
1114     return StackBaseTag;
1115   // FIXME: use addressofreturnaddress (but implement it in aarch64 backend
1116   // first).
1117   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
1118   auto GetStackPointerFn = Intrinsic::getDeclaration(
1119       M, Intrinsic::frameaddress,
1120       IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
1121   Value *StackPointer = IRB.CreateCall(
1122       GetStackPointerFn, {Constant::getNullValue(IRB.getInt32Ty())});
1123 
1124   // Extract some entropy from the stack pointer for the tags.
1125   // Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ
1126   // between functions).
1127   Value *StackPointerLong = IRB.CreatePointerCast(StackPointer, IntptrTy);
1128   Value *StackTag =
1129       applyTagMask(IRB, IRB.CreateXor(StackPointerLong,
1130                                       IRB.CreateLShr(StackPointerLong, 20)));
1131   StackTag->setName("hwasan.stack.base.tag");
1132   return StackTag;
1133 }
1134 
1135 Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag,
1136                                         AllocaInst *AI, unsigned AllocaNo) {
1137   if (ClGenerateTagsWithCalls)
1138     return getNextTagWithCall(IRB);
1139   return IRB.CreateXor(StackTag,
1140                        ConstantInt::get(IntptrTy, retagMask(AllocaNo)));
1141 }
1142 
1143 Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB, Value *StackTag) {
1144   if (ClUARRetagToZero)
1145     return ConstantInt::get(IntptrTy, 0);
1146   if (ClGenerateTagsWithCalls)
1147     return getNextTagWithCall(IRB);
1148   return IRB.CreateXor(StackTag, ConstantInt::get(IntptrTy, TagMaskByte));
1149 }
1150 
1151 // Add a tag to an address.
1152 Value *HWAddressSanitizer::tagPointer(IRBuilder<> &IRB, Type *Ty,
1153                                       Value *PtrLong, Value *Tag) {
1154   assert(!UsePageAliases);
1155   Value *TaggedPtrLong;
1156   if (CompileKernel) {
1157     // Kernel addresses have 0xFF in the most significant byte.
1158     Value *ShiftedTag =
1159         IRB.CreateOr(IRB.CreateShl(Tag, PointerTagShift),
1160                      ConstantInt::get(IntptrTy, (1ULL << PointerTagShift) - 1));
1161     TaggedPtrLong = IRB.CreateAnd(PtrLong, ShiftedTag);
1162   } else {
1163     // Userspace can simply do OR (tag << PointerTagShift);
1164     Value *ShiftedTag = IRB.CreateShl(Tag, PointerTagShift);
1165     TaggedPtrLong = IRB.CreateOr(PtrLong, ShiftedTag);
1166   }
1167   return IRB.CreateIntToPtr(TaggedPtrLong, Ty);
1168 }
1169 
1170 // Remove tag from an address.
1171 Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) {
1172   assert(!UsePageAliases);
1173   Value *UntaggedPtrLong;
1174   if (CompileKernel) {
1175     // Kernel addresses have 0xFF in the most significant byte.
1176     UntaggedPtrLong =
1177         IRB.CreateOr(PtrLong, ConstantInt::get(PtrLong->getType(),
1178                                                0xFFULL << PointerTagShift));
1179   } else {
1180     // Userspace addresses have 0x00.
1181     UntaggedPtrLong =
1182         IRB.CreateAnd(PtrLong, ConstantInt::get(PtrLong->getType(),
1183                                                 ~(0xFFULL << PointerTagShift)));
1184   }
1185   return UntaggedPtrLong;
1186 }
1187 
1188 Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
1189   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
1190   if (TargetTriple.isAArch64() && TargetTriple.isAndroid()) {
1191     // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER
1192     // in Bionic's libc/private/bionic_tls.h.
1193     Function *ThreadPointerFunc =
1194         Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
1195     Value *SlotPtr = IRB.CreatePointerCast(
1196         IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
1197                                IRB.CreateCall(ThreadPointerFunc), 0x30),
1198         Ty->getPointerTo(0));
1199     return SlotPtr;
1200   }
1201   if (ThreadPtrGlobal)
1202     return ThreadPtrGlobal;
1203 
1204   return nullptr;
1205 }
1206 
/// Emit the function prologue at the insertion point of \p IRB.
///
/// The prologue establishes ShadowBase and, when \p WithFrameRecord is set,
/// also sets StackBaseTag and appends one record (a mix of PC and frame
/// address) to the ring buffer addressed through the thread slot.
///
/// \param IRB             Builder positioned where the prologue should go.
/// \param WithFrameRecord Whether to store a frame record and advance the
///                        ring buffer pointer kept in the thread slot.
void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
  // Shadow base that does not come from the thread slot: either the non-TLS
  // mapping, or (Android without frame record) the shadow global.
  if (!Mapping.InTls)
    ShadowBase = getShadowNonTls(IRB);
  else if (!WithFrameRecord && TargetTriple.isAndroid())
    ShadowBase = getDynamicShadowIfunc(IRB);

  // Nothing else to do if no frame record is wanted and the shadow base is
  // already known.
  if (!WithFrameRecord && ShadowBase)
    return;

  // From here on the thread slot is required.
  Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
  assert(SlotPtr);

  Value *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
  // Extract the address field from ThreadLong. Unnecessary on AArch64 with TBI.
  Value *ThreadLongMaybeUntagged =
      TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong);

  if (WithFrameRecord) {
    Function *F = IRB.GetInsertBlock()->getParent();
    // The stack base tag is derived from the thread slot value; it is cached
    // in the member so getStackBaseTag() can reuse it.
    StackBaseTag = IRB.CreateAShr(ThreadLong, 3);

    // Prepare ring buffer data.
    // PC: the "pc" register on (little-endian) aarch64, otherwise the address
    // of the function itself.
    Value *PC;
    if (TargetTriple.getArch() == Triple::aarch64)
      PC = readRegister(IRB, "pc");
    else
      PC = IRB.CreatePtrToInt(F, IntptrTy);
    Module *M = F->getParent();
    // Despite its name, SP is obtained via llvm.frameaddress(0).
    auto GetStackPointerFn = Intrinsic::getDeclaration(
        M, Intrinsic::frameaddress,
        IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
    Value *SP = IRB.CreatePtrToInt(
        IRB.CreateCall(GetStackPointerFn,
                       {Constant::getNullValue(IRB.getInt32Ty())}),
        IntptrTy);
    // Mix SP and PC.
    // Assumptions:
    // PC is 0x0000PPPPPPPPPPPP  (48 bits are meaningful, others are zero)
    // SP is 0xsssssssssssSSSS0  (4 lower bits are zero)
    // We only really need ~20 lower non-zero bits (SSSS), so we mix like this:
    //       0xSSSSPPPPPPPPPPPP
    SP = IRB.CreateShl(SP, 44);

    // Store data to ring buffer.
    Value *RecordPtr =
        IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IntptrTy->getPointerTo(0));
    IRB.CreateStore(IRB.CreateOr(PC, SP), RecordPtr);

    // Update the ring buffer. Top byte of ThreadLong defines the size of the
    // buffer in pages, it must be a power of two, and the start of the buffer
    // must be aligned by twice that much. Therefore wrap around of the ring
    // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
    // The use of AShr instead of LShr is due to
    //   https://bugs.llvm.org/show_bug.cgi?id=39030
    // Runtime library makes sure not to use the highest bit.
    // The shift is created with both no-unsigned-wrap and no-signed-wrap
    // flags; XOR with all-ones implements the bitwise NOT.
    Value *WrapMask = IRB.CreateXor(
        IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
        ConstantInt::get(IntptrTy, (uint64_t)-1));
    // Advance by one 8-byte record, apply the wrap mask, and write the new
    // value back to the thread slot.
    Value *ThreadLongNew = IRB.CreateAnd(
        IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
    IRB.CreateStore(ThreadLongNew, SlotPtr);
  }

  if (!ShadowBase) {
    // Get shadow base address by aligning RecordPtr up.
    // Note: this is not correct if the pointer is already aligned.
    // Runtime library will make sure this never happens.
    // Computed as (addr | (2^kShadowBaseAlignment - 1)) + 1.
    ShadowBase = IRB.CreateAdd(
        IRB.CreateOr(
            ThreadLongMaybeUntagged,
            ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
        ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
    ShadowBase = IRB.CreateIntToPtr(ShadowBase, Int8PtrTy);
  }
}
1282 
1283 Value *HWAddressSanitizer::readRegister(IRBuilder<> &IRB, StringRef Name) {
1284   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
1285   Function *ReadRegister =
1286       Intrinsic::getDeclaration(M, Intrinsic::read_register, IntptrTy);
1287   MDNode *MD = MDNode::get(*C, {MDString::get(*C, Name)});
1288   Value *Args[] = {MetadataAsValue::get(*C, MD)};
1289   return IRB.CreateCall(ReadRegister, Args);
1290 }
1291 
1292 bool HWAddressSanitizer::instrumentLandingPads(
1293     SmallVectorImpl<Instruction *> &LandingPadVec) {
1294   for (auto *LP : LandingPadVec) {
1295     IRBuilder<> IRB(LP->getNextNode());
1296     IRB.CreateCall(
1297         HWAsanHandleVfork,
1298         {readRegister(IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp"
1299                                                                       : "sp")});
1300   }
1301   return true;
1302 }
1303 
/// Tag every alloca listed in \p SInfo and rewrite its uses to go through the
/// tagged address.
///
/// For each alloca this
///  - computes its tag from \p StackTag and the alloca's running number,
///  - replaces all uses (except the cast feeding the replacement itself) with
///    the tagged pointer,
///  - prepends a DW_OP_LLVM_tag_offset operation to the debug expressions
///    that refer to the alloca,
///  - tags the shadow memory at the start of the lifetime (or right after the
///    alloca) and retags it at the end of the lifetime (or at every return),
///  - aligns and pads the alloca to the object alignment.
///
/// \param SInfo    Allocas, lifetime markers and returns collected for the
///                 function.
/// \param StackTag Base tag used to derive the per-alloca tags.
/// \param GetDT    Lazily provides the dominator tree.
/// \param GetPDT   Lazily provides the post-dominator tree.
/// \returns true unconditionally.
bool HWAddressSanitizer::instrumentStack(
    memtag::StackInfo &SInfo, Value *StackTag,
    llvm::function_ref<const DominatorTree &()> GetDT,
    llvm::function_ref<const PostDominatorTree &()> GetPDT) {
  // Ideally, we want to calculate tagged stack base pointer, and rewrite all
  // alloca addresses using that. Unfortunately, offsets are not known yet
  // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a
  // temp, shift-OR it into each alloca address and xor with the retag mask.
  // This generates one extra instruction per alloca use.
  unsigned int I = 0;

  for (auto &KV : SInfo.AllocasToInstrument) {
    // N is the running number of this alloca; it selects the retag mask.
    auto N = I++;
    auto *AI = KV.first;
    memtag::AllocaInfo &Info = KV.second;
    IRBuilder<> IRB(AI->getNextNode());

    // Replace uses of the alloca with tagged address.
    Value *Tag = getAllocaTag(IRB, StackTag, AI, N);
    Value *AILong = IRB.CreatePointerCast(AI, IntptrTy);
    Value *Replacement = tagPointer(IRB, AI->getType(), AILong, Tag);
    std::string Name =
        AI->hasName() ? AI->getName().str() : "alloca." + itostr(N);
    Replacement->setName(Name + ".hwasan");

    // AILong itself must keep using the raw alloca, otherwise the replacement
    // would depend on itself.
    AI->replaceUsesWithIf(Replacement,
                          [AILong](Use &U) { return U.getUser() != AILong; });

    for (auto *DDI : Info.DbgVariableIntrinsics) {
      // Prepend "tag_offset, N" to the dwarf expression.
      // Tag offset logically applies to the alloca pointer, and it makes sense
      // to put it at the beginning of the expression.
      SmallVector<uint64_t, 8> NewOps = {dwarf::DW_OP_LLVM_tag_offset,
                                         retagMask(N)};
      for (size_t LocNo = 0; LocNo < DDI->getNumVariableLocationOps(); ++LocNo)
        if (DDI->getVariableLocationOp(LocNo) == AI)
          DDI->setExpression(DIExpression::appendOpsToArg(DDI->getExpression(),
                                                          NewOps, LocNo));
    }

    size_t Size = memtag::getAllocaSizeInBytes(*AI);
    size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
    // Retags the alloca with the use-after-return tag in front of Node. Note
    // that it repositions the shared builder IRB.
    auto TagEnd = [&](Instruction *Node) {
      IRB.SetInsertPoint(Node);
      Value *UARTag = getUARTag(IRB, StackTag);
      // When untagging, use the `AlignedSize` because we need to set the tags
      // for the entire alloca to zero. If we used `Size` here, we would
      // keep the last granule tagged, and store zero in the last byte of the
      // last granule, due to how short granules are implemented.
      tagAlloca(IRB, AI, UARTag, AlignedSize);
    };
    // Calls to functions that may return twice (e.g. setjmp) confuse the
    // postdominator analysis, and will leave us to keep memory tagged after
    // function return. Work around this by always untagging at every return
    // statement if return_twice functions are called.
    bool StandardLifetime =
        SInfo.UnrecognizedLifetimes.empty() &&
        memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd,
                                   &GetDT(), ClMaxLifetimes) &&
        !SInfo.CallsReturnTwice;
    if (DetectUseAfterScope && StandardLifetime) {
      // Tag right after the lifetime start and retag at the reachable exits.
      IntrinsicInst *Start = Info.LifetimeStart[0];
      IRB.SetInsertPoint(Start->getNextNode());
      tagAlloca(IRB, AI, Tag, Size);
      // If forAllReachableExits reports failure, the lifetime-end markers are
      // dropped.
      if (!memtag::forAllReachableExits(GetDT(), GetPDT(), Start,
                                        Info.LifetimeEnd, SInfo.RetVec,
                                        TagEnd)) {
        for (auto *End : Info.LifetimeEnd)
          End->eraseFromParent();
      }
    } else {
      // Tag for the whole function: right after the alloca, and retag in
      // front of every return.
      tagAlloca(IRB, AI, Tag, Size);
      for (auto *RI : SInfo.RetVec)
        TagEnd(RI);
      // We inserted tagging outside of the lifetimes, so we have to remove
      // them.
      for (auto &II : Info.LifetimeStart)
        II->eraseFromParent();
      for (auto &II : Info.LifetimeEnd)
        II->eraseFromParent();
    }
    memtag::alignAndPadAlloca(Info, Align(Mapping.getObjectAlignment()));
  }
  // Lifetime intrinsics that could not be attributed are removed as well.
  // (This loop variable shadows the counter I declared above.)
  for (auto &I : SInfo.UnrecognizedLifetimes)
    I->eraseFromParent();
  return true;
}
1391 
1392 bool HWAddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
1393   return (AI.getAllocatedType()->isSized() &&
1394           // FIXME: instrument dynamic allocas, too
1395           AI.isStaticAlloca() &&
1396           // alloca() may be called with 0 size, ignore it.
1397           memtag::getAllocaSizeInBytes(AI) > 0 &&
1398           // We are only interested in allocas not promotable to registers.
1399           // Promotable allocas are common under -O0.
1400           !isAllocaPromotable(&AI) &&
1401           // inalloca allocas are not treated as static, and we don't want
1402           // dynamic alloca instrumentation for them as well.
1403           !AI.isUsedWithInAlloca() &&
1404           // swifterror allocas are register promoted by ISel
1405           !AI.isSwiftError()) &&
1406          // safe allocas are not interesting
1407          !(SSI && SSI->isSafe(AI));
1408 }
1409 
1410 bool HWAddressSanitizer::sanitizeFunction(
1411     Function &F, llvm::function_ref<const DominatorTree &()> GetDT,
1412     llvm::function_ref<const PostDominatorTree &()> GetPDT) {
1413   if (&F == HwasanCtorFunction)
1414     return false;
1415 
1416   if (!F.hasFnAttribute(Attribute::SanitizeHWAddress))
1417     return false;
1418 
1419   LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n");
1420 
1421   SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument;
1422   SmallVector<MemIntrinsic *, 16> IntrinToInstrument;
1423   SmallVector<Instruction *, 8> LandingPadVec;
1424 
1425   memtag::StackInfoBuilder SIB(
1426       [this](const AllocaInst &AI) { return isInterestingAlloca(AI); });
1427   for (auto &Inst : instructions(F)) {
1428     if (InstrumentStack) {
1429       SIB.visit(Inst);
1430     }
1431 
1432     if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
1433       LandingPadVec.push_back(&Inst);
1434 
1435     getInterestingMemoryOperands(&Inst, OperandsToInstrument);
1436 
1437     if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst))
1438       if (!ignoreMemIntrinsic(MI))
1439         IntrinToInstrument.push_back(MI);
1440   }
1441 
1442   memtag::StackInfo &SInfo = SIB.get();
1443 
1444   initializeCallbacks(*F.getParent());
1445 
1446   bool Changed = false;
1447 
1448   if (!LandingPadVec.empty())
1449     Changed |= instrumentLandingPads(LandingPadVec);
1450 
1451   if (SInfo.AllocasToInstrument.empty() && F.hasPersonalityFn() &&
1452       F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) {
1453     // __hwasan_personality_thunk is a no-op for functions without an
1454     // instrumented stack, so we can drop it.
1455     F.setPersonalityFn(nullptr);
1456     Changed = true;
1457   }
1458 
1459   if (SInfo.AllocasToInstrument.empty() && OperandsToInstrument.empty() &&
1460       IntrinToInstrument.empty())
1461     return Changed;
1462 
1463   assert(!ShadowBase);
1464 
1465   Instruction *InsertPt = &*F.getEntryBlock().begin();
1466   IRBuilder<> EntryIRB(InsertPt);
1467   emitPrologue(EntryIRB,
1468                /*WithFrameRecord*/ ClRecordStackHistory &&
1469                    Mapping.WithFrameRecord &&
1470                    !SInfo.AllocasToInstrument.empty());
1471 
1472   if (!SInfo.AllocasToInstrument.empty()) {
1473     Value *StackTag =
1474         ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
1475     instrumentStack(SInfo, StackTag, GetDT, GetPDT);
1476   }
1477 
1478   // If we split the entry block, move any allocas that were originally in the
1479   // entry block back into the entry block so that they aren't treated as
1480   // dynamic allocas.
1481   if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
1482     InsertPt = &*F.getEntryBlock().begin();
1483     for (Instruction &I :
1484          llvm::make_early_inc_range(*EntryIRB.GetInsertBlock())) {
1485       if (auto *AI = dyn_cast<AllocaInst>(&I))
1486         if (isa<ConstantInt>(AI->getArraySize()))
1487           I.moveBefore(InsertPt);
1488     }
1489   }
1490 
1491   for (auto &Operand : OperandsToInstrument)
1492     instrumentMemAccess(Operand);
1493 
1494   if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) {
1495     for (auto Inst : IntrinToInstrument)
1496       instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
1497   }
1498 
1499   ShadowBase = nullptr;
1500   StackBaseTag = nullptr;
1501 
1502   return true;
1503 }
1504 
1505 void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
1506   assert(!UsePageAliases);
1507   Constant *Initializer = GV->getInitializer();
1508   uint64_t SizeInBytes =
1509       M.getDataLayout().getTypeAllocSize(Initializer->getType());
1510   uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment());
1511   if (SizeInBytes != NewSize) {
1512     // Pad the initializer out to the next multiple of 16 bytes and add the
1513     // required short granule tag.
1514     std::vector<uint8_t> Init(NewSize - SizeInBytes, 0);
1515     Init.back() = Tag;
1516     Constant *Padding = ConstantDataArray::get(*C, Init);
1517     Initializer = ConstantStruct::getAnon({Initializer, Padding});
1518   }
1519 
1520   auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(),
1521                                    GlobalValue::ExternalLinkage, Initializer,
1522                                    GV->getName() + ".hwasan");
1523   NewGV->copyAttributesFrom(GV);
1524   NewGV->setLinkage(GlobalValue::PrivateLinkage);
1525   NewGV->copyMetadata(GV, 0);
1526   NewGV->setAlignment(
1527       MaybeAlign(std::max(GV->getAlignment(), Mapping.getObjectAlignment())));
1528 
1529   // It is invalid to ICF two globals that have different tags. In the case
1530   // where the size of the global is a multiple of the tag granularity the
1531   // contents of the globals may be the same but the tags (i.e. symbol values)
1532   // may be different, and the symbols are not considered during ICF. In the
1533   // case where the size is not a multiple of the granularity, the short granule
1534   // tags would discriminate two globals with different tags, but there would
1535   // otherwise be nothing stopping such a global from being incorrectly ICF'd
1536   // with an uninstrumented (i.e. tag 0) global that happened to have the short
1537   // granule tag in the last byte.
1538   NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
1539 
1540   // Descriptor format (assuming little-endian):
1541   // bytes 0-3: relative address of global
1542   // bytes 4-6: size of global (16MB ought to be enough for anyone, but in case
1543   // it isn't, we create multiple descriptors)
1544   // byte 7: tag
1545   auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty);
1546   const uint64_t MaxDescriptorSize = 0xfffff0;
1547   for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes;
1548        DescriptorPos += MaxDescriptorSize) {
1549     auto *Descriptor =
1550         new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage,
1551                            nullptr, GV->getName() + ".hwasan.descriptor");
1552     auto *GVRelPtr = ConstantExpr::getTrunc(
1553         ConstantExpr::getAdd(
1554             ConstantExpr::getSub(
1555                 ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1556                 ConstantExpr::getPtrToInt(Descriptor, Int64Ty)),
1557             ConstantInt::get(Int64Ty, DescriptorPos)),
1558         Int32Ty);
1559     uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize);
1560     auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24));
1561     Descriptor->setComdat(NewGV->getComdat());
1562     Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag}));
1563     Descriptor->setSection("hwasan_globals");
1564     Descriptor->setMetadata(LLVMContext::MD_associated,
1565                             MDNode::get(*C, ValueAsMetadata::get(NewGV)));
1566     appendToCompilerUsed(M, Descriptor);
1567   }
1568 
1569   Constant *Aliasee = ConstantExpr::getIntToPtr(
1570       ConstantExpr::getAdd(
1571           ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1572           ConstantInt::get(Int64Ty, uint64_t(Tag) << PointerTagShift)),
1573       GV->getType());
1574   auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(),
1575                                     GV->getLinkage(), "", Aliasee, &M);
1576   Alias->setVisibility(GV->getVisibility());
1577   Alias->takeName(GV);
1578   GV->replaceAllUsesWith(Alias);
1579   GV->eraseFromParent();
1580 }
1581 
1582 static DenseSet<GlobalVariable *> getExcludedGlobals(Module &M) {
1583   NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals");
1584   if (!Globals)
1585     return DenseSet<GlobalVariable *>();
1586   DenseSet<GlobalVariable *> Excluded(Globals->getNumOperands());
1587   for (auto MDN : Globals->operands()) {
1588     // Metadata node contains the global and the fields of "Entry".
1589     assert(MDN->getNumOperands() == 5);
1590     auto *V = mdconst::extract_or_null<Constant>(MDN->getOperand(0));
1591     // The optimizer may optimize away a global entirely.
1592     if (!V)
1593       continue;
1594     auto *StrippedV = V->stripPointerCasts();
1595     auto *GV = dyn_cast<GlobalVariable>(StrippedV);
1596     if (!GV)
1597       continue;
1598     ConstantInt *IsExcluded = mdconst::extract<ConstantInt>(MDN->getOperand(4));
1599     if (IsExcluded->isOne())
1600       Excluded.insert(GV);
1601   }
1602   return Excluded;
1603 }
1604 
1605 void HWAddressSanitizer::instrumentGlobals() {
1606   std::vector<GlobalVariable *> Globals;
1607   auto ExcludedGlobals = getExcludedGlobals(M);
1608   for (GlobalVariable &GV : M.globals()) {
1609     if (ExcludedGlobals.count(&GV))
1610       continue;
1611 
1612     if (GV.isDeclarationForLinker() || GV.getName().startswith("llvm.") ||
1613         GV.isThreadLocal())
1614       continue;
1615 
1616     // Common symbols can't have aliases point to them, so they can't be tagged.
1617     if (GV.hasCommonLinkage())
1618       continue;
1619 
1620     // Globals with custom sections may be used in __start_/__stop_ enumeration,
1621     // which would be broken both by adding tags and potentially by the extra
1622     // padding/alignment that we insert.
1623     if (GV.hasSection())
1624       continue;
1625 
1626     Globals.push_back(&GV);
1627   }
1628 
1629   MD5 Hasher;
1630   Hasher.update(M.getSourceFileName());
1631   MD5::MD5Result Hash;
1632   Hasher.final(Hash);
1633   uint8_t Tag = Hash[0];
1634 
1635   for (GlobalVariable *GV : Globals) {
1636     Tag &= TagMaskByte;
1637     // Skip tag 0 in order to avoid collisions with untagged memory.
1638     if (Tag == 0)
1639       Tag = 1;
1640     instrumentGlobal(GV, Tag++);
1641   }
1642 }
1643 
1644 void HWAddressSanitizer::instrumentPersonalityFunctions() {
1645   // We need to untag stack frames as we unwind past them. That is the job of
1646   // the personality function wrapper, which either wraps an existing
1647   // personality function or acts as a personality function on its own. Each
1648   // function that has a personality function or that can be unwound past has
1649   // its personality function changed to a thunk that calls the personality
1650   // function wrapper in the runtime.
1651   MapVector<Constant *, std::vector<Function *>> PersonalityFns;
1652   for (Function &F : M) {
1653     if (F.isDeclaration() || !F.hasFnAttribute(Attribute::SanitizeHWAddress))
1654       continue;
1655 
1656     if (F.hasPersonalityFn()) {
1657       PersonalityFns[F.getPersonalityFn()->stripPointerCasts()].push_back(&F);
1658     } else if (!F.hasFnAttribute(Attribute::NoUnwind)) {
1659       PersonalityFns[nullptr].push_back(&F);
1660     }
1661   }
1662 
1663   if (PersonalityFns.empty())
1664     return;
1665 
1666   FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction(
1667       "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty,
1668       Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy);
1669   FunctionCallee UnwindGetGR = M.getOrInsertFunction("_Unwind_GetGR", VoidTy);
1670   FunctionCallee UnwindGetCFA = M.getOrInsertFunction("_Unwind_GetCFA", VoidTy);
1671 
1672   for (auto &P : PersonalityFns) {
1673     std::string ThunkName = kHwasanPersonalityThunkName;
1674     if (P.first)
1675       ThunkName += ("." + P.first->getName()).str();
1676     FunctionType *ThunkFnTy = FunctionType::get(
1677         Int32Ty, {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int8PtrTy}, false);
1678     bool IsLocal = P.first && (!isa<GlobalValue>(P.first) ||
1679                                cast<GlobalValue>(P.first)->hasLocalLinkage());
1680     auto *ThunkFn = Function::Create(ThunkFnTy,
1681                                      IsLocal ? GlobalValue::InternalLinkage
1682                                              : GlobalValue::LinkOnceODRLinkage,
1683                                      ThunkName, &M);
1684     if (!IsLocal) {
1685       ThunkFn->setVisibility(GlobalValue::HiddenVisibility);
1686       ThunkFn->setComdat(M.getOrInsertComdat(ThunkName));
1687     }
1688 
1689     auto *BB = BasicBlock::Create(*C, "entry", ThunkFn);
1690     IRBuilder<> IRB(BB);
1691     CallInst *WrapperCall = IRB.CreateCall(
1692         HwasanPersonalityWrapper,
1693         {ThunkFn->getArg(0), ThunkFn->getArg(1), ThunkFn->getArg(2),
1694          ThunkFn->getArg(3), ThunkFn->getArg(4),
1695          P.first ? IRB.CreateBitCast(P.first, Int8PtrTy)
1696                  : Constant::getNullValue(Int8PtrTy),
1697          IRB.CreateBitCast(UnwindGetGR.getCallee(), Int8PtrTy),
1698          IRB.CreateBitCast(UnwindGetCFA.getCallee(), Int8PtrTy)});
1699     WrapperCall->setTailCall();
1700     IRB.CreateRet(WrapperCall);
1701 
1702     for (Function *F : P.second)
1703       F->setPersonalityFn(ThunkFn);
1704   }
1705 }
1706 
1707 void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple,
1708                                              bool InstrumentWithCalls) {
1709   Scale = kDefaultShadowScale;
1710   if (TargetTriple.isOSFuchsia()) {
1711     // Fuchsia is always PIE, which means that the beginning of the address
1712     // space is always available.
1713     InGlobal = false;
1714     InTls = false;
1715     Offset = 0;
1716     WithFrameRecord = true;
1717   } else if (ClMappingOffset.getNumOccurrences() > 0) {
1718     InGlobal = false;
1719     InTls = false;
1720     Offset = ClMappingOffset;
1721     WithFrameRecord = false;
1722   } else if (ClEnableKhwasan || InstrumentWithCalls) {
1723     InGlobal = false;
1724     InTls = false;
1725     Offset = 0;
1726     WithFrameRecord = false;
1727   } else if (ClWithIfunc) {
1728     InGlobal = true;
1729     InTls = false;
1730     Offset = kDynamicShadowSentinel;
1731     WithFrameRecord = false;
1732   } else if (ClWithTls) {
1733     InGlobal = false;
1734     InTls = true;
1735     Offset = kDynamicShadowSentinel;
1736     WithFrameRecord = true;
1737   } else {
1738     InGlobal = false;
1739     InTls = false;
1740     Offset = kDynamicShadowSentinel;
1741     WithFrameRecord = false;
1742   }
1743 }
1744