//===- HWAddressSanitizer.cpp - detector of memory access bugs --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file is a part of HWAddressSanitizer, an address sanity checker
11 /// based on tagged addressing.
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/StringExtras.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/Triple.h"
19 #include "llvm/BinaryFormat/ELF.h"
20 #include "llvm/IR/Attributes.h"
21 #include "llvm/IR/BasicBlock.h"
22 #include "llvm/IR/Constant.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/DataLayout.h"
25 #include "llvm/IR/DebugInfoMetadata.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/IRBuilder.h"
29 #include "llvm/IR/InlineAsm.h"
30 #include "llvm/IR/InstVisitor.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/Instructions.h"
33 #include "llvm/IR/IntrinsicInst.h"
34 #include "llvm/IR/Intrinsics.h"
35 #include "llvm/IR/LLVMContext.h"
36 #include "llvm/IR/MDBuilder.h"
37 #include "llvm/IR/Module.h"
38 #include "llvm/IR/Type.h"
39 #include "llvm/IR/Value.h"
40 #include "llvm/Pass.h"
41 #include "llvm/Support/Casting.h"
42 #include "llvm/Support/CommandLine.h"
43 #include "llvm/Support/Debug.h"
44 #include "llvm/Support/raw_ostream.h"
45 #include "llvm/Transforms/Instrumentation.h"
46 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
47 #include "llvm/Transforms/Utils/ModuleUtils.h"
48 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
49 #include <sstream>
50 
51 using namespace llvm;
52 
53 #define DEBUG_TYPE "hwasan"
54 
// Names of the symbols and sections this pass creates or refers to.
static constexpr const char *kHwasanModuleCtorName = "hwasan.module_ctor";
static constexpr const char *kHwasanNoteName = "hwasan.note";
static constexpr const char *kHwasanInitName = "__hwasan_init";

static constexpr const char *kHwasanShadowMemoryDynamicAddress =
    "__hwasan_shadow_memory_dynamic_address";

// Access sizes are powers of two: 1, 2, 4, 8, 16.
static constexpr size_t kNumberOfAccessSizes = 5;

static constexpr size_t kDefaultShadowScale = 4;

// Shadow offset value meaning that the shadow base has to be obtained
// dynamically rather than being a compile-time constant.
static constexpr uint64_t kDynamicShadowSentinel =
    std::numeric_limits<uint64_t>::max();

// Number of bits a pointer is shifted right by to obtain its tag.
static constexpr unsigned kPointerTagShift = 56;

static constexpr unsigned kShadowBaseAlignment = 32;
71 
72 static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
73     "hwasan-memory-access-callback-prefix",
74     cl::desc("Prefix for memory access callbacks"), cl::Hidden,
75     cl::init("__hwasan_"));
76 
77 static cl::opt<bool>
78     ClInstrumentWithCalls("hwasan-instrument-with-calls",
79                 cl::desc("instrument reads and writes with callbacks"),
80                 cl::Hidden, cl::init(false));
81 
82 static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads",
83                                        cl::desc("instrument read instructions"),
84                                        cl::Hidden, cl::init(true));
85 
86 static cl::opt<bool> ClInstrumentWrites(
87     "hwasan-instrument-writes", cl::desc("instrument write instructions"),
88     cl::Hidden, cl::init(true));
89 
90 static cl::opt<bool> ClInstrumentAtomics(
91     "hwasan-instrument-atomics",
92     cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
93     cl::init(true));
94 
95 static cl::opt<bool> ClRecover(
96     "hwasan-recover",
97     cl::desc("Enable recovery mode (continue-after-error)."),
98     cl::Hidden, cl::init(false));
99 
100 static cl::opt<bool> ClInstrumentStack("hwasan-instrument-stack",
101                                        cl::desc("instrument stack (allocas)"),
102                                        cl::Hidden, cl::init(true));
103 
104 static cl::opt<bool> ClUARRetagToZero(
105     "hwasan-uar-retag-to-zero",
106     cl::desc("Clear alloca tags before returning from the function to allow "
107              "non-instrumented and instrumented function calls mix. When set "
108              "to false, allocas are retagged before returning from the "
109              "function to detect use after return."),
110     cl::Hidden, cl::init(true));
111 
112 static cl::opt<bool> ClGenerateTagsWithCalls(
113     "hwasan-generate-tags-with-calls",
114     cl::desc("generate new tags with runtime library calls"), cl::Hidden,
115     cl::init(false));
116 
117 static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"),
118                                cl::Hidden, cl::init(false));
119 
120 static cl::opt<int> ClMatchAllTag(
121     "hwasan-match-all-tag",
122     cl::desc("don't report bad accesses via pointers with this tag"),
123     cl::Hidden, cl::init(-1));
124 
125 static cl::opt<bool> ClEnableKhwasan(
126     "hwasan-kernel",
127     cl::desc("Enable KernelHWAddressSanitizer instrumentation"),
128     cl::Hidden, cl::init(false));
129 
130 // These flags allow to change the shadow mapping and control how shadow memory
131 // is accessed. The shadow mapping looks like:
132 //    Shadow = (Mem >> scale) + offset
133 
134 static cl::opt<uint64_t>
135     ClMappingOffset("hwasan-mapping-offset",
136                     cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"),
137                     cl::Hidden, cl::init(0));
138 
139 static cl::opt<bool>
140     ClWithIfunc("hwasan-with-ifunc",
141                 cl::desc("Access dynamic shadow through an ifunc global on "
142                          "platforms that support this"),
143                 cl::Hidden, cl::init(false));
144 
145 static cl::opt<bool> ClWithTls(
146     "hwasan-with-tls",
147     cl::desc("Access dynamic shadow through an thread-local pointer on "
148              "platforms that support this"),
149     cl::Hidden, cl::init(true));
150 
151 static cl::opt<bool>
152     ClRecordStackHistory("hwasan-record-stack-history",
153                          cl::desc("Record stack frames with tagged allocations "
154                                   "in a thread-local ring buffer"),
155                          cl::Hidden, cl::init(true));
156 static cl::opt<bool>
157     ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
158                               cl::desc("instrument memory intrinsics"),
159                               cl::Hidden, cl::init(true));
160 
161 static cl::opt<bool>
162     ClInstrumentLandingPads("hwasan-instrument-landing-pads",
163                               cl::desc("instrument landing pads"), cl::Hidden,
164                               cl::init(true));
165 
166 static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
167                                        cl::desc("inline all checks"),
168                                        cl::Hidden, cl::init(false));
169 
170 namespace {
171 
172 /// An instrumentation pass implementing detection of addressability bugs
173 /// using tagged pointers.
174 class HWAddressSanitizer {
175 public:
176   explicit HWAddressSanitizer(Module &M, bool CompileKernel = false,
177                               bool Recover = false) : M(M) {
178     this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
179     this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0 ?
180         ClEnableKhwasan : CompileKernel;
181 
182     initializeModule();
183   }
184 
185   bool sanitizeFunction(Function &F);
186   void initializeModule();
187 
188   void initializeCallbacks(Module &M);
189 
190   Value *getDynamicShadowIfunc(IRBuilder<> &IRB);
191   Value *getDynamicShadowNonTls(IRBuilder<> &IRB);
192 
193   void untagPointerOperand(Instruction *I, Value *Addr);
194   Value *shadowBase();
195   Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
196   void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
197                                  unsigned AccessSizeIndex,
198                                  Instruction *InsertBefore);
199   void instrumentMemIntrinsic(MemIntrinsic *MI);
200   bool instrumentMemAccess(Instruction *I);
201   Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
202                                    uint64_t *TypeSize, unsigned *Alignment,
203                                    Value **MaybeMask);
204 
205   bool isInterestingAlloca(const AllocaInst &AI);
206   bool tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
207   Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
208   Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
209   bool instrumentStack(
210       SmallVectorImpl<AllocaInst *> &Allocas,
211       DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> &AllocaDeclareMap,
212       SmallVectorImpl<Instruction *> &RetVec, Value *StackTag);
213   Value *readRegister(IRBuilder<> &IRB, StringRef Name);
214   bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
215   Value *getNextTagWithCall(IRBuilder<> &IRB);
216   Value *getStackBaseTag(IRBuilder<> &IRB);
217   Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, AllocaInst *AI,
218                      unsigned AllocaNo);
219   Value *getUARTag(IRBuilder<> &IRB, Value *StackTag);
220 
221   Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty);
222   void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);
223 
224   void instrumentGlobal(GlobalVariable *GV, uint8_t Tag);
225   void instrumentGlobals();
226 
227 private:
228   LLVMContext *C;
229   Module &M;
230   Triple TargetTriple;
231   FunctionCallee HWAsanMemmove, HWAsanMemcpy, HWAsanMemset;
232   FunctionCallee HWAsanHandleVfork;
233 
234   /// This struct defines the shadow mapping using the rule:
235   ///   shadow = (mem >> Scale) + Offset.
236   /// If InGlobal is true, then
237   ///   extern char __hwasan_shadow[];
238   ///   shadow = (mem >> Scale) + &__hwasan_shadow
239   /// If InTls is true, then
240   ///   extern char *__hwasan_tls;
241   ///   shadow = (mem>>Scale) + align_up(__hwasan_shadow, kShadowBaseAlignment)
242   struct ShadowMapping {
243     int Scale;
244     uint64_t Offset;
245     bool InGlobal;
246     bool InTls;
247 
248     void init(Triple &TargetTriple);
249     unsigned getObjectAlignment() const { return 1U << Scale; }
250   };
251   ShadowMapping Mapping;
252 
253   Type *IntptrTy;
254   Type *Int8PtrTy;
255   Type *Int8Ty;
256   Type *Int32Ty;
257   Type *Int64Ty = Type::getInt64Ty(M.getContext());
258 
259   bool CompileKernel;
260   bool Recover;
261 
262   Function *HwasanCtorFunction;
263 
264   FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
265   FunctionCallee HwasanMemoryAccessCallbackSized[2];
266 
267   FunctionCallee HwasanTagMemoryFunc;
268   FunctionCallee HwasanGenerateTagFunc;
269   FunctionCallee HwasanThreadEnterFunc;
270 
271   Constant *ShadowGlobal;
272 
273   Value *LocalDynamicShadow = nullptr;
274   Value *StackBaseTag = nullptr;
275   GlobalValue *ThreadPtrGlobal = nullptr;
276 };
277 
278 class HWAddressSanitizerLegacyPass : public FunctionPass {
279 public:
280   // Pass identification, replacement for typeid.
281   static char ID;
282 
283   explicit HWAddressSanitizerLegacyPass(bool CompileKernel = false,
284                                         bool Recover = false)
285       : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover) {}
286 
287   StringRef getPassName() const override { return "HWAddressSanitizer"; }
288 
289   bool doInitialization(Module &M) override {
290     HWASan = std::make_unique<HWAddressSanitizer>(M, CompileKernel, Recover);
291     return true;
292   }
293 
294   bool runOnFunction(Function &F) override {
295     return HWASan->sanitizeFunction(F);
296   }
297 
298   bool doFinalization(Module &M) override {
299     HWASan.reset();
300     return false;
301   }
302 
303 private:
304   std::unique_ptr<HWAddressSanitizer> HWASan;
305   bool CompileKernel;
306   bool Recover;
307 };
308 
309 } // end anonymous namespace
310 
311 char HWAddressSanitizerLegacyPass::ID = 0;
312 
313 INITIALIZE_PASS_BEGIN(
314     HWAddressSanitizerLegacyPass, "hwasan",
315     "HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
316     false)
317 INITIALIZE_PASS_END(
318     HWAddressSanitizerLegacyPass, "hwasan",
319     "HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
320     false)
321 
322 FunctionPass *llvm::createHWAddressSanitizerLegacyPassPass(bool CompileKernel,
323                                                            bool Recover) {
324   assert(!CompileKernel || Recover);
325   return new HWAddressSanitizerLegacyPass(CompileKernel, Recover);
326 }
327 
328 HWAddressSanitizerPass::HWAddressSanitizerPass(bool CompileKernel, bool Recover)
329     : CompileKernel(CompileKernel), Recover(Recover) {}
330 
331 PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
332                                               ModuleAnalysisManager &MAM) {
333   HWAddressSanitizer HWASan(M, CompileKernel, Recover);
334   bool Modified = false;
335   for (Function &F : M)
336     Modified |= HWASan.sanitizeFunction(F);
337   if (Modified)
338     return PreservedAnalyses::none();
339   return PreservedAnalyses::all();
340 }
341 
342 /// Module-level initialization.
343 ///
344 /// inserts a call to __hwasan_init to the module's constructor list.
345 void HWAddressSanitizer::initializeModule() {
346   LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
347   auto &DL = M.getDataLayout();
348 
349   TargetTriple = Triple(M.getTargetTriple());
350 
351   Mapping.init(TargetTriple);
352 
353   C = &(M.getContext());
354   IRBuilder<> IRB(*C);
355   IntptrTy = IRB.getIntPtrTy(DL);
356   Int8PtrTy = IRB.getInt8PtrTy();
357   Int8Ty = IRB.getInt8Ty();
358   Int32Ty = IRB.getInt32Ty();
359 
360   HwasanCtorFunction = nullptr;
361   if (!CompileKernel) {
362     std::tie(HwasanCtorFunction, std::ignore) =
363         getOrCreateSanitizerCtorAndInitFunctions(
364             M, kHwasanModuleCtorName, kHwasanInitName,
365             /*InitArgTypes=*/{},
366             /*InitArgs=*/{},
367             // This callback is invoked when the functions are created the first
368             // time. Hook them into the global ctors list in that case:
369             [&](Function *Ctor, FunctionCallee) {
370               Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
371               Ctor->setComdat(CtorComdat);
372               appendToGlobalCtors(M, Ctor, 0, Ctor);
373             });
374 
375     // Older versions of Android do not have the required runtime support for
376     // global instrumentation. On other platforms we currently require using the
377     // latest version of the runtime.
378     bool InstrumentGlobals =
379         !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30);
380     if (ClGlobals.getNumOccurrences())
381       InstrumentGlobals = ClGlobals;
382     if (InstrumentGlobals)
383       instrumentGlobals();
384   }
385 
386   if (!TargetTriple.isAndroid()) {
387     Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] {
388       auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false,
389                                     GlobalValue::ExternalLinkage, nullptr,
390                                     "__hwasan_tls", nullptr,
391                                     GlobalVariable::InitialExecTLSModel);
392       appendToCompilerUsed(M, GV);
393       return GV;
394     });
395     ThreadPtrGlobal = cast<GlobalVariable>(C);
396   }
397 }
398 
399 void HWAddressSanitizer::initializeCallbacks(Module &M) {
400   IRBuilder<> IRB(*C);
401   for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
402     const std::string TypeStr = AccessIsWrite ? "store" : "load";
403     const std::string EndingStr = Recover ? "_noabort" : "";
404 
405     HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(
406         ClMemoryAccessCallbackPrefix + TypeStr + "N" + EndingStr,
407         FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false));
408 
409     for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
410          AccessSizeIndex++) {
411       HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
412           M.getOrInsertFunction(
413               ClMemoryAccessCallbackPrefix + TypeStr +
414                   itostr(1ULL << AccessSizeIndex) + EndingStr,
415               FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false));
416     }
417   }
418 
419   HwasanTagMemoryFunc = M.getOrInsertFunction(
420       "__hwasan_tag_memory", IRB.getVoidTy(), Int8PtrTy, Int8Ty, IntptrTy);
421   HwasanGenerateTagFunc =
422       M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);
423 
424   ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
425                                      ArrayType::get(IRB.getInt8Ty(), 0));
426 
427   const std::string MemIntrinCallbackPrefix =
428       CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix;
429   HWAsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove",
430                                         IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
431                                         IRB.getInt8PtrTy(), IntptrTy);
432   HWAsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy",
433                                        IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
434                                        IRB.getInt8PtrTy(), IntptrTy);
435   HWAsanMemset = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memset",
436                                        IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
437                                        IRB.getInt32Ty(), IntptrTy);
438 
439   HWAsanHandleVfork =
440       M.getOrInsertFunction("__hwasan_handle_vfork", IRB.getVoidTy(), IntptrTy);
441 
442   HwasanThreadEnterFunc =
443       M.getOrInsertFunction("__hwasan_thread_enter", IRB.getVoidTy());
444 }
445 
446 Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
447   // An empty inline asm with input reg == output reg.
448   // An opaque no-op cast, basically.
449   InlineAsm *Asm = InlineAsm::get(
450       FunctionType::get(Int8PtrTy, {ShadowGlobal->getType()}, false),
451       StringRef(""), StringRef("=r,0"),
452       /*hasSideEffects=*/false);
453   return IRB.CreateCall(Asm, {ShadowGlobal}, ".hwasan.shadow");
454 }
455 
456 Value *HWAddressSanitizer::getDynamicShadowNonTls(IRBuilder<> &IRB) {
457   // Generate code only when dynamic addressing is needed.
458   if (Mapping.Offset != kDynamicShadowSentinel)
459     return nullptr;
460 
461   if (Mapping.InGlobal) {
462     return getDynamicShadowIfunc(IRB);
463   } else {
464     Value *GlobalDynamicAddress =
465         IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
466             kHwasanShadowMemoryDynamicAddress, Int8PtrTy);
467     return IRB.CreateLoad(Int8PtrTy, GlobalDynamicAddress);
468   }
469 }
470 
471 Value *HWAddressSanitizer::isInterestingMemoryAccess(Instruction *I,
472                                                      bool *IsWrite,
473                                                      uint64_t *TypeSize,
474                                                      unsigned *Alignment,
475                                                      Value **MaybeMask) {
476   // Skip memory accesses inserted by another instrumentation.
477   if (I->getMetadata("nosanitize")) return nullptr;
478 
479   // Do not instrument the load fetching the dynamic shadow address.
480   if (LocalDynamicShadow == I)
481     return nullptr;
482 
483   Value *PtrOperand = nullptr;
484   const DataLayout &DL = I->getModule()->getDataLayout();
485   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
486     if (!ClInstrumentReads) return nullptr;
487     *IsWrite = false;
488     *TypeSize = DL.getTypeStoreSizeInBits(LI->getType());
489     *Alignment = LI->getAlignment();
490     PtrOperand = LI->getPointerOperand();
491   } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
492     if (!ClInstrumentWrites) return nullptr;
493     *IsWrite = true;
494     *TypeSize = DL.getTypeStoreSizeInBits(SI->getValueOperand()->getType());
495     *Alignment = SI->getAlignment();
496     PtrOperand = SI->getPointerOperand();
497   } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
498     if (!ClInstrumentAtomics) return nullptr;
499     *IsWrite = true;
500     *TypeSize = DL.getTypeStoreSizeInBits(RMW->getValOperand()->getType());
501     *Alignment = 0;
502     PtrOperand = RMW->getPointerOperand();
503   } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
504     if (!ClInstrumentAtomics) return nullptr;
505     *IsWrite = true;
506     *TypeSize = DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType());
507     *Alignment = 0;
508     PtrOperand = XCHG->getPointerOperand();
509   }
510 
511   if (PtrOperand) {
512     // Do not instrument accesses from different address spaces; we cannot deal
513     // with them.
514     Type *PtrTy = cast<PointerType>(PtrOperand->getType()->getScalarType());
515     if (PtrTy->getPointerAddressSpace() != 0)
516       return nullptr;
517 
518     // Ignore swifterror addresses.
519     // swifterror memory addresses are mem2reg promoted by instruction
520     // selection. As such they cannot have regular uses like an instrumentation
521     // function and it makes no sense to track them as memory.
522     if (PtrOperand->isSwiftError())
523       return nullptr;
524   }
525 
526   return PtrOperand;
527 }
528 
529 static unsigned getPointerOperandIndex(Instruction *I) {
530   if (LoadInst *LI = dyn_cast<LoadInst>(I))
531     return LI->getPointerOperandIndex();
532   if (StoreInst *SI = dyn_cast<StoreInst>(I))
533     return SI->getPointerOperandIndex();
534   if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I))
535     return RMW->getPointerOperandIndex();
536   if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I))
537     return XCHG->getPointerOperandIndex();
538   report_fatal_error("Unexpected instruction");
539   return -1;
540 }
541 
542 static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
543   size_t Res = countTrailingZeros(TypeSize / 8);
544   assert(Res < kNumberOfAccessSizes);
545   return Res;
546 }
547 
548 void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
549   if (TargetTriple.isAArch64())
550     return;
551 
552   IRBuilder<> IRB(I);
553   Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
554   Value *UntaggedPtr =
555       IRB.CreateIntToPtr(untagPointer(IRB, AddrLong), Addr->getType());
556   I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
557 }
558 
559 Value *HWAddressSanitizer::shadowBase() {
560   if (LocalDynamicShadow)
561     return LocalDynamicShadow;
562   return ConstantExpr::getIntToPtr(ConstantInt::get(IntptrTy, Mapping.Offset),
563                                    Int8PtrTy);
564 }
565 
566 Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
567   // Mem >> Scale
568   Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale);
569   if (Mapping.Offset == 0)
570     return IRB.CreateIntToPtr(Shadow, Int8PtrTy);
571   // (Mem >> Scale) + Offset
572   return IRB.CreateGEP(Int8Ty, shadowBase(), Shadow);
573 }
574 
575 void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
576                                                    unsigned AccessSizeIndex,
577                                                    Instruction *InsertBefore) {
578   const int64_t AccessInfo = Recover * 0x20 + IsWrite * 0x10 + AccessSizeIndex;
579   IRBuilder<> IRB(InsertBefore);
580 
581   if (!ClInlineAllChecks && TargetTriple.isAArch64() &&
582       TargetTriple.isOSBinFormatELF() && !Recover) {
583     Module *M = IRB.GetInsertBlock()->getParent()->getParent();
584     Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy);
585     IRB.CreateCall(
586         Intrinsic::getDeclaration(M, Intrinsic::hwasan_check_memaccess),
587         {shadowBase(), Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
588     return;
589   }
590 
591   Value *PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
592   Value *PtrTag = IRB.CreateTrunc(IRB.CreateLShr(PtrLong, kPointerTagShift),
593                                   IRB.getInt8Ty());
594   Value *AddrLong = untagPointer(IRB, PtrLong);
595   Value *Shadow = memToShadow(AddrLong, IRB);
596   Value *MemTag = IRB.CreateLoad(Int8Ty, Shadow);
597   Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag);
598 
599   int matchAllTag = ClMatchAllTag.getNumOccurrences() > 0 ?
600       ClMatchAllTag : (CompileKernel ? 0xFF : -1);
601   if (matchAllTag != -1) {
602     Value *TagNotIgnored = IRB.CreateICmpNE(PtrTag,
603         ConstantInt::get(PtrTag->getType(), matchAllTag));
604     TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
605   }
606 
607   Instruction *CheckTerm =
608       SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, false,
609                                 MDBuilder(*C).createBranchWeights(1, 100000));
610 
611   IRB.SetInsertPoint(CheckTerm);
612   Value *OutOfShortGranuleTagRange =
613       IRB.CreateICmpUGT(MemTag, ConstantInt::get(Int8Ty, 15));
614   Instruction *CheckFailTerm =
615       SplitBlockAndInsertIfThen(OutOfShortGranuleTagRange, CheckTerm, !Recover,
616                                 MDBuilder(*C).createBranchWeights(1, 100000));
617 
618   IRB.SetInsertPoint(CheckTerm);
619   Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(PtrLong, 15), Int8Ty);
620   PtrLowBits = IRB.CreateAdd(
621       PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1));
622   Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, MemTag);
623   SplitBlockAndInsertIfThen(PtrLowBitsOOB, CheckTerm, false,
624                             MDBuilder(*C).createBranchWeights(1, 100000),
625                             nullptr, nullptr, CheckFailTerm->getParent());
626 
627   IRB.SetInsertPoint(CheckTerm);
628   Value *InlineTagAddr = IRB.CreateOr(AddrLong, 15);
629   InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, Int8PtrTy);
630   Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr);
631   Value *InlineTagMismatch = IRB.CreateICmpNE(PtrTag, InlineTag);
632   SplitBlockAndInsertIfThen(InlineTagMismatch, CheckTerm, false,
633                             MDBuilder(*C).createBranchWeights(1, 100000),
634                             nullptr, nullptr, CheckFailTerm->getParent());
635 
636   IRB.SetInsertPoint(CheckFailTerm);
637   InlineAsm *Asm;
638   switch (TargetTriple.getArch()) {
639     case Triple::x86_64:
640       // The signal handler will find the data address in rdi.
641       Asm = InlineAsm::get(
642           FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
643           "int3\nnopl " + itostr(0x40 + AccessInfo) + "(%rax)",
644           "{rdi}",
645           /*hasSideEffects=*/true);
646       break;
647     case Triple::aarch64:
648     case Triple::aarch64_be:
649       // The signal handler will find the data address in x0.
650       Asm = InlineAsm::get(
651           FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
652           "brk #" + itostr(0x900 + AccessInfo),
653           "{x0}",
654           /*hasSideEffects=*/true);
655       break;
656     default:
657       report_fatal_error("unsupported architecture");
658   }
659   IRB.CreateCall(Asm, PtrLong);
660   if (Recover)
661     cast<BranchInst>(CheckFailTerm)->setSuccessor(0, CheckTerm->getParent());
662 }
663 
664 void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
665   IRBuilder<> IRB(MI);
666   if (isa<MemTransferInst>(MI)) {
667     IRB.CreateCall(
668         isa<MemMoveInst>(MI) ? HWAsanMemmove : HWAsanMemcpy,
669         {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
670          IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
671          IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
672   } else if (isa<MemSetInst>(MI)) {
673     IRB.CreateCall(
674         HWAsanMemset,
675         {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
676          IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
677          IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
678   }
679   MI->eraseFromParent();
680 }
681 
682 bool HWAddressSanitizer::instrumentMemAccess(Instruction *I) {
683   LLVM_DEBUG(dbgs() << "Instrumenting: " << *I << "\n");
684   bool IsWrite = false;
685   unsigned Alignment = 0;
686   uint64_t TypeSize = 0;
687   Value *MaybeMask = nullptr;
688 
689   if (ClInstrumentMemIntrinsics && isa<MemIntrinsic>(I)) {
690     instrumentMemIntrinsic(cast<MemIntrinsic>(I));
691     return true;
692   }
693 
694   Value *Addr =
695       isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment, &MaybeMask);
696 
697   if (!Addr)
698     return false;
699 
700   if (MaybeMask)
701     return false; //FIXME
702 
703   IRBuilder<> IRB(I);
704   if (isPowerOf2_64(TypeSize) &&
705       (TypeSize / 8 <= (1UL << (kNumberOfAccessSizes - 1))) &&
706       (Alignment >= (1UL << Mapping.Scale) || Alignment == 0 ||
707        Alignment >= TypeSize / 8)) {
708     size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
709     if (ClInstrumentWithCalls) {
710       IRB.CreateCall(HwasanMemoryAccessCallback[IsWrite][AccessSizeIndex],
711                      IRB.CreatePointerCast(Addr, IntptrTy));
712     } else {
713       instrumentMemAccessInline(Addr, IsWrite, AccessSizeIndex, I);
714     }
715   } else {
716     IRB.CreateCall(HwasanMemoryAccessCallbackSized[IsWrite],
717                    {IRB.CreatePointerCast(Addr, IntptrTy),
718                     ConstantInt::get(IntptrTy, TypeSize / 8)});
719   }
720   untagPointerOperand(I, Addr);
721 
722   return true;
723 }
724 
725 static uint64_t getAllocaSizeInBytes(const AllocaInst &AI) {
726   uint64_t ArraySize = 1;
727   if (AI.isArrayAllocation()) {
728     const ConstantInt *CI = dyn_cast<ConstantInt>(AI.getArraySize());
729     assert(CI && "non-constant array size");
730     ArraySize = CI->getZExtValue();
731   }
732   Type *Ty = AI.getAllocatedType();
733   uint64_t SizeInBytes = AI.getModule()->getDataLayout().getTypeAllocSize(Ty);
734   return SizeInBytes * ArraySize;
735 }
736 
737 bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI,
738                                    Value *Tag, size_t Size) {
739   size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
740 
741   Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty());
742   if (ClInstrumentWithCalls) {
743     IRB.CreateCall(HwasanTagMemoryFunc,
744                    {IRB.CreatePointerCast(AI, Int8PtrTy), JustTag,
745                     ConstantInt::get(IntptrTy, AlignedSize)});
746   } else {
747     size_t ShadowSize = Size >> Mapping.Scale;
748     Value *ShadowPtr = memToShadow(IRB.CreatePointerCast(AI, IntptrTy), IRB);
749     // If this memset is not inlined, it will be intercepted in the hwasan
750     // runtime library. That's OK, because the interceptor skips the checks if
751     // the address is in the shadow region.
752     // FIXME: the interceptor is not as fast as real memset. Consider lowering
753     // llvm.memset right here into either a sequence of stores, or a call to
754     // hwasan_tag_memory.
755     if (ShadowSize)
756       IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, /*Align=*/1);
757     if (Size != AlignedSize) {
758       IRB.CreateStore(
759           ConstantInt::get(Int8Ty, Size % Mapping.getObjectAlignment()),
760           IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
761       IRB.CreateStore(JustTag, IRB.CreateConstGEP1_32(
762                                    Int8Ty, IRB.CreateBitCast(AI, Int8PtrTy),
763                                    AlignedSize - 1));
764     }
765   }
766   return true;
767 }
768 
/// Maps an alloca index to the XOR mask used to derive that alloca's tag from
/// the stack base tag. Indices past the end of the table wrap around.
static unsigned RetagMask(unsigned AllocaNo) {
  // A list of 8-bit numbers that have at most one run of non-zero bits.
  // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these
  // masks.
  // The list does not include the value 255, which is used for UAR.
  //
  // Because we are more likely to use earlier elements of this list than later
  // ones, it is sorted in increasing order of probability of collision with a
  // mask allocated (temporally) nearby. The program that generated this list
  // can be found at:
  // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py
  static const unsigned Masks[] = {
      0,   128, 64,  192, 32,  96, 224, 112, 240, 48,  16, 120,
      248, 56,  24,  8,   124, 252, 60, 28,  12,  4,   126, 254,
      62,  30,  14,  6,   2,   127, 63, 31,  15,  7,   3,   1};
  const unsigned NumMasks = sizeof(Masks) / sizeof(Masks[0]);
  const unsigned Slot = AllocaNo % NumMasks;
  return Masks[Slot];
}
786 
787 Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
788   return IRB.CreateZExt(IRB.CreateCall(HwasanGenerateTagFunc), IntptrTy);
789 }
790 
791 Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
792   if (ClGenerateTagsWithCalls)
793     return getNextTagWithCall(IRB);
794   if (StackBaseTag)
795     return StackBaseTag;
796   // FIXME: use addressofreturnaddress (but implement it in aarch64 backend
797   // first).
798   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
799   auto GetStackPointerFn = Intrinsic::getDeclaration(
800       M, Intrinsic::frameaddress,
801       IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
802   Value *StackPointer = IRB.CreateCall(
803       GetStackPointerFn, {Constant::getNullValue(IRB.getInt32Ty())});
804 
805   // Extract some entropy from the stack pointer for the tags.
806   // Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ
807   // between functions).
808   Value *StackPointerLong = IRB.CreatePointerCast(StackPointer, IntptrTy);
809   Value *StackTag =
810       IRB.CreateXor(StackPointerLong, IRB.CreateLShr(StackPointerLong, 20),
811                     "hwasan.stack.base.tag");
812   return StackTag;
813 }
814 
815 Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag,
816                                         AllocaInst *AI, unsigned AllocaNo) {
817   if (ClGenerateTagsWithCalls)
818     return getNextTagWithCall(IRB);
819   return IRB.CreateXor(StackTag,
820                        ConstantInt::get(IntptrTy, RetagMask(AllocaNo)));
821 }
822 
823 Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB, Value *StackTag) {
824   if (ClUARRetagToZero)
825     return ConstantInt::get(IntptrTy, 0);
826   if (ClGenerateTagsWithCalls)
827     return getNextTagWithCall(IRB);
828   return IRB.CreateXor(StackTag, ConstantInt::get(IntptrTy, 0xFFU));
829 }
830 
831 // Add a tag to an address.
832 Value *HWAddressSanitizer::tagPointer(IRBuilder<> &IRB, Type *Ty,
833                                       Value *PtrLong, Value *Tag) {
834   Value *TaggedPtrLong;
835   if (CompileKernel) {
836     // Kernel addresses have 0xFF in the most significant byte.
837     Value *ShiftedTag = IRB.CreateOr(
838         IRB.CreateShl(Tag, kPointerTagShift),
839         ConstantInt::get(IntptrTy, (1ULL << kPointerTagShift) - 1));
840     TaggedPtrLong = IRB.CreateAnd(PtrLong, ShiftedTag);
841   } else {
842     // Userspace can simply do OR (tag << 56);
843     Value *ShiftedTag = IRB.CreateShl(Tag, kPointerTagShift);
844     TaggedPtrLong = IRB.CreateOr(PtrLong, ShiftedTag);
845   }
846   return IRB.CreateIntToPtr(TaggedPtrLong, Ty);
847 }
848 
849 // Remove tag from an address.
850 Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) {
851   Value *UntaggedPtrLong;
852   if (CompileKernel) {
853     // Kernel addresses have 0xFF in the most significant byte.
854     UntaggedPtrLong = IRB.CreateOr(PtrLong,
855         ConstantInt::get(PtrLong->getType(), 0xFFULL << kPointerTagShift));
856   } else {
857     // Userspace addresses have 0x00.
858     UntaggedPtrLong = IRB.CreateAnd(PtrLong,
859         ConstantInt::get(PtrLong->getType(), ~(0xFFULL << kPointerTagShift)));
860   }
861   return UntaggedPtrLong;
862 }
863 
864 Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
865   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
866   if (TargetTriple.isAArch64() && TargetTriple.isAndroid()) {
867     // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER
868     // in Bionic's libc/private/bionic_tls.h.
869     Function *ThreadPointerFunc =
870         Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
871     Value *SlotPtr = IRB.CreatePointerCast(
872         IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
873                                IRB.CreateCall(ThreadPointerFunc), 0x30),
874         Ty->getPointerTo(0));
875     return SlotPtr;
876   }
877   if (ThreadPtrGlobal)
878     return ThreadPtrGlobal;
879 
880 
881   return nullptr;
882 }
883 
884 void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
885   if (!Mapping.InTls) {
886     LocalDynamicShadow = getDynamicShadowNonTls(IRB);
887     return;
888   }
889 
890   if (!WithFrameRecord && TargetTriple.isAndroid()) {
891     LocalDynamicShadow = getDynamicShadowIfunc(IRB);
892     return;
893   }
894 
895   Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
896   assert(SlotPtr);
897 
898   Instruction *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
899 
900   Function *F = IRB.GetInsertBlock()->getParent();
901   if (F->getFnAttribute("hwasan-abi").getValueAsString() == "interceptor") {
902     Value *ThreadLongEqZero =
903         IRB.CreateICmpEQ(ThreadLong, ConstantInt::get(IntptrTy, 0));
904     auto *Br = cast<BranchInst>(SplitBlockAndInsertIfThen(
905         ThreadLongEqZero, cast<Instruction>(ThreadLongEqZero)->getNextNode(),
906         false, MDBuilder(*C).createBranchWeights(1, 100000)));
907 
908     IRB.SetInsertPoint(Br);
909     // FIXME: This should call a new runtime function with a custom calling
910     // convention to avoid needing to spill all arguments here.
911     IRB.CreateCall(HwasanThreadEnterFunc);
912     LoadInst *ReloadThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
913 
914     IRB.SetInsertPoint(&*Br->getSuccessor(0)->begin());
915     PHINode *ThreadLongPhi = IRB.CreatePHI(IntptrTy, 2);
916     ThreadLongPhi->addIncoming(ThreadLong, ThreadLong->getParent());
917     ThreadLongPhi->addIncoming(ReloadThreadLong, ReloadThreadLong->getParent());
918     ThreadLong = ThreadLongPhi;
919   }
920 
921   // Extract the address field from ThreadLong. Unnecessary on AArch64 with TBI.
922   Value *ThreadLongMaybeUntagged =
923       TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong);
924 
925   if (WithFrameRecord) {
926     StackBaseTag = IRB.CreateAShr(ThreadLong, 3);
927 
928     // Prepare ring buffer data.
929     Value *PC;
930     if (TargetTriple.getArch() == Triple::aarch64)
931       PC = readRegister(IRB, "pc");
932     else
933       PC = IRB.CreatePtrToInt(F, IntptrTy);
934     Module *M = F->getParent();
935     auto GetStackPointerFn = Intrinsic::getDeclaration(
936         M, Intrinsic::frameaddress,
937         IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
938     Value *SP = IRB.CreatePtrToInt(
939         IRB.CreateCall(GetStackPointerFn,
940                        {Constant::getNullValue(IRB.getInt32Ty())}),
941         IntptrTy);
942     // Mix SP and PC.
943     // Assumptions:
944     // PC is 0x0000PPPPPPPPPPPP  (48 bits are meaningful, others are zero)
945     // SP is 0xsssssssssssSSSS0  (4 lower bits are zero)
946     // We only really need ~20 lower non-zero bits (SSSS), so we mix like this:
947     //       0xSSSSPPPPPPPPPPPP
948     SP = IRB.CreateShl(SP, 44);
949 
950     // Store data to ring buffer.
951     Value *RecordPtr =
952         IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IntptrTy->getPointerTo(0));
953     IRB.CreateStore(IRB.CreateOr(PC, SP), RecordPtr);
954 
955     // Update the ring buffer. Top byte of ThreadLong defines the size of the
956     // buffer in pages, it must be a power of two, and the start of the buffer
957     // must be aligned by twice that much. Therefore wrap around of the ring
958     // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
959     // The use of AShr instead of LShr is due to
960     //   https://bugs.llvm.org/show_bug.cgi?id=39030
961     // Runtime library makes sure not to use the highest bit.
962     Value *WrapMask = IRB.CreateXor(
963         IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
964         ConstantInt::get(IntptrTy, (uint64_t)-1));
965     Value *ThreadLongNew = IRB.CreateAnd(
966         IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
967     IRB.CreateStore(ThreadLongNew, SlotPtr);
968   }
969 
970   // Get shadow base address by aligning RecordPtr up.
971   // Note: this is not correct if the pointer is already aligned.
972   // Runtime library will make sure this never happens.
973   LocalDynamicShadow = IRB.CreateAdd(
974       IRB.CreateOr(
975           ThreadLongMaybeUntagged,
976           ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
977       ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
978   LocalDynamicShadow = IRB.CreateIntToPtr(LocalDynamicShadow, Int8PtrTy);
979 }
980 
981 Value *HWAddressSanitizer::readRegister(IRBuilder<> &IRB, StringRef Name) {
982   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
983   Function *ReadRegister =
984       Intrinsic::getDeclaration(M, Intrinsic::read_register, IntptrTy);
985   MDNode *MD = MDNode::get(*C, {MDString::get(*C, Name)});
986   Value *Args[] = {MetadataAsValue::get(*C, MD)};
987   return IRB.CreateCall(ReadRegister, Args);
988 }
989 
990 bool HWAddressSanitizer::instrumentLandingPads(
991     SmallVectorImpl<Instruction *> &LandingPadVec) {
992   for (auto *LP : LandingPadVec) {
993     IRBuilder<> IRB(LP->getNextNode());
994     IRB.CreateCall(
995         HWAsanHandleVfork,
996         {readRegister(IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp"
997                                                                       : "sp")});
998   }
999   return true;
1000 }
1001 
1002 bool HWAddressSanitizer::instrumentStack(
1003     SmallVectorImpl<AllocaInst *> &Allocas,
1004     DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> &AllocaDeclareMap,
1005     SmallVectorImpl<Instruction *> &RetVec, Value *StackTag) {
1006   // Ideally, we want to calculate tagged stack base pointer, and rewrite all
1007   // alloca addresses using that. Unfortunately, offsets are not known yet
1008   // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a
1009   // temp, shift-OR it into each alloca address and xor with the retag mask.
1010   // This generates one extra instruction per alloca use.
1011   for (unsigned N = 0; N < Allocas.size(); ++N) {
1012     auto *AI = Allocas[N];
1013     IRBuilder<> IRB(AI->getNextNode());
1014 
1015     // Replace uses of the alloca with tagged address.
1016     Value *Tag = getAllocaTag(IRB, StackTag, AI, N);
1017     Value *AILong = IRB.CreatePointerCast(AI, IntptrTy);
1018     Value *Replacement = tagPointer(IRB, AI->getType(), AILong, Tag);
1019     std::string Name =
1020         AI->hasName() ? AI->getName().str() : "alloca." + itostr(N);
1021     Replacement->setName(Name + ".hwasan");
1022 
1023     AI->replaceUsesWithIf(Replacement,
1024                           [AILong](Use &U) { return U.getUser() != AILong; });
1025 
1026     for (auto *DDI : AllocaDeclareMap.lookup(AI)) {
1027       DIExpression *OldExpr = DDI->getExpression();
1028       DIExpression *NewExpr = DIExpression::append(
1029           OldExpr, {dwarf::DW_OP_LLVM_tag_offset, RetagMask(N)});
1030       DDI->setArgOperand(2, MetadataAsValue::get(*C, NewExpr));
1031     }
1032 
1033     size_t Size = getAllocaSizeInBytes(*AI);
1034     tagAlloca(IRB, AI, Tag, Size);
1035 
1036     for (auto RI : RetVec) {
1037       IRB.SetInsertPoint(RI);
1038 
1039       // Re-tag alloca memory with the special UAR tag.
1040       Value *Tag = getUARTag(IRB, StackTag);
1041       tagAlloca(IRB, AI, Tag, alignTo(Size, Mapping.getObjectAlignment()));
1042     }
1043   }
1044 
1045   return true;
1046 }
1047 
1048 bool HWAddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
1049   return (AI.getAllocatedType()->isSized() &&
1050           // FIXME: instrument dynamic allocas, too
1051           AI.isStaticAlloca() &&
1052           // alloca() may be called with 0 size, ignore it.
1053           getAllocaSizeInBytes(AI) > 0 &&
1054           // We are only interested in allocas not promotable to registers.
1055           // Promotable allocas are common under -O0.
1056           !isAllocaPromotable(&AI) &&
1057           // inalloca allocas are not treated as static, and we don't want
1058           // dynamic alloca instrumentation for them as well.
1059           !AI.isUsedWithInAlloca() &&
1060           // swifterror allocas are register promoted by ISel
1061           !AI.isSwiftError());
1062 }
1063 
1064 bool HWAddressSanitizer::sanitizeFunction(Function &F) {
1065   if (&F == HwasanCtorFunction)
1066     return false;
1067 
1068   if (!F.hasFnAttribute(Attribute::SanitizeHWAddress))
1069     return false;
1070 
1071   LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n");
1072 
1073   SmallVector<Instruction*, 16> ToInstrument;
1074   SmallVector<AllocaInst*, 8> AllocasToInstrument;
1075   SmallVector<Instruction*, 8> RetVec;
1076   SmallVector<Instruction*, 8> LandingPadVec;
1077   DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> AllocaDeclareMap;
1078   for (auto &BB : F) {
1079     for (auto &Inst : BB) {
1080       if (ClInstrumentStack)
1081         if (AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
1082           if (isInterestingAlloca(*AI))
1083             AllocasToInstrument.push_back(AI);
1084           continue;
1085         }
1086 
1087       if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst) ||
1088           isa<CleanupReturnInst>(Inst))
1089         RetVec.push_back(&Inst);
1090 
1091       if (auto *DDI = dyn_cast<DbgDeclareInst>(&Inst))
1092         if (auto *Alloca = dyn_cast_or_null<AllocaInst>(DDI->getAddress()))
1093           AllocaDeclareMap[Alloca].push_back(DDI);
1094 
1095       if (ClInstrumentLandingPads && isa<LandingPadInst>(Inst))
1096         LandingPadVec.push_back(&Inst);
1097 
1098       Value *MaybeMask = nullptr;
1099       bool IsWrite;
1100       unsigned Alignment;
1101       uint64_t TypeSize;
1102       Value *Addr = isInterestingMemoryAccess(&Inst, &IsWrite, &TypeSize,
1103                                               &Alignment, &MaybeMask);
1104       if (Addr || isa<MemIntrinsic>(Inst))
1105         ToInstrument.push_back(&Inst);
1106     }
1107   }
1108 
1109   initializeCallbacks(*F.getParent());
1110 
1111   if (!LandingPadVec.empty())
1112     instrumentLandingPads(LandingPadVec);
1113 
1114   if (AllocasToInstrument.empty() && ToInstrument.empty())
1115     return false;
1116 
1117   assert(!LocalDynamicShadow);
1118 
1119   Instruction *InsertPt = &*F.getEntryBlock().begin();
1120   IRBuilder<> EntryIRB(InsertPt);
1121   emitPrologue(EntryIRB,
1122                /*WithFrameRecord*/ ClRecordStackHistory &&
1123                    !AllocasToInstrument.empty());
1124 
1125   bool Changed = false;
1126   if (!AllocasToInstrument.empty()) {
1127     Value *StackTag =
1128         ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
1129     Changed |= instrumentStack(AllocasToInstrument, AllocaDeclareMap, RetVec,
1130                                StackTag);
1131   }
1132 
1133   // Pad and align each of the allocas that we instrumented to stop small
1134   // uninteresting allocas from hiding in instrumented alloca's padding and so
1135   // that we have enough space to store real tags for short granules.
1136   DenseMap<AllocaInst *, AllocaInst *> AllocaToPaddedAllocaMap;
1137   for (AllocaInst *AI : AllocasToInstrument) {
1138     uint64_t Size = getAllocaSizeInBytes(*AI);
1139     uint64_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
1140     AI->setAlignment(
1141         std::max(AI->getAlignment(), Mapping.getObjectAlignment()));
1142     if (Size != AlignedSize) {
1143       Type *AllocatedType = AI->getAllocatedType();
1144       if (AI->isArrayAllocation()) {
1145         uint64_t ArraySize =
1146             cast<ConstantInt>(AI->getArraySize())->getZExtValue();
1147         AllocatedType = ArrayType::get(AllocatedType, ArraySize);
1148       }
1149       Type *TypeWithPadding = StructType::get(
1150           AllocatedType, ArrayType::get(Int8Ty, AlignedSize - Size));
1151       auto *NewAI = new AllocaInst(
1152           TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI);
1153       NewAI->takeName(AI);
1154       NewAI->setAlignment(AI->getAlignment());
1155       NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca());
1156       NewAI->setSwiftError(AI->isSwiftError());
1157       NewAI->copyMetadata(*AI);
1158       auto *Bitcast = new BitCastInst(NewAI, AI->getType(), "", AI);
1159       AI->replaceAllUsesWith(Bitcast);
1160       AllocaToPaddedAllocaMap[AI] = NewAI;
1161     }
1162   }
1163 
1164   if (!AllocaToPaddedAllocaMap.empty()) {
1165     for (auto &BB : F)
1166       for (auto &Inst : BB)
1167         if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&Inst))
1168           if (auto *AI =
1169                   dyn_cast_or_null<AllocaInst>(DVI->getVariableLocation()))
1170             if (auto *NewAI = AllocaToPaddedAllocaMap.lookup(AI))
1171               DVI->setArgOperand(
1172                   0, MetadataAsValue::get(*C, LocalAsMetadata::get(NewAI)));
1173     for (auto &P : AllocaToPaddedAllocaMap)
1174       P.first->eraseFromParent();
1175   }
1176 
1177   // If we split the entry block, move any allocas that were originally in the
1178   // entry block back into the entry block so that they aren't treated as
1179   // dynamic allocas.
1180   if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
1181     InsertPt = &*F.getEntryBlock().begin();
1182     for (auto II = EntryIRB.GetInsertBlock()->begin(),
1183               IE = EntryIRB.GetInsertBlock()->end();
1184          II != IE;) {
1185       Instruction *I = &*II++;
1186       if (auto *AI = dyn_cast<AllocaInst>(I))
1187         if (isa<ConstantInt>(AI->getArraySize()))
1188           I->moveBefore(InsertPt);
1189     }
1190   }
1191 
1192   for (auto Inst : ToInstrument)
1193     Changed |= instrumentMemAccess(Inst);
1194 
1195   LocalDynamicShadow = nullptr;
1196   StackBaseTag = nullptr;
1197 
1198   return Changed;
1199 }
1200 
1201 void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
1202   Constant *Initializer = GV->getInitializer();
1203   uint64_t SizeInBytes =
1204       M.getDataLayout().getTypeAllocSize(Initializer->getType());
1205   uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment());
1206   if (SizeInBytes != NewSize) {
1207     // Pad the initializer out to the next multiple of 16 bytes and add the
1208     // required short granule tag.
1209     std::vector<uint8_t> Init(NewSize - SizeInBytes, 0);
1210     Init.back() = Tag;
1211     Constant *Padding = ConstantDataArray::get(*C, Init);
1212     Initializer = ConstantStruct::getAnon({Initializer, Padding});
1213   }
1214 
1215   auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(),
1216                                    GlobalValue::ExternalLinkage, Initializer,
1217                                    GV->getName() + ".hwasan");
1218   NewGV->copyAttributesFrom(GV);
1219   NewGV->setLinkage(GlobalValue::PrivateLinkage);
1220   NewGV->copyMetadata(GV, 0);
1221   NewGV->setAlignment(
1222       std::max(GV->getAlignment(), Mapping.getObjectAlignment()));
1223 
1224   // It is invalid to ICF two globals that have different tags. In the case
1225   // where the size of the global is a multiple of the tag granularity the
1226   // contents of the globals may be the same but the tags (i.e. symbol values)
1227   // may be different, and the symbols are not considered during ICF. In the
1228   // case where the size is not a multiple of the granularity, the short granule
1229   // tags would discriminate two globals with different tags, but there would
1230   // otherwise be nothing stopping such a global from being incorrectly ICF'd
1231   // with an uninstrumented (i.e. tag 0) global that happened to have the short
1232   // granule tag in the last byte.
1233   NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
1234 
1235   // Descriptor format (assuming little-endian):
1236   // bytes 0-3: relative address of global
1237   // bytes 4-6: size of global (16MB ought to be enough for anyone, but in case
1238   // it isn't, we create multiple descriptors)
1239   // byte 7: tag
1240   auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty);
1241   const uint64_t MaxDescriptorSize = 0xfffff0;
1242   for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes;
1243        DescriptorPos += MaxDescriptorSize) {
1244     auto *Descriptor =
1245         new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage,
1246                            nullptr, GV->getName() + ".hwasan.descriptor");
1247     auto *GVRelPtr = ConstantExpr::getTrunc(
1248         ConstantExpr::getAdd(
1249             ConstantExpr::getSub(
1250                 ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1251                 ConstantExpr::getPtrToInt(Descriptor, Int64Ty)),
1252             ConstantInt::get(Int64Ty, DescriptorPos)),
1253         Int32Ty);
1254     uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize);
1255     auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24));
1256     Descriptor->setComdat(NewGV->getComdat());
1257     Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag}));
1258     Descriptor->setSection("hwasan_globals");
1259     Descriptor->setMetadata(LLVMContext::MD_associated,
1260                             MDNode::get(*C, ValueAsMetadata::get(NewGV)));
1261     appendToCompilerUsed(M, Descriptor);
1262   }
1263 
1264   Constant *Aliasee = ConstantExpr::getIntToPtr(
1265       ConstantExpr::getAdd(
1266           ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1267           ConstantInt::get(Int64Ty, uint64_t(Tag) << kPointerTagShift)),
1268       GV->getType());
1269   auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(),
1270                                     GV->getLinkage(), "", Aliasee, &M);
1271   Alias->setVisibility(GV->getVisibility());
1272   Alias->takeName(GV);
1273   GV->replaceAllUsesWith(Alias);
1274   GV->eraseFromParent();
1275 }
1276 
1277 void HWAddressSanitizer::instrumentGlobals() {
1278   // Start by creating a note that contains pointers to the list of global
1279   // descriptors. Adding a note to the output file will cause the linker to
1280   // create a PT_NOTE program header pointing to the note that we can use to
1281   // find the descriptor list starting from the program headers. A function
1282   // provided by the runtime initializes the shadow memory for the globals by
1283   // accessing the descriptor list via the note. The dynamic loader needs to
1284   // call this function whenever a library is loaded.
1285   //
1286   // The reason why we use a note for this instead of a more conventional
1287   // approach of having a global constructor pass a descriptor list pointer to
1288   // the runtime is because of an order of initialization problem. With
1289   // constructors we can encounter the following problematic scenario:
1290   //
1291   // 1) library A depends on library B and also interposes one of B's symbols
1292   // 2) B's constructors are called before A's (as required for correctness)
1293   // 3) during construction, B accesses one of its "own" globals (actually
1294   //    interposed by A) and triggers a HWASAN failure due to the initialization
1295   //    for A not having happened yet
1296   //
1297   // Even without interposition it is possible to run into similar situations in
1298   // cases where two libraries mutually depend on each other.
1299   //
1300   // We only need one note per binary, so put everything for the note in a
1301   // comdat.
1302   Comdat *NoteComdat = M.getOrInsertComdat(kHwasanNoteName);
1303 
1304   Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
1305   auto Start =
1306       new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
1307                          nullptr, "__start_hwasan_globals");
1308   Start->setVisibility(GlobalValue::HiddenVisibility);
1309   Start->setDSOLocal(true);
1310   auto Stop =
1311       new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
1312                          nullptr, "__stop_hwasan_globals");
1313   Stop->setVisibility(GlobalValue::HiddenVisibility);
1314   Stop->setDSOLocal(true);
1315 
1316   // Null-terminated so actually 8 bytes, which are required in order to align
1317   // the note properly.
1318   auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0");
1319 
1320   auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(),
1321                                  Int32Ty, Int32Ty);
1322   auto *Note =
1323       new GlobalVariable(M, NoteTy, /*isConstantGlobal=*/true,
1324                          GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName);
1325   Note->setSection(".note.hwasan.globals");
1326   Note->setComdat(NoteComdat);
1327   Note->setAlignment(4);
1328   Note->setDSOLocal(true);
1329 
1330   // The pointers in the note need to be relative so that the note ends up being
1331   // placed in rodata, which is the standard location for notes.
1332   auto CreateRelPtr = [&](Constant *Ptr) {
1333     return ConstantExpr::getTrunc(
1334         ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty),
1335                              ConstantExpr::getPtrToInt(Note, Int64Ty)),
1336         Int32Ty);
1337   };
1338   Note->setInitializer(ConstantStruct::getAnon(
1339       {ConstantInt::get(Int32Ty, 8),                           // n_namesz
1340        ConstantInt::get(Int32Ty, 8),                           // n_descsz
1341        ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type
1342        Name, CreateRelPtr(Start), CreateRelPtr(Stop)}));
1343   appendToCompilerUsed(M, Note);
1344 
1345   // Create a zero-length global in hwasan_globals so that the linker will
1346   // always create start and stop symbols.
1347   auto Dummy = new GlobalVariable(
1348       M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
1349       Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global");
1350   Dummy->setSection("hwasan_globals");
1351   Dummy->setComdat(NoteComdat);
1352   Dummy->setMetadata(LLVMContext::MD_associated,
1353                      MDNode::get(*C, ValueAsMetadata::get(Note)));
1354   appendToCompilerUsed(M, Dummy);
1355 
1356   std::vector<GlobalVariable *> Globals;
1357   for (GlobalVariable &GV : M.globals()) {
1358     if (GV.isDeclarationForLinker() || GV.getName().startswith("llvm.") ||
1359         GV.isThreadLocal())
1360       continue;
1361 
1362     // Common symbols can't have aliases point to them, so they can't be tagged.
1363     if (GV.hasCommonLinkage())
1364       continue;
1365 
1366     // Globals with custom sections may be used in __start_/__stop_ enumeration,
1367     // which would be broken both by adding tags and potentially by the extra
1368     // padding/alignment that we insert.
1369     if (GV.hasSection())
1370       continue;
1371 
1372     Globals.push_back(&GV);
1373   }
1374 
1375   MD5 Hasher;
1376   Hasher.update(M.getSourceFileName());
1377   MD5::MD5Result Hash;
1378   Hasher.final(Hash);
1379   uint8_t Tag = Hash[0];
1380 
1381   for (GlobalVariable *GV : Globals) {
1382     // Skip tag 0 in order to avoid collisions with untagged memory.
1383     if (Tag == 0)
1384       Tag = 1;
1385     instrumentGlobal(GV, Tag++);
1386   }
1387 }
1388 
1389 void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple) {
1390   Scale = kDefaultShadowScale;
1391   if (ClMappingOffset.getNumOccurrences() > 0) {
1392     InGlobal = false;
1393     InTls = false;
1394     Offset = ClMappingOffset;
1395   } else if (ClEnableKhwasan || ClInstrumentWithCalls) {
1396     InGlobal = false;
1397     InTls = false;
1398     Offset = 0;
1399   } else if (ClWithIfunc) {
1400     InGlobal = true;
1401     InTls = false;
1402     Offset = kDynamicShadowSentinel;
1403   } else if (ClWithTls) {
1404     InGlobal = false;
1405     InTls = true;
1406     Offset = kDynamicShadowSentinel;
1407   } else {
1408     InGlobal = false;
1409     InTls = false;
1410     Offset = kDynamicShadowSentinel;
1411   }
1412 }
1413