//===- HWAddressSanitizer.cpp - detector of addressability bugs -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// This file is a part of HWAddressSanitizer, a basic address correctness
/// checker based on tagged addressing.
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Triple.h"
21 #include "llvm/Analysis/PostDominators.h"
22 #include "llvm/Analysis/StackSafetyAnalysis.h"
23 #include "llvm/Analysis/ValueTracking.h"
24 #include "llvm/BinaryFormat/Dwarf.h"
25 #include "llvm/BinaryFormat/ELF.h"
26 #include "llvm/IR/Attributes.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Constant.h"
29 #include "llvm/IR/Constants.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/DebugInfoMetadata.h"
32 #include "llvm/IR/DerivedTypes.h"
33 #include "llvm/IR/Dominators.h"
34 #include "llvm/IR/Function.h"
35 #include "llvm/IR/IRBuilder.h"
36 #include "llvm/IR/InlineAsm.h"
37 #include "llvm/IR/InstIterator.h"
38 #include "llvm/IR/Instruction.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/IntrinsicInst.h"
41 #include "llvm/IR/Intrinsics.h"
42 #include "llvm/IR/LLVMContext.h"
43 #include "llvm/IR/MDBuilder.h"
44 #include "llvm/IR/Module.h"
45 #include "llvm/IR/Type.h"
46 #include "llvm/IR/Value.h"
47 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/CommandLine.h"
49 #include "llvm/Support/Debug.h"
50 #include "llvm/Support/raw_ostream.h"
51 #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
52 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
53 #include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
54 #include "llvm/Transforms/Utils/ModuleUtils.h"
55 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
56 
57 using namespace llvm;
58 
59 #define DEBUG_TYPE "hwasan"
60 
// Names of IR entities created or referenced by this pass.
// kHwasanModuleCtorName is used both for the module constructor and for the
// comdat that holds the constructor and the globals note.
const char kHwasanModuleCtorName[] = "hwasan.module_ctor";
// Name of the private global that carries the ELF note.
const char kHwasanNoteName[] = "hwasan.note";
// Runtime initialization function called from the module constructor.
const char kHwasanInitName[] = "__hwasan_init";
// NOTE(review): not referenced in the part of the file visible here;
// presumably used by the personality-function instrumentation.
const char kHwasanPersonalityThunkName[] = "__hwasan_personality_thunk";

// Global that is loaded to obtain the shadow base when the shadow offset is
// dynamic and the shadow is not accessed through the ifunc global.
const char kHwasanShadowMemoryDynamicAddress[] =
    "__hwasan_shadow_memory_dynamic_address";

// Access sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;

static const size_t kDefaultShadowScale = 4;
// Value of ShadowMapping::Offset that marks the shadow offset as dynamic
// (i.e. not a compile-time constant).
static const uint64_t kDynamicShadowSentinel =
    std::numeric_limits<uint64_t>::max();

static const unsigned kShadowBaseAlignment = 32;
77 
// Prefix prepended to the names of the load/store callbacks and, unless
// suppressed in kernel mode, to the mem* intrinsic replacements.
static cl::opt<std::string>
    ClMemoryAccessCallbackPrefix("hwasan-memory-access-callback-prefix",
                                 cl::desc("Prefix for memory access callbacks"),
                                 cl::Hidden, cl::init("__hwasan_"));

// In kernel mode the mem* replacements are emitted without a prefix unless
// this flag is set.
static cl::opt<bool> ClKasanMemIntrinCallbackPrefix(
    "hwasan-kernel-mem-intrinsic-prefix",
    cl::desc("Use prefix for memory intrinsics in KASAN mode"), cl::Hidden,
    cl::init(false));

// Callback-based instrumentation is always selected on x86_64, regardless of
// this flag (see shouldInstrumentWithCalls).
static cl::opt<bool> ClInstrumentWithCalls(
    "hwasan-instrument-with-calls",
    cl::desc("instrument reads and writes with callbacks"), cl::Hidden,
    cl::init(false));

// Per-kind switches consulted when collecting interesting memory operands.
static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads",
                                       cl::desc("instrument read instructions"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentWrites("hwasan-instrument-writes",
                       cl::desc("instrument write instructions"), cl::Hidden,
                       cl::init(true));

static cl::opt<bool> ClInstrumentAtomics(
    "hwasan-instrument-atomics",
    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
    cl::init(true));

static cl::opt<bool> ClInstrumentByval("hwasan-instrument-byval",
                                       cl::desc("instrument byval arguments"),
                                       cl::Hidden, cl::init(true));

// When given explicitly, overrides the Recover value passed to the
// HWAddressSanitizer constructor.
static cl::opt<bool>
    ClRecover("hwasan-recover",
              cl::desc("Enable recovery mode (continue-after-error)."),
              cl::Hidden, cl::init(false));

// Ignored when page aliasing is in effect (see shouldInstrumentStack).
static cl::opt<bool> ClInstrumentStack("hwasan-instrument-stack",
                                       cl::desc("instrument stack (allocas)"),
                                       cl::Hidden, cl::init(true));
119 
120 static cl::opt<bool>
121     ClUseStackSafety("hwasan-use-stack-safety", cl::Hidden, cl::init(true),
122                      cl::Hidden, cl::desc("Use Stack Safety analysis results"),
123                      cl::Optional);
124 
125 static cl::opt<size_t> ClMaxLifetimes(
126     "hwasan-max-lifetimes-for-alloca", cl::Hidden, cl::init(3),
127     cl::ReallyHidden,
128     cl::desc("How many lifetime ends to handle for a single alloca."),
129     cl::Optional);
130 
// Only takes effect when stack instrumentation is enabled (see
// shouldDetectUseAfterScope).
static cl::opt<bool>
    ClUseAfterScope("hwasan-use-after-scope",
                    cl::desc("detect use after scope within function"),
                    cl::Hidden, cl::init(false));

static cl::opt<bool> ClUARRetagToZero(
    "hwasan-uar-retag-to-zero",
    cl::desc("Clear alloca tags before returning from the function to allow "
             "non-instrumented and instrumented function calls mix. When set "
             "to false, allocas are retagged before returning from the "
             "function to detect use after return."),
    cl::Hidden, cl::init(true));

static cl::opt<bool> ClGenerateTagsWithCalls(
    "hwasan-generate-tags-with-calls",
    cl::desc("generate new tags with runtime library calls"), cl::Hidden,
    cl::init(false));

// When not given explicitly, globals are instrumented iff the target has a
// new enough runtime (see initializeModule).
static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"),
                               cl::Hidden, cl::init(false), cl::ZeroOrMore);

// -1 means "no match-all tag". When the flag is absent, kernel mode uses 0xFF
// as the match-all tag (see initializeModule).
static cl::opt<int> ClMatchAllTag(
    "hwasan-match-all-tag",
    cl::desc("don't report bad accesses via pointers with this tag"),
    cl::Hidden, cl::init(-1));

// When given explicitly, overrides the CompileKernel value passed to the
// HWAddressSanitizer constructor.
static cl::opt<bool>
    ClEnableKhwasan("hwasan-kernel",
                    cl::desc("Enable KernelHWAddressSanitizer instrumentation"),
                    cl::Hidden, cl::init(false));
161 
// These flags allow changing the shadow mapping and control how shadow memory
// is accessed. The shadow mapping looks like:
//    Shadow = (Mem >> scale) + offset

static cl::opt<uint64_t>
    ClMappingOffset("hwasan-mapping-offset",
                    cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"),
                    cl::Hidden, cl::init(0));

static cl::opt<bool>
    ClWithIfunc("hwasan-with-ifunc",
                cl::desc("Access dynamic shadow through an ifunc global on "
                         "platforms that support this"),
                cl::Hidden, cl::init(false));

static cl::opt<bool> ClWithTls(
    "hwasan-with-tls",
    cl::desc("Access dynamic shadow through an thread-local pointer on "
             "platforms that support this"),
    cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClRecordStackHistory("hwasan-record-stack-history",
                         cl::desc("Record stack frames with tagged allocations "
                                  "in a thread-local ring buffer"),
                         cl::Hidden, cl::init(true));
static cl::opt<bool>
    ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
                              cl::desc("instrument memory intrinsics"),
                              cl::Hidden, cl::init(true));

// The three flags below default, when not given explicitly, to a value derived
// from whether the target has a new enough runtime (see initializeModule):
// landing pads are instrumented only with an old runtime, while short granules
// and personality functions are enabled only with a new one.
static cl::opt<bool>
    ClInstrumentLandingPads("hwasan-instrument-landing-pads",
                            cl::desc("instrument landing pads"), cl::Hidden,
                            cl::init(false), cl::ZeroOrMore);

static cl::opt<bool> ClUseShortGranules(
    "hwasan-use-short-granules",
    cl::desc("use short granules in allocas and outlined checks"), cl::Hidden,
    cl::init(false), cl::ZeroOrMore);

static cl::opt<bool> ClInstrumentPersonalityFunctions(
    "hwasan-instrument-personality-functions",
    cl::desc("instrument personality functions"), cl::Hidden, cl::init(false),
    cl::ZeroOrMore);

// When given explicitly, outlined checks are used iff this flag is false (and
// the target is AArch64 ELF); otherwise they are used iff recovery is off.
static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
                                       cl::desc("inline all checks"),
                                       cl::Hidden, cl::init(false));

// Enabled from clang by "-fsanitize-hwaddress-experimental-aliasing".
// Only honoured on x86_64 (see shouldUsePageAliases).
static cl::opt<bool> ClUsePageAliases("hwasan-experimental-use-page-aliases",
                                      cl::desc("Use page aliasing in HWASan"),
                                      cl::Hidden, cl::init(false));
216 
217 namespace {
218 
219 bool shouldUsePageAliases(const Triple &TargetTriple) {
220   return ClUsePageAliases && TargetTriple.getArch() == Triple::x86_64;
221 }
222 
223 bool shouldInstrumentStack(const Triple &TargetTriple) {
224   return !shouldUsePageAliases(TargetTriple) && ClInstrumentStack;
225 }
226 
227 bool shouldInstrumentWithCalls(const Triple &TargetTriple) {
228   return ClInstrumentWithCalls || TargetTriple.getArch() == Triple::x86_64;
229 }
230 
231 bool mightUseStackSafetyAnalysis(bool DisableOptimization) {
232   return ClUseStackSafety.getNumOccurrences() ? ClUseStackSafety
233                                               : !DisableOptimization;
234 }
235 
236 bool shouldUseStackSafetyAnalysis(const Triple &TargetTriple,
237                                   bool DisableOptimization) {
238   return shouldInstrumentStack(TargetTriple) &&
239          mightUseStackSafetyAnalysis(DisableOptimization);
240 }
241 
242 bool shouldDetectUseAfterScope(const Triple &TargetTriple) {
243   return ClUseAfterScope && shouldInstrumentStack(TargetTriple);
244 }
245 
/// An instrumentation pass implementing detection of addressability bugs
/// using tagged pointers.
///
/// One instance is created per module. Construction performs the module-level
/// setup (initializeModule()); functions are then instrumented one at a time
/// through sanitizeFunction().
class HWAddressSanitizer {
public:
  /// \param M             Module to instrument.
  /// \param CompileKernel Kernel-mode default; overridden by -hwasan-kernel
  ///                      when that flag is given explicitly.
  /// \param Recover       Recovery-mode default; overridden by
  ///                      -hwasan-recover when that flag is given explicitly.
  /// \param SSI           Stack safety results, or nullptr when unavailable.
  HWAddressSanitizer(Module &M, bool CompileKernel, bool Recover,
                     const StackSafetyGlobalInfo *SSI)
      : M(M), SSI(SSI) {
    this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
    this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0
                              ? ClEnableKhwasan
                              : CompileKernel;

    // Must run after Recover/CompileKernel are set: initializeModule() reads
    // both.
    initializeModule();
  }

  void setSSI(const StackSafetyGlobalInfo *S) { SSI = S; }

  bool sanitizeFunction(Function &F,
                        llvm::function_ref<const DominatorTree &()> GetDT,
                        llvm::function_ref<const PostDominatorTree &()> GetPDT);
  void initializeModule();
  void createHwasanCtorComdat();

  void initializeCallbacks(Module &M);

  Value *getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val);

  Value *getDynamicShadowIfunc(IRBuilder<> &IRB);
  Value *getShadowNonTls(IRBuilder<> &IRB);

  void untagPointerOperand(Instruction *I, Value *Addr);
  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);

  int64_t getAccessInfo(bool IsWrite, unsigned AccessSizeIndex);
  void instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
                                  unsigned AccessSizeIndex,
                                  Instruction *InsertBefore);
  void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
                                 unsigned AccessSizeIndex,
                                 Instruction *InsertBefore);
  bool ignoreMemIntrinsic(MemIntrinsic *MI);
  void instrumentMemIntrinsic(MemIntrinsic *MI);
  bool instrumentMemAccess(InterestingMemoryOperand &O);
  bool ignoreAccess(Instruction *Inst, Value *Ptr);
  void getInterestingMemoryOperands(
      Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting);

  bool isInterestingAlloca(const AllocaInst &AI);
  void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
  Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
  Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
  bool instrumentStack(memtag::StackInfo &Info, Value *StackTag,
                       llvm::function_ref<const DominatorTree &()> GetDT,
                       llvm::function_ref<const PostDominatorTree &()> GetPDT);
  Value *readRegister(IRBuilder<> &IRB, StringRef Name);
  bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
  Value *getNextTagWithCall(IRBuilder<> &IRB);
  Value *getStackBaseTag(IRBuilder<> &IRB);
  Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, AllocaInst *AI,
                      unsigned AllocaNo);
  Value *getUARTag(IRBuilder<> &IRB, Value *StackTag);

  Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty);
  Value *applyTagMask(IRBuilder<> &IRB, Value *OldTag);
  unsigned retagMask(unsigned AllocaNo);

  void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);

  void instrumentGlobal(GlobalVariable *GV, uint8_t Tag);
  void instrumentGlobals();

  void instrumentPersonalityFunctions();

private:
  // C is assigned in initializeModule(). M is declared before the type
  // members below because their default initializers read M.getContext().
  LLVMContext *C;
  Module &M;
  const StackSafetyGlobalInfo *SSI;
  Triple TargetTriple;
  // Replacements for the mem* intrinsics and the vfork handler; set up in
  // initializeCallbacks().
  FunctionCallee HWAsanMemmove, HWAsanMemcpy, HWAsanMemset;
  FunctionCallee HWAsanHandleVfork;

  /// This struct defines the shadow mapping using the rule:
  ///   shadow = (mem >> Scale) + Offset.
  /// If InGlobal is true, then
  ///   extern char __hwasan_shadow[];
  ///   shadow = (mem >> Scale) + &__hwasan_shadow
  /// If InTls is true, then
  ///   extern char *__hwasan_tls;
  ///   shadow = (mem>>Scale) + align_up(__hwasan_shadow, kShadowBaseAlignment)
  ///
  /// If WithFrameRecord is true, then __hwasan_tls will be used to access the
  /// ring buffer for storing stack allocations on targets that support it.
  struct ShadowMapping {
    int Scale;
    uint64_t Offset;
    bool InGlobal;
    bool InTls;
    bool WithFrameRecord;

    void init(Triple &TargetTriple, bool InstrumentWithCalls);
    // Alignment implied by the shadow scale: 2^Scale bytes.
    uint64_t getObjectAlignment() const { return 1ULL << Scale; }
  };

  ShadowMapping Mapping;

  // Cached IR types. VoidTy and Int64Ty are initialized here; the others are
  // assigned in initializeModule().
  Type *VoidTy = Type::getVoidTy(M.getContext());
  Type *IntptrTy;
  Type *Int8PtrTy;
  Type *Int8Ty;
  Type *Int32Ty;
  Type *Int64Ty = Type::getInt64Ty(M.getContext());

  // Effective configuration. CompileKernel and Recover are set by the
  // constructor; the remaining flags are computed in initializeModule().
  bool CompileKernel;
  bool Recover;
  bool OutlinedChecks;
  bool UseShortGranules;
  bool InstrumentLandingPads;
  bool InstrumentWithCalls;
  bool InstrumentStack;
  bool DetectUseAfterScope;
  bool UsePageAliases;

  bool HasMatchAllTag = false;
  uint8_t MatchAllTag = 0;

  // Tag layout; both depend on whether the target is x86_64 (see
  // initializeModule()).
  unsigned PointerTagShift;
  uint64_t TagMaskByte;

  Function *HwasanCtorFunction;

  // Access callbacks, indexed by [IsWrite][AccessSizeIndex] and [IsWrite]
  // respectively; populated in initializeCallbacks().
  FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
  FunctionCallee HwasanMemoryAccessCallbackSized[2];

  FunctionCallee HwasanTagMemoryFunc;
  FunctionCallee HwasanGenerateTagFunc;

  // The "__hwasan_shadow" global; set in initializeCallbacks().
  Constant *ShadowGlobal;

  Value *ShadowBase = nullptr;
  Value *StackBaseTag = nullptr;
  // The "__hwasan_tls" global; only set on non-Android targets.
  GlobalValue *ThreadPtrGlobal = nullptr;
};
388 
389 } // end anonymous namespace
390 
391 PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
392                                               ModuleAnalysisManager &MAM) {
393   const StackSafetyGlobalInfo *SSI = nullptr;
394   auto TargetTriple = llvm::Triple(M.getTargetTriple());
395   if (shouldUseStackSafetyAnalysis(TargetTriple, Options.DisableOptimization))
396     SSI = &MAM.getResult<StackSafetyGlobalAnalysis>(M);
397 
398   HWAddressSanitizer HWASan(M, Options.CompileKernel, Options.Recover, SSI);
399   bool Modified = false;
400   auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
401   for (Function &F : M) {
402     Modified |= HWASan.sanitizeFunction(
403         F,
404         [&]() -> const DominatorTree & {
405           return FAM.getResult<DominatorTreeAnalysis>(F);
406         },
407         [&]() -> const PostDominatorTree & {
408           return FAM.getResult<PostDominatorTreeAnalysis>(F);
409         });
410   }
411   if (Modified)
412     return PreservedAnalyses::none();
413   return PreservedAnalyses::all();
414 }
415 void HWAddressSanitizerPass::printPipeline(
416     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
417   static_cast<PassInfoMixin<HWAddressSanitizerPass> *>(this)->printPipeline(
418       OS, MapClassName2PassName);
419   OS << "<";
420   if (Options.CompileKernel)
421     OS << "kernel;";
422   if (Options.Recover)
423     OS << "recover";
424   OS << ">";
425 }
426 
/// Gets or creates the module constructor that calls __hwasan_init, and emits
/// the ELF note together with the helper globals needed to locate the
/// hwasan_globals section: declarations of the section's start/stop symbols,
/// the note itself, and a zero-length dummy global placed in that section.
void HWAddressSanitizer::createHwasanCtorComdat() {
  std::tie(HwasanCtorFunction, std::ignore) =
      getOrCreateSanitizerCtorAndInitFunctions(
          M, kHwasanModuleCtorName, kHwasanInitName,
          /*InitArgTypes=*/{},
          /*InitArgs=*/{},
          // This callback is invoked when the functions are created the first
          // time. Hook them into the global ctors list in that case:
          [&](Function *Ctor, FunctionCallee) {
            Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
            Ctor->setComdat(CtorComdat);
            appendToGlobalCtors(M, Ctor, 0, Ctor);
          });

  // Create a note that contains pointers to the list of global
  // descriptors. Adding a note to the output file will cause the linker to
  // create a PT_NOTE program header pointing to the note that we can use to
  // find the descriptor list starting from the program headers. A function
  // provided by the runtime initializes the shadow memory for the globals by
  // accessing the descriptor list via the note. The dynamic loader needs to
  // call this function whenever a library is loaded.
  //
  // The reason why we use a note for this instead of a more conventional
  // approach of having a global constructor pass a descriptor list pointer to
  // the runtime is because of an order of initialization problem. With
  // constructors we can encounter the following problematic scenario:
  //
  // 1) library A depends on library B and also interposes one of B's symbols
  // 2) B's constructors are called before A's (as required for correctness)
  // 3) during construction, B accesses one of its "own" globals (actually
  //    interposed by A) and triggers a HWASAN failure due to the initialization
  //    for A not having happened yet
  //
  // Even without interposition it is possible to run into similar situations in
  // cases where two libraries mutually depend on each other.
  //
  // We only need one note per binary, so put everything for the note in a
  // comdat. This needs to be a comdat with an .init_array section to prevent
  // newer versions of lld from discarding the note.
  //
  // Create the note even if we aren't instrumenting globals. This ensures that
  // binaries linked from object files with both instrumented and
  // non-instrumented globals will end up with a note, even if a comdat from an
  // object file with non-instrumented globals is selected. The note is harmless
  // if the runtime doesn't support it, since it will just be ignored.
  // Same comdat name as the constructor's, so the note shares its comdat.
  Comdat *NoteComdat = M.getOrInsertComdat(kHwasanModuleCtorName);

  // External, hidden, DSO-local declarations of the section bounds.
  Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
  auto Start =
      new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
                         nullptr, "__start_hwasan_globals");
  Start->setVisibility(GlobalValue::HiddenVisibility);
  Start->setDSOLocal(true);
  auto Stop =
      new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
                         nullptr, "__stop_hwasan_globals");
  Stop->setVisibility(GlobalValue::HiddenVisibility);
  Stop->setDSOLocal(true);

  // Null-terminated so actually 8 bytes, which are required in order to align
  // the note properly.
  auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0");

  // Note layout: n_namesz, n_descsz, n_type, name, then the two 32-bit
  // relative pointers that form the descriptor.
  auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(),
                                 Int32Ty, Int32Ty);
  auto *Note =
      new GlobalVariable(M, NoteTy, /*isConstant=*/true,
                         GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName);
  Note->setSection(".note.hwasan.globals");
  Note->setComdat(NoteComdat);
  Note->setAlignment(Align(4));
  Note->setDSOLocal(true);

  // The pointers in the note need to be relative so that the note ends up being
  // placed in rodata, which is the standard location for notes.
  // Each relative pointer is (Ptr - Note) truncated to 32 bits.
  auto CreateRelPtr = [&](Constant *Ptr) {
    return ConstantExpr::getTrunc(
        ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty),
                             ConstantExpr::getPtrToInt(Note, Int64Ty)),
        Int32Ty);
  };
  Note->setInitializer(ConstantStruct::getAnon(
      {ConstantInt::get(Int32Ty, 8),                           // n_namesz
       ConstantInt::get(Int32Ty, 8),                           // n_descsz
       ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type
       Name, CreateRelPtr(Start), CreateRelPtr(Stop)}));
  appendToCompilerUsed(M, Note);

  // Create a zero-length global in hwasan_globals so that the linker will
  // always create start and stop symbols.
  auto Dummy = new GlobalVariable(
      M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
      Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global");
  Dummy->setSection("hwasan_globals");
  Dummy->setComdat(NoteComdat);
  // Tie the dummy to the note through !associated metadata.
  Dummy->setMetadata(LLVMContext::MD_associated,
                     MDNode::get(*C, ValueAsMetadata::get(Note)));
  appendToCompilerUsed(M, Dummy);
}
526 
527 /// Module-level initialization.
528 ///
529 /// inserts a call to __hwasan_init to the module's constructor list.
530 void HWAddressSanitizer::initializeModule() {
531   LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
532   auto &DL = M.getDataLayout();
533 
534   TargetTriple = Triple(M.getTargetTriple());
535 
536   // x86_64 currently has two modes:
537   // - Intel LAM (default)
538   // - pointer aliasing (heap only)
539   bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
540   UsePageAliases = shouldUsePageAliases(TargetTriple);
541   InstrumentWithCalls = shouldInstrumentWithCalls(TargetTriple);
542   InstrumentStack = shouldInstrumentStack(TargetTriple);
543   DetectUseAfterScope = shouldDetectUseAfterScope(TargetTriple);
544   PointerTagShift = IsX86_64 ? 57 : 56;
545   TagMaskByte = IsX86_64 ? 0x3F : 0xFF;
546 
547   Mapping.init(TargetTriple, InstrumentWithCalls);
548 
549   C = &(M.getContext());
550   IRBuilder<> IRB(*C);
551   IntptrTy = IRB.getIntPtrTy(DL);
552   Int8PtrTy = IRB.getInt8PtrTy();
553   Int8Ty = IRB.getInt8Ty();
554   Int32Ty = IRB.getInt32Ty();
555 
556   HwasanCtorFunction = nullptr;
557 
558   // Older versions of Android do not have the required runtime support for
559   // short granules, global or personality function instrumentation. On other
560   // platforms we currently require using the latest version of the runtime.
561   bool NewRuntime =
562       !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30);
563 
564   UseShortGranules =
565       ClUseShortGranules.getNumOccurrences() ? ClUseShortGranules : NewRuntime;
566   OutlinedChecks =
567       TargetTriple.isAArch64() && TargetTriple.isOSBinFormatELF() &&
568       (ClInlineAllChecks.getNumOccurrences() ? !ClInlineAllChecks : !Recover);
569 
570   if (ClMatchAllTag.getNumOccurrences()) {
571     if (ClMatchAllTag != -1) {
572       HasMatchAllTag = true;
573       MatchAllTag = ClMatchAllTag & 0xFF;
574     }
575   } else if (CompileKernel) {
576     HasMatchAllTag = true;
577     MatchAllTag = 0xFF;
578   }
579 
580   // If we don't have personality function support, fall back to landing pads.
581   InstrumentLandingPads = ClInstrumentLandingPads.getNumOccurrences()
582                               ? ClInstrumentLandingPads
583                               : !NewRuntime;
584 
585   if (!CompileKernel) {
586     createHwasanCtorComdat();
587     bool InstrumentGlobals =
588         ClGlobals.getNumOccurrences() ? ClGlobals : NewRuntime;
589 
590     if (InstrumentGlobals && !UsePageAliases)
591       instrumentGlobals();
592 
593     bool InstrumentPersonalityFunctions =
594         ClInstrumentPersonalityFunctions.getNumOccurrences()
595             ? ClInstrumentPersonalityFunctions
596             : NewRuntime;
597     if (InstrumentPersonalityFunctions)
598       instrumentPersonalityFunctions();
599   }
600 
601   if (!TargetTriple.isAndroid()) {
602     Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] {
603       auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false,
604                                     GlobalValue::ExternalLinkage, nullptr,
605                                     "__hwasan_tls", nullptr,
606                                     GlobalVariable::InitialExecTLSModel);
607       appendToCompilerUsed(M, GV);
608       return GV;
609     });
610     ThreadPtrGlobal = cast<GlobalVariable>(C);
611   }
612 }
613 
614 void HWAddressSanitizer::initializeCallbacks(Module &M) {
615   IRBuilder<> IRB(*C);
616   for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
617     const std::string TypeStr = AccessIsWrite ? "store" : "load";
618     const std::string EndingStr = Recover ? "_noabort" : "";
619 
620     HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(
621         ClMemoryAccessCallbackPrefix + TypeStr + "N" + EndingStr,
622         FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false));
623 
624     for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
625          AccessSizeIndex++) {
626       HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
627           M.getOrInsertFunction(
628               ClMemoryAccessCallbackPrefix + TypeStr +
629                   itostr(1ULL << AccessSizeIndex) + EndingStr,
630               FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false));
631     }
632   }
633 
634   HwasanTagMemoryFunc = M.getOrInsertFunction(
635       "__hwasan_tag_memory", IRB.getVoidTy(), Int8PtrTy, Int8Ty, IntptrTy);
636   HwasanGenerateTagFunc =
637       M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);
638 
639   ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
640                                      ArrayType::get(IRB.getInt8Ty(), 0));
641 
642   const std::string MemIntrinCallbackPrefix =
643       (CompileKernel && !ClKasanMemIntrinCallbackPrefix)
644           ? std::string("")
645           : ClMemoryAccessCallbackPrefix;
646   HWAsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove",
647                                         IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
648                                         IRB.getInt8PtrTy(), IntptrTy);
649   HWAsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy",
650                                        IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
651                                        IRB.getInt8PtrTy(), IntptrTy);
652   HWAsanMemset = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memset",
653                                        IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
654                                        IRB.getInt32Ty(), IntptrTy);
655 
656   HWAsanHandleVfork =
657       M.getOrInsertFunction("__hwasan_handle_vfork", IRB.getVoidTy(), IntptrTy);
658 }
659 
660 Value *HWAddressSanitizer::getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val) {
661   // An empty inline asm with input reg == output reg.
662   // An opaque no-op cast, basically.
663   // This prevents code bloat as a result of rematerializing trivial definitions
664   // such as constants or global addresses at every load and store.
665   InlineAsm *Asm =
666       InlineAsm::get(FunctionType::get(Int8PtrTy, {Val->getType()}, false),
667                      StringRef(""), StringRef("=r,0"),
668                      /*hasSideEffects=*/false);
669   return IRB.CreateCall(Asm, {Val}, ".hwasan.shadow");
670 }
671 
672 Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
673   return getOpaqueNoopCast(IRB, ShadowGlobal);
674 }
675 
676 Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) {
677   if (Mapping.Offset != kDynamicShadowSentinel)
678     return getOpaqueNoopCast(
679         IRB, ConstantExpr::getIntToPtr(
680                  ConstantInt::get(IntptrTy, Mapping.Offset), Int8PtrTy));
681 
682   if (Mapping.InGlobal) {
683     return getDynamicShadowIfunc(IRB);
684   } else {
685     Value *GlobalDynamicAddress =
686         IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
687             kHwasanShadowMemoryDynamicAddress, Int8PtrTy);
688     return IRB.CreateLoad(Int8PtrTy, GlobalDynamicAddress);
689   }
690 }
691 
692 bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
693   // Do not instrument acesses from different address spaces; we cannot deal
694   // with them.
695   Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
696   if (PtrTy->getPointerAddressSpace() != 0)
697     return true;
698 
699   // Ignore swifterror addresses.
700   // swifterror memory addresses are mem2reg promoted by instruction
701   // selection. As such they cannot have regular uses like an instrumentation
702   // function and it makes no sense to track them as memory.
703   if (Ptr->isSwiftError())
704     return true;
705 
706   if (findAllocaForValue(Ptr)) {
707     if (!InstrumentStack)
708       return true;
709     if (SSI && SSI->stackAccessIsSafe(*Inst))
710       return true;
711   }
712   return false;
713 }
714 
715 void HWAddressSanitizer::getInterestingMemoryOperands(
716     Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
717   // Skip memory accesses inserted by another instrumentation.
718   if (I->hasMetadata("nosanitize"))
719     return;
720 
721   // Do not instrument the load fetching the dynamic shadow address.
722   if (ShadowBase == I)
723     return;
724 
725   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
726     if (!ClInstrumentReads || ignoreAccess(I, LI->getPointerOperand()))
727       return;
728     Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
729                              LI->getType(), LI->getAlign());
730   } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
731     if (!ClInstrumentWrites || ignoreAccess(I, SI->getPointerOperand()))
732       return;
733     Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
734                              SI->getValueOperand()->getType(), SI->getAlign());
735   } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
736     if (!ClInstrumentAtomics || ignoreAccess(I, RMW->getPointerOperand()))
737       return;
738     Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
739                              RMW->getValOperand()->getType(), None);
740   } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
741     if (!ClInstrumentAtomics || ignoreAccess(I, XCHG->getPointerOperand()))
742       return;
743     Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
744                              XCHG->getCompareOperand()->getType(), None);
745   } else if (auto CI = dyn_cast<CallInst>(I)) {
746     for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
747       if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
748           ignoreAccess(I, CI->getArgOperand(ArgNo)))
749         continue;
750       Type *Ty = CI->getParamByValType(ArgNo);
751       Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
752     }
753   }
754 }
755 
756 static unsigned getPointerOperandIndex(Instruction *I) {
757   if (LoadInst *LI = dyn_cast<LoadInst>(I))
758     return LI->getPointerOperandIndex();
759   if (StoreInst *SI = dyn_cast<StoreInst>(I))
760     return SI->getPointerOperandIndex();
761   if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I))
762     return RMW->getPointerOperandIndex();
763   if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I))
764     return XCHG->getPointerOperandIndex();
765   report_fatal_error("Unexpected instruction");
766   return -1;
767 }
768 
769 static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
770   size_t Res = countTrailingZeros(TypeSize / 8);
771   assert(Res < kNumberOfAccessSizes);
772   return Res;
773 }
774 
775 void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
776   if (TargetTriple.isAArch64() || TargetTriple.getArch() == Triple::x86_64)
777     return;
778 
779   IRBuilder<> IRB(I);
780   Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
781   Value *UntaggedPtr =
782       IRB.CreateIntToPtr(untagPointer(IRB, AddrLong), Addr->getType());
783   I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
784 }
785 
786 Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
787   // Mem >> Scale
788   Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale);
789   if (Mapping.Offset == 0)
790     return IRB.CreateIntToPtr(Shadow, Int8PtrTy);
791   // (Mem >> Scale) + Offset
792   return IRB.CreateGEP(Int8Ty, ShadowBase, Shadow);
793 }
794 
795 int64_t HWAddressSanitizer::getAccessInfo(bool IsWrite,
796                                           unsigned AccessSizeIndex) {
797   return (CompileKernel << HWASanAccessInfo::CompileKernelShift) +
798          (HasMatchAllTag << HWASanAccessInfo::HasMatchAllShift) +
799          (MatchAllTag << HWASanAccessInfo::MatchAllShift) +
800          (Recover << HWASanAccessInfo::RecoverShift) +
801          (IsWrite << HWASanAccessInfo::IsWriteShift) +
802          (AccessSizeIndex << HWASanAccessInfo::AccessSizeShift);
803 }
804 
805 void HWAddressSanitizer::instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
806                                                     unsigned AccessSizeIndex,
807                                                     Instruction *InsertBefore) {
808   assert(!UsePageAliases);
809   const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
810   IRBuilder<> IRB(InsertBefore);
811   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
812   Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy);
813   IRB.CreateCall(Intrinsic::getDeclaration(
814                      M, UseShortGranules
815                             ? Intrinsic::hwasan_check_memaccess_shortgranules
816                             : Intrinsic::hwasan_check_memaccess),
817                  {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
818 }
819 
/// Emits an inline tag check for the access through \p Ptr immediately before
/// \p InsertBefore.
///
/// The tag carried by the pointer is compared with the tag byte loaded from
/// shadow memory. On a mismatch, further short-granule checks are emitted; if
/// those fail as well, control reaches a block containing a target-specific
/// trap instruction. Only x86_64 and AArch64 are supported; any other
/// architecture is a fatal error.
///
/// \param Ptr             Address being accessed.
/// \param IsWrite         Whether the access is a write (encoded into the
///                        trap immediate via getAccessInfo()).
/// \param AccessSizeIndex Access-size class as produced by
///                        TypeSizeToSizeIndex().
/// \param InsertBefore    Instruction before which the check is inserted.
void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
                                                   unsigned AccessSizeIndex,
                                                   Instruction *InsertBefore) {
  assert(!UsePageAliases);
  const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
  IRBuilder<> IRB(InsertBefore);

  // Split the pointer into its tag (the bits from PointerTagShift upward,
  // truncated to i8) and the untagged address, then load the memory tag from
  // the shadow byte of that address.
  Value *PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
  Value *PtrTag = IRB.CreateTrunc(IRB.CreateLShr(PtrLong, PointerTagShift),
                                  IRB.getInt8Ty());
  Value *AddrLong = untagPointer(IRB, PtrLong);
  Value *Shadow = memToShadow(AddrLong, IRB);
  Value *MemTag = IRB.CreateLoad(Int8Ty, Shadow);
  Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag);

  // A pointer carrying the match-all tag never counts as a mismatch.
  if (HasMatchAllTag) {
    Value *TagNotIgnored = IRB.CreateICmpNE(
        PtrTag, ConstantInt::get(PtrTag->getType(), MatchAllTag));
    TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
  }

  // Slow path, entered only on a mismatch; the branch weights mark it as
  // unlikely.
  Instruction *CheckTerm =
      SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, false,
                                MDBuilder(*C).createBranchWeights(1, 100000));

  // A memory tag above 15 cannot be a short-granule size, so the mismatch is
  // a real failure. This creates the failure block; it ends in unreachable
  // unless Recover is set.
  IRB.SetInsertPoint(CheckTerm);
  Value *OutOfShortGranuleTagRange =
      IRB.CreateICmpUGT(MemTag, ConstantInt::get(Int8Ty, 15));
  Instruction *CheckFailTerm =
      SplitBlockAndInsertIfThen(OutOfShortGranuleTagRange, CheckTerm, !Recover,
                                MDBuilder(*C).createBranchWeights(1, 100000));

  // Short granule: the offset of the last accessed byte within the 16-byte
  // granule must be below the value stored in shadow; otherwise fail.
  IRB.SetInsertPoint(CheckTerm);
  Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(PtrLong, 15), Int8Ty);
  PtrLowBits = IRB.CreateAdd(
      PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1));
  Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, MemTag);
  SplitBlockAndInsertIfThen(PtrLowBitsOOB, CheckTerm, false,
                            MDBuilder(*C).createBranchWeights(1, 100000),
                            (DomTreeUpdater *)nullptr, nullptr,
                            CheckFailTerm->getParent());

  // Short granule: the pointer tag is compared against the byte at the last
  // address of the granule (address | 15); a mismatch there is a failure.
  IRB.SetInsertPoint(CheckTerm);
  Value *InlineTagAddr = IRB.CreateOr(AddrLong, 15);
  InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, Int8PtrTy);
  Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr);
  Value *InlineTagMismatch = IRB.CreateICmpNE(PtrTag, InlineTag);
  SplitBlockAndInsertIfThen(InlineTagMismatch, CheckTerm, false,
                            MDBuilder(*C).createBranchWeights(1, 100000),
                            (DomTreeUpdater *)nullptr, nullptr,
                            CheckFailTerm->getParent());

  // Fill the failure block with a trap whose immediate encodes the runtime
  // part of AccessInfo; the faulting pointer is passed in a fixed register.
  IRB.SetInsertPoint(CheckFailTerm);
  InlineAsm *Asm;
  switch (TargetTriple.getArch()) {
  case Triple::x86_64:
    // The signal handler will find the data address in rdi.
    Asm = InlineAsm::get(
        FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
        "int3\nnopl " +
            itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)) +
            "(%rax)",
        "{rdi}",
        /*hasSideEffects=*/true);
    break;
  case Triple::aarch64:
  case Triple::aarch64_be:
    // The signal handler will find the data address in x0.
    Asm = InlineAsm::get(
        FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
        "brk #" + itostr(0x900 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
        "{x0}",
        /*hasSideEffects=*/true);
    break;
  default:
    report_fatal_error("unsupported architecture");
  }
  IRB.CreateCall(Asm, PtrLong);
  // In recover mode the failure block branches to the block holding
  // CheckTerm instead of ending in unreachable.
  if (Recover)
    cast<BranchInst>(CheckFailTerm)->setSuccessor(0, CheckTerm->getParent());
}
901 
902 bool HWAddressSanitizer::ignoreMemIntrinsic(MemIntrinsic *MI) {
903   if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
904     return (!ClInstrumentWrites || ignoreAccess(MTI, MTI->getDest())) &&
905            (!ClInstrumentReads || ignoreAccess(MTI, MTI->getSource()));
906   }
907   if (isa<MemSetInst>(MI))
908     return !ClInstrumentWrites || ignoreAccess(MI, MI->getDest());
909   return false;
910 }
911 
912 void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
913   IRBuilder<> IRB(MI);
914   if (isa<MemTransferInst>(MI)) {
915     IRB.CreateCall(
916         isa<MemMoveInst>(MI) ? HWAsanMemmove : HWAsanMemcpy,
917         {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
918          IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
919          IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
920   } else if (isa<MemSetInst>(MI)) {
921     IRB.CreateCall(
922         HWAsanMemset,
923         {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
924          IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
925          IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
926   }
927   MI->eraseFromParent();
928 }
929 
930 bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) {
931   Value *Addr = O.getPtr();
932 
933   LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n");
934 
935   if (O.MaybeMask)
936     return false; // FIXME
937 
938   IRBuilder<> IRB(O.getInsn());
939   if (isPowerOf2_64(O.TypeSize) &&
940       (O.TypeSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) &&
941       (!O.Alignment || *O.Alignment >= (1ULL << Mapping.Scale) ||
942        *O.Alignment >= O.TypeSize / 8)) {
943     size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeSize);
944     if (InstrumentWithCalls) {
945       IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
946                      IRB.CreatePointerCast(Addr, IntptrTy));
947     } else if (OutlinedChecks) {
948       instrumentMemAccessOutline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
949     } else {
950       instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
951     }
952   } else {
953     IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite],
954                    {IRB.CreatePointerCast(Addr, IntptrTy),
955                     ConstantInt::get(IntptrTy, O.TypeSize / 8)});
956   }
957   untagPointerOperand(O.getInsn(), Addr);
958 
959   return true;
960 }
961 
962 void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
963                                    size_t Size) {
964   size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
965   if (!UseShortGranules)
966     Size = AlignedSize;
967 
968   Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty());
969   if (InstrumentWithCalls) {
970     IRB.CreateCall(HwasanTagMemoryFunc,
971                    {IRB.CreatePointerCast(AI, Int8PtrTy), JustTag,
972                     ConstantInt::get(IntptrTy, AlignedSize)});
973   } else {
974     size_t ShadowSize = Size >> Mapping.Scale;
975     Value *ShadowPtr = memToShadow(IRB.CreatePointerCast(AI, IntptrTy), IRB);
976     // If this memset is not inlined, it will be intercepted in the hwasan
977     // runtime library. That's OK, because the interceptor skips the checks if
978     // the address is in the shadow region.
979     // FIXME: the interceptor is not as fast as real memset. Consider lowering
980     // llvm.memset right here into either a sequence of stores, or a call to
981     // hwasan_tag_memory.
982     if (ShadowSize)
983       IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, Align(1));
984     if (Size != AlignedSize) {
985       IRB.CreateStore(
986           ConstantInt::get(Int8Ty, Size % Mapping.getObjectAlignment()),
987           IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
988       IRB.CreateStore(JustTag, IRB.CreateConstGEP1_32(
989                                    Int8Ty, IRB.CreateBitCast(AI, Int8PtrTy),
990                                    AlignedSize - 1));
991     }
992   }
993 }
994 
995 unsigned HWAddressSanitizer::retagMask(unsigned AllocaNo) {
996   if (TargetTriple.getArch() == Triple::x86_64)
997     return AllocaNo & TagMaskByte;
998 
999   // A list of 8-bit numbers that have at most one run of non-zero bits.
1000   // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these
1001   // masks.
1002   // The list does not include the value 255, which is used for UAR.
1003   //
1004   // Because we are more likely to use earlier elements of this list than later
1005   // ones, it is sorted in increasing order of probability of collision with a
1006   // mask allocated (temporally) nearby. The program that generated this list
1007   // can be found at:
1008   // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py
1009   static unsigned FastMasks[] = {0,  128, 64,  192, 32,  96,  224, 112, 240,
1010                                  48, 16,  120, 248, 56,  24,  8,   124, 252,
1011                                  60, 28,  12,  4,   126, 254, 62,  30,  14,
1012                                  6,  2,   127, 63,  31,  15,  7,   3,   1};
1013   return FastMasks[AllocaNo % (sizeof(FastMasks) / sizeof(FastMasks[0]))];
1014 }
1015 
1016 Value *HWAddressSanitizer::applyTagMask(IRBuilder<> &IRB, Value *OldTag) {
1017   if (TargetTriple.getArch() == Triple::x86_64) {
1018     Constant *TagMask = ConstantInt::get(IntptrTy, TagMaskByte);
1019     Value *NewTag = IRB.CreateAnd(OldTag, TagMask);
1020     return NewTag;
1021   }
1022   // aarch64 uses 8-bit tags, so no mask is needed.
1023   return OldTag;
1024 }
1025 
1026 Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
1027   return IRB.CreateZExt(IRB.CreateCall(HwasanGenerateTagFunc), IntptrTy);
1028 }
1029 
1030 Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
1031   if (ClGenerateTagsWithCalls)
1032     return getNextTagWithCall(IRB);
1033   if (StackBaseTag)
1034     return StackBaseTag;
1035   // FIXME: use addressofreturnaddress (but implement it in aarch64 backend
1036   // first).
1037   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
1038   auto GetStackPointerFn = Intrinsic::getDeclaration(
1039       M, Intrinsic::frameaddress,
1040       IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
1041   Value *StackPointer = IRB.CreateCall(
1042       GetStackPointerFn, {Constant::getNullValue(IRB.getInt32Ty())});
1043 
1044   // Extract some entropy from the stack pointer for the tags.
1045   // Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ
1046   // between functions).
1047   Value *StackPointerLong = IRB.CreatePointerCast(StackPointer, IntptrTy);
1048   Value *StackTag =
1049       applyTagMask(IRB, IRB.CreateXor(StackPointerLong,
1050                                       IRB.CreateLShr(StackPointerLong, 20)));
1051   StackTag->setName("hwasan.stack.base.tag");
1052   return StackTag;
1053 }
1054 
1055 Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag,
1056                                         AllocaInst *AI, unsigned AllocaNo) {
1057   if (ClGenerateTagsWithCalls)
1058     return getNextTagWithCall(IRB);
1059   return IRB.CreateXor(StackTag,
1060                        ConstantInt::get(IntptrTy, retagMask(AllocaNo)));
1061 }
1062 
1063 Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB, Value *StackTag) {
1064   if (ClUARRetagToZero)
1065     return ConstantInt::get(IntptrTy, 0);
1066   if (ClGenerateTagsWithCalls)
1067     return getNextTagWithCall(IRB);
1068   return IRB.CreateXor(StackTag, ConstantInt::get(IntptrTy, TagMaskByte));
1069 }
1070 
1071 // Add a tag to an address.
1072 Value *HWAddressSanitizer::tagPointer(IRBuilder<> &IRB, Type *Ty,
1073                                       Value *PtrLong, Value *Tag) {
1074   assert(!UsePageAliases);
1075   Value *TaggedPtrLong;
1076   if (CompileKernel) {
1077     // Kernel addresses have 0xFF in the most significant byte.
1078     Value *ShiftedTag =
1079         IRB.CreateOr(IRB.CreateShl(Tag, PointerTagShift),
1080                      ConstantInt::get(IntptrTy, (1ULL << PointerTagShift) - 1));
1081     TaggedPtrLong = IRB.CreateAnd(PtrLong, ShiftedTag);
1082   } else {
1083     // Userspace can simply do OR (tag << PointerTagShift);
1084     Value *ShiftedTag = IRB.CreateShl(Tag, PointerTagShift);
1085     TaggedPtrLong = IRB.CreateOr(PtrLong, ShiftedTag);
1086   }
1087   return IRB.CreateIntToPtr(TaggedPtrLong, Ty);
1088 }
1089 
1090 // Remove tag from an address.
1091 Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) {
1092   assert(!UsePageAliases);
1093   Value *UntaggedPtrLong;
1094   if (CompileKernel) {
1095     // Kernel addresses have 0xFF in the most significant byte.
1096     UntaggedPtrLong =
1097         IRB.CreateOr(PtrLong, ConstantInt::get(PtrLong->getType(),
1098                                                0xFFULL << PointerTagShift));
1099   } else {
1100     // Userspace addresses have 0x00.
1101     UntaggedPtrLong =
1102         IRB.CreateAnd(PtrLong, ConstantInt::get(PtrLong->getType(),
1103                                                 ~(0xFFULL << PointerTagShift)));
1104   }
1105   return UntaggedPtrLong;
1106 }
1107 
1108 Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
1109   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
1110   if (TargetTriple.isAArch64() && TargetTriple.isAndroid()) {
1111     // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER
1112     // in Bionic's libc/private/bionic_tls.h.
1113     Function *ThreadPointerFunc =
1114         Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
1115     Value *SlotPtr = IRB.CreatePointerCast(
1116         IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
1117                                IRB.CreateCall(ThreadPointerFunc), 0x30),
1118         Ty->getPointerTo(0));
1119     return SlotPtr;
1120   }
1121   if (ThreadPtrGlobal)
1122     return ThreadPtrGlobal;
1123 
1124   return nullptr;
1125 }
1126 
/// Emits the function-entry code that establishes ShadowBase and, when
/// \p WithFrameRecord is set, stores a frame record into the per-thread ring
/// buffer and sets StackBaseTag.
///
/// \param IRB             Builder positioned where the prologue is emitted.
/// \param WithFrameRecord Whether to record this frame in the ring buffer.
void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
  // Choose the cheapest available source for the shadow base: a non-TLS
  // lookup when the mapping is not in TLS, or the ifunc-based lookup on
  // Android when no frame record is needed.
  if (!Mapping.InTls)
    ShadowBase = getShadowNonTls(IRB);
  else if (!WithFrameRecord && TargetTriple.isAndroid())
    ShadowBase = getDynamicShadowIfunc(IRB);

  // Done if the shadow base is known and no frame record was requested.
  if (!WithFrameRecord && ShadowBase)
    return;

  // Everything below works from the value stored in the thread slot.
  Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
  assert(SlotPtr);

  Value *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
  // Extract the address field from ThreadLong. Unnecessary on AArch64 with TBI.
  Value *ThreadLongMaybeUntagged =
      TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong);

  if (WithFrameRecord) {
    Function *F = IRB.GetInsertBlock()->getParent();
    // The stack base tag is derived from the thread-slot value.
    StackBaseTag = IRB.CreateAShr(ThreadLong, 3);

    // Prepare ring buffer data.
    // On aarch64 the PC is read from the "pc" register; elsewhere the address
    // of the function is used instead.
    Value *PC;
    if (TargetTriple.getArch() == Triple::aarch64)
      PC = readRegister(IRB, "pc");
    else
      PC = IRB.CreatePtrToInt(F, IntptrTy);
    Module *M = F->getParent();
    auto GetStackPointerFn = Intrinsic::getDeclaration(
        M, Intrinsic::frameaddress,
        IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
    Value *SP = IRB.CreatePtrToInt(
        IRB.CreateCall(GetStackPointerFn,
                       {Constant::getNullValue(IRB.getInt32Ty())}),
        IntptrTy);
    // Mix SP and PC.
    // Assumptions:
    // PC is 0x0000PPPPPPPPPPPP  (48 bits are meaningful, others are zero)
    // SP is 0xsssssssssssSSSS0  (4 lower bits are zero)
    // We only really need ~20 lower non-zero bits (SSSS), so we mix like this:
    //       0xSSSSPPPPPPPPPPPP
    SP = IRB.CreateShl(SP, 44);

    // Store data to ring buffer.
    Value *RecordPtr =
        IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IntptrTy->getPointerTo(0));
    IRB.CreateStore(IRB.CreateOr(PC, SP), RecordPtr);

    // Update the ring buffer. Top byte of ThreadLong defines the size of the
    // buffer in pages, it must be a power of two, and the start of the buffer
    // must be aligned by twice that much. Therefore wrap around of the ring
    // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
    // The use of AShr instead of LShr is due to
    //   https://bugs.llvm.org/show_bug.cgi?id=39030
    // Runtime library makes sure not to use the highest bit.
    Value *WrapMask = IRB.CreateXor(
        IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
        ConstantInt::get(IntptrTy, (uint64_t)-1));
    // Advance by one 8-byte record, wrap, and write the new value back to the
    // thread slot.
    Value *ThreadLongNew = IRB.CreateAnd(
        IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
    IRB.CreateStore(ThreadLongNew, SlotPtr);
  }

  if (!ShadowBase) {
    // Get shadow base address by aligning RecordPtr up.
    // Note: this is not correct if the pointer is already aligned.
    // Runtime library will make sure this never happens.
    ShadowBase = IRB.CreateAdd(
        IRB.CreateOr(
            ThreadLongMaybeUntagged,
            ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
        ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
    ShadowBase = IRB.CreateIntToPtr(ShadowBase, Int8PtrTy);
  }
}
1202 
1203 Value *HWAddressSanitizer::readRegister(IRBuilder<> &IRB, StringRef Name) {
1204   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
1205   Function *ReadRegister =
1206       Intrinsic::getDeclaration(M, Intrinsic::read_register, IntptrTy);
1207   MDNode *MD = MDNode::get(*C, {MDString::get(*C, Name)});
1208   Value *Args[] = {MetadataAsValue::get(*C, MD)};
1209   return IRB.CreateCall(ReadRegister, Args);
1210 }
1211 
1212 bool HWAddressSanitizer::instrumentLandingPads(
1213     SmallVectorImpl<Instruction *> &LandingPadVec) {
1214   for (auto *LP : LandingPadVec) {
1215     IRBuilder<> IRB(LP->getNextNode());
1216     IRB.CreateCall(
1217         HWAsanHandleVfork,
1218         {readRegister(IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp"
1219                                                                       : "sp")});
1220   }
1221   return true;
1222 }
1223 
1224 static bool isLifetimeIntrinsic(Value *V) {
1225   auto *II = dyn_cast<IntrinsicInst>(V);
1226   return II && II->isLifetimeStartOrEnd();
1227 }
1228 
/// Instruments every alloca listed in \p SInfo.
///
/// For each alloca this replaces its uses with a tagged address, widens its
/// lifetime intrinsics to the aligned size, updates debug-info expressions
/// with the tag offset, emits code to tag the memory when it becomes live and
/// to retag it when it dies, and finally aligns and pads the alloca.
///
/// \param SInfo    Stack information collected for the function.
/// \param StackTag Base tag of the frame; passed on to getAllocaTag() and
///                 getUARTag().
/// \param GetDT    Returns the dominator tree.
/// \param GetPDT   Returns the post-dominator tree.
/// \returns true, unconditionally.
bool HWAddressSanitizer::instrumentStack(
    memtag::StackInfo &SInfo, Value *StackTag,
    llvm::function_ref<const DominatorTree &()> GetDT,
    llvm::function_ref<const PostDominatorTree &()> GetPDT) {
  // Ideally, we want to calculate tagged stack base pointer, and rewrite all
  // alloca addresses using that. Unfortunately, offsets are not known yet
  // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a
  // temp, shift-OR it into each alloca address and xor with the retag mask.
  // This generates one extra instruction per alloca use.
  unsigned int I = 0;

  for (auto &KV : SInfo.AllocasToInstrument) {
    // N is the ordinal of this alloca; it selects the retag mask and the
    // fallback name.
    auto N = I++;
    auto *AI = KV.first;
    memtag::AllocaInfo &Info = KV.second;
    IRBuilder<> IRB(AI->getNextNode());

    // Replace uses of the alloca with tagged address.
    Value *Tag = getAllocaTag(IRB, StackTag, AI, N);
    Value *AILong = IRB.CreatePointerCast(AI, IntptrTy);
    Value *Replacement = tagPointer(IRB, AI->getType(), AILong, Tag);
    std::string Name =
        AI->hasName() ? AI->getName().str() : "alloca." + itostr(N);
    Replacement->setName(Name + ".hwasan");

    size_t Size = memtag::getAllocaSizeInBytes(*AI);
    size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());

    Value *AICast = IRB.CreatePointerCast(AI, Int8PtrTy);

    auto HandleLifetime = [&](IntrinsicInst *II) {
      // Set the lifetime intrinsic to cover the whole alloca. This reduces the
      // set of assumptions we need to make about the lifetime. Without this we
      // would need to ensure that we can track the lifetime pointer to a
      // constant offset from the alloca, and would still need to change the
      // size to include the extra alignment we use for the untagging to make
      // the size consistent.
      //
      // The check for standard lifetime below makes sure that we have exactly
      // one set of start / end in any execution (i.e. the ends are not
      // reachable from each other), so this will not cause any problems.
      II->setArgOperand(0, ConstantInt::get(Int64Ty, AlignedSize));
      II->setArgOperand(1, AICast);
    };
    llvm::for_each(Info.LifetimeStart, HandleLifetime);
    llvm::for_each(Info.LifetimeEnd, HandleLifetime);

    // The casts created above and the lifetime intrinsics keep referring to
    // the untagged alloca; every other use gets the tagged replacement.
    AI->replaceUsesWithIf(Replacement, [AICast, AILong](Use &U) {
      auto *User = U.getUser();
      return User != AILong && User != AICast && !isLifetimeIntrinsic(User);
    });

    for (auto *DDI : Info.DbgVariableIntrinsics) {
      // Prepend "tag_offset, N" to the dwarf expression.
      // Tag offset logically applies to the alloca pointer, and it makes sense
      // to put it at the beginning of the expression.
      SmallVector<uint64_t, 8> NewOps = {dwarf::DW_OP_LLVM_tag_offset,
                                         retagMask(N)};
      for (size_t LocNo = 0; LocNo < DDI->getNumVariableLocationOps(); ++LocNo)
        if (DDI->getVariableLocationOp(LocNo) == AI)
          DDI->setExpression(DIExpression::appendOpsToArg(DDI->getExpression(),
                                                          NewOps, LocNo));
    }

    // Retags the alloca with the use-after-return tag right before Node.
    auto TagEnd = [&](Instruction *Node) {
      IRB.SetInsertPoint(Node);
      Value *UARTag = getUARTag(IRB, StackTag);
      // When untagging, use the `AlignedSize` because we need to set the tags
      // for the entire alloca to zero. If we used `Size` here, we would
      // keep the last granule tagged, and store zero in the last byte of the
      // last granule, due to how short granules are implemented.
      tagAlloca(IRB, AI, UARTag, AlignedSize);
    };
    // Calls to functions that may return twice (e.g. setjmp) confuse the
    // postdominator analysis, and will leave us to keep memory tagged after
    // function return. Work around this by always untagging at every return
    // statement if return_twice functions are called.
    bool StandardLifetime =
        SInfo.UnrecognizedLifetimes.empty() &&
        memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd,
                                   &GetDT(), ClMaxLifetimes) &&
        !SInfo.CallsReturnTwice;
    if (DetectUseAfterScope && StandardLifetime) {
      // Tag right after the lifetime start and retag at the reachable exits
      // of the lifetime.
      IntrinsicInst *Start = Info.LifetimeStart[0];
      IRB.SetInsertPoint(Start->getNextNode());
      tagAlloca(IRB, AI, Tag, Size);
      // If forAllReachableExits() returns false, the lifetime end intrinsics
      // are removed.
      if (!memtag::forAllReachableExits(GetDT(), GetPDT(), Start,
                                        Info.LifetimeEnd, SInfo.RetVec,
                                        TagEnd)) {
        for (auto *End : Info.LifetimeEnd)
          End->eraseFromParent();
      }
    } else {
      // Tag at the current insert point and retag before every instruction in
      // RetVec.
      tagAlloca(IRB, AI, Tag, Size);
      for (auto *RI : SInfo.RetVec)
        TagEnd(RI);
      // We inserted tagging outside of the lifetimes, so we have to remove
      // them.
      for (auto &II : Info.LifetimeStart)
        II->eraseFromParent();
      for (auto &II : Info.LifetimeEnd)
        II->eraseFromParent();
    }
    memtag::alignAndPadAlloca(Info, Align(Mapping.getObjectAlignment()));
  }
  // Lifetime markers recorded as unrecognized are erased as well.
  for (auto &I : SInfo.UnrecognizedLifetimes)
    I->eraseFromParent();
  return true;
}
1338 
1339 bool HWAddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
1340   return (AI.getAllocatedType()->isSized() &&
1341           // FIXME: instrument dynamic allocas, too
1342           AI.isStaticAlloca() &&
1343           // alloca() may be called with 0 size, ignore it.
1344           memtag::getAllocaSizeInBytes(AI) > 0 &&
1345           // We are only interested in allocas not promotable to registers.
1346           // Promotable allocas are common under -O0.
1347           !isAllocaPromotable(&AI) &&
1348           // inalloca allocas are not treated as static, and we don't want
1349           // dynamic alloca instrumentation for them as well.
1350           !AI.isUsedWithInAlloca() &&
1351           // swifterror allocas are register promoted by ISel
1352           !AI.isSwiftError()) &&
1353          // safe allocas are not interesting
1354          !(SSI && SSI->isSafe(AI));
1355 }
1356 
/// Instruments the function \p F.
///
/// The function is left untouched if it is the hwasan constructor or lacks
/// the SanitizeHWAddress attribute. Otherwise this collects the memory
/// operands, memory intrinsics, landing pads and stack information of the
/// function, emits the prologue, and instruments the stack, the memory
/// accesses and (if enabled) the memory intrinsics.
///
/// \param F      Function to instrument.
/// \param GetDT  Returns the dominator tree of \p F.
/// \param GetPDT Returns the post-dominator tree of \p F.
/// \returns true if \p F was modified.
bool HWAddressSanitizer::sanitizeFunction(
    Function &F, llvm::function_ref<const DominatorTree &()> GetDT,
    llvm::function_ref<const PostDominatorTree &()> GetPDT) {
  if (&F == HwasanCtorFunction)
    return false;

  if (!F.hasFnAttribute(Attribute::SanitizeHWAddress))
    return false;

  LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n");

  SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument;
  SmallVector<MemIntrinsic *, 16> IntrinToInstrument;
  SmallVector<Instruction *, 8> LandingPadVec;

  // Single pass over all instructions that gathers everything to instrument;
  // the IR is only modified afterwards.
  memtag::StackInfoBuilder SIB(
      [this](const AllocaInst &AI) { return isInterestingAlloca(AI); });
  for (auto &Inst : instructions(F)) {
    if (InstrumentStack) {
      SIB.visit(Inst);
    }

    if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
      LandingPadVec.push_back(&Inst);

    getInterestingMemoryOperands(&Inst, OperandsToInstrument);

    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst))
      if (!ignoreMemIntrinsic(MI))
        IntrinToInstrument.push_back(MI);
  }

  memtag::StackInfo &SInfo = SIB.get();

  initializeCallbacks(*F.getParent());

  bool Changed = false;

  if (!LandingPadVec.empty())
    Changed |= instrumentLandingPads(LandingPadVec);

  if (SInfo.AllocasToInstrument.empty() && F.hasPersonalityFn() &&
      F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) {
    // __hwasan_personality_thunk is a no-op for functions without an
    // instrumented stack, so we can drop it.
    F.setPersonalityFn(nullptr);
    Changed = true;
  }

  // Nothing else to instrument: report only the changes made so far.
  if (SInfo.AllocasToInstrument.empty() && OperandsToInstrument.empty() &&
      IntrinToInstrument.empty())
    return Changed;

  // ShadowBase is per-function state; it is reset at the end of this function.
  assert(!ShadowBase);

  // The prologue goes at the very start of the entry block. A frame record is
  // only emitted when there are allocas to instrument.
  Instruction *InsertPt = &*F.getEntryBlock().begin();
  IRBuilder<> EntryIRB(InsertPt);
  emitPrologue(EntryIRB,
               /*WithFrameRecord*/ ClRecordStackHistory &&
                   Mapping.WithFrameRecord &&
                   !SInfo.AllocasToInstrument.empty());

  if (!SInfo.AllocasToInstrument.empty()) {
    Value *StackTag =
        ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
    instrumentStack(SInfo, StackTag, GetDT, GetPDT);
  }

  // If we split the entry block, move any allocas that were originally in the
  // entry block back into the entry block so that they aren't treated as
  // dynamic allocas.
  if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
    InsertPt = &*F.getEntryBlock().begin();
    for (Instruction &I :
         llvm::make_early_inc_range(*EntryIRB.GetInsertBlock())) {
      if (auto *AI = dyn_cast<AllocaInst>(&I))
        if (isa<ConstantInt>(AI->getArraySize()))
          I.moveBefore(InsertPt);
    }
  }

  for (auto &Operand : OperandsToInstrument)
    instrumentMemAccess(Operand);

  if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) {
    for (auto Inst : IntrinToInstrument)
      instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
  }

  // Clear the per-function state so the next function starts clean.
  ShadowBase = nullptr;
  StackBaseTag = nullptr;

  return true;
}
1451 
1452 void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
1453   assert(!UsePageAliases);
1454   Constant *Initializer = GV->getInitializer();
1455   uint64_t SizeInBytes =
1456       M.getDataLayout().getTypeAllocSize(Initializer->getType());
1457   uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment());
1458   if (SizeInBytes != NewSize) {
1459     // Pad the initializer out to the next multiple of 16 bytes and add the
1460     // required short granule tag.
1461     std::vector<uint8_t> Init(NewSize - SizeInBytes, 0);
1462     Init.back() = Tag;
1463     Constant *Padding = ConstantDataArray::get(*C, Init);
1464     Initializer = ConstantStruct::getAnon({Initializer, Padding});
1465   }
1466 
1467   auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(),
1468                                    GlobalValue::ExternalLinkage, Initializer,
1469                                    GV->getName() + ".hwasan");
1470   NewGV->copyAttributesFrom(GV);
1471   NewGV->setLinkage(GlobalValue::PrivateLinkage);
1472   NewGV->copyMetadata(GV, 0);
1473   NewGV->setAlignment(
1474       MaybeAlign(std::max(GV->getAlignment(), Mapping.getObjectAlignment())));
1475 
1476   // It is invalid to ICF two globals that have different tags. In the case
1477   // where the size of the global is a multiple of the tag granularity the
1478   // contents of the globals may be the same but the tags (i.e. symbol values)
1479   // may be different, and the symbols are not considered during ICF. In the
1480   // case where the size is not a multiple of the granularity, the short granule
1481   // tags would discriminate two globals with different tags, but there would
1482   // otherwise be nothing stopping such a global from being incorrectly ICF'd
1483   // with an uninstrumented (i.e. tag 0) global that happened to have the short
1484   // granule tag in the last byte.
1485   NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
1486 
1487   // Descriptor format (assuming little-endian):
1488   // bytes 0-3: relative address of global
1489   // bytes 4-6: size of global (16MB ought to be enough for anyone, but in case
1490   // it isn't, we create multiple descriptors)
1491   // byte 7: tag
1492   auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty);
1493   const uint64_t MaxDescriptorSize = 0xfffff0;
1494   for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes;
1495        DescriptorPos += MaxDescriptorSize) {
1496     auto *Descriptor =
1497         new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage,
1498                            nullptr, GV->getName() + ".hwasan.descriptor");
1499     auto *GVRelPtr = ConstantExpr::getTrunc(
1500         ConstantExpr::getAdd(
1501             ConstantExpr::getSub(
1502                 ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1503                 ConstantExpr::getPtrToInt(Descriptor, Int64Ty)),
1504             ConstantInt::get(Int64Ty, DescriptorPos)),
1505         Int32Ty);
1506     uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize);
1507     auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24));
1508     Descriptor->setComdat(NewGV->getComdat());
1509     Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag}));
1510     Descriptor->setSection("hwasan_globals");
1511     Descriptor->setMetadata(LLVMContext::MD_associated,
1512                             MDNode::get(*C, ValueAsMetadata::get(NewGV)));
1513     appendToCompilerUsed(M, Descriptor);
1514   }
1515 
1516   Constant *Aliasee = ConstantExpr::getIntToPtr(
1517       ConstantExpr::getAdd(
1518           ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1519           ConstantInt::get(Int64Ty, uint64_t(Tag) << PointerTagShift)),
1520       GV->getType());
1521   auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(),
1522                                     GV->getLinkage(), "", Aliasee, &M);
1523   Alias->setVisibility(GV->getVisibility());
1524   Alias->takeName(GV);
1525   GV->replaceAllUsesWith(Alias);
1526   GV->eraseFromParent();
1527 }
1528 
1529 static DenseSet<GlobalVariable *> getExcludedGlobals(Module &M) {
1530   NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals");
1531   if (!Globals)
1532     return DenseSet<GlobalVariable *>();
1533   DenseSet<GlobalVariable *> Excluded(Globals->getNumOperands());
1534   for (auto MDN : Globals->operands()) {
1535     // Metadata node contains the global and the fields of "Entry".
1536     assert(MDN->getNumOperands() == 5);
1537     auto *V = mdconst::extract_or_null<Constant>(MDN->getOperand(0));
1538     // The optimizer may optimize away a global entirely.
1539     if (!V)
1540       continue;
1541     auto *StrippedV = V->stripPointerCasts();
1542     auto *GV = dyn_cast<GlobalVariable>(StrippedV);
1543     if (!GV)
1544       continue;
1545     ConstantInt *IsExcluded = mdconst::extract<ConstantInt>(MDN->getOperand(4));
1546     if (IsExcluded->isOne())
1547       Excluded.insert(GV);
1548   }
1549   return Excluded;
1550 }
1551 
1552 void HWAddressSanitizer::instrumentGlobals() {
1553   std::vector<GlobalVariable *> Globals;
1554   auto ExcludedGlobals = getExcludedGlobals(M);
1555   for (GlobalVariable &GV : M.globals()) {
1556     if (ExcludedGlobals.count(&GV))
1557       continue;
1558 
1559     if (GV.isDeclarationForLinker() || GV.getName().startswith("llvm.") ||
1560         GV.isThreadLocal())
1561       continue;
1562 
1563     // Common symbols can't have aliases point to them, so they can't be tagged.
1564     if (GV.hasCommonLinkage())
1565       continue;
1566 
1567     // Globals with custom sections may be used in __start_/__stop_ enumeration,
1568     // which would be broken both by adding tags and potentially by the extra
1569     // padding/alignment that we insert.
1570     if (GV.hasSection())
1571       continue;
1572 
1573     Globals.push_back(&GV);
1574   }
1575 
1576   MD5 Hasher;
1577   Hasher.update(M.getSourceFileName());
1578   MD5::MD5Result Hash;
1579   Hasher.final(Hash);
1580   uint8_t Tag = Hash[0];
1581 
1582   for (GlobalVariable *GV : Globals) {
1583     Tag &= TagMaskByte;
1584     // Skip tag 0 in order to avoid collisions with untagged memory.
1585     if (Tag == 0)
1586       Tag = 1;
1587     instrumentGlobal(GV, Tag++);
1588   }
1589 }
1590 
1591 void HWAddressSanitizer::instrumentPersonalityFunctions() {
1592   // We need to untag stack frames as we unwind past them. That is the job of
1593   // the personality function wrapper, which either wraps an existing
1594   // personality function or acts as a personality function on its own. Each
1595   // function that has a personality function or that can be unwound past has
1596   // its personality function changed to a thunk that calls the personality
1597   // function wrapper in the runtime.
1598   MapVector<Constant *, std::vector<Function *>> PersonalityFns;
1599   for (Function &F : M) {
1600     if (F.isDeclaration() || !F.hasFnAttribute(Attribute::SanitizeHWAddress))
1601       continue;
1602 
1603     if (F.hasPersonalityFn()) {
1604       PersonalityFns[F.getPersonalityFn()->stripPointerCasts()].push_back(&F);
1605     } else if (!F.hasFnAttribute(Attribute::NoUnwind)) {
1606       PersonalityFns[nullptr].push_back(&F);
1607     }
1608   }
1609 
1610   if (PersonalityFns.empty())
1611     return;
1612 
1613   FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction(
1614       "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty,
1615       Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy);
1616   FunctionCallee UnwindGetGR = M.getOrInsertFunction("_Unwind_GetGR", VoidTy);
1617   FunctionCallee UnwindGetCFA = M.getOrInsertFunction("_Unwind_GetCFA", VoidTy);
1618 
1619   for (auto &P : PersonalityFns) {
1620     std::string ThunkName = kHwasanPersonalityThunkName;
1621     if (P.first)
1622       ThunkName += ("." + P.first->getName()).str();
1623     FunctionType *ThunkFnTy = FunctionType::get(
1624         Int32Ty, {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int8PtrTy}, false);
1625     bool IsLocal = P.first && (!isa<GlobalValue>(P.first) ||
1626                                cast<GlobalValue>(P.first)->hasLocalLinkage());
1627     auto *ThunkFn = Function::Create(ThunkFnTy,
1628                                      IsLocal ? GlobalValue::InternalLinkage
1629                                              : GlobalValue::LinkOnceODRLinkage,
1630                                      ThunkName, &M);
1631     if (!IsLocal) {
1632       ThunkFn->setVisibility(GlobalValue::HiddenVisibility);
1633       ThunkFn->setComdat(M.getOrInsertComdat(ThunkName));
1634     }
1635 
1636     auto *BB = BasicBlock::Create(*C, "entry", ThunkFn);
1637     IRBuilder<> IRB(BB);
1638     CallInst *WrapperCall = IRB.CreateCall(
1639         HwasanPersonalityWrapper,
1640         {ThunkFn->getArg(0), ThunkFn->getArg(1), ThunkFn->getArg(2),
1641          ThunkFn->getArg(3), ThunkFn->getArg(4),
1642          P.first ? IRB.CreateBitCast(P.first, Int8PtrTy)
1643                  : Constant::getNullValue(Int8PtrTy),
1644          IRB.CreateBitCast(UnwindGetGR.getCallee(), Int8PtrTy),
1645          IRB.CreateBitCast(UnwindGetCFA.getCallee(), Int8PtrTy)});
1646     WrapperCall->setTailCall();
1647     IRB.CreateRet(WrapperCall);
1648 
1649     for (Function *F : P.second)
1650       F->setPersonalityFn(ThunkFn);
1651   }
1652 }
1653 
1654 void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple,
1655                                              bool InstrumentWithCalls) {
1656   Scale = kDefaultShadowScale;
1657   if (TargetTriple.isOSFuchsia()) {
1658     // Fuchsia is always PIE, which means that the beginning of the address
1659     // space is always available.
1660     InGlobal = false;
1661     InTls = false;
1662     Offset = 0;
1663     WithFrameRecord = true;
1664   } else if (ClMappingOffset.getNumOccurrences() > 0) {
1665     InGlobal = false;
1666     InTls = false;
1667     Offset = ClMappingOffset;
1668     WithFrameRecord = false;
1669   } else if (ClEnableKhwasan || InstrumentWithCalls) {
1670     InGlobal = false;
1671     InTls = false;
1672     Offset = 0;
1673     WithFrameRecord = false;
1674   } else if (ClWithIfunc) {
1675     InGlobal = true;
1676     InTls = false;
1677     Offset = kDynamicShadowSentinel;
1678     WithFrameRecord = false;
1679   } else if (ClWithTls) {
1680     InGlobal = false;
1681     InTls = true;
1682     Offset = kDynamicShadowSentinel;
1683     WithFrameRecord = true;
1684   } else {
1685     InGlobal = false;
1686     InTls = false;
1687     Offset = kDynamicShadowSentinel;
1688     WithFrameRecord = false;
1689   }
1690 }
1691