//===- HWAddressSanitizer.cpp - detector of tagged-memory bugs ----------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// This file is a part of HWAddressSanitizer, an address sanity checker
12 /// based on tagged addressing.
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/StringExtras.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/Triple.h"
19 #include "llvm/IR/Attributes.h"
20 #include "llvm/IR/BasicBlock.h"
21 #include "llvm/IR/Constant.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/DataLayout.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/IR/InlineAsm.h"
28 #include "llvm/IR/InstVisitor.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/IntrinsicInst.h"
32 #include "llvm/IR/Intrinsics.h"
33 #include "llvm/IR/LLVMContext.h"
34 #include "llvm/IR/MDBuilder.h"
35 #include "llvm/IR/Module.h"
36 #include "llvm/IR/Type.h"
37 #include "llvm/IR/Value.h"
38 #include "llvm/Pass.h"
39 #include "llvm/Support/Casting.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/Transforms/Instrumentation.h"
44 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
45 #include "llvm/Transforms/Utils/ModuleUtils.h"
46 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
47 #include <sstream>
48 
using namespace llvm;

#define DEBUG_TYPE "hwasan"

static const char *const kHwasanModuleCtorName = "hwasan.module_ctor";
static const char *const kHwasanInitName = "__hwasan_init";

// Runtime-exported global holding the shadow base when the mapping offset is
// not known at compile time (see getDynamicShadowNonTls).
static const char *const kHwasanShadowMemoryDynamicAddress =
    "__hwasan_shadow_memory_dynamic_address";

// Access sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;

// One shadow byte covers 2^Scale bytes of application memory.
static const size_t kDefaultShadowScale = 4;
// Offset value meaning "the shadow base must be computed at run time".
static const uint64_t kDynamicShadowSentinel =
    std::numeric_limits<uint64_t>::max();
// The pointer tag occupies the most significant byte of a 64-bit pointer.
static const unsigned kPointerTagShift = 56;

static const unsigned kShadowBaseAlignment = 32;
68 
// Runtime entry points are named <prefix>{load,store}<size>[_noabort].
static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
    "hwasan-memory-access-callback-prefix",
    cl::desc("Prefix for memory access callbacks"), cl::Hidden,
    cl::init("__hwasan_"));

// When set, checks are emitted as runtime calls instead of inline code.
static cl::opt<bool>
    ClInstrumentWithCalls("hwasan-instrument-with-calls",
                cl::desc("instrument reads and writes with callbacks"),
                cl::Hidden, cl::init(false));

static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads",
                                       cl::desc("instrument read instructions"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool> ClInstrumentWrites(
    "hwasan-instrument-writes", cl::desc("instrument write instructions"),
    cl::Hidden, cl::init(true));

static cl::opt<bool> ClInstrumentAtomics(
    "hwasan-instrument-atomics",
    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
    cl::init(true));

static cl::opt<bool> ClRecover(
    "hwasan-recover",
    cl::desc("Enable recovery mode (continue-after-error)."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> ClInstrumentStack("hwasan-instrument-stack",
                                       cl::desc("instrument stack (allocas)"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool> ClUARRetagToZero(
    "hwasan-uar-retag-to-zero",
    cl::desc("Clear alloca tags before returning from the function to allow "
             "non-instrumented and instrumented function calls mix. When set "
             "to false, allocas are retagged before returning from the "
             "function to detect use after return."),
    cl::Hidden, cl::init(true));

static cl::opt<bool> ClGenerateTagsWithCalls(
    "hwasan-generate-tags-with-calls",
    cl::desc("generate new tags with runtime library calls"), cl::Hidden,
    cl::init(false));

// -1 (the default) disables the match-all tag entirely; in kernel mode the
// effective default becomes 0xFF (see instrumentMemAccessInline).
static cl::opt<int> ClMatchAllTag(
    "hwasan-match-all-tag",
    cl::desc("don't report bad accesses via pointers with this tag"),
    cl::Hidden, cl::init(-1));

static cl::opt<bool> ClEnableKhwasan(
    "hwasan-kernel",
    cl::desc("Enable KernelHWAddressSanitizer instrumentation"),
    cl::Hidden, cl::init(false));

// These flags allow changing the shadow mapping and control how shadow memory
// is accessed. The shadow mapping looks like:
//    Shadow = (Mem >> scale) + offset

static cl::opt<unsigned long long> ClMappingOffset(
    "hwasan-mapping-offset",
    cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"), cl::Hidden,
    cl::init(0));

static cl::opt<bool>
    ClWithIfunc("hwasan-with-ifunc",
                cl::desc("Access dynamic shadow through an ifunc global on "
                         "platforms that support this"),
                cl::Hidden, cl::init(false));

static cl::opt<bool> ClWithTls(
    "hwasan-with-tls",
    cl::desc("Access dynamic shadow through an thread-local pointer on "
             "platforms that support this"),
    cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClRecordStackHistory("hwasan-record-stack-history",
                         cl::desc("Record stack frames with tagged allocations "
                                  "in a thread-local ring buffer"),
                         cl::Hidden, cl::init(true));
static cl::opt<bool>
    ClCreateFrameDescriptions("hwasan-create-frame-descriptions",
                              cl::desc("create static frame descriptions"),
                              cl::Hidden, cl::init(true));
154 
namespace {

/// An instrumentation pass implementing detection of addressability bugs
/// using tagged pointers.
class HWAddressSanitizer : public FunctionPass {
public:
  // Pass identification, replacement for typeid.
  static char ID;

  // The -hwasan-recover / -hwasan-kernel command-line flags, when explicitly
  // given, override the constructor arguments.
  explicit HWAddressSanitizer(bool CompileKernel = false, bool Recover = false)
      : FunctionPass(ID) {
    this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
    this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0 ?
        ClEnableKhwasan : CompileKernel;
  }

  StringRef getPassName() const override { return "HWAddressSanitizer"; }

  bool runOnFunction(Function &F) override;
  bool doInitialization(Module &M) override;

  void initializeCallbacks(Module &M);

  // Returns the dynamic shadow base for the non-TLS schemes (ifunc global or
  // runtime-published global), or nullptr when the base is a compile-time
  // constant.
  Value *getDynamicShadowNonTls(IRBuilder<> &IRB);

  void untagPointerOperand(Instruction *I, Value *Addr);
  Value *memToShadow(Value *Shadow, Type *Ty, IRBuilder<> &IRB);
  void instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
                                 unsigned AccessSizeIndex,
                                 Instruction *InsertBefore);
  bool instrumentMemAccess(Instruction *I);
  Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
                                   uint64_t *TypeSize, unsigned *Alignment,
                                   Value **MaybeMask);

  bool isInterestingAlloca(const AllocaInst &AI);
  bool tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag);
  Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
  Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
  bool instrumentStack(SmallVectorImpl<AllocaInst *> &Allocas,
                       SmallVectorImpl<Instruction *> &RetVec, Value *StackTag);
  Value *getNextTagWithCall(IRBuilder<> &IRB);
  Value *getStackBaseTag(IRBuilder<> &IRB);
  Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, AllocaInst *AI,
                     unsigned AllocaNo);
  Value *getUARTag(IRBuilder<> &IRB, Value *StackTag);

  Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty);
  Value *emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);

private:
  LLVMContext *C;
  std::string CurModuleUniqueId;
  Triple TargetTriple;

  // Frame description is a way to pass names/sizes of local variables
  // to the run-time w/o adding extra executable code in every function.
  // We do this by creating a separate section with {PC,Descr} pairs and passing
  // the section beg/end to __hwasan_init_frames() at module init time.
  std::string createFrameString(ArrayRef<AllocaInst*> Allocas);
  void createFrameGlobal(Function &F, const std::string &FrameString);
  // Get the section name for frame descriptions. Currently ELF-only.
  const char *getFrameSection() { return "__hwasan_frames"; }
  // Linker-synthesized symbols delimiting the frame section.
  const char *getFrameSectionBeg() { return  "__start___hwasan_frames"; }
  const char *getFrameSectionEnd() { return  "__stop___hwasan_frames"; }
  // Declares an external hidden global of type Ty named Name; used to bind to
  // the __start_/__stop_ section bounds above.
  GlobalVariable *createFrameSectionBound(Module &M, Type *Ty,
                                          const char *Name) {
    auto GV = new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
                                 nullptr, Name);
    GV->setVisibility(GlobalValue::HiddenVisibility);
    return GV;
  }

  /// This struct defines the shadow mapping using the rule:
  ///   shadow = (mem >> Scale) + Offset.
  /// If InGlobal is true, then
  ///   extern char __hwasan_shadow[];
  ///   shadow = (mem >> Scale) + &__hwasan_shadow
  /// If InTls is true, then
  ///   extern char *__hwasan_tls;
  ///   shadow = (mem>>Scale) + align_up(__hwasan_shadow, kShadowBaseAlignment)
  struct ShadowMapping {
    int Scale;
    uint64_t Offset;
    bool InGlobal;
    bool InTls;

    void init(Triple &TargetTriple);
    // Allocas are aligned to the shadow granularity (2^Scale bytes) so each
    // one owns whole shadow bytes.
    unsigned getAllocaAlignment() const { return 1U << Scale; }
  };
  ShadowMapping Mapping;

  Type *IntptrTy;
  Type *Int8PtrTy;
  Type *Int8Ty;

  bool CompileKernel;
  bool Recover;

  // Module ctor calling __hwasan_init; null in kernel mode.
  Function *HwasanCtorFunction;

  // Fixed-size check callbacks, indexed by [IsWrite][log2(SizeInBytes)].
  Function *HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
  // Variable-size check callbacks (__hwasan_{load,store}N), indexed by IsWrite.
  Function *HwasanMemoryAccessCallbackSized[2];

  Function *HwasanTagMemoryFunc;
  Function *HwasanGenerateTagFunc;

  // &__hwasan_shadow; only initialized when Mapping.InGlobal.
  Constant *ShadowGlobal;

  // Shadow base for the function currently being instrumented, when it is
  // computed dynamically (see getDynamicShadowNonTls / emitPrologue).
  Value *LocalDynamicShadow = nullptr;
  // The __hwasan_tls global, created in doInitialization on non-Android
  // targets.
  GlobalValue *ThreadPtrGlobal = nullptr;
};

} // end anonymous namespace
269 
char HWAddressSanitizer::ID = 0;

INITIALIZE_PASS_BEGIN(
    HWAddressSanitizer, "hwasan",
    "HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
    false)
INITIALIZE_PASS_END(
    HWAddressSanitizer, "hwasan",
    "HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
    false)

// Factory used by the pass-manager builder / frontend.
FunctionPass *llvm::createHWAddressSanitizerPass(bool CompileKernel,
                                                 bool Recover) {
  // Callers must always request recovery mode for kernel instrumentation.
  assert(!CompileKernel || Recover);
  return new HWAddressSanitizer(CompileKernel, Recover);
}
286 
/// Module-level initialization.
///
/// Caches module-wide state (triple, mapping, common types), inserts a call
/// to __hwasan_init into the module's constructor list (userspace only),
/// registers the frame-description section with __hwasan_init_frames, and
/// declares the __hwasan_tls thread-state global where needed.
/// Always returns true: the module is modified unconditionally.
bool HWAddressSanitizer::doInitialization(Module &M) {
  LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
  auto &DL = M.getDataLayout();

  TargetTriple = Triple(M.getTargetTriple());

  Mapping.init(TargetTriple);

  C = &(M.getContext());
  CurModuleUniqueId = getUniqueModuleId(&M);
  IRBuilder<> IRB(*C);
  IntptrTy = IRB.getIntPtrTy(DL);
  Int8PtrTy = IRB.getInt8PtrTy();
  Int8Ty = IRB.getInt8Ty();

  // In kernel mode no ctor is emitted; the kernel performs its own
  // initialization.
  HwasanCtorFunction = nullptr;
  if (!CompileKernel) {
    std::tie(HwasanCtorFunction, std::ignore) =
        createSanitizerCtorAndInitFunctions(M, kHwasanModuleCtorName,
                                            kHwasanInitName,
                                            /*InitArgTypes=*/{},
                                            /*InitArgs=*/{});
    appendToGlobalCtors(M, HwasanCtorFunction, 0);
  }

  // Create a call to __hwasan_init_frames.
  if (HwasanCtorFunction) {
    // Create a dummy frame description for the CTOR function.
    // W/o it we would have to create the call to __hwasan_init_frames after
    // all functions are instrumented (i.e. need to have a ModulePass).
    createFrameGlobal(*HwasanCtorFunction, "");
    IRBuilder<> IRBCtor(HwasanCtorFunction->getEntryBlock().getTerminator());
    IRBCtor.CreateCall(
        declareSanitizerInitFunction(M, "__hwasan_init_frames",
                                     {Int8PtrTy, Int8PtrTy}),
        {createFrameSectionBound(M, Int8Ty, getFrameSectionBeg()),
         createFrameSectionBound(M, Int8Ty, getFrameSectionEnd())});
  }

  // On non-Android targets the per-thread state lives in the __hwasan_tls
  // initial-exec TLS variable exported by the runtime; keep the declaration
  // alive via llvm.compiler.used.
  if (!TargetTriple.isAndroid())
    appendToCompilerUsed(
        M, ThreadPtrGlobal = new GlobalVariable(
               M, IntptrTy, false, GlobalVariable::ExternalLinkage, nullptr,
               "__hwasan_tls", nullptr, GlobalVariable::InitialExecTLSModel));

  return true;
}
337 
// Declares (or looks up) every runtime entry point the pass may emit calls
// to. Must run before any instrumentation is generated.
void HWAddressSanitizer::initializeCallbacks(Module &M) {
  IRBuilder<> IRB(*C);
  for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
    const std::string TypeStr = AccessIsWrite ? "store" : "load";
    // In recovery mode the runtime reports the error and returns instead of
    // aborting.
    const std::string EndingStr = Recover ? "_noabort" : "";

    // __hwasan_{load,store}N[_noabort](ptr, size) — used for access sizes
    // that have no fixed-size callback.
    HwasanMemoryAccessCallbackSized[AccessIsWrite] =
        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
            ClMemoryAccessCallbackPrefix + TypeStr + "N" + EndingStr,
            FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false)));

    // __hwasan_{load,store}{1,2,4,8,16}[_noabort](ptr).
    for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
         AccessSizeIndex++) {
      HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
          checkSanitizerInterfaceFunction(M.getOrInsertFunction(
              ClMemoryAccessCallbackPrefix + TypeStr +
                  itostr(1ULL << AccessSizeIndex) + EndingStr,
              FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false)));
    }
  }

  // __hwasan_tag_memory(ptr, tag, size) and __hwasan_generate_tag().
  HwasanTagMemoryFunc = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
      "__hwasan_tag_memory", IRB.getVoidTy(), Int8PtrTy, Int8Ty, IntptrTy));
  HwasanGenerateTagFunc = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty));

  // The ifunc scheme addresses shadow relative to &__hwasan_shadow.
  if (Mapping.InGlobal)
    ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
                                       ArrayType::get(IRB.getInt8Ty(), 0));
}
368 
369 Value *HWAddressSanitizer::getDynamicShadowNonTls(IRBuilder<> &IRB) {
370   // Generate code only when dynamic addressing is needed.
371   if (Mapping.Offset != kDynamicShadowSentinel)
372     return nullptr;
373 
374   if (Mapping.InGlobal) {
375     // An empty inline asm with input reg == output reg.
376     // An opaque pointer-to-int cast, basically.
377     InlineAsm *Asm = InlineAsm::get(
378         FunctionType::get(IntptrTy, {ShadowGlobal->getType()}, false),
379         StringRef(""), StringRef("=r,0"),
380         /*hasSideEffects=*/false);
381     return IRB.CreateCall(Asm, {ShadowGlobal}, ".hwasan.shadow");
382   } else {
383     Value *GlobalDynamicAddress =
384         IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
385             kHwasanShadowMemoryDynamicAddress, IntptrTy);
386     return IRB.CreateLoad(GlobalDynamicAddress);
387   }
388 }
389 
// Returns the pointer operand of I if I is a memory access that should be
// instrumented, filling in the access properties (*IsWrite, *TypeSize in
// bits, *Alignment in bytes, 0 = unknown). Returns nullptr for instructions
// that are not accesses or that must be skipped.
// NOTE(review): *MaybeMask exists for interface parity with ASan but is never
// written by this implementation — masked vector accesses are not recognized.
Value *HWAddressSanitizer::isInterestingMemoryAccess(Instruction *I,
                                                     bool *IsWrite,
                                                     uint64_t *TypeSize,
                                                     unsigned *Alignment,
                                                     Value **MaybeMask) {
  // Skip memory accesses inserted by another instrumentation.
  if (I->getMetadata("nosanitize")) return nullptr;

  // Do not instrument the load fetching the dynamic shadow address.
  if (LocalDynamicShadow == I)
    return nullptr;

  Value *PtrOperand = nullptr;
  const DataLayout &DL = I->getModule()->getDataLayout();
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (!ClInstrumentReads) return nullptr;
    *IsWrite = false;
    *TypeSize = DL.getTypeStoreSizeInBits(LI->getType());
    *Alignment = LI->getAlignment();
    PtrOperand = LI->getPointerOperand();
  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (!ClInstrumentWrites) return nullptr;
    *IsWrite = true;
    *TypeSize = DL.getTypeStoreSizeInBits(SI->getValueOperand()->getType());
    *Alignment = SI->getAlignment();
    PtrOperand = SI->getPointerOperand();
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    if (!ClInstrumentAtomics) return nullptr;
    *IsWrite = true;
    *TypeSize = DL.getTypeStoreSizeInBits(RMW->getValOperand()->getType());
    // Alignment 0 means "unknown"; instrumentMemAccess accepts 0 on its fast
    // path.
    *Alignment = 0;
    PtrOperand = RMW->getPointerOperand();
  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
    if (!ClInstrumentAtomics) return nullptr;
    *IsWrite = true;
    *TypeSize = DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType());
    *Alignment = 0;
    PtrOperand = XCHG->getPointerOperand();
  }

  if (PtrOperand) {
    // Do not instrument accesses from different address spaces; we cannot deal
    // with them.
    Type *PtrTy = cast<PointerType>(PtrOperand->getType()->getScalarType());
    if (PtrTy->getPointerAddressSpace() != 0)
      return nullptr;

    // Ignore swifterror addresses.
    // swifterror memory addresses are mem2reg promoted by instruction
    // selection. As such they cannot have regular uses like an instrumentation
    // function and it makes no sense to track them as memory.
    if (PtrOperand->isSwiftError())
      return nullptr;
  }

  return PtrOperand;
}
447 
448 static unsigned getPointerOperandIndex(Instruction *I) {
449   if (LoadInst *LI = dyn_cast<LoadInst>(I))
450     return LI->getPointerOperandIndex();
451   if (StoreInst *SI = dyn_cast<StoreInst>(I))
452     return SI->getPointerOperandIndex();
453   if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I))
454     return RMW->getPointerOperandIndex();
455   if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I))
456     return XCHG->getPointerOperandIndex();
457   report_fatal_error("Unexpected instruction");
458   return -1;
459 }
460 
461 static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
462   size_t Res = countTrailingZeros(TypeSize / 8);
463   assert(Res < kNumberOfAccessSizes);
464   return Res;
465 }
466 
467 void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
468   if (TargetTriple.isAArch64())
469     return;
470 
471   IRBuilder<> IRB(I);
472   Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
473   Value *UntaggedPtr =
474       IRB.CreateIntToPtr(untagPointer(IRB, AddrLong), Addr->getType());
475   I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
476 }
477 
478 Value *HWAddressSanitizer::memToShadow(Value *Mem, Type *Ty, IRBuilder<> &IRB) {
479   // Mem >> Scale
480   Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale);
481   if (Mapping.Offset == 0)
482     return Shadow;
483   // (Mem >> Scale) + Offset
484   Value *ShadowBase;
485   if (LocalDynamicShadow)
486     ShadowBase = LocalDynamicShadow;
487   else
488     ShadowBase = ConstantInt::get(Ty, Mapping.Offset);
489   return IRB.CreateAdd(Shadow, ShadowBase);
490 }
491 
// Emits the inline fast-path check for one access: compare the pointer's tag
// byte against the shadow tag for the address and branch to arch-specific
// trap asm on mismatch.
void HWAddressSanitizer::instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
                                                   unsigned AccessSizeIndex,
                                                   Instruction *InsertBefore) {
  IRBuilder<> IRB(InsertBefore);
  // The pointer tag lives in bits 56..63.
  Value *PtrTag = IRB.CreateTrunc(IRB.CreateLShr(PtrLong, kPointerTagShift),
                                  IRB.getInt8Ty());
  Value *AddrLong = untagPointer(IRB, PtrLong);
  Value *ShadowLong = memToShadow(AddrLong, PtrLong->getType(), IRB);
  Value *MemTag = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowLong, Int8PtrTy));
  Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag);

  // Pointers carrying the "match-all" tag are never reported. The kernel
  // defaults to 0xFF (the canonical top byte of kernel pointers); userspace
  // defaults to disabled (-1).
  int matchAllTag = ClMatchAllTag.getNumOccurrences() > 0 ?
      ClMatchAllTag : (CompileKernel ? 0xFF : -1);
  if (matchAllTag != -1) {
    Value *TagNotIgnored = IRB.CreateICmpNE(PtrTag,
        ConstantInt::get(PtrTag->getType(), matchAllTag));
    TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
  }

  // Split off a cold failure block, heavily weighted as unlikely; without
  // -hwasan-recover the failure block ends in unreachable.
  Instruction *CheckTerm =
      SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, !Recover,
                                MDBuilder(*C).createBranchWeights(1, 100000));

  IRB.SetInsertPoint(CheckTerm);
  // Access descriptor for the trap handler: bit 5 = recover, bit 4 = write,
  // bits 0..3 = log2 of the access size in bytes.
  const int64_t AccessInfo = Recover * 0x20 + IsWrite * 0x10 + AccessSizeIndex;
  InlineAsm *Asm;
  switch (TargetTriple.getArch()) {
    case Triple::x86_64:
      // The signal handler will find the data address in rdi.
      // AccessInfo travels in the displacement of the nopl after the int3.
      Asm = InlineAsm::get(
          FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
          "int3\nnopl " + itostr(0x40 + AccessInfo) + "(%rax)",
          "{rdi}",
          /*hasSideEffects=*/true);
      break;
    case Triple::aarch64:
    case Triple::aarch64_be:
      // The signal handler will find the data address in x0.
      // AccessInfo travels in the brk immediate (0x900 + info).
      Asm = InlineAsm::get(
          FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
          "brk #" + itostr(0x900 + AccessInfo),
          "{x0}",
          /*hasSideEffects=*/true);
      break;
    default:
      report_fatal_error("unsupported architecture");
  }
  IRB.CreateCall(Asm, PtrLong);
}
541 
// Instruments one memory access: an inline check (or fixed-size callback with
// -hwasan-instrument-with-calls) for power-of-two sizes up to 16 bytes with
// adequate alignment, and the variable-size __hwasan_{load,store}N callback
// otherwise. Returns true if instrumentation was emitted.
bool HWAddressSanitizer::instrumentMemAccess(Instruction *I) {
  LLVM_DEBUG(dbgs() << "Instrumenting: " << *I << "\n");
  bool IsWrite = false;
  unsigned Alignment = 0;
  uint64_t TypeSize = 0;
  Value *MaybeMask = nullptr;
  Value *Addr =
      isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment, &MaybeMask);

  if (!Addr)
    return false;

  // Masked vector accesses are not supported yet.
  if (MaybeMask)
    return false; //FIXME

  IRBuilder<> IRB(I);
  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
  // Fast path requires: power-of-two size, at most 16 bytes, and an alignment
  // of at least the shadow granule or the access size (0 = unknown is
  // accepted).
  if (isPowerOf2_64(TypeSize) &&
      (TypeSize / 8 <= (1UL << (kNumberOfAccessSizes - 1))) &&
      (Alignment >= (1UL << Mapping.Scale) || Alignment == 0 ||
       Alignment >= TypeSize / 8)) {
    size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
    if (ClInstrumentWithCalls) {
      IRB.CreateCall(HwasanMemoryAccessCallback[IsWrite][AccessSizeIndex],
                     AddrLong);
    } else {
      instrumentMemAccessInline(AddrLong, IsWrite, AccessSizeIndex, I);
    }
  } else {
    // Slow path: let the runtime check the whole [addr, addr+size) range.
    IRB.CreateCall(HwasanMemoryAccessCallbackSized[IsWrite],
                   {AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8)});
  }
  // Strip the tag from the operand itself where the hardware would fault on
  // tagged pointers.
  untagPointerOperand(I, Addr);

  return true;
}
578 
579 static uint64_t getAllocaSizeInBytes(const AllocaInst &AI) {
580   uint64_t ArraySize = 1;
581   if (AI.isArrayAllocation()) {
582     const ConstantInt *CI = dyn_cast<ConstantInt>(AI.getArraySize());
583     assert(CI && "non-constant array size");
584     ArraySize = CI->getZExtValue();
585   }
586   Type *Ty = AI.getAllocatedType();
587   uint64_t SizeInBytes = AI.getModule()->getDataLayout().getTypeAllocSize(Ty);
588   return SizeInBytes * ArraySize;
589 }
590 
// Writes the low byte of Tag into every shadow byte covering AI, either via
// the __hwasan_tag_memory callback or an inline memset of the shadow.
// Always returns true.
bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI,
                                   Value *Tag) {
  // Round the size up to the alloca alignment (a power of two, see
  // getAllocaAlignment) so only whole shadow bytes are written.
  size_t Size = (getAllocaSizeInBytes(*AI) + Mapping.getAllocaAlignment() - 1) &
                ~(Mapping.getAllocaAlignment() - 1);

  Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty());
  if (ClInstrumentWithCalls) {
    IRB.CreateCall(HwasanTagMemoryFunc,
                   {IRB.CreatePointerCast(AI, Int8PtrTy), JustTag,
                    ConstantInt::get(IntptrTy, Size)});
  } else {
    // One shadow byte per 2^Scale bytes of the alloca.
    size_t ShadowSize = Size >> Mapping.Scale;
    Value *ShadowPtr = IRB.CreateIntToPtr(
        memToShadow(IRB.CreatePointerCast(AI, IntptrTy), AI->getType(), IRB),
        Int8PtrTy);
    // If this memset is not inlined, it will be intercepted in the hwasan
    // runtime library. That's OK, because the interceptor skips the checks if
    // the address is in the shadow region.
    // FIXME: the interceptor is not as fast as real memset. Consider lowering
    // llvm.memset right here into either a sequence of stores, or a call to
    // hwasan_tag_memory.
    IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, /*Align=*/1);
  }
  return true;
}
616 
// Picks the xor-mask used to derive the tag of alloca number AllocaNo from
// the frame's base tag. Each entry is an 8-bit value with at most one run of
// set bits, so `x ^= mask << 56` encodes as a single armv8 instruction.
// 255 is deliberately absent: it is reserved for use-after-return tags.
static unsigned RetagMask(unsigned AllocaNo) {
  static const unsigned FastMasks[] = {
      0,   1,   2,   3,   4,   6,   7,   8,   12,  14,  15, 16,  24,
      28,  30,  31,  32,  48,  56,  60,  62,  63,  64,  96, 112, 120,
      124, 126, 127, 128, 192, 224, 240, 248, 252, 254};
  const unsigned NumMasks = sizeof(FastMasks) / sizeof(FastMasks[0]);
  // Cycle through the table for frames with more allocas than masks.
  return FastMasks[AllocaNo % NumMasks];
}
628 
629 Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
630   return IRB.CreateZExt(IRB.CreateCall(HwasanGenerateTagFunc), IntptrTy);
631 }
632 
633 Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
634   if (ClGenerateTagsWithCalls)
635     return getNextTagWithCall(IRB);
636   // FIXME: use addressofreturnaddress (but implement it in aarch64 backend
637   // first).
638   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
639   auto GetStackPointerFn =
640       Intrinsic::getDeclaration(M, Intrinsic::frameaddress);
641   Value *StackPointer = IRB.CreateCall(
642       GetStackPointerFn, {Constant::getNullValue(IRB.getInt32Ty())});
643 
644   // Extract some entropy from the stack pointer for the tags.
645   // Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ
646   // between functions).
647   Value *StackPointerLong = IRB.CreatePointerCast(StackPointer, IntptrTy);
648   Value *StackTag =
649       IRB.CreateXor(StackPointerLong, IRB.CreateLShr(StackPointerLong, 20),
650                     "hwasan.stack.base.tag");
651   return StackTag;
652 }
653 
654 Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag,
655                                         AllocaInst *AI, unsigned AllocaNo) {
656   if (ClGenerateTagsWithCalls)
657     return getNextTagWithCall(IRB);
658   return IRB.CreateXor(StackTag,
659                        ConstantInt::get(IntptrTy, RetagMask(AllocaNo)));
660 }
661 
662 Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB, Value *StackTag) {
663   if (ClUARRetagToZero)
664     return ConstantInt::get(IntptrTy, 0);
665   if (ClGenerateTagsWithCalls)
666     return getNextTagWithCall(IRB);
667   return IRB.CreateXor(StackTag, ConstantInt::get(IntptrTy, 0xFFU));
668 }
669 
670 // Add a tag to an address.
671 Value *HWAddressSanitizer::tagPointer(IRBuilder<> &IRB, Type *Ty,
672                                       Value *PtrLong, Value *Tag) {
673   Value *TaggedPtrLong;
674   if (CompileKernel) {
675     // Kernel addresses have 0xFF in the most significant byte.
676     Value *ShiftedTag = IRB.CreateOr(
677         IRB.CreateShl(Tag, kPointerTagShift),
678         ConstantInt::get(IntptrTy, (1ULL << kPointerTagShift) - 1));
679     TaggedPtrLong = IRB.CreateAnd(PtrLong, ShiftedTag);
680   } else {
681     // Userspace can simply do OR (tag << 56);
682     Value *ShiftedTag = IRB.CreateShl(Tag, kPointerTagShift);
683     TaggedPtrLong = IRB.CreateOr(PtrLong, ShiftedTag);
684   }
685   return IRB.CreateIntToPtr(TaggedPtrLong, Ty);
686 }
687 
688 // Remove tag from an address.
689 Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) {
690   Value *UntaggedPtrLong;
691   if (CompileKernel) {
692     // Kernel addresses have 0xFF in the most significant byte.
693     UntaggedPtrLong = IRB.CreateOr(PtrLong,
694         ConstantInt::get(PtrLong->getType(), 0xFFULL << kPointerTagShift));
695   } else {
696     // Userspace addresses have 0x00.
697     UntaggedPtrLong = IRB.CreateAnd(PtrLong,
698         ConstantInt::get(PtrLong->getType(), ~(0xFFULL << kPointerTagShift)));
699   }
700   return UntaggedPtrLong;
701 }
702 
// Returns a pointer (of type Ty*) to the per-thread hwasan slot, or nullptr
// if no scheme is available on this target.
Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  if (TargetTriple.isAArch64() && TargetTriple.isAndroid()) {
    // Android: the slot lives at byte offset 0x40 from the thread pointer.
    // NOTE(review): presumably this is bionic's reserved sanitizer TLS slot —
    // confirm against the Android runtime before changing the offset.
    Function *ThreadPointerFunc =
        Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
    Value *SlotPtr = IRB.CreatePointerCast(
        IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), 0x40),
        Ty->getPointerTo(0));
    return SlotPtr;
  }
  // Elsewhere: the __hwasan_tls global declared in doInitialization.
  if (ThreadPtrGlobal)
    return ThreadPtrGlobal;


  return nullptr;
}
719 
720 // Creates a string with a description of the stack frame (set of Allocas).
721 // The string is intended to be human readable.
722 // The current form is: Size1 Name1; Size2 Name2; ...
723 std::string
724 HWAddressSanitizer::createFrameString(ArrayRef<AllocaInst *> Allocas) {
725   std::ostringstream Descr;
726   for (auto AI : Allocas)
727     Descr << getAllocaSizeInBytes(*AI) << " " <<  AI->getName().str() << "; ";
728   return Descr.str();
729 }
730 
// Creates a global in the frame section which consists of two pointers:
// the function PC and the frame string constant. The runtime walks the
// section (bounded by __start_/__stop_ symbols) in __hwasan_init_frames.
void HWAddressSanitizer::createFrameGlobal(Function &F,
                                           const std::string &FrameString) {
  Module &M = *F.getParent();
  auto DescrGV = createPrivateGlobalForString(M, FrameString, true);
  auto PtrPairTy = StructType::get(F.getType(), DescrGV->getType());
  auto GV = new GlobalVariable(
      M, PtrPairTy, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
      ConstantStruct::get(PtrPairTy, (Constant *)&F, (Constant *)DescrGV),
      "__hwasan");
  GV->setSection(getFrameSection());
  // Nothing references the descriptor directly; keep it alive explicitly.
  appendToCompilerUsed(M, GV);
  // Put GV into the F's Comdat so that if F is deleted GV can be deleted too.
  if (&F != HwasanCtorFunction)
    if (auto Comdat =
            GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId))
      GV->setComdat(Comdat);
}
750 
// Emits the function prologue: computes the shadow base address and,
// optionally, appends a (PC, SP) record to the per-thread stack-history ring
// buffer. Returns the shadow base value to use for this function.
Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB,
                                        bool WithFrameRecord) {
  if (!Mapping.InTls)
    return getDynamicShadowNonTls(IRB);

  // TLS slot holding the combined ring-buffer position / shadow info.
  Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
  assert(SlotPtr);

  Value *ThreadLong = IRB.CreateLoad(SlotPtr);
  // Extract the address field from ThreadLong. Unnecessary on AArch64 with TBI.
  Value *ThreadLongMaybeUntagged =
      TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong);

  if (WithFrameRecord) {
    // Prepare ring buffer data.
    Function *F = IRB.GetInsertBlock()->getParent();
    auto PC = IRB.CreatePtrToInt(F, IntptrTy);
    auto GetStackPointerFn =
        Intrinsic::getDeclaration(F->getParent(), Intrinsic::frameaddress);
    Value *SP = IRB.CreatePtrToInt(
        IRB.CreateCall(GetStackPointerFn,
                       {Constant::getNullValue(IRB.getInt32Ty())}),
        IntptrTy);
    // Mix SP and PC. TODO: also add the tag to the mix.
    // Assumptions:
    // PC is 0x0000PPPPPPPPPPPP  (48 bits are meaningful, others are zero)
    // SP is 0xsssssssssssSSSS0  (4 lower bits are zero)
    // We only really need ~20 lower non-zero bits (SSSS), so we mix like this:
    //       0xSSSSPPPPPPPPPPPP
    SP = IRB.CreateShl(SP, 44);

    // Store data to ring buffer.
    Value *RecordPtr =
        IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IntptrTy->getPointerTo(0));
    IRB.CreateStore(IRB.CreateOr(PC, SP), RecordPtr);

    // Update the ring buffer. Top byte of ThreadLong defines the size of the
    // buffer in pages, it must be a power of two, and the start of the buffer
    // must be aligned by twice that much. Therefore wrap around of the ring
    // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
    // The use of AShr instead of LShr is due to
    //   https://bugs.llvm.org/show_bug.cgi?id=39030
    // Runtime library makes sure not to use the highest bit.
    Value *WrapMask = IRB.CreateXor(
        IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
        ConstantInt::get(IntptrTy, (uint64_t)-1));
    // Advance the position by one 8-byte record, wrapping within the buffer.
    Value *ThreadLongNew = IRB.CreateAnd(
        IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
    IRB.CreateStore(ThreadLongNew, SlotPtr);
  }

  // Get shadow base address by aligning ThreadLongMaybeUntagged up to
  // 2^kShadowBaseAlignment (Or with the low-bit mask, then Add 1).
  // Note: this is not correct if the pointer is already aligned.
  // Runtime library will make sure this never happens.
  Value *ShadowBase = IRB.CreateAdd(
      IRB.CreateOr(
          ThreadLongMaybeUntagged,
          ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
      ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
  return ShadowBase;
}
812 
813 bool HWAddressSanitizer::instrumentStack(
814     SmallVectorImpl<AllocaInst *> &Allocas,
815     SmallVectorImpl<Instruction *> &RetVec, Value *StackTag) {
816   // Ideally, we want to calculate tagged stack base pointer, and rewrite all
817   // alloca addresses using that. Unfortunately, offsets are not known yet
818   // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a
819   // temp, shift-OR it into each alloca address and xor with the retag mask.
820   // This generates one extra instruction per alloca use.
821   for (unsigned N = 0; N < Allocas.size(); ++N) {
822     auto *AI = Allocas[N];
823     IRBuilder<> IRB(AI->getNextNode());
824 
825     // Replace uses of the alloca with tagged address.
826     Value *Tag = getAllocaTag(IRB, StackTag, AI, N);
827     Value *AILong = IRB.CreatePointerCast(AI, IntptrTy);
828     Value *Replacement = tagPointer(IRB, AI->getType(), AILong, Tag);
829     std::string Name =
830         AI->hasName() ? AI->getName().str() : "alloca." + itostr(N);
831     Replacement->setName(Name + ".hwasan");
832 
833     for (auto UI = AI->use_begin(), UE = AI->use_end(); UI != UE;) {
834       Use &U = *UI++;
835       if (U.getUser() != AILong)
836         U.set(Replacement);
837     }
838 
839     tagAlloca(IRB, AI, Tag);
840 
841     for (auto RI : RetVec) {
842       IRB.SetInsertPoint(RI);
843 
844       // Re-tag alloca memory with the special UAR tag.
845       Value *Tag = getUARTag(IRB, StackTag);
846       tagAlloca(IRB, AI, Tag);
847     }
848   }
849 
850   return true;
851 }
852 
853 bool HWAddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
854   return (AI.getAllocatedType()->isSized() &&
855           // FIXME: instrument dynamic allocas, too
856           AI.isStaticAlloca() &&
857           // alloca() may be called with 0 size, ignore it.
858           getAllocaSizeInBytes(AI) > 0 &&
859           // We are only interested in allocas not promotable to registers.
860           // Promotable allocas are common under -O0.
861           !isAllocaPromotable(&AI) &&
862           // inalloca allocas are not treated as static, and we don't want
863           // dynamic alloca instrumentation for them as well.
864           !AI.isUsedWithInAlloca() &&
865           // swifterror allocas are register promoted by ISel
866           !AI.isSwiftError());
867 }
868 
// Per-function driver: collects interesting memory accesses and allocas,
// emits the prologue, and instruments everything found. Returns true if the
// function was modified.
bool HWAddressSanitizer::runOnFunction(Function &F) {
  // Never instrument the module constructor itself.
  if (&F == HwasanCtorFunction)
    return false;

  if (!F.hasFnAttribute(Attribute::SanitizeHWAddress))
    return false;

  LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n");

  SmallVector<Instruction*, 16> ToInstrument;
  SmallVector<AllocaInst*, 8> AllocasToInstrument;
  SmallVector<Instruction*, 8> RetVec;
  for (auto &BB : F) {
    for (auto &Inst : BB) {
      if (ClInstrumentStack)
        if (AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
          // Realign all allocas. We don't want small uninteresting allocas to
          // hide in instrumented alloca's padding.
          if (AI->getAlignment() < Mapping.getAllocaAlignment())
            AI->setAlignment(Mapping.getAllocaAlignment());
          // Instrument some of them.
          if (isInterestingAlloca(*AI))
            AllocasToInstrument.push_back(AI);
          continue;
        }

      // Remember all function exits so allocas can be re-tagged before them.
      if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst) ||
          isa<CleanupReturnInst>(Inst))
        RetVec.push_back(&Inst);

      Value *MaybeMask = nullptr;
      bool IsWrite;
      unsigned Alignment;
      uint64_t TypeSize;
      Value *Addr = isInterestingMemoryAccess(&Inst, &IsWrite, &TypeSize,
                                              &Alignment, &MaybeMask);
      if (Addr || isa<MemIntrinsic>(Inst))
        ToInstrument.push_back(&Inst);
    }
  }

  // Nothing to do for this function.
  if (AllocasToInstrument.empty() && ToInstrument.empty())
    return false;

  if (ClCreateFrameDescriptions && !AllocasToInstrument.empty())
    createFrameGlobal(F, createFrameString(AllocasToInstrument));

  initializeCallbacks(*F.getParent());

  // LocalDynamicShadow must not be set across function invocations; it is
  // set below and cleared again before returning.
  assert(!LocalDynamicShadow);

  Instruction *InsertPt = &*F.getEntryBlock().begin();
  IRBuilder<> EntryIRB(InsertPt);
  // Record stack history only when there are allocas to attribute it to.
  LocalDynamicShadow = emitPrologue(EntryIRB,
                                    /*WithFrameRecord*/ ClRecordStackHistory &&
                                        !AllocasToInstrument.empty());

  bool Changed = false;
  if (!AllocasToInstrument.empty()) {
    Value *StackTag =
        ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
    Changed |= instrumentStack(AllocasToInstrument, RetVec, StackTag);
  }

  for (auto Inst : ToInstrument)
    Changed |= instrumentMemAccess(Inst);

  LocalDynamicShadow = nullptr;

  return Changed;
}
940 
941 void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple) {
942   Scale = kDefaultShadowScale;
943   if (ClMappingOffset.getNumOccurrences() > 0) {
944     InGlobal = false;
945     InTls = false;
946     Offset = ClMappingOffset;
947   } else if (ClEnableKhwasan || ClInstrumentWithCalls) {
948     InGlobal = false;
949     InTls = false;
950     Offset = 0;
951   } else if (ClWithIfunc) {
952     InGlobal = true;
953     InTls = false;
954     Offset = kDynamicShadowSentinel;
955   } else if (ClWithTls) {
956     InGlobal = false;
957     InTls = true;
958     Offset = kDynamicShadowSentinel;
959   } else {
960     InGlobal = false;
961     InTls = false;
962     Offset = kDynamicShadowSentinel;
963   }
964 }
965