//===------ BPFAbstractMemberAccess.cpp - Abstracting Member Accesses -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass abstracts struct/union member accesses in order to support
// compile-once run-everywhere (CO-RE). CO-RE intends to compile a program
// once so that it can run on different kernels. In particular, if a bpf
// program tries to access a particular kernel data structure member, the
// details of the intermediate member accesses will be remembered so the bpf
// loader can make the necessary adjustments right before program loading.
//
// For example,
//
//   struct s {
//     int a;
//     int b;
//   };
//   struct t {
//     struct s c;
//     int d;
//   };
//   struct t e;
//
// For the member access e.c.b, the compiler will generate code
//   &e + 4
//
// The compile-once run-everywhere instead generates the following code
//   r = 4
//   &e + r
// The "4" in "r = 4" can be changed based on a particular kernel version.
// For example, on a particular kernel version, if struct s is changed to
//
//   struct s {
//     int new_field;
//     int a;
//     int b;
//   }
//
// By repeating the member access on the host, the bpf loader can
// adjust "r = 4" as "r = 8".
45 // 46 // This feature relies on the following three intrinsic calls: 47 // addr = preserve_array_access_index(base, dimension, index) 48 // addr = preserve_union_access_index(base, di_index) 49 // !llvm.preserve.access.index <union_ditype> 50 // addr = preserve_struct_access_index(base, gep_index, di_index) 51 // !llvm.preserve.access.index <struct_ditype> 52 // 53 //===----------------------------------------------------------------------===// 54 55 #include "BPF.h" 56 #include "BPFCORE.h" 57 #include "BPFTargetMachine.h" 58 #include "llvm/IR/DebugInfoMetadata.h" 59 #include "llvm/IR/GlobalVariable.h" 60 #include "llvm/IR/Instruction.h" 61 #include "llvm/IR/Instructions.h" 62 #include "llvm/IR/Module.h" 63 #include "llvm/IR/Type.h" 64 #include "llvm/IR/User.h" 65 #include "llvm/IR/Value.h" 66 #include "llvm/Pass.h" 67 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 68 69 #define DEBUG_TYPE "bpf-abstract-member-access" 70 71 namespace llvm { 72 const std::string BPFCoreSharedInfo::AmaAttr = "btf_ama"; 73 const std::string BPFCoreSharedInfo::PatchableExtSecName = 74 ".BPF.patchable_externs"; 75 } // namespace llvm 76 77 using namespace llvm; 78 79 namespace { 80 81 class BPFAbstractMemberAccess final : public ModulePass { 82 StringRef getPassName() const override { 83 return "BPF Abstract Member Access"; 84 } 85 86 bool runOnModule(Module &M) override; 87 88 public: 89 static char ID; 90 BPFAbstractMemberAccess() : ModulePass(ID) {} 91 92 private: 93 enum : uint32_t { 94 BPFPreserveArrayAI = 1, 95 BPFPreserveUnionAI = 2, 96 BPFPreserveStructAI = 3, 97 }; 98 99 std::map<std::string, GlobalVariable *> GEPGlobals; 100 // A map to link preserve_*_access_index instrinsic calls. 101 std::map<CallInst *, std::pair<CallInst *, uint32_t>> AIChain; 102 // A map to hold all the base preserve_*_access_index instrinsic calls. 103 // The base call is not an input of any other preserve_*_access_index 104 // intrinsics. 
105 std::map<CallInst *, uint32_t> BaseAICalls; 106 107 bool doTransformation(Module &M); 108 109 void traceAICall(CallInst *Call, uint32_t Kind); 110 void traceBitCast(BitCastInst *BitCast, CallInst *Parent, uint32_t Kind); 111 void traceGEP(GetElementPtrInst *GEP, CallInst *Parent, uint32_t Kind); 112 void collectAICallChains(Module &M, Function &F); 113 114 bool IsPreserveDIAccessIndexCall(const CallInst *Call, uint32_t &Kind); 115 bool removePreserveAccessIndexIntrinsic(Module &M); 116 void replaceWithGEP(std::vector<CallInst *> &CallList, 117 uint32_t NumOfZerosIndex, uint32_t DIIndex); 118 119 Value *computeBaseAndAccessStr(CallInst *Call, std::string &AccessStr, 120 std::string &AccessKey, uint32_t Kind, 121 MDNode *&TypeMeta); 122 bool getAccessIndex(const Value *IndexValue, uint64_t &AccessIndex); 123 bool transformGEPChain(Module &M, CallInst *Call, uint32_t Kind); 124 }; 125 } // End anonymous namespace 126 127 char BPFAbstractMemberAccess::ID = 0; 128 INITIALIZE_PASS(BPFAbstractMemberAccess, DEBUG_TYPE, 129 "abstracting struct/union member accessees", false, false) 130 131 ModulePass *llvm::createBPFAbstractMemberAccess() { 132 return new BPFAbstractMemberAccess(); 133 } 134 135 bool BPFAbstractMemberAccess::runOnModule(Module &M) { 136 LLVM_DEBUG(dbgs() << "********** Abstract Member Accesses **********\n"); 137 138 // Bail out if no debug info. 139 if (empty(M.debug_compile_units())) 140 return false; 141 142 return doTransformation(M); 143 } 144 145 /// Check whether a call is a preserve_*_access_index intrinsic call or not. 
146 bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call, 147 uint32_t &Kind) { 148 if (!Call) 149 return false; 150 151 const auto *GV = dyn_cast<GlobalValue>(Call->getCalledValue()); 152 if (!GV) 153 return false; 154 if (GV->getName().startswith("llvm.preserve.array.access.index")) { 155 Kind = BPFPreserveArrayAI; 156 return true; 157 } 158 if (GV->getName().startswith("llvm.preserve.union.access.index")) { 159 Kind = BPFPreserveUnionAI; 160 return true; 161 } 162 if (GV->getName().startswith("llvm.preserve.struct.access.index")) { 163 Kind = BPFPreserveStructAI; 164 return true; 165 } 166 167 return false; 168 } 169 170 void BPFAbstractMemberAccess::replaceWithGEP(std::vector<CallInst *> &CallList, 171 uint32_t DimensionIndex, 172 uint32_t GEPIndex) { 173 for (auto Call : CallList) { 174 uint32_t Dimension = 1; 175 if (DimensionIndex > 0) 176 Dimension = cast<ConstantInt>(Call->getArgOperand(DimensionIndex)) 177 ->getZExtValue(); 178 179 Constant *Zero = 180 ConstantInt::get(Type::getInt32Ty(Call->getParent()->getContext()), 0); 181 SmallVector<Value *, 4> IdxList; 182 for (unsigned I = 0; I < Dimension; ++I) 183 IdxList.push_back(Zero); 184 IdxList.push_back(Call->getArgOperand(GEPIndex)); 185 186 auto *GEP = GetElementPtrInst::CreateInBounds(Call->getArgOperand(0), 187 IdxList, "", Call); 188 Call->replaceAllUsesWith(GEP); 189 Call->eraseFromParent(); 190 } 191 } 192 193 bool BPFAbstractMemberAccess::removePreserveAccessIndexIntrinsic(Module &M) { 194 std::vector<CallInst *> PreserveArrayIndexCalls; 195 std::vector<CallInst *> PreserveUnionIndexCalls; 196 std::vector<CallInst *> PreserveStructIndexCalls; 197 bool Found = false; 198 199 for (Function &F : M) 200 for (auto &BB : F) 201 for (auto &I : BB) { 202 auto *Call = dyn_cast<CallInst>(&I); 203 uint32_t Kind; 204 if (!IsPreserveDIAccessIndexCall(Call, Kind)) 205 continue; 206 207 Found = true; 208 if (Kind == BPFPreserveArrayAI) 209 PreserveArrayIndexCalls.push_back(Call); 210 
else if (Kind == BPFPreserveUnionAI) 211 PreserveUnionIndexCalls.push_back(Call); 212 else 213 PreserveStructIndexCalls.push_back(Call); 214 } 215 216 // do the following transformation: 217 // . addr = preserve_array_access_index(base, dimension, index) 218 // is transformed to 219 // addr = GEP(base, dimenion's zero's, index) 220 // . addr = preserve_union_access_index(base, di_index) 221 // is transformed to 222 // addr = base, i.e., all usages of "addr" are replaced by "base". 223 // . addr = preserve_struct_access_index(base, gep_index, di_index) 224 // is transformed to 225 // addr = GEP(base, 0, gep_index) 226 replaceWithGEP(PreserveArrayIndexCalls, 1, 2); 227 replaceWithGEP(PreserveStructIndexCalls, 0, 1); 228 for (auto Call : PreserveUnionIndexCalls) { 229 Call->replaceAllUsesWith(Call->getArgOperand(0)); 230 Call->eraseFromParent(); 231 } 232 233 return Found; 234 } 235 236 void BPFAbstractMemberAccess::traceAICall(CallInst *Call, uint32_t Kind) { 237 for (User *U : Call->users()) { 238 Instruction *Inst = dyn_cast<Instruction>(U); 239 if (!Inst) 240 continue; 241 242 if (auto *BI = dyn_cast<BitCastInst>(Inst)) { 243 traceBitCast(BI, Call, Kind); 244 } else if (auto *CI = dyn_cast<CallInst>(Inst)) { 245 uint32_t CIKind; 246 if (IsPreserveDIAccessIndexCall(CI, CIKind)) { 247 AIChain[CI] = std::make_pair(Call, Kind); 248 traceAICall(CI, CIKind); 249 } else { 250 BaseAICalls[Call] = Kind; 251 } 252 } else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) { 253 if (GI->hasAllZeroIndices()) 254 traceGEP(GI, Call, Kind); 255 else 256 BaseAICalls[Call] = Kind; 257 } 258 } 259 } 260 261 void BPFAbstractMemberAccess::traceBitCast(BitCastInst *BitCast, 262 CallInst *Parent, uint32_t Kind) { 263 for (User *U : BitCast->users()) { 264 Instruction *Inst = dyn_cast<Instruction>(U); 265 if (!Inst) 266 continue; 267 268 if (auto *BI = dyn_cast<BitCastInst>(Inst)) { 269 traceBitCast(BI, Parent, Kind); 270 } else if (auto *CI = dyn_cast<CallInst>(Inst)) { 271 uint32_t 
CIKind; 272 if (IsPreserveDIAccessIndexCall(CI, CIKind)) { 273 AIChain[CI] = std::make_pair(Parent, Kind); 274 traceAICall(CI, CIKind); 275 } else { 276 BaseAICalls[Parent] = Kind; 277 } 278 } else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) { 279 if (GI->hasAllZeroIndices()) 280 traceGEP(GI, Parent, Kind); 281 else 282 BaseAICalls[Parent] = Kind; 283 } 284 } 285 } 286 287 void BPFAbstractMemberAccess::traceGEP(GetElementPtrInst *GEP, CallInst *Parent, 288 uint32_t Kind) { 289 for (User *U : GEP->users()) { 290 Instruction *Inst = dyn_cast<Instruction>(U); 291 if (!Inst) 292 continue; 293 294 if (auto *BI = dyn_cast<BitCastInst>(Inst)) { 295 traceBitCast(BI, Parent, Kind); 296 } else if (auto *CI = dyn_cast<CallInst>(Inst)) { 297 uint32_t CIKind; 298 if (IsPreserveDIAccessIndexCall(CI, CIKind)) { 299 AIChain[CI] = std::make_pair(Parent, Kind); 300 traceAICall(CI, CIKind); 301 } else { 302 BaseAICalls[Parent] = Kind; 303 } 304 } else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) { 305 if (GI->hasAllZeroIndices()) 306 traceGEP(GI, Parent, Kind); 307 else 308 BaseAICalls[Parent] = Kind; 309 } 310 } 311 } 312 313 void BPFAbstractMemberAccess::collectAICallChains(Module &M, Function &F) { 314 AIChain.clear(); 315 BaseAICalls.clear(); 316 317 for (auto &BB : F) 318 for (auto &I : BB) { 319 uint32_t Kind; 320 auto *Call = dyn_cast<CallInst>(&I); 321 if (!IsPreserveDIAccessIndexCall(Call, Kind) || 322 AIChain.find(Call) != AIChain.end()) 323 continue; 324 325 traceAICall(Call, Kind); 326 } 327 } 328 329 /// Get access index from the preserve_*_access_index intrinsic calls. 
330 bool BPFAbstractMemberAccess::getAccessIndex(const Value *IndexValue, 331 uint64_t &AccessIndex) { 332 const ConstantInt *CV = dyn_cast<ConstantInt>(IndexValue); 333 if (!CV) 334 return false; 335 336 AccessIndex = CV->getValue().getZExtValue(); 337 return true; 338 } 339 340 /// Compute the base of the whole preserve_*_access_index chains, i.e., the base 341 /// pointer of the first preserve_*_access_index call, and construct the access 342 /// string, which will be the name of a global variable. 343 Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call, 344 std::string &AccessStr, 345 std::string &AccessKey, 346 uint32_t Kind, 347 MDNode *&TypeMeta) { 348 Value *Base = nullptr; 349 std::vector<uint64_t> AccessIndices; 350 uint64_t TypeNameIndex = 0; 351 std::string LastTypeName; 352 353 while (Call) { 354 // Base of original corresponding GEP 355 Base = Call->getArgOperand(0); 356 357 // Type Name 358 std::string TypeName; 359 MDNode *MDN; 360 if (Kind == BPFPreserveUnionAI || Kind == BPFPreserveStructAI) { 361 MDN = Call->getMetadata(LLVMContext::MD_preserve_access_index); 362 if (!MDN) 363 return nullptr; 364 365 DIType *Ty = dyn_cast<DIType>(MDN); 366 if (!Ty) 367 return nullptr; 368 369 TypeName = Ty->getName(); 370 } 371 372 // Access Index 373 uint64_t AccessIndex; 374 uint32_t ArgIndex = (Kind == BPFPreserveUnionAI) ? 1 : 2; 375 if (!getAccessIndex(Call->getArgOperand(ArgIndex), AccessIndex)) 376 return nullptr; 377 378 AccessIndices.push_back(AccessIndex); 379 if (TypeName.size()) { 380 TypeNameIndex = AccessIndices.size() - 1; 381 LastTypeName = TypeName; 382 TypeMeta = MDN; 383 } 384 385 Kind = AIChain[Call].second; 386 Call = AIChain[Call].first; 387 } 388 389 // The intial type name is required. 390 // FIXME: if the initial type access is an array index, e.g., 391 // &a[3].b.c, only one dimentional array is supported. 
392 if (!LastTypeName.size() || AccessIndices.size() > TypeNameIndex + 2) 393 return nullptr; 394 395 // Construct the type string AccessStr. 396 for (unsigned I = 0; I < AccessIndices.size(); ++I) 397 AccessStr = std::to_string(AccessIndices[I]) + ":" + AccessStr; 398 399 if (TypeNameIndex == AccessIndices.size() - 1) 400 AccessStr = "0:" + AccessStr; 401 402 // Access key is the type name + access string, uniquely identifying 403 // one kernel memory access. 404 AccessKey = LastTypeName + ":" + AccessStr; 405 406 return Base; 407 } 408 409 /// Call/Kind is the base preserve_*_access_index() call. Attempts to do 410 /// transformation to a chain of relocable GEPs. 411 bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call, 412 uint32_t Kind) { 413 std::string AccessStr, AccessKey; 414 MDNode *TypeMeta = nullptr; 415 Value *Base = 416 computeBaseAndAccessStr(Call, AccessStr, AccessKey, Kind, TypeMeta); 417 if (!Base) 418 return false; 419 420 // Do the transformation 421 // For any original GEP Call and Base %2 like 422 // %4 = bitcast %struct.net_device** %dev1 to i64* 423 // it is transformed to: 424 // %6 = load __BTF_0:sk_buff:0:0:2:0: 425 // %7 = bitcast %struct.sk_buff* %2 to i8* 426 // %8 = getelementptr i8, i8* %7, %6 427 // %9 = bitcast i8* %8 to i64* 428 // using %9 instead of %4 429 // The original Call inst is removed. 430 BasicBlock *BB = Call->getParent(); 431 GlobalVariable *GV; 432 433 if (GEPGlobals.find(AccessKey) == GEPGlobals.end()) { 434 GV = new GlobalVariable(M, Type::getInt64Ty(BB->getContext()), false, 435 GlobalVariable::ExternalLinkage, NULL, AccessStr); 436 GV->addAttribute(BPFCoreSharedInfo::AmaAttr); 437 // Set the metadata (debuginfo types) for the global. 438 if (TypeMeta) 439 GV->setMetadata(LLVMContext::MD_preserve_access_index, TypeMeta); 440 GEPGlobals[AccessKey] = GV; 441 } else { 442 GV = GEPGlobals[AccessKey]; 443 } 444 445 // Load the global variable. 
446 auto *LDInst = new LoadInst(Type::getInt64Ty(BB->getContext()), GV); 447 BB->getInstList().insert(Call->getIterator(), LDInst); 448 449 // Generate a BitCast 450 auto *BCInst = new BitCastInst(Base, Type::getInt8PtrTy(BB->getContext())); 451 BB->getInstList().insert(Call->getIterator(), BCInst); 452 453 // Generate a GetElementPtr 454 auto *GEP = GetElementPtrInst::Create(Type::getInt8Ty(BB->getContext()), 455 BCInst, LDInst); 456 BB->getInstList().insert(Call->getIterator(), GEP); 457 458 // Generate a BitCast 459 auto *BCInst2 = new BitCastInst(GEP, Call->getType()); 460 BB->getInstList().insert(Call->getIterator(), BCInst2); 461 462 Call->replaceAllUsesWith(BCInst2); 463 Call->eraseFromParent(); 464 465 return true; 466 } 467 468 bool BPFAbstractMemberAccess::doTransformation(Module &M) { 469 bool Transformed = false; 470 471 for (Function &F : M) { 472 // Collect PreserveDIAccessIndex Intrinsic call chains. 473 // The call chains will be used to generate the access 474 // patterns similar to GEP. 475 collectAICallChains(M, F); 476 477 for (auto &C : BaseAICalls) 478 Transformed = transformGEPChain(M, C.first, C.second) || Transformed; 479 } 480 481 return removePreserveAccessIndexIntrinsic(M) || Transformed; 482 } 483