1 //===------ BPFAbstractMemberAccess.cpp - Abstracting Member Accesses -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass abstracted struct/union member accesses in order to support 10 // compile-once run-everywhere (CO-RE). The CO-RE intends to compile the program 11 // which can run on different kernels. In particular, if bpf program tries to 12 // access a particular kernel data structure member, the details of the 13 // intermediate member access will be remembered so bpf loader can do 14 // necessary adjustment right before program loading. 15 // 16 // For example, 17 // 18 // struct s { 19 // int a; 20 // int b; 21 // }; 22 // struct t { 23 // struct s c; 24 // int d; 25 // }; 26 // struct t e; 27 // 28 // For the member access e.c.b, the compiler will generate code 29 // &e + 4 30 // 31 // The compile-once run-everywhere instead generates the following code 32 // r = 4 33 // &e + r 34 // The "4" in "r = 4" can be changed based on a particular kernel version. 35 // For example, on a particular kernel version, if struct s is changed to 36 // 37 // struct s { 38 // int new_field; 39 // int a; 40 // int b; 41 // } 42 // 43 // By repeating the member access on the host, the bpf loader can 44 // adjust "r = 4" as "r = 8". 45 // 46 // This feature relies on the following three intrinsic calls: 47 // addr = preserve_array_access_index(base, dimension, index) 48 // addr = preserve_union_access_index(base, di_index) 49 // !llvm.preserve.access.index <union_ditype> 50 // addr = preserve_struct_access_index(base, gep_index, di_index) 51 // !llvm.preserve.access.index <struct_ditype> 52 // 53 // Bitfield member access needs special attention. User cannot take the 54 // address of a bitfield acceess. To facilitate kernel verifier 55 // for easy bitfield code optimization, a new clang intrinsic is introduced: 56 // uint32_t __builtin_preserve_field_info(member_access, info_kind) 57 // In IR, a chain with two (or more) intrinsic calls will be generated: 58 // ... 59 // addr = preserve_struct_access_index(base, 1, 1) !struct s 60 // uint32_t result = bpf_preserve_field_info(addr, info_kind) 61 // 62 // Suppose the info_kind is FIELD_SIGNEDNESS, 63 // The above two IR intrinsics will be replaced with 64 // a relocatable insn: 65 // signness = /* signness of member_access */ 66 // and signness can be changed by bpf loader based on the 67 // types on the host. 68 // 69 // User can also test whether a field exists or not with 70 // uint32_t result = bpf_preserve_field_info(member_access, FIELD_EXISTENCE) 71 // The field will be always available (result = 1) during initial 72 // compilation, but bpf loader can patch with the correct value 73 // on the target host where the member_access may or may not be available 74 // 75 //===----------------------------------------------------------------------===// 76 77 #include "BPF.h" 78 #include "BPFCORE.h" 79 #include "BPFTargetMachine.h" 80 #include "llvm/IR/DebugInfoMetadata.h" 81 #include "llvm/IR/GlobalVariable.h" 82 #include "llvm/IR/Instruction.h" 83 #include "llvm/IR/Instructions.h" 84 #include "llvm/IR/Module.h" 85 #include "llvm/IR/Type.h" 86 #include "llvm/IR/User.h" 87 #include "llvm/IR/Value.h" 88 #include "llvm/Pass.h" 89 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 90 #include <stack> 91 92 #define DEBUG_TYPE "bpf-abstract-member-access" 93 94 namespace llvm { 95 constexpr StringRef BPFCoreSharedInfo::AmaAttr; 96 } // namespace llvm 97 98 using namespace llvm; 99 100 namespace { 101 102 class BPFAbstractMemberAccess final : public ModulePass { 103 StringRef getPassName() const override { 104 return "BPF Abstract Member Access"; 105 } 106 107 bool runOnModule(Module &M) override; 108 109 public: 110 static char ID; 111 TargetMachine *TM; 112 // Add optional BPFTargetMachine parameter so that BPF backend can add the phase 113 // with target machine to find out the endianness. The default constructor (without 114 // parameters) is used by the pass manager for managing purposes. 115 BPFAbstractMemberAccess(BPFTargetMachine *TM = nullptr) : ModulePass(ID), TM(TM) {} 116 117 struct CallInfo { 118 uint32_t Kind; 119 uint32_t AccessIndex; 120 Align RecordAlignment; 121 MDNode *Metadata; 122 Value *Base; 123 }; 124 typedef std::stack<std::pair<CallInst *, CallInfo>> CallInfoStack; 125 126 private: 127 enum : uint32_t { 128 BPFPreserveArrayAI = 1, 129 BPFPreserveUnionAI = 2, 130 BPFPreserveStructAI = 3, 131 BPFPreserveFieldInfoAI = 4, 132 }; 133 134 const DataLayout *DL = nullptr; 135 136 std::map<std::string, GlobalVariable *> GEPGlobals; 137 // A map to link preserve_*_access_index instrinsic calls. 138 std::map<CallInst *, std::pair<CallInst *, CallInfo>> AIChain; 139 // A map to hold all the base preserve_*_access_index instrinsic calls. 140 // The base call is not an input of any other preserve_* 141 // intrinsics. 142 std::map<CallInst *, CallInfo> BaseAICalls; 143 144 bool doTransformation(Module &M); 145 146 void traceAICall(CallInst *Call, CallInfo &ParentInfo); 147 void traceBitCast(BitCastInst *BitCast, CallInst *Parent, 148 CallInfo &ParentInfo); 149 void traceGEP(GetElementPtrInst *GEP, CallInst *Parent, 150 CallInfo &ParentInfo); 151 void collectAICallChains(Module &M, Function &F); 152 153 bool IsPreserveDIAccessIndexCall(const CallInst *Call, CallInfo &Cinfo); 154 bool IsValidAIChain(const MDNode *ParentMeta, uint32_t ParentAI, 155 const MDNode *ChildMeta); 156 bool removePreserveAccessIndexIntrinsic(Module &M); 157 void replaceWithGEP(std::vector<CallInst *> &CallList, 158 uint32_t NumOfZerosIndex, uint32_t DIIndex); 159 bool HasPreserveFieldInfoCall(CallInfoStack &CallStack); 160 void GetStorageBitRange(DIDerivedType *MemberTy, Align RecordAlignment, 161 uint32_t &StartBitOffset, uint32_t &EndBitOffset); 162 uint32_t GetFieldInfo(uint32_t InfoKind, DICompositeType *CTy, 163 uint32_t AccessIndex, uint32_t PatchImm, 164 Align RecordAlignment); 165 166 Value *computeBaseAndAccessKey(CallInst *Call, CallInfo &CInfo, 167 std::string &AccessKey, MDNode *&BaseMeta); 168 MDNode *computeAccessKey(CallInst *Call, CallInfo &CInfo, 169 std::string &AccessKey, bool &IsInt32Ret); 170 uint64_t getConstant(const Value *IndexValue); 171 bool transformGEPChain(Module &M, CallInst *Call, CallInfo &CInfo); 172 }; 173 } // End anonymous namespace 174 175 char BPFAbstractMemberAccess::ID = 0; 176 INITIALIZE_PASS(BPFAbstractMemberAccess, DEBUG_TYPE, 177 "abstracting struct/union member accessees", false, false) 178 179 ModulePass *llvm::createBPFAbstractMemberAccess(BPFTargetMachine *TM) { 180 return new BPFAbstractMemberAccess(TM); 181 } 182 183 bool BPFAbstractMemberAccess::runOnModule(Module &M) { 184 LLVM_DEBUG(dbgs() << "********** Abstract Member Accesses **********\n"); 185 186 // Bail out if no debug info. 187 if (M.debug_compile_units().empty()) 188 return false; 189 190 DL = &M.getDataLayout(); 191 return doTransformation(M); 192 } 193 194 static bool SkipDIDerivedTag(unsigned Tag, bool skipTypedef) { 195 if (Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type && 196 Tag != dwarf::DW_TAG_volatile_type && 197 Tag != dwarf::DW_TAG_restrict_type && 198 Tag != dwarf::DW_TAG_member) 199 return false; 200 if (Tag == dwarf::DW_TAG_typedef && !skipTypedef) 201 return false; 202 return true; 203 } 204 205 static DIType * stripQualifiers(DIType *Ty, bool skipTypedef = true) { 206 while (auto *DTy = dyn_cast<DIDerivedType>(Ty)) { 207 if (!SkipDIDerivedTag(DTy->getTag(), skipTypedef)) 208 break; 209 Ty = DTy->getBaseType(); 210 } 211 return Ty; 212 } 213 214 static const DIType * stripQualifiers(const DIType *Ty) { 215 while (auto *DTy = dyn_cast<DIDerivedType>(Ty)) { 216 if (!SkipDIDerivedTag(DTy->getTag(), true)) 217 break; 218 Ty = DTy->getBaseType(); 219 } 220 return Ty; 221 } 222 223 static uint32_t calcArraySize(const DICompositeType *CTy, uint32_t StartDim) { 224 DINodeArray Elements = CTy->getElements(); 225 uint32_t DimSize = 1; 226 for (uint32_t I = StartDim; I < Elements.size(); ++I) { 227 if (auto *Element = dyn_cast_or_null<DINode>(Elements[I])) 228 if (Element->getTag() == dwarf::DW_TAG_subrange_type) { 229 const DISubrange *SR = cast<DISubrange>(Element); 230 auto *CI = SR->getCount().dyn_cast<ConstantInt *>(); 231 DimSize *= CI->getSExtValue(); 232 } 233 } 234 235 return DimSize; 236 } 237 238 /// Check whether a call is a preserve_*_access_index intrinsic call or not. 239 bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call, 240 CallInfo &CInfo) { 241 if (!Call) 242 return false; 243 244 const auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand()); 245 if (!GV) 246 return false; 247 if (GV->getName().startswith("llvm.preserve.array.access.index")) { 248 CInfo.Kind = BPFPreserveArrayAI; 249 CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index); 250 if (!CInfo.Metadata) 251 report_fatal_error("Missing metadata for llvm.preserve.array.access.index intrinsic"); 252 CInfo.AccessIndex = getConstant(Call->getArgOperand(2)); 253 CInfo.Base = Call->getArgOperand(0); 254 CInfo.RecordAlignment = 255 DL->getABITypeAlign(CInfo.Base->getType()->getPointerElementType()); 256 return true; 257 } 258 if (GV->getName().startswith("llvm.preserve.union.access.index")) { 259 CInfo.Kind = BPFPreserveUnionAI; 260 CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index); 261 if (!CInfo.Metadata) 262 report_fatal_error("Missing metadata for llvm.preserve.union.access.index intrinsic"); 263 CInfo.AccessIndex = getConstant(Call->getArgOperand(1)); 264 CInfo.Base = Call->getArgOperand(0); 265 CInfo.RecordAlignment = 266 DL->getABITypeAlign(CInfo.Base->getType()->getPointerElementType()); 267 return true; 268 } 269 if (GV->getName().startswith("llvm.preserve.struct.access.index")) { 270 CInfo.Kind = BPFPreserveStructAI; 271 CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index); 272 if (!CInfo.Metadata) 273 report_fatal_error("Missing metadata for llvm.preserve.struct.access.index intrinsic"); 274 CInfo.AccessIndex = getConstant(Call->getArgOperand(2)); 275 CInfo.Base = Call->getArgOperand(0); 276 CInfo.RecordAlignment = 277 DL->getABITypeAlign(CInfo.Base->getType()->getPointerElementType()); 278 return true; 279 } 280 if (GV->getName().startswith("llvm.bpf.preserve.field.info")) { 281 CInfo.Kind = BPFPreserveFieldInfoAI; 282 CInfo.Metadata = nullptr; 283 // Check validity of info_kind as clang did not check this. 284 uint64_t InfoKind = getConstant(Call->getArgOperand(1)); 285 if (InfoKind >= BPFCoreSharedInfo::MAX_FIELD_RELOC_KIND) 286 report_fatal_error("Incorrect info_kind for llvm.bpf.preserve.field.info intrinsic"); 287 CInfo.AccessIndex = InfoKind; 288 return true; 289 } 290 if (GV->getName().startswith("llvm.bpf.preserve.type.info")) { 291 CInfo.Kind = BPFPreserveFieldInfoAI; 292 CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index); 293 if (!CInfo.Metadata) 294 report_fatal_error("Missing metadata for llvm.preserve.type.info intrinsic"); 295 uint64_t Flag = getConstant(Call->getArgOperand(1)); 296 if (Flag >= BPFCoreSharedInfo::MAX_PRESERVE_TYPE_INFO_FLAG) 297 report_fatal_error("Incorrect flag for llvm.bpf.preserve.type.info intrinsic"); 298 if (Flag == BPFCoreSharedInfo::PRESERVE_TYPE_INFO_EXISTENCE) 299 CInfo.AccessIndex = BPFCoreSharedInfo::TYPE_EXISTENCE; 300 else 301 CInfo.AccessIndex = BPFCoreSharedInfo::TYPE_SIZE; 302 return true; 303 } 304 if (GV->getName().startswith("llvm.bpf.preserve.enum.value")) { 305 CInfo.Kind = BPFPreserveFieldInfoAI; 306 CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index); 307 if (!CInfo.Metadata) 308 report_fatal_error("Missing metadata for llvm.preserve.enum.value intrinsic"); 309 uint64_t Flag = getConstant(Call->getArgOperand(2)); 310 if (Flag >= BPFCoreSharedInfo::MAX_PRESERVE_ENUM_VALUE_FLAG) 311 report_fatal_error("Incorrect flag for llvm.bpf.preserve.enum.value intrinsic"); 312 if (Flag == BPFCoreSharedInfo::PRESERVE_ENUM_VALUE_EXISTENCE) 313 CInfo.AccessIndex = BPFCoreSharedInfo::ENUM_VALUE_EXISTENCE; 314 else 315 CInfo.AccessIndex = BPFCoreSharedInfo::ENUM_VALUE; 316 return true; 317 } 318 319 return false; 320 } 321 322 void BPFAbstractMemberAccess::replaceWithGEP(std::vector<CallInst *> &CallList, 323 uint32_t DimensionIndex, 324 uint32_t GEPIndex) { 325 for (auto Call : CallList) { 326 uint32_t Dimension = 1; 327 if (DimensionIndex > 0) 328 Dimension = getConstant(Call->getArgOperand(DimensionIndex)); 329 330 Constant *Zero = 331 ConstantInt::get(Type::getInt32Ty(Call->getParent()->getContext()), 0); 332 SmallVector<Value *, 4> IdxList; 333 for (unsigned I = 0; I < Dimension; ++I) 334 IdxList.push_back(Zero); 335 IdxList.push_back(Call->getArgOperand(GEPIndex)); 336 337 auto *GEP = GetElementPtrInst::CreateInBounds(Call->getArgOperand(0), 338 IdxList, "", Call); 339 Call->replaceAllUsesWith(GEP); 340 Call->eraseFromParent(); 341 } 342 } 343 344 bool BPFAbstractMemberAccess::removePreserveAccessIndexIntrinsic(Module &M) { 345 std::vector<CallInst *> PreserveArrayIndexCalls; 346 std::vector<CallInst *> PreserveUnionIndexCalls; 347 std::vector<CallInst *> PreserveStructIndexCalls; 348 bool Found = false; 349 350 for (Function &F : M) 351 for (auto &BB : F) 352 for (auto &I : BB) { 353 auto *Call = dyn_cast<CallInst>(&I); 354 CallInfo CInfo; 355 if (!IsPreserveDIAccessIndexCall(Call, CInfo)) 356 continue; 357 358 Found = true; 359 if (CInfo.Kind == BPFPreserveArrayAI) 360 PreserveArrayIndexCalls.push_back(Call); 361 else if (CInfo.Kind == BPFPreserveUnionAI) 362 PreserveUnionIndexCalls.push_back(Call); 363 else 364 PreserveStructIndexCalls.push_back(Call); 365 } 366 367 // do the following transformation: 368 // . addr = preserve_array_access_index(base, dimension, index) 369 // is transformed to 370 // addr = GEP(base, dimenion's zero's, index) 371 // . addr = preserve_union_access_index(base, di_index) 372 // is transformed to 373 // addr = base, i.e., all usages of "addr" are replaced by "base". 374 // . addr = preserve_struct_access_index(base, gep_index, di_index) 375 // is transformed to 376 // addr = GEP(base, 0, gep_index) 377 replaceWithGEP(PreserveArrayIndexCalls, 1, 2); 378 replaceWithGEP(PreserveStructIndexCalls, 0, 1); 379 for (auto Call : PreserveUnionIndexCalls) { 380 Call->replaceAllUsesWith(Call->getArgOperand(0)); 381 Call->eraseFromParent(); 382 } 383 384 return Found; 385 } 386 387 /// Check whether the access index chain is valid. We check 388 /// here because there may be type casts between two 389 /// access indexes. We want to ensure memory access still valid. 390 bool BPFAbstractMemberAccess::IsValidAIChain(const MDNode *ParentType, 391 uint32_t ParentAI, 392 const MDNode *ChildType) { 393 if (!ChildType) 394 return true; // preserve_field_info, no type comparison needed. 395 396 const DIType *PType = stripQualifiers(cast<DIType>(ParentType)); 397 const DIType *CType = stripQualifiers(cast<DIType>(ChildType)); 398 399 // Child is a derived/pointer type, which is due to type casting. 400 // Pointer type cannot be in the middle of chain. 401 if (isa<DIDerivedType>(CType)) 402 return false; 403 404 // Parent is a pointer type. 405 if (const auto *PtrTy = dyn_cast<DIDerivedType>(PType)) { 406 if (PtrTy->getTag() != dwarf::DW_TAG_pointer_type) 407 return false; 408 return stripQualifiers(PtrTy->getBaseType()) == CType; 409 } 410 411 // Otherwise, struct/union/array types 412 const auto *PTy = dyn_cast<DICompositeType>(PType); 413 const auto *CTy = dyn_cast<DICompositeType>(CType); 414 assert(PTy && CTy && "ParentType or ChildType is null or not composite"); 415 416 uint32_t PTyTag = PTy->getTag(); 417 assert(PTyTag == dwarf::DW_TAG_array_type || 418 PTyTag == dwarf::DW_TAG_structure_type || 419 PTyTag == dwarf::DW_TAG_union_type); 420 421 uint32_t CTyTag = CTy->getTag(); 422 assert(CTyTag == dwarf::DW_TAG_array_type || 423 CTyTag == dwarf::DW_TAG_structure_type || 424 CTyTag == dwarf::DW_TAG_union_type); 425 426 // Multi dimensional arrays, base element should be the same 427 if (PTyTag == dwarf::DW_TAG_array_type && PTyTag == CTyTag) 428 return PTy->getBaseType() == CTy->getBaseType(); 429 430 DIType *Ty; 431 if (PTyTag == dwarf::DW_TAG_array_type) 432 Ty = PTy->getBaseType(); 433 else 434 Ty = dyn_cast<DIType>(PTy->getElements()[ParentAI]); 435 436 return dyn_cast<DICompositeType>(stripQualifiers(Ty)) == CTy; 437 } 438 439 void BPFAbstractMemberAccess::traceAICall(CallInst *Call, 440 CallInfo &ParentInfo) { 441 for (User *U : Call->users()) { 442 Instruction *Inst = dyn_cast<Instruction>(U); 443 if (!Inst) 444 continue; 445 446 if (auto *BI = dyn_cast<BitCastInst>(Inst)) { 447 traceBitCast(BI, Call, ParentInfo); 448 } else if (auto *CI = dyn_cast<CallInst>(Inst)) { 449 CallInfo ChildInfo; 450 451 if (IsPreserveDIAccessIndexCall(CI, ChildInfo) && 452 IsValidAIChain(ParentInfo.Metadata, ParentInfo.AccessIndex, 453 ChildInfo.Metadata)) { 454 AIChain[CI] = std::make_pair(Call, ParentInfo); 455 traceAICall(CI, ChildInfo); 456 } else { 457 BaseAICalls[Call] = ParentInfo; 458 } 459 } else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) { 460 if (GI->hasAllZeroIndices()) 461 traceGEP(GI, Call, ParentInfo); 462 else 463 BaseAICalls[Call] = ParentInfo; 464 } else { 465 BaseAICalls[Call] = ParentInfo; 466 } 467 } 468 } 469 470 void BPFAbstractMemberAccess::traceBitCast(BitCastInst *BitCast, 471 CallInst *Parent, 472 CallInfo &ParentInfo) { 473 for (User *U : BitCast->users()) { 474 Instruction *Inst = dyn_cast<Instruction>(U); 475 if (!Inst) 476 continue; 477 478 if (auto *BI = dyn_cast<BitCastInst>(Inst)) { 479 traceBitCast(BI, Parent, ParentInfo); 480 } else if (auto *CI = dyn_cast<CallInst>(Inst)) { 481 CallInfo ChildInfo; 482 if (IsPreserveDIAccessIndexCall(CI, ChildInfo) && 483 IsValidAIChain(ParentInfo.Metadata, ParentInfo.AccessIndex, 484 ChildInfo.Metadata)) { 485 AIChain[CI] = std::make_pair(Parent, ParentInfo); 486 traceAICall(CI, ChildInfo); 487 } else { 488 BaseAICalls[Parent] = ParentInfo; 489 } 490 } else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) { 491 if (GI->hasAllZeroIndices()) 492 traceGEP(GI, Parent, ParentInfo); 493 else 494 BaseAICalls[Parent] = ParentInfo; 495 } else { 496 BaseAICalls[Parent] = ParentInfo; 497 } 498 } 499 } 500 501 void BPFAbstractMemberAccess::traceGEP(GetElementPtrInst *GEP, CallInst *Parent, 502 CallInfo &ParentInfo) { 503 for (User *U : GEP->users()) { 504 Instruction *Inst = dyn_cast<Instruction>(U); 505 if (!Inst) 506 continue; 507 508 if (auto *BI = dyn_cast<BitCastInst>(Inst)) { 509 traceBitCast(BI, Parent, ParentInfo); 510 } else if (auto *CI = dyn_cast<CallInst>(Inst)) { 511 CallInfo ChildInfo; 512 if (IsPreserveDIAccessIndexCall(CI, ChildInfo) && 513 IsValidAIChain(ParentInfo.Metadata, ParentInfo.AccessIndex, 514 ChildInfo.Metadata)) { 515 AIChain[CI] = std::make_pair(Parent, ParentInfo); 516 traceAICall(CI, ChildInfo); 517 } else { 518 BaseAICalls[Parent] = ParentInfo; 519 } 520 } else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) { 521 if (GI->hasAllZeroIndices()) 522 traceGEP(GI, Parent, ParentInfo); 523 else 524 BaseAICalls[Parent] = ParentInfo; 525 } else { 526 BaseAICalls[Parent] = ParentInfo; 527 } 528 } 529 } 530 531 void BPFAbstractMemberAccess::collectAICallChains(Module &M, Function &F) { 532 AIChain.clear(); 533 BaseAICalls.clear(); 534 535 for (auto &BB : F) 536 for (auto &I : BB) { 537 CallInfo CInfo; 538 auto *Call = dyn_cast<CallInst>(&I); 539 if (!IsPreserveDIAccessIndexCall(Call, CInfo) || 540 AIChain.find(Call) != AIChain.end()) 541 continue; 542 543 traceAICall(Call, CInfo); 544 } 545 } 546 547 uint64_t BPFAbstractMemberAccess::getConstant(const Value *IndexValue) { 548 const ConstantInt *CV = dyn_cast<ConstantInt>(IndexValue); 549 assert(CV); 550 return CV->getValue().getZExtValue(); 551 } 552 553 /// Get the start and the end of storage offset for \p MemberTy. 554 void BPFAbstractMemberAccess::GetStorageBitRange(DIDerivedType *MemberTy, 555 Align RecordAlignment, 556 uint32_t &StartBitOffset, 557 uint32_t &EndBitOffset) { 558 uint32_t MemberBitSize = MemberTy->getSizeInBits(); 559 uint32_t MemberBitOffset = MemberTy->getOffsetInBits(); 560 uint32_t AlignBits = RecordAlignment.value() * 8; 561 if (RecordAlignment > 8 || MemberBitSize > AlignBits) 562 report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info, " 563 "requiring too big alignment"); 564 565 StartBitOffset = MemberBitOffset & ~(AlignBits - 1); 566 if ((StartBitOffset + AlignBits) < (MemberBitOffset + MemberBitSize)) 567 report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info, " 568 "cross alignment boundary"); 569 EndBitOffset = StartBitOffset + AlignBits; 570 } 571 572 uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind, 573 DICompositeType *CTy, 574 uint32_t AccessIndex, 575 uint32_t PatchImm, 576 Align RecordAlignment) { 577 if (InfoKind == BPFCoreSharedInfo::FIELD_EXISTENCE) 578 return 1; 579 580 uint32_t Tag = CTy->getTag(); 581 if (InfoKind == BPFCoreSharedInfo::FIELD_BYTE_OFFSET) { 582 if (Tag == dwarf::DW_TAG_array_type) { 583 auto *EltTy = stripQualifiers(CTy->getBaseType()); 584 PatchImm += AccessIndex * calcArraySize(CTy, 1) * 585 (EltTy->getSizeInBits() >> 3); 586 } else if (Tag == dwarf::DW_TAG_structure_type) { 587 auto *MemberTy = cast<DIDerivedType>(CTy->getElements()[AccessIndex]); 588 if (!MemberTy->isBitField()) { 589 PatchImm += MemberTy->getOffsetInBits() >> 3; 590 } else { 591 unsigned SBitOffset, NextSBitOffset; 592 GetStorageBitRange(MemberTy, RecordAlignment, SBitOffset, 593 NextSBitOffset); 594 PatchImm += SBitOffset >> 3; 595 } 596 } 597 return PatchImm; 598 } 599 600 if (InfoKind == BPFCoreSharedInfo::FIELD_BYTE_SIZE) { 601 if (Tag == dwarf::DW_TAG_array_type) { 602 auto *EltTy = stripQualifiers(CTy->getBaseType()); 603 return calcArraySize(CTy, 1) * (EltTy->getSizeInBits() >> 3); 604 } else { 605 auto *MemberTy = cast<DIDerivedType>(CTy->getElements()[AccessIndex]); 606 uint32_t SizeInBits = MemberTy->getSizeInBits(); 607 if (!MemberTy->isBitField()) 608 return SizeInBits >> 3; 609 610 unsigned SBitOffset, NextSBitOffset; 611 GetStorageBitRange(MemberTy, RecordAlignment, SBitOffset, NextSBitOffset); 612 SizeInBits = NextSBitOffset - SBitOffset; 613 if (SizeInBits & (SizeInBits - 1)) 614 report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info"); 615 return SizeInBits >> 3; 616 } 617 } 618 619 if (InfoKind == BPFCoreSharedInfo::FIELD_SIGNEDNESS) { 620 const DIType *BaseTy; 621 if (Tag == dwarf::DW_TAG_array_type) { 622 // Signedness only checked when final array elements are accessed. 623 if (CTy->getElements().size() != 1) 624 report_fatal_error("Invalid array expression for llvm.bpf.preserve.field.info"); 625 BaseTy = stripQualifiers(CTy->getBaseType()); 626 } else { 627 auto *MemberTy = cast<DIDerivedType>(CTy->getElements()[AccessIndex]); 628 BaseTy = stripQualifiers(MemberTy->getBaseType()); 629 } 630 631 // Only basic types and enum types have signedness. 632 const auto *BTy = dyn_cast<DIBasicType>(BaseTy); 633 while (!BTy) { 634 const auto *CompTy = dyn_cast<DICompositeType>(BaseTy); 635 // Report an error if the field expression does not have signedness. 636 if (!CompTy || CompTy->getTag() != dwarf::DW_TAG_enumeration_type) 637 report_fatal_error("Invalid field expression for llvm.bpf.preserve.field.info"); 638 BaseTy = stripQualifiers(CompTy->getBaseType()); 639 BTy = dyn_cast<DIBasicType>(BaseTy); 640 } 641 uint32_t Encoding = BTy->getEncoding(); 642 return (Encoding == dwarf::DW_ATE_signed || Encoding == dwarf::DW_ATE_signed_char); 643 } 644 645 if (InfoKind == BPFCoreSharedInfo::FIELD_LSHIFT_U64) { 646 // The value is loaded into a value with FIELD_BYTE_SIZE size, 647 // and then zero or sign extended to U64. 648 // FIELD_LSHIFT_U64 and FIELD_RSHIFT_U64 are operations 649 // to extract the original value. 650 const Triple &Triple = TM->getTargetTriple(); 651 DIDerivedType *MemberTy = nullptr; 652 bool IsBitField = false; 653 uint32_t SizeInBits; 654 655 if (Tag == dwarf::DW_TAG_array_type) { 656 auto *EltTy = stripQualifiers(CTy->getBaseType()); 657 SizeInBits = calcArraySize(CTy, 1) * EltTy->getSizeInBits(); 658 } else { 659 MemberTy = cast<DIDerivedType>(CTy->getElements()[AccessIndex]); 660 SizeInBits = MemberTy->getSizeInBits(); 661 IsBitField = MemberTy->isBitField(); 662 } 663 664 if (!IsBitField) { 665 if (SizeInBits > 64) 666 report_fatal_error("too big field size for llvm.bpf.preserve.field.info"); 667 return 64 - SizeInBits; 668 } 669 670 unsigned SBitOffset, NextSBitOffset; 671 GetStorageBitRange(MemberTy, RecordAlignment, SBitOffset, NextSBitOffset); 672 if (NextSBitOffset - SBitOffset > 64) 673 report_fatal_error("too big field size for llvm.bpf.preserve.field.info"); 674 675 unsigned OffsetInBits = MemberTy->getOffsetInBits(); 676 if (Triple.getArch() == Triple::bpfel) 677 return SBitOffset + 64 - OffsetInBits - SizeInBits; 678 else 679 return OffsetInBits + 64 - NextSBitOffset; 680 } 681 682 if (InfoKind == BPFCoreSharedInfo::FIELD_RSHIFT_U64) { 683 DIDerivedType *MemberTy = nullptr; 684 bool IsBitField = false; 685 uint32_t SizeInBits; 686 if (Tag == dwarf::DW_TAG_array_type) { 687 auto *EltTy = stripQualifiers(CTy->getBaseType()); 688 SizeInBits = calcArraySize(CTy, 1) * EltTy->getSizeInBits(); 689 } else { 690 MemberTy = cast<DIDerivedType>(CTy->getElements()[AccessIndex]); 691 SizeInBits = MemberTy->getSizeInBits(); 692 IsBitField = MemberTy->isBitField(); 693 } 694 695 if (!IsBitField) { 696 if (SizeInBits > 64) 697 report_fatal_error("too big field size for llvm.bpf.preserve.field.info"); 698 return 64 - SizeInBits; 699 } 700 701 unsigned SBitOffset, NextSBitOffset; 702 GetStorageBitRange(MemberTy, RecordAlignment, SBitOffset, NextSBitOffset); 703 if (NextSBitOffset - SBitOffset > 64) 704 report_fatal_error("too big field size for llvm.bpf.preserve.field.info"); 705 706 return 64 - SizeInBits; 707 } 708 709 llvm_unreachable("Unknown llvm.bpf.preserve.field.info info kind"); 710 } 711 712 bool BPFAbstractMemberAccess::HasPreserveFieldInfoCall(CallInfoStack &CallStack) { 713 // This is called in error return path, no need to maintain CallStack. 714 while (CallStack.size()) { 715 auto StackElem = CallStack.top(); 716 if (StackElem.second.Kind == BPFPreserveFieldInfoAI) 717 return true; 718 CallStack.pop(); 719 } 720 return false; 721 } 722 723 /// Compute the base of the whole preserve_* intrinsics chains, i.e., the base 724 /// pointer of the first preserve_*_access_index call, and construct the access 725 /// string, which will be the name of a global variable. 726 Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call, 727 CallInfo &CInfo, 728 std::string &AccessKey, 729 MDNode *&TypeMeta) { 730 Value *Base = nullptr; 731 std::string TypeName; 732 CallInfoStack CallStack; 733 734 // Put the access chain into a stack with the top as the head of the chain. 735 while (Call) { 736 CallStack.push(std::make_pair(Call, CInfo)); 737 CInfo = AIChain[Call].second; 738 Call = AIChain[Call].first; 739 } 740 741 // The access offset from the base of the head of chain is also 742 // calculated here as all debuginfo types are available. 743 744 // Get type name and calculate the first index. 745 // We only want to get type name from typedef, structure or union. 746 // If user wants a relocation like 747 // int *p; ... __builtin_preserve_access_index(&p[4]) ... 748 // or 749 // int a[10][20]; ... __builtin_preserve_access_index(&a[2][3]) ... 750 // we will skip them. 751 uint32_t FirstIndex = 0; 752 uint32_t PatchImm = 0; // AccessOffset or the requested field info 753 uint32_t InfoKind = BPFCoreSharedInfo::FIELD_BYTE_OFFSET; 754 while (CallStack.size()) { 755 auto StackElem = CallStack.top(); 756 Call = StackElem.first; 757 CInfo = StackElem.second; 758 759 if (!Base) 760 Base = CInfo.Base; 761 762 DIType *PossibleTypeDef = stripQualifiers(cast<DIType>(CInfo.Metadata), 763 false); 764 DIType *Ty = stripQualifiers(PossibleTypeDef); 765 if (CInfo.Kind == BPFPreserveUnionAI || 766 CInfo.Kind == BPFPreserveStructAI) { 767 // struct or union type. If the typedef is in the metadata, always 768 // use the typedef. 769 TypeName = std::string(PossibleTypeDef->getName()); 770 TypeMeta = PossibleTypeDef; 771 PatchImm += FirstIndex * (Ty->getSizeInBits() >> 3); 772 break; 773 } 774 775 assert(CInfo.Kind == BPFPreserveArrayAI); 776 777 // Array entries will always be consumed for accumulative initial index. 778 CallStack.pop(); 779 780 // BPFPreserveArrayAI 781 uint64_t AccessIndex = CInfo.AccessIndex; 782 783 DIType *BaseTy = nullptr; 784 bool CheckElemType = false; 785 if (const auto *CTy = dyn_cast<DICompositeType>(Ty)) { 786 // array type 787 assert(CTy->getTag() == dwarf::DW_TAG_array_type); 788 789 790 FirstIndex += AccessIndex * calcArraySize(CTy, 1); 791 BaseTy = stripQualifiers(CTy->getBaseType()); 792 CheckElemType = CTy->getElements().size() == 1; 793 } else { 794 // pointer type 795 auto *DTy = cast<DIDerivedType>(Ty); 796 assert(DTy->getTag() == dwarf::DW_TAG_pointer_type); 797 798 BaseTy = stripQualifiers(DTy->getBaseType()); 799 CTy = dyn_cast<DICompositeType>(BaseTy); 800 if (!CTy) { 801 CheckElemType = true; 802 } else if (CTy->getTag() != dwarf::DW_TAG_array_type) { 803 FirstIndex += AccessIndex; 804 CheckElemType = true; 805 } else { 806 FirstIndex += AccessIndex * calcArraySize(CTy, 0); 807 } 808 } 809 810 if (CheckElemType) { 811 auto *CTy = dyn_cast<DICompositeType>(BaseTy); 812 if (!CTy) { 813 if (HasPreserveFieldInfoCall(CallStack)) 814 report_fatal_error("Invalid field access for llvm.preserve.field.info intrinsic"); 815 return nullptr; 816 } 817 818 unsigned CTag = CTy->getTag(); 819 if (CTag == dwarf::DW_TAG_structure_type || CTag == dwarf::DW_TAG_union_type) { 820 TypeName = std::string(CTy->getName()); 821 } else { 822 if (HasPreserveFieldInfoCall(CallStack)) 823 report_fatal_error("Invalid field access for llvm.preserve.field.info intrinsic"); 824 return nullptr; 825 } 826 TypeMeta = CTy; 827 PatchImm += FirstIndex * (CTy->getSizeInBits() >> 3); 828 break; 829 } 830 } 831 assert(TypeName.size()); 832 AccessKey += std::to_string(FirstIndex); 833 834 // Traverse the rest of access chain to complete offset calculation 835 // and access key construction. 836 while (CallStack.size()) { 837 auto StackElem = CallStack.top(); 838 CInfo = StackElem.second; 839 CallStack.pop(); 840 841 if (CInfo.Kind == BPFPreserveFieldInfoAI) { 842 InfoKind = CInfo.AccessIndex; 843 break; 844 } 845 846 // If the next Call (the top of the stack) is a BPFPreserveFieldInfoAI, 847 // the action will be extracting field info. 848 if (CallStack.size()) { 849 auto StackElem2 = CallStack.top(); 850 CallInfo CInfo2 = StackElem2.second; 851 if (CInfo2.Kind == BPFPreserveFieldInfoAI) { 852 InfoKind = CInfo2.AccessIndex; 853 assert(CallStack.size() == 1); 854 } 855 } 856 857 // Access Index 858 uint64_t AccessIndex = CInfo.AccessIndex; 859 AccessKey += ":" + std::to_string(AccessIndex); 860 861 MDNode *MDN = CInfo.Metadata; 862 // At this stage, it cannot be pointer type. 863 auto *CTy = cast<DICompositeType>(stripQualifiers(cast<DIType>(MDN))); 864 PatchImm = GetFieldInfo(InfoKind, CTy, AccessIndex, PatchImm, 865 CInfo.RecordAlignment); 866 } 867 868 // Access key is the 869 // "llvm." + type name + ":" + reloc type + ":" + patched imm + "$" + 870 // access string, 871 // uniquely identifying one relocation. 872 // The prefix "llvm." indicates this is a temporary global, which should 873 // not be emitted to ELF file. 874 AccessKey = "llvm." + TypeName + ":" + std::to_string(InfoKind) + ":" + 875 std::to_string(PatchImm) + "$" + AccessKey; 876 877 return Base; 878 } 879 880 MDNode *BPFAbstractMemberAccess::computeAccessKey(CallInst *Call, 881 CallInfo &CInfo, 882 std::string &AccessKey, 883 bool &IsInt32Ret) { 884 DIType *Ty = stripQualifiers(cast<DIType>(CInfo.Metadata), false); 885 assert(!Ty->getName().empty()); 886 887 int64_t PatchImm; 888 std::string AccessStr("0"); 889 if (CInfo.AccessIndex == BPFCoreSharedInfo::TYPE_EXISTENCE) { 890 PatchImm = 1; 891 } else if (CInfo.AccessIndex == BPFCoreSharedInfo::TYPE_SIZE) { 892 // typedef debuginfo type has size 0, get the eventual base type. 893 DIType *BaseTy = stripQualifiers(Ty, true); 894 PatchImm = BaseTy->getSizeInBits() / 8; 895 } else { 896 // ENUM_VALUE_EXISTENCE and ENUM_VALUE 897 IsInt32Ret = false; 898 899 const auto *CE = cast<ConstantExpr>(Call->getArgOperand(1)); 900 const GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); 901 assert(GV->hasInitializer()); 902 const ConstantDataArray *DA = cast<ConstantDataArray>(GV->getInitializer()); 903 assert(DA->isString()); 904 StringRef ValueStr = DA->getAsString(); 905 906 // ValueStr format: <EnumeratorStr>:<Value> 907 size_t Separator = ValueStr.find_first_of(':'); 908 StringRef EnumeratorStr = ValueStr.substr(0, Separator); 909 910 // Find enumerator index in the debuginfo 911 DIType *BaseTy = stripQualifiers(Ty, true); 912 const auto *CTy = cast<DICompositeType>(BaseTy); 913 assert(CTy->getTag() == dwarf::DW_TAG_enumeration_type); 914 int EnumIndex = 0; 915 for (const auto Element : CTy->getElements()) { 916 const auto *Enum = cast<DIEnumerator>(Element); 917 if (Enum->getName() == EnumeratorStr) { 918 AccessStr = std::to_string(EnumIndex); 919 break; 920 } 921 EnumIndex++; 922 } 923 924 if (CInfo.AccessIndex == BPFCoreSharedInfo::ENUM_VALUE) { 925 StringRef EValueStr = ValueStr.substr(Separator + 1); 926 PatchImm = std::stoll(std::string(EValueStr)); 927 } else { 928 PatchImm = 1; 929 } 930 } 931 932 AccessKey = "llvm." + Ty->getName().str() + ":" + 933 std::to_string(CInfo.AccessIndex) + std::string(":") + 934 std::to_string(PatchImm) + std::string("$") + AccessStr; 935 936 return Ty; 937 } 938 939 /// Call/Kind is the base preserve_*_access_index() call. Attempts to do 940 /// transformation to a chain of relocable GEPs. 941 bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call, 942 CallInfo &CInfo) { 943 std::string AccessKey; 944 MDNode *TypeMeta; 945 Value *Base = nullptr; 946 bool IsInt32Ret; 947 948 IsInt32Ret = CInfo.Kind == BPFPreserveFieldInfoAI; 949 if (CInfo.Kind == BPFPreserveFieldInfoAI && CInfo.Metadata) { 950 TypeMeta = computeAccessKey(Call, CInfo, AccessKey, IsInt32Ret); 951 } else { 952 Base = computeBaseAndAccessKey(Call, CInfo, AccessKey, TypeMeta); 953 if (!Base) 954 return false; 955 } 956 957 BasicBlock *BB = Call->getParent(); 958 GlobalVariable *GV; 959 960 if (GEPGlobals.find(AccessKey) == GEPGlobals.end()) { 961 IntegerType *VarType; 962 if (IsInt32Ret) 963 VarType = Type::getInt32Ty(BB->getContext()); // 32bit return value 964 else 965 VarType = Type::getInt64Ty(BB->getContext()); // 64bit ptr or enum value 966 967 GV = new GlobalVariable(M, VarType, false, GlobalVariable::ExternalLinkage, 968 NULL, AccessKey); 969 GV->addAttribute(BPFCoreSharedInfo::AmaAttr); 970 GV->setMetadata(LLVMContext::MD_preserve_access_index, TypeMeta); 971 GEPGlobals[AccessKey] = GV; 972 } else { 973 GV = GEPGlobals[AccessKey]; 974 } 975 976 if (CInfo.Kind == BPFPreserveFieldInfoAI) { 977 // Load the global variable which represents the returned field info. 978 LoadInst *LDInst; 979 if (IsInt32Ret) 980 LDInst = new LoadInst(Type::getInt32Ty(BB->getContext()), GV, "", Call); 981 else 982 LDInst = new LoadInst(Type::getInt64Ty(BB->getContext()), GV, "", Call); 983 Call->replaceAllUsesWith(LDInst); 984 Call->eraseFromParent(); 985 return true; 986 } 987 988 // For any original GEP Call and Base %2 like 989 // %4 = bitcast %struct.net_device** %dev1 to i64* 990 // it is transformed to: 991 // %6 = load sk_buff:50:$0:0:0:2:0 992 // %7 = bitcast %struct.sk_buff* %2 to i8* 993 // %8 = getelementptr i8, i8* %7, %6 994 // %9 = bitcast i8* %8 to i64* 995 // using %9 instead of %4 996 // The original Call inst is removed. 997 998 // Load the global variable. 999 auto *LDInst = new LoadInst(Type::getInt64Ty(BB->getContext()), GV, "", Call); 1000 1001 // Generate a BitCast 1002 auto *BCInst = new BitCastInst(Base, Type::getInt8PtrTy(BB->getContext())); 1003 BB->getInstList().insert(Call->getIterator(), BCInst); 1004 1005 // Generate a GetElementPtr 1006 auto *GEP = GetElementPtrInst::Create(Type::getInt8Ty(BB->getContext()), 1007 BCInst, LDInst); 1008 BB->getInstList().insert(Call->getIterator(), GEP); 1009 1010 // Generate a BitCast 1011 auto *BCInst2 = new BitCastInst(GEP, Call->getType()); 1012 BB->getInstList().insert(Call->getIterator(), BCInst2); 1013 1014 Call->replaceAllUsesWith(BCInst2); 1015 Call->eraseFromParent(); 1016 1017 return true; 1018 } 1019 1020 bool BPFAbstractMemberAccess::doTransformation(Module &M) { 1021 bool Transformed = false; 1022 1023 for (Function &F : M) { 1024 // Collect PreserveDIAccessIndex Intrinsic call chains. 1025 // The call chains will be used to generate the access 1026 // patterns similar to GEP. 1027 collectAICallChains(M, F); 1028 1029 for (auto &C : BaseAICalls) 1030 Transformed = transformGEPChain(M, C.first, C.second) || Transformed; 1031 } 1032 1033 return removePreserveAccessIndexIntrinsic(M) || Transformed; 1034 } 1035