1 //===--- CodeGenTypes.cpp - TBAA information for LLVM CodeGen -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This is the code that manages TBAA information and defines the TBAA policy 11 // for the optimizer to use. Relevant standards text includes: 12 // 13 // C99 6.5p7 14 // C++ [basic.lval] (p10 in n3126, p15 in some earlier versions) 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "CodeGenTBAA.h" 19 #include "clang/AST/ASTContext.h" 20 #include "clang/AST/Attr.h" 21 #include "clang/AST/Mangle.h" 22 #include "clang/AST/RecordLayout.h" 23 #include "clang/Frontend/CodeGenOptions.h" 24 #include "llvm/ADT/SmallSet.h" 25 #include "llvm/IR/Constants.h" 26 #include "llvm/IR/LLVMContext.h" 27 #include "llvm/IR/Metadata.h" 28 #include "llvm/IR/Module.h" 29 #include "llvm/IR/Type.h" 30 using namespace clang; 31 using namespace CodeGen; 32 33 CodeGenTBAA::CodeGenTBAA(ASTContext &Ctx, llvm::Module &M, 34 const CodeGenOptions &CGO, 35 const LangOptions &Features, MangleContext &MContext) 36 : Context(Ctx), Module(M), CodeGenOpts(CGO), 37 Features(Features), MContext(MContext), MDHelper(M.getContext()), 38 Root(nullptr), Char(nullptr) 39 {} 40 41 CodeGenTBAA::~CodeGenTBAA() { 42 } 43 44 llvm::MDNode *CodeGenTBAA::getRoot() { 45 // Define the root of the tree. This identifies the tree, so that 46 // if our LLVM IR is linked with LLVM IR from a different front-end 47 // (or a different version of this front-end), their TBAA trees will 48 // remain distinct, and the optimizer will treat them conservatively. 49 if (!Root) { 50 if (Features.CPlusPlus) 51 Root = MDHelper.createTBAARoot("Simple C++ TBAA"); 52 else 53 Root = MDHelper.createTBAARoot("Simple C/C++ TBAA"); 54 } 55 56 return Root; 57 } 58 59 llvm::MDNode *CodeGenTBAA::createScalarTypeNode(StringRef Name, 60 llvm::MDNode *Parent, 61 uint64_t Size) { 62 (void)Size; // TODO: Support generation of size-aware type nodes. 63 return MDHelper.createTBAAScalarTypeNode(Name, Parent); 64 } 65 66 llvm::MDNode *CodeGenTBAA::getChar() { 67 // Define the root of the tree for user-accessible memory. C and C++ 68 // give special powers to char and certain similar types. However, 69 // these special powers only cover user-accessible memory, and doesn't 70 // include things like vtables. 71 if (!Char) 72 Char = createScalarTypeNode("omnipotent char", getRoot(), /* Size= */ 1); 73 74 return Char; 75 } 76 77 static bool TypeHasMayAlias(QualType QTy) { 78 // Tagged types have declarations, and therefore may have attributes. 79 if (const TagType *TTy = dyn_cast<TagType>(QTy)) 80 return TTy->getDecl()->hasAttr<MayAliasAttr>(); 81 82 // Typedef types have declarations, and therefore may have attributes. 83 if (const TypedefType *TTy = dyn_cast<TypedefType>(QTy)) { 84 if (TTy->getDecl()->hasAttr<MayAliasAttr>()) 85 return true; 86 // Also, their underlying types may have relevant attributes. 87 return TypeHasMayAlias(TTy->desugar()); 88 } 89 90 return false; 91 } 92 93 /// Check if the given type is a valid base type to be used in access tags. 94 static bool isValidBaseType(QualType QTy) { 95 if (QTy->isReferenceType()) 96 return false; 97 if (const RecordType *TTy = QTy->getAs<RecordType>()) { 98 const RecordDecl *RD = TTy->getDecl()->getDefinition(); 99 // Incomplete types are not valid base access types. 100 if (!RD) 101 return false; 102 if (RD->hasFlexibleArrayMember()) 103 return false; 104 // RD can be struct, union, class, interface or enum. 105 // For now, we only handle struct and class. 106 if (RD->isStruct() || RD->isClass()) 107 return true; 108 } 109 return false; 110 } 111 112 llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { 113 uint64_t Size = Context.getTypeSizeInChars(Ty).getQuantity(); 114 115 // Handle builtin types. 116 if (const BuiltinType *BTy = dyn_cast<BuiltinType>(Ty)) { 117 switch (BTy->getKind()) { 118 // Character types are special and can alias anything. 119 // In C++, this technically only includes "char" and "unsigned char", 120 // and not "signed char". In C, it includes all three. For now, 121 // the risk of exploiting this detail in C++ seems likely to outweigh 122 // the benefit. 123 case BuiltinType::Char_U: 124 case BuiltinType::Char_S: 125 case BuiltinType::UChar: 126 case BuiltinType::SChar: 127 return getChar(); 128 129 // Unsigned types can alias their corresponding signed types. 130 case BuiltinType::UShort: 131 return getTypeInfo(Context.ShortTy); 132 case BuiltinType::UInt: 133 return getTypeInfo(Context.IntTy); 134 case BuiltinType::ULong: 135 return getTypeInfo(Context.LongTy); 136 case BuiltinType::ULongLong: 137 return getTypeInfo(Context.LongLongTy); 138 case BuiltinType::UInt128: 139 return getTypeInfo(Context.Int128Ty); 140 141 // Treat all other builtin types as distinct types. This includes 142 // treating wchar_t, char16_t, and char32_t as distinct from their 143 // "underlying types". 144 default: 145 return createScalarTypeNode(BTy->getName(Features), getChar(), Size); 146 } 147 } 148 149 // C++1z [basic.lval]p10: "If a program attempts to access the stored value of 150 // an object through a glvalue of other than one of the following types the 151 // behavior is undefined: [...] a char, unsigned char, or std::byte type." 152 if (Ty->isStdByteType()) 153 return getChar(); 154 155 // Handle pointers and references. 156 // TODO: Implement C++'s type "similarity" and consider dis-"similar" 157 // pointers distinct. 158 if (Ty->isPointerType() || Ty->isReferenceType()) 159 return createScalarTypeNode("any pointer", getChar(), Size); 160 161 // Enum types are distinct types. In C++ they have "underlying types", 162 // however they aren't related for TBAA. 163 if (const EnumType *ETy = dyn_cast<EnumType>(Ty)) { 164 // In C++ mode, types have linkage, so we can rely on the ODR and 165 // on their mangled names, if they're external. 166 // TODO: Is there a way to get a program-wide unique name for a 167 // decl with local linkage or no linkage? 168 if (!Features.CPlusPlus || !ETy->getDecl()->isExternallyVisible()) 169 return getChar(); 170 171 SmallString<256> OutName; 172 llvm::raw_svector_ostream Out(OutName); 173 MContext.mangleTypeName(QualType(ETy, 0), Out); 174 return createScalarTypeNode(OutName, getChar(), Size); 175 } 176 177 // For now, handle any other kind of type conservatively. 178 return getChar(); 179 } 180 181 llvm::MDNode *CodeGenTBAA::getTypeInfo(QualType QTy) { 182 // At -O0 or relaxed aliasing, TBAA is not emitted for regular types. 183 if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing) 184 return nullptr; 185 186 // If the type has the may_alias attribute (even on a typedef), it is 187 // effectively in the general char alias class. 188 if (TypeHasMayAlias(QTy)) 189 return getChar(); 190 191 // We need this function to not fall back to returning the "omnipotent char" 192 // type node for aggregate and union types. Otherwise, any dereference of an 193 // aggregate will result into the may-alias access descriptor, meaning all 194 // subsequent accesses to direct and indirect members of that aggregate will 195 // be considered may-alias too. 196 // TODO: Combine getTypeInfo() and getBaseTypeInfo() into a single function. 197 if (isValidBaseType(QTy)) 198 return getBaseTypeInfo(QTy); 199 200 const Type *Ty = Context.getCanonicalType(QTy).getTypePtr(); 201 if (llvm::MDNode *N = MetadataCache[Ty]) 202 return N; 203 204 // Note that the following helper call is allowed to add new nodes to the 205 // cache, which invalidates all its previously obtained iterators. So we 206 // first generate the node for the type and then add that node to the cache. 207 llvm::MDNode *TypeNode = getTypeInfoHelper(Ty); 208 return MetadataCache[Ty] = TypeNode; 209 } 210 211 TBAAAccessInfo CodeGenTBAA::getVTablePtrAccessInfo(llvm::Type *VTablePtrType) { 212 llvm::DataLayout DL(&Module); 213 unsigned Size = DL.getPointerTypeSize(VTablePtrType); 214 return TBAAAccessInfo(createScalarTypeNode("vtable pointer", getRoot(), Size), 215 Size); 216 } 217 218 bool 219 CodeGenTBAA::CollectFields(uint64_t BaseOffset, 220 QualType QTy, 221 SmallVectorImpl<llvm::MDBuilder::TBAAStructField> & 222 Fields, 223 bool MayAlias) { 224 /* Things not handled yet include: C++ base classes, bitfields, */ 225 226 if (const RecordType *TTy = QTy->getAs<RecordType>()) { 227 const RecordDecl *RD = TTy->getDecl()->getDefinition(); 228 if (RD->hasFlexibleArrayMember()) 229 return false; 230 231 // TODO: Handle C++ base classes. 232 if (const CXXRecordDecl *Decl = dyn_cast<CXXRecordDecl>(RD)) 233 if (Decl->bases_begin() != Decl->bases_end()) 234 return false; 235 236 const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); 237 238 unsigned idx = 0; 239 for (RecordDecl::field_iterator i = RD->field_begin(), 240 e = RD->field_end(); i != e; ++i, ++idx) { 241 uint64_t Offset = BaseOffset + 242 Layout.getFieldOffset(idx) / Context.getCharWidth(); 243 QualType FieldQTy = i->getType(); 244 if (!CollectFields(Offset, FieldQTy, Fields, 245 MayAlias || TypeHasMayAlias(FieldQTy))) 246 return false; 247 } 248 return true; 249 } 250 251 /* Otherwise, treat whatever it is as a field. */ 252 uint64_t Offset = BaseOffset; 253 uint64_t Size = Context.getTypeSizeInChars(QTy).getQuantity(); 254 llvm::MDNode *TBAAType = MayAlias ? getChar() : getTypeInfo(QTy); 255 llvm::MDNode *TBAATag = getAccessTagInfo(TBAAAccessInfo(TBAAType, Size)); 256 Fields.push_back(llvm::MDBuilder::TBAAStructField(Offset, Size, TBAATag)); 257 return true; 258 } 259 260 llvm::MDNode * 261 CodeGenTBAA::getTBAAStructInfo(QualType QTy) { 262 const Type *Ty = Context.getCanonicalType(QTy).getTypePtr(); 263 264 if (llvm::MDNode *N = StructMetadataCache[Ty]) 265 return N; 266 267 SmallVector<llvm::MDBuilder::TBAAStructField, 4> Fields; 268 if (CollectFields(0, QTy, Fields, TypeHasMayAlias(QTy))) 269 return MDHelper.createTBAAStructNode(Fields); 270 271 // For now, handle any other kind of type conservatively. 272 return StructMetadataCache[Ty] = nullptr; 273 } 274 275 llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) { 276 if (auto *TTy = dyn_cast<RecordType>(Ty)) { 277 const RecordDecl *RD = TTy->getDecl()->getDefinition(); 278 const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); 279 SmallVector<llvm::MDBuilder::TBAAStructField, 4> Fields; 280 for (FieldDecl *Field : RD->fields()) { 281 QualType FieldQTy = Field->getType(); 282 llvm::MDNode *TypeNode = isValidBaseType(FieldQTy) ? 283 getBaseTypeInfo(FieldQTy) : getTypeInfo(FieldQTy); 284 if (!TypeNode) 285 return BaseTypeMetadataCache[Ty] = nullptr; 286 287 uint64_t BitOffset = Layout.getFieldOffset(Field->getFieldIndex()); 288 uint64_t Offset = Context.toCharUnitsFromBits(BitOffset).getQuantity(); 289 uint64_t Size = Context.getTypeSizeInChars(FieldQTy).getQuantity(); 290 Fields.push_back(llvm::MDBuilder::TBAAStructField(Offset, Size, 291 TypeNode)); 292 } 293 294 SmallString<256> OutName; 295 if (Features.CPlusPlus) { 296 // Don't use the mangler for C code. 297 llvm::raw_svector_ostream Out(OutName); 298 MContext.mangleTypeName(QualType(Ty, 0), Out); 299 } else { 300 OutName = RD->getName(); 301 } 302 303 // TODO: Support size-aware type nodes and create one here for the 304 // given aggregate type. 305 306 // Create the struct type node with a vector of pairs (offset, type). 307 SmallVector<std::pair<llvm::MDNode*, uint64_t>, 4> OffsetsAndTypes; 308 for (const auto &Field : Fields) 309 OffsetsAndTypes.push_back(std::make_pair(Field.TBAA, Field.Offset)); 310 return MDHelper.createTBAAStructTypeNode(OutName, OffsetsAndTypes); 311 } 312 313 return nullptr; 314 } 315 316 llvm::MDNode *CodeGenTBAA::getBaseTypeInfo(QualType QTy) { 317 if (!isValidBaseType(QTy)) 318 return nullptr; 319 320 const Type *Ty = Context.getCanonicalType(QTy).getTypePtr(); 321 if (llvm::MDNode *N = BaseTypeMetadataCache[Ty]) 322 return N; 323 324 // Note that the following helper call is allowed to add new nodes to the 325 // cache, which invalidates all its previously obtained iterators. So we 326 // first generate the node for the type and then add that node to the cache. 327 llvm::MDNode *TypeNode = getBaseTypeInfoHelper(Ty); 328 return BaseTypeMetadataCache[Ty] = TypeNode; 329 } 330 331 llvm::MDNode *CodeGenTBAA::getAccessTagInfo(TBAAAccessInfo Info) { 332 assert(!Info.isIncomplete() && "Access to an object of an incomplete type!"); 333 334 if (Info.isMayAlias()) 335 Info = TBAAAccessInfo(getChar(), Info.Size); 336 337 if (!Info.AccessType) 338 return nullptr; 339 340 if (!CodeGenOpts.StructPathTBAA) 341 Info = TBAAAccessInfo(Info.AccessType, Info.Size); 342 343 llvm::MDNode *&N = AccessTagMetadataCache[Info]; 344 if (N) 345 return N; 346 347 if (!Info.BaseType) { 348 Info.BaseType = Info.AccessType; 349 assert(!Info.Offset && "Nonzero offset for an access with no base type!"); 350 } 351 return N = MDHelper.createTBAAStructTagNode(Info.BaseType, Info.AccessType, 352 Info.Offset); 353 } 354 355 TBAAAccessInfo CodeGenTBAA::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo, 356 TBAAAccessInfo TargetInfo) { 357 if (SourceInfo.isMayAlias() || TargetInfo.isMayAlias()) 358 return TBAAAccessInfo::getMayAliasInfo(); 359 return TargetInfo; 360 } 361 362 TBAAAccessInfo 363 CodeGenTBAA::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA, 364 TBAAAccessInfo InfoB) { 365 if (InfoA == InfoB) 366 return InfoA; 367 368 if (!InfoA || !InfoB) 369 return TBAAAccessInfo(); 370 371 if (InfoA.isMayAlias() || InfoB.isMayAlias()) 372 return TBAAAccessInfo::getMayAliasInfo(); 373 374 // TODO: Implement the rest of the logic here. For example, two accesses 375 // with same final access types result in an access to an object of that final 376 // access type regardless of their base types. 377 return TBAAAccessInfo::getMayAliasInfo(); 378 } 379