1 //===--- CodeGenTypes.cpp - TBAA information for LLVM CodeGen -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This is the code that manages TBAA information and defines the TBAA policy 11 // for the optimizer to use. Relevant standards text includes: 12 // 13 // C99 6.5p7 14 // C++ [basic.lval] (p10 in n3126, p15 in some earlier versions) 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "CodeGenTBAA.h" 19 #include "clang/AST/ASTContext.h" 20 #include "clang/AST/Attr.h" 21 #include "clang/AST/Mangle.h" 22 #include "clang/AST/RecordLayout.h" 23 #include "clang/Frontend/CodeGenOptions.h" 24 #include "llvm/ADT/SmallSet.h" 25 #include "llvm/IR/Constants.h" 26 #include "llvm/IR/LLVMContext.h" 27 #include "llvm/IR/Metadata.h" 28 #include "llvm/IR/Type.h" 29 using namespace clang; 30 using namespace CodeGen; 31 32 CodeGenTBAA::CodeGenTBAA(ASTContext &Ctx, llvm::LLVMContext& VMContext, 33 const CodeGenOptions &CGO, 34 const LangOptions &Features, MangleContext &MContext) 35 : Context(Ctx), CodeGenOpts(CGO), Features(Features), MContext(MContext), 36 MDHelper(VMContext), Root(nullptr), Char(nullptr) { 37 } 38 39 CodeGenTBAA::~CodeGenTBAA() { 40 } 41 42 llvm::MDNode *CodeGenTBAA::getRoot() { 43 // Define the root of the tree. This identifies the tree, so that 44 // if our LLVM IR is linked with LLVM IR from a different front-end 45 // (or a different version of this front-end), their TBAA trees will 46 // remain distinct, and the optimizer will treat them conservatively. 47 if (!Root) { 48 if (Features.CPlusPlus) 49 Root = MDHelper.createTBAARoot("Simple C++ TBAA"); 50 else 51 Root = MDHelper.createTBAARoot("Simple C/C++ TBAA"); 52 } 53 54 return Root; 55 } 56 57 // For both scalar TBAA and struct-path aware TBAA, the scalar type has the 58 // same format: name, parent node, and offset. 59 llvm::MDNode *CodeGenTBAA::createTBAAScalarType(StringRef Name, 60 llvm::MDNode *Parent) { 61 return MDHelper.createTBAAScalarTypeNode(Name, Parent); 62 } 63 64 llvm::MDNode *CodeGenTBAA::getChar() { 65 // Define the root of the tree for user-accessible memory. C and C++ 66 // give special powers to char and certain similar types. However, 67 // these special powers only cover user-accessible memory, and doesn't 68 // include things like vtables. 69 if (!Char) 70 Char = createTBAAScalarType("omnipotent char", getRoot()); 71 72 return Char; 73 } 74 75 static bool TypeHasMayAlias(QualType QTy) { 76 // Tagged types have declarations, and therefore may have attributes. 77 if (const TagType *TTy = dyn_cast<TagType>(QTy)) 78 return TTy->getDecl()->hasAttr<MayAliasAttr>(); 79 80 // Typedef types have declarations, and therefore may have attributes. 81 if (const TypedefType *TTy = dyn_cast<TypedefType>(QTy)) { 82 if (TTy->getDecl()->hasAttr<MayAliasAttr>()) 83 return true; 84 // Also, their underlying types may have relevant attributes. 85 return TypeHasMayAlias(TTy->desugar()); 86 } 87 88 return false; 89 } 90 91 /// Check if the given type is a valid base type to be used in access tags. 92 static bool isValidBaseType(QualType QTy) { 93 if (QTy->isReferenceType()) 94 return false; 95 if (const RecordType *TTy = QTy->getAs<RecordType>()) { 96 const RecordDecl *RD = TTy->getDecl()->getDefinition(); 97 // Incomplete types are not valid base access types. 98 if (!RD) 99 return false; 100 if (RD->hasFlexibleArrayMember()) 101 return false; 102 // RD can be struct, union, class, interface or enum. 103 // For now, we only handle struct and class. 104 if (RD->isStruct() || RD->isClass()) 105 return true; 106 } 107 return false; 108 } 109 110 llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { 111 // Handle builtin types. 112 if (const BuiltinType *BTy = dyn_cast<BuiltinType>(Ty)) { 113 switch (BTy->getKind()) { 114 // Character types are special and can alias anything. 115 // In C++, this technically only includes "char" and "unsigned char", 116 // and not "signed char". In C, it includes all three. For now, 117 // the risk of exploiting this detail in C++ seems likely to outweigh 118 // the benefit. 119 case BuiltinType::Char_U: 120 case BuiltinType::Char_S: 121 case BuiltinType::UChar: 122 case BuiltinType::SChar: 123 return getChar(); 124 125 // Unsigned types can alias their corresponding signed types. 126 case BuiltinType::UShort: 127 return getTypeInfo(Context.ShortTy); 128 case BuiltinType::UInt: 129 return getTypeInfo(Context.IntTy); 130 case BuiltinType::ULong: 131 return getTypeInfo(Context.LongTy); 132 case BuiltinType::ULongLong: 133 return getTypeInfo(Context.LongLongTy); 134 case BuiltinType::UInt128: 135 return getTypeInfo(Context.Int128Ty); 136 137 // Treat all other builtin types as distinct types. This includes 138 // treating wchar_t, char16_t, and char32_t as distinct from their 139 // "underlying types". 140 default: 141 return createTBAAScalarType(BTy->getName(Features), getChar()); 142 } 143 } 144 145 // C++1z [basic.lval]p10: "If a program attempts to access the stored value of 146 // an object through a glvalue of other than one of the following types the 147 // behavior is undefined: [...] a char, unsigned char, or std::byte type." 148 if (Ty->isStdByteType()) 149 return getChar(); 150 151 // Handle pointers and references. 152 // TODO: Implement C++'s type "similarity" and consider dis-"similar" 153 // pointers distinct. 154 if (Ty->isPointerType() || Ty->isReferenceType()) 155 return createTBAAScalarType("any pointer", getChar()); 156 157 // Enum types are distinct types. In C++ they have "underlying types", 158 // however they aren't related for TBAA. 159 if (const EnumType *ETy = dyn_cast<EnumType>(Ty)) { 160 // In C++ mode, types have linkage, so we can rely on the ODR and 161 // on their mangled names, if they're external. 162 // TODO: Is there a way to get a program-wide unique name for a 163 // decl with local linkage or no linkage? 164 if (!Features.CPlusPlus || !ETy->getDecl()->isExternallyVisible()) 165 return getChar(); 166 167 SmallString<256> OutName; 168 llvm::raw_svector_ostream Out(OutName); 169 MContext.mangleTypeName(QualType(ETy, 0), Out); 170 return createTBAAScalarType(OutName, getChar()); 171 } 172 173 // For now, handle any other kind of type conservatively. 174 return getChar(); 175 } 176 177 llvm::MDNode *CodeGenTBAA::getTypeInfo(QualType QTy) { 178 // At -O0 or relaxed aliasing, TBAA is not emitted for regular types. 179 if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing) 180 return nullptr; 181 182 // If the type has the may_alias attribute (even on a typedef), it is 183 // effectively in the general char alias class. 184 if (TypeHasMayAlias(QTy)) 185 return getChar(); 186 187 // We need this function to not fall back to returning the "omnipotent char" 188 // type node for aggregate and union types. Otherwise, any dereference of an 189 // aggregate will result into the may-alias access descriptor, meaning all 190 // subsequent accesses to direct and indirect members of that aggregate will 191 // be considered may-alias too. 192 // TODO: Combine getTypeInfo() and getBaseTypeInfo() into a single function. 193 if (isValidBaseType(QTy)) 194 return getBaseTypeInfo(QTy); 195 196 const Type *Ty = Context.getCanonicalType(QTy).getTypePtr(); 197 if (llvm::MDNode *N = MetadataCache[Ty]) 198 return N; 199 200 // Note that the following helper call is allowed to add new nodes to the 201 // cache, which invalidates all its previously obtained iterators. So we 202 // first generate the node for the type and then add that node to the cache. 203 llvm::MDNode *TypeNode = getTypeInfoHelper(Ty); 204 return MetadataCache[Ty] = TypeNode; 205 } 206 207 TBAAAccessInfo CodeGenTBAA::getVTablePtrAccessInfo() { 208 return TBAAAccessInfo(createTBAAScalarType("vtable pointer", getRoot())); 209 } 210 211 bool 212 CodeGenTBAA::CollectFields(uint64_t BaseOffset, 213 QualType QTy, 214 SmallVectorImpl<llvm::MDBuilder::TBAAStructField> & 215 Fields, 216 bool MayAlias) { 217 /* Things not handled yet include: C++ base classes, bitfields, */ 218 219 if (const RecordType *TTy = QTy->getAs<RecordType>()) { 220 const RecordDecl *RD = TTy->getDecl()->getDefinition(); 221 if (RD->hasFlexibleArrayMember()) 222 return false; 223 224 // TODO: Handle C++ base classes. 225 if (const CXXRecordDecl *Decl = dyn_cast<CXXRecordDecl>(RD)) 226 if (Decl->bases_begin() != Decl->bases_end()) 227 return false; 228 229 const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); 230 231 unsigned idx = 0; 232 for (RecordDecl::field_iterator i = RD->field_begin(), 233 e = RD->field_end(); i != e; ++i, ++idx) { 234 uint64_t Offset = BaseOffset + 235 Layout.getFieldOffset(idx) / Context.getCharWidth(); 236 QualType FieldQTy = i->getType(); 237 if (!CollectFields(Offset, FieldQTy, Fields, 238 MayAlias || TypeHasMayAlias(FieldQTy))) 239 return false; 240 } 241 return true; 242 } 243 244 /* Otherwise, treat whatever it is as a field. */ 245 uint64_t Offset = BaseOffset; 246 uint64_t Size = Context.getTypeSizeInChars(QTy).getQuantity(); 247 llvm::MDNode *TBAAType = MayAlias ? getChar() : getTypeInfo(QTy); 248 llvm::MDNode *TBAATag = getAccessTagInfo(TBAAAccessInfo(TBAAType)); 249 Fields.push_back(llvm::MDBuilder::TBAAStructField(Offset, Size, TBAATag)); 250 return true; 251 } 252 253 llvm::MDNode * 254 CodeGenTBAA::getTBAAStructInfo(QualType QTy) { 255 const Type *Ty = Context.getCanonicalType(QTy).getTypePtr(); 256 257 if (llvm::MDNode *N = StructMetadataCache[Ty]) 258 return N; 259 260 SmallVector<llvm::MDBuilder::TBAAStructField, 4> Fields; 261 if (CollectFields(0, QTy, Fields, TypeHasMayAlias(QTy))) 262 return MDHelper.createTBAAStructNode(Fields); 263 264 // For now, handle any other kind of type conservatively. 265 return StructMetadataCache[Ty] = nullptr; 266 } 267 268 llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) { 269 if (auto *TTy = dyn_cast<RecordType>(Ty)) { 270 const RecordDecl *RD = TTy->getDecl()->getDefinition(); 271 272 const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); 273 SmallVector <std::pair<llvm::MDNode*, uint64_t>, 4> Fields; 274 unsigned idx = 0; 275 for (RecordDecl::field_iterator i = RD->field_begin(), 276 e = RD->field_end(); i != e; ++i, ++idx) { 277 QualType FieldQTy = i->getType(); 278 llvm::MDNode *FieldNode = isValidBaseType(FieldQTy) ? 279 getBaseTypeInfo(FieldQTy) : getTypeInfo(FieldQTy); 280 if (!FieldNode) 281 return BaseTypeMetadataCache[Ty] = nullptr; 282 Fields.push_back(std::make_pair( 283 FieldNode, Layout.getFieldOffset(idx) / Context.getCharWidth())); 284 } 285 286 SmallString<256> OutName; 287 if (Features.CPlusPlus) { 288 // Don't use the mangler for C code. 289 llvm::raw_svector_ostream Out(OutName); 290 MContext.mangleTypeName(QualType(Ty, 0), Out); 291 } else { 292 OutName = RD->getName(); 293 } 294 // Create the struct type node with a vector of pairs (offset, type). 295 return MDHelper.createTBAAStructTypeNode(OutName, Fields); 296 } 297 298 return nullptr; 299 } 300 301 llvm::MDNode *CodeGenTBAA::getBaseTypeInfo(QualType QTy) { 302 if (!isValidBaseType(QTy)) 303 return nullptr; 304 305 const Type *Ty = Context.getCanonicalType(QTy).getTypePtr(); 306 if (llvm::MDNode *N = BaseTypeMetadataCache[Ty]) 307 return N; 308 309 // Note that the following helper call is allowed to add new nodes to the 310 // cache, which invalidates all its previously obtained iterators. So we 311 // first generate the node for the type and then add that node to the cache. 312 llvm::MDNode *TypeNode = getBaseTypeInfoHelper(Ty); 313 return BaseTypeMetadataCache[Ty] = TypeNode; 314 } 315 316 llvm::MDNode *CodeGenTBAA::getAccessTagInfo(TBAAAccessInfo Info) { 317 if (Info.isMayAlias()) 318 Info = TBAAAccessInfo(getChar()); 319 320 if (!Info.AccessType) 321 return nullptr; 322 323 if (!CodeGenOpts.StructPathTBAA) 324 Info = TBAAAccessInfo(Info.AccessType); 325 326 llvm::MDNode *&N = AccessTagMetadataCache[Info]; 327 if (N) 328 return N; 329 330 if (!Info.BaseType) { 331 Info.BaseType = Info.AccessType; 332 assert(!Info.Offset && "Nonzero offset for an access with no base type!"); 333 } 334 return N = MDHelper.createTBAAStructTagNode(Info.BaseType, Info.AccessType, 335 Info.Offset); 336 } 337 338 TBAAAccessInfo CodeGenTBAA::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo, 339 TBAAAccessInfo TargetInfo) { 340 if (SourceInfo.isMayAlias() || TargetInfo.isMayAlias()) 341 return TBAAAccessInfo::getMayAliasInfo(); 342 return TargetInfo; 343 } 344 345 TBAAAccessInfo 346 CodeGenTBAA::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA, 347 TBAAAccessInfo InfoB) { 348 if (InfoA == InfoB) 349 return InfoA; 350 351 if (!InfoA || !InfoB) 352 return TBAAAccessInfo(); 353 354 if (InfoA.isMayAlias() || InfoB.isMayAlias()) 355 return TBAAAccessInfo::getMayAliasInfo(); 356 357 // TODO: Implement the rest of the logic here. For example, two accesses 358 // with same final access types result in an access to an object of that final 359 // access type regardless of their base types. 360 return TBAAAccessInfo::getMayAliasInfo(); 361 } 362