//===--- CodeGenTBAA.cpp - TBAA information for LLVM CodeGen --------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This is the code that manages TBAA information and defines the TBAA policy
11 // for the optimizer to use. Relevant standards text includes:
12 //
13 //   C99 6.5p7
14 //   C++ [basic.lval] (p10 in n3126, p15 in some earlier versions)
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "CodeGenTBAA.h"
19 #include "clang/AST/ASTContext.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Mangle.h"
22 #include "clang/AST/RecordLayout.h"
23 #include "clang/Frontend/CodeGenOptions.h"
24 #include "llvm/ADT/SmallSet.h"
25 #include "llvm/IR/Constants.h"
26 #include "llvm/IR/LLVMContext.h"
27 #include "llvm/IR/Metadata.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/IR/Type.h"
30 using namespace clang;
31 using namespace CodeGen;
32 
33 CodeGenTBAA::CodeGenTBAA(ASTContext &Ctx, llvm::Module &M,
34                          const CodeGenOptions &CGO,
35                          const LangOptions &Features, MangleContext &MContext)
36   : Context(Ctx), Module(M), CodeGenOpts(CGO),
37     Features(Features), MContext(MContext), MDHelper(M.getContext()),
38     Root(nullptr), Char(nullptr)
39 {}
40 
41 CodeGenTBAA::~CodeGenTBAA() {
42 }
43 
44 llvm::MDNode *CodeGenTBAA::getRoot() {
45   // Define the root of the tree. This identifies the tree, so that
46   // if our LLVM IR is linked with LLVM IR from a different front-end
47   // (or a different version of this front-end), their TBAA trees will
48   // remain distinct, and the optimizer will treat them conservatively.
49   if (!Root) {
50     if (Features.CPlusPlus)
51       Root = MDHelper.createTBAARoot("Simple C++ TBAA");
52     else
53       Root = MDHelper.createTBAARoot("Simple C/C++ TBAA");
54   }
55 
56   return Root;
57 }
58 
59 llvm::MDNode *CodeGenTBAA::createScalarTypeNode(StringRef Name,
60                                                 llvm::MDNode *Parent,
61                                                 uint64_t Size) {
62   (void)Size; // TODO: Support generation of size-aware type nodes.
63   return MDHelper.createTBAAScalarTypeNode(Name, Parent);
64 }
65 
66 llvm::MDNode *CodeGenTBAA::getChar() {
67   // Define the root of the tree for user-accessible memory. C and C++
68   // give special powers to char and certain similar types. However,
69   // these special powers only cover user-accessible memory, and doesn't
70   // include things like vtables.
71   if (!Char)
72     Char = createScalarTypeNode("omnipotent char", getRoot(), /* Size= */ 1);
73 
74   return Char;
75 }
76 
77 static bool TypeHasMayAlias(QualType QTy) {
78   // Tagged types have declarations, and therefore may have attributes.
79   if (const TagType *TTy = dyn_cast<TagType>(QTy))
80     return TTy->getDecl()->hasAttr<MayAliasAttr>();
81 
82   // Typedef types have declarations, and therefore may have attributes.
83   if (const TypedefType *TTy = dyn_cast<TypedefType>(QTy)) {
84     if (TTy->getDecl()->hasAttr<MayAliasAttr>())
85       return true;
86     // Also, their underlying types may have relevant attributes.
87     return TypeHasMayAlias(TTy->desugar());
88   }
89 
90   return false;
91 }
92 
93 /// Check if the given type is a valid base type to be used in access tags.
94 static bool isValidBaseType(QualType QTy) {
95   if (QTy->isReferenceType())
96     return false;
97   if (const RecordType *TTy = QTy->getAs<RecordType>()) {
98     const RecordDecl *RD = TTy->getDecl()->getDefinition();
99     // Incomplete types are not valid base access types.
100     if (!RD)
101       return false;
102     if (RD->hasFlexibleArrayMember())
103       return false;
104     // RD can be struct, union, class, interface or enum.
105     // For now, we only handle struct and class.
106     if (RD->isStruct() || RD->isClass())
107       return true;
108   }
109   return false;
110 }
111 
112 llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
113   uint64_t Size = Context.getTypeSizeInChars(Ty).getQuantity();
114 
115   // Handle builtin types.
116   if (const BuiltinType *BTy = dyn_cast<BuiltinType>(Ty)) {
117     switch (BTy->getKind()) {
118     // Character types are special and can alias anything.
119     // In C++, this technically only includes "char" and "unsigned char",
120     // and not "signed char". In C, it includes all three. For now,
121     // the risk of exploiting this detail in C++ seems likely to outweigh
122     // the benefit.
123     case BuiltinType::Char_U:
124     case BuiltinType::Char_S:
125     case BuiltinType::UChar:
126     case BuiltinType::SChar:
127       return getChar();
128 
129     // Unsigned types can alias their corresponding signed types.
130     case BuiltinType::UShort:
131       return getTypeInfo(Context.ShortTy);
132     case BuiltinType::UInt:
133       return getTypeInfo(Context.IntTy);
134     case BuiltinType::ULong:
135       return getTypeInfo(Context.LongTy);
136     case BuiltinType::ULongLong:
137       return getTypeInfo(Context.LongLongTy);
138     case BuiltinType::UInt128:
139       return getTypeInfo(Context.Int128Ty);
140 
141     // Treat all other builtin types as distinct types. This includes
142     // treating wchar_t, char16_t, and char32_t as distinct from their
143     // "underlying types".
144     default:
145       return createScalarTypeNode(BTy->getName(Features), getChar(), Size);
146     }
147   }
148 
149   // C++1z [basic.lval]p10: "If a program attempts to access the stored value of
150   // an object through a glvalue of other than one of the following types the
151   // behavior is undefined: [...] a char, unsigned char, or std::byte type."
152   if (Ty->isStdByteType())
153     return getChar();
154 
155   // Handle pointers and references.
156   // TODO: Implement C++'s type "similarity" and consider dis-"similar"
157   // pointers distinct.
158   if (Ty->isPointerType() || Ty->isReferenceType())
159     return createScalarTypeNode("any pointer", getChar(), Size);
160 
161   // Enum types are distinct types. In C++ they have "underlying types",
162   // however they aren't related for TBAA.
163   if (const EnumType *ETy = dyn_cast<EnumType>(Ty)) {
164     // In C++ mode, types have linkage, so we can rely on the ODR and
165     // on their mangled names, if they're external.
166     // TODO: Is there a way to get a program-wide unique name for a
167     // decl with local linkage or no linkage?
168     if (!Features.CPlusPlus || !ETy->getDecl()->isExternallyVisible())
169       return getChar();
170 
171     SmallString<256> OutName;
172     llvm::raw_svector_ostream Out(OutName);
173     MContext.mangleTypeName(QualType(ETy, 0), Out);
174     return createScalarTypeNode(OutName, getChar(), Size);
175   }
176 
177   // For now, handle any other kind of type conservatively.
178   return getChar();
179 }
180 
181 llvm::MDNode *CodeGenTBAA::getTypeInfo(QualType QTy) {
182   // At -O0 or relaxed aliasing, TBAA is not emitted for regular types.
183   if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing)
184     return nullptr;
185 
186   // If the type has the may_alias attribute (even on a typedef), it is
187   // effectively in the general char alias class.
188   if (TypeHasMayAlias(QTy))
189     return getChar();
190 
191   // We need this function to not fall back to returning the "omnipotent char"
192   // type node for aggregate and union types. Otherwise, any dereference of an
193   // aggregate will result into the may-alias access descriptor, meaning all
194   // subsequent accesses to direct and indirect members of that aggregate will
195   // be considered may-alias too.
196   // TODO: Combine getTypeInfo() and getBaseTypeInfo() into a single function.
197   if (isValidBaseType(QTy))
198     return getBaseTypeInfo(QTy);
199 
200   const Type *Ty = Context.getCanonicalType(QTy).getTypePtr();
201   if (llvm::MDNode *N = MetadataCache[Ty])
202     return N;
203 
204   // Note that the following helper call is allowed to add new nodes to the
205   // cache, which invalidates all its previously obtained iterators. So we
206   // first generate the node for the type and then add that node to the cache.
207   llvm::MDNode *TypeNode = getTypeInfoHelper(Ty);
208   return MetadataCache[Ty] = TypeNode;
209 }
210 
211 TBAAAccessInfo CodeGenTBAA::getVTablePtrAccessInfo(llvm::Type *VTablePtrType) {
212   llvm::DataLayout DL(&Module);
213   unsigned Size = DL.getPointerTypeSize(VTablePtrType);
214   return TBAAAccessInfo(createScalarTypeNode("vtable pointer", getRoot(), Size),
215                         Size);
216 }
217 
218 bool
219 CodeGenTBAA::CollectFields(uint64_t BaseOffset,
220                            QualType QTy,
221                            SmallVectorImpl<llvm::MDBuilder::TBAAStructField> &
222                              Fields,
223                            bool MayAlias) {
224   /* Things not handled yet include: C++ base classes, bitfields, */
225 
226   if (const RecordType *TTy = QTy->getAs<RecordType>()) {
227     const RecordDecl *RD = TTy->getDecl()->getDefinition();
228     if (RD->hasFlexibleArrayMember())
229       return false;
230 
231     // TODO: Handle C++ base classes.
232     if (const CXXRecordDecl *Decl = dyn_cast<CXXRecordDecl>(RD))
233       if (Decl->bases_begin() != Decl->bases_end())
234         return false;
235 
236     const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
237 
238     unsigned idx = 0;
239     for (RecordDecl::field_iterator i = RD->field_begin(),
240          e = RD->field_end(); i != e; ++i, ++idx) {
241       uint64_t Offset = BaseOffset +
242                         Layout.getFieldOffset(idx) / Context.getCharWidth();
243       QualType FieldQTy = i->getType();
244       if (!CollectFields(Offset, FieldQTy, Fields,
245                          MayAlias || TypeHasMayAlias(FieldQTy)))
246         return false;
247     }
248     return true;
249   }
250 
251   /* Otherwise, treat whatever it is as a field. */
252   uint64_t Offset = BaseOffset;
253   uint64_t Size = Context.getTypeSizeInChars(QTy).getQuantity();
254   llvm::MDNode *TBAAType = MayAlias ? getChar() : getTypeInfo(QTy);
255   llvm::MDNode *TBAATag = getAccessTagInfo(TBAAAccessInfo(TBAAType, Size));
256   Fields.push_back(llvm::MDBuilder::TBAAStructField(Offset, Size, TBAATag));
257   return true;
258 }
259 
260 llvm::MDNode *
261 CodeGenTBAA::getTBAAStructInfo(QualType QTy) {
262   const Type *Ty = Context.getCanonicalType(QTy).getTypePtr();
263 
264   if (llvm::MDNode *N = StructMetadataCache[Ty])
265     return N;
266 
267   SmallVector<llvm::MDBuilder::TBAAStructField, 4> Fields;
268   if (CollectFields(0, QTy, Fields, TypeHasMayAlias(QTy)))
269     return MDHelper.createTBAAStructNode(Fields);
270 
271   // For now, handle any other kind of type conservatively.
272   return StructMetadataCache[Ty] = nullptr;
273 }
274 
275 llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) {
276   if (auto *TTy = dyn_cast<RecordType>(Ty)) {
277     const RecordDecl *RD = TTy->getDecl()->getDefinition();
278     const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
279     SmallVector<llvm::MDBuilder::TBAAStructField, 4> Fields;
280     for (FieldDecl *Field : RD->fields()) {
281       QualType FieldQTy = Field->getType();
282       llvm::MDNode *TypeNode = isValidBaseType(FieldQTy) ?
283           getBaseTypeInfo(FieldQTy) : getTypeInfo(FieldQTy);
284       if (!TypeNode)
285         return BaseTypeMetadataCache[Ty] = nullptr;
286 
287       uint64_t BitOffset = Layout.getFieldOffset(Field->getFieldIndex());
288       uint64_t Offset = Context.toCharUnitsFromBits(BitOffset).getQuantity();
289       uint64_t Size = Context.getTypeSizeInChars(FieldQTy).getQuantity();
290       Fields.push_back(llvm::MDBuilder::TBAAStructField(Offset, Size,
291                                                         TypeNode));
292     }
293 
294     SmallString<256> OutName;
295     if (Features.CPlusPlus) {
296       // Don't use the mangler for C code.
297       llvm::raw_svector_ostream Out(OutName);
298       MContext.mangleTypeName(QualType(Ty, 0), Out);
299     } else {
300       OutName = RD->getName();
301     }
302 
303     // TODO: Support size-aware type nodes and create one here for the
304     // given aggregate type.
305 
306     // Create the struct type node with a vector of pairs (offset, type).
307     SmallVector<std::pair<llvm::MDNode*, uint64_t>, 4> OffsetsAndTypes;
308     for (const auto &Field : Fields)
309         OffsetsAndTypes.push_back(std::make_pair(Field.TBAA, Field.Offset));
310     return MDHelper.createTBAAStructTypeNode(OutName, OffsetsAndTypes);
311   }
312 
313   return nullptr;
314 }
315 
316 llvm::MDNode *CodeGenTBAA::getBaseTypeInfo(QualType QTy) {
317   if (!isValidBaseType(QTy))
318     return nullptr;
319 
320   const Type *Ty = Context.getCanonicalType(QTy).getTypePtr();
321   if (llvm::MDNode *N = BaseTypeMetadataCache[Ty])
322     return N;
323 
324   // Note that the following helper call is allowed to add new nodes to the
325   // cache, which invalidates all its previously obtained iterators. So we
326   // first generate the node for the type and then add that node to the cache.
327   llvm::MDNode *TypeNode = getBaseTypeInfoHelper(Ty);
328   return BaseTypeMetadataCache[Ty] = TypeNode;
329 }
330 
331 llvm::MDNode *CodeGenTBAA::getAccessTagInfo(TBAAAccessInfo Info) {
332   assert(!Info.isIncomplete() && "Access to an object of an incomplete type!");
333 
334   if (Info.isMayAlias())
335     Info = TBAAAccessInfo(getChar(), Info.Size);
336 
337   if (!Info.AccessType)
338     return nullptr;
339 
340   if (!CodeGenOpts.StructPathTBAA)
341     Info = TBAAAccessInfo(Info.AccessType, Info.Size);
342 
343   llvm::MDNode *&N = AccessTagMetadataCache[Info];
344   if (N)
345     return N;
346 
347   if (!Info.BaseType) {
348     Info.BaseType = Info.AccessType;
349     assert(!Info.Offset && "Nonzero offset for an access with no base type!");
350   }
351   return N = MDHelper.createTBAAStructTagNode(Info.BaseType, Info.AccessType,
352                                               Info.Offset);
353 }
354 
355 TBAAAccessInfo CodeGenTBAA::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo,
356                                                  TBAAAccessInfo TargetInfo) {
357   if (SourceInfo.isMayAlias() || TargetInfo.isMayAlias())
358     return TBAAAccessInfo::getMayAliasInfo();
359   return TargetInfo;
360 }
361 
362 TBAAAccessInfo
363 CodeGenTBAA::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA,
364                                                  TBAAAccessInfo InfoB) {
365   if (InfoA == InfoB)
366     return InfoA;
367 
368   if (!InfoA || !InfoB)
369     return TBAAAccessInfo();
370 
371   if (InfoA.isMayAlias() || InfoB.isMayAlias())
372     return TBAAAccessInfo::getMayAliasInfo();
373 
374   // TODO: Implement the rest of the logic here. For example, two accesses
375   // with same final access types result in an access to an object of that final
376   // access type regardless of their base types.
377   return TBAAAccessInfo::getMayAliasInfo();
378 }
379