1 //== ObjCSelfInitChecker.cpp - Checker for 'self' initialization -*- C++ -*--=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This defines ObjCSelfInitChecker, a builtin check that checks for uses of
11 // 'self' before proper initialization.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 // This checks initialization methods to verify that they assign 'self' to the
16 // result of an initialization call (e.g. [super init], or [self initWith..])
17 // before using 'self' or any instance variable.
18 //
19 // To perform the required checking, values are tagged with flags that indicate
20 // 1) if the object is the one pointed to by 'self', and 2) if the object
21 // is the result of an initializer (e.g. [super init]).
22 //
23 // Uses of an object that is true for 1) but not 2) trigger a diagnostic.
24 // The uses that are currently checked are:
25 //  - Using instance variables.
26 //  - Returning the object.
27 //
// Note that we don't check for an invalid 'self' that is the receiver of an
// obj-c message expression, to cut down on false positives where logging
// functions get information from self (like its class) or where code performs
// "invalidation" on self when the initialization fails.
32 //
33 // Because the object that 'self' points to gets invalidated when a call
34 // receives a reference to 'self', the checker keeps track and passes the flags
35 // for 1) and 2) to the new object that 'self' points to after the call.
36 //
37 // FIXME (rdar://7937506): In the case of:
38 //   [super init];
39 //   return self;
40 // Have an extra PathDiagnosticPiece in the path that says "called [super init],
41 // but didn't assign the result to self."
42 
43 //===----------------------------------------------------------------------===//
44 
45 // FIXME: Somehow stick the link to Apple's documentation about initializing
46 // objects in the diagnostics.
47 // http://developer.apple.com/library/mac/#documentation/Cocoa/Conceptual/ObjectiveC/Articles/ocAllocInit.html
48 
49 #include "ClangSACheckers.h"
50 #include "clang/StaticAnalyzer/Core/Checker.h"
51 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
52 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
53 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
54 #include "clang/StaticAnalyzer/Core/PathSensitive/ObjCMessage.h"
55 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
56 #include "clang/AST/ParentMap.h"
57 
58 using namespace clang;
59 using namespace ento;
60 
// Forward declarations of file-local helpers defined later in this file.

// Decides whether the checker's callbacks should act on the given declaration.
static bool shouldRunOnFunctionOrMethod(const NamedDecl *ND);
// True if the method belongs to the 'init' method family.
static bool isInitializationMethod(const ObjCMethodDecl *MD);
// True if the message belongs to the 'init' method family.
static bool isInitMessage(const ObjCMessage &msg);
// True if the location is the region of the current 'self' declaration.
static bool isSelfVar(SVal location, CheckerContext &C);
65 
66 namespace {
67 class ObjCSelfInitChecker : public Checker<
68                                              check::PostObjCMessage,
69                                              check::PostStmt<ObjCIvarRefExpr>,
70                                              check::PreStmt<ReturnStmt>,
71                                              check::PreStmt<CallExpr>,
72                                              check::PostStmt<CallExpr>,
73                                              check::Location > {
74 public:
75   void checkPostObjCMessage(ObjCMessage msg, CheckerContext &C) const;
76   void checkPostStmt(const ObjCIvarRefExpr *E, CheckerContext &C) const;
77   void checkPreStmt(const ReturnStmt *S, CheckerContext &C) const;
78   void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
79   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
80   void checkLocation(SVal location, bool isLoad, CheckerContext &C) const;
81 };
82 } // end anonymous namespace
83 
84 namespace {
85 
86 class InitSelfBug : public BugType {
87   const std::string desc;
88 public:
89   InitSelfBug() : BugType("missing \"self = [(super or self) init...]\"",
90                           "missing \"self = [(super or self) init...]\"") {}
91 };
92 
93 } // end anonymous namespace
94 
95 namespace {
/// \brief Bit flags attached to a symbol; they may be OR-ed together.
enum SelfFlagEnum {
  /// \brief No flag set.
  SelfFlag_None    = 0,
  /// \brief Value came from 'self'.
  SelfFlag_Self    = 1 << 0,
  /// \brief Value came from the result of an initializer (e.g. [super init]).
  SelfFlag_InitRes = 1 << 1
};
104 }
105 
106 typedef llvm::ImmutableMap<SymbolRef, unsigned> SelfFlag;
107 namespace { struct CalledInit {}; }
108 namespace { struct PreCallSelfFlags {}; }
109 
110 namespace clang {
111 namespace ento {
112   template<>
113   struct ProgramStateTrait<SelfFlag> : public ProgramStatePartialTrait<SelfFlag> {
114     static void *GDMIndex() { static int index = 0; return &index; }
115   };
116   template <>
117   struct ProgramStateTrait<CalledInit> : public ProgramStatePartialTrait<bool> {
118     static void *GDMIndex() { static int index = 0; return &index; }
119   };
120 
121   /// \brief A call receiving a reference to 'self' invalidates the object that
122   /// 'self' contains. This keeps the "self flags" assigned to the 'self'
123   /// object before the call so we can assign them to the new object that 'self'
124   /// points to after the call.
125   template <>
126   struct ProgramStateTrait<PreCallSelfFlags> : public ProgramStatePartialTrait<unsigned> {
127     static void *GDMIndex() { static int index = 0; return &index; }
128   };
129 }
130 }
131 
132 static SelfFlagEnum getSelfFlags(SVal val, const ProgramState *state) {
133   if (SymbolRef sym = val.getAsSymbol())
134     if (const unsigned *attachedFlags = state->get<SelfFlag>(sym))
135       return (SelfFlagEnum)*attachedFlags;
136   return SelfFlag_None;
137 }
138 
139 static SelfFlagEnum getSelfFlags(SVal val, CheckerContext &C) {
140   return getSelfFlags(val, C.getState());
141 }
142 
143 static void addSelfFlag(const ProgramState *state, SVal val,
144                         SelfFlagEnum flag, CheckerContext &C) {
145   // We tag the symbol that the SVal wraps.
146   if (SymbolRef sym = val.getAsSymbol())
147     C.addTransition(state->set<SelfFlag>(sym, getSelfFlags(val, C) | flag));
148 }
149 
150 static bool hasSelfFlag(SVal val, SelfFlagEnum flag, CheckerContext &C) {
151   return getSelfFlags(val, C) & flag;
152 }
153 
154 /// \brief Returns true of the value of the expression is the object that 'self'
155 /// points to and is an object that did not come from the result of calling
156 /// an initializer.
157 static bool isInvalidSelf(const Expr *E, CheckerContext &C) {
158   SVal exprVal = C.getState()->getSVal(E);
159   if (!hasSelfFlag(exprVal, SelfFlag_Self, C))
160     return false; // value did not come from 'self'.
161   if (hasSelfFlag(exprVal, SelfFlag_InitRes, C))
162     return false; // 'self' is properly initialized.
163 
164   return true;
165 }
166 
167 static void checkForInvalidSelf(const Expr *E, CheckerContext &C,
168                                 const char *errorStr) {
169   if (!E)
170     return;
171 
172   if (!C.getState()->get<CalledInit>())
173     return;
174 
175   if (!isInvalidSelf(E, C))
176     return;
177 
178   // Generate an error node.
179   ExplodedNode *N = C.generateSink();
180   if (!N)
181     return;
182 
183   BugReport *report =
184     new BugReport(*new InitSelfBug(), errorStr, N);
185   C.EmitReport(report);
186 }
187 
188 void ObjCSelfInitChecker::checkPostObjCMessage(ObjCMessage msg,
189                                                CheckerContext &C) const {
190   // When encountering a message that does initialization (init rule),
191   // tag the return value so that we know later on that if self has this value
192   // then it is properly initialized.
193 
194   // FIXME: A callback should disable checkers at the start of functions.
195   if (!shouldRunOnFunctionOrMethod(dyn_cast<NamedDecl>(
196                                      C.getCurrentAnalysisContext()->getDecl())))
197     return;
198 
199   if (isInitMessage(msg)) {
200     // Tag the return value as the result of an initializer.
201     const ProgramState *state = C.getState();
202 
203     // FIXME this really should be context sensitive, where we record
204     // the current stack frame (for IPA).  Also, we need to clean this
205     // value out when we return from this method.
206     state = state->set<CalledInit>(true);
207 
208     SVal V = state->getSVal(msg.getOriginExpr());
209     addSelfFlag(state, V, SelfFlag_InitRes, C);
210     return;
211   }
212 
213   // We don't check for an invalid 'self' in an obj-c message expression to cut
214   // down false positives where logging functions get information from self
215   // (like its class) or doing "invalidation" on self when the initialization
216   // fails.
217 }
218 
219 void ObjCSelfInitChecker::checkPostStmt(const ObjCIvarRefExpr *E,
220                                         CheckerContext &C) const {
221   // FIXME: A callback should disable checkers at the start of functions.
222   if (!shouldRunOnFunctionOrMethod(dyn_cast<NamedDecl>(
223                                      C.getCurrentAnalysisContext()->getDecl())))
224     return;
225 
226   checkForInvalidSelf(E->getBase(), C,
227     "Instance variable used while 'self' is not set to the result of "
228                                                  "'[(super or self) init...]'");
229 }
230 
231 void ObjCSelfInitChecker::checkPreStmt(const ReturnStmt *S,
232                                        CheckerContext &C) const {
233   // FIXME: A callback should disable checkers at the start of functions.
234   if (!shouldRunOnFunctionOrMethod(dyn_cast<NamedDecl>(
235                                      C.getCurrentAnalysisContext()->getDecl())))
236     return;
237 
238   checkForInvalidSelf(S->getRetValue(), C,
239     "Returning 'self' while it is not set to the result of "
240                                                  "'[(super or self) init...]'");
241 }
242 
// When a call receives a reference to 'self', checkPreStmt(CallExpr) and
// checkPostStmt(CallExpr) pass the SelfFlags from the object 'self' points to
// before the call to the new object after the call. This is to avoid
// invalidation of 'self' by logging functions.
247 // Another common pattern in classes with multiple initializers is to put the
248 // subclass's common initialization bits into a static function that receives
249 // the value of 'self', e.g:
250 // @code
251 //   if (!(self = [super init]))
252 //     return nil;
253 //   if (!(self = _commonInit(self)))
254 //     return nil;
255 // @endcode
256 // Until we can use inter-procedural analysis, in such a call, transfer the
257 // SelfFlags to the result of the call.
258 
259 void ObjCSelfInitChecker::checkPreStmt(const CallExpr *CE,
260                                        CheckerContext &C) const {
261   const ProgramState *state = C.getState();
262   for (CallExpr::const_arg_iterator
263          I = CE->arg_begin(), E = CE->arg_end(); I != E; ++I) {
264     SVal argV = state->getSVal(*I);
265     if (isSelfVar(argV, C)) {
266       unsigned selfFlags = getSelfFlags(state->getSVal(cast<Loc>(argV)), C);
267       C.addTransition(state->set<PreCallSelfFlags>(selfFlags));
268       return;
269     } else if (hasSelfFlag(argV, SelfFlag_Self, C)) {
270       unsigned selfFlags = getSelfFlags(argV, C);
271       C.addTransition(state->set<PreCallSelfFlags>(selfFlags));
272       return;
273     }
274   }
275 }
276 
277 void ObjCSelfInitChecker::checkPostStmt(const CallExpr *CE,
278                                         CheckerContext &C) const {
279   const ProgramState *state = C.getState();
280   for (CallExpr::const_arg_iterator
281          I = CE->arg_begin(), E = CE->arg_end(); I != E; ++I) {
282     SVal argV = state->getSVal(*I);
283     if (isSelfVar(argV, C)) {
284       SelfFlagEnum prevFlags = (SelfFlagEnum)state->get<PreCallSelfFlags>();
285       state = state->remove<PreCallSelfFlags>();
286       addSelfFlag(state, state->getSVal(cast<Loc>(argV)), prevFlags, C);
287       return;
288     } else if (hasSelfFlag(argV, SelfFlag_Self, C)) {
289       SelfFlagEnum prevFlags = (SelfFlagEnum)state->get<PreCallSelfFlags>();
290       state = state->remove<PreCallSelfFlags>();
291       addSelfFlag(state, state->getSVal(CE), prevFlags, C);
292       return;
293     }
294   }
295 }
296 
297 void ObjCSelfInitChecker::checkLocation(SVal location, bool isLoad,
298                                         CheckerContext &C) const {
299   // Tag the result of a load from 'self' so that we can easily know that the
300   // value is the object that 'self' points to.
301   const ProgramState *state = C.getState();
302   if (isSelfVar(location, C))
303     addSelfFlag(state, state->getSVal(cast<Loc>(location)), SelfFlag_Self, C);
304 }
305 
306 // FIXME: A callback should disable checkers at the start of functions.
307 static bool shouldRunOnFunctionOrMethod(const NamedDecl *ND) {
308   if (!ND)
309     return false;
310 
311   const ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(ND);
312   if (!MD)
313     return false;
314   if (!isInitializationMethod(MD))
315     return false;
316 
317   // self = [super init] applies only to NSObject subclasses.
318   // For instance, NSProxy doesn't implement -init.
319   ASTContext &Ctx = MD->getASTContext();
320   IdentifierInfo* NSObjectII = &Ctx.Idents.get("NSObject");
321   ObjCInterfaceDecl *ID = MD->getClassInterface()->getSuperClass();
322   for ( ; ID ; ID = ID->getSuperClass()) {
323     IdentifierInfo *II = ID->getIdentifier();
324 
325     if (II == NSObjectII)
326       break;
327   }
328   if (!ID)
329     return false;
330 
331   return true;
332 }
333 
334 /// \brief Returns true if the location is 'self'.
335 static bool isSelfVar(SVal location, CheckerContext &C) {
336   AnalysisContext *analCtx = C.getCurrentAnalysisContext();
337   if (!analCtx->getSelfDecl())
338     return false;
339   if (!isa<loc::MemRegionVal>(location))
340     return false;
341 
342   loc::MemRegionVal MRV = cast<loc::MemRegionVal>(location);
343   if (const DeclRegion *DR = dyn_cast<DeclRegion>(MRV.getRegion()))
344     return (DR->getDecl() == analCtx->getSelfDecl());
345 
346   return false;
347 }
348 
349 static bool isInitializationMethod(const ObjCMethodDecl *MD) {
350   return MD->getMethodFamily() == OMF_init;
351 }
352 
353 static bool isInitMessage(const ObjCMessage &msg) {
354   return msg.getMethodFamily() == OMF_init;
355 }
356 
357 //===----------------------------------------------------------------------===//
358 // Registration.
359 //===----------------------------------------------------------------------===//
360 
/// \brief Registers ObjCSelfInitChecker with the given checker manager.
void ento::registerObjCSelfInitChecker(CheckerManager &mgr) {
  mgr.registerChecker<ObjCSelfInitChecker>();
}
364