1 //== RangeConstraintManager.cpp - Manage range constraints.------*- C++ -*--==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines RangeConstraintManager, a class that tracks simple
10 // equality and inequality constraints on symbolic values of ProgramState.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "clang/Basic/JsonSupport.h"
15 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
16 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
17 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
18 #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h"
19 #include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
20 #include "llvm/ADT/FoldingSet.h"
21 #include "llvm/ADT/ImmutableSet.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallSet.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/Support/Compiler.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <algorithm>
28 #include <iterator>
29
30 using namespace clang;
31 using namespace ento;
32
33 // This class can be extended with other tables which will help to reason
34 // about ranges more precisely.
35 class OperatorRelationsTable {
36 static_assert(BO_LT < BO_GT && BO_GT < BO_LE && BO_LE < BO_GE &&
37 BO_GE < BO_EQ && BO_EQ < BO_NE,
38 "This class relies on operators order. Rework it otherwise.");
39
40 public:
41 enum TriStateKind {
42 False = 0,
43 True,
44 Unknown,
45 };
46
47 private:
48 // CmpOpTable holds states which represent the corresponding range for
49 // branching an exploded graph. We can reason about the branch if there is
50 // a previously known fact of the existence of a comparison expression with
51 // operands used in the current expression.
52 // E.g. assuming (x < y) is true that means (x != y) is surely true.
53 // if (x previous_operation y) // < | != | >
54 // if (x operation y) // != | > | <
55 // tristate // True | Unknown | False
56 //
57 // CmpOpTable represents next:
58 // __|< |> |<=|>=|==|!=|UnknownX2|
59 // < |1 |0 |* |0 |0 |* |1 |
60 // > |0 |1 |0 |* |0 |* |1 |
61 // <=|1 |0 |1 |* |1 |* |0 |
62 // >=|0 |1 |* |1 |1 |* |0 |
63 // ==|0 |0 |* |* |1 |0 |1 |
64 // !=|1 |1 |* |* |0 |1 |0 |
65 //
66 // Columns stands for a previous operator.
67 // Rows stands for a current operator.
68 // Each row has exactly two `Unknown` cases.
69 // UnknownX2 means that both `Unknown` previous operators are met in code,
70 // and there is a special column for that, for example:
71 // if (x >= y)
72 // if (x != y)
73 // if (x <= y)
74 // False only
75 static constexpr size_t CmpOpCount = BO_NE - BO_LT + 1;
76 const TriStateKind CmpOpTable[CmpOpCount][CmpOpCount + 1] = {
77 // < > <= >= == != UnknownX2
78 {True, False, Unknown, False, False, Unknown, True}, // <
79 {False, True, False, Unknown, False, Unknown, True}, // >
80 {True, False, True, Unknown, True, Unknown, False}, // <=
81 {False, True, Unknown, True, True, Unknown, False}, // >=
82 {False, False, Unknown, Unknown, True, False, True}, // ==
83 {True, True, Unknown, Unknown, False, True, False}, // !=
84 };
85
getIndexFromOp(BinaryOperatorKind OP)86 static size_t getIndexFromOp(BinaryOperatorKind OP) {
87 return static_cast<size_t>(OP - BO_LT);
88 }
89
90 public:
getCmpOpCount() const91 constexpr size_t getCmpOpCount() const { return CmpOpCount; }
92
getOpFromIndex(size_t Index)93 static BinaryOperatorKind getOpFromIndex(size_t Index) {
94 return static_cast<BinaryOperatorKind>(Index + BO_LT);
95 }
96
getCmpOpState(BinaryOperatorKind CurrentOP,BinaryOperatorKind QueriedOP) const97 TriStateKind getCmpOpState(BinaryOperatorKind CurrentOP,
98 BinaryOperatorKind QueriedOP) const {
99 return CmpOpTable[getIndexFromOp(CurrentOP)][getIndexFromOp(QueriedOP)];
100 }
101
getCmpOpStateForUnknownX2(BinaryOperatorKind CurrentOP) const102 TriStateKind getCmpOpStateForUnknownX2(BinaryOperatorKind CurrentOP) const {
103 return CmpOpTable[getIndexFromOp(CurrentOP)][CmpOpCount];
104 }
105 };
106
107 //===----------------------------------------------------------------------===//
108 // RangeSet implementation
109 //===----------------------------------------------------------------------===//
110
// Out-of-line definition of the static member RangeSet::Factory::EmptySet,
// value-initialized with empty braces.
// NOTE(review): presumably the shared container behind getEmptySet() -- the
// declaration lives in the header, confirm there.
RangeSet::ContainerType RangeSet::Factory::EmptySet{};
112
add(RangeSet LHS,RangeSet RHS)113 RangeSet RangeSet::Factory::add(RangeSet LHS, RangeSet RHS) {
114 ContainerType Result;
115 Result.reserve(LHS.size() + RHS.size());
116 std::merge(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
117 std::back_inserter(Result));
118 return makePersistent(std::move(Result));
119 }
120
add(RangeSet Original,Range Element)121 RangeSet RangeSet::Factory::add(RangeSet Original, Range Element) {
122 ContainerType Result;
123 Result.reserve(Original.size() + 1);
124
125 const_iterator Lower = llvm::lower_bound(Original, Element);
126 Result.insert(Result.end(), Original.begin(), Lower);
127 Result.push_back(Element);
128 Result.insert(Result.end(), Lower, Original.end());
129
130 return makePersistent(std::move(Result));
131 }
132
add(RangeSet Original,const llvm::APSInt & Point)133 RangeSet RangeSet::Factory::add(RangeSet Original, const llvm::APSInt &Point) {
134 return add(Original, Range(Point));
135 }
136
unite(RangeSet LHS,RangeSet RHS)137 RangeSet RangeSet::Factory::unite(RangeSet LHS, RangeSet RHS) {
138 ContainerType Result = unite(*LHS.Impl, *RHS.Impl);
139 return makePersistent(std::move(Result));
140 }
141
unite(RangeSet Original,Range R)142 RangeSet RangeSet::Factory::unite(RangeSet Original, Range R) {
143 ContainerType Result;
144 Result.push_back(R);
145 Result = unite(*Original.Impl, Result);
146 return makePersistent(std::move(Result));
147 }
148
unite(RangeSet Original,llvm::APSInt Point)149 RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt Point) {
150 return unite(Original, Range(ValueFactory.getValue(Point)));
151 }
152
unite(RangeSet Original,llvm::APSInt From,llvm::APSInt To)153 RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt From,
154 llvm::APSInt To) {
155 return unite(Original,
156 Range(ValueFactory.getValue(From), ValueFactory.getValue(To)));
157 }
158
/// Exchange two (begin, end) pairs: after the call \p First / \p FirstEnd
/// hold what \p Second / \p SecondEnd held before, and vice versa.
template <typename T>
void swapIterators(T &First, T &FirstEnd, T &Second, T &SecondEnd) {
  // The two exchanges are independent of each other.
  std::swap(FirstEnd, SecondEnd);
  std::swap(First, Second);
}
164
unite(const ContainerType & LHS,const ContainerType & RHS)165 RangeSet::ContainerType RangeSet::Factory::unite(const ContainerType &LHS,
166 const ContainerType &RHS) {
167 if (LHS.empty())
168 return RHS;
169 if (RHS.empty())
170 return LHS;
171
172 using llvm::APSInt;
173 using iterator = ContainerType::const_iterator;
174
175 iterator First = LHS.begin();
176 iterator FirstEnd = LHS.end();
177 iterator Second = RHS.begin();
178 iterator SecondEnd = RHS.end();
179 APSIntType Ty = APSIntType(First->From());
180 const APSInt Min = Ty.getMinValue();
181
182 // Handle a corner case first when both range sets start from MIN.
183 // This helps to avoid complicated conditions below. Specifically, this
184 // particular check for `MIN` is not needed in the loop below every time
185 // when we do `Second->From() - One` operation.
186 if (Min == First->From() && Min == Second->From()) {
187 if (First->To() > Second->To()) {
188 // [ First ]--->
189 // [ Second ]----->
190 // MIN^
191 // The Second range is entirely inside the First one.
192
193 // Check if Second is the last in its RangeSet.
194 if (++Second == SecondEnd)
195 // [ First ]--[ First + 1 ]--->
196 // [ Second ]--------------------->
197 // MIN^
198 // The Union is equal to First's RangeSet.
199 return LHS;
200 } else {
201 // case 1: [ First ]----->
202 // case 2: [ First ]--->
203 // [ Second ]--->
204 // MIN^
205 // The First range is entirely inside or equal to the Second one.
206
207 // Check if First is the last in its RangeSet.
208 if (++First == FirstEnd)
209 // [ First ]----------------------->
210 // [ Second ]--[ Second + 1 ]---->
211 // MIN^
212 // The Union is equal to Second's RangeSet.
213 return RHS;
214 }
215 }
216
217 const APSInt One = Ty.getValue(1);
218 ContainerType Result;
219
220 // This is called when there are no ranges left in one of the ranges.
221 // Append the rest of the ranges from another range set to the Result
222 // and return with that.
223 const auto AppendTheRest = [&Result](iterator I, iterator E) {
224 Result.append(I, E);
225 return Result;
226 };
227
228 while (true) {
229 // We want to keep the following invariant at all times:
230 // ---[ First ------>
231 // -----[ Second --->
232 if (First->From() > Second->From())
233 swapIterators(First, FirstEnd, Second, SecondEnd);
234
235 // The Union definitely starts with First->From().
236 // ----------[ First ------>
237 // ------------[ Second --->
238 // ----------[ Union ------>
239 // UnionStart^
240 const llvm::APSInt &UnionStart = First->From();
241
242 // Loop where the invariant holds.
243 while (true) {
244 // Skip all enclosed ranges.
245 // ---[ First ]--->
246 // -----[ Second ]--[ Second + 1 ]--[ Second + N ]----->
247 while (First->To() >= Second->To()) {
248 // Check if Second is the last in its RangeSet.
249 if (++Second == SecondEnd) {
250 // Append the Union.
251 // ---[ Union ]--->
252 // -----[ Second ]----->
253 // --------[ First ]--->
254 // UnionEnd^
255 Result.emplace_back(UnionStart, First->To());
256 // ---[ Union ]----------------->
257 // --------------[ First + 1]--->
258 // Append all remaining ranges from the First's RangeSet.
259 return AppendTheRest(++First, FirstEnd);
260 }
261 }
262
263 // Check if First and Second are disjoint. It means that we find
264 // the end of the Union. Exit the loop and append the Union.
265 // ---[ First ]=------------->
266 // ------------=[ Second ]--->
267 // ----MinusOne^
268 if (First->To() < Second->From() - One)
269 break;
270
271 // First is entirely inside the Union. Go next.
272 // ---[ Union ----------->
273 // ---- [ First ]-------->
274 // -------[ Second ]----->
275 // Check if First is the last in its RangeSet.
276 if (++First == FirstEnd) {
277 // Append the Union.
278 // ---[ Union ]--->
279 // -----[ First ]------->
280 // --------[ Second ]--->
281 // UnionEnd^
282 Result.emplace_back(UnionStart, Second->To());
283 // ---[ Union ]------------------>
284 // --------------[ Second + 1]--->
285 // Append all remaining ranges from the Second's RangeSet.
286 return AppendTheRest(++Second, SecondEnd);
287 }
288
289 // We know that we are at one of the two cases:
290 // case 1: --[ First ]--------->
291 // case 2: ----[ First ]------->
292 // --------[ Second ]---------->
293 // In both cases First starts after Second->From().
294 // Make sure that the loop invariant holds.
295 swapIterators(First, FirstEnd, Second, SecondEnd);
296 }
297
298 // Here First and Second are disjoint.
299 // Append the Union.
300 // ---[ Union ]--------------->
301 // -----------------[ Second ]--->
302 // ------[ First ]--------------->
303 // UnionEnd^
304 Result.emplace_back(UnionStart, First->To());
305
306 // Check if First is the last in its RangeSet.
307 if (++First == FirstEnd)
308 // ---[ Union ]--------------->
309 // --------------[ Second ]--->
310 // Append all remaining ranges from the Second's RangeSet.
311 return AppendTheRest(Second, SecondEnd);
312 }
313
314 llvm_unreachable("Normally, we should not reach here");
315 }
316
getRangeSet(Range From)317 RangeSet RangeSet::Factory::getRangeSet(Range From) {
318 ContainerType Result;
319 Result.push_back(From);
320 return makePersistent(std::move(Result));
321 }
322
makePersistent(ContainerType && From)323 RangeSet RangeSet::Factory::makePersistent(ContainerType &&From) {
324 llvm::FoldingSetNodeID ID;
325 void *InsertPos;
326
327 From.Profile(ID);
328 ContainerType *Result = Cache.FindNodeOrInsertPos(ID, InsertPos);
329
330 if (!Result) {
331 // It is cheaper to fully construct the resulting range on stack
332 // and move it to the freshly allocated buffer if we don't have
333 // a set like this already.
334 Result = construct(std::move(From));
335 Cache.InsertNode(Result, InsertPos);
336 }
337
338 return Result;
339 }
340
construct(ContainerType && From)341 RangeSet::ContainerType *RangeSet::Factory::construct(ContainerType &&From) {
342 void *Buffer = Arena.Allocate();
343 return new (Buffer) ContainerType(std::move(From));
344 }
345
getMinValue() const346 const llvm::APSInt &RangeSet::getMinValue() const {
347 assert(!isEmpty());
348 return begin()->From();
349 }
350
getMaxValue() const351 const llvm::APSInt &RangeSet::getMaxValue() const {
352 assert(!isEmpty());
353 return std::prev(end())->To();
354 }
355
isUnsigned() const356 bool clang::ento::RangeSet::isUnsigned() const {
357 assert(!isEmpty());
358 return begin()->From().isUnsigned();
359 }
360
getBitWidth() const361 uint32_t clang::ento::RangeSet::getBitWidth() const {
362 assert(!isEmpty());
363 return begin()->From().getBitWidth();
364 }
365
getAPSIntType() const366 APSIntType clang::ento::RangeSet::getAPSIntType() const {
367 assert(!isEmpty());
368 return APSIntType(begin()->From());
369 }
370
containsImpl(llvm::APSInt & Point) const371 bool RangeSet::containsImpl(llvm::APSInt &Point) const {
372 if (isEmpty() || !pin(Point))
373 return false;
374
375 Range Dummy(Point);
376 const_iterator It = llvm::upper_bound(*this, Dummy);
377 if (It == begin())
378 return false;
379
380 return std::prev(It)->Includes(Point);
381 }
382
pin(llvm::APSInt & Point) const383 bool RangeSet::pin(llvm::APSInt &Point) const {
384 APSIntType Type(getMinValue());
385 if (Type.testInRange(Point, true) != APSIntType::RTR_Within)
386 return false;
387
388 Type.apply(Point);
389 return true;
390 }
391
pin(llvm::APSInt & Lower,llvm::APSInt & Upper) const392 bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const {
393 // This function has nine cases, the cartesian product of range-testing
394 // both the upper and lower bounds against the symbol's type.
395 // Each case requires a different pinning operation.
396 // The function returns false if the described range is entirely outside
397 // the range of values for the associated symbol.
398 APSIntType Type(getMinValue());
399 APSIntType::RangeTestResultKind LowerTest = Type.testInRange(Lower, true);
400 APSIntType::RangeTestResultKind UpperTest = Type.testInRange(Upper, true);
401
402 switch (LowerTest) {
403 case APSIntType::RTR_Below:
404 switch (UpperTest) {
405 case APSIntType::RTR_Below:
406 // The entire range is outside the symbol's set of possible values.
407 // If this is a conventionally-ordered range, the state is infeasible.
408 if (Lower <= Upper)
409 return false;
410
411 // However, if the range wraps around, it spans all possible values.
412 Lower = Type.getMinValue();
413 Upper = Type.getMaxValue();
414 break;
415 case APSIntType::RTR_Within:
416 // The range starts below what's possible but ends within it. Pin.
417 Lower = Type.getMinValue();
418 Type.apply(Upper);
419 break;
420 case APSIntType::RTR_Above:
421 // The range spans all possible values for the symbol. Pin.
422 Lower = Type.getMinValue();
423 Upper = Type.getMaxValue();
424 break;
425 }
426 break;
427 case APSIntType::RTR_Within:
428 switch (UpperTest) {
429 case APSIntType::RTR_Below:
430 // The range wraps around, but all lower values are not possible.
431 Type.apply(Lower);
432 Upper = Type.getMaxValue();
433 break;
434 case APSIntType::RTR_Within:
435 // The range may or may not wrap around, but both limits are valid.
436 Type.apply(Lower);
437 Type.apply(Upper);
438 break;
439 case APSIntType::RTR_Above:
440 // The range starts within what's possible but ends above it. Pin.
441 Type.apply(Lower);
442 Upper = Type.getMaxValue();
443 break;
444 }
445 break;
446 case APSIntType::RTR_Above:
447 switch (UpperTest) {
448 case APSIntType::RTR_Below:
449 // The range wraps but is outside the symbol's set of possible values.
450 return false;
451 case APSIntType::RTR_Within:
452 // The range starts above what's possible but ends within it (wrap).
453 Lower = Type.getMinValue();
454 Type.apply(Upper);
455 break;
456 case APSIntType::RTR_Above:
457 // The entire range is outside the symbol's set of possible values.
458 // If this is a conventionally-ordered range, the state is infeasible.
459 if (Lower <= Upper)
460 return false;
461
462 // However, if the range wraps around, it spans all possible values.
463 Lower = Type.getMinValue();
464 Upper = Type.getMaxValue();
465 break;
466 }
467 break;
468 }
469
470 return true;
471 }
472
intersect(RangeSet What,llvm::APSInt Lower,llvm::APSInt Upper)473 RangeSet RangeSet::Factory::intersect(RangeSet What, llvm::APSInt Lower,
474 llvm::APSInt Upper) {
475 if (What.isEmpty() || !What.pin(Lower, Upper))
476 return getEmptySet();
477
478 ContainerType DummyContainer;
479
480 if (Lower <= Upper) {
481 // [Lower, Upper] is a regular range.
482 //
483 // Shortcut: check that there is even a possibility of the intersection
484 // by checking the two following situations:
485 //
486 // <---[ What ]---[------]------>
487 // Lower Upper
488 // -or-
489 // <----[------]----[ What ]---->
490 // Lower Upper
491 if (What.getMaxValue() < Lower || Upper < What.getMinValue())
492 return getEmptySet();
493
494 DummyContainer.push_back(
495 Range(ValueFactory.getValue(Lower), ValueFactory.getValue(Upper)));
496 } else {
497 // [Lower, Upper] is an inverted range, i.e. [MIN, Upper] U [Lower, MAX]
498 //
499 // Shortcut: check that there is even a possibility of the intersection
500 // by checking the following situation:
501 //
502 // <------]---[ What ]---[------>
503 // Upper Lower
504 if (What.getMaxValue() < Lower && Upper < What.getMinValue())
505 return getEmptySet();
506
507 DummyContainer.push_back(
508 Range(ValueFactory.getMinValue(Upper), ValueFactory.getValue(Upper)));
509 DummyContainer.push_back(
510 Range(ValueFactory.getValue(Lower), ValueFactory.getMaxValue(Lower)));
511 }
512
513 return intersect(*What.Impl, DummyContainer);
514 }
515
/// Intersect two range containers.
///
/// Both containers are walked in lock-step; every overlapping piece is
/// appended to the result.
/// NOTE(review): the walk reads both inputs as sorted in ascending order --
/// presumably a RangeSet invariant, confirm against the header.
///
/// \returns the empty set if nothing overlaps, otherwise a persistent set
///          holding the overlapping pieces.
RangeSet RangeSet::Factory::intersect(const RangeSet::ContainerType &LHS,
                                      const RangeSet::ContainerType &RHS) {
  ContainerType Result;
  Result.reserve(std::max(LHS.size(), RHS.size()));

  const_iterator First = LHS.begin(), Second = RHS.begin(),
                 FirstEnd = LHS.end(), SecondEnd = RHS.end();

  // If we ran out of ranges in one set, but not in the other,
  // it means that those elements are definitely not in the
  // intersection.
  while (First != FirstEnd && Second != SecondEnd) {
    // We want to keep the following invariant at all times:
    //
    //    ----[ First ---------------------->
    //    --------[ Second ----------------->
    //
    // `First` and `Second` are therefore roles rather than fixed operands:
    // the iterator pairs are exchanged whenever the roles have to flip.
    if (Second->From() < First->From())
      swapIterators(First, FirstEnd, Second, SecondEnd);

    // Loop where the invariant holds:
    do {
      // Check for the following situation:
      //
      //    ----[ First ]--------------------->
      //    ---------------[ Second ]--------->
      //
      // which means that...
      if (Second->From() > First->To()) {
        // ...First is not in the intersection.
        //
        // We should move on to the next range after First and break out of the
        // loop because the invariant might not be true.
        ++First;
        break;
      }

      // We have a guaranteed intersection at this point!
      // And this is the current situation:
      //
      //    ----[   First   ]----------------->
      //    -------[ Second ------------------>
      //
      // Additionally, it definitely starts with Second->From().
      // The reference is taken before the possible swap below, so it keeps
      // pointing at the start of the intersection afterwards.
      const llvm::APSInt &IntersectionStart = Second->From();

      // It is important to know which of the two ranges' ends
      // is greater.  That "longer" range might have some other
      // intersections, while the "shorter" range might not.
      if (Second->To() > First->To()) {
        // Here we make a decision to keep First as the "longer"
        // range.
        swapIterators(First, FirstEnd, Second, SecondEnd);
      }

      // At this point, we have the following situation:
      //
      //    ---- First      ]-------------------->
      //    ---- Second ]--[  Second+1 ---------->
      //
      // We don't know the relationship between First->From and
      // Second->From and we don't know whether Second+1 intersects
      // with First.
      //
      // However, we know that [IntersectionStart, Second->To] is
      // a part of the intersection...
      Result.push_back(Range(IntersectionStart, Second->To()));
      ++Second;
      // ...and that the invariant will hold for a valid Second+1
      // because First->From <= Second->To < (Second+1)->From.
    } while (Second != SecondEnd);
  }

  // Nothing overlapped: hand out the empty set instead of making an empty
  // container persistent.
  if (Result.empty())
    return getEmptySet();

  return makePersistent(std::move(Result));
}
593
intersect(RangeSet LHS,RangeSet RHS)594 RangeSet RangeSet::Factory::intersect(RangeSet LHS, RangeSet RHS) {
595 // Shortcut: let's see if the intersection is even possible.
596 if (LHS.isEmpty() || RHS.isEmpty() || LHS.getMaxValue() < RHS.getMinValue() ||
597 RHS.getMaxValue() < LHS.getMinValue())
598 return getEmptySet();
599
600 return intersect(*LHS.Impl, *RHS.Impl);
601 }
602
intersect(RangeSet LHS,llvm::APSInt Point)603 RangeSet RangeSet::Factory::intersect(RangeSet LHS, llvm::APSInt Point) {
604 if (LHS.containsImpl(Point))
605 return getRangeSet(ValueFactory.getValue(Point));
606
607 return getEmptySet();
608 }
609
negate(RangeSet What)610 RangeSet RangeSet::Factory::negate(RangeSet What) {
611 if (What.isEmpty())
612 return getEmptySet();
613
614 const llvm::APSInt SampleValue = What.getMinValue();
615 const llvm::APSInt &MIN = ValueFactory.getMinValue(SampleValue);
616 const llvm::APSInt &MAX = ValueFactory.getMaxValue(SampleValue);
617
618 ContainerType Result;
619 Result.reserve(What.size() + (SampleValue == MIN));
620
621 // Handle a special case for MIN value.
622 const_iterator It = What.begin();
623 const_iterator End = What.end();
624
625 const llvm::APSInt &From = It->From();
626 const llvm::APSInt &To = It->To();
627
628 if (From == MIN) {
629 // If the range [From, To] is [MIN, MAX], then result is also [MIN, MAX].
630 if (To == MAX) {
631 return What;
632 }
633
634 const_iterator Last = std::prev(End);
635
636 // Try to find and unite the following ranges:
637 // [MIN, MIN] & [MIN + 1, N] => [MIN, N].
638 if (Last->To() == MAX) {
639 // It means that in the original range we have ranges
640 // [MIN, A], ... , [B, MAX]
641 // And the result should be [MIN, -B], ..., [-A, MAX]
642 Result.emplace_back(MIN, ValueFactory.getValue(-Last->From()));
643 // We already negated Last, so we can skip it.
644 End = Last;
645 } else {
646 // Add a separate range for the lowest value.
647 Result.emplace_back(MIN, MIN);
648 }
649
650 // Skip adding the second range in case when [From, To] are [MIN, MIN].
651 if (To != MIN) {
652 Result.emplace_back(ValueFactory.getValue(-To), MAX);
653 }
654
655 // Skip the first range in the loop.
656 ++It;
657 }
658
659 // Negate all other ranges.
660 for (; It != End; ++It) {
661 // Negate int values.
662 const llvm::APSInt &NewFrom = ValueFactory.getValue(-It->To());
663 const llvm::APSInt &NewTo = ValueFactory.getValue(-It->From());
664
665 // Add a negated range.
666 Result.emplace_back(NewFrom, NewTo);
667 }
668
669 llvm::sort(Result);
670 return makePersistent(std::move(Result));
671 }
672
673 // Convert range set to the given integral type using truncation and promotion.
674 // This works similar to APSIntType::apply function but for the range set.
castTo(RangeSet What,APSIntType Ty)675 RangeSet RangeSet::Factory::castTo(RangeSet What, APSIntType Ty) {
676 // Set is empty or NOOP (aka cast to the same type).
677 if (What.isEmpty() || What.getAPSIntType() == Ty)
678 return What;
679
680 const bool IsConversion = What.isUnsigned() != Ty.isUnsigned();
681 const bool IsTruncation = What.getBitWidth() > Ty.getBitWidth();
682 const bool IsPromotion = What.getBitWidth() < Ty.getBitWidth();
683
684 if (IsTruncation)
685 return makePersistent(truncateTo(What, Ty));
686
687 // Here we handle 2 cases:
688 // - IsConversion && !IsPromotion.
689 // In this case we handle changing a sign with same bitwidth: char -> uchar,
690 // uint -> int. Here we convert negatives to positives and positives which
691 // is out of range to negatives. We use convertTo function for that.
692 // - IsConversion && IsPromotion && !What.isUnsigned().
693 // In this case we handle changing a sign from signeds to unsigneds with
694 // higher bitwidth: char -> uint, int-> uint64. The point is that we also
695 // need convert negatives to positives and use convertTo function as well.
696 // For example, we don't need such a convertion when converting unsigned to
697 // signed with higher bitwidth, because all the values of unsigned is valid
698 // for the such signed.
699 if (IsConversion && (!IsPromotion || !What.isUnsigned()))
700 return makePersistent(convertTo(What, Ty));
701
702 assert(IsPromotion && "Only promotion operation from unsigneds left.");
703 return makePersistent(promoteTo(What, Ty));
704 }
705
castTo(RangeSet What,QualType T)706 RangeSet RangeSet::Factory::castTo(RangeSet What, QualType T) {
707 assert(T->isIntegralOrEnumerationType() && "T shall be an integral type.");
708 return castTo(What, ValueFactory.getAPSIntType(T));
709 }
710
truncateTo(RangeSet What,APSIntType Ty)711 RangeSet::ContainerType RangeSet::Factory::truncateTo(RangeSet What,
712 APSIntType Ty) {
713 using llvm::APInt;
714 using llvm::APSInt;
715 ContainerType Result;
716 ContainerType Dummy;
717 // CastRangeSize is an amount of all possible values of cast type.
718 // Example: `char` has 256 values; `short` has 65536 values.
719 // But in fact we use `amount of values` - 1, because
720 // we can't keep `amount of values of UINT64` inside uint64_t.
721 // E.g. 256 is an amount of all possible values of `char` and we can't keep
722 // it inside `char`.
723 // And it's OK, it's enough to do correct calculations.
724 uint64_t CastRangeSize = APInt::getMaxValue(Ty.getBitWidth()).getZExtValue();
725 for (const Range &R : What) {
726 // Get bounds of the given range.
727 APSInt FromInt = R.From();
728 APSInt ToInt = R.To();
729 // CurrentRangeSize is an amount of all possible values of the current
730 // range minus one.
731 uint64_t CurrentRangeSize = (ToInt - FromInt).getZExtValue();
732 // This is an optimization for a specific case when this Range covers
733 // the whole range of the target type.
734 Dummy.clear();
735 if (CurrentRangeSize >= CastRangeSize) {
736 Dummy.emplace_back(ValueFactory.getMinValue(Ty),
737 ValueFactory.getMaxValue(Ty));
738 Result = std::move(Dummy);
739 break;
740 }
741 // Cast the bounds.
742 Ty.apply(FromInt);
743 Ty.apply(ToInt);
744 const APSInt &PersistentFrom = ValueFactory.getValue(FromInt);
745 const APSInt &PersistentTo = ValueFactory.getValue(ToInt);
746 if (FromInt > ToInt) {
747 Dummy.emplace_back(ValueFactory.getMinValue(Ty), PersistentTo);
748 Dummy.emplace_back(PersistentFrom, ValueFactory.getMaxValue(Ty));
749 } else
750 Dummy.emplace_back(PersistentFrom, PersistentTo);
751 // Every range retrieved after truncation potentialy has garbage values.
752 // So, we have to unite every next range with the previouses.
753 Result = unite(Result, Dummy);
754 }
755
756 return Result;
757 }
758
759 // Divide the convertion into two phases (presented as loops here).
760 // First phase(loop) works when casted values go in ascending order.
761 // E.g. char{1,3,5,127} -> uint{1,3,5,127}
762 // Interrupt the first phase and go to second one when casted values start
763 // go in descending order. That means that we crossed over the middle of
764 // the type value set (aka 0 for signeds and MAX/2+1 for unsigneds).
765 // For instance:
766 // 1: uchar{1,3,5,128,255} -> char{1,3,5,-128,-1}
767 // Here we put {1,3,5} to one array and {-128, -1} to another
768 // 2: char{-128,-127,-1,0,1,2} -> uchar{128,129,255,0,1,3}
769 // Here we put {128,129,255} to one array and {0,1,3} to another.
770 // After that we unite both arrays.
771 // NOTE: We don't just concatenate the arrays, because they may have
772 // adjacent ranges, e.g.:
773 // 1: char(-128, 127) -> uchar -> arr1(128, 255), arr2(0, 127) ->
774 // unite -> uchar(0, 255)
775 // 2: uchar(0, 1)U(254, 255) -> char -> arr1(0, 1), arr2(-2, -1) ->
776 // unite -> uchar(-2, 1)
convertTo(RangeSet What,APSIntType Ty)777 RangeSet::ContainerType RangeSet::Factory::convertTo(RangeSet What,
778 APSIntType Ty) {
779 using llvm::APInt;
780 using llvm::APSInt;
781 using Bounds = std::pair<const APSInt &, const APSInt &>;
782 ContainerType AscendArray;
783 ContainerType DescendArray;
784 auto CastRange = [Ty, &VF = ValueFactory](const Range &R) -> Bounds {
785 // Get bounds of the given range.
786 APSInt FromInt = R.From();
787 APSInt ToInt = R.To();
788 // Cast the bounds.
789 Ty.apply(FromInt);
790 Ty.apply(ToInt);
791 return {VF.getValue(FromInt), VF.getValue(ToInt)};
792 };
793 // Phase 1. Fill the first array.
794 APSInt LastConvertedInt = Ty.getMinValue();
795 const auto *It = What.begin();
796 const auto *E = What.end();
797 while (It != E) {
798 Bounds NewBounds = CastRange(*(It++));
799 // If values stop going acsending order, go to the second phase(loop).
800 if (NewBounds.first < LastConvertedInt) {
801 DescendArray.emplace_back(NewBounds.first, NewBounds.second);
802 break;
803 }
804 // If the range contains a midpoint, then split the range.
805 // E.g. char(-5, 5) -> uchar(251, 5)
806 // Here we shall add a range (251, 255) to the first array and (0, 5) to the
807 // second one.
808 if (NewBounds.first > NewBounds.second) {
809 DescendArray.emplace_back(ValueFactory.getMinValue(Ty), NewBounds.second);
810 AscendArray.emplace_back(NewBounds.first, ValueFactory.getMaxValue(Ty));
811 } else
812 // Values are going acsending order.
813 AscendArray.emplace_back(NewBounds.first, NewBounds.second);
814 LastConvertedInt = NewBounds.first;
815 }
816 // Phase 2. Fill the second array.
817 while (It != E) {
818 Bounds NewBounds = CastRange(*(It++));
819 DescendArray.emplace_back(NewBounds.first, NewBounds.second);
820 }
821 // Unite both arrays.
822 return unite(AscendArray, DescendArray);
823 }
824
825 /// Promotion from unsigneds to signeds/unsigneds left.
promoteTo(RangeSet What,APSIntType Ty)826 RangeSet::ContainerType RangeSet::Factory::promoteTo(RangeSet What,
827 APSIntType Ty) {
828 ContainerType Result;
829 // We definitely know the size of the result set.
830 Result.reserve(What.size());
831
832 // Each unsigned value fits every larger type without any changes,
833 // whether the larger type is signed or unsigned. So just promote and push
834 // back each range one by one.
835 for (const Range &R : What) {
836 // Get bounds of the given range.
837 llvm::APSInt FromInt = R.From();
838 llvm::APSInt ToInt = R.To();
839 // Cast the bounds.
840 Ty.apply(FromInt);
841 Ty.apply(ToInt);
842 Result.emplace_back(ValueFactory.getValue(FromInt),
843 ValueFactory.getValue(ToInt));
844 }
845 return Result;
846 }
847
deletePoint(RangeSet From,const llvm::APSInt & Point)848 RangeSet RangeSet::Factory::deletePoint(RangeSet From,
849 const llvm::APSInt &Point) {
850 if (!From.contains(Point))
851 return From;
852
853 llvm::APSInt Upper = Point;
854 llvm::APSInt Lower = Point;
855
856 ++Upper;
857 --Lower;
858
859 // Notice that the lower bound is greater than the upper bound.
860 return intersect(From, Upper, Lower);
861 }
862
dump(raw_ostream & OS) const863 LLVM_DUMP_METHOD void Range::dump(raw_ostream &OS) const {
864 OS << '[' << toString(From(), 10) << ", " << toString(To(), 10) << ']';
865 }
dump() const866 LLVM_DUMP_METHOD void Range::dump() const { dump(llvm::errs()); }
867
dump(raw_ostream & OS) const868 LLVM_DUMP_METHOD void RangeSet::dump(raw_ostream &OS) const {
869 OS << "{ ";
870 llvm::interleaveComma(*this, OS, [&OS](const Range &R) { R.dump(OS); });
871 OS << " }";
872 }
dump() const873 LLVM_DUMP_METHOD void RangeSet::dump() const { dump(llvm::errs()); }
874
// Program state traits used by the equivalence-class machinery below.

// SymbolSet: a set of SymbolRef. Used as the value type of ClassMembers.
REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(SymbolSet, SymbolRef)

namespace {
// Forward declaration: the traits below use EquivalenceClass as a key/value
// type before the class itself is defined.
class EquivalenceClass;
} // end anonymous namespace

// ClassMap: SymbolRef -> EquivalenceClass.
REGISTER_MAP_WITH_PROGRAMSTATE(ClassMap, SymbolRef, EquivalenceClass)
// ClassMembers: EquivalenceClass -> SymbolSet.
REGISTER_MAP_WITH_PROGRAMSTATE(ClassMembers, EquivalenceClass, SymbolSet)
// ConstraintRange: EquivalenceClass -> RangeSet.
REGISTER_MAP_WITH_PROGRAMSTATE(ConstraintRange, EquivalenceClass, RangeSet)

// ClassSet: a set of EquivalenceClass. Used as the value type of
// DisequalityMap (EquivalenceClass -> ClassSet).
REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ClassSet, EquivalenceClass)
REGISTER_MAP_WITH_PROGRAMSTATE(DisequalityMap, EquivalenceClass, ClassSet)
887
888 namespace {
/// This class encapsulates a set of symbols equal to each other.
///
/// The main idea of the approach requiring such classes is in narrowing
/// and sharing constraints between symbols within the class.  Also we can
/// conclude that there is no practical need in storing constraints for
/// every member of the class separately.
///
/// Main terminology:
///
///   * "Equivalence class" is an object of this class, which can be efficiently
///     compared to other classes.  It represents the whole class without
///     storing the actual members in it.  The members of the class however can
///     be retrieved from the state.
///
///   * "Class members" are the symbols corresponding to the class.  This means
///     that A == B for every pair of member symbols A and B from the class.
///     Members of each class are stored in the state.
///
///   * "Trivial class" is a class that has, and has only ever had, one and the
///     same symbol.
///
///   * "Merge operation" merges two classes into one.  It is the main operation
///     to produce non-trivial classes.
///     If, at some point, we can assume that two symbols from two distinct
///     classes are equal, we can merge these classes.
class EquivalenceClass : public llvm::FoldingSetNode {
public:
  /// Find equivalence class for the given symbol in the given state.
  LLVM_NODISCARD static inline EquivalenceClass find(ProgramStateRef State,
                                                     SymbolRef Sym);

  /// Merge classes for the given symbols and return a new state.
  LLVM_NODISCARD static inline ProgramStateRef merge(RangeSet::Factory &F,
                                                     ProgramStateRef State,
                                                     SymbolRef First,
                                                     SymbolRef Second);
  /// Merge this class with the given class and return a new state.
  LLVM_NODISCARD inline ProgramStateRef
  merge(RangeSet::Factory &F, ProgramStateRef State, EquivalenceClass Other);

  /// Return a set of class members for the given state.
  LLVM_NODISCARD inline SymbolSet getClassMembers(ProgramStateRef State) const;

  /// Return true if the current class is trivial in the given state.
  /// A class is trivial if and only if there are no member relations stored
  /// for it in State/ClassMembers.
  /// An equivalence class with one member might seem not to hold any
  /// meaningful information, i.e. to be a tautology. However, during the
  /// removal of dead symbols we do not remove classes with one member for
  /// resource and performance reasons. Consequently, a class with one member is
  /// not necessarily trivial. It could happen that we have a class with two
  /// members and then during the removal of dead symbols we remove one of its
  /// members. In this case, the class is still non-trivial (it still has the
  /// mappings in ClassMembers), even though it has only one member.
  LLVM_NODISCARD inline bool isTrivial(ProgramStateRef State) const;

  /// Return true if the current class is trivial and its only member is dead.
  LLVM_NODISCARD inline bool isTriviallyDead(ProgramStateRef State,
                                             SymbolReaper &Reaper) const;

  // Disequality interface. The definitions are outside of this declaration;
  // judging by the names, markDisequal records that two symbols/classes are
  // not equal and getDisequalClasses retrieves the classes known to be
  // disequal to a given one.
  LLVM_NODISCARD static inline ProgramStateRef
  markDisequal(RangeSet::Factory &F, ProgramStateRef State, SymbolRef First,
               SymbolRef Second);
  LLVM_NODISCARD static inline ProgramStateRef
  markDisequal(RangeSet::Factory &F, ProgramStateRef State,
               EquivalenceClass First, EquivalenceClass Second);
  LLVM_NODISCARD inline ProgramStateRef
  markDisequal(RangeSet::Factory &F, ProgramStateRef State,
               EquivalenceClass Other) const;
  LLVM_NODISCARD static inline ClassSet
  getDisequalClasses(ProgramStateRef State, SymbolRef Sym);
  LLVM_NODISCARD inline ClassSet
  getDisequalClasses(ProgramStateRef State) const;
  LLVM_NODISCARD inline ClassSet
  getDisequalClasses(DisequalityMapTy Map, ClassSet::Factory &Factory) const;

  // Three-state equality query: the result is optional, so besides true/false
  // there is a "no answer" outcome.
  LLVM_NODISCARD static inline Optional<bool> areEqual(ProgramStateRef State,
                                                       EquivalenceClass First,
                                                       EquivalenceClass Second);
  LLVM_NODISCARD static inline Optional<bool>
  areEqual(ProgramStateRef State, SymbolRef First, SymbolRef Second);

  /// Remove one member from the class.
  LLVM_NODISCARD ProgramStateRef removeMember(ProgramStateRef State,
                                              const SymbolRef Old);

  /// Iterate over all symbols and try to simplify them.
  LLVM_NODISCARD static inline ProgramStateRef simplify(SValBuilder &SVB,
                                                        RangeSet::Factory &F,
                                                        ProgramStateRef State,
                                                        EquivalenceClass Class);

  /// Print the class (as seen in \p State) to the given stream.
  void dumpToStream(ProgramStateRef State, raw_ostream &os) const;
  /// Print the class (as seen in \p State) to llvm::errs().
  LLVM_DUMP_METHOD void dump(ProgramStateRef State) const {
    dumpToStream(State, llvm::errs());
  }

  /// Check equivalence data for consistency.
  LLVM_NODISCARD LLVM_ATTRIBUTE_UNUSED static bool
  isClassDataConsistent(ProgramStateRef State);

  /// Return the type of the class, i.e. the type of its representative symbol.
  LLVM_NODISCARD QualType getType() const {
    return getRepresentativeSymbol()->getType();
  }

  // A class cannot be default-constructed or assigned to; it can only be
  // copy- or move-constructed (or created from a symbol by the class itself).
  EquivalenceClass() = delete;
  EquivalenceClass(const EquivalenceClass &) = default;
  EquivalenceClass &operator=(const EquivalenceClass &) = delete;
  EquivalenceClass(EquivalenceClass &&) = default;
  EquivalenceClass &operator=(EquivalenceClass &&) = delete;

  // Classes are compared and ordered solely by their ID.
  bool operator==(const EquivalenceClass &Other) const {
    return ID == Other.ID;
  }
  bool operator<(const EquivalenceClass &Other) const { return ID < Other.ID; }
  bool operator!=(const EquivalenceClass &Other) const {
    return !operator==(Other);
  }

  /// FoldingSet support: the profile of a class consists of its ID only.
  static void Profile(llvm::FoldingSetNodeID &ID, uintptr_t CID) {
    ID.AddInteger(CID);
  }

  void Profile(llvm::FoldingSetNodeID &ID) const { Profile(ID, this->ID); }

private:
  /// Create a class identified by the address of \p Sym.
  /* implicit */ EquivalenceClass(SymbolRef Sym)
      : ID(reinterpret_cast<uintptr_t>(Sym)) {}

  /// This function is intended to be used ONLY within the class.
  /// The fact that ID is a pointer to a symbol is an implementation detail
  /// and should stay that way.
  /// In the current implementation, we use it to retrieve the only member
  /// of the trivial class.
  SymbolRef getRepresentativeSymbol() const {
    return reinterpret_cast<SymbolRef>(ID);
  }
  static inline SymbolSet::Factory &getMembersFactory(ProgramStateRef State);

  // Implementation helper for merge(); takes the member sets of both classes
  // explicitly.
  inline ProgramStateRef mergeImpl(RangeSet::Factory &F, ProgramStateRef State,
                                   SymbolSet Members, EquivalenceClass Other,
                                   SymbolSet OtherMembers);

  // Implementation helper for the disequality interface; works on mutable
  // disequality/constraint maps passed by reference.
  static inline bool
  addToDisequalityInfo(DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
                       RangeSet::Factory &F, ProgramStateRef State,
                       EquivalenceClass First, EquivalenceClass Second);

  /// This is a unique identifier of the class.
  uintptr_t ID;
};
1039
1040 //===----------------------------------------------------------------------===//
1041 // Constraint functions
1042 //===----------------------------------------------------------------------===//
1043
1044 LLVM_NODISCARD LLVM_ATTRIBUTE_UNUSED bool
areFeasible(ConstraintRangeTy Constraints)1045 areFeasible(ConstraintRangeTy Constraints) {
1046 return llvm::none_of(
1047 Constraints,
1048 [](const std::pair<EquivalenceClass, RangeSet> &ClassConstraint) {
1049 return ClassConstraint.second.isEmpty();
1050 });
1051 }
1052
getConstraint(ProgramStateRef State,EquivalenceClass Class)1053 LLVM_NODISCARD inline const RangeSet *getConstraint(ProgramStateRef State,
1054 EquivalenceClass Class) {
1055 return State->get<ConstraintRange>(Class);
1056 }
1057
getConstraint(ProgramStateRef State,SymbolRef Sym)1058 LLVM_NODISCARD inline const RangeSet *getConstraint(ProgramStateRef State,
1059 SymbolRef Sym) {
1060 return getConstraint(State, EquivalenceClass::find(State, Sym));
1061 }
1062
setConstraint(ProgramStateRef State,EquivalenceClass Class,RangeSet Constraint)1063 LLVM_NODISCARD ProgramStateRef setConstraint(ProgramStateRef State,
1064 EquivalenceClass Class,
1065 RangeSet Constraint) {
1066 return State->set<ConstraintRange>(Class, Constraint);
1067 }
1068
setConstraints(ProgramStateRef State,ConstraintRangeTy Constraints)1069 LLVM_NODISCARD ProgramStateRef setConstraints(ProgramStateRef State,
1070 ConstraintRangeTy Constraints) {
1071 return State->set<ConstraintRange>(Constraints);
1072 }
1073
1074 //===----------------------------------------------------------------------===//
//                       Equality/disequality abstraction
1076 //===----------------------------------------------------------------------===//
1077
1078 /// A small helper function for detecting symbolic (dis)equality.
1079 ///
1080 /// Equality check can have different forms (like a == b or a - b) and this
1081 /// class encapsulates those away if the only thing the user wants to check -
1082 /// whether it's equality/diseqiality or not.
1083 ///
1084 /// \returns true if assuming this Sym to be true means equality of operands
1085 /// false if it means disequality of operands
1086 /// None otherwise
meansEquality(const SymSymExpr * Sym)1087 Optional<bool> meansEquality(const SymSymExpr *Sym) {
1088 switch (Sym->getOpcode()) {
1089 case BO_Sub:
1090 // This case is: A - B != 0 -> disequality check.
1091 return false;
1092 case BO_EQ:
1093 // This case is: A == B != 0 -> equality check.
1094 return true;
1095 case BO_NE:
1096 // This case is: A != B != 0 -> diseqiality check.
1097 return false;
1098 default:
1099 return llvm::None;
1100 }
1101 }
1102
1103 //===----------------------------------------------------------------------===//
1104 // Intersection functions
1105 //===----------------------------------------------------------------------===//
1106
// Forward declaration of the overload whose first range argument is a
// RangeSet and whose second one is of any other supported kind. It is declared
// up front so that the overloads defined below can refer to it.
template <class SecondTy, class... RestTy>
LLVM_NODISCARD inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
                                         SecondTy Second, RestTy... Tail);
1110
/// Compile-time helper computing the return type of the generic intersect:
/// RangeSet as soon as one of the argument types is RangeSet,
/// Optional<RangeSet> otherwise.
template <class... RangeTy> struct IntersectionTraits;

template <class... TailTy> struct IntersectionTraits<RangeSet, TailTy...> {
  // Found RangeSet, no need to check any further
  using Type = RangeSet;
};

template <> struct IntersectionTraits<> {
  // We ran out of types, and we didn't find any RangeSet, so the result should
  // be optional.
  using Type = Optional<RangeSet>;
};

template <class OptionalOrPointer, class... TailTy>
struct IntersectionTraits<OptionalOrPointer, TailTy...> {
  // If current type is Optional or a raw pointer, we should keep looking:
  // the answer is whatever the remaining types yield.
  using Type = typename IntersectionTraits<TailTy...>::Type;
};
1129
/// Terminal case of the intersect recursion: a single remaining argument is
/// returned unchanged. \p F is not used.
template <class EndTy>
LLVM_NODISCARD inline EndTy intersect(RangeSet::Factory &F, EndTy End) {
  // If the list contains only RangeSet or Optional<RangeSet>, simply return
  // that range set.
  return End;
}
1136
1137 LLVM_NODISCARD LLVM_ATTRIBUTE_UNUSED inline Optional<RangeSet>
intersect(RangeSet::Factory & F,const RangeSet * End)1138 intersect(RangeSet::Factory &F, const RangeSet *End) {
1139 // This is an extraneous conversion from a raw pointer into Optional<RangeSet>
1140 if (End) {
1141 return *End;
1142 }
1143 return llvm::None;
1144 }
1145
1146 template <class... RestTy>
intersect(RangeSet::Factory & F,RangeSet Head,RangeSet Second,RestTy...Tail)1147 LLVM_NODISCARD inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
1148 RangeSet Second, RestTy... Tail) {
1149 // Here we call either the <RangeSet,RangeSet,...> or <RangeSet,...> version
1150 // of the function and can be sure that the result is RangeSet.
1151 return intersect(F, F.intersect(Head, Second), Tail...);
1152 }
1153
1154 template <class SecondTy, class... RestTy>
intersect(RangeSet::Factory & F,RangeSet Head,SecondTy Second,RestTy...Tail)1155 LLVM_NODISCARD inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
1156 SecondTy Second, RestTy... Tail) {
1157 if (Second) {
1158 // Here we call the <RangeSet,RangeSet,...> version of the function...
1159 return intersect(F, Head, *Second, Tail...);
1160 }
1161 // ...and here it is either <RangeSet,RangeSet,...> or <RangeSet,...>, which
1162 // means that the result is definitely RangeSet.
1163 return intersect(F, Head, Tail...);
1164 }
1165
1166 /// Main generic intersect function.
1167 /// It intersects all of the given range sets. If some of the given arguments
1168 /// don't hold a range set (nullptr or llvm::None), the function will skip them.
1169 ///
1170 /// Available representations for the arguments are:
1171 /// * RangeSet
1172 /// * Optional<RangeSet>
1173 /// * RangeSet *
1174 /// Pointer to a RangeSet is automatically assumed to be nullable and will get
1175 /// checked as well as the optional version. If this behaviour is undesired,
1176 /// please dereference the pointer in the call.
1177 ///
1178 /// Return type depends on the arguments' types. If we can be sure in compile
1179 /// time that there will be a range set as a result, the returning type is
1180 /// simply RangeSet, in other cases we have to back off to Optional<RangeSet>.
1181 ///
1182 /// Please, prefer optional range sets to raw pointers. If the last argument is
1183 /// a raw pointer and all previous arguments are None, it will cost one
1184 /// additional check to convert RangeSet * into Optional<RangeSet>.
1185 template <class HeadTy, class SecondTy, class... RestTy>
1186 LLVM_NODISCARD inline
1187 typename IntersectionTraits<HeadTy, SecondTy, RestTy...>::Type
intersect(RangeSet::Factory & F,HeadTy Head,SecondTy Second,RestTy...Tail)1188 intersect(RangeSet::Factory &F, HeadTy Head, SecondTy Second,
1189 RestTy... Tail) {
1190 if (Head) {
1191 return intersect(F, *Head, Second, Tail...);
1192 }
1193 return intersect(F, Second, Tail...);
1194 }
1195
1196 //===----------------------------------------------------------------------===//
1197 // Symbolic reasoning logic
1198 //===----------------------------------------------------------------------===//
1199
1200 /// A little component aggregating all of the reasoning we have about
1201 /// the ranges of symbolic expressions.
1202 ///
1203 /// Even when we don't know the exact values of the operands, we still
1204 /// can get a pretty good estimate of the result's range.
1205 class SymbolicRangeInferrer
1206 : public SymExprVisitor<SymbolicRangeInferrer, RangeSet> {
1207 public:
1208 template <class SourceType>
inferRange(RangeSet::Factory & F,ProgramStateRef State,SourceType Origin)1209 static RangeSet inferRange(RangeSet::Factory &F, ProgramStateRef State,
1210 SourceType Origin) {
1211 SymbolicRangeInferrer Inferrer(F, State);
1212 return Inferrer.infer(Origin);
1213 }
1214
VisitSymExpr(SymbolRef Sym)1215 RangeSet VisitSymExpr(SymbolRef Sym) {
1216 if (Optional<RangeSet> RS = getRangeForNegatedSym(Sym))
1217 return *RS;
1218 // If we've reached this line, the actual type of the symbolic
1219 // expression is not supported for advanced inference.
1220 // In this case, we simply backoff to the default "let's simply
1221 // infer the range from the expression's type".
1222 return infer(Sym->getType());
1223 }
1224
VisitUnarySymExpr(const UnarySymExpr * USE)1225 RangeSet VisitUnarySymExpr(const UnarySymExpr *USE) {
1226 if (Optional<RangeSet> RS = getRangeForNegatedUnarySym(USE))
1227 return *RS;
1228 return infer(USE->getType());
1229 }
1230
VisitSymIntExpr(const SymIntExpr * Sym)1231 RangeSet VisitSymIntExpr(const SymIntExpr *Sym) {
1232 return VisitBinaryOperator(Sym);
1233 }
1234
VisitIntSymExpr(const IntSymExpr * Sym)1235 RangeSet VisitIntSymExpr(const IntSymExpr *Sym) {
1236 return VisitBinaryOperator(Sym);
1237 }
1238
VisitSymSymExpr(const SymSymExpr * SSE)1239 RangeSet VisitSymSymExpr(const SymSymExpr *SSE) {
1240 return intersect(
1241 RangeFactory,
1242 // If Sym is a difference of symbols A - B, then maybe we have range
1243 // set stored for B - A.
1244 //
1245 // If we have range set stored for both A - B and B - A then
1246 // calculate the effective range set by intersecting the range set
1247 // for A - B and the negated range set of B - A.
1248 getRangeForNegatedSymSym(SSE),
1249 // If Sym is a comparison expression (except <=>),
1250 // find any other comparisons with the same operands.
1251 // See function description.
1252 getRangeForComparisonSymbol(SSE),
1253 // If Sym is (dis)equality, we might have some information
1254 // on that in our equality classes data structure.
1255 getRangeForEqualities(SSE),
1256 // And we should always check what we can get from the operands.
1257 VisitBinaryOperator(SSE));
1258 }
1259
1260 private:
  /// Private constructor (instances are created by inferRange()). Stores
  /// references to the range factory and to its value factory, and keeps the
  /// state to query constraints from.
  SymbolicRangeInferrer(RangeSet::Factory &F, ProgramStateRef S)
      : ValueFactory(F.getValueFactory()), RangeFactory(F), State(S) {}
1263
1264 /// Infer range information from the given integer constant.
1265 ///
1266 /// It's not a real "inference", but is here for operating with
1267 /// sub-expressions in a more polymorphic manner.
inferAs(const llvm::APSInt & Val,QualType)1268 RangeSet inferAs(const llvm::APSInt &Val, QualType) {
1269 return {RangeFactory, Val};
1270 }
1271
1272 /// Infer range information from symbol in the context of the given type.
inferAs(SymbolRef Sym,QualType DestType)1273 RangeSet inferAs(SymbolRef Sym, QualType DestType) {
1274 QualType ActualType = Sym->getType();
1275 // Check that we can reason about the symbol at all.
1276 if (ActualType->isIntegralOrEnumerationType() ||
1277 Loc::isLocType(ActualType)) {
1278 return infer(Sym);
1279 }
1280 // Otherwise, let's simply infer from the destination type.
1281 // We couldn't figure out nothing else about that expression.
1282 return infer(DestType);
1283 }
1284
infer(SymbolRef Sym)1285 RangeSet infer(SymbolRef Sym) {
1286 return intersect(RangeFactory,
1287 // Of course, we should take the constraint directly
1288 // associated with this symbol into consideration.
1289 getConstraint(State, Sym),
1290 // Apart from the Sym itself, we can infer quite a lot if
1291 // we look into subexpressions of Sym.
1292 Visit(Sym));
1293 }
1294
infer(EquivalenceClass Class)1295 RangeSet infer(EquivalenceClass Class) {
1296 if (const RangeSet *AssociatedConstraint = getConstraint(State, Class))
1297 return *AssociatedConstraint;
1298
1299 return infer(Class.getType());
1300 }
1301
1302 /// Infer range information solely from the type.
infer(QualType T)1303 RangeSet infer(QualType T) {
1304 // Lazily generate a new RangeSet representing all possible values for the
1305 // given symbol type.
1306 RangeSet Result(RangeFactory, ValueFactory.getMinValue(T),
1307 ValueFactory.getMaxValue(T));
1308
1309 // References are known to be non-zero.
1310 if (T->isReferenceType())
1311 return assumeNonZero(Result, T);
1312
1313 return Result;
1314 }
1315
1316 template <class BinarySymExprTy>
VisitBinaryOperator(const BinarySymExprTy * Sym)1317 RangeSet VisitBinaryOperator(const BinarySymExprTy *Sym) {
1318 // TODO #1: VisitBinaryOperator implementation might not make a good
1319 // use of the inferred ranges. In this case, we might be calculating
1320 // everything for nothing. This being said, we should introduce some
1321 // sort of laziness mechanism here.
1322 //
1323 // TODO #2: We didn't go into the nested expressions before, so it
1324 // might cause us spending much more time doing the inference.
1325 // This can be a problem for deeply nested expressions that are
1326 // involved in conditions and get tested continuously. We definitely
1327 // need to address this issue and introduce some sort of caching
1328 // in here.
1329 QualType ResultType = Sym->getType();
1330 return VisitBinaryOperator(inferAs(Sym->getLHS(), ResultType),
1331 Sym->getOpcode(),
1332 inferAs(Sym->getRHS(), ResultType), ResultType);
1333 }
1334
VisitBinaryOperator(RangeSet LHS,BinaryOperator::Opcode Op,RangeSet RHS,QualType T)1335 RangeSet VisitBinaryOperator(RangeSet LHS, BinaryOperator::Opcode Op,
1336 RangeSet RHS, QualType T) {
1337 switch (Op) {
1338 case BO_Or:
1339 return VisitBinaryOperator<BO_Or>(LHS, RHS, T);
1340 case BO_And:
1341 return VisitBinaryOperator<BO_And>(LHS, RHS, T);
1342 case BO_Rem:
1343 return VisitBinaryOperator<BO_Rem>(LHS, RHS, T);
1344 default:
1345 return infer(T);
1346 }
1347 }
1348
1349 //===----------------------------------------------------------------------===//
1350 // Ranges and operators
1351 //===----------------------------------------------------------------------===//
1352
1353 /// Return a rough approximation of the given range set.
1354 ///
1355 /// For the range set:
1356 /// { [x_0, y_0], [x_1, y_1], ... , [x_N, y_N] }
1357 /// it will return the range [x_0, y_N].
fillGaps(RangeSet Origin)1358 static Range fillGaps(RangeSet Origin) {
1359 assert(!Origin.isEmpty());
1360 return {Origin.getMinValue(), Origin.getMaxValue()};
1361 }
1362
1363 /// Try to convert given range into the given type.
1364 ///
1365 /// It will return llvm::None only when the trivial conversion is possible.
convert(const Range & Origin,APSIntType To)1366 llvm::Optional<Range> convert(const Range &Origin, APSIntType To) {
1367 if (To.testInRange(Origin.From(), false) != APSIntType::RTR_Within ||
1368 To.testInRange(Origin.To(), false) != APSIntType::RTR_Within) {
1369 return llvm::None;
1370 }
1371 return Range(ValueFactory.Convert(To, Origin.From()),
1372 ValueFactory.Convert(To, Origin.To()));
1373 }
1374
1375 template <BinaryOperator::Opcode Op>
VisitBinaryOperator(RangeSet LHS,RangeSet RHS,QualType T)1376 RangeSet VisitBinaryOperator(RangeSet LHS, RangeSet RHS, QualType T) {
1377 // We should propagate information about unfeasbility of one of the
1378 // operands to the resulting range.
1379 if (LHS.isEmpty() || RHS.isEmpty()) {
1380 return RangeFactory.getEmptySet();
1381 }
1382
1383 Range CoarseLHS = fillGaps(LHS);
1384 Range CoarseRHS = fillGaps(RHS);
1385
1386 APSIntType ResultType = ValueFactory.getAPSIntType(T);
1387
1388 // We need to convert ranges to the resulting type, so we can compare values
1389 // and combine them in a meaningful (in terms of the given operation) way.
1390 auto ConvertedCoarseLHS = convert(CoarseLHS, ResultType);
1391 auto ConvertedCoarseRHS = convert(CoarseRHS, ResultType);
1392
1393 // It is hard to reason about ranges when conversion changes
1394 // borders of the ranges.
1395 if (!ConvertedCoarseLHS || !ConvertedCoarseRHS) {
1396 return infer(T);
1397 }
1398
1399 return VisitBinaryOperator<Op>(*ConvertedCoarseLHS, *ConvertedCoarseRHS, T);
1400 }
1401
1402 template <BinaryOperator::Opcode Op>
VisitBinaryOperator(Range LHS,Range RHS,QualType T)1403 RangeSet VisitBinaryOperator(Range LHS, Range RHS, QualType T) {
1404 return infer(T);
1405 }
1406
1407 /// Return a symmetrical range for the given range and type.
1408 ///
1409 /// If T is signed, return the smallest range [-x..x] that covers the original
1410 /// range, or [-min(T), max(T)] if the aforementioned symmetric range doesn't
1411 /// exist due to original range covering min(T)).
1412 ///
1413 /// If T is unsigned, return the smallest range [0..x] that covers the
1414 /// original range.
getSymmetricalRange(Range Origin,QualType T)1415 Range getSymmetricalRange(Range Origin, QualType T) {
1416 APSIntType RangeType = ValueFactory.getAPSIntType(T);
1417
1418 if (RangeType.isUnsigned()) {
1419 return Range(ValueFactory.getMinValue(RangeType), Origin.To());
1420 }
1421
1422 if (Origin.From().isMinSignedValue()) {
1423 // If mini is a minimal signed value, absolute value of it is greater
1424 // than the maximal signed value. In order to avoid these
1425 // complications, we simply return the whole range.
1426 return {ValueFactory.getMinValue(RangeType),
1427 ValueFactory.getMaxValue(RangeType)};
1428 }
1429
1430 // At this point, we are sure that the type is signed and we can safely
1431 // use unary - operator.
1432 //
1433 // While calculating absolute maximum, we can use the following formula
1434 // because of these reasons:
1435 // * If From >= 0 then To >= From and To >= -From.
1436 // AbsMax == To == max(To, -From)
1437 // * If To <= 0 then -From >= -To and -From >= From.
1438 // AbsMax == -From == max(-From, To)
1439 // * Otherwise, From <= 0, To >= 0, and
1440 // AbsMax == max(abs(From), abs(To))
1441 llvm::APSInt AbsMax = std::max(-Origin.From(), Origin.To());
1442
1443 // Intersection is guaranteed to be non-empty.
1444 return {ValueFactory.getValue(-AbsMax), ValueFactory.getValue(AbsMax)};
1445 }
1446
1447 /// Return a range set subtracting zero from \p Domain.
assumeNonZero(RangeSet Domain,QualType T)1448 RangeSet assumeNonZero(RangeSet Domain, QualType T) {
1449 APSIntType IntType = ValueFactory.getAPSIntType(T);
1450 return RangeFactory.deletePoint(Domain, IntType.getZeroValue());
1451 }
1452
1453 template <typename ProduceNegatedSymFunc>
getRangeForNegatedExpr(ProduceNegatedSymFunc F,QualType T)1454 Optional<RangeSet> getRangeForNegatedExpr(ProduceNegatedSymFunc F,
1455 QualType T) {
1456 // Do not negate if the type cannot be meaningfully negated.
1457 if (!T->isUnsignedIntegerOrEnumerationType() &&
1458 !T->isSignedIntegerOrEnumerationType())
1459 return llvm::None;
1460
1461 if (SymbolRef NegatedSym = F())
1462 if (const RangeSet *NegatedRange = getConstraint(State, NegatedSym))
1463 return RangeFactory.negate(*NegatedRange);
1464
1465 return llvm::None;
1466 }
1467
getRangeForNegatedUnarySym(const UnarySymExpr * USE)1468 Optional<RangeSet> getRangeForNegatedUnarySym(const UnarySymExpr *USE) {
1469 // Just get the operand when we negate a symbol that is already negated.
1470 // -(-a) == a
1471 return getRangeForNegatedExpr(
1472 [USE]() -> SymbolRef {
1473 if (USE->getOpcode() == UO_Minus)
1474 return USE->getOperand();
1475 return nullptr;
1476 },
1477 USE->getType());
1478 }
1479
getRangeForNegatedSymSym(const SymSymExpr * SSE)1480 Optional<RangeSet> getRangeForNegatedSymSym(const SymSymExpr *SSE) {
1481 return getRangeForNegatedExpr(
1482 [SSE, State = this->State]() -> SymbolRef {
1483 if (SSE->getOpcode() == BO_Sub)
1484 return State->getSymbolManager().getSymSymExpr(
1485 SSE->getRHS(), BO_Sub, SSE->getLHS(), SSE->getType());
1486 return nullptr;
1487 },
1488 SSE->getType());
1489 }
1490
getRangeForNegatedSym(SymbolRef Sym)1491 Optional<RangeSet> getRangeForNegatedSym(SymbolRef Sym) {
1492 return getRangeForNegatedExpr(
1493 [Sym, State = this->State]() {
1494 return State->getSymbolManager().getUnarySymExpr(Sym, UO_Minus,
1495 Sym->getType());
1496 },
1497 Sym->getType());
1498 }
1499
  // Returns ranges only for binary comparison operators (except <=>)
  // when left and right operands are symbolic values.
  // Finds any other comparisons with the same operands.
  // Then does logical calculations and refuses impossible branches.
  // E.g. (x < y) and (x > y) at the same time are impossible.
  // E.g. (x >= y) and (x != y) at the same time makes (x > y) true only.
  // E.g. (x == y) and (y == x) are just reversed but the same.
  // It covers all possible combinations (see CmpOpTable description).
  // Note that `x` and `y` can also stand for subexpressions,
  // not only for actual symbols.
  //
  // Returns llvm::None when SSE is not a supported comparison or when no
  // conclusive related comparison is found in the state.
  Optional<RangeSet> getRangeForComparisonSymbol(const SymSymExpr *SSE) {
    const BinaryOperatorKind CurrentOP = SSE->getOpcode();

    // We currently do not support <=> (C++20).
    if (!BinaryOperator::isComparisonOp(CurrentOP) || (CurrentOP == BO_Cmp))
      return llvm::None;

    static const OperatorRelationsTable CmpOpTable{};

    const SymExpr *LHS = SSE->getLHS();
    const SymExpr *RHS = SSE->getRHS();
    QualType T = SSE->getType();

    SymbolManager &SymMgr = State->getSymbolManager();

    // We use this variable to store the last queried operator (`QueriedOP`)
    // for which the `getCmpOpState` returned with `Unknown`. If there are two
    // different OPs that returned `Unknown` then we have to query the special
    // `UnknownX2` column. We assume that `getCmpOpState(CurrentOP, CurrentOP)`
    // never returns `Unknown`, so `CurrentOP` is a good initial value.
    BinaryOperatorKind LastQueriedOpToUnknown = CurrentOP;

    // Loop goes through all of the columns except the last one ('UnknownX2').
    // We treat `UnknownX2` column separately at the end of the loop body.
    for (size_t i = 0; i < CmpOpTable.getCmpOpCount(); ++i) {

      // Let's find an expression e.g. (x < y).
      BinaryOperatorKind QueriedOP = OperatorRelationsTable::getOpFromIndex(i);
      const SymSymExpr *SymSym = SymMgr.getSymSymExpr(LHS, QueriedOP, RHS, T);
      const RangeSet *QueriedRangeSet = getConstraint(State, SymSym);

      // If ranges were not previously found,
      // try to find a reversed expression (y > x).
      if (!QueriedRangeSet) {
        const BinaryOperatorKind ROP =
            BinaryOperator::reverseComparisonOp(QueriedOP);
        SymSym = SymMgr.getSymSymExpr(RHS, ROP, LHS, T);
        QueriedRangeSet = getConstraint(State, SymSym);
      }

      // Nothing (or nothing feasible) is known about this comparison.
      if (!QueriedRangeSet || QueriedRangeSet->isEmpty())
        continue;

      // The comparison is treated as being in its false branch only when its
      // range is the concrete value 0; any other non-empty range is treated
      // as the true branch.
      const llvm::APSInt *ConcreteValue = QueriedRangeSet->getConcreteValue();
      const bool isInFalseBranch =
          ConcreteValue ? (*ConcreteValue == 0) : false;

      // If it is a false branch, we shall be guided by opposite operator,
      // because the table is made assuming we are in the true branch.
      // E.g. when (x <= y) is false, then (x > y) is true.
      if (isInFalseBranch)
        QueriedOP = BinaryOperator::negateComparisonOp(QueriedOP);

      OperatorRelationsTable::TriStateKind BranchState =
          CmpOpTable.getCmpOpState(CurrentOP, QueriedOP);

      if (BranchState == OperatorRelationsTable::Unknown) {
        if (LastQueriedOpToUnknown != CurrentOP &&
            LastQueriedOpToUnknown != QueriedOP) {
          // If we got the Unknown state for both different operators.
          // if (x <= y)    // assume true
          //   if (x != y)  // assume true
          //     if (x < y) // would be also true
          // Get a state from `UnknownX2` column.
          BranchState = CmpOpTable.getCmpOpStateForUnknownX2(CurrentOP);
        } else {
          // First inconclusive operator seen so far: remember it and keep
          // looking through the remaining columns.
          LastQueriedOpToUnknown = QueriedOP;
          continue;
        }
      }

      // The first conclusive answer decides the result.
      return (BranchState == OperatorRelationsTable::True) ? getTrueRange(T)
                                                           : getFalseRange(T);
    }

    return llvm::None;
  }
1587
getRangeForEqualities(const SymSymExpr * Sym)1588 Optional<RangeSet> getRangeForEqualities(const SymSymExpr *Sym) {
1589 Optional<bool> Equality = meansEquality(Sym);
1590
1591 if (!Equality)
1592 return llvm::None;
1593
1594 if (Optional<bool> AreEqual =
1595 EquivalenceClass::areEqual(State, Sym->getLHS(), Sym->getRHS())) {
1596 // Here we cover two cases at once:
1597 // * if Sym is equality and its operands are known to be equal -> true
1598 // * if Sym is disequality and its operands are disequal -> true
1599 if (*AreEqual == *Equality) {
1600 return getTrueRange(Sym->getType());
1601 }
1602 // Opposite combinations result in false.
1603 return getFalseRange(Sym->getType());
1604 }
1605
1606 return llvm::None;
1607 }
1608
getTrueRange(QualType T)1609 RangeSet getTrueRange(QualType T) {
1610 RangeSet TypeRange = infer(T);
1611 return assumeNonZero(TypeRange, T);
1612 }
1613
getFalseRange(QualType T)1614 RangeSet getFalseRange(QualType T) {
1615 const llvm::APSInt &Zero = ValueFactory.getValue(0, T);
1616 return RangeSet(RangeFactory, Zero);
1617 }
1618
1619 BasicValueFactory &ValueFactory;
1620 RangeSet::Factory &RangeFactory;
1621 ProgramStateRef State;
1622 };
1623
1624 //===----------------------------------------------------------------------===//
1625 // Range-based reasoning about symbolic operations
1626 //===----------------------------------------------------------------------===//
1627
1628 template <>
VisitBinaryOperator(Range LHS,Range RHS,QualType T)1629 RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Or>(Range LHS, Range RHS,
1630 QualType T) {
1631 APSIntType ResultType = ValueFactory.getAPSIntType(T);
1632 llvm::APSInt Zero = ResultType.getZeroValue();
1633
1634 bool IsLHSPositiveOrZero = LHS.From() >= Zero;
1635 bool IsRHSPositiveOrZero = RHS.From() >= Zero;
1636
1637 bool IsLHSNegative = LHS.To() < Zero;
1638 bool IsRHSNegative = RHS.To() < Zero;
1639
1640 // Check if both ranges have the same sign.
1641 if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
1642 (IsLHSNegative && IsRHSNegative)) {
1643 // The result is definitely greater or equal than any of the operands.
1644 const llvm::APSInt &Min = std::max(LHS.From(), RHS.From());
1645
1646 // We estimate maximal value for positives as the maximal value for the
1647 // given type. For negatives, we estimate it with -1 (e.g. 0x11111111).
1648 //
1649 // TODO: We basically, limit the resulting range from below, but don't do
1650 // anything with the upper bound.
1651 //
1652 // For positive operands, it can be done as follows: for the upper
1653 // bound of LHS and RHS we calculate the most significant bit set.
1654 // Let's call it the N-th bit. Then we can estimate the maximal
1655 // number to be 2^(N+1)-1, i.e. the number with all the bits up to
1656 // the N-th bit set.
1657 const llvm::APSInt &Max = IsLHSNegative
1658 ? ValueFactory.getValue(--Zero)
1659 : ValueFactory.getMaxValue(ResultType);
1660
1661 return {RangeFactory, ValueFactory.getValue(Min), Max};
1662 }
1663
1664 // Otherwise, let's check if at least one of the operands is negative.
1665 if (IsLHSNegative || IsRHSNegative) {
1666 // This means that the result is definitely negative as well.
1667 return {RangeFactory, ValueFactory.getMinValue(ResultType),
1668 ValueFactory.getValue(--Zero)};
1669 }
1670
1671 RangeSet DefaultRange = infer(T);
1672
1673 // It is pretty hard to reason about operands with different signs
1674 // (and especially with possibly different signs). We simply check if it
1675 // can be zero. In order to conclude that the result could not be zero,
1676 // at least one of the operands should be definitely not zero itself.
1677 if (!LHS.Includes(Zero) || !RHS.Includes(Zero)) {
1678 return assumeNonZero(DefaultRange, T);
1679 }
1680
1681 // Nothing much else to do here.
1682 return DefaultRange;
1683 }
1684
1685 template <>
VisitBinaryOperator(Range LHS,Range RHS,QualType T)1686 RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_And>(Range LHS,
1687 Range RHS,
1688 QualType T) {
1689 APSIntType ResultType = ValueFactory.getAPSIntType(T);
1690 llvm::APSInt Zero = ResultType.getZeroValue();
1691
1692 bool IsLHSPositiveOrZero = LHS.From() >= Zero;
1693 bool IsRHSPositiveOrZero = RHS.From() >= Zero;
1694
1695 bool IsLHSNegative = LHS.To() < Zero;
1696 bool IsRHSNegative = RHS.To() < Zero;
1697
1698 // Check if both ranges have the same sign.
1699 if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
1700 (IsLHSNegative && IsRHSNegative)) {
1701 // The result is definitely less or equal than any of the operands.
1702 const llvm::APSInt &Max = std::min(LHS.To(), RHS.To());
1703
1704 // We conservatively estimate lower bound to be the smallest positive
1705 // or negative value corresponding to the sign of the operands.
1706 const llvm::APSInt &Min = IsLHSNegative
1707 ? ValueFactory.getMinValue(ResultType)
1708 : ValueFactory.getValue(Zero);
1709
1710 return {RangeFactory, Min, Max};
1711 }
1712
1713 // Otherwise, let's check if at least one of the operands is positive.
1714 if (IsLHSPositiveOrZero || IsRHSPositiveOrZero) {
1715 // This makes result definitely positive.
1716 //
1717 // We can also reason about a maximal value by finding the maximal
1718 // value of the positive operand.
1719 const llvm::APSInt &Max = IsLHSPositiveOrZero ? LHS.To() : RHS.To();
1720
1721 // The minimal value on the other hand is much harder to reason about.
1722 // The only thing we know for sure is that the result is positive.
1723 return {RangeFactory, ValueFactory.getValue(Zero),
1724 ValueFactory.getValue(Max)};
1725 }
1726
1727 // Nothing much else to do here.
1728 return infer(T);
1729 }
1730
1731 template <>
VisitBinaryOperator(Range LHS,Range RHS,QualType T)1732 RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS,
1733 Range RHS,
1734 QualType T) {
1735 llvm::APSInt Zero = ValueFactory.getAPSIntType(T).getZeroValue();
1736
1737 Range ConservativeRange = getSymmetricalRange(RHS, T);
1738
1739 llvm::APSInt Max = ConservativeRange.To();
1740 llvm::APSInt Min = ConservativeRange.From();
1741
1742 if (Max == Zero) {
1743 // It's an undefined behaviour to divide by 0 and it seems like we know
1744 // for sure that RHS is 0. Let's say that the resulting range is
1745 // simply infeasible for that matter.
1746 return RangeFactory.getEmptySet();
1747 }
1748
1749 // At this point, our conservative range is closed. The result, however,
1750 // couldn't be greater than the RHS' maximal absolute value. Because of
1751 // this reason, we turn the range into open (or half-open in case of
1752 // unsigned integers).
1753 //
1754 // While we operate on integer values, an open interval (a, b) can be easily
1755 // represented by the closed interval [a + 1, b - 1]. And this is exactly
1756 // what we do next.
1757 //
1758 // If we are dealing with unsigned case, we shouldn't move the lower bound.
1759 if (Min.isSigned()) {
1760 ++Min;
1761 }
1762 --Max;
1763
1764 bool IsLHSPositiveOrZero = LHS.From() >= Zero;
1765 bool IsRHSPositiveOrZero = RHS.From() >= Zero;
1766
1767 // Remainder operator results with negative operands is implementation
1768 // defined. Positive cases are much easier to reason about though.
1769 if (IsLHSPositiveOrZero && IsRHSPositiveOrZero) {
1770 // If maximal value of LHS is less than maximal value of RHS,
1771 // the result won't get greater than LHS.To().
1772 Max = std::min(LHS.To(), Max);
1773 // We want to check if it is a situation similar to the following:
1774 //
1775 // <------------|---[ LHS ]--------[ RHS ]----->
1776 // -INF 0 +INF
1777 //
1778 // In this situation, we can conclude that (LHS / RHS) == 0 and
1779 // (LHS % RHS) == LHS.
1780 Min = LHS.To() < RHS.From() ? LHS.From() : Zero;
1781 }
1782
1783 // Nevertheless, the symmetrical range for RHS is a conservative estimate
1784 // for any sign of either LHS, or RHS.
1785 return {RangeFactory, ValueFactory.getValue(Min), ValueFactory.getValue(Max)};
1786 }
1787
1788 //===----------------------------------------------------------------------===//
1789 // Constraint manager implementation details
1790 //===----------------------------------------------------------------------===//
1791
1792 class RangeConstraintManager : public RangedConstraintManager {
1793 public:
RangeConstraintManager(ExprEngine * EE,SValBuilder & SVB)1794 RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB)
1795 : RangedConstraintManager(EE, SVB), F(getBasicVals()) {}
1796
1797 //===------------------------------------------------------------------===//
1798 // Implementation for interface from ConstraintManager.
1799 //===------------------------------------------------------------------===//
1800
haveEqualConstraints(ProgramStateRef S1,ProgramStateRef S2) const1801 bool haveEqualConstraints(ProgramStateRef S1,
1802 ProgramStateRef S2) const override {
1803 // NOTE: ClassMembers are as simple as back pointers for ClassMap,
1804 // so comparing constraint ranges and class maps should be
1805 // sufficient.
1806 return S1->get<ConstraintRange>() == S2->get<ConstraintRange>() &&
1807 S1->get<ClassMap>() == S2->get<ClassMap>();
1808 }
1809
1810 bool canReasonAbout(SVal X) const override;
1811
1812 ConditionTruthVal checkNull(ProgramStateRef State, SymbolRef Sym) override;
1813
1814 const llvm::APSInt *getSymVal(ProgramStateRef State,
1815 SymbolRef Sym) const override;
1816
1817 ProgramStateRef removeDeadBindings(ProgramStateRef State,
1818 SymbolReaper &SymReaper) override;
1819
1820 void printJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n",
1821 unsigned int Space = 0, bool IsDot = false) const override;
1822 void printValue(raw_ostream &Out, ProgramStateRef State,
1823 SymbolRef Sym) override;
1824 void printConstraints(raw_ostream &Out, ProgramStateRef State,
1825 const char *NL = "\n", unsigned int Space = 0,
1826 bool IsDot = false) const;
1827 void printEquivalenceClasses(raw_ostream &Out, ProgramStateRef State,
1828 const char *NL = "\n", unsigned int Space = 0,
1829 bool IsDot = false) const;
1830 void printDisequalities(raw_ostream &Out, ProgramStateRef State,
1831 const char *NL = "\n", unsigned int Space = 0,
1832 bool IsDot = false) const;
1833
1834 //===------------------------------------------------------------------===//
1835 // Implementation for interface from RangedConstraintManager.
1836 //===------------------------------------------------------------------===//
1837
1838 ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym,
1839 const llvm::APSInt &V,
1840 const llvm::APSInt &Adjustment) override;
1841
1842 ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym,
1843 const llvm::APSInt &V,
1844 const llvm::APSInt &Adjustment) override;
1845
1846 ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym,
1847 const llvm::APSInt &V,
1848 const llvm::APSInt &Adjustment) override;
1849
1850 ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym,
1851 const llvm::APSInt &V,
1852 const llvm::APSInt &Adjustment) override;
1853
1854 ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym,
1855 const llvm::APSInt &V,
1856 const llvm::APSInt &Adjustment) override;
1857
1858 ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym,
1859 const llvm::APSInt &V,
1860 const llvm::APSInt &Adjustment) override;
1861
1862 ProgramStateRef assumeSymWithinInclusiveRange(
1863 ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
1864 const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
1865
1866 ProgramStateRef assumeSymOutsideInclusiveRange(
1867 ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
1868 const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
1869
1870 private:
1871 RangeSet::Factory F;
1872
1873 RangeSet getRange(ProgramStateRef State, SymbolRef Sym);
1874 RangeSet getRange(ProgramStateRef State, EquivalenceClass Class);
1875 ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym,
1876 RangeSet Range);
1877 ProgramStateRef setRange(ProgramStateRef State, EquivalenceClass Class,
1878 RangeSet Range);
1879
1880 RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym,
1881 const llvm::APSInt &Int,
1882 const llvm::APSInt &Adjustment);
1883 RangeSet getSymGTRange(ProgramStateRef St, SymbolRef Sym,
1884 const llvm::APSInt &Int,
1885 const llvm::APSInt &Adjustment);
1886 RangeSet getSymLERange(ProgramStateRef St, SymbolRef Sym,
1887 const llvm::APSInt &Int,
1888 const llvm::APSInt &Adjustment);
1889 RangeSet getSymLERange(llvm::function_ref<RangeSet()> RS,
1890 const llvm::APSInt &Int,
1891 const llvm::APSInt &Adjustment);
1892 RangeSet getSymGERange(ProgramStateRef St, SymbolRef Sym,
1893 const llvm::APSInt &Int,
1894 const llvm::APSInt &Adjustment);
1895 };
1896
1897 //===----------------------------------------------------------------------===//
1898 // Constraint assignment logic
1899 //===----------------------------------------------------------------------===//
1900
/// ConstraintAssignorBase is a small utility class that unifies visitor
/// for ranges with a visitor for constraints (rangeset/range/constant).
///
/// It is designed to have one derived class, but generally it can have more.
/// Derived class can control which types we handle by defining methods of the
/// following form:
///
///   bool assign${SYMBOL}To${CONSTRAINT}(const SYMBOL *Sym,
///                                       CONSTRAINT Constraint);
///
/// where SYMBOL is the type of the symbol (e.g. SymSymExpr, SymbolCast, etc.)
///       CONSTRAINT is the type of constraint (RangeSet/Range/Const)
///       return value signifies whether we should try other assign methods
///       (i.e. false would mean to stop right after calling this method)
///
/// The methods are looked up through a static_cast of `this` to Derived, so
/// a method defined in Derived is the one that gets called; the defaults
/// generated below simply return true.
template <class Derived> class ConstraintAssignorBase {
public:
  using Const = const llvm::APSInt &;

// Forwards to the assign<CLASS>Impl method, casting Sym to CLASS. Expects
// `Sym` and `Constraint` to be visible at the point of expansion.
#define DISPATCH(CLASS) return assign##CLASS##Impl(cast<CLASS>(Sym), Constraint)

// Calls assign<CLASS>To<TO> on Derived and returns false from the enclosing
// function as soon as that method returns false.
#define ASSIGN(CLASS, TO, SYM, CONSTRAINT)                                     \
  if (!static_cast<Derived *>(this)->assign##CLASS##To##TO(SYM, CONSTRAINT))   \
  return false

  /// Entry point: runs the dispatch for Sym. The boolean result of
  /// assignImpl is discarded here.
  void assign(SymbolRef Sym, RangeSet Constraint) {
    assignImpl(Sym, Constraint);
  }

  /// Selects the assign<Kind>Impl method matching the dynamic kind of Sym.
  bool assignImpl(SymbolRef Sym, RangeSet Constraint) {
    switch (Sym->getKind()) {
// One `case` per symbol kind listed in Symbols.def.
#define SYMBOL(Id, Parent)                                                     \
  case SymExpr::Id##Kind:                                                      \
    DISPATCH(Id);
#include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"
    }
    llvm_unreachable("Unknown SymExpr kind!");
  }

// Generates the three default assignor methods for Id (one per constraint
// type). Each of them returns true, i.e. asks to carry on.
#define DEFAULT_ASSIGN(Id)                                                     \
  bool assign##Id##To##RangeSet(const Id *Sym, RangeSet Constraint) {          \
    return true;                                                               \
  }                                                                            \
  bool assign##Id##To##Range(const Id *Sym, Range Constraint) { return true; } \
  bool assign##Id##To##Const(const Id *Sym, Const Constraint) { return true; }

  // When we dispatch for constraint types, we first try to check
  // if the new constraint is the constant and try the corresponding
  // assignor methods. If it didn't interrupt, we can proceed to the
  // range, and finally to the range set.
#define CONSTRAINT_DISPATCH(Id)                                                \
  if (const llvm::APSInt *Const = Constraint.getConcreteValue()) {             \
    ASSIGN(Id, Const, Sym, *Const);                                            \
  }                                                                            \
  if (Constraint.size() == 1) {                                                \
    ASSIGN(Id, Range, Sym, *Constraint.begin());                               \
  }                                                                            \
  ASSIGN(Id, RangeSet, Sym, Constraint)

  // Our internal assign method first tries to call assignor methods for all
  // constraint types that apply. And if not interrupted, continues with its
  // parent class.
#define SYMBOL(Id, Parent)                                                     \
  bool assign##Id##Impl(const Id *Sym, RangeSet Constraint) {                  \
    CONSTRAINT_DISPATCH(Id);                                                   \
    DISPATCH(Parent);                                                          \
  }                                                                            \
  DEFAULT_ASSIGN(Id)
// Abstract symbols get exactly the same treatment as concrete ones.
#define ABSTRACT_SYMBOL(Id, Parent) SYMBOL(Id, Parent)
#include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"

  // Default implementations for the top class that doesn't have parents.
  // This is where the chain of DISPATCH(Parent) calls ends.
  bool assignSymExprImpl(const SymExpr *Sym, RangeSet Constraint) {
    CONSTRAINT_DISPATCH(SymExpr);
    return true;
  }
  DEFAULT_ASSIGN(SymExpr);

// The helper macros are local to this class.
#undef DISPATCH
#undef CONSTRAINT_DISPATCH
#undef DEFAULT_ASSIGN
#undef ASSIGN
};
1983
1984 /// A little component aggregating all of the reasoning we have about
1985 /// assigning new constraints to symbols.
1986 ///
1987 /// The main purpose of this class is to associate constraints to symbols,
1988 /// and impose additional constraints on other symbols, when we can imply
1989 /// them.
1990 ///
1991 /// It has a nice symmetry with SymbolicRangeInferrer. When the latter
1992 /// can provide more precise ranges by looking into the operands of the
1993 /// expression in question, ConstraintAssignor looks into the operands
1994 /// to see if we can imply more from the new constraint.
1995 class ConstraintAssignor : public ConstraintAssignorBase<ConstraintAssignor> {
1996 public:
1997 template <class ClassOrSymbol>
1998 LLVM_NODISCARD static ProgramStateRef
assign(ProgramStateRef State,SValBuilder & Builder,RangeSet::Factory & F,ClassOrSymbol CoS,RangeSet NewConstraint)1999 assign(ProgramStateRef State, SValBuilder &Builder, RangeSet::Factory &F,
2000 ClassOrSymbol CoS, RangeSet NewConstraint) {
2001 if (!State || NewConstraint.isEmpty())
2002 return nullptr;
2003
2004 ConstraintAssignor Assignor{State, Builder, F};
2005 return Assignor.assign(CoS, NewConstraint);
2006 }
2007
2008 /// Handle expressions like: a % b != 0.
2009 template <typename SymT>
handleRemainderOp(const SymT * Sym,RangeSet Constraint)2010 bool handleRemainderOp(const SymT *Sym, RangeSet Constraint) {
2011 if (Sym->getOpcode() != BO_Rem)
2012 return true;
2013 // a % b != 0 implies that a != 0.
2014 if (!Constraint.containsZero()) {
2015 SVal SymSVal = Builder.makeSymbolVal(Sym->getLHS());
2016 if (auto NonLocSymSVal = SymSVal.getAs<nonloc::SymbolVal>()) {
2017 State = State->assume(*NonLocSymSVal, true);
2018 if (!State)
2019 return false;
2020 }
2021 }
2022 return true;
2023 }
2024
2025 inline bool assignSymExprToConst(const SymExpr *Sym, Const Constraint);
assignSymIntExprToRangeSet(const SymIntExpr * Sym,RangeSet Constraint)2026 inline bool assignSymIntExprToRangeSet(const SymIntExpr *Sym,
2027 RangeSet Constraint) {
2028 return handleRemainderOp(Sym, Constraint);
2029 }
2030 inline bool assignSymSymExprToRangeSet(const SymSymExpr *Sym,
2031 RangeSet Constraint);
2032
2033 private:
ConstraintAssignor(ProgramStateRef State,SValBuilder & Builder,RangeSet::Factory & F)2034 ConstraintAssignor(ProgramStateRef State, SValBuilder &Builder,
2035 RangeSet::Factory &F)
2036 : State(State), Builder(Builder), RangeFactory(F) {}
2037 using Base = ConstraintAssignorBase<ConstraintAssignor>;
2038
2039 /// Base method for handling new constraints for symbols.
assign(SymbolRef Sym,RangeSet NewConstraint)2040 LLVM_NODISCARD ProgramStateRef assign(SymbolRef Sym, RangeSet NewConstraint) {
2041 // All constraints are actually associated with equivalence classes, and
2042 // that's what we are going to do first.
2043 State = assign(EquivalenceClass::find(State, Sym), NewConstraint);
2044 if (!State)
2045 return nullptr;
2046
2047 // And after that we can check what other things we can get from this
2048 // constraint.
2049 Base::assign(Sym, NewConstraint);
2050 return State;
2051 }
2052
2053 /// Base method for handling new constraints for classes.
assign(EquivalenceClass Class,RangeSet NewConstraint)2054 LLVM_NODISCARD ProgramStateRef assign(EquivalenceClass Class,
2055 RangeSet NewConstraint) {
2056 // There is a chance that we might need to update constraints for the
2057 // classes that are known to be disequal to Class.
2058 //
2059 // In order for this to be even possible, the new constraint should
2060 // be simply a constant because we can't reason about range disequalities.
2061 if (const llvm::APSInt *Point = NewConstraint.getConcreteValue()) {
2062
2063 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2064 ConstraintRangeTy::Factory &CF = State->get_context<ConstraintRange>();
2065
2066 // Add new constraint.
2067 Constraints = CF.add(Constraints, Class, NewConstraint);
2068
2069 for (EquivalenceClass DisequalClass : Class.getDisequalClasses(State)) {
2070 RangeSet UpdatedConstraint = SymbolicRangeInferrer::inferRange(
2071 RangeFactory, State, DisequalClass);
2072
2073 UpdatedConstraint = RangeFactory.deletePoint(UpdatedConstraint, *Point);
2074
2075 // If we end up with at least one of the disequal classes to be
2076 // constrained with an empty range-set, the state is infeasible.
2077 if (UpdatedConstraint.isEmpty())
2078 return nullptr;
2079
2080 Constraints = CF.add(Constraints, DisequalClass, UpdatedConstraint);
2081 }
2082 assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
2083 "a state with infeasible constraints");
2084
2085 return setConstraints(State, Constraints);
2086 }
2087
2088 return setConstraint(State, Class, NewConstraint);
2089 }
2090
trackDisequality(ProgramStateRef State,SymbolRef LHS,SymbolRef RHS)2091 ProgramStateRef trackDisequality(ProgramStateRef State, SymbolRef LHS,
2092 SymbolRef RHS) {
2093 return EquivalenceClass::markDisequal(RangeFactory, State, LHS, RHS);
2094 }
2095
trackEquality(ProgramStateRef State,SymbolRef LHS,SymbolRef RHS)2096 ProgramStateRef trackEquality(ProgramStateRef State, SymbolRef LHS,
2097 SymbolRef RHS) {
2098 return EquivalenceClass::merge(RangeFactory, State, LHS, RHS);
2099 }
2100
interpreteAsBool(RangeSet Constraint)2101 LLVM_NODISCARD Optional<bool> interpreteAsBool(RangeSet Constraint) {
2102 assert(!Constraint.isEmpty() && "Empty ranges shouldn't get here");
2103
2104 if (Constraint.getConcreteValue())
2105 return !Constraint.getConcreteValue()->isZero();
2106
2107 if (!Constraint.containsZero())
2108 return true;
2109
2110 return llvm::None;
2111 }
2112
2113 ProgramStateRef State;
2114 SValBuilder &Builder;
2115 RangeSet::Factory &RangeFactory;
2116 };
2117
2118
assignSymExprToConst(const SymExpr * Sym,const llvm::APSInt & Constraint)2119 bool ConstraintAssignor::assignSymExprToConst(const SymExpr *Sym,
2120 const llvm::APSInt &Constraint) {
2121 llvm::SmallSet<EquivalenceClass, 4> SimplifiedClasses;
2122 // Iterate over all equivalence classes and try to simplify them.
2123 ClassMembersTy Members = State->get<ClassMembers>();
2124 for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members) {
2125 EquivalenceClass Class = ClassToSymbolSet.first;
2126 State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
2127 if (!State)
2128 return false;
2129 SimplifiedClasses.insert(Class);
2130 }
2131
2132 // Trivial equivalence classes (those that have only one symbol member) are
2133 // not stored in the State. Thus, we must skim through the constraints as
2134 // well. And we try to simplify symbols in the constraints.
2135 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2136 for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
2137 EquivalenceClass Class = ClassConstraint.first;
2138 if (SimplifiedClasses.count(Class)) // Already simplified.
2139 continue;
2140 State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
2141 if (!State)
2142 return false;
2143 }
2144
2145 // We may have trivial equivalence classes in the disequality info as
2146 // well, and we need to simplify them.
2147 DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
2148 for (std::pair<EquivalenceClass, ClassSet> DisequalityEntry :
2149 DisequalityInfo) {
2150 EquivalenceClass Class = DisequalityEntry.first;
2151 ClassSet DisequalClasses = DisequalityEntry.second;
2152 State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
2153 if (!State)
2154 return false;
2155 }
2156
2157 return true;
2158 }
2159
assignSymSymExprToRangeSet(const SymSymExpr * Sym,RangeSet Constraint)2160 bool ConstraintAssignor::assignSymSymExprToRangeSet(const SymSymExpr *Sym,
2161 RangeSet Constraint) {
2162 if (!handleRemainderOp(Sym, Constraint))
2163 return false;
2164
2165 Optional<bool> ConstraintAsBool = interpreteAsBool(Constraint);
2166
2167 if (!ConstraintAsBool)
2168 return true;
2169
2170 if (Optional<bool> Equality = meansEquality(Sym)) {
2171 // Here we cover two cases:
2172 // * if Sym is equality and the new constraint is true -> Sym's operands
2173 // should be marked as equal
2174 // * if Sym is disequality and the new constraint is false -> Sym's
2175 // operands should be also marked as equal
2176 if (*Equality == *ConstraintAsBool) {
2177 State = trackEquality(State, Sym->getLHS(), Sym->getRHS());
2178 } else {
2179 // Other combinations leave as with disequal operands.
2180 State = trackDisequality(State, Sym->getLHS(), Sym->getRHS());
2181 }
2182
2183 if (!State)
2184 return false;
2185 }
2186
2187 return true;
2188 }
2189
2190 } // end anonymous namespace
2191
2192 std::unique_ptr<ConstraintManager>
CreateRangeConstraintManager(ProgramStateManager & StMgr,ExprEngine * Eng)2193 ento::CreateRangeConstraintManager(ProgramStateManager &StMgr,
2194 ExprEngine *Eng) {
2195 return std::make_unique<RangeConstraintManager>(Eng, StMgr.getSValBuilder());
2196 }
2197
getConstraintMap(ProgramStateRef State)2198 ConstraintMap ento::getConstraintMap(ProgramStateRef State) {
2199 ConstraintMap::Factory &F = State->get_context<ConstraintMap>();
2200 ConstraintMap Result = F.getEmptyMap();
2201
2202 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2203 for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
2204 EquivalenceClass Class = ClassConstraint.first;
2205 SymbolSet ClassMembers = Class.getClassMembers(State);
2206 assert(!ClassMembers.isEmpty() &&
2207 "Class must always have at least one member!");
2208
2209 SymbolRef Representative = *ClassMembers.begin();
2210 Result = F.add(Result, Representative, ClassConstraint.second);
2211 }
2212
2213 return Result;
2214 }
2215
2216 //===----------------------------------------------------------------------===//
2217 // EquivalenceClass implementation details
2218 //===----------------------------------------------------------------------===//
2219
dumpToStream(ProgramStateRef State,raw_ostream & os) const2220 LLVM_DUMP_METHOD void EquivalenceClass::dumpToStream(ProgramStateRef State,
2221 raw_ostream &os) const {
2222 SymbolSet ClassMembers = getClassMembers(State);
2223 for (const SymbolRef &MemberSym : ClassMembers) {
2224 MemberSym->dump();
2225 os << "\n";
2226 }
2227 }
2228
find(ProgramStateRef State,SymbolRef Sym)2229 inline EquivalenceClass EquivalenceClass::find(ProgramStateRef State,
2230 SymbolRef Sym) {
2231 assert(State && "State should not be null");
2232 assert(Sym && "Symbol should not be null");
2233 // We store far from all Symbol -> Class mappings
2234 if (const EquivalenceClass *NontrivialClass = State->get<ClassMap>(Sym))
2235 return *NontrivialClass;
2236
2237 // This is a trivial class of Sym.
2238 return Sym;
2239 }
2240
merge(RangeSet::Factory & F,ProgramStateRef State,SymbolRef First,SymbolRef Second)2241 inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F,
2242 ProgramStateRef State,
2243 SymbolRef First,
2244 SymbolRef Second) {
2245 EquivalenceClass FirstClass = find(State, First);
2246 EquivalenceClass SecondClass = find(State, Second);
2247
2248 return FirstClass.merge(F, State, SecondClass);
2249 }
2250
merge(RangeSet::Factory & F,ProgramStateRef State,EquivalenceClass Other)2251 inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F,
2252 ProgramStateRef State,
2253 EquivalenceClass Other) {
2254 // It is already the same class.
2255 if (*this == Other)
2256 return State;
2257
2258 // FIXME: As of now, we support only equivalence classes of the same type.
2259 // This limitation is connected to the lack of explicit casts in
2260 // our symbolic expression model.
2261 //
2262 // That means that for `int x` and `char y` we don't distinguish
2263 // between these two very different cases:
2264 // * `x == y`
2265 // * `(char)x == y`
2266 //
2267 // The moment we introduce symbolic casts, this restriction can be
2268 // lifted.
2269 if (getType() != Other.getType())
2270 return State;
2271
2272 SymbolSet Members = getClassMembers(State);
2273 SymbolSet OtherMembers = Other.getClassMembers(State);
2274
2275 // We estimate the size of the class by the height of tree containing
2276 // its members. Merging is not a trivial operation, so it's easier to
2277 // merge the smaller class into the bigger one.
2278 if (Members.getHeight() >= OtherMembers.getHeight()) {
2279 return mergeImpl(F, State, Members, Other, OtherMembers);
2280 } else {
2281 return Other.mergeImpl(F, State, OtherMembers, *this, Members);
2282 }
2283 }
2284
2285 inline ProgramStateRef
mergeImpl(RangeSet::Factory & RangeFactory,ProgramStateRef State,SymbolSet MyMembers,EquivalenceClass Other,SymbolSet OtherMembers)2286 EquivalenceClass::mergeImpl(RangeSet::Factory &RangeFactory,
2287 ProgramStateRef State, SymbolSet MyMembers,
2288 EquivalenceClass Other, SymbolSet OtherMembers) {
2289 // Essentially what we try to recreate here is some kind of union-find
2290 // data structure. It does have certain limitations due to persistence
2291 // and the need to remove elements from classes.
2292 //
2293 // In this setting, EquialityClass object is the representative of the class
2294 // or the parent element. ClassMap is a mapping of class members to their
2295 // parent. Unlike the union-find structure, they all point directly to the
2296 // class representative because we don't have an opportunity to actually do
2297 // path compression when dealing with immutability. This means that we
2298 // compress paths every time we do merges. It also means that we lose
2299 // the main amortized complexity benefit from the original data structure.
2300 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2301 ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>();
2302
2303 // 1. If the merged classes have any constraints associated with them, we
2304 // need to transfer them to the class we have left.
2305 //
2306 // Intersection here makes perfect sense because both of these constraints
2307 // must hold for the whole new class.
2308 if (Optional<RangeSet> NewClassConstraint =
2309 intersect(RangeFactory, getConstraint(State, *this),
2310 getConstraint(State, Other))) {
2311 // NOTE: Essentially, NewClassConstraint should NEVER be infeasible because
2312 // range inferrer shouldn't generate ranges incompatible with
2313 // equivalence classes. However, at the moment, due to imperfections
2314 // in the solver, it is possible and the merge function can also
2315 // return infeasible states aka null states.
2316 if (NewClassConstraint->isEmpty())
2317 // Infeasible state
2318 return nullptr;
2319
2320 // No need in tracking constraints of a now-dissolved class.
2321 Constraints = CRF.remove(Constraints, Other);
2322 // Assign new constraints for this class.
2323 Constraints = CRF.add(Constraints, *this, *NewClassConstraint);
2324
2325 assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
2326 "a state with infeasible constraints");
2327
2328 State = State->set<ConstraintRange>(Constraints);
2329 }
2330
2331 // 2. Get ALL equivalence-related maps
2332 ClassMapTy Classes = State->get<ClassMap>();
2333 ClassMapTy::Factory &CMF = State->get_context<ClassMap>();
2334
2335 ClassMembersTy Members = State->get<ClassMembers>();
2336 ClassMembersTy::Factory &MF = State->get_context<ClassMembers>();
2337
2338 DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
2339 DisequalityMapTy::Factory &DF = State->get_context<DisequalityMap>();
2340
2341 ClassSet::Factory &CF = State->get_context<ClassSet>();
2342 SymbolSet::Factory &F = getMembersFactory(State);
2343
2344 // 2. Merge members of the Other class into the current class.
2345 SymbolSet NewClassMembers = MyMembers;
2346 for (SymbolRef Sym : OtherMembers) {
2347 NewClassMembers = F.add(NewClassMembers, Sym);
2348 // *this is now the class for all these new symbols.
2349 Classes = CMF.add(Classes, Sym, *this);
2350 }
2351
2352 // 3. Adjust member mapping.
2353 //
2354 // No need in tracking members of a now-dissolved class.
2355 Members = MF.remove(Members, Other);
2356 // Now only the current class is mapped to all the symbols.
2357 Members = MF.add(Members, *this, NewClassMembers);
2358
2359 // 4. Update disequality relations
2360 ClassSet DisequalToOther = Other.getDisequalClasses(DisequalityInfo, CF);
2361 // We are about to merge two classes but they are already known to be
2362 // non-equal. This is a contradiction.
2363 if (DisequalToOther.contains(*this))
2364 return nullptr;
2365
2366 if (!DisequalToOther.isEmpty()) {
2367 ClassSet DisequalToThis = getDisequalClasses(DisequalityInfo, CF);
2368 DisequalityInfo = DF.remove(DisequalityInfo, Other);
2369
2370 for (EquivalenceClass DisequalClass : DisequalToOther) {
2371 DisequalToThis = CF.add(DisequalToThis, DisequalClass);
2372
2373 // Disequality is a symmetric relation meaning that if
2374 // DisequalToOther not null then the set for DisequalClass is not
2375 // empty and has at least Other.
2376 ClassSet OriginalSetLinkedToOther =
2377 *DisequalityInfo.lookup(DisequalClass);
2378
2379 // Other will be eliminated and we should replace it with the bigger
2380 // united class.
2381 ClassSet NewSet = CF.remove(OriginalSetLinkedToOther, Other);
2382 NewSet = CF.add(NewSet, *this);
2383
2384 DisequalityInfo = DF.add(DisequalityInfo, DisequalClass, NewSet);
2385 }
2386
2387 DisequalityInfo = DF.add(DisequalityInfo, *this, DisequalToThis);
2388 State = State->set<DisequalityMap>(DisequalityInfo);
2389 }
2390
2391 // 5. Update the state
2392 State = State->set<ClassMap>(Classes);
2393 State = State->set<ClassMembers>(Members);
2394
2395 return State;
2396 }
2397
2398 inline SymbolSet::Factory &
getMembersFactory(ProgramStateRef State)2399 EquivalenceClass::getMembersFactory(ProgramStateRef State) {
2400 return State->get_context<SymbolSet>();
2401 }
2402
getClassMembers(ProgramStateRef State) const2403 SymbolSet EquivalenceClass::getClassMembers(ProgramStateRef State) const {
2404 if (const SymbolSet *Members = State->get<ClassMembers>(*this))
2405 return *Members;
2406
2407 // This class is trivial, so we need to construct a set
2408 // with just that one symbol from the class.
2409 SymbolSet::Factory &F = getMembersFactory(State);
2410 return F.add(F.getEmptySet(), getRepresentativeSymbol());
2411 }
2412
isTrivial(ProgramStateRef State) const2413 bool EquivalenceClass::isTrivial(ProgramStateRef State) const {
2414 return State->get<ClassMembers>(*this) == nullptr;
2415 }
2416
isTriviallyDead(ProgramStateRef State,SymbolReaper & Reaper) const2417 bool EquivalenceClass::isTriviallyDead(ProgramStateRef State,
2418 SymbolReaper &Reaper) const {
2419 return isTrivial(State) && Reaper.isDead(getRepresentativeSymbol());
2420 }
2421
markDisequal(RangeSet::Factory & RF,ProgramStateRef State,SymbolRef First,SymbolRef Second)2422 inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF,
2423 ProgramStateRef State,
2424 SymbolRef First,
2425 SymbolRef Second) {
2426 return markDisequal(RF, State, find(State, First), find(State, Second));
2427 }
2428
markDisequal(RangeSet::Factory & RF,ProgramStateRef State,EquivalenceClass First,EquivalenceClass Second)2429 inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF,
2430 ProgramStateRef State,
2431 EquivalenceClass First,
2432 EquivalenceClass Second) {
2433 return First.markDisequal(RF, State, Second);
2434 }
2435
2436 inline ProgramStateRef
markDisequal(RangeSet::Factory & RF,ProgramStateRef State,EquivalenceClass Other) const2437 EquivalenceClass::markDisequal(RangeSet::Factory &RF, ProgramStateRef State,
2438 EquivalenceClass Other) const {
2439 // If we know that two classes are equal, we can only produce an infeasible
2440 // state.
2441 if (*this == Other) {
2442 return nullptr;
2443 }
2444
2445 DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
2446 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2447
2448 // Disequality is a symmetric relation, so if we mark A as disequal to B,
2449 // we should also mark B as disequalt to A.
2450 if (!addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, *this,
2451 Other) ||
2452 !addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, Other,
2453 *this))
2454 return nullptr;
2455
2456 assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
2457 "a state with infeasible constraints");
2458
2459 State = State->set<DisequalityMap>(DisequalityInfo);
2460 State = State->set<ConstraintRange>(Constraints);
2461
2462 return State;
2463 }
2464
addToDisequalityInfo(DisequalityMapTy & Info,ConstraintRangeTy & Constraints,RangeSet::Factory & RF,ProgramStateRef State,EquivalenceClass First,EquivalenceClass Second)2465 inline bool EquivalenceClass::addToDisequalityInfo(
2466 DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
2467 RangeSet::Factory &RF, ProgramStateRef State, EquivalenceClass First,
2468 EquivalenceClass Second) {
2469
2470 // 1. Get all of the required factories.
2471 DisequalityMapTy::Factory &F = State->get_context<DisequalityMap>();
2472 ClassSet::Factory &CF = State->get_context<ClassSet>();
2473 ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>();
2474
2475 // 2. Add Second to the set of classes disequal to First.
2476 const ClassSet *CurrentSet = Info.lookup(First);
2477 ClassSet NewSet = CurrentSet ? *CurrentSet : CF.getEmptySet();
2478 NewSet = CF.add(NewSet, Second);
2479
2480 Info = F.add(Info, First, NewSet);
2481
2482 // 3. If Second is known to be a constant, we can delete this point
2483 // from the constraint asociated with First.
2484 //
2485 // So, if Second == 10, it means that First != 10.
2486 // At the same time, the same logic does not apply to ranges.
2487 if (const RangeSet *SecondConstraint = Constraints.lookup(Second))
2488 if (const llvm::APSInt *Point = SecondConstraint->getConcreteValue()) {
2489
2490 RangeSet FirstConstraint = SymbolicRangeInferrer::inferRange(
2491 RF, State, First.getRepresentativeSymbol());
2492
2493 FirstConstraint = RF.deletePoint(FirstConstraint, *Point);
2494
2495 // If the First class is about to be constrained with an empty
2496 // range-set, the state is infeasible.
2497 if (FirstConstraint.isEmpty())
2498 return false;
2499
2500 Constraints = CRF.add(Constraints, First, FirstConstraint);
2501 }
2502
2503 return true;
2504 }
2505
areEqual(ProgramStateRef State,SymbolRef FirstSym,SymbolRef SecondSym)2506 inline Optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
2507 SymbolRef FirstSym,
2508 SymbolRef SecondSym) {
2509 return EquivalenceClass::areEqual(State, find(State, FirstSym),
2510 find(State, SecondSym));
2511 }
2512
areEqual(ProgramStateRef State,EquivalenceClass First,EquivalenceClass Second)2513 inline Optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
2514 EquivalenceClass First,
2515 EquivalenceClass Second) {
2516 // The same equivalence class => symbols are equal.
2517 if (First == Second)
2518 return true;
2519
2520 // Let's check if we know anything about these two classes being not equal to
2521 // each other.
2522 ClassSet DisequalToFirst = First.getDisequalClasses(State);
2523 if (DisequalToFirst.contains(Second))
2524 return false;
2525
2526 // It is not clear.
2527 return llvm::None;
2528 }
2529
2530 LLVM_NODISCARD ProgramStateRef
removeMember(ProgramStateRef State,const SymbolRef Old)2531 EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) {
2532
2533 SymbolSet ClsMembers = getClassMembers(State);
2534 assert(ClsMembers.contains(Old));
2535
2536 // Remove `Old`'s Class->Sym relation.
2537 SymbolSet::Factory &F = getMembersFactory(State);
2538 ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
2539 ClsMembers = F.remove(ClsMembers, Old);
2540 // Ensure another precondition of the removeMember function (we can check
2541 // this only with isEmpty, thus we have to do the remove first).
2542 assert(!ClsMembers.isEmpty() &&
2543 "Class should have had at least two members before member removal");
2544 // Overwrite the existing members assigned to this class.
2545 ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
2546 ClassMembersMap = EMFactory.add(ClassMembersMap, *this, ClsMembers);
2547 State = State->set<ClassMembers>(ClassMembersMap);
2548
2549 // Remove `Old`'s Sym->Class relation.
2550 ClassMapTy Classes = State->get<ClassMap>();
2551 ClassMapTy::Factory &CMF = State->get_context<ClassMap>();
2552 Classes = CMF.remove(Classes, Old);
2553 State = State->set<ClassMap>(Classes);
2554
2555 return State;
2556 }
2557
2558 // Re-evaluate an SVal with top-level `State->assume` logic.
reAssume(ProgramStateRef State,const RangeSet * Constraint,SVal TheValue)2559 LLVM_NODISCARD ProgramStateRef reAssume(ProgramStateRef State,
2560 const RangeSet *Constraint,
2561 SVal TheValue) {
2562 if (!Constraint)
2563 return State;
2564
2565 const auto DefinedVal = TheValue.castAs<DefinedSVal>();
2566
2567 // If the SVal is 0, we can simply interpret that as `false`.
2568 if (Constraint->encodesFalseRange())
2569 return State->assume(DefinedVal, false);
2570
2571 // If the constraint does not encode 0 then we can interpret that as `true`
2572 // AND as a Range(Set).
2573 if (Constraint->encodesTrueRange()) {
2574 State = State->assume(DefinedVal, true);
2575 if (!State)
2576 return nullptr;
2577 // Fall through, re-assume based on the range values as well.
2578 }
2579 // Overestimate the individual Ranges with the RangeSet' lowest and
2580 // highest values.
2581 return State->assumeInclusiveRange(DefinedVal, Constraint->getMinValue(),
2582 Constraint->getMaxValue(), true);
2583 }
2584
2585 // Iterate over all symbols and try to simplify them. Once a symbol is
2586 // simplified then we check if we can merge the simplified symbol's equivalence
2587 // class to this class. This way, we simplify not just the symbols but the
2588 // classes as well: we strive to keep the number of the classes to be the
2589 // absolute minimum.
2590 LLVM_NODISCARD ProgramStateRef
simplify(SValBuilder & SVB,RangeSet::Factory & F,ProgramStateRef State,EquivalenceClass Class)2591 EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F,
2592 ProgramStateRef State, EquivalenceClass Class) {
2593 SymbolSet ClassMembers = Class.getClassMembers(State);
2594 for (const SymbolRef &MemberSym : ClassMembers) {
2595
2596 const SVal SimplifiedMemberVal = simplifyToSVal(State, MemberSym);
2597 const SymbolRef SimplifiedMemberSym = SimplifiedMemberVal.getAsSymbol();
2598
2599 // The symbol is collapsed to a constant, check if the current State is
2600 // still feasible.
2601 if (const auto CI = SimplifiedMemberVal.getAs<nonloc::ConcreteInt>()) {
2602 const llvm::APSInt &SV = CI->getValue();
2603 const RangeSet *ClassConstraint = getConstraint(State, Class);
2604 // We have found a contradiction.
2605 if (ClassConstraint && !ClassConstraint->contains(SV))
2606 return nullptr;
2607 }
2608
2609 if (SimplifiedMemberSym && MemberSym != SimplifiedMemberSym) {
2610 // The simplified symbol should be the member of the original Class,
2611 // however, it might be in another existing class at the moment. We
2612 // have to merge these classes.
2613 ProgramStateRef OldState = State;
2614 State = merge(F, State, MemberSym, SimplifiedMemberSym);
2615 if (!State)
2616 return nullptr;
2617 // No state change, no merge happened actually.
2618 if (OldState == State)
2619 continue;
2620
2621 assert(find(State, MemberSym) == find(State, SimplifiedMemberSym));
2622 // Remove the old and more complex symbol.
2623 State = find(State, MemberSym).removeMember(State, MemberSym);
2624
2625 // Query the class constraint again b/c that may have changed during the
2626 // merge above.
2627 const RangeSet *ClassConstraint = getConstraint(State, Class);
2628
2629 // Re-evaluate an SVal with top-level `State->assume`, this ignites
2630 // a RECURSIVE algorithm that will reach a FIXPOINT.
2631 //
2632 // About performance and complexity: Let us assume that in a State we
2633 // have N non-trivial equivalence classes and that all constraints and
2634 // disequality info is related to non-trivial classes. In the worst case,
2635 // we can simplify only one symbol of one class in each iteration. The
2636 // number of symbols in one class cannot grow b/c we replace the old
2637 // symbol with the simplified one. Also, the number of the equivalence
2638 // classes can decrease only, b/c the algorithm does a merge operation
2639 // optionally. We need N iterations in this case to reach the fixpoint.
2640 // Thus, the steps needed to be done in the worst case is proportional to
2641 // N*N.
2642 //
2643 // This worst case scenario can be extended to that case when we have
2644 // trivial classes in the constraints and in the disequality map. This
2645 // case can be reduced to the case with a State where there are only
2646 // non-trivial classes. This is because a merge operation on two trivial
2647 // classes results in one non-trivial class.
2648 State = reAssume(State, ClassConstraint, SimplifiedMemberVal);
2649 if (!State)
2650 return nullptr;
2651 }
2652 }
2653 return State;
2654 }
2655
getDisequalClasses(ProgramStateRef State,SymbolRef Sym)2656 inline ClassSet EquivalenceClass::getDisequalClasses(ProgramStateRef State,
2657 SymbolRef Sym) {
2658 return find(State, Sym).getDisequalClasses(State);
2659 }
2660
2661 inline ClassSet
getDisequalClasses(ProgramStateRef State) const2662 EquivalenceClass::getDisequalClasses(ProgramStateRef State) const {
2663 return getDisequalClasses(State->get<DisequalityMap>(),
2664 State->get_context<ClassSet>());
2665 }
2666
2667 inline ClassSet
getDisequalClasses(DisequalityMapTy Map,ClassSet::Factory & Factory) const2668 EquivalenceClass::getDisequalClasses(DisequalityMapTy Map,
2669 ClassSet::Factory &Factory) const {
2670 if (const ClassSet *DisequalClasses = Map.lookup(*this))
2671 return *DisequalClasses;
2672
2673 return Factory.getEmptySet();
2674 }
2675
isClassDataConsistent(ProgramStateRef State)2676 bool EquivalenceClass::isClassDataConsistent(ProgramStateRef State) {
2677 ClassMembersTy Members = State->get<ClassMembers>();
2678
2679 for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair : Members) {
2680 for (SymbolRef Member : ClassMembersPair.second) {
2681 // Every member of the class should have a mapping back to the class.
2682 if (find(State, Member) == ClassMembersPair.first) {
2683 continue;
2684 }
2685
2686 return false;
2687 }
2688 }
2689
2690 DisequalityMapTy Disequalities = State->get<DisequalityMap>();
2691 for (std::pair<EquivalenceClass, ClassSet> DisequalityInfo : Disequalities) {
2692 EquivalenceClass Class = DisequalityInfo.first;
2693 ClassSet DisequalClasses = DisequalityInfo.second;
2694
2695 // There is no use in keeping empty sets in the map.
2696 if (DisequalClasses.isEmpty())
2697 return false;
2698
2699 // Disequality is symmetrical, i.e. for every Class A and B that A != B,
2700 // B != A should also be true.
2701 for (EquivalenceClass DisequalClass : DisequalClasses) {
2702 const ClassSet *DisequalToDisequalClasses =
2703 Disequalities.lookup(DisequalClass);
2704
2705 // It should be a set of at least one element: Class
2706 if (!DisequalToDisequalClasses ||
2707 !DisequalToDisequalClasses->contains(Class))
2708 return false;
2709 }
2710 }
2711
2712 return true;
2713 }
2714
2715 //===----------------------------------------------------------------------===//
2716 // RangeConstraintManager implementation
2717 //===----------------------------------------------------------------------===//
2718
canReasonAbout(SVal X) const2719 bool RangeConstraintManager::canReasonAbout(SVal X) const {
2720 Optional<nonloc::SymbolVal> SymVal = X.getAs<nonloc::SymbolVal>();
2721 if (SymVal && SymVal->isExpression()) {
2722 const SymExpr *SE = SymVal->getSymbol();
2723
2724 if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SE)) {
2725 switch (SIE->getOpcode()) {
2726 // We don't reason yet about bitwise-constraints on symbolic values.
2727 case BO_And:
2728 case BO_Or:
2729 case BO_Xor:
2730 return false;
2731 // We don't reason yet about these arithmetic constraints on
2732 // symbolic values.
2733 case BO_Mul:
2734 case BO_Div:
2735 case BO_Rem:
2736 case BO_Shl:
2737 case BO_Shr:
2738 return false;
2739 // All other cases.
2740 default:
2741 return true;
2742 }
2743 }
2744
2745 if (const SymSymExpr *SSE = dyn_cast<SymSymExpr>(SE)) {
2746 // FIXME: Handle <=> here.
2747 if (BinaryOperator::isEqualityOp(SSE->getOpcode()) ||
2748 BinaryOperator::isRelationalOp(SSE->getOpcode())) {
2749 // We handle Loc <> Loc comparisons, but not (yet) NonLoc <> NonLoc.
2750 // We've recently started producing Loc <> NonLoc comparisons (that
2751 // result from casts of one of the operands between eg. intptr_t and
2752 // void *), but we can't reason about them yet.
2753 if (Loc::isLocType(SSE->getLHS()->getType())) {
2754 return Loc::isLocType(SSE->getRHS()->getType());
2755 }
2756 }
2757 }
2758
2759 return false;
2760 }
2761
2762 return true;
2763 }
2764
checkNull(ProgramStateRef State,SymbolRef Sym)2765 ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State,
2766 SymbolRef Sym) {
2767 const RangeSet *Ranges = getConstraint(State, Sym);
2768
2769 // If we don't have any information about this symbol, it's underconstrained.
2770 if (!Ranges)
2771 return ConditionTruthVal();
2772
2773 // If we have a concrete value, see if it's zero.
2774 if (const llvm::APSInt *Value = Ranges->getConcreteValue())
2775 return *Value == 0;
2776
2777 BasicValueFactory &BV = getBasicVals();
2778 APSIntType IntType = BV.getAPSIntType(Sym->getType());
2779 llvm::APSInt Zero = IntType.getZeroValue();
2780
2781 // Check if zero is in the set of possible values.
2782 if (!Ranges->contains(Zero))
2783 return false;
2784
2785 // Zero is a possible value, but it is not the /only/ possible value.
2786 return ConditionTruthVal();
2787 }
2788
getSymVal(ProgramStateRef St,SymbolRef Sym) const2789 const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St,
2790 SymbolRef Sym) const {
2791 const RangeSet *T = getConstraint(St, Sym);
2792 return T ? T->getConcreteValue() : nullptr;
2793 }
2794
2795 //===----------------------------------------------------------------------===//
2796 // Remove dead symbols from existing constraints
2797 //===----------------------------------------------------------------------===//
2798
2799 /// Scan all symbols referenced by the constraints. If the symbol is not alive
2800 /// as marked in LSymbols, mark it as dead in DSymbols.
2801 ProgramStateRef
removeDeadBindings(ProgramStateRef State,SymbolReaper & SymReaper)2802 RangeConstraintManager::removeDeadBindings(ProgramStateRef State,
2803 SymbolReaper &SymReaper) {
2804 ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
2805 ClassMembersTy NewClassMembersMap = ClassMembersMap;
2806 ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
2807 SymbolSet::Factory &SetFactory = State->get_context<SymbolSet>();
2808
2809 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2810 ConstraintRangeTy NewConstraints = Constraints;
2811 ConstraintRangeTy::Factory &ConstraintFactory =
2812 State->get_context<ConstraintRange>();
2813
2814 ClassMapTy Map = State->get<ClassMap>();
2815 ClassMapTy NewMap = Map;
2816 ClassMapTy::Factory &ClassFactory = State->get_context<ClassMap>();
2817
2818 DisequalityMapTy Disequalities = State->get<DisequalityMap>();
2819 DisequalityMapTy::Factory &DisequalityFactory =
2820 State->get_context<DisequalityMap>();
2821 ClassSet::Factory &ClassSetFactory = State->get_context<ClassSet>();
2822
2823 bool ClassMapChanged = false;
2824 bool MembersMapChanged = false;
2825 bool ConstraintMapChanged = false;
2826 bool DisequalitiesChanged = false;
2827
2828 auto removeDeadClass = [&](EquivalenceClass Class) {
2829 // Remove associated constraint ranges.
2830 Constraints = ConstraintFactory.remove(Constraints, Class);
2831 ConstraintMapChanged = true;
2832
2833 // Update disequality information to not hold any information on the
2834 // removed class.
2835 ClassSet DisequalClasses =
2836 Class.getDisequalClasses(Disequalities, ClassSetFactory);
2837 if (!DisequalClasses.isEmpty()) {
2838 for (EquivalenceClass DisequalClass : DisequalClasses) {
2839 ClassSet DisequalToDisequalSet =
2840 DisequalClass.getDisequalClasses(Disequalities, ClassSetFactory);
2841 // DisequalToDisequalSet is guaranteed to be non-empty for consistent
2842 // disequality info.
2843 assert(!DisequalToDisequalSet.isEmpty());
2844 ClassSet NewSet = ClassSetFactory.remove(DisequalToDisequalSet, Class);
2845
2846 // No need in keeping an empty set.
2847 if (NewSet.isEmpty()) {
2848 Disequalities =
2849 DisequalityFactory.remove(Disequalities, DisequalClass);
2850 } else {
2851 Disequalities =
2852 DisequalityFactory.add(Disequalities, DisequalClass, NewSet);
2853 }
2854 }
2855 // Remove the data for the class
2856 Disequalities = DisequalityFactory.remove(Disequalities, Class);
2857 DisequalitiesChanged = true;
2858 }
2859 };
2860
2861 // 1. Let's see if dead symbols are trivial and have associated constraints.
2862 for (std::pair<EquivalenceClass, RangeSet> ClassConstraintPair :
2863 Constraints) {
2864 EquivalenceClass Class = ClassConstraintPair.first;
2865 if (Class.isTriviallyDead(State, SymReaper)) {
2866 // If this class is trivial, we can remove its constraints right away.
2867 removeDeadClass(Class);
2868 }
2869 }
2870
2871 // 2. We don't need to track classes for dead symbols.
2872 for (std::pair<SymbolRef, EquivalenceClass> SymbolClassPair : Map) {
2873 SymbolRef Sym = SymbolClassPair.first;
2874
2875 if (SymReaper.isDead(Sym)) {
2876 ClassMapChanged = true;
2877 NewMap = ClassFactory.remove(NewMap, Sym);
2878 }
2879 }
2880
2881 // 3. Remove dead members from classes and remove dead non-trivial classes
2882 // and their constraints.
2883 for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair :
2884 ClassMembersMap) {
2885 EquivalenceClass Class = ClassMembersPair.first;
2886 SymbolSet LiveMembers = ClassMembersPair.second;
2887 bool MembersChanged = false;
2888
2889 for (SymbolRef Member : ClassMembersPair.second) {
2890 if (SymReaper.isDead(Member)) {
2891 MembersChanged = true;
2892 LiveMembers = SetFactory.remove(LiveMembers, Member);
2893 }
2894 }
2895
2896 // Check if the class changed.
2897 if (!MembersChanged)
2898 continue;
2899
2900 MembersMapChanged = true;
2901
2902 if (LiveMembers.isEmpty()) {
2903 // The class is dead now, we need to wipe it out of the members map...
2904 NewClassMembersMap = EMFactory.remove(NewClassMembersMap, Class);
2905
2906 // ...and remove all of its constraints.
2907 removeDeadClass(Class);
2908 } else {
2909 // We need to change the members associated with the class.
2910 NewClassMembersMap =
2911 EMFactory.add(NewClassMembersMap, Class, LiveMembers);
2912 }
2913 }
2914
2915 // 4. Update the state with new maps.
2916 //
2917 // Here we try to be humble and update a map only if it really changed.
2918 if (ClassMapChanged)
2919 State = State->set<ClassMap>(NewMap);
2920
2921 if (MembersMapChanged)
2922 State = State->set<ClassMembers>(NewClassMembersMap);
2923
2924 if (ConstraintMapChanged)
2925 State = State->set<ConstraintRange>(Constraints);
2926
2927 if (DisequalitiesChanged)
2928 State = State->set<DisequalityMap>(Disequalities);
2929
2930 assert(EquivalenceClass::isClassDataConsistent(State));
2931
2932 return State;
2933 }
2934
getRange(ProgramStateRef State,SymbolRef Sym)2935 RangeSet RangeConstraintManager::getRange(ProgramStateRef State,
2936 SymbolRef Sym) {
2937 return SymbolicRangeInferrer::inferRange(F, State, Sym);
2938 }
2939
setRange(ProgramStateRef State,SymbolRef Sym,RangeSet Range)2940 ProgramStateRef RangeConstraintManager::setRange(ProgramStateRef State,
2941 SymbolRef Sym,
2942 RangeSet Range) {
2943 return ConstraintAssignor::assign(State, getSValBuilder(), F, Sym, Range);
2944 }
2945
//===----------------------------------------------------------------------===//
// assumeSymX methods: protected interface for RangeConstraintManager.
//===----------------------------------------------------------------------===//
2949
// The syntax for ranges below is mathematical, using [x, y] for closed ranges
// and (x, y) for open ranges. These ranges are modular, corresponding with
// a common treatment of C integer overflow. This means that these methods
// do not have to worry about overflow; RangeSet::Factory::intersect can handle
// such a "wraparound" range.
// As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1,
// UINT_MAX, 0, 1, and 2.
2957
2958 ProgramStateRef
assumeSymNE(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)2959 RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym,
2960 const llvm::APSInt &Int,
2961 const llvm::APSInt &Adjustment) {
2962 // Before we do any real work, see if the value can even show up.
2963 APSIntType AdjustmentType(Adjustment);
2964 if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within)
2965 return St;
2966
2967 llvm::APSInt Point = AdjustmentType.convert(Int) - Adjustment;
2968 RangeSet New = getRange(St, Sym);
2969 New = F.deletePoint(New, Point);
2970
2971 return setRange(St, Sym, New);
2972 }
2973
2974 ProgramStateRef
assumeSymEQ(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)2975 RangeConstraintManager::assumeSymEQ(ProgramStateRef St, SymbolRef Sym,
2976 const llvm::APSInt &Int,
2977 const llvm::APSInt &Adjustment) {
2978 // Before we do any real work, see if the value can even show up.
2979 APSIntType AdjustmentType(Adjustment);
2980 if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within)
2981 return nullptr;
2982
2983 // [Int-Adjustment, Int-Adjustment]
2984 llvm::APSInt AdjInt = AdjustmentType.convert(Int) - Adjustment;
2985 RangeSet New = getRange(St, Sym);
2986 New = F.intersect(New, AdjInt);
2987
2988 return setRange(St, Sym, New);
2989 }
2990
getSymLTRange(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)2991 RangeSet RangeConstraintManager::getSymLTRange(ProgramStateRef St,
2992 SymbolRef Sym,
2993 const llvm::APSInt &Int,
2994 const llvm::APSInt &Adjustment) {
2995 // Before we do any real work, see if the value can even show up.
2996 APSIntType AdjustmentType(Adjustment);
2997 switch (AdjustmentType.testInRange(Int, true)) {
2998 case APSIntType::RTR_Below:
2999 return F.getEmptySet();
3000 case APSIntType::RTR_Within:
3001 break;
3002 case APSIntType::RTR_Above:
3003 return getRange(St, Sym);
3004 }
3005
3006 // Special case for Int == Min. This is always false.
3007 llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3008 llvm::APSInt Min = AdjustmentType.getMinValue();
3009 if (ComparisonVal == Min)
3010 return F.getEmptySet();
3011
3012 llvm::APSInt Lower = Min - Adjustment;
3013 llvm::APSInt Upper = ComparisonVal - Adjustment;
3014 --Upper;
3015
3016 RangeSet Result = getRange(St, Sym);
3017 return F.intersect(Result, Lower, Upper);
3018 }
3019
3020 ProgramStateRef
assumeSymLT(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3021 RangeConstraintManager::assumeSymLT(ProgramStateRef St, SymbolRef Sym,
3022 const llvm::APSInt &Int,
3023 const llvm::APSInt &Adjustment) {
3024 RangeSet New = getSymLTRange(St, Sym, Int, Adjustment);
3025 return setRange(St, Sym, New);
3026 }
3027
getSymGTRange(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3028 RangeSet RangeConstraintManager::getSymGTRange(ProgramStateRef St,
3029 SymbolRef Sym,
3030 const llvm::APSInt &Int,
3031 const llvm::APSInt &Adjustment) {
3032 // Before we do any real work, see if the value can even show up.
3033 APSIntType AdjustmentType(Adjustment);
3034 switch (AdjustmentType.testInRange(Int, true)) {
3035 case APSIntType::RTR_Below:
3036 return getRange(St, Sym);
3037 case APSIntType::RTR_Within:
3038 break;
3039 case APSIntType::RTR_Above:
3040 return F.getEmptySet();
3041 }
3042
3043 // Special case for Int == Max. This is always false.
3044 llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3045 llvm::APSInt Max = AdjustmentType.getMaxValue();
3046 if (ComparisonVal == Max)
3047 return F.getEmptySet();
3048
3049 llvm::APSInt Lower = ComparisonVal - Adjustment;
3050 llvm::APSInt Upper = Max - Adjustment;
3051 ++Lower;
3052
3053 RangeSet SymRange = getRange(St, Sym);
3054 return F.intersect(SymRange, Lower, Upper);
3055 }
3056
3057 ProgramStateRef
assumeSymGT(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3058 RangeConstraintManager::assumeSymGT(ProgramStateRef St, SymbolRef Sym,
3059 const llvm::APSInt &Int,
3060 const llvm::APSInt &Adjustment) {
3061 RangeSet New = getSymGTRange(St, Sym, Int, Adjustment);
3062 return setRange(St, Sym, New);
3063 }
3064
getSymGERange(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3065 RangeSet RangeConstraintManager::getSymGERange(ProgramStateRef St,
3066 SymbolRef Sym,
3067 const llvm::APSInt &Int,
3068 const llvm::APSInt &Adjustment) {
3069 // Before we do any real work, see if the value can even show up.
3070 APSIntType AdjustmentType(Adjustment);
3071 switch (AdjustmentType.testInRange(Int, true)) {
3072 case APSIntType::RTR_Below:
3073 return getRange(St, Sym);
3074 case APSIntType::RTR_Within:
3075 break;
3076 case APSIntType::RTR_Above:
3077 return F.getEmptySet();
3078 }
3079
3080 // Special case for Int == Min. This is always feasible.
3081 llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3082 llvm::APSInt Min = AdjustmentType.getMinValue();
3083 if (ComparisonVal == Min)
3084 return getRange(St, Sym);
3085
3086 llvm::APSInt Max = AdjustmentType.getMaxValue();
3087 llvm::APSInt Lower = ComparisonVal - Adjustment;
3088 llvm::APSInt Upper = Max - Adjustment;
3089
3090 RangeSet SymRange = getRange(St, Sym);
3091 return F.intersect(SymRange, Lower, Upper);
3092 }
3093
3094 ProgramStateRef
assumeSymGE(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3095 RangeConstraintManager::assumeSymGE(ProgramStateRef St, SymbolRef Sym,
3096 const llvm::APSInt &Int,
3097 const llvm::APSInt &Adjustment) {
3098 RangeSet New = getSymGERange(St, Sym, Int, Adjustment);
3099 return setRange(St, Sym, New);
3100 }
3101
3102 RangeSet
getSymLERange(llvm::function_ref<RangeSet ()> RS,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3103 RangeConstraintManager::getSymLERange(llvm::function_ref<RangeSet()> RS,
3104 const llvm::APSInt &Int,
3105 const llvm::APSInt &Adjustment) {
3106 // Before we do any real work, see if the value can even show up.
3107 APSIntType AdjustmentType(Adjustment);
3108 switch (AdjustmentType.testInRange(Int, true)) {
3109 case APSIntType::RTR_Below:
3110 return F.getEmptySet();
3111 case APSIntType::RTR_Within:
3112 break;
3113 case APSIntType::RTR_Above:
3114 return RS();
3115 }
3116
3117 // Special case for Int == Max. This is always feasible.
3118 llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3119 llvm::APSInt Max = AdjustmentType.getMaxValue();
3120 if (ComparisonVal == Max)
3121 return RS();
3122
3123 llvm::APSInt Min = AdjustmentType.getMinValue();
3124 llvm::APSInt Lower = Min - Adjustment;
3125 llvm::APSInt Upper = ComparisonVal - Adjustment;
3126
3127 RangeSet Default = RS();
3128 return F.intersect(Default, Lower, Upper);
3129 }
3130
getSymLERange(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3131 RangeSet RangeConstraintManager::getSymLERange(ProgramStateRef St,
3132 SymbolRef Sym,
3133 const llvm::APSInt &Int,
3134 const llvm::APSInt &Adjustment) {
3135 return getSymLERange([&] { return getRange(St, Sym); }, Int, Adjustment);
3136 }
3137
3138 ProgramStateRef
assumeSymLE(ProgramStateRef St,SymbolRef Sym,const llvm::APSInt & Int,const llvm::APSInt & Adjustment)3139 RangeConstraintManager::assumeSymLE(ProgramStateRef St, SymbolRef Sym,
3140 const llvm::APSInt &Int,
3141 const llvm::APSInt &Adjustment) {
3142 RangeSet New = getSymLERange(St, Sym, Int, Adjustment);
3143 return setRange(St, Sym, New);
3144 }
3145
assumeSymWithinInclusiveRange(ProgramStateRef State,SymbolRef Sym,const llvm::APSInt & From,const llvm::APSInt & To,const llvm::APSInt & Adjustment)3146 ProgramStateRef RangeConstraintManager::assumeSymWithinInclusiveRange(
3147 ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
3148 const llvm::APSInt &To, const llvm::APSInt &Adjustment) {
3149 RangeSet New = getSymGERange(State, Sym, From, Adjustment);
3150 if (New.isEmpty())
3151 return nullptr;
3152 RangeSet Out = getSymLERange([&] { return New; }, To, Adjustment);
3153 return setRange(State, Sym, Out);
3154 }
3155
assumeSymOutsideInclusiveRange(ProgramStateRef State,SymbolRef Sym,const llvm::APSInt & From,const llvm::APSInt & To,const llvm::APSInt & Adjustment)3156 ProgramStateRef RangeConstraintManager::assumeSymOutsideInclusiveRange(
3157 ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
3158 const llvm::APSInt &To, const llvm::APSInt &Adjustment) {
3159 RangeSet RangeLT = getSymLTRange(State, Sym, From, Adjustment);
3160 RangeSet RangeGT = getSymGTRange(State, Sym, To, Adjustment);
3161 RangeSet New(F.add(RangeLT, RangeGT));
3162 return setRange(State, Sym, New);
3163 }
3164
3165 //===----------------------------------------------------------------------===//
3166 // Pretty-printing.
3167 //===----------------------------------------------------------------------===//
3168
printJson(raw_ostream & Out,ProgramStateRef State,const char * NL,unsigned int Space,bool IsDot) const3169 void RangeConstraintManager::printJson(raw_ostream &Out, ProgramStateRef State,
3170 const char *NL, unsigned int Space,
3171 bool IsDot) const {
3172 printConstraints(Out, State, NL, Space, IsDot);
3173 printEquivalenceClasses(Out, State, NL, Space, IsDot);
3174 printDisequalities(Out, State, NL, Space, IsDot);
3175 }
3176
printValue(raw_ostream & Out,ProgramStateRef State,SymbolRef Sym)3177 void RangeConstraintManager::printValue(raw_ostream &Out, ProgramStateRef State,
3178 SymbolRef Sym) {
3179 const RangeSet RS = getRange(State, Sym);
3180 Out << RS.getBitWidth() << (RS.isUnsigned() ? "u:" : "s:");
3181 RS.dump(Out);
3182 }
3183
toString(const SymbolRef & Sym)3184 static std::string toString(const SymbolRef &Sym) {
3185 std::string S;
3186 llvm::raw_string_ostream O(S);
3187 Sym->dumpToStream(O);
3188 return O.str();
3189 }
3190
printConstraints(raw_ostream & Out,ProgramStateRef State,const char * NL,unsigned int Space,bool IsDot) const3191 void RangeConstraintManager::printConstraints(raw_ostream &Out,
3192 ProgramStateRef State,
3193 const char *NL,
3194 unsigned int Space,
3195 bool IsDot) const {
3196 ConstraintRangeTy Constraints = State->get<ConstraintRange>();
3197
3198 Indent(Out, Space, IsDot) << "\"constraints\": ";
3199 if (Constraints.isEmpty()) {
3200 Out << "null," << NL;
3201 return;
3202 }
3203
3204 std::map<std::string, RangeSet> OrderedConstraints;
3205 for (std::pair<EquivalenceClass, RangeSet> P : Constraints) {
3206 SymbolSet ClassMembers = P.first.getClassMembers(State);
3207 for (const SymbolRef &ClassMember : ClassMembers) {
3208 bool insertion_took_place;
3209 std::tie(std::ignore, insertion_took_place) =
3210 OrderedConstraints.insert({toString(ClassMember), P.second});
3211 assert(insertion_took_place &&
3212 "two symbols should not have the same dump");
3213 }
3214 }
3215
3216 ++Space;
3217 Out << '[' << NL;
3218 bool First = true;
3219 for (std::pair<std::string, RangeSet> P : OrderedConstraints) {
3220 if (First) {
3221 First = false;
3222 } else {
3223 Out << ',';
3224 Out << NL;
3225 }
3226 Indent(Out, Space, IsDot)
3227 << "{ \"symbol\": \"" << P.first << "\", \"range\": \"";
3228 P.second.dump(Out);
3229 Out << "\" }";
3230 }
3231 Out << NL;
3232
3233 --Space;
3234 Indent(Out, Space, IsDot) << "]," << NL;
3235 }
3236
toString(ProgramStateRef State,EquivalenceClass Class)3237 static std::string toString(ProgramStateRef State, EquivalenceClass Class) {
3238 SymbolSet ClassMembers = Class.getClassMembers(State);
3239 llvm::SmallVector<SymbolRef, 8> ClassMembersSorted(ClassMembers.begin(),
3240 ClassMembers.end());
3241 llvm::sort(ClassMembersSorted,
3242 [](const SymbolRef &LHS, const SymbolRef &RHS) {
3243 return toString(LHS) < toString(RHS);
3244 });
3245
3246 bool FirstMember = true;
3247
3248 std::string Str;
3249 llvm::raw_string_ostream Out(Str);
3250 Out << "[ ";
3251 for (SymbolRef ClassMember : ClassMembersSorted) {
3252 if (FirstMember)
3253 FirstMember = false;
3254 else
3255 Out << ", ";
3256 Out << "\"" << ClassMember << "\"";
3257 }
3258 Out << " ]";
3259 return Out.str();
3260 }
3261
printEquivalenceClasses(raw_ostream & Out,ProgramStateRef State,const char * NL,unsigned int Space,bool IsDot) const3262 void RangeConstraintManager::printEquivalenceClasses(raw_ostream &Out,
3263 ProgramStateRef State,
3264 const char *NL,
3265 unsigned int Space,
3266 bool IsDot) const {
3267 ClassMembersTy Members = State->get<ClassMembers>();
3268
3269 Indent(Out, Space, IsDot) << "\"equivalence_classes\": ";
3270 if (Members.isEmpty()) {
3271 Out << "null," << NL;
3272 return;
3273 }
3274
3275 std::set<std::string> MembersStr;
3276 for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members)
3277 MembersStr.insert(toString(State, ClassToSymbolSet.first));
3278
3279 ++Space;
3280 Out << '[' << NL;
3281 bool FirstClass = true;
3282 for (const std::string &Str : MembersStr) {
3283 if (FirstClass) {
3284 FirstClass = false;
3285 } else {
3286 Out << ',';
3287 Out << NL;
3288 }
3289 Indent(Out, Space, IsDot);
3290 Out << Str;
3291 }
3292 Out << NL;
3293
3294 --Space;
3295 Indent(Out, Space, IsDot) << "]," << NL;
3296 }
3297
printDisequalities(raw_ostream & Out,ProgramStateRef State,const char * NL,unsigned int Space,bool IsDot) const3298 void RangeConstraintManager::printDisequalities(raw_ostream &Out,
3299 ProgramStateRef State,
3300 const char *NL,
3301 unsigned int Space,
3302 bool IsDot) const {
3303 DisequalityMapTy Disequalities = State->get<DisequalityMap>();
3304
3305 Indent(Out, Space, IsDot) << "\"disequality_info\": ";
3306 if (Disequalities.isEmpty()) {
3307 Out << "null," << NL;
3308 return;
3309 }
3310
3311 // Transform the disequality info to an ordered map of
3312 // [string -> (ordered set of strings)]
3313 using EqClassesStrTy = std::set<std::string>;
3314 using DisequalityInfoStrTy = std::map<std::string, EqClassesStrTy>;
3315 DisequalityInfoStrTy DisequalityInfoStr;
3316 for (std::pair<EquivalenceClass, ClassSet> ClassToDisEqSet : Disequalities) {
3317 EquivalenceClass Class = ClassToDisEqSet.first;
3318 ClassSet DisequalClasses = ClassToDisEqSet.second;
3319 EqClassesStrTy MembersStr;
3320 for (EquivalenceClass DisEqClass : DisequalClasses)
3321 MembersStr.insert(toString(State, DisEqClass));
3322 DisequalityInfoStr.insert({toString(State, Class), MembersStr});
3323 }
3324
3325 ++Space;
3326 Out << '[' << NL;
3327 bool FirstClass = true;
3328 for (std::pair<std::string, EqClassesStrTy> ClassToDisEqSet :
3329 DisequalityInfoStr) {
3330 const std::string &Class = ClassToDisEqSet.first;
3331 if (FirstClass) {
3332 FirstClass = false;
3333 } else {
3334 Out << ',';
3335 Out << NL;
3336 }
3337 Indent(Out, Space, IsDot) << "{" << NL;
3338 unsigned int DisEqSpace = Space + 1;
3339 Indent(Out, DisEqSpace, IsDot) << "\"class\": ";
3340 Out << Class;
3341 const EqClassesStrTy &DisequalClasses = ClassToDisEqSet.second;
3342 if (!DisequalClasses.empty()) {
3343 Out << "," << NL;
3344 Indent(Out, DisEqSpace, IsDot) << "\"disequal_to\": [" << NL;
3345 unsigned int DisEqClassSpace = DisEqSpace + 1;
3346 Indent(Out, DisEqClassSpace, IsDot);
3347 bool FirstDisEqClass = true;
3348 for (const std::string &DisEqClass : DisequalClasses) {
3349 if (FirstDisEqClass) {
3350 FirstDisEqClass = false;
3351 } else {
3352 Out << ',' << NL;
3353 Indent(Out, DisEqClassSpace, IsDot);
3354 }
3355 Out << DisEqClass;
3356 }
3357 Out << "]" << NL;
3358 }
3359 Indent(Out, Space, IsDot) << "}";
3360 }
3361 Out << NL;
3362
3363 --Space;
3364 Indent(Out, Space, IsDot) << "]," << NL;
3365 }
3366