Transforms/Instrumentation/PoisonChecking.cpp

//===- PoisonChecking.cpp - -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implements a transform pass which instruments IR such that poison semantics
// are made explicit.  That is, it provides a (possibly partial) executable
// semantics for every instruction w.r.t. poison as specified in the LLVM
// LangRef.  There are obvious parallels to the sanitizer tools, but this pass
// is focused purely on the semantics of LLVM IR, not any particular source
// language.   If you're looking for something to see if your C/C++ contains
// UB, this is not it.
//
// The rewritten semantics of each instruction will include the following
// components:
//
// 1) The original instruction, unmodified.
// 2) A propagation rule which translates dynamic information about the poison
//    state of each input to whether the dynamic output of the instruction
//    produces poison.
// 3) A flag validation rule which validates any poison producing flags on the
//    instruction itself (e.g. checks for overflow on nsw).
// 4) A check rule which traps (to a handler function) if this instruction must
//    execute undefined behavior given the poison state of it's inputs.
//
// At the moment, the UB detection is done in a best effort manner; that is,
// the resulting code may produce a false negative result (not report UB when
// it actually exists according to the LangRef spec), but should never produce
// a false positive (report UB where it doesn't exist).  The intention is to
// eventually support a "strict" mode which never dynamically reports a false
// negative at the cost of rejecting some valid inputs to translation.
//
// Use cases for this pass include:
// - Understanding (and testing!) the implications of the definition of poison
//   from the LangRef.
// - Validating the output of a IR fuzzer to ensure that all programs produced
//   are well defined on the specific input used.
// - Finding/confirming poison specific miscompiles by checking the poison
//   status of an input/IR pair is the same before and after an optimization
//   transform.
// - Checking that a bugpoint reduction does not introduce UB which didn't
//   exist in the original program being reduced.
//
// The major sources of inaccuracy are currently:
// - Most validation rules not yet implemented for instructions with poison
//   relavant flags.  At the moment, only nsw/nuw on add/sub are supported.
// - UB which is control dependent on a branch on poison is not yet
//   reported. Currently, only data flow dependence is modeled.
// - Poison which is propagated through memory is not modeled.  As such,
//   storing poison to memory and then reloading it will cause a false negative
//   as we consider the reloaded value to not be poisoned.
// - Poison propagation across function boundaries is not modeled.  At the
//   moment, all arguments and return values are assumed not to be poison.
// - Undef is not modeled.  In particular, the optimizer's freedom to pick
//   concrete values for undef bits so as to maximize potential for producing
//   poison is not modeled.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "poison-checking"

static cl::opt<bool>
LocalCheck("poison-checking-function-local",
           cl::init(false),
           cl::desc("Check that returns are non-poison (for testing)"));


static bool isConstantFalse(Value* V) {
  assert(V->getType()->isIntegerTy(1));
  if (auto *CI = dyn_cast<ConstantInt>(V))
    return CI->isZero();
  return false;
}

static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
  if (Ops.size() == 0)
    return B.getFalse();
  unsigned i = 0;
  for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
  if (i == Ops.size())
    return B.getFalse();
  Value *Accum = Ops[i++];
  for (; i < Ops.size(); i++)
    if (!isConstantFalse(Ops[i]))
      Accum = B.CreateOr(Accum, Ops[i]);
  return Accum;
}

static void generatePoisonChecksForBinOp(Instruction &I,
                                         SmallVector<Value*, 2> &Checks) {
  assert(isa<BinaryOperator>(I));

  IRBuilder<> B(&I);
  Value *LHS = I.getOperand(0);
  Value *RHS = I.getOperand(1);
  switch (I.getOpcode()) {
  default:
    return;
  case Instruction::Add: {
    if (I.hasNoSignedWrap()) {
      auto *OverflowOp =
        B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
    }
    if (I.hasNoUnsignedWrap()) {
      auto *OverflowOp =
        B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
    }
    break;
  }
  case Instruction::Sub: {
    if (I.hasNoSignedWrap()) {
      auto *OverflowOp =
        B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
    }
    if (I.hasNoUnsignedWrap()) {
      auto *OverflowOp =
        B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
    }
    break;
  }
  case Instruction::Mul: {
    if (I.hasNoSignedWrap()) {
      auto *OverflowOp =
        B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
    }
    if (I.hasNoUnsignedWrap()) {
      auto *OverflowOp =
        B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
    }
    break;
  }
  };
}

static Value* generatePoisonChecks(Instruction &I) {
  IRBuilder<> B(&I);
  SmallVector<Value*, 2> Checks;
  if (isa<BinaryOperator>(I))
    generatePoisonChecksForBinOp(I, Checks);
  return buildOrChain(B, Checks);
}

static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
  auto Itr = ValToPoison.find(V);
  if (Itr != ValToPoison.end())
    return Itr->second;
  if (isa<Constant>(V)) {
    return ConstantInt::getFalse(V->getContext());
  }
  // Return false for unknwon values - this implements a non-strict mode where
  // unhandled IR constructs are simply considered to never produce poison.  At
  // some point in the future, we probably want a "strict mode" for testing if
  // nothing else.
  return ConstantInt::getFalse(V->getContext());
}

static void CreateAssert(IRBuilder<> &B, Value *Cond) {
  assert(Cond->getType()->isIntegerTy(1));
  if (auto *CI = dyn_cast<ConstantInt>(Cond))
    if (CI->isAllOnesValue())
      return;

  Module *M = B.GetInsertBlock()->getModule();
  M->getOrInsertFunction("__poison_checker_assert",
                         Type::getVoidTy(M->getContext()),
                         Type::getInt1Ty(M->getContext()));
  Function *TrapFunc = M->getFunction("__poison_checker_assert");
  B.CreateCall(TrapFunc, Cond);
}

static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
  assert(Cond->getType()->isIntegerTy(1));
  CreateAssert(B, B.CreateNot(Cond));
}

static bool rewrite(Function &F) {
  auto * const Int1Ty = Type::getInt1Ty(F.getContext());

  DenseMap<Value *, Value *> ValToPoison;

  for (BasicBlock &BB : F)
    for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
      auto *OldPHI = cast<PHINode>(&*I);
      auto *NewPHI = PHINode::Create(Int1Ty,
                                     OldPHI->getNumIncomingValues());
      for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
        NewPHI->addIncoming(UndefValue::get(Int1Ty),
                            OldPHI->getIncomingBlock(i));
      NewPHI->insertBefore(OldPHI);
      ValToPoison[OldPHI] = NewPHI;
    }

  for (BasicBlock &BB : F)
    for (Instruction &I : BB) {
      if (isa<PHINode>(I)) continue;

      IRBuilder<> B(cast<Instruction>(&I));
      if (Value *Op = const_cast<Value*>(getGuaranteedNonFullPoisonOp(&I)))
        CreateAssertNot(B, getPoisonFor(ValToPoison, Op));

      if (LocalCheck)
        if (auto *RI = dyn_cast<ReturnInst>(&I))
          if (RI->getNumOperands() != 0) {
            Value *Op = RI->getOperand(0);
            CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
          }

      SmallVector<Value*, 4> Checks;
      if (propagatesFullPoison(&I))
        for (Value *V : I.operands())
          Checks.push_back(getPoisonFor(ValToPoison, V));

      if (auto *Check = generatePoisonChecks(I))
        Checks.push_back(Check);
      ValToPoison[&I] = buildOrChain(B, Checks);
    }

  for (BasicBlock &BB : F)
    for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
      auto *OldPHI = cast<PHINode>(&*I);
      if (!ValToPoison.count(OldPHI))
        continue; // skip the newly inserted phis
      auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
      for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
        auto *OldVal = OldPHI->getIncomingValue(i);
        NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
      }
    }
  return true;
}


PreservedAnalyses PoisonCheckingPass::run(Module &M,
                                          ModuleAnalysisManager &AM) {
  bool Changed = false;
  for (auto &F : M)
    Changed |= rewrite(F);

  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}

PreservedAnalyses PoisonCheckingPass::run(Function &F,
                                          FunctionAnalysisManager &AM) {
  return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
}


/* Major TODO Items:
   - Control dependent poison UB
   - Strict mode - (i.e. must analyze every operand)
     - Poison through memory
     - Function ABIs

   Minor TODO items:
   - Add propagation rules for and/or instructions
   - Add hasPoisonFlags predicate to ValueTracking
   - Add poison check rules for:
     - exact flags, out of bounds operands
     - inbounds (can't be strict due to unknown allocation sizes)
     - fmf and fp casts
 */