//===------ Simplify.cpp ----------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Simplify a SCoP by removing unnecessary statements and accesses.
//
//===----------------------------------------------------------------------===//

#include "polly/Simplify.h"
#include "polly/ScopInfo.h"
#include "polly/ScopPass.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/ISLOStream.h"
#include "polly/Support/ISLTools.h"
#include "polly/Support/VirtualInstruction.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "polly-simplify"

using namespace llvm;
using namespace polly;

namespace {

#define TWO_STATISTICS(VARNAME, DESC)                                          \
  static llvm::Statistic VARNAME[2] = {                                        \
      {DEBUG_TYPE, #VARNAME "0", DESC " (first)"},                             \
      {DEBUG_TYPE, #VARNAME "1", DESC " (second)"}}

/// Number of max disjuncts we allow in removeOverwrites(). This is to avoid
/// that the analysis of accesses in a statement is becoming too complex. Chosen
/// to be relatively small because all the common cases should access only few
/// array elements per statement.
static unsigned const SimplifyMaxDisjuncts = 4;

TWO_STATISTICS(ScopsProcessed, "Number of SCoPs processed");
TWO_STATISTICS(ScopsModified, "Number of SCoPs simplified");

TWO_STATISTICS(TotalEmptyDomainsRemoved,
               "Number of statement with empty domains removed in any SCoP");
TWO_STATISTICS(TotalOverwritesRemoved, "Number of removed overwritten writes");
TWO_STATISTICS(TotalWritesCoalesced, "Number of writes coalesced with another");
TWO_STATISTICS(TotalRedundantWritesRemoved,
               "Number of writes of same value removed in any SCoP");
TWO_STATISTICS(TotalEmptyPartialAccessesRemoved,
               "Number of empty partial accesses removed");
TWO_STATISTICS(TotalDeadAccessesRemoved, "Number of dead accesses removed");
TWO_STATISTICS(TotalDeadInstructionsRemoved,
               "Number of unused instructions removed");
TWO_STATISTICS(TotalStmtsRemoved, "Number of statements removed in any SCoP");

TWO_STATISTICS(NumValueWrites, "Number of scalar value writes after Simplify");
TWO_STATISTICS(
    NumValueWritesInLoops,
    "Number of scalar value writes nested in affine loops after Simplify");
TWO_STATISTICS(NumPHIWrites,
               "Number of scalar phi writes after the first simplification");
TWO_STATISTICS(
    NumPHIWritesInLoops,
    "Number of scalar phi writes nested in affine loops after Simplify");
TWO_STATISTICS(NumSingletonWrites, "Number of singleton writes after Simplify");
TWO_STATISTICS(
    NumSingletonWritesInLoops,
    "Number of singleton writes nested in affine loops after Simplify");

static bool isImplicitRead(MemoryAccess *MA) {
  return MA->isRead() && MA->isOriginalScalarKind();
}

static bool isExplicitAccess(MemoryAccess *MA) {
  return MA->isOriginalArrayKind();
}

static bool isImplicitWrite(MemoryAccess *MA) {
  return MA->isWrite() && MA->isOriginalScalarKind();
}

/// Like isl::union_map::unite, but may also return an underapproximated
/// result if getting too complex.
///
/// This is implemented by adding disjuncts to the results until the limit is
/// reached.
static isl::union_map underapproximatedAddMap(isl::union_map UMap,
                                              isl::map Map) {
  if (UMap.is_null() || Map.is_null())
    return {};

  isl::map PrevMap = UMap.extract_map(Map.get_space());

  // Fast path: If known that we cannot exceed the disjunct limit, just add
  // them.
  if (unsignedFromIslSize(PrevMap.n_basic_map()) +
          unsignedFromIslSize(Map.n_basic_map()) <=
      SimplifyMaxDisjuncts)
    return UMap.unite(Map);

  isl::map Result = isl::map::empty(PrevMap.get_space());
  for (isl::basic_map BMap : PrevMap.get_basic_map_list()) {
    if (unsignedFromIslSize(Result.n_basic_map()) > SimplifyMaxDisjuncts)
      break;
    Result = Result.unite(BMap);
  }
  for (isl::basic_map BMap : Map.get_basic_map_list()) {
    if (unsignedFromIslSize(Result.n_basic_map()) > SimplifyMaxDisjuncts)
      break;
    Result = Result.unite(BMap);
  }

  isl::union_map UResult =
      UMap.subtract(isl::map::universe(PrevMap.get_space()));
  UResult.unite(Result);

  return UResult;
}

class SimplifyImpl {
private:
  /// The invocation id (if there are multiple instances in the pass manager's
  /// pipeline) to determine which statistics to update.
  int CallNo;

  /// The last/current SCoP that is/has been processed.
  Scop *S = nullptr;

  /// Number of statements with empty domains removed from the SCoP.
  int EmptyDomainsRemoved = 0;

  /// Number of writes that are overwritten anyway.
  int OverwritesRemoved = 0;

  /// Number of combined writes.
  int WritesCoalesced = 0;

  /// Number of redundant writes removed from this SCoP.
  int RedundantWritesRemoved = 0;

  /// Number of writes with empty access domain removed.
  int EmptyPartialAccessesRemoved = 0;

  /// Number of unused accesses removed from this SCoP.
  int DeadAccessesRemoved = 0;

  /// Number of unused instructions removed from this SCoP.
  int DeadInstructionsRemoved = 0;

  /// Number of unnecessary statements removed from the SCoP.
  int StmtsRemoved = 0;

  /// Remove statements that are never executed due to their domains being
  /// empty.
  ///
  /// In contrast to Scop::simplifySCoP, this removes based on the SCoP's
  /// effective domain, i.e. including the SCoP's context as used by some other
  /// simplification methods in this pass. This is necessary because the
  /// analysis on empty domains is unreliable, e.g. remove a scalar value
  /// definition MemoryAccesses, but not its use.
  void removeEmptyDomainStmts();

  /// Remove writes that are overwritten unconditionally later in the same
  /// statement.
  ///
  /// There must be no read of the same value between the write (that is to be
  /// removed) and the overwrite.
  void removeOverwrites();

  /// Combine writes that write the same value if possible.
  ///
  /// This function is able to combine:
  /// - Partial writes with disjoint domain.
  /// - Writes that write to the same array element.
  ///
  /// In all cases, both writes must write the same values.
  void coalesceWrites();

  /// Remove writes that just write the same value already stored in the
  /// element.
  void removeRedundantWrites();

  /// Remove statements without side effects.
  void removeUnnecessaryStmts();

  /// Remove accesses that have an empty domain.
  void removeEmptyPartialAccesses();

  /// Mark all reachable instructions and access, and sweep those that are not
  /// reachable.
  void markAndSweep(LoopInfo *LI);

  /// Print simplification statistics to @p OS.
  void printStatistics(llvm::raw_ostream &OS, int Indent = 0) const;

  /// Print the current state of all MemoryAccesses to @p OS.
  void printAccesses(llvm::raw_ostream &OS, int Indent = 0) const;

public:
  explicit SimplifyImpl(int CallNo = 0) : CallNo(CallNo) {}

  void run(Scop &S, LoopInfo *LI);

  void printScop(raw_ostream &OS, Scop &S) const;

  /// Return whether at least one simplification has been applied.
  bool isModified() const;
};

/// Return whether at least one simplification has been applied.
bool SimplifyImpl::isModified() const {
  return EmptyDomainsRemoved > 0 || OverwritesRemoved > 0 ||
         WritesCoalesced > 0 || RedundantWritesRemoved > 0 ||
         EmptyPartialAccessesRemoved > 0 || DeadAccessesRemoved > 0 ||
         DeadInstructionsRemoved > 0 || StmtsRemoved > 0;
}

/// Remove statements that are never executed due to their domains being
/// empty.
///
/// In contrast to Scop::simplifySCoP, this removes based on the SCoP's
/// effective domain, i.e. including the SCoP's context as used by some other
/// simplification methods in this pass. This is necessary because the
/// analysis on empty domains is unreliable, e.g. remove a scalar value
/// definition MemoryAccesses, but not its use.
void SimplifyImpl::removeEmptyDomainStmts() {
  size_t NumStmtsBefore = S->getSize();

  S->removeStmts([](ScopStmt &Stmt) -> bool {
    auto EffectiveDomain =
        Stmt.getDomain().intersect_params(Stmt.getParent()->getContext());
    return EffectiveDomain.is_empty();
  });

  assert(NumStmtsBefore >= S->getSize());
  EmptyDomainsRemoved = NumStmtsBefore - S->getSize();
  LLVM_DEBUG(dbgs() << "Removed " << EmptyDomainsRemoved << " (of "
                    << NumStmtsBefore << ") statements with empty domains \n");
  TotalEmptyDomainsRemoved[CallNo] += EmptyDomainsRemoved;
}

/// Remove writes that are overwritten unconditionally later in the same
/// statement.
///
/// There must be no read of the same value between the write (that is to be
/// removed) and the overwrite.
void SimplifyImpl::removeOverwrites() {
  for (auto &Stmt : *S) {
    isl::set Domain = Stmt.getDomain();
    isl::union_map WillBeOverwritten = isl::union_map::empty(S->getIslCtx());

    SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt));

    // Iterate in reverse order, so the overwrite comes before the write that
    // is to be removed.
    for (auto *MA : reverse(Accesses)) {

      // In region statements, the explicit accesses can be in blocks that are
      // can be executed in any order. We therefore process only the implicit
      // writes and stop after that.
      if (Stmt.isRegionStmt() && isExplicitAccess(MA))
        break;

      auto AccRel = MA->getAccessRelation();
      AccRel = AccRel.intersect_domain(Domain);
      AccRel = AccRel.intersect_params(S->getContext());

      // If a value is read in-between, do not consider it as overwritten.
      if (MA->isRead()) {
        // Invalidate all overwrites for the array it accesses to avoid too
        // complex isl sets.
        isl::map AccRelUniv = isl::map::universe(AccRel.get_space());
        WillBeOverwritten = WillBeOverwritten.subtract(AccRelUniv);
        continue;
      }

      // If all of a write's elements are overwritten, remove it.
      isl::union_map AccRelUnion = AccRel;
      if (AccRelUnion.is_subset(WillBeOverwritten)) {
        LLVM_DEBUG(dbgs() << "Removing " << MA
                          << " which will be overwritten anyway\n");

        Stmt.removeSingleMemoryAccess(MA);
        OverwritesRemoved++;
        TotalOverwritesRemoved[CallNo]++;
      }

      // Unconditional writes overwrite other values.
      if (MA->isMustWrite()) {
        // Avoid too complex isl sets. If necessary, throw away some of the
        // knowledge.
        WillBeOverwritten = underapproximatedAddMap(WillBeOverwritten, AccRel);
      }
    }
  }
}

/// Combine writes that write the same value if possible.
///
/// This function is able to combine:
/// - Partial writes with disjoint domain.
/// - Writes that write to the same array element.
///
/// In all cases, both writes must write the same values.
void SimplifyImpl::coalesceWrites() {
  for (auto &Stmt : *S) {
    isl::set Domain = Stmt.getDomain().intersect_params(S->getContext());

    // We let isl do the lookup for the same-value condition. For this, we
    // wrap llvm::Value into an isl::set such that isl can do the lookup in
    // its hashtable implementation. llvm::Values are only compared within a
    // ScopStmt, so the map can be local to this scope. TODO: Refactor with
    // ZoneAlgorithm::makeValueSet()
    SmallDenseMap<Value *, isl::set> ValueSets;
    auto makeValueSet = [&ValueSets, this](Value *V) -> isl::set {
      assert(V);
      isl::set &Result = ValueSets[V];
      if (Result.is_null()) {
        isl::ctx Ctx = S->getIslCtx();
        std::string Name = getIslCompatibleName(
            "Val", V, ValueSets.size() - 1, std::string(), UseInstructionNames);
        isl::id Id = isl::id::alloc(Ctx, Name, V);
        Result = isl::set::universe(
            isl::space(Ctx, 0, 0).set_tuple_id(isl::dim::set, Id));
      }
      return Result;
    };

    // List of all eligible (for coalescing) writes of the future.
    // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] }
    isl::union_map FutureWrites = isl::union_map::empty(S->getIslCtx());

    // Iterate over accesses from the last to the first.
    SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt));
    for (MemoryAccess *MA : reverse(Accesses)) {
      // In region statements, the explicit accesses can be in blocks that can
      // be executed in any order. We therefore process only the implicit
      // writes and stop after that.
      if (Stmt.isRegionStmt() && isExplicitAccess(MA))
        break;

      // { Domain[] -> Element[] }
      isl::map AccRel = MA->getLatestAccessRelation().intersect_domain(Domain);

      // { [Domain[] -> Element[]] }
      isl::set AccRelWrapped = AccRel.wrap();

      // { Value[] }
      isl::set ValSet;

      if (MA->isMustWrite() && (MA->isOriginalScalarKind() ||
                                isa<StoreInst>(MA->getAccessInstruction()))) {
        // Normally, tryGetValueStored() should be used to determine which
        // element is written, but it can return nullptr; For PHI accesses,
        // getAccessValue() returns the PHI instead of the PHI's incoming
        // value. In this case, where we only compare values of a single
        // statement, this is fine, because within a statement, a PHI in a
        // successor block has always the same value as the incoming write. We
        // still preferably use the incoming value directly so we also catch
        // direct uses of that.
        Value *StoredVal = MA->tryGetValueStored();
        if (!StoredVal)
          StoredVal = MA->getAccessValue();
        ValSet = makeValueSet(StoredVal);

        // { Domain[] }
        isl::set AccDomain = AccRel.domain();

        // Parts of the statement's domain that is not written by this access.
        isl::set UndefDomain = Domain.subtract(AccDomain);

        // { Element[] }
        isl::set ElementUniverse =
            isl::set::universe(AccRel.get_space().range());

        // { Domain[] -> Element[] }
        isl::map UndefAnything =
            isl::map::from_domain_and_range(UndefDomain, ElementUniverse);

        // We are looking a compatible write access. The other write can
        // access these elements...
        isl::map AllowedAccesses = AccRel.unite(UndefAnything);

        // ... and must write the same value.
        // { [Domain[] -> Element[]] -> Value[] }
        isl::map Filter =
            isl::map::from_domain_and_range(AllowedAccesses.wrap(), ValSet);

        // Lookup future write that fulfills these conditions.
        // { [[Domain[] -> Element[]] -> Value[]] -> MemoryAccess[] }
        isl::union_map Filtered =
            FutureWrites.uncurry().intersect_domain(Filter.wrap());

        // Iterate through the candidates.
        for (isl::map Map : Filtered.get_map_list()) {
          MemoryAccess *OtherMA = (MemoryAccess *)Map.get_space()
                                      .get_tuple_id(isl::dim::out)
                                      .get_user();

          isl::map OtherAccRel =
              OtherMA->getLatestAccessRelation().intersect_domain(Domain);

          // The filter only guaranteed that some of OtherMA's accessed
          // elements are allowed. Verify that it only accesses allowed
          // elements. Otherwise, continue with the next candidate.
          if (!OtherAccRel.is_subset(AllowedAccesses).is_true())
            continue;

          // The combined access relation.
          // { Domain[] -> Element[] }
          isl::map NewAccRel = AccRel.unite(OtherAccRel);
          simplify(NewAccRel);

          // Carry out the coalescing.
          Stmt.removeSingleMemoryAccess(MA);
          OtherMA->setNewAccessRelation(NewAccRel);

          // We removed MA, OtherMA takes its role.
          MA = OtherMA;

          TotalWritesCoalesced[CallNo]++;
          WritesCoalesced++;

          // Don't look for more candidates.
          break;
        }
      }

      // Two writes cannot be coalesced if there is another access (to some of
      // the written elements) between them. Remove all visited write accesses
      // from the list of eligible writes. Don't just remove the accessed
      // elements, but any MemoryAccess that touches any of the invalidated
      // elements.
      SmallPtrSet<MemoryAccess *, 2> TouchedAccesses;
      for (isl::map Map :
           FutureWrites.intersect_domain(AccRelWrapped).get_map_list()) {
        MemoryAccess *MA = (MemoryAccess *)Map.get_space()
                               .range()
                               .unwrap()
                               .get_tuple_id(isl::dim::out)
                               .get_user();
        TouchedAccesses.insert(MA);
      }
      isl::union_map NewFutureWrites =
          isl::union_map::empty(FutureWrites.ctx());
      for (isl::map FutureWrite : FutureWrites.get_map_list()) {
        MemoryAccess *MA = (MemoryAccess *)FutureWrite.get_space()
                               .range()
                               .unwrap()
                               .get_tuple_id(isl::dim::out)
                               .get_user();
        if (!TouchedAccesses.count(MA))
          NewFutureWrites = NewFutureWrites.unite(FutureWrite);
      }
      FutureWrites = NewFutureWrites;

      if (MA->isMustWrite() && !ValSet.is_null()) {
        // { MemoryAccess[] }
        auto AccSet =
            isl::set::universe(isl::space(S->getIslCtx(), 0, 0)
                                   .set_tuple_id(isl::dim::set, MA->getId()));

        // { Val[] -> MemoryAccess[] }
        isl::map ValAccSet = isl::map::from_domain_and_range(ValSet, AccSet);

        // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] }
        isl::map AccRelValAcc =
            isl::map::from_domain_and_range(AccRelWrapped, ValAccSet.wrap());
        FutureWrites = FutureWrites.unite(AccRelValAcc);
      }
    }
  }
}

/// Remove writes that just write the same value already stored in the
/// element.
void SimplifyImpl::removeRedundantWrites() {
  for (auto &Stmt : *S) {
    SmallDenseMap<Value *, isl::set> ValueSets;
    auto makeValueSet = [&ValueSets, this](Value *V) -> isl::set {
      assert(V);
      isl::set &Result = ValueSets[V];
      if (Result.is_null()) {
        isl_ctx *Ctx = S->getIslCtx().get();
        std::string Name = getIslCompatibleName(
            "Val", V, ValueSets.size() - 1, std::string(), UseInstructionNames);
        isl::id Id = isl::manage(isl_id_alloc(Ctx, Name.c_str(), V));
        Result = isl::set::universe(
            isl::space(Ctx, 0, 0).set_tuple_id(isl::dim::set, Id));
      }
      return Result;
    };

    isl::set Domain = Stmt.getDomain();
    Domain = Domain.intersect_params(S->getContext());

    // List of element reads that still have the same value while iterating
    // through the MemoryAccesses.
    // { [Domain[] -> Element[]] -> Val[] }
    isl::union_map Known = isl::union_map::empty(S->getIslCtx());

    SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt));
    for (MemoryAccess *MA : Accesses) {
      // Is the memory access in a defined order relative to the other
      // accesses? In region statements, only the first and the last accesses
      // have defined order. Execution of those in the middle may depend on
      // runtime conditions an therefore cannot be modified.
      bool IsOrdered =
          Stmt.isBlockStmt() || MA->isOriginalScalarKind() ||
          (!S->getBoxedLoops().size() && MA->getAccessInstruction() &&
           Stmt.getEntryBlock() == MA->getAccessInstruction()->getParent());

      isl::map AccRel = MA->getAccessRelation();
      AccRel = AccRel.intersect_domain(Domain);
      isl::set AccRelWrapped = AccRel.wrap();

      // Determine whether a write is redundant (stores only values that are
      // already present in the written array elements) and remove it if this
      // is the case.
      if (IsOrdered && MA->isMustWrite() &&
          (isa<StoreInst>(MA->getAccessInstruction()) ||
           MA->isOriginalScalarKind())) {
        Value *StoredVal = MA->tryGetValueStored();
        if (!StoredVal)
          StoredVal = MA->getAccessValue();

        if (StoredVal) {
          // Lookup in the set of known values.
          isl::map AccRelStoredVal = isl::map::from_domain_and_range(
              AccRelWrapped, makeValueSet(StoredVal));
          if (isl::union_map(AccRelStoredVal).is_subset(Known)) {
            LLVM_DEBUG(dbgs() << "Cleanup of " << MA << ":\n");
            LLVM_DEBUG(dbgs() << "      Scalar: " << *StoredVal << "\n");
            LLVM_DEBUG(dbgs() << "      AccRel: " << AccRel << "\n");

            Stmt.removeSingleMemoryAccess(MA);

            RedundantWritesRemoved++;
            TotalRedundantWritesRemoved[CallNo]++;
          }
        }
      }

      // Update the know values set.
      if (MA->isRead()) {
        // Loaded values are the currently known values of the array element
        // it was loaded from.
        Value *LoadedVal = MA->getAccessValue();
        if (LoadedVal && IsOrdered) {
          isl::map AccRelVal = isl::map::from_domain_and_range(
              AccRelWrapped, makeValueSet(LoadedVal));

          Known = Known.unite(AccRelVal);
        }
      } else if (MA->isWrite()) {
        // Remove (possibly) overwritten values from the known elements set.
        // We remove all elements of the accessed array to avoid too complex
        // isl sets.
        isl::set AccRelUniv = isl::set::universe(AccRelWrapped.get_space());
        Known = Known.subtract_domain(AccRelUniv);

        // At this point, we could add the written value of must-writes.
        // However, writing same values is already handled by
        // coalesceWrites().
      }
    }
  }
}

/// Remove statements without side effects.
void SimplifyImpl::removeUnnecessaryStmts() {
  auto NumStmtsBefore = S->getSize();
  S->simplifySCoP(true);
  assert(NumStmtsBefore >= S->getSize());
  StmtsRemoved = NumStmtsBefore - S->getSize();
  LLVM_DEBUG(dbgs() << "Removed " << StmtsRemoved << " (of " << NumStmtsBefore
                    << ") statements\n");
  TotalStmtsRemoved[CallNo] += StmtsRemoved;
}

/// Remove accesses that have an empty domain.
void SimplifyImpl::removeEmptyPartialAccesses() {
  for (ScopStmt &Stmt : *S) {
    // Defer the actual removal to not invalidate iterators.
    SmallVector<MemoryAccess *, 8> DeferredRemove;

    for (MemoryAccess *MA : Stmt) {
      if (!MA->isWrite())
        continue;

      isl::map AccRel = MA->getAccessRelation();
      if (!AccRel.is_empty().is_true())
        continue;

      LLVM_DEBUG(
          dbgs() << "Removing " << MA
                 << " because it's a partial access that never occurs\n");
      DeferredRemove.push_back(MA);
    }

    for (MemoryAccess *MA : DeferredRemove) {
      Stmt.removeSingleMemoryAccess(MA);
      EmptyPartialAccessesRemoved++;
      TotalEmptyPartialAccessesRemoved[CallNo]++;
    }
  }
}

/// Mark all reachable instructions and access, and sweep those that are not
/// reachable.
void SimplifyImpl::markAndSweep(LoopInfo *LI) {
  DenseSet<MemoryAccess *> UsedMA;
  DenseSet<VirtualInstruction> UsedInsts;

  // Get all reachable instructions and accesses.
  markReachable(S, LI, UsedInsts, UsedMA);

  // Remove all non-reachable accesses.
  // We need get all MemoryAccesses first, in order to not invalidate the
  // iterators when removing them.
  SmallVector<MemoryAccess *, 64> AllMAs;
  for (ScopStmt &Stmt : *S)
    AllMAs.append(Stmt.begin(), Stmt.end());

  for (MemoryAccess *MA : AllMAs) {
    if (UsedMA.count(MA))
      continue;
    LLVM_DEBUG(dbgs() << "Removing " << MA
                      << " because its value is not used\n");
    ScopStmt *Stmt = MA->getStatement();
    Stmt->removeSingleMemoryAccess(MA);

    DeadAccessesRemoved++;
    TotalDeadAccessesRemoved[CallNo]++;
  }

  // Remove all non-reachable instructions.
  for (ScopStmt &Stmt : *S) {
    // Note that for region statements, we can only remove the non-terminator
    // instructions of the entry block. All other instructions are not in the
    // instructions list, but implicitly always part of the statement.

    SmallVector<Instruction *, 32> AllInsts(Stmt.insts_begin(),
                                            Stmt.insts_end());
    SmallVector<Instruction *, 32> RemainInsts;

    for (Instruction *Inst : AllInsts) {
      auto It = UsedInsts.find({&Stmt, Inst});
      if (It == UsedInsts.end()) {
        LLVM_DEBUG(dbgs() << "Removing "; Inst->print(dbgs());
                   dbgs() << " because it is not used\n");
        DeadInstructionsRemoved++;
        TotalDeadInstructionsRemoved[CallNo]++;
        continue;
      }

      RemainInsts.push_back(Inst);

      // If instructions appear multiple times, keep only the first.
      UsedInsts.erase(It);
    }

    // Set the new instruction list to be only those we did not remove.
    Stmt.setInstructions(RemainInsts);
  }
}

/// Print simplification statistics to @p OS.
void SimplifyImpl::printStatistics(llvm::raw_ostream &OS, int Indent) const {
  OS.indent(Indent) << "Statistics {\n";
  OS.indent(Indent + 4) << "Empty domains removed: " << EmptyDomainsRemoved
                        << '\n';
  OS.indent(Indent + 4) << "Overwrites removed: " << OverwritesRemoved << '\n';
  OS.indent(Indent + 4) << "Partial writes coalesced: " << WritesCoalesced
                        << "\n";
  OS.indent(Indent + 4) << "Redundant writes removed: "
                        << RedundantWritesRemoved << "\n";
  OS.indent(Indent + 4) << "Accesses with empty domains removed: "
                        << EmptyPartialAccessesRemoved << "\n";
  OS.indent(Indent + 4) << "Dead accesses removed: " << DeadAccessesRemoved
                        << '\n';
  OS.indent(Indent + 4) << "Dead instructions removed: "
                        << DeadInstructionsRemoved << '\n';
  OS.indent(Indent + 4) << "Stmts removed: " << StmtsRemoved << "\n";
  OS.indent(Indent) << "}\n";
}

/// Print the current state of all MemoryAccesses to @p OS.
void SimplifyImpl::printAccesses(llvm::raw_ostream &OS, int Indent) const {
  OS.indent(Indent) << "After accesses {\n";
  for (auto &Stmt : *S) {
    OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
    for (auto *MA : Stmt)
      MA->print(OS);
  }
  OS.indent(Indent) << "}\n";
}

void SimplifyImpl::run(Scop &S, LoopInfo *LI) {
  // Must not have run before.
  assert(!this->S);
  assert(!isModified());

  // Prepare processing of this SCoP.
  this->S = &S;
  ScopsProcessed[CallNo]++;

  LLVM_DEBUG(dbgs() << "Removing statements that are never executed...\n");
  removeEmptyDomainStmts();

  LLVM_DEBUG(dbgs() << "Removing partial writes that never happen...\n");
  removeEmptyPartialAccesses();

  LLVM_DEBUG(dbgs() << "Removing overwrites...\n");
  removeOverwrites();

  LLVM_DEBUG(dbgs() << "Coalesce partial writes...\n");
  coalesceWrites();

  LLVM_DEBUG(dbgs() << "Removing redundant writes...\n");
  removeRedundantWrites();

  LLVM_DEBUG(dbgs() << "Cleanup unused accesses...\n");
  markAndSweep(LI);

  LLVM_DEBUG(dbgs() << "Removing statements without side effects...\n");
  removeUnnecessaryStmts();

  if (isModified())
    ScopsModified[CallNo]++;
  LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
  LLVM_DEBUG(dbgs() << S);

  auto ScopStats = S.getStatistics();
  NumValueWrites[CallNo] += ScopStats.NumValueWrites;
  NumValueWritesInLoops[CallNo] += ScopStats.NumValueWritesInLoops;
  NumPHIWrites[CallNo] += ScopStats.NumPHIWrites;
  NumPHIWritesInLoops[CallNo] += ScopStats.NumPHIWritesInLoops;
  NumSingletonWrites[CallNo] += ScopStats.NumSingletonWrites;
  NumSingletonWritesInLoops[CallNo] += ScopStats.NumSingletonWritesInLoops;
}

void SimplifyImpl::printScop(raw_ostream &OS, Scop &S) const {
  assert(&S == this->S &&
         "Can only print analysis for the last processed SCoP");
  printStatistics(OS);

  if (!isModified()) {
    OS << "SCoP could not be simplified\n";
    return;
  }
  printAccesses(OS);
}

class SimplifyWrapperPass : public ScopPass {
public:
  static char ID;
  int CallNo;
  Optional<SimplifyImpl> Impl;

  explicit SimplifyWrapperPass(int CallNo = 0) : ScopPass(ID), CallNo(CallNo) {}

  virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequiredTransitive<ScopInfoRegionPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.setPreservesAll();
  }

  virtual bool runOnScop(Scop &S) override {
    LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();

    Impl.emplace(CallNo);
    Impl->run(S, LI);

    return false;
  }

  virtual void printScop(raw_ostream &OS, Scop &S) const override {
    if (Impl)
      Impl->printScop(OS, S);
  }

  virtual void releaseMemory() override { Impl.reset(); }
};

char SimplifyWrapperPass::ID;

static llvm::PreservedAnalyses
runSimplifyUsingNPM(Scop &S, ScopAnalysisManager &SAM,
                    ScopStandardAnalysisResults &SAR, SPMUpdater &U, int CallNo,
                    raw_ostream *OS) {
  SimplifyImpl Impl(CallNo);
  Impl.run(S, &SAR.LI);
  if (OS) {
    *OS << "Printing analysis 'Polly - Simplify' for region: '" << S.getName()
        << "' in function '" << S.getFunction().getName() << "':\n";
    Impl.printScop(*OS, S);
  }

  if (!Impl.isModified())
    return llvm::PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<AllAnalysesOn<Module>>();
  PA.preserveSet<AllAnalysesOn<Function>>();
  PA.preserveSet<AllAnalysesOn<Loop>>();
  return PA;
}

} // anonymous namespace

llvm::PreservedAnalyses SimplifyPass::run(Scop &S, ScopAnalysisManager &SAM,
                                          ScopStandardAnalysisResults &SAR,
                                          SPMUpdater &U) {
  return runSimplifyUsingNPM(S, SAM, SAR, U, CallNo, nullptr);
}

llvm::PreservedAnalyses
SimplifyPrinterPass::run(Scop &S, ScopAnalysisManager &SAM,
                         ScopStandardAnalysisResults &SAR, SPMUpdater &U) {
  return runSimplifyUsingNPM(S, SAM, SAR, U, CallNo, &OS);
}

SmallVector<MemoryAccess *, 32> polly::getAccessesInOrder(ScopStmt &Stmt) {
  SmallVector<MemoryAccess *, 32> Accesses;

  for (MemoryAccess *MemAcc : Stmt)
    if (isImplicitRead(MemAcc))
      Accesses.push_back(MemAcc);

  for (MemoryAccess *MemAcc : Stmt)
    if (isExplicitAccess(MemAcc))
      Accesses.push_back(MemAcc);

  for (MemoryAccess *MemAcc : Stmt)
    if (isImplicitWrite(MemAcc))
      Accesses.push_back(MemAcc);

  return Accesses;
}

Pass *polly::createSimplifyWrapperPass(int CallNo) {
  return new SimplifyWrapperPass(CallNo);
}

INITIALIZE_PASS_BEGIN(SimplifyWrapperPass, "polly-simplify", "Polly - Simplify",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(SimplifyWrapperPass, "polly-simplify", "Polly - Simplify",
                    false, false)

//===----------------------------------------------------------------------===//

namespace {
/// Print result from SimplifyWrapperPass.
class SimplifyPrinterLegacyPass : public ScopPass {
public:
  static char ID;

  SimplifyPrinterLegacyPass() : SimplifyPrinterLegacyPass(outs()) {}
  explicit SimplifyPrinterLegacyPass(llvm::raw_ostream &OS)
      : ScopPass(ID), OS(OS) {}

  bool runOnScop(Scop &S) override {
    SimplifyWrapperPass &P = getAnalysis<SimplifyWrapperPass>();

    OS << "Printing analysis '" << P.getPassName() << "' for region: '"
       << S.getRegion().getNameStr() << "' in function '"
       << S.getFunction().getName() << "':\n";
    P.printScop(OS, S);

    return false;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    ScopPass::getAnalysisUsage(AU);
    AU.addRequired<SimplifyWrapperPass>();
    AU.setPreservesAll();
  }

private:
  llvm::raw_ostream &OS;
};

char SimplifyPrinterLegacyPass::ID = 0;
} // namespace

Pass *polly::createSimplifyPrinterLegacyPass(raw_ostream &OS) {
  return new SimplifyPrinterLegacyPass(OS);
}

INITIALIZE_PASS_BEGIN(SimplifyPrinterLegacyPass, "polly-print-simplify",
                      "Polly - Print Simplify actions", false, false)
INITIALIZE_PASS_DEPENDENCY(SimplifyWrapperPass)
INITIALIZE_PASS_END(SimplifyPrinterLegacyPass, "polly-print-simplify",
                    "Polly - Print Simplify actions", false, false)