//===-- AMDGPUMemoryUtils.cpp - Memory-related helper functions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMemoryUtils.h"
#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/ReplaceConstant.h"

#define DEBUG_TYPE "amdgpu-memory-utils"

using namespace llvm;

namespace llvm {

namespace AMDGPU {

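/// Compute the effective alignment of \p GV: its explicit alignment if one is
/// set, otherwise the ABI alignment of its value type from the data layout.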
Align getAlign(DataLayout const &DL, const GlobalVariable *GV) {
  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
                                       GV->getValueType());
}

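/// Check whether \p GV is a dynamically sized LDS variable, i.e. CUDA/HIP
/// extern __shared__.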
bool isDynamicLDS(const GlobalVariable &GV) {
  // An external, zero-sized addrspace(3) variable without an initializer
  // implies CUDA/HIP extern __shared__. The semantics of such a variable
  // appear to be that all extern __shared__ variables alias one another,
  // which requires different handling.
  const Module *M = GV.getParent();
  const DataLayout &DL = M->getDataLayout();
  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
    return false;
  }
  uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
  return GV.hasExternalLinkage() && AllocSize == 0;
}

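/// Check whether \p GV is an LDS variable that LDS lowering should process:
/// a local address space global that is either dynamic LDS or a non-constant
/// variable without a meaningful initializer.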
bool isLDSVariableToLower(const GlobalVariable &GV) {
  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
    return false;
  }
  if (isDynamicLDS(GV)) {
    return true;
  }
  if (GV.isConstant()) {
    // A constant undef variable can't be written to, and any load is
    // undef, so it should be eliminated by the optimizer. It could be
    // dropped by the back end if not; such variables are skipped here.
    return false;
  }
  if (GV.hasInitializer() && !isa<UndefValue>(GV.getInitializer())) {
    // Initializers are unimplemented for the LDS address space.
    // Leave such variables in place for consistent error reporting.
    return false;
  }
  return true;
}

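/// Given a MemoryDef \p Def that MemorySSA reports as clobbering \p Ptr,
/// check whether it can actually write memory aliasing \p Ptr. Barriers,
/// fences, and non-aliasing atomics are universal MemoryDefs from MemorySSA's
/// point of view but are not real clobbers for this purpose.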
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
  Instruction *DefInst = Def->getMemoryInst();

  if (isa<FenceInst>(DefInst))
    return false;

  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::amdgcn_s_barrier:
    case Intrinsic::amdgcn_s_barrier_signal:
    case Intrinsic::amdgcn_s_barrier_signal_var:
    case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
    case Intrinsic::amdgcn_s_barrier_init:
    case Intrinsic::amdgcn_s_barrier_join:
    case Intrinsic::amdgcn_s_barrier_wait:
    case Intrinsic::amdgcn_s_barrier_leave:
    case Intrinsic::amdgcn_s_get_barrier_state:
    case Intrinsic::amdgcn_s_wakeup_barrier:
    case Intrinsic::amdgcn_wave_barrier:
    case Intrinsic::amdgcn_sched_barrier:
    case Intrinsic::amdgcn_sched_group_barrier:
      return false;
    default:
      break;
    }
  }

  // Ignore atomics that do not alias with the original load. Any atomic is a
  // universal MemoryDef from MSSA's point of view too, just like a fence.
  const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
    return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
  };

  if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
      checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
    return false;

  return true;
}

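/// Walk MemorySSA upwards from \p Load to determine whether any real clobber
/// (per isReallyAClobber) of its memory location can reach it within the
/// function.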
bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
                           AAResults *AA) {
  MemorySSAWalker *Walker = MSSA->getWalker();
  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
  SmallSet<MemoryAccess *, 8> Visited;
  MemoryLocation Loc(MemoryLocation::get(Load));

  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');

  // Start with the nearest dominating clobbering access; it will be either
  // live on entry (nothing to do, the load is not clobbered), a MemoryDef, or
  // a MemoryPhi if several MemoryDefs can define this memory state. In the
  // latter case add all Defs to the WorkList and continue going up, checking
  // all the definitions of this memory location until the root. If all the
  // defs are exhausted and we have reached the entry state, there is no
  // clobber. During the scan ignore barriers and fences, which MemorySSA
  // considers clobbers even though they do not actually write any memory.
  while (!WorkList.empty()) {
    MemoryAccess *MA = WorkList.pop_back_val();
    if (!Visited.insert(MA).second)
      continue;

    if (MSSA->isLiveOnEntryDef(MA))
      continue;

    if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
      LLVM_DEBUG(dbgs() << "  Def: " << *Def->getMemoryInst() << '\n');

      if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
        LLVM_DEBUG(dbgs() << "      -> load is clobbered\n");
        return true;
      }

      WorkList.push_back(
          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
      continue;
    }

    const MemoryPhi *Phi = cast<MemoryPhi>(MA);
    for (const auto &Use : Phi->incoming_values())
      WorkList.push_back(cast<MemoryAccess>(&Use));
  }

  LLVM_DEBUG(dbgs() << "      -> no clobber\n");
  return false;
}

} // end namespace AMDGPU

} // end namespace llvm