| /llvm-project-15.0.7/llvm/lib/Target/AMDGPU/ |
| H A D | R600Subtarget.cpp | 25 R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, in R600Subtarget() argument 27 : R600GenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS), AMDGPUSubtarget(TT), in R600Subtarget() 30 TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)), in R600Subtarget() 31 InstrItins(getInstrItineraryForCPU(GPU)) {} in R600Subtarget() 34 StringRef GPU, in initializeSubtargetDependencies() argument 38 ParseSubtargetFeatures(GPU, /*TuneCPU*/ GPU, FullFS); in initializeSubtargetDependencies()
|
| H A D | R600TargetMachine.cpp | 67 StringRef GPU = getGPUName(F); in getSubtargetImpl() local 70 SmallString<128> SubtargetKey(GPU); in getSubtargetImpl() 79 I = std::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this); in getSubtargetImpl()
|
| H A D | AMDGPUSubtarget.cpp | 61 StringRef GPU, StringRef FS) { in initializeSubtargetDependencies() argument 91 ParseSubtargetFeatures(GPU, /*TuneCPU*/ GPU, FullFS); in initializeSubtargetDependencies() 159 GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, in GCNSubtarget() argument 162 AMDGPUGenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS), in GCNSubtarget() 166 InstrItins(getInstrItineraryForCPU(GPU)), in GCNSubtarget() 167 InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)), in GCNSubtarget()
|
| /llvm-project-15.0.7/llvm/unittests/Frontend/ |
| H A D | OpenMPContextTest.cpp | 116 VariantMatchInfo GPU; in TEST_F() local 117 GPU.addTrait(TraitProperty::device_kind_gpu, ""); in TEST_F() 118 EXPECT_FALSE(isVariantApplicableInContext(GPU, HostLinux)); in TEST_F() 119 EXPECT_FALSE(isVariantApplicableInContext(GPU, DeviceLinux)); in TEST_F() 120 EXPECT_TRUE(isVariantApplicableInContext(GPU, HostNVPTX)); in TEST_F() 121 EXPECT_TRUE(isVariantApplicableInContext(GPU, DeviceNVPTX)); in TEST_F() 207 VariantMatchInfo GPU; in TEST_F() local 208 GPU.addTrait(TraitProperty::device_kind_gpu, ""); in TEST_F() 209 EXPECT_FALSE(isVariantApplicableInContext(GPU, HostLinuxParallelParallel)); in TEST_F() 211 EXPECT_TRUE(isVariantApplicableInContext(GPU, HostNVPTXFor)); in TEST_F() [all …]
|
| /llvm-project-15.0.7/llvm/test/Transforms/GlobalOpt/ |
| H A D | address_space_initializer.ll | 2 ; RUN: opt -passes=globalopt --mtriple=nvptx64 < %s -S | FileCheck %s --check-prefix=GPU 3 ; RUN: opt -passes=globalopt --mtriple=amdgcn < %s -S | FileCheck %s --check-prefix=GPU 14 ; GPU-NOT: @g0 = 15 ; GPU: internal unnamed_addr addrspace(3) global i16 undef 16 ; GPU-NOT: @g2 =
|
| /llvm-project-15.0.7/mlir/include/mlir/Dialect/GPU/Transforms/ |
| H A D | CMakeLists.txt | 2 mlir_tablegen(Passes.h.inc -gen-pass-decls -name GPU) 3 mlir_tablegen(Passes.capi.h.inc -gen-pass-capi-header --prefix GPU) 4 mlir_tablegen(Passes.capi.cpp.inc -gen-pass-capi-impl --prefix GPU)
|
| H A D | Passes.td | 1 //===-- Passes.td - GPU pass definition file ---------------*- tablegen -*-===// 27 let summary = "Make GPU ops async"; 34 let summary = "Greedily maps loops to GPU hardware dimensions."; 36 let description = "Greedily maps loops to GPU hardware dimensions.";
|
| /llvm-project-15.0.7/mlir/include/mlir/Dialect/GPU/IR/ |
| H A D | GPUBase.td | 1 //===-- GPUBase.td - GPU dialect definitions ---------------*- tablegen -*-===// 9 // Defines the GPU dialect 20 // GPU Dialect. 43 /// the GPU dialect. 82 Interface for GPU operations that execute asynchronously on the device. 84 GPU operations implementing this interface take a list of dependencies 126 // GPU Attributes.
|
| H A D | GPUOps.td | 9 // Defines some operations of the GPU dialect. 17 include "mlir/Dialect/GPU/IR/GPUBase.td" 18 include "mlir/Dialect/GPU/IR/ParallelLoopMapperAttr.td" 28 // GPU Dialect operations. 184 let summary = "Function executable on a GPU"; 482 let summary = "GPU kernel launch operation"; 652 let summary = "GPU yield operation"; 807 GPU module contains code that is intended to be run on a GPU. A host device 816 Using an op with a region to define a GPU module enables "embedding" GPU 988 let summary = "GPU memcpy operation"; [all …]
|
| /llvm-project-15.0.7/mlir/lib/Conversion/GPUToROCDL/ |
| H A D | GPUToROCDL.td | 1 //==-- GPUToROCDL.td - GPU Ops to ROCDL Patterns -------------*- tablegen -*==// 9 // Defines Patterns to lower GPU ops to ROCDL. 17 include "mlir/Dialect/GPU/IR/GPUOps.td"
|
| /llvm-project-15.0.7/mlir/lib/Conversion/GPUToNVVM/ |
| H A D | GPUToNVVM.td | 1 //==-- GPUToNVVM.td - GPU Ops to NVVM Patterns ---------------*- tablegen -*==// 9 // Defines Patterns to lower GPU ops to NVVM. 17 include "mlir/Dialect/GPU/IR/GPUOps.td"
|
| /llvm-project-15.0.7/clang/lib/Basic/Targets/ |
| H A D | NVPTX.h | 61 CudaArch GPU; variable 78 Features[CudaArchToString(GPU)] = true; in initFeatureMap() 129 GPU = StringToCudaArch(Name); in setCPU() 130 return GPU != CudaArch::UNKNOWN; in setCPU()
|
| /llvm-project-15.0.7/mlir/docs/Dialects/ |
| H A D | GPU.md | 6 This dialect provides middle-level abstractions for launching GPU kernels 11 manipulations to launch a GPU kernel and provide a simple path towards GPU 21 than actual GPU implementations but using static memory at the function level is 26 The buffers are considered live throughout the execution of the GPU function
|
| /llvm-project-15.0.7/openmp/docs/remarks/ |
| H A D | OMP100.rst | 7 A function remark that indicates the function, when compiled for a GPU, is 10 know all callers on the GPU. 17 passing scheme and often improve the register usage on the GPU. However, If a 18 parallel region on the GPU is in a function with external linkage we may not
|
| H A D | OMP112.rst | 3 Found thread data sharing on the GPU. Expect degraded performance due to data globalization. [OMP11… 14 device such as a GPU. Threads on a GPU cannot shared their data unless it is 56 omp112.cpp:9:7: remark: Found thread data sharing on the GPU. Expect degraded performance 81 omp112.cpp:4:7: remark: Found thread data sharing on the GPU. Expect degraded performance
|
| /llvm-project-15.0.7/clang/tools/amdgpu-arch/ |
| H A D | AMDGPUArch.cpp | 68 for (const auto &GPU : GPUs) in main() local 69 printf("%s\n", GPU.c_str()); in main()
|
| /llvm-project-15.0.7/llvm/test/CodeGen/AMDGPU/ |
| H A D | tid-code-object-v2-backwards-compatibility.ll | 7 ; GFX90C-ERROR: LLVM ERROR: AMD GPU code object V2 does not support processor gfx90c with XNACK bei… 9 ; GFX940-ERROR: LLVM ERROR: AMD GPU code object V2 does not support processor gfx940
|
| /llvm-project-15.0.7/llvm/test/Transforms/OpenMP/ |
| H A D | gpu_kernel_detection_remarks.ll | 4 ; CHECK-DAG: remark: <unknown>:0:0: OpenMP GPU kernel kernel1 5 ; CHECK-DAG: remark: <unknown>:0:0: OpenMP GPU kernel kernel2
|
| /llvm-project-15.0.7/llvm/lib/Support/ |
| H A D | TargetParser.cpp | 193 AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) { in getIsaVersion() argument 194 AMDGPU::GPUKind AK = parseArchAMDGCN(GPU); in getIsaVersion() 196 if (GPU == "generic-hsa") in getIsaVersion() 198 if (GPU == "generic") in getIsaVersion()
|
| /llvm-project-15.0.7/mlir/lib/Dialect/GPU/ |
| H A D | CMakeLists.txt | 27 ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU 56 ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU
|
| /llvm-project-15.0.7/mlir/lib/CAPI/Dialect/ |
| H A D | GPU.cpp | 13 MLIR_DEFINE_CAPI_DIALECT_REGISTRATION(GPU, gpu, mlir::gpu::GPUDialect)
|
| /llvm-project-15.0.7/mlir/include/mlir-c/Dialect/ |
| H A D | GPU.h | 20 MLIR_DECLARE_CAPI_DIALECT_REGISTRATION(GPU, gpu);
|
| /llvm-project-15.0.7/llvm/docs/ |
| H A D | UserGuides.rst | 235 This document describes using the NVPTX backend to compile GPU kernels. 238 This document describes using the AMDGPU backend to compile GPU kernels. 250 This document describes using the SPIR-V target to compile GPU kernels. 253 This document describes using the DirectX target to compile GPU code for the
|
| H A D | CompileCudaWithLLVM.rst | 58 $ clang++ axpy.cu -o axpy --cuda-gpu-arch=<GPU arch> \ 81 * ``<GPU arch>`` -- the `compute capability 82 <https://developer.nvidia.com/cuda-gpus>`_ of your GPU. For example, if you 83 want to run your program on a GPU with compute capability of 3.5, specify 101 GPU hardware allows for more control over numerical operations than most CPUs, 242 * For each GPU architecture ``arch`` that we're compiling for, do: 248 ``S_arch``, containing GPU machine code (SASS) for ``arch``. 262 * For each GPU architecture ``arch`` that we're compiling for, do: 288 host compilation and during device compilation for each GPU architecture.) 503 on a CPU isn't necessarily fast on a GPU. We've made a number of changes to [all …]
|
| /llvm-project-15.0.7/mlir/test/lib/Dialect/ |
| H A D | CMakeLists.txt | 4 add_subdirectory(GPU)
|