| /llvm-project-15.0.7/mlir/test/Dialect/GPU/ |
| H A D | ops.mlir | 15 gpu.terminator 28 gpu.terminator 103 gpu.return 107 gpu.return 146 gpu.return 154 gpu.return 160 gpu.return 167 gpu.return 174 gpu.return 180 gpu.return [all …]
|
| H A D | invalid.mlir | 6 gpu.return 18 gpu.terminator 92 gpu.return 142 gpu.return 158 gpu.return 174 gpu.return 190 gpu.return 325 gpu.return 336 gpu.return 347 gpu.return [all …]
|
| H A D | async-region.mlir | 6 gpu.module @kernels { 7 gpu.func @kernel() kernel { gpu.return } 38 // CHECK-NOT: gpu.wait 49 // CHECK-NOT: gpu.wait 74 // CHECK-NOT: gpu.wait 86 // CHECK-NOT: gpu.wait 151 // CHECK: async.yield %[[t]], %[[t]] : !gpu.async.token, !gpu.async.token 177 %t0 = gpu.wait async 185 gpu.wait 186 // CHECK: gpu.wait [all …]
|
| H A D | canonicalize.mlir | 6 %1 = gpu.wait async 7 gpu.wait [] 8 %3 = gpu.wait async 9 gpu.wait [%3] 19 gpu.wait [%0] 22 gpu.wait [%1] 37 gpu.wait [%2] 41 gpu.wait [%3] 59 gpu.wait [%2] 63 gpu.wait [%3] [all …]
|
| H A D | outlining.mlir | 34 gpu.terminator 74 gpu.terminator 81 gpu.terminator 92 gpu.terminator 100 gpu.terminator 129 gpu.terminator 155 gpu.terminator 185 gpu.terminator 215 gpu.terminator 244 gpu.terminator [all …]
|
| H A D | promotion.mlir | 1 … mlir-opt -allow-unregistered-dialect -test-gpu-memory-promotion -pass-pipeline='gpu.module(gpu.fu… 3 gpu.module @foo { 9 gpu.func @memref3d(%arg0: memref<5x4xf32> {gpu.test_promote_workgroup}) kernel { 14 // CHECK-DAG: %[[tx:.*]] = gpu.thread_id x 47 gpu.return 53 gpu.module @foo { 59 gpu.func @memref5d(%arg0: memref<8x7x6x5x4xf32> {gpu.test_promote_workgroup}) kernel { 100 gpu.return 106 gpu.module @foo { 114 gpu.func @insert(%arg0: memref<4xf32> {gpu.test_promote_workgroup}) [all …]
|
| H A D | mapping.mlir | 1 // RUN: mlir-opt -gpu-map-parallel-loops -split-input-file %s | FileCheck %s 20 // CHECK: {mapping = [#gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0)… 21 // CHECK-SAME: #gpu.loop_dim_map<processor = thread_y, map = (d0) -> (d0), bound = (d0)… 22 // CHECK: {mapping = [#gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) … 23 // CHECK-SAME: #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) … 53 // CHECK: {mapping = [#gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0)… 54 // CHECK-SAME: #gpu.loop_dim_map<processor = thread_y, map = (d0) -> (d0), bound = (d0)… 55 // CHECK-SAME: #gpu.loop_dim_map<processor = thread_z, map = (d0) -> (d0), bound = (d0)… 57 // CHECK: {mapping = [#gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) … 58 // CHECK-SAME: #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) … [all …]
|
| H A D | all-reduce.mlir | 1 // RUN: mlir-opt -test-gpu-rewrite %s | FileCheck %s 4 // CHECK: gpu.module @kernels { 5 gpu.module @kernels { 7 // CHECK-LABEL: gpu.func @kernel( 9 gpu.func @kernel(%arg0 : f32) kernel { 19 // CHECK: [[VAL_11:%.*]] = gpu.block_dim x 21 // CHECK: [[VAL_13:%.*]] = gpu.block_dim y 23 // CHECK: [[VAL_15:%.*]] = gpu.block_dim z 107 // CHECK: gpu.barrier 177 // CHECK: gpu.barrier [all …]
|
| H A D | int-range-interface.mlir | 44 gpu.terminator 52 gpu.module @gpu_module { 55 %grid_dim_x = gpu.grid_dim x 56 %grid_dim_y = gpu.grid_dim y 57 %grid_dim_z = gpu.grid_dim z 66 %block_id_x = gpu.block_id x 67 %block_id_y = gpu.block_id y 68 %block_id_z = gpu.block_id z 77 %block_dim_x = gpu.block_dim x 78 %block_dim_y = gpu.block_dim y [all …]
|
| H A D | sink-ops.mlir | 11 // CHECK: gpu.launch blocks 21 // CHECK-NEXT: gpu.terminator 23 gpu.terminator 38 // CHECK: gpu.launch blocks 47 // CHECK-NEXT: gpu.terminator 49 gpu.terminator 60 // CHECK: gpu.launch blocks 71 gpu.terminator 83 // CHECK: gpu.launch blocks 93 // CHECK: gpu.terminator [all …]
|
| /llvm-project-15.0.7/mlir/test/Conversion/GPUToSPIRV/ |
| H A D | builtins.mlir | 20 gpu.return 46 gpu.return 70 gpu.return 95 gpu.return 117 gpu.return 139 gpu.return 163 gpu.return 187 gpu.return 203 gpu.return 219 gpu.return [all …]
|
| H A D | simple.mlir | 4 gpu.module @kernels { 13 gpu.return 31 gpu.module @kernels { 47 gpu.return 55 gpu.module @kernels { 59 gpu.return 77 gpu.module @kernels { 86 gpu.return 103 gpu.return 116 gpu.barrier [all …]
|
| /llvm-project-15.0.7/mlir/test/Conversion/VectorToGPU/ |
| H A D | vector-to-mma-ops.mlir | 13 … %[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[C]] : !gpu.mma_matrix<16x16xf16, "AOp">, … 32 … %[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[C]] : !gpu.mma_matrix<16x16xf16, "AOp">, … 47 // CHECK-DAG: %[[C:.+]] = gpu.subgroup_mma_constant_matrix %[[F]] : !gpu.mma_matrix<16x16xf16, … 50 … %[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[C]] : !gpu.mma_matrix<16x16xf16, "AOp">, … 68 …%[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[ACC1]] : !gpu.mma_matrix<16x16xf16, "AOp">… 95 … %[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[C0]] : !gpu.mma_matrix<16x16xf16, "AOp">,… 96 …[[E:.+]] = gpu.subgroup_mma_elementwise addf %[[D]], %[[C1]] : (!gpu.mma_matrix<16x16xf16, "COp">,… 116 … %[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[C0]] : !gpu.mma_matrix<16x16xf16, "AOp">,… 118 …%[[F:.+]] = gpu.subgroup_mma_elementwise divf %[[D]], %[[E]] : (!gpu.mma_matrix<16x16xf16, "COp">,… 141 … %[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[C]] : !gpu.mma_matrix<16x16xf16, "AOp">, … [all …]
|
| /llvm-project-15.0.7/mlir/lib/Conversion/GPUCommon/ |
| H A D | GPUOpsLowering.h | 17 struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> { 20 : ConvertOpToLLVMPattern<gpu::GPUFuncOp>(converter), in GPUFuncOpLowering() 25 matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, 42 using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern; 45 matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, 56 : public ConvertOpToLLVMPattern<gpu::PrintfOp> { 59 : ConvertOpToLLVMPattern<gpu::PrintfOp>(converter), 63 matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, 70 struct GPUReturnOpLowering : public ConvertOpToLLVMPattern<gpu::ReturnOp> { 71 using ConvertOpToLLVMPattern<gpu::ReturnOp>::ConvertOpToLLVMPattern; [all …]
|
| /llvm-project-15.0.7/clang/test/Driver/ |
| H A D | hip-output-file-name.hip | 6 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ 27 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ 32 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ 37 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ 42 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --gpu-bundle-output \ 47 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --no-gpu-bundle-output \ 52 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ 64 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ 69 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ 74 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 --gpu-bundle-output %s \ [all …]
|
| /llvm-project-15.0.7/mlir/test/Integration/GPU/CUDA/TensorCore/ |
| H A D | wmma-matmul-f32.mlir | 2 // RUN: -gpu-kernel-outlining \ 3 // RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70})' \ 4 // RUN: --convert-scf-to-cf -gpu-to-llvm \ 39 gpu.host_register %2 : memref<*xf16> 40 gpu.host_register %33 : memref<*xf32> 44 …%A = gpu.subgroup_mma_load_matrix %0[%c0, %c0] {leadDimension = 16 : index} : memref<16x16xf16> ->… 45 …%B = gpu.subgroup_mma_load_matrix %0[%c0, %c0] {leadDimension = 16 : index} : memref<16x16xf16> ->… 46 …%C = gpu.subgroup_mma_load_matrix %22[%c0, %c0] {leadDimension = 16 : index} : memref<16x16xf32> -… 48 …%R = gpu.subgroup_mma_compute %A, %B, %C : !gpu.mma_matrix<16x16xf16, "AOp">, !gpu.mma_matrix<16x1… 50 …gpu.subgroup_mma_store_matrix %R, %22[%c0, %c0] {leadDimension = 16 : index}: !gpu.mma_matrix<16x1… [all …]
|
| H A D | wmma-matmul-f16.mlir | 2 // RUN: -gpu-kernel-outlining \ 3 // RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70})' \ 4 // RUN: --convert-scf-to-cf -gpu-to-llvm \ 41 gpu.host_register %2 : memref<*xf16> 42 gpu.host_register %33 : memref<*xf16> 46 …%A = gpu.subgroup_mma_load_matrix %0[%c0, %c0] {leadDimension = 16 : index} : memref<16x16xf16> ->… 47 …%B = gpu.subgroup_mma_load_matrix %0[%c0, %c0] {leadDimension = 16 : index} : memref<16x16xf16> ->… 48 …%C = gpu.subgroup_mma_load_matrix %22[%c0, %c0] {leadDimension = 16 : index} : memref<16x16xf16> -… 50 …%R = gpu.subgroup_mma_compute %A, %B, %C : !gpu.mma_matrix<16x16xf16, "AOp">, !gpu.mma_matrix<16x1… 52 …gpu.subgroup_mma_store_matrix %R, %0[%c0, %c0] {leadDimension = 16 : index}: !gpu.mma_matrix<16x16… [all …]
|
| /llvm-project-15.0.7/mlir/test/Conversion/GPUToCUDA/ |
| H A D | lower-nvvm-kernel-to-cubin.mlir | 1 // RUN: mlir-opt %s --test-gpu-to-cubin | FileCheck %s 3 // CHECK: gpu.module @foo attributes {gpu.binary = "CUBIN"} 4 gpu.module @foo { 6 // CHECK: attributes {gpu.kernel} 7 attributes { gpu.kernel } { 12 // CHECK: gpu.module @bar attributes {gpu.binary = "CUBIN"} 13 gpu.module @bar { 16 attributes { gpu.kernel } { 22 attributes { gpu.kernel } {
|
| /llvm-project-15.0.7/mlir/test/Conversion/GPUToROCm/ |
| H A D | lower-rocdl-kernel-to-hsaco.mlir | 1 // RUN: mlir-opt %s --test-gpu-to-hsaco | FileCheck %s 3 // CHECK: gpu.module @foo attributes {gpu.binary = "HSACO"} 4 gpu.module @foo { 6 // CHECK: attributes {gpu.kernel} 7 attributes { gpu.kernel } { 12 // CHECK: gpu.module @bar attributes {gpu.binary = "HSACO"} 13 gpu.module @bar { 16 attributes { gpu.kernel } { 22 attributes { gpu.kernel } {
|
| /llvm-project-15.0.7/mlir/lib/Conversion/GPUToSPIRV/ |
| H A D | GPUToSPIRV.cpp | 62 matchAndRewrite(gpu::BlockDimOp op, OpAdaptor adaptor, 72 matchAndRewrite(gpu::GPUFuncOp funcOp, OpAdaptor adaptor, 90 : public OpConversionPattern<gpu::ModuleEndOp> { 160 gpu::BlockDimOp op, OpAdaptor adaptor, in matchAndRewrite() 263 gpu::GPUFuncOp funcOp, OpAdaptor adaptor, in matchAndRewrite() 265 if (!gpu::GPUDialect::isKernel(funcOp)) in matchAndRewrite() 305 gpu::GPUModuleOp moduleOp, OpAdaptor adaptor, in matchAndRewrite() 335 gpu::ReturnOp returnOp, OpAdaptor adaptor, in matchAndRewrite() 349 gpu::BarrierOp barrierOp, OpAdaptor adaptor, in matchAndRewrite() 375 LaunchConfigConversion<gpu::ThreadIdOp, in populateGPUToSPIRVPatterns() [all …]
|
| /llvm-project-15.0.7/mlir/lib/Dialect/GPU/Transforms/ |
| H A D | AllReduceLowering.cpp | 31 GpuAllReduceRewriter(gpu::GPUFuncOp funcOp, gpu::AllReduceOp reduceOp, in GpuAllReduceRewriter() 69 Value dimX = getDimOp<gpu::BlockDimOp>(gpu::Dimension::x); in rewrite() 70 Value dimY = getDimOp<gpu::BlockDimOp>(gpu::Dimension::y); in rewrite() 71 Value dimZ = getDimOp<gpu::BlockDimOp>(gpu::Dimension::z); in rewrite() 72 Value tidX = getDimOp<gpu::ThreadIdOp>(gpu::Dimension::x); in rewrite() 73 Value tidY = getDimOp<gpu::ThreadIdOp>(gpu::Dimension::y); in rewrite() 74 Value tidZ = getDimOp<gpu::ThreadIdOp>(gpu::Dimension::z); in rewrite() 116 create<gpu::BarrierOp>(); in rewrite() 138 create<gpu::BarrierOp>(); in rewrite() 378 gpu::GPUFuncOp funcOp; [all …]
|
| H A D | KernelOutlining.cpp | 35 for (auto dim : {gpu::Dimension::x, gpu::Dimension::y, gpu::Dimension::z}) in createForAllDimensions() 111 gpu::LaunchOp launchOp, in sinkOperationsIntoLaunchOp() 146 static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp, in outlineKernelFuncImpl() 197 outlinedFunc.walk([](gpu::TerminatorOp op) { in outlineKernelFuncImpl() 199 replacer.create<gpu::ReturnOp>(op.getLoc()); in outlineKernelFuncImpl() 205 gpu::GPUFuncOp mlir::outlineKernelFunc(gpu::LaunchOp launchOp, in outlineKernelFunc() 222 static void convertToLaunchFuncOp(gpu::LaunchOp launchOp, in convertToLaunchFuncOp() 229 auto launchFunc = builder.create<gpu::LaunchFuncOp>( in convertToLaunchFuncOp() 247 if (op->walk([](gpu::LaunchOp launch) { in runOnOperation() 308 gpu::GPUFuncOp outlinedFunc = in runOnOperation() [all …]
|
| /llvm-project-15.0.7/mlir/test/Integration/GPU/ROCM/ |
| H A D | printf.mlir | 2 // RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{index-bitwidth=32 runtime… 3 // RUN: -gpu-to-llvm \ 12 module attributes {gpu.container_module} { 13 gpu.module @kernels { 14 gpu.func @hello() kernel { 15 %0 = gpu.thread_id x 16 gpu.printf "Hello from %d\n" %0 : index 17 gpu.return 24 gpu.launch_func @kernels::@hello
|
| /llvm-project-15.0.7/mlir/lib/Conversion/GPUToNVVM/ |
| H A D | LowerGpuOpsToNVVMOps.cpp | 48 case gpu::ShuffleMode::XOR: in convertShflKind() 50 case gpu::ShuffleMode::UP: in convertShflKind() 52 case gpu::ShuffleMode::DOWN: in convertShflKind() 54 case gpu::ShuffleMode::IDX: in convertShflKind() 104 if (op.mode() == gpu::ShuffleMode::UP) { in matchAndRewrite() 131 matchAndRewrite(gpu::LaneIdOp op, gpu::LaneIdOp::Adaptor adaptor, in matchAndRewrite() 167 gpu::GPUModuleOp m = getOperation(); in runOnOperation() 189 gpu::GPUDialect::getPrivateAddressSpace()) in runOnOperation() 225 target.addIllegalDialect<gpu::GPUDialect>(); in configureGpuToNVVMConversionLegality() 231 target.addLegalOp<gpu::YieldOp, gpu::GPUModuleOp, gpu::ModuleEndOp>(); in configureGpuToNVVMConversionLegality() [all …]
|
| /llvm-project-15.0.7/mlir/include/mlir/Dialect/GPU/IR/ |
| H A D | GPUOps.td | 226 gpu.return 353 to have the `gpu.container_module` attribute. The `gpu.launch_func` 655 in gpu ops. It returns values to the immediately enclosing gpu op. 785 gpu.barrier 823 gpu.func {} 880 %t0 = gpu.foo async : !gpu.async.token 881 %t1 = gpu.bar async : !gpu.async.token 883 // gpu.baz doesn't run until gpu.foo and gpu.bar have both completed, just 894 %t0 = gpu.foo async : !gpu.async.token 895 %t1 = gpu.bar async : !gpu.async.token [all …]
|