Home
last modified time | relevance | path

Searched refs:gpu (Results 1 – 25 of 209) sorted by relevance

123456789

/llvm-project-15.0.7/mlir/test/Dialect/GPU/
H A Dops.mlir15 gpu.terminator
28 gpu.terminator
103 gpu.return
107 gpu.return
146 gpu.return
154 gpu.return
160 gpu.return
167 gpu.return
174 gpu.return
180 gpu.return
[all …]
H A Dinvalid.mlir6 gpu.return
18 gpu.terminator
92 gpu.return
142 gpu.return
158 gpu.return
174 gpu.return
190 gpu.return
325 gpu.return
336 gpu.return
347 gpu.return
[all …]
H A Dasync-region.mlir6 gpu.module @kernels {
7 gpu.func @kernel() kernel { gpu.return }
38 // CHECK-NOT: gpu.wait
49 // CHECK-NOT: gpu.wait
74 // CHECK-NOT: gpu.wait
86 // CHECK-NOT: gpu.wait
151 // CHECK: async.yield %[[t]], %[[t]] : !gpu.async.token, !gpu.async.token
177 %t0 = gpu.wait async
185 gpu.wait
186 // CHECK: gpu.wait
[all …]
H A Dcanonicalize.mlir6 %1 = gpu.wait async
7 gpu.wait []
8 %3 = gpu.wait async
9 gpu.wait [%3]
19 gpu.wait [%0]
22 gpu.wait [%1]
37 gpu.wait [%2]
41 gpu.wait [%3]
59 gpu.wait [%2]
63 gpu.wait [%3]
[all …]
H A Doutlining.mlir34 gpu.terminator
74 gpu.terminator
81 gpu.terminator
92 gpu.terminator
100 gpu.terminator
129 gpu.terminator
155 gpu.terminator
185 gpu.terminator
215 gpu.terminator
244 gpu.terminator
[all …]
H A Dpromotion.mlir1 … mlir-opt -allow-unregistered-dialect -test-gpu-memory-promotion -pass-pipeline='gpu.module(gpu.fu…
3 gpu.module @foo {
9 gpu.func @memref3d(%arg0: memref<5x4xf32> {gpu.test_promote_workgroup}) kernel {
14 // CHECK-DAG: %[[tx:.*]] = gpu.thread_id x
47 gpu.return
53 gpu.module @foo {
59 gpu.func @memref5d(%arg0: memref<8x7x6x5x4xf32> {gpu.test_promote_workgroup}) kernel {
100 gpu.return
106 gpu.module @foo {
114 gpu.func @insert(%arg0: memref<4xf32> {gpu.test_promote_workgroup})
[all …]
H A Dmapping.mlir1 // RUN: mlir-opt -gpu-map-parallel-loops -split-input-file %s | FileCheck %s
20 // CHECK: {mapping = [#gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0)…
21 // CHECK-SAME: #gpu.loop_dim_map<processor = thread_y, map = (d0) -> (d0), bound = (d0)…
22 // CHECK: {mapping = [#gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) …
23 // CHECK-SAME: #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) …
53 // CHECK: {mapping = [#gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0)…
54 // CHECK-SAME: #gpu.loop_dim_map<processor = thread_y, map = (d0) -> (d0), bound = (d0)…
55 // CHECK-SAME: #gpu.loop_dim_map<processor = thread_z, map = (d0) -> (d0), bound = (d0)…
57 // CHECK: {mapping = [#gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) …
58 // CHECK-SAME: #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) …
[all …]
H A Dall-reduce.mlir1 // RUN: mlir-opt -test-gpu-rewrite %s | FileCheck %s
4 // CHECK: gpu.module @kernels {
5 gpu.module @kernels {
7 // CHECK-LABEL: gpu.func @kernel(
9 gpu.func @kernel(%arg0 : f32) kernel {
19 // CHECK: [[VAL_11:%.*]] = gpu.block_dim x
21 // CHECK: [[VAL_13:%.*]] = gpu.block_dim y
23 // CHECK: [[VAL_15:%.*]] = gpu.block_dim z
107 // CHECK: gpu.barrier
177 // CHECK: gpu.barrier
[all …]
H A Dint-range-interface.mlir44 gpu.terminator
52 gpu.module @gpu_module {
55 %grid_dim_x = gpu.grid_dim x
56 %grid_dim_y = gpu.grid_dim y
57 %grid_dim_z = gpu.grid_dim z
66 %block_id_x = gpu.block_id x
67 %block_id_y = gpu.block_id y
68 %block_id_z = gpu.block_id z
77 %block_dim_x = gpu.block_dim x
78 %block_dim_y = gpu.block_dim y
[all …]
H A Dsink-ops.mlir11 // CHECK: gpu.launch blocks
21 // CHECK-NEXT: gpu.terminator
23 gpu.terminator
38 // CHECK: gpu.launch blocks
47 // CHECK-NEXT: gpu.terminator
49 gpu.terminator
60 // CHECK: gpu.launch blocks
71 gpu.terminator
83 // CHECK: gpu.launch blocks
93 // CHECK: gpu.terminator
[all …]
/llvm-project-15.0.7/mlir/test/Conversion/GPUToSPIRV/
H A Dbuiltins.mlir20 gpu.return
46 gpu.return
70 gpu.return
95 gpu.return
117 gpu.return
139 gpu.return
163 gpu.return
187 gpu.return
203 gpu.return
219 gpu.return
[all …]
H A Dsimple.mlir4 gpu.module @kernels {
13 gpu.return
31 gpu.module @kernels {
47 gpu.return
55 gpu.module @kernels {
59 gpu.return
77 gpu.module @kernels {
86 gpu.return
103 gpu.return
116 gpu.barrier
[all …]
/llvm-project-15.0.7/mlir/test/Conversion/VectorToGPU/
H A Dvector-to-mma-ops.mlir13 … %[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[C]] : !gpu.mma_matrix<16x16xf16, "AOp">, …
32 … %[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[C]] : !gpu.mma_matrix<16x16xf16, "AOp">, …
47 // CHECK-DAG: %[[C:.+]] = gpu.subgroup_mma_constant_matrix %[[F]] : !gpu.mma_matrix<16x16xf16, …
50 … %[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[C]] : !gpu.mma_matrix<16x16xf16, "AOp">, …
68 …%[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[ACC1]] : !gpu.mma_matrix<16x16xf16, "AOp">…
95 … %[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[C0]] : !gpu.mma_matrix<16x16xf16, "AOp">,…
96 …[[E:.+]] = gpu.subgroup_mma_elementwise addf %[[D]], %[[C1]] : (!gpu.mma_matrix<16x16xf16, "COp">,…
116 … %[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[C0]] : !gpu.mma_matrix<16x16xf16, "AOp">,…
118 …%[[F:.+]] = gpu.subgroup_mma_elementwise divf %[[D]], %[[E]] : (!gpu.mma_matrix<16x16xf16, "COp">,…
141 … %[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[C]] : !gpu.mma_matrix<16x16xf16, "AOp">, …
[all …]
/llvm-project-15.0.7/mlir/lib/Conversion/GPUCommon/
H A DGPUOpsLowering.h17 struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
20 : ConvertOpToLLVMPattern<gpu::GPUFuncOp>(converter), in GPUFuncOpLowering()
25 matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
42 using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern;
45 matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
56 : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
59 : ConvertOpToLLVMPattern<gpu::PrintfOp>(converter),
63 matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
70 struct GPUReturnOpLowering : public ConvertOpToLLVMPattern<gpu::ReturnOp> {
71 using ConvertOpToLLVMPattern<gpu::ReturnOp>::ConvertOpToLLVMPattern;
[all …]
/llvm-project-15.0.7/clang/test/Driver/
H A Dhip-output-file-name.hip6 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
27 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
32 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
37 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
42 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --gpu-bundle-output \
47 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --no-gpu-bundle-output \
52 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
64 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
69 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
74 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 --gpu-bundle-output %s \
[all …]
/llvm-project-15.0.7/mlir/test/Integration/GPU/CUDA/TensorCore/
H A Dwmma-matmul-f32.mlir2 // RUN: -gpu-kernel-outlining \
3 // RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70})' \
4 // RUN: --convert-scf-to-cf -gpu-to-llvm \
39 gpu.host_register %2 : memref<*xf16>
40 gpu.host_register %33 : memref<*xf32>
44 …%A = gpu.subgroup_mma_load_matrix %0[%c0, %c0] {leadDimension = 16 : index} : memref<16x16xf16> ->…
45 …%B = gpu.subgroup_mma_load_matrix %0[%c0, %c0] {leadDimension = 16 : index} : memref<16x16xf16> ->…
46 …%C = gpu.subgroup_mma_load_matrix %22[%c0, %c0] {leadDimension = 16 : index} : memref<16x16xf32> -…
48 …%R = gpu.subgroup_mma_compute %A, %B, %C : !gpu.mma_matrix<16x16xf16, "AOp">, !gpu.mma_matrix<16x1…
50gpu.subgroup_mma_store_matrix %R, %22[%c0, %c0] {leadDimension = 16 : index}: !gpu.mma_matrix<16x1…
[all …]
H A Dwmma-matmul-f16.mlir2 // RUN: -gpu-kernel-outlining \
3 // RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70})' \
4 // RUN: --convert-scf-to-cf -gpu-to-llvm \
41 gpu.host_register %2 : memref<*xf16>
42 gpu.host_register %33 : memref<*xf16>
46 …%A = gpu.subgroup_mma_load_matrix %0[%c0, %c0] {leadDimension = 16 : index} : memref<16x16xf16> ->…
47 …%B = gpu.subgroup_mma_load_matrix %0[%c0, %c0] {leadDimension = 16 : index} : memref<16x16xf16> ->…
48 …%C = gpu.subgroup_mma_load_matrix %22[%c0, %c0] {leadDimension = 16 : index} : memref<16x16xf16> -…
50 …%R = gpu.subgroup_mma_compute %A, %B, %C : !gpu.mma_matrix<16x16xf16, "AOp">, !gpu.mma_matrix<16x1…
52gpu.subgroup_mma_store_matrix %R, %0[%c0, %c0] {leadDimension = 16 : index}: !gpu.mma_matrix<16x16…
[all …]
/llvm-project-15.0.7/mlir/test/Conversion/GPUToCUDA/
H A Dlower-nvvm-kernel-to-cubin.mlir1 // RUN: mlir-opt %s --test-gpu-to-cubin | FileCheck %s
3 // CHECK: gpu.module @foo attributes {gpu.binary = "CUBIN"}
4 gpu.module @foo {
6 // CHECK: attributes {gpu.kernel}
7 attributes { gpu.kernel } {
12 // CHECK: gpu.module @bar attributes {gpu.binary = "CUBIN"}
13 gpu.module @bar {
16 attributes { gpu.kernel } {
22 attributes { gpu.kernel } {
/llvm-project-15.0.7/mlir/test/Conversion/GPUToROCm/
H A Dlower-rocdl-kernel-to-hsaco.mlir1 // RUN: mlir-opt %s --test-gpu-to-hsaco | FileCheck %s
3 // CHECK: gpu.module @foo attributes {gpu.binary = "HSACO"}
4 gpu.module @foo {
6 // CHECK: attributes {gpu.kernel}
7 attributes { gpu.kernel } {
12 // CHECK: gpu.module @bar attributes {gpu.binary = "HSACO"}
13 gpu.module @bar {
16 attributes { gpu.kernel } {
22 attributes { gpu.kernel } {
/llvm-project-15.0.7/mlir/lib/Conversion/GPUToSPIRV/
H A DGPUToSPIRV.cpp62 matchAndRewrite(gpu::BlockDimOp op, OpAdaptor adaptor,
72 matchAndRewrite(gpu::GPUFuncOp funcOp, OpAdaptor adaptor,
90 : public OpConversionPattern<gpu::ModuleEndOp> {
160 gpu::BlockDimOp op, OpAdaptor adaptor, in matchAndRewrite()
263 gpu::GPUFuncOp funcOp, OpAdaptor adaptor, in matchAndRewrite()
265 if (!gpu::GPUDialect::isKernel(funcOp)) in matchAndRewrite()
305 gpu::GPUModuleOp moduleOp, OpAdaptor adaptor, in matchAndRewrite()
335 gpu::ReturnOp returnOp, OpAdaptor adaptor, in matchAndRewrite()
349 gpu::BarrierOp barrierOp, OpAdaptor adaptor, in matchAndRewrite()
375 LaunchConfigConversion<gpu::ThreadIdOp, in populateGPUToSPIRVPatterns()
[all …]
/llvm-project-15.0.7/mlir/lib/Dialect/GPU/Transforms/
H A DAllReduceLowering.cpp31 GpuAllReduceRewriter(gpu::GPUFuncOp funcOp, gpu::AllReduceOp reduceOp, in GpuAllReduceRewriter()
69 Value dimX = getDimOp<gpu::BlockDimOp>(gpu::Dimension::x); in rewrite()
70 Value dimY = getDimOp<gpu::BlockDimOp>(gpu::Dimension::y); in rewrite()
71 Value dimZ = getDimOp<gpu::BlockDimOp>(gpu::Dimension::z); in rewrite()
72 Value tidX = getDimOp<gpu::ThreadIdOp>(gpu::Dimension::x); in rewrite()
73 Value tidY = getDimOp<gpu::ThreadIdOp>(gpu::Dimension::y); in rewrite()
74 Value tidZ = getDimOp<gpu::ThreadIdOp>(gpu::Dimension::z); in rewrite()
116 create<gpu::BarrierOp>(); in rewrite()
138 create<gpu::BarrierOp>(); in rewrite()
378 gpu::GPUFuncOp funcOp;
[all …]
H A DKernelOutlining.cpp35 for (auto dim : {gpu::Dimension::x, gpu::Dimension::y, gpu::Dimension::z}) in createForAllDimensions()
111 gpu::LaunchOp launchOp, in sinkOperationsIntoLaunchOp()
146 static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp, in outlineKernelFuncImpl()
197 outlinedFunc.walk([](gpu::TerminatorOp op) { in outlineKernelFuncImpl()
199 replacer.create<gpu::ReturnOp>(op.getLoc()); in outlineKernelFuncImpl()
205 gpu::GPUFuncOp mlir::outlineKernelFunc(gpu::LaunchOp launchOp, in outlineKernelFunc()
222 static void convertToLaunchFuncOp(gpu::LaunchOp launchOp, in convertToLaunchFuncOp()
229 auto launchFunc = builder.create<gpu::LaunchFuncOp>( in convertToLaunchFuncOp()
247 if (op->walk([](gpu::LaunchOp launch) { in runOnOperation()
308 gpu::GPUFuncOp outlinedFunc = in runOnOperation()
[all …]
/llvm-project-15.0.7/mlir/test/Integration/GPU/ROCM/
H A Dprintf.mlir2 // RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{index-bitwidth=32 runtime…
3 // RUN: -gpu-to-llvm \
12 module attributes {gpu.container_module} {
13 gpu.module @kernels {
14 gpu.func @hello() kernel {
15 %0 = gpu.thread_id x
16 gpu.printf "Hello from %d\n" %0 : index
17 gpu.return
24 gpu.launch_func @kernels::@hello
/llvm-project-15.0.7/mlir/lib/Conversion/GPUToNVVM/
H A DLowerGpuOpsToNVVMOps.cpp48 case gpu::ShuffleMode::XOR: in convertShflKind()
50 case gpu::ShuffleMode::UP: in convertShflKind()
52 case gpu::ShuffleMode::DOWN: in convertShflKind()
54 case gpu::ShuffleMode::IDX: in convertShflKind()
104 if (op.mode() == gpu::ShuffleMode::UP) { in matchAndRewrite()
131 matchAndRewrite(gpu::LaneIdOp op, gpu::LaneIdOp::Adaptor adaptor, in matchAndRewrite()
167 gpu::GPUModuleOp m = getOperation(); in runOnOperation()
189 gpu::GPUDialect::getPrivateAddressSpace()) in runOnOperation()
225 target.addIllegalDialect<gpu::GPUDialect>(); in configureGpuToNVVMConversionLegality()
231 target.addLegalOp<gpu::YieldOp, gpu::GPUModuleOp, gpu::ModuleEndOp>(); in configureGpuToNVVMConversionLegality()
[all …]
/llvm-project-15.0.7/mlir/include/mlir/Dialect/GPU/IR/
H A DGPUOps.td226 gpu.return
353 to have the `gpu.container_module` attribute. The `gpu.launch_func`
655 in gpu ops. It returns values to the immediately enclosing gpu op.
785 gpu.barrier
823 gpu.func {}
880 %t0 = gpu.foo async : !gpu.async.token
881 %t1 = gpu.bar async : !gpu.async.token
883 // gpu.baz doesn't run until gpu.foo and gpu.bar have both completed, just
894 %t0 = gpu.foo async : !gpu.async.token
895 %t1 = gpu.bar async : !gpu.async.token
[all …]

123456789