; RUN: opt -data-layout=A5 -amdgpu-lower-enqueued-block -S < %s | FileCheck %s

; Test for the -amdgpu-lower-enqueued-block pass. The FileCheck directives in
; this file expect the pass output to:
;   * contain one "<kernel>.runtime_handle" global in addrspace(1) for every
;     kernel carrying the "enqueued-block" attribute;
;   * use those runtime-handle globals wherever the input referenced such a
;     kernel directly;
;   * print the unnamed kernels @0 and @1 as @__amdgpu_enqueued_kernel and
;     @__amdgpu_enqueued_kernel.1;
;   * tag @caller, @caller_indirect and @inlined_caller with
;     "calls-enqueue-kernel", and leave @non_caller without attributes.

; Runtime-handle globals expected in the output, one per enqueued kernel.
; CHECK: @__test_block_invoke_kernel.runtime_handle = addrspace(1) global [2 x i64] zeroinitializer
; CHECK: @__test_block_invoke_2_kernel.runtime_handle = addrspace(1) global [2 x i64] zeroinitializer
; CHECK: @__amdgpu_enqueued_kernel.runtime_handle = addrspace(1) global [2 x i64] zeroinitializer
; CHECK: @__amdgpu_enqueued_kernel.1.runtime_handle = addrspace(1) global [2 x i64] zeroinitializer

%struct.ndrange_t = type { i32 }
%opencl.queue_t = type opaque

; A kernel that neither references an enqueued block nor calls a function that
; does: no attribute-group reference is expected on it.
; CHECK-LABEL: define amdgpu_kernel void @non_caller
; CHECK-NOT: #{{[0-9]+}}
define amdgpu_kernel void @non_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
    !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
  ret void
}

; A kernel that only reaches the enqueue calls through a call to @caller. It is
; expected to share @caller's attribute group (captured here as AT_CALLER).
; CHECK-LABEL: define amdgpu_kernel void @caller_indirect
; CHECK-SAME: #[[AT_CALLER:[0-9]+]]
define amdgpu_kernel void @caller_indirect(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
    !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
  call void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d)
  ret void
}

; A kernel that passes enqueued-block kernels to @__enqueue_kernel_basic five
; times: @__test_block_invoke_kernel twice, the unnamed kernels @0 and @1 once
; each, and @__test_block_invoke_2_kernel once. In the output every one of
; those call sites must mention the matching ".runtime_handle" global, and no
; reference to a kernel symbol may appear before the first rewritten call.
; CHECK-LABEL: define amdgpu_kernel void @caller
; CHECK-SAME: #[[AT_CALLER]]
; CHECK-NOT: @__test_block_invoke_kernel
; CHECK-NOT: @__test_block_invoke_2_kernel
; CHECK-NOT: @__amdgpu_enqueued_kernel
; CHECK-NOT: @__amdgpu_enqueued_kernel.1
; CHECK-NOT: @0
; CHECK-NOT: @1
; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__test_block_invoke_kernel.runtime_handle
; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__test_block_invoke_kernel.runtime_handle
; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__amdgpu_enqueued_kernel.runtime_handle
; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__amdgpu_enqueued_kernel.1.runtime_handle
; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__test_block_invoke_2_kernel.runtime_handle
define amdgpu_kernel void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
    !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
entry:
  %block = alloca <{ i32, i32, i8 addrspace(1)*, i8 }>, align 8, addrspace(5)
  %tmp = alloca %struct.ndrange_t, align 4, addrspace(5)
  %block2 = alloca <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, align 8, addrspace(5)
  %tmp3 = alloca %struct.ndrange_t, align 4, addrspace(5)

  ; Fill in the first block literal: two i32 header fields (25 and 8),
  ; followed by the captured %a and %b.
  %block.size = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block, i32 0, i32 0
  store i32 25, i32 addrspace(5)* %block.size, align 8
  %block.align = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block, i32 0, i32 1
  store i32 8, i32 addrspace(5)* %block.align, align 4
  %block.captured = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block, i32 0, i32 2
  store i8 addrspace(1)* %a, i8 addrspace(1)* addrspace(5)* %block.captured, align 8
  %block.captured1 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block, i32 0, i32 3
  store i8 %b, i8 addrspace(5)* %block.captured1, align 8
  %tmp1 = bitcast <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block to void () addrspace(5)*
  %tmp4 = addrspacecast void () addrspace(5)* %tmp1 to i8*

  ; Same named kernel enqueued twice, then each of the two unnamed kernels.
  %tmp5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval(%struct.ndrange_t) nonnull %tmp,
    i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @__test_block_invoke_kernel to i8*), i8* nonnull %tmp4) #2
  %tmp10 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval(%struct.ndrange_t) nonnull %tmp,
    i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @__test_block_invoke_kernel to i8*), i8* nonnull %tmp4) #2
  %tmp11 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval(%struct.ndrange_t) nonnull %tmp,
    i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @0 to i8*), i8* nonnull %tmp4) #2
  %tmp12 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval(%struct.ndrange_t) nonnull %tmp,
    i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @1 to i8*), i8* nonnull %tmp4) #2

  ; Fill in the second block literal: two i32 header fields (41 and 8),
  ; followed by the captured %a, %c, %d and %b.
  %block.size4 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 0
  store i32 41, i32 addrspace(5)* %block.size4, align 8
  %block.align5 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 1
  store i32 8, i32 addrspace(5)* %block.align5, align 4
  %block.captured7 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 2
  store i8 addrspace(1)* %a, i8 addrspace(1)* addrspace(5)* %block.captured7, align 8
  %block.captured8 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 5
  store i8 %b, i8 addrspace(5)* %block.captured8, align 8
  %block.captured9 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 3
  store i64 addrspace(1)* %c, i64 addrspace(1)* addrspace(5)* %block.captured9, align 8
  %block.captured10 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 4
  store i64 %d, i64 addrspace(5)* %block.captured10, align 8
  %tmp6 = bitcast <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2 to void () addrspace(5)*
  %tmp8 = addrspacecast void () addrspace(5)* %tmp6 to i8*
  %tmp9 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval(%struct.ndrange_t) nonnull %tmp3,
    i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>)* @__test_block_invoke_2_kernel to i8*), i8* nonnull %tmp8) #2
  ret void
}

; __enqueue_kernel* functions may get inlined
; Here the kernel address is used by a load through a constant expression
; rather than by a call; the output is expected to load from the
; runtime-handle global instead.
; CHECK-LABEL: define amdgpu_kernel void @inlined_caller
; CHECK-SAME: #[[AT_CALLER]]
; CHECK-NOT: @__test_block_invoke_kernel
; CHECK: load i64, i64 addrspace(1)* getelementptr inbounds ([2 x i64], [2 x i64] addrspace(1)* @__test_block_invoke_kernel.runtime_handle, i32 0, i32 0)
define amdgpu_kernel void @inlined_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
    !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
entry:
  %tmp = load i64, i64 addrspace(1)* addrspacecast (i64* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @__test_block_invoke_kernel to i64*) to i64 addrspace(1)*)
  store i64 %tmp, i64 addrspace(1)* %c
  ret void
}

; First enqueued-block kernel: stores the captured i8 (field 3) through the
; captured pointer (field 2). It is 'internal' in the input and is expected to
; be printed as 'dso_local' in the output.
; CHECK-LABEL: define dso_local amdgpu_kernel void @__test_block_invoke_kernel
; CHECK-SAME: #[[AT1:[0-9]+]]
define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8 addrspace(1)*, i8 }> %arg) #0
    !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 {
entry:
  %.fca.3.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i8 }> %arg, 2
  %.fca.4.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i8 }> %arg, 3
  store i8 %.fca.4.extract, i8 addrspace(1)* %.fca.3.extract, align 1
  ret void
}

declare i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)*, i32, %struct.ndrange_t addrspace(5)*, i8*, i8*) local_unnamed_addr

; Second enqueued-block kernel: stores the captured i8 (field 5) through the
; i8 pointer (field 2) and the captured i64 (field 4) through the i64 pointer
; (field 3).
; CHECK-LABEL: define dso_local amdgpu_kernel void @__test_block_invoke_2_kernel
; CHECK-SAME: #[[AT2:[0-9]+]]
define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8 addrspace(1)*,
    i64 addrspace(1)*, i64, i8 }> %arg) #0 !kernel_arg_addr_space !14 !kernel_arg_access_qual !15
    !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 {
entry:
  %.fca.3.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 2
  %.fca.4.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 3
  %.fca.5.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 4
  %.fca.6.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 5
  store i8 %.fca.6.extract, i8 addrspace(1)* %.fca.3.extract, align 1
  store i64 %.fca.5.extract, i64 addrspace(1)* %.fca.4.extract, align 8
  ret void
}

; Unnamed enqueued-block kernel @0; expected to appear in the output under the
; name @__amdgpu_enqueued_kernel.
; CHECK-LABEL: define dso_local amdgpu_kernel void @__amdgpu_enqueued_kernel
; CHECK-SAME: #[[AT3:[0-9]+]]
define internal amdgpu_kernel void @0(<{ i32, i32, i8 addrspace(1)*, i8 }> %arg) #0
    !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 {
  ret void
}

; Unnamed enqueued-block kernel @1; expected to appear in the output under the
; name @__amdgpu_enqueued_kernel.1.
; CHECK-LABEL: define dso_local amdgpu_kernel void @__amdgpu_enqueued_kernel.1
; CHECK-SAME: #[[AT4:[0-9]+]]
define internal amdgpu_kernel void @1(<{ i32, i32, i8 addrspace(1)*, i8 }> %arg) #0
    !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 {
  ret void
}

; Attribute groups expected in the output: the callers' group holds exactly
; "calls-enqueue-kernel", and each enqueued kernel's group names its own
; runtime-handle global.
; CHECK: attributes #[[AT_CALLER]] = { "calls-enqueue-kernel" }
; CHECK: attributes #[[AT1]] = {{.*}}"runtime-handle"="__test_block_invoke_kernel.runtime_handle"
; CHECK: attributes #[[AT2]] = {{.*}}"runtime-handle"="__test_block_invoke_2_kernel.runtime_handle"
; CHECK: attributes #[[AT3]] = {{.*}}"runtime-handle"="__amdgpu_enqueued_kernel.runtime_handle"
; CHECK: attributes #[[AT4]] = {{.*}}"runtime-handle"="__amdgpu_enqueued_kernel.1.runtime_handle"

attributes #0 = { "enqueued-block" }

; Kernel-argument metadata: !3-!6 describe the four-argument caller kernels,
; !14-!17 describe the single block-literal argument of the enqueued kernels.
!3 = !{i32 1, i32 0, i32 1, i32 0}
!4 = !{!"none", !"none", !"none", !"none"}
!5 = !{!"char*", !"char", !"long*", !"long"}
!6 = !{!"", !"", !"", !""}
!14 = !{i32 0}
!15 = !{!"none"}
!16 = !{!"__block_literal"}
!17 = !{!""}