1; RUN: opt -data-layout=A5 -amdgpu-lower-enqueued-block -S < %s | FileCheck %s 2 3; CHECK: @__test_block_invoke_kernel.runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)* 4; CHECK: @__test_block_invoke_2_kernel.runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)* 5; CHECK: @__amdgpu_enqueued_kernel.runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)* 6; CHECK: @__amdgpu_enqueued_kernel.1.runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)* 7 8%struct.ndrange_t = type { i32 } 9%opencl.queue_t = type opaque 10 11; CHECK-LABEL: define amdgpu_kernel void @non_caller 12; CHECK-NOT: #{{[0-9]+}} 13define amdgpu_kernel void @non_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr 14 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { 15 ret void 16} 17 18; CHECK-LABEL: define amdgpu_kernel void @caller_indirect 19; CHECK-SAME: #[[AT_CALLER:[0-9]+]] 20define amdgpu_kernel void @caller_indirect(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr 21 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { 22 call void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) 23 ret void 24} 25 26; CHECK-LABEL: define amdgpu_kernel void @caller 27; CHECK-SAME: #[[AT_CALLER]] 28; CHECK-NOT: @__test_block_invoke_kernel 29; CHECK-NOT: @__test_block_invoke_2_kernel 30; CHECK-NOT: @__amdgpu_enqueued_kernel 31; CHECK-NOT: @__amdgpu_enqueued_kernel.1 32; CHECK-NOT: @0 33; CHECK-NOT: @1 34; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__test_block_invoke_kernel.runtime_handle 35; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__test_block_invoke_kernel.runtime_handle 36; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__amdgpu_enqueued_kernel.runtime_handle 37; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__amdgpu_enqueued_kernel.1.runtime_handle 38; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__test_block_invoke_2_kernel.runtime_handle 39define amdgpu_kernel void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr 40 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { 41entry: 42 %block = alloca <{ i32, i32, i8 addrspace(1)*, i8 }>, align 8, addrspace(5) 43 %tmp = alloca %struct.ndrange_t, align 4, addrspace(5) 44 %block2 = alloca <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, align 8, addrspace(5) 45 %tmp3 = alloca %struct.ndrange_t, align 4, addrspace(5) 46 %block.size = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block, i32 0, i32 0 47 store i32 25, i32 addrspace(5)* %block.size, align 8 48 %block.align = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block, i32 0, i32 1 49 store i32 8, i32 addrspace(5)* %block.align, align 4 50 %block.captured = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block, i32 0, i32 2 51 store i8 addrspace(1)* %a, i8 addrspace(1)* addrspace(5)* %block.captured, align 8 52 %block.captured1 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block, i32 0, i32 3 53 store i8 %b, i8 addrspace(5)* %block.captured1, align 8 54 %tmp1 = bitcast <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %block to void () addrspace(5)* 55 %tmp4 = addrspacecast void () addrspace(5)* %tmp1 to i8* 56 %tmp5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp, 57 i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @__test_block_invoke_kernel to i8*), i8* nonnull %tmp4) #2 58 %tmp10 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp, 59 i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @__test_block_invoke_kernel to i8*), i8* nonnull %tmp4) #2 60 %tmp11 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp, 61 i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @0 to i8*), i8* nonnull %tmp4) #2 62 %tmp12 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp, 63 i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @1 to i8*), i8* nonnull %tmp4) #2 64 %block.size4 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 0 65 store i32 41, i32 addrspace(5)* %block.size4, align 8 66 %block.align5 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 1 67 store i32 8, i32 addrspace(5)* %block.align5, align 4 68 %block.captured7 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 2 69 store i8 addrspace(1)* %a, i8 addrspace(1)* addrspace(5)* %block.captured7, align 8 70 %block.captured8 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 5 71 store i8 %b, i8 addrspace(5)* %block.captured8, align 8 72 %block.captured9 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 3 73 store i64 addrspace(1)* %c, i64 addrspace(1)* addrspace(5)* %block.captured9, align 8 74 %block.captured10 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 4 75 store i64 %d, i64 addrspace(5)* %block.captured10, align 8 76 %tmp6 = bitcast <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2 to void () addrspace(5)* 77 %tmp8 = addrspacecast void () addrspace(5)* %tmp6 to i8* 78 %tmp9 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp3, 79 i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>)* @__test_block_invoke_2_kernel to i8*), i8* nonnull %tmp8) #2 80 ret void 81} 82 83; CHECK-LABEL: define dso_local amdgpu_kernel void @__test_block_invoke_kernel 84; CHECK-SAME: #[[AT1:[0-9]+]] 85define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8 addrspace(1)*, i8 }> %arg) #0 86 !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 { 87entry: 88 %.fca.3.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i8 }> %arg, 2 89 %.fca.4.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i8 }> %arg, 3 90 store i8 %.fca.4.extract, i8 addrspace(1)* %.fca.3.extract, align 1 91 ret void 92} 93 94declare i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)*, i32, %struct.ndrange_t addrspace(5)*, i8*, i8*) local_unnamed_addr 95 96; CHECK-LABEL: define dso_local amdgpu_kernel void @__test_block_invoke_2_kernel 97; CHECK-SAME: #[[AT2:[0-9]+]] 98define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8 addrspace(1)*, 99 i64 addrspace(1)*, i64, i8 }> %arg) #0 !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 100 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 { 101entry: 102 %.fca.3.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 2 103 %.fca.4.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 3 104 %.fca.5.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 4 105 %.fca.6.extract = extractvalue <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 5 106 store i8 %.fca.6.extract, i8 addrspace(1)* %.fca.3.extract, align 1 107 store i64 %.fca.5.extract, i64 addrspace(1)* %.fca.4.extract, align 8 108 ret void 109} 110 111; CHECK-LABEL: define dso_local amdgpu_kernel void @__amdgpu_enqueued_kernel 112; CHECK-SAME: #[[AT3:[0-9]+]] 113define internal amdgpu_kernel void @0(<{ i32, i32, i8 addrspace(1)*, i8 }> %arg) #0 114 !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 { 115 ret void 116} 117 118; CHECK-LABEL: define dso_local amdgpu_kernel void @__amdgpu_enqueued_kernel.1 119; CHECK-SAME: #[[AT4:[0-9]+]] 120define internal amdgpu_kernel void @1(<{ i32, i32, i8 addrspace(1)*, i8 }> %arg) #0 121 !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 { 122 ret void 123} 124 125; CHECK: attributes #[[AT_CALLER]] = { "calls-enqueue-kernel" } 126; CHECK: attributes #[[AT1]] = {{.*}}"runtime-handle"="__test_block_invoke_kernel.runtime_handle" 127; CHECK: attributes #[[AT2]] = {{.*}}"runtime-handle"="__test_block_invoke_2_kernel.runtime_handle" 128; CHECK: attributes #[[AT3]] = {{.*}}"runtime-handle"="__amdgpu_enqueued_kernel.runtime_handle" 129; CHECK: attributes #[[AT4]] = {{.*}}"runtime-handle"="__amdgpu_enqueued_kernel.1.runtime_handle" 130 131attributes #0 = { "enqueued-block" } 132 133!3 = !{i32 1, i32 0, i32 1, i32 0} 134!4 = !{!"none", !"none", !"none", !"none"} 135!5 = !{!"char*", !"char", !"long*", !"long"} 136!6 = !{!"", !"", !"", !""} 137!14 = !{i32 0} 138!15 = !{!"none"} 139!16 = !{!"__block_literal"} 140!17 = !{!""} 141