; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s

declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0

declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0

declare i1 @llvm.amdgcn.is.shared(i8* nocapture) #2
declare i1 @llvm.amdgcn.is.private(i8* nocapture) #2

; HSA: define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #5 {
define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #6 {
define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 {
define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 {
define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #9 {
define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  %val3 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val4 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val5 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  store volatile i32 %val3, i32 addrspace(1)* %ptr
  store volatile i32 %val4, i32 addrspace(1)* %ptr
  store volatile i32 %val5, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #10 {
define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #11 {
define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #12 {
define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #1 {
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 {
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(3)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #11 {
define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(5)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(3)*
  store volatile i32 0, i32 addrspace(3)* %ftos
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(5)*
  store volatile i32 0, i32 addrspace(5)* %ftos
  ret void
}

; No-op addrspacecast should not use queue ptr
; HSA: define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(1)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(4)* %ptr to i32*
  %ld = load volatile i32, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(1)*
  store volatile i32 0, i32 addrspace(1)* %ftos
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
  %ld = load volatile i32, i32 addrspace(4)* %ftos
  ret void
}

; HSA: define amdgpu_kernel void @use_is_shared(i8* %ptr) #11 {
define amdgpu_kernel void @use_is_shared(i8* %ptr) #1 {
  %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
  %ext = zext i1 %is.shared to i32
  store i32 %ext, i32 addrspace(1)* undef
  ret void
}

; HSA: define amdgpu_kernel void @use_is_private(i8* %ptr) #11 {
define amdgpu_kernel void @use_is_private(i8* %ptr) #1 {
  %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
  %ext = zext i1 %is.private to i32
  store i32 %ext, i32 addrspace(1)* undef
  ret void
}

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind }
attributes #2 = { nounwind readnone speculatable }

; HSA: attributes #0 = { nounwind readnone speculatable }
; HSA: attributes #1 = { nounwind }
; HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" }
; HSA: attributes #3 = { nounwind "amdgpu-work-group-id-z" }
; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
; HSA: attributes #5 = { nounwind "amdgpu-work-item-id-y" }
; HSA: attributes #6 = { nounwind "amdgpu-work-item-id-z" }
; HSA: attributes #7 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" }
; HSA: attributes #11 = { nounwind "amdgpu-queue-ptr" }
; HSA: attributes #12 = { nounwind "amdgpu-kernarg-segment-ptr" }