; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s

; Verifies, for both the object-file notes and the textual assembly output,
; whether each kernel's metadata argument list contains hidden_queue_ptr.
;
; The expectations below treat bytes 200..207 of the implicit argument area
; (the pointer returned by llvm.amdgcn.implicitarg.ptr) as the queue pointer:
; offsets 199 and 208 are expected not to require it, offsets 200 and 207 are.
; The argument is also expected when the access cannot be analysed (unknown
; offset, captured pointer, call to an unknown function), and it is expected
; to be absent when "amdgpu-no-queue-ptr" (attribute group #0) applies.

; External callee without any attribute: nothing is known about what it uses.
declare void @function1()

; External callee that carries "amdgpu-no-queue-ptr".
declare void @function2() #0

; Helper: stores the incoming implicit-argument pointer to memory, i.e.
; captures it.
; Function Attrs: noinline
define void @function3(i8 addrspace(4)* %argptr, i8 addrspace(4)* addrspace(1)* %sink) #2 {
  store i8 addrspace(4)* %argptr, i8 addrspace(4)* addrspace(1)* %sink, align 8
  ret void
}

; Helper: stores an already loaded i64 value; it never sees the pointer itself.
; Function Attrs: noinline
define void @function4(i64 %arg, i64* %a) #2 {
  store i64 %arg, i64* %a
  ret void
}

; Helper: loads an i64 located 168 bytes past the pointer it receives.
; Function Attrs: noinline
define void @function5(i8 addrspace(4)* %ptr, i64* %sink) #2 {
  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 168
  %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
  %x = load i64, i64 addrspace(4)* %cast
  store i64 %x, i64* %sink
  ret void
}

; Function Attrs: nounwind readnone speculatable willreturn
declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #1

; No call and no implicit argument access at all

; CHECK: amdhsa.kernels:
; CHECK: - .args:
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel10
define amdgpu_kernel void @test_kernel10(i8* %a) {
  store i8 3, i8* %a, align 1
  ret void
}

; Call to an extern function

; CHECK: - .args:
; CHECK: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel20
define amdgpu_kernel void @test_kernel20(i8* %a) {
  call void @function1()
  store i8 3, i8* %a, align 1
  ret void
}

; Explicit attribute on kernel

; CHECK: - .args:
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel21
define amdgpu_kernel void @test_kernel21(i8* %a) #0 {
  call void @function1()
  store i8 3, i8* %a, align 1
  ret void
}

; Explicit attribute on extern callee

; CHECK: - .args:
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel22
define amdgpu_kernel void @test_kernel22(i8* %a) {
  call void @function2()
  store i8 3, i8* %a, align 1
  ret void
}

; Access more bytes than the pointer size: the 16-byte load at offset 192
; spans bytes 192..207 and therefore overlaps the queue pointer.

; CHECK: - .args:
; CHECK: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel30
define amdgpu_kernel void @test_kernel30(i128* %a) {
  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 192
  %cast = bitcast i8 addrspace(4)* %gep to i128 addrspace(4)*
  %x = load i128, i128 addrspace(4)* %cast
  store i128 %x, i128* %a
  ret void
}

; Typical load of queue pointer

; CHECK: - .args:
; CHECK: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel40
define amdgpu_kernel void @test_kernel40(i64* %a) {
  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 200
  %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
  %x = load i64, i64 addrspace(4)* %cast
  store i64 %x, i64* %a
  ret void
}

; Typical usage, overridden by explicit attribute on kernel

; CHECK: - .args:
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel41
define amdgpu_kernel void @test_kernel41(i64* %a) #0 {
  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 200
  %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
  %x = load i64, i64 addrspace(4)* %cast
  store i64 %x, i64* %a
  ret void
}

; Access to implicit arg before the queue pointer (bytes 192..199)

; CHECK: - .args:
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel42
define amdgpu_kernel void @test_kernel42(i64* %a) {
  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 192
  %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
  %x = load i64, i64 addrspace(4)* %cast
  store i64 %x, i64* %a
  ret void
}

; Access to implicit arg after the queue pointer (bytes 208..215)

; CHECK: - .args:
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel43
define amdgpu_kernel void @test_kernel43(i64* %a) {
  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 208
  %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
  %x = load i64, i64 addrspace(4)* %cast
  store i64 %x, i64* %a
  ret void
}

; Accessing a byte just before the queue pointer

; CHECK: - .args:
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel44
define amdgpu_kernel void @test_kernel44(i8* %a) {
  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 199
  %x = load i8, i8 addrspace(4)* %gep, align 1
  store i8 %x, i8* %a, align 1
  ret void
}

; Accessing the first byte of the queue pointer

; CHECK: - .args:
; CHECK: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel45
define amdgpu_kernel void @test_kernel45(i8* %a) {
  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 200
  %x = load i8, i8 addrspace(4)* %gep, align 1
  store i8 %x, i8* %a, align 1
  ret void
}

; Accessing the last byte of the queue pointer

; CHECK: - .args:
; CHECK: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel46
define amdgpu_kernel void @test_kernel46(i8* %a) {
  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 207
  %x = load i8, i8 addrspace(4)* %gep, align 1
  store i8 %x, i8* %a, align 1
  ret void
}

; Accessing a byte just after the queue pointer

; CHECK: - .args:
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel47
define amdgpu_kernel void @test_kernel47(i8* %a) {
  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 208
  %x = load i8, i8 addrspace(4)* %gep, align 1
  store i8 %x, i8* %a, align 1
  ret void
}

; Access with an unknown offset

; CHECK: - .args:
; CHECK: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel50
define amdgpu_kernel void @test_kernel50(i8* %a, i32 %b) {
  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 %b
  %x = load i8, i8 addrspace(4)* %gep, align 1
  store i8 %x, i8* %a, align 1
  ret void
}

; Multiple geps reaching the queue pointer argument (16 + 184 = 200).

; CHECK: - .args:
; CHECK: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel51
define amdgpu_kernel void @test_kernel51(i8* %a) {
  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
  %gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 184
  %x = load i8, i8 addrspace(4)* %gep2, align 1
  store i8 %x, i8* %a, align 1
  ret void
}

; Multiple geps not reaching the queue pointer argument (16 + 16 = 32).

; CHECK: - .args:
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel52
define amdgpu_kernel void @test_kernel52(i8* %a) {
  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
  %gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 16
  %x = load i8, i8 addrspace(4)* %gep2, align 1
  store i8 %x, i8* %a, align 1
  ret void
}

; Queue pointer used inside a function call: the value is loaded in the kernel
; and only the loaded i64 is handed to the callee.

; CHECK: - .args:
; CHECK: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel60
define amdgpu_kernel void @test_kernel60(i64* %a) #2 {
  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 200
  %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
  %x = load i64, i64 addrspace(4)* %cast
  call void @function4(i64 %x, i64* %a)
  ret void
}

; Queue pointer retrieved inside a function call; chain of geps
; (32 here + 168 inside function5 = 200).

; CHECK: - .args:
; CHECK: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel61
define amdgpu_kernel void @test_kernel61(i64* %a) #2 {
  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 32
  call void @function5(i8 addrspace(4)* %gep, i64* %a)
  ret void
}

; Pointer captured

; CHECK: - .args:
; CHECK: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel70
define amdgpu_kernel void @test_kernel70(i8 addrspace(4)* addrspace(1)* %sink) #2 {
  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
  store i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* %sink, align 8
  ret void
}

; Pointer captured inside function call

; CHECK: - .args:
; CHECK: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel71
define amdgpu_kernel void @test_kernel71(i8 addrspace(4)* addrspace(1)* %sink) #2 {
  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
  call void @function3(i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* %sink)
  ret void
}

; Ineffective pointer capture: the pointer is stored to an undef address.

; CHECK: - .args:
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel72
define amdgpu_kernel void @test_kernel72() #2 {
  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
  store i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* undef, align 8
  ret void
}

attributes #0 = { "amdgpu-no-queue-ptr" }
attributes #1 = { nounwind readnone speculatable willreturn }
attributes #2 = { noinline }