; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MESA %s

; Exercises the llvm.amdgcn.implicitarg.ptr intrinsic in kernels and in
; callable (non-kernel) functions. The file is compiled twice by the two llc
; invocations above, once for amdgcn-amd-amdhsa (kaveri, code object v2) and
; once for amdgcn-mesa-mesa3d (tahiti). Checks with the GCN prefix apply to
; both compilations; the HSA and MESA prefixes apply to one compilation each.
;
; Attribute groups used below (defined at the end of the file)
;   #0 - nounwind noinline, no "amdgpu-implicitarg-num-bytes" attribute
;   #1 - as #0 plus "amdgpu-implicitarg-num-bytes"="48"
;   #3 - as #0 plus "amdgpu-implicitarg-num-bytes"="0"
;
; Unless noted otherwise, every function body reads the implicit argument
; pointer, casts it to an i32 pointer and performs one volatile i32 load
; through it.

; Kernel with no explicit arguments, attribute group #0. The HSA run expects
; a 56-byte kernarg segment and the load to go straight through s[4:5] at
; offset 0; the MESA run expects a 16-byte segment.
; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty:
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 56
; HSA: kernarg_segment_alignment = 4

; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4

; HSA: s_load_dword s0, s[4:5], 0x0
define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Same body with attribute group #3 (zero implicit-argument bytes). The HSA
; run expects the kernarg segment pointer SGPR to be disabled, a zero-sized
; segment, and the load to go through a register pair that was set to 0. The
; MESA run still expects a 16-byte segment and a load through s[4:5].
; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty_0implicit:
; HSA: enable_sgpr_kernarg_segment_ptr = 0
; HSA: kernarg_segment_byte_size = 0
; HSA: kernarg_segment_alignment = 4

; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4

; HSA: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0{{$}}
; HSA: s_load_dword s0, [[NULL]], 0x0

; MESA: s_load_dword s0, s[4:5], 0x0
define amdgpu_kernel void @kernel_implicitarg_ptr_empty_0implicit() #3 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Same body with attribute group #1 (48 implicit-argument bytes). The HSA run
; expects a 48-byte kernarg segment; the MESA run expects 16 bytes as before.
; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr_empty:
; GCN: enable_sgpr_kernarg_segment_ptr = 1

; HSA: kernarg_segment_byte_size = 48
; HSA: kernarg_segment_alignment = 4

; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4

; HSA: s_load_dword s0, s[4:5], 0x0
define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() #1 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Kernel with one unnamed 112-byte explicit argument, attribute group #0.
; Expected segment sizes are 168 bytes for the HSA run (112 + the 56 checked
; for the empty kernel) and 128 bytes for the MESA run (112 + 16).
; NOTE(review): the 0x1c load offset looks like a dword offset
; (0x1c * 4 = 112 bytes, i.e. just past the explicit argument) - confirm.
; GCN-LABEL: {{^}}kernel_implicitarg_ptr:
; GCN: enable_sgpr_kernarg_segment_ptr = 1

; HSA: kernarg_segment_byte_size = 168
; HSA: kernarg_segment_alignment = 4

; MESA: kernarg_segment_byte_size = 128
; MESA: kernarg_segment_alignment = 4

; HSA: s_load_dword s0, s[4:5], 0x1c
define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Same as kernel_implicitarg_ptr but with attribute group #1; the HSA run
; expects 160 bytes (112 + 48) while the MESA expectation stays at 128.
; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr:
; GCN: enable_sgpr_kernarg_segment_ptr = 1

; HSA: kernarg_segment_byte_size = 160
; HSA: kernarg_segment_alignment = 4

; MESA: kernarg_segment_byte_size = 128
; MESA: kernarg_segment_alignment = 4

; HSA: s_load_dword s0, s[4:5], 0x1c
define amdgpu_kernel void @opencl_kernel_implicitarg_ptr([112 x i8]) #1 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Non-kernel function. Both runs expect the load to go through s[8:9] at
; offset 0, immediately followed by a wait and the return.
; GCN-LABEL: {{^}}func_implicitarg_ptr:
; GCN: s_waitcnt
; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @func_implicitarg_ptr() #0 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Same IR and same expectations as func_implicitarg_ptr.
; NOTE(review): despite the opencl_ name this function uses attribute group
; #0 rather than #1, so it duplicates func_implicitarg_ptr - confirm that
; this is intended.
; GCN-LABEL: {{^}}opencl_func_implicitarg_ptr:
; GCN: s_waitcnt
; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @opencl_func_implicitarg_ptr() #0 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Kernel with no explicit arguments (attribute group #0) whose only action is
; to call func_implicitarg_ptr. Both runs expect s[4:5] to be copied into
; s[8:9] before the call.
; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func_empty:
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 56
; HSA: kernarg_segment_alignment = 4

; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4

; GCN: s_mov_b64 s[8:9], s[4:5]
; GCN: s_swappc_b64
define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
  call void @func_implicitarg_ptr()
  ret void
}

; Same call with attribute group #3 (zero implicit-argument bytes). The HSA
; run expects s[8:9] to be set to 0 before the call; the MESA run still
; expects the copy from s[4:5].
; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func_empty_implicit0:
; HSA: enable_sgpr_kernarg_segment_ptr = 0
; HSA: kernarg_segment_byte_size = 0
; HSA: kernarg_segment_alignment = 4

; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4

; HSA: s_mov_b64 s[8:9], 0{{$}}
; MESA: s_mov_b64 s[8:9], s[4:5]{{$}}
; GCN: s_swappc_b64
define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty_implicit0() #3 {
  call void @func_implicitarg_ptr()
  ret void
}

; Same call with attribute group #1. After the copy into s[8:9], the negative
; checks require that s4 and s5 are not mentioned again before the call.
; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func_empty:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 48
; HSA: kernarg_segment_alignment = 4
; MESA: kernarg_segment_byte_size = 16
; GCN: s_mov_b64 s[8:9], s[4:5]
; GCN-NOT: s4
; GCN-NOT: s5
; GCN: s_swappc_b64
define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func_empty() #1 {
  call void @func_implicitarg_ptr()
  ret void
}

; Kernel with a 112-byte explicit argument calling func_implicitarg_ptr. The
; checks expect s[8:9] to be formed as s[4:5] + 0x70 (112) with an add /
; add-with-carry pair before the call.
; NOTE(review): the two per-target s_add_u32 check lines below are textually
; identical; the sibling opencl_ test uses a single common-prefix line.
; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 168
; HSA: kernarg_segment_alignment = 4

; MESA: kernarg_segment_byte_size = 128
; MESA: kernarg_segment_alignment = 4

; HSA: s_add_u32 s8, s4, 0x70
; MESA: s_add_u32 s8, s4, 0x70

; GCN: s_addc_u32 s9, s5, 0{{$}}
; GCN: s_swappc_b64
define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) #0 {
  call void @func_implicitarg_ptr()
  ret void
}

; Same as kernel_call_implicitarg_ptr_func with attribute group #1; only the
; expected HSA segment size differs (160 instead of 168).
; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 160
; HSA: kernarg_segment_alignment = 4
; MESA: kernarg_segment_byte_size = 128
; MESA: kernarg_segment_alignment = 4

; GCN: s_add_u32 s8, s4, 0x70
; GCN: s_addc_u32 s9, s5, 0{{$}}
; GCN: s_swappc_b64
define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func([112 x i8]) #1 {
  call void @func_implicitarg_ptr()
  ret void
}

; Non-kernel function calling func_implicitarg_ptr. The negative checks
; require that s8, s9 and s[8:9] are not mentioned between the label and the
; call.
; GCN-LABEL: {{^}}func_call_implicitarg_ptr_func:
; GCN-NOT: s8
; GCN-NOT: s9
; GCN-NOT: s[8:9]
; GCN: s_swappc_b64
; GCN: s_setpc_b64 s[30:31]
define void @func_call_implicitarg_ptr_func() #0 {
  call void @func_implicitarg_ptr()
  ret void
}

; Same IR and same expectations as func_call_implicitarg_ptr_func.
; NOTE(review): uses attribute group #0 despite the opencl_ name - confirm.
; GCN-LABEL: {{^}}opencl_func_call_implicitarg_ptr_func:
; GCN-NOT: s8
; GCN-NOT: s9
; GCN-NOT: s[8:9]
; GCN: s_swappc_b64
; GCN: s_setpc_b64 s[30:31]
define void @opencl_func_call_implicitarg_ptr_func() #0 {
  call void @func_implicitarg_ptr()
  ret void
}

; Non-kernel function that loads through both the kernarg segment pointer and
; the implicit argument pointer (two volatile loads, in that order). The
; checks expect one load through a register pair set to 0 and one load
; through s[8:9]; presumably the zero pair is the kernarg segment pointer,
; matching the order of the loads in the IR.
; GCN-LABEL: {{^}}func_kernarg_implicitarg_ptr:
; GCN: s_waitcnt
; GCN-DAG: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0
; GCN-DAG: s_load_dword s{{[0-9]+}}, [[NULL]], 0x0
; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
; GCN: s_waitcnt lgkmcnt(0)
define void @func_kernarg_implicitarg_ptr() #0 {
  %kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast.kernarg.segment.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
  %cast.implicitarg = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load0 = load volatile i32, i32 addrspace(4)* %cast.kernarg.segment.ptr
  %load1 = load volatile i32, i32 addrspace(4)* %cast.implicitarg
  ret void
}

; Same IR and same expectations as func_kernarg_implicitarg_ptr.
; NOTE(review): uses attribute group #0 despite the opencl_ name - confirm.
; GCN-LABEL: {{^}}opencl_func_kernarg_implicitarg_ptr:
; GCN: s_waitcnt
; GCN-DAG: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0
; GCN-DAG: s_load_dword s{{[0-9]+}}, [[NULL]], 0x0
; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
; GCN: s_waitcnt lgkmcnt(0)
define void @opencl_func_kernarg_implicitarg_ptr() #0 {
  %kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast.kernarg.segment.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
  %cast.implicitarg = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load0 = load volatile i32, i32 addrspace(4)* %cast.kernarg.segment.ptr
  %load1 = load volatile i32, i32 addrspace(4)* %cast.implicitarg
  ret void
}

; Kernel with a 112-byte explicit argument calling
; func_kernarg_implicitarg_ptr; the checks expect the same s[4:5] + 0x70
; computation into s[8:9] as in kernel_call_implicitarg_ptr_func.
; GCN-LABEL: {{^}}kernel_call_kernarg_implicitarg_ptr_func:
; GCN: s_add_u32 s8, s4, 0x70
; GCN: s_addc_u32 s9, s5, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kernel_call_kernarg_implicitarg_ptr_func([112 x i8]) #0 {
  call void @func_kernarg_implicitarg_ptr()
  ret void
}

; Kernel taking a <16 x i32> followed by an i32 (64 + 4 = 68 bytes of explicit
; arguments), attribute group #1. Expected segment sizes are 120 bytes for the
; HSA run and 84 bytes (68 + 16) for the MESA run.
; NOTE(review): 120 is presumably 68 rounded up to 72 plus the 48 implicit
; bytes, and the alignment value 6 presumably encodes 2^6 = 64 (the metadata
; check at the end of the file expects 64 for this kernel) - confirm.
; GCN-LABEL: {{^}}kernel_implicitarg_no_struct_align_padding:
; HSA: kernarg_segment_byte_size = 120
; MESA: kernarg_segment_byte_size = 84
; GCN: kernarg_segment_alignment = 6
define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>, i32) #1 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}


; The remaining checks apply to the HSA run only. They match the per-kernel
; KernargSegmentSize and KernargSegmentAlign entries listed under "Kernels",
; in the same order as the kernels are defined above. The sizes repeat the
; kernarg_segment_byte_size values checked earlier for each kernel.
; HSA-LABEL: Kernels:
; HSA-LABEL: - Name: kernel_implicitarg_ptr_empty
; HSA: CodeProps:
; HSA: KernargSegmentSize: 56
; HSA: KernargSegmentAlign: 8

; HSA-LABEL: - Name: kernel_implicitarg_ptr_empty_0implicit
; HSA: KernargSegmentSize: 0
; HSA: KernargSegmentAlign: 4

; HSA-LABEL: - Name: opencl_kernel_implicitarg_ptr_empty
; HSA: KernargSegmentSize: 48
; HSA: KernargSegmentAlign: 8

; HSA-LABEL: - Name: kernel_implicitarg_ptr
; HSA: KernargSegmentSize: 168
; HSA: KernargSegmentAlign: 8

; HSA-LABEL: - Name: opencl_kernel_implicitarg_ptr
; HSA: KernargSegmentSize: 160
; HSA: KernargSegmentAlign: 8

; HSA-LABEL: - Name: kernel_call_implicitarg_ptr_func_empty
; HSA: KernargSegmentSize: 56
; HSA: KernargSegmentAlign: 8

; HSA-LABEL: - Name: kernel_call_implicitarg_ptr_func_empty_implicit0
; HSA: KernargSegmentSize: 0
; HSA: KernargSegmentAlign: 4

; HSA-LABEL: - Name: opencl_kernel_call_implicitarg_ptr_func_empty
; HSA: KernargSegmentSize: 48
; HSA: KernargSegmentAlign: 8

; HSA-LABEL: - Name: kernel_call_implicitarg_ptr_func
; HSA: KernargSegmentSize: 168
; HSA: KernargSegmentAlign: 8

; HSA-LABEL: - Name: opencl_kernel_call_implicitarg_ptr_func
; HSA: KernargSegmentSize: 160
; HSA: KernargSegmentAlign: 8

; HSA-LABEL: - Name: kernel_call_kernarg_implicitarg_ptr_func
; HSA: KernargSegmentSize: 168
; HSA: KernargSegmentAlign: 8

; HSA-LABEL: - Name: kernel_implicitarg_no_struct_align_padding
; HSA: KernargSegmentSize: 120
; HSA: KernargSegmentAlign: 64

; Intrinsics under test; both return an i8 pointer in address space 4.
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #2
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #2

; #0 is the baseline; #1 and #3 additionally set the number of implicit
; argument bytes to 48 and 0 respectively; #2 is used only by the intrinsic
; declarations.
attributes #0 = { nounwind noinline }
attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="48" }
attributes #2 = { nounwind readnone speculatable }
attributes #3 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="0" }