; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MESA %s

; Checks the instructions and kernel descriptor fields that llc emits for
; @llvm.amdgcn.implicitarg.ptr when it is used directly in kernels, in
; callable (non-kernel) functions, and in kernels that call such functions.
;
; The two run lines above cover:
;   * amdgcn-amd-amdhsa, code object v2, kaveri  -> prefixes GCN and HSA
;   * amdgcn-mesa-mesa3d, tahiti                 -> prefixes GCN and MESA
;
; Attribute sets referenced by the functions (defined at the end of the file):
;   #0 - nounwind noinline, no "amdgpu-implicitarg-num-bytes"
;   #1 - nounwind noinline, "amdgpu-implicitarg-num-bytes"="48"
;   #3 - nounwind noinline, "amdgpu-implicitarg-num-bytes"="0"
;
; Every test body that reads the pointer does so with a volatile i32 load
; from addrspace(4) whose result is unused.

; Kernel with no explicit arguments and attribute set #0.
; The HSA run expects the kernarg segment pointer SGPR to be disabled, a
; zero-byte kernarg segment, and the load to go through an SGPR pair that was
; set to 0. The MESA run expects the pointer enabled and a 16-byte segment.
; NOTE(review): unlike the _0implicit variant below, there is no load check
; for the MESA run here - confirm whether that omission is intentional.
; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty:
; HSA: enable_sgpr_kernarg_segment_ptr = 0
; HSA: kernarg_segment_byte_size = 0
; HSA: kernarg_segment_alignment = 4

; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4

; HSA: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0{{$}}
; HSA: s_load_dword s0, [[NULL]], 0x0
define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Same kernel body, but with attribute set #3, which sets
; "amdgpu-implicitarg-num-bytes" to "0" explicitly. The descriptor
; expectations are identical to the previous kernel; the MESA run additionally
; expects the load to read from s[4:5] at offset 0.
; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty_0implicit:
; HSA: enable_sgpr_kernarg_segment_ptr = 0
; HSA: kernarg_segment_byte_size = 0
; HSA: kernarg_segment_alignment = 4

; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4

; HSA: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0{{$}}
; HSA: s_load_dword s0, [[NULL]], 0x0

; MESA: s_load_dword s0, s[4:5], 0x0
define amdgpu_kernel void @kernel_implicitarg_ptr_empty_0implicit() #3 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Kernel with no explicit arguments and attribute set #1 (48 implicit bytes).
; Both runs expect the kernarg segment pointer SGPR to be enabled. The HSA run
; expects a 48-byte segment and a load from s[4:5] at offset 0; the MESA run
; expects a 16-byte segment.
; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr_empty:
; GCN: enable_sgpr_kernarg_segment_ptr = 1

; HSA: kernarg_segment_byte_size = 48
; HSA: kernarg_segment_alignment = 4

; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4

; HSA: s_load_dword s0, s[4:5], 0x0
define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() #1 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Kernel with a single unnamed [112 x i8] argument and attribute set #0.
; The HSA run expects a 112-byte segment and the load at offset 0x1c from
; s[4:5]; the MESA run expects a 128-byte segment.
; NOTE(review): 0x1c is 28, so the offset is presumably encoded in dwords
; (28 * 4 = 112 bytes) on this subtarget - confirm against the ISA docs.
; GCN-LABEL: {{^}}kernel_implicitarg_ptr:
; GCN: enable_sgpr_kernarg_segment_ptr = 1

; HSA: kernarg_segment_byte_size = 112
; HSA: kernarg_segment_alignment = 4

; MESA: kernarg_segment_byte_size = 128
; MESA: kernarg_segment_alignment = 4

; HSA: s_load_dword s0, s[4:5], 0x1c
define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Same [112 x i8] kernel with attribute set #1. The HSA run expects a
; 160-byte segment (112 + 48); the MESA run still expects 128 bytes.
; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr:
; GCN: enable_sgpr_kernarg_segment_ptr = 1

; HSA: kernarg_segment_byte_size = 160
; HSA: kernarg_segment_alignment = 4

; MESA: kernarg_segment_byte_size = 128
; MESA: kernarg_segment_alignment = 4

; HSA: s_load_dword s0, s[4:5], 0x1c
define amdgpu_kernel void @opencl_kernel_implicitarg_ptr([112 x i8]) #1 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Callable (non-kernel) function that loads through the implicit argument
; pointer. Both runs expect a load from s[4:5] at offset 0, immediately
; followed by s_waitcnt and the s_setpc_b64 return.
; GCN-LABEL: {{^}}func_implicitarg_ptr:
; GCN: s_waitcnt
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @func_implicitarg_ptr() #0 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Same body and same expected output as func_implicitarg_ptr.
; NOTE(review): despite the opencl_ prefix this function uses attribute set
; #0, exactly like func_implicitarg_ptr - confirm whether #1 was intended.
; GCN-LABEL: {{^}}opencl_func_implicitarg_ptr:
; GCN: s_waitcnt
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @opencl_func_implicitarg_ptr() #0 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}

; Kernel with no arguments (attribute set #0) that calls
; func_implicitarg_ptr. Descriptor expectations match
; kernel_implicitarg_ptr_empty, and the call must appear as s_swappc_b64.
; The XGCN-NOT lines below use a prefix that neither run line enables, so
; they are currently inactive.
; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func_empty:
; HSA: enable_sgpr_kernarg_segment_ptr = 0
; HSA: kernarg_segment_byte_size = 0
; HSA: kernarg_segment_alignment = 4

; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4

; XGCN-NOT: s[4:5]
; XGCN-NOT: s4
; XGCN-NOT: s5
; GCN: s_swappc_b64
define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
  call void @func_implicitarg_ptr()
  ret void
}

; Same calling kernel with attribute set #3 (explicit zero implicit bytes).
; The HSA run expects s[4:5] to be set to 0 before the call; the MESA run
; expects no mention of s[4:5], s4 or s5 between the descriptor and the call.
; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func_empty_implicit0:
; HSA: enable_sgpr_kernarg_segment_ptr = 0
; HSA: kernarg_segment_byte_size = 0
; HSA: kernarg_segment_alignment = 4

; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4

; HSA: s_mov_b64 s[4:5], 0{{$}}
; MESA-NOT: s[4:5]
; MESA-NOT: s4
; MESA-NOT: s5
; GCN: s_swappc_b64
define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty_implicit0() #3 {
  call void @func_implicitarg_ptr()
  ret void
}

; Calling kernel with no explicit arguments and attribute set #1. Both runs
; expect the kernarg segment pointer enabled and no mention of s[4:5], s4 or
; s5 between the descriptor and the s_swappc_b64 call.
; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func_empty:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 48
; HSA: kernarg_segment_alignment = 4
; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4
; GCN-NOT: s[4:5]
; GCN-NOT: s4
; GCN-NOT: s5
; GCN: s_swappc_b64
define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func_empty() #1 {
  call void @func_implicitarg_ptr()
  ret void
}

; Calling kernel with a [112 x i8] argument and attribute set #0. Both runs
; expect s[4:5] to be advanced by 0x70 (112, the byte size of the explicit
; argument) with an add / add-with-carry pair before the call.
; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 112
; HSA: kernarg_segment_alignment = 4
; MESA: kernarg_segment_byte_size = 128
; MESA: kernarg_segment_alignment = 4

; HSA: s_add_u32 s4, s4, 0x70
; MESA: s_add_u32 s4, s4, 0x70

; GCN: s_addc_u32 s5, s5, 0{{$}}
; GCN: s_swappc_b64
define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) #0 {
  call void @func_implicitarg_ptr()
  ret void
}

; Same calling kernel with attribute set #1. The HSA run expects a 160-byte
; segment; the pointer adjustment by 0x70 is expected in both runs.
; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 160
; HSA: kernarg_segment_alignment = 4
; MESA: kernarg_segment_byte_size = 128
; MESA: kernarg_segment_alignment = 4

; GCN: s_add_u32 s4, s4, 0x70
; GCN: s_addc_u32 s5, s5, 0{{$}}
; GCN: s_swappc_b64
define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func([112 x i8]) #1 {
  call void @func_implicitarg_ptr()
  ret void
}

; Callable function that calls func_implicitarg_ptr. The checks require that
; s4, s5 and s[4:5] are not mentioned anywhere in this function's output.
; GCN-LABEL: {{^}}func_call_implicitarg_ptr_func:
; GCN-NOT: s4
; GCN-NOT: s5
; GCN-NOT: s[4:5]
define void @func_call_implicitarg_ptr_func() #0 {
  call void @func_implicitarg_ptr()
  ret void
}

; Same as func_call_implicitarg_ptr_func (same callee, same attribute set
; #0), with the same expectations.
; NOTE(review): despite the opencl_ prefix, neither this function nor its
; callee uses attribute set #1 - confirm whether that is intended.
; GCN-LABEL: {{^}}opencl_func_call_implicitarg_ptr_func:
; GCN-NOT: s4
; GCN-NOT: s5
; GCN-NOT: s[4:5]
define void @opencl_func_call_implicitarg_ptr_func() #0 {
  call void @func_implicitarg_ptr()
  ret void
}

; Callable function that reads both @llvm.amdgcn.kernarg.segment.ptr and
; @llvm.amdgcn.implicitarg.ptr and loads an i32 through each. The checks
; expect one load through an SGPR pair that was set to 0 and one load from
; s[4:5] at offset 0.
; NOTE(review): presumably the zeroed pair is the kernarg segment pointer and
; s[4:5] is the implicit argument pointer - the checks do not say which load
; is which, so confirm before relying on this.
; GCN-LABEL: {{^}}func_kernarg_implicitarg_ptr:
; GCN: s_waitcnt
; GCN-DAG: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0
; GCN-DAG: s_load_dword s{{[0-9]+}}, [[NULL]], 0x0
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; GCN: s_waitcnt lgkmcnt(0)
define void @func_kernarg_implicitarg_ptr() #0 {
  %kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast.kernarg.segment.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
  %cast.implicitarg = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load0 = load volatile i32, i32 addrspace(4)* %cast.kernarg.segment.ptr
  %load1 = load volatile i32, i32 addrspace(4)* %cast.implicitarg
  ret void
}

; Same body, attribute set (#0) and expectations as
; func_kernarg_implicitarg_ptr.
; GCN-LABEL: {{^}}opencl_func_kernarg_implicitarg_ptr:
; GCN: s_waitcnt
; GCN-DAG: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0
; GCN-DAG: s_load_dword s{{[0-9]+}}, [[NULL]], 0x0
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; GCN: s_waitcnt lgkmcnt(0)
define void @opencl_func_kernarg_implicitarg_ptr() #0 {
  %kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast.kernarg.segment.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
  %cast.implicitarg = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load0 = load volatile i32, i32 addrspace(4)* %cast.kernarg.segment.ptr
  %load1 = load volatile i32, i32 addrspace(4)* %cast.implicitarg
  ret void
}

; Kernel with a [112 x i8] argument (attribute set #0) that calls
; func_kernarg_implicitarg_ptr. Both runs expect s[4:5] to be advanced by
; 0x70 before the s_swappc_b64 call. No descriptor fields are checked here.
; GCN-LABEL: {{^}}kernel_call_kernarg_implicitarg_ptr_func:
; GCN: s_add_u32 s4, s4, 0x70
; GCN: s_addc_u32 s5, s5, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kernel_call_kernarg_implicitarg_ptr_func([112 x i8]) #0 {
  call void @func_kernarg_implicitarg_ptr()
  ret void
}

; Kernel taking (<16 x i32>, i32) with attribute set #1. Only descriptor
; fields are checked: segment size 120 for the HSA run, 84 for the MESA run,
; and kernarg_segment_alignment = 6 in both.
; NOTE(review): the metadata section below expects KernargSegmentAlign 64
; for this kernel, so the descriptor value 6 is presumably log2(64) - confirm
; against the code object v2 documentation.
; GCN-LABEL: {{^}}kernel_implicitarg_no_struct_align_padding:
; HSA: kernarg_segment_byte_size = 120
; MESA: kernarg_segment_byte_size = 84
; GCN: kernarg_segment_alignment = 6
define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>, i32) #1 {
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %load = load volatile i32, i32 addrspace(4)* %cast
  ret void
}


; Per-kernel metadata checks, active only in the HSA run. For every kernel
; defined above, the "Kernels" list is expected to contain an entry with the
; kernel's name followed by its KernargSegmentSize and KernargSegmentAlign.
; The sizes repeat the kernarg_segment_byte_size values checked per kernel;
; kernels using attribute set #1 are expected to report an alignment of 8
; (64 for the <16 x i32> kernel), the others 4.
; HSA-LABEL: Kernels:
; HSA-LABEL: - Name: kernel_implicitarg_ptr_empty
; HSA: CodeProps:
; HSA: KernargSegmentSize: 0
; HSA: KernargSegmentAlign: 4

; HSA-LABEL: - Name: kernel_implicitarg_ptr_empty_0implicit
; HSA: KernargSegmentSize: 0
; HSA: KernargSegmentAlign: 4

; HSA-LABEL: - Name: opencl_kernel_implicitarg_ptr_empty
; HSA: KernargSegmentSize: 48
; HSA: KernargSegmentAlign: 8

; HSA-LABEL: - Name: kernel_implicitarg_ptr
; HSA: KernargSegmentSize: 112
; HSA: KernargSegmentAlign: 4

; HSA-LABEL: - Name: opencl_kernel_implicitarg_ptr
; HSA: KernargSegmentSize: 160
; HSA: KernargSegmentAlign: 8

; HSA-LABEL: - Name: kernel_call_implicitarg_ptr_func_empty
; HSA: KernargSegmentSize: 0
; HSA: KernargSegmentAlign: 4

; HSA-LABEL: - Name: kernel_call_implicitarg_ptr_func_empty_implicit0
; HSA: KernargSegmentSize: 0
; HSA: KernargSegmentAlign: 4

; HSA-LABEL: - Name: opencl_kernel_call_implicitarg_ptr_func_empty
; HSA: KernargSegmentSize: 48
; HSA: KernargSegmentAlign: 8

; HSA-LABEL: - Name: kernel_call_implicitarg_ptr_func
; HSA: KernargSegmentSize: 112
; HSA: KernargSegmentAlign: 4

; HSA-LABEL: - Name: opencl_kernel_call_implicitarg_ptr_func
; HSA: KernargSegmentSize: 160
; HSA: KernargSegmentAlign: 8

; HSA-LABEL: - Name: kernel_call_kernarg_implicitarg_ptr_func
; HSA: KernargSegmentSize: 112
; HSA: KernargSegmentAlign: 4

; HSA-LABEL: - Name: kernel_implicitarg_no_struct_align_padding
; HSA: KernargSegmentSize: 120
; HSA: KernargSegmentAlign: 64

; Intrinsics under test; both return an i8 pointer in addrspace(4).
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #2
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #2

; #0: baseline, #1: 48 implicit argument bytes, #2: intrinsic declarations,
; #3: explicitly zero implicit argument bytes.
attributes #0 = { nounwind noinline }
attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="48" }
attributes #2 = { nounwind readnone speculatable }
attributes #3 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="0" }