; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-SDAG %s
; RUN: llc -global-isel -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-GISEL %s

; Test with gfx803 so that
; addrspacecast/llvm.amdgcn.is.shared/llvm.amdgcn.is.private require
; the queue ptr. Test with code object v3 so that
; llvm.trap/llvm.debugtrap require the queue ptr.
;
; Every function defined in this file carries attribute group #0 (the
; "amdgpu-no-*" hints listed at the end of the file) while its body uses, or
; calls something that may use, the very inputs those hints disclaim. Both
; SelectionDAG and GlobalISel are run; where their output differs the checks
; are split into per-selector blocks.


; External callee. It is declared without attribute group #0, i.e. it carries
; none of the hints.
declare hidden void @requires_all_inputs()

; This function is incorrectly marked with the hints that the callee
; does not require the implicit arguments to the function. Make sure
; we do not crash.
define void @parent_func_missing_inputs() #0 {
; FIXEDABI-LABEL: parent_func_missing_inputs:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-NEXT: s_or_saveexec_b64 s[16:17], -1
; FIXEDABI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; FIXEDABI-NEXT: s_mov_b64 exec, s[16:17]
; FIXEDABI-NEXT: v_writelane_b32 v40, s33, 2
; FIXEDABI-NEXT: s_mov_b32 s33, s32
; FIXEDABI-NEXT: s_addk_i32 s32, 0x400
; FIXEDABI-NEXT: v_writelane_b32 v40, s30, 0
; FIXEDABI-NEXT: v_writelane_b32 v40, s31, 1
; FIXEDABI-NEXT: s_getpc_b64 s[16:17]
; FIXEDABI-NEXT: s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4
; FIXEDABI-NEXT: s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12
; FIXEDABI-NEXT: s_swappc_b64 s[30:31], s[16:17]
; FIXEDABI-NEXT: v_readlane_b32 s31, v40, 1
; FIXEDABI-NEXT: v_readlane_b32 s30, v40, 0
; FIXEDABI-NEXT: s_addk_i32 s32, 0xfc00
; FIXEDABI-NEXT: v_readlane_b32 s33, v40, 2
; FIXEDABI-NEXT: s_or_saveexec_b64 s[4:5], -1
; FIXEDABI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; FIXEDABI-NEXT: s_mov_b64 exec, s[4:5]
; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-NEXT: s_setpc_b64 s[30:31]
  call void @requires_all_inputs()
  ret void
}

; Kernel counterpart of the test above: an amdgpu_kernel carrying the hints
; calls the unhinted external function.
define amdgpu_kernel void @parent_kernel_missing_inputs() #0 {
; FIXEDABI-SDAG-LABEL: parent_kernel_missing_inputs:
; FIXEDABI-SDAG: ; %bb.0:
; FIXEDABI-SDAG-NEXT: s_add_i32 s4, s4, s9
; FIXEDABI-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; FIXEDABI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; FIXEDABI-SDAG-NEXT: s_add_u32 s0, s0, s9
; FIXEDABI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; FIXEDABI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; FIXEDABI-SDAG-NEXT: s_addc_u32 s1, s1, 0
; FIXEDABI-SDAG-NEXT: s_mov_b32 s14, s8
; FIXEDABI-SDAG-NEXT: v_or_b32_e32 v31, v0, v2
; FIXEDABI-SDAG-NEXT: s_mov_b64 s[8:9], 0
; FIXEDABI-SDAG-NEXT: s_mov_b32 s12, s6
; FIXEDABI-SDAG-NEXT: s_mov_b32 s13, s7
; FIXEDABI-SDAG-NEXT: s_mov_b32 s32, 0
; FIXEDABI-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s5
; FIXEDABI-SDAG-NEXT: s_getpc_b64 s[4:5]
; FIXEDABI-SDAG-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
; FIXEDABI-SDAG-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
; FIXEDABI-SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5]
; FIXEDABI-SDAG-NEXT: s_endpgm
;
; FIXEDABI-GISEL-LABEL: parent_kernel_missing_inputs:
; FIXEDABI-GISEL: ; %bb.0:
; FIXEDABI-GISEL-NEXT: s_add_i32 s4, s4, s9
; FIXEDABI-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; FIXEDABI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; FIXEDABI-GISEL-NEXT: s_add_u32 s0, s0, s9
; FIXEDABI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
; FIXEDABI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2
; FIXEDABI-GISEL-NEXT: s_addc_u32 s1, s1, 0
; FIXEDABI-GISEL-NEXT: s_mov_b32 s14, s8
; FIXEDABI-GISEL-NEXT: v_or_b32_e32 v31, v0, v1
; FIXEDABI-GISEL-NEXT: s_mov_b64 s[8:9], 0
; FIXEDABI-GISEL-NEXT: s_mov_b32 s12, s6
; FIXEDABI-GISEL-NEXT: s_mov_b32 s13, s7
; FIXEDABI-GISEL-NEXT: s_mov_b32 s32, 0
; FIXEDABI-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s5
; FIXEDABI-GISEL-NEXT: s_getpc_b64 s[4:5]
; FIXEDABI-GISEL-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
; FIXEDABI-GISEL-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
; FIXEDABI-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
; FIXEDABI-GISEL-NEXT: s_endpgm
  call void @requires_all_inputs()
  ret void
}

; Function is marked with amdgpu-no-workitem-id-* but uses them anyway.
; Each of the three workitem ids is read and stored (volatile) to %ptr.
define void @marked_func_use_workitem_id(i32 addrspace(1)* %ptr) #0 {
; FIXEDABI-SDAG-LABEL: marked_func_use_workitem_id:
; FIXEDABI-SDAG: ; %bb.0:
; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v31
; FIXEDABI-SDAG-NEXT: flat_store_dword v[0:1], v2
; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-SDAG-NEXT: v_bfe_u32 v2, v31, 10, 10
; FIXEDABI-SDAG-NEXT: flat_store_dword v[0:1], v2
; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-SDAG-NEXT: v_bfe_u32 v2, v31, 20, 10
; FIXEDABI-SDAG-NEXT: flat_store_dword v[0:1], v2
; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; FIXEDABI-GISEL-LABEL: marked_func_use_workitem_id:
; FIXEDABI-GISEL: ; %bb.0:
; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-GISEL-NEXT: v_and_b32_e32 v2, 0x3ff, v31
; FIXEDABI-GISEL-NEXT: v_bfe_u32 v3, v31, 10, 10
; FIXEDABI-GISEL-NEXT: v_bfe_u32 v4, v31, 20, 10
; FIXEDABI-GISEL-NEXT: flat_store_dword v[0:1], v2
; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-GISEL-NEXT: flat_store_dword v[0:1], v3
; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-GISEL-NEXT: flat_store_dword v[0:1], v4
; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-GISEL-NEXT: s_setpc_b64 s[30:31]
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %id.y = call i32 @llvm.amdgcn.workitem.id.y()
  %id.z = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %id.x, i32 addrspace(1)* %ptr
  store volatile i32 %id.y, i32 addrspace(1)* %ptr
  store volatile i32 %id.z, i32 addrspace(1)* %ptr
  ret void
}

; Kernel is marked with amdgpu-no-workitem-id-* but uses them anyway.
define amdgpu_kernel void @marked_kernel_use_workitem_id(i32 addrspace(1)* %ptr) #0 {
; FIXEDABI-LABEL: marked_kernel_use_workitem_id:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; FIXEDABI-NEXT: s_waitcnt lgkmcnt(0)
; FIXEDABI-NEXT: v_mov_b32_e32 v4, s1
; FIXEDABI-NEXT: v_mov_b32_e32 v3, s0
; FIXEDABI-NEXT: flat_store_dword v[3:4], v0
; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-NEXT: flat_store_dword v[3:4], v1
; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-NEXT: flat_store_dword v[3:4], v2
; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-NEXT: s_endpgm
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %id.y = call i32 @llvm.amdgcn.workitem.id.y()
  %id.z = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %id.x, i32 addrspace(1)* %ptr
  store volatile i32 %id.y, i32 addrspace(1)* %ptr
  store volatile i32 %id.z, i32 addrspace(1)* %ptr
  ret void
}

; Function carries attribute group #0 but reads all three workgroup ids via
; llvm.amdgcn.workgroup.id.{x,y,z} and stores each one (volatile) to %ptr.
define void @marked_func_use_workgroup_id(i32 addrspace(1)* %ptr) #0 {
; FIXEDABI-LABEL: marked_func_use_workgroup_id:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-NEXT: v_mov_b32_e32 v2, s12
; FIXEDABI-NEXT: flat_store_dword v[0:1], v2
; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-NEXT: v_mov_b32_e32 v2, s13
; FIXEDABI-NEXT: flat_store_dword v[0:1], v2
; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-NEXT: v_mov_b32_e32 v2, s14
; FIXEDABI-NEXT: flat_store_dword v[0:1], v2
; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-NEXT: s_setpc_b64 s[30:31]
  %id.x = call i32 @llvm.amdgcn.workgroup.id.x()
  %id.y = call i32 @llvm.amdgcn.workgroup.id.y()
  %id.z = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %id.x, i32 addrspace(1)* %ptr
  store volatile i32 %id.y, i32 addrspace(1)* %ptr
  store volatile i32 %id.z, i32 addrspace(1)* %ptr
  ret void
}

; Kernel counterpart of the workgroup id test above.
define amdgpu_kernel void @marked_kernel_use_workgroup_id(i32 addrspace(1)* %ptr) #0 {
; FIXEDABI-LABEL: marked_kernel_use_workgroup_id:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; FIXEDABI-NEXT: v_mov_b32_e32 v2, s6
; FIXEDABI-NEXT: s_waitcnt lgkmcnt(0)
; FIXEDABI-NEXT: v_mov_b32_e32 v0, s0
; FIXEDABI-NEXT: v_mov_b32_e32 v1, s1
; FIXEDABI-NEXT: flat_store_dword v[0:1], v2
; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-NEXT: v_mov_b32_e32 v2, s7
; FIXEDABI-NEXT: flat_store_dword v[0:1], v2
; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-NEXT: v_mov_b32_e32 v2, s8
; FIXEDABI-NEXT: flat_store_dword v[0:1], v2
; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-NEXT: s_endpgm
  %id.x = call i32 @llvm.amdgcn.workgroup.id.x()
  %id.y = call i32 @llvm.amdgcn.workgroup.id.y()
  %id.z = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %id.x, i32 addrspace(1)* %ptr
  store volatile i32 %id.y, i32 addrspace(1)* %ptr
  store volatile i32 %id.z, i32 addrspace(1)* %ptr
  ret void
}

; Function carries the no-queue-ptr, no-implicitarg-ptr, no-dispatch-ptr and
; no-dispatch-id hints but queries all four anyway: a volatile i8 load is done
; through each returned pointer and the dispatch id is stored to %ptr.
define void @marked_func_use_other_sgpr(i64 addrspace(1)* %ptr) #0 {
; FIXEDABI-LABEL: marked_func_use_other_sgpr:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-NEXT: v_mov_b32_e32 v2, s6
; FIXEDABI-NEXT: v_mov_b32_e32 v3, s7
; FIXEDABI-NEXT: flat_load_ubyte v2, v[2:3] glc
; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-NEXT: v_mov_b32_e32 v2, s8
; FIXEDABI-NEXT: v_mov_b32_e32 v3, s9
; FIXEDABI-NEXT: flat_load_ubyte v2, v[2:3] glc
; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-NEXT: v_mov_b32_e32 v2, s4
; FIXEDABI-NEXT: v_mov_b32_e32 v3, s5
; FIXEDABI-NEXT: flat_load_ubyte v2, v[2:3] glc
; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-NEXT: v_mov_b32_e32 v2, s10
; FIXEDABI-NEXT: v_mov_b32_e32 v3, s11
; FIXEDABI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-NEXT: s_setpc_b64 s[30:31]
  %queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
  %queue.load = load volatile i8, i8 addrspace(4)* %queue.ptr
  %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr
  %dispatch.load = load volatile i8, i8 addrspace(4)* %dispatch.ptr
  store volatile i64 %dispatch.id, i64 addrspace(1)* %ptr
  ret void
}

; Kernel counterpart of the test above (same IR body, amdgpu_kernel calling
; convention).
define amdgpu_kernel void @marked_kernel_use_other_sgpr(i64 addrspace(1)* %ptr) #0 {
; FIXEDABI-LABEL: marked_kernel_use_other_sgpr:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_add_u32 s0, s4, 8
; FIXEDABI-NEXT: flat_load_ubyte v0, v[0:1] glc
; FIXEDABI-NEXT: s_addc_u32 s1, s5, 0
; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-NEXT: v_mov_b32_e32 v0, s0
; FIXEDABI-NEXT: v_mov_b32_e32 v1, s1
; FIXEDABI-NEXT: flat_load_ubyte v0, v[0:1] glc
; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-NEXT: flat_load_ubyte v0, v[0:1] glc
; FIXEDABI-NEXT: s_endpgm
  %queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
  %queue.load = load volatile i8, i8 addrspace(4)* %queue.ptr
  %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr
  %dispatch.load = load volatile i8, i8 addrspace(4)* %dispatch.ptr
  store volatile i64 %dispatch.id, i64 addrspace(1)* %ptr
  ret void
}

; Kernel that takes no explicit arguments, carries the no-implicitarg-ptr hint,
; and still does a volatile load through llvm.amdgcn.implicitarg.ptr.
define amdgpu_kernel void @marked_kernel_nokernargs_implicitarg_ptr() #0 {
; FIXEDABI-LABEL: marked_kernel_nokernargs_implicitarg_ptr:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: v_mov_b32_e32 v0, 0
; FIXEDABI-NEXT: v_mov_b32_e32 v1, 0
; FIXEDABI-NEXT: flat_load_ubyte v0, v[0:1] glc
; FIXEDABI-NEXT: s_endpgm
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr
  ret void
}

; On gfx8, the queue ptr is required for this addrspacecast.
; Both a private (addrspace 5) and a local (addrspace 3) pointer are cast to
; flat and stored through.
define void @addrspacecast_requires_queue_ptr(i32 addrspace(5)* %ptr.private, i32 addrspace(3)* %ptr.local) #0 {
; FIXEDABI-SDAG-LABEL: addrspacecast_requires_queue_ptr:
; FIXEDABI-SDAG: ; %bb.0:
; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-SDAG-NEXT: s_load_dword s4, s[6:7], 0x44
; FIXEDABI-SDAG-NEXT: s_load_dword s5, s[6:7], 0x40
; FIXEDABI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
; FIXEDABI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; FIXEDABI-SDAG-NEXT: v_mov_b32_e32 v2, s4
; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc
; FIXEDABI-SDAG-NEXT: v_mov_b32_e32 v0, s5
; FIXEDABI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1
; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v0, vcc
; FIXEDABI-SDAG-NEXT: v_mov_b32_e32 v0, 1
; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc
; FIXEDABI-SDAG-NEXT: flat_store_dword v[2:3], v0
; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-SDAG-NEXT: v_mov_b32_e32 v0, 2
; FIXEDABI-SDAG-NEXT: flat_store_dword v[4:5], v0
; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; FIXEDABI-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; FIXEDABI-GISEL-LABEL: addrspacecast_requires_queue_ptr:
; FIXEDABI-GISEL: ; %bb.0:
; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-GISEL-NEXT: s_load_dword s4, s[6:7], 0x44
; FIXEDABI-GISEL-NEXT: s_load_dword s5, s[6:7], 0x40
; FIXEDABI-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc
; FIXEDABI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; FIXEDABI-GISEL-NEXT: v_mov_b32_e32 v3, s4
; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
; FIXEDABI-GISEL-NEXT: v_mov_b32_e32 v4, s5
; FIXEDABI-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1
; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
; FIXEDABI-GISEL-NEXT: v_mov_b32_e32 v4, 1
; FIXEDABI-GISEL-NEXT: flat_store_dword v[2:3], v4
; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-GISEL-NEXT: v_mov_b32_e32 v2, 2
; FIXEDABI-GISEL-NEXT: flat_store_dword v[0:1], v2
; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; FIXEDABI-GISEL-NEXT: s_setpc_b64 s[30:31]
  %flat.private = addrspacecast i32 addrspace(5)* %ptr.private to i32*
  %flat.local = addrspacecast i32 addrspace(3)* %ptr.local to i32*
  store volatile i32 1, i32* %flat.private
  store volatile i32 2, i32* %flat.local
  ret void
}

; llvm.amdgcn.is.shared on a flat pointer; per the note at the top of the file
; this requires the queue ptr on gfx803. The zero-extended result is stored
; (volatile) to an undef global pointer.
define void @is_shared_requires_queue_ptr(i8* %ptr) #0 {
; FIXEDABI-LABEL: is_shared_requires_queue_ptr:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-NEXT: s_load_dword s4, s[6:7], 0x40
; FIXEDABI-NEXT: s_waitcnt lgkmcnt(0)
; FIXEDABI-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1
; FIXEDABI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; FIXEDABI-NEXT: flat_store_dword v[0:1], v0
; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-NEXT: s_setpc_b64 s[30:31]
  %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
  %zext = zext i1 %is.shared to i32
  store volatile i32 %zext, i32 addrspace(1)* undef
  ret void
}

; llvm.amdgcn.is.private on a flat pointer; per the note at the top of the
; file this requires the queue ptr on gfx803. The zero-extended result is
; stored (volatile) to an undef global pointer.
define void @is_private_requires_queue_ptr(i8* %ptr) #0 {
; FIXEDABI-LABEL: is_private_requires_queue_ptr:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-NEXT: s_load_dword s4, s[6:7], 0x44
; FIXEDABI-NEXT: s_waitcnt lgkmcnt(0)
; FIXEDABI-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1
; FIXEDABI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; FIXEDABI-NEXT: flat_store_dword v[0:1], v0
; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
; FIXEDABI-NEXT: s_setpc_b64 s[30:31]
  %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
  %zext = zext i1 %is.private to i32
  store volatile i32 %zext, i32 addrspace(1)* undef
  ret void
}

; llvm.trap in a function carrying the no-queue-ptr hint; per the note at the
; top of the file the trap requires the queue ptr with code object v3.
define void @trap_requires_queue() #0 {
; FIXEDABI-LABEL: trap_requires_queue:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-NEXT: s_mov_b64 s[0:1], s[6:7]
; FIXEDABI-NEXT: s_trap 2
  call void @llvm.trap()
  unreachable
}

; llvm.debugtrap counterpart of the test above.
define void @debugtrap_requires_queue() #0 {
; FIXEDABI-LABEL: debugtrap_requires_queue:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FIXEDABI-NEXT: s_trap 3
  call void @llvm.debugtrap()
  unreachable
}

; Intrinsics used by the tests above.
declare i32 @llvm.amdgcn.workitem.id.x()
declare i32 @llvm.amdgcn.workitem.id.y()
declare i32 @llvm.amdgcn.workitem.id.z()
declare i32 @llvm.amdgcn.workgroup.id.x()
declare i32 @llvm.amdgcn.workgroup.id.y()
declare i32 @llvm.amdgcn.workgroup.id.z()
declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
declare i64 @llvm.amdgcn.dispatch.id()
declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
declare i1 @llvm.amdgcn.is.shared(i8*)
declare i1 @llvm.amdgcn.is.private(i8*)
declare void @llvm.trap()
declare void @llvm.debugtrap()

; Hint attributes attached to every function definition in this file.
; NOTE(review): the test comments spell the workitem hints as
; "amdgpu-no-workitem-id-*", whereas this group spells them
; "amdgpu-no-work-item-id-*" (and "amdgpu-no-work-group-id-*"). Confirm which
; spelling the backend recognizes; changing it here may change the generated
; code, so the assertions would need to be regenerated with
; utils/update_llc_test_checks.py.
attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" }
