; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=GFX8V3 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefix=GFX8V4 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=GFX8V5 %s

; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=GFX9V3 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefix=GFX9V4 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=GFX9V5 %s

; Each kernel below is compiled for gfx803 and gfx906 under AMDHSA code object
; versions 3, 4 and 5 (six configurations, one check prefix per configuration).
; The kernels use flat-address casts, the is.shared / is.private queries, the
; trap intrinsics and the queue / implicitarg / dispatch intrinsics, so the
; expected output pins down which SGPR inputs and which memory offsets the
; backend uses to obtain that data in every configuration.
;
; The check lines are regenerated by the script named on the first line; do not
; edit them by hand.

; Casts one private and one local pointer to flat and stores through both; the
; volatile stores keep both casts alive.
; Expected output: gfx803 loads the two 32-bit values it selects as the high
; address halves from memory - via s[4:5] at offsets 0x44 / 0x40 under V3 and V4
; (s[4:5] is presumably the queue pointer there; the kernel arguments are read
; through s[6:7]), and via s[4:5] at offsets 0xc8 / 0xcc under V5, where s[4:5]
; is also the base used for the kernel arguments. gfx906 instead reads both
; values with s_getreg_b32 from HW_REG_SH_MEM_BASES in all three versions.
define amdgpu_kernel void @addrspacecast(i32 addrspace(5)* %ptr.private, i32 addrspace(3)* %ptr.local) {
; GFX8V3-LABEL: addrspacecast:
; GFX8V3:       ; %bb.0:
; GFX8V3-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX8V3-NEXT:    s_load_dword s2, s[4:5], 0x44
; GFX8V3-NEXT:    s_load_dword s3, s[4:5], 0x40
; GFX8V3-NEXT:    v_mov_b32_e32 v4, 1
; GFX8V3-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8V3-NEXT:    s_cmp_lg_u32 s0, -1
; GFX8V3-NEXT:    v_mov_b32_e32 v0, s2
; GFX8V3-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX8V3-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
; GFX8V3-NEXT:    v_mov_b32_e32 v0, s0
; GFX8V3-NEXT:    s_cmp_lg_u32 s1, -1
; GFX8V3-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX8V3-NEXT:    v_mov_b32_e32 v2, s3
; GFX8V3-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX8V3-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
; GFX8V3-NEXT:    v_mov_b32_e32 v2, s1
; GFX8V3-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX8V3-NEXT:    flat_store_dword v[0:1], v4
; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
; GFX8V3-NEXT:    v_mov_b32_e32 v0, 2
; GFX8V3-NEXT:    flat_store_dword v[2:3], v0
; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
; GFX8V3-NEXT:    s_endpgm
;
; GFX8V4-LABEL: addrspacecast:
; GFX8V4:       ; %bb.0:
; GFX8V4-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX8V4-NEXT:    s_load_dword s2, s[4:5], 0x44
; GFX8V4-NEXT:    s_load_dword s3, s[4:5], 0x40
; GFX8V4-NEXT:    v_mov_b32_e32 v4, 1
; GFX8V4-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8V4-NEXT:    s_cmp_lg_u32 s0, -1
; GFX8V4-NEXT:    v_mov_b32_e32 v0, s2
; GFX8V4-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX8V4-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
; GFX8V4-NEXT:    v_mov_b32_e32 v0, s0
; GFX8V4-NEXT:    s_cmp_lg_u32 s1, -1
; GFX8V4-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX8V4-NEXT:    v_mov_b32_e32 v2, s3
; GFX8V4-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX8V4-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
; GFX8V4-NEXT:    v_mov_b32_e32 v2, s1
; GFX8V4-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX8V4-NEXT:    flat_store_dword v[0:1], v4
; GFX8V4-NEXT:    s_waitcnt vmcnt(0)
; GFX8V4-NEXT:    v_mov_b32_e32 v0, 2
; GFX8V4-NEXT:    flat_store_dword v[2:3], v0
; GFX8V4-NEXT:    s_waitcnt vmcnt(0)
; GFX8V4-NEXT:    s_endpgm
;
; GFX8V5-LABEL: addrspacecast:
; GFX8V5:       ; %bb.0:
; GFX8V5-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX8V5-NEXT:    s_load_dword s2, s[4:5], 0xc8
; GFX8V5-NEXT:    s_load_dword s3, s[4:5], 0xcc
; GFX8V5-NEXT:    v_mov_b32_e32 v4, 1
; GFX8V5-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT:    s_cmp_lg_u32 s0, -1
; GFX8V5-NEXT:    v_mov_b32_e32 v0, s2
; GFX8V5-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX8V5-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
; GFX8V5-NEXT:    v_mov_b32_e32 v0, s0
; GFX8V5-NEXT:    s_cmp_lg_u32 s1, -1
; GFX8V5-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX8V5-NEXT:    v_mov_b32_e32 v2, s3
; GFX8V5-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX8V5-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
; GFX8V5-NEXT:    v_mov_b32_e32 v2, s1
; GFX8V5-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX8V5-NEXT:    flat_store_dword v[0:1], v4
; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
; GFX8V5-NEXT:    v_mov_b32_e32 v0, 2
; GFX8V5-NEXT:    flat_store_dword v[2:3], v0
; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
; GFX8V5-NEXT:    s_endpgm
;
; GFX9V3-LABEL: addrspacecast:
; GFX9V3:       ; %bb.0:
; GFX9V3-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9V3-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V3-NEXT:    s_lshl_b32 s2, s2, 16
; GFX9V3-NEXT:    v_mov_b32_e32 v0, s2
; GFX9V3-NEXT:    v_mov_b32_e32 v4, 1
; GFX9V3-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V3-NEXT:    s_cmp_lg_u32 s0, -1
; GFX9V3-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX9V3-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
; GFX9V3-NEXT:    v_mov_b32_e32 v0, s0
; GFX9V3-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V3-NEXT:    s_lshl_b32 s0, s0, 16
; GFX9V3-NEXT:    s_cmp_lg_u32 s1, -1
; GFX9V3-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX9V3-NEXT:    v_mov_b32_e32 v2, s0
; GFX9V3-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX9V3-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
; GFX9V3-NEXT:    v_mov_b32_e32 v2, s1
; GFX9V3-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX9V3-NEXT:    flat_store_dword v[0:1], v4
; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
; GFX9V3-NEXT:    v_mov_b32_e32 v0, 2
; GFX9V3-NEXT:    flat_store_dword v[2:3], v0
; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
; GFX9V3-NEXT:    s_endpgm
;
; GFX9V4-LABEL: addrspacecast:
; GFX9V4:       ; %bb.0:
; GFX9V4-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9V4-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V4-NEXT:    s_lshl_b32 s2, s2, 16
; GFX9V4-NEXT:    v_mov_b32_e32 v0, s2
; GFX9V4-NEXT:    v_mov_b32_e32 v4, 1
; GFX9V4-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT:    s_cmp_lg_u32 s0, -1
; GFX9V4-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX9V4-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
; GFX9V4-NEXT:    v_mov_b32_e32 v0, s0
; GFX9V4-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V4-NEXT:    s_lshl_b32 s0, s0, 16
; GFX9V4-NEXT:    s_cmp_lg_u32 s1, -1
; GFX9V4-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX9V4-NEXT:    v_mov_b32_e32 v2, s0
; GFX9V4-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX9V4-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
; GFX9V4-NEXT:    v_mov_b32_e32 v2, s1
; GFX9V4-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX9V4-NEXT:    flat_store_dword v[0:1], v4
; GFX9V4-NEXT:    s_waitcnt vmcnt(0)
; GFX9V4-NEXT:    v_mov_b32_e32 v0, 2
; GFX9V4-NEXT:    flat_store_dword v[2:3], v0
; GFX9V4-NEXT:    s_waitcnt vmcnt(0)
; GFX9V4-NEXT:    s_endpgm
;
; GFX9V5-LABEL: addrspacecast:
; GFX9V5:       ; %bb.0:
; GFX9V5-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9V5-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V5-NEXT:    s_lshl_b32 s2, s2, 16
; GFX9V5-NEXT:    v_mov_b32_e32 v0, s2
; GFX9V5-NEXT:    v_mov_b32_e32 v4, 1
; GFX9V5-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT:    s_cmp_lg_u32 s0, -1
; GFX9V5-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX9V5-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
; GFX9V5-NEXT:    v_mov_b32_e32 v0, s0
; GFX9V5-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V5-NEXT:    s_lshl_b32 s0, s0, 16
; GFX9V5-NEXT:    s_cmp_lg_u32 s1, -1
; GFX9V5-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX9V5-NEXT:    v_mov_b32_e32 v2, s0
; GFX9V5-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX9V5-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
; GFX9V5-NEXT:    v_mov_b32_e32 v2, s1
; GFX9V5-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX9V5-NEXT:    flat_store_dword v[0:1], v4
; GFX9V5-NEXT:    s_waitcnt vmcnt(0)
; GFX9V5-NEXT:    v_mov_b32_e32 v0, 2
; GFX9V5-NEXT:    flat_store_dword v[2:3], v0
; GFX9V5-NEXT:    s_waitcnt vmcnt(0)
; GFX9V5-NEXT:    s_endpgm
  %flat.private = addrspacecast i32 addrspace(5)* %ptr.private to i32*
  %flat.local = addrspacecast i32 addrspace(3)* %ptr.local to i32*
  store volatile i32 1, i32* %flat.private
  store volatile i32 2, i32* %flat.local
  ret void
}

; Calls llvm.amdgcn.is.shared on a flat pointer argument and stores the
; zero-extended result with a volatile store.
; Expected output: the dword at byte offset 4 of the kernel arguments (the high
; half of %ptr) is compared for equality with a single 32-bit value. gfx803
; loads that value via s[4:5] at offset 0x40 (V3/V4) or 0xcc (V5); gfx906 reads
; bits 16..31 of HW_REG_SH_MEM_BASES and shifts them left by 16.
define amdgpu_kernel void @llvm_amdgcn_is_shared(i8* %ptr) {
; GFX8V3-LABEL: llvm_amdgcn_is_shared:
; GFX8V3:       ; %bb.0:
; GFX8V3-NEXT:    s_load_dword s0, s[4:5], 0x40
; GFX8V3-NEXT:    s_load_dword s1, s[6:7], 0x4
; GFX8V3-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8V3-NEXT:    s_cmp_eq_u32 s1, s0
; GFX8V3-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX8V3-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V3-NEXT:    flat_store_dword v[0:1], v0
; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
; GFX8V3-NEXT:    s_endpgm
;
; GFX8V4-LABEL: llvm_amdgcn_is_shared:
; GFX8V4:       ; %bb.0:
; GFX8V4-NEXT:    s_load_dword s0, s[4:5], 0x40
; GFX8V4-NEXT:    s_load_dword s1, s[6:7], 0x4
; GFX8V4-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8V4-NEXT:    s_cmp_eq_u32 s1, s0
; GFX8V4-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX8V4-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V4-NEXT:    flat_store_dword v[0:1], v0
; GFX8V4-NEXT:    s_waitcnt vmcnt(0)
; GFX8V4-NEXT:    s_endpgm
;
; GFX8V5-LABEL: llvm_amdgcn_is_shared:
; GFX8V5:       ; %bb.0:
; GFX8V5-NEXT:    s_load_dword s0, s[4:5], 0xcc
; GFX8V5-NEXT:    s_load_dword s1, s[4:5], 0x4
; GFX8V5-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT:    s_cmp_eq_u32 s1, s0
; GFX8V5-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX8V5-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V5-NEXT:    flat_store_dword v[0:1], v0
; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
; GFX8V5-NEXT:    s_endpgm
;
; GFX9V3-LABEL: llvm_amdgcn_is_shared:
; GFX9V3:       ; %bb.0:
; GFX9V3-NEXT:    s_load_dword s0, s[4:5], 0x4
; GFX9V3-NEXT:    s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V3-NEXT:    s_lshl_b32 s1, s1, 16
; GFX9V3-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V3-NEXT:    s_cmp_eq_u32 s0, s1
; GFX9V3-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX9V3-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V3-NEXT:    global_store_dword v[0:1], v0, off
; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
; GFX9V3-NEXT:    s_endpgm
;
; GFX9V4-LABEL: llvm_amdgcn_is_shared:
; GFX9V4:       ; %bb.0:
; GFX9V4-NEXT:    s_load_dword s0, s[4:5], 0x4
; GFX9V4-NEXT:    s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V4-NEXT:    s_lshl_b32 s1, s1, 16
; GFX9V4-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT:    s_cmp_eq_u32 s0, s1
; GFX9V4-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX9V4-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V4-NEXT:    global_store_dword v[0:1], v0, off
; GFX9V4-NEXT:    s_waitcnt vmcnt(0)
; GFX9V4-NEXT:    s_endpgm
;
; GFX9V5-LABEL: llvm_amdgcn_is_shared:
; GFX9V5:       ; %bb.0:
; GFX9V5-NEXT:    s_load_dword s0, s[4:5], 0x4
; GFX9V5-NEXT:    s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V5-NEXT:    s_lshl_b32 s1, s1, 16
; GFX9V5-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT:    s_cmp_eq_u32 s0, s1
; GFX9V5-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX9V5-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V5-NEXT:    global_store_dword v[0:1], v0, off
; GFX9V5-NEXT:    s_waitcnt vmcnt(0)
; GFX9V5-NEXT:    s_endpgm
  %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
  %zext = zext i1 %is.shared to i32
  store volatile i32 %zext, i32 addrspace(1)* undef
  ret void
}

; Same shape as the is.shared kernel, but for llvm.amdgcn.is.private.
; Expected output: gfx803 loads the compared value via s[4:5] at offset 0x44
; (V3/V4) or 0xc8 (V5); gfx906 reads bits 0..15 of HW_REG_SH_MEM_BASES and
; shifts them left by 16.
define amdgpu_kernel void @llvm_amdgcn_is_private(i8* %ptr) {
; GFX8V3-LABEL: llvm_amdgcn_is_private:
; GFX8V3:       ; %bb.0:
; GFX8V3-NEXT:    s_load_dword s0, s[4:5], 0x44
; GFX8V3-NEXT:    s_load_dword s1, s[6:7], 0x4
; GFX8V3-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8V3-NEXT:    s_cmp_eq_u32 s1, s0
; GFX8V3-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX8V3-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V3-NEXT:    flat_store_dword v[0:1], v0
; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
; GFX8V3-NEXT:    s_endpgm
;
; GFX8V4-LABEL: llvm_amdgcn_is_private:
; GFX8V4:       ; %bb.0:
; GFX8V4-NEXT:    s_load_dword s0, s[4:5], 0x44
; GFX8V4-NEXT:    s_load_dword s1, s[6:7], 0x4
; GFX8V4-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8V4-NEXT:    s_cmp_eq_u32 s1, s0
; GFX8V4-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX8V4-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V4-NEXT:    flat_store_dword v[0:1], v0
; GFX8V4-NEXT:    s_waitcnt vmcnt(0)
; GFX8V4-NEXT:    s_endpgm
;
; GFX8V5-LABEL: llvm_amdgcn_is_private:
; GFX8V5:       ; %bb.0:
; GFX8V5-NEXT:    s_load_dword s0, s[4:5], 0xc8
; GFX8V5-NEXT:    s_load_dword s1, s[4:5], 0x4
; GFX8V5-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT:    s_cmp_eq_u32 s1, s0
; GFX8V5-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX8V5-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8V5-NEXT:    flat_store_dword v[0:1], v0
; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
; GFX8V5-NEXT:    s_endpgm
;
; GFX9V3-LABEL: llvm_amdgcn_is_private:
; GFX9V3:       ; %bb.0:
; GFX9V3-NEXT:    s_load_dword s0, s[4:5], 0x4
; GFX9V3-NEXT:    s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V3-NEXT:    s_lshl_b32 s1, s1, 16
; GFX9V3-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V3-NEXT:    s_cmp_eq_u32 s0, s1
; GFX9V3-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX9V3-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V3-NEXT:    global_store_dword v[0:1], v0, off
; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
; GFX9V3-NEXT:    s_endpgm
;
; GFX9V4-LABEL: llvm_amdgcn_is_private:
; GFX9V4:       ; %bb.0:
; GFX9V4-NEXT:    s_load_dword s0, s[4:5], 0x4
; GFX9V4-NEXT:    s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V4-NEXT:    s_lshl_b32 s1, s1, 16
; GFX9V4-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT:    s_cmp_eq_u32 s0, s1
; GFX9V4-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX9V4-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V4-NEXT:    global_store_dword v[0:1], v0, off
; GFX9V4-NEXT:    s_waitcnt vmcnt(0)
; GFX9V4-NEXT:    s_endpgm
;
; GFX9V5-LABEL: llvm_amdgcn_is_private:
; GFX9V5:       ; %bb.0:
; GFX9V5-NEXT:    s_load_dword s0, s[4:5], 0x4
; GFX9V5-NEXT:    s_getreg_b32 s1, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V5-NEXT:    s_lshl_b32 s1, s1, 16
; GFX9V5-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT:    s_cmp_eq_u32 s0, s1
; GFX9V5-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GFX9V5-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9V5-NEXT:    global_store_dword v[0:1], v0, off
; GFX9V5-NEXT:    s_waitcnt vmcnt(0)
; GFX9V5-NEXT:    s_endpgm
  %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
  %zext = zext i1 %is.private to i32
  store volatile i32 %zext, i32 addrspace(1)* undef
  ret void
}

; Calls llvm.trap and then reaches unreachable.
; Expected output: every configuration ends in s_trap 2, but the setup differs.
; gfx803 V3/V4 and gfx906 V3 first copy s[4:5] into s[0:1]; gfx803 V5 instead
; loads s[0:1] from s[4:5] at offset 0xc8; gfx906 V4/V5 emit the bare s_trap 2.
define amdgpu_kernel void @llvm_trap() {
; GFX8V3-LABEL: llvm_trap:
; GFX8V3:       ; %bb.0:
; GFX8V3-NEXT:    s_mov_b64 s[0:1], s[4:5]
; GFX8V3-NEXT:    s_trap 2
;
; GFX8V4-LABEL: llvm_trap:
; GFX8V4:       ; %bb.0:
; GFX8V4-NEXT:    s_mov_b64 s[0:1], s[4:5]
; GFX8V4-NEXT:    s_trap 2
;
; GFX8V5-LABEL: llvm_trap:
; GFX8V5:       ; %bb.0:
; GFX8V5-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xc8
; GFX8V5-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT:    s_trap 2
;
; GFX9V3-LABEL: llvm_trap:
; GFX9V3:       ; %bb.0:
; GFX9V3-NEXT:    s_mov_b64 s[0:1], s[4:5]
; GFX9V3-NEXT:    s_trap 2
;
; GFX9V4-LABEL: llvm_trap:
; GFX9V4:       ; %bb.0:
; GFX9V4-NEXT:    s_trap 2
;
; GFX9V5-LABEL: llvm_trap:
; GFX9V5:       ; %bb.0:
; GFX9V5-NEXT:    s_trap 2
  call void @llvm.trap()
  unreachable
}

; Calls llvm.debugtrap and then reaches unreachable.
; Expected output: a bare s_trap 3 with no register setup in all six
; configurations.
define amdgpu_kernel void @llvm_debugtrap() {
; GFX8V3-LABEL: llvm_debugtrap:
; GFX8V3:       ; %bb.0:
; GFX8V3-NEXT:    s_trap 3
;
; GFX8V4-LABEL: llvm_debugtrap:
; GFX8V4:       ; %bb.0:
; GFX8V4-NEXT:    s_trap 3
;
; GFX8V5-LABEL: llvm_debugtrap:
; GFX8V5:       ; %bb.0:
; GFX8V5-NEXT:    s_trap 3
;
; GFX9V3-LABEL: llvm_debugtrap:
; GFX9V3:       ; %bb.0:
; GFX9V3-NEXT:    s_trap 3
;
; GFX9V4-LABEL: llvm_debugtrap:
; GFX9V4:       ; %bb.0:
; GFX9V4-NEXT:    s_trap 3
;
; GFX9V5-LABEL: llvm_debugtrap:
; GFX9V5:       ; %bb.0:
; GFX9V5-NEXT:    s_trap 3
  call void @llvm.debugtrap()
  unreachable
}

; Reads one byte (volatile) through each of the queue, implicitarg and dispatch
; pointers, then stores the dispatch id to the pointer argument.
; Expected output under V3/V4: the three loads use s[6:7], s[8:9] + 8 and
; s[4:5] as bases, and the dispatch id comes from s[10:11].
; Expected output under V5: the remaining inputs move down one SGPR pair
; (s[6:7] + 8 and s[8:9]), and the first load uses an address that is never
; written in the kernel (v[0:1] on gfx803, s[0:1] on gfx906) - presumably
; because the queue pointer is not passed to the kernel in that configuration.
define amdgpu_kernel void @llvm_amdgcn_queue_ptr(i64 addrspace(1)* %ptr) {
; GFX8V3-LABEL: llvm_amdgcn_queue_ptr:
; GFX8V3:       ; %bb.0:
; GFX8V3-NEXT:    v_mov_b32_e32 v0, s6
; GFX8V3-NEXT:    v_mov_b32_e32 v1, s7
; GFX8V3-NEXT:    s_add_u32 s0, s8, 8
; GFX8V3-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX8V3-NEXT:    s_addc_u32 s1, s9, 0
; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
; GFX8V3-NEXT:    v_mov_b32_e32 v0, s0
; GFX8V3-NEXT:    v_mov_b32_e32 v1, s1
; GFX8V3-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
; GFX8V3-NEXT:    v_mov_b32_e32 v0, s4
; GFX8V3-NEXT:    v_mov_b32_e32 v1, s5
; GFX8V3-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX8V3-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX8V3-NEXT:    v_mov_b32_e32 v2, s10
; GFX8V3-NEXT:    v_mov_b32_e32 v3, s11
; GFX8V3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8V3-NEXT:    v_mov_b32_e32 v0, s0
; GFX8V3-NEXT:    v_mov_b32_e32 v1, s1
; GFX8V3-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
; GFX8V3-NEXT:    s_endpgm
;
; GFX8V4-LABEL: llvm_amdgcn_queue_ptr:
; GFX8V4:       ; %bb.0:
; GFX8V4-NEXT:    v_mov_b32_e32 v0, s6
; GFX8V4-NEXT:    v_mov_b32_e32 v1, s7
; GFX8V4-NEXT:    s_add_u32 s0, s8, 8
; GFX8V4-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX8V4-NEXT:    s_addc_u32 s1, s9, 0
; GFX8V4-NEXT:    s_waitcnt vmcnt(0)
; GFX8V4-NEXT:    v_mov_b32_e32 v0, s0
; GFX8V4-NEXT:    v_mov_b32_e32 v1, s1
; GFX8V4-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX8V4-NEXT:    s_waitcnt vmcnt(0)
; GFX8V4-NEXT:    v_mov_b32_e32 v0, s4
; GFX8V4-NEXT:    v_mov_b32_e32 v1, s5
; GFX8V4-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX8V4-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX8V4-NEXT:    v_mov_b32_e32 v2, s10
; GFX8V4-NEXT:    v_mov_b32_e32 v3, s11
; GFX8V4-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8V4-NEXT:    v_mov_b32_e32 v0, s0
; GFX8V4-NEXT:    v_mov_b32_e32 v1, s1
; GFX8V4-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; GFX8V4-NEXT:    s_waitcnt vmcnt(0)
; GFX8V4-NEXT:    s_endpgm
;
; GFX8V5-LABEL: llvm_amdgcn_queue_ptr:
; GFX8V5:       ; %bb.0:
; GFX8V5-NEXT:    s_add_u32 s0, s6, 8
; GFX8V5-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX8V5-NEXT:    s_addc_u32 s1, s7, 0
; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
; GFX8V5-NEXT:    v_mov_b32_e32 v0, s0
; GFX8V5-NEXT:    v_mov_b32_e32 v1, s1
; GFX8V5-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
; GFX8V5-NEXT:    v_mov_b32_e32 v0, s4
; GFX8V5-NEXT:    v_mov_b32_e32 v1, s5
; GFX8V5-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX8V5-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX8V5-NEXT:    v_mov_b32_e32 v2, s8
; GFX8V5-NEXT:    v_mov_b32_e32 v3, s9
; GFX8V5-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8V5-NEXT:    v_mov_b32_e32 v0, s0
; GFX8V5-NEXT:    v_mov_b32_e32 v1, s1
; GFX8V5-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
; GFX8V5-NEXT:    s_endpgm
;
; GFX9V3-LABEL: llvm_amdgcn_queue_ptr:
; GFX9V3:       ; %bb.0:
; GFX9V3-NEXT:    v_mov_b32_e32 v2, 0
; GFX9V3-NEXT:    global_load_ubyte v0, v2, s[6:7] glc
; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
; GFX9V3-NEXT:    global_load_ubyte v0, v2, s[8:9] offset:8 glc
; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
; GFX9V3-NEXT:    global_load_ubyte v0, v2, s[4:5] glc
; GFX9V3-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
; GFX9V3-NEXT:    v_mov_b32_e32 v0, s10
; GFX9V3-NEXT:    v_mov_b32_e32 v1, s11
; GFX9V3-NEXT:    ; kill: killed $sgpr6_sgpr7
; GFX9V3-NEXT:    ; kill: killed $sgpr4_sgpr5
; GFX9V3-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V3-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
; GFX9V3-NEXT:    s_endpgm
;
; GFX9V4-LABEL: llvm_amdgcn_queue_ptr:
; GFX9V4:       ; %bb.0:
; GFX9V4-NEXT:    v_mov_b32_e32 v2, 0
; GFX9V4-NEXT:    global_load_ubyte v0, v2, s[6:7] glc
; GFX9V4-NEXT:    s_waitcnt vmcnt(0)
; GFX9V4-NEXT:    global_load_ubyte v0, v2, s[8:9] offset:8 glc
; GFX9V4-NEXT:    s_waitcnt vmcnt(0)
; GFX9V4-NEXT:    global_load_ubyte v0, v2, s[4:5] glc
; GFX9V4-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V4-NEXT:    s_waitcnt vmcnt(0)
; GFX9V4-NEXT:    v_mov_b32_e32 v0, s10
; GFX9V4-NEXT:    v_mov_b32_e32 v1, s11
; GFX9V4-NEXT:    ; kill: killed $sgpr6_sgpr7
; GFX9V4-NEXT:    ; kill: killed $sgpr4_sgpr5
; GFX9V4-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9V4-NEXT:    s_waitcnt vmcnt(0)
; GFX9V4-NEXT:    s_endpgm
;
; GFX9V5-LABEL: llvm_amdgcn_queue_ptr:
; GFX9V5:       ; %bb.0:
; GFX9V5-NEXT:    v_mov_b32_e32 v2, 0
; GFX9V5-NEXT:    global_load_ubyte v0, v2, s[0:1] glc
; GFX9V5-NEXT:    s_waitcnt vmcnt(0)
; GFX9V5-NEXT:    global_load_ubyte v0, v2, s[6:7] offset:8 glc
; GFX9V5-NEXT:    s_waitcnt vmcnt(0)
; GFX9V5-NEXT:    global_load_ubyte v0, v2, s[4:5] glc
; GFX9V5-NEXT:    ; kill: killed $sgpr0_sgpr1
; GFX9V5-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX9V5-NEXT:    s_waitcnt vmcnt(0)
; GFX9V5-NEXT:    v_mov_b32_e32 v0, s8
; GFX9V5-NEXT:    v_mov_b32_e32 v1, s9
; GFX9V5-NEXT:    ; kill: killed $sgpr4_sgpr5
; GFX9V5-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9V5-NEXT:    s_waitcnt vmcnt(0)
; GFX9V5-NEXT:    s_endpgm
  %queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
  %queue.load = load volatile i8, i8 addrspace(4)* %queue.ptr
  %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr
  %dispatch.load = load volatile i8, i8 addrspace(4)* %dispatch.ptr
  store volatile i64 %dispatch.id, i64 addrspace(1)* %ptr
  ret void
}

; Intrinsics used by the kernels above.
declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
declare i64 @llvm.amdgcn.dispatch.id()
declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
declare i1 @llvm.amdgcn.is.shared(i8*)
declare i1 @llvm.amdgcn.is.private(i8*)
declare void @llvm.trap()
declare void @llvm.debugtrap()