; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s

; Test the localizer did something and we don't materialize all
; constants in SGPRs in the entry block.

; The entry block branches on %cond to bb0 or bb1. Both successors perform
; volatile stores of the same six constants (123, 456, 999, 1000, 455, 23526),
; in a different order in each block. In the expected output every store is
; preceded, inside the block that performs it, by a v_mov_b32 of its
; immediate: 0x7b = 123, 0x1c8 = 456, 0x3e7 = 999, 0x3e8 = 1000,
; 0x1c7 = 455, 0x5be6 = 23526.
define amdgpu_kernel void @localize_constants(i1 %cond) {
; GFX9-LABEL: localize_constants:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_load_dword s1, s[4:5], 0x0
; GFX9-NEXT:    s_mov_b32 s0, -1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_xor_b32 s1, s1, -1
; GFX9-NEXT:    s_and_b32 s1, s1, 1
; GFX9-NEXT:    s_cmp_lg_u32 s1, 0
; GFX9-NEXT:    s_cbranch_scc0 .LBB0_2
; GFX9-NEXT:  ; %bb.1: ; %bb1
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x5be6
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1c7
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3e8
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1c8
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3e7
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT:    s_mov_b32 s0, 0
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:  .LBB0_2: ; %Flow
; GFX9-NEXT:    s_xor_b32 s0, s0, -1
; GFX9-NEXT:    s_and_b32 s0, s0, 1
; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
; GFX9-NEXT:    s_cbranch_scc1 .LBB0_4
; GFX9-NEXT:  ; %bb.3: ; %bb0
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1c8
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3e7
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3e8
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1c7
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x5be6
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:  .LBB0_4: ; %bb2
; GFX9-NEXT:    s_endpgm
entry:
  br i1 %cond, label %bb0, label %bb1

; Every store below is volatile and writes through an undef pointer; the
; stored constants are the only values that differ between the stores.
bb0:
  store volatile i32 123, i32 addrspace(1)* undef
  store volatile i32 456, i32 addrspace(1)* undef
  store volatile i32 999, i32 addrspace(1)* undef
  store volatile i32 1000, i32 addrspace(1)* undef
  store volatile i32 455, i32 addrspace(1)* undef
  store volatile i32 23526, i32 addrspace(1)* undef
  br label %bb2

; Same constants as bb0, stored in a different order.
bb1:
  store volatile i32 23526, i32 addrspace(1)* undef
  store volatile i32 455, i32 addrspace(1)* undef
  store volatile i32 1000, i32 addrspace(1)* undef
  store volatile i32 456, i32 addrspace(1)* undef
  store volatile i32 999, i32 addrspace(1)* undef
  store volatile i32 123, i32 addrspace(1)* undef
  br label %bb2

bb2:
  ret void
}

; FIXME: These aren't localized because these were legalized before
; the localizer, and are no longer G_GLOBAL_VALUE.
; Four addrspace(1) globals declared without a linkage keyword. The expected
; code for @localize_globals reaches them through gotpcrel32 relocations
; followed by an s_load_dwordx2 of the address.
@gv0 = addrspace(1) global i32 undef, align 4
@gv1 = addrspace(1) global i32 undef, align 4
@gv2 = addrspace(1) global i32 undef, align 4
@gv3 = addrspace(1) global i32 undef, align 4

; Kernel that branches on %cond; bb0 stores to @gv0/@gv1 and bb1 stores to
; @gv2/@gv3, so each global is used in exactly one block.
; NOTE(review): the FIXME preceding these globals says they are not localized,
; yet the expected output below shows each s_getpc_b64 address sequence inside
; the block that uses it -- confirm whether that FIXME is still accurate.
define amdgpu_kernel void @localize_globals(i1 %cond) {
; GFX9-LABEL: localize_globals:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_load_dword s1, s[4:5], 0x0
; GFX9-NEXT:    s_mov_b32 s0, -1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_xor_b32 s1, s1, -1
; GFX9-NEXT:    s_and_b32 s1, s1, 1
; GFX9-NEXT:    s_cmp_lg_u32 s1, 0
; GFX9-NEXT:    s_cbranch_scc0 .LBB1_2
; GFX9-NEXT:  ; %bb.1: ; %bb1
; GFX9-NEXT:    s_getpc_b64 s[0:1]
; GFX9-NEXT:    s_add_u32 s0, s0, gv2@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s1, s1, gv2@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
; GFX9-NEXT:    s_getpc_b64 s[0:1]
; GFX9-NEXT:    s_add_u32 s0, s0, gv3@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s1, s1, gv3@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, 1
; GFX9-NEXT:    s_mov_b32 s0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_store_dword v0, v0, s[2:3]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:  .LBB1_2: ; %Flow
; GFX9-NEXT:    s_xor_b32 s0, s0, -1
; GFX9-NEXT:    s_and_b32 s0, s0, 1
; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
; GFX9-NEXT:    s_cbranch_scc1 .LBB1_4
; GFX9-NEXT:  ; %bb.3: ; %bb0
; GFX9-NEXT:    s_getpc_b64 s[0:1]
; GFX9-NEXT:    s_add_u32 s0, s0, gv0@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s1, s1, gv0@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX9-NEXT:    s_getpc_b64 s[2:3]
; GFX9-NEXT:    s_add_u32 s2, s2, gv1@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s3, s3, gv1@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_store_dword v0, v0, s[0:1]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:  .LBB1_4: ; %bb2
; GFX9-NEXT:    s_endpgm
entry:
  br i1 %cond, label %bb0, label %bb1

bb0:
  store volatile i32 0, i32 addrspace(1)* @gv0
  store volatile i32 1, i32 addrspace(1)* @gv1
  br label %bb2

bb1:
  store volatile i32 0, i32 addrspace(1)* @gv2
  store volatile i32 1, i32 addrspace(1)* @gv3
  br label %bb2

bb2:
  ret void
}

; Internal-linkage counterparts of the globals above. The expected code for
; @localize_internal_globals addresses them with rel32 relocations and no
; load of the address.
@static.gv0 = internal addrspace(1) global i32 undef, align 4
@static.gv1 = internal addrspace(1) global i32 undef, align 4
@static.gv2 = internal addrspace(1) global i32 undef, align 4
@static.gv3 = internal addrspace(1) global i32 undef, align 4

; Same IR shape as @localize_globals, but defined as a plain (non-kernel)
; function and storing to the internal globals. In the expected output the
; branch on %cond is lowered with exec-mask instructions
; (s_and_saveexec_b64 / s_andn2_saveexec_b64), and each rel32 address
; sequence appears directly before the store that uses it.
define void @localize_internal_globals(i1 %cond) {
; GFX9-LABEL: localize_internal_globals:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
; GFX9-NEXT:    s_xor_b64 s[4:5], vcc, -1
; GFX9-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
; GFX9-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
; GFX9-NEXT:    s_cbranch_execnz .LBB2_3
; GFX9-NEXT:  ; %bb.1: ; %Flow
; GFX9-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
; GFX9-NEXT:    s_cbranch_execnz .LBB2_4
; GFX9-NEXT:  .LBB2_2: ; %bb2
; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
; GFX9-NEXT:  .LBB2_3: ; %bb1
; GFX9-NEXT:    s_getpc_b64 s[6:7]
; GFX9-NEXT:    s_add_u32 s6, s6, static.gv2@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s7, s7, static.gv2@rel32@hi+12
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    global_store_dword v0, v0, s[6:7]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_getpc_b64 s[6:7]
; GFX9-NEXT:    s_add_u32 s6, s6, static.gv3@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s7, s7, static.gv3@rel32@hi+12
; GFX9-NEXT:    v_mov_b32_e32 v1, 1
; GFX9-NEXT:    global_store_dword v0, v1, s[6:7]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
; GFX9-NEXT:    s_cbranch_execz .LBB2_2
; GFX9-NEXT:  .LBB2_4: ; %bb0
; GFX9-NEXT:    s_getpc_b64 s[6:7]
; GFX9-NEXT:    s_add_u32 s6, s6, static.gv0@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s7, s7, static.gv0@rel32@hi+12
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    global_store_dword v0, v0, s[6:7]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_getpc_b64 s[6:7]
; GFX9-NEXT:    s_add_u32 s6, s6, static.gv1@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s7, s7, static.gv1@rel32@hi+12
; GFX9-NEXT:    v_mov_b32_e32 v1, 1
; GFX9-NEXT:    global_store_dword v0, v1, s[6:7]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
entry:
  br i1 %cond, label %bb0, label %bb1

bb0:
  store volatile i32 0, i32 addrspace(1)* @static.gv0
  store volatile i32 1, i32 addrspace(1)* @static.gv1
  br label %bb2

bb1:
  store volatile i32 0, i32 addrspace(1)* @static.gv2
  store volatile i32 1, i32 addrspace(1)* @static.gv3
  br label %bb2

bb2:
  ret void
}

; This would crash from using the wrong insert point
; The entry block performs a volatile load through a null addrspace(1)
; pointer, and the following block calls a null function pointer. %arg0 is
; not referenced by the body. The expected call is an s_swappc_b64 whose
; target operand is the immediate 0.
define void @sink_null_insert_pt(i32 addrspace(4)* %arg0) {
; GFX9-LABEL: sink_null_insert_pt:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[16:17], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[16:17]
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    global_load_dword v0, v[0:1], off glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    s_swappc_b64 s[30:31], 0
; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
entry:
  %load0 = load volatile i32, i32 addrspace(1)* null, align 4
  br label %bb1

bb1:
  call void null()
  ret void
}