1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
3
4; Test that the localizer did something and that we don't materialize all
5; constants in SGPRs in the entry block.
6
; Kernel that branches on %cond into one of two blocks, each of which stores
; the same six i32 constants (in a different order) before joining in %bb2.
; The expected output below materializes every constant with a v_mov_b32
; immediately before the store that uses it, inside the block doing the store.
7define amdgpu_kernel void @localize_constants(i1 %cond) {
8; GFX9-LABEL: localize_constants:
9; GFX9:       ; %bb.0: ; %entry
10; GFX9-NEXT:    s_load_dword s1, s[4:5], 0x0
11; GFX9-NEXT:    s_mov_b32 s0, -1
12; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
13; GFX9-NEXT:    s_xor_b32 s1, s1, -1
14; GFX9-NEXT:    s_and_b32 s1, s1, 1
15; GFX9-NEXT:    s_cmp_lg_u32 s1, 0
16; GFX9-NEXT:    s_cbranch_scc0 .LBB0_2
17; GFX9-NEXT:  ; %bb.1: ; %bb1
18; GFX9-NEXT:    v_mov_b32_e32 v0, 0x5be6
19; GFX9-NEXT:    global_store_dword v[0:1], v0, off
20; GFX9-NEXT:    s_waitcnt vmcnt(0)
21; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1c7
22; GFX9-NEXT:    global_store_dword v[0:1], v0, off
23; GFX9-NEXT:    s_waitcnt vmcnt(0)
24; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3e8
25; GFX9-NEXT:    global_store_dword v[0:1], v0, off
26; GFX9-NEXT:    s_waitcnt vmcnt(0)
27; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1c8
28; GFX9-NEXT:    global_store_dword v[0:1], v0, off
29; GFX9-NEXT:    s_waitcnt vmcnt(0)
30; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3e7
31; GFX9-NEXT:    global_store_dword v[0:1], v0, off
32; GFX9-NEXT:    s_waitcnt vmcnt(0)
33; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7b
34; GFX9-NEXT:    s_mov_b32 s0, 0
35; GFX9-NEXT:    global_store_dword v[0:1], v0, off
36; GFX9-NEXT:    s_waitcnt vmcnt(0)
37; GFX9-NEXT:  .LBB0_2: ; %Flow
38; GFX9-NEXT:    s_xor_b32 s0, s0, -1
39; GFX9-NEXT:    s_and_b32 s0, s0, 1
40; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
41; GFX9-NEXT:    s_cbranch_scc1 .LBB0_4
42; GFX9-NEXT:  ; %bb.3: ; %bb0
43; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7b
44; GFX9-NEXT:    global_store_dword v[0:1], v0, off
45; GFX9-NEXT:    s_waitcnt vmcnt(0)
46; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1c8
47; GFX9-NEXT:    global_store_dword v[0:1], v0, off
48; GFX9-NEXT:    s_waitcnt vmcnt(0)
49; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3e7
50; GFX9-NEXT:    global_store_dword v[0:1], v0, off
51; GFX9-NEXT:    s_waitcnt vmcnt(0)
52; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3e8
53; GFX9-NEXT:    global_store_dword v[0:1], v0, off
54; GFX9-NEXT:    s_waitcnt vmcnt(0)
55; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1c7
56; GFX9-NEXT:    global_store_dword v[0:1], v0, off
57; GFX9-NEXT:    s_waitcnt vmcnt(0)
58; GFX9-NEXT:    v_mov_b32_e32 v0, 0x5be6
59; GFX9-NEXT:    global_store_dword v[0:1], v0, off
60; GFX9-NEXT:    s_waitcnt vmcnt(0)
61; GFX9-NEXT:  .LBB0_4: ; %bb2
62; GFX9-NEXT:    s_endpgm
63entry:
64  br i1 %cond, label %bb0, label %bb1
65
; Stores 123, 456, 999, 1000, 455, 23526 (0x7b, 0x1c8, 0x3e7, 0x3e8, 0x1c7,
; 0x5be6 in the checks). The stores are volatile and target an undef pointer.
66bb0:
67  store volatile i32 123, i32 addrspace(1)* undef
68  store volatile i32 456, i32 addrspace(1)* undef
69  store volatile i32 999, i32 addrspace(1)* undef
70  store volatile i32 1000, i32 addrspace(1)* undef
71  store volatile i32 455, i32 addrspace(1)* undef
72  store volatile i32 23526, i32 addrspace(1)* undef
73  br label %bb2
74
; Same six constants as bb0, stored in a different order, so each constant
; has a use in both successor blocks.
75bb1:
76  store volatile i32 23526, i32 addrspace(1)* undef
77  store volatile i32 455, i32 addrspace(1)* undef
78  store volatile i32 1000, i32 addrspace(1)* undef
79  store volatile i32 456, i32 addrspace(1)* undef
80  store volatile i32 999, i32 addrspace(1)* undef
81  store volatile i32 123, i32 addrspace(1)* undef
82  br label %bb2
83
84bb2:
85  ret void
86}
87
88; FIXME: These aren't localized because these were legalized before
89; the localizer, and are no longer G_GLOBAL_VALUE.
; Four i32 globals in addrspace(1) with default (external) linkage, written by
; the stores in @localize_globals. The expected output for that function
; reaches them through gotpcrel32 relocations followed by a load.
90@gv0 = addrspace(1) global i32 undef, align 4
91@gv1 = addrspace(1) global i32 undef, align 4
92@gv2 = addrspace(1) global i32 undef, align 4
93@gv3 = addrspace(1) global i32 undef, align 4
94
; Kernel that branches on %cond; %bb0 stores to @gv0/@gv1 and %bb1 stores to
; @gv2/@gv3. In the expected output each global's address is formed
; (s_getpc_b64 + gotpcrel32 add + s_load_dwordx2) inside the block that
; stores to it.
; NOTE(review): the FIXME above the @gv0-@gv3 declarations says these are not
; localized, while the checks here show the address computation inside
; bb0/bb1 - confirm whether that FIXME is stale.
95define amdgpu_kernel void @localize_globals(i1 %cond) {
96; GFX9-LABEL: localize_globals:
97; GFX9:       ; %bb.0: ; %entry
98; GFX9-NEXT:    s_load_dword s1, s[4:5], 0x0
99; GFX9-NEXT:    s_mov_b32 s0, -1
100; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
101; GFX9-NEXT:    s_xor_b32 s1, s1, -1
102; GFX9-NEXT:    s_and_b32 s1, s1, 1
103; GFX9-NEXT:    s_cmp_lg_u32 s1, 0
104; GFX9-NEXT:    s_cbranch_scc0 .LBB1_2
105; GFX9-NEXT:  ; %bb.1: ; %bb1
106; GFX9-NEXT:    s_getpc_b64 s[0:1]
107; GFX9-NEXT:    s_add_u32 s0, s0, gv2@gotpcrel32@lo+4
108; GFX9-NEXT:    s_addc_u32 s1, s1, gv2@gotpcrel32@hi+12
109; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
110; GFX9-NEXT:    s_getpc_b64 s[0:1]
111; GFX9-NEXT:    s_add_u32 s0, s0, gv3@gotpcrel32@lo+4
112; GFX9-NEXT:    s_addc_u32 s1, s1, gv3@gotpcrel32@hi+12
113; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
114; GFX9-NEXT:    v_mov_b32_e32 v0, 0
115; GFX9-NEXT:    v_mov_b32_e32 v1, 1
116; GFX9-NEXT:    s_mov_b32 s0, 0
117; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
118; GFX9-NEXT:    global_store_dword v0, v0, s[2:3]
119; GFX9-NEXT:    s_waitcnt vmcnt(0)
120; GFX9-NEXT:    global_store_dword v0, v1, s[4:5]
121; GFX9-NEXT:    s_waitcnt vmcnt(0)
122; GFX9-NEXT:  .LBB1_2: ; %Flow
123; GFX9-NEXT:    s_xor_b32 s0, s0, -1
124; GFX9-NEXT:    s_and_b32 s0, s0, 1
125; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
126; GFX9-NEXT:    s_cbranch_scc1 .LBB1_4
127; GFX9-NEXT:  ; %bb.3: ; %bb0
128; GFX9-NEXT:    s_getpc_b64 s[0:1]
129; GFX9-NEXT:    s_add_u32 s0, s0, gv0@gotpcrel32@lo+4
130; GFX9-NEXT:    s_addc_u32 s1, s1, gv0@gotpcrel32@hi+12
131; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
132; GFX9-NEXT:    s_getpc_b64 s[2:3]
133; GFX9-NEXT:    s_add_u32 s2, s2, gv1@gotpcrel32@lo+4
134; GFX9-NEXT:    s_addc_u32 s3, s3, gv1@gotpcrel32@hi+12
135; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
136; GFX9-NEXT:    v_mov_b32_e32 v0, 0
137; GFX9-NEXT:    v_mov_b32_e32 v1, 1
138; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
139; GFX9-NEXT:    global_store_dword v0, v0, s[0:1]
140; GFX9-NEXT:    s_waitcnt vmcnt(0)
141; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
142; GFX9-NEXT:    s_waitcnt vmcnt(0)
143; GFX9-NEXT:  .LBB1_4: ; %bb2
144; GFX9-NEXT:    s_endpgm
145entry:
146  br i1 %cond, label %bb0, label %bb1
147
; Volatile stores of 0 and 1 to the first pair of globals.
148bb0:
149  store volatile i32 0, i32 addrspace(1)* @gv0
150  store volatile i32 1, i32 addrspace(1)* @gv1
151  br label %bb2
152
; Same values stored to the second pair, so no global is shared between the
; two successor blocks.
153bb1:
154  store volatile i32 0, i32 addrspace(1)* @gv2
155  store volatile i32 1, i32 addrspace(1)* @gv3
156  br label %bb2
157
158bb2:
159  ret void
160}
161
; Internal-linkage counterparts of @gv0-@gv3, written by the stores in
; @localize_internal_globals. The expected output for that function addresses
; them with rel32 relocations and no intermediate load.
162@static.gv0 = internal addrspace(1) global i32 undef, align 4
163@static.gv1 = internal addrspace(1) global i32 undef, align 4
164@static.gv2 = internal addrspace(1) global i32 undef, align 4
165@static.gv3 = internal addrspace(1) global i32 undef, align 4
166
; Non-kernel function (no amdgpu_kernel calling convention) that branches on
; %cond; %bb0 stores to @static.gv0/@static.gv1 and %bb1 stores to
; @static.gv2/@static.gv3. In the expected output the branch is lowered with
; exec-mask manipulation, and each store is preceded by its own
; s_getpc_b64 + rel32 address computation inside the storing block.
167define void @localize_internal_globals(i1 %cond) {
168; GFX9-LABEL: localize_internal_globals:
169; GFX9:       ; %bb.0: ; %entry
170; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
171; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
172; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
173; GFX9-NEXT:    s_xor_b64 s[4:5], vcc, -1
174; GFX9-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
175; GFX9-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
176; GFX9-NEXT:    s_cbranch_execnz .LBB2_3
177; GFX9-NEXT:  ; %bb.1: ; %Flow
178; GFX9-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
179; GFX9-NEXT:    s_cbranch_execnz .LBB2_4
180; GFX9-NEXT:  .LBB2_2: ; %bb2
181; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
182; GFX9-NEXT:    s_setpc_b64 s[30:31]
183; GFX9-NEXT:  .LBB2_3: ; %bb1
184; GFX9-NEXT:    s_getpc_b64 s[6:7]
185; GFX9-NEXT:    s_add_u32 s6, s6, static.gv2@rel32@lo+4
186; GFX9-NEXT:    s_addc_u32 s7, s7, static.gv2@rel32@hi+12
187; GFX9-NEXT:    v_mov_b32_e32 v0, 0
188; GFX9-NEXT:    global_store_dword v0, v0, s[6:7]
189; GFX9-NEXT:    s_waitcnt vmcnt(0)
190; GFX9-NEXT:    s_getpc_b64 s[6:7]
191; GFX9-NEXT:    s_add_u32 s6, s6, static.gv3@rel32@lo+4
192; GFX9-NEXT:    s_addc_u32 s7, s7, static.gv3@rel32@hi+12
193; GFX9-NEXT:    v_mov_b32_e32 v1, 1
194; GFX9-NEXT:    global_store_dword v0, v1, s[6:7]
195; GFX9-NEXT:    s_waitcnt vmcnt(0)
196; GFX9-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
197; GFX9-NEXT:    s_cbranch_execz .LBB2_2
198; GFX9-NEXT:  .LBB2_4: ; %bb0
199; GFX9-NEXT:    s_getpc_b64 s[6:7]
200; GFX9-NEXT:    s_add_u32 s6, s6, static.gv0@rel32@lo+4
201; GFX9-NEXT:    s_addc_u32 s7, s7, static.gv0@rel32@hi+12
202; GFX9-NEXT:    v_mov_b32_e32 v0, 0
203; GFX9-NEXT:    global_store_dword v0, v0, s[6:7]
204; GFX9-NEXT:    s_waitcnt vmcnt(0)
205; GFX9-NEXT:    s_getpc_b64 s[6:7]
206; GFX9-NEXT:    s_add_u32 s6, s6, static.gv1@rel32@lo+4
207; GFX9-NEXT:    s_addc_u32 s7, s7, static.gv1@rel32@hi+12
208; GFX9-NEXT:    v_mov_b32_e32 v1, 1
209; GFX9-NEXT:    global_store_dword v0, v1, s[6:7]
210; GFX9-NEXT:    s_waitcnt vmcnt(0)
211; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
212; GFX9-NEXT:    s_setpc_b64 s[30:31]
213entry:
214  br i1 %cond, label %bb0, label %bb1
215
; Volatile stores of 0 and 1 to the first pair of internal globals.
216bb0:
217  store volatile i32 0, i32 addrspace(1)* @static.gv0
218  store volatile i32 1, i32 addrspace(1)* @static.gv1
219  br label %bb2
220
; Same values stored to the second pair of internal globals.
221bb1:
222  store volatile i32 0, i32 addrspace(1)* @static.gv2
223  store volatile i32 1, i32 addrspace(1)* @static.gv3
224  br label %bb2
225
226bb2:
227  ret void
228}
229
230; This would crash from using the wrong insert point
; Regression input: the entry block performs a volatile load through a null
; addrspace(1) pointer and then falls through to %bb1, which makes an indirect
; call through a null function pointer. %arg0 is not used by the body.
; The expected output only pins down the generated code (load of address 0,
; then s_swappc_b64 to 0 wrapped in the usual call frame setup/teardown).
231define void @sink_null_insert_pt(i32 addrspace(4)* %arg0) {
232; GFX9-LABEL: sink_null_insert_pt:
233; GFX9:       ; %bb.0: ; %entry
234; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
235; GFX9-NEXT:    s_or_saveexec_b64 s[16:17], -1
236; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
237; GFX9-NEXT:    s_mov_b64 exec, s[16:17]
238; GFX9-NEXT:    v_mov_b32_e32 v0, 0
239; GFX9-NEXT:    v_mov_b32_e32 v1, 0
240; GFX9-NEXT:    global_load_dword v0, v[0:1], off glc
241; GFX9-NEXT:    s_waitcnt vmcnt(0)
242; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
243; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
244; GFX9-NEXT:    s_mov_b32 s33, s32
245; GFX9-NEXT:    s_addk_i32 s32, 0x400
246; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
247; GFX9-NEXT:    s_swappc_b64 s[30:31], 0
248; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
249; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
250; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
251; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
252; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
253; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
254; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
255; GFX9-NEXT:    s_waitcnt vmcnt(0)
256; GFX9-NEXT:    s_setpc_b64 s[30:31]
257entry:
; The loaded value is never used; the load is kept because it is volatile.
258  %load0 = load volatile i32, i32 addrspace(1)* null, align 4
259  br label %bb1
260
261bb1:
; The null callee constant is used in a different block from the entry block.
262  call void null()
263  ret void
264}
265