1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
3
; (x << 8) | (y & 0xff) must be selected to a single v_perm_b32 with byte
; selector 0x6050400 rather than separate shift/and/or instructions.
define amdgpu_kernel void @lsh8_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
; GCN-LABEL: lsh8_or_and:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT:    s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT:    v_mov_b32_e32 v3, 0x6050400
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT:    flat_load_dword v2, v[0:1]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_perm_b32 v2, v2, s0, v3
; GCN-NEXT:    flat_store_dword v[0:1], v2
; GCN-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = shl i32 %tmp, 8
  %tmp3 = and i32 %arg1, 255 ; 0xff
  %tmp4 = or i32 %tmp2, %tmp3
  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
  ret void
}
30
; (x >> 24) | (y & 0xffffff00) must be selected to a single v_perm_b32 with
; byte selector 0x7060503 (operand order swapped relative to lsh8_or_and).
define amdgpu_kernel void @lsr24_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
; GCN-LABEL: lsr24_or_and:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT:    s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT:    v_mov_b32_e32 v3, 0x7060503
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT:    flat_load_dword v2, v[0:1]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_perm_b32 v2, s0, v2, v3
; GCN-NEXT:    flat_store_dword v[0:1], v2
; GCN-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = lshr i32 %tmp, 24
  %tmp3 = and i32 %arg1, 4294967040 ; 0xffffff00
  %tmp4 = or i32 %tmp2, %tmp3
  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
  ret void
}
57
; (x & 0xffffff00) | (y >> 24) must become a v_perm_b32 (selector 0x7060503);
; the trailing sign-bit flip stays as a separate v_xor_b32 of 0x80000000.
define amdgpu_kernel void @and_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
; GCN-LABEL: and_or_lsr24:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT:    s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT:    v_mov_b32_e32 v3, 0x7060503
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT:    flat_load_dword v2, v[0:1]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_perm_b32 v2, v2, s0, v3
; GCN-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
; GCN-NEXT:    flat_store_dword v[0:1], v2
; GCN-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = and i32 %tmp, 4294967040 ; 0xffffff00
  %tmp3 = lshr i32 %arg1, 24
  %tmp4 = or i32 %tmp2, %tmp3
  %tmp5 = xor i32 %tmp4, -2147483648 ; 0x80000000
  store i32 %tmp5, i32 addrspace(1)* %gep, align 4
  ret void
}
86
; Merging alternating bytes, (x & 0xff00ff00) | (y & 0x00ff00ff), must be
; selected to a single v_perm_b32 with byte selector 0x7020500.
define amdgpu_kernel void @and_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
; GCN-LABEL: and_or_and:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT:    s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT:    v_mov_b32_e32 v3, 0x7020500
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT:    flat_load_dword v2, v[0:1]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_perm_b32 v2, v2, s0, v3
; GCN-NEXT:    flat_store_dword v[0:1], v2
; GCN-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = and i32 %tmp, -16711936 ; 0xff00ff00
  %tmp3 = and i32 %arg1, 16711935 ; 0x00ff00ff
  %tmp4 = or i32 %tmp2, %tmp3
  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
  ret void
}
113
; (x << 8) | (y >> 24) is a funnel shift, so it should be selected to
; v_alignbit_b32 with shift amount 24 rather than a v_perm_b32.
define amdgpu_kernel void @lsh8_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
; GCN-LABEL: lsh8_or_lsr24:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT:    s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT:    flat_load_dword v2, v[0:1]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_alignbit_b32 v2, v2, s0, 24
; GCN-NEXT:    flat_store_dword v[0:1], v2
; GCN-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = shl i32 %tmp, 8
  %tmp3 = lshr i32 %arg1, 24
  %tmp4 = or i32 %tmp2, %tmp3
  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
  ret void
}
139
; (x << 16) | (y >> 24) must be selected to a single v_perm_b32 with byte
; selector 0x5040c03 (a 0x0c selector byte produces the zeroed byte).
define amdgpu_kernel void @lsh16_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
; GCN-LABEL: lsh16_or_lsr24:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT:    s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT:    v_mov_b32_e32 v3, 0x5040c03
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT:    flat_load_dword v2, v[0:1]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_perm_b32 v2, v2, s0, v3
; GCN-NEXT:    flat_store_dword v[0:1], v2
; GCN-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = shl i32 %tmp, 16
  %tmp3 = lshr i32 %arg1, 24
  %tmp4 = or i32 %tmp2, %tmp3
  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
  ret void
}
166
; xor of two values masked with disjoint byte masks behaves like or, so
; (x & 0xff0100ff) ^ (y & 0x00fffe00) should still fold to a v_perm_b32
; (selector 0x7020104).
define amdgpu_kernel void @and_xor_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
; GCN-LABEL: and_xor_and:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT:    s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT:    v_mov_b32_e32 v3, 0x7020104
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT:    flat_load_dword v2, v[0:1]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_perm_b32 v2, v2, s0, v3
; GCN-NEXT:    flat_store_dword v[0:1], v2
; GCN-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = and i32 %tmp, -16776961 ; 0xff0100ff
  %tmp3 = and i32 %arg1, 16776960 ; 0x00ffff00
  %tmp4 = xor i32 %tmp2, %tmp3
  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
  ret void
}
193
; FIXME: This should have been selected to a "v_perm_b32" with a 0xffff0500 mask.
; Currently NOT combined into a v_perm_b32 (see the FIXME above): the masks
; and ors are emitted individually (s_and/s_or on the scalar side, v_and/v_or
; on the vector side).
define amdgpu_kernel void @and_or_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
; GCN-LABEL: and_or_or_and:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT:    s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT:    flat_load_dword v2, v[0:1]
; GCN-NEXT:    s_and_b32 s0, s0, 0xff00
; GCN-NEXT:    s_or_b32 s0, s0, 0xffff0000
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v2, 0xff00ff, v2
; GCN-NEXT:    v_or_b32_e32 v2, s0, v2
; GCN-NEXT:    flat_store_dword v[0:1], v2
; GCN-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %and = and i32 %tmp, 16711935     ; 0x00ff00ff
  %tmp1 = and i32 %arg1, 4294967040 ; 0xffffff00
  %tmp2 = or i32 %tmp1, -65536      ; 0xffff0000
  %tmp3 = or i32 %tmp2, %and
  store i32 %tmp3, i32 addrspace(1)* %gep, align 4
  ret void
}
224
; ((x << 16) | (y & 0xffff)) & 0xff0000ff must collapse to one v_perm_b32
; with byte selector 0x50c0c00 (0x0c selector bytes for the zeroed bytes).
define amdgpu_kernel void @and_or_and_shl(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
; GCN-LABEL: and_or_and_shl:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT:    s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT:    v_mov_b32_e32 v3, 0x50c0c00
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT:    flat_load_dword v2, v[0:1]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_perm_b32 v2, v2, s0, v3
; GCN-NEXT:    flat_store_dword v[0:1], v2
; GCN-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = shl i32 %tmp, 16
  %tmp3 = and i32 %arg1, 65535 ; 0xffff
  %tmp4 = or i32 %tmp2, %tmp3
  %and = and i32 %tmp4, 4278190335 ; 0xff0000ff
  store i32 %and, i32 addrspace(1)* %gep, align 4
  ret void
}
252
; (x | 0x00ffff00) & (y | 0xff0000ff): the or-masks are complementary per
; byte, so the whole expression must fold to a v_perm_b32 (selector
; 0x7020104), same as and_xor_and.
define amdgpu_kernel void @or_and_or(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
; GCN-LABEL: or_and_or:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT:    s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT:    v_mov_b32_e32 v3, 0x7020104
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT:    flat_load_dword v2, v[0:1]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_perm_b32 v2, v2, s0, v3
; GCN-NEXT:    flat_store_dword v[0:1], v2
; GCN-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %or1 = or i32 %tmp, 16776960    ; 0x00ffff00
  %or2 = or i32 %arg1, 4278190335 ; 0xff0000ff
  %and = and i32 %or1, %or2
  store i32 %and, i32 addrspace(1)* %gep, align 4
  ret void
}
279
; FIXME: This should have been selected to a "v_perm_b32" with a 0xffff0500 mask.
; Known-bits test: the byte-merge itself is currently not turned into a
; v_perm_b32 (see FIXME above), but the second store must still fold to the
; constant 0xffff8004, proving the known bits of the merged value (the or'd
; 0x8000 and 4, plus the 0xffff0000 high half) are propagated.
define amdgpu_kernel void @known_ffff0500(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
; GCN-LABEL: known_ffff0500:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT:    s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT:    v_mov_b32_e32 v5, 0xffff8004
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT:    flat_load_dword v4, v[0:1]
; GCN-NEXT:    s_bitset1_b32 s0, 15
; GCN-NEXT:    s_and_b32 s0, s0, 0xff00
; GCN-NEXT:    s_or_b32 s0, s0, 0xffff0000
; GCN-NEXT:    v_mov_b32_e32 v2, s2
; GCN-NEXT:    v_mov_b32_e32 v3, s3
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_or_b32_e32 v4, 4, v4
; GCN-NEXT:    v_and_b32_e32 v4, 0xff00ff, v4
; GCN-NEXT:    v_or_b32_e32 v4, s0, v4
; GCN-NEXT:    flat_store_dword v[0:1], v4
; GCN-NEXT:    flat_store_dword v[2:3], v5
; GCN-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %load = load i32, i32 addrspace(1)* %gep, align 4
  %mask1 = or i32 %arg1, 32768 ; 0x8000
  %mask2 = or i32 %load, 4
  %and = and i32 %mask2, 16711935     ; 0x00ff00ff
  %tmp1 = and i32 %mask1, 4294967040 ; 0xffffff00
  %tmp2 = or i32 %tmp1, 4294901760   ; 0xffff0000
  %tmp3 = or i32 %tmp2, %and
  store i32 %tmp3, i32 addrspace(1)* %gep, align 4
  %v = and i32 %tmp3, 4294934532 ; 0xffff8004
  store i32 %v, i32 addrspace(1)* %arg, align 4
  ret void
}
320
; Known-bits test: the merge is selected to v_perm_b32 (selector 0x50c0c00),
; and the second store must fold to the constant 4 — the only bit that can
; survive the final mask given the bits known through the v_perm result.
define amdgpu_kernel void @known_050c0c00(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
; GCN-LABEL: known_050c0c00:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT:    s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT:    v_mov_b32_e32 v5, 0x50c0c00
; GCN-NEXT:    v_mov_b32_e32 v6, 4
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT:    flat_load_dword v4, v[0:1]
; GCN-NEXT:    s_or_b32 s0, s0, 4
; GCN-NEXT:    v_mov_b32_e32 v2, s2
; GCN-NEXT:    v_mov_b32_e32 v3, s3
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_perm_b32 v4, v4, s0, v5
; GCN-NEXT:    flat_store_dword v[0:1], v4
; GCN-NEXT:    flat_store_dword v[2:3], v6
; GCN-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = shl i32 %tmp, 16
  %mask = or i32 %arg1, 4
  %tmp3 = and i32 %mask, 65535 ; 0xffff
  %tmp4 = or i32 %tmp2, %tmp3
  %and = and i32 %tmp4, 4278190335 ; 0xff0000ff
  store i32 %and, i32 addrspace(1)* %gep, align 4
  %v = and i32 %and, 16776964 ; 0x00ffff04
  store i32 %v, i32 addrspace(1)* %arg, align 4
  ret void
}
356
; Known-bits test: the merge is selected to v_perm_b32 (selector 0xffff0500,
; with the 0x8000 or folded separately), and the second store must fold to
; the constant 0xffff8004 computed from the bits known through the v_perm.
define amdgpu_kernel void @known_ffff8004(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
; GCN-LABEL: known_ffff8004:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT:    s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT:    v_mov_b32_e32 v5, 0xffff0500
; GCN-NEXT:    v_mov_b32_e32 v6, 0xffff8004
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT:    flat_load_dword v4, v[0:1]
; GCN-NEXT:    s_or_b32 s0, s0, 4
; GCN-NEXT:    v_mov_b32_e32 v2, s2
; GCN-NEXT:    v_mov_b32_e32 v3, s3
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_or_b32_e32 v4, 0x8000, v4
; GCN-NEXT:    v_perm_b32 v4, v4, s0, v5
; GCN-NEXT:    flat_store_dword v[0:1], v4
; GCN-NEXT:    flat_store_dword v[2:3], v6
; GCN-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %load = load i32, i32 addrspace(1)* %gep, align 4
  %mask1 = or i32 %arg1, 4
  %mask2 = or i32 %load, 32768 ; 0x8000
  %and = and i32 %mask1, 16711935     ; 0x00ff00ff
  %tmp1 = and i32 %mask2, 4294967040 ; 0xffffff00
  %tmp2 = or i32 %tmp1, 4294901760   ; 0xffff0000
  %tmp3 = or i32 %tmp2, %and
  store i32 %tmp3, i32 addrspace(1)* %gep, align 4
  %v = and i32 %tmp3, 4294934532 ; 0xffff8004
  store i32 %v, i32 addrspace(1)* %arg, align 4
  ret void
}
394
395declare i32 @llvm.amdgcn.workitem.id.x()
396