; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=CI %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s

declare i32 @llvm.amdgcn.workitem.id.x() #0

@lds.obj = addrspace(3) global [256 x i32] undef, align 4

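; The 12-byte constant offset (element 3 of the i32 LDS array) should be folded
; into the ds_write_b32 offset field; only the negation of the workitem id is
; left as a VALU subtract.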
define amdgpu_kernel void @write_ds_sub0_offset0_global() #0 {
; CI-LABEL: write_ds_sub0_offset0_global:
; CI:       ; %bb.0: ; %entry
; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
; CI-NEXT:    v_mov_b32_e32 v1, 0x7b
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    ds_write_b32 v0, v1 offset:12
; CI-NEXT:    s_endpgm
;
; GFX9-LABEL: write_ds_sub0_offset0_global:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    v_sub_u32_e32 v0, 0, v0
; GFX9-NEXT:    v_mov_b32_e32 v1, 0x7b
; GFX9-NEXT:    ds_write_b32 v0, v1 offset:12
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: write_ds_sub0_offset0_global:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7b
; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0, v0
; GFX10-NEXT:    ds_write_b32 v0, v1 offset:12
; GFX10-NEXT:    s_endpgm
entry:
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #1
  %sub1 = sub i32 0, %x.i
  %tmp0 = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds.obj, i32 0, i32 %sub1
  %arrayidx = getelementptr inbounds i32, i32 addrspace(3)* %tmp0, i32 3
  store i32 123, i32 addrspace(3)* %arrayidx
  ret void
}

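; Same as above, plus a llvm.amdgcn.div.fmas.f32 call whose i1 operand is
; materialized in VCC. The carry-writing v_sub_i32 on CI is scheduled before
; the s_mov that sets VCC for div_fmas, and the ds_write_b32 still uses
; offset:12.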
define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.val) #0 {
; CI-LABEL: write_ds_sub0_offset0_global_clamp_bit:
; CI:       ; %bb.0: ; %entry
; CI-NEXT:    s_load_dword s0, s[0:1], 0x0
; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
; CI-NEXT:    s_mov_b64 vcc, 0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_mov_b32_e32 v1, s0
; CI-NEXT:    s_mov_b32 s0, 0
; CI-NEXT:    v_div_fmas_f32 v1, v1, v1, v1
; CI-NEXT:    v_mov_b32_e32 v2, 0x7b
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    s_mov_b32 s3, 0xf000
; CI-NEXT:    s_mov_b32 s2, -1
; CI-NEXT:    s_mov_b32 s1, s0
; CI-NEXT:    ds_write_b32 v0, v2 offset:12
; CI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_endpgm
;
; GFX9-LABEL: write_ds_sub0_offset0_global_clamp_bit:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX9-NEXT:    s_mov_b64 vcc, 0
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    v_sub_u32_e32 v3, 0, v0
; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7b
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, s0
; GFX9-NEXT:    v_div_fmas_f32 v2, v1, v1, v1
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    ds_write_b32 v3, v4 offset:12
; GFX9-NEXT:    global_store_dword v[0:1], v2, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: write_ds_sub0_offset0_global_clamp_bit:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT:    s_mov_b32 vcc_lo, 0
; GFX10-NEXT:    v_mov_b32_e32 v3, 0x7b
; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 0, v0
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    ds_write_b32 v2, v3 offset:12
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_div_fmas_f32 v4, s0, s0, s0
; GFX10-NEXT:    global_store_dword v[0:1], v4, off
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_endpgm
entry:
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #1
  %sub1 = sub i32 0, %x.i
  %tmp0 = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds.obj, i32 0, i32 %sub1
  %arrayidx = getelementptr inbounds i32, i32 addrspace(3)* %tmp0, i32 3
  store i32 123, i32 addrspace(3)* %arrayidx
  %fmas = call float @llvm.amdgcn.div.fmas.f32(float %dummy.val, float %dummy.val, float %dummy.val, i1 false)
  store volatile float %fmas, float addrspace(1)* null
  ret void
}

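; 65535 is the largest value that fits in the 16-bit DS offset field, so the
; add should become a subtract from 0 with the constant folded into the
; ds_write_b8 offset.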
define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset() #1 {
; CI-LABEL: add_x_shl_neg_to_sub_max_offset:
; CI:       ; %bb.0:
; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
; CI-NEXT:    v_mov_b32_e32 v1, 13
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    ds_write_b8 v0, v1 offset:65535
; CI-NEXT:    s_endpgm
;
; GFX9-LABEL: add_x_shl_neg_to_sub_max_offset:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    v_sub_u32_e32 v0, 0, v0
; GFX9-NEXT:    v_mov_b32_e32 v1, 13
; GFX9-NEXT:    ds_write_b8 v0, v1 offset:65535
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: add_x_shl_neg_to_sub_max_offset:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT:    v_mov_b32_e32 v1, 13
; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0, v0
; GFX10-NEXT:    ds_write_b8 v0, v1 offset:65535
; GFX10-NEXT:    s_endpgm
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 65535, %shl
  %ptr = inttoptr i32 %add to i8 addrspace(3)*
  store i8 13, i8 addrspace(3)* %ptr
  ret void
}

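; 65536 no longer fits in the DS offset field, so the constant is folded into
; the subtract instead and the ds_write_b8 is emitted with no offset.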
define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_p1() #1 {
; CI-LABEL: add_x_shl_neg_to_sub_max_offset_p1:
; CI:       ; %bb.0:
; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0x10000, v0
; CI-NEXT:    v_mov_b32_e32 v1, 13
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    ds_write_b8 v0, v1
; CI-NEXT:    s_endpgm
;
; GFX9-LABEL: add_x_shl_neg_to_sub_max_offset_p1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    v_sub_u32_e32 v0, 0x10000, v0
; GFX9-NEXT:    v_mov_b32_e32 v1, 13
; GFX9-NEXT:    ds_write_b8 v0, v1
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: add_x_shl_neg_to_sub_max_offset_p1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT:    v_mov_b32_e32 v1, 13
; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0x10000, v0
; GFX10-NEXT:    ds_write_b8 v0, v1
; GFX10-NEXT:    s_endpgm
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 65536, %shl
  %ptr = inttoptr i32 %add to i8 addrspace(3)*
  store i8 13, i8 addrspace(3)* %ptr
  ret void
}

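; Two different constant offsets added to the same negated base: both should
; fold into DS offsets while a single subtract computes the shared base
; address.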
define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use() #1 {
; CI-LABEL: add_x_shl_neg_to_sub_multi_use:
; CI:       ; %bb.0:
; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
; CI-NEXT:    v_mov_b32_e32 v1, 13
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    ds_write_b32 v0, v1 offset:123
; CI-NEXT:    ds_write_b32 v0, v1 offset:456
; CI-NEXT:    s_endpgm
;
; GFX9-LABEL: add_x_shl_neg_to_sub_multi_use:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    v_sub_u32_e32 v0, 0, v0
; GFX9-NEXT:    v_mov_b32_e32 v1, 13
; GFX9-NEXT:    ds_write_b32 v0, v1 offset:123
; GFX9-NEXT:    ds_write_b32 v0, v1 offset:456
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: add_x_shl_neg_to_sub_multi_use:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT:    v_mov_b32_e32 v1, 13
; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0, v0
; GFX10-NEXT:    ds_write_b32 v0, v1 offset:123
; GFX10-NEXT:    ds_write_b32 v0, v1 offset:456
; GFX10-NEXT:    s_endpgm
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add0 = add i32 123, %shl
  %add1 = add i32 456, %shl
  %ptr0 = inttoptr i32 %add0 to i32 addrspace(3)*
  store volatile i32 13, i32 addrspace(3)* %ptr0
  %ptr1 = inttoptr i32 %add1 to i32 addrspace(3)*
  store volatile i32 13, i32 addrspace(3)* %ptr1
  ret void
}

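; Two volatile stores through the same pointer should reuse one subtract and
; emit two ds_write_b32 instructions with the same folded offset.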
define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use_same_offset() #1 {
; CI-LABEL: add_x_shl_neg_to_sub_multi_use_same_offset:
; CI:       ; %bb.0:
; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
; CI-NEXT:    v_mov_b32_e32 v1, 13
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    ds_write_b32 v0, v1 offset:123
; CI-NEXT:    ds_write_b32 v0, v1 offset:123
; CI-NEXT:    s_endpgm
;
; GFX9-LABEL: add_x_shl_neg_to_sub_multi_use_same_offset:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    v_sub_u32_e32 v0, 0, v0
; GFX9-NEXT:    v_mov_b32_e32 v1, 13
; GFX9-NEXT:    ds_write_b32 v0, v1 offset:123
; GFX9-NEXT:    ds_write_b32 v0, v1 offset:123
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: add_x_shl_neg_to_sub_multi_use_same_offset:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT:    v_mov_b32_e32 v1, 13
; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0, v0
; GFX10-NEXT:    ds_write_b32 v0, v1 offset:123
; GFX10-NEXT:    ds_write_b32 v0, v1 offset:123
; GFX10-NEXT:    s_endpgm
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 123, %shl
  %ptr = inttoptr i32 %add to i32 addrspace(3)*
  store volatile i32 13, i32 addrspace(3)* %ptr
  store volatile i32 13, i32 addrspace(3)* %ptr
  ret void
}

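; The i64 store with only 4-byte alignment is split into two 32-bit writes.
; On CI and GFX9 the odd constant 1019 (0x3fb) is folded into the base
; subtract and a ds_write2_b32 is used; on GFX10 the two halves are written
; with separate ds_write_b32 offsets of 1019 and 1023.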
define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 {
; CI-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset:
; CI:       ; %bb.0:
; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0x3fb, v0
; CI-NEXT:    v_mov_b32_e32 v1, 0x7b
; CI-NEXT:    v_mov_b32_e32 v2, 0
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
; CI-NEXT:    s_endpgm
;
; GFX9-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    v_sub_u32_e32 v0, 0x3fb, v0
; GFX9-NEXT:    v_mov_b32_e32 v1, 0x7b
; GFX9-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    v_mov_b32_e32 v2, 0x7b
; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0, v0
; GFX10-NEXT:    ds_write_b32 v0, v1 offset:1023
; GFX10-NEXT:    ds_write_b32 v0, v2 offset:1019
; GFX10-NEXT:    s_endpgm
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 1019, %shl
  %ptr = inttoptr i32 %add to i64 addrspace(3)*
  store i64 123, i64 addrspace(3)* %ptr, align 4
  ret void
}

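; Same split i64 store as above, combined with a div_fmas read of VCC; the
; base subtract and folded offsets should match the previous test while VCC is
; set up for div_fmas.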
define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit(float %dummy.val) #1 {
; CI-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dword s0, s[0:1], 0x0
; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0x3fb, v0
; CI-NEXT:    s_mov_b64 vcc, 0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_mov_b32_e32 v1, s0
; CI-NEXT:    s_mov_b32 s0, 0
; CI-NEXT:    v_div_fmas_f32 v1, v1, v1, v1
; CI-NEXT:    v_mov_b32_e32 v2, 0x7b
; CI-NEXT:    v_mov_b32_e32 v3, 0
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    s_mov_b32 s3, 0xf000
; CI-NEXT:    s_mov_b32 s2, -1
; CI-NEXT:    s_mov_b32 s1, s0
; CI-NEXT:    ds_write2_b32 v0, v2, v3 offset1:1
; CI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_endpgm
;
; GFX9-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX9-NEXT:    s_mov_b64 vcc, 0
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    v_sub_u32_e32 v3, 0x3fb, v0
; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7b
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, s0
; GFX9-NEXT:    v_div_fmas_f32 v2, v1, v1, v1
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v5, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    ds_write2_b32 v3, v4, v5 offset1:1
; GFX9-NEXT:    global_store_dword v[0:1], v2, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dword s0, s[0:1], 0x0
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT:    s_mov_b32 vcc_lo, 0
; GFX10-NEXT:    v_mov_b32_e32 v3, 0
; GFX10-NEXT:    v_mov_b32_e32 v4, 0x7b
; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 0, v0
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    ds_write_b32 v2, v3 offset:1023
; GFX10-NEXT:    ds_write_b32 v2, v4 offset:1019
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_div_fmas_f32 v5, s0, s0, s0
; GFX10-NEXT:    global_store_dword v[0:1], v5, off
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_endpgm
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 1019, %shl
  %ptr = inttoptr i32 %add to i64 addrspace(3)*
  store i64 123, i64 addrspace(3)* %ptr, align 4
  %fmas = call float @llvm.amdgcn.div.fmas.f32(float %dummy.val, float %dummy.val, float %dummy.val, i1 false)
  store volatile float %fmas, float addrspace(1)* null
  ret void
}

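; With a dword-aligned constant of 1020 (0x3fc), CI and GFX9 still fold it
; into the subtract; GFX10 instead adds 0x200 to the base and encodes the rest
; as ds_write2_b32 dword offsets 127 and 128.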
define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() #1 {
; CI-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1:
; CI:       ; %bb.0:
; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0x3fc, v0
; CI-NEXT:    v_mov_b32_e32 v1, 0x7b
; CI-NEXT:    v_mov_b32_e32 v2, 0
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
; CI-NEXT:    s_endpgm
;
; GFX9-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    v_sub_u32_e32 v0, 0x3fc, v0
; GFX9-NEXT:    v_mov_b32_e32 v1, 0x7b
; GFX9-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    v_mov_b32_e32 v2, 0x7b
; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0, v0
; GFX10-NEXT:    v_add_nc_u32_e32 v0, 0x200, v0
; GFX10-NEXT:    ds_write2_b32 v0, v2, v1 offset0:127 offset1:128
; GFX10-NEXT:    s_endpgm
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 1020, %shl
  %ptr = inttoptr i32 %add to i64 addrspace(3)*
  store i64 123, i64 addrspace(3)* %ptr, align 4
  ret void
}

declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1)

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #2 = { nounwind convergent }