1; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
2; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
3; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
4; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
5; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
6; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
7
8; OPT-LABEL: @test_sink_global_small_offset_i32(
9; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
10; OPT-VI: getelementptr i32, i32 addrspace(1)* %in
11; OPT: br i1
12; OPT-CI: ptrtoint
13
14; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
15; GCN: {{^}}BB0_2:
; Global (addrspace(1)) i32 load at constant element offset 7 from %in.
; The address %in.gep is formed in %entry but its only use is the load in
; %if, which is reached when the mbcnt.lo result is non-zero. %endif stores
; the loaded value (or 0 when %if was skipped) to %out[999999].
16define void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
17entry:
18  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
19  %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 7
20  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
21  %tmp0 = icmp eq i32 %tid, 0
22  br i1 %tmp0, label %endif, label %if
23
24if:
25  %tmp1 = load i32, i32 addrspace(1)* %in.gep
26  br label %endif
27
28endif:
29  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
30  store i32 %x, i32 addrspace(1)* %out.gep
31  br label %done
32
33done:
34  ret void
35}
36
37; OPT-LABEL: @test_sink_global_small_max_i32_ds_offset(
38; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
39; OPT: br i1
40
41; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset:
42; GCN: s_and_saveexec_b64
43; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
44; GCN: {{^}}BB1_2:
45; GCN: s_or_b64 exec
; Global (addrspace(1)) i8 load at constant byte offset 65535 from %in,
; sign-extended to i32. The address is formed in %entry and used only in
; %if (taken when the mbcnt.lo result is non-zero). %endif stores the
; extended value, or 0 when %if was skipped, to %out[99999].
46define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
47entry:
48  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
49  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
50  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
51  %tmp0 = icmp eq i32 %tid, 0
52  br i1 %tmp0, label %endif, label %if
53
54if:
55  %tmp1 = load i8, i8 addrspace(1)* %in.gep
56  %tmp2 = sext i8 %tmp1 to i32
57  br label %endif
58
59endif:
60  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
61  store i32 %x, i32 addrspace(1)* %out.gep
62  br label %done
63
64done:
65  ret void
66}
67
68; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
69; GCN: s_and_saveexec_b64
70; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
71; GCN: {{^}}BB2_2:
72; GCN: s_or_b64 exec
; Global (addrspace(1)) i8 load at constant byte offset 4095 from %in,
; sign-extended to i32. The address is formed in %entry and used only in
; %if (taken when the mbcnt.lo result is non-zero). %endif stores the
; extended value, or 0 when %if was skipped, to %out[1024].
73define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
74entry:
75  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
76  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4095
77  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
78  %tmp0 = icmp eq i32 %tid, 0
79  br i1 %tmp0, label %endif, label %if
80
81if:
82  %tmp1 = load i8, i8 addrspace(1)* %in.gep
83  %tmp2 = sext i8 %tmp1 to i32
84  br label %endif
85
86endif:
87  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
88  store i32 %x, i32 addrspace(1)* %out.gep
89  br label %done
90
91done:
92  ret void
93}
94
95; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset:
96; GCN: s_and_saveexec_b64
97; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
98; GCN: {{^}}BB3_2:
99; GCN: s_or_b64 exec
; Same shape as the byte-offset-4095 case, but the constant byte offset is
; 4096 (one more). The i8 load from %in + 4096 happens only in %if; %endif
; stores the sign-extended value, or 0 when %if was skipped, to %out[99999].
100define void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
101entry:
102  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
103  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4096
104  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
105  %tmp0 = icmp eq i32 %tid, 0
106  br i1 %tmp0, label %endif, label %if
107
108if:
109  %tmp1 = load i8, i8 addrspace(1)* %in.gep
110  %tmp2 = sext i8 %tmp1 to i32
111  br label %endif
112
113endif:
114  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
115  store i32 %x, i32 addrspace(1)* %out.gep
116  br label %done
117
118done:
119  ret void
120}
121
122; OPT-LABEL: @test_sink_scratch_small_offset_i32(
123; OPT-NOT:  getelementptr [512 x i32]
124; OPT: br i1
125; OPT: ptrtoint
126
127; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32:
128; GCN: s_and_saveexec_b64
129; GCN: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}}
130; GCN: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}}
131; GCN: {{^}}BB4_2:
; Stack (alloca) access at constant element index 1023 of a [512 x i32]
; array, i.e. byte offset 4092 from the alloca. The index is past the
; 512-element bound; the GEP is not marked inbounds.
; %if does a volatile store of 123 followed by a volatile load through
; %alloca.gep; %endif does a further volatile load through the same pointer.
; Results go to %out[999998] and %out[999999]. %in and %add.arg are not
; used by any other instruction in this function.
132define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
133entry:
134  %alloca = alloca [512 x i32], align 4
135  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
136  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
137  %add.arg = add i32 %arg, 8
138  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1023
139  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
140  %tmp0 = icmp eq i32 %tid, 0
141  br i1 %tmp0, label %endif, label %if
142
143if:
144  store volatile i32 123, i32* %alloca.gep
145  %tmp1 = load volatile i32, i32* %alloca.gep
146  br label %endif
147
148endif:
149  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
150  store i32 %x, i32 addrspace(1)* %out.gep.0
151  %load = load volatile i32, i32* %alloca.gep
152  store i32 %load, i32 addrspace(1)* %out.gep.1
153  br label %done
154
155done:
156  ret void
157}
158
159; OPT-LABEL: @test_no_sink_scratch_large_offset_i32(
160; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
161; OPT: br i1
162; OPT-NOT: ptrtoint
163
164; GCN-LABEL: {{^}}test_no_sink_scratch_large_offset_i32:
165; GCN: s_and_saveexec_b64
166; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
167; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
168; GCN: {{^}}BB5_2:
; Same shape as the index-1023 scratch case, but the constant element index
; is 1024, i.e. byte offset 4096 from the alloca (also past the 512-element
; bound; the GEP is not marked inbounds).
; %if does a volatile store of 123 followed by a volatile load through
; %alloca.gep; %endif does a further volatile load through the same pointer.
; Results go to %out[999998] and %out[999999]. %in and %add.arg are not
; used by any other instruction in this function.
169define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
170entry:
171  %alloca = alloca [512 x i32], align 4
172  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
173  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
174  %add.arg = add i32 %arg, 8
175  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
176  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
177  %tmp0 = icmp eq i32 %tid, 0
178  br i1 %tmp0, label %endif, label %if
179
180if:
181  store volatile i32 123, i32* %alloca.gep
182  %tmp1 = load volatile i32, i32* %alloca.gep
183  br label %endif
184
185endif:
186  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
187  store i32 %x, i32 addrspace(1)* %out.gep.0
188  %load = load volatile i32, i32* %alloca.gep
189  store i32 %load, i32 addrspace(1)* %out.gep.1
190  br label %done
191
192done:
193  ret void
194}
195
196; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
197; GCN: s_and_saveexec_b64
198; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
199; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
200; GCN: {{^}}BB6_2:
; Global (addrspace(1)) i32 load at a variable element index. The i32
; argument %offset is zero-extended to i64 and used to index %in. The address
; is formed in %entry and used only in %if (taken when the mbcnt.lo result is
; non-zero). %endif stores the loaded value, or 0, to %out[999999].
201define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
202entry:
203  %offset.ext = zext i32 %offset to i64
204  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
205  %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 %offset.ext
206  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
207  %tmp0 = icmp eq i32 %tid, 0
208  br i1 %tmp0, label %endif, label %if
209
210if:
211  %tmp1 = load i32, i32 addrspace(1)* %in.gep
212  br label %endif
213
214endif:
215  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
216  store i32 %x, i32 addrspace(1)* %out.gep
217  br label %done
218
219done:
220  ret void
221}
222
223; OPT-LABEL: @test_sink_constant_small_offset_i32
224; OPT-NOT:  getelementptr i32, i32 addrspace(2)*
225; OPT: br i1
226
227; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32:
228; GCN: s_and_saveexec_b64
229; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}}
230; GCN: s_or_b64 exec, exec
; addrspace(2) i32 load at constant element offset 7 (byte offset 28) from
; %in. The address is formed in %entry and used only in %if (taken when the
; mbcnt.lo result is non-zero). %endif stores the loaded value, or 0, to
; %out[999999] in addrspace(1).
231define void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
232entry:
233  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
234  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7
235  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
236  %tmp0 = icmp eq i32 %tid, 0
237  br i1 %tmp0, label %endif, label %if
238
239if:
240  %tmp1 = load i32, i32 addrspace(2)* %in.gep
241  br label %endif
242
243endif:
244  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
245  store i32 %x, i32 addrspace(1)* %out.gep
246  br label %done
247
248done:
249  ret void
250}
251
252; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32
253; OPT-NOT:  getelementptr i32, i32 addrspace(2)*
254; OPT: br i1
255
256; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32:
257; GCN: s_and_saveexec_b64
258; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}}
259; GCN: s_or_b64 exec, exec
; addrspace(2) i32 load at constant element offset 255 (the largest value
; that fits in 8 bits; byte offset 1020) from %in. The address is formed in
; %entry and used only in %if. %endif stores the loaded value, or 0, to
; %out[999999] in addrspace(1).
260define void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
261entry:
262  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
263  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255
264  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
265  %tmp0 = icmp eq i32 %tid, 0
266  br i1 %tmp0, label %endif, label %if
267
268if:
269  %tmp1 = load i32, i32 addrspace(2)* %in.gep
270  br label %endif
271
272endif:
273  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
274  store i32 %x, i32 addrspace(1)* %out.gep
275  br label %done
276
277done:
278  ret void
279}
280
281; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32
282; OPT-SI:  getelementptr i32, i32 addrspace(2)*
283; OPT-CI-NOT:  getelementptr i32, i32 addrspace(2)*
284; OPT-VI-NOT:  getelementptr i32, i32 addrspace(2)*
285; OPT: br i1
286
287; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32:
288; GCN: s_and_saveexec_b64
289; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x400
290
291; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
292; GCN: s_or_b64 exec, exec
; addrspace(2) i32 load at constant element offset 256 (one past the largest
; 8-bit value; byte offset 1024 = 0x400) from %in. The address is formed in
; %entry and used only in %if. %endif stores the loaded value, or 0, to
; %out[999999] in addrspace(1).
293define void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
294entry:
295  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
296  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256
297  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
298  %tmp0 = icmp eq i32 %tid, 0
299  br i1 %tmp0, label %endif, label %if
300
301if:
302  %tmp1 = load i32, i32 addrspace(2)* %in.gep
303  br label %endif
304
305endif:
306  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
307  store i32 %x, i32 addrspace(1)* %out.gep
308  br label %done
309
310done:
311  ret void
312}
313
314; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32
315; OPT-SI: getelementptr i32, i32 addrspace(2)*
316; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
317; OPT: br i1
318
319; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
320; GCN: s_and_saveexec_b64
321; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
322; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
323; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
324; GCN: s_or_b64 exec, exec
; addrspace(2) i32 load at constant element offset 4294967295 (2^32 - 1) from
; %in; scaled by the 4-byte element size this is byte offset 0x3FFFFFFFC.
; The address is formed in %entry and used only in %if. %endif stores the
; loaded value, or 0, to %out[999999] in addrspace(1).
325define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
326entry:
327  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
328  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295
329  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
330  %tmp0 = icmp eq i32 %tid, 0
331  br i1 %tmp0, label %endif, label %if
332
333if:
334  %tmp1 = load i32, i32 addrspace(2)* %in.gep
335  br label %endif
336
337endif:
338  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
339  store i32 %x, i32 addrspace(1)* %out.gep
340  br label %done
341
342done:
343  ret void
344}
345
346; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32
347; OPT: getelementptr i32, i32 addrspace(2)*
348; OPT: br i1
349
350; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32:
351; GCN: s_and_saveexec_b64
352; GCN: s_add_u32
353; GCN: s_addc_u32
354; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
355; GCN: s_or_b64 exec, exec
; addrspace(2) i32 load at constant element offset 17179869181 (2^34 - 3)
; from %in. The address is formed in %entry and used only in %if. %endif
; stores the loaded value, or 0, to %out[999999] in addrspace(1).
; NOTE(review) - the "_p1" name suggests one past 2^32 - 1 (4294967296), but
; the constant used is 2^34 - 3; confirm this value is the intended one.
356define void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
357entry:
358  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
359  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181
360  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
361  %tmp0 = icmp eq i32 %tid, 0
362  br i1 %tmp0, label %endif, label %if
363
364if:
365  %tmp1 = load i32, i32 addrspace(2)* %in.gep
366  br label %endif
367
368endif:
369  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
370  store i32 %x, i32 addrspace(1)* %out.gep
371  br label %done
372
373done:
374  ret void
375}
376
377; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_i32:
378; GCN: s_and_saveexec_b64
379; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc{{$}}
380; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
381
382; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x3ffff{{$}}
383; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}}
384
385; GCN: s_or_b64 exec, exec
; addrspace(2) i32 load at constant element offset 262143 (0x3ffff) from %in;
; scaled by the 4-byte element size this is byte offset 0xffffc, which still
; fits in 20 bits. The address is formed in %entry and used only in %if.
; %endif stores the loaded value, or 0, to %out[999999] in addrspace(1).
386define void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
387entry:
388  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
389  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143
390  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
391  %tmp0 = icmp eq i32 %tid, 0
392  br i1 %tmp0, label %endif, label %if
393
394if:
395  %tmp1 = load i32, i32 addrspace(2)* %in.gep
396  br label %endif
397
398endif:
399  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
400  store i32 %x, i32 addrspace(1)* %out.gep
401  br label %done
402
403done:
404  ret void
405}
406
407; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32
408; OPT-SI: getelementptr i32, i32 addrspace(2)*
409; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
410; OPT-VI: getelementptr i32, i32 addrspace(2)*
411; OPT: br i1
412
413; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32:
414; GCN: s_and_saveexec_b64
415; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
416; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
417
418; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x40000{{$}}
419
420; VI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
421; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
422
423; GCN: s_or_b64 exec, exec
; addrspace(2) i32 load at constant element offset 262144 (0x40000) from %in;
; scaled by the 4-byte element size this is byte offset 0x100000, the first
; value that no longer fits in 20 bits. The address is formed in %entry and
; used only in %if. %endif stores the loaded value, or 0, to %out[999999] in
; addrspace(1).
424define void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
425entry:
426  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
427  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144
428  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
429  %tmp0 = icmp eq i32 %tid, 0
430  br i1 %tmp0, label %endif, label %if
431
432if:
433  %tmp1 = load i32, i32 addrspace(2)* %in.gep
434  br label %endif
435
436endif:
437  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
438  store i32 %x, i32 addrspace(1)* %out.gep
439  br label %done
440
441done:
442  ret void
443}
444
; Aggregate of two consecutive [3 x float] arrays (six floats in total), so
; element 0 of the second array is float index 3 from the struct start.
445%struct.foo = type { [3 x float], [3 x float] }
446
447; OPT-LABEL: @sink_ds_address(
448; OPT: ptrtoint %struct.foo addrspace(3)* %ptr to i64
449
450; GCN-LABEL: {{^}}sink_ds_address:
451; GCN: s_load_dword [[SREG1:s[0-9]+]],
452; GCN: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
453; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[VREG1]] offset0:3 offset1:5
; Two float loads from addrspace(3) through %ptr. %x points at element 0 and
; %y at element 2 of the struct's second [3 x float] field, i.e. float
; indices 3 and 5 from %ptr. Both addresses are computed in %entry and only
; loaded in %bb32; the loaded values feed an fcmp whose branch targets are
; both unreachable.
454define void @sink_ds_address(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
455entry:
456  %x = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
457  %y = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 2
458  br label %bb32
459
460bb32:
461  %a = load float, float addrspace(3)* %x, align 4
462  %b = load float, float addrspace(3)* %y, align 4
463  %cmp = fcmp one float %a, %b
464  br i1 %cmp, label %bb34, label %bb33
465
466bb33:
467  unreachable
468
469bb34:
470  unreachable
471}
472
473; Address offset is not a multiple of 4. This is a valid mubuf offset,
474; but not smrd.
475
476; OPT-LABEL: @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(
477; OPT: br i1 %tmp0,
478; OPT: if:
479; OPT: %sunkaddr = ptrtoint i8 addrspace(2)* %in to i64
480; OPT: %sunkaddr1 = add i64 %sunkaddr, 4095
; addrspace(2) access where the address is an i8 GEP at constant byte offset
; 4095 (not a multiple of 4). In %if the pointer is bitcast to i32* and an
; i32 is loaded with align 1. The GEP is formed in %entry and its only use is
; the bitcast in %if. %endif stores the loaded value, or 0 when %if was
; skipped, to %out[1024] in addrspace(1).
481define void @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
482entry:
483  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
484  %in.gep = getelementptr i8, i8 addrspace(2)* %in, i64 4095
485  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
486  %tmp0 = icmp eq i32 %tid, 0
487  br i1 %tmp0, label %endif, label %if
488
489if:
490  %bitcast = bitcast i8 addrspace(2)* %in.gep to i32 addrspace(2)*
491  %tmp1 = load i32, i32 addrspace(2)* %bitcast, align 1
492  br label %endif
493
494endif:
495  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
496  store i32 %x, i32 addrspace(1)* %out.gep
497  br label %done
498
499done:
500  ret void
501}
502
; Intrinsic called as mbcnt.lo(-1, 0) by the tests in this file; its result
; is compared against 0 to choose between the %if and %endif blocks.
503declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
504
; Attribute group #0 is applied to the mbcnt.lo declaration and its call
; sites. Group #1 is not referenced anywhere in the visible part of the file.
505attributes #0 = { nounwind readnone }
506attributes #1 = { nounwind }
507