1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI
3; RUN: llc < %s -march=amdgcn -mcpu=tonga  -verify-machineinstrs | FileCheck %s --check-prefix=VI
4; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s --check-prefix=GFX9
5; RUN: llc < %s -march=r600 -mcpu=redwood  -verify-machineinstrs | FileCheck %s --check-prefix=R600
6; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s --check-prefix=GFX10
7; RUN: llc < %s -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s --check-prefix=GFX11
8
9declare i32 @llvm.fshl.i32(i32, i32, i32) nounwind readnone
10declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone
11declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
12
; Scalar case with a variable shift amount: the kernel stores the result of
; llvm.fshl.i32(%x, %y, %z) to %in.
; The amdgcn checks below expect a v_alignbit_b32 by 1, an s_lshr_b32 by 1 and
; an s_not_b32 of the loaded amount, all feeding a second v_alignbit_b32. The
; r600 checks expect the same shape built from BIT_ALIGN_INT, LSHR and NOT_INT.
13define amdgpu_kernel void @fshl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %z) {
14; SI-LABEL: fshl_i32:
15; SI:       ; %bb.0: ; %entry
16; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
17; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
18; SI-NEXT:    s_load_dword s0, s[0:1], 0xd
19; SI-NEXT:    s_mov_b32 s7, 0xf000
20; SI-NEXT:    s_mov_b32 s6, -1
21; SI-NEXT:    s_waitcnt lgkmcnt(0)
22; SI-NEXT:    v_mov_b32_e32 v0, s3
23; SI-NEXT:    v_alignbit_b32 v0, s2, v0, 1
24; SI-NEXT:    s_not_b32 s0, s0
25; SI-NEXT:    s_lshr_b32 s1, s2, 1
26; SI-NEXT:    v_mov_b32_e32 v1, s0
27; SI-NEXT:    v_alignbit_b32 v0, s1, v0, v1
28; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
29; SI-NEXT:    s_endpgm
30;
31; VI-LABEL: fshl_i32:
32; VI:       ; %bb.0: ; %entry
33; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
34; VI-NEXT:    s_load_dword s4, s[0:1], 0x34
35; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
36; VI-NEXT:    s_waitcnt lgkmcnt(0)
37; VI-NEXT:    v_mov_b32_e32 v0, s3
38; VI-NEXT:    s_not_b32 s4, s4
39; VI-NEXT:    s_lshr_b32 s3, s2, 1
40; VI-NEXT:    v_alignbit_b32 v0, s2, v0, 1
41; VI-NEXT:    v_mov_b32_e32 v1, s4
42; VI-NEXT:    v_alignbit_b32 v2, s3, v0, v1
43; VI-NEXT:    v_mov_b32_e32 v0, s0
44; VI-NEXT:    v_mov_b32_e32 v1, s1
45; VI-NEXT:    flat_store_dword v[0:1], v2
46; VI-NEXT:    s_endpgm
47;
48; GFX9-LABEL: fshl_i32:
49; GFX9:       ; %bb.0: ; %entry
50; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
51; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
52; GFX9-NEXT:    s_load_dword s6, s[0:1], 0x34
53; GFX9-NEXT:    v_mov_b32_e32 v0, 0
54; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
55; GFX9-NEXT:    v_mov_b32_e32 v1, s3
56; GFX9-NEXT:    s_lshr_b32 s0, s2, 1
57; GFX9-NEXT:    s_not_b32 s1, s6
58; GFX9-NEXT:    v_alignbit_b32 v1, s2, v1, 1
59; GFX9-NEXT:    v_mov_b32_e32 v2, s1
60; GFX9-NEXT:    v_alignbit_b32 v1, s0, v1, v2
61; GFX9-NEXT:    global_store_dword v0, v1, s[4:5]
62; GFX9-NEXT:    s_endpgm
63;
64; R600-LABEL: fshl_i32:
65; R600:       ; %bb.0: ; %entry
66; R600-NEXT:    ALU 5, @4, KC0[CB0:0-32], KC1[]
67; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
68; R600-NEXT:    CF_END
69; R600-NEXT:    PAD
70; R600-NEXT:    ALU clause starting at 4:
71; R600-NEXT:     LSHR T0.Z, KC0[2].Z, 1,
72; R600-NEXT:     BIT_ALIGN_INT T0.W, KC0[2].Z, KC0[2].W, 1,
73; R600-NEXT:     NOT_INT * T1.W, KC0[3].X,
74; R600-NEXT:     BIT_ALIGN_INT T0.X, PV.Z, PV.W, PS,
75; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
76; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
77;
78; GFX10-LABEL: fshl_i32:
79; GFX10:       ; %bb.0: ; %entry
80; GFX10-NEXT:    s_clause 0x2
81; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
82; GFX10-NEXT:    s_load_dword s6, s[0:1], 0x34
83; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
84; GFX10-NEXT:    v_mov_b32_e32 v1, 0
85; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
86; GFX10-NEXT:    v_alignbit_b32 v0, s2, s3, 1
87; GFX10-NEXT:    s_lshr_b32 s0, s2, 1
88; GFX10-NEXT:    s_not_b32 s1, s6
89; GFX10-NEXT:    v_alignbit_b32 v0, s0, v0, s1
90; GFX10-NEXT:    global_store_dword v1, v0, s[4:5]
91; GFX10-NEXT:    s_endpgm
92;
93; GFX11-LABEL: fshl_i32:
94; GFX11:       ; %bb.0: ; %entry
95; GFX11-NEXT:    s_clause 0x2
96; GFX11-NEXT:    s_load_b64 s[2:3], s[0:1], 0x2c
97; GFX11-NEXT:    s_load_b32 s4, s[0:1], 0x34
98; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
99; GFX11-NEXT:    v_mov_b32_e32 v1, 0
100; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
101; GFX11-NEXT:    v_alignbit_b32 v0, s2, s3, 1
102; GFX11-NEXT:    s_lshr_b32 s2, s2, 1
103; GFX11-NEXT:    s_not_b32 s3, s4
104; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
105; GFX11-NEXT:    v_alignbit_b32 v0, s2, v0, s3
106; GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
107; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
108; GFX11-NEXT:    s_endpgm
109entry:
110  %0 = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
111  store i32 %0, i32 addrspace(1)* %in
112  ret void
113}
114
; Scalar case with a constant shift amount of 7: the kernel stores the result
; of llvm.fshl.i32(%x, %y, 7) to %in.
; Every target's checks expect this to become a single align-bit instruction
; (v_alignbit_b32 / BIT_ALIGN_INT) with an immediate of 25, i.e. 32 - 7.
115define amdgpu_kernel void @fshl_i32_imm(i32 addrspace(1)* %in, i32 %x, i32 %y) {
116; SI-LABEL: fshl_i32_imm:
117; SI:       ; %bb.0: ; %entry
118; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
119; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
120; SI-NEXT:    s_mov_b32 s3, 0xf000
121; SI-NEXT:    s_mov_b32 s2, -1
122; SI-NEXT:    s_waitcnt lgkmcnt(0)
123; SI-NEXT:    v_mov_b32_e32 v0, s5
124; SI-NEXT:    v_alignbit_b32 v0, s4, v0, 25
125; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
126; SI-NEXT:    s_endpgm
127;
128; VI-LABEL: fshl_i32_imm:
129; VI:       ; %bb.0: ; %entry
130; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
131; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
132; VI-NEXT:    s_waitcnt lgkmcnt(0)
133; VI-NEXT:    v_mov_b32_e32 v0, s3
134; VI-NEXT:    v_alignbit_b32 v2, s2, v0, 25
135; VI-NEXT:    v_mov_b32_e32 v0, s0
136; VI-NEXT:    v_mov_b32_e32 v1, s1
137; VI-NEXT:    flat_store_dword v[0:1], v2
138; VI-NEXT:    s_endpgm
139;
140; GFX9-LABEL: fshl_i32_imm:
141; GFX9:       ; %bb.0: ; %entry
142; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
143; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
144; GFX9-NEXT:    v_mov_b32_e32 v0, 0
145; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
146; GFX9-NEXT:    v_mov_b32_e32 v1, s3
147; GFX9-NEXT:    v_alignbit_b32 v1, s2, v1, 25
148; GFX9-NEXT:    global_store_dword v0, v1, s[4:5]
149; GFX9-NEXT:    s_endpgm
150;
151; R600-LABEL: fshl_i32_imm:
152; R600:       ; %bb.0: ; %entry
153; R600-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
154; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
155; R600-NEXT:    CF_END
156; R600-NEXT:    PAD
157; R600-NEXT:    ALU clause starting at 4:
158; R600-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
159; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
160; R600-NEXT:     BIT_ALIGN_INT * T1.X, KC0[2].Z, KC0[2].W, literal.x,
161; R600-NEXT:    25(3.503246e-44), 0(0.000000e+00)
162;
163; GFX10-LABEL: fshl_i32_imm:
164; GFX10:       ; %bb.0: ; %entry
165; GFX10-NEXT:    s_clause 0x1
166; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
167; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
168; GFX10-NEXT:    v_mov_b32_e32 v0, 0
169; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
170; GFX10-NEXT:    v_alignbit_b32 v1, s2, s3, 25
171; GFX10-NEXT:    global_store_dword v0, v1, s[4:5]
172; GFX10-NEXT:    s_endpgm
173;
174; GFX11-LABEL: fshl_i32_imm:
175; GFX11:       ; %bb.0: ; %entry
176; GFX11-NEXT:    s_clause 0x1
177; GFX11-NEXT:    s_load_b64 s[2:3], s[0:1], 0x2c
178; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
179; GFX11-NEXT:    v_mov_b32_e32 v0, 0
180; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
181; GFX11-NEXT:    v_alignbit_b32 v1, s2, s3, 25
182; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
183; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
184; GFX11-NEXT:    s_endpgm
185entry:
186  %0 = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
187  store i32 %0, i32 addrspace(1)* %in
188  ret void
189}
190
; Two-element vector case with variable shift amounts: the kernel stores the
; result of llvm.fshl.v2i32(%x, %y, %z) to %in.
; The checks below expect the variable-amount sequence (align-bit by 1, shift
; right by 1, bitwise not of the amount, final align-bit) once per element,
; followed by one store of both result dwords.
191define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
192; SI-LABEL: fshl_v2i32:
193; SI:       ; %bb.0: ; %entry
194; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xb
195; SI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x9
196; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xf
197; SI-NEXT:    s_mov_b32 s11, 0xf000
198; SI-NEXT:    s_mov_b32 s10, -1
199; SI-NEXT:    s_waitcnt lgkmcnt(0)
200; SI-NEXT:    v_mov_b32_e32 v0, s7
201; SI-NEXT:    v_alignbit_b32 v0, s5, v0, 1
202; SI-NEXT:    s_not_b32 s1, s1
203; SI-NEXT:    s_lshr_b32 s2, s5, 1
204; SI-NEXT:    v_mov_b32_e32 v1, s1
205; SI-NEXT:    v_alignbit_b32 v1, s2, v0, v1
206; SI-NEXT:    v_mov_b32_e32 v0, s6
207; SI-NEXT:    s_not_b32 s0, s0
208; SI-NEXT:    v_alignbit_b32 v0, s4, v0, 1
209; SI-NEXT:    s_lshr_b32 s1, s4, 1
210; SI-NEXT:    v_mov_b32_e32 v2, s0
211; SI-NEXT:    v_alignbit_b32 v0, s1, v0, v2
212; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[8:11], 0
213; SI-NEXT:    s_endpgm
214;
215; VI-LABEL: fshl_v2i32:
216; VI:       ; %bb.0: ; %entry
217; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
218; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x3c
219; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
220; VI-NEXT:    s_waitcnt lgkmcnt(0)
221; VI-NEXT:    v_mov_b32_e32 v0, s7
222; VI-NEXT:    s_not_b32 s3, s3
223; VI-NEXT:    s_lshr_b32 s7, s5, 1
224; VI-NEXT:    v_alignbit_b32 v0, s5, v0, 1
225; VI-NEXT:    v_mov_b32_e32 v1, s3
226; VI-NEXT:    v_alignbit_b32 v1, s7, v0, v1
227; VI-NEXT:    v_mov_b32_e32 v0, s6
228; VI-NEXT:    s_not_b32 s2, s2
229; VI-NEXT:    v_alignbit_b32 v0, s4, v0, 1
230; VI-NEXT:    s_lshr_b32 s3, s4, 1
231; VI-NEXT:    v_mov_b32_e32 v2, s2
232; VI-NEXT:    v_alignbit_b32 v0, s3, v0, v2
233; VI-NEXT:    v_mov_b32_e32 v3, s1
234; VI-NEXT:    v_mov_b32_e32 v2, s0
235; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
236; VI-NEXT:    s_endpgm
237;
238; GFX9-LABEL: fshl_v2i32:
239; GFX9:       ; %bb.0: ; %entry
240; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
241; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
242; GFX9-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x3c
243; GFX9-NEXT:    v_mov_b32_e32 v2, 0
244; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
245; GFX9-NEXT:    v_mov_b32_e32 v0, s7
246; GFX9-NEXT:    s_lshr_b32 s0, s5, 1
247; GFX9-NEXT:    s_not_b32 s1, s9
248; GFX9-NEXT:    v_alignbit_b32 v0, s5, v0, 1
249; GFX9-NEXT:    v_mov_b32_e32 v1, s1
250; GFX9-NEXT:    v_alignbit_b32 v1, s0, v0, v1
251; GFX9-NEXT:    v_mov_b32_e32 v0, s6
252; GFX9-NEXT:    s_not_b32 s1, s8
253; GFX9-NEXT:    v_alignbit_b32 v0, s4, v0, 1
254; GFX9-NEXT:    s_lshr_b32 s0, s4, 1
255; GFX9-NEXT:    v_mov_b32_e32 v3, s1
256; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, v3
257; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
258; GFX9-NEXT:    s_endpgm
259;
260; R600-LABEL: fshl_v2i32:
261; R600:       ; %bb.0: ; %entry
262; R600-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
263; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
264; R600-NEXT:    CF_END
265; R600-NEXT:    PAD
266; R600-NEXT:    ALU clause starting at 4:
267; R600-NEXT:     LSHR T0.Z, KC0[3].X, 1,
268; R600-NEXT:     BIT_ALIGN_INT * T0.W, KC0[3].X, KC0[3].Z, 1,
269; R600-NEXT:     NOT_INT * T1.W, KC0[4].X,
270; R600-NEXT:     BIT_ALIGN_INT T0.Y, T0.Z, T0.W, PV.W,
271; R600-NEXT:     LSHR T0.Z, KC0[2].W, 1,
272; R600-NEXT:     BIT_ALIGN_INT * T0.W, KC0[2].W, KC0[3].Y, 1,
273; R600-NEXT:     NOT_INT * T1.W, KC0[3].W,
274; R600-NEXT:     BIT_ALIGN_INT T0.X, T0.Z, T0.W, PV.W,
275; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
276; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
277;
278; GFX10-LABEL: fshl_v2i32:
279; GFX10:       ; %bb.0: ; %entry
280; GFX10-NEXT:    s_clause 0x2
281; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
282; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x3c
283; GFX10-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x24
284; GFX10-NEXT:    v_mov_b32_e32 v2, 0
285; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
286; GFX10-NEXT:    v_alignbit_b32 v0, s5, s7, 1
287; GFX10-NEXT:    v_alignbit_b32 v3, s4, s6, 1
288; GFX10-NEXT:    s_lshr_b32 s0, s5, 1
289; GFX10-NEXT:    s_not_b32 s1, s3
290; GFX10-NEXT:    s_lshr_b32 s3, s4, 1
291; GFX10-NEXT:    s_not_b32 s2, s2
292; GFX10-NEXT:    v_alignbit_b32 v1, s0, v0, s1
293; GFX10-NEXT:    v_alignbit_b32 v0, s3, v3, s2
294; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
295; GFX10-NEXT:    s_endpgm
296;
297; GFX11-LABEL: fshl_v2i32:
298; GFX11:       ; %bb.0: ; %entry
299; GFX11-NEXT:    s_clause 0x2
300; GFX11-NEXT:    s_load_b128 s[4:7], s[0:1], 0x2c
301; GFX11-NEXT:    s_load_b64 s[2:3], s[0:1], 0x3c
302; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
303; GFX11-NEXT:    v_mov_b32_e32 v2, 0
304; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
305; GFX11-NEXT:    v_alignbit_b32 v0, s5, s7, 1
306; GFX11-NEXT:    v_alignbit_b32 v3, s4, s6, 1
307; GFX11-NEXT:    s_lshr_b32 s5, s5, 1
308; GFX11-NEXT:    s_not_b32 s3, s3
309; GFX11-NEXT:    s_lshr_b32 s4, s4, 1
310; GFX11-NEXT:    s_not_b32 s2, s2
311; GFX11-NEXT:    v_alignbit_b32 v1, s5, v0, s3
312; GFX11-NEXT:    v_alignbit_b32 v0, s4, v3, s2
313; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
314; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
315; GFX11-NEXT:    s_endpgm
316entry:
317  %0 = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
318  store <2 x i32> %0, <2 x i32> addrspace(1)* %in
319  ret void
320}
321
; Two-element vector case with constant shift amounts <7, 9>: the kernel
; stores the result of llvm.fshl.v2i32(%x, %y, <7, 9>) to %in.
; The checks below expect one align-bit instruction per element, with
; immediates 25 (32 - 7) for element 0 and 23 (32 - 9) for element 1.
322define amdgpu_kernel void @fshl_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
323; SI-LABEL: fshl_v2i32_imm:
324; SI:       ; %bb.0: ; %entry
325; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xb
326; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
327; SI-NEXT:    s_mov_b32 s3, 0xf000
328; SI-NEXT:    s_mov_b32 s2, -1
329; SI-NEXT:    s_waitcnt lgkmcnt(0)
330; SI-NEXT:    v_mov_b32_e32 v0, s7
331; SI-NEXT:    v_mov_b32_e32 v2, s6
332; SI-NEXT:    v_alignbit_b32 v1, s5, v0, 23
333; SI-NEXT:    v_alignbit_b32 v0, s4, v2, 25
334; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
335; SI-NEXT:    s_endpgm
336;
337; VI-LABEL: fshl_v2i32_imm:
338; VI:       ; %bb.0: ; %entry
339; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
340; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
341; VI-NEXT:    s_waitcnt lgkmcnt(0)
342; VI-NEXT:    v_mov_b32_e32 v0, s7
343; VI-NEXT:    v_mov_b32_e32 v2, s6
344; VI-NEXT:    v_alignbit_b32 v1, s5, v0, 23
345; VI-NEXT:    v_alignbit_b32 v0, s4, v2, 25
346; VI-NEXT:    v_mov_b32_e32 v3, s1
347; VI-NEXT:    v_mov_b32_e32 v2, s0
348; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
349; VI-NEXT:    s_endpgm
350;
351; GFX9-LABEL: fshl_v2i32_imm:
352; GFX9:       ; %bb.0: ; %entry
353; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
354; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
355; GFX9-NEXT:    v_mov_b32_e32 v2, 0
356; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
357; GFX9-NEXT:    v_mov_b32_e32 v0, s7
358; GFX9-NEXT:    v_mov_b32_e32 v3, s6
359; GFX9-NEXT:    v_alignbit_b32 v1, s5, v0, 23
360; GFX9-NEXT:    v_alignbit_b32 v0, s4, v3, 25
361; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
362; GFX9-NEXT:    s_endpgm
363;
364; R600-LABEL: fshl_v2i32_imm:
365; R600:       ; %bb.0: ; %entry
366; R600-NEXT:    ALU 5, @4, KC0[CB0:0-32], KC1[]
367; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
368; R600-NEXT:    CF_END
369; R600-NEXT:    PAD
370; R600-NEXT:    ALU clause starting at 4:
371; R600-NEXT:     BIT_ALIGN_INT * T0.Y, KC0[3].X, KC0[3].Z, literal.x,
372; R600-NEXT:    23(3.222986e-44), 0(0.000000e+00)
373; R600-NEXT:     BIT_ALIGN_INT * T0.X, KC0[2].W, KC0[3].Y, literal.x,
374; R600-NEXT:    25(3.503246e-44), 0(0.000000e+00)
375; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
376; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
377;
378; GFX10-LABEL: fshl_v2i32_imm:
379; GFX10:       ; %bb.0: ; %entry
380; GFX10-NEXT:    s_clause 0x1
381; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
382; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
383; GFX10-NEXT:    v_mov_b32_e32 v2, 0
384; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
385; GFX10-NEXT:    v_alignbit_b32 v1, s5, s7, 23
386; GFX10-NEXT:    v_alignbit_b32 v0, s4, s6, 25
387; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
388; GFX10-NEXT:    s_endpgm
389;
390; GFX11-LABEL: fshl_v2i32_imm:
391; GFX11:       ; %bb.0: ; %entry
392; GFX11-NEXT:    s_clause 0x1
393; GFX11-NEXT:    s_load_b128 s[4:7], s[0:1], 0x2c
394; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
395; GFX11-NEXT:    v_mov_b32_e32 v2, 0
396; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
397; GFX11-NEXT:    v_alignbit_b32 v1, s5, s7, 23
398; GFX11-NEXT:    v_alignbit_b32 v0, s4, s6, 25
399; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
400; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
401; GFX11-NEXT:    s_endpgm
402entry:
403  %0 = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 7, i32 9>)
404  store <2 x i32> %0, <2 x i32> addrspace(1)* %in
405  ret void
406}
407
; Four-element vector case with variable shift amounts: the kernel stores the
; result of llvm.fshl.v4i32(%x, %y, %z) to %in.
; The checks below expect the variable-amount sequence (align-bit by 1, shift
; right by 1, bitwise not of the amount, final align-bit) once per element,
; followed by one store of all four result dwords.
408define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
409; SI-LABEL: fshl_v4i32:
410; SI:       ; %bb.0: ; %entry
411; SI-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0xd
412; SI-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x15
413; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
414; SI-NEXT:    s_mov_b32 s3, 0xf000
415; SI-NEXT:    s_mov_b32 s2, -1
416; SI-NEXT:    s_waitcnt lgkmcnt(0)
417; SI-NEXT:    v_mov_b32_e32 v0, s11
418; SI-NEXT:    s_not_b32 s11, s15
419; SI-NEXT:    v_alignbit_b32 v0, s7, v0, 1
420; SI-NEXT:    s_lshr_b32 s7, s7, 1
421; SI-NEXT:    v_mov_b32_e32 v1, s11
422; SI-NEXT:    v_alignbit_b32 v3, s7, v0, v1
423; SI-NEXT:    v_mov_b32_e32 v0, s10
424; SI-NEXT:    s_not_b32 s7, s14
425; SI-NEXT:    v_alignbit_b32 v0, s6, v0, 1
426; SI-NEXT:    s_lshr_b32 s6, s6, 1
427; SI-NEXT:    v_mov_b32_e32 v1, s7
428; SI-NEXT:    v_alignbit_b32 v2, s6, v0, v1
429; SI-NEXT:    v_mov_b32_e32 v0, s9
430; SI-NEXT:    s_not_b32 s6, s13
431; SI-NEXT:    v_alignbit_b32 v0, s5, v0, 1
432; SI-NEXT:    s_lshr_b32 s5, s5, 1
433; SI-NEXT:    v_mov_b32_e32 v1, s6
434; SI-NEXT:    v_alignbit_b32 v1, s5, v0, v1
435; SI-NEXT:    v_mov_b32_e32 v0, s8
436; SI-NEXT:    s_not_b32 s5, s12
437; SI-NEXT:    v_alignbit_b32 v0, s4, v0, 1
438; SI-NEXT:    s_lshr_b32 s4, s4, 1
439; SI-NEXT:    v_mov_b32_e32 v4, s5
440; SI-NEXT:    v_alignbit_b32 v0, s4, v0, v4
441; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
442; SI-NEXT:    s_endpgm
443;
444; VI-LABEL: fshl_v4i32:
445; VI:       ; %bb.0: ; %entry
446; VI-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
447; VI-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x54
448; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
449; VI-NEXT:    s_waitcnt lgkmcnt(0)
450; VI-NEXT:    v_mov_b32_e32 v0, s11
451; VI-NEXT:    s_not_b32 s3, s15
452; VI-NEXT:    s_lshr_b32 s2, s7, 1
453; VI-NEXT:    v_alignbit_b32 v0, s7, v0, 1
454; VI-NEXT:    v_mov_b32_e32 v1, s3
455; VI-NEXT:    v_alignbit_b32 v3, s2, v0, v1
456; VI-NEXT:    v_mov_b32_e32 v0, s10
457; VI-NEXT:    s_not_b32 s3, s14
458; VI-NEXT:    v_alignbit_b32 v0, s6, v0, 1
459; VI-NEXT:    s_lshr_b32 s2, s6, 1
460; VI-NEXT:    v_mov_b32_e32 v1, s3
461; VI-NEXT:    v_alignbit_b32 v2, s2, v0, v1
462; VI-NEXT:    v_mov_b32_e32 v0, s9
463; VI-NEXT:    s_not_b32 s3, s13
464; VI-NEXT:    v_alignbit_b32 v0, s5, v0, 1
465; VI-NEXT:    s_lshr_b32 s2, s5, 1
466; VI-NEXT:    v_mov_b32_e32 v1, s3
467; VI-NEXT:    v_alignbit_b32 v1, s2, v0, v1
468; VI-NEXT:    v_mov_b32_e32 v0, s8
469; VI-NEXT:    s_not_b32 s3, s12
470; VI-NEXT:    v_alignbit_b32 v0, s4, v0, 1
471; VI-NEXT:    s_lshr_b32 s2, s4, 1
472; VI-NEXT:    v_mov_b32_e32 v4, s3
473; VI-NEXT:    v_alignbit_b32 v0, s2, v0, v4
474; VI-NEXT:    v_mov_b32_e32 v5, s1
475; VI-NEXT:    v_mov_b32_e32 v4, s0
476; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
477; VI-NEXT:    s_endpgm
478;
479; GFX9-LABEL: fshl_v4i32:
480; GFX9:       ; %bb.0: ; %entry
481; GFX9-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
482; GFX9-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x54
483; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
484; GFX9-NEXT:    v_mov_b32_e32 v4, 0
485; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
486; GFX9-NEXT:    s_not_b32 s1, s15
487; GFX9-NEXT:    v_mov_b32_e32 v0, s11
488; GFX9-NEXT:    s_lshr_b32 s0, s7, 1
489; GFX9-NEXT:    v_alignbit_b32 v0, s7, v0, 1
490; GFX9-NEXT:    v_mov_b32_e32 v1, s1
491; GFX9-NEXT:    v_alignbit_b32 v3, s0, v0, v1
492; GFX9-NEXT:    v_mov_b32_e32 v0, s10
493; GFX9-NEXT:    s_not_b32 s1, s14
494; GFX9-NEXT:    v_alignbit_b32 v0, s6, v0, 1
495; GFX9-NEXT:    s_lshr_b32 s0, s6, 1
496; GFX9-NEXT:    v_mov_b32_e32 v1, s1
497; GFX9-NEXT:    v_alignbit_b32 v2, s0, v0, v1
498; GFX9-NEXT:    v_mov_b32_e32 v0, s9
499; GFX9-NEXT:    s_not_b32 s1, s13
500; GFX9-NEXT:    v_alignbit_b32 v0, s5, v0, 1
501; GFX9-NEXT:    s_lshr_b32 s0, s5, 1
502; GFX9-NEXT:    v_mov_b32_e32 v1, s1
503; GFX9-NEXT:    v_alignbit_b32 v1, s0, v0, v1
504; GFX9-NEXT:    v_mov_b32_e32 v0, s8
505; GFX9-NEXT:    s_not_b32 s1, s12
506; GFX9-NEXT:    v_alignbit_b32 v0, s4, v0, 1
507; GFX9-NEXT:    s_lshr_b32 s0, s4, 1
508; GFX9-NEXT:    v_mov_b32_e32 v5, s1
509; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, v5
510; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[2:3]
511; GFX9-NEXT:    s_endpgm
512;
513; R600-LABEL: fshl_v4i32:
514; R600:       ; %bb.0: ; %entry
515; R600-NEXT:    ALU 17, @4, KC0[CB0:0-32], KC1[]
516; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
517; R600-NEXT:    CF_END
518; R600-NEXT:    PAD
519; R600-NEXT:    ALU clause starting at 4:
520; R600-NEXT:     LSHR T0.Z, KC0[4].X, 1,
521; R600-NEXT:     BIT_ALIGN_INT * T0.W, KC0[4].X, KC0[5].X, 1,
522; R600-NEXT:     NOT_INT * T1.W, KC0[6].X,
523; R600-NEXT:     LSHR T0.Y, KC0[3].W, 1,
524; R600-NEXT:     BIT_ALIGN_INT T1.Z, KC0[3].W, KC0[4].W, 1,
525; R600-NEXT:     BIT_ALIGN_INT * T0.W, T0.Z, T0.W, PV.W,
526; R600-NEXT:     NOT_INT * T1.W, KC0[5].W,
527; R600-NEXT:     LSHR T1.Y, KC0[3].Z, 1,
528; R600-NEXT:     BIT_ALIGN_INT T0.Z, T0.Y, T1.Z, PV.W,
529; R600-NEXT:     BIT_ALIGN_INT * T1.W, KC0[3].Z, KC0[4].Z, 1,
530; R600-NEXT:     NOT_INT * T2.W, KC0[5].Z,
531; R600-NEXT:     BIT_ALIGN_INT T0.Y, T1.Y, T1.W, PV.W,
532; R600-NEXT:     LSHR T1.Z, KC0[3].Y, 1,
533; R600-NEXT:     BIT_ALIGN_INT * T1.W, KC0[3].Y, KC0[4].Y, 1,
534; R600-NEXT:     NOT_INT * T2.W, KC0[5].Y,
535; R600-NEXT:     BIT_ALIGN_INT T0.X, T1.Z, T1.W, PV.W,
536; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
537; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
538;
539; GFX10-LABEL: fshl_v4i32:
540; GFX10:       ; %bb.0: ; %entry
541; GFX10-NEXT:    s_clause 0x1
542; GFX10-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
543; GFX10-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x54
544; GFX10-NEXT:    v_mov_b32_e32 v4, 0
545; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
546; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
547; GFX10-NEXT:    v_alignbit_b32 v0, s7, s11, 1
548; GFX10-NEXT:    v_alignbit_b32 v1, s6, s10, 1
549; GFX10-NEXT:    v_alignbit_b32 v5, s5, s9, 1
550; GFX10-NEXT:    v_alignbit_b32 v6, s4, s8, 1
551; GFX10-NEXT:    s_lshr_b32 s2, s7, 1
552; GFX10-NEXT:    s_not_b32 s3, s15
553; GFX10-NEXT:    s_lshr_b32 s6, s6, 1
554; GFX10-NEXT:    s_not_b32 s7, s14
555; GFX10-NEXT:    s_lshr_b32 s5, s5, 1
556; GFX10-NEXT:    s_not_b32 s9, s13
557; GFX10-NEXT:    s_lshr_b32 s4, s4, 1
558; GFX10-NEXT:    s_not_b32 s8, s12
559; GFX10-NEXT:    v_alignbit_b32 v3, s2, v0, s3
560; GFX10-NEXT:    v_alignbit_b32 v2, s6, v1, s7
561; GFX10-NEXT:    v_alignbit_b32 v1, s5, v5, s9
562; GFX10-NEXT:    v_alignbit_b32 v0, s4, v6, s8
563; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1]
564; GFX10-NEXT:    s_endpgm
565;
566; GFX11-LABEL: fshl_v4i32:
567; GFX11:       ; %bb.0: ; %entry
568; GFX11-NEXT:    s_clause 0x2
569; GFX11-NEXT:    s_load_b256 s[4:11], s[0:1], 0x34
570; GFX11-NEXT:    s_load_b128 s[12:15], s[0:1], 0x54
571; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
572; GFX11-NEXT:    v_mov_b32_e32 v4, 0
573; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
574; GFX11-NEXT:    v_alignbit_b32 v0, s7, s11, 1
575; GFX11-NEXT:    v_alignbit_b32 v1, s6, s10, 1
576; GFX11-NEXT:    v_alignbit_b32 v5, s5, s9, 1
577; GFX11-NEXT:    v_alignbit_b32 v6, s4, s8, 1
578; GFX11-NEXT:    s_lshr_b32 s2, s7, 1
579; GFX11-NEXT:    s_not_b32 s3, s15
580; GFX11-NEXT:    s_lshr_b32 s6, s6, 1
581; GFX11-NEXT:    s_not_b32 s7, s14
582; GFX11-NEXT:    s_lshr_b32 s5, s5, 1
583; GFX11-NEXT:    s_not_b32 s9, s13
584; GFX11-NEXT:    s_lshr_b32 s4, s4, 1
585; GFX11-NEXT:    s_not_b32 s8, s12
586; GFX11-NEXT:    v_alignbit_b32 v3, s2, v0, s3
587; GFX11-NEXT:    v_alignbit_b32 v2, s6, v1, s7
588; GFX11-NEXT:    v_alignbit_b32 v1, s5, v5, s9
589; GFX11-NEXT:    v_alignbit_b32 v0, s4, v6, s8
590; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
591; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
592; GFX11-NEXT:    s_endpgm
593entry:
594  %0 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z)
595  store <4 x i32> %0, <4 x i32> addrspace(1)* %in
596  ret void
597}
598
; Four-element vector case with constant shift amounts <1, 7, 9, 33>: the
; kernel stores the result of llvm.fshl.v4i32(%x, %y, <1, 7, 9, 33>) to %in.
; The checks below expect one align-bit instruction per element, with
; immediates 31, 25, 23 and 31 for elements 0 through 3. The out-of-range
; amount 33 gets the same immediate as the amount 1, i.e. the amount is
; treated modulo the 32-bit element width.
599define amdgpu_kernel void @fshl_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
600; SI-LABEL: fshl_v4i32_imm:
601; SI:       ; %bb.0: ; %entry
602; SI-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0xd
603; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
604; SI-NEXT:    s_mov_b32 s3, 0xf000
605; SI-NEXT:    s_mov_b32 s2, -1
606; SI-NEXT:    s_waitcnt lgkmcnt(0)
607; SI-NEXT:    v_mov_b32_e32 v0, s11
608; SI-NEXT:    v_mov_b32_e32 v1, s10
609; SI-NEXT:    v_alignbit_b32 v3, s7, v0, 31
610; SI-NEXT:    v_mov_b32_e32 v0, s9
611; SI-NEXT:    v_alignbit_b32 v2, s6, v1, 23
612; SI-NEXT:    v_alignbit_b32 v1, s5, v0, 25
613; SI-NEXT:    v_mov_b32_e32 v0, s8
614; SI-NEXT:    v_alignbit_b32 v0, s4, v0, 31
615; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
616; SI-NEXT:    s_endpgm
617;
618; VI-LABEL: fshl_v4i32_imm:
619; VI:       ; %bb.0: ; %entry
620; VI-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
621; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
622; VI-NEXT:    s_waitcnt lgkmcnt(0)
623; VI-NEXT:    v_mov_b32_e32 v0, s11
624; VI-NEXT:    v_mov_b32_e32 v1, s10
625; VI-NEXT:    v_mov_b32_e32 v4, s9
626; VI-NEXT:    v_alignbit_b32 v3, s7, v0, 31
627; VI-NEXT:    v_alignbit_b32 v2, s6, v1, 23
628; VI-NEXT:    v_alignbit_b32 v1, s5, v4, 25
629; VI-NEXT:    v_mov_b32_e32 v0, s8
630; VI-NEXT:    v_mov_b32_e32 v5, s1
631; VI-NEXT:    v_alignbit_b32 v0, s4, v0, 31
632; VI-NEXT:    v_mov_b32_e32 v4, s0
633; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
634; VI-NEXT:    s_endpgm
635;
636; GFX9-LABEL: fshl_v4i32_imm:
637; GFX9:       ; %bb.0: ; %entry
638; GFX9-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
639; GFX9-NEXT:    v_mov_b32_e32 v4, 0
640; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
641; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
642; GFX9-NEXT:    v_mov_b32_e32 v0, s11
643; GFX9-NEXT:    v_mov_b32_e32 v1, s10
644; GFX9-NEXT:    v_alignbit_b32 v3, s7, v0, 31
645; GFX9-NEXT:    v_mov_b32_e32 v0, s9
646; GFX9-NEXT:    v_alignbit_b32 v2, s6, v1, 23
647; GFX9-NEXT:    v_alignbit_b32 v1, s5, v0, 25
648; GFX9-NEXT:    v_mov_b32_e32 v0, s8
649; GFX9-NEXT:    v_alignbit_b32 v0, s4, v0, 31
650; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1]
651; GFX9-NEXT:    s_endpgm
652;
653; R600-LABEL: fshl_v4i32_imm:
654; R600:       ; %bb.0: ; %entry
655; R600-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
656; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
657; R600-NEXT:    CF_END
658; R600-NEXT:    PAD
659; R600-NEXT:    ALU clause starting at 4:
660; R600-NEXT:     BIT_ALIGN_INT * T0.W, KC0[4].X, KC0[5].X, literal.x,
661; R600-NEXT:    31(4.344025e-44), 0(0.000000e+00)
662; R600-NEXT:     BIT_ALIGN_INT * T0.Z, KC0[3].W, KC0[4].W, literal.x,
663; R600-NEXT:    23(3.222986e-44), 0(0.000000e+00)
664; R600-NEXT:     BIT_ALIGN_INT * T0.Y, KC0[3].Z, KC0[4].Z, literal.x,
665; R600-NEXT:    25(3.503246e-44), 0(0.000000e+00)
666; R600-NEXT:     BIT_ALIGN_INT * T0.X, KC0[3].Y, KC0[4].Y, literal.x,
667; R600-NEXT:    31(4.344025e-44), 0(0.000000e+00)
668; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
669; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
670;
671; GFX10-LABEL: fshl_v4i32_imm:
672; GFX10:       ; %bb.0: ; %entry
673; GFX10-NEXT:    s_clause 0x1
674; GFX10-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
675; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
676; GFX10-NEXT:    v_mov_b32_e32 v4, 0
677; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
678; GFX10-NEXT:    v_alignbit_b32 v3, s7, s11, 31
679; GFX10-NEXT:    v_alignbit_b32 v2, s6, s10, 23
680; GFX10-NEXT:    v_alignbit_b32 v1, s5, s9, 25
681; GFX10-NEXT:    v_alignbit_b32 v0, s4, s8, 31
682; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[2:3]
683; GFX10-NEXT:    s_endpgm
684;
685; GFX11-LABEL: fshl_v4i32_imm:
686; GFX11:       ; %bb.0: ; %entry
687; GFX11-NEXT:    s_clause 0x1
688; GFX11-NEXT:    s_load_b256 s[4:11], s[0:1], 0x34
689; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
690; GFX11-NEXT:    v_mov_b32_e32 v4, 0
691; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
692; GFX11-NEXT:    v_alignbit_b32 v3, s7, s11, 31
693; GFX11-NEXT:    v_alignbit_b32 v2, s6, s10, 23
694; GFX11-NEXT:    v_alignbit_b32 v1, s5, s9, 25
695; GFX11-NEXT:    v_alignbit_b32 v0, s4, s8, 31
696; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
697; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
698; GFX11-NEXT:    s_endpgm
699entry:
700  %0 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 7, i32 9, i32 33>)
701  store <4 x i32> %0, <4 x i32> addrspace(1)* %in
702  ret void
703}
704