1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -o - %s | FileCheck -check-prefixes=GCN,GFX6 %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GCN,GFX10 %s
6; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GFX11 %s
7
; Funnel shift right on i7 with a variable amount. All three operands are
; `inreg` arguments of an amdgpu_ps function, and the function returns the
; result of @llvm.fshr.i7 unchanged.
; i7 is not a power-of-two width. The expected code for every run line contains
; a reciprocal-based sequence built around the constant 7 (v_rcp_iflag_f32,
; v_mul_hi_u32, multiply by 7, subtract, then two compare/select fix-ups).
; NOTE(review): this looks like the shift amount being reduced modulo 7 --
; confirm against the fshr legalization before relying on that reading.
; The final value is copied back to s0 with v_readfirstlane_b32.
; The assertion lines are autogenerated (see the note on the first line of the
; file); regenerate them with the script instead of editing them by hand.
8define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
9; GFX6-LABEL: s_fshr_i7:
10; GFX6:       ; %bb.0:
11; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
12; GFX6-NEXT:    v_rcp_iflag_f32_e32 v0, v0
13; GFX6-NEXT:    s_and_b32 s2, s2, 0x7f
14; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
15; GFX6-NEXT:    s_and_b32 s1, s1, 0x7f
16; GFX6-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
17; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
18; GFX6-NEXT:    v_mul_lo_u32 v1, -7, v0
19; GFX6-NEXT:    v_mul_hi_u32 v1, v0, v1
20; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
21; GFX6-NEXT:    v_mul_hi_u32 v0, s2, v0
22; GFX6-NEXT:    v_mul_lo_u32 v0, v0, 7
23; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
24; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 7, v0
25; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
26; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
27; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 7, v0
28; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
29; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
30; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 6, v0
31; GFX6-NEXT:    v_and_b32_e32 v0, 0x7f, v0
32; GFX6-NEXT:    v_and_b32_e32 v1, 0x7f, v1
33; GFX6-NEXT:    v_lshl_b32_e32 v1, s0, v1
34; GFX6-NEXT:    v_lshr_b32_e32 v0, s1, v0
35; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
36; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
37; GFX6-NEXT:    ; return to shader part epilog
38;
39; GFX8-LABEL: s_fshr_i7:
40; GFX8:       ; %bb.0:
41; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
42; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
43; GFX8-NEXT:    s_and_b32 s2, s2, 0x7f
44; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
45; GFX8-NEXT:    s_and_b32 s1, s1, 0x7f
46; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
47; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
48; GFX8-NEXT:    v_mul_lo_u32 v1, -7, v0
49; GFX8-NEXT:    v_mul_hi_u32 v1, v0, v1
50; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
51; GFX8-NEXT:    v_mul_hi_u32 v0, s2, v0
52; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 7
53; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s2, v0
54; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 7, v0
55; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
56; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
57; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 7, v0
58; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
59; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
60; GFX8-NEXT:    v_sub_u16_e32 v1, 6, v0
61; GFX8-NEXT:    v_and_b32_e32 v0, 0x7f, v0
62; GFX8-NEXT:    v_and_b32_e32 v1, 0x7f, v1
63; GFX8-NEXT:    v_lshlrev_b16_e64 v1, v1, s0
64; GFX8-NEXT:    v_lshrrev_b16_e64 v0, v0, s1
65; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
66; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
67; GFX8-NEXT:    ; return to shader part epilog
68;
69; GFX9-LABEL: s_fshr_i7:
70; GFX9:       ; %bb.0:
71; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
72; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
73; GFX9-NEXT:    s_and_b32 s2, s2, 0x7f
74; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
75; GFX9-NEXT:    s_and_b32 s1, s1, 0x7f
76; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
77; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
78; GFX9-NEXT:    v_mul_lo_u32 v1, -7, v0
79; GFX9-NEXT:    v_mul_hi_u32 v1, v0, v1
80; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
81; GFX9-NEXT:    v_mul_hi_u32 v0, s2, v0
82; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 7
83; GFX9-NEXT:    v_sub_u32_e32 v0, s2, v0
84; GFX9-NEXT:    v_subrev_u32_e32 v1, 7, v0
85; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
86; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
87; GFX9-NEXT:    v_subrev_u32_e32 v1, 7, v0
88; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
89; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
90; GFX9-NEXT:    v_sub_u16_e32 v1, 6, v0
91; GFX9-NEXT:    v_and_b32_e32 v0, 0x7f, v0
92; GFX9-NEXT:    v_and_b32_e32 v1, 0x7f, v1
93; GFX9-NEXT:    v_lshlrev_b16_e64 v1, v1, s0
94; GFX9-NEXT:    v_lshrrev_b16_e64 v0, v0, s1
95; GFX9-NEXT:    v_or_b32_e32 v0, v1, v0
96; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
97; GFX9-NEXT:    ; return to shader part epilog
98;
99; GFX10-LABEL: s_fshr_i7:
100; GFX10:       ; %bb.0:
101; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
102; GFX10-NEXT:    s_and_b32 s2, s2, 0x7f
103; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
104; GFX10-NEXT:    s_and_b32 s1, s1, 0x7f
105; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
106; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
107; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
108; GFX10-NEXT:    v_mul_lo_u32 v1, -7, v0
109; GFX10-NEXT:    v_mul_hi_u32 v1, v0, v1
110; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v1
111; GFX10-NEXT:    v_mul_hi_u32 v0, s2, v0
112; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 7
113; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
114; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 7, v0
115; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
116; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
117; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 7, v0
118; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
119; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
120; GFX10-NEXT:    v_sub_nc_u16 v1, 6, v0
121; GFX10-NEXT:    v_and_b32_e32 v0, 0x7f, v0
122; GFX10-NEXT:    v_and_b32_e32 v1, 0x7f, v1
123; GFX10-NEXT:    v_lshrrev_b16 v0, v0, s1
124; GFX10-NEXT:    v_lshlrev_b16 v1, v1, s0
125; GFX10-NEXT:    v_or_b32_e32 v0, v1, v0
126; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
127; GFX10-NEXT:    ; return to shader part epilog
128;
129; GFX11-LABEL: s_fshr_i7:
130; GFX11:       ; %bb.0:
131; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
132; GFX11-NEXT:    s_and_b32 s2, s2, 0x7f
133; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
134; GFX11-NEXT:    s_and_b32 s1, s1, 0x7f
135; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
136; GFX11-NEXT:    v_rcp_iflag_f32_e32 v0, v0
137; GFX11-NEXT:    s_waitcnt_depctr 0xfff
138; GFX11-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
139; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v0
140; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
141; GFX11-NEXT:    v_mul_lo_u32 v1, -7, v0
142; GFX11-NEXT:    v_mul_hi_u32 v1, v0, v1
143; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
144; GFX11-NEXT:    v_add_nc_u32_e32 v0, v0, v1
145; GFX11-NEXT:    v_mul_hi_u32 v0, s2, v0
146; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
147; GFX11-NEXT:    v_mul_lo_u32 v0, v0, 7
148; GFX11-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
149; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
150; GFX11-NEXT:    v_subrev_nc_u32_e32 v1, 7, v0
151; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
152; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
153; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
154; GFX11-NEXT:    v_subrev_nc_u32_e32 v1, 7, v0
155; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
156; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
157; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
158; GFX11-NEXT:    v_sub_nc_u16 v1, 6, v0
159; GFX11-NEXT:    v_and_b32_e32 v0, 0x7f, v0
160; GFX11-NEXT:    v_and_b32_e32 v1, 0x7f, v1
161; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
162; GFX11-NEXT:    v_lshrrev_b16 v0, v0, s1
163; GFX11-NEXT:    v_lshlrev_b16 v1, v1, s0
164; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
165; GFX11-NEXT:    v_or_b32_e32 v0, v1, v0
166; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
167; GFX11-NEXT:    ; return to shader part epilog
168  %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt)
169  ret i7 %result
170}
171
; Funnel shift right on i7 with a variable amount, using plain (non-inreg)
; arguments and no explicit calling convention on the define. The function
; returns the result of @llvm.fshr.i7 unchanged.
; The expected code works on v0-v4 and ends with s_setpc_b64 s[30:31]. As in
; the inreg i7 test, every run line expects a reciprocal-based sequence with
; the constant 7 ahead of the two shifts and the final v_or_b32.
; NOTE(review): presumably this is the amount reduced modulo 7 -- confirm.
; The assertion lines are autogenerated; regenerate rather than hand-edit.
172define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
173; GFX6-LABEL: v_fshr_i7:
174; GFX6:       ; %bb.0:
175; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
177; GFX6-NEXT:    v_rcp_iflag_f32_e32 v3, v3
178; GFX6-NEXT:    v_and_b32_e32 v2, 0x7f, v2
179; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
180; GFX6-NEXT:    v_and_b32_e32 v1, 0x7f, v1
181; GFX6-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
182; GFX6-NEXT:    v_cvt_u32_f32_e32 v3, v3
183; GFX6-NEXT:    v_mul_lo_u32 v4, -7, v3
184; GFX6-NEXT:    v_mul_hi_u32 v4, v3, v4
185; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
186; GFX6-NEXT:    v_mul_hi_u32 v3, v2, v3
187; GFX6-NEXT:    v_mul_lo_u32 v3, v3, 7
188; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
189; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 7, v2
190; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
191; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
192; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 7, v2
193; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
194; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
195; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 6, v2
196; GFX6-NEXT:    v_and_b32_e32 v2, 0x7f, v2
197; GFX6-NEXT:    v_and_b32_e32 v3, 0x7f, v3
198; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v3, v0
199; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
200; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
201; GFX6-NEXT:    s_setpc_b64 s[30:31]
202;
203; GFX8-LABEL: v_fshr_i7:
204; GFX8:       ; %bb.0:
205; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
207; GFX8-NEXT:    v_rcp_iflag_f32_e32 v3, v3
208; GFX8-NEXT:    v_and_b32_e32 v2, 0x7f, v2
209; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
210; GFX8-NEXT:    v_and_b32_e32 v1, 0x7f, v1
211; GFX8-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
212; GFX8-NEXT:    v_cvt_u32_f32_e32 v3, v3
213; GFX8-NEXT:    v_mul_lo_u32 v4, -7, v3
214; GFX8-NEXT:    v_mul_hi_u32 v4, v3, v4
215; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v4
216; GFX8-NEXT:    v_mul_hi_u32 v3, v2, v3
217; GFX8-NEXT:    v_mul_lo_u32 v3, v3, 7
218; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v3
219; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 7, v2
220; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
221; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
222; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 7, v2
223; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
224; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
225; GFX8-NEXT:    v_sub_u16_e32 v3, 6, v2
226; GFX8-NEXT:    v_and_b32_e32 v2, 0x7f, v2
227; GFX8-NEXT:    v_and_b32_e32 v3, 0x7f, v3
228; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
229; GFX8-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
230; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
231; GFX8-NEXT:    s_setpc_b64 s[30:31]
232;
233; GFX9-LABEL: v_fshr_i7:
234; GFX9:       ; %bb.0:
235; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
236; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
237; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v3
238; GFX9-NEXT:    v_and_b32_e32 v2, 0x7f, v2
239; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
240; GFX9-NEXT:    v_and_b32_e32 v1, 0x7f, v1
241; GFX9-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
242; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
243; GFX9-NEXT:    v_mul_lo_u32 v4, -7, v3
244; GFX9-NEXT:    v_mul_hi_u32 v4, v3, v4
245; GFX9-NEXT:    v_add_u32_e32 v3, v3, v4
246; GFX9-NEXT:    v_mul_hi_u32 v3, v2, v3
247; GFX9-NEXT:    v_mul_lo_u32 v3, v3, 7
248; GFX9-NEXT:    v_sub_u32_e32 v2, v2, v3
249; GFX9-NEXT:    v_subrev_u32_e32 v3, 7, v2
250; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
251; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
252; GFX9-NEXT:    v_subrev_u32_e32 v3, 7, v2
253; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
254; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
255; GFX9-NEXT:    v_sub_u16_e32 v3, 6, v2
256; GFX9-NEXT:    v_and_b32_e32 v2, 0x7f, v2
257; GFX9-NEXT:    v_and_b32_e32 v3, 0x7f, v3
258; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
259; GFX9-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
260; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
261; GFX9-NEXT:    s_setpc_b64 s[30:31]
262;
263; GFX10-LABEL: v_fshr_i7:
264; GFX10:       ; %bb.0:
265; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
267; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
268; GFX10-NEXT:    v_and_b32_e32 v2, 0x7f, v2
269; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
270; GFX10-NEXT:    v_and_b32_e32 v1, 0x7f, v1
271; GFX10-NEXT:    v_rcp_iflag_f32_e32 v3, v3
272; GFX10-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
273; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v3
274; GFX10-NEXT:    v_mul_lo_u32 v4, -7, v3
275; GFX10-NEXT:    v_mul_hi_u32 v4, v3, v4
276; GFX10-NEXT:    v_add_nc_u32_e32 v3, v3, v4
277; GFX10-NEXT:    v_mul_hi_u32 v3, v2, v3
278; GFX10-NEXT:    v_mul_lo_u32 v3, v3, 7
279; GFX10-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
280; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 7, v2
281; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
282; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
283; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 7, v2
284; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
285; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
286; GFX10-NEXT:    v_sub_nc_u16 v3, 6, v2
287; GFX10-NEXT:    v_and_b32_e32 v2, 0x7f, v2
288; GFX10-NEXT:    v_and_b32_e32 v3, 0x7f, v3
289; GFX10-NEXT:    v_lshrrev_b16 v1, v2, v1
290; GFX10-NEXT:    v_lshlrev_b16 v0, v3, v0
291; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
292; GFX10-NEXT:    s_setpc_b64 s[30:31]
293;
294; GFX11-LABEL: v_fshr_i7:
295; GFX11:       ; %bb.0:
296; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
297; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
298; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
299; GFX11-NEXT:    v_and_b32_e32 v2, 0x7f, v2
300; GFX11-NEXT:    v_lshlrev_b16 v0, 1, v0
301; GFX11-NEXT:    v_and_b32_e32 v1, 0x7f, v1
302; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
303; GFX11-NEXT:    v_rcp_iflag_f32_e32 v3, v3
304; GFX11-NEXT:    s_waitcnt_depctr 0xfff
305; GFX11-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
306; GFX11-NEXT:    v_cvt_u32_f32_e32 v3, v3
307; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
308; GFX11-NEXT:    v_mul_lo_u32 v4, -7, v3
309; GFX11-NEXT:    v_mul_hi_u32 v4, v3, v4
310; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
311; GFX11-NEXT:    v_add_nc_u32_e32 v3, v3, v4
312; GFX11-NEXT:    v_mul_hi_u32 v3, v2, v3
313; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
314; GFX11-NEXT:    v_mul_lo_u32 v3, v3, 7
315; GFX11-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
316; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
317; GFX11-NEXT:    v_subrev_nc_u32_e32 v3, 7, v2
318; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
319; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
320; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
321; GFX11-NEXT:    v_subrev_nc_u32_e32 v3, 7, v2
322; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
323; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
324; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
325; GFX11-NEXT:    v_sub_nc_u16 v3, 6, v2
326; GFX11-NEXT:    v_and_b32_e32 v2, 0x7f, v2
327; GFX11-NEXT:    v_and_b32_e32 v3, 0x7f, v3
328; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
329; GFX11-NEXT:    v_lshrrev_b16 v1, v2, v1
330; GFX11-NEXT:    v_lshlrev_b16 v0, v3, v0
331; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
332; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
333; GFX11-NEXT:    s_setpc_b64 s[30:31]
334  %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt)
335  ret i7 %result
336}
337
; Funnel shift right on i8 with a variable amount. All three operands are
; `inreg` arguments of an amdgpu_ps function, and the function returns the
; result of @llvm.fshr.i8 unchanged.
; The expected code is purely scalar: the amount is masked with 7 (s_and_b32)
; and its complement is formed with s_andn2_b32 (s_and_not1_b32 in the gfx1100
; checks). %lhs is shifted left by 1 and then by the complemented amount, the
; zero-extended %rhs is shifted right by the masked amount, and the two halves
; are combined with s_or_b32. No reciprocal sequence appears here.
; The assertion lines are autogenerated; regenerate rather than hand-edit.
338define amdgpu_ps i8 @s_fshr_i8(i8 inreg %lhs, i8 inreg %rhs, i8 inreg %amt) {
339; GFX6-LABEL: s_fshr_i8:
340; GFX6:       ; %bb.0:
341; GFX6-NEXT:    s_and_b32 s3, s2, 7
342; GFX6-NEXT:    s_andn2_b32 s2, 7, s2
343; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
344; GFX6-NEXT:    s_and_b32 s1, s1, 0xff
345; GFX6-NEXT:    s_lshl_b32 s0, s0, s2
346; GFX6-NEXT:    s_lshr_b32 s1, s1, s3
347; GFX6-NEXT:    s_or_b32 s0, s0, s1
348; GFX6-NEXT:    ; return to shader part epilog
349;
350; GFX8-LABEL: s_fshr_i8:
351; GFX8:       ; %bb.0:
352; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
353; GFX8-NEXT:    s_and_b32 s3, s2, 7
354; GFX8-NEXT:    s_andn2_b32 s2, 7, s2
355; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
356; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
357; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
358; GFX8-NEXT:    s_lshr_b32 s1, s1, s3
359; GFX8-NEXT:    s_or_b32 s0, s0, s1
360; GFX8-NEXT:    ; return to shader part epilog
361;
362; GFX9-LABEL: s_fshr_i8:
363; GFX9:       ; %bb.0:
364; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
365; GFX9-NEXT:    s_and_b32 s3, s2, 7
366; GFX9-NEXT:    s_andn2_b32 s2, 7, s2
367; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
368; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
369; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
370; GFX9-NEXT:    s_lshr_b32 s1, s1, s3
371; GFX9-NEXT:    s_or_b32 s0, s0, s1
372; GFX9-NEXT:    ; return to shader part epilog
373;
374; GFX10-LABEL: s_fshr_i8:
375; GFX10:       ; %bb.0:
376; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
377; GFX10-NEXT:    s_and_b32 s3, s2, 7
378; GFX10-NEXT:    s_andn2_b32 s2, 7, s2
379; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
380; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
381; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
382; GFX10-NEXT:    s_lshr_b32 s1, s1, s3
383; GFX10-NEXT:    s_or_b32 s0, s0, s1
384; GFX10-NEXT:    ; return to shader part epilog
385;
386; GFX11-LABEL: s_fshr_i8:
387; GFX11:       ; %bb.0:
388; GFX11-NEXT:    s_and_b32 s1, s1, 0xff
389; GFX11-NEXT:    s_and_b32 s3, s2, 7
390; GFX11-NEXT:    s_and_not1_b32 s2, 7, s2
391; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
392; GFX11-NEXT:    s_bfe_u32 s1, s1, 0x100000
393; GFX11-NEXT:    s_lshl_b32 s0, s0, s2
394; GFX11-NEXT:    s_lshr_b32 s1, s1, s3
395; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
396; GFX11-NEXT:    s_or_b32 s0, s0, s1
397; GFX11-NEXT:    ; return to shader part epilog
398  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt)
399  ret i8 %result
400}
401
; Funnel shift right on i8 with a variable amount, using plain (non-inreg)
; arguments and no explicit calling convention on the define. The function
; returns the result of @llvm.fshr.i8 unchanged.
; The expected code masks the amount with 7 and forms the complemented amount
; with v_xor_b32 against -1 followed by an and with 7. The fiji and gfx900
; checks fold the zero-extension of %rhs into an SDWA right shift
; (src1_sel:BYTE_0); the other run lines expect an explicit and with 0xff.
; The assertion lines are autogenerated; regenerate rather than hand-edit.
402define i8 @v_fshr_i8(i8 %lhs, i8 %rhs, i8 %amt) {
403; GFX6-LABEL: v_fshr_i8:
404; GFX6:       ; %bb.0:
405; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
406; GFX6-NEXT:    v_and_b32_e32 v3, 7, v2
407; GFX6-NEXT:    v_xor_b32_e32 v2, -1, v2
408; GFX6-NEXT:    v_and_b32_e32 v2, 7, v2
409; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
410; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v1
411; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
412; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v3, v1
413; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
414; GFX6-NEXT:    s_setpc_b64 s[30:31]
415;
416; GFX8-LABEL: v_fshr_i8:
417; GFX8:       ; %bb.0:
418; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
419; GFX8-NEXT:    v_and_b32_e32 v3, 7, v2
420; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
421; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
422; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
423; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
424; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
425; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
426; GFX8-NEXT:    s_setpc_b64 s[30:31]
427;
428; GFX9-LABEL: v_fshr_i8:
429; GFX9:       ; %bb.0:
430; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
431; GFX9-NEXT:    v_and_b32_e32 v3, 7, v2
432; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v2
433; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
434; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
435; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
436; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
437; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
438; GFX9-NEXT:    s_setpc_b64 s[30:31]
439;
440; GFX10-LABEL: v_fshr_i8:
441; GFX10:       ; %bb.0:
442; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
443; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
444; GFX10-NEXT:    v_xor_b32_e32 v3, -1, v2
445; GFX10-NEXT:    v_and_b32_e32 v2, 7, v2
446; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
447; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
448; GFX10-NEXT:    v_and_b32_e32 v3, 7, v3
449; GFX10-NEXT:    v_lshrrev_b16 v1, v2, v1
450; GFX10-NEXT:    v_lshlrev_b16 v0, v3, v0
451; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
452; GFX10-NEXT:    s_setpc_b64 s[30:31]
453;
454; GFX11-LABEL: v_fshr_i8:
455; GFX11:       ; %bb.0:
456; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
457; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
458; GFX11-NEXT:    v_xor_b32_e32 v3, -1, v2
459; GFX11-NEXT:    v_and_b32_e32 v2, 7, v2
460; GFX11-NEXT:    v_lshlrev_b16 v0, 1, v0
461; GFX11-NEXT:    v_and_b32_e32 v1, 0xff, v1
462; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
463; GFX11-NEXT:    v_and_b32_e32 v3, 7, v3
464; GFX11-NEXT:    v_lshrrev_b16 v1, v2, v1
465; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
466; GFX11-NEXT:    v_lshlrev_b16 v0, v3, v0
467; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
468; GFX11-NEXT:    s_setpc_b64 s[30:31]
469  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt)
470  ret i8 %result
471}
472
; Funnel shift right on i8 by the constant amount 4, with `inreg` operands in
; an amdgpu_ps function. The function returns the result of @llvm.fshr.i8.
; With a known amount no masking of the shift amount is expected. The tahiti
; checks are a left shift of %lhs by 4, an s_bfe_u32 on %rhs with operand
; 0x40004, and an s_or_b32. The other run lines zero-extend %rhs (and with
; 0xff, then s_bfe_u32 0x100000) and use separate shifts by 4 on each side.
; The assertion lines are autogenerated; regenerate rather than hand-edit.
473define amdgpu_ps i8 @s_fshr_i8_4(i8 inreg %lhs, i8 inreg %rhs) {
474; GFX6-LABEL: s_fshr_i8_4:
475; GFX6:       ; %bb.0:
476; GFX6-NEXT:    s_lshl_b32 s0, s0, 4
477; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x40004
478; GFX6-NEXT:    s_or_b32 s0, s0, s1
479; GFX6-NEXT:    ; return to shader part epilog
480;
481; GFX8-LABEL: s_fshr_i8_4:
482; GFX8:       ; %bb.0:
483; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
484; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
485; GFX8-NEXT:    s_lshl_b32 s0, s0, 4
486; GFX8-NEXT:    s_lshr_b32 s1, s1, 4
487; GFX8-NEXT:    s_or_b32 s0, s0, s1
488; GFX8-NEXT:    ; return to shader part epilog
489;
490; GFX9-LABEL: s_fshr_i8_4:
491; GFX9:       ; %bb.0:
492; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
493; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
494; GFX9-NEXT:    s_lshl_b32 s0, s0, 4
495; GFX9-NEXT:    s_lshr_b32 s1, s1, 4
496; GFX9-NEXT:    s_or_b32 s0, s0, s1
497; GFX9-NEXT:    ; return to shader part epilog
498;
499; GFX10-LABEL: s_fshr_i8_4:
500; GFX10:       ; %bb.0:
501; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
502; GFX10-NEXT:    s_lshl_b32 s0, s0, 4
503; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
504; GFX10-NEXT:    s_lshr_b32 s1, s1, 4
505; GFX10-NEXT:    s_or_b32 s0, s0, s1
506; GFX10-NEXT:    ; return to shader part epilog
507;
508; GFX11-LABEL: s_fshr_i8_4:
509; GFX11:       ; %bb.0:
510; GFX11-NEXT:    s_and_b32 s1, s1, 0xff
511; GFX11-NEXT:    s_lshl_b32 s0, s0, 4
512; GFX11-NEXT:    s_bfe_u32 s1, s1, 0x100000
513; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
514; GFX11-NEXT:    s_lshr_b32 s1, s1, 4
515; GFX11-NEXT:    s_or_b32 s0, s0, s1
516; GFX11-NEXT:    ; return to shader part epilog
517  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4)
518  ret i8 %result
519}
520
; Funnel shift right on i8 by the constant amount 4, using plain (non-inreg)
; arguments and no explicit calling convention on the define. The function
; returns the result of @llvm.fshr.i8.
; The tahiti checks are a left shift by 4, v_bfe_u32 with offset 4 and width
; 4, and an or. The fiji and gfx900 checks use an SDWA right shift whose shift
; amount is materialized first (v_mov_b32 into v2 on fiji, s_mov_b32 into s4 on
; gfx900). The gfx1010 and gfx1100 checks and %rhs with 0xff and then shift by
; the immediate 4.
; The assertion lines are autogenerated; regenerate rather than hand-edit.
521define i8 @v_fshr_i8_4(i8 %lhs, i8 %rhs) {
522; GFX6-LABEL: v_fshr_i8_4:
523; GFX6:       ; %bb.0:
524; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
525; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
526; GFX6-NEXT:    v_bfe_u32 v1, v1, 4, 4
527; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
528; GFX6-NEXT:    s_setpc_b64 s[30:31]
529;
530; GFX8-LABEL: v_fshr_i8_4:
531; GFX8:       ; %bb.0:
532; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
533; GFX8-NEXT:    v_mov_b32_e32 v2, 4
534; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 4, v0
535; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
536; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
537; GFX8-NEXT:    s_setpc_b64 s[30:31]
538;
539; GFX9-LABEL: v_fshr_i8_4:
540; GFX9:       ; %bb.0:
541; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
542; GFX9-NEXT:    s_mov_b32 s4, 4
543; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 4, v0
544; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
545; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
546; GFX9-NEXT:    s_setpc_b64 s[30:31]
547;
548; GFX10-LABEL: v_fshr_i8_4:
549; GFX10:       ; %bb.0:
550; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
551; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
552; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
553; GFX10-NEXT:    v_lshlrev_b16 v0, 4, v0
554; GFX10-NEXT:    v_lshrrev_b16 v1, 4, v1
555; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
556; GFX10-NEXT:    s_setpc_b64 s[30:31]
557;
558; GFX11-LABEL: v_fshr_i8_4:
559; GFX11:       ; %bb.0:
560; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
561; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
562; GFX11-NEXT:    v_and_b32_e32 v1, 0xff, v1
563; GFX11-NEXT:    v_lshlrev_b16 v0, 4, v0
564; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
565; GFX11-NEXT:    v_lshrrev_b16 v1, 4, v1
566; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
567; GFX11-NEXT:    s_setpc_b64 s[30:31]
568  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4)
569  ret i8 %result
570}
571
; Funnel shift right on i8 by the constant amount 5, with `inreg` operands in
; an amdgpu_ps function. The function returns the result of @llvm.fshr.i8.
; Every run line expects %lhs shifted left by 3 (8 - 5) and the low byte of
; %rhs shifted right by 5, combined with s_or_b32. The tahiti checks express
; the right-hand part as a single s_bfe_u32 with operand 0x30005; the other
; run lines zero-extend %rhs first and then use s_lshr_b32 by 5.
; The assertion lines are autogenerated; regenerate rather than hand-edit.
572define amdgpu_ps i8 @s_fshr_i8_5(i8 inreg %lhs, i8 inreg %rhs) {
573; GFX6-LABEL: s_fshr_i8_5:
574; GFX6:       ; %bb.0:
575; GFX6-NEXT:    s_lshl_b32 s0, s0, 3
576; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x30005
577; GFX6-NEXT:    s_or_b32 s0, s0, s1
578; GFX6-NEXT:    ; return to shader part epilog
579;
580; GFX8-LABEL: s_fshr_i8_5:
581; GFX8:       ; %bb.0:
582; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
583; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
584; GFX8-NEXT:    s_lshl_b32 s0, s0, 3
585; GFX8-NEXT:    s_lshr_b32 s1, s1, 5
586; GFX8-NEXT:    s_or_b32 s0, s0, s1
587; GFX8-NEXT:    ; return to shader part epilog
588;
589; GFX9-LABEL: s_fshr_i8_5:
590; GFX9:       ; %bb.0:
591; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
592; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
593; GFX9-NEXT:    s_lshl_b32 s0, s0, 3
594; GFX9-NEXT:    s_lshr_b32 s1, s1, 5
595; GFX9-NEXT:    s_or_b32 s0, s0, s1
596; GFX9-NEXT:    ; return to shader part epilog
597;
598; GFX10-LABEL: s_fshr_i8_5:
599; GFX10:       ; %bb.0:
600; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
601; GFX10-NEXT:    s_lshl_b32 s0, s0, 3
602; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
603; GFX10-NEXT:    s_lshr_b32 s1, s1, 5
604; GFX10-NEXT:    s_or_b32 s0, s0, s1
605; GFX10-NEXT:    ; return to shader part epilog
606;
607; GFX11-LABEL: s_fshr_i8_5:
608; GFX11:       ; %bb.0:
609; GFX11-NEXT:    s_and_b32 s1, s1, 0xff
610; GFX11-NEXT:    s_lshl_b32 s0, s0, 3
611; GFX11-NEXT:    s_bfe_u32 s1, s1, 0x100000
612; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
613; GFX11-NEXT:    s_lshr_b32 s1, s1, 5
614; GFX11-NEXT:    s_or_b32 s0, s0, s1
615; GFX11-NEXT:    ; return to shader part epilog
616  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5)
617  ret i8 %result
618}
619
; Funnel shift right on i8 by the constant amount 5, using plain (non-inreg)
; arguments and no explicit calling convention on the define. The function
; returns the result of @llvm.fshr.i8.
; Every run line expects %lhs shifted left by 3 and %rhs shifted right by 5,
; combined with v_or_b32. The tahiti checks use v_bfe_u32 with offset 5 and
; width 3. The fiji and gfx900 checks both materialize 5 in v2 for an SDWA
; right shift. The gfx1010 and gfx1100 checks and %rhs with 0xff and then
; shift by the immediate 5.
; The assertion lines are autogenerated; regenerate rather than hand-edit.
620define i8 @v_fshr_i8_5(i8 %lhs, i8 %rhs) {
621; GFX6-LABEL: v_fshr_i8_5:
622; GFX6:       ; %bb.0:
623; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
624; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
625; GFX6-NEXT:    v_bfe_u32 v1, v1, 5, 3
626; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
627; GFX6-NEXT:    s_setpc_b64 s[30:31]
628;
629; GFX8-LABEL: v_fshr_i8_5:
630; GFX8:       ; %bb.0:
631; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
632; GFX8-NEXT:    v_mov_b32_e32 v2, 5
633; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 3, v0
634; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
635; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
636; GFX8-NEXT:    s_setpc_b64 s[30:31]
637;
638; GFX9-LABEL: v_fshr_i8_5:
639; GFX9:       ; %bb.0:
640; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
641; GFX9-NEXT:    v_mov_b32_e32 v2, 5
642; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 3, v0
643; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
644; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
645; GFX9-NEXT:    s_setpc_b64 s[30:31]
646;
647; GFX10-LABEL: v_fshr_i8_5:
648; GFX10:       ; %bb.0:
649; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
650; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
651; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
652; GFX10-NEXT:    v_lshlrev_b16 v0, 3, v0
653; GFX10-NEXT:    v_lshrrev_b16 v1, 5, v1
654; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
655; GFX10-NEXT:    s_setpc_b64 s[30:31]
656;
657; GFX11-LABEL: v_fshr_i8_5:
658; GFX11:       ; %bb.0:
659; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
660; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
661; GFX11-NEXT:    v_and_b32_e32 v1, 0xff, v1
662; GFX11-NEXT:    v_lshlrev_b16 v0, 3, v0
663; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
664; GFX11-NEXT:    v_lshrrev_b16 v1, 5, v1
665; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
666; GFX11-NEXT:    s_setpc_b64 s[30:31]
667  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5)
668  ret i8 %result
669}
670
671define amdgpu_ps i16 @s_fshr_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg, i16 inreg %amt.arg) {
672; GFX6-LABEL: s_fshr_v2i8:
673; GFX6:       ; %bb.0:
674; GFX6-NEXT:    s_lshr_b32 s3, s0, 8
675; GFX6-NEXT:    s_lshr_b32 s4, s2, 8
676; GFX6-NEXT:    s_and_b32 s5, s2, 7
677; GFX6-NEXT:    s_andn2_b32 s2, 7, s2
678; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
679; GFX6-NEXT:    s_lshl_b32 s0, s0, s2
680; GFX6-NEXT:    s_and_b32 s2, s1, 0xff
681; GFX6-NEXT:    s_lshr_b32 s2, s2, s5
682; GFX6-NEXT:    s_or_b32 s0, s0, s2
683; GFX6-NEXT:    s_and_b32 s2, s4, 7
684; GFX6-NEXT:    s_andn2_b32 s4, 7, s4
685; GFX6-NEXT:    s_lshl_b32 s3, s3, 1
686; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x80008
687; GFX6-NEXT:    s_lshl_b32 s3, s3, s4
688; GFX6-NEXT:    s_lshr_b32 s1, s1, s2
689; GFX6-NEXT:    s_or_b32 s1, s3, s1
690; GFX6-NEXT:    s_and_b32 s1, s1, 0xff
691; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
692; GFX6-NEXT:    s_lshl_b32 s1, s1, 8
693; GFX6-NEXT:    s_or_b32 s0, s0, s1
694; GFX6-NEXT:    ; return to shader part epilog
695;
696; GFX8-LABEL: s_fshr_v2i8:
697; GFX8:       ; %bb.0:
698; GFX8-NEXT:    s_lshr_b32 s3, s0, 8
699; GFX8-NEXT:    s_lshr_b32 s4, s1, 8
700; GFX8-NEXT:    s_lshr_b32 s5, s2, 8
701; GFX8-NEXT:    s_and_b32 s6, s2, 7
702; GFX8-NEXT:    s_andn2_b32 s2, 7, s2
703; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
704; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
705; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
706; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
707; GFX8-NEXT:    s_andn2_b32 s2, 7, s5
708; GFX8-NEXT:    s_lshl_b32 s3, s3, 1
709; GFX8-NEXT:    s_lshr_b32 s1, s1, s6
710; GFX8-NEXT:    s_lshl_b32 s2, s3, s2
711; GFX8-NEXT:    s_and_b32 s3, s4, 0xff
712; GFX8-NEXT:    s_or_b32 s0, s0, s1
713; GFX8-NEXT:    s_and_b32 s1, s5, 7
714; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
715; GFX8-NEXT:    s_lshr_b32 s1, s3, s1
716; GFX8-NEXT:    s_or_b32 s1, s2, s1
717; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
718; GFX8-NEXT:    s_bfe_u32 s2, 8, 0x100000
719; GFX8-NEXT:    s_and_b32 s0, s0, 0xff
720; GFX8-NEXT:    s_lshl_b32 s1, s1, s2
721; GFX8-NEXT:    s_or_b32 s0, s0, s1
722; GFX8-NEXT:    ; return to shader part epilog
723;
724; GFX9-LABEL: s_fshr_v2i8:
725; GFX9:       ; %bb.0:
726; GFX9-NEXT:    s_lshr_b32 s3, s0, 8
727; GFX9-NEXT:    s_lshr_b32 s4, s1, 8
728; GFX9-NEXT:    s_lshr_b32 s5, s2, 8
729; GFX9-NEXT:    s_and_b32 s6, s2, 7
730; GFX9-NEXT:    s_andn2_b32 s2, 7, s2
731; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
732; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
733; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
734; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
735; GFX9-NEXT:    s_andn2_b32 s2, 7, s5
736; GFX9-NEXT:    s_lshl_b32 s3, s3, 1
737; GFX9-NEXT:    s_lshr_b32 s1, s1, s6
738; GFX9-NEXT:    s_lshl_b32 s2, s3, s2
739; GFX9-NEXT:    s_and_b32 s3, s4, 0xff
740; GFX9-NEXT:    s_or_b32 s0, s0, s1
741; GFX9-NEXT:    s_and_b32 s1, s5, 7
742; GFX9-NEXT:    s_bfe_u32 s3, s3, 0x100000
743; GFX9-NEXT:    s_lshr_b32 s1, s3, s1
744; GFX9-NEXT:    s_or_b32 s1, s2, s1
745; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
746; GFX9-NEXT:    s_bfe_u32 s2, 8, 0x100000
747; GFX9-NEXT:    s_and_b32 s0, s0, 0xff
748; GFX9-NEXT:    s_lshl_b32 s1, s1, s2
749; GFX9-NEXT:    s_or_b32 s0, s0, s1
750; GFX9-NEXT:    ; return to shader part epilog
751;
752; GFX10-LABEL: s_fshr_v2i8:
753; GFX10:       ; %bb.0:
754; GFX10-NEXT:    s_lshr_b32 s4, s1, 8
755; GFX10-NEXT:    s_lshr_b32 s3, s0, 8
756; GFX10-NEXT:    s_lshr_b32 s5, s2, 8
757; GFX10-NEXT:    s_and_b32 s6, s2, 7
758; GFX10-NEXT:    s_andn2_b32 s2, 7, s2
759; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
760; GFX10-NEXT:    s_and_b32 s4, s4, 0xff
761; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
762; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
763; GFX10-NEXT:    s_and_b32 s2, s5, 7
764; GFX10-NEXT:    s_andn2_b32 s5, 7, s5
765; GFX10-NEXT:    s_lshl_b32 s3, s3, 1
766; GFX10-NEXT:    s_bfe_u32 s4, s4, 0x100000
767; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
768; GFX10-NEXT:    s_lshl_b32 s3, s3, s5
769; GFX10-NEXT:    s_lshr_b32 s2, s4, s2
770; GFX10-NEXT:    s_lshr_b32 s1, s1, s6
771; GFX10-NEXT:    s_or_b32 s2, s3, s2
772; GFX10-NEXT:    s_or_b32 s0, s0, s1
773; GFX10-NEXT:    s_and_b32 s1, s2, 0xff
774; GFX10-NEXT:    s_bfe_u32 s2, 8, 0x100000
775; GFX10-NEXT:    s_and_b32 s0, s0, 0xff
776; GFX10-NEXT:    s_lshl_b32 s1, s1, s2
777; GFX10-NEXT:    s_or_b32 s0, s0, s1
778; GFX10-NEXT:    ; return to shader part epilog
779;
780; GFX11-LABEL: s_fshr_v2i8:
781; GFX11:       ; %bb.0:
782; GFX11-NEXT:    s_lshr_b32 s4, s1, 8
783; GFX11-NEXT:    s_lshr_b32 s3, s0, 8
784; GFX11-NEXT:    s_lshr_b32 s5, s2, 8
785; GFX11-NEXT:    s_and_b32 s6, s2, 7
786; GFX11-NEXT:    s_and_not1_b32 s2, 7, s2
787; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
788; GFX11-NEXT:    s_and_b32 s4, s4, 0xff
789; GFX11-NEXT:    s_and_b32 s1, s1, 0xff
790; GFX11-NEXT:    s_lshl_b32 s0, s0, s2
791; GFX11-NEXT:    s_and_b32 s2, s5, 7
792; GFX11-NEXT:    s_and_not1_b32 s5, 7, s5
793; GFX11-NEXT:    s_lshl_b32 s3, s3, 1
794; GFX11-NEXT:    s_bfe_u32 s4, s4, 0x100000
795; GFX11-NEXT:    s_bfe_u32 s1, s1, 0x100000
796; GFX11-NEXT:    s_lshl_b32 s3, s3, s5
797; GFX11-NEXT:    s_lshr_b32 s2, s4, s2
798; GFX11-NEXT:    s_lshr_b32 s1, s1, s6
799; GFX11-NEXT:    s_or_b32 s2, s3, s2
800; GFX11-NEXT:    s_or_b32 s0, s0, s1
801; GFX11-NEXT:    s_and_b32 s1, s2, 0xff
802; GFX11-NEXT:    s_bfe_u32 s2, 8, 0x100000
803; GFX11-NEXT:    s_and_b32 s0, s0, 0xff
804; GFX11-NEXT:    s_lshl_b32 s1, s1, s2
805; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
806; GFX11-NEXT:    s_or_b32 s0, s0, s1
807; GFX11-NEXT:    ; return to shader part epilog
808  %lhs = bitcast i16 %lhs.arg to <2 x i8>
809  %rhs = bitcast i16 %rhs.arg to <2 x i8>
810  %amt = bitcast i16 %amt.arg to <2 x i8>
811  %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt)
812  %cast.result = bitcast <2 x i8> %result to i16
813  ret i16 %cast.result
814}
815
; v_fshr_v2i8: funnel-shift-right on <2 x i8>, with the vector operands passed
; and returned packed in i16 values. The function uses the default calling
; convention with no inreg arguments; in the expected output below the three
; operands are read from v0, v1 and v2 and the function returns via s_setpc_b64.
; The expected-assembly comment lines that follow are autogenerated (see the
; note on the first line of this file); regenerate them with the update script
; rather than editing them by hand.
816define i16 @v_fshr_v2i8(i16 %lhs.arg, i16 %rhs.arg, i16 %amt.arg) {
817; GFX6-LABEL: v_fshr_v2i8:
818; GFX6:       ; %bb.0:
819; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
820; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 8, v2
821; GFX6-NEXT:    v_and_b32_e32 v5, 7, v2
822; GFX6-NEXT:    v_xor_b32_e32 v2, -1, v2
823; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
824; GFX6-NEXT:    v_and_b32_e32 v2, 7, v2
825; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
826; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
827; GFX6-NEXT:    v_and_b32_e32 v2, 0xff, v1
828; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v5, v2
829; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
830; GFX6-NEXT:    v_and_b32_e32 v2, 7, v4
831; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v4
832; GFX6-NEXT:    v_and_b32_e32 v4, 7, v4
833; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 1, v3
834; GFX6-NEXT:    v_bfe_u32 v1, v1, 8, 8
835; GFX6-NEXT:    v_lshlrev_b32_e32 v3, v4, v3
836; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
837; GFX6-NEXT:    v_or_b32_e32 v1, v3, v1
838; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v1
839; GFX6-NEXT:    v_and_b32_e32 v0, 0xff, v0
840; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
841; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
842; GFX6-NEXT:    s_setpc_b64 s[30:31]
843;
844; GFX8-LABEL: v_fshr_v2i8:
845; GFX8:       ; %bb.0:
846; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
847; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
848; GFX8-NEXT:    v_and_b32_e32 v6, 7, v2
849; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
850; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
851; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
852; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
853; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
854; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
855; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
856; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v5
857; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
858; GFX8-NEXT:    v_and_b32_e32 v1, 7, v5
859; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
860; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 1, v3
861; GFX8-NEXT:    v_lshlrev_b16_e32 v2, v2, v3
862; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
863; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
864; GFX8-NEXT:    v_and_b32_e32 v1, 0xff, v1
865; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
866; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
867; GFX8-NEXT:    s_setpc_b64 s[30:31]
868;
869; GFX9-LABEL: v_fshr_v2i8:
870; GFX9:       ; %bb.0:
871; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
872; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
873; GFX9-NEXT:    v_and_b32_e32 v6, 7, v2
874; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v2
875; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
876; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
877; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
878; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
879; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
880; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
881; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v5
882; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
883; GFX9-NEXT:    v_and_b32_e32 v1, 7, v5
884; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
885; GFX9-NEXT:    v_lshlrev_b16_e32 v3, 1, v3
886; GFX9-NEXT:    v_lshlrev_b16_e32 v2, v2, v3
887; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
888; GFX9-NEXT:    v_or_b32_e32 v1, v2, v1
889; GFX9-NEXT:    v_and_b32_e32 v1, 0xff, v1
890; GFX9-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
891; GFX9-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
892; GFX9-NEXT:    s_setpc_b64 s[30:31]
893;
894; GFX10-LABEL: v_fshr_v2i8:
895; GFX10:       ; %bb.0:
896; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
897; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
898; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 8, v2
899; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 8, v0
900; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
901; GFX10-NEXT:    v_and_b32_e32 v7, 7, v2
902; GFX10-NEXT:    v_xor_b32_e32 v2, -1, v2
903; GFX10-NEXT:    v_xor_b32_e32 v6, -1, v3
904; GFX10-NEXT:    v_and_b32_e32 v3, 7, v3
905; GFX10-NEXT:    v_lshlrev_b16 v4, 1, v4
906; GFX10-NEXT:    v_and_b32_e32 v5, 0xff, v5
907; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
908; GFX10-NEXT:    v_and_b32_e32 v6, 7, v6
909; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
910; GFX10-NEXT:    v_and_b32_e32 v2, 7, v2
911; GFX10-NEXT:    v_lshrrev_b16 v3, v3, v5
912; GFX10-NEXT:    s_movk_i32 s4, 0xff
913; GFX10-NEXT:    v_lshlrev_b16 v4, v6, v4
914; GFX10-NEXT:    v_lshrrev_b16 v1, v7, v1
915; GFX10-NEXT:    v_lshlrev_b16 v0, v2, v0
916; GFX10-NEXT:    v_or_b32_e32 v2, v4, v3
917; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
918; GFX10-NEXT:    v_and_b32_sdwa v1, v2, s4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
919; GFX10-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
920; GFX10-NEXT:    s_setpc_b64 s[30:31]
921;
922; GFX11-LABEL: v_fshr_v2i8:
923; GFX11:       ; %bb.0:
924; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
925; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
926; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 8, v2
927; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 8, v0
928; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
929; GFX11-NEXT:    v_and_b32_e32 v7, 7, v2
930; GFX11-NEXT:    v_xor_b32_e32 v2, -1, v2
931; GFX11-NEXT:    v_xor_b32_e32 v6, -1, v3
932; GFX11-NEXT:    v_and_b32_e32 v3, 7, v3
933; GFX11-NEXT:    v_lshlrev_b16 v4, 1, v4
934; GFX11-NEXT:    v_and_b32_e32 v5, 0xff, v5
935; GFX11-NEXT:    v_lshlrev_b16 v0, 1, v0
936; GFX11-NEXT:    v_and_b32_e32 v6, 7, v6
937; GFX11-NEXT:    v_and_b32_e32 v1, 0xff, v1
938; GFX11-NEXT:    v_and_b32_e32 v2, 7, v2
939; GFX11-NEXT:    v_lshrrev_b16 v3, v3, v5
940; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
941; GFX11-NEXT:    v_lshlrev_b16 v4, v6, v4
942; GFX11-NEXT:    v_lshrrev_b16 v1, v7, v1
943; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
944; GFX11-NEXT:    v_lshlrev_b16 v0, v2, v0
945; GFX11-NEXT:    v_or_b32_e32 v2, v4, v3
946; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
947; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
948; GFX11-NEXT:    v_and_b32_e32 v1, 0xff, v2
949; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
950; GFX11-NEXT:    v_and_b32_e32 v0, 0xff, v0
951; GFX11-NEXT:    v_lshlrev_b16 v1, 8, v1
952; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
953; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
954; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test: reinterpret each i16 argument as <2 x i8>, apply the
; element-wise llvm.fshr.v2i8 intrinsic, then reinterpret the <2 x i8> result
; as the i16 return value.
955  %lhs = bitcast i16 %lhs.arg to <2 x i8>
956  %rhs = bitcast i16 %rhs.arg to <2 x i8>
957  %amt = bitcast i16 %amt.arg to <2 x i8>
958  %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt)
959  %cast.result = bitcast <2 x i8> %result to i16
960  ret i16 %cast.result
961}
962
; s_fshr_v4i8: funnel-shift-right on <4 x i8>, with the vector operands passed
; and returned packed in i32 values. The function is an amdgpu_ps entry point
; whose three arguments are all marked inreg; in the expected output below the
; operands are read from s0, s1 and s2 and the result is left in s0.
; The expected-assembly comment lines that follow are autogenerated (see the
; note on the first line of this file); regenerate them with the update script
; rather than editing them by hand.
963define amdgpu_ps i32 @s_fshr_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg, i32 inreg %amt.arg) {
964; GFX6-LABEL: s_fshr_v4i8:
965; GFX6:       ; %bb.0:
966; GFX6-NEXT:    s_lshr_b32 s3, s0, 8
967; GFX6-NEXT:    s_lshr_b32 s4, s0, 16
968; GFX6-NEXT:    s_lshr_b32 s5, s0, 24
969; GFX6-NEXT:    s_lshr_b32 s7, s2, 8
970; GFX6-NEXT:    s_lshr_b32 s8, s2, 16
971; GFX6-NEXT:    s_lshr_b32 s9, s2, 24
972; GFX6-NEXT:    s_and_b32 s10, s2, 7
973; GFX6-NEXT:    s_andn2_b32 s2, 7, s2
974; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
975; GFX6-NEXT:    s_lshl_b32 s0, s0, s2
976; GFX6-NEXT:    s_and_b32 s2, s1, 0xff
977; GFX6-NEXT:    s_lshr_b32 s2, s2, s10
978; GFX6-NEXT:    s_or_b32 s0, s0, s2
979; GFX6-NEXT:    s_and_b32 s2, s7, 7
980; GFX6-NEXT:    s_andn2_b32 s7, 7, s7
981; GFX6-NEXT:    s_lshl_b32 s3, s3, 1
982; GFX6-NEXT:    s_lshl_b32 s3, s3, s7
983; GFX6-NEXT:    s_bfe_u32 s7, s1, 0x80008
984; GFX6-NEXT:    s_lshr_b32 s2, s7, s2
985; GFX6-NEXT:    s_lshr_b32 s6, s1, 24
986; GFX6-NEXT:    s_or_b32 s2, s3, s2
987; GFX6-NEXT:    s_and_b32 s3, s8, 7
988; GFX6-NEXT:    s_andn2_b32 s7, 7, s8
989; GFX6-NEXT:    s_lshl_b32 s4, s4, 1
990; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x80010
991; GFX6-NEXT:    s_lshl_b32 s4, s4, s7
992; GFX6-NEXT:    s_lshr_b32 s1, s1, s3
993; GFX6-NEXT:    s_or_b32 s1, s4, s1
994; GFX6-NEXT:    s_and_b32 s3, s9, 7
995; GFX6-NEXT:    s_andn2_b32 s4, 7, s9
996; GFX6-NEXT:    s_lshl_b32 s5, s5, 1
997; GFX6-NEXT:    s_and_b32 s2, s2, 0xff
998; GFX6-NEXT:    s_lshl_b32 s4, s5, s4
999; GFX6-NEXT:    s_lshr_b32 s3, s6, s3
1000; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
1001; GFX6-NEXT:    s_lshl_b32 s2, s2, 8
1002; GFX6-NEXT:    s_and_b32 s1, s1, 0xff
1003; GFX6-NEXT:    s_or_b32 s3, s4, s3
1004; GFX6-NEXT:    s_or_b32 s0, s0, s2
1005; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
1006; GFX6-NEXT:    s_or_b32 s0, s0, s1
1007; GFX6-NEXT:    s_and_b32 s1, s3, 0xff
1008; GFX6-NEXT:    s_lshl_b32 s1, s1, 24
1009; GFX6-NEXT:    s_or_b32 s0, s0, s1
1010; GFX6-NEXT:    ; return to shader part epilog
1011;
1012; GFX8-LABEL: s_fshr_v4i8:
1013; GFX8:       ; %bb.0:
1014; GFX8-NEXT:    s_lshr_b32 s3, s0, 8
1015; GFX8-NEXT:    s_lshr_b32 s4, s0, 16
1016; GFX8-NEXT:    s_lshr_b32 s5, s0, 24
1017; GFX8-NEXT:    s_lshr_b32 s6, s1, 8
1018; GFX8-NEXT:    s_lshr_b32 s7, s1, 16
1019; GFX8-NEXT:    s_lshr_b32 s8, s1, 24
1020; GFX8-NEXT:    s_lshr_b32 s9, s2, 8
1021; GFX8-NEXT:    s_lshr_b32 s10, s2, 16
1022; GFX8-NEXT:    s_lshr_b32 s11, s2, 24
1023; GFX8-NEXT:    s_and_b32 s12, s2, 7
1024; GFX8-NEXT:    s_andn2_b32 s2, 7, s2
1025; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
1026; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
1027; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
1028; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
1029; GFX8-NEXT:    s_andn2_b32 s2, 7, s9
1030; GFX8-NEXT:    s_lshl_b32 s3, s3, 1
1031; GFX8-NEXT:    s_lshr_b32 s1, s1, s12
1032; GFX8-NEXT:    s_lshl_b32 s2, s3, s2
1033; GFX8-NEXT:    s_and_b32 s3, s6, 0xff
1034; GFX8-NEXT:    s_or_b32 s0, s0, s1
1035; GFX8-NEXT:    s_and_b32 s1, s9, 7
1036; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
1037; GFX8-NEXT:    s_lshr_b32 s1, s3, s1
1038; GFX8-NEXT:    s_andn2_b32 s3, 7, s10
1039; GFX8-NEXT:    s_lshl_b32 s4, s4, 1
1040; GFX8-NEXT:    s_lshl_b32 s3, s4, s3
1041; GFX8-NEXT:    s_and_b32 s4, s7, 0xff
1042; GFX8-NEXT:    s_or_b32 s1, s2, s1
1043; GFX8-NEXT:    s_and_b32 s2, s10, 7
1044; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
1045; GFX8-NEXT:    s_lshr_b32 s2, s4, s2
1046; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
1047; GFX8-NEXT:    s_or_b32 s2, s3, s2
1048; GFX8-NEXT:    s_and_b32 s3, s11, 7
1049; GFX8-NEXT:    s_andn2_b32 s4, 7, s11
1050; GFX8-NEXT:    s_lshl_b32 s5, s5, 1
1051; GFX8-NEXT:    s_and_b32 s0, s0, 0xff
1052; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
1053; GFX8-NEXT:    s_lshl_b32 s4, s5, s4
1054; GFX8-NEXT:    s_lshr_b32 s3, s8, s3
1055; GFX8-NEXT:    s_or_b32 s0, s0, s1
1056; GFX8-NEXT:    s_and_b32 s1, s2, 0xff
1057; GFX8-NEXT:    s_or_b32 s3, s4, s3
1058; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
1059; GFX8-NEXT:    s_or_b32 s0, s0, s1
1060; GFX8-NEXT:    s_and_b32 s1, s3, 0xff
1061; GFX8-NEXT:    s_lshl_b32 s1, s1, 24
1062; GFX8-NEXT:    s_or_b32 s0, s0, s1
1063; GFX8-NEXT:    ; return to shader part epilog
1064;
1065; GFX9-LABEL: s_fshr_v4i8:
1066; GFX9:       ; %bb.0:
1067; GFX9-NEXT:    s_lshr_b32 s3, s0, 8
1068; GFX9-NEXT:    s_lshr_b32 s4, s0, 16
1069; GFX9-NEXT:    s_lshr_b32 s5, s0, 24
1070; GFX9-NEXT:    s_lshr_b32 s6, s1, 8
1071; GFX9-NEXT:    s_lshr_b32 s7, s1, 16
1072; GFX9-NEXT:    s_lshr_b32 s8, s1, 24
1073; GFX9-NEXT:    s_lshr_b32 s9, s2, 8
1074; GFX9-NEXT:    s_lshr_b32 s10, s2, 16
1075; GFX9-NEXT:    s_lshr_b32 s11, s2, 24
1076; GFX9-NEXT:    s_and_b32 s12, s2, 7
1077; GFX9-NEXT:    s_andn2_b32 s2, 7, s2
1078; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
1079; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
1080; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
1081; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
1082; GFX9-NEXT:    s_andn2_b32 s2, 7, s9
1083; GFX9-NEXT:    s_lshl_b32 s3, s3, 1
1084; GFX9-NEXT:    s_lshr_b32 s1, s1, s12
1085; GFX9-NEXT:    s_lshl_b32 s2, s3, s2
1086; GFX9-NEXT:    s_and_b32 s3, s6, 0xff
1087; GFX9-NEXT:    s_or_b32 s0, s0, s1
1088; GFX9-NEXT:    s_and_b32 s1, s9, 7
1089; GFX9-NEXT:    s_bfe_u32 s3, s3, 0x100000
1090; GFX9-NEXT:    s_lshr_b32 s1, s3, s1
1091; GFX9-NEXT:    s_andn2_b32 s3, 7, s10
1092; GFX9-NEXT:    s_lshl_b32 s4, s4, 1
1093; GFX9-NEXT:    s_lshl_b32 s3, s4, s3
1094; GFX9-NEXT:    s_and_b32 s4, s7, 0xff
1095; GFX9-NEXT:    s_or_b32 s1, s2, s1
1096; GFX9-NEXT:    s_and_b32 s2, s10, 7
1097; GFX9-NEXT:    s_bfe_u32 s4, s4, 0x100000
1098; GFX9-NEXT:    s_lshr_b32 s2, s4, s2
1099; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
1100; GFX9-NEXT:    s_or_b32 s2, s3, s2
1101; GFX9-NEXT:    s_and_b32 s3, s11, 7
1102; GFX9-NEXT:    s_andn2_b32 s4, 7, s11
1103; GFX9-NEXT:    s_lshl_b32 s5, s5, 1
1104; GFX9-NEXT:    s_and_b32 s0, s0, 0xff
1105; GFX9-NEXT:    s_lshl_b32 s1, s1, 8
1106; GFX9-NEXT:    s_lshl_b32 s4, s5, s4
1107; GFX9-NEXT:    s_lshr_b32 s3, s8, s3
1108; GFX9-NEXT:    s_or_b32 s0, s0, s1
1109; GFX9-NEXT:    s_and_b32 s1, s2, 0xff
1110; GFX9-NEXT:    s_or_b32 s3, s4, s3
1111; GFX9-NEXT:    s_lshl_b32 s1, s1, 16
1112; GFX9-NEXT:    s_or_b32 s0, s0, s1
1113; GFX9-NEXT:    s_and_b32 s1, s3, 0xff
1114; GFX9-NEXT:    s_lshl_b32 s1, s1, 24
1115; GFX9-NEXT:    s_or_b32 s0, s0, s1
1116; GFX9-NEXT:    ; return to shader part epilog
1117;
1118; GFX10-LABEL: s_fshr_v4i8:
1119; GFX10:       ; %bb.0:
1120; GFX10-NEXT:    s_lshr_b32 s6, s1, 8
1121; GFX10-NEXT:    s_lshr_b32 s3, s0, 8
1122; GFX10-NEXT:    s_lshr_b32 s4, s0, 16
1123; GFX10-NEXT:    s_lshr_b32 s5, s0, 24
1124; GFX10-NEXT:    s_lshr_b32 s7, s1, 16
1125; GFX10-NEXT:    s_lshr_b32 s8, s1, 24
1126; GFX10-NEXT:    s_lshr_b32 s9, s2, 8
1127; GFX10-NEXT:    s_lshr_b32 s10, s2, 16
1128; GFX10-NEXT:    s_lshr_b32 s11, s2, 24
1129; GFX10-NEXT:    s_and_b32 s12, s2, 7
1130; GFX10-NEXT:    s_andn2_b32 s2, 7, s2
1131; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
1132; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
1133; GFX10-NEXT:    s_and_b32 s6, s6, 0xff
1134; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
1135; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
1136; GFX10-NEXT:    s_and_b32 s2, s9, 7
1137; GFX10-NEXT:    s_andn2_b32 s9, 7, s9
1138; GFX10-NEXT:    s_lshl_b32 s3, s3, 1
1139; GFX10-NEXT:    s_bfe_u32 s6, s6, 0x100000
1140; GFX10-NEXT:    s_lshr_b32 s1, s1, s12
1141; GFX10-NEXT:    s_lshl_b32 s3, s3, s9
1142; GFX10-NEXT:    s_lshr_b32 s2, s6, s2
1143; GFX10-NEXT:    s_and_b32 s6, s7, 0xff
1144; GFX10-NEXT:    s_or_b32 s0, s0, s1
1145; GFX10-NEXT:    s_or_b32 s1, s3, s2
1146; GFX10-NEXT:    s_and_b32 s2, s10, 7
1147; GFX10-NEXT:    s_andn2_b32 s3, 7, s10
1148; GFX10-NEXT:    s_lshl_b32 s4, s4, 1
1149; GFX10-NEXT:    s_bfe_u32 s6, s6, 0x100000
1150; GFX10-NEXT:    s_lshl_b32 s3, s4, s3
1151; GFX10-NEXT:    s_lshr_b32 s2, s6, s2
1152; GFX10-NEXT:    s_andn2_b32 s4, 7, s11
1153; GFX10-NEXT:    s_lshl_b32 s5, s5, 1
1154; GFX10-NEXT:    s_and_b32 s6, s11, 7
1155; GFX10-NEXT:    s_lshl_b32 s4, s5, s4
1156; GFX10-NEXT:    s_lshr_b32 s5, s8, s6
1157; GFX10-NEXT:    s_or_b32 s2, s3, s2
1158; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
1159; GFX10-NEXT:    s_or_b32 s3, s4, s5
1160; GFX10-NEXT:    s_and_b32 s0, s0, 0xff
1161; GFX10-NEXT:    s_lshl_b32 s1, s1, 8
1162; GFX10-NEXT:    s_and_b32 s2, s2, 0xff
1163; GFX10-NEXT:    s_or_b32 s0, s0, s1
1164; GFX10-NEXT:    s_lshl_b32 s1, s2, 16
1165; GFX10-NEXT:    s_and_b32 s2, s3, 0xff
1166; GFX10-NEXT:    s_or_b32 s0, s0, s1
1167; GFX10-NEXT:    s_lshl_b32 s1, s2, 24
1168; GFX10-NEXT:    s_or_b32 s0, s0, s1
1169; GFX10-NEXT:    ; return to shader part epilog
1170;
1171; GFX11-LABEL: s_fshr_v4i8:
1172; GFX11:       ; %bb.0:
1173; GFX11-NEXT:    s_lshr_b32 s6, s1, 8
1174; GFX11-NEXT:    s_lshr_b32 s3, s0, 8
1175; GFX11-NEXT:    s_lshr_b32 s4, s0, 16
1176; GFX11-NEXT:    s_lshr_b32 s5, s0, 24
1177; GFX11-NEXT:    s_lshr_b32 s7, s1, 16
1178; GFX11-NEXT:    s_lshr_b32 s8, s1, 24
1179; GFX11-NEXT:    s_lshr_b32 s9, s2, 8
1180; GFX11-NEXT:    s_lshr_b32 s10, s2, 16
1181; GFX11-NEXT:    s_lshr_b32 s11, s2, 24
1182; GFX11-NEXT:    s_and_b32 s12, s2, 7
1183; GFX11-NEXT:    s_and_not1_b32 s2, 7, s2
1184; GFX11-NEXT:    s_and_b32 s1, s1, 0xff
1185; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
1186; GFX11-NEXT:    s_and_b32 s6, s6, 0xff
1187; GFX11-NEXT:    s_bfe_u32 s1, s1, 0x100000
1188; GFX11-NEXT:    s_lshl_b32 s0, s0, s2
1189; GFX11-NEXT:    s_and_b32 s2, s9, 7
1190; GFX11-NEXT:    s_and_not1_b32 s9, 7, s9
1191; GFX11-NEXT:    s_lshl_b32 s3, s3, 1
1192; GFX11-NEXT:    s_bfe_u32 s6, s6, 0x100000
1193; GFX11-NEXT:    s_lshr_b32 s1, s1, s12
1194; GFX11-NEXT:    s_lshl_b32 s3, s3, s9
1195; GFX11-NEXT:    s_lshr_b32 s2, s6, s2
1196; GFX11-NEXT:    s_and_b32 s6, s7, 0xff
1197; GFX11-NEXT:    s_or_b32 s0, s0, s1
1198; GFX11-NEXT:    s_or_b32 s1, s3, s2
1199; GFX11-NEXT:    s_and_b32 s2, s10, 7
1200; GFX11-NEXT:    s_and_not1_b32 s3, 7, s10
1201; GFX11-NEXT:    s_lshl_b32 s4, s4, 1
1202; GFX11-NEXT:    s_bfe_u32 s6, s6, 0x100000
1203; GFX11-NEXT:    s_lshl_b32 s3, s4, s3
1204; GFX11-NEXT:    s_lshr_b32 s2, s6, s2
1205; GFX11-NEXT:    s_and_not1_b32 s4, 7, s11
1206; GFX11-NEXT:    s_lshl_b32 s5, s5, 1
1207; GFX11-NEXT:    s_and_b32 s6, s11, 7
1208; GFX11-NEXT:    s_lshl_b32 s4, s5, s4
1209; GFX11-NEXT:    s_lshr_b32 s5, s8, s6
1210; GFX11-NEXT:    s_or_b32 s2, s3, s2
1211; GFX11-NEXT:    s_and_b32 s1, s1, 0xff
1212; GFX11-NEXT:    s_or_b32 s3, s4, s5
1213; GFX11-NEXT:    s_and_b32 s0, s0, 0xff
1214; GFX11-NEXT:    s_lshl_b32 s1, s1, 8
1215; GFX11-NEXT:    s_and_b32 s2, s2, 0xff
1216; GFX11-NEXT:    s_or_b32 s0, s0, s1
1217; GFX11-NEXT:    s_lshl_b32 s1, s2, 16
1218; GFX11-NEXT:    s_and_b32 s2, s3, 0xff
1219; GFX11-NEXT:    s_or_b32 s0, s0, s1
1220; GFX11-NEXT:    s_lshl_b32 s1, s2, 24
1221; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1222; GFX11-NEXT:    s_or_b32 s0, s0, s1
1223; GFX11-NEXT:    ; return to shader part epilog
; IR under test: reinterpret each i32 argument as <4 x i8>, apply the
; element-wise llvm.fshr.v4i8 intrinsic, then reinterpret the <4 x i8> result
; as the i32 return value.
1224  %lhs = bitcast i32 %lhs.arg to <4 x i8>
1225  %rhs = bitcast i32 %rhs.arg to <4 x i8>
1226  %amt = bitcast i32 %amt.arg to <4 x i8>
1227  %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt)
1228  %cast.result = bitcast <4 x i8> %result to i32
1229  ret i32 %cast.result
1230}
1231
1232define i32 @v_fshr_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) {
1233; GFX6-LABEL: v_fshr_v4i8:
1234; GFX6:       ; %bb.0:
1235; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1236; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 8, v2
1237; GFX6-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
1238; GFX6-NEXT:    v_lshrrev_b32_e32 v9, 24, v2
1239; GFX6-NEXT:    v_and_b32_e32 v10, 7, v2
1240; GFX6-NEXT:    v_xor_b32_e32 v2, -1, v2
1241; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1242; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
1243; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
1244; GFX6-NEXT:    v_and_b32_e32 v2, 7, v2
1245; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1246; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
1247; GFX6-NEXT:    v_and_b32_e32 v2, 0xff, v1
1248; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v10, v2
1249; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
1250; GFX6-NEXT:    v_and_b32_e32 v2, 7, v7
1251; GFX6-NEXT:    v_xor_b32_e32 v7, -1, v7
1252; GFX6-NEXT:    v_and_b32_e32 v7, 7, v7
1253; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 1, v3
1254; GFX6-NEXT:    v_lshlrev_b32_e32 v3, v7, v3
1255; GFX6-NEXT:    v_bfe_u32 v7, v1, 8, 8
1256; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v2, v7
1257; GFX6-NEXT:    v_xor_b32_e32 v7, -1, v8
1258; GFX6-NEXT:    v_lshrrev_b32_e32 v6, 24, v1
1259; GFX6-NEXT:    v_or_b32_e32 v2, v3, v2
1260; GFX6-NEXT:    v_and_b32_e32 v3, 7, v8
1261; GFX6-NEXT:    v_and_b32_e32 v7, 7, v7
1262; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 1, v4
1263; GFX6-NEXT:    v_bfe_u32 v1, v1, 16, 8
1264; GFX6-NEXT:    v_lshlrev_b32_e32 v4, v7, v4
1265; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v3, v1
1266; GFX6-NEXT:    v_or_b32_e32 v1, v4, v1
1267; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v9
1268; GFX6-NEXT:    v_and_b32_e32 v3, 7, v9
1269; GFX6-NEXT:    v_and_b32_e32 v4, 7, v4
1270; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 1, v5
1271; GFX6-NEXT:    v_and_b32_e32 v2, 0xff, v2
1272; GFX6-NEXT:    v_lshlrev_b32_e32 v4, v4, v5
1273; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v3, v6
1274; GFX6-NEXT:    v_and_b32_e32 v0, 0xff, v0
1275; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
1276; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v1
1277; GFX6-NEXT:    v_or_b32_e32 v3, v4, v3
1278; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
1279; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1280; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
1281; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v3
1282; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
1283; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
1284; GFX6-NEXT:    s_setpc_b64 s[30:31]
1285;
1286; GFX8-LABEL: v_fshr_v4i8:
1287; GFX8:       ; %bb.0:
1288; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1289; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
1290; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
1291; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 24, v2
1292; GFX8-NEXT:    v_and_b32_e32 v8, 7, v2
1293; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
1294; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
1295; GFX8-NEXT:    v_lshlrev_b16_e32 v9, 1, v0
1296; GFX8-NEXT:    v_lshlrev_b16_e32 v2, v2, v9
1297; GFX8-NEXT:    v_lshrrev_b16_sdwa v8, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1298; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1299; GFX8-NEXT:    v_or_b32_e32 v2, v2, v8
1300; GFX8-NEXT:    v_and_b32_e32 v8, 7, v5
1301; GFX8-NEXT:    v_xor_b32_e32 v5, -1, v5
1302; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
1303; GFX8-NEXT:    v_and_b32_e32 v5, 7, v5
1304; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 1, v3
1305; GFX8-NEXT:    v_lshlrev_b16_e32 v3, v5, v3
1306; GFX8-NEXT:    v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1307; GFX8-NEXT:    v_or_b32_e32 v3, v3, v4
1308; GFX8-NEXT:    v_and_b32_e32 v4, 7, v6
1309; GFX8-NEXT:    v_xor_b32_e32 v5, -1, v6
1310; GFX8-NEXT:    v_mov_b32_e32 v6, 1
1311; GFX8-NEXT:    v_mov_b32_e32 v9, 0xff
1312; GFX8-NEXT:    v_and_b32_e32 v5, 7, v5
1313; GFX8-NEXT:    v_lshlrev_b16_sdwa v8, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1314; GFX8-NEXT:    v_lshlrev_b16_e32 v5, v5, v8
1315; GFX8-NEXT:    v_and_b32_sdwa v8, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1316; GFX8-NEXT:    v_lshrrev_b16_e32 v4, v4, v8
1317; GFX8-NEXT:    v_or_b32_e32 v4, v5, v4
1318; GFX8-NEXT:    v_and_b32_e32 v5, 7, v7
1319; GFX8-NEXT:    v_xor_b32_e32 v7, -1, v7
1320; GFX8-NEXT:    v_and_b32_e32 v7, 7, v7
1321; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1322; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v7, v0
1323; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1324; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1325; GFX8-NEXT:    v_mov_b32_e32 v1, 8
1326; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1327; GFX8-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1328; GFX8-NEXT:    v_and_b32_e32 v2, 0xff, v4
1329; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1330; GFX8-NEXT:    v_and_b32_e32 v0, 0xff, v0
1331; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
1332; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
1333; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
1334; GFX8-NEXT:    s_setpc_b64 s[30:31]
1335;
1336; GFX9-LABEL: v_fshr_v4i8:
1337; GFX9:       ; %bb.0:
1338; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1339; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
1340; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
1341; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 24, v2
1342; GFX9-NEXT:    v_and_b32_e32 v8, 7, v2
1343; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v2
1344; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
1345; GFX9-NEXT:    v_lshlrev_b16_e32 v9, 1, v0
1346; GFX9-NEXT:    v_lshlrev_b16_e32 v2, v2, v9
1347; GFX9-NEXT:    v_lshrrev_b16_sdwa v8, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1348; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1349; GFX9-NEXT:    v_or_b32_e32 v2, v2, v8
1350; GFX9-NEXT:    v_and_b32_e32 v8, 7, v5
1351; GFX9-NEXT:    v_xor_b32_e32 v5, -1, v5
1352; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
1353; GFX9-NEXT:    v_and_b32_e32 v5, 7, v5
1354; GFX9-NEXT:    v_lshlrev_b16_e32 v3, 1, v3
1355; GFX9-NEXT:    v_lshlrev_b16_e32 v3, v5, v3
1356; GFX9-NEXT:    v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1357; GFX9-NEXT:    v_or_b32_e32 v3, v3, v4
1358; GFX9-NEXT:    v_and_b32_e32 v4, 7, v6
1359; GFX9-NEXT:    v_xor_b32_e32 v5, -1, v6
1360; GFX9-NEXT:    v_mov_b32_e32 v6, 1
1361; GFX9-NEXT:    v_mov_b32_e32 v9, 0xff
1362; GFX9-NEXT:    v_and_b32_e32 v5, 7, v5
1363; GFX9-NEXT:    v_lshlrev_b16_sdwa v8, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1364; GFX9-NEXT:    v_lshlrev_b16_e32 v5, v5, v8
1365; GFX9-NEXT:    v_and_b32_sdwa v8, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1366; GFX9-NEXT:    v_lshrrev_b16_e32 v4, v4, v8
1367; GFX9-NEXT:    v_or_b32_e32 v4, v5, v4
1368; GFX9-NEXT:    v_and_b32_e32 v5, 7, v7
1369; GFX9-NEXT:    v_xor_b32_e32 v7, -1, v7
1370; GFX9-NEXT:    v_and_b32_e32 v7, 7, v7
1371; GFX9-NEXT:    v_lshlrev_b16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1372; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v7, v0
1373; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1374; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
1375; GFX9-NEXT:    v_mov_b32_e32 v1, 8
1376; GFX9-NEXT:    s_movk_i32 s4, 0xff
1377; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1378; GFX9-NEXT:    v_and_or_b32 v1, v2, s4, v1
1379; GFX9-NEXT:    v_and_b32_e32 v2, 0xff, v4
1380; GFX9-NEXT:    v_and_b32_e32 v0, 0xff, v0
1381; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1382; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
1383; GFX9-NEXT:    v_or3_b32 v0, v1, v2, v0
1384; GFX9-NEXT:    s_setpc_b64 s[30:31]
1385;
1386; GFX10-LABEL: v_fshr_v4i8:
1387; GFX10:       ; %bb.0:
1388; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1389; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1390; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
1391; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1392; GFX10-NEXT:    v_xor_b32_e32 v8, -1, v2
1393; GFX10-NEXT:    v_lshrrev_b32_e32 v11, 16, v2
1394; GFX10-NEXT:    v_lshrrev_b32_e32 v12, 24, v2
1395; GFX10-NEXT:    v_xor_b32_e32 v10, -1, v5
1396; GFX10-NEXT:    v_lshlrev_b16 v3, 1, v3
1397; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
1398; GFX10-NEXT:    v_lshrrev_b32_e32 v6, 24, v0
1399; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 8, v1
1400; GFX10-NEXT:    v_and_b32_e32 v10, 7, v10
1401; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
1402; GFX10-NEXT:    v_and_b32_e32 v8, 7, v8
1403; GFX10-NEXT:    v_mov_b32_e32 v13, 0xff
1404; GFX10-NEXT:    v_xor_b32_e32 v14, -1, v12
1405; GFX10-NEXT:    v_lshlrev_b16 v3, v10, v3
1406; GFX10-NEXT:    v_xor_b32_e32 v10, -1, v11
1407; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 24, v1
1408; GFX10-NEXT:    v_lshlrev_b16 v0, v8, v0
1409; GFX10-NEXT:    v_and_b32_e32 v8, 0xff, v1
1410; GFX10-NEXT:    v_and_b32_e32 v5, 7, v5
1411; GFX10-NEXT:    v_and_b32_e32 v7, 0xff, v7
1412; GFX10-NEXT:    v_and_b32_e32 v11, 7, v11
1413; GFX10-NEXT:    v_and_b32_e32 v10, 7, v10
1414; GFX10-NEXT:    v_lshlrev_b16 v4, 1, v4
1415; GFX10-NEXT:    v_and_b32_sdwa v1, v1, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1416; GFX10-NEXT:    v_and_b32_e32 v13, 7, v14
1417; GFX10-NEXT:    v_lshlrev_b16 v6, 1, v6
1418; GFX10-NEXT:    v_and_b32_e32 v12, 7, v12
1419; GFX10-NEXT:    v_and_b32_e32 v2, 7, v2
1420; GFX10-NEXT:    v_lshrrev_b16 v5, v5, v7
1421; GFX10-NEXT:    v_lshlrev_b16 v4, v10, v4
1422; GFX10-NEXT:    v_lshrrev_b16 v1, v11, v1
1423; GFX10-NEXT:    v_lshlrev_b16 v6, v13, v6
1424; GFX10-NEXT:    v_lshrrev_b16 v7, v12, v9
1425; GFX10-NEXT:    v_lshrrev_b16 v2, v2, v8
1426; GFX10-NEXT:    v_or_b32_e32 v3, v3, v5
1427; GFX10-NEXT:    v_mov_b32_e32 v5, 8
1428; GFX10-NEXT:    v_or_b32_e32 v1, v4, v1
1429; GFX10-NEXT:    v_or_b32_e32 v4, v6, v7
1430; GFX10-NEXT:    v_or_b32_e32 v0, v0, v2
1431; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1432; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
1433; GFX10-NEXT:    v_and_b32_e32 v3, 0xff, v4
1434; GFX10-NEXT:    v_and_or_b32 v0, v0, 0xff, v2
1435; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1436; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
1437; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
1438; GFX10-NEXT:    s_setpc_b64 s[30:31]
1439;
1440; GFX11-LABEL: v_fshr_v4i8:
1441; GFX11:       ; %bb.0:
1442; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1443; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1444; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 8, v1
1445; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 8, v2
1446; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1447; GFX11-NEXT:    v_lshrrev_b32_e32 v11, 16, v2
1448; GFX11-NEXT:    v_lshrrev_b32_e32 v13, 24, v2
1449; GFX11-NEXT:    v_and_b32_e32 v6, 0xff, v6
1450; GFX11-NEXT:    v_xor_b32_e32 v12, -1, v7
1451; GFX11-NEXT:    v_and_b32_e32 v7, 7, v7
1452; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
1453; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
1454; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
1455; GFX11-NEXT:    v_and_b32_e32 v12, 7, v12
1456; GFX11-NEXT:    v_lshlrev_b16 v3, 1, v3
1457; GFX11-NEXT:    v_xor_b32_e32 v14, -1, v11
1458; GFX11-NEXT:    v_lshrrev_b16 v6, v7, v6
1459; GFX11-NEXT:    v_xor_b32_e32 v7, -1, v13
1460; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 24, v1
1461; GFX11-NEXT:    v_xor_b32_e32 v10, -1, v2
1462; GFX11-NEXT:    v_lshlrev_b16 v3, v12, v3
1463; GFX11-NEXT:    v_and_b32_e32 v11, 7, v11
1464; GFX11-NEXT:    v_and_b32_e32 v12, 7, v14
1465; GFX11-NEXT:    v_lshlrev_b16 v4, 1, v4
1466; GFX11-NEXT:    v_and_b32_e32 v8, 0xff, v8
1467; GFX11-NEXT:    v_and_b32_e32 v7, 7, v7
1468; GFX11-NEXT:    v_lshlrev_b16 v5, 1, v5
1469; GFX11-NEXT:    v_and_b32_e32 v13, 7, v13
1470; GFX11-NEXT:    v_and_b32_e32 v2, 7, v2
1471; GFX11-NEXT:    v_and_b32_e32 v10, 7, v10
1472; GFX11-NEXT:    v_lshlrev_b16 v0, 1, v0
1473; GFX11-NEXT:    v_and_b32_e32 v1, 0xff, v1
1474; GFX11-NEXT:    v_or_b32_e32 v3, v3, v6
1475; GFX11-NEXT:    v_lshlrev_b16 v4, v12, v4
1476; GFX11-NEXT:    v_lshrrev_b16 v6, v11, v8
1477; GFX11-NEXT:    v_lshlrev_b16 v5, v7, v5
1478; GFX11-NEXT:    v_lshrrev_b16 v7, v13, v9
1479; GFX11-NEXT:    v_lshlrev_b16 v0, v10, v0
1480; GFX11-NEXT:    v_lshrrev_b16 v1, v2, v1
1481; GFX11-NEXT:    v_and_b32_e32 v2, 0xff, v3
1482; GFX11-NEXT:    v_or_b32_e32 v3, v4, v6
1483; GFX11-NEXT:    v_or_b32_e32 v4, v5, v7
1484; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1485; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
1486; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 8, v2
1487; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1488; GFX11-NEXT:    v_and_b32_e32 v2, 0xff, v3
1489; GFX11-NEXT:    v_and_b32_e32 v3, 0xff, v4
1490; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1491; GFX11-NEXT:    v_and_or_b32 v0, v0, 0xff, v1
1492; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
1493; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
1494; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
1495; GFX11-NEXT:    v_or3_b32 v0, v0, v1, v2
1496; GFX11-NEXT:    s_setpc_b64 s[30:31]
1497  %lhs = bitcast i32 %lhs.arg to <4 x i8>
1498  %rhs = bitcast i32 %rhs.arg to <4 x i8>
1499  %amt = bitcast i32 %amt.arg to <4 x i8>
1500  %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt)
1501  %cast.result = bitcast <4 x i8> %result to i32
1502  ret i32 %cast.result
1503}
1504
; Scalar (amdgpu_ps, inreg operands) funnel shift right on the non-power-of-two
; width i24, lowered through GlobalISel.
;
; What the checks below pin down for each target listed in the run lines:
;  - The shift amount (s2) is masked to 24 bits and reduced modulo 24 with a
;    reciprocal-based unsigned remainder sequence: v_cvt_f32_ubyte0 of 24,
;    v_rcp_iflag_f32, a scale by 0x4f7ffffe, one refinement step that
;    multiplies by 0xffffffe8 (-24 as a 32-bit value), and two conditional
;    subtract-24 corrections.
;  - %lhs (s0) is shifted left by 1 and then by (23 - remainder); %rhs (s1) is
;    masked to 24 bits and shifted right by the remainder; the two halves are
;    ORed together.
;  - The remainder is computed with VALU instructions, so the result is copied
;    back to s0 with v_readfirstlane_b32.
; The gfx900, gfx1010 and gfx1100 checks fold the final shift-left and OR into
; v_lshl_or_b32; the gfx1100 checks additionally expect s_delay_alu and
; s_waitcnt_depctr instructions.
1505define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt) {
1506; GFX6-LABEL: s_fshr_i24:
1507; GFX6:       ; %bb.0:
1508; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1509; GFX6-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1510; GFX6-NEXT:    v_mov_b32_e32 v1, 0xffffffe8
1511; GFX6-NEXT:    s_and_b32 s2, s2, 0xffffff
1512; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
1513; GFX6-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1514; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
1515; GFX6-NEXT:    s_and_b32 s1, s1, 0xffffff
1516; GFX6-NEXT:    v_mul_lo_u32 v1, v1, v0
1517; GFX6-NEXT:    v_mul_hi_u32 v1, v0, v1
1518; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
1519; GFX6-NEXT:    v_mul_hi_u32 v0, s2, v0
1520; GFX6-NEXT:    v_mul_lo_u32 v0, v0, 24
1521; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
1522; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 24, v0
1523; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1524; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1525; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 24, v0
1526; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1527; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1528; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 23, v0
1529; GFX6-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
1530; GFX6-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1531; GFX6-NEXT:    v_lshl_b32_e32 v1, s0, v1
1532; GFX6-NEXT:    v_lshr_b32_e32 v0, s1, v0
1533; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
1534; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
1535; GFX6-NEXT:    ; return to shader part epilog
1536;
1537; GFX8-LABEL: s_fshr_i24:
1538; GFX8:       ; %bb.0:
1539; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1540; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1541; GFX8-NEXT:    v_mov_b32_e32 v1, 0xffffffe8
1542; GFX8-NEXT:    s_and_b32 s2, s2, 0xffffff
1543; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
1544; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1545; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
1546; GFX8-NEXT:    s_and_b32 s1, s1, 0xffffff
1547; GFX8-NEXT:    v_mul_lo_u32 v1, v1, v0
1548; GFX8-NEXT:    v_mul_hi_u32 v1, v0, v1
1549; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
1550; GFX8-NEXT:    v_mul_hi_u32 v0, s2, v0
1551; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 24
1552; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s2, v0
1553; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 24, v0
1554; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1555; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1556; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 24, v0
1557; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1558; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1559; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 23, v0
1560; GFX8-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
1561; GFX8-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1562; GFX8-NEXT:    v_lshlrev_b32_e64 v1, v1, s0
1563; GFX8-NEXT:    v_lshrrev_b32_e64 v0, v0, s1
1564; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
1565; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
1566; GFX8-NEXT:    ; return to shader part epilog
1567;
1568; GFX9-LABEL: s_fshr_i24:
1569; GFX9:       ; %bb.0:
1570; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1571; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1572; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffffffe8
1573; GFX9-NEXT:    s_and_b32 s2, s2, 0xffffff
1574; GFX9-NEXT:    s_and_b32 s1, s1, 0xffffff
1575; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1576; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
1577; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
1578; GFX9-NEXT:    v_mul_lo_u32 v1, v1, v0
1579; GFX9-NEXT:    v_mul_hi_u32 v1, v0, v1
1580; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
1581; GFX9-NEXT:    v_mul_hi_u32 v0, s2, v0
1582; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 24
1583; GFX9-NEXT:    v_sub_u32_e32 v0, s2, v0
1584; GFX9-NEXT:    v_subrev_u32_e32 v1, 24, v0
1585; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1586; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1587; GFX9-NEXT:    v_subrev_u32_e32 v1, 24, v0
1588; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1589; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1590; GFX9-NEXT:    v_sub_u32_e32 v1, 23, v0
1591; GFX9-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
1592; GFX9-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1593; GFX9-NEXT:    v_lshrrev_b32_e64 v0, v0, s1
1594; GFX9-NEXT:    v_lshl_or_b32 v0, s0, v1, v0
1595; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
1596; GFX9-NEXT:    ; return to shader part epilog
1597;
1598; GFX10-LABEL: s_fshr_i24:
1599; GFX10:       ; %bb.0:
1600; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1601; GFX10-NEXT:    s_and_b32 s2, s2, 0xffffff
1602; GFX10-NEXT:    s_and_b32 s1, s1, 0xffffff
1603; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
1604; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1605; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1606; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
1607; GFX10-NEXT:    v_mul_lo_u32 v1, 0xffffffe8, v0
1608; GFX10-NEXT:    v_mul_hi_u32 v1, v0, v1
1609; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v1
1610; GFX10-NEXT:    v_mul_hi_u32 v0, s2, v0
1611; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 24
1612; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
1613; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 24, v0
1614; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
1615; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1616; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 24, v0
1617; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
1618; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1619; GFX10-NEXT:    v_sub_nc_u32_e32 v1, 23, v0
1620; GFX10-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
1621; GFX10-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1622; GFX10-NEXT:    v_lshrrev_b32_e64 v0, v0, s1
1623; GFX10-NEXT:    v_lshl_or_b32 v0, s0, v1, v0
1624; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
1625; GFX10-NEXT:    ; return to shader part epilog
1626;
1627; GFX11-LABEL: s_fshr_i24:
1628; GFX11:       ; %bb.0:
1629; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1630; GFX11-NEXT:    s_and_b32 s2, s2, 0xffffff
1631; GFX11-NEXT:    s_and_b32 s1, s1, 0xffffff
1632; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
1633; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1634; GFX11-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1635; GFX11-NEXT:    s_waitcnt_depctr 0xfff
1636; GFX11-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1637; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v0
1638; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1639; GFX11-NEXT:    v_mul_lo_u32 v1, 0xffffffe8, v0
1640; GFX11-NEXT:    v_mul_hi_u32 v1, v0, v1
1641; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1642; GFX11-NEXT:    v_add_nc_u32_e32 v0, v0, v1
1643; GFX11-NEXT:    v_mul_hi_u32 v0, s2, v0
1644; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1645; GFX11-NEXT:    v_mul_lo_u32 v0, v0, 24
1646; GFX11-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
1647; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1648; GFX11-NEXT:    v_subrev_nc_u32_e32 v1, 24, v0
1649; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
1650; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1651; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1652; GFX11-NEXT:    v_subrev_nc_u32_e32 v1, 24, v0
1653; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
1654; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1655; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1656; GFX11-NEXT:    v_sub_nc_u32_e32 v1, 23, v0
1657; GFX11-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
1658; GFX11-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1659; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1660; GFX11-NEXT:    v_lshrrev_b32_e64 v0, v0, s1
1661; GFX11-NEXT:    v_lshl_or_b32 v0, s0, v1, v0
1662; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1663; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
1664; GFX11-NEXT:    ; return to shader part epilog
  ; Operation under test: a single i24 funnel shift right of (%lhs, %rhs) by %amt.
1665  %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt)
1666  ret i24 %result
1667}
1668
; Funnel shift right on i24 with the default calling convention and no inreg
; operands. The checks below take the operands in v0 (%lhs), v1 (%rhs) and
; v2 (%amt), leave the result in v0, and return with s_setpc_b64 s[30:31].
;
; Expected sequence on every target:
;  - %amt is masked to 24 bits and reduced modulo 24 with a reciprocal-based
;    unsigned remainder (v_cvt_f32_ubyte0 of 24, v_rcp_iflag_f32, scale by
;    0x4f7ffffe, a refinement step using 0xffffffe8, i.e. -24 as a 32-bit
;    value, then two conditional subtract-24 corrections).
;  - The result is ((lhs << 1) << (23 - rem)) | ((rhs & 0xffffff) >> rem),
;    with both shift amounts masked by 0xffffff first.
; The gfx900, gfx1010 and gfx1100 checks merge the last shift-left and OR into
; v_lshl_or_b32. All variants begin with a full s_waitcnt; the gfx1010 and
; gfx1100 checks also expect s_waitcnt_vscnt null, 0x0, and the gfx1100 checks
; add s_delay_alu and s_waitcnt_depctr instructions.
1669define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
1670; GFX6-LABEL: v_fshr_i24:
1671; GFX6:       ; %bb.0:
1672; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1673; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1674; GFX6-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1675; GFX6-NEXT:    v_mov_b32_e32 v4, 0xffffffe8
1676; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1677; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1678; GFX6-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1679; GFX6-NEXT:    v_cvt_u32_f32_e32 v3, v3
1680; GFX6-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1681; GFX6-NEXT:    v_mul_lo_u32 v4, v4, v3
1682; GFX6-NEXT:    v_mul_hi_u32 v4, v3, v4
1683; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
1684; GFX6-NEXT:    v_mul_hi_u32 v3, v2, v3
1685; GFX6-NEXT:    v_mul_lo_u32 v3, v3, 24
1686; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
1687; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 24, v2
1688; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1689; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1690; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 24, v2
1691; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1692; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1693; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 23, v2
1694; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1695; GFX6-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
1696; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v3, v0
1697; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1698; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
1699; GFX6-NEXT:    s_setpc_b64 s[30:31]
1700;
1701; GFX8-LABEL: v_fshr_i24:
1702; GFX8:       ; %bb.0:
1703; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1704; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1705; GFX8-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1706; GFX8-NEXT:    v_mov_b32_e32 v4, 0xffffffe8
1707; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1708; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1709; GFX8-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1710; GFX8-NEXT:    v_cvt_u32_f32_e32 v3, v3
1711; GFX8-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1712; GFX8-NEXT:    v_mul_lo_u32 v4, v4, v3
1713; GFX8-NEXT:    v_mul_hi_u32 v4, v3, v4
1714; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v4
1715; GFX8-NEXT:    v_mul_hi_u32 v3, v2, v3
1716; GFX8-NEXT:    v_mul_lo_u32 v3, v3, 24
1717; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v3
1718; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 24, v2
1719; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1720; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1721; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 24, v2
1722; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1723; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1724; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 23, v2
1725; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1726; GFX8-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
1727; GFX8-NEXT:    v_lshlrev_b32_e32 v0, v3, v0
1728; GFX8-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1729; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1730; GFX8-NEXT:    s_setpc_b64 s[30:31]
1731;
1732; GFX9-LABEL: v_fshr_i24:
1733; GFX9:       ; %bb.0:
1734; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1735; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1736; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1737; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffffffe8
1738; GFX9-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1739; GFX9-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1740; GFX9-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1741; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
1742; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1743; GFX9-NEXT:    v_mul_lo_u32 v4, v4, v3
1744; GFX9-NEXT:    v_mul_hi_u32 v4, v3, v4
1745; GFX9-NEXT:    v_add_u32_e32 v3, v3, v4
1746; GFX9-NEXT:    v_mul_hi_u32 v3, v2, v3
1747; GFX9-NEXT:    v_mul_lo_u32 v3, v3, 24
1748; GFX9-NEXT:    v_sub_u32_e32 v2, v2, v3
1749; GFX9-NEXT:    v_subrev_u32_e32 v3, 24, v2
1750; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1751; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1752; GFX9-NEXT:    v_subrev_u32_e32 v3, 24, v2
1753; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1754; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1755; GFX9-NEXT:    v_sub_u32_e32 v3, 23, v2
1756; GFX9-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1757; GFX9-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
1758; GFX9-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1759; GFX9-NEXT:    v_lshl_or_b32 v0, v0, v3, v1
1760; GFX9-NEXT:    s_setpc_b64 s[30:31]
1761;
1762; GFX10-LABEL: v_fshr_i24:
1763; GFX10:       ; %bb.0:
1764; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1765; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1766; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1767; GFX10-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1768; GFX10-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1769; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1770; GFX10-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1771; GFX10-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1772; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v3
1773; GFX10-NEXT:    v_mul_lo_u32 v4, 0xffffffe8, v3
1774; GFX10-NEXT:    v_mul_hi_u32 v4, v3, v4
1775; GFX10-NEXT:    v_add_nc_u32_e32 v3, v3, v4
1776; GFX10-NEXT:    v_mul_hi_u32 v3, v2, v3
1777; GFX10-NEXT:    v_mul_lo_u32 v3, v3, 24
1778; GFX10-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
1779; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 24, v2
1780; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
1781; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1782; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 24, v2
1783; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
1784; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1785; GFX10-NEXT:    v_sub_nc_u32_e32 v3, 23, v2
1786; GFX10-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1787; GFX10-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
1788; GFX10-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1789; GFX10-NEXT:    v_lshl_or_b32 v0, v0, v3, v1
1790; GFX10-NEXT:    s_setpc_b64 s[30:31]
1791;
1792; GFX11-LABEL: v_fshr_i24:
1793; GFX11:       ; %bb.0:
1794; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1795; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1796; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1797; GFX11-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1798; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1799; GFX11-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1800; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1801; GFX11-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1802; GFX11-NEXT:    s_waitcnt_depctr 0xfff
1803; GFX11-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1804; GFX11-NEXT:    v_cvt_u32_f32_e32 v3, v3
1805; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1806; GFX11-NEXT:    v_mul_lo_u32 v4, 0xffffffe8, v3
1807; GFX11-NEXT:    v_mul_hi_u32 v4, v3, v4
1808; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1809; GFX11-NEXT:    v_add_nc_u32_e32 v3, v3, v4
1810; GFX11-NEXT:    v_mul_hi_u32 v3, v2, v3
1811; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1812; GFX11-NEXT:    v_mul_lo_u32 v3, v3, 24
1813; GFX11-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
1814; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1815; GFX11-NEXT:    v_subrev_nc_u32_e32 v3, 24, v2
1816; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
1817; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1818; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1819; GFX11-NEXT:    v_subrev_nc_u32_e32 v3, 24, v2
1820; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
1821; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1822; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1823; GFX11-NEXT:    v_sub_nc_u32_e32 v3, 23, v2
1824; GFX11-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1825; GFX11-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
1826; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1827; GFX11-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1828; GFX11-NEXT:    v_lshl_or_b32 v0, v0, v3, v1
1829; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; Operation under test: a single i24 funnel shift right of (%lhs, %rhs) by %amt.
1830  %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt)
1831  ret i24 %result
1832}
1833
1834define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) {
1835; GFX6-LABEL: s_fshr_v2i24:
1836; GFX6:       ; %bb.0:
1837; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1838; GFX6-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1839; GFX6-NEXT:    s_lshr_b32 s6, s0, 16
1840; GFX6-NEXT:    s_lshr_b32 s7, s0, 24
1841; GFX6-NEXT:    s_lshr_b32 s8, s1, 8
1842; GFX6-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1843; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
1844; GFX6-NEXT:    s_and_b32 s9, s0, 0xff
1845; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x80008
1846; GFX6-NEXT:    s_and_b32 s1, s1, 0xff
1847; GFX6-NEXT:    s_lshl_b32 s0, s0, 8
1848; GFX6-NEXT:    s_lshl_b32 s1, s1, 8
1849; GFX6-NEXT:    v_mov_b32_e32 v1, 0xffffffe8
1850; GFX6-NEXT:    s_or_b32 s0, s9, s0
1851; GFX6-NEXT:    s_or_b32 s1, s7, s1
1852; GFX6-NEXT:    s_and_b32 s7, s8, 0xff
1853; GFX6-NEXT:    s_lshr_b32 s8, s2, 16
1854; GFX6-NEXT:    s_lshr_b32 s9, s2, 24
1855; GFX6-NEXT:    s_and_b32 s11, s2, 0xff
1856; GFX6-NEXT:    s_bfe_u32 s2, s2, 0x80008
1857; GFX6-NEXT:    v_mul_lo_u32 v2, v1, v0
1858; GFX6-NEXT:    s_lshl_b32 s2, s2, 8
1859; GFX6-NEXT:    s_and_b32 s8, s8, 0xff
1860; GFX6-NEXT:    s_or_b32 s2, s11, s2
1861; GFX6-NEXT:    s_bfe_u32 s8, s8, 0x100000
1862; GFX6-NEXT:    s_lshr_b32 s10, s3, 8
1863; GFX6-NEXT:    s_bfe_u32 s2, s2, 0x100000
1864; GFX6-NEXT:    s_lshl_b32 s8, s8, 16
1865; GFX6-NEXT:    s_and_b32 s3, s3, 0xff
1866; GFX6-NEXT:    s_or_b32 s2, s2, s8
1867; GFX6-NEXT:    s_lshl_b32 s3, s3, 8
1868; GFX6-NEXT:    s_and_b32 s8, s10, 0xff
1869; GFX6-NEXT:    v_mul_hi_u32 v2, v0, v2
1870; GFX6-NEXT:    s_or_b32 s3, s9, s3
1871; GFX6-NEXT:    s_bfe_u32 s8, s8, 0x100000
1872; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x100000
1873; GFX6-NEXT:    s_lshl_b32 s8, s8, 16
1874; GFX6-NEXT:    s_or_b32 s3, s3, s8
1875; GFX6-NEXT:    s_lshr_b32 s8, s4, 16
1876; GFX6-NEXT:    s_lshr_b32 s9, s4, 24
1877; GFX6-NEXT:    s_and_b32 s11, s4, 0xff
1878; GFX6-NEXT:    s_bfe_u32 s4, s4, 0x80008
1879; GFX6-NEXT:    s_lshl_b32 s4, s4, 8
1880; GFX6-NEXT:    s_and_b32 s8, s8, 0xff
1881; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
1882; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v2, 24
1883; GFX6-NEXT:    s_or_b32 s4, s11, s4
1884; GFX6-NEXT:    s_bfe_u32 s8, s8, 0x100000
1885; GFX6-NEXT:    v_rcp_iflag_f32_e32 v2, v2
1886; GFX6-NEXT:    s_bfe_u32 s4, s4, 0x100000
1887; GFX6-NEXT:    s_lshl_b32 s8, s8, 16
1888; GFX6-NEXT:    s_or_b32 s4, s4, s8
1889; GFX6-NEXT:    v_mul_hi_u32 v0, s4, v0
1890; GFX6-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
1891; GFX6-NEXT:    v_cvt_u32_f32_e32 v2, v2
1892; GFX6-NEXT:    s_lshr_b32 s10, s5, 8
1893; GFX6-NEXT:    v_mul_lo_u32 v0, v0, 24
1894; GFX6-NEXT:    s_and_b32 s5, s5, 0xff
1895; GFX6-NEXT:    v_mul_lo_u32 v1, v1, v2
1896; GFX6-NEXT:    s_lshl_b32 s5, s5, 8
1897; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s4, v0
1898; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 24, v0
1899; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1900; GFX6-NEXT:    v_mul_hi_u32 v1, v2, v1
1901; GFX6-NEXT:    s_and_b32 s8, s10, 0xff
1902; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1903; GFX6-NEXT:    s_or_b32 s5, s9, s5
1904; GFX6-NEXT:    s_bfe_u32 s8, s8, 0x100000
1905; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 24, v0
1906; GFX6-NEXT:    s_bfe_u32 s5, s5, 0x100000
1907; GFX6-NEXT:    s_lshl_b32 s8, s8, 16
1908; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1909; GFX6-NEXT:    s_or_b32 s5, s5, s8
1910; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1911; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
1912; GFX6-NEXT:    v_mul_hi_u32 v1, s5, v1
1913; GFX6-NEXT:    s_and_b32 s6, s6, 0xff
1914; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
1915; GFX6-NEXT:    s_bfe_u32 s6, s6, 0x100000
1916; GFX6-NEXT:    v_mul_lo_u32 v1, v1, 24
1917; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 23, v0
1918; GFX6-NEXT:    s_lshl_b32 s4, s6, 17
1919; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
1920; GFX6-NEXT:    s_or_b32 s0, s4, s0
1921; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v3
1922; GFX6-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
1923; GFX6-NEXT:    v_lshl_b32_e32 v2, s0, v2
1924; GFX6-NEXT:    v_lshr_b32_e32 v0, s2, v0
1925; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, s5, v1
1926; GFX6-NEXT:    v_or_b32_e32 v0, v2, v0
1927; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 24, v1
1928; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
1929; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1930; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 24, v1
1931; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
1932; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x100000
1933; GFX6-NEXT:    s_bfe_u32 s7, s7, 0x100000
1934; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1935; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 23, v1
1936; GFX6-NEXT:    s_lshl_b32 s0, s7, 17
1937; GFX6-NEXT:    s_lshl_b32 s1, s1, 1
1938; GFX6-NEXT:    s_or_b32 s0, s0, s1
1939; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1940; GFX6-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1941; GFX6-NEXT:    v_lshl_b32_e32 v2, s0, v2
1942; GFX6-NEXT:    v_lshr_b32_e32 v1, s3, v1
1943; GFX6-NEXT:    v_bfe_u32 v3, v0, 8, 8
1944; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
1945; GFX6-NEXT:    v_and_b32_e32 v2, 0xff, v0
1946; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
1947; GFX6-NEXT:    v_bfe_u32 v0, v0, 16, 8
1948; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
1949; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1950; GFX6-NEXT:    v_or_b32_e32 v0, v2, v0
1951; GFX6-NEXT:    v_and_b32_e32 v2, 0xff, v1
1952; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
1953; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
1954; GFX6-NEXT:    v_bfe_u32 v2, v1, 8, 8
1955; GFX6-NEXT:    v_bfe_u32 v1, v1, 16, 8
1956; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
1957; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
1958; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
1959; GFX6-NEXT:    v_readfirstlane_b32 s1, v1
1960; GFX6-NEXT:    ; return to shader part epilog
1961;
1962; GFX8-LABEL: s_fshr_v2i24:
1963; GFX8:       ; %bb.0:
1964; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1965; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1966; GFX8-NEXT:    s_lshr_b32 s9, s1, 8
1967; GFX8-NEXT:    s_bfe_u32 s10, 8, 0x100000
1968; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
1969; GFX8-NEXT:    s_lshr_b32 s6, s0, 8
1970; GFX8-NEXT:    s_lshr_b32 s8, s0, 24
1971; GFX8-NEXT:    s_lshl_b32 s1, s1, s10
1972; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1973; GFX8-NEXT:    s_and_b32 s6, s6, 0xff
1974; GFX8-NEXT:    s_or_b32 s1, s8, s1
1975; GFX8-NEXT:    s_lshr_b32 s8, s2, 8
1976; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
1977; GFX8-NEXT:    s_lshr_b32 s7, s0, 16
1978; GFX8-NEXT:    s_and_b32 s0, s0, 0xff
1979; GFX8-NEXT:    s_lshl_b32 s6, s6, s10
1980; GFX8-NEXT:    s_and_b32 s8, s8, 0xff
1981; GFX8-NEXT:    s_or_b32 s0, s0, s6
1982; GFX8-NEXT:    s_and_b32 s6, s7, 0xff
1983; GFX8-NEXT:    s_and_b32 s7, s9, 0xff
1984; GFX8-NEXT:    s_lshr_b32 s9, s2, 16
1985; GFX8-NEXT:    s_lshr_b32 s11, s2, 24
1986; GFX8-NEXT:    s_and_b32 s2, s2, 0xff
1987; GFX8-NEXT:    s_lshl_b32 s8, s8, s10
1988; GFX8-NEXT:    s_or_b32 s2, s2, s8
1989; GFX8-NEXT:    s_and_b32 s8, s9, 0xff
1990; GFX8-NEXT:    v_mov_b32_e32 v1, 0xffffffe8
1991; GFX8-NEXT:    s_bfe_u32 s8, s8, 0x100000
1992; GFX8-NEXT:    v_mul_lo_u32 v2, v1, v0
1993; GFX8-NEXT:    s_lshr_b32 s12, s3, 8
1994; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
1995; GFX8-NEXT:    s_lshl_b32 s8, s8, 16
1996; GFX8-NEXT:    s_and_b32 s3, s3, 0xff
1997; GFX8-NEXT:    s_or_b32 s2, s2, s8
1998; GFX8-NEXT:    s_lshl_b32 s3, s3, s10
1999; GFX8-NEXT:    s_and_b32 s8, s12, 0xff
2000; GFX8-NEXT:    s_or_b32 s3, s11, s3
2001; GFX8-NEXT:    s_bfe_u32 s8, s8, 0x100000
2002; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
2003; GFX8-NEXT:    s_lshl_b32 s8, s8, 16
2004; GFX8-NEXT:    v_mul_hi_u32 v2, v0, v2
2005; GFX8-NEXT:    s_or_b32 s3, s3, s8
2006; GFX8-NEXT:    s_lshr_b32 s8, s4, 8
2007; GFX8-NEXT:    s_and_b32 s8, s8, 0xff
2008; GFX8-NEXT:    s_lshr_b32 s9, s4, 16
2009; GFX8-NEXT:    s_lshr_b32 s11, s4, 24
2010; GFX8-NEXT:    s_and_b32 s4, s4, 0xff
2011; GFX8-NEXT:    s_lshl_b32 s8, s8, s10
2012; GFX8-NEXT:    s_or_b32 s4, s4, s8
2013; GFX8-NEXT:    s_and_b32 s8, s9, 0xff
2014; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
2015; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v2, 24
2016; GFX8-NEXT:    s_bfe_u32 s8, s8, 0x100000
2017; GFX8-NEXT:    v_rcp_iflag_f32_e32 v2, v2
2018; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
2019; GFX8-NEXT:    s_lshl_b32 s8, s8, 16
2020; GFX8-NEXT:    s_or_b32 s4, s4, s8
2021; GFX8-NEXT:    v_mul_hi_u32 v0, s4, v0
2022; GFX8-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
2023; GFX8-NEXT:    v_cvt_u32_f32_e32 v2, v2
2024; GFX8-NEXT:    s_lshr_b32 s12, s5, 8
2025; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 24
2026; GFX8-NEXT:    s_and_b32 s5, s5, 0xff
2027; GFX8-NEXT:    v_mul_lo_u32 v1, v1, v2
2028; GFX8-NEXT:    s_lshl_b32 s5, s5, s10
2029; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s4, v0
2030; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 24, v0
2031; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
2032; GFX8-NEXT:    v_mul_hi_u32 v1, v2, v1
2033; GFX8-NEXT:    s_and_b32 s8, s12, 0xff
2034; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2035; GFX8-NEXT:    s_or_b32 s5, s11, s5
2036; GFX8-NEXT:    s_bfe_u32 s8, s8, 0x100000
2037; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 24, v0
2038; GFX8-NEXT:    s_bfe_u32 s5, s5, 0x100000
2039; GFX8-NEXT:    s_lshl_b32 s8, s8, 16
2040; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
2041; GFX8-NEXT:    s_or_b32 s5, s5, s8
2042; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2043; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v2, v1
2044; GFX8-NEXT:    v_mul_hi_u32 v1, s5, v1
2045; GFX8-NEXT:    s_bfe_u32 s0, s0, 0x100000
2046; GFX8-NEXT:    s_bfe_u32 s6, s6, 0x100000
2047; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 23, v0
2048; GFX8-NEXT:    v_mul_lo_u32 v1, v1, 24
2049; GFX8-NEXT:    s_lshl_b32 s4, s6, 17
2050; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
2051; GFX8-NEXT:    s_or_b32 s0, s4, s0
2052; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v3
2053; GFX8-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
2054; GFX8-NEXT:    v_lshlrev_b32_e64 v2, v2, s0
2055; GFX8-NEXT:    v_lshrrev_b32_e64 v0, v0, s2
2056; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, s5, v1
2057; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
2058; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 24, v1
2059; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
2060; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2061; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 24, v1
2062; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
2063; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
2064; GFX8-NEXT:    s_bfe_u32 s7, s7, 0x100000
2065; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2066; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 23, v1
2067; GFX8-NEXT:    s_lshl_b32 s0, s7, 17
2068; GFX8-NEXT:    s_lshl_b32 s1, s1, 1
2069; GFX8-NEXT:    s_or_b32 s0, s0, s1
2070; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2071; GFX8-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
2072; GFX8-NEXT:    v_lshlrev_b32_e64 v2, v2, s0
2073; GFX8-NEXT:    v_lshrrev_b32_e64 v1, v1, s3
2074; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
2075; GFX8-NEXT:    v_mov_b32_e32 v2, 8
2076; GFX8-NEXT:    v_lshlrev_b32_sdwa v3, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2077; GFX8-NEXT:    v_mov_b32_e32 v4, 16
2078; GFX8-NEXT:    v_or_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2079; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2080; GFX8-NEXT:    v_or_b32_e32 v0, v3, v0
2081; GFX8-NEXT:    v_and_b32_e32 v3, 0xff, v1
2082; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
2083; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2084; GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
2085; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
2086; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
2087; GFX8-NEXT:    v_readfirstlane_b32 s1, v1
2088; GFX8-NEXT:    ; return to shader part epilog
2089;
2090; GFX9-LABEL: s_fshr_v2i24:
2091; GFX9:       ; %bb.0:
2092; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
2093; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
2094; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffffffe8
2095; GFX9-NEXT:    s_lshr_b32 s11, s1, 8
2096; GFX9-NEXT:    s_bfe_u32 s12, 8, 0x100000
2097; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
2098; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
2099; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
2100; GFX9-NEXT:    s_lshr_b32 s7, s0, 8
2101; GFX9-NEXT:    s_lshr_b32 s10, s0, 24
2102; GFX9-NEXT:    v_mul_lo_u32 v2, v1, v0
2103; GFX9-NEXT:    s_lshl_b32 s1, s1, s12
2104; GFX9-NEXT:    s_and_b32 s7, s7, 0xff
2105; GFX9-NEXT:    s_or_b32 s1, s10, s1
2106; GFX9-NEXT:    v_mul_hi_u32 v2, v0, v2
2107; GFX9-NEXT:    s_lshr_b32 s10, s2, 8
2108; GFX9-NEXT:    s_lshr_b32 s9, s0, 16
2109; GFX9-NEXT:    s_and_b32 s0, s0, 0xff
2110; GFX9-NEXT:    s_lshl_b32 s7, s7, s12
2111; GFX9-NEXT:    s_and_b32 s10, s10, 0xff
2112; GFX9-NEXT:    s_or_b32 s0, s0, s7
2113; GFX9-NEXT:    s_and_b32 s7, s9, 0xff
2114; GFX9-NEXT:    s_and_b32 s9, s11, 0xff
2115; GFX9-NEXT:    s_lshr_b32 s11, s2, 16
2116; GFX9-NEXT:    s_lshr_b32 s13, s2, 24
2117; GFX9-NEXT:    s_and_b32 s2, s2, 0xff
2118; GFX9-NEXT:    s_lshl_b32 s10, s10, s12
2119; GFX9-NEXT:    s_or_b32 s2, s2, s10
2120; GFX9-NEXT:    s_and_b32 s10, s11, 0xff
2121; GFX9-NEXT:    v_add_u32_e32 v0, v0, v2
2122; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v2, 24
2123; GFX9-NEXT:    s_bfe_u32 s10, s10, 0x100000
2124; GFX9-NEXT:    v_rcp_iflag_f32_e32 v2, v2
2125; GFX9-NEXT:    s_lshr_b32 s14, s3, 8
2126; GFX9-NEXT:    s_bfe_u32 s2, s2, 0x100000
2127; GFX9-NEXT:    s_lshl_b32 s10, s10, 16
2128; GFX9-NEXT:    s_and_b32 s3, s3, 0xff
2129; GFX9-NEXT:    s_or_b32 s2, s2, s10
2130; GFX9-NEXT:    s_lshl_b32 s3, s3, s12
2131; GFX9-NEXT:    s_and_b32 s10, s14, 0xff
2132; GFX9-NEXT:    s_or_b32 s3, s13, s3
2133; GFX9-NEXT:    s_bfe_u32 s10, s10, 0x100000
2134; GFX9-NEXT:    s_bfe_u32 s3, s3, 0x100000
2135; GFX9-NEXT:    s_lshl_b32 s10, s10, 16
2136; GFX9-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
2137; GFX9-NEXT:    s_or_b32 s3, s3, s10
2138; GFX9-NEXT:    s_lshr_b32 s10, s4, 8
2139; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
2140; GFX9-NEXT:    s_and_b32 s10, s10, 0xff
2141; GFX9-NEXT:    s_lshr_b32 s11, s4, 16
2142; GFX9-NEXT:    s_lshr_b32 s13, s4, 24
2143; GFX9-NEXT:    s_and_b32 s4, s4, 0xff
2144; GFX9-NEXT:    s_lshl_b32 s10, s10, s12
2145; GFX9-NEXT:    s_or_b32 s4, s4, s10
2146; GFX9-NEXT:    s_and_b32 s10, s11, 0xff
2147; GFX9-NEXT:    s_bfe_u32 s10, s10, 0x100000
2148; GFX9-NEXT:    v_mul_lo_u32 v1, v1, v2
2149; GFX9-NEXT:    s_bfe_u32 s4, s4, 0x100000
2150; GFX9-NEXT:    s_lshl_b32 s10, s10, 16
2151; GFX9-NEXT:    s_or_b32 s4, s4, s10
2152; GFX9-NEXT:    v_mul_hi_u32 v0, s4, v0
2153; GFX9-NEXT:    s_lshr_b32 s14, s5, 8
2154; GFX9-NEXT:    s_and_b32 s5, s5, 0xff
2155; GFX9-NEXT:    v_mul_hi_u32 v1, v2, v1
2156; GFX9-NEXT:    s_lshl_b32 s5, s5, s12
2157; GFX9-NEXT:    s_and_b32 s10, s14, 0xff
2158; GFX9-NEXT:    s_or_b32 s5, s13, s5
2159; GFX9-NEXT:    s_bfe_u32 s10, s10, 0x100000
2160; GFX9-NEXT:    s_bfe_u32 s5, s5, 0x100000
2161; GFX9-NEXT:    s_lshl_b32 s10, s10, 16
2162; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 24
2163; GFX9-NEXT:    s_or_b32 s5, s5, s10
2164; GFX9-NEXT:    v_add_u32_e32 v1, v2, v1
2165; GFX9-NEXT:    v_mul_hi_u32 v1, s5, v1
2166; GFX9-NEXT:    v_sub_u32_e32 v0, s4, v0
2167; GFX9-NEXT:    v_subrev_u32_e32 v3, 24, v0
2168; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
2169; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2170; GFX9-NEXT:    v_mul_lo_u32 v1, v1, 24
2171; GFX9-NEXT:    v_subrev_u32_e32 v3, 24, v0
2172; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
2173; GFX9-NEXT:    s_bfe_u32 s0, s0, 0x100000
2174; GFX9-NEXT:    s_bfe_u32 s7, s7, 0x100000
2175; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2176; GFX9-NEXT:    v_sub_u32_e32 v3, 23, v0
2177; GFX9-NEXT:    s_lshl_b32 s4, s7, 17
2178; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
2179; GFX9-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
2180; GFX9-NEXT:    s_or_b32 s0, s4, s0
2181; GFX9-NEXT:    v_and_b32_e32 v2, 0xffffff, v3
2182; GFX9-NEXT:    v_lshrrev_b32_e64 v0, v0, s2
2183; GFX9-NEXT:    v_sub_u32_e32 v1, s5, v1
2184; GFX9-NEXT:    v_lshl_or_b32 v0, s0, v2, v0
2185; GFX9-NEXT:    v_subrev_u32_e32 v2, 24, v1
2186; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
2187; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2188; GFX9-NEXT:    v_subrev_u32_e32 v2, 24, v1
2189; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
2190; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
2191; GFX9-NEXT:    s_bfe_u32 s9, s9, 0x100000
2192; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2193; GFX9-NEXT:    v_sub_u32_e32 v2, 23, v1
2194; GFX9-NEXT:    s_lshl_b32 s0, s9, 17
2195; GFX9-NEXT:    s_lshl_b32 s1, s1, 1
2196; GFX9-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
2197; GFX9-NEXT:    s_or_b32 s0, s0, s1
2198; GFX9-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2199; GFX9-NEXT:    v_lshrrev_b32_e64 v1, v1, s3
2200; GFX9-NEXT:    s_mov_b32 s6, 8
2201; GFX9-NEXT:    v_lshl_or_b32 v1, s0, v2, v1
2202; GFX9-NEXT:    s_mov_b32 s8, 16
2203; GFX9-NEXT:    s_movk_i32 s0, 0xff
2204; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2205; GFX9-NEXT:    v_and_b32_e32 v3, 0xff, v1
2206; GFX9-NEXT:    v_and_or_b32 v2, v0, s0, v2
2207; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2208; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
2209; GFX9-NEXT:    v_or3_b32 v0, v2, v0, v3
2210; GFX9-NEXT:    v_bfe_u32 v2, v1, 8, 8
2211; GFX9-NEXT:    v_bfe_u32 v1, v1, 16, 8
2212; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 8, v2
2213; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
2214; GFX9-NEXT:    v_readfirstlane_b32 s1, v1
2215; GFX9-NEXT:    ; return to shader part epilog
2216;
2217; GFX10-LABEL: s_fshr_v2i24:
2218; GFX10:       ; %bb.0:
2219; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
2220; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v1, 24
2221; GFX10-NEXT:    s_lshr_b32 s9, s1, 8
2222; GFX10-NEXT:    s_bfe_u32 s10, 8, 0x100000
2223; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
2224; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
2225; GFX10-NEXT:    v_rcp_iflag_f32_e32 v1, v1
2226; GFX10-NEXT:    s_lshr_b32 s6, s0, 8
2227; GFX10-NEXT:    s_lshr_b32 s8, s0, 24
2228; GFX10-NEXT:    s_lshl_b32 s1, s1, s10
2229; GFX10-NEXT:    s_and_b32 s6, s6, 0xff
2230; GFX10-NEXT:    s_or_b32 s1, s8, s1
2231; GFX10-NEXT:    s_lshr_b32 s8, s4, 8
2232; GFX10-NEXT:    s_lshr_b32 s7, s0, 16
2233; GFX10-NEXT:    s_and_b32 s0, s0, 0xff
2234; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
2235; GFX10-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
2236; GFX10-NEXT:    s_lshl_b32 s6, s6, s10
2237; GFX10-NEXT:    s_and_b32 s8, s8, 0xff
2238; GFX10-NEXT:    s_or_b32 s0, s0, s6
2239; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
2240; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v1
2241; GFX10-NEXT:    s_and_b32 s6, s7, 0xff
2242; GFX10-NEXT:    s_and_b32 s7, s9, 0xff
2243; GFX10-NEXT:    s_lshr_b32 s9, s4, 16
2244; GFX10-NEXT:    v_mul_lo_u32 v2, 0xffffffe8, v0
2245; GFX10-NEXT:    v_mul_lo_u32 v3, 0xffffffe8, v1
2246; GFX10-NEXT:    s_lshr_b32 s11, s4, 24
2247; GFX10-NEXT:    s_and_b32 s4, s4, 0xff
2248; GFX10-NEXT:    s_lshl_b32 s8, s8, s10
2249; GFX10-NEXT:    s_lshr_b32 s12, s5, 8
2250; GFX10-NEXT:    s_or_b32 s4, s4, s8
2251; GFX10-NEXT:    s_and_b32 s8, s9, 0xff
2252; GFX10-NEXT:    v_mul_hi_u32 v2, v0, v2
2253; GFX10-NEXT:    v_mul_hi_u32 v3, v1, v3
2254; GFX10-NEXT:    s_bfe_u32 s8, s8, 0x100000
2255; GFX10-NEXT:    s_bfe_u32 s4, s4, 0x100000
2256; GFX10-NEXT:    s_and_b32 s5, s5, 0xff
2257; GFX10-NEXT:    s_lshl_b32 s8, s8, 16
2258; GFX10-NEXT:    s_lshl_b32 s5, s5, s10
2259; GFX10-NEXT:    s_or_b32 s4, s4, s8
2260; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v2
2261; GFX10-NEXT:    s_and_b32 s8, s12, 0xff
2262; GFX10-NEXT:    s_or_b32 s5, s11, s5
2263; GFX10-NEXT:    s_bfe_u32 s8, s8, 0x100000
2264; GFX10-NEXT:    v_add_nc_u32_e32 v1, v1, v3
2265; GFX10-NEXT:    v_mul_hi_u32 v0, s4, v0
2266; GFX10-NEXT:    s_bfe_u32 s5, s5, 0x100000
2267; GFX10-NEXT:    s_lshl_b32 s8, s8, 16
2268; GFX10-NEXT:    s_lshr_b32 s9, s2, 8
2269; GFX10-NEXT:    s_or_b32 s5, s5, s8
2270; GFX10-NEXT:    s_lshr_b32 s8, s2, 16
2271; GFX10-NEXT:    v_mul_hi_u32 v1, s5, v1
2272; GFX10-NEXT:    s_and_b32 s9, s9, 0xff
2273; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 24
2274; GFX10-NEXT:    s_lshr_b32 s11, s2, 24
2275; GFX10-NEXT:    s_lshr_b32 s12, s3, 8
2276; GFX10-NEXT:    s_and_b32 s2, s2, 0xff
2277; GFX10-NEXT:    s_lshl_b32 s9, s9, s10
2278; GFX10-NEXT:    s_and_b32 s8, s8, 0xff
2279; GFX10-NEXT:    v_mul_lo_u32 v1, v1, 24
2280; GFX10-NEXT:    s_and_b32 s3, s3, 0xff
2281; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s4, v0
2282; GFX10-NEXT:    s_or_b32 s2, s2, s9
2283; GFX10-NEXT:    s_bfe_u32 s4, s8, 0x100000
2284; GFX10-NEXT:    s_lshl_b32 s3, s3, s10
2285; GFX10-NEXT:    s_bfe_u32 s2, s2, 0x100000
2286; GFX10-NEXT:    v_subrev_nc_u32_e32 v2, 24, v0
2287; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s5, v1
2288; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
2289; GFX10-NEXT:    s_and_b32 s5, s12, 0xff
2290; GFX10-NEXT:    s_lshl_b32 s4, s4, 16
2291; GFX10-NEXT:    s_or_b32 s3, s11, s3
2292; GFX10-NEXT:    s_bfe_u32 s5, s5, 0x100000
2293; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2294; GFX10-NEXT:    v_subrev_nc_u32_e32 v2, 24, v1
2295; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
2296; GFX10-NEXT:    s_bfe_u32 s3, s3, 0x100000
2297; GFX10-NEXT:    s_lshl_b32 s5, s5, 16
2298; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 24, v0
2299; GFX10-NEXT:    s_or_b32 s2, s2, s4
2300; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
2301; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
2302; GFX10-NEXT:    s_bfe_u32 s0, s0, 0x100000
2303; GFX10-NEXT:    s_bfe_u32 s6, s6, 0x100000
2304; GFX10-NEXT:    s_or_b32 s3, s3, s5
2305; GFX10-NEXT:    v_subrev_nc_u32_e32 v2, 24, v1
2306; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2307; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
2308; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
2309; GFX10-NEXT:    s_bfe_u32 s7, s7, 0x100000
2310; GFX10-NEXT:    s_lshl_b32 s4, s6, 17
2311; GFX10-NEXT:    v_sub_nc_u32_e32 v3, 23, v0
2312; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
2313; GFX10-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
2314; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
2315; GFX10-NEXT:    s_lshl_b32 s1, s1, 1
2316; GFX10-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
2317; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 23, v1
2318; GFX10-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
2319; GFX10-NEXT:    v_lshrrev_b32_e64 v0, v0, s2
2320; GFX10-NEXT:    s_or_b32 s0, s4, s0
2321; GFX10-NEXT:    s_lshl_b32 s2, s7, 17
2322; GFX10-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2323; GFX10-NEXT:    v_lshrrev_b32_e64 v1, v1, s3
2324; GFX10-NEXT:    v_lshl_or_b32 v0, s0, v3, v0
2325; GFX10-NEXT:    s_or_b32 s0, s2, s1
2326; GFX10-NEXT:    v_lshl_or_b32 v1, s0, v2, v1
2327; GFX10-NEXT:    s_mov_b32 s0, 8
2328; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2329; GFX10-NEXT:    s_mov_b32 s0, 16
2330; GFX10-NEXT:    v_and_b32_e32 v3, 0xff, v1
2331; GFX10-NEXT:    v_bfe_u32 v4, v1, 8, 8
2332; GFX10-NEXT:    v_bfe_u32 v1, v1, 16, 8
2333; GFX10-NEXT:    v_and_or_b32 v2, v0, 0xff, v2
2334; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2335; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
2336; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 8, v4
2337; GFX10-NEXT:    v_or3_b32 v0, v2, v0, v3
2338; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
2339; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2340; GFX10-NEXT:    ; return to shader part epilog
2341;
2342; GFX11-LABEL: s_fshr_v2i24:
2343; GFX11:       ; %bb.0:
2344; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
2345; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v1, 24
2346; GFX11-NEXT:    s_lshr_b32 s6, s0, 8
2347; GFX11-NEXT:    s_bfe_u32 s9, 8, 0x100000
2348; GFX11-NEXT:    s_and_b32 s6, s6, 0xff
2349; GFX11-NEXT:    v_rcp_iflag_f32_e32 v0, v0
2350; GFX11-NEXT:    v_rcp_iflag_f32_e32 v1, v1
2351; GFX11-NEXT:    s_lshr_b32 s7, s0, 16
2352; GFX11-NEXT:    s_lshr_b32 s8, s0, 24
2353; GFX11-NEXT:    s_and_b32 s0, s0, 0xff
2354; GFX11-NEXT:    s_lshl_b32 s6, s6, s9
2355; GFX11-NEXT:    s_lshr_b32 s10, s1, 8
2356; GFX11-NEXT:    s_or_b32 s0, s0, s6
2357; GFX11-NEXT:    s_and_b32 s6, s7, 0xff
2358; GFX11-NEXT:    s_and_b32 s7, s10, 0xff
2359; GFX11-NEXT:    s_waitcnt_depctr 0xfff
2360; GFX11-NEXT:    v_dual_mul_f32 v0, 0x4f7ffffe, v0 :: v_dual_mul_f32 v1, 0x4f7ffffe, v1
2361; GFX11-NEXT:    s_lshr_b32 s10, s4, 8
2362; GFX11-NEXT:    s_lshr_b32 s11, s4, 16
2363; GFX11-NEXT:    s_and_b32 s10, s10, 0xff
2364; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2365; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v0
2366; GFX11-NEXT:    v_cvt_u32_f32_e32 v1, v1
2367; GFX11-NEXT:    s_and_b32 s12, s4, 0xff
2368; GFX11-NEXT:    s_lshl_b32 s10, s10, s9
2369; GFX11-NEXT:    s_and_b32 s11, s11, 0xff
2370; GFX11-NEXT:    v_mul_lo_u32 v2, 0xffffffe8, v0
2371; GFX11-NEXT:    s_or_b32 s10, s12, s10
2372; GFX11-NEXT:    v_mul_lo_u32 v3, 0xffffffe8, v1
2373; GFX11-NEXT:    s_bfe_u32 s11, s11, 0x100000
2374; GFX11-NEXT:    s_bfe_u32 s10, s10, 0x100000
2375; GFX11-NEXT:    s_lshl_b32 s11, s11, 16
2376; GFX11-NEXT:    s_lshr_b32 s12, s5, 8
2377; GFX11-NEXT:    s_or_b32 s10, s10, s11
2378; GFX11-NEXT:    v_mul_hi_u32 v2, v0, v2
2379; GFX11-NEXT:    s_and_b32 s5, s5, 0xff
2380; GFX11-NEXT:    s_lshr_b32 s4, s4, 24
2381; GFX11-NEXT:    s_lshl_b32 s5, s5, s9
2382; GFX11-NEXT:    s_and_b32 s11, s12, 0xff
2383; GFX11-NEXT:    s_or_b32 s4, s4, s5
2384; GFX11-NEXT:    s_bfe_u32 s5, s11, 0x100000
2385; GFX11-NEXT:    s_bfe_u32 s4, s4, 0x100000
2386; GFX11-NEXT:    v_add_nc_u32_e32 v0, v0, v2
2387; GFX11-NEXT:    v_mul_hi_u32 v2, v1, v3
2388; GFX11-NEXT:    s_lshl_b32 s5, s5, 16
2389; GFX11-NEXT:    s_and_b32 s1, s1, 0xff
2390; GFX11-NEXT:    s_or_b32 s4, s4, s5
2391; GFX11-NEXT:    v_mul_hi_u32 v0, s10, v0
2392; GFX11-NEXT:    s_lshl_b32 s1, s1, s9
2393; GFX11-NEXT:    s_lshr_b32 s11, s2, 16
2394; GFX11-NEXT:    s_or_b32 s1, s8, s1
2395; GFX11-NEXT:    v_add_nc_u32_e32 v1, v1, v2
2396; GFX11-NEXT:    s_lshr_b32 s8, s2, 8
2397; GFX11-NEXT:    s_lshr_b32 s5, s2, 24
2398; GFX11-NEXT:    s_and_b32 s8, s8, 0xff
2399; GFX11-NEXT:    v_mul_lo_u32 v0, v0, 24
2400; GFX11-NEXT:    v_mul_hi_u32 v1, s4, v1
2401; GFX11-NEXT:    s_and_b32 s2, s2, 0xff
2402; GFX11-NEXT:    s_lshl_b32 s8, s8, s9
2403; GFX11-NEXT:    s_bfe_u32 s0, s0, 0x100000
2404; GFX11-NEXT:    s_or_b32 s2, s2, s8
2405; GFX11-NEXT:    s_and_b32 s8, s11, 0xff
2406; GFX11-NEXT:    s_bfe_u32 s2, s2, 0x100000
2407; GFX11-NEXT:    v_sub_nc_u32_e32 v0, s10, v0
2408; GFX11-NEXT:    v_mul_lo_u32 v1, v1, 24
2409; GFX11-NEXT:    s_bfe_u32 s8, s8, 0x100000
2410; GFX11-NEXT:    s_lshr_b32 s10, s3, 8
2411; GFX11-NEXT:    s_and_b32 s3, s3, 0xff
2412; GFX11-NEXT:    v_subrev_nc_u32_e32 v2, 24, v0
2413; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
2414; GFX11-NEXT:    s_lshl_b32 s8, s8, 16
2415; GFX11-NEXT:    s_lshl_b32 s3, s3, s9
2416; GFX11-NEXT:    v_sub_nc_u32_e32 v1, s4, v1
2417; GFX11-NEXT:    s_and_b32 s4, s10, 0xff
2418; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2419; GFX11-NEXT:    s_or_b32 s2, s2, s8
2420; GFX11-NEXT:    s_bfe_u32 s6, s6, 0x100000
2421; GFX11-NEXT:    v_subrev_nc_u32_e32 v3, 24, v1
2422; GFX11-NEXT:    s_or_b32 s3, s5, s3
2423; GFX11-NEXT:    v_subrev_nc_u32_e32 v2, 24, v0
2424; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
2425; GFX11-NEXT:    s_bfe_u32 s4, s4, 0x100000
2426; GFX11-NEXT:    s_bfe_u32 s3, s3, 0x100000
2427; GFX11-NEXT:    s_lshl_b32 s4, s4, 16
2428; GFX11-NEXT:    s_lshl_b32 s5, s6, 17
2429; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2430; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
2431; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
2432; GFX11-NEXT:    s_bfe_u32 s1, s1, 0x100000
2433; GFX11-NEXT:    s_or_b32 s0, s5, s0
2434; GFX11-NEXT:    s_bfe_u32 s7, s7, 0x100000
2435; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
2436; GFX11-NEXT:    s_lshl_b32 s1, s1, 1
2437; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2438; GFX11-NEXT:    v_subrev_nc_u32_e32 v3, 24, v1
2439; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
2440; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
2441; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
2442; GFX11-NEXT:    v_sub_nc_u32_e32 v3, 23, v1
2443; GFX11-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
2444; GFX11-NEXT:    v_sub_nc_u32_e32 v2, 23, v0
2445; GFX11-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
2446; GFX11-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2447; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
2448; GFX11-NEXT:    v_lshrrev_b32_e64 v0, v0, s2
2449; GFX11-NEXT:    s_or_b32 s2, s3, s4
2450; GFX11-NEXT:    v_lshrrev_b32_e64 v1, v1, s2
2451; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
2452; GFX11-NEXT:    v_lshl_or_b32 v0, s0, v2, v0
2453; GFX11-NEXT:    v_and_b32_e32 v2, 0xffffff, v3
2454; GFX11-NEXT:    s_lshl_b32 s0, s7, 17
2455; GFX11-NEXT:    s_or_b32 s0, s0, s1
2456; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2457; GFX11-NEXT:    v_bfe_u32 v3, v0, 8, 8
2458; GFX11-NEXT:    v_lshl_or_b32 v1, s0, v2, v1
2459; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
2460; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 8, v3
2461; GFX11-NEXT:    v_bfe_u32 v3, v0, 16, 8
2462; GFX11-NEXT:    v_and_b32_e32 v4, 0xff, v1
2463; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
2464; GFX11-NEXT:    v_and_or_b32 v0, v0, 0xff, v2
2465; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
2466; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
2467; GFX11-NEXT:    v_lshlrev_b32_e32 v3, 24, v4
2468; GFX11-NEXT:    v_bfe_u32 v4, v1, 8, 8
2469; GFX11-NEXT:    v_bfe_u32 v1, v1, 16, 8
2470; GFX11-NEXT:    v_or3_b32 v0, v0, v2, v3
2471; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2472; GFX11-NEXT:    v_lshl_or_b32 v1, v1, 8, v4
2473; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
2474; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
2475; GFX11-NEXT:    v_readfirstlane_b32 s1, v1
2476; GFX11-NEXT:    ; return to shader part epilog
2477  %lhs = bitcast i48 %lhs.arg to <2 x i24>
2478  %rhs = bitcast i48 %rhs.arg to <2 x i24>
2479  %amt = bitcast i48 %amt.arg to <2 x i24>
2480  %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
2481  %cast.result = bitcast <2 x i24> %result to i48
2482  ret i48 %cast.result
2483}
2484
; Funnel shift right on <2 x i24> with all operands passed in VGPRs (no inreg
; arguments; every check block returns via s_setpc_b64).
;
; What the assertions below pin down, per vector lane:
;   * the shift amount is masked to 24 bits and then reduced modulo 24 through
;     an expanded unsigned division by 24 (v_rcp_iflag_f32, scale by
;     0x4f7ffffe, v_mul_hi_u32, followed by two conditional subtractions of 24);
;   * the result is ((lhs << 1) << (23 - amt')) | ((rhs & 0xffffff) >> amt'),
;     where amt' is the reduced amount;
;   * the tahiti and fiji blocks use separate shift and v_or_b32 instructions,
;     while the gfx900, gfx1010 and gfx1100 blocks fold the left shift and the
;     OR into v_lshl_or_b32.
;
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.
2485define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
2486; GFX6-LABEL: v_fshr_v2i24:
2487; GFX6:       ; %bb.0:
2488; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2489; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
2490; GFX6-NEXT:    v_rcp_iflag_f32_e32 v6, v6
2491; GFX6-NEXT:    v_mov_b32_e32 v7, 0xffffffe8
2492; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2493; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v9, 24
2494; GFX6-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
2495; GFX6-NEXT:    v_cvt_u32_f32_e32 v6, v6
2496; GFX6-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
2497; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
2498; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2499; GFX6-NEXT:    v_mul_lo_u32 v8, v7, v6
2500; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
2501; GFX6-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
2502; GFX6-NEXT:    v_mul_hi_u32 v8, v6, v8
2503; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
2504; GFX6-NEXT:    v_mul_hi_u32 v6, v4, v6
2505; GFX6-NEXT:    v_rcp_iflag_f32_e32 v8, v9
2506; GFX6-NEXT:    v_mul_lo_u32 v6, v6, 24
2507; GFX6-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
2508; GFX6-NEXT:    v_cvt_u32_f32_e32 v8, v8
2509; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v6
2510; GFX6-NEXT:    v_subrev_i32_e32 v6, vcc, 24, v4
2511; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2512; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2513; GFX6-NEXT:    v_subrev_i32_e32 v6, vcc, 24, v4
2514; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2515; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2516; GFX6-NEXT:    v_mul_lo_u32 v6, v7, v8
2517; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, 23, v4
2518; GFX6-NEXT:    v_and_b32_e32 v7, 0xffffff, v7
2519; GFX6-NEXT:    v_mul_hi_u32 v6, v8, v6
2520; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2521; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v7, v0
2522; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
2523; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
2524; GFX6-NEXT:    v_mul_hi_u32 v6, v5, v6
2525; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
2526; GFX6-NEXT:    v_mul_lo_u32 v6, v6, 24
2527; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v5, v6
2528; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 24, v2
2529; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2530; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2531; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 24, v2
2532; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2533; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2534; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 23, v2
2535; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2536; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2537; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v4, v1
2538; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v2, v3
2539; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
2540; GFX6-NEXT:    s_setpc_b64 s[30:31]
2541;
2542; GFX8-LABEL: v_fshr_v2i24:
2543; GFX8:       ; %bb.0:
2544; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2545; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
2546; GFX8-NEXT:    v_rcp_iflag_f32_e32 v6, v6
2547; GFX8-NEXT:    v_mov_b32_e32 v7, 0xffffffe8
2548; GFX8-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2549; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v9, 24
2550; GFX8-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
2551; GFX8-NEXT:    v_cvt_u32_f32_e32 v6, v6
2552; GFX8-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
2553; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
2554; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2555; GFX8-NEXT:    v_mul_lo_u32 v8, v7, v6
2556; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
2557; GFX8-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
2558; GFX8-NEXT:    v_mul_hi_u32 v8, v6, v8
2559; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v8
2560; GFX8-NEXT:    v_mul_hi_u32 v6, v4, v6
2561; GFX8-NEXT:    v_rcp_iflag_f32_e32 v8, v9
2562; GFX8-NEXT:    v_mul_lo_u32 v6, v6, 24
2563; GFX8-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
2564; GFX8-NEXT:    v_cvt_u32_f32_e32 v8, v8
2565; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v4, v6
2566; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, 24, v4
2567; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2568; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2569; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, 24, v4
2570; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2571; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2572; GFX8-NEXT:    v_mul_lo_u32 v6, v7, v8
2573; GFX8-NEXT:    v_sub_u32_e32 v7, vcc, 23, v4
2574; GFX8-NEXT:    v_and_b32_e32 v7, 0xffffff, v7
2575; GFX8-NEXT:    v_mul_hi_u32 v6, v8, v6
2576; GFX8-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2577; GFX8-NEXT:    v_lshlrev_b32_e32 v0, v7, v0
2578; GFX8-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
2579; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v8, v6
2580; GFX8-NEXT:    v_mul_hi_u32 v6, v5, v6
2581; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
2582; GFX8-NEXT:    v_mul_lo_u32 v6, v6, 24
2583; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v5, v6
2584; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, 24, v2
2585; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2586; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2587; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, 24, v2
2588; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2589; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2590; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 23, v2
2591; GFX8-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2592; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2593; GFX8-NEXT:    v_lshlrev_b32_e32 v1, v4, v1
2594; GFX8-NEXT:    v_lshrrev_b32_e32 v2, v2, v3
2595; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
2596; GFX8-NEXT:    s_setpc_b64 s[30:31]
2597;
2598; GFX9-LABEL: v_fshr_v2i24:
2599; GFX9:       ; %bb.0:
2600; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2601; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
2602; GFX9-NEXT:    v_rcp_iflag_f32_e32 v6, v6
2603; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v9, 24
2604; GFX9-NEXT:    v_rcp_iflag_f32_e32 v9, v9
2605; GFX9-NEXT:    v_mov_b32_e32 v7, 0xffffffe8
2606; GFX9-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
2607; GFX9-NEXT:    v_cvt_u32_f32_e32 v6, v6
2608; GFX9-NEXT:    v_mul_f32_e32 v9, 0x4f7ffffe, v9
2609; GFX9-NEXT:    v_cvt_u32_f32_e32 v9, v9
2610; GFX9-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2611; GFX9-NEXT:    v_mul_lo_u32 v8, v7, v6
2612; GFX9-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
2613; GFX9-NEXT:    v_mul_lo_u32 v7, v7, v9
2614; GFX9-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2615; GFX9-NEXT:    v_mul_hi_u32 v8, v6, v8
2616; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
2617; GFX9-NEXT:    v_mul_hi_u32 v7, v9, v7
2618; GFX9-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
2619; GFX9-NEXT:    v_add_u32_e32 v6, v6, v8
2620; GFX9-NEXT:    v_mul_hi_u32 v6, v4, v6
2621; GFX9-NEXT:    v_add_u32_e32 v7, v9, v7
2622; GFX9-NEXT:    v_mul_hi_u32 v7, v5, v7
2623; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
2624; GFX9-NEXT:    v_mul_lo_u32 v6, v6, 24
2625; GFX9-NEXT:    v_mul_lo_u32 v7, v7, 24
2626; GFX9-NEXT:    v_sub_u32_e32 v4, v4, v6
2627; GFX9-NEXT:    v_subrev_u32_e32 v6, 24, v4
2628; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2629; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2630; GFX9-NEXT:    v_subrev_u32_e32 v6, 24, v4
2631; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2632; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2633; GFX9-NEXT:    v_sub_u32_e32 v6, 23, v4
2634; GFX9-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2635; GFX9-NEXT:    v_and_b32_e32 v6, 0xffffff, v6
2636; GFX9-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
2637; GFX9-NEXT:    v_lshl_or_b32 v0, v0, v6, v2
2638; GFX9-NEXT:    v_sub_u32_e32 v2, v5, v7
2639; GFX9-NEXT:    v_subrev_u32_e32 v4, 24, v2
2640; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2641; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2642; GFX9-NEXT:    v_subrev_u32_e32 v4, 24, v2
2643; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2644; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2645; GFX9-NEXT:    v_sub_u32_e32 v4, 23, v2
2646; GFX9-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2647; GFX9-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2648; GFX9-NEXT:    v_lshrrev_b32_e32 v2, v2, v3
2649; GFX9-NEXT:    v_lshl_or_b32 v1, v1, v4, v2
2650; GFX9-NEXT:    s_setpc_b64 s[30:31]
2651;
2652; GFX10-LABEL: v_fshr_v2i24:
2653; GFX10:       ; %bb.0:
2654; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2655; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2656; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
2657; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v7, 24
2658; GFX10-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2659; GFX10-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
2660; GFX10-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2661; GFX10-NEXT:    v_rcp_iflag_f32_e32 v6, v6
2662; GFX10-NEXT:    v_rcp_iflag_f32_e32 v7, v7
2663; GFX10-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
2664; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
2665; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
2666; GFX10-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
2667; GFX10-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
2668; GFX10-NEXT:    v_cvt_u32_f32_e32 v6, v6
2669; GFX10-NEXT:    v_cvt_u32_f32_e32 v7, v7
2670; GFX10-NEXT:    v_mul_lo_u32 v8, 0xffffffe8, v6
2671; GFX10-NEXT:    v_mul_lo_u32 v9, 0xffffffe8, v7
2672; GFX10-NEXT:    v_mul_hi_u32 v8, v6, v8
2673; GFX10-NEXT:    v_mul_hi_u32 v9, v7, v9
2674; GFX10-NEXT:    v_add_nc_u32_e32 v6, v6, v8
2675; GFX10-NEXT:    v_add_nc_u32_e32 v7, v7, v9
2676; GFX10-NEXT:    v_mul_hi_u32 v6, v4, v6
2677; GFX10-NEXT:    v_mul_hi_u32 v7, v5, v7
2678; GFX10-NEXT:    v_mul_lo_u32 v6, v6, 24
2679; GFX10-NEXT:    v_mul_lo_u32 v7, v7, 24
2680; GFX10-NEXT:    v_sub_nc_u32_e32 v4, v4, v6
2681; GFX10-NEXT:    v_sub_nc_u32_e32 v5, v5, v7
2682; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, 24, v4
2683; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
2684; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, 24, v5
2685; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2686; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
2687; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, 24, v4
2688; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2689; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
2690; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, 24, v5
2691; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2692; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
2693; GFX10-NEXT:    v_sub_nc_u32_e32 v6, 23, v4
2694; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2695; GFX10-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2696; GFX10-NEXT:    v_and_b32_e32 v6, 0xffffff, v6
2697; GFX10-NEXT:    v_sub_nc_u32_e32 v7, 23, v5
2698; GFX10-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
2699; GFX10-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
2700; GFX10-NEXT:    v_and_b32_e32 v4, 0xffffff, v7
2701; GFX10-NEXT:    v_lshrrev_b32_e32 v3, v5, v3
2702; GFX10-NEXT:    v_lshl_or_b32 v0, v0, v6, v2
2703; GFX10-NEXT:    v_lshl_or_b32 v1, v1, v4, v3
2704; GFX10-NEXT:    s_setpc_b64 s[30:31]
2705;
2706; GFX11-LABEL: v_fshr_v2i24:
2707; GFX11:       ; %bb.0:
2708; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2709; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2710; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
2711; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v7, 24
2712; GFX11-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
2713; GFX11-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2714; GFX11-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
2715; GFX11-NEXT:    v_rcp_iflag_f32_e32 v6, v6
2716; GFX11-NEXT:    v_rcp_iflag_f32_e32 v7, v7
2717; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
2718; GFX11-NEXT:    s_waitcnt_depctr 0xfff
2719; GFX11-NEXT:    v_dual_mul_f32 v6, 0x4f7ffffe, v6 :: v_dual_lshlrev_b32 v1, 1, v1
2720; GFX11-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
2721; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2722; GFX11-NEXT:    v_cvt_u32_f32_e32 v6, v6
2723; GFX11-NEXT:    v_cvt_u32_f32_e32 v7, v7
2724; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2725; GFX11-NEXT:    v_mul_lo_u32 v8, 0xffffffe8, v6
2726; GFX11-NEXT:    v_mul_lo_u32 v9, 0xffffffe8, v7
2727; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2728; GFX11-NEXT:    v_mul_hi_u32 v8, v6, v8
2729; GFX11-NEXT:    v_mul_hi_u32 v9, v7, v9
2730; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2731; GFX11-NEXT:    v_add_nc_u32_e32 v6, v6, v8
2732; GFX11-NEXT:    v_add_nc_u32_e32 v7, v7, v9
2733; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2734; GFX11-NEXT:    v_mul_hi_u32 v7, v5, v7
2735; GFX11-NEXT:    v_mul_lo_u32 v7, v7, 24
2736; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2737; GFX11-NEXT:    v_sub_nc_u32_e32 v5, v5, v7
2738; GFX11-NEXT:    v_subrev_nc_u32_e32 v7, 24, v5
2739; GFX11-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2740; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2741; GFX11-NEXT:    v_mul_hi_u32 v6, v4, v6
2742; GFX11-NEXT:    v_mul_lo_u32 v6, v6, 24
2743; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2744; GFX11-NEXT:    v_sub_nc_u32_e32 v4, v4, v6
2745; GFX11-NEXT:    v_subrev_nc_u32_e32 v6, 24, v4
2746; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
2747; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2748; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2749; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
2750; GFX11-NEXT:    v_subrev_nc_u32_e32 v6, 24, v4
2751; GFX11-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2752; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
2753; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
2754; GFX11-NEXT:    v_subrev_nc_u32_e32 v7, 24, v5
2755; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2756; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
2757; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
2758; GFX11-NEXT:    v_sub_nc_u32_e32 v6, 23, v4
2759; GFX11-NEXT:    v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4
2760; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2761; GFX11-NEXT:    v_and_b32_e32 v6, 0xffffff, v6
2762; GFX11-NEXT:    v_sub_nc_u32_e32 v7, 23, v5
2763; GFX11-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
2764; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
2765; GFX11-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
2766; GFX11-NEXT:    v_and_b32_e32 v4, 0xffffff, v7
2767; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
2768; GFX11-NEXT:    v_lshrrev_b32_e32 v3, v5, v3
2769; GFX11-NEXT:    v_lshl_or_b32 v0, v0, v6, v2
2770; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
2771; GFX11-NEXT:    v_lshl_or_b32 v1, v1, v4, v3
2772; GFX11-NEXT:    s_setpc_b64 s[30:31]
2773  %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
2774  ret <2 x i24> %result
2775}
2776
; s_fshr_i32: 32-bit funnel shift right (llvm.fshr.i32) in an amdgpu_ps
; function where %lhs, %rhs and %amt are all marked inreg.
; Every prefix expects one v_alignbit_b32 followed by v_readfirstlane_b32 to
; place the result in s0. The GFX6/GFX8/GFX9 checks copy both s1 and s2 into
; VGPRs first; the GFX10/GFX11 checks copy only s2 and hand s1 to
; v_alignbit_b32 directly.
2777define amdgpu_ps i32 @s_fshr_i32(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) {
2778; GFX6-LABEL: s_fshr_i32:
2779; GFX6:       ; %bb.0:
2780; GFX6-NEXT:    v_mov_b32_e32 v0, s1
2781; GFX6-NEXT:    v_mov_b32_e32 v1, s2
2782; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2783; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
2784; GFX6-NEXT:    ; return to shader part epilog
2785;
2786; GFX8-LABEL: s_fshr_i32:
2787; GFX8:       ; %bb.0:
2788; GFX8-NEXT:    v_mov_b32_e32 v0, s1
2789; GFX8-NEXT:    v_mov_b32_e32 v1, s2
2790; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2791; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
2792; GFX8-NEXT:    ; return to shader part epilog
2793;
2794; GFX9-LABEL: s_fshr_i32:
2795; GFX9:       ; %bb.0:
2796; GFX9-NEXT:    v_mov_b32_e32 v0, s1
2797; GFX9-NEXT:    v_mov_b32_e32 v1, s2
2798; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2799; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
2800; GFX9-NEXT:    ; return to shader part epilog
2801;
2802; GFX10-LABEL: s_fshr_i32:
2803; GFX10:       ; %bb.0:
2804; GFX10-NEXT:    v_mov_b32_e32 v0, s2
2805; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, v0
2806; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2807; GFX10-NEXT:    ; return to shader part epilog
2808;
2809; GFX11-LABEL: s_fshr_i32:
2810; GFX11:       ; %bb.0:
2811; GFX11-NEXT:    v_mov_b32_e32 v0, s2
2812; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2813; GFX11-NEXT:    v_alignbit_b32 v0, s0, s1, v0
2814; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
2815; GFX11-NEXT:    ; return to shader part epilog
2816  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2817  ret i32 %result
2818}
2819
; s_fshr_i32_5: same as s_fshr_i32 but with the shift amount fixed to the
; constant 5 (only %lhs and %rhs are arguments, both inreg).
; All prefixes expect the constant to appear as the immediate last operand
; of v_alignbit_b32. The GFX6/GFX8/GFX9 checks still copy s1 into a VGPR;
; the GFX10/GFX11 checks use s0 and s1 directly with no v_mov.
2820define amdgpu_ps i32 @s_fshr_i32_5(i32 inreg %lhs, i32 inreg %rhs) {
2821; GFX6-LABEL: s_fshr_i32_5:
2822; GFX6:       ; %bb.0:
2823; GFX6-NEXT:    v_mov_b32_e32 v0, s1
2824; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, 5
2825; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
2826; GFX6-NEXT:    ; return to shader part epilog
2827;
2828; GFX8-LABEL: s_fshr_i32_5:
2829; GFX8:       ; %bb.0:
2830; GFX8-NEXT:    v_mov_b32_e32 v0, s1
2831; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, 5
2832; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
2833; GFX8-NEXT:    ; return to shader part epilog
2834;
2835; GFX9-LABEL: s_fshr_i32_5:
2836; GFX9:       ; %bb.0:
2837; GFX9-NEXT:    v_mov_b32_e32 v0, s1
2838; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, 5
2839; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
2840; GFX9-NEXT:    ; return to shader part epilog
2841;
2842; GFX10-LABEL: s_fshr_i32_5:
2843; GFX10:       ; %bb.0:
2844; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, 5
2845; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2846; GFX10-NEXT:    ; return to shader part epilog
2847;
2848; GFX11-LABEL: s_fshr_i32_5:
2849; GFX11:       ; %bb.0:
2850; GFX11-NEXT:    v_alignbit_b32 v0, s0, s1, 5
2851; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2852; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
2853; GFX11-NEXT:    ; return to shader part epilog
2854  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5)
2855  ret i32 %result
2856}
2857
; s_fshr_i32_8: inreg i32 funnel shift right by the constant 8.
; The expected code has the same shape as s_fshr_i32_5: a v_alignbit_b32
; whose last operand is the immediate 8, then v_readfirstlane_b32 into s0.
; Only the GFX6/GFX8/GFX9 checks include a v_mov of s1 into a VGPR.
2858define amdgpu_ps i32 @s_fshr_i32_8(i32 inreg %lhs, i32 inreg %rhs) {
2859; GFX6-LABEL: s_fshr_i32_8:
2860; GFX6:       ; %bb.0:
2861; GFX6-NEXT:    v_mov_b32_e32 v0, s1
2862; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, 8
2863; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
2864; GFX6-NEXT:    ; return to shader part epilog
2865;
2866; GFX8-LABEL: s_fshr_i32_8:
2867; GFX8:       ; %bb.0:
2868; GFX8-NEXT:    v_mov_b32_e32 v0, s1
2869; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, 8
2870; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
2871; GFX8-NEXT:    ; return to shader part epilog
2872;
2873; GFX9-LABEL: s_fshr_i32_8:
2874; GFX9:       ; %bb.0:
2875; GFX9-NEXT:    v_mov_b32_e32 v0, s1
2876; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, 8
2877; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
2878; GFX9-NEXT:    ; return to shader part epilog
2879;
2880; GFX10-LABEL: s_fshr_i32_8:
2881; GFX10:       ; %bb.0:
2882; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, 8
2883; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2884; GFX10-NEXT:    ; return to shader part epilog
2885;
2886; GFX11-LABEL: s_fshr_i32_8:
2887; GFX11:       ; %bb.0:
2888; GFX11-NEXT:    v_alignbit_b32 v0, s0, s1, 8
2889; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2890; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
2891; GFX11-NEXT:    ; return to shader part epilog
2892  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8)
2893  ret i32 %result
2894}
2895
; v_fshr_i32: i32 funnel shift right in a function with no explicit calling
; convention and no inreg arguments; the checks read the operands from
; v0, v1 and v2.
; Every prefix expects exactly one v_alignbit_b32 v0, v0, v1, v2 between the
; entry s_waitcnt and the s_setpc_b64 return. The GFX10/GFX11 checks also
; expect an s_waitcnt_vscnt at entry.
2896define i32 @v_fshr_i32(i32 %lhs, i32 %rhs, i32 %amt) {
2897; GFX6-LABEL: v_fshr_i32:
2898; GFX6:       ; %bb.0:
2899; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2900; GFX6-NEXT:    v_alignbit_b32 v0, v0, v1, v2
2901; GFX6-NEXT:    s_setpc_b64 s[30:31]
2902;
2903; GFX8-LABEL: v_fshr_i32:
2904; GFX8:       ; %bb.0:
2905; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2906; GFX8-NEXT:    v_alignbit_b32 v0, v0, v1, v2
2907; GFX8-NEXT:    s_setpc_b64 s[30:31]
2908;
2909; GFX9-LABEL: v_fshr_i32:
2910; GFX9:       ; %bb.0:
2911; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2912; GFX9-NEXT:    v_alignbit_b32 v0, v0, v1, v2
2913; GFX9-NEXT:    s_setpc_b64 s[30:31]
2914;
2915; GFX10-LABEL: v_fshr_i32:
2916; GFX10:       ; %bb.0:
2917; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2918; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2919; GFX10-NEXT:    v_alignbit_b32 v0, v0, v1, v2
2920; GFX10-NEXT:    s_setpc_b64 s[30:31]
2921;
2922; GFX11-LABEL: v_fshr_i32:
2923; GFX11:       ; %bb.0:
2924; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2925; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2926; GFX11-NEXT:    v_alignbit_b32 v0, v0, v1, v2
2927; GFX11-NEXT:    s_setpc_b64 s[30:31]
2928  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2929  ret i32 %result
2930}
2931
; v_fshr_i32_5: non-inreg i32 funnel shift right by the constant 5.
; Every prefix expects a single v_alignbit_b32 v0, v0, v1, 5, i.e. the
; constant amount is used as an immediate operand.
2932define i32 @v_fshr_i32_5(i32 %lhs, i32 %rhs) {
2933; GFX6-LABEL: v_fshr_i32_5:
2934; GFX6:       ; %bb.0:
2935; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2936; GFX6-NEXT:    v_alignbit_b32 v0, v0, v1, 5
2937; GFX6-NEXT:    s_setpc_b64 s[30:31]
2938;
2939; GFX8-LABEL: v_fshr_i32_5:
2940; GFX8:       ; %bb.0:
2941; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2942; GFX8-NEXT:    v_alignbit_b32 v0, v0, v1, 5
2943; GFX8-NEXT:    s_setpc_b64 s[30:31]
2944;
2945; GFX9-LABEL: v_fshr_i32_5:
2946; GFX9:       ; %bb.0:
2947; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2948; GFX9-NEXT:    v_alignbit_b32 v0, v0, v1, 5
2949; GFX9-NEXT:    s_setpc_b64 s[30:31]
2950;
2951; GFX10-LABEL: v_fshr_i32_5:
2952; GFX10:       ; %bb.0:
2953; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2954; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2955; GFX10-NEXT:    v_alignbit_b32 v0, v0, v1, 5
2956; GFX10-NEXT:    s_setpc_b64 s[30:31]
2957;
2958; GFX11-LABEL: v_fshr_i32_5:
2959; GFX11:       ; %bb.0:
2960; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2961; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2962; GFX11-NEXT:    v_alignbit_b32 v0, v0, v1, 5
2963; GFX11-NEXT:    s_setpc_b64 s[30:31]
2964  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5)
2965  ret i32 %result
2966}
2967
; v_fshr_i32_8: non-inreg i32 funnel shift right by the constant 8.
; Every prefix expects a single v_alignbit_b32 v0, v0, v1, 8 with the
; amount as an immediate operand.
2968define i32 @v_fshr_i32_8(i32 %lhs, i32 %rhs) {
2969; GFX6-LABEL: v_fshr_i32_8:
2970; GFX6:       ; %bb.0:
2971; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2972; GFX6-NEXT:    v_alignbit_b32 v0, v0, v1, 8
2973; GFX6-NEXT:    s_setpc_b64 s[30:31]
2974;
2975; GFX8-LABEL: v_fshr_i32_8:
2976; GFX8:       ; %bb.0:
2977; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2978; GFX8-NEXT:    v_alignbit_b32 v0, v0, v1, 8
2979; GFX8-NEXT:    s_setpc_b64 s[30:31]
2980;
2981; GFX9-LABEL: v_fshr_i32_8:
2982; GFX9:       ; %bb.0:
2983; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2984; GFX9-NEXT:    v_alignbit_b32 v0, v0, v1, 8
2985; GFX9-NEXT:    s_setpc_b64 s[30:31]
2986;
2987; GFX10-LABEL: v_fshr_i32_8:
2988; GFX10:       ; %bb.0:
2989; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2990; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2991; GFX10-NEXT:    v_alignbit_b32 v0, v0, v1, 8
2992; GFX10-NEXT:    s_setpc_b64 s[30:31]
2993;
2994; GFX11-LABEL: v_fshr_i32_8:
2995; GFX11:       ; %bb.0:
2996; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2997; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2998; GFX11-NEXT:    v_alignbit_b32 v0, v0, v1, 8
2999; GFX11-NEXT:    s_setpc_b64 s[30:31]
3000  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8)
3001  ret i32 %result
3002}
3003
; v_fshr_i32_ssv: mixed-operand i32 funnel shift right. %lhs and %rhs are
; inreg, %amt is not; the i32 result is bitcast to float for the return, and
; the checks leave it in v0 with no v_readfirstlane_b32.
; The GFX6/GFX8/GFX9 checks copy s1 (%rhs) into v1 before v_alignbit_b32;
; the GFX10/GFX11 checks pass both s0 and s1 to v_alignbit_b32 directly.
3004define amdgpu_ps float @v_fshr_i32_ssv(i32 inreg %lhs, i32 inreg %rhs, i32 %amt) {
3005; GFX6-LABEL: v_fshr_i32_ssv:
3006; GFX6:       ; %bb.0:
3007; GFX6-NEXT:    v_mov_b32_e32 v1, s1
3008; GFX6-NEXT:    v_alignbit_b32 v0, s0, v1, v0
3009; GFX6-NEXT:    ; return to shader part epilog
3010;
3011; GFX8-LABEL: v_fshr_i32_ssv:
3012; GFX8:       ; %bb.0:
3013; GFX8-NEXT:    v_mov_b32_e32 v1, s1
3014; GFX8-NEXT:    v_alignbit_b32 v0, s0, v1, v0
3015; GFX8-NEXT:    ; return to shader part epilog
3016;
3017; GFX9-LABEL: v_fshr_i32_ssv:
3018; GFX9:       ; %bb.0:
3019; GFX9-NEXT:    v_mov_b32_e32 v1, s1
3020; GFX9-NEXT:    v_alignbit_b32 v0, s0, v1, v0
3021; GFX9-NEXT:    ; return to shader part epilog
3022;
3023; GFX10-LABEL: v_fshr_i32_ssv:
3024; GFX10:       ; %bb.0:
3025; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, v0
3026; GFX10-NEXT:    ; return to shader part epilog
3027;
3028; GFX11-LABEL: v_fshr_i32_ssv:
3029; GFX11:       ; %bb.0:
3030; GFX11-NEXT:    v_alignbit_b32 v0, s0, s1, v0
3031; GFX11-NEXT:    ; return to shader part epilog
3032  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
3033  %cast.result = bitcast i32 %result to float
3034  ret float %cast.result
3035}
3036
; v_fshr_i32_svs: mixed-operand i32 funnel shift right. %lhs and %amt are
; inreg, %rhs is not; the result is bitcast to float and returned in v0.
; The GFX6/GFX8/GFX9 checks copy s1 (%amt) into v1 before v_alignbit_b32;
; the GFX10/GFX11 checks use s1 directly as the last operand.
3037define amdgpu_ps float @v_fshr_i32_svs(i32 inreg %lhs, i32 %rhs, i32 inreg %amt) {
3038; GFX6-LABEL: v_fshr_i32_svs:
3039; GFX6:       ; %bb.0:
3040; GFX6-NEXT:    v_mov_b32_e32 v1, s1
3041; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, v1
3042; GFX6-NEXT:    ; return to shader part epilog
3043;
3044; GFX8-LABEL: v_fshr_i32_svs:
3045; GFX8:       ; %bb.0:
3046; GFX8-NEXT:    v_mov_b32_e32 v1, s1
3047; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, v1
3048; GFX8-NEXT:    ; return to shader part epilog
3049;
3050; GFX9-LABEL: v_fshr_i32_svs:
3051; GFX9:       ; %bb.0:
3052; GFX9-NEXT:    v_mov_b32_e32 v1, s1
3053; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, v1
3054; GFX9-NEXT:    ; return to shader part epilog
3055;
3056; GFX10-LABEL: v_fshr_i32_svs:
3057; GFX10:       ; %bb.0:
3058; GFX10-NEXT:    v_alignbit_b32 v0, s0, v0, s1
3059; GFX10-NEXT:    ; return to shader part epilog
3060;
3061; GFX11-LABEL: v_fshr_i32_svs:
3062; GFX11:       ; %bb.0:
3063; GFX11-NEXT:    v_alignbit_b32 v0, s0, v0, s1
3064; GFX11-NEXT:    ; return to shader part epilog
3065  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
3066  %cast.result = bitcast i32 %result to float
3067  ret float %cast.result
3068}
3069
; v_fshr_i32_vss: i32 funnel shift right whose result is bitcast to float
; and returned in v0 (the checks contain no v_readfirstlane_b32).
; NOTE(review): the "vss" suffix suggests a non-inreg %lhs, but all three
; arguments are declared inreg here and the checks read %lhs from s0. The
; operands are therefore the same as in s_fshr_i32 and only the return type
; differs. Confirm whether dropping inreg from %lhs was intended; doing so
; would require regenerating the assertions.
3070define amdgpu_ps float @v_fshr_i32_vss(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) {
3071; GFX6-LABEL: v_fshr_i32_vss:
3072; GFX6:       ; %bb.0:
3073; GFX6-NEXT:    v_mov_b32_e32 v0, s1
3074; GFX6-NEXT:    v_mov_b32_e32 v1, s2
3075; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, v1
3076; GFX6-NEXT:    ; return to shader part epilog
3077;
3078; GFX8-LABEL: v_fshr_i32_vss:
3079; GFX8:       ; %bb.0:
3080; GFX8-NEXT:    v_mov_b32_e32 v0, s1
3081; GFX8-NEXT:    v_mov_b32_e32 v1, s2
3082; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, v1
3083; GFX8-NEXT:    ; return to shader part epilog
3084;
3085; GFX9-LABEL: v_fshr_i32_vss:
3086; GFX9:       ; %bb.0:
3087; GFX9-NEXT:    v_mov_b32_e32 v0, s1
3088; GFX9-NEXT:    v_mov_b32_e32 v1, s2
3089; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, v1
3090; GFX9-NEXT:    ; return to shader part epilog
3091;
3092; GFX10-LABEL: v_fshr_i32_vss:
3093; GFX10:       ; %bb.0:
3094; GFX10-NEXT:    v_mov_b32_e32 v0, s2
3095; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, v0
3096; GFX10-NEXT:    ; return to shader part epilog
3097;
3098; GFX11-LABEL: v_fshr_i32_vss:
3099; GFX11:       ; %bb.0:
3100; GFX11-NEXT:    v_mov_b32_e32 v0, s2
3101; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3102; GFX11-NEXT:    v_alignbit_b32 v0, s0, s1, v0
3103; GFX11-NEXT:    ; return to shader part epilog
3104  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
3105  %cast.result = bitcast i32 %result to float
3106  ret float %cast.result
3107}
3108
; v_fshr_v2i32: funnel shift right on <2 x i32> with non-inreg arguments.
; Every prefix expects one v_alignbit_b32 per element: element 0 uses
; v0/v2/v4 and element 1 uses v1/v3/v5, with results in v0 and v1.
3109define <2 x i32> @v_fshr_v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) {
3110; GFX6-LABEL: v_fshr_v2i32:
3111; GFX6:       ; %bb.0:
3112; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3113; GFX6-NEXT:    v_alignbit_b32 v0, v0, v2, v4
3114; GFX6-NEXT:    v_alignbit_b32 v1, v1, v3, v5
3115; GFX6-NEXT:    s_setpc_b64 s[30:31]
3116;
3117; GFX8-LABEL: v_fshr_v2i32:
3118; GFX8:       ; %bb.0:
3119; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3120; GFX8-NEXT:    v_alignbit_b32 v0, v0, v2, v4
3121; GFX8-NEXT:    v_alignbit_b32 v1, v1, v3, v5
3122; GFX8-NEXT:    s_setpc_b64 s[30:31]
3123;
3124; GFX9-LABEL: v_fshr_v2i32:
3125; GFX9:       ; %bb.0:
3126; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3127; GFX9-NEXT:    v_alignbit_b32 v0, v0, v2, v4
3128; GFX9-NEXT:    v_alignbit_b32 v1, v1, v3, v5
3129; GFX9-NEXT:    s_setpc_b64 s[30:31]
3130;
3131; GFX10-LABEL: v_fshr_v2i32:
3132; GFX10:       ; %bb.0:
3133; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3134; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3135; GFX10-NEXT:    v_alignbit_b32 v0, v0, v2, v4
3136; GFX10-NEXT:    v_alignbit_b32 v1, v1, v3, v5
3137; GFX10-NEXT:    s_setpc_b64 s[30:31]
3138;
3139; GFX11-LABEL: v_fshr_v2i32:
3140; GFX11:       ; %bb.0:
3141; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3142; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3143; GFX11-NEXT:    v_alignbit_b32 v0, v0, v2, v4
3144; GFX11-NEXT:    v_alignbit_b32 v1, v1, v3, v5
3145; GFX11-NEXT:    s_setpc_b64 s[30:31]
3146  %result = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt)
3147  ret <2 x i32> %result
3148}
3149
; v_fshr_v3i32: funnel shift right on <3 x i32> with non-inreg arguments.
; Every prefix expects three independent v_alignbit_b32 instructions, one
; per element, reading v0-v2 / v3-v5 / v6-v8 and writing v0-v2.
3150define <3 x i32> @v_fshr_v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) {
3151; GFX6-LABEL: v_fshr_v3i32:
3152; GFX6:       ; %bb.0:
3153; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3154; GFX6-NEXT:    v_alignbit_b32 v0, v0, v3, v6
3155; GFX6-NEXT:    v_alignbit_b32 v1, v1, v4, v7
3156; GFX6-NEXT:    v_alignbit_b32 v2, v2, v5, v8
3157; GFX6-NEXT:    s_setpc_b64 s[30:31]
3158;
3159; GFX8-LABEL: v_fshr_v3i32:
3160; GFX8:       ; %bb.0:
3161; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3162; GFX8-NEXT:    v_alignbit_b32 v0, v0, v3, v6
3163; GFX8-NEXT:    v_alignbit_b32 v1, v1, v4, v7
3164; GFX8-NEXT:    v_alignbit_b32 v2, v2, v5, v8
3165; GFX8-NEXT:    s_setpc_b64 s[30:31]
3166;
3167; GFX9-LABEL: v_fshr_v3i32:
3168; GFX9:       ; %bb.0:
3169; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3170; GFX9-NEXT:    v_alignbit_b32 v0, v0, v3, v6
3171; GFX9-NEXT:    v_alignbit_b32 v1, v1, v4, v7
3172; GFX9-NEXT:    v_alignbit_b32 v2, v2, v5, v8
3173; GFX9-NEXT:    s_setpc_b64 s[30:31]
3174;
3175; GFX10-LABEL: v_fshr_v3i32:
3176; GFX10:       ; %bb.0:
3177; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3178; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3179; GFX10-NEXT:    v_alignbit_b32 v0, v0, v3, v6
3180; GFX10-NEXT:    v_alignbit_b32 v1, v1, v4, v7
3181; GFX10-NEXT:    v_alignbit_b32 v2, v2, v5, v8
3182; GFX10-NEXT:    s_setpc_b64 s[30:31]
3183;
3184; GFX11-LABEL: v_fshr_v3i32:
3185; GFX11:       ; %bb.0:
3186; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3187; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3188; GFX11-NEXT:    v_alignbit_b32 v0, v0, v3, v6
3189; GFX11-NEXT:    v_alignbit_b32 v1, v1, v4, v7
3190; GFX11-NEXT:    v_alignbit_b32 v2, v2, v5, v8
3191; GFX11-NEXT:    s_setpc_b64 s[30:31]
3192  %result = call <3 x i32> @llvm.fshr.v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt)
3193  ret <3 x i32> %result
3194}
3195
; v_fshr_v4i32: funnel shift right on <4 x i32> with non-inreg arguments.
; Every prefix expects four independent v_alignbit_b32 instructions, one
; per element, reading v0-v3 / v4-v7 / v8-v11 and writing v0-v3.
3196define <4 x i32> @v_fshr_v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) {
3197; GFX6-LABEL: v_fshr_v4i32:
3198; GFX6:       ; %bb.0:
3199; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3200; GFX6-NEXT:    v_alignbit_b32 v0, v0, v4, v8
3201; GFX6-NEXT:    v_alignbit_b32 v1, v1, v5, v9
3202; GFX6-NEXT:    v_alignbit_b32 v2, v2, v6, v10
3203; GFX6-NEXT:    v_alignbit_b32 v3, v3, v7, v11
3204; GFX6-NEXT:    s_setpc_b64 s[30:31]
3205;
3206; GFX8-LABEL: v_fshr_v4i32:
3207; GFX8:       ; %bb.0:
3208; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3209; GFX8-NEXT:    v_alignbit_b32 v0, v0, v4, v8
3210; GFX8-NEXT:    v_alignbit_b32 v1, v1, v5, v9
3211; GFX8-NEXT:    v_alignbit_b32 v2, v2, v6, v10
3212; GFX8-NEXT:    v_alignbit_b32 v3, v3, v7, v11
3213; GFX8-NEXT:    s_setpc_b64 s[30:31]
3214;
3215; GFX9-LABEL: v_fshr_v4i32:
3216; GFX9:       ; %bb.0:
3217; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3218; GFX9-NEXT:    v_alignbit_b32 v0, v0, v4, v8
3219; GFX9-NEXT:    v_alignbit_b32 v1, v1, v5, v9
3220; GFX9-NEXT:    v_alignbit_b32 v2, v2, v6, v10
3221; GFX9-NEXT:    v_alignbit_b32 v3, v3, v7, v11
3222; GFX9-NEXT:    s_setpc_b64 s[30:31]
3223;
3224; GFX10-LABEL: v_fshr_v4i32:
3225; GFX10:       ; %bb.0:
3226; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3227; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3228; GFX10-NEXT:    v_alignbit_b32 v0, v0, v4, v8
3229; GFX10-NEXT:    v_alignbit_b32 v1, v1, v5, v9
3230; GFX10-NEXT:    v_alignbit_b32 v2, v2, v6, v10
3231; GFX10-NEXT:    v_alignbit_b32 v3, v3, v7, v11
3232; GFX10-NEXT:    s_setpc_b64 s[30:31]
3233;
3234; GFX11-LABEL: v_fshr_v4i32:
3235; GFX11:       ; %bb.0:
3236; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3237; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3238; GFX11-NEXT:    v_alignbit_b32 v0, v0, v4, v8
3239; GFX11-NEXT:    v_alignbit_b32 v1, v1, v5, v9
3240; GFX11-NEXT:    v_alignbit_b32 v2, v2, v6, v10
3241; GFX11-NEXT:    v_alignbit_b32 v3, v3, v7, v11
3242; GFX11-NEXT:    s_setpc_b64 s[30:31]
3243  %result = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt)
3244  ret <4 x i32> %result
3245}
3246
; s_fshr_i16: 16-bit funnel shift right (llvm.fshr.i16) with all three
; operands inreg. Unlike the i32 tests, no prefix expects v_alignbit_b32;
; the checks describe a purely scalar expansion:
;   - the amount is masked with 15 (s_and_b32), and the inverted amount is
;     masked with 15 via s_andn2_b32 (s_and_not1_b32 in the GFX11 checks);
;   - %lhs is shifted left by 1 and then by the inverted amount;
;   - %rhs is reduced to its low 16 bits (s_and_b32 0xffff in the GFX6
;     checks, s_bfe_u32 elsewhere) and shifted right by the masked amount;
;   - the two halves are combined with s_or_b32 into s0.
; The GFX8 and later checks also materialize the constant 1 through
; s_bfe_u32 before the first shift instead of using an immediate.
3247define amdgpu_ps i16 @s_fshr_i16(i16 inreg %lhs, i16 inreg %rhs, i16 inreg %amt) {
3248; GFX6-LABEL: s_fshr_i16:
3249; GFX6:       ; %bb.0:
3250; GFX6-NEXT:    s_and_b32 s3, s2, 15
3251; GFX6-NEXT:    s_andn2_b32 s2, 15, s2
3252; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
3253; GFX6-NEXT:    s_bfe_u32 s2, s2, 0x100000
3254; GFX6-NEXT:    s_lshl_b32 s0, s0, s2
3255; GFX6-NEXT:    s_bfe_u32 s2, s3, 0x100000
3256; GFX6-NEXT:    s_and_b32 s1, s1, 0xffff
3257; GFX6-NEXT:    s_lshr_b32 s1, s1, s2
3258; GFX6-NEXT:    s_or_b32 s0, s0, s1
3259; GFX6-NEXT:    ; return to shader part epilog
3260;
3261; GFX8-LABEL: s_fshr_i16:
3262; GFX8:       ; %bb.0:
3263; GFX8-NEXT:    s_and_b32 s3, s2, 15
3264; GFX8-NEXT:    s_andn2_b32 s2, 15, s2
3265; GFX8-NEXT:    s_bfe_u32 s4, 1, 0x100000
3266; GFX8-NEXT:    s_lshl_b32 s0, s0, s4
3267; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
3268; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
3269; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
3270; GFX8-NEXT:    s_bfe_u32 s2, s3, 0x100000
3271; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
3272; GFX8-NEXT:    s_or_b32 s0, s0, s1
3273; GFX8-NEXT:    ; return to shader part epilog
3274;
3275; GFX9-LABEL: s_fshr_i16:
3276; GFX9:       ; %bb.0:
3277; GFX9-NEXT:    s_and_b32 s3, s2, 15
3278; GFX9-NEXT:    s_andn2_b32 s2, 15, s2
3279; GFX9-NEXT:    s_bfe_u32 s4, 1, 0x100000
3280; GFX9-NEXT:    s_lshl_b32 s0, s0, s4
3281; GFX9-NEXT:    s_bfe_u32 s2, s2, 0x100000
3282; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
3283; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
3284; GFX9-NEXT:    s_bfe_u32 s2, s3, 0x100000
3285; GFX9-NEXT:    s_lshr_b32 s1, s1, s2
3286; GFX9-NEXT:    s_or_b32 s0, s0, s1
3287; GFX9-NEXT:    ; return to shader part epilog
3288;
3289; GFX10-LABEL: s_fshr_i16:
3290; GFX10:       ; %bb.0:
3291; GFX10-NEXT:    s_and_b32 s3, s2, 15
3292; GFX10-NEXT:    s_bfe_u32 s4, 1, 0x100000
3293; GFX10-NEXT:    s_andn2_b32 s2, 15, s2
3294; GFX10-NEXT:    s_lshl_b32 s0, s0, s4
3295; GFX10-NEXT:    s_bfe_u32 s2, s2, 0x100000
3296; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
3297; GFX10-NEXT:    s_bfe_u32 s3, s3, 0x100000
3298; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
3299; GFX10-NEXT:    s_lshr_b32 s1, s1, s3
3300; GFX10-NEXT:    s_or_b32 s0, s0, s1
3301; GFX10-NEXT:    ; return to shader part epilog
3302;
3303; GFX11-LABEL: s_fshr_i16:
3304; GFX11:       ; %bb.0:
3305; GFX11-NEXT:    s_and_b32 s3, s2, 15
3306; GFX11-NEXT:    s_bfe_u32 s4, 1, 0x100000
3307; GFX11-NEXT:    s_and_not1_b32 s2, 15, s2
3308; GFX11-NEXT:    s_lshl_b32 s0, s0, s4
3309; GFX11-NEXT:    s_bfe_u32 s2, s2, 0x100000
3310; GFX11-NEXT:    s_bfe_u32 s1, s1, 0x100000
3311; GFX11-NEXT:    s_bfe_u32 s3, s3, 0x100000
3312; GFX11-NEXT:    s_lshl_b32 s0, s0, s2
3313; GFX11-NEXT:    s_lshr_b32 s1, s1, s3
3314; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3315; GFX11-NEXT:    s_or_b32 s0, s0, s1
3316; GFX11-NEXT:    ; return to shader part epilog
3317  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
3318  ret i16 %result
3319}
3320
; s_fshr_i16_4: inreg i16 funnel shift right by the constant 4.
; The GFX6 checks expect a fully folded three-instruction sequence:
; s_lshl_b32 by 12, s_bfe_u32 on %rhs, and s_or_b32.
; The GFX8 and later checks instead expect the constants 12 and 4 to be
; produced by s_bfe_u32 and then used as register shift amounts.
; NOTE(review): the s_bfe_u32 on a literal constant looks like an unfolded
; extension of the shift amount - presumably a missed fold; confirm before
; relying on it.
3321define amdgpu_ps i16 @s_fshr_i16_4(i16 inreg %lhs, i16 inreg %rhs) {
3322; GFX6-LABEL: s_fshr_i16_4:
3323; GFX6:       ; %bb.0:
3324; GFX6-NEXT:    s_lshl_b32 s0, s0, 12
3325; GFX6-NEXT:    s_bfe_u32 s1, s1, 0xc0004
3326; GFX6-NEXT:    s_or_b32 s0, s0, s1
3327; GFX6-NEXT:    ; return to shader part epilog
3328;
3329; GFX8-LABEL: s_fshr_i16_4:
3330; GFX8:       ; %bb.0:
3331; GFX8-NEXT:    s_bfe_u32 s2, 12, 0x100000
3332; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
3333; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
3334; GFX8-NEXT:    s_bfe_u32 s2, 4, 0x100000
3335; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
3336; GFX8-NEXT:    s_or_b32 s0, s0, s1
3337; GFX8-NEXT:    ; return to shader part epilog
3338;
3339; GFX9-LABEL: s_fshr_i16_4:
3340; GFX9:       ; %bb.0:
3341; GFX9-NEXT:    s_bfe_u32 s2, 12, 0x100000
3342; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
3343; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
3344; GFX9-NEXT:    s_bfe_u32 s2, 4, 0x100000
3345; GFX9-NEXT:    s_lshr_b32 s1, s1, s2
3346; GFX9-NEXT:    s_or_b32 s0, s0, s1
3347; GFX9-NEXT:    ; return to shader part epilog
3348;
3349; GFX10-LABEL: s_fshr_i16_4:
3350; GFX10:       ; %bb.0:
3351; GFX10-NEXT:    s_bfe_u32 s2, 12, 0x100000
3352; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
3353; GFX10-NEXT:    s_bfe_u32 s3, 4, 0x100000
3354; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
3355; GFX10-NEXT:    s_lshr_b32 s1, s1, s3
3356; GFX10-NEXT:    s_or_b32 s0, s0, s1
3357; GFX10-NEXT:    ; return to shader part epilog
3358;
3359; GFX11-LABEL: s_fshr_i16_4:
3360; GFX11:       ; %bb.0:
3361; GFX11-NEXT:    s_bfe_u32 s2, 12, 0x100000
3362; GFX11-NEXT:    s_bfe_u32 s1, s1, 0x100000
3363; GFX11-NEXT:    s_bfe_u32 s3, 4, 0x100000
3364; GFX11-NEXT:    s_lshl_b32 s0, s0, s2
3365; GFX11-NEXT:    s_lshr_b32 s1, s1, s3
3366; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3367; GFX11-NEXT:    s_or_b32 s0, s0, s1
3368; GFX11-NEXT:    ; return to shader part epilog
3369  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4)
3370  ret i16 %result
3371}
3372
; s_fshr_i16_5: inreg i16 funnel shift right by the constant 5.
; The expected code has the same shape as s_fshr_i16_4 with 11 and 5 in
; place of 12 and 4. The GFX6 checks fold the constants into s_lshl_b32 and
; s_bfe_u32 operands; the GFX8 and later checks produce 11 and 5 through
; s_bfe_u32 and shift by register.
3373define amdgpu_ps i16 @s_fshr_i16_5(i16 inreg %lhs, i16 inreg %rhs) {
3374; GFX6-LABEL: s_fshr_i16_5:
3375; GFX6:       ; %bb.0:
3376; GFX6-NEXT:    s_lshl_b32 s0, s0, 11
3377; GFX6-NEXT:    s_bfe_u32 s1, s1, 0xb0005
3378; GFX6-NEXT:    s_or_b32 s0, s0, s1
3379; GFX6-NEXT:    ; return to shader part epilog
3380;
3381; GFX8-LABEL: s_fshr_i16_5:
3382; GFX8:       ; %bb.0:
3383; GFX8-NEXT:    s_bfe_u32 s2, 11, 0x100000
3384; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
3385; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
3386; GFX8-NEXT:    s_bfe_u32 s2, 5, 0x100000
3387; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
3388; GFX8-NEXT:    s_or_b32 s0, s0, s1
3389; GFX8-NEXT:    ; return to shader part epilog
3390;
3391; GFX9-LABEL: s_fshr_i16_5:
3392; GFX9:       ; %bb.0:
3393; GFX9-NEXT:    s_bfe_u32 s2, 11, 0x100000
3394; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
3395; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
3396; GFX9-NEXT:    s_bfe_u32 s2, 5, 0x100000
3397; GFX9-NEXT:    s_lshr_b32 s1, s1, s2
3398; GFX9-NEXT:    s_or_b32 s0, s0, s1
3399; GFX9-NEXT:    ; return to shader part epilog
3400;
3401; GFX10-LABEL: s_fshr_i16_5:
3402; GFX10:       ; %bb.0:
3403; GFX10-NEXT:    s_bfe_u32 s2, 11, 0x100000
3404; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
3405; GFX10-NEXT:    s_bfe_u32 s3, 5, 0x100000
3406; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
3407; GFX10-NEXT:    s_lshr_b32 s1, s1, s3
3408; GFX10-NEXT:    s_or_b32 s0, s0, s1
3409; GFX10-NEXT:    ; return to shader part epilog
3410;
3411; GFX11-LABEL: s_fshr_i16_5:
3412; GFX11:       ; %bb.0:
3413; GFX11-NEXT:    s_bfe_u32 s2, 11, 0x100000
3414; GFX11-NEXT:    s_bfe_u32 s1, s1, 0x100000
3415; GFX11-NEXT:    s_bfe_u32 s3, 5, 0x100000
3416; GFX11-NEXT:    s_lshl_b32 s0, s0, s2
3417; GFX11-NEXT:    s_lshr_b32 s1, s1, s3
3418; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3419; GFX11-NEXT:    s_or_b32 s0, s0, s1
3420; GFX11-NEXT:    ; return to shader part epilog
3421  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5)
3422  ret i16 %result
3423}
3424
; v_fshr_i16: i16 funnel shift right with non-inreg arguments and a
; variable amount.
; All prefixes expect the amount and its bitwise complement (v_xor_b32 with
; -1) to be masked with 15, %lhs to be shifted left by 1 and then by the
; masked complement, %rhs to be shifted right by the masked amount, and the
; halves combined with v_or_b32.
; The GFX6 checks use 32-bit shifts, with v_bfe_u32 on the amounts and a
; 0xffff mask on %rhs; the GFX8 and later checks use the 16-bit shift
; instructions v_lshlrev_b16 / v_lshrrev_b16 without those extra steps.
3425define i16 @v_fshr_i16(i16 %lhs, i16 %rhs, i16 %amt) {
3426; GFX6-LABEL: v_fshr_i16:
3427; GFX6:       ; %bb.0:
3428; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3429; GFX6-NEXT:    v_and_b32_e32 v3, 15, v2
3430; GFX6-NEXT:    v_xor_b32_e32 v2, -1, v2
3431; GFX6-NEXT:    v_and_b32_e32 v2, 15, v2
3432; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
3433; GFX6-NEXT:    v_bfe_u32 v2, v2, 0, 16
3434; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
3435; GFX6-NEXT:    v_bfe_u32 v2, v3, 0, 16
3436; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
3437; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
3438; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
3439; GFX6-NEXT:    s_setpc_b64 s[30:31]
3440;
3441; GFX8-LABEL: v_fshr_i16:
3442; GFX8:       ; %bb.0:
3443; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3444; GFX8-NEXT:    v_and_b32_e32 v3, 15, v2
3445; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
3446; GFX8-NEXT:    v_and_b32_e32 v2, 15, v2
3447; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
3448; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
3449; GFX8-NEXT:    v_lshrrev_b16_e32 v1, v3, v1
3450; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3451; GFX8-NEXT:    s_setpc_b64 s[30:31]
3452;
3453; GFX9-LABEL: v_fshr_i16:
3454; GFX9:       ; %bb.0:
3455; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3456; GFX9-NEXT:    v_and_b32_e32 v3, 15, v2
3457; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v2
3458; GFX9-NEXT:    v_and_b32_e32 v2, 15, v2
3459; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
3460; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
3461; GFX9-NEXT:    v_lshrrev_b16_e32 v1, v3, v1
3462; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
3463; GFX9-NEXT:    s_setpc_b64 s[30:31]
3464;
3465; GFX10-LABEL: v_fshr_i16:
3466; GFX10:       ; %bb.0:
3467; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3468; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3469; GFX10-NEXT:    v_xor_b32_e32 v3, -1, v2
3470; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
3471; GFX10-NEXT:    v_and_b32_e32 v2, 15, v2
3472; GFX10-NEXT:    v_and_b32_e32 v3, 15, v3
3473; GFX10-NEXT:    v_lshrrev_b16 v1, v2, v1
3474; GFX10-NEXT:    v_lshlrev_b16 v0, v3, v0
3475; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
3476; GFX10-NEXT:    s_setpc_b64 s[30:31]
3477;
3478; GFX11-LABEL: v_fshr_i16:
3479; GFX11:       ; %bb.0:
3480; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3481; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3482; GFX11-NEXT:    v_xor_b32_e32 v3, -1, v2
3483; GFX11-NEXT:    v_lshlrev_b16 v0, 1, v0
3484; GFX11-NEXT:    v_and_b32_e32 v2, 15, v2
3485; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
3486; GFX11-NEXT:    v_and_b32_e32 v3, 15, v3
3487; GFX11-NEXT:    v_lshrrev_b16 v1, v2, v1
3488; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3489; GFX11-NEXT:    v_lshlrev_b16 v0, v3, v0
3490; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
3491; GFX11-NEXT:    s_setpc_b64 s[30:31]
3492  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
3493  ret i16 %result
3494}
3495
; v_fshr_i16_4: non-inreg i16 funnel shift right by the constant 4.
; The GFX6 checks expect v_lshlrev_b32 by 12, v_bfe_u32 v1, v1, 4, 12 and
; v_or_b32. The GFX8 and later checks expect a 16-bit left shift by 12, a
; 16-bit right shift by 4 and v_or_b32, all with immediate amounts.
3496define i16 @v_fshr_i16_4(i16 %lhs, i16 %rhs) {
3497; GFX6-LABEL: v_fshr_i16_4:
3498; GFX6:       ; %bb.0:
3499; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3500; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 12, v0
3501; GFX6-NEXT:    v_bfe_u32 v1, v1, 4, 12
3502; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
3503; GFX6-NEXT:    s_setpc_b64 s[30:31]
3504;
3505; GFX8-LABEL: v_fshr_i16_4:
3506; GFX8:       ; %bb.0:
3507; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3508; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 12, v0
3509; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 4, v1
3510; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3511; GFX8-NEXT:    s_setpc_b64 s[30:31]
3512;
3513; GFX9-LABEL: v_fshr_i16_4:
3514; GFX9:       ; %bb.0:
3515; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3516; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 12, v0
3517; GFX9-NEXT:    v_lshrrev_b16_e32 v1, 4, v1
3518; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
3519; GFX9-NEXT:    s_setpc_b64 s[30:31]
3520;
3521; GFX10-LABEL: v_fshr_i16_4:
3522; GFX10:       ; %bb.0:
3523; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3524; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3525; GFX10-NEXT:    v_lshlrev_b16 v0, 12, v0
3526; GFX10-NEXT:    v_lshrrev_b16 v1, 4, v1
3527; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
3528; GFX10-NEXT:    s_setpc_b64 s[30:31]
3529;
3530; GFX11-LABEL: v_fshr_i16_4:
3531; GFX11:       ; %bb.0:
3532; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3533; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3534; GFX11-NEXT:    v_lshlrev_b16 v0, 12, v0
3535; GFX11-NEXT:    v_lshrrev_b16 v1, 4, v1
3536; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3537; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
3538; GFX11-NEXT:    s_setpc_b64 s[30:31]
3539  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4)
3540  ret i16 %result
3541}
3542
; v_fshr_i16_5: non-inreg i16 funnel shift right by the constant 5.
; The expected code has the same shape as v_fshr_i16_4 with 11 and 5 in
; place of 12 and 4. The GFX6 checks expect v_lshlrev_b32 by 11 and
; v_bfe_u32 v1, v1, 5, 11; the GFX8 and later checks expect 16-bit shifts by
; 11 and 5. Both variants end with v_or_b32.
3543define i16 @v_fshr_i16_5(i16 %lhs, i16 %rhs) {
3544; GFX6-LABEL: v_fshr_i16_5:
3545; GFX6:       ; %bb.0:
3546; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3547; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 11, v0
3548; GFX6-NEXT:    v_bfe_u32 v1, v1, 5, 11
3549; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
3550; GFX6-NEXT:    s_setpc_b64 s[30:31]
3551;
3552; GFX8-LABEL: v_fshr_i16_5:
3553; GFX8:       ; %bb.0:
3554; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3555; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 11, v0
3556; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 5, v1
3557; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3558; GFX8-NEXT:    s_setpc_b64 s[30:31]
3559;
3560; GFX9-LABEL: v_fshr_i16_5:
3561; GFX9:       ; %bb.0:
3562; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3563; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 11, v0
3564; GFX9-NEXT:    v_lshrrev_b16_e32 v1, 5, v1
3565; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
3566; GFX9-NEXT:    s_setpc_b64 s[30:31]
3567;
3568; GFX10-LABEL: v_fshr_i16_5:
3569; GFX10:       ; %bb.0:
3570; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3571; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3572; GFX10-NEXT:    v_lshlrev_b16 v0, 11, v0
3573; GFX10-NEXT:    v_lshrrev_b16 v1, 5, v1
3574; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
3575; GFX10-NEXT:    s_setpc_b64 s[30:31]
3576;
3577; GFX11-LABEL: v_fshr_i16_5:
3578; GFX11:       ; %bb.0:
3579; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3580; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3581; GFX11-NEXT:    v_lshlrev_b16 v0, 11, v0
3582; GFX11-NEXT:    v_lshrrev_b16 v1, 5, v1
3583; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3584; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
3585; GFX11-NEXT:    s_setpc_b64 s[30:31]
3586  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5)
3587  ret i16 %result
3588}
3589
; v_fshr_i16_ssv: i16 funnel shift right in an amdgpu_ps function where %lhs
; and %rhs are inreg arguments and the shift amount %amt is not. The i16
; result is bitcast to half before being returned.
; The "ssv" suffix presumably encodes the SGPR/SGPR/VGPR operand placement of
; (lhs, rhs, amt) -- NOTE(review): inferred from the inreg markers, confirm.
; Check lines are autogenerated by utils/update_llc_test_checks.py.
3590define amdgpu_ps half @v_fshr_i16_ssv(i16 inreg %lhs, i16 inreg %rhs, i16 %amt) {
3591; GFX6-LABEL: v_fshr_i16_ssv:
3592; GFX6:       ; %bb.0:
3593; GFX6-NEXT:    v_and_b32_e32 v1, 15, v0
3594; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
3595; GFX6-NEXT:    v_and_b32_e32 v0, 15, v0
3596; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
3597; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 16
3598; GFX6-NEXT:    v_lshl_b32_e32 v0, s0, v0
3599; GFX6-NEXT:    v_bfe_u32 v1, v1, 0, 16
3600; GFX6-NEXT:    s_and_b32 s0, s1, 0xffff
3601; GFX6-NEXT:    v_lshr_b32_e32 v1, s0, v1
3602; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
3603; GFX6-NEXT:    ; return to shader part epilog
3604;
3605; GFX8-LABEL: v_fshr_i16_ssv:
3606; GFX8:       ; %bb.0:
3607; GFX8-NEXT:    v_and_b32_e32 v1, 15, v0
3608; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
3609; GFX8-NEXT:    s_bfe_u32 s2, 1, 0x100000
3610; GFX8-NEXT:    v_and_b32_e32 v0, 15, v0
3611; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
3612; GFX8-NEXT:    v_lshlrev_b16_e64 v0, v0, s0
3613; GFX8-NEXT:    v_lshrrev_b16_e64 v1, v1, s1
3614; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3615; GFX8-NEXT:    ; return to shader part epilog
3616;
3617; GFX9-LABEL: v_fshr_i16_ssv:
3618; GFX9:       ; %bb.0:
3619; GFX9-NEXT:    v_and_b32_e32 v1, 15, v0
3620; GFX9-NEXT:    v_xor_b32_e32 v0, -1, v0
3621; GFX9-NEXT:    s_bfe_u32 s2, 1, 0x100000
3622; GFX9-NEXT:    v_and_b32_e32 v0, 15, v0
3623; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
3624; GFX9-NEXT:    v_lshlrev_b16_e64 v0, v0, s0
3625; GFX9-NEXT:    v_lshrrev_b16_e64 v1, v1, s1
3626; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
3627; GFX9-NEXT:    ; return to shader part epilog
3628;
3629; GFX10-LABEL: v_fshr_i16_ssv:
3630; GFX10:       ; %bb.0:
3631; GFX10-NEXT:    v_xor_b32_e32 v1, -1, v0
3632; GFX10-NEXT:    v_and_b32_e32 v0, 15, v0
3633; GFX10-NEXT:    s_bfe_u32 s2, 1, 0x100000
3634; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
3635; GFX10-NEXT:    v_and_b32_e32 v1, 15, v1
3636; GFX10-NEXT:    v_lshrrev_b16 v0, v0, s1
3637; GFX10-NEXT:    v_lshlrev_b16 v1, v1, s0
3638; GFX10-NEXT:    v_or_b32_e32 v0, v1, v0
3639; GFX10-NEXT:    ; return to shader part epilog
3640;
3641; GFX11-LABEL: v_fshr_i16_ssv:
3642; GFX11:       ; %bb.0:
3643; GFX11-NEXT:    v_xor_b32_e32 v1, -1, v0
3644; GFX11-NEXT:    v_and_b32_e32 v0, 15, v0
3645; GFX11-NEXT:    s_bfe_u32 s2, 1, 0x100000
3646; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
3647; GFX11-NEXT:    s_lshl_b32 s0, s0, s2
3648; GFX11-NEXT:    v_and_b32_e32 v1, 15, v1
3649; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
3650; GFX11-NEXT:    v_lshrrev_b16 v0, v0, s1
3651; GFX11-NEXT:    v_lshlrev_b16 v1, v1, s0
3652; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3653; GFX11-NEXT:    v_or_b32_e32 v0, v1, v0
3654; GFX11-NEXT:    ; return to shader part epilog
3655  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
3656  %cast.result = bitcast i16 %result to half
3657  ret half %cast.result
3658}
3659
; v_fshr_i16_svs: i16 funnel shift right in an amdgpu_ps function where %lhs
; and %amt are inreg arguments and %rhs is not. The i16 result is bitcast to
; half before being returned.
; The "svs" suffix presumably encodes the SGPR/VGPR/SGPR operand placement of
; (lhs, rhs, amt) -- NOTE(review): inferred from the inreg markers, confirm.
; Check lines are autogenerated by utils/update_llc_test_checks.py.
3660define amdgpu_ps half @v_fshr_i16_svs(i16 inreg %lhs, i16 %rhs, i16 inreg %amt) {
3661; GFX6-LABEL: v_fshr_i16_svs:
3662; GFX6:       ; %bb.0:
3663; GFX6-NEXT:    s_and_b32 s2, s1, 15
3664; GFX6-NEXT:    s_andn2_b32 s1, 15, s1
3665; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
3666; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x100000
3667; GFX6-NEXT:    s_lshl_b32 s0, s0, s1
3668; GFX6-NEXT:    s_bfe_u32 s1, s2, 0x100000
3669; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
3670; GFX6-NEXT:    v_lshrrev_b32_e32 v0, s1, v0
3671; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
3672; GFX6-NEXT:    ; return to shader part epilog
3673;
3674; GFX8-LABEL: v_fshr_i16_svs:
3675; GFX8:       ; %bb.0:
3676; GFX8-NEXT:    s_and_b32 s2, s1, 15
3677; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
3678; GFX8-NEXT:    s_bfe_u32 s3, 1, 0x100000
3679; GFX8-NEXT:    s_lshl_b32 s0, s0, s3
3680; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
3681; GFX8-NEXT:    s_lshl_b32 s0, s0, s1
3682; GFX8-NEXT:    v_lshrrev_b16_e32 v0, s2, v0
3683; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
3684; GFX8-NEXT:    ; return to shader part epilog
3685;
3686; GFX9-LABEL: v_fshr_i16_svs:
3687; GFX9:       ; %bb.0:
3688; GFX9-NEXT:    s_and_b32 s2, s1, 15
3689; GFX9-NEXT:    s_andn2_b32 s1, 15, s1
3690; GFX9-NEXT:    s_bfe_u32 s3, 1, 0x100000
3691; GFX9-NEXT:    s_lshl_b32 s0, s0, s3
3692; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
3693; GFX9-NEXT:    s_lshl_b32 s0, s0, s1
3694; GFX9-NEXT:    v_lshrrev_b16_e32 v0, s2, v0
3695; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
3696; GFX9-NEXT:    ; return to shader part epilog
3697;
3698; GFX10-LABEL: v_fshr_i16_svs:
3699; GFX10:       ; %bb.0:
3700; GFX10-NEXT:    s_and_b32 s2, s1, 15
3701; GFX10-NEXT:    s_bfe_u32 s3, 1, 0x100000
3702; GFX10-NEXT:    s_andn2_b32 s1, 15, s1
3703; GFX10-NEXT:    v_lshrrev_b16 v0, s2, v0
3704; GFX10-NEXT:    s_lshl_b32 s0, s0, s3
3705; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
3706; GFX10-NEXT:    s_lshl_b32 s0, s0, s1
3707; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
3708; GFX10-NEXT:    ; return to shader part epilog
3709;
3710; GFX11-LABEL: v_fshr_i16_svs:
3711; GFX11:       ; %bb.0:
3712; GFX11-NEXT:    s_and_b32 s2, s1, 15
3713; GFX11-NEXT:    s_bfe_u32 s3, 1, 0x100000
3714; GFX11-NEXT:    s_and_not1_b32 s1, 15, s1
3715; GFX11-NEXT:    v_lshrrev_b16 v0, s2, v0
3716; GFX11-NEXT:    s_lshl_b32 s0, s0, s3
3717; GFX11-NEXT:    s_bfe_u32 s1, s1, 0x100000
3718; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3719; GFX11-NEXT:    s_lshl_b32 s0, s0, s1
3720; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
3721; GFX11-NEXT:    v_or_b32_e32 v0, s0, v0
3722; GFX11-NEXT:    ; return to shader part epilog
3723  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
3724  %cast.result = bitcast i16 %result to half
3725  ret half %cast.result
3726}
3727
; v_fshr_i16_vss: i16 funnel shift right in an amdgpu_ps function where %rhs
; and %amt are inreg arguments and %lhs is not. The i16 result is bitcast to
; half before being returned.
; The "vss" suffix presumably encodes the VGPR/SGPR/SGPR operand placement of
; (lhs, rhs, amt) -- NOTE(review): inferred from the inreg markers, confirm.
; Check lines are autogenerated by utils/update_llc_test_checks.py.
3728define amdgpu_ps half @v_fshr_i16_vss(i16 %lhs, i16 inreg %rhs, i16 inreg %amt) {
3729; GFX6-LABEL: v_fshr_i16_vss:
3730; GFX6:       ; %bb.0:
3731; GFX6-NEXT:    s_and_b32 s2, s1, 15
3732; GFX6-NEXT:    s_andn2_b32 s1, 15, s1
3733; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
3734; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x100000
3735; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s1, v0
3736; GFX6-NEXT:    s_bfe_u32 s1, s2, 0x100000
3737; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
3738; GFX6-NEXT:    s_lshr_b32 s0, s0, s1
3739; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
3740; GFX6-NEXT:    ; return to shader part epilog
3741;
3742; GFX8-LABEL: v_fshr_i16_vss:
3743; GFX8:       ; %bb.0:
3744; GFX8-NEXT:    s_and_b32 s2, s1, 15
3745; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
3746; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
3747; GFX8-NEXT:    v_lshlrev_b16_e32 v0, s1, v0
3748; GFX8-NEXT:    s_bfe_u32 s0, s0, 0x100000
3749; GFX8-NEXT:    s_bfe_u32 s1, s2, 0x100000
3750; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
3751; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
3752; GFX8-NEXT:    ; return to shader part epilog
3753;
3754; GFX9-LABEL: v_fshr_i16_vss:
3755; GFX9:       ; %bb.0:
3756; GFX9-NEXT:    s_and_b32 s2, s1, 15
3757; GFX9-NEXT:    s_andn2_b32 s1, 15, s1
3758; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
3759; GFX9-NEXT:    v_lshlrev_b16_e32 v0, s1, v0
3760; GFX9-NEXT:    s_bfe_u32 s0, s0, 0x100000
3761; GFX9-NEXT:    s_bfe_u32 s1, s2, 0x100000
3762; GFX9-NEXT:    s_lshr_b32 s0, s0, s1
3763; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
3764; GFX9-NEXT:    ; return to shader part epilog
3765;
3766; GFX10-LABEL: v_fshr_i16_vss:
3767; GFX10:       ; %bb.0:
3768; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
3769; GFX10-NEXT:    s_andn2_b32 s2, 15, s1
3770; GFX10-NEXT:    s_and_b32 s1, s1, 15
3771; GFX10-NEXT:    s_bfe_u32 s0, s0, 0x100000
3772; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
3773; GFX10-NEXT:    v_lshlrev_b16 v0, s2, v0
3774; GFX10-NEXT:    s_lshr_b32 s0, s0, s1
3775; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
3776; GFX10-NEXT:    ; return to shader part epilog
3777;
3778; GFX11-LABEL: v_fshr_i16_vss:
3779; GFX11:       ; %bb.0:
3780; GFX11-NEXT:    v_lshlrev_b16 v0, 1, v0
3781; GFX11-NEXT:    s_and_not1_b32 s2, 15, s1
3782; GFX11-NEXT:    s_and_b32 s1, s1, 15
3783; GFX11-NEXT:    s_bfe_u32 s0, s0, 0x100000
3784; GFX11-NEXT:    s_bfe_u32 s1, s1, 0x100000
3785; GFX11-NEXT:    v_lshlrev_b16 v0, s2, v0
3786; GFX11-NEXT:    s_lshr_b32 s0, s0, s1
3787; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
3788; GFX11-NEXT:    v_or_b32_e32 v0, s0, v0
3789; GFX11-NEXT:    ; return to shader part epilog
3790  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
3791  %cast.result = bitcast i16 %result to half
3792  ret half %cast.result
3793}
3794
; s_fshr_v2i16: <2 x i16> funnel shift right in an amdgpu_ps function with all
; three operands (%lhs, %rhs, %amt) marked inreg. The vector result is bitcast
; to i32 before being returned.
; Check lines are autogenerated by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
3795define amdgpu_ps i32 @s_fshr_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) {
3796; GFX6-LABEL: s_fshr_v2i16:
3797; GFX6:       ; %bb.0:
3798; GFX6-NEXT:    s_lshl_b32 s5, s5, 16
3799; GFX6-NEXT:    s_and_b32 s4, s4, 0xffff
3800; GFX6-NEXT:    s_or_b32 s4, s5, s4
3801; GFX6-NEXT:    s_bfe_u32 s5, 1, 0x100000
3802; GFX6-NEXT:    s_lshl_b32 s0, s0, s5
3803; GFX6-NEXT:    s_bfe_u32 s6, s2, 0xf0001
3804; GFX6-NEXT:    s_bfe_u32 s7, 14, 0x100000
3805; GFX6-NEXT:    s_lshl_b32 s1, s1, s5
3806; GFX6-NEXT:    s_bfe_u32 s5, s3, 0xf0001
3807; GFX6-NEXT:    s_lshr_b32 s6, s6, s7
3808; GFX6-NEXT:    s_lshr_b32 s5, s5, s7
3809; GFX6-NEXT:    s_xor_b32 s4, s4, -1
3810; GFX6-NEXT:    s_or_b32 s0, s0, s6
3811; GFX6-NEXT:    s_or_b32 s1, s1, s5
3812; GFX6-NEXT:    s_lshl_b32 s2, s2, 1
3813; GFX6-NEXT:    s_lshr_b32 s5, s4, 16
3814; GFX6-NEXT:    s_and_b32 s6, s4, 15
3815; GFX6-NEXT:    s_andn2_b32 s4, 15, s4
3816; GFX6-NEXT:    s_bfe_u32 s6, s6, 0x100000
3817; GFX6-NEXT:    s_bfe_u32 s2, s2, 0xf0001
3818; GFX6-NEXT:    s_bfe_u32 s4, s4, 0x100000
3819; GFX6-NEXT:    s_lshl_b32 s0, s0, s6
3820; GFX6-NEXT:    s_lshr_b32 s2, s2, s4
3821; GFX6-NEXT:    s_or_b32 s0, s0, s2
3822; GFX6-NEXT:    s_and_b32 s2, s5, 15
3823; GFX6-NEXT:    s_lshl_b32 s3, s3, 1
3824; GFX6-NEXT:    s_andn2_b32 s4, 15, s5
3825; GFX6-NEXT:    s_bfe_u32 s2, s2, 0x100000
3826; GFX6-NEXT:    s_lshl_b32 s1, s1, s2
3827; GFX6-NEXT:    s_bfe_u32 s2, s3, 0xf0001
3828; GFX6-NEXT:    s_bfe_u32 s3, s4, 0x100000
3829; GFX6-NEXT:    s_lshr_b32 s2, s2, s3
3830; GFX6-NEXT:    s_or_b32 s1, s1, s2
3831; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x100000
3832; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
3833; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
3834; GFX6-NEXT:    s_or_b32 s0, s0, s1
3835; GFX6-NEXT:    ; return to shader part epilog
3836;
3837; GFX8-LABEL: s_fshr_v2i16:
3838; GFX8:       ; %bb.0:
3839; GFX8-NEXT:    s_bfe_u32 s5, 1, 0x100000
3840; GFX8-NEXT:    s_bfe_u32 s6, s1, 0x100000
3841; GFX8-NEXT:    s_bfe_u32 s7, 15, 0x100000
3842; GFX8-NEXT:    s_lshr_b32 s3, s0, 16
3843; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
3844; GFX8-NEXT:    s_lshl_b32 s0, s0, s5
3845; GFX8-NEXT:    s_lshr_b32 s6, s6, s7
3846; GFX8-NEXT:    s_or_b32 s0, s0, s6
3847; GFX8-NEXT:    s_lshl_b32 s3, s3, s5
3848; GFX8-NEXT:    s_lshr_b32 s6, s4, s7
3849; GFX8-NEXT:    s_lshl_b32 s1, s1, s5
3850; GFX8-NEXT:    s_xor_b32 s2, s2, -1
3851; GFX8-NEXT:    s_or_b32 s3, s3, s6
3852; GFX8-NEXT:    s_lshr_b32 s6, s2, 16
3853; GFX8-NEXT:    s_and_b32 s7, s2, 15
3854; GFX8-NEXT:    s_andn2_b32 s2, 15, s2
3855; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
3856; GFX8-NEXT:    s_bfe_u32 s7, s7, 0x100000
3857; GFX8-NEXT:    s_lshr_b32 s1, s1, s5
3858; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
3859; GFX8-NEXT:    s_lshl_b32 s0, s0, s7
3860; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
3861; GFX8-NEXT:    s_or_b32 s0, s0, s1
3862; GFX8-NEXT:    s_and_b32 s1, s6, 15
3863; GFX8-NEXT:    s_lshl_b32 s4, s4, s5
3864; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
3865; GFX8-NEXT:    s_andn2_b32 s2, 15, s6
3866; GFX8-NEXT:    s_lshl_b32 s1, s3, s1
3867; GFX8-NEXT:    s_bfe_u32 s3, s4, 0x100000
3868; GFX8-NEXT:    s_lshr_b32 s3, s3, s5
3869; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
3870; GFX8-NEXT:    s_lshr_b32 s2, s3, s2
3871; GFX8-NEXT:    s_or_b32 s1, s1, s2
3872; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
3873; GFX8-NEXT:    s_bfe_u32 s0, s0, 0x100000
3874; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
3875; GFX8-NEXT:    s_or_b32 s0, s0, s1
3876; GFX8-NEXT:    ; return to shader part epilog
3877;
3878; GFX9-LABEL: s_fshr_v2i16:
3879; GFX9:       ; %bb.0:
3880; GFX9-NEXT:    s_lshr_b32 s4, s0, 16
3881; GFX9-NEXT:    s_lshl_b32 s0, s0, 0x10001
3882; GFX9-NEXT:    s_lshl_b32 s4, s4, 1
3883; GFX9-NEXT:    s_and_b32 s3, s2, 0xf000f
3884; GFX9-NEXT:    s_andn2_b32 s2, 0xf000f, s2
3885; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
3886; GFX9-NEXT:    s_lshr_b32 s4, s0, 16
3887; GFX9-NEXT:    s_lshr_b32 s5, s2, 16
3888; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
3889; GFX9-NEXT:    s_lshl_b32 s2, s4, s5
3890; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
3891; GFX9-NEXT:    s_lshr_b32 s2, s1, 16
3892; GFX9-NEXT:    s_and_b32 s1, s1, 0xffff
3893; GFX9-NEXT:    s_lshr_b32 s4, s3, 16
3894; GFX9-NEXT:    s_lshr_b32 s1, s1, s3
3895; GFX9-NEXT:    s_lshr_b32 s2, s2, s4
3896; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s2
3897; GFX9-NEXT:    s_or_b32 s0, s0, s1
3898; GFX9-NEXT:    ; return to shader part epilog
3899;
3900; GFX10-LABEL: s_fshr_v2i16:
3901; GFX10:       ; %bb.0:
3902; GFX10-NEXT:    s_lshr_b32 s3, s0, 16
3903; GFX10-NEXT:    s_lshl_b32 s0, s0, 0x10001
3904; GFX10-NEXT:    s_lshl_b32 s3, s3, 1
3905; GFX10-NEXT:    s_and_b32 s4, s2, 0xf000f
3906; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s3
3907; GFX10-NEXT:    s_andn2_b32 s2, 0xf000f, s2
3908; GFX10-NEXT:    s_lshr_b32 s3, s0, 16
3909; GFX10-NEXT:    s_lshr_b32 s5, s2, 16
3910; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
3911; GFX10-NEXT:    s_lshl_b32 s2, s3, s5
3912; GFX10-NEXT:    s_lshr_b32 s3, s1, 16
3913; GFX10-NEXT:    s_and_b32 s1, s1, 0xffff
3914; GFX10-NEXT:    s_lshr_b32 s5, s4, 16
3915; GFX10-NEXT:    s_lshr_b32 s1, s1, s4
3916; GFX10-NEXT:    s_lshr_b32 s3, s3, s5
3917; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
3918; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s1, s3
3919; GFX10-NEXT:    s_or_b32 s0, s0, s1
3920; GFX10-NEXT:    ; return to shader part epilog
3921;
3922; GFX11-LABEL: s_fshr_v2i16:
3923; GFX11:       ; %bb.0:
3924; GFX11-NEXT:    s_lshr_b32 s3, s0, 16
3925; GFX11-NEXT:    s_lshl_b32 s0, s0, 0x10001
3926; GFX11-NEXT:    s_lshl_b32 s3, s3, 1
3927; GFX11-NEXT:    s_and_b32 s4, s2, 0xf000f
3928; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s3
3929; GFX11-NEXT:    s_and_not1_b32 s2, 0xf000f, s2
3930; GFX11-NEXT:    s_lshr_b32 s3, s0, 16
3931; GFX11-NEXT:    s_lshr_b32 s5, s2, 16
3932; GFX11-NEXT:    s_lshl_b32 s0, s0, s2
3933; GFX11-NEXT:    s_lshl_b32 s2, s3, s5
3934; GFX11-NEXT:    s_lshr_b32 s3, s1, 16
3935; GFX11-NEXT:    s_and_b32 s1, s1, 0xffff
3936; GFX11-NEXT:    s_lshr_b32 s5, s4, 16
3937; GFX11-NEXT:    s_lshr_b32 s1, s1, s4
3938; GFX11-NEXT:    s_lshr_b32 s3, s3, s5
3939; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
3940; GFX11-NEXT:    s_pack_ll_b32_b16 s1, s1, s3
3941; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3942; GFX11-NEXT:    s_or_b32 s0, s0, s1
3943; GFX11-NEXT:    ; return to shader part epilog
3944  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3945  %cast = bitcast <2 x i16> %result to i32
3946  ret i32 %cast
3947}
3948
; v_fshr_v2i16: <2 x i16> funnel shift right with a variable per-element shift
; amount; %lhs, %rhs and %amt are all ordinary (non-inreg) arguments and the
; vector result is returned directly.
; Check lines are autogenerated by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
3949define <2 x i16> @v_fshr_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) {
3950; GFX6-LABEL: v_fshr_v2i16:
3951; GFX6:       ; %bb.0:
3952; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3953; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
3954; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
3955; GFX6-NEXT:    v_or_b32_e32 v4, v5, v4
3956; GFX6-NEXT:    s_bfe_u32 s4, 1, 0x100000
3957; GFX6-NEXT:    v_bfe_u32 v5, v2, 1, 15
3958; GFX6-NEXT:    s_bfe_u32 s5, 14, 0x100000
3959; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s4, v0
3960; GFX6-NEXT:    v_lshrrev_b32_e32 v5, s5, v5
3961; GFX6-NEXT:    v_or_b32_e32 v0, v0, v5
3962; GFX6-NEXT:    v_bfe_u32 v5, v3, 1, 15
3963; GFX6-NEXT:    v_lshlrev_b32_e32 v1, s4, v1
3964; GFX6-NEXT:    v_lshrrev_b32_e32 v5, s5, v5
3965; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v4
3966; GFX6-NEXT:    v_or_b32_e32 v1, v1, v5
3967; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
3968; GFX6-NEXT:    v_and_b32_e32 v6, 15, v4
3969; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v4
3970; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
3971; GFX6-NEXT:    v_and_b32_e32 v4, 15, v4
3972; GFX6-NEXT:    v_bfe_u32 v6, v6, 0, 16
3973; GFX6-NEXT:    v_bfe_u32 v2, v2, 1, 15
3974; GFX6-NEXT:    v_bfe_u32 v4, v4, 0, 16
3975; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v6, v0
3976; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
3977; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
3978; GFX6-NEXT:    v_and_b32_e32 v2, 15, v5
3979; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v5
3980; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 1, v3
3981; GFX6-NEXT:    v_and_b32_e32 v4, 15, v4
3982; GFX6-NEXT:    v_bfe_u32 v2, v2, 0, 16
3983; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v2, v1
3984; GFX6-NEXT:    v_bfe_u32 v2, v3, 1, 15
3985; GFX6-NEXT:    v_bfe_u32 v3, v4, 0, 16
3986; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v3, v2
3987; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
3988; GFX6-NEXT:    s_setpc_b64 s[30:31]
3989;
3990; GFX8-LABEL: v_fshr_v2i16:
3991; GFX8:       ; %bb.0:
3992; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3993; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 1, v0
3994; GFX8-NEXT:    v_lshrrev_b16_e32 v4, 15, v1
3995; GFX8-NEXT:    v_or_b32_e32 v3, v3, v4
3996; GFX8-NEXT:    v_mov_b32_e32 v4, 1
3997; GFX8-NEXT:    v_mov_b32_e32 v5, 15
3998; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3999; GFX8-NEXT:    v_lshrrev_b16_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4000; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
4001; GFX8-NEXT:    v_or_b32_e32 v0, v0, v5
4002; GFX8-NEXT:    v_lshlrev_b16_e32 v5, 1, v1
4003; GFX8-NEXT:    v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4004; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
4005; GFX8-NEXT:    v_and_b32_e32 v6, 15, v2
4006; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
4007; GFX8-NEXT:    v_and_b32_e32 v2, 15, v2
4008; GFX8-NEXT:    v_lshrrev_b16_e32 v5, 1, v5
4009; GFX8-NEXT:    v_lshlrev_b16_e32 v3, v6, v3
4010; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v2, v5
4011; GFX8-NEXT:    v_or_b32_e32 v2, v3, v2
4012; GFX8-NEXT:    v_and_b32_e32 v3, 15, v4
4013; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v4
4014; GFX8-NEXT:    v_and_b32_e32 v4, 15, v4
4015; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 1, v1
4016; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
4017; GFX8-NEXT:    v_lshrrev_b16_e32 v1, v4, v1
4018; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
4019; GFX8-NEXT:    v_mov_b32_e32 v1, 16
4020; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
4021; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4022; GFX8-NEXT:    s_setpc_b64 s[30:31]
4023;
4024; GFX9-LABEL: v_fshr_v2i16:
4025; GFX9:       ; %bb.0:
4026; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4027; GFX9-NEXT:    v_and_b32_e32 v3, 0xf000f, v2
4028; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v2
4029; GFX9-NEXT:    v_and_b32_e32 v2, 0xf000f, v2
4030; GFX9-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4031; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v2, v0
4032; GFX9-NEXT:    v_pk_lshrrev_b16 v1, v3, v1
4033; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
4034; GFX9-NEXT:    s_setpc_b64 s[30:31]
4035;
4036; GFX10-LABEL: v_fshr_v2i16:
4037; GFX10:       ; %bb.0:
4038; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4039; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4040; GFX10-NEXT:    v_xor_b32_e32 v3, -1, v2
4041; GFX10-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4042; GFX10-NEXT:    v_and_b32_e32 v2, 0xf000f, v2
4043; GFX10-NEXT:    v_and_b32_e32 v3, 0xf000f, v3
4044; GFX10-NEXT:    v_pk_lshrrev_b16 v1, v2, v1
4045; GFX10-NEXT:    v_pk_lshlrev_b16 v0, v3, v0
4046; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
4047; GFX10-NEXT:    s_setpc_b64 s[30:31]
4048;
4049; GFX11-LABEL: v_fshr_v2i16:
4050; GFX11:       ; %bb.0:
4051; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4052; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4053; GFX11-NEXT:    v_xor_b32_e32 v3, -1, v2
4054; GFX11-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4055; GFX11-NEXT:    v_and_b32_e32 v2, 0xf000f, v2
4056; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
4057; GFX11-NEXT:    v_and_b32_e32 v3, 0xf000f, v3
4058; GFX11-NEXT:    v_pk_lshrrev_b16 v1, v2, v1
4059; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4060; GFX11-NEXT:    v_pk_lshlrev_b16 v0, v3, v0
4061; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
4062; GFX11-NEXT:    s_setpc_b64 s[30:31]
4063  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
4064  ret <2 x i16> %result
4065}
4066
; v_fshr_v2i16_4_8: <2 x i16> funnel shift right by the constant vector
; <i16 4, i16 8> (element 0 shifted by 4, element 1 by 8); %lhs and %rhs are
; ordinary (non-inreg) arguments.
; Check lines are autogenerated by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
4067define <2 x i16> @v_fshr_v2i16_4_8(<2 x i16> %lhs, <2 x i16> %rhs) {
4068; GFX6-LABEL: v_fshr_v2i16_4_8:
4069; GFX6:       ; %bb.0:
4070; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4071; GFX6-NEXT:    s_bfe_u32 s4, 12, 0x100000
4072; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s4, v0
4073; GFX6-NEXT:    v_bfe_u32 v2, v2, 1, 15
4074; GFX6-NEXT:    s_bfe_u32 s4, 3, 0x100000
4075; GFX6-NEXT:    v_lshrrev_b32_e32 v2, s4, v2
4076; GFX6-NEXT:    s_bfe_u32 s4, 8, 0x100000
4077; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
4078; GFX6-NEXT:    v_lshlrev_b32_e32 v1, s4, v1
4079; GFX6-NEXT:    v_bfe_u32 v2, v3, 1, 15
4080; GFX6-NEXT:    s_bfe_u32 s4, 7, 0x100000
4081; GFX6-NEXT:    v_lshrrev_b32_e32 v2, s4, v2
4082; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
4083; GFX6-NEXT:    s_setpc_b64 s[30:31]
4084;
4085; GFX8-LABEL: v_fshr_v2i16_4_8:
4086; GFX8:       ; %bb.0:
4087; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4088; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
4089; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 12, v0
4090; GFX8-NEXT:    v_lshrrev_b16_e32 v3, 4, v1
4091; GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
4092; GFX8-NEXT:    v_mov_b32_e32 v3, 8
4093; GFX8-NEXT:    v_lshlrev_b16_e32 v2, 8, v2
4094; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4095; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
4096; GFX8-NEXT:    v_mov_b32_e32 v2, 16
4097; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
4098; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4099; GFX8-NEXT:    s_setpc_b64 s[30:31]
4100;
4101; GFX9-LABEL: v_fshr_v2i16_4_8:
4102; GFX9:       ; %bb.0:
4103; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4104; GFX9-NEXT:    v_mov_b32_e32 v2, 0x8000c
4105; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v2, v0
4106; GFX9-NEXT:    v_mov_b32_e32 v2, 0x80004
4107; GFX9-NEXT:    v_pk_lshrrev_b16 v1, v2, v1
4108; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
4109; GFX9-NEXT:    s_setpc_b64 s[30:31]
4110;
4111; GFX10-LABEL: v_fshr_v2i16_4_8:
4112; GFX10:       ; %bb.0:
4113; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4114; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4115; GFX10-NEXT:    v_pk_lshlrev_b16 v0, 0x8000c, v0
4116; GFX10-NEXT:    v_pk_lshrrev_b16 v1, 0x80004, v1
4117; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
4118; GFX10-NEXT:    s_setpc_b64 s[30:31]
4119;
4120; GFX11-LABEL: v_fshr_v2i16_4_8:
4121; GFX11:       ; %bb.0:
4122; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4123; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4124; GFX11-NEXT:    v_pk_lshlrev_b16 v0, 0x8000c, v0
4125; GFX11-NEXT:    v_pk_lshrrev_b16 v1, 0x80004, v1
4126; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4127; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
4128; GFX11-NEXT:    s_setpc_b64 s[30:31]
4129  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> <i16 4, i16 8>)
4130  ret <2 x i16> %result
4131}
4132
; v_fshr_v2i16_ssv: <2 x i16> funnel shift right in an amdgpu_ps function
; where %lhs and %rhs are inreg arguments and the shift amount %amt is not.
; The vector result is bitcast to float before being returned.
; The "ssv" suffix presumably encodes the SGPR/SGPR/VGPR operand placement of
; (lhs, rhs, amt) -- NOTE(review): inferred from the inreg markers, confirm.
; Check lines are autogenerated by utils/update_llc_test_checks.py.
4133define amdgpu_ps float @v_fshr_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> %amt) {
4134; GFX6-LABEL: v_fshr_v2i16_ssv:
4135; GFX6:       ; %bb.0:
4136; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4137; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4138; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
4139; GFX6-NEXT:    s_bfe_u32 s4, 1, 0x100000
4140; GFX6-NEXT:    s_bfe_u32 s5, s2, 0xf0001
4141; GFX6-NEXT:    s_bfe_u32 s6, 14, 0x100000
4142; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
4143; GFX6-NEXT:    s_lshl_b32 s0, s0, s4
4144; GFX6-NEXT:    s_lshr_b32 s5, s5, s6
4145; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
4146; GFX6-NEXT:    v_and_b32_e32 v2, 15, v0
4147; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
4148; GFX6-NEXT:    s_or_b32 s0, s0, s5
4149; GFX6-NEXT:    s_lshl_b32 s2, s2, 1
4150; GFX6-NEXT:    v_and_b32_e32 v0, 15, v0
4151; GFX6-NEXT:    v_bfe_u32 v2, v2, 0, 16
4152; GFX6-NEXT:    v_lshl_b32_e32 v2, s0, v2
4153; GFX6-NEXT:    s_bfe_u32 s0, s2, 0xf0001
4154; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 16
4155; GFX6-NEXT:    v_lshr_b32_e32 v0, s0, v0
4156; GFX6-NEXT:    s_lshl_b32 s1, s1, s4
4157; GFX6-NEXT:    s_bfe_u32 s4, s3, 0xf0001
4158; GFX6-NEXT:    v_or_b32_e32 v0, v2, v0
4159; GFX6-NEXT:    v_and_b32_e32 v2, 15, v1
4160; GFX6-NEXT:    v_xor_b32_e32 v1, -1, v1
4161; GFX6-NEXT:    s_lshr_b32 s4, s4, s6
4162; GFX6-NEXT:    s_lshl_b32 s3, s3, 1
4163; GFX6-NEXT:    v_and_b32_e32 v1, 15, v1
4164; GFX6-NEXT:    s_or_b32 s1, s1, s4
4165; GFX6-NEXT:    v_bfe_u32 v2, v2, 0, 16
4166; GFX6-NEXT:    s_bfe_u32 s0, s3, 0xf0001
4167; GFX6-NEXT:    v_bfe_u32 v1, v1, 0, 16
4168; GFX6-NEXT:    v_lshl_b32_e32 v2, s1, v2
4169; GFX6-NEXT:    v_lshr_b32_e32 v1, s0, v1
4170; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
4171; GFX6-NEXT:    v_bfe_u32 v1, v1, 0, 16
4172; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 16
4173; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4174; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
4175; GFX6-NEXT:    ; return to shader part epilog
4176;
4177; GFX8-LABEL: v_fshr_v2i16_ssv:
4178; GFX8:       ; %bb.0:
4179; GFX8-NEXT:    s_bfe_u32 s4, 1, 0x100000
4180; GFX8-NEXT:    s_bfe_u32 s5, s1, 0x100000
4181; GFX8-NEXT:    s_bfe_u32 s6, 15, 0x100000
4182; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
4183; GFX8-NEXT:    s_lshl_b32 s0, s0, s4
4184; GFX8-NEXT:    s_lshr_b32 s5, s5, s6
4185; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
4186; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
4187; GFX8-NEXT:    s_or_b32 s0, s0, s5
4188; GFX8-NEXT:    s_lshl_b32 s1, s1, s4
4189; GFX8-NEXT:    v_and_b32_e32 v2, 15, v0
4190; GFX8-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
4191; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
4192; GFX8-NEXT:    v_lshlrev_b16_e64 v2, v2, s0
4193; GFX8-NEXT:    s_bfe_u32 s0, s1, 0x100000
4194; GFX8-NEXT:    v_and_b32_e32 v0, 15, v0
4195; GFX8-NEXT:    s_lshr_b32 s0, s0, s4
4196; GFX8-NEXT:    s_lshr_b32 s5, s3, s6
4197; GFX8-NEXT:    s_lshl_b32 s3, s3, s4
4198; GFX8-NEXT:    v_lshrrev_b16_e64 v0, v0, s0
4199; GFX8-NEXT:    s_lshl_b32 s2, s2, s4
4200; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
4201; GFX8-NEXT:    v_and_b32_e32 v2, 15, v1
4202; GFX8-NEXT:    v_xor_b32_e32 v1, -1, v1
4203; GFX8-NEXT:    s_bfe_u32 s0, s3, 0x100000
4204; GFX8-NEXT:    s_or_b32 s2, s2, s5
4205; GFX8-NEXT:    v_and_b32_e32 v1, 15, v1
4206; GFX8-NEXT:    s_lshr_b32 s0, s0, s4
4207; GFX8-NEXT:    v_lshlrev_b16_e64 v2, v2, s2
4208; GFX8-NEXT:    v_lshrrev_b16_e64 v1, v1, s0
4209; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
4210; GFX8-NEXT:    v_mov_b32_e32 v2, 16
4211; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
4212; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4213; GFX8-NEXT:    ; return to shader part epilog
4214;
4215; GFX9-LABEL: v_fshr_v2i16_ssv:
4216; GFX9:       ; %bb.0:
4217; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
4218; GFX9-NEXT:    v_and_b32_e32 v1, 0xf000f, v0
4219; GFX9-NEXT:    v_xor_b32_e32 v0, -1, v0
4220; GFX9-NEXT:    s_lshl_b32 s0, s0, 0x10001
4221; GFX9-NEXT:    s_lshl_b32 s2, s2, 1
4222; GFX9-NEXT:    v_and_b32_e32 v0, 0xf000f, v0
4223; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
4224; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v0, s0
4225; GFX9-NEXT:    v_pk_lshrrev_b16 v1, v1, s1
4226; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
4227; GFX9-NEXT:    ; return to shader part epilog
4228;
4229; GFX10-LABEL: v_fshr_v2i16_ssv:
4230; GFX10:       ; %bb.0:
4231; GFX10-NEXT:    v_xor_b32_e32 v1, -1, v0
4232; GFX10-NEXT:    s_lshr_b32 s2, s0, 16
4233; GFX10-NEXT:    v_and_b32_e32 v0, 0xf000f, v0
4234; GFX10-NEXT:    s_lshl_b32 s0, s0, 0x10001
4235; GFX10-NEXT:    s_lshl_b32 s2, s2, 1
4236; GFX10-NEXT:    v_and_b32_e32 v1, 0xf000f, v1
4237; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
4238; GFX10-NEXT:    v_pk_lshrrev_b16 v0, v0, s1
4239; GFX10-NEXT:    v_pk_lshlrev_b16 v1, v1, s0
4240; GFX10-NEXT:    v_or_b32_e32 v0, v1, v0
4241; GFX10-NEXT:    ; return to shader part epilog
4242;
4243; GFX11-LABEL: v_fshr_v2i16_ssv:
4244; GFX11:       ; %bb.0:
4245; GFX11-NEXT:    v_xor_b32_e32 v1, -1, v0
4246; GFX11-NEXT:    s_lshr_b32 s2, s0, 16
4247; GFX11-NEXT:    v_and_b32_e32 v0, 0xf000f, v0
4248; GFX11-NEXT:    s_lshl_b32 s0, s0, 0x10001
4249; GFX11-NEXT:    s_lshl_b32 s2, s2, 1
4250; GFX11-NEXT:    v_and_b32_e32 v1, 0xf000f, v1
4251; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
4252; GFX11-NEXT:    v_pk_lshrrev_b16 v0, v0, s1
4253; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4254; GFX11-NEXT:    v_pk_lshlrev_b16 v1, v1, s0
4255; GFX11-NEXT:    v_or_b32_e32 v0, v1, v0
4256; GFX11-NEXT:    ; return to shader part epilog
4257  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
4258  %cast = bitcast <2 x i16> %result to float
4259  ret float %cast
4260}
4261
4262define amdgpu_ps float @v_fshr_v2i16_svs(<2 x i16> inreg %lhs, <2 x i16> %rhs, <2 x i16> inreg %amt) {
4263; GFX6-LABEL: v_fshr_v2i16_svs:
4264; GFX6:       ; %bb.0:
4265; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
4266; GFX6-NEXT:    s_and_b32 s2, s2, 0xffff
4267; GFX6-NEXT:    s_or_b32 s2, s3, s2
4268; GFX6-NEXT:    s_bfe_u32 s3, 1, 0x100000
4269; GFX6-NEXT:    v_bfe_u32 v2, v0, 1, 15
4270; GFX6-NEXT:    s_bfe_u32 s4, 14, 0x100000
4271; GFX6-NEXT:    s_lshl_b32 s0, s0, s3
4272; GFX6-NEXT:    v_lshrrev_b32_e32 v2, s4, v2
4273; GFX6-NEXT:    v_bfe_u32 v3, v1, 1, 15
4274; GFX6-NEXT:    v_or_b32_e32 v2, s0, v2
4275; GFX6-NEXT:    s_lshl_b32 s0, s1, s3
4276; GFX6-NEXT:    v_lshrrev_b32_e32 v3, s4, v3
4277; GFX6-NEXT:    v_or_b32_e32 v3, s0, v3
4278; GFX6-NEXT:    s_xor_b32 s0, s2, -1
4279; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
4280; GFX6-NEXT:    s_lshr_b32 s1, s0, 16
4281; GFX6-NEXT:    s_and_b32 s2, s0, 15
4282; GFX6-NEXT:    s_andn2_b32 s0, 15, s0
4283; GFX6-NEXT:    v_bfe_u32 v0, v0, 1, 15
4284; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
4285; GFX6-NEXT:    s_bfe_u32 s2, s2, 0x100000
4286; GFX6-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
4287; GFX6-NEXT:    s_and_b32 s0, s1, 15
4288; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
4289; GFX6-NEXT:    v_lshlrev_b32_e32 v2, s2, v2
4290; GFX6-NEXT:    s_andn2_b32 s1, 15, s1
4291; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
4292; GFX6-NEXT:    v_or_b32_e32 v0, v2, v0
4293; GFX6-NEXT:    v_lshlrev_b32_e32 v2, s0, v3
4294; GFX6-NEXT:    v_bfe_u32 v1, v1, 1, 15
4295; GFX6-NEXT:    s_bfe_u32 s0, s1, 0x100000
4296; GFX6-NEXT:    v_lshrrev_b32_e32 v1, s0, v1
4297; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
4298; GFX6-NEXT:    v_bfe_u32 v1, v1, 0, 16
4299; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 16
4300; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4301; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
4302; GFX6-NEXT:    ; return to shader part epilog
4303;
4304; GFX8-LABEL: v_fshr_v2i16_svs:
4305; GFX8:       ; %bb.0:
4306; GFX8-NEXT:    s_bfe_u32 s3, 1, 0x100000
4307; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
4308; GFX8-NEXT:    s_lshl_b32 s0, s0, s3
4309; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 15, v0
4310; GFX8-NEXT:    v_mov_b32_e32 v2, 15
4311; GFX8-NEXT:    v_or_b32_e32 v1, s0, v1
4312; GFX8-NEXT:    s_lshl_b32 s0, s2, s3
4313; GFX8-NEXT:    v_lshrrev_b16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4314; GFX8-NEXT:    v_or_b32_e32 v2, s0, v2
4315; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 1, v0
4316; GFX8-NEXT:    v_mov_b32_e32 v4, 1
4317; GFX8-NEXT:    s_xor_b32 s0, s1, -1
4318; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4319; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
4320; GFX8-NEXT:    s_and_b32 s2, s0, 15
4321; GFX8-NEXT:    s_andn2_b32 s0, 15, s0
4322; GFX8-NEXT:    v_lshrrev_b16_e32 v3, 1, v3
4323; GFX8-NEXT:    v_lshrrev_b16_e32 v3, s0, v3
4324; GFX8-NEXT:    s_and_b32 s0, s1, 15
4325; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
4326; GFX8-NEXT:    v_lshrrev_b16_e32 v0, 1, v0
4327; GFX8-NEXT:    v_lshlrev_b16_e32 v2, s0, v2
4328; GFX8-NEXT:    v_lshrrev_b16_e32 v0, s1, v0
4329; GFX8-NEXT:    v_lshlrev_b16_e32 v1, s2, v1
4330; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
4331; GFX8-NEXT:    v_mov_b32_e32 v2, 16
4332; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
4333; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
4334; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4335; GFX8-NEXT:    ; return to shader part epilog
4336;
4337; GFX9-LABEL: v_fshr_v2i16_svs:
4338; GFX9:       ; %bb.0:
4339; GFX9-NEXT:    s_lshr_b32 s3, s0, 16
4340; GFX9-NEXT:    s_lshl_b32 s0, s0, 0x10001
4341; GFX9-NEXT:    s_lshl_b32 s3, s3, 1
4342; GFX9-NEXT:    s_and_b32 s2, s1, 0xf000f
4343; GFX9-NEXT:    s_andn2_b32 s1, 0xf000f, s1
4344; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s3
4345; GFX9-NEXT:    s_lshr_b32 s3, s0, 16
4346; GFX9-NEXT:    s_lshr_b32 s4, s1, 16
4347; GFX9-NEXT:    s_lshl_b32 s0, s0, s1
4348; GFX9-NEXT:    s_lshl_b32 s1, s3, s4
4349; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
4350; GFX9-NEXT:    v_pk_lshrrev_b16 v0, s2, v0
4351; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
4352; GFX9-NEXT:    ; return to shader part epilog
4353;
4354; GFX10-LABEL: v_fshr_v2i16_svs:
4355; GFX10:       ; %bb.0:
4356; GFX10-NEXT:    s_lshr_b32 s2, s0, 16
4357; GFX10-NEXT:    s_lshl_b32 s0, s0, 0x10001
4358; GFX10-NEXT:    s_lshl_b32 s2, s2, 1
4359; GFX10-NEXT:    s_and_b32 s3, s1, 0xf000f
4360; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
4361; GFX10-NEXT:    s_andn2_b32 s1, 0xf000f, s1
4362; GFX10-NEXT:    s_lshr_b32 s2, s0, 16
4363; GFX10-NEXT:    s_lshr_b32 s4, s1, 16
4364; GFX10-NEXT:    v_pk_lshrrev_b16 v0, s3, v0
4365; GFX10-NEXT:    s_lshl_b32 s0, s0, s1
4366; GFX10-NEXT:    s_lshl_b32 s1, s2, s4
4367; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
4368; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
4369; GFX10-NEXT:    ; return to shader part epilog
4370;
4371; GFX11-LABEL: v_fshr_v2i16_svs:
4372; GFX11:       ; %bb.0:
4373; GFX11-NEXT:    s_lshr_b32 s2, s0, 16
4374; GFX11-NEXT:    s_lshl_b32 s0, s0, 0x10001
4375; GFX11-NEXT:    s_lshl_b32 s2, s2, 1
4376; GFX11-NEXT:    s_and_b32 s3, s1, 0xf000f
4377; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
4378; GFX11-NEXT:    s_and_not1_b32 s1, 0xf000f, s1
4379; GFX11-NEXT:    s_lshr_b32 s2, s0, 16
4380; GFX11-NEXT:    s_lshr_b32 s4, s1, 16
4381; GFX11-NEXT:    v_pk_lshrrev_b16 v0, s3, v0
4382; GFX11-NEXT:    s_lshl_b32 s0, s0, s1
4383; GFX11-NEXT:    s_lshl_b32 s1, s2, s4
4384; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
4385; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
4386; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
4387; GFX11-NEXT:    v_or_b32_e32 v0, s0, v0
4388; GFX11-NEXT:    ; return to shader part epilog
4389  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
4390  %cast = bitcast <2 x i16> %result to float
4391  ret float %cast
4392}
4393
; v_fshr_v2i16_vss: <2 x i16> funnel shift right (llvm.fshr.v2i16) in an
; amdgpu_ps function. %lhs is a plain argument while %rhs and %amt are marked
; inreg; in the checks below %lhs is read from vector registers (v*) and
; %rhs/%amt from scalar registers (s*). The <2 x i16> result is bitcast to
; float for the return value.
; The GFX* check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
4394define amdgpu_ps float @v_fshr_v2i16_vss(<2 x i16> %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) {
4395; GFX6-LABEL: v_fshr_v2i16_vss:
4396; GFX6:       ; %bb.0:
4397; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
4398; GFX6-NEXT:    s_and_b32 s2, s2, 0xffff
4399; GFX6-NEXT:    s_or_b32 s2, s3, s2
4400; GFX6-NEXT:    s_bfe_u32 s3, 1, 0x100000
4401; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s3, v0
4402; GFX6-NEXT:    s_bfe_u32 s4, s0, 0xf0001
4403; GFX6-NEXT:    s_bfe_u32 s5, 14, 0x100000
4404; GFX6-NEXT:    v_lshlrev_b32_e32 v1, s3, v1
4405; GFX6-NEXT:    s_bfe_u32 s3, s1, 0xf0001
4406; GFX6-NEXT:    s_lshr_b32 s4, s4, s5
4407; GFX6-NEXT:    s_lshr_b32 s3, s3, s5
4408; GFX6-NEXT:    s_xor_b32 s2, s2, -1
4409; GFX6-NEXT:    v_or_b32_e32 v0, s4, v0
4410; GFX6-NEXT:    v_or_b32_e32 v1, s3, v1
4411; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
4412; GFX6-NEXT:    s_lshr_b32 s3, s2, 16
4413; GFX6-NEXT:    s_and_b32 s4, s2, 15
4414; GFX6-NEXT:    s_andn2_b32 s2, 15, s2
4415; GFX6-NEXT:    s_bfe_u32 s4, s4, 0x100000
4416; GFX6-NEXT:    s_bfe_u32 s0, s0, 0xf0001
4417; GFX6-NEXT:    s_bfe_u32 s2, s2, 0x100000
4418; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s4, v0
4419; GFX6-NEXT:    s_lshr_b32 s0, s0, s2
4420; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
4421; GFX6-NEXT:    s_and_b32 s0, s3, 15
4422; GFX6-NEXT:    s_lshl_b32 s1, s1, 1
4423; GFX6-NEXT:    s_andn2_b32 s2, 15, s3
4424; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
4425; GFX6-NEXT:    v_lshlrev_b32_e32 v1, s0, v1
4426; GFX6-NEXT:    s_bfe_u32 s0, s1, 0xf0001
4427; GFX6-NEXT:    s_bfe_u32 s1, s2, 0x100000
4428; GFX6-NEXT:    s_lshr_b32 s0, s0, s1
4429; GFX6-NEXT:    v_or_b32_e32 v1, s0, v1
4430; GFX6-NEXT:    v_bfe_u32 v1, v1, 0, 16
4431; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 16
4432; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4433; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
4434; GFX6-NEXT:    ; return to shader part epilog
4435;
4436; GFX8-LABEL: v_fshr_v2i16_vss:
4437; GFX8:       ; %bb.0:
4438; GFX8-NEXT:    s_bfe_u32 s3, s0, 0x100000
4439; GFX8-NEXT:    s_bfe_u32 s4, 15, 0x100000
4440; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
4441; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 1, v0
4442; GFX8-NEXT:    s_lshr_b32 s3, s3, s4
4443; GFX8-NEXT:    v_mov_b32_e32 v2, 1
4444; GFX8-NEXT:    v_or_b32_e32 v1, s3, v1
4445; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4446; GFX8-NEXT:    s_lshr_b32 s3, s2, s4
4447; GFX8-NEXT:    v_or_b32_e32 v0, s3, v0
4448; GFX8-NEXT:    s_bfe_u32 s3, 1, 0x100000
4449; GFX8-NEXT:    s_lshl_b32 s0, s0, s3
4450; GFX8-NEXT:    s_xor_b32 s1, s1, -1
4451; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
4452; GFX8-NEXT:    s_and_b32 s5, s1, 15
4453; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
4454; GFX8-NEXT:    s_bfe_u32 s0, s0, 0x100000
4455; GFX8-NEXT:    s_lshr_b32 s0, s0, s3
4456; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
4457; GFX8-NEXT:    v_lshlrev_b16_e32 v1, s5, v1
4458; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
4459; GFX8-NEXT:    s_lshl_b32 s2, s2, s3
4460; GFX8-NEXT:    v_or_b32_e32 v1, s0, v1
4461; GFX8-NEXT:    s_and_b32 s0, s4, 15
4462; GFX8-NEXT:    s_andn2_b32 s1, 15, s4
4463; GFX8-NEXT:    v_lshlrev_b16_e32 v0, s0, v0
4464; GFX8-NEXT:    s_bfe_u32 s0, s2, 0x100000
4465; GFX8-NEXT:    s_lshr_b32 s0, s0, s3
4466; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
4467; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
4468; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
4469; GFX8-NEXT:    v_mov_b32_e32 v2, 16
4470; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
4471; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4472; GFX8-NEXT:    ; return to shader part epilog
4473;
4474; GFX9-LABEL: v_fshr_v2i16_vss:
4475; GFX9:       ; %bb.0:
4476; GFX9-NEXT:    s_and_b32 s2, s1, 0xf000f
4477; GFX9-NEXT:    s_andn2_b32 s1, 0xf000f, s1
4478; GFX9-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4479; GFX9-NEXT:    v_pk_lshlrev_b16 v0, s1, v0
4480; GFX9-NEXT:    s_lshr_b32 s1, s0, 16
4481; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
4482; GFX9-NEXT:    s_lshr_b32 s3, s2, 16
4483; GFX9-NEXT:    s_lshr_b32 s0, s0, s2
4484; GFX9-NEXT:    s_lshr_b32 s1, s1, s3
4485; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
4486; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
4487; GFX9-NEXT:    ; return to shader part epilog
4488;
4489; GFX10-LABEL: v_fshr_v2i16_vss:
4490; GFX10:       ; %bb.0:
4491; GFX10-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4492; GFX10-NEXT:    s_and_b32 s2, s1, 0xf000f
4493; GFX10-NEXT:    s_andn2_b32 s1, 0xf000f, s1
4494; GFX10-NEXT:    s_lshr_b32 s3, s0, 16
4495; GFX10-NEXT:    s_and_b32 s0, s0, 0xffff
4496; GFX10-NEXT:    s_lshr_b32 s4, s2, 16
4497; GFX10-NEXT:    v_pk_lshlrev_b16 v0, s1, v0
4498; GFX10-NEXT:    s_lshr_b32 s0, s0, s2
4499; GFX10-NEXT:    s_lshr_b32 s1, s3, s4
4500; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
4501; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
4502; GFX10-NEXT:    ; return to shader part epilog
4503;
4504; GFX11-LABEL: v_fshr_v2i16_vss:
4505; GFX11:       ; %bb.0:
4506; GFX11-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4507; GFX11-NEXT:    s_and_b32 s2, s1, 0xf000f
4508; GFX11-NEXT:    s_and_not1_b32 s1, 0xf000f, s1
4509; GFX11-NEXT:    s_lshr_b32 s3, s0, 16
4510; GFX11-NEXT:    s_and_b32 s0, s0, 0xffff
4511; GFX11-NEXT:    s_lshr_b32 s4, s2, 16
4512; GFX11-NEXT:    v_pk_lshlrev_b16 v0, s1, v0
4513; GFX11-NEXT:    s_lshr_b32 s0, s0, s2
4514; GFX11-NEXT:    s_lshr_b32 s1, s3, s4
4515; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
4516; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
4517; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
4518; GFX11-NEXT:    v_or_b32_e32 v0, s0, v0
4519; GFX11-NEXT:    ; return to shader part epilog
  ; IR under test: per-lane funnel shift right, then reinterpret the packed
  ; <2 x i16> result as a float so it can be returned from the shader.
4520  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
4521  %cast = bitcast <2 x i16> %result to float
4522  ret float %cast
4523}
4524
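4525; ; FIXME: the all-inreg <3 x i16> fshr test below (s_fshr_v3i16) is commented out; the reason is not recorded here -- presumably this case is not yet handled with -global-isel (TODO confirm).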
4526; define amdgpu_ps i48 @s_fshr_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <3 x i16> inreg %amt) {
4527;   %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
4528;   %cast = bitcast <3 x i16> %result to i48
4529;   ret i48 %cast
4530; }
4531
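4532; ; FIXME: the non-inreg <3 x i16> fshr test below (v_fshr_v3i16) is commented out; the reason is not recorded here -- presumably this case is not yet handled with -global-isel (TODO confirm).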
4533; define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) {
4534;   %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
4535;   %cast.result = bitcast <3 x i16> %result to <3 x half>
4536;   ret <3 x half> %cast.result
4537; }
4538
; s_fshr_v4i16: <4 x i16> funnel shift right (llvm.fshr.v4i16) in an amdgpu_ps
; function with all three operands marked inreg. Every checked instruction for
; every prefix below is a scalar (s_*) instruction. The <4 x i16> result is
; bitcast to <2 x i32> for the return value.
; In the GFX6/GFX8 checks each 16-bit shift amount is masked with 15
; separately; in the GFX9/GFX10/GFX11 checks a packed pair of amounts is masked
; with 0xf000f.
; The GFX* check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
4539define amdgpu_ps <2 x i32> @s_fshr_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %rhs, <4 x i16> inreg %amt) {
4540; GFX6-LABEL: s_fshr_v4i16:
4541; GFX6:       ; %bb.0:
4542; GFX6-NEXT:    s_lshl_b32 s9, s9, 16
4543; GFX6-NEXT:    s_and_b32 s8, s8, 0xffff
4544; GFX6-NEXT:    s_or_b32 s8, s9, s8
4545; GFX6-NEXT:    s_lshl_b32 s9, s11, 16
4546; GFX6-NEXT:    s_and_b32 s10, s10, 0xffff
4547; GFX6-NEXT:    s_or_b32 s9, s9, s10
4548; GFX6-NEXT:    s_bfe_u32 s10, 1, 0x100000
4549; GFX6-NEXT:    s_bfe_u32 s11, s4, 0xf0001
4550; GFX6-NEXT:    s_bfe_u32 s12, 14, 0x100000
4551; GFX6-NEXT:    s_lshl_b32 s0, s0, s10
4552; GFX6-NEXT:    s_lshr_b32 s11, s11, s12
4553; GFX6-NEXT:    s_or_b32 s0, s0, s11
4554; GFX6-NEXT:    s_bfe_u32 s11, s5, 0xf0001
4555; GFX6-NEXT:    s_lshl_b32 s1, s1, s10
4556; GFX6-NEXT:    s_lshr_b32 s11, s11, s12
4557; GFX6-NEXT:    s_xor_b32 s8, s8, -1
4558; GFX6-NEXT:    s_or_b32 s1, s1, s11
4559; GFX6-NEXT:    s_lshl_b32 s4, s4, 1
4560; GFX6-NEXT:    s_lshr_b32 s11, s8, 16
4561; GFX6-NEXT:    s_and_b32 s13, s8, 15
4562; GFX6-NEXT:    s_andn2_b32 s8, 15, s8
4563; GFX6-NEXT:    s_bfe_u32 s13, s13, 0x100000
4564; GFX6-NEXT:    s_bfe_u32 s4, s4, 0xf0001
4565; GFX6-NEXT:    s_bfe_u32 s8, s8, 0x100000
4566; GFX6-NEXT:    s_lshl_b32 s0, s0, s13
4567; GFX6-NEXT:    s_lshr_b32 s4, s4, s8
4568; GFX6-NEXT:    s_or_b32 s0, s0, s4
4569; GFX6-NEXT:    s_and_b32 s4, s11, 15
4570; GFX6-NEXT:    s_lshl_b32 s5, s5, 1
4571; GFX6-NEXT:    s_andn2_b32 s8, 15, s11
4572; GFX6-NEXT:    s_bfe_u32 s4, s4, 0x100000
4573; GFX6-NEXT:    s_lshl_b32 s1, s1, s4
4574; GFX6-NEXT:    s_bfe_u32 s4, s5, 0xf0001
4575; GFX6-NEXT:    s_bfe_u32 s5, s8, 0x100000
4576; GFX6-NEXT:    s_lshr_b32 s4, s4, s5
4577; GFX6-NEXT:    s_or_b32 s1, s1, s4
4578; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x100000
4579; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
4580; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
4581; GFX6-NEXT:    s_or_b32 s0, s0, s1
4582; GFX6-NEXT:    s_lshl_b32 s1, s2, s10
4583; GFX6-NEXT:    s_bfe_u32 s2, s6, 0xf0001
4584; GFX6-NEXT:    s_lshr_b32 s2, s2, s12
4585; GFX6-NEXT:    s_or_b32 s1, s1, s2
4586; GFX6-NEXT:    s_lshl_b32 s2, s3, s10
4587; GFX6-NEXT:    s_bfe_u32 s3, s7, 0xf0001
4588; GFX6-NEXT:    s_lshr_b32 s3, s3, s12
4589; GFX6-NEXT:    s_xor_b32 s5, s9, -1
4590; GFX6-NEXT:    s_or_b32 s2, s2, s3
4591; GFX6-NEXT:    s_lshl_b32 s3, s6, 1
4592; GFX6-NEXT:    s_lshl_b32 s4, s7, 1
4593; GFX6-NEXT:    s_lshr_b32 s6, s5, 16
4594; GFX6-NEXT:    s_and_b32 s7, s5, 15
4595; GFX6-NEXT:    s_andn2_b32 s5, 15, s5
4596; GFX6-NEXT:    s_bfe_u32 s7, s7, 0x100000
4597; GFX6-NEXT:    s_bfe_u32 s3, s3, 0xf0001
4598; GFX6-NEXT:    s_bfe_u32 s5, s5, 0x100000
4599; GFX6-NEXT:    s_lshl_b32 s1, s1, s7
4600; GFX6-NEXT:    s_lshr_b32 s3, s3, s5
4601; GFX6-NEXT:    s_or_b32 s1, s1, s3
4602; GFX6-NEXT:    s_and_b32 s3, s6, 15
4603; GFX6-NEXT:    s_andn2_b32 s5, 15, s6
4604; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x100000
4605; GFX6-NEXT:    s_lshl_b32 s2, s2, s3
4606; GFX6-NEXT:    s_bfe_u32 s3, s4, 0xf0001
4607; GFX6-NEXT:    s_bfe_u32 s4, s5, 0x100000
4608; GFX6-NEXT:    s_lshr_b32 s3, s3, s4
4609; GFX6-NEXT:    s_or_b32 s2, s2, s3
4610; GFX6-NEXT:    s_bfe_u32 s2, s2, 0x100000
4611; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x100000
4612; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
4613; GFX6-NEXT:    s_or_b32 s1, s1, s2
4614; GFX6-NEXT:    ; return to shader part epilog
4615;
4616; GFX8-LABEL: s_fshr_v4i16:
4617; GFX8:       ; %bb.0:
4618; GFX8-NEXT:    s_bfe_u32 s8, 1, 0x100000
4619; GFX8-NEXT:    s_bfe_u32 s9, s2, 0x100000
4620; GFX8-NEXT:    s_bfe_u32 s10, 15, 0x100000
4621; GFX8-NEXT:    s_lshr_b32 s6, s0, 16
4622; GFX8-NEXT:    s_lshr_b32 s7, s2, 16
4623; GFX8-NEXT:    s_lshl_b32 s0, s0, s8
4624; GFX8-NEXT:    s_lshr_b32 s9, s9, s10
4625; GFX8-NEXT:    s_or_b32 s0, s0, s9
4626; GFX8-NEXT:    s_lshl_b32 s6, s6, s8
4627; GFX8-NEXT:    s_lshr_b32 s9, s7, s10
4628; GFX8-NEXT:    s_lshl_b32 s2, s2, s8
4629; GFX8-NEXT:    s_xor_b32 s4, s4, -1
4630; GFX8-NEXT:    s_or_b32 s6, s6, s9
4631; GFX8-NEXT:    s_lshr_b32 s9, s4, 16
4632; GFX8-NEXT:    s_and_b32 s11, s4, 15
4633; GFX8-NEXT:    s_andn2_b32 s4, 15, s4
4634; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
4635; GFX8-NEXT:    s_bfe_u32 s11, s11, 0x100000
4636; GFX8-NEXT:    s_lshr_b32 s2, s2, s8
4637; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
4638; GFX8-NEXT:    s_lshl_b32 s0, s0, s11
4639; GFX8-NEXT:    s_lshr_b32 s2, s2, s4
4640; GFX8-NEXT:    s_or_b32 s0, s0, s2
4641; GFX8-NEXT:    s_and_b32 s2, s9, 15
4642; GFX8-NEXT:    s_lshl_b32 s7, s7, s8
4643; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
4644; GFX8-NEXT:    s_andn2_b32 s4, 15, s9
4645; GFX8-NEXT:    s_lshl_b32 s2, s6, s2
4646; GFX8-NEXT:    s_bfe_u32 s6, s7, 0x100000
4647; GFX8-NEXT:    s_lshr_b32 s6, s6, s8
4648; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
4649; GFX8-NEXT:    s_lshr_b32 s4, s6, s4
4650; GFX8-NEXT:    s_or_b32 s2, s2, s4
4651; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
4652; GFX8-NEXT:    s_bfe_u32 s0, s0, 0x100000
4653; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
4654; GFX8-NEXT:    s_bfe_u32 s6, s3, 0x100000
4655; GFX8-NEXT:    s_or_b32 s0, s0, s2
4656; GFX8-NEXT:    s_lshr_b32 s2, s1, 16
4657; GFX8-NEXT:    s_lshr_b32 s4, s3, 16
4658; GFX8-NEXT:    s_lshl_b32 s1, s1, s8
4659; GFX8-NEXT:    s_lshr_b32 s6, s6, s10
4660; GFX8-NEXT:    s_or_b32 s1, s1, s6
4661; GFX8-NEXT:    s_lshl_b32 s2, s2, s8
4662; GFX8-NEXT:    s_lshr_b32 s6, s4, s10
4663; GFX8-NEXT:    s_lshl_b32 s3, s3, s8
4664; GFX8-NEXT:    s_xor_b32 s5, s5, -1
4665; GFX8-NEXT:    s_or_b32 s2, s2, s6
4666; GFX8-NEXT:    s_lshr_b32 s6, s5, 16
4667; GFX8-NEXT:    s_and_b32 s7, s5, 15
4668; GFX8-NEXT:    s_andn2_b32 s5, 15, s5
4669; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
4670; GFX8-NEXT:    s_bfe_u32 s7, s7, 0x100000
4671; GFX8-NEXT:    s_lshr_b32 s3, s3, s8
4672; GFX8-NEXT:    s_bfe_u32 s5, s5, 0x100000
4673; GFX8-NEXT:    s_lshl_b32 s1, s1, s7
4674; GFX8-NEXT:    s_lshr_b32 s3, s3, s5
4675; GFX8-NEXT:    s_or_b32 s1, s1, s3
4676; GFX8-NEXT:    s_and_b32 s3, s6, 15
4677; GFX8-NEXT:    s_lshl_b32 s4, s4, s8
4678; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
4679; GFX8-NEXT:    s_andn2_b32 s5, 15, s6
4680; GFX8-NEXT:    s_lshl_b32 s2, s2, s3
4681; GFX8-NEXT:    s_bfe_u32 s3, s4, 0x100000
4682; GFX8-NEXT:    s_lshr_b32 s3, s3, s8
4683; GFX8-NEXT:    s_bfe_u32 s4, s5, 0x100000
4684; GFX8-NEXT:    s_lshr_b32 s3, s3, s4
4685; GFX8-NEXT:    s_or_b32 s2, s2, s3
4686; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
4687; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
4688; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
4689; GFX8-NEXT:    s_or_b32 s1, s1, s2
4690; GFX8-NEXT:    ; return to shader part epilog
4691;
4692; GFX9-LABEL: s_fshr_v4i16:
4693; GFX9:       ; %bb.0:
4694; GFX9-NEXT:    s_lshr_b32 s7, s0, 16
4695; GFX9-NEXT:    s_lshl_b32 s0, s0, 0x10001
4696; GFX9-NEXT:    s_lshl_b32 s7, s7, 1
4697; GFX9-NEXT:    s_and_b32 s6, s4, 0xf000f
4698; GFX9-NEXT:    s_andn2_b32 s4, 0xf000f, s4
4699; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s7
4700; GFX9-NEXT:    s_lshr_b32 s7, s0, 16
4701; GFX9-NEXT:    s_lshr_b32 s8, s4, 16
4702; GFX9-NEXT:    s_lshl_b32 s0, s0, s4
4703; GFX9-NEXT:    s_lshl_b32 s4, s7, s8
4704; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
4705; GFX9-NEXT:    s_lshr_b32 s4, s2, 16
4706; GFX9-NEXT:    s_and_b32 s2, s2, 0xffff
4707; GFX9-NEXT:    s_lshr_b32 s7, s6, 16
4708; GFX9-NEXT:    s_lshr_b32 s2, s2, s6
4709; GFX9-NEXT:    s_lshr_b32 s4, s4, s7
4710; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s4
4711; GFX9-NEXT:    s_or_b32 s0, s0, s2
4712; GFX9-NEXT:    s_and_b32 s2, s5, 0xf000f
4713; GFX9-NEXT:    s_andn2_b32 s4, 0xf000f, s5
4714; GFX9-NEXT:    s_lshr_b32 s5, s1, 16
4715; GFX9-NEXT:    s_lshl_b32 s1, s1, 0x10001
4716; GFX9-NEXT:    s_lshl_b32 s5, s5, 1
4717; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s5
4718; GFX9-NEXT:    s_lshr_b32 s5, s1, 16
4719; GFX9-NEXT:    s_lshr_b32 s6, s4, 16
4720; GFX9-NEXT:    s_lshl_b32 s1, s1, s4
4721; GFX9-NEXT:    s_lshl_b32 s4, s5, s6
4722; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
4723; GFX9-NEXT:    s_lshr_b32 s4, s3, 16
4724; GFX9-NEXT:    s_and_b32 s3, s3, 0xffff
4725; GFX9-NEXT:    s_lshr_b32 s5, s2, 16
4726; GFX9-NEXT:    s_lshr_b32 s2, s3, s2
4727; GFX9-NEXT:    s_lshr_b32 s3, s4, s5
4728; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s3
4729; GFX9-NEXT:    s_or_b32 s1, s1, s2
4730; GFX9-NEXT:    ; return to shader part epilog
4731;
4732; GFX10-LABEL: s_fshr_v4i16:
4733; GFX10:       ; %bb.0:
4734; GFX10-NEXT:    s_lshr_b32 s6, s0, 16
4735; GFX10-NEXT:    s_lshl_b32 s0, s0, 0x10001
4736; GFX10-NEXT:    s_lshl_b32 s6, s6, 1
4737; GFX10-NEXT:    s_and_b32 s7, s4, 0xf000f
4738; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s6
4739; GFX10-NEXT:    s_andn2_b32 s4, 0xf000f, s4
4740; GFX10-NEXT:    s_lshr_b32 s6, s0, 16
4741; GFX10-NEXT:    s_lshr_b32 s8, s4, 16
4742; GFX10-NEXT:    s_lshl_b32 s0, s0, s4
4743; GFX10-NEXT:    s_lshl_b32 s4, s6, s8
4744; GFX10-NEXT:    s_lshr_b32 s6, s2, 16
4745; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
4746; GFX10-NEXT:    s_lshr_b32 s4, s1, 16
4747; GFX10-NEXT:    s_and_b32 s2, s2, 0xffff
4748; GFX10-NEXT:    s_lshr_b32 s8, s7, 16
4749; GFX10-NEXT:    s_lshl_b32 s1, s1, 0x10001
4750; GFX10-NEXT:    s_lshl_b32 s4, s4, 1
4751; GFX10-NEXT:    s_lshr_b32 s2, s2, s7
4752; GFX10-NEXT:    s_lshr_b32 s6, s6, s8
4753; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
4754; GFX10-NEXT:    s_andn2_b32 s4, 0xf000f, s5
4755; GFX10-NEXT:    s_pack_ll_b32_b16 s2, s2, s6
4756; GFX10-NEXT:    s_and_b32 s6, s5, 0xf000f
4757; GFX10-NEXT:    s_lshr_b32 s5, s1, 16
4758; GFX10-NEXT:    s_lshr_b32 s7, s4, 16
4759; GFX10-NEXT:    s_lshl_b32 s1, s1, s4
4760; GFX10-NEXT:    s_lshl_b32 s4, s5, s7
4761; GFX10-NEXT:    s_lshr_b32 s5, s3, 16
4762; GFX10-NEXT:    s_and_b32 s3, s3, 0xffff
4763; GFX10-NEXT:    s_lshr_b32 s7, s6, 16
4764; GFX10-NEXT:    s_lshr_b32 s3, s3, s6
4765; GFX10-NEXT:    s_lshr_b32 s5, s5, s7
4766; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
4767; GFX10-NEXT:    s_pack_ll_b32_b16 s3, s3, s5
4768; GFX10-NEXT:    s_or_b32 s0, s0, s2
4769; GFX10-NEXT:    s_or_b32 s1, s1, s3
4770; GFX10-NEXT:    ; return to shader part epilog
4771;
4772; GFX11-LABEL: s_fshr_v4i16:
4773; GFX11:       ; %bb.0:
4774; GFX11-NEXT:    s_lshr_b32 s6, s0, 16
4775; GFX11-NEXT:    s_lshl_b32 s0, s0, 0x10001
4776; GFX11-NEXT:    s_lshl_b32 s6, s6, 1
4777; GFX11-NEXT:    s_and_b32 s7, s4, 0xf000f
4778; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s6
4779; GFX11-NEXT:    s_and_not1_b32 s4, 0xf000f, s4
4780; GFX11-NEXT:    s_lshr_b32 s6, s0, 16
4781; GFX11-NEXT:    s_lshr_b32 s8, s4, 16
4782; GFX11-NEXT:    s_lshl_b32 s0, s0, s4
4783; GFX11-NEXT:    s_lshl_b32 s4, s6, s8
4784; GFX11-NEXT:    s_lshr_b32 s6, s2, 16
4785; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
4786; GFX11-NEXT:    s_lshr_b32 s4, s1, 16
4787; GFX11-NEXT:    s_and_b32 s2, s2, 0xffff
4788; GFX11-NEXT:    s_lshr_b32 s8, s7, 16
4789; GFX11-NEXT:    s_lshl_b32 s1, s1, 0x10001
4790; GFX11-NEXT:    s_lshl_b32 s4, s4, 1
4791; GFX11-NEXT:    s_lshr_b32 s2, s2, s7
4792; GFX11-NEXT:    s_lshr_b32 s6, s6, s8
4793; GFX11-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
4794; GFX11-NEXT:    s_and_not1_b32 s4, 0xf000f, s5
4795; GFX11-NEXT:    s_pack_ll_b32_b16 s2, s2, s6
4796; GFX11-NEXT:    s_and_b32 s6, s5, 0xf000f
4797; GFX11-NEXT:    s_lshr_b32 s5, s1, 16
4798; GFX11-NEXT:    s_lshr_b32 s7, s4, 16
4799; GFX11-NEXT:    s_lshl_b32 s1, s1, s4
4800; GFX11-NEXT:    s_lshl_b32 s4, s5, s7
4801; GFX11-NEXT:    s_lshr_b32 s5, s3, 16
4802; GFX11-NEXT:    s_and_b32 s3, s3, 0xffff
4803; GFX11-NEXT:    s_lshr_b32 s7, s6, 16
4804; GFX11-NEXT:    s_lshr_b32 s3, s3, s6
4805; GFX11-NEXT:    s_lshr_b32 s5, s5, s7
4806; GFX11-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
4807; GFX11-NEXT:    s_pack_ll_b32_b16 s3, s3, s5
4808; GFX11-NEXT:    s_or_b32 s0, s0, s2
4809; GFX11-NEXT:    s_or_b32 s1, s1, s3
4810; GFX11-NEXT:    ; return to shader part epilog
  ; IR under test: per-lane funnel shift right, then reinterpret the
  ; <4 x i16> result as <2 x i32> for the return value.
4811  %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
4812  %cast.result = bitcast <4 x i16> %result to <2 x i32>
4813  ret <2 x i32> %cast.result
4814}
4815
; v_fshr_v4i16: <4 x i16> funnel shift right (llvm.fshr.v4i16) in a function
; declared without a calling-convention keyword and with no inreg operands.
; The <4 x i16> result is bitcast to <4 x half> for the return value.
; Every checked body below begins with s_waitcnt and ends with
; s_setpc_b64 s[30:31]. The GFX9/GFX10/GFX11 checks use packed 16-bit shifts
; (v_pk_lshlrev_b16 / v_pk_lshrrev_b16) with the amounts masked by 0xf000f;
; the GFX6/GFX8 checks shift each 16-bit element separately, masking with 15.
; The GFX* check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
4816define <4 x half> @v_fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) {
4817; GFX6-LABEL: v_fshr_v4i16:
4818; GFX6:       ; %bb.0:
4819; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4820; GFX6-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
4821; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v8
4822; GFX6-NEXT:    v_or_b32_e32 v8, v9, v8
4823; GFX6-NEXT:    v_lshlrev_b32_e32 v9, 16, v11
4824; GFX6-NEXT:    v_and_b32_e32 v10, 0xffff, v10
4825; GFX6-NEXT:    v_or_b32_e32 v9, v9, v10
4826; GFX6-NEXT:    s_bfe_u32 s4, 1, 0x100000
4827; GFX6-NEXT:    v_bfe_u32 v10, v4, 1, 15
4828; GFX6-NEXT:    s_bfe_u32 s5, 14, 0x100000
4829; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s4, v0
4830; GFX6-NEXT:    v_lshrrev_b32_e32 v10, s5, v10
4831; GFX6-NEXT:    v_or_b32_e32 v0, v0, v10
4832; GFX6-NEXT:    v_bfe_u32 v10, v5, 1, 15
4833; GFX6-NEXT:    v_lshlrev_b32_e32 v1, s4, v1
4834; GFX6-NEXT:    v_lshrrev_b32_e32 v10, s5, v10
4835; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v8
4836; GFX6-NEXT:    v_or_b32_e32 v1, v1, v10
4837; GFX6-NEXT:    v_lshrrev_b32_e32 v10, 16, v8
4838; GFX6-NEXT:    v_and_b32_e32 v11, 15, v8
4839; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v8
4840; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 1, v4
4841; GFX6-NEXT:    v_and_b32_e32 v8, 15, v8
4842; GFX6-NEXT:    v_bfe_u32 v11, v11, 0, 16
4843; GFX6-NEXT:    v_bfe_u32 v4, v4, 1, 15
4844; GFX6-NEXT:    v_bfe_u32 v8, v8, 0, 16
4845; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v11, v0
4846; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v8, v4
4847; GFX6-NEXT:    v_or_b32_e32 v0, v0, v4
4848; GFX6-NEXT:    v_and_b32_e32 v4, 15, v10
4849; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v10
4850; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 1, v5
4851; GFX6-NEXT:    v_and_b32_e32 v8, 15, v8
4852; GFX6-NEXT:    v_bfe_u32 v4, v4, 0, 16
4853; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v4, v1
4854; GFX6-NEXT:    v_bfe_u32 v4, v5, 1, 15
4855; GFX6-NEXT:    v_bfe_u32 v5, v8, 0, 16
4856; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v5, v4
4857; GFX6-NEXT:    v_or_b32_e32 v1, v1, v4
4858; GFX6-NEXT:    v_bfe_u32 v4, v6, 1, 15
4859; GFX6-NEXT:    v_lshlrev_b32_e32 v2, s4, v2
4860; GFX6-NEXT:    v_lshrrev_b32_e32 v4, s5, v4
4861; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
4862; GFX6-NEXT:    v_bfe_u32 v4, v7, 1, 15
4863; GFX6-NEXT:    v_lshlrev_b32_e32 v3, s4, v3
4864; GFX6-NEXT:    v_lshrrev_b32_e32 v4, s5, v4
4865; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
4866; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 1, v6
4867; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v9
4868; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 1, v7
4869; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
4870; GFX6-NEXT:    v_and_b32_e32 v8, 15, v6
4871; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v6
4872; GFX6-NEXT:    v_and_b32_e32 v6, 15, v6
4873; GFX6-NEXT:    v_bfe_u32 v8, v8, 0, 16
4874; GFX6-NEXT:    v_bfe_u32 v4, v4, 1, 15
4875; GFX6-NEXT:    v_bfe_u32 v6, v6, 0, 16
4876; GFX6-NEXT:    v_lshlrev_b32_e32 v2, v8, v2
4877; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v6, v4
4878; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
4879; GFX6-NEXT:    v_and_b32_e32 v4, 15, v7
4880; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v7
4881; GFX6-NEXT:    v_and_b32_e32 v6, 15, v6
4882; GFX6-NEXT:    v_bfe_u32 v4, v4, 0, 16
4883; GFX6-NEXT:    v_lshlrev_b32_e32 v3, v4, v3
4884; GFX6-NEXT:    v_bfe_u32 v4, v5, 1, 15
4885; GFX6-NEXT:    v_bfe_u32 v5, v6, 0, 16
4886; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v5, v4
4887; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
4888; GFX6-NEXT:    s_setpc_b64 s[30:31]
4889;
4890; GFX8-LABEL: v_fshr_v4i16:
4891; GFX8:       ; %bb.0:
4892; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4893; GFX8-NEXT:    v_lshlrev_b16_e32 v6, 1, v0
4894; GFX8-NEXT:    v_lshrrev_b16_e32 v7, 15, v2
4895; GFX8-NEXT:    v_or_b32_e32 v6, v6, v7
4896; GFX8-NEXT:    v_mov_b32_e32 v7, 1
4897; GFX8-NEXT:    v_mov_b32_e32 v8, 15
4898; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4899; GFX8-NEXT:    v_lshrrev_b16_sdwa v9, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4900; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v4
4901; GFX8-NEXT:    v_or_b32_e32 v0, v0, v9
4902; GFX8-NEXT:    v_lshlrev_b16_e32 v9, 1, v2
4903; GFX8-NEXT:    v_lshlrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4904; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
4905; GFX8-NEXT:    v_and_b32_e32 v10, 15, v4
4906; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v4
4907; GFX8-NEXT:    v_and_b32_e32 v4, 15, v4
4908; GFX8-NEXT:    v_lshrrev_b16_e32 v9, 1, v9
4909; GFX8-NEXT:    v_lshlrev_b16_e32 v6, v10, v6
4910; GFX8-NEXT:    v_lshrrev_b16_e32 v4, v4, v9
4911; GFX8-NEXT:    v_or_b32_e32 v4, v6, v4
4912; GFX8-NEXT:    v_and_b32_e32 v6, 15, v7
4913; GFX8-NEXT:    v_xor_b32_e32 v7, -1, v7
4914; GFX8-NEXT:    v_and_b32_e32 v7, 15, v7
4915; GFX8-NEXT:    v_lshrrev_b16_e32 v2, 1, v2
4916; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v6, v0
4917; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v7, v2
4918; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
4919; GFX8-NEXT:    v_mov_b32_e32 v2, 16
4920; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
4921; GFX8-NEXT:    v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4922; GFX8-NEXT:    v_lshlrev_b16_e32 v4, 1, v1
4923; GFX8-NEXT:    v_lshrrev_b16_e32 v6, 15, v3
4924; GFX8-NEXT:    v_or_b32_e32 v4, v4, v6
4925; GFX8-NEXT:    v_mov_b32_e32 v6, 1
4926; GFX8-NEXT:    v_lshlrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4927; GFX8-NEXT:    v_lshrrev_b16_sdwa v7, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4928; GFX8-NEXT:    v_xor_b32_e32 v5, -1, v5
4929; GFX8-NEXT:    v_or_b32_e32 v1, v1, v7
4930; GFX8-NEXT:    v_lshlrev_b16_e32 v7, 1, v3
4931; GFX8-NEXT:    v_lshlrev_b16_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4932; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
4933; GFX8-NEXT:    v_and_b32_e32 v8, 15, v5
4934; GFX8-NEXT:    v_xor_b32_e32 v5, -1, v5
4935; GFX8-NEXT:    v_and_b32_e32 v5, 15, v5
4936; GFX8-NEXT:    v_lshrrev_b16_e32 v7, 1, v7
4937; GFX8-NEXT:    v_lshlrev_b16_e32 v4, v8, v4
4938; GFX8-NEXT:    v_lshrrev_b16_e32 v5, v5, v7
4939; GFX8-NEXT:    v_or_b32_e32 v4, v4, v5
4940; GFX8-NEXT:    v_and_b32_e32 v5, 15, v6
4941; GFX8-NEXT:    v_xor_b32_e32 v6, -1, v6
4942; GFX8-NEXT:    v_and_b32_e32 v6, 15, v6
4943; GFX8-NEXT:    v_lshrrev_b16_e32 v3, 1, v3
4944; GFX8-NEXT:    v_lshlrev_b16_e32 v1, v5, v1
4945; GFX8-NEXT:    v_lshrrev_b16_e32 v3, v6, v3
4946; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
4947; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
4948; GFX8-NEXT:    v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4949; GFX8-NEXT:    s_setpc_b64 s[30:31]
4950;
4951; GFX9-LABEL: v_fshr_v4i16:
4952; GFX9:       ; %bb.0:
4953; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4954; GFX9-NEXT:    v_and_b32_e32 v6, 0xf000f, v4
4955; GFX9-NEXT:    v_xor_b32_e32 v4, -1, v4
4956; GFX9-NEXT:    v_and_b32_e32 v4, 0xf000f, v4
4957; GFX9-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4958; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v4, v0
4959; GFX9-NEXT:    v_pk_lshrrev_b16 v2, v6, v2
4960; GFX9-NEXT:    v_xor_b32_e32 v4, -1, v5
4961; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
4962; GFX9-NEXT:    v_and_b32_e32 v2, 0xf000f, v5
4963; GFX9-NEXT:    v_and_b32_e32 v4, 0xf000f, v4
4964; GFX9-NEXT:    v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
4965; GFX9-NEXT:    v_pk_lshlrev_b16 v1, v4, v1
4966; GFX9-NEXT:    v_pk_lshrrev_b16 v2, v2, v3
4967; GFX9-NEXT:    v_or_b32_e32 v1, v1, v2
4968; GFX9-NEXT:    s_setpc_b64 s[30:31]
4969;
4970; GFX10-LABEL: v_fshr_v4i16:
4971; GFX10:       ; %bb.0:
4972; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4973; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4974; GFX10-NEXT:    v_xor_b32_e32 v6, -1, v4
4975; GFX10-NEXT:    v_xor_b32_e32 v7, -1, v5
4976; GFX10-NEXT:    v_and_b32_e32 v4, 0xf000f, v4
4977; GFX10-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4978; GFX10-NEXT:    v_and_b32_e32 v5, 0xf000f, v5
4979; GFX10-NEXT:    v_and_b32_e32 v6, 0xf000f, v6
4980; GFX10-NEXT:    v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
4981; GFX10-NEXT:    v_and_b32_e32 v7, 0xf000f, v7
4982; GFX10-NEXT:    v_pk_lshrrev_b16 v2, v4, v2
4983; GFX10-NEXT:    v_pk_lshrrev_b16 v3, v5, v3
4984; GFX10-NEXT:    v_pk_lshlrev_b16 v0, v6, v0
4985; GFX10-NEXT:    v_pk_lshlrev_b16 v1, v7, v1
4986; GFX10-NEXT:    v_or_b32_e32 v0, v0, v2
4987; GFX10-NEXT:    v_or_b32_e32 v1, v1, v3
4988; GFX10-NEXT:    s_setpc_b64 s[30:31]
4989;
4990; GFX11-LABEL: v_fshr_v4i16:
4991; GFX11:       ; %bb.0:
4992; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4993; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4994; GFX11-NEXT:    v_xor_b32_e32 v6, -1, v4
4995; GFX11-NEXT:    v_xor_b32_e32 v7, -1, v5
4996; GFX11-NEXT:    v_and_b32_e32 v4, 0xf000f, v4
4997; GFX11-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4998; GFX11-NEXT:    v_and_b32_e32 v5, 0xf000f, v5
4999; GFX11-NEXT:    v_and_b32_e32 v6, 0xf000f, v6
5000; GFX11-NEXT:    v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
5001; GFX11-NEXT:    v_and_b32_e32 v7, 0xf000f, v7
5002; GFX11-NEXT:    v_pk_lshrrev_b16 v2, v4, v2
5003; GFX11-NEXT:    v_pk_lshrrev_b16 v3, v5, v3
5004; GFX11-NEXT:    v_pk_lshlrev_b16 v0, v6, v0
5005; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
5006; GFX11-NEXT:    v_pk_lshlrev_b16 v1, v7, v1
5007; GFX11-NEXT:    v_or_b32_e32 v0, v0, v2
5008; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
5009; GFX11-NEXT:    v_or_b32_e32 v1, v1, v3
5010; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; IR under test: per-lane funnel shift right, then reinterpret the
  ; <4 x i16> result as <4 x half> for the return value.
5011  %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
5012  %cast.result = bitcast <4 x i16> %result to <4 x half>
5013  ret <4 x half> %cast.result
5014}
5015
; s_fshr_i64 - llvm.fshr.i64 in an amdgpu_ps function where %lhs, %rhs and %amt
; are all marked inreg and the shift amount is variable.
; Every check block below expects scalar (s_*) instructions only: %amt and its
; complement are each masked with 63, %lhs is shifted left by 1 and then by the
; masked complement, %rhs is shifted right by the masked amount, and the two
; 64-bit values are combined with s_or_b64.
5016define amdgpu_ps i64 @s_fshr_i64(i64 inreg %lhs, i64 inreg %rhs, i64 inreg %amt) {
5017; GFX6-LABEL: s_fshr_i64:
5018; GFX6:       ; %bb.0:
5019; GFX6-NEXT:    s_and_b64 s[6:7], s[4:5], 63
5020; GFX6-NEXT:    s_andn2_b64 s[4:5], 63, s[4:5]
5021; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5022; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], s4
5023; GFX6-NEXT:    s_lshr_b64 s[2:3], s[2:3], s6
5024; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5025; GFX6-NEXT:    ; return to shader part epilog
5026;
5027; GFX8-LABEL: s_fshr_i64:
5028; GFX8:       ; %bb.0:
5029; GFX8-NEXT:    s_and_b64 s[6:7], s[4:5], 63
5030; GFX8-NEXT:    s_andn2_b64 s[4:5], 63, s[4:5]
5031; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5032; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], s4
5033; GFX8-NEXT:    s_lshr_b64 s[2:3], s[2:3], s6
5034; GFX8-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5035; GFX8-NEXT:    ; return to shader part epilog
5036;
5037; GFX9-LABEL: s_fshr_i64:
5038; GFX9:       ; %bb.0:
5039; GFX9-NEXT:    s_and_b64 s[6:7], s[4:5], 63
5040; GFX9-NEXT:    s_andn2_b64 s[4:5], 63, s[4:5]
5041; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5042; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], s4
5043; GFX9-NEXT:    s_lshr_b64 s[2:3], s[2:3], s6
5044; GFX9-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5045; GFX9-NEXT:    ; return to shader part epilog
5046;
5047; GFX10-LABEL: s_fshr_i64:
5048; GFX10:       ; %bb.0:
5049; GFX10-NEXT:    s_andn2_b64 s[6:7], 63, s[4:5]
5050; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5051; GFX10-NEXT:    s_and_b64 s[4:5], s[4:5], 63
5052; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s6
5053; GFX10-NEXT:    s_lshr_b64 s[2:3], s[2:3], s4
5054; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5055; GFX10-NEXT:    ; return to shader part epilog
5056;
5057; GFX11-LABEL: s_fshr_i64:
5058; GFX11:       ; %bb.0:
5059; GFX11-NEXT:    s_and_not1_b64 s[6:7], 63, s[4:5]
5060; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5061; GFX11-NEXT:    s_and_b64 s[4:5], s[4:5], 63
5062; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], s6
5063; GFX11-NEXT:    s_lshr_b64 s[2:3], s[2:3], s4
5064; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5065; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5066; GFX11-NEXT:    ; return to shader part epilog
5067  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
5068  ret i64 %result
5069}
5070
; s_fshr_i64_5 - llvm.fshr.i64 with inreg %lhs/%rhs and a constant shift amount
; of 5. The expected code forms the left-shifted half with a 32-bit shift of s0
; by 27 into s1 (s0 is then zeroed), shifts s[2:3] right by 5, and ORs the two.
; The GFX11 block differs from the shared GCN block only by the extra
; s_delay_alu line.
5071define amdgpu_ps i64 @s_fshr_i64_5(i64 inreg %lhs, i64 inreg %rhs) {
5072; GCN-LABEL: s_fshr_i64_5:
5073; GCN:       ; %bb.0:
5074; GCN-NEXT:    s_lshl_b32 s1, s0, 27
5075; GCN-NEXT:    s_mov_b32 s0, 0
5076; GCN-NEXT:    s_lshr_b64 s[2:3], s[2:3], 5
5077; GCN-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5078; GCN-NEXT:    ; return to shader part epilog
5079;
5080; GFX11-LABEL: s_fshr_i64_5:
5081; GFX11:       ; %bb.0:
5082; GFX11-NEXT:    s_lshl_b32 s1, s0, 27
5083; GFX11-NEXT:    s_mov_b32 s0, 0
5084; GFX11-NEXT:    s_lshr_b64 s[2:3], s[2:3], 5
5085; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5086; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5087; GFX11-NEXT:    ; return to shader part epilog
5088  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5)
5089  ret i64 %result
5090}
5091
; s_fshr_i64_32 - llvm.fshr.i64 with inreg %lhs/%rhs and a constant shift amount
; of 32. No shift instruction is expected: the checks show only 32-bit register
; moves (s0 -> s1, s3 -> s2, zeros into s0 and s3) followed by s_or_b64.
; The GFX11 block differs from the shared GCN block only by the extra
; s_delay_alu line.
5092define amdgpu_ps i64 @s_fshr_i64_32(i64 inreg %lhs, i64 inreg %rhs) {
5093; GCN-LABEL: s_fshr_i64_32:
5094; GCN:       ; %bb.0:
5095; GCN-NEXT:    s_mov_b32 s1, s0
5096; GCN-NEXT:    s_mov_b32 s0, 0
5097; GCN-NEXT:    s_mov_b32 s2, s3
5098; GCN-NEXT:    s_mov_b32 s3, s0
5099; GCN-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5100; GCN-NEXT:    ; return to shader part epilog
5101;
5102; GFX11-LABEL: s_fshr_i64_32:
5103; GFX11:       ; %bb.0:
5104; GFX11-NEXT:    s_mov_b32 s1, s0
5105; GFX11-NEXT:    s_mov_b32 s0, 0
5106; GFX11-NEXT:    s_mov_b32 s2, s3
5107; GFX11-NEXT:    s_mov_b32 s3, s0
5108; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5109; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5110; GFX11-NEXT:    ; return to shader part epilog
5111  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32)
5112  ret i64 %result
5113}
5114
; s_fshr_i64_48 - llvm.fshr.i64 with inreg %lhs/%rhs and a constant shift amount
; of 48. The expected code shifts s[0:1] left by 16, builds the other operand
; from a 32-bit right shift of s3 by 16 (into s2, with s3 zeroed), and ORs them.
; The GFX11 block differs from the shared GCN block only by the extra
; s_delay_alu line.
5115define amdgpu_ps i64 @s_fshr_i64_48(i64 inreg %lhs, i64 inreg %rhs) {
5116; GCN-LABEL: s_fshr_i64_48:
5117; GCN:       ; %bb.0:
5118; GCN-NEXT:    s_lshl_b64 s[0:1], s[0:1], 16
5119; GCN-NEXT:    s_lshr_b32 s2, s3, 16
5120; GCN-NEXT:    s_mov_b32 s3, 0
5121; GCN-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5122; GCN-NEXT:    ; return to shader part epilog
5123;
5124; GFX11-LABEL: s_fshr_i64_48:
5125; GFX11:       ; %bb.0:
5126; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], 16
5127; GFX11-NEXT:    s_lshr_b32 s2, s3, 16
5128; GFX11-NEXT:    s_mov_b32 s3, 0
5129; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5130; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5131; GFX11-NEXT:    ; return to shader part epilog
5132  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48)
5133  ret i64 %result
5134}
5135
; v_fshr_i64 - llvm.fshr.i64 with a variable shift amount in a function with no
; explicit calling convention and no inreg arguments.
; The checks expect vector (v_*) instructions and a return via s_setpc_b64:
; the amount and its bitwise NOT are each masked with 63, v[0:1] is shifted left
; by 1 and then by the masked NOT, v[2:3] is shifted right by the masked amount,
; and the halves are OR'ed per 32-bit register. The tahiti block uses
; v_lshl_b64 / v_lshr_b64 where the other targets use the *rev_b64 forms.
5136define i64 @v_fshr_i64(i64 %lhs, i64 %rhs, i64 %amt) {
5137; GFX6-LABEL: v_fshr_i64:
5138; GFX6:       ; %bb.0:
5139; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5140; GFX6-NEXT:    v_and_b32_e32 v5, 63, v4
5141; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v4
5142; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
5143; GFX6-NEXT:    v_and_b32_e32 v4, 63, v4
5144; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v4
5145; GFX6-NEXT:    v_lshr_b64 v[2:3], v[2:3], v5
5146; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
5147; GFX6-NEXT:    v_or_b32_e32 v1, v1, v3
5148; GFX6-NEXT:    s_setpc_b64 s[30:31]
5149;
5150; GFX8-LABEL: v_fshr_i64:
5151; GFX8:       ; %bb.0:
5152; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5153; GFX8-NEXT:    v_and_b32_e32 v5, 63, v4
5154; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v4
5155; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5156; GFX8-NEXT:    v_and_b32_e32 v4, 63, v4
5157; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v4, v[0:1]
5158; GFX8-NEXT:    v_lshrrev_b64 v[2:3], v5, v[2:3]
5159; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
5160; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
5161; GFX8-NEXT:    s_setpc_b64 s[30:31]
5162;
5163; GFX9-LABEL: v_fshr_i64:
5164; GFX9:       ; %bb.0:
5165; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5166; GFX9-NEXT:    v_and_b32_e32 v5, 63, v4
5167; GFX9-NEXT:    v_xor_b32_e32 v4, -1, v4
5168; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5169; GFX9-NEXT:    v_and_b32_e32 v4, 63, v4
5170; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v4, v[0:1]
5171; GFX9-NEXT:    v_lshrrev_b64 v[2:3], v5, v[2:3]
5172; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
5173; GFX9-NEXT:    v_or_b32_e32 v1, v1, v3
5174; GFX9-NEXT:    s_setpc_b64 s[30:31]
5175;
5176; GFX10-LABEL: v_fshr_i64:
5177; GFX10:       ; %bb.0:
5178; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5179; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
5180; GFX10-NEXT:    v_xor_b32_e32 v5, -1, v4
5181; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5182; GFX10-NEXT:    v_and_b32_e32 v4, 63, v4
5183; GFX10-NEXT:    v_and_b32_e32 v5, 63, v5
5184; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v4, v[2:3]
5185; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v5, v[0:1]
5186; GFX10-NEXT:    v_or_b32_e32 v0, v0, v2
5187; GFX10-NEXT:    v_or_b32_e32 v1, v1, v3
5188; GFX10-NEXT:    s_setpc_b64 s[30:31]
5189;
5190; GFX11-LABEL: v_fshr_i64:
5191; GFX11:       ; %bb.0:
5192; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5193; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
5194; GFX11-NEXT:    v_xor_b32_e32 v5, -1, v4
5195; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5196; GFX11-NEXT:    v_and_b32_e32 v4, 63, v4
5197; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
5198; GFX11-NEXT:    v_and_b32_e32 v5, 63, v5
5199; GFX11-NEXT:    v_lshrrev_b64 v[2:3], v4, v[2:3]
5200; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5201; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v5, v[0:1]
5202; GFX11-NEXT:    v_or_b32_e32 v0, v0, v2
5203; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
5204; GFX11-NEXT:    v_or_b32_e32 v1, v1, v3
5205; GFX11-NEXT:    s_setpc_b64 s[30:31]
5206  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
5207  ret i64 %result
5208}
5209
; v_fshr_i64_5 - llvm.fshr.i64 with non-inreg operands and a constant shift
; amount of 5. The expected code copies v0 aside, shifts v[2:3] right by 5 into
; v[0:1], shifts the saved v0 left by 27, and ORs that into v1 only (a single
; 32-bit OR; the low result register comes straight from the right shift).
5210define i64 @v_fshr_i64_5(i64 %lhs, i64 %rhs) {
5211; GFX6-LABEL: v_fshr_i64_5:
5212; GFX6:       ; %bb.0:
5213; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5214; GFX6-NEXT:    v_mov_b32_e32 v4, v0
5215; GFX6-NEXT:    v_lshr_b64 v[0:1], v[2:3], 5
5216; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 27, v4
5217; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
5218; GFX6-NEXT:    s_setpc_b64 s[30:31]
5219;
5220; GFX8-LABEL: v_fshr_i64_5:
5221; GFX8:       ; %bb.0:
5222; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5223; GFX8-NEXT:    v_mov_b32_e32 v4, v0
5224; GFX8-NEXT:    v_lshrrev_b64 v[0:1], 5, v[2:3]
5225; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 27, v4
5226; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
5227; GFX8-NEXT:    s_setpc_b64 s[30:31]
5228;
5229; GFX9-LABEL: v_fshr_i64_5:
5230; GFX9:       ; %bb.0:
5231; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5232; GFX9-NEXT:    v_mov_b32_e32 v4, v0
5233; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 5, v[2:3]
5234; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 27, v4
5235; GFX9-NEXT:    v_or_b32_e32 v1, v2, v1
5236; GFX9-NEXT:    s_setpc_b64 s[30:31]
5237;
5238; GFX10-LABEL: v_fshr_i64_5:
5239; GFX10:       ; %bb.0:
5240; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5241; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
5242; GFX10-NEXT:    v_mov_b32_e32 v4, v0
5243; GFX10-NEXT:    v_lshrrev_b64 v[0:1], 5, v[2:3]
5244; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 27, v4
5245; GFX10-NEXT:    v_or_b32_e32 v1, v2, v1
5246; GFX10-NEXT:    s_setpc_b64 s[30:31]
5247;
5248; GFX11-LABEL: v_fshr_i64_5:
5249; GFX11:       ; %bb.0:
5250; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5251; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
5252; GFX11-NEXT:    v_mov_b32_e32 v4, v0
5253; GFX11-NEXT:    v_lshrrev_b64 v[0:1], 5, v[2:3]
5254; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5255; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 27, v4
5256; GFX11-NEXT:    v_or_b32_e32 v1, v2, v1
5257; GFX11-NEXT:    s_setpc_b64 s[30:31]
5258  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5)
5259  ret i64 %result
5260}
5261
; v_fshr_i64_32 - llvm.fshr.i64 with non-inreg operands and a constant shift
; amount of 32. The checks expect no shift or OR at all, only two register
; copies (v0 -> v1, v3 -> v0); the gfx1100 block expects both copies in a
; single v_dual_mov_b32.
5262define i64 @v_fshr_i64_32(i64 %lhs, i64 %rhs) {
5263; GFX6-LABEL: v_fshr_i64_32:
5264; GFX6:       ; %bb.0:
5265; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5266; GFX6-NEXT:    v_mov_b32_e32 v1, v0
5267; GFX6-NEXT:    v_mov_b32_e32 v0, v3
5268; GFX6-NEXT:    s_setpc_b64 s[30:31]
5269;
5270; GFX8-LABEL: v_fshr_i64_32:
5271; GFX8:       ; %bb.0:
5272; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5273; GFX8-NEXT:    v_mov_b32_e32 v1, v0
5274; GFX8-NEXT:    v_mov_b32_e32 v0, v3
5275; GFX8-NEXT:    s_setpc_b64 s[30:31]
5276;
5277; GFX9-LABEL: v_fshr_i64_32:
5278; GFX9:       ; %bb.0:
5279; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5280; GFX9-NEXT:    v_mov_b32_e32 v1, v0
5281; GFX9-NEXT:    v_mov_b32_e32 v0, v3
5282; GFX9-NEXT:    s_setpc_b64 s[30:31]
5283;
5284; GFX10-LABEL: v_fshr_i64_32:
5285; GFX10:       ; %bb.0:
5286; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5287; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
5288; GFX10-NEXT:    v_mov_b32_e32 v1, v0
5289; GFX10-NEXT:    v_mov_b32_e32 v0, v3
5290; GFX10-NEXT:    s_setpc_b64 s[30:31]
5291;
5292; GFX11-LABEL: v_fshr_i64_32:
5293; GFX11:       ; %bb.0:
5294; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5295; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
5296; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, v3
5297; GFX11-NEXT:    s_setpc_b64 s[30:31]
5298  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32)
5299  ret i64 %result
5300}
5301
; v_fshr_i64_48 - llvm.fshr.i64 with non-inreg operands and a constant shift
; amount of 48. The expected code shifts v[0:1] left by 16 and ORs the upper
; 16 bits of v3 into v0. The fiji, gfx900 and gfx1010 blocks fold the 16-bit
; extract into v_or_b32_sdwa (src1_sel:WORD_1); the tahiti and gfx1100 blocks
; use a separate v_lshrrev_b32 by 16 followed by a plain v_or_b32.
5302define i64 @v_fshr_i64_48(i64 %lhs, i64 %rhs) {
5303; GFX6-LABEL: v_fshr_i64_48:
5304; GFX6:       ; %bb.0:
5305; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5306; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 16
5307; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
5308; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
5309; GFX6-NEXT:    s_setpc_b64 s[30:31]
5310;
5311; GFX8-LABEL: v_fshr_i64_48:
5312; GFX8:       ; %bb.0:
5313; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5314; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 16, v[0:1]
5315; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5316; GFX8-NEXT:    s_setpc_b64 s[30:31]
5317;
5318; GFX9-LABEL: v_fshr_i64_48:
5319; GFX9:       ; %bb.0:
5320; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5321; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 16, v[0:1]
5322; GFX9-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5323; GFX9-NEXT:    s_setpc_b64 s[30:31]
5324;
5325; GFX10-LABEL: v_fshr_i64_48:
5326; GFX10:       ; %bb.0:
5327; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5328; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
5329; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 16, v[0:1]
5330; GFX10-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5331; GFX10-NEXT:    s_setpc_b64 s[30:31]
5332;
5333; GFX11-LABEL: v_fshr_i64_48:
5334; GFX11:       ; %bb.0:
5335; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5336; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
5337; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 16, v[0:1]
5338; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
5339; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5340; GFX11-NEXT:    v_or_b32_e32 v0, v0, v2
5341; GFX11-NEXT:    s_setpc_b64 s[30:31]
5342  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48)
5343  ret i64 %result
5344}
5345
; v_fshr_i64_ssv - llvm.fshr.i64 in an amdgpu_ps function where %lhs and %rhs
; are inreg but %amt is not; the i64 result is bitcast to <2 x float> for the
; return. In the checks the constant pre-shift of s[0:1] by 1 stays a scalar
; s_lshl_b64, while the amount masking, both variable 64-bit shifts (taking the
; s-register pairs as sources) and the final ORs are vector instructions.
5346define amdgpu_ps <2 x float> @v_fshr_i64_ssv(i64 inreg %lhs, i64 inreg %rhs, i64 %amt) {
5347; GFX6-LABEL: v_fshr_i64_ssv:
5348; GFX6:       ; %bb.0:
5349; GFX6-NEXT:    v_and_b32_e32 v2, 63, v0
5350; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
5351; GFX6-NEXT:    v_and_b32_e32 v0, 63, v0
5352; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5353; GFX6-NEXT:    v_lshl_b64 v[0:1], s[0:1], v0
5354; GFX6-NEXT:    v_lshr_b64 v[2:3], s[2:3], v2
5355; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
5356; GFX6-NEXT:    v_or_b32_e32 v1, v1, v3
5357; GFX6-NEXT:    ; return to shader part epilog
5358;
5359; GFX8-LABEL: v_fshr_i64_ssv:
5360; GFX8:       ; %bb.0:
5361; GFX8-NEXT:    v_and_b32_e32 v2, 63, v0
5362; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
5363; GFX8-NEXT:    v_and_b32_e32 v0, 63, v0
5364; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5365; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v0, s[0:1]
5366; GFX8-NEXT:    v_lshrrev_b64 v[2:3], v2, s[2:3]
5367; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
5368; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
5369; GFX8-NEXT:    ; return to shader part epilog
5370;
5371; GFX9-LABEL: v_fshr_i64_ssv:
5372; GFX9:       ; %bb.0:
5373; GFX9-NEXT:    v_and_b32_e32 v2, 63, v0
5374; GFX9-NEXT:    v_xor_b32_e32 v0, -1, v0
5375; GFX9-NEXT:    v_and_b32_e32 v0, 63, v0
5376; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5377; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v0, s[0:1]
5378; GFX9-NEXT:    v_lshrrev_b64 v[2:3], v2, s[2:3]
5379; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
5380; GFX9-NEXT:    v_or_b32_e32 v1, v1, v3
5381; GFX9-NEXT:    ; return to shader part epilog
5382;
5383; GFX10-LABEL: v_fshr_i64_ssv:
5384; GFX10:       ; %bb.0:
5385; GFX10-NEXT:    v_xor_b32_e32 v1, -1, v0
5386; GFX10-NEXT:    v_and_b32_e32 v0, 63, v0
5387; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5388; GFX10-NEXT:    v_and_b32_e32 v2, 63, v1
5389; GFX10-NEXT:    v_lshrrev_b64 v[0:1], v0, s[2:3]
5390; GFX10-NEXT:    v_lshlrev_b64 v[2:3], v2, s[0:1]
5391; GFX10-NEXT:    v_or_b32_e32 v0, v2, v0
5392; GFX10-NEXT:    v_or_b32_e32 v1, v3, v1
5393; GFX10-NEXT:    ; return to shader part epilog
5394;
5395; GFX11-LABEL: v_fshr_i64_ssv:
5396; GFX11:       ; %bb.0:
5397; GFX11-NEXT:    v_xor_b32_e32 v1, -1, v0
5398; GFX11-NEXT:    v_and_b32_e32 v0, 63, v0
5399; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5400; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
5401; GFX11-NEXT:    v_and_b32_e32 v2, 63, v1
5402; GFX11-NEXT:    v_lshrrev_b64 v[0:1], v0, s[2:3]
5403; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5404; GFX11-NEXT:    v_lshlrev_b64 v[2:3], v2, s[0:1]
5405; GFX11-NEXT:    v_or_b32_e32 v0, v2, v0
5406; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
5407; GFX11-NEXT:    v_or_b32_e32 v1, v3, v1
5408; GFX11-NEXT:    ; return to shader part epilog
5409  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
5410  %cast = bitcast i64 %result to <2 x float>
5411  ret <2 x float> %cast
5412}
5413
; v_fshr_i64_svs - llvm.fshr.i64 in an amdgpu_ps function where %lhs and %amt
; are inreg but %rhs is not; the i64 result is bitcast to <2 x float> for the
; return. In the checks the amount masking and both left shifts of s[0:1] are
; scalar instructions; only the right shift of v[0:1] by s4 and the final ORs
; (scalar source OR'ed into the vector registers) are vector instructions.
5414define amdgpu_ps <2 x float> @v_fshr_i64_svs(i64 inreg %lhs, i64 %rhs, i64 inreg %amt) {
5415; GFX6-LABEL: v_fshr_i64_svs:
5416; GFX6:       ; %bb.0:
5417; GFX6-NEXT:    s_and_b64 s[4:5], s[2:3], 63
5418; GFX6-NEXT:    s_andn2_b64 s[2:3], 63, s[2:3]
5419; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5420; GFX6-NEXT:    v_lshr_b64 v[0:1], v[0:1], s4
5421; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
5422; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
5423; GFX6-NEXT:    v_or_b32_e32 v1, s1, v1
5424; GFX6-NEXT:    ; return to shader part epilog
5425;
5426; GFX8-LABEL: v_fshr_i64_svs:
5427; GFX8:       ; %bb.0:
5428; GFX8-NEXT:    s_and_b64 s[4:5], s[2:3], 63
5429; GFX8-NEXT:    s_andn2_b64 s[2:3], 63, s[2:3]
5430; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5431; GFX8-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
5432; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
5433; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
5434; GFX8-NEXT:    v_or_b32_e32 v1, s1, v1
5435; GFX8-NEXT:    ; return to shader part epilog
5436;
5437; GFX9-LABEL: v_fshr_i64_svs:
5438; GFX9:       ; %bb.0:
5439; GFX9-NEXT:    s_and_b64 s[4:5], s[2:3], 63
5440; GFX9-NEXT:    s_andn2_b64 s[2:3], 63, s[2:3]
5441; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5442; GFX9-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
5443; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
5444; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
5445; GFX9-NEXT:    v_or_b32_e32 v1, s1, v1
5446; GFX9-NEXT:    ; return to shader part epilog
5447;
5448; GFX10-LABEL: v_fshr_i64_svs:
5449; GFX10:       ; %bb.0:
5450; GFX10-NEXT:    s_and_b64 s[4:5], s[2:3], 63
5451; GFX10-NEXT:    s_andn2_b64 s[2:3], 63, s[2:3]
5452; GFX10-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
5453; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5454; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
5455; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
5456; GFX10-NEXT:    v_or_b32_e32 v1, s1, v1
5457; GFX10-NEXT:    ; return to shader part epilog
5458;
5459; GFX11-LABEL: v_fshr_i64_svs:
5460; GFX11:       ; %bb.0:
5461; GFX11-NEXT:    s_and_b64 s[4:5], s[2:3], 63
5462; GFX11-NEXT:    s_and_not1_b64 s[2:3], 63, s[2:3]
5463; GFX11-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
5464; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5465; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5466; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
5467; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
5468; GFX11-NEXT:    v_or_b32_e32 v0, s0, v0
5469; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
5470; GFX11-NEXT:    v_or_b32_e32 v1, s1, v1
5471; GFX11-NEXT:    ; return to shader part epilog
5472  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
5473  %cast = bitcast i64 %result to <2 x float>
5474  ret <2 x float> %cast
5475}
5476
; v_fshr_i64_vss - llvm.fshr.i64 in an amdgpu_ps function where %rhs and %amt
; are inreg but %lhs is not; the i64 result is bitcast to <2 x float> for the
; return. In the checks the amount masking and the right shift of s[0:1] are
; scalar instructions; both left shifts of v[0:1] (by 1, then by the masked
; complement held in an s-register) and the final ORs are vector instructions.
5477define amdgpu_ps <2 x float> @v_fshr_i64_vss(i64 %lhs, i64 inreg %rhs, i64 inreg %amt) {
5478; GFX6-LABEL: v_fshr_i64_vss:
5479; GFX6:       ; %bb.0:
5480; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
5481; GFX6-NEXT:    s_and_b64 s[4:5], s[2:3], 63
5482; GFX6-NEXT:    s_andn2_b64 s[2:3], 63, s[2:3]
5483; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], s2
5484; GFX6-NEXT:    s_lshr_b64 s[0:1], s[0:1], s4
5485; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
5486; GFX6-NEXT:    v_or_b32_e32 v1, s1, v1
5487; GFX6-NEXT:    ; return to shader part epilog
5488;
5489; GFX8-LABEL: v_fshr_i64_vss:
5490; GFX8:       ; %bb.0:
5491; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5492; GFX8-NEXT:    s_and_b64 s[4:5], s[2:3], 63
5493; GFX8-NEXT:    s_andn2_b64 s[2:3], 63, s[2:3]
5494; GFX8-NEXT:    v_lshlrev_b64 v[0:1], s2, v[0:1]
5495; GFX8-NEXT:    s_lshr_b64 s[0:1], s[0:1], s4
5496; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
5497; GFX8-NEXT:    v_or_b32_e32 v1, s1, v1
5498; GFX8-NEXT:    ; return to shader part epilog
5499;
5500; GFX9-LABEL: v_fshr_i64_vss:
5501; GFX9:       ; %bb.0:
5502; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5503; GFX9-NEXT:    s_and_b64 s[4:5], s[2:3], 63
5504; GFX9-NEXT:    s_andn2_b64 s[2:3], 63, s[2:3]
5505; GFX9-NEXT:    v_lshlrev_b64 v[0:1], s2, v[0:1]
5506; GFX9-NEXT:    s_lshr_b64 s[0:1], s[0:1], s4
5507; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
5508; GFX9-NEXT:    v_or_b32_e32 v1, s1, v1
5509; GFX9-NEXT:    ; return to shader part epilog
5510;
5511; GFX10-LABEL: v_fshr_i64_vss:
5512; GFX10:       ; %bb.0:
5513; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5514; GFX10-NEXT:    s_andn2_b64 s[4:5], 63, s[2:3]
5515; GFX10-NEXT:    s_and_b64 s[2:3], s[2:3], 63
5516; GFX10-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
5517; GFX10-NEXT:    v_lshlrev_b64 v[0:1], s4, v[0:1]
5518; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
5519; GFX10-NEXT:    v_or_b32_e32 v1, s1, v1
5520; GFX10-NEXT:    ; return to shader part epilog
5521;
5522; GFX11-LABEL: v_fshr_i64_vss:
5523; GFX11:       ; %bb.0:
5524; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5525; GFX11-NEXT:    s_and_not1_b64 s[4:5], 63, s[2:3]
5526; GFX11-NEXT:    s_and_b64 s[2:3], s[2:3], 63
5527; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5528; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
5529; GFX11-NEXT:    v_lshlrev_b64 v[0:1], s4, v[0:1]
5530; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
5531; GFX11-NEXT:    v_or_b32_e32 v0, s0, v0
5532; GFX11-NEXT:    v_or_b32_e32 v1, s1, v1
5533; GFX11-NEXT:    ; return to shader part epilog
5534  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
5535  %cast = bitcast i64 %result to <2 x float>
5536  ret <2 x float> %cast
5537}
5538
; s_fshr_v2i64 - llvm.fshr.v2i64 in an amdgpu_ps function with all three
; <2 x i64> operands marked inreg. The checks expect the vector to be handled
; one element at a time with scalar instructions only: for each element the
; amount and its complement are masked with 63, the %lhs element is shifted
; left by 1 and by the masked complement, the %rhs element is shifted right by
; the masked amount, and the pair is combined with s_or_b64. The tahiti, fiji
; and gfx900 blocks finish element 0 before starting element 1; the gfx1010
; and gfx1100 blocks interleave the two elements.
5539define amdgpu_ps <2 x i64> @s_fshr_v2i64(<2 x i64> inreg %lhs, <2 x i64> inreg %rhs, <2 x i64> inreg %amt) {
5540; GFX6-LABEL: s_fshr_v2i64:
5541; GFX6:       ; %bb.0:
5542; GFX6-NEXT:    s_and_b64 s[12:13], s[8:9], 63
5543; GFX6-NEXT:    s_andn2_b64 s[8:9], 63, s[8:9]
5544; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5545; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], s8
5546; GFX6-NEXT:    s_lshr_b64 s[4:5], s[4:5], s12
5547; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
5548; GFX6-NEXT:    s_and_b64 s[4:5], s[10:11], 63
5549; GFX6-NEXT:    s_andn2_b64 s[8:9], 63, s[10:11]
5550; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5551; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], s8
5552; GFX6-NEXT:    s_lshr_b64 s[4:5], s[6:7], s4
5553; GFX6-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
5554; GFX6-NEXT:    ; return to shader part epilog
5555;
5556; GFX8-LABEL: s_fshr_v2i64:
5557; GFX8:       ; %bb.0:
5558; GFX8-NEXT:    s_and_b64 s[12:13], s[8:9], 63
5559; GFX8-NEXT:    s_andn2_b64 s[8:9], 63, s[8:9]
5560; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5561; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], s8
5562; GFX8-NEXT:    s_lshr_b64 s[4:5], s[4:5], s12
5563; GFX8-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
5564; GFX8-NEXT:    s_and_b64 s[4:5], s[10:11], 63
5565; GFX8-NEXT:    s_andn2_b64 s[8:9], 63, s[10:11]
5566; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5567; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], s8
5568; GFX8-NEXT:    s_lshr_b64 s[4:5], s[6:7], s4
5569; GFX8-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
5570; GFX8-NEXT:    ; return to shader part epilog
5571;
5572; GFX9-LABEL: s_fshr_v2i64:
5573; GFX9:       ; %bb.0:
5574; GFX9-NEXT:    s_and_b64 s[12:13], s[8:9], 63
5575; GFX9-NEXT:    s_andn2_b64 s[8:9], 63, s[8:9]
5576; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5577; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], s8
5578; GFX9-NEXT:    s_lshr_b64 s[4:5], s[4:5], s12
5579; GFX9-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
5580; GFX9-NEXT:    s_and_b64 s[4:5], s[10:11], 63
5581; GFX9-NEXT:    s_andn2_b64 s[8:9], 63, s[10:11]
5582; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5583; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], s8
5584; GFX9-NEXT:    s_lshr_b64 s[4:5], s[6:7], s4
5585; GFX9-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
5586; GFX9-NEXT:    ; return to shader part epilog
5587;
5588; GFX10-LABEL: s_fshr_v2i64:
5589; GFX10:       ; %bb.0:
5590; GFX10-NEXT:    s_andn2_b64 s[12:13], 63, s[8:9]
5591; GFX10-NEXT:    s_and_b64 s[8:9], s[8:9], 63
5592; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5593; GFX10-NEXT:    s_lshr_b64 s[4:5], s[4:5], s8
5594; GFX10-NEXT:    s_andn2_b64 s[8:9], 63, s[10:11]
5595; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5596; GFX10-NEXT:    s_and_b64 s[10:11], s[10:11], 63
5597; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s12
5598; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], s8
5599; GFX10-NEXT:    s_lshr_b64 s[6:7], s[6:7], s10
5600; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
5601; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[6:7]
5602; GFX10-NEXT:    ; return to shader part epilog
5603;
5604; GFX11-LABEL: s_fshr_v2i64:
5605; GFX11:       ; %bb.0:
5606; GFX11-NEXT:    s_and_not1_b64 s[12:13], 63, s[8:9]
5607; GFX11-NEXT:    s_and_b64 s[8:9], s[8:9], 63
5608; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5609; GFX11-NEXT:    s_lshr_b64 s[4:5], s[4:5], s8
5610; GFX11-NEXT:    s_and_not1_b64 s[8:9], 63, s[10:11]
5611; GFX11-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5612; GFX11-NEXT:    s_and_b64 s[10:11], s[10:11], 63
5613; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], s12
5614; GFX11-NEXT:    s_lshl_b64 s[2:3], s[2:3], s8
5615; GFX11-NEXT:    s_lshr_b64 s[6:7], s[6:7], s10
5616; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
5617; GFX11-NEXT:    s_or_b64 s[2:3], s[2:3], s[6:7]
5618; GFX11-NEXT:    ; return to shader part epilog
5619  %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt)
5620  ret <2 x i64> %result
5621}
5622
; v_fshr_v2i64 - llvm.fshr.v2i64 with a variable per-element shift amount in a
; function with no explicit calling convention and no inreg arguments.
; The checks expect vector (v_*) instructions and a return via s_setpc_b64.
; Each element is processed separately: the amount (v8 / v10) and its bitwise
; NOT are masked with 63, the %lhs element is shifted left by 1 and by the
; masked NOT, the %rhs element is shifted right by the masked amount, and the
; results are OR'ed per 32-bit register into v0..v3.
5623define <2 x i64> @v_fshr_v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) {
5624; GFX6-LABEL: v_fshr_v2i64:
5625; GFX6:       ; %bb.0:
5626; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5627; GFX6-NEXT:    v_and_b32_e32 v9, 63, v8
5628; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v8
5629; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
5630; GFX6-NEXT:    v_and_b32_e32 v8, 63, v8
5631; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v8
5632; GFX6-NEXT:    v_lshr_b64 v[4:5], v[4:5], v9
5633; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v10
5634; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
5635; GFX6-NEXT:    v_or_b32_e32 v0, v0, v4
5636; GFX6-NEXT:    v_and_b32_e32 v4, 63, v10
5637; GFX6-NEXT:    v_and_b32_e32 v8, 63, v8
5638; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], v8
5639; GFX6-NEXT:    v_lshr_b64 v[6:7], v[6:7], v4
5640; GFX6-NEXT:    v_or_b32_e32 v1, v1, v5
5641; GFX6-NEXT:    v_or_b32_e32 v2, v2, v6
5642; GFX6-NEXT:    v_or_b32_e32 v3, v3, v7
5643; GFX6-NEXT:    s_setpc_b64 s[30:31]
5644;
5645; GFX8-LABEL: v_fshr_v2i64:
5646; GFX8:       ; %bb.0:
5647; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5648; GFX8-NEXT:    v_and_b32_e32 v9, 63, v8
5649; GFX8-NEXT:    v_xor_b32_e32 v8, -1, v8
5650; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5651; GFX8-NEXT:    v_and_b32_e32 v8, 63, v8
5652; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v8, v[0:1]
5653; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v9, v[4:5]
5654; GFX8-NEXT:    v_xor_b32_e32 v8, -1, v10
5655; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
5656; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
5657; GFX8-NEXT:    v_and_b32_e32 v4, 63, v10
5658; GFX8-NEXT:    v_and_b32_e32 v8, 63, v8
5659; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v8, v[2:3]
5660; GFX8-NEXT:    v_lshrrev_b64 v[6:7], v4, v[6:7]
5661; GFX8-NEXT:    v_or_b32_e32 v1, v1, v5
5662; GFX8-NEXT:    v_or_b32_e32 v2, v2, v6
5663; GFX8-NEXT:    v_or_b32_e32 v3, v3, v7
5664; GFX8-NEXT:    s_setpc_b64 s[30:31]
5665;
5666; GFX9-LABEL: v_fshr_v2i64:
5667; GFX9:       ; %bb.0:
5668; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5669; GFX9-NEXT:    v_and_b32_e32 v9, 63, v8
5670; GFX9-NEXT:    v_xor_b32_e32 v8, -1, v8
5671; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5672; GFX9-NEXT:    v_and_b32_e32 v8, 63, v8
5673; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v8, v[0:1]
5674; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v9, v[4:5]
5675; GFX9-NEXT:    v_xor_b32_e32 v8, -1, v10
5676; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
5677; GFX9-NEXT:    v_or_b32_e32 v0, v0, v4
5678; GFX9-NEXT:    v_and_b32_e32 v4, 63, v10
5679; GFX9-NEXT:    v_and_b32_e32 v8, 63, v8
5680; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v8, v[2:3]
5681; GFX9-NEXT:    v_lshrrev_b64 v[6:7], v4, v[6:7]
5682; GFX9-NEXT:    v_or_b32_e32 v1, v1, v5
5683; GFX9-NEXT:    v_or_b32_e32 v2, v2, v6
5684; GFX9-NEXT:    v_or_b32_e32 v3, v3, v7
5685; GFX9-NEXT:    s_setpc_b64 s[30:31]
5686;
5687; GFX10-LABEL: v_fshr_v2i64:
5688; GFX10:       ; %bb.0:
5689; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5690; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
5691; GFX10-NEXT:    v_xor_b32_e32 v9, -1, v8
5692; GFX10-NEXT:    v_xor_b32_e32 v11, -1, v10
5693; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5694; GFX10-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
5695; GFX10-NEXT:    v_and_b32_e32 v8, 63, v8
5696; GFX10-NEXT:    v_and_b32_e32 v9, 63, v9
5697; GFX10-NEXT:    v_and_b32_e32 v11, 63, v11
5698; GFX10-NEXT:    v_and_b32_e32 v10, 63, v10
5699; GFX10-NEXT:    v_lshrrev_b64 v[4:5], v8, v[4:5]
5700; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v9, v[0:1]
5701; GFX10-NEXT:    v_lshlrev_b64 v[2:3], v11, v[2:3]
5702; GFX10-NEXT:    v_lshrrev_b64 v[6:7], v10, v[6:7]
5703; GFX10-NEXT:    v_or_b32_e32 v0, v0, v4
5704; GFX10-NEXT:    v_or_b32_e32 v1, v1, v5
5705; GFX10-NEXT:    v_or_b32_e32 v2, v2, v6
5706; GFX10-NEXT:    v_or_b32_e32 v3, v3, v7
5707; GFX10-NEXT:    s_setpc_b64 s[30:31]
5708;
5709; GFX11-LABEL: v_fshr_v2i64:
5710; GFX11:       ; %bb.0:
5711; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5712; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
5713; GFX11-NEXT:    v_xor_b32_e32 v9, -1, v8
5714; GFX11-NEXT:    v_xor_b32_e32 v11, -1, v10
5715; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5716; GFX11-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
5717; GFX11-NEXT:    v_and_b32_e32 v8, 63, v8
5718; GFX11-NEXT:    v_and_b32_e32 v9, 63, v9
5719; GFX11-NEXT:    v_and_b32_e32 v11, 63, v11
5720; GFX11-NEXT:    v_and_b32_e32 v10, 63, v10
5721; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
5722; GFX11-NEXT:    v_lshrrev_b64 v[4:5], v8, v[4:5]
5723; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v9, v[0:1]
5724; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
5725; GFX11-NEXT:    v_lshlrev_b64 v[2:3], v11, v[2:3]
5726; GFX11-NEXT:    v_lshrrev_b64 v[6:7], v10, v[6:7]
5727; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
5728; GFX11-NEXT:    v_or_b32_e32 v0, v0, v4
5729; GFX11-NEXT:    v_or_b32_e32 v1, v1, v5
5730; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
5731; GFX11-NEXT:    v_or_b32_e32 v2, v2, v6
5732; GFX11-NEXT:    v_or_b32_e32 v3, v3, v7
5733; GFX11-NEXT:    s_setpc_b64 s[30:31]
5734  %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt)
5735  ret <2 x i64> %result
5736}
5737
; Uniform 128-bit funnel shift right: an amdgpu_ps function whose three i128
; operands are all marked inreg and whose result comes from llvm.fshr.i128.
; Every expected sequence below consists of scalar (s_*) instructions only and
; ends in the shader part epilog. In each of them the amount is masked with
; 0x7f, lhs is pre-shifted left by 1 and then shifted left by the inverted
; masked amount, rhs is shifted right by the masked amount, and the two results
; are OR'd together. The 128-bit shifts are assembled from 64-bit shifts that
; are selected by comparing the shift count against 64 and against 0.
5738define amdgpu_ps i128 @s_fshr_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg %amt) {
5739; GFX6-LABEL: s_fshr_i128:
5740; GFX6:       ; %bb.0:
5741; GFX6-NEXT:    s_movk_i32 s10, 0x7f
5742; GFX6-NEXT:    s_mov_b32 s11, 0
5743; GFX6-NEXT:    s_and_b64 s[12:13], s[8:9], s[10:11]
5744; GFX6-NEXT:    s_andn2_b64 s[8:9], s[10:11], s[8:9]
5745; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5746; GFX6-NEXT:    s_lshr_b32 s10, s1, 31
5747; GFX6-NEXT:    s_lshl_b64 s[14:15], s[0:1], 1
5748; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[10:11]
5749; GFX6-NEXT:    s_sub_i32 s13, s8, 64
5750; GFX6-NEXT:    s_sub_i32 s9, 64, s8
5751; GFX6-NEXT:    s_cmp_lt_u32 s8, 64
5752; GFX6-NEXT:    s_cselect_b32 s16, 1, 0
5753; GFX6-NEXT:    s_cmp_eq_u32 s8, 0
5754; GFX6-NEXT:    s_cselect_b32 s17, 1, 0
5755; GFX6-NEXT:    s_lshl_b64 s[2:3], s[14:15], s8
5756; GFX6-NEXT:    s_lshr_b64 s[10:11], s[14:15], s9
5757; GFX6-NEXT:    s_lshl_b64 s[8:9], s[0:1], s8
5758; GFX6-NEXT:    s_or_b64 s[8:9], s[10:11], s[8:9]
5759; GFX6-NEXT:    s_lshl_b64 s[10:11], s[14:15], s13
5760; GFX6-NEXT:    s_cmp_lg_u32 s16, 0
5761; GFX6-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
5762; GFX6-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[10:11]
5763; GFX6-NEXT:    s_cmp_lg_u32 s17, 0
5764; GFX6-NEXT:    s_cselect_b64 s[8:9], s[0:1], s[8:9]
5765; GFX6-NEXT:    s_sub_i32 s14, s12, 64
5766; GFX6-NEXT:    s_sub_i32 s13, 64, s12
5767; GFX6-NEXT:    s_cmp_lt_u32 s12, 64
5768; GFX6-NEXT:    s_cselect_b32 s15, 1, 0
5769; GFX6-NEXT:    s_cmp_eq_u32 s12, 0
5770; GFX6-NEXT:    s_cselect_b32 s16, 1, 0
5771; GFX6-NEXT:    s_lshr_b64 s[0:1], s[6:7], s12
5772; GFX6-NEXT:    s_lshr_b64 s[10:11], s[4:5], s12
5773; GFX6-NEXT:    s_lshl_b64 s[12:13], s[6:7], s13
5774; GFX6-NEXT:    s_or_b64 s[10:11], s[10:11], s[12:13]
5775; GFX6-NEXT:    s_lshr_b64 s[6:7], s[6:7], s14
5776; GFX6-NEXT:    s_cmp_lg_u32 s15, 0
5777; GFX6-NEXT:    s_cselect_b64 s[6:7], s[10:11], s[6:7]
5778; GFX6-NEXT:    s_cmp_lg_u32 s16, 0
5779; GFX6-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
5780; GFX6-NEXT:    s_cmp_lg_u32 s15, 0
5781; GFX6-NEXT:    s_cselect_b64 s[6:7], s[0:1], 0
5782; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[4:5]
5783; GFX6-NEXT:    s_or_b64 s[2:3], s[8:9], s[6:7]
5784; GFX6-NEXT:    ; return to shader part epilog
5785;
5786; GFX8-LABEL: s_fshr_i128:
5787; GFX8:       ; %bb.0:
5788; GFX8-NEXT:    s_movk_i32 s10, 0x7f
5789; GFX8-NEXT:    s_mov_b32 s11, 0
5790; GFX8-NEXT:    s_and_b64 s[12:13], s[8:9], s[10:11]
5791; GFX8-NEXT:    s_andn2_b64 s[8:9], s[10:11], s[8:9]
5792; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5793; GFX8-NEXT:    s_lshr_b32 s10, s1, 31
5794; GFX8-NEXT:    s_lshl_b64 s[14:15], s[0:1], 1
5795; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[10:11]
5796; GFX8-NEXT:    s_sub_i32 s13, s8, 64
5797; GFX8-NEXT:    s_sub_i32 s9, 64, s8
5798; GFX8-NEXT:    s_cmp_lt_u32 s8, 64
5799; GFX8-NEXT:    s_cselect_b32 s16, 1, 0
5800; GFX8-NEXT:    s_cmp_eq_u32 s8, 0
5801; GFX8-NEXT:    s_cselect_b32 s17, 1, 0
5802; GFX8-NEXT:    s_lshl_b64 s[2:3], s[14:15], s8
5803; GFX8-NEXT:    s_lshr_b64 s[10:11], s[14:15], s9
5804; GFX8-NEXT:    s_lshl_b64 s[8:9], s[0:1], s8
5805; GFX8-NEXT:    s_or_b64 s[8:9], s[10:11], s[8:9]
5806; GFX8-NEXT:    s_lshl_b64 s[10:11], s[14:15], s13
5807; GFX8-NEXT:    s_cmp_lg_u32 s16, 0
5808; GFX8-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
5809; GFX8-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[10:11]
5810; GFX8-NEXT:    s_cmp_lg_u32 s17, 0
5811; GFX8-NEXT:    s_cselect_b64 s[8:9], s[0:1], s[8:9]
5812; GFX8-NEXT:    s_sub_i32 s14, s12, 64
5813; GFX8-NEXT:    s_sub_i32 s13, 64, s12
5814; GFX8-NEXT:    s_cmp_lt_u32 s12, 64
5815; GFX8-NEXT:    s_cselect_b32 s15, 1, 0
5816; GFX8-NEXT:    s_cmp_eq_u32 s12, 0
5817; GFX8-NEXT:    s_cselect_b32 s16, 1, 0
5818; GFX8-NEXT:    s_lshr_b64 s[0:1], s[6:7], s12
5819; GFX8-NEXT:    s_lshr_b64 s[10:11], s[4:5], s12
5820; GFX8-NEXT:    s_lshl_b64 s[12:13], s[6:7], s13
5821; GFX8-NEXT:    s_or_b64 s[10:11], s[10:11], s[12:13]
5822; GFX8-NEXT:    s_lshr_b64 s[6:7], s[6:7], s14
5823; GFX8-NEXT:    s_cmp_lg_u32 s15, 0
5824; GFX8-NEXT:    s_cselect_b64 s[6:7], s[10:11], s[6:7]
5825; GFX8-NEXT:    s_cmp_lg_u32 s16, 0
5826; GFX8-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
5827; GFX8-NEXT:    s_cmp_lg_u32 s15, 0
5828; GFX8-NEXT:    s_cselect_b64 s[6:7], s[0:1], 0
5829; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[4:5]
5830; GFX8-NEXT:    s_or_b64 s[2:3], s[8:9], s[6:7]
5831; GFX8-NEXT:    ; return to shader part epilog
5832;
5833; GFX9-LABEL: s_fshr_i128:
5834; GFX9:       ; %bb.0:
5835; GFX9-NEXT:    s_movk_i32 s10, 0x7f
5836; GFX9-NEXT:    s_mov_b32 s11, 0
5837; GFX9-NEXT:    s_and_b64 s[12:13], s[8:9], s[10:11]
5838; GFX9-NEXT:    s_andn2_b64 s[8:9], s[10:11], s[8:9]
5839; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5840; GFX9-NEXT:    s_lshr_b32 s10, s1, 31
5841; GFX9-NEXT:    s_lshl_b64 s[14:15], s[0:1], 1
5842; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[10:11]
5843; GFX9-NEXT:    s_sub_i32 s13, s8, 64
5844; GFX9-NEXT:    s_sub_i32 s9, 64, s8
5845; GFX9-NEXT:    s_cmp_lt_u32 s8, 64
5846; GFX9-NEXT:    s_cselect_b32 s16, 1, 0
5847; GFX9-NEXT:    s_cmp_eq_u32 s8, 0
5848; GFX9-NEXT:    s_cselect_b32 s17, 1, 0
5849; GFX9-NEXT:    s_lshl_b64 s[2:3], s[14:15], s8
5850; GFX9-NEXT:    s_lshr_b64 s[10:11], s[14:15], s9
5851; GFX9-NEXT:    s_lshl_b64 s[8:9], s[0:1], s8
5852; GFX9-NEXT:    s_or_b64 s[8:9], s[10:11], s[8:9]
5853; GFX9-NEXT:    s_lshl_b64 s[10:11], s[14:15], s13
5854; GFX9-NEXT:    s_cmp_lg_u32 s16, 0
5855; GFX9-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
5856; GFX9-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[10:11]
5857; GFX9-NEXT:    s_cmp_lg_u32 s17, 0
5858; GFX9-NEXT:    s_cselect_b64 s[8:9], s[0:1], s[8:9]
5859; GFX9-NEXT:    s_sub_i32 s14, s12, 64
5860; GFX9-NEXT:    s_sub_i32 s13, 64, s12
5861; GFX9-NEXT:    s_cmp_lt_u32 s12, 64
5862; GFX9-NEXT:    s_cselect_b32 s15, 1, 0
5863; GFX9-NEXT:    s_cmp_eq_u32 s12, 0
5864; GFX9-NEXT:    s_cselect_b32 s16, 1, 0
5865; GFX9-NEXT:    s_lshr_b64 s[0:1], s[6:7], s12
5866; GFX9-NEXT:    s_lshr_b64 s[10:11], s[4:5], s12
5867; GFX9-NEXT:    s_lshl_b64 s[12:13], s[6:7], s13
5868; GFX9-NEXT:    s_or_b64 s[10:11], s[10:11], s[12:13]
5869; GFX9-NEXT:    s_lshr_b64 s[6:7], s[6:7], s14
5870; GFX9-NEXT:    s_cmp_lg_u32 s15, 0
5871; GFX9-NEXT:    s_cselect_b64 s[6:7], s[10:11], s[6:7]
5872; GFX9-NEXT:    s_cmp_lg_u32 s16, 0
5873; GFX9-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
5874; GFX9-NEXT:    s_cmp_lg_u32 s15, 0
5875; GFX9-NEXT:    s_cselect_b64 s[6:7], s[0:1], 0
5876; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[4:5]
5877; GFX9-NEXT:    s_or_b64 s[2:3], s[8:9], s[6:7]
5878; GFX9-NEXT:    ; return to shader part epilog
5879;
5880; GFX10-LABEL: s_fshr_i128:
5881; GFX10:       ; %bb.0:
5882; GFX10-NEXT:    s_movk_i32 s10, 0x7f
5883; GFX10-NEXT:    s_mov_b32 s11, 0
5884; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5885; GFX10-NEXT:    s_and_b64 s[12:13], s[8:9], s[10:11]
5886; GFX10-NEXT:    s_andn2_b64 s[8:9], s[10:11], s[8:9]
5887; GFX10-NEXT:    s_lshr_b32 s10, s1, 31
5888; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5889; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[10:11]
5890; GFX10-NEXT:    s_sub_i32 s13, s8, 64
5891; GFX10-NEXT:    s_sub_i32 s9, 64, s8
5892; GFX10-NEXT:    s_cmp_lt_u32 s8, 64
5893; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
5894; GFX10-NEXT:    s_cmp_eq_u32 s8, 0
5895; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
5896; GFX10-NEXT:    s_lshr_b64 s[10:11], s[0:1], s9
5897; GFX10-NEXT:    s_lshl_b64 s[14:15], s[2:3], s8
5898; GFX10-NEXT:    s_lshl_b64 s[8:9], s[0:1], s8
5899; GFX10-NEXT:    s_or_b64 s[10:11], s[10:11], s[14:15]
5900; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s13
5901; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
5902; GFX10-NEXT:    s_cselect_b64 s[8:9], s[8:9], 0
5903; GFX10-NEXT:    s_cselect_b64 s[0:1], s[10:11], s[0:1]
5904; GFX10-NEXT:    s_cmp_lg_u32 s17, 0
5905; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
5906; GFX10-NEXT:    s_sub_i32 s14, s12, 64
5907; GFX10-NEXT:    s_sub_i32 s10, 64, s12
5908; GFX10-NEXT:    s_cmp_lt_u32 s12, 64
5909; GFX10-NEXT:    s_cselect_b32 s15, 1, 0
5910; GFX10-NEXT:    s_cmp_eq_u32 s12, 0
5911; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
5912; GFX10-NEXT:    s_lshr_b64 s[0:1], s[4:5], s12
5913; GFX10-NEXT:    s_lshl_b64 s[10:11], s[6:7], s10
5914; GFX10-NEXT:    s_lshr_b64 s[12:13], s[6:7], s12
5915; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[10:11]
5916; GFX10-NEXT:    s_lshr_b64 s[6:7], s[6:7], s14
5917; GFX10-NEXT:    s_cmp_lg_u32 s15, 0
5918; GFX10-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[6:7]
5919; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
5920; GFX10-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
5921; GFX10-NEXT:    s_cmp_lg_u32 s15, 0
5922; GFX10-NEXT:    s_cselect_b64 s[4:5], s[12:13], 0
5923; GFX10-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
5924; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
5925; GFX10-NEXT:    ; return to shader part epilog
5926;
5927; GFX11-LABEL: s_fshr_i128:
5928; GFX11:       ; %bb.0:
5929; GFX11-NEXT:    s_movk_i32 s10, 0x7f
5930; GFX11-NEXT:    s_mov_b32 s11, 0
5931; GFX11-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5932; GFX11-NEXT:    s_and_b64 s[12:13], s[8:9], s[10:11]
5933; GFX11-NEXT:    s_and_not1_b64 s[8:9], s[10:11], s[8:9]
5934; GFX11-NEXT:    s_lshr_b32 s10, s1, 31
5935; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5936; GFX11-NEXT:    s_or_b64 s[2:3], s[2:3], s[10:11]
5937; GFX11-NEXT:    s_sub_i32 s13, s8, 64
5938; GFX11-NEXT:    s_sub_i32 s9, 64, s8
5939; GFX11-NEXT:    s_cmp_lt_u32 s8, 64
5940; GFX11-NEXT:    s_cselect_b32 s16, 1, 0
5941; GFX11-NEXT:    s_cmp_eq_u32 s8, 0
5942; GFX11-NEXT:    s_cselect_b32 s17, 1, 0
5943; GFX11-NEXT:    s_lshr_b64 s[10:11], s[0:1], s9
5944; GFX11-NEXT:    s_lshl_b64 s[14:15], s[2:3], s8
5945; GFX11-NEXT:    s_lshl_b64 s[8:9], s[0:1], s8
5946; GFX11-NEXT:    s_or_b64 s[10:11], s[10:11], s[14:15]
5947; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], s13
5948; GFX11-NEXT:    s_cmp_lg_u32 s16, 0
5949; GFX11-NEXT:    s_cselect_b64 s[8:9], s[8:9], 0
5950; GFX11-NEXT:    s_cselect_b64 s[0:1], s[10:11], s[0:1]
5951; GFX11-NEXT:    s_cmp_lg_u32 s17, 0
5952; GFX11-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
5953; GFX11-NEXT:    s_sub_i32 s14, s12, 64
5954; GFX11-NEXT:    s_sub_i32 s10, 64, s12
5955; GFX11-NEXT:    s_cmp_lt_u32 s12, 64
5956; GFX11-NEXT:    s_cselect_b32 s15, 1, 0
5957; GFX11-NEXT:    s_cmp_eq_u32 s12, 0
5958; GFX11-NEXT:    s_cselect_b32 s16, 1, 0
5959; GFX11-NEXT:    s_lshr_b64 s[0:1], s[4:5], s12
5960; GFX11-NEXT:    s_lshl_b64 s[10:11], s[6:7], s10
5961; GFX11-NEXT:    s_lshr_b64 s[12:13], s[6:7], s12
5962; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[10:11]
5963; GFX11-NEXT:    s_lshr_b64 s[6:7], s[6:7], s14
5964; GFX11-NEXT:    s_cmp_lg_u32 s15, 0
5965; GFX11-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[6:7]
5966; GFX11-NEXT:    s_cmp_lg_u32 s16, 0
5967; GFX11-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
5968; GFX11-NEXT:    s_cmp_lg_u32 s15, 0
5969; GFX11-NEXT:    s_cselect_b64 s[4:5], s[12:13], 0
5970; GFX11-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
5971; GFX11-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
5972; GFX11-NEXT:    ; return to shader part epilog
5973  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
5974  ret i128 %result
5975}
5976
; 128-bit funnel shift right with the default calling convention and no inreg
; operands; the result comes from llvm.fshr.i128. The expected code shifts
; v[0:3] (lhs) left and v[4:7] (rhs) right, and reads only v8 of the amount
; operand: v8 is masked with 0x7f for the right-shift count, and its xor with
; -1 is masked with 0x7f for the left-shift count. Each expected sequence
; begins with s_waitcnt and returns via s_setpc_b64 s[30:31]. The gfx1010 and
; gfx1100 outputs also expect s_waitcnt_vscnt, and the gfx1100 output
; additionally contains s_delay_alu hints and a v_dual_cndmask_b32.
5977define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
5978; GFX6-LABEL: v_fshr_i128:
5979; GFX6:       ; %bb.0:
5980; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5981; GFX6-NEXT:    v_and_b32_e32 v14, 0x7f, v8
5982; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v8
5983; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
5984; GFX6-NEXT:    v_and_b32_e32 v15, 0x7f, v8
5985; GFX6-NEXT:    v_lshl_b64 v[8:9], v[0:1], 1
5986; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
5987; GFX6-NEXT:    v_or_b32_e32 v2, v2, v0
5988; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, 64, v15
5989; GFX6-NEXT:    v_lshr_b64 v[0:1], v[8:9], v0
5990; GFX6-NEXT:    v_lshl_b64 v[10:11], v[2:3], v15
5991; GFX6-NEXT:    v_subrev_i32_e32 v16, vcc, 64, v15
5992; GFX6-NEXT:    v_lshl_b64 v[12:13], v[8:9], v15
5993; GFX6-NEXT:    v_or_b32_e32 v10, v0, v10
5994; GFX6-NEXT:    v_or_b32_e32 v11, v1, v11
5995; GFX6-NEXT:    v_lshl_b64 v[0:1], v[8:9], v16
5996; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
5997; GFX6-NEXT:    v_cndmask_b32_e32 v12, 0, v12, vcc
5998; GFX6-NEXT:    v_cndmask_b32_e32 v13, 0, v13, vcc
5999; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
6000; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
6001; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v15
6002; GFX6-NEXT:    v_cndmask_b32_e32 v10, v0, v2, vcc
6003; GFX6-NEXT:    v_cndmask_b32_e32 v11, v1, v3, vcc
6004; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 64, v14
6005; GFX6-NEXT:    v_lshr_b64 v[0:1], v[4:5], v14
6006; GFX6-NEXT:    v_lshl_b64 v[2:3], v[6:7], v2
6007; GFX6-NEXT:    v_subrev_i32_e32 v15, vcc, 64, v14
6008; GFX6-NEXT:    v_or_b32_e32 v2, v0, v2
6009; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
6010; GFX6-NEXT:    v_lshr_b64 v[0:1], v[6:7], v15
6011; GFX6-NEXT:    v_lshr_b64 v[8:9], v[6:7], v14
6012; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
6013; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
6014; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6015; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
6016; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
6017; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s[4:5]
6018; GFX6-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
6019; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
6020; GFX6-NEXT:    v_or_b32_e32 v0, v12, v0
6021; GFX6-NEXT:    v_or_b32_e32 v1, v13, v1
6022; GFX6-NEXT:    v_or_b32_e32 v2, v10, v2
6023; GFX6-NEXT:    v_or_b32_e32 v3, v11, v3
6024; GFX6-NEXT:    s_setpc_b64 s[30:31]
6025;
6026; GFX8-LABEL: v_fshr_i128:
6027; GFX8:       ; %bb.0:
6028; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6029; GFX8-NEXT:    v_and_b32_e32 v14, 0x7f, v8
6030; GFX8-NEXT:    v_xor_b32_e32 v8, -1, v8
6031; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
6032; GFX8-NEXT:    v_and_b32_e32 v15, 0x7f, v8
6033; GFX8-NEXT:    v_lshlrev_b64 v[8:9], 1, v[0:1]
6034; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
6035; GFX8-NEXT:    v_or_b32_e32 v2, v2, v0
6036; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, 64, v15
6037; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v0, v[8:9]
6038; GFX8-NEXT:    v_lshlrev_b64 v[10:11], v15, v[2:3]
6039; GFX8-NEXT:    v_subrev_u32_e32 v16, vcc, 64, v15
6040; GFX8-NEXT:    v_lshlrev_b64 v[12:13], v15, v[8:9]
6041; GFX8-NEXT:    v_or_b32_e32 v10, v0, v10
6042; GFX8-NEXT:    v_or_b32_e32 v11, v1, v11
6043; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v16, v[8:9]
6044; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
6045; GFX8-NEXT:    v_cndmask_b32_e32 v12, 0, v12, vcc
6046; GFX8-NEXT:    v_cndmask_b32_e32 v13, 0, v13, vcc
6047; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
6048; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
6049; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v15
6050; GFX8-NEXT:    v_cndmask_b32_e32 v10, v0, v2, vcc
6051; GFX8-NEXT:    v_cndmask_b32_e32 v11, v1, v3, vcc
6052; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 64, v14
6053; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v14, v[4:5]
6054; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v2, v[6:7]
6055; GFX8-NEXT:    v_subrev_u32_e32 v15, vcc, 64, v14
6056; GFX8-NEXT:    v_or_b32_e32 v2, v0, v2
6057; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
6058; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v15, v[6:7]
6059; GFX8-NEXT:    v_lshrrev_b64 v[8:9], v14, v[6:7]
6060; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
6061; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
6062; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6063; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
6064; GFX8-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
6065; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s[4:5]
6066; GFX8-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
6067; GFX8-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
6068; GFX8-NEXT:    v_or_b32_e32 v0, v12, v0
6069; GFX8-NEXT:    v_or_b32_e32 v1, v13, v1
6070; GFX8-NEXT:    v_or_b32_e32 v2, v10, v2
6071; GFX8-NEXT:    v_or_b32_e32 v3, v11, v3
6072; GFX8-NEXT:    s_setpc_b64 s[30:31]
6073;
6074; GFX9-LABEL: v_fshr_i128:
6075; GFX9:       ; %bb.0:
6076; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6077; GFX9-NEXT:    v_and_b32_e32 v14, 0x7f, v8
6078; GFX9-NEXT:    v_xor_b32_e32 v8, -1, v8
6079; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
6080; GFX9-NEXT:    v_and_b32_e32 v15, 0x7f, v8
6081; GFX9-NEXT:    v_lshlrev_b64 v[8:9], 1, v[0:1]
6082; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
6083; GFX9-NEXT:    v_or_b32_e32 v2, v2, v0
6084; GFX9-NEXT:    v_sub_u32_e32 v0, 64, v15
6085; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v0, v[8:9]
6086; GFX9-NEXT:    v_lshlrev_b64 v[10:11], v15, v[2:3]
6087; GFX9-NEXT:    v_subrev_u32_e32 v16, 64, v15
6088; GFX9-NEXT:    v_lshlrev_b64 v[12:13], v15, v[8:9]
6089; GFX9-NEXT:    v_or_b32_e32 v10, v0, v10
6090; GFX9-NEXT:    v_or_b32_e32 v11, v1, v11
6091; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v16, v[8:9]
6092; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
6093; GFX9-NEXT:    v_cndmask_b32_e32 v12, 0, v12, vcc
6094; GFX9-NEXT:    v_cndmask_b32_e32 v13, 0, v13, vcc
6095; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
6096; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
6097; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v15
6098; GFX9-NEXT:    v_cndmask_b32_e32 v10, v0, v2, vcc
6099; GFX9-NEXT:    v_sub_u32_e32 v2, 64, v14
6100; GFX9-NEXT:    v_cndmask_b32_e32 v11, v1, v3, vcc
6101; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v14, v[4:5]
6102; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v2, v[6:7]
6103; GFX9-NEXT:    v_subrev_u32_e32 v15, 64, v14
6104; GFX9-NEXT:    v_or_b32_e32 v2, v0, v2
6105; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
6106; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v15, v[6:7]
6107; GFX9-NEXT:    v_lshrrev_b64 v[8:9], v14, v[6:7]
6108; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
6109; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
6110; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6111; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
6112; GFX9-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
6113; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s[4:5]
6114; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
6115; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
6116; GFX9-NEXT:    v_or_b32_e32 v0, v12, v0
6117; GFX9-NEXT:    v_or_b32_e32 v1, v13, v1
6118; GFX9-NEXT:    v_or_b32_e32 v2, v10, v2
6119; GFX9-NEXT:    v_or_b32_e32 v3, v11, v3
6120; GFX9-NEXT:    s_setpc_b64 s[30:31]
6121;
6122; GFX10-LABEL: v_fshr_i128:
6123; GFX10:       ; %bb.0:
6124; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6125; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
6126; GFX10-NEXT:    v_xor_b32_e32 v9, -1, v8
6127; GFX10-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
6128; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 31, v1
6129; GFX10-NEXT:    v_and_b32_e32 v19, 0x7f, v8
6130; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
6131; GFX10-NEXT:    v_and_b32_e32 v18, 0x7f, v9
6132; GFX10-NEXT:    v_or_b32_e32 v2, v2, v10
6133; GFX10-NEXT:    v_sub_nc_u32_e32 v16, 64, v19
6134; GFX10-NEXT:    v_subrev_nc_u32_e32 v21, 64, v19
6135; GFX10-NEXT:    v_sub_nc_u32_e32 v10, 64, v18
6136; GFX10-NEXT:    v_subrev_nc_u32_e32 v20, 64, v18
6137; GFX10-NEXT:    v_lshlrev_b64 v[8:9], v18, v[2:3]
6138; GFX10-NEXT:    v_lshrrev_b64 v[12:13], v19, v[4:5]
6139; GFX10-NEXT:    v_lshlrev_b64 v[16:17], v16, v[6:7]
6140; GFX10-NEXT:    v_lshrrev_b64 v[10:11], v10, v[0:1]
6141; GFX10-NEXT:    v_lshlrev_b64 v[14:15], v18, v[0:1]
6142; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v20, v[0:1]
6143; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v18
6144; GFX10-NEXT:    v_cmp_gt_u32_e64 s4, 64, v19
6145; GFX10-NEXT:    v_or_b32_e32 v12, v12, v16
6146; GFX10-NEXT:    v_or_b32_e32 v10, v10, v8
6147; GFX10-NEXT:    v_or_b32_e32 v11, v11, v9
6148; GFX10-NEXT:    v_lshrrev_b64 v[8:9], v21, v[6:7]
6149; GFX10-NEXT:    v_or_b32_e32 v13, v13, v17
6150; GFX10-NEXT:    v_cmp_eq_u32_e64 s5, 0, v19
6151; GFX10-NEXT:    v_cndmask_b32_e32 v10, v0, v10, vcc_lo
6152; GFX10-NEXT:    v_cndmask_b32_e32 v11, v1, v11, vcc_lo
6153; GFX10-NEXT:    v_lshrrev_b64 v[0:1], v19, v[6:7]
6154; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v12, s4
6155; GFX10-NEXT:    v_cmp_eq_u32_e64 s6, 0, v18
6156; GFX10-NEXT:    v_cndmask_b32_e64 v6, v9, v13, s4
6157; GFX10-NEXT:    v_cndmask_b32_e32 v14, 0, v14, vcc_lo
6158; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0, v15, vcc_lo
6159; GFX10-NEXT:    v_cndmask_b32_e64 v4, v8, v4, s5
6160; GFX10-NEXT:    v_cndmask_b32_e64 v2, v10, v2, s6
6161; GFX10-NEXT:    v_cndmask_b32_e64 v3, v11, v3, s6
6162; GFX10-NEXT:    v_cndmask_b32_e64 v5, v6, v5, s5
6163; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, v0, s4
6164; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, v1, s4
6165; GFX10-NEXT:    v_or_b32_e32 v0, v14, v4
6166; GFX10-NEXT:    v_or_b32_e32 v1, v7, v5
6167; GFX10-NEXT:    v_or_b32_e32 v2, v2, v6
6168; GFX10-NEXT:    v_or_b32_e32 v3, v3, v8
6169; GFX10-NEXT:    s_setpc_b64 s[30:31]
6170;
6171; GFX11-LABEL: v_fshr_i128:
6172; GFX11:       ; %bb.0:
6173; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6174; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
6175; GFX11-NEXT:    v_xor_b32_e32 v9, -1, v8
6176; GFX11-NEXT:    v_lshrrev_b32_e32 v10, 31, v1
6177; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
6178; GFX11-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
6179; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
6180; GFX11-NEXT:    v_and_b32_e32 v18, 0x7f, v9
6181; GFX11-NEXT:    v_or_b32_e32 v2, v2, v10
6182; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
6183; GFX11-NEXT:    v_lshlrev_b64 v[14:15], v18, v[0:1]
6184; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v18
6185; GFX11-NEXT:    v_and_b32_e32 v19, 0x7f, v8
6186; GFX11-NEXT:    v_sub_nc_u32_e32 v10, 64, v18
6187; GFX11-NEXT:    v_subrev_nc_u32_e32 v20, 64, v18
6188; GFX11-NEXT:    v_lshlrev_b64 v[8:9], v18, v[2:3]
6189; GFX11-NEXT:    v_cndmask_b32_e32 v14, 0, v14, vcc_lo
6190; GFX11-NEXT:    v_sub_nc_u32_e32 v16, 64, v19
6191; GFX11-NEXT:    v_lshrrev_b64 v[10:11], v10, v[0:1]
6192; GFX11-NEXT:    v_subrev_nc_u32_e32 v21, 64, v19
6193; GFX11-NEXT:    v_lshrrev_b64 v[12:13], v19, v[4:5]
6194; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v20, v[0:1]
6195; GFX11-NEXT:    v_lshlrev_b64 v[16:17], v16, v[6:7]
6196; GFX11-NEXT:    v_cmp_gt_u32_e64 s0, 64, v19
6197; GFX11-NEXT:    v_or_b32_e32 v10, v10, v8
6198; GFX11-NEXT:    v_or_b32_e32 v11, v11, v9
6199; GFX11-NEXT:    v_lshrrev_b64 v[8:9], v21, v[6:7]
6200; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 0, v19
6201; GFX11-NEXT:    v_or_b32_e32 v12, v12, v16
6202; GFX11-NEXT:    v_or_b32_e32 v13, v13, v17
6203; GFX11-NEXT:    v_dual_cndmask_b32 v10, v0, v10 :: v_dual_cndmask_b32 v11, v1, v11
6204; GFX11-NEXT:    v_lshrrev_b64 v[0:1], v19, v[6:7]
6205; GFX11-NEXT:    v_cndmask_b32_e32 v7, 0, v15, vcc_lo
6206; GFX11-NEXT:    v_cndmask_b32_e64 v8, v8, v12, s0
6207; GFX11-NEXT:    v_cmp_eq_u32_e64 s2, 0, v18
6208; GFX11-NEXT:    v_cndmask_b32_e64 v6, v9, v13, s0
6209; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
6210; GFX11-NEXT:    v_cndmask_b32_e64 v4, v8, v4, s1
6211; GFX11-NEXT:    v_cndmask_b32_e64 v2, v10, v2, s2
6212; GFX11-NEXT:    v_cndmask_b32_e64 v3, v11, v3, s2
6213; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
6214; GFX11-NEXT:    v_cndmask_b32_e64 v5, v6, v5, s1
6215; GFX11-NEXT:    v_cndmask_b32_e64 v6, 0, v0, s0
6216; GFX11-NEXT:    v_cndmask_b32_e64 v8, 0, v1, s0
6217; GFX11-NEXT:    v_or_b32_e32 v0, v14, v4
6218; GFX11-NEXT:    v_or_b32_e32 v1, v7, v5
6219; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6220; GFX11-NEXT:    v_or_b32_e32 v2, v2, v6
6221; GFX11-NEXT:    v_or_b32_e32 v3, v3, v8
6222; GFX11-NEXT:    s_setpc_b64 s[30:31]
6223  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
6224  ret i128 %result
6225}
6226
6227define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, i128 %amt) {
6228; GFX6-LABEL: v_fshr_i128_ssv:
6229; GFX6:       ; %bb.0:
6230; GFX6-NEXT:    v_and_b32_e32 v6, 0x7f, v0
6231; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
6232; GFX6-NEXT:    s_mov_b32 s9, 0
6233; GFX6-NEXT:    v_and_b32_e32 v7, 0x7f, v0
6234; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6235; GFX6-NEXT:    s_lshr_b32 s8, s1, 31
6236; GFX6-NEXT:    s_lshl_b64 s[10:11], s[0:1], 1
6237; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[8:9]
6238; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, 64, v7
6239; GFX6-NEXT:    v_lshr_b64 v[0:1], s[10:11], v0
6240; GFX6-NEXT:    v_lshl_b64 v[2:3], s[0:1], v7
6241; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, 64, v7
6242; GFX6-NEXT:    v_lshl_b64 v[4:5], s[10:11], v7
6243; GFX6-NEXT:    v_or_b32_e32 v2, v0, v2
6244; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
6245; GFX6-NEXT:    v_lshl_b64 v[0:1], s[10:11], v8
6246; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
6247; GFX6-NEXT:    v_cndmask_b32_e32 v8, 0, v4, vcc
6248; GFX6-NEXT:    v_cndmask_b32_e32 v9, 0, v5, vcc
6249; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
6250; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6251; GFX6-NEXT:    v_mov_b32_e32 v2, s0
6252; GFX6-NEXT:    v_mov_b32_e32 v3, s1
6253; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
6254; GFX6-NEXT:    v_cndmask_b32_e32 v7, v0, v2, vcc
6255; GFX6-NEXT:    v_cndmask_b32_e32 v10, v1, v3, vcc
6256; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 64, v6
6257; GFX6-NEXT:    v_lshr_b64 v[0:1], s[4:5], v6
6258; GFX6-NEXT:    v_lshl_b64 v[2:3], s[6:7], v2
6259; GFX6-NEXT:    v_subrev_i32_e32 v11, vcc, 64, v6
6260; GFX6-NEXT:    v_or_b32_e32 v2, v0, v2
6261; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
6262; GFX6-NEXT:    v_lshr_b64 v[0:1], s[6:7], v11
6263; GFX6-NEXT:    v_lshr_b64 v[4:5], s[6:7], v6
6264; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
6265; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
6266; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6267; GFX6-NEXT:    v_mov_b32_e32 v2, s4
6268; GFX6-NEXT:    v_mov_b32_e32 v3, s5
6269; GFX6-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v6
6270; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
6271; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
6272; GFX6-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
6273; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
6274; GFX6-NEXT:    v_or_b32_e32 v0, v8, v0
6275; GFX6-NEXT:    v_or_b32_e32 v1, v9, v1
6276; GFX6-NEXT:    v_or_b32_e32 v2, v7, v2
6277; GFX6-NEXT:    v_or_b32_e32 v3, v10, v3
6278; GFX6-NEXT:    ; return to shader part epilog
6279;
6280; GFX8-LABEL: v_fshr_i128_ssv:
6281; GFX8:       ; %bb.0:
6282; GFX8-NEXT:    v_and_b32_e32 v6, 0x7f, v0
6283; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
6284; GFX8-NEXT:    s_mov_b32 s9, 0
6285; GFX8-NEXT:    v_and_b32_e32 v7, 0x7f, v0
6286; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6287; GFX8-NEXT:    s_lshr_b32 s8, s1, 31
6288; GFX8-NEXT:    s_lshl_b64 s[10:11], s[0:1], 1
6289; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[8:9]
6290; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, 64, v7
6291; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v0, s[10:11]
6292; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v7, s[0:1]
6293; GFX8-NEXT:    v_subrev_u32_e32 v8, vcc, 64, v7
6294; GFX8-NEXT:    v_lshlrev_b64 v[4:5], v7, s[10:11]
6295; GFX8-NEXT:    v_or_b32_e32 v2, v0, v2
6296; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
6297; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v8, s[10:11]
6298; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
6299; GFX8-NEXT:    v_cndmask_b32_e32 v8, 0, v4, vcc
6300; GFX8-NEXT:    v_cndmask_b32_e32 v9, 0, v5, vcc
6301; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
6302; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6303; GFX8-NEXT:    v_mov_b32_e32 v2, s0
6304; GFX8-NEXT:    v_mov_b32_e32 v3, s1
6305; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
6306; GFX8-NEXT:    v_cndmask_b32_e32 v7, v0, v2, vcc
6307; GFX8-NEXT:    v_cndmask_b32_e32 v10, v1, v3, vcc
6308; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 64, v6
6309; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v6, s[4:5]
6310; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v2, s[6:7]
6311; GFX8-NEXT:    v_subrev_u32_e32 v11, vcc, 64, v6
6312; GFX8-NEXT:    v_or_b32_e32 v2, v0, v2
6313; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
6314; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v11, s[6:7]
6315; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v6, s[6:7]
6316; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
6317; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
6318; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6319; GFX8-NEXT:    v_mov_b32_e32 v2, s4
6320; GFX8-NEXT:    v_mov_b32_e32 v3, s5
6321; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v6
6322; GFX8-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
6323; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
6324; GFX8-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
6325; GFX8-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
6326; GFX8-NEXT:    v_or_b32_e32 v0, v8, v0
6327; GFX8-NEXT:    v_or_b32_e32 v1, v9, v1
6328; GFX8-NEXT:    v_or_b32_e32 v2, v7, v2
6329; GFX8-NEXT:    v_or_b32_e32 v3, v10, v3
6330; GFX8-NEXT:    ; return to shader part epilog
6331;
6332; GFX9-LABEL: v_fshr_i128_ssv:
6333; GFX9:       ; %bb.0:
6334; GFX9-NEXT:    v_and_b32_e32 v6, 0x7f, v0
6335; GFX9-NEXT:    v_xor_b32_e32 v0, -1, v0
6336; GFX9-NEXT:    s_mov_b32 s9, 0
6337; GFX9-NEXT:    v_and_b32_e32 v7, 0x7f, v0
6338; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6339; GFX9-NEXT:    s_lshr_b32 s8, s1, 31
6340; GFX9-NEXT:    s_lshl_b64 s[10:11], s[0:1], 1
6341; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[8:9]
6342; GFX9-NEXT:    v_sub_u32_e32 v0, 64, v7
6343; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v0, s[10:11]
6344; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v7, s[0:1]
6345; GFX9-NEXT:    v_subrev_u32_e32 v8, 64, v7
6346; GFX9-NEXT:    v_lshlrev_b64 v[4:5], v7, s[10:11]
6347; GFX9-NEXT:    v_or_b32_e32 v2, v0, v2
6348; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
6349; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v8, s[10:11]
6350; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
6351; GFX9-NEXT:    v_cndmask_b32_e32 v8, 0, v4, vcc
6352; GFX9-NEXT:    v_cndmask_b32_e32 v9, 0, v5, vcc
6353; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
6354; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6355; GFX9-NEXT:    v_mov_b32_e32 v2, s0
6356; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
6357; GFX9-NEXT:    v_mov_b32_e32 v3, s1
6358; GFX9-NEXT:    v_cndmask_b32_e32 v7, v0, v2, vcc
6359; GFX9-NEXT:    v_sub_u32_e32 v2, 64, v6
6360; GFX9-NEXT:    v_cndmask_b32_e32 v10, v1, v3, vcc
6361; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v6, s[4:5]
6362; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v2, s[6:7]
6363; GFX9-NEXT:    v_subrev_u32_e32 v11, 64, v6
6364; GFX9-NEXT:    v_or_b32_e32 v2, v0, v2
6365; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
6366; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v11, s[6:7]
6367; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v6, s[6:7]
6368; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
6369; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
6370; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6371; GFX9-NEXT:    v_mov_b32_e32 v2, s4
6372; GFX9-NEXT:    v_mov_b32_e32 v3, s5
6373; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v6
6374; GFX9-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
6375; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
6376; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
6377; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
6378; GFX9-NEXT:    v_or_b32_e32 v0, v8, v0
6379; GFX9-NEXT:    v_or_b32_e32 v1, v9, v1
6380; GFX9-NEXT:    v_or_b32_e32 v2, v7, v2
6381; GFX9-NEXT:    v_or_b32_e32 v3, v10, v3
6382; GFX9-NEXT:    ; return to shader part epilog
6383;
6384; GFX10-LABEL: v_fshr_i128_ssv:
6385; GFX10:       ; %bb.0:
6386; GFX10-NEXT:    v_xor_b32_e32 v1, -1, v0
6387; GFX10-NEXT:    v_and_b32_e32 v13, 0x7f, v0
6388; GFX10-NEXT:    s_mov_b32 s9, 0
6389; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6390; GFX10-NEXT:    s_lshr_b32 s8, s1, 31
6391; GFX10-NEXT:    v_and_b32_e32 v12, 0x7f, v1
6392; GFX10-NEXT:    v_sub_nc_u32_e32 v8, 64, v13
6393; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
6394; GFX10-NEXT:    s_or_b64 s[8:9], s[2:3], s[8:9]
6395; GFX10-NEXT:    v_subrev_nc_u32_e32 v14, 64, v13
6396; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 64, v12
6397; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v12, s[8:9]
6398; GFX10-NEXT:    v_subrev_nc_u32_e32 v10, 64, v12
6399; GFX10-NEXT:    v_lshrrev_b64 v[4:5], v13, s[4:5]
6400; GFX10-NEXT:    v_lshlrev_b64 v[8:9], v8, s[6:7]
6401; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v2, s[0:1]
6402; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v12
6403; GFX10-NEXT:    v_lshlrev_b64 v[10:11], v10, s[0:1]
6404; GFX10-NEXT:    v_lshlrev_b64 v[6:7], v12, s[0:1]
6405; GFX10-NEXT:    v_cmp_gt_u32_e64 s0, 64, v13
6406; GFX10-NEXT:    v_or_b32_e32 v4, v4, v8
6407; GFX10-NEXT:    v_or_b32_e32 v2, v2, v0
6408; GFX10-NEXT:    v_or_b32_e32 v3, v3, v1
6409; GFX10-NEXT:    v_lshrrev_b64 v[0:1], v14, s[6:7]
6410; GFX10-NEXT:    v_or_b32_e32 v5, v5, v9
6411; GFX10-NEXT:    v_cmp_eq_u32_e64 s1, 0, v13
6412; GFX10-NEXT:    v_cndmask_b32_e32 v8, v10, v2, vcc_lo
6413; GFX10-NEXT:    v_cndmask_b32_e32 v10, v11, v3, vcc_lo
6414; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v13, s[6:7]
6415; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s0
6416; GFX10-NEXT:    v_cmp_eq_u32_e64 s2, 0, v12
6417; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s0
6418; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc_lo
6419; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0, v7, vcc_lo
6420; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, s4, s1
6421; GFX10-NEXT:    v_cndmask_b32_e64 v5, v8, s8, s2
6422; GFX10-NEXT:    v_cndmask_b32_e64 v7, v10, s9, s2
6423; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s5, s1
6424; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s0
6425; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s0
6426; GFX10-NEXT:    v_or_b32_e32 v0, v6, v0
6427; GFX10-NEXT:    v_or_b32_e32 v1, v4, v1
6428; GFX10-NEXT:    v_or_b32_e32 v2, v5, v2
6429; GFX10-NEXT:    v_or_b32_e32 v3, v7, v3
6430; GFX10-NEXT:    ; return to shader part epilog
6431;
6432; GFX11-LABEL: v_fshr_i128_ssv:
6433; GFX11:       ; %bb.0:
6434; GFX11-NEXT:    v_xor_b32_e32 v1, -1, v0
6435; GFX11-NEXT:    s_lshr_b32 s8, s1, 31
6436; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
6437; GFX11-NEXT:    s_mov_b32 s9, 0
6438; GFX11-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6439; GFX11-NEXT:    v_and_b32_e32 v12, 0x7f, v1
6440; GFX11-NEXT:    s_or_b64 s[8:9], s[2:3], s[8:9]
6441; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6442; GFX11-NEXT:    v_lshlrev_b64 v[6:7], v12, s[0:1]
6443; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v12
6444; GFX11-NEXT:    v_and_b32_e32 v13, 0x7f, v0
6445; GFX11-NEXT:    v_sub_nc_u32_e32 v2, 64, v12
6446; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v12, s[8:9]
6447; GFX11-NEXT:    v_subrev_nc_u32_e32 v10, 64, v12
6448; GFX11-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc_lo
6449; GFX11-NEXT:    v_sub_nc_u32_e32 v8, 64, v13
6450; GFX11-NEXT:    v_lshrrev_b64 v[2:3], v2, s[0:1]
6451; GFX11-NEXT:    v_subrev_nc_u32_e32 v14, 64, v13
6452; GFX11-NEXT:    v_lshrrev_b64 v[4:5], v13, s[4:5]
6453; GFX11-NEXT:    v_lshlrev_b64 v[10:11], v10, s[0:1]
6454; GFX11-NEXT:    v_lshlrev_b64 v[8:9], v8, s[6:7]
6455; GFX11-NEXT:    v_cmp_gt_u32_e64 s0, 64, v13
6456; GFX11-NEXT:    v_or_b32_e32 v2, v2, v0
6457; GFX11-NEXT:    v_or_b32_e32 v3, v3, v1
6458; GFX11-NEXT:    v_lshrrev_b64 v[0:1], v14, s[6:7]
6459; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 0, v13
6460; GFX11-NEXT:    v_or_b32_e32 v4, v4, v8
6461; GFX11-NEXT:    v_or_b32_e32 v5, v5, v9
6462; GFX11-NEXT:    v_cndmask_b32_e32 v8, v10, v2, vcc_lo
6463; GFX11-NEXT:    v_cndmask_b32_e32 v10, v11, v3, vcc_lo
6464; GFX11-NEXT:    v_lshrrev_b64 v[2:3], v13, s[6:7]
6465; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s0
6466; GFX11-NEXT:    v_cmp_eq_u32_e64 s2, 0, v12
6467; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s0
6468; GFX11-NEXT:    v_cndmask_b32_e32 v4, 0, v7, vcc_lo
6469; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6470; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, s4, s1
6471; GFX11-NEXT:    v_cndmask_b32_e64 v5, v8, s8, s2
6472; GFX11-NEXT:    v_cndmask_b32_e64 v7, v10, s9, s2
6473; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s5, s1
6474; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s0
6475; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s0
6476; GFX11-NEXT:    v_or_b32_e32 v0, v6, v0
6477; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6478; GFX11-NEXT:    v_or_b32_e32 v1, v4, v1
6479; GFX11-NEXT:    v_or_b32_e32 v2, v5, v2
6480; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
6481; GFX11-NEXT:    v_or_b32_e32 v3, v7, v3
6482; GFX11-NEXT:    ; return to shader part epilog
6483  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
6484  %cast.result = bitcast i128 %result to <4 x float>
6485  ret <4 x float> %cast.result
6486}
6487
; Funnel-shift-right of i128 with mixed operand placement: %lhs and %amt are
; marked inreg, %rhs is a plain argument. As read from the checked sequences,
; %lhs is consumed from s[0:3], %amt from s[4:5] (masked with 0x7f) and %rhs
; from v[0:3]; the final v_or instructions leave the result in v[0:3].
; The assertions in this function are autogenerated (see the NOTE on the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
6488define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 inreg %amt) {
6489; GFX6-LABEL: v_fshr_i128_svs:
6490; GFX6:       ; %bb.0:
6491; GFX6-NEXT:    s_movk_i32 s6, 0x7f
6492; GFX6-NEXT:    s_mov_b32 s7, 0
6493; GFX6-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
6494; GFX6-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
6495; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6496; GFX6-NEXT:    s_lshr_b32 s6, s1, 31
6497; GFX6-NEXT:    s_lshl_b64 s[10:11], s[0:1], 1
6498; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[6:7]
6499; GFX6-NEXT:    s_sub_i32 s9, s4, 64
6500; GFX6-NEXT:    s_sub_i32 s5, 64, s4
6501; GFX6-NEXT:    s_cmp_lt_u32 s4, 64
6502; GFX6-NEXT:    s_cselect_b32 s12, 1, 0
6503; GFX6-NEXT:    s_cmp_eq_u32 s4, 0
6504; GFX6-NEXT:    s_cselect_b32 s13, 1, 0
6505; GFX6-NEXT:    s_lshl_b64 s[2:3], s[10:11], s4
6506; GFX6-NEXT:    s_lshr_b64 s[6:7], s[10:11], s5
6507; GFX6-NEXT:    s_lshl_b64 s[4:5], s[0:1], s4
6508; GFX6-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
6509; GFX6-NEXT:    s_lshl_b64 s[6:7], s[10:11], s9
6510; GFX6-NEXT:    s_cmp_lg_u32 s12, 0
6511; GFX6-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
6512; GFX6-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
6513; GFX6-NEXT:    s_cmp_lg_u32 s13, 0
6514; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
6515; GFX6-NEXT:    s_sub_i32 s4, s8, 64
6516; GFX6-NEXT:    s_sub_i32 s5, 64, s8
6517; GFX6-NEXT:    s_cmp_lt_u32 s8, 64
6518; GFX6-NEXT:    s_cselect_b32 s6, 1, 0
6519; GFX6-NEXT:    s_cmp_eq_u32 s8, 0
6520; GFX6-NEXT:    v_lshr_b64 v[4:5], v[0:1], s8
6521; GFX6-NEXT:    v_lshl_b64 v[6:7], v[2:3], s5
6522; GFX6-NEXT:    s_cselect_b32 s7, 1, 0
6523; GFX6-NEXT:    v_lshr_b64 v[8:9], v[2:3], s8
6524; GFX6-NEXT:    v_lshr_b64 v[2:3], v[2:3], s4
6525; GFX6-NEXT:    s_and_b32 s4, 1, s6
6526; GFX6-NEXT:    v_or_b32_e32 v4, v4, v6
6527; GFX6-NEXT:    v_or_b32_e32 v5, v5, v7
6528; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
6529; GFX6-NEXT:    s_and_b32 s4, 1, s7
6530; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
6531; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
6532; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
6533; GFX6-NEXT:    s_and_b32 s4, 1, s6
6534; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
6535; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
6536; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
6537; GFX6-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
6538; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
6539; GFX6-NEXT:    v_or_b32_e32 v0, s2, v0
6540; GFX6-NEXT:    v_or_b32_e32 v1, s3, v1
6541; GFX6-NEXT:    v_or_b32_e32 v2, s0, v2
6542; GFX6-NEXT:    v_or_b32_e32 v3, s1, v3
6543; GFX6-NEXT:    ; return to shader part epilog
6544;
6545; GFX8-LABEL: v_fshr_i128_svs:
6546; GFX8:       ; %bb.0:
6547; GFX8-NEXT:    s_movk_i32 s6, 0x7f
6548; GFX8-NEXT:    s_mov_b32 s7, 0
6549; GFX8-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
6550; GFX8-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
6551; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6552; GFX8-NEXT:    s_lshr_b32 s6, s1, 31
6553; GFX8-NEXT:    s_lshl_b64 s[10:11], s[0:1], 1
6554; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[6:7]
6555; GFX8-NEXT:    s_sub_i32 s9, s4, 64
6556; GFX8-NEXT:    s_sub_i32 s5, 64, s4
6557; GFX8-NEXT:    s_cmp_lt_u32 s4, 64
6558; GFX8-NEXT:    s_cselect_b32 s12, 1, 0
6559; GFX8-NEXT:    s_cmp_eq_u32 s4, 0
6560; GFX8-NEXT:    s_cselect_b32 s13, 1, 0
6561; GFX8-NEXT:    s_lshl_b64 s[2:3], s[10:11], s4
6562; GFX8-NEXT:    s_lshr_b64 s[6:7], s[10:11], s5
6563; GFX8-NEXT:    s_lshl_b64 s[4:5], s[0:1], s4
6564; GFX8-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
6565; GFX8-NEXT:    s_lshl_b64 s[6:7], s[10:11], s9
6566; GFX8-NEXT:    s_cmp_lg_u32 s12, 0
6567; GFX8-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
6568; GFX8-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
6569; GFX8-NEXT:    s_cmp_lg_u32 s13, 0
6570; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
6571; GFX8-NEXT:    s_sub_i32 s4, s8, 64
6572; GFX8-NEXT:    s_sub_i32 s5, 64, s8
6573; GFX8-NEXT:    s_cmp_lt_u32 s8, 64
6574; GFX8-NEXT:    s_cselect_b32 s6, 1, 0
6575; GFX8-NEXT:    s_cmp_eq_u32 s8, 0
6576; GFX8-NEXT:    v_lshrrev_b64 v[4:5], s8, v[0:1]
6577; GFX8-NEXT:    v_lshlrev_b64 v[6:7], s5, v[2:3]
6578; GFX8-NEXT:    s_cselect_b32 s7, 1, 0
6579; GFX8-NEXT:    v_lshrrev_b64 v[8:9], s8, v[2:3]
6580; GFX8-NEXT:    v_lshrrev_b64 v[2:3], s4, v[2:3]
6581; GFX8-NEXT:    s_and_b32 s4, 1, s6
6582; GFX8-NEXT:    v_or_b32_e32 v4, v4, v6
6583; GFX8-NEXT:    v_or_b32_e32 v5, v5, v7
6584; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
6585; GFX8-NEXT:    s_and_b32 s4, 1, s7
6586; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
6587; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
6588; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
6589; GFX8-NEXT:    s_and_b32 s4, 1, s6
6590; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
6591; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
6592; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
6593; GFX8-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
6594; GFX8-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
6595; GFX8-NEXT:    v_or_b32_e32 v0, s2, v0
6596; GFX8-NEXT:    v_or_b32_e32 v1, s3, v1
6597; GFX8-NEXT:    v_or_b32_e32 v2, s0, v2
6598; GFX8-NEXT:    v_or_b32_e32 v3, s1, v3
6599; GFX8-NEXT:    ; return to shader part epilog
6600;
6601; GFX9-LABEL: v_fshr_i128_svs:
6602; GFX9:       ; %bb.0:
6603; GFX9-NEXT:    s_movk_i32 s6, 0x7f
6604; GFX9-NEXT:    s_mov_b32 s7, 0
6605; GFX9-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
6606; GFX9-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
6607; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6608; GFX9-NEXT:    s_lshr_b32 s6, s1, 31
6609; GFX9-NEXT:    s_lshl_b64 s[10:11], s[0:1], 1
6610; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[6:7]
6611; GFX9-NEXT:    s_sub_i32 s9, s4, 64
6612; GFX9-NEXT:    s_sub_i32 s5, 64, s4
6613; GFX9-NEXT:    s_cmp_lt_u32 s4, 64
6614; GFX9-NEXT:    s_cselect_b32 s12, 1, 0
6615; GFX9-NEXT:    s_cmp_eq_u32 s4, 0
6616; GFX9-NEXT:    s_cselect_b32 s13, 1, 0
6617; GFX9-NEXT:    s_lshl_b64 s[2:3], s[10:11], s4
6618; GFX9-NEXT:    s_lshr_b64 s[6:7], s[10:11], s5
6619; GFX9-NEXT:    s_lshl_b64 s[4:5], s[0:1], s4
6620; GFX9-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
6621; GFX9-NEXT:    s_lshl_b64 s[6:7], s[10:11], s9
6622; GFX9-NEXT:    s_cmp_lg_u32 s12, 0
6623; GFX9-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
6624; GFX9-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
6625; GFX9-NEXT:    s_cmp_lg_u32 s13, 0
6626; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
6627; GFX9-NEXT:    s_sub_i32 s4, s8, 64
6628; GFX9-NEXT:    s_sub_i32 s5, 64, s8
6629; GFX9-NEXT:    s_cmp_lt_u32 s8, 64
6630; GFX9-NEXT:    s_cselect_b32 s6, 1, 0
6631; GFX9-NEXT:    s_cmp_eq_u32 s8, 0
6632; GFX9-NEXT:    v_lshrrev_b64 v[4:5], s8, v[0:1]
6633; GFX9-NEXT:    v_lshlrev_b64 v[6:7], s5, v[2:3]
6634; GFX9-NEXT:    s_cselect_b32 s7, 1, 0
6635; GFX9-NEXT:    v_lshrrev_b64 v[8:9], s8, v[2:3]
6636; GFX9-NEXT:    v_lshrrev_b64 v[2:3], s4, v[2:3]
6637; GFX9-NEXT:    s_and_b32 s4, 1, s6
6638; GFX9-NEXT:    v_or_b32_e32 v4, v4, v6
6639; GFX9-NEXT:    v_or_b32_e32 v5, v5, v7
6640; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
6641; GFX9-NEXT:    s_and_b32 s4, 1, s7
6642; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
6643; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
6644; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
6645; GFX9-NEXT:    s_and_b32 s4, 1, s6
6646; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
6647; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
6648; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
6649; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
6650; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
6651; GFX9-NEXT:    v_or_b32_e32 v0, s2, v0
6652; GFX9-NEXT:    v_or_b32_e32 v1, s3, v1
6653; GFX9-NEXT:    v_or_b32_e32 v2, s0, v2
6654; GFX9-NEXT:    v_or_b32_e32 v3, s1, v3
6655; GFX9-NEXT:    ; return to shader part epilog
6656;
6657; GFX10-LABEL: v_fshr_i128_svs:
6658; GFX10:       ; %bb.0:
6659; GFX10-NEXT:    s_movk_i32 s6, 0x7f
6660; GFX10-NEXT:    s_mov_b32 s7, 0
6661; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6662; GFX10-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
6663; GFX10-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
6664; GFX10-NEXT:    s_lshr_b32 s6, s1, 31
6665; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
6666; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[6:7]
6667; GFX10-NEXT:    s_sub_i32 s9, s4, 64
6668; GFX10-NEXT:    s_sub_i32 s5, 64, s4
6669; GFX10-NEXT:    s_cmp_lt_u32 s4, 64
6670; GFX10-NEXT:    v_lshrrev_b64 v[4:5], s8, v[0:1]
6671; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
6672; GFX10-NEXT:    s_cmp_eq_u32 s4, 0
6673; GFX10-NEXT:    s_cselect_b32 s13, 1, 0
6674; GFX10-NEXT:    s_lshr_b64 s[6:7], s[0:1], s5
6675; GFX10-NEXT:    s_lshl_b64 s[10:11], s[2:3], s4
6676; GFX10-NEXT:    s_lshl_b64 s[4:5], s[0:1], s4
6677; GFX10-NEXT:    s_or_b64 s[6:7], s[6:7], s[10:11]
6678; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s9
6679; GFX10-NEXT:    s_cmp_lg_u32 s12, 0
6680; GFX10-NEXT:    s_cselect_b64 s[4:5], s[4:5], 0
6681; GFX10-NEXT:    s_cselect_b64 s[0:1], s[6:7], s[0:1]
6682; GFX10-NEXT:    s_cmp_lg_u32 s13, 0
6683; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
6684; GFX10-NEXT:    s_sub_i32 s0, 64, s8
6685; GFX10-NEXT:    v_lshlrev_b64 v[6:7], s0, v[2:3]
6686; GFX10-NEXT:    s_sub_i32 s0, s8, 64
6687; GFX10-NEXT:    s_cmp_lt_u32 s8, 64
6688; GFX10-NEXT:    v_lshrrev_b64 v[8:9], s0, v[2:3]
6689; GFX10-NEXT:    s_cselect_b32 s1, 1, 0
6690; GFX10-NEXT:    s_cmp_eq_u32 s8, 0
6691; GFX10-NEXT:    v_or_b32_e32 v4, v4, v6
6692; GFX10-NEXT:    s_cselect_b32 s6, 1, 0
6693; GFX10-NEXT:    s_and_b32 s0, 1, s1
6694; GFX10-NEXT:    v_or_b32_e32 v5, v5, v7
6695; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
6696; GFX10-NEXT:    s_and_b32 s0, 1, s6
6697; GFX10-NEXT:    s_and_b32 s1, 1, s1
6698; GFX10-NEXT:    v_lshrrev_b64 v[2:3], s8, v[2:3]
6699; GFX10-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc_lo
6700; GFX10-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc_lo
6701; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
6702; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, s1
6703; GFX10-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc_lo
6704; GFX10-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc_lo
6705; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s0
6706; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s0
6707; GFX10-NEXT:    v_or_b32_e32 v0, s4, v0
6708; GFX10-NEXT:    v_or_b32_e32 v1, s5, v1
6709; GFX10-NEXT:    v_or_b32_e32 v2, s2, v2
6710; GFX10-NEXT:    v_or_b32_e32 v3, s3, v3
6711; GFX10-NEXT:    ; return to shader part epilog
6712;
6713; GFX11-LABEL: v_fshr_i128_svs:
6714; GFX11:       ; %bb.0:
6715; GFX11-NEXT:    s_movk_i32 s6, 0x7f
6716; GFX11-NEXT:    s_mov_b32 s7, 0
6717; GFX11-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6718; GFX11-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
6719; GFX11-NEXT:    s_and_not1_b64 s[4:5], s[6:7], s[4:5]
6720; GFX11-NEXT:    s_lshr_b32 s6, s1, 31
6721; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
6722; GFX11-NEXT:    s_or_b64 s[2:3], s[2:3], s[6:7]
6723; GFX11-NEXT:    s_sub_i32 s9, s4, 64
6724; GFX11-NEXT:    s_sub_i32 s5, 64, s4
6725; GFX11-NEXT:    s_cmp_lt_u32 s4, 64
6726; GFX11-NEXT:    v_lshrrev_b64 v[4:5], s8, v[0:1]
6727; GFX11-NEXT:    s_cselect_b32 s12, 1, 0
6728; GFX11-NEXT:    s_cmp_eq_u32 s4, 0
6729; GFX11-NEXT:    s_cselect_b32 s13, 1, 0
6730; GFX11-NEXT:    s_lshr_b64 s[6:7], s[0:1], s5
6731; GFX11-NEXT:    s_lshl_b64 s[10:11], s[2:3], s4
6732; GFX11-NEXT:    s_lshl_b64 s[4:5], s[0:1], s4
6733; GFX11-NEXT:    s_or_b64 s[6:7], s[6:7], s[10:11]
6734; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], s9
6735; GFX11-NEXT:    s_cmp_lg_u32 s12, 0
6736; GFX11-NEXT:    s_cselect_b64 s[4:5], s[4:5], 0
6737; GFX11-NEXT:    s_cselect_b64 s[0:1], s[6:7], s[0:1]
6738; GFX11-NEXT:    s_cmp_lg_u32 s13, 0
6739; GFX11-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
6740; GFX11-NEXT:    s_sub_i32 s0, 64, s8
6741; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
6742; GFX11-NEXT:    v_lshlrev_b64 v[6:7], s0, v[2:3]
6743; GFX11-NEXT:    s_sub_i32 s0, s8, 64
6744; GFX11-NEXT:    s_cmp_lt_u32 s8, 64
6745; GFX11-NEXT:    v_lshrrev_b64 v[8:9], s0, v[2:3]
6746; GFX11-NEXT:    s_cselect_b32 s1, 1, 0
6747; GFX11-NEXT:    s_cmp_eq_u32 s8, 0
6748; GFX11-NEXT:    v_or_b32_e32 v4, v4, v6
6749; GFX11-NEXT:    s_cselect_b32 s6, 1, 0
6750; GFX11-NEXT:    s_and_b32 s0, 1, s1
6751; GFX11-NEXT:    v_or_b32_e32 v5, v5, v7
6752; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
6753; GFX11-NEXT:    s_and_b32 s0, 1, s6
6754; GFX11-NEXT:    s_and_b32 s1, 1, s1
6755; GFX11-NEXT:    v_lshrrev_b64 v[2:3], s8, v[2:3]
6756; GFX11-NEXT:    v_dual_cndmask_b32 v4, v8, v4 :: v_dual_cndmask_b32 v5, v9, v5
6757; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
6758; GFX11-NEXT:    v_cmp_ne_u32_e64 s0, 0, s1
6759; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
6760; GFX11-NEXT:    v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
6761; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s0
6762; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s0
6763; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
6764; GFX11-NEXT:    v_or_b32_e32 v0, s4, v0
6765; GFX11-NEXT:    v_or_b32_e32 v1, s5, v1
6766; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6767; GFX11-NEXT:    v_or_b32_e32 v2, s2, v2
6768; GFX11-NEXT:    v_or_b32_e32 v3, s3, v3
6769; GFX11-NEXT:    ; return to shader part epilog
  ; The i128 funnel-shift result is bitcast to <4 x float> to match the
  ; declared return type of this shader function.
6770  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
6771  %cast.result = bitcast i128 %result to <4 x float>
6772  ret <4 x float> %cast.result
6773}
6774
; Funnel-shift-right of i128 with mixed operand placement: %lhs is a plain
; argument, %rhs and %amt are marked inreg. As read from the checked sequences,
; %lhs is consumed from v[0:3], %rhs from s[0:3] and %amt from s[4:5] (masked
; with 0x7f); the final v_or instructions leave the result in v[0:3].
; The assertions in this function are autogenerated (see the NOTE on the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
6775define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 inreg %amt) {
6776; GFX6-LABEL: v_fshr_i128_vss:
6777; GFX6:       ; %bb.0:
6778; GFX6-NEXT:    s_mov_b64 s[6:7], 0x7f
6779; GFX6-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
6780; GFX6-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
6781; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
6782; GFX6-NEXT:    s_sub_i32 s5, s4, 64
6783; GFX6-NEXT:    s_sub_i32 s6, 64, s4
6784; GFX6-NEXT:    v_lshl_b64 v[4:5], v[0:1], 1
6785; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
6786; GFX6-NEXT:    s_cmp_lt_u32 s4, 64
6787; GFX6-NEXT:    v_or_b32_e32 v2, v2, v0
6788; GFX6-NEXT:    s_cselect_b32 s7, 1, 0
6789; GFX6-NEXT:    s_cmp_eq_u32 s4, 0
6790; GFX6-NEXT:    s_cselect_b32 s9, 1, 0
6791; GFX6-NEXT:    v_lshr_b64 v[0:1], v[4:5], s6
6792; GFX6-NEXT:    v_lshl_b64 v[6:7], v[2:3], s4
6793; GFX6-NEXT:    v_lshl_b64 v[8:9], v[4:5], s4
6794; GFX6-NEXT:    s_and_b32 s4, 1, s7
6795; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
6796; GFX6-NEXT:    s_and_b32 s4, 1, s9
6797; GFX6-NEXT:    s_sub_i32 s10, s8, 64
6798; GFX6-NEXT:    s_sub_i32 s9, 64, s8
6799; GFX6-NEXT:    v_or_b32_e32 v6, v0, v6
6800; GFX6-NEXT:    v_or_b32_e32 v7, v1, v7
6801; GFX6-NEXT:    v_lshl_b64 v[0:1], v[4:5], s5
6802; GFX6-NEXT:    s_cmp_lt_u32 s8, 64
6803; GFX6-NEXT:    s_cselect_b32 s11, 1, 0
6804; GFX6-NEXT:    s_cmp_eq_u32 s8, 0
6805; GFX6-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
6806; GFX6-NEXT:    v_cndmask_b32_e32 v5, 0, v9, vcc
6807; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
6808; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
6809; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
6810; GFX6-NEXT:    s_cselect_b32 s12, 1, 0
6811; GFX6-NEXT:    s_lshr_b64 s[4:5], s[2:3], s8
6812; GFX6-NEXT:    s_lshr_b64 s[6:7], s[0:1], s8
6813; GFX6-NEXT:    s_lshl_b64 s[8:9], s[2:3], s9
6814; GFX6-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
6815; GFX6-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
6816; GFX6-NEXT:    s_cmp_lg_u32 s11, 0
6817; GFX6-NEXT:    s_cselect_b64 s[2:3], s[6:7], s[2:3]
6818; GFX6-NEXT:    s_cmp_lg_u32 s12, 0
6819; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
6820; GFX6-NEXT:    s_cmp_lg_u32 s11, 0
6821; GFX6-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
6822; GFX6-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
6823; GFX6-NEXT:    s_cselect_b64 s[2:3], s[4:5], 0
6824; GFX6-NEXT:    v_or_b32_e32 v0, s0, v4
6825; GFX6-NEXT:    v_or_b32_e32 v1, s1, v5
6826; GFX6-NEXT:    v_or_b32_e32 v2, s2, v2
6827; GFX6-NEXT:    v_or_b32_e32 v3, s3, v3
6828; GFX6-NEXT:    ; return to shader part epilog
6829;
6830; GFX8-LABEL: v_fshr_i128_vss:
6831; GFX8:       ; %bb.0:
6832; GFX8-NEXT:    s_mov_b64 s[6:7], 0x7f
6833; GFX8-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
6834; GFX8-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
6835; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
6836; GFX8-NEXT:    s_sub_i32 s5, s4, 64
6837; GFX8-NEXT:    s_sub_i32 s6, 64, s4
6838; GFX8-NEXT:    v_lshlrev_b64 v[4:5], 1, v[0:1]
6839; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
6840; GFX8-NEXT:    s_cmp_lt_u32 s4, 64
6841; GFX8-NEXT:    v_or_b32_e32 v2, v2, v0
6842; GFX8-NEXT:    s_cselect_b32 s7, 1, 0
6843; GFX8-NEXT:    s_cmp_eq_u32 s4, 0
6844; GFX8-NEXT:    s_cselect_b32 s9, 1, 0
6845; GFX8-NEXT:    v_lshrrev_b64 v[0:1], s6, v[4:5]
6846; GFX8-NEXT:    v_lshlrev_b64 v[6:7], s4, v[2:3]
6847; GFX8-NEXT:    v_lshlrev_b64 v[8:9], s4, v[4:5]
6848; GFX8-NEXT:    s_and_b32 s4, 1, s7
6849; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
6850; GFX8-NEXT:    s_and_b32 s4, 1, s9
6851; GFX8-NEXT:    s_sub_i32 s10, s8, 64
6852; GFX8-NEXT:    s_sub_i32 s9, 64, s8
6853; GFX8-NEXT:    v_or_b32_e32 v6, v0, v6
6854; GFX8-NEXT:    v_or_b32_e32 v7, v1, v7
6855; GFX8-NEXT:    v_lshlrev_b64 v[0:1], s5, v[4:5]
6856; GFX8-NEXT:    s_cmp_lt_u32 s8, 64
6857; GFX8-NEXT:    s_cselect_b32 s11, 1, 0
6858; GFX8-NEXT:    s_cmp_eq_u32 s8, 0
6859; GFX8-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
6860; GFX8-NEXT:    v_cndmask_b32_e32 v5, 0, v9, vcc
6861; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
6862; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
6863; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
6864; GFX8-NEXT:    s_cselect_b32 s12, 1, 0
6865; GFX8-NEXT:    s_lshr_b64 s[4:5], s[2:3], s8
6866; GFX8-NEXT:    s_lshr_b64 s[6:7], s[0:1], s8
6867; GFX8-NEXT:    s_lshl_b64 s[8:9], s[2:3], s9
6868; GFX8-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
6869; GFX8-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
6870; GFX8-NEXT:    s_cmp_lg_u32 s11, 0
6871; GFX8-NEXT:    s_cselect_b64 s[2:3], s[6:7], s[2:3]
6872; GFX8-NEXT:    s_cmp_lg_u32 s12, 0
6873; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
6874; GFX8-NEXT:    s_cmp_lg_u32 s11, 0
6875; GFX8-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
6876; GFX8-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
6877; GFX8-NEXT:    s_cselect_b64 s[2:3], s[4:5], 0
6878; GFX8-NEXT:    v_or_b32_e32 v0, s0, v4
6879; GFX8-NEXT:    v_or_b32_e32 v1, s1, v5
6880; GFX8-NEXT:    v_or_b32_e32 v2, s2, v2
6881; GFX8-NEXT:    v_or_b32_e32 v3, s3, v3
6882; GFX8-NEXT:    ; return to shader part epilog
6883;
6884; GFX9-LABEL: v_fshr_i128_vss:
6885; GFX9:       ; %bb.0:
6886; GFX9-NEXT:    s_mov_b64 s[6:7], 0x7f
6887; GFX9-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
6888; GFX9-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
6889; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
6890; GFX9-NEXT:    s_sub_i32 s5, s4, 64
6891; GFX9-NEXT:    s_sub_i32 s6, 64, s4
6892; GFX9-NEXT:    v_lshlrev_b64 v[4:5], 1, v[0:1]
6893; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
6894; GFX9-NEXT:    s_cmp_lt_u32 s4, 64
6895; GFX9-NEXT:    v_or_b32_e32 v2, v2, v0
6896; GFX9-NEXT:    s_cselect_b32 s7, 1, 0
6897; GFX9-NEXT:    s_cmp_eq_u32 s4, 0
6898; GFX9-NEXT:    s_cselect_b32 s9, 1, 0
6899; GFX9-NEXT:    v_lshrrev_b64 v[0:1], s6, v[4:5]
6900; GFX9-NEXT:    v_lshlrev_b64 v[6:7], s4, v[2:3]
6901; GFX9-NEXT:    v_lshlrev_b64 v[8:9], s4, v[4:5]
6902; GFX9-NEXT:    s_and_b32 s4, 1, s7
6903; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
6904; GFX9-NEXT:    s_and_b32 s4, 1, s9
6905; GFX9-NEXT:    s_sub_i32 s10, s8, 64
6906; GFX9-NEXT:    s_sub_i32 s9, 64, s8
6907; GFX9-NEXT:    v_or_b32_e32 v6, v0, v6
6908; GFX9-NEXT:    v_or_b32_e32 v7, v1, v7
6909; GFX9-NEXT:    v_lshlrev_b64 v[0:1], s5, v[4:5]
6910; GFX9-NEXT:    s_cmp_lt_u32 s8, 64
6911; GFX9-NEXT:    s_cselect_b32 s11, 1, 0
6912; GFX9-NEXT:    s_cmp_eq_u32 s8, 0
6913; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
6914; GFX9-NEXT:    v_cndmask_b32_e32 v5, 0, v9, vcc
6915; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
6916; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
6917; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
6918; GFX9-NEXT:    s_cselect_b32 s12, 1, 0
6919; GFX9-NEXT:    s_lshr_b64 s[4:5], s[2:3], s8
6920; GFX9-NEXT:    s_lshr_b64 s[6:7], s[0:1], s8
6921; GFX9-NEXT:    s_lshl_b64 s[8:9], s[2:3], s9
6922; GFX9-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
6923; GFX9-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
6924; GFX9-NEXT:    s_cmp_lg_u32 s11, 0
6925; GFX9-NEXT:    s_cselect_b64 s[2:3], s[6:7], s[2:3]
6926; GFX9-NEXT:    s_cmp_lg_u32 s12, 0
6927; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
6928; GFX9-NEXT:    s_cmp_lg_u32 s11, 0
6929; GFX9-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
6930; GFX9-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
6931; GFX9-NEXT:    s_cselect_b64 s[2:3], s[4:5], 0
6932; GFX9-NEXT:    v_or_b32_e32 v0, s0, v4
6933; GFX9-NEXT:    v_or_b32_e32 v1, s1, v5
6934; GFX9-NEXT:    v_or_b32_e32 v2, s2, v2
6935; GFX9-NEXT:    v_or_b32_e32 v3, s3, v3
6936; GFX9-NEXT:    ; return to shader part epilog
6937;
6938; GFX10-LABEL: v_fshr_i128_vss:
6939; GFX10:       ; %bb.0:
6940; GFX10-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
6941; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 31, v1
6942; GFX10-NEXT:    s_mov_b64 s[6:7], 0x7f
6943; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
6944; GFX10-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
6945; GFX10-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
6946; GFX10-NEXT:    v_or_b32_e32 v2, v2, v4
6947; GFX10-NEXT:    s_sub_i32 s6, 64, s4
6948; GFX10-NEXT:    s_sub_i32 s5, s4, 64
6949; GFX10-NEXT:    s_cmp_lt_u32 s4, 64
6950; GFX10-NEXT:    v_lshrrev_b64 v[4:5], s6, v[0:1]
6951; GFX10-NEXT:    v_lshlrev_b64 v[6:7], s4, v[2:3]
6952; GFX10-NEXT:    s_cselect_b32 s7, 1, 0
6953; GFX10-NEXT:    s_cmp_eq_u32 s4, 0
6954; GFX10-NEXT:    v_lshlrev_b64 v[8:9], s4, v[0:1]
6955; GFX10-NEXT:    s_cselect_b32 s9, 1, 0
6956; GFX10-NEXT:    s_and_b32 s4, 1, s7
6957; GFX10-NEXT:    v_lshlrev_b64 v[0:1], s5, v[0:1]
6958; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s4
6959; GFX10-NEXT:    v_or_b32_e32 v4, v4, v6
6960; GFX10-NEXT:    v_or_b32_e32 v5, v5, v7
6961; GFX10-NEXT:    s_and_b32 s4, 1, s9
6962; GFX10-NEXT:    s_sub_i32 s10, s8, 64
6963; GFX10-NEXT:    s_sub_i32 s6, 64, s8
6964; GFX10-NEXT:    s_cmp_lt_u32 s8, 64
6965; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0, v8, vcc_lo
6966; GFX10-NEXT:    s_cselect_b32 s11, 1, 0
6967; GFX10-NEXT:    s_cmp_eq_u32 s8, 0
6968; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0, v9, vcc_lo
6969; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
6970; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
6971; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s4
6972; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
6973; GFX10-NEXT:    s_lshr_b64 s[4:5], s[0:1], s8
6974; GFX10-NEXT:    s_lshl_b64 s[6:7], s[2:3], s6
6975; GFX10-NEXT:    s_lshr_b64 s[8:9], s[2:3], s8
6976; GFX10-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
6977; GFX10-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
6978; GFX10-NEXT:    s_cmp_lg_u32 s11, 0
6979; GFX10-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc_lo
6980; GFX10-NEXT:    s_cselect_b64 s[2:3], s[4:5], s[2:3]
6981; GFX10-NEXT:    s_cmp_lg_u32 s12, 0
6982; GFX10-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc_lo
6983; GFX10-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
6984; GFX10-NEXT:    s_cmp_lg_u32 s11, 0
6985; GFX10-NEXT:    v_or_b32_e32 v0, s0, v6
6986; GFX10-NEXT:    s_cselect_b64 s[2:3], s[8:9], 0
6987; GFX10-NEXT:    v_or_b32_e32 v1, s1, v7
6988; GFX10-NEXT:    v_or_b32_e32 v2, s2, v2
6989; GFX10-NEXT:    v_or_b32_e32 v3, s3, v3
6990; GFX10-NEXT:    ; return to shader part epilog
6991;
6992; GFX11-LABEL: v_fshr_i128_vss:
6993; GFX11:       ; %bb.0:
6994; GFX11-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
6995; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 31, v1
6996; GFX11-NEXT:    s_mov_b64 s[6:7], 0x7f
6997; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
6998; GFX11-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
6999; GFX11-NEXT:    s_and_not1_b64 s[4:5], s[6:7], s[4:5]
7000; GFX11-NEXT:    v_or_b32_e32 v2, v2, v4
7001; GFX11-NEXT:    s_sub_i32 s6, 64, s4
7002; GFX11-NEXT:    s_sub_i32 s5, s4, 64
7003; GFX11-NEXT:    s_cmp_lt_u32 s4, 64
7004; GFX11-NEXT:    v_lshrrev_b64 v[4:5], s6, v[0:1]
7005; GFX11-NEXT:    v_lshlrev_b64 v[6:7], s4, v[2:3]
7006; GFX11-NEXT:    s_cselect_b32 s7, 1, 0
7007; GFX11-NEXT:    s_cmp_eq_u32 s4, 0
7008; GFX11-NEXT:    v_lshlrev_b64 v[8:9], s4, v[0:1]
7009; GFX11-NEXT:    s_cselect_b32 s9, 1, 0
7010; GFX11-NEXT:    s_and_b32 s4, 1, s7
7011; GFX11-NEXT:    v_lshlrev_b64 v[0:1], s5, v[0:1]
7012; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s4
7013; GFX11-NEXT:    v_or_b32_e32 v4, v4, v6
7014; GFX11-NEXT:    v_or_b32_e32 v5, v5, v7
7015; GFX11-NEXT:    s_and_b32 s4, 1, s9
7016; GFX11-NEXT:    s_sub_i32 s10, s8, 64
7017; GFX11-NEXT:    s_sub_i32 s6, 64, s8
7018; GFX11-NEXT:    s_cmp_lt_u32 s8, 64
7019; GFX11-NEXT:    v_dual_cndmask_b32 v6, 0, v8 :: v_dual_cndmask_b32 v7, 0, v9
7020; GFX11-NEXT:    s_cselect_b32 s11, 1, 0
7021; GFX11-NEXT:    s_cmp_eq_u32 s8, 0
7022; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
7023; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s4
7024; GFX11-NEXT:    s_cselect_b32 s12, 1, 0
7025; GFX11-NEXT:    s_lshr_b64 s[4:5], s[0:1], s8
7026; GFX11-NEXT:    s_lshl_b64 s[6:7], s[2:3], s6
7027; GFX11-NEXT:    s_lshr_b64 s[8:9], s[2:3], s8
7028; GFX11-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
7029; GFX11-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
7030; GFX11-NEXT:    s_cmp_lg_u32 s11, 0
7031; GFX11-NEXT:    v_dual_cndmask_b32 v2, v0, v2 :: v_dual_cndmask_b32 v3, v1, v3
7032; GFX11-NEXT:    s_cselect_b64 s[2:3], s[4:5], s[2:3]
7033; GFX11-NEXT:    s_cmp_lg_u32 s12, 0
7034; GFX11-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
7035; GFX11-NEXT:    s_cmp_lg_u32 s11, 0
7036; GFX11-NEXT:    v_or_b32_e32 v0, s0, v6
7037; GFX11-NEXT:    s_cselect_b64 s[2:3], s[8:9], 0
7038; GFX11-NEXT:    v_or_b32_e32 v1, s1, v7
7039; GFX11-NEXT:    v_or_b32_e32 v2, s2, v2
7040; GFX11-NEXT:    v_or_b32_e32 v3, s3, v3
7041; GFX11-NEXT:    ; return to shader part epilog
  ; The i128 funnel-shift result is bitcast to <4 x float> to match the
  ; declared return type of this shader function.
7042  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
7043  %cast.result = bitcast i128 %result to <4 x float>
7044  ret <4 x float> %cast.result
7045}
7046
; Funnel-shift-right of i128 by the constant amount 65 with both operands
; marked inreg; the i128 result is returned directly (no bitcast).
; As read from the checked sequences, %lhs is consumed from s[0:3] and only the
; upper half of %rhs (s[6:7]) is read: the low result half is (s[6:7] >> 1)
; with bit 0 of %lhs placed in the top bit, and the high result half is
; (s[0:1] >> 1) with bit 0 of s2 placed in the top bit.
; The assertions in this function are autogenerated (see the NOTE on the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
7047define amdgpu_ps i128 @s_fshr_i128_65(i128 inreg %lhs, i128 inreg %rhs) {
7048; GFX6-LABEL: s_fshr_i128_65:
7049; GFX6:       ; %bb.0:
7050; GFX6-NEXT:    s_mov_b32 s4, 0
7051; GFX6-NEXT:    s_lshl_b32 s5, s0, 31
7052; GFX6-NEXT:    s_lshl_b32 s3, s2, 31
7053; GFX6-NEXT:    s_mov_b32 s2, s4
7054; GFX6-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
7055; GFX6-NEXT:    s_or_b64 s[2:3], s[2:3], s[0:1]
7056; GFX6-NEXT:    s_lshr_b64 s[0:1], s[6:7], 1
7057; GFX6-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
7058; GFX6-NEXT:    ; return to shader part epilog
7059;
7060; GFX8-LABEL: s_fshr_i128_65:
7061; GFX8:       ; %bb.0:
7062; GFX8-NEXT:    s_mov_b32 s4, 0
7063; GFX8-NEXT:    s_lshl_b32 s5, s0, 31
7064; GFX8-NEXT:    s_lshl_b32 s3, s2, 31
7065; GFX8-NEXT:    s_mov_b32 s2, s4
7066; GFX8-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
7067; GFX8-NEXT:    s_or_b64 s[2:3], s[2:3], s[0:1]
7068; GFX8-NEXT:    s_lshr_b64 s[0:1], s[6:7], 1
7069; GFX8-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
7070; GFX8-NEXT:    ; return to shader part epilog
7071;
7072; GFX9-LABEL: s_fshr_i128_65:
7073; GFX9:       ; %bb.0:
7074; GFX9-NEXT:    s_mov_b32 s4, 0
7075; GFX9-NEXT:    s_lshl_b32 s5, s0, 31
7076; GFX9-NEXT:    s_lshl_b32 s3, s2, 31
7077; GFX9-NEXT:    s_mov_b32 s2, s4
7078; GFX9-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
7079; GFX9-NEXT:    s_or_b64 s[2:3], s[2:3], s[0:1]
7080; GFX9-NEXT:    s_lshr_b64 s[0:1], s[6:7], 1
7081; GFX9-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
7082; GFX9-NEXT:    ; return to shader part epilog
7083;
7084; GFX10-LABEL: s_fshr_i128_65:
7085; GFX10:       ; %bb.0:
7086; GFX10-NEXT:    s_mov_b32 s4, 0
7087; GFX10-NEXT:    s_lshl_b32 s5, s0, 31
7088; GFX10-NEXT:    s_lshl_b32 s3, s2, 31
7089; GFX10-NEXT:    s_mov_b32 s2, s4
7090; GFX10-NEXT:    s_lshr_b64 s[6:7], s[6:7], 1
7091; GFX10-NEXT:    s_lshr_b64 s[8:9], s[0:1], 1
7092; GFX10-NEXT:    s_or_b64 s[0:1], s[4:5], s[6:7]
7093; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[8:9]
7094; GFX10-NEXT:    ; return to shader part epilog
7095;
7096; GFX11-LABEL: s_fshr_i128_65:
7097; GFX11:       ; %bb.0:
7098; GFX11-NEXT:    s_mov_b32 s4, 0
7099; GFX11-NEXT:    s_lshl_b32 s5, s0, 31
7100; GFX11-NEXT:    s_lshl_b32 s3, s2, 31
7101; GFX11-NEXT:    s_mov_b32 s2, s4
7102; GFX11-NEXT:    s_lshr_b64 s[6:7], s[6:7], 1
7103; GFX11-NEXT:    s_lshr_b64 s[8:9], s[0:1], 1
7104; GFX11-NEXT:    s_or_b64 s[0:1], s[4:5], s[6:7]
7105; GFX11-NEXT:    s_or_b64 s[2:3], s[2:3], s[8:9]
7106; GFX11-NEXT:    ; return to shader part epilog
  ; The shift amount is the literal 65, so no amount register is involved.
7107  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65)
7108  ret i128 %result
7109}
7110
; v_fshr_i128_65: i128 funnel shift right by the constant 65, default (non-shader)
; calling convention, so the checks below use vector registers and end in s_setpc_b64.
; Since the amount is a compile-time constant, every target is expected to emit only
; two 64-bit right shifts by 1 and two 32-bit left shifts by 31, with each 32-bit
; result ORed into the upper dword of one 64-bit half. No compare/v_cndmask sequence
; (as used for variable shift amounts) appears.
; NOTE(review): the check lines are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
7111define i128 @v_fshr_i128_65(i128 %lhs, i128 %rhs) {
7112; GFX6-LABEL: v_fshr_i128_65:
7113; GFX6:       ; %bb.0:
7114; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7115; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 31, v0
7116; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 31, v2
7117; GFX6-NEXT:    v_lshr_b64 v[2:3], v[0:1], 1
7118; GFX6-NEXT:    v_lshr_b64 v[0:1], v[6:7], 1
7119; GFX6-NEXT:    v_or_b32_e32 v3, v5, v3
7120; GFX6-NEXT:    v_or_b32_e32 v1, v4, v1
7121; GFX6-NEXT:    s_setpc_b64 s[30:31]
7122;
7123; GFX8-LABEL: v_fshr_i128_65:
7124; GFX8:       ; %bb.0:
7125; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7126; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 31, v0
7127; GFX8-NEXT:    v_lshlrev_b32_e32 v5, 31, v2
7128; GFX8-NEXT:    v_lshrrev_b64 v[2:3], 1, v[0:1]
7129; GFX8-NEXT:    v_lshrrev_b64 v[0:1], 1, v[6:7]
7130; GFX8-NEXT:    v_or_b32_e32 v3, v5, v3
7131; GFX8-NEXT:    v_or_b32_e32 v1, v4, v1
7132; GFX8-NEXT:    s_setpc_b64 s[30:31]
7133;
7134; GFX9-LABEL: v_fshr_i128_65:
7135; GFX9:       ; %bb.0:
7136; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7137; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 31, v0
7138; GFX9-NEXT:    v_lshlrev_b32_e32 v5, 31, v2
7139; GFX9-NEXT:    v_lshrrev_b64 v[2:3], 1, v[0:1]
7140; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 1, v[6:7]
7141; GFX9-NEXT:    v_or_b32_e32 v3, v5, v3
7142; GFX9-NEXT:    v_or_b32_e32 v1, v4, v1
7143; GFX9-NEXT:    s_setpc_b64 s[30:31]
7144;
7145; GFX10-LABEL: v_fshr_i128_65:
7146; GFX10:       ; %bb.0:
7147; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7148; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
7149; GFX10-NEXT:    v_mov_b32_e32 v8, v2
7150; GFX10-NEXT:    v_lshrrev_b64 v[4:5], 1, v[6:7]
7151; GFX10-NEXT:    v_lshrrev_b64 v[2:3], 1, v[0:1]
7152; GFX10-NEXT:    v_lshlrev_b32_e32 v9, 31, v0
7153; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 31, v8
7154; GFX10-NEXT:    v_or_b32_e32 v1, v9, v5
7155; GFX10-NEXT:    v_or_b32_e32 v3, v0, v3
7156; GFX10-NEXT:    v_mov_b32_e32 v0, v4
7157; GFX10-NEXT:    s_setpc_b64 s[30:31]
7158;
7159; GFX11-LABEL: v_fshr_i128_65:
7160; GFX11:       ; %bb.0:
7161; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7162; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
7163; GFX11-NEXT:    v_dual_mov_b32 v8, v2 :: v_dual_lshlrev_b32 v9, 31, v0
7164; GFX11-NEXT:    v_lshrrev_b64 v[4:5], 1, v[6:7]
7165; GFX11-NEXT:    v_lshrrev_b64 v[2:3], 1, v[0:1]
7166; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
7167; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 31, v8
7168; GFX11-NEXT:    v_or_b32_e32 v1, v9, v5
7169; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
7170; GFX11-NEXT:    v_or_b32_e32 v3, v0, v3
7171; GFX11-NEXT:    v_mov_b32_e32 v0, v4
7172; GFX11-NEXT:    s_setpc_b64 s[30:31]
; The shift amount is the immediate 65, i.e. larger than 64 but smaller than the
; 128-bit operand width.
7173  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65)
7174  ret i128 %result
7175}
7176
; s_fshr_v2i128: <2 x i128> funnel shift right with a variable per-element amount.
; All three operands are `inreg` in an amdgpu_ps function, and the expected code
; below is entirely scalar (s_* instructions, no VALU).
; Shape of the expected expansion, repeated once per vector element:
;   * the amount is masked with 0x7f (s_and_b64 against s[18:19] = 0x7f:0) and the
;     inverted amount is produced with s_andn2_b64 (printed as s_and_not1_b64 on GFX11);
;   * %lhs is pre-shifted left by 1 (s_lshl_b64 ..., 1 on both halves, with bit 31 of
;     the low half's upper dword carried over via s_lshr_b32 ..., 31);
;   * a 128-bit left shift by the inverted amount and a 128-bit logical right shift
;     of %rhs by the masked amount are each expanded into 64-bit shifts selected by
;     "amount < 64" (s_cmp_lt_u32) and "amount == 0" (s_cmp_eq_u32) via s_cselect;
;   * the two 128-bit results are ORed together with s_or_b64.
; GFX6/GFX8/GFX9 expect identical sequences; GFX10/GFX11 differ in scheduling and
; register assignment.
; NOTE(review): the check lines are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
7177define amdgpu_ps <2 x i128> @s_fshr_v2i128(<2 x i128> inreg %lhs, <2 x i128> inreg %rhs, <2 x i128> inreg %amt) {
7178; GFX6-LABEL: s_fshr_v2i128:
7179; GFX6:       ; %bb.0:
7180; GFX6-NEXT:    s_movk_i32 s18, 0x7f
7181; GFX6-NEXT:    s_mov_b32 s19, 0
7182; GFX6-NEXT:    s_and_b64 s[22:23], s[16:17], s[18:19]
7183; GFX6-NEXT:    s_andn2_b64 s[16:17], s[18:19], s[16:17]
7184; GFX6-NEXT:    s_lshl_b64 s[24:25], s[0:1], 1
7185; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
7186; GFX6-NEXT:    s_lshr_b32 s0, s1, 31
7187; GFX6-NEXT:    s_mov_b32 s1, s19
7188; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
7189; GFX6-NEXT:    s_sub_i32 s23, s16, 64
7190; GFX6-NEXT:    s_sub_i32 s17, 64, s16
7191; GFX6-NEXT:    s_cmp_lt_u32 s16, 64
7192; GFX6-NEXT:    s_cselect_b32 s28, 1, 0
7193; GFX6-NEXT:    s_cmp_eq_u32 s16, 0
7194; GFX6-NEXT:    s_cselect_b32 s29, 1, 0
7195; GFX6-NEXT:    s_lshl_b64 s[2:3], s[24:25], s16
7196; GFX6-NEXT:    s_lshr_b64 s[26:27], s[24:25], s17
7197; GFX6-NEXT:    s_lshl_b64 s[16:17], s[0:1], s16
7198; GFX6-NEXT:    s_or_b64 s[16:17], s[26:27], s[16:17]
7199; GFX6-NEXT:    s_lshl_b64 s[24:25], s[24:25], s23
7200; GFX6-NEXT:    s_cmp_lg_u32 s28, 0
7201; GFX6-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
7202; GFX6-NEXT:    s_cselect_b64 s[16:17], s[16:17], s[24:25]
7203; GFX6-NEXT:    s_cmp_lg_u32 s29, 0
7204; GFX6-NEXT:    s_cselect_b64 s[16:17], s[0:1], s[16:17]
7205; GFX6-NEXT:    s_sub_i32 s26, s22, 64
7206; GFX6-NEXT:    s_sub_i32 s24, 64, s22
7207; GFX6-NEXT:    s_cmp_lt_u32 s22, 64
7208; GFX6-NEXT:    s_cselect_b32 s27, 1, 0
7209; GFX6-NEXT:    s_cmp_eq_u32 s22, 0
7210; GFX6-NEXT:    s_cselect_b32 s28, 1, 0
7211; GFX6-NEXT:    s_lshr_b64 s[0:1], s[10:11], s22
7212; GFX6-NEXT:    s_lshr_b64 s[22:23], s[8:9], s22
7213; GFX6-NEXT:    s_lshl_b64 s[24:25], s[10:11], s24
7214; GFX6-NEXT:    s_or_b64 s[22:23], s[22:23], s[24:25]
7215; GFX6-NEXT:    s_lshr_b64 s[10:11], s[10:11], s26
7216; GFX6-NEXT:    s_cmp_lg_u32 s27, 0
7217; GFX6-NEXT:    s_cselect_b64 s[10:11], s[22:23], s[10:11]
7218; GFX6-NEXT:    s_cmp_lg_u32 s28, 0
7219; GFX6-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[10:11]
7220; GFX6-NEXT:    s_cmp_lg_u32 s27, 0
7221; GFX6-NEXT:    s_cselect_b64 s[10:11], s[0:1], 0
7222; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[8:9]
7223; GFX6-NEXT:    s_or_b64 s[2:3], s[16:17], s[10:11]
7224; GFX6-NEXT:    s_and_b64 s[8:9], s[20:21], s[18:19]
7225; GFX6-NEXT:    s_andn2_b64 s[10:11], s[18:19], s[20:21]
7226; GFX6-NEXT:    s_lshl_b64 s[6:7], s[6:7], 1
7227; GFX6-NEXT:    s_lshr_b32 s18, s5, 31
7228; GFX6-NEXT:    s_lshl_b64 s[16:17], s[4:5], 1
7229; GFX6-NEXT:    s_or_b64 s[4:5], s[6:7], s[18:19]
7230; GFX6-NEXT:    s_sub_i32 s9, s10, 64
7231; GFX6-NEXT:    s_sub_i32 s11, 64, s10
7232; GFX6-NEXT:    s_cmp_lt_u32 s10, 64
7233; GFX6-NEXT:    s_cselect_b32 s20, 1, 0
7234; GFX6-NEXT:    s_cmp_eq_u32 s10, 0
7235; GFX6-NEXT:    s_cselect_b32 s21, 1, 0
7236; GFX6-NEXT:    s_lshl_b64 s[6:7], s[16:17], s10
7237; GFX6-NEXT:    s_lshr_b64 s[18:19], s[16:17], s11
7238; GFX6-NEXT:    s_lshl_b64 s[10:11], s[4:5], s10
7239; GFX6-NEXT:    s_or_b64 s[10:11], s[18:19], s[10:11]
7240; GFX6-NEXT:    s_lshl_b64 s[16:17], s[16:17], s9
7241; GFX6-NEXT:    s_cmp_lg_u32 s20, 0
7242; GFX6-NEXT:    s_cselect_b64 s[6:7], s[6:7], 0
7243; GFX6-NEXT:    s_cselect_b64 s[10:11], s[10:11], s[16:17]
7244; GFX6-NEXT:    s_cmp_lg_u32 s21, 0
7245; GFX6-NEXT:    s_cselect_b64 s[10:11], s[4:5], s[10:11]
7246; GFX6-NEXT:    s_sub_i32 s18, s8, 64
7247; GFX6-NEXT:    s_sub_i32 s16, 64, s8
7248; GFX6-NEXT:    s_cmp_lt_u32 s8, 64
7249; GFX6-NEXT:    s_cselect_b32 s19, 1, 0
7250; GFX6-NEXT:    s_cmp_eq_u32 s8, 0
7251; GFX6-NEXT:    s_cselect_b32 s20, 1, 0
7252; GFX6-NEXT:    s_lshr_b64 s[4:5], s[14:15], s8
7253; GFX6-NEXT:    s_lshr_b64 s[8:9], s[12:13], s8
7254; GFX6-NEXT:    s_lshl_b64 s[16:17], s[14:15], s16
7255; GFX6-NEXT:    s_or_b64 s[8:9], s[8:9], s[16:17]
7256; GFX6-NEXT:    s_lshr_b64 s[14:15], s[14:15], s18
7257; GFX6-NEXT:    s_cmp_lg_u32 s19, 0
7258; GFX6-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[14:15]
7259; GFX6-NEXT:    s_cmp_lg_u32 s20, 0
7260; GFX6-NEXT:    s_cselect_b64 s[8:9], s[12:13], s[8:9]
7261; GFX6-NEXT:    s_cmp_lg_u32 s19, 0
7262; GFX6-NEXT:    s_cselect_b64 s[12:13], s[4:5], 0
7263; GFX6-NEXT:    s_or_b64 s[4:5], s[6:7], s[8:9]
7264; GFX6-NEXT:    s_or_b64 s[6:7], s[10:11], s[12:13]
7265; GFX6-NEXT:    ; return to shader part epilog
7266;
7267; GFX8-LABEL: s_fshr_v2i128:
7268; GFX8:       ; %bb.0:
7269; GFX8-NEXT:    s_movk_i32 s18, 0x7f
7270; GFX8-NEXT:    s_mov_b32 s19, 0
7271; GFX8-NEXT:    s_and_b64 s[22:23], s[16:17], s[18:19]
7272; GFX8-NEXT:    s_andn2_b64 s[16:17], s[18:19], s[16:17]
7273; GFX8-NEXT:    s_lshl_b64 s[24:25], s[0:1], 1
7274; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
7275; GFX8-NEXT:    s_lshr_b32 s0, s1, 31
7276; GFX8-NEXT:    s_mov_b32 s1, s19
7277; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
7278; GFX8-NEXT:    s_sub_i32 s23, s16, 64
7279; GFX8-NEXT:    s_sub_i32 s17, 64, s16
7280; GFX8-NEXT:    s_cmp_lt_u32 s16, 64
7281; GFX8-NEXT:    s_cselect_b32 s28, 1, 0
7282; GFX8-NEXT:    s_cmp_eq_u32 s16, 0
7283; GFX8-NEXT:    s_cselect_b32 s29, 1, 0
7284; GFX8-NEXT:    s_lshl_b64 s[2:3], s[24:25], s16
7285; GFX8-NEXT:    s_lshr_b64 s[26:27], s[24:25], s17
7286; GFX8-NEXT:    s_lshl_b64 s[16:17], s[0:1], s16
7287; GFX8-NEXT:    s_or_b64 s[16:17], s[26:27], s[16:17]
7288; GFX8-NEXT:    s_lshl_b64 s[24:25], s[24:25], s23
7289; GFX8-NEXT:    s_cmp_lg_u32 s28, 0
7290; GFX8-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
7291; GFX8-NEXT:    s_cselect_b64 s[16:17], s[16:17], s[24:25]
7292; GFX8-NEXT:    s_cmp_lg_u32 s29, 0
7293; GFX8-NEXT:    s_cselect_b64 s[16:17], s[0:1], s[16:17]
7294; GFX8-NEXT:    s_sub_i32 s26, s22, 64
7295; GFX8-NEXT:    s_sub_i32 s24, 64, s22
7296; GFX8-NEXT:    s_cmp_lt_u32 s22, 64
7297; GFX8-NEXT:    s_cselect_b32 s27, 1, 0
7298; GFX8-NEXT:    s_cmp_eq_u32 s22, 0
7299; GFX8-NEXT:    s_cselect_b32 s28, 1, 0
7300; GFX8-NEXT:    s_lshr_b64 s[0:1], s[10:11], s22
7301; GFX8-NEXT:    s_lshr_b64 s[22:23], s[8:9], s22
7302; GFX8-NEXT:    s_lshl_b64 s[24:25], s[10:11], s24
7303; GFX8-NEXT:    s_or_b64 s[22:23], s[22:23], s[24:25]
7304; GFX8-NEXT:    s_lshr_b64 s[10:11], s[10:11], s26
7305; GFX8-NEXT:    s_cmp_lg_u32 s27, 0
7306; GFX8-NEXT:    s_cselect_b64 s[10:11], s[22:23], s[10:11]
7307; GFX8-NEXT:    s_cmp_lg_u32 s28, 0
7308; GFX8-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[10:11]
7309; GFX8-NEXT:    s_cmp_lg_u32 s27, 0
7310; GFX8-NEXT:    s_cselect_b64 s[10:11], s[0:1], 0
7311; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[8:9]
7312; GFX8-NEXT:    s_or_b64 s[2:3], s[16:17], s[10:11]
7313; GFX8-NEXT:    s_and_b64 s[8:9], s[20:21], s[18:19]
7314; GFX8-NEXT:    s_andn2_b64 s[10:11], s[18:19], s[20:21]
7315; GFX8-NEXT:    s_lshl_b64 s[6:7], s[6:7], 1
7316; GFX8-NEXT:    s_lshr_b32 s18, s5, 31
7317; GFX8-NEXT:    s_lshl_b64 s[16:17], s[4:5], 1
7318; GFX8-NEXT:    s_or_b64 s[4:5], s[6:7], s[18:19]
7319; GFX8-NEXT:    s_sub_i32 s9, s10, 64
7320; GFX8-NEXT:    s_sub_i32 s11, 64, s10
7321; GFX8-NEXT:    s_cmp_lt_u32 s10, 64
7322; GFX8-NEXT:    s_cselect_b32 s20, 1, 0
7323; GFX8-NEXT:    s_cmp_eq_u32 s10, 0
7324; GFX8-NEXT:    s_cselect_b32 s21, 1, 0
7325; GFX8-NEXT:    s_lshl_b64 s[6:7], s[16:17], s10
7326; GFX8-NEXT:    s_lshr_b64 s[18:19], s[16:17], s11
7327; GFX8-NEXT:    s_lshl_b64 s[10:11], s[4:5], s10
7328; GFX8-NEXT:    s_or_b64 s[10:11], s[18:19], s[10:11]
7329; GFX8-NEXT:    s_lshl_b64 s[16:17], s[16:17], s9
7330; GFX8-NEXT:    s_cmp_lg_u32 s20, 0
7331; GFX8-NEXT:    s_cselect_b64 s[6:7], s[6:7], 0
7332; GFX8-NEXT:    s_cselect_b64 s[10:11], s[10:11], s[16:17]
7333; GFX8-NEXT:    s_cmp_lg_u32 s21, 0
7334; GFX8-NEXT:    s_cselect_b64 s[10:11], s[4:5], s[10:11]
7335; GFX8-NEXT:    s_sub_i32 s18, s8, 64
7336; GFX8-NEXT:    s_sub_i32 s16, 64, s8
7337; GFX8-NEXT:    s_cmp_lt_u32 s8, 64
7338; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
7339; GFX8-NEXT:    s_cmp_eq_u32 s8, 0
7340; GFX8-NEXT:    s_cselect_b32 s20, 1, 0
7341; GFX8-NEXT:    s_lshr_b64 s[4:5], s[14:15], s8
7342; GFX8-NEXT:    s_lshr_b64 s[8:9], s[12:13], s8
7343; GFX8-NEXT:    s_lshl_b64 s[16:17], s[14:15], s16
7344; GFX8-NEXT:    s_or_b64 s[8:9], s[8:9], s[16:17]
7345; GFX8-NEXT:    s_lshr_b64 s[14:15], s[14:15], s18
7346; GFX8-NEXT:    s_cmp_lg_u32 s19, 0
7347; GFX8-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[14:15]
7348; GFX8-NEXT:    s_cmp_lg_u32 s20, 0
7349; GFX8-NEXT:    s_cselect_b64 s[8:9], s[12:13], s[8:9]
7350; GFX8-NEXT:    s_cmp_lg_u32 s19, 0
7351; GFX8-NEXT:    s_cselect_b64 s[12:13], s[4:5], 0
7352; GFX8-NEXT:    s_or_b64 s[4:5], s[6:7], s[8:9]
7353; GFX8-NEXT:    s_or_b64 s[6:7], s[10:11], s[12:13]
7354; GFX8-NEXT:    ; return to shader part epilog
7355;
7356; GFX9-LABEL: s_fshr_v2i128:
7357; GFX9:       ; %bb.0:
7358; GFX9-NEXT:    s_movk_i32 s18, 0x7f
7359; GFX9-NEXT:    s_mov_b32 s19, 0
7360; GFX9-NEXT:    s_and_b64 s[22:23], s[16:17], s[18:19]
7361; GFX9-NEXT:    s_andn2_b64 s[16:17], s[18:19], s[16:17]
7362; GFX9-NEXT:    s_lshl_b64 s[24:25], s[0:1], 1
7363; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
7364; GFX9-NEXT:    s_lshr_b32 s0, s1, 31
7365; GFX9-NEXT:    s_mov_b32 s1, s19
7366; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
7367; GFX9-NEXT:    s_sub_i32 s23, s16, 64
7368; GFX9-NEXT:    s_sub_i32 s17, 64, s16
7369; GFX9-NEXT:    s_cmp_lt_u32 s16, 64
7370; GFX9-NEXT:    s_cselect_b32 s28, 1, 0
7371; GFX9-NEXT:    s_cmp_eq_u32 s16, 0
7372; GFX9-NEXT:    s_cselect_b32 s29, 1, 0
7373; GFX9-NEXT:    s_lshl_b64 s[2:3], s[24:25], s16
7374; GFX9-NEXT:    s_lshr_b64 s[26:27], s[24:25], s17
7375; GFX9-NEXT:    s_lshl_b64 s[16:17], s[0:1], s16
7376; GFX9-NEXT:    s_or_b64 s[16:17], s[26:27], s[16:17]
7377; GFX9-NEXT:    s_lshl_b64 s[24:25], s[24:25], s23
7378; GFX9-NEXT:    s_cmp_lg_u32 s28, 0
7379; GFX9-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
7380; GFX9-NEXT:    s_cselect_b64 s[16:17], s[16:17], s[24:25]
7381; GFX9-NEXT:    s_cmp_lg_u32 s29, 0
7382; GFX9-NEXT:    s_cselect_b64 s[16:17], s[0:1], s[16:17]
7383; GFX9-NEXT:    s_sub_i32 s26, s22, 64
7384; GFX9-NEXT:    s_sub_i32 s24, 64, s22
7385; GFX9-NEXT:    s_cmp_lt_u32 s22, 64
7386; GFX9-NEXT:    s_cselect_b32 s27, 1, 0
7387; GFX9-NEXT:    s_cmp_eq_u32 s22, 0
7388; GFX9-NEXT:    s_cselect_b32 s28, 1, 0
7389; GFX9-NEXT:    s_lshr_b64 s[0:1], s[10:11], s22
7390; GFX9-NEXT:    s_lshr_b64 s[22:23], s[8:9], s22
7391; GFX9-NEXT:    s_lshl_b64 s[24:25], s[10:11], s24
7392; GFX9-NEXT:    s_or_b64 s[22:23], s[22:23], s[24:25]
7393; GFX9-NEXT:    s_lshr_b64 s[10:11], s[10:11], s26
7394; GFX9-NEXT:    s_cmp_lg_u32 s27, 0
7395; GFX9-NEXT:    s_cselect_b64 s[10:11], s[22:23], s[10:11]
7396; GFX9-NEXT:    s_cmp_lg_u32 s28, 0
7397; GFX9-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[10:11]
7398; GFX9-NEXT:    s_cmp_lg_u32 s27, 0
7399; GFX9-NEXT:    s_cselect_b64 s[10:11], s[0:1], 0
7400; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[8:9]
7401; GFX9-NEXT:    s_or_b64 s[2:3], s[16:17], s[10:11]
7402; GFX9-NEXT:    s_and_b64 s[8:9], s[20:21], s[18:19]
7403; GFX9-NEXT:    s_andn2_b64 s[10:11], s[18:19], s[20:21]
7404; GFX9-NEXT:    s_lshl_b64 s[6:7], s[6:7], 1
7405; GFX9-NEXT:    s_lshr_b32 s18, s5, 31
7406; GFX9-NEXT:    s_lshl_b64 s[16:17], s[4:5], 1
7407; GFX9-NEXT:    s_or_b64 s[4:5], s[6:7], s[18:19]
7408; GFX9-NEXT:    s_sub_i32 s9, s10, 64
7409; GFX9-NEXT:    s_sub_i32 s11, 64, s10
7410; GFX9-NEXT:    s_cmp_lt_u32 s10, 64
7411; GFX9-NEXT:    s_cselect_b32 s20, 1, 0
7412; GFX9-NEXT:    s_cmp_eq_u32 s10, 0
7413; GFX9-NEXT:    s_cselect_b32 s21, 1, 0
7414; GFX9-NEXT:    s_lshl_b64 s[6:7], s[16:17], s10
7415; GFX9-NEXT:    s_lshr_b64 s[18:19], s[16:17], s11
7416; GFX9-NEXT:    s_lshl_b64 s[10:11], s[4:5], s10
7417; GFX9-NEXT:    s_or_b64 s[10:11], s[18:19], s[10:11]
7418; GFX9-NEXT:    s_lshl_b64 s[16:17], s[16:17], s9
7419; GFX9-NEXT:    s_cmp_lg_u32 s20, 0
7420; GFX9-NEXT:    s_cselect_b64 s[6:7], s[6:7], 0
7421; GFX9-NEXT:    s_cselect_b64 s[10:11], s[10:11], s[16:17]
7422; GFX9-NEXT:    s_cmp_lg_u32 s21, 0
7423; GFX9-NEXT:    s_cselect_b64 s[10:11], s[4:5], s[10:11]
7424; GFX9-NEXT:    s_sub_i32 s18, s8, 64
7425; GFX9-NEXT:    s_sub_i32 s16, 64, s8
7426; GFX9-NEXT:    s_cmp_lt_u32 s8, 64
7427; GFX9-NEXT:    s_cselect_b32 s19, 1, 0
7428; GFX9-NEXT:    s_cmp_eq_u32 s8, 0
7429; GFX9-NEXT:    s_cselect_b32 s20, 1, 0
7430; GFX9-NEXT:    s_lshr_b64 s[4:5], s[14:15], s8
7431; GFX9-NEXT:    s_lshr_b64 s[8:9], s[12:13], s8
7432; GFX9-NEXT:    s_lshl_b64 s[16:17], s[14:15], s16
7433; GFX9-NEXT:    s_or_b64 s[8:9], s[8:9], s[16:17]
7434; GFX9-NEXT:    s_lshr_b64 s[14:15], s[14:15], s18
7435; GFX9-NEXT:    s_cmp_lg_u32 s19, 0
7436; GFX9-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[14:15]
7437; GFX9-NEXT:    s_cmp_lg_u32 s20, 0
7438; GFX9-NEXT:    s_cselect_b64 s[8:9], s[12:13], s[8:9]
7439; GFX9-NEXT:    s_cmp_lg_u32 s19, 0
7440; GFX9-NEXT:    s_cselect_b64 s[12:13], s[4:5], 0
7441; GFX9-NEXT:    s_or_b64 s[4:5], s[6:7], s[8:9]
7442; GFX9-NEXT:    s_or_b64 s[6:7], s[10:11], s[12:13]
7443; GFX9-NEXT:    ; return to shader part epilog
7444;
7445; GFX10-LABEL: s_fshr_v2i128:
7446; GFX10:       ; %bb.0:
7447; GFX10-NEXT:    s_movk_i32 s18, 0x7f
7448; GFX10-NEXT:    s_mov_b32 s19, 0
7449; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
7450; GFX10-NEXT:    s_and_b64 s[22:23], s[16:17], s[18:19]
7451; GFX10-NEXT:    s_andn2_b64 s[16:17], s[18:19], s[16:17]
7452; GFX10-NEXT:    s_lshr_b32 s24, s1, 31
7453; GFX10-NEXT:    s_mov_b32 s25, s19
7454; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
7455; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[24:25]
7456; GFX10-NEXT:    s_sub_i32 s23, s16, 64
7457; GFX10-NEXT:    s_sub_i32 s17, 64, s16
7458; GFX10-NEXT:    s_cmp_lt_u32 s16, 64
7459; GFX10-NEXT:    s_cselect_b32 s28, 1, 0
7460; GFX10-NEXT:    s_cmp_eq_u32 s16, 0
7461; GFX10-NEXT:    s_cselect_b32 s29, 1, 0
7462; GFX10-NEXT:    s_lshr_b64 s[24:25], s[0:1], s17
7463; GFX10-NEXT:    s_lshl_b64 s[26:27], s[2:3], s16
7464; GFX10-NEXT:    s_lshl_b64 s[16:17], s[0:1], s16
7465; GFX10-NEXT:    s_or_b64 s[24:25], s[24:25], s[26:27]
7466; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s23
7467; GFX10-NEXT:    s_cmp_lg_u32 s28, 0
7468; GFX10-NEXT:    s_cselect_b64 s[16:17], s[16:17], 0
7469; GFX10-NEXT:    s_cselect_b64 s[0:1], s[24:25], s[0:1]
7470; GFX10-NEXT:    s_cmp_lg_u32 s29, 0
7471; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
7472; GFX10-NEXT:    s_sub_i32 s26, s22, 64
7473; GFX10-NEXT:    s_sub_i32 s23, 64, s22
7474; GFX10-NEXT:    s_cmp_lt_u32 s22, 64
7475; GFX10-NEXT:    s_cselect_b32 s27, 1, 0
7476; GFX10-NEXT:    s_cmp_eq_u32 s22, 0
7477; GFX10-NEXT:    s_cselect_b32 s28, 1, 0
7478; GFX10-NEXT:    s_lshr_b64 s[0:1], s[8:9], s22
7479; GFX10-NEXT:    s_lshl_b64 s[24:25], s[10:11], s23
7480; GFX10-NEXT:    s_lshr_b64 s[22:23], s[10:11], s22
7481; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[24:25]
7482; GFX10-NEXT:    s_lshr_b64 s[10:11], s[10:11], s26
7483; GFX10-NEXT:    s_cmp_lg_u32 s27, 0
7484; GFX10-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[10:11]
7485; GFX10-NEXT:    s_cmp_lg_u32 s28, 0
7486; GFX10-NEXT:    s_cselect_b64 s[0:1], s[8:9], s[0:1]
7487; GFX10-NEXT:    s_cmp_lg_u32 s27, 0
7488; GFX10-NEXT:    s_cselect_b64 s[8:9], s[22:23], 0
7489; GFX10-NEXT:    s_andn2_b64 s[10:11], s[18:19], s[20:21]
7490; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[8:9]
7491; GFX10-NEXT:    s_and_b64 s[8:9], s[20:21], s[18:19]
7492; GFX10-NEXT:    s_lshl_b64 s[6:7], s[6:7], 1
7493; GFX10-NEXT:    s_lshr_b32 s18, s5, 31
7494; GFX10-NEXT:    s_or_b64 s[0:1], s[16:17], s[0:1]
7495; GFX10-NEXT:    s_lshl_b64 s[4:5], s[4:5], 1
7496; GFX10-NEXT:    s_or_b64 s[6:7], s[6:7], s[18:19]
7497; GFX10-NEXT:    s_sub_i32 s9, s10, 64
7498; GFX10-NEXT:    s_sub_i32 s11, 64, s10
7499; GFX10-NEXT:    s_cmp_lt_u32 s10, 64
7500; GFX10-NEXT:    s_cselect_b32 s20, 1, 0
7501; GFX10-NEXT:    s_cmp_eq_u32 s10, 0
7502; GFX10-NEXT:    s_cselect_b32 s21, 1, 0
7503; GFX10-NEXT:    s_lshr_b64 s[16:17], s[4:5], s11
7504; GFX10-NEXT:    s_lshl_b64 s[18:19], s[6:7], s10
7505; GFX10-NEXT:    s_lshl_b64 s[10:11], s[4:5], s10
7506; GFX10-NEXT:    s_or_b64 s[16:17], s[16:17], s[18:19]
7507; GFX10-NEXT:    s_lshl_b64 s[4:5], s[4:5], s9
7508; GFX10-NEXT:    s_cmp_lg_u32 s20, 0
7509; GFX10-NEXT:    s_cselect_b64 s[10:11], s[10:11], 0
7510; GFX10-NEXT:    s_cselect_b64 s[4:5], s[16:17], s[4:5]
7511; GFX10-NEXT:    s_cmp_lg_u32 s21, 0
7512; GFX10-NEXT:    s_cselect_b64 s[6:7], s[6:7], s[4:5]
7513; GFX10-NEXT:    s_sub_i32 s18, s8, 64
7514; GFX10-NEXT:    s_sub_i32 s9, 64, s8
7515; GFX10-NEXT:    s_cmp_lt_u32 s8, 64
7516; GFX10-NEXT:    s_cselect_b32 s19, 1, 0
7517; GFX10-NEXT:    s_cmp_eq_u32 s8, 0
7518; GFX10-NEXT:    s_cselect_b32 s20, 1, 0
7519; GFX10-NEXT:    s_lshr_b64 s[4:5], s[12:13], s8
7520; GFX10-NEXT:    s_lshl_b64 s[16:17], s[14:15], s9
7521; GFX10-NEXT:    s_lshr_b64 s[8:9], s[14:15], s8
7522; GFX10-NEXT:    s_or_b64 s[4:5], s[4:5], s[16:17]
7523; GFX10-NEXT:    s_lshr_b64 s[14:15], s[14:15], s18
7524; GFX10-NEXT:    s_cmp_lg_u32 s19, 0
7525; GFX10-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[14:15]
7526; GFX10-NEXT:    s_cmp_lg_u32 s20, 0
7527; GFX10-NEXT:    s_cselect_b64 s[4:5], s[12:13], s[4:5]
7528; GFX10-NEXT:    s_cmp_lg_u32 s19, 0
7529; GFX10-NEXT:    s_cselect_b64 s[8:9], s[8:9], 0
7530; GFX10-NEXT:    s_or_b64 s[4:5], s[10:11], s[4:5]
7531; GFX10-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
7532; GFX10-NEXT:    ; return to shader part epilog
7533;
7534; GFX11-LABEL: s_fshr_v2i128:
7535; GFX11:       ; %bb.0:
7536; GFX11-NEXT:    s_movk_i32 s18, 0x7f
7537; GFX11-NEXT:    s_mov_b32 s19, 0
7538; GFX11-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
7539; GFX11-NEXT:    s_and_b64 s[22:23], s[16:17], s[18:19]
7540; GFX11-NEXT:    s_and_not1_b64 s[16:17], s[18:19], s[16:17]
7541; GFX11-NEXT:    s_lshr_b32 s24, s1, 31
7542; GFX11-NEXT:    s_mov_b32 s25, s19
7543; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
7544; GFX11-NEXT:    s_or_b64 s[2:3], s[2:3], s[24:25]
7545; GFX11-NEXT:    s_sub_i32 s23, s16, 64
7546; GFX11-NEXT:    s_sub_i32 s17, 64, s16
7547; GFX11-NEXT:    s_cmp_lt_u32 s16, 64
7548; GFX11-NEXT:    s_cselect_b32 s28, 1, 0
7549; GFX11-NEXT:    s_cmp_eq_u32 s16, 0
7550; GFX11-NEXT:    s_cselect_b32 s29, 1, 0
7551; GFX11-NEXT:    s_lshr_b64 s[24:25], s[0:1], s17
7552; GFX11-NEXT:    s_lshl_b64 s[26:27], s[2:3], s16
7553; GFX11-NEXT:    s_lshl_b64 s[16:17], s[0:1], s16
7554; GFX11-NEXT:    s_or_b64 s[24:25], s[24:25], s[26:27]
7555; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], s23
7556; GFX11-NEXT:    s_cmp_lg_u32 s28, 0
7557; GFX11-NEXT:    s_cselect_b64 s[16:17], s[16:17], 0
7558; GFX11-NEXT:    s_cselect_b64 s[0:1], s[24:25], s[0:1]
7559; GFX11-NEXT:    s_cmp_lg_u32 s29, 0
7560; GFX11-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
7561; GFX11-NEXT:    s_sub_i32 s26, s22, 64
7562; GFX11-NEXT:    s_sub_i32 s23, 64, s22
7563; GFX11-NEXT:    s_cmp_lt_u32 s22, 64
7564; GFX11-NEXT:    s_cselect_b32 s27, 1, 0
7565; GFX11-NEXT:    s_cmp_eq_u32 s22, 0
7566; GFX11-NEXT:    s_cselect_b32 s28, 1, 0
7567; GFX11-NEXT:    s_lshr_b64 s[0:1], s[8:9], s22
7568; GFX11-NEXT:    s_lshl_b64 s[24:25], s[10:11], s23
7569; GFX11-NEXT:    s_lshr_b64 s[22:23], s[10:11], s22
7570; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[24:25]
7571; GFX11-NEXT:    s_lshr_b64 s[10:11], s[10:11], s26
7572; GFX11-NEXT:    s_cmp_lg_u32 s27, 0
7573; GFX11-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[10:11]
7574; GFX11-NEXT:    s_cmp_lg_u32 s28, 0
7575; GFX11-NEXT:    s_cselect_b64 s[0:1], s[8:9], s[0:1]
7576; GFX11-NEXT:    s_cmp_lg_u32 s27, 0
7577; GFX11-NEXT:    s_cselect_b64 s[8:9], s[22:23], 0
7578; GFX11-NEXT:    s_and_not1_b64 s[10:11], s[18:19], s[20:21]
7579; GFX11-NEXT:    s_or_b64 s[2:3], s[2:3], s[8:9]
7580; GFX11-NEXT:    s_and_b64 s[8:9], s[20:21], s[18:19]
7581; GFX11-NEXT:    s_lshl_b64 s[6:7], s[6:7], 1
7582; GFX11-NEXT:    s_lshr_b32 s18, s5, 31
7583; GFX11-NEXT:    s_or_b64 s[0:1], s[16:17], s[0:1]
7584; GFX11-NEXT:    s_lshl_b64 s[4:5], s[4:5], 1
7585; GFX11-NEXT:    s_or_b64 s[6:7], s[6:7], s[18:19]
7586; GFX11-NEXT:    s_sub_i32 s9, s10, 64
7587; GFX11-NEXT:    s_sub_i32 s11, 64, s10
7588; GFX11-NEXT:    s_cmp_lt_u32 s10, 64
7589; GFX11-NEXT:    s_cselect_b32 s20, 1, 0
7590; GFX11-NEXT:    s_cmp_eq_u32 s10, 0
7591; GFX11-NEXT:    s_cselect_b32 s21, 1, 0
7592; GFX11-NEXT:    s_lshr_b64 s[16:17], s[4:5], s11
7593; GFX11-NEXT:    s_lshl_b64 s[18:19], s[6:7], s10
7594; GFX11-NEXT:    s_lshl_b64 s[10:11], s[4:5], s10
7595; GFX11-NEXT:    s_or_b64 s[16:17], s[16:17], s[18:19]
7596; GFX11-NEXT:    s_lshl_b64 s[4:5], s[4:5], s9
7597; GFX11-NEXT:    s_cmp_lg_u32 s20, 0
7598; GFX11-NEXT:    s_cselect_b64 s[10:11], s[10:11], 0
7599; GFX11-NEXT:    s_cselect_b64 s[4:5], s[16:17], s[4:5]
7600; GFX11-NEXT:    s_cmp_lg_u32 s21, 0
7601; GFX11-NEXT:    s_cselect_b64 s[6:7], s[6:7], s[4:5]
7602; GFX11-NEXT:    s_sub_i32 s18, s8, 64
7603; GFX11-NEXT:    s_sub_i32 s9, 64, s8
7604; GFX11-NEXT:    s_cmp_lt_u32 s8, 64
7605; GFX11-NEXT:    s_cselect_b32 s19, 1, 0
7606; GFX11-NEXT:    s_cmp_eq_u32 s8, 0
7607; GFX11-NEXT:    s_cselect_b32 s20, 1, 0
7608; GFX11-NEXT:    s_lshr_b64 s[4:5], s[12:13], s8
7609; GFX11-NEXT:    s_lshl_b64 s[16:17], s[14:15], s9
7610; GFX11-NEXT:    s_lshr_b64 s[8:9], s[14:15], s8
7611; GFX11-NEXT:    s_or_b64 s[4:5], s[4:5], s[16:17]
7612; GFX11-NEXT:    s_lshr_b64 s[14:15], s[14:15], s18
7613; GFX11-NEXT:    s_cmp_lg_u32 s19, 0
7614; GFX11-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[14:15]
7615; GFX11-NEXT:    s_cmp_lg_u32 s20, 0
7616; GFX11-NEXT:    s_cselect_b64 s[4:5], s[12:13], s[4:5]
7617; GFX11-NEXT:    s_cmp_lg_u32 s19, 0
7618; GFX11-NEXT:    s_cselect_b64 s[8:9], s[8:9], 0
7619; GFX11-NEXT:    s_or_b64 s[4:5], s[10:11], s[4:5]
7620; GFX11-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
7621; GFX11-NEXT:    ; return to shader part epilog
; The per-element shift amounts come from the runtime operand %amt, not a constant.
7622  %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt)
7623  ret <2 x i128> %result
7624}
7625
7626define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) {
7627; GFX6-LABEL: v_fshr_v2i128:
7628; GFX6:       ; %bb.0:
7629; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7630; GFX6-NEXT:    v_xor_b32_e32 v17, -1, v16
7631; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
7632; GFX6-NEXT:    v_and_b32_e32 v23, 0x7f, v17
7633; GFX6-NEXT:    v_lshrrev_b32_e32 v17, 31, v1
7634; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
7635; GFX6-NEXT:    v_or_b32_e32 v2, v2, v17
7636; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, 64, v23
7637; GFX6-NEXT:    v_lshr_b64 v[17:18], v[0:1], v17
7638; GFX6-NEXT:    v_lshl_b64 v[21:22], v[2:3], v23
7639; GFX6-NEXT:    v_and_b32_e32 v24, 0x7f, v16
7640; GFX6-NEXT:    v_sub_i32_e32 v16, vcc, 64, v24
7641; GFX6-NEXT:    v_or_b32_e32 v21, v17, v21
7642; GFX6-NEXT:    v_or_b32_e32 v22, v18, v22
7643; GFX6-NEXT:    v_lshl_b64 v[16:17], v[10:11], v16
7644; GFX6-NEXT:    v_lshr_b64 v[18:19], v[8:9], v24
7645; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v23
7646; GFX6-NEXT:    v_or_b32_e32 v18, v18, v16
7647; GFX6-NEXT:    v_subrev_i32_e32 v16, vcc, 64, v23
7648; GFX6-NEXT:    v_or_b32_e32 v19, v19, v17
7649; GFX6-NEXT:    v_lshl_b64 v[16:17], v[0:1], v16
7650; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v23
7651; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v23
7652; GFX6-NEXT:    v_cndmask_b32_e32 v25, 0, v0, vcc
7653; GFX6-NEXT:    v_cndmask_b32_e32 v0, v16, v21, vcc
7654; GFX6-NEXT:    v_cndmask_b32_e32 v16, v17, v22, vcc
7655; GFX6-NEXT:    v_cndmask_b32_e64 v17, v0, v2, s[4:5]
7656; GFX6-NEXT:    v_cndmask_b32_e64 v16, v16, v3, s[4:5]
7657; GFX6-NEXT:    v_subrev_i32_e64 v0, s[4:5], 64, v24
7658; GFX6-NEXT:    v_lshr_b64 v[2:3], v[10:11], v0
7659; GFX6-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v24
7660; GFX6-NEXT:    v_cndmask_b32_e64 v2, v2, v18, s[4:5]
7661; GFX6-NEXT:    v_cndmask_b32_e32 v18, 0, v1, vcc
7662; GFX6-NEXT:    v_lshr_b64 v[0:1], v[10:11], v24
7663; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v24
7664; GFX6-NEXT:    v_cndmask_b32_e64 v3, v3, v19, s[4:5]
7665; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
7666; GFX6-NEXT:    v_cndmask_b32_e64 v8, 0, v0, s[4:5]
7667; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
7668; GFX6-NEXT:    v_cndmask_b32_e64 v9, 0, v1, s[4:5]
7669; GFX6-NEXT:    v_or_b32_e32 v0, v25, v2
7670; GFX6-NEXT:    v_or_b32_e32 v2, v17, v8
7671; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v20
7672; GFX6-NEXT:    v_lshl_b64 v[6:7], v[6:7], 1
7673; GFX6-NEXT:    v_or_b32_e32 v1, v18, v3
7674; GFX6-NEXT:    v_or_b32_e32 v3, v16, v9
7675; GFX6-NEXT:    v_and_b32_e32 v17, 0x7f, v8
7676; GFX6-NEXT:    v_lshl_b64 v[8:9], v[4:5], 1
7677; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 31, v5
7678; GFX6-NEXT:    v_or_b32_e32 v6, v6, v4
7679; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 64, v17
7680; GFX6-NEXT:    v_lshr_b64 v[4:5], v[8:9], v4
7681; GFX6-NEXT:    v_lshl_b64 v[10:11], v[6:7], v17
7682; GFX6-NEXT:    v_subrev_i32_e32 v18, vcc, 64, v17
7683; GFX6-NEXT:    v_or_b32_e32 v10, v4, v10
7684; GFX6-NEXT:    v_or_b32_e32 v11, v5, v11
7685; GFX6-NEXT:    v_lshl_b64 v[4:5], v[8:9], v17
7686; GFX6-NEXT:    v_lshl_b64 v[8:9], v[8:9], v18
7687; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v17
7688; GFX6-NEXT:    v_and_b32_e32 v16, 0x7f, v20
7689; GFX6-NEXT:    v_cndmask_b32_e32 v18, 0, v4, vcc
7690; GFX6-NEXT:    v_cndmask_b32_e32 v19, 0, v5, vcc
7691; GFX6-NEXT:    v_cndmask_b32_e32 v4, v8, v10, vcc
7692; GFX6-NEXT:    v_cndmask_b32_e32 v5, v9, v11, vcc
7693; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v17
7694; GFX6-NEXT:    v_cndmask_b32_e32 v8, v4, v6, vcc
7695; GFX6-NEXT:    v_cndmask_b32_e32 v9, v5, v7, vcc
7696; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, 64, v16
7697; GFX6-NEXT:    v_lshr_b64 v[4:5], v[12:13], v16
7698; GFX6-NEXT:    v_lshl_b64 v[6:7], v[14:15], v6
7699; GFX6-NEXT:    v_subrev_i32_e32 v10, vcc, 64, v16
7700; GFX6-NEXT:    v_or_b32_e32 v11, v4, v6
7701; GFX6-NEXT:    v_or_b32_e32 v17, v5, v7
7702; GFX6-NEXT:    v_lshr_b64 v[6:7], v[14:15], v10
7703; GFX6-NEXT:    v_lshr_b64 v[4:5], v[14:15], v16
7704; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v16
7705; GFX6-NEXT:    v_cndmask_b32_e32 v6, v6, v11, vcc
7706; GFX6-NEXT:    v_cndmask_b32_e32 v7, v7, v17, vcc
7707; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v16
7708; GFX6-NEXT:    v_cndmask_b32_e64 v6, v6, v12, s[4:5]
7709; GFX6-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[4:5]
7710; GFX6-NEXT:    v_cndmask_b32_e32 v10, 0, v4, vcc
7711; GFX6-NEXT:    v_cndmask_b32_e32 v11, 0, v5, vcc
7712; GFX6-NEXT:    v_or_b32_e32 v4, v18, v6
7713; GFX6-NEXT:    v_or_b32_e32 v5, v19, v7
7714; GFX6-NEXT:    v_or_b32_e32 v6, v8, v10
7715; GFX6-NEXT:    v_or_b32_e32 v7, v9, v11
7716; GFX6-NEXT:    s_setpc_b64 s[30:31]
7717;
7718; GFX8-LABEL: v_fshr_v2i128:
7719; GFX8:       ; %bb.0:
7720; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7721; GFX8-NEXT:    v_xor_b32_e32 v17, -1, v16
7722; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
7723; GFX8-NEXT:    v_and_b32_e32 v23, 0x7f, v17
7724; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 31, v1
7725; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
7726; GFX8-NEXT:    v_or_b32_e32 v2, v2, v17
7727; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, 64, v23
7728; GFX8-NEXT:    v_lshrrev_b64 v[17:18], v17, v[0:1]
7729; GFX8-NEXT:    v_lshlrev_b64 v[21:22], v23, v[2:3]
7730; GFX8-NEXT:    v_and_b32_e32 v24, 0x7f, v16
7731; GFX8-NEXT:    v_sub_u32_e32 v16, vcc, 64, v24
7732; GFX8-NEXT:    v_or_b32_e32 v21, v17, v21
7733; GFX8-NEXT:    v_or_b32_e32 v22, v18, v22
7734; GFX8-NEXT:    v_lshlrev_b64 v[16:17], v16, v[10:11]
7735; GFX8-NEXT:    v_lshrrev_b64 v[18:19], v24, v[8:9]
7736; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v23
7737; GFX8-NEXT:    v_or_b32_e32 v18, v18, v16
7738; GFX8-NEXT:    v_subrev_u32_e32 v16, vcc, 64, v23
7739; GFX8-NEXT:    v_or_b32_e32 v19, v19, v17
7740; GFX8-NEXT:    v_lshlrev_b64 v[16:17], v16, v[0:1]
7741; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v23, v[0:1]
7742; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v23
7743; GFX8-NEXT:    v_cndmask_b32_e32 v25, 0, v0, vcc
7744; GFX8-NEXT:    v_cndmask_b32_e32 v0, v16, v21, vcc
7745; GFX8-NEXT:    v_cndmask_b32_e32 v16, v17, v22, vcc
7746; GFX8-NEXT:    v_cndmask_b32_e64 v17, v0, v2, s[4:5]
7747; GFX8-NEXT:    v_cndmask_b32_e64 v16, v16, v3, s[4:5]
7748; GFX8-NEXT:    v_subrev_u32_e64 v0, s[4:5], 64, v24
7749; GFX8-NEXT:    v_lshrrev_b64 v[2:3], v0, v[10:11]
7750; GFX8-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v24
7751; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, v18, s[4:5]
7752; GFX8-NEXT:    v_cndmask_b32_e32 v18, 0, v1, vcc
7753; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v24, v[10:11]
7754; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v24
7755; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v19, s[4:5]
7756; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
7757; GFX8-NEXT:    v_cndmask_b32_e64 v8, 0, v0, s[4:5]
7758; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
7759; GFX8-NEXT:    v_cndmask_b32_e64 v9, 0, v1, s[4:5]
7760; GFX8-NEXT:    v_or_b32_e32 v0, v25, v2
7761; GFX8-NEXT:    v_or_b32_e32 v2, v17, v8
7762; GFX8-NEXT:    v_xor_b32_e32 v8, -1, v20
7763; GFX8-NEXT:    v_lshlrev_b64 v[6:7], 1, v[6:7]
7764; GFX8-NEXT:    v_or_b32_e32 v1, v18, v3
7765; GFX8-NEXT:    v_or_b32_e32 v3, v16, v9
7766; GFX8-NEXT:    v_and_b32_e32 v17, 0x7f, v8
7767; GFX8-NEXT:    v_lshlrev_b64 v[8:9], 1, v[4:5]
7768; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 31, v5
7769; GFX8-NEXT:    v_or_b32_e32 v6, v6, v4
7770; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 64, v17
7771; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v4, v[8:9]
7772; GFX8-NEXT:    v_lshlrev_b64 v[10:11], v17, v[6:7]
7773; GFX8-NEXT:    v_subrev_u32_e32 v18, vcc, 64, v17
7774; GFX8-NEXT:    v_or_b32_e32 v10, v4, v10
7775; GFX8-NEXT:    v_or_b32_e32 v11, v5, v11
7776; GFX8-NEXT:    v_lshlrev_b64 v[4:5], v17, v[8:9]
7777; GFX8-NEXT:    v_lshlrev_b64 v[8:9], v18, v[8:9]
7778; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v17
7779; GFX8-NEXT:    v_and_b32_e32 v16, 0x7f, v20
7780; GFX8-NEXT:    v_cndmask_b32_e32 v18, 0, v4, vcc
7781; GFX8-NEXT:    v_cndmask_b32_e32 v19, 0, v5, vcc
7782; GFX8-NEXT:    v_cndmask_b32_e32 v4, v8, v10, vcc
7783; GFX8-NEXT:    v_cndmask_b32_e32 v5, v9, v11, vcc
7784; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v17
7785; GFX8-NEXT:    v_cndmask_b32_e32 v8, v4, v6, vcc
7786; GFX8-NEXT:    v_cndmask_b32_e32 v9, v5, v7, vcc
7787; GFX8-NEXT:    v_sub_u32_e32 v6, vcc, 64, v16
7788; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v16, v[12:13]
7789; GFX8-NEXT:    v_lshlrev_b64 v[6:7], v6, v[14:15]
7790; GFX8-NEXT:    v_subrev_u32_e32 v10, vcc, 64, v16
7791; GFX8-NEXT:    v_or_b32_e32 v11, v4, v6
7792; GFX8-NEXT:    v_or_b32_e32 v17, v5, v7
7793; GFX8-NEXT:    v_lshrrev_b64 v[6:7], v10, v[14:15]
7794; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v16, v[14:15]
7795; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v16
7796; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v11, vcc
7797; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v17, vcc
7798; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v16
7799; GFX8-NEXT:    v_cndmask_b32_e64 v6, v6, v12, s[4:5]
7800; GFX8-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[4:5]
7801; GFX8-NEXT:    v_cndmask_b32_e32 v10, 0, v4, vcc
7802; GFX8-NEXT:    v_cndmask_b32_e32 v11, 0, v5, vcc
7803; GFX8-NEXT:    v_or_b32_e32 v4, v18, v6
7804; GFX8-NEXT:    v_or_b32_e32 v5, v19, v7
7805; GFX8-NEXT:    v_or_b32_e32 v6, v8, v10
7806; GFX8-NEXT:    v_or_b32_e32 v7, v9, v11
7807; GFX8-NEXT:    s_setpc_b64 s[30:31]
7808;
7809; GFX9-LABEL: v_fshr_v2i128:
7810; GFX9:       ; %bb.0:
7811; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7812; GFX9-NEXT:    v_xor_b32_e32 v17, -1, v16
7813; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
7814; GFX9-NEXT:    v_and_b32_e32 v23, 0x7f, v17
7815; GFX9-NEXT:    v_lshrrev_b32_e32 v17, 31, v1
7816; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
7817; GFX9-NEXT:    v_or_b32_e32 v2, v2, v17
7818; GFX9-NEXT:    v_sub_u32_e32 v17, 64, v23
7819; GFX9-NEXT:    v_lshrrev_b64 v[17:18], v17, v[0:1]
7820; GFX9-NEXT:    v_lshlrev_b64 v[21:22], v23, v[2:3]
7821; GFX9-NEXT:    v_and_b32_e32 v24, 0x7f, v16
7822; GFX9-NEXT:    v_sub_u32_e32 v16, 64, v24
7823; GFX9-NEXT:    v_or_b32_e32 v21, v17, v21
7824; GFX9-NEXT:    v_or_b32_e32 v22, v18, v22
7825; GFX9-NEXT:    v_lshlrev_b64 v[16:17], v16, v[10:11]
7826; GFX9-NEXT:    v_lshrrev_b64 v[18:19], v24, v[8:9]
7827; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v23
7828; GFX9-NEXT:    v_or_b32_e32 v18, v18, v16
7829; GFX9-NEXT:    v_subrev_u32_e32 v16, 64, v23
7830; GFX9-NEXT:    v_or_b32_e32 v19, v19, v17
7831; GFX9-NEXT:    v_lshlrev_b64 v[16:17], v16, v[0:1]
7832; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v23, v[0:1]
7833; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v23
7834; GFX9-NEXT:    v_cndmask_b32_e32 v25, 0, v0, vcc
7835; GFX9-NEXT:    v_cndmask_b32_e32 v0, v16, v21, vcc
7836; GFX9-NEXT:    v_cndmask_b32_e32 v16, v17, v22, vcc
7837; GFX9-NEXT:    v_cndmask_b32_e64 v17, v0, v2, s[4:5]
7838; GFX9-NEXT:    v_subrev_u32_e32 v0, 64, v24
7839; GFX9-NEXT:    v_cndmask_b32_e64 v16, v16, v3, s[4:5]
7840; GFX9-NEXT:    v_lshrrev_b64 v[2:3], v0, v[10:11]
7841; GFX9-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v24
7842; GFX9-NEXT:    v_cndmask_b32_e64 v2, v2, v18, s[4:5]
7843; GFX9-NEXT:    v_cndmask_b32_e32 v18, 0, v1, vcc
7844; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v24, v[10:11]
7845; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v24
7846; GFX9-NEXT:    v_cndmask_b32_e64 v3, v3, v19, s[4:5]
7847; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
7848; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, v0, s[4:5]
7849; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
7850; GFX9-NEXT:    v_cndmask_b32_e64 v9, 0, v1, s[4:5]
7851; GFX9-NEXT:    v_or_b32_e32 v0, v25, v2
7852; GFX9-NEXT:    v_or_b32_e32 v2, v17, v8
7853; GFX9-NEXT:    v_xor_b32_e32 v8, -1, v20
7854; GFX9-NEXT:    v_lshlrev_b64 v[6:7], 1, v[6:7]
7855; GFX9-NEXT:    v_or_b32_e32 v1, v18, v3
7856; GFX9-NEXT:    v_or_b32_e32 v3, v16, v9
7857; GFX9-NEXT:    v_and_b32_e32 v17, 0x7f, v8
7858; GFX9-NEXT:    v_lshlrev_b64 v[8:9], 1, v[4:5]
7859; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 31, v5
7860; GFX9-NEXT:    v_or_b32_e32 v6, v6, v4
7861; GFX9-NEXT:    v_sub_u32_e32 v4, 64, v17
7862; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v4, v[8:9]
7863; GFX9-NEXT:    v_lshlrev_b64 v[10:11], v17, v[6:7]
7864; GFX9-NEXT:    v_subrev_u32_e32 v18, 64, v17
7865; GFX9-NEXT:    v_or_b32_e32 v10, v4, v10
7866; GFX9-NEXT:    v_or_b32_e32 v11, v5, v11
7867; GFX9-NEXT:    v_lshlrev_b64 v[4:5], v17, v[8:9]
7868; GFX9-NEXT:    v_lshlrev_b64 v[8:9], v18, v[8:9]
7869; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v17
7870; GFX9-NEXT:    v_and_b32_e32 v16, 0x7f, v20
7871; GFX9-NEXT:    v_cndmask_b32_e32 v18, 0, v4, vcc
7872; GFX9-NEXT:    v_cndmask_b32_e32 v19, 0, v5, vcc
7873; GFX9-NEXT:    v_cndmask_b32_e32 v4, v8, v10, vcc
7874; GFX9-NEXT:    v_cndmask_b32_e32 v5, v9, v11, vcc
7875; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v17
7876; GFX9-NEXT:    v_cndmask_b32_e32 v8, v4, v6, vcc
7877; GFX9-NEXT:    v_sub_u32_e32 v6, 64, v16
7878; GFX9-NEXT:    v_cndmask_b32_e32 v9, v5, v7, vcc
7879; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v16, v[12:13]
7880; GFX9-NEXT:    v_lshlrev_b64 v[6:7], v6, v[14:15]
7881; GFX9-NEXT:    v_subrev_u32_e32 v10, 64, v16
7882; GFX9-NEXT:    v_or_b32_e32 v11, v4, v6
7883; GFX9-NEXT:    v_or_b32_e32 v17, v5, v7
7884; GFX9-NEXT:    v_lshrrev_b64 v[6:7], v10, v[14:15]
7885; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v16, v[14:15]
7886; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v16
7887; GFX9-NEXT:    v_cndmask_b32_e32 v6, v6, v11, vcc
7888; GFX9-NEXT:    v_cndmask_b32_e32 v7, v7, v17, vcc
7889; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v16
7890; GFX9-NEXT:    v_cndmask_b32_e64 v6, v6, v12, s[4:5]
7891; GFX9-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[4:5]
7892; GFX9-NEXT:    v_cndmask_b32_e32 v10, 0, v4, vcc
7893; GFX9-NEXT:    v_cndmask_b32_e32 v11, 0, v5, vcc
7894; GFX9-NEXT:    v_or_b32_e32 v4, v18, v6
7895; GFX9-NEXT:    v_or_b32_e32 v5, v19, v7
7896; GFX9-NEXT:    v_or_b32_e32 v6, v8, v10
7897; GFX9-NEXT:    v_or_b32_e32 v7, v9, v11
7898; GFX9-NEXT:    s_setpc_b64 s[30:31]
7899;
7900; GFX10-LABEL: v_fshr_v2i128:
7901; GFX10:       ; %bb.0:
7902; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7903; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
7904; GFX10-NEXT:    v_xor_b32_e32 v17, -1, v16
7905; GFX10-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
7906; GFX10-NEXT:    v_and_b32_e32 v26, 0x7f, v16
7907; GFX10-NEXT:    v_lshlrev_b64 v[6:7], 1, v[6:7]
7908; GFX10-NEXT:    v_and_b32_e32 v25, 0x7f, v17
7909; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 31, v1
7910; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
7911; GFX10-NEXT:    v_subrev_nc_u32_e32 v27, 64, v26
7912; GFX10-NEXT:    v_cmp_gt_u32_e64 s4, 64, v26
7913; GFX10-NEXT:    v_sub_nc_u32_e32 v18, 64, v25
7914; GFX10-NEXT:    v_or_b32_e32 v2, v2, v17
7915; GFX10-NEXT:    v_subrev_nc_u32_e32 v19, 64, v25
7916; GFX10-NEXT:    v_lshlrev_b64 v[23:24], v25, v[0:1]
7917; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v25
7918; GFX10-NEXT:    v_lshrrev_b64 v[17:18], v18, v[0:1]
7919; GFX10-NEXT:    v_lshlrev_b64 v[21:22], v25, v[2:3]
7920; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v19, v[0:1]
7921; GFX10-NEXT:    v_cndmask_b32_e32 v23, 0, v23, vcc_lo
7922; GFX10-NEXT:    v_cndmask_b32_e32 v24, 0, v24, vcc_lo
7923; GFX10-NEXT:    v_or_b32_e32 v22, v18, v22
7924; GFX10-NEXT:    v_sub_nc_u32_e32 v18, 64, v26
7925; GFX10-NEXT:    v_or_b32_e32 v21, v17, v21
7926; GFX10-NEXT:    v_lshrrev_b64 v[16:17], v26, v[8:9]
7927; GFX10-NEXT:    v_cndmask_b32_e32 v22, v1, v22, vcc_lo
7928; GFX10-NEXT:    v_lshlrev_b64 v[18:19], v18, v[10:11]
7929; GFX10-NEXT:    v_cndmask_b32_e32 v21, v0, v21, vcc_lo
7930; GFX10-NEXT:    v_lshrrev_b64 v[0:1], v27, v[10:11]
7931; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v25
7932; GFX10-NEXT:    v_or_b32_e32 v16, v16, v18
7933; GFX10-NEXT:    v_or_b32_e32 v17, v17, v19
7934; GFX10-NEXT:    v_cndmask_b32_e32 v18, v21, v2, vcc_lo
7935; GFX10-NEXT:    v_cndmask_b32_e32 v22, v22, v3, vcc_lo
7936; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v26
7937; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, v16, s4
7938; GFX10-NEXT:    v_xor_b32_e32 v16, -1, v20
7939; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s4
7940; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v26, v[10:11]
7941; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
7942; GFX10-NEXT:    v_and_b32_e32 v25, 0x7f, v16
7943; GFX10-NEXT:    v_lshrrev_b32_e32 v8, 31, v5
7944; GFX10-NEXT:    v_lshlrev_b64 v[4:5], 1, v[4:5]
7945; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
7946; GFX10-NEXT:    v_or_b32_e32 v0, v23, v0
7947; GFX10-NEXT:    v_sub_nc_u32_e32 v9, 64, v25
7948; GFX10-NEXT:    v_or_b32_e32 v6, v6, v8
7949; GFX10-NEXT:    v_and_b32_e32 v23, 0x7f, v20
7950; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s4
7951; GFX10-NEXT:    v_cndmask_b32_e64 v26, 0, v3, s4
7952; GFX10-NEXT:    v_lshrrev_b64 v[8:9], v9, v[4:5]
7953; GFX10-NEXT:    v_lshlrev_b64 v[10:11], v25, v[6:7]
7954; GFX10-NEXT:    v_sub_nc_u32_e32 v20, 64, v23
7955; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 64, v25
7956; GFX10-NEXT:    v_or_b32_e32 v2, v18, v2
7957; GFX10-NEXT:    v_lshlrev_b64 v[16:17], v25, v[4:5]
7958; GFX10-NEXT:    v_lshrrev_b64 v[18:19], v23, v[12:13]
7959; GFX10-NEXT:    v_or_b32_e32 v10, v8, v10
7960; GFX10-NEXT:    v_subrev_nc_u32_e32 v8, 64, v23
7961; GFX10-NEXT:    v_lshlrev_b64 v[20:21], v20, v[14:15]
7962; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v25
7963; GFX10-NEXT:    v_lshlrev_b64 v[3:4], v3, v[4:5]
7964; GFX10-NEXT:    v_or_b32_e32 v5, v9, v11
7965; GFX10-NEXT:    v_lshrrev_b64 v[8:9], v8, v[14:15]
7966; GFX10-NEXT:    v_cmp_gt_u32_e64 s4, 64, v23
7967; GFX10-NEXT:    v_cndmask_b32_e32 v11, 0, v16, vcc_lo
7968; GFX10-NEXT:    v_or_b32_e32 v16, v18, v20
7969; GFX10-NEXT:    v_or_b32_e32 v18, v19, v21
7970; GFX10-NEXT:    v_cndmask_b32_e32 v10, v3, v10, vcc_lo
7971; GFX10-NEXT:    v_cndmask_b32_e32 v5, v4, v5, vcc_lo
7972; GFX10-NEXT:    v_lshrrev_b64 v[3:4], v23, v[14:15]
7973; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v16, s4
7974; GFX10-NEXT:    v_cmp_eq_u32_e64 s5, 0, v23
7975; GFX10-NEXT:    v_cmp_eq_u32_e64 s6, 0, v25
7976; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, v18, s4
7977; GFX10-NEXT:    v_cndmask_b32_e32 v14, 0, v17, vcc_lo
7978; GFX10-NEXT:    v_or_b32_e32 v1, v24, v1
7979; GFX10-NEXT:    v_cndmask_b32_e64 v6, v10, v6, s6
7980; GFX10-NEXT:    v_cndmask_b32_e64 v7, v5, v7, s6
7981; GFX10-NEXT:    v_cndmask_b32_e64 v5, v8, v12, s5
7982; GFX10-NEXT:    v_cndmask_b32_e64 v8, v9, v13, s5
7983; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, v3, s4
7984; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, v4, s4
7985; GFX10-NEXT:    v_or_b32_e32 v3, v22, v26
7986; GFX10-NEXT:    v_or_b32_e32 v4, v11, v5
7987; GFX10-NEXT:    v_or_b32_e32 v5, v14, v8
7988; GFX10-NEXT:    v_or_b32_e32 v6, v6, v9
7989; GFX10-NEXT:    v_or_b32_e32 v7, v7, v10
7990; GFX10-NEXT:    s_setpc_b64 s[30:31]
7991;
7992; GFX11-LABEL: v_fshr_v2i128:
7993; GFX11:       ; %bb.0:
7994; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7995; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
7996; GFX11-NEXT:    v_xor_b32_e32 v17, -1, v16
7997; GFX11-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
7998; GFX11-NEXT:    v_lshlrev_b64 v[6:7], 1, v[6:7]
7999; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
8000; GFX11-NEXT:    v_and_b32_e32 v25, 0x7f, v17
8001; GFX11-NEXT:    v_lshrrev_b32_e32 v17, 31, v1
8002; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
8003; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v25
8004; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
8005; GFX11-NEXT:    v_or_b32_e32 v2, v2, v17
8006; GFX11-NEXT:    v_lshlrev_b64 v[23:24], v25, v[0:1]
8007; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
8008; GFX11-NEXT:    v_dual_cndmask_b32 v23, 0, v23 :: v_dual_and_b32 v26, 0x7f, v16
8009; GFX11-NEXT:    v_cndmask_b32_e32 v24, 0, v24, vcc_lo
8010; GFX11-NEXT:    v_sub_nc_u32_e32 v18, 64, v25
8011; GFX11-NEXT:    v_lshlrev_b64 v[21:22], v25, v[2:3]
8012; GFX11-NEXT:    v_subrev_nc_u32_e32 v19, 64, v25
8013; GFX11-NEXT:    v_subrev_nc_u32_e32 v27, 64, v26
8014; GFX11-NEXT:    v_cmp_gt_u32_e64 s0, 64, v26
8015; GFX11-NEXT:    v_lshrrev_b64 v[17:18], v18, v[0:1]
8016; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
8017; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v19, v[0:1]
8018; GFX11-NEXT:    v_or_b32_e32 v22, v18, v22
8019; GFX11-NEXT:    v_sub_nc_u32_e32 v18, 64, v26
8020; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
8021; GFX11-NEXT:    v_or_b32_e32 v21, v17, v21
8022; GFX11-NEXT:    v_lshrrev_b64 v[16:17], v26, v[8:9]
8023; GFX11-NEXT:    v_cndmask_b32_e32 v22, v1, v22, vcc_lo
8024; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
8025; GFX11-NEXT:    v_lshlrev_b64 v[18:19], v18, v[10:11]
8026; GFX11-NEXT:    v_cndmask_b32_e32 v21, v0, v21, vcc_lo
8027; GFX11-NEXT:    v_lshrrev_b64 v[0:1], v27, v[10:11]
8028; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v25
8029; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
8030; GFX11-NEXT:    v_or_b32_e32 v16, v16, v18
8031; GFX11-NEXT:    v_or_b32_e32 v17, v17, v19
8032; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, v16, s0
8033; GFX11-NEXT:    v_xor_b32_e32 v16, -1, v20
8034; GFX11-NEXT:    v_cndmask_b32_e32 v18, v21, v2, vcc_lo
8035; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
8036; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s0
8037; GFX11-NEXT:    v_cndmask_b32_e32 v22, v22, v3, vcc_lo
8038; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v26
8039; GFX11-NEXT:    v_and_b32_e32 v25, 0x7f, v16
8040; GFX11-NEXT:    v_lshrrev_b64 v[2:3], v26, v[10:11]
8041; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8
8042; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 31, v5
8043; GFX11-NEXT:    v_lshlrev_b64 v[4:5], 1, v[4:5]
8044; GFX11-NEXT:    v_sub_nc_u32_e32 v9, 64, v25
8045; GFX11-NEXT:    v_cndmask_b32_e64 v26, 0, v3, s0
8046; GFX11-NEXT:    v_subrev_nc_u32_e32 v3, 64, v25
8047; GFX11-NEXT:    v_or_b32_e32 v6, v6, v8
8048; GFX11-NEXT:    v_or_b32_e32 v0, v23, v0
8049; GFX11-NEXT:    v_lshrrev_b64 v[8:9], v9, v[4:5]
8050; GFX11-NEXT:    v_lshlrev_b64 v[16:17], v25, v[4:5]
8051; GFX11-NEXT:    v_lshlrev_b64 v[3:4], v3, v[4:5]
8052; GFX11-NEXT:    v_lshlrev_b64 v[10:11], v25, v[6:7]
8053; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v25
8054; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s0
8055; GFX11-NEXT:    v_cmp_eq_u32_e64 s2, 0, v25
8056; GFX11-NEXT:    v_or_b32_e32 v1, v24, v1
8057; GFX11-NEXT:    v_or_b32_e32 v10, v8, v10
8058; GFX11-NEXT:    v_and_b32_e32 v23, 0x7f, v20
8059; GFX11-NEXT:    v_or_b32_e32 v2, v18, v2
8060; GFX11-NEXT:    v_or_b32_e32 v5, v9, v11
8061; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
8062; GFX11-NEXT:    v_dual_cndmask_b32 v11, 0, v16 :: v_dual_cndmask_b32 v10, v3, v10
8063; GFX11-NEXT:    v_sub_nc_u32_e32 v20, 64, v23
8064; GFX11-NEXT:    v_subrev_nc_u32_e32 v8, 64, v23
8065; GFX11-NEXT:    v_lshrrev_b64 v[18:19], v23, v[12:13]
8066; GFX11-NEXT:    v_cmp_gt_u32_e64 s0, 64, v23
8067; GFX11-NEXT:    v_cndmask_b32_e32 v5, v4, v5, vcc_lo
8068; GFX11-NEXT:    v_lshlrev_b64 v[20:21], v20, v[14:15]
8069; GFX11-NEXT:    v_lshrrev_b64 v[8:9], v8, v[14:15]
8070; GFX11-NEXT:    v_lshrrev_b64 v[3:4], v23, v[14:15]
8071; GFX11-NEXT:    v_cndmask_b32_e32 v14, 0, v17, vcc_lo
8072; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 0, v23
8073; GFX11-NEXT:    v_cndmask_b32_e64 v6, v10, v6, s2
8074; GFX11-NEXT:    v_or_b32_e32 v16, v18, v20
8075; GFX11-NEXT:    v_or_b32_e32 v18, v19, v21
8076; GFX11-NEXT:    v_cndmask_b32_e64 v7, v5, v7, s2
8077; GFX11-NEXT:    v_cndmask_b32_e64 v10, 0, v4, s0
8078; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
8079; GFX11-NEXT:    v_cndmask_b32_e64 v8, v8, v16, s0
8080; GFX11-NEXT:    v_cndmask_b32_e64 v9, v9, v18, s0
8081; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
8082; GFX11-NEXT:    v_or_b32_e32 v7, v7, v10
8083; GFX11-NEXT:    v_cndmask_b32_e64 v5, v8, v12, s1
8084; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
8085; GFX11-NEXT:    v_cndmask_b32_e64 v8, v9, v13, s1
8086; GFX11-NEXT:    v_cndmask_b32_e64 v9, 0, v3, s0
8087; GFX11-NEXT:    v_or_b32_e32 v3, v22, v26
8088; GFX11-NEXT:    v_or_b32_e32 v4, v11, v5
8089; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
8090; GFX11-NEXT:    v_or_b32_e32 v5, v14, v8
8091; GFX11-NEXT:    v_or_b32_e32 v6, v6, v9
8092; GFX11-NEXT:    s_setpc_b64 s[30:31]
8093  %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt)
8094  ret <2 x i128> %result
8095}
8096
; Declarations of the llvm.fshr intrinsic overloads, grouped by element bit
; width. Every declaration below refers to attribute group #0, which is defined
; after the last declaration.

; 7-bit and 8-bit elements.
declare i7 @llvm.fshr.i7(i7, i7, i7) #0
declare i8 @llvm.fshr.i8(i8, i8, i8) #0
declare <2 x i8> @llvm.fshr.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) #0
declare <4 x i8> @llvm.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) #0

; 16-bit elements.
declare i16 @llvm.fshr.i16(i16, i16, i16) #0
declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #0
declare <3 x i16> @llvm.fshr.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) #0
declare <4 x i16> @llvm.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #0
declare <5 x i16> @llvm.fshr.v5i16(<5 x i16>, <5 x i16>, <5 x i16>) #0
declare <6 x i16> @llvm.fshr.v6i16(<6 x i16>, <6 x i16>, <6 x i16>) #0
declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) #0

; 24-bit elements.
declare i24 @llvm.fshr.i24(i24, i24, i24) #0
declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) #0

; 32-bit elements.
declare i32 @llvm.fshr.i32(i32, i32, i32) #0
declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) #0
declare <3 x i32> @llvm.fshr.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) #0
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #0
declare <5 x i32> @llvm.fshr.v5i32(<5 x i32>, <5 x i32>, <5 x i32>) #0
declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) #0

; 48-bit elements.
declare i48 @llvm.fshr.i48(i48, i48, i48) #0

; 64-bit elements.
declare i64 @llvm.fshr.i64(i64, i64, i64) #0
declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) #0

; 128-bit elements.
declare i128 @llvm.fshr.i128(i128, i128, i128) #0
declare <2 x i128> @llvm.fshr.v2i128(<2 x i128>, <2 x i128>, <2 x i128>) #0

; Attribute group shared by all of the intrinsic declarations above.
attributes #0 = { nounwind readnone speculatable willreturn }
8129