1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -o - %s | FileCheck -check-prefixes=GCN,GFX6 %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GCN,GFX10 %s
6
; Uniform i7 funnel shift right: calls llvm.fshr.i7 with all three operands
; marked inreg in an amdgpu_ps function.
; 7 is not a power of two, so the expected code cannot simply mask the shift
; amount. The checks below show amt (masked to 0x7f) being reduced modulo 7 with
; a reciprocal-based sequence (v_rcp_iflag_f32, v_mul_hi_u32, v_mul_lo_u32 by 7
; and two compare/select corrections that subtract 7). lhs is then shifted left
; by 1 and by (6 - amt), and OR'ed with rhs shifted right by amt.
; Although the inputs arrive in s0-s2, the modulo runs on vector instructions
; and the result is copied back to s0 with v_readfirstlane_b32.
7define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
8; GFX6-LABEL: s_fshr_i7:
9; GFX6:       ; %bb.0:
10; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
11; GFX6-NEXT:    v_rcp_iflag_f32_e32 v0, v0
12; GFX6-NEXT:    s_movk_i32 s3, 0x7f
13; GFX6-NEXT:    s_and_b32 s2, s2, s3
14; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
15; GFX6-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
16; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
17; GFX6-NEXT:    s_and_b32 s1, s1, s3
18; GFX6-NEXT:    v_mul_lo_u32 v1, -7, v0
19; GFX6-NEXT:    v_mul_hi_u32 v1, v0, v1
20; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
21; GFX6-NEXT:    v_mul_hi_u32 v0, s2, v0
22; GFX6-NEXT:    v_mul_lo_u32 v0, v0, 7
23; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
24; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 7, v0
25; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
26; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
27; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 7, v0
28; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
29; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
30; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 6, v0
31; GFX6-NEXT:    v_and_b32_e32 v0, s3, v0
32; GFX6-NEXT:    v_and_b32_e32 v1, s3, v1
33; GFX6-NEXT:    v_lshl_b32_e32 v1, s0, v1
34; GFX6-NEXT:    v_lshr_b32_e32 v0, s1, v0
35; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
36; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
37; GFX6-NEXT:    ; return to shader part epilog
38;
39; GFX8-LABEL: s_fshr_i7:
40; GFX8:       ; %bb.0:
41; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
42; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
43; GFX8-NEXT:    s_movk_i32 s3, 0x7f
44; GFX8-NEXT:    s_and_b32 s2, s2, s3
45; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
46; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
47; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
48; GFX8-NEXT:    s_and_b32 s1, s1, s3
49; GFX8-NEXT:    v_mul_lo_u32 v1, -7, v0
50; GFX8-NEXT:    v_mul_hi_u32 v1, v0, v1
51; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
52; GFX8-NEXT:    v_mul_hi_u32 v0, s2, v0
53; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 7
54; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s2, v0
55; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 7, v0
56; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
57; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
58; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 7, v0
59; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
60; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
61; GFX8-NEXT:    v_sub_u16_e32 v1, 6, v0
62; GFX8-NEXT:    v_and_b32_e32 v0, s3, v0
63; GFX8-NEXT:    v_and_b32_e32 v1, s3, v1
64; GFX8-NEXT:    v_lshlrev_b16_e64 v1, v1, s0
65; GFX8-NEXT:    v_lshrrev_b16_e64 v0, v0, s1
66; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
67; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
68; GFX8-NEXT:    ; return to shader part epilog
69;
70; GFX9-LABEL: s_fshr_i7:
71; GFX9:       ; %bb.0:
72; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
73; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
74; GFX9-NEXT:    s_movk_i32 s3, 0x7f
75; GFX9-NEXT:    s_and_b32 s2, s2, s3
76; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
77; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
78; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
79; GFX9-NEXT:    s_and_b32 s1, s1, s3
80; GFX9-NEXT:    v_mul_lo_u32 v1, -7, v0
81; GFX9-NEXT:    v_mul_hi_u32 v1, v0, v1
82; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
83; GFX9-NEXT:    v_mul_hi_u32 v0, s2, v0
84; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 7
85; GFX9-NEXT:    v_sub_u32_e32 v0, s2, v0
86; GFX9-NEXT:    v_subrev_u32_e32 v1, 7, v0
87; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
88; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
89; GFX9-NEXT:    v_subrev_u32_e32 v1, 7, v0
90; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
91; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
92; GFX9-NEXT:    v_sub_u16_e32 v1, 6, v0
93; GFX9-NEXT:    v_and_b32_e32 v0, s3, v0
94; GFX9-NEXT:    v_and_b32_e32 v1, s3, v1
95; GFX9-NEXT:    v_lshlrev_b16_e64 v1, v1, s0
96; GFX9-NEXT:    v_lshrrev_b16_e64 v0, v0, s1
97; GFX9-NEXT:    v_or_b32_e32 v0, v1, v0
98; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
99; GFX9-NEXT:    ; return to shader part epilog
100;
101; GFX10-LABEL: s_fshr_i7:
102; GFX10:       ; %bb.0:
103; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
104; GFX10-NEXT:    s_movk_i32 s3, 0x7f
105; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
106; GFX10-NEXT:    s_and_b32 s2, s2, s3
107; GFX10-NEXT:    s_and_b32 s1, s1, s3
108; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
109; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
110; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
111; GFX10-NEXT:    v_mul_lo_u32 v1, -7, v0
112; GFX10-NEXT:    v_mul_hi_u32 v1, v0, v1
113; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v1
114; GFX10-NEXT:    v_mul_hi_u32 v0, s2, v0
115; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 7
116; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
117; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 7, v0
118; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
119; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
120; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 7, v0
121; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
122; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
123; GFX10-NEXT:    v_sub_nc_u16 v1, 6, v0
124; GFX10-NEXT:    v_and_b32_e32 v0, s3, v0
125; GFX10-NEXT:    v_and_b32_e32 v1, s3, v1
126; GFX10-NEXT:    v_lshrrev_b16 v0, v0, s1
127; GFX10-NEXT:    v_lshlrev_b16 v1, v1, s0
128; GFX10-NEXT:    v_or_b32_e32 v0, v1, v0
129; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
130; GFX10-NEXT:    ; return to shader part epilog
131  %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt)
132  ret i7 %result
133}
134
; i7 funnel shift right via llvm.fshr.i7 with plain (non-inreg) arguments and
; the default calling convention; the checks use v0-v2 for the operands and
; return through s_setpc_b64 s[30:31].
; As in the scalar i7 test, the width is not a power of two, so the expected
; code reduces amt (masked to 0x7f) modulo 7 with the reciprocal-based sequence
; before shifting lhs left by 1 and by (6 - amt) and rhs right by amt.
; GFX8 and later use 16-bit shift instructions; GFX6 uses the 32-bit forms.
135define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
136; GFX6-LABEL: v_fshr_i7:
137; GFX6:       ; %bb.0:
138; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
139; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
140; GFX6-NEXT:    v_rcp_iflag_f32_e32 v3, v3
141; GFX6-NEXT:    v_and_b32_e32 v2, 0x7f, v2
142; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
143; GFX6-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
144; GFX6-NEXT:    v_cvt_u32_f32_e32 v3, v3
145; GFX6-NEXT:    v_mul_lo_u32 v4, -7, v3
146; GFX6-NEXT:    v_mul_hi_u32 v4, v3, v4
147; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
148; GFX6-NEXT:    v_mul_hi_u32 v3, v2, v3
149; GFX6-NEXT:    v_mov_b32_e32 v4, 0x7f
150; GFX6-NEXT:    v_and_b32_e32 v1, v1, v4
151; GFX6-NEXT:    v_mul_lo_u32 v3, v3, 7
152; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
153; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 7, v2
154; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
155; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
156; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 7, v2
157; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
158; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
159; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 6, v2
160; GFX6-NEXT:    v_and_b32_e32 v2, v2, v4
161; GFX6-NEXT:    v_and_b32_e32 v3, v3, v4
162; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v3, v0
163; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
164; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
165; GFX6-NEXT:    s_setpc_b64 s[30:31]
166;
167; GFX8-LABEL: v_fshr_i7:
168; GFX8:       ; %bb.0:
169; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
171; GFX8-NEXT:    v_rcp_iflag_f32_e32 v3, v3
172; GFX8-NEXT:    v_and_b32_e32 v2, 0x7f, v2
173; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
174; GFX8-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
175; GFX8-NEXT:    v_cvt_u32_f32_e32 v3, v3
176; GFX8-NEXT:    v_mul_lo_u32 v4, -7, v3
177; GFX8-NEXT:    v_mul_hi_u32 v4, v3, v4
178; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v4
179; GFX8-NEXT:    v_mul_hi_u32 v3, v2, v3
180; GFX8-NEXT:    v_mov_b32_e32 v4, 0x7f
181; GFX8-NEXT:    v_and_b32_e32 v1, v1, v4
182; GFX8-NEXT:    v_mul_lo_u32 v3, v3, 7
183; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v3
184; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 7, v2
185; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
186; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
187; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 7, v2
188; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
189; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
190; GFX8-NEXT:    v_sub_u16_e32 v3, 6, v2
191; GFX8-NEXT:    v_and_b32_e32 v2, v2, v4
192; GFX8-NEXT:    v_and_b32_e32 v3, v3, v4
193; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
194; GFX8-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
195; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
196; GFX8-NEXT:    s_setpc_b64 s[30:31]
197;
198; GFX9-LABEL: v_fshr_i7:
199; GFX9:       ; %bb.0:
200; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
201; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
202; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v3
203; GFX9-NEXT:    v_and_b32_e32 v2, 0x7f, v2
204; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
205; GFX9-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
206; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
207; GFX9-NEXT:    v_mul_lo_u32 v4, -7, v3
208; GFX9-NEXT:    v_mul_hi_u32 v4, v3, v4
209; GFX9-NEXT:    v_add_u32_e32 v3, v3, v4
210; GFX9-NEXT:    v_mul_hi_u32 v3, v2, v3
211; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7f
212; GFX9-NEXT:    v_and_b32_e32 v1, v1, v4
213; GFX9-NEXT:    v_mul_lo_u32 v3, v3, 7
214; GFX9-NEXT:    v_sub_u32_e32 v2, v2, v3
215; GFX9-NEXT:    v_subrev_u32_e32 v3, 7, v2
216; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
217; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
218; GFX9-NEXT:    v_subrev_u32_e32 v3, 7, v2
219; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
220; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
221; GFX9-NEXT:    v_sub_u16_e32 v3, 6, v2
222; GFX9-NEXT:    v_and_b32_e32 v2, v2, v4
223; GFX9-NEXT:    v_and_b32_e32 v3, v3, v4
224; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
225; GFX9-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
226; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
227; GFX9-NEXT:    s_setpc_b64 s[30:31]
228;
229; GFX10-LABEL: v_fshr_i7:
230; GFX10:       ; %bb.0:
231; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
232; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
233; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
234; GFX10-NEXT:    v_and_b32_e32 v2, 0x7f, v2
235; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
236; GFX10-NEXT:    v_and_b32_e32 v1, 0x7f, v1
237; GFX10-NEXT:    v_rcp_iflag_f32_e32 v3, v3
238; GFX10-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
239; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v3
240; GFX10-NEXT:    v_mul_lo_u32 v4, -7, v3
241; GFX10-NEXT:    v_mul_hi_u32 v4, v3, v4
242; GFX10-NEXT:    v_add_nc_u32_e32 v3, v3, v4
243; GFX10-NEXT:    v_mul_hi_u32 v3, v2, v3
244; GFX10-NEXT:    v_mul_lo_u32 v3, v3, 7
245; GFX10-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
246; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 7, v2
247; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
248; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
249; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 7, v2
250; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
251; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
252; GFX10-NEXT:    v_mov_b32_e32 v3, 0x7f
253; GFX10-NEXT:    v_sub_nc_u16 v4, 6, v2
254; GFX10-NEXT:    v_and_b32_e32 v2, v2, v3
255; GFX10-NEXT:    v_and_b32_e32 v4, v4, v3
256; GFX10-NEXT:    v_lshrrev_b16 v1, v2, v1
257; GFX10-NEXT:    v_lshlrev_b16 v0, v4, v0
258; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
259; GFX10-NEXT:    s_setpc_b64 s[30:31]
260  %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt)
261  ret i7 %result
262}
263
; Uniform i8 funnel shift right via llvm.fshr.i8 with inreg operands.
; The expected code stays entirely on scalar instructions: the right-shift
; amount is amt & 7, the left-shift amount is 7 & ~amt (s_andn2_b32), lhs is
; pre-shifted left by 1, and rhs is masked to 0xff before the right shift.
; NOTE(review): on GFX8 and later the checks also contain
; "s_bfe_u32 s1, s1, 0x100000" right after the 0xff mask; presumably a 16-bit
; zero-extension that the mask already makes redundant - confirm whether this
; is a known missed combine.
264define amdgpu_ps i8 @s_fshr_i8(i8 inreg %lhs, i8 inreg %rhs, i8 inreg %amt) {
265; GFX6-LABEL: s_fshr_i8:
266; GFX6:       ; %bb.0:
267; GFX6-NEXT:    s_and_b32 s3, s2, 7
268; GFX6-NEXT:    s_andn2_b32 s2, 7, s2
269; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
270; GFX6-NEXT:    s_and_b32 s1, s1, 0xff
271; GFX6-NEXT:    s_lshl_b32 s0, s0, s2
272; GFX6-NEXT:    s_lshr_b32 s1, s1, s3
273; GFX6-NEXT:    s_or_b32 s0, s0, s1
274; GFX6-NEXT:    ; return to shader part epilog
275;
276; GFX8-LABEL: s_fshr_i8:
277; GFX8:       ; %bb.0:
278; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
279; GFX8-NEXT:    s_and_b32 s3, s2, 7
280; GFX8-NEXT:    s_andn2_b32 s2, 7, s2
281; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
282; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
283; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
284; GFX8-NEXT:    s_lshr_b32 s1, s1, s3
285; GFX8-NEXT:    s_or_b32 s0, s0, s1
286; GFX8-NEXT:    ; return to shader part epilog
287;
288; GFX9-LABEL: s_fshr_i8:
289; GFX9:       ; %bb.0:
290; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
291; GFX9-NEXT:    s_and_b32 s3, s2, 7
292; GFX9-NEXT:    s_andn2_b32 s2, 7, s2
293; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
294; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
295; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
296; GFX9-NEXT:    s_lshr_b32 s1, s1, s3
297; GFX9-NEXT:    s_or_b32 s0, s0, s1
298; GFX9-NEXT:    ; return to shader part epilog
299;
300; GFX10-LABEL: s_fshr_i8:
301; GFX10:       ; %bb.0:
302; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
303; GFX10-NEXT:    s_and_b32 s3, s2, 7
304; GFX10-NEXT:    s_andn2_b32 s2, 7, s2
305; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
306; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
307; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
308; GFX10-NEXT:    s_lshr_b32 s1, s1, s3
309; GFX10-NEXT:    s_or_b32 s0, s0, s1
310; GFX10-NEXT:    ; return to shader part epilog
311  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt)
312  ret i8 %result
313}
314
; i8 funnel shift right via llvm.fshr.i8 with plain (non-inreg) arguments; the
; checks use v0-v2 for the operands.
; Expected code: right-shift amount is amt & 7, left-shift amount is
; (amt ^ -1) & 7, lhs is pre-shifted left by 1, and the two halves are OR'ed.
; The zero-extension of rhs differs per target in the checks: GFX6 and GFX10
; mask with 0xff, while GFX8 and GFX9 fold it into the shift with an SDWA
; src1_sel:BYTE_0 operand.
315define i8 @v_fshr_i8(i8 %lhs, i8 %rhs, i8 %amt) {
316; GFX6-LABEL: v_fshr_i8:
317; GFX6:       ; %bb.0:
318; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
319; GFX6-NEXT:    v_and_b32_e32 v3, 7, v2
320; GFX6-NEXT:    v_xor_b32_e32 v2, -1, v2
321; GFX6-NEXT:    v_and_b32_e32 v2, 7, v2
322; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
323; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v1
324; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
325; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v3, v1
326; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
327; GFX6-NEXT:    s_setpc_b64 s[30:31]
328;
329; GFX8-LABEL: v_fshr_i8:
330; GFX8:       ; %bb.0:
331; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
332; GFX8-NEXT:    v_and_b32_e32 v3, 7, v2
333; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
334; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
335; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
336; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
337; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
338; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
339; GFX8-NEXT:    s_setpc_b64 s[30:31]
340;
341; GFX9-LABEL: v_fshr_i8:
342; GFX9:       ; %bb.0:
343; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
344; GFX9-NEXT:    v_and_b32_e32 v3, 7, v2
345; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v2
346; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
347; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
348; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
349; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
350; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
351; GFX9-NEXT:    s_setpc_b64 s[30:31]
352;
353; GFX10-LABEL: v_fshr_i8:
354; GFX10:       ; %bb.0:
355; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
356; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
357; GFX10-NEXT:    v_xor_b32_e32 v3, -1, v2
358; GFX10-NEXT:    v_and_b32_e32 v2, 7, v2
359; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
360; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
361; GFX10-NEXT:    v_and_b32_e32 v3, 7, v3
362; GFX10-NEXT:    v_lshrrev_b16 v1, v2, v1
363; GFX10-NEXT:    v_lshlrev_b16 v0, v3, v0
364; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
365; GFX10-NEXT:    s_setpc_b64 s[30:31]
366  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt)
367  ret i8 %result
368}
369
; Uniform i8 funnel shift right by the constant 4 (llvm.fshr.i8 with i8 4).
; With a constant amount the expected code is lhs << 4 OR'ed with the upper
; bits of rhs. On GFX6 the checks extract those bits with a single
; "s_bfe_u32 s1, s1, 0x40004"; GFX8 and later instead mask rhs with 0xff,
; apply "s_bfe_u32 ..., 0x100000" and shift right by 4.
; NOTE(review): the GFX8+ sequence looks longer than necessary next to the
; GFX6 one - presumably a missed combine on the 16-bit path; confirm.
370define amdgpu_ps i8 @s_fshr_i8_4(i8 inreg %lhs, i8 inreg %rhs) {
371; GFX6-LABEL: s_fshr_i8_4:
372; GFX6:       ; %bb.0:
373; GFX6-NEXT:    s_lshl_b32 s0, s0, 4
374; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x40004
375; GFX6-NEXT:    s_or_b32 s0, s0, s1
376; GFX6-NEXT:    ; return to shader part epilog
377;
378; GFX8-LABEL: s_fshr_i8_4:
379; GFX8:       ; %bb.0:
380; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
381; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
382; GFX8-NEXT:    s_lshl_b32 s0, s0, 4
383; GFX8-NEXT:    s_lshr_b32 s1, s1, 4
384; GFX8-NEXT:    s_or_b32 s0, s0, s1
385; GFX8-NEXT:    ; return to shader part epilog
386;
387; GFX9-LABEL: s_fshr_i8_4:
388; GFX9:       ; %bb.0:
389; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
390; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
391; GFX9-NEXT:    s_lshl_b32 s0, s0, 4
392; GFX9-NEXT:    s_lshr_b32 s1, s1, 4
393; GFX9-NEXT:    s_or_b32 s0, s0, s1
394; GFX9-NEXT:    ; return to shader part epilog
395;
396; GFX10-LABEL: s_fshr_i8_4:
397; GFX10:       ; %bb.0:
398; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
399; GFX10-NEXT:    s_lshl_b32 s0, s0, 4
400; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
401; GFX10-NEXT:    s_lshr_b32 s1, s1, 4
402; GFX10-NEXT:    s_or_b32 s0, s0, s1
403; GFX10-NEXT:    ; return to shader part epilog
404  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4)
405  ret i8 %result
406}
407
; i8 funnel shift right by the constant 4 with plain (non-inreg) arguments.
; Expected code is lhs << 4 OR'ed with rhs >> 4, with rhs zero-extended from
; 8 bits. Per the checks: GFX6 uses "v_bfe_u32 v1, v1, 4, 4"; GFX8 and GFX9 use
; an SDWA shift reading BYTE_0 of rhs, with the amount 4 first materialized in
; a register (v2 on GFX8, s4 on GFX9); GFX10 masks with 0xff and shifts with an
; inline constant.
408define i8 @v_fshr_i8_4(i8 %lhs, i8 %rhs) {
409; GFX6-LABEL: v_fshr_i8_4:
410; GFX6:       ; %bb.0:
411; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
412; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
413; GFX6-NEXT:    v_bfe_u32 v1, v1, 4, 4
414; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
415; GFX6-NEXT:    s_setpc_b64 s[30:31]
416;
417; GFX8-LABEL: v_fshr_i8_4:
418; GFX8:       ; %bb.0:
419; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
420; GFX8-NEXT:    v_mov_b32_e32 v2, 4
421; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 4, v0
422; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
423; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
424; GFX8-NEXT:    s_setpc_b64 s[30:31]
425;
426; GFX9-LABEL: v_fshr_i8_4:
427; GFX9:       ; %bb.0:
428; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
429; GFX9-NEXT:    s_mov_b32 s4, 4
430; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 4, v0
431; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
432; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
433; GFX9-NEXT:    s_setpc_b64 s[30:31]
434;
435; GFX10-LABEL: v_fshr_i8_4:
436; GFX10:       ; %bb.0:
437; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
438; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
439; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
440; GFX10-NEXT:    v_lshlrev_b16 v0, 4, v0
441; GFX10-NEXT:    v_lshrrev_b16 v1, 4, v1
442; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
443; GFX10-NEXT:    s_setpc_b64 s[30:31]
444  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4)
445  ret i8 %result
446}
447
; Uniform i8 funnel shift right by the constant 5 (llvm.fshr.i8 with i8 5).
; Unlike the amount-4 test, the two shift amounts differ here: the checks show
; lhs shifted left by 3 (8 - 5) and rhs shifted right by 5. On GFX6 the rhs
; part is a single "s_bfe_u32 s1, s1, 0x30005"; GFX8 and later mask rhs with
; 0xff, apply "s_bfe_u32 ..., 0x100000" and then shift right by 5.
448define amdgpu_ps i8 @s_fshr_i8_5(i8 inreg %lhs, i8 inreg %rhs) {
449; GFX6-LABEL: s_fshr_i8_5:
450; GFX6:       ; %bb.0:
451; GFX6-NEXT:    s_lshl_b32 s0, s0, 3
452; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x30005
453; GFX6-NEXT:    s_or_b32 s0, s0, s1
454; GFX6-NEXT:    ; return to shader part epilog
455;
456; GFX8-LABEL: s_fshr_i8_5:
457; GFX8:       ; %bb.0:
458; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
459; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
460; GFX8-NEXT:    s_lshl_b32 s0, s0, 3
461; GFX8-NEXT:    s_lshr_b32 s1, s1, 5
462; GFX8-NEXT:    s_or_b32 s0, s0, s1
463; GFX8-NEXT:    ; return to shader part epilog
464;
465; GFX9-LABEL: s_fshr_i8_5:
466; GFX9:       ; %bb.0:
467; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
468; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
469; GFX9-NEXT:    s_lshl_b32 s0, s0, 3
470; GFX9-NEXT:    s_lshr_b32 s1, s1, 5
471; GFX9-NEXT:    s_or_b32 s0, s0, s1
472; GFX9-NEXT:    ; return to shader part epilog
473;
474; GFX10-LABEL: s_fshr_i8_5:
475; GFX10:       ; %bb.0:
476; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
477; GFX10-NEXT:    s_lshl_b32 s0, s0, 3
478; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
479; GFX10-NEXT:    s_lshr_b32 s1, s1, 5
480; GFX10-NEXT:    s_or_b32 s0, s0, s1
481; GFX10-NEXT:    ; return to shader part epilog
482  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5)
483  ret i8 %result
484}
485
; i8 funnel shift right by the constant 5 with plain (non-inreg) arguments.
; Expected code is lhs << 3 OR'ed with rhs >> 5, with rhs zero-extended from
; 8 bits. Per the checks: GFX6 uses "v_bfe_u32 v1, v1, 5, 3"; GFX8 and GFX9
; move 5 into v2 and use an SDWA shift reading BYTE_0 of rhs; GFX10 masks with
; 0xff and shifts with an inline constant.
486define i8 @v_fshr_i8_5(i8 %lhs, i8 %rhs) {
487; GFX6-LABEL: v_fshr_i8_5:
488; GFX6:       ; %bb.0:
489; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
490; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
491; GFX6-NEXT:    v_bfe_u32 v1, v1, 5, 3
492; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
493; GFX6-NEXT:    s_setpc_b64 s[30:31]
494;
495; GFX8-LABEL: v_fshr_i8_5:
496; GFX8:       ; %bb.0:
497; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
498; GFX8-NEXT:    v_mov_b32_e32 v2, 5
499; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 3, v0
500; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
501; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
502; GFX8-NEXT:    s_setpc_b64 s[30:31]
503;
504; GFX9-LABEL: v_fshr_i8_5:
505; GFX9:       ; %bb.0:
506; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
507; GFX9-NEXT:    v_mov_b32_e32 v2, 5
508; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 3, v0
509; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
510; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
511; GFX9-NEXT:    s_setpc_b64 s[30:31]
512;
513; GFX10-LABEL: v_fshr_i8_5:
514; GFX10:       ; %bb.0:
515; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
516; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
517; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
518; GFX10-NEXT:    v_lshlrev_b16 v0, 3, v0
519; GFX10-NEXT:    v_lshrrev_b16 v1, 5, v1
520; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
521; GFX10-NEXT:    s_setpc_b64 s[30:31]
522  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5)
523  ret i8 %result
524}
525
; Uniform <2 x i8> funnel shift right. The vector operands are passed and
; returned as i16 values and converted with bitcasts, so the test covers
; unpacking the bytes, shifting each one, and repacking the result.
; In the checks each byte is handled like the scalar i8 case (amount & 7 for
; the right shift, 7 & ~amount for the left shift, lhs pre-shifted by 1); the
; two results are masked with 0xff and combined as lo | (hi << 8).
; NOTE(review): on GFX8 and later the final shift amount is produced by
; "s_bfe_u32 s2, 8, 0x100000" instead of an inline constant 8 - presumably an
; unfolded zero-extension of a constant; confirm whether this is intentional.
526define amdgpu_ps i16 @s_fshr_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg, i16 inreg %amt.arg) {
527; GFX6-LABEL: s_fshr_v2i8:
528; GFX6:       ; %bb.0:
529; GFX6-NEXT:    s_lshr_b32 s3, s0, 8
530; GFX6-NEXT:    s_lshr_b32 s4, s2, 8
531; GFX6-NEXT:    s_and_b32 s5, s2, 7
532; GFX6-NEXT:    s_andn2_b32 s2, 7, s2
533; GFX6-NEXT:    s_movk_i32 s6, 0xff
534; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
535; GFX6-NEXT:    s_lshl_b32 s0, s0, s2
536; GFX6-NEXT:    s_and_b32 s2, s1, s6
537; GFX6-NEXT:    s_lshr_b32 s2, s2, s5
538; GFX6-NEXT:    s_or_b32 s0, s0, s2
539; GFX6-NEXT:    s_and_b32 s2, s4, 7
540; GFX6-NEXT:    s_andn2_b32 s4, 7, s4
541; GFX6-NEXT:    s_lshl_b32 s3, s3, 1
542; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x80008
543; GFX6-NEXT:    s_lshl_b32 s3, s3, s4
544; GFX6-NEXT:    s_lshr_b32 s1, s1, s2
545; GFX6-NEXT:    s_or_b32 s1, s3, s1
546; GFX6-NEXT:    s_and_b32 s1, s1, s6
547; GFX6-NEXT:    s_and_b32 s0, s0, s6
548; GFX6-NEXT:    s_lshl_b32 s1, s1, 8
549; GFX6-NEXT:    s_or_b32 s0, s0, s1
550; GFX6-NEXT:    ; return to shader part epilog
551;
552; GFX8-LABEL: s_fshr_v2i8:
553; GFX8:       ; %bb.0:
554; GFX8-NEXT:    s_lshr_b32 s3, s0, 8
555; GFX8-NEXT:    s_lshr_b32 s5, s2, 8
556; GFX8-NEXT:    s_and_b32 s6, s2, 7
557; GFX8-NEXT:    s_andn2_b32 s2, 7, s2
558; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
559; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
560; GFX8-NEXT:    s_movk_i32 s2, 0xff
561; GFX8-NEXT:    s_lshr_b32 s4, s1, 8
562; GFX8-NEXT:    s_and_b32 s1, s1, s2
563; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
564; GFX8-NEXT:    s_lshr_b32 s1, s1, s6
565; GFX8-NEXT:    s_and_b32 s4, s4, s2
566; GFX8-NEXT:    s_or_b32 s0, s0, s1
567; GFX8-NEXT:    s_and_b32 s1, s5, 7
568; GFX8-NEXT:    s_andn2_b32 s5, 7, s5
569; GFX8-NEXT:    s_lshl_b32 s3, s3, 1
570; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
571; GFX8-NEXT:    s_lshl_b32 s3, s3, s5
572; GFX8-NEXT:    s_lshr_b32 s1, s4, s1
573; GFX8-NEXT:    s_or_b32 s1, s3, s1
574; GFX8-NEXT:    s_and_b32 s0, s0, s2
575; GFX8-NEXT:    s_and_b32 s1, s1, s2
576; GFX8-NEXT:    s_bfe_u32 s2, 8, 0x100000
577; GFX8-NEXT:    s_lshl_b32 s1, s1, s2
578; GFX8-NEXT:    s_or_b32 s0, s0, s1
579; GFX8-NEXT:    ; return to shader part epilog
580;
581; GFX9-LABEL: s_fshr_v2i8:
582; GFX9:       ; %bb.0:
583; GFX9-NEXT:    s_lshr_b32 s3, s0, 8
584; GFX9-NEXT:    s_lshr_b32 s5, s2, 8
585; GFX9-NEXT:    s_and_b32 s6, s2, 7
586; GFX9-NEXT:    s_andn2_b32 s2, 7, s2
587; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
588; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
589; GFX9-NEXT:    s_movk_i32 s2, 0xff
590; GFX9-NEXT:    s_lshr_b32 s4, s1, 8
591; GFX9-NEXT:    s_and_b32 s1, s1, s2
592; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
593; GFX9-NEXT:    s_lshr_b32 s1, s1, s6
594; GFX9-NEXT:    s_and_b32 s4, s4, s2
595; GFX9-NEXT:    s_or_b32 s0, s0, s1
596; GFX9-NEXT:    s_and_b32 s1, s5, 7
597; GFX9-NEXT:    s_andn2_b32 s5, 7, s5
598; GFX9-NEXT:    s_lshl_b32 s3, s3, 1
599; GFX9-NEXT:    s_bfe_u32 s4, s4, 0x100000
600; GFX9-NEXT:    s_lshl_b32 s3, s3, s5
601; GFX9-NEXT:    s_lshr_b32 s1, s4, s1
602; GFX9-NEXT:    s_or_b32 s1, s3, s1
603; GFX9-NEXT:    s_and_b32 s0, s0, s2
604; GFX9-NEXT:    s_and_b32 s1, s1, s2
605; GFX9-NEXT:    s_bfe_u32 s2, 8, 0x100000
606; GFX9-NEXT:    s_lshl_b32 s1, s1, s2
607; GFX9-NEXT:    s_or_b32 s0, s0, s1
608; GFX9-NEXT:    ; return to shader part epilog
609;
610; GFX10-LABEL: s_fshr_v2i8:
611; GFX10:       ; %bb.0:
612; GFX10-NEXT:    s_lshr_b32 s4, s1, 8
613; GFX10-NEXT:    s_movk_i32 s7, 0xff
614; GFX10-NEXT:    s_lshr_b32 s3, s0, 8
615; GFX10-NEXT:    s_lshr_b32 s5, s2, 8
616; GFX10-NEXT:    s_and_b32 s6, s2, 7
617; GFX10-NEXT:    s_andn2_b32 s2, 7, s2
618; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
619; GFX10-NEXT:    s_and_b32 s4, s4, s7
620; GFX10-NEXT:    s_and_b32 s1, s1, s7
621; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
622; GFX10-NEXT:    s_and_b32 s2, s5, 7
623; GFX10-NEXT:    s_andn2_b32 s5, 7, s5
624; GFX10-NEXT:    s_lshl_b32 s3, s3, 1
625; GFX10-NEXT:    s_bfe_u32 s4, s4, 0x100000
626; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
627; GFX10-NEXT:    s_lshl_b32 s3, s3, s5
628; GFX10-NEXT:    s_lshr_b32 s2, s4, s2
629; GFX10-NEXT:    s_lshr_b32 s1, s1, s6
630; GFX10-NEXT:    s_or_b32 s2, s3, s2
631; GFX10-NEXT:    s_or_b32 s0, s0, s1
632; GFX10-NEXT:    s_and_b32 s1, s2, s7
633; GFX10-NEXT:    s_bfe_u32 s2, 8, 0x100000
634; GFX10-NEXT:    s_and_b32 s0, s0, s7
635; GFX10-NEXT:    s_lshl_b32 s1, s1, s2
636; GFX10-NEXT:    s_or_b32 s0, s0, s1
637; GFX10-NEXT:    ; return to shader part epilog
638  %lhs = bitcast i16 %lhs.arg to <2 x i8>
639  %rhs = bitcast i16 %rhs.arg to <2 x i8>
640  %amt = bitcast i16 %amt.arg to <2 x i8>
641  %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt)
642  %cast.result = bitcast <2 x i8> %result to i16
643  ret i16 %cast.result
644}
645
; <2 x i8> funnel shift right with plain (non-inreg) arguments. The vector
; operands are passed and returned as i16 values and converted with bitcasts.
; In the checks the high byte of each operand is split off with a right shift
; by 8, each byte pair is shifted like the scalar i8 case ((amt ^ -1) & 7 for
; the left shift, amt & 7 for the right shift), and the two results are
; repacked into one 16-bit value.
; The repacking differs per target: GFX6 uses and/shift/or, GFX8 and GFX9 use
; v_or_b32_sdwa with src0_sel:BYTE_0, and GFX10 additionally places the high
; byte with v_and_b32_sdwa dst_sel:BYTE_1.
646define i16 @v_fshr_v2i8(i16 %lhs.arg, i16 %rhs.arg, i16 %amt.arg) {
647; GFX6-LABEL: v_fshr_v2i8:
648; GFX6:       ; %bb.0:
649; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
650; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 8, v2
651; GFX6-NEXT:    v_and_b32_e32 v5, 7, v2
652; GFX6-NEXT:    v_xor_b32_e32 v2, -1, v2
653; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
654; GFX6-NEXT:    v_and_b32_e32 v2, 7, v2
655; GFX6-NEXT:    s_movk_i32 s4, 0xff
656; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
657; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
658; GFX6-NEXT:    v_and_b32_e32 v2, s4, v1
659; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v5, v2
660; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
661; GFX6-NEXT:    v_and_b32_e32 v2, 7, v4
662; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v4
663; GFX6-NEXT:    v_and_b32_e32 v4, 7, v4
664; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 1, v3
665; GFX6-NEXT:    v_bfe_u32 v1, v1, 8, 8
666; GFX6-NEXT:    v_lshlrev_b32_e32 v3, v4, v3
667; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
668; GFX6-NEXT:    v_or_b32_e32 v1, v3, v1
669; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
670; GFX6-NEXT:    v_and_b32_e32 v0, 0xff, v0
671; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
672; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
673; GFX6-NEXT:    s_setpc_b64 s[30:31]
674;
675; GFX8-LABEL: v_fshr_v2i8:
676; GFX8:       ; %bb.0:
677; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
678; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
679; GFX8-NEXT:    v_and_b32_e32 v6, 7, v2
680; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
681; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
682; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
683; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
684; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
685; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
686; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
687; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v5
688; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
689; GFX8-NEXT:    v_and_b32_e32 v1, 7, v5
690; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
691; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 1, v3
692; GFX8-NEXT:    v_lshlrev_b16_e32 v2, v2, v3
693; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
694; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
695; GFX8-NEXT:    v_and_b32_e32 v1, 0xff, v1
696; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
697; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
698; GFX8-NEXT:    s_setpc_b64 s[30:31]
699;
700; GFX9-LABEL: v_fshr_v2i8:
701; GFX9:       ; %bb.0:
702; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
703; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
704; GFX9-NEXT:    v_and_b32_e32 v6, 7, v2
705; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v2
706; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
707; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
708; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
709; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
710; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
711; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
712; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v5
713; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
714; GFX9-NEXT:    v_and_b32_e32 v1, 7, v5
715; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
716; GFX9-NEXT:    v_lshlrev_b16_e32 v3, 1, v3
717; GFX9-NEXT:    v_lshlrev_b16_e32 v2, v2, v3
718; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
719; GFX9-NEXT:    v_or_b32_e32 v1, v2, v1
720; GFX9-NEXT:    v_and_b32_e32 v1, 0xff, v1
721; GFX9-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
722; GFX9-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
723; GFX9-NEXT:    s_setpc_b64 s[30:31]
724;
725; GFX10-LABEL: v_fshr_v2i8:
726; GFX10:       ; %bb.0:
727; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
728; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
729; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 8, v2
730; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 8, v0
731; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
732; GFX10-NEXT:    s_movk_i32 s4, 0xff
733; GFX10-NEXT:    v_and_b32_e32 v7, 7, v2
734; GFX10-NEXT:    v_xor_b32_e32 v6, -1, v3
735; GFX10-NEXT:    v_xor_b32_e32 v2, -1, v2
736; GFX10-NEXT:    v_and_b32_e32 v3, 7, v3
737; GFX10-NEXT:    v_lshlrev_b16 v4, 1, v4
738; GFX10-NEXT:    v_and_b32_e32 v5, s4, v5
739; GFX10-NEXT:    v_and_b32_e32 v6, 7, v6
740; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
741; GFX10-NEXT:    v_and_b32_e32 v1, s4, v1
742; GFX10-NEXT:    v_and_b32_e32 v2, 7, v2
743; GFX10-NEXT:    v_lshrrev_b16 v3, v3, v5
744; GFX10-NEXT:    v_lshlrev_b16 v4, v6, v4
745; GFX10-NEXT:    v_lshrrev_b16 v1, v7, v1
746; GFX10-NEXT:    v_lshlrev_b16 v0, v2, v0
747; GFX10-NEXT:    v_or_b32_e32 v2, v4, v3
748; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
749; GFX10-NEXT:    v_and_b32_sdwa v1, v2, s4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
750; GFX10-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
751; GFX10-NEXT:    s_setpc_b64 s[30:31]
752  %lhs = bitcast i16 %lhs.arg to <2 x i8>
753  %rhs = bitcast i16 %rhs.arg to <2 x i8>
754  %amt = bitcast i16 %amt.arg to <2 x i8>
755  %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt)
756  %cast.result = bitcast <2 x i8> %result to i16
757  ret i16 %cast.result
758}
759
; Funnel shift right of <4 x i8> where every operand is an inreg argument of an
; amdgpu_ps function. The three vectors arrive packed in i32 values and are
; bitcast to <4 x i8> before the llvm.fshr.v4i8 call; the result is bitcast
; back to i32 for the return.
; In the expected output below each byte lane is handled on its own with s_*
; instructions: the amount is masked with 7, its complement is masked with 7
; (s_andn2_b32), the lhs byte is shifted left by 1 and then by the complemented
; amount, the zero-extended rhs byte is shifted right by the masked amount, and
; the two halves are OR'd. The four lane results are masked with 0xff and
; merged into one dword with shifts of 8, 16 and 24.
760define amdgpu_ps i32 @s_fshr_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg, i32 inreg %amt.arg) {
761; GFX6-LABEL: s_fshr_v4i8:
762; GFX6:       ; %bb.0:
763; GFX6-NEXT:    s_lshr_b32 s3, s0, 8
764; GFX6-NEXT:    s_lshr_b32 s4, s0, 16
765; GFX6-NEXT:    s_lshr_b32 s5, s0, 24
766; GFX6-NEXT:    s_lshr_b32 s7, s2, 8
767; GFX6-NEXT:    s_lshr_b32 s8, s2, 16
768; GFX6-NEXT:    s_lshr_b32 s9, s2, 24
769; GFX6-NEXT:    s_and_b32 s10, s2, 7
770; GFX6-NEXT:    s_andn2_b32 s2, 7, s2
771; GFX6-NEXT:    s_movk_i32 s11, 0xff
772; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
773; GFX6-NEXT:    s_lshl_b32 s0, s0, s2
774; GFX6-NEXT:    s_and_b32 s2, s1, s11
775; GFX6-NEXT:    s_lshr_b32 s2, s2, s10
776; GFX6-NEXT:    s_or_b32 s0, s0, s2
777; GFX6-NEXT:    s_and_b32 s2, s7, 7
778; GFX6-NEXT:    s_andn2_b32 s7, 7, s7
779; GFX6-NEXT:    s_lshl_b32 s3, s3, 1
780; GFX6-NEXT:    s_lshl_b32 s3, s3, s7
781; GFX6-NEXT:    s_bfe_u32 s7, s1, 0x80008
782; GFX6-NEXT:    s_lshr_b32 s2, s7, s2
783; GFX6-NEXT:    s_lshr_b32 s6, s1, 24
784; GFX6-NEXT:    s_or_b32 s2, s3, s2
785; GFX6-NEXT:    s_and_b32 s3, s8, 7
786; GFX6-NEXT:    s_andn2_b32 s7, 7, s8
787; GFX6-NEXT:    s_lshl_b32 s4, s4, 1
788; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x80010
789; GFX6-NEXT:    s_lshl_b32 s4, s4, s7
790; GFX6-NEXT:    s_lshr_b32 s1, s1, s3
791; GFX6-NEXT:    s_or_b32 s1, s4, s1
792; GFX6-NEXT:    s_and_b32 s3, s9, 7
793; GFX6-NEXT:    s_andn2_b32 s4, 7, s9
794; GFX6-NEXT:    s_lshl_b32 s5, s5, 1
795; GFX6-NEXT:    s_and_b32 s2, s2, s11
796; GFX6-NEXT:    s_lshl_b32 s4, s5, s4
797; GFX6-NEXT:    s_lshr_b32 s3, s6, s3
798; GFX6-NEXT:    s_and_b32 s0, s0, s11
799; GFX6-NEXT:    s_lshl_b32 s2, s2, 8
800; GFX6-NEXT:    s_and_b32 s1, s1, s11
801; GFX6-NEXT:    s_or_b32 s3, s4, s3
802; GFX6-NEXT:    s_or_b32 s0, s0, s2
803; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
804; GFX6-NEXT:    s_or_b32 s0, s0, s1
805; GFX6-NEXT:    s_and_b32 s1, s3, s11
806; GFX6-NEXT:    s_lshl_b32 s1, s1, 24
807; GFX6-NEXT:    s_or_b32 s0, s0, s1
808; GFX6-NEXT:    ; return to shader part epilog
809;
810; GFX8-LABEL: s_fshr_v4i8:
811; GFX8:       ; %bb.0:
812; GFX8-NEXT:    s_movk_i32 s13, 0xff
813; GFX8-NEXT:    s_lshr_b32 s3, s0, 8
814; GFX8-NEXT:    s_lshr_b32 s4, s0, 16
815; GFX8-NEXT:    s_lshr_b32 s5, s0, 24
816; GFX8-NEXT:    s_lshr_b32 s6, s1, 8
817; GFX8-NEXT:    s_lshr_b32 s7, s1, 16
818; GFX8-NEXT:    s_lshr_b32 s8, s1, 24
819; GFX8-NEXT:    s_lshr_b32 s9, s2, 8
820; GFX8-NEXT:    s_lshr_b32 s10, s2, 16
821; GFX8-NEXT:    s_lshr_b32 s11, s2, 24
822; GFX8-NEXT:    s_and_b32 s12, s2, 7
823; GFX8-NEXT:    s_andn2_b32 s2, 7, s2
824; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
825; GFX8-NEXT:    s_and_b32 s1, s1, s13
826; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
827; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
828; GFX8-NEXT:    s_andn2_b32 s2, 7, s9
829; GFX8-NEXT:    s_lshl_b32 s3, s3, 1
830; GFX8-NEXT:    s_lshr_b32 s1, s1, s12
831; GFX8-NEXT:    s_lshl_b32 s2, s3, s2
832; GFX8-NEXT:    s_and_b32 s3, s6, s13
833; GFX8-NEXT:    s_or_b32 s0, s0, s1
834; GFX8-NEXT:    s_and_b32 s1, s9, 7
835; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
836; GFX8-NEXT:    s_lshr_b32 s1, s3, s1
837; GFX8-NEXT:    s_andn2_b32 s3, 7, s10
838; GFX8-NEXT:    s_lshl_b32 s4, s4, 1
839; GFX8-NEXT:    s_lshl_b32 s3, s4, s3
840; GFX8-NEXT:    s_and_b32 s4, s7, s13
841; GFX8-NEXT:    s_or_b32 s1, s2, s1
842; GFX8-NEXT:    s_and_b32 s2, s10, 7
843; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
844; GFX8-NEXT:    s_lshr_b32 s2, s4, s2
845; GFX8-NEXT:    s_and_b32 s1, s1, s13
846; GFX8-NEXT:    s_or_b32 s2, s3, s2
847; GFX8-NEXT:    s_and_b32 s3, s11, 7
848; GFX8-NEXT:    s_andn2_b32 s4, 7, s11
849; GFX8-NEXT:    s_lshl_b32 s5, s5, 1
850; GFX8-NEXT:    s_and_b32 s0, s0, s13
851; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
852; GFX8-NEXT:    s_lshl_b32 s4, s5, s4
853; GFX8-NEXT:    s_lshr_b32 s3, s8, s3
854; GFX8-NEXT:    s_or_b32 s0, s0, s1
855; GFX8-NEXT:    s_and_b32 s1, s2, s13
856; GFX8-NEXT:    s_or_b32 s3, s4, s3
857; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
858; GFX8-NEXT:    s_or_b32 s0, s0, s1
859; GFX8-NEXT:    s_and_b32 s1, s3, s13
860; GFX8-NEXT:    s_lshl_b32 s1, s1, 24
861; GFX8-NEXT:    s_or_b32 s0, s0, s1
862; GFX8-NEXT:    ; return to shader part epilog
863;
864; GFX9-LABEL: s_fshr_v4i8:
865; GFX9:       ; %bb.0:
866; GFX9-NEXT:    s_movk_i32 s13, 0xff
867; GFX9-NEXT:    s_lshr_b32 s3, s0, 8
868; GFX9-NEXT:    s_lshr_b32 s4, s0, 16
869; GFX9-NEXT:    s_lshr_b32 s5, s0, 24
870; GFX9-NEXT:    s_lshr_b32 s6, s1, 8
871; GFX9-NEXT:    s_lshr_b32 s7, s1, 16
872; GFX9-NEXT:    s_lshr_b32 s8, s1, 24
873; GFX9-NEXT:    s_lshr_b32 s9, s2, 8
874; GFX9-NEXT:    s_lshr_b32 s10, s2, 16
875; GFX9-NEXT:    s_lshr_b32 s11, s2, 24
876; GFX9-NEXT:    s_and_b32 s12, s2, 7
877; GFX9-NEXT:    s_andn2_b32 s2, 7, s2
878; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
879; GFX9-NEXT:    s_and_b32 s1, s1, s13
880; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
881; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
882; GFX9-NEXT:    s_andn2_b32 s2, 7, s9
883; GFX9-NEXT:    s_lshl_b32 s3, s3, 1
884; GFX9-NEXT:    s_lshr_b32 s1, s1, s12
885; GFX9-NEXT:    s_lshl_b32 s2, s3, s2
886; GFX9-NEXT:    s_and_b32 s3, s6, s13
887; GFX9-NEXT:    s_or_b32 s0, s0, s1
888; GFX9-NEXT:    s_and_b32 s1, s9, 7
889; GFX9-NEXT:    s_bfe_u32 s3, s3, 0x100000
890; GFX9-NEXT:    s_lshr_b32 s1, s3, s1
891; GFX9-NEXT:    s_andn2_b32 s3, 7, s10
892; GFX9-NEXT:    s_lshl_b32 s4, s4, 1
893; GFX9-NEXT:    s_lshl_b32 s3, s4, s3
894; GFX9-NEXT:    s_and_b32 s4, s7, s13
895; GFX9-NEXT:    s_or_b32 s1, s2, s1
896; GFX9-NEXT:    s_and_b32 s2, s10, 7
897; GFX9-NEXT:    s_bfe_u32 s4, s4, 0x100000
898; GFX9-NEXT:    s_lshr_b32 s2, s4, s2
899; GFX9-NEXT:    s_and_b32 s1, s1, s13
900; GFX9-NEXT:    s_or_b32 s2, s3, s2
901; GFX9-NEXT:    s_and_b32 s3, s11, 7
902; GFX9-NEXT:    s_andn2_b32 s4, 7, s11
903; GFX9-NEXT:    s_lshl_b32 s5, s5, 1
904; GFX9-NEXT:    s_and_b32 s0, s0, s13
905; GFX9-NEXT:    s_lshl_b32 s1, s1, 8
906; GFX9-NEXT:    s_lshl_b32 s4, s5, s4
907; GFX9-NEXT:    s_lshr_b32 s3, s8, s3
908; GFX9-NEXT:    s_or_b32 s0, s0, s1
909; GFX9-NEXT:    s_and_b32 s1, s2, s13
910; GFX9-NEXT:    s_or_b32 s3, s4, s3
911; GFX9-NEXT:    s_lshl_b32 s1, s1, 16
912; GFX9-NEXT:    s_or_b32 s0, s0, s1
913; GFX9-NEXT:    s_and_b32 s1, s3, s13
914; GFX9-NEXT:    s_lshl_b32 s1, s1, 24
915; GFX9-NEXT:    s_or_b32 s0, s0, s1
916; GFX9-NEXT:    ; return to shader part epilog
917;
918; GFX10-LABEL: s_fshr_v4i8:
919; GFX10:       ; %bb.0:
920; GFX10-NEXT:    s_lshr_b32 s6, s1, 8
921; GFX10-NEXT:    s_movk_i32 s13, 0xff
922; GFX10-NEXT:    s_lshr_b32 s3, s0, 8
923; GFX10-NEXT:    s_lshr_b32 s4, s0, 16
924; GFX10-NEXT:    s_lshr_b32 s5, s0, 24
925; GFX10-NEXT:    s_lshr_b32 s7, s1, 16
926; GFX10-NEXT:    s_lshr_b32 s8, s1, 24
927; GFX10-NEXT:    s_lshr_b32 s9, s2, 8
928; GFX10-NEXT:    s_lshr_b32 s10, s2, 16
929; GFX10-NEXT:    s_lshr_b32 s11, s2, 24
930; GFX10-NEXT:    s_and_b32 s12, s2, 7
931; GFX10-NEXT:    s_andn2_b32 s2, 7, s2
932; GFX10-NEXT:    s_and_b32 s1, s1, s13
933; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
934; GFX10-NEXT:    s_and_b32 s6, s6, s13
935; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
936; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
937; GFX10-NEXT:    s_and_b32 s2, s9, 7
938; GFX10-NEXT:    s_andn2_b32 s9, 7, s9
939; GFX10-NEXT:    s_lshl_b32 s3, s3, 1
940; GFX10-NEXT:    s_bfe_u32 s6, s6, 0x100000
941; GFX10-NEXT:    s_lshr_b32 s1, s1, s12
942; GFX10-NEXT:    s_lshl_b32 s3, s3, s9
943; GFX10-NEXT:    s_lshr_b32 s2, s6, s2
944; GFX10-NEXT:    s_and_b32 s6, s7, s13
945; GFX10-NEXT:    s_or_b32 s0, s0, s1
946; GFX10-NEXT:    s_or_b32 s1, s3, s2
947; GFX10-NEXT:    s_and_b32 s2, s10, 7
948; GFX10-NEXT:    s_andn2_b32 s3, 7, s10
949; GFX10-NEXT:    s_lshl_b32 s4, s4, 1
950; GFX10-NEXT:    s_bfe_u32 s6, s6, 0x100000
951; GFX10-NEXT:    s_lshl_b32 s3, s4, s3
952; GFX10-NEXT:    s_lshr_b32 s2, s6, s2
953; GFX10-NEXT:    s_andn2_b32 s4, 7, s11
954; GFX10-NEXT:    s_lshl_b32 s5, s5, 1
955; GFX10-NEXT:    s_and_b32 s6, s11, 7
956; GFX10-NEXT:    s_lshl_b32 s4, s5, s4
957; GFX10-NEXT:    s_lshr_b32 s5, s8, s6
958; GFX10-NEXT:    s_or_b32 s2, s3, s2
959; GFX10-NEXT:    s_and_b32 s1, s1, s13
960; GFX10-NEXT:    s_or_b32 s3, s4, s5
961; GFX10-NEXT:    s_and_b32 s0, s0, s13
962; GFX10-NEXT:    s_lshl_b32 s1, s1, 8
963; GFX10-NEXT:    s_and_b32 s2, s2, s13
964; GFX10-NEXT:    s_or_b32 s0, s0, s1
965; GFX10-NEXT:    s_lshl_b32 s1, s2, 16
966; GFX10-NEXT:    s_and_b32 s2, s3, s13
967; GFX10-NEXT:    s_or_b32 s0, s0, s1
968; GFX10-NEXT:    s_lshl_b32 s1, s2, 24
969; GFX10-NEXT:    s_or_b32 s0, s0, s1
970; GFX10-NEXT:    ; return to shader part epilog
  ; Reinterpret the packed i32 arguments as <4 x i8> lanes, funnel-shift them,
  ; and repack the <4 x i8> result into the i32 return value.
971  %lhs = bitcast i32 %lhs.arg to <4 x i8>
972  %rhs = bitcast i32 %rhs.arg to <4 x i8>
973  %amt = bitcast i32 %amt.arg to <4 x i8>
974  %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt)
975  %cast.result = bitcast <4 x i8> %result to i32
976  ret i32 %cast.result
977}
978
; Funnel shift right of <4 x i8> in a function with no explicit calling
; convention and plain (non-inreg) i32 arguments. Each argument is bitcast to
; <4 x i8> around the llvm.fshr.v4i8 call and the result is bitcast back to i32.
; The expected output works on v0-v2 with v_* instructions and returns through
; s_setpc_b64. The complemented shift amount is formed with v_xor_b32 against
; -1 followed by an AND with 7. The gfx6 expectations use 32-bit shifts and
; v_bfe_u32 for byte extraction; the gfx8, gfx9 and gfx10 expectations use
; 16-bit shifts (v_lshlrev_b16 / v_lshrrev_b16) together with SDWA byte/word
; selects, and gfx9/gfx10 finish the repacking with v_and_or_b32 and v_or3_b32.
979define i32 @v_fshr_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) {
980; GFX6-LABEL: v_fshr_v4i8:
981; GFX6:       ; %bb.0:
982; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
983; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 8, v2
984; GFX6-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
985; GFX6-NEXT:    v_lshrrev_b32_e32 v9, 24, v2
986; GFX6-NEXT:    v_and_b32_e32 v10, 7, v2
987; GFX6-NEXT:    v_xor_b32_e32 v2, -1, v2
988; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
989; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
990; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
991; GFX6-NEXT:    v_and_b32_e32 v2, 7, v2
992; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
993; GFX6-NEXT:    v_and_b32_e32 v11, 0xff, v1
994; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
995; GFX6-NEXT:    v_lshrrev_b32_e32 v10, v10, v11
996; GFX6-NEXT:    v_or_b32_e32 v0, v0, v10
997; GFX6-NEXT:    v_and_b32_e32 v10, 7, v7
998; GFX6-NEXT:    v_xor_b32_e32 v7, -1, v7
999; GFX6-NEXT:    v_and_b32_e32 v7, 7, v7
1000; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 1, v3
1001; GFX6-NEXT:    v_lshlrev_b32_e32 v3, v7, v3
1002; GFX6-NEXT:    v_bfe_u32 v7, v1, 8, 8
1003; GFX6-NEXT:    v_lshrrev_b32_e32 v7, v10, v7
1004; GFX6-NEXT:    v_or_b32_e32 v3, v3, v7
1005; GFX6-NEXT:    v_and_b32_e32 v7, 7, v8
1006; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v8
1007; GFX6-NEXT:    v_lshrrev_b32_e32 v6, 24, v1
1008; GFX6-NEXT:    v_and_b32_e32 v8, 7, v8
1009; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 1, v4
1010; GFX6-NEXT:    v_bfe_u32 v1, v1, 16, 8
1011; GFX6-NEXT:    v_mov_b32_e32 v2, 0xff
1012; GFX6-NEXT:    v_lshlrev_b32_e32 v4, v8, v4
1013; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v7, v1
1014; GFX6-NEXT:    v_xor_b32_e32 v7, -1, v9
1015; GFX6-NEXT:    v_or_b32_e32 v1, v4, v1
1016; GFX6-NEXT:    v_and_b32_e32 v4, 7, v9
1017; GFX6-NEXT:    v_and_b32_e32 v7, 7, v7
1018; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 1, v5
1019; GFX6-NEXT:    v_and_b32_e32 v3, v3, v2
1020; GFX6-NEXT:    v_lshlrev_b32_e32 v5, v7, v5
1021; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v4, v6
1022; GFX6-NEXT:    v_and_b32_e32 v0, v0, v2
1023; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
1024; GFX6-NEXT:    v_and_b32_e32 v1, v1, v2
1025; GFX6-NEXT:    v_or_b32_e32 v4, v5, v4
1026; GFX6-NEXT:    v_or_b32_e32 v0, v0, v3
1027; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1028; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
1029; GFX6-NEXT:    v_and_b32_e32 v1, v4, v2
1030; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
1031; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
1032; GFX6-NEXT:    s_setpc_b64 s[30:31]
1033;
1034; GFX8-LABEL: v_fshr_v4i8:
1035; GFX8:       ; %bb.0:
1036; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1037; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
1038; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
1039; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 24, v2
1040; GFX8-NEXT:    v_and_b32_e32 v8, 7, v2
1041; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
1042; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
1043; GFX8-NEXT:    v_lshlrev_b16_e32 v9, 1, v0
1044; GFX8-NEXT:    v_lshlrev_b16_e32 v2, v2, v9
1045; GFX8-NEXT:    v_lshrrev_b16_sdwa v8, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1046; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1047; GFX8-NEXT:    v_or_b32_e32 v2, v2, v8
1048; GFX8-NEXT:    v_and_b32_e32 v8, 7, v5
1049; GFX8-NEXT:    v_xor_b32_e32 v5, -1, v5
1050; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
1051; GFX8-NEXT:    v_and_b32_e32 v5, 7, v5
1052; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 1, v3
1053; GFX8-NEXT:    v_lshlrev_b16_e32 v3, v5, v3
1054; GFX8-NEXT:    v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1055; GFX8-NEXT:    v_or_b32_e32 v3, v3, v4
1056; GFX8-NEXT:    v_and_b32_e32 v4, 7, v6
1057; GFX8-NEXT:    v_xor_b32_e32 v5, -1, v6
1058; GFX8-NEXT:    v_mov_b32_e32 v6, 1
1059; GFX8-NEXT:    v_mov_b32_e32 v9, 0xff
1060; GFX8-NEXT:    v_and_b32_e32 v5, 7, v5
1061; GFX8-NEXT:    v_lshlrev_b16_sdwa v8, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1062; GFX8-NEXT:    v_lshlrev_b16_e32 v5, v5, v8
1063; GFX8-NEXT:    v_and_b32_sdwa v8, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1064; GFX8-NEXT:    v_lshrrev_b16_e32 v4, v4, v8
1065; GFX8-NEXT:    v_or_b32_e32 v4, v5, v4
1066; GFX8-NEXT:    v_and_b32_e32 v5, 7, v7
1067; GFX8-NEXT:    v_xor_b32_e32 v7, -1, v7
1068; GFX8-NEXT:    v_and_b32_e32 v7, 7, v7
1069; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1070; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v7, v0
1071; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1072; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1073; GFX8-NEXT:    v_mov_b32_e32 v1, 8
1074; GFX8-NEXT:    s_movk_i32 s4, 0xff
1075; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1076; GFX8-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1077; GFX8-NEXT:    v_and_b32_e32 v2, s4, v4
1078; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1079; GFX8-NEXT:    v_and_b32_e32 v0, s4, v0
1080; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
1081; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
1082; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
1083; GFX8-NEXT:    s_setpc_b64 s[30:31]
1084;
1085; GFX9-LABEL: v_fshr_v4i8:
1086; GFX9:       ; %bb.0:
1087; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1088; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
1089; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
1090; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 24, v2
1091; GFX9-NEXT:    v_and_b32_e32 v8, 7, v2
1092; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v2
1093; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
1094; GFX9-NEXT:    v_lshlrev_b16_e32 v9, 1, v0
1095; GFX9-NEXT:    v_lshlrev_b16_e32 v2, v2, v9
1096; GFX9-NEXT:    v_lshrrev_b16_sdwa v8, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1097; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1098; GFX9-NEXT:    v_or_b32_e32 v2, v2, v8
1099; GFX9-NEXT:    v_and_b32_e32 v8, 7, v5
1100; GFX9-NEXT:    v_xor_b32_e32 v5, -1, v5
1101; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
1102; GFX9-NEXT:    v_and_b32_e32 v5, 7, v5
1103; GFX9-NEXT:    v_lshlrev_b16_e32 v3, 1, v3
1104; GFX9-NEXT:    v_lshlrev_b16_e32 v3, v5, v3
1105; GFX9-NEXT:    v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1106; GFX9-NEXT:    v_or_b32_e32 v3, v3, v4
1107; GFX9-NEXT:    v_and_b32_e32 v4, 7, v6
1108; GFX9-NEXT:    v_xor_b32_e32 v5, -1, v6
1109; GFX9-NEXT:    v_mov_b32_e32 v6, 1
1110; GFX9-NEXT:    v_mov_b32_e32 v9, 0xff
1111; GFX9-NEXT:    v_and_b32_e32 v5, 7, v5
1112; GFX9-NEXT:    v_lshlrev_b16_sdwa v8, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1113; GFX9-NEXT:    v_lshlrev_b16_e32 v5, v5, v8
1114; GFX9-NEXT:    v_and_b32_sdwa v8, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1115; GFX9-NEXT:    v_lshrrev_b16_e32 v4, v4, v8
1116; GFX9-NEXT:    v_or_b32_e32 v4, v5, v4
1117; GFX9-NEXT:    v_and_b32_e32 v5, 7, v7
1118; GFX9-NEXT:    v_xor_b32_e32 v7, -1, v7
1119; GFX9-NEXT:    v_and_b32_e32 v7, 7, v7
1120; GFX9-NEXT:    v_lshlrev_b16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1121; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v7, v0
1122; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1123; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
1124; GFX9-NEXT:    v_mov_b32_e32 v1, 8
1125; GFX9-NEXT:    s_movk_i32 s4, 0xff
1126; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1127; GFX9-NEXT:    v_and_or_b32 v1, v2, s4, v1
1128; GFX9-NEXT:    v_and_b32_e32 v2, s4, v4
1129; GFX9-NEXT:    v_and_b32_e32 v0, s4, v0
1130; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1131; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
1132; GFX9-NEXT:    v_or3_b32 v0, v1, v2, v0
1133; GFX9-NEXT:    s_setpc_b64 s[30:31]
1134;
1135; GFX10-LABEL: v_fshr_v4i8:
1136; GFX10:       ; %bb.0:
1137; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1138; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1139; GFX10-NEXT:    v_lshrrev_b32_e32 v6, 8, v2
1140; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1141; GFX10-NEXT:    v_xor_b32_e32 v8, -1, v2
1142; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
1143; GFX10-NEXT:    v_lshrrev_b32_e32 v12, 24, v2
1144; GFX10-NEXT:    v_xor_b32_e32 v11, -1, v6
1145; GFX10-NEXT:    v_lshlrev_b16 v3, 1, v3
1146; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
1147; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
1148; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 8, v1
1149; GFX10-NEXT:    v_and_b32_e32 v11, 7, v11
1150; GFX10-NEXT:    v_and_b32_e32 v8, 7, v8
1151; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
1152; GFX10-NEXT:    v_mov_b32_e32 v13, 0xff
1153; GFX10-NEXT:    v_xor_b32_e32 v14, -1, v12
1154; GFX10-NEXT:    v_lshlrev_b16 v3, v11, v3
1155; GFX10-NEXT:    v_xor_b32_e32 v11, -1, v10
1156; GFX10-NEXT:    s_movk_i32 s4, 0xff
1157; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 24, v1
1158; GFX10-NEXT:    v_lshlrev_b16 v0, v8, v0
1159; GFX10-NEXT:    v_and_b32_e32 v8, s4, v1
1160; GFX10-NEXT:    v_and_b32_e32 v6, 7, v6
1161; GFX10-NEXT:    v_and_b32_e32 v7, s4, v7
1162; GFX10-NEXT:    v_and_b32_e32 v10, 7, v10
1163; GFX10-NEXT:    v_and_b32_e32 v11, 7, v11
1164; GFX10-NEXT:    v_lshlrev_b16 v4, 1, v4
1165; GFX10-NEXT:    v_and_b32_sdwa v1, v1, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1166; GFX10-NEXT:    v_and_b32_e32 v13, 7, v14
1167; GFX10-NEXT:    v_lshlrev_b16 v5, 1, v5
1168; GFX10-NEXT:    v_and_b32_e32 v12, 7, v12
1169; GFX10-NEXT:    v_and_b32_e32 v2, 7, v2
1170; GFX10-NEXT:    v_lshrrev_b16 v6, v6, v7
1171; GFX10-NEXT:    v_lshlrev_b16 v4, v11, v4
1172; GFX10-NEXT:    v_lshrrev_b16 v1, v10, v1
1173; GFX10-NEXT:    v_lshlrev_b16 v5, v13, v5
1174; GFX10-NEXT:    v_lshrrev_b16 v7, v12, v9
1175; GFX10-NEXT:    v_lshrrev_b16 v2, v2, v8
1176; GFX10-NEXT:    v_or_b32_e32 v3, v3, v6
1177; GFX10-NEXT:    v_mov_b32_e32 v6, 8
1178; GFX10-NEXT:    v_or_b32_e32 v1, v4, v1
1179; GFX10-NEXT:    v_or_b32_e32 v4, v5, v7
1180; GFX10-NEXT:    v_or_b32_e32 v0, v0, v2
1181; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1182; GFX10-NEXT:    v_and_b32_e32 v1, s4, v1
1183; GFX10-NEXT:    v_and_b32_e32 v3, s4, v4
1184; GFX10-NEXT:    v_and_or_b32 v0, v0, s4, v2
1185; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1186; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
1187; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
1188; GFX10-NEXT:    s_setpc_b64 s[30:31]
  ; Reinterpret the packed i32 arguments as <4 x i8> lanes, funnel-shift them,
  ; and repack the <4 x i8> result into the i32 return value.
1189  %lhs = bitcast i32 %lhs.arg to <4 x i8>
1190  %rhs = bitcast i32 %rhs.arg to <4 x i8>
1191  %amt = bitcast i32 %amt.arg to <4 x i8>
1192  %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt)
1193  %cast.result = bitcast <4 x i8> %result to i32
1194  ret i32 %cast.result
1195}
1196
; Funnel shift right of i24 with all three operands passed inreg to an
; amdgpu_ps function; the body is a single llvm.fshr.i24 call.
; The expected output does not mask the shift amount with a power-of-two
; constant. It runs a reciprocal-based sequence built around the constant 24
; (v_cvt_f32_ubyte0, v_rcp_iflag_f32, v_mul_hi_u32, v_mul_lo_u32 by 24, then two
; conditional subtractions of 24), which looks like an unsigned remainder of the
; amount by 24 -- NOTE(review): presumably the generic urem expansion, used
; because 24 is not a power of two; confirm against the legalizer.
; The lhs is shifted left by 1 and then by (23 - amount), the rhs masked with
; 0xffffff is shifted right by the amount, and the halves are combined with OR
; (a fused v_lshl_or_b32 in the gfx9 and gfx10 expectations). The arithmetic is
; done in v_* instructions and the result is copied to s0 with
; v_readfirstlane_b32.
1197define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt) {
1198; GFX6-LABEL: s_fshr_i24:
1199; GFX6:       ; %bb.0:
1200; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1201; GFX6-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1202; GFX6-NEXT:    v_mov_b32_e32 v1, 0xffffffe8
1203; GFX6-NEXT:    s_mov_b32 s3, 0xffffff
1204; GFX6-NEXT:    s_and_b32 s2, s2, s3
1205; GFX6-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1206; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
1207; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
1208; GFX6-NEXT:    s_and_b32 s1, s1, s3
1209; GFX6-NEXT:    v_mul_lo_u32 v1, v1, v0
1210; GFX6-NEXT:    v_mul_hi_u32 v1, v0, v1
1211; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
1212; GFX6-NEXT:    v_mul_hi_u32 v0, s2, v0
1213; GFX6-NEXT:    v_mul_lo_u32 v0, v0, 24
1214; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
1215; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 24, v0
1216; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1217; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1218; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 24, v0
1219; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1220; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1221; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 23, v0
1222; GFX6-NEXT:    v_and_b32_e32 v0, s3, v0
1223; GFX6-NEXT:    v_and_b32_e32 v1, s3, v1
1224; GFX6-NEXT:    v_lshl_b32_e32 v1, s0, v1
1225; GFX6-NEXT:    v_lshr_b32_e32 v0, s1, v0
1226; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
1227; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
1228; GFX6-NEXT:    ; return to shader part epilog
1229;
1230; GFX8-LABEL: s_fshr_i24:
1231; GFX8:       ; %bb.0:
1232; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1233; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1234; GFX8-NEXT:    v_mov_b32_e32 v1, 0xffffffe8
1235; GFX8-NEXT:    s_mov_b32 s3, 0xffffff
1236; GFX8-NEXT:    s_and_b32 s2, s2, s3
1237; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1238; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
1239; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
1240; GFX8-NEXT:    s_and_b32 s1, s1, s3
1241; GFX8-NEXT:    v_mul_lo_u32 v1, v1, v0
1242; GFX8-NEXT:    v_mul_hi_u32 v1, v0, v1
1243; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
1244; GFX8-NEXT:    v_mul_hi_u32 v0, s2, v0
1245; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 24
1246; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s2, v0
1247; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 24, v0
1248; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1249; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1250; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 24, v0
1251; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1252; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1253; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 23, v0
1254; GFX8-NEXT:    v_and_b32_e32 v0, s3, v0
1255; GFX8-NEXT:    v_and_b32_e32 v1, s3, v1
1256; GFX8-NEXT:    v_lshlrev_b32_e64 v1, v1, s0
1257; GFX8-NEXT:    v_lshrrev_b32_e64 v0, v0, s1
1258; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
1259; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
1260; GFX8-NEXT:    ; return to shader part epilog
1261;
1262; GFX9-LABEL: s_fshr_i24:
1263; GFX9:       ; %bb.0:
1264; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1265; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1266; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffffffe8
1267; GFX9-NEXT:    s_mov_b32 s3, 0xffffff
1268; GFX9-NEXT:    s_and_b32 s2, s2, s3
1269; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1270; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
1271; GFX9-NEXT:    s_and_b32 s1, s1, s3
1272; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
1273; GFX9-NEXT:    v_mul_lo_u32 v1, v1, v0
1274; GFX9-NEXT:    v_mul_hi_u32 v1, v0, v1
1275; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
1276; GFX9-NEXT:    v_mul_hi_u32 v0, s2, v0
1277; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 24
1278; GFX9-NEXT:    v_sub_u32_e32 v0, s2, v0
1279; GFX9-NEXT:    v_subrev_u32_e32 v1, 24, v0
1280; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1281; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1282; GFX9-NEXT:    v_subrev_u32_e32 v1, 24, v0
1283; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1284; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1285; GFX9-NEXT:    v_sub_u32_e32 v1, 23, v0
1286; GFX9-NEXT:    v_and_b32_e32 v0, s3, v0
1287; GFX9-NEXT:    v_and_b32_e32 v1, s3, v1
1288; GFX9-NEXT:    v_lshrrev_b32_e64 v0, v0, s1
1289; GFX9-NEXT:    v_lshl_or_b32 v0, s0, v1, v0
1290; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
1291; GFX9-NEXT:    ; return to shader part epilog
1292;
1293; GFX10-LABEL: s_fshr_i24:
1294; GFX10:       ; %bb.0:
1295; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1296; GFX10-NEXT:    s_mov_b32 s3, 0xffffff
1297; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
1298; GFX10-NEXT:    s_and_b32 s2, s2, s3
1299; GFX10-NEXT:    s_and_b32 s1, s1, s3
1300; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1301; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1302; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
1303; GFX10-NEXT:    v_mul_lo_u32 v1, 0xffffffe8, v0
1304; GFX10-NEXT:    v_mul_hi_u32 v1, v0, v1
1305; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v1
1306; GFX10-NEXT:    v_mul_hi_u32 v0, s2, v0
1307; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 24
1308; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
1309; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 24, v0
1310; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
1311; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1312; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 24, v0
1313; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
1314; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1315; GFX10-NEXT:    v_sub_nc_u32_e32 v1, 23, v0
1316; GFX10-NEXT:    v_and_b32_e32 v0, s3, v0
1317; GFX10-NEXT:    v_and_b32_e32 v1, s3, v1
1318; GFX10-NEXT:    v_lshrrev_b32_e64 v0, v0, s1
1319; GFX10-NEXT:    v_lshl_or_b32 v0, s0, v1, v0
1320; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
1321; GFX10-NEXT:    ; return to shader part epilog
1322  %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt)
1323  ret i24 %result
1324}
1325
; Funnel shift right of i24 in a function with no explicit calling convention
; and plain (non-inreg) arguments; the body is a single llvm.fshr.i24 call.
; The expected output takes the operands in v0-v2 and returns through
; s_setpc_b64. The shift amount is first masked with 0xffffff and then run
; through a reciprocal-based sequence built around the constant 24
; (v_cvt_f32_ubyte0, v_rcp_iflag_f32, v_mul_hi_u32, v_mul_lo_u32 by 24, then two
; conditional subtractions of 24), which looks like an unsigned remainder of the
; amount by 24 -- NOTE(review): presumably the generic urem expansion, used
; because 24 is not a power of two; confirm against the legalizer.
; The lhs is shifted left by 1 and then by (23 - amount), the rhs masked with
; 0xffffff is shifted right by the amount, and the halves are combined with OR
; (a fused v_lshl_or_b32 in the gfx9 and gfx10 expectations).
1326define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
1327; GFX6-LABEL: v_fshr_i24:
1328; GFX6:       ; %bb.0:
1329; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1330; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1331; GFX6-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1332; GFX6-NEXT:    v_mov_b32_e32 v4, 0xffffffe8
1333; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1334; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1335; GFX6-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1336; GFX6-NEXT:    v_cvt_u32_f32_e32 v3, v3
1337; GFX6-NEXT:    v_mul_lo_u32 v4, v4, v3
1338; GFX6-NEXT:    v_mul_hi_u32 v4, v3, v4
1339; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
1340; GFX6-NEXT:    v_mul_hi_u32 v3, v2, v3
1341; GFX6-NEXT:    v_mov_b32_e32 v4, 0xffffff
1342; GFX6-NEXT:    v_and_b32_e32 v1, v1, v4
1343; GFX6-NEXT:    v_mul_lo_u32 v3, v3, 24
1344; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
1345; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 24, v2
1346; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1347; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1348; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 24, v2
1349; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1350; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1351; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 23, v2
1352; GFX6-NEXT:    v_and_b32_e32 v2, v2, v4
1353; GFX6-NEXT:    v_and_b32_e32 v3, v3, v4
1354; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v3, v0
1355; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1356; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
1357; GFX6-NEXT:    s_setpc_b64 s[30:31]
1358;
1359; GFX8-LABEL: v_fshr_i24:
1360; GFX8:       ; %bb.0:
1361; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1362; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1363; GFX8-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1364; GFX8-NEXT:    v_mov_b32_e32 v4, 0xffffffe8
1365; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1366; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1367; GFX8-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1368; GFX8-NEXT:    v_cvt_u32_f32_e32 v3, v3
1369; GFX8-NEXT:    v_mul_lo_u32 v4, v4, v3
1370; GFX8-NEXT:    v_mul_hi_u32 v4, v3, v4
1371; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v4
1372; GFX8-NEXT:    v_mul_hi_u32 v3, v2, v3
1373; GFX8-NEXT:    v_mov_b32_e32 v4, 0xffffff
1374; GFX8-NEXT:    v_and_b32_e32 v1, v1, v4
1375; GFX8-NEXT:    v_mul_lo_u32 v3, v3, 24
1376; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v3
1377; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 24, v2
1378; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1379; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1380; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 24, v2
1381; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1382; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1383; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 23, v2
1384; GFX8-NEXT:    v_and_b32_e32 v2, v2, v4
1385; GFX8-NEXT:    v_and_b32_e32 v3, v3, v4
1386; GFX8-NEXT:    v_lshlrev_b32_e32 v0, v3, v0
1387; GFX8-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1388; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1389; GFX8-NEXT:    s_setpc_b64 s[30:31]
1390;
1391; GFX9-LABEL: v_fshr_i24:
1392; GFX9:       ; %bb.0:
1393; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1394; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1395; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1396; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffffffe8
1397; GFX9-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1398; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1399; GFX9-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1400; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
1401; GFX9-NEXT:    v_mul_lo_u32 v4, v4, v3
1402; GFX9-NEXT:    v_mul_hi_u32 v4, v3, v4
1403; GFX9-NEXT:    v_add_u32_e32 v3, v3, v4
1404; GFX9-NEXT:    v_mul_hi_u32 v3, v2, v3
1405; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffffff
1406; GFX9-NEXT:    v_and_b32_e32 v1, v1, v4
1407; GFX9-NEXT:    v_mul_lo_u32 v3, v3, 24
1408; GFX9-NEXT:    v_sub_u32_e32 v2, v2, v3
1409; GFX9-NEXT:    v_subrev_u32_e32 v3, 24, v2
1410; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1411; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1412; GFX9-NEXT:    v_subrev_u32_e32 v3, 24, v2
1413; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1414; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1415; GFX9-NEXT:    v_sub_u32_e32 v3, 23, v2
1416; GFX9-NEXT:    v_and_b32_e32 v2, v2, v4
1417; GFX9-NEXT:    v_and_b32_e32 v3, v3, v4
1418; GFX9-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1419; GFX9-NEXT:    v_lshl_or_b32 v0, v0, v3, v1
1420; GFX9-NEXT:    s_setpc_b64 s[30:31]
1421;
1422; GFX10-LABEL: v_fshr_i24:
1423; GFX10:       ; %bb.0:
1424; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1425; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1426; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1427; GFX10-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1428; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1429; GFX10-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1430; GFX10-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1431; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v3
1432; GFX10-NEXT:    v_mul_lo_u32 v4, 0xffffffe8, v3
1433; GFX10-NEXT:    v_mul_hi_u32 v4, v3, v4
1434; GFX10-NEXT:    v_add_nc_u32_e32 v3, v3, v4
1435; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffffff
1436; GFX10-NEXT:    v_mul_hi_u32 v3, v2, v3
1437; GFX10-NEXT:    v_and_b32_e32 v1, v1, v4
1438; GFX10-NEXT:    v_mul_lo_u32 v3, v3, 24
1439; GFX10-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
1440; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 24, v2
1441; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
1442; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1443; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 24, v2
1444; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
1445; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1446; GFX10-NEXT:    v_sub_nc_u32_e32 v3, 23, v2
1447; GFX10-NEXT:    v_and_b32_e32 v2, v2, v4
1448; GFX10-NEXT:    v_and_b32_e32 v3, v3, v4
1449; GFX10-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1450; GFX10-NEXT:    v_lshl_or_b32 v0, v0, v3, v1
1451; GFX10-NEXT:    s_setpc_b64 s[30:31]
1452  %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt)
1453  ret i24 %result
1454}
1455
; s_fshr_v2i24: funnel-shift-right test for a <2 x i24> vector whose operands
; are passed as 'inreg' arguments to an amdgpu_ps entry point. Each operand
; arrives packed in an i48, is reinterpreted as <2 x i24>, fed to
; llvm.fshr.v2i24, and the result is bitcast back to i48 for the return.
;
; The assertion lines inside this function are autogenerated (see the NOTE on
; the first line of the file). Regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
;
; NOTE(review): the expected code appears to reduce each 24-bit shift amount
; modulo 24 with a reciprocal-based sequence (v_rcp_iflag_f32 / v_mul_hi_u32
; followed by two compare-and-select steps against 24), and to hand the packed
; result back in s0/s1 via v_readfirstlane_b32 -- confirm if the expansion or
; the calling convention lowering changes.
1456define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) {
1457; GFX6-LABEL: s_fshr_v2i24:
1458; GFX6:       ; %bb.0:
1459; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1460; GFX6-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1461; GFX6-NEXT:    s_movk_i32 s9, 0xff
1462; GFX6-NEXT:    s_mov_b32 s11, 0x80008
1463; GFX6-NEXT:    s_lshr_b32 s6, s0, 16
1464; GFX6-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1465; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
1466; GFX6-NEXT:    s_lshr_b32 s7, s0, 24
1467; GFX6-NEXT:    s_lshr_b32 s8, s1, 8
1468; GFX6-NEXT:    s_and_b32 s10, s0, s9
1469; GFX6-NEXT:    s_bfe_u32 s0, s0, s11
1470; GFX6-NEXT:    s_and_b32 s1, s1, s9
1471; GFX6-NEXT:    s_lshl_b32 s0, s0, 8
1472; GFX6-NEXT:    s_lshl_b32 s1, s1, 8
1473; GFX6-NEXT:    v_mov_b32_e32 v1, 0xffffffe8
1474; GFX6-NEXT:    s_or_b32 s0, s10, s0
1475; GFX6-NEXT:    s_or_b32 s1, s7, s1
1476; GFX6-NEXT:    s_and_b32 s7, s8, s9
1477; GFX6-NEXT:    s_lshr_b32 s8, s2, 16
1478; GFX6-NEXT:    s_lshr_b32 s10, s2, 24
1479; GFX6-NEXT:    s_and_b32 s13, s2, s9
1480; GFX6-NEXT:    s_bfe_u32 s2, s2, s11
1481; GFX6-NEXT:    v_mul_lo_u32 v2, v1, v0
1482; GFX6-NEXT:    s_lshl_b32 s2, s2, 8
1483; GFX6-NEXT:    s_and_b32 s8, s8, s9
1484; GFX6-NEXT:    s_or_b32 s2, s13, s2
1485; GFX6-NEXT:    s_bfe_u32 s8, s8, 0x100000
1486; GFX6-NEXT:    s_lshr_b32 s12, s3, 8
1487; GFX6-NEXT:    s_bfe_u32 s2, s2, 0x100000
1488; GFX6-NEXT:    s_lshl_b32 s8, s8, 16
1489; GFX6-NEXT:    s_and_b32 s3, s3, s9
1490; GFX6-NEXT:    s_or_b32 s2, s2, s8
1491; GFX6-NEXT:    s_lshl_b32 s3, s3, 8
1492; GFX6-NEXT:    s_and_b32 s8, s12, s9
1493; GFX6-NEXT:    v_mul_hi_u32 v2, v0, v2
1494; GFX6-NEXT:    s_or_b32 s3, s10, s3
1495; GFX6-NEXT:    s_bfe_u32 s8, s8, 0x100000
1496; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x100000
1497; GFX6-NEXT:    s_lshl_b32 s8, s8, 16
1498; GFX6-NEXT:    s_or_b32 s3, s3, s8
1499; GFX6-NEXT:    s_lshr_b32 s8, s4, 16
1500; GFX6-NEXT:    s_lshr_b32 s10, s4, 24
1501; GFX6-NEXT:    s_and_b32 s13, s4, s9
1502; GFX6-NEXT:    s_bfe_u32 s4, s4, s11
1503; GFX6-NEXT:    s_lshl_b32 s4, s4, 8
1504; GFX6-NEXT:    s_and_b32 s8, s8, s9
1505; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
1506; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v2, 24
1507; GFX6-NEXT:    s_or_b32 s4, s13, s4
1508; GFX6-NEXT:    s_bfe_u32 s8, s8, 0x100000
1509; GFX6-NEXT:    v_rcp_iflag_f32_e32 v2, v2
1510; GFX6-NEXT:    s_bfe_u32 s4, s4, 0x100000
1511; GFX6-NEXT:    s_lshl_b32 s8, s8, 16
1512; GFX6-NEXT:    s_or_b32 s4, s4, s8
1513; GFX6-NEXT:    v_mul_hi_u32 v0, s4, v0
1514; GFX6-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
1515; GFX6-NEXT:    v_cvt_u32_f32_e32 v2, v2
1516; GFX6-NEXT:    s_lshr_b32 s12, s5, 8
1517; GFX6-NEXT:    v_mul_lo_u32 v0, v0, 24
1518; GFX6-NEXT:    s_and_b32 s5, s5, s9
1519; GFX6-NEXT:    v_mul_lo_u32 v1, v1, v2
1520; GFX6-NEXT:    s_lshl_b32 s5, s5, 8
1521; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s4, v0
1522; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 24, v0
1523; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1524; GFX6-NEXT:    v_mul_hi_u32 v1, v2, v1
1525; GFX6-NEXT:    s_and_b32 s8, s12, s9
1526; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1527; GFX6-NEXT:    s_or_b32 s5, s10, s5
1528; GFX6-NEXT:    s_bfe_u32 s8, s8, 0x100000
1529; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 24, v0
1530; GFX6-NEXT:    s_bfe_u32 s5, s5, 0x100000
1531; GFX6-NEXT:    s_lshl_b32 s8, s8, 16
1532; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1533; GFX6-NEXT:    s_or_b32 s5, s5, s8
1534; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1535; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
1536; GFX6-NEXT:    v_mul_hi_u32 v1, s5, v1
1537; GFX6-NEXT:    s_and_b32 s6, s6, s9
1538; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
1539; GFX6-NEXT:    s_bfe_u32 s6, s6, 0x100000
1540; GFX6-NEXT:    v_mul_lo_u32 v1, v1, 24
1541; GFX6-NEXT:    s_mov_b32 s8, 0xffffff
1542; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 23, v0
1543; GFX6-NEXT:    s_lshl_b32 s4, s6, 17
1544; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
1545; GFX6-NEXT:    s_or_b32 s0, s4, s0
1546; GFX6-NEXT:    v_and_b32_e32 v2, s8, v3
1547; GFX6-NEXT:    v_and_b32_e32 v0, s8, v0
1548; GFX6-NEXT:    v_lshl_b32_e32 v2, s0, v2
1549; GFX6-NEXT:    v_lshr_b32_e32 v0, s2, v0
1550; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, s5, v1
1551; GFX6-NEXT:    v_or_b32_e32 v0, v2, v0
1552; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 24, v1
1553; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
1554; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1555; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 24, v1
1556; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
1557; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x100000
1558; GFX6-NEXT:    s_bfe_u32 s7, s7, 0x100000
1559; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1560; GFX6-NEXT:    v_mov_b32_e32 v4, 0xffffff
1561; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 23, v1
1562; GFX6-NEXT:    s_lshl_b32 s0, s7, 17
1563; GFX6-NEXT:    s_lshl_b32 s1, s1, 1
1564; GFX6-NEXT:    s_or_b32 s0, s0, s1
1565; GFX6-NEXT:    v_and_b32_e32 v2, v2, v4
1566; GFX6-NEXT:    v_and_b32_e32 v1, v1, v4
1567; GFX6-NEXT:    v_lshl_b32_e32 v2, s0, v2
1568; GFX6-NEXT:    v_lshr_b32_e32 v1, s3, v1
1569; GFX6-NEXT:    v_bfe_u32 v3, v0, 8, 8
1570; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
1571; GFX6-NEXT:    v_and_b32_e32 v2, s9, v0
1572; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
1573; GFX6-NEXT:    v_bfe_u32 v0, v0, 16, 8
1574; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
1575; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1576; GFX6-NEXT:    v_or_b32_e32 v0, v2, v0
1577; GFX6-NEXT:    v_and_b32_e32 v2, s9, v1
1578; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
1579; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
1580; GFX6-NEXT:    v_bfe_u32 v2, v1, 8, 8
1581; GFX6-NEXT:    v_bfe_u32 v1, v1, 16, 8
1582; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
1583; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
1584; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
1585; GFX6-NEXT:    v_readfirstlane_b32 s1, v1
1586; GFX6-NEXT:    ; return to shader part epilog
1587;
1588; GFX8-LABEL: s_fshr_v2i24:
1589; GFX8:       ; %bb.0:
1590; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1591; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1592; GFX8-NEXT:    s_movk_i32 s10, 0xff
1593; GFX8-NEXT:    s_lshr_b32 s9, s1, 8
1594; GFX8-NEXT:    s_bfe_u32 s11, 8, 0x100000
1595; GFX8-NEXT:    s_and_b32 s1, s1, s10
1596; GFX8-NEXT:    s_lshr_b32 s6, s0, 8
1597; GFX8-NEXT:    s_lshr_b32 s8, s0, 24
1598; GFX8-NEXT:    s_lshl_b32 s1, s1, s11
1599; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1600; GFX8-NEXT:    s_and_b32 s6, s6, s10
1601; GFX8-NEXT:    s_or_b32 s1, s8, s1
1602; GFX8-NEXT:    s_lshr_b32 s8, s2, 8
1603; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
1604; GFX8-NEXT:    s_lshr_b32 s7, s0, 16
1605; GFX8-NEXT:    s_and_b32 s0, s0, s10
1606; GFX8-NEXT:    s_lshl_b32 s6, s6, s11
1607; GFX8-NEXT:    s_and_b32 s8, s8, s10
1608; GFX8-NEXT:    s_or_b32 s0, s0, s6
1609; GFX8-NEXT:    s_and_b32 s6, s7, s10
1610; GFX8-NEXT:    s_and_b32 s7, s9, s10
1611; GFX8-NEXT:    s_lshr_b32 s9, s2, 16
1612; GFX8-NEXT:    s_lshr_b32 s12, s2, 24
1613; GFX8-NEXT:    s_and_b32 s2, s2, s10
1614; GFX8-NEXT:    s_lshl_b32 s8, s8, s11
1615; GFX8-NEXT:    s_or_b32 s2, s2, s8
1616; GFX8-NEXT:    s_and_b32 s8, s9, s10
1617; GFX8-NEXT:    v_mov_b32_e32 v1, 0xffffffe8
1618; GFX8-NEXT:    s_bfe_u32 s8, s8, 0x100000
1619; GFX8-NEXT:    v_mul_lo_u32 v2, v1, v0
1620; GFX8-NEXT:    s_lshr_b32 s13, s3, 8
1621; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
1622; GFX8-NEXT:    s_lshl_b32 s8, s8, 16
1623; GFX8-NEXT:    s_and_b32 s3, s3, s10
1624; GFX8-NEXT:    s_or_b32 s2, s2, s8
1625; GFX8-NEXT:    s_lshl_b32 s3, s3, s11
1626; GFX8-NEXT:    s_and_b32 s8, s13, s10
1627; GFX8-NEXT:    s_or_b32 s3, s12, s3
1628; GFX8-NEXT:    s_bfe_u32 s8, s8, 0x100000
1629; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
1630; GFX8-NEXT:    s_lshl_b32 s8, s8, 16
1631; GFX8-NEXT:    v_mul_hi_u32 v2, v0, v2
1632; GFX8-NEXT:    s_or_b32 s3, s3, s8
1633; GFX8-NEXT:    s_lshr_b32 s8, s4, 8
1634; GFX8-NEXT:    s_and_b32 s8, s8, s10
1635; GFX8-NEXT:    s_lshr_b32 s9, s4, 16
1636; GFX8-NEXT:    s_lshr_b32 s12, s4, 24
1637; GFX8-NEXT:    s_and_b32 s4, s4, s10
1638; GFX8-NEXT:    s_lshl_b32 s8, s8, s11
1639; GFX8-NEXT:    s_or_b32 s4, s4, s8
1640; GFX8-NEXT:    s_and_b32 s8, s9, s10
1641; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
1642; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v2, 24
1643; GFX8-NEXT:    s_bfe_u32 s8, s8, 0x100000
1644; GFX8-NEXT:    v_rcp_iflag_f32_e32 v2, v2
1645; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
1646; GFX8-NEXT:    s_lshl_b32 s8, s8, 16
1647; GFX8-NEXT:    s_or_b32 s4, s4, s8
1648; GFX8-NEXT:    v_mul_hi_u32 v0, s4, v0
1649; GFX8-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
1650; GFX8-NEXT:    v_cvt_u32_f32_e32 v2, v2
1651; GFX8-NEXT:    s_lshr_b32 s13, s5, 8
1652; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 24
1653; GFX8-NEXT:    s_and_b32 s5, s5, s10
1654; GFX8-NEXT:    v_mul_lo_u32 v1, v1, v2
1655; GFX8-NEXT:    s_lshl_b32 s5, s5, s11
1656; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s4, v0
1657; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 24, v0
1658; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1659; GFX8-NEXT:    v_mul_hi_u32 v1, v2, v1
1660; GFX8-NEXT:    s_and_b32 s8, s13, s10
1661; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1662; GFX8-NEXT:    s_or_b32 s5, s12, s5
1663; GFX8-NEXT:    s_bfe_u32 s8, s8, 0x100000
1664; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 24, v0
1665; GFX8-NEXT:    s_bfe_u32 s5, s5, 0x100000
1666; GFX8-NEXT:    s_lshl_b32 s8, s8, 16
1667; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1668; GFX8-NEXT:    s_or_b32 s5, s5, s8
1669; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1670; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v2, v1
1671; GFX8-NEXT:    v_mul_hi_u32 v1, s5, v1
1672; GFX8-NEXT:    s_bfe_u32 s0, s0, 0x100000
1673; GFX8-NEXT:    s_bfe_u32 s6, s6, 0x100000
1674; GFX8-NEXT:    s_mov_b32 s8, 0xffffff
1675; GFX8-NEXT:    v_mul_lo_u32 v1, v1, 24
1676; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 23, v0
1677; GFX8-NEXT:    s_lshl_b32 s4, s6, 17
1678; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
1679; GFX8-NEXT:    s_or_b32 s0, s4, s0
1680; GFX8-NEXT:    v_and_b32_e32 v2, s8, v3
1681; GFX8-NEXT:    v_and_b32_e32 v0, s8, v0
1682; GFX8-NEXT:    v_lshlrev_b32_e64 v2, v2, s0
1683; GFX8-NEXT:    v_lshrrev_b32_e64 v0, v0, s2
1684; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, s5, v1
1685; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
1686; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 24, v1
1687; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
1688; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1689; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 24, v1
1690; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
1691; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
1692; GFX8-NEXT:    s_bfe_u32 s7, s7, 0x100000
1693; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1694; GFX8-NEXT:    v_mov_b32_e32 v4, 0xffffff
1695; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 23, v1
1696; GFX8-NEXT:    s_lshl_b32 s0, s7, 17
1697; GFX8-NEXT:    s_lshl_b32 s1, s1, 1
1698; GFX8-NEXT:    s_or_b32 s0, s0, s1
1699; GFX8-NEXT:    v_and_b32_e32 v2, v2, v4
1700; GFX8-NEXT:    v_and_b32_e32 v1, v1, v4
1701; GFX8-NEXT:    v_lshlrev_b32_e64 v2, v2, s0
1702; GFX8-NEXT:    v_lshrrev_b32_e64 v1, v1, s3
1703; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
1704; GFX8-NEXT:    v_mov_b32_e32 v2, 8
1705; GFX8-NEXT:    v_lshlrev_b32_sdwa v3, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1706; GFX8-NEXT:    v_mov_b32_e32 v4, 16
1707; GFX8-NEXT:    v_or_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1708; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1709; GFX8-NEXT:    v_or_b32_e32 v0, v3, v0
1710; GFX8-NEXT:    v_and_b32_e32 v3, s10, v1
1711; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
1712; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1713; GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
1714; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
1715; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
1716; GFX8-NEXT:    v_readfirstlane_b32 s1, v1
1717; GFX8-NEXT:    ; return to shader part epilog
1718;
1719; GFX9-LABEL: s_fshr_v2i24:
1720; GFX9:       ; %bb.0:
1721; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1722; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1723; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffffffe8
1724; GFX9-NEXT:    s_movk_i32 s12, 0xff
1725; GFX9-NEXT:    s_lshr_b32 s11, s1, 8
1726; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1727; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
1728; GFX9-NEXT:    s_bfe_u32 s13, 8, 0x100000
1729; GFX9-NEXT:    s_and_b32 s1, s1, s12
1730; GFX9-NEXT:    s_lshr_b32 s7, s0, 8
1731; GFX9-NEXT:    v_mul_lo_u32 v2, v1, v0
1732; GFX9-NEXT:    s_lshr_b32 s10, s0, 24
1733; GFX9-NEXT:    s_lshl_b32 s1, s1, s13
1734; GFX9-NEXT:    s_and_b32 s7, s7, s12
1735; GFX9-NEXT:    v_mul_hi_u32 v2, v0, v2
1736; GFX9-NEXT:    s_or_b32 s1, s10, s1
1737; GFX9-NEXT:    s_lshr_b32 s10, s2, 8
1738; GFX9-NEXT:    s_lshr_b32 s9, s0, 16
1739; GFX9-NEXT:    s_and_b32 s0, s0, s12
1740; GFX9-NEXT:    s_lshl_b32 s7, s7, s13
1741; GFX9-NEXT:    s_and_b32 s10, s10, s12
1742; GFX9-NEXT:    s_or_b32 s0, s0, s7
1743; GFX9-NEXT:    s_and_b32 s7, s9, s12
1744; GFX9-NEXT:    s_and_b32 s9, s11, s12
1745; GFX9-NEXT:    s_lshr_b32 s11, s2, 16
1746; GFX9-NEXT:    s_lshr_b32 s14, s2, 24
1747; GFX9-NEXT:    s_and_b32 s2, s2, s12
1748; GFX9-NEXT:    s_lshl_b32 s10, s10, s13
1749; GFX9-NEXT:    s_or_b32 s2, s2, s10
1750; GFX9-NEXT:    s_and_b32 s10, s11, s12
1751; GFX9-NEXT:    v_add_u32_e32 v0, v0, v2
1752; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v2, 24
1753; GFX9-NEXT:    s_bfe_u32 s10, s10, 0x100000
1754; GFX9-NEXT:    v_rcp_iflag_f32_e32 v2, v2
1755; GFX9-NEXT:    s_lshr_b32 s15, s3, 8
1756; GFX9-NEXT:    s_bfe_u32 s2, s2, 0x100000
1757; GFX9-NEXT:    s_lshl_b32 s10, s10, 16
1758; GFX9-NEXT:    s_and_b32 s3, s3, s12
1759; GFX9-NEXT:    s_or_b32 s2, s2, s10
1760; GFX9-NEXT:    s_lshl_b32 s3, s3, s13
1761; GFX9-NEXT:    s_and_b32 s10, s15, s12
1762; GFX9-NEXT:    s_or_b32 s3, s14, s3
1763; GFX9-NEXT:    s_bfe_u32 s10, s10, 0x100000
1764; GFX9-NEXT:    s_bfe_u32 s3, s3, 0x100000
1765; GFX9-NEXT:    s_lshl_b32 s10, s10, 16
1766; GFX9-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
1767; GFX9-NEXT:    s_or_b32 s3, s3, s10
1768; GFX9-NEXT:    s_lshr_b32 s10, s4, 8
1769; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
1770; GFX9-NEXT:    s_and_b32 s10, s10, s12
1771; GFX9-NEXT:    s_lshr_b32 s11, s4, 16
1772; GFX9-NEXT:    s_lshr_b32 s14, s4, 24
1773; GFX9-NEXT:    s_and_b32 s4, s4, s12
1774; GFX9-NEXT:    s_lshl_b32 s10, s10, s13
1775; GFX9-NEXT:    s_or_b32 s4, s4, s10
1776; GFX9-NEXT:    s_and_b32 s10, s11, s12
1777; GFX9-NEXT:    s_bfe_u32 s10, s10, 0x100000
1778; GFX9-NEXT:    v_mul_lo_u32 v1, v1, v2
1779; GFX9-NEXT:    s_bfe_u32 s4, s4, 0x100000
1780; GFX9-NEXT:    s_lshl_b32 s10, s10, 16
1781; GFX9-NEXT:    s_or_b32 s4, s4, s10
1782; GFX9-NEXT:    v_mul_hi_u32 v0, s4, v0
1783; GFX9-NEXT:    s_lshr_b32 s15, s5, 8
1784; GFX9-NEXT:    s_and_b32 s5, s5, s12
1785; GFX9-NEXT:    v_mul_hi_u32 v1, v2, v1
1786; GFX9-NEXT:    s_lshl_b32 s5, s5, s13
1787; GFX9-NEXT:    s_and_b32 s10, s15, s12
1788; GFX9-NEXT:    s_or_b32 s5, s14, s5
1789; GFX9-NEXT:    s_bfe_u32 s10, s10, 0x100000
1790; GFX9-NEXT:    s_bfe_u32 s5, s5, 0x100000
1791; GFX9-NEXT:    s_lshl_b32 s10, s10, 16
1792; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 24
1793; GFX9-NEXT:    s_or_b32 s5, s5, s10
1794; GFX9-NEXT:    v_add_u32_e32 v1, v2, v1
1795; GFX9-NEXT:    v_mul_hi_u32 v1, s5, v1
1796; GFX9-NEXT:    v_sub_u32_e32 v0, s4, v0
1797; GFX9-NEXT:    v_subrev_u32_e32 v3, 24, v0
1798; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1799; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1800; GFX9-NEXT:    v_mul_lo_u32 v1, v1, 24
1801; GFX9-NEXT:    v_subrev_u32_e32 v3, 24, v0
1802; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1803; GFX9-NEXT:    s_bfe_u32 s0, s0, 0x100000
1804; GFX9-NEXT:    s_bfe_u32 s7, s7, 0x100000
1805; GFX9-NEXT:    s_mov_b32 s10, 0xffffff
1806; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1807; GFX9-NEXT:    v_sub_u32_e32 v3, 23, v0
1808; GFX9-NEXT:    s_lshl_b32 s4, s7, 17
1809; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
1810; GFX9-NEXT:    v_and_b32_e32 v0, s10, v0
1811; GFX9-NEXT:    s_or_b32 s0, s4, s0
1812; GFX9-NEXT:    v_and_b32_e32 v3, s10, v3
1813; GFX9-NEXT:    v_lshrrev_b32_e64 v0, v0, s2
1814; GFX9-NEXT:    v_sub_u32_e32 v1, s5, v1
1815; GFX9-NEXT:    v_lshl_or_b32 v0, s0, v3, v0
1816; GFX9-NEXT:    v_subrev_u32_e32 v3, 24, v1
1817; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
1818; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1819; GFX9-NEXT:    v_subrev_u32_e32 v3, 24, v1
1820; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
1821; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
1822; GFX9-NEXT:    s_bfe_u32 s9, s9, 0x100000
1823; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffffff
1824; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1825; GFX9-NEXT:    v_sub_u32_e32 v3, 23, v1
1826; GFX9-NEXT:    s_lshl_b32 s0, s9, 17
1827; GFX9-NEXT:    s_lshl_b32 s1, s1, 1
1828; GFX9-NEXT:    v_and_b32_e32 v1, v1, v2
1829; GFX9-NEXT:    s_or_b32 s0, s0, s1
1830; GFX9-NEXT:    v_and_b32_e32 v3, v3, v2
1831; GFX9-NEXT:    v_lshrrev_b32_e64 v1, v1, s3
1832; GFX9-NEXT:    s_mov_b32 s6, 8
1833; GFX9-NEXT:    v_lshl_or_b32 v1, s0, v3, v1
1834; GFX9-NEXT:    s_mov_b32 s8, 16
1835; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1836; GFX9-NEXT:    v_and_b32_e32 v3, s12, v1
1837; GFX9-NEXT:    v_and_or_b32 v2, v0, s12, v2
1838; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1839; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
1840; GFX9-NEXT:    v_or3_b32 v0, v2, v0, v3
1841; GFX9-NEXT:    v_bfe_u32 v2, v1, 8, 8
1842; GFX9-NEXT:    v_bfe_u32 v1, v1, 16, 8
1843; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 8, v2
1844; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
1845; GFX9-NEXT:    v_readfirstlane_b32 s1, v1
1846; GFX9-NEXT:    ; return to shader part epilog
1847;
1848; GFX10-LABEL: s_fshr_v2i24:
1849; GFX10:       ; %bb.0:
1850; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1851; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v1, 24
1852; GFX10-NEXT:    s_movk_i32 s9, 0xff
1853; GFX10-NEXT:    s_lshr_b32 s12, s4, 8
1854; GFX10-NEXT:    s_bfe_u32 s10, 8, 0x100000
1855; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1856; GFX10-NEXT:    v_rcp_iflag_f32_e32 v1, v1
1857; GFX10-NEXT:    s_lshr_b32 s13, s4, 16
1858; GFX10-NEXT:    s_and_b32 s12, s12, s9
1859; GFX10-NEXT:    s_lshr_b32 s14, s4, 24
1860; GFX10-NEXT:    s_and_b32 s4, s4, s9
1861; GFX10-NEXT:    s_lshl_b32 s12, s12, s10
1862; GFX10-NEXT:    s_and_b32 s13, s13, s9
1863; GFX10-NEXT:    s_or_b32 s4, s4, s12
1864; GFX10-NEXT:    s_bfe_u32 s12, s13, 0x100000
1865; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1866; GFX10-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
1867; GFX10-NEXT:    s_lshr_b32 s15, s5, 8
1868; GFX10-NEXT:    s_bfe_u32 s4, s4, 0x100000
1869; GFX10-NEXT:    s_lshl_b32 s12, s12, 16
1870; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
1871; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v1
1872; GFX10-NEXT:    s_and_b32 s5, s5, s9
1873; GFX10-NEXT:    s_or_b32 s4, s4, s12
1874; GFX10-NEXT:    s_lshl_b32 s5, s5, s10
1875; GFX10-NEXT:    v_mul_lo_u32 v2, 0xffffffe8, v0
1876; GFX10-NEXT:    v_mul_lo_u32 v3, 0xffffffe8, v1
1877; GFX10-NEXT:    s_and_b32 s12, s15, s9
1878; GFX10-NEXT:    s_or_b32 s5, s14, s5
1879; GFX10-NEXT:    s_bfe_u32 s12, s12, 0x100000
1880; GFX10-NEXT:    s_bfe_u32 s5, s5, 0x100000
1881; GFX10-NEXT:    s_lshl_b32 s12, s12, 16
1882; GFX10-NEXT:    s_lshr_b32 s11, s1, 8
1883; GFX10-NEXT:    v_mul_hi_u32 v2, v0, v2
1884; GFX10-NEXT:    s_or_b32 s5, s5, s12
1885; GFX10-NEXT:    s_and_b32 s1, s1, s9
1886; GFX10-NEXT:    s_lshr_b32 s6, s0, 8
1887; GFX10-NEXT:    s_lshr_b32 s8, s0, 24
1888; GFX10-NEXT:    s_lshl_b32 s1, s1, s10
1889; GFX10-NEXT:    s_and_b32 s6, s6, s9
1890; GFX10-NEXT:    s_or_b32 s1, s8, s1
1891; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v2
1892; GFX10-NEXT:    v_mul_hi_u32 v2, v1, v3
1893; GFX10-NEXT:    s_lshr_b32 s8, s2, 8
1894; GFX10-NEXT:    s_lshr_b32 s7, s0, 16
1895; GFX10-NEXT:    s_and_b32 s0, s0, s9
1896; GFX10-NEXT:    v_mul_hi_u32 v0, s4, v0
1897; GFX10-NEXT:    s_lshl_b32 s6, s6, s10
1898; GFX10-NEXT:    s_and_b32 s8, s8, s9
1899; GFX10-NEXT:    s_or_b32 s0, s0, s6
1900; GFX10-NEXT:    v_add_nc_u32_e32 v1, v1, v2
1901; GFX10-NEXT:    s_and_b32 s6, s7, s9
1902; GFX10-NEXT:    s_and_b32 s7, s11, s9
1903; GFX10-NEXT:    s_lshr_b32 s11, s2, 16
1904; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 24
1905; GFX10-NEXT:    v_mul_hi_u32 v1, s5, v1
1906; GFX10-NEXT:    s_lshr_b32 s13, s2, 24
1907; GFX10-NEXT:    s_and_b32 s2, s2, s9
1908; GFX10-NEXT:    s_lshl_b32 s8, s8, s10
1909; GFX10-NEXT:    s_lshr_b32 s12, s3, 8
1910; GFX10-NEXT:    s_or_b32 s2, s2, s8
1911; GFX10-NEXT:    s_and_b32 s8, s11, s9
1912; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s4, v0
1913; GFX10-NEXT:    v_mul_lo_u32 v1, v1, 24
1914; GFX10-NEXT:    s_bfe_u32 s4, s8, 0x100000
1915; GFX10-NEXT:    s_bfe_u32 s2, s2, 0x100000
1916; GFX10-NEXT:    s_lshl_b32 s4, s4, 16
1917; GFX10-NEXT:    v_subrev_nc_u32_e32 v2, 24, v0
1918; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
1919; GFX10-NEXT:    s_and_b32 s3, s3, s9
1920; GFX10-NEXT:    s_or_b32 s2, s2, s4
1921; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s5, v1
1922; GFX10-NEXT:    s_mov_b32 s4, 0xffffff
1923; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1924; GFX10-NEXT:    s_lshl_b32 s3, s3, s10
1925; GFX10-NEXT:    s_and_b32 s5, s12, s9
1926; GFX10-NEXT:    v_subrev_nc_u32_e32 v2, 24, v1
1927; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
1928; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 24, v0
1929; GFX10-NEXT:    s_or_b32 s3, s13, s3
1930; GFX10-NEXT:    s_bfe_u32 s5, s5, 0x100000
1931; GFX10-NEXT:    s_bfe_u32 s3, s3, 0x100000
1932; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
1933; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
1934; GFX10-NEXT:    s_lshl_b32 s5, s5, 16
1935; GFX10-NEXT:    s_bfe_u32 s0, s0, 0x100000
1936; GFX10-NEXT:    s_bfe_u32 s6, s6, 0x100000
1937; GFX10-NEXT:    v_subrev_nc_u32_e32 v2, 24, v1
1938; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
1939; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
1940; GFX10-NEXT:    s_or_b32 s3, s3, s5
1941; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
1942; GFX10-NEXT:    s_bfe_u32 s7, s7, 0x100000
1943; GFX10-NEXT:    v_sub_nc_u32_e32 v3, 23, v0
1944; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
1945; GFX10-NEXT:    v_mov_b32_e32 v2, 0xffffff
1946; GFX10-NEXT:    v_and_b32_e32 v0, s4, v0
1947; GFX10-NEXT:    s_lshl_b32 s5, s6, 17
1948; GFX10-NEXT:    v_and_b32_e32 v3, s4, v3
1949; GFX10-NEXT:    v_sub_nc_u32_e32 v4, 23, v1
1950; GFX10-NEXT:    v_and_b32_e32 v1, v1, v2
1951; GFX10-NEXT:    v_lshrrev_b32_e64 v0, v0, s2
1952; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
1953; GFX10-NEXT:    s_lshl_b32 s2, s7, 17
1954; GFX10-NEXT:    v_and_b32_e32 v2, v4, v2
1955; GFX10-NEXT:    v_lshrrev_b32_e64 v1, v1, s3
1956; GFX10-NEXT:    s_or_b32 s0, s5, s0
1957; GFX10-NEXT:    s_lshl_b32 s1, s1, 1
1958; GFX10-NEXT:    v_lshl_or_b32 v0, s0, v3, v0
1959; GFX10-NEXT:    s_or_b32 s0, s2, s1
1960; GFX10-NEXT:    v_lshl_or_b32 v1, s0, v2, v1
1961; GFX10-NEXT:    s_mov_b32 s0, 8
1962; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1963; GFX10-NEXT:    s_mov_b32 s0, 16
1964; GFX10-NEXT:    v_and_b32_e32 v3, s9, v1
1965; GFX10-NEXT:    v_bfe_u32 v4, v1, 8, 8
1966; GFX10-NEXT:    v_bfe_u32 v1, v1, 16, 8
1967; GFX10-NEXT:    v_and_or_b32 v2, v0, s9, v2
1968; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1969; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
1970; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 8, v4
1971; GFX10-NEXT:    v_or3_b32 v0, v2, v0, v3
1972; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
1973; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
1974; GFX10-NEXT:    ; return to shader part epilog
; Reinterpret each packed i48 argument as two 24-bit lanes.
1975  %lhs = bitcast i48 %lhs.arg to <2 x i24>
1976  %rhs = bitcast i48 %rhs.arg to <2 x i24>
1977  %amt = bitcast i48 %amt.arg to <2 x i24>
; Per-lane funnel shift right of the lhs/rhs pair by amt.
1978  %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
; Repack the two 24-bit result lanes into the i48 return value.
1979  %cast.result = bitcast <2 x i24> %result to i48
1980  ret i48 %cast.result
1981}
1982
1983define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
1984; GFX6-LABEL: v_fshr_v2i24:
1985; GFX6:       ; %bb.0:
1986; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1987; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
1988; GFX6-NEXT:    v_rcp_iflag_f32_e32 v6, v6
1989; GFX6-NEXT:    v_mov_b32_e32 v7, 0xffffffe8
1990; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
1991; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v9, 24
1992; GFX6-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
1993; GFX6-NEXT:    v_cvt_u32_f32_e32 v6, v6
1994; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1995; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
1996; GFX6-NEXT:    v_mul_lo_u32 v8, v7, v6
1997; GFX6-NEXT:    v_mul_hi_u32 v8, v6, v8
1998; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
1999; GFX6-NEXT:    v_mul_hi_u32 v6, v4, v6
2000; GFX6-NEXT:    v_rcp_iflag_f32_e32 v8, v9
2001; GFX6-NEXT:    v_mov_b32_e32 v9, 0xffffff
2002; GFX6-NEXT:    v_and_b32_e32 v5, v5, v9
2003; GFX6-NEXT:    v_mul_lo_u32 v6, v6, 24
2004; GFX6-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
2005; GFX6-NEXT:    v_cvt_u32_f32_e32 v8, v8
2006; GFX6-NEXT:    v_and_b32_e32 v2, v2, v9
2007; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v6
2008; GFX6-NEXT:    v_subrev_i32_e32 v6, vcc, 24, v4
2009; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2010; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2011; GFX6-NEXT:    v_subrev_i32_e32 v6, vcc, 24, v4
2012; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2013; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2014; GFX6-NEXT:    v_mul_lo_u32 v6, v7, v8
2015; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, 23, v4
2016; GFX6-NEXT:    v_and_b32_e32 v7, v7, v9
2017; GFX6-NEXT:    v_mul_hi_u32 v6, v8, v6
2018; GFX6-NEXT:    v_and_b32_e32 v4, v4, v9
2019; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v7, v0
2020; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
2021; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
2022; GFX6-NEXT:    v_mul_hi_u32 v6, v5, v6
2023; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
2024; GFX6-NEXT:    v_and_b32_e32 v3, v3, v9
2025; GFX6-NEXT:    v_mul_lo_u32 v6, v6, 24
2026; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v5, v6
2027; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 24, v2
2028; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2029; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2030; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 24, v2
2031; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2032; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2033; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 23, v2
2034; GFX6-NEXT:    v_and_b32_e32 v4, v4, v9
2035; GFX6-NEXT:    v_and_b32_e32 v2, v2, v9
2036; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v4, v1
2037; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v2, v3
2038; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
2039; GFX6-NEXT:    s_setpc_b64 s[30:31]
2040;
2041; GFX8-LABEL: v_fshr_v2i24:
2042; GFX8:       ; %bb.0:
2043; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2044; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
2045; GFX8-NEXT:    v_rcp_iflag_f32_e32 v6, v6
2046; GFX8-NEXT:    v_mov_b32_e32 v7, 0xffffffe8
2047; GFX8-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2048; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v9, 24
2049; GFX8-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
2050; GFX8-NEXT:    v_cvt_u32_f32_e32 v6, v6
2051; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
2052; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
2053; GFX8-NEXT:    v_mul_lo_u32 v8, v7, v6
2054; GFX8-NEXT:    v_mul_hi_u32 v8, v6, v8
2055; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v8
2056; GFX8-NEXT:    v_mul_hi_u32 v6, v4, v6
2057; GFX8-NEXT:    v_rcp_iflag_f32_e32 v8, v9
2058; GFX8-NEXT:    v_mov_b32_e32 v9, 0xffffff
2059; GFX8-NEXT:    v_and_b32_e32 v5, v5, v9
2060; GFX8-NEXT:    v_mul_lo_u32 v6, v6, 24
2061; GFX8-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
2062; GFX8-NEXT:    v_cvt_u32_f32_e32 v8, v8
2063; GFX8-NEXT:    v_and_b32_e32 v2, v2, v9
2064; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v4, v6
2065; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, 24, v4
2066; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2067; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2068; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, 24, v4
2069; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2070; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2071; GFX8-NEXT:    v_mul_lo_u32 v6, v7, v8
2072; GFX8-NEXT:    v_sub_u32_e32 v7, vcc, 23, v4
2073; GFX8-NEXT:    v_and_b32_e32 v7, v7, v9
2074; GFX8-NEXT:    v_mul_hi_u32 v6, v8, v6
2075; GFX8-NEXT:    v_and_b32_e32 v4, v4, v9
2076; GFX8-NEXT:    v_lshlrev_b32_e32 v0, v7, v0
2077; GFX8-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
2078; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v8, v6
2079; GFX8-NEXT:    v_mul_hi_u32 v6, v5, v6
2080; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
2081; GFX8-NEXT:    v_and_b32_e32 v3, v3, v9
2082; GFX8-NEXT:    v_mul_lo_u32 v6, v6, 24
2083; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v5, v6
2084; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, 24, v2
2085; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2086; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2087; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, 24, v2
2088; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2089; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2090; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 23, v2
2091; GFX8-NEXT:    v_and_b32_e32 v4, v4, v9
2092; GFX8-NEXT:    v_and_b32_e32 v2, v2, v9
2093; GFX8-NEXT:    v_lshlrev_b32_e32 v1, v4, v1
2094; GFX8-NEXT:    v_lshrrev_b32_e32 v2, v2, v3
2095; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
2096; GFX8-NEXT:    s_setpc_b64 s[30:31]
2097;
2098; GFX9-LABEL: v_fshr_v2i24:
2099; GFX9:       ; %bb.0:
2100; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2101; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
2102; GFX9-NEXT:    v_rcp_iflag_f32_e32 v6, v6
2103; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v9, 24
2104; GFX9-NEXT:    v_rcp_iflag_f32_e32 v9, v9
2105; GFX9-NEXT:    v_mov_b32_e32 v7, 0xffffffe8
2106; GFX9-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
2107; GFX9-NEXT:    v_cvt_u32_f32_e32 v6, v6
2108; GFX9-NEXT:    v_mul_f32_e32 v9, 0x4f7ffffe, v9
2109; GFX9-NEXT:    v_cvt_u32_f32_e32 v9, v9
2110; GFX9-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2111; GFX9-NEXT:    v_mul_lo_u32 v8, v7, v6
2112; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
2113; GFX9-NEXT:    v_mul_lo_u32 v7, v7, v9
2114; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
2115; GFX9-NEXT:    v_mul_hi_u32 v8, v6, v8
2116; GFX9-NEXT:    v_mul_hi_u32 v7, v9, v7
2117; GFX9-NEXT:    v_add_u32_e32 v6, v6, v8
2118; GFX9-NEXT:    v_mul_hi_u32 v6, v4, v6
2119; GFX9-NEXT:    v_mov_b32_e32 v8, 0xffffff
2120; GFX9-NEXT:    v_and_b32_e32 v5, v5, v8
2121; GFX9-NEXT:    v_add_u32_e32 v7, v9, v7
2122; GFX9-NEXT:    v_mul_lo_u32 v6, v6, 24
2123; GFX9-NEXT:    v_mul_hi_u32 v7, v5, v7
2124; GFX9-NEXT:    v_and_b32_e32 v2, v2, v8
2125; GFX9-NEXT:    v_and_b32_e32 v3, v3, v8
2126; GFX9-NEXT:    v_sub_u32_e32 v4, v4, v6
2127; GFX9-NEXT:    v_subrev_u32_e32 v6, 24, v4
2128; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2129; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2130; GFX9-NEXT:    v_subrev_u32_e32 v6, 24, v4
2131; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2132; GFX9-NEXT:    v_mul_lo_u32 v7, v7, 24
2133; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2134; GFX9-NEXT:    v_sub_u32_e32 v6, 23, v4
2135; GFX9-NEXT:    v_and_b32_e32 v4, v4, v8
2136; GFX9-NEXT:    v_and_b32_e32 v6, v6, v8
2137; GFX9-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
2138; GFX9-NEXT:    v_lshl_or_b32 v0, v0, v6, v2
2139; GFX9-NEXT:    v_sub_u32_e32 v2, v5, v7
2140; GFX9-NEXT:    v_subrev_u32_e32 v4, 24, v2
2141; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2142; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2143; GFX9-NEXT:    v_subrev_u32_e32 v4, 24, v2
2144; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2145; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2146; GFX9-NEXT:    v_sub_u32_e32 v4, 23, v2
2147; GFX9-NEXT:    v_and_b32_e32 v2, v2, v8
2148; GFX9-NEXT:    v_and_b32_e32 v4, v4, v8
2149; GFX9-NEXT:    v_lshrrev_b32_e32 v2, v2, v3
2150; GFX9-NEXT:    v_lshl_or_b32 v1, v1, v4, v2
2151; GFX9-NEXT:    s_setpc_b64 s[30:31]
2152;
2153; GFX10-LABEL: v_fshr_v2i24:
2154; GFX10:       ; %bb.0:
2155; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2156; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2157; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
2158; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v7, 24
2159; GFX10-NEXT:    v_mov_b32_e32 v10, 0xffffff
2160; GFX10-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2161; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
2162; GFX10-NEXT:    v_rcp_iflag_f32_e32 v6, v6
2163; GFX10-NEXT:    v_rcp_iflag_f32_e32 v7, v7
2164; GFX10-NEXT:    v_and_b32_e32 v5, v5, v10
2165; GFX10-NEXT:    v_and_b32_e32 v2, v2, v10
2166; GFX10-NEXT:    v_and_b32_e32 v3, v3, v10
2167; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
2168; GFX10-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
2169; GFX10-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
2170; GFX10-NEXT:    v_cvt_u32_f32_e32 v6, v6
2171; GFX10-NEXT:    v_cvt_u32_f32_e32 v7, v7
2172; GFX10-NEXT:    v_mul_lo_u32 v8, 0xffffffe8, v6
2173; GFX10-NEXT:    v_mul_lo_u32 v9, 0xffffffe8, v7
2174; GFX10-NEXT:    v_mul_hi_u32 v8, v6, v8
2175; GFX10-NEXT:    v_mul_hi_u32 v9, v7, v9
2176; GFX10-NEXT:    v_add_nc_u32_e32 v6, v6, v8
2177; GFX10-NEXT:    v_add_nc_u32_e32 v7, v7, v9
2178; GFX10-NEXT:    v_mul_hi_u32 v6, v4, v6
2179; GFX10-NEXT:    v_mul_hi_u32 v7, v5, v7
2180; GFX10-NEXT:    v_mul_lo_u32 v6, v6, 24
2181; GFX10-NEXT:    v_mul_lo_u32 v7, v7, 24
2182; GFX10-NEXT:    v_sub_nc_u32_e32 v4, v4, v6
2183; GFX10-NEXT:    v_sub_nc_u32_e32 v5, v5, v7
2184; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, 24, v4
2185; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
2186; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, 24, v5
2187; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2188; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
2189; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, 24, v4
2190; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2191; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
2192; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, 24, v5
2193; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2194; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
2195; GFX10-NEXT:    v_sub_nc_u32_e32 v6, 23, v4
2196; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2197; GFX10-NEXT:    v_and_b32_e32 v4, v4, v10
2198; GFX10-NEXT:    v_and_b32_e32 v6, v6, v10
2199; GFX10-NEXT:    v_sub_nc_u32_e32 v7, 23, v5
2200; GFX10-NEXT:    v_and_b32_e32 v5, v5, v10
2201; GFX10-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
2202; GFX10-NEXT:    v_and_b32_e32 v4, v7, v10
2203; GFX10-NEXT:    v_lshrrev_b32_e32 v3, v5, v3
2204; GFX10-NEXT:    v_lshl_or_b32 v0, v0, v6, v2
2205; GFX10-NEXT:    v_lshl_or_b32 v1, v1, v4, v3
2206; GFX10-NEXT:    s_setpc_b64 s[30:31]
2207  %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
2208  ret <2 x i24> %result
2209}
2210
; Scalar i32 funnel shift right: %lhs, %rhs and %amt are all inreg and appear
; as s0, s1 and s2 in the checks. Every run line expects a v_alignbit_b32 whose
; result is moved back to s0 with v_readfirstlane_b32. The GFX6, GFX8 and GFX9
; checks first copy both s1 and s2 into v0/v1; the GFX10 checks copy only s2
; and use s1 directly as an operand.
2211define amdgpu_ps i32 @s_fshr_i32(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) {
2212; GFX6-LABEL: s_fshr_i32:
2213; GFX6:       ; %bb.0:
2214; GFX6-NEXT:    v_mov_b32_e32 v0, s1
2215; GFX6-NEXT:    v_mov_b32_e32 v1, s2
2216; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2217; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
2218; GFX6-NEXT:    ; return to shader part epilog
2219;
2220; GFX8-LABEL: s_fshr_i32:
2221; GFX8:       ; %bb.0:
2222; GFX8-NEXT:    v_mov_b32_e32 v0, s1
2223; GFX8-NEXT:    v_mov_b32_e32 v1, s2
2224; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2225; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
2226; GFX8-NEXT:    ; return to shader part epilog
2227;
2228; GFX9-LABEL: s_fshr_i32:
2229; GFX9:       ; %bb.0:
2230; GFX9-NEXT:    v_mov_b32_e32 v0, s1
2231; GFX9-NEXT:    v_mov_b32_e32 v1, s2
2232; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2233; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
2234; GFX9-NEXT:    ; return to shader part epilog
2235;
2236; GFX10-LABEL: s_fshr_i32:
2237; GFX10:       ; %bb.0:
2238; GFX10-NEXT:    v_mov_b32_e32 v0, s2
2239; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, v0
2240; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2241; GFX10-NEXT:    ; return to shader part epilog
2242  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2243  ret i32 %result
2244}
2245
; Scalar i32 funnel shift right by the constant 5 with inreg %lhs (s0) and
; %rhs (s1). The amount appears as the immediate last operand of
; v_alignbit_b32. The GFX6, GFX8 and GFX9 checks copy s1 into v0 first; the
; GFX10 checks use s1 directly. The result is returned in s0 through
; v_readfirstlane_b32.
2246define amdgpu_ps i32 @s_fshr_i32_5(i32 inreg %lhs, i32 inreg %rhs) {
2247; GFX6-LABEL: s_fshr_i32_5:
2248; GFX6:       ; %bb.0:
2249; GFX6-NEXT:    v_mov_b32_e32 v0, s1
2250; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, 5
2251; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
2252; GFX6-NEXT:    ; return to shader part epilog
2253;
2254; GFX8-LABEL: s_fshr_i32_5:
2255; GFX8:       ; %bb.0:
2256; GFX8-NEXT:    v_mov_b32_e32 v0, s1
2257; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, 5
2258; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
2259; GFX8-NEXT:    ; return to shader part epilog
2260;
2261; GFX9-LABEL: s_fshr_i32_5:
2262; GFX9:       ; %bb.0:
2263; GFX9-NEXT:    v_mov_b32_e32 v0, s1
2264; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, 5
2265; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
2266; GFX9-NEXT:    ; return to shader part epilog
2267;
2268; GFX10-LABEL: s_fshr_i32_5:
2269; GFX10:       ; %bb.0:
2270; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, 5
2271; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2272; GFX10-NEXT:    ; return to shader part epilog
2273  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5)
2274  ret i32 %result
2275}
2276
; Scalar i32 funnel shift right by the constant 8 with inreg %lhs (s0) and
; %rhs (s1). The amount appears as the immediate last operand of
; v_alignbit_b32. The GFX6, GFX8 and GFX9 checks copy s1 into v0 first; the
; GFX10 checks use s1 directly. The result is returned in s0 through
; v_readfirstlane_b32.
2277define amdgpu_ps i32 @s_fshr_i32_8(i32 inreg %lhs, i32 inreg %rhs) {
2278; GFX6-LABEL: s_fshr_i32_8:
2279; GFX6:       ; %bb.0:
2280; GFX6-NEXT:    v_mov_b32_e32 v0, s1
2281; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, 8
2282; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
2283; GFX6-NEXT:    ; return to shader part epilog
2284;
2285; GFX8-LABEL: s_fshr_i32_8:
2286; GFX8:       ; %bb.0:
2287; GFX8-NEXT:    v_mov_b32_e32 v0, s1
2288; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, 8
2289; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
2290; GFX8-NEXT:    ; return to shader part epilog
2291;
2292; GFX9-LABEL: s_fshr_i32_8:
2293; GFX9:       ; %bb.0:
2294; GFX9-NEXT:    v_mov_b32_e32 v0, s1
2295; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, 8
2296; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
2297; GFX9-NEXT:    ; return to shader part epilog
2298;
2299; GFX10-LABEL: s_fshr_i32_8:
2300; GFX10:       ; %bb.0:
2301; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, 8
2302; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2303; GFX10-NEXT:    ; return to shader part epilog
2304  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8)
2305  ret i32 %result
2306}
2307
; i32 funnel shift right with ordinary (non-inreg) arguments, which the checks
; read from v0, v1 and v2. All four run lines expect a single v_alignbit_b32
; writing v0 between the entry s_waitcnt and the s_setpc_b64 return; the GFX10
; checks additionally expect s_waitcnt_vscnt after the entry s_waitcnt.
2308define i32 @v_fshr_i32(i32 %lhs, i32 %rhs, i32 %amt) {
2309; GFX6-LABEL: v_fshr_i32:
2310; GFX6:       ; %bb.0:
2311; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2312; GFX6-NEXT:    v_alignbit_b32 v0, v0, v1, v2
2313; GFX6-NEXT:    s_setpc_b64 s[30:31]
2314;
2315; GFX8-LABEL: v_fshr_i32:
2316; GFX8:       ; %bb.0:
2317; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2318; GFX8-NEXT:    v_alignbit_b32 v0, v0, v1, v2
2319; GFX8-NEXT:    s_setpc_b64 s[30:31]
2320;
2321; GFX9-LABEL: v_fshr_i32:
2322; GFX9:       ; %bb.0:
2323; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2324; GFX9-NEXT:    v_alignbit_b32 v0, v0, v1, v2
2325; GFX9-NEXT:    s_setpc_b64 s[30:31]
2326;
2327; GFX10-LABEL: v_fshr_i32:
2328; GFX10:       ; %bb.0:
2329; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2330; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2331; GFX10-NEXT:    v_alignbit_b32 v0, v0, v1, v2
2332; GFX10-NEXT:    s_setpc_b64 s[30:31]
2333  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2334  ret i32 %result
2335}
2336
; i32 funnel shift right by the constant 5 with non-inreg %lhs (v0) and %rhs
; (v1). All four run lines expect one v_alignbit_b32 with 5 as its immediate
; last operand.
2337define i32 @v_fshr_i32_5(i32 %lhs, i32 %rhs) {
2338; GFX6-LABEL: v_fshr_i32_5:
2339; GFX6:       ; %bb.0:
2340; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2341; GFX6-NEXT:    v_alignbit_b32 v0, v0, v1, 5
2342; GFX6-NEXT:    s_setpc_b64 s[30:31]
2343;
2344; GFX8-LABEL: v_fshr_i32_5:
2345; GFX8:       ; %bb.0:
2346; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2347; GFX8-NEXT:    v_alignbit_b32 v0, v0, v1, 5
2348; GFX8-NEXT:    s_setpc_b64 s[30:31]
2349;
2350; GFX9-LABEL: v_fshr_i32_5:
2351; GFX9:       ; %bb.0:
2352; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2353; GFX9-NEXT:    v_alignbit_b32 v0, v0, v1, 5
2354; GFX9-NEXT:    s_setpc_b64 s[30:31]
2355;
2356; GFX10-LABEL: v_fshr_i32_5:
2357; GFX10:       ; %bb.0:
2358; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2359; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2360; GFX10-NEXT:    v_alignbit_b32 v0, v0, v1, 5
2361; GFX10-NEXT:    s_setpc_b64 s[30:31]
2362  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5)
2363  ret i32 %result
2364}
2365
; i32 funnel shift right by the constant 8 with non-inreg %lhs (v0) and %rhs
; (v1). All four run lines expect one v_alignbit_b32 with 8 as its immediate
; last operand.
2366define i32 @v_fshr_i32_8(i32 %lhs, i32 %rhs) {
2367; GFX6-LABEL: v_fshr_i32_8:
2368; GFX6:       ; %bb.0:
2369; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2370; GFX6-NEXT:    v_alignbit_b32 v0, v0, v1, 8
2371; GFX6-NEXT:    s_setpc_b64 s[30:31]
2372;
2373; GFX8-LABEL: v_fshr_i32_8:
2374; GFX8:       ; %bb.0:
2375; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2376; GFX8-NEXT:    v_alignbit_b32 v0, v0, v1, 8
2377; GFX8-NEXT:    s_setpc_b64 s[30:31]
2378;
2379; GFX9-LABEL: v_fshr_i32_8:
2380; GFX9:       ; %bb.0:
2381; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2382; GFX9-NEXT:    v_alignbit_b32 v0, v0, v1, 8
2383; GFX9-NEXT:    s_setpc_b64 s[30:31]
2384;
2385; GFX10-LABEL: v_fshr_i32_8:
2386; GFX10:       ; %bb.0:
2387; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2388; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2389; GFX10-NEXT:    v_alignbit_b32 v0, v0, v1, 8
2390; GFX10-NEXT:    s_setpc_b64 s[30:31]
2391  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8)
2392  ret i32 %result
2393}
2394
; amdgpu_ps variant with %lhs and %rhs inreg (s0, s1 in the checks) and %amt
; as a regular argument (v0). The i32 result is bitcast to float for the
; return, and no v_readfirstlane_b32 is expected. The GFX6, GFX8 and GFX9
; checks copy s1 into v1 before v_alignbit_b32; the GFX10 checks use both s0
; and s1 directly.
2395define amdgpu_ps float @v_fshr_i32_ssv(i32 inreg %lhs, i32 inreg %rhs, i32 %amt) {
2396; GFX6-LABEL: v_fshr_i32_ssv:
2397; GFX6:       ; %bb.0:
2398; GFX6-NEXT:    v_mov_b32_e32 v1, s1
2399; GFX6-NEXT:    v_alignbit_b32 v0, s0, v1, v0
2400; GFX6-NEXT:    ; return to shader part epilog
2401;
2402; GFX8-LABEL: v_fshr_i32_ssv:
2403; GFX8:       ; %bb.0:
2404; GFX8-NEXT:    v_mov_b32_e32 v1, s1
2405; GFX8-NEXT:    v_alignbit_b32 v0, s0, v1, v0
2406; GFX8-NEXT:    ; return to shader part epilog
2407;
2408; GFX9-LABEL: v_fshr_i32_ssv:
2409; GFX9:       ; %bb.0:
2410; GFX9-NEXT:    v_mov_b32_e32 v1, s1
2411; GFX9-NEXT:    v_alignbit_b32 v0, s0, v1, v0
2412; GFX9-NEXT:    ; return to shader part epilog
2413;
2414; GFX10-LABEL: v_fshr_i32_ssv:
2415; GFX10:       ; %bb.0:
2416; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, v0
2417; GFX10-NEXT:    ; return to shader part epilog
2418  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2419  %cast.result = bitcast i32 %result to float
2420  ret float %cast.result
2421}
2422
; amdgpu_ps variant with %lhs and %amt inreg (s0, s1 in the checks) and %rhs
; as a regular argument (v0). The i32 result is bitcast to float for the
; return. The GFX6, GFX8 and GFX9 checks copy the amount from s1 into v1
; before v_alignbit_b32; the GFX10 checks pass s1 directly as the last operand.
2423define amdgpu_ps float @v_fshr_i32_svs(i32 inreg %lhs, i32 %rhs, i32 inreg %amt) {
2424; GFX6-LABEL: v_fshr_i32_svs:
2425; GFX6:       ; %bb.0:
2426; GFX6-NEXT:    v_mov_b32_e32 v1, s1
2427; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2428; GFX6-NEXT:    ; return to shader part epilog
2429;
2430; GFX8-LABEL: v_fshr_i32_svs:
2431; GFX8:       ; %bb.0:
2432; GFX8-NEXT:    v_mov_b32_e32 v1, s1
2433; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2434; GFX8-NEXT:    ; return to shader part epilog
2435;
2436; GFX9-LABEL: v_fshr_i32_svs:
2437; GFX9:       ; %bb.0:
2438; GFX9-NEXT:    v_mov_b32_e32 v1, s1
2439; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2440; GFX9-NEXT:    ; return to shader part epilog
2441;
2442; GFX10-LABEL: v_fshr_i32_svs:
2443; GFX10:       ; %bb.0:
2444; GFX10-NEXT:    v_alignbit_b32 v0, s0, v0, s1
2445; GFX10-NEXT:    ; return to shader part epilog
2446  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2447  %cast.result = bitcast i32 %result to float
2448  ret float %cast.result
2449}
2450
; amdgpu_ps variant with %lhs as a regular argument (v0) and %rhs / %amt inreg
; (s0, s1), i.e. the "vss" operand pattern the function name describes. The
; i32 result is bitcast to float for the return. Previously %lhs was also
; inreg, which made this a duplicate of the all-inreg case and left the
; VGPR-lhs pattern untested.
; NOTE(review): the check lines below were adjusted by hand to follow the
; signature change, mirroring the operand folding seen in the ssv and svs
; variants; re-run utils/update_llc_test_checks.py to confirm them.
2451define amdgpu_ps float @v_fshr_i32_vss(i32 %lhs, i32 inreg %rhs, i32 inreg %amt) {
2452; GFX6-LABEL: v_fshr_i32_vss:
2453; GFX6:       ; %bb.0:
2454; GFX6-NEXT:    v_mov_b32_e32 v1, s1
2456; GFX6-NEXT:    v_alignbit_b32 v0, v0, s0, v1
2457; GFX6-NEXT:    ; return to shader part epilog
2458;
2459; GFX8-LABEL: v_fshr_i32_vss:
2460; GFX8:       ; %bb.0:
2461; GFX8-NEXT:    v_mov_b32_e32 v1, s1
2463; GFX8-NEXT:    v_alignbit_b32 v0, v0, s0, v1
2464; GFX8-NEXT:    ; return to shader part epilog
2465;
2466; GFX9-LABEL: v_fshr_i32_vss:
2467; GFX9:       ; %bb.0:
2468; GFX9-NEXT:    v_mov_b32_e32 v1, s1
2470; GFX9-NEXT:    v_alignbit_b32 v0, v0, s0, v1
2471; GFX9-NEXT:    ; return to shader part epilog
2472;
2473; GFX10-LABEL: v_fshr_i32_vss:
2474; GFX10:       ; %bb.0:
2476; GFX10-NEXT:    v_alignbit_b32 v0, v0, s0, s1
2477; GFX10-NEXT:    ; return to shader part epilog
2478  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2479  %cast.result = bitcast i32 %result to float
2480  ret float %cast.result
2481}
2482
; <2 x i32> funnel shift right with non-inreg vector arguments. All four run
; lines expect one v_alignbit_b32 per element: element 0 uses v0/v2/v4 and
; element 1 uses v1/v3/v5 for lhs/rhs/amount, with results in v0 and v1.
2483define <2 x i32> @v_fshr_v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) {
2484; GFX6-LABEL: v_fshr_v2i32:
2485; GFX6:       ; %bb.0:
2486; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2487; GFX6-NEXT:    v_alignbit_b32 v0, v0, v2, v4
2488; GFX6-NEXT:    v_alignbit_b32 v1, v1, v3, v5
2489; GFX6-NEXT:    s_setpc_b64 s[30:31]
2490;
2491; GFX8-LABEL: v_fshr_v2i32:
2492; GFX8:       ; %bb.0:
2493; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2494; GFX8-NEXT:    v_alignbit_b32 v0, v0, v2, v4
2495; GFX8-NEXT:    v_alignbit_b32 v1, v1, v3, v5
2496; GFX8-NEXT:    s_setpc_b64 s[30:31]
2497;
2498; GFX9-LABEL: v_fshr_v2i32:
2499; GFX9:       ; %bb.0:
2500; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2501; GFX9-NEXT:    v_alignbit_b32 v0, v0, v2, v4
2502; GFX9-NEXT:    v_alignbit_b32 v1, v1, v3, v5
2503; GFX9-NEXT:    s_setpc_b64 s[30:31]
2504;
2505; GFX10-LABEL: v_fshr_v2i32:
2506; GFX10:       ; %bb.0:
2507; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2508; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2509; GFX10-NEXT:    v_alignbit_b32 v0, v0, v2, v4
2510; GFX10-NEXT:    v_alignbit_b32 v1, v1, v3, v5
2511; GFX10-NEXT:    s_setpc_b64 s[30:31]
2512  %result = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt)
2513  ret <2 x i32> %result
2514}
2515
; <3 x i32> funnel shift right with non-inreg vector arguments. All four run
; lines expect three v_alignbit_b32 instructions, one per element, reading lhs
; from v0-v2, rhs from v3-v5 and the amount from v6-v8, with results in v0-v2.
2516define <3 x i32> @v_fshr_v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) {
2517; GFX6-LABEL: v_fshr_v3i32:
2518; GFX6:       ; %bb.0:
2519; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2520; GFX6-NEXT:    v_alignbit_b32 v0, v0, v3, v6
2521; GFX6-NEXT:    v_alignbit_b32 v1, v1, v4, v7
2522; GFX6-NEXT:    v_alignbit_b32 v2, v2, v5, v8
2523; GFX6-NEXT:    s_setpc_b64 s[30:31]
2524;
2525; GFX8-LABEL: v_fshr_v3i32:
2526; GFX8:       ; %bb.0:
2527; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2528; GFX8-NEXT:    v_alignbit_b32 v0, v0, v3, v6
2529; GFX8-NEXT:    v_alignbit_b32 v1, v1, v4, v7
2530; GFX8-NEXT:    v_alignbit_b32 v2, v2, v5, v8
2531; GFX8-NEXT:    s_setpc_b64 s[30:31]
2532;
2533; GFX9-LABEL: v_fshr_v3i32:
2534; GFX9:       ; %bb.0:
2535; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2536; GFX9-NEXT:    v_alignbit_b32 v0, v0, v3, v6
2537; GFX9-NEXT:    v_alignbit_b32 v1, v1, v4, v7
2538; GFX9-NEXT:    v_alignbit_b32 v2, v2, v5, v8
2539; GFX9-NEXT:    s_setpc_b64 s[30:31]
2540;
2541; GFX10-LABEL: v_fshr_v3i32:
2542; GFX10:       ; %bb.0:
2543; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2544; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2545; GFX10-NEXT:    v_alignbit_b32 v0, v0, v3, v6
2546; GFX10-NEXT:    v_alignbit_b32 v1, v1, v4, v7
2547; GFX10-NEXT:    v_alignbit_b32 v2, v2, v5, v8
2548; GFX10-NEXT:    s_setpc_b64 s[30:31]
2549  %result = call <3 x i32> @llvm.fshr.v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt)
2550  ret <3 x i32> %result
2551}
2552
; <4 x i32> funnel shift right with non-inreg vector arguments. All four run
; lines expect four v_alignbit_b32 instructions, one per element, reading lhs
; from v0-v3, rhs from v4-v7 and the amount from v8-v11, with results in v0-v3.
2553define <4 x i32> @v_fshr_v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) {
2554; GFX6-LABEL: v_fshr_v4i32:
2555; GFX6:       ; %bb.0:
2556; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2557; GFX6-NEXT:    v_alignbit_b32 v0, v0, v4, v8
2558; GFX6-NEXT:    v_alignbit_b32 v1, v1, v5, v9
2559; GFX6-NEXT:    v_alignbit_b32 v2, v2, v6, v10
2560; GFX6-NEXT:    v_alignbit_b32 v3, v3, v7, v11
2561; GFX6-NEXT:    s_setpc_b64 s[30:31]
2562;
2563; GFX8-LABEL: v_fshr_v4i32:
2564; GFX8:       ; %bb.0:
2565; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2566; GFX8-NEXT:    v_alignbit_b32 v0, v0, v4, v8
2567; GFX8-NEXT:    v_alignbit_b32 v1, v1, v5, v9
2568; GFX8-NEXT:    v_alignbit_b32 v2, v2, v6, v10
2569; GFX8-NEXT:    v_alignbit_b32 v3, v3, v7, v11
2570; GFX8-NEXT:    s_setpc_b64 s[30:31]
2571;
2572; GFX9-LABEL: v_fshr_v4i32:
2573; GFX9:       ; %bb.0:
2574; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2575; GFX9-NEXT:    v_alignbit_b32 v0, v0, v4, v8
2576; GFX9-NEXT:    v_alignbit_b32 v1, v1, v5, v9
2577; GFX9-NEXT:    v_alignbit_b32 v2, v2, v6, v10
2578; GFX9-NEXT:    v_alignbit_b32 v3, v3, v7, v11
2579; GFX9-NEXT:    s_setpc_b64 s[30:31]
2580;
2581; GFX10-LABEL: v_fshr_v4i32:
2582; GFX10:       ; %bb.0:
2583; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2584; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2585; GFX10-NEXT:    v_alignbit_b32 v0, v0, v4, v8
2586; GFX10-NEXT:    v_alignbit_b32 v1, v1, v5, v9
2587; GFX10-NEXT:    v_alignbit_b32 v2, v2, v6, v10
2588; GFX10-NEXT:    v_alignbit_b32 v3, v3, v7, v11
2589; GFX10-NEXT:    s_setpc_b64 s[30:31]
2590  %result = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt)
2591  ret <4 x i32> %result
2592}
2593
; Scalar i16 funnel shift right with all operands inreg (s0, s1, s2). No run
; line expects v_alignbit_b32 here; the checks show an all-scalar expansion:
; the amount is masked with 15 (s_and_b32) and its complement is masked with
; 15 (s_andn2_b32), %lhs is shifted left by 1 and then by the complemented
; amount, %rhs is shifted right by the masked amount, and the two parts are
; combined with s_or_b32. The GFX6 checks use an immediate 1 for the first
; shift and s_and_b32 with 0xffff on %rhs; the GFX8, GFX9 and GFX10 checks
; pass the 1 and %rhs through s_bfe_u32 with 0x100000 instead.
2594define amdgpu_ps i16 @s_fshr_i16(i16 inreg %lhs, i16 inreg %rhs, i16 inreg %amt) {
2595; GFX6-LABEL: s_fshr_i16:
2596; GFX6:       ; %bb.0:
2597; GFX6-NEXT:    s_and_b32 s3, s2, 15
2598; GFX6-NEXT:    s_andn2_b32 s2, 15, s2
2599; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
2600; GFX6-NEXT:    s_bfe_u32 s2, s2, 0x100000
2601; GFX6-NEXT:    s_lshl_b32 s0, s0, s2
2602; GFX6-NEXT:    s_bfe_u32 s2, s3, 0x100000
2603; GFX6-NEXT:    s_and_b32 s1, s1, 0xffff
2604; GFX6-NEXT:    s_lshr_b32 s1, s1, s2
2605; GFX6-NEXT:    s_or_b32 s0, s0, s1
2606; GFX6-NEXT:    ; return to shader part epilog
2607;
2608; GFX8-LABEL: s_fshr_i16:
2609; GFX8:       ; %bb.0:
2610; GFX8-NEXT:    s_and_b32 s3, s2, 15
2611; GFX8-NEXT:    s_andn2_b32 s2, 15, s2
2612; GFX8-NEXT:    s_bfe_u32 s4, 1, 0x100000
2613; GFX8-NEXT:    s_lshl_b32 s0, s0, s4
2614; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
2615; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
2616; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
2617; GFX8-NEXT:    s_bfe_u32 s2, s3, 0x100000
2618; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
2619; GFX8-NEXT:    s_or_b32 s0, s0, s1
2620; GFX8-NEXT:    ; return to shader part epilog
2621;
2622; GFX9-LABEL: s_fshr_i16:
2623; GFX9:       ; %bb.0:
2624; GFX9-NEXT:    s_and_b32 s3, s2, 15
2625; GFX9-NEXT:    s_andn2_b32 s2, 15, s2
2626; GFX9-NEXT:    s_bfe_u32 s4, 1, 0x100000
2627; GFX9-NEXT:    s_lshl_b32 s0, s0, s4
2628; GFX9-NEXT:    s_bfe_u32 s2, s2, 0x100000
2629; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
2630; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
2631; GFX9-NEXT:    s_bfe_u32 s2, s3, 0x100000
2632; GFX9-NEXT:    s_lshr_b32 s1, s1, s2
2633; GFX9-NEXT:    s_or_b32 s0, s0, s1
2634; GFX9-NEXT:    ; return to shader part epilog
2635;
2636; GFX10-LABEL: s_fshr_i16:
2637; GFX10:       ; %bb.0:
2638; GFX10-NEXT:    s_and_b32 s3, s2, 15
2639; GFX10-NEXT:    s_bfe_u32 s4, 1, 0x100000
2640; GFX10-NEXT:    s_andn2_b32 s2, 15, s2
2641; GFX10-NEXT:    s_lshl_b32 s0, s0, s4
2642; GFX10-NEXT:    s_bfe_u32 s2, s2, 0x100000
2643; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
2644; GFX10-NEXT:    s_bfe_u32 s3, s3, 0x100000
2645; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
2646; GFX10-NEXT:    s_lshr_b32 s1, s1, s3
2647; GFX10-NEXT:    s_or_b32 s0, s0, s1
2648; GFX10-NEXT:    ; return to shader part epilog
2649  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
2650  ret i16 %result
2651}
2652
; Scalar i16 funnel shift right by the constant 4 with inreg %lhs (s0) and
; %rhs (s1). The GFX6 checks expect s_lshl_b32 by 12, s_bfe_u32 with 0xc0004
; on %rhs, and s_or_b32. The GFX8, GFX9 and GFX10 checks instead pass the
; constants 12 and 4 and %rhs through s_bfe_u32 with 0x100000 before the
; s_lshl_b32 / s_lshr_b32 / s_or_b32 sequence.
2653define amdgpu_ps i16 @s_fshr_i16_4(i16 inreg %lhs, i16 inreg %rhs) {
2654; GFX6-LABEL: s_fshr_i16_4:
2655; GFX6:       ; %bb.0:
2656; GFX6-NEXT:    s_lshl_b32 s0, s0, 12
2657; GFX6-NEXT:    s_bfe_u32 s1, s1, 0xc0004
2658; GFX6-NEXT:    s_or_b32 s0, s0, s1
2659; GFX6-NEXT:    ; return to shader part epilog
2660;
2661; GFX8-LABEL: s_fshr_i16_4:
2662; GFX8:       ; %bb.0:
2663; GFX8-NEXT:    s_bfe_u32 s2, 12, 0x100000
2664; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
2665; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
2666; GFX8-NEXT:    s_bfe_u32 s2, 4, 0x100000
2667; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
2668; GFX8-NEXT:    s_or_b32 s0, s0, s1
2669; GFX8-NEXT:    ; return to shader part epilog
2670;
2671; GFX9-LABEL: s_fshr_i16_4:
2672; GFX9:       ; %bb.0:
2673; GFX9-NEXT:    s_bfe_u32 s2, 12, 0x100000
2674; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
2675; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
2676; GFX9-NEXT:    s_bfe_u32 s2, 4, 0x100000
2677; GFX9-NEXT:    s_lshr_b32 s1, s1, s2
2678; GFX9-NEXT:    s_or_b32 s0, s0, s1
2679; GFX9-NEXT:    ; return to shader part epilog
2680;
2681; GFX10-LABEL: s_fshr_i16_4:
2682; GFX10:       ; %bb.0:
2683; GFX10-NEXT:    s_bfe_u32 s2, 12, 0x100000
2684; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
2685; GFX10-NEXT:    s_bfe_u32 s3, 4, 0x100000
2686; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
2687; GFX10-NEXT:    s_lshr_b32 s1, s1, s3
2688; GFX10-NEXT:    s_or_b32 s0, s0, s1
2689; GFX10-NEXT:    ; return to shader part epilog
2690  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4)
2691  ret i16 %result
2692}
2693
; Scalar i16 funnel shift right by the constant 5 with inreg %lhs (s0) and
; %rhs (s1). The GFX6 checks expect s_lshl_b32 by 11, s_bfe_u32 with 0xb0005
; on %rhs, and s_or_b32. The GFX8, GFX9 and GFX10 checks instead pass the
; constants 11 and 5 and %rhs through s_bfe_u32 with 0x100000 before the
; s_lshl_b32 / s_lshr_b32 / s_or_b32 sequence.
2694define amdgpu_ps i16 @s_fshr_i16_5(i16 inreg %lhs, i16 inreg %rhs) {
2695; GFX6-LABEL: s_fshr_i16_5:
2696; GFX6:       ; %bb.0:
2697; GFX6-NEXT:    s_lshl_b32 s0, s0, 11
2698; GFX6-NEXT:    s_bfe_u32 s1, s1, 0xb0005
2699; GFX6-NEXT:    s_or_b32 s0, s0, s1
2700; GFX6-NEXT:    ; return to shader part epilog
2701;
2702; GFX8-LABEL: s_fshr_i16_5:
2703; GFX8:       ; %bb.0:
2704; GFX8-NEXT:    s_bfe_u32 s2, 11, 0x100000
2705; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
2706; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
2707; GFX8-NEXT:    s_bfe_u32 s2, 5, 0x100000
2708; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
2709; GFX8-NEXT:    s_or_b32 s0, s0, s1
2710; GFX8-NEXT:    ; return to shader part epilog
2711;
2712; GFX9-LABEL: s_fshr_i16_5:
2713; GFX9:       ; %bb.0:
2714; GFX9-NEXT:    s_bfe_u32 s2, 11, 0x100000
2715; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
2716; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
2717; GFX9-NEXT:    s_bfe_u32 s2, 5, 0x100000
2718; GFX9-NEXT:    s_lshr_b32 s1, s1, s2
2719; GFX9-NEXT:    s_or_b32 s0, s0, s1
2720; GFX9-NEXT:    ; return to shader part epilog
2721;
2722; GFX10-LABEL: s_fshr_i16_5:
2723; GFX10:       ; %bb.0:
2724; GFX10-NEXT:    s_bfe_u32 s2, 11, 0x100000
2725; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
2726; GFX10-NEXT:    s_bfe_u32 s3, 5, 0x100000
2727; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
2728; GFX10-NEXT:    s_lshr_b32 s1, s1, s3
2729; GFX10-NEXT:    s_or_b32 s0, s0, s1
2730; GFX10-NEXT:    ; return to shader part epilog
2731  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5)
2732  ret i16 %result
2733}
2734
; i16 funnel shift right with non-inreg arguments (v0, v1, v2). The checks
; show the amount masked with 15 and its bitwise complement (v_xor_b32 with
; -1) also masked with 15; %lhs is shifted left by 1 and then by the
; complemented amount, %rhs is shifted right by the masked amount, and the two
; parts are ORed. The GFX6 checks use 32-bit shifts plus v_bfe_u32 and
; v_and_b32 with 0xffff; the GFX8, GFX9 and GFX10 checks use v_lshlrev_b16 and
; v_lshrrev_b16.
2735define i16 @v_fshr_i16(i16 %lhs, i16 %rhs, i16 %amt) {
2736; GFX6-LABEL: v_fshr_i16:
2737; GFX6:       ; %bb.0:
2738; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2739; GFX6-NEXT:    v_and_b32_e32 v3, 15, v2
2740; GFX6-NEXT:    v_xor_b32_e32 v2, -1, v2
2741; GFX6-NEXT:    v_and_b32_e32 v2, 15, v2
2742; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
2743; GFX6-NEXT:    v_bfe_u32 v2, v2, 0, 16
2744; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
2745; GFX6-NEXT:    v_bfe_u32 v2, v3, 0, 16
2746; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
2747; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
2748; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
2749; GFX6-NEXT:    s_setpc_b64 s[30:31]
2750;
2751; GFX8-LABEL: v_fshr_i16:
2752; GFX8:       ; %bb.0:
2753; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2754; GFX8-NEXT:    v_and_b32_e32 v3, 15, v2
2755; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
2756; GFX8-NEXT:    v_and_b32_e32 v2, 15, v2
2757; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
2758; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
2759; GFX8-NEXT:    v_lshrrev_b16_e32 v1, v3, v1
2760; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2761; GFX8-NEXT:    s_setpc_b64 s[30:31]
2762;
2763; GFX9-LABEL: v_fshr_i16:
2764; GFX9:       ; %bb.0:
2765; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2766; GFX9-NEXT:    v_and_b32_e32 v3, 15, v2
2767; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v2
2768; GFX9-NEXT:    v_and_b32_e32 v2, 15, v2
2769; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
2770; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
2771; GFX9-NEXT:    v_lshrrev_b16_e32 v1, v3, v1
2772; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
2773; GFX9-NEXT:    s_setpc_b64 s[30:31]
2774;
2775; GFX10-LABEL: v_fshr_i16:
2776; GFX10:       ; %bb.0:
2777; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2778; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2779; GFX10-NEXT:    v_xor_b32_e32 v3, -1, v2
2780; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
2781; GFX10-NEXT:    v_and_b32_e32 v2, 15, v2
2782; GFX10-NEXT:    v_and_b32_e32 v3, 15, v3
2783; GFX10-NEXT:    v_lshrrev_b16 v1, v2, v1
2784; GFX10-NEXT:    v_lshlrev_b16 v0, v3, v0
2785; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
2786; GFX10-NEXT:    s_setpc_b64 s[30:31]
2787  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
2788  ret i16 %result
2789}
2790
; i16 funnel shift right by the constant 4 with non-inreg %lhs (v0) and %rhs
; (v1). The GFX6 checks expect v_lshlrev_b32 by 12, v_bfe_u32 v1, v1, 4, 12
; and v_or_b32. The GFX8, GFX9 and GFX10 checks expect v_lshlrev_b16 by 12,
; v_lshrrev_b16 by 4 and v_or_b32.
2791define i16 @v_fshr_i16_4(i16 %lhs, i16 %rhs) {
2792; GFX6-LABEL: v_fshr_i16_4:
2793; GFX6:       ; %bb.0:
2794; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2795; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 12, v0
2796; GFX6-NEXT:    v_bfe_u32 v1, v1, 4, 12
2797; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
2798; GFX6-NEXT:    s_setpc_b64 s[30:31]
2799;
2800; GFX8-LABEL: v_fshr_i16_4:
2801; GFX8:       ; %bb.0:
2802; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2803; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 12, v0
2804; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 4, v1
2805; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2806; GFX8-NEXT:    s_setpc_b64 s[30:31]
2807;
2808; GFX9-LABEL: v_fshr_i16_4:
2809; GFX9:       ; %bb.0:
2810; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2811; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 12, v0
2812; GFX9-NEXT:    v_lshrrev_b16_e32 v1, 4, v1
2813; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
2814; GFX9-NEXT:    s_setpc_b64 s[30:31]
2815;
2816; GFX10-LABEL: v_fshr_i16_4:
2817; GFX10:       ; %bb.0:
2818; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2819; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2820; GFX10-NEXT:    v_lshlrev_b16 v0, 12, v0
2821; GFX10-NEXT:    v_lshrrev_b16 v1, 4, v1
2822; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
2823; GFX10-NEXT:    s_setpc_b64 s[30:31]
2824  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4)
2825  ret i16 %result
2826}
2827
; i16 funnel shift right by the constant 5 with non-inreg %lhs (v0) and %rhs
; (v1). The GFX6 checks expect v_lshlrev_b32 by 11, v_bfe_u32 v1, v1, 5, 11
; and v_or_b32. The GFX8, GFX9 and GFX10 checks expect v_lshlrev_b16 by 11,
; v_lshrrev_b16 by 5 and v_or_b32.
2828define i16 @v_fshr_i16_5(i16 %lhs, i16 %rhs) {
2829; GFX6-LABEL: v_fshr_i16_5:
2830; GFX6:       ; %bb.0:
2831; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2832; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 11, v0
2833; GFX6-NEXT:    v_bfe_u32 v1, v1, 5, 11
2834; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
2835; GFX6-NEXT:    s_setpc_b64 s[30:31]
2836;
2837; GFX8-LABEL: v_fshr_i16_5:
2838; GFX8:       ; %bb.0:
2839; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2840; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 11, v0
2841; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 5, v1
2842; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2843; GFX8-NEXT:    s_setpc_b64 s[30:31]
2844;
2845; GFX9-LABEL: v_fshr_i16_5:
2846; GFX9:       ; %bb.0:
2847; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2848; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 11, v0
2849; GFX9-NEXT:    v_lshrrev_b16_e32 v1, 5, v1
2850; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
2851; GFX9-NEXT:    s_setpc_b64 s[30:31]
2852;
2853; GFX10-LABEL: v_fshr_i16_5:
2854; GFX10:       ; %bb.0:
2855; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2856; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2857; GFX10-NEXT:    v_lshlrev_b16 v0, 11, v0
2858; GFX10-NEXT:    v_lshrrev_b16 v1, 5, v1
2859; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
2860; GFX10-NEXT:    s_setpc_b64 s[30:31]
2861  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5)
2862  ret i16 %result
2863}
2864
; amdgpu_ps i16 variant with %lhs and %rhs inreg (s0, s1 in the checks) and
; %amt as a regular argument (v0); the i16 result is bitcast to half for the
; return. The amount masking (v_and_b32 with 15, v_xor_b32 with -1) is done
; with vector instructions, while the initial shift of %lhs by 1 stays scalar
; (s_lshl_b32). The GFX6 checks use 32-bit v_lshl_b32 / v_lshr_b32 with
; v_bfe_u32 and s_and_b32 0xffff; the GFX8, GFX9 and GFX10 checks use
; v_lshlrev_b16 / v_lshrrev_b16 taking s0 and s1 as operands.
2865define amdgpu_ps half @v_fshr_i16_ssv(i16 inreg %lhs, i16 inreg %rhs, i16 %amt) {
2866; GFX6-LABEL: v_fshr_i16_ssv:
2867; GFX6:       ; %bb.0:
2868; GFX6-NEXT:    v_and_b32_e32 v1, 15, v0
2869; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
2870; GFX6-NEXT:    v_and_b32_e32 v0, 15, v0
2871; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
2872; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 16
2873; GFX6-NEXT:    v_lshl_b32_e32 v0, s0, v0
2874; GFX6-NEXT:    v_bfe_u32 v1, v1, 0, 16
2875; GFX6-NEXT:    s_and_b32 s0, s1, 0xffff
2876; GFX6-NEXT:    v_lshr_b32_e32 v1, s0, v1
2877; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
2878; GFX6-NEXT:    ; return to shader part epilog
2879;
2880; GFX8-LABEL: v_fshr_i16_ssv:
2881; GFX8:       ; %bb.0:
2882; GFX8-NEXT:    v_and_b32_e32 v1, 15, v0
2883; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
2884; GFX8-NEXT:    s_bfe_u32 s2, 1, 0x100000
2885; GFX8-NEXT:    v_and_b32_e32 v0, 15, v0
2886; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
2887; GFX8-NEXT:    v_lshlrev_b16_e64 v0, v0, s0
2888; GFX8-NEXT:    v_lshrrev_b16_e64 v1, v1, s1
2889; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2890; GFX8-NEXT:    ; return to shader part epilog
2891;
2892; GFX9-LABEL: v_fshr_i16_ssv:
2893; GFX9:       ; %bb.0:
2894; GFX9-NEXT:    v_and_b32_e32 v1, 15, v0
2895; GFX9-NEXT:    v_xor_b32_e32 v0, -1, v0
2896; GFX9-NEXT:    s_bfe_u32 s2, 1, 0x100000
2897; GFX9-NEXT:    v_and_b32_e32 v0, 15, v0
2898; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
2899; GFX9-NEXT:    v_lshlrev_b16_e64 v0, v0, s0
2900; GFX9-NEXT:    v_lshrrev_b16_e64 v1, v1, s1
2901; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
2902; GFX9-NEXT:    ; return to shader part epilog
2903;
2904; GFX10-LABEL: v_fshr_i16_ssv:
2905; GFX10:       ; %bb.0:
2906; GFX10-NEXT:    v_xor_b32_e32 v1, -1, v0
2907; GFX10-NEXT:    v_and_b32_e32 v0, 15, v0
2908; GFX10-NEXT:    s_bfe_u32 s2, 1, 0x100000
2909; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
2910; GFX10-NEXT:    v_and_b32_e32 v1, 15, v1
2911; GFX10-NEXT:    v_lshrrev_b16 v0, v0, s1
2912; GFX10-NEXT:    v_lshlrev_b16 v1, v1, s0
2913; GFX10-NEXT:    v_or_b32_e32 v0, v1, v0
2914; GFX10-NEXT:    ; return to shader part epilog
2915  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
2916  %cast.result = bitcast i16 %result to half
2917  ret half %cast.result
2918}
2919
; amdgpu_ps i16 variant with %lhs and %amt inreg (s0, s1 in the checks) and
; %rhs as a regular argument (v0); the i16 result is bitcast to half for the
; return. The amount masking (s_and_b32 / s_andn2_b32 with 15) and both left
; shifts of %lhs stay scalar; only the right shift of %rhs and the final OR
; are vector instructions. The GFX6 checks use v_and_b32 with 0xffff and
; v_lshrrev_b32; the GFX8, GFX9 and GFX10 checks use v_lshrrev_b16.
2920define amdgpu_ps half @v_fshr_i16_svs(i16 inreg %lhs, i16 %rhs, i16 inreg %amt) {
2921; GFX6-LABEL: v_fshr_i16_svs:
2922; GFX6:       ; %bb.0:
2923; GFX6-NEXT:    s_and_b32 s2, s1, 15
2924; GFX6-NEXT:    s_andn2_b32 s1, 15, s1
2925; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
2926; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x100000
2927; GFX6-NEXT:    s_lshl_b32 s0, s0, s1
2928; GFX6-NEXT:    s_bfe_u32 s1, s2, 0x100000
2929; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
2930; GFX6-NEXT:    v_lshrrev_b32_e32 v0, s1, v0
2931; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
2932; GFX6-NEXT:    ; return to shader part epilog
2933;
2934; GFX8-LABEL: v_fshr_i16_svs:
2935; GFX8:       ; %bb.0:
2936; GFX8-NEXT:    s_and_b32 s2, s1, 15
2937; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
2938; GFX8-NEXT:    s_bfe_u32 s3, 1, 0x100000
2939; GFX8-NEXT:    s_lshl_b32 s0, s0, s3
2940; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
2941; GFX8-NEXT:    s_lshl_b32 s0, s0, s1
2942; GFX8-NEXT:    v_lshrrev_b16_e32 v0, s2, v0
2943; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
2944; GFX8-NEXT:    ; return to shader part epilog
2945;
2946; GFX9-LABEL: v_fshr_i16_svs:
2947; GFX9:       ; %bb.0:
2948; GFX9-NEXT:    s_and_b32 s2, s1, 15
2949; GFX9-NEXT:    s_andn2_b32 s1, 15, s1
2950; GFX9-NEXT:    s_bfe_u32 s3, 1, 0x100000
2951; GFX9-NEXT:    s_lshl_b32 s0, s0, s3
2952; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
2953; GFX9-NEXT:    s_lshl_b32 s0, s0, s1
2954; GFX9-NEXT:    v_lshrrev_b16_e32 v0, s2, v0
2955; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
2956; GFX9-NEXT:    ; return to shader part epilog
2957;
2958; GFX10-LABEL: v_fshr_i16_svs:
2959; GFX10:       ; %bb.0:
2960; GFX10-NEXT:    s_and_b32 s2, s1, 15
2961; GFX10-NEXT:    s_bfe_u32 s3, 1, 0x100000
2962; GFX10-NEXT:    s_andn2_b32 s1, 15, s1
2963; GFX10-NEXT:    v_lshrrev_b16 v0, s2, v0
2964; GFX10-NEXT:    s_lshl_b32 s0, s0, s3
2965; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
2966; GFX10-NEXT:    s_lshl_b32 s0, s0, s1
2967; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
2968; GFX10-NEXT:    ; return to shader part epilog
2969  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
2970  %cast.result = bitcast i16 %result to half
2971  ret half %cast.result
2972}
2973
; Scalar i16 funnel shift right, "vss" operand mix: %lhs is a plain argument,
; %rhs and %amt are marked inreg. In the expected output below %lhs is read
; from v0 and the inreg operands from s0/s1.
; llvm.fshr shifts the concatenation %lhs:%rhs right by %amt (modulo the bit
; width) and keeps the low half; see the LLVM LangRef entry for the intrinsic.
; The check groups are autogenerated (one per -mcpu in the run lines at the top
; of the file); regenerate them with update_llc_test_checks.py.
2974define amdgpu_ps half @v_fshr_i16_vss(i16 %lhs, i16 inreg %rhs, i16 inreg %amt) {
2975; GFX6-LABEL: v_fshr_i16_vss:
2976; GFX6:       ; %bb.0:
2977; GFX6-NEXT:    s_and_b32 s2, s1, 15
2978; GFX6-NEXT:    s_andn2_b32 s1, 15, s1
2979; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
2980; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x100000
2981; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s1, v0
2982; GFX6-NEXT:    s_bfe_u32 s1, s2, 0x100000
2983; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
2984; GFX6-NEXT:    s_lshr_b32 s0, s0, s1
2985; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
2986; GFX6-NEXT:    ; return to shader part epilog
2987;
2988; GFX8-LABEL: v_fshr_i16_vss:
2989; GFX8:       ; %bb.0:
2990; GFX8-NEXT:    s_and_b32 s2, s1, 15
2991; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
2992; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
2993; GFX8-NEXT:    v_lshlrev_b16_e32 v0, s1, v0
2994; GFX8-NEXT:    s_bfe_u32 s0, s0, 0x100000
2995; GFX8-NEXT:    s_bfe_u32 s1, s2, 0x100000
2996; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
2997; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
2998; GFX8-NEXT:    ; return to shader part epilog
2999;
3000; GFX9-LABEL: v_fshr_i16_vss:
3001; GFX9:       ; %bb.0:
3002; GFX9-NEXT:    s_and_b32 s2, s1, 15
3003; GFX9-NEXT:    s_andn2_b32 s1, 15, s1
3004; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
3005; GFX9-NEXT:    v_lshlrev_b16_e32 v0, s1, v0
3006; GFX9-NEXT:    s_bfe_u32 s0, s0, 0x100000
3007; GFX9-NEXT:    s_bfe_u32 s1, s2, 0x100000
3008; GFX9-NEXT:    s_lshr_b32 s0, s0, s1
3009; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
3010; GFX9-NEXT:    ; return to shader part epilog
3011;
3012; GFX10-LABEL: v_fshr_i16_vss:
3013; GFX10:       ; %bb.0:
3014; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
3015; GFX10-NEXT:    s_andn2_b32 s2, 15, s1
3016; GFX10-NEXT:    s_and_b32 s1, s1, 15
3017; GFX10-NEXT:    s_bfe_u32 s0, s0, 0x100000
3018; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
3019; GFX10-NEXT:    v_lshlrev_b16 v0, s2, v0
3020; GFX10-NEXT:    s_lshr_b32 s0, s0, s1
3021; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
3022; GFX10-NEXT:    ; return to shader part epilog
; IR under test. The i16 result is bitcast to half for the return; every
; expected sequence above leaves the final value in v0.
3023  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
3024  %cast.result = bitcast i16 %result to half
3025  ret half %cast.result
3026}
3027
; <2 x i16> funnel shift right with all three operands marked inreg. Every
; expected sequence below consists only of scalar (s_*) instructions and leaves
; the result in s0.
; llvm.fshr is applied per element: each lane shifts the concatenation of its
; %lhs:%rhs elements right by its %amt element (modulo 16) and keeps the low
; half; see the LLVM LangRef entry for the intrinsic.
; The check groups are autogenerated (one per -mcpu in the run lines at the top
; of the file); regenerate them with update_llc_test_checks.py.
3028define amdgpu_ps i32 @s_fshr_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) {
3029; GFX6-LABEL: s_fshr_v2i16:
3030; GFX6:       ; %bb.0:
3031; GFX6-NEXT:    s_lshl_b32 s5, s5, 16
3032; GFX6-NEXT:    s_and_b32 s4, s4, 0xffff
3033; GFX6-NEXT:    s_or_b32 s4, s5, s4
3034; GFX6-NEXT:    s_bfe_u32 s5, 1, 0x100000
3035; GFX6-NEXT:    s_mov_b32 s6, 0xf0001
3036; GFX6-NEXT:    s_lshl_b32 s0, s0, s5
3037; GFX6-NEXT:    s_bfe_u32 s7, s2, s6
3038; GFX6-NEXT:    s_bfe_u32 s8, 14, 0x100000
3039; GFX6-NEXT:    s_lshl_b32 s1, s1, s5
3040; GFX6-NEXT:    s_bfe_u32 s5, s3, s6
3041; GFX6-NEXT:    s_lshr_b32 s7, s7, s8
3042; GFX6-NEXT:    s_lshr_b32 s5, s5, s8
3043; GFX6-NEXT:    s_xor_b32 s4, s4, -1
3044; GFX6-NEXT:    s_or_b32 s0, s0, s7
3045; GFX6-NEXT:    s_or_b32 s1, s1, s5
3046; GFX6-NEXT:    s_lshl_b32 s2, s2, 1
3047; GFX6-NEXT:    s_lshr_b32 s5, s4, 16
3048; GFX6-NEXT:    s_and_b32 s7, s4, 15
3049; GFX6-NEXT:    s_andn2_b32 s4, 15, s4
3050; GFX6-NEXT:    s_bfe_u32 s7, s7, 0x100000
3051; GFX6-NEXT:    s_bfe_u32 s2, s2, s6
3052; GFX6-NEXT:    s_bfe_u32 s4, s4, 0x100000
3053; GFX6-NEXT:    s_lshl_b32 s0, s0, s7
3054; GFX6-NEXT:    s_lshr_b32 s2, s2, s4
3055; GFX6-NEXT:    s_or_b32 s0, s0, s2
3056; GFX6-NEXT:    s_and_b32 s2, s5, 15
3057; GFX6-NEXT:    s_lshl_b32 s3, s3, 1
3058; GFX6-NEXT:    s_andn2_b32 s4, 15, s5
3059; GFX6-NEXT:    s_bfe_u32 s2, s2, 0x100000
3060; GFX6-NEXT:    s_lshl_b32 s1, s1, s2
3061; GFX6-NEXT:    s_bfe_u32 s2, s3, s6
3062; GFX6-NEXT:    s_bfe_u32 s3, s4, 0x100000
3063; GFX6-NEXT:    s_lshr_b32 s2, s2, s3
3064; GFX6-NEXT:    s_or_b32 s1, s1, s2
3065; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x100000
3066; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
3067; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
3068; GFX6-NEXT:    s_or_b32 s0, s0, s1
3069; GFX6-NEXT:    ; return to shader part epilog
3070;
3071; GFX8-LABEL: s_fshr_v2i16:
3072; GFX8:       ; %bb.0:
3073; GFX8-NEXT:    s_bfe_u32 s5, 1, 0x100000
3074; GFX8-NEXT:    s_bfe_u32 s6, s1, 0x100000
3075; GFX8-NEXT:    s_bfe_u32 s7, 15, 0x100000
3076; GFX8-NEXT:    s_lshr_b32 s3, s0, 16
3077; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
3078; GFX8-NEXT:    s_lshl_b32 s0, s0, s5
3079; GFX8-NEXT:    s_lshr_b32 s6, s6, s7
3080; GFX8-NEXT:    s_or_b32 s0, s0, s6
3081; GFX8-NEXT:    s_lshl_b32 s3, s3, s5
3082; GFX8-NEXT:    s_lshr_b32 s6, s4, s7
3083; GFX8-NEXT:    s_lshl_b32 s1, s1, s5
3084; GFX8-NEXT:    s_xor_b32 s2, s2, -1
3085; GFX8-NEXT:    s_or_b32 s3, s3, s6
3086; GFX8-NEXT:    s_lshr_b32 s6, s2, 16
3087; GFX8-NEXT:    s_and_b32 s7, s2, 15
3088; GFX8-NEXT:    s_andn2_b32 s2, 15, s2
3089; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
3090; GFX8-NEXT:    s_bfe_u32 s7, s7, 0x100000
3091; GFX8-NEXT:    s_lshr_b32 s1, s1, s5
3092; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
3093; GFX8-NEXT:    s_lshl_b32 s0, s0, s7
3094; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
3095; GFX8-NEXT:    s_or_b32 s0, s0, s1
3096; GFX8-NEXT:    s_and_b32 s1, s6, 15
3097; GFX8-NEXT:    s_lshl_b32 s4, s4, s5
3098; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
3099; GFX8-NEXT:    s_andn2_b32 s2, 15, s6
3100; GFX8-NEXT:    s_lshl_b32 s1, s3, s1
3101; GFX8-NEXT:    s_bfe_u32 s3, s4, 0x100000
3102; GFX8-NEXT:    s_lshr_b32 s3, s3, s5
3103; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
3104; GFX8-NEXT:    s_lshr_b32 s2, s3, s2
3105; GFX8-NEXT:    s_or_b32 s1, s1, s2
3106; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
3107; GFX8-NEXT:    s_bfe_u32 s0, s0, 0x100000
3108; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
3109; GFX8-NEXT:    s_or_b32 s0, s0, s1
3110; GFX8-NEXT:    ; return to shader part epilog
3111;
3112; GFX9-LABEL: s_fshr_v2i16:
3113; GFX9:       ; %bb.0:
3114; GFX9-NEXT:    s_mov_b32 s3, 0xf000f
3115; GFX9-NEXT:    s_and_b32 s4, s2, s3
3116; GFX9-NEXT:    s_andn2_b32 s2, s3, s2
3117; GFX9-NEXT:    s_lshr_b32 s3, s0, 16
3118; GFX9-NEXT:    s_lshl_b32 s0, s0, 0x10001
3119; GFX9-NEXT:    s_lshl_b32 s3, s3, 1
3120; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s3
3121; GFX9-NEXT:    s_lshr_b32 s3, s0, 16
3122; GFX9-NEXT:    s_lshr_b32 s5, s2, 16
3123; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
3124; GFX9-NEXT:    s_lshl_b32 s2, s3, s5
3125; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
3126; GFX9-NEXT:    s_lshr_b32 s2, s1, 16
3127; GFX9-NEXT:    s_and_b32 s1, s1, 0xffff
3128; GFX9-NEXT:    s_lshr_b32 s3, s4, 16
3129; GFX9-NEXT:    s_lshr_b32 s1, s1, s4
3130; GFX9-NEXT:    s_lshr_b32 s2, s2, s3
3131; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s2
3132; GFX9-NEXT:    s_or_b32 s0, s0, s1
3133; GFX9-NEXT:    ; return to shader part epilog
3134;
3135; GFX10-LABEL: s_fshr_v2i16:
3136; GFX10:       ; %bb.0:
3137; GFX10-NEXT:    s_lshr_b32 s4, s0, 16
3138; GFX10-NEXT:    s_mov_b32 s3, 0xf000f
3139; GFX10-NEXT:    s_lshl_b32 s0, s0, 0x10001
3140; GFX10-NEXT:    s_lshl_b32 s4, s4, 1
3141; GFX10-NEXT:    s_and_b32 s5, s2, s3
3142; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
3143; GFX10-NEXT:    s_andn2_b32 s2, s3, s2
3144; GFX10-NEXT:    s_lshr_b32 s3, s0, 16
3145; GFX10-NEXT:    s_lshr_b32 s4, s2, 16
3146; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
3147; GFX10-NEXT:    s_lshl_b32 s2, s3, s4
3148; GFX10-NEXT:    s_lshr_b32 s3, s1, 16
3149; GFX10-NEXT:    s_and_b32 s1, s1, 0xffff
3150; GFX10-NEXT:    s_lshr_b32 s4, s5, 16
3151; GFX10-NEXT:    s_lshr_b32 s1, s1, s5
3152; GFX10-NEXT:    s_lshr_b32 s3, s3, s4
3153; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
3154; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s1, s3
3155; GFX10-NEXT:    s_or_b32 s0, s0, s1
3156; GFX10-NEXT:    ; return to shader part epilog
; IR under test. The <2 x i16> result is bitcast to a single i32 for the
; return.
3157  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3158  %cast = bitcast <2 x i16> %result to i32
3159  ret i32 %cast
3160}
3161
; <2 x i16> funnel shift right with no inreg operands and no amdgpu_ps
; calling-convention keyword. Each expected sequence below starts with
; s_waitcnt, works on v-registers, and returns with s_setpc_b64 s[30:31].
; llvm.fshr is applied per element: each lane shifts the concatenation of its
; %lhs:%rhs elements right by its %amt element (modulo 16) and keeps the low
; half; see the LLVM LangRef entry for the intrinsic.
; The gfx900 and gfx1010 groups use the packed v_pk_lshlrev_b16 /
; v_pk_lshrrev_b16 instructions; the tahiti and fiji groups shift each 16-bit
; element separately.
; The check groups are autogenerated; regenerate them with
; update_llc_test_checks.py instead of editing them by hand.
3162define <2 x i16> @v_fshr_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) {
3163; GFX6-LABEL: v_fshr_v2i16:
3164; GFX6:       ; %bb.0:
3165; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3166; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
3167; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
3168; GFX6-NEXT:    v_or_b32_e32 v4, v5, v4
3169; GFX6-NEXT:    s_bfe_u32 s4, 1, 0x100000
3170; GFX6-NEXT:    v_bfe_u32 v5, v2, 1, 15
3171; GFX6-NEXT:    s_bfe_u32 s5, 14, 0x100000
3172; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s4, v0
3173; GFX6-NEXT:    v_lshrrev_b32_e32 v5, s5, v5
3174; GFX6-NEXT:    v_or_b32_e32 v0, v0, v5
3175; GFX6-NEXT:    v_bfe_u32 v5, v3, 1, 15
3176; GFX6-NEXT:    v_lshlrev_b32_e32 v1, s4, v1
3177; GFX6-NEXT:    v_lshrrev_b32_e32 v5, s5, v5
3178; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v4
3179; GFX6-NEXT:    v_or_b32_e32 v1, v1, v5
3180; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
3181; GFX6-NEXT:    v_and_b32_e32 v6, 15, v4
3182; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v4
3183; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
3184; GFX6-NEXT:    v_and_b32_e32 v4, 15, v4
3185; GFX6-NEXT:    v_bfe_u32 v6, v6, 0, 16
3186; GFX6-NEXT:    v_bfe_u32 v2, v2, 1, 15
3187; GFX6-NEXT:    v_bfe_u32 v4, v4, 0, 16
3188; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v6, v0
3189; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
3190; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
3191; GFX6-NEXT:    v_and_b32_e32 v2, 15, v5
3192; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v5
3193; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 1, v3
3194; GFX6-NEXT:    v_and_b32_e32 v4, 15, v4
3195; GFX6-NEXT:    v_bfe_u32 v2, v2, 0, 16
3196; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v2, v1
3197; GFX6-NEXT:    v_bfe_u32 v2, v3, 1, 15
3198; GFX6-NEXT:    v_bfe_u32 v3, v4, 0, 16
3199; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v3, v2
3200; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
3201; GFX6-NEXT:    s_setpc_b64 s[30:31]
3202;
3203; GFX8-LABEL: v_fshr_v2i16:
3204; GFX8:       ; %bb.0:
3205; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3206; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 1, v0
3207; GFX8-NEXT:    v_lshrrev_b16_e32 v4, 15, v1
3208; GFX8-NEXT:    v_or_b32_e32 v3, v3, v4
3209; GFX8-NEXT:    v_mov_b32_e32 v4, 1
3210; GFX8-NEXT:    v_mov_b32_e32 v5, 15
3211; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3212; GFX8-NEXT:    v_lshrrev_b16_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3213; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
3214; GFX8-NEXT:    v_or_b32_e32 v0, v0, v5
3215; GFX8-NEXT:    v_lshlrev_b16_e32 v5, 1, v1
3216; GFX8-NEXT:    v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3217; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
3218; GFX8-NEXT:    v_and_b32_e32 v6, 15, v2
3219; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
3220; GFX8-NEXT:    v_and_b32_e32 v2, 15, v2
3221; GFX8-NEXT:    v_lshrrev_b16_e32 v5, 1, v5
3222; GFX8-NEXT:    v_lshlrev_b16_e32 v3, v6, v3
3223; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v2, v5
3224; GFX8-NEXT:    v_or_b32_e32 v2, v3, v2
3225; GFX8-NEXT:    v_and_b32_e32 v3, 15, v4
3226; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v4
3227; GFX8-NEXT:    v_and_b32_e32 v4, 15, v4
3228; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 1, v1
3229; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
3230; GFX8-NEXT:    v_lshrrev_b16_e32 v1, v4, v1
3231; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3232; GFX8-NEXT:    v_mov_b32_e32 v1, 16
3233; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
3234; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3235; GFX8-NEXT:    s_setpc_b64 s[30:31]
3236;
3237; GFX9-LABEL: v_fshr_v2i16:
3238; GFX9:       ; %bb.0:
3239; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3240; GFX9-NEXT:    s_mov_b32 s4, 0xf000f
3241; GFX9-NEXT:    v_and_b32_e32 v3, s4, v2
3242; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v2
3243; GFX9-NEXT:    v_and_b32_e32 v2, s4, v2
3244; GFX9-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
3245; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v2, v0
3246; GFX9-NEXT:    v_pk_lshrrev_b16 v1, v3, v1
3247; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
3248; GFX9-NEXT:    s_setpc_b64 s[30:31]
3249;
3250; GFX10-LABEL: v_fshr_v2i16:
3251; GFX10:       ; %bb.0:
3252; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3253; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3254; GFX10-NEXT:    v_xor_b32_e32 v3, -1, v2
3255; GFX10-NEXT:    s_mov_b32 s4, 0xf000f
3256; GFX10-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
3257; GFX10-NEXT:    v_and_b32_e32 v2, s4, v2
3258; GFX10-NEXT:    v_and_b32_e32 v3, s4, v3
3259; GFX10-NEXT:    v_pk_lshrrev_b16 v1, v2, v1
3260; GFX10-NEXT:    v_pk_lshlrev_b16 v0, v3, v0
3261; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
3262; GFX10-NEXT:    s_setpc_b64 s[30:31]
; IR under test. The <2 x i16> result is returned directly, with no bitcast.
3263  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3264  ret <2 x i16> %result
3265}
3266
; <2 x i16> funnel shift right by the constant vector <4, 8>; there is no %amt
; parameter. In the expected output the shift amounts appear as constants and
; no masking of a variable amount is emitted. For example the fiji group
; shifts the low element left by 12 and right by 4, and the high element by 8
; in both directions; the gfx900 and gfx1010 groups use the packed constants
; 0x8000c and 0x80004.
; llvm.fshr is applied per element: each lane shifts the concatenation of its
; %lhs:%rhs elements right by the given amount (modulo 16) and keeps the low
; half; see the LLVM LangRef entry for the intrinsic.
; The check groups are autogenerated; regenerate them with
; update_llc_test_checks.py instead of editing them by hand.
3267define <2 x i16> @v_fshr_v2i16_4_8(<2 x i16> %lhs, <2 x i16> %rhs) {
3268; GFX6-LABEL: v_fshr_v2i16_4_8:
3269; GFX6:       ; %bb.0:
3270; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3271; GFX6-NEXT:    s_bfe_u32 s4, 12, 0x100000
3272; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s4, v0
3273; GFX6-NEXT:    v_bfe_u32 v2, v2, 1, 15
3274; GFX6-NEXT:    s_bfe_u32 s4, 3, 0x100000
3275; GFX6-NEXT:    v_lshrrev_b32_e32 v2, s4, v2
3276; GFX6-NEXT:    s_bfe_u32 s4, 8, 0x100000
3277; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
3278; GFX6-NEXT:    v_lshlrev_b32_e32 v1, s4, v1
3279; GFX6-NEXT:    v_bfe_u32 v2, v3, 1, 15
3280; GFX6-NEXT:    s_bfe_u32 s4, 7, 0x100000
3281; GFX6-NEXT:    v_lshrrev_b32_e32 v2, s4, v2
3282; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
3283; GFX6-NEXT:    s_setpc_b64 s[30:31]
3284;
3285; GFX8-LABEL: v_fshr_v2i16_4_8:
3286; GFX8:       ; %bb.0:
3287; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3288; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
3289; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 12, v0
3290; GFX8-NEXT:    v_lshrrev_b16_e32 v3, 4, v1
3291; GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
3292; GFX8-NEXT:    v_mov_b32_e32 v3, 8
3293; GFX8-NEXT:    v_lshlrev_b16_e32 v2, 8, v2
3294; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3295; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
3296; GFX8-NEXT:    v_mov_b32_e32 v2, 16
3297; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
3298; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3299; GFX8-NEXT:    s_setpc_b64 s[30:31]
3300;
3301; GFX9-LABEL: v_fshr_v2i16_4_8:
3302; GFX9:       ; %bb.0:
3303; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3304; GFX9-NEXT:    v_mov_b32_e32 v2, 0x8000c
3305; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v2, v0
3306; GFX9-NEXT:    v_mov_b32_e32 v2, 0x80004
3307; GFX9-NEXT:    v_pk_lshrrev_b16 v1, v2, v1
3308; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
3309; GFX9-NEXT:    s_setpc_b64 s[30:31]
3310;
3311; GFX10-LABEL: v_fshr_v2i16_4_8:
3312; GFX10:       ; %bb.0:
3313; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3314; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3315; GFX10-NEXT:    v_pk_lshlrev_b16 v0, 0x8000c, v0
3316; GFX10-NEXT:    v_pk_lshrrev_b16 v1, 0x80004, v1
3317; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
3318; GFX10-NEXT:    s_setpc_b64 s[30:31]
; IR under test. The shift amount is the literal vector <i16 4, i16 8>.
3319  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> <i16 4, i16 8>)
3320  ret <2 x i16> %result
3321}
3322
; <2 x i16> funnel shift right, "ssv" operand mix: %lhs and %rhs are marked
; inreg, %amt is not. In the expected output below the shift amount is masked
; and inverted with v_* instructions while the inreg operands are read from
; s-registers.
; llvm.fshr is applied per element: each lane shifts the concatenation of its
; %lhs:%rhs elements right by its %amt element (modulo 16) and keeps the low
; half; see the LLVM LangRef entry for the intrinsic.
; The check groups are autogenerated (one per -mcpu in the run lines at the top
; of the file); regenerate them with update_llc_test_checks.py.
3323define amdgpu_ps float @v_fshr_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> %amt) {
3324; GFX6-LABEL: v_fshr_v2i16_ssv:
3325; GFX6:       ; %bb.0:
3326; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3327; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
3328; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
3329; GFX6-NEXT:    s_mov_b32 s5, 0xf0001
3330; GFX6-NEXT:    s_bfe_u32 s4, 1, 0x100000
3331; GFX6-NEXT:    s_bfe_u32 s6, s2, s5
3332; GFX6-NEXT:    s_bfe_u32 s7, 14, 0x100000
3333; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
3334; GFX6-NEXT:    s_lshl_b32 s0, s0, s4
3335; GFX6-NEXT:    s_lshr_b32 s6, s6, s7
3336; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
3337; GFX6-NEXT:    v_and_b32_e32 v2, 15, v0
3338; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
3339; GFX6-NEXT:    s_or_b32 s0, s0, s6
3340; GFX6-NEXT:    s_lshl_b32 s2, s2, 1
3341; GFX6-NEXT:    v_and_b32_e32 v0, 15, v0
3342; GFX6-NEXT:    v_bfe_u32 v2, v2, 0, 16
3343; GFX6-NEXT:    v_lshl_b32_e32 v2, s0, v2
3344; GFX6-NEXT:    s_bfe_u32 s0, s2, s5
3345; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 16
3346; GFX6-NEXT:    v_lshr_b32_e32 v0, s0, v0
3347; GFX6-NEXT:    s_lshl_b32 s1, s1, s4
3348; GFX6-NEXT:    s_bfe_u32 s4, s3, s5
3349; GFX6-NEXT:    v_or_b32_e32 v0, v2, v0
3350; GFX6-NEXT:    v_and_b32_e32 v2, 15, v1
3351; GFX6-NEXT:    v_xor_b32_e32 v1, -1, v1
3352; GFX6-NEXT:    s_lshr_b32 s4, s4, s7
3353; GFX6-NEXT:    s_lshl_b32 s3, s3, 1
3354; GFX6-NEXT:    v_and_b32_e32 v1, 15, v1
3355; GFX6-NEXT:    s_or_b32 s1, s1, s4
3356; GFX6-NEXT:    v_bfe_u32 v2, v2, 0, 16
3357; GFX6-NEXT:    s_bfe_u32 s0, s3, s5
3358; GFX6-NEXT:    v_bfe_u32 v1, v1, 0, 16
3359; GFX6-NEXT:    v_lshl_b32_e32 v2, s1, v2
3360; GFX6-NEXT:    v_lshr_b32_e32 v1, s0, v1
3361; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
3362; GFX6-NEXT:    v_bfe_u32 v1, v1, 0, 16
3363; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 16
3364; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3365; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
3366; GFX6-NEXT:    ; return to shader part epilog
3367;
3368; GFX8-LABEL: v_fshr_v2i16_ssv:
3369; GFX8:       ; %bb.0:
3370; GFX8-NEXT:    s_bfe_u32 s4, 1, 0x100000
3371; GFX8-NEXT:    s_bfe_u32 s5, s1, 0x100000
3372; GFX8-NEXT:    s_bfe_u32 s6, 15, 0x100000
3373; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
3374; GFX8-NEXT:    s_lshl_b32 s0, s0, s4
3375; GFX8-NEXT:    s_lshr_b32 s5, s5, s6
3376; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
3377; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
3378; GFX8-NEXT:    s_or_b32 s0, s0, s5
3379; GFX8-NEXT:    s_lshl_b32 s1, s1, s4
3380; GFX8-NEXT:    v_and_b32_e32 v2, 15, v0
3381; GFX8-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
3382; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
3383; GFX8-NEXT:    v_lshlrev_b16_e64 v2, v2, s0
3384; GFX8-NEXT:    s_bfe_u32 s0, s1, 0x100000
3385; GFX8-NEXT:    v_and_b32_e32 v0, 15, v0
3386; GFX8-NEXT:    s_lshr_b32 s0, s0, s4
3387; GFX8-NEXT:    s_lshr_b32 s5, s3, s6
3388; GFX8-NEXT:    s_lshl_b32 s3, s3, s4
3389; GFX8-NEXT:    v_lshrrev_b16_e64 v0, v0, s0
3390; GFX8-NEXT:    s_lshl_b32 s2, s2, s4
3391; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
3392; GFX8-NEXT:    v_and_b32_e32 v2, 15, v1
3393; GFX8-NEXT:    v_xor_b32_e32 v1, -1, v1
3394; GFX8-NEXT:    s_bfe_u32 s0, s3, 0x100000
3395; GFX8-NEXT:    s_or_b32 s2, s2, s5
3396; GFX8-NEXT:    v_and_b32_e32 v1, 15, v1
3397; GFX8-NEXT:    s_lshr_b32 s0, s0, s4
3398; GFX8-NEXT:    v_lshlrev_b16_e64 v2, v2, s2
3399; GFX8-NEXT:    v_lshrrev_b16_e64 v1, v1, s0
3400; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
3401; GFX8-NEXT:    v_mov_b32_e32 v2, 16
3402; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
3403; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3404; GFX8-NEXT:    ; return to shader part epilog
3405;
3406; GFX9-LABEL: v_fshr_v2i16_ssv:
3407; GFX9:       ; %bb.0:
3408; GFX9-NEXT:    s_mov_b32 s2, 0xf000f
3409; GFX9-NEXT:    v_and_b32_e32 v1, s2, v0
3410; GFX9-NEXT:    v_xor_b32_e32 v0, -1, v0
3411; GFX9-NEXT:    v_and_b32_e32 v0, s2, v0
3412; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
3413; GFX9-NEXT:    s_lshl_b32 s0, s0, 0x10001
3414; GFX9-NEXT:    s_lshl_b32 s2, s2, 1
3415; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
3416; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v0, s0
3417; GFX9-NEXT:    v_pk_lshrrev_b16 v1, v1, s1
3418; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
3419; GFX9-NEXT:    ; return to shader part epilog
3420;
3421; GFX10-LABEL: v_fshr_v2i16_ssv:
3422; GFX10:       ; %bb.0:
3423; GFX10-NEXT:    v_xor_b32_e32 v1, -1, v0
3424; GFX10-NEXT:    s_mov_b32 s2, 0xf000f
3425; GFX10-NEXT:    s_lshr_b32 s3, s0, 16
3426; GFX10-NEXT:    v_and_b32_e32 v0, s2, v0
3427; GFX10-NEXT:    s_lshl_b32 s0, s0, 0x10001
3428; GFX10-NEXT:    v_and_b32_e32 v1, s2, v1
3429; GFX10-NEXT:    s_lshl_b32 s2, s3, 1
3430; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
3431; GFX10-NEXT:    v_pk_lshrrev_b16 v0, v0, s1
3432; GFX10-NEXT:    v_pk_lshlrev_b16 v1, v1, s0
3433; GFX10-NEXT:    v_or_b32_e32 v0, v1, v0
3434; GFX10-NEXT:    ; return to shader part epilog
; IR under test. The <2 x i16> result is bitcast to float for the return;
; every expected sequence above leaves the final value in v0.
3435  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3436  %cast = bitcast <2 x i16> %result to float
3437  ret float %cast
3438}
3439
; <2 x i16> funnel shift right, "svs" operand mix: %lhs and %amt are marked
; inreg, %rhs is not. In the expected output below the shift-amount masking
; is done with s_* instructions and %rhs is shifted with v_* instructions.
; llvm.fshr is applied per element: each lane shifts the concatenation of its
; %lhs:%rhs elements right by its %amt element (modulo 16) and keeps the low
; half; see the LLVM LangRef entry for the intrinsic.
; The check groups are autogenerated (one per -mcpu in the run lines at the top
; of the file); regenerate them with update_llc_test_checks.py.
3440define amdgpu_ps float @v_fshr_v2i16_svs(<2 x i16> inreg %lhs, <2 x i16> %rhs, <2 x i16> inreg %amt) {
3441; GFX6-LABEL: v_fshr_v2i16_svs:
3442; GFX6:       ; %bb.0:
3443; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
3444; GFX6-NEXT:    s_and_b32 s2, s2, 0xffff
3445; GFX6-NEXT:    s_or_b32 s2, s3, s2
3446; GFX6-NEXT:    s_bfe_u32 s3, 1, 0x100000
3447; GFX6-NEXT:    v_bfe_u32 v2, v0, 1, 15
3448; GFX6-NEXT:    s_bfe_u32 s4, 14, 0x100000
3449; GFX6-NEXT:    s_lshl_b32 s0, s0, s3
3450; GFX6-NEXT:    v_lshrrev_b32_e32 v2, s4, v2
3451; GFX6-NEXT:    v_bfe_u32 v3, v1, 1, 15
3452; GFX6-NEXT:    v_or_b32_e32 v2, s0, v2
3453; GFX6-NEXT:    s_lshl_b32 s0, s1, s3
3454; GFX6-NEXT:    v_lshrrev_b32_e32 v3, s4, v3
3455; GFX6-NEXT:    v_or_b32_e32 v3, s0, v3
3456; GFX6-NEXT:    s_xor_b32 s0, s2, -1
3457; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
3458; GFX6-NEXT:    s_lshr_b32 s1, s0, 16
3459; GFX6-NEXT:    s_and_b32 s2, s0, 15
3460; GFX6-NEXT:    s_andn2_b32 s0, 15, s0
3461; GFX6-NEXT:    v_bfe_u32 v0, v0, 1, 15
3462; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
3463; GFX6-NEXT:    s_bfe_u32 s2, s2, 0x100000
3464; GFX6-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
3465; GFX6-NEXT:    s_and_b32 s0, s1, 15
3466; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
3467; GFX6-NEXT:    v_lshlrev_b32_e32 v2, s2, v2
3468; GFX6-NEXT:    s_andn2_b32 s1, 15, s1
3469; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
3470; GFX6-NEXT:    v_or_b32_e32 v0, v2, v0
3471; GFX6-NEXT:    v_lshlrev_b32_e32 v2, s0, v3
3472; GFX6-NEXT:    v_bfe_u32 v1, v1, 1, 15
3473; GFX6-NEXT:    s_bfe_u32 s0, s1, 0x100000
3474; GFX6-NEXT:    v_lshrrev_b32_e32 v1, s0, v1
3475; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
3476; GFX6-NEXT:    v_bfe_u32 v1, v1, 0, 16
3477; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 16
3478; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3479; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
3480; GFX6-NEXT:    ; return to shader part epilog
3481;
3482; GFX8-LABEL: v_fshr_v2i16_svs:
3483; GFX8:       ; %bb.0:
3484; GFX8-NEXT:    s_bfe_u32 s3, 1, 0x100000
3485; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
3486; GFX8-NEXT:    s_lshl_b32 s0, s0, s3
3487; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 15, v0
3488; GFX8-NEXT:    v_mov_b32_e32 v2, 15
3489; GFX8-NEXT:    v_or_b32_e32 v1, s0, v1
3490; GFX8-NEXT:    s_lshl_b32 s0, s2, s3
3491; GFX8-NEXT:    v_lshrrev_b16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3492; GFX8-NEXT:    v_or_b32_e32 v2, s0, v2
3493; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 1, v0
3494; GFX8-NEXT:    v_mov_b32_e32 v4, 1
3495; GFX8-NEXT:    s_xor_b32 s0, s1, -1
3496; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3497; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
3498; GFX8-NEXT:    s_and_b32 s2, s0, 15
3499; GFX8-NEXT:    s_andn2_b32 s0, 15, s0
3500; GFX8-NEXT:    v_lshrrev_b16_e32 v3, 1, v3
3501; GFX8-NEXT:    v_lshrrev_b16_e32 v3, s0, v3
3502; GFX8-NEXT:    s_and_b32 s0, s1, 15
3503; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
3504; GFX8-NEXT:    v_lshrrev_b16_e32 v0, 1, v0
3505; GFX8-NEXT:    v_lshlrev_b16_e32 v2, s0, v2
3506; GFX8-NEXT:    v_lshrrev_b16_e32 v0, s1, v0
3507; GFX8-NEXT:    v_lshlrev_b16_e32 v1, s2, v1
3508; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
3509; GFX8-NEXT:    v_mov_b32_e32 v2, 16
3510; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
3511; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
3512; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3513; GFX8-NEXT:    ; return to shader part epilog
3514;
3515; GFX9-LABEL: v_fshr_v2i16_svs:
3516; GFX9:       ; %bb.0:
3517; GFX9-NEXT:    s_mov_b32 s2, 0xf000f
3518; GFX9-NEXT:    s_and_b32 s3, s1, s2
3519; GFX9-NEXT:    s_andn2_b32 s1, s2, s1
3520; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
3521; GFX9-NEXT:    s_lshl_b32 s0, s0, 0x10001
3522; GFX9-NEXT:    s_lshl_b32 s2, s2, 1
3523; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
3524; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
3525; GFX9-NEXT:    s_lshr_b32 s4, s1, 16
3526; GFX9-NEXT:    s_lshl_b32 s0, s0, s1
3527; GFX9-NEXT:    s_lshl_b32 s1, s2, s4
3528; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
3529; GFX9-NEXT:    v_pk_lshrrev_b16 v0, s3, v0
3530; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
3531; GFX9-NEXT:    ; return to shader part epilog
3532;
3533; GFX10-LABEL: v_fshr_v2i16_svs:
3534; GFX10:       ; %bb.0:
3535; GFX10-NEXT:    s_lshr_b32 s3, s0, 16
3536; GFX10-NEXT:    s_mov_b32 s2, 0xf000f
3537; GFX10-NEXT:    s_lshl_b32 s0, s0, 0x10001
3538; GFX10-NEXT:    s_lshl_b32 s3, s3, 1
3539; GFX10-NEXT:    s_and_b32 s4, s1, s2
3540; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s3
3541; GFX10-NEXT:    s_andn2_b32 s1, s2, s1
3542; GFX10-NEXT:    s_lshr_b32 s2, s0, 16
3543; GFX10-NEXT:    s_lshr_b32 s3, s1, 16
3544; GFX10-NEXT:    v_pk_lshrrev_b16 v0, s4, v0
3545; GFX10-NEXT:    s_lshl_b32 s0, s0, s1
3546; GFX10-NEXT:    s_lshl_b32 s1, s2, s3
3547; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
3548; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
3549; GFX10-NEXT:    ; return to shader part epilog
; IR under test. The <2 x i16> result is bitcast to float for the return;
; every expected sequence above leaves the final value in v0.
3550  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3551  %cast = bitcast <2 x i16> %result to float
3552  ret float %cast
3553}
3554
; <2 x i16> funnel shift right, "vss" operand mix: %lhs is a plain argument,
; %rhs and %amt are marked inreg. In the expected output below %lhs is shifted
; with v_* instructions while the %rhs shift and the amount masking use s_*
; instructions.
; llvm.fshr is applied per element: each lane shifts the concatenation of its
; %lhs:%rhs elements right by its %amt element (modulo 16) and keeps the low
; half; see the LLVM LangRef entry for the intrinsic.
; The check groups are autogenerated (one per -mcpu in the run lines at the top
; of the file); regenerate them with update_llc_test_checks.py.
3555define amdgpu_ps float @v_fshr_v2i16_vss(<2 x i16> %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) {
3556; GFX6-LABEL: v_fshr_v2i16_vss:
3557; GFX6:       ; %bb.0:
3558; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
3559; GFX6-NEXT:    s_and_b32 s2, s2, 0xffff
3560; GFX6-NEXT:    s_or_b32 s2, s3, s2
3561; GFX6-NEXT:    s_bfe_u32 s3, 1, 0x100000
3562; GFX6-NEXT:    s_mov_b32 s4, 0xf0001
3563; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s3, v0
3564; GFX6-NEXT:    s_bfe_u32 s5, s0, s4
3565; GFX6-NEXT:    s_bfe_u32 s6, 14, 0x100000
3566; GFX6-NEXT:    v_lshlrev_b32_e32 v1, s3, v1
3567; GFX6-NEXT:    s_bfe_u32 s3, s1, s4
3568; GFX6-NEXT:    s_lshr_b32 s5, s5, s6
3569; GFX6-NEXT:    s_lshr_b32 s3, s3, s6
3570; GFX6-NEXT:    s_xor_b32 s2, s2, -1
3571; GFX6-NEXT:    v_or_b32_e32 v0, s5, v0
3572; GFX6-NEXT:    v_or_b32_e32 v1, s3, v1
3573; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
3574; GFX6-NEXT:    s_lshr_b32 s3, s2, 16
3575; GFX6-NEXT:    s_and_b32 s5, s2, 15
3576; GFX6-NEXT:    s_andn2_b32 s2, 15, s2
3577; GFX6-NEXT:    s_bfe_u32 s5, s5, 0x100000
3578; GFX6-NEXT:    s_bfe_u32 s0, s0, s4
3579; GFX6-NEXT:    s_bfe_u32 s2, s2, 0x100000
3580; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s5, v0
3581; GFX6-NEXT:    s_lshr_b32 s0, s0, s2
3582; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
3583; GFX6-NEXT:    s_and_b32 s0, s3, 15
3584; GFX6-NEXT:    s_lshl_b32 s1, s1, 1
3585; GFX6-NEXT:    s_andn2_b32 s2, 15, s3
3586; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
3587; GFX6-NEXT:    v_lshlrev_b32_e32 v1, s0, v1
3588; GFX6-NEXT:    s_bfe_u32 s0, s1, s4
3589; GFX6-NEXT:    s_bfe_u32 s1, s2, 0x100000
3590; GFX6-NEXT:    s_lshr_b32 s0, s0, s1
3591; GFX6-NEXT:    v_or_b32_e32 v1, s0, v1
3592; GFX6-NEXT:    v_bfe_u32 v1, v1, 0, 16
3593; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 16
3594; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3595; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
3596; GFX6-NEXT:    ; return to shader part epilog
3597;
3598; GFX8-LABEL: v_fshr_v2i16_vss:
3599; GFX8:       ; %bb.0:
3600; GFX8-NEXT:    s_bfe_u32 s3, s0, 0x100000
3601; GFX8-NEXT:    s_bfe_u32 s4, 15, 0x100000
3602; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
3603; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 1, v0
3604; GFX8-NEXT:    s_lshr_b32 s3, s3, s4
3605; GFX8-NEXT:    v_mov_b32_e32 v2, 1
3606; GFX8-NEXT:    v_or_b32_e32 v1, s3, v1
3607; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3608; GFX8-NEXT:    s_lshr_b32 s3, s2, s4
3609; GFX8-NEXT:    v_or_b32_e32 v0, s3, v0
3610; GFX8-NEXT:    s_bfe_u32 s3, 1, 0x100000
3611; GFX8-NEXT:    s_lshl_b32 s0, s0, s3
3612; GFX8-NEXT:    s_xor_b32 s1, s1, -1
3613; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
3614; GFX8-NEXT:    s_and_b32 s5, s1, 15
3615; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
3616; GFX8-NEXT:    s_bfe_u32 s0, s0, 0x100000
3617; GFX8-NEXT:    s_lshr_b32 s0, s0, s3
3618; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
3619; GFX8-NEXT:    v_lshlrev_b16_e32 v1, s5, v1
3620; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
3621; GFX8-NEXT:    s_lshl_b32 s2, s2, s3
3622; GFX8-NEXT:    v_or_b32_e32 v1, s0, v1
3623; GFX8-NEXT:    s_and_b32 s0, s4, 15
3624; GFX8-NEXT:    s_andn2_b32 s1, 15, s4
3625; GFX8-NEXT:    v_lshlrev_b16_e32 v0, s0, v0
3626; GFX8-NEXT:    s_bfe_u32 s0, s2, 0x100000
3627; GFX8-NEXT:    s_lshr_b32 s0, s0, s3
3628; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
3629; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
3630; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
3631; GFX8-NEXT:    v_mov_b32_e32 v2, 16
3632; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
3633; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3634; GFX8-NEXT:    ; return to shader part epilog
3635;
3636; GFX9-LABEL: v_fshr_v2i16_vss:
3637; GFX9:       ; %bb.0:
3638; GFX9-NEXT:    s_mov_b32 s2, 0xf000f
3639; GFX9-NEXT:    s_and_b32 s3, s1, s2
3640; GFX9-NEXT:    s_andn2_b32 s1, s2, s1
3641; GFX9-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
3642; GFX9-NEXT:    v_pk_lshlrev_b16 v0, s1, v0
3643; GFX9-NEXT:    s_lshr_b32 s1, s0, 16
3644; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
3645; GFX9-NEXT:    s_lshr_b32 s2, s3, 16
3646; GFX9-NEXT:    s_lshr_b32 s0, s0, s3
3647; GFX9-NEXT:    s_lshr_b32 s1, s1, s2
3648; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
3649; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
3650; GFX9-NEXT:    ; return to shader part epilog
3651;
3652; GFX10-LABEL: v_fshr_v2i16_vss:
3653; GFX10:       ; %bb.0:
3654; GFX10-NEXT:    s_mov_b32 s2, 0xf000f
3655; GFX10-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
3656; GFX10-NEXT:    s_and_b32 s3, s1, s2
3657; GFX10-NEXT:    s_andn2_b32 s1, s2, s1
3658; GFX10-NEXT:    s_lshr_b32 s2, s0, 16
3659; GFX10-NEXT:    s_and_b32 s0, s0, 0xffff
3660; GFX10-NEXT:    s_lshr_b32 s4, s3, 16
3661; GFX10-NEXT:    v_pk_lshlrev_b16 v0, s1, v0
3662; GFX10-NEXT:    s_lshr_b32 s0, s0, s3
3663; GFX10-NEXT:    s_lshr_b32 s1, s2, s4
3664; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
3665; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
3666; GFX10-NEXT:    ; return to shader part epilog
; IR under test. The <2 x i16> result is bitcast to float for the return;
; every expected sequence above leaves the final value in v0.
3667  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3668  %cast = bitcast <2 x i16> %result to float
3669  ret float %cast
3670}
3671
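3672; ; FIXME: the all-inreg <3 x i16> test below is disabled (commented out) - presumably this case is not yet handled with -global-isel; confirm and re-enable once it compiles.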
3673; define amdgpu_ps i48 @s_fshr_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <3 x i16> inreg %amt) {
3674;   %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
3675;   %cast = bitcast <3 x i16> %result to i48
3676;   ret i48 %cast
3677; }
3678
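3679; ; FIXME: the <3 x i16> test below (default calling convention) is disabled (commented out) - presumably this case is not yet handled with -global-isel; confirm and re-enable once it compiles.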
3680; define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) {
3681;   %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
3682;   %cast.result = bitcast <3 x i16> %result to <3 x half>
3683;   ret <3 x half> %cast.result
3684; }
3685
; s_fshr_v4i16: llvm.fshr on <4 x i16> in an amdgpu_ps function where %lhs,
; %rhs and %amt are all marked inreg. The result is bitcast to <2 x i32> for
; the return value.
; In the expected output below, GFX6 and GFX8 handle each 16-bit element with
; scalar s_bfe/s_lshl/s_lshr/s_or sequences, while GFX9 and GFX10 split each
; dword into halves and recombine them with s_pack_ll_b32_b16.
; NOTE: the CHECK lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
3686define amdgpu_ps <2 x i32> @s_fshr_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %rhs, <4 x i16> inreg %amt) {
3687; GFX6-LABEL: s_fshr_v4i16:
3688; GFX6:       ; %bb.0:
3689; GFX6-NEXT:    s_mov_b32 s12, 0xffff
3690; GFX6-NEXT:    s_lshl_b32 s9, s9, 16
3691; GFX6-NEXT:    s_and_b32 s8, s8, s12
3692; GFX6-NEXT:    s_or_b32 s8, s9, s8
3693; GFX6-NEXT:    s_lshl_b32 s9, s11, 16
3694; GFX6-NEXT:    s_and_b32 s10, s10, s12
3695; GFX6-NEXT:    s_mov_b32 s11, 0xf0001
3696; GFX6-NEXT:    s_or_b32 s9, s9, s10
3697; GFX6-NEXT:    s_bfe_u32 s10, 1, 0x100000
3698; GFX6-NEXT:    s_bfe_u32 s12, s4, s11
3699; GFX6-NEXT:    s_bfe_u32 s13, 14, 0x100000
3700; GFX6-NEXT:    s_lshl_b32 s0, s0, s10
3701; GFX6-NEXT:    s_lshr_b32 s12, s12, s13
3702; GFX6-NEXT:    s_or_b32 s0, s0, s12
3703; GFX6-NEXT:    s_bfe_u32 s12, s5, s11
3704; GFX6-NEXT:    s_lshl_b32 s1, s1, s10
3705; GFX6-NEXT:    s_lshr_b32 s12, s12, s13
3706; GFX6-NEXT:    s_xor_b32 s8, s8, -1
3707; GFX6-NEXT:    s_or_b32 s1, s1, s12
3708; GFX6-NEXT:    s_lshl_b32 s4, s4, 1
3709; GFX6-NEXT:    s_lshr_b32 s12, s8, 16
3710; GFX6-NEXT:    s_and_b32 s14, s8, 15
3711; GFX6-NEXT:    s_andn2_b32 s8, 15, s8
3712; GFX6-NEXT:    s_bfe_u32 s14, s14, 0x100000
3713; GFX6-NEXT:    s_bfe_u32 s4, s4, s11
3714; GFX6-NEXT:    s_bfe_u32 s8, s8, 0x100000
3715; GFX6-NEXT:    s_lshl_b32 s0, s0, s14
3716; GFX6-NEXT:    s_lshr_b32 s4, s4, s8
3717; GFX6-NEXT:    s_or_b32 s0, s0, s4
3718; GFX6-NEXT:    s_and_b32 s4, s12, 15
3719; GFX6-NEXT:    s_lshl_b32 s5, s5, 1
3720; GFX6-NEXT:    s_andn2_b32 s8, 15, s12
3721; GFX6-NEXT:    s_bfe_u32 s4, s4, 0x100000
3722; GFX6-NEXT:    s_lshl_b32 s1, s1, s4
3723; GFX6-NEXT:    s_bfe_u32 s4, s5, s11
3724; GFX6-NEXT:    s_bfe_u32 s5, s8, 0x100000
3725; GFX6-NEXT:    s_lshr_b32 s4, s4, s5
3726; GFX6-NEXT:    s_or_b32 s1, s1, s4
3727; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x100000
3728; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
3729; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
3730; GFX6-NEXT:    s_or_b32 s0, s0, s1
3731; GFX6-NEXT:    s_lshl_b32 s1, s2, s10
3732; GFX6-NEXT:    s_bfe_u32 s2, s6, s11
3733; GFX6-NEXT:    s_lshr_b32 s2, s2, s13
3734; GFX6-NEXT:    s_or_b32 s1, s1, s2
3735; GFX6-NEXT:    s_lshl_b32 s2, s3, s10
3736; GFX6-NEXT:    s_bfe_u32 s3, s7, s11
3737; GFX6-NEXT:    s_lshr_b32 s3, s3, s13
3738; GFX6-NEXT:    s_xor_b32 s5, s9, -1
3739; GFX6-NEXT:    s_or_b32 s2, s2, s3
3740; GFX6-NEXT:    s_lshl_b32 s3, s6, 1
3741; GFX6-NEXT:    s_lshl_b32 s4, s7, 1
3742; GFX6-NEXT:    s_lshr_b32 s6, s5, 16
3743; GFX6-NEXT:    s_and_b32 s7, s5, 15
3744; GFX6-NEXT:    s_andn2_b32 s5, 15, s5
3745; GFX6-NEXT:    s_bfe_u32 s7, s7, 0x100000
3746; GFX6-NEXT:    s_bfe_u32 s3, s3, s11
3747; GFX6-NEXT:    s_bfe_u32 s5, s5, 0x100000
3748; GFX6-NEXT:    s_lshl_b32 s1, s1, s7
3749; GFX6-NEXT:    s_lshr_b32 s3, s3, s5
3750; GFX6-NEXT:    s_or_b32 s1, s1, s3
3751; GFX6-NEXT:    s_and_b32 s3, s6, 15
3752; GFX6-NEXT:    s_andn2_b32 s5, 15, s6
3753; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x100000
3754; GFX6-NEXT:    s_lshl_b32 s2, s2, s3
3755; GFX6-NEXT:    s_bfe_u32 s3, s4, s11
3756; GFX6-NEXT:    s_bfe_u32 s4, s5, 0x100000
3757; GFX6-NEXT:    s_lshr_b32 s3, s3, s4
3758; GFX6-NEXT:    s_or_b32 s2, s2, s3
3759; GFX6-NEXT:    s_bfe_u32 s2, s2, 0x100000
3760; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x100000
3761; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
3762; GFX6-NEXT:    s_or_b32 s1, s1, s2
3763; GFX6-NEXT:    ; return to shader part epilog
3764;
3765; GFX8-LABEL: s_fshr_v4i16:
3766; GFX8:       ; %bb.0:
3767; GFX8-NEXT:    s_bfe_u32 s8, 1, 0x100000
3768; GFX8-NEXT:    s_bfe_u32 s9, s2, 0x100000
3769; GFX8-NEXT:    s_bfe_u32 s10, 15, 0x100000
3770; GFX8-NEXT:    s_lshr_b32 s6, s0, 16
3771; GFX8-NEXT:    s_lshr_b32 s7, s2, 16
3772; GFX8-NEXT:    s_lshl_b32 s0, s0, s8
3773; GFX8-NEXT:    s_lshr_b32 s9, s9, s10
3774; GFX8-NEXT:    s_or_b32 s0, s0, s9
3775; GFX8-NEXT:    s_lshl_b32 s6, s6, s8
3776; GFX8-NEXT:    s_lshr_b32 s9, s7, s10
3777; GFX8-NEXT:    s_lshl_b32 s2, s2, s8
3778; GFX8-NEXT:    s_xor_b32 s4, s4, -1
3779; GFX8-NEXT:    s_or_b32 s6, s6, s9
3780; GFX8-NEXT:    s_lshr_b32 s9, s4, 16
3781; GFX8-NEXT:    s_and_b32 s11, s4, 15
3782; GFX8-NEXT:    s_andn2_b32 s4, 15, s4
3783; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
3784; GFX8-NEXT:    s_bfe_u32 s11, s11, 0x100000
3785; GFX8-NEXT:    s_lshr_b32 s2, s2, s8
3786; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
3787; GFX8-NEXT:    s_lshl_b32 s0, s0, s11
3788; GFX8-NEXT:    s_lshr_b32 s2, s2, s4
3789; GFX8-NEXT:    s_or_b32 s0, s0, s2
3790; GFX8-NEXT:    s_and_b32 s2, s9, 15
3791; GFX8-NEXT:    s_lshl_b32 s7, s7, s8
3792; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
3793; GFX8-NEXT:    s_andn2_b32 s4, 15, s9
3794; GFX8-NEXT:    s_lshl_b32 s2, s6, s2
3795; GFX8-NEXT:    s_bfe_u32 s6, s7, 0x100000
3796; GFX8-NEXT:    s_lshr_b32 s6, s6, s8
3797; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
3798; GFX8-NEXT:    s_lshr_b32 s4, s6, s4
3799; GFX8-NEXT:    s_or_b32 s2, s2, s4
3800; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
3801; GFX8-NEXT:    s_bfe_u32 s0, s0, 0x100000
3802; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
3803; GFX8-NEXT:    s_bfe_u32 s6, s3, 0x100000
3804; GFX8-NEXT:    s_or_b32 s0, s0, s2
3805; GFX8-NEXT:    s_lshr_b32 s2, s1, 16
3806; GFX8-NEXT:    s_lshr_b32 s4, s3, 16
3807; GFX8-NEXT:    s_lshl_b32 s1, s1, s8
3808; GFX8-NEXT:    s_lshr_b32 s6, s6, s10
3809; GFX8-NEXT:    s_or_b32 s1, s1, s6
3810; GFX8-NEXT:    s_lshl_b32 s2, s2, s8
3811; GFX8-NEXT:    s_lshr_b32 s6, s4, s10
3812; GFX8-NEXT:    s_lshl_b32 s3, s3, s8
3813; GFX8-NEXT:    s_xor_b32 s5, s5, -1
3814; GFX8-NEXT:    s_or_b32 s2, s2, s6
3815; GFX8-NEXT:    s_lshr_b32 s6, s5, 16
3816; GFX8-NEXT:    s_and_b32 s7, s5, 15
3817; GFX8-NEXT:    s_andn2_b32 s5, 15, s5
3818; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
3819; GFX8-NEXT:    s_bfe_u32 s7, s7, 0x100000
3820; GFX8-NEXT:    s_lshr_b32 s3, s3, s8
3821; GFX8-NEXT:    s_bfe_u32 s5, s5, 0x100000
3822; GFX8-NEXT:    s_lshl_b32 s1, s1, s7
3823; GFX8-NEXT:    s_lshr_b32 s3, s3, s5
3824; GFX8-NEXT:    s_or_b32 s1, s1, s3
3825; GFX8-NEXT:    s_and_b32 s3, s6, 15
3826; GFX8-NEXT:    s_lshl_b32 s4, s4, s8
3827; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
3828; GFX8-NEXT:    s_andn2_b32 s5, 15, s6
3829; GFX8-NEXT:    s_lshl_b32 s2, s2, s3
3830; GFX8-NEXT:    s_bfe_u32 s3, s4, 0x100000
3831; GFX8-NEXT:    s_lshr_b32 s3, s3, s8
3832; GFX8-NEXT:    s_bfe_u32 s4, s5, 0x100000
3833; GFX8-NEXT:    s_lshr_b32 s3, s3, s4
3834; GFX8-NEXT:    s_or_b32 s2, s2, s3
3835; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
3836; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
3837; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
3838; GFX8-NEXT:    s_or_b32 s1, s1, s2
3839; GFX8-NEXT:    ; return to shader part epilog
3840;
3841; GFX9-LABEL: s_fshr_v4i16:
3842; GFX9:       ; %bb.0:
3843; GFX9-NEXT:    s_mov_b32 s8, 0x10001
3844; GFX9-NEXT:    s_lshr_b32 s9, s0, 16
3845; GFX9-NEXT:    s_mov_b32 s6, 0xf000f
3846; GFX9-NEXT:    s_lshl_b32 s0, s0, s8
3847; GFX9-NEXT:    s_lshl_b32 s9, s9, 1
3848; GFX9-NEXT:    s_and_b32 s7, s4, s6
3849; GFX9-NEXT:    s_andn2_b32 s4, s6, s4
3850; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s9
3851; GFX9-NEXT:    s_lshr_b32 s9, s0, 16
3852; GFX9-NEXT:    s_lshr_b32 s10, s4, 16
3853; GFX9-NEXT:    s_lshl_b32 s0, s0, s4
3854; GFX9-NEXT:    s_lshl_b32 s4, s9, s10
3855; GFX9-NEXT:    s_mov_b32 s9, 0xffff
3856; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
3857; GFX9-NEXT:    s_lshr_b32 s4, s2, 16
3858; GFX9-NEXT:    s_and_b32 s2, s2, s9
3859; GFX9-NEXT:    s_lshr_b32 s10, s7, 16
3860; GFX9-NEXT:    s_lshr_b32 s2, s2, s7
3861; GFX9-NEXT:    s_lshr_b32 s4, s4, s10
3862; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s4
3863; GFX9-NEXT:    s_or_b32 s0, s0, s2
3864; GFX9-NEXT:    s_and_b32 s2, s5, s6
3865; GFX9-NEXT:    s_andn2_b32 s4, s6, s5
3866; GFX9-NEXT:    s_lshr_b32 s5, s1, 16
3867; GFX9-NEXT:    s_lshl_b32 s1, s1, s8
3868; GFX9-NEXT:    s_lshl_b32 s5, s5, 1
3869; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s5
3870; GFX9-NEXT:    s_lshr_b32 s5, s1, 16
3871; GFX9-NEXT:    s_lshr_b32 s6, s4, 16
3872; GFX9-NEXT:    s_lshl_b32 s1, s1, s4
3873; GFX9-NEXT:    s_lshl_b32 s4, s5, s6
3874; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
3875; GFX9-NEXT:    s_lshr_b32 s4, s3, 16
3876; GFX9-NEXT:    s_and_b32 s3, s3, s9
3877; GFX9-NEXT:    s_lshr_b32 s5, s2, 16
3878; GFX9-NEXT:    s_lshr_b32 s2, s3, s2
3879; GFX9-NEXT:    s_lshr_b32 s3, s4, s5
3880; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s3
3881; GFX9-NEXT:    s_or_b32 s1, s1, s2
3882; GFX9-NEXT:    ; return to shader part epilog
3883;
3884; GFX10-LABEL: s_fshr_v4i16:
3885; GFX10:       ; %bb.0:
3886; GFX10-NEXT:    s_mov_b32 s7, 0x10001
3887; GFX10-NEXT:    s_lshr_b32 s8, s0, 16
3888; GFX10-NEXT:    s_mov_b32 s6, 0xf000f
3889; GFX10-NEXT:    s_lshl_b32 s0, s0, s7
3890; GFX10-NEXT:    s_lshl_b32 s8, s8, 1
3891; GFX10-NEXT:    s_and_b32 s9, s4, s6
3892; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s8
3893; GFX10-NEXT:    s_andn2_b32 s4, s6, s4
3894; GFX10-NEXT:    s_lshr_b32 s8, s0, 16
3895; GFX10-NEXT:    s_lshr_b32 s10, s4, 16
3896; GFX10-NEXT:    s_lshl_b32 s0, s0, s4
3897; GFX10-NEXT:    s_lshl_b32 s4, s8, s10
3898; GFX10-NEXT:    s_mov_b32 s8, 0xffff
3899; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
3900; GFX10-NEXT:    s_lshr_b32 s4, s1, 16
3901; GFX10-NEXT:    s_lshl_b32 s1, s1, s7
3902; GFX10-NEXT:    s_lshl_b32 s4, s4, 1
3903; GFX10-NEXT:    s_and_b32 s7, s5, s6
3904; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
3905; GFX10-NEXT:    s_andn2_b32 s4, s6, s5
3906; GFX10-NEXT:    s_lshr_b32 s5, s1, 16
3907; GFX10-NEXT:    s_lshr_b32 s6, s4, 16
3908; GFX10-NEXT:    s_lshr_b32 s10, s2, 16
3909; GFX10-NEXT:    s_and_b32 s2, s2, s8
3910; GFX10-NEXT:    s_lshr_b32 s11, s9, 16
3911; GFX10-NEXT:    s_lshl_b32 s1, s1, s4
3912; GFX10-NEXT:    s_lshl_b32 s4, s5, s6
3913; GFX10-NEXT:    s_lshr_b32 s5, s3, 16
3914; GFX10-NEXT:    s_and_b32 s3, s3, s8
3915; GFX10-NEXT:    s_lshr_b32 s6, s7, 16
3916; GFX10-NEXT:    s_lshr_b32 s2, s2, s9
3917; GFX10-NEXT:    s_lshr_b32 s9, s10, s11
3918; GFX10-NEXT:    s_lshr_b32 s3, s3, s7
3919; GFX10-NEXT:    s_lshr_b32 s5, s5, s6
3920; GFX10-NEXT:    s_pack_ll_b32_b16 s2, s2, s9
3921; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
3922; GFX10-NEXT:    s_pack_ll_b32_b16 s3, s3, s5
3923; GFX10-NEXT:    s_or_b32 s0, s0, s2
3924; GFX10-NEXT:    s_or_b32 s1, s1, s3
3925; GFX10-NEXT:    ; return to shader part epilog
3926  %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
3927  %cast.result = bitcast <4 x i16> %result to <2 x i32>
3928  ret <2 x i32> %cast.result
3929}
3930
; v_fshr_v4i16: llvm.fshr on <4 x i16> in a function with no explicit calling
; convention and no inreg operands; the expected code works on v registers and
; returns with s_setpc_b64 s[30:31]. The result is bitcast to <4 x half>.
; In the expected output below, GFX6 shifts each element with 32-bit VALU ops,
; GFX8 uses 16-bit and SDWA shifts, and GFX9/GFX10 use the packed
; v_pk_lshlrev_b16 / v_pk_lshrrev_b16 with the amount masked by 0xf000f.
; NOTE: the CHECK lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
3931define <4 x half> @v_fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) {
3932; GFX6-LABEL: v_fshr_v4i16:
3933; GFX6:       ; %bb.0:
3934; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3935; GFX6-NEXT:    v_mov_b32_e32 v12, 0xffff
3936; GFX6-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
3937; GFX6-NEXT:    v_and_b32_e32 v8, v8, v12
3938; GFX6-NEXT:    v_or_b32_e32 v8, v9, v8
3939; GFX6-NEXT:    v_lshlrev_b32_e32 v9, 16, v11
3940; GFX6-NEXT:    v_and_b32_e32 v10, v10, v12
3941; GFX6-NEXT:    v_or_b32_e32 v9, v9, v10
3942; GFX6-NEXT:    s_bfe_u32 s4, 1, 0x100000
3943; GFX6-NEXT:    v_bfe_u32 v10, v4, 1, 15
3944; GFX6-NEXT:    s_bfe_u32 s5, 14, 0x100000
3945; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s4, v0
3946; GFX6-NEXT:    v_lshrrev_b32_e32 v10, s5, v10
3947; GFX6-NEXT:    v_or_b32_e32 v0, v0, v10
3948; GFX6-NEXT:    v_bfe_u32 v10, v5, 1, 15
3949; GFX6-NEXT:    v_lshlrev_b32_e32 v1, s4, v1
3950; GFX6-NEXT:    v_lshrrev_b32_e32 v10, s5, v10
3951; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v8
3952; GFX6-NEXT:    v_or_b32_e32 v1, v1, v10
3953; GFX6-NEXT:    v_lshrrev_b32_e32 v10, 16, v8
3954; GFX6-NEXT:    v_and_b32_e32 v11, 15, v8
3955; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v8
3956; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 1, v4
3957; GFX6-NEXT:    v_and_b32_e32 v8, 15, v8
3958; GFX6-NEXT:    v_bfe_u32 v11, v11, 0, 16
3959; GFX6-NEXT:    v_bfe_u32 v4, v4, 1, 15
3960; GFX6-NEXT:    v_bfe_u32 v8, v8, 0, 16
3961; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v11, v0
3962; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v8, v4
3963; GFX6-NEXT:    v_or_b32_e32 v0, v0, v4
3964; GFX6-NEXT:    v_and_b32_e32 v4, 15, v10
3965; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v10
3966; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 1, v5
3967; GFX6-NEXT:    v_and_b32_e32 v8, 15, v8
3968; GFX6-NEXT:    v_bfe_u32 v4, v4, 0, 16
3969; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v4, v1
3970; GFX6-NEXT:    v_bfe_u32 v4, v5, 1, 15
3971; GFX6-NEXT:    v_bfe_u32 v5, v8, 0, 16
3972; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v5, v4
3973; GFX6-NEXT:    v_or_b32_e32 v1, v1, v4
3974; GFX6-NEXT:    v_bfe_u32 v4, v6, 1, 15
3975; GFX6-NEXT:    v_lshlrev_b32_e32 v2, s4, v2
3976; GFX6-NEXT:    v_lshrrev_b32_e32 v4, s5, v4
3977; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
3978; GFX6-NEXT:    v_bfe_u32 v4, v7, 1, 15
3979; GFX6-NEXT:    v_lshlrev_b32_e32 v3, s4, v3
3980; GFX6-NEXT:    v_lshrrev_b32_e32 v4, s5, v4
3981; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
3982; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 1, v6
3983; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v9
3984; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 1, v7
3985; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
3986; GFX6-NEXT:    v_and_b32_e32 v8, 15, v6
3987; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v6
3988; GFX6-NEXT:    v_and_b32_e32 v6, 15, v6
3989; GFX6-NEXT:    v_bfe_u32 v8, v8, 0, 16
3990; GFX6-NEXT:    v_bfe_u32 v4, v4, 1, 15
3991; GFX6-NEXT:    v_bfe_u32 v6, v6, 0, 16
3992; GFX6-NEXT:    v_lshlrev_b32_e32 v2, v8, v2
3993; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v6, v4
3994; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
3995; GFX6-NEXT:    v_and_b32_e32 v4, 15, v7
3996; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v7
3997; GFX6-NEXT:    v_and_b32_e32 v6, 15, v6
3998; GFX6-NEXT:    v_bfe_u32 v4, v4, 0, 16
3999; GFX6-NEXT:    v_lshlrev_b32_e32 v3, v4, v3
4000; GFX6-NEXT:    v_bfe_u32 v4, v5, 1, 15
4001; GFX6-NEXT:    v_bfe_u32 v5, v6, 0, 16
4002; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v5, v4
4003; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
4004; GFX6-NEXT:    s_setpc_b64 s[30:31]
4005;
4006; GFX8-LABEL: v_fshr_v4i16:
4007; GFX8:       ; %bb.0:
4008; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4009; GFX8-NEXT:    v_lshlrev_b16_e32 v6, 1, v0
4010; GFX8-NEXT:    v_lshrrev_b16_e32 v7, 15, v2
4011; GFX8-NEXT:    v_or_b32_e32 v6, v6, v7
4012; GFX8-NEXT:    v_mov_b32_e32 v7, 1
4013; GFX8-NEXT:    v_mov_b32_e32 v8, 15
4014; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4015; GFX8-NEXT:    v_lshrrev_b16_sdwa v9, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4016; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v4
4017; GFX8-NEXT:    v_or_b32_e32 v0, v0, v9
4018; GFX8-NEXT:    v_lshlrev_b16_e32 v9, 1, v2
4019; GFX8-NEXT:    v_lshlrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4020; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
4021; GFX8-NEXT:    v_and_b32_e32 v10, 15, v4
4022; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v4
4023; GFX8-NEXT:    v_and_b32_e32 v4, 15, v4
4024; GFX8-NEXT:    v_lshrrev_b16_e32 v9, 1, v9
4025; GFX8-NEXT:    v_lshlrev_b16_e32 v6, v10, v6
4026; GFX8-NEXT:    v_lshrrev_b16_e32 v4, v4, v9
4027; GFX8-NEXT:    v_or_b32_e32 v4, v6, v4
4028; GFX8-NEXT:    v_and_b32_e32 v6, 15, v7
4029; GFX8-NEXT:    v_xor_b32_e32 v7, -1, v7
4030; GFX8-NEXT:    v_and_b32_e32 v7, 15, v7
4031; GFX8-NEXT:    v_lshrrev_b16_e32 v2, 1, v2
4032; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v6, v0
4033; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v7, v2
4034; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
4035; GFX8-NEXT:    v_mov_b32_e32 v2, 16
4036; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
4037; GFX8-NEXT:    v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4038; GFX8-NEXT:    v_lshlrev_b16_e32 v4, 1, v1
4039; GFX8-NEXT:    v_lshrrev_b16_e32 v6, 15, v3
4040; GFX8-NEXT:    v_or_b32_e32 v4, v4, v6
4041; GFX8-NEXT:    v_mov_b32_e32 v6, 1
4042; GFX8-NEXT:    v_lshlrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4043; GFX8-NEXT:    v_lshrrev_b16_sdwa v7, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4044; GFX8-NEXT:    v_xor_b32_e32 v5, -1, v5
4045; GFX8-NEXT:    v_or_b32_e32 v1, v1, v7
4046; GFX8-NEXT:    v_lshlrev_b16_e32 v7, 1, v3
4047; GFX8-NEXT:    v_lshlrev_b16_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4048; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
4049; GFX8-NEXT:    v_and_b32_e32 v8, 15, v5
4050; GFX8-NEXT:    v_xor_b32_e32 v5, -1, v5
4051; GFX8-NEXT:    v_and_b32_e32 v5, 15, v5
4052; GFX8-NEXT:    v_lshrrev_b16_e32 v7, 1, v7
4053; GFX8-NEXT:    v_lshlrev_b16_e32 v4, v8, v4
4054; GFX8-NEXT:    v_lshrrev_b16_e32 v5, v5, v7
4055; GFX8-NEXT:    v_or_b32_e32 v4, v4, v5
4056; GFX8-NEXT:    v_and_b32_e32 v5, 15, v6
4057; GFX8-NEXT:    v_xor_b32_e32 v6, -1, v6
4058; GFX8-NEXT:    v_and_b32_e32 v6, 15, v6
4059; GFX8-NEXT:    v_lshrrev_b16_e32 v3, 1, v3
4060; GFX8-NEXT:    v_lshlrev_b16_e32 v1, v5, v1
4061; GFX8-NEXT:    v_lshrrev_b16_e32 v3, v6, v3
4062; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
4063; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
4064; GFX8-NEXT:    v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4065; GFX8-NEXT:    s_setpc_b64 s[30:31]
4066;
4067; GFX9-LABEL: v_fshr_v4i16:
4068; GFX9:       ; %bb.0:
4069; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4070; GFX9-NEXT:    s_mov_b32 s4, 0xf000f
4071; GFX9-NEXT:    v_and_b32_e32 v6, s4, v4
4072; GFX9-NEXT:    v_xor_b32_e32 v4, -1, v4
4073; GFX9-NEXT:    v_and_b32_e32 v4, s4, v4
4074; GFX9-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4075; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v4, v0
4076; GFX9-NEXT:    v_pk_lshrrev_b16 v2, v6, v2
4077; GFX9-NEXT:    v_xor_b32_e32 v4, -1, v5
4078; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
4079; GFX9-NEXT:    v_and_b32_e32 v2, s4, v5
4080; GFX9-NEXT:    v_and_b32_e32 v4, s4, v4
4081; GFX9-NEXT:    v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
4082; GFX9-NEXT:    v_pk_lshlrev_b16 v1, v4, v1
4083; GFX9-NEXT:    v_pk_lshrrev_b16 v2, v2, v3
4084; GFX9-NEXT:    v_or_b32_e32 v1, v1, v2
4085; GFX9-NEXT:    s_setpc_b64 s[30:31]
4086;
4087; GFX10-LABEL: v_fshr_v4i16:
4088; GFX10:       ; %bb.0:
4089; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4090; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4091; GFX10-NEXT:    v_xor_b32_e32 v6, -1, v4
4092; GFX10-NEXT:    v_xor_b32_e32 v7, -1, v5
4093; GFX10-NEXT:    s_mov_b32 s4, 0xf000f
4094; GFX10-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4095; GFX10-NEXT:    v_and_b32_e32 v4, s4, v4
4096; GFX10-NEXT:    v_and_b32_e32 v6, s4, v6
4097; GFX10-NEXT:    v_and_b32_e32 v5, s4, v5
4098; GFX10-NEXT:    v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
4099; GFX10-NEXT:    v_and_b32_e32 v7, s4, v7
4100; GFX10-NEXT:    v_pk_lshrrev_b16 v2, v4, v2
4101; GFX10-NEXT:    v_pk_lshlrev_b16 v0, v6, v0
4102; GFX10-NEXT:    v_pk_lshrrev_b16 v3, v5, v3
4103; GFX10-NEXT:    v_pk_lshlrev_b16 v1, v7, v1
4104; GFX10-NEXT:    v_or_b32_e32 v0, v0, v2
4105; GFX10-NEXT:    v_or_b32_e32 v1, v1, v3
4106; GFX10-NEXT:    s_setpc_b64 s[30:31]
4107  %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
4108  %cast.result = bitcast <4 x i16> %result to <4 x half>
4109  ret <4 x half> %cast.result
4110}
4111
; s_fshr_i64: llvm.fshr on i64 with a variable amount in an amdgpu_ps function
; where %lhs, %rhs and %amt are all marked inreg.
; Every target expects the same shape: %rhs is shifted right by (amt & 63),
; %lhs is shifted left by 1 and then by (63 & ~amt) (the s_andn2_b64), and the
; two halves are combined with s_or_b64. GFX10 differs only in scheduling and
; register choice.
; NOTE: the CHECK lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
4112define amdgpu_ps i64 @s_fshr_i64(i64 inreg %lhs, i64 inreg %rhs, i64 inreg %amt) {
4113; GFX6-LABEL: s_fshr_i64:
4114; GFX6:       ; %bb.0:
4115; GFX6-NEXT:    s_and_b64 s[6:7], s[4:5], 63
4116; GFX6-NEXT:    s_andn2_b64 s[4:5], 63, s[4:5]
4117; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4118; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], s4
4119; GFX6-NEXT:    s_lshr_b64 s[2:3], s[2:3], s6
4120; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
4121; GFX6-NEXT:    ; return to shader part epilog
4122;
4123; GFX8-LABEL: s_fshr_i64:
4124; GFX8:       ; %bb.0:
4125; GFX8-NEXT:    s_and_b64 s[6:7], s[4:5], 63
4126; GFX8-NEXT:    s_andn2_b64 s[4:5], 63, s[4:5]
4127; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4128; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], s4
4129; GFX8-NEXT:    s_lshr_b64 s[2:3], s[2:3], s6
4130; GFX8-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
4131; GFX8-NEXT:    ; return to shader part epilog
4132;
4133; GFX9-LABEL: s_fshr_i64:
4134; GFX9:       ; %bb.0:
4135; GFX9-NEXT:    s_and_b64 s[6:7], s[4:5], 63
4136; GFX9-NEXT:    s_andn2_b64 s[4:5], 63, s[4:5]
4137; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4138; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], s4
4139; GFX9-NEXT:    s_lshr_b64 s[2:3], s[2:3], s6
4140; GFX9-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
4141; GFX9-NEXT:    ; return to shader part epilog
4142;
4143; GFX10-LABEL: s_fshr_i64:
4144; GFX10:       ; %bb.0:
4145; GFX10-NEXT:    s_andn2_b64 s[6:7], 63, s[4:5]
4146; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4147; GFX10-NEXT:    s_and_b64 s[4:5], s[4:5], 63
4148; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s6
4149; GFX10-NEXT:    s_lshr_b64 s[2:3], s[2:3], s4
4150; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
4151; GFX10-NEXT:    ; return to shader part epilog
4152  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
4153  ret i64 %result
4154}
4155
; s_fshr_i64_5: llvm.fshr on i64 with the constant amount 5 and inreg operands.
; One GCN check block covers all four RUN lines: the low dword of %lhs is
; shifted left by 27 into the high result dword (low dword zeroed), %rhs is
; shifted right by 5, and the two 64-bit values are OR'd.
; NOTE: the CHECK lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
4156define amdgpu_ps i64 @s_fshr_i64_5(i64 inreg %lhs, i64 inreg %rhs) {
4157; GCN-LABEL: s_fshr_i64_5:
4158; GCN:       ; %bb.0:
4159; GCN-NEXT:    s_lshl_b32 s1, s0, 27
4160; GCN-NEXT:    s_mov_b32 s0, 0
4161; GCN-NEXT:    s_lshr_b64 s[2:3], s[2:3], 5
4162; GCN-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
4163; GCN-NEXT:    ; return to shader part epilog
4164  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5)
4165  ret i64 %result
4166}
4167
; s_fshr_i64_32: llvm.fshr on i64 with the constant amount 32 and inreg
; operands. One GCN check block covers all four RUN lines: no shift
; instructions are expected, only s_mov_b32 copies (low dword of %lhs into the
; high half of one pair, high dword of %rhs into the low half of the other,
; remaining halves zero) followed by s_or_b64.
; NOTE: the CHECK lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
4168define amdgpu_ps i64 @s_fshr_i64_32(i64 inreg %lhs, i64 inreg %rhs) {
4169; GCN-LABEL: s_fshr_i64_32:
4170; GCN:       ; %bb.0:
4171; GCN-NEXT:    s_mov_b32 s1, s0
4172; GCN-NEXT:    s_mov_b32 s0, 0
4173; GCN-NEXT:    s_mov_b32 s2, s3
4174; GCN-NEXT:    s_mov_b32 s3, s0
4175; GCN-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
4176; GCN-NEXT:    ; return to shader part epilog
4177  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32)
4178  ret i64 %result
4179}
4180
; s_fshr_i64_48: llvm.fshr on i64 with the constant amount 48 and inreg
; operands. One GCN check block covers all four RUN lines: %lhs is shifted
; left by 16 as a 64-bit value, the high dword of %rhs is shifted right by 16
; into the low dword of a zero-extended pair, and the two are OR'd.
; NOTE: the CHECK lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
4181define amdgpu_ps i64 @s_fshr_i64_48(i64 inreg %lhs, i64 inreg %rhs) {
4182; GCN-LABEL: s_fshr_i64_48:
4183; GCN:       ; %bb.0:
4184; GCN-NEXT:    s_lshl_b64 s[0:1], s[0:1], 16
4185; GCN-NEXT:    s_lshr_b32 s2, s3, 16
4186; GCN-NEXT:    s_mov_b32 s3, 0
4187; GCN-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
4188; GCN-NEXT:    ; return to shader part epilog
4189  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48)
4190  ret i64 %result
4191}
4192
; v_fshr_i64: llvm.fshr on i64 with a variable amount in a function with no
; explicit calling convention; the expected code works on v registers and
; returns with s_setpc_b64 s[30:31].
; Every target expects: amt & 63 as the right-shift amount for %rhs, and
; (~amt) & 63 as the left-shift amount applied to %lhs after a fixed shift
; left by 1; the result dwords are OR'd separately. GFX6 uses the
; v_lshl_b64/v_lshr_b64 mnemonics, the other targets use
; v_lshlrev_b64/v_lshrrev_b64.
; NOTE: the CHECK lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
4193define i64 @v_fshr_i64(i64 %lhs, i64 %rhs, i64 %amt) {
4194; GFX6-LABEL: v_fshr_i64:
4195; GFX6:       ; %bb.0:
4196; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4197; GFX6-NEXT:    v_and_b32_e32 v5, 63, v4
4198; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v4
4199; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
4200; GFX6-NEXT:    v_and_b32_e32 v4, 63, v4
4201; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v4
4202; GFX6-NEXT:    v_lshr_b64 v[2:3], v[2:3], v5
4203; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
4204; GFX6-NEXT:    v_or_b32_e32 v1, v1, v3
4205; GFX6-NEXT:    s_setpc_b64 s[30:31]
4206;
4207; GFX8-LABEL: v_fshr_i64:
4208; GFX8:       ; %bb.0:
4209; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4210; GFX8-NEXT:    v_and_b32_e32 v5, 63, v4
4211; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v4
4212; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
4213; GFX8-NEXT:    v_and_b32_e32 v4, 63, v4
4214; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v4, v[0:1]
4215; GFX8-NEXT:    v_lshrrev_b64 v[2:3], v5, v[2:3]
4216; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
4217; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
4218; GFX8-NEXT:    s_setpc_b64 s[30:31]
4219;
4220; GFX9-LABEL: v_fshr_i64:
4221; GFX9:       ; %bb.0:
4222; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4223; GFX9-NEXT:    v_and_b32_e32 v5, 63, v4
4224; GFX9-NEXT:    v_xor_b32_e32 v4, -1, v4
4225; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
4226; GFX9-NEXT:    v_and_b32_e32 v4, 63, v4
4227; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v4, v[0:1]
4228; GFX9-NEXT:    v_lshrrev_b64 v[2:3], v5, v[2:3]
4229; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
4230; GFX9-NEXT:    v_or_b32_e32 v1, v1, v3
4231; GFX9-NEXT:    s_setpc_b64 s[30:31]
4232;
4233; GFX10-LABEL: v_fshr_i64:
4234; GFX10:       ; %bb.0:
4235; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4236; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4237; GFX10-NEXT:    v_xor_b32_e32 v5, -1, v4
4238; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
4239; GFX10-NEXT:    v_and_b32_e32 v4, 63, v4
4240; GFX10-NEXT:    v_and_b32_e32 v5, 63, v5
4241; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v4, v[2:3]
4242; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v5, v[0:1]
4243; GFX10-NEXT:    v_or_b32_e32 v0, v0, v2
4244; GFX10-NEXT:    v_or_b32_e32 v1, v1, v3
4245; GFX10-NEXT:    s_setpc_b64 s[30:31]
4246  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
4247  ret i64 %result
4248}
4249
; v_fshr_i64_5: llvm.fshr on i64 with the constant amount 5, operands in v
; registers (no inreg, default calling convention).
; Every target expects: copy the low dword of %lhs aside, shift %rhs right by
; 5 as a 64-bit value directly into v[0:1], shift the saved dword left by 27
; and OR it into the high result dword only.
; NOTE: the CHECK lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
4250define i64 @v_fshr_i64_5(i64 %lhs, i64 %rhs) {
4251; GFX6-LABEL: v_fshr_i64_5:
4252; GFX6:       ; %bb.0:
4253; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4254; GFX6-NEXT:    v_mov_b32_e32 v4, v0
4255; GFX6-NEXT:    v_lshr_b64 v[0:1], v[2:3], 5
4256; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 27, v4
4257; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
4258; GFX6-NEXT:    s_setpc_b64 s[30:31]
4259;
4260; GFX8-LABEL: v_fshr_i64_5:
4261; GFX8:       ; %bb.0:
4262; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4263; GFX8-NEXT:    v_mov_b32_e32 v4, v0
4264; GFX8-NEXT:    v_lshrrev_b64 v[0:1], 5, v[2:3]
4265; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 27, v4
4266; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
4267; GFX8-NEXT:    s_setpc_b64 s[30:31]
4268;
4269; GFX9-LABEL: v_fshr_i64_5:
4270; GFX9:       ; %bb.0:
4271; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4272; GFX9-NEXT:    v_mov_b32_e32 v4, v0
4273; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 5, v[2:3]
4274; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 27, v4
4275; GFX9-NEXT:    v_or_b32_e32 v1, v2, v1
4276; GFX9-NEXT:    s_setpc_b64 s[30:31]
4277;
4278; GFX10-LABEL: v_fshr_i64_5:
4279; GFX10:       ; %bb.0:
4280; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4281; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4282; GFX10-NEXT:    v_mov_b32_e32 v4, v0
4283; GFX10-NEXT:    v_lshrrev_b64 v[0:1], 5, v[2:3]
4284; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 27, v4
4285; GFX10-NEXT:    v_or_b32_e32 v1, v2, v1
4286; GFX10-NEXT:    s_setpc_b64 s[30:31]
4287  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5)
4288  ret i64 %result
4289}
4290
; v_fshr_i64_32: llvm.fshr on i64 with the constant amount 32, operands in v
; registers (no inreg, default calling convention).
; Every target expects just two register moves and no shift or OR: the low
; dword of %lhs (v0) becomes the high result dword (v1), and the high dword of
; %rhs (v3) becomes the low result dword (v0).
; NOTE: the CHECK lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
4291define i64 @v_fshr_i64_32(i64 %lhs, i64 %rhs) {
4292; GFX6-LABEL: v_fshr_i64_32:
4293; GFX6:       ; %bb.0:
4294; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4295; GFX6-NEXT:    v_mov_b32_e32 v1, v0
4296; GFX6-NEXT:    v_mov_b32_e32 v0, v3
4297; GFX6-NEXT:    s_setpc_b64 s[30:31]
4298;
4299; GFX8-LABEL: v_fshr_i64_32:
4300; GFX8:       ; %bb.0:
4301; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4302; GFX8-NEXT:    v_mov_b32_e32 v1, v0
4303; GFX8-NEXT:    v_mov_b32_e32 v0, v3
4304; GFX8-NEXT:    s_setpc_b64 s[30:31]
4305;
4306; GFX9-LABEL: v_fshr_i64_32:
4307; GFX9:       ; %bb.0:
4308; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4309; GFX9-NEXT:    v_mov_b32_e32 v1, v0
4310; GFX9-NEXT:    v_mov_b32_e32 v0, v3
4311; GFX9-NEXT:    s_setpc_b64 s[30:31]
4312;
4313; GFX10-LABEL: v_fshr_i64_32:
4314; GFX10:       ; %bb.0:
4315; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4316; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4317; GFX10-NEXT:    v_mov_b32_e32 v1, v0
4318; GFX10-NEXT:    v_mov_b32_e32 v0, v3
4319; GFX10-NEXT:    s_setpc_b64 s[30:31]
4320  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32)
4321  ret i64 %result
4322}
4323
; v_fshr_i64_48: llvm.fshr on i64 with the constant amount 48, operands in v
; registers (no inreg, default calling convention).
; Every target shifts %lhs left by 16 as a 64-bit value and ORs the upper 16
; bits of the high dword of %rhs into the low result dword. GFX6 does this
; with a separate v_lshrrev_b32 by 16 plus v_or_b32; GFX8/GFX9/GFX10 expect a
; single v_or_b32_sdwa selecting WORD_1 of v3.
; NOTE: the CHECK lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
4324define i64 @v_fshr_i64_48(i64 %lhs, i64 %rhs) {
4325; GFX6-LABEL: v_fshr_i64_48:
4326; GFX6:       ; %bb.0:
4327; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4328; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 16
4329; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
4330; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
4331; GFX6-NEXT:    s_setpc_b64 s[30:31]
4332;
4333; GFX8-LABEL: v_fshr_i64_48:
4334; GFX8:       ; %bb.0:
4335; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4336; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 16, v[0:1]
4337; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4338; GFX8-NEXT:    s_setpc_b64 s[30:31]
4339;
4340; GFX9-LABEL: v_fshr_i64_48:
4341; GFX9:       ; %bb.0:
4342; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4343; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 16, v[0:1]
4344; GFX9-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4345; GFX9-NEXT:    s_setpc_b64 s[30:31]
4346;
4347; GFX10-LABEL: v_fshr_i64_48:
4348; GFX10:       ; %bb.0:
4349; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4350; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4351; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 16, v[0:1]
4352; GFX10-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4353; GFX10-NEXT:    s_setpc_b64 s[30:31]
4354  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48)
4355  ret i64 %result
4356}
4357
; v_fshr_i64_ssv: llvm.fshr on i64 in an amdgpu_ps function with mixed operand
; placement: %lhs and %rhs are inreg, %amt is not. The result is bitcast to
; <2 x float> for the return.
; In the expected output the masking of the amount (and 63 / xor -1) is done
; with VALU ops on v0, the fixed shift of %lhs by 1 stays scalar (s_lshl_b64),
; and both variable 64-bit shifts are VALU shifts taking the s register pairs
; as the shifted value.
; NOTE: the CHECK lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
4358define amdgpu_ps <2 x float> @v_fshr_i64_ssv(i64 inreg %lhs, i64 inreg %rhs, i64 %amt) {
4359; GFX6-LABEL: v_fshr_i64_ssv:
4360; GFX6:       ; %bb.0:
4361; GFX6-NEXT:    v_and_b32_e32 v2, 63, v0
4362; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
4363; GFX6-NEXT:    v_and_b32_e32 v0, 63, v0
4364; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4365; GFX6-NEXT:    v_lshl_b64 v[0:1], s[0:1], v0
4366; GFX6-NEXT:    v_lshr_b64 v[2:3], s[2:3], v2
4367; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
4368; GFX6-NEXT:    v_or_b32_e32 v1, v1, v3
4369; GFX6-NEXT:    ; return to shader part epilog
4370;
4371; GFX8-LABEL: v_fshr_i64_ssv:
4372; GFX8:       ; %bb.0:
4373; GFX8-NEXT:    v_and_b32_e32 v2, 63, v0
4374; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
4375; GFX8-NEXT:    v_and_b32_e32 v0, 63, v0
4376; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4377; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v0, s[0:1]
4378; GFX8-NEXT:    v_lshrrev_b64 v[2:3], v2, s[2:3]
4379; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
4380; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
4381; GFX8-NEXT:    ; return to shader part epilog
4382;
4383; GFX9-LABEL: v_fshr_i64_ssv:
4384; GFX9:       ; %bb.0:
4385; GFX9-NEXT:    v_and_b32_e32 v2, 63, v0
4386; GFX9-NEXT:    v_xor_b32_e32 v0, -1, v0
4387; GFX9-NEXT:    v_and_b32_e32 v0, 63, v0
4388; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4389; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v0, s[0:1]
4390; GFX9-NEXT:    v_lshrrev_b64 v[2:3], v2, s[2:3]
4391; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
4392; GFX9-NEXT:    v_or_b32_e32 v1, v1, v3
4393; GFX9-NEXT:    ; return to shader part epilog
4394;
4395; GFX10-LABEL: v_fshr_i64_ssv:
4396; GFX10:       ; %bb.0:
4397; GFX10-NEXT:    v_xor_b32_e32 v1, -1, v0
4398; GFX10-NEXT:    v_and_b32_e32 v0, 63, v0
4399; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4400; GFX10-NEXT:    v_and_b32_e32 v2, 63, v1
4401; GFX10-NEXT:    v_lshrrev_b64 v[0:1], v0, s[2:3]
4402; GFX10-NEXT:    v_lshlrev_b64 v[2:3], v2, s[0:1]
4403; GFX10-NEXT:    v_or_b32_e32 v0, v2, v0
4404; GFX10-NEXT:    v_or_b32_e32 v1, v3, v1
4405; GFX10-NEXT:    ; return to shader part epilog
4406  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
4407  %cast = bitcast i64 %result to <2 x float>
4408  ret <2 x float> %cast
4409}
4410
; v_fshr_i64_svs: llvm.fshr on i64 in an amdgpu_ps function with mixed operand
; placement: %lhs and %amt are inreg, %rhs is not. The result is bitcast to
; <2 x float> for the return.
; In the expected output the amount masking (s_and_b64 / s_andn2_b64 with 63)
; and both shifts of %lhs stay scalar, only the right shift of %rhs is a VALU
; 64-bit shift, and the final OR is done per dword with v_or_b32.
; NOTE: the CHECK lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them rather than editing them by hand.
4411define amdgpu_ps <2 x float> @v_fshr_i64_svs(i64 inreg %lhs, i64 %rhs, i64 inreg %amt) {
4412; GFX6-LABEL: v_fshr_i64_svs:
4413; GFX6:       ; %bb.0:
4414; GFX6-NEXT:    s_and_b64 s[4:5], s[2:3], 63
4415; GFX6-NEXT:    s_andn2_b64 s[2:3], 63, s[2:3]
4416; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4417; GFX6-NEXT:    v_lshr_b64 v[0:1], v[0:1], s4
4418; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
4419; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
4420; GFX6-NEXT:    v_or_b32_e32 v1, s1, v1
4421; GFX6-NEXT:    ; return to shader part epilog
4422;
4423; GFX8-LABEL: v_fshr_i64_svs:
4424; GFX8:       ; %bb.0:
4425; GFX8-NEXT:    s_and_b64 s[4:5], s[2:3], 63
4426; GFX8-NEXT:    s_andn2_b64 s[2:3], 63, s[2:3]
4427; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4428; GFX8-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
4429; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
4430; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
4431; GFX8-NEXT:    v_or_b32_e32 v1, s1, v1
4432; GFX8-NEXT:    ; return to shader part epilog
4433;
4434; GFX9-LABEL: v_fshr_i64_svs:
4435; GFX9:       ; %bb.0:
4436; GFX9-NEXT:    s_and_b64 s[4:5], s[2:3], 63
4437; GFX9-NEXT:    s_andn2_b64 s[2:3], 63, s[2:3]
4438; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4439; GFX9-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
4440; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
4441; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
4442; GFX9-NEXT:    v_or_b32_e32 v1, s1, v1
4443; GFX9-NEXT:    ; return to shader part epilog
4444;
4445; GFX10-LABEL: v_fshr_i64_svs:
4446; GFX10:       ; %bb.0:
4447; GFX10-NEXT:    s_and_b64 s[4:5], s[2:3], 63
4448; GFX10-NEXT:    s_andn2_b64 s[2:3], 63, s[2:3]
4449; GFX10-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
4450; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4451; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
4452; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
4453; GFX10-NEXT:    v_or_b32_e32 v1, s1, v1
4454; GFX10-NEXT:    ; return to shader part epilog
4455  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
4456  %cast = bitcast i64 %result to <2 x float>
4457  ret <2 x float> %cast
4458}
4459
; 64-bit funnel shift right in an amdgpu_ps shader where only %lhs is a
; non-inreg argument: in the expected output below %lhs appears in v[0:1],
; while the inreg %rhs and %amt appear in s[0:1] and s[2:3].
; The expected lowering shifts %lhs left by 1 and then by (63 & ~amt) with
; vector shifts, shifts %rhs right by (amt & 63) with a scalar shift, and ORs
; the scalar result into the vector result.
; The bitcast to <2 x float> presumably forces the result to be returned in
; vector registers -- confirm against the calling convention.
; The assertion lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
4460define amdgpu_ps <2 x float> @v_fshr_i64_vss(i64 %lhs, i64 inreg %rhs, i64 inreg %amt) {
4461; GFX6-LABEL: v_fshr_i64_vss:
4462; GFX6:       ; %bb.0:
4463; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
4464; GFX6-NEXT:    s_and_b64 s[4:5], s[2:3], 63
4465; GFX6-NEXT:    s_andn2_b64 s[2:3], 63, s[2:3]
4466; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], s2
4467; GFX6-NEXT:    s_lshr_b64 s[0:1], s[0:1], s4
4468; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
4469; GFX6-NEXT:    v_or_b32_e32 v1, s1, v1
4470; GFX6-NEXT:    ; return to shader part epilog
4471;
4472; GFX8-LABEL: v_fshr_i64_vss:
4473; GFX8:       ; %bb.0:
4474; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
4475; GFX8-NEXT:    s_and_b64 s[4:5], s[2:3], 63
4476; GFX8-NEXT:    s_andn2_b64 s[2:3], 63, s[2:3]
4477; GFX8-NEXT:    v_lshlrev_b64 v[0:1], s2, v[0:1]
4478; GFX8-NEXT:    s_lshr_b64 s[0:1], s[0:1], s4
4479; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
4480; GFX8-NEXT:    v_or_b32_e32 v1, s1, v1
4481; GFX8-NEXT:    ; return to shader part epilog
4482;
4483; GFX9-LABEL: v_fshr_i64_vss:
4484; GFX9:       ; %bb.0:
4485; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
4486; GFX9-NEXT:    s_and_b64 s[4:5], s[2:3], 63
4487; GFX9-NEXT:    s_andn2_b64 s[2:3], 63, s[2:3]
4488; GFX9-NEXT:    v_lshlrev_b64 v[0:1], s2, v[0:1]
4489; GFX9-NEXT:    s_lshr_b64 s[0:1], s[0:1], s4
4490; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
4491; GFX9-NEXT:    v_or_b32_e32 v1, s1, v1
4492; GFX9-NEXT:    ; return to shader part epilog
4493;
4494; GFX10-LABEL: v_fshr_i64_vss:
4495; GFX10:       ; %bb.0:
4496; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
4497; GFX10-NEXT:    s_andn2_b64 s[4:5], 63, s[2:3]
4498; GFX10-NEXT:    s_and_b64 s[2:3], s[2:3], 63
4499; GFX10-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
4500; GFX10-NEXT:    v_lshlrev_b64 v[0:1], s4, v[0:1]
4501; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
4502; GFX10-NEXT:    v_or_b32_e32 v1, s1, v1
4503; GFX10-NEXT:    ; return to shader part epilog
4504  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
4505  %cast = bitcast i64 %result to <2 x float>
4506  ret <2 x float> %cast
4507}
4508
; Funnel shift right on <2 x i64> in an amdgpu_ps shader with every operand
; marked inreg. In the expected output below %lhs occupies s[0:3], %rhs s[4:7]
; and %amt s[8:11], and only scalar instructions are emitted.
; Each 64-bit element is expected to be handled independently with the same
; pattern: (lhs << 1) << (63 & ~amt), OR-ed with rhs >> (amt & 63). Element 0
; is returned in s[0:1] and element 1 in s[2:3].
; The assertion lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
4509define amdgpu_ps <2 x i64> @s_fshr_v2i64(<2 x i64> inreg %lhs, <2 x i64> inreg %rhs, <2 x i64> inreg %amt) {
4510; GFX6-LABEL: s_fshr_v2i64:
4511; GFX6:       ; %bb.0:
4512; GFX6-NEXT:    s_and_b64 s[12:13], s[8:9], 63
4513; GFX6-NEXT:    s_andn2_b64 s[8:9], 63, s[8:9]
4514; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4515; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], s8
4516; GFX6-NEXT:    s_lshr_b64 s[4:5], s[4:5], s12
4517; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
4518; GFX6-NEXT:    s_and_b64 s[4:5], s[10:11], 63
4519; GFX6-NEXT:    s_andn2_b64 s[8:9], 63, s[10:11]
4520; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
4521; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], s8
4522; GFX6-NEXT:    s_lshr_b64 s[4:5], s[6:7], s4
4523; GFX6-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
4524; GFX6-NEXT:    ; return to shader part epilog
4525;
4526; GFX8-LABEL: s_fshr_v2i64:
4527; GFX8:       ; %bb.0:
4528; GFX8-NEXT:    s_and_b64 s[12:13], s[8:9], 63
4529; GFX8-NEXT:    s_andn2_b64 s[8:9], 63, s[8:9]
4530; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4531; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], s8
4532; GFX8-NEXT:    s_lshr_b64 s[4:5], s[4:5], s12
4533; GFX8-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
4534; GFX8-NEXT:    s_and_b64 s[4:5], s[10:11], 63
4535; GFX8-NEXT:    s_andn2_b64 s[8:9], 63, s[10:11]
4536; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
4537; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], s8
4538; GFX8-NEXT:    s_lshr_b64 s[4:5], s[6:7], s4
4539; GFX8-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
4540; GFX8-NEXT:    ; return to shader part epilog
4541;
4542; GFX9-LABEL: s_fshr_v2i64:
4543; GFX9:       ; %bb.0:
4544; GFX9-NEXT:    s_and_b64 s[12:13], s[8:9], 63
4545; GFX9-NEXT:    s_andn2_b64 s[8:9], 63, s[8:9]
4546; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4547; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], s8
4548; GFX9-NEXT:    s_lshr_b64 s[4:5], s[4:5], s12
4549; GFX9-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
4550; GFX9-NEXT:    s_and_b64 s[4:5], s[10:11], 63
4551; GFX9-NEXT:    s_andn2_b64 s[8:9], 63, s[10:11]
4552; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
4553; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], s8
4554; GFX9-NEXT:    s_lshr_b64 s[4:5], s[6:7], s4
4555; GFX9-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
4556; GFX9-NEXT:    ; return to shader part epilog
4557;
4558; GFX10-LABEL: s_fshr_v2i64:
4559; GFX10:       ; %bb.0:
4560; GFX10-NEXT:    s_andn2_b64 s[12:13], 63, s[8:9]
4561; GFX10-NEXT:    s_and_b64 s[8:9], s[8:9], 63
4562; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4563; GFX10-NEXT:    s_lshr_b64 s[4:5], s[4:5], s8
4564; GFX10-NEXT:    s_andn2_b64 s[8:9], 63, s[10:11]
4565; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
4566; GFX10-NEXT:    s_and_b64 s[10:11], s[10:11], 63
4567; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s12
4568; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], s8
4569; GFX10-NEXT:    s_lshr_b64 s[6:7], s[6:7], s10
4570; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
4571; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[6:7]
4572; GFX10-NEXT:    ; return to shader part epilog
4573  %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt)
4574  ret <2 x i64> %result
4575}
4576
; Funnel shift right on <2 x i64> as an ordinary (non-shader) function with no
; inreg operands. In the expected output below %lhs occupies v[0:3], %rhs
; v[4:7], and the shift amounts are read from v8 and v10 only (the low dword of
; each 64-bit amount element); the function returns via s_setpc_b64 s[30:31].
; Each element is expected to use the pattern
; (lhs << 1) << (~amt & 63), OR-ed with rhs >> (amt & 63), where the inverted
; amount is produced by v_xor with -1 followed by v_and with 63.
; The assertion lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
4577define <2 x i64> @v_fshr_v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) {
4578; GFX6-LABEL: v_fshr_v2i64:
4579; GFX6:       ; %bb.0:
4580; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4581; GFX6-NEXT:    v_and_b32_e32 v9, 63, v8
4582; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v8
4583; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
4584; GFX6-NEXT:    v_and_b32_e32 v8, 63, v8
4585; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v8
4586; GFX6-NEXT:    v_lshr_b64 v[4:5], v[4:5], v9
4587; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v10
4588; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
4589; GFX6-NEXT:    v_or_b32_e32 v0, v0, v4
4590; GFX6-NEXT:    v_and_b32_e32 v4, 63, v10
4591; GFX6-NEXT:    v_and_b32_e32 v8, 63, v8
4592; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], v8
4593; GFX6-NEXT:    v_lshr_b64 v[6:7], v[6:7], v4
4594; GFX6-NEXT:    v_or_b32_e32 v1, v1, v5
4595; GFX6-NEXT:    v_or_b32_e32 v2, v2, v6
4596; GFX6-NEXT:    v_or_b32_e32 v3, v3, v7
4597; GFX6-NEXT:    s_setpc_b64 s[30:31]
4598;
4599; GFX8-LABEL: v_fshr_v2i64:
4600; GFX8:       ; %bb.0:
4601; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4602; GFX8-NEXT:    v_and_b32_e32 v9, 63, v8
4603; GFX8-NEXT:    v_xor_b32_e32 v8, -1, v8
4604; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
4605; GFX8-NEXT:    v_and_b32_e32 v8, 63, v8
4606; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v8, v[0:1]
4607; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v9, v[4:5]
4608; GFX8-NEXT:    v_xor_b32_e32 v8, -1, v10
4609; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
4610; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
4611; GFX8-NEXT:    v_and_b32_e32 v4, 63, v10
4612; GFX8-NEXT:    v_and_b32_e32 v8, 63, v8
4613; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v8, v[2:3]
4614; GFX8-NEXT:    v_lshrrev_b64 v[6:7], v4, v[6:7]
4615; GFX8-NEXT:    v_or_b32_e32 v1, v1, v5
4616; GFX8-NEXT:    v_or_b32_e32 v2, v2, v6
4617; GFX8-NEXT:    v_or_b32_e32 v3, v3, v7
4618; GFX8-NEXT:    s_setpc_b64 s[30:31]
4619;
4620; GFX9-LABEL: v_fshr_v2i64:
4621; GFX9:       ; %bb.0:
4622; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4623; GFX9-NEXT:    v_and_b32_e32 v9, 63, v8
4624; GFX9-NEXT:    v_xor_b32_e32 v8, -1, v8
4625; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
4626; GFX9-NEXT:    v_and_b32_e32 v8, 63, v8
4627; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v8, v[0:1]
4628; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v9, v[4:5]
4629; GFX9-NEXT:    v_xor_b32_e32 v8, -1, v10
4630; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
4631; GFX9-NEXT:    v_or_b32_e32 v0, v0, v4
4632; GFX9-NEXT:    v_and_b32_e32 v4, 63, v10
4633; GFX9-NEXT:    v_and_b32_e32 v8, 63, v8
4634; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v8, v[2:3]
4635; GFX9-NEXT:    v_lshrrev_b64 v[6:7], v4, v[6:7]
4636; GFX9-NEXT:    v_or_b32_e32 v1, v1, v5
4637; GFX9-NEXT:    v_or_b32_e32 v2, v2, v6
4638; GFX9-NEXT:    v_or_b32_e32 v3, v3, v7
4639; GFX9-NEXT:    s_setpc_b64 s[30:31]
4640;
4641; GFX10-LABEL: v_fshr_v2i64:
4642; GFX10:       ; %bb.0:
4643; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4644; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4645; GFX10-NEXT:    v_xor_b32_e32 v9, -1, v8
4646; GFX10-NEXT:    v_xor_b32_e32 v11, -1, v10
4647; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
4648; GFX10-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
4649; GFX10-NEXT:    v_and_b32_e32 v8, 63, v8
4650; GFX10-NEXT:    v_and_b32_e32 v9, 63, v9
4651; GFX10-NEXT:    v_and_b32_e32 v11, 63, v11
4652; GFX10-NEXT:    v_and_b32_e32 v10, 63, v10
4653; GFX10-NEXT:    v_lshrrev_b64 v[4:5], v8, v[4:5]
4654; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v9, v[0:1]
4655; GFX10-NEXT:    v_lshlrev_b64 v[2:3], v11, v[2:3]
4656; GFX10-NEXT:    v_lshrrev_b64 v[6:7], v10, v[6:7]
4657; GFX10-NEXT:    v_or_b32_e32 v0, v0, v4
4658; GFX10-NEXT:    v_or_b32_e32 v1, v1, v5
4659; GFX10-NEXT:    v_or_b32_e32 v2, v2, v6
4660; GFX10-NEXT:    v_or_b32_e32 v3, v3, v7
4661; GFX10-NEXT:    s_setpc_b64 s[30:31]
4662  %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt)
4663  ret <2 x i64> %result
4664}
4665
; 128-bit funnel shift right in an amdgpu_ps shader with every operand marked
; inreg. In the expected output below %lhs occupies s[0:3], %rhs s[4:7], and
; the shift amount is taken from s[8:9]; only scalar instructions are emitted.
; Expected shape of the lowering, as visible in the assertions:
;   - the amount is masked with 0x7f (amt & 0x7f for the right shift,
;     0x7f & ~amt for the left shift);
;   - %lhs is first shifted left by one bit across all 128 bits (two 64-bit
;     shifts plus the bit carried out of s1 via a 31-bit right shift);
;   - each 128-bit shift is then expanded into 64-bit shifts, with
;     s_cmp/s_cselect choosing between the "amount < 64", "amount >= 64" and
;     "amount == 0" cases;
;   - the two shifted halves are OR-ed into s[0:1] and s[2:3].
; The assertion lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
4666define amdgpu_ps i128 @s_fshr_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg %amt) {
4667; GFX6-LABEL: s_fshr_i128:
4668; GFX6:       ; %bb.0:
4669; GFX6-NEXT:    s_movk_i32 s10, 0x7f
4670; GFX6-NEXT:    s_mov_b32 s11, 0
4671; GFX6-NEXT:    s_and_b64 s[12:13], s[8:9], s[10:11]
4672; GFX6-NEXT:    s_andn2_b64 s[8:9], s[10:11], s[8:9]
4673; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
4674; GFX6-NEXT:    s_lshr_b32 s10, s1, 31
4675; GFX6-NEXT:    s_lshl_b64 s[14:15], s[0:1], 1
4676; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[10:11]
4677; GFX6-NEXT:    s_sub_i32 s13, s8, 64
4678; GFX6-NEXT:    s_sub_i32 s9, 64, s8
4679; GFX6-NEXT:    s_cmp_lt_u32 s8, 64
4680; GFX6-NEXT:    s_cselect_b32 s16, 1, 0
4681; GFX6-NEXT:    s_cmp_eq_u32 s8, 0
4682; GFX6-NEXT:    s_cselect_b32 s17, 1, 0
4683; GFX6-NEXT:    s_lshl_b64 s[2:3], s[14:15], s8
4684; GFX6-NEXT:    s_lshr_b64 s[10:11], s[14:15], s9
4685; GFX6-NEXT:    s_lshl_b64 s[8:9], s[0:1], s8
4686; GFX6-NEXT:    s_or_b64 s[8:9], s[10:11], s[8:9]
4687; GFX6-NEXT:    s_lshl_b64 s[10:11], s[14:15], s13
4688; GFX6-NEXT:    s_cmp_lg_u32 s16, 0
4689; GFX6-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
4690; GFX6-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[10:11]
4691; GFX6-NEXT:    s_cmp_lg_u32 s17, 0
4692; GFX6-NEXT:    s_cselect_b64 s[8:9], s[0:1], s[8:9]
4693; GFX6-NEXT:    s_sub_i32 s14, s12, 64
4694; GFX6-NEXT:    s_sub_i32 s13, 64, s12
4695; GFX6-NEXT:    s_cmp_lt_u32 s12, 64
4696; GFX6-NEXT:    s_cselect_b32 s15, 1, 0
4697; GFX6-NEXT:    s_cmp_eq_u32 s12, 0
4698; GFX6-NEXT:    s_cselect_b32 s16, 1, 0
4699; GFX6-NEXT:    s_lshr_b64 s[0:1], s[6:7], s12
4700; GFX6-NEXT:    s_lshr_b64 s[10:11], s[4:5], s12
4701; GFX6-NEXT:    s_lshl_b64 s[12:13], s[6:7], s13
4702; GFX6-NEXT:    s_or_b64 s[10:11], s[10:11], s[12:13]
4703; GFX6-NEXT:    s_lshr_b64 s[6:7], s[6:7], s14
4704; GFX6-NEXT:    s_cmp_lg_u32 s15, 0
4705; GFX6-NEXT:    s_cselect_b64 s[6:7], s[10:11], s[6:7]
4706; GFX6-NEXT:    s_cmp_lg_u32 s16, 0
4707; GFX6-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
4708; GFX6-NEXT:    s_cmp_lg_u32 s15, 0
4709; GFX6-NEXT:    s_cselect_b64 s[6:7], s[0:1], 0
4710; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[4:5]
4711; GFX6-NEXT:    s_or_b64 s[2:3], s[8:9], s[6:7]
4712; GFX6-NEXT:    ; return to shader part epilog
4713;
4714; GFX8-LABEL: s_fshr_i128:
4715; GFX8:       ; %bb.0:
4716; GFX8-NEXT:    s_movk_i32 s10, 0x7f
4717; GFX8-NEXT:    s_mov_b32 s11, 0
4718; GFX8-NEXT:    s_and_b64 s[12:13], s[8:9], s[10:11]
4719; GFX8-NEXT:    s_andn2_b64 s[8:9], s[10:11], s[8:9]
4720; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
4721; GFX8-NEXT:    s_lshr_b32 s10, s1, 31
4722; GFX8-NEXT:    s_lshl_b64 s[14:15], s[0:1], 1
4723; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[10:11]
4724; GFX8-NEXT:    s_sub_i32 s13, s8, 64
4725; GFX8-NEXT:    s_sub_i32 s9, 64, s8
4726; GFX8-NEXT:    s_cmp_lt_u32 s8, 64
4727; GFX8-NEXT:    s_cselect_b32 s16, 1, 0
4728; GFX8-NEXT:    s_cmp_eq_u32 s8, 0
4729; GFX8-NEXT:    s_cselect_b32 s17, 1, 0
4730; GFX8-NEXT:    s_lshl_b64 s[2:3], s[14:15], s8
4731; GFX8-NEXT:    s_lshr_b64 s[10:11], s[14:15], s9
4732; GFX8-NEXT:    s_lshl_b64 s[8:9], s[0:1], s8
4733; GFX8-NEXT:    s_or_b64 s[8:9], s[10:11], s[8:9]
4734; GFX8-NEXT:    s_lshl_b64 s[10:11], s[14:15], s13
4735; GFX8-NEXT:    s_cmp_lg_u32 s16, 0
4736; GFX8-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
4737; GFX8-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[10:11]
4738; GFX8-NEXT:    s_cmp_lg_u32 s17, 0
4739; GFX8-NEXT:    s_cselect_b64 s[8:9], s[0:1], s[8:9]
4740; GFX8-NEXT:    s_sub_i32 s14, s12, 64
4741; GFX8-NEXT:    s_sub_i32 s13, 64, s12
4742; GFX8-NEXT:    s_cmp_lt_u32 s12, 64
4743; GFX8-NEXT:    s_cselect_b32 s15, 1, 0
4744; GFX8-NEXT:    s_cmp_eq_u32 s12, 0
4745; GFX8-NEXT:    s_cselect_b32 s16, 1, 0
4746; GFX8-NEXT:    s_lshr_b64 s[0:1], s[6:7], s12
4747; GFX8-NEXT:    s_lshr_b64 s[10:11], s[4:5], s12
4748; GFX8-NEXT:    s_lshl_b64 s[12:13], s[6:7], s13
4749; GFX8-NEXT:    s_or_b64 s[10:11], s[10:11], s[12:13]
4750; GFX8-NEXT:    s_lshr_b64 s[6:7], s[6:7], s14
4751; GFX8-NEXT:    s_cmp_lg_u32 s15, 0
4752; GFX8-NEXT:    s_cselect_b64 s[6:7], s[10:11], s[6:7]
4753; GFX8-NEXT:    s_cmp_lg_u32 s16, 0
4754; GFX8-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
4755; GFX8-NEXT:    s_cmp_lg_u32 s15, 0
4756; GFX8-NEXT:    s_cselect_b64 s[6:7], s[0:1], 0
4757; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[4:5]
4758; GFX8-NEXT:    s_or_b64 s[2:3], s[8:9], s[6:7]
4759; GFX8-NEXT:    ; return to shader part epilog
4760;
4761; GFX9-LABEL: s_fshr_i128:
4762; GFX9:       ; %bb.0:
4763; GFX9-NEXT:    s_movk_i32 s10, 0x7f
4764; GFX9-NEXT:    s_mov_b32 s11, 0
4765; GFX9-NEXT:    s_and_b64 s[12:13], s[8:9], s[10:11]
4766; GFX9-NEXT:    s_andn2_b64 s[8:9], s[10:11], s[8:9]
4767; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
4768; GFX9-NEXT:    s_lshr_b32 s10, s1, 31
4769; GFX9-NEXT:    s_lshl_b64 s[14:15], s[0:1], 1
4770; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[10:11]
4771; GFX9-NEXT:    s_sub_i32 s13, s8, 64
4772; GFX9-NEXT:    s_sub_i32 s9, 64, s8
4773; GFX9-NEXT:    s_cmp_lt_u32 s8, 64
4774; GFX9-NEXT:    s_cselect_b32 s16, 1, 0
4775; GFX9-NEXT:    s_cmp_eq_u32 s8, 0
4776; GFX9-NEXT:    s_cselect_b32 s17, 1, 0
4777; GFX9-NEXT:    s_lshl_b64 s[2:3], s[14:15], s8
4778; GFX9-NEXT:    s_lshr_b64 s[10:11], s[14:15], s9
4779; GFX9-NEXT:    s_lshl_b64 s[8:9], s[0:1], s8
4780; GFX9-NEXT:    s_or_b64 s[8:9], s[10:11], s[8:9]
4781; GFX9-NEXT:    s_lshl_b64 s[10:11], s[14:15], s13
4782; GFX9-NEXT:    s_cmp_lg_u32 s16, 0
4783; GFX9-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
4784; GFX9-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[10:11]
4785; GFX9-NEXT:    s_cmp_lg_u32 s17, 0
4786; GFX9-NEXT:    s_cselect_b64 s[8:9], s[0:1], s[8:9]
4787; GFX9-NEXT:    s_sub_i32 s14, s12, 64
4788; GFX9-NEXT:    s_sub_i32 s13, 64, s12
4789; GFX9-NEXT:    s_cmp_lt_u32 s12, 64
4790; GFX9-NEXT:    s_cselect_b32 s15, 1, 0
4791; GFX9-NEXT:    s_cmp_eq_u32 s12, 0
4792; GFX9-NEXT:    s_cselect_b32 s16, 1, 0
4793; GFX9-NEXT:    s_lshr_b64 s[0:1], s[6:7], s12
4794; GFX9-NEXT:    s_lshr_b64 s[10:11], s[4:5], s12
4795; GFX9-NEXT:    s_lshl_b64 s[12:13], s[6:7], s13
4796; GFX9-NEXT:    s_or_b64 s[10:11], s[10:11], s[12:13]
4797; GFX9-NEXT:    s_lshr_b64 s[6:7], s[6:7], s14
4798; GFX9-NEXT:    s_cmp_lg_u32 s15, 0
4799; GFX9-NEXT:    s_cselect_b64 s[6:7], s[10:11], s[6:7]
4800; GFX9-NEXT:    s_cmp_lg_u32 s16, 0
4801; GFX9-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
4802; GFX9-NEXT:    s_cmp_lg_u32 s15, 0
4803; GFX9-NEXT:    s_cselect_b64 s[6:7], s[0:1], 0
4804; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[4:5]
4805; GFX9-NEXT:    s_or_b64 s[2:3], s[8:9], s[6:7]
4806; GFX9-NEXT:    ; return to shader part epilog
4807;
4808; GFX10-LABEL: s_fshr_i128:
4809; GFX10:       ; %bb.0:
4810; GFX10-NEXT:    s_movk_i32 s10, 0x7f
4811; GFX10-NEXT:    s_mov_b32 s11, 0
4812; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
4813; GFX10-NEXT:    s_and_b64 s[12:13], s[8:9], s[10:11]
4814; GFX10-NEXT:    s_andn2_b64 s[8:9], s[10:11], s[8:9]
4815; GFX10-NEXT:    s_lshr_b32 s10, s1, 31
4816; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4817; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[10:11]
4818; GFX10-NEXT:    s_sub_i32 s13, s8, 64
4819; GFX10-NEXT:    s_sub_i32 s9, 64, s8
4820; GFX10-NEXT:    s_cmp_lt_u32 s8, 64
4821; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
4822; GFX10-NEXT:    s_cmp_eq_u32 s8, 0
4823; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
4824; GFX10-NEXT:    s_lshr_b64 s[10:11], s[0:1], s9
4825; GFX10-NEXT:    s_lshl_b64 s[14:15], s[2:3], s8
4826; GFX10-NEXT:    s_lshl_b64 s[8:9], s[0:1], s8
4827; GFX10-NEXT:    s_or_b64 s[10:11], s[10:11], s[14:15]
4828; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s13
4829; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
4830; GFX10-NEXT:    s_cselect_b64 s[8:9], s[8:9], 0
4831; GFX10-NEXT:    s_cselect_b64 s[0:1], s[10:11], s[0:1]
4832; GFX10-NEXT:    s_cmp_lg_u32 s17, 0
4833; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
4834; GFX10-NEXT:    s_sub_i32 s14, s12, 64
4835; GFX10-NEXT:    s_sub_i32 s10, 64, s12
4836; GFX10-NEXT:    s_cmp_lt_u32 s12, 64
4837; GFX10-NEXT:    s_cselect_b32 s15, 1, 0
4838; GFX10-NEXT:    s_cmp_eq_u32 s12, 0
4839; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
4840; GFX10-NEXT:    s_lshr_b64 s[0:1], s[4:5], s12
4841; GFX10-NEXT:    s_lshl_b64 s[10:11], s[6:7], s10
4842; GFX10-NEXT:    s_lshr_b64 s[12:13], s[6:7], s12
4843; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[10:11]
4844; GFX10-NEXT:    s_lshr_b64 s[6:7], s[6:7], s14
4845; GFX10-NEXT:    s_cmp_lg_u32 s15, 0
4846; GFX10-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[6:7]
4847; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
4848; GFX10-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
4849; GFX10-NEXT:    s_cmp_lg_u32 s15, 0
4850; GFX10-NEXT:    s_cselect_b64 s[4:5], s[12:13], 0
4851; GFX10-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
4852; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
4853; GFX10-NEXT:    ; return to shader part epilog
4854  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
4855  ret i128 %result
4856}
4857
; 128-bit funnel shift right as an ordinary (non-shader) function with no inreg
; operands. In the expected output below %lhs occupies v[0:3], %rhs v[4:7], and
; the shift amount is read from v8 only; the function returns via
; s_setpc_b64 s[30:31].
; Expected shape of the lowering, as visible in the assertions:
;   - the amount is masked with 0x7f (amt & 0x7f for the right shift,
;     ~amt & 0x7f for the left shift);
;   - %lhs is first shifted left by one bit across all 128 bits (two 64-bit
;     shifts plus the bit carried out of v1 via a 31-bit right shift);
;   - each 128-bit shift is expanded into 64-bit shifts, and v_cmp/v_cndmask
;     pick between the "amount < 64", "amount >= 64" and "amount == 0" cases;
;   - the two shifted values are OR-ed dword by dword into v[0:3].
; The assertion lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
4858define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
4859; GFX6-LABEL: v_fshr_i128:
4860; GFX6:       ; %bb.0:
4861; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4862; GFX6-NEXT:    s_movk_i32 s4, 0x7f
4863; GFX6-NEXT:    v_and_b32_e32 v14, s4, v8
4864; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v8
4865; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
4866; GFX6-NEXT:    v_and_b32_e32 v15, s4, v8
4867; GFX6-NEXT:    v_lshl_b64 v[8:9], v[0:1], 1
4868; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
4869; GFX6-NEXT:    v_or_b32_e32 v2, v2, v0
4870; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, 64, v15
4871; GFX6-NEXT:    v_lshr_b64 v[0:1], v[8:9], v0
4872; GFX6-NEXT:    v_lshl_b64 v[10:11], v[2:3], v15
4873; GFX6-NEXT:    v_subrev_i32_e32 v16, vcc, 64, v15
4874; GFX6-NEXT:    v_lshl_b64 v[12:13], v[8:9], v15
4875; GFX6-NEXT:    v_or_b32_e32 v10, v0, v10
4876; GFX6-NEXT:    v_or_b32_e32 v11, v1, v11
4877; GFX6-NEXT:    v_lshl_b64 v[0:1], v[8:9], v16
4878; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
4879; GFX6-NEXT:    v_cndmask_b32_e32 v12, 0, v12, vcc
4880; GFX6-NEXT:    v_cndmask_b32_e32 v13, 0, v13, vcc
4881; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
4882; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
4883; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v15
4884; GFX6-NEXT:    v_cndmask_b32_e32 v10, v0, v2, vcc
4885; GFX6-NEXT:    v_cndmask_b32_e32 v11, v1, v3, vcc
4886; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 64, v14
4887; GFX6-NEXT:    v_lshr_b64 v[0:1], v[4:5], v14
4888; GFX6-NEXT:    v_lshl_b64 v[2:3], v[6:7], v2
4889; GFX6-NEXT:    v_subrev_i32_e32 v15, vcc, 64, v14
4890; GFX6-NEXT:    v_or_b32_e32 v2, v0, v2
4891; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
4892; GFX6-NEXT:    v_lshr_b64 v[0:1], v[6:7], v15
4893; GFX6-NEXT:    v_lshr_b64 v[8:9], v[6:7], v14
4894; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
4895; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
4896; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
4897; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
4898; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
4899; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s[4:5]
4900; GFX6-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
4901; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
4902; GFX6-NEXT:    v_or_b32_e32 v0, v12, v0
4903; GFX6-NEXT:    v_or_b32_e32 v1, v13, v1
4904; GFX6-NEXT:    v_or_b32_e32 v2, v10, v2
4905; GFX6-NEXT:    v_or_b32_e32 v3, v11, v3
4906; GFX6-NEXT:    s_setpc_b64 s[30:31]
4907;
4908; GFX8-LABEL: v_fshr_i128:
4909; GFX8:       ; %bb.0:
4910; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4911; GFX8-NEXT:    s_movk_i32 s4, 0x7f
4912; GFX8-NEXT:    v_and_b32_e32 v14, s4, v8
4913; GFX8-NEXT:    v_xor_b32_e32 v8, -1, v8
4914; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
4915; GFX8-NEXT:    v_and_b32_e32 v15, s4, v8
4916; GFX8-NEXT:    v_lshlrev_b64 v[8:9], 1, v[0:1]
4917; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
4918; GFX8-NEXT:    v_or_b32_e32 v2, v2, v0
4919; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, 64, v15
4920; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v0, v[8:9]
4921; GFX8-NEXT:    v_lshlrev_b64 v[10:11], v15, v[2:3]
4922; GFX8-NEXT:    v_subrev_u32_e32 v16, vcc, 64, v15
4923; GFX8-NEXT:    v_lshlrev_b64 v[12:13], v15, v[8:9]
4924; GFX8-NEXT:    v_or_b32_e32 v10, v0, v10
4925; GFX8-NEXT:    v_or_b32_e32 v11, v1, v11
4926; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v16, v[8:9]
4927; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
4928; GFX8-NEXT:    v_cndmask_b32_e32 v12, 0, v12, vcc
4929; GFX8-NEXT:    v_cndmask_b32_e32 v13, 0, v13, vcc
4930; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
4931; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
4932; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v15
4933; GFX8-NEXT:    v_cndmask_b32_e32 v10, v0, v2, vcc
4934; GFX8-NEXT:    v_cndmask_b32_e32 v11, v1, v3, vcc
4935; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 64, v14
4936; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v14, v[4:5]
4937; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v2, v[6:7]
4938; GFX8-NEXT:    v_subrev_u32_e32 v15, vcc, 64, v14
4939; GFX8-NEXT:    v_or_b32_e32 v2, v0, v2
4940; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
4941; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v15, v[6:7]
4942; GFX8-NEXT:    v_lshrrev_b64 v[8:9], v14, v[6:7]
4943; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
4944; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
4945; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
4946; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
4947; GFX8-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
4948; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s[4:5]
4949; GFX8-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
4950; GFX8-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
4951; GFX8-NEXT:    v_or_b32_e32 v0, v12, v0
4952; GFX8-NEXT:    v_or_b32_e32 v1, v13, v1
4953; GFX8-NEXT:    v_or_b32_e32 v2, v10, v2
4954; GFX8-NEXT:    v_or_b32_e32 v3, v11, v3
4955; GFX8-NEXT:    s_setpc_b64 s[30:31]
4956;
4957; GFX9-LABEL: v_fshr_i128:
4958; GFX9:       ; %bb.0:
4959; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4960; GFX9-NEXT:    s_movk_i32 s4, 0x7f
4961; GFX9-NEXT:    v_and_b32_e32 v14, s4, v8
4962; GFX9-NEXT:    v_xor_b32_e32 v8, -1, v8
4963; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
4964; GFX9-NEXT:    v_and_b32_e32 v15, s4, v8
4965; GFX9-NEXT:    v_lshlrev_b64 v[8:9], 1, v[0:1]
4966; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
4967; GFX9-NEXT:    v_or_b32_e32 v2, v2, v0
4968; GFX9-NEXT:    v_sub_u32_e32 v0, 64, v15
4969; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v0, v[8:9]
4970; GFX9-NEXT:    v_lshlrev_b64 v[10:11], v15, v[2:3]
4971; GFX9-NEXT:    v_subrev_u32_e32 v16, 64, v15
4972; GFX9-NEXT:    v_lshlrev_b64 v[12:13], v15, v[8:9]
4973; GFX9-NEXT:    v_or_b32_e32 v10, v0, v10
4974; GFX9-NEXT:    v_or_b32_e32 v11, v1, v11
4975; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v16, v[8:9]
4976; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
4977; GFX9-NEXT:    v_cndmask_b32_e32 v12, 0, v12, vcc
4978; GFX9-NEXT:    v_cndmask_b32_e32 v13, 0, v13, vcc
4979; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
4980; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
4981; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v15
4982; GFX9-NEXT:    v_cndmask_b32_e32 v10, v0, v2, vcc
4983; GFX9-NEXT:    v_sub_u32_e32 v2, 64, v14
4984; GFX9-NEXT:    v_cndmask_b32_e32 v11, v1, v3, vcc
4985; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v14, v[4:5]
4986; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v2, v[6:7]
4987; GFX9-NEXT:    v_subrev_u32_e32 v15, 64, v14
4988; GFX9-NEXT:    v_or_b32_e32 v2, v0, v2
4989; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
4990; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v15, v[6:7]
4991; GFX9-NEXT:    v_lshrrev_b64 v[8:9], v14, v[6:7]
4992; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
4993; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
4994; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
4995; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
4996; GFX9-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
4997; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s[4:5]
4998; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
4999; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
5000; GFX9-NEXT:    v_or_b32_e32 v0, v12, v0
5001; GFX9-NEXT:    v_or_b32_e32 v1, v13, v1
5002; GFX9-NEXT:    v_or_b32_e32 v2, v10, v2
5003; GFX9-NEXT:    v_or_b32_e32 v3, v11, v3
5004; GFX9-NEXT:    s_setpc_b64 s[30:31]
5005;
5006; GFX10-LABEL: v_fshr_i128:
5007; GFX10:       ; %bb.0:
5008; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5009; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
5010; GFX10-NEXT:    v_xor_b32_e32 v9, -1, v8
5011; GFX10-NEXT:    s_movk_i32 s4, 0x7f
5012; GFX10-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
5013; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 31, v1
5014; GFX10-NEXT:    v_and_b32_e32 v19, s4, v8
5015; GFX10-NEXT:    v_and_b32_e32 v18, s4, v9
5016; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5017; GFX10-NEXT:    v_or_b32_e32 v2, v2, v10
5018; GFX10-NEXT:    v_sub_nc_u32_e32 v16, 64, v19
5019; GFX10-NEXT:    v_sub_nc_u32_e32 v10, 64, v18
5020; GFX10-NEXT:    v_subrev_nc_u32_e32 v21, 64, v18
5021; GFX10-NEXT:    v_subrev_nc_u32_e32 v20, 64, v19
5022; GFX10-NEXT:    v_lshlrev_b64 v[8:9], v18, v[2:3]
5023; GFX10-NEXT:    v_lshrrev_b64 v[12:13], v19, v[4:5]
5024; GFX10-NEXT:    v_lshrrev_b64 v[10:11], v10, v[0:1]
5025; GFX10-NEXT:    v_lshlrev_b64 v[16:17], v16, v[6:7]
5026; GFX10-NEXT:    v_lshlrev_b64 v[14:15], v18, v[0:1]
5027; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v21, v[0:1]
5028; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v18
5029; GFX10-NEXT:    v_cmp_gt_u32_e64 s4, 64, v19
5030; GFX10-NEXT:    v_or_b32_e32 v10, v10, v8
5031; GFX10-NEXT:    v_or_b32_e32 v11, v11, v9
5032; GFX10-NEXT:    v_lshrrev_b64 v[8:9], v20, v[6:7]
5033; GFX10-NEXT:    v_or_b32_e32 v12, v12, v16
5034; GFX10-NEXT:    v_or_b32_e32 v13, v13, v17
5035; GFX10-NEXT:    v_cndmask_b32_e32 v10, v0, v10, vcc_lo
5036; GFX10-NEXT:    v_cndmask_b32_e32 v11, v1, v11, vcc_lo
5037; GFX10-NEXT:    v_lshrrev_b64 v[0:1], v19, v[6:7]
5038; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v12, s4
5039; GFX10-NEXT:    v_cmp_eq_u32_e64 s5, 0, v19
5040; GFX10-NEXT:    v_cmp_eq_u32_e64 s6, 0, v18
5041; GFX10-NEXT:    v_cndmask_b32_e64 v6, v9, v13, s4
5042; GFX10-NEXT:    v_cndmask_b32_e32 v14, 0, v14, vcc_lo
5043; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0, v15, vcc_lo
5044; GFX10-NEXT:    v_cndmask_b32_e64 v4, v8, v4, s5
5045; GFX10-NEXT:    v_cndmask_b32_e64 v2, v10, v2, s6
5046; GFX10-NEXT:    v_cndmask_b32_e64 v3, v11, v3, s6
5047; GFX10-NEXT:    v_cndmask_b32_e64 v5, v6, v5, s5
5048; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, v0, s4
5049; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, v1, s4
5050; GFX10-NEXT:    v_or_b32_e32 v0, v14, v4
5051; GFX10-NEXT:    v_or_b32_e32 v1, v7, v5
5052; GFX10-NEXT:    v_or_b32_e32 v2, v2, v6
5053; GFX10-NEXT:    v_or_b32_e32 v3, v3, v8
5054; GFX10-NEXT:    s_setpc_b64 s[30:31]
5055  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
5056  ret i128 %result
5057}
5058
5059define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, i128 %amt) {
5060; GFX6-LABEL: v_fshr_i128_ssv:
5061; GFX6:       ; %bb.0:
5062; GFX6-NEXT:    s_movk_i32 s8, 0x7f
5063; GFX6-NEXT:    v_and_b32_e32 v6, s8, v0
5064; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
5065; GFX6-NEXT:    s_mov_b32 s9, 0
5066; GFX6-NEXT:    v_and_b32_e32 v7, s8, v0
5067; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5068; GFX6-NEXT:    s_lshr_b32 s8, s1, 31
5069; GFX6-NEXT:    s_lshl_b64 s[10:11], s[0:1], 1
5070; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[8:9]
5071; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, 64, v7
5072; GFX6-NEXT:    v_lshr_b64 v[0:1], s[10:11], v0
5073; GFX6-NEXT:    v_lshl_b64 v[2:3], s[0:1], v7
5074; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, 64, v7
5075; GFX6-NEXT:    v_lshl_b64 v[4:5], s[10:11], v7
5076; GFX6-NEXT:    v_or_b32_e32 v2, v0, v2
5077; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
5078; GFX6-NEXT:    v_lshl_b64 v[0:1], s[10:11], v8
5079; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
5080; GFX6-NEXT:    v_cndmask_b32_e32 v8, 0, v4, vcc
5081; GFX6-NEXT:    v_cndmask_b32_e32 v9, 0, v5, vcc
5082; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
5083; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
5084; GFX6-NEXT:    v_mov_b32_e32 v2, s0
5085; GFX6-NEXT:    v_mov_b32_e32 v3, s1
5086; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
5087; GFX6-NEXT:    v_cndmask_b32_e32 v7, v0, v2, vcc
5088; GFX6-NEXT:    v_cndmask_b32_e32 v10, v1, v3, vcc
5089; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 64, v6
5090; GFX6-NEXT:    v_lshr_b64 v[0:1], s[4:5], v6
5091; GFX6-NEXT:    v_lshl_b64 v[2:3], s[6:7], v2
5092; GFX6-NEXT:    v_subrev_i32_e32 v11, vcc, 64, v6
5093; GFX6-NEXT:    v_or_b32_e32 v2, v0, v2
5094; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
5095; GFX6-NEXT:    v_lshr_b64 v[0:1], s[6:7], v11
5096; GFX6-NEXT:    v_lshr_b64 v[4:5], s[6:7], v6
5097; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
5098; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
5099; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
5100; GFX6-NEXT:    v_mov_b32_e32 v2, s4
5101; GFX6-NEXT:    v_mov_b32_e32 v3, s5
5102; GFX6-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v6
5103; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
5104; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
5105; GFX6-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
5106; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
5107; GFX6-NEXT:    v_or_b32_e32 v0, v8, v0
5108; GFX6-NEXT:    v_or_b32_e32 v1, v9, v1
5109; GFX6-NEXT:    v_or_b32_e32 v2, v7, v2
5110; GFX6-NEXT:    v_or_b32_e32 v3, v10, v3
5111; GFX6-NEXT:    ; return to shader part epilog
5112;
5113; GFX8-LABEL: v_fshr_i128_ssv:
5114; GFX8:       ; %bb.0:
5115; GFX8-NEXT:    s_movk_i32 s8, 0x7f
5116; GFX8-NEXT:    v_and_b32_e32 v6, s8, v0
5117; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
5118; GFX8-NEXT:    s_mov_b32 s9, 0
5119; GFX8-NEXT:    v_and_b32_e32 v7, s8, v0
5120; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5121; GFX8-NEXT:    s_lshr_b32 s8, s1, 31
5122; GFX8-NEXT:    s_lshl_b64 s[10:11], s[0:1], 1
5123; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[8:9]
5124; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, 64, v7
5125; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v0, s[10:11]
5126; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v7, s[0:1]
5127; GFX8-NEXT:    v_subrev_u32_e32 v8, vcc, 64, v7
5128; GFX8-NEXT:    v_lshlrev_b64 v[4:5], v7, s[10:11]
5129; GFX8-NEXT:    v_or_b32_e32 v2, v0, v2
5130; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
5131; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v8, s[10:11]
5132; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
5133; GFX8-NEXT:    v_cndmask_b32_e32 v8, 0, v4, vcc
5134; GFX8-NEXT:    v_cndmask_b32_e32 v9, 0, v5, vcc
5135; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
5136; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
5137; GFX8-NEXT:    v_mov_b32_e32 v2, s0
5138; GFX8-NEXT:    v_mov_b32_e32 v3, s1
5139; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
5140; GFX8-NEXT:    v_cndmask_b32_e32 v7, v0, v2, vcc
5141; GFX8-NEXT:    v_cndmask_b32_e32 v10, v1, v3, vcc
5142; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 64, v6
5143; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v6, s[4:5]
5144; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v2, s[6:7]
5145; GFX8-NEXT:    v_subrev_u32_e32 v11, vcc, 64, v6
5146; GFX8-NEXT:    v_or_b32_e32 v2, v0, v2
5147; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
5148; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v11, s[6:7]
5149; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v6, s[6:7]
5150; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
5151; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
5152; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
5153; GFX8-NEXT:    v_mov_b32_e32 v2, s4
5154; GFX8-NEXT:    v_mov_b32_e32 v3, s5
5155; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v6
5156; GFX8-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
5157; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
5158; GFX8-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
5159; GFX8-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
5160; GFX8-NEXT:    v_or_b32_e32 v0, v8, v0
5161; GFX8-NEXT:    v_or_b32_e32 v1, v9, v1
5162; GFX8-NEXT:    v_or_b32_e32 v2, v7, v2
5163; GFX8-NEXT:    v_or_b32_e32 v3, v10, v3
5164; GFX8-NEXT:    ; return to shader part epilog
5165;
5166; GFX9-LABEL: v_fshr_i128_ssv:
5167; GFX9:       ; %bb.0:
5168; GFX9-NEXT:    s_movk_i32 s8, 0x7f
5169; GFX9-NEXT:    v_and_b32_e32 v6, s8, v0
5170; GFX9-NEXT:    v_xor_b32_e32 v0, -1, v0
5171; GFX9-NEXT:    s_mov_b32 s9, 0
5172; GFX9-NEXT:    v_and_b32_e32 v7, s8, v0
5173; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5174; GFX9-NEXT:    s_lshr_b32 s8, s1, 31
5175; GFX9-NEXT:    s_lshl_b64 s[10:11], s[0:1], 1
5176; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[8:9]
5177; GFX9-NEXT:    v_sub_u32_e32 v0, 64, v7
5178; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v0, s[10:11]
5179; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v7, s[0:1]
5180; GFX9-NEXT:    v_subrev_u32_e32 v8, 64, v7
5181; GFX9-NEXT:    v_lshlrev_b64 v[4:5], v7, s[10:11]
5182; GFX9-NEXT:    v_or_b32_e32 v2, v0, v2
5183; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
5184; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v8, s[10:11]
5185; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
5186; GFX9-NEXT:    v_cndmask_b32_e32 v8, 0, v4, vcc
5187; GFX9-NEXT:    v_cndmask_b32_e32 v9, 0, v5, vcc
5188; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
5189; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
5190; GFX9-NEXT:    v_mov_b32_e32 v2, s0
5191; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
5192; GFX9-NEXT:    v_mov_b32_e32 v3, s1
5193; GFX9-NEXT:    v_cndmask_b32_e32 v7, v0, v2, vcc
5194; GFX9-NEXT:    v_sub_u32_e32 v2, 64, v6
5195; GFX9-NEXT:    v_cndmask_b32_e32 v10, v1, v3, vcc
5196; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v6, s[4:5]
5197; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v2, s[6:7]
5198; GFX9-NEXT:    v_subrev_u32_e32 v11, 64, v6
5199; GFX9-NEXT:    v_or_b32_e32 v2, v0, v2
5200; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
5201; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v11, s[6:7]
5202; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v6, s[6:7]
5203; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
5204; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
5205; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
5206; GFX9-NEXT:    v_mov_b32_e32 v2, s4
5207; GFX9-NEXT:    v_mov_b32_e32 v3, s5
5208; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v6
5209; GFX9-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
5210; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
5211; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
5212; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
5213; GFX9-NEXT:    v_or_b32_e32 v0, v8, v0
5214; GFX9-NEXT:    v_or_b32_e32 v1, v9, v1
5215; GFX9-NEXT:    v_or_b32_e32 v2, v7, v2
5216; GFX9-NEXT:    v_or_b32_e32 v3, v10, v3
5217; GFX9-NEXT:    ; return to shader part epilog
5218;
5219; GFX10-LABEL: v_fshr_i128_ssv:
5220; GFX10:       ; %bb.0:
5221; GFX10-NEXT:    v_xor_b32_e32 v1, -1, v0
5222; GFX10-NEXT:    s_movk_i32 s10, 0x7f
5223; GFX10-NEXT:    s_mov_b32 s9, 0
5224; GFX10-NEXT:    v_and_b32_e32 v13, s10, v0
5225; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5226; GFX10-NEXT:    v_and_b32_e32 v12, s10, v1
5227; GFX10-NEXT:    s_lshr_b32 s8, s1, 31
5228; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5229; GFX10-NEXT:    v_sub_nc_u32_e32 v8, 64, v13
5230; GFX10-NEXT:    s_or_b64 s[8:9], s[2:3], s[8:9]
5231; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 64, v12
5232; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v12, s[8:9]
5233; GFX10-NEXT:    v_subrev_nc_u32_e32 v10, 64, v12
5234; GFX10-NEXT:    v_subrev_nc_u32_e32 v14, 64, v13
5235; GFX10-NEXT:    v_lshrrev_b64 v[4:5], v13, s[4:5]
5236; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v2, s[0:1]
5237; GFX10-NEXT:    v_lshlrev_b64 v[8:9], v8, s[6:7]
5238; GFX10-NEXT:    v_lshlrev_b64 v[10:11], v10, s[0:1]
5239; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v12
5240; GFX10-NEXT:    v_lshlrev_b64 v[6:7], v12, s[0:1]
5241; GFX10-NEXT:    v_cmp_gt_u32_e64 s0, 64, v13
5242; GFX10-NEXT:    v_or_b32_e32 v2, v2, v0
5243; GFX10-NEXT:    v_or_b32_e32 v3, v3, v1
5244; GFX10-NEXT:    v_lshrrev_b64 v[0:1], v14, s[6:7]
5245; GFX10-NEXT:    v_or_b32_e32 v4, v4, v8
5246; GFX10-NEXT:    v_or_b32_e32 v5, v5, v9
5247; GFX10-NEXT:    v_cndmask_b32_e32 v8, v10, v2, vcc_lo
5248; GFX10-NEXT:    v_cndmask_b32_e32 v10, v11, v3, vcc_lo
5249; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v13, s[6:7]
5250; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s0
5251; GFX10-NEXT:    v_cmp_eq_u32_e64 s1, 0, v13
5252; GFX10-NEXT:    v_cmp_eq_u32_e64 s2, 0, v12
5253; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s0
5254; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc_lo
5255; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0, v7, vcc_lo
5256; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, s4, s1
5257; GFX10-NEXT:    v_cndmask_b32_e64 v5, v8, s8, s2
5258; GFX10-NEXT:    v_cndmask_b32_e64 v7, v10, s9, s2
5259; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s5, s1
5260; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s0
5261; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s0
5262; GFX10-NEXT:    v_or_b32_e32 v0, v6, v0
5263; GFX10-NEXT:    v_or_b32_e32 v1, v4, v1
5264; GFX10-NEXT:    v_or_b32_e32 v2, v5, v2
5265; GFX10-NEXT:    v_or_b32_e32 v3, v7, v3
5266; GFX10-NEXT:    ; return to shader part epilog
5267  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
5268  %cast.result = bitcast i128 %result to <4 x float>
5269  ret <4 x float> %cast.result
5270}
5271
; Test case: 128-bit funnel shift right (llvm.fshr.i128) in an amdgpu_ps shader
; where %lhs and %amt are marked inreg and %rhs is not.
;
; What the expected output below shows, per target:
;   - %lhs is read from s[0:3], %rhs from v[0:3], and the amount from s[4:5].
;   - The amount is masked with 0x7f; a second value, 0x7f and-not amount, is
;     also formed.
;   - %lhs is first shifted left by one bit, then by the and-not value; %rhs is
;     shifted right by the masked amount. The two 128-bit halves are ORed.
;   - Each 128-bit shift is built from 64-bit shifts plus selects keyed on
;     "amount < 64" and "amount == 0".
;   - The final ORs write v0-v3, matching the <4 x float> return value.
;
; The suffix "svs" presumably encodes the scalar/vector placement of
; (lhs, rhs, amt) - confirm against the naming used elsewhere in this file.
;
; The check lines are autogenerated (see the first line of this file).
; Regenerate them with utils/update_llc_test_checks.py; do not edit by hand.
5272define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 inreg %amt) {
5273; GFX6-LABEL: v_fshr_i128_svs:
5274; GFX6:       ; %bb.0:
5275; GFX6-NEXT:    s_movk_i32 s6, 0x7f
5276; GFX6-NEXT:    s_mov_b32 s7, 0
5277; GFX6-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
5278; GFX6-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
5279; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5280; GFX6-NEXT:    s_lshr_b32 s6, s1, 31
5281; GFX6-NEXT:    s_lshl_b64 s[10:11], s[0:1], 1
5282; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[6:7]
5283; GFX6-NEXT:    s_sub_i32 s9, s4, 64
5284; GFX6-NEXT:    s_sub_i32 s5, 64, s4
5285; GFX6-NEXT:    s_cmp_lt_u32 s4, 64
5286; GFX6-NEXT:    s_cselect_b32 s12, 1, 0
5287; GFX6-NEXT:    s_cmp_eq_u32 s4, 0
5288; GFX6-NEXT:    s_cselect_b32 s13, 1, 0
5289; GFX6-NEXT:    s_lshl_b64 s[2:3], s[10:11], s4
5290; GFX6-NEXT:    s_lshr_b64 s[6:7], s[10:11], s5
5291; GFX6-NEXT:    s_lshl_b64 s[4:5], s[0:1], s4
5292; GFX6-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
5293; GFX6-NEXT:    s_lshl_b64 s[6:7], s[10:11], s9
5294; GFX6-NEXT:    s_cmp_lg_u32 s12, 0
5295; GFX6-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
5296; GFX6-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
5297; GFX6-NEXT:    s_cmp_lg_u32 s13, 0
5298; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
5299; GFX6-NEXT:    s_sub_i32 s4, s8, 64
5300; GFX6-NEXT:    s_sub_i32 s5, 64, s8
5301; GFX6-NEXT:    s_cmp_lt_u32 s8, 64
5302; GFX6-NEXT:    s_cselect_b32 s6, 1, 0
5303; GFX6-NEXT:    s_cmp_eq_u32 s8, 0
5304; GFX6-NEXT:    v_lshr_b64 v[4:5], v[0:1], s8
5305; GFX6-NEXT:    v_lshl_b64 v[6:7], v[2:3], s5
5306; GFX6-NEXT:    s_cselect_b32 s7, 1, 0
5307; GFX6-NEXT:    v_lshr_b64 v[8:9], v[2:3], s8
5308; GFX6-NEXT:    v_lshr_b64 v[2:3], v[2:3], s4
5309; GFX6-NEXT:    s_and_b32 s4, 1, s6
5310; GFX6-NEXT:    v_or_b32_e32 v4, v4, v6
5311; GFX6-NEXT:    v_or_b32_e32 v5, v5, v7
5312; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5313; GFX6-NEXT:    s_and_b32 s4, 1, s7
5314; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
5315; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
5316; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5317; GFX6-NEXT:    s_and_b32 s4, 1, s6
5318; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
5319; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
5320; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5321; GFX6-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
5322; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
5323; GFX6-NEXT:    v_or_b32_e32 v0, s2, v0
5324; GFX6-NEXT:    v_or_b32_e32 v1, s3, v1
5325; GFX6-NEXT:    v_or_b32_e32 v2, s0, v2
5326; GFX6-NEXT:    v_or_b32_e32 v3, s1, v3
5327; GFX6-NEXT:    ; return to shader part epilog
5328;
5329; GFX8-LABEL: v_fshr_i128_svs:
5330; GFX8:       ; %bb.0:
5331; GFX8-NEXT:    s_movk_i32 s6, 0x7f
5332; GFX8-NEXT:    s_mov_b32 s7, 0
5333; GFX8-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
5334; GFX8-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
5335; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5336; GFX8-NEXT:    s_lshr_b32 s6, s1, 31
5337; GFX8-NEXT:    s_lshl_b64 s[10:11], s[0:1], 1
5338; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[6:7]
5339; GFX8-NEXT:    s_sub_i32 s9, s4, 64
5340; GFX8-NEXT:    s_sub_i32 s5, 64, s4
5341; GFX8-NEXT:    s_cmp_lt_u32 s4, 64
5342; GFX8-NEXT:    s_cselect_b32 s12, 1, 0
5343; GFX8-NEXT:    s_cmp_eq_u32 s4, 0
5344; GFX8-NEXT:    s_cselect_b32 s13, 1, 0
5345; GFX8-NEXT:    s_lshl_b64 s[2:3], s[10:11], s4
5346; GFX8-NEXT:    s_lshr_b64 s[6:7], s[10:11], s5
5347; GFX8-NEXT:    s_lshl_b64 s[4:5], s[0:1], s4
5348; GFX8-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
5349; GFX8-NEXT:    s_lshl_b64 s[6:7], s[10:11], s9
5350; GFX8-NEXT:    s_cmp_lg_u32 s12, 0
5351; GFX8-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
5352; GFX8-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
5353; GFX8-NEXT:    s_cmp_lg_u32 s13, 0
5354; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
5355; GFX8-NEXT:    s_sub_i32 s4, s8, 64
5356; GFX8-NEXT:    s_sub_i32 s5, 64, s8
5357; GFX8-NEXT:    s_cmp_lt_u32 s8, 64
5358; GFX8-NEXT:    s_cselect_b32 s6, 1, 0
5359; GFX8-NEXT:    s_cmp_eq_u32 s8, 0
5360; GFX8-NEXT:    v_lshrrev_b64 v[4:5], s8, v[0:1]
5361; GFX8-NEXT:    v_lshlrev_b64 v[6:7], s5, v[2:3]
5362; GFX8-NEXT:    s_cselect_b32 s7, 1, 0
5363; GFX8-NEXT:    v_lshrrev_b64 v[8:9], s8, v[2:3]
5364; GFX8-NEXT:    v_lshrrev_b64 v[2:3], s4, v[2:3]
5365; GFX8-NEXT:    s_and_b32 s4, 1, s6
5366; GFX8-NEXT:    v_or_b32_e32 v4, v4, v6
5367; GFX8-NEXT:    v_or_b32_e32 v5, v5, v7
5368; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5369; GFX8-NEXT:    s_and_b32 s4, 1, s7
5370; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
5371; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
5372; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5373; GFX8-NEXT:    s_and_b32 s4, 1, s6
5374; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
5375; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
5376; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5377; GFX8-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
5378; GFX8-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
5379; GFX8-NEXT:    v_or_b32_e32 v0, s2, v0
5380; GFX8-NEXT:    v_or_b32_e32 v1, s3, v1
5381; GFX8-NEXT:    v_or_b32_e32 v2, s0, v2
5382; GFX8-NEXT:    v_or_b32_e32 v3, s1, v3
5383; GFX8-NEXT:    ; return to shader part epilog
5384;
5385; GFX9-LABEL: v_fshr_i128_svs:
5386; GFX9:       ; %bb.0:
5387; GFX9-NEXT:    s_movk_i32 s6, 0x7f
5388; GFX9-NEXT:    s_mov_b32 s7, 0
5389; GFX9-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
5390; GFX9-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
5391; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5392; GFX9-NEXT:    s_lshr_b32 s6, s1, 31
5393; GFX9-NEXT:    s_lshl_b64 s[10:11], s[0:1], 1
5394; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[6:7]
5395; GFX9-NEXT:    s_sub_i32 s9, s4, 64
5396; GFX9-NEXT:    s_sub_i32 s5, 64, s4
5397; GFX9-NEXT:    s_cmp_lt_u32 s4, 64
5398; GFX9-NEXT:    s_cselect_b32 s12, 1, 0
5399; GFX9-NEXT:    s_cmp_eq_u32 s4, 0
5400; GFX9-NEXT:    s_cselect_b32 s13, 1, 0
5401; GFX9-NEXT:    s_lshl_b64 s[2:3], s[10:11], s4
5402; GFX9-NEXT:    s_lshr_b64 s[6:7], s[10:11], s5
5403; GFX9-NEXT:    s_lshl_b64 s[4:5], s[0:1], s4
5404; GFX9-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
5405; GFX9-NEXT:    s_lshl_b64 s[6:7], s[10:11], s9
5406; GFX9-NEXT:    s_cmp_lg_u32 s12, 0
5407; GFX9-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
5408; GFX9-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
5409; GFX9-NEXT:    s_cmp_lg_u32 s13, 0
5410; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
5411; GFX9-NEXT:    s_sub_i32 s4, s8, 64
5412; GFX9-NEXT:    s_sub_i32 s5, 64, s8
5413; GFX9-NEXT:    s_cmp_lt_u32 s8, 64
5414; GFX9-NEXT:    s_cselect_b32 s6, 1, 0
5415; GFX9-NEXT:    s_cmp_eq_u32 s8, 0
5416; GFX9-NEXT:    v_lshrrev_b64 v[4:5], s8, v[0:1]
5417; GFX9-NEXT:    v_lshlrev_b64 v[6:7], s5, v[2:3]
5418; GFX9-NEXT:    s_cselect_b32 s7, 1, 0
5419; GFX9-NEXT:    v_lshrrev_b64 v[8:9], s8, v[2:3]
5420; GFX9-NEXT:    v_lshrrev_b64 v[2:3], s4, v[2:3]
5421; GFX9-NEXT:    s_and_b32 s4, 1, s6
5422; GFX9-NEXT:    v_or_b32_e32 v4, v4, v6
5423; GFX9-NEXT:    v_or_b32_e32 v5, v5, v7
5424; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5425; GFX9-NEXT:    s_and_b32 s4, 1, s7
5426; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
5427; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
5428; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5429; GFX9-NEXT:    s_and_b32 s4, 1, s6
5430; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
5431; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
5432; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5433; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
5434; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
5435; GFX9-NEXT:    v_or_b32_e32 v0, s2, v0
5436; GFX9-NEXT:    v_or_b32_e32 v1, s3, v1
5437; GFX9-NEXT:    v_or_b32_e32 v2, s0, v2
5438; GFX9-NEXT:    v_or_b32_e32 v3, s1, v3
5439; GFX9-NEXT:    ; return to shader part epilog
5440;
5441; GFX10-LABEL: v_fshr_i128_svs:
5442; GFX10:       ; %bb.0:
5443; GFX10-NEXT:    s_movk_i32 s6, 0x7f
5444; GFX10-NEXT:    s_mov_b32 s7, 0
5445; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5446; GFX10-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
5447; GFX10-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
5448; GFX10-NEXT:    s_lshr_b32 s6, s1, 31
5449; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5450; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[6:7]
5451; GFX10-NEXT:    s_sub_i32 s9, s4, 64
5452; GFX10-NEXT:    s_sub_i32 s5, 64, s4
5453; GFX10-NEXT:    s_cmp_lt_u32 s4, 64
5454; GFX10-NEXT:    v_lshrrev_b64 v[4:5], s8, v[0:1]
5455; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
5456; GFX10-NEXT:    s_cmp_eq_u32 s4, 0
5457; GFX10-NEXT:    s_cselect_b32 s13, 1, 0
5458; GFX10-NEXT:    s_lshr_b64 s[6:7], s[0:1], s5
5459; GFX10-NEXT:    s_lshl_b64 s[10:11], s[2:3], s4
5460; GFX10-NEXT:    s_lshl_b64 s[4:5], s[0:1], s4
5461; GFX10-NEXT:    s_or_b64 s[6:7], s[6:7], s[10:11]
5462; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s9
5463; GFX10-NEXT:    s_cmp_lg_u32 s12, 0
5464; GFX10-NEXT:    s_cselect_b64 s[4:5], s[4:5], 0
5465; GFX10-NEXT:    s_cselect_b64 s[0:1], s[6:7], s[0:1]
5466; GFX10-NEXT:    s_cmp_lg_u32 s13, 0
5467; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
5468; GFX10-NEXT:    s_sub_i32 s0, 64, s8
5469; GFX10-NEXT:    v_lshlrev_b64 v[6:7], s0, v[2:3]
5470; GFX10-NEXT:    s_sub_i32 s0, s8, 64
5471; GFX10-NEXT:    s_cmp_lt_u32 s8, 64
5472; GFX10-NEXT:    v_lshrrev_b64 v[8:9], s0, v[2:3]
5473; GFX10-NEXT:    s_cselect_b32 s1, 1, 0
5474; GFX10-NEXT:    s_cmp_eq_u32 s8, 0
5475; GFX10-NEXT:    v_or_b32_e32 v4, v4, v6
5476; GFX10-NEXT:    s_cselect_b32 s6, 1, 0
5477; GFX10-NEXT:    s_and_b32 s0, 1, s1
5478; GFX10-NEXT:    v_or_b32_e32 v5, v5, v7
5479; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
5480; GFX10-NEXT:    s_and_b32 s0, 1, s6
5481; GFX10-NEXT:    s_and_b32 s1, 1, s1
5482; GFX10-NEXT:    v_lshrrev_b64 v[2:3], s8, v[2:3]
5483; GFX10-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc_lo
5484; GFX10-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc_lo
5485; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
5486; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, s1
5487; GFX10-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc_lo
5488; GFX10-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc_lo
5489; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s0
5490; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s0
5491; GFX10-NEXT:    v_or_b32_e32 v0, s4, v0
5492; GFX10-NEXT:    v_or_b32_e32 v1, s5, v1
5493; GFX10-NEXT:    v_or_b32_e32 v2, s2, v2
5494; GFX10-NEXT:    v_or_b32_e32 v3, s3, v3
5495; GFX10-NEXT:    ; return to shader part epilog
; IR under test. The bitcast only changes the return type from i128 to
; <4 x float>; the funnel shift itself is the single intrinsic call.
5496  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
5497  %cast.result = bitcast i128 %result to <4 x float>
5498  ret <4 x float> %cast.result
5499}
5500
; Test case: 128-bit funnel shift right (llvm.fshr.i128) in an amdgpu_ps shader
; where %rhs and %amt are marked inreg and %lhs is not.
;
; What the expected output below shows, per target:
;   - %lhs is read from v[0:3], %rhs from s[0:3], and the amount from s[4:5].
;   - The amount is masked with 0x7f (loaded by one s_mov_b64); a second value,
;     0x7f and-not amount, is also formed.
;   - %lhs is shifted left by one bit and then by the and-not value using
;     vector shifts; %rhs is shifted right by the masked amount using scalar
;     shifts. The two 128-bit halves are ORed into v0-v3.
;   - Selects keyed on "amount < 64" and "amount == 0" pick between the
;     64-bit partial shifts, as vector selects for the %lhs side and scalar
;     selects for the %rhs side.
;
; The suffix "vss" presumably encodes the vector/scalar placement of
; (lhs, rhs, amt) - confirm against the naming used elsewhere in this file.
;
; The check lines are autogenerated (see the first line of this file).
; Regenerate them with utils/update_llc_test_checks.py; do not edit by hand.
5501define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 inreg %amt) {
5502; GFX6-LABEL: v_fshr_i128_vss:
5503; GFX6:       ; %bb.0:
5504; GFX6-NEXT:    s_mov_b64 s[6:7], 0x7f
5505; GFX6-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
5506; GFX6-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
5507; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
5508; GFX6-NEXT:    s_sub_i32 s5, s4, 64
5509; GFX6-NEXT:    s_sub_i32 s6, 64, s4
5510; GFX6-NEXT:    v_lshl_b64 v[4:5], v[0:1], 1
5511; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
5512; GFX6-NEXT:    s_cmp_lt_u32 s4, 64
5513; GFX6-NEXT:    v_or_b32_e32 v2, v2, v0
5514; GFX6-NEXT:    s_cselect_b32 s7, 1, 0
5515; GFX6-NEXT:    s_cmp_eq_u32 s4, 0
5516; GFX6-NEXT:    s_cselect_b32 s9, 1, 0
5517; GFX6-NEXT:    v_lshr_b64 v[0:1], v[4:5], s6
5518; GFX6-NEXT:    v_lshl_b64 v[6:7], v[2:3], s4
5519; GFX6-NEXT:    v_lshl_b64 v[8:9], v[4:5], s4
5520; GFX6-NEXT:    s_and_b32 s4, 1, s7
5521; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5522; GFX6-NEXT:    s_and_b32 s4, 1, s9
5523; GFX6-NEXT:    s_sub_i32 s10, s8, 64
5524; GFX6-NEXT:    s_sub_i32 s9, 64, s8
5525; GFX6-NEXT:    v_or_b32_e32 v6, v0, v6
5526; GFX6-NEXT:    v_or_b32_e32 v7, v1, v7
5527; GFX6-NEXT:    v_lshl_b64 v[0:1], v[4:5], s5
5528; GFX6-NEXT:    s_cmp_lt_u32 s8, 64
5529; GFX6-NEXT:    s_cselect_b32 s11, 1, 0
5530; GFX6-NEXT:    s_cmp_eq_u32 s8, 0
5531; GFX6-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
5532; GFX6-NEXT:    v_cndmask_b32_e32 v5, 0, v9, vcc
5533; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
5534; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
5535; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5536; GFX6-NEXT:    s_cselect_b32 s12, 1, 0
5537; GFX6-NEXT:    s_lshr_b64 s[4:5], s[2:3], s8
5538; GFX6-NEXT:    s_lshr_b64 s[6:7], s[0:1], s8
5539; GFX6-NEXT:    s_lshl_b64 s[8:9], s[2:3], s9
5540; GFX6-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
5541; GFX6-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
5542; GFX6-NEXT:    s_cmp_lg_u32 s11, 0
5543; GFX6-NEXT:    s_cselect_b64 s[2:3], s[6:7], s[2:3]
5544; GFX6-NEXT:    s_cmp_lg_u32 s12, 0
5545; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
5546; GFX6-NEXT:    s_cmp_lg_u32 s11, 0
5547; GFX6-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
5548; GFX6-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
5549; GFX6-NEXT:    s_cselect_b64 s[2:3], s[4:5], 0
5550; GFX6-NEXT:    v_or_b32_e32 v0, s0, v4
5551; GFX6-NEXT:    v_or_b32_e32 v1, s1, v5
5552; GFX6-NEXT:    v_or_b32_e32 v2, s2, v2
5553; GFX6-NEXT:    v_or_b32_e32 v3, s3, v3
5554; GFX6-NEXT:    ; return to shader part epilog
5555;
5556; GFX8-LABEL: v_fshr_i128_vss:
5557; GFX8:       ; %bb.0:
5558; GFX8-NEXT:    s_mov_b64 s[6:7], 0x7f
5559; GFX8-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
5560; GFX8-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
5561; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
5562; GFX8-NEXT:    s_sub_i32 s5, s4, 64
5563; GFX8-NEXT:    s_sub_i32 s6, 64, s4
5564; GFX8-NEXT:    v_lshlrev_b64 v[4:5], 1, v[0:1]
5565; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
5566; GFX8-NEXT:    s_cmp_lt_u32 s4, 64
5567; GFX8-NEXT:    v_or_b32_e32 v2, v2, v0
5568; GFX8-NEXT:    s_cselect_b32 s7, 1, 0
5569; GFX8-NEXT:    s_cmp_eq_u32 s4, 0
5570; GFX8-NEXT:    s_cselect_b32 s9, 1, 0
5571; GFX8-NEXT:    v_lshrrev_b64 v[0:1], s6, v[4:5]
5572; GFX8-NEXT:    v_lshlrev_b64 v[6:7], s4, v[2:3]
5573; GFX8-NEXT:    v_lshlrev_b64 v[8:9], s4, v[4:5]
5574; GFX8-NEXT:    s_and_b32 s4, 1, s7
5575; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5576; GFX8-NEXT:    s_and_b32 s4, 1, s9
5577; GFX8-NEXT:    s_sub_i32 s10, s8, 64
5578; GFX8-NEXT:    s_sub_i32 s9, 64, s8
5579; GFX8-NEXT:    v_or_b32_e32 v6, v0, v6
5580; GFX8-NEXT:    v_or_b32_e32 v7, v1, v7
5581; GFX8-NEXT:    v_lshlrev_b64 v[0:1], s5, v[4:5]
5582; GFX8-NEXT:    s_cmp_lt_u32 s8, 64
5583; GFX8-NEXT:    s_cselect_b32 s11, 1, 0
5584; GFX8-NEXT:    s_cmp_eq_u32 s8, 0
5585; GFX8-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
5586; GFX8-NEXT:    v_cndmask_b32_e32 v5, 0, v9, vcc
5587; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
5588; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
5589; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5590; GFX8-NEXT:    s_cselect_b32 s12, 1, 0
5591; GFX8-NEXT:    s_lshr_b64 s[4:5], s[2:3], s8
5592; GFX8-NEXT:    s_lshr_b64 s[6:7], s[0:1], s8
5593; GFX8-NEXT:    s_lshl_b64 s[8:9], s[2:3], s9
5594; GFX8-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
5595; GFX8-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
5596; GFX8-NEXT:    s_cmp_lg_u32 s11, 0
5597; GFX8-NEXT:    s_cselect_b64 s[2:3], s[6:7], s[2:3]
5598; GFX8-NEXT:    s_cmp_lg_u32 s12, 0
5599; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
5600; GFX8-NEXT:    s_cmp_lg_u32 s11, 0
5601; GFX8-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
5602; GFX8-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
5603; GFX8-NEXT:    s_cselect_b64 s[2:3], s[4:5], 0
5604; GFX8-NEXT:    v_or_b32_e32 v0, s0, v4
5605; GFX8-NEXT:    v_or_b32_e32 v1, s1, v5
5606; GFX8-NEXT:    v_or_b32_e32 v2, s2, v2
5607; GFX8-NEXT:    v_or_b32_e32 v3, s3, v3
5608; GFX8-NEXT:    ; return to shader part epilog
5609;
5610; GFX9-LABEL: v_fshr_i128_vss:
5611; GFX9:       ; %bb.0:
5612; GFX9-NEXT:    s_mov_b64 s[6:7], 0x7f
5613; GFX9-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
5614; GFX9-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
5615; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
5616; GFX9-NEXT:    s_sub_i32 s5, s4, 64
5617; GFX9-NEXT:    s_sub_i32 s6, 64, s4
5618; GFX9-NEXT:    v_lshlrev_b64 v[4:5], 1, v[0:1]
5619; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
5620; GFX9-NEXT:    s_cmp_lt_u32 s4, 64
5621; GFX9-NEXT:    v_or_b32_e32 v2, v2, v0
5622; GFX9-NEXT:    s_cselect_b32 s7, 1, 0
5623; GFX9-NEXT:    s_cmp_eq_u32 s4, 0
5624; GFX9-NEXT:    s_cselect_b32 s9, 1, 0
5625; GFX9-NEXT:    v_lshrrev_b64 v[0:1], s6, v[4:5]
5626; GFX9-NEXT:    v_lshlrev_b64 v[6:7], s4, v[2:3]
5627; GFX9-NEXT:    v_lshlrev_b64 v[8:9], s4, v[4:5]
5628; GFX9-NEXT:    s_and_b32 s4, 1, s7
5629; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5630; GFX9-NEXT:    s_and_b32 s4, 1, s9
5631; GFX9-NEXT:    s_sub_i32 s10, s8, 64
5632; GFX9-NEXT:    s_sub_i32 s9, 64, s8
5633; GFX9-NEXT:    v_or_b32_e32 v6, v0, v6
5634; GFX9-NEXT:    v_or_b32_e32 v7, v1, v7
5635; GFX9-NEXT:    v_lshlrev_b64 v[0:1], s5, v[4:5]
5636; GFX9-NEXT:    s_cmp_lt_u32 s8, 64
5637; GFX9-NEXT:    s_cselect_b32 s11, 1, 0
5638; GFX9-NEXT:    s_cmp_eq_u32 s8, 0
5639; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
5640; GFX9-NEXT:    v_cndmask_b32_e32 v5, 0, v9, vcc
5641; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
5642; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
5643; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5644; GFX9-NEXT:    s_cselect_b32 s12, 1, 0
5645; GFX9-NEXT:    s_lshr_b64 s[4:5], s[2:3], s8
5646; GFX9-NEXT:    s_lshr_b64 s[6:7], s[0:1], s8
5647; GFX9-NEXT:    s_lshl_b64 s[8:9], s[2:3], s9
5648; GFX9-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
5649; GFX9-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
5650; GFX9-NEXT:    s_cmp_lg_u32 s11, 0
5651; GFX9-NEXT:    s_cselect_b64 s[2:3], s[6:7], s[2:3]
5652; GFX9-NEXT:    s_cmp_lg_u32 s12, 0
5653; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
5654; GFX9-NEXT:    s_cmp_lg_u32 s11, 0
5655; GFX9-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
5656; GFX9-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
5657; GFX9-NEXT:    s_cselect_b64 s[2:3], s[4:5], 0
5658; GFX9-NEXT:    v_or_b32_e32 v0, s0, v4
5659; GFX9-NEXT:    v_or_b32_e32 v1, s1, v5
5660; GFX9-NEXT:    v_or_b32_e32 v2, s2, v2
5661; GFX9-NEXT:    v_or_b32_e32 v3, s3, v3
5662; GFX9-NEXT:    ; return to shader part epilog
5663;
5664; GFX10-LABEL: v_fshr_i128_vss:
5665; GFX10:       ; %bb.0:
5666; GFX10-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
5667; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 31, v1
5668; GFX10-NEXT:    s_mov_b64 s[6:7], 0x7f
5669; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5670; GFX10-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
5671; GFX10-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
5672; GFX10-NEXT:    v_or_b32_e32 v2, v2, v4
5673; GFX10-NEXT:    s_sub_i32 s6, 64, s4
5674; GFX10-NEXT:    s_sub_i32 s5, s4, 64
5675; GFX10-NEXT:    s_cmp_lt_u32 s4, 64
5676; GFX10-NEXT:    v_lshrrev_b64 v[4:5], s6, v[0:1]
5677; GFX10-NEXT:    v_lshlrev_b64 v[6:7], s4, v[2:3]
5678; GFX10-NEXT:    s_cselect_b32 s7, 1, 0
5679; GFX10-NEXT:    s_cmp_eq_u32 s4, 0
5680; GFX10-NEXT:    v_lshlrev_b64 v[8:9], s4, v[0:1]
5681; GFX10-NEXT:    s_cselect_b32 s9, 1, 0
5682; GFX10-NEXT:    s_and_b32 s4, 1, s7
5683; GFX10-NEXT:    v_lshlrev_b64 v[0:1], s5, v[0:1]
5684; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s4
5685; GFX10-NEXT:    v_or_b32_e32 v4, v4, v6
5686; GFX10-NEXT:    v_or_b32_e32 v5, v5, v7
5687; GFX10-NEXT:    s_and_b32 s4, 1, s9
5688; GFX10-NEXT:    s_sub_i32 s10, s8, 64
5689; GFX10-NEXT:    s_sub_i32 s6, 64, s8
5690; GFX10-NEXT:    s_cmp_lt_u32 s8, 64
5691; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0, v8, vcc_lo
5692; GFX10-NEXT:    s_cselect_b32 s11, 1, 0
5693; GFX10-NEXT:    s_cmp_eq_u32 s8, 0
5694; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0, v9, vcc_lo
5695; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
5696; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
5697; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s4
5698; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
5699; GFX10-NEXT:    s_lshr_b64 s[4:5], s[0:1], s8
5700; GFX10-NEXT:    s_lshl_b64 s[6:7], s[2:3], s6
5701; GFX10-NEXT:    s_lshr_b64 s[8:9], s[2:3], s8
5702; GFX10-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
5703; GFX10-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
5704; GFX10-NEXT:    s_cmp_lg_u32 s11, 0
5705; GFX10-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc_lo
5706; GFX10-NEXT:    s_cselect_b64 s[2:3], s[4:5], s[2:3]
5707; GFX10-NEXT:    s_cmp_lg_u32 s12, 0
5708; GFX10-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc_lo
5709; GFX10-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
5710; GFX10-NEXT:    s_cmp_lg_u32 s11, 0
5711; GFX10-NEXT:    v_or_b32_e32 v0, s0, v6
5712; GFX10-NEXT:    s_cselect_b64 s[2:3], s[8:9], 0
5713; GFX10-NEXT:    v_or_b32_e32 v1, s1, v7
5714; GFX10-NEXT:    v_or_b32_e32 v2, s2, v2
5715; GFX10-NEXT:    v_or_b32_e32 v3, s3, v3
5716; GFX10-NEXT:    ; return to shader part epilog
; IR under test. The bitcast only changes the return type from i128 to
; <4 x float>; the funnel shift itself is the single intrinsic call.
5717  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
5718  %cast.result = bitcast i128 %result to <4 x float>
5719  ret <4 x float> %cast.result
5720}
5721
; Test case: 128-bit funnel shift right by the constant 65 in an amdgpu_ps
; shader with both operands marked inreg; the i128 result is returned directly.
;
; What the expected output below shows, per target:
;   - No 0x7f mask and no compare/select sequence appears; only constant
;     shifts by 1 and by 31 remain.
;   - Result s[0:1] = (s[6:7] >> 1) with bit 0 of s0 moved to the top bit
;     (s_lshl_b32 s5, s0, 31 with s4 = 0, then a 64-bit OR).
;   - Result s[2:3] = (s[0:1] >> 1) with bit 0 of s2 moved to the top bit.
;   - s[4:5] (the low half of %rhs on entry) is never read as data; s4/s5 are
;     overwritten first, which matches a shift of more than 64 bits.
;
; The check lines are autogenerated (see the first line of this file).
; Regenerate them with utils/update_llc_test_checks.py; do not edit by hand.
5722define amdgpu_ps i128 @s_fshr_i128_65(i128 inreg %lhs, i128 inreg %rhs) {
5723; GFX6-LABEL: s_fshr_i128_65:
5724; GFX6:       ; %bb.0:
5725; GFX6-NEXT:    s_mov_b32 s4, 0
5726; GFX6-NEXT:    s_lshl_b32 s5, s0, 31
5727; GFX6-NEXT:    s_lshl_b32 s3, s2, 31
5728; GFX6-NEXT:    s_mov_b32 s2, s4
5729; GFX6-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
5730; GFX6-NEXT:    s_or_b64 s[2:3], s[2:3], s[0:1]
5731; GFX6-NEXT:    s_lshr_b64 s[0:1], s[6:7], 1
5732; GFX6-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
5733; GFX6-NEXT:    ; return to shader part epilog
5734;
5735; GFX8-LABEL: s_fshr_i128_65:
5736; GFX8:       ; %bb.0:
5737; GFX8-NEXT:    s_mov_b32 s4, 0
5738; GFX8-NEXT:    s_lshl_b32 s5, s0, 31
5739; GFX8-NEXT:    s_lshl_b32 s3, s2, 31
5740; GFX8-NEXT:    s_mov_b32 s2, s4
5741; GFX8-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
5742; GFX8-NEXT:    s_or_b64 s[2:3], s[2:3], s[0:1]
5743; GFX8-NEXT:    s_lshr_b64 s[0:1], s[6:7], 1
5744; GFX8-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
5745; GFX8-NEXT:    ; return to shader part epilog
5746;
5747; GFX9-LABEL: s_fshr_i128_65:
5748; GFX9:       ; %bb.0:
5749; GFX9-NEXT:    s_mov_b32 s4, 0
5750; GFX9-NEXT:    s_lshl_b32 s5, s0, 31
5751; GFX9-NEXT:    s_lshl_b32 s3, s2, 31
5752; GFX9-NEXT:    s_mov_b32 s2, s4
5753; GFX9-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
5754; GFX9-NEXT:    s_or_b64 s[2:3], s[2:3], s[0:1]
5755; GFX9-NEXT:    s_lshr_b64 s[0:1], s[6:7], 1
5756; GFX9-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
5757; GFX9-NEXT:    ; return to shader part epilog
5758;
5759; GFX10-LABEL: s_fshr_i128_65:
5760; GFX10:       ; %bb.0:
5761; GFX10-NEXT:    s_mov_b32 s4, 0
5762; GFX10-NEXT:    s_lshl_b32 s5, s0, 31
5763; GFX10-NEXT:    s_lshl_b32 s3, s2, 31
5764; GFX10-NEXT:    s_mov_b32 s2, s4
5765; GFX10-NEXT:    s_lshr_b64 s[6:7], s[6:7], 1
5766; GFX10-NEXT:    s_lshr_b64 s[8:9], s[0:1], 1
5767; GFX10-NEXT:    s_or_b64 s[0:1], s[4:5], s[6:7]
5768; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[8:9]
5769; GFX10-NEXT:    ; return to shader part epilog
; IR under test. The shift amount is the literal 65 (64 + 1).
5770  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65)
5771  ret i128 %result
5772}
5773
; Test case: 128-bit funnel shift right by the constant 65 in an ordinary
; (non-shader) function; neither operand is marked inreg.
;
; What the expected output below shows, per target:
;   - The body starts with s_waitcnt and returns with s_setpc_b64 s[30:31],
;     unlike the amdgpu_ps variants, which end in a shader epilog.
;   - %lhs is read from v[0:3] and the high half of %rhs from v[6:7];
;     v[4:5] (the low half of %rhs) is never read as an input.
;   - Only constant shifts by 1 and by 31 plus two ORs are emitted; there is
;     no mask and no compare/select sequence.
;   - On the gfx1010 run, extra v_mov instructions shuffle v2 and v4 around
;     the shifts and an additional s_waitcnt_vscnt is emitted at entry.
;
; The check lines are autogenerated (see the first line of this file).
; Regenerate them with utils/update_llc_test_checks.py; do not edit by hand.
5774define i128 @v_fshr_i128_65(i128 %lhs, i128 %rhs) {
5775; GFX6-LABEL: v_fshr_i128_65:
5776; GFX6:       ; %bb.0:
5777; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5778; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 31, v0
5779; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 31, v2
5780; GFX6-NEXT:    v_lshr_b64 v[2:3], v[0:1], 1
5781; GFX6-NEXT:    v_lshr_b64 v[0:1], v[6:7], 1
5782; GFX6-NEXT:    v_or_b32_e32 v3, v5, v3
5783; GFX6-NEXT:    v_or_b32_e32 v1, v4, v1
5784; GFX6-NEXT:    s_setpc_b64 s[30:31]
5785;
5786; GFX8-LABEL: v_fshr_i128_65:
5787; GFX8:       ; %bb.0:
5788; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5789; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 31, v0
5790; GFX8-NEXT:    v_lshlrev_b32_e32 v5, 31, v2
5791; GFX8-NEXT:    v_lshrrev_b64 v[2:3], 1, v[0:1]
5792; GFX8-NEXT:    v_lshrrev_b64 v[0:1], 1, v[6:7]
5793; GFX8-NEXT:    v_or_b32_e32 v3, v5, v3
5794; GFX8-NEXT:    v_or_b32_e32 v1, v4, v1
5795; GFX8-NEXT:    s_setpc_b64 s[30:31]
5796;
5797; GFX9-LABEL: v_fshr_i128_65:
5798; GFX9:       ; %bb.0:
5799; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5800; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 31, v0
5801; GFX9-NEXT:    v_lshlrev_b32_e32 v5, 31, v2
5802; GFX9-NEXT:    v_lshrrev_b64 v[2:3], 1, v[0:1]
5803; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 1, v[6:7]
5804; GFX9-NEXT:    v_or_b32_e32 v3, v5, v3
5805; GFX9-NEXT:    v_or_b32_e32 v1, v4, v1
5806; GFX9-NEXT:    s_setpc_b64 s[30:31]
5807;
5808; GFX10-LABEL: v_fshr_i128_65:
5809; GFX10:       ; %bb.0:
5810; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5811; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
5812; GFX10-NEXT:    v_mov_b32_e32 v8, v2
5813; GFX10-NEXT:    v_lshrrev_b64 v[4:5], 1, v[6:7]
5814; GFX10-NEXT:    v_lshrrev_b64 v[2:3], 1, v[0:1]
5815; GFX10-NEXT:    v_lshlrev_b32_e32 v9, 31, v0
5816; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 31, v8
5817; GFX10-NEXT:    v_or_b32_e32 v1, v9, v5
5818; GFX10-NEXT:    v_or_b32_e32 v3, v0, v3
5819; GFX10-NEXT:    v_mov_b32_e32 v0, v4
5820; GFX10-NEXT:    s_setpc_b64 s[30:31]
; IR under test. The shift amount is the literal 65 (64 + 1).
5821  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65)
5822  ret i128 %result
5823}
5824
; Checks codegen of llvm.fshr.v2i128 with a variable per-element shift amount
; in an amdgpu_ps function whose three operands are all marked inreg. The
; expected output is entirely scalar (s_* instructions) and ends at the shader
; part epilog. For each of the two elements it masks the amount with 0x7f
; (s_and_b64) and forms the masked complement (s_andn2_b64), shifts one
; 128-bit operand left by 1 (presumably the lhs element, to be confirmed
; against the argument register assignment), then assembles the 128-bit left
; and right shifts from 64-bit shifts, selecting with s_cselect on the
; "amount < 64" and "amount == 0" compares before ORing the halves together.
; The assertions are autogenerated (see the NOTE on the first line of this
; file), so regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
define amdgpu_ps <2 x i128> @s_fshr_v2i128(<2 x i128> inreg %lhs, <2 x i128> inreg %rhs, <2 x i128> inreg %amt) {
; GFX6-LABEL: s_fshr_v2i128:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_movk_i32 s18, 0x7f
; GFX6-NEXT:    s_mov_b32 s19, 0
; GFX6-NEXT:    s_and_b64 s[22:23], s[16:17], s[18:19]
; GFX6-NEXT:    s_andn2_b64 s[16:17], s[18:19], s[16:17]
; GFX6-NEXT:    s_lshl_b64 s[24:25], s[0:1], 1
; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
; GFX6-NEXT:    s_lshr_b32 s0, s1, 31
; GFX6-NEXT:    s_mov_b32 s1, s19
; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX6-NEXT:    s_sub_i32 s23, s16, 64
; GFX6-NEXT:    s_sub_i32 s17, 64, s16
; GFX6-NEXT:    s_cmp_lt_u32 s16, 64
; GFX6-NEXT:    s_cselect_b32 s28, 1, 0
; GFX6-NEXT:    s_cmp_eq_u32 s16, 0
; GFX6-NEXT:    s_cselect_b32 s29, 1, 0
; GFX6-NEXT:    s_lshl_b64 s[2:3], s[24:25], s16
; GFX6-NEXT:    s_lshr_b64 s[26:27], s[24:25], s17
; GFX6-NEXT:    s_lshl_b64 s[16:17], s[0:1], s16
; GFX6-NEXT:    s_or_b64 s[16:17], s[26:27], s[16:17]
; GFX6-NEXT:    s_lshl_b64 s[24:25], s[24:25], s23
; GFX6-NEXT:    s_cmp_lg_u32 s28, 0
; GFX6-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
; GFX6-NEXT:    s_cselect_b64 s[16:17], s[16:17], s[24:25]
; GFX6-NEXT:    s_cmp_lg_u32 s29, 0
; GFX6-NEXT:    s_cselect_b64 s[16:17], s[0:1], s[16:17]
; GFX6-NEXT:    s_sub_i32 s26, s22, 64
; GFX6-NEXT:    s_sub_i32 s24, 64, s22
; GFX6-NEXT:    s_cmp_lt_u32 s22, 64
; GFX6-NEXT:    s_cselect_b32 s27, 1, 0
; GFX6-NEXT:    s_cmp_eq_u32 s22, 0
; GFX6-NEXT:    s_cselect_b32 s28, 1, 0
; GFX6-NEXT:    s_lshr_b64 s[0:1], s[10:11], s22
; GFX6-NEXT:    s_lshr_b64 s[22:23], s[8:9], s22
; GFX6-NEXT:    s_lshl_b64 s[24:25], s[10:11], s24
; GFX6-NEXT:    s_or_b64 s[22:23], s[22:23], s[24:25]
; GFX6-NEXT:    s_lshr_b64 s[10:11], s[10:11], s26
; GFX6-NEXT:    s_cmp_lg_u32 s27, 0
; GFX6-NEXT:    s_cselect_b64 s[10:11], s[22:23], s[10:11]
; GFX6-NEXT:    s_cmp_lg_u32 s28, 0
; GFX6-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[10:11]
; GFX6-NEXT:    s_cmp_lg_u32 s27, 0
; GFX6-NEXT:    s_cselect_b64 s[10:11], s[0:1], 0
; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[8:9]
; GFX6-NEXT:    s_or_b64 s[2:3], s[16:17], s[10:11]
; GFX6-NEXT:    s_and_b64 s[8:9], s[20:21], s[18:19]
; GFX6-NEXT:    s_andn2_b64 s[10:11], s[18:19], s[20:21]
; GFX6-NEXT:    s_lshl_b64 s[6:7], s[6:7], 1
; GFX6-NEXT:    s_lshr_b32 s18, s5, 31
; GFX6-NEXT:    s_lshl_b64 s[16:17], s[4:5], 1
; GFX6-NEXT:    s_or_b64 s[4:5], s[6:7], s[18:19]
; GFX6-NEXT:    s_sub_i32 s9, s10, 64
; GFX6-NEXT:    s_sub_i32 s11, 64, s10
; GFX6-NEXT:    s_cmp_lt_u32 s10, 64
; GFX6-NEXT:    s_cselect_b32 s20, 1, 0
; GFX6-NEXT:    s_cmp_eq_u32 s10, 0
; GFX6-NEXT:    s_cselect_b32 s21, 1, 0
; GFX6-NEXT:    s_lshl_b64 s[6:7], s[16:17], s10
; GFX6-NEXT:    s_lshr_b64 s[18:19], s[16:17], s11
; GFX6-NEXT:    s_lshl_b64 s[10:11], s[4:5], s10
; GFX6-NEXT:    s_or_b64 s[10:11], s[18:19], s[10:11]
; GFX6-NEXT:    s_lshl_b64 s[16:17], s[16:17], s9
; GFX6-NEXT:    s_cmp_lg_u32 s20, 0
; GFX6-NEXT:    s_cselect_b64 s[6:7], s[6:7], 0
; GFX6-NEXT:    s_cselect_b64 s[10:11], s[10:11], s[16:17]
; GFX6-NEXT:    s_cmp_lg_u32 s21, 0
; GFX6-NEXT:    s_cselect_b64 s[10:11], s[4:5], s[10:11]
; GFX6-NEXT:    s_sub_i32 s18, s8, 64
; GFX6-NEXT:    s_sub_i32 s16, 64, s8
; GFX6-NEXT:    s_cmp_lt_u32 s8, 64
; GFX6-NEXT:    s_cselect_b32 s19, 1, 0
; GFX6-NEXT:    s_cmp_eq_u32 s8, 0
; GFX6-NEXT:    s_cselect_b32 s20, 1, 0
; GFX6-NEXT:    s_lshr_b64 s[4:5], s[14:15], s8
; GFX6-NEXT:    s_lshr_b64 s[8:9], s[12:13], s8
; GFX6-NEXT:    s_lshl_b64 s[16:17], s[14:15], s16
; GFX6-NEXT:    s_or_b64 s[8:9], s[8:9], s[16:17]
; GFX6-NEXT:    s_lshr_b64 s[14:15], s[14:15], s18
; GFX6-NEXT:    s_cmp_lg_u32 s19, 0
; GFX6-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[14:15]
; GFX6-NEXT:    s_cmp_lg_u32 s20, 0
; GFX6-NEXT:    s_cselect_b64 s[8:9], s[12:13], s[8:9]
; GFX6-NEXT:    s_cmp_lg_u32 s19, 0
; GFX6-NEXT:    s_cselect_b64 s[12:13], s[4:5], 0
; GFX6-NEXT:    s_or_b64 s[4:5], s[6:7], s[8:9]
; GFX6-NEXT:    s_or_b64 s[6:7], s[10:11], s[12:13]
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_fshr_v2i128:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_movk_i32 s18, 0x7f
; GFX8-NEXT:    s_mov_b32 s19, 0
; GFX8-NEXT:    s_and_b64 s[22:23], s[16:17], s[18:19]
; GFX8-NEXT:    s_andn2_b64 s[16:17], s[18:19], s[16:17]
; GFX8-NEXT:    s_lshl_b64 s[24:25], s[0:1], 1
; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
; GFX8-NEXT:    s_lshr_b32 s0, s1, 31
; GFX8-NEXT:    s_mov_b32 s1, s19
; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX8-NEXT:    s_sub_i32 s23, s16, 64
; GFX8-NEXT:    s_sub_i32 s17, 64, s16
; GFX8-NEXT:    s_cmp_lt_u32 s16, 64
; GFX8-NEXT:    s_cselect_b32 s28, 1, 0
; GFX8-NEXT:    s_cmp_eq_u32 s16, 0
; GFX8-NEXT:    s_cselect_b32 s29, 1, 0
; GFX8-NEXT:    s_lshl_b64 s[2:3], s[24:25], s16
; GFX8-NEXT:    s_lshr_b64 s[26:27], s[24:25], s17
; GFX8-NEXT:    s_lshl_b64 s[16:17], s[0:1], s16
; GFX8-NEXT:    s_or_b64 s[16:17], s[26:27], s[16:17]
; GFX8-NEXT:    s_lshl_b64 s[24:25], s[24:25], s23
; GFX8-NEXT:    s_cmp_lg_u32 s28, 0
; GFX8-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
; GFX8-NEXT:    s_cselect_b64 s[16:17], s[16:17], s[24:25]
; GFX8-NEXT:    s_cmp_lg_u32 s29, 0
; GFX8-NEXT:    s_cselect_b64 s[16:17], s[0:1], s[16:17]
; GFX8-NEXT:    s_sub_i32 s26, s22, 64
; GFX8-NEXT:    s_sub_i32 s24, 64, s22
; GFX8-NEXT:    s_cmp_lt_u32 s22, 64
; GFX8-NEXT:    s_cselect_b32 s27, 1, 0
; GFX8-NEXT:    s_cmp_eq_u32 s22, 0
; GFX8-NEXT:    s_cselect_b32 s28, 1, 0
; GFX8-NEXT:    s_lshr_b64 s[0:1], s[10:11], s22
; GFX8-NEXT:    s_lshr_b64 s[22:23], s[8:9], s22
; GFX8-NEXT:    s_lshl_b64 s[24:25], s[10:11], s24
; GFX8-NEXT:    s_or_b64 s[22:23], s[22:23], s[24:25]
; GFX8-NEXT:    s_lshr_b64 s[10:11], s[10:11], s26
; GFX8-NEXT:    s_cmp_lg_u32 s27, 0
; GFX8-NEXT:    s_cselect_b64 s[10:11], s[22:23], s[10:11]
; GFX8-NEXT:    s_cmp_lg_u32 s28, 0
; GFX8-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[10:11]
; GFX8-NEXT:    s_cmp_lg_u32 s27, 0
; GFX8-NEXT:    s_cselect_b64 s[10:11], s[0:1], 0
; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[8:9]
; GFX8-NEXT:    s_or_b64 s[2:3], s[16:17], s[10:11]
; GFX8-NEXT:    s_and_b64 s[8:9], s[20:21], s[18:19]
; GFX8-NEXT:    s_andn2_b64 s[10:11], s[18:19], s[20:21]
; GFX8-NEXT:    s_lshl_b64 s[6:7], s[6:7], 1
; GFX8-NEXT:    s_lshr_b32 s18, s5, 31
; GFX8-NEXT:    s_lshl_b64 s[16:17], s[4:5], 1
; GFX8-NEXT:    s_or_b64 s[4:5], s[6:7], s[18:19]
; GFX8-NEXT:    s_sub_i32 s9, s10, 64
; GFX8-NEXT:    s_sub_i32 s11, 64, s10
; GFX8-NEXT:    s_cmp_lt_u32 s10, 64
; GFX8-NEXT:    s_cselect_b32 s20, 1, 0
; GFX8-NEXT:    s_cmp_eq_u32 s10, 0
; GFX8-NEXT:    s_cselect_b32 s21, 1, 0
; GFX8-NEXT:    s_lshl_b64 s[6:7], s[16:17], s10
; GFX8-NEXT:    s_lshr_b64 s[18:19], s[16:17], s11
; GFX8-NEXT:    s_lshl_b64 s[10:11], s[4:5], s10
; GFX8-NEXT:    s_or_b64 s[10:11], s[18:19], s[10:11]
; GFX8-NEXT:    s_lshl_b64 s[16:17], s[16:17], s9
; GFX8-NEXT:    s_cmp_lg_u32 s20, 0
; GFX8-NEXT:    s_cselect_b64 s[6:7], s[6:7], 0
; GFX8-NEXT:    s_cselect_b64 s[10:11], s[10:11], s[16:17]
; GFX8-NEXT:    s_cmp_lg_u32 s21, 0
; GFX8-NEXT:    s_cselect_b64 s[10:11], s[4:5], s[10:11]
; GFX8-NEXT:    s_sub_i32 s18, s8, 64
; GFX8-NEXT:    s_sub_i32 s16, 64, s8
; GFX8-NEXT:    s_cmp_lt_u32 s8, 64
; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
; GFX8-NEXT:    s_cmp_eq_u32 s8, 0
; GFX8-NEXT:    s_cselect_b32 s20, 1, 0
; GFX8-NEXT:    s_lshr_b64 s[4:5], s[14:15], s8
; GFX8-NEXT:    s_lshr_b64 s[8:9], s[12:13], s8
; GFX8-NEXT:    s_lshl_b64 s[16:17], s[14:15], s16
; GFX8-NEXT:    s_or_b64 s[8:9], s[8:9], s[16:17]
; GFX8-NEXT:    s_lshr_b64 s[14:15], s[14:15], s18
; GFX8-NEXT:    s_cmp_lg_u32 s19, 0
; GFX8-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[14:15]
; GFX8-NEXT:    s_cmp_lg_u32 s20, 0
; GFX8-NEXT:    s_cselect_b64 s[8:9], s[12:13], s[8:9]
; GFX8-NEXT:    s_cmp_lg_u32 s19, 0
; GFX8-NEXT:    s_cselect_b64 s[12:13], s[4:5], 0
; GFX8-NEXT:    s_or_b64 s[4:5], s[6:7], s[8:9]
; GFX8-NEXT:    s_or_b64 s[6:7], s[10:11], s[12:13]
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_fshr_v2i128:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_movk_i32 s18, 0x7f
; GFX9-NEXT:    s_mov_b32 s19, 0
; GFX9-NEXT:    s_and_b64 s[22:23], s[16:17], s[18:19]
; GFX9-NEXT:    s_andn2_b64 s[16:17], s[18:19], s[16:17]
; GFX9-NEXT:    s_lshl_b64 s[24:25], s[0:1], 1
; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
; GFX9-NEXT:    s_lshr_b32 s0, s1, 31
; GFX9-NEXT:    s_mov_b32 s1, s19
; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX9-NEXT:    s_sub_i32 s23, s16, 64
; GFX9-NEXT:    s_sub_i32 s17, 64, s16
; GFX9-NEXT:    s_cmp_lt_u32 s16, 64
; GFX9-NEXT:    s_cselect_b32 s28, 1, 0
; GFX9-NEXT:    s_cmp_eq_u32 s16, 0
; GFX9-NEXT:    s_cselect_b32 s29, 1, 0
; GFX9-NEXT:    s_lshl_b64 s[2:3], s[24:25], s16
; GFX9-NEXT:    s_lshr_b64 s[26:27], s[24:25], s17
; GFX9-NEXT:    s_lshl_b64 s[16:17], s[0:1], s16
; GFX9-NEXT:    s_or_b64 s[16:17], s[26:27], s[16:17]
; GFX9-NEXT:    s_lshl_b64 s[24:25], s[24:25], s23
; GFX9-NEXT:    s_cmp_lg_u32 s28, 0
; GFX9-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
; GFX9-NEXT:    s_cselect_b64 s[16:17], s[16:17], s[24:25]
; GFX9-NEXT:    s_cmp_lg_u32 s29, 0
; GFX9-NEXT:    s_cselect_b64 s[16:17], s[0:1], s[16:17]
; GFX9-NEXT:    s_sub_i32 s26, s22, 64
; GFX9-NEXT:    s_sub_i32 s24, 64, s22
; GFX9-NEXT:    s_cmp_lt_u32 s22, 64
; GFX9-NEXT:    s_cselect_b32 s27, 1, 0
; GFX9-NEXT:    s_cmp_eq_u32 s22, 0
; GFX9-NEXT:    s_cselect_b32 s28, 1, 0
; GFX9-NEXT:    s_lshr_b64 s[0:1], s[10:11], s22
; GFX9-NEXT:    s_lshr_b64 s[22:23], s[8:9], s22
; GFX9-NEXT:    s_lshl_b64 s[24:25], s[10:11], s24
; GFX9-NEXT:    s_or_b64 s[22:23], s[22:23], s[24:25]
; GFX9-NEXT:    s_lshr_b64 s[10:11], s[10:11], s26
; GFX9-NEXT:    s_cmp_lg_u32 s27, 0
; GFX9-NEXT:    s_cselect_b64 s[10:11], s[22:23], s[10:11]
; GFX9-NEXT:    s_cmp_lg_u32 s28, 0
; GFX9-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[10:11]
; GFX9-NEXT:    s_cmp_lg_u32 s27, 0
; GFX9-NEXT:    s_cselect_b64 s[10:11], s[0:1], 0
; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[8:9]
; GFX9-NEXT:    s_or_b64 s[2:3], s[16:17], s[10:11]
; GFX9-NEXT:    s_and_b64 s[8:9], s[20:21], s[18:19]
; GFX9-NEXT:    s_andn2_b64 s[10:11], s[18:19], s[20:21]
; GFX9-NEXT:    s_lshl_b64 s[6:7], s[6:7], 1
; GFX9-NEXT:    s_lshr_b32 s18, s5, 31
; GFX9-NEXT:    s_lshl_b64 s[16:17], s[4:5], 1
; GFX9-NEXT:    s_or_b64 s[4:5], s[6:7], s[18:19]
; GFX9-NEXT:    s_sub_i32 s9, s10, 64
; GFX9-NEXT:    s_sub_i32 s11, 64, s10
; GFX9-NEXT:    s_cmp_lt_u32 s10, 64
; GFX9-NEXT:    s_cselect_b32 s20, 1, 0
; GFX9-NEXT:    s_cmp_eq_u32 s10, 0
; GFX9-NEXT:    s_cselect_b32 s21, 1, 0
; GFX9-NEXT:    s_lshl_b64 s[6:7], s[16:17], s10
; GFX9-NEXT:    s_lshr_b64 s[18:19], s[16:17], s11
; GFX9-NEXT:    s_lshl_b64 s[10:11], s[4:5], s10
; GFX9-NEXT:    s_or_b64 s[10:11], s[18:19], s[10:11]
; GFX9-NEXT:    s_lshl_b64 s[16:17], s[16:17], s9
; GFX9-NEXT:    s_cmp_lg_u32 s20, 0
; GFX9-NEXT:    s_cselect_b64 s[6:7], s[6:7], 0
; GFX9-NEXT:    s_cselect_b64 s[10:11], s[10:11], s[16:17]
; GFX9-NEXT:    s_cmp_lg_u32 s21, 0
; GFX9-NEXT:    s_cselect_b64 s[10:11], s[4:5], s[10:11]
; GFX9-NEXT:    s_sub_i32 s18, s8, 64
; GFX9-NEXT:    s_sub_i32 s16, 64, s8
; GFX9-NEXT:    s_cmp_lt_u32 s8, 64
; GFX9-NEXT:    s_cselect_b32 s19, 1, 0
; GFX9-NEXT:    s_cmp_eq_u32 s8, 0
; GFX9-NEXT:    s_cselect_b32 s20, 1, 0
; GFX9-NEXT:    s_lshr_b64 s[4:5], s[14:15], s8
; GFX9-NEXT:    s_lshr_b64 s[8:9], s[12:13], s8
; GFX9-NEXT:    s_lshl_b64 s[16:17], s[14:15], s16
; GFX9-NEXT:    s_or_b64 s[8:9], s[8:9], s[16:17]
; GFX9-NEXT:    s_lshr_b64 s[14:15], s[14:15], s18
; GFX9-NEXT:    s_cmp_lg_u32 s19, 0
; GFX9-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[14:15]
; GFX9-NEXT:    s_cmp_lg_u32 s20, 0
; GFX9-NEXT:    s_cselect_b64 s[8:9], s[12:13], s[8:9]
; GFX9-NEXT:    s_cmp_lg_u32 s19, 0
; GFX9-NEXT:    s_cselect_b64 s[12:13], s[4:5], 0
; GFX9-NEXT:    s_or_b64 s[4:5], s[6:7], s[8:9]
; GFX9-NEXT:    s_or_b64 s[6:7], s[10:11], s[12:13]
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_fshr_v2i128:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_movk_i32 s18, 0x7f
; GFX10-NEXT:    s_mov_b32 s19, 0
; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
; GFX10-NEXT:    s_and_b64 s[22:23], s[16:17], s[18:19]
; GFX10-NEXT:    s_andn2_b64 s[16:17], s[18:19], s[16:17]
; GFX10-NEXT:    s_lshr_b32 s24, s1, 31
; GFX10-NEXT:    s_mov_b32 s25, s19
; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[24:25]
; GFX10-NEXT:    s_sub_i32 s23, s16, 64
; GFX10-NEXT:    s_sub_i32 s17, 64, s16
; GFX10-NEXT:    s_cmp_lt_u32 s16, 64
; GFX10-NEXT:    s_cselect_b32 s28, 1, 0
; GFX10-NEXT:    s_cmp_eq_u32 s16, 0
; GFX10-NEXT:    s_cselect_b32 s29, 1, 0
; GFX10-NEXT:    s_lshr_b64 s[24:25], s[0:1], s17
; GFX10-NEXT:    s_lshl_b64 s[26:27], s[2:3], s16
; GFX10-NEXT:    s_lshl_b64 s[16:17], s[0:1], s16
; GFX10-NEXT:    s_or_b64 s[24:25], s[24:25], s[26:27]
; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s23
; GFX10-NEXT:    s_cmp_lg_u32 s28, 0
; GFX10-NEXT:    s_cselect_b64 s[16:17], s[16:17], 0
; GFX10-NEXT:    s_cselect_b64 s[0:1], s[24:25], s[0:1]
; GFX10-NEXT:    s_cmp_lg_u32 s29, 0
; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
; GFX10-NEXT:    s_sub_i32 s26, s22, 64
; GFX10-NEXT:    s_sub_i32 s23, 64, s22
; GFX10-NEXT:    s_cmp_lt_u32 s22, 64
; GFX10-NEXT:    s_cselect_b32 s27, 1, 0
; GFX10-NEXT:    s_cmp_eq_u32 s22, 0
; GFX10-NEXT:    s_cselect_b32 s28, 1, 0
; GFX10-NEXT:    s_lshr_b64 s[0:1], s[8:9], s22
; GFX10-NEXT:    s_lshl_b64 s[24:25], s[10:11], s23
; GFX10-NEXT:    s_lshr_b64 s[22:23], s[10:11], s22
; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[24:25]
; GFX10-NEXT:    s_lshr_b64 s[10:11], s[10:11], s26
; GFX10-NEXT:    s_cmp_lg_u32 s27, 0
; GFX10-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[10:11]
; GFX10-NEXT:    s_cmp_lg_u32 s28, 0
; GFX10-NEXT:    s_cselect_b64 s[0:1], s[8:9], s[0:1]
; GFX10-NEXT:    s_cmp_lg_u32 s27, 0
; GFX10-NEXT:    s_cselect_b64 s[8:9], s[22:23], 0
; GFX10-NEXT:    s_andn2_b64 s[10:11], s[18:19], s[20:21]
; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[8:9]
; GFX10-NEXT:    s_and_b64 s[8:9], s[20:21], s[18:19]
; GFX10-NEXT:    s_lshl_b64 s[6:7], s[6:7], 1
; GFX10-NEXT:    s_lshr_b32 s18, s5, 31
; GFX10-NEXT:    s_or_b64 s[0:1], s[16:17], s[0:1]
; GFX10-NEXT:    s_lshl_b64 s[4:5], s[4:5], 1
; GFX10-NEXT:    s_or_b64 s[6:7], s[6:7], s[18:19]
; GFX10-NEXT:    s_sub_i32 s9, s10, 64
; GFX10-NEXT:    s_sub_i32 s11, 64, s10
; GFX10-NEXT:    s_cmp_lt_u32 s10, 64
; GFX10-NEXT:    s_cselect_b32 s20, 1, 0
; GFX10-NEXT:    s_cmp_eq_u32 s10, 0
; GFX10-NEXT:    s_cselect_b32 s21, 1, 0
; GFX10-NEXT:    s_lshr_b64 s[16:17], s[4:5], s11
; GFX10-NEXT:    s_lshl_b64 s[18:19], s[6:7], s10
; GFX10-NEXT:    s_lshl_b64 s[10:11], s[4:5], s10
; GFX10-NEXT:    s_or_b64 s[16:17], s[16:17], s[18:19]
; GFX10-NEXT:    s_lshl_b64 s[4:5], s[4:5], s9
; GFX10-NEXT:    s_cmp_lg_u32 s20, 0
; GFX10-NEXT:    s_cselect_b64 s[10:11], s[10:11], 0
; GFX10-NEXT:    s_cselect_b64 s[4:5], s[16:17], s[4:5]
; GFX10-NEXT:    s_cmp_lg_u32 s21, 0
; GFX10-NEXT:    s_cselect_b64 s[6:7], s[6:7], s[4:5]
; GFX10-NEXT:    s_sub_i32 s18, s8, 64
; GFX10-NEXT:    s_sub_i32 s9, 64, s8
; GFX10-NEXT:    s_cmp_lt_u32 s8, 64
; GFX10-NEXT:    s_cselect_b32 s19, 1, 0
; GFX10-NEXT:    s_cmp_eq_u32 s8, 0
; GFX10-NEXT:    s_cselect_b32 s20, 1, 0
; GFX10-NEXT:    s_lshr_b64 s[4:5], s[12:13], s8
; GFX10-NEXT:    s_lshl_b64 s[16:17], s[14:15], s9
; GFX10-NEXT:    s_lshr_b64 s[8:9], s[14:15], s8
; GFX10-NEXT:    s_or_b64 s[4:5], s[4:5], s[16:17]
; GFX10-NEXT:    s_lshr_b64 s[14:15], s[14:15], s18
; GFX10-NEXT:    s_cmp_lg_u32 s19, 0
; GFX10-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[14:15]
; GFX10-NEXT:    s_cmp_lg_u32 s20, 0
; GFX10-NEXT:    s_cselect_b64 s[4:5], s[12:13], s[4:5]
; GFX10-NEXT:    s_cmp_lg_u32 s19, 0
; GFX10-NEXT:    s_cselect_b64 s[8:9], s[8:9], 0
; GFX10-NEXT:    s_or_b64 s[4:5], s[10:11], s[4:5]
; GFX10-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
; GFX10-NEXT:    ; return to shader part epilog
  %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt)
  ret <2 x i128> %result
}
6184
6185define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) {
6186; GFX6-LABEL: v_fshr_v2i128:
6187; GFX6:       ; %bb.0:
6188; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6189; GFX6-NEXT:    s_movk_i32 s6, 0x7f
6190; GFX6-NEXT:    v_xor_b32_e32 v17, -1, v16
6191; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
6192; GFX6-NEXT:    v_and_b32_e32 v23, s6, v17
6193; GFX6-NEXT:    v_lshrrev_b32_e32 v17, 31, v1
6194; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
6195; GFX6-NEXT:    v_or_b32_e32 v2, v2, v17
6196; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, 64, v23
6197; GFX6-NEXT:    v_lshr_b64 v[17:18], v[0:1], v17
6198; GFX6-NEXT:    v_lshl_b64 v[21:22], v[2:3], v23
6199; GFX6-NEXT:    v_and_b32_e32 v24, s6, v16
6200; GFX6-NEXT:    v_sub_i32_e32 v16, vcc, 64, v24
6201; GFX6-NEXT:    v_or_b32_e32 v21, v17, v21
6202; GFX6-NEXT:    v_or_b32_e32 v22, v18, v22
6203; GFX6-NEXT:    v_lshl_b64 v[16:17], v[10:11], v16
6204; GFX6-NEXT:    v_lshr_b64 v[18:19], v[8:9], v24
6205; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v23
6206; GFX6-NEXT:    v_or_b32_e32 v18, v18, v16
6207; GFX6-NEXT:    v_subrev_i32_e32 v16, vcc, 64, v23
6208; GFX6-NEXT:    v_or_b32_e32 v19, v19, v17
6209; GFX6-NEXT:    v_lshl_b64 v[16:17], v[0:1], v16
6210; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v23
6211; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v23
6212; GFX6-NEXT:    v_cndmask_b32_e32 v25, 0, v0, vcc
6213; GFX6-NEXT:    v_cndmask_b32_e32 v0, v16, v21, vcc
6214; GFX6-NEXT:    v_cndmask_b32_e32 v16, v17, v22, vcc
6215; GFX6-NEXT:    v_cndmask_b32_e64 v17, v0, v2, s[4:5]
6216; GFX6-NEXT:    v_cndmask_b32_e64 v16, v16, v3, s[4:5]
6217; GFX6-NEXT:    v_subrev_i32_e64 v0, s[4:5], 64, v24
6218; GFX6-NEXT:    v_lshr_b64 v[2:3], v[10:11], v0
6219; GFX6-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v24
6220; GFX6-NEXT:    v_cndmask_b32_e64 v2, v2, v18, s[4:5]
6221; GFX6-NEXT:    v_cndmask_b32_e32 v18, 0, v1, vcc
6222; GFX6-NEXT:    v_lshr_b64 v[0:1], v[10:11], v24
6223; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v24
6224; GFX6-NEXT:    v_cndmask_b32_e64 v3, v3, v19, s[4:5]
6225; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
6226; GFX6-NEXT:    v_cndmask_b32_e64 v8, 0, v0, s[4:5]
6227; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
6228; GFX6-NEXT:    v_cndmask_b32_e64 v9, 0, v1, s[4:5]
6229; GFX6-NEXT:    v_or_b32_e32 v0, v25, v2
6230; GFX6-NEXT:    v_or_b32_e32 v2, v17, v8
6231; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v20
6232; GFX6-NEXT:    v_lshl_b64 v[6:7], v[6:7], 1
6233; GFX6-NEXT:    v_or_b32_e32 v1, v18, v3
6234; GFX6-NEXT:    v_or_b32_e32 v3, v16, v9
6235; GFX6-NEXT:    v_and_b32_e32 v17, s6, v8
6236; GFX6-NEXT:    v_lshl_b64 v[8:9], v[4:5], 1
6237; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 31, v5
6238; GFX6-NEXT:    v_or_b32_e32 v6, v6, v4
6239; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 64, v17
6240; GFX6-NEXT:    v_lshr_b64 v[4:5], v[8:9], v4
6241; GFX6-NEXT:    v_lshl_b64 v[10:11], v[6:7], v17
6242; GFX6-NEXT:    v_subrev_i32_e32 v18, vcc, 64, v17
6243; GFX6-NEXT:    v_or_b32_e32 v10, v4, v10
6244; GFX6-NEXT:    v_or_b32_e32 v11, v5, v11
6245; GFX6-NEXT:    v_lshl_b64 v[4:5], v[8:9], v17
6246; GFX6-NEXT:    v_lshl_b64 v[8:9], v[8:9], v18
6247; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v17
6248; GFX6-NEXT:    v_and_b32_e32 v16, s6, v20
6249; GFX6-NEXT:    v_cndmask_b32_e32 v18, 0, v4, vcc
6250; GFX6-NEXT:    v_cndmask_b32_e32 v19, 0, v5, vcc
6251; GFX6-NEXT:    v_cndmask_b32_e32 v4, v8, v10, vcc
6252; GFX6-NEXT:    v_cndmask_b32_e32 v5, v9, v11, vcc
6253; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v17
6254; GFX6-NEXT:    v_cndmask_b32_e32 v8, v4, v6, vcc
6255; GFX6-NEXT:    v_cndmask_b32_e32 v9, v5, v7, vcc
6256; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, 64, v16
6257; GFX6-NEXT:    v_lshr_b64 v[4:5], v[12:13], v16
6258; GFX6-NEXT:    v_lshl_b64 v[6:7], v[14:15], v6
6259; GFX6-NEXT:    v_subrev_i32_e32 v10, vcc, 64, v16
6260; GFX6-NEXT:    v_or_b32_e32 v11, v4, v6
6261; GFX6-NEXT:    v_or_b32_e32 v17, v5, v7
6262; GFX6-NEXT:    v_lshr_b64 v[6:7], v[14:15], v10
6263; GFX6-NEXT:    v_lshr_b64 v[4:5], v[14:15], v16
6264; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v16
6265; GFX6-NEXT:    v_cndmask_b32_e32 v6, v6, v11, vcc
6266; GFX6-NEXT:    v_cndmask_b32_e32 v7, v7, v17, vcc
6267; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v16
6268; GFX6-NEXT:    v_cndmask_b32_e64 v6, v6, v12, s[4:5]
6269; GFX6-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[4:5]
6270; GFX6-NEXT:    v_cndmask_b32_e32 v10, 0, v4, vcc
6271; GFX6-NEXT:    v_cndmask_b32_e32 v11, 0, v5, vcc
6272; GFX6-NEXT:    v_or_b32_e32 v4, v18, v6
6273; GFX6-NEXT:    v_or_b32_e32 v5, v19, v7
6274; GFX6-NEXT:    v_or_b32_e32 v6, v8, v10
6275; GFX6-NEXT:    v_or_b32_e32 v7, v9, v11
6276; GFX6-NEXT:    s_setpc_b64 s[30:31]
6277;
6278; GFX8-LABEL: v_fshr_v2i128:
6279; GFX8:       ; %bb.0:
6280; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6281; GFX8-NEXT:    s_movk_i32 s6, 0x7f
6282; GFX8-NEXT:    v_xor_b32_e32 v17, -1, v16
6283; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
6284; GFX8-NEXT:    v_and_b32_e32 v23, s6, v17
6285; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 31, v1
6286; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
6287; GFX8-NEXT:    v_or_b32_e32 v2, v2, v17
6288; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, 64, v23
6289; GFX8-NEXT:    v_lshrrev_b64 v[17:18], v17, v[0:1]
6290; GFX8-NEXT:    v_lshlrev_b64 v[21:22], v23, v[2:3]
6291; GFX8-NEXT:    v_and_b32_e32 v24, s6, v16
6292; GFX8-NEXT:    v_sub_u32_e32 v16, vcc, 64, v24
6293; GFX8-NEXT:    v_or_b32_e32 v21, v17, v21
6294; GFX8-NEXT:    v_or_b32_e32 v22, v18, v22
6295; GFX8-NEXT:    v_lshlrev_b64 v[16:17], v16, v[10:11]
6296; GFX8-NEXT:    v_lshrrev_b64 v[18:19], v24, v[8:9]
6297; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v23
6298; GFX8-NEXT:    v_or_b32_e32 v18, v18, v16
6299; GFX8-NEXT:    v_subrev_u32_e32 v16, vcc, 64, v23
6300; GFX8-NEXT:    v_or_b32_e32 v19, v19, v17
6301; GFX8-NEXT:    v_lshlrev_b64 v[16:17], v16, v[0:1]
6302; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v23, v[0:1]
6303; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v23
6304; GFX8-NEXT:    v_cndmask_b32_e32 v25, 0, v0, vcc
6305; GFX8-NEXT:    v_cndmask_b32_e32 v0, v16, v21, vcc
6306; GFX8-NEXT:    v_cndmask_b32_e32 v16, v17, v22, vcc
6307; GFX8-NEXT:    v_cndmask_b32_e64 v17, v0, v2, s[4:5]
6308; GFX8-NEXT:    v_cndmask_b32_e64 v16, v16, v3, s[4:5]
6309; GFX8-NEXT:    v_subrev_u32_e64 v0, s[4:5], 64, v24
6310; GFX8-NEXT:    v_lshrrev_b64 v[2:3], v0, v[10:11]
6311; GFX8-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v24
6312; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, v18, s[4:5]
6313; GFX8-NEXT:    v_cndmask_b32_e32 v18, 0, v1, vcc
6314; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v24, v[10:11]
6315; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v24
6316; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v19, s[4:5]
6317; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
6318; GFX8-NEXT:    v_cndmask_b32_e64 v8, 0, v0, s[4:5]
6319; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
6320; GFX8-NEXT:    v_cndmask_b32_e64 v9, 0, v1, s[4:5]
6321; GFX8-NEXT:    v_or_b32_e32 v0, v25, v2
6322; GFX8-NEXT:    v_or_b32_e32 v2, v17, v8
6323; GFX8-NEXT:    v_xor_b32_e32 v8, -1, v20
6324; GFX8-NEXT:    v_lshlrev_b64 v[6:7], 1, v[6:7]
6325; GFX8-NEXT:    v_or_b32_e32 v1, v18, v3
6326; GFX8-NEXT:    v_or_b32_e32 v3, v16, v9
6327; GFX8-NEXT:    v_and_b32_e32 v17, s6, v8
6328; GFX8-NEXT:    v_lshlrev_b64 v[8:9], 1, v[4:5]
6329; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 31, v5
6330; GFX8-NEXT:    v_or_b32_e32 v6, v6, v4
6331; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 64, v17
6332; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v4, v[8:9]
6333; GFX8-NEXT:    v_lshlrev_b64 v[10:11], v17, v[6:7]
6334; GFX8-NEXT:    v_subrev_u32_e32 v18, vcc, 64, v17
6335; GFX8-NEXT:    v_or_b32_e32 v10, v4, v10
6336; GFX8-NEXT:    v_or_b32_e32 v11, v5, v11
6337; GFX8-NEXT:    v_lshlrev_b64 v[4:5], v17, v[8:9]
6338; GFX8-NEXT:    v_lshlrev_b64 v[8:9], v18, v[8:9]
6339; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v17
6340; GFX8-NEXT:    v_and_b32_e32 v16, s6, v20
6341; GFX8-NEXT:    v_cndmask_b32_e32 v18, 0, v4, vcc
6342; GFX8-NEXT:    v_cndmask_b32_e32 v19, 0, v5, vcc
6343; GFX8-NEXT:    v_cndmask_b32_e32 v4, v8, v10, vcc
6344; GFX8-NEXT:    v_cndmask_b32_e32 v5, v9, v11, vcc
6345; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v17
6346; GFX8-NEXT:    v_cndmask_b32_e32 v8, v4, v6, vcc
6347; GFX8-NEXT:    v_cndmask_b32_e32 v9, v5, v7, vcc
6348; GFX8-NEXT:    v_sub_u32_e32 v6, vcc, 64, v16
6349; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v16, v[12:13]
6350; GFX8-NEXT:    v_lshlrev_b64 v[6:7], v6, v[14:15]
6351; GFX8-NEXT:    v_subrev_u32_e32 v10, vcc, 64, v16
6352; GFX8-NEXT:    v_or_b32_e32 v11, v4, v6
6353; GFX8-NEXT:    v_or_b32_e32 v17, v5, v7
6354; GFX8-NEXT:    v_lshrrev_b64 v[6:7], v10, v[14:15]
6355; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v16, v[14:15]
6356; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v16
6357; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v11, vcc
6358; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v17, vcc
6359; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v16
6360; GFX8-NEXT:    v_cndmask_b32_e64 v6, v6, v12, s[4:5]
6361; GFX8-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[4:5]
6362; GFX8-NEXT:    v_cndmask_b32_e32 v10, 0, v4, vcc
6363; GFX8-NEXT:    v_cndmask_b32_e32 v11, 0, v5, vcc
6364; GFX8-NEXT:    v_or_b32_e32 v4, v18, v6
6365; GFX8-NEXT:    v_or_b32_e32 v5, v19, v7
6366; GFX8-NEXT:    v_or_b32_e32 v6, v8, v10
6367; GFX8-NEXT:    v_or_b32_e32 v7, v9, v11
6368; GFX8-NEXT:    s_setpc_b64 s[30:31]
6369;
6370; GFX9-LABEL: v_fshr_v2i128:
6371; GFX9:       ; %bb.0:
6372; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6373; GFX9-NEXT:    s_movk_i32 s6, 0x7f
6374; GFX9-NEXT:    v_xor_b32_e32 v17, -1, v16
6375; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
6376; GFX9-NEXT:    v_and_b32_e32 v23, s6, v17
6377; GFX9-NEXT:    v_lshrrev_b32_e32 v17, 31, v1
6378; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
6379; GFX9-NEXT:    v_or_b32_e32 v2, v2, v17
6380; GFX9-NEXT:    v_sub_u32_e32 v17, 64, v23
6381; GFX9-NEXT:    v_lshrrev_b64 v[17:18], v17, v[0:1]
6382; GFX9-NEXT:    v_lshlrev_b64 v[21:22], v23, v[2:3]
6383; GFX9-NEXT:    v_and_b32_e32 v24, s6, v16
6384; GFX9-NEXT:    v_sub_u32_e32 v16, 64, v24
6385; GFX9-NEXT:    v_or_b32_e32 v21, v17, v21
6386; GFX9-NEXT:    v_or_b32_e32 v22, v18, v22
6387; GFX9-NEXT:    v_lshlrev_b64 v[16:17], v16, v[10:11]
6388; GFX9-NEXT:    v_lshrrev_b64 v[18:19], v24, v[8:9]
6389; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v23
6390; GFX9-NEXT:    v_or_b32_e32 v18, v18, v16
6391; GFX9-NEXT:    v_subrev_u32_e32 v16, 64, v23
6392; GFX9-NEXT:    v_or_b32_e32 v19, v19, v17
6393; GFX9-NEXT:    v_lshlrev_b64 v[16:17], v16, v[0:1]
6394; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v23, v[0:1]
6395; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v23
6396; GFX9-NEXT:    v_cndmask_b32_e32 v25, 0, v0, vcc
6397; GFX9-NEXT:    v_cndmask_b32_e32 v0, v16, v21, vcc
6398; GFX9-NEXT:    v_cndmask_b32_e32 v16, v17, v22, vcc
6399; GFX9-NEXT:    v_cndmask_b32_e64 v17, v0, v2, s[4:5]
6400; GFX9-NEXT:    v_subrev_u32_e32 v0, 64, v24
6401; GFX9-NEXT:    v_cndmask_b32_e64 v16, v16, v3, s[4:5]
6402; GFX9-NEXT:    v_lshrrev_b64 v[2:3], v0, v[10:11]
6403; GFX9-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v24
6404; GFX9-NEXT:    v_cndmask_b32_e64 v2, v2, v18, s[4:5]
6405; GFX9-NEXT:    v_cndmask_b32_e32 v18, 0, v1, vcc
6406; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v24, v[10:11]
6407; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v24
6408; GFX9-NEXT:    v_cndmask_b32_e64 v3, v3, v19, s[4:5]
6409; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
6410; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, v0, s[4:5]
6411; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
6412; GFX9-NEXT:    v_cndmask_b32_e64 v9, 0, v1, s[4:5]
6413; GFX9-NEXT:    v_or_b32_e32 v0, v25, v2
6414; GFX9-NEXT:    v_or_b32_e32 v2, v17, v8
6415; GFX9-NEXT:    v_xor_b32_e32 v8, -1, v20
6416; GFX9-NEXT:    v_lshlrev_b64 v[6:7], 1, v[6:7]
6417; GFX9-NEXT:    v_or_b32_e32 v1, v18, v3
6418; GFX9-NEXT:    v_or_b32_e32 v3, v16, v9
6419; GFX9-NEXT:    v_and_b32_e32 v17, s6, v8
6420; GFX9-NEXT:    v_lshlrev_b64 v[8:9], 1, v[4:5]
6421; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 31, v5
6422; GFX9-NEXT:    v_or_b32_e32 v6, v6, v4
6423; GFX9-NEXT:    v_sub_u32_e32 v4, 64, v17
6424; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v4, v[8:9]
6425; GFX9-NEXT:    v_lshlrev_b64 v[10:11], v17, v[6:7]
6426; GFX9-NEXT:    v_subrev_u32_e32 v18, 64, v17
6427; GFX9-NEXT:    v_or_b32_e32 v10, v4, v10
6428; GFX9-NEXT:    v_or_b32_e32 v11, v5, v11
6429; GFX9-NEXT:    v_lshlrev_b64 v[4:5], v17, v[8:9]
6430; GFX9-NEXT:    v_lshlrev_b64 v[8:9], v18, v[8:9]
6431; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v17
6432; GFX9-NEXT:    v_and_b32_e32 v16, s6, v20
6433; GFX9-NEXT:    v_cndmask_b32_e32 v18, 0, v4, vcc
6434; GFX9-NEXT:    v_cndmask_b32_e32 v19, 0, v5, vcc
6435; GFX9-NEXT:    v_cndmask_b32_e32 v4, v8, v10, vcc
6436; GFX9-NEXT:    v_cndmask_b32_e32 v5, v9, v11, vcc
6437; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v17
6438; GFX9-NEXT:    v_cndmask_b32_e32 v8, v4, v6, vcc
6439; GFX9-NEXT:    v_sub_u32_e32 v6, 64, v16
6440; GFX9-NEXT:    v_cndmask_b32_e32 v9, v5, v7, vcc
6441; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v16, v[12:13]
6442; GFX9-NEXT:    v_lshlrev_b64 v[6:7], v6, v[14:15]
6443; GFX9-NEXT:    v_subrev_u32_e32 v10, 64, v16
6444; GFX9-NEXT:    v_or_b32_e32 v11, v4, v6
6445; GFX9-NEXT:    v_or_b32_e32 v17, v5, v7
6446; GFX9-NEXT:    v_lshrrev_b64 v[6:7], v10, v[14:15]
6447; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v16, v[14:15]
6448; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v16
6449; GFX9-NEXT:    v_cndmask_b32_e32 v6, v6, v11, vcc
6450; GFX9-NEXT:    v_cndmask_b32_e32 v7, v7, v17, vcc
6451; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v16
6452; GFX9-NEXT:    v_cndmask_b32_e64 v6, v6, v12, s[4:5]
6453; GFX9-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[4:5]
6454; GFX9-NEXT:    v_cndmask_b32_e32 v10, 0, v4, vcc
6455; GFX9-NEXT:    v_cndmask_b32_e32 v11, 0, v5, vcc
6456; GFX9-NEXT:    v_or_b32_e32 v4, v18, v6
6457; GFX9-NEXT:    v_or_b32_e32 v5, v19, v7
6458; GFX9-NEXT:    v_or_b32_e32 v6, v8, v10
6459; GFX9-NEXT:    v_or_b32_e32 v7, v9, v11
6460; GFX9-NEXT:    s_setpc_b64 s[30:31]
6461;
6462; GFX10-LABEL: v_fshr_v2i128:
6463; GFX10:       ; %bb.0:
6464; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6465; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
6466; GFX10-NEXT:    v_xor_b32_e32 v17, -1, v16
6467; GFX10-NEXT:    s_movk_i32 s5, 0x7f
6468; GFX10-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
6469; GFX10-NEXT:    v_and_b32_e32 v26, s5, v16
6470; GFX10-NEXT:    v_lshlrev_b64 v[6:7], 1, v[6:7]
6471; GFX10-NEXT:    v_and_b32_e32 v25, s5, v17
6472; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 31, v1
6473; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
6474; GFX10-NEXT:    v_subrev_nc_u32_e32 v27, 64, v26
6475; GFX10-NEXT:    v_cmp_gt_u32_e64 s4, 64, v26
6476; GFX10-NEXT:    v_sub_nc_u32_e32 v18, 64, v25
6477; GFX10-NEXT:    v_or_b32_e32 v2, v2, v17
6478; GFX10-NEXT:    v_subrev_nc_u32_e32 v19, 64, v25
6479; GFX10-NEXT:    v_lshlrev_b64 v[23:24], v25, v[0:1]
6480; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v25
6481; GFX10-NEXT:    v_lshrrev_b64 v[17:18], v18, v[0:1]
6482; GFX10-NEXT:    v_lshlrev_b64 v[21:22], v25, v[2:3]
6483; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v19, v[0:1]
6484; GFX10-NEXT:    v_cndmask_b32_e32 v23, 0, v23, vcc_lo
6485; GFX10-NEXT:    v_cndmask_b32_e32 v24, 0, v24, vcc_lo
6486; GFX10-NEXT:    v_or_b32_e32 v22, v18, v22
6487; GFX10-NEXT:    v_sub_nc_u32_e32 v18, 64, v26
6488; GFX10-NEXT:    v_or_b32_e32 v21, v17, v21
6489; GFX10-NEXT:    v_lshrrev_b64 v[16:17], v26, v[8:9]
6490; GFX10-NEXT:    v_cndmask_b32_e32 v22, v1, v22, vcc_lo
6491; GFX10-NEXT:    v_lshlrev_b64 v[18:19], v18, v[10:11]
6492; GFX10-NEXT:    v_cndmask_b32_e32 v21, v0, v21, vcc_lo
6493; GFX10-NEXT:    v_lshrrev_b64 v[0:1], v27, v[10:11]
6494; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v25
6495; GFX10-NEXT:    v_or_b32_e32 v16, v16, v18
6496; GFX10-NEXT:    v_or_b32_e32 v17, v17, v19
6497; GFX10-NEXT:    v_cndmask_b32_e32 v18, v21, v2, vcc_lo
6498; GFX10-NEXT:    v_cndmask_b32_e32 v22, v22, v3, vcc_lo
6499; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v26
6500; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, v16, s4
6501; GFX10-NEXT:    v_xor_b32_e32 v16, -1, v20
6502; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s4
6503; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v26, v[10:11]
6504; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
6505; GFX10-NEXT:    v_and_b32_e32 v25, s5, v16
6506; GFX10-NEXT:    v_lshrrev_b32_e32 v8, 31, v5
6507; GFX10-NEXT:    v_lshlrev_b64 v[4:5], 1, v[4:5]
6508; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
6509; GFX10-NEXT:    v_or_b32_e32 v0, v23, v0
6510; GFX10-NEXT:    v_sub_nc_u32_e32 v9, 64, v25
6511; GFX10-NEXT:    v_or_b32_e32 v6, v6, v8
6512; GFX10-NEXT:    v_and_b32_e32 v23, s5, v20
6513; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s4
6514; GFX10-NEXT:    v_cndmask_b32_e64 v26, 0, v3, s4
6515; GFX10-NEXT:    v_lshrrev_b64 v[8:9], v9, v[4:5]
6516; GFX10-NEXT:    v_lshlrev_b64 v[10:11], v25, v[6:7]
6517; GFX10-NEXT:    v_sub_nc_u32_e32 v20, 64, v23
6518; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 64, v25
6519; GFX10-NEXT:    v_or_b32_e32 v2, v18, v2
6520; GFX10-NEXT:    v_lshlrev_b64 v[16:17], v25, v[4:5]
6521; GFX10-NEXT:    v_lshrrev_b64 v[18:19], v23, v[12:13]
6522; GFX10-NEXT:    v_or_b32_e32 v10, v8, v10
6523; GFX10-NEXT:    v_subrev_nc_u32_e32 v8, 64, v23
6524; GFX10-NEXT:    v_lshlrev_b64 v[20:21], v20, v[14:15]
6525; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v25
6526; GFX10-NEXT:    v_lshlrev_b64 v[3:4], v3, v[4:5]
6527; GFX10-NEXT:    v_or_b32_e32 v5, v9, v11
6528; GFX10-NEXT:    v_lshrrev_b64 v[8:9], v8, v[14:15]
6529; GFX10-NEXT:    v_cmp_gt_u32_e64 s4, 64, v23
6530; GFX10-NEXT:    v_cndmask_b32_e32 v11, 0, v16, vcc_lo
6531; GFX10-NEXT:    v_or_b32_e32 v16, v18, v20
6532; GFX10-NEXT:    v_or_b32_e32 v18, v19, v21
6533; GFX10-NEXT:    v_cndmask_b32_e32 v10, v3, v10, vcc_lo
6534; GFX10-NEXT:    v_cndmask_b32_e32 v5, v4, v5, vcc_lo
6535; GFX10-NEXT:    v_lshrrev_b64 v[3:4], v23, v[14:15]
6536; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v16, s4
6537; GFX10-NEXT:    v_cmp_eq_u32_e64 s5, 0, v23
6538; GFX10-NEXT:    v_cmp_eq_u32_e64 s6, 0, v25
6539; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, v18, s4
6540; GFX10-NEXT:    v_cndmask_b32_e32 v14, 0, v17, vcc_lo
6541; GFX10-NEXT:    v_or_b32_e32 v1, v24, v1
6542; GFX10-NEXT:    v_cndmask_b32_e64 v6, v10, v6, s6
6543; GFX10-NEXT:    v_cndmask_b32_e64 v7, v5, v7, s6
6544; GFX10-NEXT:    v_cndmask_b32_e64 v5, v8, v12, s5
6545; GFX10-NEXT:    v_cndmask_b32_e64 v8, v9, v13, s5
6546; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, v3, s4
6547; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, v4, s4
6548; GFX10-NEXT:    v_or_b32_e32 v3, v22, v26
6549; GFX10-NEXT:    v_or_b32_e32 v4, v11, v5
6550; GFX10-NEXT:    v_or_b32_e32 v5, v14, v8
6551; GFX10-NEXT:    v_or_b32_e32 v6, v6, v9
6552; GFX10-NEXT:    v_or_b32_e32 v7, v7, v10
6553; GFX10-NEXT:    s_setpc_b64 s[30:31]
6554  %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt)
6555  ret <2 x i128> %result
6556}
6557
; Declarations of the llvm.fshr (funnel shift right) intrinsic overloads, one
; per scalar or vector type. Each overload takes three operands of the same
; type and returns that type. The declarations are grouped by element width,
; narrowest first, and all of them share attribute group #0 defined below.
6558declare i7 @llvm.fshr.i7(i7, i7, i7) #0
6559declare i8 @llvm.fshr.i8(i8, i8, i8) #0
6560declare <2 x i8> @llvm.fshr.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) #0
6561declare <4 x i8> @llvm.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) #0
6562
6563declare i16 @llvm.fshr.i16(i16, i16, i16) #0
6564declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #0
6565declare <3 x i16> @llvm.fshr.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) #0
6566declare <4 x i16> @llvm.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #0
6567declare <5 x i16> @llvm.fshr.v5i16(<5 x i16>, <5 x i16>, <5 x i16>) #0
6568declare <6 x i16> @llvm.fshr.v6i16(<6 x i16>, <6 x i16>, <6 x i16>) #0
6569declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) #0
6570
6571declare i24 @llvm.fshr.i24(i24, i24, i24) #0
6572declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) #0
6573
6574declare i32 @llvm.fshr.i32(i32, i32, i32) #0
6575declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) #0
6576declare <3 x i32> @llvm.fshr.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) #0
6577declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #0
6578declare <5 x i32> @llvm.fshr.v5i32(<5 x i32>, <5 x i32>, <5 x i32>) #0
6579declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) #0
6580
6581declare i48 @llvm.fshr.i48(i48, i48, i48) #0
6582
6583declare i64 @llvm.fshr.i64(i64, i64, i64) #0
6584declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) #0
6585
6586declare i128 @llvm.fshr.i128(i128, i128, i128) #0
6587declare <2 x i128> @llvm.fshr.v2i128(<2 x i128>, <2 x i128>, <2 x i128>) #0
6588
; Attribute group applied to every llvm.fshr declaration above.
6589attributes #0 = { nounwind readnone speculatable willreturn }
6590