1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -amdgpu-codegenprepare-expand-div64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-IR %s
4
; s_test_sdiv: 64-bit signed division with kernel-argument operands.
; Computes `sdiv i64 %x, %y` and stores the quotient through %out.
;
; Two sets of expected output follow, one per RUN line at the top of the file:
;  * the first is a single straight-line block (no branches) built around
;    v_rcp_f32 followed by 32-bit multiply/add-with-carry steps;
;  * the second (the run that adds -amdgpu-codegenprepare-expand-div64) has
;    explicit control flow, including a loop in the udiv-do-while block.
;
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script instead of editing them by hand.
5define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
6; GCN-LABEL: s_test_sdiv:
7; GCN:       ; %bb.0:
8; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
9; GCN-NEXT:    s_mov_b32 s7, 0xf000
10; GCN-NEXT:    s_mov_b32 s6, -1
11; GCN-NEXT:    s_waitcnt lgkmcnt(0)
12; GCN-NEXT:    s_ashr_i32 s8, s3, 31
13; GCN-NEXT:    s_add_u32 s2, s2, s8
14; GCN-NEXT:    s_mov_b32 s9, s8
15; GCN-NEXT:    s_addc_u32 s3, s3, s8
16; GCN-NEXT:    s_xor_b64 s[10:11], s[2:3], s[8:9]
17; GCN-NEXT:    v_cvt_f32_u32_e32 v0, s10
18; GCN-NEXT:    v_cvt_f32_u32_e32 v1, s11
19; GCN-NEXT:    s_sub_u32 s4, 0, s10
20; GCN-NEXT:    s_subb_u32 s5, 0, s11
21; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
22; GCN-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
23; GCN-NEXT:    v_rcp_f32_e32 v0, v0
24; GCN-NEXT:    s_waitcnt lgkmcnt(0)
25; GCN-NEXT:    s_ashr_i32 s12, s3, 31
26; GCN-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
27; GCN-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
28; GCN-NEXT:    v_trunc_f32_e32 v1, v1
29; GCN-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
30; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
31; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
32; GCN-NEXT:    s_add_u32 s2, s2, s12
33; GCN-NEXT:    s_mov_b32 s13, s12
34; GCN-NEXT:    v_mul_lo_u32 v2, s4, v1
35; GCN-NEXT:    v_mul_hi_u32 v3, s4, v0
36; GCN-NEXT:    v_mul_lo_u32 v5, s5, v0
37; GCN-NEXT:    v_mul_lo_u32 v4, s4, v0
38; GCN-NEXT:    s_addc_u32 s3, s3, s12
39; GCN-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
40; GCN-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
41; GCN-NEXT:    v_mul_hi_u32 v3, v0, v4
42; GCN-NEXT:    v_mul_lo_u32 v5, v0, v2
43; GCN-NEXT:    v_mul_hi_u32 v7, v0, v2
44; GCN-NEXT:    v_mul_lo_u32 v6, v1, v4
45; GCN-NEXT:    v_mul_hi_u32 v4, v1, v4
46; GCN-NEXT:    v_mul_hi_u32 v8, v1, v2
47; GCN-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
48; GCN-NEXT:    v_addc_u32_e32 v5, vcc, 0, v7, vcc
49; GCN-NEXT:    v_mul_lo_u32 v2, v1, v2
50; GCN-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
51; GCN-NEXT:    v_addc_u32_e32 v3, vcc, v5, v4, vcc
52; GCN-NEXT:    v_addc_u32_e32 v4, vcc, 0, v8, vcc
53; GCN-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
54; GCN-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
55; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
56; GCN-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
57; GCN-NEXT:    v_mul_lo_u32 v2, s4, v1
58; GCN-NEXT:    v_mul_hi_u32 v3, s4, v0
59; GCN-NEXT:    v_mul_lo_u32 v4, s5, v0
60; GCN-NEXT:    s_xor_b64 s[2:3], s[2:3], s[12:13]
61; GCN-NEXT:    s_mov_b32 s5, s1
62; GCN-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
63; GCN-NEXT:    v_mul_lo_u32 v3, s4, v0
64; GCN-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
65; GCN-NEXT:    v_mul_lo_u32 v6, v0, v2
66; GCN-NEXT:    v_mul_hi_u32 v7, v0, v3
67; GCN-NEXT:    v_mul_hi_u32 v8, v0, v2
68; GCN-NEXT:    v_mul_hi_u32 v5, v1, v3
69; GCN-NEXT:    v_mul_lo_u32 v3, v1, v3
70; GCN-NEXT:    v_mul_hi_u32 v4, v1, v2
71; GCN-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
72; GCN-NEXT:    v_addc_u32_e32 v7, vcc, 0, v8, vcc
73; GCN-NEXT:    v_mul_lo_u32 v2, v1, v2
74; GCN-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
75; GCN-NEXT:    v_addc_u32_e32 v3, vcc, v7, v5, vcc
76; GCN-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
77; GCN-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
78; GCN-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
79; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
80; GCN-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
81; GCN-NEXT:    v_mul_lo_u32 v2, s2, v1
82; GCN-NEXT:    v_mul_hi_u32 v3, s2, v0
83; GCN-NEXT:    v_mul_hi_u32 v4, s2, v1
84; GCN-NEXT:    v_mul_hi_u32 v5, s3, v1
85; GCN-NEXT:    v_mul_lo_u32 v1, s3, v1
86; GCN-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
87; GCN-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
88; GCN-NEXT:    v_mul_lo_u32 v4, s3, v0
89; GCN-NEXT:    v_mul_hi_u32 v0, s3, v0
90; GCN-NEXT:    s_mov_b32 s4, s0
91; GCN-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
92; GCN-NEXT:    v_addc_u32_e32 v0, vcc, v3, v0, vcc
93; GCN-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
94; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
95; GCN-NEXT:    v_addc_u32_e32 v1, vcc, 0, v2, vcc
96; GCN-NEXT:    v_mul_lo_u32 v2, s10, v1
97; GCN-NEXT:    v_mul_hi_u32 v3, s10, v0
98; GCN-NEXT:    v_mul_lo_u32 v4, s11, v0
99; GCN-NEXT:    v_mov_b32_e32 v5, s11
100; GCN-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
101; GCN-NEXT:    v_mul_lo_u32 v3, s10, v0
102; GCN-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
103; GCN-NEXT:    v_sub_i32_e32 v4, vcc, s3, v2
104; GCN-NEXT:    v_sub_i32_e32 v3, vcc, s2, v3
105; GCN-NEXT:    v_subb_u32_e64 v4, s[0:1], v4, v5, vcc
106; GCN-NEXT:    v_subrev_i32_e64 v5, s[0:1], s10, v3
107; GCN-NEXT:    v_subbrev_u32_e64 v4, s[0:1], 0, v4, s[0:1]
108; GCN-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v4
109; GCN-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[0:1]
110; GCN-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v5
111; GCN-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
112; GCN-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v4
113; GCN-NEXT:    v_cndmask_b32_e64 v4, v6, v5, s[0:1]
114; GCN-NEXT:    v_add_i32_e64 v5, s[0:1], 2, v0
115; GCN-NEXT:    v_addc_u32_e64 v6, s[0:1], 0, v1, s[0:1]
116; GCN-NEXT:    v_add_i32_e64 v7, s[0:1], 1, v0
117; GCN-NEXT:    v_addc_u32_e64 v8, s[0:1], 0, v1, s[0:1]
118; GCN-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v4
119; GCN-NEXT:    v_cndmask_b32_e64 v4, v8, v6, s[0:1]
120; GCN-NEXT:    v_mov_b32_e32 v6, s3
121; GCN-NEXT:    v_subb_u32_e32 v2, vcc, v6, v2, vcc
122; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s11, v2
123; GCN-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
124; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s10, v3
125; GCN-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
126; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, s11, v2
127; GCN-NEXT:    v_cndmask_b32_e32 v2, v6, v3, vcc
128; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
129; GCN-NEXT:    v_cndmask_b32_e64 v2, v7, v5, s[0:1]
130; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
131; GCN-NEXT:    s_xor_b64 s[0:1], s[12:13], s[8:9]
132; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
133; GCN-NEXT:    v_xor_b32_e32 v0, s0, v0
134; GCN-NEXT:    v_xor_b32_e32 v1, s1, v1
135; GCN-NEXT:    v_mov_b32_e32 v2, s1
136; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s0, v0
137; GCN-NEXT:    v_subb_u32_e32 v1, vcc, v1, v2, vcc
138; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
139; GCN-NEXT:    s_endpgm
140;
141; GCN-IR-LABEL: s_test_sdiv:
142; GCN-IR:       ; %bb.0: ; %_udiv-special-cases
143; GCN-IR-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
144; GCN-IR-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xd
145; GCN-IR-NEXT:    s_waitcnt lgkmcnt(0)
146; GCN-IR-NEXT:    s_ashr_i32 s0, s7, 31
147; GCN-IR-NEXT:    s_mov_b32 s1, s0
148; GCN-IR-NEXT:    s_ashr_i32 s2, s9, 31
149; GCN-IR-NEXT:    s_xor_b64 s[6:7], s[0:1], s[6:7]
150; GCN-IR-NEXT:    s_mov_b32 s3, s2
151; GCN-IR-NEXT:    s_sub_u32 s12, s6, s0
152; GCN-IR-NEXT:    s_subb_u32 s13, s7, s0
153; GCN-IR-NEXT:    s_xor_b64 s[6:7], s[2:3], s[8:9]
154; GCN-IR-NEXT:    s_sub_u32 s6, s6, s2
155; GCN-IR-NEXT:    s_subb_u32 s7, s7, s2
156; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[6:7], 0
157; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[14:15], s[12:13], 0
158; GCN-IR-NEXT:    s_mov_b64 s[8:9], 0
159; GCN-IR-NEXT:    s_or_b64 s[16:17], s[10:11], s[14:15]
160; GCN-IR-NEXT:    s_flbit_i32_b32 s10, s6
161; GCN-IR-NEXT:    s_add_i32 s10, s10, 32
162; GCN-IR-NEXT:    s_flbit_i32_b32 s11, s7
163; GCN-IR-NEXT:    s_min_u32 s14, s10, s11
164; GCN-IR-NEXT:    s_flbit_i32_b32 s10, s12
165; GCN-IR-NEXT:    s_add_i32 s10, s10, 32
166; GCN-IR-NEXT:    s_flbit_i32_b32 s11, s13
167; GCN-IR-NEXT:    s_min_u32 s18, s10, s11
168; GCN-IR-NEXT:    s_sub_u32 s10, s14, s18
169; GCN-IR-NEXT:    s_subb_u32 s11, 0, 0
170; GCN-IR-NEXT:    v_cmp_gt_u64_e64 s[20:21], s[10:11], 63
171; GCN-IR-NEXT:    s_mov_b32 s15, 0
172; GCN-IR-NEXT:    s_or_b64 s[16:17], s[16:17], s[20:21]
173; GCN-IR-NEXT:    v_cmp_ne_u64_e64 s[20:21], s[10:11], 63
174; GCN-IR-NEXT:    s_xor_b64 s[22:23], s[16:17], -1
175; GCN-IR-NEXT:    s_and_b64 s[20:21], s[22:23], s[20:21]
176; GCN-IR-NEXT:    s_and_b64 vcc, exec, s[20:21]
177; GCN-IR-NEXT:    s_cbranch_vccz .LBB0_5
178; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
179; GCN-IR-NEXT:    s_add_u32 s16, s10, 1
180; GCN-IR-NEXT:    s_addc_u32 s17, s11, 0
181; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[20:21], s[16:17], 0
182; GCN-IR-NEXT:    s_sub_i32 s10, 63, s10
183; GCN-IR-NEXT:    s_andn2_b64 vcc, exec, s[20:21]
184; GCN-IR-NEXT:    s_lshl_b64 s[10:11], s[12:13], s10
185; GCN-IR-NEXT:    s_cbranch_vccz .LBB0_4
186; GCN-IR-NEXT:  ; %bb.2: ; %udiv-preheader
187; GCN-IR-NEXT:    s_lshr_b64 s[16:17], s[12:13], s16
188; GCN-IR-NEXT:    s_add_u32 s19, s6, -1
189; GCN-IR-NEXT:    s_addc_u32 s20, s7, -1
190; GCN-IR-NEXT:    s_not_b64 s[8:9], s[14:15]
191; GCN-IR-NEXT:    s_add_u32 s12, s8, s18
192; GCN-IR-NEXT:    s_addc_u32 s13, s9, 0
193; GCN-IR-NEXT:    s_mov_b64 s[14:15], 0
194; GCN-IR-NEXT:    s_mov_b32 s9, 0
195; GCN-IR-NEXT:  .LBB0_3: ; %udiv-do-while
196; GCN-IR-NEXT:    ; =>This Inner Loop Header: Depth=1
197; GCN-IR-NEXT:    s_lshl_b64 s[16:17], s[16:17], 1
198; GCN-IR-NEXT:    s_lshr_b32 s8, s11, 31
199; GCN-IR-NEXT:    s_lshl_b64 s[10:11], s[10:11], 1
200; GCN-IR-NEXT:    s_or_b64 s[16:17], s[16:17], s[8:9]
201; GCN-IR-NEXT:    s_or_b64 s[10:11], s[14:15], s[10:11]
202; GCN-IR-NEXT:    s_sub_u32 s8, s19, s16
203; GCN-IR-NEXT:    s_subb_u32 s8, s20, s17
204; GCN-IR-NEXT:    s_ashr_i32 s14, s8, 31
205; GCN-IR-NEXT:    s_mov_b32 s15, s14
206; GCN-IR-NEXT:    s_and_b32 s8, s14, 1
207; GCN-IR-NEXT:    s_and_b64 s[14:15], s[14:15], s[6:7]
208; GCN-IR-NEXT:    s_sub_u32 s16, s16, s14
209; GCN-IR-NEXT:    s_subb_u32 s17, s17, s15
210; GCN-IR-NEXT:    s_add_u32 s12, s12, 1
211; GCN-IR-NEXT:    s_addc_u32 s13, s13, 0
212; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[22:23], s[12:13], 0
213; GCN-IR-NEXT:    s_mov_b64 s[14:15], s[8:9]
214; GCN-IR-NEXT:    s_and_b64 vcc, exec, s[22:23]
215; GCN-IR-NEXT:    s_cbranch_vccz .LBB0_3
216; GCN-IR-NEXT:  .LBB0_4: ; %Flow6
217; GCN-IR-NEXT:    s_lshl_b64 s[6:7], s[10:11], 1
218; GCN-IR-NEXT:    s_or_b64 s[6:7], s[8:9], s[6:7]
219; GCN-IR-NEXT:    v_mov_b32_e32 v0, s6
220; GCN-IR-NEXT:    v_mov_b32_e32 v1, s7
221; GCN-IR-NEXT:    s_branch .LBB0_6
222; GCN-IR-NEXT:  .LBB0_5:
223; GCN-IR-NEXT:    v_mov_b32_e32 v0, s13
224; GCN-IR-NEXT:    v_cndmask_b32_e64 v1, v0, 0, s[16:17]
225; GCN-IR-NEXT:    v_mov_b32_e32 v0, s12
226; GCN-IR-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[16:17]
227; GCN-IR-NEXT:  .LBB0_6: ; %udiv-end
228; GCN-IR-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
229; GCN-IR-NEXT:    v_xor_b32_e32 v0, s0, v0
230; GCN-IR-NEXT:    v_xor_b32_e32 v1, s1, v1
231; GCN-IR-NEXT:    v_mov_b32_e32 v2, s1
232; GCN-IR-NEXT:    v_subrev_i32_e32 v0, vcc, s0, v0
233; GCN-IR-NEXT:    s_mov_b32 s7, 0xf000
234; GCN-IR-NEXT:    s_mov_b32 s6, -1
235; GCN-IR-NEXT:    v_subb_u32_e32 v1, vcc, v1, v2, vcc
236; GCN-IR-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
237; GCN-IR-NEXT:    s_endpgm
238  %result = sdiv i64 %x, %y
239  store i64 %result, i64 addrspace(1)* %out
240  ret void
241}
242
; v_test_sdiv: 64-bit signed division as an ordinary (non-kernel) function.
; Takes %x and %y by value and returns `sdiv i64 %x, %y`.
;
; Two sets of expected output follow, one per RUN line at the top of the file:
;  * the first is a single straight-line block built around v_rcp_f32 and
;    32-bit multiply/add-with-carry steps, ending in s_setpc_b64;
;  * the second (the run that adds -amdgpu-codegenprepare-expand-div64) has
;    a loop in the udiv-do-while block, with the control flow expressed
;    through exec-mask updates (s_and_saveexec_b64 / s_andn2_b64 exec).
;
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script instead of editing them by hand.
243define i64 @v_test_sdiv(i64 %x, i64 %y) {
244; GCN-LABEL: v_test_sdiv:
245; GCN:       ; %bb.0:
246; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
247; GCN-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
248; GCN-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
249; GCN-NEXT:    v_addc_u32_e32 v3, vcc, v3, v4, vcc
250; GCN-NEXT:    v_xor_b32_e32 v3, v3, v4
251; GCN-NEXT:    v_xor_b32_e32 v2, v2, v4
252; GCN-NEXT:    v_cvt_f32_u32_e32 v5, v2
253; GCN-NEXT:    v_cvt_f32_u32_e32 v6, v3
254; GCN-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
255; GCN-NEXT:    v_subb_u32_e32 v8, vcc, 0, v3, vcc
256; GCN-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v6
257; GCN-NEXT:    v_rcp_f32_e32 v5, v5
258; GCN-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
259; GCN-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v5
260; GCN-NEXT:    v_trunc_f32_e32 v6, v6
261; GCN-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v6
262; GCN-NEXT:    v_cvt_u32_f32_e32 v5, v5
263; GCN-NEXT:    v_cvt_u32_f32_e32 v6, v6
264; GCN-NEXT:    v_mul_hi_u32 v9, v7, v5
265; GCN-NEXT:    v_mul_lo_u32 v10, v7, v6
266; GCN-NEXT:    v_mul_lo_u32 v11, v8, v5
267; GCN-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
268; GCN-NEXT:    v_mul_lo_u32 v10, v7, v5
269; GCN-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
270; GCN-NEXT:    v_mul_lo_u32 v11, v5, v9
271; GCN-NEXT:    v_mul_hi_u32 v12, v5, v10
272; GCN-NEXT:    v_mul_hi_u32 v13, v5, v9
273; GCN-NEXT:    v_mul_hi_u32 v14, v6, v9
274; GCN-NEXT:    v_mul_lo_u32 v9, v6, v9
275; GCN-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
276; GCN-NEXT:    v_addc_u32_e32 v12, vcc, 0, v13, vcc
277; GCN-NEXT:    v_mul_lo_u32 v13, v6, v10
278; GCN-NEXT:    v_mul_hi_u32 v10, v6, v10
279; GCN-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
280; GCN-NEXT:    v_addc_u32_e32 v10, vcc, v12, v10, vcc
281; GCN-NEXT:    v_addc_u32_e32 v11, vcc, 0, v14, vcc
282; GCN-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
283; GCN-NEXT:    v_addc_u32_e32 v10, vcc, 0, v11, vcc
284; GCN-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
285; GCN-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
286; GCN-NEXT:    v_mul_lo_u32 v9, v7, v6
287; GCN-NEXT:    v_mul_hi_u32 v10, v7, v5
288; GCN-NEXT:    v_mul_lo_u32 v8, v8, v5
289; GCN-NEXT:    v_mul_lo_u32 v7, v7, v5
290; GCN-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
291; GCN-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
292; GCN-NEXT:    v_mul_lo_u32 v11, v5, v8
293; GCN-NEXT:    v_mul_hi_u32 v12, v5, v7
294; GCN-NEXT:    v_mul_hi_u32 v13, v5, v8
295; GCN-NEXT:    v_mul_hi_u32 v10, v6, v7
296; GCN-NEXT:    v_mul_lo_u32 v7, v6, v7
297; GCN-NEXT:    v_mul_hi_u32 v9, v6, v8
298; GCN-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
299; GCN-NEXT:    v_addc_u32_e32 v12, vcc, 0, v13, vcc
300; GCN-NEXT:    v_mul_lo_u32 v8, v6, v8
301; GCN-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
302; GCN-NEXT:    v_addc_u32_e32 v7, vcc, v12, v10, vcc
303; GCN-NEXT:    v_addc_u32_e32 v9, vcc, 0, v9, vcc
304; GCN-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
305; GCN-NEXT:    v_addc_u32_e32 v8, vcc, 0, v9, vcc
306; GCN-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
307; GCN-NEXT:    v_addc_u32_e32 v6, vcc, v6, v8, vcc
308; GCN-NEXT:    v_ashrrev_i32_e32 v7, 31, v1
309; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v7
310; GCN-NEXT:    v_xor_b32_e32 v0, v0, v7
311; GCN-NEXT:    v_mul_lo_u32 v8, v0, v6
312; GCN-NEXT:    v_mul_hi_u32 v9, v0, v5
313; GCN-NEXT:    v_mul_hi_u32 v10, v0, v6
314; GCN-NEXT:    v_addc_u32_e32 v1, vcc, v1, v7, vcc
315; GCN-NEXT:    v_xor_b32_e32 v1, v1, v7
316; GCN-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
317; GCN-NEXT:    v_addc_u32_e32 v9, vcc, 0, v10, vcc
318; GCN-NEXT:    v_mul_lo_u32 v10, v1, v5
319; GCN-NEXT:    v_mul_hi_u32 v5, v1, v5
320; GCN-NEXT:    v_mul_hi_u32 v11, v1, v6
321; GCN-NEXT:    v_mul_lo_u32 v6, v1, v6
322; GCN-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
323; GCN-NEXT:    v_addc_u32_e32 v5, vcc, v9, v5, vcc
324; GCN-NEXT:    v_addc_u32_e32 v8, vcc, 0, v11, vcc
325; GCN-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
326; GCN-NEXT:    v_addc_u32_e32 v6, vcc, 0, v8, vcc
327; GCN-NEXT:    v_mul_lo_u32 v8, v2, v6
328; GCN-NEXT:    v_mul_hi_u32 v9, v2, v5
329; GCN-NEXT:    v_mul_lo_u32 v10, v3, v5
330; GCN-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
331; GCN-NEXT:    v_mul_lo_u32 v9, v2, v5
332; GCN-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
333; GCN-NEXT:    v_sub_i32_e32 v10, vcc, v1, v8
334; GCN-NEXT:    v_sub_i32_e32 v0, vcc, v0, v9
335; GCN-NEXT:    v_subb_u32_e64 v9, s[4:5], v10, v3, vcc
336; GCN-NEXT:    v_sub_i32_e64 v10, s[4:5], v0, v2
337; GCN-NEXT:    v_subbrev_u32_e64 v9, s[4:5], 0, v9, s[4:5]
338; GCN-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v3
339; GCN-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
340; GCN-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v2
341; GCN-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
342; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v3
343; GCN-NEXT:    v_cndmask_b32_e64 v9, v11, v10, s[4:5]
344; GCN-NEXT:    v_add_i32_e64 v10, s[4:5], 2, v5
345; GCN-NEXT:    v_subb_u32_e32 v1, vcc, v1, v8, vcc
346; GCN-NEXT:    v_addc_u32_e64 v11, s[4:5], 0, v6, s[4:5]
347; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
348; GCN-NEXT:    v_add_i32_e64 v12, s[4:5], 1, v5
349; GCN-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
350; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
351; GCN-NEXT:    v_addc_u32_e64 v13, s[4:5], 0, v6, s[4:5]
352; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
353; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
354; GCN-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v9
355; GCN-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
356; GCN-NEXT:    v_cndmask_b32_e64 v9, v13, v11, s[4:5]
357; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
358; GCN-NEXT:    v_cndmask_b32_e64 v1, v12, v10, s[4:5]
359; GCN-NEXT:    v_cndmask_b32_e32 v0, v6, v9, vcc
360; GCN-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
361; GCN-NEXT:    v_xor_b32_e32 v2, v7, v4
362; GCN-NEXT:    v_xor_b32_e32 v3, v0, v2
363; GCN-NEXT:    v_xor_b32_e32 v0, v1, v2
364; GCN-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
365; GCN-NEXT:    v_subb_u32_e32 v1, vcc, v3, v2, vcc
366; GCN-NEXT:    s_setpc_b64 s[30:31]
367;
368; GCN-IR-LABEL: v_test_sdiv:
369; GCN-IR:       ; %bb.0: ; %_udiv-special-cases
370; GCN-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
371; GCN-IR-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
372; GCN-IR-NEXT:    v_xor_b32_e32 v0, v4, v0
373; GCN-IR-NEXT:    v_ashrrev_i32_e32 v5, 31, v3
374; GCN-IR-NEXT:    v_xor_b32_e32 v1, v4, v1
375; GCN-IR-NEXT:    v_sub_i32_e32 v11, vcc, v0, v4
376; GCN-IR-NEXT:    v_subb_u32_e32 v12, vcc, v1, v4, vcc
377; GCN-IR-NEXT:    v_xor_b32_e32 v1, v5, v2
378; GCN-IR-NEXT:    v_xor_b32_e32 v0, v5, v3
379; GCN-IR-NEXT:    v_sub_i32_e32 v2, vcc, v1, v5
380; GCN-IR-NEXT:    v_subb_u32_e32 v3, vcc, v0, v5, vcc
381; GCN-IR-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
382; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[11:12]
383; GCN-IR-NEXT:    v_ffbh_u32_e32 v0, v2
384; GCN-IR-NEXT:    s_or_b64 s[6:7], vcc, s[4:5]
385; GCN-IR-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
386; GCN-IR-NEXT:    v_ffbh_u32_e32 v7, v3
387; GCN-IR-NEXT:    v_min_u32_e32 v0, v0, v7
388; GCN-IR-NEXT:    v_ffbh_u32_e32 v7, v11
389; GCN-IR-NEXT:    v_add_i32_e32 v7, vcc, 32, v7
390; GCN-IR-NEXT:    v_ffbh_u32_e32 v8, v12
391; GCN-IR-NEXT:    v_min_u32_e32 v13, v7, v8
392; GCN-IR-NEXT:    v_sub_i32_e32 v7, vcc, v0, v13
393; GCN-IR-NEXT:    v_subb_u32_e64 v8, s[4:5], 0, 0, vcc
394; GCN-IR-NEXT:    v_cmp_lt_u64_e32 vcc, 63, v[7:8]
395; GCN-IR-NEXT:    v_cmp_ne_u64_e64 s[4:5], 63, v[7:8]
396; GCN-IR-NEXT:    s_or_b64 s[6:7], s[6:7], vcc
397; GCN-IR-NEXT:    s_xor_b64 s[8:9], s[6:7], -1
398; GCN-IR-NEXT:    v_mov_b32_e32 v6, v4
399; GCN-IR-NEXT:    v_mov_b32_e32 v1, v5
400; GCN-IR-NEXT:    v_cndmask_b32_e64 v10, v12, 0, s[6:7]
401; GCN-IR-NEXT:    s_and_b64 s[4:5], s[8:9], s[4:5]
402; GCN-IR-NEXT:    v_cndmask_b32_e64 v9, v11, 0, s[6:7]
403; GCN-IR-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
404; GCN-IR-NEXT:    s_cbranch_execz .LBB1_6
405; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
406; GCN-IR-NEXT:    v_add_i32_e32 v14, vcc, 1, v7
407; GCN-IR-NEXT:    v_addc_u32_e32 v15, vcc, 0, v8, vcc
408; GCN-IR-NEXT:    v_sub_i32_e64 v7, s[4:5], 63, v7
409; GCN-IR-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[14:15]
410; GCN-IR-NEXT:    v_lshl_b64 v[7:8], v[11:12], v7
411; GCN-IR-NEXT:    v_mov_b32_e32 v9, 0
412; GCN-IR-NEXT:    v_mov_b32_e32 v10, 0
413; GCN-IR-NEXT:    s_and_saveexec_b64 s[4:5], vcc
414; GCN-IR-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
415; GCN-IR-NEXT:    s_cbranch_execz .LBB1_5
416; GCN-IR-NEXT:  ; %bb.2: ; %udiv-preheader
417; GCN-IR-NEXT:    v_add_i32_e32 v18, vcc, -1, v2
418; GCN-IR-NEXT:    v_addc_u32_e32 v19, vcc, -1, v3, vcc
419; GCN-IR-NEXT:    v_not_b32_e32 v0, v0
420; GCN-IR-NEXT:    v_lshr_b64 v[14:15], v[11:12], v14
421; GCN-IR-NEXT:    v_not_b32_e32 v9, 0
422; GCN-IR-NEXT:    v_add_i32_e32 v11, vcc, v0, v13
423; GCN-IR-NEXT:    v_mov_b32_e32 v16, 0
424; GCN-IR-NEXT:    v_addc_u32_e32 v12, vcc, 0, v9, vcc
425; GCN-IR-NEXT:    s_mov_b64 s[10:11], 0
426; GCN-IR-NEXT:    v_mov_b32_e32 v17, 0
427; GCN-IR-NEXT:    v_mov_b32_e32 v10, 0
428; GCN-IR-NEXT:  .LBB1_3: ; %udiv-do-while
429; GCN-IR-NEXT:    ; =>This Inner Loop Header: Depth=1
430; GCN-IR-NEXT:    v_lshl_b64 v[14:15], v[14:15], 1
431; GCN-IR-NEXT:    v_lshrrev_b32_e32 v0, 31, v8
432; GCN-IR-NEXT:    v_or_b32_e32 v0, v14, v0
433; GCN-IR-NEXT:    v_sub_i32_e32 v9, vcc, v18, v0
434; GCN-IR-NEXT:    v_lshl_b64 v[7:8], v[7:8], 1
435; GCN-IR-NEXT:    v_subb_u32_e32 v9, vcc, v19, v15, vcc
436; GCN-IR-NEXT:    v_ashrrev_i32_e32 v13, 31, v9
437; GCN-IR-NEXT:    v_add_i32_e32 v11, vcc, 1, v11
438; GCN-IR-NEXT:    v_or_b32_e32 v7, v16, v7
439; GCN-IR-NEXT:    v_and_b32_e32 v9, 1, v13
440; GCN-IR-NEXT:    v_and_b32_e32 v16, v13, v3
441; GCN-IR-NEXT:    v_and_b32_e32 v13, v13, v2
442; GCN-IR-NEXT:    v_addc_u32_e32 v12, vcc, 0, v12, vcc
443; GCN-IR-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[11:12]
444; GCN-IR-NEXT:    v_sub_i32_e64 v14, s[4:5], v0, v13
445; GCN-IR-NEXT:    v_or_b32_e32 v8, v17, v8
446; GCN-IR-NEXT:    v_subb_u32_e64 v15, s[4:5], v15, v16, s[4:5]
447; GCN-IR-NEXT:    v_mov_b32_e32 v17, v10
448; GCN-IR-NEXT:    s_or_b64 s[10:11], vcc, s[10:11]
449; GCN-IR-NEXT:    v_mov_b32_e32 v16, v9
450; GCN-IR-NEXT:    s_andn2_b64 exec, exec, s[10:11]
451; GCN-IR-NEXT:    s_cbranch_execnz .LBB1_3
452; GCN-IR-NEXT:  ; %bb.4: ; %Flow
453; GCN-IR-NEXT:    s_or_b64 exec, exec, s[10:11]
454; GCN-IR-NEXT:  .LBB1_5: ; %Flow3
455; GCN-IR-NEXT:    s_or_b64 exec, exec, s[8:9]
456; GCN-IR-NEXT:    v_lshl_b64 v[2:3], v[7:8], 1
457; GCN-IR-NEXT:    v_or_b32_e32 v10, v10, v3
458; GCN-IR-NEXT:    v_or_b32_e32 v9, v9, v2
459; GCN-IR-NEXT:  .LBB1_6: ; %Flow4
460; GCN-IR-NEXT:    s_or_b64 exec, exec, s[6:7]
461; GCN-IR-NEXT:    v_xor_b32_e32 v0, v5, v4
462; GCN-IR-NEXT:    v_xor_b32_e32 v1, v1, v6
463; GCN-IR-NEXT:    v_xor_b32_e32 v3, v9, v0
464; GCN-IR-NEXT:    v_xor_b32_e32 v2, v10, v1
465; GCN-IR-NEXT:    v_sub_i32_e32 v0, vcc, v3, v0
466; GCN-IR-NEXT:    v_subb_u32_e32 v1, vcc, v2, v1, vcc
467; GCN-IR-NEXT:    s_setpc_b64 s[30:31]
468  %result = sdiv i64 %x, %y
469  ret i64 %result
470}
471
; s_test_sdiv24_64: i64 sdiv whose operands are both `ashr ... 40`, i.e. each
; operand is a sign-extended 24-bit value. The quotient is stored through %out.
;
; Both sets of expected output use a short float-based sequence
; (v_cvt_f32_i32 / v_rcp_iflag_f32 / v_mad_f32) instead of a full 64-bit
; division, then sign-extend the result from 24 bits (v_bfe_i32 ..., 0, 24).
; The two sets differ only in the operand order of the final v_add_i32.
;
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script instead of editing them by hand.
472define amdgpu_kernel void @s_test_sdiv24_64(i64 addrspace(1)* %out, i64 %x, i64 %y) {
473; GCN-LABEL: s_test_sdiv24_64:
474; GCN:       ; %bb.0:
475; GCN-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
476; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
477; GCN-NEXT:    s_mov_b32 s3, 0xf000
478; GCN-NEXT:    s_mov_b32 s2, -1
479; GCN-NEXT:    s_waitcnt lgkmcnt(0)
480; GCN-NEXT:    s_mov_b32 s0, s4
481; GCN-NEXT:    s_ashr_i64 s[8:9], s[0:1], 40
482; GCN-NEXT:    v_cvt_f32_i32_e32 v0, s8
483; GCN-NEXT:    s_mov_b32 s1, s5
484; GCN-NEXT:    s_ashr_i64 s[4:5], s[6:7], 40
485; GCN-NEXT:    v_cvt_f32_i32_e32 v1, s4
486; GCN-NEXT:    v_rcp_iflag_f32_e32 v2, v0
487; GCN-NEXT:    s_xor_b32 s4, s4, s8
488; GCN-NEXT:    s_ashr_i32 s4, s4, 30
489; GCN-NEXT:    s_or_b32 s4, s4, 1
490; GCN-NEXT:    v_mul_f32_e32 v2, v1, v2
491; GCN-NEXT:    v_trunc_f32_e32 v2, v2
492; GCN-NEXT:    v_mad_f32 v1, -v2, v0, v1
493; GCN-NEXT:    v_cvt_i32_f32_e32 v2, v2
494; GCN-NEXT:    v_mov_b32_e32 v3, s4
495; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v0|
496; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
497; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
498; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 24
499; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
500; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
501; GCN-NEXT:    s_endpgm
502;
503; GCN-IR-LABEL: s_test_sdiv24_64:
504; GCN-IR:       ; %bb.0:
505; GCN-IR-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
506; GCN-IR-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
507; GCN-IR-NEXT:    s_mov_b32 s3, 0xf000
508; GCN-IR-NEXT:    s_mov_b32 s2, -1
509; GCN-IR-NEXT:    s_waitcnt lgkmcnt(0)
510; GCN-IR-NEXT:    s_mov_b32 s0, s4
511; GCN-IR-NEXT:    s_ashr_i64 s[8:9], s[0:1], 40
512; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v0, s8
513; GCN-IR-NEXT:    s_mov_b32 s1, s5
514; GCN-IR-NEXT:    s_ashr_i64 s[4:5], s[6:7], 40
515; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v1, s4
516; GCN-IR-NEXT:    v_rcp_iflag_f32_e32 v2, v0
517; GCN-IR-NEXT:    s_xor_b32 s4, s4, s8
518; GCN-IR-NEXT:    s_ashr_i32 s4, s4, 30
519; GCN-IR-NEXT:    s_or_b32 s4, s4, 1
520; GCN-IR-NEXT:    v_mul_f32_e32 v2, v1, v2
521; GCN-IR-NEXT:    v_trunc_f32_e32 v2, v2
522; GCN-IR-NEXT:    v_mad_f32 v1, -v2, v0, v1
523; GCN-IR-NEXT:    v_cvt_i32_f32_e32 v2, v2
524; GCN-IR-NEXT:    v_mov_b32_e32 v3, s4
525; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v0|
526; GCN-IR-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
527; GCN-IR-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
528; GCN-IR-NEXT:    v_bfe_i32 v0, v0, 0, 24
529; GCN-IR-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
530; GCN-IR-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
531; GCN-IR-NEXT:    s_endpgm
532  %1 = ashr i64 %x, 40
533  %2 = ashr i64 %y, 40
534  %result = sdiv i64 %1, %2
535  store i64 %result, i64 addrspace(1)* %out
536  ret void
537}
538
; v_test_sdiv24_64: non-kernel i64 sdiv whose operands are both `lshr ... 40`,
; i.e. each operand is a zero-extended 24-bit value (never negative).
;
; Both sets of expected output are identical: a short float-based sequence
; (v_cvt_f32_i32 / v_rcp_iflag_f32 / v_mad_f32) whose correction step is an
; add-with-carry of 0, followed by a 25-bit sign-extension (v_bfe_i32 ..., 0, 25).
;
; NOTE(review): the kernel variant s_test_sdiv24_64 shifts with `ashr`, while
; this one uses `lshr`. The assertions match `lshr` (v_lshrrev_b32), so this may
; be deliberate coverage of the non-negative case - confirm before changing.
;
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script instead of editing them by hand.
539define i64 @v_test_sdiv24_64(i64 %x, i64 %y) {
540; GCN-LABEL: v_test_sdiv24_64:
541; GCN:       ; %bb.0:
542; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
543; GCN-NEXT:    v_lshrrev_b32_e32 v0, 8, v3
544; GCN-NEXT:    v_cvt_f32_i32_e32 v0, v0
545; GCN-NEXT:    v_lshrrev_b32_e32 v1, 8, v1
546; GCN-NEXT:    v_cvt_f32_i32_e32 v1, v1
547; GCN-NEXT:    v_rcp_iflag_f32_e32 v2, v0
548; GCN-NEXT:    v_mul_f32_e32 v2, v1, v2
549; GCN-NEXT:    v_trunc_f32_e32 v2, v2
550; GCN-NEXT:    v_cvt_i32_f32_e32 v3, v2
551; GCN-NEXT:    v_mad_f32 v1, -v2, v0, v1
552; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v0|
553; GCN-NEXT:    v_addc_u32_e32 v0, vcc, 0, v3, vcc
554; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 25
555; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
556; GCN-NEXT:    s_setpc_b64 s[30:31]
557;
558; GCN-IR-LABEL: v_test_sdiv24_64:
559; GCN-IR:       ; %bb.0:
560; GCN-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
561; GCN-IR-NEXT:    v_lshrrev_b32_e32 v0, 8, v3
562; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v0, v0
563; GCN-IR-NEXT:    v_lshrrev_b32_e32 v1, 8, v1
564; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v1, v1
565; GCN-IR-NEXT:    v_rcp_iflag_f32_e32 v2, v0
566; GCN-IR-NEXT:    v_mul_f32_e32 v2, v1, v2
567; GCN-IR-NEXT:    v_trunc_f32_e32 v2, v2
568; GCN-IR-NEXT:    v_cvt_i32_f32_e32 v3, v2
569; GCN-IR-NEXT:    v_mad_f32 v1, -v2, v0, v1
570; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v0|
571; GCN-IR-NEXT:    v_addc_u32_e32 v0, vcc, 0, v3, vcc
572; GCN-IR-NEXT:    v_bfe_i32 v0, v0, 0, 25
573; GCN-IR-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
574; GCN-IR-NEXT:    s_setpc_b64 s[30:31]
575  %1 = lshr i64 %x, 40
576  %2 = lshr i64 %y, 40
577  %result = sdiv i64 %1, %2
578  ret i64 %result
579}
580
; s_test_sdiv32_64: i64 sdiv whose operands are both `ashr ... 32`, i.e. each
; operand is the sign-extended high 32 bits of its argument. The quotient is
; stored through %out.
;
; Both sets of expected output load only the high dword of %y (s_load_dword at
; offset 0xe), use a short float-based sequence (v_cvt_f32_i32 /
; v_rcp_iflag_f32 / v_mad_f32), and widen the 32-bit result with
; v_ashrrev_i32 31; no v_bfe_i32 is emitted here. The two sets differ only in
; the operand order of the final v_add_i32.
;
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script instead of editing them by hand.
581define amdgpu_kernel void @s_test_sdiv32_64(i64 addrspace(1)* %out, i64 %x, i64 %y) {
582; GCN-LABEL: s_test_sdiv32_64:
583; GCN:       ; %bb.0:
584; GCN-NEXT:    s_load_dword s8, s[0:1], 0xe
585; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
586; GCN-NEXT:    s_mov_b32 s7, 0xf000
587; GCN-NEXT:    s_mov_b32 s6, -1
588; GCN-NEXT:    s_waitcnt lgkmcnt(0)
589; GCN-NEXT:    v_cvt_f32_i32_e32 v0, s8
590; GCN-NEXT:    v_cvt_f32_i32_e32 v1, s3
591; GCN-NEXT:    s_mov_b32 s4, s0
592; GCN-NEXT:    s_xor_b32 s0, s3, s8
593; GCN-NEXT:    v_rcp_iflag_f32_e32 v2, v0
594; GCN-NEXT:    s_ashr_i32 s0, s0, 30
595; GCN-NEXT:    s_or_b32 s0, s0, 1
596; GCN-NEXT:    v_mov_b32_e32 v3, s0
597; GCN-NEXT:    v_mul_f32_e32 v2, v1, v2
598; GCN-NEXT:    v_trunc_f32_e32 v2, v2
599; GCN-NEXT:    v_mad_f32 v1, -v2, v0, v1
600; GCN-NEXT:    v_cvt_i32_f32_e32 v2, v2
601; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v0|
602; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
603; GCN-NEXT:    s_mov_b32 s5, s1
604; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
605; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
606; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
607; GCN-NEXT:    s_endpgm
608;
609; GCN-IR-LABEL: s_test_sdiv32_64:
610; GCN-IR:       ; %bb.0:
611; GCN-IR-NEXT:    s_load_dword s8, s[0:1], 0xe
612; GCN-IR-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
613; GCN-IR-NEXT:    s_mov_b32 s7, 0xf000
614; GCN-IR-NEXT:    s_mov_b32 s6, -1
615; GCN-IR-NEXT:    s_waitcnt lgkmcnt(0)
616; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v0, s8
617; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v1, s3
618; GCN-IR-NEXT:    s_mov_b32 s4, s0
619; GCN-IR-NEXT:    s_xor_b32 s0, s3, s8
620; GCN-IR-NEXT:    v_rcp_iflag_f32_e32 v2, v0
621; GCN-IR-NEXT:    s_ashr_i32 s0, s0, 30
622; GCN-IR-NEXT:    s_or_b32 s0, s0, 1
623; GCN-IR-NEXT:    v_mov_b32_e32 v3, s0
624; GCN-IR-NEXT:    v_mul_f32_e32 v2, v1, v2
625; GCN-IR-NEXT:    v_trunc_f32_e32 v2, v2
626; GCN-IR-NEXT:    v_mad_f32 v1, -v2, v0, v1
627; GCN-IR-NEXT:    v_cvt_i32_f32_e32 v2, v2
628; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v0|
629; GCN-IR-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
630; GCN-IR-NEXT:    s_mov_b32 s5, s1
631; GCN-IR-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
632; GCN-IR-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
633; GCN-IR-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
634; GCN-IR-NEXT:    s_endpgm
635  %1 = ashr i64 %x, 32
636  %2 = ashr i64 %y, 32
637  %result = sdiv i64 %1, %2
638  store i64 %result, i64 addrspace(1)* %out
639  ret void
640}
641
; s_test_sdiv31_64: i64 sdiv whose operands are both `ashr ... 33`, i.e. each
; operand is a sign-extended 31-bit value. The quotient is stored through %out.
;
; Both sets of expected output use a short float-based sequence
; (v_cvt_f32_i32 / v_rcp_iflag_f32 / v_mad_f32) and sign-extend the result
; from 31 bits (v_bfe_i32 ..., 0, 31). The two sets differ only in the operand
; order of the final v_add_i32.
;
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script instead of editing them by hand.
642define amdgpu_kernel void @s_test_sdiv31_64(i64 addrspace(1)* %out, i64 %x, i64 %y) {
643; GCN-LABEL: s_test_sdiv31_64:
644; GCN:       ; %bb.0:
645; GCN-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
646; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
647; GCN-NEXT:    s_mov_b32 s3, 0xf000
648; GCN-NEXT:    s_mov_b32 s2, -1
649; GCN-NEXT:    s_waitcnt lgkmcnt(0)
650; GCN-NEXT:    s_mov_b32 s0, s4
651; GCN-NEXT:    s_ashr_i64 s[8:9], s[0:1], 33
652; GCN-NEXT:    v_cvt_f32_i32_e32 v0, s8
653; GCN-NEXT:    s_mov_b32 s1, s5
654; GCN-NEXT:    s_ashr_i64 s[4:5], s[6:7], 33
655; GCN-NEXT:    v_cvt_f32_i32_e32 v1, s4
656; GCN-NEXT:    v_rcp_iflag_f32_e32 v2, v0
657; GCN-NEXT:    s_xor_b32 s4, s4, s8
658; GCN-NEXT:    s_ashr_i32 s4, s4, 30
659; GCN-NEXT:    s_or_b32 s4, s4, 1
660; GCN-NEXT:    v_mul_f32_e32 v2, v1, v2
661; GCN-NEXT:    v_trunc_f32_e32 v2, v2
662; GCN-NEXT:    v_mad_f32 v1, -v2, v0, v1
663; GCN-NEXT:    v_cvt_i32_f32_e32 v2, v2
664; GCN-NEXT:    v_mov_b32_e32 v3, s4
665; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v0|
666; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
667; GCN-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
668; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 31
669; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
670; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
671; GCN-NEXT:    s_endpgm
672;
673; GCN-IR-LABEL: s_test_sdiv31_64:
674; GCN-IR:       ; %bb.0:
675; GCN-IR-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
676; GCN-IR-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
677; GCN-IR-NEXT:    s_mov_b32 s3, 0xf000
678; GCN-IR-NEXT:    s_mov_b32 s2, -1
679; GCN-IR-NEXT:    s_waitcnt lgkmcnt(0)
680; GCN-IR-NEXT:    s_mov_b32 s0, s4
681; GCN-IR-NEXT:    s_ashr_i64 s[8:9], s[0:1], 33
682; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v0, s8
683; GCN-IR-NEXT:    s_mov_b32 s1, s5
684; GCN-IR-NEXT:    s_ashr_i64 s[4:5], s[6:7], 33
685; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v1, s4
686; GCN-IR-NEXT:    v_rcp_iflag_f32_e32 v2, v0
687; GCN-IR-NEXT:    s_xor_b32 s4, s4, s8
688; GCN-IR-NEXT:    s_ashr_i32 s4, s4, 30
689; GCN-IR-NEXT:    s_or_b32 s4, s4, 1
690; GCN-IR-NEXT:    v_mul_f32_e32 v2, v1, v2
691; GCN-IR-NEXT:    v_trunc_f32_e32 v2, v2
692; GCN-IR-NEXT:    v_mad_f32 v1, -v2, v0, v1
693; GCN-IR-NEXT:    v_cvt_i32_f32_e32 v2, v2
694; GCN-IR-NEXT:    v_mov_b32_e32 v3, s4
695; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v0|
696; GCN-IR-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
697; GCN-IR-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
698; GCN-IR-NEXT:    v_bfe_i32 v0, v0, 0, 31
699; GCN-IR-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
700; GCN-IR-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
701; GCN-IR-NEXT:    s_endpgm
702  %1 = ashr i64 %x, 33
703  %2 = ashr i64 %y, 33
704  %result = sdiv i64 %1, %2
705  store i64 %result, i64 addrspace(1)* %out
706  ret void
707}
708
; Kernel test: signed 64-bit division where both operands are first
; arithmetic-shifted right by 41, leaving at most 23 significant bits (sign
; bit included) in each. For both check prefixes the expected output is a
; straight-line sequence built on v_rcp_iflag_f32 with a conditional quotient
; adjustment, ending in a sign extension from 23 bits (v_bfe_i32 ... 0, 23).
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
709define amdgpu_kernel void @s_test_sdiv23_64(i64 addrspace(1)* %out, i64 %x, i64 %y) {
710; GCN-LABEL: s_test_sdiv23_64:
711; GCN:       ; %bb.0:
712; GCN-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
713; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
714; GCN-NEXT:    s_mov_b32 s3, 0xf000
715; GCN-NEXT:    s_mov_b32 s2, -1
716; GCN-NEXT:    s_waitcnt lgkmcnt(0)
717; GCN-NEXT:    s_mov_b32 s0, s4
718; GCN-NEXT:    s_ashr_i64 s[8:9], s[0:1], 41
719; GCN-NEXT:    v_cvt_f32_i32_e32 v0, s8
720; GCN-NEXT:    s_mov_b32 s1, s5
721; GCN-NEXT:    s_ashr_i64 s[4:5], s[6:7], 41
722; GCN-NEXT:    v_cvt_f32_i32_e32 v1, s4
723; GCN-NEXT:    v_rcp_iflag_f32_e32 v2, v0
724; GCN-NEXT:    s_xor_b32 s4, s4, s8
725; GCN-NEXT:    s_ashr_i32 s4, s4, 30
726; GCN-NEXT:    s_or_b32 s4, s4, 1
727; GCN-NEXT:    v_mul_f32_e32 v2, v1, v2
728; GCN-NEXT:    v_trunc_f32_e32 v2, v2
729; GCN-NEXT:    v_mad_f32 v1, -v2, v0, v1
730; GCN-NEXT:    v_cvt_i32_f32_e32 v2, v2
731; GCN-NEXT:    v_mov_b32_e32 v3, s4
732; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v0|
733; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
734; GCN-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
735; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 23
736; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
737; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
738; GCN-NEXT:    s_endpgm
739;
740; GCN-IR-LABEL: s_test_sdiv23_64:
741; GCN-IR:       ; %bb.0:
742; GCN-IR-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
743; GCN-IR-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
744; GCN-IR-NEXT:    s_mov_b32 s3, 0xf000
745; GCN-IR-NEXT:    s_mov_b32 s2, -1
746; GCN-IR-NEXT:    s_waitcnt lgkmcnt(0)
747; GCN-IR-NEXT:    s_mov_b32 s0, s4
748; GCN-IR-NEXT:    s_ashr_i64 s[8:9], s[0:1], 41
749; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v0, s8
750; GCN-IR-NEXT:    s_mov_b32 s1, s5
751; GCN-IR-NEXT:    s_ashr_i64 s[4:5], s[6:7], 41
752; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v1, s4
753; GCN-IR-NEXT:    v_rcp_iflag_f32_e32 v2, v0
754; GCN-IR-NEXT:    s_xor_b32 s4, s4, s8
755; GCN-IR-NEXT:    s_ashr_i32 s4, s4, 30
756; GCN-IR-NEXT:    s_or_b32 s4, s4, 1
757; GCN-IR-NEXT:    v_mul_f32_e32 v2, v1, v2
758; GCN-IR-NEXT:    v_trunc_f32_e32 v2, v2
759; GCN-IR-NEXT:    v_mad_f32 v1, -v2, v0, v1
760; GCN-IR-NEXT:    v_cvt_i32_f32_e32 v2, v2
761; GCN-IR-NEXT:    v_mov_b32_e32 v3, s4
762; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v0|
763; GCN-IR-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
764; GCN-IR-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
765; GCN-IR-NEXT:    v_bfe_i32 v0, v0, 0, 23
766; GCN-IR-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
767; GCN-IR-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
768; GCN-IR-NEXT:    s_endpgm
  ; ashr by 41 keeps the top 23 bits of each i64 operand, sign-extended.
769  %1 = ashr i64 %x, 41
770  %2 = ashr i64 %y, 41
771  %result = sdiv i64 %1, %2
772  store i64 %result, i64 addrspace(1)* %out
773  ret void
774}
775
; Kernel test: signed 64-bit division where both operands are first
; arithmetic-shifted right by 39, leaving at most 25 significant bits (sign
; bit included) in each. For both check prefixes the expected output is a
; straight-line sequence built on v_rcp_iflag_f32 with a conditional quotient
; adjustment, ending in a sign extension from 25 bits (v_bfe_i32 ... 0, 25).
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
776define amdgpu_kernel void @s_test_sdiv25_64(i64 addrspace(1)* %out, i64 %x, i64 %y) {
777; GCN-LABEL: s_test_sdiv25_64:
778; GCN:       ; %bb.0:
779; GCN-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
780; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
781; GCN-NEXT:    s_mov_b32 s3, 0xf000
782; GCN-NEXT:    s_mov_b32 s2, -1
783; GCN-NEXT:    s_waitcnt lgkmcnt(0)
784; GCN-NEXT:    s_mov_b32 s0, s4
785; GCN-NEXT:    s_ashr_i64 s[8:9], s[0:1], 39
786; GCN-NEXT:    v_cvt_f32_i32_e32 v0, s8
787; GCN-NEXT:    s_mov_b32 s1, s5
788; GCN-NEXT:    s_ashr_i64 s[4:5], s[6:7], 39
789; GCN-NEXT:    v_cvt_f32_i32_e32 v1, s4
790; GCN-NEXT:    v_rcp_iflag_f32_e32 v2, v0
791; GCN-NEXT:    s_xor_b32 s4, s4, s8
792; GCN-NEXT:    s_ashr_i32 s4, s4, 30
793; GCN-NEXT:    s_or_b32 s4, s4, 1
794; GCN-NEXT:    v_mul_f32_e32 v2, v1, v2
795; GCN-NEXT:    v_trunc_f32_e32 v2, v2
796; GCN-NEXT:    v_mad_f32 v1, -v2, v0, v1
797; GCN-NEXT:    v_cvt_i32_f32_e32 v2, v2
798; GCN-NEXT:    v_mov_b32_e32 v3, s4
799; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v0|
800; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
801; GCN-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
802; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 25
803; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
804; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
805; GCN-NEXT:    s_endpgm
806;
807; GCN-IR-LABEL: s_test_sdiv25_64:
808; GCN-IR:       ; %bb.0:
809; GCN-IR-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
810; GCN-IR-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
811; GCN-IR-NEXT:    s_mov_b32 s3, 0xf000
812; GCN-IR-NEXT:    s_mov_b32 s2, -1
813; GCN-IR-NEXT:    s_waitcnt lgkmcnt(0)
814; GCN-IR-NEXT:    s_mov_b32 s0, s4
815; GCN-IR-NEXT:    s_ashr_i64 s[8:9], s[0:1], 39
816; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v0, s8
817; GCN-IR-NEXT:    s_mov_b32 s1, s5
818; GCN-IR-NEXT:    s_ashr_i64 s[4:5], s[6:7], 39
819; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v1, s4
820; GCN-IR-NEXT:    v_rcp_iflag_f32_e32 v2, v0
821; GCN-IR-NEXT:    s_xor_b32 s4, s4, s8
822; GCN-IR-NEXT:    s_ashr_i32 s4, s4, 30
823; GCN-IR-NEXT:    s_or_b32 s4, s4, 1
824; GCN-IR-NEXT:    v_mul_f32_e32 v2, v1, v2
825; GCN-IR-NEXT:    v_trunc_f32_e32 v2, v2
826; GCN-IR-NEXT:    v_mad_f32 v1, -v2, v0, v1
827; GCN-IR-NEXT:    v_cvt_i32_f32_e32 v2, v2
828; GCN-IR-NEXT:    v_mov_b32_e32 v3, s4
829; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v0|
830; GCN-IR-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
831; GCN-IR-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
832; GCN-IR-NEXT:    v_bfe_i32 v0, v0, 0, 25
833; GCN-IR-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
834; GCN-IR-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
835; GCN-IR-NEXT:    s_endpgm
  ; ashr by 39 keeps the top 25 bits of each i64 operand, sign-extended.
836  %1 = ashr i64 %x, 39
837  %2 = ashr i64 %y, 39
838  %result = sdiv i64 %1, %2
839  store i64 %result, i64 addrspace(1)* %out
840  ret void
841}
842
; Kernel test: element-wise signed division on <2 x i64> where every lane of
; both operands is first arithmetic-shifted right by 40, leaving at most 24
; significant bits (sign bit included) per lane. For both check prefixes the
; expected output repeats the v_rcp_iflag_f32-based sequence once per lane,
; sign-extends each quotient from 24 bits (v_bfe_i32 ... 0, 24) and writes
; both lanes with a single buffer_store_dwordx4.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
843define amdgpu_kernel void @s_test_sdiv24_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %x, <2 x i64> %y) {
844; GCN-LABEL: s_test_sdiv24_v2i64:
845; GCN:       ; %bb.0:
846; GCN-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0xd
847; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
848; GCN-NEXT:    s_mov_b32 s3, 0xf000
849; GCN-NEXT:    s_mov_b32 s2, -1
850; GCN-NEXT:    s_waitcnt lgkmcnt(0)
851; GCN-NEXT:    s_ashr_i64 s[8:9], s[8:9], 40
852; GCN-NEXT:    v_cvt_f32_i32_e32 v0, s8
853; GCN-NEXT:    s_ashr_i64 s[4:5], s[4:5], 40
854; GCN-NEXT:    v_cvt_f32_i32_e32 v1, s4
855; GCN-NEXT:    s_xor_b32 s4, s4, s8
856; GCN-NEXT:    v_rcp_iflag_f32_e32 v2, v0
857; GCN-NEXT:    s_ashr_i32 s4, s4, 30
858; GCN-NEXT:    s_or_b32 s4, s4, 1
859; GCN-NEXT:    v_mov_b32_e32 v3, s4
860; GCN-NEXT:    v_mul_f32_e32 v2, v1, v2
861; GCN-NEXT:    v_trunc_f32_e32 v2, v2
862; GCN-NEXT:    v_mad_f32 v1, -v2, v0, v1
863; GCN-NEXT:    v_cvt_i32_f32_e32 v2, v2
864; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v0|
865; GCN-NEXT:    s_ashr_i64 s[10:11], s[10:11], 40
866; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
867; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
868; GCN-NEXT:    v_cvt_f32_i32_e32 v2, s10
869; GCN-NEXT:    s_ashr_i64 s[6:7], s[6:7], 40
870; GCN-NEXT:    v_cvt_f32_i32_e32 v3, s6
871; GCN-NEXT:    s_xor_b32 s4, s6, s10
872; GCN-NEXT:    v_rcp_iflag_f32_e32 v4, v2
873; GCN-NEXT:    s_ashr_i32 s4, s4, 30
874; GCN-NEXT:    s_or_b32 s4, s4, 1
875; GCN-NEXT:    v_mov_b32_e32 v5, s4
876; GCN-NEXT:    v_mul_f32_e32 v4, v3, v4
877; GCN-NEXT:    v_trunc_f32_e32 v4, v4
878; GCN-NEXT:    v_mad_f32 v3, -v4, v2, v3
879; GCN-NEXT:    v_cvt_i32_f32_e32 v4, v4
880; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, |v2|
881; GCN-NEXT:    v_cndmask_b32_e32 v2, 0, v5, vcc
882; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 24
883; GCN-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
884; GCN-NEXT:    v_bfe_i32 v2, v2, 0, 24
885; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
886; GCN-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
887; GCN-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
888; GCN-NEXT:    s_endpgm
889;
890; GCN-IR-LABEL: s_test_sdiv24_v2i64:
891; GCN-IR:       ; %bb.0:
892; GCN-IR-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0xd
893; GCN-IR-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
894; GCN-IR-NEXT:    s_mov_b32 s3, 0xf000
895; GCN-IR-NEXT:    s_mov_b32 s2, -1
896; GCN-IR-NEXT:    s_waitcnt lgkmcnt(0)
897; GCN-IR-NEXT:    s_ashr_i64 s[8:9], s[8:9], 40
898; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v0, s8
899; GCN-IR-NEXT:    s_ashr_i64 s[4:5], s[4:5], 40
900; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v1, s4
901; GCN-IR-NEXT:    s_xor_b32 s4, s4, s8
902; GCN-IR-NEXT:    v_rcp_iflag_f32_e32 v2, v0
903; GCN-IR-NEXT:    s_ashr_i32 s4, s4, 30
904; GCN-IR-NEXT:    s_or_b32 s4, s4, 1
905; GCN-IR-NEXT:    v_mov_b32_e32 v3, s4
906; GCN-IR-NEXT:    v_mul_f32_e32 v2, v1, v2
907; GCN-IR-NEXT:    v_trunc_f32_e32 v2, v2
908; GCN-IR-NEXT:    v_mad_f32 v1, -v2, v0, v1
909; GCN-IR-NEXT:    v_cvt_i32_f32_e32 v2, v2
910; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v0|
911; GCN-IR-NEXT:    s_ashr_i64 s[10:11], s[10:11], 40
912; GCN-IR-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
913; GCN-IR-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
914; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v2, s10
915; GCN-IR-NEXT:    s_ashr_i64 s[6:7], s[6:7], 40
916; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v3, s6
917; GCN-IR-NEXT:    s_xor_b32 s4, s6, s10
918; GCN-IR-NEXT:    v_rcp_iflag_f32_e32 v4, v2
919; GCN-IR-NEXT:    s_ashr_i32 s4, s4, 30
920; GCN-IR-NEXT:    s_or_b32 s4, s4, 1
921; GCN-IR-NEXT:    v_mov_b32_e32 v5, s4
922; GCN-IR-NEXT:    v_mul_f32_e32 v4, v3, v4
923; GCN-IR-NEXT:    v_trunc_f32_e32 v4, v4
924; GCN-IR-NEXT:    v_mad_f32 v3, -v4, v2, v3
925; GCN-IR-NEXT:    v_cvt_i32_f32_e32 v4, v4
926; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, |v2|
927; GCN-IR-NEXT:    v_cndmask_b32_e32 v2, 0, v5, vcc
928; GCN-IR-NEXT:    v_bfe_i32 v0, v0, 0, 24
929; GCN-IR-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
930; GCN-IR-NEXT:    v_bfe_i32 v2, v2, 0, 24
931; GCN-IR-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
932; GCN-IR-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
933; GCN-IR-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
934; GCN-IR-NEXT:    s_endpgm
  ; ashr by 40 keeps the top 24 bits of every i64 lane, sign-extended.
935  %1 = ashr <2 x i64> %x, <i64 40, i64 40>
936  %2 = ashr <2 x i64> %y, <i64 40, i64 40>
937  %result = sdiv <2 x i64> %1, %2
938  store <2 x i64> %result, <2 x i64> addrspace(1)* %out
939  ret void
940}
941
; Kernel test: signed division on i48 where both operands are first
; arithmetic-shifted right by 24, leaving at most 24 significant bits (sign
; bit included) in each. The expected output differs by check prefix: the
; GCN prefix expects the straight-line v_rcp_iflag_f32-based sequence, while
; the GCN-IR prefix expects a loop-based expansion (blocks
; _udiv-special-cases, udiv-bb1, udiv-preheader, udiv-do-while, udiv-end).
; In both cases the 48-bit result is written as one dword store plus one
; short store at offset 4.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
942define amdgpu_kernel void @s_test_sdiv24_48(i48 addrspace(1)* %out, i48 %x, i48 %y) {
943; GCN-LABEL: s_test_sdiv24_48:
944; GCN:       ; %bb.0:
945; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
946; GCN-NEXT:    s_load_dword s2, s[0:1], 0xb
947; GCN-NEXT:    s_load_dword s3, s[0:1], 0xc
948; GCN-NEXT:    s_load_dword s8, s[0:1], 0xe
949; GCN-NEXT:    s_load_dword s0, s[0:1], 0xd
950; GCN-NEXT:    s_mov_b32 s7, 0xf000
951; GCN-NEXT:    s_waitcnt lgkmcnt(0)
952; GCN-NEXT:    v_mov_b32_e32 v2, s2
953; GCN-NEXT:    s_sext_i32_i16 s1, s3
954; GCN-NEXT:    s_sext_i32_i16 s3, s8
955; GCN-NEXT:    v_mov_b32_e32 v0, s0
956; GCN-NEXT:    v_alignbit_b32 v0, s3, v0, 24
957; GCN-NEXT:    v_cvt_f32_i32_e32 v1, v0
958; GCN-NEXT:    v_alignbit_b32 v2, s1, v2, 24
959; GCN-NEXT:    v_cvt_f32_i32_e32 v3, v2
960; GCN-NEXT:    v_xor_b32_e32 v0, v2, v0
961; GCN-NEXT:    v_rcp_iflag_f32_e32 v4, v1
962; GCN-NEXT:    v_ashrrev_i32_e32 v0, 30, v0
963; GCN-NEXT:    v_or_b32_e32 v0, 1, v0
964; GCN-NEXT:    s_mov_b32 s6, -1
965; GCN-NEXT:    v_mul_f32_e32 v2, v3, v4
966; GCN-NEXT:    v_trunc_f32_e32 v2, v2
967; GCN-NEXT:    v_mad_f32 v3, -v2, v1, v3
968; GCN-NEXT:    v_cvt_i32_f32_e32 v2, v2
969; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, |v1|
970; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
971; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
972; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 24
973; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
974; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
975; GCN-NEXT:    buffer_store_short v1, off, s[4:7], 0 offset:4
976; GCN-NEXT:    s_endpgm
977;
978; GCN-IR-LABEL: s_test_sdiv24_48:
979; GCN-IR:       ; %bb.0: ; %_udiv-special-cases
980; GCN-IR-NEXT:    s_load_dword s3, s[0:1], 0xc
981; GCN-IR-NEXT:    s_load_dword s5, s[0:1], 0xe
982; GCN-IR-NEXT:    s_load_dword s2, s[0:1], 0xb
983; GCN-IR-NEXT:    s_load_dword s4, s[0:1], 0xd
984; GCN-IR-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
985; GCN-IR-NEXT:    s_waitcnt lgkmcnt(0)
986; GCN-IR-NEXT:    s_sext_i32_i16 s3, s3
987; GCN-IR-NEXT:    s_sext_i32_i16 s5, s5
988; GCN-IR-NEXT:    s_ashr_i64 s[6:7], s[2:3], 24
989; GCN-IR-NEXT:    s_ashr_i32 s2, s3, 31
990; GCN-IR-NEXT:    s_mov_b32 s3, s2
991; GCN-IR-NEXT:    s_ashr_i64 s[8:9], s[4:5], 24
992; GCN-IR-NEXT:    s_ashr_i32 s4, s5, 31
993; GCN-IR-NEXT:    s_xor_b64 s[6:7], s[2:3], s[6:7]
994; GCN-IR-NEXT:    s_mov_b32 s5, s4
995; GCN-IR-NEXT:    s_sub_u32 s12, s6, s2
996; GCN-IR-NEXT:    s_subb_u32 s13, s7, s2
997; GCN-IR-NEXT:    s_xor_b64 s[6:7], s[4:5], s[8:9]
998; GCN-IR-NEXT:    s_sub_u32 s6, s6, s4
999; GCN-IR-NEXT:    s_subb_u32 s7, s7, s4
1000; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[6:7], 0
1001; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[14:15], s[12:13], 0
1002; GCN-IR-NEXT:    s_mov_b64 s[8:9], 0
1003; GCN-IR-NEXT:    s_or_b64 s[16:17], s[10:11], s[14:15]
1004; GCN-IR-NEXT:    s_flbit_i32_b32 s10, s6
1005; GCN-IR-NEXT:    s_add_i32 s10, s10, 32
1006; GCN-IR-NEXT:    s_flbit_i32_b32 s11, s7
1007; GCN-IR-NEXT:    s_min_u32 s14, s10, s11
1008; GCN-IR-NEXT:    s_flbit_i32_b32 s10, s12
1009; GCN-IR-NEXT:    s_add_i32 s10, s10, 32
1010; GCN-IR-NEXT:    s_flbit_i32_b32 s11, s13
1011; GCN-IR-NEXT:    s_min_u32 s18, s10, s11
1012; GCN-IR-NEXT:    s_sub_u32 s10, s14, s18
1013; GCN-IR-NEXT:    s_subb_u32 s11, 0, 0
1014; GCN-IR-NEXT:    v_cmp_gt_u64_e64 s[20:21], s[10:11], 63
1015; GCN-IR-NEXT:    s_mov_b32 s15, 0
1016; GCN-IR-NEXT:    s_or_b64 s[16:17], s[16:17], s[20:21]
1017; GCN-IR-NEXT:    v_cmp_ne_u64_e64 s[20:21], s[10:11], 63
1018; GCN-IR-NEXT:    s_xor_b64 s[22:23], s[16:17], -1
1019; GCN-IR-NEXT:    s_and_b64 s[20:21], s[22:23], s[20:21]
1020; GCN-IR-NEXT:    s_and_b64 vcc, exec, s[20:21]
1021; GCN-IR-NEXT:    s_cbranch_vccz .LBB9_5
1022; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
1023; GCN-IR-NEXT:    s_add_u32 s16, s10, 1
1024; GCN-IR-NEXT:    s_addc_u32 s17, s11, 0
1025; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[20:21], s[16:17], 0
1026; GCN-IR-NEXT:    s_sub_i32 s10, 63, s10
1027; GCN-IR-NEXT:    s_andn2_b64 vcc, exec, s[20:21]
1028; GCN-IR-NEXT:    s_lshl_b64 s[10:11], s[12:13], s10
1029; GCN-IR-NEXT:    s_cbranch_vccz .LBB9_4
1030; GCN-IR-NEXT:  ; %bb.2: ; %udiv-preheader
1031; GCN-IR-NEXT:    s_lshr_b64 s[16:17], s[12:13], s16
1032; GCN-IR-NEXT:    s_add_u32 s19, s6, -1
1033; GCN-IR-NEXT:    s_addc_u32 s20, s7, -1
1034; GCN-IR-NEXT:    s_not_b64 s[8:9], s[14:15]
1035; GCN-IR-NEXT:    s_add_u32 s12, s8, s18
1036; GCN-IR-NEXT:    s_addc_u32 s13, s9, 0
1037; GCN-IR-NEXT:    s_mov_b64 s[14:15], 0
1038; GCN-IR-NEXT:    s_mov_b32 s9, 0
1039; GCN-IR-NEXT:  .LBB9_3: ; %udiv-do-while
1040; GCN-IR-NEXT:    ; =>This Inner Loop Header: Depth=1
1041; GCN-IR-NEXT:    s_lshl_b64 s[16:17], s[16:17], 1
1042; GCN-IR-NEXT:    s_lshr_b32 s8, s11, 31
1043; GCN-IR-NEXT:    s_lshl_b64 s[10:11], s[10:11], 1
1044; GCN-IR-NEXT:    s_or_b64 s[16:17], s[16:17], s[8:9]
1045; GCN-IR-NEXT:    s_or_b64 s[10:11], s[14:15], s[10:11]
1046; GCN-IR-NEXT:    s_sub_u32 s8, s19, s16
1047; GCN-IR-NEXT:    s_subb_u32 s8, s20, s17
1048; GCN-IR-NEXT:    s_ashr_i32 s14, s8, 31
1049; GCN-IR-NEXT:    s_mov_b32 s15, s14
1050; GCN-IR-NEXT:    s_and_b32 s8, s14, 1
1051; GCN-IR-NEXT:    s_and_b64 s[14:15], s[14:15], s[6:7]
1052; GCN-IR-NEXT:    s_sub_u32 s16, s16, s14
1053; GCN-IR-NEXT:    s_subb_u32 s17, s17, s15
1054; GCN-IR-NEXT:    s_add_u32 s12, s12, 1
1055; GCN-IR-NEXT:    s_addc_u32 s13, s13, 0
1056; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[22:23], s[12:13], 0
1057; GCN-IR-NEXT:    s_mov_b64 s[14:15], s[8:9]
1058; GCN-IR-NEXT:    s_and_b64 vcc, exec, s[22:23]
1059; GCN-IR-NEXT:    s_cbranch_vccz .LBB9_3
1060; GCN-IR-NEXT:  .LBB9_4: ; %Flow3
1061; GCN-IR-NEXT:    s_lshl_b64 s[6:7], s[10:11], 1
1062; GCN-IR-NEXT:    s_or_b64 s[6:7], s[8:9], s[6:7]
1063; GCN-IR-NEXT:    v_mov_b32_e32 v0, s6
1064; GCN-IR-NEXT:    v_mov_b32_e32 v1, s7
1065; GCN-IR-NEXT:    s_branch .LBB9_6
1066; GCN-IR-NEXT:  .LBB9_5:
1067; GCN-IR-NEXT:    v_mov_b32_e32 v0, s13
1068; GCN-IR-NEXT:    v_cndmask_b32_e64 v1, v0, 0, s[16:17]
1069; GCN-IR-NEXT:    v_mov_b32_e32 v0, s12
1070; GCN-IR-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[16:17]
1071; GCN-IR-NEXT:  .LBB9_6: ; %udiv-end
1072; GCN-IR-NEXT:    s_xor_b64 s[2:3], s[4:5], s[2:3]
1073; GCN-IR-NEXT:    v_xor_b32_e32 v0, s2, v0
1074; GCN-IR-NEXT:    v_xor_b32_e32 v1, s3, v1
1075; GCN-IR-NEXT:    v_mov_b32_e32 v2, s3
1076; GCN-IR-NEXT:    v_subrev_i32_e32 v0, vcc, s2, v0
1077; GCN-IR-NEXT:    v_subb_u32_e32 v1, vcc, v1, v2, vcc
1078; GCN-IR-NEXT:    s_mov_b32 s3, 0xf000
1079; GCN-IR-NEXT:    s_mov_b32 s2, -1
1080; GCN-IR-NEXT:    buffer_store_short v1, off, s[0:3], 0 offset:4
1081; GCN-IR-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1082; GCN-IR-NEXT:    s_endpgm
  ; ashr by 24 keeps the top 24 bits of each i48 operand, sign-extended.
1083  %1 = ashr i48 %x, 24
1084  %2 = ashr i48 %y, 24
1085  %result = sdiv i48 %1, %2
1086  store i48 %result, i48 addrspace(1)* %out
1087  ret void
1088}
1089
; Kernel test: signed 64-bit division of the constant 24 by a runtime value
; (sdiv i64 24, %x). The expected output differs by check prefix: under the
; GCN prefix it is branch-free and built around v_rcp_f32 followed by
; multiply/add steps, while under the GCN-IR prefix it is a loop-based
; expansion (blocks _udiv-special-cases, udiv-bb1, udiv-preheader,
; udiv-do-while, udiv-end) in which 24 appears as an inline operand of the
; 64-bit shifts.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
1090define amdgpu_kernel void @s_test_sdiv_k_num_i64(i64 addrspace(1)* %out, i64 %x) {
1091; GCN-LABEL: s_test_sdiv_k_num_i64:
1092; GCN:       ; %bb.0:
1093; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1094; GCN-NEXT:    s_mov_b32 s7, 0xf000
1095; GCN-NEXT:    s_mov_b32 s6, -1
1096; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1097; GCN-NEXT:    s_ashr_i32 s8, s3, 31
1098; GCN-NEXT:    s_add_u32 s2, s2, s8
1099; GCN-NEXT:    s_mov_b32 s9, s8
1100; GCN-NEXT:    s_addc_u32 s3, s3, s8
1101; GCN-NEXT:    s_xor_b64 s[2:3], s[2:3], s[8:9]
1102; GCN-NEXT:    v_cvt_f32_u32_e32 v0, s2
1103; GCN-NEXT:    v_cvt_f32_u32_e32 v1, s3
1104; GCN-NEXT:    s_sub_u32 s4, 0, s2
1105; GCN-NEXT:    s_subb_u32 s5, 0, s3
1106; GCN-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
1107; GCN-NEXT:    v_rcp_f32_e32 v0, v0
1108; GCN-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
1109; GCN-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
1110; GCN-NEXT:    v_trunc_f32_e32 v1, v1
1111; GCN-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
1112; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
1113; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
1114; GCN-NEXT:    v_mul_lo_u32 v2, s4, v1
1115; GCN-NEXT:    v_mul_hi_u32 v3, s4, v0
1116; GCN-NEXT:    v_mul_lo_u32 v5, s5, v0
1117; GCN-NEXT:    v_mul_lo_u32 v4, s4, v0
1118; GCN-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
1119; GCN-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
1120; GCN-NEXT:    v_mul_hi_u32 v3, v0, v4
1121; GCN-NEXT:    v_mul_lo_u32 v5, v0, v2
1122; GCN-NEXT:    v_mul_hi_u32 v7, v0, v2
1123; GCN-NEXT:    v_mul_hi_u32 v6, v1, v4
1124; GCN-NEXT:    v_mul_lo_u32 v4, v1, v4
1125; GCN-NEXT:    v_mul_hi_u32 v8, v1, v2
1126; GCN-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
1127; GCN-NEXT:    v_addc_u32_e32 v5, vcc, 0, v7, vcc
1128; GCN-NEXT:    v_mul_lo_u32 v2, v1, v2
1129; GCN-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
1130; GCN-NEXT:    v_addc_u32_e32 v3, vcc, v5, v6, vcc
1131; GCN-NEXT:    v_addc_u32_e32 v4, vcc, 0, v8, vcc
1132; GCN-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
1133; GCN-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
1134; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
1135; GCN-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
1136; GCN-NEXT:    v_mul_lo_u32 v2, s4, v1
1137; GCN-NEXT:    v_mul_hi_u32 v3, s4, v0
1138; GCN-NEXT:    v_mul_lo_u32 v4, s5, v0
1139; GCN-NEXT:    s_mov_b32 s5, s1
1140; GCN-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
1141; GCN-NEXT:    v_mul_lo_u32 v3, s4, v0
1142; GCN-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
1143; GCN-NEXT:    v_mul_lo_u32 v6, v0, v2
1144; GCN-NEXT:    v_mul_hi_u32 v7, v0, v3
1145; GCN-NEXT:    v_mul_hi_u32 v8, v0, v2
1146; GCN-NEXT:    v_mul_hi_u32 v5, v1, v3
1147; GCN-NEXT:    v_mul_lo_u32 v3, v1, v3
1148; GCN-NEXT:    v_mul_hi_u32 v4, v1, v2
1149; GCN-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
1150; GCN-NEXT:    v_addc_u32_e32 v7, vcc, 0, v8, vcc
1151; GCN-NEXT:    v_mul_lo_u32 v2, v1, v2
1152; GCN-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
1153; GCN-NEXT:    v_addc_u32_e32 v3, vcc, v7, v5, vcc
1154; GCN-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
1155; GCN-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
1156; GCN-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
1157; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
1158; GCN-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
1159; GCN-NEXT:    v_mul_lo_u32 v2, v1, 24
1160; GCN-NEXT:    v_mul_hi_u32 v0, v0, 24
1161; GCN-NEXT:    v_mul_hi_u32 v1, v1, 24
1162; GCN-NEXT:    v_mov_b32_e32 v4, s3
1163; GCN-NEXT:    s_mov_b32 s4, s0
1164; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
1165; GCN-NEXT:    v_addc_u32_e32 v0, vcc, 0, v1, vcc
1166; GCN-NEXT:    v_mul_lo_u32 v1, s3, v0
1167; GCN-NEXT:    v_mul_hi_u32 v2, s2, v0
1168; GCN-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
1169; GCN-NEXT:    v_mul_lo_u32 v2, s2, v0
1170; GCN-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
1171; GCN-NEXT:    v_sub_i32_e32 v2, vcc, 24, v2
1172; GCN-NEXT:    v_subb_u32_e64 v3, s[0:1], v3, v4, vcc
1173; GCN-NEXT:    v_subrev_i32_e64 v4, s[0:1], s2, v2
1174; GCN-NEXT:    v_subbrev_u32_e64 v3, s[0:1], 0, v3, s[0:1]
1175; GCN-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v3
1176; GCN-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
1177; GCN-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v4
1178; GCN-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[0:1]
1179; GCN-NEXT:    v_cmp_eq_u32_e64 s[0:1], s3, v3
1180; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, v4, s[0:1]
1181; GCN-NEXT:    v_add_i32_e64 v4, s[0:1], 2, v0
1182; GCN-NEXT:    v_addc_u32_e64 v5, s[0:1], 0, 0, s[0:1]
1183; GCN-NEXT:    v_add_i32_e64 v6, s[0:1], 1, v0
1184; GCN-NEXT:    v_addc_u32_e64 v7, s[0:1], 0, 0, s[0:1]
1185; GCN-NEXT:    v_subb_u32_e32 v1, vcc, 0, v1, vcc
1186; GCN-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v3
1187; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s3, v1
1188; GCN-NEXT:    v_cndmask_b32_e64 v3, v7, v5, s[0:1]
1189; GCN-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
1190; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s2, v2
1191; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
1192; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v1
1193; GCN-NEXT:    v_cndmask_b32_e32 v1, v5, v2, vcc
1194; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
1195; GCN-NEXT:    v_cndmask_b32_e64 v2, v6, v4, s[0:1]
1196; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1197; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
1198; GCN-NEXT:    v_xor_b32_e32 v0, s8, v0
1199; GCN-NEXT:    v_xor_b32_e32 v1, s8, v1
1200; GCN-NEXT:    v_mov_b32_e32 v2, s8
1201; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s8, v0
1202; GCN-NEXT:    v_subb_u32_e32 v1, vcc, v1, v2, vcc
1203; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1204; GCN-NEXT:    s_endpgm
1205;
1206; GCN-IR-LABEL: s_test_sdiv_k_num_i64:
1207; GCN-IR:       ; %bb.0: ; %_udiv-special-cases
1208; GCN-IR-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1209; GCN-IR-NEXT:    s_waitcnt lgkmcnt(0)
1210; GCN-IR-NEXT:    s_ashr_i32 s4, s3, 31
1211; GCN-IR-NEXT:    s_mov_b32 s5, s4
1212; GCN-IR-NEXT:    s_xor_b64 s[2:3], s[4:5], s[2:3]
1213; GCN-IR-NEXT:    s_sub_u32 s2, s2, s4
1214; GCN-IR-NEXT:    s_subb_u32 s3, s3, s4
1215; GCN-IR-NEXT:    s_flbit_i32_b32 s6, s2
1216; GCN-IR-NEXT:    s_add_i32 s6, s6, 32
1217; GCN-IR-NEXT:    s_flbit_i32_b32 s7, s3
1218; GCN-IR-NEXT:    s_min_u32 s10, s6, s7
1219; GCN-IR-NEXT:    s_add_u32 s8, s10, 0xffffffc5
1220; GCN-IR-NEXT:    s_addc_u32 s9, 0, -1
1221; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[12:13], s[2:3], 0
1222; GCN-IR-NEXT:    v_cmp_gt_u64_e64 s[14:15], s[8:9], 63
1223; GCN-IR-NEXT:    s_mov_b64 s[6:7], 0
1224; GCN-IR-NEXT:    s_or_b64 s[12:13], s[12:13], s[14:15]
1225; GCN-IR-NEXT:    v_cmp_ne_u64_e64 s[14:15], s[8:9], 63
1226; GCN-IR-NEXT:    s_xor_b64 s[16:17], s[12:13], -1
1227; GCN-IR-NEXT:    s_and_b64 s[14:15], s[16:17], s[14:15]
1228; GCN-IR-NEXT:    s_and_b64 vcc, exec, s[14:15]
1229; GCN-IR-NEXT:    s_cbranch_vccz .LBB10_5
1230; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
1231; GCN-IR-NEXT:    s_add_u32 s12, s8, 1
1232; GCN-IR-NEXT:    s_addc_u32 s13, s9, 0
1233; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[14:15], s[12:13], 0
1234; GCN-IR-NEXT:    s_sub_i32 s8, 63, s8
1235; GCN-IR-NEXT:    s_andn2_b64 vcc, exec, s[14:15]
1236; GCN-IR-NEXT:    s_lshl_b64 s[8:9], 24, s8
1237; GCN-IR-NEXT:    s_cbranch_vccz .LBB10_4
1238; GCN-IR-NEXT:  ; %bb.2: ; %udiv-preheader
1239; GCN-IR-NEXT:    s_lshr_b64 s[12:13], 24, s12
1240; GCN-IR-NEXT:    s_add_u32 s16, s2, -1
1241; GCN-IR-NEXT:    s_addc_u32 s17, s3, -1
1242; GCN-IR-NEXT:    s_sub_u32 s10, 58, s10
1243; GCN-IR-NEXT:    s_subb_u32 s11, 0, 0
1244; GCN-IR-NEXT:    s_mov_b64 s[14:15], 0
1245; GCN-IR-NEXT:    s_mov_b32 s7, 0
1246; GCN-IR-NEXT:  .LBB10_3: ; %udiv-do-while
1247; GCN-IR-NEXT:    ; =>This Inner Loop Header: Depth=1
1248; GCN-IR-NEXT:    s_lshl_b64 s[12:13], s[12:13], 1
1249; GCN-IR-NEXT:    s_lshr_b32 s6, s9, 31
1250; GCN-IR-NEXT:    s_lshl_b64 s[8:9], s[8:9], 1
1251; GCN-IR-NEXT:    s_or_b64 s[12:13], s[12:13], s[6:7]
1252; GCN-IR-NEXT:    s_or_b64 s[8:9], s[14:15], s[8:9]
1253; GCN-IR-NEXT:    s_sub_u32 s6, s16, s12
1254; GCN-IR-NEXT:    s_subb_u32 s6, s17, s13
1255; GCN-IR-NEXT:    s_ashr_i32 s14, s6, 31
1256; GCN-IR-NEXT:    s_mov_b32 s15, s14
1257; GCN-IR-NEXT:    s_and_b32 s6, s14, 1
1258; GCN-IR-NEXT:    s_and_b64 s[14:15], s[14:15], s[2:3]
1259; GCN-IR-NEXT:    s_sub_u32 s12, s12, s14
1260; GCN-IR-NEXT:    s_subb_u32 s13, s13, s15
1261; GCN-IR-NEXT:    s_add_u32 s10, s10, 1
1262; GCN-IR-NEXT:    s_addc_u32 s11, s11, 0
1263; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[18:19], s[10:11], 0
1264; GCN-IR-NEXT:    s_mov_b64 s[14:15], s[6:7]
1265; GCN-IR-NEXT:    s_and_b64 vcc, exec, s[18:19]
1266; GCN-IR-NEXT:    s_cbranch_vccz .LBB10_3
1267; GCN-IR-NEXT:  .LBB10_4: ; %Flow5
1268; GCN-IR-NEXT:    s_lshl_b64 s[2:3], s[8:9], 1
1269; GCN-IR-NEXT:    s_or_b64 s[2:3], s[6:7], s[2:3]
1270; GCN-IR-NEXT:    v_mov_b32_e32 v0, s2
1271; GCN-IR-NEXT:    v_mov_b32_e32 v1, s3
1272; GCN-IR-NEXT:    s_branch .LBB10_6
1273; GCN-IR-NEXT:  .LBB10_5:
1274; GCN-IR-NEXT:    v_mov_b32_e32 v1, 0
1275; GCN-IR-NEXT:    v_cndmask_b32_e64 v0, 24, 0, s[12:13]
1276; GCN-IR-NEXT:  .LBB10_6: ; %udiv-end
1277; GCN-IR-NEXT:    v_xor_b32_e32 v0, s4, v0
1278; GCN-IR-NEXT:    v_xor_b32_e32 v1, s5, v1
1279; GCN-IR-NEXT:    v_mov_b32_e32 v2, s5
1280; GCN-IR-NEXT:    v_subrev_i32_e32 v0, vcc, s4, v0
1281; GCN-IR-NEXT:    s_mov_b32 s3, 0xf000
1282; GCN-IR-NEXT:    s_mov_b32 s2, -1
1283; GCN-IR-NEXT:    v_subb_u32_e32 v1, vcc, v1, v2, vcc
1284; GCN-IR-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1285; GCN-IR-NEXT:    s_endpgm
  ; The dividend is the literal 24; only the divisor %x is a kernel argument.
1286  %result = sdiv i64 24, %x
1287  store i64 %result, i64 addrspace(1)* %out
1288  ret void
1289}
1290
1291define i64 @v_test_sdiv_k_num_i64(i64 %x) {
1292; GCN-LABEL: v_test_sdiv_k_num_i64:
1293; GCN:       ; %bb.0:
1294; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1295; GCN-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
1296; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
1297; GCN-NEXT:    v_addc_u32_e32 v1, vcc, v1, v2, vcc
1298; GCN-NEXT:    v_xor_b32_e32 v1, v1, v2
1299; GCN-NEXT:    v_xor_b32_e32 v0, v0, v2
1300; GCN-NEXT:    v_cvt_f32_u32_e32 v3, v0
1301; GCN-NEXT:    v_cvt_f32_u32_e32 v4, v1
1302; GCN-NEXT:    v_sub_i32_e32 v5, vcc, 0, v0
1303; GCN-NEXT:    v_subb_u32_e32 v6, vcc, 0, v1, vcc
1304; GCN-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v4
1305; GCN-NEXT:    v_rcp_f32_e32 v3, v3
1306; GCN-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v3
1307; GCN-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v3
1308; GCN-NEXT:    v_trunc_f32_e32 v4, v4
1309; GCN-NEXT:    v_mac_f32_e32 v3, 0xcf800000, v4
1310; GCN-NEXT:    v_cvt_u32_f32_e32 v3, v3
1311; GCN-NEXT:    v_cvt_u32_f32_e32 v4, v4
1312; GCN-NEXT:    v_mul_hi_u32 v7, v5, v3
1313; GCN-NEXT:    v_mul_lo_u32 v8, v5, v4
1314; GCN-NEXT:    v_mul_lo_u32 v9, v6, v3
1315; GCN-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
1316; GCN-NEXT:    v_mul_lo_u32 v8, v5, v3
1317; GCN-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
1318; GCN-NEXT:    v_mul_lo_u32 v9, v3, v7
1319; GCN-NEXT:    v_mul_hi_u32 v10, v3, v8
1320; GCN-NEXT:    v_mul_hi_u32 v11, v3, v7
1321; GCN-NEXT:    v_mul_hi_u32 v12, v4, v7
1322; GCN-NEXT:    v_mul_lo_u32 v7, v4, v7
1323; GCN-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
1324; GCN-NEXT:    v_addc_u32_e32 v10, vcc, 0, v11, vcc
1325; GCN-NEXT:    v_mul_lo_u32 v11, v4, v8
1326; GCN-NEXT:    v_mul_hi_u32 v8, v4, v8
1327; GCN-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
1328; GCN-NEXT:    v_addc_u32_e32 v8, vcc, v10, v8, vcc
1329; GCN-NEXT:    v_addc_u32_e32 v9, vcc, 0, v12, vcc
1330; GCN-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
1331; GCN-NEXT:    v_addc_u32_e32 v8, vcc, 0, v9, vcc
1332; GCN-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
1333; GCN-NEXT:    v_addc_u32_e32 v4, vcc, v4, v8, vcc
1334; GCN-NEXT:    v_mul_lo_u32 v7, v5, v4
1335; GCN-NEXT:    v_mul_hi_u32 v8, v5, v3
1336; GCN-NEXT:    v_mul_lo_u32 v6, v6, v3
1337; GCN-NEXT:    v_mul_lo_u32 v5, v5, v3
1338; GCN-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
1339; GCN-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
1340; GCN-NEXT:    v_mul_lo_u32 v9, v3, v6
1341; GCN-NEXT:    v_mul_hi_u32 v10, v3, v5
1342; GCN-NEXT:    v_mul_hi_u32 v11, v3, v6
1343; GCN-NEXT:    v_mul_hi_u32 v8, v4, v5
1344; GCN-NEXT:    v_mul_lo_u32 v5, v4, v5
1345; GCN-NEXT:    v_mul_hi_u32 v7, v4, v6
1346; GCN-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
1347; GCN-NEXT:    v_addc_u32_e32 v10, vcc, 0, v11, vcc
1348; GCN-NEXT:    v_mul_lo_u32 v6, v4, v6
1349; GCN-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
1350; GCN-NEXT:    v_addc_u32_e32 v5, vcc, v10, v8, vcc
1351; GCN-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
1352; GCN-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
1353; GCN-NEXT:    v_addc_u32_e32 v6, vcc, 0, v7, vcc
1354; GCN-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
1355; GCN-NEXT:    v_addc_u32_e32 v4, vcc, v4, v6, vcc
1356; GCN-NEXT:    v_mul_lo_u32 v5, v4, 24
1357; GCN-NEXT:    v_mul_hi_u32 v3, v3, 24
1358; GCN-NEXT:    v_mul_hi_u32 v4, v4, 24
1359; GCN-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
1360; GCN-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
1361; GCN-NEXT:    v_mul_lo_u32 v4, v1, v3
1362; GCN-NEXT:    v_mul_hi_u32 v5, v0, v3
1363; GCN-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
1364; GCN-NEXT:    v_mul_lo_u32 v5, v0, v3
1365; GCN-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
1366; GCN-NEXT:    v_sub_i32_e32 v5, vcc, 24, v5
1367; GCN-NEXT:    v_subb_u32_e64 v6, s[4:5], v6, v1, vcc
1368; GCN-NEXT:    v_sub_i32_e64 v7, s[4:5], v5, v0
1369; GCN-NEXT:    v_subbrev_u32_e64 v6, s[4:5], 0, v6, s[4:5]
1370; GCN-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v1
1371; GCN-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
1372; GCN-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v0
1373; GCN-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
1374; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], v6, v1
1375; GCN-NEXT:    v_cndmask_b32_e64 v6, v8, v7, s[4:5]
1376; GCN-NEXT:    v_add_i32_e64 v7, s[4:5], 2, v3
1377; GCN-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5]
1378; GCN-NEXT:    v_add_i32_e64 v9, s[4:5], 1, v3
1379; GCN-NEXT:    v_addc_u32_e64 v10, s[4:5], 0, 0, s[4:5]
1380; GCN-NEXT:    v_subb_u32_e32 v4, vcc, 0, v4, vcc
1381; GCN-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v6
1382; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v1
1383; GCN-NEXT:    v_cndmask_b32_e64 v6, v10, v8, s[4:5]
1384; GCN-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
1385; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v0
1386; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
1387; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
1388; GCN-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
1389; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
1390; GCN-NEXT:    v_cndmask_b32_e64 v1, v9, v7, s[4:5]
1391; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
1392; GCN-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
1393; GCN-NEXT:    v_xor_b32_e32 v3, v0, v2
1394; GCN-NEXT:    v_xor_b32_e32 v0, v1, v2
1395; GCN-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
1396; GCN-NEXT:    v_subb_u32_e32 v1, vcc, v3, v2, vcc
1397; GCN-NEXT:    s_setpc_b64 s[30:31]
1398;
1399; GCN-IR-LABEL: v_test_sdiv_k_num_i64:
1400; GCN-IR:       ; %bb.0: ; %_udiv-special-cases
1401; GCN-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1402; GCN-IR-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
1403; GCN-IR-NEXT:    v_xor_b32_e32 v0, v2, v0
1404; GCN-IR-NEXT:    v_xor_b32_e32 v1, v2, v1
1405; GCN-IR-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
1406; GCN-IR-NEXT:    v_subb_u32_e32 v1, vcc, v1, v2, vcc
1407; GCN-IR-NEXT:    v_ffbh_u32_e32 v4, v0
1408; GCN-IR-NEXT:    v_add_i32_e32 v4, vcc, 32, v4
1409; GCN-IR-NEXT:    v_ffbh_u32_e32 v5, v1
1410; GCN-IR-NEXT:    v_min_u32_e32 v8, v4, v5
1411; GCN-IR-NEXT:    s_movk_i32 s6, 0xffc5
1412; GCN-IR-NEXT:    v_add_i32_e32 v5, vcc, s6, v8
1413; GCN-IR-NEXT:    v_addc_u32_e64 v6, s[6:7], 0, -1, vcc
1414; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
1415; GCN-IR-NEXT:    v_cmp_lt_u64_e32 vcc, 63, v[5:6]
1416; GCN-IR-NEXT:    v_cmp_ne_u64_e64 s[6:7], 63, v[5:6]
1417; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
1418; GCN-IR-NEXT:    v_cndmask_b32_e64 v7, 24, 0, s[4:5]
1419; GCN-IR-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
1420; GCN-IR-NEXT:    v_mov_b32_e32 v3, v2
1421; GCN-IR-NEXT:    v_mov_b32_e32 v4, 0
1422; GCN-IR-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
1423; GCN-IR-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
1424; GCN-IR-NEXT:    s_cbranch_execz .LBB11_6
1425; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
1426; GCN-IR-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
1427; GCN-IR-NEXT:    v_addc_u32_e32 v10, vcc, 0, v6, vcc
1428; GCN-IR-NEXT:    v_sub_i32_e64 v4, s[4:5], 63, v5
1429; GCN-IR-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[9:10]
1430; GCN-IR-NEXT:    v_lshl_b64 v[4:5], 24, v4
1431; GCN-IR-NEXT:    v_mov_b32_e32 v6, 0
1432; GCN-IR-NEXT:    v_mov_b32_e32 v7, 0
1433; GCN-IR-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1434; GCN-IR-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
1435; GCN-IR-NEXT:    s_cbranch_execz .LBB11_5
1436; GCN-IR-NEXT:  ; %bb.2: ; %udiv-preheader
1437; GCN-IR-NEXT:    v_add_i32_e32 v14, vcc, -1, v0
1438; GCN-IR-NEXT:    v_addc_u32_e32 v15, vcc, -1, v1, vcc
1439; GCN-IR-NEXT:    v_lshr_b64 v[10:11], 24, v9
1440; GCN-IR-NEXT:    v_sub_i32_e32 v8, vcc, 58, v8
1441; GCN-IR-NEXT:    v_mov_b32_e32 v12, 0
1442; GCN-IR-NEXT:    v_subb_u32_e64 v9, s[4:5], 0, 0, vcc
1443; GCN-IR-NEXT:    s_mov_b64 s[10:11], 0
1444; GCN-IR-NEXT:    v_mov_b32_e32 v13, 0
1445; GCN-IR-NEXT:    v_mov_b32_e32 v7, 0
1446; GCN-IR-NEXT:  .LBB11_3: ; %udiv-do-while
1447; GCN-IR-NEXT:    ; =>This Inner Loop Header: Depth=1
1448; GCN-IR-NEXT:    v_lshl_b64 v[10:11], v[10:11], 1
1449; GCN-IR-NEXT:    v_lshrrev_b32_e32 v6, 31, v5
1450; GCN-IR-NEXT:    v_or_b32_e32 v10, v10, v6
1451; GCN-IR-NEXT:    v_lshl_b64 v[4:5], v[4:5], 1
1452; GCN-IR-NEXT:    v_sub_i32_e32 v6, vcc, v14, v10
1453; GCN-IR-NEXT:    v_subb_u32_e32 v6, vcc, v15, v11, vcc
1454; GCN-IR-NEXT:    v_or_b32_e32 v4, v12, v4
1455; GCN-IR-NEXT:    v_ashrrev_i32_e32 v12, 31, v6
1456; GCN-IR-NEXT:    v_add_i32_e32 v8, vcc, 1, v8
1457; GCN-IR-NEXT:    v_or_b32_e32 v5, v13, v5
1458; GCN-IR-NEXT:    v_and_b32_e32 v6, 1, v12
1459; GCN-IR-NEXT:    v_and_b32_e32 v13, v12, v1
1460; GCN-IR-NEXT:    v_and_b32_e32 v12, v12, v0
1461; GCN-IR-NEXT:    v_addc_u32_e32 v9, vcc, 0, v9, vcc
1462; GCN-IR-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[8:9]
1463; GCN-IR-NEXT:    v_sub_i32_e64 v10, s[4:5], v10, v12
1464; GCN-IR-NEXT:    v_subb_u32_e64 v11, s[4:5], v11, v13, s[4:5]
1465; GCN-IR-NEXT:    v_mov_b32_e32 v13, v7
1466; GCN-IR-NEXT:    s_or_b64 s[10:11], vcc, s[10:11]
1467; GCN-IR-NEXT:    v_mov_b32_e32 v12, v6
1468; GCN-IR-NEXT:    s_andn2_b64 exec, exec, s[10:11]
1469; GCN-IR-NEXT:    s_cbranch_execnz .LBB11_3
1470; GCN-IR-NEXT:  ; %bb.4: ; %Flow
1471; GCN-IR-NEXT:    s_or_b64 exec, exec, s[10:11]
1472; GCN-IR-NEXT:  .LBB11_5: ; %Flow3
1473; GCN-IR-NEXT:    s_or_b64 exec, exec, s[8:9]
1474; GCN-IR-NEXT:    v_lshl_b64 v[0:1], v[4:5], 1
1475; GCN-IR-NEXT:    v_or_b32_e32 v4, v7, v1
1476; GCN-IR-NEXT:    v_or_b32_e32 v7, v6, v0
1477; GCN-IR-NEXT:  .LBB11_6: ; %Flow4
1478; GCN-IR-NEXT:    s_or_b64 exec, exec, s[6:7]
1479; GCN-IR-NEXT:    v_xor_b32_e32 v0, v7, v2
1480; GCN-IR-NEXT:    v_xor_b32_e32 v1, v4, v3
1481; GCN-IR-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
1482; GCN-IR-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
1483; GCN-IR-NEXT:    s_setpc_b64 s[30:31]
1484  %result = sdiv i64 24, %x
1485  ret i64 %result
1486}
1487
; Signed 64-bit division of the constant 32768 (2^15) by a variable divisor,
; in a non-kernel function that takes and returns an i64.
; The default GCN run expects the reciprocal-based expansion (v_rcp_f32
; followed by v_mul_lo/v_mul_hi correction steps). The GCN-IR run, which adds
; -amdgpu-codegenprepare-expand-div64, expects a loop (%udiv-do-while) that
; shifts by one bit and conditionally subtracts on every iteration.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
1488define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) {
1489; GCN-LABEL: v_test_sdiv_pow2_k_num_i64:
1490; GCN:       ; %bb.0:
1491; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1492; GCN-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
1493; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
1494; GCN-NEXT:    v_addc_u32_e32 v1, vcc, v1, v2, vcc
1495; GCN-NEXT:    v_xor_b32_e32 v1, v1, v2
1496; GCN-NEXT:    v_xor_b32_e32 v0, v0, v2
1497; GCN-NEXT:    v_cvt_f32_u32_e32 v3, v0
1498; GCN-NEXT:    v_cvt_f32_u32_e32 v4, v1
1499; GCN-NEXT:    v_sub_i32_e32 v5, vcc, 0, v0
1500; GCN-NEXT:    v_subb_u32_e32 v6, vcc, 0, v1, vcc
1501; GCN-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v4
1502; GCN-NEXT:    v_rcp_f32_e32 v3, v3
1503; GCN-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v3
1504; GCN-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v3
1505; GCN-NEXT:    v_trunc_f32_e32 v4, v4
1506; GCN-NEXT:    v_mac_f32_e32 v3, 0xcf800000, v4
1507; GCN-NEXT:    v_cvt_u32_f32_e32 v3, v3
1508; GCN-NEXT:    v_cvt_u32_f32_e32 v4, v4
1509; GCN-NEXT:    v_mul_hi_u32 v7, v5, v3
1510; GCN-NEXT:    v_mul_lo_u32 v8, v5, v4
1511; GCN-NEXT:    v_mul_lo_u32 v9, v6, v3
1512; GCN-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
1513; GCN-NEXT:    v_mul_lo_u32 v8, v5, v3
1514; GCN-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
1515; GCN-NEXT:    v_mul_lo_u32 v9, v3, v7
1516; GCN-NEXT:    v_mul_hi_u32 v10, v3, v8
1517; GCN-NEXT:    v_mul_hi_u32 v11, v3, v7
1518; GCN-NEXT:    v_mul_hi_u32 v12, v4, v7
1519; GCN-NEXT:    v_mul_lo_u32 v7, v4, v7
1520; GCN-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
1521; GCN-NEXT:    v_addc_u32_e32 v10, vcc, 0, v11, vcc
1522; GCN-NEXT:    v_mul_lo_u32 v11, v4, v8
1523; GCN-NEXT:    v_mul_hi_u32 v8, v4, v8
1524; GCN-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
1525; GCN-NEXT:    v_addc_u32_e32 v8, vcc, v10, v8, vcc
1526; GCN-NEXT:    v_addc_u32_e32 v9, vcc, 0, v12, vcc
1527; GCN-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
1528; GCN-NEXT:    v_addc_u32_e32 v8, vcc, 0, v9, vcc
1529; GCN-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
1530; GCN-NEXT:    v_addc_u32_e32 v4, vcc, v4, v8, vcc
1531; GCN-NEXT:    v_mul_lo_u32 v7, v5, v4
1532; GCN-NEXT:    v_mul_hi_u32 v8, v5, v3
1533; GCN-NEXT:    v_mul_lo_u32 v6, v6, v3
1534; GCN-NEXT:    v_mul_lo_u32 v5, v5, v3
1535; GCN-NEXT:    v_add_i32_e32 v7, vcc, v8, v7
1536; GCN-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
1537; GCN-NEXT:    v_mul_lo_u32 v9, v3, v6
1538; GCN-NEXT:    v_mul_hi_u32 v10, v3, v5
1539; GCN-NEXT:    v_mul_hi_u32 v11, v3, v6
1540; GCN-NEXT:    v_mul_hi_u32 v8, v4, v5
1541; GCN-NEXT:    v_mul_lo_u32 v5, v4, v5
1542; GCN-NEXT:    v_mul_hi_u32 v7, v4, v6
1543; GCN-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
1544; GCN-NEXT:    v_addc_u32_e32 v10, vcc, 0, v11, vcc
1545; GCN-NEXT:    v_mul_lo_u32 v6, v4, v6
1546; GCN-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
1547; GCN-NEXT:    v_addc_u32_e32 v5, vcc, v10, v8, vcc
1548; GCN-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
1549; GCN-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
1550; GCN-NEXT:    v_addc_u32_e32 v6, vcc, 0, v7, vcc
1551; GCN-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
1552; GCN-NEXT:    v_addc_u32_e32 v3, vcc, v4, v6, vcc
1553; GCN-NEXT:    v_lshrrev_b32_e32 v3, 17, v3
1554; GCN-NEXT:    v_mul_lo_u32 v4, v1, v3
1555; GCN-NEXT:    v_mul_hi_u32 v5, v0, v3
1556; GCN-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
1557; GCN-NEXT:    v_mul_lo_u32 v5, v0, v3
1558; GCN-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
1559; GCN-NEXT:    v_sub_i32_e32 v5, vcc, 0x8000, v5
1560; GCN-NEXT:    v_subb_u32_e64 v6, s[4:5], v6, v1, vcc
1561; GCN-NEXT:    v_sub_i32_e64 v7, s[4:5], v5, v0
1562; GCN-NEXT:    v_subbrev_u32_e64 v6, s[4:5], 0, v6, s[4:5]
1563; GCN-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v1
1564; GCN-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
1565; GCN-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v0
1566; GCN-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
1567; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], v6, v1
1568; GCN-NEXT:    v_cndmask_b32_e64 v6, v8, v7, s[4:5]
1569; GCN-NEXT:    v_add_i32_e64 v7, s[4:5], 2, v3
1570; GCN-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, 0, s[4:5]
1571; GCN-NEXT:    v_add_i32_e64 v9, s[4:5], 1, v3
1572; GCN-NEXT:    v_addc_u32_e64 v10, s[4:5], 0, 0, s[4:5]
1573; GCN-NEXT:    v_subb_u32_e32 v4, vcc, 0, v4, vcc
1574; GCN-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v6
1575; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v1
1576; GCN-NEXT:    v_cndmask_b32_e64 v6, v10, v8, s[4:5]
1577; GCN-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
1578; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v0
1579; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
1580; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
1581; GCN-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
1582; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
1583; GCN-NEXT:    v_cndmask_b32_e64 v1, v9, v7, s[4:5]
1584; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
1585; GCN-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
1586; GCN-NEXT:    v_xor_b32_e32 v3, v0, v2
1587; GCN-NEXT:    v_xor_b32_e32 v0, v1, v2
1588; GCN-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
1589; GCN-NEXT:    v_subb_u32_e32 v1, vcc, v3, v2, vcc
1590; GCN-NEXT:    s_setpc_b64 s[30:31]
1591;
1592; GCN-IR-LABEL: v_test_sdiv_pow2_k_num_i64:
1593; GCN-IR:       ; %bb.0: ; %_udiv-special-cases
1594; GCN-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1595; GCN-IR-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
1596; GCN-IR-NEXT:    v_xor_b32_e32 v0, v2, v0
1597; GCN-IR-NEXT:    v_xor_b32_e32 v1, v2, v1
1598; GCN-IR-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
1599; GCN-IR-NEXT:    v_subb_u32_e32 v1, vcc, v1, v2, vcc
1600; GCN-IR-NEXT:    v_ffbh_u32_e32 v4, v0
1601; GCN-IR-NEXT:    v_add_i32_e32 v4, vcc, 32, v4
1602; GCN-IR-NEXT:    v_ffbh_u32_e32 v5, v1
1603; GCN-IR-NEXT:    v_min_u32_e32 v8, v4, v5
1604; GCN-IR-NEXT:    s_movk_i32 s6, 0xffd0
1605; GCN-IR-NEXT:    v_add_i32_e32 v5, vcc, s6, v8
1606; GCN-IR-NEXT:    v_addc_u32_e64 v6, s[6:7], 0, -1, vcc
1607; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
1608; GCN-IR-NEXT:    v_cmp_lt_u64_e32 vcc, 63, v[5:6]
1609; GCN-IR-NEXT:    v_mov_b32_e32 v7, 0x8000
1610; GCN-IR-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
1611; GCN-IR-NEXT:    v_cmp_ne_u64_e32 vcc, 63, v[5:6]
1612; GCN-IR-NEXT:    v_cndmask_b32_e64 v7, v7, 0, s[4:5]
1613; GCN-IR-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
1614; GCN-IR-NEXT:    v_mov_b32_e32 v3, v2
1615; GCN-IR-NEXT:    v_mov_b32_e32 v4, 0
1616; GCN-IR-NEXT:    s_mov_b64 s[8:9], 0x8000
1617; GCN-IR-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
1618; GCN-IR-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
1619; GCN-IR-NEXT:    s_cbranch_execz .LBB12_6
1620; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
1621; GCN-IR-NEXT:    v_add_i32_e32 v9, vcc, 1, v5
1622; GCN-IR-NEXT:    v_addc_u32_e32 v10, vcc, 0, v6, vcc
1623; GCN-IR-NEXT:    v_sub_i32_e64 v4, s[4:5], 63, v5
1624; GCN-IR-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[9:10]
1625; GCN-IR-NEXT:    v_lshl_b64 v[4:5], s[8:9], v4
1626; GCN-IR-NEXT:    v_mov_b32_e32 v6, 0
1627; GCN-IR-NEXT:    v_mov_b32_e32 v7, 0
1628; GCN-IR-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1629; GCN-IR-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
1630; GCN-IR-NEXT:    s_cbranch_execz .LBB12_5
1631; GCN-IR-NEXT:  ; %bb.2: ; %udiv-preheader
1632; GCN-IR-NEXT:    v_add_i32_e32 v14, vcc, -1, v0
1633; GCN-IR-NEXT:    s_mov_b64 s[4:5], 0x8000
1634; GCN-IR-NEXT:    v_addc_u32_e32 v15, vcc, -1, v1, vcc
1635; GCN-IR-NEXT:    v_lshr_b64 v[10:11], s[4:5], v9
1636; GCN-IR-NEXT:    v_sub_i32_e32 v8, vcc, 47, v8
1637; GCN-IR-NEXT:    v_mov_b32_e32 v12, 0
1638; GCN-IR-NEXT:    v_subb_u32_e64 v9, s[4:5], 0, 0, vcc
1639; GCN-IR-NEXT:    s_mov_b64 s[10:11], 0
1640; GCN-IR-NEXT:    v_mov_b32_e32 v13, 0
1641; GCN-IR-NEXT:    v_mov_b32_e32 v7, 0
1642; GCN-IR-NEXT:  .LBB12_3: ; %udiv-do-while
1643; GCN-IR-NEXT:    ; =>This Inner Loop Header: Depth=1
1644; GCN-IR-NEXT:    v_lshl_b64 v[10:11], v[10:11], 1
1645; GCN-IR-NEXT:    v_lshrrev_b32_e32 v6, 31, v5
1646; GCN-IR-NEXT:    v_or_b32_e32 v10, v10, v6
1647; GCN-IR-NEXT:    v_lshl_b64 v[4:5], v[4:5], 1
1648; GCN-IR-NEXT:    v_sub_i32_e32 v6, vcc, v14, v10
1649; GCN-IR-NEXT:    v_subb_u32_e32 v6, vcc, v15, v11, vcc
1650; GCN-IR-NEXT:    v_or_b32_e32 v4, v12, v4
1651; GCN-IR-NEXT:    v_ashrrev_i32_e32 v12, 31, v6
1652; GCN-IR-NEXT:    v_add_i32_e32 v8, vcc, 1, v8
1653; GCN-IR-NEXT:    v_or_b32_e32 v5, v13, v5
1654; GCN-IR-NEXT:    v_and_b32_e32 v6, 1, v12
1655; GCN-IR-NEXT:    v_and_b32_e32 v13, v12, v1
1656; GCN-IR-NEXT:    v_and_b32_e32 v12, v12, v0
1657; GCN-IR-NEXT:    v_addc_u32_e32 v9, vcc, 0, v9, vcc
1658; GCN-IR-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[8:9]
1659; GCN-IR-NEXT:    v_sub_i32_e64 v10, s[4:5], v10, v12
1660; GCN-IR-NEXT:    v_subb_u32_e64 v11, s[4:5], v11, v13, s[4:5]
1661; GCN-IR-NEXT:    v_mov_b32_e32 v13, v7
1662; GCN-IR-NEXT:    s_or_b64 s[10:11], vcc, s[10:11]
1663; GCN-IR-NEXT:    v_mov_b32_e32 v12, v6
1664; GCN-IR-NEXT:    s_andn2_b64 exec, exec, s[10:11]
1665; GCN-IR-NEXT:    s_cbranch_execnz .LBB12_3
1666; GCN-IR-NEXT:  ; %bb.4: ; %Flow
1667; GCN-IR-NEXT:    s_or_b64 exec, exec, s[10:11]
1668; GCN-IR-NEXT:  .LBB12_5: ; %Flow3
1669; GCN-IR-NEXT:    s_or_b64 exec, exec, s[8:9]
1670; GCN-IR-NEXT:    v_lshl_b64 v[0:1], v[4:5], 1
1671; GCN-IR-NEXT:    v_or_b32_e32 v4, v7, v1
1672; GCN-IR-NEXT:    v_or_b32_e32 v7, v6, v0
1673; GCN-IR-NEXT:  .LBB12_6: ; %Flow4
1674; GCN-IR-NEXT:    s_or_b64 exec, exec, s[6:7]
1675; GCN-IR-NEXT:    v_xor_b32_e32 v0, v7, v2
1676; GCN-IR-NEXT:    v_xor_b32_e32 v1, v4, v3
1677; GCN-IR-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
1678; GCN-IR-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
1679; GCN-IR-NEXT:    s_setpc_b64 s[30:31]
1680  %result = sdiv i64 32768, %x
1681  ret i64 %result
1682}
1683
; Signed 64-bit division of a variable by the constant 32768 (2^15), in a
; non-kernel function that takes and returns an i64.
; The default GCN run expects no division sequence at all: the sign word is
; shifted right by 17 to form a rounding bias, the bias is added to the
; dividend, and the sum is arithmetically shifted right by 15.
; The GCN-IR run (-amdgpu-codegenprepare-expand-div64) still expects the
; %udiv-do-while loop, with the divisor appearing as the immediates 0x8000
; and 0x7fff.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
1684define i64 @v_test_sdiv_pow2_k_den_i64(i64 %x) {
1685; GCN-LABEL: v_test_sdiv_pow2_k_den_i64:
1686; GCN:       ; %bb.0:
1687; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1688; GCN-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
1689; GCN-NEXT:    v_lshrrev_b32_e32 v2, 17, v2
1690; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
1691; GCN-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1692; GCN-NEXT:    v_ashr_i64 v[0:1], v[0:1], 15
1693; GCN-NEXT:    s_setpc_b64 s[30:31]
1694;
1695; GCN-IR-LABEL: v_test_sdiv_pow2_k_den_i64:
1696; GCN-IR:       ; %bb.0: ; %_udiv-special-cases
1697; GCN-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1698; GCN-IR-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
1699; GCN-IR-NEXT:    v_xor_b32_e32 v0, v2, v0
1700; GCN-IR-NEXT:    v_xor_b32_e32 v1, v2, v1
1701; GCN-IR-NEXT:    v_sub_i32_e32 v7, vcc, v0, v2
1702; GCN-IR-NEXT:    v_subb_u32_e32 v8, vcc, v1, v2, vcc
1703; GCN-IR-NEXT:    v_ffbh_u32_e32 v0, v7
1704; GCN-IR-NEXT:    v_add_i32_e64 v0, s[4:5], 32, v0
1705; GCN-IR-NEXT:    v_ffbh_u32_e32 v1, v8
1706; GCN-IR-NEXT:    v_min_u32_e32 v0, v0, v1
1707; GCN-IR-NEXT:    v_sub_i32_e64 v3, s[4:5], 48, v0
1708; GCN-IR-NEXT:    v_subb_u32_e64 v4, s[4:5], 0, 0, s[4:5]
1709; GCN-IR-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[7:8]
1710; GCN-IR-NEXT:    v_cmp_lt_u64_e64 s[4:5], 63, v[3:4]
1711; GCN-IR-NEXT:    v_mov_b32_e32 v1, v2
1712; GCN-IR-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1713; GCN-IR-NEXT:    v_cmp_ne_u64_e32 vcc, 63, v[3:4]
1714; GCN-IR-NEXT:    s_xor_b64 s[6:7], s[4:5], -1
1715; GCN-IR-NEXT:    v_cndmask_b32_e64 v6, v8, 0, s[4:5]
1716; GCN-IR-NEXT:    v_cndmask_b32_e64 v5, v7, 0, s[4:5]
1717; GCN-IR-NEXT:    s_and_b64 s[4:5], s[6:7], vcc
1718; GCN-IR-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
1719; GCN-IR-NEXT:    s_cbranch_execz .LBB13_6
1720; GCN-IR-NEXT:  ; %bb.1: ; %udiv-bb1
1721; GCN-IR-NEXT:    v_add_i32_e32 v9, vcc, 1, v3
1722; GCN-IR-NEXT:    v_addc_u32_e32 v10, vcc, 0, v4, vcc
1723; GCN-IR-NEXT:    v_sub_i32_e64 v3, s[4:5], 63, v3
1724; GCN-IR-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[9:10]
1725; GCN-IR-NEXT:    v_lshl_b64 v[3:4], v[7:8], v3
1726; GCN-IR-NEXT:    v_mov_b32_e32 v5, 0
1727; GCN-IR-NEXT:    v_mov_b32_e32 v6, 0
1728; GCN-IR-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1729; GCN-IR-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
1730; GCN-IR-NEXT:    s_cbranch_execz .LBB13_5
1731; GCN-IR-NEXT:  ; %bb.2: ; %udiv-preheader
1732; GCN-IR-NEXT:    v_lshr_b64 v[9:10], v[7:8], v9
1733; GCN-IR-NEXT:    v_add_i32_e32 v7, vcc, 0xffffffcf, v0
1734; GCN-IR-NEXT:    v_mov_b32_e32 v11, 0
1735; GCN-IR-NEXT:    v_addc_u32_e64 v8, s[4:5], 0, -1, vcc
1736; GCN-IR-NEXT:    s_mov_b64 s[10:11], 0
1737; GCN-IR-NEXT:    v_mov_b32_e32 v12, 0
1738; GCN-IR-NEXT:    v_mov_b32_e32 v6, 0
1739; GCN-IR-NEXT:    s_movk_i32 s12, 0x7fff
1740; GCN-IR-NEXT:  .LBB13_3: ; %udiv-do-while
1741; GCN-IR-NEXT:    ; =>This Inner Loop Header: Depth=1
1742; GCN-IR-NEXT:    v_lshl_b64 v[9:10], v[9:10], 1
1743; GCN-IR-NEXT:    v_lshrrev_b32_e32 v0, 31, v4
1744; GCN-IR-NEXT:    v_or_b32_e32 v0, v9, v0
1745; GCN-IR-NEXT:    v_sub_i32_e32 v5, vcc, s12, v0
1746; GCN-IR-NEXT:    v_subb_u32_e32 v5, vcc, 0, v10, vcc
1747; GCN-IR-NEXT:    v_add_i32_e32 v7, vcc, 1, v7
1748; GCN-IR-NEXT:    v_lshl_b64 v[3:4], v[3:4], 1
1749; GCN-IR-NEXT:    v_ashrrev_i32_e32 v9, 31, v5
1750; GCN-IR-NEXT:    v_addc_u32_e32 v8, vcc, 0, v8, vcc
1751; GCN-IR-NEXT:    v_and_b32_e32 v5, 1, v9
1752; GCN-IR-NEXT:    v_and_b32_e32 v9, 0x8000, v9
1753; GCN-IR-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[7:8]
1754; GCN-IR-NEXT:    v_or_b32_e32 v4, v12, v4
1755; GCN-IR-NEXT:    v_or_b32_e32 v3, v11, v3
1756; GCN-IR-NEXT:    v_sub_i32_e64 v9, s[4:5], v0, v9
1757; GCN-IR-NEXT:    v_mov_b32_e32 v12, v6
1758; GCN-IR-NEXT:    v_subbrev_u32_e64 v10, s[4:5], 0, v10, s[4:5]
1759; GCN-IR-NEXT:    s_or_b64 s[10:11], vcc, s[10:11]
1760; GCN-IR-NEXT:    v_mov_b32_e32 v11, v5
1761; GCN-IR-NEXT:    s_andn2_b64 exec, exec, s[10:11]
1762; GCN-IR-NEXT:    s_cbranch_execnz .LBB13_3
1763; GCN-IR-NEXT:  ; %bb.4: ; %Flow
1764; GCN-IR-NEXT:    s_or_b64 exec, exec, s[10:11]
1765; GCN-IR-NEXT:  .LBB13_5: ; %Flow3
1766; GCN-IR-NEXT:    s_or_b64 exec, exec, s[8:9]
1767; GCN-IR-NEXT:    v_lshl_b64 v[3:4], v[3:4], 1
1768; GCN-IR-NEXT:    v_or_b32_e32 v6, v6, v4
1769; GCN-IR-NEXT:    v_or_b32_e32 v5, v5, v3
1770; GCN-IR-NEXT:  .LBB13_6: ; %Flow4
1771; GCN-IR-NEXT:    s_or_b64 exec, exec, s[6:7]
1772; GCN-IR-NEXT:    v_xor_b32_e32 v0, v5, v2
1773; GCN-IR-NEXT:    v_xor_b32_e32 v3, v6, v1
1774; GCN-IR-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
1775; GCN-IR-NEXT:    v_subb_u32_e32 v1, vcc, v3, v1, vcc
1776; GCN-IR-NEXT:    s_setpc_b64 s[30:31]
1777  %result = sdiv i64 %x, 32768
1778  ret i64 %result
1779}
1780
; Kernel variant: divides the constant 24 by (%x ashr 40) and stores the i64
; quotient to %out. The arithmetic shift by 40 leaves a sign-extended 24-bit
; value, so both runs expect the same short float-based sequence
; (v_cvt_f32_i32, v_rcp_iflag_f32, v_trunc_f32, v_mad_f32, one conditional
; correction, then v_bfe_i32 to sign-extend the 24-bit result).
; The immediate 0x41c00000 is the f32 encoding of 24.0.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
1781define amdgpu_kernel void @s_test_sdiv24_k_num_i64(i64 addrspace(1)* %out, i64 %x) {
1782; GCN-LABEL: s_test_sdiv24_k_num_i64:
1783; GCN:       ; %bb.0:
1784; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1785; GCN-NEXT:    s_mov_b32 s7, 0xf000
1786; GCN-NEXT:    s_mov_b32 s6, -1
1787; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1788; GCN-NEXT:    s_ashr_i64 s[2:3], s[2:3], 40
1789; GCN-NEXT:    v_cvt_f32_i32_e32 v0, s2
1790; GCN-NEXT:    s_mov_b32 s3, 0x41c00000
1791; GCN-NEXT:    s_mov_b32 s4, s0
1792; GCN-NEXT:    s_ashr_i32 s0, s2, 30
1793; GCN-NEXT:    v_rcp_iflag_f32_e32 v1, v0
1794; GCN-NEXT:    s_or_b32 s0, s0, 1
1795; GCN-NEXT:    v_mov_b32_e32 v3, s0
1796; GCN-NEXT:    s_mov_b32 s5, s1
1797; GCN-NEXT:    v_mul_f32_e32 v1, 0x41c00000, v1
1798; GCN-NEXT:    v_trunc_f32_e32 v1, v1
1799; GCN-NEXT:    v_mad_f32 v2, -v1, v0, s3
1800; GCN-NEXT:    v_cvt_i32_f32_e32 v1, v1
1801; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, |v0|
1802; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
1803; GCN-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
1804; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 24
1805; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
1806; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1807; GCN-NEXT:    s_endpgm
1808;
1809; GCN-IR-LABEL: s_test_sdiv24_k_num_i64:
1810; GCN-IR:       ; %bb.0:
1811; GCN-IR-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1812; GCN-IR-NEXT:    s_mov_b32 s7, 0xf000
1813; GCN-IR-NEXT:    s_mov_b32 s6, -1
1814; GCN-IR-NEXT:    s_waitcnt lgkmcnt(0)
1815; GCN-IR-NEXT:    s_ashr_i64 s[2:3], s[2:3], 40
1816; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v0, s2
1817; GCN-IR-NEXT:    s_mov_b32 s3, 0x41c00000
1818; GCN-IR-NEXT:    s_mov_b32 s4, s0
1819; GCN-IR-NEXT:    s_ashr_i32 s0, s2, 30
1820; GCN-IR-NEXT:    v_rcp_iflag_f32_e32 v1, v0
1821; GCN-IR-NEXT:    s_or_b32 s0, s0, 1
1822; GCN-IR-NEXT:    v_mov_b32_e32 v3, s0
1823; GCN-IR-NEXT:    s_mov_b32 s5, s1
1824; GCN-IR-NEXT:    v_mul_f32_e32 v1, 0x41c00000, v1
1825; GCN-IR-NEXT:    v_trunc_f32_e32 v1, v1
1826; GCN-IR-NEXT:    v_mad_f32 v2, -v1, v0, s3
1827; GCN-IR-NEXT:    v_cvt_i32_f32_e32 v1, v1
1828; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, |v0|
1829; GCN-IR-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
1830; GCN-IR-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
1831; GCN-IR-NEXT:    v_bfe_i32 v0, v0, 0, 24
1832; GCN-IR-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
1833; GCN-IR-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1834; GCN-IR-NEXT:    s_endpgm
1835  %x.shr = ashr i64 %x, 40
1836  %result = sdiv i64 24, %x.shr
1837  store i64 %result, i64 addrspace(1)* %out
1838  ret void
1839}
1840
; Kernel variant: divides (%x ashr 40), a sign-extended 24-bit value, by the
; constant 23423 and stores the i64 quotient to %out.
; Because the divisor is constant, no v_rcp instruction is expected: the
; dividend is multiplied by the immediate 0x38331158 (approximately
; 1/23423 as f32), and 0x46b6fe00 (23423.0 as f32) is used for the remainder
; check.
; The two runs expect the same sequence apart from the operand order of the
; final v_add_i32_e32.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
1841define amdgpu_kernel void @s_test_sdiv24_k_den_i64(i64 addrspace(1)* %out, i64 %x) {
1842; GCN-LABEL: s_test_sdiv24_k_den_i64:
1843; GCN:       ; %bb.0:
1844; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1845; GCN-NEXT:    s_mov_b32 s8, 0x46b6fe00
1846; GCN-NEXT:    s_mov_b32 s7, 0xf000
1847; GCN-NEXT:    s_mov_b32 s6, -1
1848; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1849; GCN-NEXT:    s_ashr_i64 s[2:3], s[2:3], 40
1850; GCN-NEXT:    v_cvt_f32_i32_e32 v0, s2
1851; GCN-NEXT:    s_mov_b32 s4, s0
1852; GCN-NEXT:    s_ashr_i32 s0, s2, 30
1853; GCN-NEXT:    s_or_b32 s0, s0, 1
1854; GCN-NEXT:    v_mul_f32_e32 v1, 0x38331158, v0
1855; GCN-NEXT:    v_trunc_f32_e32 v1, v1
1856; GCN-NEXT:    v_mad_f32 v0, -v1, s8, v0
1857; GCN-NEXT:    v_cvt_i32_f32_e32 v1, v1
1858; GCN-NEXT:    v_mov_b32_e32 v2, s0
1859; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v0|, s8
1860; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
1861; GCN-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
1862; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 24
1863; GCN-NEXT:    s_mov_b32 s5, s1
1864; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
1865; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1866; GCN-NEXT:    s_endpgm
1867;
1868; GCN-IR-LABEL: s_test_sdiv24_k_den_i64:
1869; GCN-IR:       ; %bb.0:
1870; GCN-IR-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1871; GCN-IR-NEXT:    s_mov_b32 s8, 0x46b6fe00
1872; GCN-IR-NEXT:    s_mov_b32 s7, 0xf000
1873; GCN-IR-NEXT:    s_mov_b32 s6, -1
1874; GCN-IR-NEXT:    s_waitcnt lgkmcnt(0)
1875; GCN-IR-NEXT:    s_ashr_i64 s[2:3], s[2:3], 40
1876; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v0, s2
1877; GCN-IR-NEXT:    s_mov_b32 s4, s0
1878; GCN-IR-NEXT:    s_ashr_i32 s0, s2, 30
1879; GCN-IR-NEXT:    s_or_b32 s0, s0, 1
1880; GCN-IR-NEXT:    v_mul_f32_e32 v1, 0x38331158, v0
1881; GCN-IR-NEXT:    v_trunc_f32_e32 v1, v1
1882; GCN-IR-NEXT:    v_mad_f32 v0, -v1, s8, v0
1883; GCN-IR-NEXT:    v_cvt_i32_f32_e32 v1, v1
1884; GCN-IR-NEXT:    v_mov_b32_e32 v2, s0
1885; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v0|, s8
1886; GCN-IR-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
1887; GCN-IR-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
1888; GCN-IR-NEXT:    v_bfe_i32 v0, v0, 0, 24
1889; GCN-IR-NEXT:    s_mov_b32 s5, s1
1890; GCN-IR-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
1891; GCN-IR-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1892; GCN-IR-NEXT:    s_endpgm
1893  %x.shr = ashr i64 %x, 40
1894  %result = sdiv i64 %x.shr, 23423
1895  store i64 %result, i64 addrspace(1)* %out
1896  ret void
1897}
1898
; Non-kernel variant: returns the constant 24 divided by (%x ashr 40), where
; the arithmetic shift leaves a sign-extended 24-bit divisor.
; Both runs expect the identical float-based sequence (v_cvt_f32_i32,
; v_rcp_iflag_f32, v_trunc_f32, v_mad_f32, one conditional correction and a
; v_bfe_i32 sign-extension of the 24-bit quotient).
; The immediate 0x41c00000 is the f32 encoding of 24.0.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
1899define i64 @v_test_sdiv24_k_num_i64(i64 %x) {
1900; GCN-LABEL: v_test_sdiv24_k_num_i64:
1901; GCN:       ; %bb.0:
1902; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1903; GCN-NEXT:    v_ashr_i64 v[0:1], v[0:1], 40
1904; GCN-NEXT:    s_mov_b32 s4, 0x41c00000
1905; GCN-NEXT:    v_cvt_f32_i32_e32 v1, v0
1906; GCN-NEXT:    v_ashrrev_i32_e32 v0, 30, v0
1907; GCN-NEXT:    v_or_b32_e32 v0, 1, v0
1908; GCN-NEXT:    v_rcp_iflag_f32_e32 v2, v1
1909; GCN-NEXT:    v_mul_f32_e32 v2, 0x41c00000, v2
1910; GCN-NEXT:    v_trunc_f32_e32 v2, v2
1911; GCN-NEXT:    v_mad_f32 v3, -v2, v1, s4
1912; GCN-NEXT:    v_cvt_i32_f32_e32 v2, v2
1913; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, |v1|
1914; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
1915; GCN-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
1916; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 24
1917; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
1918; GCN-NEXT:    s_setpc_b64 s[30:31]
1919;
1920; GCN-IR-LABEL: v_test_sdiv24_k_num_i64:
1921; GCN-IR:       ; %bb.0:
1922; GCN-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1923; GCN-IR-NEXT:    v_ashr_i64 v[0:1], v[0:1], 40
1924; GCN-IR-NEXT:    s_mov_b32 s4, 0x41c00000
1925; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v1, v0
1926; GCN-IR-NEXT:    v_ashrrev_i32_e32 v0, 30, v0
1927; GCN-IR-NEXT:    v_or_b32_e32 v0, 1, v0
1928; GCN-IR-NEXT:    v_rcp_iflag_f32_e32 v2, v1
1929; GCN-IR-NEXT:    v_mul_f32_e32 v2, 0x41c00000, v2
1930; GCN-IR-NEXT:    v_trunc_f32_e32 v2, v2
1931; GCN-IR-NEXT:    v_mad_f32 v3, -v2, v1, s4
1932; GCN-IR-NEXT:    v_cvt_i32_f32_e32 v2, v2
1933; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, |v1|
1934; GCN-IR-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
1935; GCN-IR-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
1936; GCN-IR-NEXT:    v_bfe_i32 v0, v0, 0, 24
1937; GCN-IR-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
1938; GCN-IR-NEXT:    s_setpc_b64 s[30:31]
1939  %x.shr = ashr i64 %x, 40
1940  %result = sdiv i64 24, %x.shr
1941  ret i64 %result
1942}
1943
; Non-kernel variant: returns the constant 32768 (2^15) divided by
; (%x ashr 40), where the arithmetic shift leaves a sign-extended 24-bit
; divisor.
; Both runs expect the identical float-based sequence built around
; v_rcp_iflag_f32, with one conditional correction and a v_bfe_i32
; sign-extension of the 24-bit quotient.
; The immediate 0x47000000 is the f32 encoding of 32768.0.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
1944define i64 @v_test_sdiv24_pow2_k_num_i64(i64 %x) {
1945; GCN-LABEL: v_test_sdiv24_pow2_k_num_i64:
1946; GCN:       ; %bb.0:
1947; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1948; GCN-NEXT:    v_ashr_i64 v[0:1], v[0:1], 40
1949; GCN-NEXT:    s_mov_b32 s4, 0x47000000
1950; GCN-NEXT:    v_cvt_f32_i32_e32 v1, v0
1951; GCN-NEXT:    v_ashrrev_i32_e32 v0, 30, v0
1952; GCN-NEXT:    v_or_b32_e32 v0, 1, v0
1953; GCN-NEXT:    v_rcp_iflag_f32_e32 v2, v1
1954; GCN-NEXT:    v_mul_f32_e32 v2, 0x47000000, v2
1955; GCN-NEXT:    v_trunc_f32_e32 v2, v2
1956; GCN-NEXT:    v_mad_f32 v3, -v2, v1, s4
1957; GCN-NEXT:    v_cvt_i32_f32_e32 v2, v2
1958; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, |v1|
1959; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
1960; GCN-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
1961; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 24
1962; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
1963; GCN-NEXT:    s_setpc_b64 s[30:31]
1964;
1965; GCN-IR-LABEL: v_test_sdiv24_pow2_k_num_i64:
1966; GCN-IR:       ; %bb.0:
1967; GCN-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1968; GCN-IR-NEXT:    v_ashr_i64 v[0:1], v[0:1], 40
1969; GCN-IR-NEXT:    s_mov_b32 s4, 0x47000000
1970; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v1, v0
1971; GCN-IR-NEXT:    v_ashrrev_i32_e32 v0, 30, v0
1972; GCN-IR-NEXT:    v_or_b32_e32 v0, 1, v0
1973; GCN-IR-NEXT:    v_rcp_iflag_f32_e32 v2, v1
1974; GCN-IR-NEXT:    v_mul_f32_e32 v2, 0x47000000, v2
1975; GCN-IR-NEXT:    v_trunc_f32_e32 v2, v2
1976; GCN-IR-NEXT:    v_mad_f32 v3, -v2, v1, s4
1977; GCN-IR-NEXT:    v_cvt_i32_f32_e32 v2, v2
1978; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, |v1|
1979; GCN-IR-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
1980; GCN-IR-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
1981; GCN-IR-NEXT:    v_bfe_i32 v0, v0, 0, 24
1982; GCN-IR-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
1983; GCN-IR-NEXT:    s_setpc_b64 s[30:31]
1984  %x.shr = ashr i64 %x, 40
1985  %result = sdiv i64 32768, %x.shr
1986  ret i64 %result
1987}
1988
; Non-kernel variant: returns (%x ashr 40), a sign-extended 24-bit value,
; divided by the constant 32768 (2^15).
; The two runs expect different code here. The default GCN run expects the
; shift-and-add form (bias taken from the sign word shifted right by 17,
; then a 64-bit arithmetic shift right by 15). The GCN-IR run
; (-amdgpu-codegenprepare-expand-div64) expects the float-based 24-bit
; sequence, multiplying by 0x38000000 (2^-15 as f32) and comparing the
; remainder against 0x47000000 (32768.0 as f32).
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
1989define i64 @v_test_sdiv24_pow2_k_den_i64(i64 %x) {
1990; GCN-LABEL: v_test_sdiv24_pow2_k_den_i64:
1991; GCN:       ; %bb.0:
1992; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1993; GCN-NEXT:    v_ashr_i64 v[0:1], v[0:1], 40
1994; GCN-NEXT:    v_lshrrev_b32_e32 v2, 17, v1
1995; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
1996; GCN-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1997; GCN-NEXT:    v_ashr_i64 v[0:1], v[0:1], 15
1998; GCN-NEXT:    s_setpc_b64 s[30:31]
1999;
2000; GCN-IR-LABEL: v_test_sdiv24_pow2_k_den_i64:
2001; GCN-IR:       ; %bb.0:
2002; GCN-IR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2003; GCN-IR-NEXT:    v_ashr_i64 v[0:1], v[0:1], 40
2004; GCN-IR-NEXT:    s_mov_b32 s4, 0x47000000
2005; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v1, v0
2006; GCN-IR-NEXT:    v_ashrrev_i32_e32 v0, 30, v0
2007; GCN-IR-NEXT:    v_or_b32_e32 v0, 1, v0
2008; GCN-IR-NEXT:    v_mul_f32_e32 v2, 0x38000000, v1
2009; GCN-IR-NEXT:    v_trunc_f32_e32 v2, v2
2010; GCN-IR-NEXT:    v_mad_f32 v1, -v2, s4, v1
2011; GCN-IR-NEXT:    v_cvt_i32_f32_e32 v2, v2
2012; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, s4
2013; GCN-IR-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
2014; GCN-IR-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
2015; GCN-IR-NEXT:    v_bfe_i32 v0, v0, 0, 24
2016; GCN-IR-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
2017; GCN-IR-NEXT:    s_setpc_b64 s[30:31]
2018  %x.shr = ashr i64 %x, 40
2019  %result = sdiv i64 %x.shr, 32768
2020  ret i64 %result
2021}
2022